def parse_embedding(c_species, p_species, c_var_id, p_var_id, node_order):
    '''
        @brief:
            Transfer the per-node rows of one policy weight matrix from the
            parent species to the child species. @c_species is modified in
            place.
            @1. child nodes matched to a parent node (root to root, joint
                to joint) copy the parent's row
            @2. child nodes without a parent counterpart (negative entry in
                @node_order) receive the mean of the rows copied in @1

        @input:
            @c_species, @p_species:
                species dicts. Read here: ['node_info']['num_nodes'],
                ['node_info']['node_type_dict'] and ['policy_weights']
            @c_var_id, @p_var_id:
                key of the weight matrix inside the child's / the parent's
                'policy_weights'
            @node_order:
                node_order[c_node] is the parent node matched to child node
                c_node, or a negative value if the child node is new

        @return:
            @c_species, the same object that was passed in
    '''
    # original note: case one, noninput_shared; case two, noninput_separate

    def graph_parameters_of(species):
        # only the 'ob' part of the result is requested; the observation
        # values themselves are not used here, so an all-zero dummy
        # observation is passed in
        node_info = species['node_info']
        dummy_obs = np.zeros([1, node_info['num_nodes'] * 100])
        return graph_data_util.construct_graph_input_feeddict(
            node_info, dummy_obs, -1, -1, -1, -1, -1, -1, -1,
            request_data=['ob']
        )[1]

    def joint_row(graph_parameters, node_type_dict, node):
        # the first column of the 'joint' graph parameters is used as the
        # row index into the weight matrix
        # NOTE(review): presumably the embedding id of the node -- confirm
        return int(
            graph_parameters['joint'][node_type_dict['joint'].index(node), 0]
        )

    c_graph_parameters = graph_parameters_of(c_species)
    p_graph_parameters = graph_parameters_of(p_species)

    # make sure that only root and joint is in the node type dict
    node_order_assertion(c_species, p_species)
    c_node_type_dict = c_species['node_info']['node_type_dict']
    p_node_type_dict = p_species['node_info']['node_type_dict']

    c_weights = c_species['policy_weights'][c_var_id]
    p_weights = p_species['policy_weights'][p_var_id]

    # pass 1: inherit the rows of every node that exists in the parent
    assigned_id = []
    for c_node, p_node in enumerate(node_order):
        if p_node < 0:
            # a new joint node: nothing to inherit. Skip it explicitly so
            # that no stale position from an earlier iteration is copied or
            # recorded twice in @assigned_id
            continue
        if c_node == 0:
            # the root node case
            assert p_node == 0
            c_pos = int(c_graph_parameters['root'][0, 0])
            p_pos = int(p_graph_parameters['root'][0, 0])
        else:
            # c_node > 0, p_node > 0
            c_pos = joint_row(c_graph_parameters, c_node_type_dict, c_node)
            p_pos = joint_row(p_graph_parameters, p_node_type_dict, p_node)
        c_weights[c_pos, :] = p_weights[p_pos, :]
        assigned_id.append(c_pos)

    # pass 2: fill the rows of the new nodes
    new_nodes = [
        c_node for c_node, p_node in enumerate(node_order) if p_node < 0
    ]
    if new_nodes:
        # @assigned_id holds row positions of the CHILD matrix, so the mean
        # is taken over the child's @c_var_id matrix (the rows written in
        # pass 1). It does not depend on the new rows and is computed once.
        inherited_mean = np.mean(
            [c_weights[pos, :] for pos in assigned_id], axis=0
        )
        for c_node in new_nodes:
            c_pos = joint_row(c_graph_parameters, c_node_type_dict, c_node)
            c_weights[c_pos, :] = inherited_mean

    return c_species
def get_feed_dict(self, new_species):
    '''
        @brief:
            Build the feed dict (graph indices, per-node-type input
            parameters and number of nodes) for one species. The batch size
            entry is fixed to 1.

        @input:
            @new_species: dict providing 'adj_matrix', 'node_attr' and
                'xml_str'

        @return:
            the feed dict keyed by the placeholders stored on self
    '''
    adj_matrix = new_species['adj_matrix']
    node_attr = new_species['node_attr']
    # not used below; the lookup is kept so a missing key still fails here
    xml_str = new_species['xml_str']

    # build the node information of this species' graph
    node_info = gen_gnn_param.gen_gnn_param(
        self.args.task_name, adj_matrix, node_attr,
        gnn_node_option=self.args.gnn_node_option,
        root_connection_option=self.args.root_connection_option,
        gnn_output_option=self.args.gnn_output_option,
        gnn_embedding_option='parameter'
    )
    node_info = gnn_util.construct_ob_size_dict(node_info, 64)
    for offset_kind in ('node', 'output'):
        node_info = gnn_util.get_inverse_type_offset(node_info, offset_kind)
    node_info = gnn_util.get_receive_send_idx(node_info)

    # a single all-zero observation row; only its size matters here
    placeholder_obs = np.zeros([1, 6 * node_info['num_nodes'] + 6])
    (_, graph_parameters, receive_idx, send_idx, node_type_idx,
     inverse_node_type_idx, _, _, _) = \
        graph_data_util.construct_graph_input_feeddict(
            node_info, placeholder_obs, -1, -1, -1, -1, -1, -1, -1
        )

    feed_dict = {
        self._receive_idx: receive_idx,
        self._inverse_node_type_idx: inverse_node_type_idx,
        self._batch_size_int: 1,
    }

    # the send idx, one entry per edge type
    feed_dict.update(
        (self._send_idx[edge_type], send_idx[edge_type])
        for edge_type in node_info['edge_type_list']
    )

    # the node type idx, then the input parameters, one entry per node type
    node_types = node_info['node_type_dict']
    feed_dict.update(
        (self._node_type_idx[node_type], node_type_idx[node_type])
        for node_type in node_types
    )
    feed_dict.update(
        (self._input_parameters[node_type], graph_parameters[node_type])
        for node_type in node_types
    )

    feed_dict[self._num_nodes_ph] = adj_matrix.shape[0]
    return feed_dict
def prepare_feed_dict_map(self):
    '''
        @brief:
            When trying to get the sub feed dict in
            @construct_minibatchFeeddict_from_feeddict, some keys are
            directly transferable, while others need some extra work. This
            function prepares the key lists / static dicts for that.
            Nothing is returned; the attributes listed below are set.

        @attributes set:
            @self.batch_feed_dict_key:
                Keys used for the update.
                    @self.action_placeholder
                    @self.advantage_placeholder
                    @self.oldaction_dist_mu_placeholder
                    @self.oldaction_dist_logstd_placeholder
                    @self.obs_placeholder
                        (only if the policy is not a gnn)
                    @self.raw_obs_placeholder
                        (only if the policy is a gnn and the value is not)
                    @self.target_return_placeholder
                        (always appended)

            @self.graph_batch_feed_dict_key:
                Empty unless the policy is a gnn, in which case it holds
                    @self.graph_obs_placeholder
                    @self.graph_parameters_placeholder

            @self.static_feed_dict:
                Static elements fixed by the optim parameters
                    @self.batch_size_float_placeholder
                    @self.batch_size_int_placeholder (gnn policy only)

            @self.dynamical_feed_dict_key:
                Elements that could be changing from time to time
                    @self.kl_lambda_placeholder (if use_kl_penalty)
                    @self.lr_placeholder

            @self.graph_index_feed_dict (gnn policy only):
                Static index for the gnn
                    @self.receive_idx_placeholder
                    @self.inverse_node_type_idx_placeholder
                    @self.inverse_output_type_idx_placeholder
                    @self.send_idx_placeholder[i_edge]
                    @self.node_type_idx_placeholder[i_node_type]
                    @self.output_type_idx_placeholder[i_output_type]

        @raises:
            AssertionError if nervenetplus is used with a gnn policy and
            optim_batch_size is not a multiple of gnn_num_prop_steps
    '''
    args = self.args
    use_gnn_policy = args.use_gnn_as_policy

    # step 1: gather the key for batch_feed_dict
    self.batch_feed_dict_key = [
        self.action_placeholder,
        self.advantage_placeholder,
        self.oldaction_dist_mu_placeholder,
        self.oldaction_dist_logstd_placeholder
    ]
    if not use_gnn_policy:
        self.batch_feed_dict_key.append(self.obs_placeholder)
    if use_gnn_policy and not args.use_gnn_as_value:
        self.batch_feed_dict_key.append(self.raw_obs_placeholder)
    self.batch_feed_dict_key.append(self.target_return_placeholder)

    # step 2: gather the graph batch feed_dict
    self.graph_batch_feed_dict_key = []
    if use_gnn_policy:
        self.graph_batch_feed_dict_key.extend([
            self.graph_obs_placeholder,
            self.graph_parameters_placeholder
        ])

    # step 3: gather the static feed_dictionary
    self.static_feed_dict = {
        self.batch_size_float_placeholder:
            np.array(float(args.optim_batch_size))
    }
    if use_gnn_policy:
        if args.nervenetplus:
            # the batch is split by the number of propagation steps, so it
            # must divide evenly; otherwise int() below silently truncates.
            # (the previous check compared gnn_num_prop_steps with itself
            # and therefore could never fail)
            assert args.optim_batch_size % args.gnn_num_prop_steps == 0, \
                'optim_batch_size must be a multiple of gnn_num_prop_steps'
            graph_batch_size = int(
                args.optim_batch_size / args.gnn_num_prop_steps
            )
        else:
            graph_batch_size = args.optim_batch_size
        self.static_feed_dict[self.batch_size_int_placeholder] = \
            graph_batch_size

    # step 4: gather the dynamical feed_dictionary
    self.dynamical_feed_dict_key = []
    if args.use_kl_penalty:
        self.dynamical_feed_dict_key.append(self.kl_lambda_placeholder)
    self.dynamical_feed_dict_key.append(self.lr_placeholder)

    # step 5: gather the graph_index feed_dict
    if use_gnn_policy:
        # construct a dummy obs to pass the batch size info; only its first
        # dimension differs between the nervenetplus and the plain case
        dummy_obs = np.zeros([graph_batch_size, 10])
        node_info = self.policy_network.get_node_info()

        # get the index for minibatches
        _, _, receive_idx, send_idx, \
            node_type_idx, inverse_node_type_idx, \
            output_type_idx, inverse_output_type_idx, _ = \
            graph_data_util.construct_graph_input_feeddict(
                node_info, dummy_obs, -1, -1, -1, -1, -1, -1, -1,
                request_data=['idx']
            )

        self.graph_index_feed_dict = {
            self.receive_idx_placeholder: receive_idx,
            self.inverse_node_type_idx_placeholder: inverse_node_type_idx,
            self.inverse_output_type_idx_placeholder:
                inverse_output_type_idx
        }

        # append the send idx
        for i_edge in node_info['edge_type_list']:
            self.graph_index_feed_dict[
                self.send_idx_placeholder[i_edge]] = send_idx[i_edge]

        # append the node type idx
        for i_node_type in node_info['node_type_dict']:
            self.graph_index_feed_dict[
                self.node_type_idx_placeholder[i_node_type]
            ] = node_type_idx[i_node_type]

        # append the output type idx
        for i_output_type in node_info['output_type_dict']:
            self.graph_index_feed_dict[
                self.output_type_idx_placeholder[i_output_type]
            ] = output_type_idx[i_output_type]
def prepared_policy_network_feeddict(self, obs_n, rollout_data=None,
                                     step_model=False):
    '''
        @brief:
            prepare the feed dict for the policy network part

        @input:
            @obs_n: the batch of observations
            @rollout_data: required (asserted) on the nervenetplus path
                with more than one observation
            @step_model: if True the 'step_*' placeholders are fed instead
                of the regular ones

        @return:
            (feed_dict, nervenetplus_batch_pos). The second element is None
            unless the nervenetplus path was taken; it is also stored in
            self.nervenetplus_batch_pos
    '''
    batch_pos = None

    if self.args.use_gnn_as_policy:

        def cached_idx():
            # index structures kept from the previous call, in the
            # positional order construct_graph_input_feeddict expects
            return (
                self.receive_idx, self.send_idx, self.node_type_idx,
                self.inverse_node_type_idx, self.output_type_idx,
                self.inverse_output_type_idx, self.last_batch_size
            )

        if not self.args.nervenetplus or obs_n.shape[0] == 1:
            # observations and indices come from one single call
            (graph_obs, graph_parameters,
             self.receive_idx, self.send_idx, self.node_type_idx,
             self.inverse_node_type_idx, self.output_type_idx,
             self.inverse_output_type_idx, self.last_batch_size) = \
                graph_data_util.construct_graph_input_feeddict(
                    self.node_info, obs_n, *cached_idx(),
                    request_data=['ob', 'idx']
                )
        else:
            assert rollout_data is not None
            # preprocess the episodic information: first the observation
            # part only ...
            graph_obs, graph_parameters, _, _, _, _, _, _, _ = \
                graph_data_util.construct_graph_input_feeddict(
                    self.node_info, obs_n, -1, -1, -1, -1, -1, -1, -1,
                    request_data=['ob']
                )
            batch_pos, total_size = \
                nervenetplus_util.nervenetplus_step_assign(
                    rollout_data, self.args.gnn_num_prop_steps
                )
            # ... then the indices, sized by the number of step groups
            num_groups = int(total_size / self.args.gnn_num_prop_steps)
            (_, _,
             self.receive_idx, self.send_idx, self.node_type_idx,
             self.inverse_node_type_idx, self.output_type_idx,
             self.inverse_output_type_idx, self.last_batch_size) = \
                graph_data_util.construct_graph_input_feeddict(
                    self.node_info, np.empty([num_groups]), *cached_idx(),
                    request_data=['idx']
                )

        # the step model owns a parallel set of placeholders whose
        # attribute names only differ by the 'step_' prefix
        prefix = 'step_' if step_model else ''

        def placeholder(name):
            return getattr(self, prefix + name)

        feed_dict = {
            placeholder('batch_size_int_placeholder'):
                int(self.last_batch_size),
            placeholder('receive_idx_placeholder'): self.receive_idx,
            placeholder('inverse_node_type_idx_placeholder'):
                self.inverse_node_type_idx,
            placeholder('inverse_output_type_idx_placeholder'):
                self.inverse_output_type_idx
        }

        # the input obs and parameters, per node type
        obs_ph = placeholder('graph_obs_placeholder')
        parameters_ph = placeholder('graph_parameters_placeholder')
        for node_type in self.node_info['node_type_dict']:
            feed_dict[obs_ph[node_type]] = graph_obs[node_type]
            feed_dict[parameters_ph[node_type]] = graph_parameters[node_type]

        # the send idx, per edge type
        send_ph = placeholder('send_idx_placeholder')
        feed_dict.update(
            (send_ph[edge_type], self.send_idx[edge_type])
            for edge_type in self.node_info['edge_type_list']
        )

        # the node type idx, per node type
        node_type_ph = placeholder('node_type_idx_placeholder')
        feed_dict.update(
            (node_type_ph[node_type], self.node_type_idx[node_type])
            for node_type in self.node_info['node_type_dict']
        )

        # the output type idx, per output type
        output_type_ph = placeholder('output_type_idx_placeholder')
        feed_dict.update(
            (output_type_ph[output_type], self.output_type_idx[output_type])
            for output_type in self.node_info['output_type_dict']
        )

        # if the raw_obs is needed for the baseline (this placeholder is
        # shared by the step and the regular model)
        if self.raw_obs_placeholder is not None:
            feed_dict[self.raw_obs_placeholder] = obs_n
    else:
        # plain policy: the observation is fed as it is
        feed_dict = {self.obs_placeholder: obs_n}

    self.nervenetplus_batch_pos = batch_pos
    return feed_dict, batch_pos
def prepared_policy_network_feeddict(self, obs_n):
    '''
        @brief:
            prepare the feed dict for the policy network part

        @input:
            @obs_n: the batch of observations

        @return:
            the feed dict. For a non-gnn policy it only maps
            @self.obs_placeholder to @obs_n
    '''
    if not self.args.use_gnn_as_policy:
        # plain policy: the observation is fed as it is
        return {self.obs_placeholder: obs_n}

    # Construct the graph input. The receive / send / node type / output
    # type indices returned here tell the network how to pass and update
    # the information; they are stored on self and handed back in on the
    # next call.
    (graph_obs, graph_parameters,
     self.receive_idx, self.send_idx, self.node_type_idx,
     self.inverse_node_type_idx, self.output_type_idx,
     self.inverse_output_type_idx, self.last_batch_size) = \
        graph_data_util.construct_graph_input_feeddict(
            self.node_info, obs_n,
            self.receive_idx, self.send_idx, self.node_type_idx,
            self.inverse_node_type_idx, self.output_type_idx,
            self.inverse_output_type_idx, self.last_batch_size
        )

    node_types = self.node_info['node_type_dict']

    feed_dict = {
        self.batch_size_int_placeholder: int(self.last_batch_size),
        self.receive_idx_placeholder: self.receive_idx,
        self.inverse_node_type_idx_placeholder: self.inverse_node_type_idx,
        self.inverse_output_type_idx_placeholder:
            self.inverse_output_type_idx
    }

    # the input obs and parameters, per node type
    for node_type in node_types:
        feed_dict[self.graph_obs_placeholder[node_type]] = \
            graph_obs[node_type]
        feed_dict[self.graph_parameters_placeholder[node_type]] = \
            graph_parameters[node_type]

    # the send idx, per edge type
    feed_dict.update(
        (self.send_idx_placeholder[edge_type], self.send_idx[edge_type])
        for edge_type in self.node_info['edge_type_list']
    )

    # the node type idx, per node type
    feed_dict.update(
        (self.node_type_idx_placeholder[node_type],
         self.node_type_idx[node_type])
        for node_type in node_types
    )

    # the output type idx, per output type
    feed_dict.update(
        (self.output_type_idx_placeholder[output_type],
         self.output_type_idx[output_type])
        for output_type in self.node_info['output_type_dict']
    )

    # if the raw_obs is needed for the baseline
    if self.raw_obs_placeholder is not None:
        feed_dict[self.raw_obs_placeholder] = obs_n

    return feed_dict
obs = env.reset() obs_n = np.expand_dims(obs, 0) """ ====================================================================== Note: At this point, we convert the state into the graph observation!! ====================================================================== """ graph_obs, graph_parameters, receive_idx, send_idx, node_type_idx, \ inverse_node_type_idx, output_type_idx, \ inverse_output_type_idx, last_batch_size = \ graph_data_util.construct_graph_input_feeddict( node_info, obs_n, receive_idx, send_idx, node_type_idx, inverse_node_type_idx, output_type_idx, inverse_output_type_idx, last_batch_size ) # this is the converted input print(graph_obs) """ === Fit the state at t to the Networks """ _ob_feat = { node_type: MLP_ob_mapping[node_type](graph_obs[node_type])[-1] for node_type in node_info["node_type_dict"]
def prepare_policy_network_feeddict(observations, is_nervenet, node_info,
                                    current_idx_dict, rollout_data,
                                    nervenetplus, gnn_num_prop_steps):
    '''
        @brief:
            prepare the feed dict for the policy network part

        @input:
            @observations: the batch of observations
            @is_nervenet: whether the policy is a graph network
            @node_info: graph description handed to
                construct_graph_input_feeddict
            @current_idx_dict: the index structures of the previous call
                (keys: see @idx_keys below)
            @rollout_data: required (asserted) if @nervenetplus is set
            @nervenetplus, @gnn_num_prop_steps: nervenetplus options

        @return:
            (feed_dict, batch_pos). Without a graph network this is
            ({'obs_placeholder': observations}, -1); otherwise batch_pos is
            None unless @nervenetplus is set
    '''
    if not is_nervenet:
        # plain policy: the observation is fed as it is
        return {'obs_placeholder': observations}, -1

    # keys of @current_idx_dict, in the positional order expected by
    # construct_graph_input_feeddict
    idx_keys = (
        'receive_idx', 'send_idx', 'node_type_idx', 'inverse_node_type_idx',
        'output_type_idx', 'inverse_output_type_idx', 'last_batch_size'
    )
    batch_pos = None

    # Construct the graph input. The receive / send / node type / output
    # type indices tell the network how to pass and update the information.
    if nervenetplus:
        assert rollout_data is not None
        # preprocess the episodic information: the observation part first
        graph_obs, graph_parameters, _, _, _, _, _, _, _ = \
            graph_data_util.construct_graph_input_feeddict(
                node_info, observations, -1, -1, -1, -1, -1, -1, -1,
                request_data=['ob']
            )
        batch_pos, total_size = nervenetplus_util.nervenetplus_step_assign(
            rollout_data, gnn_num_prop_steps
        )
        # then the indices, sized by the number of step groups
        group_batch = np.empty([int(total_size / gnn_num_prop_steps)])
        cached_idx = [current_idx_dict[key] for key in idx_keys]
        (_, _, receive_idx, send_idx, node_type_idx, inverse_node_type_idx,
         output_type_idx, inverse_output_type_idx, last_batch_size) = \
            graph_data_util.construct_graph_input_feeddict(
                node_info, group_batch, *cached_idx, request_data=['idx']
            )
    else:
        cached_idx = [current_idx_dict[key] for key in idx_keys]
        (graph_obs, graph_parameters, receive_idx, send_idx, node_type_idx,
         inverse_node_type_idx, output_type_idx, inverse_output_type_idx,
         last_batch_size) = \
            graph_data_util.construct_graph_input_feeddict(
                node_info, observations, *cached_idx
            )

    feed_dict = {
        'batch_size_int_placeholder': int(last_batch_size),
        'raw_obs_placeholder': observations,
        'last_batch_size': last_batch_size,
        'receive_idx': receive_idx,
        'send_idx': send_idx,
        'node_type_idx': node_type_idx,
        'inverse_node_type_idx': inverse_node_type_idx,
        'output_type_idx': output_type_idx,
        'inverse_output_type_idx': inverse_output_type_idx,
        'graph_obs': graph_obs,
        'graph_parameters': graph_parameters,
    }
    return feed_dict, batch_pos