def parse_embedding(c_species, p_species, c_var_id, p_var_id, node_order):
    '''
        @brief:
            Initialise rows of the child's weight matrix
            c_species['policy_weights'][c_var_id] from the parent's matrix
            p_species['policy_weights'][p_var_id].

            1. Every child node that is matched with a parent node gets a
               copy of the parent's row.
            2. Every new child node (no parent counterpart) gets the mean
               of the child rows that were filled in step 1.

            The row of a node is the integer stored in column 0 of the
            graph parameters returned by
            graph_data_util.construct_graph_input_feeddict.

        @input:
            @c_species, @p_species:
                dicts holding at least 'node_info' (with 'num_nodes' and
                'node_type_dict') and 'policy_weights'
            @c_var_id, @p_var_id:
                keys of the weight matrices inside 'policy_weights'
            @node_order:
                node_order[c_node] is the parent node matched with the
                child node c_node; a negative value marks a new node

        @return:
            @c_species, whose weight matrix has been modified in place
    '''
    # case one, noninput_shared # case two, noninput_separate

    def _graph_parameters(species):
        # only the graph parameters are needed, so a dummy observation and
        # placeholder (-1) indices are passed in
        node_info = species['node_info']
        _, graph_parameters, _, _, _, _, _, _, _ = \
            graph_data_util.construct_graph_input_feeddict(
                node_info,
                np.zeros([1, node_info['num_nodes'] * 100]),
                -1, -1, -1, -1, -1, -1, -1, request_data=['ob']
            )
        return graph_parameters

    def _joint_row(graph_parameters, node_type_dict, node):
        # position of the node inside the joint list -> row of the weights
        return int(graph_parameters['joint'][
            node_type_dict['joint'].index(node), 0])

    c_graph_parameters = _graph_parameters(c_species)
    p_graph_parameters = _graph_parameters(p_species)

    # make sure that only root and joint is in the node type dict
    node_order_assertion(c_species, p_species)
    c_node_type_dict = c_species['node_info']['node_type_dict']
    p_node_type_dict = p_species['node_info']['node_type_dict']

    c_weights = c_species['policy_weights'][c_var_id]
    p_weights = p_species['policy_weights'][p_var_id]

    # pass 1: copy the parent rows of all the matched nodes
    assigned_id = []
    for c_node, p_node in enumerate(node_order):
        if p_node < 0:
            # a new joint node, it is filled in the second pass. Skipping
            # is required: falling through would reuse the positions of
            # the previous iteration and record a duplicated row
            continue

        if c_node == 0:  # the root node case
            assert p_node == 0
            c_pos = int(c_graph_parameters['root'][0, 0])
            p_pos = int(p_graph_parameters['root'][0, 0])
        else:  # c_node > 0. p_node > 0
            c_pos = _joint_row(c_graph_parameters, c_node_type_dict, c_node)
            p_pos = _joint_row(p_graph_parameters, p_node_type_dict, p_node)

        c_weights[c_pos, :] = p_weights[p_pos, :]
        assigned_id.append(c_pos)

    # pass 2: the new nodes take the mean of the rows assigned in pass 1
    for c_node, p_node in enumerate(node_order):
        if p_node >= 0:
            continue

        c_pos = _joint_row(c_graph_parameters, c_node_type_dict, c_node)
        # @assigned_id holds rows of the child matrix, so the mean is taken
        # over the child's own matrix (@c_var_id), not the one named by
        # @p_var_id
        c_weights[c_pos, :] = np.mean(
            [c_weights[i_pos, :] for i_pos in assigned_id], axis=0
        )
    return c_species
    def get_feed_dict(self, new_species):
        '''
            @brief:
                Build the feed dict for one species: the graph indices,
                the per node type input parameters, a batch size of 1 and
                the number of nodes (rows of the adjacency matrix).

            @input:
                @new_species: dict with the keys 'adj_matrix', 'node_attr'
                    and 'xml_str'

            @return:
                @feed_dict: placeholder -> value
        '''
        adj_matrix = new_species['adj_matrix']
        node_attr = new_species['node_attr']
        _ = new_species['xml_str']  # looked up, but not used below

        # generate the node info and run it through the gnn_util helpers
        node_info = gen_gnn_param.gen_gnn_param(
            self.args.task_name, adj_matrix, node_attr,
            gnn_node_option=self.args.gnn_node_option,
            root_connection_option=self.args.root_connection_option,
            gnn_output_option=self.args.gnn_output_option,
            gnn_embedding_option='parameter'
        )
        node_info = gnn_util.construct_ob_size_dict(node_info, 64)
        for i_type in ('node', 'output'):
            node_info = gnn_util.get_inverse_type_offset(node_info, i_type)
        node_info = gnn_util.get_receive_send_idx(node_info)

        # a dummy observation with batch size 1 is used to get the indices
        graph_input = graph_data_util.construct_graph_input_feeddict(
            node_info,
            np.zeros([1, 6 * node_info['num_nodes'] + 6]),
            -1, -1, -1, -1, -1, -1, -1
        )
        _, graph_parameters, receive_idx, send_idx, \
            node_type_idx, inverse_node_type_idx, _, _, _ = graph_input

        feed_dict = {
            self._receive_idx: receive_idx,
            self._inverse_node_type_idx: inverse_node_type_idx,
            self._batch_size_int: 1,
        }

        # the send idx, one entry for each edge type
        feed_dict.update({
            self._send_idx[i_edge]: send_idx[i_edge]
            for i_edge in node_info['edge_type_list']
        })

        # the node type idx, one entry for each node type
        feed_dict.update({
            self._node_type_idx[i_node_type]: node_type_idx[i_node_type]
            for i_node_type in node_info['node_type_dict']
        })

        # the input parameters, one entry for each node type
        feed_dict.update({
            self._input_parameters[i_node_type]:
            graph_parameters[i_node_type]
            for i_node_type in node_info['node_type_dict']
        })

        feed_dict[self._num_nodes_ph] = adj_matrix.shape[0]
        return feed_dict
    def prepare_feed_dict_map(self):
        '''
            @brief:
                Gather the placeholders (and the static values) that are
                needed to build the feed dicts. Nothing is returned, the
                results are stored as attributes of @self.

            @return:
                @self.batch_feed_dict_key:
                    list of placeholders, shared between the fc policy
                    network and the ggnn network

                        @self.action_placeholder
                        @self.advantage_placeholder
                        @self.oldaction_dist_mu_placeholder
                        @self.oldaction_dist_logstd_placeholder

                        (if the policy is not a gnn)
                        @self.obs_placeholder

                        (if the policy is a gnn and the value is not)
                        @self.raw_obs_placeholder

                        (always appended)
                        @self.target_return_placeholder

                @self.graph_batch_feed_dict_key
                    list of placeholders used by the ggnn, empty if the
                    policy is not a gnn

                        @self.graph_obs_placeholder
                        @self.graph_parameters_placeholder

                @self.static_feed_dict:
                    dict of values fixed by the optim parameters

                        @self.batch_size_float_placeholder
                        (if the policy is a gnn)
                        @self.batch_size_int_placeholder
                            optim_batch_size, divided by
                            gnn_num_prop_steps if nervenetplus is used

                @self.dynamical_feed_dict_key:
                    list of placeholders whose value could be changing
                    from time to time

                        (if use_kl_penalty)
                        @self.kl_lambda_placeholder
                        @self.lr_placeholder

                @self.graph_index_feed_dict:
                    dict of the static indices for the ggnn. It is only
                    set when the policy is a gnn.

                        @self.receive_idx_placeholder
                        @self.inverse_node_type_idx_placeholder
                        @self.inverse_output_type_idx_placeholder
                        @self.send_idx_placeholder[i_edge]
                        @self.node_type_idx_placeholder[i_node_type]
                        @self.output_type_idx_placeholder[i_output_type]

            @raise:
                AssertionError if nervenetplus is used and optim_batch_size
                is not a multiple of gnn_num_prop_steps
        '''
        args = self.args

        # step 1: gather the key for batch_feed_dict
        self.batch_feed_dict_key = [
            self.action_placeholder, self.advantage_placeholder,
            self.oldaction_dist_mu_placeholder,
            self.oldaction_dist_logstd_placeholder
        ]

        if not args.use_gnn_as_policy:
            self.batch_feed_dict_key.append(self.obs_placeholder)
        elif not args.use_gnn_as_value:
            self.batch_feed_dict_key.append(self.raw_obs_placeholder)

        self.batch_feed_dict_key.append(self.target_return_placeholder)

        # step 2: gather the graph batch feed_dict
        self.graph_batch_feed_dict_key = []
        if args.use_gnn_as_policy:
            self.graph_batch_feed_dict_key.extend([
                self.graph_obs_placeholder, self.graph_parameters_placeholder
            ])

        # step 3: gather the static feed_dictionary
        self.static_feed_dict = {
            self.batch_size_float_placeholder:
            np.array(float(args.optim_batch_size))
        }
        if args.use_gnn_as_policy:
            # with nervenetplus the batch is split over the propagation
            # steps, the graph batch size shrinks accordingly
            if args.nervenetplus:
                graph_batch_size = int(
                    args.optim_batch_size / args.gnn_num_prop_steps
                )
            else:
                graph_batch_size = args.optim_batch_size
            self.static_feed_dict[self.batch_size_int_placeholder] = \
                graph_batch_size

        # step 4: gather the dynamical feed_dictionary
        self.dynamical_feed_dict_key = []
        if args.use_kl_penalty:
            self.dynamical_feed_dict_key.append(self.kl_lambda_placeholder)
        self.dynamical_feed_dict_key.append(self.lr_placeholder)

        # step 5: gather the graph_index feed_dict
        if not args.use_gnn_as_policy:
            return

        if args.nervenetplus:
            # the check used to compare gnn_num_prop_steps with itself,
            # which could never fail
            assert args.optim_batch_size % args.gnn_num_prop_steps == 0, \
                'optim_batch_size must be a multiple of gnn_num_prop_steps'

        # construct a dummy obs to pass the batch size info
        dummy_obs = np.zeros([graph_batch_size, 10])
        node_info = self.policy_network.get_node_info()

        # get the index for minibatches
        _, _, receive_idx, send_idx, \
            node_type_idx, inverse_node_type_idx, \
            output_type_idx, inverse_output_type_idx, _ = \
            graph_data_util.construct_graph_input_feeddict(
                node_info,
                dummy_obs, -1, -1, -1, -1, -1, -1, -1,
                request_data=['idx']
            )

        self.graph_index_feed_dict = {
            self.receive_idx_placeholder: receive_idx,
            self.inverse_node_type_idx_placeholder: inverse_node_type_idx,
            self.inverse_output_type_idx_placeholder:
            inverse_output_type_idx
        }

        # append the send idx
        for i_edge in node_info['edge_type_list']:
            self.graph_index_feed_dict[
                self.send_idx_placeholder[i_edge]] = send_idx[i_edge]

        # append the node type idx
        for i_node_type in node_info['node_type_dict']:
            self.graph_index_feed_dict[self.node_type_idx_placeholder[
                i_node_type]] = node_type_idx[i_node_type]

        # append the output type idx
        for i_output_type in node_info['output_type_dict']:
            self.graph_index_feed_dict[self.output_type_idx_placeholder[
                i_output_type]] = output_type_idx[i_output_type]
Esempio n. 4
0
    def prepared_policy_network_feeddict(self,
                                         obs_n,
                                         rollout_data=None,
                                         step_model=False):
        '''
            @brief:
                Assemble the feed dict of the policy network.

            @input:
                @obs_n: the observations to be fed
                @rollout_data: must not be None when nervenetplus is used
                    and @obs_n holds more than one row
                @step_model: if True, the "step_*" placeholders are used
                    instead of the default ones

            @return:
                @feed_dict: placeholder -> value
                @nervenetplus_batch_pos: None unless the nervenetplus
                    branch is taken. It is also stored as
                    @self.nervenetplus_batch_pos
        '''
        if not self.args.use_gnn_as_policy:
            # it is the most easy case, nice and easy
            feed_dict = {self.obs_placeholder: obs_n}
            self.nervenetplus_batch_pos = None
            return feed_dict, None

        batch_pos = None
        build_graph_input = graph_data_util.construct_graph_input_feeddict

        if self.args.nervenetplus and obs_n.shape[0] != 1:
            assert rollout_data is not None

            # preprocess the episodic information
            graph_obs, graph_parameters, _, _, _, _, _, _, _ = \
                build_graph_input(
                    self.node_info, obs_n,
                    -1, -1, -1, -1, -1, -1, -1,
                    request_data=['ob']
                )
            batch_pos, total_size = \
                nervenetplus_util.nervenetplus_step_assign(
                    rollout_data, self.args.gnn_num_prop_steps
                )
            # the indices are generated for the reduced batch size
            reduced_batch = np.empty(
                [int(total_size / self.args.gnn_num_prop_steps)]
            )
            (_, _, self.receive_idx, self.send_idx,
             self.node_type_idx, self.inverse_node_type_idx,
             self.output_type_idx, self.inverse_output_type_idx,
             self.last_batch_size) = build_graph_input(
                self.node_info, reduced_batch,
                self.receive_idx, self.send_idx,
                self.node_type_idx, self.inverse_node_type_idx,
                self.output_type_idx, self.inverse_output_type_idx,
                self.last_batch_size,
                request_data=['idx']
            )
        else:
            # observations and indices are generated in one single call
            (graph_obs, graph_parameters,
             self.receive_idx, self.send_idx,
             self.node_type_idx, self.inverse_node_type_idx,
             self.output_type_idx, self.inverse_output_type_idx,
             self.last_batch_size) = build_graph_input(
                self.node_info, obs_n,
                self.receive_idx, self.send_idx,
                self.node_type_idx, self.inverse_node_type_idx,
                self.output_type_idx, self.inverse_output_type_idx,
                self.last_batch_size,
                request_data=['ob', 'idx']
            )

        # choose the set of placeholders to be filled
        if step_model:
            batch_size_ph = self.step_batch_size_int_placeholder
            receive_ph = self.step_receive_idx_placeholder
            inv_node_type_ph = self.step_inverse_node_type_idx_placeholder
            inv_output_type_ph = \
                self.step_inverse_output_type_idx_placeholder
            obs_ph = self.step_graph_obs_placeholder
            param_ph = self.step_graph_parameters_placeholder
            send_ph = self.step_send_idx_placeholder
            node_type_ph = self.step_node_type_idx_placeholder
            output_type_ph = self.step_output_type_idx_placeholder
        else:
            batch_size_ph = self.batch_size_int_placeholder
            receive_ph = self.receive_idx_placeholder
            inv_node_type_ph = self.inverse_node_type_idx_placeholder
            inv_output_type_ph = self.inverse_output_type_idx_placeholder
            obs_ph = self.graph_obs_placeholder
            param_ph = self.graph_parameters_placeholder
            send_ph = self.send_idx_placeholder
            node_type_ph = self.node_type_idx_placeholder
            output_type_ph = self.output_type_idx_placeholder

        feed_dict = {
            batch_size_ph: int(self.last_batch_size),
            receive_ph: self.receive_idx,
            inv_node_type_ph: self.inverse_node_type_idx,
            inv_output_type_ph: self.inverse_output_type_idx
        }

        # append the input obs and parameters
        for i_node_type in self.node_info['node_type_dict']:
            feed_dict[obs_ph[i_node_type]] = graph_obs[i_node_type]
            feed_dict[param_ph[i_node_type]] = graph_parameters[i_node_type]

        # append the send idx
        for i_edge in self.node_info['edge_type_list']:
            feed_dict[send_ph[i_edge]] = self.send_idx[i_edge]

        # append the node type idx
        for i_node_type in self.node_info['node_type_dict']:
            feed_dict[node_type_ph[i_node_type]] = \
                self.node_type_idx[i_node_type]

        # append the output type idx
        for i_output_type in self.node_info['output_type_dict']:
            feed_dict[output_type_ph[i_output_type]] = \
                self.output_type_idx[i_output_type]

        # if the raw_obs is needed for the baseline
        if self.raw_obs_placeholder is not None:
            feed_dict[self.raw_obs_placeholder] = obs_n

        self.nervenetplus_batch_pos = batch_pos
        return feed_dict, batch_pos
Esempio n. 5
0
    def prepared_policy_network_feeddict(self, obs_n):
        '''
            @brief:
                Assemble the feed dict of the policy network.

            @input:
                @obs_n: the observations to be fed

            @return:
                @feed_dict: placeholder -> value. If the policy is not a
                    gnn, it only maps @self.obs_placeholder to @obs_n
        '''
        if not self.args.use_gnn_as_policy:
            # it is the most easy case, nice and easy
            return {self.obs_placeholder: obs_n}

        # Convert the observations into the graph input. The indices
        # stored on @self are handed in and overwritten by the ones that
        # are returned, together with the last batch size.
        (graph_obs, graph_parameters,
         self.receive_idx, self.send_idx,
         self.node_type_idx, self.inverse_node_type_idx,
         self.output_type_idx, self.inverse_output_type_idx,
         self.last_batch_size) = \
            graph_data_util.construct_graph_input_feeddict(
                self.node_info, obs_n,
                self.receive_idx, self.send_idx,
                self.node_type_idx, self.inverse_node_type_idx,
                self.output_type_idx, self.inverse_output_type_idx,
                self.last_batch_size
            )

        node_types = self.node_info['node_type_dict']

        feed_dict = {
            self.batch_size_int_placeholder: int(self.last_batch_size),
            self.receive_idx_placeholder: self.receive_idx,
            self.inverse_node_type_idx_placeholder:
            self.inverse_node_type_idx,
            self.inverse_output_type_idx_placeholder:
            self.inverse_output_type_idx
        }

        # the input obs and parameters, one pair for each node type
        for node_type in node_types:
            obs_ph = self.graph_obs_placeholder[node_type]
            feed_dict[obs_ph] = graph_obs[node_type]
            param_ph = self.graph_parameters_placeholder[node_type]
            feed_dict[param_ph] = graph_parameters[node_type]

        # the send idx, one for each edge type
        for edge_type in self.node_info['edge_type_list']:
            send_ph = self.send_idx_placeholder[edge_type]
            feed_dict[send_ph] = self.send_idx[edge_type]

        # the node type idx
        for node_type in node_types:
            type_ph = self.node_type_idx_placeholder[node_type]
            feed_dict[type_ph] = self.node_type_idx[node_type]

        # the output type idx
        for output_type in self.node_info['output_type_dict']:
            type_ph = self.output_type_idx_placeholder[output_type]
            feed_dict[type_ph] = self.output_type_idx[output_type]

        # if the raw_obs is needed for the baseline
        if self.raw_obs_placeholder is not None:
            feed_dict[self.raw_obs_placeholder] = obs_n

        return feed_dict
Esempio n. 6
0
# Reset the environment and insert a new leading axis (size 1) into the
# returned observation.
obs = env.reset()
obs_n = np.expand_dims(obs, 0)
"""
======================================================================
Note: At this point, we convert the state into the graph observation!!
======================================================================
"""
# Convert the observation into the graph input. The index variables and
# last_batch_size are passed in and rebound to the returned values.
# NOTE(review): env, node_info, receive_idx, send_idx, node_type_idx,
# inverse_node_type_idx, output_type_idx, inverse_output_type_idx and
# last_batch_size are not defined in this excerpt; presumably they are
# initialised earlier in the script -- confirm.
graph_obs, graph_parameters, receive_idx, send_idx, node_type_idx, \
inverse_node_type_idx, output_type_idx, \
inverse_output_type_idx, last_batch_size = \
    graph_data_util.construct_graph_input_feeddict(
        node_info,
        obs_n,
        receive_idx,
        send_idx,
        node_type_idx,
        inverse_node_type_idx,
        output_type_idx,
        inverse_output_type_idx,
        last_batch_size
    )

# this is the converted input
print(graph_obs)
"""
=== Fit the state at t to the Networks
"""

_ob_feat = {
    node_type: MLP_ob_mapping[node_type](graph_obs[node_type])[-1]
    for node_type in node_info["node_type_dict"]
def prepare_policy_network_feeddict(observations, is_nervenet, node_info,
                                    current_idx_dict, rollout_data,
                                    nervenetplus, gnn_num_prop_steps):
    '''
        @brief:
            Prepare the feed dict for the policy network part. The keys
            are strings (names), not placeholders.

        @input:
            @observations: the observations to be fed
            @is_nervenet: if False, only the observations are returned
            @node_info: handed to construct_graph_input_feeddict
            @current_idx_dict: the current indices, with the keys
                'receive_idx', 'send_idx', 'node_type_idx',
                'inverse_node_type_idx', 'output_type_idx',
                'inverse_output_type_idx' and 'last_batch_size'
            @rollout_data: must not be None if @nervenetplus is set
            @nervenetplus, @gnn_num_prop_steps: the nervenetplus switch
                and its number of propagation steps

        @return:
            @feed_dict, @nervenetplus_batch_pos
                ({'obs_placeholder': observations}, -1) if not
                @is_nervenet. Otherwise the graph inputs and indices,
                with a batch pos of None unless @nervenetplus is set.
    '''
    if not is_nervenet:
        # it is the most easy case, nice and easy
        return {'obs_placeholder': observations}, -1

    build_graph_input = graph_data_util.construct_graph_input_feeddict
    idx_keys = (
        'receive_idx', 'send_idx', 'node_type_idx', 'inverse_node_type_idx',
        'output_type_idx', 'inverse_output_type_idx', 'last_batch_size'
    )
    batch_pos = None

    # The receive_idx, send_idx, node_idx and inverse_node_idx tell the
    # network how to pass and update the information.
    if nervenetplus:
        assert rollout_data is not None

        # preprocess the episodic information
        graph_obs, graph_parameters, _, _, _, _, _, _, _ = \
            build_graph_input(
                node_info, observations,
                -1, -1, -1, -1, -1, -1, -1,
                request_data=['ob']
            )
        batch_pos, total_size = nervenetplus_util.nervenetplus_step_assign(
            rollout_data, gnn_num_prop_steps
        )
        # the indices are generated for the reduced batch size
        (_, _, receive_idx, send_idx,
         node_type_idx, inverse_node_type_idx,
         output_type_idx, inverse_output_type_idx,
         last_batch_size) = build_graph_input(
            node_info,
            np.empty([int(total_size / gnn_num_prop_steps)]),
            *[current_idx_dict[key] for key in idx_keys],
            request_data=['idx']
        )
    else:
        (graph_obs, graph_parameters,
         receive_idx, send_idx, node_type_idx, inverse_node_type_idx,
         output_type_idx, inverse_output_type_idx,
         last_batch_size) = build_graph_input(
            node_info,
            observations,
            *[current_idx_dict[key] for key in idx_keys]
        )

    feed_dict = {
        'batch_size_int_placeholder': int(last_batch_size),
        'raw_obs_placeholder': observations,
        'last_batch_size': last_batch_size,
        'receive_idx': receive_idx,
        'send_idx': send_idx,
        'node_type_idx': node_type_idx,
        'inverse_node_type_idx': inverse_node_type_idx,
        'output_type_idx': output_type_idx,
        'inverse_output_type_idx': inverse_output_type_idx,
        'graph_obs': graph_obs,
        'graph_parameters': graph_parameters,
    }
    return feed_dict, batch_pos