def _build_network_weights(self):
        '''
            @brief: build the network
        '''
        # step -1: build the baseline network mlp if needed
        if self._shared_network:
            MLP_baseline_shape = self._network_shape + [1] + \
                [self._hidden_dim]  # (l_1, l_2, ..., l_o, l_i)
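            # nn.MLP shape lists here appear to follow the (l_1, ..., l_o, l_i)
            # convention: hidden layer sizes first, then the output size
            # (1, the value prediction), then the input size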
            MLP_baseline_act_func = ['tanh'] * (len(MLP_baseline_shape) - 1)
            MLP_baseline_act_func[-1] = None
            with tf.variable_scope('baseline'):
                self._MLP_baseline_out = nn.MLP(MLP_baseline_shape,
                                                init_method=self._init_method,
                                                act_func=MLP_baseline_act_func,
                                                add_bias=True,
                                                scope='vpred')

        # step 1: build the weight parameters (mlp, gru)
        with tf.variable_scope(self._name_scope):
            # step 1_1: build the embedding matrix (mlp)
            # tensor shape (None, para_size) --> (None, input_dim - ob_size)
            assert self._input_feat_dim % 2 == 0
            if 'noninput' not in self._gnn_embedding_option:
                self._MLP_embedding = {
                    node_type: nn.MLP(
                        [
                            self._input_feat_dim // 2,
                            self._node_info['para_size_dict'][node_type]
                        ],
                        init_method=self._init_method,
                        act_func=['tanh'] * 1,  # one layer at most
                        add_bias=True,
                        scope='MLP_embedding_node_type_{}'.format(node_type))
                    for node_type in self._node_info['node_type_dict']
                    if self._node_info['ob_size_dict'][node_type] > 0
                }
                self._MLP_embedding.update({
                    node_type: nn.MLP(
                        [
                            self._input_feat_dim,
                            self._node_info['para_size_dict'][node_type]
                        ],
                        init_method=self._init_method,
                        act_func=['tanh'] * 1,  # one layer at most
                        add_bias=True,
                        scope='MLP_embedding_node_type_{}'.format(node_type))
                    for node_type in self._node_info['node_type_dict']
                    if self._node_info['ob_size_dict'][node_type] == 0
                })
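                # node types with observations embed their parameters into half
                # of the input feature dim (the other half comes from the
                # observation mapping in step 1_2); observation-free node types
                # use the full dim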
            else:
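                # 'noninput' embedding option: fixed random embedding tables,
                # presumably indexed by the integer node parameter values
                # (rows = max parameter value + 1), replace the embedding mlps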
                embedding_vec_size = max(
                    np.reshape([
                        max(self._node_info['node_parameters'][i_key])
                        for i_key in self._node_info['node_parameters']
                    ], [-1])) + 1
                embedding_vec_size = int(embedding_vec_size)
                self._embedding_variable = {}
                out = self._npr.randn(embedding_vec_size,
                                      self._input_feat_dim // 2).astype(
                                          np.float32)
                out *= 1.0 / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
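                # normalise each embedding column to unit l2 norm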
                self._embedding_variable[False] = tf.Variable(
                    out, name='embedding_HALF', trainable=self._trainable)

                if np.any([
                        node_size == 0 for _, node_size in
                        self._node_info['ob_size_dict'].items()
                ]):

                    out = self._npr.randn(embedding_vec_size,
                                          self._input_feat_dim).astype(
                                              np.float32)
                    out *= 1.0 / np.sqrt(
                        np.square(out).sum(axis=0, keepdims=True))
                    self._embedding_variable[True] = tf.Variable(
                        out, name='embedding_FULL', trainable=self._trainable)

            # step 1_2: build the ob mapping matrix (mlp)
            # tensor shape (None, ob_size) --> (None, input_feat_dim / 2)
            self._MLP_ob_mapping = {
                node_type: nn.MLP(
                    [
                        self._input_feat_dim // 2,
                        self._node_info['ob_size_dict'][node_type]
                    ],
                    init_method=self._init_method,
                    act_func=['tanh'] * 1,  # one layer at most
                    add_bias=True,
                    scope='MLP_embedding_node_type_{}'.format(node_type))
                for node_type in self._node_info['node_type_dict']
                if self._node_info['ob_size_dict'][node_type] > 0
            }
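            # observations are mapped into the other half of the input feature
            # dim; presumably they are concatenated with the parameter
            # embeddings when the node inputs are assembled elsewhere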

        with tf.variable_scope(self._name_scope):
            # step 1_4: build the mlp for the propagation between nodes
            MLP_prop_shape = self._network_shape + \
                [self._hidden_dim] + [self._hidden_dim]
            self._MLP_prop = {
                i_edge: nn.MLP(MLP_prop_shape,
                               init_method=self._init_method,
                               act_func=['tanh'] * (len(MLP_prop_shape) - 1),
                               add_bias=True,
                               scope='MLP_prop_edge_{}'.format(i_edge))
                for i_edge in self._node_info['edge_type_list']
            }
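            # one propagation mlp per edge type; each maps a hidden_dim message
            # to a hidden_dim message ((l_1, ..., l_o, l_i) shape convention)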

            logger.info('building prop mlp for edge type {}'.format(
                self._node_info['edge_type_list']))

            # step 1_5: build the node update function for each node type
            if self._node_update_method == 'GRU':
                self._Node_update = {
                    i_node_type:
                    nn.GRU(self._hidden_dim,
                           self._hidden_dim,
                           init_method=self._init_method,
                           scope='GRU_node_{}'.format(i_node_type))
                    for i_node_type in self._node_info['node_type_dict']
                }
            else:
                assert self._node_update_method == 'MLP'
                hidden_MLP_update_shape = self._network_shape
                self._Node_update = {
                    i_node_type:
                    nn.MLPU(message_dim=self._hidden_dim,
                            embedding_dim=self._hidden_dim,
                            hidden_shape=hidden_MLP_update_shape,
                            init_method=self._init_method,
                            act_func_type='tanh',
                            add_bias=True,
                            scope='MLPU_node_{}'.format(i_node_type))
                    for i_node_type in self._node_info['node_type_dict']
                }
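            # each node type gets its own update cell: a GRU over the incoming
            # message and previous hidden state, or an mlp-based update (MLPU)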

            logger.info(
                'building node update function for node type {}'.format(
                    self._node_info['node_type_dict']))

            # step 1_6: the mlp for the mu of the actions
            MLP_out_shape = self._network_shape + [1] + \
                [self._hidden_dim]  # (l_1, l_2, ..., l_o, l_i)
            MLP_out_act_func = ['tanh'] * (len(MLP_out_shape) - 1)
            MLP_out_act_func[-1] = None

            if not self._is_baseline:
                self._MLP_Out = {
                    output_type: nn.MLP(MLP_out_shape,
                                        init_method=self._init_method,
                                        act_func=MLP_out_act_func,
                                        add_bias=True,
                                        scope='MLP_out')
                    for output_type in self._node_info['output_type_dict']
                }

                # step 1_7 (optional): no separate mlp is built for the log std
                # of the actions; it is a single trainable variable (step 1_8)
            else:
                self._MLP_Out = nn.MLP(MLP_out_shape,
                                       init_method=self._init_method,
                                       act_func=MLP_out_act_func,
                                       add_bias=True,
                                       scope='MLP_out')
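            # policy case: one output head per output type (one action mean per
            # node); baseline case: a single shared head for the value prediction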

            # step 1_8: build the log std for the actions
            with tf.variable_scope(self._name_scope):
                # size: [1, num_action]
                self._action_dist_logstd = tf.Variable(
                    (0.0 * self._npr.randn(1, self._output_size)).astype(
                        np.float32),
                    name="policy_logstd",
                    trainable=self._trainable)

    def _build_network_weights(self):
        '''
            @brief: build the network
            @weights:
                _MLP_embedding (1 layer)
                _MLP_ob_mapping (1 layer)
                _MLP_prop (2 layers, currently commented out)
                _MLP_Out (2 layers)
                _Node_update (one GRU per node type)
                _action_dist_logstd (trainable log std of the actions)
        '''
        # step 1: build the weight parameters (mlp, gru)
        with tf.variable_scope(self._name_scope):
            # step 1_1: build the embedding matrix (mlp)
            # tensor shape (None, para_size) --> (None, input_dim - ob_size)
            assert self._input_feat_dim % 2 == 0
            if 'noninput' not in self._gnn_embedding_option:
                self._MLP_embedding = {
                    node_type: nn.MLP(
                        [
                            self._input_feat_dim // 2,
                            self._node_info['para_size_dict'][node_type]
                        ],
                        init_method=self._init_method,
                        act_func=['tanh'] * 1,  # one layer at most
                        add_bias=True,
                        scope='MLP_embedding_node_type_{}'.format(node_type))
                    for node_type in self._node_info['node_type_dict']
                    if self._node_info['ob_size_dict'][node_type] > 0
                }
                self._MLP_embedding.update({
                    node_type: nn.MLP(
                        [
                            self._input_feat_dim,
                            self._node_info['para_size_dict'][node_type]
                        ],
                        init_method=self._init_method,
                        act_func=['tanh'] * 1,  # one layer at most
                        add_bias=True,
                        scope='MLP_embedding_node_type_{}'.format(node_type))
                    for node_type in self._node_info['node_type_dict']
                    if self._node_info['ob_size_dict'][node_type] == 0
                })
            else:
                embedding_vec_size = max(
                    np.reshape([
                        max(self._node_info['node_parameters'][i_key])
                        for i_key in self._node_info['node_parameters']
                    ], [-1])) + 1
                embedding_vec_size = int(embedding_vec_size)
                self._embedding_variable = {}

                out = self._npr.randn(embedding_vec_size,
                                      int(self._input_feat_dim / 2)).astype(
                                          np.float32)
                out *= 1.0 / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
                self._embedding_variable[False] = tf.Variable(
                    out, name='embedding_HALF', trainable=self._trainable)

                if np.any([
                        node_size == 0 for _, node_size in
                        self._node_info['ob_size_dict'].items()
                ]):

                    out = self._npr.randn(embedding_vec_size,
                                          self._input_feat_dim).astype(
                                              np.float32)
                    out *= 1.0 / np.sqrt(
                        np.square(out).sum(axis=0, keepdims=True))
                    self._embedding_variable[True] = tf.Variable(
                        out, name='embedding_FULL', trainable=self._trainable)

            # step 1_2: build the ob mapping matrix (mlp)
            # tensor shape (None, ob_size) --> (None, input_feat_dim / 2)
            self._MLP_ob_mapping = {
                node_type: nn.MLP(
                    [
                        self._input_feat_dim // 2,
                        self._node_info['ob_size_dict'][node_type]
                    ],
                    init_method=self._init_method,
                    act_func=['tanh'] * 1,  # one layer at most
                    add_bias=True,
                    scope='MLP_embedding_node_type_{}'.format(node_type))
                for node_type in self._node_info['node_type_dict']
                if self._node_info['ob_size_dict'][node_type] > 0
            }

            # step 1_4: build the mlp for the propagation between nodes
            '''
            MLP_prop_shape = self._network_shape + \
                [self._hidden_dim] + [self._hidden_dim]
            self._MLP_prop = {
                i_edge: nn.MLP(
                    MLP_prop_shape,
                    init_method=self._init_method,
                    act_func=['tanh'] * (len(MLP_prop_shape) - 1),
                    add_bias=True,
                    scope='MLP_prop_edge_{}'.format(i_edge)
                )
                for i_edge in self._node_info['edge_type_list']
            }
            '''
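            # note: the per-edge propagation mlps above are left commented out
            # in this variant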

            # step 1_5: build the node update function for each node type
            if self._node_update_method == 'GRU':
                self._Node_update = {
                    i_node_type: nn.GRU(
                        self._hidden_dim,  # for both the message and ob
                        self._hidden_dim,
                        init_method=self._init_method,
                        scope='GRU_node_{}'.format(i_node_type))
                    for i_node_type in self._node_info['node_type_dict']
                }
            else:
                assert False, 'only GRU node updates are supported here'

            # step 1_6: the mlp for the mu of the actions
            # (l_1, l_2, ..., l_o, l_i)
            MLP_out_shape = self._network_shape + \
                [self.args.gnn_output_per_node] + [self._hidden_dim]
            MLP_out_act_func = ['tanh'] * (len(MLP_out_shape) - 1)
            MLP_out_act_func[-1] = None

            self._MLP_Out = {
                output_type: nn.MLP(MLP_out_shape,
                                    init_method=self._init_method,
                                    act_func=MLP_out_act_func,
                                    add_bias=True,
                                    scope='MLP_out')
                for output_type in self._node_info['output_type_dict']
            }
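            # one output head per output type, each producing
            # self.args.gnn_output_per_node values per node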

            # step 1_8: build the log std for the actions
            self._action_dist_logstd = tf.Variable(
                (0.0 * self._npr.randn(1, self._output_size)).astype(
                    np.float32),
                name="policy_logstd",
                trainable=self._trainable)
    def _build_network_weights(self):
        '''
            @brief: build the network
        '''
        # step 1: build the weight parameters (mlp, gru)
        with tf.variable_scope(self._name_scope):
            # step 1_1: build the embedding matrix (mlp)
            # tensor shape (None, para_size) --> (None, input_dim - ob_size)
            self._MLP_embedding = {
                node_type: nn.MLP(
                    [
                        self._input_feat_dim,
                        self._node_info['para_size_dict'][node_type]
                    ],
                    init_method=self._init_method,
                    act_func=['tanh'] * 1,  # one layer at most
                    add_bias=True,
                    scope='MLP_embedding_node_type_{}'.format(node_type))
                for node_type in self._node_info['node_type_dict']
            }
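            # every node type embeds its parameters into the full
            # input_feat_dim here; no separate observation mapping is built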

            # step 1_4: build the mlp for the propagation between nodes
            MLP_prop_shape = self._network_shape + \
                [self._hidden_dim] + [self._hidden_dim]
            self._MLP_prop = {
                i_edge: nn.MLP(MLP_prop_shape,
                               init_method=self._init_method,
                               act_func=['tanh'] * (len(MLP_prop_shape) - 1),
                               add_bias=True,
                               scope='MLP_prop_edge_{}'.format(i_edge))
                for i_edge in self._node_info['edge_type_list']
            }

            # step 1_5: build the node update function for each node type
            if self._node_update_method == 'GRU':
                self._Node_update = {
                    i_node_type:
                    nn.GRU(self._hidden_dim,
                           self._hidden_dim,
                           init_method=self._init_method,
                           scope='GRU_node_{}'.format(i_node_type))
                    for i_node_type in self._node_info['node_type_dict']
                }
            else:
                assert self._node_update_method == 'MLP'
                hidden_MLP_update_shape = self._network_shape
                self._Node_update = {
                    i_node_type:
                    nn.MLPU(message_dim=self._hidden_dim,
                            embedding_dim=self._hidden_dim,
                            hidden_shape=hidden_MLP_update_shape,
                            init_method=self._init_method,
                            act_func_type='tanh',
                            add_bias=True,
                            scope='MLPU_node_{}'.format(i_node_type))
                    for i_node_type in self._node_info['node_type_dict']
                }

            # step 1_6: the mlp for the mu of the actions
            MLP_out_shape = self._network_shape + [1] + \
                [self._hidden_dim]  # (l_1, l_2, ..., l_o, l_i)
            MLP_out_act_func = ['tanh'] * (len(MLP_out_shape) - 1)

            if self.bayesian_op:
                self._MLP_Out = nn.MLP(MLP_out_shape,
                                       init_method=self._init_method,
                                       act_func=MLP_out_act_func,
                                       add_bias=True,
                                       scope='MLP_out',
                                       use_dropout=True)
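                # use_dropout presumably enables dropout inside the output mlp;
                # the placeholders below feed per-layer dropout masks of shape
                # [1, feat] so they can be supplied (and held fixed) at test time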
                self.test_dropout_mask = []
                for feat_shape in [self._hidden_dim] + self._network_shape:
                    # [64, 64, 1, 64]
                    self.test_dropout_mask.append(
                        tf.placeholder(tf.float32, shape=[1, feat_shape]))
                self.dropout_mask_shape = [self._hidden_dim
                                           ] + self._network_shape
            else:
                self.test_dropout_mask = []
                self.dropout_mask_shape = None
                self._MLP_Out = nn.MLP(MLP_out_shape,
                                       init_method=self._init_method,
                                       act_func=MLP_out_act_func,
                                       add_bias=True,
                                       scope='MLP_out')