# Example 1
        def automatic_dpatch(hidden):
            """Produce `inf_num_predictors` predictor inputs from `hidden`.

            Applies one fully connected layer (scope 'fcn_auto_dset') sized by
            self.parameters['inf_num_predictors']. For image observations the
            conv feature map is first flattened over its spatial regions, and
            the per-predictor outputs are re-flattened into a single vector
            per sample before being returned.
            """
            n_predictors = self.parameters['inf_num_predictors']
            if self.parameters['obs_type'] == 'image':
                dims = hidden.get_shape().as_list()
                n_regions = dims[1] * dims[2]
                regions = tf.reshape(hidden, [-1, n_regions, dims[3]])
                inf_hidden = net.fcn(regions, 1, n_predictors, None,
                                     'fcn_auto_dset')
                per_predictor = inf_hidden.get_shape().as_list()[2]
                inf_hidden = tf.reshape(
                    inf_hidden, shape=[-1, per_predictor * n_predictors])
            else:
                inf_hidden = net.fcn(hidden, 1, n_predictors, None,
                                     'fcn_auto_dset')
            return inf_hidden
 def attention(hidden_conv, inf_hidden):
     """Multi-head soft attention over convolutional feature regions.

     For each of self.parameters['num_heads'] heads: project the flattened
     conv features ('att1_<head>') and `inf_hidden` ('att2_<head>') into a
     shared space, take tanh of their broadcast sum, score each spatial
     region ('att3_<head>'), softmax over regions, and reduce to an
     attended feature patch. Each head contributes its patch concatenated
     with its flattened attention weights; all heads are concatenated into
     one output vector.
     """
     dims = hidden_conv.get_shape().as_list()
     n_regions = dims[1] * dims[2]
     regions = tf.reshape(hidden_conv, [-1, n_regions, dims[3]])
     head_outputs = []
     for head in range(self.parameters['num_heads']):
         conv_proj = net.fcn(regions, 1,
                             self.parameters['num_att_units'], None,
                             'att', 'att1_' + str(head))
         hidden_proj = net.fcn(inf_hidden, 1,
                               self.parameters['num_att_units'], None,
                               'att', 'att2_' + str(head))
         context = tf.nn.tanh(conv_proj + tf.expand_dims(hidden_proj, 1))
         weights = net.fcn(context, 1, [1], None, 'att',
                           'att3_' + str(head))
         weights = tf.nn.softmax(weights, axis=1)
         d_patch = tf.reduce_sum(weights * regions, axis=1)
         flat_weights = tf.reshape(weights, shape=[-1, n_regions])
         head_outputs.append(tf.concat([d_patch, flat_weights], axis=1))
     return tf.concat(head_outputs, axis=1)
    def build_main_model(self):
        """
        Builds neural network model to approximate policy and value functions.

        Creates the observation placeholder (image-shaped or flat, depending
        on parameters['obs_type']), optionally rescales Atari pixel input,
        then applies whichever feature extractors are enabled on the
        instance: convolutional (self.convolutional), fully connected
        (self.fully_connected), and/or recurrent LSTM (self.recurrent).
        The resulting feature tensor is stored in self.hidden; the recurrent
        branch additionally sets self.prev_action, self.prev_action_onehot,
        self.seq_len, self.state_in and self.state_out.
        """
        if self.parameters['obs_type'] == 'image':
            self.observation = tf.placeholder(shape=[
                None, self.parameters["frame_height"],
                self.parameters["frame_width"], self.parameters["num_frames"]
            ],
                                              dtype=tf.float32,
                                              name='observation')
        else:
            self.observation = tf.placeholder(
                shape=[None, self.parameters['obs_size']],
                dtype=tf.float32,
                name='observation')
        # Normalize pixel input to [0, 1].
        # NOTE(review): this rebinds self.observation to the scaled tensor,
        # so the raw placeholder is no longer reachable through the
        # attribute — confirm no caller needs the placeholder handle itself.
        if self.parameters['env_type'] == 'atari':
            self.observation = tf.cast(self.observation, tf.float32) / 255.

        if self.convolutional:
            self.feature_vector = net.cnn(self.observation,
                                          self.parameters["num_conv_layers"],
                                          self.parameters["num_filters"],
                                          self.parameters["kernel_sizes"],
                                          self.parameters["strides"],
                                          tf.nn.relu, False, 'cnn')
            network_input = c_layers.flatten(self.feature_vector)
        else:
            self.feature_vector = self.observation
            network_input = self.feature_vector

        # Fix: `hidden` was previously only bound inside the fully-connected
        # or recurrent branches, so a configuration with both disabled made
        # `self.hidden = hidden` raise NameError. Default to the raw network
        # input so every configuration yields a feature tensor.
        hidden = network_input

        if self.fully_connected:
            hidden = net.fcn(network_input, self.parameters["num_fc_layers"],
                             self.parameters["num_fc_units"], tf.nn.relu,
                             'fcn')

        if self.recurrent:
            self.prev_action = tf.placeholder(shape=[None],
                                              dtype=tf.int32,
                                              name='prev_action')
            self.prev_action_onehot = c_layers.one_hot_encoding(
                self.prev_action, self.act_size)
            # network_input = tf.concat([network_input, self.prev_action_onehot], axis=1)

            c_in = tf.placeholder(tf.float32,
                                  [None, self.parameters['num_rec_units']],
                                  name='c_state')
            h_in = tf.placeholder(tf.float32,
                                  [None, self.parameters['num_rec_units']],
                                  name='h_state')
            self.seq_len = tf.placeholder(shape=None,
                                          dtype=tf.int32,
                                          name='sequence_length')
            self.state_in = tf.contrib.rnn.LSTMStateTuple(c_in, h_in)
            # NOTE(review): the RNN consumes `network_input`, not the FC
            # output, so when both stacks are enabled the FC features are
            # discarded — confirm this is intentional.
            hidden, self.state_out = net.rnn(network_input, self.state_in,
                                             self.parameters['num_rec_units'],
                                             self.seq_len, 'rnn')
        self.hidden = hidden