Ejemplo n.º 1
0
 def _tech_net(self, data, trainable=True):
     """Stack three dense layers inside the "Tech_net" variable scope.

     :param data: input handed to the first dense layer
     :param trainable: forwarded as ``trainable`` to every dense layer
     :return: ``(output, scope_name)`` where ``output`` comes from the
         1-unit "DenseLayerOut" layer (built with ``func=None``) and
         ``scope_name`` is the name of the variable scope that was
         active while the layers were built
     """
     with tf.variable_scope("Tech_net"):
         # Options shared by all three layers.
         common = dict(is_training=self.is_training,
                       trainable=trainable,
                       norm=self.use_norm)
         # NOTE: a layer.batch_norm step on the raw input was sketched here
         # at some point and is still disabled.
         hidden = data
         for units, layer_name in ((256, "DenseLayer1"),
                                   (128, "DenseLayer2")):
             hidden = layer.dense_layer(hidden, units, layer_name, **common)
         output = layer.dense_layer(hidden,
                                    1,
                                    "DenseLayerOut",
                                    func=None,
                                    **common)
         scope_name = tf.get_variable_scope().name
     return output, scope_name
Ejemplo n.º 2
0
    def build_graph(self, obs_ph, acs_ph, reuse=False):
        """
        Build a three-layer dense network on the concatenated inputs.

        :param obs_ph: first input, joined with ``acs_ph`` along axis 1
        :param acs_ph: second input
        :param reuse: when truthy, ``reuse_variables()`` is called on the
            ``self.scope`` variable scope before any layer is built
        :return: output of the final 1-unit dense layer (``func=None``)
        """
        with tf.variable_scope(self.scope):
            if reuse:
                tf.get_variable_scope().reuse_variables()

            # Join the two inputs along axis 1 so that each row forms a transition.
            transition = tf.concat([obs_ph, acs_ph], axis=1)

            # An earlier variant built the same 3-layer shape with
            # tf.contrib.layers.fully_connected (two ReLU layers of
            # self.hidden_size units, then a 1-unit identity output).

            bn_flag = self.pop_batch_norm
            if self.use_norm:
                transition = layer.batch_norm(transition, bn_flag, 'BN')

            hidden_1 = layer.dense_layer(transition, 128, "DenseLayer1",
                                         is_training=bn_flag,
                                         trainable=True,
                                         norm=self.use_norm)
            hidden_2 = layer.dense_layer(hidden_1, 32, "DenseLayer2",
                                         is_training=bn_flag,
                                         trainable=True,
                                         norm=self.use_norm)
            # The output layer is built with norm=None and no activation.
            logits = layer.dense_layer(hidden_2, 1, "DenseLayer3",
                                       func=None,
                                       is_training=bn_flag,
                                       trainable=True,
                                       norm=None)
        return logits
Ejemplo n.º 3
0
    def __init__(self, name: str, sess, ob_space, act_space_array, activation=tf.nn.relu):
        """
        Build the policy and value networks under the variable scope ``name``.

        :param name: string, name of the enclosing variable scope
        :param sess: stored on ``self.sess``
        :param ob_space: size of the second dimension of the ``obs`` placeholder
        :param act_space_array: unit count of the softmax "tech_output" layer
        :param activation: passed as ``func`` to the hidden dense layers
        """
        self.sess = sess

        def hidden_stack(inputs):
            # Two 64-unit dense layers; variable names depend on the scope
            # that is active at call time.
            first = layer.dense_layer(inputs, 64, "DenseLayer1", func=activation)
            return layer.dense_layer(first, 64, "DenseLayer2", func=activation)

        with tf.variable_scope(name):
            self.obs = tf.placeholder(dtype=tf.float32, shape=[None, ob_space], name='obs')

            with tf.variable_scope('policy_net'), tf.variable_scope('controller'):
                policy_features = hidden_stack(self.obs)

                # original note: act_space_array = wait, build worker, build pylon, ...
                self.tech_probs = layer.dense_layer(policy_features, act_space_array, "tech_output",
                                                    func=tf.nn.softmax)
                # Draw one action per row from the log-probabilities, then
                # flatten the result to a 1-D tensor.
                sampled = tf.multinomial(tf.log(self.tech_probs), num_samples=1)
                self.tech_act = tf.reshape(sampled, shape=[-1])

            with tf.variable_scope('value_net'):
                value_features = hidden_stack(self.obs)
                self.v_preds = layer.dense_layer(value_features, 1, "DenseLayer4", func=None)

            self.scope = tf.get_variable_scope().name
Ejemplo n.º 4
0
    def __init__(self,
                 name: str,
                 sess,
                 ob_space,
                 add_ob_space,
                 act_space_array,
                 add_act_space,
                 freeze_head=True,
                 activation=tf.nn.relu):
        """
        Build policy and value networks that also consume extra and map observations.

        :param name: string, name of the enclosing variable scope
        :param sess: stored on ``self.sess``
        :param ob_space: size of the second dimension of the ``obs`` placeholder
        :param add_ob_space: size of the second dimension of the ``obs_add`` placeholder
        :param act_space_array: forwarded to ``layer.output_layer``
        :param add_act_space: forwarded to ``layer.output_layer``
        :param freeze_head: when True, the DenseLayer1/DenseLayer2 layers of
            both networks are built with ``trainable=False``
        :param activation: passed as ``func`` to the hidden dense layers
        """
        self.sess = sess
        self.add_weight = 0.2
        if P.use_small_map:
            self.map_width = 32
        else:
            self.map_width = 64
        self.map_channel = 10

        head_trainable = not freeze_head
        map_shape = [None, self.map_width, self.map_width, self.map_channel]

        with tf.variable_scope(name):
            self.obs = tf.placeholder(dtype=tf.float32, shape=[None, ob_space], name='obs')
            self.obs_add = tf.placeholder(dtype=tf.float32, shape=[None, add_ob_space], name='obs_add')
            self.obs_map = tf.placeholder(dtype=tf.float32, shape=map_shape, name='obs_map')

            with tf.variable_scope('policy_net'), tf.variable_scope('controller'):
                policy_hidden = layer.dense_layer(self.obs, 64, "DenseLayer1",
                                                  func=activation, trainable=head_trainable)
                self.layer_2 = layer.dense_layer(policy_hidden, 64, "DenseLayer2",
                                                 func=activation, trainable=head_trainable)

                # Features from the additional observation vector.
                self.layer_3 = layer.dense_layer(self.obs_add, 64, "DenseLayer3",
                                                 func=activation, initial_type='original')
                # Features from the map observation.
                self.layer_4, self.map_variable_scope = ops.simple_resnet(self.obs_map, 18, 64, "Resnet")

                # Weighted sum: the base features keep (1 - add_weight); the
                # two extra branches share add_weight equally.
                main_weight = 1. - self.add_weight
                side_weight = self.add_weight / 2.
                self.layer_5 = main_weight * self.layer_2 + side_weight * self.layer_3 \
                    + side_weight * self.layer_4
                self.tech_probs = layer.output_layer(self.layer_5, act_space_array, add_act_space,
                                                     "tech_output", func=tf.nn.softmax)

                # Draw one action per row, then flatten to a 1-D tensor.
                sampled = tf.multinomial(tf.log(self.tech_probs), num_samples=1)
                self.tech_act = tf.reshape(sampled, shape=[-1])

            with tf.variable_scope('value_net'):
                value_hidden_1 = layer.dense_layer(self.obs, 64, "DenseLayer1",
                                                   func=activation, trainable=head_trainable)
                value_hidden_2 = layer.dense_layer(value_hidden_1, 64, "DenseLayer2",
                                                   func=activation, trainable=head_trainable)
                # The value head only sees self.obs; it has no extra-observation branch.
                self.v_preds = layer.dense_layer(value_hidden_2, 1, "DenseLayer4", func=None)

            self.scope = tf.get_variable_scope().name
Ejemplo n.º 5
0
    def sep_policy_value_net(self,
                             name,
                             hidden_units=64,
                             resnet=None,
                             activation=tf.nn.relu,
                             freeze_head=False,
                             initial_type='original',
                             norm=True,
                             is_training=True,
                             reuse=False):
        """
        Build separate policy and value networks under the scope ``name``.

        The policy features are built from ``self.obs``. When
        ``self.use_add_obs`` is set, features from ``self.obs_add`` (and, when
        ``self.add_image`` is set, from ``resnet(self.obs_map, ...)``) are
        merged into them according to ``self.weighted_sum_type``.

        Sets ``self.layer_2``, ``self.layer_5`` and ``self.scope``; with
        ``self.use_add_obs`` also ``self.layer_3``, ``self.layer_4`` and
        ``self.map_variable_scope``.

        :param name: name of the enclosing variable scope
        :param hidden_units: unit count of every hidden dense layer
        :param resnet: callable invoked as
            ``resnet(self.obs_map, 18, hidden_units, "Resnet", is_training=...)``
            and expected to return a ``(features, variable_scope)`` pair;
            required when ``self.use_add_obs`` and ``self.add_image`` are set
        :param activation: passed as ``func`` to the hidden dense layers
        :param freeze_head: when True, DenseLayer1/DenseLayer2 of both
            networks are built with ``trainable=False``
        :param initial_type: forwarded to the layer builders
        :param norm: forwarded as ``norm`` to the first policy layer and to
            the hidden value layers
        :param is_training: forwarded to the layer builders
        :param reuse: forwarded to ``tf.variable_scope``
        :return: tuple ``(probs, act, v_preds)``
        :raises TypeError: if the image branch is enabled but ``resnet`` is
            not callable
        :raises NotImplementedError: if ``self.use_add_obs`` is set and
            ``self.weighted_sum_type`` is neither 'AddWeight' nor 'Add'
        """
        # Validate the configuration before any variable is created, so a
        # bad setup does not leave a half-built graph behind.
        if self.use_add_obs:
            if self.add_image and not callable(resnet):
                raise TypeError(
                    "sep_policy_value_net: self.add_image is set, so 'resnet' "
                    "must be a callable, got %r" % (resnet,))
            if self.weighted_sum_type not in ('AddWeight', 'Add'):
                raise NotImplementedError(
                    "weighted_sum_type %r is not supported"
                    % (self.weighted_sum_type,))

        with tf.variable_scope(name, reuse=reuse):

            with tf.variable_scope('policy_net'):
                with tf.variable_scope('controller'):
                    print('freeze_head:', freeze_head)
                    layer_1 = layer.dense_layer(self.obs,
                                                hidden_units,
                                                "DenseLayer1",
                                                norm=norm,
                                                is_training=is_training,
                                                func=activation,
                                                initial_type=initial_type,
                                                trainable=not freeze_head)
                    self.layer_2 = layer.dense_layer(layer_1,
                                                     hidden_units,
                                                     "DenseLayer2",
                                                     norm=False,
                                                     is_training=is_training,
                                                     func=activation,
                                                     initial_type=initial_type,
                                                     trainable=not freeze_head)

                    if self.use_add_obs:
                        # Features from the additional observation vector.
                        self.layer_3 = layer.dense_layer(
                            self.obs_add,
                            hidden_units,
                            "DenseLayer3",
                            norm=False,
                            is_training=is_training,
                            func=activation,
                            initial_type=initial_type)

                        if self.add_image:
                            # Features from the map observation.
                            self.layer_4, self.map_variable_scope = resnet(
                                self.obs_map,
                                18,
                                hidden_units,
                                "Resnet",
                                is_training=is_training)
                        else:
                            # Without an image branch, layer_3 stands in for
                            # the map features.
                            print('not add image')
                            self.layer_4, self.map_variable_scope = self.layer_3, []

                        if self.weighted_sum_type == 'AddWeight':
                            # Weighted sum: the base features keep
                            # (1 - add_weight); the two extra branches share
                            # add_weight equally.
                            self.layer_5 = (1. - self.add_weight) * self.layer_2 + self.add_weight / 2. * self.layer_3 + \
                                self.add_weight / 2. * self.layer_4
                        else:
                            # 'Add' -- the only other value accepted by the
                            # validation at the top of the method.
                            print('self.weighted_sum_type:',
                                  self.weighted_sum_type)
                            self.layer_5 = self.layer_2 + self.layer_3 + self.layer_4
                    else:
                        self.layer_5 = self.layer_2

                    probs = layer.output_layer(self.layer_5,
                                               self.act_space_array,
                                               self.add_act_space,
                                               "output",
                                               is_training=is_training,
                                               initial_type=initial_type,
                                               func=tf.nn.softmax)

                    # Draw one action per row, then flatten to a 1-D tensor.
                    act = tf.multinomial(tf.log(probs), num_samples=1)
                    act = tf.reshape(act, shape=[-1])

            with tf.variable_scope('value_net'):
                layer_1 = layer.dense_layer(self.obs,
                                            hidden_units,
                                            "DenseLayer1",
                                            norm=norm,
                                            is_training=is_training,
                                            func=activation,
                                            initial_type=initial_type,
                                            trainable=not freeze_head)
                layer_2 = layer.dense_layer(layer_1,
                                            hidden_units,
                                            "DenseLayer2",
                                            norm=norm,
                                            is_training=is_training,
                                            func=activation,
                                            initial_type=initial_type,
                                            trainable=not freeze_head)

                v_preds = layer.dense_layer(layer_2,
                                            1,
                                            "DenseLayer4",
                                            initial_type=initial_type,
                                            is_training=is_training,
                                            func=None)

            self.scope = tf.get_variable_scope().name

        return probs, act, v_preds
Ejemplo n.º 6
0
    def sep_policy_value_net(self,
                             name,
                             hidden_units=64,
                             activation=tf.nn.relu,
                             freeze_head=False,
                             initial_type='original',
                             norm=True,
                             is_training=True,
                             reuse=False):
        """
        Build separate policy and value networks under the scope ``name``.

        The policy features come from ``self.obs``; when ``self.use_add_obs``
        is set they are combined with features from ``self.obs_add`` in the
        way selected by ``self.weight_type`` ('AttentionWeight',
        'AdaptiveWeight', 'AddWeight'; any other value means a plain sum).

        :param name: name of the enclosing variable scope
        :param hidden_units: unit count of every hidden dense layer
        :param activation: passed as ``func`` to the hidden dense layers
        :param freeze_head: when True, DenseLayer1/DenseLayer2 of both
            networks are built with ``trainable=False``
        :param initial_type: forwarded to the layer builders
        :param norm: forwarded as ``norm`` to the hidden dense layers
        :param is_training: forwarded to the layer builders
        :param reuse: forwarded to ``tf.variable_scope``
        :return: tuple ``(probs, act, v_preds)``
        """
        head_trainable = not freeze_head
        # Keyword arguments shared by every hidden dense layer.
        hidden_kwargs = dict(norm=norm,
                             is_training=is_training,
                             func=activation,
                             initial_type=initial_type)

        with tf.variable_scope(name, reuse=reuse):

            with tf.variable_scope('policy_net'), tf.variable_scope('controller'):
                policy_hidden = layer.dense_layer(self.obs,
                                                  hidden_units,
                                                  "DenseLayer1",
                                                  trainable=head_trainable,
                                                  **hidden_kwargs)
                self.layer_2 = layer.dense_layer(policy_hidden,
                                                 hidden_units,
                                                 "DenseLayer2",
                                                 trainable=head_trainable,
                                                 **hidden_kwargs)

                if not self.use_add_obs:
                    # No extra observations: the base features go straight
                    # to the output layer.
                    self.layer_4 = self.layer_2
                else:
                    self.layer_3 = layer.dense_layer(self.obs_add,
                                                     hidden_units,
                                                     "DenseLayer3",
                                                     **hidden_kwargs)

                    if self.weight_type == 'AttentionWeight':
                        # Sigmoid gate computed from the extra features,
                        # applied element-wise to the base features.
                        self.attention_weight = layer.dense_layer(
                            self.layer_3,
                            hidden_units,
                            "AttentionWeight",
                            initial_type='original',
                            func=tf.nn.sigmoid)
                        self.layer_4 = self.layer_2 * self.attention_weight

                    elif self.weight_type == 'AdaptiveWeight':
                        # Trainable per-unit mixing weight, zero-initialised
                        # so the mix starts as the base features only.
                        self.adaptive_weight = tf.get_variable(
                            name="AdaptiveWeight",
                            shape=[hidden_units],
                            initializer=tf.zeros_initializer(),
                            trainable=True)
                        base_share = 1 - self.adaptive_weight
                        self.layer_4 = base_share * self.layer_2 \
                            + self.adaptive_weight * self.layer_3

                    elif self.weight_type == 'AddWeight':
                        # Mix controlled by self.add_weight.
                        self.layer_4 = (1. - self.add_weight) * self.layer_2 \
                            + self.add_weight * self.layer_3

                    else:
                        # Any other weight_type: plain element-wise sum.
                        self.layer_4 = self.layer_2 + self.layer_3

                probs = layer.output_layer(self.layer_4,
                                           self.act_space_array,
                                           self.add_act_space,
                                           "output",
                                           is_training=is_training,
                                           initial_type=initial_type,
                                           func=tf.nn.softmax)

                # Draw one action per row, then flatten to a 1-D tensor.
                sampled = tf.multinomial(tf.log(probs), num_samples=1)
                act = tf.reshape(sampled, shape=[-1])

            with tf.variable_scope('value_net'):
                value_hidden_1 = layer.dense_layer(self.obs,
                                                   hidden_units,
                                                   "DenseLayer1",
                                                   trainable=head_trainable,
                                                   **hidden_kwargs)
                value_hidden_2 = layer.dense_layer(value_hidden_1,
                                                   hidden_units,
                                                   "DenseLayer2",
                                                   trainable=head_trainable,
                                                   **hidden_kwargs)

                v_preds = layer.dense_layer(value_hidden_2,
                                            1,
                                            "DenseLayer4",
                                            initial_type=initial_type,
                                            is_training=is_training,
                                            func=None)

            self.scope = tf.get_variable_scope().name

        return probs, act, v_preds