Beispiel #1
0
    def setup_graph(self, ob_space, ac_space):
        """Build the graph tensors and variables this policy exposes.

        Sets ``self.x``, ``self._model``, ``self.logits``, ``self.vf``,
        ``self.sample``, ``self.var_list`` and ``self.global_step``.

        Args:
            ob_space: Observation dimensions; converted with ``list()`` and
                placed after a leading ``None`` dimension of the input
                placeholder.
            ac_space: Action space; its ``n`` attribute is read and the
                object itself is passed to ``ModelCatalog.get_action_dist``.
        """
        n_actions = ac_space.n

        # The leading None leaves the first dimension unspecified
        # (presumably the batch size -- confirm against callers).
        input_shape = [None] + list(ob_space)
        self.x = tf.placeholder(tf.float32, input_shape)

        # Only the distribution's dimension is used here; the class returned
        # alongside it is discarded.
        _, dist_dim = ModelCatalog.get_action_dist(ac_space)
        self._model = ModelCatalog.ConvolutionalNetwork(self.x, dist_dim)
        self.logits = self._model.outputs

        # Single-output "value" layer on the model's last layer, reshaped
        # to a 1-D tensor.
        value_head = linear(
            self._model.last_layer, 1, "value",
            normalized_columns_initializer(1.0))
        self.vf = tf.reshape(value_head, [-1])

        # Keep only the first row of the sampling result.
        sampled = categorical_sample(self.logits, n_actions)
        self.sample = sampled[0, :]

        # Trainable variables registered under the current variable scope.
        scope_name = tf.get_variable_scope().name
        self.var_list = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope_name)

        # Scalar int32 counter initialised to zero and marked non-trainable.
        zero_init = tf.constant_initializer(0, dtype=tf.int32)
        self.global_step = tf.get_variable(
            "global_step", [], tf.int32,
            initializer=zero_init, trainable=False)