def _build_vf(self, ac_space, ob_space, ob, trainable, scope, scale=0.1):
    """Build the scalar-output network (presumably the value function).

    Under ``tf.variable_scope(scope)`` the input ``ob`` is passed through
    two ``tanh`` dense layers ('fc1' with ``L1NUM`` units, 'fc2' with
    ``L2NUM`` units) and then a 1-unit layer named 'final' with no
    activation.

    Args:
        ac_space: Unused in this method.
        ob_space: Unused in this method.
        ob: Input tensor fed to the first layer.
        trainable: Forwarded to every ``dense_layer`` call.
        scope: Variable scope name under which the layers are created.
        scale: Unused in this method.

    Returns:
        The output of the 'final' 1-unit ``dense_layer``.
    """
    # (units, layer name) for each tanh hidden layer, applied in order.
    # A third hidden layer ('fc3', L3NUM units) was disabled in the
    # original code and stays disabled here.
    hidden_specs = ((L1NUM, 'fc1'), (L2NUM, 'fc2'))

    with tf.variable_scope(scope):
        net = ob
        for units, layer_name in hidden_specs:
            # NOTE(review): the positional boolean flags are passed through
            # unchanged; their meaning is defined by dense_layer.
            net = dense_layer(
                net, units, tf.tanh, True, trainable, layer_name,
                True, True, False)
        return dense_layer(
            net, 1, None, False, trainable, 'final', True, True, False)
def _build_policy(self, ac_space, ob_space, observation, trainable, scope, scale=0.1):
    """Build the policy network and return its ``NormalDist``.

    Under ``tf.variable_scope(scope)`` the input ``observation`` is passed
    through two ``tanh`` dense layers ('fc1' with ``L1NUM`` units, 'fc2'
    with ``L2NUM`` units); the result is handed to ``NormalDist`` together
    with ``ac_space`` and ``trainable``.

    Args:
        ac_space: Action space, forwarded to ``NormalDist``.
        ob_space: Unused in this method.
        observation: Input tensor fed to the first layer.
        trainable: Forwarded to every ``dense_layer`` call and to
            ``NormalDist``.
        scope: Variable scope name under which everything is created.
        scale: Unused in this method.

    Returns:
        The ``NormalDist`` instance built on top of the second hidden layer.
    """
    with tf.variable_scope(scope):
        features = observation
        for units, layer_name in ((L1NUM, 'fc1'), (L2NUM, 'fc2')):
            # The original passed `(True and trainable)` as the seventh
            # argument, which always evaluates to `trainable` itself.
            # NOTE(review): the meaning of the positional flags is defined
            # by dense_layer and is not visible here.
            features = dense_layer(
                features, units, tf.tanh, True, trainable, layer_name,
                trainable, False, False)
        return NormalDist(features, ac_space, trainable)
def __init__(self, x, ac_space, trainable):
    """Create the ``logstd`` variable, the ``mean`` layer and ``std``.

    Args:
        x: Input tensor fed to the 'mean' dense layer.
        ac_space: Object exposing ``.shape``; ``shape[0]`` is the output
            size of the 'mean' layer and ``[1] + list(shape)`` is the
            shape of the ``logstd`` variable.
        trainable: Forwarded to ``tf.get_variable`` and ``dense_layer``.
    """
    logstd_shape = [1] + list(ac_space.shape)
    self.logstd = tf.get_variable(
        name='logstd',
        shape=logstd_shape,
        initializer=tf.zeros_initializer(),
        trainable=trainable,
    )

    # The original passed `(True and trainable)` as the seventh argument,
    # which always evaluates to `trainable` itself.
    # NOTE(review): the meaning of the positional flags is defined by
    # dense_layer and is not visible here.
    action_dim = ac_space.shape[0]
    self.mean = dense_layer(
        x, action_dim, None, True, trainable, 'mean',
        trainable, False, False)

    # std is derived from logstd by exponentiation.
    self.std = tf.exp(self.logstd)