예제 #1
0
	def _build_vf(self, ac_space, ob_space, ob, trainable, scope, scale=0.1):
		"""Build a single-output network on top of `ob` inside `scope`.

		Two `dense_layer` calls with `tf.tanh` (widths L1NUM and L2NUM,
		named 'fc1' and 'fc2') are stacked on `ob`, followed by a 1-unit
		'final' layer that is given `None` in place of `tf.tanh`.
		Presumably this is the value-function head (going by the method
		name) -- confirm against the caller.

		The boolean positional arguments are forwarded to `dense_layer`
		as-is; their meaning is defined by that helper, not here.

		`ac_space`, `ob_space` and `scale` are accepted but unused in this
		method.

		Returns whatever the last `dense_layer` call returns.
		"""
		with tf.variable_scope(scope):
			hidden1 = dense_layer(
				ob, L1NUM, tf.tanh, True, trainable,
				'fc1', True, True, False)
			hidden2 = dense_layer(
				hidden1, L2NUM, tf.tanh, True, trainable,
				'fc2', True, True, False)
			# NOTE: a third hidden layer ('fc3', L3NUM) used to sit here as
			# commented-out code; it is not part of the graph.
			value_out = dense_layer(
				hidden2, 1, None, False, trainable,
				'final', True, True, False)
		return value_out
예제 #2
0
 def _build_policy(self, ac_space, ob_space, observation, trainable, scope,
                   scale=0.1):
     """Build the policy network inside variable scope `scope`.

     Stacks two `dense_layer` calls with `tf.tanh` (widths L1NUM and
     L2NUM, named 'fc1' and 'fc2') on `observation`, then wraps the
     result in a `NormalDist` built from `ac_space` and `trainable`.

     `trainable` is forwarded to `dense_layer` twice per call: once in
     the fifth position and once in the seventh. The meaning of those
     positions is defined by `dense_layer`, not here.

     `ob_space` and `scale` are accepted but unused in this method.

     Returns the `NormalDist` instance.
     """
     with tf.variable_scope(scope):
         hidden1 = dense_layer(
             observation, L1NUM, tf.tanh, True, trainable,
             'fc1', trainable, False, False)
         hidden2 = dense_layer(
             hidden1, L2NUM, tf.tanh, True, trainable,
             'fc2', trainable, False, False)
         action_dist = NormalDist(hidden2, ac_space, trainable)
     return action_dist
예제 #3
0
 def __init__(self, x, ac_space, trainable):
     """Create the `logstd`, `mean` and `std` attributes.

     * `logstd`: a zero-initialised variable named 'logstd' with shape
       `[1] + list(ac_space.shape)`.
     * `mean`: output of a `dense_layer` named 'mean' applied to `x`,
       with `ac_space.shape[0]` units and `None` in the third position.
     * `std`: `tf.exp(logstd)`.

     `trainable` is passed to both the variable and `dense_layer`.
     """
     # Graph-building calls are kept in this order: variable first, then
     # the dense layer, then the exp op.
     logstd_shape = [1] + list(ac_space.shape)
     self.logstd = tf.get_variable(
         name='logstd',
         shape=logstd_shape,
         initializer=tf.zeros_initializer(),
         trainable=trainable)

     n_outputs = ac_space.shape[0]
     self.mean = dense_layer(
         x, n_outputs, None, True, trainable,
         'mean', trainable, False, False)

     self.std = tf.exp(self.logstd)