Esempio n. 1
0
 def _discrete_head(self, inputs):
     """
     Build the discrete head.
     Feeds the first element of `inputs` through a dense layer named 'pi'
     whose width is the first entry of `self.pdtype.param_shape()`, and
     returns that layer's output.
     """
     features = inputs[0]
     n_params = self.pdtype.param_shape()[0]
     # Initializer scale is 0.01 here, versus 1.0 in the other heads.
     small_init = U.normc_initializer(0.01)
     return tf.layers.dense(inputs=features, units=n_params,
                            kernel_initializer=small_init, name='pi')
Esempio n. 2
0
 def _head(self, inputs):
     """
     Build the output head from separate advantage and value streams.
     The first two elements of `inputs` feed, respectively, an 'advantages'
     dense layer with `self.ac_space.n` units and a single-unit dense layer.
     The advantages are centered by subtracting their mean over the last
     axis. The centered advantages are stored in `self._advs`, the value
     output in `self._vf`, and their sum is returned.
     """
     adv_features, vf_features = inputs[0:2]
     raw_advs = tf.layers.dense(inputs=adv_features, units=self.ac_space.n,
                                kernel_initializer=U.normc_initializer(1.0),
                                name='advantages')
     # Mean over the last axis, re-expanded so it lines up with raw_advs.
     mean_adv = tf.expand_dims(tf.reduce_mean(raw_advs, axis=-1), axis=-1)
     self._advs = raw_advs - mean_adv
     # NOTE: the layer name is spelled 'value_funtion'; it is kept exactly
     # as written because changing it would rename the layer.
     self._vf = tf.layers.dense(inputs=vf_features, units=1,
                                kernel_initializer=U.normc_initializer(1.0),
                                name='value_funtion')
     return self._vf + self._advs
Esempio n. 3
0
 def _head(self, inputs):
     """
     Value function head.
     Passes the first element of `inputs` through a single-unit dense
     layer named 'vf' and returns the layer's output.
     """
     features = inputs[0]
     value = tf.layers.dense(inputs=features, units=1,
                             kernel_initializer=U.normc_initializer(1.0),
                             name='vf')
     return value
Esempio n. 4
0
 def _head(self, inputs):
     """
     Q-function head.
     Passes the first element of `inputs` through a dense layer named
     'qvals' with `self.ac_space.n` units and returns the layer's output.
     """
     features = inputs[0]
     n_outputs = self.ac_space.n
     qvals = tf.layers.dense(inputs=features, units=n_outputs,
                             kernel_initializer=U.normc_initializer(1.0),
                             name='qvals')
     return qvals
Esempio n. 5
0
 def _build(self, inputs):
     """
     Stack the hidden dense layers on top of the clipped input.
     The first element of `inputs` is clipped to [-5.0, 5.0] and then fed
     through one dense layer per entry of `self.hiddens`, named 'dense0',
     'dense1', ... in order, each using `self.activation_fn`.
     Returns the output of the last layer, or the clipped input when
     `self.hiddens` is empty.
     """
     out = tf.clip_by_value(inputs[0], -5.0, 5.0)
     for idx, width in enumerate(self.hiddens):
         layer_name = 'dense{}'.format(idx)
         out = tf.layers.dense(
             out,
             units=width,
             kernel_initializer=U.normc_initializer(1.0),
             activation=self.activation_fn,
             name=layer_name,
         )
     return out
Esempio n. 6
0
 def _head(self, sa):
     """
     Q-function head with a single output.
     Passes `sa` through a single-unit dense layer named 'qvals' and
     returns the layer's output.
     """
     # AUTO_REUSE: the layer's variables are created on the first call and
     # reused on any later call within the same scope.
     layer_kwargs = dict(
         units=1,
         kernel_initializer=U.normc_initializer(1.0),
         reuse=tf.AUTO_REUSE,
         name='qvals',
     )
     return tf.layers.dense(inputs=sa, **layer_kwargs)
Esempio n. 7
0
 def _continuous_head(self, inputs):
     """
     Policy head designed for continuous distributions.
     The mean is produced by a dense layer named 'pi' applied to the first
     element of `inputs`. The log standard deviation is a standalone
     variable named "logstd" that does not depend on the network output
     and is initialized to 0.
     Returns the mean and the tiled logstd concatenated along axis 1.
     """
     # Half of the distribution parameters go to the mean, half to logstd.
     half = self.pdtype.param_shape()[0] // 2
     mean = tf.layers.dense(inputs=inputs[0], units=half,
                            kernel_initializer=U.normc_initializer(0.01),
                            name='pi')
     logstd_row = tf.get_variable(name="logstd", shape=[1, half],
                                  initializer=tf.zeros_initializer())
     # The single logstd row is repeated a fixed nbatch * nstep times.
     # NOTE(review): this presumably has to equal the number of rows in
     # `mean` for the concat below to succeed -- confirm against callers.
     n_rows = self.nbatch * self.nstep
     logstd = tf.tile(logstd_row, [n_rows, 1])
     return tf.concat([mean, logstd], axis=1)