def _discrete_head(self, inputs):
    return tf.layers.dense(
        inputs=inputs[0],
        units=self.pdtype.param_shape()[0],
        kernel_initializer=U.normc_initializer(0.01),
        name='pi'
    )
def _head(self, inputs):
    # Dueling head: per-action advantages plus a scalar state value.
    pre_advs, pre_vf = inputs[0:2]
    advs = tf.layers.dense(
        inputs=pre_advs,
        units=self.ac_space.n,
        kernel_initializer=U.normc_initializer(1.0),
        name='advantages'
    )
    self._advs = advs - tf.expand_dims(tf.reduce_mean(advs, axis=-1), axis=-1)
    self._vf = tf.layers.dense(
        inputs=pre_vf,
        units=1,
        kernel_initializer=U.normc_initializer(1.0),
        name='value_function'
    )
    return self._vf + self._advs
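# Illustration (not part of the original code): the head above computes the
# dueling-network aggregation Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)),
# i.e. the advantages are re-centred to zero mean before being added to the
# state value. The helper below is a minimal NumPy sketch of that combination;
# its name, shapes, and values are made up for the example.
def _dueling_aggregation_example():
    import numpy as np
    advs = np.array([[1.0, 3.0, 2.0],
                     [0.5, 0.5, 2.0]])   # raw advantage outputs, shape (batch, n_actions)
    vf = np.array([[10.0], [4.0]])       # state values, shape (batch, 1)
    centered = advs - advs.mean(axis=-1, keepdims=True)
    return vf + centered                 # same combination as self._vf + self._advs above
    # -> [[ 9.  11.  10. ]
    #     [ 3.5  3.5   5. ]]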
def _head(self, inputs):
    # Add value function head
    return tf.layers.dense(
        inputs=inputs[0],
        units=1,
        kernel_initializer=U.normc_initializer(1.0),
        name='vf'
    )
def _head(self, inputs):
    # Q-function head: one Q-value per discrete action.
    return tf.layers.dense(
        inputs=inputs[0],
        units=self.ac_space.n,
        kernel_initializer=U.normc_initializer(1.0),
        name='qvals'
    )
def _build(self, inputs):
    net = tf.clip_by_value(inputs[0], -5.0, 5.0)
    for i, h in enumerate(self.hiddens):
        net = tf.layers.dense(
            net,
            units=h,
            kernel_initializer=U.normc_initializer(1.0),
            activation=self.activation_fn,
            name='dense{}'.format(i)
        )
    return net
def _head(self, sa):
    # Q-function head over a concatenated state-action input; AUTO_REUSE lets the
    # same variables be shared when the head is built more than once.
    return tf.layers.dense(
        inputs=sa,
        units=1,
        kernel_initializer=U.normc_initializer(1.0),
        reuse=tf.AUTO_REUSE,
        name='qvals'
    )
def _continuous_head(self, inputs):
    """Policy head designed for continuous distributions.

    It makes the logstd params independent of the network output and
    initializes them to 0.
    """
    param_shape = self.pdtype.param_shape()[0]
    mean = tf.layers.dense(
        inputs=inputs[0],
        units=param_shape // 2,
        kernel_initializer=U.normc_initializer(0.01),
        name='pi'
    )
    logstd = tf.get_variable(
        name="logstd",
        shape=[1, param_shape // 2],
        initializer=tf.zeros_initializer()
    )
    logstd = tf.tile(logstd, [self.nbatch * self.nstep, 1])
    return tf.concat([mean, logstd], axis=1)
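# Illustration (not part of the original code): the flat [mean, logstd] vector
# returned by _continuous_head is the usual parameterisation of a diagonal
# Gaussian policy, which downstream code would split back into its two halves
# before sampling. The helper below is a hedged NumPy sketch of that sampling
# step; the function name and shapes are assumptions for the example, not part
# of the library API.
def _diag_gaussian_sample_example(params):
    """Sample one action per row from flat [mean, logstd] parameters."""
    import numpy as np
    mean, logstd = np.split(np.asarray(params), 2, axis=-1)
    std = np.exp(logstd)
    return mean + std * np.random.standard_normal(mean.shape)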