def testTrainableVariables(self):
    """Checks that BiasLayer exposes its bias via `trainable_variables`."""
    layer = bias_layer.BiasLayer(
        bias_initializer=tf.constant_initializer(value=1.0))
    inputs = tf.zeros((2, 3))
    _ = layer(inputs)  # Call once so the layer builds its variables.
    self.evaluate(tf.compat.v1.global_variables_initializer())
    np.testing.assert_almost_equal(
        [[1.0] * 3], self.evaluate(layer.trainable_variables))
    def __init__(self,
                 sample_spec,
                 activation_fn=None,
                 init_means_output_factor=0.1,
                 std_bias_initializer_value=0.0,
                 mean_transform=tanh_squash_to_spec,
                 std_transform=tf.nn.softplus,
                 state_dependent_std=False,
                 scale_distribution=False,
                 name='NormalProjectionNetwork'):
        """Creates an instance of NormalProjectionNetwork.

        Args:
          sample_spec: A `tensor_spec.BoundedTensorSpec` detailing the shape
            and dtypes of samples pulled from the output distribution.
          activation_fn: Activation function to use in dense layer.
          init_means_output_factor: Output factor for initializing action means
            weights.
          std_bias_initializer_value: Initial value for the bias of the
            stddev_projection_layer or the direct bias_layer depending on the
            state_dependent_std flag.
          mean_transform: Transform to apply to the calculated means. Uses
            `tanh_squash_to_spec` by default.
          std_transform: Transform to apply to the stddevs.
          state_dependent_std: If true, stddevs will be produced by MLP from
            state; else, stddevs will be an independent variable.
          scale_distribution: Whether or not to use a bijector chain to scale
            distributions to match the sample spec. Note the
            TransformedDistribution does not support certain operations
            required by some agents or policies such as KL divergence
            calculations or Mode.
          name: A string representing name of the network.

        Raises:
          ValueError: If `sample_spec` flattens to more than one spec.
        """
        # Only a single (non-nested) sample spec is supported.
        if len(tf.nest.flatten(sample_spec)) != 1:
            raise ValueError(
                'Normal Projection network only supports single spec '
                'samples.')
        self._scale_distribution = scale_distribution
        # NOTE: the output spec must be computed before the base-class
        # constructor is invoked, since it is passed to super().__init__.
        output_spec = self._output_distribution_spec(sample_spec, name)
        super(NormalProjectionNetwork, self).__init__(
            # We don't need these, but base class requires them.
            input_tensor_spec=None,
            state_spec=(),
            output_spec=output_spec,
            name=name)

        self._sample_spec = sample_spec
        # A spec with rank > 0 is treated as a multivariate distribution.
        self._is_multivariate = sample_spec.shape.ndims > 0
        self._mean_transform = mean_transform
        self._std_transform = std_transform
        self._state_dependent_std = state_dependent_std

        # Dense layer projecting the network state to distribution means.
        self._means_projection_layer = tf.keras.layers.Dense(
            sample_spec.shape.num_elements(),
            activation=activation_fn,
            kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(
                scale=init_means_output_factor),
            bias_initializer=tf.keras.initializers.Zeros(),
            name='means_projection_layer')

        self._stddev_projection_layer = None
        if self._state_dependent_std:
            # Stddevs are produced from the state by their own dense layer.
            self._stddev_projection_layer = tf.keras.layers.Dense(
                sample_spec.shape.num_elements(),
                activation=activation_fn,
                kernel_initializer=tf.compat.v1.keras.initializers.
                VarianceScaling(scale=init_means_output_factor),
                bias_initializer=tf.constant_initializer(
                    value=std_bias_initializer_value),
                name='stddev_projection_layer')
        else:
            # Stddevs are state-independent trainable variables: a bias-only
            # layer initialized from std_bias_initializer_value.
            self._bias = bias_layer.BiasLayer(
                bias_initializer=tf.constant_initializer(
                    value=std_bias_initializer_value))
# Example #3
 def std_layers():
     """Returns a BiasLayer for producing state-independent stddevs."""
     # TODO(b/179510447): align these parameters with Schulman 17.
     # np.log(np.exp(x) - 1) is the mathematical inverse of softplus at x.
     initial_bias = np.log(np.exp(0.35) - 1)
     return bias_layer.BiasLayer(
         bias_initializer=tf.constant_initializer(value=initial_bias))
 def testBuildScalar(self):
     """Checks BiasLayer output on a rank-1 input of ones."""
     # Default-constructed BiasLayer; the assertion expects the input to
     # pass through unchanged (presumably a zero-initialized bias).
     layer = bias_layer.BiasLayer()
     inputs = tf.ones((2, ))
     outputs = layer(inputs)
     self.evaluate(tf.compat.v1.global_variables_initializer())
     np.testing.assert_almost_equal([1.0] * 2, self.evaluate(outputs))