Example #1
    def testSequentialNetwork(self):
        output_spec = tensor_spec.BoundedTensorSpec([2], tf.float32, 0, 1)
        network = tanh_normal_projection_network.TanhNormalProjectionNetwork(
            output_spec)

        inputs = tf.random.stateless_uniform(shape=[3, 5], seed=[0, 0])
        output, _ = network(inputs, outer_rank=1)

        # Create a squashed distribution.
        def create_dist(loc_and_scale):
            ndims = output_spec.shape.num_elements()
            loc = loc_and_scale[..., :ndims]
            scale = tf.exp(loc_and_scale[..., ndims:])

            distribution = tfp.distributions.MultivariateNormalDiag(
                loc=loc,
                scale_diag=scale,
                validate_args=True,
            )
            return distribution_utils.scale_distribution_to_spec(
                distribution, output_spec)

        # Create a sequential network.
        sequential_network = sequential.Sequential(
            [network._projection_layer] +
            [tf.keras.layers.Lambda(create_dist)])
        sequential_output, _ = sequential_network(inputs)

        # Check that mode and standard deviation are the same.
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertAllClose(self.evaluate(output.mode()),
                            self.evaluate(sequential_output.mode()))
        self.assertAllClose(self.evaluate(output.stddev()),
                            self.evaluate(sequential_output.stddev()))
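
These test methods rely on imports that the page does not show. A plausible import block, assuming the snippets come from the TF-Agents test suite (the exact module paths are an assumption, not part of the excerpt):

    # Assumed imports for the examples on this page (not shown in the excerpt).
    import tensorflow as tf
    import tensorflow_probability as tfp

    from tf_agents.distributions import utils as distribution_utils
    from tf_agents.networks import sequential
    from tf_agents.networks import tanh_normal_projection_network
    from tf_agents.specs import tensor_spec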
Example #2
    def testTrainableVariables(self):
        output_spec = tensor_spec.BoundedTensorSpec([2], tf.float32, 0, 1)
        network = tanh_normal_projection_network.TanhNormalProjectionNetwork(
            output_spec)

        inputs = _get_inputs(batch_size=3, num_input_dims=5)

        network(inputs, outer_rank=1)
        self.evaluate(tf.compat.v1.global_variables_initializer())

        # Dense kernel and bias.
        self.assertEqual(2, len(network.trainable_variables))
        self.assertEqual((5, 4), network.trainable_variables[0].shape)
        self.assertEqual((4, ), network.trainable_variables[1].shape)
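
Examples #2 and #3 call a `_get_inputs` helper that is not shown on this page. A minimal sketch of what it plausibly does (the exact implementation is an assumption): build a random observation batch of the requested shape.

    def _get_inputs(batch_size, num_input_dims):
        # Assumed helper: a random batch of observations with shape
        # [batch_size, num_input_dims].
        return tf.random.uniform([batch_size, num_input_dims])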
Example #3
    def testBuild(self):
        output_spec = tensor_spec.BoundedTensorSpec([2], tf.float32, 0, 1)
        network = tanh_normal_projection_network.TanhNormalProjectionNetwork(
            output_spec)

        inputs = _get_inputs(batch_size=3, num_input_dims=5)

        distribution, _ = network(inputs, outer_rank=1)
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertEqual(tfp.distributions.MultivariateNormalDiag,
                         type(distribution.input_distribution))

        means = distribution.input_distribution.loc
        # `scale` is a tf.linalg.LinearOperatorDiag, so its shape is
        # [batch_size, num_dims, num_dims] rather than [batch_size, num_dims].
        stds = distribution.input_distribution.scale

        self.assertAllEqual(means.shape.as_list(),
                            [3] + output_spec.shape.as_list())
        self.assertAllEqual(stds.shape.as_list(),
                            [3] + output_spec.shape.as_list() * 2)
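
        # Hypothetical follow-on check (not part of the original test): samples
        # drawn from the tanh-squashed distribution should respect the [0, 1]
        # bounds declared by output_spec.
        samples = distribution.sample(seed=0)
        self.assertAllInRange(self.evaluate(samples), 0.0, 1.0)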
Example #4
    def __init__(self,
                 input_tensor_spec,
                 output_tensor_spec,
                 image_encoder=None,
                 fc_encoder_layers=(50, ),
                 fc_layers=(1024, 1024),
                 log_std_bounds=(-10, 2),
                 name='Actor'):
        """Initializes an Actor network.

    Args:
      input_tensor_spec: Tensor spec matching the environment's
        observation_spec.
      output_tensor_spec: Tensor spec matching the environment's action_spec.
      image_encoder: Optional ImageEncoder used on the input observation. If
        None, the network assumes the observation is a flat vector and simply
        applies fc layers.
      fc_encoder_layers: Iterable with the number of units for each dense
        encoder layer; the encoder is followed by LayerNorm and a tanh.
      fc_layers: Iterable (list or tuple) with number of units for a dense layer
        stack.
      log_std_bounds: Bounds for scaling the log_std in the
        TanhNormalProjectionNetwork.
      name: Name for the network.
    """
        def scale_and_exp(log_std):
            # Squash log_std into [scale_min, scale_max] with a tanh, then
            # exponentiate to obtain a positive standard deviation.
            scale_min, scale_max = log_std_bounds
            log_std = tf.keras.activations.tanh(log_std)
            log_std = scale_min + 0.5 * (scale_max - scale_min) * (log_std + 1)
            return tf.exp(log_std)

        distribution_projection_network = (
            tanh_normal_projection_network.TanhNormalProjectionNetwork(
                output_tensor_spec, std_transform=scale_and_exp))

        super(Actor, self).__init__(
            input_tensor_spec=input_tensor_spec,
            state_spec=(),
            output_spec=distribution_projection_network.output_spec,
            name=name)

        # Because of Keras we need to assign to self AFTER we call super.
        self._image_encoder = image_encoder

        self._fc_encoder = tf.keras.Sequential()
        for fc_layer_units in fc_encoder_layers:
            self._fc_encoder.add(
                tf.keras.layers.Dense(
                    fc_layer_units,
                    # Default gain of 1.0 matches Pytorch.
                    kernel_initializer=tf.keras.initializers.Orthogonal(
                        gain=1.0),
                ))
        # 1e-5 is default epsilon in Pytorch.
        self._fc_encoder.add(tf.keras.layers.LayerNormalization(epsilon=1e-5))
        self._fc_encoder.add(
            tf.keras.layers.Activation(tf.keras.activations.tanh))

        self._dense_layers = tf.keras.Sequential()

        for units in fc_layers:
            self._dense_layers.add(
                tf.keras.layers.Dense(
                    units,
                    activation=tf.keras.activations.relu,
                    kernel_initializer=tf.keras.initializers.Orthogonal(
                        gain=1.0)))
        self._distribution_projection_network = distribution_projection_network
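
A minimal construction sketch for this Actor (the observation and action specs below are hypothetical, and the network's call logic is not shown in this excerpt):

    # Hypothetical specs, for illustration only.
    observation_spec = tensor_spec.TensorSpec([50], tf.float32)
    action_spec = tensor_spec.BoundedTensorSpec([2], tf.float32, -1.0, 1.0)
    actor_net = Actor(observation_spec, action_spec,
                      fc_encoder_layers=(50,),
                      fc_layers=(1024, 1024))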