Example #1
    def test_build(self):
        x_ = np.reshape(np.linspace(-1.0, 1.0, 8, dtype=np.float32), (-1, 4))

        num_coupling_layers = 10
        hidden_layer_sizes = (64, 64)

        flow = RealNVPFlow(
            num_coupling_layers=num_coupling_layers,
            hidden_layer_sizes=hidden_layer_sizes)

        self.assertFalse(flow._built)
        flow.forward(x_)
        self.assertTrue(flow._built)

        real_nvp_layers = [
            layer for layer in flow.flow.bijectors
            if isinstance(layer, bijectors.RealNVP)
        ]
        self.assertEqual(len(real_nvp_layers), num_coupling_layers)

        permute_layers = [
            layer for layer in flow.flow.bijectors
            if isinstance(layer, bijectors.Permute)
        ]
        self.assertEqual(len(permute_layers), num_coupling_layers-1)

        batch_normalization_layers = [
            layer for layer in flow.flow.bijectors
            if isinstance(layer, bijectors.BatchNormalization)
        ]
        self.assertEqual(len(batch_normalization_layers), 0)

        self.assertEqual(
            len(flow.flow.bijectors),
            len(real_nvp_layers) + len(permute_layers))
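For orientation, here is a minimal sketch, built directly from tensorflow_probability bijectors rather than the project's RealNVPFlow class, of the chain structure the test above asserts: num_coupling_layers RealNVP coupling layers interleaved with num_coupling_layers - 1 Permute bijectors and no BatchNormalization. The event size, the Keras conditioner network, and the reversal permutation are assumptions chosen to match the test input, not the library's exact implementation.

import tensorflow as tf
import tensorflow_probability as tfp

tfb = tfp.bijectors

num_coupling_layers = 10
event_size = 4  # matches the (-1, 4) test input above
num_masked = event_size // 2


def make_shift_and_log_scale_fn():
    # Small MLP that produces a shift and a log-scale for the unmasked half.
    net = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(2 * (event_size - num_masked)),
    ])

    def shift_and_log_scale(x, output_units, **kwargs):
        shift, log_scale = tf.split(net(x), 2, axis=-1)
        return shift, log_scale

    return shift_and_log_scale


layers = []
for i in range(num_coupling_layers):
    # Each coupling layer transforms half of the dimensions, conditioned on
    # the other (masked) half.
    layers.append(tfb.RealNVP(
        num_masked=num_masked,
        shift_and_log_scale_fn=make_shift_and_log_scale_fn()))
    if i < num_coupling_layers - 1:
        # Permute between coupling layers so every dimension is eventually
        # transformed.
        layers.append(tfb.Permute(
            permutation=list(reversed(range(event_size)))))

flow_chain = tfb.Chain(layers)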
Example #2
    def test_with_batch_normalization(self):
        x_ = np.reshape(np.linspace(-1.0, 1.0, 8, dtype=np.float32), (-1, 4))

        with self.assertRaises(NotImplementedError):
            flow = RealNVPFlow(
                num_coupling_layers=2,
                hidden_layer_sizes=(64,),
                use_batch_normalization=True)
Example #3
    def test_forward_inverse_returns_identity(self):
        x_ = np.reshape(np.linspace(-1.0, 1.0, 8, dtype=np.float32), (-1, 4))

        flow = RealNVPFlow(
            num_coupling_layers=2,
            hidden_layer_sizes=(64,))

        x = tf.constant(x_)
        forward_x = flow.forward(x)
        # Use identity to invalidate cache.
        inverse_y = flow.inverse(tf.identity(forward_x))
        forward_inverse_y = flow.forward(inverse_y)
        fldj = flow.forward_log_det_jacobian(x, event_ndims=1)
        # Use identity to invalidate cache.
        ildj = flow.inverse_log_det_jacobian(tf.identity(forward_x), event_ndims=1)

        forward_x_ = forward_x.numpy()
        inverse_y_ = inverse_y.numpy()
        forward_inverse_y_ = forward_inverse_y.numpy()
        ildj_ = ildj.numpy()
        fldj_ = fldj.numpy()

        self.assertEqual("real_nvp_flow", flow.name)
        self.assertAllClose(forward_x_, forward_inverse_y_, rtol=1e-4, atol=0.)
        self.assertAllClose(x_, inverse_y_, rtol=1e-4, atol=0.0)
        self.assertAllClose(ildj_, -fldj_, rtol=1e-6, atol=0.0)
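The tf.identity calls in this test are deliberate: TFP bijectors cache the input/output tensors of previous calls, so flow.inverse(forward_x) on the very same tensor object can return the cached input without ever executing the inverse computation. Wrapping forward_x in tf.identity produces a fresh tensor, which forces the inverse pass (and the inverse log-det-Jacobian) to actually run and makes the round-trip assertions meaningful.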
Example #4
    def test_batched_flow_with_mlp_transform(self):
        x_ = np.random.normal(0., 1., (3, 8)).astype(np.float32)
        flow = RealNVPFlow(
            num_coupling_layers=2,
            hidden_layer_sizes=(64,),
            use_batch_normalization=False)
        x = tf.constant(x_)
        forward_x = flow.forward(x)
        # Use identity to invalidate cache.
        inverse_y = flow.inverse(tf.identity(forward_x))
        forward_inverse_y = flow.forward(inverse_y)
        fldj = flow.forward_log_det_jacobian(x, event_ndims=1)
        # Use identity to invalidate cache.
        ildj = flow.inverse_log_det_jacobian(
            tf.identity(forward_x), event_ndims=1)

        [
            forward_x_,
            inverse_y_,
            forward_inverse_y_,
            ildj_,
            fldj_,
        ] = [
            forward_x.numpy(),
            inverse_y.numpy(),
            forward_inverse_y.numpy(),
            ildj.numpy(),
            fldj.numpy(),
        ]

        self.assertEqual("real_nvp_flow", flow.name)
        self.assertAllClose(forward_x_, forward_inverse_y_, rtol=1e-4, atol=0.)
        self.assertAllClose(x_, inverse_y_, rtol=1e-4, atol=0.)
        self.assertAllClose(ildj_, -fldj_, rtol=1e-6, atol=1e-8)
Example #5
    def test_should_reuse_scale_and_log_scale_variables(self):
        x_ = np.reshape(np.linspace(-1.0, 1.0, 8, dtype=np.float32), (-1, 4))

        flow = RealNVPFlow(
            num_coupling_layers=2,
            hidden_layer_sizes=(64,))

        x = tf.constant(x_)

        assert not tf.compat.v1.trainable_variables()

        forward_x = flow.forward(x)

        self.assertEqual(
            len(tf.compat.v1.trainable_variables()), 4 * flow._num_coupling_layers)

        inverse_y = flow.inverse(tf.identity(forward_x))
        forward_inverse_y = flow.forward(inverse_y)
        fldj = flow.forward_log_det_jacobian(x, event_ndims=1)
        ildj = flow.inverse_log_det_jacobian(
            tf.identity(forward_x), event_ndims=1)

        self.assertEqual(
            len(tf.compat.v1.trainable_variables()), 4 * flow._num_coupling_layers)
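The expected count of 4 * flow._num_coupling_layers is consistent with each coupling layer owning a conditioner network with one hidden layer here (hidden_layer_sizes=(64,)): a kernel and a bias for the hidden layer plus a kernel and a bias for the output layer. The second assertion verifies that the later inverse and log-det-Jacobian calls reuse those variables rather than creating new ones.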
Example #6
from collections import OrderedDict

import tensorflow as tf
import tensorflow_probability as tfp
import tree

# RealNVPFlow and LatentSpacePolicy are project-local imports defined
# elsewhere in this codebase.


class RealNVPPolicy(LatentSpacePolicy):
    def __init__(self,
                 hidden_layer_sizes,
                 num_coupling_layers,
                 *args,
                 activation=tf.nn.relu,
                 use_batch_normalization=False,
                 **kwargs):
        super(RealNVPPolicy, self).__init__(*args, **kwargs)

        base_distribution = tfp.distributions.MultivariateNormalDiag(
            loc=tf.zeros(self._output_shape),
            scale_diag=tf.ones(self._output_shape))

        self.flow_model = RealNVPFlow(
            num_coupling_layers=num_coupling_layers,
            hidden_layer_sizes=hidden_layer_sizes,
            use_batch_normalization=use_batch_normalization,
            activation=activation)

        raw_action_distribution = self.flow_model(base_distribution)

        self.base_distribution = base_distribution
        self.raw_action_distribution = raw_action_distribution
        self.action_distribution = self._action_post_processor(
            raw_action_distribution)

    @tf.function(experimental_relax_shapes=True)
    def actions(self, observations):
        if 0 < self._smoothing_alpha:
            raise NotImplementedError(
                "TODO(hartikainen): Smoothing alpha temporarily dropped on tf2"
                " migration. Should add it back. See:"
                " https://github.com/rail-berkeley/softlearning/blob/46374df0294b9b5f6dbe65b9471ec491a82b6944/softlearning/policies/base_policy.py#L80")

        observations = self._filter_observations(observations)

        batch_shape = tf.shape(tree.flatten(observations)[0])[:-1]
        actions = self.action_distribution.sample(
            batch_shape, bijector_kwargs={
                self.flow_model.name: {'observations': observations}
            })

        return actions

    @tf.function(experimental_relax_shapes=True)
    def log_probs(self, observations, actions):
        observations = self._filter_observations(observations)
        log_probs = self.action_distribution.log_prob(
            actions,
            bijector_kwargs={
                self.flow_model.name: {'observations': observations}
            })[..., tf.newaxis]

        return log_probs

    @tf.function(experimental_relax_shapes=True)
    def probs(self, observations, actions):
        observations = self._filter_observations(observations)
        probs = self.action_distribution.prob(
            actions,
            bijector_kwargs={
                self.flow_model.name: {'observations': observations}
            })[..., tf.newaxis]

        return probs

    def get_weights(self):
        return self.flow_model.get_weights()

    def set_weights(self, *args, **kwargs):
        return self.flow_model.set_weights(*args, **kwargs)

    @property
    def trainable_weights(self):
        return self.flow_model.trainable_variables

    @property
    def non_trainable_weights(self):
        return self.flow_model.non_trainable_weights

    @tf.function(experimental_relax_shapes=True)
    def get_diagnostics(self, inputs):
        """Return diagnostic information of the policy.

        Returns the mean and standard deviation of the policy entropy
        (negative action log-probabilities) and the mean, standard deviation,
        minimum, and maximum of the sampled actions.
        """
        actions = self.actions(inputs)
        log_pis = self.log_probs(inputs, actions)

        return OrderedDict((
            ('entropy-mean', tf.reduce_mean(-log_pis)),
            ('entropy-std', tf.math.reduce_std(-log_pis)),

            ('actions-mean', tf.reduce_mean(actions)),
            ('actions-std', tf.math.reduce_std(actions)),
            ('actions-min', tf.reduce_min(actions)),
            ('actions-max', tf.reduce_max(actions)),
        ))
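The bijector_kwargs pattern in actions, log_probs, and probs is what makes the flow conditional: TensorFlow Probability splits the keyword arguments passed to a TransformedDistribution into distribution kwargs and bijector kwargs, and a Chain-style bijector forwards entries keyed by an inner bijector's name to that bijector's forward/inverse calls. Keying on self.flow_model.name therefore delivers {'observations': observations} to the flow's conditioner networks. Below is a toy sketch of that routing mechanism with a made-up ConditionalShift bijector; it illustrates the general kwargs plumbing only, not the project's RealNVPFlow.

import tensorflow as tf
import tensorflow_probability as tfp

tfb = tfp.bijectors
tfd = tfp.distributions


class ConditionalShift(tfb.Bijector):
    """Toy bijector whose forward/inverse take an extra `shift` kwarg."""

    def __init__(self, name='conditional_shift'):
        super(ConditionalShift, self).__init__(
            forward_min_event_ndims=0, name=name)

    def _forward(self, x, shift=0.0):
        return x + shift

    def _inverse(self, y, shift=0.0):
        return y - shift

    def _forward_log_det_jacobian(self, x, shift=0.0):
        # A pure shift has a unit Jacobian.
        return tf.zeros([], dtype=x.dtype)


base = tfd.Normal(loc=0.0, scale=1.0)
distribution = tfd.TransformedDistribution(
    distribution=base,
    bijector=tfb.Chain([ConditionalShift(name='conditional_shift')]))

# Kwargs keyed by the inner bijector's name reach that bijector's forward call,
# analogous to how RealNVPPolicy passes observations to its flow.
samples = distribution.sample(
    3, bijector_kwargs={'conditional_shift': {'shift': 5.0}})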