Example #1
0
    def test(self):
        # Exercises trpo._find_old_style_function on three outputs built
        # from the same two scalar variables: one using only arithmetic,
        # one with a single old_style_identity call, and one with three.
        lhs = chainer.Variable(np.random.rand(1).astype(np.float32))
        rhs = chainer.Variable(np.random.rand(1).astype(np.float32))

        def assert_finds_old_style(output, expected_count):
            # Every returned function must be an OldStyleIdentity, and
            # exactly `expected_count` of them must be reported.
            found = trpo._find_old_style_function([output])
            self.assertEqual(len(found), expected_count)
            only_old_style = all(
                isinstance(func, OldStyleIdentity) for func in found)
            self.assertTrue(only_old_style)

        # Plain arithmetic: the result must be exactly an empty list.
        plain_output = 2 * lhs + rhs
        self.assertEqual(
            trpo._find_old_style_function([plain_output]), [])

        # A single old_style_identity call in the graph.
        assert_finds_old_style(2 * old_style_identity(lhs) + rhs, 1)

        # Two nested calls on one input plus one call on the other.
        assert_finds_old_style(
            2 * old_style_identity(old_style_identity(lhs))
            + old_style_identity(rhs),
            3)
Example #2
0
    def test_first_order(self):
        # The output is first order in the parameters, so its Hessian
        # will contain None.
        params, output = self._generate_params_and_first_order_output()

        # Skip when the graph contains functions that
        # trpo._find_old_style_function reports as old-style.
        unsupported = trpo._find_old_style_function([output])
        if unsupported:
            reason = (
                "Chainer v{} does not support double backprop of these "
                "functions: {}."
            ).format(chainer.__version__, unsupported)
            self.skipTest(reason)

        direction = np.random.rand(4).astype(np.float32)
        # The None entries must make the Hessian-vector product
        # computation fail with an AssertionError.
        self.assertRaises(
            AssertionError,
            compute_hessian_vector_product, output, params, direction)
Example #3
0
    def make_model(self, env):
        """Build a policy and a value function sized for ``env``.

        The policy is a softmax policy when ``self.discrete`` is truthy and
        a Gaussian policy with state-independent diagonal covariance
        otherwise. The current test is skipped when the KL divergence of
        the policy's action distribution involves functions reported by
        ``trpo._find_old_style_function``.

        Returns:
            A ``(pi, v)`` tuple of the policy and the value function.
        """
        # Hidden-layer settings shared by the value function and the policy.
        hidden_kwargs = dict(
            n_hidden_layers=1,
            n_hidden_channels=20,
            nonlinearity=F.tanh,
        )

        obs_size = env.observation_space.low.size
        # The value function is constructed before the policy.
        v = v_functions.FCVFunction(
            obs_size, last_wscale=0.01, **hidden_kwargs)

        if self.discrete:
            pi = policies.FCSoftmaxPolicy(
                obs_size,
                env.action_space.n,
                last_wscale=0.01,
                **hidden_kwargs
            )
        else:
            pi = policies.FCGaussianPolicyWithStateIndependentCovariance(
                obs_size,
                env.action_space.low.size,
                mean_wscale=0.01,
                var_type='diagonal',
                **hidden_kwargs
            )

        # Probe the policy with an all-zero observation (with a leading
        # axis added) and check whether the KL divergence of the resulting
        # distribution against itself supports double-backprop.
        zero_obs = np.zeros_like(env.observation_space.low, dtype=np.float32)
        distrib = pi(zero_obs[None])
        self_kl = distrib.kl(distrib)
        unsupported = trpo._find_old_style_function([self_kl])
        if unsupported:
            reason = (
                "Chainer v{} does not support double backprop of these "
                "functions: {}."
            ).format(chainer.__version__, unsupported)
            self.skipTest(reason)

        return pi, v
Example #4
0
    def test_second_order(self):
        # The output is second order in the parameters, so its Hessian
        # will be non-zero.
        params, output = self._generate_params_and_second_order_output()

        # Skip when the graph contains functions that
        # trpo._find_old_style_function reports as old-style.
        unsupported = trpo._find_old_style_function([output])
        if unsupported:
            reason = (
                "Chainer v{} does not support double backprop of these "
                "functions: {}."
            ).format(chainer.__version__, unsupported)
            self.skipTest(reason)

        def check_against_full_hessian(direction):
            # The Hessian-vector product and the full Hessian must both be
            # non-zero, and the product must match Hessian.dot(direction).
            product = compute_hessian_vector_product(
                output, params, direction)
            full_hessian = compute_hessian(output, params)
            self.assertGreater(np.count_nonzero(product), 0)
            self.assertGreater(np.count_nonzero(full_hessian), 0)
            np.testing.assert_allclose(
                product, full_hessian.dot(direction), atol=1e-3)

        # Two independent random vectors, reusing the same output.
        for _ in range(2):
            check_against_full_hessian(
                np.random.rand(4).astype(np.float32))