def test_obs_not_image(self):
        """Check pixel normalization is skipped for non-image observations.

        The environment is wrapped with ``is_image=False``; after one
        ``fit`` and one ``predict`` call the patched
        ``normalize_pixel_batch`` must not have been called.
        """
        pixel_env = GarageEnv(DummyDiscretePixelEnv(), is_image=False)
        regressor_target = ('garage.tf.baselines.'
                            'gaussian_cnn_baseline.'
                            'GaussianCNNRegressor')
        normalizer_target = ('garage.tf.baselines.'
                             'gaussian_cnn_baseline.'
                             'normalize_pixel_batch')

        # Swap in the simple regressor and spy on the normalizer while
        # still delegating to the real implementation via side_effect.
        with mock.patch(regressor_target, new=SimpleGaussianCNNRegressor), \
                mock.patch(normalizer_target,
                           side_effect=normalize_pixel_batch) as normalizer:
            baseline = GaussianCNNBaseline(env_spec=pixel_env.spec)
            shape = pixel_env.spec.observation_space.shape

            # One path per constant fill value, with a matching return.
            train_paths = [{
                'observations': [np.full(shape, value)],
                'returns': [value]
            } for value in (1, 2)]
            baseline.fit(train_paths)

            query = {
                'observations': [np.full(shape, value) for value in (1, 2)]
            }
            baseline.predict(query)

            assert not normalizer.called
 def test_flattened_image_input(self):
     """Fit and predict on a single flattened image observation.

     Takes one environment step, flattens the resulting observation,
     fits the baseline on it, and expects the prediction for the same
     flattened observation to be close to zero.
     """
     pixel_env = GymEnv(DummyDiscretePixelEnv(), is_image=True)
     cnn_kwargs = dict(filters=((3, (3, 3)), (6, (3, 3))),
                       strides=(1, 1),
                       padding='SAME',
                       hidden_sizes=(32, ))
     baseline = GaussianCNNBaseline(env_spec=pixel_env.spec, **cnn_kwargs)

     pixel_env.reset()
     step = pixel_env.step(1)
     image = step.observation
     reward = step.reward

     baseline.fit([{
         'observations': [image.flatten()],
         'returns': [reward]
     }])

     query = {'observations': [image.flatten()]}
     assert np.allclose(0., baseline.predict(query))
    def test_fit(self, obs_dim):
        """Fit on two constant observations and predict their returns.

        The baseline is constructed while ``GaussianCNNRegressor`` is
        patched with ``SimpleGaussianCNNRegressor``. Observations filled
        with 1 and 2 are paired with returns 1 and 2, and ``predict`` is
        expected to return ``[1, 2]`` for the same observations.
        """
        regressor_target = ('garage.tf.baselines.'
                            'gaussian_cnn_baseline.'
                            'GaussianCNNRegressor')
        env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim))

        # Only construction happens under the patch; fit/predict run after.
        with mock.patch(regressor_target, new=SimpleGaussianCNNRegressor):
            baseline = GaussianCNNBaseline(env_spec=env.spec)

        fill_values = (1, 2)
        baseline.fit([{
            'observations': [np.full(obs_dim, value)],
            'returns': [value]
        } for value in fill_values])

        query = {
            'observations': [np.full(obs_dim, value) for value in fill_values]
        }
        assert np.array_equal(baseline.predict(query), [1, 2])
Exemple #4
0
    def test_fit_unnormalized(self):
        """Fit with input/output normalization disabled.

        Fits the baseline 20 times, requires a mean absolute prediction
        error of at most 0.1 on the test data, and then checks that the
        default network's x/y mean stay at zero and x/y std stay at one.
        """
        baseline = GaussianCNNBaseline(env_spec=test_env_spec,
                                       filters=((3, (3, 3)), (6, (3, 3))),
                                       strides=(1, 1),
                                       padding='SAME',
                                       hidden_sizes=(32, ),
                                       adaptive_std=True,
                                       normalize_inputs=False,
                                       normalize_outputs=False)

        (train_paths, _, _), (test_paths, expected) = get_train_test_data()

        for _epoch in range(20):
            baseline.fit(train_paths)

        prediction = baseline.predict(test_paths)
        total_abs_error = sum(
            (np.abs(target - prediction[idx])
             for idx, target in enumerate(expected)), 0.0)
        assert total_abs_error / len(expected) <= 0.1

        network = baseline._networks['default']

        # Input statistics must match exactly.
        x_mean = self.sess.run(network.x_mean)
        x_std = self.sess.run(network.x_std)
        assert np.array_equal(x_mean, np.zeros_like(x_mean))
        assert np.array_equal(x_std, np.ones_like(x_std))

        # Output statistics are compared with a tolerance.
        y_mean = self.sess.run(network.y_mean)
        y_std = self.sess.run(network.y_std)
        assert np.allclose(y_mean, np.zeros_like(y_mean))
        assert np.allclose(y_std, np.ones_like(y_std))
Exemple #5
0
    def test_fit_without_trusted_region(self):
        """Fit with ``use_trust_region=False`` and check prediction error.

        Fits the baseline 20 times on the training paths and requires a
        mean absolute prediction error of at most 0.1 on the test data.
        """
        baseline = GaussianCNNBaseline(env_spec=test_env_spec,
                                       filters=((3, (3, 3)), (6, (3, 3))),
                                       strides=(1, 1),
                                       padding='SAME',
                                       hidden_sizes=(32, ),
                                       adaptive_std=False,
                                       use_trust_region=False)

        (train_paths, _, _), (test_paths, expected) = get_train_test_data()

        for _epoch in range(20):
            baseline.fit(train_paths)

        prediction = baseline.predict(test_paths)
        total_abs_error = sum(
            (np.abs(target - prediction[idx])
             for idx, target in enumerate(expected)), 0.0)
        assert total_abs_error / len(expected) <= 0.1
    def test_obs_is_image(self):
        """Check that image observations are passed to pixel normalization.

        With ``is_image=True``, ``fit`` is expected to call the patched
        ``normalize_pixel_batch`` exactly once with the concatenated path
        observations, and the subsequent ``predict`` call is expected to
        pass it the observations it was given.
        """
        env = GarageEnv(DummyDiscretePixelEnv(), is_image=True)
        with mock.patch(('garage.tf.baselines.'
                         'gaussian_cnn_baseline.'
                         'GaussianCNNRegressor'),
                        new=SimpleGaussianCNNRegressor):
            # side_effect keeps the real normalization running while the
            # mock records every call for inspection below.
            with mock.patch(
                    'garage.tf.baselines.'
                    'gaussian_cnn_baseline.'
                    'normalize_pixel_batch',
                    side_effect=normalize_pixel_batch) as npb:

                gcb = GaussianCNNBaseline(env_spec=env.spec)

                obs_dim = env.spec.observation_space.shape
                paths = [{
                    'observations': [np.full(obs_dim, 1)],
                    'returns': [1]
                }, {
                    'observations': [np.full(obs_dim, 2)],
                    'returns': [2]
                }]

                gcb.fit(paths)
                observations = np.concatenate(
                    [p['observations'] for p in paths])
                assert npb.call_count == 1, (
                    "Expected '%s' to have been called once. Called %s times."
                    % (npb._mock_name or 'mock', npb.call_count))
                # np.array_equal yields a single bool even when shapes
                # differ, unlike ``(a == b).all()``.
                assert np.array_equal(npb.call_args_list[0][0][0],
                                      observations)

                obs = {
                    'observations': [np.full(obs_dim, 1),
                                     np.full(obs_dim, 2)]
                }
                observations = obs['observations']
                gcb.predict(obs)
                # A bare ``==`` against a list of ndarrays only works when
                # the very same objects are passed through; compare values
                # so an ndarray or copied argument does not raise on an
                # ambiguous truth value.
                assert np.array_equal(npb.call_args_list[1][0][0],
                                      observations)
Exemple #7
0
    def test_fit_normalized(self):
        """Fit with trust region enabled and check learned statistics.

        Fits the baseline 20 times, requires a mean absolute prediction
        error of at most 0.1 on the test data, and then checks that the
        default network's x/y mean and std are close to the mean and std
        (over axis 0) of the training observations and returns.
        """
        baseline = GaussianCNNBaseline(env_spec=test_env_spec,
                                       filters=((3, (3, 3)), (6, (3, 3))),
                                       strides=(1, 1),
                                       padding='SAME',
                                       hidden_sizes=(32, ),
                                       adaptive_std=False,
                                       use_trust_region=True)

        train_data, (test_paths, expected) = get_train_test_data()
        train_paths, observations, returns = train_data

        for _epoch in range(20):
            baseline.fit(train_paths)

        prediction = baseline.predict(test_paths)
        total_abs_error = sum(
            (np.abs(target - prediction[idx])
             for idx, target in enumerate(expected)), 0.0)
        assert total_abs_error / len(expected) <= 0.1

        network = baseline._networks['default']

        # Input statistics versus the training observations.
        x_mean = self.sess.run(network.x_mean)
        x_std = self.sess.run(network.x_std)
        assert np.allclose(x_mean,
                           np.mean(observations, axis=0, keepdims=True))
        assert np.allclose(x_std,
                           np.std(observations, axis=0, keepdims=True))

        # Output statistics versus the training returns.
        y_mean = self.sess.run(network.y_mean)
        y_std = self.sess.run(network.y_std)
        assert np.allclose(y_mean, np.mean(returns, axis=0, keepdims=True))
        assert np.allclose(y_std, np.std(returns, axis=0, keepdims=True))