def test_dist_info_sym(self, obs_dim, action_dim, hidden_dim):
        env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))

        obs_ph = tf.compat.v1.placeholder(
            tf.float32, shape=(None, None, env.observation_space.flat_dim))

        with mock.patch(('metarl.tf.policies.'
                         'categorical_gru_policy.GRUModel'),
                        new=SimpleGRUModel):
            policy = CategoricalGRUPolicy(env_spec=env.spec,
                                          state_include_action=False)

        policy.reset()
        obs = env.reset()

        dist_sym = policy.dist_info_sym(obs_var=obs_ph,
                                        state_info_vars=None,
                                        name='p2_sym')
        dist = self.sess.run(
            dist_sym, feed_dict={obs_ph: [[obs.flatten()], [obs.flatten()]]})

        assert np.array_equal(dist['prob'], np.full((2, 1, action_dim), 0.5))
    def test_dist_info_sym_wrong_input(self):
        env = TfEnv(DummyDiscreteEnv(obs_dim=(1, ), action_dim=1))

        obs_ph = tf.compat.v1.placeholder(
            tf.float32, shape=(None, None, env.observation_space.flat_dim))

        with mock.patch(('metarl.tf.policies.'
                         'categorical_gru_policy.GRUModel'),
                        new=SimpleGRUModel):
            policy = CategoricalGRUPolicy(env_spec=env.spec,
                                          state_include_action=True)

        policy.reset()
        obs = env.reset()

        policy.dist_info_sym(
            obs_var=obs_ph,
            state_info_vars={'prev_action': np.zeros((3, 1, 1))},
            name='p2_sym')
        # observation batch size = 2 but prev_action batch size = 3
        with pytest.raises(tf.errors.InvalidArgumentError):
            self.sess.run(
                policy.model.networks['p2_sym'].input,
                feed_dict={obs_ph: [[obs.flatten()], [obs.flatten()]]})
    def test_get_action_state_include_action(self, mock_normal, obs_dim,
                                             action_dim, hidden_dim):
        mock_normal.return_value = 0.5
        env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        with mock.patch(('metarl.tf.policies.'
                         'gaussian_lstm_policy.GaussianLSTMModel'),
                        new=SimpleGaussianLSTMModel):
            policy = GaussianLSTMPolicy(env_spec=env.spec,
                                        state_include_action=True)

        policy.reset()
        obs = env.reset()
        expected_action = np.full(action_dim, 0.5 * np.exp(0.5) + 0.5)
        action, agent_info = policy.get_action(obs)
        assert env.action_space.contains(action)
        assert np.allclose(action, expected_action, atol=1e-6)
        expected_mean = np.full(action_dim, 0.5)
        assert np.array_equal(agent_info['mean'], expected_mean)
        expected_log_std = np.full(action_dim, 0.5)
        assert np.array_equal(agent_info['log_std'], expected_log_std)
        expected_prev_action = np.full(action_dim, 0)
        assert np.array_equal(agent_info['prev_action'], expected_prev_action)

        policy.reset()

        actions, agent_infos = policy.get_actions([obs])
        for action, mean, log_std, prev_action in zip(
                actions, agent_infos['mean'], agent_infos['log_std'],
                agent_infos['prev_action']):
            assert env.action_space.contains(action)
            assert np.allclose(action,
                               np.full(action_dim, expected_action),
                               atol=1e-6)
            assert np.array_equal(mean, expected_mean)
            assert np.array_equal(log_std, expected_log_std)
            assert np.array_equal(prev_action, expected_prev_action)
Esempio n. 4
0
class TestNormalizedGym:
    def setup_method(self):
        self.env = TfEnv(
            normalize(gym.make('Pendulum-v0'),
                      normalize_reward=True,
                      normalize_obs=True,
                      flatten_obs=True))

    def teardown_method(self):
        self.env.close()

    def test_does_not_modify_action(self):
        a = self.env.action_space.sample()
        a_copy = a
        self.env.reset()
        self.env.step(a)
        assert a == a_copy

    def test_flatten(self):
        for _ in range(10):
            self.env.reset()
            for _ in range(5):
                self.env.render()
                action = self.env.action_space.sample()
                next_obs, _, done, _ = self.env.step(action)
                assert next_obs.shape == self.env.observation_space.low.shape
                if done:
                    break

    def test_unflatten(self):
        for _ in range(10):
            self.env.reset()
            for _ in range(5):
                action = self.env.action_space.sample()
                next_obs, _, done, _ = self.env.step(action)
                # yapf: disable
                assert (self.env.observation_space.flatten(next_obs).shape
                        == self.env.observation_space.flat_dim)
                # yapf: enable
                if done:
                    break
class TestDiscreteCNNQFunction(TfGraphTestCase):
    def setup_method(self):
        super().setup_method()
        self.env = TfEnv(DummyDiscretePixelEnv())
        self.obs = self.env.reset()

    # yapf: disable
    @pytest.mark.parametrize('filter_dims, num_filters, strides', [
        ((3, ), (5, ), (1, )),
        ((3, ), (5, ), (2, )),
        ((3, 3), (5, 5), (1, 1)),
    ])
    # yapf: enable
    def test_get_action(self, filter_dims, num_filters, strides):
        with mock.patch(('metarl.tf.q_functions.'
                         'discrete_cnn_q_function.CNNModel'),
                        new=SimpleCNNModel):
            with mock.patch(('metarl.tf.q_functions.'
                             'discrete_cnn_q_function.MLPModel'),
                            new=SimpleMLPModel):
                qf = DiscreteCNNQFunction(env_spec=self.env.spec,
                                          filter_dims=filter_dims,
                                          num_filters=num_filters,
                                          strides=strides,
                                          dueling=False)

        action_dim = self.env.action_space.n
        expected_output = np.full(action_dim, 0.5)
        outputs = self.sess.run(qf.q_vals, feed_dict={qf.input: [self.obs]})
        assert np.array_equal(outputs[0], expected_output)
        outputs = self.sess.run(
            qf.q_vals, feed_dict={qf.input: [self.obs, self.obs, self.obs]})
        for output in outputs:
            assert np.array_equal(output, expected_output)

    # yapf: disable
    @pytest.mark.parametrize('filter_dims, num_filters, strides', [
        ((3,), (5,), (1,)),
        ((3,), (5,), (2,)),
        ((3, 3), (5, 5), (1, 1)),
    ])
    # yapf: enable
    def test_get_action_dueling(self, filter_dims, num_filters, strides):
        with mock.patch(('metarl.tf.q_functions.'
                         'discrete_cnn_q_function.CNNModel'),
                        new=SimpleCNNModel):
            with mock.patch(('metarl.tf.q_functions.'
                             'discrete_cnn_q_function.MLPDuelingModel'),
                            new=SimpleMLPModel):
                qf = DiscreteCNNQFunction(env_spec=self.env.spec,
                                          filter_dims=filter_dims,
                                          num_filters=num_filters,
                                          strides=strides,
                                          dueling=True)

        action_dim = self.env.action_space.n
        expected_output = np.full(action_dim, 0.5)
        outputs = self.sess.run(qf.q_vals, feed_dict={qf.input: [self.obs]})
        assert np.array_equal(outputs[0], expected_output)
        outputs = self.sess.run(
            qf.q_vals, feed_dict={qf.input: [self.obs, self.obs, self.obs]})
        for output in outputs:
            assert np.array_equal(output, expected_output)

    # yapf: disable
    @pytest.mark.parametrize('filter_dims, num_filters, strides, '
                             'pool_strides, pool_shapes', [
        ((3, ), (5, ), (1, ), (1, 1), (1, 1)),  # noqa: E122
        ((3, ), (5, ), (2, ), (2, 2), (2, 2)),
        ((3, 3), (5, 5), (1, 1), (1, 1), (1, 1)),
        ((3, 3), (5, 5), (1, 1), (2, 2), (2, 2))
    ])
    # yapf: enable
    def test_get_action_max_pooling(self, filter_dims, num_filters, strides,
                                    pool_strides, pool_shapes):
        with mock.patch(('metarl.tf.q_functions.'
                         'discrete_cnn_q_function.CNNModelWithMaxPooling'),
                        new=SimpleCNNModelWithMaxPooling):
            with mock.patch(('metarl.tf.q_functions.'
                             'discrete_cnn_q_function.MLPModel'),
                            new=SimpleMLPModel):
                qf = DiscreteCNNQFunction(env_spec=self.env.spec,
                                          filter_dims=filter_dims,
                                          num_filters=num_filters,
                                          strides=strides,
                                          max_pooling=True,
                                          pool_strides=pool_strides,
                                          pool_shapes=pool_shapes,
                                          dueling=False)

        action_dim = self.env.action_space.n
        expected_output = np.full(action_dim, 0.5)
        outputs = self.sess.run(qf.q_vals, feed_dict={qf.input: [self.obs]})
        assert np.array_equal(outputs[0], expected_output)
        outputs = self.sess.run(
            qf.q_vals, feed_dict={qf.input: [self.obs, self.obs, self.obs]})
        for output in outputs:
            assert np.array_equal(output, expected_output)

    # yapf: disable
    @pytest.mark.parametrize('filter_dims, num_filters, strides', [
        ((3, ), (5, ), (1, )),
        ((3, ), (5, ), (2, )),
        ((3, 3), (5, 5), (1, 1))
    ])
    # yapf: enable
    def test_get_qval_sym(self, filter_dims, num_filters, strides):
        with mock.patch(('metarl.tf.q_functions.'
                         'discrete_cnn_q_function.CNNModel'),
                        new=SimpleCNNModel):
            with mock.patch(('metarl.tf.q_functions.'
                             'discrete_cnn_q_function.MLPModel'),
                            new=SimpleMLPModel):
                qf = DiscreteCNNQFunction(env_spec=self.env.spec,
                                          filter_dims=filter_dims,
                                          num_filters=num_filters,
                                          strides=strides,
                                          dueling=False)
        output1 = self.sess.run(qf.q_vals, feed_dict={qf.input: [self.obs]})

        obs_dim = self.env.observation_space.shape
        action_dim = self.env.action_space.n

        input_var = tf.compat.v1.placeholder(tf.float32,
                                             shape=(None, ) + obs_dim)
        q_vals = qf.get_qval_sym(input_var, 'another')
        output2 = self.sess.run(q_vals, feed_dict={input_var: [self.obs]})

        expected_output = np.full(action_dim, 0.5)

        assert np.array_equal(output1, output2)
        assert np.array_equal(output2[0], expected_output)

    # yapf: disable
    @pytest.mark.parametrize('filter_dims, num_filters, strides', [
        ((3, ), (5, ), (1, )),
        ((3, ), (5, ), (2, )),
        ((3, 3), (5, 5), (1, 1)),
    ])
    # yapf: enable
    def test_is_pickleable(self, filter_dims, num_filters, strides):
        with mock.patch(('metarl.tf.q_functions.'
                         'discrete_cnn_q_function.CNNModel'),
                        new=SimpleCNNModel):
            with mock.patch(('metarl.tf.q_functions.'
                             'discrete_cnn_q_function.MLPModel'),
                            new=SimpleMLPModel):
                qf = DiscreteCNNQFunction(env_spec=self.env.spec,
                                          filter_dims=filter_dims,
                                          num_filters=num_filters,
                                          strides=strides,
                                          dueling=False)
        with tf.compat.v1.variable_scope(
                'DiscreteCNNQFunction/Sequential/SimpleMLPModel', reuse=True):
            return_var = tf.compat.v1.get_variable('return_var')
        # assign it to all one
        return_var.load(tf.ones_like(return_var).eval())

        output1 = self.sess.run(qf.q_vals, feed_dict={qf.input: [self.obs]})

        h_data = pickle.dumps(qf)
        with tf.compat.v1.Session(graph=tf.Graph()) as sess:
            qf_pickled = pickle.loads(h_data)
            output2 = sess.run(qf_pickled.q_vals,
                               feed_dict={qf_pickled.input: [self.obs]})

        assert np.array_equal(output1, output2)

    # yapf: disable
    @pytest.mark.parametrize('filter_dims, num_filters, strides', [
        ((3, ), (5, ), (1, )),
        ((3, ), (5, ), (2, )),
        ((3, 3), (5, 5), (1, 1))
    ])
    # yapf: enable
    def test_clone(self, filter_dims, num_filters, strides):
        with mock.patch(('metarl.tf.q_functions.'
                         'discrete_cnn_q_function.CNNModel'),
                        new=SimpleCNNModel):
            with mock.patch(('metarl.tf.q_functions.'
                             'discrete_cnn_q_function.MLPModel'),
                            new=SimpleMLPModel):
                qf = DiscreteCNNQFunction(env_spec=self.env.spec,
                                          filter_dims=filter_dims,
                                          num_filters=num_filters,
                                          strides=strides,
                                          dueling=False)
        qf_clone = qf.clone('another_qf')
        assert qf_clone._filter_dims == qf._filter_dims
        assert qf_clone._num_filters == qf._num_filters
        assert qf_clone._strides == qf._strides
Esempio n. 6
0
 def test_normalize_pixel_patch_not_trigger(self):
     env = TfEnv(DummyBoxEnv())
     obs = env.reset()
     obs_normalized = normalize_pixel_batch(env, obs)
     assert np.array_equal(obs, obs_normalized)
Esempio n. 7
0
 def test_normalize_pixel_patch(self):
     env = TfEnv(DummyDiscretePixelEnv())
     obs = env.reset()
     obs_normalized = normalize_pixel_batch(env, obs)
     expected = [ob / 255.0 for ob in obs]
     assert np.allclose(obs_normalized, expected)