def test_one_hot_numpy():
    """Check ``to_one_hot`` when called without an array constructor.

    For every vector length from 1 to 9 the test encodes each valid index
    (and each ordered pair of valid indices) and verifies the result is a
    numpy array of the expected shape with a 1 at each requested position
    and nothing else set.
    """
    sizes = range(1, 10)

    # Scalar index -> a single 1-D vector of length ``size``.
    for size in sizes:
        for idx in range(size):
            vec = to_one_hot(idx, size)
            assert isinstance(vec, np.ndarray)
            assert vec.shape == (size, )
            # The sum being 1 rules out any extra non-zero entries.
            assert np.sum(vec) == 1
            assert vec[idx] == 1

    # List of two indices -> a (2, size) matrix, one row per index.
    for size in sizes:
        index_pairs = [(a, b) for a in range(size) for b in range(size)]
        for row0_idx, row1_idx in index_pairs:
            mat = to_one_hot([row0_idx, row1_idx], size)
            assert isinstance(mat, np.ndarray)
            assert mat.shape == (2, size)
            assert np.sum(mat) == 2
            assert mat[0, row0_idx] == 1
            assert mat[1, row1_idx] == 1
def test_one_hot_torch():
    """Check ``to_one_hot`` when ``torch.zeros`` is passed as third argument.

    Mirrors the numpy test: for vector lengths 1 to 9, every valid index
    (and every ordered pair of valid indices) must be encoded as a torch
    tensor of the expected shape containing exactly the requested ones.
    """
    # The result must have the same type that ``torch.zeros`` itself returns.
    tensor_type = type(torch.zeros(1))
    sizes = range(1, 10)

    # Scalar index -> a single 1-D tensor of length ``size``.
    for size in sizes:
        for idx in range(size):
            vec = to_one_hot(idx, size, torch.zeros)
            assert isinstance(vec, tensor_type)
            assert vec.shape == (size, )
            assert torch.sum(vec) == 1
            assert vec[idx] == 1

    # List of two indices -> a (2, size) tensor, one row per index.
    for size in sizes:
        index_pairs = [(a, b) for a in range(size) for b in range(size)]
        for row0_idx, row1_idx in index_pairs:
            mat = to_one_hot([row0_idx, row1_idx], size, torch.zeros)
            assert isinstance(mat, tensor_type)
            assert mat.shape == (2, size)
            assert torch.sum(mat) == 2
            assert mat[0, row0_idx] == 1
            assert mat[1, row1_idx] == 1
def test_feature_count():
    """Check ``feature_count`` against hand-computed expectations.

    A dummy trajectory of one-hot features over 16 states is built in
    which state 1 occurs once, state 2 twice, state 3 three times and
    state 4 four times. The expected feature counts are then verified
    for several trajectory sets and discount factors.
    """
    env = feature_wrapper.make('FrozenLake-v0')
    n_states = 16

    # Dummy data: the state index whose feature is emitted at each step.
    visited = [1, 2, 2, 3, 3, 3, 4, 4, 4, 4]
    features = [to_one_hot(state, n_states) for state in visited]

    def check(trajs, gamma, expected_head):
        """Run feature_count and compare with a zero-padded expectation."""
        result = feature_count(env, trajs, gamma=gamma)
        desired = np.zeros(n_states)
        # Only the first entries are non-zero; the remainder stays 0.
        desired[:len(expected_head)] = expected_head
        assert isinstance(result, np.ndarray)
        assert np.allclose(result, desired)

    # Undiscounted: each entry equals how often the state occurs.
    check([{'features': features}], 1.0, [0., 1., 2., 3., 4.])

    # Two copies of the same trajectory must give the same feature count
    # as one copy (the expectation is not doubled).
    check([{'features': features}, {'features': features}], 1.0,
          [0., 1., 2., 3., 4.])

    # One trajectory that repeats the steps twice doubles the feature
    # count (with gamma 1).
    check([{'features': features + features}], 1.0, [0., 2., 4., 6., 8.])

    # gamma 0.9: the feature at step t is weighted by 0.9**t.
    state_four = .9**6 + .9**7 + .9**8 + .9**9
    discounted = [0., 1., .9 + .81, .729 + .6561 + .59049, state_four]
    check([{'features': features}], .9, discounted)

    # gamma 0: only the very first step contributes.
    check([{'features': features}], 0, [0., 1.])
Example #4
0
    def features(self, current_state: None, action: None,
                 next_state: int) -> np.ndarray:
        """Return features to be saved in step method's info dictionary.

        The features are the one-hot encoding of the next state, produced
        by ``to_one_hot`` with ``self.env.observation_space.n`` as its
        second argument.

        Parameters
        ----------
        current_state: None
            Not used by this method.
        action: None
            Not used by this method.
        next_state: int
            The next state. A Python ``int``, any numpy integer scalar
            (``np.int32``, ``np.int64``, ...) or a numpy array is accepted.

        Returns
        -------
        np.ndarray
            The features in a numpy array.

        Raises
        ------
        AssertionError
            If ``next_state`` is None (only while assertions are enabled).
        NotImplementedError
            If ``next_state`` is of an unsupported type.
        """
        assert next_state is not None
        # np.integer is the common base class of all numpy integer scalars,
        # so state indices that arrive as e.g. np.int32 are accepted just
        # like np.int64 ones instead of being rejected as unsupported.
        if not isinstance(next_state, (int, np.integer, np.ndarray)):
            raise NotImplementedError()
        return to_one_hot(next_state, self.env.observation_space.n)
 def features(self, current_state, action, next_state):
     """Return the one-hot encoding of ``next_state``.

     ``current_state`` and ``action`` are accepted but not used. The
     encoding is produced by ``to_one_hot``, which receives
     ``self.env.observation_space.n`` as its second argument.
     """
     n_states = self.env.observation_space.n
     return to_one_hot(next_state, n_states)