def test_one_hot_numpy():
    """Check to_one_hot without an explicit constructor argument.

    Covers a single index and a list of two indices for every size
    from 1 to 9, asserting that the result is an ``np.ndarray`` of the
    expected shape with ones exactly at the requested positions.
    """
    sizes = range(1, 10)

    # Single index: expect one vector of length `size` with a single 1.
    for size in sizes:
        for idx in range(size):
            vec = to_one_hot(idx, size)
            assert isinstance(vec, np.ndarray)
            assert vec.shape == (size, )
            assert np.sum(vec) == 1
            assert vec[idx] == 1

    # Two indices at once: expect one row per index.
    for size in sizes:
        index_pairs = [(a, b) for a in range(size) for b in range(size)]
        for idx_a, idx_b in index_pairs:
            mat = to_one_hot([idx_a, idx_b], size)
            assert isinstance(mat, np.ndarray)
            assert mat.shape == (2, size)
            assert np.sum(mat) == 2
            assert mat[0, idx_a] == 1
            assert mat[1, idx_b] == 1
def test_one_hot_torch():
    """Check to_one_hot when ``torch.zeros`` is passed as third argument.

    Covers a single index and a list of two indices for every size
    from 1 to 9, asserting that the result has the same type as
    ``torch.zeros(1)``, the expected shape, and ones exactly at the
    requested positions.
    """
    # Type of whatever torch.zeros produces; the results are checked against it.
    tensor_cls = type(torch.zeros(1))
    sizes = range(1, 10)

    # Single index: expect one vector of length `size` with a single 1.
    for size in sizes:
        for idx in range(size):
            vec = to_one_hot(idx, size, torch.zeros)
            assert isinstance(vec, tensor_cls)
            assert vec.shape == (size, )
            assert torch.sum(vec) == 1
            assert vec[idx] == 1

    # Two indices at once: expect one row per index.
    for size in sizes:
        index_pairs = [(a, b) for a in range(size) for b in range(size)]
        for idx_a, idx_b in index_pairs:
            mat = to_one_hot([idx_a, idx_b], size, torch.zeros)
            assert isinstance(mat, tensor_cls)
            assert mat.shape == (2, size)
            assert torch.sum(mat) == 2
            assert mat[0, idx_a] == 1
            assert mat[1, idx_b] == 1
def test_feature_count():
    """Check feature_count on hand-built one-hot trajectories.

    The dummy path visits state ``i`` exactly ``i`` times (i = 1..4),
    encoded one-hot over 16 entries, so the expected counts can be
    written down by hand for gamma 1.0, 0.9 and 0.
    """
    env = feature_wrapper.make('FrozenLake-v0')

    # Build the dummy trajectory features.
    visited = [1, 2, 2, 3, 3, 3, 4, 4, 4, 4]
    features = [to_one_hot(state, 16) for state in visited]

    def check(trajs, gamma, leading):
        """Compare feature_count output with `leading` padded to 16 zeros."""
        outcome = feature_count(env, trajs, gamma=gamma)
        target = np.array(leading + [0.] * 11)
        assert isinstance(outcome, np.ndarray)
        assert np.allclose(outcome, target)

    single = [{'features': features}]

    # Undiscounted count of a single trajectory.
    check(single, 1.0, [0., 1., 2., 3., 4.])

    # Two copies of the same trajectory should give the same feature count.
    check([{'features': features}, {'features': features}], 1.0,
          [0., 1., 2., 3., 4.])

    # One trajectory repeated back to back doubles the count (gamma 1).
    check([{'features': features + features}], 1.0, [0., 2., 4., 6., 8.])

    # Gamma 0.9: step t is weighted by 0.9**t.
    tail = .9**6 + .9**7 + .9**8 + .9**9
    check(single, .9, [0., 1., .9 + .81, .729 + .6561 + .59049, tail])

    # Gamma 0: only the very first step contributes.
    check(single, 0, [0., 1., 0., 0., 0.])
def features(self, current_state: None, action: None,
             next_state: int) -> np.ndarray:
    """Return features to be saved in step method's info dictionary.

    One-hot encoding the next state.

    Parameters
    ----------
    current_state: None
        Not used by this implementation.
    action: None
        Not used by this implementation.
    next_state: int
        The next state. Python ints, any NumPy integer scalar and
        ``np.ndarray`` values are accepted and forwarded to
        ``to_one_hot`` unchanged.

    Returns
    -------
    np.ndarray
        The features in a numpy array.

    Raises
    ------
    AssertionError
        If ``next_state`` is None (only while assertions are enabled).
    NotImplementedError
        If ``next_state`` is of an unsupported type.
    """
    assert next_state is not None
    # np.integer is the abstract base of every NumPy integer scalar, so
    # np.int32 / np.uint8 / ... states are accepted alongside np.int64.
    if isinstance(next_state, (int, np.integer, np.ndarray)):
        return to_one_hot(next_state, self.env.observation_space.n)
    raise NotImplementedError()
def features(self, current_state, action, next_state):
    """One-hot encode ``next_state``.

    ``current_state`` and ``action`` are accepted but not used. The
    encoding length is taken from ``self.env.observation_space.n``.
    """
    n_states = self.env.observation_space.n
    encoded = to_one_hot(next_state, n_states)
    return encoded