def test_trajectory_stream_sampling_by_trajectory(self):
  """Test if the trajectory stream samples by trajectory.

  Builds one long trajectory starting at observation 0 and one single-step
  trajectory starting at observation 101, streams ten length-1 slices with
  `sample_trajectories_uniformly=True`, and checks that the mean of the
  sampled `last_observation` values exceeds 20.
  """
  # Long trajectory of 0s.
  tr1 = rl_task.Trajectory(0)
  for _ in range(100):
    tr1.extend(0, 0, 0, False, 0)
  tr1.extend(0, 0, 0, True, 200)

  # Short trajectory of 101.
  tr2 = rl_task.Trajectory(101)
  tr2.extend(0, 0, 0, True, 200)
  task = rl_task.RLTask(
      DummyEnv(), initial_trajectories=[tr1, tr2], max_steps=9)

  # Stream of both. Check that we're sampling by trajectory.
  stream = task.trajectory_stream(
      max_slice_length=1, sample_trajectories_uniformly=True)
  slices = []
  for _ in range(10):
    next_slice = next(stream)
    # Use the test-case assertion instead of a bare `assert`: bare asserts
    # are removed when Python runs with -O and give poorer failure output.
    self.assertLen(next_slice, 1)
    slices.append(next_slice.last_observation)
  mean_obs = sum(slices) / float(len(slices))
  # Average should be around 50, sampling from {0, 101} uniformly.
  # Sampling 101 fewer than 2 times has low probability, but it is possible,
  # so this test can be flaky.
  self.assertGreater(mean_obs, 20)
  self.assertLen(slices, 10)
def test_trajectory_stream_sampling_uniform(self):
  """Test if the trajectory stream samples uniformly.

  Builds one long trajectory starting at observation 0 and one single-step
  trajectory starting at observation 101, streams ten length-1 slices with
  the default sampling settings, and checks that the mean of the sampled
  `last_observation` values stays below 31.
  """
  # Long trajectory of 0s.
  tr1 = rl_task.Trajectory(0)
  for _ in range(100):
    tr1.extend(
        action=0, dist_inputs=0, reward=0, done=False, new_observation=0
    )
  tr1.extend(
      action=0, dist_inputs=0, reward=0, done=True, new_observation=200
  )

  # Short trajectory of 101.
  tr2 = rl_task.Trajectory(101)
  tr2.extend(
      action=0, dist_inputs=0, reward=0, done=True, new_observation=200
  )
  task = rl_task.RLTask(
      DummyEnv(), initial_trajectories=[tr1, tr2], max_steps=9)

  # Stream of both. Check that we're sampling by slice, not by trajectory.
  stream = task.trajectory_stream(max_slice_length=1)
  slices = []
  for _ in range(10):
    next_slice = next(stream)
    # Use the test-case assertion instead of a bare `assert`: bare asserts
    # are removed when Python runs with -O and give poorer failure output.
    self.assertLen(next_slice, 1)
    slices.append(next_slice.last_observation)
  mean_obs = sum(slices) / float(len(slices))
  # Average should be around 1 sampling from 0x100, 101 uniformly.
  # Sampling 101 even 3 times is unlikely (3 * 101 / 10 = 30.3 < 31).
  self.assertLess(mean_obs, 31)
  self.assertLen(slices, 10)
def test_trajectory_suffix_len(self):
  """Checks that `suffix(length=k)` yields a trajectory of length k."""
  suffix_length = 3
  trajectory = rl_task.Trajectory(observation=0)
  for _ in range(5):
    self._extend(trajectory)
  self.assertLen(trajectory.suffix(length=suffix_length), suffix_length)
def test_nonempty_trajectory_last_observation(self):
  """Checks that `last_observation` reflects the most recent extend()."""
  final_obs = 321
  trajectory = rl_task.Trajectory(observation=123)
  for _ in range(5):
    self._extend(trajectory)
  # The last extension is the only one given an explicit observation.
  self._extend(trajectory, new_observation=final_obs)
  self.assertEqual(trajectory.last_observation, final_obs)
def test_trajectory_stream_margin(self):
  """Checks slices from a trajectory stream created with a margin."""
  trajectory = rl_task.Trajectory(0)
  trajectory.extend(
      action=0, dist_inputs=0, reward=0, done=False, new_observation=1
  )
  trajectory.extend(
      action=1, dist_inputs=2, reward=3, done=True, new_observation=1
  )
  task = rl_task.RLTask(
      DummyEnv(), initial_trajectories=[trajectory], max_steps=9)

  # Slices are requested without the final state and with a margin of 2.
  stream = task.trajectory_stream(
      max_slice_length=3, margin=2, include_final_state=False)
  saw_done = False
  for _ in range(10):
    current = next(stream)
    self.assertLen(current, 3)
    if not current.timesteps[0].done:
      continue
    # When the slice starts on a done timestep, the timesteps that follow
    # must be flagged done and have a falsy mask.
    for idx in (1, 2):
      self.assertTrue(current.timesteps[idx].done)
      self.assertFalse(current.timesteps[idx].mask)
    saw_done = True
  # The checks above only run if some slice starts on a done timestep.
  # Never drawing one has low probability (1/2^10) but is possible, so this
  # test can be flaky.
  self.assertTrue(saw_done)
def test_trajectory_slice_stream_margin(self):
  """Checks array slices from a slice stream created with a margin."""
  trajectory = rl_task.Trajectory(0)
  self._extend(trajectory, new_observation=1)
  self._extend(trajectory, new_observation=1)
  self._extend(
      trajectory,
      new_observation=1, action=1, dist_inputs=2, reward=3, done=True
  )
  task = rl_task.RLTask(
      DummyEnv(), initial_trajectories=[trajectory], max_steps=9)

  stream = task.trajectory_slice_stream(max_slice_length=4, margin=3)
  saw_done = False
  for _ in range(20):
    current = next(stream)
    self.assertEqual(current.observation.shape, (4,))
    if not current.done[0]:
      continue
    # Expected layout when the slice starts on the done timestep:
    #   index 0: last real timestep of the trajectory (observation 1),
    #   index 1: first margin timestep, holding the final observation (1),
    #   index 2+: remaining margin timesteps with observation 0.
    np.testing.assert_array_equal(current.observation, [1, 1, 0, 0])
    # Inside the margin every timestep is done and masked out.
    for idx in range(1, current.observation.shape[0]):
      self.assertTrue(current.done[idx])
      self.assertFalse(current.mask[idx])
    saw_done = True
  # The checks above only run if some slice starts on a done timestep.
  # Never drawing one has low probability (1/2^20) but is possible, so this
  # test can be flaky.
  self.assertTrue(saw_done)
def test_trajectory_to_np_without_margin_cuts_last_observation(self):
  """Checks that `to_np(margin=0)` leaves out the final observation."""
  trajectory = rl_task.Trajectory(observation=0)
  for next_obs in (1, 2, 3):
    self._extend(trajectory, new_observation=next_obs)
  as_np = trajectory.to_np(margin=0)
  # Observations 0..3 were recorded; only the first three are expected.
  np.testing.assert_array_equal(as_np.observation, [0, 1, 2])
def test_trajectory_to_np_observations(self):
  """Checks the observations produced by a default `to_np()` call."""
  trajectory = rl_task.Trajectory(observation=0)
  for next_obs in (1, 2):
    self._extend(trajectory, new_observation=next_obs)
  as_np = trajectory.to_np()
  np.testing.assert_array_equal(as_np.observation, [0, 1, 2])
def test_trajectory_stream_final_state(self):
  """Checks trajectory streams with `include_final_state` off and on."""
  trajectory = rl_task.Trajectory(0)
  trajectory.extend(
      action=0, dist_inputs=0, reward=0, done=True, new_observation=1
  )
  task = rl_task.RLTask(
      DummyEnv(), initial_trajectories=[trajectory], max_steps=9)

  # Without the final state, every slice must end on observation 0.
  stream_without_final = task.trajectory_stream(
      max_slice_length=1, include_final_state=False)
  for _ in range(10):
    current = next(stream_without_final)
    self.assertLen(current, 1)
    self.assertEqual(current.last_observation, 0)

  # With the final state, observation 1 should show up in some slice.
  stream_with_final = task.trajectory_stream(
      max_slice_length=1, include_final_state=True)
  observation_total = 0
  for _ in range(100):
    current = next(stream_with_final)
    self.assertLen(current, 1)
    observation_total += current.last_observation
  # We've seen the end at least once.
  self.assertGreaterEqual(observation_total, 1)
def test_trajectory_done_get_and_set(self):
  """Checks reading and assigning the `done` flag of a trajectory."""
  trajectory = rl_task.Trajectory(observation=123)
  self._extend(trajectory)
  # Read the flag as left by the extension, then overwrite and re-read it.
  self.assertFalse(trajectory.done)
  trajectory.done = True
  self.assertTrue(trajectory.done)
def test_trajectory_suffix_observations(self):
  """Checks which observations end up in a trajectory suffix."""
  trajectory = rl_task.Trajectory(observation=0)
  for next_obs in range(1, 6):
    self._extend(trajectory, new_observation=next_obs)
  suffix = trajectory.suffix(length=4)
  timestep_observations = [
      timestep.observation for timestep in suffix.timesteps
  ]
  # The timesteps hold observations 2..4; 5 is exposed as last_observation.
  self.assertEqual(timestep_observations, [2, 3, 4])
  self.assertEqual(suffix.last_observation, 5)
def test_trajectory_to_np_shape(self):
  """Checks that `to_np()` arrays have the trajectory length as first axis."""
  obs_shape = (2, 3)
  trajectory = rl_task.Trajectory(observation=np.zeros(obs_shape))
  for _ in range(5):
    self._extend(trajectory, new_observation=np.zeros(obs_shape))
  as_np = trajectory.to_np()
  n_steps = len(trajectory)
  self.assertEqual(as_np.observation.shape, (n_steps,) + obs_shape)
  self.assertEqual(as_np.action.shape, (n_steps,))
def test_trajectory_to_np_adds_margin(self):
  """Checks the observations and mask of `to_np()` with a margin of 2."""
  trajectory = rl_task.Trajectory(observation=2)
  self._extend(trajectory, new_observation=2)
  self._extend(trajectory, new_observation=2)
  as_np = trajectory.to_np(margin=2)
  expected_observation = [2, 2, 2, 0]
  expected_mask = [1, 1, 0, 0]
  np.testing.assert_array_equal(as_np.observation, expected_observation)
  np.testing.assert_array_equal(as_np.mask, expected_mask)
def test_trajectory_slice_stream_shape(self):
  """Checks the observation shape of a slice from the slice stream."""
  obs_shape = (12, 13)
  blank = np.zeros(obs_shape)
  trajectory = rl_task.Trajectory(blank)
  self._extend(trajectory, new_observation=blank, done=True)
  task = rl_task.RLTask(
      DummyEnv(), initial_trajectories=[trajectory], max_steps=9)
  stream = task.trajectory_slice_stream(max_slice_length=1)
  first = next(stream)
  # A length-1 slice adds a leading axis of size 1.
  self.assertEqual(first.observation.shape, (1,) + obs_shape)
def test_trajectory_to_np_shape_after_extend(self):
  """Checks that one more extend() grows the `to_np()` result by one."""
  trajectory = rl_task.Trajectory(observation=0)

  def np_length():
    # Size of the first axis of the observations array.
    return trajectory.to_np().observation.shape[0]

  for _ in range(5):
    self._extend(trajectory)
  before = np_length()
  self._extend(trajectory)
  after = np_length()
  self.assertEqual(after, before + 1)
def test_trajectory_stream_shape(self):
  """Checks the length and observation shape of a streamed slice."""
  obs_shape = (12, 13)
  frame = np.zeros(obs_shape)
  trajectory = rl_task.Trajectory(frame)
  trajectory.extend(0, 0, 0, True, frame)
  task = rl_task.RLTask(
      DummyEnv(), initial_trajectories=[trajectory], max_steps=9)
  stream = task.trajectory_stream(max_slice_length=1)
  first = next(stream)
  self.assertLen(first, 1)
  self.assertEqual(first.last_observation.shape, obs_shape)
def test_task_sampling(self):
  """Test sampling of length-1 slices from a task's initial trajectories.

  Builds one long trajectory starting at observation 0 and one single-step
  trajectory starting at observation 101, streams ten length-1 slices, and
  checks that the mean of the sampled `last_observation` values stays
  below 31. No policy is trained here.
  """
  # Long trajectory of 0s.
  tr1 = rl_task.Trajectory(0)
  for _ in range(100):
    tr1.extend(0, 0, 0, 0)
  tr1.extend(0, 0, 0, 200)

  # Short trajectory of 101.
  tr2 = rl_task.Trajectory(101)
  tr2.extend(0, 0, 0, 200)
  task = rl_task.RLTask('CartPole-v0', initial_trajectories=[tr1, tr2])

  stream = task.trajectory_stream(max_slice_length=1)
  slices = []
  for _ in range(10):
    next_slice = next(stream)
    # Use test-case assertions instead of bare `assert`s: bare asserts are
    # removed when Python runs with -O and give poorer failure output.
    self.assertLen(next_slice, 1)
    slices.append(next_slice.last_observation)
  mean_obs = sum(slices) / float(len(slices))
  # Average should be around 1 sampling from 0x100, 101 uniformly.
  # Sampling 101 even 3 times is unlikely (3 * 101 / 10 = 30.3 < 31).
  self.assertLess(mean_obs, 31)
  self.assertLen(slices, 10)
def test_trajectory_stream_long_slice(self):
  """Checks a streamed slice when `max_slice_length` is 2."""
  obs_shape = (12, 13)
  frame = np.zeros(obs_shape)
  trajectory = rl_task.Trajectory(frame)
  # Two steps; only the second one ends the trajectory.
  for is_last in (False, True):
    trajectory.extend(0, 0, 0, is_last, frame)
  task = rl_task.RLTask(
      DummyEnv(), initial_trajectories=[trajectory], max_steps=9)
  stream = task.trajectory_stream(max_slice_length=2)
  first = next(stream)
  self.assertLen(first, 2)
  self.assertEqual(first.last_observation.shape, obs_shape)
def test_task_epochs_index_minusone(self):
  """Checks that epoch index -1 follows the most recent epoch."""
  zeros = np.zeros((2,))
  trajectory = rl_task.Trajectory(zeros)
  self._extend(trajectory, new_observation=zeros, done=True)
  task = rl_task.RLTask(
      DummyEnv(), initial_trajectories=[trajectory], max_steps=9)
  stream = task.trajectory_slice_stream(epochs=[-1], max_slice_length=1)

  # Before collecting anything, the stream serves the initial trajectory.
  slice_before = next(stream)
  np.testing.assert_equal(slice_before.observation, np.zeros((1, 2)))

  # After collecting one trajectory, the same stream is expected to serve
  # observations of ones.
  task.collect_trajectories(policy=(lambda _: (0, 0)), n_trajectories=1)
  slice_after = next(stream)
  np.testing.assert_equal(slice_after.observation, np.ones((1, 2)))
def test_trajectory_stream_shape(self):
  """Checks the length and observation shape of a streamed slice."""
  obs_shape = (12, 13)
  frame = np.zeros(obs_shape)
  trajectory = rl_task.Trajectory(frame)
  trajectory.extend(
      action=0, dist_inputs=0, reward=0, done=True, new_observation=frame
  )
  task = rl_task.RLTask(
      DummyEnv(), initial_trajectories=[trajectory], max_steps=9)
  stream = task.trajectory_stream(max_slice_length=1)
  first = next(stream)
  self.assertLen(first, 1)
  self.assertEqual(first.last_observation.shape, obs_shape)
def test_task_epochs_index_minusone(self):
  """Checks that epoch index -1 follows the most recent epoch."""
  frame = np.zeros((2,))
  trajectory = rl_task.Trajectory(frame)
  trajectory.extend(0, 0, 0, True, frame)
  task = rl_task.RLTask(
      DummyEnv(), initial_trajectories=[trajectory], max_steps=9)
  stream = task.trajectory_stream(epochs=[-1], max_slice_length=1)

  # Before collecting anything, the stream serves the initial trajectory.
  slice_before = next(stream)
  self.assertLen(slice_before, 1)
  self.assertEqual(slice_before.last_observation[0], 0)

  # After collecting one trajectory, the same stream is expected to serve
  # a slice whose last observation starts with 1.
  task.collect_trajectories((lambda _: (0, 0)), 1)
  slice_after = next(stream)
  self.assertLen(slice_after, 1)
  self.assertEqual(slice_after.last_observation[0], 1)
def test_task_epochs_index_minusone(self):
  """Checks that epoch index -1 follows the most recent epoch."""
  frame = np.zeros((2,))
  trajectory = rl_task.Trajectory(frame)
  trajectory.extend(
      action=0, dist_inputs=0, reward=0, done=True, new_observation=frame
  )
  task = rl_task.RLTask(
      DummyEnv(), initial_trajectories=[trajectory], max_steps=9)
  stream = task.trajectory_stream(epochs=[-1], max_slice_length=1)

  # Before collecting anything, the stream serves the initial trajectory.
  slice_before = next(stream)
  self.assertLen(slice_before, 1)
  self.assertEqual(slice_before.last_observation[0], 0)

  # After collecting one trajectory, the same stream is expected to serve
  # a slice whose last observation starts with 1.
  task.collect_trajectories(policy=(lambda _: (0, 0)), n_trajectories=1)
  slice_after = next(stream)
  self.assertLen(slice_after, 1)
  self.assertEqual(slice_after.last_observation[0], 1)
def test_trajectory_stream_long_slice(self):
  """Checks the observations shape of a streamed slice of length 2."""
  obs_shape = (12, 13)
  frame = np.zeros(obs_shape)
  trajectory = rl_task.Trajectory(frame)
  # Two steps; only the second one ends the trajectory.
  for is_last in (False, True):
    trajectory.extend(
        action=0, dist_inputs=0, reward=0, done=is_last,
        new_observation=frame
    )
  task = rl_task.RLTask(
      DummyEnv(), initial_trajectories=[trajectory], max_steps=9)
  stream = task.trajectory_stream(max_slice_length=2)
  first = next(stream)
  self.assertEqual(first.observations.shape, (2,) + obs_shape)
def test_trajectory_len(self):
  """Checks that len() of a trajectory counts its observations."""
  n_extensions = 5
  trajectory = rl_task.Trajectory(observation=0)
  for _ in range(n_extensions):
    self._extend(trajectory)
  # One initial observation plus one per extension.
  self.assertLen(trajectory, n_extensions + 1)
def test_empty_trajectory_last_observation(self):
  """Checks that a fresh trajectory reports its initial observation."""
  initial_obs = 123
  trajectory = rl_task.Trajectory(observation=initial_obs)
  self.assertEqual(trajectory.last_observation, initial_obs)