Example #1
0
  def test_trajectory_stream_sampling_by_trajectory(self):
    """Test if the trajectory stream samples by trajectory.

    Builds a long trajectory of 0 observations and a short trajectory that
    starts at 101, draws ten length-1 slices with
    `sample_trajectories_uniformly=True`, and checks that the mean of the
    sampled last observations is well above 0.
    """
    # Long trajectory of 0s.
    tr1 = rl_task.Trajectory(0)
    for _ in range(100):
      tr1.extend(0, 0, 0, False, 0)
    tr1.extend(0, 0, 0, True, 200)
    # Short trajectory of 101.
    tr2 = rl_task.Trajectory(101)
    tr2.extend(0, 0, 0, True, 200)
    task = rl_task.RLTask(
        DummyEnv(), initial_trajectories=[tr1, tr2], max_steps=9)

    # Stream of both. Check that we're sampling by trajectory.
    stream = task.trajectory_stream(
        max_slice_length=1, sample_trajectories_uniformly=True)
    slices = []
    for _ in range(10):
      next_slice = next(stream)
      # Use the TestCase assertion instead of a bare `assert`: it is not
      # stripped under `python -O` and reports the offending value on failure.
      self.assertLen(next_slice, 1)
      slices.append(next_slice.last_observation)
    mean_obs = sum(slices) / float(len(slices))
    # Average should be around 50, sampling from {0, 101} uniformly.
    # Sampling 101 fewer than 2 times has low probability, but it is possible,
    # so this test is inherently (slightly) flaky.
    self.assertGreater(mean_obs, 20)
    self.assertLen(slices, 10)
Example #2
0
  def test_trajectory_stream_sampling_uniform(self):
    """Test if the trajectory stream samples uniformly.

    Builds a long trajectory of 0 observations and a short trajectory that
    starts at 101, draws ten length-1 slices with the default sampling
    settings, and checks that the mean of the sampled last observations stays
    low, i.e. that the short trajectory is not over-represented.
    """
    # Long trajectory of 0s.
    tr1 = rl_task.Trajectory(0)
    for _ in range(100):
      tr1.extend(
          action=0, dist_inputs=0, reward=0, done=False, new_observation=0
      )
    tr1.extend(
        action=0, dist_inputs=0, reward=0, done=True, new_observation=200
    )
    # Short trajectory of 101.
    tr2 = rl_task.Trajectory(101)
    tr2.extend(
        action=0, dist_inputs=0, reward=0, done=True, new_observation=200
    )
    task = rl_task.RLTask(
        DummyEnv(), initial_trajectories=[tr1, tr2], max_steps=9)

    # Stream of both. Check that we're sampling by slice, not by trajectory.
    stream = task.trajectory_stream(max_slice_length=1)
    slices = []
    for _ in range(10):
      next_slice = next(stream)
      # Use the TestCase assertion instead of a bare `assert`: it is not
      # stripped under `python -O` and reports the offending value on failure.
      self.assertLen(next_slice, 1)
      slices.append(next_slice.last_observation)
    mean_obs = sum(slices) / float(len(slices))
    # Average should be around 1 sampling from 0x100, 101 uniformly.
    self.assertLess(mean_obs, 31)  # Sampling 101 even 3 times is unlikely.
    self.assertLen(slices, 10)
Example #3
0
 def test_trajectory_suffix_len(self):
   """Check that `suffix(length=3)` yields a trajectory of length 3."""
   trajectory = rl_task.Trajectory(observation=0)
   n_extensions = 5
   for _ in range(n_extensions):
     self._extend(trajectory)
   # The requested suffix length must be reflected by len() of the result.
   self.assertLen(trajectory.suffix(length=3), 3)
Example #4
0
 def test_nonempty_trajectory_last_observation(self):
   """Check that last_observation reflects the most recent extend() call."""
   initial_obs, final_obs = 123, 321
   trajectory = rl_task.Trajectory(observation=initial_obs)
   # Five extensions without an explicit observation, then one that passes a
   # distinctive observation which must become `last_observation`.
   for _ in range(5):
     self._extend(trajectory)
   self._extend(trajectory, new_observation=final_obs)
   self.assertEqual(trajectory.last_observation, final_obs)
Example #5
0
  def test_trajectory_stream_margin(self):
    """Check slices from a trajectory stream created with `margin=2`.

    Whenever the first timestep of a sampled slice is marked done, the two
    following timesteps of that slice must be done as well and carry a falsy
    mask.
    """
    trajectory = rl_task.Trajectory(0)
    trajectory.extend(
        action=0, dist_inputs=0, reward=0, done=False, new_observation=1
    )
    trajectory.extend(
        action=1, dist_inputs=2, reward=3, done=True, new_observation=1
    )
    task = rl_task.RLTask(
        DummyEnv(), initial_trajectories=[trajectory], max_steps=9
    )

    # Stream of slices without the final state.
    slice_stream = task.trajectory_stream(
        max_slice_length=3, margin=2, include_final_state=False
    )
    saw_done = False
    n_samples = 10
    for _ in range(n_samples):
      sampled = next(slice_stream)
      self.assertLen(sampled, 3)
      if not sampled.timesteps[0].done:
        continue
      saw_done = True
      # The slice starts on a done timestep, so positions 1 and 2 are margin.
      for idx in (1, 2):
        self.assertTrue(sampled.timesteps[idx].done)
        self.assertFalse(sampled.timesteps[idx].mask)
    # Without at least one done slice the margin checks above never ran.
    # Missing it has low probability (1/2^10) but is possible: flaky test.
    self.assertTrue(saw_done)
Example #6
0
  def test_trajectory_slice_stream_margin(self):
    """Check slices from a trajectory slice stream created with `margin=3`.

    Whenever a sampled slice starts on a done timestep, its observations must
    be [1, 1, 0, 0] and every later position must be done with a falsy mask.
    """
    trajectory = rl_task.Trajectory(0)
    for _ in range(2):
      self._extend(trajectory, new_observation=1)
    self._extend(
        trajectory,
        new_observation=1,
        action=1,
        dist_inputs=2,
        reward=3,
        done=True,
    )
    task = rl_task.RLTask(
        DummyEnv(), initial_trajectories=[trajectory], max_steps=9
    )

    slice_stream = task.trajectory_slice_stream(max_slice_length=4, margin=3)
    saw_done = False
    n_samples = 20
    for _ in range(n_samples):
      sampled = next(slice_stream)
      self.assertEqual(sampled.observation.shape, (4,))
      if not sampled.done[0]:
        continue
      saw_done = True
      # The slice starts with the last timestep of the actual trajectory
      # (observation = 1), followed by the first margin timestep, which holds
      # the trajectory's final observation (also 1). The remaining margin
      # timesteps have observation 0.
      np.testing.assert_array_equal(sampled.observation, [1, 1, 0, 0])
      # In the margin, done = True and mask = 0.
      n_positions = sampled.observation.shape[0]
      for idx in range(1, n_positions):
        self.assertTrue(sampled.done[idx])
        self.assertFalse(sampled.mask[idx])
    # Without at least one done slice the margin checks above never ran.
    # Missing it has low probability (1/2^20) but is possible: flaky test.
    self.assertTrue(saw_done)
Example #7
0
 def test_trajectory_to_np_without_margin_cuts_last_observation(self):
   """Check that `to_np(margin=0)` omits the trajectory's last observation."""
   trajectory = rl_task.Trajectory(observation=0)
   for new_obs in (1, 2, 3):
     self._extend(trajectory, new_observation=new_obs)
   # Observations 0..3 were fed in; the last one (3) is expected to be cut.
   observations = trajectory.to_np(margin=0).observation
   np.testing.assert_array_equal(observations, [0, 1, 2])
Example #8
0
 def test_trajectory_to_np_observations(self):
   """Check the observations returned by `to_np()` with default arguments."""
   expected = [0, 1, 2]
   trajectory = rl_task.Trajectory(observation=expected[0])
   for new_obs in expected[1:]:
     self._extend(trajectory, new_observation=new_obs)
   np.testing.assert_array_equal(trajectory.to_np().observation, expected)
Example #9
0
  def test_trajectory_stream_final_state(self):
    """Test trajectory stream with and without the final state.

    With `include_final_state=False`, every length-1 slice must end on the
    initial observation (0). With `include_final_state=True`, the last
    observations of 100 sampled slices must sum to at least 1, i.e. the final
    observation (1) was sampled at least once.
    """
    tr1 = rl_task.Trajectory(0)
    tr1.extend(
        action=0, dist_inputs=0, reward=0, done=True, new_observation=1
    )
    task = rl_task.RLTask(DummyEnv(), initial_trajectories=[tr1], max_steps=9)

    # Stream of slices without the final state.
    stream1 = task.trajectory_stream(
        max_slice_length=1, include_final_state=False)
    for _ in range(10):
      next_slice = next(stream1)
      self.assertLen(next_slice, 1)
      self.assertEqual(next_slice.last_observation, 0)

    # Stream of slices with the final state.
    stream2 = task.trajectory_stream(
        max_slice_length=1, include_final_state=True)
    all_sum = 0
    for _ in range(100):
      next_slice = next(stream2)
      self.assertLen(next_slice, 1)
      all_sum += next_slice.last_observation
    # We've seen the end at least once. assertGreaterEqual states the intent
    # directly and reports the actual sum on failure, unlike comparing
    # min(all_sum, 1) against 1.
    self.assertGreaterEqual(all_sum, 1)
Example #10
0
 def test_trajectory_done_get_and_set(self):
   """Check that a trajectory's `done` attribute can be read and written."""
   trajectory = rl_task.Trajectory(observation=123)
   self._extend(trajectory)
   # Expected to be falsy right after extending, truthy once set explicitly.
   self.assertFalse(trajectory.done)
   trajectory.done = True
   self.assertTrue(trajectory.done)
Example #11
0
 def test_trajectory_suffix_observations(self):
   """Check the observations held by a trajectory suffix of length 4."""
   trajectory = rl_task.Trajectory(observation=0)
   for new_obs in (1, 2, 3, 4, 5):
     self._extend(trajectory, new_observation=new_obs)
   suffix = trajectory.suffix(length=4)
   # The suffix's timesteps carry observations 2..4; 5 is its last observation.
   suffix_observations = [step.observation for step in suffix.timesteps]
   self.assertEqual(suffix_observations, [2, 3, 4])
   self.assertEqual(suffix.last_observation, 5)
Example #12
0
 def test_trajectory_to_np_shape(self):
   """Check that the leading dimension of `to_np()` arrays equals len(tr)."""
   obs_shape = (2, 3)
   trajectory = rl_task.Trajectory(observation=np.zeros(obs_shape))
   for _ in range(5):
     self._extend(trajectory, new_observation=np.zeros(obs_shape))
   as_np = trajectory.to_np()
   n_steps = len(trajectory)
   # Observations keep their own shape after the leading length dimension;
   # actions are expected to be one value per step.
   self.assertEqual(as_np.observation.shape, (n_steps,) + obs_shape)
   self.assertEqual(as_np.action.shape, (n_steps,))
Example #13
0
 def test_trajectory_to_np_adds_margin(self):
   """Check the observation and mask arrays of `to_np(margin=2)`."""
   fill_obs = 2
   trajectory = rl_task.Trajectory(observation=fill_obs)
   for _ in range(2):
     self._extend(trajectory, new_observation=fill_obs)
   as_np = trajectory.to_np(margin=2)
   # Expected: three real observations followed by one zero-filled entry;
   # the mask is 1 for the first two positions and 0 for the rest.
   np.testing.assert_array_equal(as_np.observation, [2, 2, 2, 0])
   np.testing.assert_array_equal(as_np.mask, [1, 1, 0, 0])
Example #14
0
 def test_trajectory_slice_stream_shape(self):
   """Check the observation shape of a slice from a trajectory slice stream."""
   obs_shape = (12, 13)
   zeros = np.zeros(obs_shape)
   trajectory = rl_task.Trajectory(zeros)
   self._extend(trajectory, new_observation=zeros, done=True)
   task = rl_task.RLTask(
       DummyEnv(), initial_trajectories=[trajectory], max_steps=9
   )
   first_slice = next(task.trajectory_slice_stream(max_slice_length=1))
   # A length-1 slice adds a leading dimension of 1 to the observation shape.
   self.assertEqual(first_slice.observation.shape, (1,) + obs_shape)
Example #15
0
 def test_trajectory_to_np_shape_after_extend(self):
   """Check that one more extend() grows the `to_np()` result by one."""
   trajectory = rl_task.Trajectory(observation=0)
   for _ in range(5):
     self._extend(trajectory)

   def np_length():
     # Leading dimension of the observation array of a fresh to_np() result.
     return trajectory.to_np().observation.shape[0]

   length_before = np_length()
   self._extend(trajectory)
   self.assertEqual(np_length(), length_before + 1)
Example #16
0
 def test_trajectory_stream_shape(self):
   """Check the length and observation shape of a trajectory stream slice."""
   obs_shape = (12, 13)
   zeros = np.zeros(obs_shape)
   trajectory = rl_task.Trajectory(zeros)
   trajectory.extend(0, 0, 0, True, zeros)
   task = rl_task.RLTask(
       DummyEnv(), initial_trajectories=[trajectory], max_steps=9
   )
   first_slice = next(task.trajectory_stream(max_slice_length=1))
   self.assertLen(first_slice, 1)
   # The last observation keeps the shape of the observations fed in.
   self.assertEqual(first_slice.last_observation.shape, obs_shape)
Example #17
0
 def test_task_sampling(self):
     """Test sampling of length-1 slices from a task's trajectory stream.

     Builds a long trajectory of 0 observations and a short trajectory that
     starts at 101, draws ten length-1 slices, and checks that the mean of the
     sampled last observations stays low, i.e. that the short trajectory is
     not over-represented.
     """
     # Long trajectory of 0s.
     tr1 = rl_task.Trajectory(0)
     for _ in range(100):
         tr1.extend(0, 0, 0, 0)
     tr1.extend(0, 0, 0, 200)
     # Short trajectory of 101.
     tr2 = rl_task.Trajectory(101)
     tr2.extend(0, 0, 0, 200)
     task = rl_task.RLTask('CartPole-v0', initial_trajectories=[tr1, tr2])
     stream = task.trajectory_stream(max_slice_length=1)
     slices = []
     for _ in range(10):
         next_slice = next(stream)
         # TestCase assertions are not stripped under `python -O` and report
         # the offending value on failure, unlike a bare `assert`.
         self.assertLen(next_slice, 1)
         slices.append(next_slice.last_observation)
     mean_obs = sum(slices) / float(len(slices))
     # Average should be around 1 sampling from 0x100, 101 uniformly.
     self.assertLess(mean_obs, 31)  # Sampling 101 even 3 times is unlikely.
     self.assertLen(slices, 10)
Example #18
0
 def test_trajectory_stream_long_slice(self):
   """Check a trajectory stream slice requested with `max_slice_length=2`."""
   obs_shape = (12, 13)
   zeros = np.zeros(obs_shape)
   trajectory = rl_task.Trajectory(zeros)
   # Two steps; only the second one is marked done.
   for is_done in (False, True):
     trajectory.extend(0, 0, 0, is_done, zeros)
   task = rl_task.RLTask(
       DummyEnv(), initial_trajectories=[trajectory], max_steps=9
   )
   first_slice = next(task.trajectory_stream(max_slice_length=2))
   self.assertLen(first_slice, 2)
   self.assertEqual(first_slice.last_observation.shape, obs_shape)
Example #19
0
 def test_task_epochs_index_minusone(self):
   """Check that `epochs=[-1]` follows the most recently collected epoch."""
   zeros = np.zeros((2,))
   trajectory = rl_task.Trajectory(zeros)
   self._extend(trajectory, new_observation=zeros, done=True)
   task = rl_task.RLTask(
       DummyEnv(), initial_trajectories=[trajectory], max_steps=9
   )
   slice_stream = task.trajectory_slice_stream(epochs=[-1], max_slice_length=1)

   # Before collecting anything, the stream serves the initial trajectory.
   np.testing.assert_equal(next(slice_stream).observation, np.zeros((1, 2)))

   # After collecting a new trajectory, the same stream must switch to it;
   # its observations are expected to be ones (presumably what DummyEnv emits).
   task.collect_trajectories(policy=(lambda _: (0, 0)), n_trajectories=1)
   np.testing.assert_equal(next(slice_stream).observation, np.ones((1, 2)))
Example #20
0
 def test_trajectory_stream_shape(self):
   """Check the length and observation shape of a trajectory stream slice."""
   obs_shape = (12, 13)
   zeros = np.zeros(obs_shape)
   trajectory = rl_task.Trajectory(zeros)
   trajectory.extend(
       action=0, dist_inputs=0, reward=0, done=True, new_observation=zeros
   )
   task = rl_task.RLTask(
       DummyEnv(), initial_trajectories=[trajectory], max_steps=9
   )
   first_slice = next(task.trajectory_stream(max_slice_length=1))
   self.assertLen(first_slice, 1)
   # The last observation keeps the shape of the observations fed in.
   self.assertEqual(first_slice.last_observation.shape, obs_shape)
Example #21
0
 def test_task_epochs_index_minusone(self):
   """Check that `epochs=[-1]` follows the most recently collected epoch."""
   zeros = np.zeros((2,))
   trajectory = rl_task.Trajectory(zeros)
   trajectory.extend(0, 0, 0, True, zeros)
   task = rl_task.RLTask(
       DummyEnv(), initial_trajectories=[trajectory], max_steps=9
   )
   slice_stream = task.trajectory_stream(epochs=[-1], max_slice_length=1)

   def check_next_slice(expected_first_value):
     # Pull one slice and verify its length and first observation component.
     sampled = next(slice_stream)
     self.assertLen(sampled, 1)
     self.assertEqual(sampled.last_observation[0], expected_first_value)

   # Before collecting anything, the stream serves the initial trajectory.
   check_next_slice(0)
   # After collecting a new trajectory, the same stream must switch to it;
   # its observation is expected to start with 1 (presumably from DummyEnv).
   task.collect_trajectories((lambda _: (0, 0)), 1)
   check_next_slice(1)
Example #22
0
 def test_task_epochs_index_minusone(self):
   """Check that `epochs=[-1]` follows the most recently collected epoch."""
   zeros = np.zeros((2,))
   trajectory = rl_task.Trajectory(zeros)
   trajectory.extend(
       action=0, dist_inputs=0, reward=0, done=True, new_observation=zeros
   )
   task = rl_task.RLTask(
       DummyEnv(), initial_trajectories=[trajectory], max_steps=9
   )
   slice_stream = task.trajectory_stream(epochs=[-1], max_slice_length=1)

   def check_next_slice(expected_first_value):
     # Pull one slice and verify its length and first observation component.
     sampled = next(slice_stream)
     self.assertLen(sampled, 1)
     self.assertEqual(sampled.last_observation[0], expected_first_value)

   # Before collecting anything, the stream serves the initial trajectory.
   check_next_slice(0)
   # After collecting a new trajectory, the same stream must switch to it;
   # its observation is expected to start with 1 (presumably from DummyEnv).
   task.collect_trajectories(policy=(lambda _: (0, 0)), n_trajectories=1)
   check_next_slice(1)
Example #23
0
 def test_trajectory_stream_long_slice(self):
     """Check the observations shape of a slice with `max_slice_length=2`."""
     obs_shape = (12, 13)
     zeros = np.zeros(obs_shape)
     trajectory = rl_task.Trajectory(zeros)
     # Two steps; only the second one is marked done.
     for is_done in (False, True):
         trajectory.extend(
             action=0,
             dist_inputs=0,
             reward=0,
             done=is_done,
             new_observation=zeros,
         )
     task = rl_task.RLTask(
         DummyEnv(), initial_trajectories=[trajectory], max_steps=9
     )
     first_slice = next(task.trajectory_stream(max_slice_length=2))
     # A length-2 slice adds a leading dimension of 2 to the observation shape.
     self.assertEqual(first_slice.observations.shape, (2,) + obs_shape)
Example #24
0
 def test_trajectory_len(self):
   """Check that len(trajectory) counts the initial observation too."""
   n_extensions = 5
   trajectory = rl_task.Trajectory(observation=0)
   for _ in range(n_extensions):
     self._extend(trajectory)
   # One initial observation plus five extensions gives six in total.
   self.assertLen(trajectory, 6)
Example #25
0
 def test_empty_trajectory_last_observation(self):
   """Check that a never-extended trajectory reports its initial observation."""
   initial_obs = 123
   trajectory = rl_task.Trajectory(observation=initial_obs)
   self.assertEqual(trajectory.last_observation, initial_obs)