Example #1
0
    def test_eval_job(self):
        """Runs the eval job twice and verifies its inputs and outputs."""
        # Build the evaluation fixture: a counting env, a random policy over
        # the env's specs, and an autospec'd variable container.
        summary_dir = self.create_tempdir().full_path
        env = test_envs.CountingEnv(steps_per_episode=4)
        action_spec = tensor_spec.from_spec(env.action_spec())
        time_step_spec = tensor_spec.from_spec(env.time_step_spec())
        policy = py_tf_eager_policy.PyTFEagerPolicy(
            random_tf_policy.RandomTFPolicy(time_step_spec, action_spec))
        variable_container = mock.create_autospec(
            reverb_variable_container.ReverbVariableContainer)

        with mock.patch.object(tf.summary, 'scalar',
                               autospec=True) as scalar_summary:
            with mock.patch.object(train_utils, 'wait_for_predicate',
                                   autospec=True):
                # Exercise the function under test.
                eval_job.evaluate(
                    summary_dir=summary_dir,
                    policy=policy,
                    environment_name=None,
                    suite_load_fn=lambda _: env,
                    variable_container=variable_container,
                    is_running=_NTimesReturnTrue(n=2))

                # Input side: the job must pull variables from the container.
                variable_container.assert_has_calls(
                    [mock.call.update(mock.ANY)])

                # Output side: at least one average-return scalar for the
                # first step must have been written.
                scalar_summary.assert_any_call(
                    name='eval_actor/AverageReturn', data=mock.ANY,
                    step=mock.ANY)
Example #2
0
  def test_observation_stacked(self):
    """A history of length 3 stacks the last three observations."""
    history_env = wrappers.HistoryWrapper(test_envs.CountingEnv(), 3)
    time_step = history_env.reset()
    # Reset zero-pads the history window.
    self.assertEqual([0, 0, 0], time_step.observation.tolist())

    # Each step shifts the window left and appends the newest observation.
    for expected in ([0, 0, 1], [0, 1, 2], [1, 2, 3]):
      time_step = history_env.step(0)
      self.assertEqual(expected, time_step.observation.tolist())
Example #3
0
  def test_sequential(self):
    """Observations encode `episode * 10 + step` across episodes."""
    steps_per_episode = 4
    env = test_envs.CountingEnv(steps_per_episode)

    for episode in range(3):
      base = episode * 10
      time_step = env.reset()
      self.assertEqual(base, time_step.observation)
      step = 0
      while not time_step.is_last():
        step += 1
        time_step = env.step(0)
        self.assertEqual(base + step, time_step.observation)
      # The episode's final time step carries the terminal count.
      self.assertEqual(base + steps_per_episode, time_step.observation)
Example #4
0
  def test_eval_job_constant_eval(self):
    """Evaluates every step for 2 steps.

    The `variable_container` here reports the same train step on two
    consecutive calls to exercise `is_train_step_the_same_or_behind`; if that
    check were broken, twice as many train steps would be processed.
    """
    summary_dir = self.create_tempdir().full_path
    env = test_envs.CountingEnv(steps_per_episode=4)
    action_spec = tensor_spec.from_spec(env.action_spec())
    time_step_spec = tensor_spec.from_spec(env.time_step_spec())
    policy = py_tf_eager_policy.PyTFEagerPolicy(
        random_tf_policy.RandomTFPolicy(time_step_spec, action_spec))
    variable_container = mock.create_autospec(
        reverb_variable_container.ReverbVariableContainer)

    class _EveryOtherCallIncrement(object):
      """Side effect that bumps the train step only on every other call."""

      def __init__(self):
        self._train_step = -1
        self._calls = 0

      def __call__(self, variables):
        # Odd-numbered calls advance the fake train step; even-numbered
        # calls leave it unchanged so the job observes a repeated step.
        if self._calls % 2:
          self._train_step += 1
          variables[reverb_variable_container.TRAIN_STEP_KEY].assign(
              self._train_step)
        self._calls += 1

    variable_container.update.side_effect = _EveryOtherCallIncrement()

    with mock.patch.object(
        tf.summary, 'scalar', autospec=True) as scalar_summary:
      eval_job.evaluate(
          summary_dir=summary_dir,
          policy=policy,
          environment_name=None,
          suite_load_fn=lambda _: env,
          variable_container=variable_container,
          eval_interval=1,
          is_running=_NTimesReturnTrue(n=2))

      # One eval per distinct train step -> exactly two summaries.
      summary_count = self.count_summary_scalar_tags_in_call_list(
          scalar_summary, 'Metrics/eval_actor/AverageReturn')
      self.assertEqual(summary_count, 2)
Example #5
0
  def test_observation_tiled(self):
    """With `tile_first_step_obs` the reset obs fills the whole history."""
    env = test_envs.CountingEnv()
    # Force the base env's observations to be non-zero for this test.
    env._episodes = 2
    history_env = wrappers.HistoryWrapper(env, 3, tile_first_step_obs=True)
    # Extra reset so the base env's observations start at 20 rather than 0.
    time_step = history_env.reset()
    self.assertEqual([20, 20, 20], time_step.observation.tolist())

    # Subsequent steps slide the window as usual.
    for expected in ([20, 20, 21], [20, 21, 22], [21, 22, 23]):
      time_step = history_env.step(0)
      self.assertEqual(expected, time_step.observation.tolist())
Example #6
0
  def test_observation_and_action_stacked(self):
    """With `include_actions` both observation and action histories stack."""
    history_env = wrappers.HistoryWrapper(
        test_envs.CountingEnv(), 3, include_actions=True)
    time_step = history_env.reset()
    # On reset both histories are zero-padded.
    self.assertEqual([0, 0, 0], time_step.observation['observation'].tolist())
    self.assertEqual([0, 0, 0], time_step.observation['action'].tolist())

    # (action taken, expected observation window, expected action window).
    expectations = (
        (5, [0, 0, 1], [0, 0, 5]),
        (6, [0, 1, 2], [0, 5, 6]),
        (7, [1, 2, 3], [5, 6, 7]),
    )
    for action, expected_obs, expected_actions in expectations:
      time_step = history_env.step(action)
      self.assertEqual(expected_obs,
                       time_step.observation['observation'].tolist())
      self.assertEqual(expected_actions,
                       time_step.observation['action'].tolist())
Example #7
0
  def test_eval_job(self):
    """Runs the eval job, evaluating every 5 steps over 10 train steps."""
    summary_dir = self.create_tempdir().full_path
    env = test_envs.CountingEnv(steps_per_episode=4)
    action_spec = tensor_spec.from_spec(env.action_spec())
    time_step_spec = tensor_spec.from_spec(env.time_step_spec())
    policy = py_tf_eager_policy.PyTFEagerPolicy(
        random_tf_policy.RandomTFPolicy(time_step_spec, action_spec))

    class _EveryCallIncrement(object):
      """Side effect that advances the fake train step on each update."""

      def __init__(self):
        self._train_step = -1

      def __call__(self, variables):
        self._train_step += 1
        variables[reverb_variable_container.TRAIN_STEP_KEY].assign(
            self._train_step)

    variable_container = mock.create_autospec(
        reverb_variable_container.ReverbVariableContainer)
    variable_container.update.side_effect = _EveryCallIncrement()

    with mock.patch.object(
        tf.summary, 'scalar', autospec=True) as scalar_summary:
      # Run the function under test. 11 loop iterations are needed for 10
      # train steps because the eval occurs on the loop iteration after the
      # train step is observed.
      eval_job.evaluate(
          summary_dir=summary_dir,
          policy=policy,
          environment_name=None,
          suite_load_fn=lambda _: env,
          variable_container=variable_container,
          eval_interval=5,
          is_running=_NTimesReturnTrue(n=11))

      # Evals at train steps 0, 5 and 10 -> three AverageReturn summaries.
      summary_count = self.count_summary_scalar_tags_in_call_list(
          scalar_summary, 'Metrics/eval_actor/AverageReturn')
      self.assertEqual(summary_count, 3)
Example #8
0
  def test_steps_sequencial(self):
    """Walks one full episode and into the next episode's first step."""
    env = test_envs.CountingEnv(steps_per_episode=4)

    time_step = env.reset()
    self.assertTrue(time_step.is_first())
    self.assertEqual(0, time_step.observation)

    # Three mid steps counting 1..3.
    for expected in (1, 2, 3):
      time_step = env.step(0)
      self.assertTrue(time_step.is_mid())
      self.assertEqual(expected, time_step.observation)

    # The fourth step terminates the episode with observation 4.
    time_step = env.step(0)
    self.assertTrue(time_step.is_last())
    self.assertEqual(4, time_step.observation)

    # Stepping past the end starts episode 1 (observation 10).
    time_step = env.step(0)
    self.assertTrue(time_step.is_first())
    self.assertEqual(10, time_step.observation)
Example #9
0
 def test_validate_specs(self):
     """CountingEnv conforms to its declared specs over 10 episodes."""
     counting_env = test_envs.CountingEnv(steps_per_episode=15)
     env_utils.validate_py_environment(counting_env, episodes=10)