Example #1
0
    def test_eval_job(self):
        """Smoke test: runs the eval job twice and verifies the expected calls."""
        summary_dir = self.create_tempdir().full_path
        env = test_envs.CountingEnv(steps_per_episode=4)
        action_spec = tensor_spec.from_spec(env.action_spec())
        time_step_spec = tensor_spec.from_spec(env.time_step_spec())
        policy = py_tf_eager_policy.PyTFEagerPolicy(
            random_tf_policy.RandomTFPolicy(time_step_spec, action_spec))
        variable_container_mock = mock.create_autospec(
            reverb_variable_container.ReverbVariableContainer)

        # Patch the scalar-summary writer (to inspect calls) and the wait
        # predicate (so the job does not block).
        with mock.patch.object(
                tf.summary, 'scalar', autospec=True) as scalar_summary_mock:
            with mock.patch.object(
                    train_utils, 'wait_for_predicate', autospec=True):
                # `is_running` returns True twice, so the eval loop runs twice.
                eval_job.evaluate(
                    summary_dir=summary_dir,
                    policy=policy,
                    environment_name=None,
                    suite_load_fn=lambda _: env,
                    variable_container=variable_container_mock,
                    is_running=_NTimesReturnTrue(n=2))

                # As input, the eval job is expected to fetch data from the
                # variable container.
                variable_container_mock.assert_has_calls(
                    [mock.call.update(mock.ANY)])

                # As output, the eval job is expected to write at least the
                # average return for the first step.
                scalar_summary_mock.assert_any_call(
                    name='eval_actor/AverageReturn', data=mock.ANY, step=mock.ANY)
Example #2
0
  def test_eval_job_constant_eval(self):
    """Evaluates on every step for two steps.

    The fake `variable_container` below advances the train step only on every
    other `update` call, so the job observes the same train step twice in a
    row. This exercises `is_train_step_the_same_or_behind`: if that check were
    not working, the number of train steps processed would be incorrect
    (2x higher).
    """
    summary_dir = self.create_tempdir().full_path
    env = test_envs.CountingEnv(steps_per_episode=4)
    action_spec = tensor_spec.from_spec(env.action_spec())
    time_step_spec = tensor_spec.from_spec(env.time_step_spec())
    policy = py_tf_eager_policy.PyTFEagerPolicy(
        random_tf_policy.RandomTFPolicy(time_step_spec, action_spec))
    variable_container_mock = mock.create_autospec(
        reverb_variable_container.ReverbVariableContainer)

    # Closure-based side effect: bumps the fake train step and writes it into
    # the supplied variables on every *other* call only.
    state = {'train_step': -1, 'calls': 0}

    def _update_every_other_call(variables):
      if state['calls'] % 2:
        state['train_step'] += 1
        variables[reverb_variable_container.TRAIN_STEP_KEY].assign(
            state['train_step'])
      state['calls'] += 1

    variable_container_mock.update.side_effect = _update_every_other_call

    with mock.patch.object(
        tf.summary, 'scalar', autospec=True) as scalar_summary_mock:
      eval_job.evaluate(
          summary_dir=summary_dir,
          policy=policy,
          environment_name=None,
          suite_load_fn=lambda _: env,
          variable_container=variable_container_mock,
          eval_interval=1,
          is_running=_NTimesReturnTrue(n=2))

      # One average-return summary is expected per evaluated train step.
      num_summaries = self.count_summary_scalar_tags_in_call_list(
          scalar_summary_mock, 'Metrics/eval_actor/AverageReturn')
      self.assertEqual(num_summaries, 2)
Example #3
0
  def test_eval_job(self):
    """Tests the eval job doing an eval every 5 steps for 10 train steps."""
    summary_dir = self.create_tempdir().full_path
    env = test_envs.CountingEnv(steps_per_episode=4)
    action_spec = tensor_spec.from_spec(env.action_spec())
    time_step_spec = tensor_spec.from_spec(env.time_step_spec())
    policy = py_tf_eager_policy.PyTFEagerPolicy(
        random_tf_policy.RandomTFPolicy(time_step_spec, action_spec))

    # Closure-based side effect: every `update` call advances the fake train
    # step by one and writes it into the supplied variables.
    state = {'train_step': -1}

    def _increment_train_step(variables):
      state['train_step'] += 1
      variables[reverb_variable_container.TRAIN_STEP_KEY].assign(
          state['train_step'])

    variable_container_mock = mock.create_autospec(
        reverb_variable_container.ReverbVariableContainer)
    variable_container_mock.update.side_effect = _increment_train_step

    with mock.patch.object(
        tf.summary, 'scalar', autospec=True) as scalar_summary_mock:
      # Run the function under test. 11 loop iterations are needed for 10
      # train steps because the eval occurs on the iteration after the new
      # train step is found.
      eval_job.evaluate(
          summary_dir=summary_dir,
          policy=policy,
          environment_name=None,
          suite_load_fn=lambda _: env,
          variable_container=variable_container_mock,
          eval_interval=5,
          is_running=_NTimesReturnTrue(n=11))

      # With eval_interval=5 over train steps 0..10, three evals (and hence
      # three average-return summaries) are expected.
      num_summaries = self.count_summary_scalar_tags_in_call_list(
          scalar_summary_mock, 'Metrics/eval_actor/AverageReturn')
      self.assertEqual(num_summaries, 3)