Esempio n. 1
0
  def testRunOneIteration(self):
    """Checks the statistics dictionary returned by `_run_one_iteration`.

    A batch of `MockEnvironment`s, each built with `max_steps=2`, is wrapped
    in a `BatchEnv` and handed to a `BatchRunner`. The test expects every
    reported train/eval episode to have length 2 and return -1, and both
    average returns to be -1.
    """
    steps_per_rollout = 2
    num_envs = self.batch_size
    batched_env = BatchEnv([
        MockEnvironment(max_steps=steps_per_rollout) for _ in range(num_envs)
    ])

    num_train_steps = 20 * num_envs
    num_eval_steps = 10 * num_envs
    # Number of episodes each phase is expected to report.
    num_train_rollouts = int(num_train_steps / steps_per_rollout)
    num_eval_rollouts = int(num_eval_steps / steps_per_rollout)

    runner = dopamine_connector.BatchRunner(
        self._test_subdir,
        self._create_agent_fn,
        create_environment_fn=lambda: batched_env,
        training_steps=num_train_steps,
        evaluation_steps=num_eval_steps)

    actual = runner._run_one_iteration(1)

    expected = {
        "train_episode_lengths": [2] * num_train_rollouts,
        "train_episode_returns": [-1] * num_train_rollouts,
        "train_average_return": [-1],
        "eval_episode_lengths": [2] * num_eval_rollouts,
        "eval_episode_returns": [-1] * num_eval_rollouts,
        "eval_average_return": [-1],
    }
    self.assertDictEqual(expected, actual)
Esempio n. 2
0
  def testRunOnePhase(self):
    """Checks the counters and statistics produced by `_run_one_phase`.

    The phase is given a budget of `2 * batch_size * 10` steps over a batch
    of `MockEnvironment`s built with `max_steps=2`. The test then verifies
    the agent call counts, the returned (steps, returns, episodes) triple
    and the per-episode entries appended to the statistics list.
    """
    num_envs = self.batch_size
    steps_per_episode = 2
    phase_steps = steps_per_episode * num_envs * 10

    batched_env = BatchEnv([
        MockEnvironment(max_steps=steps_per_episode) for _ in range(num_envs)
    ])
    runner = dopamine_connector.BatchRunner(
        self._test_subdir,
        self._create_agent_fn,
        create_environment_fn=lambda: batched_env)

    collected = []
    steps_taken, return_total, episode_count = runner._run_one_phase(
        phase_steps, collected, "test")

    # One batched episode consumes `steps_per_episode` steps in every env.
    batched_episodes = int(phase_steps / (steps_per_episode * num_envs))
    self.assertEqual(self._agent.step.call_count, batched_episodes)
    self.assertEqual(self._agent.end_episode.call_count, batched_episodes)
    self.assertEqual(phase_steps, steps_taken)
    self.assertEqual(-1 * batched_episodes * num_envs, return_total)
    self.assertEqual(batched_episodes, episode_count / num_envs)

    expected_entries = [
        {"test_episode_lengths": 2, "test_episode_returns": -1}
        for _ in range(batched_episodes * num_envs)
    ]
    self.assertEqual(len(expected_entries), len(collected))
    for wanted, got in zip(expected_entries, collected):
      self.assertDictEqual(wanted, got)
Esempio n. 3
0
  def testRunOneEpisodeWithLowMaxSteps(self):
    """Runs one batched episode with `max_steps_per_episode=2`.

    Environments alternate between reward multipliers -1 and 1. The test
    checks the agent call counts, that the per-environment step count equals
    the configured limit, and that each total reward is the negated
    multiplier.
    """
    step_limit = 2
    num_envs = self.batch_size
    multipliers = [-1, 1] * int(num_envs / 2)
    batched_env = BatchEnv(
        [MockEnvironment(reward_multiplier=m) for m in multipliers])
    runner = dopamine_connector.BatchRunner(
        self._test_subdir,
        self._create_agent_fn,
        create_environment_fn=lambda: batched_env,
        max_steps_per_episode=step_limit)

    steps_taken, rewards = runner._run_one_episode()

    expected_agent_steps = step_limit - 1
    expected_rewards = np.array(multipliers) * -1
    self.assertEqual(self._agent.step.call_count, expected_agent_steps)
    self.assertEqual(self._agent.end_episode.call_count, 1)
    self.assertEqual(step_limit, steps_taken / num_envs)
    self.assertAllEqual(expected_rewards, rewards)
Esempio n. 4
0
  def testRunEpisodeBatch(self):
    """Runs one batched episode with `max_steps_per_episode=11`.

    Environments alternate between reward multipliers -1 and 1. The test
    compares the agent call counts and per-environment step count against
    `environment.max_steps`, and expects each total reward to be -5 times
    the environment's multiplier.
    """
    step_limit = 11
    num_envs = self.batch_size
    multipliers = [-1, 1] * int(num_envs / 2)
    batched_env = BatchEnv(
        [MockEnvironment(reward_multiplier=m) for m in multipliers])
    runner = dopamine_connector.BatchRunner(
        self._test_subdir,
        self._create_agent_fn,
        create_environment_fn=lambda: batched_env,
        max_steps_per_episode=step_limit)

    steps_taken, rewards = runner._run_one_episode()

    self.assertEqual(self._agent.step.call_count, batched_env.max_steps - 1)
    self.assertEqual(self._agent.end_episode.call_count, 1)
    self.assertEqual(batched_env.max_steps, steps_taken / num_envs)
    # With reward multiplier 1 the expected total reward is
    # \sum_{i=0}^{9} (-1)**i * i = -5; other multipliers scale it linearly.
    expected_rewards = np.array(multipliers) * -5
    self.assertAllEqual(expected_rewards, rewards)
Esempio n. 5
0
 def testLogExperiment(self, mock_logger_constructor):
   """Checks how often `_log_experiment` sets and logs on the logger.

   `_log_experiment` is called for iterations 0..9 with `log_every_n=2`.
   The test expects one "set" call per iteration and one "log" call per
   `log_every_n` iterations, as counted by the `MockLogger`.

   Args:
     mock_logger_constructor: Mock whose return value is replaced with a
       `MockLogger`. Presumably injected by a patch decorator that is not
       visible in this excerpt -- TODO confirm.
   """
   # TODO(kozak): We probably do not need this test, dopamine test
   # for Runner is enough here. Remove this?
   logging_period = 2
   file_prefix = "prefix"
   fake_statistics = "statistics"
   fake_logger = MockLogger(test_cls=self)
   mock_logger_constructor.return_value = fake_logger
   runner = dopamine_connector.BatchRunner(
       self._test_subdir,
       self._create_agent_fn,
       create_environment_fn=mock.Mock,
       logging_file_prefix=file_prefix,
       log_every_n=logging_period)

   total_iterations = 10
   for iteration in range(total_iterations):
     runner._log_experiment(iteration, fake_statistics)

   self.assertEqual(total_iterations, fake_logger._calls_to_set)
   self.assertEqual(total_iterations / logging_period,
                    fake_logger._calls_to_log)