def testRunOnePhase(self):
  max_steps = 10
  environment_steps = 2
  environment = MockEnvironment(max_steps=environment_steps)
  statistics = []
  runner = run_experiment.Runner(
      self._test_subdir, self._create_agent_fn, game_name='Test',
      create_environment_fn=lambda x, y: environment)
  step_number, sum_returns, num_episodes = runner._run_one_phase(
      max_steps, statistics, 'test')
  calls_to_run_episode = int(max_steps / environment_steps)
  self.assertEqual(self._agent.step.call_count, calls_to_run_episode)
  self.assertEqual(self._agent.end_episode.call_count, calls_to_run_episode)
  self.assertEqual(max_steps, step_number)
  self.assertEqual(-1 * calls_to_run_episode, sum_returns)
  self.assertEqual(calls_to_run_episode, num_episodes)
  expected_statistics = []
  for _ in range(calls_to_run_episode):
    expected_statistics.append({
        'test_episode_lengths': 2,
        'test_episode_returns': -1
    })
  self.assertEqual(len(expected_statistics), len(statistics))
  for i in range(len(statistics)):
    self.assertDictEqual(expected_statistics[i], statistics[i])
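# A rough sketch (an assumption, not the actual test helper) of the
# MockEnvironment these tests rely on, inferred from the expectations above:
# each episode lasts max_steps steps and the reward at step i is (-1)**i * i,
# so a 2-step episode returns -1 and a 10-step episode returns -5. The exact
# reset()/step() signature the real Runner expects may differ.
import numpy as np


class MockEnvironment(object):
  """Terminates after max_steps and pays reward (-1)**i * i at step i."""

  def __init__(self, max_steps=10):
    self.max_steps = max_steps
    self._step_count = 0
    self.game_over = False

  def reset(self):
    self._step_count = 0
    self.game_over = False
    return np.zeros((84, 84))

  def step(self, action):
    del action  # The mock ignores the agent's action.
    reward = (-1) ** self._step_count * self._step_count
    self._step_count += 1
    self.game_over = self._step_count >= self.max_steps
    return np.zeros((84, 84)), reward, self.game_over, {}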
def main():
  # Create the runner class with this agent. We use very small numbers of steps
  # to terminate quickly, as this is mostly meant for demonstrating how one can
  # use the framework. We keep the standard 200 iterations, but each iteration
  # only runs a handful of training and evaluation steps.
  random_dqn_runner = run_experiment.Runner(LOG_PATH,
                                            create_random_dqn_agent,
                                            game_name=GAME,
                                            num_iterations=200,
                                            training_steps=10,
                                            evaluation_steps=10,
                                            max_steps_per_episode=100)

  # @title Train MyRandomDQNAgent.
  print('Will train agent, please be patient, may be a while...')
  random_dqn_runner.run_experiment()
  print('Done training!')

  # @title Load the training logs.
  random_dqn_data = colab_utils.read_experiment(LOG_PATH, verbose=True)
  random_dqn_data['agent'] = 'MyRandomDQN'
  random_dqn_data['run_number'] = 1
  experimental_data[GAME] = experimental_data[GAME].merge(random_dqn_data,
                                                          how='outer')

  # @title Plot training results.
  fig, ax = plt.subplots(figsize=(16, 8))
  sns.tsplot(data=experimental_data[GAME], time='iteration', unit='run_number',
             condition='agent', value='train_episode_returns', ax=ax)
  plt.title(GAME)
  plt.savefig('game.png')
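# sns.tsplot (used in main() above) was deprecated and later removed from
# seaborn. If main() fails on a recent seaborn, the sketch below shows one
# possible replacement using sns.lineplot. plot_training_results is a
# hypothetical helper, and it assumes the same DataFrame columns
# ('iteration', 'train_episode_returns', 'agent') that main() already plots.
import matplotlib.pyplot as plt
import seaborn as sns


def plot_training_results(game_data, game_name, output_path='game.png'):
  """Plots per-iteration training returns, one line per agent."""
  fig, ax = plt.subplots(figsize=(16, 8))
  sns.lineplot(data=game_data, x='iteration', y='train_episode_returns',
               hue='agent', ax=ax)
  plt.title(game_name)
  plt.savefig(output_path)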
def testCheckpointExperiment(self, mock_logger_constructor,
                             mock_checkpointer_constructor):
  checkpoint_dir = os.path.join(self._test_subdir, 'checkpoints')
  test_dict = {'test': 1}
  iteration = 1729

  def bundle_and_checkpoint(x, y):
    self.assertEqual(checkpoint_dir, x)
    self.assertEqual(iteration, y)
    return test_dict

  self._agent.bundle_and_checkpoint.side_effect = bundle_and_checkpoint
  experiment_checkpointer = mock.Mock()
  mock_checkpointer_constructor.return_value = experiment_checkpointer
  logs_data = {'one': 1, 'two': 2}
  mock_logger = MockLogger(run_asserts=False, data=logs_data)
  mock_logger_constructor.return_value = mock_logger
  runner = run_experiment.Runner(
      self._test_subdir, self._create_agent_fn, game_name='Test',
      create_environment_fn=lambda x, y: mock.Mock())
  runner._checkpoint_experiment(iteration)
  self.assertEqual(1, experiment_checkpointer.save_checkpoint.call_count)
  mock_args, _ = experiment_checkpointer.save_checkpoint.call_args
  self.assertEqual(iteration, mock_args[0])
  test_dict['logs'] = logs_data
  test_dict['current_iteration'] = iteration
  self.assertDictEqual(test_dict, mock_args[1])
def testInitializeCheckpointingWithNoCheckpointFile(self, mock_get_latest):
  mock_get_latest.return_value = -1
  base_dir = '/does/not/exist'
  with self.assertRaisesRegexp(tf.errors.PermissionDeniedError, '.*/does.*'):
    run_experiment.Runner(base_dir, self._create_agent_fn, game_name='Pong')
def testRunOneEpisodeWithLowMaxSteps(self):
  max_steps_per_episode = 2
  environment = MockEnvironment()
  runner = run_experiment.Runner(
      self._test_subdir, self._create_agent_fn, game_name='Test',
      create_environment_fn=lambda x, y: environment,
      max_steps_per_episode=max_steps_per_episode)
  step_number, total_reward = runner._run_one_episode()
  self.assertEqual(self._agent.step.call_count, max_steps_per_episode - 1)
  self.assertEqual(self._agent.end_episode.call_count, 1)
  self.assertEqual(max_steps_per_episode, step_number)
  self.assertEqual(-1, total_reward)
def testRunOneEpisode(self):
  max_steps_per_episode = 11
  environment = MockEnvironment()
  runner = run_experiment.Runner(
      self._test_subdir, self._create_agent_fn, game_name='Test',
      create_environment_fn=lambda x, y: environment,
      max_steps_per_episode=max_steps_per_episode)
  step_number, total_reward = runner._run_one_episode()
  self.assertEqual(self._agent.step.call_count, environment.max_steps - 1)
  self.assertEqual(self._agent.end_episode.call_count, 1)
  self.assertEqual(environment.max_steps, step_number)
  # Expected reward will be \sum_{i=0}^{9} (-1)**i * i = -5.
  self.assertEqual(-5, total_reward)
def testDefaultGinRainbow(self):
  """Test RainbowAgent default configuration using default gin."""
  tf.logging.info('####### Training the RAINBOW agent #####')
  tf.logging.info('####### RAINBOW base_dir: {}'.format(FLAGS.base_dir))
  FLAGS.agent_name = 'rainbow'
  FLAGS.gin_files = ['dopamine/agents/rainbow/configs/rainbow.gin']
  FLAGS.gin_bindings = [
      'WrappedReplayBuffer.replay_capacity = 100'  # To prevent OOM.
  ]
  run_experiment.load_gin_configs(FLAGS.gin_files, FLAGS.gin_bindings)
  runner = run_experiment.Runner(FLAGS.base_dir, train.create_agent)
  self.assertIsInstance(runner._agent.optimizer, tf.train.AdamOptimizer)
  self.assertNear(0.0000625, runner._agent.optimizer._lr, 0.0001)
  shutil.rmtree(FLAGS.base_dir)
def testRunExperimentWithInconsistentRange(self, mock_logger_constructor,
                                           mock_checkpointer_constructor):
  experiment_logger = MockLogger()
  mock_logger_constructor.return_value = experiment_logger
  experiment_checkpointer = mock.Mock()
  mock_checkpointer_constructor.return_value = experiment_checkpointer
  runner = run_experiment.Runner(
      self._test_subdir, self._create_agent_fn, game_name='Test',
      create_environment_fn=lambda x, y: mock.Mock(),
      num_iterations=0)
  runner.run_experiment()
  self.assertEqual(0, experiment_checkpointer.save_checkpoint.call_count)
  self.assertEqual(0, experiment_logger._calls_to_set)
  self.assertEqual(0, experiment_logger._calls_to_log)
def testDefaultGinDqn(self):
  """Test DQNAgent configuration using the default gin config."""
  tf.logging.info('####### Training the DQN agent #####')
  tf.logging.info('####### DQN base_dir: {}'.format(FLAGS.base_dir))
  FLAGS.agent_name = 'dqn'
  FLAGS.gin_files = ['dopamine/agents/dqn/configs/dqn.gin']
  FLAGS.gin_bindings = [
      'WrappedReplayBuffer.replay_capacity = 100'  # To prevent OOM.
  ]
  run_experiment.load_gin_configs(FLAGS.gin_files, FLAGS.gin_bindings)
  runner = run_experiment.Runner(FLAGS.base_dir, train.create_agent)
  self.assertIsInstance(runner._agent.optimizer, tf.train.RMSPropOptimizer)
  self.assertNear(0.00025, runner._agent.optimizer._learning_rate, 0.0001)
  shutil.rmtree(FLAGS.base_dir)
def testOverrideGinDqn(self):
  """Test DQNAgent configuration overridden with AdamOptimizer."""
  tf.logging.info('####### Training the DQN agent #####')
  tf.logging.info('####### DQN base_dir: {}'.format(FLAGS.base_dir))
  FLAGS.agent_name = 'dqn'
  FLAGS.gin_files = ['dopamine/agents/dqn/configs/dqn.gin']
  FLAGS.gin_bindings = [
      'DQNAgent.optimizer = @tf.train.AdamOptimizer()',
      'tf.train.AdamOptimizer.learning_rate = 100',
      'WrappedReplayBuffer.replay_capacity = 100'  # To prevent OOM.
  ]
  run_experiment.load_gin_configs(FLAGS.gin_files, FLAGS.gin_bindings)
  runner = run_experiment.Runner(FLAGS.base_dir, train.create_agent)
  self.assertIsInstance(runner._agent.optimizer, tf.train.AdamOptimizer)
  self.assertEqual(100, runner._agent.optimizer._lr)
  shutil.rmtree(FLAGS.base_dir)
def testLogExperiment(self, mock_logger_constructor):
  log_every_n = 2
  logging_file_prefix = 'prefix'
  statistics = 'statistics'
  experiment_logger = MockLogger(test_cls=self)
  mock_logger_constructor.return_value = experiment_logger
  runner = run_experiment.Runner(
      self._test_subdir, self._create_agent_fn, game_name='Test',
      create_environment_fn=lambda x, y: mock.Mock(),
      logging_file_prefix=logging_file_prefix,
      log_every_n=log_every_n)
  num_iterations = 10
  for i in range(num_iterations):
    runner._log_experiment(i, statistics)
  self.assertEqual(num_iterations, experiment_logger._calls_to_set)
  self.assertEqual((num_iterations / log_every_n),
                   experiment_logger._calls_to_log)
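# A rough sketch (an assumption, not the actual test helper) of the MockLogger
# used above: it mimics the logger interface just enough to count how often the
# Runner stored statistics and wrote them to disk, which is what the assertions
# on _calls_to_set and _calls_to_log check.
class MockLogger(object):
  """Stand-in logger that counts interactions instead of writing files."""

  def __init__(self, test_cls=None, run_asserts=True, data=None):
    self._test_cls = test_cls
    self._run_asserts = run_asserts
    self.data = data
    self._calls_to_set = 0
    self._calls_to_log = 0

  def __setitem__(self, key, value):
    del key, value  # The real helper may assert on these.
    self._calls_to_set += 1

  def log_to_file(self, filename_prefix, iteration):
    del filename_prefix, iteration  # The real helper may assert on these.
    self._calls_to_log += 1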
def testRunExperiment(self, mock_logger_constructor,
                      mock_checkpointer_constructor, mock_get_latest):
  log_every_n = 1
  environment = MockEnvironment()
  experiment_logger = MockLogger(run_asserts=False)
  mock_logger_constructor.return_value = experiment_logger
  experiment_checkpointer = mock.Mock()
  start_iteration = 1729
  mock_get_latest.return_value = start_iteration

  def load_checkpoint(_):
    return {'logs': 'log_data', 'current_iteration': start_iteration - 1}

  experiment_checkpointer.load_checkpoint.side_effect = load_checkpoint
  mock_checkpointer_constructor.return_value = experiment_checkpointer

  def bundle_and_checkpoint(x, y):
    del x, y  # Unused.
    return {'test': 1}

  self._agent.bundle_and_checkpoint.side_effect = bundle_and_checkpoint
  num_iterations = 10
  self._agent.unbundle.return_value = True
  end_iteration = start_iteration + num_iterations
  runner = run_experiment.Runner(
      self._test_subdir, self._create_agent_fn, game_name='Test',
      create_environment_fn=lambda x, y: environment,
      log_every_n=log_every_n, num_iterations=end_iteration,
      training_steps=1, evaluation_steps=1)
  self.assertEqual(start_iteration, runner._start_iteration)
  runner.run_experiment()
  self.assertEqual(num_iterations,
                   experiment_checkpointer.save_checkpoint.call_count)
  self.assertEqual(num_iterations, experiment_logger._calls_to_set)
  self.assertEqual(num_iterations, experiment_logger._calls_to_log)
  glob_string = '{}/events.out.tfevents.*'.format(self._test_subdir)
  self.assertGreater(len(tf.gfile.Glob(glob_string)), 0)
def testOverrideGinRainbow(self):
  """Test RainbowAgent configuration overridden with RMSPropOptimizer."""
  tf.logging.info('####### Training the RAINBOW agent #####')
  tf.logging.info('####### RAINBOW base_dir: {}'.format(FLAGS.base_dir))
  FLAGS.agent_name = 'rainbow'
  FLAGS.gin_files = [
      'dopamine/agents/rainbow/configs/rainbow.gin',
  ]
  FLAGS.gin_bindings = [
      'RainbowAgent.optimizer = @tf.train.RMSPropOptimizer()',
      'tf.train.RMSPropOptimizer.learning_rate = 100',
      'WrappedReplayBuffer.replay_capacity = 100'  # To prevent OOM.
  ]
  run_experiment.load_gin_configs(FLAGS.gin_files, FLAGS.gin_bindings)
  runner = run_experiment.Runner(FLAGS.base_dir, train.create_agent)
  self.assertIsInstance(runner._agent.optimizer, tf.train.RMSPropOptimizer)
  self.assertEqual(100, runner._agent.optimizer._learning_rate)
  shutil.rmtree(FLAGS.base_dir)
def testOverrideGinImplicitQuantile(self):
  """Test ImplicitQuantile configuration overriding using IQN gin."""
  tf.logging.info('###### Training the Implicit Quantile agent #####')
  FLAGS.agent_name = 'implicit_quantile'
  FLAGS.base_dir = os.path.join(
      '/tmp/dopamine_tests',
      datetime.datetime.utcnow().strftime('run_%Y_%m_%d_%H_%M_%S'))
  tf.logging.info('###### IQN base dir: {}'.format(FLAGS.base_dir))
  FLAGS.gin_files = [
      'dopamine/agents/implicit_quantile/configs/implicit_quantile.gin'
  ]
  FLAGS.gin_bindings = [
      'Runner.num_iterations=0',
      'WrappedPrioritizedReplayBuffer.replay_capacity = 1000',
  ]
  run_experiment.load_gin_configs(FLAGS.gin_files, FLAGS.gin_bindings)
  runner = run_experiment.Runner(FLAGS.base_dir, train.create_agent)
  self.assertEqual(1000, runner._agent._replay.memory._replay_capacity)
  shutil.rmtree(FLAGS.base_dir)
def create_runner(self, env_fn, hparams, target_iterations,
                  training_steps_per_iteration):
  # pylint: disable=unbalanced-tuple-unpacking
  agent_params, optimizer_params, \
      runner_params, replay_buffer_params = _parse_hparams(hparams)
  # pylint: enable=unbalanced-tuple-unpacking
  optimizer = _get_optimizer(optimizer_params)
  agent_params["optimizer"] = optimizer
  agent_params.update(replay_buffer_params)
  create_agent_fn = get_create_agent(agent_params)
  runner = run_experiment.Runner(
      base_dir=self.agent_model_dir,
      create_agent_fn=create_agent_fn,
      create_environment_fn=get_create_env_fun(
          env_fn, time_limit=hparams.time_limit),
      evaluation_steps=0,
      num_iterations=target_iterations,
      training_steps=training_steps_per_iteration,
      **runner_params)
  return runner
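# A hedged usage sketch for create_runner above. env_fn and my_hparams here are
# placeholders for the objects supplied by the surrounding trainer; the only
# field create_runner itself reads directly from hparams is time_limit, with
# the rest handled by _parse_hparams.
#
#   runner = self.create_runner(env_fn=my_env_fn, hparams=my_hparams,
#                               target_iterations=10,
#                               training_steps_per_iteration=1000)
#   runner.run_experiment()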
def testRunOneIteration(self):
  environment_steps = 2
  environment = MockEnvironment(max_steps=environment_steps)
  training_steps = 20
  evaluation_steps = 10
  runner = run_experiment.Runner(
      self._test_subdir, self._create_agent_fn, game_name='Test',
      create_environment_fn=lambda x, y: environment,
      training_steps=training_steps, evaluation_steps=evaluation_steps)
  dictionary = runner._run_one_iteration(1)
  train_calls = int(training_steps / environment_steps)
  eval_calls = int(evaluation_steps / environment_steps)
  expected_dictionary = {
      'train_episode_lengths': [2 for _ in range(train_calls)],
      'train_episode_returns': [-1 for _ in range(train_calls)],
      'train_average_return': [-1],
      'eval_episode_lengths': [2 for _ in range(eval_calls)],
      'eval_episode_returns': [-1 for _ in range(eval_calls)],
      'eval_average_return': [-1]
  }
  self.assertDictEqual(expected_dictionary, dictionary)
def testInitializeCheckpointingWhenCheckpointUnbundleSucceeds(
    self, mock_get_latest):
  latest_checkpoint = 7
  mock_get_latest.return_value = latest_checkpoint
  logs_data = {'a': 1, 'b': 2}
  current_iteration = 1729
  checkpoint_data = {'current_iteration': current_iteration,
                     'logs': logs_data}
  checkpoint_dir = os.path.join(self._test_subdir, 'checkpoints')
  checkpoint = checkpointer.Checkpointer(checkpoint_dir, 'ckpt')
  checkpoint.save_checkpoint(latest_checkpoint, checkpoint_data)
  mock_agent = mock.Mock()
  mock_agent.unbundle.return_value = True
  runner = run_experiment.Runner(self._test_subdir,
                                 lambda x, y, summary_writer: mock_agent,
                                 game_name='Pong')
  expected_iteration = current_iteration + 1
  self.assertEqual(expected_iteration, runner._start_iteration)
  self.assertDictEqual(logs_data, runner._logger.data)
  mock_agent.unbundle.assert_called_once_with(
      checkpoint_dir, latest_checkpoint, checkpoint_data)
def create_runner(base_dir, create_agent_fn):
  """Creates an experiment Runner.

  Args:
    base_dir: str, base directory for hosting all subdirectories.
    create_agent_fn: A function that takes as args a Tensorflow session and a
      Gym environment, and returns an agent.

  Returns:
    runner: A `run_experiment.Runner` like object.

  Raises:
    ValueError: When an unknown schedule is encountered.
  """
  assert base_dir is not None
  # Continuously runs training and evaluation until max num_iterations is hit.
  if FLAGS.schedule == 'continuous_train_and_eval':
    return run_experiment.Runner(base_dir, create_agent_fn,
                                 create_pacman_environment)
  # Continuously runs training until max num_iterations is hit.
  elif FLAGS.schedule == 'continuous_train':
    return run_experiment.TrainRunner(base_dir, create_agent_fn,
                                      create_pacman_environment)
  else:
    raise ValueError('Unknown schedule: {}'.format(FLAGS.schedule))
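# A minimal usage sketch for create_runner. run_pacman_experiment is a
# hypothetical wrapper (not part of the original script); it assumes the FLAGS
# object defined alongside create_runner and an agent factory compatible with
# run_experiment.Runner.
def run_pacman_experiment(base_dir, create_agent_fn,
                          schedule='continuous_train_and_eval'):
  """Builds the Runner for the given schedule and runs the experiment."""
  FLAGS.schedule = schedule
  runner = create_runner(base_dir, create_agent_fn)
  # With schedule='continuous_train', the TrainRunner variant runs training
  # only and skips the evaluation phase of each iteration.
  runner.run_experiment()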
def testInitializeCheckpointingWhenCheckpointUnbundleFails(
    self, mock_logger_constructor, mock_checkpointer_constructor,
    mock_get_latest):
  mock_checkpointer = _create_mock_checkpointer()
  mock_checkpointer_constructor.return_value = mock_checkpointer
  latest_checkpoint = 7
  mock_get_latest.return_value = latest_checkpoint
  agent = mock.Mock()
  agent.unbundle.return_value = False
  mock_logger = mock.Mock()
  mock_logger_constructor.return_value = mock_logger
  runner = run_experiment.Runner(self._test_subdir,
                                 lambda x, y, summary_writer: agent,
                                 create_environment_fn=lambda x, y: x,
                                 game_name='Test')
  self.assertEqual(0, runner._start_iteration)
  self.assertEqual(1, mock_checkpointer.load_checkpoint.call_count)
  self.assertEqual(1, agent.unbundle.call_count)
  mock_args, _ = agent.unbundle.call_args
  self.assertEqual('{}/checkpoints'.format(self._test_subdir), mock_args[0])
  self.assertEqual(latest_checkpoint, mock_args[1])
  expected_dictionary = {'current_iteration': 1729, 'logs': 'logs'}
  self.assertDictEqual(expected_dictionary, mock_args[2])
def testFailsWithoutGameName(self):
  with self.assertRaises(AssertionError):
    run_experiment.Runner(self._test_subdir, self._create_agent_fn)
def step(self, reward, observation):
  return self._choose_action()


def create_basic_agent(sess, environment, summary_writer=None):
  """The Runner class will expect a function of this type to create an agent."""
  return BasicAgent(sess, num_actions=environment.action_space.n,
                    switch_prob=0.2)

# Create the runner class with this agent. We use very small numbers of steps
# to terminate quickly, as this is mostly meant for demonstrating how one can
# use the framework. We keep the standard 200 iterations, but each iteration
# only runs a handful of training and evaluation steps.
basic_runner = run_experiment.Runner(LOG_PATH,
                                     create_basic_agent,
                                     game_name=GAME,
                                     num_iterations=200,
                                     training_steps=10,
                                     evaluation_steps=10,
                                     max_steps_per_episode=100)

# @title Train Basic Agent.
print('Will train basic agent, please be patient, may be a while...')
basic_runner.run_experiment()
print('Done training!')

# @title Load baseline data.
content = os.path.join(LOG_PATH, 'content')
os.system('mkdir -p ' + content)
    net = slim.fully_connected(net, 512, activation_fn=tf.nn.relu6)
    q_values = slim.fully_connected(net, self.num_actions, activation_fn=None)
    return self._get_network_type()(q_values)


def create_random_dqn_agent(sess, environment, summary_writer=None):
  return MyDQNAgent(sess, num_actions=environment.action_space.n)

# Create the runner class with this agent. We use very small numbers of steps
# to terminate quickly, as this is mostly meant for demonstrating how one can
# use the framework.
print("creating runner")
random_dqn_runner = run_experiment.Runner(BASE_PATH,
                                          create_random_dqn_agent,
                                          game_name=GAME)

# @title Train MyRandomDQNAgent.
# print('Will train agent, please be patient, may be a while...')
# random_dqn_runner.run_experiment()
# print('Done training!')


def display_frames_as_gif(frames):
  """Displays a list of frames as a gif, with controls."""
  plt.figure(figsize=(frames[0].shape[1] / 72.0, frames[0].shape[0] / 72.0),
             dpi=72)