def test_timesteps(self):
    agent = mock.Mock()
    agent.batch_act.side_effect = [[1, 1]] * 5

    def make_env(idx):
        env = mock.Mock()
        if idx == 0:
            # First episode: 0 -> 1 -> 2 -> 3 (reset)
            # Second episode: 4 -> 5 -> 6 -> 7 (done)
            env.reset.side_effect = [('state', 0), ('state', 4)]
            env.step.side_effect = [
                (('state', 1), 0, False, {}),
                (('state', 2), 0.1, False, {}),
                (('state', 3), 0.2, False, {'needs_reset': True}),
                (('state', 5), -0.5, False, {}),
                (('state', 6), 0, False, {}),
                (('state', 7), 1, True, {}),
            ]
        else:
            # First episode: 0 -> 1 (reset)
            # Second episode: 2 -> 3 (reset)
            # Third episode: 4 -> 5 -> 6 -> 7 (done)
            env.reset.side_effect = [
                ('state', 0), ('state', 2), ('state', 4)]
            env.step.side_effect = [
                (('state', 1), 2, False, {'needs_reset': True}),
                (('state', 3), 3, False, {'needs_reset': True}),
                (('state', 5), -0.6, False, {}),
                (('state', 6), 0, False, {}),
                (('state', 7), 1, True, {}),
            ]
        return env

    vec_env = chainerrl.envs.SerialVectorEnv(
        [make_env(i) for i in range(2)])

    if self.n_episodes:
        with self.assertRaises(AssertionError):
            scores = evaluator.batch_run_evaluation_episodes(
                vec_env, agent,
                n_steps=self.n_timesteps, n_episodes=self.n_episodes)
    else:
        # First Env:  [1 2 (3_a) 5 6 (7_a)]
        # Second Env: [(1) (3_b) 5 6 (7_b)]
        scores = evaluator.batch_run_evaluation_episodes(
            vec_env, agent,
            n_steps=self.n_timesteps, n_episodes=self.n_episodes)
        if self.n_timesteps == 2:
            self.assertAlmostEqual(len(scores), 1)
            self.assertAlmostEqual(scores[0], 0.1)
            self.assertEqual(agent.batch_observe.call_count, 2)
        else:
            self.assertAlmostEqual(len(scores), 3)
            self.assertAlmostEqual(scores[0], 0.3)
            self.assertAlmostEqual(scores[1], 2.0)
            self.assertAlmostEqual(scores[2], 3.0)
        # batch_reset should be all True
        self.assertTrue(all(agent.batch_observe.call_args[0][3]))
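
# The assertRaises branch above relies on batch_run_evaluation_episodes
# rejecting calls that specify both n_steps and n_episodes. A minimal sketch
# of that precondition (an assumption; the evaluator's actual assertion and
# its message may differ):
def _sketch_check_eval_args(n_steps, n_episodes):
    assert (n_steps is None) != (n_episodes is None), (
        'Exactly one of n_steps or n_episodes must be None.')
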
def _test_abc_batch(self, steps=100000, require_success=True, gpu=-1,
                    load_model=False, num_envs=4):
    if self.recurrent and gpu >= 0:
        self.skipTest(
            'NStepLSTM does not support double backprop with GPU.')
    if self.recurrent and chainer.__version__ == '7.0.0b3':
        self.skipTest(
            'chainer==7.0.0b3 has a bug in double backprop of LSTM.'
            ' See https://github.com/chainer/chainer/pull/8037')
    env, _ = self.make_vec_env_and_successful_return(
        test=False, num_envs=num_envs)
    test_env, successful_return = self.make_vec_env_and_successful_return(
        test=True, num_envs=num_envs)
    agent = self.make_agent(env, gpu)
    max_episode_len = None if self.episodic else 2

    if load_model:
        print('Load agent from', self.agent_dirname)
        agent.load(self.agent_dirname)

    # Train
    train_agent_batch_with_evaluation(
        agent=agent,
        env=env,
        steps=steps,
        outdir=self.tmpdir,
        eval_interval=200,
        eval_n_steps=None,
        eval_n_episodes=40,
        successful_score=successful_return,
        eval_env=test_env,
        log_interval=100,
        max_episode_len=max_episode_len,
    )
    env.close()

    # Test
    n_test_runs = 10
    eval_returns = batch_run_evaluation_episodes(
        test_env,
        agent,
        n_steps=None,
        n_episodes=n_test_runs,
        max_episode_len=max_episode_len,
    )
    test_env.close()
    if require_success:
        n_succeeded = np.sum(np.asarray(eval_returns) >= successful_return)
        self.assertEqual(n_succeeded, n_test_runs)

    # Save
    agent.save(self.agent_dirname)
def _test_abc_batch(self, steps=100000, require_success=True, gpu=-1,
                    load_model=False, num_envs=4):
    env, _ = self.make_vec_env_and_successful_return(
        test=False, num_envs=num_envs)
    test_env, successful_return = self.make_vec_env_and_successful_return(
        test=True, num_envs=num_envs)
    agent = self.make_agent(env, gpu)
    max_episode_len = None if self.episodic else 2

    if load_model:
        print('Load agent from', self.agent_dirname)
        agent.load(self.agent_dirname)

    # Train
    train_agent_batch_with_evaluation(
        agent=agent,
        env=env,
        steps=steps,
        outdir=self.tmpdir,
        eval_interval=200,
        eval_n_steps=None,
        eval_n_episodes=40,
        successful_score=successful_return,
        eval_env=test_env,
        log_interval=100,
        max_episode_len=max_episode_len,
    )
    env.close()

    # Test
    n_test_runs = 10
    eval_returns = batch_run_evaluation_episodes(
        test_env,
        agent,
        n_steps=None,
        n_episodes=n_test_runs,
        max_episode_len=max_episode_len,
    )
    test_env.close()
    if require_success:
        n_succeeded = np.sum(np.asarray(eval_returns) >= successful_return)
        self.assertEqual(n_succeeded, n_test_runs)

    # Save
    agent.save(self.agent_dirname)
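
# Both _test_abc_batch variants above (and the harnesses below) assume the
# same helper contract: a vector env plus the return threshold that counts as
# success. A minimal sketch of what make_vec_env_and_successful_return might
# look like for the ABC toy env; the ABC constructor arguments and the
# threshold are assumptions, not the test classes' actual implementation:
def _sketch_make_vec_env_and_successful_return(test, num_envs):
    vec_env = chainerrl.envs.SerialVectorEnv(
        [chainerrl.envs.ABC(episodic=True, deterministic=test)
         for _ in range(num_envs)])
    # An evaluation episode counts as a success iff its return
    # reaches this value.
    successful_return = 1
    return vec_env, successful_return
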
def _test_batch_training(self, gpu, steps=5000, load_model=False,
                         require_success=True):
    random_seed.set_random_seed(1)
    logging.basicConfig(level=logging.DEBUG)

    env, _ = self.make_vec_env_and_successful_return(test=False)
    test_env, successful_return = self.make_vec_env_and_successful_return(
        test=True)
    agent = self.make_agent(env, gpu)

    if load_model:
        print('Load agent from', self.agent_dirname)
        agent.load(self.agent_dirname)
        agent.replay_buffer.load(self.rbuf_filename)

    # Train
    train_agent_batch_with_evaluation(
        agent=agent,
        env=env,
        steps=steps,
        outdir=self.tmpdir,
        eval_interval=200,
        eval_n_steps=None,
        eval_n_episodes=5,
        successful_score=1,
        eval_env=test_env,
    )
    env.close()

    # Test
    n_test_runs = 5
    eval_returns = batch_run_evaluation_episodes(
        test_env,
        agent,
        n_steps=None,
        n_episodes=n_test_runs,
    )
    test_env.close()
    n_succeeded = np.sum(np.asarray(eval_returns) >= successful_return)
    if require_success:
        self.assertEqual(n_succeeded, n_test_runs)

    # Save
    agent.save(self.agent_dirname)
    agent.replay_buffer.save(self.rbuf_filename)
def _test_abc(self, steps=1000000, require_success=True, gpu=-1,
              load_model=False):
    env, _ = self.make_env_and_successful_return(
        test=False, n=self.num_processes)
    test_env, successful_return = self.make_env_and_successful_return(
        test=True, n=1)
    agent = self.make_agent(env, gpu)

    if load_model:
        print('Load agent from', self.agent_dirname)
        agent.load(self.agent_dirname)

    # Train
    chainerrl.experiments.train_agent_batch_with_evaluation(
        agent=agent,
        env=env,
        steps=steps,
        outdir=self.tmpdir,
        log_interval=10,
        eval_interval=200,
        eval_n_steps=None,
        eval_n_episodes=50,
        successful_score=1,
        eval_env=test_env,
    )
    env.close()

    # Test
    n_test_runs = 100
    eval_returns = batch_run_evaluation_episodes(
        test_env,
        agent,
        n_steps=None,
        n_episodes=n_test_runs,
    )
    test_env.close()
    n_succeeded = np.sum(np.asarray(eval_returns) >= successful_return)
    if require_success:
        self.assertGreater(n_succeeded, 0.8 * n_test_runs)

    # Save
    agent.save(self.agent_dirname)
def test_needs_reset(self):
    agent = mock.Mock()
    agent.batch_act.side_effect = [[1, 1]] * 5

    def make_env(idx):
        env = mock.Mock()
        if idx == 0:
            # First episode: 0 -> 1 -> 2 -> 3 (reset)
            # Second episode: 4 -> 5 -> 6 -> 7 (done)
            env.reset.side_effect = [('state', 0), ('state', 4)]
            env.step.side_effect = [
                (('state', 1), 0, False, {}),
                (('state', 2), 0, False, {}),
                (('state', 3), 0, False, {'needs_reset': True}),
                (('state', 5), -0.5, False, {}),
                (('state', 6), 0, False, {}),
                (('state', 7), 1, True, {}),
            ]
        else:
            # First episode: 0 -> 1 (reset)
            # Second episode: 2 -> 3 (reset)
            # Third episode: 4 -> 5 -> 6 -> 7 (done)
            env.reset.side_effect = [
                ('state', 0), ('state', 2), ('state', 4)]
            env.step.side_effect = [
                (('state', 1), 2, False, {'needs_reset': True}),
                (('state', 3), 3, False, {'needs_reset': True}),
                (('state', 5), -0.6, False, {}),
                (('state', 6), 0, False, {}),
                (('state', 7), 1, True, {}),
            ]
        return env

    vec_env = chainerrl.envs.SerialVectorEnv(
        [make_env(i) for i in range(2)])

    # First Env:  [1 2 (3_a) 5 6 (7_a)]
    # Second Env: [(1) (3_b) 5 6 (7_b)]
    # Results: (3_a), (1), (3_b), (7_b)
    scores = evaluator.batch_run_evaluation_episodes(
        vec_env, agent, n_steps=None, n_episodes=4)
    self.assertAlmostEqual(len(scores), 4)
    self.assertAlmostEqual(scores[0], 0)
    self.assertAlmostEqual(scores[1], 2)
    self.assertAlmostEqual(scores[2], 3)
    self.assertAlmostEqual(scores[3], 0.4)
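
# For reference, a minimal, self-contained sketch of the accounting rule the
# mock-based tests above encode. This is an assumption about the behaviour of
# evaluator.batch_run_evaluation_episodes, not its actual implementation
# (which acts on all envs in a single batched call, masks finished envs, and
# orders the collected returns differently): an episode's return is recorded
# whenever its env reports done=True or info.get('needs_reset'), after which
# that env is reset in place.
def _sketch_batch_eval(envs, batch_act, n_episodes):
    obss = [env.reset() for env in envs]
    returns = [0.0] * len(envs)
    scores = []
    while len(scores) < n_episodes:
        actions = batch_act(obss)
        for i, (env, action) in enumerate(zip(envs, actions)):
            obs, reward, done, info = env.step(action)
            returns[i] += reward
            if done or info.get('needs_reset'):
                # Episode finished: record its return and restart this env.
                scores.append(returns[i])
                returns[i] = 0.0
                obs = env.reset()
            obss[i] = obs
    return scores[:n_episodes]
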