def test_needs_reset(self):
    """Evaluation must treat an env's 'needs_reset' info flag as an episode end.

    Two serial envs emit a mix of 'needs_reset' and 'done' terminations;
    the first four completed episodes should yield returns 0, 2, 3, 0.4.
    """
    # MagicMock can mock eval_mode while Mock cannot.
    agent = mock.MagicMock()
    agent.batch_act.side_effect = [[1, 1]] * 5

    def build_env(env_idx):
        fake_env = mock.Mock()
        if env_idx == 0:
            # First episode: 0 -> 1 -> 2 -> 3 (reset)
            # Second episode: 4 -> 5 -> 6 -> 7 (done)
            initial_obs = [("state", 0), ("state", 4)]
            transitions = [
                (("state", 1), 0, False, {}),
                (("state", 2), 0, False, {}),
                (("state", 3), 0, False, {"needs_reset": True}),
                (("state", 5), -0.5, False, {}),
                (("state", 6), 0, False, {}),
                (("state", 7), 1, True, {}),
            ]
        else:
            # First episode: 0 -> 1 (reset)
            # Second episode: 2 -> 3 (reset)
            # Third episode: 4 -> 5 -> 6 -> 7 (done)
            initial_obs = [("state", 0), ("state", 2), ("state", 4)]
            transitions = [
                (("state", 1), 2, False, {"needs_reset": True}),
                (("state", 3), 3, False, {"needs_reset": True}),
                (("state", 5), -0.6, False, {}),
                (("state", 6), 0, False, {}),
                (("state", 7), 1, True, {}),
            ]
        fake_env.reset.side_effect = initial_obs
        fake_env.step.side_effect = transitions
        return fake_env

    vec_env = pfrl.envs.SerialVectorEnv([build_env(i) for i in range(2)])

    # First Env: [1 2 (3_a) 5 6 (7_a)]
    # Second Env: [(1) (3_b) 5 6 (7_b)]
    # Results: (1), (3a), (3b), (7b)
    scores = evaluator.batch_run_evaluation_episodes(
        vec_env, agent, n_steps=None, n_episodes=4
    )
    assert len(scores) == 4
    np.testing.assert_allclose(scores[0], 0)
    np.testing.assert_allclose(scores[1], 2)
    np.testing.assert_allclose(scores[2], 3)
    np.testing.assert_allclose(scores[3], 0.4)
    # batch_reset should be all True
    assert all(agent.batch_observe.call_args[0][3])
def _test_abc_batch(self, steps=100000, require_success=True, gpu=-1, load_model=False, num_envs=4):
    """Train a batch agent on the ABC env, evaluate it, and save it.

    Args:
        steps: Total number of training steps.
        require_success: If True, every evaluation run must reach the
            successful return.
        gpu: GPU device id, or -1 for CPU.
        load_model: If True, restore the agent from ``self.agent_dirname``
            before training.
        num_envs: Number of parallel envs in each vector env.
    """
    train_env, _ = self.make_vec_env_and_successful_return(test=False, num_envs=num_envs)
    eval_env, successful_return = self.make_vec_env_and_successful_return(
        test=True, num_envs=num_envs)
    agent = self.make_agent(train_env, gpu)
    # Non-episodic variants cap each episode at two steps.
    max_episode_len = None if self.episodic else 2
    if load_model:
        print("Load agent from", self.agent_dirname)
        agent.load(self.agent_dirname)

    # Train
    train_agent_batch_with_evaluation(
        agent=agent,
        env=train_env,
        steps=steps,
        outdir=self.tmpdir,
        eval_interval=200,
        eval_n_steps=None,
        eval_n_episodes=40,
        successful_score=successful_return,
        eval_env=eval_env,
        log_interval=100,
        max_episode_len=max_episode_len,
    )
    train_env.close()

    # Test
    n_test_runs = 10
    eval_returns, _ = batch_run_evaluation_episodes(
        eval_env,
        agent,
        n_steps=None,
        n_episodes=n_test_runs,
        max_episode_len=max_episode_len,
    )
    eval_env.close()
    if require_success:
        n_succeeded = np.sum(np.asarray(eval_returns) >= successful_return)
        assert n_succeeded == n_test_runs

    # Save
    agent.save(self.agent_dirname)
def _test_batch_training(self, gpu, steps=5000, load_model=False, require_success=True):
    """Run batch training end-to-end, evaluate, and persist agent + replay buffer.

    Args:
        gpu: GPU device id, or -1 for CPU.
        steps: Total number of training steps.
        load_model: If True, restore both the agent and its replay buffer
            from disk before training.
        require_success: If True, every evaluation run must reach the
            successful return.
    """
    random_seed.set_random_seed(1)
    logging.basicConfig(level=logging.DEBUG)

    train_env, _ = self.make_vec_env_and_successful_return(test=False)
    eval_env, successful_return = self.make_vec_env_and_successful_return(test=True)
    agent = self.make_agent(train_env, gpu)
    if load_model:
        print("Load agent from", self.agent_dirname)
        agent.load(self.agent_dirname)
        agent.replay_buffer.load(self.rbuf_filename)

    # Train
    train_agent_batch_with_evaluation(
        agent=agent,
        env=train_env,
        steps=steps,
        outdir=self.tmpdir,
        eval_interval=200,
        eval_n_steps=None,
        eval_n_episodes=5,
        successful_score=1,
        eval_env=eval_env,
    )
    train_env.close()

    # Test
    n_test_runs = 5
    eval_returns, _ = batch_run_evaluation_episodes(
        eval_env,
        agent,
        n_steps=None,
        n_episodes=n_test_runs,
    )
    eval_env.close()
    n_succeeded = np.sum(np.asarray(eval_returns) >= successful_return)
    if require_success:
        assert n_succeeded == n_test_runs

    # Save
    agent.save(self.agent_dirname)
    agent.replay_buffer.save(self.rbuf_filename)
def _test_abc(self, steps=1000000, require_success=True, gpu=-1, load_model=False):
    """Train an agent on the ABC env, evaluate it, and save it.

    Args:
        steps: Total number of training steps.
        require_success: If True, at least 80% of evaluation runs must
            reach the successful return.
        gpu: GPU device id, or -1 for CPU.
        load_model: If True, restore the agent from ``self.agent_dirname``
            before training.
    """
    env, _ = self.make_env_and_successful_return(test=False, n=self.num_processes)
    test_env, successful_return = self.make_env_and_successful_return(
        test=True, n=1)
    agent = self.make_agent(env, gpu)
    if load_model:
        print("Load agent from", self.agent_dirname)
        agent.load(self.agent_dirname)

    # Train
    pfrl.experiments.train_agent_batch_with_evaluation(
        agent=agent,
        env=env,
        steps=steps,
        outdir=self.tmpdir,
        log_interval=10,
        eval_interval=200,
        eval_n_steps=None,
        eval_n_episodes=50,
        successful_score=1,
        eval_env=test_env,
    )
    env.close()

    # Test
    n_test_runs = 100
    # FIX: batch_run_evaluation_episodes returns (returns, lengths) — the
    # sibling tests unpack it the same way.  Assigning the whole tuple to
    # eval_returns made np.asarray(eval_returns) include episode lengths,
    # so the success count compared lengths against successful_return too.
    eval_returns, _ = batch_run_evaluation_episodes(
        test_env,
        agent,
        n_steps=None,
        n_episodes=n_test_runs,
    )
    test_env.close()
    n_succeeded = np.sum(np.asarray(eval_returns) >= successful_return)
    if require_success:
        assert n_succeeded > 0.8 * n_test_runs

    # Save
    agent.save(self.agent_dirname)
def test_batch_run_evaluation_episodes_with_n_steps(n_episodes, n_steps):
    """Step-limited batch evaluation: n_steps and n_episodes are exclusive.

    Passing both must raise AssertionError; with n_steps alone, evaluation
    stops after the given number of env steps and reports only the episodes
    completed so far.
    """
    # MagicMock can mock eval_mode while Mock cannot.
    agent = mock.MagicMock()
    agent.batch_act.side_effect = [[1, 1]] * 5

    def build_env(env_idx):
        fake_env = mock.Mock()
        if env_idx == 0:
            # First episode: 0 -> 1 -> 2 -> 3 (reset)
            # Second episode: 4 -> 5 -> 6 -> 7 (done)
            initial_obs = [("state", 0), ("state", 4)]
            transitions = [
                (("state", 1), 0, False, {}),
                (("state", 2), 0.1, False, {}),
                (("state", 3), 0.2, False, {"needs_reset": True}),
                (("state", 5), -0.5, False, {}),
                (("state", 6), 0, False, {}),
                (("state", 7), 1, True, {}),
            ]
        else:
            # First episode: 0 -> 1 (reset)
            # Second episode: 2 -> 3 (reset)
            # Third episode: 4 -> 5 -> 6 -> 7 (done)
            initial_obs = [("state", 0), ("state", 2), ("state", 4)]
            transitions = [
                (("state", 1), 2, False, {"needs_reset": True}),
                (("state", 3), 3, False, {"needs_reset": True}),
                (("state", 5), -0.6, False, {}),
                (("state", 6), 0, False, {}),
                (("state", 7), 1, True, {}),
            ]
        fake_env.reset.side_effect = initial_obs
        fake_env.step.side_effect = transitions
        return fake_env

    vec_env = pfrl.envs.SerialVectorEnv([build_env(i) for i in range(2)])

    if n_episodes:
        # Supplying both limits at once is rejected.
        with pytest.raises(AssertionError):
            evaluator.batch_run_evaluation_episodes(
                vec_env, agent, n_steps=n_steps, n_episodes=n_episodes
            )
    else:
        # First Env: [1 2 (3_a) 5 6 (7_a)]
        # Second Env: [(1)(3_b) 5 6 (7_b)]
        scores = evaluator.batch_run_evaluation_episodes(
            vec_env, agent, n_steps=n_steps, n_episodes=n_episodes
        )
        if n_steps == 2:
            assert len(scores) == 1
            np.testing.assert_allclose(scores[0], 0.1)
            assert agent.batch_observe.call_count == 2
        else:
            assert len(scores) == 3
            np.testing.assert_allclose(scores[0], 0.3)
            np.testing.assert_allclose(scores[1], 2.0)
            np.testing.assert_allclose(scores[2], 3.0)
        # batch_reset should be all True
        assert all(agent.batch_observe.call_args[0][3])