def test_cart_pole_example(self):
    """The example helper should return a trained agent of the SUT's type."""
    # Arrange
    cart_pole_config = CartPoleConfig(agent_type=self._agent_type,
                                      plot_during_training=False,
                                      folder=self._tmp_dir.name)

    # Act
    trained_agent = self._sut.example(cart_pole_config, render=False, n_episodes=10)

    # Assert
    self.assertIsInstance(trained_agent, self._sut)
def run_exp(n_episodes: int = 1000, max_episode_steps: int = 500):
    """Run a repeated LinearQAgent CartPole experiment and pickle the results.

    :param n_episodes: Episodes to train for in each repeat.
    :param max_episode_steps: Step cap applied to every episode.
    """
    training_options = {"n_episodes": n_episodes,
                        "max_episode_steps": max_episode_steps}
    experiment = AgentExperiment(agent_class=LinearQAgent,
                                 agent_config=CartPoleConfig('linear_q'),
                                 n_reps=32,
                                 n_jobs=32,
                                 training_options=training_options)
    experiment.run()
    experiment.save(fn=f"{LinearQAgent.__name__}_experiment.pkl")
def test_double_dueling_dqn_cart_pole_example(self):
    """Running the example with the double-dueling config should yield a dueling model."""
    # Arrange
    dueling_config = CartPoleConfig(agent_type='double_dueling_dqn',
                                    plot_during_training=False,
                                    folder=self._tmp_dir.name)

    # Act
    trained_agent = self._sut.example(dueling_config, render=False, n_episodes=20)

    # Assert
    self.assertTrue(trained_agent.model_architecture.dueling)
    self.assertIsInstance(trained_agent, self._sut)
def run_exp(n_episodes: int = 1000, max_episode_steps: int = 500):
    """Run a repeated ReinforceAgent CartPole experiment and pickle the results.

    :param n_episodes: Episodes to train for in each repeat.
    :param max_episode_steps: Step cap applied to every episode.
    """
    opts = {"n_episodes": n_episodes,
            "max_episode_steps": max_episode_steps,
            "update_every": 1}
    experiment = AgentExperiment(agent_class=ReinforceAgent,
                                 agent_config=CartPoleConfig(agent_type='reinforce'),
                                 n_reps=5,
                                 n_jobs=6,
                                 training_options=opts)
    experiment.run()
    experiment.save(fn=f"{ReinforceAgent.__name__}_experiment.pkl")
def test_saving_and_reloading_creates_identical_object(self):
    """Round-tripping a trained agent through save/load should give an equal object."""
    # Arrange
    original = self._sut(**CartPoleConfig(agent_type=self._agent_type,
                                          plot_during_training=False,
                                          folder=self._tmp_dir.name).build())
    original.train(verbose=True, render=False, n_episodes=2)

    # Act
    original.save()
    reloaded = self._sut.load(f"{original.name}_{original.env_spec}")
    reloaded.check_ready()

    # Assert
    self.assertEqual(original, reloaded)
def run_exp(agent_type: str, n_episodes: int = 1000, max_episode_steps: int = 500):
    """Run a repeated ActorCriticAgent CartPole experiment and pickle the results.

    :param agent_type: Actor-critic variant name passed through to CartPoleConfig.
    :param n_episodes: Episodes to train for in each repeat.
    :param max_episode_steps: Step cap applied to every episode.
    """
    exp = AgentExperiment(name=f"{agent_type} CartPole",
                          agent_class=ActorCriticAgent,
                          agent_config=CartPoleConfig(agent_type=agent_type),
                          n_reps=6,
                          n_jobs=6,
                          training_options={"n_episodes": n_episodes,
                                            "max_episode_steps": max_episode_steps})
    exp.run()
    # Fix: separator was missing between agent_type and "experiment", producing
    # names like "ActorCriticAgent_actor_criticexperiment.pkl"; now matches the
    # "_experiment.pkl" convention used by the sibling experiment scripts.
    exp.save(fn=f"{ActorCriticAgent.__name__}_{agent_type}_experiment.pkl")
from rlk.agents.actor_critic.actor_critic import ActorCriticAgent
from rlk.environments.cart_pole.cart_pole_config import CartPoleConfig

if __name__ == "__main__":
    # Train an actor-critic agent on CartPole via the example helper,
    # then persist the trained agent to disk.
    trained = ActorCriticAgent.example(config=CartPoleConfig(agent_type='actor_critic'),
                                       max_episode_steps=500,
                                       n_episodes=2000,
                                       render=False,
                                       update_every=6,
                                       checkpoint_every=0)
    trained.save()
from rlk.agents.q_learning.deep_q_agent import DeepQAgent
from rlk.environments.cart_pole.cart_pole_config import CartPoleConfig

if __name__ == "__main__":
    # Train a DQN agent on CartPole via the example helper,
    # then persist the trained agent to disk.
    trained = DeepQAgent.example(config=CartPoleConfig(agent_type='dqn'),
                                 max_episode_steps=500,
                                 n_episodes=2000,
                                 render=False,
                                 update_every=6,
                                 checkpoint_every=0)
    trained.save()
class TestReinforceAgent(TestRandomAgent):
    """REINFORCE-specific overrides of the generic agent test interface."""
    _sut = ReinforceAgent
    _config = CartPoleConfig(agent_type='reinforce', plot_during_training=False)

    def _agent_specific_set_up(self):
        VirtualGPU(256)
        # REINFORCE updates its model once at the end of an episode, not per step.
        self._expected_model_update_during_training_episode: int = 0
        # play_episode itself performs no update; .train triggers it afterwards,
        # so the expected count here is still 0.
        self._expected_model_update_after_playing_episode: int = 0

    @staticmethod
    def _checkpoint_model(agent: ReinforceAgent) -> List[np.ndarray]:
        """Snapshot the policy model's weights."""
        return copy.deepcopy(agent._model.get_weights())

    def _assert_model_unchanged(self, agent: ReinforceAgent,
                                checkpoint: List[np.ndarray]):
        current_weights = agent._model.get_weights()
        for idx, current in enumerate(current_weights):
            assert_array_almost_equal(current, checkpoint[idx])

    def _assert_relevant_play_episode_change(
            self, agent: ReinforceAgent,
            checkpoint: List[np.ndarray]) -> None:
        self._assert_buffer_changed(agent, checkpoint)

    def _assert_relevant_after_play_episode_change(
            self, agent: ReinforceAgent,
            checkpoint: List[np.ndarray]) -> None:
        self._assert_model_changed(agent, checkpoint)

    def _assert_buffer_changed(self, agent: ReinforceAgent,
                               checkpoint: List[np.ndarray]):
        # TODO
        pass

    def _assert_model_changed(self, agent: ReinforceAgent,
                              checkpoint: List[np.ndarray]) -> None:
        current_weights = agent._model.get_weights()
        for idx, saved in enumerate(checkpoint):
            # Rounded comparison to avoid spurious float-noise mismatches.
            self.assertFalse(
                np.all(current_weights[idx].round(8) == saved.round(8)))

    def _assert_agent_unready(self, agent: ReinforceAgent) -> None:
        self.assertIsNone(agent._model)
        self.assertFalse(agent.ready)

    def _assert_agent_ready(self, agent: ReinforceAgent) -> None:
        self.assertIsNotNone(agent.env_builder._env)
        self.assertIsNotNone(agent._model)
        self.assertTrue(agent.ready)

    def test_train_calls_after_episode_updates_model_as_expected(self) -> None:
        # Arrange
        agent = self._ready_agent()
        checkpoint = self._checkpoint_model(agent)

        # Act
        agent.train(n_episodes=self._n_episodes,
                    max_episode_steps=self._n_step,
                    render=False, checkpoint_every=0)

        # Assert
        self._assert_relevant_after_play_episode_change(agent, checkpoint)
class TestDeepQAgent(TestRandomAgent):
    """DQN-specific overrides of the generic agent test interface."""
    _sut = DeepQAgent
    _config = CartPoleConfig(agent_type='dqn', plot_during_training=False)

    def _agent_specific_set_up(self):
        VirtualGPU(256)
        # (in .play_episode)
        self._expected_model_update_after_training_episode: int = 0
        self._expected_model_update_after_playing_episode: int = 0

    def _ready_agent(self) -> DeepQAgent:
        agent = self._sut(**self._config.build())
        # Sync weights so checks are simpler
        agent.update_target_model()
        return agent

    @staticmethod
    def _checkpoint_model(agent: DeepQAgent) -> List[np.ndarray]:
        """Snapshot the action model's weights (target model is synced to it)."""
        return copy.deepcopy(agent._action_model.get_weights())

    def _assert_model_unchanged(self, agent: DeepQAgent,
                                checkpoint: List[np.ndarray]) -> None:
        action_weights = agent._action_model.get_weights()
        value_weights = agent._target_model.get_weights()
        for idx, action_layer in enumerate(action_weights):
            assert_array_almost_equal(action_layer, checkpoint[idx])
            assert_array_almost_equal(value_weights[idx], checkpoint[idx])

    def _assert_relevant_play_episode_change(
            self, agent: DeepQAgent,
            checkpoint: List[np.ndarray]) -> None:
        self._assert_buffer_changed(agent, checkpoint)

    def _assert_relevant_after_play_episode_change(
            self, agent: DeepQAgent,
            checkpoint: List[np.ndarray]) -> None:
        """TODO: self._assert_value_model_changed(agent, checkpoint) (?)"""
        pass

    def _assert_buffer_changed(self, agent: DeepQAgent,
                               checkpoint: List[np.ndarray]) -> None:
        # TODO
        pass

    def _assert_action_model_changed(self, agent: DeepQAgent,
                                     checkpoint: List[np.ndarray]) -> None:
        action_weights = agent._action_model.get_weights()
        for idx, saved in enumerate(checkpoint):
            # Rounded comparison to avoid spurious float-noise mismatches.
            self.assertFalse(
                np.all(action_weights[idx].round(6) == saved.round(6)))

    def _assert_value_model_changed(self, agent: DeepQAgent,
                                    checkpoint: List[np.ndarray]) -> None:
        value_weights = agent._target_model.get_weights()
        for idx, saved in enumerate(checkpoint):
            self.assertFalse(
                np.all(value_weights[idx].round(6) == saved.round(6)))

    def _assert_model_changed(self, agent: DeepQAgent,
                              checkpoint: List[np.ndarray]) -> None:
        self._assert_action_model_changed(agent, checkpoint)
        self._assert_value_model_changed(agent, checkpoint)

    def _assert_agent_unready(self, agent: DeepQAgent) -> None:
        self.assertIsNone(agent._action_model)
        self.assertIsNone(agent._target_model)
        self.assertIsNone(agent.replay_buffer)
        self.assertFalse(agent.ready)

    def _assert_agent_ready(self, agent: DeepQAgent) -> None:
        self.assertIsNotNone(agent._action_model)
        self.assertIsNotNone(agent._target_model)
        self.assertIsNotNone(agent.replay_buffer)
        self.assertTrue(agent.ready)

    def test_play_episode_steps_does_not_call_update_models_when_not_training(
            self) -> None:
        # Arrange
        agent = self._ready_agent()

        # Act
        with patch.object(agent, 'update_model') as mocked_update_model, \
                patch.object(agent, "update_target_model") as mocked_update_value_model:
            _ = agent.play_episode(max_episode_steps=self._n_step,
                                   training=False, render=False)

        # Assert
        self.assertEqual(self._expected_model_update_during_playing_episode,
                         mocked_update_model.call_count)
        self.assertEqual(self._expected_model_update_after_playing_episode,
                         mocked_update_value_model.call_count)
        self.assertEqual(self._n_step,
                         agent.env_builder._env._max_episode_steps)

    def test_play_episode_steps_calls_update_models_when_training(
            self) -> None:
        # Arrange
        agent = self._ready_agent()

        # Act
        with patch.object(agent, 'update_model') as mocked_update_model, \
                patch.object(agent, "update_target_model") as mocked_update_value_model:
            _ = agent.play_episode(max_episode_steps=self._n_step,
                                   training=True, render=False)

        # Assert
        self.assertEqual(self._expected_model_update_during_training_episode,
                         mocked_update_model.call_count)
        self.assertEqual(self._expected_model_update_after_training_episode,
                         mocked_update_value_model.call_count)
        self.assertEqual(self._n_step,
                         agent.env_builder._env._max_episode_steps)
class TestRandomAgent(unittest.TestCase):
    """
    The random agent is basically a non-abstract version of AgentBase.

    It's used here to define the general test interface, as if it's a mocked
    AgentBase. Other agents can modify the private methods here to define how
    to arrange/assert for their specific cases. Acting is standard across
    agents, except for where path.object is required for multiple methods
    (see DQN as an example).
    """
    _sut = RandomAgent
    _config = CartPoleConfig(agent_type='random', plot_during_training=False)

    def _standard_set_up(self) -> None:
        self._n_step = 4
        self._n_episodes = 3
        # These are the calls during .play_episode (not .train)
        self._expected_model_update_during_training_episode: int = self._n_step
        self._expected_model_update_during_playing_episode: int = 0
        self._expected_play_episode_calls = self._n_episodes

    def _agent_specific_set_up(self) -> None:
        # This agent doesn't bother calling update_model as it doesn't have one.
        self._expected_model_update_during_training_episode: int = 0
        self._expected_model_update_after_training_episode: int = 0

    def setUp(self) -> None:
        self._standard_set_up()
        self._agent_specific_set_up()

    def _ready_agent(self) -> RandomAgent:
        return self._sut(**self._config.build())

    @staticmethod
    def _checkpoint_model(agent: AgentBase) -> None:
        """No model to checkpoint."""
        return None

    def _assert_model_unchanged(self, agent: AgentBase,
                                checkpoint: None) -> None:
        """No model to compare, nothing to assert."""
        pass

    def _assert_buffer_changed(self, agent: AgentBase,
                               checkpoint: None) -> None:
        """No buffer to change in RandomAgent."""
        pass

    def _assert_model_changed(self, agent: AgentBase,
                              checkpoint: None) -> None:
        """No model to compare, nothing to assert."""
        pass

    def _assert_relevant_play_episode_change(self, agent: AgentBase,
                                             checkpoint: None) -> None:
        """This can differ between MC and TD agents.

        In MC case model might not be updated but buffer is.
        """
        self._assert_buffer_changed(agent, checkpoint)
        self._assert_model_changed(agent, checkpoint)

    def _assert_relevant_after_play_episode_change(self, agent: AgentBase,
                                                   checkpoint: None) -> None:
        """This can differ between MC and TD agents.

        In MC case buffer might not be updated but model is.
        """
        self._assert_buffer_changed(agent, checkpoint)
        self._assert_model_changed(agent, checkpoint)

    def _assert_agent_unready(self, agent: AgentBase) -> None:
        """Nothing to unready in RandomAgent."""
        pass

    def _assert_agent_ready(self, agent: RandomAgent) -> None:
        self.assertIsNotNone(agent.env_builder._env)
        self.assertIsInstance(agent.model, RandomModel)

    def test_env_set_during_init(self) -> None:
        # Act
        agent = self._ready_agent()

        # Assert
        self.assertIsNotNone(agent.env_builder._env)

    def test_model_set_during_init(self) -> None:
        # Act
        agent = self._ready_agent()

        # Assert
        self._assert_agent_ready(agent)

    def test_history_set_during_init(self) -> None:
        # Act
        agent = self._ready_agent()

        # Assert
        self.assertIsNotNone(agent.training_history)

    def test_unready_detaches_env_and_models(self) -> None:
        # Arrange
        agent = self._ready_agent()

        # Act
        agent.unready()

        # Assert
        self._assert_agent_unready(agent)

    def test_ready_restores_matching_object(self) -> None:
        # Arrange
        agent = self._ready_agent()
        checkpoint = self._checkpoint_model(agent)
        agent.unready()

        # Act
        agent.check_ready()

        # Assert
        self._assert_agent_ready(agent)
        self._assert_model_unchanged(agent, checkpoint)

    def test_play_episode_steps_returns_reward_when_not_training(self) -> None:
        # Arrange
        agent = self._ready_agent()

        # Act
        reward = agent.play_episode(max_episode_steps=3, training=False,
                                    render=False)

        # Assert
        self.assertIsInstance(reward, EpisodeReport)

    def test_play_episode_steps_does_not_call_update_models_when_not_training(
            self) -> None:
        # Arrange
        agent = self._ready_agent()

        # Act
        with patch.object(agent, 'update_model') as mocked_update_model:
            _ = agent.play_episode(max_episode_steps=self._n_step,
                                   training=False, render=False)

        # Assert
        self.assertEqual(self._expected_model_update_during_playing_episode,
                         mocked_update_model.call_count)
        self.assertEqual(self._n_step,
                         agent.env_builder._env._max_episode_steps)

    def test_play_episode_steps_does_not_update_models_when_not_training(
            self) -> None:
        # Arrange
        agent = self._ready_agent()
        checkpoint = self._checkpoint_model(agent)

        # Act
        _ = agent.play_episode(max_episode_steps=self._n_step,
                               training=False, render=False)

        # Assert
        self._assert_model_unchanged(agent, checkpoint)

    def test_play_episode_steps_returns_reward_and_updates_model_when_training(
            self) -> None:
        # Arrange
        agent = self._ready_agent()

        # Act
        reward = agent.play_episode(max_episode_steps=self._n_step,
                                    training=True, render=False)

        # Assert
        self.assertIsInstance(reward, EpisodeReport)

    def test_play_episode_steps_calls_update_models_as_expected_when_training(
            self) -> None:
        # Arrange
        agent = self._ready_agent()

        # Act
        with patch.object(agent, 'update_model') as mocked_update_model:
            _ = agent.play_episode(max_episode_steps=self._n_step,
                                   training=True, render=False)

        # Assert
        self.assertEqual(self._expected_model_update_during_training_episode,
                         mocked_update_model.call_count)

    def test_play_episode_steps_updates_models_as_expected_when_training(
            self) -> None:
        # Arrange
        agent = self._ready_agent()
        checkpoint = self._checkpoint_model(agent)

        # Act
        _ = agent.play_episode(max_episode_steps=self._n_step,
                               training=True, render=False)

        # Assert
        self._assert_relevant_play_episode_change(agent, checkpoint)

    def test_train_runs_multiple_episodes(self) -> None:
        # Arrange
        agent = self._ready_agent()

        # Act
        with patch.object(agent, 'play_episode') as mocked_play_episode, \
                patch.object(agent, '_after_episode_update') as after_ep_update:
            agent.train(n_episodes=self._n_episodes,
                        max_episode_steps=self._n_step,
                        render=False, checkpoint_every=0)

        # Assert
        self.assertEqual(self._n_episodes,
                         len(agent.training_history.history))
        self.assertEqual(self._n_episodes, mocked_play_episode.call_count)
        self.assertEqual(self._n_episodes, after_ep_update.call_count)

    def test_train_calls_after_episode_updates_model_as_expected(self) -> None:
        # Arrange
        agent = self._ready_agent()
        checkpoint = self._checkpoint_model(agent)

        # Act
        agent.train(n_episodes=self._n_episodes,
                    max_episode_steps=self._n_step,
                    render=False, checkpoint_every=0)

        # Assert
        self._assert_relevant_after_play_episode_change(agent, checkpoint)

    def test_train_calls_after_episode_update_as_expected_with_delayed(
            self) -> None:
        # Arrange
        agent = self._ready_agent()

        # Act
        with patch.object(agent, 'play_episode') as mocked_play_episode, \
                patch.object(agent, '_after_episode_update') as after_ep_update:
            agent.train(n_episodes=self._n_episodes,
                        max_episode_steps=self._n_step,
                        render=False, checkpoint_every=0, update_every=2)

        # Assert
        self.assertEqual(self._n_episodes,
                         len(agent.training_history.history))
        self.assertEqual(self._n_episodes, mocked_play_episode.call_count)
        # With update_every=2 over 3 episodes, the delayed update fires twice.
        self.assertEqual(2, after_ep_update.call_count)