Example n. 1
0
    def test_cart_pole_example(self):
        """Running the example entry point should return an instance of the agent under test."""
        # Arrange: config writes into the test's temp dir and skips plotting.
        cart_pole_config = CartPoleConfig(
            agent_type=self._agent_type,
            plot_during_training=False,
            folder=self._tmp_dir.name,
        )

        # Act: short 10-episode run, no rendering.
        trained_agent = self._sut.example(cart_pole_config, render=False, n_episodes=10)

        # Assert
        self.assertIsInstance(trained_agent, self._sut)
def run_exp(n_episodes: int = 1000, max_episode_steps: int = 500):
    """Run a repeated LinearQAgent CartPole experiment and pickle the results.

    :param n_episodes: Number of training episodes per repetition.
    :param max_episode_steps: Step cap applied to each episode.
    """
    options = {
        "n_episodes": n_episodes,
        "max_episode_steps": max_episode_steps,
    }
    # 32 repetitions run with 32 parallel jobs (one job per rep).
    experiment = AgentExperiment(
        agent_class=LinearQAgent,
        agent_config=CartPoleConfig('linear_q'),
        n_reps=32,
        n_jobs=32,
        training_options=options,
    )
    experiment.run()
    experiment.save(fn=f"{LinearQAgent.__name__}_experiment.pkl")
    def test_double_dueling_dqn_cart_pole_example(self):
        """The 'double_dueling_dqn' example should yield a dueling-architecture agent."""
        # Arrange
        dqn_config = CartPoleConfig(
            agent_type='double_dueling_dqn',
            plot_during_training=False,
            folder=self._tmp_dir.name,
        )

        # Act: short 20-episode run, no rendering.
        trained_agent = self._sut.example(dqn_config, render=False, n_episodes=20)

        # Assert: the dueling head is enabled and the right class came back.
        self.assertTrue(trained_agent.model_architecture.dueling)
        self.assertIsInstance(trained_agent, self._sut)
Example n. 4
0
def run_exp(n_episodes: int = 1000, max_episode_steps: int = 500):
    """Run a repeated ReinforceAgent CartPole experiment and pickle the results.

    :param n_episodes: Number of training episodes per repetition.
    :param max_episode_steps: Step cap applied to each episode.
    """
    options = {
        "n_episodes": n_episodes,
        "max_episode_steps": max_episode_steps,
        # update_every=1 -> presumably one model update per episode; confirm
        # against AgentExperiment/agent.train semantics.
        "update_every": 1,
    }
    experiment = AgentExperiment(
        agent_class=ReinforceAgent,
        agent_config=CartPoleConfig(agent_type='reinforce'),
        n_reps=5,
        n_jobs=6,
        training_options=options,
    )
    experiment.run()
    experiment.save(fn=f"{ReinforceAgent.__name__}_experiment.pkl")
Example n. 5
0
    def test_saving_and_reloading_creates_identical_object(self):
        """Save followed by load should round-trip to an equal agent."""
        # Arrange: build and briefly train an agent so there is state to persist.
        build_kwargs = CartPoleConfig(agent_type=self._agent_type, plot_during_training=False,
                                      folder=self._tmp_dir.name).build()
        original = self._sut(**build_kwargs)
        original.train(verbose=True, render=False, n_episodes=2)

        # Act: persist, reload under the conventional "<name>_<env_spec>" key,
        # then re-attach any detached resources before comparing.
        original.save()
        reloaded = self._sut.load(f"{original.name}_{original.env_spec}")
        reloaded.check_ready()

        # Assert: equality semantics are defined by the agent class.
        self.assertEqual(original, reloaded)
Example n. 6
0
def run_exp(agent_type: str,
            n_episodes: int = 1000,
            max_episode_steps: int = 500):
    """Run a repeated ActorCriticAgent CartPole experiment and pickle the results.

    :param agent_type: Name of the actor-critic variant to configure.
    :param n_episodes: Number of training episodes per repetition.
    :param max_episode_steps: Step cap applied to each episode.
    """
    exp = AgentExperiment(name=f"{agent_type} CartPole",
                          agent_class=ActorCriticAgent,
                          agent_config=CartPoleConfig(agent_type=agent_type),
                          n_reps=6,
                          n_jobs=6,
                          training_options={
                              "n_episodes": n_episodes,
                              "max_episode_steps": max_episode_steps
                          })

    exp.run()
    # Fix: the original f-string dropped the separator, producing names like
    # "ActorCriticAgent_actor_criticexperiment.pkl". Insert "_" to match the
    # "<Class>_..._experiment.pkl" convention used by the other run_exp helpers.
    exp.save(fn=f"{ActorCriticAgent.__name__}_{agent_type}_experiment.pkl")
from rlk.agents.actor_critic.actor_critic import ActorCriticAgent
from rlk.environments.cart_pole.cart_pole_config import CartPoleConfig

if __name__ == "__main__":
    # Train the example actor-critic agent on CartPole, then persist it.
    trained_agent = ActorCriticAgent.example(
        config=CartPoleConfig(agent_type='actor_critic'),
        max_episode_steps=500,
        n_episodes=2000,
        render=False,
        update_every=6,
        checkpoint_every=0,
    )
    trained_agent.save()
from rlk.agents.q_learning.deep_q_agent import DeepQAgent
from rlk.environments.cart_pole.cart_pole_config import CartPoleConfig

if __name__ == "__main__":
    # Train the example DQN agent on CartPole, then persist it.
    trained_agent = DeepQAgent.example(
        config=CartPoleConfig(agent_type='dqn'),
        max_episode_steps=500,
        n_episodes=2000,
        render=False,
        update_every=6,
        checkpoint_every=0,
    )
    trained_agent.save()
Example n. 9
0
class TestReinforceAgent(TestRandomAgent):
    """REINFORCE-specific overrides of the shared agent test interface."""
    _sut = ReinforceAgent
    _config = CartPoleConfig(agent_type='reinforce',
                             plot_during_training=False)

    def _agent_specific_set_up(self):
        VirtualGPU(256)

        # The model update happens once at the end of each episode, not on
        # every step, so no updates are expected mid-episode.
        self._expected_model_update_during_training_episode: int = 0
        # play_episode itself performs no update (that happens in .train),
        # so the post-play expectation is also zero.
        self._expected_model_update_after_playing_episode: int = 0

    @staticmethod
    def _checkpoint_model(agent: ReinforceAgent) -> List[np.ndarray]:
        """Deep-copy the model weights so later drift can be detected."""
        return copy.deepcopy(agent._model.get_weights())

    def _assert_model_unchanged(self, agent: ReinforceAgent,
                                checkpoint: List[np.ndarray]):
        """Every current weight array must still match the checkpoint."""
        for idx, current in enumerate(agent._model.get_weights()):
            assert_array_almost_equal(current, checkpoint[idx])

    def _assert_relevant_play_episode_change(
            self, agent: ReinforceAgent, checkpoint: List[np.ndarray]) -> None:
        """Playing an episode should only grow the buffer for this MC agent."""
        self._assert_buffer_changed(agent, checkpoint)

    def _assert_relevant_after_play_episode_change(
            self, agent: ReinforceAgent, checkpoint: List[np.ndarray]) -> None:
        """The end-of-episode update should have modified the model."""
        self._assert_model_changed(agent, checkpoint)

    def _assert_buffer_changed(self, agent: ReinforceAgent,
                               checkpoint: List[np.ndarray]):
        """Buffer inspection not implemented yet."""
        # TODO
        pass

    def _assert_model_changed(self, agent: ReinforceAgent,
                              checkpoint: List[np.ndarray]) -> None:
        """Each weight array must differ somewhere from the checkpoint."""
        current_weights = agent._model.get_weights()
        for idx, saved in enumerate(checkpoint):
            # Round before comparing so pure float noise doesn't count as a change.
            self.assertFalse(
                np.all(current_weights[idx].round(8) == saved.round(8)))

    def _assert_agent_unready(self, agent: ReinforceAgent) -> None:
        """An unready agent has no model attached and reports not ready."""
        self.assertIsNone(agent._model)
        self.assertFalse(agent.ready)

    def _assert_agent_ready(self, agent: ReinforceAgent) -> None:
        """A ready agent has an environment and a model attached."""
        self.assertIsNotNone(agent.env_builder._env)
        self.assertIsNotNone(agent._model)
        self.assertTrue(agent.ready)

    def test_train_calls_after_episode_updates_model_as_expected(self) -> None:
        """.train should trigger the per-episode update that changes the model."""
        # Arrange
        agent = self._ready_agent()
        weights_before = self._checkpoint_model(agent)

        # Act
        agent.train(n_episodes=self._n_episodes,
                    max_episode_steps=self._n_step,
                    render=False,
                    checkpoint_every=0)

        # Assert
        self._assert_relevant_after_play_episode_change(agent, weights_before)
Example n. 10
0
class TestDeepQAgent(TestRandomAgent):
    """Deep Q agent specialisation of the shared agent test interface.

    The DQN holds two networks (an action model and a target model) plus a
    replay buffer, so the readiness and change-detection hooks inspect all
    three.
    """
    _sut = DeepQAgent
    _config = CartPoleConfig(agent_type='dqn', plot_during_training=False)

    def _agent_specific_set_up(self):
        # NOTE(review): presumably caps GPU memory for the test run — confirm
        # against VirtualGPU's implementation.
        VirtualGPU(256)

        # (in .play_episode)
        self._expected_model_update_after_training_episode: int = 0
        self._expected_model_update_after_playing_episode: int = 0

    def _ready_agent(self) -> DeepQAgent:
        """Build an agent from the class config with both networks in sync."""
        agent = self._sut(**self._config.build())
        # Sync weights so checks are simpler
        agent.update_target_model()

        return agent

    @staticmethod
    def _checkpoint_model(agent: DeepQAgent) -> List[np.ndarray]:
        """Get coefs from each model"""
        # Only the action model is snapshotted; _ready_agent synced the
        # target model to it, so one checkpoint covers both networks.
        return copy.deepcopy(agent._action_model.get_weights())

    def _assert_model_unchanged(self, agent: DeepQAgent,
                                checkpoint: List[np.ndarray]) -> None:
        # Both networks are compared against the same (synced) checkpoint.
        action_weights = agent._action_model.get_weights()
        value_weights = agent._target_model.get_weights()
        for w in range(len(action_weights)):
            assert_array_almost_equal(action_weights[w], checkpoint[w])
            assert_array_almost_equal(value_weights[w], checkpoint[w])

    def _assert_relevant_play_episode_change(
            self, agent: DeepQAgent, checkpoint: List[np.ndarray]) -> None:
        # For this agent only the buffer is expected to change while playing.
        self._assert_buffer_changed(agent, checkpoint)

    def _assert_relevant_after_play_episode_change(
            self, agent: DeepQAgent, checkpoint: List[np.ndarray]) -> None:
        """TODO: self._assert_value_model_changed(agent, checkpoint) (?)"""
        pass

    def _assert_buffer_changed(self, agent: DeepQAgent,
                               checkpoint: List[np.ndarray]) -> None:
        # TODO
        pass

    def _assert_action_model_changed(self, agent: DeepQAgent,
                                     checkpoint: List[np.ndarray]) -> None:
        # Rounded comparison so float noise isn't mistaken for an update.
        action_weights = agent._action_model.get_weights()
        for w in range(len(checkpoint)):
            self.assertFalse(
                np.all(action_weights[w].round(6) == checkpoint[w].round(6)))

    def _assert_value_model_changed(self, agent: DeepQAgent,
                                    checkpoint: List[np.ndarray]) -> None:
        # Rounded comparison so float noise isn't mistaken for an update.
        value_weights = agent._target_model.get_weights()
        for w in range(len(checkpoint)):
            self.assertFalse(
                np.all(value_weights[w].round(6) == checkpoint[w].round(6)))

    def _assert_model_changed(self, agent: DeepQAgent,
                              checkpoint: List[np.ndarray]) -> None:
        # "Model changed" for a DQN means both networks moved.
        self._assert_action_model_changed(agent, checkpoint)
        self._assert_value_model_changed(agent, checkpoint)

    def _assert_agent_unready(self, agent: DeepQAgent) -> None:
        # Unreadying should detach both models and the replay buffer.
        self.assertIsNone(agent._action_model)
        self.assertIsNone(agent._target_model)
        self.assertIsNone(agent.replay_buffer)
        self.assertFalse(agent.ready)

    def _assert_agent_ready(self, agent: DeepQAgent) -> None:
        # A ready DQN has both models and a replay buffer attached.
        self.assertIsNotNone(agent._action_model)
        self.assertIsNotNone(agent._target_model)
        self.assertIsNotNone(agent.replay_buffer)
        self.assertTrue(agent.ready)

    def test_play_episode_steps_does_not_call_update_models_when_not_training(
            self) -> None:
        """Neither network should be updated when playing without training."""
        # Arrange
        agent = self._ready_agent()

        # Act
        with patch.object(agent, 'update_model') as mocked_update_model, \
                patch.object(agent, "update_target_model") as mocked_update_value_model:
            _ = agent.play_episode(max_episode_steps=self._n_step,
                                   training=False,
                                   render=False)

        # Assert
        self.assertEqual(self._expected_model_update_during_playing_episode,
                         mocked_update_model.call_count)
        self.assertEqual(self._expected_model_update_after_playing_episode,
                         mocked_update_value_model.call_count)
        # The step cap should have been pushed onto the wrapped environment.
        self.assertEqual(self._n_step,
                         agent.env_builder._env._max_episode_steps)

    def test_play_episode_steps_calls_update_models_when_training(
            self) -> None:
        """Training plays should update each network the expected number of times."""
        # Arrange
        agent = self._ready_agent()

        # Act
        with patch.object(agent, 'update_model') as mocked_update_model, \
                patch.object(agent, "update_target_model") as mocked_update_value_model:
            _ = agent.play_episode(max_episode_steps=self._n_step,
                                   training=True,
                                   render=False)

        # Assert
        self.assertEqual(self._expected_model_update_during_training_episode,
                         mocked_update_model.call_count)
        self.assertEqual(self._expected_model_update_after_training_episode,
                         mocked_update_value_model.call_count)
        # The step cap should have been pushed onto the wrapped environment.
        self.assertEqual(self._n_step,
                         agent.env_builder._env._max_episode_steps)
class TestRandomAgent(unittest.TestCase):
    """
    The random agent is basically a non-abstract version of AgentBase. It's used here to define the general test
    interface, as if it's a mocked AgentBase.

    Other agents can modify the private methods here to define how to arrange/assert for their specific cases.
    Acting is standard across agents, except for where path.object is required for multiple methods (see DQN as an
    example).
    """
    _sut = RandomAgent
    _config = CartPoleConfig(agent_type='random', plot_during_training=False)

    def _standard_set_up(self) -> None:
        """Set the step/episode counts and expectations shared by all agents."""
        self._n_step = 4
        self._n_episodes = 3
        # These are the calls during .play_episode (not .train)
        self._expected_model_update_during_training_episode: int = self._n_step
        self._expected_model_update_during_playing_episode: int = 0
        self._expected_play_episode_calls = self._n_episodes

    def _agent_specific_set_up(self) -> None:
        """Subclass hook: override the expected call counts for a specific agent."""
        # This agent doesn't bother calling update_model as it doesn't have one.
        self._expected_model_update_during_training_episode: int = 0
        self._expected_model_update_after_training_episode: int = 0

    def setUp(self) -> None:
        # Standard values first so the agent-specific hook can override them.
        self._standard_set_up()
        self._agent_specific_set_up()

    def _ready_agent(self) -> RandomAgent:
        """Construct a fully usable agent from the class-level config."""
        return self._sut(**self._config.build())

    @staticmethod
    def _checkpoint_model(agent: AgentBase) -> None:
        """No model to checkpoint."""
        return None

    def _assert_model_unchanged(self, agent: AgentBase,
                                checkpoint: None) -> None:
        """No model to compare, nothing to assert."""
        pass

    def _assert_buffer_changed(self, agent: AgentBase,
                               checkpoint: None) -> None:
        """No buffer to change in RandomAgent."""
        pass

    def _assert_model_changed(self, agent: AgentBase,
                              checkpoint: None) -> None:
        """No model to compare, nothing to assert."""
        pass

    def _assert_relevant_play_episode_change(self, agent: AgentBase,
                                             checkpoint: None) -> None:
        """This can differ between MC and TD agents. In MC case model might not be updated but buffer is."""
        self._assert_buffer_changed(agent, checkpoint)
        self._assert_model_changed(agent, checkpoint)

    def _assert_relevant_after_play_episode_change(self, agent: AgentBase,
                                                   checkpoint: None) -> None:
        """This can differ between MC and TD agents. In MC case buffer might not be updated but model is."""
        self._assert_buffer_changed(agent, checkpoint)
        self._assert_model_changed(agent, checkpoint)

    def _assert_agent_unready(self, agent: AgentBase) -> None:
        """Nothing to unready in RandomAgent."""
        pass

    def _assert_agent_ready(self, agent: RandomAgent) -> None:
        """A ready RandomAgent has an env attached and a RandomModel."""
        self.assertIsNotNone(agent.env_builder._env)
        self.assertIsInstance(agent.model, RandomModel)

    def test_env_set_during_init(self) -> None:
        """Construction should attach an environment."""
        # Act
        agent = self._ready_agent()

        # Assert
        self.assertIsNotNone(agent.env_builder._env)

    def test_model_set_during_init(self) -> None:
        """Construction should leave the agent in the ready state."""
        # Act
        agent = self._ready_agent()

        # Assert
        self._assert_agent_ready(agent)

    def test_history_set_during_init(self) -> None:
        """Construction should initialise the training history."""
        # Act
        agent = self._ready_agent()

        # Assert
        self.assertIsNotNone(agent.training_history)

    def test_unready_detaches_env_and_models(self) -> None:
        """unready() should detach whatever the agent's unready-check inspects."""
        # Arrange
        agent = self._ready_agent()

        # Act
        agent.unready()

        # Assert
        self._assert_agent_unready(agent)

    def test_ready_restores_matching_object(self) -> None:
        """check_ready() after unready() should restore an equivalent agent."""
        # Arrange
        agent = self._ready_agent()
        checkpoint = self._checkpoint_model(agent)
        agent.unready()

        # Act
        agent.check_ready()

        # Assert
        self._assert_agent_ready(agent)
        self._assert_model_unchanged(agent, checkpoint)

    def test_play_episode_steps_returns_reward_when_not_training(self) -> None:
        """play_episode without training should still return an EpisodeReport."""
        # Arrange
        agent = self._ready_agent()

        # Act
        reward = agent.play_episode(max_episode_steps=3,
                                    training=False,
                                    render=False)

        # Assert
        self.assertIsInstance(reward, EpisodeReport)

    def test_play_episode_steps_does_not_call_update_models_when_not_training(
            self) -> None:
        """update_model should not be called when playing without training."""
        # Arrange
        agent = self._ready_agent()

        # Act
        with patch.object(agent, 'update_model') as mocked_update_model:
            _ = agent.play_episode(max_episode_steps=self._n_step,
                                   training=False,
                                   render=False)

        # Assert
        self.assertEqual(self._expected_model_update_during_playing_episode,
                         mocked_update_model.call_count)
        # The step cap should have been pushed onto the wrapped environment.
        self.assertEqual(self._n_step,
                         agent.env_builder._env._max_episode_steps)

    def test_play_episode_steps_does_not_update_models_when_not_training(
            self) -> None:
        """Model weights must be untouched by a non-training play."""
        # Arrange
        agent = self._ready_agent()
        checkpoint = self._checkpoint_model(agent)

        # Act
        _ = agent.play_episode(max_episode_steps=self._n_step,
                               training=False,
                               render=False)

        # Assert
        self._assert_model_unchanged(agent, checkpoint)

    def test_play_episode_steps_returns_reward_and_updates_model_when_training(
            self) -> None:
        """A training play should still return an EpisodeReport."""
        # Arrange
        agent = self._ready_agent()

        # Act
        reward = agent.play_episode(max_episode_steps=self._n_step,
                                    training=True,
                                    render=False)

        # Assert
        self.assertIsInstance(reward, EpisodeReport)

    def test_play_episode_steps_calls_update_models_as_expected_when_training(
            self) -> None:
        """update_model is called the agent-specific expected number of times."""
        # Arrange
        agent = self._ready_agent()

        # Act
        with patch.object(agent, 'update_model') as mocked_update_model:
            _ = agent.play_episode(max_episode_steps=self._n_step,
                                   training=True,
                                   render=False)

        # Assert
        self.assertEqual(self._expected_model_update_during_training_episode,
                         mocked_update_model.call_count)

    def test_play_episode_steps_updates_models_as_expected_when_training(
            self) -> None:
        """A training play changes whatever state the agent says it should (buffer/model)."""
        # Arrange
        agent = self._ready_agent()
        checkpoint = self._checkpoint_model(agent)

        # Act
        _ = agent.play_episode(max_episode_steps=self._n_step,
                               training=True,
                               render=False)

        # Assert
        self._assert_relevant_play_episode_change(agent, checkpoint)

    def test_train_runs_multiple_episodes(self) -> None:
        """.train drives play_episode and the after-episode hook once per episode."""
        # Arrange
        agent = self._ready_agent()

        # Act
        with patch.object(agent, 'play_episode') as mocked_play_episode, \
                patch.object(agent, '_after_episode_update') as after_ep_update:
            agent.train(n_episodes=self._n_episodes,
                        max_episode_steps=self._n_step,
                        render=False,
                        checkpoint_every=0)

        # Assert
        self.assertEqual(self._n_episodes, len(agent.training_history.history))
        self.assertEqual(self._n_episodes, mocked_play_episode.call_count)
        self.assertEqual(self._n_episodes, after_ep_update.call_count)

    def test_train_calls_after_episode_updates_model_as_expected(self) -> None:
        """.train should produce the agent-specific after-episode state change."""
        # Arrange
        agent = self._ready_agent()
        checkpoint = self._checkpoint_model(agent)

        # Act
        agent.train(n_episodes=self._n_episodes,
                    max_episode_steps=self._n_step,
                    render=False,
                    checkpoint_every=0)

        # Assert
        self._assert_relevant_after_play_episode_change(agent, checkpoint)

    def test_train_calls_after_episode_update_as_expected_with_delayed(
            self) -> None:
        """With update_every=2 over 3 episodes, the after-episode hook fires twice.

        NOTE(review): presumably .train updates on every update_every-th
        episode and once more at the end — confirm against AgentBase.train.
        """
        # Arrange
        agent = self._ready_agent()

        # Act
        with patch.object(agent, 'play_episode') as mocked_play_episode, \
                patch.object(agent, '_after_episode_update') as after_ep_update:
            agent.train(n_episodes=self._n_episodes,
                        max_episode_steps=self._n_step,
                        render=False,
                        checkpoint_every=0,
                        update_every=2)

        # Assert
        self.assertEqual(self._n_episodes, len(agent.training_history.history))
        self.assertEqual(self._n_episodes, mocked_play_episode.call_count)
        self.assertEqual(2, after_ep_update.call_count)