Example #1
    def _agent_specific_set_up(self):
        VirtualGPU(256)

        # For this agent, this call is made once at the end of the episode, not on every step
        self._expected_model_update_during_training_episode: int = 0
        # This update happens inside the play-episode function, so it's still 0 here; that function is called from .train
        self._expected_model_update_after_playing_episode: int = 0
    def set_agent(self,
                  agent_class: Callable,
                  agent_config: ConfigBase,
                  agent_kwargs: Dict[str, Any],
                  gpu_memory_limit: int = 512):
        self.gpu_memory_limit = gpu_memory_limit
        VirtualGPU(gpu_device_id=self.device_id,
                   gpu_memory_limit=self.gpu_memory_limit)

        self.agent_class = agent_class
        config = agent_config.build()
        config.update(agent_kwargs)
        self.agent_config = config
        self.agent = agent_class(**self.agent_config)
def run_exp(n_episodes: int = 1000, max_episode_steps: int = 1000):
    gpu = VirtualGPU(256)

    exp = AgentExperiment(
        agent_class=ReinforceAgent,
        agent_config=MountainCarConfig(agent_type='reinforce'),
        n_reps=5,
        n_jobs=1 if gpu.on else 5,
        training_options={
            "n_episodes": n_episodes,
            "max_episode_steps": max_episode_steps,
            "update_every": 1
        })

    exp.run()
    exp.save(fn=f"{ReinforceAgent.__name__}_experiment.pkl")
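The n_jobs switch above runs the experiment repeats in parallel only when no (virtual) GPU is active. A minimal, hedged way to invoke this runner (the __main__ guard is an assumption, not part of the source):

if __name__ == "__main__":
    # Short smoke run; the 1000-episode defaults above are sized for a full experiment.
    run_exp(n_episodes=50, max_episode_steps=200)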
Example #4
    def _fit_agent(agent_class: Callable, agent_config: ConfigBase, training_options: Dict[str, Any],
                   gpu_memory_per_agent: int = 512) -> AgentBase:
        VirtualGPU(gpu_device_id=0, gpu_memory_limit=gpu_memory_per_agent)

        with warnings.catch_warnings():
            warnings.simplefilter('ignore', FutureWarning)

            config_dict = agent_config.build()
            # Give each agent a unique name for easier tracking with verbose output and multiprocessing
            config_dict["name"] = f"{config_dict.get('name', 'Agent')}_{np.random.randint(0, 2 ** 16)}"

            agent: AgentBase = agent_class(**config_dict)
            agent.train(**training_options)
            # Might as well save the agent. This will also unready it and save buffers, models, etc.
            agent.save()
            agent.unready()

        return agent
    def _fit_agent(
        agent_class: Callable,
        agent_config: Dict[str, Any],
        training_kwargs: Dict[str, Any],
        real_device_id: int = 0,
        gpu_memory_limit: int = 512
    ) -> Tuple[np.ndarray, ContinuousBuffer, TrainingHistory]:
        VirtualGPU(gpu_device_id=real_device_id,
                   gpu_memory_limit=gpu_memory_limit)

        with warnings.catch_warnings():
            warnings.simplefilter('ignore', FutureWarning)

            agent_config["name"] = f"{agent_config.get('name', 'Agent')}_{uuid.uuid1()}"

            agent: DeepQAgent = agent_class(**agent_config)
            agent.train(**training_kwargs)

        return agent.get_weights(), agent.replay_buffer, agent.training_history
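The second variant returns picklable pieces (weights, replay buffer, training history) rather than the agent itself, presumably so the results can cross a process boundary without dragging a compiled Keras model along. A hedged sketch of how such a worker might be fanned out, assuming joblib as the backend (the actual dispatch code is not shown in these snippets) and reusing names that appear elsewhere in this collection:

from joblib import Parallel, delayed

# Hypothetical dispatch: one process per repeat; each process builds its own VirtualGPU.
# DeepQAgent, CartPoleConfig and _fit_agent are as defined/used elsewhere in these examples.
results = Parallel(n_jobs=5, backend="loky")(
    delayed(_fit_agent)(
        agent_class=DeepQAgent,
        agent_config=CartPoleConfig(agent_type='dqn').build(),
        training_kwargs={"n_episodes": 100, "max_episode_steps": 500},
        gpu_memory_limit=512,
    )
    for _ in range(5)
)
weights, buffers, histories = zip(*results)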
Example #6
    def example(cls,
                config: ConfigBase,
                render: bool = True,
                n_episodes: int = 500,
                max_episode_steps: int = 500,
                update_every: int = 1) -> "ReinforceAgent":
        """Create, train, and save agent for a given config."""
        VirtualGPU(config.gpu_memory)
        config_dict = config.build()

        agent = cls(**config_dict)

        agent.train(verbose=True,
                    render=render,
                    n_episodes=n_episodes,
                    max_episode_steps=max_episode_steps,
                    update_every=update_every)
        agent.save()

        return agent
class TestReinforceAgent(unittest.TestCase):
    _sut = ReinforceAgent
    _agent_type: str = 'reinforce'
    _gpu = VirtualGPU(256)

    def setUp(self) -> None:
        self._tmp_dir = tempfile.TemporaryDirectory()

    def tearDown(self):
        tf.keras.backend.clear_session()
        tf.compat.v1.reset_default_graph()
        gc.collect()
        self._tmp_dir.cleanup()

    def test_saving_and_reloading_creates_identical_object(self):
        # Arrange
        agent = self._sut(**CartPoleConfig(agent_type=self._agent_type,
                                           plot_during_training=False,
                                           folder=self._tmp_dir.name).build())
        agent.train(verbose=True, render=False, n_episodes=2)

        # Act
        agent.save()
        agent_2 = self._sut.load(f"{agent.name}_{agent.env_spec}")
        agent_2.check_ready()

        # Assert
        self.assertEqual(agent, agent_2)

    def test_cart_pole_example(self):
        # Arrange
        config = CartPoleConfig(agent_type=self._agent_type,
                                plot_during_training=False,
                                folder=self._tmp_dir.name)

        # Act
        agent = self._sut.example(config, render=False, n_episodes=10)

        # Assert
        self.assertIsInstance(agent, self._sut)
Example #8
    def example(cls, config: ConfigBase, render: bool = True,
                n_episodes: int = 500, max_episode_steps: int = 500,
                checkpoint_every: int = 100, **kwargs) -> "AgentBase":
        """
        Default example runner.

        kwargs should contain algorithm-specific settings and will be passed to .train, e.g. update_every for DQN.
        """
        VirtualGPU(config.gpu_memory)

        config_dict = config.build()
        if os.path.exists(config_dict['name']):
            agent = cls.load(config_dict['name'])
            warnings.warn('Loaded existing agent.')
        else:
            agent = cls(**config_dict)  # noqa - __init__ will be available in non abstract children

        agent.train(verbose=True, render=render,
                    n_episodes=n_episodes, max_episode_steps=max_episode_steps,
                    checkpoint_every=checkpoint_every, **kwargs)
        agent.save()

        return agent
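As the docstring above notes, algorithm-specific settings ride through **kwargs to .train. A minimal call, reusing config and agent names from the tests in this collection (folder is omitted here and assumed to have a usable default; the update_every value is illustrative only):

config = CartPoleConfig(agent_type='dqn', plot_during_training=False)
agent = DeepQAgent.example(config, render=False, n_episodes=10, update_every=1)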
Example #9
class TestDeepQAgent(unittest.TestCase):
    _sut = DeepQAgent
    _fn = 'test_dqn_save.agents'
    _gpu = VirtualGPU(1024)

    @classmethod
    def setUpClass(cls) -> None:
        if GFOOTBALL_AVAILABLE:
            register_all()

    def setUp(self) -> None:
        self._tmp_dir = tempfile.TemporaryDirectory()

    def tearDown(self):
        tf.keras.backend.clear_session()
        tf.compat.v1.reset_default_graph()
        gc.collect()
        self._tmp_dir.cleanup()

    @staticmethod
    def _build_mock_config(base_config: PongConfig) -> MagicMock:
        config = base_config.build()
        config['eps'] = EpsilonGreedy(eps_initial=0.5,
                                      decay=0.0001,
                                      eps_min=0.01,
                                      decay_schedule='linear',
                                      actions_pool=list(range(3)))
        config['replay_buffer'] = ContinuousBuffer(buffer_size=10)
        config['replay_buffer_samples'] = 2
        mock_config = MagicMock()
        mock_config.gpu_memory = 2048
        mock_config.build.return_value = config

        return mock_config

    def test_saving_and_reloading_creates_identical_object(self):
        # Arrange
        agent = self._sut(**CartPoleConfig(agent_type='dqn',
                                           plot_during_training=False,
                                           folder=self._tmp_dir.name).build())
        agent.train(verbose=True, render=False, n_episodes=2)

        # Act
        agent.save()
        agent_2 = self._sut.load(f"{agent.name}_{agent.env_spec}")
        agent_2.check_ready()

        # Assert
        self.assertEqual(agent, agent_2)

    def test_dqn_cart_pole_example(self):
        # Arrange
        config = CartPoleConfig(agent_type='dqn',
                                plot_during_training=False,
                                folder=self._tmp_dir.name)

        # Act
        agent = self._sut.example(config, render=False, n_episodes=10)

        # Assert
        self.assertIsInstance(agent, self._sut)

    def test_dqn_mountain_car_example(self):
        # Arrange
        config = MountainCarConfig(agent_type='dqn',
                                   plot_during_training=False,
                                   folder=self._tmp_dir.name)

        # Act
        agent = self._sut.example(config,
                                  render=False,
                                  max_episode_steps=50,
                                  n_episodes=10)

        # Assert
        self.assertIsInstance(agent, self._sut)

    def test_dqn_pong_diff_example(self):
        # Arrange
        mock_config = self._build_mock_config(
            PongConfig(agent_type='dqn',
                       mode='diff',
                       plot_during_training=False,
                       folder=self._tmp_dir.name))

        # Act
        # Needs to run for long enough to fill replay buffer
        agent = self._sut.example(mock_config,
                                  render=False,
                                  max_episode_steps=20,
                                  n_episodes=3)

        # Assert
        self.assertFalse(agent.model_architecture.dueling)
        self.assertIsInstance(agent, self._sut)

    def test_dqn_pong_stack_example(self):
        # Arrange
        mock_config = self._build_mock_config(
            PongConfig(agent_type='dqn',
                       mode='stack',
                       plot_during_training=False,
                       folder=self._tmp_dir.name))

        # Act
        # Needs to run for long enough to fill replay buffer
        agent = self._sut.example(mock_config,
                                  render=False,
                                  max_episode_steps=20,
                                  n_episodes=3)

        # Assert
        self.assertFalse(agent.model_architecture.dueling)
        self.assertIsInstance(agent, self._sut)

    def test_dueling_dqn_cart_pole_example(self):
        # Arrange
        config = CartPoleConfig(agent_type='dueling_dqn',
                                plot_during_training=False,
                                folder=self._tmp_dir.name)

        # Act
        agent = self._sut.example(config, render=False, n_episodes=18)

        # Assert
        self.assertTrue(agent.model_architecture.dueling)
        self.assertIsInstance(agent, self._sut)

    def test_dueling_dqn_mountain_car_example(self):
        # Arrange
        config = MountainCarConfig(agent_type='dueling_dqn',
                                   plot_during_training=False,
                                   folder=self._tmp_dir.name)

        # Act
        agent = self._sut.example(config,
                                  render=False,
                                  max_episode_steps=100,
                                  n_episodes=18)

        # Assert
        self.assertIsInstance(agent, self._sut)

    def test_double_dqn_cart_pole_example(self):
        # Arrange
        config = CartPoleConfig(agent_type='double_dqn',
                                plot_during_training=False,
                                folder=self._tmp_dir.name)

        # Act
        agent = self._sut.example(config, render=False, n_episodes=16)

        # Assert
        self.assertFalse(agent.model_architecture.dueling)
        self.assertIsInstance(agent, self._sut)

    def test_double_dueling_dqn_cart_pole_example(self):
        # Arrange
        config = CartPoleConfig(agent_type='double_dueling_dqn',
                                plot_during_training=False,
                                folder=self._tmp_dir.name)

        # Act
        agent = self._sut.example(config, render=False, n_episodes=20)

        # Assert
        self.assertTrue(agent.model_architecture.dueling)
        self.assertIsInstance(agent, self._sut)

    @unittest.skipUnless(GFOOTBALL_AVAILABLE, GFOOTBALL_MESSAGE)
    def test_dqn_with_dense_nn_on_gfootball(self):
        # Arrange
        config = GFootballConfig(
            'dqn',
            env_spec="GFootball-11_vs_11_kaggle-simple115v2-v0",
            using_smm_obs=False,
            using_simple_obs=True,
            plot_during_training=False,
            folder=self._tmp_dir.name)
        # Act
        agent = self._sut.example(config,
                                  render=False,
                                  n_episodes=3,
                                  max_episode_steps=100)

        # Assert
        self.assertFalse(agent.model_architecture.dueling)
        self.assertIsInstance(agent, self._sut)

    @unittest.skipUnless(GFOOTBALL_AVAILABLE, GFOOTBALL_MESSAGE)
    def test_dqn_with_conv_nn_on_gfootball(self):
        # Arrange
        config = GFootballConfig('dqn',
                                 env_spec="GFootball-11_vs_11_kaggle-SMM-v0",
                                 using_smm_obs=True,
                                 using_simple_obs=False,
                                 plot_during_training=False,
                                 folder=self._tmp_dir.name)
        # Act
        agent = self._sut.example(config,
                                  render=False,
                                  n_episodes=3,
                                  max_episode_steps=100)

        # Assert
        self.assertIsInstance(agent, self._sut)

    @unittest.skipUnless(GFOOTBALL_AVAILABLE, GFOOTBALL_MESSAGE)
    def test_dqn_with_splitter_nn_on_gfootball(self):
        # Arrange
        config = GFootballConfig('dqn',
                                 env_spec="GFootball-kaggle_11_vs_11-v0",
                                 using_smm_obs=True,
                                 using_simple_obs=True,
                                 plot_during_training=False,
                                 folder=self._tmp_dir.name)

        # Act
        agent = self._sut.example(config,
                                  render=False,
                                  n_episodes=3,
                                  max_episode_steps=100)

        # Assert
        self.assertFalse(agent.model_architecture.dueling)
        self.assertIsInstance(agent, self._sut)

    @unittest.skipUnless(GFOOTBALL_AVAILABLE, GFOOTBALL_MESSAGE)
    def test_dqn_with_double_dueling_splitter_nn_on_gfootball(self):
        # Arrange
        config = GFootballConfig('double_dueling_dqn',
                                 env_spec="GFootball-kaggle_11_vs_11-v0",
                                 plot_during_training=False,
                                 folder=self._tmp_dir.name)

        # Act
        agent = self._sut.example(config, render=False, n_episodes=3)

        # Assert
        self.assertIsInstance(agent, self._sut)
        self.assertTrue(agent.model_architecture.dueling)
Example #10
 def test_short_reinforce_agent_parallel_run_completes_with_expected_outputs(self):
     gpu = VirtualGPU(256)
     # Only exercise the parallel path when not running on a (virtual) GPU
     if not gpu.on:
         self._run_exp(ReinforceAgent, agent_type='reinforce')
Example #11
 def test_short_reinforce_agent_run_completes_with_expected_outputs(self):
     VirtualGPU(256)
     self._run_exp(ReinforceAgent, agent_type='reinforce')
Example #12
 def test_short_deep_q_agent_parallel_run_completes_with_expected_outputs(self):
     gpu = VirtualGPU(256)
     # Only exercise the parallel path when not running on a (virtual) GPU
     if not gpu.on:
         self._run_exp(DeepQAgent, agent_type='dqn')
Example #13
 def test_short_deep_q_agent_run_completes_with_expected_outputs(self):
     VirtualGPU(256)
     self._run_exp(DeepQAgent, agent_type='dqn')
class TestActorCriticAgent(unittest.TestCase):
    _sut = ActorCriticAgent
    _fn = 'test_ac_save.agents'
    _gpu = VirtualGPU(1024)

    def setUp(self) -> None:
        self._tmp_dir = tempfile.TemporaryDirectory()

        # Ensure eager mode is on; this must be done in each setUp.
        # (TODO: But why? It's on by default and is only turned off in DeepQAgent and ReinforceAgent.
        #  Perhaps an import in Configs?)
        tf.compat.v1.enable_eager_execution()

    def tearDown(self):
        tf.keras.backend.clear_session()
        tf.compat.v1.reset_default_graph()
        gc.collect()
        self._tmp_dir.cleanup()

    @staticmethod
    def _build_mock_config(base_config: PongConfig) -> MagicMock:
        config = base_config.build()
        mock_config = MagicMock()
        mock_config.gpu_memory = 2048
        mock_config.build.return_value = config

        return mock_config

    def test_saving_and_reloading_creates_identical_object(self):
        # Arrange
        agent = self._sut(**CartPoleConfig(agent_type='actor_critic',
                                           plot_during_training=False,
                                           folder=self._tmp_dir.name).build())
        agent.train(verbose=True, render=False, n_episodes=2)

        # Act
        agent.save()
        agent_2 = self._sut.load(f"{agent.name}_{agent.env_spec}")
        agent_2.check_ready()

        # Assert
        self.assertEqual(agent, agent_2)

    def test_ac_cart_pole_example(self):
        # Arrange
        config = CartPoleConfig(agent_type='actor_critic',
                                plot_during_training=False,
                                folder=self._tmp_dir.name)

        # Act
        agent = self._sut.example(config, render=False, n_episodes=10)

        # Assert
        self.assertIsInstance(agent, self._sut)

    def test_ac_mountain_car_example(self):
        # Arrange
        config = MountainCarConfig(agent_type='actor_critic',
                                   plot_during_training=False,
                                   folder=self._tmp_dir.name)

        # Act
        agent = self._sut.example(config,
                                  render=False,
                                  max_episode_steps=50,
                                  n_episodes=10)

        # Assert
        self.assertIsInstance(agent, self._sut)

    def test_ac_pong_diff_example(self):
        # Arrange
        mock_config = self._build_mock_config(
            PongConfig(agent_type='actor_critic',
                       mode='diff',
                       plot_during_training=False,
                       folder=self._tmp_dir.name))

        # Act
        # Needs to run for long enough to fill replay buffer
        agent = self._sut.example(mock_config,
                                  render=False,
                                  max_episode_steps=20,
                                  n_episodes=3)

        # Assert
        self.assertIsInstance(agent, self._sut)

    def test_ac_pong_stack_example(self):
        # Arrange
        mock_config = self._build_mock_config(
            PongConfig(agent_type='actor_critic',
                       mode='stack',
                       plot_during_training=False,
                       folder=self._tmp_dir.name))

        # Act
        # Needs to run for long enough to fill replay buffer
        agent = self._sut.example(mock_config,
                                  render=False,
                                  max_episode_steps=20,
                                  n_episodes=3)

        # Assert
        self.assertIsInstance(agent, self._sut)
Example #15
    def _agent_specific_set_up(self):
        VirtualGPU(256)

        # (in .play_episode)
        self._expected_model_update_after_training_episode: int = 0
        self._expected_model_update_after_playing_episode: int = 0
Example #16
import vizdoomgym  # noqa
from tf2_vgpu import VirtualGPU

from rlk.agents.q_learning.deep_q_agent import DeepQAgent

VirtualGPU(512)

agent = DeepQAgent.load(
    "DoubleDuelingDQN_GFootball-academy_empty_goal_close-v0")

agent.train(n_episodes=10000,
            max_episode_steps=20000,
            checkpoint_every=100,
            render=False)
def train_nn_s115_raw(use_raw: bool = False, roll_steps: int = 1):
    """

    :param use_raw: Bool indicating whether or not to add additional features based on the raw observations.
    :param roll_steps: Number of steps to offset the dataset by to obtain past observations. Should match number of
                       frames the RL agent will use in its frame buffer. Currently can b 0 (use just current
                       observation) or 1 (use last and current).
    """

    VirtualGPU(1500)

    repo = HDFRepository().set_path("downloaded_games")

    train_episodes, test_episodes = repo.split(train_prop=0.95)
    actions, s115, _, raw = repo.load_episodes(
        keys=[repo.actions_key, repo.s115_key, repo.raw_key])

    if use_raw:
        x = np.concatenate([s115, raw], axis=1)
    else:
        x = s115

    if roll_steps > 0:
        # Check all episodes have 3000 steps.
        # This relies on identifying episodes by agent, episode, and score. Any clash would be overwritten in the
        # json repo, so any combo should have 3000 steps max.
        semi_unique_ids = (actions['agent_id'].astype(str) + '_' +
                           actions['episode_id'].astype(str) + '_' +
                           actions["updated_score"].astype(str))
        n_steps = np.unique(semi_unique_ids.value_counts())
        n_episodes = len(np.unique(semi_unique_ids))
        assert n_steps == 3000

        td = TimeDimension(n_episode_steps=int(n_steps),
                           n_roll_steps=1).fit(x.shape[0])
        x = td.transform_1d(x)
        actions = actions.reset_index(drop=True).drop(td.idx_to_drop_, axis=0)

        assert x.shape[0] < s115.shape[0]
        assert x.shape[1] > s115.shape[1]
        assert x.shape[0] == actions.shape[0]
        assert x.shape[0] == (s115.shape[0] - n_episodes)
        assert np.unique(
            (actions['agent_id'].astype(str) + '_' +
             actions['episode_id'].astype(str) + '_' +
             actions["updated_score"].astype(str)).value_counts()) == 2999

    train_idx = actions['episode_id'].isin(train_episodes).values
    test_idx = actions['episode_id'].isin(test_episodes).values
    x_train = x[train_idx, ...]
    x_test = x[test_idx, ...]
    y_train = actions.iloc[train_idx, :]
    y_test = actions.iloc[test_idx, :]

    print(f"Training with {len(train_episodes)} episodes, totalling {y_train.shape[0]} rows")
    print(f"Evaluating with {len(test_episodes)} episodes, totalling {y_test.shape[0]} rows")

    model_class = DenserNN
    mod_arc = model_class(observation_shape=(x_train.shape[1], ),
                          n_actions=19,
                          learning_rate=0.001,
                          output_activation='softmax',
                          dueling=False)
    mod = mod_arc.compile(loss='categorical_crossentropy',
                          metrics=['accuracy'])

    mod.fit(x_train,
            one_hot(y_train[repo.target], num_classes=19),
            callbacks=[Callbacks.es, Callbacks.tb],
            validation_split=0.2,
            epochs=1000,
            batch_size=5000)
    mod.save(f"nn_s115_pretrained_model")

    training_summary(mod, x_train, x_test, y_train[repo.target],
                     y_test[repo.target])
    various_plots(mod, x_train, x_test, y_train[repo.target],
                  y_test[repo.target])

    return mod
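For intuition only, a hedged numpy sketch of the roll_steps bookkeeping described in the docstring above; this is not the TimeDimension implementation, just an illustration of why each 3000-step episode ends up with 2999 rows after a roll of 1:

import numpy as np

# Toy data: 2 episodes of 5 steps, 3 features each.
n_episode_steps, n_features = 5, 3
x = np.arange(2 * n_episode_steps * n_features, dtype=float).reshape(-1, n_features)

# With a roll of 1, pair each step with its predecessor from the same episode and
# drop each episode's first step (it has no predecessor).
rows = []
for ep_start in range(0, x.shape[0], n_episode_steps):
    ep = x[ep_start:ep_start + n_episode_steps]
    rows.append(np.concatenate([ep[:-1], ep[1:]], axis=1))  # [previous, current]
x_rolled = np.concatenate(rows, axis=0)

assert x_rolled.shape == (2 * (n_episode_steps - 1), 2 * n_features)  # one row lost per episode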