def _agent_specific_set_up(self):
    VirtualGPU(256)

    # For this agent, this call is made once at the end of the episode, not on every step.
    self._expected_model_update_during_training_episode: int = 0
    # This is inside the play episode function, so it's still 0 here as .play_episode is called in .train.
    self._expected_model_update_after_playing_episode: int = 0
def set_agent(self, agent_class: Callable, agent_config: ConfigBase, agent_kwargs: Dict[str, Any],
              gpu_memory_limit: int = 512):
    self.gpu_memory_limit = gpu_memory_limit
    VirtualGPU(gpu_device_id=self.device_id, gpu_memory_limit=self.gpu_memory_limit)
    self.agent_class = agent_class

    config = agent_config.build()
    config.update(agent_kwargs)
    self.agent_config = config

    self.agent = agent_class(**self.agent_config)
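# Minimal sketch of the config pattern set_agent relies on: ConfigBase.build() returns a dict of
# constructor kwargs, and agent_kwargs is merged over it before instantiation. The override shown
# here is illustrative; CartPoleConfig and DeepQAgent appear elsewhere in this codebase.
config = CartPoleConfig(agent_type='dqn', plot_during_training=False).build()
config.update({'name': 'my_dqn'})  # per-call overrides win over the config's defaults
agent = DeepQAgent(**config)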
def run_exp(n_episodes: int = 1000, max_episode_steps: int = 1000):
    gpu = VirtualGPU(256)
    exp = AgentExperiment(
        agent_class=ReinforceAgent,
        agent_config=MountainCarConfig(agent_type='reinforce'),
        n_reps=5,
        n_jobs=1 if gpu.on else 5,
        training_options={
            "n_episodes": n_episodes,
            "max_episode_steps": max_episode_steps,
            "update_every": 1,
        },
    )

    exp.run()
    exp.save(fn=f"{ReinforceAgent.__name__}_experiment.pkl")
def _fit_agent(agent_class: Callable, agent_config: ConfigBase, training_options: Dict[str, Any],
               gpu_memory_per_agent: int = 512) -> AgentBase:
    VirtualGPU(gpu_device_id=0, gpu_memory_limit=gpu_memory_per_agent)

    with warnings.catch_warnings():
        warnings.simplefilter('ignore', FutureWarning)

        config_dict = agent_config.build()
        # Give each agent a unique name for easier tracking with verbose output and multiprocessing.
        config_dict["name"] = f"{config_dict.get('name', 'Agent')}_{np.random.randint(0, 2 ** 16)}"

        agent: AgentBase = agent_class(**config_dict)
        agent.train(**training_options)
        # Might as well save the agent. This will also unready and save buffers, models, etc.
        agent.save()
        agent.unready()

    return agent
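# Hedged sketch of fanning _fit_agent out across processes. The joblib usage is an assumption
# for illustration (the actual dispatch inside AgentExperiment isn't shown in this snippet);
# n_jobs and the config mirror the experiment set up in run_exp above.
from joblib import Parallel, delayed

agents = Parallel(n_jobs=5)(
    delayed(_fit_agent)(
        ReinforceAgent,
        MountainCarConfig(agent_type='reinforce'),
        training_options={'n_episodes': 10, 'max_episode_steps': 500},
    )
    for _ in range(5)
)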
def _fit_agent(
        agent_class: Callable,
        agent_config: Dict[str, Any],
        training_kwargs: Dict[str, Any],
        real_device_id: int = 0,
        gpu_memory_limit: int = 512,
) -> Tuple[np.ndarray, ContinuousBuffer, TrainingHistory]:
    VirtualGPU(gpu_device_id=real_device_id, gpu_memory_limit=gpu_memory_limit)

    with warnings.catch_warnings():
        warnings.simplefilter('ignore', FutureWarning)

        agent_config["name"] = f"{agent_config.get('name', 'Agent')}_{str(uuid.uuid1())}"

        agent: DeepQAgent = agent_class(**agent_config)
        agent.train(**training_kwargs)

    return agent.get_weights(), agent.replay_buffer, agent.training_history
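# Hedged usage sketch for the detached variant above: only the tuple unpacking follows from the
# signature; how the caller reattaches the weights, buffer, and history to a fresh agent is not
# shown in this snippet. The config and training values are illustrative.
weights, replay_buffer, history = _fit_agent(
    DeepQAgent,
    agent_config=CartPoleConfig(agent_type='dqn').build(),
    training_kwargs={'n_episodes': 10, 'render': False},
)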
def example(cls, config: ConfigBase, render: bool = True, n_episodes: int = 500, max_episode_steps: int = 500,
            update_every: int = 1) -> "ReinforceAgent":
    """Create, train, and save agent for a given config."""
    VirtualGPU(config.gpu_memory)
    config_dict = config.build()

    agent = cls(**config_dict)
    agent.train(verbose=True, render=render, n_episodes=n_episodes, max_episode_steps=max_episode_steps,
                update_every=update_every)
    agent.save()

    return agent
class TestReinforceAgent(unittest.TestCase):
    _sut = ReinforceAgent
    _agent_type: str = 'reinforce'
    _gpu = VirtualGPU(256)

    def setUp(self) -> None:
        self._tmp_dir = tempfile.TemporaryDirectory()

    def tearDown(self):
        tf.keras.backend.clear_session()
        tf.compat.v1.reset_default_graph()
        gc.collect()
        self._tmp_dir.cleanup()

    def test_saving_and_reloading_creates_identical_object(self):
        # Arrange
        agent = self._sut(**CartPoleConfig(agent_type=self._agent_type, plot_during_training=False,
                                           folder=self._tmp_dir.name).build())
        agent.train(verbose=True, render=False, n_episodes=2)

        # Act
        agent.save()
        agent_2 = self._sut.load(f"{agent.name}_{agent.env_spec}")
        agent_2.check_ready()

        # Assert
        self.assertEqual(agent, agent_2)

    def test_cart_pole_example(self):
        # Arrange
        config = CartPoleConfig(agent_type=self._agent_type, plot_during_training=False, folder=self._tmp_dir.name)

        # Act
        agent = self._sut.example(config, render=False, n_episodes=10)

        # Assert
        self.assertIsInstance(agent, self._sut)
def example(cls, config: ConfigBase, render: bool = True, n_episodes: int = 500, max_episode_steps: int = 500,
            checkpoint_every: int = 100, **kwargs) -> "AgentBase":
    """
    Default example runner.

    kwargs should contain algo-specific settings and will be passed to .train, e.g. update_every for DQN.
    """
    VirtualGPU(config.gpu_memory)
    config_dict = config.build()

    if os.path.exists(config_dict['name']):
        agent = cls.load(config_dict['name'])
        warnings.warn('Loaded existing agent.')
    else:
        agent = cls(**config_dict)  # noqa - __init__ will be available in non-abstract children

    agent.train(verbose=True, render=render, n_episodes=n_episodes, max_episode_steps=max_episode_steps,
                checkpoint_every=checkpoint_every, **kwargs)
    agent.save()

    return agent
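# Usage sketch for the default example runner above; the values are illustrative. Algo-specific
# settings such as update_every are forwarded to .train via **kwargs, as the docstring notes.
agent = DeepQAgent.example(
    CartPoleConfig(agent_type='dqn', plot_during_training=False),
    render=False,
    n_episodes=10,
    checkpoint_every=5,
    update_every=3,  # DQN-specific kwarg passed through to .train
)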
class TestDeepQAgent(unittest.TestCase):
    _sut = DeepQAgent
    _fn = 'test_dqn_save.agents'
    _gpu = VirtualGPU(1024)

    @classmethod
    def setUpClass(cls) -> None:
        if GFOOTBALL_AVAILABLE:
            register_all()

    def setUp(self) -> None:
        self._tmp_dir = tempfile.TemporaryDirectory()

    def tearDown(self):
        tf.keras.backend.clear_session()
        tf.compat.v1.reset_default_graph()
        gc.collect()
        self._tmp_dir.cleanup()

    @staticmethod
    def _build_mock_config(base_config: PongConfig) -> MagicMock:
        config = base_config.build()
        config['eps'] = EpsilonGreedy(eps_initial=0.5, decay=0.0001, eps_min=0.01, decay_schedule='linear',
                                      actions_pool=list(range(3)))
        config['replay_buffer'] = ContinuousBuffer(buffer_size=10)
        config['replay_buffer_samples'] = 2

        mock_config = MagicMock()
        mock_config.gpu_memory = 2048
        mock_config.build.return_value = config

        return mock_config

    def test_saving_and_reloading_creates_identical_object(self):
        # Arrange
        agent = self._sut(**CartPoleConfig(agent_type='dqn', plot_during_training=False,
                                           folder=self._tmp_dir.name).build())
        agent.train(verbose=True, render=False, n_episodes=2)

        # Act
        agent.save()
        agent_2 = self._sut.load(f"{agent.name}_{agent.env_spec}")
        agent_2.check_ready()

        # Assert
        self.assertEqual(agent, agent_2)

    def test_dqn_cart_pole_example(self):
        # Arrange
        config = CartPoleConfig(agent_type='dqn', plot_during_training=False, folder=self._tmp_dir.name)

        # Act
        agent = self._sut.example(config, render=False, n_episodes=10)

        # Assert
        self.assertIsInstance(agent, self._sut)

    def test_dqn_mountain_car_example(self):
        # Arrange
        config = MountainCarConfig(agent_type='dqn', plot_during_training=False, folder=self._tmp_dir.name)

        # Act
        agent = self._sut.example(config, render=False, max_episode_steps=50, n_episodes=10)

        # Assert
        self.assertIsInstance(agent, self._sut)

    def test_dqn_pong_diff_example(self):
        # Arrange
        mock_config = self._build_mock_config(
            PongConfig(agent_type='dqn', mode='diff', plot_during_training=False, folder=self._tmp_dir.name))

        # Act
        # Needs to run for long enough to fill the replay buffer.
        agent = self._sut.example(mock_config, render=False, max_episode_steps=20, n_episodes=3)

        # Assert
        self.assertFalse(agent.model_architecture.dueling)
        self.assertIsInstance(agent, self._sut)

    def test_dqn_pong_stack_example(self):
        # Arrange
        mock_config = self._build_mock_config(
            PongConfig(agent_type='dqn', mode='stack', plot_during_training=False, folder=self._tmp_dir.name))

        # Act
        # Needs to run for long enough to fill the replay buffer.
        agent = self._sut.example(mock_config, render=False, max_episode_steps=20, n_episodes=3)

        # Assert
        self.assertFalse(agent.model_architecture.dueling)
        self.assertIsInstance(agent, self._sut)

    def test_dueling_dqn_cart_pole_example(self):
        # Arrange
        config = CartPoleConfig(agent_type='dueling_dqn', plot_during_training=False, folder=self._tmp_dir.name)

        # Act
        agent = self._sut.example(config, render=False, n_episodes=18)

        # Assert
        self.assertTrue(agent.model_architecture.dueling)
        self.assertIsInstance(agent, self._sut)

    def test_dueling_dqn_mountain_car_example(self):
        # Arrange
        config = MountainCarConfig(agent_type='dueling_dqn', plot_during_training=False, folder=self._tmp_dir.name)

        # Act
        agent = self._sut.example(config, render=False, max_episode_steps=100, n_episodes=18)

        # Assert
        self.assertIsInstance(agent, self._sut)

    def test_double_dqn_cart_pole_example(self):
        # Arrange
        config = CartPoleConfig(agent_type='double_dqn', plot_during_training=False, folder=self._tmp_dir.name)

        # Act
        agent = self._sut.example(config, render=False, n_episodes=16)

        # Assert
        self.assertFalse(agent.model_architecture.dueling)
        self.assertIsInstance(agent, self._sut)

    def test_double_dueling_dqn_cart_pole_example(self):
        # Arrange
        config = CartPoleConfig(agent_type='double_dueling_dqn', plot_during_training=False,
                                folder=self._tmp_dir.name)

        # Act
        agent = self._sut.example(config, render=False, n_episodes=20)

        # Assert
        self.assertTrue(agent.model_architecture.dueling)
        self.assertIsInstance(agent, self._sut)

    @unittest.skipUnless(GFOOTBALL_AVAILABLE, GFOOTBALL_MESSAGE)
    def test_dqn_with_dense_nn_on_gfootball(self):
        # Arrange
        config = GFootballConfig('dqn', env_spec="GFootball-11_vs_11_kaggle-simple115v2-v0",
                                 using_smm_obs=False, using_simple_obs=True,
                                 plot_during_training=False, folder=self._tmp_dir.name)

        # Act
        agent = self._sut.example(config, render=False, n_episodes=3, max_episode_steps=100)

        # Assert
        self.assertFalse(agent.model_architecture.dueling)
        self.assertIsInstance(agent, self._sut)

    @unittest.skipUnless(GFOOTBALL_AVAILABLE, GFOOTBALL_MESSAGE)
    def test_dqn_with_conv_nn_on_gfootball(self):
        # Arrange
        config = GFootballConfig('dqn', env_spec="GFootball-11_vs_11_kaggle-SMM-v0",
                                 using_smm_obs=True, using_simple_obs=False,
                                 plot_during_training=False, folder=self._tmp_dir.name)

        # Act
        agent = self._sut.example(config, render=False, n_episodes=3, max_episode_steps=100)

        # Assert
        self.assertIsInstance(agent, self._sut)

    @unittest.skipUnless(GFOOTBALL_AVAILABLE, GFOOTBALL_MESSAGE)
    def test_dqn_with_splitter_nn_on_gfootball(self):
        # Arrange
        config = GFootballConfig('dqn', env_spec="GFootball-kaggle_11_vs_11-v0",
                                 using_smm_obs=True, using_simple_obs=True,
                                 plot_during_training=False, folder=self._tmp_dir.name)

        # Act
        agent = self._sut.example(config, render=False, n_episodes=3, max_episode_steps=100)

        # Assert
        self.assertFalse(agent.model_architecture.dueling)
        self.assertIsInstance(agent, self._sut)

    @unittest.skipUnless(GFOOTBALL_AVAILABLE, GFOOTBALL_MESSAGE)
    def test_dqn_with_double_dueling_splitter_nn_on_gfootball(self):
        # Arrange
        config = GFootballConfig('double_dueling_dqn', env_spec="GFootball-kaggle_11_vs_11-v0",
                                 plot_during_training=False, folder=self._tmp_dir.name)

        # Act
        agent = self._sut.example(config, render=False, n_episodes=3)

        # Assert
        self.assertIsInstance(agent, self._sut)
        self.assertTrue(agent.model_architecture.dueling)
def test_short_reinforce_agent_parallel_run_completes_with_expected_outputs(self):
    gpu = VirtualGPU(256)
    # Only run the parallel experiment when not on a (virtual) GPU.
    if not gpu:
        self._run_exp(ReinforceAgent, agent_type='reinforce')
def test_short_reinforce_agent_run_completes_with_expected_outputs(self):
    VirtualGPU(256)
    self._run_exp(ReinforceAgent, agent_type='reinforce')
def test_short_deep_q_agent_parallel_run_completes_with_expected_outputs(self):
    gpu = VirtualGPU(256)
    # Only run the parallel experiment when not on a (virtual) GPU.
    if not gpu:
        self._run_exp(DeepQAgent, agent_type='dqn')
def test_short_deep_q_agent_run_completes_with_expected_outputs(self):
    VirtualGPU(256)
    self._run_exp(DeepQAgent, agent_type='dqn')
class TestActorCriticAgent(unittest.TestCase):
    _sut = ActorCriticAgent
    _fn = 'test_ac_save.agents'
    _gpu = VirtualGPU(1024)

    def setUp(self) -> None:
        self._tmp_dir = tempfile.TemporaryDirectory()
        # Ensure eager mode is on; this must be done in each setUp.
        # (TODO: But why? It's on by default and is only turned off in DeepQAgent and ReinforceAgent.
        # Perhaps an import in Configs?)
        tf.compat.v1.enable_eager_execution()

    def tearDown(self):
        tf.keras.backend.clear_session()
        tf.compat.v1.reset_default_graph()
        gc.collect()
        self._tmp_dir.cleanup()

    @staticmethod
    def _build_mock_config(base_config: PongConfig) -> MagicMock:
        config = base_config.build()

        mock_config = MagicMock()
        mock_config.gpu_memory = 2048
        mock_config.build.return_value = config

        return mock_config

    def test_saving_and_reloading_creates_identical_object(self):
        # Arrange
        agent = self._sut(**CartPoleConfig(agent_type='actor_critic', plot_during_training=False,
                                           folder=self._tmp_dir.name).build())
        agent.train(verbose=True, render=False, n_episodes=2)

        # Act
        agent.save()
        agent_2 = self._sut.load(f"{agent.name}_{agent.env_spec}")
        agent_2.check_ready()

        # Assert
        self.assertEqual(agent, agent_2)

    def test_ac_cart_pole_example(self):
        # Arrange
        config = CartPoleConfig(agent_type='actor_critic', plot_during_training=False, folder=self._tmp_dir.name)

        # Act
        agent = self._sut.example(config, render=False, n_episodes=10)

        # Assert
        self.assertIsInstance(agent, self._sut)

    def test_ac_mountain_car_example(self):
        # Arrange
        config = MountainCarConfig(agent_type='actor_critic', plot_during_training=False, folder=self._tmp_dir.name)

        # Act
        agent = self._sut.example(config, render=False, max_episode_steps=50, n_episodes=10)

        # Assert
        self.assertIsInstance(agent, self._sut)

    def test_ac_pong_diff_example(self):
        # Arrange
        mock_config = self._build_mock_config(
            PongConfig(agent_type='actor_critic', mode='diff', plot_during_training=False,
                       folder=self._tmp_dir.name))

        # Act
        # Needs to run for long enough to fill the replay buffer.
        agent = self._sut.example(mock_config, render=False, max_episode_steps=20, n_episodes=3)

        # Assert
        self.assertIsInstance(agent, self._sut)

    def test_ac_pong_stack_example(self):
        # Arrange
        mock_config = self._build_mock_config(
            PongConfig(agent_type='actor_critic', mode='stack', plot_during_training=False,
                       folder=self._tmp_dir.name))

        # Act
        # Needs to run for long enough to fill the replay buffer.
        agent = self._sut.example(mock_config, render=False, max_episode_steps=20, n_episodes=3)

        # Assert
        self.assertIsInstance(agent, self._sut)
def _agent_specific_set_up(self):
    VirtualGPU(256)

    # (in .play_episode)
    self._expected_model_update_after_training_episode: int = 0
    self._expected_model_update_after_playing_episode: int = 0
import vizdoomgym  # noqa
from tf2_vgpu import VirtualGPU
from rlk.agents.q_learning.deep_q_agent import DeepQAgent

VirtualGPU(512)
agent = DeepQAgent.load("DoubleDuelingDQN_GFootball-academy_empty_goal_close-v0")
agent.train(n_episodes=10000, max_episode_steps=20000, checkpoint_every=100, render=False)
def train_nn_s115_raw(use_raw: bool = False, roll_steps: int = 1):
    """
    :param use_raw: Bool indicating whether to add additional features based on the raw observations.
    :param roll_steps: Number of steps to offset the dataset by to obtain past observations. Should match the
                       number of frames the RL agent will use in its frame buffer. Currently can be 0 (use just
                       the current observation) or 1 (use the last and current observations).
    """
    VirtualGPU(1500)
    repo = HDFRepository().set_path("downloaded_games")
    train_episodes, test_episodes = repo.split(train_prop=0.95)

    actions, s115, _, raw = repo.load_episodes(keys=[repo.actions_key, repo.s115_key, repo.raw_key])

    if use_raw:
        x = np.concatenate([s115, raw], axis=1)
    else:
        x = s115

    if roll_steps > 0:
        # Check all episodes have 3000 steps. This relies on identifying episodes with agent, episode, and
        # score. Any clash would be overwritten in the json repo, so any combo should have 3000 steps max.
        semi_unique_ids = (actions['agent_id'].astype(str) + '_' + actions['episode_id'].astype(str)
                           + '_' + actions["updated_score"].astype(str))
        n_steps = np.unique(semi_unique_ids.value_counts())
        n_episodes = len(np.unique(semi_unique_ids))
        assert n_steps == 3000

        td = TimeDimension(n_episode_steps=int(n_steps), n_roll_steps=1).fit(x.shape[0])
        x = td.transform_1d(x)
        actions = actions.reset_index(drop=True).drop(td.idx_to_drop_, axis=0)

        assert x.shape[0] < s115.shape[0]
        assert x.shape[1] > s115.shape[1]
        assert x.shape[0] == actions.shape[0]
        assert x.shape[0] == (s115.shape[0] - n_episodes)
        assert np.unique((actions['agent_id'].astype(str) + '_' + actions['episode_id'].astype(str)
                          + '_' + actions["updated_score"].astype(str)).value_counts()) == 2999

    train_idx = actions['episode_id'].isin(train_episodes).values
    test_idx = actions['episode_id'].isin(test_episodes).values
    x_train = x[train_idx, ...]
    x_test = x[test_idx, ...]
    y_train = actions.iloc[train_idx, :]
    y_test = actions.iloc[test_idx, :]

    print(f"Training with {len(train_episodes)} episodes, totalling {y_train.shape[0]} rows")
    print(f"Evaluating with {len(test_episodes)} episodes, totalling {y_test.shape[0]} rows")

    model_class = DenserNN
    mod_arc = model_class(observation_shape=(x_train.shape[1],), n_actions=19, learning_rate=0.001,
                          output_activation='softmax', dueling=False)
    mod = mod_arc.compile(loss='categorical_crossentropy', metrics=['accuracy'])
    mod.fit(x_train, one_hot(y_train[repo.target], num_classes=19),
            callbacks=[Callbacks.es, Callbacks.tb],
            validation_split=0.2,
            epochs=1000,
            batch_size=5000)
    mod.save("nn_s115_pretrained_model")

    training_summary(mod, x_train, x_test, y_train[repo.target], y_test[repo.target])
    various_plots(mod, x_train, x_test, y_train[repo.target], y_test[repo.target])

    return mod
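# Usage sketch for the training script above: train on s115 features plus raw-observation
# features, with a one-step history roll to match an agent using a two-frame buffer
# (the values are illustrative).
if __name__ == "__main__":
    model = train_nn_s115_raw(use_raw=True, roll_steps=1)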