class TestContinuousPresets(unittest.TestCase):
    """Smoke tests for the continuous-control preset builders (DDPG, PPO, SAC)."""

    def setUp(self):
        self.env = GymEnvironment('LunarLanderContinuous-v2')
        self.env.reset()

    def tearDown(self):
        # Remove the checkpoint written by validate(), if one was created.
        try:
            os.remove('test_preset.pt')
        except FileNotFoundError:
            pass

    def test_ddpg(self):
        self.validate(ddpg)

    def test_ppo(self):
        self.validate(ppo)

    def test_sac(self):
        self.validate(sac)

    def validate(self, builder):
        """Build the preset, then check train/test agents act and save/load works."""
        preset = builder.device('cpu').env(self.env).build()
        # The training agent should act on a live environment state.
        train_agent = preset.agent(writer=DummyWriter(), train_steps=100000)
        train_agent.act(self.env.state)
        # The greedy test agent should act as well.
        preset.test_agent().act(self.env.state)
        # Round-trip the preset through torch serialization.
        preset.save('test_preset.pt')
        reloaded = torch.load('test_preset.pt')
        reloaded.test_agent().act(self.env.state)
def test_step(self):
    """A single CartPole step yields a 4-dim observation, reward 1, and is not done."""
    env = GymEnvironment('CartPole-v0')
    env.reset()
    transition = env.step(1)
    self.assertEqual(transition.observation.shape, (4,))
    self.assertEqual(transition.reward, 1.0)
    self.assertEqual(transition.mask, 1)
    self.assertFalse(transition.done)
class TestContinuousPresets(unittest.TestCase):
    """Smoke tests for continuous-control presets, covering standard and parallel agents."""

    def setUp(self):
        self.env = GymEnvironment('LunarLanderContinuous-v2')
        self.env.reset()
        self.parallel_env = DuplicateEnvironment([
            GymEnvironment('LunarLanderContinuous-v2'),
            GymEnvironment('LunarLanderContinuous-v2'),
        ])
        self.parallel_env.reset()

    def tearDown(self):
        # Remove the checkpoint written during validation, if one was created.
        try:
            os.remove('test_preset.pt')
        except FileNotFoundError:
            pass

    def test_ddpg(self):
        self.validate(ddpg)

    def test_ppo(self):
        self.validate(ppo)

    def test_sac(self):
        self.validate(sac)

    def validate(self, builder):
        """Build the preset and dispatch to the matching validation routine."""
        preset = builder.device('cpu').env(self.env).build()
        # Parallel presets consume batched states, so they get their own path.
        if isinstance(preset, ParallelPreset):
            return self.validate_parallel_preset(preset)
        return self.validate_standard_preset(preset)

    def validate_standard_preset(self, preset):
        # Training agent acts on a single environment state.
        preset.agent(writer=DummyWriter(), train_steps=100000).act(self.env.state)
        # Greedy test agent acts on a single environment state.
        preset.test_agent().act(self.env.state)
        self._check_save_load(preset)

    def validate_parallel_preset(self, preset):
        # Training agent consumes the batched state array.
        preset.agent(writer=DummyWriter(), train_steps=100000).act(self.parallel_env.state_array)
        # A test agent works on single states...
        preset.test_agent().act(self.env.state)
        # ...and a fresh test agent also works on the batched state array.
        preset.test_agent().act(self.parallel_env.state_array)
        self._check_save_load(preset)

    def _check_save_load(self, preset):
        # Round-trip the preset through torch serialization and reuse it.
        preset.save('test_preset.pt')
        reloaded = torch.load('test_preset.pt')
        reloaded.test_agent().act(self.env.state)
def test_step_until_done(self):
    """Stepping until termination yields a done state with mask 0."""
    env = GymEnvironment('CartPole-v0')
    env.reset()
    # Apply a fixed action until the episode terminates (capped at 100 steps).
    state = None
    for _ in range(100):
        state = env.step(1)
        if state.done:
            break
    self.assertEqual(state.observation.shape, (4,))
    self.assertEqual(state.reward, 1.0)
    self.assertTrue(state.done)
    self.assertEqual(state.mask, 0)
class TestClassicControlPresets(unittest.TestCase):
    """Smoke tests for the classic-control preset builders."""

    def setUp(self):
        self.env = GymEnvironment('CartPole-v0')
        self.env.reset()

    def tearDown(self):
        # Remove the checkpoint written by validate(), if one was created.
        try:
            os.remove('test_preset.pt')
        except FileNotFoundError:
            pass

    def test_a2c(self):
        self.validate(a2c)

    def test_c51(self):
        self.validate(c51)

    def test_ddqn(self):
        self.validate(ddqn)

    def test_dqn(self):
        self.validate(dqn)

    def test_ppo(self):
        self.validate(ppo)

    def test_rainbow(self):
        self.validate(rainbow)

    def test_vac(self):
        self.validate(vac)

    def test_vpg(self):
        self.validate(vpg)

    def test_vsarsa(self):
        self.validate(vsarsa)

    def test_vqn(self):
        self.validate(vqn)

    def validate(self, builder):
        """Build the preset, then check train/test agents act and save/load works."""
        preset = builder.device('cpu').env(self.env).build()
        # The training agent should act on a live environment state.
        train_agent = preset.agent(writer=DummyWriter(), train_steps=100000)
        train_agent.act(self.env.state)
        # The greedy test agent should act as well.
        preset.test_agent().act(self.env.state)
        # Round-trip the preset through torch serialization.
        preset.save('test_preset.pt')
        reloaded = torch.load('test_preset.pt')
        reloaded.test_agent().act(self.env.state)
def test_reset_preconstructed_env(self):
    """GymEnvironment should also wrap an already-constructed gym env."""
    env = GymEnvironment(gym.make('CartPole-v0'))
    initial = env.reset()
    self.assertEqual(initial.observation.shape, (4,))
    self.assertEqual(initial.reward, 0)
    self.assertEqual(initial.mask, 1)
    self.assertFalse(initial.done)
class NoFramestackTest(unittest.TestCase):
    """With frame_stack=1, DeepmindAtariBody should emit single-frame features."""

    def setUp(self):
        self.agent = MockAgent()
        self.env = GymEnvironment('PongNoFrameskip-v4')
        self.body = DeepmindAtariBody(
            ToLegacyBody(self.agent),
            self.env,
            noop_max=0,
            frame_stack=1,
        )

    def test_several_steps(self):
        self.env.reset()
        # Drive the body through its initial act/step sequence...
        self.env.step(self.body.act(self.env.state, 0))
        self.env.step(self.body.act(self.env.state, -5))
        # ...then let it act repeatedly without stepping the environment.
        for _ in range(10):
            self.body.act(self.env.state, -5)
        # frame_stack=1 means a single 105x80 frame per feature tensor.
        self.assertEqual(self.agent.state.features.shape, (1, 1, 105, 80))
def evaluate_dqn_all(num_test_episodes):
    """Run a saved greedy DQN agent for ``num_test_episodes`` episodes,
    aggregating per-episode statistics (crashes, arrivals, rewards)."""
    from all.experiments.watch import GreedyAgent
    from all.environments import GymEnvironment

    device = "cuda" if Settings.CUDA else "cpu"
    env = GymEnvironment(Settings.GYM_ENVIRONMENT, device=device)
    agent = GreedyAgent.load('models', env)

    num_crashed = 0
    num_arrived = 0
    action = None
    iteration = 0
    episode_reward = 0
    rewards = []

    rlstats = StatsAggregator()

    def add_reward(state):
        # Closure over episode_reward: reports the reward of the episode
        # being finalized when the aggregator invokes the callback.
        return {"reward": episode_reward}

    rlstats.add_custom_stat_callback(add_reward)

    while iteration < num_test_episodes:
        if env.done:
            # Episode finished: pull stats from the wrapped environment,
            # count it only if it produced a trajectory, then reset.
            underlying = env.env
            stats = underlying.get_stats()
            if len(stats["position_history"]) != 0:
                rlstats.add_episode_stats(stats)
                num_crashed += stats["crashed"]
                num_arrived += stats["merged"]
                iteration += 1
                print(iteration)
                rewards.append(episode_reward)
            episode_reward = 0
            env.reset()
        else:
            env.step(action)
        action = agent.eval(env.state, env.reward)
        episode_reward += env.reward

    logging.info("Rewards: {}".format(rewards))
    rlstats.print_stats()
class DeepmindAtariBodyPongTest(unittest.TestCase):
    """Tests DeepmindAtariBody on Pong: fire-on-reset actions, 4-frame stacking,
    reward clipping, and terminal-state handling.

    NOTE(review): the exact loop counts (4, 10, 11) appear tuned to the body's
    internal frame-skip/stack cadence — confirm against DeepmindAtariBody before
    changing them.
    """

    def setUp(self):
        self.agent = MockAgent()
        self.env = GymEnvironment('PongNoFrameskip-v4')
        # noop_max=0 makes the startup sequence deterministic.
        self.body = DeepmindAtariBody(ToLegacyBody(self.agent), self.env, noop_max=0)

    def test_initial_state(self):
        # The first action after reset is the FIRE action (1).
        self.env.reset()
        action = self.body.act(self.env.state, 0)
        tt.assert_equal(action, torch.tensor([1]))  # fire on reset 1

    def test_second_state(self):
        # The second action continues the fire-on-reset sequence (2).
        self.env.reset()
        self.env.step(self.body.act(self.env.state, 0))
        action = self.body.act(self.env.state, self.env.reward)
        tt.assert_equal(action, torch.tensor([2]))  # fire on reset 2

    def test_several_steps(self):
        self.env.reset()
        self.env.step(self.body.act(self.env.state, 0))
        self.env.step(self.body.act(self.env.state, -5))
        # During the next 4 acts the body keeps repeating INITIAL_ACTION
        # while the frame stack fills; features are 4 stacked 105x80 frames.
        for _ in range(4):
            action = self.body.act(self.env.state, -5)
            self.assertEqual(self.agent.state.features.shape, (1, 4, 105, 80))
            tt.assert_equal(action, INITIAL_ACTION)
            self.env.step(action)
        # After warm-up the agent's chosen ACT_ACTION is passed through.
        for _ in range(10):
            reward = -5  # should be clipped
            self.assertEqual(self.agent.state.features.shape, (1, 4, 105, 80))
            action = self.body.act(self.env.state, reward)
            tt.assert_equal(action, ACT_ACTION)
            self.env.step(action)
        # Clipped rewards accumulate to -4 as seen by the wrapped agent.
        self.assertEqual(self.agent.reward, -4)

    def test_terminal_state(self):
        self.env.reset()
        self.env.step(self.body.act(self.env.state, 0))
        for _ in range(11):
            reward = -5  # should be clipped
            action = self.body.act(self.env.state, reward)
            self.env.step(action)
        # Force a terminal state by zeroing the mask, then act once more.
        # pylint: disable=protected-access
        self.env.state._mask = torch.tensor([0])
        self.body.act(self.env.state, -1)
        tt.assert_equal(action, ACT_ACTION)
        self.assertEqual(self.agent.state.features.shape, (1, 4, 105, 80))
        self.assertEqual(self.agent.reward, -4)
class TestClassicControlPresets(unittest.TestCase):
    """Smoke tests for classic-control presets, covering standard and parallel agents."""

    def setUp(self):
        self.env = GymEnvironment('CartPole-v0')
        self.env.reset()
        self.parallel_env = DuplicateEnvironment([
            GymEnvironment('CartPole-v0'),
            GymEnvironment('CartPole-v0'),
        ])
        self.parallel_env.reset()

    def tearDown(self):
        # Remove the checkpoint written during validation, if one was created.
        try:
            os.remove('test_preset.pt')
        except FileNotFoundError:
            pass

    def test_a2c(self):
        self.validate(a2c)

    def test_c51(self):
        self.validate(c51)

    def test_ddqn(self):
        self.validate(ddqn)

    def test_dqn(self):
        self.validate(dqn)

    def test_ppo(self):
        self.validate(ppo)

    def test_rainbow(self):
        self.validate(rainbow)

    def test_vac(self):
        self.validate(vac)

    def test_vpg(self):
        self.validate(vpg)

    def test_vsarsa(self):
        self.validate(vsarsa)

    def test_vqn(self):
        self.validate(vqn)

    def validate(self, builder):
        """Build the preset and dispatch to the matching validation routine."""
        preset = builder.device('cpu').env(self.env).build()
        # Parallel presets consume batched states, so they get their own path.
        if isinstance(preset, ParallelPreset):
            return self.validate_parallel_preset(preset)
        return self.validate_standard_preset(preset)

    def validate_standard_preset(self, preset):
        # Training agent acts on a single environment state.
        preset.agent(writer=DummyWriter(), train_steps=100000).act(self.env.state)
        # Greedy test agent acts on a single environment state.
        preset.test_agent().act(self.env.state)
        self._check_save_load(preset)

    def validate_parallel_preset(self, preset):
        # Training agent consumes the batched state array.
        preset.agent(writer=DummyWriter(), train_steps=100000).act(self.parallel_env.state_array)
        # A test agent works on single states...
        preset.test_agent().act(self.env.state)
        # ...and a fresh test agent also works on the batched state array.
        preset.test_agent().act(self.parallel_env.state_array)
        self._check_save_load(preset)

    def _check_save_load(self, preset):
        # Round-trip the preset through torch serialization and reuse it.
        preset.save('test_preset.pt')
        reloaded = torch.load('test_preset.pt')
        reloaded.test_agent().act(self.env.state)