class NoFramestackTest(unittest.TestCase):
    """Checks DeepmindAtariBody preprocessing with frame stacking disabled.

    With frame_stack=1 the agent should receive a single downscaled frame
    per observation rather than the default 4-frame stack.
    """

    def setUp(self):
        self.agent = MockAgent()
        self.env = GymEnvironment('PongNoFrameskip-v4')
        # noop_max=0 disables random no-op starts so the step sequence is
        # deterministic; frame_stack=1 is the behavior under test.
        self.body = DeepmindAtariBody(ToLegacyBody(self.agent), self.env, noop_max=0, frame_stack=1)

    def test_several_steps(self):
        self.env.reset()
        # Drive the body through its initial actions, then several plain steps.
        self.env.step(self.body.act(self.env.state, 0))
        self.env.step(self.body.act(self.env.state, -5))
        for _ in range(10):
            self.body.act(self.env.state, -5)
        # Feature shape is (batch, frames, height, width): one stacked frame,
        # downscaled to 105x80.
        self.assertEqual(self.agent.state.features.shape, (1, 1, 105, 80))
def test_step(self):
    """A single step on CartPole yields a live state with reward 1."""
    environment = GymEnvironment('CartPole-v0')
    environment.reset()
    result = environment.step(1)
    # CartPole observations are 4-dimensional vectors.
    self.assertEqual(result.observation.shape, (4,))
    self.assertEqual(result.reward, 1.)
    # The episode should still be in progress after one step.
    self.assertFalse(result.done)
    self.assertEqual(result.mask, 1)
def evaluate_dqn_all(num_test_episodes):
    """Evaluate a saved greedy DQN agent for a number of test episodes.

    Loads the agent from the 'models' directory, runs it greedily in the
    configured gym environment, aggregates per-episode statistics, and logs
    the collected episode rewards.

    Args:
        num_test_episodes: number of completed (counted) episodes to run.
    """
    from all.experiments.watch import GreedyAgent
    from all.environments import GymEnvironment

    device = "cuda" if Settings.CUDA else "cpu"
    env = GymEnvironment(Settings.GYM_ENVIRONMENT, device=device)
    agent = GreedyAgent.load('models', env)

    num_crashed = 0
    num_arrived = 0
    action = None
    iteration = 0
    rl_stats = StatsAggregator()
    episode_reward = 0

    def add_reward(state):
        # Closure: reports the running reward of the current episode.
        return {"reward": episode_reward}

    rl_stats.add_custom_stat_callback(add_reward)
    rewards = []

    while iteration < num_test_episodes:
        if env.done:
            stats = env.env.get_stats()
            # NOTE(review): episodes with an empty position history are not
            # counted toward num_test_episodes — presumably degenerate
            # resets; confirm against the environment implementation.
            if stats["position_history"]:
                rl_stats.add_episode_stats(stats)
                num_crashed += stats["crashed"]
                num_arrived += stats["merged"]
                iteration += 1
                print(iteration)
                rewards.append(episode_reward)
            episode_reward = 0
            env.reset()
        else:
            env.step(action)
        action = agent.eval(env.state, env.reward)
        episode_reward += env.reward

    # Lazy %-formatting: the message is only built if INFO is enabled.
    logging.info("Rewards: %s", rewards)
    rl_stats.print_stats()
def test_step_until_done(self):
    """Stepping CartPole until termination produces a terminal state."""
    environment = GymEnvironment('CartPole-v0')
    environment.reset()
    # Always push right (action 1); the pole must fall well within 100 steps.
    for _ in range(100):
        result = environment.step(1)
        if result.done:
            break
    self.assertEqual(result.observation.shape, (4,))
    self.assertEqual(result.reward, 1.)
    self.assertTrue(result.done)
    # Terminal states carry a zero mask.
    self.assertEqual(result.mask, 0)
class DeepmindAtariBodyPongTest(unittest.TestCase):
    """Exercises the Deepmind Atari preprocessing body on Pong.

    Uses the default 4-frame stack; noop_max=0 keeps the step sequence
    deterministic. Verifies the fire-on-reset actions, frame-stacked feature
    shapes, reward clipping, and terminal-state handling.
    """

    def setUp(self):
        self.agent = MockAgent()
        self.env = GymEnvironment('PongNoFrameskip-v4')
        self.body = DeepmindAtariBody(ToLegacyBody(self.agent), self.env, noop_max=0)

    def test_initial_state(self):
        self.env.reset()
        action = self.body.act(self.env.state, 0)
        tt.assert_equal(action, torch.tensor([1]))  # fire on reset 1

    def test_second_state(self):
        self.env.reset()
        self.env.step(self.body.act(self.env.state, 0))
        action = self.body.act(self.env.state, self.env.reward)
        tt.assert_equal(action, torch.tensor([2]))  # fire on reset 2

    def test_several_steps(self):
        self.env.reset()
        # Consume the two fire-on-reset actions first.
        self.env.step(self.body.act(self.env.state, 0))
        self.env.step(self.body.act(self.env.state, -5))
        # While the 4-frame stack is filling, the body repeats the initial action.
        for _ in range(4):
            action = self.body.act(self.env.state, -5)
            self.assertEqual(self.agent.state.features.shape, (1, 4, 105, 80))
            tt.assert_equal(action, INITIAL_ACTION)
            self.env.step(action)
        for _ in range(10):
            reward = -5  # should be clipped
            self.assertEqual(self.agent.state.features.shape, (1, 4, 105, 80))
            action = self.body.act(self.env.state, reward)
            tt.assert_equal(action, ACT_ACTION)
            self.env.step(action)
        # Rewards are clipped to [-1, 1] and accumulated over the 4-step
        # frameskip window, hence -4 rather than raw -5s.
        self.assertEqual(self.agent.reward, -4)

    def test_terminal_state(self):
        self.env.reset()
        self.env.step(self.body.act(self.env.state, 0))
        for _ in range(11):
            reward = -5  # should be clipped
            action = self.body.act(self.env.state, reward)
            self.env.step(action)
        # Force a terminal mask on the current state to simulate episode end.
        # pylint: disable=protected-access
        self.env.state._mask = torch.tensor([0])
        self.body.act(self.env.state, -1)
        tt.assert_equal(action, ACT_ACTION)
        self.assertEqual(self.agent.state.features.shape, (1, 4, 105, 80))
        self.assertEqual(self.agent.reward, -4)