import random
import time  # only needed by the commented-out rollout at the bottom of this file
import unittest

import gym
import torch

# wrap_deepmind and wrap_pytorch are assumed to come from the repo's
# baselines-style Atari wrapper module; the original file does not show
# the import path.


# Class wrapper assumed: the methods below take `self` and use unittest
# assertions, so they belong inside a TestCase subclass.
class TestAtariDQN(unittest.TestCase):

    def test_pong_num_actions(self):
        env = gym.make('PongDeterministic-v4')
        env = wrap_deepmind(env, frame_stack=True)
        env = wrap_pytorch(env)
        env.seed(42)
        observation = env.reset()
        # Pong exposes 6 discrete actions.
        self.assertEqual(env.action_space.n, 6)

    def test_frame_shape(self):
        env = gym.make('BreakoutDeterministic-v4')
        env = wrap_deepmind(env, frame_stack=True)
        env = wrap_pytorch(env)
        env.seed(42)
        observation = env.reset()
        # wrap_pytorch puts channels first: 4 stacked 84x84 grayscale frames.
        self.assertEqual(observation.shape, (4, 84, 84))

    def test_env_input(self):
        env = gym.make('BreakoutDeterministic-v4')
        env = wrap_deepmind(env, frame_stack=True)
        env = wrap_pytorch(env)
        env.seed(42)
        state = env.reset()
        # Normalize byte observations to the range [0, 1] before the forward pass.
        state = torch.tensor(state, dtype=torch.float32) / 255.0
        state = state.unsqueeze(dim=0)  # add a batch dimension
        DQNModel = AtariCNN(env.observation_space.shape, num_actions=4)
        out = DQNModel(state)
        # One Q-value per action for the single batched state.
        self.assertEqual(out.size(), (1, 4))

    def test_breakout_rewards(self):
        env = gym.make('BreakoutDeterministic-v4')
        env = wrap_deepmind(env, frame_stack=True)
        env = wrap_pytorch(env)
        env.seed(42)
        state = env.reset()
        done = False
        tot_reward = 0.0
        # Run one full episode with a random policy; Breakout never gives
        # negative rewards, so the total should be >= 0.
        while not done:
            state, reward, done, _ = env.step(env.action_space.sample())
            tot_reward += reward
        self.assertGreaterEqual(tot_reward, 0)
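

# AtariCNN is referenced above but not defined in this file. Below is a
# minimal sketch of what it might look like, assuming the standard
# Nature-DQN architecture (Mnih et al., 2015); prefer the repo's actual
# definition if it differs.
import torch.nn as nn


class AtariCNN(nn.Module):
    """Nature-DQN network: three conv layers followed by two linear layers."""

    def __init__(self, inp_shape, num_actions):
        super().__init__()
        in_channels = inp_shape[0]  # number of stacked frames, e.g. 4
        self.features = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
        )
        # For 84x84 inputs the conv stack flattens to 64 * 7 * 7 = 3136 features.
        self.head = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 7 * 7, 512),
            nn.ReLU(),
            nn.Linear(512, num_actions),
        )

    def forward(self, x):
        return self.head(self.features(x))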


def make_env(env_id):
    env = gym.make(env_id)
    env = wrap_deepmind(env, frame_stack=True)
    env = wrap_pytorch(env)
    env.seed(42)
    return env
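
# Example usage of the factory above (env id is one already used in this file):
#   env = make_env('BreakoutDeterministic-v4')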

def make_net(inp_shape, num_actions):
    # Two identical networks: the policy net is trained online while the
    # target net is held fixed between periodic syncs.
    PolicyNet = AtariCNN(inp_shape, num_actions)
    TargetNet = AtariCNN(inp_shape, num_actions)
    return PolicyNet, TargetNet


random.seed(42)

env = gym.make('PongDeterministic-v4')
env = wrap_deepmind(env, frame_stack=True, clip_rewards=True)
env = wrap_pytorch(env)
env.seed(42)

# lst = []
# for i in range(1):
#     state = env.reset()
#     done = False
#     tot_reward = 0.0
#     while not done:
#         state, reward, done, _ = env.step(env.action_space.sample())
#         env.render()
#         time.sleep(0.01)
#         tot_reward += reward
#     print(i, tot_reward, state.shape)
#     lst.append(tot_reward)
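
# Illustrative sketch only: an epsilon-greedy action selector showing how a
# trained PolicyNet could act in the wrapped env. The helper name and the
# default epsilon are assumptions, not part of the original script.
def select_action(net, env, state, epsilon=0.05):
    # Explore with probability epsilon, otherwise act greedily on Q-values.
    if random.random() < epsilon:
        return env.action_space.sample()
    with torch.no_grad():
        # Same preprocessing as test_env_input: scale byte frames to [0, 1].
        state_t = torch.tensor(state, dtype=torch.float32).unsqueeze(0) / 255.0
        return int(net(state_t).argmax(dim=1).item())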