Code example #1
    def test_pong_num_actions(self):
        env = gym.make('PongDeterministic-v4')
        env = wrap_deepmind(env, frame_stack=True)
        env = wrap_pytorch(env)
        env.seed(42)
        observation = env.reset()
        self.assertEqual(env.action_space.n, 6)
Code example #2
    def test_frame_shape(self):
        env = gym.make('BreakoutDeterministic-v4')
        env = wrap_deepmind(env, frame_stack=True)
        env = wrap_pytorch(env)
        env.seed(42)
        observation = env.reset()
        self.assertEqual(observation.shape, (4, 84, 84))
Code example #3
    def test_env_input(self):
        env = gym.make('BreakoutDeterministic-v4')
        env = wrap_deepmind(env, frame_stack=True)
        env = wrap_pytorch(env)
        env.seed(42)
        state = env.reset()
        # Need to normalize inputs to range of 0-1
        state = torch.tensor(state, dtype=torch.float32) / 255.0
        state = state.unsqueeze(dim=0)
        DQNModel = AtariCNN(env.observation_space.shape, num_actions=4)
        out = DQNModel(state)
        self.assertEqual(out.size(), (1, 4))
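AtariCNN itself is not defined in any of these snippets; the tests only show that its constructor takes the observation shape and a num_actions argument, and that its forward pass returns a (batch, num_actions) tensor. A minimal sketch, assuming the standard DeepMind DQN architecture (Mnih et al., 2015) rather than the project's actual definition, might look like this:

import torch
import torch.nn as nn

class AtariCNN(nn.Module):
    # Three conv layers followed by two fully connected layers,
    # mapping a stacked-frame observation to one Q-value per action.
    def __init__(self, inp_shape, num_actions):
        super().__init__()
        c, h, w = inp_shape  # e.g. (4, 84, 84) with frame stacking
        self.conv = nn.Sequential(
            nn.Conv2d(c, 32, kernel_size=8, stride=4), nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1), nn.ReLU(),
        )
        # Infer the flattened conv output size with a dummy forward pass
        with torch.no_grad():
            conv_out = self.conv(torch.zeros(1, c, h, w)).view(1, -1).size(1)
        self.fc = nn.Sequential(
            nn.Linear(conv_out, 512), nn.ReLU(),
            nn.Linear(512, num_actions),
        )

    def forward(self, x):
        x = self.conv(x)           # (batch, 64, 7, 7) for 84x84 inputs
        x = x.view(x.size(0), -1)  # flatten to (batch, 3136)
        return self.fc(x)          # Q-values, shape (batch, num_actions)

With a definition like this, the assertion in test_env_input holds: a (1, 4, 84, 84) input with num_actions=4 yields a (1, 4) output.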
Code example #4
    def test_breakout_rewards(self):
        env = gym.make('BreakoutDeterministic-v4')
        env = wrap_deepmind(env, frame_stack=True)
        env = wrap_pytorch(env)
        env.seed(42)

        state = env.reset()
        done = False
        tot_reward = 0.0
        while not done:
            state, reward, done, _ = env.step(env.action_space.sample())
            tot_reward += reward
        self.assertGreaterEqual(tot_reward, 0)
Code example #5
def make_env(env_id):
    env = gym.make(env_id)
    env = wrap_deepmind(env, frame_stack=True)
    env = wrap_pytorch(env)
    env.seed(42)
    return env
Code example #6
def make_env(env_id):
    env = gym.make(env_id)
    env = wrap_deepmind(env, frame_stack=True)
    env = wrap_pytorch(env)
    env.seed(42)
    return env

def make_net(inp_shape, num_actions):
    PolicyNet = AtariCNN(inp_shape, num_actions)
    TargetNet = AtariCNN(inp_shape, num_actions)
    return PolicyNet, TargetNet

random.seed(42)

env = gym.make('PongDeterministic-v4')
env = wrap_deepmind(env, frame_stack=True, clip_rewards=True)
env = wrap_pytorch(env)
env.seed(42)

# lst = []
# for i in range(1):
#     state = env.reset()
#     done = False
#     tot_reward = 0.0
#     while not done:
#         state, reward, done, _ = env.step(env.action_space.sample())
#         env.render()
#         time.sleep(0.01)
#         tot_reward += reward
#     print(i, tot_reward, state.shape)
#     lst.append(tot_reward)
# print(np.mean(lst))
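None of the snippets above include their imports, and the test_* methods presumably belong to a unittest.TestCase subclass. A plausible preamble is sketched below; the module paths for wrap_deepmind, wrap_pytorch, and AtariCNN are assumptions, since the source does not show where they are defined.

import random
import time
import unittest

import gym
import numpy as np
import torch

# Assumed module paths; point these at wherever wrap_deepmind,
# wrap_pytorch, and AtariCNN actually live in this project.
from wrappers import wrap_deepmind, wrap_pytorch
from model import AtariCNN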