Example 1
def __init__(self):
    # Agent constructor: build the environment, a policy network, and a
    # target network initialized as a copy of the policy network.
    env = gym.envs.make("PongDeterministic-v4")
    self.Q_target = DQN.Mynet(env.observation_space, env.action_space).to(device)
    self.Q_policy = DQN.Mynet(env.observation_space, env.action_space).to(device)
    self.Q_target.load_state_dict(self.Q_policy.state_dict())
    self.Q_target.eval()  # target net is only read from, never trained directly
    self.env = env
    self.pool = DQN.ReplyMemory(15000)  # replay buffer ("ReplyMemory" is the project's own spelling)
    self.gramma = GRAMMA    # discount factor (sic: "gramma" for gamma)
    self.alpha = ALPHA      # learning rate
    self.epsilon = EPSILON  # epsilon-greedy exploration rate
    self.ImageProcess = DQN.ImageProcess()  # frame preprocessing helper
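
Both snippets lean on a project-local DQN module that the excerpt never shows. For orientation, here is a minimal sketch of what DQN.Mynet and DQN.ReplyMemory might look like for a 4x84x84 Atari frame stack; the class names and constructor signatures match the calls above, but every body is an assumption, not the original code:

    # Hypothetical reconstruction of the unseen DQN module -- for orientation only.
    import random
    from collections import deque

    import torch.nn as nn


    class Mynet(nn.Module):
        def __init__(self, observation_space, action_space):
            super().__init__()
            # Standard DQN convolutional trunk for a 4x84x84 input stack.
            self.net = nn.Sequential(
                nn.Conv2d(4, 32, kernel_size=8, stride=4), nn.ReLU(),
                nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ReLU(),
                nn.Conv2d(64, 64, kernel_size=3, stride=1), nn.ReLU(),
                nn.Flatten(),
                nn.Linear(64 * 7 * 7, 512), nn.ReLU(),
                nn.Linear(512, 2),  # two outputs, matching the up/down mapping in Example 2
            )

        def forward(self, x):
            return self.net(x)


    class ReplyMemory:  # spelling kept from the original project
        def __init__(self, capacity):
            self.buffer = deque(maxlen=capacity)

        def push(self, transition):
            self.buffer.append(transition)

        def sample(self, batch_size):
            return random.sample(self.buffer, batch_size)

        def __len__(self):
            return len(self.buffer)

With two networks of this shape, the load_state_dict call in the constructor synchronizes the target net with the policy net; in DQN training this synchronization is typically repeated every few thousand steps.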
Example 2
# (The top of this file is cut off in the excerpt; the imports and the
# `device` definition below are reconstructed so that the fragment runs.)
import time

import gym
import numpy as np
import torch

import DQN

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def transfor_o(state_shadow):
    # Only the return line survived in the excerpt. The signature and the
    # construction of `obb` are a best guess from the call site below: split
    # the HxWx4 frame stack into four 1xHxW float tensors, concatenate them
    # to 4xHxW, and add a batch dimension.
    obb = [torch.from_numpy(state_shadow[:, :, i].astype(np.float32)).unsqueeze(0)
           for i in range(state_shadow.shape[2])]
    return torch.cat(obb).to(device).unsqueeze(0)


if __name__ == '__main__':
    env = gym.envs.make("PongDeterministic-v4")

    V = DQN.Mynet(env.observation_space, env.action_space)
    # V = DQN.Mynet()
    # with open('./save_model/499.pt', 'r') as f:
    V.load_state_dict(torch.load(r'C:\Users\lingse\Desktop\新建文件夹\RL_home_work-master\Q_save_model\899_pong_new.pt'))
    V.eval()
    observation = [None for i in range(5)]  # unused in this excerpt

    # c = V(state).max(1)[1].view(1, 1)
    I = DQN.ImageProcess()
    while True:
        state = env.reset()
        state = I.ColorMat2Binary(state)
        state_shadow = np.stack((state, state, state, state), axis=2)
        state_now = transfor_o(state_shadow)
        while True:
            env.render()
            time.sleep(0.01)
            with torch.no_grad():  # inference only; no gradients needed
                action = V(state_now).max(1)[1].view(1, 1)
            # Map the network's two outputs onto the Pong paddle actions 2 and 5;
            # env.step expects a plain integer for a Discrete action space, not [[2]].
            do_action = 2 if action.item() == 0 else 5
            observation1, reward, done, _ = env.step(do_action)
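
The excerpt ends mid-loop: after env.step the new frame still has to be folded into the four-frame stack, and the episode loop has to exit when done is set. A plausible continuation, assuming the same ColorMat2Binary preprocessing used at reset and a simple oldest-frame-out shift (this code is a reconstruction, not part of the original):

            # -- assumed continuation, not in the original excerpt --
            frame = I.ColorMat2Binary(observation1)  # preprocess the new raw frame
            # Drop the oldest of the four stacked frames and append the newest.
            state_shadow = np.append(state_shadow[:, :, 1:], frame[:, :, np.newaxis], axis=2)
            state_now = transfor_o(state_shadow)
            if done:
                break  # episode over; fall through to the outer reset loop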