def __init__(self):
    """Build the Pong agent: environment, policy/target Q-networks,
    replay memory, frame pre-processor, and learning hyper-parameters.

    Relies on module-level names `gym`, `DQN`, `device`, `GRAMMA`,
    `ALPHA`, and `EPSILON` being defined elsewhere in this file.
    """
    environment = gym.envs.make("PongDeterministic-v4")
    self.env = environment
    # Target network is constructed first, then the policy network,
    # matching the original statement order (keeps RNG consumption identical).
    self.Q_target = DQN.Mynet(environment.observation_space, environment.action_space).to(device)
    self.Q_policy = DQN.Mynet(environment.observation_space, environment.action_space).to(device)
    # Start the target network as an exact copy of the policy network and
    # freeze it in eval mode; it is only ever read from, never trained directly.
    self.Q_target.load_state_dict(self.Q_policy.state_dict())
    self.Q_target.eval()
    self.pool = DQN.ReplyMemory(15000)  # replay buffer, capacity 15000 (sic: "Reply")
    self.gramma = GRAMMA    # discount factor (sic: "gamma")
    self.alpha = ALPHA      # learning rate
    self.epsilon = EPSILON  # exploration rate
    self.ImageProcess = DQN.ImageProcess()  # frame pre-processing helper
import random

# Inference is pinned to CPU. NOTE(review): `gym`, `torch`, and the
# project-local `DQN` module are used below but imported elsewhere in
# this file — confirm they are in scope at runtime.
device = 'cpu'


def transfor_o(ob):
    """Convert an iterable of stacked frames into a single float32 tensor.

    Each element of `ob` is converted to a float32 tensor with a leading
    dimension of 1; the pieces are concatenated and a final leading batch
    dimension of 1 is added before returning.
    """
    obb = []
    for i in ob:
        # `i.tolist()` suggests each frame arrives as a numpy array — TODO confirm.
        obb.append(torch.tensor(i.tolist(), dtype=torch.float32).to(device).unsqueeze(0))
    return torch.cat(obb).to(device).unsqueeze(0)


if __name__ == '__main__':
    env = gym.envs.make("PongDeterministic-v4")
    V = DQN.Mynet(env.observation_space, env.action_space)
    # Load pretrained Q-network weights from a hard-coded absolute path.
    V.load_state_dict(torch.load(r'C:\Users\lingse\Desktop\新建文件夹\RL_home_work-master\Q_save_model\899_pong_new.pt'))
    V.eval()  # inference only — disable dropout/batch-norm training behavior
    observation = [None for i in range(5)]
    import numpy as np
    I = DQN.ImageProcess()
    while True:
        state = env.reset()
        # Convert the raw RGB frame to the binary representation the net expects.
        state = I.ColorMat2Binary(state)
        # Seed the frame stack by repeating the first frame 4x along a new channel axis.
        state_shadow = np.stack((state, state, state, state), axis=2)
        state_now = transfor_o(state_shadow)
        import time
        # NOTE(review): the loop body appears to continue beyond this chunk
        # (trailing `import time` with no use visible here).