class DQNLunar:
    """DQN agent wrapper for the Gym 'LunarLander-v2' environment.

    Holds the environment, the underlying DQN network (8 state dims,
    4 discrete actions), and a rolling window of the last 100 episode
    rewards used for the periodic average printout.
    """

    def __init__(self):
        self.env = gym.make('LunarLander-v2')
        self.nn = DQN(8, 4)                 # 8-dim observation, 4 actions
        self.avgReward = np.zeros(100)      # circular buffer of last 100 episode rewards

    def runEp(self, env, printT=False):
        """Run one episode (max 500 steps) and return its total reward.

        Each transition is stored in the replay buffer; one training
        step (batch size 6) is performed after the episode ends.
        Assumes the pre-0.26 Gym API where step() returns a 4-tuple.
        """
        state = env.reset()
        state = np.reshape(state, [1, 8])   # network expects a (1, 8) row vector
        totalreward = 0
        for _ in range(500):
            if printT:
                env.render()
            action = self.nn.chooseAction(state)
            next_state, reward, done, _ = env.step(action)
            totalreward += reward
            next_state = np.reshape(next_state, [1, 8])
            self.nn.storePastResults(state, action, reward, next_state, done)
            state = next_state
            if done:
                break
        self.nn.train(6)
        return totalreward

    def run(self):
        """Train for 20000 episodes, printing a 100-episode rolling average.

        Fixes vs. the original: the episode is run once (both branches
        previously issued the identical runEp call), the episode index is
        printed with %d instead of %f, and the total shown matches the
        actual 20000-episode loop (the message previously said /10000).
        """
        for i in range(20000):  # number of episodes
            self.avgReward[i % 100] = self.runEp(self.env, True)
            if i % 100 == 0:
                avg = np.average(self.avgReward)
                print('Episode %d/20000 Reward: %f' % (i, avg))
            else:
                print(i, self.avgReward[i % 100])
# --- Tail of an optimization-step function (its `def` lies outside this chunk). ---
# Standard DQN update: Huber loss between predicted and target Q-values,
# gradient clamp to [-1, 1], then an optimizer step.
loss = F.smooth_l1_loss(state_action_values, expected_state_action_values.unsqueeze(1))
optimizer.zero_grad()
loss.backward()
for param in policy_net.parameters():
    # In-place clamp of raw gradients to stabilize training (gradient clipping).
    param.grad.data.clamp_(-1, 1)
optimizer.step()
######################################################################
# --- Script-level setup: build policy/target networks sized from a screen capture. ---
init_screen = get_screen()
_, _, screen_height, screen_width = init_screen.shape
# Get number of actions from gym action space
n_actions = 4
policy_net = DQN(screen_height, screen_width, n_actions,layers=20).to(device)
target_net = DQN(screen_height, screen_width, n_actions,layers=20).to(device)
# NOTE(review): hard-coded absolute Windows path — not portable; confirm intent.
PATH = 'C:/Users/sagau/Google Drive/smaller1.pth'
optimizer = optim.Adam(policy_net.parameters(),lr=1e-4)
load_mode = False
if load_mode:
    # Resume from a checkpoint: restore optimizer, both networks (target is a
    # copy of policy), and the bookkeeping lists, then re-plot progress.
    # NOTE(review): grouping under this `if` is reconstructed from a collapsed
    # line — original indentation was lost; confirm plot_durations() belongs here.
    model_dict = torch.load(PATH,map_location=torch.device('cpu'))
    i_episode = model_dict['epoch']
    optimizer.load_state_dict(model_dict['optimizer'])
    policy_net.load_state_dict(model_dict['state_dict'])
    target_net.load_state_dict(model_dict['state_dict'])
    episode_durations = model_dict['episode_durations']
    total_reward_list = model_dict['total_reward_list']
    point_list = model_dict['point_list']
    plot_durations()
# --- Tail of an action-selection function (its `def` lies outside this chunk). ---
# Presumably the epsilon-greedy fallback: a uniformly random action — confirm
# against the enclosing function.
return torch.tensor([[random.randrange(n_actions)]], device=device, dtype=torch.long)
######################################################################
# Training loop
######################################################################
# --- Script-level setup: size the network from a screen capture, load weights. ---
init_screen = get_screen()
_, _, screen_height, screen_width = init_screen.shape
# Get number of actions from gym action space
n_actions = 4
#PATH = 'C:/Users/sagau/Desktop/Kaggle/TetrisRepo/models/model1_2.pth'
# NOTE(review): this first construction (and its .eval()) is immediately
# discarded by the reassignment below — looks like dead code; confirm.
policy_net = DQN(screen_height, screen_width, n_actions,layers=20)
policy_net.eval()
policy_net = DQN(screen_height, screen_width, n_actions,layers=20).to(device)
# NOTE(review): hard-coded absolute Windows path — not portable; confirm intent.
PATH = 'C:/Users/sagau/Google Drive/transfersmaller.pth'
model_dict = torch.load(PATH,map_location=torch.device('cpu'))
policy_net.load_state_dict(model_dict['state_dict'])
######################################################################
# Play with model !
sleep_time = 0.2          # delay between rendered frames, in seconds
game = Tetris(nb_rows=8,nb_cols=6)
done = game.generate_block(choice=random.randint(0,3))
rows = 0                  # rows cleared so far (presumably — confirm against loop body)
# Endless play loop; body continues beyond this chunk.
for t in count():
# for t in range(200):
import gym
import numpy as np
from dqnClass import DQN

# Setup Enviornment
env = gym.make('CartPole-v0')
nn = DQN(4, 2)  # 4-dim observation, 2 discrete actions


def runEp(env, printT=False):
    """Run one episode (max 500 steps) and return the last step index.

    Transitions are stored in the replay buffer; one training step
    (batch size 32) runs after the episode. Assumes the pre-0.26 Gym
    API where step() returns a 4-tuple.

    NOTE(review): `totalreward` is accumulated but never returned —
    the function returns `t` (episode duration). For CartPole duration
    equals score, so this may be intentional; confirm.
    """
    state = env.reset()
    state = np.reshape(state, [1, 4])  # network expects a (1, 4) row vector
    totalreward = 0
    for t in range(500):
        if printT:
            env.render()
        action = nn.chooseAction(state)
        next_state, reward, done, _ = env.step(action)
        totalreward += reward
        next_state = np.reshape(next_state, [1, 4])
        nn.storePastResults(state, action, reward, next_state, done)
        state = next_state
        if done:
            break
    nn.train(32)
    return t


avgReward = np.zeros(200)  # rolling buffer of recent episode scores
for i in range(10000):  # Number of episodes s
    # Loop body continues beyond this chunk.
    score = runEp(env, True)
import gym
import numpy as np
from dqnClass import DQN

# Setup Enviornment
env = gym.make('LunarLander-v2')
nn = DQN(8, 4)  # 8-dim observation, 4 discrete actions


def runEp(env, printT=False):
    """Run one episode (max 500 steps) and return its total reward.

    Transitions are stored in the replay buffer; one training step
    (batch size 6) runs after the episode. Assumes the pre-0.26 Gym
    API where step() returns a 4-tuple.
    """
    state = env.reset()
    state = np.reshape(state, [1, 8])  # network expects a (1, 8) row vector
    totalreward = 0
    for _ in range(500):
        if printT:
            env.render()
        action = nn.chooseAction(state)
        next_state, reward, done, _ = env.step(action)
        totalreward += reward
        next_state = np.reshape(next_state, [1, 8])
        nn.storePastResults(state, action, reward, next_state, done)
        state = next_state
        if done:
            break
    nn.train(6)
    return totalreward


avgReward = np.zeros(100)  # rolling buffer of last 100 episode rewards
for i in range(20000):  # Number of episodes s
    if i % 100 == 0:
        # Body of this branch continues beyond this chunk.
def __init__(self):
    # Build the LunarLander environment and its DQN agent
    # (8-dim observation, 4 discrete actions); the enclosing class
    # header lies outside this chunk.
    self.env = gym.make('LunarLander-v2')
    self.nn = DQN(8, 4)
    # Circular buffer of the last 100 episode rewards.
    self.avgReward = np.zeros(100)