# Catch KeyboardInterrupts and save model (variant currently disabled):
# i = -1
# try:
#     # Reinforcement Loop
#     # for i in tqdm.trange(n_episodes):
#     while True:
#         i += 1

# Training loop: run `n_episodes` episodes, recording every transition in
# both a per-episode buffer and the long-lived `memory` (which is defined
# outside this excerpt).
for i in range(n_episodes):
    # Reset env before starting a new episode.
    info, reward, state = env.reset()
    j = 0  # number of steps taken in the current episode

    # Fresh buffer for each episode, filled alongside `memory` below.
    shortterm_memory = ReplayMemory(max_size=256)

    # NOTE(review): no exit from this loop is visible in this excerpt; the
    # remainder of the loop body (including whatever ends the episode) is
    # presumably defined further down — confirm against the full file.
    while True:
        j += 1

        # Interact with env.
        action = agent.step(state)
        # This env.step unpacks as (done, reward, observation), not the
        # (observation, reward, done, info) order of the disabled variant:
        # observation, reward, done, info = env.step(action)
        done, base_reward, observation = env.step(action)

        # Determine real reward based on Policy.
        reward = Policies.LiveLongAndProsper(base_reward, done)
        # reward = base_reward

        # Store transition in memory.
        transition = [state, action, reward, observation, done]
        shortterm_memory.store(*transition)
        memory.store(*transition)

        # Step to next state.
        state = observation
# Move both networks to the GPU when one is available.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    print("Running on GPU")
    agent.target_net.cuda()
    agent.policy_net.cuda()

# Reinforcement Loop: play `n_episodes` episodes with `decay_enabled=False`
# and record rewards in `R` for evaluation. No transitions are stored here.
# NOTE(review): the nesting below was reconstructed from statement order
# (the source had lost its line breaks); confirm that the episode loop is
# meant to sit outside the CUDA check.
for i in range(n_episodes):
    # Reset env before starting a new episode.
    info, reward, state = env.reset()
    j = 0  # number of steps taken in the current episode

    # NOTE(review): no exit from this loop is visible in this excerpt; the
    # remainder of the loop body is presumably defined further down —
    # confirm against the full file.
    while True:
        j += 1

        # Interact with env.
        action = agent.step(state, decay_enabled=False)
        # This env.step unpacks as (done, reward, observation), not the
        # (observation, reward, done, info) order of the disabled variant:
        # observation, reward, done, info = env.step(action)
        done, reward, observation = env.step(action)

        # Determine real reward based on Policy (currently disabled).
        # reward = Policies.SoreLoser(reward, done)

        # Step to next state.
        state = observation

        # Save rewards for evaluation. Written on every step, so R[i] ends
        # up holding the most recent step's reward for episode i.
        # NOTE(review): placement inside the step loop is inferred — confirm.
        R[i] = reward

        # Reset if game lasts too long:
        # Protects against environment bug where agents can be trapped
        # outside the arena.
        # NOTE(review): the guard these comments describe is not visible in
        # this excerpt.