# env.render()

# 4. Run agent on the state
action = mario.act(state)

# 5. Agent performs action
next_state, reward, done, info = env.step(action)

# 6. Remember
mario.cache(state, next_state, action, reward, done)

# 7. Learn
q, loss = mario.learn()

# 8. Logging
logger.log_step(reward, loss, q)

# 9. Update state
state = next_state

# 10. Check if end of game
if done or info['flag_get']:
    break

logger.log_episode()

if e % print_e == 0:
    logger.record(episode=e, epsilon=mario.exploration_rate, step=mario.curr_step)
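# A minimal sketch of the replay-buffer pattern that `mario.cache` and
# `mario.learn` rely on above. The names here (`ReplayBuffer`, `sample`)
# are hypothetical; the actual agent class is defined elsewhere in this
# tutorial. The idea: `cache` appends one transition, and `learn` later
# draws a random batch of transitions for the TD update.
import random
from collections import deque

class ReplayBuffer:
    def __init__(self, capacity=100_000):
        self.memory = deque(maxlen=capacity)  # oldest transitions drop off

    def cache(self, state, next_state, action, reward, done):
        # Store a single (s, s', a, r, done) transition
        self.memory.append((state, next_state, action, reward, done))

    def sample(self, batch_size=32):
        # Uniform random batch breaks the correlation between
        # consecutive frames, which stabilizes Q-learning
        batch = random.sample(self.memory, batch_size)
        state, next_state, action, reward, done = zip(*batch)
        return state, next_state, action, reward, done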
episode_rewards.append(reward)

q, loss = doomguy.learn()
logger.log_step(reward, loss, q)

# If the game is finished
if done:
    # The episode has ended, so there is no real next state:
    # stand in a blank frame for it
    next_state = np.zeros((84, 84), dtype=np.uint8)
    next_state, stacked_frames = stack_frames(stacked_frames, next_state, False)

    # Store the terminal transition as well, so the agent can learn from it
    doomguy.cache(state, next_state, action, reward, done)

    # Get the total reward of the episode
    total_reward = np.sum(episode_rewards)
    break
else:
    next_state, stacked_frames = get_next_state(False)
    doomguy.cache(state, next_state, action, reward, done)
    state = next_state

if step > max_steps:
    total_reward = np.sum(episode_rewards)
    break

logger.log_episode()

if episode % 20 == 0:
    logger.record(episode=episode, epsilon=doomguy.exploration_rate, step=doomguy.curr_step)
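# A sketch of the frame-stacking helper the Doom loop calls above. The
# real `stack_frames` is defined earlier in the tutorial; this version
# (`stack_frames_sketch`, `STACK_SIZE`) is an assumption of its usual
# shape: keep the last 4 preprocessed 84x84 frames in a deque so the
# network can infer motion, and on a new episode fill the stack with
# copies of the first frame.
from collections import deque
import numpy as np

STACK_SIZE = 4

def stack_frames_sketch(stacked_frames, frame, is_new_episode):
    frame = np.asarray(frame, dtype=np.uint8)  # assumes an 84x84 frame
    if is_new_episode:
        # Start fresh: repeat the first frame to fill the whole stack
        stacked_frames = deque([frame] * STACK_SIZE, maxlen=STACK_SIZE)
    else:
        stacked_frames.append(frame)  # oldest frame drops off the left
    # Channels-last (84, 84, 4) array for the Q-network
    stacked_state = np.stack(stacked_frames, axis=2)
    return stacked_state, stacked_frames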