import numpy as np
from collections import deque

# Episode bookkeeping (a 100-episode score window is assumed here).
scores, avg_scores, test_scores, test_scores_i = [], [], [], []
scores_window = deque(maxlen=100)  # most recent episode scores

agent = DQNAgent(config)
agent.create_dirs()
eps = config.eps_start

for i_episode in range(1, config.n_episodes + 1):
    # Reset the environment and the score
    env_info = env.reset(train_mode=True)[brain_name]
    state = env_info.vector_observations[0]
    score = 0

    while True:
        action = agent.act(state, eps)
        env_info = env.step(action)[brain_name]
        next_state, reward, done = (env_info.vector_observations[0],
                                    env_info.rewards[0],
                                    env_info.local_done[0])
        agent.step(state, action, reward, next_state, done)
        state = next_state
        score += reward
        if done:
            break

    # Track scores and decay epsilon after every episode.
    scores_window.append(score)
    scores.append(score)
    avg_scores.append(np.mean(scores_window))
    eps = max(config.eps_min, config.eps_decay * eps)
    print('\rEpisode {}\tEps {:.2f}\tLast Score: {:.2f}\tAverage Score: {:.2f}'
          .format(i_episode, eps, score, np.mean(scores_window)), end="")

    # Evaluate the agent every 100 episodes.
    if i_episode % 100 == 0:
        test_scores.append(test(env, agent, i_episode))
        test_scores_i.append(i_episode)
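The test helper called every 100 episodes is not defined in this snippet. A minimal sketch of what it might do, assuming it simply runs a few greedy (eps = 0) episodes in the same environment and returns their mean score, is shown below; the n_eval_episodes parameter, the greedy behaviour, and reusing train_mode=True for speed are assumptions, not taken from the source.

def test(env, agent, i_episode, n_eval_episodes=3):
    # i_episode is accepted because the training loop passes it; the real
    # helper presumably uses it for logging or checkpoint naming.
    eval_scores = []
    for _ in range(n_eval_episodes):
        env_info = env.reset(train_mode=True)[brain_name]
        state = env_info.vector_observations[0]
        score = 0
        while True:
            action = agent.act(state, 0.0)  # greedy action, no exploration
            env_info = env.step(action)[brain_name]
            state = env_info.vector_observations[0]
            score += env_info.rewards[0]
            if env_info.local_done[0]:
                break
        eval_scores.append(score)
    return np.mean(eval_scores)

The pixel-based variant below follows the same training loop, but builds the agent's state from a window of preprocessed visual observations instead of the environment's vector observation.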
# The pixel-based loop assumes the same setup as above (agent, eps, score
# bookkeeping); state_window is assumed to be a deque(maxlen=state_len)
# holding the most recent processed frames.
for i_episode in range(1, config.n_episodes + 1):
    # Reset the environment, seed the frame window, and reset the score.
    env_info = env.reset(train_mode=True)[brain_name]
    state_raw = process_state(env_info.visual_observations[0])
    state_window.append(state_raw)
    state = np.vstack([np.expand_dims(np.array(s), 0) for s in state_window])
    score = 0

    while True:
        action = agent.act(state, eps)
        env_info = env.step(action)[brain_name]
        next_state_raw, reward, done = (process_state(env_info.visual_observations[0]),
                                        env_info.rewards[0],
                                        env_info.local_done[0])
        state_window.append(next_state_raw)
        next_state = np.vstack([np.expand_dims(np.array(s), 0) for s in state_window])
        agent.step(np.array([state]), action, reward, np.array([next_state]), done)
        state = next_state
        score += reward
        if done:
            break

    # Track scores and decay epsilon after every episode.
    scores_window.append(score)
    scores.append(score)
    avg_scores.append(np.mean(scores_window))
    eps = max(config.eps_min, config.eps_decay * eps)
    print('\rEpisode {}\tEps {:.2f}\tLast Score: {:.2f}\tAverage Score: {:.2f}'
          .format(i_episode, eps, score, np.mean(scores_window)), end="")

    # Evaluate the agent every 100 episodes.
    if i_episode % 100 == 0:
        test_scores.append(test(env, agent, i_episode, state_len=state_len))
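The process_state preprocessing function is also not shown. A plausible sketch, assuming each raw visual observation arrives with shape (1, 84, 84, 3) and float values in [0, 1], squeezes the leading dimension and converts the frame to a single grayscale channel; the project's actual preprocessing (cropping, resizing, channel ordering) may differ.

def process_state(visual_observation):
    # Assumed input shape: (1, H, W, 3) with values in [0, 1].
    frame = np.squeeze(visual_observation, axis=0)   # -> (H, W, 3)
    gray = np.dot(frame, [0.299, 0.587, 0.114])      # luminance -> (H, W)
    return gray.astype(np.float32)

With single-channel frames like these, state_window could be initialised before the loop as deque([np.zeros((84, 84), dtype=np.float32)] * state_len, maxlen=state_len), so that the stacked state always contains exactly state_len frames, even on the first step of an episode.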