Example #1
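A DQN training loop over a Unity ML-Agents environment: the agent acts epsilon-greedily on vector observations, stores each transition via agent.step, and decays epsilon multiplicatively after every episode, with a periodic evaluation every 100 episodes.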
    agent = DQNAgent(config)
    agent.create_dirs()

    eps = config.eps_start

    for i_episode in range(1, config.n_episodes + 1):
        # Reset the environment and the score
        env_info = env.reset(train_mode=True)[brain_name]
        state = env_info.vector_observations[0]
        score = 0
        while True:
            action = agent.act(state, eps)           # epsilon-greedy action
            env_info = env.step(action)[brain_name]  # advance the environment
            next_state = env_info.vector_observations[0]
            reward = env_info.rewards[0]
            done = env_info.local_done[0]
            agent.step(state, action, reward, next_state, done)  # store transition and learn
            state = next_state
            score += reward
            if done:
                break
        scores_window.append(score)  # recent scores for the moving average
        scores.append(score)
        avg_scores.append(np.mean(scores_window))
        eps = max(config.eps_min, config.eps_decay * eps)  # decay epsilon toward its floor
        print(
            '\rEpisode {}\tEps {:.2f}\tLast Score: {:.2f}\tAverage Score: {:.2f}'
            .format(i_episode, eps, score, np.mean(scores_window)),
            end="")
        if i_episode % 100 == 0:
            # Periodically evaluate the current policy
            test_scores.append(test(env, agent, i_episode))
            test_scores_i.append(i_episode)
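Both snippets are fragments of a larger training script, so names such as env, brain_name, scores_window, and config are defined elsewhere. A minimal sketch of the setup the first loop assumes is shown below; the package, build path, and window size are assumptions, not taken from the original:

# Minimal sketch of the assumed setup (package, path, and window size are assumptions).
import numpy as np
from collections import deque
from unityagents import UnityEnvironment  # old Unity ML-Agents API matching the calls above

env = UnityEnvironment(file_name="Banana.app")  # hypothetical environment build path
brain_name = env.brain_names[0]

scores = []                          # return of every episode
scores_window = deque(maxlen=100)    # recent returns for the moving average (size assumed)
avg_scores = []                      # moving average recorded per episode
test_scores, test_scores_i = [], []  # periodic evaluation results and their episode indices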
Example #2
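The same loop adapted to visual observations: each raw frame is run through process_state, pushed into a sliding frame window, and the stacked window becomes the state the agent sees. Note the extra leading batch dimension added when calling agent.step.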
env_info = env.reset(train_mode=True)[brain_name]
state_raw = process_state(env_info.visual_observations[0])
state_window.append(state_raw)
# Stack the window of preprocessed frames into a single state tensor
state = np.vstack([np.expand_dims(np.array(s), 0) for s in state_window])
score = 0
while True:
    action = agent.act(state, eps)           # epsilon-greedy action
    env_info = env.step(action)[brain_name]  # advance the environment
    next_state_raw = process_state(env_info.visual_observations[0])
    reward = env_info.rewards[0]
    done = env_info.local_done[0]
    state_window.append(next_state_raw)      # slide the frame window forward
    next_state = np.vstack([np.expand_dims(np.array(s), 0) for s in state_window])
    # The visual agent expects a leading batch dimension on the stacked states
    agent.step(np.array([state]), action, reward, np.array([next_state]), done)
    state = next_state
    score += reward
    if done:
        break
scores_window.append(score)  # recent scores for the moving average
scores.append(score)
avg_scores.append(np.mean(scores_window))
eps = max(config.eps_min, config.eps_decay * eps)  # decay epsilon toward its floor
print(
    '\rEpisode {}\tEps {:.2f}\tLast Score: {:.2f}\tAverage Score: {:.2f}'
    .format(i_episode, eps, score, np.mean(scores_window)),
    end="")
if i_episode % 100 == 0:
    # Periodically evaluate the agent, passing the frame-stack length
    test_scores.append(test(env, agent, i_episode, state_len=state_len))
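The visual variant additionally relies on a frame window and a process_state preprocessing function whose definitions are not shown. A minimal sketch under stated assumptions follows; the state_len value and the body of process_state are hypothetical, only the names appear in the original:

from collections import deque
import numpy as np

state_len = 4                           # number of stacked frames; value is an assumption
state_window = deque(maxlen=state_len)  # sliding window of preprocessed frames

def process_state(visual_obs):
    # Hypothetical preprocessing: the original body is not shown.
    # Unity visual observations arrive as (1, H, W, 3) arrays in [0, 1];
    # reduce each frame to a single grayscale channel.
    frame = np.squeeze(visual_obs)               # (H, W, 3)
    return np.dot(frame, [0.299, 0.587, 0.114])  # (H, W)

Pre-filling state_window with copies of the first frame (or zeros) before the loop keeps the stacked state at a fixed shape of (state_len, H, W) from the very first step, which the agent's network input requires.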