Esempio n. 1
0
        # Body of one training episode: reset the environment, let the agent
        # act for up to max_ep_len steps, then record the episode score.
        env.reset()
        # NOTE(review): `done` is assigned here but never read in the visible
        # code; the loop below relies on exp.done instead.
        done = False
        curr_score = 0
        for act_i in range(max_ep_len):
            exp = agent.act(env, eps)
            # Accumulate the reward reported for this step.
            curr_score = curr_score + exp.reward
            # Leave the step loop as soon as the step reports done; no
            # learn() call is made for that final step.
            if exp.done:
                break
            # Trigger learning on every learn_every-th step (including step 0).
            # presumably 64 is the mini-batch size -- TODO confirm against
            # agent.learn's signature.
            if act_i % learn_every == 0:
                agent.learn(64)
        # Record the episode score in the full history and in score_window
        # (presumably a bounded window used for the running mean -- verify
        # where score_window is created).
        score_list.append(curr_score)
        score_window.append(curr_score)
        # Every update_every episodes, call agent.update_target().
        if episode % update_every == 0:
            agent.update_target()
        # Progress report every 20 episodes.
        if episode % 20 == 0:
            print("episode " + str(episode) + ", mean score: " +
                  str(np.mean(score_window)))
        # Sample the windowed mean every 100 episodes for the final summary.
        if episode % 100 == 0:
            mean_score_list.append(np.mean(score_window))
    print("test completed with scores: " + str(mean_score_list))

    # Persist the agent's local checkpoint for this test run.
    agent.save_checkpoint(local_checkpoint="test_out_3/qnet_" + env_sel +
                          "_local_test_" + str(test_i) + ".ckp")
    # Store the score histories together with the parameters that produced
    # them. The context manager closes (and therefore flushes) the file even
    # if pickling raises, instead of leaving the handle open until the
    # garbage collector happens to reclaim it.
    with open(
            "test_out_3/qnet_" + env_sel + "_scores_and_pars_test_" +
            str(test_i) + ".p", "wb") as scores_file:
        pickle.dump((score_list, mean_score_list, pars), scores_file)

    # Keep the best sampled mean score of this run next to its parameters.
    # Note: max() raises ValueError if mean_score_list is empty.
    results.append(max(mean_score_list))
    result_pars.append(pars)
Esempio n. 2
0
    score_window.append(curr_score)

    # Every 20 episodes: print progress, refresh the live score plot and,
    # if the windowed mean beats the target score, save a snapshot.
    if episode % 20 == 0:
        print("Episode " + str(episode) + ". Eps = " + str(eps) +
              ",  mean_score: " + str(np.mean(score_window)))
        # Redraw the score curve from scratch on the existing axes.
        ax.clear()
        ax.plot(np.arange(len(score_list)), score_list)
        plt.ylabel('Score')
        plt.xlabel('Episode #')
        plt.draw()
        # Brief pause so the GUI event loop can process the redraw.
        plt.pause(.001)
        if np.mean(score_window) > env_pars.target_score:
            agent.save_checkpoint(target_checkpoint="qnet_" + env_sel +
                                  "_target_episode_" + str(episode) + ".ckp",
                                  local_checkpoint="qnet_" + env_sel +
                                  "_local_episode_" + str(episode) + ".ckp",
                                  delayer_checkpoint="qnet_" + env_sel +
                                  "_delayer_episode_" + str(episode) + ".ckp")
            # Use a context manager so the score file is closed (and flushed)
            # right away; this branch can run many times during training and
            # previously left one open handle behind per snapshot.
            with open("qnet_" + env_sel + "_scores_" + str(episode) + ".p",
                      "wb") as scores_file:
                pickle.dump(score_list, scores_file)

# Final print and save.
# NOTE(review): only the target checkpoint is written here, whereas the
# periodic save above also writes local and delayer checkpoints -- confirm
# this is intentional.
agent.save_checkpoint(target_checkpoint="qnet_" + env_sel +
                      "_target_final.ckp")
# Write the full score history; the context manager guarantees the file is
# closed and flushed instead of leaving the handle open.
with open("qnet_" + env_sel + "_scores_final.p", "wb") as scores_file:
    pickle.dump(score_list, scores_file)
# Plot the complete score curve.
plt.plot(np.arange(len(score_list)), score_list)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.draw()