예제 #1
0
def run_sarsa(start, goal, Xrange, Vrange, plot_data_pid):
    """Train one Sarsa agent per ``n`` in 1..8 and plot the collected episodes.

    ``start``, ``goal``, ``Xrange`` and ``Vrange`` are forwarded unchanged to
    the ``Sarsa`` constructor. ``plot_data_pid`` is placed first in the list
    handed to ``plot_with_n``, followed by each trained agent's ``episodes``
    attribute in increasing order of ``n``.

    Returns nothing; the only visible output is the call to ``plot_with_n``.
    """

    def _episodes_for(n_value):
        # Build and train a fresh agent for this n, then expose its episodes.
        agent = Sarsa(start, goal, Xrange, Vrange, n=n_value)
        agent.train(epoch=EPOCH, max_episode_length=MAX_EPISODE_LENGTH)
        return agent.episodes

    # The reference series goes first so it occupies index 0 of the plot data.
    series = [plot_data_pid]
    series.extend(_episodes_for(n_value) for n_value in range(1, 9))

    plot_with_n(series)
# avg = np.average(np.array(rewards), axis=0)
# std = np.std(np.array(rewards), axis=0)
# maximumEpisodes = avg.shape[0]
# plt.errorbar(np.array([i for i in range(maximumEpisodes)]), avg, std, marker='^', ecolor='g')
# plt.show()

# NOTE: `type` shadows the builtin of the same name. It is left as-is because it
# is a module-level name that other code may read; it is only referenced by the
# commented-out figure path below.
type = "linear"
# best parameter, order 3, e 0.2, alpha 0.5
# best parameter, order 5, e 0.2, alpha 0.5
# Sweep over e, order and alpha. Each list currently holds a single value; the
# commented-out tails are further candidate values.
for e in [0.3]:#, 0.1, 0.01, 0.3, 0.4]:
    for order in [3]: #, 5]:
        for alpha in [0.01]:#, 0.0001, 0.0005, 0.0009, 0.001, 0.005, 0.009, 0.01, 0.05, 0.09, 0.1, 0.5, 0.9]:
            # One entry per trial: the `reward` attribute of a freshly trained agent.
            rewards = []
            print("Alpha: ", alpha)
            for t in tqdm(range(trails)):
                # print("Alpha: %s, Trail: %s" %(alpha, t))
                td = Sarsa(gamma, alpha, env, state_space, steps, e, plot=plot, order=order, discount=discount)
                td.train(episodes)
                rewards.append(td.reward)

            # Aggregate across trials (axis 0), giving one mean/std per position
            # along the remaining axis.
            # NOTE(review): assumes every td.reward has the same length — confirm.
            rewards_arr = np.array(rewards)
            avg = np.average(rewards_arr, axis=0)
            std = np.std(rewards_arr, axis=0)
            maximumEpisodes = avg.shape[0]
            plt.errorbar(np.arange(maximumEpisodes), avg, std, marker='^', ecolor='g')
            #name = "Sarsa/figures/%s/cartPole_type_%s_order%s_alpha%s_e%s.jpg" %(type, type,  order, alpha, e)
            # NOTE: despite the .jpg suffix, this file receives a pickle of `avg`,
            # not an image; the name is kept so existing readers still find it.
            name = "Grid_alpha%s_e%s.jpg" % (alpha, e)
            # Use a context manager so the file handle is closed (and flushed)
            # deterministically instead of being left to garbage collection.
            with open(name, "wb") as fh:
                pickle.dump(avg, fh)
            plt.xlabel("Number of episodes")
            plt.ylabel("Total Reward")
            # plt.savefig(name)
            # plt.close()