Exemple #1
0
def test_sarsa(iterations=1000, mlambda=None, n0=100, avg_it=50):
    """Run TD-control (Sarsa) learning for one lambda or for a list of lambdas.

    If ``mlambda`` is not a list, a single agent is trained with that value
    (0.5 when ``mlambda`` is None) and ``agent.show_statevalue_function()``
    is called afterwards.  If ``mlambda`` is a list, a fresh ``Environment``
    and ``Agent`` are created for every value, the return value of each
    ``TD_control`` call is collected, and the collected values are plotted
    against ``mlambda`` with the y axis labelled ``'mse'``.

    Args:
        iterations: forwarded as the first argument of ``Agent.TD_control``.
        mlambda: a single lambda value, a list of lambda values, or None.
        n0: forwarded as the second argument of the ``Agent`` constructor.
        avg_it: forwarded as the third argument of ``Agent.TD_control``.

    Raises:
        IOError / OSError: if the Monte Carlo reference pickle cannot be
            opened.
    """
    # Single-argument print(...) produces the same output on Python 2 and
    # is also valid syntax on Python 3.
    print("\n-------------------")
    print("TD control Sarsa")
    print("run for n. iterations: " + str(iterations))
    print("plot graph mse vs episodes for lambda equal 0 and lambda equal 1")
    print("list (standard output) win percentage for values of lambda 0, 0.1, 0.2, ..., 0.9, 1")

    # Use a context manager so the file handle is closed even if unpickling
    # fails.  NOTE: pickle must only be used on trusted files; this path is a
    # project-local data file.
    with open("Data/Qval_func_1000000_MC_control.pkl", "rb") as q_file:
        monte_carlo_Q = pickle.load(q_file)
    # NOTE(review): n_elements is not used anywhere below; it is kept so the
    # function still fails early when the reference data has an unexpected
    # type.  Confirm whether it can be dropped.
    n_elements = monte_carlo_Q.shape[0] * monte_carlo_Q.shape[1] * 2

    if not isinstance(mlambda, list):
        # If no value is passed for lambda, default to 0.5.
        lam = 0.5 if mlambda is None else mlambda
        # Learn.
        game = Environment()
        agent = Agent(game, n0)
        agent.TD_control(iterations, lam, avg_it)
        # Plot results.
        agent.show_statevalue_function()
        return

    # Test each value of lambda with a fresh environment and agent.
    mse = []
    for lam in mlambda:
        game = Environment()
        agent = Agent(game, n0)
        # presumably TD_control returns the final mean squared error for
        # this lambda -- TODO confirm against Agent.TD_control.
        mse.append(agent.TD_control(iterations, lam, avg_it))
    plt.plot(mlambda, mse)
    plt.ylabel('mse')
    plt.show()
Exemple #2
0
def test_td_control(iter=50000):
    """Train and plot one fresh ``Agent`` per lambda in 0, 0.1, ..., 1.0.

    For every lambda value a new ``Agent()`` is created, its ``TD_control``
    method is called with ``iter`` and that lambda, and ``plot_state()`` is
    invoked on it.

    Args:
        iter: forwarded as the first argument of ``Agent.TD_control``.
    """
    lambda_grid = (0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)
    for lam in lambda_grid:
        # A new agent per lambda so runs do not share an Agent instance.
        learner = Agent()
        learner.TD_control(iter, lam)
        learner.plot_state()