Exemple #1
0
def problem6():
    """
    Run the GA agent on the cart-pole domain, one learning curve per trial.

    For every trial the agent is trained for a fixed number of generations,
    the evaluator is told the trial has ended, the agent is reset, and the
    evaluator is asked to plot to ``learningCurve_cartpole_GA_<trial>.png``.

    The value handed to ``GAInit`` is ``numActions * (k + 1) ** m``;
    presumably this is the length of the policy parameter vector for an
    order-``k`` basis over ``m`` state variables (TODO confirm against
    ``GAInit`` / ``CartPoleEvaluation``).
    """
    print("Problem 6")

    # Environment description.
    state_dims = 4
    action_count = 2

    # Size of the policy search.
    trial_count = 50
    generation_count = 20
    population = 20
    episodes_per_eval = 10

    # GA hyperparameters (forwarded to GA by keyword below).
    elite_count = 10
    truncate_count = 5
    step_size = 0.1
    order = 3

    evaluator = CartPoleEvaluation(k=order)
    init_size = action_count * np.power(order + 1, state_dims)
    init_fn = GAInit(init_size)

    ga_agent = GA(
        population,
        evaluator,
        init_fn,
        numElite=elite_count,
        numTruncate=truncate_count,
        alpha=step_size,
        numEpisodes=episodes_per_eval,
    )

    plot_title = "Learning Curve - Cartpole with GA Agent"
    for trial_idx in range(trial_count):
        print("Trial ", trial_idx)
        for gen_idx in range(generation_count):
            print("Generation ", gen_idx)
            ga_agent.train()

        # Close out the trial before the agent is reset for the next one.
        evaluator.endTrial()
        ga_agent.reset()

        figure_file = 'learningCurve_cartpole_GA_{}.png'.format(trial_idx)
        evaluator.plot(figure_file, plot_title)
Exemple #2
0
def problem3():
    """
    Run the GA agent on the More-Watery 687-Gridworld domain.

    For every trial the agent is trained for a fixed number of generations,
    the evaluator is told the trial has ended, the agent is reset, and the
    evaluator is asked to plot to ``learningCurve_gridworld_GA_<trial>.png``.

    ``GAInit`` receives ``num_states * num_actions``; presumably one
    parameter per state-action pair of a tabular policy (TODO confirm
    against ``GAInit`` / ``GridworldEvaluation``).
    """
    print("Problem 3")

    # Environment description.
    state_count = 25
    action_count = 4

    # Size of the policy search.
    trial_count = 50
    generation_count = 100
    population = 30
    episodes_per_eval = 20

    # GA hyperparameters (forwarded to GA by keyword below).
    elite_count = 20
    truncate_count = 5
    step_size = 1.25

    evaluator = GridworldEvaluation()
    init_fn = GAInit(state_count * action_count)

    ga_agent = GA(
        population,
        evaluator,
        init_fn,
        numElite=elite_count,
        numTruncate=truncate_count,
        alpha=step_size,
        numEpisodes=episodes_per_eval,
    )

    plot_title = "Learning Curve - Gridworld with GA Agent"
    for trial_idx in range(trial_count):
        print("Trial ", trial_idx)
        for gen_idx in range(generation_count):
            print("Generation ", gen_idx)
            ga_agent.train()

        # Close out the trial before the agent is reset for the next one.
        evaluator.endTrial()
        ga_agent.reset()

        figure_file = 'learningCurve_gridworld_GA_{}.png'.format(trial_idx)
        evaluator.plot(figure_file, plot_title)
Exemple #3
0
def problem6():
    """
    Run the GA on the cart-pole domain and plot/save the learning curve.

    Trains a ``GA`` for ``numTrials`` independent trials of ``numIterations``
    calls to ``ga.train()`` each. After every call the contents of
    ``evaluate.batchReturn`` are copied into the matching slice of a
    ``(numTrials, numIterations * numEpisodes * popSize)`` results matrix.
    The per-episode mean and standard deviation across trials are plotted
    with error bars, the figure is written to ``images/`` and the raw,
    mean and std arrays are written to ``data/``.

    Policy parameterization: every individual is a zero-initialised vector
    of length ``2 * fourier_param ** 4``; presumably two actions times a
    Fourier feature vector over the four state variables (TODO confirm
    against ``EvaluateCartpole``).

    Side effects: creates the ``images`` and ``data`` directories if they
    are missing, writes one PNG and three ``.npy`` files, and opens an
    interactive matplotlib window.
    """
    import os  # local import: only needed to create the output folders

    print("ga-cartpole-softmax_theta_phi")

    # Defined before initPopFn so the closure's dependency is visible
    # where it is used.
    fourier_param = 4

    def initPopFn(pop_size):
        """Return an all-zero initial population, one row per individual."""
        return np.zeros((pop_size, 2 * fourier_param**4))

    # NOTE(review): the result of this call is never used; it looks like a
    # leftover smoke test of the environment. Kept in case Cartpole touches
    # shared state (e.g. the global RNG) -- confirm and remove.
    state = np.array([0, 0, 0, 0])
    env = Cartpole()
    env.nextState(state, 0)

    popSize = 10
    numElite = 3
    numEpisodes = 5
    evaluate = EvaluateCartpole()

    ga = GA(popSize, evaluate, initPopFn, numElite, numEpisodes)

    numTrials = 10
    numIterations = 100
    episodes_per_iteration = numEpisodes * popSize
    total_episodes = numIterations * episodes_per_iteration

    # Create the output folders up front: a missing or unwritable directory
    # should fail now, not after the whole training run has completed.
    os.makedirs("images", exist_ok=True)
    os.makedirs("data", exist_ok=True)

    results = np.zeros((numTrials, total_episodes))

    for trial in range(numTrials):
        ga.reset()
        for i in range(numIterations):
            # Progress output every fifth iteration.
            if i % 5 == 0:
                print("cart ga: ", "trial: ", trial, "/", numTrials,
                      " iteration: ", i, "/", numIterations)
            ga.train()

            # Assumes evaluate.batchReturn holds exactly the
            # numEpisodes * popSize returns of this train() call; any other
            # length makes the slice assignment raise.
            batch_start = i * episodes_per_iteration
            batch_end = batch_start + episodes_per_iteration
            results[trial, batch_start:batch_end] = np.array(
                evaluate.batchReturn)

    # Statistics across trials for every episode index.
    average_results = np.average(results, axis=0)
    std_results = np.std(results, axis=0)
    maximumEpisodes = average_results.shape[0]
    max_avg = np.max(average_results)

    plt.errorbar(np.arange(maximumEpisodes),
                 average_results,
                 std_results,
                 marker='.',
                 ecolor='aqua')
    plt.grid(True)
    # Mark and label the best mean return.
    plt.axhline(max_avg)
    plt.text(0,
             max_avg,
             "max: " + str(round(max_avg, 2)),
             fontsize=15,
             backgroundcolor='w')

    # File stem encodes the run parameters and the wall-clock time.
    plt_name = "ga_cartpole"
    param_string = "_numTrials_" + str(numTrials) + "_numIter_" \
        + str(numIterations) + "_popSize_" + str(popSize)
    dt_string = datetime.now().strftime("_t_%H_%M")

    plt_name += param_string
    plt_name += dt_string
    print("plt_name=", plt_name)
    plt_path = "images/" + plt_name + ".png"

    plt.savefig(plt_path, dpi=200)

    # Persist the numbers before the blocking plt.show(), so they survive
    # the process being killed while the window is open.
    np.save("data/" + "results_" + plt_name, results)
    np.save("data/" + "average_results_" + plt_name, average_results)
    np.save("data/" + "std_results_" + plt_name, std_results)

    plt.show()
Exemple #4
0
def problem3():
    """
    Run the GA on the More-Watery 687-Gridworld domain and plot/save results.

    Trains a ``GA`` for ``numTrials`` independent trials of ``numIterations``
    calls to ``ga.train()`` each. After every call the contents of
    ``evaluate.batchReturn`` are copied into the matching slice of a
    ``(numTrials, numIterations * numEpisodes * popSize)`` results matrix.
    The per-episode mean and standard deviation across trials are plotted
    with error bars, the figure is written to ``images/`` and the raw,
    mean and std arrays are written to ``data/``.

    Every individual of the initial population is a 100-dimensional vector
    drawn from a standard multivariate normal; presumably 25 states times
    4 actions of a tabular softmax policy (TODO confirm against
    ``Evaluate``).

    Side effects: creates the ``images`` and ``data`` directories if they
    are missing, writes one PNG and three ``.npy`` files, and opens an
    interactive matplotlib window.
    """
    import os  # local import: only needed to create the output folders

    def initPopFn(pop_size):
        """Sample ``pop_size`` parameter vectors from N(0, I) in 100-D."""
        theta_zeros = np.zeros(100)
        return np.random.multivariate_normal(theta_zeros,
                                             np.identity(len(theta_zeros)),
                                             size=pop_size)

    print("ga-gridworld-tabular_softmax")

    popSize = 10
    evaluate = Evaluate()
    numElite = 4
    numEpisodes = 10

    ga = GA(popSize, evaluate, initPopFn, numElite, numEpisodes)

    numTrials = 20
    numIterations = 25
    episodes_per_iteration = numEpisodes * popSize
    total_episodes = numIterations * episodes_per_iteration

    # Create the output folders up front: a missing or unwritable directory
    # should fail now, not after the whole training run has completed.
    os.makedirs("images", exist_ok=True)
    os.makedirs("data", exist_ok=True)

    results = np.zeros((numTrials, total_episodes))

    for trial in range(numTrials):
        ga.reset()
        for i in range(numIterations):
            # Progress output every fifth iteration.
            if i % 5 == 0:
                print("ga: ", "trial: ", trial, "/", numTrials, " iteration: ",
                      i, "/", numIterations)
            ga.train()

            # Assumes evaluate.batchReturn holds exactly the
            # numEpisodes * popSize returns of this train() call; any other
            # length makes the slice assignment raise.
            batch_start = i * episodes_per_iteration
            batch_end = batch_start + episodes_per_iteration
            results[trial, batch_start:batch_end] = np.array(
                evaluate.batchReturn)

    # Statistics across trials for every episode index.
    average_results = np.mean(results, axis=0)
    std_results = np.std(results, axis=0)
    maximumEpisodes = average_results.shape[0]
    max_avg = np.max(average_results)

    plt.errorbar(np.arange(maximumEpisodes),
                 average_results,
                 std_results,
                 marker='.',
                 ecolor='aqua')
    plt.grid(True)
    # Mark and label the best mean return.
    plt.axhline(max_avg)
    plt.text(0,
             max_avg,
             "max: " + str(round(max_avg, 2)),
             fontsize=15,
             backgroundcolor='w')

    # File stem encodes the run parameters and the wall-clock time.
    plt_name = "ga_gridworld"
    param_string = "_numTrials_" + str(numTrials) + "_numIter_" + str(
        numIterations)
    dt_string = datetime.now().strftime("_%H_%M")

    plt_name += param_string
    plt_name += dt_string
    print("plt_name=", plt_name)
    plt_path = "images/" + plt_name + ".png"

    plt.savefig(plt_path, dpi=200)

    # Persist the numbers before the blocking plt.show(), so they survive
    # the process being killed while the window is open.
    np.save("data/" + "results_" + plt_name, results)
    np.save("data/" + "average_results_" + plt_name, average_results)
    np.save("data/" + "std_results_" + plt_name, std_results)

    plt.show()