Example #1
import numpy as np
import matplotlib.pyplot as plt

# MonteCarlo and SARSA are project-local classes (not shown on this page)
# exposing train(episodes) and a numpy array of state-action values, Q.


def drawForLambdaZero():
    montecarlo = MonteCarlo(100)
    print('Training Monte Carlo')
    montecarlo.train(500000)
    print('Training of Monte Carlo Completed')
    lambdaValue = 0
    meanSquaredErrors = []
    episodeIndex = []
    sarsa = SARSA(100, lambdaValue)
    print('Training SARSA and plotting graph')
    for i in range(1000):
        episodeIndex.append(i)
        sarsa.train(1)
        # Mean squared error between the SARSA and Monte Carlo estimates,
        # averaged over every entry of Q. The original divided by a
        # hard-coded 1000 rather than the actual element count.
        squareMean = np.sum(np.square(sarsa.Q - montecarlo.Q)) / float(
            montecarlo.Q.size)
        meanSquaredErrors.append(squareMean)

    fig = plt.figure("SARSAZERO")
    plt.plot(episodeIndex, meanSquaredErrors)
    fig.savefig('lambdaZero.png')
    plt.show()
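Examples #1 and #2 rely on project-specific MonteCarlo and SARSA classes that are not shown on this page (the SARSA in Example #3 takes different constructor arguments and is a separate class). The following is a minimal, hypothetical sketch of the interface the two snippets appear to assume; the Easy21-style 10 x 21 x 2 Q table is a guess, and the real classes may differ:

import numpy as np


class MonteCarlo:
    # Hypothetical stub: the real class is project-specific.
    def __init__(self, n0):
        self.N0 = n0                    # assumed exploration constant
        self.Q = np.zeros((10, 21, 2))  # assumed state-action value table

    def train(self, episodes):
        # Run Monte Carlo control for the given number of episodes,
        # updating self.Q in place.
        ...


class SARSA:
    # Hypothetical stub: the real class is project-specific.
    def __init__(self, n0, lambdaValue):
        self.N0 = n0
        self.lambdaValue = lambdaValue  # eligibility-trace decay
        self.Q = np.zeros((10, 21, 2))

    def train(self, episodes):
        # Run SARSA(lambda) control for the given number of episodes,
        # updating self.Q in place.
        ...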
Example #2
def drawForAllLambdas():
    montecarlo = MonteCarlo(100)
    print('Training Monte Carlo')
    montecarlo.train(500000)
    print('Training of Monte Carlo Completed')
    lambdas = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    squareMean = []
    # Total number of entries in Q; equivalent to the original
    # shape[0] * shape[1] * 2 but without assuming a fixed last axis.
    numberElements = montecarlo.Q.size
    for lambdaValue in lambdas:
        sarsa = SARSA(100, lambdaValue)
        print('Training SARSA', lambdaValue)
        sarsa.train(1000)
        print('Training of SARSA Completed')
        # Mean squared error against the Monte Carlo baseline
        squareMeanCalc = np.sum(
            np.square(sarsa.Q - montecarlo.Q)) / float(numberElements)
        squareMean.append(squareMeanCalc)
    fig = plt.figure("SARSA")
    # Plot the error for every lambda; the original sliced [1:10], which
    # silently dropped lambda = 0 and lambda = 1.0 from 'lambdaALL.png'.
    plt.plot(lambdas, squareMean)
    fig.savefig('lambdaALL.png')
    plt.show()
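The per-element mean squared error used in both examples can also be written with np.mean, which divides by the element count automatically. A small self-contained check with hypothetical random Q tables illustrates the equivalence:

import numpy as np

# Two hypothetical Q tables of the shape assumed above.
q_sarsa = np.random.rand(10, 21, 2)
q_mc = np.random.rand(10, 21, 2)

# np.mean over the squared difference divides by the number of entries,
# matching np.sum(...) / numberElements in Example #2.
mse = np.mean(np.square(q_sarsa - q_mc))
assert np.isclose(mse, np.sum(np.square(q_sarsa - q_mc)) / q_mc.size)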
Example #3
        "--- optimal policy for Q Learning agent using eligibility trace ---")
    q_agent.get_optimal_policy()
    q_agent.plot_policy()
    print(
        "--- plotting training for Q Learning agent using eligibility trace ---"
    )
    q_agent.plot(episode_steps)

    # Performing SARSA with standard environment
    sarsa_agent = SARSA(episodes=episodes,
                        lr=learning_rate,
                        discount=discount,
                        epsilon=epsilon,
                        king=False)
    print("--- starting training for SARSA agent --- ")
    episode_steps = sarsa_agent.train()
    print("--- optimal policy for SARSA agent ---")
    sarsa_agent.get_optimal_policy()
    sarsa_agent.plot_policy()
    print("--- plotting training for SARSA agent ---")
    sarsa_agent.plot(episode_steps)

    # Performing SARSA with standard environment using eligibility trace
    sarsa_agent = SARSA(episodes=episodes,
                        lr=learning_rate,
                        discount=discount,
                        epsilon=epsilon,
                        king=False,
                        _lambda=0.9)
    print("--- starting training for SARSA agent using eligibility trace --- ")
    episode_steps = sarsa_agent.train()
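
    # The king=False argument suggests the environment also supports the
    # king's-moves variant of the windy gridworld (diagonal actions, as in
    # Sutton and Barto's exercise). Assuming king=True simply enables those
    # extra actions, the same driver pattern would extend as sketched below.
    # This is a hypothetical continuation, not code from the source; it
    # reuses the episodes, learning_rate, discount, and epsilon variables
    # from the surrounding (truncated) snippet.
    sarsa_king_agent = SARSA(episodes=episodes,
                             lr=learning_rate,
                             discount=discount,
                             epsilon=epsilon,
                             king=True)
    print("--- starting training for SARSA agent with king's moves ---")
    episode_steps = sarsa_king_agent.train()
    sarsa_king_agent.get_optimal_policy()
    sarsa_king_agent.plot_policy()
    sarsa_king_agent.plot(episode_steps)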