def drawForLambdaZero():
    """Plot how SARSA with lambda = 0 approaches a Monte Carlo baseline.

    A ``MonteCarlo`` agent is trained first and its ``Q`` table is used as
    the reference.  A ``SARSA`` agent built with lambda = 0 is then trained
    through 1000 successive ``train(1)`` calls; after each call the mean
    squared difference between the two ``Q`` tables is recorded.  The
    resulting curve is saved to ``lambdaZero.png`` and shown on screen.

    The error is normalised by the number of Q entries, using the same
    expression as ``drawForAllLambdas`` so both plots share one scale.
    (Previously it was divided by a hard-coded 1000, which only happened to
    match the number of training iterations.)

    Returns:
        None.  The function is used for its side effects (console output,
        a saved PNG and an interactive matplotlib window).
    """
    # NOTE(review): the meaning of the constructor argument 100 and of the
    # train() argument is defined by MonteCarlo/SARSA elsewhere -- presumably
    # a hyper-parameter and an episode count respectively; confirm there.
    montecarlo = MonteCarlo(100)
    print('Training Monte Carlo')
    montecarlo.train(500000)
    print('Training of Monte Carlo Completed')

    episodes = 1000
    # Number of Q entries, computed exactly as in drawForAllLambdas.
    # NOTE(review): the factor 2 is presumably the size of the action axis.
    numberElements = montecarlo.Q.shape[0] * montecarlo.Q.shape[1] * 2

    sarsa = SARSA(100, 0)
    print('Training SARSA and plotting graph')
    learningRate = []
    for _ in range(episodes):
        # Train incrementally so the error can be sampled after every call.
        sarsa.train(1)
        squareMean = np.sum(
            np.square(sarsa.Q - montecarlo.Q)) / float(numberElements)
        learningRate.append(squareMean)

    fig = plt.figure("SARSAZERO")
    plt.plot(list(range(episodes)), learningRate)
    fig.savefig('lambdaZero.png')
    plt.show()
def drawForAllLambdas():
    """Plot the final SARSA error against a Monte Carlo baseline per lambda.

    A ``MonteCarlo`` agent is trained once and its ``Q`` table serves as the
    reference.  For every lambda in 0, 0.1, ..., 1.0 a fresh ``SARSA`` agent
    is created, trained with ``train(1000)``, and the squared difference of
    the two ``Q`` tables is summed and divided by
    ``Q.shape[0] * Q.shape[1] * 2``.  The curve is saved to
    ``lambdaALL.png`` and shown on screen.

    Returns:
        None.  The function is used for its side effects (console output,
        a saved PNG and an interactive matplotlib window).
    """
    baseline = MonteCarlo(100)
    print('Training Monte Carlo')
    baseline.train(500000)
    print('Training of Monte Carlo Completed')

    lambdas = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

    # Normalising constant for the error; converted to float once up front.
    firstDim, secondDim = baseline.Q.shape[0], baseline.Q.shape[1]
    entryCount = float(firstDim * secondDim * 2)

    errors = []
    for lam in lambdas:
        agent = SARSA(100, lam)
        print('Training SARSA', lam)
        agent.train(1000)
        print('Training of SARSA Completed')
        difference = agent.Q - baseline.Q
        errors.append(np.sum(np.square(difference)) / entryCount)

    fig = plt.figure("SARSA")
    # NOTE(review): only indices 1..9 are drawn, so lambda = 0 and
    # lambda = 1.0 are trained but left out of the plot -- confirm intended.
    plt.plot(lambdas[1:10], errors[1:10])
    fig.savefig('lambdaALL.png')
    plt.show()
"--- optimal policy for Q Learning agent using eligibility trace ---") q_agent.get_optimal_policy() q_agent.plot_policy() print( "--- plotting training for Q Learning agent using eligibility trace ---" ) q_agent.plot(episode_steps) # Performing SARSA with standard environment sarsa_agent = SARSA(episodes=episodes, lr=learning_rate, discount=discount, epsilon=epsilon, king=False) print("--- starting training for SARSA agent --- ") episode_steps = sarsa_agent.train() print("--- optimal policy for SARSA agent ---") sarsa_agent.get_optimal_policy() sarsa_agent.plot_policy() print("--- plotting training for SARSA agent ---") sarsa_agent.plot(episode_steps) # Performing SARSA with standard environment using eligibility trace sarsa_agent = SARSA(episodes=episodes, lr=learning_rate, discount=discount, epsilon=epsilon, king=False, _lambda=0.9) print("--- starting training for SARSA agent using eligibility trace --- ") episode_steps = sarsa_agent.train()