def drawMonteCarlo():
    """Train Monte Carlo agents for increasing budgets and save one 3D plot each.

    For every value in a fixed list of training lengths a fresh
    ``MonteCarlo(100)`` agent is created, trained via ``train(n)``, handed to
    ``plotMonte`` together with a 3D subplot, and the resulting figure is
    written to ``MonteCarlo<n>.png`` in the current working directory.
    Progress is reported on stdout.
    """
    for run_length in (10, 100, 1000, 10000, 100000, 500000, 1000000):
        print('Creating Monte Carlo Agent...')
        agent = MonteCarlo(100)
        print('Monte Carlo created')

        print('Training Monte Carlo for', run_length, 'iterations.')
        agent.train(run_length)
        print('Training completed, plotting image')

        # Each run gets its own named figure so earlier plots are not reused.
        canvas = plt.figure(f'Monte{run_length}')
        axes3d = canvas.add_subplot(111, projection='3d')
        # presumably plotMonte draws the agent onto the given axes;
        # its return value is not needed here.
        plotMonte(axes3d, agent)
        canvas.savefig(f'MonteCarlo{run_length}.png')

    # NOTE(review): placed after the loop so all figures are displayed
    # together, matching the other draw* functions; confirm this is the
    # intended position.
    plt.show()
def drawForLambdaZero():
    """Plot the per-episode error of a lambda=0 SARSA agent against Monte Carlo.

    A ``MonteCarlo(100)`` agent is trained with ``train(500000)`` and its ``Q``
    is used as the reference. A ``SARSA(100, 0)`` agent is then trained one
    ``train(1)`` call at a time, 1000 times; after each call the mean squared
    difference between the two ``Q`` arrays is recorded. The resulting curve
    is saved to ``lambdaZero.png`` and shown on screen.
    """
    montecarlo = MonteCarlo(100)
    print('Training Monte Carlo')
    montecarlo.train(500000)
    print('Training of Monte Carlo Completed')

    lambdaValue = 0
    episodes = 1000
    sarsa = SARSA(100, lambdaValue)

    # Normalise by the number of Q entries, using the same expression as
    # drawForAllLambdas, so both plots are on the same scale. The previous
    # divisor was the literal 1000, i.e. the episode count, which is unrelated
    # to the size of Q.
    # NOTE(review): the trailing 2 mirrors drawForAllLambdas (presumably the
    # size of the last axis of Q); confirm.
    numberElements = montecarlo.Q.shape[0] * montecarlo.Q.shape[1] * 2

    learningRate = []
    print('Training SARSA and plotting graph')
    for _ in range(episodes):
        sarsa.train(1)
        squareMean = (np.sum(np.square(sarsa.Q - montecarlo.Q))
                      / float(numberElements))
        learningRate.append(squareMean)
    learningRateIndex = list(range(episodes))

    fig = plt.figure("SARSAZERO")
    plt.plot(learningRateIndex, learningRate)
    fig.savefig('lambdaZero.png')
    plt.show()
def drawForAllLambdas():
    """Plot the error of SARSA against Monte Carlo for a range of lambdas.

    A ``MonteCarlo(100)`` agent is trained with ``train(500000)`` and its ``Q``
    serves as the reference. For each lambda in 0, 0.1, ..., 1.0 a new
    ``SARSA(100, lambda)`` agent is trained with ``train(1000)`` and the mean
    squared difference between its ``Q`` and the reference is stored. The
    curve is saved to ``lambdaALL.png`` and shown on screen.
    """
    montecarlo = MonteCarlo(100)
    print('Training Monte Carlo')
    montecarlo.train(500000)
    print('Training of Monte Carlo Completed')

    lambdas = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    reference = montecarlo.Q
    # Divisor for the mean: product of the first two axes of Q times 2.
    entry_count = float(reference.shape[0] * reference.shape[1] * 2)

    errors = []
    for lam in lambdas:
        learner = SARSA(100, lam)
        print('Training SARSA', lam)
        learner.train(1000)
        print('Training of SARSA Completed')

        squared_gap = np.square(learner.Q - reference)
        errors.append(np.sum(squared_gap) / entry_count)

    canvas = plt.figure("SARSA")
    # NOTE(review): only indices 1..9 are plotted, so lambda = 0 and
    # lambda = 1.0 are trained but left off the graph; confirm this is
    # intentional.
    plt.plot(lambdas[1:10], errors[1:10])
    canvas.savefig('lambdaALL.png')
    plt.show()