# --- Hyper-parameters --------------------------------------------------------
# The trailing notes are the author's tuning log (presumably "value -> episodes
# needed to solve"; confirm against the experiment records).
gamma = 1         # best: 1 (solved 460)
epsilon = 1       # best: 1 (solved 460)
mineps = 0.01     # best: 0.01 (solved 460); 0.01 -> 188
epsilonRate = 12  # best: 40 (solved 460); 12 -> 188; 35 -> 523

# --- Per-episode history (one entry appended per episode played) -------------
x, yscores, yeps, yalpha = [], [], [], []

environment = gym.make('CartPole-v0')
agent = QLearnTabular.QLearnTabular(
    nStates, environment, alpha, gamma, epsilon
)
streak = 0

for i in range(maxIter):
    t = runEpisode.play(environment, agent, False)

    # Record this episode's number (1-based), score and current schedules.
    x.append(i + 1)
    yscores.append(agent.score)
    yalpha.append(agent.alpha)
    yeps.append(agent.epsilon)

    # Stop as soon as the mean of the last 100 scores reaches 195; the check
    # only starts once at least 100 episodes have been played.
    if i + 1 >= 100 and sum(yscores[-100:]) / 100 >= 195:
        print("Solved after {} episodes!".format(i + 1))
        break

    agent.reset()

    # Alternative learning-rate schedule that was tried and disabled:
    #   agent.alpha = max(alpha * (0.85 ** (i//alphaRate)), minAlpha)

    # Exploration schedule: stays at 1 while (i + 1) <= epsilonRate (the
    # log10 term is <= 0 there and min() caps the value), then decays
    # logarithmically with the episode number, never dropping below mineps.
    agent.epsilon = max(
        min(1, 1 - math.log10((i + 1) / epsilonRate)),
        mineps,
    )
    # The learning rate simply follows the exploration rate.
    agent.alpha = agent.epsilon  # best: solved 452

    # Progress report every 20 episodes (0-based episode index).
    if not i % 20:
        print("Episode Number {}".format(i))
def randomPlay(environment, nIter=500):
    """Play random episodes to produce a mean and standard deviation.

    Creates a ``randomAgent`` for ``environment``, runs ``nIter`` episodes
    with it through ``runEpisode.play`` and returns whatever the agent's
    ``calcMeanStd()`` reports.

    Args:
        environment: Environment handed to both ``randomAgent`` and
            ``runEpisode.play``.
        nIter: Number of episodes to play (default 500).

    Returns:
        The result of ``calcMeanStd()`` on the random agent.
    """
    random_player = randomAgent(environment)
    for _episode in range(nIter):
        # NOTE(review): the two False flags are passed positionally; their
        # meaning is defined by runEpisode.play, which is not visible here.
        runEpisode.play(environment, random_player, False, False)
    statistics = random_player.calcMeanStd()
    return statistics