Example #1
0
# --- Hyperparameters -------------------------------------------------------
# The trailing notes are the author's tuning log; the numbers presumably pair
# a tried value with the episode count at which the task was solved
# (TODO confirm against the experiment notes).
gamma = 1          # best: 1, solved 460
epsilon = 1        # best: 1, solved 460
mineps = 0.01      # best: 0.01, solved 460 | 0.01 -> 188
epsilonRate = 12   # best: 40, solved 460 | 12 -> 188 | 35 -> 523

# --- Per-episode history (one entry appended per episode played) -----------
x = []        # 1-based episode numbers
yscores = []  # agent.score after each episode
yeps = []     # agent.epsilon used during each episode
yalpha = []   # agent.alpha used during each episode

environment = gym.make('CartPole-v0')
agent = QLearnTabular.QLearnTabular(
    nStates, environment, alpha, gamma, epsilon)
streak = 0
for i in range(maxIter):
    t = runEpisode.play(environment, agent, False)

    # Record this episode before the agent is reset or re-tuned.
    x.append(i + 1)
    yscores.append(agent.score)
    yalpha.append(agent.alpha)
    yeps.append(agent.epsilon)

    # Stop once at least 100 episodes exist and the mean score over the
    # most recent 100 reaches 195.
    if len(yscores) >= 100 and sum(yscores[-100:]) / 100 >= 195:
        print("Solved after {} episodes!".format(i + 1))
        break

    agent.reset()

    # Alternative step-wise alpha decay, kept for reference:
    #   agent.alpha = max(alpha * (0.85 ** (i//alphaRate)), minAlpha)

    # Epsilon stays at 1 while (i + 1) <= epsilonRate (log10 is <= 0 there),
    # then falls off logarithmically and is floored at mineps.
    agent.epsilon = max(
        min(1, 1 - math.log10((i + 1) / epsilonRate)),
        mineps,
    )
    # The learning rate follows the exploration rate.  best: solved 452
    agent.alpha = agent.epsilon

    # Progress message every 20th episode (0-based index).
    if not i % 20:
        print("Episode Number {}".format(i))
Example #2
0
def randomPlay(environment, nIter=500):
    """Play episodes with a random agent and return its mean/std result.

    Args:
        environment: Environment handed to both ``randomAgent`` and
            ``runEpisode.play``.
        nIter: Number of episodes to play (default 500).

    Returns:
        Whatever ``agent.calcMeanStd()`` returns after all episodes have
        been played -- presumably the mean and standard deviation of the
        episode scores (confirm against ``randomAgent``).
    """
    baseline_agent = randomAgent(environment)
    for _episode in range(nIter):
        # NOTE(review): the meaning of the two trailing False flags is
        # defined by runEpisode.play and is not visible here.
        runEpisode.play(environment, baseline_agent, False, False)
    stats = baseline_agent.calcMeanStd()
    return stats