# Example no. 1
0
# Agent exercised by the sweeps below; `Agent` and `rewards` are defined
# outside this excerpt.
agent = Agent(rewards)
# Hyperparameter grid swept below: 0.1 through 1.0 in steps of one tenth.
tenths = [k / 10.0 for k in range(1, 11)]
# Number of episodes run per hyperparameter value.
episodes = 10000
# Statistics are sampled once every `interval` episodes.
interval = 1000
# CSV header row: an empty leading cell (the hyperparameter column), then one
# column per sampled episode count from 0 up to and including `episodes`.
top_label = ',{}\n'.format(
    ','.join(map(str, range(0, episodes + 1, interval)))
)

# Gamma sweep: for every value in `tenths`, run `episodes` episodes and emit
# one CSV row per value to each output file (win rate and average steps),
# sampling the agent's statistics every `interval` episodes.
with open('winrate.csv', 'w') as w, open('avgsteps.csv', 'w') as a:
    print('Testing Gammas...')
    # Both files receive the same section title and column header.
    for out in (w, a):
        out.write('Gamma\n')
        out.write(top_label)
    for gamma in tenths:
        agent.gamma = gamma
        # Each row opens with the gamma value and a literal 0, which lines up
        # with the episode-0 column of `top_label`.
        win_cells = ['{},0'.format(gamma)]
        step_cells = ['{},0'.format(gamma)]
        for _ in range(episodes):
            agent.episode()
            # Sample only when the agent's episode counter is past 1 and is
            # an exact multiple of the reporting interval.
            if agent.episodes <= 1 or agent.episodes % interval:
                continue
            winrate = sum(agent.goal) / len(agent.goal)
            avgsteps = sum(agent.last100) / len(agent.last100)
            win_cells.append(str(winrate))
            step_cells.append(str(avgsteps))
            print('Episode {}...'.format(agent.episodes))
        wstr = ','.join(win_cells)
        astr = ','.join(step_cells)
        w.write(wstr + '\n')
        a.write(astr + '\n')
        # reset() runs between gamma values; presumably it clears learned
        # state and the episode counter -- confirm against Agent.
        agent.reset()
    w.write('Lambda\n')
    a.write('Lambda\n')
    w.write(top_label)