# environment.renderer = renderer
# renderer.start()

# experiment.runEpisodes(4)

# run experiment
for i in range(5000):
    experiment.runEpisodes(10)    
    agent.learn()
    agent.history.keepBest(50)
    # agent.forget()

    valdata = experiment.evaluateEpisodes(10, visualize=False)

    if renderer.isAlive():
        environment.renderer = renderer
        experiment.evaluateEpisodes(1, visualize=False)
        environment.renderer = None
    
    print i
    print "mean return", mean([sum(v.rewards) for v in valdata])
    if mean([sum(v.rewards) for v in valdata]) > 1.75*maxSteps:
        if not renderer.isAlive():
            renderer.start()
            pass
            
    print "avg. episode length", mean([len(v) for v in valdata])
    print "exploration variance", explorer.sigma
    print