Beispiel #1
0
environment.conditions['actionNum'] = 2
environment.centerCart = False
experiment = Experiment(environment, agent)

# cut off last two state dimensions
indexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(indexer)

# add normalization adapter
normalizer = NormalizingAdapter(normalizeStates=[(-0.8, 0.8), (-12., 12.)])
experiment.addAdapter(normalizer)

if options.play:
    renderer = CartPoleRenderer()
    environment.renderer = renderer
    renderer.start()
    print "10 evaluation trials:"
    valdata = experiment.evaluateEpisodes(10, visualize=False)
    mean_return = mean([sum(v.rewards) for v in valdata])
    print "mean return", mean_return
    raise SystemExit

# add e-greedy exploration
explorer = EpsilonGreedyExplorer(0.3, episodeCount=1000)
experiment.addAdapter(explorer)
    
if os.path.exists('cart_play.saved'):
    os.remove('cart_play.saved')
      
# run experiment
for i in range(1000):