# NOTE: this fragment assumes agent, environment and experiment were
# created earlier in the full script.
from numpy import mean

# add normalization adapter
normalizer = NormalizingAdapter(scaleActions=[(-50, 50)])
experiment.addAdapter(normalizer)

# e-greedy exploration (disabled)
# explorer = EpsilonGreedyExplorer(0.4, episodeCount=500)
# experiment.addAdapter(explorer)

# collect some initial episodes, then discard them
experiment.runEpisodes(10)
agent.forget()

# explorer.decay = 0.999

# renderer = CartPoleRenderer()
# environment.renderer = renderer
# renderer.start()

# run experiment
for i in range(100):
    experiment.runEpisodes(5)
    agent.learn()
    # agent.forget()

    valdata = experiment.evaluateEpisodes(10, visualize=True)
    # print "exploration", explorer.epsilon
    print "mean return", mean([sum(v.rewards) for v in valdata])
    print "num episodes", len(agent.history)
    # print "num total samples", agent.history.numTotalSamples()
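
# The printout above only reports the mean return. A minimal helper for
# richer batch statistics (the name `summarize` is hypothetical; it relies
# only on the `rewards` attribute of the episodes that evaluateEpisodes()
# returns above):
def summarize(episodes):
    """Return (mean, min, max) over the episode returns."""
    returns = [sum(e.rewards) for e in episodes]
    return mean(returns), min(returns), max(returns)

# usage inside the loop:
# print "return stats (mean/min/max)", summarize(valdata)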
# NOTE: this fragment assumes renderer, explorer and maxSteps were defined
# earlier in the full script.
from numpy import mean

# force setup here already to initiate pretraining
experiment.setup()

# environment.renderer = renderer
# renderer.start()
# experiment.runEpisodes(4)

# run experiment
for i in range(5000):
    experiment.runEpisodes(10)
    agent.learn()
    # keep only the 50 best episodes in the agent's history
    agent.history.keepBest(50)
    # agent.forget()

    valdata = experiment.evaluateEpisodes(10, visualize=False)

    # once the render thread is running, show one episode on screen
    if renderer.isAlive():
        environment.renderer = renderer
        experiment.evaluateEpisodes(1, visualize=False)
        environment.renderer = None

    meanReturn = mean([sum(v.rewards) for v in valdata])
    print i
    print "mean return", meanReturn

    # start rendering once performance exceeds the threshold
    if meanReturn > 1.75*maxSteps:
        if not renderer.isAlive():
            renderer.start()

    print "avg. episode length", mean([len(v) for v in valdata])
    print "exploration variance", explorer.sigma
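
# The threshold logic above starts the render thread only once performance
# passes 1.75*maxSteps. A sketch of the same check factored into a helper,
# assuming renderer exposes the threading-style isAlive()/start() interface
# used above (the helper name is hypothetical):
def maybeStartRenderer(renderer, meanReturn, threshold):
    """Start the render thread once the mean return passes the threshold."""
    if meanReturn > threshold and not renderer.isAlive():
        renderer.start()

# usage inside the loop:
# maybeStartRenderer(renderer, meanReturn, 1.75*maxSteps)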
import numpy as np

# create agent, environment and experiment
agent = APIAgent(faClass=LWPRFA, resetFA=True, vectorblock=False)
agent.gamma = 2.
agent.alpha = 1.0
agent.iterations = 1
agent.presentations = 1

environment = MirrorEnvironment()
experiment = Experiment(environment, agent)

# add binary action search (BAS) adapter
bas = BinaryActionSearchAdapter(3., 4., 10)
experiment.addAdapter(bas)

# e-greedy exploration (disabled)
# explorer = EpsilonGreedyExplorer(0.5, episodeCount=10000)
# experiment.addAdapter(explorer)

# evaluate the untrained agent as a baseline
valdata = experiment.evaluateEpisodes(1000)
print "mean rewards:", np.mean([sum(e.rewards) for e in valdata])
# print "exploration:", explorer.epsilon

# run experiment, then evaluate again
experiment.runEpisodes(10000)
agent.learn()

valdata = experiment.evaluateEpisodes(1000)
print "mean rewards:", np.mean([sum(e.rewards) for e in valdata])
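
# Since the script evaluates once before and once after training, the
# learning effect can also be reported directly; a sketch using only the
# calls already shown above (variable names are illustrative):
before = np.mean([sum(e.rewards) for e in experiment.evaluateEpisodes(1000)])
experiment.runEpisodes(10000)
agent.learn()
after = np.mean([sum(e.rewards) for e in experiment.evaluateEpisodes(1000)])
print "improvement:", after - before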
from numpy import mean

# cut off last two state dimensions
indexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(indexer)

# add normalization adapter
normalizer = NormalizingAdapter()
experiment.addAdapter(normalizer)

# add e-greedy exploration
explorer = EpsilonGreedyExplorer(0.2, episodeCount=100)
experiment.addAdapter(explorer)

# collect some initial episodes, then discard them
experiment.runEpisodes(10)
agent.forget()
explorer.decay = 0.999

# attach a renderer to the cart-pole environment
renderer = CartPoleRenderer()
environment.renderer = renderer
renderer.start()

# run experiment
for i in range(100):
    experiment.runEpisodes(1)
    agent.learn()

    valdata = experiment.evaluateEpisodes(5)
    print "exploration", explorer.epsilon
    print "mean return", mean([sum(v.rewards) for v in valdata])
    print "num episodes", len(agent.history)
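
# Assuming the explorer applies `decay` multiplicatively once per episode
# (a common convention for such schedules; not verified against this
# framework's EpsilonGreedyExplorer), epsilon shrinks geometrically from
# its initial value of 0.2:
for n in (0, 100, 1000):
    print n, 0.2 * 0.999**n   # e.g. 0.2, ~0.181, ~0.074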