environment = DiscreteCartPoleEnvironment(maxSteps=200) environment.conditions["actionNum"] = 2 environment.centerCart = False experiment = APIExperiment(environment, agent) # cut off last two state dimensions # indexer = IndexingAdapter([0, 1], None) # experiment.addAdapter(indexer) # add normalization adapter normalizer = NormalizingAdapter() experiment.addAdapter(normalizer) # add e-greedy exploration explorer = EpsilonGreedyExplorer(0.3, episodeCount=500) experiment.addAdapter(explorer) experiment.setup() explorer.epsilon = 0.5 explorer.decay = 0.9999 renderer = CartPoleRenderer() for i in range(1): valdata = experiment.evaluateEpisodes(100, visualize=False) mean_return = mean([sum(v.rewards) for v in valdata]) print "mean return", mean_return print "exploration", explorer.epsilon print "mean ep. length", mean([len(e) for e in valdata]) print "num episodes", len(agent.history)
from numpy import *
import time

# --- SARSA on discrete cart-pole with vector-quantized state discretization ---
agent = SARSAAgent()
environment = DiscreteCartPoleEnvironment()
experiment = Experiment(environment, agent)

# Map the continuous state space onto 30 prototype vectors.
discretizer = VQStateDiscretizationAdapter(30)
experiment.addAdapter(discretizer)

# Epsilon-greedy exploration, annealed over 1000 episodes.
explorer = EpsilonGreedyExplorer(0.3, episodeCount=1000)
experiment.addAdapter(explorer)

# Run setup eagerly so the discretizer's prototypes exist below.
experiment.setup()

# Label each prototype vector with its index on the current plot.
# NOTE(review): `plt` is assumed to be in scope (matplotlib.pyplot).
for idx, vec in enumerate(discretizer.stateVectors):
    plt.text(vec[0], vec[1], "%i" % idx,
             bbox=dict(facecolor='green', alpha=0.5))
plt.xlim(-2.5, 2.5)
plt.ylim(-10, 10)
plt.show()

# Reset any learned values and restart exploration at 0.3.
agent.forget()
explorer.epsilon = 0.3
# renderer = CartPoleRenderer()
# --- Fitted Q-Iteration on the discrete cart-pole task ---
agent = FQIAgent()
environment = DiscreteCartPoleEnvironment()
experiment = Experiment(environment, agent)

# Keep only the first two state dimensions.
indexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(indexer)

# Scale observations into a normalized range before they reach the agent.
normalizer = NormalizingAdapter()
experiment.addAdapter(normalizer)

# Epsilon-greedy exploration, annealed over 100 episodes.
explorer = EpsilonGreedyExplorer(0.2, episodeCount=100)
experiment.addAdapter(explorer)

# Warm-up rollouts (e.g. to calibrate the normalizer), then wipe the agent.
experiment.runEpisodes(10)
agent.forget()

explorer.decay = 0.999

# Attach live visualization of the cart-pole.
renderer = CartPoleRenderer()
environment.renderer = renderer
renderer.start()

# Main loop: one rollout per iteration, then a batch FQI learning step.
for _ in range(100):
    experiment.runEpisodes(1)
    agent.learn()