experiment.addAdapter(discretizer) # add e-greedy exploration explorer = EpsilonGreedyExplorer(0.3, episodeCount=1000) experiment.addAdapter(explorer) # force experiment setup now experiment.setup() for i in range(len(discretizer.stateVectors)): plt.text(discretizer.stateVectors[i,0], discretizer.stateVectors[i,1], "%i"%i, bbox=dict(facecolor='green', alpha=0.5)) plt.xlim(-2.5, 2.5) plt.ylim(-10, 10) plt.show() agent.forget() explorer.epsilon = 0.3 # renderer = CartPoleRenderer() # environment.renderer = renderer # renderer.start() # run experiment for i in range(1000): experiment.runEpisode(reset=True) discretizer.adaptClusters() agent.learn() print "sum rewards:", sum(agent.episode.rewards) print "exploration:", explorer.epsilon
from numpy import *
from dopamine.adapters import VQStateDiscretizationAdapter
from matplotlib import pyplot as plt

# Interactive mode so the figure updates without blocking the script.
plt.ion()

# Vector-quantization discretizer with 50 cluster centroids.
vq = VQStateDiscretizationAdapter(50)

# Synthetic 2D dataset: 500 points around (0, 3) plus 200 around (-1, -2).
states = random.normal([0, 3], [3, 1], (500, 2))
states = r_[states, random.normal([-1, -2], [0.5, 2], (200, 2))]

plt.plot(states[:,0], states[:,1], '.')

# Feed every sample through the adapter, then fit the clusters to them.
for sample in states:
    vq.applyState(sample)
vq.sampleClusters()
vq.adaptClusters()

# Annotate each resulting centroid with its cluster index.
for idx, center in enumerate(vq.stateVectors):
    plt.text(center[0], center[1], "%i" % idx,
             bbox=dict(facecolor='green', alpha=0.5))

plt.ylim(-10, 10)
plt.xlim(-10, 10)
plt.show()