# Demo script: wire a SARSA agent to the discrete cart-pole environment,
# attach a VQ state discretizer and an e-greedy explorer, call
# experiment.setup(), and plot the discretizer's state vectors as index labels.
from dopamine.experiments import Experiment
from dopamine.adapters import EpsilonGreedyExplorer, VQStateDiscretizationAdapter
from matplotlib import pyplot as plt
from numpy import *
import time

# NOTE(review): SARSAAgent and DiscreteCartPoleEnvironment are not imported in
# the visible part of this script -- confirm they are brought into scope
# elsewhere.

# build the agent and the environment, then tie them together in an experiment
agent = SARSAAgent()
environment = DiscreteCartPoleEnvironment()
experiment = Experiment(environment, agent)

# state discretization adapter
# (30 is presumably the number of state vectors -- TODO confirm)
discretizer = VQStateDiscretizationAdapter(30)
experiment.addAdapter(discretizer)

# e-greedy exploration adapter
explorer = EpsilonGreedyExplorer(0.3, episodeCount=1000)
experiment.addAdapter(explorer)

# run the experiment setup right away, before anything is plotted
experiment.setup()

# write each state vector's index at the position given by its first two
# components
for idx, vector in enumerate(discretizer.stateVectors):
    plt.text(
        vector[0],
        vector[1],
        "%i" % idx,
        bbox={'facecolor': 'green', 'alpha': 0.5},
    )

# fixed axis limits, then display the figure
plt.xlim(-2.5, 2.5)
plt.ylim(-10, 10)
plt.show()
# Demo script: draw two Gaussian point clouds, feed every point to a
# VQStateDiscretizationAdapter, let it sample and adapt its clusters, and plot
# the points together with the resulting state vectors as index labels.
from numpy import *
from dopamine.adapters import VQStateDiscretizationAdapter
from matplotlib import pyplot as plt

# switch matplotlib to interactive mode
plt.ion()

vq = VQStateDiscretizationAdapter(50)

# 500 two-dimensional samples around (0, 3), followed by 200 around (-1, -2);
# `random` and `r_` come from the numpy star import above
first_cloud = random.normal([0, 3], [3, 1], (500, 2))
second_cloud = random.normal([-1, -2], [0.5, 2], (200, 2))
states = r_[first_cloud, second_cloud]

# scatter the raw samples as dots
plt.plot(states[:, 0], states[:, 1], '.')

# hand every sample to the adapter, one at a time
for sample in states:
    vq.applyState(sample)

# once all samples have been applied: sample the clusters, then adapt them
vq.sampleClusters()
vq.adaptClusters()

# write each state vector's index at the position given by its first two
# components
for idx, vector in enumerate(vq.stateVectors):
    plt.text(
        vector[0],
        vector[1],
        "%i" % idx,
        bbox={'facecolor': 'green', 'alpha': 0.5},
    )

# fixed axis limits, then display the figure
plt.ylim(-10, 10)
plt.xlim(-10, 10)
plt.show()