# Example: SARSA on the discrete cart-pole task with vector-quantized
# state discretization.
from dopamine.agents import SARSAAgent
from dopamine.environments import DiscreteCartPoleEnvironment
from dopamine.experiments import Experiment
from dopamine.adapters import EpsilonGreedyExplorer, VQStateDiscretizationAdapter
from matplotlib import pyplot as plt
from numpy import *

# create agent, environment, experiment
agent = SARSAAgent()
environment = DiscreteCartPoleEnvironment()
experiment = Experiment(environment, agent)

# add discretization adapter with 30 codebook vectors
discretizer = VQStateDiscretizationAdapter(30)
experiment.addAdapter(discretizer)

# add e-greedy exploration
explorer = EpsilonGreedyExplorer(0.3, episodeCount=1000)
experiment.addAdapter(explorer)

# force experiment setup now
experiment.setup()

# plot the indices of the discretizer's codebook vectors in state space
for i in range(len(discretizer.stateVectors)):
    plt.text(discretizer.stateVectors[i, 0], discretizer.stateVectors[i, 1],
             "%i" % i, bbox=dict(facecolor='green', alpha=0.5))
plt.xlim(-2.5, 2.5)
plt.ylim(-10, 10)
plt.show()
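# Training itself is not shown above; a minimal sketch (assumed, mirroring
# the episode/learn pattern of the maze example further below):
for episode in range(1000):
    experiment.runEpisode(reset=True)
    agent.learn()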
# Example: BASAgent with a neural-network estimator on the cart-pole task.
from dopamine.agents import BASAgent, RBFEstimator, NNEstimator
from dopamine.environments import CartPoleEnvironment
from dopamine.experiments import Experiment
from dopamine.adapters import EpsilonGreedyExplorer, NormalizingAdapter, IndexingAdapter
from matplotlib import pyplot as plt
from numpy import *

# create agent, environment, experiment
agent = BASAgent(estimatorClass=NNEstimator)
environment = CartPoleEnvironment()
experiment = Experiment(environment, agent)

# cut off last two state dimensions
indexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(indexer)

# add normalization adapter
normalizer = NormalizingAdapter(scaleActions=[(-50, 50)])
experiment.addAdapter(normalizer)

# # add e-greedy exploration
# explorer = EpsilonGreedyExplorer(0.4, episodeCount=500)
# experiment.addAdapter(explorer)

experiment.runEpisodes(10)
agent.forget()

# explorer.decay = 0.999
# renderer = CartPoleRenderer()
# environment.renderer = renderer
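# A possible continuation (assumed, not in the original script): alternate
# collecting episodes with learning, then evaluate mean episode reward the
# way the next example does.
for i in range(100):
    experiment.runEpisodes(10)
    agent.learn()
valdata = experiment.evaluateEpisodes(100)
print "mean rewards:", mean([sum(e.rewards) for e in valdata])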
# Example: APIAgent with an LWPR function approximator and a binary action
# search adapter on the MirrorEnvironment.
from dopamine.agents import APIAgent
from dopamine.environments import MirrorEnvironment
from dopamine.experiments import Experiment
from dopamine.adapters import BinaryActionSearchAdapter
from dopamine.fapprox import *
import numpy as np

# create agent, environment, experiment
agent = APIAgent(faClass=LWPRFA, resetFA=True, vectorblock=False)
agent.gamma = 2.  # note: unusual, discount factors normally lie in [0, 1]
agent.alpha = 1.0
agent.iterations = 1
agent.presentations = 1

environment = MirrorEnvironment()
experiment = Experiment(environment, agent)

# add bas adapter
bas = BinaryActionSearchAdapter(3., 4., 10)
experiment.addAdapter(bas)

# add e-greedy exploration
# explorer = EpsilonGreedyExplorer(0.5, episodeCount=10000)
# experiment.addAdapter(explorer)

# evaluate before learning
valdata = experiment.evaluateEpisodes(1000)
print "mean rewards:", np.mean([sum(e.rewards) for e in valdata])
# print "exploration:", explorer.epsilon

# run experiment and learn
experiment.runEpisodes(10000)
agent.learn()

# evaluate after learning
valdata = experiment.evaluateEpisodes(1000)
print "mean rewards:", np.mean([sum(e.rewards) for e in valdata])
# Example: ENAC (episodic natural actor-critic) agent with a linear function
# approximator and Gaussian exploration.
from dopamine.agents import ENACAgent
from dopamine.environments import CartPoleEnvironment
from dopamine.experiments import Experiment
from dopamine.adapters import NormalizingAdapter, LinearSDExplorer, GaussianExplorer
from dopamine.fapprox import Linear
from numpy import *

# create agent, environment, experiment (the original script did not define
# an environment; cart-pole is assumed here, matching the action scaling and
# the commented-out renderer lines below)
agent = ENACAgent(faClass=Linear)
environment = CartPoleEnvironment()
experiment = Experiment(environment, agent)

# cut off last two state dimensions
# indexer = IndexingAdapter([0, 1], None)
# experiment.addAdapter(indexer)

# enhance state
# def enhancef(state):
#     return r_[state, state[0]**2, abs(state[2]), sin(state[1]), cos(state[1]), 1]
# enhancer = StateEnhancingAdapter(enhancef)
# experiment.addAdapter(enhancer)

# add normalization adapter
normalizer = NormalizingAdapter(scaleActions=[(-50, 50)])
experiment.addAdapter(normalizer)

# add exploration (only one explorer should be active at a time)
# explorer = LinearSDExplorer(sigma=1.)
# explorer.sigmaAdaptation = False
# experiment.addAdapter(explorer)
explorer = GaussianExplorer(sigma=-1.)
explorer.sigmaAdaptation = False
experiment.addAdapter(explorer)

# force setup here already to initiate pretraining
experiment.setup()

# environment.renderer = renderer
# renderer.start()
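# A possible run loop (assumed, not part of the original script), reusing
# only calls that appear in the other examples of this collection:
for i in range(100):
    experiment.runEpisodes(10)
    agent.learn()
valdata = experiment.evaluateEpisodes(100)
print "mean rewards:", mean([sum(e.rewards) for e in valdata])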
# Example: tabular learning with Boltzmann exploration on an MDP maze.
from dopamine.environments import MDPMaze
from dopamine.agents import QAgent, SARSAAgent, QLambdaAgent
from dopamine.experiments import Experiment
from dopamine.adapters import MakeEpisodicAdapter, EpsilonGreedyExplorer, BoltzmannExplorer
from matplotlib import pyplot as plt
from numpy import *

# create agent, environment, experiment (only one agent should be active)
# agent = QLambdaAgent()
agent = SARSAAgent()
environment = MDPMaze()
experiment = Experiment(environment, agent)

# make the task episodic (cut off after 1000 steps)
experiment.addAdapter(MakeEpisodicAdapter(1000))

# add Boltzmann action exploration
explorer = BoltzmannExplorer(5, episodeCount=200)
experiment.addAdapter(explorer)

plt.ion()

for i in range(1000):
    # run one episode and learn
    experiment.runEpisode(reset=True)
    agent.learn()
    agent.forget()

shape = environment.mazeTable.shape
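# The original script breaks off after reading the maze dimensions;
# presumably the maze and the learned behavior were then visualized (hence
# plt.ion() above). A minimal sketch, assuming mazeTable is a 2D array
# describing the maze layout:
plt.imshow(environment.mazeTable, interpolation='nearest')
plt.gcf().canvas.draw()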
# Example: fitted Q-iteration (FQI) with an RBF estimator and vector-
# quantized actions on the cart-pole task.
from dopamine.agents import FQIAgent, RBFEstimator
from dopamine.environments import CartPoleEnvironment
from dopamine.experiments import Experiment
from dopamine.adapters import EpsilonGreedyExplorer, NormalizingAdapter, IndexingAdapter, VQActionDiscretizationAdapter
from matplotlib import pyplot as plt
from numpy import *

# create agent, environment, experiment
agent = FQIAgent(estimatorClass=RBFEstimator)
agent.iterations = 1
environment = CartPoleEnvironment()
# environment.conditions['actionNum'] = 2
environment.centerCart = False
experiment = Experiment(environment, agent)

# cut off last two state dimensions
indexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(indexer)

# add normalization adapter
normalizer = NormalizingAdapter()
experiment.addAdapter(normalizer)

# vector quantization for actions (5 codebook vectors)
discretizer = VQActionDiscretizationAdapter(5)
experiment.addAdapter(discretizer)

# add e-greedy exploration
explorer = EpsilonGreedyExplorer(0.3, episodeCount=1000)
experiment.addAdapter(explorer)

# renderer = CartPoleRenderer()
# environment.renderer = renderer
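# The script above only builds the experiment; a training loop (assumed,
# mirroring the maze example) is still needed to actually learn:
for episode in range(1000):
    experiment.runEpisode(reset=True)
    agent.learn()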
# Example: FQI agent with e-greedy exploration on the test environment.
from dopamine.agents import FQIAgent
from dopamine.environments import TestEnvironment
from dopamine.experiments import Experiment
from dopamine.adapters import EpsilonGreedyExplorer, BoltzmannExplorer
from matplotlib import pyplot as plt
from numpy import *

# plt.plot(inps[blue,0].flatten(), tgts[blue], 'sb', alpha=0.5)
# plt.gcf().canvas.draw()

# create agent, environment, experiment
agent = FQIAgent()
environment = TestEnvironment()
experiment = Experiment(environment, agent)

# add normalization adapter
# normalizer = NormalizingAdapter()
# experiment.addAdapter(normalizer)

# add e-greedy exploration
# explorer = BoltzmannExplorer(2.0, episodeCount=1000)
explorer = EpsilonGreedyExplorer(0.5, episodeCount=1000)
experiment.addAdapter(explorer)

# run 10 episodes to initialize the (currently disabled) normalizing adapter
for i in range(10):
    experiment.runEpisode(reset=True)
# print "normalizing:", normalizer.minStates, normalizer.maxStates
agent.forget()

plt.ion()

# run experiment
for i in range(1000):
    experiment.runEpisode(reset=True)
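# The original file breaks off inside the loop above. A possible completion
# (assumed, not in the original): learn from the collected episodes and
# evaluate, as the other examples in this collection do.
agent.learn()
valdata = experiment.evaluateEpisodes(100)
print "mean rewards:", mean([sum(e.rewards) for e in valdata])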