"""Set up and configure a CartPole actor-critic experiment.

Builds an ActorCriticAgent with function-approximation actor/critic,
wraps a (non-centered) CartPoleEnvironment in an Experiment, and
attaches three adapters: state indexing, normalization, and Gaussian
action-noise exploration.
"""
from dopamine.environments import CartPoleEnvironment
from dopamine.agents import ActorCriticAgent
from dopamine.agents.actorcritic import FAActor, FACritic
from dopamine.experiments import Experiment
from dopamine.adapters import GaussianExplorer, NormalizingAdapter, IndexingAdapter
from matplotlib import pyplot as plt
from numpy import *

# create agent, environment and experiment
agent = ActorCriticAgent(actorClass=FAActor, criticClass=FACritic)
environment = CartPoleEnvironment()
environment.centerCart = False
experiment = Experiment(environment, agent)

# cut off last two state dimensions, keeping only dimensions 0 and 1
indexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(indexer)

# add normalization adapter
normalizer = NormalizingAdapter()
experiment.addAdapter(normalizer)

# add Gaussian exploration noise to the (continuous) actions
# (the original comment said "e-greedy", but GaussianExplorer perturbs
# actions with Gaussian noise rather than doing epsilon-greedy selection)
explorer = GaussianExplorer()
experiment.addAdapter(explorer)

experiment.setup()

# optional rendering (disabled):
# renderer = CartPoleRenderer()
# environment.renderer = renderer
"""Set up a CartPole experiment with an ENAC (episodic natural
actor-critic) agent.

Uses a linear function approximator, a centered cart with a 400-step
episode limit, and a normalizing adapter that scales actions to the
range (-50, 50). Several alternative adapters (state indexing, state
enhancement) are kept below, commented out, as experiment variants.
"""
from dopamine.environments import CartPoleEnvironment, CartPoleRenderer
from dopamine.agents import ReinforceAgent, ENACAgent
from dopamine.adapters import IndexingAdapter, NormalizingAdapter, GaussianExplorer, LinearSDExplorer, StateEnhancingAdapter
from dopamine.experiments import Experiment
from dopamine.fapprox import *
from numpy import *
from dopamine.tools import Episode

# maximum number of steps per episode
maxSteps = 400

environment = CartPoleEnvironment(maxSteps=maxSteps)
environment.centerCart = True

# NOTE(review): renderer is constructed but never attached to the
# environment in this script — confirm whether
# `environment.renderer = renderer` was intended here.
renderer = CartPoleRenderer()

agent = ENACAgent(faClass=Linear)
experiment = Experiment(environment, agent)

# variant: cut off last two state dimensions
# indexer = IndexingAdapter([0, 1], None)
# experiment.addAdapter(indexer)

# variant: enhance state with derived features
# def enhancef(state):
#     return r_[state, state[0]**2, abs(state[2]), sin(state[1]), cos(state[1]), 1]
# enhancer = StateEnhancingAdapter(enhancef)
# experiment.addAdapter(enhancer)

# add normalization adapter, scaling actions to the force range (-50, 50)
normalizer = NormalizingAdapter(scaleActions=[(-50, 50)])
experiment.addAdapter(normalizer)