from dopamine.environments import CartPoleEnvironment, CartPoleRenderer
from dopamine.agents import ReinforceAgent, ENACAgent
from dopamine.adapters import IndexingAdapter, NormalizingAdapter, GaussianExplorer, LinearSDExplorer, StateEnhancingAdapter
from dopamine.experiments import Experiment
from dopamine.fapprox import *
from numpy import *

from dopamine.tools import Episode

maxSteps = 400
environment = CartPoleEnvironment(maxSteps=maxSteps)
environment.centerCart = True

# renderer is created but not attached here; set environment.renderer = renderer
# and renderer.start() (as done in the next example) to visualize episodes
renderer = CartPoleRenderer()

agent = ENACAgent(faClass=Linear)
experiment = Experiment(environment, agent)

# cut off last two state dimensions
# indexer = IndexingAdapter([0, 1], None)
# experiment.addAdapter(indexer)

# enhance state
# def enhancef(state):
#     return r_[state, state[0]**2, abs(state[2]), sin(state[1]), cos(state[1]), 1]
# enhancer = StateEnhancingAdapter(enhancef)
# experiment.addAdapter(enhancer)

# add normalization adapter
normalizer = NormalizingAdapter(scaleActions=[(-50, 50)])
experiment.addAdapter(normalizer)
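
# The original snippet is cut off here (an exploration adapter likely
# followed, given the GaussianExplorer/LinearSDExplorer imports above).
# A sketch of a training loop for this setup: only evaluateEpisodes() is
# confirmed by these snippets; runEpisodes() and agent.learn() are assumed
# names from the dopamine Experiment/Agent API.
for i in range(100):
    experiment.runEpisodes(10)
    agent.learn()
    valdata = experiment.evaluateEpisodes(5, visualize=False)
    print "mean return", mean([sum(v.rewards) for v in valdata])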
# ----- Example 2: discrete cart-pole with epsilon-greedy exploration -----
import os
from optparse import OptionParser

from dopamine.environments import DiscreteCartPoleEnvironment, CartPoleRenderer
from dopamine.adapters import IndexingAdapter, NormalizingAdapter, EpsilonGreedyExplorer
from dopamine.experiments import Experiment
from numpy import *

# NOTE: this snippet is truncated; 'agent' must already have been created
# above (as a discrete-action agent) for the following to run.
agent.iterations = 1

environment = DiscreteCartPoleEnvironment()
environment.conditions['actionNum'] = 2
environment.centerCart = False
experiment = Experiment(environment, agent)

# cut off last two state dimensions
indexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(indexer)

# add normalization adapter
normalizer = NormalizingAdapter(normalizeStates=[(-0.8, 0.8), (-12., 12.)])
experiment.addAdapter(normalizer)

# minimal command-line handling; the original snippet omits its option
# setup, so an equivalent --play flag is assumed here
parser = OptionParser()
parser.add_option('--play', action='store_true', dest='play', default=False)
options, args = parser.parse_args()

if options.play:
    renderer = CartPoleRenderer()
    environment.renderer = renderer
    renderer.start()
    print "10 evaluation trials:"
    valdata = experiment.evaluateEpisodes(10, visualize=False)
    mean_return = mean([sum(v.rewards) for v in valdata])
    print "mean return", mean_return
    raise SystemExit

# add e-greedy exploration
explorer = EpsilonGreedyExplorer(0.3, episodeCount=1000)
experiment.addAdapter(explorer)
    
# start fresh: remove any previously saved agent state
if os.path.exists('cart_play.saved'):
    os.remove('cart_play.saved')
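
# The cart_play.saved filename suggests the original script periodically
# saved the agent; the save format is not shown, so only the bare training
# loop is sketched here (runEpisodes() and learn() are assumed API names).
for episode in range(1000):
    experiment.runEpisodes(1)
    agent.learn()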
      
# ----- Example 3: continuous cart-pole with REINFORCE -----

from dopamine.environments import CartPoleEnvironment, CartPoleRenderer
from dopamine.agents import ReinforceAgent
from dopamine.adapters import IndexingAdapter, NormalizingAdapter, GaussianExplorer, LinearSDExplorer
from dopamine.experiments import Experiment
from dopamine.fapprox import *
from numpy import *

from dopamine.tools import Episode

environment = CartPoleEnvironment(maxSteps=100)
environment.centerCart = False

renderer = CartPoleRenderer()

agent = ReinforceAgent(faClass=Linear)
experiment = Experiment(environment, agent)

# cut off last two state dimensions
indexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(indexer)

# add normalization adapter
normalizer = NormalizingAdapter(scaleActions=[(-50, 50)])
experiment.addAdapter(normalizer)

# add linearly decaying exploration noise
explorer = LinearSDExplorer(sigma=2.)
explorer.sigmaAdaptation = False
experiment.addAdapter(explorer)

# gaussian explorer with adaptive sigma; the snippet breaks off at this
# point, so registering it with the experiment is assumed
explorer = GaussianExplorer(sigma=0.)
explorer.sigmaAdaptation = True
experiment.addAdapter(explorer)
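
# Sketch of training and then inspecting the result. Episode objects
# (imported above but otherwise unused) carry a per-step rewards array, as
# the sum(v.rewards) usage in Example 2 shows; runEpisodes() and learn()
# are assumed names, evaluateEpisodes() is confirmed above.
for i in range(100):
    experiment.runEpisodes(10)
    agent.learn()

valdata = experiment.evaluateEpisodes(10, visualize=False)
for ep in valdata:
    print "episode return:", sum(ep.rewards)
print "mean return:", mean([sum(ep.rewards) for ep in valdata])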