Example #1
from dopamine.environments import CartPoleEnvironment
from dopamine.agents import ActorCriticAgent
from dopamine.agents.actorcritic import FAActor, FACritic
from dopamine.experiments import Experiment
from dopamine.adapters import GaussianExplorer, NormalizingAdapter, IndexingAdapter

from matplotlib import pyplot as plt
from numpy import *

# create agent, environment, and experiment; the renderer is optional (see below)
agent = ActorCriticAgent(actorClass=FAActor, criticClass=FACritic)
environment = CartPoleEnvironment()
environment.centerCart = False
experiment = Experiment(environment, agent)

# cut off last two state dimensions
indexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(indexer)

# add normalization adapter
normalizer = NormalizingAdapter()
experiment.addAdapter(normalizer)

# add Gaussian exploration noise to the continuous actions
explorer = GaussianExplorer()
experiment.addAdapter(explorer)

experiment.setup()

# renderer = CartPoleRenderer()
# environment.renderer = renderer
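
# What follows is a hedged sketch, not part of the original example: run a
# number of episodes and plot the return per episode. runEpisode(),
# agent.episode.rewards and agent.learn() are assumptions about the dopamine
# Experiment/Agent API and may need to be adapted to the actual method names.
returns = []
for i in range(100):
    experiment.runEpisode()                      # assumed: run one episode through all adapters
    returns.append(sum(agent.episode.rewards))   # assumed: rewards of the most recent episode
    agent.learn()                                # assumed: one learning step on the collected data

plt.plot(returns)
plt.xlabel('episode')
plt.ylabel('accumulated reward')
plt.show()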

Example #2
from dopamine.environments import CartPoleEnvironment, CartPoleRenderer
from dopamine.agents import ReinforceAgent, ENACAgent
from dopamine.adapters import IndexingAdapter, NormalizingAdapter, GaussianExplorer, LinearSDExplorer, StateEnhancingAdapter
from dopamine.experiments import Experiment
from dopamine.fapprox import *
from numpy import *

from dopamine.tools import Episode

maxSteps = 400
environment = CartPoleEnvironment(maxSteps=maxSteps)
environment.centerCart = True

# create a renderer; attach it with environment.renderer = renderer to visualize
renderer = CartPoleRenderer()

agent = ENACAgent(faClass=Linear)
experiment = Experiment(environment, agent)

# cut off last two state dimensions
# indexer = IndexingAdapter([0, 1], None)
# experiment.addAdapter(indexer)

# enhance state
# def enhancef(state):
#     return r_[state, state[0]**2, abs(state[2]), sin(state[1]), cos(state[1]), 1]
# enhancer = StateEnhancingAdapter(enhancef)
# experiment.addAdapter(enhancer)

# add normalization adapter
normalizer = NormalizingAdapter(scaleActions=[(-50, 50)])
experiment.addAdapter(normalizer)
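
# Hedged sketch of the remaining steps, mirroring example #1 above; this is
# not part of the original snippet. LinearSDExplorer comes from the imports
# at the top; its no-argument constructor, runEpisode() and agent.learn()
# are assumptions about the dopamine API.
explorer = LinearSDExplorer()
experiment.addAdapter(explorer)

experiment.setup()

for i in range(100):
    experiment.runEpisode()  # assumed: run one episode of up to maxSteps steps
    agent.learn()            # assumed: episodic natural actor-critic update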