Example #1
from dopamine.agents import SARSAAgent
from dopamine.environments import DiscreteCartPoleEnvironment   # assumed module path
from dopamine.experiments import Experiment
from dopamine.adapters import EpsilonGreedyExplorer, VQStateDiscretizationAdapter

from matplotlib import pyplot as plt
from numpy import *
import time


# create agent, environment, renderer, experiment
agent = SARSAAgent()
environment = DiscreteCartPoleEnvironment()

experiment = Experiment(environment, agent)

# add discretization adapter
discretizer = VQStateDiscretizationAdapter(30)
experiment.addAdapter(discretizer)

# add e-greedy exploration
explorer = EpsilonGreedyExplorer(0.3, episodeCount=1000)
experiment.addAdapter(explorer)

# force experiment setup now
experiment.setup()

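# plot each VQ prototype state vector (first two dimensions), labelled with its index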
for i in range(len(discretizer.stateVectors)):
    plt.text(discretizer.stateVectors[i,0], discretizer.stateVectors[i,1], "%i"%i, bbox=dict(facecolor='green', alpha=0.5))

plt.xlim(-2.5, 2.5)
plt.ylim(-10, 10)
plt.show()
Example #2
from dopamine.agents import BASAgent, RBFEstimator, NNEstimator
from dopamine.environments import CartPoleEnvironment   # assumed module path
from dopamine.experiments import Experiment
from dopamine.adapters import EpsilonGreedyExplorer, NormalizingAdapter, IndexingAdapter

from matplotlib import pyplot as plt
from numpy import *


# create agent, environment, renderer, experiment
agent = BASAgent(estimatorClass=NNEstimator)
environment = CartPoleEnvironment()
experiment = Experiment(environment, agent)

# cut off last two state dimensions
indexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(indexer)

# add normalization adapter
normalizer = NormalizingAdapter(scaleActions=[(-50, 50)])
experiment.addAdapter(normalizer)

# # add e-greedy exploration
# explorer = EpsilonGreedyExplorer(0.4, episodeCount=500)
# experiment.addAdapter(explorer)

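# collect 10 episodes so the normalizing adapter can estimate state ranges, then discard the gathered samples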
experiment.runEpisodes(10)
agent.forget()

# explorer.decay = 0.999
# renderer = CartPoleRenderer()
# environment.renderer = renderer
Example #3
from dopamine.agents import APIAgent, ENACAgent           # assumed module paths
from dopamine.environments import MirrorEnvironment       # assumed module path
from dopamine.experiments import Experiment
from dopamine.adapters import BinaryActionSearchAdapter, NormalizingAdapter, LinearSDExplorer, GaussianExplorer   # adapter paths partly assumed
from dopamine.fapprox import *
import numpy as np

# create agent, environment, renderer, experiment
agent = APIAgent(faClass=LWPRFA, resetFA=True, vectorblock=False)
agent.gamma = 2.
agent.alpha = 1.0
agent.iterations = 1
agent.presentations = 1

environment = MirrorEnvironment()
experiment = Experiment(environment, agent)

# add bas adapter
bas = BinaryActionSearchAdapter(3., 4., 10)
experiment.addAdapter(bas)

# add e-greedy exploration
# explorer = EpsilonGreedyExplorer(0.5, episodeCount=10000)
# experiment.addAdapter(explorer)

# evaluate the untrained agent first to get a baseline reward estimate
valdata = experiment.evaluateEpisodes(1000)
print "mean rewards:", np.mean([sum(e.rewards) for e in valdata]) #, "exploration:", explorer.epsilon
# print "exploration:", explorer.epsilon

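# gather 10000 episodes and perform one learning sweep on the collected data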
experiment.runEpisodes(10000)
agent.learn()    

valdata = experiment.evaluateEpisodes(1000)
print "mean rewards:", np.mean([sum(e.rewards) for e in valdata]) #, "exploration:", explorer.epsilon
agent = ENACAgent(faClass=Linear)
experiment = Experiment(environment, agent)

# cut off last two state dimensions
# indexer = IndexingAdapter([0, 1], None)
# experiment.addAdapter(indexer)

# enhance state
# def enhancef(state):
#     return r_[state, state[0]**2, abs(state[2]), sin(state[1]), cos(state[1]), 1]
# enhancer = StateEnhancingAdapter(enhancef)
# experiment.addAdapter(enhancer)

# add normalization adapter
normalizer = NormalizingAdapter(scaleActions=[(-50, 50)])
experiment.addAdapter(normalizer)

# add exploration noise: state-dependent (SD) and Gaussian explorers
explorer = LinearSDExplorer(sigma=1.)
explorer.sigmaAdaptation = False
experiment.addAdapter(explorer)
explorer = GaussianExplorer(sigma=-1.)
explorer.sigmaAdaptation = False
experiment.addAdapter(explorer)

# force setup here already to initiate pretraining
experiment.setup()

# environment.renderer = renderer
# renderer.start()
Example #5
from dopamine.environments import MDPMaze
from dopamine.agents import QAgent, SARSAAgent, QLambdaAgent
from dopamine.experiments import Experiment
from dopamine.adapters import MakeEpisodicAdapter, EpsilonGreedyExplorer, BoltzmannExplorer

from matplotlib import pyplot as plt
from numpy import *
import time


# agent = QLambdaAgent()   # alternative agent
agent = SARSAAgent()

environment = MDPMaze()
experiment = Experiment(environment, agent)
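# MakeEpisodicAdapter presumably truncates each episode after 1000 steps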
experiment.addAdapter(MakeEpisodicAdapter(1000))


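# Boltzmann (softmax) exploration with initial temperature 5, presumably annealed over 200 episodes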
explorer = BoltzmannExplorer(5, episodeCount=200)
experiment.addAdapter(explorer)

plt.ion()

for i in range(1000):
    # run one episode and learn
    experiment.runEpisode(reset=True)
    agent.learn()
    agent.forget()

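    # grid dimensions of the maze table, presumably for the interactive plot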
    shape = environment.mazeTable.shape
    
Example #6
from dopamine.agents import FQIAgent, RBFEstimator       # assumed module path for FQIAgent
from dopamine.environments import CartPoleEnvironment    # assumed module path
from dopamine.experiments import Experiment
from dopamine.adapters import EpsilonGreedyExplorer, NormalizingAdapter, IndexingAdapter, VQActionDiscretizationAdapter

from matplotlib import pyplot as plt
from numpy import *

# create agent, environment, renderer, experiment
agent = FQIAgent(estimatorClass=RBFEstimator)
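# 'iterations' presumably sets the number of fitted Q-iteration sweeps per learning call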
agent.iterations = 1
environment = CartPoleEnvironment()
# environment.conditions['actionNum'] = 2
environment.centerCart = False
experiment = Experiment(environment, agent)

# cut off last two state dimensions
indexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(indexer)

# add normalization adapter
normalizer = NormalizingAdapter()
experiment.addAdapter(normalizer)

# vector quantization for actions
discretizer = VQActionDiscretizationAdapter(5)
experiment.addAdapter(discretizer)

# add e-greedy exploration
explorer = EpsilonGreedyExplorer(0.3, episodeCount=1000)
experiment.addAdapter(explorer)

# renderer = CartPoleRenderer()
# environment.renderer = renderer
Example #7
    # plt.plot(inps[blue,0].flatten(), tgts[blue], 'sb', alpha=0.5)
    plt.gcf().canvas.draw()

# create agent, environment, renderer, experiment
agent = FQIAgent()
environment = TestEnvironment()
experiment = Experiment(environment, agent)

# add normalization adapter
# normalizer = NormalizingAdapter()
# experiment.addAdapter(normalizer)

# add e-greedy exploration
# explorer = BoltzmannExplorer(2.0, episodeCount=1000)
explorer = EpsilonGreedyExplorer(0.5, episodeCount=1000)
experiment.addAdapter(explorer)

# run 10 episodes to initialize the normalizing adapter
for i in range(10):
    experiment.runEpisode(reset=True)

# print "normalizing:", normalizer.minStates, normalizer.maxStates

agent.forget()

plt.ion()

# run experiment
for i in range(1000):
    for i in range(1):
        experiment.runEpisode(reset=True)