Example #1
from dopamine.environments import MirrorEnvironment
from dopamine.agents import APIAgent, FQIAgent
from dopamine.experiments import Experiment
from dopamine.adapters import EpsilonGreedyExplorer, BinaryActionSearchAdapter
from dopamine.fapprox import *
import numpy as np

# create agent, environment and experiment
agent = APIAgent(faClass=LWPRFA, resetFA=True, vectorblock=False)
agent.gamma = 2.
agent.alpha = 1.0
agent.iterations = 1
agent.presentations = 1

environment = MirrorEnvironment()
experiment = Experiment(environment, agent)

# add binary action search (BAS) adapter
bas = BinaryActionSearchAdapter(3., 4., 10)
experiment.addAdapter(bas)

# add e-greedy exploration
# explorer = EpsilonGreedyExplorer(0.5, episodeCount=10000)
# experiment.addAdapter(explorer)

# evaluate the untrained agent
valdata = experiment.evaluateEpisodes(1000)
print "mean rewards:", np.mean([sum(e.rewards) for e in valdata])
# print "exploration:", explorer.epsilon

# train the agent
experiment.runEpisodes(10000)
Example #2

from dopamine.environments import DiscreteCartPoleEnvironment, CartPoleRenderer
from dopamine.agents import APIAgent
from dopamine.fapprox import *
from dopamine.experiments import APIExperiment
from dopamine.adapters import EpsilonGreedyExplorer, NormalizingAdapter, IndexingAdapter

from matplotlib import pyplot as plt
from numpy import *

# create agent, environment and experiment
agent = APIAgent(resetFA=False)
agent.iterations = 1

environment = DiscreteCartPoleEnvironment(maxSteps=200)
environment.conditions["actionNum"] = 2
environment.centerCart = False
experiment = APIExperiment(environment, agent)

# cut off last two state dimensions
# indexer = IndexingAdapter([0, 1], None)
# experiment.addAdapter(indexer)

# add normalization adapter
normalizer = NormalizingAdapter()
experiment.addAdapter(normalizer)

# add e-greedy exploration
explorer = EpsilonGreedyExplorer(0.3, episodeCount=500)
experiment.addAdapter(explorer)

experiment.setup()
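
The second example stops right after experiment.setup(), even though pyplot is already imported. A minimal sketch of how one might continue it, assuming APIExperiment exposes the same runEpisodes()/evaluateEpisodes() interface that Experiment uses in the first example (the batch and episode counts below are arbitrary), is to alternate training and evaluation and plot the resulting learning curve:

# sketch: alternate training and evaluation, then plot the learning curve
# (assumes APIExperiment provides the runEpisodes()/evaluateEpisodes()
#  interface used with Experiment in the first example)
meanRewards = []
for batch in range(50):
    experiment.runEpisodes(10)
    valdata = experiment.evaluateEpisodes(20)
    meanRewards.append(mean([sum(e.rewards) for e in valdata]))
    print "batch", batch, "mean reward:", meanRewards[-1]

plt.plot(meanRewards)
plt.xlabel("training batch")
plt.ylabel("mean episode reward")
plt.show()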