from dopamine.environments import MirrorEnvironment
from dopamine.agents import APIAgent, FQIAgent
from dopamine.experiments import Experiment
from dopamine.adapters import EpsilonGreedyExplorer, BinaryActionSearchAdapter
from dopamine.fapprox import *

import numpy as np

# create agent, environment, renderer, experiment
agent = APIAgent(faClass=LWPRFA, resetFA=True, vectorblock=False)
agent.gamma = 2.
agent.alpha = 1.0
agent.iterations = 1
agent.presentations = 1

environment = MirrorEnvironment()
experiment = Experiment(environment, agent)

# add bas adapter
bas = BinaryActionSearchAdapter(3., 4., 10)
experiment.addAdapter(bas)

# add e-greedy exploration
# explorer = EpsilonGreedyExplorer(0.5, episodeCount=10000)
# experiment.addAdapter(explorer)

# run experiment
valdata = experiment.evaluateEpisodes(1000)
print "mean rewards:", np.mean([sum(e.rewards) for e in valdata]) #, "exploration:", explorer.epsilon
# print "exploration:", explorer.epsilon

experiment.runEpisodes(10000)
from dopamine.environments import DiscreteCartPoleEnvironment, CartPoleRenderer
from dopamine.agents import APIAgent
from dopamine.fapprox import *
from dopamine.experiments import APIExperiment
from dopamine.adapters import EpsilonGreedyExplorer, NormalizingAdapter, IndexingAdapter

from matplotlib import pyplot as plt
from numpy import *

# create agent, environment, renderer, experiment
agent = APIAgent(resetFA=False)
agent.iterations = 1

environment = DiscreteCartPoleEnvironment(maxSteps=200)
environment.conditions["actionNum"] = 2
environment.centerCart = False

experiment = APIExperiment(environment, agent)

# cut off last two state dimensions
# indexer = IndexingAdapter([0, 1], None)
# experiment.addAdapter(indexer)

# add normalization adapter
normalizer = NormalizingAdapter()
experiment.addAdapter(normalizer)

# add e-greedy exploration
explorer = EpsilonGreedyExplorer(0.3, episodeCount=500)
experiment.addAdapter(explorer)

experiment.setup()
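
# The script above only configures the cart-pole experiment and calls setup();
# below is a minimal, illustrative sketch of a training/evaluation loop. It
# assumes APIExperiment exposes the same runEpisodes()/evaluateEpisodes()
# interface used by Experiment in the previous script; the episode counts
# (50 / 20 / 10) are placeholders, not values from the original script.
for i in range(50):
    # train on a batch of episodes, then measure performance on fresh episodes
    experiment.runEpisodes(20)
    valdata = experiment.evaluateEpisodes(10)
    print "iteration:", i, "mean rewards:", mean([sum(e.rewards) for e in valdata])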