Example #1
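# NOTE: this snippet is partial; it assumes that agent, environment and
# experiment have already been created (see Example #3 for the construction
# pattern), that NormalizingAdapter comes from the RL framework in use, and
# that mean is available, e.g.:
from numpy import mean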
# add normalization adapter
normalizer = NormalizingAdapter(scaleActions=[(-50, 50)])
experiment.addAdapter(normalizer)

# # add e-greedy exploration
# explorer = EpsilonGreedyExplorer(0.4, episodeCount=500)
# experiment.addAdapter(explorer)

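# run a few warm-up episodes, then clear the agent's episode history again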
experiment.runEpisodes(10)
agent.forget()

# explorer.decay = 0.999
# renderer = CartPoleRenderer()
# environment.renderer = renderer
# renderer.start()

# run experiment
for i in range(100):
    experiment.runEpisodes(5)
    agent.learn()

    # agent.forget()
    
    valdata = experiment.evaluateEpisodes(10, visualize=True)
    # print "exploration", explorer.epsilon
    print "mean return", mean([sum(v.rewards) for v in valdata])
    print "num episodes", len(agent.history)
    # print "num total samples", agent.history.numTotalSamples()

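# NOTE: the following part assumes that renderer (a CartPoleRenderer that has
# not been started yet), an exploration adapter named explorer (exposing a
# .sigma attribute) and maxSteps (the maximum episode length) have been
# defined earlier in the script.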
# force setup here already to initiate pretraining
experiment.setup()

# environment.renderer = renderer
# renderer.start()

# experiment.runEpisodes(4)

# run experiment
for i in range(5000):
    experiment.runEpisodes(10)
    agent.learn()
    # prune the agent's history, keeping only the 50 best episodes
    agent.history.keepBest(50)
    # agent.forget()

    valdata = experiment.evaluateEpisodes(10, visualize=False)

    # if the render window is running, show one episode on screen
    if renderer.isAlive():
        environment.renderer = renderer
        experiment.evaluateEpisodes(1, visualize=False)
        environment.renderer = None
    
    print i
    meanReturn = mean([sum(v.rewards) for v in valdata])
    print "mean return", meanReturn

    # once the agent performs well enough, start the render window
    if meanReturn > 1.75*maxSteps and not renderer.isAlive():
        renderer.start()

    print "avg. episode length", mean([len(v) for v in valdata])
    print "exploration variance", explorer.sigma
Example #3
import numpy as np
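# (APIAgent, LWPRFA, MirrorEnvironment, Experiment and BinaryActionSearchAdapter
# are assumed to be imported from the RL framework this example belongs to)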

# create agent, environment, renderer, experiment
agent = APIAgent(faClass=LWPRFA, resetFA=True, vectorblock=False)
agent.gamma = 2.
agent.alpha = 1.0
agent.iterations = 1
agent.presentations = 1

environment = MirrorEnvironment()
experiment = Experiment(environment, agent)

# add binary action search (BAS) adapter
bas = BinaryActionSearchAdapter(3., 4., 10)
experiment.addAdapter(bas)

# add e-greedy exploration
# explorer = EpsilonGreedyExplorer(0.5, episodeCount=10000)
# experiment.addAdapter(explorer)

# evaluate the untrained agent first, as a baseline
valdata = experiment.evaluateEpisodes(1000)
print "mean rewards:", np.mean([sum(e.rewards) for e in valdata]) #, "exploration:", explorer.epsilon
# print "exploration:", explorer.epsilon

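# collect training episodes and learn from them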
experiment.runEpisodes(10000)
agent.learn()    

valdata = experiment.evaluateEpisodes(1000)
print "mean rewards:", np.mean([sum(e.rewards) for e in valdata]) #, "exploration:", explorer.epsilon
Example #4
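# NOTE: this snippet is also partial; it assumes that agent, environment and
# experiment have already been created, that IndexingAdapter, NormalizingAdapter,
# EpsilonGreedyExplorer and CartPoleRenderer come from the RL framework, and
# that mean is available, e.g.:
from numpy import mean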
# keep only the first two state dimensions (cut off the last two)
indexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(indexer)

# add normalization adapter
normalizer = NormalizingAdapter()
experiment.addAdapter(normalizer)

# add e-greedy exploration
explorer = EpsilonGreedyExplorer(0.2, episodeCount=100)
experiment.addAdapter(explorer)

experiment.runEpisodes(10)
agent.forget()

explorer.decay = 0.999
renderer = CartPoleRenderer()
environment.renderer = renderer
renderer.start()

# run experiment
for i in range(100):
    experiment.runEpisodes(1)
    agent.learn()

    valdata = experiment.evaluateEpisodes(5)
    print "exploration", explorer.epsilon
    print "mean return", mean([sum(v.rewards) for v in valdata])
    print "num episodes", len(agent.history)