Example #1
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.environments.cartpole import CartPoleEnvironment, CartPoleRenderer, BalanceTask
from pybrain.rl.agents.learning import LearningAgent
from pybrain.rl.experiments import EpisodicExperiment
from scipy import mean
import sys

episodes = 1
epilen = 200

if len(sys.argv) < 5:
    sys.exit('please give 4 parameters. run: "python play_cartpole.py <p1> <p2> <p3> <p4>"\n')

# create environment
env = CartPoleEnvironment()
env.setRenderer(CartPoleRenderer())
env.getRenderer().start()
env.delay = (episodes == 1)

# create task
task = BalanceTask(env, epilen)

# create controller network
net = buildNetwork(4, 1, bias=False)

# create agent and set parameters from command line
agent = LearningAgent(net, None)
agent.module._setParameters([float(sys.argv[1]), float(sys.argv[2]), float(sys.argv[3]), float(sys.argv[4])])

# create experiment
experiment = EpisodicExperiment(task, agent)
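# The extract stops after building the experiment. A minimal, hedged
# completion (this loop is a sketch, not part of the original snippet) runs
# the episodes and reports the mean accumulated reward:
ret = []
for _ in range(episodes):
    rewards = experiment.doEpisodes(1)
    ret.append(mean([sum(x) for x in rewards]))
print('mean episode reward:', mean(ret))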
Example #2
# module-level imports assumed by this snippet (not shown in the extract):
import multiprocessing
import numpy
from matplotlib import pyplot as plt
from pybrain.rl.environments import cartpole as cp
from pybrain.rl.environments.cartpole import CartPoleEnvironment, CartPoleRenderer
from pybrain.rl.agents import LearningAgent
from pybrain.rl.experiments import EpisodicExperiment


def run(arg):
    task = arg[0]
    parameters = arg[1]
    #print "run with", parameters

    seed = parameters["seed"]

    # derive a per-process offset so parallel workers seed numpy differently
    process_id = hash(multiprocessing.current_process()._identity)
    numpy.random.seed(seed + process_id)
    render = False
    plot = False

    plt.ion()

    env = CartPoleEnvironment()
    if render:
        renderer = CartPoleRenderer()
        env.setRenderer(renderer)
        renderer.start()
    
    task_class = getattr(cp, task)
    task = task_class(env, parameters["MaxRunsPerEpisode"])
    
    testtask = task_class(env, parameters["MaxRunsPerEpisodeTest"], desiredValue=None)

    #print "dim: ", task.indim, task.outdim

    from pybrain.tools.shortcuts import buildNetwork
    from pybrain.rl.agents import OptimizationAgent
    from pybrain.optimization import PGPE

    # linear controller network: task.outdim observations in, task.indim actions out
    module = buildNetwork(task.outdim, task.indim, bias=False)
    # create agent with controller and learner (and its options)

    # % of random actions
    #learner.explorer.epsilon = parameters["ExplorerEpsilon"]
    
    
    agent = OptimizationAgent(module, PGPE(storeAllEvaluations=True, storeAllEvaluated=False, maxEvaluations=None, desiredEvaluation=1, verbose=False))
#
#    print agent
#    from pprint import pprint
#    pprint (vars(agent.learner))
    
    testagent = LearningAgent(module, None)
    experiment = EpisodicExperiment(task, agent)
    testexperiment = EpisodicExperiment(testtask, testagent)

    
    def plotPerformance(values, fig):
        plt.figure(fig.number)
        plt.clf()
        plt.plot(values, 'o-')
        plt.gcf().canvas.draw()
        # Without the next line, the pyplot plot won't actually show up.
        plt.pause(0.001)
    
    performance = []
    
    if plot:
        pf_fig = plt.figure()
    
    # integer division so that range() receives an int
    m = parameters["MaxTotalEpisodes"] // parameters["EpisodesPerLearn"]
    for episode in range(m):
        # one learning step after one episode of world-interaction
        experiment.doEpisodes(parameters["EpisodesPerLearn"])
        #agent.learn(1)
    
        #renderer.drawPlot()
        
        # test performance (these real-world experiences are not used for training)
        if plot:
            env.delay = True
        
        if episode % parameters["TestAfter"] == 0:
            #print "Evaluating at episode: ", episode
            
            #experiment.agent = testagent
            #r = mean([sum(x) for x in testexperiment.doEpisodes(parameters["TestWith"])])
            #for i in range(0,parameters["TestWith"]):
#            y = testexperiment.doEpisodes(1)
#            print (agent.learner._allEvaluated)
#                
#            
#            from pprint import pprint
#            pprint (vars(task))
                
            l = parameters["TestWith"]
            
            task.N = parameters["MaxRunsPerEpisodeTest"]
            experiment.doEpisodes(l)
            task.N = parameters["MaxRunsPerEpisode"]

            # average over the most recent test evaluations ([-l:] rather than
            # [-l:-1], which silently dropped the newest one)
            resList = agent.learner._allEvaluations[-l:]
            
#            print agent.learner._allEvaluations
            from scipy import array

            rLen = len(resList)
            avReward = array(resList).sum()/rLen
#            print avReward
#            print resList
#            exit(0)
#            print("Parameters:", agent.learner._bestFound())
#            print(
#                " Evaluation:", episode,
#                " BestReward:", agent.learner.bestEvaluation,
#                " AverageReward:", avReward)
#            if agent.learner.bestEvaluation == 0:
#                
#                print resList[-20:-1]
#                print "done"
#                break
            performance.append(avReward)
            

            env.delay = False
            testagent.reset()
            #experiment.agent = agent
        
#            performance.append(r)
            if plot:
                plotPerformance(performance, pf_fig)
        
#            print "reward avg", r
#            print "explorer epsilon", learner.explorer.epsilon
#            print "num episodes", agent.history.getNumSequences()
#            print "update step", len(performance)
            
#    print "done"
    return performance
            
        #print "network",   json.dumps(module.bn.net.E, indent=2)
            
            
#import sumatra.parameters as p
#import sys
#parameter_file = sys.argv[1]
#parameters = p.SimpleParameterSet(parameter_file)
#
#
#run(["BalanceTask",parameters])
Example #3
from pybrain.rl.environments.cartpole import CartPoleEnvironment, DiscreteBalanceTask, CartPoleRenderer
from pybrain.rl.agents import LearningAgent
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.rl.learners.valuebased import NFQ, ActionValueNetwork
from pybrain.rl.explorers import BoltzmannExplorer
from numpy import array, arange, meshgrid, pi, zeros, mean
from matplotlib import pyplot as plt

# switch this to True if you want to see the cart balancing the pole (slower)
render = False

plt.ion()

env = CartPoleEnvironment()
if render:
    renderer = CartPoleRenderer()
    env.setRenderer(renderer)
    renderer.start()

module = ActionValueNetwork(4, 3)

task = DiscreteBalanceTask(env, 100)
learner = NFQ()
learner.explorer.epsilon = 0.4

agent = LearningAgent(module, learner)
testagent = LearningAgent(module, None)
experiment = EpisodicExperiment(task, agent)

def plotPerformance(values, fig):
    plt.figure(fig.number)
    plt.clf()
    plt.plot(values, 'o-')
    plt.gcf().canvas.draw()
    # Without the next line, the pyplot plot won't actually show up.
    plt.pause(0.001)
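
# The extract ends here. A hedged continuation of the train/test loop,
# mirroring Example #5 and the commented agent-swap hints in the other
# variants (episode counts below are illustrative):
performance = []
pf_fig = plt.figure()
for episode in range(1000):
    experiment.doEpisodes(1)
    agent.learn(1)
    if episode % 50 == 0:
        # evaluate with the greedy testagent (no learner); these episodes
        # are not used for training
        experiment.agent = testagent
        r = mean([sum(x) for x in experiment.doEpisodes(5)])
        testagent.reset()
        experiment.agent = agent
        performance.append(r)
        plotPerformance(performance, pf_fig)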
Example #4
# module-level imports assumed by this snippet (the cartpole and agent
# imports are missing from the extract):
from pybrain.rl.environments.cartpole import CartPoleEnvironment, DiscreteBalanceTask, CartPoleRenderer
from pybrain.rl.agents import LearningAgent
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.rl.learners.valuebased import NFQ, ActionValueNetwork
from pybrain.rl.explorers import BoltzmannExplorer

from numpy import array, arange, meshgrid, pi, zeros, mean
from matplotlib import pyplot as plt

# switch this to True if you want to see the cart balancing the pole (slower)
render = False
#render = True

plt.ion()

env = CartPoleEnvironment()
if render:
    renderer = CartPoleRenderer()
    env.setRenderer(renderer)
    renderer.start()

module = ActionValueNetwork(4, 3)

task = DiscreteBalanceTask(env, 100)
learner = NFQ()
learner.explorer.epsilon = 0.4

agent = LearningAgent(module, learner)
testagent = LearningAgent(module, None)
experiment = EpisodicExperiment(task, agent)


def plotPerformance(values, fig):
    plt.figure(fig.number)
    plt.clf()
    plt.plot(values, 'o-')
    plt.gcf().canvas.draw()
    # Without the next line, the pyplot plot won't actually show up.
    plt.pause(0.001)
Example #5
# module-level imports assumed by this snippet (not shown in the extract):
import multiprocessing
import numpy
from numpy import mean
from matplotlib import pyplot as plt
from pybrain.rl.environments import cartpole as cp
from pybrain.rl.environments.cartpole import CartPoleEnvironment, CartPoleRenderer
from pybrain.rl.agents import LearningAgent
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.rl.learners.valuebased import NFQ, ActionValueNetwork


def run(arg):
    task = arg[0]
    parameters = arg[1]
    #print "run with", parameters

    seed = parameters["seed"]

    # derive a per-process offset so parallel workers seed numpy differently
    process_id = hash(multiprocessing.current_process()._identity)
    numpy.random.seed(seed + process_id)
    render = False
    plot = False

    plt.ion()
    
    env = CartPoleEnvironment()
    if render:
        renderer = CartPoleRenderer()
        env.setRenderer(renderer)
        renderer.start()
    
    task_class = getattr(cp, task)
    task = task_class(env, parameters["MaxRunsPerEpisode"])
    testtask = task_class(env, parameters["MaxRunsPerEpisodeTest"])

    #print "dim: ", task.indim, task.outdim
    
    # value network: task.outdim state inputs, task.indim discrete actions
    module = ActionValueNetwork(task.outdim, task.indim)
    

    learner = NFQ()
    # % of random actions
    learner.explorer.epsilon = parameters["ExplorerEpsilon"]
    
    
    agent = LearningAgent(module, learner)
    testagent = LearningAgent(module, None)
    experiment = EpisodicExperiment(task, agent)
    testexperiment = EpisodicExperiment(testtask, testagent)

    
    def plotPerformance(values, fig):
        plt.figure(fig.number)
        plt.clf()
        plt.plot(values, 'o-')
        plt.gcf().canvas.draw()
        # Without the next line, the pyplot plot won't actually show up.
        plt.pause(0.001)
    
    performance = []
    
    if plot:
        pf_fig = plt.figure()
    
    # integer division so that range() receives an int
    m = parameters["MaxTotalEpisodes"] // parameters["EpisodesPerLearn"]
    for episode in range(m):
        # one learning step after one episode of world-interaction
        experiment.doEpisodes(parameters["EpisodesPerLearn"])
        agent.learn(1)
    
        #renderer.drawPlot()
        
        # test performance (these real-world experiences are not used for training)
        if plot:
            env.delay = True
        
        if episode % parameters["TestAfter"] == 0:
            #print "Evaluating at episode: ", episode
            
            #experiment.agent = testagent
            r = mean([sum(x) for x in testexperiment.doEpisodes(parameters["TestWith"])])
            
            env.delay = False
            testagent.reset()
            #experiment.agent = agent
        
            performance.append(r)
            if plot:
                plotPerformance(performance, pf_fig)
        
#            print "reward avg", r
#            print "explorer epsilon", learner.explorer.epsilon
#            print "num episodes", agent.history.getNumSequences()
#            print "update step", len(performance)
            
#    print "done"
    return performance
            
        #print "network",   json.dumps(module.bn.net.E, indent=2)
Example #6
# Module-level imports assumed by this snippet; ActionValueRAND and RAND are
# custom (non-PyBrain) classes and must be supplied by the surrounding project:
import multiprocessing
import numpy
from matplotlib import pyplot as plt
from pybrain.rl.environments import cartpole as cp
from pybrain.rl.environments.cartpole import CartPoleEnvironment, CartPoleRenderer
from pybrain.rl.agents import LearningAgent
from pybrain.rl.experiments import EpisodicExperiment


def run(arg):
    task = arg[0]
    parameters = arg[1]
    #print "run with", task,parameters

    seed = parameters["seed"]

    # note: unlike the variants above, the process id is computed but not
    # folded into the seed here
    process_id = hash(multiprocessing.current_process()._identity)
    numpy.random.seed(seed)
    
    render = False
    plot = False

    plt.ion()
    
    env = CartPoleEnvironment()
    env.randomInitialization = False
    if render:
        renderer = CartPoleRenderer()
        env.setRenderer(renderer)
        renderer.start()
    
    task_class = getattr(cp, task)
    task = task_class(env, 50)

    #print "dim: ", task.indim, task.outdim
    
    # value module: task.outdim state inputs, task.indim actions
    # (ActionValueRAND/RAND are project-specific classes, see note above)
    bmodule = ActionValueRAND(task.outdim, task.indim)
    rlearner = RAND()

    blearner = RAND()
    # % of random actions
    
    bagent = LearningAgent(bmodule, rlearner)
    
    from pybrain.tools.shortcuts import buildNetwork
    from pybrain.rl.agents import OptimizationAgent
    from pybrain.optimization import PGPE

    module = buildNetwork(task.outdim, task.indim, bias=False)
    # create agent with controller and learner (and its options)

    # % of random actions
    #learner.explorer.epsilon = parameters["ExplorerEpsilon"]
    
    
    agent = OptimizationAgent(module, PGPE(storeAllEvaluations=True, storeAllEvaluated=True, maxEvaluations=None, verbose=False))

    testagent = LearningAgent(module, None)
    pgpeexperiment = EpisodicExperiment(task, agent)
    randexperiment = EpisodicExperiment(task, bagent)


    def plotPerformance(values, fig):
        plt.figure(fig.number)
        plt.clf()
        plt.plot(values, 'o-')
        plt.gcf().canvas.draw()
        # Without the next line, the pyplot plot won't actually show up.
        plt.pause(0.001)
    
    performance = []
    
    if plot:
        pf_fig = plt.figure()
    
    m = parameters["MaxTotalEpisodes"] // parameters["EpisodesPerLearn"]

    ## train pgpe
    for episode in range(50):
        # one learning step after one episode of world-interaction
        y = pgpeexperiment.doEpisodes(1)
        
    # best module found by PGPE and its fitness
    be, bf = agent.learner._bestFound()
    print(be, bf)

    print("generate data")
    be.numActions = 1
    gdagent = LearningAgent(be, blearner)
    experiment = EpisodicExperiment(task, gdagent)
    
    for episode in range(1000):
#        print episode, " of 1000"
        # one learning step after one episode of world-interaction
        y = experiment.doEpisodes(1)
        
#        print y
        x = randexperiment.doEpisodes(1)
#        print len(y[0])
        #renderer.drawPlot()
        
        # test performance (these real-world experiences are not used for training)
        if plot:
            env.delay = True
        

        l = 5
        # average over the most recent evaluations ([-l:] rather than
        # [-l:-1], which silently dropped the newest one)
        resList = agent.learner._allEvaluations[-l:]
        
#            print agent.learner._allEvaluations
        from scipy import array

        rLen = len(resList)
        avReward = array(resList).sum()/rLen
#            print avReward
#            print resList
#            exit(0)
#            print("Parameters:", agent.learner._bestFound())
#            print(
#                " Evaluation:", episode,
#                " BestReward:", agent.learner.bestEvaluation,
#                " AverageReward:", avReward)
#            if agent.learner.bestEvaluation == 0:
#                
#                print resList[-20:-1]
#                print "done"
#                break
        #print resList
        performance.append(avReward)
        

        env.delay = False
        testagent.reset()
        #experiment.agent = agent
    
#            performance.append(r)
        if plot:
            plotPerformance(performance, pf_fig)
            
        
#            print "reward avg", r
#            print "explorer epsilon", learner.explorer.epsilon
#            print "num episodes", agent.history.getNumSequences()
#            print "update step", len(performance)
            
    blearner.add_ds(rlearner.dataset)

    blearner.learn()
    #blearner.learnX(agent.learner._allEvaluated)
    print("done")
    return performance
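
# Hedged usage sketch (assumes the custom ActionValueRAND/RAND classes are
# available; parameter values are illustrative, not from the original):
if __name__ == '__main__':
    perf = run(["BalanceTask", {"seed": 1,
                                "MaxTotalEpisodes": 1000,
                                "EpisodesPerLearn": 1}])
    print(len(perf), "performance samples")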