from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.environments.cartpole import CartPoleEnvironment, CartPoleRenderer, BalanceTask
from pybrain.rl.agents.learning import LearningAgent
from pybrain.rl.experiments import EpisodicExperiment
from scipy import mean
import sys

episodes = 1
epilen = 200

if len(sys.argv) < 5:
    sys.exit('please give 4 parameters. run: "python play_catpole.py <p1> <p2> <p3> <p4>"\n')

# create environment
env = CartPoleEnvironment()
env.setRenderer(CartPoleRenderer())
env.getRenderer().start()
env.delay = (episodes == 1)

# create task
task = BalanceTask(env, epilen)

# create controller network
net = buildNetwork(4, 1, bias=False)

# create agent and set parameters from command line
agent = LearningAgent(net, None)
agent.module._setParameters([float(sys.argv[1]), float(sys.argv[2]),
                             float(sys.argv[3]), float(sys.argv[4])])

# create experiment
experiment = EpisodicExperiment(task, agent)
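# The fragment above only builds the experiment. A minimal sketch of how it could be
# driven, assuming the intent is to play `episodes` episodes with the fixed controller
# parameters and report the mean total reward (this part is not in the original fragment):
rewards = experiment.doEpisodes(episodes)
print "mean episode reward:", mean([sum(r) for r in rewards])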
# Imports added so run() is self-contained; `cp` is assumed to be the PyBrain module
# that holds the cart-pole task classes looked up by name below.
import multiprocessing
import numpy
from matplotlib import pyplot as plt
from pybrain.rl.environments.cartpole import CartPoleEnvironment, CartPoleRenderer
from pybrain.rl.environments.cartpole import balancetask as cp
from pybrain.rl.agents import LearningAgent
from pybrain.rl.experiments import EpisodicExperiment


def run(arg):
    task = arg[0]
    parameters = arg[1]
    #print "run with", parameters
    seed = parameters["seed"]
    process_id = hash(multiprocessing.current_process()._identity)
    numpy.random.seed(seed + process_id)
    render = False
    plot = False

    plt.ion()
    env = CartPoleEnvironment()
    if render:
        renderer = CartPoleRenderer()
        env.setRenderer(renderer)
        renderer.start()

    task_class = getattr(cp, task)
    task = task_class(env, parameters["MaxRunsPerEpisode"])
    testtask = task_class(env, parameters["MaxRunsPerEpisodeTest"], desiredValue=None)
    #print "dim: ", task.indim, task.outdim

    from pybrain.tools.shortcuts import buildNetwork
    from pybrain.rl.agents import OptimizationAgent
    from pybrain.optimization import PGPE

    module = buildNetwork(task.outdim, task.indim, bias=False)

    # create agent with controller and learner (and its options)
    # % of random actions
    #learner.explorer.epsilon = parameters["ExplorerEpsilon"]
    agent = OptimizationAgent(module, PGPE(storeAllEvaluations=True,
                                           storeAllEvaluated=False,
                                           maxEvaluations=None,
                                           desiredEvaluation=1,
                                           verbose=False))
    # print agent
    # from pprint import pprint
    # pprint(vars(agent.learner))
    testagent = LearningAgent(module, None)
    experiment = EpisodicExperiment(task, agent)
    testexperiment = EpisodicExperiment(testtask, testagent)

    def plotPerformance(values, fig):
        plt.figure(fig.number)
        plt.clf()
        plt.plot(values, 'o-')
        plt.gcf().canvas.draw()
        # Without the next line, the pyplot plot won't actually show up.
        plt.pause(0.001)

    performance = []

    if plot:
        pf_fig = plt.figure()

    m = parameters["MaxTotalEpisodes"] / parameters["EpisodesPerLearn"]
    for episode in range(0, m):
        # one learning step after one episode of world-interaction
        experiment.doEpisodes(parameters["EpisodesPerLearn"])
        #agent.learn(1)
        #renderer.drawPlot()

        # test performance (these real-world experiences are not used for training)
        if plot:
            env.delay = True

        if episode % parameters["TestAfter"] == 0:
            #print "Evaluating at episode: ", episode
            #experiment.agent = testagent
            #r = mean([sum(x) for x in testexperiment.doEpisodes(parameters["TestWith"])])
            #for i in range(0, parameters["TestWith"]):
            #    y = testexperiment.doEpisodes(1)
            #    print (agent.learner._allEvaluated)
            # from pprint import pprint
            # pprint(vars(task))
            l = parameters["TestWith"]
            task.N = parameters["MaxRunsPerEpisodeTest"]
            experiment.doEpisodes(l)
            task.N = parameters["MaxRunsPerEpisode"]

            # average the evaluations recorded by PGPE for the test episodes just run
            # (note: [-l:-1] drops the most recent evaluation)
            resList = (agent.learner._allEvaluations)[-l:-1]
            # print agent.learner._allEvaluations
            from scipy import array
            rLen = len(resList)
            avReward = array(resList).sum() / rLen
            # print avReward
            # print resList
            # exit(0)
            # print("Parameters:", agent.learner._bestFound())
            # print(
            #     " Evaluation:", episode,
            #     " BestReward:", agent.learner.bestEvaluation,
            #     " AverageReward:", avReward)
            # if agent.learner.bestEvaluation == 0:
            #     # print resList[-20:-1]
            #     print "done"
            #     break
            performance.append(avReward)

            env.delay = False
            testagent.reset()
            #experiment.agent = agent
            # performance.append(r)

            if plot:
                plotPerformance(performance, pf_fig)
            # print "reward avg", r
            # print "explorer epsilon", learner.explorer.epsilon
            # print "num episodes", agent.history.getNumSequences()
            # print "update step", len(performance)

    # print "done"
    return performance

#print "network", json.dumps(module.bn.net.E, indent=2)

#import sumatra.parameters as p
#import sys
#parameter_file = sys.argv[1]
#parameters = p.SimpleParameterSet(parameter_file)
#
#run(["BalanceTask", parameters])
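# A minimal usage sketch, assuming a plain parameter dict instead of the commented-out
# sumatra parameter file above. The keys are exactly the ones read inside run(); the
# values are illustrative placeholders, not taken from the original experiments.
if __name__ == '__main__':
    example_parameters = {
        "seed": 42,
        "MaxRunsPerEpisode": 200,
        "MaxRunsPerEpisodeTest": 200,
        "MaxTotalEpisodes": 1000,
        "EpisodesPerLearn": 1,
        "TestAfter": 10,
        "TestWith": 20,
    }
    print run(["BalanceTask", example_parameters])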
# Imports added for the classes used below (the fragment started mid-import block);
# DiscreteBalanceTask is assumed to live in the cart-pole balance-task module.
from pybrain.rl.environments.cartpole import CartPoleEnvironment, CartPoleRenderer
from pybrain.rl.environments.cartpole.balancetask import DiscreteBalanceTask
from pybrain.rl.agents import LearningAgent
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.rl.learners.valuebased import NFQ, ActionValueNetwork
from pybrain.rl.explorers import BoltzmannExplorer
from numpy import array, arange, meshgrid, pi, zeros, mean
from matplotlib import pyplot as plt

# switch this to True if you want to see the cart balancing the pole (slower)
render = False

plt.ion()

env = CartPoleEnvironment()
if render:
    renderer = CartPoleRenderer()
    env.setRenderer(renderer)
    renderer.start()

module = ActionValueNetwork(4, 3)

task = DiscreteBalanceTask(env, 100)
learner = NFQ()
learner.explorer.epsilon = 0.4

agent = LearningAgent(module, learner)
testagent = LearningAgent(module, None)
experiment = EpisodicExperiment(task, agent)

def plotPerformance(values, fig):
    plt.figure(fig.number)
    plt.clf()
    # body completed from the identical helper that appears in the run() functions below
    plt.plot(values, 'o-')
    plt.gcf().canvas.draw()
    # Without the next line, the pyplot plot won't actually show up.
    plt.pause(0.001)
# Imports added so run() is self-contained; `cp` is assumed to be the PyBrain module
# that holds the cart-pole task classes looked up by name below.
import multiprocessing
import numpy
from matplotlib import pyplot as plt
from scipy import mean
from pybrain.rl.environments.cartpole import CartPoleEnvironment, CartPoleRenderer
from pybrain.rl.environments.cartpole import balancetask as cp
from pybrain.rl.learners.valuebased import NFQ, ActionValueNetwork
from pybrain.rl.agents import LearningAgent
from pybrain.rl.experiments import EpisodicExperiment


def run(arg):
    task = arg[0]
    parameters = arg[1]
    #print "run with", parameters
    seed = parameters["seed"]
    process_id = hash(multiprocessing.current_process()._identity)
    numpy.random.seed(seed + process_id)
    render = False
    plot = False

    plt.ion()
    env = CartPoleEnvironment()
    if render:
        renderer = CartPoleRenderer()
        env.setRenderer(renderer)
        renderer.start()

    task_class = getattr(cp, task)
    task = task_class(env, parameters["MaxRunsPerEpisode"])
    testtask = task_class(env, parameters["MaxRunsPerEpisodeTest"])
    #print "dim: ", task.indim, task.outdim

    # value network over the task's state observation (task.outdim inputs)
    # and its discrete actions (task.indim)
    module = ActionValueNetwork(task.outdim, task.indim)
    learner = NFQ()
    # % of random actions
    learner.explorer.epsilon = parameters["ExplorerEpsilon"]

    agent = LearningAgent(module, learner)
    testagent = LearningAgent(module, None)
    experiment = EpisodicExperiment(task, agent)
    testexperiment = EpisodicExperiment(testtask, testagent)

    def plotPerformance(values, fig):
        plt.figure(fig.number)
        plt.clf()
        plt.plot(values, 'o-')
        plt.gcf().canvas.draw()
        # Without the next line, the pyplot plot won't actually show up.
        plt.pause(0.001)

    performance = []

    if plot:
        pf_fig = plt.figure()

    m = parameters["MaxTotalEpisodes"] / parameters["EpisodesPerLearn"]
    for episode in range(0, m):
        # one learning step after one episode of world-interaction
        experiment.doEpisodes(parameters["EpisodesPerLearn"])
        agent.learn(1)
        #renderer.drawPlot()

        # test performance (these real-world experiences are not used for training)
        if plot:
            env.delay = True

        if episode % parameters["TestAfter"] == 0:
            #print "Evaluating at episode: ", episode
            #experiment.agent = testagent
            r = mean([sum(x) for x in testexperiment.doEpisodes(parameters["TestWith"])])
            env.delay = False
            testagent.reset()
            #experiment.agent = agent

            performance.append(r)
            if plot:
                plotPerformance(performance, pf_fig)
            # print "reward avg", r
            # print "explorer epsilon", learner.explorer.epsilon
            # print "num episodes", agent.history.getNumSequences()
            # print "update step", len(performance)

    # print "done"
    return performance

#print "network", json.dumps(module.bn.net.E, indent=2)
# Imports added so run() is self-contained. ActionValueRAND and RAND are not part of
# PyBrain; they are assumed to be project-specific classes available on the path.
# `cp` is assumed to be the PyBrain module holding the cart-pole task classes.
import multiprocessing
import numpy
from matplotlib import pyplot as plt
from pybrain.rl.environments.cartpole import CartPoleEnvironment, CartPoleRenderer
from pybrain.rl.environments.cartpole import balancetask as cp
from pybrain.rl.agents import LearningAgent
from pybrain.rl.experiments import EpisodicExperiment


def run(arg):
    task = arg[0]
    parameters = arg[1]
    #print "run with", task, parameters
    seed = parameters["seed"]
    process_id = hash(multiprocessing.current_process()._identity)  # computed but not used for seeding here
    numpy.random.seed(seed)
    render = False
    plot = False

    plt.ion()
    env = CartPoleEnvironment()
    env.randomInitialization = False
    if render:
        renderer = CartPoleRenderer()
        env.setRenderer(renderer)
        renderer.start()

    task_class = getattr(cp, task)
    task = task_class(env, 50)
    #print "dim: ", task.indim, task.outdim

    # random-action value module and learners (project-specific classes)
    bmodule = ActionValueRAND(task.outdim, task.indim)
    rlearner = RAND()
    blearner = RAND()
    # % of random actions
    bagent = LearningAgent(bmodule, rlearner)

    from pybrain.tools.shortcuts import buildNetwork
    from pybrain.rl.agents import OptimizationAgent
    from pybrain.optimization import PGPE

    module = buildNetwork(task.outdim, task.indim, bias=False)

    # create agent with controller and learner (and its options)
    # % of random actions
    #learner.explorer.epsilon = parameters["ExplorerEpsilon"]
    agent = OptimizationAgent(module, PGPE(storeAllEvaluations=True,
                                           storeAllEvaluated=True,
                                           maxEvaluations=None,
                                           verbose=False))
    testagent = LearningAgent(module, None)
    pgpeexperiment = EpisodicExperiment(task, agent)
    randexperiment = EpisodicExperiment(task, bagent)

    def plotPerformance(values, fig):
        plt.figure(fig.number)
        plt.clf()
        plt.plot(values, 'o-')
        plt.gcf().canvas.draw()
        # Without the next line, the pyplot plot won't actually show up.
        plt.pause(0.001)

    performance = []

    if plot:
        pf_fig = plt.figure()

    m = parameters["MaxTotalEpisodes"] / parameters["EpisodesPerLearn"]

    ## train pgpe
    for episode in range(0, 50):
        # one learning step after one episode of world-interaction
        y = pgpeexperiment.doEpisodes(1)

    be, bf = agent.learner._bestFound()
    print be, bf
    print "generate data"
    be.numActions = 1
    gdagent = LearningAgent(be, blearner)
    experiment = EpisodicExperiment(task, gdagent)

    for episode in range(0, 1000):
        # print episode, " of 1000"
        # one learning step after one episode of world-interaction
        y = experiment.doEpisodes(1)
        # print y
        x = randexperiment.doEpisodes(1)
        # print len(y[0])
        #renderer.drawPlot()

        # test performance (these real-world experiences are not used for training)
        if plot:
            env.delay = True

        # average the last few evaluations recorded by PGPE
        # (note: [-l:-1] drops the most recent evaluation)
        l = 5
        resList = (agent.learner._allEvaluations)[-l:-1]
        # print agent.learner._allEvaluations
        from scipy import array
        rLen = len(resList)
        avReward = array(resList).sum() / rLen
        # print avReward
        # print resList
        # exit(0)
        # print("Parameters:", agent.learner._bestFound())
        # print(
        #     " Evaluation:", episode,
        #     " BestReward:", agent.learner.bestEvaluation,
        #     " AverageReward:", avReward)
        # if agent.learner.bestEvaluation == 0:
        #     # print resList[-20:-1]
        #     print "done"
        #     break
        #print resList
        performance.append(avReward)

        env.delay = False
        testagent.reset()
        #experiment.agent = agent
        # performance.append(r)

        if plot:
            plotPerformance(performance, pf_fig)
        # print "reward avg", r
        # print "explorer epsilon", learner.explorer.epsilon
        # print "num episodes", agent.history.getNumSequences()
        # print "update step", len(performance)

    blearner.add_ds(rlearner.dataset)
    blearner.learn()
    #blearner.learnX(agent.learner._allEvaluated)
    print "done"
    return performance