from pylab import figure, ion
from scipy import random

from pybrain.tools.plotting import MultilinePlotter
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules import TanhLayer
# import paths for the ship-steering environment assumed from the PyBrain source layout
from pybrain.rl.environments.shipsteer import ShipSteeringEnvironment, GoNorthwardTask
from pybrain.rl.agents.finitedifference import FiniteDifferenceAgent
from pybrain.rl.learners import SPLA
from pybrain.rl.experiments import EpisodicExperiment

# live plot of the learning curve
figure()
ion()
pl = MultilinePlotter(autoscale=1.2, xlim=[0, 50], ylim=[0, 1])
pl.setLineStyle(linewidth=2)

numbExp = 25  # number of experiments
for runs in range(numbExp):
    # create environment
    # Options: Bool(OpenGL), Bool(Realtime simu. while client is connected),
    # ServerIP(default: localhost), Port(default: 21560)
    env = ShipSteeringEnvironment(False)
    # create task
    task = GoNorthwardTask(env, maxsteps=500)
    # create controller network (task.outdim = observation size, task.indim = action size)
    net = buildNetwork(task.outdim, task.indim, outclass=TanhLayer)
    # create agent with controller and learner
    agent = FiniteDifferenceAgent(net, SPLA())
    # learning options
    agent.learner.gd.alpha = 0.3      # step size of mu adaptation
    agent.learner.gdSig.alpha = 0.15  # step size of sigma adaptation
    agent.learner.gd.momentum = 0.0
    batch = 2  # number of samples per gradient estimate
    # create experiment
    experiment = EpisodicExperiment(task, agent)
    prnts = 1  # frequency of console output
    epis = 2000 // batch // prnts  # number of updates (2000 rollouts in total)
    filename = "dataSPLA08NoRew" + repr(int(random.random() * 1000000.0)) + ".dat"
    wf = open(filename, 'wb')
    for updates in range(epis):
        for i in range(prnts):
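            # --- reconstruction: the original example is truncated at this point. ---
            # Minimal sketch of the standard PyBrain episodic training step
            # (doEpisodes/learn/reset are PyBrain APIs); the logging and
            # plotting details below are assumptions.
            rewards = experiment.doEpisodes(batch)
            agent.learn()
            agent.reset()
        # average total reward over the last batch (assumed bookkeeping)
        avgReward = sum(sum(r) for r in rewards) / float(batch)
        wf.write(repr(avgReward) + "\n")
        pl.addData(0, updates, avgReward)
        pl.update()
    wf.close()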
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.environments.cartpole import CartPoleEnvironment, BalanceTask
from pybrain.rl.agents.finitedifference import FiniteDifferenceAgent
from pybrain.rl.learners import SPLA
from pybrain.rl.experiments import EpisodicExperiment
from scipy import random

numbExp = 12  # number of experiments
for runs in range(numbExp):
    # create environment
    env = CartPoleEnvironment()
    # create task (episodes are capped at 200 steps)
    task = BalanceTask(env, 200)
    # create controller network (4 state variables in, 1 action out)
    net = buildNetwork(4, 1, bias=False)
    # create agent with controller and learner
    agent = FiniteDifferenceAgent(net, SPLA())
    # learning options
    agent.learner.gd.alpha = 0.05    # step size of mu adaptation
    agent.learner.gdSig.alpha = 0.1  # step size of sigma adaptation
    agent.learner.gd.momentum = 0.9
    agent.learner.epsilon = 6.0
    agent.learner.initSigmas()
    # agent.learner.rprop = True
    experiment = EpisodicExperiment(task, agent)
    batch = 16  # number of samples per gradient estimate
    prnts = 10  # frequency of console output
    epis = 50000 // batch // prnts  # number of updates (50000 rollouts in total)
    save = False
    rl = []
    for updates in range(epis):
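        # --- reconstruction: the original example is truncated at this point. ---
        # Minimal sketch of the standard PyBrain episodic loop; the reward
        # bookkeeping in `rl` and the console output are assumptions.
        for i in range(prnts):
            rewards = experiment.doEpisodes(batch)
            agent.learn()
            agent.reset()
        # record the average total reward of the last batch
        rl.append(sum(sum(r) for r in rewards) / float(batch))
        print("Run %d, rollout %d, avg reward %.2f" % (runs, (updates + 1) * batch * prnts, rl[-1]))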
import pickle

from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules import TanhLayer
# import paths for the CCRL grasping environment assumed from the PyBrain source layout
from pybrain.rl.environments.ode import CCRLEnvironment
from pybrain.rl.environments.ode.tasks import CCRLGlasVarTask
from pybrain.rl.agents.finitedifference import FiniteDifferenceAgent
from pybrain.rl.learners import SPLA
from pybrain.rl.experiments import EpisodicExperiment


def loadWeights(filename):
    # assumed helper (not shown in the original snippet): restore a pickled
    # parameter vector written by a matching saveWeights call
    with open(filename, 'rb') as f:
        return pickle.load(f)


saveNet = False
saveName = "grasp.wgt"
loadNet = False  # assumed flag; referenced but not defined in the original
hiddenUnits = 4  # assumed value; referenced but not defined in the original
numbExp = 1  # number of experiments
for runs in range(numbExp):
    # create environment
    # Options: XML-Model, Bool(OpenGL), Bool(Realtime simu. while client is
    # connected), ServerIP(default: localhost), Port(default: 21560)
    env = CCRLEnvironment()
    # create task
    task = CCRLGlasVarTask(env)
    # create controller network
    net = buildNetwork(len(task.getObservation()), hiddenUnits, env.actLen, outclass=TanhLayer)
    # create agent with controller and learner
    agent = FiniteDifferenceAgent(net, SPLA())
    # learning options
    agent.learner.gd.alpha = 0.2       # step size of mu adaptation
    agent.learner.gdSig.alpha = 0.085  # step size of sigma adaptation
    agent.learner.gd.momentum = 0.0
    # load previously saved weights
    if loadNet:
        agent.learner.original = loadWeights("grasp.wgt")
        agent.learner.gd.init(agent.learner.original)
    agent.learner.epsilon = 0.2
    agent.learner.initSigmas()
    batch = 2  # number of samples per gradient estimate
    # create experiment
    experiment = EpisodicExperiment(task, agent)
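    # --- the original example ends here, before the training loop. ---
    # Minimal sketch of the standard PyBrain episodic loop; the number of
    # updates and the pickle-based saving are assumptions.
    epis = 200  # assumed number of gradient updates
    for updates in range(epis):
        experiment.doEpisodes(batch)
        agent.learn()
        agent.reset()
    if saveNet:
        # mirror of the assumed loadWeights helper above
        with open(saveName, 'wb') as f:
            pickle.dump(agent.learner.original, f)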