Example #1
# Ship-steering task (GoNorthwardTask) trained with a finite-difference agent
# and the SPLA learner.
# Imports needed by this fragment; the module paths follow the old PyBrain
# layout used by these examples and may differ in other versions.
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules import TanhLayer
from pybrain.rl.environments.shipsteer import ShipSteeringEnvironment, GoNorthwardTask
from pybrain.rl.agents.finitedifference import FiniteDifferenceAgent
from pybrain.rl.learners import SPLA
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.tools.plotting import MultilinePlotter
from pylab import figure, ion
from scipy import random

# optional live plotting of the learning curve (requires a display); keep the
# flag off to run headless
plotting = False
if plotting:
    figure()
    ion()
    pl = MultilinePlotter(autoscale=1.2, xlim=[0, 50], ylim=[0, 1])
    pl.setLineStyle(linewidth=2)

numbExp = 25  # number of experiments
for runs in range(numbExp):
    # create environment
    # Options: Bool(OpenGL), Bool(realtime simulation while a client is connected), ServerIP (default: localhost), Port (default: 21560)
    env = ShipSteeringEnvironment(False)
    # create task
    task = GoNorthwardTask(env, maxsteps=500)
    # create controller network
    net = buildNetwork(task.outdim, task.indim, outclass=TanhLayer)
    # create agent with controller and learner
    agent = FiniteDifferenceAgent(net, SPLA())
    # learning options
    agent.learner.gd.alpha = 0.3       # step size of the mu (mean) adaptation
    agent.learner.gdSig.alpha = 0.15   # step size of the sigma (exploration) adaptation
    agent.learner.gd.momentum = 0.0
    batch = 2  # number of sample episodes per gradient estimate
    #create experiment
    experiment = EpisodicExperiment(task, agent)
    prnts = 1  # frequency of console output
    epis = 2000 // batch // prnts  # integer division so range() receives an int
    
    #actual roll outs
    filename = "dataSPLA08NoRew" + repr(int(random.random() * 1000000.0)) + ".dat"
    wf = open(filename, 'wb')
    for updates in range(epis):
        for i in range(prnts):
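            # --- The listing is truncated here. A minimal sketch of how such a
            # --- PyBrain roll-out loop typically continues (an assumption, not
            # --- the original code):
            experiment.doEpisodes(batch)  # execute `batch` episodes
            agent.learn()                 # one SPLA update from the gathered samples
            agent.reset()                 # clear agent history before the next batch
        # per-update progress reporting and logging to `wf` would go here
    wf.close()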
Example #2
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.environments.cartpole import CartPoleEnvironment, BalanceTask
from pybrain.rl.agents.finitedifference import FiniteDifferenceAgent
from pybrain.rl.learners import SPLA
from pybrain.rl.experiments import EpisodicExperiment
from scipy import random

numbExp = 12
for runs in range(numbExp):
    env = CartPoleEnvironment()
    # create task
    task = BalanceTask(env, 200)
    # create controller network
    net = buildNetwork(4, 1, bias=False)
    # create agent with controller and learner
    agent = FiniteDifferenceAgent(net, SPLA())
    # learning options
    agent.learner.gd.alpha = 0.05
    agent.learner.gdSig.alpha = 0.1
    agent.learner.gd.momentum = 0.9
    agent.learner.epsilon = 6.0
    agent.learner.initSigmas()
    # agent.learner.rprop = True
    experiment = EpisodicExperiment(task, agent)
    batch = 16
    prnts = 10
    epis = 50000 // batch // prnts  # integer division so range() receives an int
    save = False

    rl = []
    for updates in range(epis):
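        # --- The listing stops here. A sketch of a typical update step (an
        # --- assumption, not the original code): collect episodes, learn, and
        # --- record the mean episode return in `rl`.
        for i in range(prnts):
            rewards = experiment.doEpisodes(batch)  # list of per-episode reward sequences
            agent.learn()                           # one SPLA parameter update
            agent.reset()
        rl.append(sum(sum(r) for r in rewards) / len(rewards))  # mean return of the last batch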
Example #3
# CCRL arm-grasping task (CCRLGlasVarTask) trained with a finite-difference
# agent and the SPLA learner.
# Imports needed by this fragment. The CCRL environment and task come from
# PyBrain's ODE-based environments; the exact module paths below are an
# assumption and may differ between PyBrain versions.
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules import TanhLayer
from pybrain.rl.agents.finitedifference import FiniteDifferenceAgent
from pybrain.rl.learners import SPLA
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.rl.environments.ode import CCRLEnvironment
from pybrain.rl.environments.ode.tasks import CCRLGlasVarTask

# Configuration that the full script defines elsewhere (placeholder values):
hiddenUnits = 4   # size of the controller's hidden layer (assumed value)
loadNet = False   # set True to resume from previously saved weights
saveNet = False
saveName = "grasp.wgt"
numbExp = 1  #number of experiments
for runs in range(numbExp):
    # create environment
    # Options: XML model, Bool(OpenGL), Bool(realtime simulation while a client is connected), ServerIP (default: localhost), Port (default: 21560)
    env = CCRLEnvironment()
    # create task
    task = CCRLGlasVarTask(env)
    # create controller network
    net = buildNetwork(len(task.getObservation()),
                       hiddenUnits,
                       env.actLen,
                       outclass=TanhLayer)
    # create agent with controller and learner
    agent = FiniteDifferenceAgent(net, SPLA())
    # learning options
    agent.learner.gd.alpha = 0.2       # step size of the mu (mean) adaptation
    agent.learner.gdSig.alpha = 0.085  # step size of the sigma (exploration) adaptation
    agent.learner.gd.momentum = 0.0

    # Optionally resume from saved weights (loadWeights is a helper from the full script, not shown in this fragment)
    if loadNet:
        agent.learner.original = loadWeights("grasp.wgt")
        agent.learner.gd.init(agent.learner.original)
        agent.learner.epsilon = 0.2
        agent.learner.initSigmas()

    batch = 2  #number of samples per gradient estimate
    #create experiment
    experiment = EpisodicExperiment(task, agent)
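    # --- The listing ends here. A minimal sketch of the missing roll-out loop
    # --- (an assumption, not the original code): run grasping episodes, update
    # --- with SPLA, and optionally dump the learned parameters via a
    # --- hypothetical saveWeights helper (counterpart of loadWeights above).
    epis = 1000  # assumed number of updates
    for updates in range(epis):
        experiment.doEpisodes(batch)  # execute `batch` grasping episodes
        agent.learn()                 # SPLA update from the gathered samples
        agent.reset()
    if saveNet:
        saveWeights(saveName, agent.learner.original)  # hypothetical helper; `original` is the current parameter vector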