def buildExperiment(alpha, gamma, lambdaa, importance):
    """Build a network that couples a Q(lambda) RL node with a grid world.

    The RL node ("RL") and the grid-world node ("map_20x20") are added to a
    new nef.Network and connected in a loop over the QLambda data topics.
    Four inputs are then created and wired to the RL node's parameter
    terminations.

    Args:
        alpha: value of the "alpha" input (-> QLambda.topicAlpha).
        gamma: value of the "gamma" input (-> QLambda.topicGamma).
        lambdaa: value of the "lambda" input (-> QLambda.topicLambda).
        importance: value of the "importance" input
            (-> QLambda.topicImportance).

    Returns:
        The configured nef.Network (already added to Nengo).
    """
    net = nef.Network("HandWired parameters of RL node to bias")
    net.add_to_nengo()

    agent = rl_sarsa.qlambda(
        "RL", noStateVars=2, noActions=4, noValues=20, logPeriod=2000)
    env = gridworld.benchmarkA("map_20x20", "BenchmarkGridWorldNodeC", 10000)

    # Place both nodes into the network.
    for node in (agent, env):
        net.add(node)

    # Connect them together: world -> RL, then RL -> world.
    net.connect(env.getOrigin(QLambda.topicDataIn),
                agent.newTerminationFor(QLambda.topicDataIn))
    net.connect(agent.getOrigin(QLambda.topicDataOut),
                env.getTermination(QLambda.topicDataOut))

    # (input name, initial value, RL termination topic)
    params = (
        ("alpha", alpha, QLambda.topicAlpha),
        ("gamma", gamma, QLambda.topicGamma),
        ("lambda", lambdaa, QLambda.topicLambda),
        ("importance", importance, QLambda.topicImportance),
    )

    # Define the parameter sources (controllable from the simulation window).
    for label, value, _ in params:
        net.make_input(label, [value])

    # Connect the signal sources to the RL node, after all inputs exist.
    for label, _, topic in params:
        net.connect(label, agent.getTermination(topic))

    return net
def buildSimulation(alpha, gamma, lambdaa, importance, expName='test0'):
    """Build the RL/grid-world network and attach a prosperity saver.

    An ASM Q(lambda) node ("RL") and a grid-world node ("map_20x20") are
    connected in a loop, four parameter inputs are wired to the RL node, and
    the RL node's prosperity origin is connected to a ProsperitySaver.

    Args:
        alpha: value of the 'alpha' input (-> QLambda.topicAlpha).
        gamma: value of the 'gamma' input (-> QLambda.topicGamma).
        lambdaa: value of the 'lambda' input (-> QLambda.topicLambda).
        importance: value of the 'importance' input
            (-> QLambda.topicImportance).
        expName: experiment name; the saver is constructed with
            'data_' + expName + '.txt'.

    Returns:
        The configured nef.Network (already added to Nengo).
    """
    net = nef.Network('HandWired parameters of RL node to bias')
    net.add_to_nengo()

    agent = rl_sarsa.qlambdaASM(
        "RL", noStateVars=2, noActions=4, noValues=20, logPeriod=2000)
    env = gridworld.benchmarkA("map_20x20", "BenchmarkGridWorldNodeC", 10000)

    # Place both nodes into the network.
    net.add(agent)
    net.add(env)

    # Connect them together: world -> RL, then RL -> world.
    net.connect(env.getOrigin(QLambda.topicDataIn),
                agent.newTerminationFor(QLambda.topicDataIn))
    net.connect(agent.getOrigin(QLambda.topicDataOut),
                env.getTermination(QLambda.topicDataOut))

    # (input name, initial value, RL termination topic)
    params = (
        ('alpha', alpha, QLambda.topicAlpha),
        ('gamma', gamma, QLambda.topicGamma),
        ('lambda', lambdaa, QLambda.topicLambda),
        ('importance', importance, QLambda.topicImportance),
    )

    # Define the parameter sources (controllable from the simulation window).
    for label, value, _ in params:
        net.make_input(label, [value])

    # Connect the signal sources to the RL node, after all inputs exist.
    for label, _, topic in params:
        net.connect(label, agent.getTermination(topic))

    # Route the RL node's prosperity output to the saver.
    saver = net.add(ProsperitySaver('data_' + expName + '.txt'))
    net.connect(agent.getOrigin(QLambda.topicProsperity),
                saver.getTermination("data"))

    return net
def buildExperiment(alpha, gamma, lambdaa, importance):
    """Create the hand-wired RL experiment network.

    Builds a nef.Network holding a Q(lambda) node ("RL") and a grid-world
    node ("map_20x20"), connects the two over the QLambda data topics, and
    feeds the RL node's parameter terminations from four named inputs.

    Args:
        alpha: value of the 'alpha' input (-> QLambda.topicAlpha).
        gamma: value of the 'gamma' input (-> QLambda.topicGamma).
        lambdaa: value of the 'lambda' input (-> QLambda.topicLambda).
        importance: value of the 'importance' input
            (-> QLambda.topicImportance).

    Returns:
        The configured nef.Network (already added to Nengo).
    """
    net = nef.Network('HandWired parameters of RL node to bias')
    net.add_to_nengo()

    learner = rl_sarsa.qlambda(
        "RL", noStateVars=2, noActions=4, noValues=20, logPeriod=2000)
    grid = gridworld.benchmarkA("map_20x20", "BenchmarkGridWorldNodeC", 10000)

    # Place both nodes into the network.
    net.add(learner)
    net.add(grid)

    # Connect them together: world -> RL, then RL -> world.
    grid_out = grid.getOrigin(QLambda.topicDataIn)
    net.connect(grid_out, learner.newTerminationFor(QLambda.topicDataIn))
    learner_out = learner.getOrigin(QLambda.topicDataOut)
    net.connect(learner_out, grid.getTermination(QLambda.topicDataOut))

    # Define the parameter sources (controllable from the simulation window).
    sources = (
        ('alpha', alpha),
        ('gamma', gamma),
        ('lambda', lambdaa),
        ('importance', importance),
    )
    for input_name, initial in sources:
        net.make_input(input_name, [initial])

    # Connect the signal sources to the matching RL terminations.
    targets = (
        ('alpha', QLambda.topicAlpha),
        ('gamma', QLambda.topicGamma),
        ('lambda', QLambda.topicLambda),
        ('importance', QLambda.topicImportance),
    )
    for input_name, topic in targets:
        net.connect(input_name, learner.getTermination(topic))

    return net
def addModel(net, modelName='model0'):
    """Add one independent RL + grid world + motivation model to `net`.

    Creates an ASM Q(lambda) node, a grid-world node and a motivation source,
    wires them together through explicit transform matrices, adds decay and
    importance inputs, and attaches a ProsperitySaver to the RL node.

    Args:
        net: the nef.Network the components are added to.
        modelName: prefix used for the node, input and saver file names.

    Returns:
        A list [rl, world, source, saver, modelName, tstates, modelName],
        whose elements can be used to configure the model further.
    """
    # ------------------------------------------------------------ components
    agent = rl_sarsa.qlambdaASM(
        modelName,
        noStateVars=2,
        noActions=4,
        noValues=15,
        logPeriod=2000,
        synchronous=False,
        classname="org.hanns.rl.discrete.ros.sarsa.config.QLambdaCoverageRewardFile")
    env = gridworld.benchmarkA(
        modelName + "_map_15x15",
        mapName="BenchmarkGridWorldNodeD",
        logPeriod=100000)
    drive = motivation.basic(modelName, 1, Motivation.DEF_DECAY, logPeriod=10000)

    # Place them into the network.
    net.add(agent)
    net.add(env)
    net.add(drive)

    # ----------------------------------------------------------- connections
    # Transform from the grid-world output to the RL dataIn termination.
    # Per the original author's note the first dimension is the reward, so
    # this is an identity without the first dimension (the reward is not
    # connected directly).
    state_transform = [
        [0, 0, 0],
        [0, 1, 0],
        [0, 0, 1],
    ]
    net.connect(env.getOrigin(QLambda.topicDataIn),
                agent.newTerminationFor(QLambda.topicDataIn, state_transform))  # world -> rl (states)
    net.connect(agent.getOrigin(QLambda.topicDataOut),
                env.getTermination(QLambda.topicDataOut))  # rl -> world (actions)

    # The reward reaches the RL node through the motivation source:
    # pick the first world dimension and feed it to the motivation node.
    reward_to_drive = [[1], [0], [0]]  # yx (y is second dim)
    net.connect(env.getOrigin(QLambda.topicDataIn),
                drive.newTerminationFor(Motivation.topicDataIn, reward_to_drive))  # world -> motivation (reward)

    # First dim of the motivation output (reward) -> first dim of RL dataIn.
    drive_to_reward = [
        [1, 0, 0],
        [0, 0, 0],
    ]  # yx (y is second dim)
    net.connect(drive.getOrigin(Motivation.topicDataOut),
                agent.newTerminationFor(QLambda.topicDataIn, drive_to_reward))  # motivation (r) -> rl (reward)

    # Second dim of the motivation output -> RL importance.
    drive_to_importance = [[0], [1]]  # yx
    net.connect(drive.getOrigin(Motivation.topicDataOut),
                agent.newTerminationFor(QLambda.topicImportance, drive_to_importance))  # motivation -> rl (importance)

    # Grid world -> RL importance.
    # NOTE(review): this transform is all zeros - confirm that is intended.
    world_to_importance = [[0], [0], [0]]  # yx
    net.connect(env.getOrigin(QLambda.topicDataIn),
                agent.newTerminationFor(QLambda.topicImportance, world_to_importance))  # world -> rl (importance)

    # Decay port of the motivation source, driven by a 0.01 input.
    decay_input = modelName + '_decay'
    net.make_input(decay_input, [0.01])
    net.connect(decay_input, drive.getTermination(Motivation.topicDecay))

    # ------------------------------------------------------------------ misc
    importance_input = modelName + '_importance'
    net.make_input(importance_input, [0])
    net.connect(importance_input, agent.getTermination(QLambda.topicImportance))

    saver = net.add(ProsperitySaver(modelName + '_saver.txt'))  # TODO no need for name here?
    net.connect(agent.getOrigin(QLambda.topicProsperity),
                saver.getTermination("RLprosperity"))

    # Return all new components.
    # NOTE(review): modelName appears twice (indices 4 and 6); the layout is
    # kept because callers may index the list by position.
    return [agent, env, drive, saver, modelName, state_transform, modelName]
def buildSimulation(alpha, gamma, lambdaa, importance, expName='test0'):
    """Build the RL/grid-world network with a prosperity saver attached.

    The RL node is created through rl_sarsa.qlambdaASM with the
    "org.hanns.rl.discrete.ros.sarsa.QLambda" class and prospLen=1, connected
    in a loop with the "map_20x20" grid world, fed by four parameter inputs,
    and its prosperity origin is connected to a ProsperitySaver.

    Args:
        alpha: value of the 'alpha' input (-> QLambda.topicAlpha).
        gamma: value of the 'gamma' input (-> QLambda.topicGamma).
        lambdaa: value of the 'lambda' input (-> QLambda.topicLambda).
        importance: value of the 'importance' input
            (-> QLambda.topicImportance).
        expName: experiment name; the saver is constructed with
            'data_' + expName + '.txt'.

    Returns:
        The configured nef.Network (already added to Nengo).
    """
    net = nef.Network('HandWired parameters of RL node to bias')
    net.add_to_nengo()

    agent = rl_sarsa.qlambdaASM(
        "RL",
        noStateVars=2,
        noActions=4,
        noValues=20,
        logPeriod=2000,
        classname="org.hanns.rl.discrete.ros.sarsa.QLambda",
        prospLen=1)
    env = gridworld.benchmarkA("map_20x20", "BenchmarkGridWorldNodeC", 10000)

    # Place both nodes into the network.
    for node in (agent, env):
        net.add(node)

    # Connect them together: world -> RL, then RL -> world.
    net.connect(env.getOrigin(QLambda.topicDataIn),
                agent.newTerminationFor(QLambda.topicDataIn))
    net.connect(agent.getOrigin(QLambda.topicDataOut),
                env.getTermination(QLambda.topicDataOut))

    # (input name, initial value, RL termination topic)
    params = (
        ('alpha', alpha, QLambda.topicAlpha),
        ('gamma', gamma, QLambda.topicGamma),
        ('lambda', lambdaa, QLambda.topicLambda),
        ('importance', importance, QLambda.topicImportance),
    )

    # Define the parameter sources (controllable from the simulation window).
    for label, value, _ in params:
        net.make_input(label, [value])

    # Connect the signal sources to the RL node, after all inputs exist.
    for label, _, topic in params:
        net.connect(label, agent.getTermination(topic))

    # Route the RL node's prosperity output to the saver.
    saver = net.add(ProsperitySaver('data_' + expName + '.txt'))
    net.connect(agent.getOrigin(QLambda.topicProsperity),
                saver.getTermination("data"))

    return net
def buildSimulation(alpha, gamma, lambdaa, importance, expName="test0"):
    """Assemble the simulation network: RL node, grid world, inputs, saver.

    Steps, in order: create the network and register it with Nengo; create
    the ASM Q(lambda) node and the grid world and add both; connect them in
    a loop; create the four parameter inputs; connect each input to its RL
    termination; attach a ProsperitySaver to the RL prosperity origin.

    Args:
        alpha: value of the "alpha" input (-> QLambda.topicAlpha).
        gamma: value of the "gamma" input (-> QLambda.topicGamma).
        lambdaa: value of the "lambda" input (-> QLambda.topicLambda).
        importance: value of the "importance" input
            (-> QLambda.topicImportance).
        expName: experiment name; the saver is constructed with
            "data_" + expName + ".txt".

    Returns:
        The configured nef.Network (already added to Nengo).
    """
    net = nef.Network("HandWired parameters of RL node to bias")
    net.add_to_nengo()

    learner = rl_sarsa.qlambdaASM(
        "RL",
        noStateVars=2,
        noActions=4,
        noValues=20,
        logPeriod=2000,
        classname="org.hanns.rl.discrete.ros.sarsa.QLambda",
        prospLen=1)
    grid = gridworld.benchmarkA("map_20x20", "BenchmarkGridWorldNodeC", 10000)

    # Place both nodes into the network.
    net.add(learner)
    net.add(grid)

    # Connect them together: world -> RL, then RL -> world.
    grid_out = grid.getOrigin(QLambda.topicDataIn)
    net.connect(grid_out, learner.newTerminationFor(QLambda.topicDataIn))
    learner_out = learner.getOrigin(QLambda.topicDataOut)
    net.connect(learner_out, grid.getTermination(QLambda.topicDataOut))

    # Define the parameter sources (controllable from the simulation window).
    sources = (
        ("alpha", alpha),
        ("gamma", gamma),
        ("lambda", lambdaa),
        ("importance", importance),
    )
    for input_name, initial in sources:
        net.make_input(input_name, [initial])

    # Connect the signal sources to the matching RL terminations.
    targets = (
        ("alpha", QLambda.topicAlpha),
        ("gamma", QLambda.topicGamma),
        ("lambda", QLambda.topicLambda),
        ("importance", QLambda.topicImportance),
    )
    for input_name, topic in targets:
        net.connect(input_name, learner.getTermination(topic))

    # Route the RL node's prosperity output to the saver.
    log_file = "data_" + expName + ".txt"
    saver = net.add(ProsperitySaver(log_file))
    net.connect(learner.getOrigin(QLambda.topicProsperity),
                saver.getTermination("data"))

    return net
# Create the NeuralModule which implements a discrete RL algorithm:
# Q(lambda), i.e. Q-learning with eligibility traces.
#
# Start the benchmark B
# NOTE(review): the world below is created with "BenchmarkGridWorldNodeC" on
# "map_20x20" - confirm whether "benchmark B" in this header is still accurate.
#
# by Jaroslav Vitku [[email protected]]

import nef
from ca.nengo.math.impl import FourierFunction
from ca.nengo.model.impl import FunctionInput
from ca.nengo.model import Units
from ctu.nengoros.modules.impl import DefaultNeuralModule as NeuralModule
from ctu.nengoros.comm.nodeFactory import NodeGroup as NodeGroup
from ctu.nengoros.comm.rosutils import RosUtils as RosUtils
from org.hanns.rl.discrete.ros.sarsa import QLambda
import rl_sarsa
import gridworld

net = nef.Network(
    'Demo of SARSA RL module interacting with the simulator of discrete 2D world with obstacles and rewards'
)
net.add_to_nengo()

# 2 state variables, 4 actions, xsize=20
# The network is passed to qlambdaASMConfigured; only the world is added here.
rl = rl_sarsa.qlambdaASMConfigured(
    "RL",
    net,
    noStateVars=2,
    noActions=4,
    noValues=20)

world = gridworld.benchmarkA("map_20x20", "BenchmarkGridWorldNodeC")
net.add(world)

# Data loop: world -> RL, then RL -> world.
net.connect(
    world.getOrigin(QLambda.topicDataIn),
    rl.newTerminationFor(QLambda.topicDataIn))
net.connect(
    rl.getOrigin(QLambda.topicDataOut),
    world.getTermination(QLambda.topicDataOut))

# Parenthesised form prints the same text under the Python 2 print statement.
print('Configuration complete.')
#
# Start the benchmark B
#
# by Jaroslav Vitku [[email protected]]

import nef
from ca.nengo.math.impl import FourierFunction
from ca.nengo.model.impl import FunctionInput
from ca.nengo.model import Units
from ctu.nengoros.modules.impl import DefaultNeuralModule as NeuralModule
from ctu.nengoros.comm.nodeFactory import NodeGroup as NodeGroup
from ctu.nengoros.comm.rosutils import RosUtils as RosUtils
from org.hanns.rl.discrete.ros.sarsa import QLambda
import rl_sarsa
import gridworld

net = nef.Network(
    'Demo of SARSA RL module interacting with the simulator of discrete 2D world with obstacles and rewards'
)
net.add_to_nengo()

# 2 state variables, 4 actions, xsize=30
# The network is passed to qlambdaASMConfigured; only the world is added here.
rl = rl_sarsa.qlambdaASMConfigured(
    "RL",
    net,
    noStateVars=2,
    noActions=4,
    noValues=30)

# TODO this seems not to work now..
world = gridworld.benchmarkA("map_30x30", "BenchmarkGridWorldNodeB")
net.add(world)

# Data loop: world -> RL, then RL -> world.
net.connect(
    world.getOrigin(QLambda.topicDataIn),
    rl.newTerminationFor(QLambda.topicDataIn))
net.connect(
    rl.getOrigin(QLambda.topicDataOut),
    world.getTermination(QLambda.topicDataOut))

# Parenthesised form prints the same text under the Python 2 print statement.
print('Configuration complete.')
# Nengo script: connects a configured ASM Q(lambda) RL node to the
# "map_20x20" grid world ("BenchmarkGridWorldNodeC") in a closed data loop.

import nef
from ca.nengo.math.impl import FourierFunction
from ca.nengo.model.impl import FunctionInput
from ca.nengo.model import Units
from ctu.nengoros.modules.impl import DefaultNeuralModule as NeuralModule
from ctu.nengoros.comm.nodeFactory import NodeGroup as NodeGroup
from ctu.nengoros.comm.rosutils import RosUtils as RosUtils
from org.hanns.rl.discrete.ros.sarsa import QLambda
import rl_sarsa
import gridworld

net = nef.Network(
    'Demo of SARSA RL module interacting with the simulator of discrete 2D world with obstacles and rewards'
)
net.add_to_nengo()

# 2 state variables, 4 actions, xsize=20
# The network is passed to qlambdaASMConfigured; only the world is added here.
rl = rl_sarsa.qlambdaASMConfigured(
    "RL",
    net,
    noStateVars=2,
    noActions=4,
    noValues=20)

world = gridworld.benchmarkA("map_20x20", "BenchmarkGridWorldNodeC")
net.add(world)

# Data loop: world -> RL, then RL -> world.
net.connect(
    world.getOrigin(QLambda.topicDataIn),
    rl.newTerminationFor(QLambda.topicDataIn))
net.connect(
    rl.getOrigin(QLambda.topicDataOut),
    world.getTermination(QLambda.topicDataOut))

# Parenthesised form prints the same text under the Python 2 print statement.
print('Configuration complete.')