# Create the NeuralModule which implements a discrete RL algorithm:
# Q(lambda), i.e. Q-learning with eligibility traces.
#
# Start the benchmark B
#
# by Jaroslav Vitku [[email protected]]
#
# NOTE(review): the header above says "benchmark B", but the script below
# loads "map_20x20" with the node "BenchmarkGridWorldNodeC" - confirm which
# benchmark this file is meant to start.

import nef
from ca.nengo.math.impl import FourierFunction
from ca.nengo.model.impl import FunctionInput
from ca.nengo.model import Units
from ctu.nengoros.modules.impl import DefaultNeuralModule as NeuralModule
from ctu.nengoros.comm.nodeFactory import NodeGroup
from ctu.nengoros.comm.rosutils import RosUtils
from org.hanns.rl.discrete.ros.sarsa import QLambda
import rl_sarsa
import gridworld

# Build the network and register it with Nengo.
net = nef.Network(
    'Demo of SARSA RL module interacting with the simulator of discrete 2D world with obstacles and rewards'
)
net.add_to_nengo()

# RL module: 2 state variables, 4 actions, xsize=20
rl = rl_sarsa.qlambdaASMConfigured(
    "RL",
    net,
    noStateVars=2,
    noActions=4,
    noValues=20,
)

# Simulated grid world; it has to be added to the network explicitly.
world = gridworld.benchmarkA("map_20x20", "BenchmarkGridWorldNodeC")
net.add(world)

# data
# The world's origin on QLambda.topicDataIn feeds a new termination on the RL module.
world_origin = world.getOrigin(QLambda.topicDataIn)
rl_termination = rl.newTerminationFor(QLambda.topicDataIn)
net.connect(world_origin, rl_termination)

# The RL module's origin on QLambda.topicDataOut feeds the world's termination.
rl_origin = rl.getOrigin(QLambda.topicDataOut)
world_termination = world.getTermination(QLambda.topicDataOut)
net.connect(rl_origin, world_termination)

# A single parenthesised string prints the same text as the print statement.
print('Configuration complete.')
import nef
from ca.nengo.math.impl import FourierFunction
from ca.nengo.model.impl import FunctionInput
from ca.nengo.model import Units
from ctu.nengoros.modules.impl import DefaultNeuralModule as NeuralModule
from ctu.nengoros.comm.nodeFactory import NodeGroup
from ctu.nengoros.comm.rosutils import RosUtils
from org.hanns.rl.discrete.ros.sarsa import QLambda
import rl_sarsa
import gridworld

# Build the network and register it with Nengo.
net = nef.Network(
    'Demo of SARSA RL module interacting with the simulator of discrete 2D world with one reward'
)
net.add_to_nengo()

# Optional ROS launcher switches, left disabled in this demo:
# RosUtils.setAutorun(False)      # Do we want to autorun roscore and rxgraph? (true by default)
# RosUtils.prefferJroscore(True)  # prefer jroscore before the roscore?

# RL module: 2 state variables, 4 actions, xsize=10
rl = rl_sarsa.qlambdaASMConfigured("RL", net, 2, 4, 10)

# 10x10 example world; it has to be added to the network explicitly.
world = gridworld.example("map")
net.add(world)

# data
# The world's origin on QLambda.topicDataIn feeds a new termination on the RL module.
world_origin = world.getOrigin(QLambda.topicDataIn)
rl_termination = rl.newTerminationFor(QLambda.topicDataIn)
net.connect(world_origin, rl_termination)

# The RL module's origin on QLambda.topicDataOut feeds the world's termination.
rl_origin = rl.getOrigin(QLambda.topicDataOut)
world_termination = world.getTermination(QLambda.topicDataOut)
net.connect(rl_origin, world_termination)

# Open the network view, then run with the arguments (1, 0.001).
net.view()
net.run(1, 0.001)

# A single parenthesised string prints the same text as the print statement.
print('Configuration complete.')
from ca.nengo.model import Units
from ctu.nengoros.modules.impl import DefaultNeuralModule as NeuralModule
from ctu.nengoros.comm.nodeFactory import NodeGroup
from ctu.nengoros.comm.rosutils import RosUtils
from org.hanns.rl.discrete.ros.sarsa import QLambda
import rl_sarsa

# NOTE(review): nef, FourierFunction and FunctionInput are used below but are
# not imported in this span - presumably they are imported just above; confirm.

# Build the network and register it with Nengo.
net = nef.Network('Demo of NeuralModule which implements discrete Q-learning with eligibility trace')
net.add_to_nengo()

# Optional ROS launcher switches, left disabled in this demo:
# RosUtils.setAutorun(False)      # Do we want to autorun roscore and rxgraph? (true by default)
# RosUtils.prefferJroscore(True)  # prefer jroscore before the roscore?

# RL module: 2 state variables, 4 actions
finderA = rl_sarsa.qlambdaASMConfigured("RL", net, 2, 4)

# Create a white noise input function with params:
# baseFreq, maxFreq [rad/s], RMS, seed
#
# State generator: the first dimension is the reward, so no signal is
# generated there (it is ignored in the connection matrix).
state_functions = [
    FourierFunction(0, 0, 0, 12),
    FourierFunction(0.5, 11, 1.6, 17),
    FourierFunction(0.2, 21, 1.1, 11),
]
generator = FunctionInput('StateGenerator', state_functions, Units.UNK)

# Reward generator: the first dimension is the reward; no states are
# generated (these are ignored in the connection matrix).
reward_functions = [
    FourierFunction(0.1, 10, 1, 12),
    FourierFunction(0, 0, 0, 17),
    FourierFunction(0, 0, 0, 17),
]
reward = FunctionInput('RewardGenerator', reward_functions, Units.UNK)
#
# Start the benchmark B
#
# by Jaroslav Vitku [[email protected]]

import nef
from ca.nengo.math.impl import FourierFunction
from ca.nengo.model.impl import FunctionInput
from ca.nengo.model import Units
from ctu.nengoros.modules.impl import DefaultNeuralModule as NeuralModule
from ctu.nengoros.comm.nodeFactory import NodeGroup
from ctu.nengoros.comm.rosutils import RosUtils
from org.hanns.rl.discrete.ros.sarsa import QLambda
import rl_sarsa
import gridworld

# Build the network and register it with Nengo.
net = nef.Network(
    'Demo of SARSA RL module interacting with the simulator of discrete 2D world with obstacles and rewards'
)
net.add_to_nengo()

# RL module: 2 state variables, 4 actions, xsize=30
rl = rl_sarsa.qlambdaASMConfigured(
    "RL",
    net,
    noStateVars=2,
    noActions=4,
    noValues=30,
)

# TODO this seems not to work now..
# Simulated grid world; it has to be added to the network explicitly.
world = gridworld.benchmarkA("map_30x30", "BenchmarkGridWorldNodeB")
net.add(world)

# data
# The world's origin on QLambda.topicDataIn feeds a new termination on the RL module.
world_origin = world.getOrigin(QLambda.topicDataIn)
rl_termination = rl.newTerminationFor(QLambda.topicDataIn)
net.connect(world_origin, rl_termination)

# The RL module's origin on QLambda.topicDataOut feeds the world's termination.
rl_origin = rl.getOrigin(QLambda.topicDataOut)
world_termination = world.getTermination(QLambda.topicDataOut)
net.connect(rl_origin, world_termination)

# A single parenthesised string prints the same text as the print statement.
print('Configuration complete.')
from ca.nengo.math.impl import FourierFunction
from ca.nengo.model.impl import FunctionInput
from ca.nengo.model import Units
from ctu.nengoros.modules.impl import DefaultNeuralModule as NeuralModule
from ctu.nengoros.comm.nodeFactory import NodeGroup
from ctu.nengoros.comm.rosutils import RosUtils
from org.hanns.rl.discrete.ros.sarsa import QLambda
import rl_sarsa

# NOTE(review): nef is used below but is not imported in this span -
# presumably "import nef" sits just above; confirm.

# Build the network and register it with Nengo.
net = nef.Network('Demo of NeuralModule which implements discrete Q-learning with eligibility trace')
net.add_to_nengo()

# Optional ROS launcher switches, left disabled in this demo:
# RosUtils.setAutorun(False)      # Do we want to autorun roscore and rxgraph? (true by default)
# RosUtils.prefferJroscore(True)  # prefer jroscore before the roscore?

# RL module: 2 state variables, 4 actions
finderA = rl_sarsa.qlambdaASMConfigured("RL", net, 2, 4)

# Create a white noise input function with params:
# baseFreq, maxFreq [rad/s], RMS, seed
#
# State generator: the first dimension is the reward, so no signal is
# generated there (it is ignored in the connection matrix).
state_functions = [
    FourierFunction(0, 0, 0, 12),
    FourierFunction(0.5, 11, 1.6, 17),
    FourierFunction(0.2, 21, 1.1, 11),
]
generator = FunctionInput('StateGenerator', state_functions, Units.UNK)

# Reward generator: the first dimension is the reward; no states are
# generated (these are ignored in the connection matrix).
reward_functions = [
    FourierFunction(0.1, 10, 1, 12),
    FourierFunction(0, 0, 0, 17),
    FourierFunction(0, 0, 0, 17),
]
reward = FunctionInput('RewardGenerator', reward_functions, Units.UNK)

# Both generators have to be added to the network explicitly.
net.add(generator)
net.add(reward)

# 3x3 matrix of zeros built from three independent rows, with entry [1][1]
# set to 1. Presumably a connection transform; its use lies beyond this span.
tx = [[0] * 3 for i in range(3)]
tx[1][1] = 1