Beispiel #1
0
# Create the NeuralModule which implements discrete RL algorithm - Q(lambda) - Q-learning with eligibility traces
#
# Start the benchmark B
#
# by Jaroslav Vitku [[email protected]]

import nef
from ca.nengo.math.impl import FourierFunction
from ca.nengo.model.impl import FunctionInput
from ca.nengo.model import Units
from ctu.nengoros.modules.impl import DefaultNeuralModule as NeuralModule
from ctu.nengoros.comm.nodeFactory import NodeGroup as NodeGroup
from ctu.nengoros.comm.rosutils import RosUtils as RosUtils
from org.hanns.rl.discrete.ros.sarsa import QLambda
import rl_sarsa
import gridworld

net=nef.Network('Demo of SARSA RL module interacting with the simulator of discrete 2D world with obstacles and rewards')
net.add_to_nengo()  

rl = rl_sarsa.qlambdaASMConfigured("RL",net, noStateVars=2, noActions=4, noValues=20)   # 2 state variables, 4 actions, xsize=20

world = gridworld.benchmarkA("map_20x20","BenchmarkGridWorldNodeC");
net.add(world)

# data
net.connect(world.getOrigin(QLambda.topicDataIn), rl.newTerminationFor(QLambda.topicDataIn))
net.connect(rl.getOrigin(QLambda.topicDataOut), world.getTermination(QLambda.topicDataOut))

print 'Configuration complete.'
Beispiel #2
0
import nef
from ca.nengo.math.impl import FourierFunction
from ca.nengo.model.impl import FunctionInput
from ca.nengo.model import Units
from ctu.nengoros.modules.impl import DefaultNeuralModule as NeuralModule
from ctu.nengoros.comm.nodeFactory import NodeGroup as NodeGroup
from ctu.nengoros.comm.rosutils import RosUtils as RosUtils
from org.hanns.rl.discrete.ros.sarsa import QLambda
import rl_sarsa
import gridworld

net=nef.Network('Demo of SARSA RL module interacting with the simulator of discrete 2D world with one reward')
net.add_to_nengo()  

#RosUtils.setAutorun(False)     # Do we want to autorun roscore and rxgraph? (tru by default)
#RosUtils.prefferJroscore(True)  # preffer jroscore before the roscore? 

rl = rl_sarsa.qlambdaASMConfigured("RL",net, 2 ,4, 10)   # 2 state variables, 4 actions, xsize=10

world = gridworld.example("map");	# 10x10 example
net.add(world)

# data
net.connect(world.getOrigin(QLambda.topicDataIn), rl.newTerminationFor(QLambda.topicDataIn))
net.connect(rl.getOrigin(QLambda.topicDataOut), world.getTermination(QLambda.topicDataOut))

net.view()
net.run(1,0.001)
print 'Configuration complete.'

Beispiel #3
0
from ca.nengo.model import Units
from ctu.nengoros.modules.impl import DefaultNeuralModule as NeuralModule
from ctu.nengoros.comm.nodeFactory import NodeGroup as NodeGroup
from ctu.nengoros.comm.rosutils import RosUtils as RosUtils
from org.hanns.rl.discrete.ros.sarsa import QLambda
import rl_sarsa

net = nef.Network(
    'Demo of NeuralModule which implements discrete Q-learning with eligibility trace'
)
net.add_to_nengo()

#RosUtils.setAutorun(False)     # Do we want to autorun roscore and rxgraph? (tru by default)
#RosUtils.prefferJroscore(True)  # preffer jroscore before the roscore?

finderA = rl_sarsa.qlambdaASMConfigured("RL", net, 2,
                                        4)  # 2 state variables, 4 actions

#Create a white noise input function with params: baseFreq, maxFreq [rad/s], RMS, seed
# first dimension is reward, do not generate signal (ignored in the connection matrix)
generator = FunctionInput('StateGenerator', [
    FourierFunction(0, 0, 0, 12),
    FourierFunction(.5, 11, 1.6, 17),
    FourierFunction(.2, 21, 1.1, 11)
], Units.UNK)

# first dimension is reward, do not generate states (these are ignored in the conneciton matrix)
reward = FunctionInput('RewardGenerator', [
    FourierFunction(.1, 10, 1, 12),
    FourierFunction(0, 0, 0, 17),
    FourierFunction(0, 0, 0, 17),
], Units.UNK)
Beispiel #4
0
#
# Start the benchmark B
#
# by Jaroslav Vitku [[email protected]]

import nef
from ca.nengo.math.impl import FourierFunction
from ca.nengo.model.impl import FunctionInput
from ca.nengo.model import Units
from ctu.nengoros.modules.impl import DefaultNeuralModule as NeuralModule
from ctu.nengoros.comm.nodeFactory import NodeGroup as NodeGroup
from ctu.nengoros.comm.rosutils import RosUtils as RosUtils
from org.hanns.rl.discrete.ros.sarsa import QLambda
import rl_sarsa
import gridworld

net=nef.Network('Demo of SARSA RL module interacting with the simulator of discrete 2D world with obstacles and rewards')
net.add_to_nengo()  

rl = rl_sarsa.qlambdaASMConfigured("RL",net, noStateVars=2, noActions=4, noValues=30)   # 2 state variables, 4 actions, xsize=30

# TODO this seems not to work now..
world = gridworld.benchmarkA("map_30x30","BenchmarkGridWorldNodeB");
net.add(world)

# data
net.connect(world.getOrigin(QLambda.topicDataIn), rl.newTerminationFor(QLambda.topicDataIn))
net.connect(rl.getOrigin(QLambda.topicDataOut), world.getTermination(QLambda.topicDataOut))

print 'Configuration complete.'
Beispiel #5
0
from ca.nengo.math.impl import FourierFunction
from ca.nengo.model.impl import FunctionInput
from ca.nengo.model import Units
from ctu.nengoros.modules.impl import DefaultNeuralModule as NeuralModule
from ctu.nengoros.comm.nodeFactory import NodeGroup as NodeGroup
from ctu.nengoros.comm.rosutils import RosUtils as RosUtils
from org.hanns.rl.discrete.ros.sarsa import QLambda
import rl_sarsa

net=nef.Network('Demo of NeuralModule which implements discrete Q-learning with eligibility trace')
net.add_to_nengo()  

#RosUtils.setAutorun(False)     # Do we want to autorun roscore and rxgraph? (tru by default)
#RosUtils.prefferJroscore(True)  # preffer jroscore before the roscore? 

finderA = rl_sarsa.qlambdaASMConfigured("RL", net, 2, 4)   # 2 state variables, 4 actions

#Create a white noise input function with params: baseFreq, maxFreq [rad/s], RMS, seed
# first dimension is reward, do not generate signal (ignored in the connection matrix)
generator=FunctionInput('StateGenerator', [FourierFunction(0,0,0,12),
    FourierFunction(.5, 11,1.6, 17),FourierFunction(.2, 21,1.1, 11)],Units.UNK) 

# first dimension is reward, do not generate states (these are ignored in the conneciton matrix)
reward=FunctionInput('RewardGenerator', [FourierFunction(.1, 10,1, 12),
        FourierFunction(0,0,0, 17),FourierFunction(0,0,0, 17),],Units.UNK)

net.add(generator)
net.add(reward)

tx=[[0 for j in range(3)] for i in range(3)]
tx[1][1] = 1;