def qlambdaMOO(name, noStateVars=2, noActions=4, noValues=5, logPeriod=100, maxDelay=1):
    # command which launches and configures the ROSjava node
    command = [classMOO,
               '_' + QLambda.noInputsConf + ':=' + str(noStateVars),
               '_' + QLambda.noOutputsConf + ':=' + str(noActions),
               '_' + QLambda.sampleCountConf + ':=' + str(noValues),
               '_' + QLambda.logPeriodConf + ':=' + str(logPeriod),
               '_' + QLambda.filterConf + ':=' + str(maxDelay)]

    g = NodeGroup("RL", True)
    g.addNode(command, "RL", "java")
    module = NeuralModule(name + '_QLambda', g, False)

    module.createEncoder(QLambda.topicAlpha, "float", 1)   # alpha config
    module.createEncoder(QLambda.topicGamma, "float", 1)
    module.createEncoder(QLambda.topicLambda, "float", 1)
    module.createEncoder(QLambda.topicImportance, "float", 1)

    # float[]{prosperity, coverage, reward/step}
    module.createDecoder(QLambda.topicProsperity, "float", 3)

    module.createDecoder(QLambda.topicDataOut, "float", noActions)       # decode actions
    module.createEncoder(QLambda.topicDataIn, "float", noStateVars + 1)  # encode states (first is reward)
    return module
def fuzzyMemTriangle(name):
    # triangular membership function:  ____|\____
    g = NodeGroup("FuzzyMemTriangle", True)
    g.addNode(ftriangle, "FuzzyMemTriangle", "java")
    module = NeuralModule(name + '_FuzzyMemTriangle', g)

    module.createEncoder("logic/gates/ina", "float", 1)    # x
    module.createEncoder("logic/gates/confa", "float", 1)  # alpha
    module.createEncoder("logic/gates/confb", "float", 1)  # beta
    module.createEncoder("logic/gates/confc", "float", 1)  # gamma
    module.createDecoder("logic/gates/outa", "float", 1)   # y
    return module
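# A minimal usage sketch (illustrative only): it assumes the Nengo 1.4 `nef`
# scripting API and a constant input driving the x termination; the network
# and input names are made up for the example.
def _fuzzyMemTriangleDemo():
    import nef
    net = nef.Network('fuzzy demo')
    tri = fuzzyMemTriangle('tri')
    net.add(tri)

    x = net.make_input('x', [0.5])  # constant sample point for the membership function
    net.connect(x.getOrigin('origin'), tri.getTermination('logic/gates/ina'))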
def qlambdaASM(name, noStateVars=2, noActions=4, noValues=5, logPeriod=100, maxDelay=1,
               classname="org.hanns.rl.discrete.ros.sarsa.config.QlambdaCoverageReward",
               prospLen=3, synchronous=True):
    """Return a synchronous NeuralModule implementing the QLambda algorithm
    with the ASM (action selection method) built in. Configuration parameters
    of the node can be found in the javadoc.

    The number of data inputs (size of the input vector) to the module is
    1 + *noStateVars*, where the first element is the reward value. The
    parameter *maxDelay* describes the number of time steps in the
    closed-loop learning, that is: how many time steps the module should
    wait for a change of the state before evaluating that the action had
    no effect (the state of the world has not changed).

    Note: if the configEncoders (config INs) are not connected, the default
    values are sent to the ROS node instead of zeros, so a non-configured
    QLambda module will learn with the predefined parameters.

    :param string name: name of the neural module (TODO) to be created
    :param integer noStateVars: number of state variables to be taken into account
    :param integer noActions: number of actions the node can produce (1-of-N encoding is used)
    :param integer noValues: number of values expected for each state variable (the interval [0,1] is sampled)
    :param integer logPeriod: how often to print out the data
    :param integer maxDelay: max delay in the closed-loop learning
    :param string classname: full class name of the ROS node to be launched
    :param integer prospLen: size of the vector expected from the node's prosperity publisher
    :returns: NeuralModule that should be added into the network; the module represents the QLambda ROS node
    """
    # this command is used to launch the ROSjava node
    command = [classname,
               '_' + QLambda.noInputsConf + ':=' + str(noStateVars),
               '_' + QLambda.noOutputsConf + ':=' + str(noActions),
               '_' + QLambda.sampleCountConf + ':=' + str(noValues),
               '_' + QLambda.logPeriodConf + ':=' + str(logPeriod),
               '_' + QLambda.filterConf + ':=' + str(maxDelay)]

    # represent the ROS node by means of a Neural Module
    g = NodeGroup("RL", True)
    g.addNode(command, "RL", "java")
    module = NeuralModule(name + '_QLambda', g, synchronous)

    # create config IO
    module.createConfigEncoder(QLambda.topicAlpha, "float", QLambda.DEF_ALPHA)  # alpha config input, def. value is DEF_ALPHA
    module.createConfigEncoder(QLambda.topicGamma, "float", QLambda.DEF_GAMMA)
    module.createConfigEncoder(QLambda.topicLambda, "float", QLambda.DEF_LAMBDA)
    module.createEncoder(QLambda.topicImportance, "float", 1)  # default value is 0

    # QlambdaCoverageReward classname => float[]{prosperity, coverage, reward/step}
    module.createDecoder(QLambda.topicProsperity, "float", prospLen)

    # create data IO
    module.createDecoder(QLambda.topicDataOut, "float", noActions)       # decode actions
    module.createEncoder(QLambda.topicDataIn, "float", noStateVars + 1)  # encode states (first is reward)
    return module
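# A usage sketch (wiring and names are illustrative, assuming Nengo 1.4's
# `nef` scripting API): instantiate the module and optionally override a
# learning parameter through its config encoder.
def _qlambdaASMDemo():
    import nef
    net = nef.Network('rl demo')
    rl = qlambdaASM('agent', noStateVars=2, noActions=4)
    net.add(rl)

    # optional: drive the alpha config input; if left unconnected,
    # QLambda.DEF_ALPHA is sent to the ROS node instead of zeros
    alpha = net.make_input('alpha', [0.7])
    net.connect(alpha.getOrigin('origin'), rl.getTermination(QLambda.topicAlpha))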
def make(net, name='NeuralModule which implements RL SARSA algorithm',
         independent=True, useQuick=True, prospLen=3, noStateVars=2, noActions=4,
         sampleCount=30, logPeriod=100, maxDelay=1, synchronous=True):
    # full name of the rosjava node to be started
    classname = "org.hanns.rl.discrete.ros.sarsa.config.QlambdaCoverageReward"

    # command to launch and configure the RL rosjava node
    command = [classname,
               '_' + QLambda.noInputsConf + ':=' + str(noStateVars),
               '_' + QLambda.noOutputsConf + ':=' + str(noActions),
               '_' + QLambda.sampleCountConf + ':=' + str(sampleCount),
               '_' + QLambda.logPeriodConf + ':=' + str(logPeriod),
               '_' + QLambda.filterConf + ':=' + str(maxDelay)]

    # create a group with a given name
    g = NodeGroup(name, independent)
    g.addNode(command, "rl_sarsa", "java")  # start and configure the rosjava node

    # create the neural module representing the node (pass the synchronous flag through)
    module = NeuralModule(name + '_QLambda', g, synchronous)

    # create config IO
    module.createConfigEncoder(QLambda.topicAlpha, "float", QLambda.DEF_ALPHA)  # alpha config input, def. value is DEF_ALPHA
    module.createConfigEncoder(QLambda.topicGamma, "float", QLambda.DEF_GAMMA)
    module.createConfigEncoder(QLambda.topicLambda, "float", QLambda.DEF_LAMBDA)
    module.createEncoder(QLambda.topicImportance, "float", 1)  # default value is 0

    # QlambdaCoverageReward classname => float[]{prosperity, coverage, reward/step}
    module.createDecoder(QLambda.topicProsperity, "float", prospLen)

    # create data IO
    module.createDecoder(QLambda.topicDataOut, "float", noActions)       # decode actions
    module.createEncoder(QLambda.topicDataIn, "float", noStateVars + 1)  # encode states (first is reward)

    net.add(module)  # add it into the network
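# Hypothetical invocation of the maker above, assuming an existing
# nef.Network; the module is launched, configured and added in one call.
def _makeDemo():
    import nef
    net = nef.Network('sarsa')
    make(net, name='rl', noStateVars=2, noActions=4, logPeriod=100)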
def benchmarkTwoR(name, mapName="benchmark", logPeriod=200, synchronous=True):
    command = [bench, '_' + World.logPeriodConf + ':=' + str(logPeriod)]

    noActions = 4    # hardcoded for this world
    noStateVars = 2

    g = NodeGroup(mapName, True)
    g.addNode(command, name, "java")
    module = NeuralModule(name + '_GridWorld', g, synchronous)

    module.createEncoder(QLambda.topicDataOut, "float", noActions)       # encode actions selected by the agent
    module.createDecoder(QLambda.topicDataIn, "float", noStateVars + 2)  # decode states (the two reward values come first)
    return module
def example(name, logPeriod=200):
    command = [nodep, '_' + World.logPeriodConf + ':=' + str(logPeriod)]

    noActions = 4    # hardcoded for this world
    noStateVars = 2

    g = NodeGroup("GridWorld", True)
    g.addNode(command, "GridWorld", "java")
    module = NeuralModule(name + '_GridWorld', g, True)

    module.createEncoder(QLambda.topicDataOut, "float", noActions)       # encode actions selected by the agent
    module.createDecoder(QLambda.topicDataIn, "float", noStateVars + 1)  # decode states (first element is the reward)
    return module
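# A closed-loop sketch (illustrative, assuming the Nengo 1.4 `nef` API and
# the qlambdaASM maker above): this example world and the QLambda module have
# matching vector widths (noStateVars + 1 state/reward values, noActions actions).
def _closedLoopDemo():
    import nef
    net = nef.Network('closed loop')
    rl = qlambdaASM('agent')   # consumes [reward, stateVar0, stateVar1]
    world = example('map')     # publishes [reward, stateVar0, stateVar1]
    net.add(rl)
    net.add(world)

    # world state (reward first) -> RL data input
    net.connect(world.getOrigin(QLambda.topicDataIn), rl.getTermination(QLambda.topicDataIn))
    # RL's 1-of-N action vector -> world action input
    net.connect(rl.getOrigin(QLambda.topicDataOut), world.getTermination(QLambda.topicDataOut))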
def make(net, name='NeuralModule which implements FuzzyMembership function - Triangular - projectTemplate',
         independent=True, useQuick=True):
    finder = "org.hanns.myPackage.fuzzy.membership.impl.Triangular"

    # create a group with a given name
    g = NodeGroup(name, independent)
    g.addNode(finder, "temp_FuzzyMemTriangular", "java")
    neuron = NeuralModule(name + "_temp_FuzzyMemTriangular", g)

    neuron.createEncoder("logic/gates/ina", "float", 1)    # termination = data input x
    neuron.createEncoder("logic/gates/confa", "float", 1)  # termination = config input alpha
    neuron.createEncoder("logic/gates/confb", "float", 1)  # termination = config input beta
    neuron.createEncoder("logic/gates/confc", "float", 1)  # termination = config input gamma
    neuron.createDecoder("logic/gates/outa", "float", 1)   # origin = output of the neuron = data output y

    net.add(neuron)  # add it into the network