def __init__(self, name, N, d, scale=1.0, weights=None, maxinput=1.0, oneDinput=False):
    """Builds a network that computes a scaled element-wise product.

    Without weights the output is (input1 .* input2) * scale; with weights
    it is (C1*input1 .* C2*input2) * scale.  The two inputs are exposed as
    terminations "A" and "B" and the product as origin "X".

    :param name: name assigned to this network
    :param N: neuron budget; each of the d intermediate populations is
        built with ceil(N / d) neurons
    :param d: number of output dimensions (one 2D product population is
        created per dimension)
    :param scale: scale applied to the output of the multiplication
    :param weights: optional pair of matrices [C1, C2], one applied to each
        input; when omitted, [MU.I(d), MU.I(d)] is used
    :param maxinput: maximum expected value of any dimension of the
        inputs.  Inputs are rescaled internally with it so that the
        vectors in the intermediate populations are not too short (short
        vectors make the computation noisy)
    :param oneDinput: if True, the second input is one dimensional and
        acts as a scale on the first input rather than as the second
        operand of an element-wise product
    """
    self.name = name
    psc_time = 0.007

    # neuron count of each intermediate population
    neurons_per_pop = int(math.ceil(float(N) / d))

    # an intermediate population represents at most [maxinput, maxinput],
    # a vector of length sqrt(maxinput**2 + maxinput**2)
    max_len = math.sqrt(2 * maxinput ** 2)

    if weights is None:
        weights = [MU.I(d), MU.I(d)]
    elif len(weights) != 2:
        print("Warning, other than 2 matrices given to eprod")

    in_dim = len(weights[0][0])

    factory = HRLutils.defaultEnsembleFactory()

    # first input: a direct-mode population, since it is only a relay
    relay_a = factory.make("in1", 1, in_dim)
    relay_a.addDecodedTermination("input", MU.I(in_dim), 0.001, False)
    self.addNode(relay_a)
    relay_a.setMode(SimulationMode.DIRECT)
    relay_a.fixMode()

    # second input: also just a relay
    relay_b = factory.make("in2", 1, in_dim)
    if oneDinput:
        # a 1-D input is expanded into a full vector holding that value in
        # every dimension, so it can be treated as an element-wise product
        broadcast = [[1] for _ in range(in_dim)]
        relay_b.addDecodedTermination("input", broadcast, 0.001, False)
    else:
        relay_b.addDecodedTermination("input", MU.I(in_dim), 0.001, False)
    self.addNode(relay_b)
    relay_b.setMode(SimulationMode.DIRECT)
    relay_b.fixMode()

    # factory used for the intermediate (product) populations
    prod_factory = NEFEnsembleFactoryImpl()
    node_factory = prod_factory.nodeFactory
    node_factory.tauRC = 0.05
    node_factory.tauRef = 0.002
    node_factory.maxRate = IndicatorPDF(200, 500)
    node_factory.intercept = IndicatorPDF(-1, 1)
    prod_factory.encoderFactory = (
        vectorgenerators.MultiplicationVectorGenerator())
    prod_factory.beQuiet()

    # output population: again only a relay, so it runs in direct mode
    collector = factory.make("result", 1, d)
    collector.setMode(SimulationMode.DIRECT)
    collector.fixMode()
    self.addNode(collector)

    padding = [0 for _ in range(in_dim)]

    for dim in range(d):
        # one 2D population per output dimension; it receives one
        # (weighted) component from each of the two input populations
        prod_pop = prod_factory.make("mpop_" + str(dim), neurons_per_pop, 2)

        # each termination fills one of the two represented dimensions.
        # the 1/max_len factor keeps the 2D vector's length at most 1.
        # note the Nengo convention: these matrices are the transpose of
        # what they would be mathematically
        row_a = [(1.0 / max_len) * weights[0][dim][i] for i in range(in_dim)]
        prod_pop.addDecodedTermination("a", [row_a, padding], psc_time, False)
        row_b = [(1.0 / max_len) * weights[1][dim][i] for i in range(in_dim)]
        prod_pop.addDecodedTermination("b", [padding, row_b], psc_time, False)

        # the product of the two selected components
        prod_pop.addDecodedOrigin("output", [PostfixFunction("x0*x1", 2)],
                                  "AXON")

        self.addNode(prod_pop)
        self.addProjection(relay_a.getOrigin("X"),
                           prod_pop.getTermination("a"))
        self.addProjection(relay_b.getOrigin("X"),
                           prod_pop.getTermination("b"))

        # route the 1D product into dimension `dim` of the output vector.
        # both inputs were scaled by 1/max_len, so the product carries a
        # factor of 1/max_len**2, which is undone here by max_len**2
        term_name = "in_" + str(dim)
        transform = [[max_len ** 2 * scale] if k == dim else [0]
                     for k in range(d)]
        collector.addDecodedTermination(term_name, transform, 0.001, False)
        self.addProjection(prod_pop.getOrigin("output"),
                           collector.getTermination(term_name))

    self.exposeTermination(relay_a.getTermination("input"), "A")
    self.exposeTermination(relay_b.getTermination("input"), "B")
    self.exposeOrigin(collector.getOrigin("X"), "X")
def __init__(self, gamma, rewardradius=1.0):
    """Assembles the ErrorCalc network.

    The network exposes three terminations ("currQ", "reset" and "reward")
    and a single origin ("X") taken from the "error" population.

    :param gamma: discount factor; passed as the recurrent weight of the
        "discount" memory
    :param rewardradius: expected radius of reward values; the incoming
        reward is weighted by 1 / rewardradius
    """
    self.name = "ErrorCalc"
    fast_psc = 0.007
    integrator_psc = 0.1
    base_n = 50
    large_n = base_n * 4
    gated_modes = [SimulationMode.DEFAULT, SimulationMode.RATE]

    factory = HRLutils.defaultEnsembleFactory()

    # relay for the current Q value (direct mode)
    q_now = factory.make("currQ", 1, 1)
    q_now.addDecodedTermination("input", [[1]], 0.001, False)
    self.addNode(q_now)
    q_now.setMode(SimulationMode.DIRECT)
    q_now.fixMode()
    self.exposeTermination(q_now.getTermination("input"), "currQ")

    # population carrying the signal that resets the network
    reset_factory = HRLutils.defaultEnsembleFactory()
    reset_factory.setEncoderFactory(
        vectorgenerators.DirectedVectorGenerator([1]))
    reset_factory.getNodeFactory().setIntercept(IndicatorPDF(0.3, 1.0))
    reset_pop = reset_factory.make("reset", base_n, 1)
    reset_pop.addDecodedTermination("input", [[1]], fast_psc, False)
    self.addNode(reset_pop)
    reset_pop.fixMode(gated_modes)
    self.exposeTermination(reset_pop.getTermination("input"), "reset")

    # memory holding the previous value of Q
    q_prev = memory.Memory("storeQ", large_n, 1, inputscale=50)
    self.addNode(q_prev)
    self.addProjection(reset_pop.getOrigin("X"),
                       q_prev.getTermination("transfer"))
    self.addProjection(q_now.getOrigin("X"),
                       q_prev.getTermination("target"))

    # discount: a memory fed by a constant input of 1, with gamma as its
    # recurrent weight
    bias = FunctionInput("biasinput", [ConstantFunction(1, 1)], Units.UNK)
    self.addNode(bias)
    discount_mem = memory.Memory("discount", large_n, 1, inputscale=50,
                                 recurweight=gamma)
    self.addNode(discount_mem)
    self.addProjection(bias.getOrigin("origin"),
                       discount_mem.getTermination("target"))
    self.addProjection(reset_pop.getOrigin("X"),
                       discount_mem.getTermination("transfer"))

    # accumulate the discounted reward.
    # open question: is gamma really needed for this to work? if it proves
    # to be a problem, it could be removed and un-discounted reward used
    # instead, relying on the saturation of the reward integrator to keep
    # rewards from going to infinity
    discounted_reward = eprod.Eprod(
        "discountreward", large_n, 1,
        weights=[[[1.0 / rewardradius]], [[1.0]]], oneDinput=True)
    self.addNode(discounted_reward)
    self.exposeTermination(discounted_reward.getTermination("A"), "reward")
    self.addProjection(discount_mem.getOrigin("X"),
                       discounted_reward.getTermination("B"))

    # reward integrator: input weighted by the PSC time constant plus a
    # feedback connection with weight 1
    reward_acc = factory.make("reward", large_n, 1)
    reward_acc.addDecodedTermination("input", [[integrator_psc]],
                                     integrator_psc, False)
    reward_acc.addDecodedTermination("feedback", [[1]], integrator_psc,
                                     False)
    # the gate puts a weight of -8 on every node of the population and is
    # driven by the reset population (presumably to clear the integrator)
    gate_weights = [[-8] for _ in range(reward_acc.getNodeCount())]
    reward_acc.addTermination("gate", gate_weights, integrator_psc, False)
    self.addNode(reward_acc)
    reward_acc.fixMode(gated_modes)
    self.addProjection(reward_acc.getOrigin("X"),
                       reward_acc.getTermination("feedback"))
    self.addProjection(discounted_reward.getOrigin("X"),
                       reward_acc.getTermination("input"))
    self.addProjection(reset_pop.getOrigin("X"),
                       reward_acc.getTermination("gate"))

    # current Q weighted by the discount
    discounted_q = eprod.Eprod("discountcurrQ", large_n, 1, oneDinput=True)
    self.addNode(discounted_q)
    self.addProjection(q_now.getOrigin("X"),
                       discounted_q.getTermination("A"))
    self.addProjection(discount_mem.getOrigin("X"),
                       discounted_q.getTermination("B"))

    # error = discounted current Q + accumulated reward - stored Q.
    # radius of 2 since max error = maxQ + maxreward - 0 (unless Q values
    # are allowed to go negative)
    error_pop = factory.make("error", base_n * 2, [2])
    error_pop.addDecodedTermination("currQ", [[1]], fast_psc, False)
    error_pop.addDecodedTermination("reward", [[1]], fast_psc, False)
    error_pop.addDecodedTermination("storeQ", [[-1]], fast_psc, False)
    self.addNode(error_pop)
    self.addProjection(discounted_q.getOrigin("X"),
                       error_pop.getTermination("currQ"))
    self.addProjection(reward_acc.getOrigin("X"),
                       error_pop.getTermination("reward"))
    self.addProjection(q_prev.getOrigin("X"),
                       error_pop.getTermination("storeQ"))
    self.exposeOrigin(error_pop.getOrigin("X"), "X")
def __init__(self, gamma, rewardradius=1.0):
    """Assembles the ErrorCalc network.

    Terminations "currQ", "reset" and "reward" are exposed as inputs, and
    the "X" origin of the "error" population is exposed as the output.

    :param gamma: discount factor; handed to the "discount" memory as its
        recurrent weight
    :param rewardradius: expected radius of reward values; the incoming
        reward is weighted by 1 / rewardradius
    """
    self.name = "ErrorCalc"
    fast_psc = 0.007
    integrator_psc = 0.1
    base_n = 50
    large_n = base_n * 4
    gated_modes = [SimulationMode.DEFAULT, SimulationMode.RATE]

    factory = HRLutils.defaultEnsembleFactory()

    # current Q input, relayed by a direct-mode population
    q_now = factory.make("currQ", 1, 1)
    q_now.addDecodedTermination("input", [[1]], 0.001, False)
    self.addNode(q_now)
    q_now.setMode(SimulationMode.DIRECT)
    q_now.fixMode()
    self.exposeTermination(q_now.getTermination("input"), "currQ")

    # input population used to reset the network
    reset_factory = HRLutils.defaultEnsembleFactory()
    reset_factory.setEncoderFactory(
        vectorgenerators.DirectedVectorGenerator([1]))
    reset_factory.getNodeFactory().setIntercept(IndicatorPDF(0.3, 1.0))
    reset_pop = reset_factory.make("reset", base_n, 1)
    reset_pop.addDecodedTermination("input", [[1]], fast_psc, False)
    self.addNode(reset_pop)
    reset_pop.fixMode(gated_modes)
    self.exposeTermination(reset_pop.getTermination("input"), "reset")

    # previous value of Q is kept in a memory
    q_prev = memory.Memory("storeQ", large_n, 1, inputscale=50)
    self.addNode(q_prev)
    self.addProjection(reset_pop.getOrigin("X"),
                       q_prev.getTermination("transfer"))
    self.addProjection(q_now.getOrigin("X"),
                       q_prev.getTermination("target"))

    # discount computation: constant input of 1 into a memory whose
    # recurrent weight is gamma
    bias = FunctionInput("biasinput", [ConstantFunction(1, 1)], Units.UNK)
    self.addNode(bias)
    discount_mem = memory.Memory("discount", large_n, 1, inputscale=50,
                                 recurweight=gamma)
    self.addNode(discount_mem)
    self.addProjection(bias.getOrigin("origin"),
                       discount_mem.getTermination("target"))
    self.addProjection(reset_pop.getOrigin("X"),
                       discount_mem.getTermination("transfer"))

    # discounted reward accumulation.
    # it is unclear whether gamma is really needed to make this work; if
    # it proves to be a problem it could be dropped in favour of
    # un-discounted reward, since the reward integrator saturates and so
    # keeps rewards from going to infinity
    discounted_reward = eprod.Eprod(
        "discountreward", large_n, 1,
        weights=[[[1.0 / rewardradius]], [[1.0]]], oneDinput=True)
    self.addNode(discounted_reward)
    self.exposeTermination(discounted_reward.getTermination("A"), "reward")
    self.addProjection(discount_mem.getOrigin("X"),
                       discounted_reward.getTermination("B"))

    # the reward integrator: an input weighted by the PSC time constant
    # and a feedback connection with weight 1
    reward_acc = factory.make("reward", large_n, 1)
    reward_acc.addDecodedTermination("input", [[integrator_psc]],
                                     integrator_psc, False)
    reward_acc.addDecodedTermination("feedback", [[1]], integrator_psc,
                                     False)
    # gate: weight of -8 onto each node, driven by the reset population
    # (presumably so that a reset clears the integrator)
    gate_weights = [[-8] for _ in range(reward_acc.getNodeCount())]
    reward_acc.addTermination("gate", gate_weights, integrator_psc, False)
    self.addNode(reward_acc)
    reward_acc.fixMode(gated_modes)
    self.addProjection(reward_acc.getOrigin("X"),
                       reward_acc.getTermination("feedback"))
    self.addProjection(discounted_reward.getOrigin("X"),
                       reward_acc.getTermination("input"))
    self.addProjection(reset_pop.getOrigin("X"),
                       reward_acc.getTermination("gate"))

    # multiply the current Q by the discount
    discounted_q = eprod.Eprod("discountcurrQ", large_n, 1, oneDinput=True)
    self.addNode(discounted_q)
    self.addProjection(q_now.getOrigin("X"),
                       discounted_q.getTermination("A"))
    self.addProjection(discount_mem.getOrigin("X"),
                       discounted_q.getTermination("B"))

    # error population sums discounted Q and reward and subtracts the
    # stored Q.  its radius is 2 since max error = maxQ + maxreward - 0
    # (unless Q values are allowed to go negative)
    error_pop = factory.make("error", base_n * 2, [2])
    error_pop.addDecodedTermination("currQ", [[1]], fast_psc, False)
    error_pop.addDecodedTermination("reward", [[1]], fast_psc, False)
    error_pop.addDecodedTermination("storeQ", [[-1]], fast_psc, False)
    self.addNode(error_pop)
    self.addProjection(discounted_q.getOrigin("X"),
                       error_pop.getTermination("currQ"))
    self.addProjection(reward_acc.getOrigin("X"),
                       error_pop.getTermination("reward"))
    self.addProjection(q_prev.getOrigin("X"),
                       error_pop.getTermination("storeQ"))
    self.exposeOrigin(error_pop.getOrigin("X"), "X")
def __init__(self, name, N, d, scale=1.0, weights=None, maxinput=1.0, oneDinput=False):
    """Builds a network that computes a scaled element-wise product.

    The output is (input1 .* input2) * scale, or, when weights are given,
    (C1*input1 .* C2*input2) * scale.  Inputs are exposed as terminations
    "A" and "B"; the result is exposed as origin "X".

    :param name: name assigned to this network
    :param N: neuron budget; every one of the d intermediate populations
        is created with ceil(N / d) neurons
    :param d: number of output dimensions (one 2D product population per
        dimension)
    :param scale: scale on the output of the multiplication
    :param weights: optional pair of matrices [C1, C2] applied to the
        respective inputs; [MU.I(d), MU.I(d)] is used when omitted
    :param maxinput: maximum expected value of any dimension of the
        inputs.  It is used to rescale the inputs internally so that the
        vectors held by the intermediate populations are not too short
        (which would add a lot of noise to the calculation)
    :param oneDinput: if True, the second input is one dimensional and is
        just a scale on the first input instead of an element-wise
        operand
    """
    self.name = name
    psc_time = 0.007

    # size of each intermediate population
    neurons_per_pop = int(math.ceil(float(N) / d))

    # the largest vector an intermediate population has to represent is
    # [maxinput, maxinput], with length sqrt(maxinput**2 + maxinput**2)
    max_len = math.sqrt(2 * maxinput**2)

    if weights is None:
        weights = [MU.I(d), MU.I(d)]
    elif len(weights) != 2:
        print("Warning, other than 2 matrices given to eprod")

    in_dim = len(weights[0][0])

    factory = HRLutils.defaultEnsembleFactory()

    # input population 1 (direct mode: it is only a relay)
    relay_a = factory.make("in1", 1, in_dim)
    relay_a.addDecodedTermination("input", MU.I(in_dim), 0.001, False)
    self.addNode(relay_a)
    relay_a.setMode(SimulationMode.DIRECT)
    relay_a.fixMode()

    # input population 2 (direct mode: it is only a relay)
    relay_b = factory.make("in2", 1, in_dim)
    if oneDinput:
        # expand the 1-D input into a full vector of that value so the
        # rest of the network can treat it as an element-wise product
        broadcast = [[1] for _ in range(in_dim)]
        relay_b.addDecodedTermination("input", broadcast, 0.001, False)
    else:
        relay_b.addDecodedTermination("input", MU.I(in_dim), 0.001, False)
    self.addNode(relay_b)
    relay_b.setMode(SimulationMode.DIRECT)
    relay_b.fixMode()

    # ensemble factory for the intermediate populations
    prod_factory = NEFEnsembleFactoryImpl()
    node_factory = prod_factory.nodeFactory
    node_factory.tauRC = 0.05
    node_factory.tauRef = 0.002
    node_factory.maxRate = IndicatorPDF(200, 500)
    node_factory.intercept = IndicatorPDF(-1, 1)
    prod_factory.encoderFactory = (
        vectorgenerators.MultiplicationVectorGenerator())
    prod_factory.beQuiet()

    # result population (direct mode: it is only a relay)
    collector = factory.make("result", 1, d)
    collector.setMode(SimulationMode.DIRECT)
    collector.fixMode()
    self.addNode(collector)

    padding = [0 for _ in range(in_dim)]

    for dim in range(d):
        # a 2D population per output dimension, combining one weighted
        # component from each of the two input populations
        prod_pop = prod_factory.make('mpop_' + str(dim), neurons_per_pop, 2)

        # the two terminations each drive one represented dimension.
        # dividing by max_len bounds the 2D vector's length by 1.
        # by Nengo convention these matrices are the transpose of what
        # they would be mathematically
        row_a = [(1.0 / max_len) * weights[0][dim][i] for i in range(in_dim)]
        prod_pop.addDecodedTermination('a', [row_a, padding], psc_time, False)
        row_b = [(1.0 / max_len) * weights[1][dim][i] for i in range(in_dim)]
        prod_pop.addDecodedTermination('b', [padding, row_b], psc_time, False)

        # decode the product of the two selected components
        prod_pop.addDecodedOrigin("output", [PostfixFunction('x0*x1', 2)],
                                  "AXON")

        self.addNode(prod_pop)
        self.addProjection(relay_a.getOrigin('X'),
                           prod_pop.getTermination('a'))
        self.addProjection(relay_b.getOrigin('X'),
                           prod_pop.getTermination('b'))

        # place the 1D product into dimension `dim` of the result.
        # each input was scaled by 1/max_len, giving the product a total
        # scale of 1/max_len**2; multiplying by max_len**2 undoes it
        term_name = 'in_' + str(dim)
        transform = [[max_len**2 * scale] if k == dim else [0]
                     for k in range(d)]
        collector.addDecodedTermination(term_name, transform, 0.001, False)
        self.addProjection(prod_pop.getOrigin('output'),
                           collector.getTermination(term_name))

    self.exposeTermination(relay_a.getTermination("input"), "A")
    self.exposeTermination(relay_b.getTermination("input"), "B")
    self.exposeOrigin(collector.getOrigin("X"), "X")