def getEncoders(self):
    """Return a single encoder matrix covering every node of this array.

    A zero matrix with ``self.neurons`` rows and ``self.dimension`` columns
    is allocated, and the encoders of node ``i`` are copied into it at row
    ``i * neurons`` and column ``i * node.dimension``.  ``neurons`` is taken
    from the first node only, so all nodes are presumably the same size
    (TODO confirm).
    """
    rows_per_node = self.nodes[0].neurons
    combined = MU.zero(self.neurons, self.dimension)
    row_offset = 0
    for position, node in enumerate(self.nodes):
        MU.copyInto(node.encoders, combined, row_offset,
                    position * node.dimension, rows_per_node)
        row_offset += rows_per_node
    return combined
def gen_encoders(self, N, contextD, context_scale):
    """Generate encoders for the state population of the learning agent.

    Each encoder is a normalized state vector followed by a scaled context
    vector, and the concatenation is normalized again.

    :param N: number of neurons in state population (encoders returned)
    :param contextD: dimension of context vector representation; when it is
        not positive the context part of every encoder is empty
    :param context_scale: weight on context representation relative to
        state (1.0 = equal weighting)
    """
    if contextD > 0:
        context_choices = MU.I(contextD)
    else:
        context_choices = [[]]

    orientation_axes = MU.I(self.num_orientations)
    shape_axes = MU.I(self.num_shapes)
    colour_axes = MU.I(self.num_colours)

    # candidate state encoders: one per state axis, plus one for every
    # orientation/shape/colour combination (neurons each sensitive to
    # different combinations of stimuli)
    state_choices = list(MU.I(self.stateD))
    for o in orientation_axes:
        for s in shape_axes:
            for c in colour_axes:
                state_choices.append(o + s + c)

    encoders = []
    for _ in range(N):
        # the state part is drawn before the context part
        state_part = HRLutils.normalize(random.choice(state_choices))
        context_part = [x * context_scale
                        for x in random.choice(context_choices)]
        encoders.append(HRLutils.normalize(state_part + context_part))
    return encoders
def compute_weight_matrix(self, proj):
    """Compute the full neuron-to-neuron weight matrix for a projection.

    The result is ``encoders * (transform * decoders^T)``, where the
    encoders of the target node are first divided by its first radius,
    multiplied by ``self.weight_scale`` and scaled row-by-row by each
    target neuron's ``scale``.

    :param proj: projection providing ``origin`` and ``termination``
    :returns: the weight matrix produced by ``MU.prod``
    """
    termination = proj.termination
    target = termination.node
    transform = termination.transform

    # follow wrapped origins down to the one that owns the decoders
    source = proj.origin
    while hasattr(source, 'getWrappedOrigin'):
        source = source.getWrappedOrigin()
    decoders = source.getDecoders()

    # scale by radius, then by the global weight scale
    encoders = target.getEncoders()
    encoders = MU.prod(encoders, 1.0 / target.getRadii()[0])
    encoders = MU.prod(encoders, self.weight_scale)

    # scale by gain: row i belongs to target neuron i
    for i, neuron in enumerate(target.nodes):
        row = encoders[i]
        for j in range(len(row)):
            row[j] *= neuron.scale

    decoded_transform = MU.prod(transform, MU.transpose(decoders))
    return MU.prod(encoders, decoded_transform)
def __init__(self, spinn, origin, termination, transform=None):
    """Build the weight matrix and bookkeeping for one projection.

    :param spinn: object providing ``max_fan_in`` and a ``populations``
        mapping from nodes to population records
    :param origin: presynaptic origin (its node and decoders are used)
    :param termination: postsynaptic termination (its node, transform and
        tau are used)
    :param transform: optional transform; defaults to the termination's

    Sets ``self.weights`` (transposed, gain- and tau-scaled weights),
    ``self.tau`` (termination tau times 1000, rounded to an int) and
    ``self.pre`` / ``self.post`` (population names).
    """
    post_node = termination.node
    gains = [neuron.scale for neuron in post_node.nodes]
    if transform is None:
        transform = termination.transform

    if origin.node.neurons > spinn.max_fan_in:
        # too many presynaptic neurons: use the sparse approximation
        weights = optsparse.compute_sparse_weights(
            origin, post_node, transform, spinn.max_fan_in)
    else:
        decoded = MU.prod(transform, MU.transpose(origin.decoders))
        weights = MU.prod(post_node.encoders, decoded)
        weights = MU.prod(weights, 1.0 / post_node.radii[0])

    # scale every row by the matching neuron gain and divide by tau
    for i, row in enumerate(weights):
        for j in range(len(row)):
            row[j] *= gains[i] / termination.tau

    self.weights = MU.transpose(weights)

    self.tau = int(round(termination.tau * 1000))
    post_population = spinn.populations[post_node]
    if self.tau not in post_population.taus:
        post_population.taus.append(self.tau)

    self.pre = spinn.populations[origin.node].name
    self.post = spinn.populations[termination.node].name
def run(self, start, end):
    """Run one step and compute the inhibited output in ``self.return_val``.

    The filtered values of all input terminations are summed (optionally
    through their matrices), the inhibitory terminations are summed into a
    single value, and the input vector is rescaled so that its magnitude
    becomes ``max(|input| + inhibition * radius, 0)``.
    """
    nef.SimpleNode.run(self, start, end)

    dims = range(self.dimension)

    # Sum the contributions of every input termination
    summed = util_funcs.zeros(1, self.dimension)
    for name in self.input_terms.keys():
        filtered = self.getTermination(name)._filtered_values
        matrix = self.input_terms[name]
        if matrix is None:
            contribution = filtered
        else:
            contribution = MU.prod(matrix, filtered)
        summed = [summed[k] + contribution[k] for k in dims]

    # Sum the inhibitory input
    inhibition = 0
    for name in self.inhib_terms.keys():
        filtered = self.getTermination(name)._filtered_values
        matrix = self.inhib_terms[name]
        inhibition = inhibition + MU.prod(matrix, filtered)

    # Rescale the input to the inhibited magnitude
    magnitude = util_funcs.norm(summed)
    sign = cmp(magnitude, 0)
    inhibited = max(abs(magnitude) + (inhibition * self.radius), 0) * sign
    if magnitude == 0:
        self.return_val = util_funcs.zeros(1, self.dimension)
    else:
        self.return_val = [summed[k] * inhibited / magnitude for k in dims]
    return
def getDecoders(self):
    """Return one decoder matrix covering every wrapped origin.

    The matrix has ``neurons * len(self._origins)`` rows and
    ``self._dimensions`` columns, where ``neurons`` is the neuron count of
    the parent's first node.  The decoders of origin ``i`` are copied in at
    row ``i * neurons`` and column ``i * origin.dimensions``.
    """
    origins = self._origins
    block_rows = self._parent.nodes[0].neurons
    combined = MU.zero(block_rows * len(origins), self._dimensions)
    index = 0
    for origin in origins:
        MU.copyInto(origin.decoders, combined, index * block_rows,
                    index * origin.dimensions, block_rows)
        index += 1
    return combined
def addPlasticTermination(self, name, matrix, tauPSC, decoder,
                          weight_func=None):
    """Create a new plastic termination.

    A new termination is created on each of the ensembles, which are then
    grouped together.  If decoders are not known at the time the
    termination is created, then pass in an array of zeros of the
    appropriate size (i.e. however many neurons will be in the population
    projecting to the termination, by number of dimensions).

    :param name: name of the termination to create and expose
    :param matrix: transform applied to the decoded input
    :param tauPSC: time constant passed to each node's termination
    :param decoder: decoders of the projecting population
    :param weight_func: optional callable applied to each node's weight
        matrix before the termination is created
    :returns: the exposed termination, looked up by ``name``
    """
    dd = self._nodes[0].dimension

    # The decoded transform is the same for every node, so compute it once
    # instead of once per row of every node.
    decoded_transform = MU.prod(matrix, MU.transpose(decoder))

    terminations = []
    for index, n in enumerate(self._nodes):
        d = index * dd
        # rows of the transform that feed this node's dimensions
        rows = [decoded_transform[d + i] for i in range(dd)]
        w = MU.prod(n.encoders, rows)
        if weight_func is not None:
            w = weight_func(w)
        terminations.append(n.addPESTermination(name, w, tauPSC, False))

    termination = EnsembleTermination(self, name, terminations)
    self.exposeTermination(termination, name)
    return self.getTermination(name)
def compute_sparse_weights(origin, post, transform, fan_in, noise=0.1,
                           num_samples=100):
    """Compute a sparse weight matrix with at most ``fan_in`` inputs per row.

    ``num_samples`` random subsets of ``fan_in`` presynaptic neurons are
    drawn, and decoders are solved for each subset from noisy activities.
    Every postsynaptic neuron then picks one of these subsets at random and
    receives weights only from the neurons in that subset.

    :param origin: presynaptic origin (provides the decoded functions and
        the node whose 'AXON' decoding approximator is used)
    :param post: postsynaptic node (provides ``encoders`` and ``radii``)
    :param transform: transform from the origin to the post node; converted
        with ``tolist()`` when it offers that method
    :param fan_in: number of presynaptic neurons per subset
    :param noise: noise level as a fraction of the maximum activity
    :param num_samples: number of random subsets to solve decoders for
    :returns: list of rows, one per postsynaptic neuron, each with one
        entry per presynaptic neuron
    """
    encoder = post.encoders
    radius = post.radii[0]
    if hasattr(transform, 'tolist'):
        transform = transform.tolist()
    approx = origin.node.getDecodingApproximator('AXON')

    # create X matrix (target function values at the evaluation points)
    eval_points = approx.evalPoints
    X = MU.transpose([f.multiMap(eval_points) for f in origin.functions])

    # create A matrix (activities at the evaluation points)
    A = approx.values
    N_A = len(A)
    samples = len(A[0])
    N_B = len(encoder)

    w_sparse = np.zeros((N_B, N_A), 'f')
    noise_sd = MU.max(A) * noise

    # Keep each decoder together with the presynaptic indices it was solved
    # for; the decoder rows are only meaningful for those neurons.
    candidates = []
    for _sample in range(num_samples):
        indices = random.sample(range(N_A), fan_in)
        activity = [A[j] for j in indices]
        jitter = [[random.gauss(0, noise_sd) for _s in range(samples)]
                  for _row in range(fan_in)]
        activity = MU.sum(activity, jitter)
        gamma = MU.prod(activity, MU.transpose(activity))
        upsilon = MU.prod(activity, X)
        gamma_inv = pinv(gamma, noise_sd * noise_sd)
        decoder = MU.prod([[x for x in row] for row in gamma_inv], upsilon)
        candidates.append((indices, decoder))

    transform_T = MU.transpose(transform)
    for i in range(N_B):
        indices, decoder = random.choice(candidates)
        ww = MU.prod(decoder, MU.prod(transform_T, encoder[i]))
        # write the weights to the neurons this decoder was solved for
        for j, k in enumerate(indices):
            w_sparse[i, k] = float(ww[j]) / radius
    return list(w_sparse)
def make(net, name='System', neurons=100, A=[[0]], tau_feedback=0.1):
    """Create a recurrent ensemble for a linear system and record it.

    An ensemble called ``name`` is made with one dimension per row of the
    square matrix ``A`` and connected to itself with the transform
    ``A * tau_feedback + I`` and a post-synaptic time constant of
    ``tau_feedback``.  The parameters are then stored in the network's
    "linear", "templates" and "templateProjections" metadata.

    :param net: network wrapper exposing ``make``, ``connect``, ``network``
    :param name: name of the ensemble and of the metadata entry
    :param neurons: number of neurons in the ensemble
    :param A: square matrix (it is only read, never modified)
    :param tau_feedback: time constant of the recurrent connection
    """
    A = numeric.array(A)
    assert len(A.shape) == 2
    assert A.shape[0] == A.shape[1]
    dimensions = A.shape[0]

    state = net.make(name, neurons, dimensions)
    recurrent = A * tau_feedback + numeric.identity(dimensions)
    net.connect(state, state, transform=recurrent, pstc=tau_feedback)

    network = net.network

    def metadata(key, factory):
        # fetch a metadata container, creating it on first use
        if network.getMetaData(key) is None:
            network.setMetaData(key, factory())
        return network.getMetaData(key)

    linears = metadata("linear", HashMap)
    settings = HashMap(4)
    settings.put("name", name)
    settings.put("neurons", neurons)
    settings.put("A", MU.clone(A))
    settings.put("tau_feedback", tau_feedback)
    linears.put(name, settings)

    metadata("templates", ArrayList).add(name)
    metadata("templateProjections", HashMap).put(name, name)
def calc_weights(self, encoder, decoder):
    """Prepare the per-dimension tables and return the weight matrix.

    Records the sizes (``N1`` = entries per decoder row, ``D`` = number of
    decoder rows, ``N2`` = number of encoders), resizes the 'input'
    termination and 'output' origin, and fills ``self.tables`` and
    ``self.histograms``.  For every dimension there is one output table and
    one list of ``(sign, cdf)`` pairs, one pair per decoder entry.

    :param encoder: sequence of encoder vectors (indexed ``[neuron][dim]``)
    :param decoder: sequence of decoder rows (indexed ``[dim][neuron]``)
    :returns: ``numeric.array`` of ``MU.prod(encoder, decoder)``
    """
    self.N1 = len(decoder[0])
    self.D = len(decoder)
    self.N2 = len(encoder)
    self.getTermination('input').setDimensions(self.N1)
    self.getOrigin('output').setDimensions(self.N2)

    self.tables = []
    self.histograms = []
    for dim in range(self.D):
        # encoder components along this dimension; every consumer gets its
        # own copy of the list
        column = [e[dim] for e in encoder]
        self.tables.append(make_output_table(list(column)))

        cdfs = []
        for i in range(self.N1):
            d = decoder[dim][i] / spike_strength
            # split the scaled decoder into a sign and a magnitude
            decoder_sign = -1 if d < 0 else 1
            if decoder_sign < 0:
                d = -d
            cdf = compute_cdf(compute_histogram(d, list(column)))
            cdfs.append((decoder_sign, cdf))
        self.histograms.append(cdfs)

    return numeric.array(MU.prod(encoder, decoder))
def make(net, name='System', neurons=100, A=[[0]], tau_feedback=0.1):
    """Build a self-connected ensemble for the linear system ``A``.

    The ensemble has as many dimensions as ``A`` has rows (``A`` must be a
    square 2-D matrix) and feeds back onto itself through
    ``A * tau_feedback + identity`` with ``pstc=tau_feedback``.  The
    settings are saved under the "linear" metadata of the network, and the
    name is registered in "templates" and "templateProjections".

    :param net: network wrapper exposing ``make``, ``connect``, ``network``
    :param name: ensemble name, also used as the metadata key
    :param neurons: neuron count of the ensemble
    :param A: square system matrix; read only
    :param tau_feedback: feedback time constant
    """
    system = numeric.array(A)
    assert len(system.shape) == 2
    assert system.shape[0] == system.shape[1]
    size = system.shape[0]

    ensemble = net.make(name, neurons, size)
    feedback = system * tau_feedback + numeric.identity(size)
    net.connect(ensemble, ensemble, transform=feedback, pstc=tau_feedback)

    store = net.network

    # parameters of this linear system
    if store.getMetaData("linear") is None:
        store.setMetaData("linear", HashMap())
    entry = HashMap(4)
    for key, value in (("name", name),
                       ("neurons", neurons),
                       ("A", MU.clone(system)),
                       ("tau_feedback", tau_feedback)):
        entry.put(key, value)
    store.getMetaData("linear").put(name, entry)

    # list of template instances
    if store.getMetaData("templates") is None:
        store.setMetaData("templates", ArrayList())
    store.getMetaData("templates").add(name)

    # the only projection is the ensemble onto itself
    if store.getMetaData("templateProjections") is None:
        store.setMetaData("templateProjections", HashMap())
    store.getMetaData("templateProjections").put(name, name)
def calc_weights(self, encoder, decoder):
    """Build the output tables and CDF histograms, then return the weights.

    ``self.N1``, ``self.D`` and ``self.N2`` are set from the shapes of
    ``decoder`` and ``encoder``; the 'input' termination and 'output'
    origin are resized to ``N1`` and ``N2``.  ``self.tables`` receives one
    output table per dimension and ``self.histograms`` one list of
    ``(decoder_sign, cdf)`` tuples per dimension.

    :param encoder: encoder vectors, indexed ``[neuron][dim]``
    :param decoder: decoder rows, indexed ``[dim][neuron]``
    :returns: ``numeric.array(MU.prod(encoder, decoder))``
    """
    n_inputs = len(decoder[0])
    n_dims = len(decoder)
    n_outputs = len(encoder)
    self.N1 = n_inputs
    self.D = n_dims
    self.N2 = n_outputs
    self.getTermination('input').setDimensions(n_inputs)
    self.getOrigin('output').setDimensions(n_outputs)

    self.tables = []
    self.histograms = []
    for dim in range(n_dims):

        def components(dim=dim):
            # fresh list of the encoder components along this dimension
            return [vector[dim] for vector in encoder]

        per_input = []
        self.tables.append(make_output_table(components()))
        for i in range(n_inputs):
            strength = decoder[dim][i] / spike_strength
            if strength < 0:
                sign, strength = -1, -strength
            else:
                sign = 1
            histogram = compute_histogram(strength, components())
            per_input.append((sign, compute_cdf(histogram)))
        self.histograms.append(per_input)

    weights = MU.prod(encoder, decoder)
    return numeric.array(weights)
def __init__(self, actions, mapname, contextD, context_rewards, **kwargs):
    """Initialize the environment variables.

    :param actions: actions available to the system
    :type actions: list of tuples (action_name,action_vector)
    :param mapname: filename for map file
    :param contextD: dimension of vector representing context
    :param context_rewards: mapping from region labels to rewards for being
        in that region (each entry represents one context)
    :type context_rewards: dict {"regionlabel":rewardval,...}
    :param **kwargs: see PlaceCellEnvironment.__init__
    """
    PlaceCellEnvironment.__init__(self, actions, mapname,
                                  name="ContextEnvironment", **kwargs)

    self.rewards = context_rewards

    # each context is represented by one row of the contextD identity
    # matrix; the mapping goes from region label to context vector
    basis = MU.I(contextD)
    self.contexts = {}
    index = 0
    for label in self.rewards:
        self.contexts[label] = list(basis[index])
        index += 1

    # start in a randomly chosen context
    initial_label = random.choice(self.contexts.keys())
    self.context = self.contexts[initial_label]

    # randomly pick a new context every context_delay seconds
    self.context_delay = 60
    self.context_update = self.context_delay

    self.create_origin("placewcontext",
                       lambda: self.place_activations + self.context)
    self.create_origin("context", lambda: self.context)
def weights(self, obj, termination, include_gain=False):
    """Collect the weight rows of one termination across all nodes.

    :param obj: object whose ``nodes`` each provide the named termination
    :param termination: name of the termination to read weights from
    :param include_gain: when true, each node's weights are multiplied by
        that node's ``scale`` before being collected
    :returns: flat list of the weight rows of every node, in node order
    """
    collected = []
    for node in obj.nodes:
        rows = node.getTermination(termination).weights
        collected.extend(MU.prod(rows, node.scale) if include_gain else rows)
    return collected
def __init__(self, N, d, name="PositiveBias"):
    """Builds the PositiveBias network.

    :param N: base number of neurons
    :param d: dimension of input signal
    :param name: name for network

    Exposes an "input" termination (on the thresholding population), a
    "learn" termination (inhibiting the bias population) and the "X"
    origin of the bias population.
    """
    self.name = name
    net = nef.Network(self, seed=HRLutils.SEED, quick=False)

    tauPSC = 0.007
    biaslevel = 0.03  # the value to be output for negative inputs

    # threshold the input signal to detect positive values
    nfac = HRLutils.node_fac()
    nfac.setIntercept(IndicatorPDF(0, 0.1))
    neg_thresh = net.make_array("neg_thresh", N, d, encoders=[[1]],
                                node_factory=nfac)
    neg_thresh.addDecodedTermination("input", MU.I(d), tauPSC, False)

    # create a population that tries to output biaslevel across
    # all dimensions
    bias_input = net.make_input("bias_input", [biaslevel])
    # range() needs an integer bound; biaslevel * 200 is a float, which
    # newer interpreters reject, so convert it explicitly
    num_eval_points = int(round(biaslevel * 200))
    bias_pop = net.make_array(
        "bias_pop", N, d, node_factory=HRLutils.node_fac(),
        eval_points=[[x * 0.01] for x in range(0, num_eval_points)])
    net.connect(bias_input, bias_pop, pstc=tauPSC)

    # the individual dimensions of bias_pop are then inhibited by the
    # output of neg_thresh (so any positive values don't get the bias)
    # one transform row per neuron: explicit integer division for the count
    neurons_per_dim = bias_pop.getNeurons() // d
    net.connect(neg_thresh, bias_pop, pstc=tauPSC,
                func=lambda x: [1.0] if x[0] > 0 else [0.0],
                transform=[[-10 if i == k else 0 for k in range(d)]
                           for i in range(d)
                           for _ in range(neurons_per_dim)])

    # the whole population is inhibited by the learn signal, so that it
    # outputs 0 if the system isn't supposed to be learning
    bias_pop.addTermination("learn",
                            [[-10] for _ in range(bias_pop.getNeurons())],
                            tauPSC, False)

    self.exposeTermination(neg_thresh.getTermination("input"), "input")
    self.exposeTermination(bias_pop.getTermination("learn"), "learn")
    self.exposeOrigin(bias_pop.getOrigin("X"), "X")
def addDecodedOrigin(self, name, funcs, origin):
    """Add a decoded origin on the "storage" node and expose it rescaled.

    The new origin is routed through a one-neuron direct-mode relay named
    ``name`` whose input transform multiplies every dimension by
    ``self.radius``.  The relay's "X" origin is exposed under ``name``.

    :param name: name of the origin (and of the relay node)
    :param funcs: functions passed on to the storage node's origin
    :param origin: origin argument passed on to the storage node
    :returns: the exposed origin, looked up by ``name``
    """
    net = nef.Network(self)
    storage_origin = self.getNode("storage").addDecodedOrigin(name, funcs,
                                                              origin)

    # undo radius scaling with a relay that multiplies by the radius
    relay = net.make(name, 1, self.dimension, mode="direct")
    relay.fixMode()
    rescale = MU.diag([self.radius] * self.dimension)
    net.connect(storage_origin, relay, pstc=0.001, transform=rescale)

    self.exposeOrigin(relay.getOrigin("X"), name)
    return self.getOrigin(name)
def addPlasticTermination(self, name, matrix, tauPSC, decoder,
                          weight_func=None):
    """Create a new plastic termination.

    A new termination is created on each of the ensembles, which are then
    grouped together.  If decoders are not known at the time the
    termination is created, then pass in an array of zeros of the
    appropriate size (i.e. however many neurons will be in the population
    projecting to the termination, by number of dimensions).

    :param name: name of the termination to create and expose
    :param matrix: transform applied to the decoded input
    :param tauPSC: time constant passed to each node's termination
    :param decoder: decoders of the projecting population
    :param weight_func: optional callable applied to each node's weight
        matrix before the termination is created
    :returns: the exposed termination, looked up by ``name``
    """
    dd = self._nodes[0].dimension

    # matrix * decoder^T does not change between nodes: compute it once
    # rather than for every row of every node
    full_weights = MU.prod(matrix, MU.transpose(decoder))

    terminations = []
    d = 0
    for n in self._nodes:
        # this node handles rows d .. d + dd - 1 of the transform
        w = MU.prod(n.encoders, [full_weights[d + i] for i in range(dd)])
        if weight_func is not None:
            w = weight_func(w)
        t = n.addPESTermination(name, w, tauPSC, False)
        terminations.append(t)
        d += dd

    termination = EnsembleTermination(self, name, terminations)
    self.exposeTermination(termination, name)
    return self.getTermination(name)
def test_actionvalues():
    """Build and display a small network exercising ActionValues.

    A 2-D state population (driven by a constant ``[0, 0]`` input) feeds
    the "state" termination of an ActionValues network whose weights are
    initialised from decoders of a constant function.  The network is
    added to Nengo and opened in the viewer.
    """
    net = nef.Network("testActionValues")

    stateN = 200
    N = 100
    stateD = 2
    stateradius = 1.0
    statelength = math.sqrt(2 * stateradius**2)
    init_Qs = 0.5
    learningrate = 0.0
    Qradius = 1
    tauPSC = 0.007

    actions = [("up", [0, 1]),
               ("right", [1, 0]),
               ("down", [0, -1]),
               ("left", [-1, 0])]

    # state: evaluation points on an integer grid, scaled by statelength
    grid = []
    for x in range(-int(stateradius), int(stateradius)):
        for y in range(-int(stateradius), int(stateradius)):
            grid.append([x / statelength, y / statelength])
    state_pop = net.make("state_pop", stateN, stateD, radius=statelength,
                         node_factory=HRLutils.node_fac(), eval_points=grid)
    state_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
    state_pop.addDecodedTermination("state_input", MU.I(stateD), tauPSC,
                                    False)

    # set up action nodes, initialised to decode the constant init_Qs
    init_origin = state_pop.addDecodedOrigin(
        "init_decoders", [ConstantFunction(stateD, init_Qs)], "AXON")
    decoders = init_origin.getDecoders()
    actionvals = actionvalues.ActionValues("testActionValues", N, stateN,
                                           actions, learningrate,
                                           Qradius=Qradius,
                                           init_decoders=decoders)
    net.add(actionvals)
    net.connect(state_pop.getOrigin("AXON"),
                actionvals.getTermination("state"))

    # input
    inp = net.make_input("input", [0, 0])
    net.connect(inp, state_pop.getTermination("state_input"))

    net.add_to_nengo()
    net.view()
def gen_encoders(self, N, contextD, context_scale):
    """Generates encoders for state population in RL agent.

    State aspect of encoders comes from PlaceCellEnvironment.  Context
    component is a unit vector with contextD dimensions and length
    context_scale.  Each combined encoder is normalized to unit length.

    :param N: number of encoders to generate
    :param contextD: dimension of the context component
    :param context_scale: length of the context component before the
        final normalization
    """
    state_parts = PlaceCellEnvironment.gen_encoders(self, N)

    # pick one axis of the context space per neuron
    basis = MU.I(contextD)
    picks = [random.choice(basis) for _ in range(N)]

    encoders = []
    for state_part, pick in zip(state_parts, picks):
        context_part = [x * context_scale for x in pick]
        combined = state_part + context_part
        if combined:
            length = math.sqrt(sum([y**2 for y in combined]))
            combined = [x / length for x in combined]
        encoders.append(combined)
    return encoders
def __init__(self, spinn, origin, termination, transform = None):
    """Set up the weights, time constant and endpoints of a projection.

    The weights come from ``optsparse.compute_sparse_weights`` when the
    origin's node has more neurons than ``spinn.max_fan_in``; otherwise
    they are ``encoders * transform * decoders^T`` divided by the first
    radius of the target node.  Each row is then multiplied by the matching
    neuron ``scale`` and divided by the termination's tau, and the matrix
    is stored transposed in ``self.weights``.

    :param spinn: provides ``max_fan_in`` and the ``populations`` mapping
    :param origin: presynaptic origin
    :param termination: postsynaptic termination
    :param transform: optional transform; the termination's own transform
        is used when omitted
    """
    target = termination.node
    scale = [nn.scale for nn in target.nodes]
    if transform is None:
        transform = termination.transform

    use_sparse = origin.node.neurons > spinn.max_fan_in
    if use_sparse:
        w = optsparse.compute_sparse_weights(origin, target, transform,
                                             spinn.max_fan_in)
    else:
        w = MU.prod(target.encoders,
                    MU.prod(transform, MU.transpose(origin.decoders)))
        w = MU.prod(w, 1.0 / target.radii[0])

    # apply per-neuron gain and the 1/tau factor in place
    for i in range(len(w)):
        row = w[i]
        for j in range(len(row)):
            row[j] *= scale[i] / termination.tau
    self.weights = MU.transpose(w)

    # tau is stored as an integer (tau * 1000, rounded) and registered
    # with the target population if it is new
    self.tau = int(round(termination.tau * 1000))
    known_taus = spinn.populations[target].taus
    if self.tau not in known_taus:
        known_taus.append(self.tau)

    self.pre = spinn.populations[origin.node].name
    self.post = spinn.populations[termination.node].name
def qnetwork(stateN, stateD, state_encoders, actions, learningrate, stateradius=1.0, Qradius=1.0, load_weights=None):
    """Declarative sketch of the state-handling part of the QNetwork.

    Builds, inside a ``declarative_syntax`` block on a new "QNetwork"
    network, a direct-mode state relay, a current-state population, a
    memory holding the saved state and a population representing that
    saved state.

    :param stateN: number of neurons in each state population
    :param stateD: dimension of the state
    :param state_encoders: encoders used for both state populations
    :param actions: only used here to compute ``num_actions``
    :param stateradius: radius of the saved-state memory; the state
        populations use ``sqrt(2 * stateradius**2)``

    NOTE(review): ``learningrate``, ``Qradius``, ``load_weights`` and the
    locals ``num_actions``, ``init_Qs`` and ``weight_save`` are not used in
    the visible body, and ``net`` is not returned -- this looks unfinished.
    """
    net = nef.Network("QNetwork")
    with declarative_syntax(net):
        N = 50
        # radius of the state populations
        statelength = math.sqrt(2*stateradius**2)
        tauPSC = 0.007
        num_actions = len(actions)
        init_Qs = 0.0
        weight_save = 600.0 #period to save weights (realtime, not simulation time)

        #set up relays
        direct_mode('state_relay', 1, dimension=stateD)
        add_decoded_termination('state_relay', 'input', MU.I(stateD), .001, False)

        #create state population
        ensemble('state_pop', neurons=LIF(stateN), dimensions=stateD, radius=statelength, encoders=state_encoders, )
        connect('state_relay', 'state_pop', filter=tauPSC)

        # memory holding the saved state, fed from the relay
        memory('saved_state', neurons=LIF(N * 4), dimension=stateD, inputscale=50, radius=stateradius, direct_storage=True)
        # N.B. the "." syntax refers to an ensemble created by the `memory` macro
        connect('state_relay', 'saved_state.target')

        # population representing the saved state
        ensemble('old_state_pop', neurons=LIF(stateN), dimensions=stateD, radius=statelength, encoders=state_encoders)
        connect('saved_state', 'old_state_pop', filter=tauPSC)

        # mess with the intercepts ?
        for name in 'state_pop', 'old_state_pop':
            set_intercepts(name, IndicatorPDF(0, 1))

        fixMode('state_relay')
        fixMode('state_pop', ['default', 'rate'])
        fixMode('old_state_pop', ['default', 'rate'])
def termination_Cycle(self, x):
    """Handle the 'Cycle' termination and possibly store the summed input.

    The first element of ``x`` is used (replaced by ``1 - x`` when
    ``self.cyc_opt`` is set).  When that value is below 0.025 and
    ``self.reset_val`` is below 0.5, the outputs of all input terminations
    are summed -- each scaled by a number or multiplied by a matrix from
    ``self.input_mats`` -- and a deep copy of the sum is stored in
    ``self.stored_val``.  Otherwise nothing is changed.
    """
    level = x[0]
    if self.cyc_opt:
        level = 1 - level

    # nothing to do unless the cycle value is low and we are not resetting
    if not (level < 0.025):
        return
    if not (self.reset_val < 0.5):
        return

    total = zeros(1, self.dimension)
    for term_name in self.input_terms:
        termination = self.getTermination(term_name)
        matrix = self.input_mats[term_name]
        output = termination.getOutput()
        if isinstance(matrix, (int, float, long)):
            # scalar gain: scale the termination output directly
            total = [total[n] + matrix * output[n]
                     for n in range(self.dimension)]
        else:
            projected = MU.prod(matrix, output)
            total = [total[n] + projected[n]
                     for n in range(self.dimension)]
    self.stored_val = deepcopy(total)
def __init__(self, name, N, d, scale=1.0, weights=None, maxinput=1.0, oneDinput=False):
    """Build a network computing the element-wise product of two inputs.

    Exposes terminations "A" and "B" (the two inputs) and origin "X" (the
    product), all on direct-mode relay populations.

    :param name: name of the network
    :param N: total neuron budget; each intermediate population gets
        ``ceil(N / d)`` neurons
    :param d: dimension of the output
    :param scale: scale applied to the output of the multiplication
    :param weights: optional pair of matrices applied to the inputs;
        identity matrices of size ``d`` are used when omitted
    :param maxinput: maximum expected value of any input dimension
    :param oneDinput: if true, the second input is one dimensional and is
        expanded to a full vector of that value
    """
    # scale is a scale on the output of the multiplication
    # output = (input1.*input2)*scale

    # weights are optional matrices applied to each input
    # output = (C1*input1 .* C2*input2)*scale

    # maxinput is the maximum expected value of any dimension of the
    # inputs. this is used to scale the inputs internally so that the
    # length of the vectors in the intermediate populations are not
    # too small (which results in a lot of noise in the calculations)

    # oneDinput indicates that the second input is one dimensional, and is
    # just a scale on the first input rather than an element-wise product

    self.name = name
    tauPSC = 0.007

    # the size of the intermediate populations
    smallN = int(math.ceil(float(N) / d))

    # the maximum value of the vectors represented by the intermediate
    # populations. the vector is at most [maxinput maxinput], so the length
    # of that is sqrt(maxinput**2 + maxinput**2)
    maxlength = math.sqrt(2 * maxinput**2)

    # only a warning: construction continues with whatever was given
    if weights is not None and len(weights) != 2:
        print "Warning, other than 2 matrices given to eprod"

    if weights is None:
        weights = [MU.I(d), MU.I(d)]

    # input dimension is taken from the first row of the first matrix
    inputd = len(weights[0][0])

    ef = HRLutils.defaultEnsembleFactory()

    # create input populations
    in1 = ef.make("in1", 1, inputd)
    in1.addDecodedTermination("input", MU.I(inputd), 0.001, False)
    self.addNode(in1)
    in1.setMode(SimulationMode.DIRECT)  # since this is just a relay
    in1.fixMode()

    in2 = ef.make("in2", 1, inputd)
    if not oneDinput:
        in2.addDecodedTermination("input", MU.I(inputd), 0.001, False)
    else:
        # if it is a 1-D input we just expand it to a full vector of that
        # value so that we can treat it as an element-wise product
        in2.addDecodedTermination("input", [[1] for i in range(inputd)], 0.001, False)
    self.addNode(in2)
    in2.setMode(SimulationMode.DIRECT)  # since this is just a relay
    in2.fixMode()

    # ensemble for intermediate populations
    multef = NEFEnsembleFactoryImpl()
    multef.nodeFactory.tauRC = 0.05
    multef.nodeFactory.tauRef = 0.002
    multef.nodeFactory.maxRate = IndicatorPDF(200, 500)
    multef.nodeFactory.intercept = IndicatorPDF(-1, 1)
    multef.encoderFactory = vectorgenerators.MultiplicationVectorGenerator( )
    multef.beQuiet()

    result = ef.make("result", 1, d)
    result.setMode(SimulationMode.DIRECT)  # since this is just a relay
    result.fixMode()
    self.addNode(result)

    # resultTerm is reused for every termination on result: entry e is set
    # just before the termination is created and reset to [0] right after
    resultTerm = [[0] for _ in range(d)]
    zeros = [0 for _ in range(inputd)]

    for e in range(d):
        # create a 2D population for each input dimension which will
        # combine the components from one dimension of each of the input
        # populations
        mpop = multef.make('mpop_' + str(e), smallN, 2)

        # make two connection that will select one component from each of
        # the input pops
        # we divide by maxlength to ensure that the maximum length of the
        # 2D vector is 1
        # remember that (for some reason) the convention in Nengo is that
        # the input matrices are transpose of what they would be
        # mathematically
        mpop.addDecodedTermination('a', [[(1.0 / maxlength) * weights[0][e][i] for i in range(inputd)], zeros], tauPSC, False)
        mpop.addDecodedTermination('b', [ zeros, [(1.0 / maxlength) * weights[1][e][i] for i in range(inputd)] ], tauPSC, False)

        # multiply the two selected components together
        mpop.addDecodedOrigin("output", [PostfixFunction('x0*x1', 2)], "AXON")

        self.addNode(mpop)
        self.addProjection(in1.getOrigin('X'), mpop.getTermination('a'))
        self.addProjection(in2.getOrigin('X'), mpop.getTermination('b'))

        # combine the 1D results back into one vector.
        # we scaled each input by 1/maxlength, then multiplied them
        # together for a total scale of 1/maxlength**2, so to undo we
        # multiply by maxlength**2
        resultTerm[e] = [maxlength**2 * scale]
        result.addDecodedTermination('in_' + str(e), resultTerm, 0.001, False)
        resultTerm[e] = [0]

        self.addProjection(mpop.getOrigin('output'), result.getTermination('in_' + str(e)))

    self.exposeTermination(in1.getTermination("input"), "A")
    self.exposeTermination(in2.getTermination("input"), "B")
    self.exposeOrigin(result.getOrigin("X"), "X")
def __init__(self, name, N, stateN, actions, learningrate, Qradius=1.0,
             init_decoders=None):
    """Build ActionValues network.

    :param name: name of Network
    :param N: base number of neurons
    :param stateN: number of neurons in state population
    :param actions: actions available to the system
    :type actions: list of tuples (action_name,action_vector)
    :param learningrate: learning rate for PES rule
    :param Qradius: expected radius of Q values
    :param init_decoders: if specified, will be used to initialize the
        connection weights to whatever function is specified by the
        decoders; otherwise small random weights are used

    Exposes the grouped "state" and "error" terminations and the "X"
    origin of the output relay.
    """
    self.name = name
    net = nef.Network(self, seed=HRLutils.SEED, quick=False)

    self.N = N
    self.learningrate = learningrate
    self.supervision = 1.0  # don't use the unsupervised stuff at all
    self.tauPSC = 0.007

    modterms = []
    learnterms = []
    num_actions = len(actions)

    # relays
    output = net.make("output", 1, num_actions, mode="direct")
    output.fixMode()

    for i, action in enumerate(actions):
        # create one population corresponding to each action
        act_pop = net.make("action_" + action[0], self.N * 4, 1,
                           node_factory=HRLutils.node_fac())
        act_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

        # add error termination
        # set modulatory transform so that it selects one dimension of the
        # error signal
        modterm = act_pop.addDecodedTermination(
            "error", [[1 if j == i else 0 for j in range(num_actions)]],
            0.005, True)

        # create learning termination
        if init_decoders is not None:
            weights = MU.prod(act_pop.getEncoders(),
                              MU.transpose(init_decoders))
        else:
            # The comprehension variables must not be called ``i``: in
            # Python 2 they leak into this scope and would overwrite the
            # action index used for the output transform below.
            weights = [[random.uniform(-1e-3, 1e-3)
                        for _col in range(stateN)]
                       for _row in range(act_pop.getNeurons())]
        learningterm = act_pop.addHPESTermination("learning", weights,
                                                  0.005, False, None)

        # initialize the learning rule
        net.learn(act_pop, learningterm, modterm, rate=self.learningrate,
                  supervisionRatio=self.supervision)

        # connect each action back to output relay
        # note, we learn all the Q values with radius 1, then just multiply
        # by the desired Q radius here
        net.connect(act_pop.getOrigin("X"), output,
                    transform=[[Qradius] if j == i else [0]
                               for j in range(num_actions)],
                    pstc=0.001)

        modterms += [modterm]
        learnterms += [learningterm]

    # use EnsembleTerminations to group the individual action terminations
    # into one multi-dimensional termination
    self.exposeTermination(EnsembleTermination(self, "state", learnterms),
                           "state")
    self.exposeTermination(EnsembleTermination(self, "error", modterms),
                           "error")

    self.exposeOrigin(output.getOrigin("X"), "X")
def make_convolution(self, name, A, B, C, N_per_D, quick=False,
                     encoders=[[1, 1], [1, -1], [-1, 1], [-1, -1]],
                     radius=3, pstc_out=0.01, pstc_in=0.01, pstc_gate=0.01,
                     invert_first=False, invert_second=False,
                     mode='default', output_scale=1):
    """Create a convolution of A and B whose result is sent to C.

    :param name: name of the created convolution node
    :param A: first input (node or node name); may be None to leave the
        'A' termination unconnected
    :param B: second input (node or node name); may be None
    :param C: output node or node name; its dimension is the reference
    :param N_per_D: neurons per dimension (non-direct mode only)
    :param quick: passed to ``make_array``
    :param encoders: encoders passed to ``make_array`` (never modified)
    :param radius: radius passed to ``make_array``
    :param pstc_out: time constant of the connection into C
    :param pstc_in: time constant of the 'A' and 'B' terminations
    :param pstc_gate: time constant of the 'gate' termination (direct mode)
    :param invert_first: invert the first input
    :param invert_second: invert the second input
    :param mode: 'direct' builds a DirectConvolution; anything else builds
        a neural array
    :param output_scale: scale on the connection into C
    :returns: the created convolution node
    :raises Exception: when a given A or B differs in dimension from C
    """
    if isinstance(A, str):
        A = self.network.getNode(A)
    if isinstance(B, str):
        B = self.network.getNode(B)
    if isinstance(C, str):
        C = self.network.getNode(C)

    dimensions = C.dimension
    if ((B is not None and B.dimension != dimensions) or
            (A is not None and A.dimension != dimensions)):

        def describe(node):
            # A and B are optional, so the message must cope with None
            if node is None:
                return 'None'
            return '%d' % node.dimension

        raise Exception(
            'Dimensions not the same for convolution (%s,%s->%d)'
            % (describe(A), describe(B), C.dimension))

    if mode == 'direct':
        D = DirectConvolution(name, dimensions, invert_first, invert_second)
        self.add(D)
        D.getTermination('A').setTau(pstc_in)
        D.getTermination('B').setTau(pstc_in)
        D.getTermination('gate').setTau(pstc_gate)
        if A is not None:
            self.connect(A, D.getTermination('A'))
        if B is not None:
            self.connect(B, D.getTermination('B'))
        self.connect(D.getOrigin('C'), C, pstc=pstc_out,
                     weight=output_scale)
    else:
        D = make_array(self, name, N_per_D, dimensions, quick=quick,
                       encoders=encoders, radius=radius)
        A2 = input_transform(dimensions, True, invert_first)
        B2 = input_transform(dimensions, False, invert_second)
        D.addDecodedTermination('A', A2, pstc_in, False)
        D.addDecodedTermination('B', B2, pstc_in, False)
        if A is not None:
            self.connect(A, D.getTermination('A'))
        if B is not None:
            self.connect(B, D.getTermination('B'))
        ifftm2 = output_transform(dimensions)
        self.connect(D, C, func=product, transform=ifftm2 * output_scale,
                     pstc=pstc_out)

    # record the construction parameters in the network metadata
    if self.network.getMetaData("convolution") is None:
        self.network.setMetaData("convolution", HashMap())
    bindings = self.network.getMetaData("convolution")

    binding = HashMap(15)
    binding.put("name", name)
    binding.put("A", A.getName() if A is not None else None)
    binding.put("B", B.getName() if B is not None else None)
    binding.put("C", C.getName())
    binding.put("N_per_D", N_per_D)
    binding.put("quick", quick)
    binding.put("encoders", MU.clone(encoders))
    binding.put("radius", radius)
    binding.put("pstc_out", pstc_out)
    binding.put("pstc_in", pstc_in)
    binding.put("pstc_gate", pstc_gate)
    binding.put("invert_first", invert_first)
    binding.put("invert_second", invert_second)
    binding.put("mode", mode)
    binding.put("output_scale", output_scale)
    bindings.put(name, binding)

    if self.network.getMetaData("templates") is None:
        self.network.setMetaData("templates", ArrayList())
    templates = self.network.getMetaData("templates")
    templates.add(name)

    if self.network.getMetaData("templateProjections") is None:
        self.network.setMetaData("templateProjections", HashMap())
    templateproj = self.network.getMetaData("templateProjections")
    # NOTE(review): all three puts use the same key, so only C's name is
    # kept in the end; left as is because readers of this metadata are not
    # visible here.
    if A is not None:
        templateproj.put(name, A.getName())
    if B is not None:
        templateproj.put(name, B.getName())
    templateproj.put(name, C.getName())

    return D
def run_badreenvironment(nav_args, ctrl_args, bias=0.0, seed=None, flat=False, label="tmp"):
    """Runs the model on the Badre et al. (2010) task.

    Builds the environment, a navigation agent and a control agent, wires
    their termination/reward/state connections, starts threads that
    periodically save each agent's QNetwork parameters, attaches a data
    collection node and opens the network viewer.

    :param nav_args: keyword arguments passed on to the navigation
        SMDPAgent
    :param ctrl_args: keyword arguments passed on to the control SMDPAgent
    :param bias: bonus factor on the control agent's reward when its
        action output is non-null (see the "ctrl_reward" function)
    :param seed: if given, passed to ``HRLutils.set_seed``
    :param flat: passed to ``BadreEnvironment``
    :param label: used in the name of the data output file
    """

    if seed is not None:
        HRLutils.set_seed(seed)
    # the seed actually in use; it is part of the weight file names below
    seed = HRLutils.SEED

    net = nef.Network("run_badreenvironment")

    env = badreenvironment.BadreEnvironment(flat=flat)
    net.add(env)

    # ##NAV AGENT
    stateN = 500
    max_state_input = 3
    enc = env.gen_encoders(stateN, 0, 0.0)

    # generate evaluation points
    orientations = MU.I(env.num_orientations)
    shapes = MU.I(env.num_shapes)
    colours = MU.I(env.num_colours)
    evals = (
        list(MU.diag([3 for _ in range(env.stateD)])) +
        [o + s + c for o in orientations for s in shapes for c in colours])

    # create lower level
    nav_agent = smdpagent.SMDPAgent(stateN, env.stateD, env.actions, name="NavAgent", stateradius=max_state_input, state_encoders=enc, state_evals=evals, discount=0.5, **nav_args)
    net.add(nav_agent)

    print "agent neurons:", nav_agent.countNeurons()

    # actions terminate on fixed schedule (aligned with environment)
    nav_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.6)): None}, env, name="NavTermNode", state_delay=0.1, reset_delay=0.05, reset_interval=0.1)
    net.add(nav_term_node)

    # the "reset" origin drives reset, save_state and save_action
    net.connect(nav_term_node.getOrigin("reset"), nav_agent.getTermination("reset"))
    net.connect(nav_term_node.getOrigin("learn"), nav_agent.getTermination("learn"))
    net.connect(nav_term_node.getOrigin("reset"), nav_agent.getTermination("save_state"))
    net.connect(nav_term_node.getOrigin("reset"), nav_agent.getTermination("save_action"))

    net.connect(nav_agent.getOrigin("action_output"), env.getTermination("action"))

    # ##CTRL AGENT
    stateN = 500
    enc = RandomHypersphereVG().genVectors(stateN, env.stateD)
    actions = [("shape", [0, 1]), ("orientation", [1, 0]), ("null", [0, 0])]
    ctrl_agent = smdpagent.SMDPAgent(stateN, env.stateD, actions, name="CtrlAgent", state_encoders=enc, stateradius=max_state_input, state_evals=evals, discount=0.4, **ctrl_args)
    net.add(ctrl_agent)

    print "agent neurons:", ctrl_agent.countNeurons()

    net.connect(env.getOrigin("state"), ctrl_agent.getTermination("state_input"))

    ctrl_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.6)): None}, env, name="CtrlTermNode", state_delay=0.1, reset_delay=0.05, reset_interval=0.1)
    net.add(ctrl_term_node)

    net.connect(ctrl_term_node.getOrigin("reset"), ctrl_agent.getTermination("reset"))
    net.connect(ctrl_term_node.getOrigin("learn"), ctrl_agent.getTermination("learn"))
    net.connect(ctrl_term_node.getOrigin("reset"), ctrl_agent.getTermination("save_state"))
    net.connect(ctrl_term_node.getOrigin("reset"), ctrl_agent.getTermination("save_action"))

    # ctrl gets a slight bonus if it selects a rule (as opposed to null), to
    # encourage it to not just pick null all the time
    # reward_relay is 3D: [environment reward, ctrl action 0, ctrl action 1]
    reward_relay = net.make("reward_relay", 1, 3, mode="direct")
    reward_relay.fixMode()
    net.connect(env.getOrigin("reward"), reward_relay, transform=[[1], [0], [0]])
    net.connect(ctrl_agent.getOrigin("action_output"), reward_relay, transform=[[0, 0], [1, 0], [0, 1]])

    net.connect(reward_relay, ctrl_agent.getTermination("reward"),
                func=lambda x: ((x[0] + bias * abs(x[0])) if x[1] + x[2] > 0.5 else x[0]),
                origin_name="ctrl_reward")

    # ideal reward function (for testing)
    # def ctrl_reward_func(x):
    #     if abs(x[0]) < 0.5:
    #         return 0.0
    #
    #     if flat:
    #         return 1.5 if x[1] + x[2] < 0.5 else -1.5
    #     else:
    #         if x[1] + x[2] < 0.5:
    #             return -1.5
    #         if [round(a) for a in env.state[-2:]] == [round(b)
    #                                                   for b in x[1:]]:
    #             return 1.5
    #         else:
    #             return -1.5
    # net.connect(reward_relay, ctrl_agent.getTermination("reward"),
    #             func=ctrl_reward_func)

    # nav rewarded for picking ctrl target
    def nav_reward_func(x):
        # x is the reward_relay value: [env reward, ctrl action components]
        if abs(x[0]) < 0.5 or env.action is None:
            return 0.0

        # ctrl selected nothing: pass the environment reward through
        if x[1] + x[2] < 0.5:
            return x[0]

        # compare the chosen action with the part of the state that the
        # larger ctrl component selects
        if x[1] > x[2]:
            return (1.5 if env.action[1] == env.state[:env.num_orientations] else -1.5)
        else:
            return (1.5 if env.action[1] == env.state[env.num_orientations:-env.num_colours] else -1.5)
    net.connect(reward_relay, nav_agent.getTermination("reward"),
                func=nav_reward_func)

    # state for navagent controlled by ctrlagent
    ctrl_state_inhib = net.make_array("ctrl_state_inhib", 50, env.stateD, radius=2, mode=HRLutils.SIMULATION_MODE)
    ctrl_state_inhib.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

    # one row per neuron (50 per state dimension), grouped as orientation,
    # shape and colour dimensions
    inhib_matrix = [[0, -5]] * 50 * env.num_orientations + \
                   [[-5, 0]] * 50 * env.num_shapes + \
                   [[-5, -5]] * 50 * env.num_colours

    # ctrl output inhibits all the non-selected aspects of the state
    net.connect(env.getOrigin("state"), ctrl_state_inhib)
    net.connect(ctrl_agent.getOrigin("action_output"), ctrl_state_inhib, transform=inhib_matrix)

    # also give a boost to the selected aspects (so that neurons are roughly
    # equally activated).
    def boost_func(x):
        # x[0] is the summed ctrl action output, x[1:] the inhibited state
        if x[0] > 0.5:
            return [3 * v for v in x[1:]]
        else:
            return x[1:]
    boost = net.make("boost", 1, 1 + env.stateD, mode="direct")
    boost.fixMode()
    net.connect(ctrl_state_inhib, boost,
                transform=([[0 for _ in range(env.stateD)]] + list(MU.I(env.stateD))))
    net.connect(ctrl_agent.getOrigin("action_output"), boost,
                transform=[[1, 1]] + [[0, 0] for _ in range(env.stateD)])

    net.connect(boost, nav_agent.getTermination("state_input"),
                func=boost_func)

    # save weights
    weight_save = 1.0  # period to save weights (realtime, not simulation time)
    threads = [
        HRLutils.WeightSaveThread(
            nav_agent.getNode("QNetwork").saveParams,
            os.path.join("weights", "%s_%s" % (nav_agent.name, seed)),
            weight_save),
        HRLutils.WeightSaveThread(
            ctrl_agent.getNode("QNetwork").saveParams,
            os.path.join("weights", "%s_%s" % (ctrl_agent.name, seed)),
            weight_save)
    ]
    for t in threads:
        t.start()

    # data collection node
    data = datanode.DataNode(
        period=1, filename=HRLutils.datafile("dataoutput_%s.txt" % label),
        header="%s %s %s %s %s" % (nav_args, ctrl_args, bias, seed, flat))
    print "saving data to", data.filename
    print "header", data.header
    net.add(data)

    nav_q = nav_agent.getNode("QNetwork")
    ctrl_q = ctrl_agent.getNode("QNetwork")
    ctrl_bg = ctrl_agent.getNode("BGNetwork").getNode("weight_actions")

    data.record_avg(env.getOrigin("reward"))
    data.record_avg(ctrl_q.getNode("actionvals").getOrigin("X"))
    data.record_sparsity(ctrl_q.getNode("state_pop").getOrigin("AXON"))
    data.record_sparsity(nav_q.getNode("state_pop").getOrigin("AXON"))
    data.record_avg(ctrl_q.getNode("valdiff").getOrigin("X"))
    data.record_avg(ctrl_agent.getNode("ErrorNetwork").getOrigin("error"))
    data.record_avg(ctrl_bg.getNode("0").getOrigin("AXON"))
    data.record_avg(ctrl_bg.getNode("1").getOrigin("AXON"))
    data.record(env.getOrigin("score"))

    # net.add_to_nengo()
    # net.network.simulator.run(0, 300, 0.001)
    net.view()

    # the weight-saving threads are stopped once net.view() returns
    for t in threads:
        t.stop()
def __init__(self, stateN, stateD, state_encoders, actions, learningrate,
             stateradius=1.0, Qradius=1.0, load_weights=None):
    """Builds the QNetwork.

    :param stateN: number of neurons in each state population
    :param stateD: dimension of the state vector
    :param state_encoders: encoders passed to both state populations
    :param actions: actions available to the system (only the number of
        actions is used directly here; the list is passed on to
        ActionValues)
    :param learningrate: learning rate passed on to ActionValues
    :param stateradius: radius of the saved-state memory; the state
        populations use sqrt(2 * stateradius**2)
    :param Qradius: radius passed on to ActionValues
    :param load_weights: if not None, passed to self.loadWeights

    Side effect: starts a background thread that calls self.saveWeights
    every ``weight_save`` seconds of real time.
    """
    NetworkImpl.__init__(self)
    self.name = "QNetwork"
    net = nef.Network(self, seed=HRLutils.SEED, quick=False)

    N = 50
    statelength = math.sqrt(2 * stateradius ** 2)
    tauPSC = 0.007
    num_actions = len(actions)
    init_Qs = 0.0
    # period to save weights (realtime, not simulation time)
    weight_save = 600.0

    # set up relays
    state_relay = net.make("state_relay", 1, stateD, mode="direct")
    state_relay.fixMode()
    state_relay.addDecodedTermination("input", MU.I(stateD), 0.001, False)

    # create state population
    state_fac = HRLutils.node_fac()
    state_fac.setIntercept(IndicatorPDF(0, 1))

    state_pop = net.make("state_pop", stateN, stateD, radius=statelength,
                         node_factory=state_fac, encoders=state_encoders)
    # eval_points=MU.I(stateD))
    # state_pop = net.make_array("state_pop", stateN/stateD, stateD,
    #                            node_factory=state_fac)
    state_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

    net.connect(state_relay, state_pop, pstc=tauPSC)

    # create population tied to previous state (to be used in learning)
    saved_state = memory.Memory("saved_state", N * 4, stateD, inputscale=50,
                                radius=stateradius, direct_storage=True)
    net.add(saved_state)

    net.connect(state_relay, saved_state.getTermination("target"))

    old_state_pop = net.make("old_state_pop", stateN, stateD,
                             radius=statelength, node_factory=state_fac,
                             encoders=state_encoders)
    # eval_points=MU.I(stateD))
    # old_state_pop = net.make_array("old_state_pop", stateN/stateD, stateD,
    #                                node_factory=state_fac)
    old_state_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

    net.connect(saved_state, old_state_pop, pstc=tauPSC)

    # set up action nodes; each ActionValues network is initialised with
    # decoders for the constant function init_Qs on its state population
    decoders = state_pop.addDecodedOrigin(
        "init_decoders", [ConstantFunction(stateD, init_Qs)],
        "AXON").getDecoders()
    actionvals = actionvalues.ActionValues("actionvals", N, stateN, actions,
                                           learningrate, Qradius=Qradius,
                                           init_decoders=decoders)
    net.add(actionvals)

    decoders = old_state_pop.addDecodedOrigin(
        "init_decoders", [ConstantFunction(stateD, init_Qs)],
        "AXON").getDecoders()
    old_actionvals = actionvalues.ActionValues("old_actionvals", N, stateN,
                                               actions, learningrate,
                                               Qradius=Qradius,
                                               init_decoders=decoders)
    net.add(old_actionvals)

    net.connect(state_pop.getOrigin("AXON"),
                actionvals.getTermination("state"))
    net.connect(old_state_pop.getOrigin("AXON"),
                old_actionvals.getTermination("state"))

    if load_weights is not None:
        self.loadWeights(load_weights)

    # find error between old_actionvals and actionvals
    # (doubling values to get a bigger error signal)
    valdiff = net.make_array("valdiff", N, num_actions,
                             node_factory=HRLutils.node_fac())
    net.connect(old_actionvals, valdiff,
                transform=MU.diag([2] * num_actions), pstc=tauPSC)
    net.connect(actionvals, valdiff,
                transform=MU.diag([-2] * num_actions), pstc=tauPSC)

    # calculate diff between curr_state and saved_state and use that to
    # gate valdiff
    statediff = net.make_array("statediff", N, stateD, intercept=(0.2, 1))
    net.connect(state_relay, statediff, pstc=tauPSC)
    net.connect(saved_state, statediff,
                transform=MU.diag([-1] * stateD), pstc=tauPSC)

    net.connect(statediff, valdiff, func=lambda x: [abs(v) for v in x],
                transform=[[-10] * stateD
                           for _ in range(valdiff.getNeurons())],
                pstc=tauPSC)

    net.connect(valdiff, actionvals.getTermination("error"))

    # periodically save the weights
    # NOTE(review): this thread loops forever and is not a daemon thread,
    # so nothing in this method ever stops it -- confirm that is intended.
    class WeightSaveThread(threading.Thread):
        def __init__(self, func, prefix, period):
            threading.Thread.__init__(self)
            self.func = func
            self.prefix = prefix
            self.period = period

        def run(self):
            while True:
                time.sleep(self.period)
                self.func(self.prefix)

    wsn = WeightSaveThread(self.saveWeights, os.path.join("weights", "tmp"),
                           weight_save)
    wsn.start()

    self.exposeTermination(state_relay.getTermination("input"), "state")
    self.exposeTermination(old_actionvals.getTermination("error"), "error")
    self.exposeTermination(saved_state.getTermination("transfer"),
                           "save_state")
    self.exposeOrigin(actionvals.getOrigin("X"), "vals")
    self.exposeOrigin(old_actionvals.getOrigin("X"), "old_vals")
def termination_context(self, c, pstc=0.01):
    """Pick the stored context that best matches the input vector.

    Every entry of ``self.contexts`` is scored by ``MU.prod`` of the
    normalized input ``c`` and the normalized second element of the
    entry; the highest-scoring entry is stored in ``self.context``.

    :param c: input vector to compare against the stored contexts
    :param pstc: not used in this body (presumably read by the node
        framework from the signature -- TODO confirm)
    """
    def match_score(entry):
        return MU.prod(HRLutils.normalize(c), HRLutils.normalize(entry[1]))

    self.context = max(self.contexts, key=match_score)
def run_deliveryenvironment(navargs, ctrlargs, tag=None, seed=None):
    """Runs the model on the delivery task.

    :param navargs: kwargs for the nav_agent (see SMDPAgent.__init__)
    :param ctrlargs: kwargs for the ctrl_agent (see SMDPAgent.__init__)
    :param tag: string appended to datafiles associated with this run
        (defaults to the string form of the seed)
    :param seed: random seed used for this run
    """

    # work on shallow copies so that appending the tag to "load_weights"
    # below does not modify the dictionaries owned by the caller (which
    # would make a second call append the tag twice)
    navargs = dict(navargs)
    ctrlargs = dict(ctrlargs)

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    if tag is None:
        tag = str(seed)

    net = nef.Network("runDeliveryEnvironment", seed=seed)

    stateN = 1200  # number of neurons to use in state population
    contextD = 2  # dimension of context vector
    context_scale = 1.0  # relative scale of context vector vs state vector
    max_state_input = 2  # maximum length of input vector to state population

    # labels and vectors corresponding to basic actions available to the
    # system
    actions = [("up", [0, 1]), ("right", [1, 0]),
               ("down", [0, -1]), ("left", [-1, 0])]

    if navargs.get("load_weights") is not None:
        navargs["load_weights"] += "_%s" % tag
    if ctrlargs.get("load_weights") is not None:
        ctrlargs["load_weights"] += "_%s" % tag

    # ##ENVIRONMENT

    env = deliveryenvironment.DeliveryEnvironment(
        actions, HRLutils.datafile("contextmap.bmp"),
        colormap={-16777216: "wall", -1: "floor", -256: "a", -2088896: "b"},
        imgsize=(5, 5), dx=0.001, placedev=0.5)
    net.add(env)

    print("generated %s placecells" % len(env.placecells))

    # ##NAV AGENT

    # generate encoders and divide them by max_state_input (so that inputs
    # will be scaled down to radius 1)
    enc = env.gen_encoders(stateN, contextD, context_scale)
    enc = MU.prod(enc, 1.0 / max_state_input)

    # read in eval points from file (one space-separated vector per line)
    with open(HRLutils.datafile("contextbmp_evalpoints_%s.txt" % tag)) as f:
        evals = [[float(x) for x in line.split(" ")] for line in f]

    nav_agent = smdpagent.SMDPAgent(stateN, len(env.placecells) + contextD,
                                    actions, name="NavAgent",
                                    state_encoders=enc, state_evals=evals,
                                    state_threshold=0.8, **navargs)
    net.add(nav_agent)

    print("agent neurons: %s" % nav_agent.countNeurons())

    # output of nav_agent is what goes to the environment
    net.connect(nav_agent.getOrigin("action_output"),
                env.getTermination("action"))

    # termination node for nav_agent (just a timer that goes off regularly)
    nav_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.9)): None}, env, contextD=2,
        name="NavTermNode")
    net.add(nav_term_node)

    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("reset"))
    net.connect(nav_term_node.getOrigin("learn"),
                nav_agent.getTermination("learn"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_state"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_action"))

    # ##CTRL AGENT

    # actions corresponding to "go to A" or "go to B"
    actions = [("a", [0, 1]), ("b", [1, 0])]
    ctrl_agent = smdpagent.SMDPAgent(stateN, len(env.placecells) + contextD,
                                     actions, name="CtrlAgent",
                                     state_encoders=enc, state_evals=evals,
                                     state_threshold=0.8, **ctrlargs)
    net.add(ctrl_agent)

    print("agent neurons: %s" % ctrl_agent.countNeurons())

    # ctrl_agent gets environmental state and reward
    net.connect(env.getOrigin("placewcontext"),
                ctrl_agent.getTermination("state_input"))
    net.connect(env.getOrigin("reward"),
                ctrl_agent.getTermination("reward"))

    # termination node for ctrl_agent (terminates whenever the agent is in
    # the state targeted by the ctrl_agent)
    # also has a long timer so that ctrl_agent doesn't get permanently stuck
    # in one action
    ctrl_term_node = terminationnode.TerminationNode(
        {"a": [0, 1], "b": [1, 0], terminationnode.Timer((30, 30)): None},
        env, contextD=2, name="CtrlTermNode", rewardval=1.5)
    net.add(ctrl_term_node)

    # reward for nav_agent is the pseudoreward from ctrl_agent termination
    net.connect(ctrl_term_node.getOrigin("pseudoreward"),
                nav_agent.getTermination("reward"))

    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("reset"))
    net.connect(ctrl_term_node.getOrigin("learn"),
                ctrl_agent.getTermination("learn"))
    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("save_state"))
    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("save_action"))

    # connect ctrl_agent action to termination context
    # this is used so that ctrl_term_node knows what the current goal is (to
    # determine termination and pseudoreward)
    net.connect(ctrl_agent.getOrigin("action_output"),
                ctrl_term_node.getTermination("context"))

    # state input for nav_agent is the environmental state + the output of
    # ctrl_agent
    ctrl_output_relay = net.make("ctrl_output_relay", 1,
                                 len(env.placecells) + contextD,
                                 mode="direct")
    ctrl_output_relay.fixMode()
    # place vector fills the first len(placecells) dimensions of the relay
    trans = (list(MU.I(len(env.placecells))) +
             [[0 for _ in range(len(env.placecells))]
              for _ in range(contextD)])
    net.connect(env.getOrigin("place"), ctrl_output_relay, transform=trans)
    # ctrl_agent output fills the last contextD dimensions of the relay
    net.connect(ctrl_agent.getOrigin("action_output"), ctrl_output_relay,
                transform=([[0 for _ in range(contextD)]
                            for _ in range(len(env.placecells))] +
                           list(MU.I(contextD))))

    net.connect(ctrl_output_relay, nav_agent.getTermination("state_input"))

    # periodically save the weights

    # period to save weights (realtime, not simulation time)
    weight_save = 600.0

    threads = [
        HRLutils.WeightSaveThread(
            nav_agent.getNode("QNetwork").saveParams,
            os.path.join("weights", "%s_%s" % (nav_agent.name, tag)),
            weight_save),
        HRLutils.WeightSaveThread(
            ctrl_agent.getNode("QNetwork").saveParams,
            os.path.join("weights", "%s_%s" % (ctrl_agent.name, tag)),
            weight_save),
    ]
    for t in threads:
        t.start()

    # make sure the saver threads are stopped even if the remaining setup
    # or the viewer raises
    try:
        # data collection node
        data = datanode.DataNode(
            period=5, filename=HRLutils.datafile("dataoutput_%s.txt" % tag))
        net.add(data)
        data.record(env.getOrigin("reward"))
        q_net = ctrl_agent.getNode("QNetwork")
        data.record(q_net.getNode("actionvals").getOrigin("X"), func=max)
        data.record(q_net.getNode("actionvals").getOrigin("X"), func=min)
        data.record_sparsity(q_net.getNode("state_pop").getOrigin("AXON"))
        data.record_avg(q_net.getNode("valdiff").getOrigin("X"))
        data.record_avg(ctrl_agent.getNode("ErrorNetwork").getOrigin("error"))

        # net.add_to_nengo()
        # net.run(10000)
        net.view()
    finally:
        for t in threads:
            t.stop()
def run_contextenvironment(args, seed=None):
    """Runs the model on the context task.

    :param args: kwargs for the agent
    :param seed: random seed
    """

    # work on a shallow copy so that appending the seed to "load_weights"
    # below does not modify the dictionary owned by the caller
    args = dict(args)

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    # NOTE(review): unlike run_deliveryenvironment, no seed is passed to the
    # network here -- confirm whether that is intentional
    net = nef.Network("runContextEnvironment")

    if args.get("load_weights") is not None:
        args["load_weights"] += "_%s" % seed

    stateN = 1200  # number of neurons to use in state population
    contextD = 2  # dimension of context vector
    context_scale = 1.0  # scale of context representation
    max_state_input = 2  # max length of input vector for state population

    # actions (label and vector) available to the system
    actions = [("up", [0, 1]), ("right", [1, 0]),
               ("down", [0, -1]), ("left", [-1, 0])]

    # context labels and rewards for achieving those context goals
    rewards = {"a": 1.5, "b": 1.5}

    env = contextenvironment.ContextEnvironment(
        actions, HRLutils.datafile("contextmap.bmp"), contextD, rewards,
        colormap={-16777216: "wall", -1: "floor", -256: "a", -2088896: "b"},
        imgsize=(5, 5), dx=0.001, placedev=0.5)
    net.add(env)

    print("generated %s placecells" % len(env.placecells))

    # termination node for agent (just goes off on some regular interval)
    term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.9)): 0.0}, env)
    net.add(term_node)

    # generate encoders and divide by max_state_input (so that all inputs
    # will end up being radius 1)
    enc = env.gen_encoders(stateN, contextD, context_scale)
    enc = MU.prod(enc, 1.0 / max_state_input)

    # load eval points from file (one space-separated vector per line)
    with open(HRLutils.datafile("contextbmp_evalpoints_%s.txt" % seed)) as f:
        print("loading contextbmp_evalpoints_%s.txt" % seed)
        evals = [[float(x) for x in line.split(" ")] for line in f]

    agent = smdpagent.SMDPAgent(stateN, len(env.placecells) + contextD,
                                actions, state_encoders=enc,
                                state_evals=evals, state_threshold=0.8,
                                **args)
    net.add(agent)

    print("agent neurons: %s" % agent.countNeurons())

    # period to save weights (realtime, not simulation time)
    weight_save = 600.0
    t = HRLutils.WeightSaveThread(
        agent.getNode("QNetwork").saveParams,
        os.path.join("weights", "%s_%s" % (agent.name, seed)),
        weight_save)
    t.start()

    # make sure the saver thread is stopped even if the remaining setup or
    # the viewer raises
    try:
        # data collection node
        data = datanode.DataNode(
            period=5, filename=HRLutils.datafile("dataoutput_%s.txt" % seed))
        net.add(data)
        q_net = agent.getNode("QNetwork")
        data.record(env.getOrigin("reward"))
        data.record(q_net.getNode("actionvals").getOrigin("X"), func=max)
        data.record(q_net.getNode("actionvals").getOrigin("X"), func=min)
        data.record_sparsity(q_net.getNode("state_pop").getOrigin("AXON"))
        data.record_avg(q_net.getNode("valdiff").getOrigin("X"))
        data.record_avg(env.getOrigin("state"))

        net.connect(env.getOrigin("placewcontext"),
                    agent.getTermination("state_input"))
        net.connect(env.getOrigin("reward"), agent.getTermination("reward"))
        net.connect(term_node.getOrigin("reset"),
                    agent.getTermination("reset"))
        net.connect(term_node.getOrigin("learn"),
                    agent.getTermination("learn"))
        net.connect(term_node.getOrigin("reset"),
                    agent.getTermination("save_state"))
        net.connect(term_node.getOrigin("reset"),
                    agent.getTermination("save_action"))

        net.connect(agent.getOrigin("action_output"),
                    env.getTermination("action"))

        # net.add_to_nengo()
        # net.run(2000)
        net.view()
    finally:
        t.stop()
def run_deliveryenvironment(navargs, ctrlargs, tag=None, seed=None):
    """Runs the model on the delivery task.

    :param navargs: kwargs for the nav_agent (see SMDPAgent.__init__)
    :param ctrlargs: kwargs for the ctrl_agent (see SMDPAgent.__init__)
    :param tag: string appended to datafiles associated with this run
        (defaults to the string form of the seed)
    :param seed: random seed used for this run
    """

    # work on shallow copies so that appending the tag to "load_weights"
    # below does not modify the dictionaries owned by the caller (which
    # would make a second call append the tag twice)
    navargs = dict(navargs)
    ctrlargs = dict(ctrlargs)

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    if tag is None:
        tag = str(seed)

    net = nef.Network("runDeliveryEnvironment", seed=seed)

    stateN = 1200  # number of neurons to use in state population
    contextD = 2  # dimension of context vector
    context_scale = 1.0  # relative scale of context vector vs state vector
    max_state_input = 2  # maximum length of input vector to state population

    # labels and vectors corresponding to basic actions available to the
    # system
    actions = [("up", [0, 1]), ("right", [1, 0]),
               ("down", [0, -1]), ("left", [-1, 0])]

    if navargs.get("load_weights") is not None:
        navargs["load_weights"] += "_%s" % tag
    if ctrlargs.get("load_weights") is not None:
        ctrlargs["load_weights"] += "_%s" % tag

    # ##ENVIRONMENT

    env = deliveryenvironment.DeliveryEnvironment(
        actions, HRLutils.datafile("contextmap.bmp"),
        colormap={-16777216: "wall", -1: "floor", -256: "a", -2088896: "b"},
        imgsize=(5, 5), dx=0.001, placedev=0.5)
    net.add(env)

    print("generated %s placecells" % len(env.placecells))

    # ##NAV AGENT

    # generate encoders and divide them by max_state_input (so that inputs
    # will be scaled down to radius 1)
    enc = env.gen_encoders(stateN, contextD, context_scale)
    enc = MU.prod(enc, 1.0 / max_state_input)

    # read in eval points from file (one space-separated vector per line)
    with open(HRLutils.datafile("contextbmp_evalpoints_%s.txt" % tag)) as f:
        evals = [[float(x) for x in line.split(" ")] for line in f]

    nav_agent = smdpagent.SMDPAgent(stateN, len(env.placecells) + contextD,
                                    actions, name="NavAgent",
                                    state_encoders=enc, state_evals=evals,
                                    state_threshold=0.8, **navargs)
    net.add(nav_agent)

    print("agent neurons: %s" % nav_agent.countNeurons())

    # output of nav_agent is what goes to the environment
    net.connect(nav_agent.getOrigin("action_output"),
                env.getTermination("action"))

    # termination node for nav_agent (just a timer that goes off regularly)
    nav_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.9)): None}, env, contextD=2,
        name="NavTermNode")
    net.add(nav_term_node)

    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("reset"))
    net.connect(nav_term_node.getOrigin("learn"),
                nav_agent.getTermination("learn"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_state"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_action"))

    # ##CTRL AGENT

    # actions corresponding to "go to A" or "go to B"
    actions = [("a", [0, 1]), ("b", [1, 0])]
    ctrl_agent = smdpagent.SMDPAgent(stateN, len(env.placecells) + contextD,
                                     actions, name="CtrlAgent",
                                     state_encoders=enc, state_evals=evals,
                                     state_threshold=0.8, **ctrlargs)
    net.add(ctrl_agent)

    print("agent neurons: %s" % ctrl_agent.countNeurons())

    # ctrl_agent gets environmental state and reward
    net.connect(env.getOrigin("placewcontext"),
                ctrl_agent.getTermination("state_input"))
    net.connect(env.getOrigin("reward"),
                ctrl_agent.getTermination("reward"))

    # termination node for ctrl_agent (terminates whenever the agent is in
    # the state targeted by the ctrl_agent)
    # also has a long timer so that ctrl_agent doesn't get permanently stuck
    # in one action
    ctrl_term_node = terminationnode.TerminationNode(
        {"a": [0, 1], "b": [1, 0], terminationnode.Timer((30, 30)): None},
        env, contextD=2, name="CtrlTermNode", rewardval=1.5)
    net.add(ctrl_term_node)

    # reward for nav_agent is the pseudoreward from ctrl_agent termination
    net.connect(ctrl_term_node.getOrigin("pseudoreward"),
                nav_agent.getTermination("reward"))

    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("reset"))
    net.connect(ctrl_term_node.getOrigin("learn"),
                ctrl_agent.getTermination("learn"))
    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("save_state"))
    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("save_action"))

    # connect ctrl_agent action to termination context
    # this is used so that ctrl_term_node knows what the current goal is (to
    # determine termination and pseudoreward)
    net.connect(ctrl_agent.getOrigin("action_output"),
                ctrl_term_node.getTermination("context"))

    # state input for nav_agent is the environmental state + the output of
    # ctrl_agent
    ctrl_output_relay = net.make("ctrl_output_relay", 1,
                                 len(env.placecells) + contextD,
                                 mode="direct")
    ctrl_output_relay.fixMode()
    # place vector fills the first len(placecells) dimensions of the relay
    trans = (list(MU.I(len(env.placecells))) +
             [[0 for _ in range(len(env.placecells))]
              for _ in range(contextD)])
    net.connect(env.getOrigin("place"), ctrl_output_relay, transform=trans)
    # ctrl_agent output fills the last contextD dimensions of the relay
    net.connect(ctrl_agent.getOrigin("action_output"), ctrl_output_relay,
                transform=([[0 for _ in range(contextD)]
                            for _ in range(len(env.placecells))] +
                           list(MU.I(contextD))))

    net.connect(ctrl_output_relay, nav_agent.getTermination("state_input"))

    # periodically save the weights

    # period to save weights (realtime, not simulation time)
    weight_save = 600.0

    threads = [
        HRLutils.WeightSaveThread(
            nav_agent.getNode("QNetwork").saveParams,
            os.path.join("weights", "%s_%s" % (nav_agent.name, tag)),
            weight_save),
        HRLutils.WeightSaveThread(
            ctrl_agent.getNode("QNetwork").saveParams,
            os.path.join("weights", "%s_%s" % (ctrl_agent.name, tag)),
            weight_save),
    ]
    for t in threads:
        t.start()

    # make sure the saver threads are stopped even if the remaining setup
    # or the viewer raises
    try:
        # data collection node
        data = datanode.DataNode(
            period=5, filename=HRLutils.datafile("dataoutput_%s.txt" % tag))
        net.add(data)
        data.record(env.getOrigin("reward"))
        q_net = ctrl_agent.getNode("QNetwork")
        data.record(q_net.getNode("actionvals").getOrigin("X"), func=max)
        data.record(q_net.getNode("actionvals").getOrigin("X"), func=min)
        data.record_sparsity(q_net.getNode("state_pop").getOrigin("AXON"))
        data.record_avg(q_net.getNode("valdiff").getOrigin("X"))
        data.record_avg(ctrl_agent.getNode("ErrorNetwork").getOrigin("error"))

        # net.add_to_nengo()
        # net.run(10000)
        net.view()
    finally:
        for t in threads:
            t.stop()
def getDecoders(self):
    """Assemble the decoders of all wrapped origins into one matrix.

    Allocates a zero matrix with ``neurons * len(self._origins)`` rows
    and ``self._dimensions`` columns (``neurons`` is read from the first
    node of the parent), then copies each origin's decoders into it at
    row offset ``i * neurons`` and column offset ``i * o.dimensions``.

    :returns: the combined decoder matrix
    """
    per_node = self._parent.nodes[0].neurons
    combined = MU.zero(per_node * len(self._origins), self._dimensions)
    for index, origin in enumerate(self._origins):
        MU.copyInto(origin.decoders, combined,
                    index * per_node, index * origin.dimensions, per_node)
    return combined
def __init__(self, stateN, stateD, state_encoders, actions, learningrate,
             stateradius=1.0, Qradius=1.0, load_weights=None,
             state_evals=None, state_threshold=0.0):
    """Builds the QNetwork.

    :param stateN: number of neurons to use to represent state
    :param stateD: dimension of state vector
    :param state_encoders: encoders to use for neurons in state population
    :param actions: actions available to the system
        :type actions: list of tuples (action_name,action_vector)
    :param learningrate: learningrate for action value learning rule
    :param stateradius: expected radius of state values
    :param Qradius: expected radius of Q values
    :param load_weights: filename to load Q value weights from
    :param state_evals: evaluation points to use for state population.
        This is used when initializing the Q values (may be necessary if
        the input states don't tend to fall in the hypersphere).
    :param state_threshold: threshold of state neurons (minimum intercept)
    """

    self.name = "QNetwork"
    net = nef.Network(self, seed=HRLutils.SEED, quick=False)

    N = 50
    tauPSC = 0.007
    num_actions = len(actions)
    init_Qs = 0.2  # initial value for all Q values

    # if True, use neuron--neuron weight learning, otherwise, use decoder
    # learning
    self.neuron_learning = False

    # set up relays
    state_relay = net.make("state_relay", 1, stateD, mode="direct")
    # NOTE(review): fixMode() presumably pins the node to its current
    # simulation mode so it cannot be overridden later -- confirm
    state_relay.fixMode()
    state_relay.addDecodedTermination("input", MU.I(stateD), 0.001, False)

    # create state population
    state_fac = HRLutils.node_fac()
    state_fac.setIntercept(IndicatorPDF(state_threshold, 1.0))

    print("making the state_pop")
    state_pop = net.make("state_pop", stateN, stateD, radius=stateradius,
                         node_factory=state_fac, encoders=state_encoders,
                         eval_points=state_evals)
    state_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

    net.connect(state_relay, state_pop, pstc=tauPSC)

    # store the state value (used to drive population encoding previous
    # state)
    print("create the saved state memory")
    saved_state = memory.Memory("saved_state", N * 4, stateD, inputscale=50,
                                radius=stateradius, direct_storage=True)
    net.add(saved_state)

    net.connect(state_relay, saved_state.getTermination("target"))

    # create population representing previous state
    old_state_pop = net.make("old_state_pop", stateN, stateD,
                             radius=stateradius, node_factory=state_fac,
                             encoders=state_encoders,
                             eval_points=state_evals)
    old_state_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

    net.connect(saved_state, old_state_pop, pstc=tauPSC)

    print("setup the action nodes")
    # set up action nodes
    if self.neuron_learning:
        # use ActionValues network to compute Q values

        # current Q values
        decoders = state_pop.addDecodedOrigin(
            "init_decoders", [ConstantFunction(stateD, init_Qs)],
            "AXON").getDecoders()
        actionvals = actionvalues.ActionValues("actionvals", N, stateN,
                                               actions, learningrate,
                                               Qradius=Qradius,
                                               init_decoders=decoders)
        net.add(actionvals)
        net.connect(state_pop.getOrigin("AXON"),
                    actionvals.getTermination("state"))

        # Q values of previous state
        decoders = old_state_pop.addDecodedOrigin(
            "init_decoders", [ConstantFunction(stateD, init_Qs)],
            "AXON").getDecoders()
        old_actionvals = actionvalues.ActionValues("old_actionvals", N,
                                                   stateN, actions,
                                                   learningrate,
                                                   Qradius=Qradius,
                                                   init_decoders=decoders)
        net.add(old_actionvals)
        net.connect(old_state_pop.getOrigin("AXON"),
                    old_actionvals.getTermination("state"))
    else:
        # just use decoder on state population to compute Q values

        # current Q values
        # NOTE(review): ConstantFunction is given num_actions as its first
        # argument here but stateD in the neuron_learning branch -- confirm
        # which is intended
        origin = state_pop.addDecodedOrigin(
            "vals",
            [ConstantFunction(num_actions, init_Qs)
             for _ in range(num_actions)],
            "AXON")
        state_dlnode = decoderlearningnode.DecoderLearningNode(
            state_pop, origin, learningrate, num_actions,
            name="state_learningnode")
        net.add(state_dlnode)

        # just a little relay node, so that things match up for the rest of
        # the script when you have the neuron -- neuron learning
        actionvals = net.make("actionvals", 1, num_actions, mode="direct")
        actionvals.fixMode()
        net.connect(origin, actionvals, pstc=0.001)

        # Q values of previous state
        origin = old_state_pop.addDecodedOrigin(
            "vals",
            [ConstantFunction(num_actions, init_Qs)
             for _ in range(num_actions)],
            "AXON")
        old_state_dlnode = decoderlearningnode.DecoderLearningNode(
            old_state_pop, origin, learningrate, num_actions,
            name="old_state_learningnode")
        net.add(old_state_dlnode)

        old_actionvals = net.make("old_actionvals", 1, num_actions,
                                  mode="direct")
        old_actionvals.fixMode()
        net.connect(origin, old_actionvals, pstc=0.001)

    if load_weights is not None:
        self.loadParams(load_weights)

    # find error between old_actionvals and actionvals (this will be used
    # to drive learning on the new actionvals)
    # (doubling the values to get a bigger error signal)
    valdiff = net.make_array("valdiff", N, num_actions,
                             node_factory=HRLutils.node_fac())
    net.connect(old_actionvals, valdiff,
                transform=MU.diag([2] * num_actions), pstc=tauPSC)
    net.connect(actionvals, valdiff,
                transform=MU.diag([-2] * num_actions), pstc=tauPSC)

    # calculate diff between curr_state and saved_state and use that to gate
    # valdiff (we only want to train the curr state based on previous state
    # when the two have similar values)
    # note: threshold > 0 so that there is a deadzone in the middle (when
    # the states are similar) where there will be no output inhibition
    statediff = net.make_array("statediff", N, stateD, intercept=(0.2, 1))
    net.connect(state_relay, statediff, pstc=tauPSC)
    net.connect(saved_state, statediff,
                transform=MU.diag([-1] * stateD), pstc=tauPSC)

    # the absolute state difference is projected onto every valdiff neuron
    # with a weight of -10 per state dimension
    net.connect(statediff, valdiff, func=lambda x: [abs(v) for v in x],
                transform=[[-10] * stateD
                           for _ in range(valdiff.getNeurons())],
                pstc=tauPSC)

    # connect up valdiff to the error signal for current Q values, and
    # expose the error signal for the previous Q values to the external
    # error
    if self.neuron_learning:
        net.connect(valdiff, actionvals.getTermination("error"))
        self.exposeTermination(old_actionvals.getTermination("error"),
                               "error")
    else:
        net.connect(valdiff, state_dlnode.getTermination("error"))
        self.exposeTermination(old_state_dlnode.getTermination("error"),
                               "error")

    self.exposeTermination(state_relay.getTermination("input"), "state")
    self.exposeTermination(saved_state.getTermination("transfer"),
                           "save_state")
    self.exposeOrigin(actionvals.getOrigin("X"), "vals")
    self.exposeOrigin(old_actionvals.getOrigin("X"), "old_vals")
# Simulations to verify analytical estimates of error dependencies on
# network time constants

from ca.nengo.math import Function
from ca.nengo.math.impl import FourierFunction
from ca.nengo.math.impl import IndicatorPDF
from ca.nengo.math.impl import ConstantFunction
from ca.nengo.model import SimulationMode
from ca.nengo.plot import Plotter
from ca.nengo.util import MU
import math

# NOTE(review): the network objects listed here are not defined in this
# script; they are presumably created before it runs -- confirm
networks = [interneuron, dualTC, adapting, depressing, butterworth,
            interneuronFeedback]
tau = [.005, .01, .05, .1, .2, .5]

# input signal: a FourierFunction over the frequencies produced by
# MU.makeVector(.1, .1, signalBandwidth), with equal amplitudes and
# phases drawn from IndicatorPDF(-.5, .5)
signalBandwidth = 15
frequencies = MU.makeVector(.1, .1, signalBandwidth)
componentRMS = math.sqrt(1.0 / len(frequencies))
signal = FourierFunction(
    frequencies,
    MU.uniform(1, len(frequencies), componentRMS / .707)[0],
    MU.random(1, len(frequencies), IndicatorPDF(-.5, .5))[0])

noiseBandwidth = 500

for network in networks:
    network.setMode(SimulationMode.DIRECT)
    network.setStepSize(.0005)
    signalPower = []
    noisePower = []

    for t in tau:
        network.setTau(t)
        network.setInputFunction(signal)
def make_convolution(
    self,
    name,
    A,
    B,
    C,
    N_per_D,
    quick=False,
    encoders=[[1, 1], [1, -1], [-1, 1], [-1, -1]],
    radius=3,
    pstc_out=0.01,
    pstc_in=0.01,
    pstc_gate=0.01,
    invert_first=False,
    invert_second=False,
    mode="default",
    output_scale=1,
):
    """Create a convolution node D taking inputs A and B and driving C.

    :param name: name of the created node (also the key used for the
        metadata recorded on the network)
    :param A: first input node, its name, or None to leave unconnected
    :param B: second input node, its name, or None to leave unconnected
    :param C: output node or its name; its dimension sets the
        dimensionality of the convolution
    :param N_per_D: passed to make_array when mode is not "direct"
    :param quick: passed to make_array when mode is not "direct"
    :param encoders: passed to make_array when mode is not "direct".
        The default is a shared list object; it is not modified in this
        body.
    :param radius: passed to make_array when mode is not "direct"
    :param pstc_out: pstc of the connection into C
    :param pstc_in: time constant of the "A" and "B" terminations
    :param pstc_gate: time constant of the "gate" termination (direct
        mode only)
    :param invert_first: passed to DirectConvolution / input_transform
        for input A
    :param invert_second: passed to DirectConvolution / input_transform
        for input B
    :param mode: "direct" builds a DirectConvolution node; any other
        value builds a neural array via make_array
    :param output_scale: scaling applied on the connection into C
    :returns: the created node D
    :raises Exception: if A or B is given and its dimension differs
        from that of C
    """
    if isinstance(A, str):
        A = self.network.getNode(A)
    if isinstance(B, str):
        B = self.network.getNode(B)
    if isinstance(C, str):
        C = self.network.getNode(C)

    dimensions = C.dimension
    if ((B is not None and B.dimension != dimensions) or
            (A is not None and A.dimension != dimensions)):
        # A or B may legitimately be None here, so their dimension must
        # not be dereferenced blindly while building the message
        def _dim(node):
            return "None" if node is None else "%d" % node.dimension

        raise Exception(
            "Dimensions not the same for convolution (%s,%s->%d)"
            % (_dim(A), _dim(B), dimensions))

    if mode == "direct":
        D = DirectConvolution(name, dimensions, invert_first, invert_second)
        self.add(D)
        D.getTermination("A").setTau(pstc_in)
        D.getTermination("B").setTau(pstc_in)
        D.getTermination("gate").setTau(pstc_gate)
        if A is not None:
            self.connect(A, D.getTermination("A"))
        if B is not None:
            self.connect(B, D.getTermination("B"))
        self.connect(D.getOrigin("C"), C, pstc=pstc_out,
                     weight=output_scale)
    else:
        D = make_array(self, name, N_per_D, dimensions, quick=quick,
                       encoders=encoders, radius=radius)

        A2 = input_transform(dimensions, True, invert_first)
        B2 = input_transform(dimensions, False, invert_second)

        D.addDecodedTermination("A", A2, pstc_in, False)
        D.addDecodedTermination("B", B2, pstc_in, False)

        if A is not None:
            self.connect(A, D.getTermination("A"))
        if B is not None:
            self.connect(B, D.getTermination("B"))

        ifftm2 = output_transform(dimensions)

        self.connect(D, C, func=product, transform=ifftm2 * output_scale,
                     pstc=pstc_out)

    # record the parameters of this convolution in the network metadata
    if self.network.getMetaData("convolution") is None:
        self.network.setMetaData("convolution", HashMap())
    bindings = self.network.getMetaData("convolution")

    binding = HashMap(15)
    binding.put("name", name)
    if A is not None:
        binding.put("A", A.getName())
    else:
        binding.put("A", None)
    if B is not None:
        binding.put("B", B.getName())
    else:
        binding.put("B", None)
    binding.put("C", C.getName())
    binding.put("N_per_D", N_per_D)
    binding.put("quick", quick)
    binding.put("encoders", MU.clone(encoders))
    binding.put("radius", radius)
    binding.put("pstc_out", pstc_out)
    binding.put("pstc_in", pstc_in)
    binding.put("pstc_gate", pstc_gate)
    binding.put("invert_first", invert_first)
    binding.put("invert_second", invert_second)
    binding.put("mode", mode)
    binding.put("output_scale", output_scale)

    bindings.put(name, binding)

    if self.network.getMetaData("templates") is None:
        self.network.setMetaData("templates", ArrayList())
    templates = self.network.getMetaData("templates")
    templates.add(name)

    if self.network.getMetaData("templateProjections") is None:
        self.network.setMetaData("templateProjections", HashMap())
    templateproj = self.network.getMetaData("templateProjections")
    # NOTE(review): all three puts use the same key, so only the last one
    # (C's name) remains in the map -- confirm this is intended
    if A is not None:
        templateproj.put(name, A.getName())
    if B is not None:
        templateproj.put(name, B.getName())
    templateproj.put(name, C.getName())

    return D
def __init__(self, stateN, stateD, state_encoders, actions, learningrate,
             stateradius=1.0, Qradius=1.0, load_weights=None,
             state_evals=None, state_threshold=(0.0, 1.0),
             statediff_threshold=0.2, init_Qs=None):
    """Builds the QNetwork.

    The network exposes terminations "state", "save_state" and "error", and
    origins "vals" (Q values of the current state) and "old_vals" (Q values
    of the saved state).

    :param stateN: number of neurons to use to represent state
    :param stateD: dimension of state vector
    :param state_encoders: encoders to use for neurons in state population
    :param actions: actions available to the system
        :type actions: list of tuples (action_name,action_vector)
    :param learningrate: learningrate for action value learning rule
    :param stateradius: expected radius of state values
    :param Qradius: expected radius of Q values
    :param load_weights: filename to load Q value weights from
    :param state_evals: evaluation points to use for state population.
        This is used when initializing the Q values (may be necessary if the
        input states don't tend to fall in the hypersphere).
    :param state_threshold: threshold range of state neurons; a single
        number ``t`` is treated as the range ``(t, 1.0)``
    :param statediff_threshold: maximum state difference for dual training
    :param init_Qs: initial Q values (defaults to 0.2 for every action)
    """

    self.name = "QNetwork"
    net = nef.Network(self, seed=HRLutils.SEED, quick=False)

    N = 50  # base population size used for the helper populations below
    tauPSC = 0.007  # post-synaptic time constant for most connections
    num_actions = len(actions)
    init_Qs = [0.2] * num_actions if init_Qs is None else init_Qs

    # if True, use neuron--neuron weight learning, otherwise, use decoder
    # learning
    self.neuron_learning = False

    # set up relays
    state_relay = net.make("state_relay", 1, stateD, mode="direct")
    state_relay.fixMode()
    state_relay.addDecodedTermination("input", MU.I(stateD), 0.001, False)

    # create state population
    state_fac = HRLutils.node_fac()
    # accept a scalar threshold as shorthand for the range (threshold, 1.0)
    if isinstance(state_threshold, (float, int)):
        state_threshold = (state_threshold, 1.0)
    state_fac.setIntercept(
        IndicatorPDF(state_threshold[0], state_threshold[1]))

    state_pop = net.make("state_pop", stateN, stateD,
                         radius=stateradius,
                         node_factory=state_fac,
                         encoders=state_encoders,
                         eval_points=state_evals)
    state_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

    net.connect(state_relay, state_pop, pstc=tauPSC)

    # store the state value (used to drive population encoding previous
    # state)
    saved_state = memory.Memory("saved_state", N * 4, stateD,
                                inputscale=50, radius=stateradius,
                                direct_storage=True)
    net.add(saved_state)

    net.connect(state_relay, saved_state.getTermination("target"))

    # create population representing previous state (same factory, encoders
    # and evaluation points as state_pop)
    old_state_pop = net.make("old_state_pop", stateN, stateD,
                             radius=stateradius,
                             node_factory=state_fac,
                             encoders=state_encoders,
                             eval_points=state_evals)
    old_state_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

    net.connect(saved_state, old_state_pop, pstc=tauPSC)

    # set up action nodes
    if self.neuron_learning:
        # use ActionValues network to compute Q values

        # current Q values
        decoders = state_pop.addDecodedOrigin(
            "init_decoders", [ConstantFunction(stateD, init_Qs)],
            "AXON").getDecoders()
        actionvals = actionvalues.ActionValues("actionvals", N, stateN,
                                               actions, learningrate,
                                               Qradius=Qradius,
                                               init_decoders=decoders)
        net.add(actionvals)

        net.connect(state_pop.getOrigin("AXON"),
                    actionvals.getTermination("state"))

        # Q values of previous state
        decoders = old_state_pop.addDecodedOrigin(
            "init_decoders", [ConstantFunction(stateD, init_Qs)],
            "AXON").getDecoders()
        old_actionvals = actionvalues.ActionValues("old_actionvals", N,
                                                   stateN, actions,
                                                   learningrate,
                                                   Qradius=Qradius,
                                                   init_decoders=decoders)
        net.add(old_actionvals)

        net.connect(old_state_pop.getOrigin("AXON"),
                    old_actionvals.getTermination("state"))
    else:
        # just use decoder on state population to compute Q values

        # current Q values (one constant function per action, so the origin
        # starts out decoding init_Qs)
        origin = state_pop.addDecodedOrigin("vals", [
            ConstantFunction(num_actions, init_Qs[i])
            for i in range(num_actions)
        ], "AXON")
        state_dlnode = decoderlearningnode.DecoderLearningNode(
            state_pop, origin, learningrate, num_actions,
            name="state_learningnode")
        net.add(state_dlnode)

        # just a little relay node, so that things match up for the rest of
        # the script when you have the neuron -- neuron learning
        actionvals = net.make("actionvals", 1, num_actions, mode="direct")
        actionvals.fixMode()
        net.connect(origin, actionvals, pstc=0.001)

        # Q values of previous state
        origin = old_state_pop.addDecodedOrigin("vals", [
            ConstantFunction(num_actions, init_Qs[i])
            for i in range(num_actions)
        ], "AXON")
        old_state_dlnode = decoderlearningnode.DecoderLearningNode(
            old_state_pop, origin, learningrate, num_actions,
            name="old_state_learningnode")
        net.add(old_state_dlnode)

        old_actionvals = net.make("old_actionvals", 1, num_actions,
                                  mode="direct")
        old_actionvals.fixMode()
        net.connect(origin, old_actionvals, pstc=0.001)

    if load_weights is not None:
        self.loadParams(load_weights)

    # find error between old_actionvals and actionvals (this will be used
    # to drive learning on the new actionvals)
    valdiff = net.make_array("valdiff", N, num_actions,
                             node_factory=HRLutils.node_fac())
    # doubling the values to get a bigger error signal
    net.connect(old_actionvals, valdiff,
                transform=MU.diag([2] * num_actions), pstc=tauPSC)
    net.connect(actionvals, valdiff,
                transform=MU.diag([-2] * num_actions), pstc=tauPSC)

    # calculate diff between curr_state and saved_state and use that to
    # gate valdiff (we only want to train the curr state based on previous
    # state when the two have similar values)
    # note: threshold > 0 so that there is a deadzone in the middle (when
    # the states are similar) where there will be no output inhibition
    statediff = net.make_array("statediff", N, stateD,
                               intercept=(statediff_threshold, 1))

    net.connect(state_relay, statediff, pstc=tauPSC)
    net.connect(saved_state, statediff,
                transform=MU.diag([-1] * stateD), pstc=tauPSC)

    # the absolute state difference inhibits every valdiff neuron
    net.connect(statediff, valdiff, func=lambda x: [abs(v) for v in x],
                transform=[[-10] * stateD
                           for _ in range(valdiff.getNeurons())],
                pstc=tauPSC)

    # connect up valdiff to the error signal for current Q values, and
    # expose the error signal for the previous Q values to the external
    # error
    if self.neuron_learning:
        net.connect(valdiff, actionvals.getTermination("error"))
        self.exposeTermination(old_actionvals.getTermination("error"),
                               "error")
    else:
        net.connect(valdiff, state_dlnode.getTermination("error"))
        self.exposeTermination(old_state_dlnode.getTermination("error"),
                               "error")

    self.exposeTermination(state_relay.getTermination("input"), "state")
    self.exposeTermination(saved_state.getTermination("transfer"),
                           "save_state")

    self.exposeOrigin(actionvals.getOrigin("X"), "vals")
    self.exposeOrigin(old_actionvals.getOrigin("X"), "old_vals")
def termination_action(self, a, pstc=0.01):
    """Pick the available action whose vector best matches ``a``.

    Each entry of ``self.actions`` is scored by ``MU.prod`` of ``a`` with the
    entry's element 1, and the highest-scoring entry is stored in
    ``self.action``.

    :param a: vector to compare against the action vectors
    :param pstc: not used inside this method
    """

    def similarity(candidate):
        # candidate[1] is the vector part of the action entry
        return MU.prod(a, candidate[1])

    # set the selected action to the one with highest similarity to the
    # available actions
    self.action = max(self.actions, key=similarity)
def run_flat_delivery(args, seed=None):
    """Runs the model on the delivery task with only one hierarchical level.

    :param args: kwargs for the agent; the mapping is copied before the
        seed suffix is appended to "load_weights", so the caller's object is
        left untouched
    :param seed: random seed; when None, the current HRLutils.SEED is used
    """

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    net = nef.Network("run_flat_delivery")

    # work on a copy: appending the seed in place would change the caller's
    # dict and stack another suffix every time the same dict is reused
    args = dict(args)
    if args.get("load_weights") is not None:
        args["load_weights"] += "_%s" % seed

    stateN = 1200
    contextD = 2
    context_scale = 1.0
    max_state_input = 2
    actions = [("up", [0, 1]), ("right", [1, 0]),
               ("down", [0, -1]), ("left", [-1, 0])]

    # ##ENVIRONMENT

    env = deliveryenvironment.DeliveryEnvironment(
        actions, HRLutils.datafile("contextmap.bmp"),
        colormap={-16777216: "wall", -1: "floor", -256: "a", -2088896: "b"},
        imgsize=(5, 5), dx=0.001, placedev=0.5)
    net.add(env)

    # single-argument print form: same output, parses on Python 2 and 3
    print("generated %s placecells" % len(env.placecells))

    # ##NAV AGENT

    enc = env.gen_encoders(stateN, contextD, context_scale)
    enc = MU.prod(enc, 1.0 / max_state_input)

    # one evaluation point per line, values separated by single spaces
    with open(HRLutils.datafile("contextbmp_evalpoints_%s.txt" % seed)) as f:
        evals = [[float(x) for x in line.split(" ")] for line in f]

    nav_agent = smdpagent.SMDPAgent(stateN, len(env.placecells) + contextD,
                                    actions, name="NavAgent",
                                    state_encoders=enc, state_evals=evals,
                                    state_threshold=0.8, **args)
    net.add(nav_agent)

    print("agent neurons: %s" % nav_agent.countNeurons())

    net.connect(nav_agent.getOrigin("action_output"),
                env.getTermination("action"))
    net.connect(env.getOrigin("placewcontext"),
                nav_agent.getTermination("state_input"))

    nav_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.9)): None}, env, name="NavTermNode",
        contextD=2)
    net.add(nav_term_node)
    net.connect(env.getOrigin("context"),
                nav_term_node.getTermination("context"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("reset"))
    net.connect(nav_term_node.getOrigin("learn"),
                nav_agent.getTermination("learn"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_state"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_action"))

    # environment reward and pseudoreward are summed in one relay
    reward_relay = net.make("reward_relay", 1, 1, mode="direct")
    reward_relay.fixMode()
    net.connect(env.getOrigin("reward"), reward_relay)
    net.connect(nav_term_node.getOrigin("pseudoreward"), reward_relay)
    net.connect(reward_relay, nav_agent.getTermination("reward"))

    # period to save weights (realtime, not simulation time)
    weight_save = 600.0
    # NOTE(review): the thread handle is discarded, so nothing here ever
    # stops it -- confirm that is intended.
    HRLutils.WeightSaveThread(nav_agent.getNode("QNetwork").saveParams,
                              os.path.join("weights", "%s_%s" %
                                           (nav_agent.name, seed)),
                              weight_save).start()

    # data collection node
    data = datanode.DataNode(period=5,
                             filename=HRLutils.datafile("dataoutput_%s.txt" %
                                                        seed))
    net.add(data)
    q_net = nav_agent.getNode("QNetwork")
    data.record_avg(env.getOrigin("reward"))
    data.record_avg(q_net.getNode("actionvals").getOrigin("X"))
    data.record_sparsity(q_net.getNode("state_pop").getOrigin("AXON"))
    data.record_avg(q_net.getNode("valdiff").getOrigin("X"))
    data.record_avg(nav_agent.getNode("ErrorNetwork").getOrigin("error"))

    # net.add_to_nengo()
    # net.run(10000)
    net.view()
def __init__(self, num_actions, Qradius=1.0, rewardradius=1.0, discount=0.3):
    """Construct the ErrorNetwork.

    Exposes terminations "curr_bg_input", "saved_bg_input", "vals",
    "old_vals", "reward", "reset" and "learn", and the origin "error".

    :param num_actions: the number of actions available to the system
    :param Qradius: expected radius of Q values
    :param rewardradius: expected radius of reward signal
    :param discount: discount factor
    """

    self.name = "ErrorNetwork"
    net = nef.Network(self, seed=HRLutils.SEED, quick=False)

    base_neurons = 50
    tau_psc = 0.007
    # soft cap on error magnitude (large errors seem to cause problems with
    # overly-generalizing the learning)
    error_cap = 0.1

    def restrict_modes(node):
        # limit the node to the two simulation modes used by this network
        node.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

    def make_relay(label):
        # direct-mode pass-through with an identity "input" termination
        relay = net.make(label, 1, num_actions, mode="direct")
        relay.fixMode()
        relay.addDecodedTermination("input", MU.I(num_actions), 0.001, False)
        return relay

    def make_gated_array(label, radius):
        # ensemble array with one sub-population per action
        array = net.make_array(label, base_neurons * 2, num_actions,
                               node_factory=HRLutils.node_fac(),
                               radius=radius)
        restrict_modes(array)
        return array

    def suppress_unselected(population, strength):
        # one row per neuron; the neurons belonging to action i receive
        # `strength` from every BG channel except channel i
        neurons_per_action = population.getNeurons() / num_actions
        rows = []
        for selected in range(num_actions):
            for _ in range(neurons_per_action):
                rows.append([0 if other == selected else strength
                             for other in range(num_actions)])
        return rows

    def select_chosen(array_label, values_relay, bg_relay, sum_label):
        # gate the Q values by the BG output, then sum what is left into a
        # single value
        gated = make_gated_array(array_label, Qradius)
        net.connect(values_relay, gated, pstc=tau_psc)
        net.connect(bg_relay, gated,
                    transform=suppress_unselected(gated, -3), pstc=tau_psc)

        summed = net.make(sum_label, 1, 1, mode="direct")
        summed.fixMode()
        net.connect(gated, summed, transform=[[1] * num_actions], pstc=0.001)
        return summed

    # set up relays
    vals_relay = make_relay("vals_relay")
    old_vals_relay = make_relay("old_vals_relay")
    curr_bg_relay = make_relay("curr_bg_relay")
    saved_bg_relay = make_relay("saved_bg_relay")

    # select out only the currently chosen Q value
    currQ = select_chosen("gatedQ", vals_relay, curr_bg_relay, "currQ")

    # select out only the previously chosen Q value
    storeQ = select_chosen("gatedstoreQ", old_vals_relay, saved_bg_relay,
                           "storeQ")

    # create error calculation network
    error = errorcalc2.ErrorCalc2(discount, rewardradius=rewardradius,
                                  Qradius=Qradius)
    net.add(error)

    net.connect(currQ, error.getTermination("currQ"))
    net.connect(storeQ, error.getTermination("storeQ"))

    # gate error by learning signal and saved BG output (we only want error
    # when the system is supposed to be learning, and we only want error
    # related to the action that was selected)
    gatederror = make_gated_array("gatederror", error_cap)

    # scale the error by Qradius, so that we don't get super huge errors
    # (causes problems with the gating)
    net.connect(error, gatederror,
                transform=[[1.0 / Qradius] for _ in range(num_actions)],
                pstc=tau_psc)

    learninggate = net.make("learninggate", base_neurons, 1,
                            node_factory=HRLutils.node_fac())
    restrict_modes(learninggate)
    learninggate.addTermination("gate",
                                [[-10] for _ in range(base_neurons)],
                                tau_psc, False)

    # the gate population outputs a constant 1 that inhibits every
    # gatederror neuron
    net.connect(learninggate, gatederror, func=lambda x: [1.0],
                transform=[[-12] for _ in range(gatederror.getNeurons())],
                pstc=tau_psc)

    net.connect(saved_bg_relay, gatederror,
                transform=suppress_unselected(gatederror, -12),
                pstc=tau_psc)

    # add a positive bias to the error anywhere the Q values are negative
    # (to stop Q values from getting too negative, which causes problems
    # with the action selection)
    posbias = positivebias.PositiveBias(base_neurons, num_actions)
    net.add(posbias)
    net.connect(old_vals_relay, posbias.getTermination("input"))
    net.connect(learninggate, posbias.getTermination("learn"),
                func=lambda x: [1.0])

    # gated error and positive bias are summed in one direct-mode node
    biasederror = net.make("biasederror", 1, num_actions, mode="direct")
    biasederror.fixMode()
    net.connect(gatederror, biasederror, pstc=0.001)
    net.connect(posbias, biasederror, pstc=0.001)

    for termination, exposed_name in (
            (curr_bg_relay.getTermination("input"), "curr_bg_input"),
            (saved_bg_relay.getTermination("input"), "saved_bg_input"),
            (vals_relay.getTermination("input"), "vals"),
            (old_vals_relay.getTermination("input"), "old_vals"),
            (error.getTermination("reward"), "reward"),
            (error.getTermination("reset"), "reset"),
            (learninggate.getTermination("gate"), "learn")):
        self.exposeTermination(termination, exposed_name)

    self.exposeOrigin(biasederror.getOrigin("X"), "error")
def run_badreenvironment(nav_args, ctrl_args, bias=0.0, seed=None, flat=False,
                         label="tmp"):
    """Runs the model on the Badre et al. (2010) task.

    Builds a lower-level "NavAgent" and a higher-level "CtrlAgent" around a
    BadreEnvironment, starts the weight-saving threads, attaches a data
    collection node and opens the network view.

    :param nav_args: extra kwargs passed to the NavAgent SMDPAgent
    :param ctrl_args: extra kwargs passed to the CtrlAgent SMDPAgent
    :param bias: fraction of the absolute reward added to the ctrl agent's
        reward when its action output indicates a rule was selected
    :param seed: random seed; when None, the current HRLutils.SEED is used
    :param flat: passed through to BadreEnvironment
    :param label: suffix for the data output filename
    """

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    net = nef.Network("run_badreenvironment")

    env = badreenvironment.BadreEnvironment(flat=flat)
    net.add(env)

    # ##NAV AGENT
    stateN = 500
    max_state_input = 3
    enc = env.gen_encoders(stateN, 0, 0.0)

    # generate evaluation points: scaled axis vectors plus every
    # orientation/shape/colour combination
    orientations = MU.I(env.num_orientations)
    shapes = MU.I(env.num_shapes)
    colours = MU.I(env.num_colours)
    evals = (list(MU.diag([3 for _ in range(env.stateD)])) +
             [o + s + c
              for o in orientations for s in shapes for c in colours])

    # create lower level
    nav_agent = smdpagent.SMDPAgent(stateN, env.stateD, env.actions,
                                    name="NavAgent",
                                    stateradius=max_state_input,
                                    state_encoders=enc, state_evals=evals,
                                    discount=0.5, **nav_args)
    net.add(nav_agent)

    print "agent neurons:", nav_agent.countNeurons()

    # actions terminate on fixed schedule (aligned with environment)
    nav_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.6)): None}, env, name="NavTermNode",
        state_delay=0.1, reset_delay=0.05, reset_interval=0.1)
    net.add(nav_term_node)

    # the reset signal also triggers saving of state and action
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("reset"))
    net.connect(nav_term_node.getOrigin("learn"),
                nav_agent.getTermination("learn"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_state"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_action"))

    net.connect(nav_agent.getOrigin("action_output"),
                env.getTermination("action"))

    # ##CTRL AGENT
    stateN = 500
    enc = RandomHypersphereVG().genVectors(stateN, env.stateD)
    actions = [("shape", [0, 1]), ("orientation", [1, 0]), ("null", [0, 0])]
    ctrl_agent = smdpagent.SMDPAgent(stateN, env.stateD, actions,
                                     name="CtrlAgent", state_encoders=enc,
                                     stateradius=max_state_input,
                                     state_evals=evals, discount=0.4,
                                     **ctrl_args)
    net.add(ctrl_agent)

    print "agent neurons:", ctrl_agent.countNeurons()

    net.connect(env.getOrigin("state"),
                ctrl_agent.getTermination("state_input"))

    ctrl_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.6)): None}, env, name="CtrlTermNode",
        state_delay=0.1, reset_delay=0.05, reset_interval=0.1)
    net.add(ctrl_term_node)

    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("reset"))
    net.connect(ctrl_term_node.getOrigin("learn"),
                ctrl_agent.getTermination("learn"))
    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("save_state"))
    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("save_action"))

    # ctrl gets a slight bonus if it selects a rule (as opposed to null), to
    # encourage it to not just pick null all the time
    # reward_relay holds [env reward, ctrl action output (2 dims)]
    reward_relay = net.make("reward_relay", 1, 3, mode="direct")
    reward_relay.fixMode()
    net.connect(env.getOrigin("reward"), reward_relay,
                transform=[[1], [0], [0]])
    net.connect(ctrl_agent.getOrigin("action_output"), reward_relay,
                transform=[[0, 0], [1, 0], [0, 1]])

    net.connect(reward_relay, ctrl_agent.getTermination("reward"),
                func=lambda x: ((x[0] + bias * abs(x[0]))
                                if x[1] + x[2] > 0.5
                                else x[0]),
                origin_name="ctrl_reward")

    # ideal reward function (for testing)
    # def ctrl_reward_func(x):
    #     if abs(x[0]) < 0.5:
    #         return 0.0
    #
    #     if flat:
    #         return 1.5 if x[1] + x[2] < 0.5 else -1.5
    #     else:
    #         if x[1] + x[2] < 0.5:
    #             return -1.5
    #         if [round(a) for a in env.state[-2:]] == [round(b)
    #                                                   for b in x[1:]]:
    #             return 1.5
    #         else:
    #             return -1.5
    # net.connect(reward_relay, ctrl_agent.getTermination("reward"),
    #             func=ctrl_reward_func)

    # nav rewarded for picking ctrl target
    def nav_reward_func(x):
        # no reward signal from the environment, or no action taken yet
        if abs(x[0]) < 0.5 or env.action is None:
            return 0.0

        # ctrl output near zero (null): pass the environment reward through
        if x[1] + x[2] < 0.5:
            return x[0]

        # otherwise compare the chosen action's vector with the slice of the
        # state that corresponds to the aspect ctrl selected
        if x[1] > x[2]:
            return (1.5 if env.action[1] == env.state[:env.num_orientations]
                    else -1.5)
        else:
            return (1.5 if env.action[1] == env.state[env.num_orientations:
                                                       - env.num_colours]
                    else -1.5)
    net.connect(reward_relay, nav_agent.getTermination("reward"),
                func=nav_reward_func)

    # state for navagent controlled by ctrlagent
    ctrl_state_inhib = net.make_array("ctrl_state_inhib", 50, env.stateD,
                                      radius=2, mode=HRLutils.SIMULATION_MODE)
    ctrl_state_inhib.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

    # one row per neuron (50 per state dimension), grouped as orientation,
    # shape, then colour dimensions
    inhib_matrix = [[0, -5]] * 50 * env.num_orientations + \
                   [[-5, 0]] * 50 * env.num_shapes + \
                   [[-5, -5]] * 50 * env.num_colours

    # ctrl output inhibits all the non-selected aspects of the state
    net.connect(env.getOrigin("state"), ctrl_state_inhib)
    net.connect(ctrl_agent.getOrigin("action_output"), ctrl_state_inhib,
                transform=inhib_matrix)

    # also give a boost to the selected aspects (so that neurons are roughly
    # equally activated).
    def boost_func(x):
        # x[0] carries the summed ctrl output, x[1:] the inhibited state
        if x[0] > 0.5:
            return [3 * v for v in x[1:]]
        else:
            return x[1:]
    boost = net.make("boost", 1, 1 + env.stateD, mode="direct")
    boost.fixMode()
    net.connect(ctrl_state_inhib, boost,
                transform=([[0 for _ in range(env.stateD)]] +
                           list(MU.I(env.stateD))))
    net.connect(ctrl_agent.getOrigin("action_output"), boost,
                transform=[[1, 1]] + [[0, 0] for _ in range(env.stateD)])

    net.connect(boost, nav_agent.getTermination("state_input"),
                func=boost_func)

    # save weights
    weight_save = 1.0  # period to save weights (realtime, not simulation time)
    threads = [
        HRLutils.WeightSaveThread(nav_agent.getNode("QNetwork").saveParams,
                                  os.path.join("weights", "%s_%s" %
                                               (nav_agent.name, seed)),
                                  weight_save),
        HRLutils.WeightSaveThread(ctrl_agent.getNode("QNetwork").saveParams,
                                  os.path.join("weights", "%s_%s" %
                                               (ctrl_agent.name, seed)),
                                  weight_save)]
    for t in threads:
        t.start()

    # data collection node
    data = datanode.DataNode(period=1,
                             filename=HRLutils.datafile("dataoutput_%s.txt" %
                                                        label),
                             header="%s %s %s %s %s" % (nav_args, ctrl_args,
                                                        bias, seed, flat))
    print "saving data to", data.filename
    print "header", data.header
    net.add(data)
    nav_q = nav_agent.getNode("QNetwork")
    ctrl_q = ctrl_agent.getNode("QNetwork")
    ctrl_bg = ctrl_agent.getNode("BGNetwork").getNode("weight_actions")
    data.record_avg(env.getOrigin("reward"))
    data.record_avg(ctrl_q.getNode("actionvals").getOrigin("X"))
    data.record_sparsity(ctrl_q.getNode("state_pop").getOrigin("AXON"))
    data.record_sparsity(nav_q.getNode("state_pop").getOrigin("AXON"))
    data.record_avg(ctrl_q.getNode("valdiff").getOrigin("X"))
    data.record_avg(ctrl_agent.getNode("ErrorNetwork").getOrigin("error"))
    data.record_avg(ctrl_bg.getNode("0").getOrigin("AXON"))
    data.record_avg(ctrl_bg.getNode("1").getOrigin("AXON"))
    data.record(env.getOrigin("score"))

    # net.add_to_nengo()
    # net.network.simulator.run(0, 300, 0.001)
    net.view()

    # stop the weight-saving threads once net.view() has returned
    for t in threads:
        t.stop()
def run_contextenvironment(args, seed=None):
    """Runs the model on the context task.

    :param args: kwargs for the agent; the mapping is copied before the
        seed suffix is appended to "load_weights", so the caller's object is
        left untouched
    :param seed: random seed; when None, the current HRLutils.SEED is used
    """

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    net = nef.Network("runContextEnvironment")

    # work on a copy: appending the seed in place would change the caller's
    # dict and stack another suffix every time the same dict is reused
    args = dict(args)
    if args.get("load_weights") is not None:
        args["load_weights"] += "_%s" % seed

    stateN = 1200  # number of neurons to use in state population
    contextD = 2  # dimension of context vector
    context_scale = 1.0  # scale of context representation
    max_state_input = 2  # max length of input vector for state population

    # actions (label and vector) available to the system
    actions = [("up", [0, 1]), ("right", [1, 0]),
               ("down", [0, -1]), ("left", [-1, 0])]

    # context labels and rewards for achieving those context goals
    rewards = {"a": 1.5, "b": 1.5}

    env = contextenvironment.ContextEnvironment(
        actions, HRLutils.datafile("contextmap.bmp"), contextD, rewards,
        colormap={-16777216: "wall", -1: "floor", -256: "a", -2088896: "b"},
        imgsize=(5, 5), dx=0.001, placedev=0.5)
    net.add(env)

    # single-argument print form: same output, parses on Python 2 and 3
    print("generated %s placecells" % len(env.placecells))

    # termination node for agent (just goes off on some regular interval)
    term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.9)): 0.0}, env)
    net.add(term_node)

    # generate encoders and divide by max_state_input (so that all inputs
    # will end up being radius 1)
    enc = env.gen_encoders(stateN, contextD, context_scale)
    enc = MU.prod(enc, 1.0 / max_state_input)

    # load eval points from file (one point per line, space separated)
    with open(HRLutils.datafile("contextbmp_evalpoints_%s.txt" % seed)) as f:
        print("loading contextbmp_evalpoints_%s.txt" % seed)
        evals = [[float(x) for x in line.split(" ")] for line in f]

    agent = smdpagent.SMDPAgent(stateN, len(env.placecells) + contextD,
                                actions, state_encoders=enc,
                                state_evals=evals, state_threshold=0.8,
                                **args)
    net.add(agent)

    print("agent neurons: %s" % agent.countNeurons())

    # period to save weights (realtime, not simulation time)
    weight_save = 600.0
    t = HRLutils.WeightSaveThread(
        agent.getNode("QNetwork").saveParams,
        os.path.join("weights", "%s_%s" % (agent.name, seed)), weight_save)
    t.start()

    # from here on the save thread is running; make sure it is stopped even
    # if the remaining setup or the viewer raises
    try:
        # data collection node
        data = datanode.DataNode(
            period=5,
            filename=HRLutils.datafile("dataoutput_%s.txt" % seed))
        net.add(data)
        q_net = agent.getNode("QNetwork")
        data.record(env.getOrigin("reward"))
        data.record(q_net.getNode("actionvals").getOrigin("X"), func=max)
        data.record(q_net.getNode("actionvals").getOrigin("X"), func=min)
        data.record_sparsity(q_net.getNode("state_pop").getOrigin("AXON"))
        data.record_avg(q_net.getNode("valdiff").getOrigin("X"))
        data.record_avg(env.getOrigin("state"))

        net.connect(env.getOrigin("placewcontext"),
                    agent.getTermination("state_input"))
        net.connect(env.getOrigin("reward"), agent.getTermination("reward"))
        net.connect(term_node.getOrigin("reset"),
                    agent.getTermination("reset"))
        net.connect(term_node.getOrigin("learn"),
                    agent.getTermination("learn"))
        net.connect(term_node.getOrigin("reset"),
                    agent.getTermination("save_state"))
        net.connect(term_node.getOrigin("reset"),
                    agent.getTermination("save_action"))

        net.connect(agent.getOrigin("action_output"),
                    env.getTermination("action"))

        # net.add_to_nengo()
        # net.run(2000)
        net.view()
    finally:
        t.stop()
def __init__(self, name, N, d, radius=1.0, inputscale=1.0, recurweight=1.0,
             direct_storage=False):
    """Construct the Memory network.

    Exposes terminations "transfer" and "target" and the origin "X".

    :param name: name of network
    :param N: base number of neurons
    :param d: dimension of stored value
    :param radius: radius of stored value
    :param inputscale: controls how fast the stored value moves to the
        target
    :param recurweight: controls the preservation of the stored value
    :param direct_storage: if True, use directmode for the memory
    """

    self.name = name
    net = nef.Network(self, seed=HRLutils.SEED, quick=False)
    self.dimension = d
    self.radius = radius

    tau_psc = 0.007
    tau_integrator = 0.1

    def scaled_identity(value):
        # d x d diagonal transform with `value` on the diagonal
        return MU.diag([value] * d)

    def inhibit_all(population):
        # one row of -10 per neuron in the population
        return [[-10] for _ in range(population.getNeurons())]

    def restrict_modes(node):
        node.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

    # population that will store the value
    if direct_storage:
        storage = net.make("storage", 1, d, mode="direct")
        storage.fixMode()
    else:
        storage = net.make_array(
            "storage", N, d, node_factory=HRLutils.node_fac(),
            eval_points=[[x * 0.001] for x in range(-1000, 1000)])

    # recurrent connection that holds the stored value
    net.connect(storage, storage, pstc=tau_integrator,
                transform=scaled_identity(recurweight))

    # storageinput will represent (target - stored_value), which when used
    # as input to storage will drive the stored value to target
    storageinput = net.make_array("storageinput", N, d,
                                  node_factory=HRLutils.node_fac())
    restrict_modes(storageinput)
    # note: store everything in -1 -- 1 range by dividing by radius
    storageinput.addDecodedTermination("target",
                                       scaled_identity(1.0 / radius),
                                       tau_psc, False)

    # scale storageinput value by inputscale to control rate at which
    # it moves to the target
    net.connect(storageinput, storage, pstc=tau_integrator,
                transform=scaled_identity(inputscale * tau_integrator))

    # subtract currently stored value
    net.connect(storage, storageinput, pstc=tau_psc,
                transform=scaled_identity(-1))

    # we want to open the input gate when the transfer signal arrives (to
    # transfer storageinput to storage). using a double inhibition setup
    # (rather than just feeding it e.g. the inverse of the transfer
    # signal) so that we get a nice clean zero

    # this inhibits the storageinput population (to block input to the
    # storage)
    transferinhib = net.make("transferinhib", N, 1,
                             node_factory=HRLutils.node_fac())
    restrict_modes(transferinhib)
    transferinhib.addTermination("gate", inhibit_all(transferinhib),
                                 tau_psc, False)

    net.connect(transferinhib, storageinput, pstc=tau_psc,
                transform=inhibit_all(storageinput))

    # this drives the transferinhib population (so that by default it will
    # block any input). inhibiting transferinhib will thus remove the
    # inhibition on storageinput, and change the stored value
    biasinput = net.make_input("biasinput", [1])
    net.connect(biasinput, transferinhib, pstc=tau_psc)

    # output population (to undo radius scaling)
    storageoutput = net.make("storageoutput", 1, d, mode="direct")
    storageoutput.fixMode()
    net.connect(storage, storageoutput, pstc=0.001,
                transform=scaled_identity(radius))

    self.exposeTermination(transferinhib.getTermination("gate"), "transfer")
    self.exposeTermination(storageinput.getTermination("target"), "target")
    self.exposeOrigin(storageoutput.getOrigin("X"), "X")
def run_flat_delivery(args, seed=None):
    """Runs the model on the delivery task with only one hierarchical level.

    :param args: kwargs for the agent; the mapping is copied before the
        seed suffix is appended to "load_weights", so the caller's object is
        left untouched
    :param seed: random seed; when None, the current HRLutils.SEED is used
    """

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    net = nef.Network("run_flat_delivery")

    # work on a copy: appending the seed in place would change the caller's
    # dict and stack another suffix every time the same dict is reused
    args = dict(args)
    if args.get("load_weights") is not None:
        args["load_weights"] += "_%s" % seed

    stateN = 1200
    contextD = 2
    context_scale = 1.0
    max_state_input = 2
    actions = [("up", [0, 1]), ("right", [1, 0]),
               ("down", [0, -1]), ("left", [-1, 0])]

    # ##ENVIRONMENT

    env = deliveryenvironment.DeliveryEnvironment(
        actions, HRLutils.datafile("contextmap.bmp"),
        colormap={-16777216: "wall", -1: "floor", -256: "a", -2088896: "b"},
        imgsize=(5, 5), dx=0.001, placedev=0.5)
    net.add(env)

    # single-argument print form: same output, parses on Python 2 and 3
    print("generated %s placecells" % len(env.placecells))

    # ##NAV AGENT

    enc = env.gen_encoders(stateN, contextD, context_scale)
    enc = MU.prod(enc, 1.0 / max_state_input)

    # one evaluation point per line, values separated by single spaces
    with open(HRLutils.datafile("contextbmp_evalpoints_%s.txt" % seed)) as f:
        evals = [[float(x) for x in line.split(" ")] for line in f]

    nav_agent = smdpagent.SMDPAgent(stateN, len(env.placecells) + contextD,
                                    actions, name="NavAgent",
                                    state_encoders=enc, state_evals=evals,
                                    state_threshold=0.8, **args)
    net.add(nav_agent)

    print("agent neurons: %s" % nav_agent.countNeurons())

    net.connect(nav_agent.getOrigin("action_output"),
                env.getTermination("action"))
    net.connect(env.getOrigin("placewcontext"),
                nav_agent.getTermination("state_input"))

    nav_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.9)): None}, env, name="NavTermNode",
        contextD=2)
    net.add(nav_term_node)
    net.connect(env.getOrigin("context"),
                nav_term_node.getTermination("context"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("reset"))
    net.connect(nav_term_node.getOrigin("learn"),
                nav_agent.getTermination("learn"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_state"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_action"))

    # environment reward and pseudoreward are summed in one relay
    reward_relay = net.make("reward_relay", 1, 1, mode="direct")
    reward_relay.fixMode()
    net.connect(env.getOrigin("reward"), reward_relay)
    net.connect(nav_term_node.getOrigin("pseudoreward"), reward_relay)
    net.connect(reward_relay, nav_agent.getTermination("reward"))

    # period to save weights (realtime, not simulation time)
    weight_save = 600.0
    # NOTE(review): the thread handle is discarded, so nothing here ever
    # stops it -- confirm that is intended.
    HRLutils.WeightSaveThread(
        nav_agent.getNode("QNetwork").saveParams,
        os.path.join("weights", "%s_%s" % (nav_agent.name, seed)),
        weight_save).start()

    # data collection node
    data = datanode.DataNode(period=5,
                             filename=HRLutils.datafile("dataoutput_%s.txt" %
                                                        seed))
    net.add(data)
    q_net = nav_agent.getNode("QNetwork")
    data.record_avg(env.getOrigin("reward"))
    data.record_avg(q_net.getNode("actionvals").getOrigin("X"))
    data.record_sparsity(q_net.getNode("state_pop").getOrigin("AXON"))
    data.record_avg(q_net.getNode("valdiff").getOrigin("X"))
    data.record_avg(nav_agent.getNode("ErrorNetwork").getOrigin("error"))

    # net.add_to_nengo()
    # net.run(10000)
    net.view()
def make(net, preName='pre', postName='post', rate=5e-4):
    """Add a BCM-learning connection from node ``preName`` to ``postName``.

    A non-decoded BCM termination is added to the post ensemble, initialised
    with ``encoder * t * decoder^T`` weights, connected to the pre ensemble's
    'AXON' origin and registered with ``net.learn``. The template is then
    recorded in the network metadata ("bcmterm", "templates",
    "templateProjections").

    :param net: network wrapper exposing ``network``, ``connect``, ``learn``
    :param preName: name of the pre-synaptic node inside ``net.network``
    :param postName: name of the post-synaptic node inside ``net.network``
    :param rate: learning rate handed to ``net.learn``
    """
    # get pre and post ensembles from their names
    pre = net.network.getNode(preName)
    post = net.network.getNode(postName)

    dim_pre = pre.getDimension()
    dim_post = post.getDimension()

    # transform linking pre dimension (i % dim_pre) to post dimension
    # (i % dim_post), wrapping around whichever side is smaller
    t = [[0] * dim_pre for _ in range(dim_post)]
    for i in range(max(dim_pre, dim_post)):
        t[i % dim_post][i % dim_pre] = 1

    decoder = pre.getOrigin('X').getDecoders()
    encoder = post.getEncoders()
    # scale encoders by the first radius of the post ensemble
    encoder = MU.prod(encoder, 1.0 / post.getRadii()[0])
    weight = MU.prod(encoder, MU.prod(t, MU.transpose(decoder)))

    # random weight matrix to initialize projection from pre to post
    # def rand_weights(w):
    #     for i in range(len(w)):
    #         for j in range(len(w[0])):
    #             w[i][j] = random.uniform(-1e-3,1e-3)
    #     return w
    # weight = rand_weights(numeric.zeros((post.neurons, pre.neurons)).tolist())

    # non-decoded termination (to learn transformation); pick the first
    # "<pre name>_NN" that is not already a termination on post.  The name
    # list is built once, with a loop variable that cannot rebind ``t``.
    count = 0
    prename = pre.getName()
    existing_names = [term.name for term in post.terminations]
    while '%s_%02d' % (prename, count) in existing_names:
        count = count + 1
    prename = '%s_%02d' % (prename, count)

    post.addBCMTermination(prename, weight, 0.005, False, None)

    # Add projections
    net.connect(pre.getOrigin('AXON'), post.getTermination(prename))

    # Set learning rule on the non-decoded termination
    net.learn(post, prename, None, rate=rate)

    # record the template parameters in the network metadata, creating each
    # metadata container on first use
    if net.network.getMetaData("bcmterm") is None:
        net.network.setMetaData("bcmterm", HashMap())
    bcmterms = net.network.getMetaData("bcmterm")

    bcmterm = HashMap(4)
    bcmterm.put("preName", preName)
    bcmterm.put("postName", postName)
    bcmterm.put("rate", rate)

    bcmterms.put(prename, bcmterm)

    if net.network.getMetaData("templates") is None:
        net.network.setMetaData("templates", ArrayList())
    templates = net.network.getMetaData("templates")
    templates.add(prename)

    if net.network.getMetaData("templateProjections") is None:
        net.network.setMetaData("templateProjections", HashMap())
    templateproj = net.network.getMetaData("templateProjections")
    templateproj.put(preName, postName)
def termination_action(self, a, pstc=0.01):
    """Store in ``self.action`` the entry of ``self.actions`` best matching ``a``.

    :param a: current action input
    :param pstc: not used in the body
    """

    def similarity(candidate):
        # candidate is an entry of self.actions; element 1 is compared to a
        return MU.prod(a, candidate[1])

    # the selected action is the one with highest similarity to the input
    best_match = max(self.actions, key=similarity)
    self.action = best_match
from ca.nengo.util import MU
from java.io import File
import math

# Script: for each network, sweep the first setNoise argument over nInput,
# run the network, and plot the variance of its output against that value.
# NOTE(review): the loop nesting below was reconstructed from a source whose
# indentation was lost -- confirm where clearErrors(), Plotter.plot() and
# exporter.write() belong relative to the two loops.

nInput = range(200, 2001, 400)  # values passed as first argument of setNoise
nDiff = 1000;  # second argument of setNoise, held fixed
# NOTE(review): these network objects, MatlabExporter, ConstantFunction,
# SimulationMode and Plotter are not defined or imported in this snippet;
# presumably they come from an enclosing session -- verify.
networks = [interneuron, dualTC, adapting, depressing, butterworth, interneuronFeedback]
exporter = MatlabExporter()

for network in networks:
    network.setInputFunction(ConstantFunction(1, 0));
    network.setStepSize(.0001)
    network.setMode(SimulationMode.DIRECT);
    # per-network results, one entry per value in nInput
    inputVariance = [];
    outputVariance = [];
    for n in nInput:
        network.setNoise(n, nDiff);
        #network.setDistortion(n, nDiff);
        network.reset(0)
        network.run(0, 10);
        # MU.prod(values, [1]) presumably flattens single-column values into
        # a vector before taking the variance about 0 -- TODO confirm
        inputVariance.append(MU.variance(MU.prod(network.getInputEnsembleData().getValues(), [1]), 0))
        outputVariance.append(MU.variance(MU.prod(network.getOutputData().getValues(), [1]), 0))
    network.clearErrors();
    # NOTE(review): inputVariance is collected but never plotted or exported
    Plotter.plot(nInput, outputVariance, "output")
# NOTE(review): nothing visible here is added to the exporter before writing
exporter.write(File("noise.mat"));
def __init__(self, name, N, stateN, actions, learningrate, Qradius=1.0,
             init_decoders=None):
    """Build ActionValues network.

    :param name: name of Network
    :param N: base number of neurons
    :param stateN: number of neurons in state population
    :param actions: actions available to the system
    :type actions: list of tuples (action_name,action_vector)
    :param learningrate: learning rate for PES rule
    :param Qradius: expected radius of Q values
    :param init_decoders: if specified, will be used to initialize the
        connection weights to whatever function is specified by decoders
    """

    self.name = name
    net = nef.Network(self, seed=HRLutils.SEED, quick=False)

    self.N = N
    self.learningrate = learningrate
    self.supervision = 1.0  # don't use the unsupervised stuff at all

    self.tauPSC = 0.007

    modterms = []
    learnterms = []

    # relays
    output = net.make("output", 1, len(actions), mode="direct")
    output.fixMode()

    for i, action in enumerate(actions):
        # create one population corresponding to each action
        act_pop = net.make("action_" + action[0], self.N * 4, 1,
                           node_factory=HRLutils.node_fac())
        act_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

        # add error termination; the modulatory transform selects the one
        # dimension of the error signal belonging to this action
        modterm = act_pop.addDecodedTermination(
            "error", [[0 if j != i else 1 for j in range(len(actions))]],
            0.005, True)

        # create learning termination
        if init_decoders is not None:
            weights = MU.prod(act_pop.getEncoders(),
                              MU.transpose(init_decoders))
        else:
            # The comprehension variables must NOT be called i: under
            # Python 2 / Jython they leak into this scope and would
            # overwrite the action index used by the output transform below.
            weights = [[random.uniform(-1e-3, 1e-3)
                        for _col in range(stateN)]
                       for _row in range(act_pop.getNeurons())]
        learningterm = act_pop.addHPESTermination("learning", weights,
                                                  0.005, False, None)

        # initialize the learning rule
        net.learn(act_pop, learningterm, modterm, rate=self.learningrate,
                  supervisionRatio=self.supervision)

        # connect each action back to output relay
        # note, we learn all the Q values with radius 1, then just
        # multiply by the desired Q radius here
        net.connect(act_pop.getOrigin("X"), output,
                    transform=[[0] if j != i else [Qradius]
                               for j in range(len(actions))],
                    pstc=0.001)

        modterms += [modterm]
        learnterms += [learningterm]

    # use EnsembleTerminations to group the individual action terminations
    # into one multi-dimensional termination
    self.exposeTermination(EnsembleTermination(self, "state", learnterms),
                           "state")
    self.exposeTermination(EnsembleTermination(self, "error", modterms),
                           "error")

    self.exposeOrigin(output.getOrigin("X"), "X")
def run_flat_delivery(args, seed=None):
    """Runs the model on the delivery task with only one hierarchical level.

    :param args: dict of extra keyword arguments passed on to SMDPAgent; a
        non-None "load_weights" entry gets "_<seed>" appended before use
        (the caller's dict itself is not modified)
    :param seed: if not None, passed to HRLutils.set_seed before building
    """

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    net = nef.Network("run_flat_delivery")

    # work on a shallow copy: the original code edited the caller's dict in
    # place, so reusing one dict for several runs stacked up seed suffixes.
    # dict.get replaces the deprecated dict.has_key lookup.
    args = dict(args)
    if args.get("load_weights") is not None:
        args["load_weights"] += "_%s" % seed

    stateN = 1200
    contextD = 2
    context_scale = 1.0
    max_state_input = 2
    actions = [("up", [0, 1]), ("right", [1, 0]),
               ("down", [0, -1]), ("left", [-1, 0])]

    # ##ENVIRONMENT

    env = deliveryenvironment.DeliveryEnvironment(
        actions, HRLutils.datafile("contextmap.bmp"),
        colormap={-16777216: "wall", -1: "floor", -256: "a", -2088896: "b"},
        imgsize=(5, 5), dx=0.001, placedev=0.5)
    net.add(env)

    # single-argument print form: same output under Python 2 and 3
    print("generated %s placecells" % len(env.placecells))

    # ##NAV AGENT

    enc = env.gen_encoders(stateN, contextD, context_scale)
    enc = MU.prod(enc, 1.0 / max_state_input)

    # one evaluation point per line, values separated by single spaces
    with open(HRLutils.datafile("contextbmp_evalpoints_%s.txt" % seed)) as f:
        evals = [[float(x) for x in line.split(" ")] for line in f]

    nav_agent = smdpagent.SMDPAgent(stateN, len(env.placecells) + contextD,
                                    actions, name="NavAgent",
                                    state_encoders=enc, state_evals=evals,
                                    state_threshold=0.8, **args)
    net.add(nav_agent)

    print("agent neurons: %s" % nav_agent.countNeurons())

    # Connect the agent's actions to the environment so the agent can act
    # upon the environment
    net.connect(nav_agent.getOrigin("action_output"),
                env.getTermination("action"))
    # Connect the environment state to the agent, so the agent knows the
    # effect of its action
    net.connect(env.getOrigin("placewcontext"),
                nav_agent.getTermination("state_input"))

    # net.connect(env.getOrigin("reward"), nav_agent.getTermination("reward"))
    # net.connect(env.getOrigin("optimal_move"),
    #             nav_agent.getTermination("bg_input"))

    # termination node for nav_agent (just a timer that goes off regularly)
    nav_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.9)): None}, env, name="NavTermNode",
        contextD=contextD)
    net.add(nav_term_node)
    net.connect(env.getOrigin("context"),
                nav_term_node.getTermination("context"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("reset"))
    net.connect(nav_term_node.getOrigin("learn"),
                nav_agent.getTermination("learn"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_state"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_action"))

    # Environment reward and termination-node pseudoreward both feed one
    # direct-mode relay, whose output drives the agent's reward termination.
    # NOTE(review): it is unclear whether the relay is strictly required or
    # both origins could be connected to the agent directly -- confirm.
    reward_relay = net.make("reward_relay", 1, 1, mode="direct")
    reward_relay.fixMode()
    net.connect(env.getOrigin("reward"), reward_relay)
    net.connect(nav_term_node.getOrigin("pseudoreward"), reward_relay)
    net.connect(reward_relay, nav_agent.getTermination("reward"))

    # save weights
    # period to save weights (realtime, not simulation time)
    weight_save = 600.0
    HRLutils.WeightSaveThread(
        nav_agent.getNode("QNetwork").saveParams,
        os.path.join("weights", "%s_%s" % (nav_agent.name, seed)),
        weight_save).start()

    # data collection node (all recordings are currently disabled)
    data = datanode.DataNode(
        period=5, show_plots=None,
        filename=HRLutils.datafile("dataoutput_%s.txt" % seed))
    net.add(data)
    # data.record_avg(env.getOrigin("reward"), filter=1e-5)
    # data.record_avg(nav_agent.getNode("QNetwork").getNode("actionvals").getOrigin("X"), filter=1e-5)
    # data.record_sparsity(nav_agent.getNode("QNetwork").getNode("state_pop").getOrigin("AXON"), filter=1e-5)
    # data.record_avg(nav_agent.getNode("QNetwork").getNode("valdiff").getOrigin("X"), filter=1e-5)
    # NOTE(review): "ErrorNetwork" was reported not to be the correct node
    # name here; the correct one is unknown -- verify before re-enabling.
    # data.record_avg(nav_agent.getNode("ErrorNetwork").getOrigin("error"), filter=1e-5)

    net.add_to_nengo()
    net.view()
def run_badreenvironment(nav_args, ctrl_args, seed=None, flat=False):
    """Build and run a two-agent (nav + ctrl) model on the BadreEnvironment.

    :param nav_args: extra keyword arguments passed to the "NavAgent"
        SMDPAgent
    :param ctrl_args: extra keyword arguments passed to the "CtrlAgent"
        SMDPAgent
    :param seed: if not None, passed to HRLutils.set_seed before building
    :param flat: forwarded to BadreEnvironment(flat=...)
    """

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    net = nef.Network("run_badreenvironment")

    env = badreenvironment.BadreEnvironment(flat=flat)
    net.add(env)

    ###NAV AGENT
    stateN = 500
    max_state_input = 2
    enc = env.gen_encoders(stateN, 0, 1.0)
    enc = MU.prod(enc, 1.0 / max_state_input)

    # with open(HRLutils.datafile("badre_evalpoints.txt")) as f:
    #     evals = [[float(x) for x in l.split(" ")] for l in f.readlines()]

    # evaluation points: the identity rows over the state dimensions plus
    # every concatenation of one orientation, one shape and one colour row
    orientations = MU.I(env.num_orientations)
    shapes = MU.I(env.num_shapes)
    colours = MU.I(env.num_colours)
    evals = list(MU.I(env.stateD)) + \
            [o+s+c for o in orientations for s in shapes for c in colours]

    nav_agent = smdpagent.SMDPAgent(stateN, env.stateD, env.actions, name="NavAgent",
                                    load_weights=None, state_encoders=enc,
                                    state_evals=evals, discount=0.4, **nav_args)
    net.add(nav_agent)

    print "agent neurons:", nav_agent.countNeurons()

    # termination node for the nav agent, driven by a Timer((0.6, 0.6))
    nav_term_node = terminationnode.TerminationNode({terminationnode.Timer((0.6, 0.6)):None}, env,
                                                    name="NavTermNode", state_delay=0.1)
    net.add(nav_term_node)
    net.connect(nav_term_node.getOrigin("reset"), nav_agent.getTermination("reset"))
    net.connect(nav_term_node.getOrigin("learn"), nav_agent.getTermination("learn"))
    net.connect(nav_term_node.getOrigin("reset"), nav_agent.getTermination("save_state"))
    net.connect(nav_term_node.getOrigin("reset"), nav_agent.getTermination("save_action"))

    net.connect(nav_agent.getOrigin("action_output"), env.getTermination("action"))

    ###CTRL AGENT
    enc = env.gen_encoders(stateN, 0, 0)
    enc = MU.prod(enc, 1.0 / max_state_input)
    actions = [("shape", [0, 1]), ("orientation", [1, 0]), ("null", [0, 0])]
    ctrl_agent = smdpagent.SMDPAgent(stateN, env.stateD, actions, name="CtrlAgent",
                                     load_weights=None, state_encoders=enc,
                                     state_evals=evals, discount=0.4, **ctrl_args)
    net.add(ctrl_agent)

    print "agent neurons:", ctrl_agent.countNeurons()

    # the ctrl agent sees the raw environment state
    net.connect(env.getOrigin("state"), ctrl_agent.getTermination("state_input"))

    ctrl_term_node = terminationnode.TerminationNode({terminationnode.Timer((0.6, 0.6)):None}, env,
                                                     name="CtrlTermNode", state_delay=0.1)
    net.add(ctrl_term_node)
    net.connect(ctrl_term_node.getOrigin("reset"), ctrl_agent.getTermination("reset"))
    net.connect(ctrl_term_node.getOrigin("learn"), ctrl_agent.getTermination("learn"))
    net.connect(ctrl_term_node.getOrigin("reset"), ctrl_agent.getTermination("save_state"))
    net.connect(ctrl_term_node.getOrigin("reset"), ctrl_agent.getTermination("save_action"))

    ## reward for nav/ctrl
    # 2-D relay: dimension 0 receives the environment reward, dimension 1
    # receives the sum of the two ctrl action-output components
    reward_relay = net.make("reward_relay", 1, 2, mode="direct")
    reward_relay.fixMode()
    net.connect(env.getOrigin("reward"), reward_relay, transform=[[1], [0]])
    net.connect(ctrl_agent.getOrigin("action_output"), reward_relay, transform=[[0, 0], [1, 1]])

    # nav reward is just environment
    net.connect(reward_relay, nav_agent.getTermination("reward"),
                func=lambda x: x[0], origin_name="nav_reward")

    # ctrl gets a slight bonus if it selects a rule (as opposed to null), to
    # encourage it not to just pick null all the time
    net.connect(reward_relay, ctrl_agent.getTermination("reward"),
                func=lambda x: x[0]+0.25*abs(x[0]) if x[1] > 0.5 else x[0],
                origin_name="ctrl_reward")

    ## state for navagent controlled by ctrlagent
    # ctrl_output_relay = net.make("ctrl_output_relay", 1, env.stateD+2, mode="direct")
    # ctrl_output_relay.fixMode()
    ctrl_output_relay = net.make_array("ctrl_output_relay", 50, env.stateD,
                                       radius=2, mode=HRLutils.SIMULATION_MODE)
    ctrl_output_relay.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

    # NOTE(review): 50 rows per state dimension, matching the 50 passed to
    # make_array above -- presumably one row per neuron; confirm against
    # nef.Network.connect
    inhib_matrix = [[0,-5]]*50*env.num_orientations + \
                   [[-5,0]]*50*env.num_shapes + \
                   [[-5,-5]]*50*env.num_colours

    # ctrl output inhibits all the non-selected aspects of the state
    net.connect(env.getOrigin("state"), ctrl_output_relay)
    net.connect(ctrl_agent.getOrigin("action_output"), ctrl_output_relay,
                # transform=zip([0]*env.num_orientations + [-1]*(env.num_shapes+env.num_colours),
                #               [-1]*env.num_orientations + [0]*env.num_shapes + [-1]*env.num_colours))
                transform=inhib_matrix)

    # also give a boost to the selected aspects (so that neurons are roughly
    # equally activated).
    # adding 2/3 to each element (base vector has length 3, inhibited vector
    # has length 1, so add 2/3*3 --> 3)
    net.connect(ctrl_agent.getOrigin("action_output"), ctrl_output_relay,
                transform=zip([0.66]*env.num_orientations + [0]*(env.num_shapes+env.num_colours),
                              [0]*env.num_orientations + [0.66]*env.num_shapes + [2]*env.num_colours))

    # the nav agent sees the state only after ctrl's inhibition/boost
    net.connect(ctrl_output_relay, nav_agent.getTermination("state_input"))

    # save weights
    weight_save = 600.0 # period to save weights (realtime, not simulation time)
    HRLutils.WeightSaveThread(nav_agent.getNode("QNetwork").saveParams,
                              os.path.join("weights", "%s_%s" % (nav_agent.name, seed)),
                              weight_save).start()
    HRLutils.WeightSaveThread(ctrl_agent.getNode("QNetwork").saveParams,
                              os.path.join("weights", "%s_%s" % (ctrl_agent.name, seed)),
                              weight_save).start()

    # data collection node
    data = datanode.DataNode(period=5, show_plots=None,
                             filename=HRLutils.datafile("dataoutput_%s.txt" % seed))
    # NOTE(review): this local shadows the builtin filter() for the rest of
    # the function
    filter = 1e-5
    net.add(data)
    data.record_avg(env.getOrigin("reward"), filter=filter)
    data.record_avg(ctrl_agent.getNode("QNetwork").getNode("actionvals").getOrigin("X"), filter=filter)
    data.record_sparsity(ctrl_agent.getNode("QNetwork").getNode("state_pop").getOrigin("AXON"), filter=filter)
    data.record_sparsity(nav_agent.getNode("QNetwork").getNode("state_pop").getOrigin("AXON"), filter=filter)
    data.record_avg(ctrl_agent.getNode("QNetwork").getNode("valdiff").getOrigin("X"), filter=filter)
    data.record_avg(ctrl_agent.getNode("ErrorNetwork").getOrigin("error"), filter=filter)
    data.record_avg(ctrl_agent.getNode("BGNetwork").getNode("weight_actions").getNode("0").getOrigin("AXON"), filter=filter)
    data.record_avg(ctrl_agent.getNode("BGNetwork").getNode("weight_actions").getNode("1").getOrigin("AXON"), filter=filter)

    net.add_to_nengo()
    # net.view()
    net.run(2000)
def __init__(self, actions, Qradius=1, noiselevel=0.03):
    """Builds the BGNetwork.

    Exposes terminations "input" and "save_output", and origins
    "curr_vals", "curr_action", "saved_vals" and "saved_action".

    :param actions: actions available to the system
    :type actions: list of tuples (action_name,action_vector)
    :param Qradius: expected radius of Q values
    :param noiselevel: standard deviation of noise added to Q values for
        exploration
    """

    self.name = "BGNetwork"
    net = nef.Network(self, seed=HRLutils.SEED, quick=False)

    self.N = 50
    self.d = len(actions)
    self.mut_inhib = 1.0  # mutual inhibition between actions
    self.tauPSC = 0.007

    # make basal ganglia (built in its own sub-network "bg")
    netbg = nef.Network("bg")

    bginput = netbg.make("bginput", 1, self.d, mode="direct")
    bginput.fixMode()
    bginput.addDecodedTermination("input",
                                  MU.diag([1.0 / Qradius for _ in
                                           range(self.d)]), 0.001, False)
    # divide by Q radius to get values back into 0 -- 1 range

    bgoutput = netbg.make("bgoutput", 1, self.d, mode="direct")
    bgoutput.fixMode()

    basalganglia.make_basal_ganglia(netbg, bginput, bgoutput,
                                    dimensions=self.d, neurons=200)
    bg = netbg.network
    net.add(bg)
    bg.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

    bg.exposeTermination(bginput.getTermination("input"), "input")
    bg.exposeOrigin(bgoutput.getOrigin("X"), "X")

    # insert noise (used to give some randomness to drive exploration)
    # note: from here on noiselevel names the input node, not the parameter
    noiselevel = net.make_input("noiselevel", [noiselevel])

    noise = noisenode.NoiseNode(1, dimension=len(actions))
    net.add(noise)

    net.connect(noiselevel, noise.getTermination("scale"))
    net.connect(noise.getOrigin("noise"), "bg.bginput", pstc=0.001)

    # add bias to shift everything up to 0.5--1.5
    biasinput = net.make_input("biasinput", [0.5])
    net.connect(biasinput, "bg.bginput",
                transform=[[1] for _ in range(self.d)], pstc=0.001)

    # invert BG output (so the "selected" action will have a positive value
    # and the rest zero)
    invert = thalamus.make(net, name="invert", neurons=self.N,
                           dimensions=self.d, useQuick=False)
    invert.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
    net.connect(bg, invert.getTermination("bg_input"))

    # add mutual inhibition (zero on the diagonal, -mut_inhib elsewhere)
    net.connect(invert.getOrigin("xBiased"), invert, pstc=self.tauPSC,
                transform=[[0 if i == j else -self.mut_inhib
                            for j in range(self.d)]
                           for i in range(self.d)])

    # threshold output values so that you get a nice clean 0 for
    # non-selected and 1 for selected
    threshf = HRLutils.node_fac()
    threshold = 0.1
    threshf.setIntercept(IndicatorPDF(threshold, 1.0))
    val_threshold = net.make_array("val_threshold", self.N * 2, self.d,
                                   node_factory=threshf, encoders=[[1]])
    val_threshold.addDecodedOrigin(
        "output",
        [PiecewiseConstantFunction([threshold], [0, 1])
         for _ in range(self.d)], "AXON", True)

    net.connect(invert.getOrigin("xBiased"), val_threshold,
                pstc=self.tauPSC)

    # output action (action vectors weighted by BG output)
    weight_actions = net.make_array("weight_actions", 50,
                                    len(actions[0][1]), intercept=(0, 1))
    net.connect(val_threshold.getOrigin("output"), weight_actions,
                transform=MU.transpose([actions[i][1]
                                        for i in range(self.d)]),
                pstc=0.007)

    # save the BG output (selected action and selected action value)
    # save_relay drives the "transfer" termination of both memories below
    save_relay = net.make("save_relay", 1, 1, mode="direct")
    save_relay.fixMode()
    save_relay.addDecodedTermination("input", [[1]], 0.001, False)

    saved_action = memory.Memory("saved_action", self.N * 2,
                                 len(actions[0][1]), inputscale=75)
    net.add(saved_action)
    net.connect(weight_actions, saved_action.getTermination("target"))
    net.connect(save_relay, saved_action.getTermination("transfer"))

    saved_vals = memory.Memory("saved_values", self.N * 2, self.d,
                               inputscale=75)
    net.add(saved_vals)
    net.connect(val_threshold.getOrigin("output"),
                saved_vals.getTermination("target"))
    net.connect(save_relay, saved_vals.getTermination("transfer"))

    # put the saved values through a threshold (we want a nice clean
    # zero for non-selected values)
    nfac = HRLutils.node_fac()
    nfac.setIntercept(IndicatorPDF(0.2, 1))
    saved_vals_threshold = net.make_array("saved_vals_threshold", self.N,
                                          self.d, node_factory=nfac,
                                          encoders=[[1]])
    saved_vals_threshold.addDecodedOrigin(
        "output",
        [PiecewiseConstantFunction([0.3], [0, 1])
         for _ in range(self.d)], "AXON", True)

    net.connect(saved_vals, saved_vals_threshold, pstc=self.tauPSC)

    # external interface of the network
    self.exposeTermination(bg.getTermination("input"), "input")
    self.exposeTermination(save_relay.getTermination("input"),
                           "save_output")
    self.exposeOrigin(val_threshold.getOrigin("output"), "curr_vals")
    self.exposeOrigin(weight_actions.getOrigin("X"), "curr_action")
    self.exposeOrigin(saved_vals_threshold.getOrigin("output"),
                      "saved_vals")
    self.exposeOrigin(saved_action.getOrigin("X"), "saved_action")
def getValue(self):
    """Return ``MU.clone`` of ``self.matrix`` rather than the stored object."""
    matrix_copy = MU.clone(self.matrix)
    return matrix_copy
def make(net, preName='pre', postName='post', rate=5e-4):
    """Add a BCM-learning connection from node ``preName`` to ``postName``.

    A non-decoded BCM termination is added to the post ensemble, initialised
    with ``encoder * t * decoder^T`` weights, connected to the pre ensemble's
    'AXON' origin and registered with ``net.learn``. The template is then
    recorded in the network metadata ("bcmterm", "templates",
    "templateProjections").

    :param net: network wrapper exposing ``network``, ``connect``, ``learn``
    :param preName: name of the pre-synaptic node inside ``net.network``
    :param postName: name of the post-synaptic node inside ``net.network``
    :param rate: learning rate handed to ``net.learn``
    """
    # get pre and post ensembles from their names
    pre = net.network.getNode(preName)
    post = net.network.getNode(postName)

    dim_pre = pre.getDimension()
    dim_post = post.getDimension()

    # transform linking pre dimension (i % dim_pre) to post dimension
    # (i % dim_post), wrapping around whichever side is smaller
    t = [[0] * dim_pre for _ in range(dim_post)]
    for i in range(max(dim_pre, dim_post)):
        t[i % dim_post][i % dim_pre] = 1

    decoder = pre.getOrigin('X').getDecoders()
    encoder = post.getEncoders()
    # scale encoders by the first radius of the post ensemble
    encoder = MU.prod(encoder, 1.0 / post.getRadii()[0])
    weight = MU.prod(encoder, MU.prod(t, MU.transpose(decoder)))

    # random weight matrix to initialize projection from pre to post
    # def rand_weights(w):
    #     for i in range(len(w)):
    #         for j in range(len(w[0])):
    #             w[i][j] = random.uniform(-1e-3,1e-3)
    #     return w
    # weight = rand_weights(numeric.zeros((post.neurons, pre.neurons)).tolist())

    # non-decoded termination (to learn transformation); pick the first
    # "<pre name>_NN" that is not already a termination on post.  The name
    # list is built once, with a loop variable that cannot rebind ``t``.
    count = 0
    prename = pre.getName()
    existing_names = [term.name for term in post.terminations]
    while '%s_%02d' % (prename, count) in existing_names:
        count = count + 1
    prename = '%s_%02d' % (prename, count)

    post.addBCMTermination(prename, weight, 0.005, False, None)

    # Add projections
    net.connect(pre.getOrigin('AXON'), post.getTermination(prename))

    # Set learning rule on the non-decoded termination
    net.learn(post, prename, None, rate=rate)

    # record the template parameters in the network metadata, creating each
    # metadata container on first use
    if net.network.getMetaData("bcmterm") is None:
        net.network.setMetaData("bcmterm", HashMap())
    bcmterms = net.network.getMetaData("bcmterm")

    bcmterm = HashMap(4)
    bcmterm.put("preName", preName)
    bcmterm.put("postName", postName)
    bcmterm.put("rate", rate)

    bcmterms.put(prename, bcmterm)

    if net.network.getMetaData("templates") is None:
        net.network.setMetaData("templates", ArrayList())
    templates = net.network.getMetaData("templates")
    templates.add(prename)

    if net.network.getMetaData("templateProjections") is None:
        net.network.setMetaData("templateProjections", HashMap())
    templateproj = net.network.getMetaData("templateProjections")
    templateproj.put(preName, postName)
def getEncoders(self):
    """Assemble the encoders of all sub-nodes into a single matrix.

    The result is a zero matrix of ``self.neurons`` x ``self.dimension``
    into which node ``i``'s encoders are copied at row ``i * neurons`` and
    column ``i * dimension``, where ``neurons`` is the neuron count of the
    first node.
    """
    per_node = self.nodes[0].neurons
    combined = MU.zero(self.neurons, self.dimension)
    for index, node in enumerate(self.nodes):
        source = node.encoders
        row_offset = index * per_node
        col_offset = index * node.dimension
        MU.copyInto(source, combined, row_offset, col_offset, per_node)
    return combined