Example #1
 def getEncoders(self):
     neurons = self.nodes[0].neurons
     encoders = MU.zero(self.neurons, self.dimension)
     for i, n in enumerate(self.nodes):
         MU.copyInto(n.encoders, encoders, i * neurons, i * n.dimension,
                     neurons)
     return encoders
Example #2
    def gen_encoders(self, N, contextD, context_scale):
        """Generate encoders for state population of learning agent.

        :param N: number of neurons in state population
        :param contextD: dimension of context vector representation
        :param context_scale: weight on context representation relative to
            state (1.0 = equal weighting)
        """

        if contextD > 0:
            contexts = MU.I(contextD)
        else:
            contexts = [[]]

        # neurons each sensitive to different combinations of stimuli
        encs = (list(MU.I(self.stateD)) +
                [o + s + c
                 for o in MU.I(self.num_orientations)
                 for s in MU.I(self.num_shapes)
                 for c in MU.I(self.num_colours)])

        return [HRLutils.normalize(
            HRLutils.normalize(random.choice(encs)) +
            [x * context_scale for x in random.choice(contexts)])
            for _ in range(N)]
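Both branches rely on normalizing vectors to unit length before and after concatenation. Below is a minimal plain-Python sketch of that combine step; the normalize shown here is an assumption about what HRLutils.normalize does (rescale to unit length), not its actual source:

import math

def normalize(v):
    # assumed behaviour of HRLutils.normalize: rescale v to unit length
    length = math.sqrt(sum(x * x for x in v))
    return [x / length for x in v] if length > 0 else list(v)

# a stimulus-combination encoder plus a scaled context component
enc = normalize([1.0, 0.0, 1.0])
context = [0.5 * x for x in [0.0, 1.0]]  # hypothetical context_scale = 0.5
combined = normalize(enc + context)      # 5-D unit-length encoder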
Example #3
    def compute_weight_matrix(self, proj):
        orig = proj.origin
        term = proj.termination
        post = term.node
        transform = term.transform

        # unwrap to get at the underlying decoded origin
        while hasattr(orig, 'getWrappedOrigin'):
            orig = orig.getWrappedOrigin()

        decoder = orig.getDecoders()
        encoder = post.getEncoders()

        # scale by radius
        encoder = MU.prod(encoder, 1.0 / post.getRadii()[0])

        encoder = MU.prod(encoder, self.weight_scale)

        # scale by gain
        for i, n in enumerate(post.nodes):
            for j in range(len(encoder[i])):
                encoder[i][j] *= n.scale
        # (equivalently: encoder = MU.prodElementwise(
        #     encoder, [n.scale for n in post.nodes]))

        w = MU.prod(encoder, MU.prod(transform, MU.transpose(decoder)))

        return w
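This is the standard NEF weight factorization: the full connection matrix is the product of the (radius- and gain-scaled) post-population encoders, the connection transform, and the transposed pre-population decoders. A minimal numpy sketch of the same product, with plain arrays standing in for the MU calls:

import numpy as np

def weight_matrix(encoders, decoders, transform, radius=1.0, gains=None):
    # w = E * T * D^T, with encoders scaled by 1/radius and per-neuron gain
    E = np.asarray(encoders) / radius
    if gains is not None:
        E = E * np.asarray(gains)[:, None]
    return E.dot(np.asarray(transform)).dot(np.asarray(decoders).T)

# post has 3 neurons encoding 2D, pre has 4 neurons decoding 2D
w = weight_matrix(np.ones((3, 2)), np.ones((4, 2)) * 0.1, np.eye(2))
assert w.shape == (3, 4)  # one weight per (post, pre) neuron pair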
Example #4
    def __init__(self, spinn, origin, termination, transform=None):
        scale = [nn.scale for nn in termination.node.nodes]

        if transform is None:
            transform = termination.transform

        if origin.node.neurons > spinn.max_fan_in:
            w = optsparse.compute_sparse_weights(origin, termination.node,
                                                 transform, spinn.max_fan_in)
        else:
            w = MU.prod(termination.node.encoders,
                        MU.prod(transform, MU.transpose(origin.decoders)))
            w = MU.prod(w, 1.0 / termination.node.radii[0])

        for i in range(len(w)):
            for j in range(len(w[i])):
                w[i][j] *= scale[i] / termination.tau

        w = MU.transpose(w)

        self.weights = w
        self.tau = int(round(termination.tau * 1000))
        if self.tau not in spinn.populations[termination.node].taus:
            spinn.populations[termination.node].taus.append(self.tau)
        self.pre = spinn.populations[origin.node].name
        self.post = spinn.populations[termination.node].name
Example #5
    def run(self, start, end):
        nef.SimpleNode.run(self, start, end)
        # Get total values from input terminations
        total_input = util_funcs.zeros(1, self.dimension)
        for term_str in self.input_terms.keys():
            term_obj = self.getTermination(term_str)
            term_out = term_obj._filtered_values
            term_mat = self.input_terms[term_str]
            if( term_mat is None ):
                term_val = term_out
            else:
                term_val = MU.prod(term_mat, term_out)
            total_input = [total_input[n] + term_val[n]
                           for n in range(self.dimension)]

        # Get total inhibitory input
        total_inhib = 0
        for term_str in self.inhib_terms.keys():
            term_obj = self.getTermination(term_str)
            term_out = term_obj._filtered_values
            term_mat = self.inhib_terms[term_str]
            term_val = MU.prod(term_mat, term_out)
            total_inhib = total_inhib + term_val
        
        # Calculate return value
        input_mag = util_funcs.norm(total_input)
        input_sign = cmp(input_mag, 0)
        inhibd_mag = max(abs(input_mag) + (total_inhib * self.radius),
                         0) * input_sign
        if( input_mag != 0 ):
            self.return_val = [total_input[n] * inhibd_mag / input_mag
                               for n in range(self.dimension)]
        else:
            self.return_val = util_funcs.zeros(1, self.dimension)
        
        return
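The arithmetic at the end shrinks the magnitude of the input vector by the (negative) inhibitory total, floors it at zero, and preserves the input's direction. A compact sketch of just that step, assuming total_inhib is non-positive:

import math

def apply_inhibition(x, total_inhib, radius):
    # scale |x| down by the inhibitory input (weighted by radius),
    # floor at 0, and keep the direction of x unchanged
    mag = math.sqrt(sum(v * v for v in x))
    if mag == 0:
        return [0.0 for _ in x]
    new_mag = max(mag + total_inhib * radius, 0)
    return [v * new_mag / mag for v in x]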
Example #6
 def getDecoders(self):
     neurons = self._parent.nodes[0].neurons
     decoders = MU.zero(neurons * len(self._origins), self._dimensions)
     for i, o in enumerate(self._origins):
         MU.copyInto(o.decoders, decoders, i * neurons, i * o.dimensions,
                     neurons)
     return decoders
Example #7
 def addPlasticTermination(self,
                           name,
                           matrix,
                           tauPSC,
                           decoder,
                           weight_func=None):
     """Create a new termination.  A new termination is created on each
     of the ensembles, which are then grouped together.
     
     If decoders are not known at the time the termination is created,
     then pass in an array of zeros of the appropriate size (i.e. however
     many neurons will be in the population projecting to the termination,
     by number of dimensions)."""
     terminations = []
     d = 0
     dd = self._nodes[0].dimension
     for n in self._nodes:
         encoder = n.encoders
         w = MU.prod(encoder, [
             MU.prod(matrix, MU.transpose(decoder))[d + i]
             for i in range(dd)
         ])
         if weight_func is not None:
             w = weight_func(w)
         t = n.addPESTermination(name, w, tauPSC, False)
         terminations.append(t)
         d += dd
     termination = EnsembleTermination(self, name, terminations)
     self.exposeTermination(termination, name)
     return self.getTermination(name)
Example #8
def compute_sparse_weights(origin,
                           post,
                           transform,
                           fan_in,
                           noise=0.1,
                           num_samples=100):
    encoder = post.encoders
    radius = post.radii[0]

    if hasattr(transform, 'tolist'): transform = transform.tolist()

    approx = origin.node.getDecodingApproximator('AXON')

    # create X matrix
    X = approx.evalPoints
    X = MU.transpose([f.multiMap(X) for f in origin.functions])

    # create A matrix
    A = approx.values

    S = fan_in
    N_A = len(A)
    samples = len(A[0])
    N_B = len(encoder)
    w_sparse = np.zeros((N_B, N_A), 'f')
    noise_sd = MU.max(A) * noise
    # solve decoders for num_samples random subsets of S pre-neurons,
    # keeping track of which pre-neurons each decoder corresponds to
    decoder_list = [None for _ in range(num_samples)]
    for i in range(num_samples):
        indices = random.sample(range(N_A), S)
        activity = [A[j] for j in indices]
        n = [[random.gauss(0, noise_sd) for _ in range(samples)]
             for j in range(S)]
        activity = MU.sum(activity, n)
        activityT = MU.transpose(activity)
        gamma = MU.prod(activity, activityT)

        upsilon = MU.prod(activity, X)

        gamma_inv = pinv(gamma, noise_sd * noise_sd)

        decoder_list[i] = (indices, MU.prod(
            [[x for x in row] for row in gamma_inv], upsilon))

    for i in range(N_B):
        # pair each post-neuron with one sampled decoder and the matching
        # pre-neuron indices that decoder was solved for
        indices, decoder = random.choice(decoder_list)
        ww = MU.prod(decoder,
                     MU.prod(MU.transpose(transform), encoder[i]))

        for j, k in enumerate(indices):
            w_sparse[i, k] = float(ww[j]) / radius

    return list(w_sparse)
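Each pass through the sampling loop solves the usual NEF least-squares problem for a random subset of pre-neurons: decoders d = gamma^-1 * upsilon, with gamma = A A^T and upsilon = A X, regularized by the noise variance. A minimal numpy sketch of one such solve (the ridge regularization here is one common choice; the pinv helper above may regularize differently):

import numpy as np

def solve_decoders(A, X, noise_sd):
    # gamma = A A^T (neurons x neurons), upsilon = A X (neurons x dims)
    gamma = A.dot(A.T) + np.eye(A.shape[0]) * noise_sd ** 2
    upsilon = A.dot(X)
    return np.linalg.solve(gamma, upsilon)

A = np.random.rand(20, 100)  # 20 sampled neurons at 100 eval points
X = np.random.rand(100, 2)   # target 2-D function values
d = solve_decoders(A, X, noise_sd=0.1)
assert d.shape == (20, 2)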
Example #9
def make(net, name='System', neurons=100, A=[[0]], tau_feedback=0.1):
    A = numeric.array(A)
    assert len(A.shape) == 2
    assert A.shape[0] == A.shape[1]

    dimensions = A.shape[0]
    state = net.make(name, neurons, dimensions)
    Ap = A * tau_feedback + numeric.identity(dimensions)

    net.connect(state, state, transform=Ap, pstc=tau_feedback)
    if net.network.getMetaData("linear") == None:
        net.network.setMetaData("linear", HashMap())
    linears = net.network.getMetaData("linear")

    linear = HashMap(4)
    linear.put("name", name)
    linear.put("neurons", neurons)
    linear.put("A", MU.clone(A))
    linear.put("tau_feedback", tau_feedback)

    linears.put(name, linear)

    if net.network.getMetaData("templates") == None:
        net.network.setMetaData("templates", ArrayList())
    templates = net.network.getMetaData("templates")
    templates.add(name)

    if net.network.getMetaData("templateProjections") == None:
        net.network.setMetaData("templateProjections", HashMap())
    templateproj = net.network.getMetaData("templateProjections")
    templateproj.put(name, name)
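The key line is Ap = A * tau_feedback + identity: by the NEF's dynamics principle, a recurrent connection with synaptic time constant tau and transform tau*A + I makes the population implement dx/dt = Ax. A small numpy check of that identity using simple Euler integration of a first-order synapse:

import numpy as np

A = np.array([[0.0, 1.0], [-1.0, 0.0]])  # a harmonic oscillator
tau = 0.1
Ap = tau * A + np.eye(2)

# first-order synapse: dx/dt = (Ap.x - x) / tau, which reduces to A.x
x = np.array([1.0, 0.0])
dt = 0.001
for _ in range(1000):
    x = x + (dt / tau) * (Ap.dot(x) - x)
# after 1 simulated second, x has rotated along the oscillator's cycle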
Example #10
    def calc_weights(self, encoder, decoder):
        self.N1 = len(decoder[0])
        self.D = len(decoder)
        self.N2 = len(encoder)
        self.getTermination('input').setDimensions(self.N1)
        self.getOrigin('output').setDimensions(self.N2)

        self.tables = []
        self.histograms = []
        for dim in range(self.D):
            cdfs = []
            self.tables.append(make_output_table([e[dim] for e in encoder]))
            for i in range(self.N1):
                d = decoder[dim][i] / spike_strength
                if d < 0:
                    decoder_sign = -1
                    d = -d
                else:
                    decoder_sign = 1
                histogram = compute_histogram(d, [e[dim] for e in encoder])
                cdf = compute_cdf(histogram)
                cdfs.append((decoder_sign, cdf))
            self.histograms.append(cdfs)

        return numeric.array(MU.prod(encoder, decoder))
Example #11
    def __init__(self, actions, mapname, contextD, context_rewards, **kwargs):
        """Initialize the environment variables.

        :param actions: actions available to the system
            :type actions: list of tuples (action_name,action_vector)
        :param mapname: filename for map file
        :param contextD: dimension of vector representing context
        :param context_rewards: mapping from region labels to rewards for being
            in that region (each entry represents one context)
            :type context_rewards: dict {"regionlabel":rewardval,...}
        :param **kwargs: see PlaceCellEnvironment.__init__
        """

        PlaceCellEnvironment.__init__(self,
                                      actions,
                                      mapname,
                                      name="ContextEnvironment",
                                      **kwargs)

        self.rewards = context_rewards

        # generate vectors representing each context
        self.contexts = {}  # mapping from region label to context vector
        for i, r in enumerate(self.rewards):
            self.contexts[r] = list(MU.I(contextD)[i])

        self.context = self.contexts[random.choice(self.contexts.keys())]

        # randomly pick a new context every context_delay seconds
        self.context_delay = 60
        self.context_update = self.context_delay

        self.create_origin("placewcontext",
                           lambda: self.place_activations + self.context)
        self.create_origin("context", lambda: self.context)
Example #12
 def weights(self, obj, termination, include_gain=False):
     v = []
     for n in obj.nodes:
         w = n.getTermination(termination).weights
         if include_gain:
             w = MU.prod(w, n.scale)
         v.extend(w)
     return v
Example #13
    def __init__(self, N, d, name="PositiveBias"):
        """Builds the PositiveBias network.

        :param N: base number of neurons
        :param d: dimension of input signal
        :param name: name for network
        """

        self.name = name
        net = nef.Network(self, seed=HRLutils.SEED, quick=False)

        tauPSC = 0.007
        biaslevel = 0.03  # the value to be output for negative inputs

        # threshold the input signal to detect positive values
        nfac = HRLutils.node_fac()
        nfac.setIntercept(IndicatorPDF(0, 0.1))
        neg_thresh = net.make_array("neg_thresh",
                                    N,
                                    d,
                                    encoders=[[1]],
                                    node_factory=nfac)
        neg_thresh.addDecodedTermination("input", MU.I(d), tauPSC, False)

        # create a population that tries to output biaslevel across
        # all dimensions
        bias_input = net.make_input("bias_input", [biaslevel])
        bias_pop = net.make_array(
            "bias_pop",
            N,
            d,
            node_factory=HRLutils.node_fac(),
            eval_points=[[x * 0.01] for x in range(int(biaslevel * 200))])

        net.connect(bias_input, bias_pop, pstc=tauPSC)

        # the individual dimensions of bias_pop are then inhibited by the
        # output of neg_thresh (so any positive values don't get the bias)
        net.connect(neg_thresh,
                    bias_pop,
                    pstc=tauPSC,
                    func=lambda x: [1.0] if x[0] > 0 else [0.0],
                    transform=[[-10 if i == k else 0 for k in range(d)]
                               for i in range(d)
                               for _ in range(bias_pop.getNeurons() / d)])

        # the whole population is inhibited by the learn signal, so that it
        # outputs 0 if the system isn't supposed to be learning
        bias_pop.addTermination("learn",
                                [[-10] for _ in range(bias_pop.getNeurons())],
                                tauPSC, False)

        self.exposeTermination(neg_thresh.getTermination("input"), "input")
        self.exposeTermination(bias_pop.getTermination("learn"), "learn")
        self.exposeOrigin(bias_pop.getOrigin("X"), "X")
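Taken together, the network approximates a simple piecewise function: output biaslevel on every dimension where the input is non-positive, and 0 where the input is positive (with everything suppressed while the learn signal is active). A plain-Python statement of that target, for reference:

def positive_bias_reference(x, biaslevel=0.03):
    # the function the network approximates, dimension by dimension
    return [0.0 if v > 0 else biaslevel for v in x]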
Example #14
    def addDecodedOrigin(self, name, funcs, origin):
        net = nef.Network(self)

        o = self.getNode("storage").addDecodedOrigin(name, funcs, origin)

        # undo the radius scaling
        funcout = net.make(name, 1, self.dimension, mode="direct")
        funcout.fixMode()
        net.connect(o, funcout, pstc=0.001,
                    transform=MU.diag([self.radius
                                       for _ in range(self.dimension)]))

        self.exposeOrigin(funcout.getOrigin("X"), name)
        return self.getOrigin(name)
Example #15
def test_actionvalues():
    net = nef.Network("testActionValues")

    stateN = 200
    N = 100
    stateD = 2
    stateradius = 1.0
    statelength = math.sqrt(2 * stateradius**2)
    init_Qs = 0.5
    learningrate = 0.0
    Qradius = 1
    tauPSC = 0.007
    actions = [("up", [0, 1]), ("right", [1, 0]), ("down", [0, -1]),
               ("left", [-1, 0])]

    # state
    state_pop = net.make(
        "state_pop",
        stateN,
        stateD,
        radius=statelength,
        node_factory=HRLutils.node_fac(),
        eval_points=[[x / statelength, y / statelength]
                     for x in range(-int(stateradius), int(stateradius))
                     for y in range(-int(stateradius), int(stateradius))])
    state_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
    state_pop.addDecodedTermination("state_input", MU.I(stateD), tauPSC, False)

    # set up action nodes
    decoders = state_pop.addDecodedOrigin("init_decoders",
                                          [ConstantFunction(stateD, init_Qs)],
                                          "AXON").getDecoders()

    actionvals = actionvalues.ActionValues("testActionValues",
                                           N,
                                           stateN,
                                           actions,
                                           learningrate,
                                           Qradius=Qradius,
                                           init_decoders=decoders)
    net.add(actionvals)

    net.connect(state_pop.getOrigin("AXON"),
                actionvals.getTermination("state"))

    # input
    inp = net.make_input("input", [0, 0])
    net.connect(inp, state_pop.getTermination("state_input"))

    net.add_to_nengo()
    net.view()
Example #16
    def gen_encoders(self, N, contextD, context_scale):
        """Generates encoders for state population in RL agent.

        State aspect of encoders comes from PlaceCellEnvironment. Context
        component is a unit vector with contextD dimensions and length
        context_scale.
        """

        s_encoders = PlaceCellEnvironment.gen_encoders(self, N)
        c_encoders = [random.choice(MU.I(contextD)) for _ in range(N)]
        c_encoders = [[x * context_scale for x in enc] for enc in c_encoders]
        encoders = [s + list(c) for s, c in zip(s_encoders, c_encoders)]
        encoders = [[x / math.sqrt(sum([y**2 for y in e])) for x in e]
                    for e in encoders]
        return encoders
Example #17
def qnetwork(stateN, stateD, state_encoders, actions, learningrate,
                 stateradius=1.0, Qradius=1.0, load_weights=None):
    net = nef.Network("QNetwork")
    with declarative_syntax(net):
        N = 50
        statelength = math.sqrt(2*stateradius**2)
        tauPSC = 0.007
        num_actions = len(actions)
        init_Qs = 0.0
        # period to save weights (realtime, not simulation time)
        weight_save = 600.0

        # set up relays
        direct_mode('state_relay', 1, dimension=stateD)
        add_decoded_termination('state_relay', 'input', MU.I(stateD), .001,
                                False)

        # create state population
        ensemble('state_pop', neurons=LIF(stateN),
                 dimensions=stateD,
                 radius=statelength,
                 encoders=state_encoders,
                )
        connect('state_relay', 'state_pop', filter=tauPSC)

        memory('saved_state', neurons=LIF(N * 4), dimension=stateD,
               inputscale=50,
               radius=stateradius,
               direct_storage=True)

        # N.B. the "." syntax refers to an ensemble created by the `memory` macro
        connect('state_relay', 'saved_state.target')

        ensemble('old_state_pop', neurons=LIF(stateN),
                 dimensions=stateD,
                 radius=statelength,
                 encoders=state_encoders)

        connect('saved_state', 'old_state_pop', filter=tauPSC)

        # restrict the intercepts of both state populations to [0, 1]
        for name in 'state_pop', 'old_state_pop':
            set_intercepts(name, IndicatorPDF(0, 1))

        fixMode('state_relay')
        fixMode('state_pop', ['default', 'rate'])
        fixMode('old_state_pop', ['default', 'rate'])
Example #18
 def termination_Cycle(self, x):
     x = x[0]
     if( self.cyc_opt ):
         x = 1 - x
     if( x < 0.025 ):
         if( self.reset_val < 0.5 ):
             input_total = zeros(1, self.dimension)
             for term_name in self.input_terms:
                 termination = self.getTermination(term_name)
                 term_matrix = self.input_mats[term_name]
                 term_output = termination.getOutput()
                 if( isinstance(term_matrix, (int,float,long)) ):
                     input_total = [input_total[n] + term_matrix * term_output[n] for n in range(self.dimension)]
                 else:
                     #term_value = numeric.dot(numeric.array(term_output, typecode='f'), self.input_mats[term_name])
                     term_value  = MU.prod(self.input_mats[term_name], term_output)
                     input_total = [input_total[n] + term_value[n] for n in range(self.dimension)]
             self.stored_val = deepcopy(input_total)
Example #19
    def __init__(self,
                 name,
                 N,
                 d,
                 scale=1.0,
                 weights=None,
                 maxinput=1.0,
                 oneDinput=False):
        # scale is a scale on the output of the multiplication
        # output = (input1.*input2)*scale

        # weights are optional matrices applied to each input
        # output = (C1*input1 .* C2*input2)*scale

        # maxinput is the maximum expected value of any dimension of the
        # inputs. this is used to scale the inputs internally so that the
        # length of the vectors in the intermediate populations are not
        # too small (which results in a lot of noise in the calculations)

        # oneDinput indicates that the second input is one dimensional, and is
        # just a scale on the first input rather than an element-wise product

        self.name = name
        tauPSC = 0.007

        # the size of the intermediate populations
        smallN = int(math.ceil(float(N) / d))

        # the maximum value of the vectors represented by the intermediate
        # populations. the vector is at most [maxinput maxinput], so the length
        # of that is sqrt(maxinput**2 + maxinput**2)
        maxlength = math.sqrt(2 * maxinput**2)

        if weights is not None and len(weights) != 2:
            print "Warning, eprod expects exactly two weight matrices"

        if weights is None:
            weights = [MU.I(d), MU.I(d)]

        inputd = len(weights[0][0])

        ef = HRLutils.defaultEnsembleFactory()

        # create input populations
        in1 = ef.make("in1", 1, inputd)
        in1.addDecodedTermination("input", MU.I(inputd), 0.001, False)
        self.addNode(in1)
        in1.setMode(SimulationMode.DIRECT)  # since this is just a relay
        in1.fixMode()

        in2 = ef.make("in2", 1, inputd)
        if not oneDinput:
            in2.addDecodedTermination("input", MU.I(inputd), 0.001, False)
        else:
            # if it is a 1-D input we just expand it to a full vector of that
            # value so that we can treat it as an element-wise product
            in2.addDecodedTermination("input", [[1] for i in range(inputd)],
                                      0.001, False)
        self.addNode(in2)
        in2.setMode(SimulationMode.DIRECT)  # since this is just a relay
        in2.fixMode()

        # ensemble for intermediate populations
        multef = NEFEnsembleFactoryImpl()
        multef.nodeFactory.tauRC = 0.05
        multef.nodeFactory.tauRef = 0.002
        multef.nodeFactory.maxRate = IndicatorPDF(200, 500)
        multef.nodeFactory.intercept = IndicatorPDF(-1, 1)
        multef.encoderFactory = (
            vectorgenerators.MultiplicationVectorGenerator())
        multef.beQuiet()

        result = ef.make("result", 1, d)
        result.setMode(SimulationMode.DIRECT)  # since this is just a relay
        result.fixMode()
        self.addNode(result)

        resultTerm = [[0] for _ in range(d)]
        zeros = [0 for _ in range(inputd)]

        for e in range(d):
            # create a 2D population for each input dimension which will
            # combine the components from one dimension of each of the input
            # populations
            mpop = multef.make('mpop_' + str(e), smallN, 2)

            # make two connection that will select one component from each of
            # the input pops
            # we divide by maxlength to ensure that the maximum length of the
            # 2D vector is 1
            # remember that (for some reason) the convention in Nengo is that
            # the input matrices are transpose of what they would be
            # mathematically
            mpop.addDecodedTermination('a',
                                       [[(1.0 / maxlength) * weights[0][e][i]
                                         for i in range(inputd)], zeros],
                                       tauPSC, False)
            mpop.addDecodedTermination('b', [
                zeros,
                [(1.0 / maxlength) * weights[1][e][i] for i in range(inputd)]
            ], tauPSC, False)

            # multiply the two selected components together
            mpop.addDecodedOrigin("output", [PostfixFunction('x0*x1', 2)],
                                  "AXON")

            self.addNode(mpop)
            self.addProjection(in1.getOrigin('X'), mpop.getTermination('a'))
            self.addProjection(in2.getOrigin('X'), mpop.getTermination('b'))

            # combine the 1D results back into one vector.
            # we scaled each input by 1/maxlength, then multiplied them
            # together for a total scale of 1/maxlength**2, so to undo we
            # multiply by maxlength**2
            resultTerm[e] = [maxlength**2 * scale]
            result.addDecodedTermination('in_' + str(e), resultTerm, 0.001,
                                         False)
            resultTerm[e] = [0]

            self.addProjection(mpop.getOrigin('output'),
                               result.getTermination('in_' + str(e)))

        self.exposeTermination(in1.getTermination("input"), "A")
        self.exposeTermination(in2.getTermination("input"), "B")
        self.exposeOrigin(result.getOrigin("X"), "X")
Example #20
    def __init__(self, name, N, stateN, actions, learningrate, Qradius=1.0, init_decoders=None):
        """Build ActionValues network.

        :param name: name of Network
        :param N: base number of neurons
        :param stateN: number of neurons in state population
        :param actions: actions available to the system
            :type actions: list of tuples (action_name,action_vector)
        :param learningrate: learning rate for PES rule
        :param Qradius: expected radius of Q values
        :param init_decoders: if specified, will be used to initialize the connection
            weights to whatever function is specified by the decoders
        """

        self.name = name
        net = nef.Network(self, seed=HRLutils.SEED, quick=False)

        self.N = N
        self.learningrate = learningrate
        self.supervision = 1.0  # fully supervised; no unsupervised component

        self.tauPSC = 0.007

        modterms = []
        learnterms = []

        # relays  
        output = net.make("output", 1, len(actions), mode="direct")
        output.fixMode()

        for i, action in enumerate(actions):
            # create one population corresponding to each action
            act_pop = net.make("action_" + action[0], self.N * 4, 1, node_factory=HRLutils.node_fac())
            act_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

            # add error termination; the modulatory transform selects one
            # dimension of the error signal
            modterm = act_pop.addDecodedTermination(
                "error", [[0 if j != i else 1 for j in range(len(actions))]],
                0.005, True)

            # create learning termination
            if init_decoders is not None:
                weights = MU.prod(act_pop.getEncoders(),
                                  MU.transpose(init_decoders))
            else:
                # use _ for the comprehension variables so they don't clobber
                # the enclosing loop's i (list comprehensions leak their
                # variables in Python 2)
                weights = [[random.uniform(-1e-3, 1e-3)
                            for _ in range(stateN)]
                           for _ in range(act_pop.getNeurons())]
            learningterm = act_pop.addHPESTermination("learning", weights, 0.005, False, None)

            # initialize the learning rule
            net.learn(act_pop, learningterm, modterm, rate=self.learningrate, supervisionRatio=self.supervision)

            # connect each action back to the output relay. Q values are all
            # learned with radius 1, then multiplied by the desired Qradius
            # here
            net.connect(act_pop.getOrigin("X"), output,
                        transform=[[0] if j != i else [Qradius]
                                   for j in range(len(actions))],
                        pstc=0.001)

            modterms += [modterm]
            learnterms += [learningterm]

        # use EnsembleTerminations to group the individual action
        # terminations into one multidimensional termination
        self.exposeTermination(EnsembleTermination(self, "state", learnterms), "state")
        self.exposeTermination(EnsembleTermination(self, "error", modterms), "error")

        self.exposeOrigin(output.getOrigin("X"), "X")
Example #21
def make_convolution(self,
                     name,
                     A,
                     B,
                     C,
                     N_per_D,
                     quick=False,
                     encoders=[[1, 1], [1, -1], [-1, 1], [-1, -1]],
                     radius=3,
                     pstc_out=0.01,
                     pstc_in=0.01,
                     pstc_gate=0.01,
                     invert_first=False,
                     invert_second=False,
                     mode='default',
                     output_scale=1):
    if isinstance(A, str):
        A = self.network.getNode(A)
    if isinstance(B, str):
        B = self.network.getNode(B)
    if isinstance(C, str):
        C = self.network.getNode(C)

    dimensions = C.dimension
    if (B is not None
            and B.dimension != dimensions) or (A is not None
                                               and A.dimension != dimensions):
        raise Exception('Dimensions not the same for convolution (%d,%d->%d)' %
                        (A.dimension, B.dimension, C.dimension))

    if mode == 'direct':
        D = DirectConvolution(name, dimensions, invert_first, invert_second)
        self.add(D)
        D.getTermination('A').setTau(pstc_in)
        D.getTermination('B').setTau(pstc_in)
        D.getTermination('gate').setTau(pstc_gate)
        if A is not None:
            self.connect(A, D.getTermination('A'))
        if B is not None:
            self.connect(B, D.getTermination('B'))
        self.connect(D.getOrigin('C'), C, pstc=pstc_out, weight=output_scale)
    else:
        D = make_array(self,
                       name,
                       N_per_D,
                       dimensions,
                       quick=quick,
                       encoders=encoders,
                       radius=radius)

        A2 = input_transform(dimensions, True, invert_first)
        B2 = input_transform(dimensions, False, invert_second)

        D.addDecodedTermination('A', A2, pstc_in, False)
        D.addDecodedTermination('B', B2, pstc_in, False)

        if A is not None:
            self.connect(A, D.getTermination('A'))
        if B is not None:
            self.connect(B, D.getTermination('B'))

        ifftm2 = output_transform(dimensions)

        self.connect(D,
                     C,
                     func=product,
                     transform=ifftm2 * output_scale,
                     pstc=pstc_out)

    if self.network.getMetaData("convolution") == None:
        self.network.setMetaData("convolution", HashMap())
    bindings = self.network.getMetaData("convolution")

    binding = HashMap(15)
    binding.put("name", name)
    if A is not None:
        binding.put("A", A.getName())
    else:
        binding.put("A", None)
    if B is not None:
        binding.put("B", B.getName())
    else:
        binding.put("B", None)
    binding.put("C", C.getName())
    binding.put("N_per_D", N_per_D)
    binding.put("quick", quick)
    binding.put("encoders", MU.clone(encoders))
    binding.put("radius", radius)
    binding.put("pstc_out", pstc_out)
    binding.put("pstc_in", pstc_in)
    binding.put("pstc_gate", pstc_gate)
    binding.put("invert_first", invert_first)
    binding.put("invert_second", invert_second)
    binding.put("mode", mode)
    binding.put("output_scale", output_scale)

    bindings.put(name, binding)

    if self.network.getMetaData("templates") == None:
        self.network.setMetaData("templates", ArrayList())
    templates = self.network.getMetaData("templates")
    templates.add(name)

    if self.network.getMetaData("templateProjections") == None:
        self.network.setMetaData("templateProjections", HashMap())
    templateproj = self.network.getMetaData("templateProjections")
    if A is not None:
        templateproj.put(name, A.getName())
    if B is not None:
        templateproj.put(name, B.getName())
    templateproj.put(name, C.getName())

    return D
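The binding operation this template implements is circular convolution, computed via the Fourier-domain input/output transforms and the element-wise product population. A minimal numpy reference for the ideal (direct-mode, no inversion) computation:

import numpy as np

def circconv(a, b):
    # circular convolution: element-wise product in the Fourier domain
    return np.real(np.fft.ifft(np.fft.fft(a) * np.fft.fft(b)))

a = np.array([0.5, 0.3, 0.2, 0.0])
b = np.array([0.1, 0.4, 0.2, 0.3])
c = circconv(a, b)  # same dimensionality as the inputs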
Example #22
def run_badreenvironment(nav_args,
                         ctrl_args,
                         bias=0.0,
                         seed=None,
                         flat=False,
                         label="tmp"):
    """Runs the model on the Badre et al. (2010) task."""

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    net = nef.Network("run_badreenvironment")

    env = badreenvironment.BadreEnvironment(flat=flat)
    net.add(env)

    # ##NAV AGENT
    stateN = 500
    max_state_input = 3
    enc = env.gen_encoders(stateN, 0, 0.0)

    # generate evaluation points
    orientations = MU.I(env.num_orientations)
    shapes = MU.I(env.num_shapes)
    colours = MU.I(env.num_colours)
    evals = (
        list(MU.diag([3 for _ in range(env.stateD)])) +
        [o + s + c for o in orientations for s in shapes for c in colours])

    # create lower level
    nav_agent = smdpagent.SMDPAgent(stateN,
                                    env.stateD,
                                    env.actions,
                                    name="NavAgent",
                                    stateradius=max_state_input,
                                    state_encoders=enc,
                                    state_evals=evals,
                                    discount=0.5,
                                    **nav_args)
    net.add(nav_agent)

    print "agent neurons:", nav_agent.countNeurons()

    # actions terminate on fixed schedule (aligned with environment)
    nav_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.6)): None},
        env,
        name="NavTermNode",
        state_delay=0.1,
        reset_delay=0.05,
        reset_interval=0.1)
    net.add(nav_term_node)

    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("reset"))
    net.connect(nav_term_node.getOrigin("learn"),
                nav_agent.getTermination("learn"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_state"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_action"))

    net.connect(nav_agent.getOrigin("action_output"),
                env.getTermination("action"))

    # ##CTRL AGENT
    stateN = 500
    enc = RandomHypersphereVG().genVectors(stateN, env.stateD)
    actions = [("shape", [0, 1]), ("orientation", [1, 0]), ("null", [0, 0])]
    ctrl_agent = smdpagent.SMDPAgent(stateN,
                                     env.stateD,
                                     actions,
                                     name="CtrlAgent",
                                     state_encoders=enc,
                                     stateradius=max_state_input,
                                     state_evals=evals,
                                     discount=0.4,
                                     **ctrl_args)
    net.add(ctrl_agent)

    print "agent neurons:", ctrl_agent.countNeurons()

    net.connect(env.getOrigin("state"),
                ctrl_agent.getTermination("state_input"))

    ctrl_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.6)): None},
        env,
        name="CtrlTermNode",
        state_delay=0.1,
        reset_delay=0.05,
        reset_interval=0.1)
    net.add(ctrl_term_node)

    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("reset"))
    net.connect(ctrl_term_node.getOrigin("learn"),
                ctrl_agent.getTermination("learn"))
    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("save_state"))
    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("save_action"))

    # ctrl gets a slight bonus if it selects a rule (as opposed to null), to
    # encourage it to not just pick null all the time
    reward_relay = net.make("reward_relay", 1, 3, mode="direct")
    reward_relay.fixMode()
    net.connect(env.getOrigin("reward"),
                reward_relay,
                transform=[[1], [0], [0]])
    net.connect(ctrl_agent.getOrigin("action_output"),
                reward_relay,
                transform=[[0, 0], [1, 0], [0, 1]])

    net.connect(reward_relay,
                ctrl_agent.getTermination("reward"),
                func=lambda x: ((x[0] + bias * abs(x[0]))
                                if x[1] + x[2] > 0.5 else x[0]),
                origin_name="ctrl_reward")

    # ideal reward function (for testing)
    #     def ctrl_reward_func(x):
    #         if abs(x[0]) < 0.5:
    #             return 0.0
    #
    #         if flat:
    #             return 1.5 if x[1] + x[2] < 0.5 else -1.5
    #         else:
    #             if x[1] + x[2] < 0.5:
    #                 return -1.5
    #             if [round(a) for a in env.state[-2:]] == [round(b)
    #                                                       for b in x[1:]]:
    #                 return 1.5
    #             else:
    #                 return -1.5
    #     net.connect(reward_relay, ctrl_agent.getTermination("reward"),
    #                 func=ctrl_reward_func)

    # nav rewarded for picking ctrl target
    def nav_reward_func(x):
        if abs(x[0]) < 0.5 or env.action is None:
            return 0.0

        if x[1] + x[2] < 0.5:
            return x[0]

        if x[1] > x[2]:
            return (1.5 if env.action[1] == env.state[:env.num_orientations]
                    else -1.5)
        else:
            return (1.5 if env.action[1]
                    == env.state[env.num_orientations:-env.num_colours] else
                    -1.5)

    net.connect(reward_relay,
                nav_agent.getTermination("reward"),
                func=nav_reward_func)

    # state for navagent controlled by ctrlagent
    ctrl_state_inhib = net.make_array("ctrl_state_inhib",
                                      50,
                                      env.stateD,
                                      radius=2,
                                      mode=HRLutils.SIMULATION_MODE)
    ctrl_state_inhib.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

    inhib_matrix = [[0, -5]] * 50 * env.num_orientations + \
                   [[-5, 0]] * 50 * env.num_shapes + \
                   [[-5, -5]] * 50 * env.num_colours

    # ctrl output inhibits all the non-selected aspects of the state
    net.connect(env.getOrigin("state"), ctrl_state_inhib)
    net.connect(ctrl_agent.getOrigin("action_output"),
                ctrl_state_inhib,
                transform=inhib_matrix)

    # also give a boost to the selected aspects (so that neurons are roughly
    # equally activated).
    def boost_func(x):
        if x[0] > 0.5:
            return [3 * v for v in x[1:]]
        else:
            return x[1:]

    boost = net.make("boost", 1, 1 + env.stateD, mode="direct")
    boost.fixMode()
    net.connect(ctrl_state_inhib,
                boost,
                transform=([[0 for _ in range(env.stateD)]] +
                           list(MU.I(env.stateD))))
    net.connect(ctrl_agent.getOrigin("action_output"),
                boost,
                transform=[[1, 1]] + [[0, 0] for _ in range(env.stateD)])

    net.connect(boost,
                nav_agent.getTermination("state_input"),
                func=boost_func)

    # save weights
    weight_save = 1.0  # period to save weights (realtime, not simulation time)
    threads = [
        HRLutils.WeightSaveThread(
            nav_agent.getNode("QNetwork").saveParams,
            os.path.join("weights", "%s_%s" % (nav_agent.name, seed)),
            weight_save),
        HRLutils.WeightSaveThread(
            ctrl_agent.getNode("QNetwork").saveParams,
            os.path.join("weights", "%s_%s" % (ctrl_agent.name, seed)),
            weight_save)
    ]
    for t in threads:
        t.start()

    # data collection node
    data = datanode.DataNode(
        period=1,
        filename=HRLutils.datafile("dataoutput_%s.txt" % label),
        header="%s %s %s %s %s" % (nav_args, ctrl_args, bias, seed, flat))
    print "saving data to", data.filename
    print "header", data.header
    net.add(data)
    nav_q = nav_agent.getNode("QNetwork")
    ctrl_q = ctrl_agent.getNode("QNetwork")
    ctrl_bg = ctrl_agent.getNode("BGNetwork").getNode("weight_actions")
    data.record_avg(env.getOrigin("reward"))
    data.record_avg(ctrl_q.getNode("actionvals").getOrigin("X"))
    data.record_sparsity(ctrl_q.getNode("state_pop").getOrigin("AXON"))
    data.record_sparsity(nav_q.getNode("state_pop").getOrigin("AXON"))
    data.record_avg(ctrl_q.getNode("valdiff").getOrigin("X"))
    data.record_avg(ctrl_agent.getNode("ErrorNetwork").getOrigin("error"))
    data.record_avg(ctrl_bg.getNode("0").getOrigin("AXON"))
    data.record_avg(ctrl_bg.getNode("1").getOrigin("AXON"))
    data.record(env.getOrigin("score"))

    #     net.add_to_nengo()
    #     net.network.simulator.run(0, 300, 0.001)
    net.view()

    for t in threads:
        t.stop()
Example #23
    def __init__(self, stateN, stateD, state_encoders, actions, learningrate,
                 stateradius=1.0, Qradius=1.0, load_weights=None):
        NetworkImpl.__init__(self)
        self.name = "QNetwork"
        net = nef.Network(self, seed=HRLutils.SEED, quick=False)
        
        N = 50
        statelength = math.sqrt(2*stateradius**2)
        tauPSC = 0.007
        num_actions = len(actions)
        init_Qs = 0.0
        # period to save weights (realtime, not simulation time)
        weight_save = 600.0
        
        #set up relays
        state_relay = net.make("state_relay", 1, stateD, mode="direct")
        state_relay.fixMode()
        state_relay.addDecodedTermination("input", MU.I(stateD), 0.001, False)
        
        #create state population
        state_fac = HRLutils.node_fac()
        state_fac.setIntercept(IndicatorPDF(0,1))
            
        state_pop = net.make("state_pop", stateN, stateD, 
                              radius=statelength,
                              node_factory=state_fac,
                              encoders=state_encoders)
#                              eval_points=MU.I(stateD))
#        state_pop = net.make_array("state_pop", stateN/stateD, stateD,
#                                   node_factory=state_fac)
        state_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
        
        net.connect(state_relay, state_pop, pstc=tauPSC)
        
        #create population tied to previous state (to be used in learning)
        saved_state = memory.Memory("saved_state", N*4, stateD, inputscale=50, radius=stateradius,
                                    direct_storage=True)
        net.add(saved_state)
        
        net.connect(state_relay, saved_state.getTermination("target"))
        
        old_state_pop = net.make("old_state_pop", stateN, stateD, 
                              radius=statelength,
                              node_factory=state_fac,
                              encoders=state_encoders)
#                              eval_points=MU.I(stateD))
#        old_state_pop = net.make_array("old_state_pop", stateN/stateD, stateD,
#                                   node_factory=state_fac)
        old_state_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
        
        net.connect(saved_state, old_state_pop, pstc=tauPSC)
        
        #set up action nodes
        decoders = state_pop.addDecodedOrigin("init_decoders", [ConstantFunction(stateD,init_Qs)], "AXON").getDecoders()
        actionvals = actionvalues.ActionValues("actionvals", N, stateN, actions, learningrate, Qradius=Qradius, init_decoders=decoders)
        net.add(actionvals)
        
        decoders = old_state_pop.addDecodedOrigin("init_decoders", [ConstantFunction(stateD,init_Qs)], "AXON").getDecoders()
        old_actionvals = actionvalues.ActionValues("old_actionvals", N, stateN, actions, learningrate, Qradius=Qradius, init_decoders=decoders)
        net.add(old_actionvals)
        
        net.connect(state_pop.getOrigin("AXON"), actionvals.getTermination("state"))
        net.connect(old_state_pop.getOrigin("AXON"), old_actionvals.getTermination("state"))
        
        if load_weights is not None:
            self.loadWeights(load_weights)

        # find error between old_actionvals and actionvals (values are
        # doubled to get a bigger error signal)
        valdiff = net.make_array("valdiff", N, num_actions,
                                 node_factory=HRLutils.node_fac())
        net.connect(old_actionvals, valdiff,
                    transform=MU.diag([2] * num_actions), pstc=tauPSC)
        net.connect(actionvals, valdiff,
                    transform=MU.diag([-2] * num_actions), pstc=tauPSC)

        # calculate diff between curr_state and saved_state and use that to
        # gate valdiff
        statediff = net.make_array("statediff", N, stateD, intercept=(0.2, 1))
        net.connect(state_relay, statediff, pstc=tauPSC)
        net.connect(saved_state, statediff,
                    transform=MU.diag([-1] * stateD), pstc=tauPSC)

        net.connect(statediff, valdiff, func=lambda x: [abs(v) for v in x],
                    transform=[[-10] * stateD
                               for _ in range(valdiff.getNeurons())],
                    pstc=tauPSC)
        
        net.connect(valdiff, actionvals.getTermination("error"))
        
        #periodically save the weights
        class WeightSaveThread(threading.Thread):
            def __init__(self, func, prefix, period):
                threading.Thread.__init__(self)
                self.func = func
                self.prefix = prefix
                self.period = period
                
            def run(self):
                while True:
                    time.sleep(self.period)
                    self.func(self.prefix)
        wsn = WeightSaveThread(self.saveWeights, os.path.join("weights","tmp"), weight_save)
        wsn.start()
        
        self.exposeTermination(state_relay.getTermination("input"), "state")
        self.exposeTermination(old_actionvals.getTermination("error"), "error")
        self.exposeTermination(saved_state.getTermination("transfer"), "save_state")
        self.exposeOrigin(actionvals.getOrigin("X"), "vals")
        self.exposeOrigin(old_actionvals.getOrigin("X"), "old_vals")
Example #24
 def termination_context(self, c, pstc=0.01):
     self.context = max(self.contexts,
                        key=lambda x: MU.prod(HRLutils.normalize(c),
                                              HRLutils.normalize(x[1])))
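Unrolled, the one-liner picks the stored context whose vector points most nearly in the same direction as the input (MU.prod of two vectors is their dot product). A self-contained sketch, with a stand-in assumed for HRLutils.normalize:

import math

def _normalize(v):
    # stand-in for HRLutils.normalize: rescale v to unit length
    n = math.sqrt(sum(x * x for x in v))
    return [x / n for x in v] if n > 0 else list(v)

def best_context(contexts, c):
    # contexts: pairs of (label, vector); pick the entry whose vector has
    # the greatest cosine similarity with the input c
    cn = _normalize(c)
    return max(contexts,
               key=lambda x: sum(a * b
                                 for a, b in zip(cn, _normalize(x[1]))))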
Example #25
def run_deliveryenvironment(navargs, ctrlargs, tag=None, seed=None):
    """Runs the model on the delivery task.

    :param navargs: kwargs for the nav_agent (see SMDPAgent.__init__)
    :param ctrlargs: kwargs for the ctrl_agent (see SMDPAgent.__init__)
    :param tag: string appended to datafiles associated with this run
    :param seed: random seed used for this run
    """

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    if tag is None:
        tag = str(seed)

    net = nef.Network("runDeliveryEnvironment", seed=seed)

    stateN = 1200  # number of neurons to use in state population
    contextD = 2  # dimension of context vector
    context_scale = 1.0  # relative scale of context vector vs state vector
    max_state_input = 2  # maximum length of input vector to state population

    # labels and vectors corresponding to basic actions available to the system
    actions = [("up", [0, 1]), ("right", [1, 0]), ("down", [0, -1]),
               ("left", [-1, 0])]

    if "load_weights" in navargs and navargs["load_weights"] is not None:
        navargs["load_weights"] += "_%s" % tag
    if "load_weights" in ctrlargs and ctrlargs["load_weights"] is not None:
        ctrlargs["load_weights"] += "_%s" % tag

    # ##ENVIRONMENT

    env = deliveryenvironment.DeliveryEnvironment(
        actions,
        HRLutils.datafile("contextmap.bmp"),
        colormap={
            -16777216: "wall",
            -1: "floor",
            -256: "a",
            -2088896: "b"
        },
        imgsize=(5, 5),
        dx=0.001,
        placedev=0.5)
    net.add(env)

    print "generated", len(env.placecells), "placecells"

    # ##NAV AGENT

    # generate encoders and divide them by max_state_input (so that inputs
    # will be scaled down to radius 1)
    enc = env.gen_encoders(stateN, contextD, context_scale)
    enc = MU.prod(enc, 1.0 / max_state_input)

    # read in eval points from file
    with open(HRLutils.datafile("contextbmp_evalpoints_%s.txt" % tag)) as f:
        evals = [[float(x) for x in l.split(" ")] for l in f.readlines()]

    nav_agent = smdpagent.SMDPAgent(stateN,
                                    len(env.placecells) + contextD,
                                    actions,
                                    name="NavAgent",
                                    state_encoders=enc,
                                    state_evals=evals,
                                    state_threshold=0.8,
                                    **navargs)
    net.add(nav_agent)

    print "agent neurons:", nav_agent.countNeurons()

    # output of nav_agent is what goes to the environment
    net.connect(nav_agent.getOrigin("action_output"),
                env.getTermination("action"))

    # termination node for nav_agent (just a timer that goes off regularly)
    nav_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.9)): None},
        env,
        contextD=2,
        name="NavTermNode")
    net.add(nav_term_node)

    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("reset"))
    net.connect(nav_term_node.getOrigin("learn"),
                nav_agent.getTermination("learn"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_state"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_action"))

    # ##CTRL AGENT

    # actions corresponding to "go to A" or "go to B"
    actions = [("a", [0, 1]), ("b", [1, 0])]
    ctrl_agent = smdpagent.SMDPAgent(stateN,
                                     len(env.placecells) + contextD,
                                     actions,
                                     name="CtrlAgent",
                                     state_encoders=enc,
                                     state_evals=evals,
                                     state_threshold=0.8,
                                     **ctrlargs)
    net.add(ctrl_agent)

    print "agent neurons:", ctrl_agent.countNeurons()

    # ctrl_agent gets environmental state and reward
    net.connect(env.getOrigin("placewcontext"),
                ctrl_agent.getTermination("state_input"))
    net.connect(env.getOrigin("reward"), ctrl_agent.getTermination("reward"))

    # termination node for ctrl_agent (terminates whenever the agent is in the
    # state targeted by the ctrl_agent)
    # also has a long timer so that ctrl_agent doesn't get permanently stuck
    # in one action
    ctrl_term_node = terminationnode.TerminationNode(
        {
            "a": [0, 1],
            "b": [1, 0],
            terminationnode.Timer((30, 30)): None
        },
        env,
        contextD=2,
        name="CtrlTermNode",
        rewardval=1.5)
    net.add(ctrl_term_node)

    # reward for nav_agent is the pseudoreward from ctrl_agent termination
    net.connect(ctrl_term_node.getOrigin("pseudoreward"),
                nav_agent.getTermination("reward"))

    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("reset"))
    net.connect(ctrl_term_node.getOrigin("learn"),
                ctrl_agent.getTermination("learn"))
    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("save_state"))
    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("save_action"))

    # connect ctrl_agent action to termination context
    # this is used so that ctrl_term_node knows what the current goal is (to
    # determine termination and pseudoreward)
    net.connect(ctrl_agent.getOrigin("action_output"),
                ctrl_term_node.getTermination("context"))

    # state input for nav_agent is the environmental state + the output of
    # ctrl_agent
    ctrl_output_relay = net.make("ctrl_output_relay",
                                 1,
                                 len(env.placecells) + contextD,
                                 mode="direct")
    ctrl_output_relay.fixMode()
    trans = (list(MU.I(len(env.placecells))) +
             [[0 for _ in range(len(env.placecells))]
              for _ in range(contextD)])
    net.connect(env.getOrigin("place"), ctrl_output_relay, transform=trans)
    net.connect(ctrl_agent.getOrigin("action_output"),
                ctrl_output_relay,
                transform=([[0 for _ in range(contextD)]
                            for _ in range(len(env.placecells))] +
                           list(MU.I(contextD))))

    net.connect(ctrl_output_relay, nav_agent.getTermination("state_input"))

    # periodically save the weights

    # period to save weights (realtime, not simulation time)
    weight_save = 600.0

    threads = [
        HRLutils.WeightSaveThread(
            nav_agent.getNode("QNetwork").saveParams,
            os.path.join("weights", "%s_%s" % (nav_agent.name, tag)),
            weight_save),
        HRLutils.WeightSaveThread(
            ctrl_agent.getNode("QNetwork").saveParams,
            os.path.join("weights", "%s_%s" % (ctrl_agent.name, tag)),
            weight_save)
    ]

    for t in threads:
        t.start()

    # data collection node
    data = datanode.DataNode(period=5,
                             filename=HRLutils.datafile("dataoutput_%s.txt" %
                                                        tag))
    net.add(data)
    data.record(env.getOrigin("reward"))
    q_net = ctrl_agent.getNode("QNetwork")
    data.record(q_net.getNode("actionvals").getOrigin("X"), func=max)
    data.record(q_net.getNode("actionvals").getOrigin("X"), func=min)
    data.record_sparsity(q_net.getNode("state_pop").getOrigin("AXON"))
    data.record_avg(q_net.getNode("valdiff").getOrigin("X"))
    data.record_avg(ctrl_agent.getNode("ErrorNetwork").getOrigin("error"))

    #     net.add_to_nengo()
    #     net.run(10000)
    net.view()

    for t in threads:
        t.stop()
Example #31
0
def run_contextenvironment(args, seed=None):
    """Runs the model on the context task.

    :param args: kwargs for the agent
    :param seed: random seed
    """

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    net = nef.Network("runContextEnvironment")

    if "load_weights" in args and args["load_weights"] is not None:
        args["load_weights"] += "_%s" % seed

    stateN = 1200  # number of neurons to use in state population
    contextD = 2  # dimension of context vector
    context_scale = 1.0  # scale of context representation
    max_state_input = 2  # max length of input vector for state population
    # actions (label and vector) available to the system
    actions = [("up", [0, 1]), ("right", [1, 0]),
               ("down", [0, -1]), ("left", [-1, 0])]

    # context labels and rewards for achieving those context goals
    rewards = {"a": 1.5, "b": 1.5}

    env = contextenvironment.ContextEnvironment(
        actions, HRLutils.datafile("contextmap.bmp"), contextD, rewards,
        colormap={-16777216: "wall", -1: "floor", -256: "a", -2088896: "b"},
        imgsize=(5, 5), dx=0.001, placedev=0.5)
    net.add(env)

    print "generated", len(env.placecells), "placecells"

    # termination node for agent (just goes off on some regular interval)
    term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.9)): 0.0}, env)
    net.add(term_node)

    # generate encoders and divide by max_state_input (so that all inputs
    # will end up being radius 1)
    enc = env.gen_encoders(stateN, contextD, context_scale)
    enc = MU.prod(enc, 1.0 / max_state_input)

    # load eval points from file
    with open(HRLutils.datafile("contextbmp_evalpoints_%s.txt" % seed)) as f:
        print "loading contextbmp_evalpoints_%s.txt" % seed
        evals = [[float(x) for x in l.split(" ")] for l in f.readlines()]

    agent = smdpagent.SMDPAgent(stateN, len(env.placecells) + contextD,
                                actions, state_encoders=enc, state_evals=evals,
                                state_threshold=0.8, **args)
    net.add(agent)

    print "agent neurons:", agent.countNeurons()

    # period to save weights (realtime, not simulation time)
    weight_save = 600.0
    t = HRLutils.WeightSaveThread(agent.getNode("QNetwork").saveParams,
                                  os.path.join("weights", "%s_%s" %
                                               (agent.name, seed)),
                                  weight_save)
    t.start()

    # data collection node
    data = datanode.DataNode(period=5,
                             filename=HRLutils.datafile("dataoutput_%s.txt" %
                                                        seed))
    net.add(data)
    q_net = agent.getNode("QNetwork")
    data.record(env.getOrigin("reward"))
    data.record(q_net.getNode("actionvals").getOrigin("X"), func=max)
    data.record(q_net.getNode("actionvals").getOrigin("X"), func=min)
    data.record_sparsity(q_net.getNode("state_pop").getOrigin("AXON"))
    data.record_avg(q_net.getNode("valdiff").getOrigin("X"))
    data.record_avg(env.getOrigin("state"))

    net.connect(env.getOrigin("placewcontext"),
                agent.getTermination("state_input"))
    net.connect(env.getOrigin("reward"), agent.getTermination("reward"))
    net.connect(term_node.getOrigin("reset"), agent.getTermination("reset"))
    net.connect(term_node.getOrigin("learn"), agent.getTermination("learn"))
    net.connect(term_node.getOrigin("reset"),
                agent.getTermination("save_state"))
    net.connect(term_node.getOrigin("reset"),
                agent.getTermination("save_action"))

    net.connect(agent.getOrigin("action_output"), env.getTermination("action"))

#    net.add_to_nengo()
#    net.run(2000)
    net.view()

    t.stop()
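
A minimal sketch of how run_contextenvironment might be driven; the kwargs are forwarded to SMDPAgent, and the specific values shown here are illustrative assumptions, not values from the source:

    # hypothetical driver; the learningrate value is an assumption
    run_contextenvironment({"learningrate": 9e-10, "load_weights": None},
                           seed=1)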
Example #32
0
def run_deliveryenvironment(navargs, ctrlargs, tag=None, seed=None):
    """Runs the model on the delivery task.

    :param navargs: kwargs for the nav_agent (see SMDPAgent.__init__)
    :param ctrlargs: kwargs for the ctrl_agent (see SMDPAgent.__init__)
    :param tag: string appended to datafiles associated with this run
    :param seed: random seed used for this run
    """

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    if tag is None:
        tag = str(seed)

    net = nef.Network("runDeliveryEnvironment", seed=seed)

    stateN = 1200  # number of neurons to use in state population
    contextD = 2  # dimension of context vector
    context_scale = 1.0  # relative scale of context vector vs state vector
    max_state_input = 2  # maximum length of input vector to state population

    # labels and vectors corresponding to basic actions available to the system
    actions = [("up", [0, 1]), ("right", [1, 0]),
               ("down", [0, -1]), ("left", [-1, 0])]

    if "load_weights" in navargs and navargs["load_weights"] is not None:
        navargs["load_weights"] += "_%s" % tag
    if "load_weights" in ctrlargs and ctrlargs["load_weights"] is not None:
        ctrlargs["load_weights"] += "_%s" % tag

    # ##ENVIRONMENT

    env = deliveryenvironment.DeliveryEnvironment(
        actions, HRLutils.datafile("contextmap.bmp"),
        colormap={-16777216: "wall", -1: "floor", -256: "a", -2088896: "b"},
        imgsize=(5, 5), dx=0.001, placedev=0.5)
    net.add(env)

    print "generated", len(env.placecells), "placecells"

    # ##NAV AGENT

    # generate encoders and divide them by max_state_input (so that inputs
    # will be scaled down to radius 1)
    enc = env.gen_encoders(stateN, contextD, context_scale)
    enc = MU.prod(enc, 1.0 / max_state_input)

    # read in eval points from file
    with open(HRLutils.datafile("contextbmp_evalpoints_%s.txt" % tag)) as f:
        evals = [[float(x) for x in l.split(" ")] for l in f.readlines()]

    nav_agent = smdpagent.SMDPAgent(stateN, len(env.placecells) + contextD,
                                    actions, name="NavAgent",
                                    state_encoders=enc, state_evals=evals,
                                    state_threshold=0.8,
                                    **navargs)
    net.add(nav_agent)

    print "agent neurons:", nav_agent.countNeurons()

    # output of nav_agent is what goes to the environment
    net.connect(nav_agent.getOrigin("action_output"),
                env.getTermination("action"))

    # termination node for nav_agent (just a timer that goes off regularly)
    nav_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.9)): None}, env, contextD=2,
        name="NavTermNode")
    net.add(nav_term_node)

    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("reset"))
    net.connect(nav_term_node.getOrigin("learn"),
                nav_agent.getTermination("learn"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_state"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_action"))

    # ##CTRL AGENT

    # actions corresponding to "go to A" or "go to B"
    actions = [("a", [0, 1]), ("b", [1, 0])]
    ctrl_agent = smdpagent.SMDPAgent(stateN, len(env.placecells) + contextD,
                                     actions, name="CtrlAgent",
                                     state_encoders=enc, state_evals=evals,
                                     state_threshold=0.8, **ctrlargs)
    net.add(ctrl_agent)

    print "agent neurons:", ctrl_agent.countNeurons()

    # ctrl_agent gets environmental state and reward
    net.connect(env.getOrigin("placewcontext"),
                ctrl_agent.getTermination("state_input"))
    net.connect(env.getOrigin("reward"),
                ctrl_agent.getTermination("reward"))

    # termination node for ctrl_agent (terminates whenever the agent is in the
    # state targeted by the ctrl_agent)
    # also has a long timer so that ctrl_agent doesn't get permanently stuck
    # in one action
    ctrl_term_node = terminationnode.TerminationNode(
        {"a": [0, 1], "b": [1, 0], terminationnode.Timer((30, 30)): None},
        env, contextD=2, name="CtrlTermNode", rewardval=1.5)
    net.add(ctrl_term_node)

    # reward for nav_agent is the pseudoreward from ctrl_agent termination
    net.connect(ctrl_term_node.getOrigin("pseudoreward"),
                nav_agent.getTermination("reward"))

    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("reset"))
    net.connect(ctrl_term_node.getOrigin("learn"),
                ctrl_agent.getTermination("learn"))
    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("save_state"))
    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("save_action"))

    # connect ctrl_agent action to termination context
    # this is used so that ctrl_term_node knows what the current goal is (to
    # determine termination and pseudoreward)
    net.connect(ctrl_agent.getOrigin("action_output"),
                ctrl_term_node.getTermination("context"))

    # state input for nav_agent is the environmental state + the output of
    # ctrl_agent
    ctrl_output_relay = net.make("ctrl_output_relay", 1,
                                 len(env.placecells) + contextD, mode="direct")
    ctrl_output_relay.fixMode()
    trans = (list(MU.I(len(env.placecells))) +
             [[0 for _ in range(len(env.placecells))]
              for _ in range(contextD)])
    net.connect(env.getOrigin("place"), ctrl_output_relay, transform=trans)
    net.connect(ctrl_agent.getOrigin("action_output"), ctrl_output_relay,
                transform=([[0 for _ in range(contextD)]
                            for _ in range(len(env.placecells))] +
                           list(MU.I(contextD))))

    net.connect(ctrl_output_relay, nav_agent.getTermination("state_input"))

    # periodically save the weights

    # period to save weights (realtime, not simulation time)
    weight_save = 600.0

    threads = [
        HRLutils.WeightSaveThread(nav_agent.getNode("QNetwork").saveParams,
                                  os.path.join("weights", "%s_%s" %
                                               (nav_agent.name, tag)),
                                  weight_save),
        HRLutils.WeightSaveThread(ctrl_agent.getNode("QNetwork").saveParams,
                                  os.path.join("weights", "%s_%s" %
                                               (ctrl_agent.name, tag)),
                                  weight_save)]

    for t in threads:
        t.start()

    # data collection node
    data = datanode.DataNode(period=5,
                             filename=HRLutils.datafile("dataoutput_%s.txt" %
                                                        tag))
    net.add(data)
    data.record(env.getOrigin("reward"))
    q_net = ctrl_agent.getNode("QNetwork")
    data.record(q_net.getNode("actionvals").getOrigin("X"), func=max)
    data.record(q_net.getNode("actionvals").getOrigin("X"), func=min)
    data.record_sparsity(q_net.getNode("state_pop").getOrigin("AXON"))
    data.record_avg(q_net.getNode("valdiff").getOrigin("X"))
    data.record_avg(ctrl_agent.getNode("ErrorNetwork").getOrigin("error"))

#     net.add_to_nengo()
#     net.run(10000)
    net.view()

    for t in threads:
        t.stop()
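
A minimal sketch of invoking run_deliveryenvironment; both kwarg dicts are forwarded to the respective SMDPAgents, and the values are illustrative assumptions:

    # hypothetical invocation; the learningrate values are assumptions
    run_deliveryenvironment({"learningrate": 9e-10},   # navargs
                            {"learningrate": 9e-10},   # ctrlargs
                            tag="delivery_test", seed=1)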
Example #33
0
 def getDecoders(self):
     neurons = self._parent.nodes[0].neurons
     decoders = MU.zero(neurons * len(self._origins), self._dimensions)
     for i, o in enumerate(self._origins):
         MU.copyInto(o.decoders, decoders, i * neurons, i * o.dimensions,
                     neurons)
     return decoders
Example #34
0
    def __init__(self, stateN, stateD, state_encoders, actions, learningrate,
                 stateradius=1.0, Qradius=1.0, load_weights=None,
                 state_evals=None, state_threshold=0.0):
        """Builds the QNetwork.

        :param stateN: number of neurons to use to represent state
        :param stateD: dimension of state vector
        :param state_encoders: encoders to use for neurons in state population
        :param actions: actions available to the system
            :type actions: list of tuples (action_name,action_vector)
        :param learningrate: learningrate for action value learning rule
        :param stateradius: expected radius of state values
        :param Qradius: expected radius of Q values
        :param load_weights: filename to load Q value weights from
        :param state_evals: evaluation points to use for state population.
            This is used when initializing the Q values (may be necessary if the
            input states don't tend to fall in the hypersphere).
        :param state_threshold: threshold of state neurons (minimum intercept)
        """

        self.name = "QNetwork"
        net = nef.Network(self, seed=HRLutils.SEED, quick=False)

        N = 50
        tauPSC = 0.007
        num_actions = len(actions)
        init_Qs = 0.2  # initial value for all Q values

        # if True, use neuron-to-neuron weight learning; otherwise use
        # decoder learning
        self.neuron_learning = False

        # set up relays
        state_relay = net.make("state_relay", 1, stateD, mode="direct")
        # fix the simulator mode to the current mode, so that it is not
        # overridden later (e.g. by an overzealous config file)
        state_relay.fixMode()
        state_relay.addDecodedTermination("input", MU.I(stateD), 0.001, False)

        # create state population
        state_fac = HRLutils.node_fac()
        state_fac.setIntercept(IndicatorPDF(state_threshold, 1.0))

        print("making the state_pop")
        state_pop = net.make("state_pop", stateN, stateD,
                              radius=stateradius,
                              node_factory=state_fac,
                              encoders=state_encoders,
                              eval_points=state_evals)
        state_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

        net.connect(state_relay, state_pop, pstc=tauPSC)

        # store the state value (used to drive the population encoding the
        # previous state)
        print("create the saved state memory")
        saved_state = memory.Memory("saved_state", N * 4, stateD,
                                    inputscale=50, radius=stateradius,
                                    direct_storage=True)
        net.add(saved_state)

        net.connect(state_relay, saved_state.getTermination("target"))

        # create population representing previous state
        old_state_pop = net.make("old_state_pop", stateN, stateD,
                              radius=stateradius,
                              node_factory=state_fac,
                              encoders=state_encoders,
                              eval_points=state_evals)
        old_state_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

        net.connect(saved_state, old_state_pop, pstc=tauPSC)

        print("setup the action nodes")
        # set up action nodes
        if self.neuron_learning:
            # use ActionValues network to compute Q values

            # current Q values
            decoders = state_pop.addDecodedOrigin("init_decoders",
                                                  [ConstantFunction(stateD, init_Qs)], "AXON").getDecoders()
            actionvals = actionvalues.ActionValues("actionvals", N, stateN, actions, learningrate,
                                                   Qradius=Qradius, init_decoders=decoders)
            net.add(actionvals)

            net.connect(state_pop.getOrigin("AXON"), actionvals.getTermination("state"))

            # Q values of previous state
            decoders = old_state_pop.addDecodedOrigin("init_decoders",
                                                      [ConstantFunction(stateD, init_Qs)], "AXON").getDecoders()
            old_actionvals = actionvalues.ActionValues("old_actionvals", N, stateN, actions, learningrate,
                                                       Qradius=Qradius, init_decoders=decoders)
            net.add(old_actionvals)

            net.connect(old_state_pop.getOrigin("AXON"), old_actionvals.getTermination("state"))
        else:
            # just use decoder on state population to compute Q values

            # current Q values
            origin = state_pop.addDecodedOrigin("vals",
                                        [ConstantFunction(num_actions, init_Qs) for _ in range(num_actions)],
                                        "AXON")
            state_dlnode = decoderlearningnode.DecoderLearningNode(state_pop, origin, learningrate,
                                                                   num_actions, name="state_learningnode")
            net.add(state_dlnode)

            # just a little relay node, so that things match up for the rest
            # of the script when you have the neuron-to-neuron learning
            actionvals = net.make("actionvals", 1, num_actions, mode="direct")
            actionvals.fixMode()
            net.connect(origin, actionvals, pstc=0.001)

            # Q values of previous state
            origin = old_state_pop.addDecodedOrigin("vals",
                                        [ConstantFunction(num_actions, init_Qs) for _ in range(num_actions)],
                                        "AXON")
            old_state_dlnode = decoderlearningnode.DecoderLearningNode(
                old_state_pop, origin, learningrate, num_actions,
                name="old_state_learningnode")
            net.add(old_state_dlnode)

            old_actionvals = net.make("old_actionvals", 1, num_actions, mode="direct")
            old_actionvals.fixMode()
            net.connect(origin, old_actionvals, pstc=0.001)

        if load_weights is not None:
            self.loadParams(load_weights)

        # find error between old_actionvals and actionvals (this will be used to drive learning
        # on the new actionvals)
        valdiff = net.make_array("valdiff", N, num_actions, node_factory=HRLutils.node_fac())
        # doubling the values to get a bigger error signal
        net.connect(old_actionvals, valdiff,
                    transform=MU.diag([2] * num_actions), pstc=tauPSC)
        net.connect(actionvals, valdiff,
                    transform=MU.diag([-2] * num_actions), pstc=tauPSC)

        # calculate diff between curr_state and saved_state, and use that to
        # gate valdiff (we only want to train the current state based on the
        # previous state when the two have similar values)
        # note: the intercept is > 0 so that there is a deadzone in the middle
        # (when the states are similar) where there will be no output
        # inhibition
        statediff = net.make_array("statediff", N, stateD, intercept=(0.2, 1))
        net.connect(state_relay, statediff, pstc=tauPSC)
        net.connect(saved_state, statediff, transform=MU.diag([-1] * stateD), pstc=tauPSC)

        net.connect(statediff, valdiff, func=lambda x: [abs(v) for v in x],
                    transform=[[-10] * stateD for _ in range(valdiff.getNeurons())], pstc=tauPSC)

        # connect up valdiff to the error signal for current Q values, and expose
        # the error signal for the previous Q values to the external error
        if self.neuron_learning:
            net.connect(valdiff, actionvals.getTermination("error"))
            self.exposeTermination(old_actionvals.getTermination("error"), "error")
        else:
            net.connect(valdiff, state_dlnode.getTermination("error"))
            self.exposeTermination(old_state_dlnode.getTermination("error"), "error")

        self.exposeTermination(state_relay.getTermination("input"), "state")
        self.exposeTermination(saved_state.getTermination("transfer"), "save_state")
        self.exposeOrigin(actionvals.getOrigin("X"), "vals")
        self.exposeOrigin(old_actionvals.getOrigin("X"), "old_vals")
Example #35
0
# Simulations to verify analytical estimates of error dependencies on network
# time constants

from ca.nengo.math import Function
from ca.nengo.math.impl import FourierFunction
from ca.nengo.math.impl import IndicatorPDF
from ca.nengo.math.impl import ConstantFunction
from ca.nengo.model import SimulationMode
from ca.nengo.plot import Plotter
from ca.nengo.util import MU
import math

networks = [interneuron, dualTC, adapting, depressing, butterworth,
            interneuronFeedback]

tau = [.005, .01, .05, .1, .2, .5]
signalBandwidth = 15
frequencies = MU.makeVector(.1, .1, signalBandwidth)
componentRMS = math.sqrt(1.0 / len(frequencies))
signal = FourierFunction(
    frequencies,
    MU.uniform(1, len(frequencies), componentRMS / .707)[0],
    MU.random(1, len(frequencies), IndicatorPDF(-.5, .5))[0])

noiseBandwidth = 500

for network in networks:
    network.setMode(SimulationMode.DIRECT)
    network.setStepSize(.0005)
    signalPower = []
    noisePower = []

    for t in tau:
        network.setTau(t)

        network.setInputFunction(signal)
Example #36
0
def make_convolution(
    self,
    name,
    A,
    B,
    C,
    N_per_D,
    quick=False,
    encoders=[[1, 1], [1, -1], [-1, 1], [-1, -1]],
    radius=3,
    pstc_out=0.01,
    pstc_in=0.01,
    pstc_gate=0.01,
    invert_first=False,
    invert_second=False,
    mode="default",
    output_scale=1,
):
    if isinstance(A, str):
        A = self.network.getNode(A)
    if isinstance(B, str):
        B = self.network.getNode(B)
    if isinstance(C, str):
        C = self.network.getNode(C)

    dimensions = C.dimension
    if ((B is not None and B.dimension != dimensions) or
            (A is not None and A.dimension != dimensions)):
        raise Exception("Dimensions not the same for convolution "
                        "(%d,%d->%d)" % (A.dimension, B.dimension,
                                         C.dimension))

    if mode == "direct":
        D = DirectConvolution(name, dimensions, invert_first, invert_second)
        self.add(D)
        D.getTermination("A").setTau(pstc_in)
        D.getTermination("B").setTau(pstc_in)
        D.getTermination("gate").setTau(pstc_gate)
        if A is not None:
            self.connect(A, D.getTermination("A"))
        if B is not None:
            self.connect(B, D.getTermination("B"))
        self.connect(D.getOrigin("C"), C, pstc=pstc_out, weight=output_scale)
    else:
        D = make_array(self, name, N_per_D, dimensions, quick=quick,
                       encoders=encoders, radius=radius)

        A2 = input_transform(dimensions, True, invert_first)
        B2 = input_transform(dimensions, False, invert_second)

        D.addDecodedTermination("A", A2, pstc_in, False)
        D.addDecodedTermination("B", B2, pstc_in, False)

        if A is not None:
            self.connect(A, D.getTermination("A"))
        if B is not None:
            self.connect(B, D.getTermination("B"))

        ifftm2 = output_transform(dimensions)

        self.connect(D, C, func=product, transform=ifftm2 * output_scale, pstc=pstc_out)

    if self.network.getMetaData("convolution") == None:
        self.network.setMetaData("convolution", HashMap())
    bindings = self.network.getMetaData("convolution")

    binding = HashMap(15)
    binding.put("name", name)
    if A is not None:
        binding.put("A", A.getName())
    else:
        binding.put("A", None)
    if B is not None:
        binding.put("B", B.getName())
    else:
        binding.put("B", None)
    binding.put("C", C.getName())
    binding.put("N_per_D", N_per_D)
    binding.put("quick", quick)
    binding.put("encoders", MU.clone(encoders))
    binding.put("radius", radius)
    binding.put("pstc_out", pstc_out)
    binding.put("pstc_in", pstc_in)
    binding.put("pstc_gate", pstc_gate)
    binding.put("invert_first", invert_first)
    binding.put("invert_second", invert_second)
    binding.put("mode", mode)
    binding.put("output_scale", output_scale)

    bindings.put(name, binding)

    if self.network.getMetaData("templates") == None:
        self.network.setMetaData("templates", ArrayList())
    templates = self.network.getMetaData("templates")
    templates.add(name)

    if self.network.getMetaData("templateProjections") == None:
        self.network.setMetaData("templateProjections", HashMap())
    templateproj = self.network.getMetaData("templateProjections")
    if A is not None:
        templateproj.put(name, A.getName())
    if B is not None:
        templateproj.put(name, B.getName())
    templateproj.put(name, C.getName())

    return D
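
A minimal usage sketch, assuming this function is called with a nef.Network wrapper and that the helpers it references (make_array, input_transform, output_transform, product, DirectConvolution) are in scope; the ensemble names and sizes are assumptions:

    # hypothetical usage: circular convolution of two 16-D vectors, C = A (*) B
    net = nef.Network("binding")
    net.make("A", 300, 16)
    net.make("B", 300, 16)
    net.make("C", 300, 16)
    make_convolution(net, "bind", "A", "B", "C", N_per_D=70)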
Example #37
0
    def __init__(self,
                 stateN,
                 stateD,
                 state_encoders,
                 actions,
                 learningrate,
                 stateradius=1.0,
                 Qradius=1.0,
                 load_weights=None,
                 state_evals=None,
                 state_threshold=(0.0, 1.0),
                 statediff_threshold=0.2,
                 init_Qs=None):
        """Builds the QNetwork.

        :param stateN: number of neurons to use to represent state
        :param stateD: dimension of state vector
        :param state_encoders: encoders to use for neurons in state population
        :param actions: actions available to the system
            :type actions: list of tuples (action_name,action_vector)
        :param learningrate: learningrate for action value learning rule
        :param stateradius: expected radius of state values
        :param Qradius: expected radius of Q values
        :param load_weights: filename to load Q value weights from
        :param state_evals: evaluation points to use for state population.
            This is used when initializing the Q values (may be necessary if
            the input states don't tend to fall in the hypersphere).
        :param state_threshold: threshold range of state neurons
        :param statediff_threshold: maximum state difference for dual training
        :param init_Qs: initial Q values
        """

        self.name = "QNetwork"
        net = nef.Network(self, seed=HRLutils.SEED, quick=False)

        N = 50
        tauPSC = 0.007
        num_actions = len(actions)
        init_Qs = [0.2] * num_actions if init_Qs is None else init_Qs

        # if True, use neuron--neuron weight learning, otherwise, use decoder
        # learning
        self.neuron_learning = False

        # set up relays
        state_relay = net.make("state_relay", 1, stateD, mode="direct")
        state_relay.fixMode()
        state_relay.addDecodedTermination("input", MU.I(stateD), 0.001, False)

        # create state population
        state_fac = HRLutils.node_fac()
        if isinstance(state_threshold, (float, int)):
            state_threshold = (state_threshold, 1.0)
        state_fac.setIntercept(
            IndicatorPDF(state_threshold[0], state_threshold[1]))

        state_pop = net.make("state_pop",
                             stateN,
                             stateD,
                             radius=stateradius,
                             node_factory=state_fac,
                             encoders=state_encoders,
                             eval_points=state_evals)
        state_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

        net.connect(state_relay, state_pop, pstc=tauPSC)

        # store the state value (used to drive population encoding previous
        # state)
        saved_state = memory.Memory("saved_state",
                                    N * 4,
                                    stateD,
                                    inputscale=50,
                                    radius=stateradius,
                                    direct_storage=True)
        net.add(saved_state)

        net.connect(state_relay, saved_state.getTermination("target"))

        # create population representing previous state
        old_state_pop = net.make("old_state_pop",
                                 stateN,
                                 stateD,
                                 radius=stateradius,
                                 node_factory=state_fac,
                                 encoders=state_encoders,
                                 eval_points=state_evals)
        old_state_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

        net.connect(saved_state, old_state_pop, pstc=tauPSC)

        # set up action nodes
        if self.neuron_learning:
            # use ActionValues network to compute Q values

            # current Q values
            decoders = state_pop.addDecodedOrigin(
                "init_decoders", [ConstantFunction(stateD, init_Qs)],
                "AXON").getDecoders()
            actionvals = actionvalues.ActionValues("actionvals",
                                                   N,
                                                   stateN,
                                                   actions,
                                                   learningrate,
                                                   Qradius=Qradius,
                                                   init_decoders=decoders)
            net.add(actionvals)

            net.connect(state_pop.getOrigin("AXON"),
                        actionvals.getTermination("state"))

            # Q values of previous state
            decoders = old_state_pop.addDecodedOrigin(
                "init_decoders", [ConstantFunction(stateD, init_Qs)],
                "AXON").getDecoders()
            old_actionvals = actionvalues.ActionValues("old_actionvals",
                                                       N,
                                                       stateN,
                                                       actions,
                                                       learningrate,
                                                       Qradius=Qradius,
                                                       init_decoders=decoders)
            net.add(old_actionvals)

            net.connect(old_state_pop.getOrigin("AXON"),
                        old_actionvals.getTermination("state"))
        else:
            # just use decoder on state population to compute Q values

            # current Q values
            origin = state_pop.addDecodedOrigin("vals", [
                ConstantFunction(num_actions, init_Qs[i])
                for i in range(num_actions)
            ], "AXON")
            state_dlnode = decoderlearningnode.DecoderLearningNode(
                state_pop,
                origin,
                learningrate,
                num_actions,
                name="state_learningnode")
            net.add(state_dlnode)

            # just a little relay node, so that things match up for the rest of
            # the script when you have the neuron -- neuron learning
            actionvals = net.make("actionvals", 1, num_actions, mode="direct")
            actionvals.fixMode()
            net.connect(origin, actionvals, pstc=0.001)

            # Q values of previous state
            origin = old_state_pop.addDecodedOrigin("vals", [
                ConstantFunction(num_actions, init_Qs[i])
                for i in range(num_actions)
            ], "AXON")
            old_state_dlnode = decoderlearningnode.DecoderLearningNode(
                old_state_pop,
                origin,
                learningrate,
                num_actions,
                name="old_state_learningnode")
            net.add(old_state_dlnode)

            old_actionvals = net.make("old_actionvals",
                                      1,
                                      num_actions,
                                      mode="direct")
            old_actionvals.fixMode()
            net.connect(origin, old_actionvals, pstc=0.001)

        if load_weights is not None:
            self.loadParams(load_weights)

        # find error between old_actionvals and actionvals (this will be used
        # to drive learning on the new actionvals)
        valdiff = net.make_array("valdiff",
                                 N,
                                 num_actions,
                                 node_factory=HRLutils.node_fac())
        # doubling the values to get a bigger error signal
        net.connect(old_actionvals,
                    valdiff,
                    transform=MU.diag([2] * num_actions),
                    pstc=tauPSC)
        net.connect(actionvals,
                    valdiff,
                    transform=MU.diag([-2] * num_actions),
                    pstc=tauPSC)

        # calculate diff between curr_state and saved_state and use that to
        # gate valdiff (we only want to train the curr state based on previous
        # state when the two have similar values)
        # note: threshold > 0 so that there is a deadzone in the middle (when
        # the states are similar) where there will be no output inhibition
        statediff = net.make_array("statediff",
                                   N,
                                   stateD,
                                   intercept=(statediff_threshold, 1))

        net.connect(state_relay, statediff, pstc=tauPSC)
        net.connect(saved_state,
                    statediff,
                    transform=MU.diag([-1] * stateD),
                    pstc=tauPSC)

        net.connect(statediff,
                    valdiff,
                    func=lambda x: [abs(v) for v in x],
                    transform=[[-10] * stateD
                               for _ in range(valdiff.getNeurons())],
                    pstc=tauPSC)

        # connect up valdiff to the error signal for current Q values, and
        # expose the error signal for the previous Q values to the external
        # error
        if self.neuron_learning:
            net.connect(valdiff, actionvals.getTermination("error"))
            self.exposeTermination(old_actionvals.getTermination("error"),
                                   "error")
        else:
            net.connect(valdiff, state_dlnode.getTermination("error"))
            self.exposeTermination(old_state_dlnode.getTermination("error"),
                                   "error")

        self.exposeTermination(state_relay.getTermination("input"), "state")
        self.exposeTermination(saved_state.getTermination("transfer"),
                               "save_state")
        self.exposeOrigin(actionvals.getOrigin("X"), "vals")
        self.exposeOrigin(old_actionvals.getOrigin("X"), "old_vals")
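
A minimal sketch of constructing this QNetwork with a toy 2-D state space; all values here are assumptions (the runner examples above generate encoders with env.gen_encoders instead):

    # hypothetical construction; encoders and learningrate are assumptions
    import random
    stateD = 2
    enc = [[random.choice([-1, 1]) for _ in range(stateD)]
           for _ in range(1200)]
    actions = [("up", [0, 1]), ("right", [1, 0]),
               ("down", [0, -1]), ("left", [-1, 0])]
    qnet = QNetwork(1200, stateD, enc, actions, learningrate=9e-10)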
Example #38
0
 def termination_action(self, a, pstc=0.01):
     # set the selected action to the one with highest similarity to the
     # available actions
     self.action = max(self.actions, key=lambda x: MU.prod(a, x[1]))
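
For illustration, a plain-Python equivalent of this selection rule (MU.prod(a, x[1]) reduces to a dot product here), with made-up action vectors:

    # illustrative only; the actions and input vector are assumptions
    actions = [("up", [0, 1]), ("down", [0, -1])]
    a = [0.2, 0.9]
    best = max(actions, key=lambda x: sum(ai * xi for ai, xi in zip(a, x[1])))
    # best == ("up", [0, 1]), since a is most similar to [0, 1]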
Example #39
0
def run_flat_delivery(args, seed=None):
    """Runs the model on the delivery task with only one hierarchical level."""

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    net = nef.Network("run_flat_delivery")

    if "load_weights" in args and args["load_weights"] is not None:
        args["load_weights"] += "_%s" % seed

    stateN = 1200
    contextD = 2
    context_scale = 1.0
    max_state_input = 2
    actions = [("up", [0, 1]), ("right", [1, 0]),
               ("down", [0, -1]), ("left", [-1, 0])]

    # ##ENVIRONMENT

    env = deliveryenvironment.DeliveryEnvironment(
        actions, HRLutils.datafile("contextmap.bmp"),
        colormap={-16777216: "wall", -1: "floor", -256: "a", -2088896: "b"},
        imgsize=(5, 5), dx=0.001, placedev=0.5)
    net.add(env)

    print "generated", len(env.placecells), "placecells"

    # ##NAV AGENT

    enc = env.gen_encoders(stateN, contextD, context_scale)
    enc = MU.prod(enc, 1.0 / max_state_input)

    with open(HRLutils.datafile("contextbmp_evalpoints_%s.txt" % seed)) as f:
        evals = [[float(x) for x in l.split(" ")] for l in f.readlines()]

    nav_agent = smdpagent.SMDPAgent(stateN, len(env.placecells) + contextD,
                                    actions, name="NavAgent",
                                    state_encoders=enc, state_evals=evals,
                                    state_threshold=0.8, **args)
    net.add(nav_agent)

    print "agent neurons:", nav_agent.countNeurons()

    net.connect(nav_agent.getOrigin("action_output"),
                env.getTermination("action"))
    net.connect(env.getOrigin("placewcontext"),
                nav_agent.getTermination("state_input"))

    nav_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.9)): None}, env, name="NavTermNode",
        contextD=2)
    net.add(nav_term_node)
    net.connect(env.getOrigin("context"),
                nav_term_node.getTermination("context"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("reset"))
    net.connect(nav_term_node.getOrigin("learn"),
                nav_agent.getTermination("learn"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_state"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_action"))

    reward_relay = net.make("reward_relay", 1, 1, mode="direct")
    reward_relay.fixMode()
    net.connect(env.getOrigin("reward"), reward_relay)
    net.connect(nav_term_node.getOrigin("pseudoreward"), reward_relay)
    net.connect(reward_relay, nav_agent.getTermination("reward"))

    # period to save weights (realtime, not simulation time)
    weight_save = 600.0
    HRLutils.WeightSaveThread(nav_agent.getNode("QNetwork").saveParams,
                              os.path.join("weights", "%s_%s" %
                                           (nav_agent.name, seed)),
                              weight_save).start()

    # data collection node
    data = datanode.DataNode(period=5,
                             filename=HRLutils.datafile("dataoutput_%s.txt" %
                                                        seed))
    net.add(data)
    q_net = nav_agent.getNode("QNetwork")
    data.record_avg(env.getOrigin("reward"))
    data.record_avg(q_net.getNode("actionvals").getOrigin("X"))
    data.record_sparsity(q_net.getNode("state_pop").getOrigin("AXON"))
    data.record_avg(q_net.getNode("valdiff").getOrigin("X"))
    data.record_avg(nav_agent.getNode("ErrorNetwork").getOrigin("error"))

#    net.add_to_nengo()
#    net.run(10000)
    net.view()
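
A minimal sketch of invoking run_flat_delivery; note that load_weights (if given) has the seed appended automatically, as above. The values are illustrative assumptions:

    # hypothetical invocation; the learningrate value is an assumption
    run_flat_delivery({"learningrate": 9e-10, "load_weights": None}, seed=1)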
Example #40
0
    def __init__(self,
                 num_actions,
                 Qradius=1.0,
                 rewardradius=1.0,
                 discount=0.3):
        """Builds the ErrorNetwork.

        :param num_actions: the number of actions available to the system
        :param Qradius: expected radius of Q values
        :param rewardradius: expected radius of reward signal
        :param discount: discount factor
        """

        self.name = "ErrorNetwork"
        net = nef.Network(self, seed=HRLutils.SEED, quick=False)

        N = 50
        tauPSC = 0.007
        # soft cap on error magnitude (large errors seem to cause problems
        # with overly generalizing the learning)
        errorcap = 0.1

        # set up relays
        vals_relay = net.make("vals_relay", 1, num_actions, mode="direct")
        vals_relay.fixMode()
        vals_relay.addDecodedTermination("input", MU.I(num_actions), 0.001,
                                         False)

        old_vals_relay = net.make("old_vals_relay",
                                  1,
                                  num_actions,
                                  mode="direct")
        old_vals_relay.fixMode()
        old_vals_relay.addDecodedTermination("input", MU.I(num_actions), 0.001,
                                             False)

        curr_bg_relay = net.make("curr_bg_relay",
                                 1,
                                 num_actions,
                                 mode="direct")
        curr_bg_relay.fixMode()
        curr_bg_relay.addDecodedTermination("input", MU.I(num_actions), 0.001,
                                            False)

        saved_bg_relay = net.make("saved_bg_relay",
                                  1,
                                  num_actions,
                                  mode="direct")
        saved_bg_relay.fixMode()
        saved_bg_relay.addDecodedTermination("input", MU.I(num_actions), 0.001,
                                             False)

        # select out only the currently chosen Q value
        gatedQ = net.make_array("gatedQ",
                                N * 2,
                                num_actions,
                                node_factory=HRLutils.node_fac(),
                                radius=Qradius)
        gatedQ.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

        net.connect(vals_relay, gatedQ, pstc=tauPSC)

        net.connect(
            curr_bg_relay,
            gatedQ,
            transform=[[-3 if i != k else 0 for k in range(num_actions)]
                       for i in range(num_actions)
                       for _ in range(gatedQ.getNeurons() / num_actions)],
            pstc=tauPSC)

        currQ = net.make("currQ", 1, 1, mode="direct")
        currQ.fixMode()
        net.connect(gatedQ,
                    currQ,
                    transform=[[1 for _ in range(num_actions)]],
                    pstc=0.001)

        # select out only the previously chosen Q value
        gatedstoreQ = net.make_array("gatedstoreQ",
                                     N * 2,
                                     num_actions,
                                     node_factory=HRLutils.node_fac(),
                                     radius=Qradius)
        gatedstoreQ.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

        net.connect(old_vals_relay, gatedstoreQ, pstc=tauPSC)

        net.connect(
            saved_bg_relay,
            gatedstoreQ,
            transform=[[-3 if i != k else 0 for k in range(num_actions)]
                       for i in range(num_actions)
                       for _ in range(gatedstoreQ.getNeurons() / num_actions)],
            pstc=tauPSC)

        storeQ = net.make("storeQ", 1, 1, mode="direct")
        storeQ.fixMode()
        net.connect(gatedstoreQ,
                    storeQ,
                    transform=[[1 for _ in range(num_actions)]],
                    pstc=0.001)

        # create error calculation network
        error = errorcalc2.ErrorCalc2(discount,
                                      rewardradius=rewardradius,
                                      Qradius=Qradius)
        net.add(error)

        net.connect(currQ, error.getTermination("currQ"))
        net.connect(storeQ, error.getTermination("storeQ"))

        # gate error by learning signal and saved BG output (we only want error
        # when the system is supposed to be learning, and we only want error
        # related to the action that was selected)
        gatederror = net.make_array("gatederror",
                                    N * 2,
                                    num_actions,
                                    radius=errorcap,
                                    node_factory=HRLutils.node_fac())
        gatederror.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

        net.connect(error,
                    gatederror,
                    transform=[[1.0 / Qradius] for _ in range(num_actions)],
                    pstc=tauPSC)
        # scale the error by Qradius, so that we don't get super huge errors
        # (causes problems with the gating)

        learninggate = net.make("learninggate",
                                N,
                                1,
                                node_factory=HRLutils.node_fac())
        learninggate.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
        learninggate.addTermination("gate", [[-10] for _ in range(N)], tauPSC,
                                    False)

        net.connect(learninggate,
                    gatederror,
                    func=lambda x: [1.0],
                    transform=[[-12] for _ in range(gatederror.getNeurons())],
                    pstc=tauPSC)

        net.connect(
            saved_bg_relay,
            gatederror,
            transform=[[-12 if i != k else 0 for k in range(num_actions)]
                       for i in range(num_actions)
                       for _ in range(gatederror.getNeurons() / num_actions)],
            pstc=tauPSC)

        # add a positive bias to the error anywhere the Q values are negative
        # (to stop Q values from getting too negative, which causes problems
        # with the action selection)
        posbias = positivebias.PositiveBias(N, num_actions)
        net.add(posbias)
        net.connect(old_vals_relay, posbias.getTermination("input"))
        net.connect(learninggate,
                    posbias.getTermination("learn"),
                    func=lambda x: [1.0])

        biasederror = net.make("biasederror", 1, num_actions, mode="direct")
        biasederror.fixMode()
        net.connect(gatederror, biasederror, pstc=0.001)
        net.connect(posbias, biasederror, pstc=0.001)

        self.exposeTermination(curr_bg_relay.getTermination("input"),
                               "curr_bg_input")
        self.exposeTermination(saved_bg_relay.getTermination("input"),
                               "saved_bg_input")
        self.exposeTermination(vals_relay.getTermination("input"), "vals")
        self.exposeTermination(old_vals_relay.getTermination("input"),
                               "old_vals")
        self.exposeTermination(error.getTermination("reward"), "reward")
        self.exposeTermination(error.getTermination("reset"), "reset")
        self.exposeTermination(learninggate.getTermination("gate"), "learn")
        self.exposeOrigin(biasederror.getOrigin("X"), "error")
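
For reference, a scalar sketch of the TD(0)-style error that the ErrorCalc2 subnetwork is wired to produce from these inputs; this is an inference from the exposed currQ/storeQ/reward terminations and the discount parameter, not code from the source:

    def td_error(reward, currQ, storeQ, discount=0.3):
        # error = r + gamma * Q(s', a') - Q(s, a)
        return reward + discount * currQ - storeQ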
Example #41
0
def run_badreenvironment(nav_args, ctrl_args, bias=0.0, seed=None, flat=False,
                         label="tmp"):
    """Runs the model on the Badre et al. (2010) task."""

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    net = nef.Network("run_badreenvironment")

    env = badreenvironment.BadreEnvironment(flat=flat)
    net.add(env)

    # ##NAV AGENT
    stateN = 500
    max_state_input = 3
    enc = env.gen_encoders(stateN, 0, 0.0)

    # generate evaluation points
    orientations = MU.I(env.num_orientations)
    shapes = MU.I(env.num_shapes)
    colours = MU.I(env.num_colours)
    evals = (list(MU.diag([3 for _ in range(env.stateD)])) +
             [o + s + c
              for o in orientations for s in shapes for c in colours])

    # create lower level
    nav_agent = smdpagent.SMDPAgent(stateN, env.stateD, env.actions,
                                    name="NavAgent",
                                    stateradius=max_state_input,
                                    state_encoders=enc, state_evals=evals,
                                    discount=0.5, **nav_args)
    net.add(nav_agent)

    print "agent neurons:", nav_agent.countNeurons()

    # actions terminate on fixed schedule (aligned with environment)
    nav_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.6)): None}, env, name="NavTermNode",
        state_delay=0.1, reset_delay=0.05, reset_interval=0.1)
    net.add(nav_term_node)

    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("reset"))
    net.connect(nav_term_node.getOrigin("learn"),
                nav_agent.getTermination("learn"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_state"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_action"))

    net.connect(nav_agent.getOrigin("action_output"),
                env.getTermination("action"))

    # ##CTRL AGENT
    stateN = 500
    enc = RandomHypersphereVG().genVectors(stateN, env.stateD)
    actions = [("shape", [0, 1]), ("orientation", [1, 0]), ("null", [0, 0])]
    ctrl_agent = smdpagent.SMDPAgent(stateN, env.stateD, actions,
                                     name="CtrlAgent", state_encoders=enc,
                                     stateradius=max_state_input,
                                     state_evals=evals, discount=0.4,
                                     **ctrl_args)
    net.add(ctrl_agent)

    print "agent neurons:", ctrl_agent.countNeurons()

    net.connect(env.getOrigin("state"),
                ctrl_agent.getTermination("state_input"))

    ctrl_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.6)): None}, env, name="CtrlTermNode",
        state_delay=0.1, reset_delay=0.05, reset_interval=0.1)
    net.add(ctrl_term_node)

    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("reset"))
    net.connect(ctrl_term_node.getOrigin("learn"),
                ctrl_agent.getTermination("learn"))
    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("save_state"))
    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("save_action"))

    # ctrl gets a slight bonus if it selects a rule (as opposed to null), to
    # encourage it to not just pick null all the time
    reward_relay = net.make("reward_relay", 1, 3, mode="direct")
    reward_relay.fixMode()
    net.connect(env.getOrigin("reward"), reward_relay,
                transform=[[1], [0], [0]])
    net.connect(ctrl_agent.getOrigin("action_output"), reward_relay,
                transform=[[0, 0], [1, 0], [0, 1]])

    net.connect(reward_relay, ctrl_agent.getTermination("reward"),
                func=lambda x: ((x[0] + bias * abs(x[0]))
                                if x[1] + x[2] > 0.5 else x[0]),
                origin_name="ctrl_reward")

    # ideal reward function (for testing)
#     def ctrl_reward_func(x):
#         if abs(x[0]) < 0.5:
#             return 0.0
#
#         if flat:
#             return 1.5 if x[1] + x[2] < 0.5 else -1.5
#         else:
#             if x[1] + x[2] < 0.5:
#                 return -1.5
#             if [round(a) for a in env.state[-2:]] == [round(b)
#                                                       for b in x[1:]]:
#                 return 1.5
#             else:
#                 return -1.5
#     net.connect(reward_relay, ctrl_agent.getTermination("reward"),
#                 func=ctrl_reward_func)

    # nav rewarded for picking ctrl target
    def nav_reward_func(x):
        if abs(x[0]) < 0.5 or env.action is None:
            return 0.0

        if x[1] + x[2] < 0.5:
            return x[0]

        if x[1] > x[2]:
            return (1.5 if env.action[1] == env.state[:env.num_orientations]
                    else -1.5)
        else:
            return (1.5 if env.action[1] == env.state[env.num_orientations:
                                                      - env.num_colours]
                    else -1.5)
    net.connect(reward_relay, nav_agent.getTermination("reward"),
                func=nav_reward_func)

    # state for navagent controlled by ctrlagent
    ctrl_state_inhib = net.make_array("ctrl_state_inhib", 50, env.stateD,
                                      radius=2, mode=HRLutils.SIMULATION_MODE)
    ctrl_state_inhib.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

    inhib_matrix = [[0, -5]] * 50 * env.num_orientations + \
                   [[-5, 0]] * 50 * env.num_shapes + \
                   [[-5, -5]] * 50 * env.num_colours

    # ctrl output inhibits all the non-selected aspects of the state
    net.connect(env.getOrigin("state"), ctrl_state_inhib)
    net.connect(ctrl_agent.getOrigin("action_output"), ctrl_state_inhib,
                transform=inhib_matrix)

    # also give a boost to the selected aspects (so that neurons are roughly
    # equally activated).
    def boost_func(x):
        if x[0] > 0.5:
            return [3 * v for v in x[1:]]
        else:
            return x[1:]
    boost = net.make("boost", 1, 1 + env.stateD, mode="direct")
    boost.fixMode()
    net.connect(ctrl_state_inhib, boost,
                transform=([[0 for _ in range(env.stateD)]] +
                           list(MU.I(env.stateD))))
    net.connect(ctrl_agent.getOrigin("action_output"), boost,
                transform=[[1, 1]] + [[0, 0] for _ in range(env.stateD)])

    net.connect(boost, nav_agent.getTermination("state_input"),
                func=boost_func)

    # save weights
    weight_save = 1.0  # period to save weights (realtime, not simulation time)
    threads = [
        HRLutils.WeightSaveThread(nav_agent.getNode("QNetwork").saveParams,
                                  os.path.join("weights", "%s_%s" %
                                               (nav_agent.name, seed)),
                                  weight_save),
        HRLutils.WeightSaveThread(ctrl_agent.getNode("QNetwork").saveParams,
                                  os.path.join("weights", "%s_%s" %
                                               (ctrl_agent.name, seed)),
                                  weight_save)]
    for t in threads:
        t.start()

    # data collection node
    data = datanode.DataNode(period=1,
                             filename=HRLutils.datafile("dataoutput_%s.txt" %
                                                        label),
                             header="%s %s %s %s %s" % (nav_args, ctrl_args,
                                                        bias, seed, flat))
    print "saving data to", data.filename
    print "header", data.header
    net.add(data)
    nav_q = nav_agent.getNode("QNetwork")
    ctrl_q = ctrl_agent.getNode("QNetwork")
    ctrl_bg = ctrl_agent.getNode("BGNetwork").getNode("weight_actions")
    data.record_avg(env.getOrigin("reward"))
    data.record_avg(ctrl_q.getNode("actionvals").getOrigin("X"))
    data.record_sparsity(ctrl_q.getNode("state_pop").getOrigin("AXON"))
    data.record_sparsity(nav_q.getNode("state_pop").getOrigin("AXON"))
    data.record_avg(ctrl_q.getNode("valdiff").getOrigin("X"))
    data.record_avg(ctrl_agent.getNode("ErrorNetwork").getOrigin("error"))
    data.record_avg(ctrl_bg.getNode("0").getOrigin("AXON"))
    data.record_avg(ctrl_bg.getNode("1").getOrigin("AXON"))
    data.record(env.getOrigin("score"))

#     net.add_to_nengo()
#     net.network.simulator.run(0, 300, 0.001)
    net.view()

    for t in threads:
        t.stop()
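
A minimal sketch of invoking run_badreenvironment; the bias and label values are illustrative assumptions:

    # hypothetical invocation; kwarg values are assumptions
    run_badreenvironment({"learningrate": 9e-10}, {"learningrate": 9e-10},
                         bias=0.2, seed=1, flat=False, label="badre_test")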
Example #42
0
def run_contextenvironment(args, seed=None):
    """Runs the model on the context task.

    :param args: kwargs for the agent
    :param seed: random seed
    """

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    net = nef.Network("runContextEnvironment")

    if "load_weights" in args and args["load_weights"] is not None:
        args["load_weights"] += "_%s" % seed

    stateN = 1200  # number of neurons to use in state population
    contextD = 2  # dimension of context vector
    context_scale = 1.0  # scale of context representation
    max_state_input = 2  # max length of input vector for state population
    # actions (label and vector) available to the system
    actions = [("up", [0, 1]), ("right", [1, 0]), ("down", [0, -1]),
               ("left", [-1, 0])]

    # context labels and rewards for achieving those context goals
    rewards = {"a": 1.5, "b": 1.5}

    env = contextenvironment.ContextEnvironment(
        actions,
        HRLutils.datafile("contextmap.bmp"),
        contextD,
        rewards,
        colormap={
            -16777216: "wall",
            -1: "floor",
            -256: "a",
            -2088896: "b"
        },
        imgsize=(5, 5),
        dx=0.001,
        placedev=0.5)
    net.add(env)

    print "generated", len(env.placecells), "placecells"

    # termination node for agent (just goes off on some regular interval)
    term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.9)): 0.0}, env)
    net.add(term_node)

    # generate encoders and divide by max_state_input (so that all inputs
    # will end up being radius 1)
    enc = env.gen_encoders(stateN, contextD, context_scale)
    enc = MU.prod(enc, 1.0 / max_state_input)

    # load eval points from file
    with open(HRLutils.datafile("contextbmp_evalpoints_%s.txt" % seed)) as f:
        print "loading contextbmp_evalpoints_%s.txt" % seed
        evals = [[float(x) for x in l.split(" ")] for l in f.readlines()]

    agent = smdpagent.SMDPAgent(stateN,
                                len(env.placecells) + contextD,
                                actions,
                                state_encoders=enc,
                                state_evals=evals,
                                state_threshold=0.8,
                                **args)
    net.add(agent)

    print "agent neurons:", agent.countNeurons()

    # period to save weights (realtime, not simulation time)
    weight_save = 600.0
    t = HRLutils.WeightSaveThread(
        agent.getNode("QNetwork").saveParams,
        os.path.join("weights", "%s_%s" % (agent.name, seed)), weight_save)
    t.start()

    # data collection node
    data = datanode.DataNode(period=5,
                             filename=HRLutils.datafile("dataoutput_%s.txt" %
                                                        seed))
    net.add(data)
    q_net = agent.getNode("QNetwork")
    data.record(env.getOrigin("reward"))
    data.record(q_net.getNode("actionvals").getOrigin("X"), func=max)
    data.record(q_net.getNode("actionvals").getOrigin("X"), func=min)
    data.record_sparsity(q_net.getNode("state_pop").getOrigin("AXON"))
    data.record_avg(q_net.getNode("valdiff").getOrigin("X"))
    data.record_avg(env.getOrigin("state"))

    net.connect(env.getOrigin("placewcontext"),
                agent.getTermination("state_input"))
    net.connect(env.getOrigin("reward"), agent.getTermination("reward"))
    net.connect(term_node.getOrigin("reset"), agent.getTermination("reset"))
    net.connect(term_node.getOrigin("learn"), agent.getTermination("learn"))
    net.connect(term_node.getOrigin("reset"),
                agent.getTermination("save_state"))
    net.connect(term_node.getOrigin("reset"),
                agent.getTermination("save_action"))

    net.connect(agent.getOrigin("action_output"), env.getTermination("action"))

    #    net.add_to_nengo()
    #    net.run(2000)
    net.view()

    t.stop()
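
A note on the encoder scaling above: dividing the unit-length encoders by max_state_input keeps every encoder-state dot product within the unit radius for any input of the expected length. A minimal sketch with hypothetical numbers:

def dot(u, v):
    return sum(ui * vi for ui, vi in zip(u, v))

max_state_input = 2
enc = [1.0, 0.0]  # unit-length encoder
enc = [e / max_state_input for e in enc]
state = [2.0, 0.0]  # input vector at the maximum expected length
print(dot(enc, state))  # 1.0, i.e. exactly at radius 1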
Example #43
0
    def __init__(self, name, N, d, radius=1.0, inputscale=1.0, recurweight=1.0,
                 direct_storage=False):
        """Builds the Memory network.

        :param name: name of network
        :param N: base number of neurons
        :param d: dimension of stored value
        :param radius: radius of stored value
        :param inputscale: controls how fast the stored value moves to the
            target
        :param recurweight: controls the preservation of the stored value
        :param direct_storage: if True, use directmode for the memory
        """

        self.name = name
        net = nef.Network(self, seed=HRLutils.SEED, quick=False)
        self.dimension = d
        self.radius = radius

        tauPSC = 0.007
        intPSC = 0.1

        # population that will store the value
        if not direct_storage:
            storage = net.make_array("storage", N, d,
                                     node_factory=HRLutils.node_fac(),
                                     eval_points=[[x * 0.001]
                                                  for x in range(-1000, 1000)])
        else:
            storage = net.make("storage", 1, d, mode="direct")
            storage.fixMode()

        net.connect(storage, storage, transform=MU.diag([recurweight
                                                         for _ in range(d)]),
                    pstc=intPSC)

        # storageinput will represent (target - stored_value), which when used
        # as input to storage will drive the stored value to target
        storageinput = net.make_array("storageinput", N, d,
                                      node_factory=HRLutils.node_fac())
        storageinput.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

        storageinput.addDecodedTermination("target",
                                           MU.diag([1.0 / radius
                                                    for _ in range(d)]),
                                           tauPSC, False)
        # note: store everything in -1 -- 1 range by dividing by radius

        # scale storageinput value by inputscale to control rate at which
        # it moves to the target
        net.connect(storageinput, storage, pstc=intPSC,
                    transform=MU.diag([inputscale * intPSC for _ in range(d)]))

        # subtract currently stored value
        net.connect(storage, storageinput, pstc=tauPSC,
                    transform=MU.diag([-1 for _ in range(d)]))

        # we want to open the input gate when the transfer signal arrives (to
        # transfer storageinput to storage). using a double inhibition setup
        # (rather than just feeding it e.g. the inverse of the transfer
        # signal) so that we get a nice clean zero

        # this inhibits the storageinput population (to block input to the
        # storage)
        transferinhib = net.make("transferinhib", N, 1,
                                 node_factory=HRLutils.node_fac())
        transferinhib.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

        transferinhib.addTermination("gate",
                                     [[-10] for _ in
                                      range(transferinhib.getNeurons())],
                                     tauPSC, False)

        net.connect(transferinhib, storageinput, pstc=tauPSC,
                    transform=[[-10] for _ in
                               range(storageinput.getNeurons())])

        # this drives the transferinhib population (so that by default it will
        # block any input). inhibiting transferinhib will thus remove the
        # inhibition on storageinput, and change the stored value
        biasinput = net.make_input("biasinput", [1])

        net.connect(biasinput, transferinhib, pstc=tauPSC)

        # output population (to undo radius scaling)
        storageoutput = net.make("storageoutput", 1, d, mode="direct")
        storageoutput.fixMode()
        net.connect(storage, storageoutput, pstc=0.001,
                    transform=MU.diag([radius for _ in range(d)]))

        self.exposeTermination(transferinhib.getTermination("gate"),
                               "transfer")
        self.exposeTermination(storageinput.getTermination("target"), "target")
        self.exposeOrigin(storageoutput.getOrigin("X"), "X")
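
In discrete time, the storage dynamics above amount to driving the stored value toward the target at a rate set by inputscale whenever the gate is open. A rough plain-Python caricature (constants illustrative, ignoring the synaptic filters):

stored, target = 0.0, 0.8
inputscale, dt = 75.0, 0.001
for _ in range(100):
    # with the gate open, storageinput represents (target - stored)
    stored += inputscale * (target - stored) * dt
print(round(stored, 3))  # ~0.8: the memory has locked onto the target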
Example #44
0
def run_flat_delivery(args, seed=None):
    """Runs the model on the delivery task with only one hierarchical level."""

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    net = nef.Network("run_flat_delivery")

    if "load_weights" in args and args["load_weights"] is not None:
        args["load_weights"] += "_%s" % seed

    stateN = 1200
    contextD = 2
    context_scale = 1.0
    max_state_input = 2
    actions = [("up", [0, 1]), ("right", [1, 0]), ("down", [0, -1]),
               ("left", [-1, 0])]

    # ##ENVIRONMENT

    env = deliveryenvironment.DeliveryEnvironment(
        actions,
        HRLutils.datafile("contextmap.bmp"),
        colormap={
            -16777216: "wall",
            -1: "floor",
            -256: "a",
            -2088896: "b"
        },
        imgsize=(5, 5),
        dx=0.001,
        placedev=0.5)
    net.add(env)

    print "generated", len(env.placecells), "placecells"

    # ##NAV AGENT

    enc = env.gen_encoders(stateN, contextD, context_scale)
    enc = MU.prod(enc, 1.0 / max_state_input)

    with open(HRLutils.datafile("contextbmp_evalpoints_%s.txt" % seed)) as f:
        evals = [[float(x) for x in l.split(" ")] for l in f.readlines()]

    nav_agent = smdpagent.SMDPAgent(stateN,
                                    len(env.placecells) + contextD,
                                    actions,
                                    name="NavAgent",
                                    state_encoders=enc,
                                    state_evals=evals,
                                    state_threshold=0.8,
                                    **args)
    net.add(nav_agent)

    print "agent neurons:", nav_agent.countNeurons()

    net.connect(nav_agent.getOrigin("action_output"),
                env.getTermination("action"))
    net.connect(env.getOrigin("placewcontext"),
                nav_agent.getTermination("state_input"))

    nav_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.9)): None},
        env,
        name="NavTermNode",
        contextD=2)
    net.add(nav_term_node)
    net.connect(env.getOrigin("context"),
                nav_term_node.getTermination("context"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("reset"))
    net.connect(nav_term_node.getOrigin("learn"),
                nav_agent.getTermination("learn"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_state"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_action"))

    reward_relay = net.make("reward_relay", 1, 1, mode="direct")
    reward_relay.fixMode()
    net.connect(env.getOrigin("reward"), reward_relay)
    net.connect(nav_term_node.getOrigin("pseudoreward"), reward_relay)
    net.connect(reward_relay, nav_agent.getTermination("reward"))

    # period to save weights (realtime, not simulation time)
    weight_save = 600.0
    HRLutils.WeightSaveThread(
        nav_agent.getNode("QNetwork").saveParams,
        os.path.join("weights", "%s_%s" % (nav_agent.name, seed)),
        weight_save).start()

    # data collection node
    data = datanode.DataNode(period=5,
                             filename=HRLutils.datafile("dataoutput_%s.txt" %
                                                        seed))
    net.add(data)
    q_net = nav_agent.getNode("QNetwork")
    data.record_avg(env.getOrigin("reward"))
    data.record_avg(q_net.getNode("actionvals").getOrigin("X"))
    data.record_sparsity(q_net.getNode("state_pop").getOrigin("AXON"))
    data.record_avg(q_net.getNode("valdiff").getOrigin("X"))
    data.record_avg(nav_agent.getNode("ErrorNetwork").getOrigin("error"))

    #    net.add_to_nengo()
    #    net.run(10000)
    net.view()
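
Note that reward_relay receives two projections (the environment reward and the termination node's pseudoreward); multiple projections into a single population sum, so the agent's reward termination sees their total:

env_reward, pseudoreward = 0.0, 1.5  # hypothetical values
print(env_reward + pseudoreward)     # 1.5 is what reaches the agent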
Example #45
0
def make(net, preName='pre', postName='post', rate=5e-4):

    # get pre and post ensembles from their names
    pre = net.network.getNode(preName)
    post = net.network.getNode(postName)

    dim_pre = pre.getDimension()
    dim_post = post.getDimension()

    t = [[0] * dim_pre for i in range(dim_post)]
    index_pre = range(dim_pre)
    index_post = range(dim_post)
    for i in range(max(len(index_pre), len(index_post))):
        ipre = index_pre[i % len(index_pre)]
        ipost = index_post[i % len(index_post)]
        t[ipost][ipre] = 1

    decoder = pre.getOrigin('X').getDecoders()
    encoder = post.getEncoders()
    encoder = MU.prod(encoder, 1.0 / post.getRadii()[0])
    weight = MU.prod(encoder, MU.prod(t, MU.transpose(decoder)))

    # random weight matrix to initialize projection from pre to post
    # def rand_weights(w):
    #     for i in range(len(w)):
    #         for j in range(len(w[0])):
    #             w[i][j] = random.uniform(-1e-3,1e-3)
    #     return w
    # weight = rand_weights(numeric.zeros((post.neurons, pre.neurons)).tolist())

    # non-decoded termination (to learn transformation)
    count = 0
    prename = pre.getName()
    while '%s_%02d' % (prename, count) in [term.name
                                           for term in post.terminations]:
        count = count + 1
    prename = '%s_%02d' % (prename, count)

    post.addBCMTermination(prename, weight, 0.005, False, None)
    
    # Add projections
    net.connect(pre.getOrigin('AXON'), post.getTermination(prename))

    # Set learning rule on the non-decoded termination
    net.learn(post, prename, None, rate=rate)

    if net.network.getMetaData("bcmterm") == None:
        net.network.setMetaData("bcmterm", HashMap())
    bcmterms = net.network.getMetaData("bcmterm")

    bcmterm = HashMap(4)
    bcmterm.put("preName", preName)
    bcmterm.put("postName", postName)
    bcmterm.put("rate", rate)

    bcmterms.put(prename, bcmterm)

    if net.network.getMetaData("templates") == None:
        net.network.setMetaData("templates", ArrayList())
    templates = net.network.getMetaData("templates")
    templates.add(prename)

    if net.network.getMetaData("templateProjections") == None:
        net.network.setMetaData("templateProjections", HashMap())
    templateproj = net.network.getMetaData("templateProjections")
    templateproj.put(preName, postName)
Example #46
0
 def termination_action(self, a, pstc=0.01):
     # set the selected action to the one with highest similarity to the
     # current action input
     self.action = max(self.actions, key=lambda x: MU.prod(a, x[1]))
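
MU.prod applied to two vectors is a dot product, so the rule above simply picks the stored action vector most similar to the current input. A plain-Python sketch using this model's action set:

def dot(u, v):
    return sum(ui * vi for ui, vi in zip(u, v))

actions = [("up", [0, 1]), ("right", [1, 0]),
           ("down", [0, -1]), ("left", [-1, 0])]
a = [0.9, 0.3]  # hypothetical action input
print(max(actions, key=lambda x: dot(a, x[1]))[0])  # "right"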
Example #47
0
from ca.nengo.util import MU
from java.io import File
# additional imports assumed for this version of Nengo
from ca.nengo.math.impl import ConstantFunction
from ca.nengo.model import SimulationMode
from ca.nengo.plot import Plotter
from ca.nengo.util import MatlabExporter

nInput = range(200, 2001, 400)
nDiff = 1000

# the networks below (interneuron, dualTC, ...) are assumed to already be
# defined in the workspace
networks = [interneuron, dualTC, adapting, depressing, butterworth,
            interneuronFeedback]

exporter = MatlabExporter()
for network in networks:
    network.setInputFunction(ConstantFunction(1, 0))
    network.setStepSize(.0001)
    network.setMode(SimulationMode.DIRECT)

    inputVariance = []
    outputVariance = []

    for n in nInput:
        network.setNoise(n, nDiff)
        # network.setDistortion(n, nDiff)
        network.reset(0)
        network.run(0, 10)
        inputVariance.append(MU.variance(
            MU.prod(network.getInputEnsembleData().getValues(), [1]), 0))
        outputVariance.append(MU.variance(
            MU.prod(network.getOutputData().getValues(), [1]), 0))

    network.clearErrors()
    Plotter.plot(nInput, outputVariance, "output")

exporter.write(File("noise.mat"))
Example #48
0
    def __init__(self,
                 name,
                 N,
                 stateN,
                 actions,
                 learningrate,
                 Qradius=1.0,
                 init_decoders=None):
        """Build ActionValues network.

        :param name: name of Network
        :param N: base number of neurons
        :param stateN: number of neurons in state population
        :param actions: actions available to the system
            :type actions: list of tuples (action_name,action_vector)
        :param learningrate: learning rate for PES rule
        :param Qradius: expected radius of Q values
        :param init_decoders: if specified, will be used to initialize the
            connection weights to whatever function is specified by decoders
        """

        self.name = name
        net = nef.Network(self, seed=HRLutils.SEED, quick=False)

        self.N = N
        self.learningrate = learningrate
        self.supervision = 1.0  # fully supervised; no unsupervised component

        self.tauPSC = 0.007

        modterms = []
        learnterms = []

        # relays
        output = net.make("output", 1, len(actions), mode="direct")
        output.fixMode()

        for i, action in enumerate(actions):
            # create one population corresponding to each action
            act_pop = net.make("action_" + action[0],
                               self.N * 4,
                               1,
                               node_factory=HRLutils.node_fac())
            act_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

            # add error termination
            modterm = act_pop.addDecodedTermination(
                "error", [[0 if j != i else 1 for j in range(len(actions))]],
                0.005, True)
            # set modulatory transform so that it selects one dimension of
            # the error signal

            # create learning termination
            if init_decoders is not None:
                weights = MU.prod(act_pop.getEncoders(),
                                  MU.transpose(init_decoders))
            else:
                weights = [[random.uniform(-1e-3, 1e-3)
                            for _ in range(stateN)]
                           for _ in range(act_pop.getNeurons())]
            learningterm = act_pop.addHPESTermination("learning", weights,
                                                      0.005, False, None)

            # initialize the learning rule
            net.learn(act_pop,
                      learningterm,
                      modterm,
                      rate=self.learningrate,
                      supervisionRatio=self.supervision)

            # connect each action back to output relay
            net.connect(act_pop.getOrigin("X"),
                        output,
                        transform=[[0] if j != i else [Qradius]
                                   for j in range(len(actions))],
                        pstc=0.001)
            # note, we learn all the Q values with radius 1, then just
            # multiply by the desired Q radius here

            modterms += [modterm]
            learnterms += [learningterm]

        # use EnsembleTerminations to group the individual action terminations
        # into one multi-dimensional termination
        self.exposeTermination(EnsembleTermination(self, "state", learnterms),
                               "state")
        self.exposeTermination(EnsembleTermination(self, "error", modterms),
                               "error")

        self.exposeOrigin(output.getOrigin("X"), "X")
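
When init_decoders is given, the learned connection starts from the full weight matrix encoders x decoders^T, i.e. the neuron-to-neuron weights that implement the function those decoders compute. A small numeric sketch with hypothetical sizes and values:

def matmul(A, B):
    return [[sum(a * b for a, b in zip(row, col)) for col in zip(*B)]
            for row in A]

def transpose(M):
    return [list(r) for r in zip(*M)]

encoders = [[1.0], [-1.0]]        # 2 post neurons, 1-D representation
decoders = [[0.1], [0.2], [0.3]]  # 3 pre neurons, 1-D decoded function
weights = matmul(encoders, transpose(decoders))
print(weights)  # [[0.1, 0.2, 0.3], [-0.1, -0.2, -0.3]]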
Example #49
0
def run_flat_delivery(args, seed=None):
    """Runs the model on the delivery task with only one hierarchical level."""

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    net = nef.Network("run_flat_delivery")

    if "load_weights" in args and args["load_weights"] is not None:
        args["load_weights"] += "_%s" % seed

    stateN = 1200
    contextD = 2
    context_scale = 1.0
    max_state_input = 2
    actions = [("up", [0, 1]), ("right", [1, 0]), ("down", [0, -1]), ("left", [-1, 0])]

    ###ENVIRONMENT

    env = deliveryenvironment.DeliveryEnvironment(
        actions,
        HRLutils.datafile("contextmap.bmp"),
        colormap={
            -16777216: "wall",
            -1: "floor",
            -256: "a",
            -2088896: "b"
        },
        imgsize=(5, 5),
        dx=0.001,
        placedev=0.5)
    net.add(env)

    print "generated", len(env.placecells), "placecells"

    ###NAV AGENT

    enc = env.gen_encoders(stateN, contextD, context_scale)
    enc = MU.prod(enc, 1.0 / max_state_input)

    with open(HRLutils.datafile("contextbmp_evalpoints_%s.txt" % seed)) as f:
        evals = [[float(x) for x in l.split(" ")] for l in f.readlines()]

    nav_agent = smdpagent.SMDPAgent(stateN, len(env.placecells) + contextD,
                                    actions, name="NavAgent",
                                    state_encoders=enc, state_evals=evals,
                                    state_threshold=0.8, **args)
    net.add(nav_agent)

    print "agent neurons:", nav_agent.countNeurons()

    # connect the agent's actions to the environment, and the environment
    # state back to the agent, so the agent can see the effect of its actions
    net.connect(nav_agent.getOrigin("action_output"),
                env.getTermination("action"))
    net.connect(env.getOrigin("placewcontext"),
                nav_agent.getTermination("state_input"))
#    net.connect(env.getOrigin("reward"), nav_agent.getTermination("reward"))
#    net.connect(env.getOrigin("optimal_move"),
#                nav_agent.getTermination("bg_input"))

    # termination node for nav_agent (just a timer that goes off regularly)
    nav_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.9)): None}, env,
        name="NavTermNode", contextD=2)
    net.add(nav_term_node)
    net.connect(env.getOrigin("context"),
                nav_term_node.getTermination("context"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("reset"))
    net.connect(nav_term_node.getOrigin("learn"),
                nav_agent.getTermination("learn"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_state"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_action"))

    # a direct-mode relay is used here to sum the environment reward and the
    # pseudoreward into the agent's single reward termination
    reward_relay = net.make("reward_relay", 1, 1, mode="direct")
    reward_relay.fixMode()
    net.connect(env.getOrigin("reward"), reward_relay)
    net.connect(nav_term_node.getOrigin("pseudoreward"), reward_relay)
    net.connect(reward_relay, nav_agent.getTermination("reward"))

    # period to save weights (realtime, not simulation time)
    weight_save = 600.0
    HRLutils.WeightSaveThread(
        nav_agent.getNode("QNetwork").saveParams,
        os.path.join("weights", "%s_%s" % (nav_agent.name, seed)),
        weight_save).start()

    # data collection node
    data = datanode.DataNode(period=5, show_plots=None,
                             filename=HRLutils.datafile("dataoutput_%s.txt" %
                                                        seed))
    net.add(data)
    # data.record_avg(env.getOrigin("reward"), filter=1e-5)
    # data.record_avg(nav_agent.getNode("QNetwork").getNode("actionvals")
    #                 .getOrigin("X"), filter=1e-5)
    # data.record_sparsity(nav_agent.getNode("QNetwork").getNode("state_pop")
    #                      .getOrigin("AXON"), filter=1e-5)
    # data.record_avg(nav_agent.getNode("QNetwork").getNode("valdiff")
    #                 .getOrigin("X"), filter=1e-5)
    # note: "ErrorNetwork" may not be the correct node name here; verify
    # before enabling this record
    # data.record_avg(nav_agent.getNode("ErrorNetwork").getOrigin("error"),
    #                 filter=1e-5)

    net.add_to_nengo()
    net.view()
Example #50
0
def run_badreenvironment(nav_args, ctrl_args, seed=None, flat=False):
    """Runs the model on the Badre environment task.

    :param nav_args: kwargs for the navigation agent
    :param ctrl_args: kwargs for the control agent
    :param seed: random seed
    :param flat: if True, use the flat version of the environment
    """

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    net = nef.Network("run_badreenvironment")

    env = badreenvironment.BadreEnvironment(flat=flat)
    net.add(env)

    ###NAV AGENT
    stateN = 500
    max_state_input = 2
    enc = env.gen_encoders(stateN, 0, 1.0)
    enc = MU.prod(enc, 1.0 / max_state_input)

#    with open(HRLutils.datafile("badre_evalpoints.txt")) as f:
#        evals = [[float(x) for x in l.split(" ")] for l in f.readlines()]
    orientations = MU.I(env.num_orientations)
    shapes = MU.I(env.num_shapes)
    colours = MU.I(env.num_colours)
    evals = (list(MU.I(env.stateD)) +
             [o + s + c for o in orientations for s in shapes
              for c in colours])

    nav_agent = smdpagent.SMDPAgent(stateN, env.stateD,
                                    env.actions, name="NavAgent",
                                    load_weights=None,
                                    state_encoders=enc, state_evals=evals,
                                    discount=0.4, **nav_args)
    net.add(nav_agent)

    print "agent neurons:", nav_agent.countNeurons()

    nav_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.6)): None}, env,
        name="NavTermNode", state_delay=0.1)
    net.add(nav_term_node)

    net.connect(nav_term_node.getOrigin("reset"), nav_agent.getTermination("reset"))
    net.connect(nav_term_node.getOrigin("learn"), nav_agent.getTermination("learn"))
    net.connect(nav_term_node.getOrigin("reset"), nav_agent.getTermination("save_state"))
    net.connect(nav_term_node.getOrigin("reset"), nav_agent.getTermination("save_action"))

    net.connect(nav_agent.getOrigin("action_output"), env.getTermination("action"))

    ###CTRL AGENT
    enc = env.gen_encoders(stateN, 0, 0)
    enc = MU.prod(enc, 1.0 / max_state_input)
    actions = [("shape", [0, 1]), ("orientation", [1, 0]), ("null", [0, 0])]
    ctrl_agent = smdpagent.SMDPAgent(stateN, env.stateD, actions,
                                     name="CtrlAgent", load_weights=None,
                                     state_encoders=enc, state_evals=evals,
                                     discount=0.4, **ctrl_args)
    net.add(ctrl_agent)

    print "agent neurons:", ctrl_agent.countNeurons()

    net.connect(env.getOrigin("state"), ctrl_agent.getTermination("state_input"))

    ctrl_term_node = terminationnode.TerminationNode({terminationnode.Timer((0.6, 0.6)):None},
                                                     env, name="CtrlTermNode",
                                                     state_delay=0.1)
    net.add(ctrl_term_node)

    net.connect(ctrl_term_node.getOrigin("reset"), ctrl_agent.getTermination("reset"))
    net.connect(ctrl_term_node.getOrigin("learn"), ctrl_agent.getTermination("learn"))
    net.connect(ctrl_term_node.getOrigin("reset"), ctrl_agent.getTermination("save_state"))
    net.connect(ctrl_term_node.getOrigin("reset"), ctrl_agent.getTermination("save_action"))
    
    
    ## reward for nav/ctrl
    reward_relay = net.make("reward_relay", 1, 2, mode="direct")
    reward_relay.fixMode()
    net.connect(env.getOrigin("reward"), reward_relay, transform=[[1], [0]])
    net.connect(ctrl_agent.getOrigin("action_output"), reward_relay,
                transform=[[0, 0], [1, 1]])
    
    # nav reward is just the environment reward
    net.connect(reward_relay, nav_agent.getTermination("reward"),
                func=lambda x: x[0], origin_name="nav_reward")

    # ctrl gets a slight bonus if it selects a rule (as opposed to null), to
    # encourage it not to just pick null all the time
    net.connect(reward_relay, ctrl_agent.getTermination("reward"),
                func=lambda x: x[0] + 0.25 * abs(x[0]) if x[1] > 0.5 else x[0],
                origin_name="ctrl_reward")

    ## state for navagent controlled by ctrlagent
#    ctrl_output_relay = net.make("ctrl_output_relay", 1, env.stateD+2, mode="direct")
#    ctrl_output_relay.fixMode()
    ctrl_output_relay = net.make_array("ctrl_output_relay", 50, env.stateD,
                                       radius=2, mode=HRLutils.SIMULATION_MODE)
    ctrl_output_relay.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
    
    inhib_matrix = ([[0, -5]] * 50 * env.num_orientations +
                    [[-5, 0]] * 50 * env.num_shapes +
                    [[-5, -5]] * 50 * env.num_colours)

    # ctrl output inhibits all the non-selected aspects of the state
    net.connect(env.getOrigin("state"), ctrl_output_relay)
    net.connect(ctrl_agent.getOrigin("action_output"), ctrl_output_relay,
#                transform=zip([0]*env.num_orientations + [-1]*(env.num_shapes+env.num_colours),
#                              [-1]*env.num_orientations + [0]*env.num_shapes + [-1]*env.num_colours))
                transform=inhib_matrix)
    
    # also give a boost to the selected aspects (so that neurons are roughly
    # equally activated). the base vector has length 3 and the inhibited
    # vector has length 1, so adding 2/3 to each of the three elements brings
    # the total back up to 3.
    net.connect(ctrl_agent.getOrigin("action_output"), ctrl_output_relay,
                transform=zip([0.66] * env.num_orientations +
                              [0] * (env.num_shapes + env.num_colours),
                              [0] * env.num_orientations +
                              [0.66] * env.num_shapes +
                              [2] * env.num_colours))

    net.connect(ctrl_output_relay, nav_agent.getTermination("state_input"))

    # save weights
    weight_save = 600.0  # period (realtime, not simulation time)
    HRLutils.WeightSaveThread(
        nav_agent.getNode("QNetwork").saveParams,
        os.path.join("weights", "%s_%s" % (nav_agent.name, seed)),
        weight_save).start()
    HRLutils.WeightSaveThread(
        ctrl_agent.getNode("QNetwork").saveParams,
        os.path.join("weights", "%s_%s" % (ctrl_agent.name, seed)),
        weight_save).start()

    # data collection node
    data = datanode.DataNode(period=5, show_plots=None,
                             filename=HRLutils.datafile("dataoutput_%s.txt" %
                                                        seed))
    net.add(data)
    data_filter = 1e-5
    ctrl_q = ctrl_agent.getNode("QNetwork")
    nav_q = nav_agent.getNode("QNetwork")
    ctrl_bg = ctrl_agent.getNode("BGNetwork").getNode("weight_actions")
    data.record_avg(env.getOrigin("reward"), filter=data_filter)
    data.record_avg(ctrl_q.getNode("actionvals").getOrigin("X"),
                    filter=data_filter)
    data.record_sparsity(ctrl_q.getNode("state_pop").getOrigin("AXON"),
                         filter=data_filter)
    data.record_sparsity(nav_q.getNode("state_pop").getOrigin("AXON"),
                         filter=data_filter)
    data.record_avg(ctrl_q.getNode("valdiff").getOrigin("X"),
                    filter=data_filter)
    data.record_avg(ctrl_agent.getNode("ErrorNetwork").getOrigin("error"),
                    filter=data_filter)
    data.record_avg(ctrl_bg.getNode("0").getOrigin("AXON"),
                    filter=data_filter)
    data.record_avg(ctrl_bg.getNode("1").getOrigin("AXON"),
                    filter=data_filter)

    net.add_to_nengo()
#    net.view()
    net.run(2000)
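
The ctrl reward shaping above can be checked in isolation: the bonus is proportional to the magnitude of the environment reward, and is only applied when a rule (rather than null) was selected (x[1] sums the ctrl action vector, which is [0, 0] for null):

def ctrl_reward(x):
    # x[0] = environment reward, x[1] = 1 if a rule was selected, else 0
    return x[0] + 0.25 * abs(x[0]) if x[1] > 0.5 else x[0]

print(ctrl_reward([1.0, 1.0]))   # 1.25 (rule selected, bonus added)
print(ctrl_reward([-1.0, 1.0]))  # -0.75 (penalty softened by the bonus)
print(ctrl_reward([1.0, 0.0]))   # 1.0 (null selected, no bonus)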
Example #51
0
    def __init__(self, actions, Qradius=1, noiselevel=0.03):
        """Builds the BGNetwork.

        :param actions: actions available to the system
            :type actions: list of tuples (action_name,action_vector)
        :param Qradius: expected radius of Q values
        :param noiselevel: standard deviation of noise added to Q values for
            exploration
        """

        self.name = "BGNetwork"
        net = nef.Network(self, seed=HRLutils.SEED, quick=False)

        self.N = 50
        self.d = len(actions)
        self.mut_inhib = 1.0  # mutual inhibition between actions
        self.tauPSC = 0.007

        # make basal ganglia
        netbg = nef.Network("bg")

        bginput = netbg.make("bginput", 1, self.d, mode="direct")
        bginput.fixMode()
        bginput.addDecodedTermination("input",
                                      MU.diag([1.0 / Qradius for _ in
                                               range(self.d)]), 0.001, False)
        # divide by Q radius to get values back into 0 -- 1 range

        bgoutput = netbg.make("bgoutput", 1, self.d, mode="direct")
        bgoutput.fixMode()

        basalganglia.make_basal_ganglia(netbg, bginput, bgoutput,
                                        dimensions=self.d, neurons=200)
        bg = netbg.network
        net.add(bg)
        bg.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

        bg.exposeTermination(bginput.getTermination("input"), "input")
        bg.exposeOrigin(bgoutput.getOrigin("X"), "X")

        # insert noise (used to give some randomness to drive exploration)
        noiselevel = net.make_input("noiselevel", [noiselevel])

        noise = noisenode.NoiseNode(1, dimension=len(actions))
        net.add(noise)

        net.connect(noiselevel, noise.getTermination("scale"))
        net.connect(noise.getOrigin("noise"), "bg.bginput", pstc=0.001)

        # add bias to shift everything up to 0.5--1.5
        biasinput = net.make_input("biasinput", [0.5])
        net.connect(biasinput, "bg.bginput",
                    transform=[[1] for _ in range(self.d)], pstc=0.001)

        # invert BG output (so the "selected" action will have a positive value
        # and the rest zero)
        invert = thalamus.make(net, name="invert", neurons=self.N,
                               dimensions=self.d, useQuick=False)
        invert.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
        net.connect(bg, invert.getTermination("bg_input"))

        # add mutual inhibition
        net.connect(invert.getOrigin("xBiased"), invert, pstc=self.tauPSC,
                    transform=[[0 if i == j else -self.mut_inhib
                                for j in range(self.d)]
                               for i in range(self.d)])

        # threshold output values so that you get a nice clean 0 for
        # non-selected and 1 for selected
        threshf = HRLutils.node_fac()
        threshold = 0.1
        threshf.setIntercept(IndicatorPDF(threshold, 1.0))
        val_threshold = net.make_array("val_threshold", self.N * 2, self.d,
                                       node_factory=threshf, encoders=[[1]])
        val_threshold.addDecodedOrigin(
            "output",
            [PiecewiseConstantFunction([threshold], [0, 1])
             for _ in range(self.d)], "AXON", True)

        net.connect(invert.getOrigin("xBiased"), val_threshold,
                    pstc=self.tauPSC)

        # output action (action vectors weighted by BG output)
        weight_actions = net.make_array("weight_actions", 50,
                                        len(actions[0][1]), intercept=(0, 1))
        net.connect(val_threshold.getOrigin("output"), weight_actions,
                    transform=MU.transpose([actions[i][1]
                                            for i in range(self.d)]),
                    pstc=0.007)

        # save the BG output (selected action and selected action value)
        save_relay = net.make("save_relay", 1, 1, mode="direct")
        save_relay.fixMode()
        save_relay.addDecodedTermination("input", [[1]], 0.001, False)

        saved_action = memory.Memory("saved_action", self.N * 2,
                                     len(actions[0][1]), inputscale=75)
        net.add(saved_action)
        net.connect(weight_actions, saved_action.getTermination("target"))
        net.connect(save_relay, saved_action.getTermination("transfer"))

        saved_vals = memory.Memory("saved_values", self.N * 2, self.d,
                                   inputscale=75)
        net.add(saved_vals)
        net.connect(val_threshold.getOrigin("output"),
                    saved_vals.getTermination("target"))
        net.connect(save_relay, saved_vals.getTermination("transfer"))

        # put the saved values through a threshold (we want a nice clean
        # zero for non-selected values)
        nfac = HRLutils.node_fac()
        nfac.setIntercept(IndicatorPDF(0.2, 1))
        saved_vals_threshold = net.make_array("saved_vals_threshold", self.N,
                                              self.d, node_factory=nfac,
                                              encoders=[[1]])
        saved_vals_threshold.addDecodedOrigin(
            "output", [PiecewiseConstantFunction([0.3], [0, 1])
                       for _ in range(self.d)], "AXON", True)

        net.connect(saved_vals, saved_vals_threshold, pstc=self.tauPSC)

        self.exposeTermination(bg.getTermination("input"), "input")
        self.exposeTermination(save_relay.getTermination("input"),
                               "save_output")
        self.exposeOrigin(val_threshold.getOrigin("output"), "curr_vals")
        self.exposeOrigin(weight_actions.getOrigin("X"), "curr_action")
        self.exposeOrigin(saved_vals_threshold.getOrigin("output"),
                          "saved_vals")
        self.exposeOrigin(saved_action.getOrigin("X"), "saved_action")
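
The thresholding pattern above (an intercept-shifted population decoded through a step function) is what produces the clean binary outputs. A plain-Python sketch of the decoded step, assuming PiecewiseConstantFunction([t], [0, 1]) evaluates to 0 below t and 1 above:

def step(x, threshold=0.1):
    # assumed semantics of PiecewiseConstantFunction([threshold], [0, 1])
    return 0 if x < threshold else 1

print([step(v) for v in [0.02, 0.08, 0.4, 0.9]])  # [0, 0, 1, 1]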
Example #52
0
 def getValue(self):
     return MU.clone(self.matrix)
Example #53
0
def make(net, preName='pre', postName='post', rate=5e-4):

    # get pre and post ensembles from their names
    pre = net.network.getNode(preName)
    post = net.network.getNode(postName)

    dim_pre = pre.getDimension()
    dim_post = post.getDimension()

    t = [[0] * dim_pre for i in range(dim_post)]
    index_pre = range(dim_pre)
    index_post = range(dim_post)
    for i in range(max(len(index_pre), len(index_post))):
        ipre = index_pre[i % len(index_pre)]
        ipost = index_post[i % len(index_post)]
        t[ipost][ipre] = 1

    decoder = pre.getOrigin('X').getDecoders()
    encoder = post.getEncoders()
    encoder = MU.prod(encoder, 1.0 / post.getRadii()[0])
    weight = MU.prod(encoder, MU.prod(t, MU.transpose(decoder)))

    # random weight matrix to initialize projection from pre to post
    # def rand_weights(w):
    #     for i in range(len(w)):
    #         for j in range(len(w[0])):
    #             w[i][j] = random.uniform(-1e-3,1e-3)
    #     return w
    # weight = rand_weights(numeric.zeros((post.neurons, pre.neurons)).tolist())

    # non-decoded termination (to learn transformation)
    count = 0
    prename = pre.getName()
    while '%s_%02d' % (prename, count) in [term.name
                                           for term in post.terminations]:
        count = count + 1
    prename = '%s_%02d' % (prename, count)

    post.addBCMTermination(prename, weight, 0.005, False, None)

    # Add projections
    net.connect(pre.getOrigin('AXON'), post.getTermination(prename))

    # Set learning rule on the non-decoded termination
    net.learn(post, prename, None, rate=rate)

    if net.network.getMetaData("bcmterm") == None:
        net.network.setMetaData("bcmterm", HashMap())
    bcmterms = net.network.getMetaData("bcmterm")

    bcmterm = HashMap(4)
    bcmterm.put("preName", preName)
    bcmterm.put("postName", postName)
    bcmterm.put("rate", rate)

    bcmterms.put(prename, bcmterm)

    if net.network.getMetaData("templates") == None:
        net.network.setMetaData("templates", ArrayList())
    templates = net.network.getMetaData("templates")
    templates.add(prename)

    if net.network.getMetaData("templateProjections") == None:
        net.network.setMetaData("templateProjections", HashMap())
    templateproj = net.network.getMetaData("templateProjections")
    templateproj.put(preName, postName)
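
The coupling matrix t built above pairs pre and post dimensions off cyclically, so the projection is well defined even when the two ensembles have different dimensionality. A quick sketch with hypothetical dimensions:

dim_pre, dim_post = 2, 3
t = [[0] * dim_pre for _ in range(dim_post)]
for i in range(max(dim_pre, dim_post)):
    t[i % dim_post][i % dim_pre] = 1
print(t)  # [[1, 0], [0, 1], [1, 0]]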
Example #54
0
 def getValue(self):
     return MU.clone(self.matrix)
Example #55
0
 def getEncoders(self):
     neurons = self.nodes[0].neurons
     encoders = MU.zero(self.neurons, self.dimension)
     for i, n in enumerate(self.nodes):
         MU.copyInto(n.encoders, encoders, i * neurons, i * n.dimension,
                     neurons)
     return encoders