Example #1
0
    def run(self, start, end):
        nef.SimpleNode.run(self, start, end)
        # Get total values from input terminations
        total_input = util_funcs.zeros(1,self.dimension)
        for term_str in self.input_terms.keys():
            term_obj = self.getTermination(term_str)
            term_out = term_obj._filtered_values
            term_mat = self.input_terms[term_str]
            if( term_mat is None ):
                term_val = term_out
            else:
                term_val = MU.prod(term_mat, term_out)
            total_input = [total_input[n] + term_val[n] for n in range(self.dimension)]

        # Get total inhibitory input
        total_inhib = 0
        for term_str in self.inhib_terms.keys():
            term_obj = self.getTermination(term_str)
            term_out = term_obj._filtered_values
            term_mat = self.inhib_terms[term_str]
            term_val = MU.prod(term_mat, term_out)
            total_inhib = total_inhib + term_val
        
        # Calculate return value
        input_mag  = util_funcs.norm(total_input)
        input_sign = cmp(input_mag, 0)
        inhibd_mag = max(abs(input_mag) + (total_inhib * self.radius), 0) * input_sign
        if( input_mag != 0 ):
            self.return_val = [total_input[n] * inhibd_mag / input_mag for n in range(self.dimension)]
        else:
            self.return_val = util_funcs.zeros(1, self.dimension)
        
        return
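
The inhibitory scaling at the end of run() can be illustrated in isolation. Below is a minimal sketch using plain Python lists in place of the util_funcs/MU helpers above; the numbers are toy values, not taken from the model.

import math

def apply_inhibition(total_input, total_inhib, radius):
    # magnitude of the summed input vector
    input_mag = math.sqrt(sum(v * v for v in total_input))
    if input_mag == 0:
        return [0.0] * len(total_input)
    # inhibition (typically a negative total, scaled by the radius) shrinks the
    # magnitude, clipped at zero so the output never flips direction
    inhibd_mag = max(input_mag + total_inhib * radius, 0)
    return [v * inhibd_mag / input_mag for v in total_input]

result = apply_inhibition([0.6, 0.8], -0.5, 1.0)  # -> [0.3, 0.4]
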
Example #2
0
    def __init__(self, spinn, origin, termination, transform=None):
        scale = [nn.scale for nn in termination.node.nodes]

        if transform is None:
            transform = termination.transform

        if origin.node.neurons > spinn.max_fan_in:
            w = optsparse.compute_sparse_weights(origin, termination.node,
                                                 transform, spinn.max_fan_in)
        else:
            w = MU.prod(termination.node.encoders,
                        MU.prod(transform, MU.transpose(origin.decoders)))
            w = MU.prod(w, 1.0 / termination.node.radii[0])

        for i in range(len(w)):
            for j in range(len(w[i])):
                w[i][j] *= scale[i] / termination.tau

        w = MU.transpose(w)

        self.weights = w
        self.tau = int(round(termination.tau * 1000))
        if self.tau not in spinn.populations[termination.node].taus:
            spinn.populations[termination.node].taus.append(self.tau)
        self.pre = spinn.populations[origin.node].name
        self.post = spinn.populations[termination.node].name
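
For reference, a rough standalone sketch of the dense weight computation in the else branch above, with plain nested lists standing in for the MU matrix helpers; the shape conventions noted in the comments are assumptions based on how the matrices are multiplied here.

# assumed shapes:
#   encoders:  post_neurons x post_dims
#   transform: post_dims x pre_dims
#   decoders:  pre_neurons x pre_dims  (so its transpose is pre_dims x pre_neurons)
def matmul(a, b):
    return [[sum(a[i][k] * b[k][j] for k in range(len(b)))
             for j in range(len(b[0]))] for i in range(len(a))]

def transpose(m):
    return [list(col) for col in zip(*m)]

def dense_weights(encoders, transform, decoders, radius):
    w = matmul(encoders, matmul(transform, transpose(decoders)))
    # divide by the post population's radius, as in the else branch above
    return [[x / radius for x in row] for row in w]
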
Example #3
0
 def addPlasticTermination(self,
                           name,
                           matrix,
                           tauPSC,
                           decoder,
                           weight_func=None):
     """Create a new termination.  A new termination is created on each
     of the ensembles, which are then grouped together.
     
     If decoders are not known at the time the termination is created,
     then pass in an array of zeros of the appropriate size (i.e. however
     many neurons will be in the population projecting to the termination,
     by number of dimensions)."""
     terminations = []
     d = 0
     dd = self._nodes[0].dimension
     for n in self._nodes:
         encoder = n.encoders
         w = MU.prod(encoder, [
             MU.prod(matrix, MU.transpose(decoder))[d + i]
             for i in range(dd)
         ])
         if weight_func is not None:
             w = weight_func(w)
         t = n.addPESTermination(name, w, tauPSC, False)
         terminations.append(t)
         d += dd
     termination = EnsembleTermination(self, name, terminations)
     self.exposeTermination(termination, name)
     return self.getTermination(name)
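
A hedged sketch of the zero-decoder case described in the docstring: the placeholder array is sized pre_neurons by dimensions. The sizes and the transform_matrix name below are made up for illustration, and the call itself is left as a comment because it needs a live NetworkArray instance.

pre_neurons, dims = 100, 3  # hypothetical projecting population size and dimensionality
zero_decoders = [[0.0] * dims for _ in range(pre_neurons)]
# net_array.addPlasticTermination("learned", transform_matrix, 0.005, zero_decoders)
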
Example #4
0
    def compute_weight_matrix(self, proj):
        orig=proj.origin
        term=proj.termination
        post=term.node
        transform=term.transform

        while hasattr(orig,'getWrappedOrigin'): orig=orig.getWrappedOrigin()
        
        decoder=orig.getDecoders()
        encoder=term.node.getEncoders()
            
        # scale by radius
        encoder=MU.prod(encoder,1.0/post.getRadii()[0])
        
        encoder=MU.prod(encoder, self.weight_scale)
        
        # scale by gain
        
        for i, n in enumerate(post.nodes):
            for j in range(len(encoder[i])):
                encoder[i][j]*=n.scale
        
        #encoder=MU.prodElementwise(encoder, [n.scale for n in post.nodes])

        w=MU.prod(encoder,MU.prod(transform,MU.transpose(decoder)))
        
        return w
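
The per-neuron gain scaling in the loop above is just a row-wise scale of the encoder matrix (the commented-out MU.prodElementwise line hints at the same thing). A small standalone sketch, with toy values in place of the real encoders and [n.scale for n in post.nodes]:

def scale_rows(matrix, gains):
    # multiply row i of the matrix by gains[i]
    return [[gains[i] * x for x in row] for i, row in enumerate(matrix)]

encoder = [[1.0, 0.0], [0.0, 1.0], [0.5, 0.5]]
gains = [2.0, 3.0, 4.0]
scaled = scale_rows(encoder, gains)  # -> [[2.0, 0.0], [0.0, 3.0], [2.0, 2.0]]
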
Example #5
0
def compute_sparse_weights(origin,
                           post,
                           transform,
                           fan_in,
                           noise=0.1,
                           num_samples=100):
    encoder = post.encoders
    radius = post.radii[0]

    if hasattr(transform, 'tolist'): transform = transform.tolist()

    approx = origin.node.getDecodingApproximator('AXON')

    # create X matrix
    X = approx.evalPoints
    X = MU.transpose([f.multiMap(X) for f in origin.functions])

    # create A matrix
    A = approx.values

    S = fan_in
    N_A = len(A)
    samples = len(A[0])
    N_B = len(encoder)
    w_sparse = np.zeros((N_B, N_A), 'f')
    noise_sd = MU.max(A) * noise
    decoder_list = [None for _ in range(num_samples)]
    for i in range(num_samples):
        # pick a random subset of S presynaptic neurons for this sample
        indices = random.sample(range(N_A), S)
        activity = [A[j] for j in indices]
        n = [[random.gauss(0, noise_sd) for _ in range(samples)]
             for j in range(S)]
        activity = MU.sum(activity, n)
        activityT = MU.transpose(activity)
        gamma = MU.prod(activity, activityT)

        upsilon = MU.prod(activity, X)

        gamma_inv = pinv(gamma, noise_sd * noise_sd)

        # store the decoders together with the indices they were solved for,
        # so the two stay paired when a sample is chosen below
        decoder_list[i] = (indices,
                           MU.prod([[x for x in row] for row in gamma_inv],
                                   upsilon))

    for i in range(N_B):
        indices, decoder = random.choice(decoder_list)
        ww = MU.prod(decoder, MU.prod(MU.transpose(transform), encoder[i]))

        for j, k in enumerate(indices):
            w_sparse[i, k] = float(ww[j]) / radius

    return list(w_sparse)
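
For comparison, a rough sketch of the regularized least-squares solve performed inside the sampling loop above, written with plain numpy rather than MU and the custom pinv helper; adding noise_sd**2 to the diagonal is one common regularization choice and is an assumption here, not a description of what pinv does internally.

import numpy as np

def solve_decoders(activity, targets, noise_sd):
    # activity: neurons x samples, targets: samples x dims
    A = np.asarray(activity, dtype=float)
    X = np.asarray(targets, dtype=float)
    gamma = A.dot(A.T) + (noise_sd ** 2) * np.eye(A.shape[0])
    upsilon = A.dot(X)
    return np.linalg.solve(gamma, upsilon)  # neurons x dims

decoders = solve_decoders(np.random.rand(3, 5), np.random.rand(5, 1), 0.1)
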
Example #6
0
    def calc_weights(self, encoder, decoder):
        self.N1 = len(decoder[0])
        self.D = len(decoder)
        self.N2 = len(encoder)
        self.getTermination('input').setDimensions(self.N1)
        self.getOrigin('output').setDimensions(self.N2)

        self.tables = []
        self.histograms = []
        for dim in range(self.D):
            cdfs = []
            self.tables.append(make_output_table([e[dim] for e in encoder]))
            for i in range(self.N1):
                d = decoder[dim][i] / spike_strength
                if d < 0:
                    decoder_sign = -1
                    d = -d
                else:
                    decoder_sign = 1
                histogram = compute_histogram(d, [e[dim] for e in encoder])
                cdf = compute_cdf(histogram)
                cdfs.append((decoder_sign, cdf))
            self.histograms.append(cdfs)

        return numeric.array(MU.prod(encoder, decoder))
Example #7
0
 def calc_weights(self,encoder,decoder):
     self.N1=len(decoder[0])
     self.D=len(decoder)
     self.N2=len(encoder)
     self.getTermination('input').setDimensions(self.N1)
     self.getOrigin('output').setDimensions(self.N2)
     
     self.tables=[]
     self.histograms=[]
     for dim in range(self.D):
         cdfs=[]
         self.tables.append(make_output_table([e[dim] for e in encoder]))
         for i in range(self.N1):
             d=decoder[dim][i]/spike_strength
             if d<0:
                 decoder_sign=-1
                 d=-d
             else:
                 decoder_sign=1
             histogram=compute_histogram(d,[e[dim] for e in encoder])
             cdf=compute_cdf(histogram)
             cdfs.append((decoder_sign,cdf))
         self.histograms.append(cdfs)
     
     return numeric.array(MU.prod(encoder,decoder))
Example #8
0
 def weights(self, obj, termination, include_gain=False):
     v = []
     for n in obj.nodes:
         w = n.getTermination(termination).weights
         if include_gain:
             w = MU.prod(w, n.scale)
         v.extend(w)
     return v
Example #9
0
 def weights(self, obj, termination, include_gain=False):
     v = []
     for n in obj.nodes:
         w = n.getTermination(termination).weights
         if include_gain:
             w = MU.prod(w, n.scale)
         v.extend(w)
     return v
Example #10
0
 def addPlasticTermination(self,name,matrix,tauPSC,decoder,weight_func=None):
     """Create a new termination.  A new termination is created on each
     of the ensembles, which are then grouped together.
     
     If decoders are not known at the time the termination is created,
     then pass in an array of zeros of the appropriate size (i.e. however
     many neurons will be in the population projecting to the termination,
     by number of dimensions)."""
     terminations = []
     d = 0
     dd=self._nodes[0].dimension
     for n in self._nodes:
         encoder = n.encoders
         w = MU.prod(encoder,[MU.prod(matrix,MU.transpose(decoder))[d+i] for i in range(dd)])
         if weight_func is not None:
             w = weight_func(w)
         t = n.addPESTermination(name,w,tauPSC,False)
         terminations.append(t)
         d += dd
     termination = EnsembleTermination(self,name,terminations)
     self.exposeTermination(termination,name)
     return self.getTermination(name)
Example #11
0
 def __init__(self, spinn, origin, termination, transform = None):
     scale = [nn.scale for nn in termination.node.nodes]
 
     if transform is None: 
         transform = termination.transform
     
     if origin.node.neurons>spinn.max_fan_in:
         w = optsparse.compute_sparse_weights(origin, termination.node, transform, spinn.max_fan_in)
     else:    
         w = MU.prod(termination.node.encoders,MU.prod(transform,MU.transpose(origin.decoders)))
         w = MU.prod(w,1.0/termination.node.radii[0])
     
     for i in range(len(w)):
         for j in range(len(w[i])):
             w[i][j] *= scale[i] / termination.tau
 
     w = MU.transpose(w)
     
     self.weights = w
     self.tau = int(round(termination.tau*1000))
     if self.tau not in spinn.populations[termination.node].taus:
         spinn.populations[termination.node].taus.append(self.tau)
     self.pre = spinn.populations[origin.node].name
     self.post = spinn.populations[termination.node].name
Example #12
0
 def termination_Cycle(self, x):
     x = x[0]
     if( self.cyc_opt ):
         x = 1 - x
     if( x < 0.025 ):
         if( self.reset_val < 0.5 ):
             input_total = zeros(1, self.dimension)
             for term_name in self.input_terms:
                 termination = self.getTermination(term_name)
                 term_matrix = self.input_mats[term_name]
                 term_output = termination.getOutput()
                 if( isinstance(term_matrix, (int,float,long)) ):
                     input_total = [input_total[n] + term_matrix * term_output[n] for n in range(self.dimension)]
                 else:
                     #term_value = numeric.dot(numeric.array(term_output, typecode='f'), self.input_mats[term_name])
                     term_value  = MU.prod(self.input_mats[term_name], term_output)
                     input_total = [input_total[n] + term_value[n] for n in range(self.dimension)]
             self.stored_val = deepcopy(input_total)
Example #13
0
def run_flat_delivery(args, seed=None):
    """Runs the model on the delivery task with only one hierarchical level."""

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    net = nef.Network("run_flat_delivery")

    if "load_weights" in args and args["load_weights"] is not None:
        args["load_weights"] += "_%s" % seed

    stateN = 1200
    contextD = 2
    context_scale = 1.0
    max_state_input = 2
    actions = [("up", [0, 1]), ("right", [1, 0]), ("down", [0, -1]),
               ("left", [-1, 0])]

    # ##ENVIRONMENT

    env = deliveryenvironment.DeliveryEnvironment(
        actions,
        HRLutils.datafile("contextmap.bmp"),
        colormap={
            -16777216: "wall",
            -1: "floor",
            -256: "a",
            -2088896: "b"
        },
        imgsize=(5, 5),
        dx=0.001,
        placedev=0.5)
    net.add(env)

    print "generated", len(env.placecells), "placecells"

    # ##NAV AGENT

    enc = env.gen_encoders(stateN, contextD, context_scale)
    enc = MU.prod(enc, 1.0 / max_state_input)

    with open(HRLutils.datafile("contextbmp_evalpoints_%s.txt" % seed)) as f:
        evals = [[float(x) for x in l.split(" ")] for l in f.readlines()]

    nav_agent = smdpagent.SMDPAgent(stateN,
                                    len(env.placecells) + contextD,
                                    actions,
                                    name="NavAgent",
                                    state_encoders=enc,
                                    state_evals=evals,
                                    state_threshold=0.8,
                                    **args)
    net.add(nav_agent)

    print "agent neurons:", nav_agent.countNeurons()

    net.connect(nav_agent.getOrigin("action_output"),
                env.getTermination("action"))
    net.connect(env.getOrigin("placewcontext"),
                nav_agent.getTermination("state_input"))

    nav_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.9)): None},
        env,
        name="NavTermNode",
        contextD=2)
    net.add(nav_term_node)
    net.connect(env.getOrigin("context"),
                nav_term_node.getTermination("context"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("reset"))
    net.connect(nav_term_node.getOrigin("learn"),
                nav_agent.getTermination("learn"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_state"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_action"))

    reward_relay = net.make("reward_relay", 1, 1, mode="direct")
    reward_relay.fixMode()
    net.connect(env.getOrigin("reward"), reward_relay)
    net.connect(nav_term_node.getOrigin("pseudoreward"), reward_relay)
    net.connect(reward_relay, nav_agent.getTermination("reward"))

    # period to save weights (realtime, not simulation time)
    weight_save = 600.0
    HRLutils.WeightSaveThread(
        nav_agent.getNode("QNetwork").saveParams,
        os.path.join("weights", "%s_%s" % (nav_agent.name, seed)),
        weight_save).start()

    # data collection node
    data = datanode.DataNode(period=5,
                             filename=HRLutils.datafile("dataoutput_%s.txt" %
                                                        seed))
    net.add(data)
    q_net = nav_agent.getNode("QNetwork")
    data.record_avg(env.getOrigin("reward"))
    data.record_avg(q_net.getNode("actionvals").getOrigin("X"))
    data.record_sparsity(q_net.getNode("state_pop").getOrigin("AXON"))
    data.record_avg(q_net.getNode("valdiff").getOrigin("X"))
    data.record_avg(nav_agent.getNode("ErrorNetwork").getOrigin("error"))

    #    net.add_to_nengo()
    #    net.run(10000)
    net.view()
Example #14
0
def run_flat_delivery(args, seed=None):
    """Runs the model on the delivery task with only one hierarchical level."""

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    net = nef.Network("run_flat_delivery")

    if "load_weights" in args and args["load_weights"] is not None:
        args["load_weights"] += "_%s" % seed

    stateN = 1200
    contextD = 2
    context_scale = 1.0
    max_state_input = 2
    actions = [("up", [0, 1]), ("right", [1, 0]), ("down", [0, -1]), ("left", [-1, 0])]

    ###ENVIRONMENT

    env = deliveryenvironment.DeliveryEnvironment(actions, HRLutils.datafile("contextmap.bmp"),
                                                  colormap={-16777216:"wall",
                                                           - 1:"floor",
                                                           - 256:"a",
                                                           - 2088896:"b"},
                                                  imgsize=(5, 5), dx=0.001, placedev=0.5)
    net.add(env)

    print "generated", len(env.placecells), "placecells"

    ###NAV AGENT

    enc = env.gen_encoders(stateN, contextD, context_scale)
    enc = MU.prod(enc, 1.0 / max_state_input)

    with open(HRLutils.datafile("contextbmp_evalpoints_%s.txt" % seed)) as f:
        evals = [[float(x) for x in l.split(" ")] for l in f.readlines()]

    nav_agent = smdpagent.SMDPAgent(stateN, len(env.placecells) + contextD, actions, name="NavAgent",
                                    state_encoders=enc, state_evals=evals, state_threshold=0.8,
                                    **args)
    net.add(nav_agent)

    print "agent neurons:", nav_agent.countNeurons()

    # Connect the agent's actions to the environment so the agent can act upon the environment
    net.connect(nav_agent.getOrigin("action_output"), env.getTermination("action"))
    # Connect the environment state to the agent, so the agent knows the effect of its action
    net.connect(env.getOrigin("placewcontext"), nav_agent.getTermination("state_input"))
#    net.connect(env.getOrigin("reward"), nav_agent.getTermination("reward"))
#    net.connect(env.getOrigin("optimal_move"), nav_agent.getTermination("bg_input"))

    # termination node for nav_agent (just a timer that goes off regularly)
    nav_term_node = terminationnode.TerminationNode({terminationnode.Timer((0.6, 0.9)):None}, env,
                                                    name="NavTermNode", contextD=2)
    net.add(nav_term_node)
    net.connect(env.getOrigin("context"), nav_term_node.getTermination("context"))
    net.connect(nav_term_node.getOrigin("reset"), nav_agent.getTermination("reset"))
    net.connect(nav_term_node.getOrigin("learn"), nav_agent.getTermination("learn"))
    net.connect(nav_term_node.getOrigin("reset"), nav_agent.getTermination("save_state"))
    net.connect(nav_term_node.getOrigin("reset"), nav_agent.getTermination("save_action"))

    # Relay both reward signals through a direct-mode ensemble; this appears to be the only way to combine two outputs onto a single termination in this version of Nengo
    reward_relay = net.make("reward_relay", 1, 1, mode="direct")
    reward_relay.fixMode()
    net.connect(env.getOrigin("reward"), reward_relay)
    net.connect(nav_term_node.getOrigin("pseudoreward"), reward_relay)
    net.connect(reward_relay, nav_agent.getTermination("reward"))

    #save weights
    weight_save = 600.0 #period to save weights (realtime, not simulation time)
    HRLutils.WeightSaveThread(nav_agent.getNode("QNetwork").saveParams,
                     os.path.join("weights", "%s_%s" % (nav_agent.name, seed)), weight_save).start()

    #data collection node
    data = datanode.DataNode(period=5, show_plots=None, filename=HRLutils.datafile("dataoutput_%s.txt" % seed))
    net.add(data)
    #data.record_avg(env.getOrigin("reward"), filter=1e-5)
    #data.record_avg(nav_agent.getNode("QNetwork").getNode("actionvals").getOrigin("X"), filter=1e-5)
    #data.record_sparsity(nav_agent.getNode("QNetwork").getNode("state_pop").getOrigin("AXON"), filter=1e-5)
    #data.record_avg(nav_agent.getNode("QNetwork").getNode("valdiff").getOrigin("X"), filter=1e-5)
    # ErrorNetwork does not appear to be the correct node name here, and the correct one is unclear
    #data.record_avg(nav_agent.getNode("ErrorNetwork").getOrigin("error"), filter=1e-5)

    # Try recording everything

    net.add_to_nengo()
    net.view()
Example #15
0
def run_contextenvironment(args, seed=None):
    """Runs the model on the context task.

    :param args: kwargs for the agent
    :param seed: random seed
    """

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    net = nef.Network("runContextEnvironment")

    if "load_weights" in args and args["load_weights"] is not None:
        args["load_weights"] += "_%s" % seed

    stateN = 1200  # number of neurons to use in state population
    contextD = 2  # dimension of context vector
    context_scale = 1.0  # scale of context representation
    max_state_input = 2  # max length of input vector for state population
    # actions (label and vector) available to the system
    actions = [("up", [0, 1]), ("right", [1, 0]), ("down", [0, -1]),
               ("left", [-1, 0])]

    # context labels and rewards for achieving those context goals
    rewards = {"a": 1.5, "b": 1.5}

    env = contextenvironment.ContextEnvironment(
        actions,
        HRLutils.datafile("contextmap.bmp"),
        contextD,
        rewards,
        colormap={
            -16777216: "wall",
            -1: "floor",
            -256: "a",
            -2088896: "b"
        },
        imgsize=(5, 5),
        dx=0.001,
        placedev=0.5)
    net.add(env)

    print "generated", len(env.placecells), "placecells"

    # termination node for agent (just goes off on some regular interval)
    term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.9)): 0.0}, env)
    net.add(term_node)

    # generate encoders and divide by max_state_input (so that all inputs
    # will end up being radius 1)
    enc = env.gen_encoders(stateN, contextD, context_scale)
    enc = MU.prod(enc, 1.0 / max_state_input)

    # load eval points from file
    with open(HRLutils.datafile("contextbmp_evalpoints_%s.txt" % seed)) as f:
        print "loading contextbmp_evalpoints_%s.txt" % seed
        evals = [[float(x) for x in l.split(" ")] for l in f.readlines()]

    agent = smdpagent.SMDPAgent(stateN,
                                len(env.placecells) + contextD,
                                actions,
                                state_encoders=enc,
                                state_evals=evals,
                                state_threshold=0.8,
                                **args)
    net.add(agent)

    print "agent neurons:", agent.countNeurons()

    # period to save weights (realtime, not simulation time)
    weight_save = 600.0
    t = HRLutils.WeightSaveThread(
        agent.getNode("QNetwork").saveParams,
        os.path.join("weights", "%s_%s" % (agent.name, seed)), weight_save)
    t.start()

    # data collection node
    data = datanode.DataNode(period=5,
                             filename=HRLutils.datafile("dataoutput_%s.txt" %
                                                        seed))
    net.add(data)
    q_net = agent.getNode("QNetwork")
    data.record(env.getOrigin("reward"))
    data.record(q_net.getNode("actionvals").getOrigin("X"), func=max)
    data.record(q_net.getNode("actionvals").getOrigin("X"), func=min)
    data.record_sparsity(q_net.getNode("state_pop").getOrigin("AXON"))
    data.record_avg(q_net.getNode("valdiff").getOrigin("X"))
    data.record_avg(env.getOrigin("state"))

    net.connect(env.getOrigin("placewcontext"),
                agent.getTermination("state_input"))
    net.connect(env.getOrigin("reward"), agent.getTermination("reward"))
    net.connect(term_node.getOrigin("reset"), agent.getTermination("reset"))
    net.connect(term_node.getOrigin("learn"), agent.getTermination("learn"))
    net.connect(term_node.getOrigin("reset"),
                agent.getTermination("save_state"))
    net.connect(term_node.getOrigin("reset"),
                agent.getTermination("save_action"))

    net.connect(agent.getOrigin("action_output"), env.getTermination("action"))

    #    net.add_to_nengo()
    #    net.run(2000)
    net.view()

    t.stop()
Example #16
0
def run_deliveryenvironment(navargs, ctrlargs, tag=None, seed=None):
    """Runs the model on the delivery task.

    :param navargs: kwargs for the nav_agent (see SMDPAgent.__init__)
    :param ctrlargs: kwargs for the ctrl_agent (see SMDPAgent.__init__)
    :param tag: string appended to datafiles associated with this run
    :param seed: random seed used for this run
    """

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    if tag is None:
        tag = str(seed)

    net = nef.Network("runDeliveryEnvironment", seed=seed)

    stateN = 1200  # number of neurons to use in state population
    contextD = 2  # dimension of context vector
    context_scale = 1.0  # relative scale of context vector vs state vector
    max_state_input = 2  # maximum length of input vector to state population

    # labels and vectors corresponding to basic actions available to the system
    actions = [("up", [0, 1]), ("right", [1, 0]), ("down", [0, -1]),
               ("left", [-1, 0])]

    if "load_weights" in navargs and navargs["load_weights"] is not None:
        navargs["load_weights"] += "_%s" % tag
    if "load_weights" in ctrlargs and ctrlargs["load_weights"] is not None:
        ctrlargs["load_weights"] += "_%s" % tag

    # ##ENVIRONMENT

    env = deliveryenvironment.DeliveryEnvironment(
        actions,
        HRLutils.datafile("contextmap.bmp"),
        colormap={
            -16777216: "wall",
            -1: "floor",
            -256: "a",
            -2088896: "b"
        },
        imgsize=(5, 5),
        dx=0.001,
        placedev=0.5)
    net.add(env)

    print "generated", len(env.placecells), "placecells"

    # ##NAV AGENT

    # generate encoders and divide them by max_state_input (so that inputs
    # will be scaled down to radius 1)
    enc = env.gen_encoders(stateN, contextD, context_scale)
    enc = MU.prod(enc, 1.0 / max_state_input)

    # read in eval points from file
    with open(HRLutils.datafile("contextbmp_evalpoints_%s.txt" % tag)) as f:
        evals = [[float(x) for x in l.split(" ")] for l in f.readlines()]

    nav_agent = smdpagent.SMDPAgent(stateN,
                                    len(env.placecells) + contextD,
                                    actions,
                                    name="NavAgent",
                                    state_encoders=enc,
                                    state_evals=evals,
                                    state_threshold=0.8,
                                    **navargs)
    net.add(nav_agent)

    print "agent neurons:", nav_agent.countNeurons()

    # output of nav_agent is what goes to the environment
    net.connect(nav_agent.getOrigin("action_output"),
                env.getTermination("action"))

    # termination node for nav_agent (just a timer that goes off regularly)
    nav_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.9)): None},
        env,
        contextD=2,
        name="NavTermNode")
    net.add(nav_term_node)

    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("reset"))
    net.connect(nav_term_node.getOrigin("learn"),
                nav_agent.getTermination("learn"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_state"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_action"))

    # ##CTRL AGENT

    # actions corresponding to "go to A" or "go to B"
    actions = [("a", [0, 1]), ("b", [1, 0])]
    ctrl_agent = smdpagent.SMDPAgent(stateN,
                                     len(env.placecells) + contextD,
                                     actions,
                                     name="CtrlAgent",
                                     state_encoders=enc,
                                     state_evals=evals,
                                     state_threshold=0.8,
                                     **ctrlargs)
    net.add(ctrl_agent)

    print "agent neurons:", ctrl_agent.countNeurons()

    # ctrl_agent gets environmental state and reward
    net.connect(env.getOrigin("placewcontext"),
                ctrl_agent.getTermination("state_input"))
    net.connect(env.getOrigin("reward"), ctrl_agent.getTermination("reward"))

    # termination node for ctrl_agent (terminates whenever the agent is in the
    # state targeted by the ctrl_agent)
    # also has a long timer so that ctrl_agent doesn't get permanently stuck
    # in one action
    ctrl_term_node = terminationnode.TerminationNode(
        {
            "a": [0, 1],
            "b": [1, 0],
            terminationnode.Timer((30, 30)): None
        },
        env,
        contextD=2,
        name="CtrlTermNode",
        rewardval=1.5)
    net.add(ctrl_term_node)

    # reward for nav_agent is the pseudoreward from ctrl_agent termination
    net.connect(ctrl_term_node.getOrigin("pseudoreward"),
                nav_agent.getTermination("reward"))

    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("reset"))
    net.connect(ctrl_term_node.getOrigin("learn"),
                ctrl_agent.getTermination("learn"))
    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("save_state"))
    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("save_action"))

    # connect ctrl_agent action to termination context
    # this is used so that ctrl_term_node knows what the current goal is (to
    # determine termination and pseudoreward)
    net.connect(ctrl_agent.getOrigin("action_output"),
                ctrl_term_node.getTermination("context"))

    # state input for nav_agent is the environmental state + the output of
    # ctrl_agent
    ctrl_output_relay = net.make("ctrl_output_relay",
                                 1,
                                 len(env.placecells) + contextD,
                                 mode="direct")
    ctrl_output_relay.fixMode()
    trans = (list(MU.I(len(env.placecells))) +
             [[0 for _ in range(len(env.placecells))]
              for _ in range(contextD)])
    net.connect(env.getOrigin("place"), ctrl_output_relay, transform=trans)
    net.connect(ctrl_agent.getOrigin("action_output"),
                ctrl_output_relay,
                transform=([[0 for _ in range(contextD)]
                            for _ in range(len(env.placecells))] +
                           list(MU.I(contextD))))

    net.connect(ctrl_output_relay, nav_agent.getTermination("state_input"))

    # periodically save the weights

    # period to save weights (realtime, not simulation time)
    weight_save = 600.0

    threads = [
        HRLutils.WeightSaveThread(
            nav_agent.getNode("QNetwork").saveParams,
            os.path.join("weights", "%s_%s" % (nav_agent.name, tag)),
            weight_save),
        HRLutils.WeightSaveThread(
            ctrl_agent.getNode("QNetwork").saveParams,
            os.path.join("weights", "%s_%s" % (ctrl_agent.name, tag)),
            weight_save)
    ]

    for t in threads:
        t.start()

    # data collection node
    data = datanode.DataNode(period=5,
                             filename=HRLutils.datafile("dataoutput_%s.txt" %
                                                        tag))
    net.add(data)
    data.record(env.getOrigin("reward"))
    q_net = ctrl_agent.getNode("QNetwork")
    data.record(q_net.getNode("actionvals").getOrigin("X"), func=max)
    data.record(q_net.getNode("actionvals").getOrigin("X"), func=min)
    data.record_sparsity(q_net.getNode("state_pop").getOrigin("AXON"))
    data.record_avg(q_net.getNode("valdiff").getOrigin("X"))
    data.record_avg(ctrl_agent.getNode("ErrorNetwork").getOrigin("error"))

    #     net.add_to_nengo()
    #     net.run(10000)
    net.view()

    for t in threads:
        t.stop()
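
The block transforms built for ctrl_output_relay above (identity over the place cells with zeros over the context, and the reverse) can be sketched on their own. A toy-sized version with a small identity helper in place of MU.I:

def eye(n):
    return [[1.0 if i == j else 0.0 for j in range(n)] for i in range(n)]

n_place, contextD = 4, 2  # toy sizes; the model uses len(env.placecells) and 2
# routes the place representation into the first n_place relay dimensions
place_transform = eye(n_place) + [[0.0] * n_place for _ in range(contextD)]
# routes the ctrl agent's action into the last contextD relay dimensions
context_transform = ([[0.0] * contextD for _ in range(n_place)] + eye(contextD))
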
Example #17
0
def run_badreenvironment(nav_args, ctrl_args, seed=None, flat=False):
    
    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED
    
    net = nef.Network("run_badreenvironment")

    env = badreenvironment.BadreEnvironment(flat=flat)
    net.add(env)

    ###NAV AGENT
    stateN = 500
    max_state_input = 2
    enc = env.gen_encoders(stateN, 0, 1.0)
    enc = MU.prod(enc, 1.0 / max_state_input)

#    with open(HRLutils.datafile("badre_evalpoints.txt")) as f:
#        evals = [[float(x) for x in l.split(" ")] for l in f.readlines()]
    orientations = MU.I(env.num_orientations)
    shapes = MU.I(env.num_shapes)
    colours = MU.I(env.num_colours)
    evals = list(MU.I(env.stateD)) + \
            [o+s+c for o in orientations for s in shapes for c in colours]

    nav_agent = smdpagent.SMDPAgent(stateN, env.stateD,
                                    env.actions, name="NavAgent",
                                    load_weights=None,
                                    state_encoders=enc, state_evals=evals,
                                    discount=0.4, **nav_args)
    net.add(nav_agent)

    print "agent neurons:", nav_agent.countNeurons()

    nav_term_node = terminationnode.TerminationNode({terminationnode.Timer((0.6, 0.6)):None}, env,
                                                    name="NavTermNode", state_delay=0.1)
    net.add(nav_term_node)

    net.connect(nav_term_node.getOrigin("reset"), nav_agent.getTermination("reset"))
    net.connect(nav_term_node.getOrigin("learn"), nav_agent.getTermination("learn"))
    net.connect(nav_term_node.getOrigin("reset"), nav_agent.getTermination("save_state"))
    net.connect(nav_term_node.getOrigin("reset"), nav_agent.getTermination("save_action"))

    net.connect(nav_agent.getOrigin("action_output"), env.getTermination("action"))

    ###CTRL AGENT
    enc = env.gen_encoders(stateN, 0, 0)
    enc = MU.prod(enc, 1.0 / max_state_input)
    actions = [("shape", [0, 1]), ("orientation", [1, 0]), ("null", [0, 0])]
    ctrl_agent = smdpagent.SMDPAgent(stateN, env.stateD, actions, name="CtrlAgent",
                                     load_weights=None, state_encoders=enc,
                                     state_evals=evals, discount=0.4, **ctrl_args)
    net.add(ctrl_agent)

    print "agent neurons:", ctrl_agent.countNeurons()

    net.connect(env.getOrigin("state"), ctrl_agent.getTermination("state_input"))

    ctrl_term_node = terminationnode.TerminationNode({terminationnode.Timer((0.6, 0.6)):None},
                                                     env, name="CtrlTermNode",
                                                     state_delay=0.1)
    net.add(ctrl_term_node)

    net.connect(ctrl_term_node.getOrigin("reset"), ctrl_agent.getTermination("reset"))
    net.connect(ctrl_term_node.getOrigin("learn"), ctrl_agent.getTermination("learn"))
    net.connect(ctrl_term_node.getOrigin("reset"), ctrl_agent.getTermination("save_state"))
    net.connect(ctrl_term_node.getOrigin("reset"), ctrl_agent.getTermination("save_action"))
    
    
    ## reward for nav/ctrl
    reward_relay = net.make("reward_relay", 1, 2, mode="direct")
    reward_relay.fixMode()
    net.connect(env.getOrigin("reward"), reward_relay, transform=[[1], [0]])
    net.connect(ctrl_agent.getOrigin("action_output"), reward_relay, transform=[[0, 0], [1, 1]])
    
    # nav reward is just environment
    net.connect(reward_relay, nav_agent.getTermination("reward"), 
                func=lambda x: x[0], origin_name="nav_reward")
    
    # ctrl gets a slight bonus if it selects a rule (as opposed to null), to encourage it not
    # to just pick null all the time
    net.connect(reward_relay, ctrl_agent.getTermination("reward"), 
                func=lambda x: x[0]+0.25*abs(x[0]) if x[1] > 0.5 else x[0], origin_name="ctrl_reward")

    ## state for navagent controlled by ctrlagent
#    ctrl_output_relay = net.make("ctrl_output_relay", 1, env.stateD+2, mode="direct")
#    ctrl_output_relay.fixMode()
    ctrl_output_relay = net.make_array("ctrl_output_relay", 50, env.stateD,
                                       radius=2, mode=HRLutils.SIMULATION_MODE)
    ctrl_output_relay.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
    
    inhib_matrix = [[0,-5]]*50*env.num_orientations + \
                   [[-5,0]]*50*env.num_shapes + \
                   [[-5,-5]]*50*env.num_colours

    # ctrl output inhibits all the non-selected aspects of the state
    net.connect(env.getOrigin("state"), ctrl_output_relay)
    net.connect(ctrl_agent.getOrigin("action_output"), ctrl_output_relay,
#                transform=zip([0]*env.num_orientations + [-1]*(env.num_shapes+env.num_colours),
#                              [-1]*env.num_orientations + [0]*env.num_shapes + [-1]*env.num_colours))
                transform=inhib_matrix)
    
    # also give a boost to the selected aspects (so that neurons are roughly equally activated).
    # adding 2/3 to each element (base vector has length 3, inhibited vector has length 1, so add 2/3*3 --> 3)
    net.connect(ctrl_agent.getOrigin("action_output"), ctrl_output_relay,
                transform=zip([0.66]*env.num_orientations + [0]*(env.num_shapes+env.num_colours),
                              [0]*env.num_orientations + [0.66]*env.num_shapes + [2]*env.num_colours))

    net.connect(ctrl_output_relay, nav_agent.getTermination("state_input"))

    # save weights
    weight_save = 600.0 # period to save weights (realtime, not simulation time)
    HRLutils.WeightSaveThread(nav_agent.getNode("QNetwork").saveParams,
                     os.path.join("weights", "%s_%s" % (nav_agent.name, seed)), weight_save).start()
    HRLutils.WeightSaveThread(ctrl_agent.getNode("QNetwork").saveParams,
                     os.path.join("weights", "%s_%s" % (ctrl_agent.name, seed)), weight_save).start()

    # data collection node
    data = datanode.DataNode(period=5, show_plots=None, filename=HRLutils.datafile("dataoutput_%s.txt" % seed))
    filter = 1e-5
    net.add(data)
    data.record_avg(env.getOrigin("reward"), filter=filter)
    data.record_avg(ctrl_agent.getNode("QNetwork").getNode("actionvals").getOrigin("X"), filter=filter)
    data.record_sparsity(ctrl_agent.getNode("QNetwork").getNode("state_pop").getOrigin("AXON"), filter=filter)
    data.record_sparsity(nav_agent.getNode("QNetwork").getNode("state_pop").getOrigin("AXON"), filter=filter)
    data.record_avg(ctrl_agent.getNode("QNetwork").getNode("valdiff").getOrigin("X"), filter=filter)
    data.record_avg(ctrl_agent.getNode("ErrorNetwork").getOrigin("error"), filter=filter)
    data.record_avg(ctrl_agent.getNode("BGNetwork").getNode("weight_actions").getNode("0").getOrigin("AXON"), filter=filter)
    data.record_avg(ctrl_agent.getNode("BGNetwork").getNode("weight_actions").getNode("1").getOrigin("AXON"), filter=filter)

    net.add_to_nengo()
#    net.view()
    net.run(2000)
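
The inhibition transform built for ctrl_output_relay above can also be looked at on its own. Below is a toy-sized sketch (block sizes and aspect counts are placeholders for the 50-neuron ensembles and env.num_* values used above):

num_orientations, num_shapes, num_colours, block = 2, 2, 2, 50
inhib_matrix = ([[0, -5]] * block * num_orientations +
                [[-5, 0]] * block * num_shapes +
                [[-5, -5]] * block * num_colours)
# One row per relay neuron, one column per ctrl action dimension.  Selecting
# "shape" (action vector [0, 1]) drives the orientation and colour blocks with
# -5 while leaving the shape block at 0; selecting "orientation" ([1, 0]) does
# the reverse; the colour blocks are inhibited by either rule.
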
Example #18
0
 def termination_action(self, a, pstc=0.01):
     # set the selected action to the one with highest similarity to the
     # available actions
     self.action = max(self.actions, key=lambda x: MU.prod(a, x[1]))
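
A minimal standalone sketch of the selection rule above, with a plain dot product standing in for what MU.prod returns for the two vectors here:

def pick_action(a, actions):
    def dot(u, v):
        return sum(ui * vi for ui, vi in zip(u, v))
    return max(actions, key=lambda act: dot(a, act[1]))

actions = [("up", [0, 1]), ("right", [1, 0]), ("down", [0, -1]), ("left", [-1, 0])]
chosen = pick_action([0.2, 0.9], actions)  # -> ("up", [0, 1])
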
Example #19
0
def make(net, preName='pre', postName='post', rate=5e-4):

    # get pre and post ensembles from their names
    pre = net.network.getNode(preName)
    post = net.network.getNode(postName)

    dim_pre = pre.getDimension()
    dim_post = post.getDimension()

    t = [[0] * dim_pre for i in range(dim_post)]
    index_pre = range(dim_pre)
    index_post = range(dim_post)
    for i in range(max(len(index_pre),len(index_post))):
        ipre = index_pre[i % len(index_pre)]
        ipost = index_post[i % len(index_post)]
        t[ipost][ipre] = 1

    decoder = pre.getOrigin('X').getDecoders()
    encoder = post.getEncoders()
    encoder = MU.prod(encoder, 1.0 / post.getRadii()[0])
    weight = MU.prod(encoder, MU.prod(t, MU.transpose(decoder)))

    # random weight matrix to initialize projection from pre to post
    # def rand_weights(w):
    #     for i in range(len(w)):
    #         for j in range(len(w[0])):
    #             w[i][j] = random.uniform(-1e-3,1e-3)
    #     return w
    # weight = rand_weights(numeric.zeros((post.neurons, pre.neurons)).tolist())

    # non-decoded termination (to learn transformation)
    count = 0
    prename = pre.getName()
    while '%s_%02d' % (prename, count) in [t.name for t in post.terminations]:
        count = count + 1
    prename = '%s_%02d' % (prename, count)

    post.addBCMTermination(prename, weight, 0.005, False, None)
    
    # Add projections
    net.connect(pre.getOrigin('AXON'),post.getTermination(prename))
    
    # Set learning rule on the non-decoded termination
    net.learn(post,prename,None,rate=rate)

    if net.network.getMetaData("bcmterm") == None:
        net.network.setMetaData("bcmterm", HashMap())
    bcmterms = net.network.getMetaData("bcmterm")

    bcmterm = HashMap(4)
    bcmterm.put("preName", preName)
    bcmterm.put("postName", postName)
    bcmterm.put("rate", rate)

    bcmterms.put(prename, bcmterm)

    if net.network.getMetaData("templates") == None:
        net.network.setMetaData("templates", ArrayList())
    templates = net.network.getMetaData("templates")
    templates.add(prename)

    if net.network.getMetaData("templateProjections") == None:
        net.network.setMetaData("templateProjections", HashMap())
    templateproj = net.network.getMetaData("templateProjections")
    templateproj.put(preName, postName)
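
The coupling matrix built at the top of make() (the t variable) pairs pre and post dimensions one-to-one, wrapping around when the dimensionalities differ. A small standalone sketch:

def coupling_matrix(dim_pre, dim_post):
    t = [[0] * dim_pre for _ in range(dim_post)]
    for i in range(max(dim_pre, dim_post)):
        t[i % dim_post][i % dim_pre] = 1
    return t

t = coupling_matrix(3, 2)  # -> [[1, 0, 1], [0, 1, 0]]
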
Example #20
0
componentRMS = math.sqrt(1.0 / len(frequencies))
signal = FourierFunction(frequencies,
                         MU.uniform(1, len(frequencies), componentRMS / .707)[0],
                         MU.random(1, len(frequencies), IndicatorPDF(-.5, .5))[0])

noiseBandwidth = 500

for network in networks:
	network.setMode(SimulationMode.DIRECT);
	network.setStepSize(.0005);	
	signalPower = []
	noisePower = []
	
	for t in tau:
		network.setTau(t)
		
		network.setInputFunction(signal);
		network.clearErrors();
		network.reset(0)
		network.run(0, 10)
		signalPower.append(MU.variance(MU.prod(network.getOutputData().getValues(), [1]), 0))
		
		network.setInputFunction(ConstantFunction(1, 0));
		network.setNoise(1000, 1000);
		network.reset(0)
		network.run(0, 10);
		network.clearErrors();
		noisePower.append(MU.variance(MU.prod(network.getOutputData().getValues(), [1]), 0))

	Plotter.plot(tau, signalPower, "%s signal power" %network.getName());
	Plotter.plot(tau, noisePower, "%s noise power" %network.getName());
	network.setStepSize(.001);
	
Example #21
0
def run_flat_delivery(args, seed=None):
    """Runs the model on the delivery task with only one hierarchical level."""

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    net = nef.Network("run_flat_delivery")

    if "load_weights" in args and args["load_weights"] is not None:
        args["load_weights"] += "_%s" % seed

    stateN = 1200
    contextD = 2
    context_scale = 1.0
    max_state_input = 2
    actions = [("up", [0, 1]), ("right", [1, 0]),
               ("down", [0, -1]), ("left", [-1, 0])]

    # ##ENVIRONMENT

    env = deliveryenvironment.DeliveryEnvironment(
        actions, HRLutils.datafile("contextmap.bmp"),
        colormap={-16777216: "wall", -1: "floor", -256: "a", -2088896: "b"},
        imgsize=(5, 5), dx=0.001, placedev=0.5)
    net.add(env)

    print "generated", len(env.placecells), "placecells"

    # ##NAV AGENT

    enc = env.gen_encoders(stateN, contextD, context_scale)
    enc = MU.prod(enc, 1.0 / max_state_input)

    with open(HRLutils.datafile("contextbmp_evalpoints_%s.txt" % seed)) as f:
        evals = [[float(x) for x in l.split(" ")] for l in f.readlines()]

    nav_agent = smdpagent.SMDPAgent(stateN, len(env.placecells) + contextD,
                                    actions, name="NavAgent",
                                    state_encoders=enc, state_evals=evals,
                                    state_threshold=0.8, **args)
    net.add(nav_agent)

    print "agent neurons:", nav_agent.countNeurons()

    net.connect(nav_agent.getOrigin("action_output"),
                env.getTermination("action"))
    net.connect(env.getOrigin("placewcontext"),
                nav_agent.getTermination("state_input"))

    nav_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.9)): None}, env, name="NavTermNode",
        contextD=2)
    net.add(nav_term_node)
    net.connect(env.getOrigin("context"),
                nav_term_node.getTermination("context"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("reset"))
    net.connect(nav_term_node.getOrigin("learn"),
                nav_agent.getTermination("learn"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_state"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_action"))

    reward_relay = net.make("reward_relay", 1, 1, mode="direct")
    reward_relay.fixMode()
    net.connect(env.getOrigin("reward"), reward_relay)
    net.connect(nav_term_node.getOrigin("pseudoreward"), reward_relay)
    net.connect(reward_relay, nav_agent.getTermination("reward"))

    # period to save weights (realtime, not simulation time)
    weight_save = 600.0
    HRLutils.WeightSaveThread(nav_agent.getNode("QNetwork").saveParams,
                              os.path.join("weights", "%s_%s" %
                                           (nav_agent.name, seed)),
                              weight_save).start()

    # data collection node
    data = datanode.DataNode(period=5,
                             filename=HRLutils.datafile("dataoutput_%s.txt" %
                                                        seed))
    net.add(data)
    q_net = nav_agent.getNode("QNetwork")
    data.record_avg(env.getOrigin("reward"))
    data.record_avg(q_net.getNode("actionvals").getOrigin("X"))
    data.record_sparsity(q_net.getNode("state_pop").getOrigin("AXON"))
    data.record_avg(q_net.getNode("valdiff").getOrigin("X"))
    data.record_avg(nav_agent.getNode("ErrorNetwork").getOrigin("error"))

#    net.add_to_nengo()
#    net.run(10000)
    net.view()
Example #22
0
 def termination_context(self, c, pstc=0.01):
     self.context = max(self.contexts, key=lambda x: MU.prod(HRLutils.normalize(c), HRLutils.normalize(x[1])))
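
A standalone sketch of the context match above, assuming HRLutils.normalize performs ordinary vector normalization, which makes the comparison a cosine similarity:

import math

def normalize(v):
    mag = math.sqrt(sum(x * x for x in v))
    return [x / mag for x in v] if mag > 0 else list(v)

def cosine(u, v):
    return sum(a * b for a, b in zip(normalize(u), normalize(v)))

contexts = [("a", [0, 1]), ("b", [1, 0])]
match = max(contexts, key=lambda c: cosine([0.1, 0.7], c[1]))  # -> ("a", [0, 1])
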
Example #23
0
def run_deliveryenvironment(navargs, ctrlargs, tag=None, seed=None):
    """Runs the model on the delivery task.

    :param navargs: kwargs for the nav_agent (see SMDPAgent.__init__)
    :param ctrlargs: kwargs for the ctrl_agent (see SMDPAgent.__init__)
    :param tag: string appended to datafiles associated with this run
    :param seed: random seed used for this run
    """

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    if tag is None:
        tag = str(seed)

    net = nef.Network("runDeliveryEnvironment", seed=seed)

    stateN = 1200  # number of neurons to use in state population
    contextD = 2  # dimension of context vector
    context_scale = 1.0  # relative scale of context vector vs state vector
    max_state_input = 2  # maximum length of input vector to state population

    # labels and vectors corresponding to basic actions available to the system
    actions = [("up", [0, 1]), ("right", [1, 0]),
               ("down", [0, -1]), ("left", [-1, 0])]

    if "load_weights" in navargs and navargs["load_weights"] is not None:
        navargs["load_weights"] += "_%s" % tag
    if "load_weights" in ctrlargs and ctrlargs["load_weights"] is not None:
        ctrlargs["load_weights"] += "_%s" % tag

    # ##ENVIRONMENT

    env = deliveryenvironment.DeliveryEnvironment(
        actions, HRLutils.datafile("contextmap.bmp"),
        colormap={-16777216: "wall", -1: "floor", -256: "a", -2088896: "b"},
        imgsize=(5, 5), dx=0.001, placedev=0.5)
    net.add(env)

    print "generated", len(env.placecells), "placecells"

    # ##NAV AGENT

    # generate encoders and divide them by max_state_input (so that inputs
    # will be scaled down to radius 1)
    enc = env.gen_encoders(stateN, contextD, context_scale)
    enc = MU.prod(enc, 1.0 / max_state_input)

    # read in eval points from file
    with open(HRLutils.datafile("contextbmp_evalpoints_%s.txt" % tag)) as f:
        evals = [[float(x) for x in l.split(" ")] for l in f.readlines()]

    nav_agent = smdpagent.SMDPAgent(stateN, len(env.placecells) + contextD,
                                    actions, name="NavAgent",
                                    state_encoders=enc, state_evals=evals,
                                    state_threshold=0.8,
                                    **navargs)
    net.add(nav_agent)

    print "agent neurons:", nav_agent.countNeurons()

    # output of nav_agent is what goes to the environment
    net.connect(nav_agent.getOrigin("action_output"),
                env.getTermination("action"))

    # termination node for nav_agent (just a timer that goes off regularly)
    nav_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.9)): None}, env, contextD=2,
        name="NavTermNode")
    net.add(nav_term_node)

    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("reset"))
    net.connect(nav_term_node.getOrigin("learn"),
                nav_agent.getTermination("learn"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_state"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_action"))

    # ##CTRL AGENT

    # actions corresponding to "go to A" or "go to B"
    actions = [("a", [0, 1]), ("b", [1, 0])]
    ctrl_agent = smdpagent.SMDPAgent(stateN, len(env.placecells) + contextD,
                                     actions, name="CtrlAgent",
                                     state_encoders=enc, state_evals=evals,
                                     state_threshold=0.8, **ctrlargs)
    net.add(ctrl_agent)

    print "agent neurons:", ctrl_agent.countNeurons()

    # ctrl_agent gets environmental state and reward
    net.connect(env.getOrigin("placewcontext"),
                ctrl_agent.getTermination("state_input"))
    net.connect(env.getOrigin("reward"),
                ctrl_agent.getTermination("reward"))

    # termination node for ctrl_agent (terminates whenever the agent is in the
    # state targeted by the ctrl_agent)
    # also has a long timer so that ctrl_agent doesn't get permanently stuck
    # in one action
    ctrl_term_node = terminationnode.TerminationNode(
        {"a": [0, 1], "b": [1, 0], terminationnode.Timer((30, 30)): None},
        env, contextD=2, name="CtrlTermNode", rewardval=1.5)
    net.add(ctrl_term_node)

    # reward for nav_agent is the pseudoreward from ctrl_agent termination
    net.connect(ctrl_term_node.getOrigin("pseudoreward"),
                nav_agent.getTermination("reward"))

    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("reset"))
    net.connect(ctrl_term_node.getOrigin("learn"),
                ctrl_agent.getTermination("learn"))
    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("save_state"))
    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("save_action"))

    # connect ctrl_agent action to termination context
    # this is used so that ctrl_term_node knows what the current goal is (to
    # determine termination and pseudoreward)
    net.connect(ctrl_agent.getOrigin("action_output"),
                ctrl_term_node.getTermination("context"))

    # state input for nav_agent is the environmental state + the output of
    # ctrl_agent
    ctrl_output_relay = net.make("ctrl_output_relay", 1,
                                 len(env.placecells) + contextD, mode="direct")
    ctrl_output_relay.fixMode()
    trans = (list(MU.I(len(env.placecells))) +
             [[0 for _ in range(len(env.placecells))]
              for _ in range(contextD)])
    net.connect(env.getOrigin("place"), ctrl_output_relay, transform=trans)
    net.connect(ctrl_agent.getOrigin("action_output"), ctrl_output_relay,
                transform=([[0 for _ in range(contextD)]
                            for _ in range(len(env.placecells))] +
                           list(MU.I(contextD))))

    net.connect(ctrl_output_relay, nav_agent.getTermination("state_input"))

    # periodically save the weights

    # period to save weights (realtime, not simulation time)
    weight_save = 600.0

    threads = [
        HRLutils.WeightSaveThread(nav_agent.getNode("QNetwork").saveParams,
                                  os.path.join("weights", "%s_%s" %
                                               (nav_agent.name, tag)),
                                  weight_save),
        HRLutils.WeightSaveThread(ctrl_agent.getNode("QNetwork").saveParams,
                                  os.path.join("weights", "%s_%s" %
                                               (ctrl_agent.name, tag)),
                                  weight_save)]

    for t in threads:
        t.start()

    # data collection node
    data = datanode.DataNode(period=5,
                             filename=HRLutils.datafile("dataoutput_%s.txt" %
                                                        tag))
    net.add(data)
    data.record(env.getOrigin("reward"))
    q_net = ctrl_agent.getNode("QNetwork")
    data.record(q_net.getNode("actionvals").getOrigin("X"), func=max)
    data.record(q_net.getNode("actionvals").getOrigin("X"), func=min)
    data.record_sparsity(q_net.getNode("state_pop").getOrigin("AXON"))
    data.record_avg(q_net.getNode("valdiff").getOrigin("X"))
    data.record_avg(ctrl_agent.getNode("ErrorNetwork").getOrigin("error"))

#     net.add_to_nengo()
#     net.run(10000)
    net.view()

    for t in threads:
        t.stop()
Example #24
0
def run_contextenvironment(args, seed=None):
    """Runs the model on the context task.

    :param args: kwargs for the agent
    :param seed: random seed
    """

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    net = nef.Network("runContextEnvironment")

    if "load_weights" in args and args["load_weights"] is not None:
        args["load_weights"] += "_%s" % seed

    stateN = 1200  # number of neurons to use in state population
    contextD = 2  # dimension of context vector
    context_scale = 1.0  # scale of context representation
    max_state_input = 2  # max length of input vector for state population
    # actions (label and vector) available to the system
    actions = [("up", [0, 1]), ("right", [1, 0]),
               ("down", [0, -1]), ("left", [-1, 0])]

    # context labels and rewards for achieving those context goals
    rewards = {"a": 1.5, "b": 1.5}

    env = contextenvironment.ContextEnvironment(
        actions, HRLutils.datafile("contextmap.bmp"), contextD, rewards,
        colormap={-16777216: "wall", -1: "floor", -256: "a", -2088896: "b"},
        imgsize=(5, 5), dx=0.001, placedev=0.5)
    net.add(env)

    print "generated", len(env.placecells), "placecells"

    # termination node for agent (just goes off on some regular interval)
    term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.9)): 0.0}, env)
    net.add(term_node)

    # generate encoders and divide by max_state_input (so that all inputs
    # will end up being radius 1)
    enc = env.gen_encoders(stateN, contextD, context_scale)
    enc = MU.prod(enc, 1.0 / max_state_input)

    # load eval points from file
    with open(HRLutils.datafile("contextbmp_evalpoints_%s.txt" % seed)) as f:
        print "loading contextbmp_evalpoints_%s.txt" % seed
        evals = [[float(x) for x in l.split(" ")] for l in f.readlines()]

    agent = smdpagent.SMDPAgent(stateN, len(env.placecells) + contextD,
                                actions, state_encoders=enc, state_evals=evals,
                                state_threshold=0.8, **args)
    net.add(agent)

    print "agent neurons:", agent.countNeurons()

    # period to save weights (realtime, not simulation time)
    weight_save = 600.0
    t = HRLutils.WeightSaveThread(agent.getNode("QNetwork").saveParams,
                                  os.path.join("weights", "%s_%s" %
                                               (agent.name, seed)),
                                  weight_save)
    t.start()

    # data collection node
    data = datanode.DataNode(period=5,
                             filename=HRLutils.datafile("dataoutput_%s.txt" %
                                                        seed))
    net.add(data)
    q_net = agent.getNode("QNetwork")
    data.record(env.getOrigin("reward"))
    data.record(q_net.getNode("actionvals").getOrigin("X"), func=max)
    data.record(q_net.getNode("actionvals").getOrigin("X"), func=min)
    data.record_sparsity(q_net.getNode("state_pop").getOrigin("AXON"))
    data.record_avg(q_net.getNode("valdiff").getOrigin("X"))
    data.record_avg(env.getOrigin("state"))

    net.connect(env.getOrigin("placewcontext"),
                agent.getTermination("state_input"))
    net.connect(env.getOrigin("reward"), agent.getTermination("reward"))
    net.connect(term_node.getOrigin("reset"), agent.getTermination("reset"))
    net.connect(term_node.getOrigin("learn"), agent.getTermination("learn"))
    net.connect(term_node.getOrigin("reset"),
                agent.getTermination("save_state"))
    net.connect(term_node.getOrigin("reset"),
                agent.getTermination("save_action"))

    net.connect(agent.getOrigin("action_output"), env.getTermination("action"))

#    net.add_to_nengo()
#    net.run(2000)
    net.view()

    t.stop()
Example #25
0
    def termination_action(self, a, pstc=0.01):
        # set the selected action to the one with highest similarity to the
        # current action input
        self.action = max(self.actions, key=lambda x: MU.prod(a, x[1]))
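
For reference, the selection rule above is just an arg-max over dot products between the current action input and each candidate action vector. A minimal plain-Python sketch (the action list and input here are hypothetical):

def dot(u, v):
    # plain dot product, standing in for MU.prod on two vectors
    return sum(ui * vi for ui, vi in zip(u, v))

actions = [("up", [0, 1]), ("right", [1, 0]),
           ("down", [0, -1]), ("left", [-1, 0])]
a = [0.9, 0.2]  # hypothetical action input

selected = max(actions, key=lambda x: dot(a, x[1]))
print selected[0]  # -> "right"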
Example #26
0
    def __init__(self,
                 name,
                 N,
                 stateN,
                 actions,
                 learningrate,
                 Qradius=1.0,
                 init_decoders=None):
        """Build ActionValues network.

        :param name: name of Network
        :param N: base number of neurons
        :param stateN: number of neurons in state population
        :param actions: actions available to the system
            :type actions: list of tuples (action_name,action_vector)
        :param learningrate: learning rate for PES rule
        :param Qradius: expected radius of Q values
        :param init_decoders: if specified, these decoders are used to
            initialize the connection weights so that they implement the
            corresponding decoded function
        """

        self.name = name
        net = nef.Network(self, seed=HRLutils.SEED, quick=False)

        self.N = N
        self.learningrate = learningrate
        self.supervision = 1.0  # don't use the unsupervised stuff at all

        self.tauPSC = 0.007

        modterms = []
        learnterms = []

        # relays
        output = net.make("output", 1, len(actions), mode="direct")
        output.fixMode()

        for i, action in enumerate(actions):
            # create one population corresponding to each action
            act_pop = net.make("action_" + action[0],
                               self.N * 4,
                               1,
                               node_factory=HRLutils.node_fac())
            act_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

            # add error termination
            modterm = act_pop.addDecodedTermination(
                "error", [[0 if j != i else 1 for j in range(len(actions))]],
                0.005, True)
            # set modulatory transform so that it selects one dimension of
            # the error signal
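            # e.g. with four actions and i = 1 this transform row is
            # [0, 1, 0, 0], so the termination only receives the error
            # for the second action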

            # create learning termination
            if init_decoders is not None:
                weights = MU.prod(act_pop.getEncoders(),
                                  MU.transpose(init_decoders))
            else:
                weights = [[
                    random.uniform(-1e-3, 1e-3) for j in range(stateN)
                ] for i in range(act_pop.getNeurons())]
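            # (shape note) either way weights is a full
            # (act_pop neurons x stateN) matrix: encoders (neurons x 1)
            # times transpose(init_decoders) (1 x stateN) in the first
            # branch, explicit random values in the second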
            learningterm = act_pop.addHPESTermination("learning", weights,
                                                      0.005, False, None)

            # initialize the learning rule
            net.learn(act_pop,
                      learningterm,
                      modterm,
                      rate=self.learningrate,
                      supervisionRatio=self.supervision)

            # connect each action back to output relay
            net.connect(act_pop.getOrigin("X"),
                        output,
                        transform=[[0] if j != i else [Qradius]
                                   for j in range(len(actions))],
                        pstc=0.001)
            # note, we learn all the Q values with radius 1, then just
            # multiply by the desired Q radius here
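            # e.g. with four actions and i = 2 the transform is
            # [[0], [0], [Qradius], [0]], so this population only drives
            # the third dimension of the output relay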

            modterms += [modterm]
            learnterms += [learningterm]

        # use EnsembleTerminations to group the individual action terminations
        # into one multi-dimensional termination
        self.exposeTermination(EnsembleTermination(self, "state", learnterms),
                               "state")
        self.exposeTermination(EnsembleTermination(self, "error", modterms),
                               "error")

        self.exposeOrigin(output.getOrigin("X"), "X")
Example #28
0
from ca.nengo.util import MU
from java.io import File
import math
# imports assumed for this standalone listing (class paths may differ by
# Nengo version); the networks referenced below (interneuron, dualTC, etc.)
# are built elsewhere in the enclosing script
from ca.nengo.model import SimulationMode
from ca.nengo.math.impl import ConstantFunction
from ca.nengo.io import MatlabExporter
from ca.nengo.plot import Plotter

nInput = range(200, 2001, 400)
nDiff = 1000

networks = [interneuron, dualTC, adapting, depressing, butterworth, interneuronFeedback]

exporter = MatlabExporter()
for network in networks:
	network.setInputFunction(ConstantFunction(1, 0))
	network.setStepSize(0.0001)
	network.setMode(SimulationMode.DIRECT)

	inputVariance = []
	outputVariance = []

	for n in nInput:
		network.setNoise(n, nDiff)
		# network.setDistortion(n, nDiff)
		network.reset(0)
		network.run(0, 10)
		# MU.prod(data, [1]) multiplies each row of the data matrix by the
		# vector [1], collapsing a single-column time series to a plain
		# vector before MU.variance computes its variance about 0
		inputVariance.append(MU.variance(MU.prod(network.getInputEnsembleData().getValues(), [1]), 0))
		outputVariance.append(MU.variance(MU.prod(network.getOutputData().getValues(), [1]), 0))

	network.clearErrors()
	Plotter.plot(nInput, outputVariance, "output")

exporter.write(File("noise.mat"))
Example #29
0
# imports assumed for this snippet (Java collections and Nengo matrix utils)
from java.util import ArrayList, HashMap
from ca.nengo.util import MU


def make(net, preName='pre', postName='post', rate=5e-4):

    # get pre and post ensembles from their names
    pre = net.network.getNode(preName)
    post = net.network.getNode(postName)

    dim_pre = pre.getDimension()
    dim_post = post.getDimension()

    t = [[0] * dim_pre for i in range(dim_post)]
    index_pre = range(dim_pre)
    index_post = range(dim_post)
    for i in range(max(len(index_pre), len(index_post))):
        ipre = index_pre[i % len(index_pre)]
        ipost = index_post[i % len(index_post)]
        t[ipost][ipre] = 1
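    # e.g. dim_pre = 2 and dim_post = 3 give t = [[1, 0], [0, 1], [1, 0]],
    # mapping pre dimensions onto post dimensions cyclically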

    decoder = pre.getOrigin('X').getDecoders()
    encoder = post.getEncoders()
    encoder = MU.prod(encoder, 1.0 / post.getRadii()[0])
    weight = MU.prod(encoder, MU.prod(t, MU.transpose(decoder)))
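    # (shape note) transpose(decoder) is (dim_pre x pre neurons), so
    # encoder . (t . transpose(decoder)) gives a (post neurons x pre neurons)
    # weight matrix implementing the transform t (encoders already rescaled
    # by 1 / post radius above)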

    # random weight matrix to initialize projection from pre to post
    # def rand_weights(w):
    #     for i in range(len(w)):
    #         for j in range(len(w[0])):
    #             w[i][j] = random.uniform(-1e-3,1e-3)
    #     return w
    # weight = rand_weights(numeric.zeros((post.neurons, pre.neurons)).tolist())

    # non-decoded termination (to learn transformation)
    count = 0
    prename = pre.getName()
    while '%s_%02d' % (prename, count) in [term.name for term in post.terminations]:
        count = count + 1
    prename = '%s_%02d' % (prename, count)

    post.addBCMTermination(prename, weight, 0.005, False, None)

    # Add projections
    net.connect(pre.getOrigin('AXON'), post.getTermination(prename))

    # Set learning rule on the non-decoded termination
    net.learn(post, prename, None, rate=rate)

    if net.network.getMetaData("bcmterm") == None:
        net.network.setMetaData("bcmterm", HashMap())
    bcmterms = net.network.getMetaData("bcmterm")

    bcmterm = HashMap(4)
    bcmterm.put("preName", preName)
    bcmterm.put("postName", postName)
    bcmterm.put("rate", rate)

    bcmterms.put(prename, bcmterm)

    if net.network.getMetaData("templates") == None:
        net.network.setMetaData("templates", ArrayList())
    templates = net.network.getMetaData("templates")
    templates.add(prename)

    if net.network.getMetaData("templateProjections") == None:
        net.network.setMetaData("templateProjections", HashMap())
    templateproj = net.network.getMetaData("templateProjections")
    templateproj.put(preName, postName)
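
As a sanity check on the weight construction above (post encoders times the transform times transposed pre decoders), here is a small pure-Python sketch with made-up two-neuron populations; all numbers are hypothetical:

def matmul(A, B):
    # naive matrix product, standing in for MU.prod
    return [[sum(A[i][k] * B[k][j] for k in range(len(B)))
             for j in range(len(B[0]))] for i in range(len(A))]

def transpose(A):
    return [list(row) for row in zip(*A)]

decoder = [[0.5], [-0.5]]   # pre population: 2 neurons, 1 dimension
encoder = [[1.0], [-1.0]]   # post population: 2 neurons, 1 dimension
t = [[1]]                   # 1-D identity transform

weight = matmul(encoder, matmul(t, transpose(decoder)))
print weight  # -> [[0.5, -0.5], [-0.5, 0.5]], one weight per (post, pre) neuron pair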