Example #1
    def step_down(self, chain_goals):
        """ Propagate goals down through the transition model """
        # Reshape chain_goals back into a square array
        chain_goals = np.reshape(chain_goals, (self.post.size, -1))
        # Weight chain goals by the current cable activities
        upstream_goals = tools.bounded_sum(self.post * chain_goals.T)
        cable_goals = tools.bounded_sum([upstream_goals, self.reaction])
        return cable_goals[:self.num_cables]
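All of the snippets on this page lean on tools.bounded_sum, which combines activity values while keeping the result short of 1. The helper itself is not shown here, so the following is only a plausible sketch consistent with how the snippets call it; the names map_one_to_inf and map_inf_to_one are assumptions rather than confirmed project code.

    import numpy as np

    def map_one_to_inf(a):
        # Map (-1, 1) onto the whole real line so bounded values
        # can be added freely in the unbounded domain.
        eps = np.finfo(np.double).eps
        return np.sign(a) * (1. / (1. - np.abs(a) + eps) - 1.)

    def map_inf_to_one(a):
        # Inverse map: squash the real line back into (-1, 1).
        return np.sign(a) * (1. - 1. / (1. + np.abs(a)))

    def bounded_sum(a, axis=0):
        # Sum a list of arrays, or one 2D array along an axis, such
        # that the magnitude of the result never reaches 1.
        if isinstance(a, list):
            total = map_one_to_inf(a[0])
            for item in a[1:]:
                total = total + map_one_to_inf(item)
            return map_inf_to_one(total)
        return map_inf_to_one(np.sum(map_one_to_inf(a), axis=axis))[:, np.newaxis]

Under this convention, step_down can stack upstream goals on top of reactive goals without any cable goal saturating.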
Example #2
    def deliberate(self, goal_value_by_chain):
        """ Choose goals deliberatively, based on deliberation_vote i
        and reward value """
        # Maintain the internal deliberation_vote set
        deliberation_vote_fulfillment = 1 - self.post
        deliberation_vote_decay = 1 - self.VOTE_DECAY_RATE
        self.deliberation_vote *= (deliberation_vote_fulfillment * 
                                   deliberation_vote_decay)

        similarity = np.tile(self.post, (1, self.post.size))
        reward_noise = (np.random.random_sample(
                self.reward_uncertainty.shape) * 2 - 1)
        estimated_reward_value = (self.reward_value - self.current_reward + 
                                  self.reward_uncertainty * reward_noise)
        estimated_reward_value = np.maximum(estimated_reward_value, 0)
        estimated_reward_value = np.minimum(estimated_reward_value, 1)
        reward_value_by_cable = tools.weighted_average(
                estimated_reward_value, 
                similarity / (self.reward_uncertainty + tools.EPSILON))
        reward_value_by_cable[self.num_cables:] = 0. 
        # Reshape goal_value_by_chain back into a square array 
        goal_value_by_chain = np.reshape(goal_value_by_chain, 
                                         (self.deliberation_vote.size, -1))
        # Bounded sum of the goal values from upstream over all chains
        goal_value_by_cable = tools.bounded_sum(goal_value_by_chain.T *
                                                similarity)
        count_by_cable = tools.weighted_average(self.count, similarity)
        exploration_vote = ((1 - self.current_reward) / 
                (self.num_cables * (count_by_cable + 1) * 
                 np.random.random_sample(count_by_cable.shape) + tools.EPSILON))
        exploration_vote = np.minimum(exploration_vote, 1.)
        exploration_vote[self.num_cables:] = 0.
        # Debug switch: set include_goals to False to ignore goal votes
        # and act on reward and exploration alone.
        include_goals = True
        if include_goals:
            cable_goals = tools.bounded_sum([reward_value_by_cable,
                                             goal_value_by_cable,
                                             exploration_vote])
        else:
            cable_goals = tools.bounded_sum([reward_value_by_cable,
                                             exploration_vote])
        self.deliberation_vote = np.maximum(cable_goals, self.deliberation_vote)
        # TODO perform deliberation centrally at the guru and 
        # modify cable goals accordingly. In this case cable_goals
        # will be all reactive, except for the deliberative component
        # from the guru.
        return cable_goals[:self.num_cables]
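deliberate also depends on tools.weighted_average and tools.EPSILON, neither of which appears on this page. The sketch below assumes EPSILON is a small constant guarding against division by zero and that weighted_average reduces over the first axis, which matches the shapes used above.

    import numpy as np

    EPSILON = np.finfo(np.double).eps  # assumed stand-in for tools.EPSILON

    def weighted_average(values, weights):
        # Average values using the given weights, guarding the
        # denominator so an all-zero weight column returns 0.
        weighted_sum = np.sum(values * weights, axis=0)
        total_weight = np.sum(weights, axis=0)
        return (weighted_sum / (total_weight + EPSILON))[:, np.newaxis]

Weighting the similarity matrix by 1 / (reward_uncertainty + EPSILON) lets cables with well-established reward estimates dominate the average, while highly uncertain chains contribute little.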
Example #3
    def step_down(self, bundle_goals):
        """ 
        Project the bundle goal values to the appropriate cables

        Multiply the bundle goals across the cables that contribute 
        to them, and perform a bounded sum over all bundles to get 
        the estimated activity associated with each cable.
        """
        if bundle_goals.size > 0:
            bundle_goals = tools.pad(bundle_goals, (self.max_num_bundles, 0))
            cable_activity_goals = tools.bounded_sum(self.bundle_map * 
                                                     bundle_goals, axis=0)
        else:
            cable_activity_goals = np.zeros((self.max_num_cables, 1))
        return cable_activity_goals
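A small worked example makes the projection concrete. It assumes bundle_map is a binary membership array of shape (max_num_bundles, max_num_cables), one row per bundle, which is consistent with the broadcasting against a (max_num_bundles, 1) goal column above; the numbers are made up for illustration.

    import numpy as np

    # Hypothetical map: 3 bundles over 4 cables (1 = cable feeds bundle).
    bundle_map = np.array([[1., 1., 0., 0.],
                           [0., 1., 1., 0.],
                           [0., 0., 1., 1.]])
    bundle_goals = np.array([[0.8], [0.5], [0.0]])

    # Spread each bundle's goal across its member cables, then combine
    # the per-cable contributions with a bounded sum over bundles
    # (reusing the bounded_sum sketch from Example #1).
    cable_activity_goals = bounded_sum(bundle_map * bundle_goals, axis=0)
    # A (4, 1) column: cable 0 inherits 0.8, cable 1 a bounded mix of
    # 0.8 and 0.5 (~0.83), cable 2 gets 0.5, and cable 3 gets 0.0.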
Example #4
    def step_up(self, new_cable_activities):
        """ Find bundle_activities that result from new_cable_activities """
        # Condition the cable activities to fall between 0 and 1
        if new_cable_activities.size < self.max_cables:
            new_cable_activities = tools.pad(new_cable_activities,
                                             (self.max_cables, 1))
        self.min_vals = np.minimum(new_cable_activities, self.min_vals)
        self.max_vals = np.maximum(new_cable_activities, self.max_vals)
        spread = self.max_vals - self.min_vals
        new_cable_activities = (
            (new_cable_activities - self.min_vals) /
            (self.max_vals - self.min_vals + tools.EPSILON))
        self.min_vals += spread * self.RANGE_DECAY_RATE
        self.max_vals -= spread * self.RANGE_DECAY_RATE
        # Update cable_activities, incorporating sensing dynamics
        self.cable_activities = tools.bounded_sum([
            new_cable_activities,
            self.cable_activities * (1. - self.ACTIVITY_DECAY_RATE)
        ])

        # Update the map from self.cable_activities to cogs
        self.ziptie.step_up(self.cable_activities)
        # Process the upward pass of each of the cogs in the block
        self.bundle_activities = np.zeros((0, 1))
        for cog_index in range(len(self.cogs)):
            # Pick out the cog's cable_activities, process them,
            # and assign the results to block's bundle_activities
            cog_cable_activities = self.cable_activities[
                self.ziptie.get_index_projection(cog_index).ravel().astype(
                    bool)]
            # Cogs are only allowed to start forming bundles once
            # their fraction of cables in bundles exceeds the
            # fill_fraction_threshold
            enough_cables = (self.ziptie.cable_fraction_in_bundle(cog_index) >
                             self.fill_fraction_threshold)
            cog_bundle_activities = self.cogs[cog_index].step_up(
                cog_cable_activities, enough_cables)
            self.bundle_activities = np.concatenate(
                (self.bundle_activities, cog_bundle_activities))
        # Goal fulfillment and decay
        self.hub_cable_goals -= self.cable_activities
        self.hub_cable_goals *= self.ACTIVITY_DECAY_RATE
        self.hub_cable_goals = np.maximum(self.hub_cable_goals, 0.)
        return self.bundle_activities
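step_up starts by zero-padding the incoming activities out to a fixed size with tools.pad. The helper is not shown on this page; the sketch below is a guess consistent with its two call patterns here, pad(a, (rows, cols)) and pad(a, (rows, 0)), reading a dimension of 0 as "leave this dimension unchanged".

    import numpy as np

    def pad(a, shape, val=0.):
        # Zero-pad 2D array `a` up to `shape`, leaving any dimension
        # given as 0 at its current size.
        rows = a.shape[0] if shape[0] == 0 else shape[0]
        cols = a.shape[1] if shape[1] == 0 else shape[1]
        padded = np.full((rows, cols), val)
        padded[:a.shape[0], :a.shape[1]] = a
        return padded

The min/max tracking that follows the pad is a running range estimate: the recorded extremes creep back toward each other by RANGE_DECAY_RATE each step, so the normalization adapts if a cable's activity range drifts over time.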
Example #5
    def step_up(self, new_cable_activities, reward):
        """ Find bundle_activities that result from new_cable_activities """
        new_cable_activities = tools.pad(new_cable_activities, 
                                         (self.max_cables, 1))
        '''
        # Condition the new_cable_activities to fall between 0 and 1
        self.min_vals = np.minimum(new_cable_activities, self.min_vals)
        self.max_vals = np.maximum(new_cable_activities, self.max_vals)
        spread = self.max_vals - self.min_vals
        new_cable_activities = ((new_cable_activities - self.min_vals) / 
                            (self.max_vals - self.min_vals + tools.EPSILON))
        self.min_vals += spread * self.RANGE_DECAY_RATE
        self.max_vals -= spread * self.RANGE_DECAY_RATE
        '''
        # Update cable_activities, incorporating sensing dynamics
        self.cable_activities = tools.bounded_sum([
                new_cable_activities, 
                self.cable_activities * (1. - self.ACTIVITY_DECAY_RATE)])

        # Update the map from self.cable_activities to cogs
        self.ziptie.update(self.cable_activities)
        # Process the upward pass of each of the cogs in the block
        self.bundle_activities = np.zeros((0, 1))
        for cog_index in range(len(self.cogs)):
            # Pick out the cog's cable_activities, process them, 
            # and assign the results to block's bundle_activities
            cog_cable_activities = self.cable_activities[
                    self.ziptie.get_projection(cog_index).ravel().astype(bool)]
            enough_cables = (self.ziptie.cable_fraction_in_bundle(cog_index)
                             > 0.7)
            cog_bundle_activities = self.cogs[cog_index].step_up(
                    cog_cable_activities, reward, enough_cables)
            self.bundle_activities = np.concatenate((self.bundle_activities, 
                                                     cog_bundle_activities))
        return self.bundle_activities
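The cable_activities update in this and the previous example is a leaky integrator: old activities decay by ACTIVITY_DECAY_RATE, and new inputs are folded in with a bounded sum so the total stays below 1. A tiny numeric trace, using an assumed decay rate of 0.5 and the bounded_sum sketch from Example #1:

    import numpy as np

    ACTIVITY_DECAY_RATE = 0.5  # illustrative value, not from the source
    cable_activities = np.array([[0.6], [0.0]])
    new_cable_activities = np.array([[0.3], [0.9]])

    cable_activities = bounded_sum([
        new_cable_activities,
        cable_activities * (1. - ACTIVITY_DECAY_RATE)])
    # First cable: bounded mix of 0.3 new and 0.3 decayed -> ~0.46.
    # Second cable: 0.9 new, nothing decayed -> 0.9.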
Example #6
    def learn(self, features, actions):
        """
        Update the cerebellar model of the world and its dynamics.

        Parameters
        ----------
        features : array of floats
            The current set of feature activities.
        actions : array of floats
            The set of actions chosen in response to ``features``.
        """
        if self.skip:
            return

        new_combo = np.concatenate((actions, features))
        (num_combos, num_hypos, _) = self.hypos.shape
        for i_combo, new_val in enumerate(new_combo):

            # Increment tries and wins for every hypo.
            for i_hypo in np.arange(num_hypos):
                similarity = self.guess[i_combo, i_hypo]
                error = np.abs(similarity - new_val)
                self.tries[i_combo, i_hypo] += similarity
                self.wins[i_combo, i_hypo] += np.minimum(similarity, new_val)

            # Check whether the error from the most recently evaluated
            # hypothesis is big enough to merit refining the model.
            if error > np.random.random_sample():
                """
                Add a new hypothesis.
                There are lots of possible schemes for forming new 
                hypotheses. Each one introduces its own bias. For now
                I'm randomly choosing either to use a randomly selected
                sample or to use the intersection 
                of two randomly selected hypotheses.
                """
                # Pick whether to combine samples or hypotheses.
                if np.random.random_sample() < .5:
                    # Use a single randomly selected sample.
                    # Check whether there are enough samples.
                    if self.samples_filled[i_combo]:
                        # Pick the sample.
                        i_sample = np.random.randint(self.num_samples)
                        self._add_hypothesis(i_combo,
                                             self.samples[i_combo, i_sample, :],
                                             tries=10., wins=1.)
                else:
                    # Try to combine two hypotheses.
                    # Check whether there are enough hypotheses.
                    if self.hypos_filled[i_combo]:
                        # Pick two distinct hypotheses at random.
                        i_hypo_a = np.random.randint(num_hypos)
                        i_hypo_b = i_hypo_a
                        while i_hypo_a == i_hypo_b:
                            i_hypo_b = np.random.randint(num_hypos)
                        intersection = np.minimum(
                                self.hypos[i_combo,i_hypo_a,:],
                                self.hypos[i_combo,i_hypo_b,:])
                        total_tries = (self.tries[i_combo,i_hypo_a] + 
                                       self.tries[i_combo,i_hypo_b]) 
                        total_wins = (self.wins[i_combo,i_hypo_a] + 
                                      self.wins[i_combo,i_hypo_b]) 
                        self._add_hypothesis(i_combo, intersection, 
                                             tries=total_tries, 
                                             wins=total_wins)

                # Add a new sample.
                self._add_sample(i_combo, self.recent_combo)

        # Handle the effects of time
        self.age += 1.
        self.recent_combo *= 1. - self.decay_rate
        self.recent_combo = tools.bounded_sum([self.recent_combo, new_combo])
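The "intersection of two hypotheses" used above is an elementwise minimum, which behaves like a fuzzy AND over the combo vector: a position survives only to the degree that both parent hypotheses include it. For example:

    import numpy as np

    hypo_a = np.array([0.9, 0.2, 0.0, 0.7])
    hypo_b = np.array([0.8, 0.0, 0.6, 0.7])

    # Keep only what both hypotheses agree on, the fuzzy
    # counterpart of a set intersection.
    intersection = np.minimum(hypo_a, hypo_b)
    # -> array([0.8, 0. , 0. , 0.7])

Pooling the parents' tries and wins when the merged hypothesis is added preserves the evidence both parents had accumulated.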