def step_down(self, chain_goals):
    """ Propagate goals down through the transition model """
    # Reshape chain_goals back into a square array
    chain_goals = np.reshape(chain_goals, (self.post.size, -1))
    # Weight chain goals by the current cable activities
    upstream_goals = tools.bounded_sum(self.post * chain_goals.T)
    cable_goals = tools.bounded_sum([upstream_goals, self.reaction])
    return cable_goals[:self.num_cables]
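# ---------------------------------------------------------------------
# Illustrative sketch, not from the original source: a minimal
# stand-in for tools.bounded_sum, assuming it behaves like a
# saturating sum that keeps results in [0, 1). The real
# tools.bounded_sum may differ in detail; this only gives intuition
# for how step_down combines upstream goals with reactions without
# ever exceeding full activation.
import numpy as np

def bounded_sum_sketch(values, axis=0):
    # Map each value from [0, 1) onto [0, inf), add, and map back,
    # so no combination of goals can exceed 1.
    arr = np.asarray(values, dtype=float)
    stretched = arr / (1. - arr + 1e-10)
    total = np.sum(stretched, axis=axis)
    return total / (1. + total)

# Example: two strong goals saturate toward 1 rather than adding to 1.4.
# print(bounded_sum_sketch([0.7, 0.7]))  # ~0.82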
def deliberate(self, goal_value_by_chain):
    """ Choose goals deliberatively, based on the deliberation_vote
    and reward value
    """
    # Maintain the internal deliberation_vote set
    deliberation_vote_fulfillment = 1 - self.post
    deliberation_vote_decay = 1 - self.VOTE_DECAY_RATE
    self.deliberation_vote *= (deliberation_vote_fulfillment *
                               deliberation_vote_decay)
    similarity = np.tile(self.post, (1, self.post.size))
    reward_noise = (np.random.random_sample(
            self.reward_uncertainty.shape) * 2 - 1)
    estimated_reward_value = (self.reward_value - self.current_reward +
                              self.reward_uncertainty * reward_noise)
    estimated_reward_value = np.maximum(estimated_reward_value, 0)
    estimated_reward_value = np.minimum(estimated_reward_value, 1)
    reward_value_by_cable = tools.weighted_average(
            estimated_reward_value,
            similarity / (self.reward_uncertainty + tools.EPSILON))
    reward_value_by_cable[self.num_cables:] = 0.
    # Reshape goal_value_by_chain back into a square array
    goal_value_by_chain = np.reshape(goal_value_by_chain,
                                     (self.deliberation_vote.size, -1))
    # Bounded sum of the deliberation_vote values from above
    # over all chains
    goal_value_by_cable = tools.bounded_sum(goal_value_by_chain.T *
                                            similarity)
    count_by_cable = tools.weighted_average(self.count, similarity)
    # Vote to explore cables that have rarely been observed,
    # especially when the current reward is low.
    exploration_vote = ((1 - self.current_reward) /
            (self.num_cables * (count_by_cable + 1) *
             np.random.random_sample(count_by_cable.shape) +
             tools.EPSILON))
    exploration_vote = np.minimum(exploration_vote, 1.)
    exploration_vote[self.num_cables:] = 0.
    include_goals = True
    if include_goals:
        cable_goals = tools.bounded_sum([reward_value_by_cable,
                                         goal_value_by_cable,
                                         exploration_vote])
    else:
        cable_goals = tools.bounded_sum([reward_value_by_cable,
                                         exploration_vote])
    self.deliberation_vote = np.maximum(cable_goals,
                                        self.deliberation_vote)
    # TODO: perform deliberation centrally at the guru and
    # modify cable goals accordingly. In this case cable_goals
    # will be all reactive, except for the deliberative component
    # from the guru.
    return cable_goals[:self.num_cables]
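# ---------------------------------------------------------------------
# Illustrative sketch, not from the original source: how the
# exploration_vote term in deliberate() behaves. Rarely observed
# cables (low count) receive large, noisy votes; well-observed cables
# receive votes near zero; and a high current reward suppresses
# exploration across the board.
import numpy as np

current_reward = 0.2
num_cables = 4
count_by_cable = np.array([0., 2., 20., 200.])  # observation counts
noise = np.random.random_sample(count_by_cable.shape)
exploration_vote = ((1 - current_reward) /
                    (num_cables * (count_by_cable + 1) * noise + 1e-10))
exploration_vote = np.minimum(exploration_vote, 1.)
# Typically: the count-0 cable dominates, the count-200 cable is ~0.
# print(exploration_vote)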
def step_down(self, bundle_goals):
    """ Project the bundle goal values to the appropriate cables

    Multiply the bundle goals across the cables that contribute
    to them, and perform a bounded sum over all bundles to get
    the estimated activity associated with each cable.
    """
    if bundle_goals.size > 0:
        bundle_goals = tools.pad(bundle_goals, (self.max_num_bundles, 0))
        cable_activity_goals = tools.bounded_sum(self.bundle_map *
                                                 bundle_goals, axis=0)
    else:
        cable_activity_goals = np.zeros((self.max_num_cables, 1))
    return cable_activity_goals
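# ---------------------------------------------------------------------
# Illustrative sketch, under the assumption that bundle_map is a
# binary bundles-by-cables array and that tools.bounded_sum(..., axis=0)
# collapses over bundles. Shows how two bundle goals fan out to the
# cables that contribute to each bundle; a clipped plain sum stands in
# for the bounded sum here.
import numpy as np

bundle_map = np.array([[1., 1., 0.],      # bundle 0 uses cables 0, 1
                       [0., 1., 1.]])     # bundle 1 uses cables 1, 2
bundle_goals = np.array([[0.9], [0.4]])   # one goal value per bundle
weighted = bundle_map * bundle_goals      # broadcast goals across cables
# Cable 1, shared by both bundles, receives the largest combined goal.
cable_activity_goals = np.minimum(np.sum(weighted, axis=0), 1.)
# print(cable_activity_goals)  # [0.9, 1.0 (clipped), 0.4]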
def step_up(self, new_cable_activities):
    """ Find bundle_activities that result from new_cable_activities """
    # Condition the cable activities to fall between 0 and 1
    if new_cable_activities.size < self.max_cables:
        new_cable_activities = tools.pad(new_cable_activities,
                                         (self.max_cables, 1))
    self.min_vals = np.minimum(new_cable_activities, self.min_vals)
    self.max_vals = np.maximum(new_cable_activities, self.max_vals)
    spread = self.max_vals - self.min_vals
    new_cable_activities = ((new_cable_activities - self.min_vals) /
                            (self.max_vals - self.min_vals +
                             tools.EPSILON))
    self.min_vals += spread * self.RANGE_DECAY_RATE
    self.max_vals -= spread * self.RANGE_DECAY_RATE
    # Update cable_activities, incorporating sensing dynamics
    self.cable_activities = tools.bounded_sum([
            new_cable_activities,
            self.cable_activities * (1. - self.ACTIVITY_DECAY_RATE)])
    # Update the map from self.cable_activities to cogs
    self.ziptie.step_up(self.cable_activities)
    # Process the upward pass of each of the cogs in the block
    self.bundle_activities = np.zeros((0, 1))
    for cog_index in range(len(self.cogs)):
        # Pick out the cog's cable_activities, process them,
        # and assign the results to the block's bundle_activities
        cog_cable_activities = self.cable_activities[
                self.ziptie.get_index_projection(
                        cog_index).ravel().astype(bool)]
        # Cogs are only allowed to start forming bundles once
        # their fraction of cables exceeds fill_fraction_threshold
        enough_cables = (self.ziptie.cable_fraction_in_bundle(cog_index) >
                         self.fill_fraction_threshold)
        cog_bundle_activities = self.cogs[cog_index].step_up(
                cog_cable_activities, enough_cables)
        self.bundle_activities = np.concatenate((self.bundle_activities,
                                                 cog_bundle_activities))
    # Goal fulfillment and decay
    self.hub_cable_goals -= self.cable_activities
    self.hub_cable_goals *= self.ACTIVITY_DECAY_RATE
    self.hub_cable_goals = np.maximum(self.hub_cable_goals, 0.)
    return self.bundle_activities
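# ---------------------------------------------------------------------
# Illustrative sketch, not from the original source: the running
# min/max conditioning at the top of step_up. Each input is rescaled
# by the observed range so far, and the range itself contracts slowly
# (RANGE_DECAY_RATE) so it can track drifting sensors; the constants
# below are placeholders, not the source's values.
import numpy as np

RANGE_DECAY_RATE = 1e-3
EPSILON = 1e-10
min_vals = np.array([0.])
max_vals = np.array([EPSILON])
for raw in [3., 7., 5., 9.]:
    x = np.array([raw])
    min_vals = np.minimum(x, min_vals)
    max_vals = np.maximum(x, max_vals)
    spread = max_vals - min_vals
    conditioned = (x - min_vals) / (max_vals - min_vals + EPSILON)
    # Let the range shrink slightly each step so stale extremes fade.
    min_vals += spread * RANGE_DECAY_RATE
    max_vals -= spread * RANGE_DECAY_RATE
    # print(raw, conditioned)  # each conditioned value lands in [0, 1]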
def step_up(self, new_cable_activities, reward):
    """ Find bundle_activities that result from new_cable_activities """
    new_cable_activities = tools.pad(new_cable_activities,
                                     (self.max_cables, 1))
    '''
    # Condition the new_cable_activities to fall between 0 and 1
    self.min_vals = np.minimum(new_cable_activities, self.min_vals)
    self.max_vals = np.maximum(new_cable_activities, self.max_vals)
    spread = self.max_vals - self.min_vals
    new_cable_activities = ((new_cable_activities - self.min_vals) /
                            (self.max_vals - self.min_vals +
                             tools.EPSILON))
    self.min_vals += spread * self.RANGE_DECAY_RATE
    self.max_vals -= spread * self.RANGE_DECAY_RATE
    '''
    # Update cable_activities, incorporating sensing dynamics
    self.cable_activities = tools.bounded_sum([
            new_cable_activities,
            self.cable_activities * (1. - self.ACTIVITY_DECAY_RATE)])
    # Update the map from self.cable_activities to cogs
    self.ziptie.update(self.cable_activities)
    # Process the upward pass of each of the cogs in the block
    self.bundle_activities = np.zeros((0, 1))
    for cog_index in range(len(self.cogs)):
        # Pick out the cog's cable_activities, process them,
        # and assign the results to the block's bundle_activities
        cog_cable_activities = self.cable_activities[
                self.ziptie.get_projection(cog_index).ravel().astype(bool)]
        # Cogs may only start forming bundles once their fraction
        # of cables exceeds this threshold.
        enough_cables = (self.ziptie.cable_fraction_in_bundle(cog_index) >
                         0.7)
        cog_bundle_activities = self.cogs[cog_index].step_up(
                cog_cable_activities, reward, enough_cables)
        self.bundle_activities = np.concatenate((self.bundle_activities,
                                                 cog_bundle_activities))
    return self.bundle_activities
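# ---------------------------------------------------------------------
# Illustrative sketch, assuming tools.bounded_sum of two arrays acts
# like a saturating element-wise sum (a noisy-OR stands in for it
# below). The cable_activities update in step_up is a leaky
# integrator: new input is folded in on top of the previous activity,
# which decays by ACTIVITY_DECAY_RATE each step, so a brief input
# leaves a fading trace rather than vanishing at once.
import numpy as np

ACTIVITY_DECAY_RATE = 0.5
activity = np.array([0.])
inputs = [0.8, 0., 0., 0.]      # a single pulse, then silence
for new in inputs:
    decayed = activity * (1. - ACTIVITY_DECAY_RATE)
    # Saturating stand-in for tools.bounded_sum([new, decayed]):
    activity = 1. - (1. - new) * (1. - decayed)
    # print(activity)  # 0.8, then 0.4, 0.2, 0.1: a decaying trace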
def learn(self, features, actions):
    """ Update the cerebellar model of the world and its dynamics.

    Parameters
    ----------
    features : array of floats
        The current set of feature activities.
    actions : array of floats
        The set of actions chosen in response to ``features``.
    """
    if self.skip:
        return
    new_combo = np.concatenate((actions, features))
    (num_combos, num_hypos, _) = self.hypos.shape
    for i_combo, new_val in enumerate(new_combo):
        # Increment tries and wins for every hypo.
        for i_hypo in np.arange(num_hypos):
            similarity = self.guess[i_combo, i_hypo]
            error = np.abs(similarity - new_val)
            self.tries[i_combo, i_hypo] += similarity
            self.wins[i_combo, i_hypo] += np.minimum(similarity, new_val)
            # Check whether the error is big enough to merit
            # refining the model.
            if error > np.random.random_sample():
                # Add a new hypothesis. There are lots of possible
                # schemes for forming new hypotheses, each with its
                # own bias. For now, randomly choose either to promote
                # a randomly selected sample or to use the intersection
                # of two randomly selected hypotheses.
                if np.random.random_sample() < .5:
                    # Promote a stored sample to a hypothesis.
                    # Check whether there are enough samples.
                    if self.samples_filled[i_combo]:
                        # Pick the sample.
                        i_sample = np.random.randint(self.num_samples)
                        self._add_hypothesis(
                                i_combo,
                                self.samples[i_combo, i_sample, :],
                                tries=10., wins=1.)
                else:
                    # Try to combine two hypotheses.
                    # Check whether there are enough hypotheses.
                    if self.hypos_filled[i_combo]:
                        # Pick the first hypothesis.
                        i_hypo_a = np.random.randint(num_hypos)
                        # Pick the second hypothesis.
                        # Make sure it's distinct.
                        i_hypo_b = i_hypo_a
                        while i_hypo_a == i_hypo_b:
                            i_hypo_b = np.random.randint(num_hypos)
                        intersection = np.minimum(
                                self.hypos[i_combo, i_hypo_a, :],
                                self.hypos[i_combo, i_hypo_b, :])
                        total_tries = (self.tries[i_combo, i_hypo_a] +
                                       self.tries[i_combo, i_hypo_b])
                        total_wins = (self.wins[i_combo, i_hypo_a] +
                                      self.wins[i_combo, i_hypo_b])
                        self._add_hypothesis(i_combo, intersection,
                                             tries=total_tries,
                                             wins=total_wins)
                # Add a new sample.
                self._add_sample(i_combo, self.recent_combo)
    # Handle the effects of time.
    self.age += 1.
    self.recent_combo *= 1. - self.decay_rate
    self.recent_combo = tools.bounded_sum([self.recent_combo, new_combo])
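# ---------------------------------------------------------------------
# Illustrative sketch, not from the original source: the
# hypothesis-intersection refinement in learn(). Two fuzzy hypotheses
# are combined with an element-wise minimum, keeping only the pattern
# they agree on, and their tries/wins evidence is pooled so the child
# hypothesis starts with its parents' combined track record.
import numpy as np

hypo_a = np.array([0.9, 0.8, 0.1, 0.0])
hypo_b = np.array([0.7, 0.0, 0.9, 0.0])
intersection = np.minimum(hypo_a, hypo_b)   # -> [0.7, 0.0, 0.1, 0.0]
tries_a, wins_a = 12., 4.
tries_b, wins_b = 8., 5.
total_tries = tries_a + tries_b             # 20.
total_wins = wins_a + wins_b                # 9.
# The new hypothesis would then enter the model via something like
# _add_hypothesis(i_combo, intersection, tries=total_tries,
#                 wins=total_wins), as in learn() above.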