def step(self, feature_activities, brain_live_features, reward): """ Update the model and choose a new goal. Parameters ---------- feature_activities : array of floats The current activity levels of each of the features. live_features : array of floats A binary array of all features that have every been active. reward : float The reward reported by the world during the most recent time step. """ # TODO: Remove live_features. Assume all are live. live_features = self._update_activities(feature_activities, brain_live_features) # Update sequences before prefixes. nb.update_sequences(live_features, self.FAIs, self.prefix_activities, self.sequence_occurrences) nb.update_prefixes(live_features, self.prefix_decay_rate, self.previous_feature_activities, self.feature_goal_activities, self.prefix_activities, self.prefix_occurrences, self.prefix_uncertainties) nb.update_rewards(live_features, self.reward_update_rate, reward, self.prefix_credit, self.prefix_rewards) nb.update_curiosities(live_features, self.curiosity_update_rate, self.prefix_occurrences, self.prefix_curiosities, self.previous_feature_activities, self.feature_activities, self.feature_goal_activities, self.prefix_uncertainties) nb.update_fitness(self.feature_fitness, self.prefix_occurrences, self.prefix_rewards, self.prefix_uncertainties, self.sequence_occurrences) self.feature_goal_votes = nb.calculate_goal_votes( self.n_features, live_features, self.prefix_rewards, self.prefix_curiosities, self.prefix_occurrences, self.sequence_occurrences, self.feature_activities, self.feature_goal_activities) # TODO: break this out into a separate object. goal_index, max_vote = self._choose_feature_goals() nb.update_reward_credit(live_features, goal_index, max_vote, self.feature_activities, self.credit_decay_rate, self.prefix_credit) # Trim off the first two elements. The are internal to the model only. return self.feature_goal_activities[2:]
def step(self, feature_activities, brain_live_features, reward):
    """
    Advance the model by one time step and pick the next goal.

    Parameters
    ----------
    feature_activities : array of floats
        Current activity level of each feature.
    brain_live_features : array of floats
        Binary mask of every feature that has ever been active.
    reward : float
        Reward reported by the world on the most recent time step.

    Returns
    -------
    array of floats
        Goal activities with the two model-internal leading
        elements removed.
    """
    active = self._update_activities(feature_activities,
                                     brain_live_features)

    # Learning pass: refresh the prefix statistics, then fold in
    # the latest reward and the curiosity bookkeeping.
    nb.update_prefixes(active,
                       self.prefix_decay_rate,
                       self.previous_feature_activities,
                       self.feature_goal_activities,
                       self.prefix_activities,
                       self.prefix_occurrences)
    nb.update_rewards(active,
                      self.reward_update_rate,
                      reward,
                      self.prefix_credit,
                      self.prefix_rewards)
    nb.update_curiosities(active,
                          self.curiosity_update_rate,
                          self.prefix_occurrences,
                          self.prefix_curiosities,
                          self.previous_feature_activities,
                          self.feature_activities,
                          self.feature_goal_activities)

    # Decision pass: score every candidate goal, pick the winner,
    # and assign it reward credit for later learning.
    self.feature_goal_votes = nb.calculate_goal_votes(
        self.num_features,
        active,
        self.prefix_rewards,
        self.prefix_curiosities,
        self.prefix_occurrences,
        self.feature_activities,
        self.feature_goal_activities)
    goal_index, max_vote = self._choose_feature_goals()
    nb.update_reward_credit(active,
                            goal_index,
                            max_vote,
                            self.feature_activities,
                            self.credit_decay_rate,
                            self.prefix_credit)

    # The first two goal activities are internal to the model only.
    return self.feature_goal_activities[2:]
def step(self, candidate_activities, reward): """ Update the model and choose a new goal. Parameters ---------- candidate_activities : array of floats The current activity levels of each of the feature candidates. reward : float The reward reported by the world during the most recent time step. """ # Update feature_activities and previous_feature_activities self.update_activities(candidate_activities) # Update sequences before prefixes. nb.update_sequences( self.feature_activities, self.prefix_activities, self.sequence_occurrences, ) nb.update_prefixes( self.prefix_decay_rate, self.previous_feature_activities, self.goal_activities, self.prefix_activities, self.prefix_occurrences, self.prefix_uncertainties, ) nb.update_rewards( self.reward_update_rate, reward, self.prefix_credit, self.prefix_rewards, ) nb.update_curiosities( self.curiosity_update_rate, self.prefix_occurrences, self.prefix_curiosities, self.previous_feature_activities, self.feature_activities, self.goal_activities, self.prefix_uncertainties, ) nb.predict_features( self.feature_activities, self.prefix_occurrences, self.sequence_occurrences, self.conditional_predictions, ) nb.predict_rewards( self.feature_activities, self.prefix_rewards, self.conditional_rewards, ) nb.predict_curiosities( self.feature_activities, self.prefix_curiosities, self.conditional_curiosities, ) return ( self.conditional_predictions, self.conditional_rewards, self.conditional_curiosities)
def step(self, feature_activities, brain_live_features, reward):
    """
    Update the model and choose a new goal.

    Parameters
    ----------
    feature_activities : array of floats
        The current activity levels of each of the features.
    brain_live_features : array of floats
        A binary array of all features that have ever been active.
    reward : float
        The reward reported by the world during the most recent
        time step.

    Returns
    -------
    array of floats
        The feature goal activities, with the first two elements
        trimmed off (they are internal to the model only).
    """
    live_features = self._update_activities(
        feature_activities, brain_live_features)

    # Update sequences before prefixes.
    # NOTE(review): the sequence update is disabled in this
    # revision; the commented call is kept as a record of the
    # intended ordering.
    #nb.update_sequences(
    #    live_features,
    #    self.FAIs,
    #    self.prefix_activities,
    #    self.sequence_occurrences)
    nb.update_prefixes(
        live_features,
        self.prefix_decay_rate,
        self.previous_feature_activities,
        self.feature_goal_activities,
        self.prefix_activities,
        self.prefix_occurrences)
    nb.update_rewards(
        live_features,
        self.reward_update_rate,
        reward,
        self.prefix_credit,
        self.prefix_rewards)
    nb.update_curiosities(
        live_features,
        self.curiosity_update_rate,
        self.prefix_occurrences,
        self.prefix_curiosities,
        self.previous_feature_activities,
        self.feature_activities,
        self.feature_goal_activities)

    # Combine reward, curiosity and occurrence statistics into a
    # per-feature vote for the next goal.
    self.feature_goal_votes = nb.calculate_goal_votes(
        self.num_features,
        live_features,
        self.prefix_rewards,
        self.prefix_curiosities,
        self.prefix_occurrences,
        #self.sequence_occurrences,
        self.feature_activities,
        self.feature_goal_activities)

    goal_index, max_vote = self._choose_feature_goals()
    nb.update_reward_credit(
        live_features,
        goal_index,
        max_vote,
        self.feature_activities,
        self.credit_decay_rate,
        self.prefix_credit)

    # Trim off the first two elements. They are internal to the
    # model only.
    return self.feature_goal_activities[2:]
def step(self, candidate_activities, reward): """ Update the model and choose a new goal. Parameters ---------- candidate_activities : array of floats The current activity levels of each of the feature candidates. reward : float The reward reported by the world during the most recent time step. """ # Update feature_activities and previous_feature_activities self.update_activities(candidate_activities) # Update sequences before prefixes. nb.update_prefixes( self.prefix_decay_rate, self.previous_feature_activities, self.goal_activities, self.prefix_activities, self.prefix_occurrences, self.prefix_uncertainties, ) nb.update_sequences( self.feature_activities, self.prefix_activities, self.prefix_occurrences, self.sequence_occurrences, self.sequence_likelihoods, ) nb.update_rewards( self.reward_update_rate, reward, self.prefix_credit, self.prefix_rewards, ) nb.update_curiosities( self.curiosity_update_rate, self.prefix_occurrences, self.prefix_curiosities, self.previous_feature_activities, self.feature_activities, self.goal_activities, self.prefix_uncertainties, ) self.conditional_predictions = nb.predict_features( self.feature_activities, self.sequence_likelihoods, ) self.conditional_rewards = nb.predict_rewards( self.feature_activities, self.prefix_rewards, ) self.conditional_curiosities = nb.predict_curiosities( self.feature_activities, self.prefix_curiosities, ) return ( self.feature_activities, self.conditional_predictions, self.conditional_rewards, self.conditional_curiosities)
def step(self, feature_activities, live_features, reward, satisfaction):
    """
    Update the model and choose a new goal.

    Parameters
    ----------
    feature_activities : array of floats
        The current activity levels of each of the features.
    live_features : array of floats
        A binary array of all features that have ever been active.
    reward : float
        The reward reported by the world during the most recent
        time step.
    satisfaction : float
        A filtered version of recent reward history.

    Returns
    -------
    array of floats
        The feature goal activities (untrimmed in this revision).
    """
    # NOTE(review): this revision is instrumented with debug prints
    # throughout; they are runtime behavior and left untouched here,
    # but should be removed or routed through logging before any
    # production use.
    print(
        '=========================================================================================================',
    )
    self._update_activities(feature_activities)

    # Update sequences before prefixes.
    print('reward', reward)
    print('satisfaction', satisfaction)
    print('previous_feature_activities',
          np.where(self.previous_feature_activities > .1)[0])
    print('feature_activities',
          np.where(self.feature_activities > .1)[0])
    print('previousFAIs', np.where(self.previous_FAIs > .1)[0])
    print('new_FAIs', np.where(self.new_FAIs > .1)[0])
    print('FAIs', np.where(self.FAIs > .1)[0])
    print('FGIs', np.where(self.FGIs > .1)[0])
    nb.update_sequences(live_features,
                        self.new_FAIs,
                        self.prefix_activities,
                        self.sequence_occurrences)
    nb.update_prefixes(live_features,
                       self.prefix_decay_rate,
                       self.previous_FAIs,
                       self.FGIs,
                       self.prefix_activities,
                       self.prefix_activities_base,
                       self.prefix_activities_age,
                       self.prefix_occurrences)
    nb.update_rewards(
        live_features,
        self.reward_update_rate,
        reward,
        #self.prefix_activities,
        self.prefix_occurrences,
        self.prefix_credit,
        self.prefix_rewards)
    # Satisfaction modulates the curiosity update in this revision.
    nb.update_curiosities(satisfaction,
                          live_features,
                          self.curiosity_update_rate,
                          self.prefix_occurrences,
                          self.prefix_curiosities,
                          self.FAIs,
                          self.previous_FAIs,
                          self.FGIs,
                          self.feature_goal_activities)

    # Combine credit, reward, curiosity and occurrence statistics
    # into a per-feature vote for the next goal.
    self.feature_goal_votes = nb.calculate_goal_votes(
        self.num_features,
        live_features,
        #self.time_since_goal,
        #self.jumpiness,
        self.prefix_goal_votes,
        self.prefix_credit,
        self.prefix_rewards,
        self.prefix_curiosities,
        self.prefix_occurrences,
        self.sequence_occurrences,
        self.FAIs,
        self.feature_goal_activities)

    goal_index, max_vote = self._choose_feature_goals(satisfaction)
    print('self.time_since_goal', self.time_since_goal,
          'self.jumpiness', self.jumpiness)
    print('goal_index', goal_index, 'max_vote', max_vote)
    nb.update_reward_credit(live_features,
                            goal_index,
                            max_vote,
                            self.prefix_goal_votes,
                            self.prefix_credit,
                            self.prefix_credit_base,
                            self.prefix_credit_age)
    # NOTE(review): unlike the other revisions, the result is NOT
    # trimmed with [2:] here -- confirm callers expect the full array.
    return self.feature_goal_activities