Example 1
    def step(self, feature_activities, brain_live_features, reward):
        """
        Update the model and choose a new goal.

        Parameters
        ----------
        feature_activities : array of floats
            The current activity levels of each of the features.
        brain_live_features : array of floats
            A binary array of all features that have ever been active.
        reward : float
            The reward reported by the world during the most recent time step.

        Returns
        -------
        array of floats
            The feature goal activities with the first two elements
            trimmed off (they are internal to the model only).
        """
        # TODO: Remove live_features. Assume all are live.
        live_features = self._update_activities(feature_activities,
                                                brain_live_features)

        # Update sequences before prefixes.
        # NOTE(review): the nb.update_* helpers appear to mutate the
        # passed-in arrays in place (no return values are used) — confirm
        # against the nb module.
        nb.update_sequences(live_features, self.FAIs, self.prefix_activities,
                            self.sequence_occurrences)

        nb.update_prefixes(live_features, self.prefix_decay_rate,
                           self.previous_feature_activities,
                           self.feature_goal_activities,
                           self.prefix_activities, self.prefix_occurrences,
                           self.prefix_uncertainties)

        nb.update_rewards(live_features, self.reward_update_rate, reward,
                          self.prefix_credit, self.prefix_rewards)

        nb.update_curiosities(live_features, self.curiosity_update_rate,
                              self.prefix_occurrences, self.prefix_curiosities,
                              self.previous_feature_activities,
                              self.feature_activities,
                              self.feature_goal_activities,
                              self.prefix_uncertainties)

        nb.update_fitness(self.feature_fitness, self.prefix_occurrences,
                          self.prefix_rewards, self.prefix_uncertainties,
                          self.sequence_occurrences)

        self.feature_goal_votes = nb.calculate_goal_votes(
            self.n_features, live_features, self.prefix_rewards,
            self.prefix_curiosities, self.prefix_occurrences,
            self.sequence_occurrences, self.feature_activities,
            self.feature_goal_activities)

        # TODO: break this out into a separate object.
        goal_index, max_vote = self._choose_feature_goals()

        nb.update_reward_credit(live_features, goal_index, max_vote,
                                self.feature_activities,
                                self.credit_decay_rate, self.prefix_credit)

        # Trim off the first two elements. They are internal to the model only.
        return self.feature_goal_activities[2:]
Example 2
    def step(self, feature_activities, brain_live_features, reward):
        """
        Update the model and choose a new goal.

        Parameters
        ----------
        feature_activities : array of floats
            The current activity levels of each of the features.
        brain_live_features : array of floats
            A binary array of all features that have ever been active.
        reward : float
            The reward reported by the world during the most recent time step.

        Returns
        -------
        array of floats
            The feature goal activities with the first two elements
            trimmed off (they are internal to the model only).
        """
        live_features = self._update_activities(feature_activities,
                                                brain_live_features)

        # NOTE(review): sequence tracking (nb.update_sequences) is disabled
        # in this version; only prefix statistics are maintained.
        nb.update_prefixes(live_features, self.prefix_decay_rate,
                           self.previous_feature_activities,
                           self.feature_goal_activities,
                           self.prefix_activities, self.prefix_occurrences)

        nb.update_rewards(live_features, self.reward_update_rate, reward,
                          self.prefix_credit, self.prefix_rewards)

        nb.update_curiosities(live_features, self.curiosity_update_rate,
                              self.prefix_occurrences, self.prefix_curiosities,
                              self.previous_feature_activities,
                              self.feature_activities,
                              self.feature_goal_activities)

        self.feature_goal_votes = nb.calculate_goal_votes(
            self.num_features,
            live_features,
            self.prefix_rewards,
            self.prefix_curiosities,
            self.prefix_occurrences,
            self.feature_activities,
            self.feature_goal_activities)

        goal_index, max_vote = self._choose_feature_goals()

        nb.update_reward_credit(live_features, goal_index, max_vote,
                                self.feature_activities,
                                self.credit_decay_rate, self.prefix_credit)

        # The first two elements are internal to the model only.
        return self.feature_goal_activities[2:]
Example 3
    def step(self, candidate_activities, reward):
        """
        Update the model and compute conditional predictions.

        Parameters
        ----------
        candidate_activities : array of floats
            The current activity levels of each of the feature candidates.
        reward : float
            The reward reported by the world during the most recent time step.

        Returns
        -------
        tuple of arrays
            (conditional_predictions, conditional_rewards,
            conditional_curiosities) — presumably filled in place by the
            nb.predict_* helpers; confirm against the nb module.
        """
        # Update feature_activities and previous_feature_activities
        self.update_activities(candidate_activities)

        # Update sequences before prefixes.
        nb.update_sequences(
            self.feature_activities,
            self.prefix_activities,
            self.sequence_occurrences,
        )

        nb.update_prefixes(
            self.prefix_decay_rate,
            self.previous_feature_activities,
            self.goal_activities,
            self.prefix_activities,
            self.prefix_occurrences,
            self.prefix_uncertainties,
        )

        nb.update_rewards(
            self.reward_update_rate,
            reward,
            self.prefix_credit,
            self.prefix_rewards,
        )

        nb.update_curiosities(
            self.curiosity_update_rate,
            self.prefix_occurrences,
            self.prefix_curiosities,
            self.previous_feature_activities,
            self.feature_activities,
            self.goal_activities,
            self.prefix_uncertainties,
        )

        # The conditional_* arrays are passed in as output arguments.
        nb.predict_features(
            self.feature_activities,
            self.prefix_occurrences,
            self.sequence_occurrences,
            self.conditional_predictions,
        )
        nb.predict_rewards(
            self.feature_activities,
            self.prefix_rewards,
            self.conditional_rewards,
        )
        nb.predict_curiosities(
            self.feature_activities,
            self.prefix_curiosities,
            self.conditional_curiosities,
        )

        return (
            self.conditional_predictions,
            self.conditional_rewards,
            self.conditional_curiosities)
Example 4
    def step(self, feature_activities, brain_live_features, reward):
        """
        Update the model and choose a new goal.

        Parameters
        ----------
        feature_activities : array of floats
            The current activity levels of each of the features.
        brain_live_features : array of floats
            A binary array of all features that have ever been active.
        reward : float
            The reward reported by the world during the most recent time step.

        Returns
        -------
        array of floats
            The feature goal activities with the first two elements
            trimmed off (they are internal to the model only).
        """
        live_features = self._update_activities(
            feature_activities, brain_live_features)

        # NOTE(review): sequence tracking (nb.update_sequences) is disabled
        # in this version; only prefix statistics are maintained.
        nb.update_prefixes(
            live_features,
            self.prefix_decay_rate,
            self.previous_feature_activities,
            self.feature_goal_activities,
            self.prefix_activities,
            self.prefix_occurrences)

        nb.update_rewards(
            live_features,
            self.reward_update_rate,
            reward,
            self.prefix_credit,
            self.prefix_rewards)

        nb.update_curiosities(
            live_features,
            self.curiosity_update_rate,
            self.prefix_occurrences,
            self.prefix_curiosities,
            self.previous_feature_activities,
            self.feature_activities,
            self.feature_goal_activities)

        self.feature_goal_votes = nb.calculate_goal_votes(
            self.num_features,
            live_features,
            self.prefix_rewards,
            self.prefix_curiosities,
            self.prefix_occurrences,
            self.feature_activities,
            self.feature_goal_activities)

        goal_index, max_vote = self._choose_feature_goals()

        nb.update_reward_credit(
            live_features,
            goal_index,
            max_vote,
            self.feature_activities,
            self.credit_decay_rate,
            self.prefix_credit)

        # The first two elements are internal to the model only.
        return self.feature_goal_activities[2:]
Example 5
    def step(self, candidate_activities, reward):
        """
        Update the model and compute conditional predictions.

        Parameters
        ----------
        candidate_activities : array of floats
            The current activity levels of each of the feature candidates.
        reward : float
            The reward reported by the world during
            the most recent time step.

        Returns
        -------
        tuple of arrays
            (feature_activities, conditional_predictions,
            conditional_rewards, conditional_curiosities).
        """
        # Update feature_activities and previous_feature_activities
        self.update_activities(candidate_activities)

        # NOTE(review): prefixes are updated *before* sequences here,
        # the reverse of the order used elsewhere — confirm intentional.
        nb.update_prefixes(
            self.prefix_decay_rate,
            self.previous_feature_activities,
            self.goal_activities,
            self.prefix_activities,
            self.prefix_occurrences,
            self.prefix_uncertainties,
        )

        nb.update_sequences(
            self.feature_activities,
            self.prefix_activities,
            self.prefix_occurrences,
            self.sequence_occurrences,
            self.sequence_likelihoods,
        )

        nb.update_rewards(
            self.reward_update_rate,
            reward,
            self.prefix_credit,
            self.prefix_rewards,
        )

        nb.update_curiosities(
            self.curiosity_update_rate,
            self.prefix_occurrences,
            self.prefix_curiosities,
            self.previous_feature_activities,
            self.feature_activities,
            self.goal_activities,
            self.prefix_uncertainties,
        )

        # Unlike earlier versions, the predict_* helpers return their
        # results rather than filling output arguments.
        self.conditional_predictions = nb.predict_features(
            self.feature_activities,
            self.sequence_likelihoods,
        )

        self.conditional_rewards = nb.predict_rewards(
            self.feature_activities,
            self.prefix_rewards,
        )

        self.conditional_curiosities = nb.predict_curiosities(
            self.feature_activities,
            self.prefix_curiosities,
        )

        return (
            self.feature_activities,
            self.conditional_predictions,
            self.conditional_rewards,
            self.conditional_curiosities)
Example 6
    def step(self, feature_activities, live_features, reward, satisfaction):
        """
        Update the model and choose a new goal.

        Parameters
        ----------
        feature_activities : array of floats
            The current activity levels of each of the features.
        live_features : array of floats
            A binary array of all features that have ever been active.
        reward : float
            The reward reported by the world during the most recent time step.
        satisfaction : float
            A filtered version of recent reward history.

        Returns
        -------
        array of floats
            The feature goal activities.
        """
        self._update_activities(feature_activities)

        # Update sequences before prefixes.
        nb.update_sequences(live_features, self.new_FAIs,
                            self.prefix_activities, self.sequence_occurrences)
        nb.update_prefixes(live_features, self.prefix_decay_rate,
                           self.previous_FAIs, self.FGIs,
                           self.prefix_activities, self.prefix_activities_base,
                           self.prefix_activities_age, self.prefix_occurrences)
        nb.update_rewards(
            live_features,
            self.reward_update_rate,
            reward,
            self.prefix_occurrences,
            self.prefix_credit,
            self.prefix_rewards)
        nb.update_curiosities(satisfaction, live_features,
                              self.curiosity_update_rate,
                              self.prefix_occurrences, self.prefix_curiosities,
                              self.FAIs, self.previous_FAIs, self.FGIs,
                              self.feature_goal_activities)
        self.feature_goal_votes = nb.calculate_goal_votes(
            self.num_features,
            live_features,
            self.prefix_goal_votes,
            self.prefix_credit,
            self.prefix_rewards,
            self.prefix_curiosities,
            self.prefix_occurrences,
            self.sequence_occurrences,
            self.FAIs,
            self.feature_goal_activities)
        goal_index, max_vote = self._choose_feature_goals(satisfaction)
        nb.update_reward_credit(live_features, goal_index, max_vote,
                                self.prefix_goal_votes, self.prefix_credit,
                                self.prefix_credit_base,
                                self.prefix_credit_age)
        return self.feature_goal_activities