コード例 #1
0
ファイル: brain.py プロジェクト: matt2000/becca
class Brain(object):
    """
    A biologically motivated learning algorithm.

    Attributes
    ----------
    actions : array of floats
        The most recent set of action commands, including the trailing
        'do nothing' action added by the ``brain``. They are fed back in
        as inputs on the next time step.
    affect : Affect
        See the pydocs in the module ``affect.py`` for the class ``Affect``.
    backup_interval : number
        The number of time steps between saving a copy of the ``brain``
        out to a pickle file for easy recovery.
    levels : list of ``Level``
        Collectively, the levels form a hierarchy with ``levels[0]``
        on the bottom.
        Refer to ``level.py`` for a detailed description of a level.
    log_dir : str
        Path to the ``log`` directory. This is where backups
        and images of the ``brain``'s state and performance are kept.
    name : str
        Unique name for this ``brain``.
    num_actions : int
        The number of distinct actions that the ``brain`` can choose to
        execute in the world, plus one for the 'do nothing' action.
    num_features : int
        The total number of features, including sensors and all features
        derived from them.
    num_sensors : int
        The number of distinct sensors that the world will be passing in
        to the ``brain``.
    pickle_filename : str
        Path and filename of the backup pickle file.
    satisfaction : float
        The level of contentment experienced by the brain. Higher contentment
        dampens curiosity and the drive to explore.
    timestep : int
        The age of the ``brain`` in discrete time steps.
    """
    def __init__(self, num_sensors, num_actions, brain_name='test_brain'):
        """
        Configure the Brain.

        Parameters
        ----------
        num_sensors : int
            The number of sensor values the world passes in each time step.
        num_actions : int
            The number of actions the world accepts. The ``brain`` adds
            one extra 'do nothing' action to this internally.
        brain_name : str
            A name for the ``brain``. It is also used to build the name
            of the backup pickle file.
        """
        self.num_sensors = num_sensors
        # Always include an extra action. The last is the 'do nothing' action.
        self.num_actions = num_actions + 1

        self.backup_interval = 1e5
        self.name = brain_name
        self.log_dir = os.path.normpath(os.path.join(MODPATH, 'log'))
        if not os.path.isdir(self.log_dir):
            os.makedirs(self.log_dir)
        self.pickle_filename = os.path.join(self.log_dir,
                                            '{0}.pickle'.format(brain_name))
        self.affect = Affect()
        self.satisfaction = 0.

        # Initialize the first ``Level``. Its inputs are the sensors
        # followed by the actions.
        num_inputs = self.num_sensors + self.num_actions
        level_index = 0
        level_0 = Level(level_index, num_inputs)
        self.levels = [level_0]
        self.actions = np.zeros(self.num_actions)

        self.timestep = 0

    def sense_act_learn(self, sensors, reward):
        """
        Take sensor and reward data in and use them to choose an action.

        Parameters
        ----------
        sensors : array of floats
            The information coming from the sensors in the world.
            The array should have ``self.num_sensors`` inputs.
            Each value in the array is expected to be between 0 and 1,
            inclusive. Sensor values are interpreted as fuzzy binary
            values, rather than continuous values. For instance,
            the ``brain`` doesn't interpret a contact sensor value of .5
            to mean that the contact
            sensor was only weakly contacted. It interprets it
            to mean that the sensor was fully contacted for 50% of the sensing
            duration or that there is a 50% chance that the sensor was
            fully contacted during the entire sensing duration. For another
            example, a light sensor reading of zero won't be
            interpreted by the ``brain`` as darkness. It will just be
            interpreted as a lack of information about the lightness.
        reward : float
            The extent to which the ``brain`` is being rewarded by the
            world. It is expected to be between -1 and 1, inclusive.
            -1 is the worst pain ever. 1 is the most intense ecstasy
            imaginable. 0 is neutral.

        Returns
        -------
        actions : array of floats
            The action commands that the ``brain`` is sending to the world
            to be executed. The internal 'do nothing' action is stripped
            off, so the array has ``self.num_actions - 1`` elements.
            Each value should be binary: 0 and 1. This
            allows the ``brain`` to learn most effectively how to interact
            with the world to obtain more reward.
        """
        self.timestep += 1

        # Calculate the "mood" of the agent.
        self.satisfaction = self.affect.update(reward)

        # Calculate activities of all the sequences in the hierarchy.
        # The output of each level is the input to the one above it.
        input_activities = np.concatenate((sensors, self.actions))
        for level in self.levels:
            sequence_activities = level.step(input_activities,
                                             reward,
                                             self.satisfaction)
            input_activities = sequence_activities

        # If the top level has more than half of its allocated sequences
        # created, then create a new level on top of it.
        top_level = self.levels[-1]
        if top_level.num_sequences > top_level.max_num_sequences / 2.:
            num_inputs = top_level.max_num_sequences
            level_index = top_level.level_index + 1
            print('------------------------------------------ Creating level',
                  level_index)
            next_level = Level(level_index, num_inputs)
            self.levels.append(next_level)
            # Step the new level right away, using the output of the
            # level beneath it.
            next_level.step(input_activities, reward, self.satisfaction)

        # Pass goals back down, from the top of the hierarchy to the bottom.
        for i in reversed(range(len(self.levels) - 1)):
            self.levels[i].sequence_goals = self.levels[i + 1].input_goals

        # Isolate the actions from the rest of the goals.
        # The actions follow the sensors in the bottom level's inputs.
        self.actions = self.levels[0].input_goals[
            self.num_sensors:self.num_sensors + self.num_actions]

        # Periodically back up the ``brain``.
        if (self.timestep % self.backup_interval) == 0:
            self.backup()

        # Account for the fact that the last "do nothing" action
        # was added by the ``brain``.
        return self.actions[:-1]

    def random_actions(self):
        """
        Generate a random set of actions.

        Each action is independently set to 1 with probability
        ``.5 / self.num_actions`` and is 0 otherwise.

        Returns
        -------
        actions : array of floats
            See ``sense_act_learn.actions``. This array has
            ``self.num_actions`` elements, including the 'do nothing' action.
        """
        threshold = .5 / float(self.num_actions)
        action_strength = np.random.random_sample(self.num_actions)
        actions = np.zeros(self.num_actions)
        actions[np.where(action_strength < threshold)] = 1.
        return actions

    def report_performance(self):
        """
        Make a report of how the brain did over its lifetime.

        Returns
        -------
        performance : float
            The average reward per time step collected by
            the ``brain`` over its lifetime.
        """
        return self.affect.visualize(self.timestep, self.name, self.log_dir)

    def backup(self):
        """
        Archive a copy of the brain object for future use.

        Returns
        -------
        success : bool
            If the backup process completed without any problems, ``success``
            is True, otherwise it is False.
        """
        success = False
        try:
            with open(self.pickle_filename, 'wb') as brain_data:
                pickle.dump(self, brain_data)
            # Save a second copy. If you only save one, and the user
            # happens to ^C out of the program while it is being saved,
            # the file becomes corrupted, and all the learning that the
            # ``brain`` did is lost.
            with open('{0}.bak'.format(self.pickle_filename),
                      'wb') as brain_data_bak:
                pickle.dump(self, brain_data_bak)
        except IOError as err:
            print('File error: {0} encountered while saving brain data'.
                  format(err))
        except pickle.PickleError as perr:
            print('Pickling error: {0} encountered while saving brain data'.
                  format(perr))
        else:
            success = True
        return success

    def restore(self):
        """
        Reconstitute the brain from a previously saved brain.

        Returns
        -------
        restored_brain : Brain
            If restoration was successful, the saved ``brain`` is returned.
            Otherwise a notification prints and this ``brain`` is returned.
        """
        restored_brain = self
        try:
            # NOTE: unpickling can execute arbitrary code. Only restore
            # from pickle files that this program wrote itself.
            with open(self.pickle_filename, 'rb') as brain_data:
                loaded_brain = pickle.load(brain_data)

            # Compare the number of channels in the restored brain with
            # those in the already initialized brain. If it matches,
            # accept the brain. If it doesn't,
            # print a message, and keep the just-initialized brain.
            # Sometimes the pickle file is corrupted. When this is the case
            # you can manually overwrite it by removing the .bak from the
            # .pickle.bak file. Then you can restore from the backup pickle.
            if ((loaded_brain.num_sensors == self.num_sensors) and
                    (loaded_brain.num_actions == self.num_actions)):
                print('Brain restored at timestep {0} from {1}'.format(
                    str(loaded_brain.timestep), self.pickle_filename))
                restored_brain = loaded_brain
            else:
                print('The brain {0} does not have the same number'.format(
                    self.pickle_filename))
                print('of sensors and actions as the world.')
                print('Creating a new brain from scratch.')
        except IOError:
            print('Couldn\'t open {0} for loading'.format(
                self.pickle_filename))
        except (pickle.PickleError, EOFError) as err:
            # An empty pickle file, such as one left behind by an
            # interrupted backup, raises EOFError rather than PickleError.
            print('Error unpickling world: {0}'.format(err))
        return restored_brain
コード例 #2
0
ファイル: brain.py プロジェクト: microgold/Becca35
class Brain(object):
    """
    A biologically motivated learning algorithm.

    Becca's Brain contains all of its learning algorithms,
    integrated into a single whole.

    Check out connector.py for an example for how to attach a world
    to a brain.
    """
    def __init__(self,
                 num_sensors,
                 num_actions,
                 brain_name='test_brain',
                 log_directory=None):
        """
        Configure the Brain.

        Parameters
        ----------
        brain_name : str
            A descriptive string identifying the brain.
        log_directory : str
            The full path name to a directory where information and
            backups for the world can be stored and retrieved.
            If None, a ``log`` directory next to this module is used.
        num_actions : int
            The total number of action outputs that the world is expecting.
        num_sensors : int
            The total number of sensor inputs that the world is providing.
        """
        # num_sensors : int
        #     The number of distinct sensors that the world will be passing in
        #     to the brain.
        self.num_sensors = num_sensors
        # num_actions : int
        #     The number of distinct actions that the brain can choose to
        #     execute in the world.
        self.num_actions = num_actions
        num_inputs = self.num_sensors + self.num_actions
        max_num_inputs = num_inputs
        max_num_features = 1 + 3 * max_num_inputs
        # actions : array of floats
        #     The set of actions to execute this time step.
        self.actions = np.ones(self.num_actions) * .1

        # timestep : int
        #     The age of the brain in discrete time steps.
        self.timestep = 0
        # backup_interval : number
        #     The number of time steps between saving a copy of the brain
        #     out to a pickle file for easy recovery.
        self.backup_interval = 1e5
        # name : str
        #     Unique name for this brain.
        self.name = brain_name

        if log_directory is None:
            # Identify the full local path of the brain.py module.
            # This trick is used to conveniently locate other Becca resources.
            module_path = os.path.dirname(os.path.abspath(__file__))
            # log_dir : str
            #     Path to the log directory. This is where backups
            #     and images of the brain's state and performance are kept.
            self.log_dir = os.path.normpath(os.path.join(module_path, 'log'))
        else:
            self.log_dir = log_directory
        # Check whether the directory is already there. If not, create it.
        if not os.path.isdir(self.log_dir):
            os.makedirs(self.log_dir)
        # pickle_filename : str
        #     Path and filename of the backup pickle file.
        self.pickle_filename = os.path.join(self.log_dir,
                                            '{0}.pickle'.format(brain_name))

        # affect : Affect
        #     See the pydocs in the module affect.py for the class Affect.
        self.affect = Affect()
        # satisfaction : float
        #     The level of contentment experienced by the brain.
        #     Higher contentment dampens curiosity and the drive to explore.
        self.satisfaction = 0.

        # featurizer : Featurizer
        #     The featurizer is an unsupervised learner that learns
        #     features from the inputs.
        self.featurizer = Featurizer(max_num_inputs, max_num_features)
        # model : Model
        #     The model builds sequences of features and goals and uses
        #     them to choose new goals.
        self.model = Model(max_num_features, self)

    def sense_act_learn(self, sensors, reward):
        """
        Take sensor and reward data in and use them to choose an action.

        Parameters
        ----------
        sensors : array of floats
            The information coming from the sensors in the world.
            The array should have self.num_sensors inputs.
            Whatever the low and high value of each sensor, its value
            will be rescaled to fall between 0 and 1.
            Sensor values are interpreted as fuzzy binary
            values, rather than continuous values. For instance,
            the brain doesn't interpret a contact sensor value of .5
            to mean that the contact
            sensor was only weakly contacted. It interprets it
            to mean that the sensor was fully contacted for 50% of the sensing
            duration or that there is a 50% chance that the sensor was
            fully contacted during the entire sensing duration. For another
            example, a light sensor reading of zero won't be
            interpreted by the brain as darkness. It will just be
            interpreted as a lack of information about the lightness.
        reward : float
            The extent to which the brain is being rewarded by the
            world. It is expected to be between -1 and 1, inclusive.
            -1 is the worst pain ever. 1 is the most intense ecstasy
            imaginable. 0 is neutral.

        Returns
        -------
        actions : array of floats
            The action commands that the brain is sending to the world
            to be executed. The array should have self.num_actions
            inputs in it. Each value should be binary: 0 and 1. This
            allows the brain to learn most effectively how to interact
            with the world to obtain more reward.
        """
        self.timestep += 1

        # Calculate the "mood" of the agent.
        self.satisfaction = self.affect.update(reward)

        # The previous actions come first in the input array,
        # followed by the sensors.
        input_activities = np.concatenate((self.actions, sensors))
        feature_activities, live_features = self.featurizer.featurize(
            input_activities)
        feature_goals = self.model.step(feature_activities, live_features,
                                        reward)
        # Pass goals back down.
        input_goals = self.featurizer.defeaturize(feature_goals)

        # Isolate the actions from the rest of the goals.
        self.actions = input_goals[:self.num_actions]

        # Create a set of random actions.
        # This is occasionally helpful when debugging.
        take_random_actions = False
        if take_random_actions:
            self.actions = self.random_actions()

        # Periodically back up the brain.
        if (self.timestep % self.backup_interval) == 0:
            self.backup()

        return self.actions

    def random_actions(self):
        """
        Generate a random set of actions.

        This is used for debugging. Running a world with random
        actions gives a baseline performance floor on a world.
        Each action is independently set to 1 with probability
        ``.1 / self.num_actions`` and is 0 otherwise.

        Returns
        -------
        actions : array of floats
            See sense_act_learn.actions.
        """
        threshold = .1 / float(self.num_actions)
        action_strength = np.random.random_sample(self.num_actions)
        actions = np.zeros(self.num_actions)
        actions[np.where(action_strength < threshold)] = 1.
        return actions

    def report_performance(self):
        """
        Make a report of how the brain did over its lifetime.

        Returns
        -------
        performance : float
            The average reward per time step collected by
            the brain over its lifetime.
        """
        performance = self.affect.visualize(self.timestep, self.name,
                                            self.log_dir)
        return performance

    def backup(self):
        """
        Archive a copy of the brain object for future use.

        Returns
        -------
        success : bool
            If the backup process completed without any problems, success
            is True, otherwise it is False.
        """
        success = False
        try:
            with open(self.pickle_filename, 'wb') as brain_data:
                pickle.dump(self, brain_data)
            # Optionally save a second copy. If you only save one,
            # and the user happens to ^C out of the program while it is
            # being saved, the file becomes corrupted, and all the
            # learning that the brain did is lost.
            # The second copy is currently switched off.
            make_second_backup = False
            if make_second_backup:
                with open('{0}.bak'.format(self.pickle_filename),
                          'wb') as brain_data_bak:
                    pickle.dump(self, brain_data_bak)
        except IOError as err:
            print('File error: {0} encountered while saving brain data'.format(
                err))
        except pickle.PickleError as perr:
            print('Pickling error: {0} encountered while saving brain data'.
                  format(perr))
        else:
            success = True
        return success

    def restore(self):
        """
        Reconstitute the brain from a previously saved brain.

        Returns
        -------
        restored_brain : Brain
            If restoration was successful, the saved brain is returned.
            Otherwise a notification prints and this brain is returned.
        """
        restored_brain = self
        try:
            # NOTE: unpickling can execute arbitrary code. Only restore
            # from pickle files that this program wrote itself.
            with open(self.pickle_filename, 'rb') as brain_data:
                loaded_brain = pickle.load(brain_data)

            # Compare the number of channels in the restored brain with
            # those in the already initialized brain. If it matches,
            # accept the brain. If it doesn't,
            # print a message, and keep the just-initialized brain.
            # Sometimes the pickle file is corrupted. When this is the case
            # you can manually overwrite it by removing the .bak from the
            # .pickle.bak file. Then you can restore from the backup pickle.
            if ((loaded_brain.num_sensors == self.num_sensors)
                    and (loaded_brain.num_actions == self.num_actions)):
                print('Brain restored at timestep {0} from {1}'.format(
                    str(loaded_brain.timestep), self.pickle_filename))
                restored_brain = loaded_brain
            else:
                print('The brain {0} does not have the same number'.format(
                    self.pickle_filename))
                print('of sensors and actions as the world.')
                print('Creating a new brain from scratch.')
        except IOError:
            print('Couldn\'t open {0} for loading'.format(
                self.pickle_filename))
        except (pickle.PickleError, EOFError) as err:
            # An empty pickle file, such as one left behind by an
            # interrupted backup, raises EOFError rather than PickleError.
            print('Error unpickling world: {0}'.format(err))
        return restored_brain

    def visualize(self):
        """
        Show the current state and some history of the brain.

        This is typically called from a world's visualize method.
        """
        print(' ')
        print('{0} is {1} time steps old'.format(self.name, self.timestep))

        self.affect.visualize(self.timestep, self.name, self.log_dir)
        self.featurizer.visualize()
コード例 #3
0
ファイル: brain.py プロジェクト: brohrer/becca
class Brain(object):
    """
    A biologically motivated learning algorithm.

    Becca's Brain contains all of its learning algorithms,
    integrated into a single whole.

    Check out connector.py for an example for how to attach a world
    to a brain.
    """
    def __init__(self, world, config=None):
        """
        Build a brain for ``world``, optionally restoring a saved one.

        A handful of superficial settings, such as how often to back up
        or visualize, can be overridden by passing key-value pairs
        in ``config``. A key that is missing or set to None falls back
        to its default.

        Parameters
        ----------
        world: World
            The environment the brain is attached to. Its ``name``,
            ``n_actions`` and ``n_sensors`` members are read here.
        config: dict
            Keys are brain parameters, values are desired values.

        Configuration parameters
        ------------------------
        backup_interval: int
            How often the brain will save a pickle backup of itself,
            in timesteps.
        debug: boolean
            Print informative error messages?
        log_directory : str
            The full path name to a directory where information and
            backups for the world can be stored and retrieved.
            Defaults to a ``log`` directory next to this module.
        n_features: int
            The limit on the number of features passed to the model.
            If this is smaller, Becca will run faster. If it is larger
            Becca will have more capacity to learn. It's an important
            input for determining performance.
            Only used when a new brain is initialized; a restored brain
            keeps its saved value.
        name: str
            A descriptive string identifying the brain.
            Defaults to the world's name followed by ``_brain``.
        reporting_interval: int
            How often the brain will report on performance.
        restore : bool, optional
            If restore is True, try to restore the brain
            from a previously saved version, picking up where it left off.
            Otherwise create a new one.
        visualize_interval: int
            The number of time steps between creating a new performance
            calculation and visualization of the brain.
        """
        defaults = {
            "backup_interval": 1e5,
            "debug": True,
            "log_directory": None,
            "n_features": None,
            "name": None,
            "reporting_interval": 1e3,
            "restore": True,
            "visualize_interval": 1e4,
        }
        settings = {} if config is None else config

        def pick(key):
            # A missing key and an explicit None are treated alike:
            # both fall back to the default.
            chosen = settings.get(key)
            return defaults.get(key) if chosen is None else chosen

        requested_name = settings.get("name")
        if requested_name is None:
            self.name = '{0}_brain'.format(world.name)
        else:
            self.name = requested_name

        self.debug = pick("debug")

        # log_dir : str
        #     Path to the log directory. This is where backups
        #     and images of the brain's state and performance are kept.
        requested_log_dir = settings.get("log_directory")
        if requested_log_dir is None:
            # Locate the log directory relative to this module's own file.
            here = os.path.dirname(os.path.abspath(__file__))
            self.log_dir = os.path.normpath(os.path.join(here, 'log'))
        else:
            self.log_dir = requested_log_dir

        # Create the log directory if it is not already there.
        if not os.path.isdir(self.log_dir):
            os.makedirs(self.log_dir)
        # pickle_filename : str
        #     Path and filename of the backup pickle file.
        self.pickle_filename = os.path.join(
            self.log_dir, '{0}.pickle'.format(self.name))

        # The world is required to expose these two members.
        # n_actions: int
        #     The total number of action outputs the world is expecting.
        # n_sensors: int
        #     The number of distinct sensors the world passes to the brain.
        self.n_actions = world.n_actions
        self.n_sensors = world.n_sensors

        self.timestep = 0

        # Only attempt a restore when asked to. None means there is
        # no saved brain to copy from.
        restored_brain = restore(self) if pick("restore") else None

        if restored_brain is not None:
            # Adopt the learned state of the saved brain.
            for attribute in ("timestep",
                              "input_activities",
                              "actions",
                              "n_features",
                              "postprocessor",
                              "n_commands",
                              "commands",
                              "preprocessor",
                              "affect",
                              "satisfaction",
                              "featurizer",
                              "model",
                              "actor"):
                setattr(self, attribute, getattr(restored_brain, attribute))
        else:
            # Start from scratch.

            # The preprocessor takes raw sensors and commands and converts
            # them into discrete inputs.
            # Assume all actions are in a continuous space.
            # This means that it can be repeatedly subdivided to
            # generate actions of various magnitudes and increase control.
            self.preprocessor = Preprocessor(n_sensors=self.n_sensors)

            # The postprocessor converts actions to discretized actions
            # and back.
            self.postprocessor = Postprocessor(n_actions=self.n_actions)

            # actions: array of floats
            #     The set of actions to execute this time step.
            #     Starting them off non-zero helps to kick start the
            #     act-sense-decide loop.
            self.actions = np.ones(self.n_actions) * .1

            self.affect = Affect()
            # satisfaction: float
            #     The level of contentment experienced by the brain.
            #     Higher contentment dampens curiosity and
            #     the drive to explore.
            self.satisfaction = 0

            # commands: array of floats
            #     Commands are discretized actions, suitable
            #     for use within becca. The postprocessor
            #     translates commands into actions.
            self.n_commands = self.postprocessor.n_commands
            self.commands = np.zeros(self.n_commands)

            requested_features = settings.get("n_features")
            if requested_features is None:
                self.n_features = (2 * self.n_commands
                                   + 8 * self.n_sensors)
            else:
                self.n_features = requested_features

            self.input_activities = np.zeros(self.n_features)
            # The featurizer is an unsupervised learner that learns
            # features from the inputs.
            self.featurizer = Featurizer(
                n_inputs=self.n_features,
                debug=self.debug,
            )
            # The model builds sequences of features and goals and reward
            # for making predictions about its world.
            self.model = Model(
                n_features=self.n_features,
                brain=self,
                debug=self.debug,
            )

            # The actor takes conditional predictions from the model and
            # uses them to choose new goals.
            self.actor = Actor(self.n_features, self)

        # Finish with the superficial configuration.
        # These are applied last because they might change
        # from session to session, even for a restored brain.
        for interval in ("backup_interval",
                         "reporting_interval",
                         "visualize_interval"):
            setattr(self, interval, pick(interval))

    def sense_act_learn(self, sensors, reward):
        """
        Run one sense-act-learn cycle and return the actions to execute.

        Parameters
        ----------
        sensors : array of floats
            The readings coming from the world's sensors.
            The array should have self.n_sensors elements.
            Whatever the low and high value of each sensor, its value
            will be rescaled to fall between 0 and 1.
            Sensor values are treated as fuzzy binary values rather
            than continuous ones. A contact sensor value of .5 is not
            read as "weakly contacted"; it is read as "fully contacted
            for 50% of the sensing duration", or as "a 50% chance of
            having been fully contacted for the whole duration".
            Likewise, a light sensor reading of zero is not interpreted
            as darkness, only as a lack of information about lightness.
        reward : float
            How strongly the world is rewarding the brain.
            Expected to lie between -1 and 1, inclusive.
            -1 is the worst pain ever, 1 is the most intense ecstasy
            imaginable and 0 is neutral.

        Returns
        -------
        actions : array of floats
            The action commands the brain sends to the world.
            The array should have self.n_actions elements, each of
            them binary (0 or 1). This lets the brain learn most
            effectively how to interact with the world to obtain
            more reward.
        """
        self.timestep += 1

        # Fold the newest reward into the agent's "mood".
        self.satisfaction = self.affect.update(reward)

        # Bottom-up pass: the previous commands and the freshly
        # preprocessed sensors are featurized together.
        input_activities = self.preprocessor.convert_to_inputs(sensors)
        commands_and_inputs = np.concatenate(
            (self.postprocessor.consolidated_commands, input_activities))
        feature_activities = self.featurizer.featurize(commands_and_inputs)

        # The model returns its own view of the feature activities along
        # with three sets of conditional estimates for the actor.
        (model_activities,
         predictions,
         rewards,
         curiosities) = self.model.step(feature_activities, reward)

        feature_goals, i_goal = self.actor.choose(
            feature_activities=model_activities,
            conditional_predictions=predictions,
            conditional_rewards=rewards,
            conditional_curiosities=curiosities,
        )
        feature_pool_goals = self.model.update_goals(feature_goals, i_goal)

        # Flip this switch by hand to trace each step while debugging.
        debug_local = False
        if debug_local:
            logger.debug(
                "Brain"
                + " last action: " + str(self.actions[0]) + ", "
                + " reward of " + str(reward) + ", "
                + " next sensors " + str(sensors))

        # Top-down pass: turn feature goals back into input goals.
        input_goals = self.featurizer.defeaturize(feature_pool_goals)

        # The leading n_commands goals are the commands; the
        # postprocessor converts them into actions.
        command_goals = input_goals[:self.n_commands]
        self.actions = self.postprocessor.convert_to_actions(command_goals)

        # Refresh the inputs with a top-down pass (fitness) followed by
        # a bottom-up pass (resets).
        self.featurizer.calculate_fitness(self.model.calculate_fitness())
        candidate_resets = self.featurizer.update_inputs()
        self.actor.reset(self.model.update_inputs(candidate_resets))

        # Replace the chosen actions with random ones.
        # This is occasionally helpful when debugging.
        take_random_actions = False
        if take_random_actions:
            self.actions = self.random_actions()

        def due(interval):
            # True on time steps that are an exact multiple of interval.
            return self.timestep % interval == 0

        # Periodically back up the brain.
        if due(self.backup_interval):
            self.backup()

        # Periodically report on performance.
        if due(self.reporting_interval):
            self.affect.visualize(self)

        # Periodically create a visualization.
        if due(self.visualize_interval):
            viz.visualize(self)

        return self.actions

    def random_actions(self):
        """
        Generate a random set of actions.

        This is used for debugging. Running a world with random
        actions gives a baseline performance floor on a world.

        Returns
        -------
        actions : array of floats
            See sense_act_learn.actions.
        """
        threshold = .1 / float(self.n_actions)
        action_strength = np.random.random_sample(self.n_actions)
        actions = np.zeros(self.n_actions)
        actions[np.where(action_strength < threshold)] = 1
        return actions

    def report_performance(self):
        """
        Make a report of how the brain did over its lifetime.

        Returns
        -------
        performance : float
            The average reward per time step collected by
            the brain over its lifetime.
        """
        performance = self.affect.visualize(self)
        return performance

    def backup(self):
        """
        Archive a copy of the brain object for future use.

        Returns
        -------
        success : bool
            If the backup process completed without any problems, success
            is True, otherwise it is False.
        """
        success = False
        try:
            with open(self.pickle_filename, 'wb') as brain_data:
                pickle.dump(self, brain_data)
            # Save a second copy. If you only save one, and the user
            # happens to ^C out of the program while it is being saved,
            # the file becomes corrupted, and all the learning that the
            # brain did is lost.
            make_second_backup = True
            if make_second_backup:
                with open('{0}.bak'.format(self.pickle_filename),
                          'wb') as brain_data_bak:
                    pickle.dump(self, brain_data_bak)
        except IOError as err:
            logger.error('File error: {0} encountered while saving brain data'.
                  format(err))
        except pickle.PickleError as perr:
            logger.error('Pickling error: {0} encountered while saving brain data'.
                  format(perr))
        except Exception as err:
            logger.error('Unknown error: {0} encountered while saving brain data'
                  .format(err))
        else:
            success = True
        return success
コード例 #4
0
class Brain(object):
    """
    A biologically motivated learning algorithm.

    Becca's Brain gathers all of its learning components
    (preprocessor, featurizer, model, actor, postprocessor and affect)
    and runs them as a single whole.

    Check out connector.py for an example of how to attach a world
    to a brain.
    """
    def __init__(self, world, config=None):
        """
        Configure the Brain.

        A handful of superficial parameters, such as how often to
        visualize and how often to back things up, can be chosen per
        world by passing the appropriate key-value pairs in a dictionary.
        A key that is missing, or whose value is None, falls back
        to its default.

        Parameters
        ----------
        world: World
            An environment with an appropriate step() function.
            It must expose n_actions and n_sensors, and its name
            is used when no brain name is configured.
        config: dict
            Keys are brain parameters, values are desired values.

        Configuration parameters
        ------------------------
        backup_interval: int
            How often the brain will save a pickle backup of itself,
            in timesteps. Defaults to 1e5.
        debug: boolean
            Print informative error messages? Defaults to True.
        log_directory : str
            The full path name to a directory where information and
            backups for the world can be stored and retrieved.
            Defaults to a 'log' directory next to this module.
        n_features: int
            The limit on the number of features passed to the model.
            If this is smaller, Becca will run faster. If it is larger
            Becca will have more capacity to learn. It's an important
            input for determining performance.
            Defaults to 2 * n_commands + 8 * n_sensors.
        name: str
            A descriptive string identifying the brain.
            Defaults to '<world.name>_brain'.
        reporting_interval: int
            How often the brain will report on performance.
            Defaults to 1e3.
        restore : bool, optional
            If restore is True, try to restore the brain from
            a previously saved version, picking up where it left off.
            Otherwise create a new one. Defaults to True.
        visualize_interval: int
            The number of time steps between creating a new performance
            calculation and visualization of the brain.
            Defaults to 1e4.
        """
        defaults = {
            "backup_interval": 1e5,
            "debug": True,
            "log_directory": None,
            "n_features": None,
            "name": None,
            "reporting_interval": 1e3,
            "restore": True,
            "visualize_interval": 1e4,
        }
        if config is None:
            config = {}

        def setting(key):
            # A missing key and an explicit None both mean
            # "use the default".
            value = config.get(key)
            return defaults.get(key) if value is None else value

        configured_name = config.get("name")
        if configured_name is None:
            self.name = '{0}_brain'.format(world.name)
        else:
            self.name = configured_name

        self.debug = setting("debug")

        configured_log_dir = config.get("log_directory")
        if configured_log_dir is None:
            # Locate the directory holding this module. This trick is
            # used to conveniently find other Becca resources.
            module_path = os.path.dirname(os.path.abspath(__file__))
            # log_dir : str
            #     Path to the log directory. This is where backups and
            #     images of the brain's state and performance are kept.
            self.log_dir = os.path.normpath(
                os.path.join(module_path, 'log'))
        else:
            self.log_dir = configured_log_dir

        # Create the log directory if it is not there yet.
        if not os.path.isdir(self.log_dir):
            os.makedirs(self.log_dir)
        # pickle_filename : str
        #     Path and filename of the backup pickle file.
        self.pickle_filename = os.path.join(
            self.log_dir, '{0}.pickle'.format(self.name))

        # One of the few constraints on the world is that it has to have
        # n_actions and n_sensors members.
        # n_actions: int
        #     The total number of action outputs that
        #     the world is expecting.
        # n_sensors: int
        #     The number of distinct sensors that the world
        #     will be passing in to the brain.
        self.n_actions = world.n_actions
        self.n_sensors = world.n_sensors

        self.timestep = 0

        # restore() is only consulted when restoring is switched on.
        restored_brain = restore(self) if setting("restore") else None

        if restored_brain is not None:
            # Adopt the learned state of the saved brain,
            # one attribute at a time.
            for attribute in (
                    "timestep",
                    "input_activities",
                    "actions",
                    "n_features",
                    "postprocessor",
                    "n_commands",
                    "commands",
                    "preprocessor",
                    "affect",
                    "satisfaction",
                    "featurizer",
                    "model",
                    "actor",
            ):
                setattr(self, attribute, getattr(restored_brain, attribute))
        else:
            # Nothing to restore: initialize everything.
            # The order matters, because the model and the actor
            # are handed this brain while it is being built.

            # The preprocessor takes raw sensors and commands and converts
            # them into discrete inputs.
            # Assume all actions are in a continuous space.
            # This means that it can be repeatedly subdivided to
            # generate actions of various magnitudes and increase control.
            self.preprocessor = Preprocessor(n_sensors=self.n_sensors)

            # The postprocessor converts actions to discretized actions
            # and back.
            self.postprocessor = Postprocessor(n_actions=self.n_actions)

            # actions: array of floats
            #     The set of actions to execute this time step.
            #     Initializing them to non-zero helps to kick start the
            #     act-sense-decide loop.
            self.actions = np.ones(self.n_actions) * .1

            self.affect = Affect()
            # satisfaction: float
            #     The level of contentment experienced by the brain.
            #     Higher contentment dampens curiosity and
            #     the drive to explore.
            self.satisfaction = 0.

            # n_commands: the number of commands, as reported by
            #     the postprocessor. Commands are discretized actions,
            #     suitable for use within becca. The postprocessor
            #     translates commands into actions.
            # commands: array of floats, one entry per command.
            self.n_commands = self.postprocessor.n_commands
            self.commands = np.zeros(self.n_commands)

            n_features = config.get("n_features")
            if n_features is None:
                n_features = 2 * self.n_commands + 8 * self.n_sensors
            self.n_features = n_features

            self.input_activities = np.zeros(self.n_features)
            # The featurizer is an unsupervised learner that learns
            # features from the inputs.
            self.featurizer = Featurizer(
                debug=self.debug,
                n_inputs=self.n_features,
            )
            # The model builds sequences of features and goals and reward
            # for making predictions about its world.
            self.model = Model(
                brain=self,
                debug=self.debug,
                n_features=self.n_features,
            )

            # The actor takes conditional predictions from the model and
            # uses them to choose new goals.
            self.actor = Actor(self.n_features, self)

        # Finish with the superficial configuration.
        # This might change from session to session, so it is applied
        # to restored and new brains alike.
        for key in ("backup_interval",
                    "reporting_interval",
                    "visualize_interval"):
            setattr(self, key, setting(key))

    def sense_act_learn(self, sensors, reward):
        """
        Run one sense-act-learn cycle and return the actions to execute.

        Parameters
        ----------
        sensors : array of floats
            The readings coming from the world's sensors.
            The array should have self.n_sensors elements.
            Whatever the low and high value of each sensor, its value
            will be rescaled to fall between 0 and 1.
            Sensor values are treated as fuzzy binary values rather
            than continuous ones. A contact sensor value of .5 is not
            read as "weakly contacted"; it is read as "fully contacted
            for 50% of the sensing duration", or as "a 50% chance of
            having been fully contacted for the whole duration".
            Likewise, a light sensor reading of zero is not interpreted
            as darkness, only as a lack of information about lightness.
        reward : float
            How strongly the world is rewarding the brain.
            Expected to lie between -1 and 1, inclusive.
            -1 is the worst pain ever, 1 is the most intense ecstasy
            imaginable and 0 is neutral.

        Returns
        -------
        actions : array of floats
            The action commands the brain sends to the world.
            The array should have self.n_actions elements, each of
            them binary (0 or 1). This lets the brain learn most
            effectively how to interact with the world to obtain
            more reward.
        """
        self.timestep += 1

        # Fold the newest reward into the agent's "mood".
        self.satisfaction = self.affect.update(reward)

        # Bottom-up pass: the previous commands and the freshly
        # preprocessed sensors are featurized together.
        input_activities = self.preprocessor.convert_to_inputs(sensors)
        commands_and_inputs = np.concatenate(
            (self.postprocessor.consolidated_commands, input_activities))
        feature_activities = self.featurizer.featurize(commands_and_inputs)

        # The model returns its own view of the feature activities along
        # with three sets of conditional estimates for the actor.
        (model_activities,
         predictions,
         rewards,
         curiosities) = self.model.step(feature_activities, reward)

        feature_goals, i_goal = self.actor.choose(
            feature_activities=model_activities,
            conditional_predictions=predictions,
            conditional_rewards=rewards,
            conditional_curiosities=curiosities,
        )
        feature_pool_goals = self.model.update_goals(feature_goals, i_goal)

        # Flip this switch by hand to trace each step while debugging.
        debug_local = False
        if debug_local:
            print(
                "Brain"
                + " last action: " + str(self.actions[0]) + ", "
                + " reward of " + str(reward) + ", "
                + " next sensors " + str(sensors))

        # Top-down pass: turn feature goals back into input goals.
        input_goals = self.featurizer.defeaturize(feature_pool_goals)

        # The leading n_commands goals are the commands; the
        # postprocessor converts them into actions.
        command_goals = input_goals[:self.n_commands]
        self.actions = self.postprocessor.convert_to_actions(command_goals)

        # Refresh the inputs with a top-down pass (fitness) followed by
        # a bottom-up pass (resets).
        self.featurizer.calculate_fitness(self.model.calculate_fitness())
        candidate_resets = self.featurizer.update_inputs()
        self.actor.reset(self.model.update_inputs(candidate_resets))

        # Replace the chosen actions with random ones.
        # This is occasionally helpful when debugging.
        take_random_actions = False
        if take_random_actions:
            self.actions = self.random_actions()

        def due(interval):
            # True on time steps that are an exact multiple of interval.
            return self.timestep % interval == 0

        # Periodically back up the brain.
        if due(self.backup_interval):
            self.backup()

        # Periodically report on performance.
        if due(self.reporting_interval):
            self.affect.visualize(self)

        # Periodically create a visualization.
        if due(self.visualize_interval):
            viz.visualize(self)

        return self.actions

    def random_actions(self):
        """
        Draw a sparse, random set of binary actions.

        This is used for debugging. Running a world with random
        actions gives a baseline performance floor on a world.

        Returns
        -------
        actions : array of floats
            One entry per action, each either 0 or 1.
            See sense_act_learn.actions.
        """
        n_actions = self.n_actions
        # Every action fires independently with probability .1 / n_actions.
        firing_probability = .1 / float(n_actions)
        draws = np.random.random_sample(n_actions)
        return np.where(draws < firing_probability, 1., 0.)

    def report_performance(self):
        """
        Report how the brain did over its lifetime.

        The work is delegated to self.affect.visualize(), which is
        handed this brain.

        Returns
        -------
        performance : float
            The average reward per time step collected by
            the brain over its lifetime.
        """
        return self.affect.visualize(self)

    def backup(self):
        """
        Pickle the brain to disk so it can be restored later.

        The brain is written to self.pickle_filename and then again
        to the same name with a '.bak' suffix.

        Returns
        -------
        success : bool
            True if both copies were written without any problems,
            False if an error was caught (and printed) along the way.
        """
        # Keep a second copy. If there is only one, and the user
        # happens to ^C out of the program while it is being saved,
        # the file becomes corrupted, and all the learning that the
        # brain did is lost.
        make_second_backup = True
        try:
            with open(self.pickle_filename, 'wb') as primary:
                pickle.dump(self, primary)
            if make_second_backup:
                secondary_name = '{0}.bak'.format(self.pickle_filename)
                with open(secondary_name, 'wb') as secondary:
                    pickle.dump(self, secondary)
        except IOError as err:
            print(
                'File error: {0} encountered while saving brain data'
                .format(err))
            return False
        except pickle.PickleError as perr:
            print(
                'Pickling error: {0} encountered while saving brain data'
                .format(perr))
            return False
        except Exception as err:
            # Backups are best effort: report the problem and carry on.
            print(
                'Unknown error: {0} encountered while saving brain data'
                .format(err))
            return False
        return True
コード例 #5
0
class Brain(object):
    """
    A biologically motivated learning algorithm.

    Attributes
    ----------
    actions : array of floats
        The most recently chosen actions, including the trailing
        'do nothing' action that the ``brain`` adds.
    affect : Affect
        See the pydocs in the module ``affect.py`` for the class ``Affect``.
    backup_interval : number
        The number of time steps between saving a copy of the ``brain``
        out to a pickle file for easy recovery.
    levels : list of ``Level``
        Collectively, the levels form a hierarchy with ``levels[0]``
        on the bottom.
        Refer to ``level.py`` for a detailed description of a level.
    log_dir : str
        Path to the ``log`` directory. This is where backups
        and images of the ``brain``'s state and performance are kept.
    name : str
        Unique name for this ``brain``.
    num_actions : int
        The number of distinct actions that the ``brain`` can choose to
        execute in the world, plus one for the 'do nothing' action.
    num_sensors : int
        The number of distinct sensors that the world will be passing in
        to the ``brain``.
    pickle_filename : str
        Path and filename of the backup pickle file.
    satisfaction : float
        The level of contentment experienced by the brain. Higher contentment
        dampens curiosity and the drive to explore.
    timestep : int
        The age of the ``brain`` in discrete time steps.
    """
    def __init__(self, num_sensors, num_actions, brain_name='test_brain'):
        """
        Configure the Brain.

        Parameters
        ----------
        num_sensors : int
            The number of distinct sensors that the world will be
            passing in to the ``brain``.
        num_actions : int
            The number of distinct actions the world accepts. The
            ``brain`` internally tracks one additional 'do nothing'
            action.
        brain_name : str
            Unique name for this ``brain``. It is also used to name
            the backup pickle file.
        """
        self.num_sensors = num_sensors
        # Always include an extra action. The last is the 'do nothing' action.
        self.num_actions = num_actions + 1

        self.backup_interval = 1e5
        self.name = brain_name
        self.log_dir = os.path.normpath(os.path.join(MODPATH, 'log'))
        if not os.path.isdir(self.log_dir):
            os.makedirs(self.log_dir)
        self.pickle_filename = os.path.join(self.log_dir,
                                            '{0}.pickle'.format(brain_name))
        self.affect = Affect()
        self.satisfaction = 0.

        # Initialize the first ``Level``. Its inputs are the sensors
        # followed by the actions.
        num_inputs = self.num_sensors + self.num_actions
        level_index = 0
        level_0 = Level(level_index, num_inputs)
        self.levels = [level_0]
        self.actions = np.zeros(self.num_actions)

        self.timestep = 0

    def sense_act_learn(self, sensors, reward):
        """
        Take sensor and reward data in and use them to choose an action.

        Parameters
        ----------
        sensors : array of floats
            The information coming from the sensors in the world.
            The array should have ``self.num_sensors`` inputs.
            Each value in the array is expected to be between 0 and 1,
            inclusive. Sensor values are interpreted as fuzzy binary
            values, rather than continuous values. For instance,
            the ``brain`` doesn't interpret a contact sensor value of .5
            to mean that the contact
            sensor was only weakly contacted. It interprets it
            to mean that the sensor was fully contacted for 50% of the sensing
            duration or that there is a 50% chance that the sensor was
            fully contacted during the entire sensing duration. For another
            example, a light sensor reading of zero won't be
            interpreted by the ``brain`` as darkness. It will just be
            interpreted as a lack of information about the lightness.
        reward : float
            The extent to which the ``brain`` is being rewarded by the
            world. It is expected to be between -1 and 1, inclusive.
            -1 is the worst pain ever. 1 is the most intense ecstasy
            imaginable. 0 is neutral.

        Returns
        -------
        actions : array of floats
            The action commands that the ``brain`` is sending to the world
            to be executed. The internal 'do nothing' action is stripped,
            so the array has ``self.num_actions - 1`` entries.
            Each value should be binary: 0 and 1. This
            allows the ``brain`` to learn most effectively how to interact
            with the world to obtain more reward.
        """
        self.timestep += 1

        # Calculate the "mood" of the agent.
        self.satisfaction = self.affect.update(reward)

        # Calculate activities of all the sequences in the hierarchy.
        # Each level's sequence activities feed the level above it.
        input_activities = np.concatenate((sensors, self.actions))
        for level in self.levels:
            sequence_activities = level.step(input_activities, reward,
                                             self.satisfaction)
            # For the next level
            input_activities = sequence_activities

        # After the loop, ``level`` is the top level of the hierarchy.
        # If it has more than half of its allocated sequences
        # created, then create a new level on top of it.
        if level.num_sequences > level.max_num_sequences / 2.:
            # Initialize the next level.
            num_inputs = level.max_num_sequences
            level_index = level.level_index + 1
            print('------------------------------------------ Creating level',
                  level_index)
            next_level = Level(level_index, num_inputs)
            self.levels.append(next_level)

            sequence_activities = next_level.step(input_activities, reward,
                                                  self.satisfaction)

        # Pass goals back down, from the top of the hierarchy to the bottom.
        for i in reversed(range(len(self.levels) - 1)):
            self.levels[i].sequence_goals = self.levels[i + 1].input_goals

        # Isolate the actions from the rest of the goals. The bottom
        # level's inputs are laid out as sensors first, then actions.
        first_action = self.num_sensors
        last_action = self.num_sensors + self.num_actions
        self.actions = self.levels[0].input_goals[first_action:last_action]

        # Periodically back up the ``brain``.
        if (self.timestep % self.backup_interval) == 0:
            self.backup()

        # Account for the fact that the last "do nothing" action
        # was added by the ``brain``.
        return self.actions[:-1]

    def random_actions(self):
        """
        Generate a random set of actions.

        Each action is switched on independently with probability
        ``.5 / self.num_actions``.

        Returns
        -------
        actions : array of floats
            See ``sense_act_learn.actions``. Unlike that return value,
            this array has ``self.num_actions`` entries.
        """
        threshold = .5 / float(self.num_actions)
        action_strength = np.random.random_sample(self.num_actions)
        actions = np.zeros(self.num_actions)
        actions[np.where(action_strength < threshold)] = 1.
        return actions

    def report_performance(self):
        """
        Make a report of how the brain did over its lifetime.

        Returns
        -------
        performance : float
            The average reward per time step collected by
            the ``brain`` over its lifetime.
        """
        return self.affect.visualize(self.timestep, self.name, self.log_dir)

    def backup(self):
        """
        Archive a copy of the brain object for future use.

        The ``brain`` is pickled to ``self.pickle_filename`` and then
        again to the same name with a ``.bak`` suffix.

        Returns
        -------
        success : bool
            If the backup process completed without any problems, ``success``
            is True, otherwise it is False.
        """
        success = False
        try:
            with open(self.pickle_filename, 'wb') as brain_data:
                pickle.dump(self, brain_data)
            # Save a second copy. If you only save one, and the user
            # happens to ^C out of the program while it is being saved,
            # the file becomes corrupted, and all the learning that the
            # ``brain`` did is lost.
            with open('{0}.bak'.format(self.pickle_filename),
                      'wb') as brain_data_bak:
                pickle.dump(self, brain_data_bak)
        except IOError as err:
            print('File error: {0} encountered while saving brain data'.format(
                err))
        except pickle.PickleError as perr:
            print('Pickling error: {0} encountered while saving brain data'.
                  format(perr))
        else:
            success = True
        return success

    def restore(self):
        """
        Reconstitute the brain from a previously saved brain.

        Returns
        -------
        restored_brain : Brain
            If restoration was successful, the saved ``brain`` is returned.
            Otherwise a notification prints and this ``brain`` is returned
            unchanged.
        """
        restored_brain = self
        try:
            # NOTE: unpickling can execute arbitrary code. Only restore
            # from pickle files that this program wrote itself.
            with open(self.pickle_filename, 'rb') as brain_data:
                loaded_brain = pickle.load(brain_data)

            # Compare the number of channels in the restored brain with
            # those in the already initialized brain. If it matches,
            # accept the brain. If it doesn't,
            # print a message, and keep the just-initialized brain.
            # Sometimes the pickle file is corrupted. When this is the case
            # you can manually overwrite it by removing the .bak from the
            # .pickle.bak file. Then you can restore from the backup pickle.
            if ((loaded_brain.num_sensors == self.num_sensors)
                    and (loaded_brain.num_actions == self.num_actions)):
                print('Brain restored at timestep {0} from {1}'.format(
                    str(loaded_brain.timestep), self.pickle_filename))
                restored_brain = loaded_brain
            else:
                print('The brain {0} does not have the same number'.format(
                    self.pickle_filename))
                print('of sensors and actions as the world.')
                print('Creating a new brain from scratch.')
        except IOError:
            print('Couldn\'t open {0} for loading'.format(
                self.pickle_filename))
        # "except X as err" is valid on both Python 2.6+ and Python 3;
        # the older "except X, err" form is a syntax error on Python 3.
        except pickle.PickleError as err:
            print('Error unpickling world: {0}'.format(err))
        return restored_brain
コード例 #6
0
class Brain(object):
    """
    A biologically motivated learning algorithm.

    Becca's Brain contains all of its learning algorithms,
    integrated into a single whole.

    Check out connector.py for an example for how to attach a world
    to a brain.
    """
    def __init__(
        self,
        backup_interval=int(2**20),
        brain_name='test_brain',
        debug=True,
        log_directory=None,
        n_actions=4,
        n_features=64,
        n_sensors=4,
        timestep=0,
        visualize_interval=int(2**18),
    ):
        """
        Configure the Brain.

        Parameters
        ----------
        backup_interval: int
            How often the brain will save a pickle backup of itself,
            in timesteps.
        brain_name: str
            A descriptive string identifying the brain.
        debug: boolean
            Print informative error messages?
        log_directory : str
            The full path name to a directory where information and
            backups for the world can be stored and retrieved.
            If it is not given, a ``log`` directory next to this module
            is used. The directory is created if it does not exist.
        n_actions: int
            This is the total number of action outputs that
            the world is expecting.
        n_sensors: int
            The number of distinct sensors that the world will be passing in
            to the brain.
        n_features: int
            The limit on the number of features passed to the model.
            If this is smaller, Becca will run faster. If it is larger
            Becca will have more capacity to learn. It's an important
            input for determining performance.
            It is raised to at least ``n_actions + 4 * n_sensors``.
        timestep: int
            The age of the brain in discrete time steps.
        visualize_interval: int
            How often to visualize the world, in time steps.
        """
        self.debug = debug
        self.n_sensors = n_sensors
        self.n_actions = n_actions
        # Never allow fewer features than n_actions + 4 * n_sensors.
        self.n_features = np.maximum(
            n_features, self.n_actions + 4 * self.n_sensors)

        self.input_activities = np.zeros(self.n_features)

        # actions: array of floats
        #     The set of actions to execute this time step.
        #     Initializing them to non-zero helps to kick start the
        #     act-sense-decide loop.
        self.actions = np.ones(self.n_actions) * .1

        # The postprocessor converts actions to discretized actions
        # and back.
        self.postprocessor = Postprocessor(n_actions=self.n_actions)

        # n_commands: int, taken from the postprocessor
        #     Commands are discretized actions, suitable for use within
        #     becca. The postprocessor translates commands into actions.
        self.n_commands = self.postprocessor.n_commands
        # previous_commands: array of floats
        #     The discretized actions executed on the previous time step.
        self.previous_commands = np.zeros(self.n_commands)
        self.commands = np.zeros(self.n_commands)

        # The preprocessor takes raw sensors and commands and converts
        # them into discrete inputs.
        # Assume all actions are in a continuous space.
        # This means that it can be repeatedly subdivided to
        # generate actions of various magnitudes and increase control.
        self.preprocessor = Preprocessor(
            n_commands=self.n_commands,
            n_sensors=self.n_sensors,
        )

        self.affect = Affect()
        # satisfaction: float
        #     The level of contentment experienced by the brain.
        #     Higher contentment dampens curiosity and the drive to explore.
        self.satisfaction = 0.

        # The featurizer is an unsupervised learner that learns
        # features from the inputs.
        self.featurizer = Featurizer(
            debug=self.debug,
            n_inputs=self.n_features,
            threshold=1e3,
        )
        # The model builds sequences of features and goals and reward
        # for making predictions about its world.
        self.model = Model(
            brain=self,
            debug=self.debug,
            n_features=self.n_features,
        )

        # The actor takes conditional predictions from the model and
        # uses them to choose new goals.
        self.actor = Actor(self.n_features, self)

        self.timestep = timestep
        self.visualize_interval = visualize_interval
        self.backup_interval = backup_interval
        self.name = brain_name

        if log_directory:
            self.log_dir = log_directory
        else:
            # Identify the full local path of the brain.py module.
            # This trick is used to conveniently locate other Becca resources.
            module_path = os.path.dirname(os.path.abspath(__file__))
            # log_dir : str
            #     Path to the log directory. This is where backups
            #     and images of the brain's state and performance are kept.
            self.log_dir = os.path.normpath(os.path.join(module_path, 'log'))

        # Check whether the directory is already there. If not, create it.
        if not os.path.isdir(self.log_dir):
            os.makedirs(self.log_dir)
        # pickle_filename : str
        #     Path and filename of the backup pickle file.
        self.pickle_filename = os.path.join(
            self.log_dir, '{0}.pickle'.format(brain_name))

    def sense_act_learn(self, sensors, reward):
        """
        Take sensor and reward data in and use them to choose an action.

        Parameters
        ----------
        sensors : array of floats
            The information coming from the sensors in the world.
            The array should have self.n_sensors inputs.
            Whatever the low and high value of each sensor, its value
            will be rescaled to fall between 0 and 1.
            Sensor values are interpreted as fuzzy binary
            values, rather than continuous values. For instance,
            the brain doesn't interpret a contact sensor value of .5
            to mean that the contact
            sensor was only weakly contacted. It interprets it
            to mean that the sensor was fully contacted for 50% of the sensing
            duration or that there is a 50% chance that the sensor was
            fully contacted during the entire sensing duration. For another
            example, a light sensor reading of zero won't be
            interpreted by the brain as darkness. It will just be
            interpreted as a lack of information about the lightness.
        reward : float
            The extent to which the brain is being rewarded by the
            world. It is expected to be between -1 and 1, inclusive.
            -1 is the worst pain ever. 1 is the most intense ecstasy
            imaginable. 0 is neutral.

        Returns
        -------
        actions : array of floats
            The action commands that the brain is sending to the world
            to be executed. The array should have self.n_actions
            inputs in it. Each value should be binary: 0 and 1. This
            allows the brain to learn most effectively how to interact
            with the world to obtain more reward.
        """
        self.timestep += 1

        # Calculate the "mood" of the agent.
        self.satisfaction = self.affect.update(reward)

        # Calculate new activities in a bottom-up pass.
        input_activities = self.preprocessor.convert_to_inputs(
            self.previous_commands, sensors)

        feature_activities = self.featurizer.featurize(input_activities)
        (conditional_predictions,
            conditional_rewards,
            conditional_curiosities
        ) = self.model.step(feature_activities, reward)
        feature_goals, i_goal = self.actor.choose(
            conditional_predictions=conditional_predictions,
            conditional_rewards=conditional_rewards,
            conditional_curiosities=conditional_curiosities,
        )
        feature_pool_goals = self.model.update_goals(feature_goals, i_goal)

        # Pass goals back down.
        input_goals = self.featurizer.defeaturize(feature_pool_goals)

        # Isolate the commands from the rest of the goals (they occupy
        # the first n_commands slots) and let the postprocessor turn them
        # into this step's commands and actions.
        self.previous_commands, self.actions = (
            self.postprocessor.convert_to_actions(
                input_goals[:self.n_commands]))

        # Update the inputs in a pair of top-down/bottom-up passes.
        # Top-down
        candidate_fitness = self.model.calculate_fitness()
        self.featurizer.calculate_fitness(candidate_fitness)
        # Bottom-up
        candidate_resets = self.featurizer.update_inputs()
        feature_resets = self.model.update_inputs(candidate_resets)
        self.actor.reset(feature_resets)

        # Create a set of random actions.
        # This is occasionally helpful when debugging.
        take_random_actions = False
        if take_random_actions:
            self.actions = self.random_actions()

        # Periodically back up the brain.
        if (self.timestep % self.backup_interval) == 0:
            self.backup()

        return self.actions

    def random_actions(self):
        """
        Generate a random set of actions.

        This is used for debugging. Running a world with random
        actions gives a baseline performance floor on a world.
        Each action is independently set to 1 with probability
        ``.1 / n_actions`` and to 0 otherwise.

        Returns
        -------
        actions : array of floats
            See sense_act_learn.actions.
        """
        threshold = .1 / float(self.n_actions)
        action_strength = np.random.random_sample(self.n_actions)
        actions = np.zeros(self.n_actions)
        actions[np.where(action_strength < threshold)] = 1.
        return actions

    def report_performance(self):
        """
        Make a report of how the brain did over its lifetime.

        Returns
        -------
        performance : float
            The average reward per time step collected by
            the brain over its lifetime.
        """
        performance = self.affect.visualize(self)
        return performance

    def backup(self):
        """
        Archive a copy of the brain object for future use.

        The brain is pickled to ``self.pickle_filename`` and then a second
        time to the same name with ``.bak`` appended. Errors are printed,
        not raised.

        Returns
        -------
        success : bool
            If the backup process completed without any problems, success
            is True, otherwise it is False.
        """
        success = False
        try:
            with open(self.pickle_filename, 'wb') as brain_data:
                pickle.dump(self, brain_data)
            # Save a second copy. If you only save one, and the user
            # happens to ^C out of the program while it is being saved,
            # the file becomes corrupted, and all the learning that the
            # brain did is lost.
            make_second_backup = True
            if make_second_backup:
                with open('{0}.bak'.format(self.pickle_filename),
                          'wb') as brain_data_bak:
                    pickle.dump(self, brain_data_bak)
        except IOError as err:
            print('File error: {0} encountered while saving brain data'.
                  format(err))
        except pickle.PickleError as perr:
            print('Pickling error: {0} encountered while saving brain data'.
                  format(perr))
        except Exception as err:
            # Best effort: a failed backup should not stop the brain.
            print('Unknown error: {0} encountered while saving brain data'.
                  format(err))
        else:
            success = True
        return success

    def restore(self):
        """
        Reconstitute the brain from a previously saved brain.

        The pickle stored at ``self.pickle_filename`` is loaded and accepted
        only if its ``n_sensors`` and ``n_actions`` match those of this
        (already initialized) brain. Failures are printed, not raised.

        Returns
        -------
        restored_brain : Brain
            If restoration was successful, the saved brain is returned.
            Otherwise a notification prints and this just-initialized
            brain (``self``) is returned.
        """
        restored_brain = self
        try:
            # NOTE: unpickling can execute arbitrary code. Only restore
            # from backup files that you trust.
            with open(self.pickle_filename, 'rb') as brain_data:
                loaded_brain = pickle.load(brain_data)

            # Compare the number of channels in the restored brain with
            # those in the already initialized brain. If it matches,
            # accept the brain. If it doesn't,
            # print a message, and keep the just-initialized brain.
            # Sometimes the pickle file is corrupted. When this is the case
            # you can manually overwrite it by removing the .bak from the
            # .pickle.bak file. Then you can restore from the backup pickle.
            if ((loaded_brain.n_sensors == self.n_sensors) and
                    (loaded_brain.n_actions == self.n_actions)):
                print('Brain restored at timestep {0} from {1}'.format(
                    str(loaded_brain.timestep), self.pickle_filename))
                restored_brain = loaded_brain
            else:
                print('The brain {0} does not have the same number'.format(
                    self.pickle_filename))
                print('of sensors and actions as the world.')
                print('Creating a new brain from scratch.')
        except IOError:
            print('Couldn\'t open {0} for loading'.format(
                self.pickle_filename))
        except EOFError:
            # A truncated pickle (e.g. a save interrupted by ^C) raises
            # EOFError, which is not a PickleError.
            print('The pickle file is incomplete.')
            print('It was probably interrupted during saving.')
            print('Revert to the pickle.bak file if you have one.')
            print('Otherwise a new world will be created from scratch.')
        except pickle.PickleError:
            print('Error unpickling world')
        return restored_brain

    def visualize(self):
        """
        Show the current state and some history of the brain.

        This is typically called from a world's visualize method.
        """
        print(' ')
        print('{0} is {1} time steps old'.format(self.name, self.timestep))

        viz.brain_activity(self)
        self.affect.visualize(self)
コード例 #7
0
class Brain(object):
    """
    A biologically motivated learning algorithm.

    Becca's Brain contains all of its learning algorithms,
    integrated into a single whole.

    Check out connector.py for an example for how to attach a world
    to a brain.
    """
    def __init__(self,
                 num_sensors,
                 num_actions,
                 brain_name='test_brain',
                 visualize_interval=int(1e3),
                 log_directory=None):
        """
        Configure the Brain.

        Parameters
        ----------
        brain_name : str
            A descriptive string identifying the brain.
        log_directory : str
            The full path name to a directory where information and
            backups for the world can be stored and retrieved.
            If it is None, a ``log`` directory next to this module
            is used. The directory is created if it does not exist.
        num_actions : int
            The total number of action outputs that the world is expecting.
        num_sensors : int
            The total number of sensor inputs that the world is providing.
        visualize_interval : int
            How often to visualize the world, in time steps.
        """
        # num_sensors : int
        #     The number of distinct sensors that the world will be passing in
        #     to the brain.
        self.num_sensors = num_sensors
        # num_actions : int
        #     The number of distinct actions that the brain can choose to
        #     execute in the world.
        self.num_actions = num_actions
        # Inputs are the actions followed by the sensors.
        num_inputs = self.num_sensors + self.num_actions
        max_num_inputs = num_inputs
        max_num_features = 1 + 4 * max_num_inputs
        # actions : array of floats
        #     The set of actions to execute this time step.
        self.actions = np.ones(self.num_actions) * .1

        # timestep : int
        #     The age of the brain in discrete time steps.
        self.timestep = 0
        # visualize_interval : int
        #     How often to visualize the world, in time steps.
        self.visualize_interval = visualize_interval
        # backup_interval : int
        #     The number of time steps between saving a copy of the brain
        #     out to a pickle file for easy recovery.
        self.backup_interval = int(1e5)
        # name : str
        #     Unique name for this brain.
        self.name = brain_name

        if log_directory is None:
            # Identify the full local path of the brain.py module.
            # This trick is used to conveniently locate other Becca resources.
            module_path = os.path.dirname(os.path.abspath(__file__))
            # log_dir : str
            #     Path to the log directory. This is where backups
            #     and images of the brain's state and performance are kept.
            self.log_dir = os.path.normpath(os.path.join(module_path, 'log'))
        else:
            self.log_dir = log_directory
        # Check whether the directory is already there. If not, create it.
        if not os.path.isdir(self.log_dir):
            os.makedirs(self.log_dir)
        # pickle_filename : str
        #     Path and filename of the backup pickle file.
        self.pickle_filename = os.path.join(
            self.log_dir, '{0}.pickle'.format(brain_name))

        # affect : Affect
        #     See the pydocs in the module affect.py for the class Affect.
        self.affect = Affect()
        # satisfaction : float
        #     The level of contentment experienced by the brain.
        #     Higher contentment dampens curiosity and the drive to explore.
        self.satisfaction = 0.

        # featurizer : Featurizer
        #     The featurizer is an unsupervised learner that learns
        #     features from the inputs.
        self.featurizer = Featurizer(self, max_num_inputs, max_num_features)
        # model : Model
        #     The model builds sequences of features and goals and uses
        #     them to choose new goals.
        self.model = Model(max_num_features, self)

    def sense_act_learn(self, sensors, reward):
        """
        Take sensor and reward data in and use them to choose an action.

        Parameters
        ----------
        sensors : array of floats
            The information coming from the sensors in the world.
            The array should have self.num_sensors inputs.
            Whatever the low and high value of each sensor, its value
            will be rescaled to fall between 0 and 1.
            Sensor values are interpreted as fuzzy binary
            values, rather than continuous values. For instance,
            the brain doesn't interpret a contact sensor value of .5
            to mean that the contact
            sensor was only weakly contacted. It interprets it
            to mean that the sensor was fully contacted for 50% of the sensing
            duration or that there is a 50% chance that the sensor was
            fully contacted during the entire sensing duration. For another
            example, a light sensor reading of zero won't be
            interpreted by the brain as darkness. It will just be
            interpreted as a lack of information about the lightness.
        reward : float
            The extent to which the brain is being rewarded by the
            world. It is expected to be between -1 and 1, inclusive.
            -1 is the worst pain ever. 1 is the most intense ecstasy
            imaginable. 0 is neutral.

        Returns
        -------
        actions : array of floats
            The action commands that the brain is sending to the world
            to be executed. The array should have self.num_actions
            inputs in it. Each value should be binary: 0 and 1. This
            allows the brain to learn most effectively how to interact
            with the world to obtain more reward.
        """
        self.timestep += 1

        # Calculate the "mood" of the agent.
        self.satisfaction = self.affect.update(reward)

        # The previous step's actions are fed back in ahead of the sensors.
        input_activities = np.concatenate((self.actions, sensors))
        feature_activities, live_features = self.featurizer.featurize(
            input_activities)
        feature_goals = self.model.step(feature_activities,
                                        live_features,
                                        reward)
        # Pass goals back down.
        input_goals = self.featurizer.defeaturize(feature_goals)

        # Isolate the actions from the rest of the goals.
        # Actions occupy the first num_actions input slots.
        self.actions = input_goals[:self.num_actions]

        # Create a set of random actions.
        # This is occasionally helpful when debugging.
        take_random_actions = False
        if take_random_actions:
            self.actions = self.random_actions()

        # Periodically back up the brain.
        if (self.timestep % self.backup_interval) == 0:
            self.backup()

        return self.actions

    def random_actions(self):
        """
        Generate a random set of actions.

        This is used for debugging. Running a world with random
        actions gives a baseline performance floor on a world.
        Each action is independently set to 1 with probability
        ``.1 / num_actions`` and to 0 otherwise.

        Returns
        -------
        actions : array of floats
            See sense_act_learn.actions.
        """
        threshold = .1 / float(self.num_actions)
        action_strength = np.random.random_sample(self.num_actions)
        actions = np.zeros(self.num_actions)
        actions[np.where(action_strength < threshold)] = 1.
        return actions

    def report_performance(self):
        """
        Make a report of how the brain did over its lifetime.

        Returns
        -------
        performance : float
            The average reward per time step collected by
            the brain over its lifetime.
        """
        performance = self.affect.visualize(self)
        return performance

    def backup(self):
        """
        Archive a copy of the brain object for future use.

        The brain is pickled to ``self.pickle_filename`` and then a second
        time to the same name with ``.bak`` appended. Errors are printed,
        not raised.

        Returns
        -------
        success : bool
            If the backup process completed without any problems, success
            is True, otherwise it is False.
        """
        success = False
        try:
            with open(self.pickle_filename, 'wb') as brain_data:
                pickle.dump(self, brain_data)
            # Save a second copy. If you only save one, and the user
            # happens to ^C out of the program while it is being saved,
            # the file becomes corrupted, and all the learning that the
            # brain did is lost.
            make_second_backup = True
            if make_second_backup:
                with open('{0}.bak'.format(self.pickle_filename),
                          'wb') as brain_data_bak:
                    pickle.dump(self, brain_data_bak)
        except IOError as err:
            print('File error: {0} encountered while saving brain data'.
                  format(err))
        except pickle.PickleError as perr:
            print('Pickling error: {0} encountered while saving brain data'.
                  format(perr))
        except Exception as err:
            # Best effort: a failed backup should not stop the brain.
            # (A bare ``except err:`` would itself fail with a NameError.)
            print('Unknown error: {0} encountered while saving brain data'.
                  format(err))
        else:
            success = True
        return success

    def restore(self):
        """
        Reconstitute the brain from a previously saved brain.

        The pickle stored at ``self.pickle_filename`` is loaded and accepted
        only if its ``num_sensors`` and ``num_actions`` match those of this
        (already initialized) brain. Failures are printed, not raised.

        Returns
        -------
        restored_brain : Brain
            If restoration was successful, the saved brain is returned.
            Otherwise a notification prints and this just-initialized
            brain (``self``) is returned.
        """
        restored_brain = self
        try:
            # NOTE: unpickling can execute arbitrary code. Only restore
            # from backup files that you trust.
            with open(self.pickle_filename, 'rb') as brain_data:
                loaded_brain = pickle.load(brain_data)

            # Compare the number of channels in the restored brain with
            # those in the already initialized brain. If it matches,
            # accept the brain. If it doesn't,
            # print a message, and keep the just-initialized brain.
            # Sometimes the pickle file is corrupted. When this is the case
            # you can manually overwrite it by removing the .bak from the
            # .pickle.bak file. Then you can restore from the backup pickle.
            if ((loaded_brain.num_sensors == self.num_sensors) and
                    (loaded_brain.num_actions == self.num_actions)):
                print('Brain restored at timestep {0} from {1}'.format(
                    str(loaded_brain.timestep), self.pickle_filename))
                restored_brain = loaded_brain
            else:
                print('The brain {0} does not have the same number'.format(
                    self.pickle_filename))
                print('of sensors and actions as the world.')
                print('Creating a new brain from scratch.')
        except IOError:
            print('Couldn\'t open {0} for loading'.format(
                self.pickle_filename))
        except EOFError:
            print('The pickle file is incomplete.')
            print('It was probably interrupted during saving.')
            print('Revert to the pickle.bak file if you have one.')
            print('Otherwise a new world will be created from scratch.')
        except pickle.PickleError as err:
            print('Error unpickling world: {0}'.format(err))
        return restored_brain