def __init__(self, world, config=None):
    """
    Set up a Brain for a given world, optionally resuming a saved one.

    Cosmetic settings (reporting, visualization and backup cadence,
    naming, logging location) can be overridden through ``config``.
    Any key that is missing from ``config``, or whose value is None,
    falls back to the built-in default.

    Parameters
    ----------
    world: World
        The environment the brain is paired with. It must expose
        ``n_actions`` and ``n_sensors``; its ``name`` is read only when
        no brain name is supplied in ``config``.
    config: dict
        Maps brain parameter names to the desired values.

    Configuration parameters
    ------------------------
    backup_interval: int
        How often the brain will save a pickle backup of itself,
        in timesteps. Default 1e5.
    debug: boolean
        Print informative error messages? Default True.
    log_directory : str
        The full path name to a directory where information and
        backups for the world can be stored and retrieved.
        Defaults to a 'log' directory next to this module.
    n_features: int
        The limit on the number of features passed to the model.
        If this is smaller, Becca will run faster. If it is larger,
        Becca will have more capacity to learn. Only consulted when a
        new brain is built; a restored brain keeps its own value.
        Defaults to 2 * n_commands + 8 * n_sensors.
    name: str
        A descriptive string identifying the brain.
        Defaults to '<world.name>_brain'.
    reporting_interval: int
        How often the brain will report on performance. Default 1e3.
    restore : bool, optional
        If True, try to restore the brain from a previously saved
        version, picking up where it left off. Otherwise, or if
        nothing could be restored, create a new one. Default True.
    visualize_interval: int
        The number of time steps between creating a new performance
        calculation and visualization of the brain. Default 1e4.
    """
    defaults = {
        "backup_interval": 1e5,
        "debug": True,
        "log_directory": None,
        "n_features": None,
        "name": None,
        "reporting_interval": 1e3,
        "restore": True,
        "visualize_interval": 1e4,
    }
    if config is None:
        config = {}

    def setting(key):
        # A value of None in config is treated the same as a missing key.
        chosen = config.get(key)
        return defaults.get(key) if chosen is None else chosen

    # Without an explicit name, derive one from the world.
    brain_name = config.get("name")
    if brain_name is None:
        brain_name = '{0}_brain'.format(world.name)
    self.name = brain_name

    self.debug = setting("debug")

    # log_dir : str
    #     Where backups and images of the brain's state and performance
    #     are kept. When not configured, it sits in a 'log' directory
    #     beside this module, located through the module's own path.
    log_dir = config.get("log_directory")
    if log_dir is None:
        module_path = os.path.dirname(os.path.abspath(__file__))
        log_dir = os.path.normpath(os.path.join(module_path, 'log'))
    self.log_dir = log_dir
    # Make sure the directory exists before anything is written to it.
    if not os.path.isdir(self.log_dir):
        os.makedirs(self.log_dir)

    # pickle_filename : str
    #     Path and filename of the backup pickle file.
    self.pickle_filename = os.path.join(
        self.log_dir, '{0}.pickle'.format(self.name))

    # The world is required to provide these two members.
    # n_actions: int
    #     Total number of action outputs the world is expecting.
    # n_sensors: int
    #     Number of distinct sensors the world passes to the brain.
    self.n_actions = world.n_actions
    self.n_sensors = world.n_sensors

    self.timestep = 0

    # Only attempt a restore when asked to. A None result means
    # there was nothing usable to restore.
    restored_brain = restore(self) if setting("restore") else None

    if restored_brain is not None:
        # Adopt the learned state of the saved brain, member by member.
        inherited = (
            "timestep",
            "input_activities",
            "actions",
            "n_features",
            "postprocessor",
            "n_commands",
            "commands",
            "preprocessor",
            "affect",
            "satisfaction",
            "featurizer",
            "model",
            "actor",
        )
        for member in inherited:
            setattr(self, member, getattr(restored_brain, member))
    else:
        # Build every component from scratch.

        # The preprocessor turns raw sensors and commands into
        # discrete inputs. Actions are assumed to live in a continuous
        # space, so they can be repeatedly subdivided to produce
        # actions of various magnitudes and increase control.
        self.preprocessor = Preprocessor(n_sensors=self.n_sensors)
        # The postprocessor maps between actions and their
        # discretized form.
        self.postprocessor = Postprocessor(n_actions=self.n_actions)

        # actions: array of floats
        #     The actions to execute this time step. Starting them
        #     non-zero helps kick start the act-sense-decide loop.
        self.actions = np.ones(self.n_actions) * 0.1

        self.affect = Affect()
        # satisfaction: float
        #     The level of contentment experienced by the brain.
        #     Higher contentment dampens curiosity and
        #     the drive to explore.
        self.satisfaction = 0.0

        # Commands are discretized actions, suitable for use within
        # becca. The postprocessor translates commands into actions
        # and determines how many commands there are.
        self.n_commands = self.postprocessor.n_commands
        self.commands = np.zeros(self.n_commands)

        requested_features = config.get("n_features")
        if requested_features is None:
            requested_features = (
                2 * self.n_commands + 8 * self.n_sensors)
        self.n_features = requested_features
        self.input_activities = np.zeros(self.n_features)

        # The featurizer is an unsupervised learner that learns
        # features from the inputs.
        self.featurizer = Featurizer(
            debug=self.debug,
            n_inputs=self.n_features,
        )
        # The model builds sequences of features and goals and reward
        # for making predictions about its world.
        self.model = Model(
            brain=self,
            debug=self.debug,
            n_features=self.n_features,
        )
        # The actor takes conditional predictions from the model and
        # uses them to choose new goals.
        self.actor = Actor(self.n_features, self)

    # Superficial settings come last and apply to restored and new
    # brains alike, since they may change from session to session.
    self.backup_interval = setting("backup_interval")
    self.reporting_interval = setting("reporting_interval")
    self.visualize_interval = setting("visualize_interval")
def __init__(self, num_sensors, num_actions,
             brain_name='test_brain', log_directory=None):
    """
    Set up a new Brain sized for the given sensors and actions.

    Parameters
    ----------
    num_sensors : int
        The number of distinct sensors that the world will be passing
        in to the brain.
    num_actions : int
        The number of distinct actions that the brain can choose to
        execute in the world.
    brain_name : str
        A descriptive string identifying the brain. It is also used
        to name the backup pickle file.
    log_directory : str
        The full path name to a directory where information and
        backups for the world can be stored and retrieved. If None,
        a 'log' directory next to this module is used.
    """
    self.num_sensors = num_sensors
    self.num_actions = num_actions

    # The featurizer receives one input per sensor and per action,
    # and may produce up to 1 + 3 * (number of inputs) features.
    input_count = self.num_sensors + self.num_actions
    feature_cap = 1 + 3 * input_count

    # actions : array of floats
    #     The set of actions to execute this time step.
    self.actions = np.ones(self.num_actions) * 0.1

    # timestep : int
    #     The age of the brain in discrete time steps.
    self.timestep = 0

    # backup_interval : int
    #     The number of time steps between saving a copy of the brain
    #     out to a pickle file for easy recovery.
    self.backup_interval = 1e5

    # name : str
    #     Unique name for this brain.
    self.name = brain_name

    # log_dir : str
    #     Where backups and images of the brain's state and performance
    #     are kept. The fallback is a 'log' directory beside this
    #     module, located through the module's own path.
    log_dir = log_directory
    if log_dir is None:
        module_path = os.path.dirname(os.path.abspath(__file__))
        log_dir = os.path.normpath(os.path.join(module_path, 'log'))
    self.log_dir = log_dir
    # Make sure the directory exists before anything is written to it.
    if not os.path.isdir(self.log_dir):
        os.makedirs(self.log_dir)

    # pickle_filename : str
    #     Path and filename of the backup pickle file.
    pickle_name = '{0}.pickle'.format(brain_name)
    self.pickle_filename = os.path.join(self.log_dir, pickle_name)

    # affect : Affect
    #     See the pydocs in the module affect.py for the class Affect.
    self.affect = Affect()

    # satisfaction : float
    #     The level of contentment experienced by the brain.
    #     Higher contentment dampens curiosity and the drive to explore.
    self.satisfaction = 0.0

    # featurizer : Featurizer
    #     The featurizer is an unsupervised learner that learns
    #     features from the inputs.
    self.featurizer = Featurizer(input_count, feature_cap)

    # model : Model
    #     The model builds sequences of features and goals and uses
    #     them to choose new goals.
    self.model = Model(feature_cap, self)
def __init__(
        self,
        backup_interval=int(2**20),
        brain_name='test_brain',
        debug=True,
        log_directory=None,
        n_actions=4,
        n_features=64,
        n_sensors=4,
        timestep=0,
        visualize_interval=int(2**18),
):
    """
    Set up a new Brain from explicit keyword settings.

    Parameters
    ----------
    backup_interval: int
        How often the brain will save a pickle backup of itself,
        in timesteps.
    brain_name: str
        A descriptive string identifying the brain. It is also used
        to name the backup pickle file.
    debug: boolean
        Print informative error messages?
    log_directory : str
        The full path name to a directory where information and
        backups for the world can be stored and retrieved. If empty
        or None, a 'log' directory next to this module is used.
    n_actions: int
        The total number of action outputs that the world
        is expecting.
    n_features: int
        The limit on the number of features passed to the model.
        If this is smaller, Becca will run faster. If it is larger,
        Becca will have more capacity to learn. It is raised to
        n_actions + 4 * n_sensors when given a smaller value.
    n_sensors: int
        The number of distinct sensors that the world will be passing
        in to the brain.
    timestep: int
        The age of the brain in discrete time steps.
    visualize_interval: int
        How often to visualize the world, in time steps.
    """
    self.debug = debug
    self.n_sensors = n_sensors
    self.n_actions = n_actions

    # Enforce a floor on the feature count that scales with the
    # number of actions and sensors.
    feature_floor = self.n_actions + 4 * self.n_sensors
    self.n_features = np.maximum(n_features, feature_floor)
    self.input_activities = np.zeros(self.n_features)

    # actions: array of floats
    #     The actions to execute this time step. Starting them
    #     non-zero helps kick start the act-sense-decide loop.
    self.actions = np.ones(self.n_actions) * 0.1

    # The postprocessor maps between actions and their
    # discretized form.
    self.postprocessor = Postprocessor(n_actions=self.n_actions)

    # Commands are discretized actions, suitable for use within
    # becca. The postprocessor translates commands into actions
    # and determines how many commands there are.
    self.n_commands = self.postprocessor.n_commands
    # previous_commands: array of floats
    #     The discretized actions executed on the previous time step.
    self.previous_commands = np.zeros(self.n_commands)
    self.commands = np.zeros(self.n_commands)

    # The preprocessor turns raw sensors and commands into discrete
    # inputs. Actions are assumed to live in a continuous space, so
    # they can be repeatedly subdivided to produce actions of various
    # magnitudes and increase control.
    self.preprocessor = Preprocessor(
        n_commands=self.n_commands,
        n_sensors=self.n_sensors,
    )

    self.affect = Affect()
    # satisfaction: float
    #     The level of contentment experienced by the brain.
    #     Higher contentment dampens curiosity and the drive to explore.
    self.satisfaction = 0.0

    # The featurizer is an unsupervised learner that learns
    # features from the inputs.
    self.featurizer = Featurizer(
        debug=self.debug,
        n_inputs=self.n_features,
        threshold=1e3,
    )
    # The model builds sequences of features and goals and reward
    # for making predictions about its world.
    self.model = Model(
        brain=self,
        debug=self.debug,
        n_features=self.n_features,
    )
    # The actor takes conditional predictions from the model and
    # uses them to choose new goals.
    self.actor = Actor(self.n_features, self)

    self.timestep = timestep
    self.visualize_interval = visualize_interval
    self.backup_interval = backup_interval
    self.name = brain_name

    # log_dir : str
    #     Where backups and images of the brain's state and performance
    #     are kept. The fallback is a 'log' directory beside this
    #     module, located through the module's own path.
    log_dir = log_directory
    if not log_dir:
        module_path = os.path.dirname(os.path.abspath(__file__))
        log_dir = os.path.normpath(os.path.join(module_path, 'log'))
    self.log_dir = log_dir
    # Make sure the directory exists before anything is written to it.
    if not os.path.isdir(self.log_dir):
        os.makedirs(self.log_dir)

    # pickle_filename : str
    #     Path and filename of the backup pickle file.
    pickle_name = '{0}.pickle'.format(brain_name)
    self.pickle_filename = os.path.join(self.log_dir, pickle_name)