def __init__(self, serializer, task_scheduler, scramble=False, max_reward_per_task=10): # save parameters into member variables self._task_scheduler = task_scheduler self._serializer = serializer self._max_reward_per_task = max_reward_per_task # cumulative reward per task self._reward_per_task = defaultdict(int) # the event manager is the controller that dispatches # changes in the environment (like new inputs or state changes) # to handler functions in the tasks that tell the environment # how to react self.event_manager = EventManager() # intialize member variables self._current_task = None self._current_world = None # we hear to our own output self._output_channel_listener = InputChannel(serializer) if scramble: serializer = ScramblingSerializerWrapper(serializer) # output channel self._output_channel = OutputChannel(serializer) # input channel self._input_channel = InputChannel(serializer) # priority of ongoing message self._output_priority = 0 # reward that is to be given at the learner at the end of the task self._reward = None # Current task time self._task_time = None # Task deinitialized self._current_task_deinitialized = False # Internal logger self.logger = logging.getLogger(__name__) # signals self.world_updated = Observable() self.task_updated = Observable() self.reward_given = Observable() # Register channel observers self._input_channel.sequence_updated.register( self._on_input_sequence_updated) self._input_channel.message_updated.register( self._on_input_message_updated) self._output_channel_listener.sequence_updated.register( self._on_output_sequence_updated) self._output_channel_listener.message_updated.register( self._on_output_message_updated)
class Environment: ''' The Environment is the one that communicates with the Learner, interpreting its output and reacting to it. The interaction is governed by an ongoing task which is picked by a TaskScheduler object. :param serializer: a Serializer object that translates text into binary and back. :param task_scheduler: a TaskScheduler object that determines which task is going to be run next. :param scramble: if True, the words outputted by the tasks are randomly scrambled. :param max_reward_per_task: maximum amount of reward that a learner can receive for a given task. ''' def __init__(self, serializer, task_scheduler, scramble=False, max_reward_per_task=10): # save parameters into member variables self._task_scheduler = task_scheduler self._serializer = serializer self._max_reward_per_task = max_reward_per_task # cumulative reward per task self._reward_per_task = defaultdict(int) # the event manager is the controller that dispatches # changes in the environment (like new inputs or state changes) # to handler functions in the tasks that tell the environment # how to react self.event_manager = EventManager() # intialize member variables self._current_task = None self._current_world = None # we hear to our own output self._output_channel_listener = InputChannel(serializer) if scramble: serializer = ScramblingSerializerWrapper(serializer) # output channel self._output_channel = OutputChannel(serializer) # input channel self._input_channel = InputChannel(serializer) # priority of ongoing message self._output_priority = 0 # reward that is to be given at the learner at the end of the task self._reward = None # Current task time self._task_time = None # Task separator issued self._task_separator_issued = False # Internal logger self.logger = logging.getLogger(__name__) # signals self.world_updated = Observable() self.task_updated = Observable() # Register channel observers self._input_channel.sequence_updated.register( self._on_input_sequence_updated) self._input_channel.message_updated.register( self._on_input_message_updated) self._output_channel_listener.sequence_updated.register( self._on_output_sequence_updated) self._output_channel_listener.message_updated.register( self._on_output_message_updated) def next(self, learner_input): '''Main loop of the Environment. Receives one bit from the learner and produces a response (also one bit)''' # Make sure we have a task if not self._current_task: self._switch_new_task() # If the task has not reached the end by either Timeout or # achieving the goal if not self._current_task.has_ended(): # Check if a Timeout occurred self._current_task.check_timeout(self._task_time) # Process the input from the learner and raise events if learner_input is not None: # record the input from the learner and deserialize it # TODO this bit is dropped otherwise on a timeout... self._input_channel.consume_bit(learner_input) # fill the output buffer if the task hasn't produced any output if self._output_channel.is_empty(): # demand for some output from the task (usually, silence) self._output_channel.set_message( self._current_task.get_default_output()) # remember if this was acting as a task separator if self._current_task.has_ended(): self._task_separator_issued = True # We are in the middle of the task, so no rewards are given reward = None else: # If the task has ended and there is nothing else to say, # issue a silence and then return reward and move to next task if self._output_channel.is_empty(): # TODO: taks separation should be implemented at the task-level if self._task_separator_issued: # I have nothing to say, I have nothing to say, I have... # reward the learner if necessary and switch to new task reward = self._reward if self._reward is not None else 0 self._switch_new_task() self._task_separator_issued = False else: self._output_channel.set_message( self._current_task.get_default_output()) self._task_separator_issued = True reward = None else: # Do Nothing until the output channel is empty reward = None # Get one bit from the output buffer and ship it output = self._output_channel.consume_bit() # we hear to ourselves (WARNING: this can still generate behavior # in the task via the OutputMessageUpdated event) self._output_channel_listener.consume_bit(output) # advance time self._task_time += 1 if reward is not None: # process the reward (clearing it if it's not allowed) reward = self._allowable_reward(reward) self._task_scheduler.reward(reward) return output, reward def get_reward_per_task(self): ''' Returns a dictonary that contains the cumulative reward for each task. ''' return self._reward_per_task def _allowable_reward(self, reward): '''Checks if the reward is allowed within the limits of the `max_reward_per_task` parameter, and resets it to 0 if not.''' task_name = self._current_task.get_name() if self._reward_per_task[task_name] < self._max_reward_per_task: self._reward_per_task[task_name] += reward return reward else: return 0 def is_silent(self): ''' Tells if the environment is sending any information through the output channel. ''' return self._output_channel.is_silent() def _on_input_sequence_updated(self, sequence): if self.event_manager.raise_event(SequenceReceived(sequence)): self.logger.debug("Sequence received by running task: '{0}'".format( sequence)) def _on_input_message_updated(self, message): # send the current received message to the task if self.event_manager.raise_event(MessageReceived( message)): self.logger.debug("Message received by running task: '{0}'".format( message)) def _on_output_sequence_updated(self, sequence): self.event_manager.raise_event(OutputSequenceUpdated(sequence)) def _on_output_message_updated(self, message): self.event_manager.raise_event(OutputMessageUpdated(message)) def set_reward(self, reward, message='', priority=0): '''Sets the reward that is going to be given to the learner once the task has sent all the remaining message''' self._reward = reward self._current_task.end() self.logger.debug('Setting reward {0} with message "{1}"' ' and priority {2}' .format(reward, message, priority)) self.set_message(message, priority) def set_message(self, message, priority=0): ''' Saves the message in the output buffer so it can be delivered bit by bit. It overwrites any previous content. ''' if self._output_channel.is_empty() or priority >= self._output_priority: self.logger.debug('Setting message "{0}" with priority {1}' .format(message, priority)) self._output_channel.set_message(message) self._output_priority = priority else: self.logger.info( 'Message "{0}" blocked because of ' 'low priority ({1}<{2}) '.format( message, priority, self._output_priority) ) def raise_event(self, event): return self.event_manager.raise_event(event) def raise_state_changed(self): ''' This rases a StateChanged Event, meaning that something in the state of the world or the tasks changed (but we don't keep track what) ''' # state changed events can only be raised if the current task is # started if self._current_task and self._current_task.has_started(): # tasks that have a world should also take the world state as # an argument if self._current_world: self.raise_event(StateChanged( self._current_world.state, self._current_task.state)) else: self.raise_event(StateChanged(self._current_task.state)) return True return False def _deregister_current_task(self): # deregister previous event managers if self._current_task: self._current_task.deinit() self._deregister_task_triggers(self._current_task) def _on_task_ended(self, task): assert (task == self._current_task) # when a task ends, it doesn't process any more events self._deregister_current_task() def _switch_new_task(self): ''' Asks the task scheduler for a new task, reset buffers and time, and registers the event handlers ''' # pick a new task self._current_task = self._task_scheduler.get_next_task() # register to the ending event self._current_task.ended_updated.register(self._on_task_ended) try: # This is to check whether the user didn't mess up in instantiating # the class self._current_task.get_world() except TypeError: raise RuntimeError("The task {0} is not correctly instantiated. " "Are you sure you are not forgetting to " "instantiate the class?".format( self._current_task)) self.logger.debug("Starting new task: {0}".format(self._current_task)) # check if it has a world: if self._current_task.get_world() != self._current_world: # if we had an ongoing world, end it. if self._current_world: self._current_world.end() self._deregister_task_triggers(self._current_world) self._current_world = self._current_task.get_world() if self._current_world: # register new event handlers for the world self._register_task_triggers(self._current_world) # initialize the new world self._current_world.start(self) self.world_updated(self._current_world) # reset state self._task_time = 0 self._reward = None self._input_channel.clear() self._output_channel.clear() self._output_channel_listener.clear() # register new event handlers self._register_task_triggers(self._current_task) # start the task, sending the current environment # so it can interact by sending back rewards and messages self._current_task.start(self) self.task_updated(self._current_task) def _deregister_task_triggers(self, task): for trigger in task.get_triggers(): try: self.event_manager.deregister(task, trigger) except ValueError: # if the trigger was not registered, we don't worry about it pass except KeyError: # if the trigger was not registered, we don't worry about it pass task.clean_dynamic_handlers() def _register_task_triggers(self, task): for trigger in task.get_triggers(): self._register_task_trigger(task, trigger) def _register_task_trigger(self, task, trigger): self.event_manager.register(task, trigger)
def __init__(self, serializer, task_scheduler, scramble=False, max_reward_per_task=10000, byte_mode=False): # save parameters into member variables self._task_scheduler = task_scheduler self._serializer = serializer self._max_reward_per_task = max_reward_per_task # cumulative reward per task self._reward_per_task = defaultdict(int) # the event manager is the controller that dispatches # changes in the environment (like new inputs or state changes) # to handler functions in the tasks that tell the environment # how to react self.event_manager = EventManager() # intialize member variables self._current_task = None self._current_world = None if scramble: serializer = ScramblingSerializerWrapper(serializer) if byte_mode: # we hear to our own output self._output_channel_listener = ByteInputChannel(serializer) # output channel self._output_channel = ByteOutputChannel(serializer) # input channel self._input_channel = ByteInputChannel(serializer) else: # we hear to our own output self._output_channel_listener = InputChannel(serializer) # output channel self._output_channel = OutputChannel(serializer) # input channel self._input_channel = InputChannel(serializer) # priority of ongoing message self._output_priority = 0 # reward that is to be given at the learner at the end of the task self._reward = None self._result = None self._last_result = None # reward that is to be given immediately self._immediate_reward = None # Current task time self._task_time = None # Task separator issued self._task_separator_issued = False # Internal logger self.logger = logging.getLogger(__name__) # signals self.world_updated = Observable() self.task_updated = Observable() # Register channel observers self._input_channel.sequence_updated.register( self._on_input_sequence_updated) self._input_channel.message_updated.register( self._on_input_message_updated) self._output_channel_listener.sequence_updated.register( self._on_output_sequence_updated) self._output_channel_listener.message_updated.register( self._on_output_message_updated)
class Environment: ''' The Environment is the one that communicates with the Learner, interpreting its output and reacting to it. The interaction is governed by an ongoing task which is picked by a TaskScheduler object. :param serializer: a Serializer object that translates text into binary and back. :param task_scheduler: a TaskScheduler object that determines which task is going to be run next. :param scramble: if True, the words outputted by the tasks are randomly scrambled. :param max_reward_per_task: maximum amount of reward that a learner can receive for a given task. ''' def __init__(self, serializer, task_scheduler, scramble=False, max_reward_per_task=10000, byte_mode=False): # save parameters into member variables self._task_scheduler = task_scheduler self._serializer = serializer self._max_reward_per_task = max_reward_per_task # cumulative reward per task self._reward_per_task = defaultdict(int) # the event manager is the controller that dispatches # changes in the environment (like new inputs or state changes) # to handler functions in the tasks that tell the environment # how to react self.event_manager = EventManager() # intialize member variables self._current_task = None self._current_world = None if scramble: serializer = ScramblingSerializerWrapper(serializer) if byte_mode: # we hear to our own output self._output_channel_listener = ByteInputChannel(serializer) # output channel self._output_channel = ByteOutputChannel(serializer) # input channel self._input_channel = ByteInputChannel(serializer) else: # we hear to our own output self._output_channel_listener = InputChannel(serializer) # output channel self._output_channel = OutputChannel(serializer) # input channel self._input_channel = InputChannel(serializer) # priority of ongoing message self._output_priority = 0 # reward that is to be given at the learner at the end of the task self._reward = None self._result = None self._last_result = None # reward that is to be given immediately self._immediate_reward = None # Current task time self._task_time = None # Task separator issued self._task_separator_issued = False # Internal logger self.logger = logging.getLogger(__name__) # signals self.world_updated = Observable() self.task_updated = Observable() # Register channel observers self._input_channel.sequence_updated.register( self._on_input_sequence_updated) self._input_channel.message_updated.register( self._on_input_message_updated) self._output_channel_listener.sequence_updated.register( self._on_output_sequence_updated) self._output_channel_listener.message_updated.register( self._on_output_message_updated) def next(self, learner_input): '''Main loop of the Environment. Receives one bit from the learner and produces a response (also one bit)''' self._last_result = None # will be set while execution is inside this function or its child tree # Make sure we have a task if not self._current_task: self._switch_new_task() # If the task has not reached the end by either Timeout or # achieving the goal if not self._current_task.has_ended(): reward = None # Check if a Timeout occurred self._current_task.check_timeout(self._task_time) # Process the input from the learner and raise events if learner_input is not None: # record the input from the learner and deserialize it # TODO this bit is dropped otherwise on a timeout... self._input_channel.consume(learner_input) # switch to next task immediately if this input caused the task to end # and there is no feedback to output (output_channel is empty) if self._current_task.has_ended() and self._output_channel.is_empty(): self._switch_new_task() # We are in the middle of the task, so no rewards are given else: # If the task is ended and there is nothing else to say, # issue a silence and then return reward and move to next task if self._output_channel.is_empty(): if self._task_separator_issued or self._should_skip_separator(): # Have nothing more to say # reward the learner if necessary and switch to new task reward = self._reward if self._reward is not None else 0 self._switch_new_task() self._task_separator_issued = False else: self._output_channel.set_message( self._serializer.SILENCE_TOKEN) self._task_separator_issued = True reward = None else: # TODO: decide what to do here. # Should we consume the bit or not? self._input_channel.consume(learner_input) # If there is still something to say, continue saying it reward = None # Get one bit from the output buffer and ship it if self._output_channel.is_empty(): self._output_channel.set_message(self._serializer.SILENCE_TOKEN) output = self._output_channel.consume() # we hear to ourselves self._output_channel_listener.consume(output) # advance time self._task_time += 1 if self._immediate_reward is not None and reward is None: reward = self._immediate_reward self._immediate_reward = None if reward is not None: # process the reward (clearing it if it's not allowed) reward = self._allowable_reward(reward) else: reward = 0 return output, reward def get_reward_per_task(self): ''' Returns a dictonary that contains the cumulative reward for each task. ''' return self._reward_per_task def _allowable_reward(self, reward): '''Checks if the reward is allowed within the limits of the `max_reward_per_task` parameter, and resets it to 0 if not.''' task_name = self._current_task.get_name() if self._reward_per_task[task_name] < self._max_reward_per_task: self._reward_per_task[task_name] += reward return reward else: return 0 def is_silent(self): ''' Tells if the environment is sending any information through the output channel. ''' return self._output_channel.is_silent() def _on_input_sequence_updated(self, sequence): if self.event_manager.raise_event(SequenceReceived(sequence)): self.logger.debug("Sequence received by running task: '{0}'".format( sequence)) def _on_input_message_updated(self, message): # send the current received message to the task if self.event_manager.raise_event(MessageReceived( message)): self.logger.debug("Message received by running task: '{0}'".format( message)) def _on_output_sequence_updated(self, sequence): self.event_manager.raise_event(OutputSequenceUpdated(sequence)) def _on_output_message_updated(self, message): self.event_manager.raise_event(OutputMessageUpdated(message)) def _should_skip_separator(self): return hasattr(self._current_task, 'skip_task_separator') and self._current_task.skip_task_separator def set_result(self, result, message='', priority=0, provide_result_as_reward=True): # the following two ifs prevent repeating the same feedback ad infinitum, which otherwise happens in mini-tasks # in case of a repeated invalid input. self._result is set back to None every time a new task is switched. if self._result is True and result is True: return if self._result is False and result is False: return if provide_result_as_reward: self._reward = result self._result = result self._current_task.end() self.logger.debug('Terminating instance with result {0} with message "{1}"' ' and priority {2}' .format(result, message, priority)) # adds a final space to the final message of the task # to separate the next task instructions self.set_message(message, priority) def set_immediate_reward(self, reward): '''Sets the reward immediately''' self._immediate_reward = reward self.logger.debug('Setting immediate reward {}'.format(reward)) def set_message(self, message, priority=0): ''' Saves the message in the output buffer so it can be delivered bit by bit. It overwrites any previous content. ''' if self._output_channel.is_empty() or priority >= self._output_priority: self.logger.debug('Setting message "{0}" with priority {1}' .format(message, priority)) self._output_channel.set_message(message) self._output_priority = priority else: self.logger.info( 'Message "{0}" blocked because of ' 'low priority ({1}<{2}) '.format( message, priority, self._output_priority) ) def raise_event(self, event): return self.event_manager.raise_event(event) def raise_state_changed(self): ''' This rases a StateChanged Event, meaning that something in the state of the world or the tasks changed (but we don't keep track what) ''' # state changed events can only be raised if the current task is # started if self._current_task and self._current_task.has_started(): # tasks that have a world should also take the world state as # an argument if self._current_world: self.raise_event(StateChanged( self._current_world.state, self._current_task.state)) else: self.raise_event(StateChanged(self._current_task.state)) return True return False def _switch_new_task(self): ''' Asks the task scheduler for a new task, reset buffers and time, and registers the event handlers ''' # deregister previous event managers if self._current_task: self._current_task.deinit() self._deregister_task_triggers(self._current_task) # pick a new task if self._result != None: self._last_result = self._result self._task_scheduler.reward(self._result) self._result = None self._current_task = self._task_scheduler.get_next_task() try: # This is to check whether the user didn't mess up in instantiating # the class self._current_task.get_world() except TypeError: raise RuntimeError("The task {0} is not correctly instantiated. " "Are you sure you are not forgetting to " "instantiate the class?".format( self._current_task)) self.logger.debug("Starting new task: {0}".format(self._current_task)) # check if it has a world: if self._current_task.get_world() != self._current_world: # if we had an ongoing world, end it. if self._current_world: self._current_world.end() self._deregister_task_triggers(self._current_world) self._current_world = self._current_task.get_world() if self._current_world: # register new event handlers for the world self._register_task_triggers(self._current_world) # initialize the new world self._current_world.start(self) self.world_updated(self._current_world) # reset state self._task_time = 0 self._reward = None self._input_channel.clear() self._output_channel.clear() self._output_channel_listener.clear() # register new event handlers self._register_task_triggers(self._current_task) # start the task, sending the current environment # so it can interact by sending back rewards and messages self._current_task.start(self) self.task_updated(self._current_task) def _deregister_task_triggers(self, task): for trigger in task.get_triggers(): try: self.event_manager.deregister(task, trigger) except ValueError: # if the trigger was not registered, we don't worry about it pass except KeyError: # if the trigger was not registered, we don't worry about it pass task.clean_dynamic_handlers() def _register_task_triggers(self, task): for trigger in task.get_triggers(): self._register_task_trigger(task, trigger) def _register_task_trigger(self, task, trigger): self.event_manager.register(task, trigger)