class EnvironmentByteMessenger:
    """Exchange messages with an environment one symbol at a time.

    Outgoing messages are queued on a ``ByteOutputChannel`` and fed to
    ``env.next`` symbol by symbol; every symbol the environment returns is
    pushed into a ``ByteInputChannel``.
    """

    def __init__(self, env, serializer):
        """Store the collaborators, build both channels and run ``init``.

        Args:
            env: object exposing ``next(symbol)`` that returns a
                ``(symbol, reward)`` pair.
            serializer: passed to both channel constructors.
        """
        self._env = env
        self._serializer = serializer
        self._input_channel = ByteInputChannel(serializer)
        self._output_channel = ByteOutputChannel(serializer)
        self.init()

    def init(self):
        """Fetch the environment's first symbol and feed it to the input."""
        # The reward that comes with the first symbol is not used.
        first_symbol, _ = self._env.next(None)
        self._input_channel.consume(first_symbol)
        # Return value intentionally discarded.
        # NOTE(review): presumably kept for a side effect of get_text();
        # confirm against the channel implementation.
        self._input_channel.get_text()

    def send(self, msg=None):
        """Send ``msg`` to the environment symbol by symbol.

        Args:
            msg: message to send; any falsy value (``None``, ``''``) is
                replaced by a single space.

        Returns:
            The reward returned by the environment for the last symbol
            sent, or ``None`` when the output channel had nothing to send.
        """
        self._output_channel.set_message(msg or ' ')
        # Bound up front so that an output channel that is already empty
        # yields None instead of raising UnboundLocalError.
        reward = None
        while not self._output_channel.is_empty():
            env_symbol, reward = self._env.next(
                self._output_channel.consume())
            self._input_channel.consume(env_symbol)
        return reward

    def get_text(self):
        """Return the input channel's ``get_text()`` result."""
        return self._input_channel.get_text()
def __init__(self, serializer, byte_mode):
    """Build the input/output channels around ``serializer``.

    Args:
        serializer: the serialization protocol; stored on the instance
            and passed to both channel constructors.
        byte_mode: when truthy the byte-level channel classes are used,
            otherwise the plain ``InputChannel``/``OutputChannel``.
    """
    self._serializer = serializer
    # Select the channel pair once, then build input before output.
    if byte_mode:
        input_cls, output_cls = ByteInputChannel, ByteOutputChannel
    else:
        input_cls, output_cls = InputChannel, OutputChannel
    self._input_channel = input_cls(serializer)
    self._output_channel = output_cls(serializer)
    self.logger = logging.getLogger(__name__)
    # Starts out False.
    # NOTE(review): presumably toggled while a message is being emitted;
    # confirm where it is updated.
    self.speaking = False
def __init__(self, serializer, task_scheduler, scramble=False,
             max_reward_per_task=10000, byte_mode=False):
    """Initialise state, communication channels and channel observers.

    Args:
        serializer: serialization protocol. The unwrapped object is kept
            as ``self._serializer``; the channels receive it wrapped in
            ``ScramblingSerializerWrapper`` when ``scramble`` is truthy.
        task_scheduler: stored as ``self._task_scheduler``.
        scramble: whether the channels use a scrambling serializer.
        max_reward_per_task: stored as ``self._max_reward_per_task``.
        byte_mode: selects the byte-level channel classes instead of the
            plain ``InputChannel``/``OutputChannel``.
    """
    # Constructor arguments kept as-is.
    self._task_scheduler = task_scheduler
    self._serializer = serializer
    self._max_reward_per_task = max_reward_per_task
    # Cumulative reward, keyed by task (missing keys start at 0).
    self._reward_per_task = defaultdict(int)
    # The event manager is the controller that dispatches changes in the
    # environment (new inputs, state changes) to the task handler
    # functions that tell the environment how to react.
    self.event_manager = EventManager()
    # Nothing is running yet.
    self._current_task = None
    self._current_world = None

    # Only the channels see the (optionally) scrambled serializer.
    channel_serializer = (
        ScramblingSerializerWrapper(serializer) if scramble else serializer)
    if byte_mode:
        listen_cls, speak_cls = ByteInputChannel, ByteOutputChannel
    else:
        listen_cls, speak_cls = InputChannel, OutputChannel
    # An input-type channel that listens to our own output.
    self._output_channel_listener = listen_cls(channel_serializer)
    # Output channel.
    self._output_channel = speak_cls(channel_serializer)
    # Input channel.
    self._input_channel = listen_cls(channel_serializer)

    # Priority of the ongoing message.
    self._output_priority = 0
    # Reward to be given to the learner at the end of the task.
    self._reward = None
    self._result = None
    self._last_result = None
    # Reward to be given immediately.
    self._immediate_reward = None
    # Current task time.
    self._task_time = None
    # Whether the task separator has been issued.
    self._task_separator_issued = False
    # Internal logger.
    self.logger = logging.getLogger(__name__)
    # Signals.
    self.world_updated = Observable()
    self.task_updated = Observable()

    # Register the channel observers: input channel first, then the
    # listener attached to our own output.
    incoming = self._input_channel
    incoming.sequence_updated.register(self._on_input_sequence_updated)
    incoming.message_updated.register(self._on_input_message_updated)
    echo = self._output_channel_listener
    echo.sequence_updated.register(self._on_output_sequence_updated)
    echo.message_updated.register(self._on_output_message_updated)
def __init__(self, env, serializer):
    """Keep the environment and serializer, build channels, run ``init``.

    Args:
        env: stored as ``self._env``.
        serializer: stored as ``self._serializer`` and passed to both
            byte channel constructors.
    """
    self._env, self._serializer = env, serializer
    # Input channel is built before the output channel.
    self._input_channel = ByteInputChannel(self._serializer)
    self._output_channel = ByteOutputChannel(self._serializer)
    # Delegate the remaining start-up work to init().
    self.init()