class ConsoleView(BaseView):
    """Curses view that scrolls the environment, learner and reward streams.

    The environment stream is drawn on screen row 0, the learner stream on
    row 1 and the reward history on row 2.  A one-line prompt on row 4
    collects the text typed by the user.
    """

    def __init__(self, env, session, serializer, show_world=False,
                 byte_channels=False):
        """Build the channels and subscribe to session/environment events.

        Args:
            env: environment object; its ``world_updated`` event is observed
                when ``show_world`` is true.
            session: session object providing the ``env_token_updated`` and
                ``learner_token_updated`` events.
            serializer: passed to the input channels that decode both
                token streams.
            show_world: when true, draw the world whenever it is updated.
            byte_channels: when true, use ``ByteInputChannel`` instead of
                ``InputChannel`` for both streams.
        """
        super(ConsoleView, self).__init__(env, session)
        # The view keeps its own scrolling copies of both streams (and of
        # the rewards) so the visible history survives when the channels
        # are cleared between tasks.
        self.input_buffer = ''
        self.output_buffer = ''
        self.reward_buffer = ''
        # Typed text that get_input() treats specially (see get_input).
        self.panic = 'SKIP'

        # One channel records the learner, the other the environment.
        channel_type = ByteInputChannel if byte_channels else InputChannel
        self._learner_channel = channel_type(serializer)
        self._env_channel = channel_type(serializer)

        # React to every change reported by the two channels.
        learner_events = self._learner_channel
        learner_events.sequence_updated.register(
            self.on_learner_sequence_updated)
        learner_events.message_updated.register(
            self.on_learner_message_updated)
        env_events = self._env_channel
        env_events.sequence_updated.register(self.on_env_sequence_updated)
        env_events.message_updated.register(self.on_env_message_updated)

        if show_world:
            # Only plot the world when the caller asked for it.
            env.world_updated.register(self.on_world_updated)

        # Feed the channels with the tokens observed by the session.
        session.env_token_updated.register(self.on_env_token_updated)
        session.learner_token_updated.register(
            self.on_learner_token_updated)
        del self.info['current_task']

    def on_total_reward_updated(self, reward):
        """Record the change in total reward and redraw the reward row."""
        delta = reward - self.info['reward']
        BaseView.on_total_reward_updated(self, reward)
        visible = self._scroll_msg_length
        # Left-pad with underscores, append the new symbol, then keep only
        # the tail that is shown on screen.
        history = "_" * visible + self.reward_buffer + \
            self.encode_reward(delta)
        self.reward_buffer = history[11 - visible:]
        self._win.addstr(self._reward_seq_y, 0, self.reward_buffer)
        self._win.refresh()

    @staticmethod
    def encode_reward(reward):
        """Return the one-character symbol for a reward change.

        Raises:
            KeyError: if ``reward`` is not one of -2, -1, 0, 1, 2.
        """
        symbols = {
            0: " ",
            1: "+",
            -1: "-",
            2: "2",
            -2: "\u01BB",
        }
        return symbols[reward]

    def on_env_token_updated(self, token):
        """Forward a token emitted by the environment to its channel."""
        self._env_channel.consume(token)

    def on_learner_token_updated(self, token):
        """Forward a token emitted by the learner to its channel."""
        self._learner_channel.consume(token)

    def _paint_row(self, row, rendered):
        """Write ``rendered`` at the start of ``row`` and refresh."""
        self._win.addstr(row, 0, rendered.encode(code).decode(code))
        self._win.refresh()

    def _paint_learner_row(self):
        """Redraw the learner row from the buffer and the pending bits."""
        pending = self._learner_channel.get_undeserialized()
        rendered = self.channel_to_str(self.input_buffer + ' ', pending)
        self._paint_row(self._learner_seq_y, rendered)

    def _paint_env_row(self):
        """Redraw the environment row from the buffer and pending bits."""
        pending = self._env_channel.get_undeserialized()
        rendered = self.channel_to_str(self.output_buffer, pending)
        self._paint_row(self._teacher_seq_y, rendered)

    def on_learner_message_updated(self, message):
        """Append the newest learner character and redraw the learner row."""
        # Messages arrive character by character, so the last character of
        # the channel text is the new one.
        if not self._learner_channel.get_text():
            return
        newest = self._learner_channel.get_text()[-1]
        self.input_buffer += newest
        self.input_buffer = self.input_buffer[-self._scroll_msg_length:]
        self._paint_learner_row()

    def on_learner_sequence_updated(self, sequence):
        """Redraw the learner row when its undeserialized bits change."""
        self._paint_learner_row()

    def on_env_message_updated(self, message):
        """Append the newest environment character and redraw its row."""
        if not self._env_channel.get_text():
            return
        newest = self._env_channel.get_text()[-1]
        self.output_buffer += newest
        self.output_buffer = self.output_buffer[-self._scroll_msg_length:]
        self._paint_env_row()

    def on_env_sequence_updated(self, sequence):
        """Redraw the environment row when its undeserialized bits change."""
        self._paint_env_row()

    def _paint_world(self, world):
        """Draw the string form of ``world`` at the top of the world window."""
        self._worldwin.addstr(0, 0, str(world))
        self._worldwin.refresh()

    def on_world_updated(self, world):
        """Track and draw a new world, or blank the window if it is falsy."""
        if not world:
            self._worldwin.clear()
            self._worldwin.refresh()
            return
        # Follow the state changes of the new world as well.
        world.state_updated.register(self.on_world_state_updated)
        self._paint_world(world)

    def on_world_state_updated(self, world):
        """Redraw the world after one of its state changes."""
        self._paint_world(world)

    def initialize(self):
        """Start curses and create the windows used by the view."""
        self._stdscr = curses.initscr()

        # Fixed layout: rows of the three streams, then the sub-windows.
        self._teacher_seq_y, self._learner_seq_y, self._reward_seq_y = 0, 1, 2
        self._world_win_y, self._world_win_x = 4, 0
        self._info_win_width, self._info_win_height = 20, 4
        self._user_input_win_y, self._user_input_win_x = 4, 10

        rows, cols = self._stdscr.getmaxyx()
        self.height, self.width = rows, cols
        # The scrolling rows stop one column short of the info box.
        self._scroll_msg_length = cols - self._info_win_width - 1

        self._win = self._stdscr.subwin(rows, cols, 0, 0)
        self._worldwin = self._win.subwin(
            rows - self._world_win_y, cols - self._world_win_x,
            self._world_win_y, self._world_win_x)
        # Info box (reward and time) in the top-right corner.
        self._info_win = self._win.subwin(
            self._info_win_height, self._info_win_width,
            0, cols - self._info_win_width)
        # Prompt: an editable field preceded by a short label.
        self._user_input_win = self._win.subwin(
            1, cols - self._user_input_win_x,
            self._user_input_win_y, self._user_input_win_x)
        self._user_input_label_win = self._win.subwin(
            1, self._user_input_win_x - 1, self._user_input_win_y, 0)

        curses.noecho()
        curses.cbreak()

    def get_input(self):
        """Prompt for a line of text and return it.

        When the typed text equals ``self.panic`` the environment's
        ``_task_time`` is set to infinity and an empty string is returned.
        """
        label = self._user_input_label_win
        label.addstr(0, 0, 'input:')
        label.refresh()

        # Echo the keystrokes only while the user is typing.
        curses.echo()
        max_chars = self.width - self._user_input_win_x
        typed = self._user_input_win.getstr(0, 0, max_chars).decode(code)
        curses.noecho()

        running_python2 = platform.python_version_tuple()[0] == '2'
        if running_python2:
            typed = to_unicode(typed)
        self._user_input_win.clear()

        if typed != self.panic:
            return typed
        self._env._task_time = float('inf')
        return ''

    def channel_to_str(self, text, bits):
        """Format one stream row: padded text tail plus the pending bits.

        The text is right-aligned and padded on the left with underscores;
        the last seven characters of ``bits`` follow inside brackets.
        """
        width = self._scroll_msg_length - 10
        text_tail = text[-width:]
        bits_tail = bits[-7:]
        return "{0:_>{length}}[{1: <8}]".format(
            text_tail, bits_tail, length=width)
class ConsoleView(BaseView):
    """Curses view that scrolls the environment, learner and reward streams.

    The environment stream is drawn on screen row 0, the learner stream on
    row 1 and the reward history on row 2.  A one-line prompt on row 4
    collects the text typed by the user.
    """

    def __init__(self, env, session, serializer, show_world=False,
                 byte_channels=False):
        """Build the channels and subscribe to session/environment events.

        Args:
            env: environment object; its ``world_updated`` event is observed
                when ``show_world`` is true.
            session: session object providing the ``env_token_updated`` and
                ``learner_token_updated`` events.
            serializer: passed to the input channels that decode both
                token streams.
            show_world: when true, draw the world whenever it is updated.
            byte_channels: when true, use ``ByteInputChannel`` instead of
                ``InputChannel`` for both streams.
        """
        super(ConsoleView, self).__init__(env, session)
        # The view keeps its own scrolling copies of both streams (and of
        # the rewards) so the visible history survives when the channels
        # are cleared between tasks.
        self.input_buffer = ''
        self.output_buffer = ''
        self.reward_buffer = ''
        # Typed text that get_input() treats specially (see get_input).
        self.panic = 'SKIP'

        # One channel records the learner, the other the environment.
        channel_type = ByteInputChannel if byte_channels else InputChannel
        self._learner_channel = channel_type(serializer)
        self._env_channel = channel_type(serializer)

        # React to every change reported by the two channels.
        for channel, on_sequence, on_message in (
                (self._learner_channel,
                 self.on_learner_sequence_updated,
                 self.on_learner_message_updated),
                (self._env_channel,
                 self.on_env_sequence_updated,
                 self.on_env_message_updated)):
            channel.sequence_updated.register(on_sequence)
            channel.message_updated.register(on_message)

        if show_world:
            # Only plot the world when the caller asked for it.
            env.world_updated.register(self.on_world_updated)

        # Feed the channels with the tokens observed by the session.
        session.env_token_updated.register(self.on_env_token_updated)
        session.learner_token_updated.register(
            self.on_learner_token_updated)
        del self.info['current_task']

    def on_total_reward_updated(self, reward):
        """Record the change in total reward and redraw the reward row."""
        delta = reward - self.info['reward']
        BaseView.on_total_reward_updated(self, reward)
        visible = self._scroll_msg_length
        # Left-pad with underscores, append the new symbol, then keep only
        # the tail that is shown on screen.
        history = "_" * visible + self.reward_buffer + \
            self.encode_reward(delta)
        self.reward_buffer = history[11 - visible:]
        self._win.addstr(self._reward_seq_y, 0, self.reward_buffer)
        self._win.refresh()

    @staticmethod
    def encode_reward(reward):
        """Return the one-character symbol for a reward change.

        Raises:
            KeyError: if ``reward`` is not one of -2, -1, 0, 1, 2.
        """
        symbols = {
            0: " ",
            1: "+",
            -1: "-",
            2: "2",
            -2: "\u01BB",
        }
        return symbols[reward]

    def on_env_token_updated(self, token):
        """Forward a token emitted by the environment to its channel."""
        self._env_channel.consume(token)

    def on_learner_token_updated(self, token):
        """Forward a token emitted by the learner to its channel."""
        self._learner_channel.consume(token)

    def _paint_row(self, row, rendered):
        """Write ``rendered`` at the start of ``row`` and refresh."""
        self._win.addstr(row, 0, rendered.encode(code).decode(code))
        self._win.refresh()

    def _paint_learner_row(self):
        """Redraw the learner row from the buffer and the pending bits."""
        pending = self._learner_channel.get_undeserialized()
        rendered = self.channel_to_str(self.input_buffer + ' ', pending)
        self._paint_row(self._learner_seq_y, rendered)

    def _paint_env_row(self):
        """Redraw the environment row from the buffer and pending bits."""
        pending = self._env_channel.get_undeserialized()
        rendered = self.channel_to_str(self.output_buffer, pending)
        self._paint_row(self._teacher_seq_y, rendered)

    def on_learner_message_updated(self, message):
        """Append the newest learner character and redraw the learner row."""
        # Messages arrive character by character, so the last character of
        # the channel text is the new one.
        if not self._learner_channel.get_text():
            return
        newest = self._learner_channel.get_text()[-1]
        self.input_buffer += newest
        self.input_buffer = self.input_buffer[-self._scroll_msg_length:]
        self._paint_learner_row()

    def on_learner_sequence_updated(self, sequence):
        """Redraw the learner row when its undeserialized bits change."""
        self._paint_learner_row()

    def on_env_message_updated(self, message):
        """Append the newest environment character and redraw its row."""
        if not self._env_channel.get_text():
            return
        newest = self._env_channel.get_text()[-1]
        self.output_buffer += newest
        self.output_buffer = self.output_buffer[-self._scroll_msg_length:]
        self._paint_env_row()

    def on_env_sequence_updated(self, sequence):
        """Redraw the environment row when its undeserialized bits change."""
        self._paint_env_row()

    def _paint_world(self, world):
        """Draw the string form of ``world`` at the top of the world window."""
        self._worldwin.addstr(0, 0, str(world))
        self._worldwin.refresh()

    def on_world_updated(self, world):
        """Track and draw a new world, or blank the window if it is falsy."""
        if not world:
            self._worldwin.clear()
            self._worldwin.refresh()
            return
        # Follow the state changes of the new world as well.
        world.state_updated.register(self.on_world_state_updated)
        self._paint_world(world)

    def on_world_state_updated(self, world):
        """Redraw the world after one of its state changes."""
        self._paint_world(world)

    def initialize(self):
        """Start curses and create the windows used by the view."""
        self._stdscr = curses.initscr()

        # Fixed layout: rows of the three streams, then the sub-windows.
        self._teacher_seq_y, self._learner_seq_y, self._reward_seq_y = 0, 1, 2
        self._world_win_y, self._world_win_x = 4, 0
        self._info_win_width, self._info_win_height = 20, 4
        self._user_input_win_y, self._user_input_win_x = 4, 10

        rows, cols = self._stdscr.getmaxyx()
        self.height, self.width = rows, cols
        # The scrolling rows stop one column short of the info box.
        self._scroll_msg_length = cols - self._info_win_width - 1

        self._win = self._stdscr.subwin(rows, cols, 0, 0)
        self._worldwin = self._win.subwin(
            rows - self._world_win_y, cols - self._world_win_x,
            self._world_win_y, self._world_win_x)
        # Info box (reward and time) in the top-right corner.
        self._info_win = self._win.subwin(
            self._info_win_height, self._info_win_width,
            0, cols - self._info_win_width)
        # Prompt: an editable field preceded by a short label.
        self._user_input_win = self._win.subwin(
            1, cols - self._user_input_win_x,
            self._user_input_win_y, self._user_input_win_x)
        self._user_input_label_win = self._win.subwin(
            1, self._user_input_win_x - 1, self._user_input_win_y, 0)

        curses.noecho()
        curses.cbreak()

    def get_input(self):
        """Prompt for a line of text and return it.

        When the typed text equals ``self.panic`` the environment's
        ``_task_time`` is set to infinity and an empty string is returned.
        """
        label = self._user_input_label_win
        label.addstr(0, 0, 'input:')
        label.refresh()

        # Echo the keystrokes only while the user is typing.
        curses.echo()
        max_chars = self.width - self._user_input_win_x
        typed = self._user_input_win.getstr(0, 0, max_chars).decode(code)
        curses.noecho()
        self._user_input_win.clear()

        if typed != self.panic:
            return typed
        self._env._task_time = float('inf')
        return ''

    def channel_to_str(self, text, bits):
        """Format one stream row: padded text tail plus the pending bits.

        The text is right-aligned and padded on the left with underscores;
        the last seven characters of ``bits`` follow inside brackets.
        """
        width = self._scroll_msg_length - 10
        text_tail = text[-width:]
        bits_tail = bits[-7:]
        return "{0:_>{length}}[{1: <8}]".format(
            text_tail, bits_tail, length=width)
class StdInOutView(WinBaseView):
    """Plain stdin/stdout view of the environment and learner streams.

    Each call to ``get_input`` prints the current environment row, learner
    row and (in byte mode) reward row, then reads one line from stdin.
    """

    def __init__(self, env, session, serializer, show_world=False,
                 byte_channels=False):
        """Build the channels and subscribe to session/environment events.

        Args:
            env: environment object; its ``world_updated`` event is observed
                when ``show_world`` is true.
            session: session object providing the ``env_token_updated`` and
                ``learner_token_updated`` events.
            serializer: passed to the input channels that decode both
                token streams.
            show_world: when true, print the world on each state change.
            byte_channels: when true, use ``ByteInputChannel`` for both
                streams and maintain a reward row.
        """
        super(StdInOutView, self).__init__(env, session)
        # for visualization purposes, we keep an internal buffer of the
        # input and output stream so when they are cleared from task to
        # task, we can keep the history intact.
        self.input_buffer = ''
        self.output_buffer = ''
        # special inputs recognised by get_input()
        self.panic = u'SKIP'
        self.quit = 'QUIT'
        self._byte_channels = byte_channels
        if byte_channels:
            # record what the learner says
            self._learner_channel = ByteInputChannel(serializer)
            # record what the environment says
            self._env_channel = ByteInputChannel(serializer)
            # reward buffer (only exists in byte mode)
            self._reward_buffer = ''
        else:
            # record what the learner says
            self._learner_channel = InputChannel(serializer)
            # record what the environment says
            self._env_channel = InputChannel(serializer)
        # listen to the updates in these channels
        self._learner_channel.sequence_updated.register(
            self.on_learner_sequence_updated)
        self._learner_channel.message_updated.register(
            self.on_learner_message_updated)
        self._env_channel.sequence_updated.register(
            self.on_env_sequence_updated)
        self._env_channel.message_updated.register(
            self.on_env_message_updated)
        if show_world:
            # register a handler to plot the world if show_world is active
            env.world_updated.register(self.on_world_updated)
        # connect the channels with the observed input bits
        session.env_token_updated.register(self.on_env_token_updated)
        session.learner_token_updated.register(
            self.on_learner_token_updated)
        del self.info['current_task']

    def on_total_reward_updated(self, reward):
        """Store the new total reward and, in byte mode, mark the change.

        The last character of the reward buffer is replaced by the symbol
        of the reward change before the reward row is re-rendered.
        """
        change = reward - self.info['reward']
        self.info['reward'] = reward
        if self._byte_channels:
            self._reward_buffer = self._reward_buffer[0:-1]
            self._reward_buffer += self._encode_reward(change)
            self._reward = self._render_reward()

    @staticmethod
    def _encode_reward(reward):
        """Return the one-character symbol for a reward change.

        Raises:
            KeyError: if ``reward`` is not one of -2, -1, 0, 1, 2.
        """
        d = {0: " ", 1: "+", -1: "-", 2: "2", -2: "\u01BB"}
        return d[reward]

    def _render_reward(self):
        """Format the reward row (byte mode only) from the reward buffer."""
        return self.channel_to_str(
            self._reward_buffer + ' ',
            self._env_channel.get_undeserialized())

    def on_env_token_updated(self, token):
        """Forward a token emitted by the environment to its channel."""
        self._env_channel.consume(token)

    def on_learner_token_updated(self, token):
        """Forward a token emitted by the learner to its channel."""
        self._learner_channel.consume(token)

    def on_learner_message_updated(self, message):
        """Append the newest learner character and re-render the rows."""
        # we use the fact that messages arrive character by character
        if self._learner_channel.get_text():
            self.input_buffer += self._learner_channel.get_text()[-1]
            self.input_buffer = self.input_buffer[-self._scroll_msg_length:]
            self._learner_input = self.channel_to_str(
                self.input_buffer + ' ',
                self._learner_channel.get_undeserialized())
            if self._byte_channels:
                self._reward_buffer += ' '
                # Trim like input_buffer so the buffer cannot grow without
                # bound; only its tail is ever displayed.
                self._reward_buffer = \
                    self._reward_buffer[-self._scroll_msg_length:]
                self._reward = self._render_reward()

    def on_learner_sequence_updated(self, sequence):
        """Re-render the learner row when its undeserialized bits change."""
        self._learner_input = self.channel_to_str(
            self.input_buffer + ' ',
            self._learner_channel.get_undeserialized())

    def on_env_message_updated(self, message):
        """Append the newest environment character and re-render its row."""
        if self._env_channel.get_text():
            self.output_buffer += \
                self._env_channel.get_text()[-1]
            self.output_buffer = self.output_buffer[-self._scroll_msg_length:]
            self._env_output = self.channel_to_str(
                self.output_buffer,
                self._env_channel.get_undeserialized())

    def on_env_sequence_updated(self, sequence):
        """Re-render the environment row when its pending bits change."""
        self._env_output = self.channel_to_str(
            self.output_buffer,
            self._env_channel.get_undeserialized())

    def on_world_updated(self, world):
        """Start following the state changes of a new (truthy) world."""
        if world:
            world.state_updated.register(self.on_world_state_updated)

    def on_world_state_updated(self, world):
        """Print the string form of the world after a state change."""
        print(str(world))

    def initialize(self):
        """Measure the console and seed every row printed by get_input().

        Raises:
            Exception: (from ``channel_to_str``) if the console is too
                narrow to render a row.
        """
        _rows, columns = get_console_size()
        reward_len = 15
        self._total_msg_length = columns - 1
        self._scroll_msg_length = columns - 1 - reward_len
        # properties init
        self._learner_input = self.channel_to_str(
            ' ',
            self._learner_channel.get_undeserialized())
        # get_input() prints these rows unconditionally, so they must exist
        # even if no environment or reward event has fired yet.  They are
        # rendered from the current buffers, exactly as the event handlers
        # do, so no state is lost.
        self._env_output = self.channel_to_str(
            self.output_buffer,
            self._env_channel.get_undeserialized())
        if self._byte_channels:
            self._reward = self._render_reward()

    def get_input(self):
        """Print the current rows, read one line from stdin and return it.

        Typing ``self.panic`` sets the environment's ``_task_time`` to
        infinity and returns an empty string; typing ``self.quit`` exits
        the process via ``sys.exit()``.
        """
        print("_" * self._total_msg_length)
        print(self._env_output + ' reward:{:7}'.format(self.info['reward']))
        print(self._learner_input + ' time:{:9}'.format(self.info['time']))
        if self._byte_channels:
            print(self._reward)
        if sys.version_info[0] == 2:
            # Python 2: read raw text (input() would evaluate it) and
            # convert it to unicode.
            input_str = to_unicode(raw_input())  # noqa: F821
        else:
            input_str = input()
        if input_str == self.panic:
            input_str = ''
            self._env._task_time = float('inf')
        elif input_str == self.quit:
            sys.exit()
        return input_str

    def channel_to_str(self, text, bits):
        """Format one stream row: padded text tail plus the pending bits.

        The text is right-aligned and padded on the left with underscores;
        the last seven characters of ``bits`` follow inside brackets.

        Raises:
            Exception: if fewer than two columns are left for the text.
        """
        length = self._scroll_msg_length - 10
        if length <= 1:
            raise Exception('The command window is too small.')
        return "{0:_>{length}}[{1: <8}]".format(
            text[-length:], bits[-7:], length=length)
class StdInOutView(WinBaseView):
    """Plain stdin/stdout view of the environment and learner streams.

    Each call to ``get_input`` prints the current environment row, learner
    row and (in byte mode) reward row, then reads one line from stdin.
    """

    def __init__(self, env, session, serializer, show_world=False,
                 byte_channels=False):
        """Build the channels and subscribe to session/environment events.

        Args:
            env: environment object; its ``world_updated`` event is observed
                when ``show_world`` is true.
            session: session object providing the ``env_token_updated`` and
                ``learner_token_updated`` events.
            serializer: passed to the input channels that decode both
                token streams.
            show_world: when true, print the world on each state change.
            byte_channels: when true, use ``ByteInputChannel`` for both
                streams and maintain a reward row.
        """
        super(StdInOutView, self).__init__(env, session)
        # for visualization purposes, we keep an internal buffer of the
        # input and output stream so when they are cleared from task to
        # task, we can keep the history intact.
        self.input_buffer = ''
        self.output_buffer = ''
        # special inputs recognised by get_input()
        self.panic = u'SKIP'
        self.quit = 'QUIT'
        self._byte_channels = byte_channels
        if byte_channels:
            # record what the learner says
            self._learner_channel = ByteInputChannel(serializer)
            # record what the environment says
            self._env_channel = ByteInputChannel(serializer)
            # reward buffer (only exists in byte mode)
            self._reward_buffer = ''
        else:
            # record what the learner says
            self._learner_channel = InputChannel(serializer)
            # record what the environment says
            self._env_channel = InputChannel(serializer)
        # listen to the updates in these channels
        self._learner_channel.sequence_updated.register(
            self.on_learner_sequence_updated)
        self._learner_channel.message_updated.register(
            self.on_learner_message_updated)
        self._env_channel.sequence_updated.register(
            self.on_env_sequence_updated)
        self._env_channel.message_updated.register(
            self.on_env_message_updated)
        if show_world:
            # register a handler to plot the world if show_world is active
            env.world_updated.register(self.on_world_updated)
        # connect the channels with the observed input bits
        session.env_token_updated.register(self.on_env_token_updated)
        session.learner_token_updated.register(
            self.on_learner_token_updated)
        del self.info['current_task']

    def on_total_reward_updated(self, reward):
        """Store the new total reward and, in byte mode, mark the change.

        The last character of the reward buffer is replaced by the symbol
        of the reward change before the reward row is re-rendered.
        """
        change = reward - self.info['reward']
        self.info['reward'] = reward
        if self._byte_channels:
            self._reward_buffer = self._reward_buffer[0:-1]
            self._reward_buffer += self._encode_reward(change)
            self._reward = self._render_reward()

    @staticmethod
    def _encode_reward(reward):
        """Return the one-character symbol for a reward change.

        Raises:
            KeyError: if ``reward`` is not one of -2, -1, 0, 1, 2.
        """
        d = {0: " ", 1: "+", -1: "-", 2: "2", -2: "\u01BB"}
        return d[reward]

    def _render_reward(self):
        """Format the reward row (byte mode only) from the reward buffer."""
        return self.channel_to_str(
            self._reward_buffer + ' ',
            self._env_channel.get_undeserialized())

    def on_env_token_updated(self, token):
        """Forward a token emitted by the environment to its channel."""
        self._env_channel.consume(token)

    def on_learner_token_updated(self, token):
        """Forward a token emitted by the learner to its channel."""
        self._learner_channel.consume(token)

    def on_learner_message_updated(self, message):
        """Append the newest learner character and re-render the rows."""
        # we use the fact that messages arrive character by character
        if self._learner_channel.get_text():
            self.input_buffer += self._learner_channel.get_text()[-1]
            self.input_buffer = self.input_buffer[-self._scroll_msg_length:]
            self._learner_input = self.channel_to_str(
                self.input_buffer + ' ',
                self._learner_channel.get_undeserialized())
            if self._byte_channels:
                self._reward_buffer += ' '
                # Trim like input_buffer so the buffer cannot grow without
                # bound; only its tail is ever displayed.
                self._reward_buffer = \
                    self._reward_buffer[-self._scroll_msg_length:]
                self._reward = self._render_reward()

    def on_learner_sequence_updated(self, sequence):
        """Re-render the learner row when its undeserialized bits change."""
        self._learner_input = self.channel_to_str(
            self.input_buffer + ' ',
            self._learner_channel.get_undeserialized())

    def on_env_message_updated(self, message):
        """Append the newest environment character and re-render its row."""
        if self._env_channel.get_text():
            self.output_buffer += \
                self._env_channel.get_text()[-1]
            self.output_buffer = self.output_buffer[-self._scroll_msg_length:]
            self._env_output = self.channel_to_str(
                self.output_buffer,
                self._env_channel.get_undeserialized())

    def on_env_sequence_updated(self, sequence):
        """Re-render the environment row when its pending bits change."""
        self._env_output = self.channel_to_str(
            self.output_buffer,
            self._env_channel.get_undeserialized())

    def on_world_updated(self, world):
        """Start following the state changes of a new (truthy) world."""
        if world:
            world.state_updated.register(self.on_world_state_updated)

    def on_world_state_updated(self, world):
        """Print the string form of the world after a state change."""
        print(str(world))

    def initialize(self):
        """Measure the console and seed every row printed by get_input().

        Raises:
            Exception: (from ``channel_to_str``) if the console is too
                narrow to render a row.
        """
        _rows, columns = get_console_size()
        reward_len = 15
        self._total_msg_length = columns - 1
        self._scroll_msg_length = columns - 1 - reward_len
        # properties init
        self._learner_input = self.channel_to_str(
            ' ',
            self._learner_channel.get_undeserialized())
        # get_input() prints these rows unconditionally, so they must exist
        # even if no environment or reward event has fired yet.  They are
        # rendered from the current buffers, exactly as the event handlers
        # do, so no state is lost.
        self._env_output = self.channel_to_str(
            self.output_buffer,
            self._env_channel.get_undeserialized())
        if self._byte_channels:
            self._reward = self._render_reward()

    def get_input(self):
        """Print the current rows, read one line from stdin and return it.

        Typing ``self.panic`` sets the environment's ``_task_time`` to
        infinity and returns an empty string; typing ``self.quit`` exits
        the process via ``sys.exit()``.
        """
        print("_" * self._total_msg_length)
        print(self._env_output + ' reward:{:7}'.format(self.info['reward']))
        print(self._learner_input + ' time:{:9}'.format(self.info['time']))
        if self._byte_channels:
            print(self._reward)
        if sys.version_info[0] == 2:
            # Python 2: read raw text (input() would evaluate it) and
            # convert it to unicode.
            input_str = to_unicode(raw_input())  # noqa: F821
        else:
            input_str = input()
        if input_str == self.panic:
            input_str = ''
            self._env._task_time = float('inf')
        elif input_str == self.quit:
            sys.exit()
        return input_str

    def channel_to_str(self, text, bits):
        """Format one stream row: padded text tail plus the pending bits.

        The text is right-aligned and padded on the left with underscores;
        the last seven characters of ``bits`` follow inside brackets.

        Raises:
            Exception: if fewer than two columns are left for the text.
        """
        length = self._scroll_msg_length - 10
        if length <= 1:
            raise Exception('The command window is too small.')
        return "{0:_>{length}}[{1: <8}]".format(
            text[-length:], bits[-7:], length=length)