class ConsoleView(BaseView):
    """Curses-based view of the exchange between an environment and a learner.

    The view observes the tokens emitted in a session, feeds them to two
    input channels and draws three rows in a curses window: what the
    environment said (row 0), what the learner said (row 1) and a row of
    reward symbols (row 2). It also creates a world area, an info box and a
    one-line prompt from which the user's input is read.
    """

    def __init__(self, env, session, serializer, show_world=False, byte_channels=False):
        """Create the channels and subscribe to the session's events.

        Args:
            env: environment object; its ``world_updated`` event is observed
                when ``show_world`` is true.
            session: object exposing the ``env_token_updated`` and
                ``learner_token_updated`` events.
            serializer: handed to the channel constructors.
            show_world: when true, redraw the world whenever it changes.
            byte_channels: when true, use ``ByteInputChannel`` instead of
                ``InputChannel`` for both streams.
        """
        super(ConsoleView, self).__init__(env, session)

        # For visualization purposes we keep our own copy of both streams so
        # that the history stays on screen when the channels are cleared
        # from task to task.
        self.input_buffer = ''
        self.output_buffer = ''
        self.reward_buffer = ''
        # text the user types at the prompt to trigger the panic branch of
        # get_input()
        self.panic = 'SKIP'

        channel_type = ByteInputChannel if byte_channels else InputChannel
        # records what the learner says
        self._learner_channel = channel_type(serializer)
        # records what the environment says
        self._env_channel = channel_type(serializer)

        # listen to the updates in these channels
        self._learner_channel.sequence_updated.register(
            self.on_learner_sequence_updated)
        self._learner_channel.message_updated.register(
            self.on_learner_message_updated)
        self._env_channel.sequence_updated.register(
            self.on_env_sequence_updated)
        self._env_channel.message_updated.register(
            self.on_env_message_updated)
        if show_world:
            # register a handler to plot the world if show_world is active
            env.world_updated.register(self.on_world_updated)
        # connect the channels with the observed input bits
        session.env_token_updated.register(self.on_env_token_updated)
        session.learner_token_updated.register(self.on_learner_token_updated)
        # NOTE(review): 'current_task' is presumably added to self.info by
        # BaseView; this view removes the entry -- confirm against BaseView.
        del self.info['current_task']

    def on_total_reward_updated(self, reward):
        """Append the symbol for the latest reward change to the reward row.

        Args:
            reward: the new total reward.

        Raises:
            KeyError: if the change in reward has no symbol in
                ``encode_reward``.
        """
        # The delta has to be taken before delegating to BaseView, which is
        # expected to overwrite self.info['reward'] -- confirm in BaseView.
        change = reward - self.info['reward']
        BaseView.on_total_reward_updated(self, reward)
        # left-pad with underscores so the row is full from the first reward
        padded = ("_" * self._scroll_msg_length + self.reward_buffer
                  + self.encode_reward(change))
        self.reward_buffer = padded[-self._scroll_msg_length + 11:]
        self._win.addstr(self._reward_seq_y, 0, self.reward_buffer)
        self._win.refresh()

    @staticmethod
    def encode_reward(reward):
        """Return the one-character symbol used to display a reward change.

        Args:
            reward: one of 0, 1, -1, 2 or -2.

        Raises:
            KeyError: for any other value.
        """
        d = {0: " ", 1: "+", -1: "-", 2: "2", -2: "\u01BB"}
        return d[reward]

    def on_env_token_updated(self, token):
        """Forward a token emitted by the environment to its channel."""
        self._env_channel.consume(token)

    def on_learner_token_updated(self, token):
        """Forward a token emitted by the learner to its channel."""
        self._learner_channel.consume(token)

    def _redraw_learner_row(self):
        """Draw the learner history and pending bits on the learner row."""
        learner_input = self.channel_to_str(
            self.input_buffer + ' ',
            self._learner_channel.get_undeserialized())
        # `code` is a module-level name defined outside this class
        # (presumably the console encoding -- confirm).
        self._win.addstr(self._learner_seq_y, 0,
                         learner_input.encode(code).decode(code))
        self._win.refresh()

    def _redraw_env_row(self):
        """Draw the environment history and pending bits on the teacher row."""
        env_output = self.channel_to_str(
            self.output_buffer,
            self._env_channel.get_undeserialized())
        self._win.addstr(self._teacher_seq_y, 0,
                         env_output.encode(code).decode(code))
        self._win.refresh()

    def on_learner_message_updated(self, message):
        """Record the newest learner character and redraw the learner row."""
        # we use the fact that messages arrive character by character
        text = self._learner_channel.get_text()
        if text:
            self.input_buffer += text[-1]
            self.input_buffer = self.input_buffer[-self._scroll_msg_length:]
            self._redraw_learner_row()

    def on_learner_sequence_updated(self, sequence):
        """Redraw the learner row when its undeserialized bits change."""
        self._redraw_learner_row()

    def on_env_message_updated(self, message):
        """Record the newest environment character and redraw its row."""
        text = self._env_channel.get_text()
        if text:
            self.output_buffer += text[-1]
            self.output_buffer = self.output_buffer[-self._scroll_msg_length:]
            self._redraw_env_row()

    def on_env_sequence_updated(self, sequence):
        """Redraw the environment row when its undeserialized bits change."""
        self._redraw_env_row()

    def on_world_updated(self, world):
        """Draw the new world, or clear the world area when there is none.

        Args:
            world: the new world object; a falsy value clears the area.
        """
        if world:
            world.state_updated.register(self.on_world_state_updated)
            self._worldwin.addstr(0, 0, str(world))
        else:
            self._worldwin.clear()
        # a single refresh covers both branches
        self._worldwin.refresh()

    def on_world_state_updated(self, world):
        """Redraw the world area after the world changed state."""
        self._worldwin.addstr(0, 0, str(world))
        self._worldwin.refresh()

    def initialize(self):
        """Start curses and lay out the windows used by the view."""
        # initialize curses
        self._stdscr = curses.initscr()
        # rows of the three scrolling lines
        self._teacher_seq_y = 0
        self._learner_seq_y = 1
        self._reward_seq_y = 2
        # geometry of the world area, the info box and the input prompt
        self._world_win_y = 4
        self._world_win_x = 0
        self._info_win_width = 20
        self._info_win_height = 4
        self._user_input_win_y = 4
        self._user_input_win_x = 10
        self.height, self.width = self._stdscr.getmaxyx()
        # the scrolling rows use the width left over by the info box
        self._scroll_msg_length = self.width - self._info_win_width - 1
        self._win = self._stdscr.subwin(self.height, self.width, 0, 0)
        self._worldwin = self._win.subwin(self.height - self._world_win_y,
                                          self.width - self._world_win_x,
                                          self._world_win_y,
                                          self._world_win_x)
        # create info box with reward and time
        self._info_win = self._win.subwin(self._info_win_height,
                                          self._info_win_width,
                                          0,
                                          self.width - self._info_win_width)
        self._user_input_win = self._win.subwin(
            1,
            self.width - self._user_input_win_x,
            self._user_input_win_y,
            self._user_input_win_x)
        self._user_input_label_win = self._win.subwin(
            1,
            self._user_input_win_x - 1,
            self._user_input_win_y,
            0)
        curses.noecho()
        curses.cbreak()

    def get_input(self):
        """Prompt the user for a line of input and return it.

        Returns:
            The decoded text typed by the user, or ``''`` when the user
            typed the panic string (in which case the environment's
            ``_task_time`` is also set to infinity).
        """
        self._user_input_label_win.addstr(0, 0, 'input:')
        self._user_input_label_win.refresh()
        curses.echo()
        try:
            inputstr = self._user_input_win.getstr(
                0,
                0,
                self.width - self._user_input_win_x).decode(code)
        finally:
            # always switch echo back off, even if reading or decoding
            # fails (e.g. on Ctrl-C), so the terminal is not left echoing
            curses.noecho()
        if platform.python_version_tuple()[0] == '2':
            inputstr = to_unicode(inputstr)
        self._user_input_win.clear()

        if inputstr == self.panic:
            inputstr = ''
            # NOTE(review): presumably makes the current task time out --
            # confirm against the environment implementation.
            self._env._task_time = float('inf')
        return inputstr

    def channel_to_str(self, text, bits):
        """Format one scrolling row: right-aligned text plus pending bits.

        Args:
            text: history to show; only its tail is kept and it is
                left-padded with underscores.
            bits: undeserialized sequence; its last 7 items are shown inside
                square brackets, padded with spaces to 8 columns.

        Returns:
            The formatted row.
        """
        length = self._scroll_msg_length - 10
        return "{0:_>{length}}[{1: <8}]".format(
            text[-length:], bits[-7:], length=length)
# Example #2
# 0
class ConsoleView(BaseView):
    """Curses front end that displays a session between environment and learner.

    Tokens observed in the session are pushed into two input channels. The
    decoded text and the still-undecoded bits of each channel are drawn on
    their own row of a curses window (environment on row 0, learner on
    row 1), with a row of reward symbols on row 2. The view also sets up a
    world area, an info box and a single-line input prompt.
    """

    def __init__(self,
                 env,
                 session,
                 serializer,
                 show_world=False,
                 byte_channels=False):
        """Build the channels and hook the view to the session events.

        Args:
            env: environment object; ``env.world_updated`` is observed only
                when ``show_world`` is true.
            session: object providing the ``env_token_updated`` and
                ``learner_token_updated`` events.
            serializer: passed through to the channel constructors.
            show_world: whether to draw the world when it is updated.
            byte_channels: select ``ByteInputChannel`` (true) or
                ``InputChannel`` (false) for both streams.
        """
        super(ConsoleView, self).__init__(env, session)

        # For visualization purposes, we keep an internal buffer of the
        # input and output stream so when they are cleared from task to
        # task, we can keep the history intact.
        self.input_buffer = ''
        self.output_buffer = ''
        self.reward_buffer = ''
        # input that triggers the panic branch in get_input()
        self.panic = 'SKIP'

        make_channel = ByteInputChannel if byte_channels else InputChannel
        # record what the learner says
        self._learner_channel = make_channel(serializer)
        # record what the environment says
        self._env_channel = make_channel(serializer)

        # listen to the updates in these channels
        self._learner_channel.sequence_updated.register(
            self.on_learner_sequence_updated)
        self._learner_channel.message_updated.register(
            self.on_learner_message_updated)
        self._env_channel.sequence_updated.register(
            self.on_env_sequence_updated)
        self._env_channel.message_updated.register(self.on_env_message_updated)
        if show_world:
            # register a handler to plot the world if show_world is active
            env.world_updated.register(self.on_world_updated)
        # connect the channels with the observed input bits
        session.env_token_updated.register(self.on_env_token_updated)
        session.learner_token_updated.register(self.on_learner_token_updated)
        # NOTE(review): the 'current_task' entry is presumably created by
        # BaseView; it is removed here -- confirm against BaseView.
        del self.info['current_task']

    def on_total_reward_updated(self, reward):
        """Push the symbol of the latest reward change onto the reward row.

        Args:
            reward: the new total reward.

        Raises:
            KeyError: if ``encode_reward`` has no symbol for the change.
        """
        # Compute the delta first: the BaseView handler is expected to
        # replace self.info['reward'] with the new total -- confirm.
        change = reward - self.info['reward']
        BaseView.on_total_reward_updated(self, reward)
        # the underscore padding keeps the row full-width from the start
        row = ("_" * self._scroll_msg_length + self.reward_buffer
               + self.encode_reward(change))
        self.reward_buffer = row[-self._scroll_msg_length + 11:]
        self._win.addstr(self._reward_seq_y, 0, self.reward_buffer)
        self._win.refresh()

    @staticmethod
    def encode_reward(reward):
        """Map a reward change to its single display character.

        Args:
            reward: one of 0, 1, -1, 2 or -2.

        Raises:
            KeyError: for any other value.
        """
        d = {0: " ", 1: "+", -1: "-", 2: "2", -2: "\u01BB"}
        return d[reward]

    def on_env_token_updated(self, token):
        """Feed an environment token to the environment channel."""
        self._env_channel.consume(token)

    def on_learner_token_updated(self, token):
        """Feed a learner token to the learner channel."""
        self._learner_channel.consume(token)

    def _paint_row(self, row, text):
        """Write ``text`` at the start of ``row`` and refresh the window."""
        # `code` is a module-level name defined outside this class
        # (presumably the console encoding -- confirm).
        self._win.addstr(row, 0, text.encode(code).decode(code))
        self._win.refresh()

    def _paint_learner_row(self):
        """Render the learner history and pending bits on the learner row."""
        self._paint_row(
            self._learner_seq_y,
            self.channel_to_str(self.input_buffer + ' ',
                                self._learner_channel.get_undeserialized()))

    def _paint_env_row(self):
        """Render the environment history and pending bits on its row."""
        self._paint_row(
            self._teacher_seq_y,
            self.channel_to_str(self.output_buffer,
                                self._env_channel.get_undeserialized()))

    def on_learner_message_updated(self, message):
        """Store the learner's newest character and repaint the learner row."""
        # we use the fact that messages arrive character by character
        decoded = self._learner_channel.get_text()
        if not decoded:
            return
        self.input_buffer += decoded[-1]
        self.input_buffer = self.input_buffer[-self._scroll_msg_length:]
        self._paint_learner_row()

    def on_learner_sequence_updated(self, sequence):
        """Repaint the learner row after its pending bits changed."""
        self._paint_learner_row()

    def on_env_message_updated(self, message):
        """Store the environment's newest character and repaint its row."""
        decoded = self._env_channel.get_text()
        if not decoded:
            return
        self.output_buffer += decoded[-1]
        self.output_buffer = self.output_buffer[-self._scroll_msg_length:]
        self._paint_env_row()

    def on_env_sequence_updated(self, sequence):
        """Repaint the environment row after its pending bits changed."""
        self._paint_env_row()

    def on_world_updated(self, world):
        """Show the new world, or wipe the world area if there is no world.

        Args:
            world: the new world object; a falsy value clears the area.
        """
        if world:
            world.state_updated.register(self.on_world_state_updated)
            self._worldwin.addstr(0, 0, str(world))
        else:
            self._worldwin.clear()
        # one refresh is enough for either branch
        self._worldwin.refresh()

    def on_world_state_updated(self, world):
        """Repaint the world area with the world's current string form."""
        self._worldwin.addstr(0, 0, str(world))
        self._worldwin.refresh()

    def initialize(self):
        """Initialise curses and create every window the view draws into."""
        # initialize curses
        self._stdscr = curses.initscr()
        # rows used by the three scrolling lines
        self._teacher_seq_y = 0
        self._learner_seq_y = 1
        self._reward_seq_y = 2
        # placement of the world area, info box and input prompt
        self._world_win_y = 4
        self._world_win_x = 0
        self._info_win_width = 20
        self._info_win_height = 4
        self._user_input_win_y = 4
        self._user_input_win_x = 10
        self.height, self.width = self._stdscr.getmaxyx()
        # scrolling rows take whatever width the info box leaves free
        self._scroll_msg_length = self.width - self._info_win_width - 1
        self._win = self._stdscr.subwin(self.height, self.width, 0, 0)
        self._worldwin = self._win.subwin(self.height - self._world_win_y,
                                          self.width - self._world_win_x,
                                          self._world_win_y,
                                          self._world_win_x)
        # create info box with reward and time
        self._info_win = self._win.subwin(self._info_win_height,
                                          self._info_win_width, 0,
                                          self.width - self._info_win_width)
        self._user_input_win = self._win.subwin(
            1,
            self.width - self._user_input_win_x,
            self._user_input_win_y,
            self._user_input_win_x)
        self._user_input_label_win = self._win.subwin(
            1,
            self._user_input_win_x - 1,
            self._user_input_win_y,
            0)
        curses.noecho()
        curses.cbreak()

    def get_input(self):
        """Read one line typed by the user at the prompt.

        Returns:
            The decoded input, or ``''`` if the user typed the panic string
            (the environment's ``_task_time`` is then set to infinity).
        """
        self._user_input_label_win.addstr(0, 0, 'input:')
        self._user_input_label_win.refresh()
        curses.echo()
        try:
            inputstr = self._user_input_win.getstr(
                0, 0, self.width - self._user_input_win_x).decode(code)
        finally:
            # restore no-echo mode even when reading or decoding raises
            # (e.g. Ctrl-C), so the terminal is not left echoing keys
            curses.noecho()
        self._user_input_win.clear()
        if inputstr == self.panic:
            inputstr = ''
            # NOTE(review): presumably forces the current task to time
            # out -- confirm against the environment implementation.
            self._env._task_time = float('inf')
        return inputstr

    def channel_to_str(self, text, bits):
        """Build the string for one scrolling row.

        Args:
            text: history to display; only the tail is kept and it is
                right-aligned with underscore padding.
            bits: undeserialized sequence; its last 7 items go between
                square brackets, space-padded to 8 columns.

        Returns:
            The formatted row.
        """
        length = self._scroll_msg_length - 10
        return "{0:_>{length}}[{1: <8}]".format(text[-length:],
                                                bits[-7:],
                                                length=length)
class StdInOutView(WinBaseView):
    """Plain stdin/stdout view of a session between environment and learner.

    Tokens observed in the session are fed to two input channels. The view
    keeps a formatted line for each stream (and, with byte channels, a line
    of reward symbols) and prints them every time it asks the user for
    input.
    """

    def __init__(self,
                 env,
                 session,
                 serializer,
                 show_world=False,
                 byte_channels=False):
        """Create the channels and subscribe to the session's events.

        Args:
            env: environment object; ``env.world_updated`` is observed only
                when ``show_world`` is true.
            session: object exposing the ``env_token_updated`` and
                ``learner_token_updated`` events.
            serializer: handed to the channel constructors.
            show_world: when true, print the world whenever its state
                changes.
            byte_channels: when true, use ``ByteInputChannel`` for both
                streams and keep a reward line; otherwise use
                ``InputChannel``.
        """
        super(StdInOutView, self).__init__(env, session)

        # For visualization purposes, we keep an internal buffer of the
        # input and output stream so when they are cleared from task to
        # task, we can keep the history intact.
        self.input_buffer = ''
        self.output_buffer = ''
        # inputs that trigger the special branches of get_input()
        self.panic = u'SKIP'
        self.quit = 'QUIT'
        self._byte_channels = byte_channels

        if byte_channels:
            # record what the learner says
            self._learner_channel = ByteInputChannel(serializer)
            # record what the environment says
            self._env_channel = ByteInputChannel(serializer)
            # reward buffer (only maintained with byte channels)
            self._reward_buffer = ''
        else:
            # record what the learner says
            self._learner_channel = InputChannel(serializer)
            # record what the environment says
            self._env_channel = InputChannel(serializer)

        # listen to the updates in these channels
        self._learner_channel.sequence_updated.register(
            self.on_learner_sequence_updated)
        self._learner_channel.message_updated.register(
            self.on_learner_message_updated)
        self._env_channel.sequence_updated.register(
            self.on_env_sequence_updated)
        self._env_channel.message_updated.register(self.on_env_message_updated)
        if show_world:
            # register a handler to plot the world if show_world is active
            env.world_updated.register(self.on_world_updated)
        # connect the channels with the observed input bits
        session.env_token_updated.register(self.on_env_token_updated)
        session.learner_token_updated.register(self.on_learner_token_updated)
        # NOTE(review): 'current_task' is presumably added to self.info by
        # the base view; it is removed here -- confirm against WinBaseView.
        del self.info['current_task']

    def _refresh_reward_line(self):
        """Rebuild the printable reward line from the reward buffer."""
        self._reward = self.channel_to_str(
            self._reward_buffer + ' ',
            self._env_channel.get_undeserialized())

    def on_total_reward_updated(self, reward):
        """Store the new total and, with byte channels, mark the change.

        Args:
            reward: the new total reward.

        Raises:
            KeyError: with byte channels, if the change in reward has no
                symbol in ``_encode_reward``.
        """
        change = reward - self.info['reward']
        self.info['reward'] = reward
        if self._byte_channels:
            # the symbol replaces the last slot of the reward buffer
            self._reward_buffer = (
                self._reward_buffer[:-1] + self._encode_reward(change))
            self._refresh_reward_line()

    @staticmethod
    def _encode_reward(reward):
        """Return the one-character symbol used to display a reward change.

        Args:
            reward: one of 0, 1, -1, 2 or -2.

        Raises:
            KeyError: for any other value.
        """
        d = {0: " ", 1: "+", -1: "-", 2: "2", -2: "\u01BB"}
        return d[reward]

    def on_env_token_updated(self, token):
        """Forward a token emitted by the environment to its channel."""
        self._env_channel.consume(token)

    def on_learner_token_updated(self, token):
        """Forward a token emitted by the learner to its channel."""
        self._learner_channel.consume(token)

    def on_learner_message_updated(self, message):
        """Record the newest learner character and rebuild the learner line."""
        # we use the fact that messages arrive character by character
        text = self._learner_channel.get_text()
        if not text:
            return
        self.input_buffer += text[-1]
        self.input_buffer = self.input_buffer[-self._scroll_msg_length:]
        self._learner_input = self.channel_to_str(
            self.input_buffer + ' ',
            self._learner_channel.get_undeserialized())
        if self._byte_channels:
            # Add one blank slot per learner character. Trim like the other
            # buffers so this one does not grow without bound; only the
            # tail is ever rendered.
            self._reward_buffer = (
                self._reward_buffer + ' ')[-self._scroll_msg_length:]
            self._refresh_reward_line()

    def on_learner_sequence_updated(self, sequence):
        """Rebuild the learner line when its undeserialized bits change."""
        self._learner_input = self.channel_to_str(
            self.input_buffer + ' ',
            self._learner_channel.get_undeserialized())

    def on_env_message_updated(self, message):
        """Record the newest environment character and rebuild its line."""
        text = self._env_channel.get_text()
        if not text:
            return
        self.output_buffer += text[-1]
        self.output_buffer = self.output_buffer[-self._scroll_msg_length:]
        self._env_output = self.channel_to_str(
            self.output_buffer, self._env_channel.get_undeserialized())

    def on_env_sequence_updated(self, sequence):
        """Rebuild the environment line when its undeserialized bits change."""
        self._env_output = self.channel_to_str(
            self.output_buffer, self._env_channel.get_undeserialized())

    def on_world_updated(self, world):
        """Start observing state changes of a newly set (truthy) world."""
        if world:
            world.state_updated.register(self.on_world_state_updated)

    def on_world_state_updated(self, world):
        """Print the world's string form after it changed state."""
        print(str(world))

    def initialize(self):
        """Measure the console and seed the lines printed by ``get_input``.

        Raises:
            Exception: if the console is too narrow (see ``channel_to_str``).
        """
        _, columns = get_console_size()
        reward_len = 15
        self._total_msg_length = columns - 1
        self._scroll_msg_length = columns - 1 - reward_len
        # properties init
        self._learner_input = self.channel_to_str(
            ' ', self._learner_channel.get_undeserialized())
        # get_input() also prints these two; seed them here so that it does
        # not fail with AttributeError when no update has arrived yet
        self._env_output = self.channel_to_str(
            self.output_buffer, self._env_channel.get_undeserialized())
        if self._byte_channels:
            self._refresh_reward_line()

    def get_input(self):
        """Print the current lines, then read one line from stdin.

        Returns:
            The text typed by the user, or ``''`` when the user typed the
            panic string (the environment's ``_task_time`` is then set to
            infinity).

        Raises:
            SystemExit: when the user typed the quit string.
        """
        print("_" * self._total_msg_length)
        print(self._env_output + ' reward:{:7}'.format(self.info['reward']))
        print(self._learner_input + ' time:{:9}'.format(self.info['time']))
        if self._byte_channels:
            print(self._reward)
        if sys.version_info[0] == 2:
            # Python 2: raw_input() reads the line, to_unicode() converts it
            input_str = to_unicode(raw_input())  # noqa: F821
        else:
            input_str = input()
        if input_str == self.panic:
            input_str = ''
            # NOTE(review): presumably forces the current task to time
            # out -- confirm against the environment implementation.
            self._env._task_time = float('inf')
        elif input_str == self.quit:
            sys.exit()
        return input_str

    def channel_to_str(self, text, bits):
        """Format one line: right-aligned text followed by pending bits.

        Args:
            text: history to show; only its tail is kept and it is
                left-padded with underscores.
            bits: undeserialized sequence; its last 7 items are shown inside
                square brackets, padded with spaces to 8 columns.

        Returns:
            The formatted line.

        Raises:
            Exception: if the console leaves no room for the text.
        """
        length = self._scroll_msg_length - 10
        if length <= 1:
            raise Exception('The command window is too small.')
        return "{0:_>{length}}[{1: <8}]".format(text[-length:],
                                                bits[-7:],
                                                length=length)
class StdInOutView(WinBaseView):
    """Text-only view that prints the session state and reads from stdin.

    The view mirrors the tokens exchanged in a session into two input
    channels and keeps one formatted line per stream, plus a line of reward
    symbols when byte channels are used. The lines are printed each time
    the user is asked for input.
    """

    def __init__(self, env, session, serializer, show_world=False, byte_channels=False):
        """Build the channels and hook the view to the session events.

        Args:
            env: environment object; ``env.world_updated`` is observed only
                when ``show_world`` is true.
            session: object providing the ``env_token_updated`` and
                ``learner_token_updated`` events.
            serializer: passed through to the channel constructors.
            show_world: whether to print the world when its state changes.
            byte_channels: select ``ByteInputChannel`` (true, which also
                enables the reward line) or ``InputChannel`` (false).
        """
        super(StdInOutView, self).__init__(env, session)

        # for visualization purposes, we keep an internal buffer of the
        # input and output stream so when they are cleared from task to
        # task, we can keep the history intact.
        self.input_buffer = ''
        self.output_buffer = ''
        # special inputs recognised by get_input()
        self.panic = u'SKIP'
        self.quit = 'QUIT'
        self._byte_channels = byte_channels

        if byte_channels:
            # record what the learner says
            self._learner_channel = ByteInputChannel(serializer)
            # record what the environment says
            self._env_channel = ByteInputChannel(serializer)
            # reward buffer (exists only when byte channels are used)
            self._reward_buffer = ''
        else:
            # record what the learner says
            self._learner_channel = InputChannel(serializer)
            # record what the environment says
            self._env_channel = InputChannel(serializer)

        # listen to the updates in these channels
        self._learner_channel.sequence_updated.register(
            self.on_learner_sequence_updated)
        self._learner_channel.message_updated.register(
            self.on_learner_message_updated)
        self._env_channel.sequence_updated.register(
            self.on_env_sequence_updated)
        self._env_channel.message_updated.register(
            self.on_env_message_updated)
        if show_world:
            # register a handler to plot the world if show_world is active
            env.world_updated.register(
                self.on_world_updated)
        # connect the channels with the observed input bits
        session.env_token_updated.register(self.on_env_token_updated)
        session.learner_token_updated.register(self.on_learner_token_updated)
        # NOTE(review): the 'current_task' entry is presumably created by
        # the base view; it is removed here -- confirm against WinBaseView.
        del self.info['current_task']

    def _format_learner_line(self):
        """Return the learner history and pending bits as one line."""
        return self.channel_to_str(
            self.input_buffer + ' ',
            self._learner_channel.get_undeserialized())

    def _format_env_line(self):
        """Return the environment history and pending bits as one line."""
        return self.channel_to_str(
            self.output_buffer,
            self._env_channel.get_undeserialized())

    def _format_reward_line(self):
        """Return the reward symbols and pending environment bits as a line."""
        return self.channel_to_str(
            self._reward_buffer + ' ',
            self._env_channel.get_undeserialized())

    def on_total_reward_updated(self, reward):
        """Record the new total reward and mark the change on the reward line.

        Args:
            reward: the new total reward.

        Raises:
            KeyError: with byte channels, if ``_encode_reward`` has no
                symbol for the change.
        """
        change = reward - self.info['reward']
        self.info['reward'] = reward
        if self._byte_channels:
            symbol = self._encode_reward(change)
            # overwrite the most recent slot with the reward symbol
            self._reward_buffer = self._reward_buffer[:-1] + symbol
            self._reward = self._format_reward_line()

    @staticmethod
    def _encode_reward(reward):
        """Map a reward change to its single display character.

        Args:
            reward: one of 0, 1, -1, 2 or -2.

        Raises:
            KeyError: for any other value.
        """
        d = {0: " ", 1: "+", -1: "-", 2: "2", -2: "\u01BB"}
        return d[reward]

    def on_env_token_updated(self, token):
        """Feed an environment token to the environment channel."""
        self._env_channel.consume(token)

    def on_learner_token_updated(self, token):
        """Feed a learner token to the learner channel."""
        self._learner_channel.consume(token)

    def on_learner_message_updated(self, message):
        """Store the learner's newest character and rebuild the learner line."""
        # we use the fact that messages arrive character by character
        decoded = self._learner_channel.get_text()
        if decoded:
            self.input_buffer += decoded[-1]
            self.input_buffer = self.input_buffer[-self._scroll_msg_length:]
            self._learner_input = self._format_learner_line()
            if self._byte_channels:
                # One blank slot per learner character. The buffer is
                # trimmed like input_buffer so it cannot grow forever; the
                # printed line only ever uses its tail.
                self._reward_buffer += ' '
                self._reward_buffer = \
                    self._reward_buffer[-self._scroll_msg_length:]
                self._reward = self._format_reward_line()

    def on_learner_sequence_updated(self, sequence):
        """Rebuild the learner line after its pending bits changed."""
        self._learner_input = self._format_learner_line()

    def on_env_message_updated(self, message):
        """Store the environment's newest character and rebuild its line."""
        decoded = self._env_channel.get_text()
        if decoded:
            self.output_buffer += decoded[-1]
            self.output_buffer = self.output_buffer[-self._scroll_msg_length:]
            self._env_output = self._format_env_line()

    def on_env_sequence_updated(self, sequence):
        """Rebuild the environment line after its pending bits changed."""
        self._env_output = self._format_env_line()

    def on_world_updated(self, world):
        """Subscribe to state changes of a newly set (truthy) world."""
        if world:
            world.state_updated.register(self.on_world_state_updated)

    def on_world_state_updated(self, world):
        """Print the string form of the world after a state change."""
        print(str(world))

    def initialize(self):
        """Read the console size and prepare the lines shown by ``get_input``.

        Raises:
            Exception: if the console is too narrow (see ``channel_to_str``).
        """
        _, columns = get_console_size()
        reward_len = 15
        self._total_msg_length = columns - 1
        self._scroll_msg_length = columns - 1 - reward_len
        # properties init
        self._learner_input = self.channel_to_str(
            ' ',
            self._learner_channel.get_undeserialized())
        # These are printed by get_input() as well. Without a starting
        # value it raises AttributeError if it runs before any environment
        # or reward update has been received.
        self._env_output = self._format_env_line()
        if self._byte_channels:
            self._reward = self._format_reward_line()

    def get_input(self):
        """Show the current state on stdout and read the user's reply.

        Returns:
            The line typed by the user, or ``''`` if it was the panic
            string (the environment's ``_task_time`` is then set to
            infinity).

        Raises:
            SystemExit: if the user typed the quit string.
        """
        print("_"*self._total_msg_length)
        print(self._env_output + ' reward:{:7}'.format(self.info['reward']))
        print(self._learner_input + ' time:{:9}'.format(self.info['time']))
        if self._byte_channels:
            print(self._reward)
        if sys.version_info[0] == 2:
            # Python 2 reads with raw_input() and converts the result
            input_str = to_unicode(raw_input())  # noqa: F821
        else:
            input_str = input()
        if input_str == self.panic:
            input_str = ''
            # NOTE(review): presumably makes the current task time out --
            # confirm against the environment implementation.
            self._env._task_time = float('inf')
        elif input_str == self.quit:
            sys.exit()
        return input_str

    def channel_to_str(self, text, bits):
        """Build one printable line from a text history and pending bits.

        Args:
            text: history to display; only the tail is kept and it is
                right-aligned with underscore padding.
            bits: undeserialized sequence; its last 7 items go between
                square brackets, space-padded to 8 columns.

        Returns:
            The formatted line.

        Raises:
            Exception: if the console leaves no room for the text.
        """
        length = self._scroll_msg_length - 10
        if length <= 1:
            raise Exception('The command window is too small.')
        return "{0:_>{length}}[{1: <8}]".format(
            text[-length:], bits[-7:], length=length)