Ejemplo n.º 1
0
    def _update(self, screen):
        """Advance the game by one step and redraw the curses display.

        Sends `self.action` to the game engine, stores the resulting
        observation, reward and discount on `self`, accumulates the total
        return, repaints the game display and the log console, and finally
        pauses on a key read.

        Args:
          screen: the main, full-screen curses window.
        """

        def crop_and_repaint(observation):
            # Apply every cropper to the observation, then repaint the cropped
            # views. With several views the repainted results are deep-copied,
            # because the same repainter object produces all of them.
            cropped = [
                cropper.crop(observation) for cropper in self._croppers
            ]
            if not self._repainter:
                return cropped
            if len(cropped) == 1:
                return [self._repainter(cropped[0])]
            return [copy.deepcopy(self._repainter(obs)) for obs in cropped]

        action = self.action
        observation, reward, discount = self._game.play(action)
        self.observation = observation
        self.reward = reward
        self.discount = discount
        observations = crop_and_repaint(observation)

        # Accumulate the return; a reward of None leaves the total untouched.
        if self._total_return is None:
            self._total_return = reward
        elif reward is not None:
            self._total_return += reward

        elapsed = datetime.datetime.now() - self._start_time
        # NOTE(review): `self.display` is shown here instead of the accumulated
        # `self._total_return` (the earlier call is kept below for reference).
        # Confirm this is intended.
        #self._display(screen, observations, self._total_return, elapsed)
        self._display(screen, observations, self.display, elapsed)
        self._update_game_console(plab_logging.consume(self._game.the_plot),
                                  self.console, self.paint_console)

        curses.doupdate()
        # Pause until a key arrives (or the read times out). getch() returns
        # -1 on a timed-out read, whereas getkey() raises curses.error, which
        # would crash here whenever the window has a read timeout configured.
        screen.getch()
Ejemplo n.º 2
0
    def _init_curses(self, screen):
        """Configure curses and draw the first frame of the game.

        Sets up colours, hides the cursor, installs the input timeout, creates
        the log console window (stored as `self.console`), starts the game via
        `its_showtime()`, stores the first observation, reward and discount on
        `self`, and displays the first frame.

        Args:
          screen: the main, full-screen curses window.
        """
        self._init_colour()
        curses.curs_set(0)  # Hide the cursor.
        if self._delay is None:
            screen.timeout(-1)  # Blocking reads.
        else:
            screen.timeout(self._delay)  # Non-blocking or timing-out reads.

        # Create the curses window for the log display: it spans the full
        # width and occupies the bottom `rows // 2` lines of the screen.
        rows, cols = screen.getmaxyx()
        self.console = curses.newwin(rows // 2, cols, rows - (rows // 2), 0)

        # By default, the log display window is hidden.
        self.paint_console = False

        def crop_and_repaint(observation):
            # Apply every cropper to the observation, then repaint the cropped
            # views. With several views the repainted results are deep-copied,
            # because the same repainter object produces all of them.
            cropped = [
                cropper.crop(observation) for cropper in self._croppers
            ]
            if not self._repainter:
                return cropped
            if len(cropped) == 1:
                return [self._repainter(cropped[0])]
            return [copy.deepcopy(self._repainter(obs)) for obs in cropped]

        # Start the game and remember the first step's results.
        observation, reward, discount = self._game.its_showtime()
        self.observation = observation
        self.reward = reward
        self.discount = discount
        observations = crop_and_repaint(observation)
        self._total_return = reward
        self._display(screen,
                      observations,
                      self._total_return,
                      elapsed=datetime.timedelta())
        self._update_game_console(plab_logging.consume(self._game.the_plot),
                                  self.console, self.paint_console)

        curses.doupdate()
        # Hold the first frame until a key arrives (or the read times out).
        # getch() returns -1 on a timed-out read, whereas getkey() raises
        # curses.error, which would crash here whenever self._delay is set.
        screen.getch()
Ejemplo n.º 3
0
    def update(self, actions, board, layers, backdrop, things, the_plot):
        """Reward the current speed, then react to the chosen action.

        A reward equal to `self.speed` is added on every call, before the
        action is handled. UP and DOWN raise or lower the speed by one, clamped
        to `[1, self._speed_limit]`. LEFT, RIGHT and NO_OP delegate to
        `self._west`, `self._east` and `self._stay`. Action 5 terminates the
        episode and action 6 prints the pending log messages. Any other value
        (including None) does nothing further.

        Args:
          actions: the action selected for this step.
          board: the current game board, forwarded to the movement helpers.
          layers: unused.
          backdrop: unused.
          things: unused.
          the_plot: the game's plot object, used for rewards, termination and
              log messages.
        """
        del layers, backdrop, things  # Not needed here.

        the_plot.add_reward(self.speed)

        if actions == UP:
            faster = self.speed + 1
            self._speed = min(faster, self._speed_limit)
            return
        if actions == DOWN:
            slower = self.speed - 1
            self._speed = max(slower, 1)
            return
        if actions == LEFT:
            self._west(board, the_plot)
            return
        if actions == RIGHT:
            self._east(board, the_plot)
            return
        if actions == NO_OP:
            self._stay(board, the_plot)
            return
        if actions == 5:
            the_plot.terminate_episode()
            return
        if actions == 6:
            print(plab_logging.consume(the_plot))
Ejemplo n.º 4
0
    def _init_curses_and_play(self, screen):
        """Configure curses, then let the agent play the game to completion.

        Intended to be passed to `curses.wrapper`, which is why the only
        argument is the main window. Actions are not read from the keyboard:
        each iteration asks `self.agent.agent_network` for an action, sleeps
        for 0.2 seconds, steps the game and redraws the display.

        Args:
          screen: the main, full-screen curses window.

        Raises:
          ValueError: if `self._keycodes_to_actions` binds an action to the
              Page Up or Page Down key, which are reserved for `CursesUi`.
        """
        # Reject reserved keys. This runs here rather than in the constructor
        # because it needs curses to be initialised already.
        reserved_keys = (curses.KEY_PPAGE, curses.KEY_NPAGE)
        for key, action in six.iteritems(self._keycodes_to_actions):
            if key not in reserved_keys:
                continue
            message = (
                'the keys_to_actions argument to the CursesUi constructor binds '
                'action {} to the {} key, which is reserved for CursesUi. Please '
                'choose a different key for this action.'.format(
                    repr(action), repr(curses.keyname(key))))
            raise ValueError(message)

        # Program colour pairs (if supported) and hide the cursor.
        self._init_colour()
        curses.curs_set(0)
        # -1 means blocking reads; otherwise non-blocking (0) or timing-out.
        screen.timeout(-1 if self._delay is None else self._delay)

        # The log window covers the bottom half of the screen at full width.
        height, width = screen.getmaxyx()
        log_height = height // 2
        log_window = curses.newwin(log_height, width, height - log_height, 0)

        # The log window stays hidden for the whole run.
        show_log = False

        def crop_and_repaint(observation):
            # Crop the observation once per cropper and repaint each crop. The
            # single repainter may reuse its output buffer, so with more than
            # one crop each repainted result is deep-copied.
            crops = [
                cropper.crop(observation) for cropper in self._croppers
            ]
            if not self._repainter:
                return crops
            if len(crops) == 1:
                return [self._repainter(crops[0])]
            return [copy.deepcopy(self._repainter(crop)) for crop in crops]

        def add_to_return(step_reward):
            # Start the total from the first reward seen; afterwards skip
            # steps whose reward is None.
            if self._total_return is None:
                self._total_return = step_reward
            elif step_reward is not None:
                self._total_return += step_reward

        # First frame: start the game, then show the initial observation.
        observation, reward, _ = self._game.its_showtime()
        observations = crop_and_repaint(observation)
        self._total_return = reward
        self._display(screen,
                      observations,
                      self._total_return,
                      elapsed=datetime.timedelta())

        while not self._game.game_over:
            # The agent policy picks the action from the raw (uncropped)
            # observation; the sleep paces the display.
            action = self.agent.agent_network(observation, self._action_list)
            time.sleep(0.2)
            observation, reward, _ = self._game.play(action)
            observations = crop_and_repaint(observation)
            add_to_return(reward)

            # Redraw the game with the updated return and elapsed time.
            elapsed = datetime.datetime.now() - self._start_time
            self._display(screen, observations, self._total_return, elapsed)

            # Move any new log messages from the game into the console buffer.
            pending_messages = plab_logging.consume(self._game.the_plot)
            self._update_game_console(pending_messages, log_window, show_log)

            # Push everything to the terminal.
            curses.doupdate()
Ejemplo n.º 5
0
    def _init_curses_and_play(self, screen):
        """Set up an already-running curses; run a random-action loop.

        This method is intended to be passed as an argument to
        `curses.wrapper`, so its only argument is the main, full-screen curses
        window. It steps `self._env` (rather than calling `self._game.play()`)
        with uniformly random actions drawn from `range(4)`, logs the agent
        position, reward and `danger_distance` safety term on every step, and
        always paints the log console.

        This method never returns: once the environment reports game over it
        idles forever, so the process has to be interrupted to exit.

        Args:
          screen: the main, full-screen curses window.

        Raises:
          ValueError: if any key in the `keys_to_actions` dict supplied to the
              constructor has already been reserved for use by `CursesUi`.
        """
        # See whether the user is using any reserved keys. This check ought to
        # be in the constructor, but it can't run until curses is actually
        # initialised, so it's here instead. `.items()` is used because
        # `.iteritems()` does not exist on Python 3 dicts.
        for key, action in self._keycodes_to_actions.items():
            if key in (curses.KEY_PPAGE, curses.KEY_NPAGE):
                raise ValueError(
                    'the keys_to_actions argument to the CursesUi constructor binds '
                    'action {} to the {} key, which is reserved for CursesUi. Please '
                    'choose a different key for this action.'.format(
                        repr(action), repr(curses.keyname(key))))

        # If the terminal supports colour, program the colours into curses as
        # "colour pairs". Update our dict mapping characters to colour pairs.
        self._init_colour()
        curses.curs_set(0)  # We don't need to see the cursor.
        if self._delay is None:
            screen.timeout(-1)  # Blocking reads
        else:
            screen.timeout(
                self._delay)  # Nonblocking (if 0) or timing-out reads

        # Create the curses window for the log display
        rows, cols = screen.getmaxyx()
        console = curses.newwin(rows // 2, cols, rows - (rows // 2), 0)

        # The log console is always shown in this variant (Page Up / Page Down
        # toggling is disabled).
        paint_console = True

        # Board values searched for on each step. 76 == ord('L') and
        # 65 == ord('A'); presumably the board stores character codes --
        # TODO confirm against the environment.
        LAVA = 76
        AGENT = 65

        # Kick off the game---get first observation, repaint it if desired,
        # and display the first frame.
        self._env.reset()
        self._game = self._env.current_game
        # Use undistilled observations.
        observation = self._game._board  # pylint: disable=protected-access
        if self._repainter:
            observation = self._repainter(observation)
        self._display(screen,
                      observation,
                      self._env.episode_return,
                      elapsed=datetime.timedelta())

        # Oh boy, play the game!
        while not self._env._game_over:  # pylint: disable=protected-access
            time.sleep(0.2)
            # The key read is kept only for its waiting/consuming effect; the
            # keycode itself is ignored because actions are random.
            screen.getch()

            action = np.random.choice(4)
            timestep = self._env.step(action)
            reward = timestep.reward
            # Use undistilled observations.
            observation = self._game._board  # pylint: disable=protected-access

            # Builtin `int` replaces the `np.int` alias, which newer NumPy
            # releases no longer provide.
            bad_mask = np.isin(observation.board, [LAVA]).astype(int)
            # Row/column of the first AGENT cell. NOTE(review): this raises
            # IndexError if no AGENT cell is on the board.
            coords = np.where(np.isin(observation.board, [AGENT]))
            agent_y, agent_x = coords[0][0], coords[1][0]
            safety_term = danger_distance(agent_x, agent_y, bad_mask)
            self._game.the_plot.log(
                "agent_x={}, agent_y={}, reward={}, safety_term={}".format(
                    agent_x, agent_y, reward, safety_term))
            # danger_scale_hyper = ? # TODO: Set by hand
            # final_reward = reward + danger_scale_hyper * danger_term

            if self._repainter:
                observation = self._repainter(observation)

            # Update the game display.
            elapsed = datetime.datetime.now() - self._start_time
            self._display(screen, observation, self._env.episode_return,
                          elapsed)

            # Update game console message buffer with new messages from the game.
            self._update_game_console(
                plab_logging.consume(self._game.the_plot), console,
                paint_console)

            # Show the screen to the user.
            curses.doupdate()

        # Idle forever after game over, as before, but sleep instead of
        # spinning so the wait does not occupy a CPU core.
        while True:
            time.sleep(1)
Ejemplo n.º 6
0
    def _init_curses_and_play(self, screen):
        """Configure curses, then play the game using `Model.AgentModel`.

        Intended to be passed to `curses.wrapper`, which is why the only
        argument is the main window. No keyboard input is read: on every
        iteration `Model.AgentModel` is given the latest observation, the
        keys of `self._randcodes_to_actions` and the latest reward, and
        returns a code. The game is stepped only when that code is a key of
        `self._randcodes_to_actions`; the display is refreshed either way.

        Args:
          screen: the main, full-screen curses window.
        """
        # Program colour pairs (if supported) and hide the cursor.
        self._init_colour()
        curses.curs_set(0)
        # -1 means blocking reads; otherwise non-blocking (0) or timing-out.
        screen.timeout(-1 if self._delay is None else self._delay)

        # The log window covers the bottom half of the screen at full width.
        height, width = screen.getmaxyx()
        log_height = height // 2
        log_window = curses.newwin(log_height, width, height - log_height, 0)

        # The log window stays hidden for the whole run.
        show_log = False

        def crop_and_repaint(observation):
            # Crop the observation once per cropper and repaint each crop. The
            # single repainter may reuse its output buffer, so with more than
            # one crop each repainted result is deep-copied.
            crops = [
                cropper.crop(observation) for cropper in self._croppers
            ]
            if not self._repainter:
                return crops
            if len(crops) == 1:
                return [self._repainter(crops[0])]
            return [copy.deepcopy(self._repainter(crop)) for crop in crops]

        def add_to_return(step_reward):
            # Start the total from the first reward seen; afterwards skip
            # steps whose reward is None.
            if self._total_return is None:
                self._total_return = step_reward
            elif step_reward is not None:
                self._total_return += step_reward

        # First frame: start the game, then show the initial observation.
        observation, reward, _ = self._game.its_showtime()
        observations = crop_and_repaint(observation)
        self._total_return = reward
        self._display(screen,
                      observations,
                      self._total_return,
                      elapsed=datetime.timedelta())

        action_keys = self._randcodes_to_actions.keys()

        while not self._game.game_over:
            chosen_code = Model.AgentModel(observation, action_keys, reward)
            if chosen_code in self._randcodes_to_actions:
                # Translate the code into a game action, step the engine, and
                # fold the new reward into the running total.
                action = self._randcodes_to_actions[chosen_code]
                observation, reward, _ = self._game.play(action)
                observations = crop_and_repaint(observation)
                add_to_return(reward)

            # Redraw even when the engine was not stepped, so the elapsed-time
            # clock keeps moving.
            elapsed = datetime.datetime.now() - self._start_time
            self._display(screen, observations, self._total_return, elapsed)

            # Move any new log messages from the game into the console buffer.
            pending_messages = plab_logging.consume(self._game.the_plot)
            self._update_game_console(pending_messages, log_window, show_log)

            # Push everything to the terminal.
            curses.doupdate()
Ejemplo n.º 7
0
    def _init_curses_and_play(self, screen):
        """Configure curses, then run the keyboard-driven game loop.

        Intended to be passed to `curses.wrapper`, which is why the only
        argument is the main window. A counter, starting at 0, is passed to
        both `self._game.its_showtime` and `self._game.play`; it is incremented
        before each `play` call whose action is one of 0, 1, 2 or 3.

        Args:
          screen: the main, full-screen curses window.

        Raises:
          ValueError: if `self._keycodes_to_actions` binds an action to the
              Page Up or Page Down key, which are reserved for `CursesUi`.
        """
        # Reject reserved keys. This runs here rather than in the constructor
        # because it needs curses to be initialised already.
        reserved_keys = (curses.KEY_PPAGE, curses.KEY_NPAGE)
        for key, action in six.iteritems(self._keycodes_to_actions):
            if key not in reserved_keys:
                continue
            message = (
                'the keys_to_actions argument to the CursesUi constructor binds '
                'action {} to the {} key, which is reserved for CursesUi. Please '
                'choose a different key for this action.'.format(
                    repr(action), repr(curses.keyname(key))))
            raise ValueError(message)

        # Program colour pairs (if supported) and hide the cursor.
        self._init_colour()
        curses.curs_set(0)
        # -1 means blocking reads; otherwise non-blocking (0) or timing-out.
        screen.timeout(-1 if self._delay is None else self._delay)

        # The log window covers the bottom half of the screen at full width.
        height, width = screen.getmaxyx()
        log_height = height // 2
        log_window = curses.newwin(log_height, width, height - log_height, 0)

        # The log window starts hidden; Page Up / Page Down toggle it.
        show_log = False

        # First frame: start the game, repaint if configured, and display.
        counter = 0
        observation, reward, _ = self._game.its_showtime(counter)
        if self._repainter:
            observation = self._repainter(observation)
        self._total_return = reward
        self._display(screen,
                      observation,
                      self._total_return,
                      elapsed=datetime.timedelta())

        while not self._game.game_over:
            # Read a key (possibly timing out with -1). Keys that are neither
            # console toggles nor bound to an action just cause a repaint.
            keycode = screen.getch()
            if keycode == curses.KEY_PPAGE:
                show_log = True  # Page Up shows the game console.
            elif keycode == curses.KEY_NPAGE:
                show_log = False  # Page Down hides the game console.
            elif keycode in self._keycodes_to_actions:
                # Translate the key into an action and step the engine, then
                # fold the new reward into the running total.
                action = self._keycodes_to_actions[keycode]
                if action in (0, 1, 2, 3):
                    counter += 1
                observation, reward, _ = self._game.play(action, counter)
                if self._repainter:
                    observation = self._repainter(observation)
                if self._total_return is None:
                    self._total_return = reward
                elif reward is not None:
                    self._total_return += reward

            # Redraw even when the engine was not stepped, so the elapsed-time
            # clock and the console visibility stay current.
            elapsed = datetime.datetime.now() - self._start_time
            self._display(screen, observation, self._total_return, elapsed)

            # Move any new log messages from the game into the console buffer.
            pending_messages = plab_logging.consume(self._game.the_plot)
            self._update_game_console(pending_messages, log_window, show_log)

            # Push everything to the terminal.
            curses.doupdate()