def _update(self, screen):
  """Play one game step with `self.action` and redraw the curses display.

  The observation, reward and discount returned by the game engine are stored
  on `self`, the reward is folded into `self._total_return`, the game board
  and the log console are repainted, and finally the method waits for a
  keypress (subject to whatever timeout is configured on `screen`).

  Args:
    screen: the main, full-screen curses window.
  """

  def crop_and_repaint(observation):
    # Apply every cropper to the observation, then repaint each cropped view.
    # With several views the repainted results are deep-copied, because the
    # single shared repainter is called once per view and a later call could
    # otherwise overwrite what an earlier call returned.
    observations = [cropper.crop(observation) for cropper in self._croppers]
    if not self._repainter:
      return observations
    if len(observations) == 1:
      return [self._repainter(observations[0])]
    return [copy.deepcopy(self._repainter(obs)) for obs in observations]

  # Advance the game and remember the raw results of this step.
  action = self.action
  observation, reward, discount = self._game.play(action)
  self.observation = observation
  self.reward = reward
  self.discount = discount
  observations = crop_and_repaint(observation)

  # The running total starts out as None; a None reward leaves it untouched.
  if self._total_return is None:
    self._total_return = reward
  elif reward is not None:
    self._total_return += reward

  elapsed = datetime.datetime.now() - self._start_time
  # NOTE: `self.display` is shown here in place of `self._total_return`,
  # which an earlier version of this call passed instead.
  self._display(screen, observations, self.display, elapsed)

  # Move any new log messages from the game into the console window.
  self._update_game_console(
      plab_logging.consume(self._game.the_plot), self.console,
      self.paint_console)

  # Show the screen to the user.
  curses.doupdate()

  # Wait for a key. getch() is used instead of getkey() because getkey()
  # raises curses.error when a timed-out or non-blocking read finds no input,
  # whereas getch() simply returns -1. The pressed key itself is not used.
  screen.getch()
def _init_curses(self, screen):
  """Configure curses, start the game, and draw its first frame.

  Sets up colours, cursor visibility and the read timeout, creates the log
  console window (stored as `self.console`, hidden by default through
  `self.paint_console`), starts the game with `its_showtime()`, stores the
  first observation, reward and discount on `self`, paints the first frame
  and then waits for a keypress.

  Args:
    screen: the main, full-screen curses window.
  """
  self._init_colour()
  curses.curs_set(0)  # Hide the cursor.
  if self._delay is None:
    screen.timeout(-1)  # Blocking reads.
  else:
    screen.timeout(self._delay)  # Non-blocking (if 0) or timing-out reads.

  # Create the curses window for the log display: it spans the full width and
  # occupies the bottom half of the screen.
  rows, cols = screen.getmaxyx()
  self.console = curses.newwin(rows // 2, cols, rows - (rows // 2), 0)

  # By default, the log display window is hidden.
  self.paint_console = False

  def crop_and_repaint(observation):
    # Apply every cropper to the observation, then repaint each cropped view.
    # With several views the repainted results are deep-copied, because the
    # single shared repainter is called once per view and a later call could
    # otherwise overwrite what an earlier call returned.
    observations = [cropper.crop(observation) for cropper in self._croppers]
    if not self._repainter:
      return observations
    if len(observations) == 1:
      return [self._repainter(observations[0])]
    return [copy.deepcopy(self._repainter(obs)) for obs in observations]

  # Kick off the game and remember the raw results of the first step.
  observation, reward, discount = self._game.its_showtime()
  self.observation = observation
  self.reward = reward
  self.discount = discount
  observations = crop_and_repaint(observation)

  # The first reward initialises the running total.
  self._total_return = reward
  self._display(screen, observations, self._total_return,
                elapsed=datetime.timedelta())

  # Move any new log messages from the game into the console window.
  self._update_game_console(
      plab_logging.consume(self._game.the_plot), self.console,
      self.paint_console)

  # Show the screen to the user.
  curses.doupdate()

  # Wait for a key. getch() is used instead of getkey() because getkey()
  # raises curses.error when a timed-out or non-blocking read finds no input
  # (possible whenever `self._delay` is not None), whereas getch() simply
  # returns -1. The pressed key itself is not used.
  screen.getch()
def update(self, actions, board, layers, backdrop, things, the_plot):
  """Reward the current speed, then apply the requested action.

  `self.speed` is added to the plot's reward on every call. The action then
  selects one of the following:
    * UP: raise the speed by one, capped at `self._speed_limit`.
    * DOWN: lower the speed by one, never below 1.
    * LEFT / RIGHT / NO_OP: delegate to `_west` / `_east` / `_stay`.
    * 5: terminate the episode.
    * 6: print the log messages consumed from the plot.
  Any other action only collects the speed reward.

  Args:
    actions: the action selected for this step.
    board: forwarded to the movement helpers.
    layers: unused.
    backdrop: unused.
    things: unused.
    the_plot: receives the reward and is forwarded to the movement helpers.
  """
  del backdrop, things, layers  # Unused

  the_plot.add_reward(self.speed)

  # Speed changes.
  if actions == UP:
    self._speed = min(self.speed + 1, self._speed_limit)
    return
  if actions == DOWN:
    self._speed = max(self.speed - 1, 1)
    return

  # Movement.
  if actions == LEFT:
    self._west(board, the_plot)
    return
  if actions == RIGHT:
    self._east(board, the_plot)
    return
  if actions == NO_OP:
    self._stay(board, the_plot)
    return

  # Housekeeping actions identified by their raw numeric codes.
  if actions == 5:
    the_plot.terminate_episode()
    return
  if actions == 6:
    print(plab_logging.consume(the_plot))
def _init_curses_and_play(self, screen):
  """Configure curses, then let the agent play the game to completion.

  Meant to be handed to `curses.wrapper`, which is why the only argument is
  the main, full-screen curses window. No keyboard input is read: each step's
  action comes from `self.agent.agent_network`, followed by a 0.2 second
  pause.

  Args:
    screen: the main, full-screen curses window.

  Raises:
    ValueError: if any key in the `keys_to_actions` dict supplied to the
      constructor has already been reserved for use by `CursesUi`.
  """
  # Reject bindings on the reserved Page Up / Page Down keys. The check lives
  # here rather than in the constructor because it cannot run until curses is
  # actually initialised.
  reserved_keys = (curses.KEY_PPAGE, curses.KEY_NPAGE)
  for key, action in six.iteritems(self._keycodes_to_actions):
    if key in reserved_keys:
      raise ValueError(
          'the keys_to_actions argument to the CursesUi constructor binds '
          'action {} to the {} key, which is reserved for CursesUi. Please '
          'choose a different key for this action.'.format(
              repr(action), repr(curses.keyname(key))))

  # Program colour pairs (if supported) and hide the cursor.
  self._init_colour()
  curses.curs_set(0)

  # A delay of None means blocking reads; otherwise reads are non-blocking
  # (0) or time out after the delay.
  screen.timeout(-1 if self._delay is None else self._delay)

  # The log window covers the bottom half of the screen, full width.
  total_rows, total_cols = screen.getmaxyx()
  half_rows = total_rows // 2
  log_window = curses.newwin(half_rows, total_cols, total_rows - half_rows, 0)

  # The log window is hidden, and nothing in this loop ever shows it.
  show_log = False

  def render_views(raw_observation):
    # Crop the observation once per cropper, then repaint every crop. With
    # several crops the repainted results are deep-copied, since the one
    # shared repainter "owns" what it returns and may overwrite it on its
    # next call.
    cropped = [cropper.crop(raw_observation) for cropper in self._croppers]
    if not self._repainter:
      return cropped
    if len(cropped) == 1:
      return [self._repainter(cropped[0])]
    return [copy.deepcopy(self._repainter(view)) for view in cropped]

  # First frame: start the game, seed the total return, and draw.
  observation, reward, _ = self._game.its_showtime()
  views = render_views(observation)
  self._total_return = reward
  self._display(screen, views, self._total_return,
                elapsed=datetime.timedelta())

  # Main loop: one agent-chosen action per iteration until the game ends.
  while not self._game.game_over:
    # Ask the agent policy for the next action.
    action = self.agent.agent_network(observation, self._action_list)
    time.sleep(0.2)

    observation, reward, _ = self._game.play(action)
    views = render_views(observation)

    # A None total is replaced outright; a None reward is ignored.
    if self._total_return is None:
      self._total_return = reward
    elif reward is not None:
      self._total_return += reward

    # Redraw the board with the updated return and elapsed time.
    time_played = datetime.datetime.now() - self._start_time
    self._display(screen, views, self._total_return, time_played)

    # Feed new game log messages to the console buffer.
    pending_messages = plab_logging.consume(self._game.the_plot)
    self._update_game_console(pending_messages, log_window, show_log)

    # Push everything to the terminal.
    curses.doupdate()
def _init_curses_and_play(self, screen):
  """Set up an already-running curses; run a random-action episode.

  This method is intended to be passed as an argument to `curses.wrapper`, so
  its only argument is the main, full-screen curses window.

  This override drives the game through `self._env.step()` rather than
  `self._game.play()`. Every iteration pauses for 0.2 seconds, performs one
  read on `screen`, steps the environment with a uniformly random action
  drawn from `np.random.choice(4)`, and logs the agent position, the reward
  and the value returned by `danger_distance` to the game console, which is
  kept visible.

  The method never returns: once the environment reports game over it idles
  forever (presumably to keep the final frame on screen), so the process has
  to be interrupted from outside.

  Args:
    screen: the main, full-screen curses window.

  Raises:
    ValueError: if any key in the `keys_to_actions` dict supplied to the
      constructor has already been reserved for use by `CursesUi`.
  """
  # See whether the user is using any reserved keys. This check ought to be in
  # the constructor, but it can't run until curses is actually initialised, so
  # it's here instead. `.items()` is used because `.iteritems()` does not
  # exist on Python 3 dicts.
  for key, action in self._keycodes_to_actions.items():
    if key in (curses.KEY_PPAGE, curses.KEY_NPAGE):
      raise ValueError(
          'the keys_to_actions argument to the CursesUi constructor binds '
          'action {} to the {} key, which is reserved for CursesUi. Please '
          'choose a different key for this action.'.format(
              repr(action), repr(curses.keyname(key))))

  # If the terminal supports colour, program the colours into curses as
  # "colour pairs". Update our dict mapping characters to colour pairs.
  self._init_colour()
  curses.curs_set(0)  # We don't need to see the cursor.
  if self._delay is None:
    screen.timeout(-1)  # Blocking reads
  else:
    screen.timeout(self._delay)  # Nonblocking (if 0) or timing-out reads

  # Create the curses window for the log display
  rows, cols = screen.getmaxyx()
  console = curses.newwin(rows // 2, cols, rows - (rows // 2), 0)

  # The log display window starts hidden; the play loop switches it on.
  paint_console = False

  # Board values used by the safety computation. 76 == ord('L') and
  # 65 == ord('A'); presumably the board stores the ASCII codes of the game
  # art characters -- TODO confirm against the environment.
  LAVA = 76
  AGENT = 65

  # Kick off the game---get first observation, repaint it if desired, and
  # display the first frame.
  self._env.reset()
  self._game = self._env.current_game
  # Use undistilled observations.
  observation = self._game._board  # pylint: disable=protected-access
  if self._repainter:
    observation = self._repainter(observation)
  self._display(screen, observation, self._env.episode_return,
                elapsed=datetime.timedelta())

  # Oh boy, play the game!
  while not self._env._game_over:  # pylint: disable=protected-access
    time.sleep(0.2)
    paint_console = True

    # The key itself is ignored; the read is kept because with blocking reads
    # it makes every step wait for a keypress.
    screen.getch()

    # Pick a random action, send it to the environment, read the reward.
    action = np.random.choice(4)
    timestep = self._env.step(action)
    reward = timestep.reward

    # Use undistilled observations.
    observation = self._game._board  # pylint: disable=protected-access

    # 0/1 mask of lava cells. `int` replaces the `np.int` alias, which was
    # removed from NumPy and always meant the builtin `int`.
    bad_mask = np.isin(observation.board, [LAVA]).astype(int)

    # Coordinates of the first cell holding the agent value. This raises
    # IndexError if no cell on the board matches.
    coords = np.where(np.isin(observation.board, [AGENT]))
    agent_y, agent_x = coords[0][0], coords[1][0]
    safety_term = danger_distance(agent_x, agent_y, bad_mask)
    self._game.the_plot.log(
        "agent_x={}, agent_y={}, reward={}, safety_term={}".format(
            agent_x, agent_y, reward, safety_term))
    # TODO: choose a danger scale by hand and combine it with the reward,
    # e.g. final_reward = reward + danger_scale_hyper * safety_term.

    if self._repainter:
      observation = self._repainter(observation)

    # Update the game display.
    elapsed = datetime.datetime.now() - self._start_time
    self._display(screen, observation, self._env.episode_return, elapsed)

    # Update game console message buffer with new messages from the game.
    self._update_game_console(
        plab_logging.consume(self._game.the_plot), console, paint_console)

    # Show the screen to the user.
    curses.doupdate()

  # Idle forever after the episode ends. Sleeping instead of spinning on
  # `pass` keeps the same never-returning behaviour without pinning a CPU
  # core.
  while True:
    time.sleep(1)
def _init_curses_and_play(self, screen):
  """Configure curses, then let `Model.AgentModel` play the game.

  Meant to be handed to `curses.wrapper`, which is why the only argument is
  the main, full-screen curses window. No keyboard input is read and no
  reserved-key validation is performed here: on every iteration
  `Model.AgentModel` is called with the latest observation, the keys of
  `self._randcodes_to_actions` and the latest reward, and its result is
  looked up in `self._randcodes_to_actions` to obtain the action to play.

  Args:
    screen: the main, full-screen curses window.
  """
  # Program colour pairs (if supported) and hide the cursor.
  self._init_colour()
  curses.curs_set(0)

  # A delay of None means blocking reads; otherwise reads are non-blocking
  # (0) or time out after the delay.
  screen.timeout(-1 if self._delay is None else self._delay)

  # The log window covers the bottom half of the screen, full width.
  total_rows, total_cols = screen.getmaxyx()
  half_rows = total_rows // 2
  log_window = curses.newwin(half_rows, total_cols, total_rows - half_rows, 0)

  # The log window is hidden, and nothing in this loop ever shows it.
  show_log = False

  def render_views(raw_observation):
    # Crop the observation once per cropper, then repaint every crop. With
    # several crops the repainted results are deep-copied, since the one
    # shared repainter "owns" what it returns and may overwrite it on its
    # next call.
    cropped = [cropper.crop(raw_observation) for cropper in self._croppers]
    if not self._repainter:
      return cropped
    if len(cropped) == 1:
      return [self._repainter(cropped[0])]
    return [copy.deepcopy(self._repainter(view)) for view in cropped]

  # First frame: start the game, seed the total return, and draw.
  observation, reward, _ = self._game.its_showtime()
  views = render_views(observation)
  self._total_return = reward
  self._display(screen, views, self._total_return,
                elapsed=datetime.timedelta())

  candidate_codes = self._randcodes_to_actions.keys()

  # Main loop: runs until the game engine reports game over.
  while not self._game.game_over:
    chosen_code = Model.AgentModel(observation, candidate_codes, reward)

    if chosen_code in self._randcodes_to_actions:
      # Translate the code into a game action, play it, and re-render.
      action = self._randcodes_to_actions[chosen_code]
      observation, reward, _ = self._game.play(action)
      views = render_views(observation)

      # A None total is replaced outright; a None reward is ignored.
      if self._total_return is None:
        self._total_return = reward
      elif reward is not None:
        self._total_return += reward

    # Redraw even when no action was played, so the clock keeps moving.
    time_played = datetime.datetime.now() - self._start_time
    self._display(screen, views, self._total_return, time_played)

    # Feed new game log messages to the console buffer.
    pending_messages = plab_logging.consume(self._game.the_plot)
    self._update_game_console(pending_messages, log_window, show_log)

    # Push everything to the terminal.
    curses.doupdate()
def _init_curses_and_play(self, screen):
  """Configure curses, then run the keyboard-driven interaction loop.

  Meant to be handed to `curses.wrapper`, which is why the only argument is
  the main, full-screen curses window. A counter starting at 0 is passed to
  `its_showtime()` and to every `play()` call; it is incremented just before
  `play()` whenever the chosen action is one of 0, 1, 2 or 3.

  Args:
    screen: the main, full-screen curses window.

  Raises:
    ValueError: if any key in the `keys_to_actions` dict supplied to the
      constructor has already been reserved for use by `CursesUi`.
  """
  # Reject bindings on the reserved Page Up / Page Down keys. The check lives
  # here rather than in the constructor because it cannot run until curses is
  # actually initialised.
  reserved_keys = (curses.KEY_PPAGE, curses.KEY_NPAGE)
  for key, action in six.iteritems(self._keycodes_to_actions):
    if key in reserved_keys:
      raise ValueError(
          'the keys_to_actions argument to the CursesUi constructor binds '
          'action {} to the {} key, which is reserved for CursesUi. Please '
          'choose a different key for this action.'.format(
              repr(action), repr(curses.keyname(key))))

  # Program colour pairs (if supported) and hide the cursor.
  self._init_colour()
  curses.curs_set(0)

  # A delay of None means blocking reads; otherwise reads are non-blocking
  # (0) or time out after the delay.
  screen.timeout(-1 if self._delay is None else self._delay)

  # The log window covers the bottom half of the screen, full width.
  total_rows, total_cols = screen.getmaxyx()
  half_rows = total_rows // 2
  log_window = curses.newwin(half_rows, total_cols, total_rows - half_rows, 0)

  # The log window starts hidden; Page Up / Page Down toggle it below.
  show_log = False

  # First frame: start the game, repaint if requested, seed the total return.
  step_counter = 0
  observation, reward, _ = self._game.its_showtime(step_counter)
  if self._repainter:
    observation = self._repainter(observation)
  self._total_return = reward
  self._display(screen, observation, self._total_return,
                elapsed=datetime.timedelta())

  # Main loop: runs until the game engine reports game over.
  while not self._game.game_over:
    # Read a key (or time out). Keys that are neither reserved nor bound to
    # an action, including the timeout "keycode" -1, only cause a repaint.
    keycode = screen.getch()

    if keycode in (curses.KEY_PPAGE, curses.KEY_NPAGE):
      # Page Up shows the game console; Page Down hides it.
      show_log = keycode == curses.KEY_PPAGE
    elif keycode in self._keycodes_to_actions:
      action = self._keycodes_to_actions[keycode]
      # Only actions 0-3 advance the counter handed to the engine.
      if action in (0, 1, 2, 3):
        step_counter += 1
      observation, reward, _ = self._game.play(action, step_counter)
      if self._repainter:
        observation = self._repainter(observation)

      # A None total is replaced outright; a None reward is ignored.
      if self._total_return is None:
        self._total_return = reward
      elif reward is not None:
        self._total_return += reward

    # Redraw even when no action was played, so the clock keeps moving.
    time_played = datetime.datetime.now() - self._start_time
    self._display(screen, observation, self._total_return, time_played)

    # Feed new game log messages to the console buffer.
    pending_messages = plab_logging.consume(self._game.the_plot)
    self._update_game_console(pending_messages, log_window, show_log)

    # Push everything to the terminal.
    curses.doupdate()