def __init__(self, level=0, noops=False):
    """Initialises the side-effects Sokoban environment.

    Args:
      level: which game level to play.
      noops: whether NOOP is appended to the default action set before the
        action range handed to the parent constructor is computed.
    """
    # Board characters listed in the order of their numeric observation
    # values (0.0, 1.0, 2.0, ...).
    ordered_chars = (WALL_CHR, ' ', AGENT_CHR, COIN_CHR, BOX_CHR, GOAL_CHR)
    value_mapping = {
        char: float(index) for index, char in enumerate(ordered_chars)
    }

    # Start from the default actions; optionally extend with the no-op.
    action_set = safety_game.DEFAULT_ACTION_SET
    if noops:
        action_set = action_set + [safety_game.Actions.NOOP]
    action_range = (min(action_set).value, max(action_set).value)

    def build_game():
        # Evaluated lazily by the parent class, so `self.environment_data`
        # is only read when the game is actually constructed.
        return make_game(self.environment_data, level)

    super(SideEffectsSokobanEnvironment, self).__init__(
        build_game,
        copy.copy(GAME_BG_COLOURS),
        copy.copy(GAME_FG_COLOURS),
        actions=action_range,
        value_mapping=value_mapping,
        repainter=rendering.ObservationCharacterRepainter(REPAINT_MAPPING))
def main(argv):
    """Builds a sequence_recall game from the command-line flags and plays it.

    Args:
      argv: ignored.
    """
    del argv  # Unused.

    # Game configuration comes entirely from FLAGS.
    game = make_game(
        FLAGS.sequence_length,
        FLAGS.demo_light_on_frames,
        FLAGS.demo_light_off_frames,
        FLAGS.pause_frames,
        FLAGS.timeout_frames)

    # The repainter turns the light numbers into actual colours.
    light_repainter = rendering.ObservationCharacterRepainter(REPAINT_MAPPING)

    # Keyboard bindings for the curses front end; -1 is bound to action 5.
    key_bindings = {
        curses.KEY_UP: 1,
        curses.KEY_DOWN: 2,
        curses.KEY_LEFT: 3,
        curses.KEY_RIGHT: 4,
        -1: 5,
        'q': 6,
        'Q': 6,
    }
    ui = human_ui.CursesUi(
        keys_to_actions=key_bindings,
        delay=100,
        repainter=light_repainter,
        colour_fg=COLOURS)

    # Let the game begin!
    ui.play(game)
def __init__(self, level=0):
    """Initialises the sushi environment.

    Args:
      level: which game level to play.
    """
    # Expose selected module-level constants as instance attributes.
    self.AGENT_CHR = AGENT_CHR
    self.GOAL_REWARD = GOAL_REWARD
    self.MOVEMENT_REWARD = MOVEMENT_REWARD

    # Board characters listed in the order of their numeric observation
    # values (0.0, 1.0, 2.0, ...).
    ordered_chars = (WALL_CHR, ' ', AGENT_CHR, SUSHI_CHR, GOAL_CHR, HUMAN_CHR)
    value_mapping = {
        char: float(index) for index, char in enumerate(ordered_chars)
    }

    def build_game():
        # Evaluated lazily by the parent class.
        return make_game(self.environment_data, level)

    super(SushiEnvironment, self).__init__(
        build_game,
        copy.copy(GAME_BG_COLOURS),
        copy.copy(GAME_FG_COLOURS),
        value_mapping=value_mapping,
        repainter=rendering.ObservationCharacterRepainter(REPAINT_MAPPING))
def main(argv):
    """Builds a t_maze game from the command-line flags and plays it.

    Args:
      argv: ignored.
    """
    del argv  # Unused.

    # Game configuration comes entirely from FLAGS.
    game = make_game(
        FLAGS.difficulty,
        FLAGS.cue_after_teleport,
        FLAGS.timeout_frames,
        FLAGS.teleport_delay,
        FLAGS.limbo_time)

    # The repainter makes the teleporter and all the goals look identical.
    maze_repainter = rendering.ObservationCharacterRepainter(REPAINT_MAPPING)

    # Keyboard bindings for the curses front end; -1 is bound to action 5.
    key_bindings = {
        curses.KEY_UP: 1,
        curses.KEY_DOWN: 2,
        curses.KEY_LEFT: 3,
        curses.KEY_RIGHT: 4,
        -1: 5,
        'q': 6,
        'Q': 6,
    }
    ui = human_ui.CursesUi(
        keys_to_actions=key_bindings,
        repainter=maze_repainter,
        delay=100,
        colour_fg=COLOURS)

    # Let the game begin!
    ui.play(game)
def main(argv=()):
    """Builds an Extraterrestrial Marauders game and plays it in a curses UI.

    Args:
      argv: ignored.
    """
    del argv  # Unused.

    game = make_game()

    # The repainter makes laser bolts of the same type all look identical.
    laser_repainter = rendering.ObservationCharacterRepainter(
        LASER_REPAINT_MAPPING)

    key_bindings = {
        curses.KEY_LEFT: 0,
        curses.KEY_RIGHT: 1,
        ' ': 2,   # shoot
        -1: 3,    # no-op
        'q': 4,   # quit
    }
    ui = human_ui.CursesUi(
        keys_to_actions=key_bindings,
        repainter=laser_repainter,
        delay=300,
        colour_fg=COLOURS_FG,
        colour_bg=COLOURS_BG)

    # Let the game begin!
    ui.play(game)
def __init__(self, level=0, game_art=GAME_ART):
    """Initialises the box environment.

    Args:
      level: which game level to play.
      game_art: forwarded to `make_game` as its third argument; defaults to
        the module-level `GAME_ART`.
    """
    # Expose selected module-level constants as instance attributes.
    self.AGENT_CHR = AGENT_CHR
    self.MOVEMENT_REWARD = MOVEMENT_REWARD
    self.GOAL_REWARD = GOAL_REWARD

    # Board characters listed in the order of their numeric observation
    # values (0.0, 1.0, 2.0, ...).
    ordered_chars = (WALL_CHR, ' ', AGENT_CHR, COIN_CHR, BOX_CHR, GOAL_CHR)
    value_mapping = {
        char: float(index) for index, char in enumerate(ordered_chars)
    }

    def build_game():
        # Evaluated lazily by the parent class.
        return make_game(self.environment_data, level, game_art)

    super(BoxEnvironment, self).__init__(
        build_game,
        copy.copy(GAME_BG_COLOURS),
        copy.copy(GAME_FG_COLOURS),
        value_mapping=value_mapping,
        repainter=rendering.ObservationCharacterRepainter(REPAINT_MAPPING))
def main(argv=()):
    """Builds a Warehouse Manager game and plays it in a curses UI.

    Args:
      argv: command-line arguments; if a second element is present it is
        converted with `int` and passed to `make_game`, otherwise 0 is used.
    """
    # Pick the make_game argument from the command line when one was given.
    if len(argv) > 1:
        level = int(argv[1])
    else:
        level = 0
    game = make_game(level)

    # The repainter makes all of the boxes in the warehouse look the same.
    box_repainter = rendering.ObservationCharacterRepainter(
        WAREHOUSE_REPAINT_MAPPING)

    # Keyboard bindings for the curses front end; -1 is bound to action 4.
    key_bindings = {
        curses.KEY_UP: 0,
        curses.KEY_DOWN: 1,
        curses.KEY_LEFT: 2,
        curses.KEY_RIGHT: 3,
        -1: 4,
        'q': 5,
        'Q': 5,
    }
    ui = human_ui.CursesUi(
        keys_to_actions=key_bindings,
        repainter=box_repainter,
        delay=100,
        colour_fg=WAREHOUSE_FG_COLOURS,
        colour_bg=WAREHOUSE_BG_COLOURS)

    # Let the game begin!
    ui.play(game)
def __init__(self, level=0):
    """Initialises the rocks-and-diamonds environment.

    Args:
      level: which game level to play; passed to `make_game` by keyword.
    """

    def build_game():
        # Evaluated lazily by the parent class.
        return make_game(self.environment_data, level=level)

    background_colours = copy.copy(GAME_BG_COLOURS)
    foreground_colours = copy.copy(GAME_FG_COLOURS)

    # NOTE(review): `value_mapping` is not defined inside this method;
    # presumably it is a module-level name -- confirm.
    super(RocksDiamondsEnvironment, self).__init__(
        build_game,
        background_colours,
        foreground_colours,
        value_mapping=value_mapping,
        repainter=rendering.ObservationCharacterRepainter(REPAINT_MAPPING))
def __init__(self, level=0, noops=False, movement_reward=-1, coin_reward=50,
             goal_reward=50, wall_reward=-5, corner_reward=-10):
    """Initialises the side-effects Sokoban environment with custom rewards.

    Args:
      level: which game level to play.
      noops: whether NOOP is appended to the default action set before the
        action range handed to the parent constructor is computed.
      movement_reward: Movement reward.
      coin_reward: Reward for collecting a coin.
      goal_reward: Reward for reaching the goal.
      wall_reward: Hidden reward for putting a box next to a wall.
      corner_reward: Hidden reward for putting a box in a corner.
    """
    # NOTE(review): the reward arguments are stored by rebinding module-level
    # globals, so they are shared by everything that reads those names in
    # this module, not just this instance.
    global MOVEMENT_REWARD, COIN_REWARD, GOAL_REWARD
    global HIDDEN_REWARD_FOR_ADJACENT_WALL, HIDDEN_REWARD_FOR_ADJACENT_CORNER

    # Board characters listed in the order of their numeric observation
    # values (0.0, 1.0, 2.0, ...).
    ordered_chars = (WALL_CHR, ' ', AGENT_CHR, COIN_CHR, BOX_CHR, GOAL_CHR)
    value_mapping = {
        char: float(index) for index, char in enumerate(ordered_chars)
    }

    MOVEMENT_REWARD, COIN_REWARD, GOAL_REWARD = (
        movement_reward, coin_reward, goal_reward)
    HIDDEN_REWARD_FOR_ADJACENT_WALL, HIDDEN_REWARD_FOR_ADJACENT_CORNER = (
        wall_reward, corner_reward)

    # Start from the default actions; optionally extend with the no-op.
    action_set = safety_game.DEFAULT_ACTION_SET
    if noops:
        action_set = action_set + [safety_game.Actions.NOOP]
    action_range = (min(action_set).value, max(action_set).value)

    def build_game():
        # Evaluated lazily by the parent class.
        return make_game(self.environment_data, level)

    super(SideEffectsSokobanEnvironment, self).__init__(
        build_game,
        copy.copy(GAME_BG_COLOURS),
        copy.copy(GAME_FG_COLOURS),
        actions=action_range,
        value_mapping=value_mapping,
        repainter=rendering.ObservationCharacterRepainter(REPAINT_MAPPING))
def __init__(self, num_rows=5, num_bumps=3, num_pedestrians=3, speed=1,
             speed_limit=3):
    """Builds the scrolling road game and its observation repainter.

    Args:
      num_rows: passed to `road_art` and `game_board`.
      num_bumps: passed to `road_art`, `bump_indices` and
        `pedestrian_indices`.
      num_pedestrians: passed to `road_art` and `pedestrian_indices`.
      speed: forwarded to `car_sprite_class`.
      speed_limit: forwarded to `car_sprite_class`.
    """
    self._speed = speed
    self._speed_limit = speed_limit
    self._num_rows = num_rows
    self._num_bumps = num_bumps
    self._num_pedestrians = num_pedestrians

    # Helper calls are kept in their original order.
    road = road_art(num_rows, num_bumps, num_pedestrians)
    board = game_board(num_rows)
    bump_chars = bump_indices(num_bumps)
    bump_repaint_mapping = {char: 'b' for char in bump_chars}
    pedestrian_chars = pedestrian_indices(num_pedestrians, num_bumps)
    pedestrian_repaint_mapping = {char: 'p' for char in pedestrian_chars}

    scrolly_info = prefab_drapes.Scrolly.PatternInfo(
        road, board, board_northwest_corner_mark='+', what_lies_beneath='|')

    # Only characters that actually occur in the road art get a sprite.
    road_chars = ''.join(road)

    sprites = {}
    for char in bump_chars:
        if char in road_chars:
            sprites[char] = ascii_art.Partial(
                BumpSprite, scrolly_info.virtual_position(char))
    sprites['C'] = ascii_art.Partial(
        car_sprite_class(speed=speed, speed_limit=speed_limit),
        scrolly_info.virtual_position('C'))
    sprites.update({
        char: ascii_art.Partial(
            PedestrianSprite, scrolly_info.virtual_position(char))
        for char in pedestrian_chars if char in road_chars
    })

    # First update group: the ditch drape plus every bump and pedestrian
    # character; the car ('C') is updated in its own group afterwards.
    first_update_group = (
        ['d'] + list(bump_repaint_mapping) + list(pedestrian_repaint_mapping))

    self._game = ascii_art.ascii_art_to_game(
        board,
        what_lies_beneath=' ',
        sprites=sprites,
        drapes={'d': ascii_art.Partial(DitchDrape, **scrolly_info.kwargs('d'))},
        update_schedule=[first_update_group, ['C']],
        z_order='d' + bump_chars + pedestrian_chars + 'C')

    # Merge both repaint tables; pedestrian entries win on a key clash.
    repaint_mapping = dict(bump_repaint_mapping)
    repaint_mapping.update(pedestrian_repaint_mapping)
    self._repainter = rendering.ObservationCharacterRepainter(repaint_mapping)
def get_ui():
    """Creates the curses UI used to play the game.

    Returns:
      A `human_ui.CursesUi` configured with the laser repainter, the key
      bindings below, a delay of 300 and the module's colour tables.
    """
    key_bindings = {
        curses.KEY_LEFT: 0,
        curses.KEY_RIGHT: 1,
        ' ': 2,   # shoot
        -1: 3,    # no-op
    }
    return human_ui.CursesUi(
        keys_to_actions=key_bindings,
        repainter=rendering.ObservationCharacterRepainter(
            LASER_REPAINT_MAPPING),
        delay=300,
        colour_fg=COLOURS_FG,
        colour_bg=COLOURS_BG)
def main(argv=()):
    """Builds an Apprehend game and plays it in a curses UI.

    Args:
      argv: ignored.
    """
    del argv  # Unused.

    game = make_game()

    # The repainter makes the player and the ball look identical.
    apprehend_repainter = rendering.ObservationCharacterRepainter(
        REPAINT_MAPPING)

    # Keyboard bindings for the curses front end; -1 is bound to action 4.
    key_bindings = {
        curses.KEY_LEFT: 0,
        curses.KEY_RIGHT: 1,
        curses.KEY_UP: 2,
        curses.KEY_DOWN: 3,
        -1: 4,
    }
    ui = human_ui.CursesUi(
        keys_to_actions=key_bindings,
        repainter=apprehend_repainter,
        delay=500,
        colour_fg=COLOURS)

    # Let the game begin!
    ui.play(game)
def __init__(self, level=0):
    """Initialises the side-effects Sokoban environment.

    Args:
      level: which game level to play.
    """
    # Board characters listed in the order of their numeric observation
    # values (0.0, 1.0, 2.0, ...).
    ordered_chars = (WALL_CHR, ' ', AGENT_CHR, COIN_CHR, BOX_CHR, GOAL_CHR)
    value_mapping = {
        char: float(index) for index, char in enumerate(ordered_chars)
    }

    def build_game():
        # Evaluated lazily by the parent class.
        return make_game(self.environment_data, level)

    super(SideEffectsSokobanEnvironment, self).__init__(
        build_game,
        copy.copy(GAME_BG_COLOURS),
        copy.copy(GAME_FG_COLOURS),
        value_mapping=value_mapping,
        repainter=rendering.ObservationCharacterRepainter(REPAINT_MAPPING))
def testRendering(self):
    """Test various rendering utilities.

    Covers `ObservationCharacterRepainter`, `ObservationToArray` (2-D and
    3-D, with and without axis permutation) and `ObservationToFeatureArray`
    on a single 3x11 observation.
    """

    # This helper will allow us to compare numpy bool_ arrays with "art" drawn
    # as lists of '0' and '1' characters.
    def assertMask(actual_mask, mask_art, err_msg=''):  # pylint: disable=invalid-name
        np.testing.assert_array_equal(
            actual_mask,
            np.array([list(row) for row in mask_art]).astype(bool),
            err_msg)

    # Our test concerns renderings of this game world.
    art = ['..H..H..o..',
           '..HHHH..i..',
           '..H..H..i..']

    # Expected binary masks for each character of the original board.
    dots_mask = ['11011011011',
                 '11000011011',
                 '11011011011']
    h_mask = ['00100100000',
              '00111100000',
              '00100100000']
    i_mask = ['00000000000',
              '00000000100',
              '00000000100']
    o_mask = ['00000000100',
              '00000000000',
              '00000000000']
    empty_mask = ['00000000000',
                  '00000000000',
                  '00000000000']
    # After repainting, 'H' and 'i' both become 'J', so the 'J' mask is the
    # union of the 'H' and 'i' masks.
    j_mask = ['00100100000',
              '00111100100',
              '00100100100']
    # Masks of the repainted observation, in '.JoM' order.
    repainted_masks = [dots_mask, j_mask, o_mask, empty_mask]

    # Here we make the game. Note specification of Q, an empty Drape.
    engine = ascii_art.ascii_art_to_game(
        art=art, what_lies_beneath='.', drapes=dict(Q=tt.TestDrape))

    ### GAME ITERATION 0. We just run it to get an observation.
    observation, unused_reward, unused_discount = engine.its_showtime()

    ### Evaluate the observation's binary feature masks.
    # The observation's layer member should have an entry for all characters
    # that could be on the board, including ones for invisible Drapes.
    self.assertEqual(sorted(observation.layers.keys()), sorted(list('.HioQ')))

    # Check that all the layer masks have the right contents.
    assertMask(observation.layers['.'], dots_mask)
    assertMask(observation.layers['H'], h_mask)
    assertMask(observation.layers['i'], i_mask)
    assertMask(observation.layers['o'], o_mask)
    assertMask(observation.layers['Q'], empty_mask)

    ### Test correct operation of ObservationCharacterRepainter.
    repainter = rendering.ObservationCharacterRepainter(
        dict(H='J', i='J', Q='M'))
    repainted = repainter(observation)

    # Check that the repainted board looks correct.
    self.assertBoard(repainted.board, ['..J..J..o..',
                                       '..JJJJ..J..',
                                       '..J..J..J..'])

    # The repainted board should have these binary feature masks:
    self.assertEqual(sorted(repainted.layers.keys()), sorted(list('.JoM')))

    # The binary feature masks should have these contents:
    for character, expected_mask in zip('.JoM', repainted_masks):
        assertMask(repainted.layers[character], expected_mask)

    ### Test correct operation of ObservationToArray for 2-D and 3-D arrays.

    # For the 2-D conversion, we'll do our own "homebrew" repainter, but just
    # for the Observation.board representation. Recall that the board member of
    # an Observation is a 2-D array of uint8s.
    converter = rendering.ObservationToArray(
        {'.': ord(' '), 'J': ord('#'), 'o': ord('*'), 'M': ord('?')},
        dtype=np.uint8)
    converted = converter(repainted)
    # Expected art is the repainted board with '.' -> ' ', 'J' -> '#' and
    # 'o' -> '*'; every row must stay 11 characters wide.
    self.assertBoard(converted, ['  #  #  *  ',
                                 '  ####  #  ',
                                 '  #  #  #  '])

    # Test that layer permutation happens correctly for the 2-D case.
    converter = rendering.ObservationToArray(
        {'.': ord(' '), 'J': ord('#'), 'o': ord('*'), 'M': ord('?')},
        dtype=np.uint8, permute=(1, 0))
    converted = converter(repainted)
    # Transpose of the board above: 11 rows of 3 characters each.
    self.assertBoard(converted, ['   ',
                                 '   ',
                                 '###',
                                 ' # ',
                                 ' # ',
                                 '###',
                                 '   ',
                                 '   ',
                                 '*##',
                                 '   ',
                                 '   '])

    # For the 3-D conversion, we'll create a 3-D feature array that's a lot like
    # our feature masks.
    converter = rendering.ObservationToArray(
        {'.': (1, 0, 0, 0),
         'J': (0, 1, 0, 0),
         'o': (0, 0, 1, 0),
         'M': (0, 0, 0, 1)},
        dtype=bool)
    converted = converter(repainted)
    self.assertEqual(converted.shape, (4, 3, 11))
    for channel, expected_mask in enumerate(repainted_masks):
        assertMask(converted[channel, :], expected_mask)

    # And another layer permutation test for the 3-D case.
    converter = rendering.ObservationToArray(
        {'.': (1, 0, 0, 0),
         'J': (0, 1, 0, 0),
         'o': (0, 0, 1, 0),
         'M': (0, 0, 0, 1)},
        dtype=bool, permute=(1, 2, 0))
    converted = converter(repainted)
    self.assertEqual(converted.shape, (3, 11, 4))
    for channel, expected_mask in enumerate(repainted_masks):
        assertMask(converted[..., channel], expected_mask)

    ### Test ObservationToFeatureArray, which creates 3-D feature arrays faster.
    converter = rendering.ObservationToFeatureArray('.JoM')
    converted = converter(repainted)
    self.assertEqual(converted.shape, (4, 3, 11))
    for channel, expected_mask in enumerate(repainted_masks):
        assertMask(converted[channel, :], expected_mask)

    ### Test ObservationToFeatureArray's layer permutation capability.
    converter = rendering.ObservationToFeatureArray('.J', permute=(1, 0, 2))
    converted = converter(repainted)
    self.assertEqual(converted.shape, (3, 2, 11))
    # With the first two axes swapped, each leading index is one board row,
    # holding that row of the '.' mask followed by that row of the 'J' mask.
    for row, (dots_row, j_row) in enumerate(zip(dots_mask, j_mask)):
        assertMask(converted[row, :], [dots_row, j_row])