def eval_network(epoch, child_index, child_model):
    """Evaluate one child model on Super Mario Land and return its mean fitness.

    The child plays ``run_per_child`` games with a single life each. A game
    ends on game over or when the score reaches ``max_score``.

    Args:
        epoch: Current iteration number; only used in the log output.
        child_index: Index of the child within the iteration; only used in
            the log output.
        child_model: Model handed to ``get_action`` to choose every action.
            The returned value must support ``.detach().numpy()``.

    Returns:
        ``np.average`` of ``fitness_calc(score, level_progress, time_left)``
        over all finished games.
    """
    pyboy = PyBoy('SuperMarioLand.gb', game_wrapper=True)  # , window_type="headless")
    scores = []
    fitness_scores = []
    level_progress = []
    time_left = []

    try:
        pyboy.set_emulation_speed(3)
        mario = pyboy.game_wrapper()
        mario.start_game()
        # Start off with just one life each
        mario.set_lives_left(0)

        run = 0
        while run < run_per_child:
            action = get_action(pyboy, mario, child_model).detach().numpy()
            # One-hot encode the strongest output(s). Comparing against the
            # maximum in a single step stays correct when every output is
            # zero or negative, where zeroing the losers first would move
            # the maximum and flag the wrong entry.
            action = (action == np.max(action)).astype(int).reshape((2, ))
            do_action(action, pyboy)

            # Game over:
            if mario.game_over() or mario.score == max_score:
                scores.append(mario.score)
                fitness_scores.append(
                    fitness_calc(mario.score, mario.level_progress, mario.time_left))
                level_progress.append(mario.level_progress)
                time_left.append(mario.time_left)

                run += 1
                if run < run_per_child:
                    mario.reset_game()
    finally:
        # Always release the emulator, also when a run raises.
        pyboy.stop()

    child_fitness = np.average(fitness_scores)
    logger.info("-" * 20)
    logger.info("Iteration %s - child %s", epoch, child_index)
    logger.info("Score: %s, Level Progress: %s, Time Left %s", scores, level_progress, time_left)
    logger.info("Fitness: %s", child_fitness)
    return child_fitness
def eval_genome(genome, config):
    """Evaluate a NEAT genome on Super Mario Land and return its mean fitness.

    A feed-forward network is built from ``genome`` and plays
    ``run_per_child`` games with a single life each. A game ends on game over
    or when the score reaches ``max_score``.

    Args:
        genome: NEAT genome to evaluate.
        config: NEAT configuration used to build the network.

    Returns:
        ``np.average`` of ``fitness_calc(score, level_progress, time_left)``
        over all finished games.
    """
    pyboy = PyBoy('SuperMarioLand.gb', game_wrapper=True)  # , window_type="headless")
    fitness_scores = []

    try:
        pyboy.set_emulation_speed(0)
        mario = pyboy.game_wrapper()
        mario.start_game()
        # Start off with just one life each
        mario.set_lives_left(0)

        # Create NN from neat
        model = neat.nn.FeedForwardNetwork.create(genome, config)

        run = 0
        while run < run_per_child:
            action = get_action_neat(pyboy, mario, model)
            action = np.asarray([np.mean(val) for val in action])
            # One-hot encode the strongest output(s). Comparing against the
            # maximum in a single step stays correct when every output is
            # zero or negative, where zeroing the losers first would move
            # the maximum and flag the wrong entry.
            action = (action == np.max(action)).astype(int).reshape((2, ))
            do_action(action, pyboy)

            # Game over:
            if mario.game_over() or mario.score == max_score:
                fitness_scores.append(
                    fitness_calc(mario.score, mario.level_progress, mario.time_left))

                run += 1
                if run < run_per_child:
                    mario.reset_game()
    finally:
        # Always release the emulator, also when a run raises.
        pyboy.stop()

    return np.average(fitness_scores)
def test_mario_basics():
    """Check the initial game-wrapper values right after starting world 1-1."""
    pyboy = PyBoy(supermarioland_rom, window_type="dummy", game_wrapper=True)
    try:
        pyboy.set_emulation_speed(0)
        assert pyboy.cartridge_title() == "SUPER MARIOLAN"

        mario = pyboy.game_wrapper()
        mario.start_game(world_level=(1, 1))

        assert mario.score == 0
        assert mario.lives_left == 2
        assert mario.time_left == 400
        assert mario.world == (1, 1)
        assert mario.fitness == 0  # A built-in fitness score for AI development
    finally:
        # Stop the emulator even when an assertion above fails.
        pyboy.stop()
def test_mario_game_over():
    """Check that Mario reaches game over when walking right with no lives left."""
    pyboy = PyBoy(supermarioland_rom, window_type="dummy", game_wrapper=True)
    try:
        pyboy.set_emulation_speed(0)
        mario = pyboy.game_wrapper()
        mario.start_game()
        mario.set_lives_left(0)
        pyboy.send_input(WindowEvent.PRESS_ARROW_RIGHT)

        # Enough to game over correctly, and not long enough it'll work without setting the lives
        for _ in range(500):
            pyboy.tick()
            if mario.game_over():
                break
        else:
            # Without this branch the test could never fail: the loop would
            # simply run out and the function would return normally.
            raise AssertionError("Mario did not reach game over within 500 ticks")
    finally:
        # Stop the emulator even when the check above fails.
        pyboy.stop()
def test_mario_advanced():
    """Check wrapper values after starting in world 3-2 with the lives overridden."""
    pyboy = PyBoy(supermarioland_rom, window_type="dummy", game_wrapper=True)
    try:
        pyboy.set_emulation_speed(0)
        assert pyboy.cartridge_title() == "SUPER MARIOLAN"

        mario = pyboy.game_wrapper()
        mario.start_game(world_level=(3, 2))
        lives = 99
        mario.set_lives_left(lives)
        pyboy.tick()

        assert mario.score == 0
        assert mario.lives_left == lives
        assert mario.time_left == 400
        assert mario.world == (3, 2)
        assert mario.fitness == 10000 * lives + 6510  # A built-in fitness score for AI development
    finally:
        # Stop the emulator even when an assertion above fails.
        pyboy.stop()
from pyboy import PyBoy, WindowEvent  # isort:skip

# Check if the ROM is given through argv
if len(sys.argv) > 1:
    filename = sys.argv[1]
else:
    print("Usage: python mario_boiler_plate.py [ROM file]")
    # `sys.exit` rather than the bare `exit` helper: the latter is injected by
    # the `site` module for interactive use and is not guaranteed to exist.
    sys.exit(1)

quiet = "--quiet" in sys.argv
pyboy = PyBoy(filename,
              window_type="headless" if quiet else "SDL2",
              window_scale=3,
              debug=not quiet,
              game_wrapper=True)
pyboy.set_emulation_speed(.1)
assert pyboy.cartridge_title() == "SUPER MARIOLAN"

mario = pyboy.game_wrapper()
mario.start_game()

# Sanity-check the state reported by the game wrapper right after start.
assert mario.score == 0
assert mario.lives_left == 2
assert mario.time_left == 400
assert mario.world == (1, 1)
assert mario.fitness == 0  # A built-in fitness score for AI development
last_fitness = 0

print(mario)
pyboy.send_input(WindowEvent.PRESS_ARROW_RIGHT)
# NOTE(review): the visible loop body never advances the emulator, so the
# fitness cannot change between iterations -- presumably the excerpt is cut
# off before a `pyboy.tick()`; confirm against the full script.
for _ in range(1000):
    assert mario.fitness >= last_fitness
    last_fitness = mario.fitness
from pyboy import PyBoy, WindowEvent # isort:skip # Check if the ROM is given through argv if len(sys.argv) > 1: filename = sys.argv[1] else: print("Usage: python mario_boiler_plate.py [ROM file]") exit(1) quiet = "--quiet" in sys.argv pyboy = PyBoy(filename, window_type="headless" if quiet else "SDL2", window_scale=3, debug=not quiet, game_wrapper=True) pyboy.set_emulation_speed(0) assert pyboy.cartridge_title() == "TETRIS" tetris = pyboy.game_wrapper() tetris.start_game(timer_div=0x00) # The timer_div works like a random seed in Tetris assert tetris.next_tetromino() == "Z" assert tetris.score == 0 assert tetris.level == 0 assert tetris.lines == 0 assert tetris.fitness == 0 # A built-in fitness score for AI development blank_tile = 47 first_brick = False for frame in range(1000): # Enough frames for the test. Otherwise do: `while not pyboy.tick():` pyboy.tick() # The playing "technique" is just to move the Tetromino to the right. if frame % 2 == 0: # Even frames
def test_tetris():
    """End-to-end Tetris test covering tile maps, sprites and save states.

    The test starts a game through the menus, keeps pushing every Tetromino
    to the right, and checks the tile map and sprite table against known
    reference data once the first brick has landed. At frame 1013 the state
    is saved both to ``tmp.state`` and to an in-memory buffer. The game is
    then advanced to frame 1864, the board is recorded, and each saved state
    is loaded and replayed to verify it produces the same board.
    """
    # Memory address read below and compared with `tetris.next_tetromino()`.
    NEXT_TETROMINO = 0xC213

    pyboy = PyBoy(tetris_rom, bootrom_file="pyboy_fast", window_type="headless", game_wrapper=True)
    pyboy.set_emulation_speed(0)
    tetris = pyboy.game_wrapper()
    tetris.set_tetromino("T")

    first_brick = False
    tile_map = pyboy.botsupport_manager().tilemap_window()
    state_data = io.BytesIO()
    for frame in range(5282):  # Enough frames to get a "Game Over". Otherwise do: `while not pyboy.tick():`
        pyboy.tick()

        assert pyboy.botsupport_manager().screen().tilemap_position() == ((0, 0), (-7, 0))

        # Start game. Just press Start and A when the game allows us.
        # The frames are not 100% accurate.
        if frame == 144:
            pyboy.send_input(WindowEvent.PRESS_BUTTON_START)
        elif frame == 145:
            pyboy.send_input(WindowEvent.RELEASE_BUTTON_START)
        elif frame == 152:
            pyboy.send_input(WindowEvent.PRESS_BUTTON_A)
        elif frame == 153:
            pyboy.send_input(WindowEvent.RELEASE_BUTTON_A)
        elif frame == 156:
            pyboy.send_input(WindowEvent.PRESS_BUTTON_A)
        elif frame == 157:
            pyboy.send_input(WindowEvent.RELEASE_BUTTON_A)
        elif frame == 162:
            pyboy.send_input(WindowEvent.PRESS_BUTTON_A)
        elif frame == 163:
            pyboy.send_input(WindowEvent.RELEASE_BUTTON_A)

        # Play game. When we are past the 168th frame, the game has begun.
        # The "technique" is just to move the Tetromino to the right.
        elif frame > 168:
            if frame % 2 == 0:
                pyboy.send_input(WindowEvent.PRESS_ARROW_RIGHT)
            elif frame % 2 == 1:
                pyboy.send_input(WindowEvent.RELEASE_ARROW_RIGHT)

            # Show how we can read the tile data for the screen. We can use
            # this to see when one of the Tetrominos touch the bottom. This
            # could be used to extract a matrix of the occupied squares by
            # iterating from the top to the bottom of the screen.
            # Sidenote: The currently moving Tetromino is a sprite, so it
            # won't show up in the tile data. The tile data shows only the
            # placed Tetrominos.
            # We could also read out the score from the screen instead of
            # finding the corresponding value in RAM.
            if not first_brick:
                # 17 for the bottom tile when zero-indexed
                # 2 because we skip the border on the left side. Then we take a slice of 10 more tiles
                # 303 is the white background tile index
                if any(filter(lambda x: x != 303, tile_map[2:12, 17])):
                    first_brick = True
                    print(frame)
                    print("First brick touched the bottom!")

                    # Board read as plain tile identifiers.
                    game_board_matrix = list(tile_map[2:12, :18])
                    assert game_board_matrix == ([
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 133, 133, 133],
                        [303, 303, 303, 303, 303, 303, 303, 303, 133, 303]
                    ])

                    # Same board read through Tile objects; must give the same identifiers.
                    tile_map.use_tile_objects(True)

                    t1 = tile_map[0, 0]
                    t2 = tile_map.tile(0, 0)
                    t3 = tile_map.tile(1, 0)
                    assert t1 == t2, "Testing __eq__ method of Tile object"
                    assert t1 != t3, "Testing not __eq__ method of Tile object"

                    game_board_matrix = [[x.tile_identifier for x in row] for row in tile_map[2:12, :18]]
                    tile_map.use_tile_objects(False)
                    assert game_board_matrix == ([
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                        [303, 303, 303, 303, 303, 303, 303, 133, 133, 133],
                        [303, 303, 303, 303, 303, 303, 303, 303, 133, 303]
                    ])

            # The first brick is expected to land exactly on frame 1013.
            if frame == 1012:
                assert not first_brick

            if frame == 1013:
                assert first_brick

                s1 = pyboy.botsupport_manager().sprite(0)
                s2 = pyboy.botsupport_manager().sprite(1)
                assert s1 == s1
                assert s1 != s2
                assert s1.tiles[0] == s2.tiles[0], "Testing equal tiles of two different sprites"

                # Test that both ways of getting identifiers work and provides the same result.
                all_sprites = [
                    (s.x, s.y, s.tiles[0].tile_identifier, s.on_screen)
                    for s in [pyboy.botsupport_manager().sprite(n) for n in range(40)]
                ]
                all_sprites2 = [
                    (s.x, s.y, s.tile_identifier, s.on_screen)
                    for s in [pyboy.botsupport_manager().sprite(n) for n in range(40)]
                ]
                assert all_sprites == all_sprites2

                # Verify data with known reference
                # pyboy.botsupport_manager().screen().screen_image().show()
                assert all_sprites == ([
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (72, 128, 133, True),
                    (80, 128, 133, True),
                    (88, 128, 133, True),
                    (80, 136, 133, True),
                    (120, 112, 133, True),
                    (128, 112, 133, True),
                    (136, 112, 133, True),
                    (128, 120, 133, True),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                    (-8, -16, 0, False),
                ])

                assert pyboy.get_memory_value(NEXT_TETROMINO) == 24
                assert tetris.next_tetromino() == "T"

                # Snapshot the state twice: once to disk, once to the in-memory buffer.
                with open("tmp.state", "wb") as f:
                    pyboy.save_state(f)
                pyboy.save_state(state_data)
                break

    # Continue from the snapshot point and record the board at frame 1864.
    pre_load_game_board_matrix = None
    for frame in range(1015, 1865):
        pyboy.tick()
        if frame == 1864:
            game_board_matrix = list(tile_map[2:12, :18])
            assert game_board_matrix == ([
                [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                [303, 303, 303, 303, 303, 303, 303, 303, 303, 303],
                [303, 303, 303, 133, 133, 133, 303, 133, 133, 133],
                [303, 303, 303, 303, 133, 303, 303, 303, 133, 303]
            ])
            pre_load_game_board_matrix = game_board_matrix

    state_data.seek(0)  # Reset to the start of the buffer. Otherwise, we call `load_state` at end of file
    with open("tmp.state", "rb") as f:
        for _f in [f, state_data]:  # Tests both file-written state and in-memory state
            pyboy.load_state(_f)  # Reverts memory state to before we changed the Tetromino
            pyboy.tick()
            # Replay the same frame range; the resulting board must match the pre-load run.
            for frame in range(1015, 1865):
                pyboy.tick()

                if frame == 1864:
                    game_board_matrix = list(tile_map[2:12, :18])
                    assert game_board_matrix == pre_load_game_board_matrix
                    break
    os.remove("tmp.state")
    pyboy.stop(save=False)
def eval_genome(genome, config):
    """Evaluate a NEAT genome on Tetris and return the score it reaches.

    For every new block, all candidate placements (rotations combined with
    staying in the middle, moving left, or moving right) are simulated from a
    saved emulator state and rated with ``get_score``. The best-rated
    placement is then played for real. The game ends on game over or when the
    score reaches ``max_score``.

    Side effects:
        Updates the module-level ``max_fitness`` when this genome matches or
        beats it and, if the game also reached level 20 or higher, pickles
        the network to ``neat_models/<fitness>``.

    Args:
        genome: NEAT genome to evaluate.
        config: NEAT configuration used to build the network.

    Returns:
        The Tetris score at the end of the game, or 0 if the emulator loop
        ended before a game over was detected.
    """
    global max_fitness

    pyboy = PyBoy('tetris_1.1.gb', window_type='quiet', game_wrapper=True)
    try:
        pyboy.set_emulation_speed(0)
        tetris = pyboy.game_wrapper()
        tetris.start_game()

        # Set block animation to fall instantly
        pyboy.set_memory_value(0xff9a, 2)

        model = neat.nn.FeedForwardNetwork.create(genome, config)
        child_fitness = 0

        while not pyboy.tick():
            # Beginning of action
            # `-np.inf` instead of `np.NINF`, which NumPy 2.0 removed.
            best_action_score = -np.inf
            best_action = {'Turn': 0, 'Left': 0, 'Right': 0}
            begin_state = io.BytesIO()
            pyboy.save_state(begin_state)
            s_lines = tetris.lines

            # Determine how many possible rotations we need to check for the block
            block_tile = pyboy.get_memory_value(0xc203)
            turns_needed = check_needed_turn(block_tile)
            lefts_needed, rights_needed = check_needed_dirs(block_tile)

            # Try the middle first, then left, then right. With the strict
            # `>` below, the earliest candidate wins ties.
            candidates = (
                ('Middle', 1),
                ('Left', lefts_needed),
                ('Right', rights_needed),
            )
            for direction, n_dir in candidates:
                for move_dir in do_action(direction, n_dir=n_dir, n_turn=turns_needed, pyboy=pyboy):
                    score = get_score(tetris, model, s_lines, neat=True)
                    if score is not None and score > best_action_score:
                        best_action_score = score
                        best_action = {
                            'Turn': move_dir['Turn'],
                            'Left': move_dir['Left'],
                            'Right': move_dir['Right']
                        }
                    # Rewind the emulator before trying the next candidate.
                    begin_state.seek(0)
                    pyboy.load_state(begin_state)

            # Do best action
            for _ in range(best_action['Turn']):
                do_turn(pyboy)
            for _ in range(best_action['Left']):
                do_sideway(pyboy, 'Left')
            for _ in range(best_action['Right']):
                do_sideway(pyboy, 'Right')
            drop_down(pyboy)
            pyboy.tick()

            # Game over:
            if tetris.game_over() or tetris.score == max_score:
                child_fitness = tetris.score
                if tetris.score == max_score:
                    print("Max score reached")
                break

        # Dump best model
        if child_fitness >= max_fitness:
            max_fitness = child_fitness
            file_name = str(np.round(max_fitness, 2))
            if tetris.level >= 20:
                with open('neat_models/%s' % file_name, 'wb') as f:
                    pickle.dump(model, f)
    finally:
        # Always release the emulator, also when evaluation raises.
        pyboy.stop()
    return child_fitness
def eval_network(epoch, child_index, child_model):
    """Evaluate one child model on Tetris and return its mean score.

    The child plays ``run_per_child`` games. For every new block, all
    candidate placements (rotations combined with staying in the middle,
    moving left, or moving right) are simulated from a saved emulator state
    and rated with ``get_score``. The best-rated placement is then played for
    real. A game ends on game over or when the score reaches ``max_score``.

    Args:
        epoch: Current iteration number; only used in the log output.
        child_index: Index of the child within the iteration; only used in
            the log output.
        child_model: Model handed to ``get_score``. Its
            ``output.weight.data`` is logged next to ``feature_names``.

    Returns:
        ``np.average`` of the final scores of all finished games.
    """
    pyboy = PyBoy('tetris_1.1.gb', game_wrapper=True, window_type="headless")
    scores = []
    levels = []
    lines = []

    try:
        pyboy.set_emulation_speed(0)
        tetris = pyboy.game_wrapper()
        tetris.start_game()

        # Set block animation to fall instantly
        pyboy.set_memory_value(0xff9a, 2)

        run = 0
        while run < run_per_child:
            # Beginning of action
            # `-np.inf` instead of `np.NINF`, which NumPy 2.0 removed.
            best_action_score = -np.inf
            best_action = {'Turn': 0, 'Left': 0, 'Right': 0}
            begin_state = io.BytesIO()
            pyboy.save_state(begin_state)
            # Number of lines at the start
            s_lines = tetris.lines

            # Determine how many possible rotations we need to check for the block
            block_tile = pyboy.get_memory_value(0xc203)
            turns_needed = check_needed_turn(block_tile)
            lefts_needed, rights_needed = check_needed_dirs(block_tile)

            # Try the middle first, then left, then right. With the `>=`
            # below, the latest candidate wins ties.
            candidates = (
                ('Middle', 1),
                ('Left', lefts_needed),
                ('Right', rights_needed),
            )
            for direction, n_dir in candidates:
                for move_dir in do_action(direction, pyboy, n_dir=n_dir, n_turn=turns_needed):
                    score = get_score(tetris, child_model, s_lines)
                    if score is not None and score >= best_action_score:
                        best_action_score = score
                        best_action = {
                            'Turn': move_dir['Turn'],
                            'Left': move_dir['Left'],
                            'Right': move_dir['Right']
                        }
                    # Rewind the emulator before trying the next candidate.
                    begin_state.seek(0)
                    pyboy.load_state(begin_state)

            # Do best action
            for _ in range(best_action['Turn']):
                do_turn(pyboy)
            for _ in range(best_action['Left']):
                do_sideway(pyboy, 'Left')
            for _ in range(best_action['Right']):
                do_sideway(pyboy, 'Right')
            drop_down(pyboy)
            pyboy.tick()

            # Game over:
            if tetris.game_over() or tetris.score == max_score:
                scores.append(tetris.score)
                levels.append(tetris.level)
                lines.append(tetris.lines)

                run += 1
                if run < run_per_child:
                    tetris.reset_game()
    finally:
        # Always release the emulator, also when a run raises.
        pyboy.stop()

    child_fitness = np.average(scores)
    logger.info("-" * 20)
    logger.info("Iteration %s - child %s", epoch, child_index)
    logger.info("Score: %s, Level: %s, Lines %s", scores, levels, lines)
    logger.info("Fitness: %s", child_fitness)
    logger.info("Output weight:")
    weights = {
        name: np.round(weight, 3)
        for name, weight in zip(feature_names, child_model.output.weight.data.tolist()[0])
    }
    logger.info(weights)
    return child_fitness
class Environment:
    """Step-based wrapper around a PyBoy Super Mario Land session.

    The environment exposes ``start``/``reset``/``step``/``stop`` plus an
    observation (``obs``) in which raw tile identifiers are collapsed into a
    handful of category codes (see ``normalize_input``).
    """

    def __init__(self, filename, max_steps, visualize=False):
        """Open the ROM and initialise the bookkeeping attributes.

        Args:
            filename: Path of the Super Mario Land ROM.
            max_steps: Stored on the instance as ``max_steps``; not used by
                this class itself.
            visualize: When true, open an SDL2 window with debugging enabled
                instead of running headless.
        """
        self.pyboy = PyBoy(filename,
                           window_type="headless" if not visualize else "SDL2",
                           window_scale=3,
                           debug=visualize,
                           game_wrapper=True)
        assert self.pyboy.cartridge_title() == "SUPER MARIOLAN"

        self.pyboy.set_emulation_speed(0)
        self.mario = self.pyboy.game_wrapper()
        self.mario_lives = None
        self.mario_size = 0
        self.fitness_score = 0
        self.previous_fitness_score = 0
        self.previous_direction = 0
        # Must use the same name that `reset` and `compute_reward` use;
        # otherwise stepping before the first `reset` raises AttributeError.
        self.level_progress_max = 0

        self.observation_space = (16, 20)
        self.action_space = [4]
        self.max_steps = max_steps

        # Maps str(press event) -> event sent to release it.
        # NOTE(review): "0" maps to 0 and the "no direction" entry below is
        # also 0, so the value 0 is passed to `send_input` whenever no
        # direction is involved -- confirm 0 is a harmless event code.
        self.pair_actions = {"5": 13, "3": 11, "4": 12, "0": 0}
        self.action_jump = [
            WindowEvent.PRESS_BUTTON_A, WindowEvent.RELEASE_BUTTON_A
        ]
        self.action_direction = [
            WindowEvent.PRESS_ARROW_LEFT, WindowEvent.PRESS_ARROW_RIGHT, 0
        ]

    def start(self):
        """Start the game and reset the running fitness score."""
        self.mario.start_game()
        self.mario_lives = self.mario.lives_left
        self.fitness_score = 0

    def reset(self):
        """Reset the game and every per-episode bookkeeping attribute."""
        self.mario.reset_game()
        self.mario_lives = self.mario.lives_left
        self.mario_size = 0
        self.fitness_score = 0
        self.level_progress_max = 0
        # Compute first fitness score
        self.previous_fitness_score = self.compute_reward()
        self.previous_direction = 0

    def stop(self):
        """Stop the emulator."""
        self.pyboy.stop()

    def obs(self):
        """Return the current game area with tiles mapped to category codes."""
        return self.normalize_input(self.mario.game_area())

    def normalize_input(self, game_area):
        """Collapse raw tile identifiers into category codes.

        Codes: 1 = Mario, 0 = background, 3 = enemy, 4 = obstacle or floor,
        5 = bonus block, 6 = bonus. Identifiers that appear in no table
        (including 15 and 31, which ``step`` uses to detect a dead Mario) are
        left untouched.

        As a side effect ``self.mario_size`` is set to ``torch.ones(1)`` when
        tiles 32 and 49 are both present in the raw area, otherwise to
        ``torch.zeros(1)``.

        Args:
            game_area: 2-D array-like of tile identifiers.

        Returns:
            An ``int64`` NumPy array of the same shape with the codes above.
        """
        game_area = np.asarray(game_area).astype('int64')
        if 32 in game_area and 49 in game_area:
            self.mario_size = torch.ones(1)
        else:
            self.mario_size = torch.zeros(1)

        # Tiles 0..69 belong to Mario, except 15 and 31 which are kept as-is.
        mario = np.delete(np.arange(70), [15, 31])
        background = [
            336, 87, 89, 88, 91, 145, 168, 169, 128, 247, 248, 254, 300, 305,
            306, 307, 308, 310, 316, 328, 331, 332, 338, 339, 320, 321, 322,
            323, 324, 325, 326, 327, 329, 330, 338, 339, 350
        ]
        floor = [142, 143, 239, 352, 353, 232]
        block_bonus = 129
        bonus = [131, 132, 244, 246]
        ennemy = [
            144, 150, 151, 152, 153, 160, 161, 162, 163, 176, 177, 178, 179
        ]
        obstacle = [368, 130, 369, 355, 370, 371, 383]

        # Mario must be remapped first: its raw identifiers overlap the small
        # category codes written by the following assignments.
        game_area[np.isin(game_area, mario)] = 1
        game_area[np.isin(game_area, background)] = 0
        game_area[np.isin(game_area, ennemy)] = 3
        game_area[np.isin(game_area, obstacle)] = 4
        game_area[np.isin(game_area, floor)] = 4
        game_area[game_area == block_bonus] = 5
        game_area[np.isin(game_area, bonus)] = 6
        return game_area

    def compute_reward(self):
        """Return the custom fitness and update the furthest progress seen.

        The fitness is ``score + 10 * time_left + 5 * level_progress_max +
        100 * lives_left``.
        """
        self.level_progress_max = max(self.mario.level_progress,
                                      self.level_progress_max)
        fitness = (self.mario.score + self.mario.time_left * 10 +
                   self.level_progress_max * 5 + self.mario.lives_left * 100)
        return fitness

    def print_obs(self, game_area):
        """Print ``game_area`` as a table with one numbered row per line."""
        print(
            "\n".join([
                f"{i: <3}| " + "".join([str(tile).ljust(4) for tile in line])
                for i, line in enumerate(game_area)
            ]), "\n")

    def step(self, actions=None):
        """Apply ``actions``, advance the emulator and return the outcome.

        Args:
            actions: ``None`` to only advance the emulator (5 ticks), or a
                pair ``(direction_index, jump_index)`` indexing
                ``action_direction`` and ``action_jump`` (6 ticks).

        Returns:
            A tuple ``(new_state, reward, done)``. ``new_state`` is the
            normalised observation as a ``torch.Tensor``. ``reward`` is the
            fitness gain capped at 100 and divided by 100, or -1 when the
            episode ends. ``done`` is true when the dead-Mario tiles (15 and
            31) are both visible or one life is left.
        """
        if actions is not None:
            direction = self.action_direction[actions[0]]
            jump = self.action_jump[actions[1]]
            if direction != self.previous_direction:
                # Release the previous direction
                end_action = self.pair_actions[str(self.previous_direction)]
                self.pyboy.send_input(end_action)
                self.previous_direction = direction
                self.pyboy.send_input(direction)
            self.pyboy.send_input(jump)
            for _ in range(6):
                self.pyboy.tick()
        else:
            for _ in range(5):
                self.pyboy.tick()

        # Compute reward
        self.fitness_score = self.compute_reward()
        reward = min(
            (self.fitness_score - self.previous_fitness_score), 100) / 100
        self.previous_fitness_score = self.fitness_score

        # Update fitness score
        self.fitness_score = self.mario.fitness

        new_state = torch.Tensor(self.obs())
        if (15 in new_state and 31 in new_state
            ) or self.mario.lives_left == 1:  # tiles used for dead mario
            reward = -1
            done = True
        else:
            done = False
        return new_state, reward, done
# Check if the ROM is given through argv
if len(sys.argv) > 1:
    filename = sys.argv[1]
else:
    print("Usage: python gamewrapper_kirby.py [ROM file]")
    # `sys.exit` rather than the bare `exit` helper: the latter is injected by
    # the `site` module for interactive use and is not guaranteed to exist.
    sys.exit(1)

quiet = "--quiet" in sys.argv
pyboy = PyBoy(filename,
              window_type="headless" if quiet else "SDL2",
              window_scale=3,
              debug=not quiet,
              game_wrapper=True)
pyboy.set_emulation_speed(0)
assert pyboy.cartridge_title() == "KIRBY DREAM LA"

kirby = pyboy.game_wrapper()
kirby.start_game()

# Sanity-check the state reported by the game wrapper right after start.
assert kirby.score == 0
assert kirby.lives_left == 4
assert kirby.health == 6

pyboy.send_input(WindowEvent.PRESS_ARROW_RIGHT)

for _ in range(280):  # Walk for 280 ticks
    pyboy.tick()
    # Expected values while walking right from the start.
    assert kirby.score == 800
    assert kirby.health == 5

print(kirby)