class aleForET:
    """Drive an ALE emulator and show its frames through pygame.

    The instance tracks ``score``, ``episode`` and ``frame_cnt`` and can be
    used in two ways: single-step methods that take an explicit action
    (``proceed_one_step*``), or blocking loops that read the action from the
    keyboard (``run`` / ``run_in_step_by_step_mode``).

    The class relies on names provided by the surrounding module:
    ``pygame``, ``ALEInterface``, ``V``, ``COLOR_AVG``, ``FRAME_RATE``,
    ``aenum``, ``np``, ``os`` and ``time``.
    """

    def __init__(self, rom_file, screen, rndseed, resume_state_file=None):
        """Configure the emulator, load the ROM and optionally resume a state.

        Args:
            rom_file: Path to the ROM. Its base name up to the first ``.`` is
                stored as ``self.gamename``.
            screen: pygame surface used as the scaling target, or ``None``.
                Pass ``None`` only if you will not call anything that draws
                (i.e. stick to ``proceed_one_step__fast__no_scr_support``);
                in that case ``pygame.init()`` is skipped.
            rndseed: Value for ALE's ``random_seed`` setting.
            resume_state_file: If truthy, a file previously written by
                ``saveALEState`` that is loaded right after the ROM.
        """
        if screen is not None:
            pygame.init()
        self.screen = screen
        GAME_W, GAME_H = 160, 210
        self.size = GAME_W * V.xSCALE, GAME_H * V.ySCALE

        # Get & set the desired emulator settings (must precede loadROM).
        self.ale = ALEInterface()
        self.ale.setInt("random_seed", rndseed)
        self.ale.setBool('sound', False)
        self.ale.setBool('display_screen', False)
        self.ale.setBool('color_averaging', COLOR_AVG)
        self.ale.setFloat('repeat_action_probability', 0.0)

        # Load the ROM file
        self.ale.loadROM(rom_file)
        self.gamename = os.path.basename(rom_file).split('.')[0]
        self.clock = pygame.time.Clock()
        self._last_time = time.time()
        self.score = 0
        self.episode = 0
        self.frame_cnt = 0

        # Get the list of legal actions
        self.legal_actions = self.ale.getLegalActionSet()
        if resume_state_file:
            self.loadALEState(resume_state_file)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _frame_to_surface(self, frame_np):
        """Turn an ALE RGB frame into an unscaled, correctly oriented Surface."""
        surface = pygame.surfarray.make_surface(frame_np)
        surface = pygame.transform.flip(surface, True, False)
        return pygame.transform.rotate(surface, 90)

    def _report_fps(self):
        """Print the measured FPS if more than one second passed since last time."""
        if time.time() - self._last_time > 1.0:
            # Single-argument form prints identically on Python 2 and 3.
            print('FPS: %.1f' % self.clock.get_fps())
            self._last_time = time.time()

    def _act(self, action):
        """Apply ``action`` to the emulator, add the reward to the score, return it."""
        reward = self.ale.act(action)
        self.score += reward
        return reward

    # ------------------------------------------------------------------
    # State persistence
    # ------------------------------------------------------------------
    def saveALEState(self, fname):
        """Write emulator state, score and episode to ``fname`` via ``np.savez``.

        Missing parent directories are created. A bare file name (no
        directory component) is written as-is.
        """
        basedir = os.path.dirname(fname)
        # dirname() is '' for a bare file name; os.makedirs('') would raise.
        if basedir and not os.path.exists(basedir):
            os.makedirs(basedir)
        # cloneSystemState() actually returns an int: a memory address
        # pointing to a C++ ALEState object; encodeState() takes that pointer.
        pALEState = self.ale.cloneSystemState()
        serialized_np = self.ale.encodeState(pALEState)
        np.savez(fname, state=serialized_np, score=self.score,
                 episode=self.episode)

    def loadALEState(self, fname):
        """Restore emulator state, score and episode from a ``saveALEState`` file."""
        # The context manager closes the underlying archive file handle.
        with np.load(fname) as npzfile:
            serialized_np = npzfile['state']
            # Stored as 0-d arrays; convert back to plain Python scalars so
            # later arithmetic and printing behave like a fresh instance.
            self.score = npzfile['score'].item()
            self.episode = npzfile['episode'].item()
        # decodeState() actually returns an int: a memory address pointing to
        # a C++ ALEState object; restoreSystemState() takes that pointer.
        pALEState = self.ale.decodeState(serialized_np)
        self.ale.restoreSystemState(pALEState)

    # ------------------------------------------------------------------
    # Single-step API
    # ------------------------------------------------------------------
    def proceed_one_step(self, action, refresh_screen=False, fps_limit=0,
                         model_gaze_output=None, gc_window_drawer_func=None):
        """Advance the game by one action, optionally drawing the current frame.

        Args:
            action: Action passed to ``ale.act``.
            refresh_screen: If true, draw the frame onto ``self.screen``.
            fps_limit: Passed to ``clock.tick``; 0 means no limit.
            model_gaze_output: Forwarded to ``gc_window_drawer_func``; the
                drawer is only called when this value is truthy.
            gc_window_drawer_func: Optional callable
                ``(screen, model_gaze_output)`` drawn over the frame.

        Returns:
            ``(frame, reward, episode_ended)`` where ``frame`` is the screen
            captured *before* the action was applied.
        """
        self.clock.tick(fps_limit)  # control FPS; 0 means no limit
        self.frame_cnt += 1
        self._report_fps()

        # Show game image
        cur_frame_np = self.ale.getScreenRGB()
        if refresh_screen:
            cur_frame_Surface = self._frame_to_surface(cur_frame_np)
            # Scale directly onto the screen, leaving the Surface unscaled;
            # slightly faster than scaling first and then blitting.
            pygame.transform.scale(cur_frame_Surface, self.size, self.screen)
            if gc_window_drawer_func is not None and model_gaze_output:
                gc_window_drawer_func(self.screen, model_gaze_output)
            pygame.display.flip()

        # Apply an action and get the resulting reward
        reward = self._act(action)
        return (cur_frame_np, reward,
                self.check_episode_end_and_if_true_reset_game())

    def proceed_one_step__fast__no_scr_support(self, action):
        """Advance the game by one action without touching pygame.

        Returns:
            ``(frame, reward, episode_ended)`` where ``frame`` is the screen
            captured before the action was applied.
        """
        self.frame_cnt += 1
        cur_frame_np = self.ale.getScreenRGB()
        reward = self._act(action)
        return (cur_frame_np, reward,
                self.check_episode_end_and_if_true_reset_game())

    def check_episode_end_and_if_true_reset_game(self):
        """Reset the game if it is over and report whether it was.

        On game over the final score is printed, ``score`` is zeroed,
        ``episode`` is incremented and the emulator is reset.

        Returns:
            The value of ``ale.game_over()`` sampled before any reset.
        """
        end = self.ale.game_over()
        if end:
            # One formatted string gives the same output on Python 2 and 3.
            print('Episode %s ended with score: %s'
                  % (self.episode, self.score))
            self.score = 0
            self.episode += 1
            self.ale.reset_game()
        # After reset_game(), ale.game_over() would already report false,
        # which is why the earlier sample is returned.
        return end

    # ------------------------------------------------------------------
    # Keyboard-driven loops
    # ------------------------------------------------------------------
    def run(self, gc_window_drawer_func=None, save_screen_func=None,
            event_handler_func=None, record_a_and_r_func=None):
        """Play in real time from the keyboard until the event handler stops.

        Args:
            gc_window_drawer_func: Optional callable ``(screen)`` drawn over
                each frame when the event handler's draw flag is true.
            save_screen_func: Optional callable ``(surface, frame_cnt)``
                receiving the unscaled frame.
            event_handler_func: Optional callable ``(key, self)`` returning
                ``(stop, eyelink_err_code, bool_drawgc)``.
            record_a_and_r_func: Optional callable
                ``(action, reward, episode, score)``.

        Returns:
            The ``eyelink_err_code`` supplied by the event handler when it
            signals ``stop``. Without an event handler the loop never ends.
        """
        # Must exist even when no event handler ever assigns it; otherwise
        # the drawer check below raises NameError.
        bool_drawgc = False
        self.run_start_time = time.time()  # used in alerecord_main.py
        while True:
            self.check_episode_end_and_if_true_reset_game()
            self.clock.tick(FRAME_RATE)  # control FPS
            self.frame_cnt += 1

            key = pygame.key.get_pressed()
            if event_handler_func is not None:
                stop, eyelink_err_code, bool_drawgc = event_handler_func(
                    key, self)
                if stop:
                    return eyelink_err_code

            self._report_fps()

            # Show game image
            cur_frame_np = self.ale.getScreenRGB()
            cur_frame_Surface = self._frame_to_surface(cur_frame_np)
            # Scale directly onto the screen, leaving the Surface unscaled.
            pygame.transform.scale(cur_frame_Surface, self.size, self.screen)
            if gc_window_drawer_func is not None and bool_drawgc:
                gc_window_drawer_func(self.screen)
            pygame.display.flip()

            # Save frame to disk (160*210, i.e. not scaled; this is faster)
            if save_screen_func is not None:
                save_screen_func(cur_frame_Surface, self.frame_cnt)

            # Apply an action and get the resulting reward
            a_index = aenum.action_map(key, self.gamename)
            a = self.legal_actions[a_index]
            reward = self._act(a)
            if record_a_and_r_func is not None:
                record_a_and_r_func(a, reward, self.episode, self.score)

            pygame.event.pump()  # needed to pick up newly pressed keys
        # The loop above only exits through the return inside it.

    def run_in_step_by_step_mode(self, gc_window_drawer_func=None,
                                 save_screen_func=None,
                                 event_handler_func=None,
                                 record_a_and_r_func=None):
        """Play from the keyboard, advancing one frame per deliberate input.

        The game waits on the current frame until the mapped action is not
        NOOP, or TAB is held (an explicit NOOP). Callback arguments have the
        same meaning as in ``run``.

        Returns:
            The ``eyelink_err_code`` supplied by the event handler when it
            signals ``stop``.
        """
        bool_drawgc = False
        self.run_start_time = time.time()  # used in alerecord_main.py
        while True:
            self.check_episode_end_and_if_true_reset_game()

            # Get game image
            cur_frame_np = self.ale.getScreenRGB()
            cur_frame_Surface = self._frame_to_surface(cur_frame_np)
            self.frame_cnt += 1

            # Save frame to disk (160*210, i.e. not scaled; this is faster)
            if save_screen_func is not None:
                save_screen_func(cur_frame_Surface, self.frame_cnt)

            key, draw_next_game_frame = None, False
            while not draw_next_game_frame:
                self.clock.tick(FRAME_RATE)  # control FPS

                key = pygame.key.get_pressed()
                if event_handler_func is not None:
                    stop, eyelink_err_code, bool_drawgc = event_handler_func(
                        key, self)
                    if stop:
                        return eyelink_err_code

                a_index = aenum.action_map(key, self.gamename)
                # action_map returning NOOP does not always mean the player
                # chose NOOP: it only counts as a real NOOP when TAB is held.
                if a_index != aenum.PLAYER_A_NOOP or key[pygame.K_TAB]:
                    draw_next_game_frame = True

                # Draw the image onto the screen, scaling directly onto it
                # and leaving the Surface unscaled.
                pygame.transform.scale(cur_frame_Surface, self.size,
                                       self.screen)
                if gc_window_drawer_func is not None and bool_drawgc:
                    gc_window_drawer_func(self.screen)
                pygame.display.flip()
                pygame.event.pump()  # needed to pick up newly pressed keys

            # Apply an action and get the resulting reward
            a = self.legal_actions[a_index]
            reward = self._act(a)
            if record_a_and_r_func is not None:
                record_a_and_r_func(a, reward, self.episode, self.score)
        # The loop above only exits through the return inside it.
class ALEEnvironment(Environment):
    """Environment wrapper around the Arcade Learning Environment (ALE).

    Two emulator instances are kept: ``_ale`` is the game actually being
    played, while ``_ale_sampler`` is a scratch emulator onto which the
    current state is copied so action sequences can be evaluated without
    advancing ``_ale``.
    """

    def __init__(self, rom_name, visible=True):
        """Create both emulators and load the same ROM into each.

        Args:
            rom_name: ROM path as ``str``; encoded to ASCII bytes for ALE.
            visible: Value for the ``display_screen`` setting of the main
                emulator. The sampler's display setting is left untouched.
        """
        super().__init__('Arcade Learning Environment')
        self._ale = ALEInterface()
        self._ale_sampler = ALEInterface()
        # Only the main emulator's display is configured; no frame skipping
        # is configured on either emulator.
        self._ale.setBool(b'display_screen', visible)
        rom_path = rom_name.encode('ascii')
        self._ale.loadROM(rom_path)
        self._ale_sampler.loadROM(rom_path)
        self._action_space = self._ale.getLegalActionSet()
        self._current_score = 0

    def evaluate_rollout(self, solution, discount_factor=0):
        """Score an action sequence on the sampler, starting from the live state.

        Args:
            solution: Iterable of actions applied in order; the rollout stops
                early once the sampler reports game over.
            discount_factor: Per-step multiplier applied to the running
                discount (which starts at 1). With the default ``0`` only the
                first action's reward contributes. ``None`` disables
                discounting entirely.

        Returns:
            Sum of the (discounted) rewards plus the change in lives between
            the live emulator before the rollout and the sampler after it.
        """
        # Copy the live state into the sampler through its serialized form.
        # NOTE(review): the cloned/decoded state objects are never released
        # here; some ALE bindings require an explicit delete - confirm.
        snapshot = self._ale.encodeState(self._ale.cloneState())
        self._ale_sampler.restoreState(
            self._ale_sampler.decodeState(snapshot))

        prev_lives = self._ale.lives()
        total_rollout_reward = 0
        discount = 1
        for action in solution:
            rollout_reward = self._ale_sampler.act(action)
            if discount_factor is not None:
                rollout_reward *= discount
                discount *= discount_factor
            total_rollout_reward += rollout_reward
            if self._ale_sampler.game_over():
                break

        lives_delta = self._ale_sampler.lives() - prev_lives
        return total_rollout_reward + lives_delta

    def perform_action(self, action):
        """Apply ``action`` to the live emulator and add its reward to the score."""
        reward = self._ale.act(action)
        self._current_score += reward

    def get_current_score(self):
        """Return the reward accumulated through ``perform_action``."""
        return self._current_score

    def get_current_lives(self):
        """Return the live emulator's remaining lives."""
        return self._ale.lives()

    def get_random_action(self):
        """Return one action drawn by ``np.random.choice`` from the legal action set."""
        return np.random.choice(self._action_space)

    def is_game_over(self):
        """Return whether the live emulator reports game over."""
        return self._ale.game_over()