class emulator:
    """Wrapper around an ALE instance whose episodes start from saved checkpoints.

    Frames are handed out as ``(screen_height, screen_width, 3)`` uint8
    arrays and can optionally be mirrored into an OpenCV preview window.
    """

    def __init__(self, rom_name, vis, windowname='preview'):
        """Configure ALE, load ``roms/<rom_name>`` and optionally open a window.

        Args:
            rom_name: ROM file name, resolved relative to the ``roms/`` folder.
            vis: When truthy, frames produced by ``next`` are shown with OpenCV.
            windowname: Title of the OpenCV preview window.
        """
        self.ale = ALEInterface()
        # Read before any setting is changed, exactly as configured by ALE.
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)

        self.legal_actions = self.ale.getMinimalActionSet()
        self.windowname = windowname
        # Maps each ALE action code to its position in ``legal_actions``.
        self.action_map = {
            action: position
            for position, action in enumerate(self.legal_actions)
        }
        # Frame counter value at the start of the current episode.
        self.init_frame_number = 0

        self.screen_width, self.screen_height = self.ale.getScreenDims()
        dims_text = str(self.screen_width) + "/" + str(self.screen_height)
        print("width/height: " + dims_text)

        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

    def get_image(self):
        """Return the current screen as a ``(height, width, 3)`` uint8 array."""
        n_values = self.screen_height * self.screen_width * 3
        flat_pixels = np.zeros(n_values, dtype=np.uint8)
        # ALE writes the screen contents into the buffer passed to it.
        self.ale.getScreenRGB(flat_pixels)
        frame_shape = (self.screen_height, self.screen_width, 3)
        return np.reshape(flat_pixels, frame_shape)

    def newGame(self):
        """Start an episode from a random checkpoint and return its first frame.

        Instead of resetting the game, one of the entries at index 0..99 of
        the module-level ``checkpoints`` is decoded and restored.
        """
        chosen = checkpoints[random.randint(0, 99)]
        saved_state = self.ale.decodeState(chosen.astype('uint8'))
        self.ale.restoreState(saved_state)
        # Remember where the episode starts so frame counts are relative.
        self.init_frame_number = self.ale.getFrameNumber()
        return self.get_image()

    def next(self, action_indx):
        """Apply ``action_indx`` and return ``(frame, reward, game_over)``."""
        reward = self.ale.act(action_indx)
        frame = self.get_image()
        if self.vis:
            cv2.imshow(self.windowname, frame)
        finished = self.ale.game_over()
        return frame, reward, finished

    def get_frame_number(self):
        """Return the number of frames elapsed since the episode started."""
        current = self.ale.getFrameNumber()
        return current - self.init_frame_number
class emulator:
    """ALE-backed game emulator that begins each episode from a stored checkpoint."""

    def __init__(self, rom_name, vis, windowname='preview'):
        """Set up the ALE interface for ``rom_name`` (looked up under ``roms/``).

        ``vis`` enables an OpenCV preview window titled ``windowname``.
        """
        ale = ALEInterface()
        self.ale = ale
        self.max_frames_per_episode = ale.getInt("max_num_frames_per_episode")
        ale.setInt("random_seed", 123)
        ale.setInt("frame_skip", 4)
        ale.loadROM('roms/' + rom_name)

        actions = ale.getMinimalActionSet()
        self.legal_actions = actions
        self.action_map = dict()
        self.windowname = windowname
        # action code -> index of that action within the minimal action set
        self.action_map.update(zip(actions, range(len(actions))))
        self.init_frame_number = 0

        self.screen_width, self.screen_height = ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" +
              str(self.screen_height))

        self.vis = vis
        if not vis:
            return
        cv2.startWindowThread()
        cv2.namedWindow(self.windowname)

    def get_image(self):
        """Fetch the screen from ALE as a uint8 array of shape (height, width, 3)."""
        height, width = self.screen_height, self.screen_width
        pixels = np.zeros(height * width * 3, dtype=np.uint8)
        self.ale.getScreenRGB(pixels)  # filled in place by ALE
        return pixels.reshape((height, width, 3))

    def newGame(self):
        """Restore a randomly picked checkpoint and return the resulting frame."""
        # Instead of resetting the game (ale.reset_game()), a checkpoint with
        # index 0..99 from the module-level ``checkpoints`` is loaded.
        index = random.randint(0, 99)
        encoded = checkpoints[index].astype('uint8')
        self.ale.restoreState(self.ale.decodeState(encoded))
        self.init_frame_number = self.ale.getFrameNumber()
        return self.get_image()

    def next(self, action_indx):
        """Step the emulator once; returns the new frame, the reward and the game-over flag."""
        step_reward = self.ale.act(action_indx)
        screen = self.get_image()
        if self.vis:
            cv2.imshow(self.windowname, screen)
        return screen, step_reward, self.ale.game_over()

    def get_frame_number(self):
        """Frames played since the last ``newGame`` call (or since construction)."""
        return self.ale.getFrameNumber() - self.init_frame_number
class aleForET:
    """Pygame front-end around an ALE emulator with score/episode bookkeeping.

    Offers single-step APIs (``proceed_one_step*``) as well as two
    interactive loops (``run`` and ``run_in_step_by_step_mode``) that read the
    keyboard through pygame, draw each frame and forward actions to ALE.
    """

    def __init__(self, rom_file, screen, rndseed, resume_state_file=None):
        """Configure ALE, load ``rom_file`` and optionally resume a saved state.

        Args:
            rom_file: Path of the ROM; its base name (up to the first dot)
                becomes ``self.gamename``.
            screen: Pygame surface to draw on, or None (see below).
            rndseed: Value for ALE's ``random_seed`` setting.
            resume_state_file: Optional file written by ``saveALEState``.
        """
        # When you might pass None to screen:
        #    You are not interested in running any functions that display graphics.
        #    For example, you should only run proceed_one_step__fast__no_scr_support().
        #    Otherwise, those functions use self.screen and you will get a RuntimeError.
        if screen is not None:
            pygame.init()
        # Always stored (possibly as None) so the attribute exists either way.
        self.screen = screen
        GAME_W, GAME_H = 160, 210
        self.size = GAME_W * V.xSCALE, GAME_H * V.ySCALE

        # Get & Set the desired settings
        self.ale = ALEInterface()
        self.ale.setInt("random_seed", rndseed)
        self.ale.setBool('sound', False)
        self.ale.setBool('display_screen', False)
        self.ale.setBool('color_averaging', COLOR_AVG)
        self.ale.setFloat('repeat_action_probability', 0.0)

        # Load the ROM file
        self.ale.loadROM(rom_file)
        self.gamename = os.path.basename(rom_file).split('.')[0]
        self.clock = pygame.time.Clock()
        self._last_time = time.time()
        self.score = 0
        self.episode = 0
        self.frame_cnt = 0

        # Get the list of legal actions
        self.legal_actions = self.ale.getLegalActionSet()
        if resume_state_file:
            self.loadALEState(resume_state_file)

    def saveALEState(self, fname):
        """Write the emulator state plus score and episode counter to ``fname``."""
        basedir = os.path.dirname(fname)
        # dirname is '' for a bare file name, and os.makedirs('') raises.
        if basedir and not os.path.exists(basedir):
            os.makedirs(basedir)
        # actually it returns an int, a memory address pointing to a C++ object ALEState
        pALEState = self.ale.cloneSystemState()
        # this func actually takes a pointer
        serialized_np = self.ale.encodeState(pALEState)
        np.savez(fname, state=serialized_np, score=self.score,
                 episode=self.episode)

    def loadALEState(self, fname):
        """Restore emulator state, score and episode from a ``saveALEState`` file."""
        # The archive keeps its file handle open until closed, hence ``with``.
        with np.load(fname) as npzfile:
            serialized_np = npzfile['state']
            self.score = npzfile['score']
            self.episode = npzfile['episode']
        # actually it returns an int, a memory address pointing to a C++ object ALEState
        pALEState = self.ale.decodeState(serialized_np)
        # this func actually takes a pointer
        self.ale.restoreSystemState(pALEState)

    def _print_fps_if_due(self):
        """Print the clock's FPS if more than a second passed since the last print."""
        diff_time = time.time() - self._last_time
        if diff_time > 1.0:
            print('FPS: %.1f' % self.clock.get_fps())
            self._last_time = time.time()

    def _frame_to_surface(self, cur_frame_np):
        """Convert an ALE screen array into an unscaled, flipped and rotated surface."""
        surface = pygame.surfarray.make_surface(cur_frame_np)
        surface = pygame.transform.flip(surface, True, False)
        return pygame.transform.rotate(surface, 90)

    def proceed_one_step(self, action, refresh_screen=False, fps_limit=0,
                         model_gaze_output=None, gc_window_drawer_func=None):
        """Advance one frame, optionally drawing it, and apply ``action``.

        Args:
            action: ALE action passed to ``ale.act``.
            refresh_screen: Draw the frame onto ``self.screen`` when True.
            fps_limit: Frame-rate cap; 0 means no limit.
            model_gaze_output: Forwarded to ``gc_window_drawer_func`` when truthy.
            gc_window_drawer_func: Optional ``f(screen, model_gaze_output)``.

        Returns:
            ``(frame, reward, episode_ended)``; ``frame`` is the screen captured
            before the action was applied.
        """
        self.clock.tick(fps_limit)  # control FPS. fps_limit == 0 means no limit
        self.frame_cnt += 1

        # Display FPS
        self._print_fps_if_due()

        # Show game image
        cur_frame_np = self.ale.getScreenRGB()
        if refresh_screen:
            cur_frame_Surface = self._frame_to_surface(cur_frame_np)
            # Perform scaling directly on screen, leaving cur_frame_Surface unscaled.
            # Slightly faster than scaling cur_frame_Surface and then transfer to screen.
            pygame.transform.scale(cur_frame_Surface, self.size, self.screen)
            if gc_window_drawer_func is not None and model_gaze_output:
                gc_window_drawer_func(self.screen, model_gaze_output)
            pygame.display.flip()

        # Apply an action and get the resulting reward
        reward = self.ale.act(action)
        self.score += reward
        return (cur_frame_np, reward,
                self.check_episode_end_and_if_true_reset_game())

    def proceed_one_step__fast__no_scr_support(self, action):
        """Like ``proceed_one_step`` but without FPS control or any drawing."""
        self.frame_cnt += 1
        cur_frame_np = self.ale.getScreenRGB()
        reward = self.ale.act(action)
        self.score += reward
        return (cur_frame_np, reward,
                self.check_episode_end_and_if_true_reset_game())

    def check_episode_end_and_if_true_reset_game(self):
        """Reset game and score when the episode is over; return whether it was."""
        end = self.ale.game_over()
        if end:
            print('Episode %s ended with score: %s' % (self.episode, self.score))
            self.score = 0
            self.episode += 1
            self.ale.reset_game()
        # after reset_game(), ale.game_over()'s return value will change to false
        return end

    def run(self, gc_window_drawer_func=None, save_screen_func=None,
            event_handler_func=None, record_a_and_r_func=None):
        """Run the real-time interactive loop until the event handler asks to stop.

        Args:
            gc_window_drawer_func: Optional ``f(screen)`` drawn when the event
                handler's third return value is truthy.
            save_screen_func: Optional ``f(surface, frame_cnt)`` per frame.
            event_handler_func: Optional ``f(key, self)`` returning
                ``(stop, eyelink_err_code, bool_drawgc)``.
            record_a_and_r_func: Optional ``f(action, reward, episode, score)``.

        Returns:
            The ``eyelink_err_code`` supplied by the event handler on stop.
        """
        # Default when no event handler is supplied; previously unassigned
        # in that case, which raised as soon as a drawer func was passed.
        bool_drawgc = False
        self.run_start_time = time.time()  # used in alerecord_main.py
        while True:
            self.check_episode_end_and_if_true_reset_game()
            self.clock.tick(FRAME_RATE)  # control FPS
            self.frame_cnt += 1

            key = pygame.key.get_pressed()
            if event_handler_func is not None:
                stop, eyelink_err_code, bool_drawgc = event_handler_func(
                    key, self)
                if stop:
                    return eyelink_err_code

            # Display FPS
            self._print_fps_if_due()

            # Show game image
            cur_frame_np = self.ale.getScreenRGB()
            cur_frame_Surface = self._frame_to_surface(cur_frame_np)
            # Perform scaling directly on screen, leaving cur_frame_Surface unscaled.
            # Slightly faster than scaling cur_frame_Surface and then transfer to screen.
            pygame.transform.scale(cur_frame_Surface, self.size, self.screen)
            if gc_window_drawer_func is not None and bool_drawgc:
                gc_window_drawer_func(self.screen)
            pygame.display.flip()

            # Save frame to disk (160*210, i.e. not scaled; because this is faster)
            if save_screen_func is not None:
                save_screen_func(cur_frame_Surface, self.frame_cnt)

            # Apply an action and get the resulting reward
            a_index = aenum.action_map(key, self.gamename)
            a = self.legal_actions[a_index]
            reward = self.ale.act(a)
            self.score += reward
            if record_a_and_r_func is not None:
                record_a_and_r_func(a, reward, self.episode, self.score)

            pygame.event.pump()  # need this line to get new key pressed
        assert False, "Returning should only happen in the while True loop"

    def run_in_step_by_step_mode(self, gc_window_drawer_func=None,
                                 save_screen_func=None,
                                 event_handler_func=None,
                                 record_a_and_r_func=None):
        """Run the interactive loop, advancing the game only on player input.

        Each game frame stays on screen until the mapped action is not NOOP,
        or TAB is held (an explicit NOOP). Arguments and return value are the
        same as for ``run``.
        """
        bool_drawgc = False
        self.run_start_time = time.time()  # used in alerecord_main.py
        while True:
            self.check_episode_end_and_if_true_reset_game()

            # Get game image
            cur_frame_np = self.ale.getScreenRGB()
            cur_frame_Surface = self._frame_to_surface(cur_frame_np)

            self.frame_cnt += 1
            # Save frame to disk (160*210, i.e. not scaled; because this is faster)
            if save_screen_func is not None:
                save_screen_func(cur_frame_Surface, self.frame_cnt)

            key, draw_next_game_frame = None, False
            while not draw_next_game_frame:
                self.clock.tick(FRAME_RATE)  # control FPS

                key = pygame.key.get_pressed()
                if event_handler_func is not None:
                    stop, eyelink_err_code, bool_drawgc = event_handler_func(
                        key, self)
                    if stop:
                        return eyelink_err_code
                a_index = aenum.action_map(key, self.gamename)
                # Not in all cases when action_map returns "NO OP" is the real action "NO OP",
                # Only when the human press "TAB", is the real action "NO OP".
                if a_index != aenum.PLAYER_A_NOOP or key[pygame.K_TAB]:
                    draw_next_game_frame = True

                # Draw the image onto screen.
                # Perform scaling directly on screen, leaving cur_frame_Surface unscaled.
                pygame.transform.scale(cur_frame_Surface, self.size,
                                       self.screen)
                if gc_window_drawer_func is not None and bool_drawgc:
                    gc_window_drawer_func(self.screen)
                pygame.display.flip()

                pygame.event.pump()  # need this line to get new key pressed

            # Apply an action and get the resulting reward
            a = self.legal_actions[a_index]
            reward = self.ale.act(a)
            self.score += reward
            if record_a_and_r_func is not None:
                record_a_and_r_func(a, reward, self.episode, self.score)
        assert False, "Returning code should only be in the while True loop"
class ALEEnvironment(Environment):
    """Environment wrapper around the Arcade Learning Environment (ALE).

    Two emulators are kept: ``_ale`` plays the real game, while
    ``_ale_sampler`` is a scratch copy used to evaluate candidate action
    sequences without disturbing the real game.
    """

    def __init__(self, rom_name, visible=True):
        """Load ``rom_name`` into both emulators.

        Args:
            rom_name: ROM path; encoded as ASCII before being passed to ALE.
            visible: Value for the main emulator's ``display_screen`` setting.
        """
        super().__init__('Arcade Learning Environment')

        # Only referenced by the disabled frame-skip settings below.
        frame_skip = 20

        self._ale = ALEInterface()
        self._ale_sampler = ALEInterface()

        self._ale.setBool(b'display_screen', visible)
        # Currently disabled settings:
        #   self._ale.setInt(b'frame_skip', frame_skip)
        #   self._ale_sampler.setBool(b'display_screen', True)
        #   self._ale_sampler.setInt(b'frame_skip', frame_skip)

        rom_bytes = rom_name.encode('ascii')
        self._ale.loadROM(rom_bytes)
        self._ale_sampler.loadROM(rom_name.encode('ascii'))

        self._action_space = self._ale.getLegalActionSet()
        self._current_score = 0

    def evaluate_rollout(self, solution, discount_factor=0):
        """Score an action sequence on the sampler emulator.

        The main emulator's current state is copied into the sampler, the
        actions in ``solution`` are played there until they run out or the
        game ends, and the rewards are summed.

        Args:
            solution: Iterable of ALE actions.
            discount_factor: Multiplier applied cumulatively to the reward
                weight after every step; None disables weighting. With the
                default of 0 only the first action's reward contributes.

        Returns:
            The (weighted) reward sum plus the change in lives.
        """
        # Transfer the state through its encoded form so the sampler gets
        # its own copy.
        encoded = self._ale.encodeState(self._ale.cloneState())
        sampler_state = self._ale_sampler.decodeState(encoded)
        self._ale_sampler.restoreState(sampler_state)

        lives_before = self._ale.lives()
        reward_sum = 0
        weight = 1
        for action in solution:
            step_reward = self._ale_sampler.act(action)
            if discount_factor is not None:
                step_reward *= weight
                weight *= discount_factor
            reward_sum += step_reward
            if self._ale_sampler.game_over():
                break

        lives_delta = self._ale_sampler.lives() - lives_before
        return reward_sum + lives_delta

    def perform_action(self, action):
        """Apply ``action`` to the real game and add its reward to the score."""
        self._current_score += self._ale.act(action)

    def get_current_score(self):
        """Return the reward accumulated through ``perform_action``."""
        return self._current_score

    def get_current_lives(self):
        """Return the number of lives reported by the main emulator."""
        return self._ale.lives()

    def get_random_action(self):
        """Return one action drawn with ``np.random.choice`` from the legal action set."""
        return np.random.choice(self._action_space)

    def is_game_over(self):
        """Return whether the main emulator reports the game as over."""
        return self._ale.game_over()