import numpy as np
from scipy import ndimage
# Classic ALE bindings; newer installs use `from ale_py import ALEInterface`.
from ale_python_interface import ALEInterface


class AsyncAtariEnvironment(object):
    """Atari wrapper with a fixed ALE seed, for reproducible evaluation."""

    def __init__(self, hyperparams, rom_fnm):
        self.hyperparams = hyperparams
        self.show_screen = hyperparams['show_screen']
        self.state_len = hyperparams['state_len']
        self.rom_fnm = rom_fnm
        self.init_ale(display=self.show_screen)
        self.actions = self.ale.getMinimalActionSet()
        print('Num possible actions: %d' % len(self.actions))
        self.state_shape = (84, 84, self.state_len)

    def init_ale(self, display=False):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 123)
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.setInt(b'frame_skip', self.hyperparams['frame_skip'])
        if display:
            self.ale.setBool(b'display_screen', True)
        self.ale.loadROM(str.encode(self.rom_fnm))

    def preprocess_screen(self, screen_rgb):
        # Grayscale via luminance weights, then zoom the 210x160 frame to
        # 84x84 (210 * 0.4 == 84, 160 * 0.525 == 84).
        screen = np.dot(screen_rgb, np.array([.299, .587, .114])).astype(np.uint8)
        screen = ndimage.zoom(screen, (0.4, 0.525))
        screen.resize((84, 84))  # guard against off-by-one zoom rounding
        return np.array(screen)

    def get_state_shape(self):
        return self.state_shape

    def start_episode(self):
        self.ale.reset_game()
        self.episode_reward = 0
        # Fill the frame stack with copies of the initial screen.
        self.states = []
        for _ in range(self.state_len):
            self.states.append(self.preprocess_screen(self.ale.getScreenRGB()))

    def get_episode_reward(self):
        return self.episode_reward

    def get_state(self):
        # Stack the last state_len frames along the channel axis.
        return np.stack(self.states, axis=2)

    def perform_action(self, action_dist):
        action = self.actions[np.argmax(action_dist)]
        self.curr_reward = self.ale.act(action)
        self.episode_reward += self.curr_reward
        # Clip the per-step reward to {-1, 0, 1}; the unclipped reward still
        # accumulates into episode_reward above.
        if self.curr_reward > 0:
            self.curr_reward = 1
        elif self.curr_reward < 0:
            self.curr_reward = -1
        screen = self.preprocess_screen(self.ale.getScreenRGB())
        # Drop the oldest frame and push the newest to the front.
        self.states = self.states[:self.state_len - 1]
        self.states.insert(0, screen)

    def start_eval_mode(self):
        pass

    def end_eval_mode(self):
        pass

    def get_reward(self):
        return self.curr_reward

    def episode_is_over(self):
        return self.ale.game_over()

    def get_actions(self):
        return list(range(len(self.actions)))
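
# A minimal usage sketch, assuming hyperparameter values and a ROM filename
# (both illustrative, not part of the class): run one random episode by
# feeding one-hot action "distributions" into perform_action.
if __name__ == '__main__':
    hyperparams = {'show_screen': False, 'state_len': 4, 'frame_skip': 4}
    env = AsyncAtariEnvironment(hyperparams, 'breakout.bin')
    env.start_episode()
    num_actions = len(env.get_actions())
    while not env.episode_is_over():
        action_dist = np.zeros(num_actions)
        action_dist[np.random.randint(num_actions)] = 1.0
        env.perform_action(action_dist)
    print('Episode reward: %d' % env.get_episode_reward())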
import random

import cv2
import numpy as np
# Classic ALE bindings; newer installs use `from ale_py import ALEInterface`.
from ale_python_interface import ALEInterface


class AtariTask(object):
    """Atari wrapper seeded randomly, so parallel workers diverge."""

    def __init__(self, hyperparams, rom_fnm):
        self.hyperparams = hyperparams
        self.show_screen = hyperparams['show_screen']
        self.state_len = hyperparams['state_len']
        self.rom_fnm = rom_fnm
        self.init_ale(display=self.show_screen)
        self.actions = self.ale.getMinimalActionSet()
        print('Num possible actions: %d' % len(self.actions))
        self.state_shape = (84, 84, self.state_len)

    def init_ale(self, display=False):
        self.ale = ALEInterface()
        # Unlike AsyncAtariEnvironment's fixed seed, each task draws its own.
        self.ale.setInt(b'random_seed', random.randrange(999))
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.setInt(b'frame_skip', self.hyperparams['frame_skip'])
        if display:
            self.ale.setBool(b'display_screen', True)
        self.ale.loadROM(str.encode(self.rom_fnm))

    def preprocess_screen(self, screen_rgb):
        # Resize to 110x84 and convert to grayscale, then crop rows 26:110 to
        # keep the 84x84 playing area.
        screen = cv2.cvtColor(cv2.resize(screen_rgb, (84, 110)),
                              cv2.COLOR_BGR2GRAY)
        screen = screen[26:110, :]
        return np.array(screen)

    def get_state_shape(self):
        return self.state_shape

    def get_actions(self):
        return list(range(len(self.actions)))

    def start_episode(self):
        self.ale.reset_game()
        self.episode_reward = 0
        # Fill the frame stack with copies of the initial screen.
        self.states = []
        for _ in range(self.state_len):
            self.states.append(self.preprocess_screen(self.ale.getScreenRGB()))

    def get_episode_reward(self):
        return self.episode_reward

    def get_state(self):
        # Stack the last state_len frames along the channel axis.
        return np.stack(self.states, axis=2)

    def perform_action(self, action_dist):
        action = self.actions[np.argmax(action_dist)]
        self.curr_reward = self.ale.act(action)
        self.episode_reward += self.curr_reward
        # Clip the per-step reward to {-1, 0, 1}; the unclipped reward still
        # accumulates into episode_reward above.
        if self.curr_reward > 0:
            self.curr_reward = 1
        elif self.curr_reward < 0:
            self.curr_reward = -1
        screen = self.preprocess_screen(self.ale.getScreenRGB())
        # Drop the oldest frame and push the newest to the front.
        self.states = self.states[:self.state_len - 1]
        self.states.insert(0, screen)

    def start_eval_mode(self):
        pass

    def end_eval_mode(self):
        pass

    def get_reward(self):
        return self.curr_reward

    def episode_is_over(self):
        return self.ale.game_over()
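
# A quick sanity-check sketch, assuming the same illustrative hyperparameters
# and ROM filename as above: confirm the stacked state matches the advertised
# (84, 84, state_len) shape.
if __name__ == '__main__':
    hyperparams = {'show_screen': False, 'state_len': 4, 'frame_skip': 4}
    task = AtariTask(hyperparams, 'breakout.bin')
    task.start_episode()
    assert task.get_state().shape == task.get_state_shape()
    print('State shape OK: %s' % (task.get_state_shape(),))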
import numpy as np
# Classic ALE bindings; newer installs use `from ale_py import ALEInterface`.
from ale_python_interface import ALEInterface

# Minimal setup sketch for the random-agent loop below; the ROM filename is
# illustrative.
ale = ALEInterface()
ale.loadROM(b'./breakout.bin')
legal_actions = ale.getLegalActionSet()

episode = 0
total_reward = 0.0
while episode < 10:
    # Pick a legal action uniformly at random and apply it.
    a = legal_actions[np.random.randint(legal_actions.size)]
    reward = ale.act(a)
    total_reward += reward
    if ale.game_over():
        episode_frame_number = ale.getEpisodeFrameNumber()
        frame_number = ale.getFrameNumber()
        print("Frame Number: " + str(frame_number) +
              " Episode Frame Number: " + str(episode_frame_number))
        print("Episode " + str(episode) +
              " ended with score: " + str(total_reward))
        ale.reset_game()
        total_reward = 0.0
        episode += 1
import os
import random

import numpy as np
# Classic ALE bindings; newer installs use `from ale_py import ALEInterface`.
from ale_python_interface import ALEInterface

from state import State  # assumed local module providing the frame-stack State


class AtariEnvironment:

    def __init__(self, args, outputDir):
        self.outputDir = outputDir
        self.screenCaptureFrequency = args.screen_capture_freq

        self.ale = ALEInterface()
        self.ale.setBool(b'sound', False)
        self.ale.setBool(b'display_screen', False)
        self.ale.setInt(b'random_seed', 123456)
        random.seed(123)
        # Fix https://groups.google.com/forum/#!topic/deep-q-learning/p4FAIaabwlo
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.loadROM(b'./breakout.bin')

        self.actionSet = self.ale.getMinimalActionSet()
        self.gameNumber = 0
        self.stepNumber = 0
        self.resetGame()

    def getNumActions(self):
        return len(self.actionSet)

    def getState(self):
        return self.state

    def getGameNumber(self):
        return self.gameNumber

    def getFrameNumber(self):
        return self.ale.getFrameNumber()

    def getEpisodeFrameNumber(self):
        return self.ale.getEpisodeFrameNumber()

    def getEpisodeStepNumber(self):
        return self.episodeStepNumber

    def getStepNumber(self):
        return self.stepNumber

    def getGameScore(self):
        return self.gameScore

    def isGameOver(self):
        return self.ale.game_over()

    def step(self, action):
        previousLives = self.ale.lives()
        reward = 0
        isTerminal = 0
        self.stepNumber += 1
        self.episodeStepNumber += 1

        # Repeat the chosen action for four frames, accumulating reward.
        for i in range(4):
            prevScreenRGB = self.ale.getScreenRGB()
            reward += self.ale.act(self.actionSet[action])
            screenRGB = self.ale.getScreenRGB()

            # Treat a lost life as terminal. (Author's note: I don't think I'm
            # handling this right in terms of the overall game loop.)
            if self.ale.lives() < previousLives or self.ale.game_over():
                isTerminal = 1
                break

        if self.gameNumber % self.screenCaptureFrequency == 0:
            capDir = self.outputDir + '/screen_cap/game-%06d' % self.gameNumber
            if not os.path.isdir(capDir):
                os.makedirs(capDir)
            # self.ale.saveScreenPNG(capDir + '/frame-%06d.png' % self.getEpisodeFrameNumber())

        # Max over the last two raw frames to remove sprite flicker.
        maxedScreen = np.maximum(screenRGB, prevScreenRGB)
        self.state = self.state.stateByAddingScreen(maxedScreen,
                                                    self.ale.getFrameNumber())
        self.gameScore += reward
        return reward, self.state, isTerminal

    def resetGame(self):
        if self.ale.game_over():
            self.gameNumber += 1
        self.ale.reset_game()
        self.state = State().stateByAddingScreen(self.ale.getScreenRGB(),
                                                 self.ale.getFrameNumber())
        self.gameScore = 0
        # Environment steps, not ALE frames; the frame count will be ~4x this.
        self.episodeStepNumber = 0
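
# A minimal driver sketch, assuming an args object exposing the
# screen_capture_freq attribute the class reads; the argparse flag name and
# output directory here are illustrative.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--screen-capture-freq', dest='screen_capture_freq',
                        type=int, default=100)
    args = parser.parse_args()

    env = AtariEnvironment(args, outputDir='./output')
    while not env.isGameOver():
        action = random.randrange(env.getNumActions())
        reward, state, isTerminal = env.step(action)
    print('Game %d finished with score %d' %
          (env.getGameNumber(), env.getGameScore()))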