class Atari(AtariEnv):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, game='pong', obs_type='ram', frameskip=(2, 5),
                 repeat_action_probability=0.):
        """Frameskip should be either a tuple (indicating a random range to
        choose from, with the top value excluded), or an int."""
        utils.EzPickle.__init__(self, game, obs_type)
        assert obs_type in ('ram', 'image')
        self.game_path = atari_py.get_game_path(game)
        if not os.path.exists(self.game_path):
            raise IOError('You asked for game %s but path %s does not exist'
                          % (game, self.game_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(repeat_action_probability, (float, int)), \
            "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'),
                          repeat_action_probability)

        self._seed()

        (screen_width, screen_height) = self.ale.getScreenDims()
        self._buffer = np.empty((screen_height, screen_width, 3), dtype=np.uint8)

        self._action_set = self.ale.getMinimalActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))

        if self._obs_type == 'ram':
            self.observation_space = spaces.Box(low=np.zeros(128),
                                                high=np.zeros(128) + 255)
        elif self._obs_type == 'image':
            self.observation_space = spaces.Box(low=0, high=255,
                                                shape=(screen_height, screen_width, 3))
        else:
            raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))

    def _get_image(self):
        return self.ale.getScreenRGB(self._buffer).copy()
def main(): arguments = docopt.docopt(__doc__, version='ALE Demo Version 1.0') pygame.init() ale = ALEInterface() ale.setInt(b'random_seed', 123) ale.setBool(b'display_screen', True) ale.loadROM(str.encode(arguments['<rom_file>'])) legal_actions = ale.getLegalActionSet() width, height = ale.getScreenDims() print(width, height) frame = ale.getScreenRGB() frame = np.array(frame, dtype=float) rewards, num_episodes = [], int(arguments['--iters'] or 5) for episode in range(num_episodes): total_reward = 0 while not ale.game_over(): total_reward += ale.act(random.choice(legal_actions)) print('Episode %d reward %d.' % (episode, total_reward)) rewards.append(total_reward) ale.reset_game() average = sum(rewards) / len(rewards) print('Average for %d episodes: %d' % (num_episodes, average))
class env_atari: def __init__(self, params): self.params = params self.ale = ALEInterface() self.ale.setInt('random_seed', np.random.randint(0, 500)) self.ale.setFloat('repeat_action_probability', params['repeat_prob']) self.ale.setInt(b'frame_skip', params['frameskip']) self.ale.setBool('color_averaging', True) self.ale.loadROM('roms/' + params['rom'] + '.bin') self.actions = self.ale.getMinimalActionSet() self.action_space = c_action_space(len(self.actions)) self.screen_width, self.screen_height = self.ale.getScreenDims() def reset(self): self.ale.reset_game() seed = np.random.randint(0, 7) for i in range(seed): self.ale.act(0) return self.get_image() def step(self, action): reward = self.ale.act(self.actions[action]) next_s = self.get_image() terminate = self.ale.game_over() return next_s, reward, float(terminate), 0 def get_image(self): temp = np.zeros(self.screen_height * self.screen_width * 3, dtype=np.uint8) self.ale.getScreenRGB(temp) #self.ale.getScreenGrayscale(temp) return temp.reshape((self.screen_height, self.screen_width, 3))
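# Illustrative driving loop for the env_atari wrapper above (not part of the
# original source). It assumes roms/breakout.bin exists and that c_action_space
# is defined elsewhere in the project; the params keys mirror those read in
# __init__. Actions are indexed via len(env.actions) to avoid relying on the
# c_action_space API.
import numpy as np

params = {'rom': 'breakout', 'frameskip': 4, 'repeat_prob': 0.0}
env = env_atari(params)
state = env.reset()
done = 0.0
while not done:
    a = np.random.randint(len(env.actions))   # random action index
    state, reward, done, _ = env.step(a)      # done is returned as a float flag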
class FastAtariEnv(AtariEnv): def __init__(self, game='Breakout', obs_type='image', frameskip=(2, 5), repeat_action_probability=0.): self.game_path = atari_py.get_game_path(game) self._obs_type = obs_type self.frameskip = frameskip self.ale = ALEInterface() self.viewer = None assert isinstance( repeat_action_probability, (float, int)), "Invalid repeat_action_probability: {!r}".format( repeat_action_probability) self.ale.setFloat('repeat_action_probability'.encode('utf-8'), repeat_action_probability) self._seed() (screen_width, screen_height) = self.ale.getScreenDims() self._buffer = np.empty((screen_height, screen_width, 3), dtype=np.uint8) def _get_image(self): # Don't reorder from rgb to bgr as we're converting to greyscale anyway self.ale.getScreenRGB(self._buffer) # says rgb but actually bgr return self._buffer
class Atari:
    def __init__(self, rom_dir):
        self.ale = ALEInterface()

        # Set settings
        self.ale.setInt("random_seed", 123)
        self.frame_skip = 4
        self.ale.setInt("frame_skip", self.frame_skip)
        self.ale.setBool("display_screen", False)
        self.ale.setBool("sound", True)
        self.record_sound_for_user = True
        self.ale.setBool("record_sound_for_user", self.record_sound_for_user)
        # NOTE recording audio to file still works. But if both file recording and
        # record_sound_for_user are enabled, then only the latter is done
        # self.ale.setString("record_sound_filename", "")

        # Get settings
        self.ale.loadROM(rom_dir)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getLegalActionSet()

        # Action count across all episodes
        self.action_count = 0
        self.start_time = time.time()

        self.reset()

    def reset(self):
        self.ale.reset_game()

    def take_action(self):
        action = self.legal_actions[np.random.randint(self.legal_actions.size)]
        self.ale.act(action)
        self.action_count += 1

    def print_fps(self, delta_t=500):
        if self.action_count % delta_t == 0:
            print '[atari.py] Frames/second: %f' % (
                self.action_count / (time.time() - self.start_time))
            print '[atari.py] Overall game frame count:', self.action_count * self.frame_skip
            print '---------'

    def get_image_and_audio(self):
        np_data_image = np.zeros(self.screen_width * self.screen_height * 3,
                                 dtype=np.uint8)
        if self.record_sound_for_user:
            np_data_audio = np.zeros(self.ale.getAudioSize(), dtype=np.uint8)
            self.ale.getScreenRGBAndAudio(np_data_image, np_data_audio)
            # Also supports independent audio queries if user desires:
            # self.ale.getAudio(np_data_audio)
        else:
            np_data_audio = 0
            self.ale.getScreenRGB(np_data_image)

        return (np.reshape(np_data_image, (self.screen_height, self.screen_width, 3)),
                np.asarray(np_data_audio))
class emulator:
    def __init__(self, rom_name, vis):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i
        print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" + str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow("preview")

    def get_image(self):
        # numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8)
        # self.ale.getScreenRGB(numpy_surface)
        # image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
        image = self.ale.getScreenRGB()
        image = np.reshape(image, (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):
        self.ale.reset_game()
        return self.get_image(), 0, False

    def next(self, action_indx):
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        if self.vis:
            cv2.imshow('preview', nextstate)
        return nextstate, reward, self.ale.game_over()

    def train(self):
        for episode in range(10):
            total_reward = 0
            frame_number = 0
            while not self.ale.game_over():
                a = self.legal_actions[random.randrange(len(self.legal_actions))]
                # Apply an action and get the resulting reward
                reward = self.ale.act(a)
                total_reward += reward
                screen = self.ale.getScreenRGB()
                screen = np.array(screen).reshape([self.screen_height, self.screen_width, -1])
                frame_number = self.ale.getEpisodeFrameNumber()
                cv2.imshow("screen", screen / 255.0)
                cv2.waitKey(0)
                self.ale.saveScreenPNG("test_" + str(frame_number) + ".png")
            print('Episode %d ended with score: %d' % (episode, total_reward))
            print('Frame number is : ', frame_number)
            self.ale.reset_game()
class AtariEnvironment(Environment): """ Atari Environment Object """ def __init__(self, rom_path, action_repeat=4, death_end=True, width_resize=84, height_resize=84, resize_mod='scale'): super(Environment, self).__init__() self.action_repeat = action_repeat self.death_end = death_end self.width_resize = width_resize self.height_resize = height_resize self.resize_mod = resize_mod self.display = False from ale_python_interface import ALEInterface self.ale = ALEInterface() self.ale.loadROM(rom_path) self.ale.setInt('random_seed', np.random.randint(1000)) self.ale.setBool('display_screen', self.display) self.action_set = self.ale.getMinimalActionSet() self.num_actions = len(self.action_set) self.start_lives = self.ale.lives() width, height = self.ale.getScreenDims() self.currentScreen = np.empty((height, width), dtype=np.uint8) self.reset() def reset(self): self.ale.reset_game() self.ale.getScreenGrayscale(self.currentScreen) self.terminal = False def step(self, action, repeat=None): repeat = self.action_repeat if repeat is None else repeat reward = 0 for _ in range(repeat): reward += self.ale.act(self.action_set[action]) self.ale.getScreenGrayscale(self.currentScreen) self.terminal = self.death_end and self.ale.lives( ) < self.start_lives or self.ale.game_over() return reward def get_frame(self): if self.resize_mod == 'scale': return imresize(self.currentScreen, (self.width_resize, self.height_resize), interp='bilinear') elif self.resize_mod == 'crop': height, width = self.currentScreen.shape res = (height - width) / 2 crop = self.currentScreen[res:(res + width), :] return imresize(crop, (self.width_resize, self.height_resize), interp='bilinear')
class Env(): def __init__(self, rom_name): self.__initALE() self.__loadROM(rom_name) self.screen_history = [] self.screens = [] def __initALE(self): self.ale = ALEInterface() self.ale.setInt(b'random_seed', randrange(1000)) self.ale.setInt(b'fragsize', 64) self.ale.setInt(b'frame_skip', 1) # qq set this back to 0.25? self.ale.setFloat(b'repeat_action_probability', 0) self.ale.setLoggerMode('error') def __loadROM(self, rom_name): self.ale.loadROM(rom_name.encode('utf-8')) self.actions = self.ale.getMinimalActionSet() (width, height) = self.ale.getScreenDims() self.screen_data1 = np.empty((height, width, 3), dtype=np.uint8) self.screen_data2 = np.empty((height, width, 3), dtype=np.uint8) def get_legal_action_count(self): return len(self.actions) def act(self, action_index): action = self.actions[action_index] reward = 0 # perform the action 4 times reward += _clip(self.ale.act(action), -1, 1) reward += _clip(self.ale.act(action), -1, 1) reward += _clip(self.ale.act(action), -1, 1) self.ale.getScreenRGB(self.screen_data1) reward += _clip(self.ale.act(action), -1, 1) self.ale.getScreenRGB(self.screen_data2) # return the pixel-wise max of the last two frames (some games only # render every other frame) screen_data_combined = np.maximum(self.screen_data1, self.screen_data2) terminal = self.ale.game_over() self.screens.append(preprocess_screen(screen_data_combined)) phi = get_phi(self.screens) return (terminal, reward, phi, self.screen_data2) def get_s(self): return get_phi(self.screens) def reset(self): self.ale.reset_game() self.screens = []
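# Illustrative episode loop for the Env wrapper above (not from the original
# source). 'breakout.bin' is a placeholder ROM path, and preprocess_screen,
# get_phi and _clip are assumed to be defined elsewhere in the same project.
from random import randrange

env = Env('breakout.bin')
env.reset()
n_actions = env.get_legal_action_count()
terminal = False
while not terminal:
    # act() repeats the chosen action 4 times and returns the stacked state phi
    terminal, reward, phi, raw_screen = env.act(randrange(n_actions))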
class emulator: def __init__(self, rom_name, vis, windowname='preview'): self.ale = ALEInterface() self.max_frames_per_episode = self.ale.getInt( "max_num_frames_per_episode") self.ale.setInt("random_seed", 123) self.ale.setInt("frame_skip", 4) self.ale.loadROM('roms/' + rom_name) self.legal_actions = self.ale.getMinimalActionSet() self.action_map = dict() self.windowname = windowname for i in range(len(self.legal_actions)): self.action_map[self.legal_actions[i]] = i self.init_frame_number = 0 # print(self.legal_actions) self.screen_width, self.screen_height = self.ale.getScreenDims() print("width/height: " + str(self.screen_width) + "/" + str(self.screen_height)) self.vis = vis if vis: cv2.startWindowThread() cv2.namedWindow(self.windowname) def get_image(self): numpy_surface = np.zeros(self.screen_height * self.screen_width * 3, dtype=np.uint8) self.ale.getScreenRGB(numpy_surface) image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3)) return image def newGame(self): # Instead of resetting the game, we load a checkpoint and start from there. # self.ale.reset_game() self.ale.restoreState( self.ale.decodeState(checkpoints[random.randint( 0, 99)].astype('uint8'))) self.init_frame_number = self.ale.getFrameNumber() #self.ale.restoreState(self.ale.decodeState(np.reshape(checkpoint,(1009,1)))) return self.get_image() def next(self, action_indx): reward = self.ale.act(action_indx) nextstate = self.get_image() # scipy.misc.imsave('test.png',nextstate) if self.vis: cv2.imshow(self.windowname, nextstate) return nextstate, reward, self.ale.game_over() def get_frame_number(self): return self.ale.getFrameNumber() - self.init_frame_number
class Environment: def __init__(self, show_screen, history_length): self.ale = ALEInterface() self.ale.setInt('frame_skip', 4) self.history = None self.history_length = history_length if show_screen: self.display_screen() self.load_game() (screen_width, screen_height) = self.ale.getScreenDims() self.screen_data = np.empty((screen_height, screen_width, 1), dtype=np.uint8) # 210x160 screen data self.dims = (84, 84) # input size for neural network self.actions = [3, 0, 1, 4] # noop, left, right, fire, def display_screen(self): self.ale.setBool("display_screen", True) def turn_on_sound(self): self.ale.setBool("sound", True) def restart(self): """reset game""" self.ale.reset_game() def act(self, action): """:returns reward of an action""" return self.ale.act(self.actions[action]) def __get_screen(self): """:returns Grayscale thresholded resized screen image """ self.ale.getScreenGrayscale(self.screen_data) resized = cv2.resize(self.screen_data, self.dims) return resized def get_state(self): binary_screen = self.__get_screen() if self.history is None: self.history = deque(maxlen=self.history_length) for _ in range(self.history_length - 1): self.history.append(binary_screen) self.history.append(binary_screen) result = np.stack(self.history, axis=0) return result def isTerminal(self): """checks if game is over""" return self.ale.game_over() def load_game(self): """load game from file""" self.ale.loadROM("Breakout.bin")
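# Illustrative usage of the Environment wrapper above (not from the original
# source); it assumes Breakout.bin is present in the working directory.
import random

env = Environment(show_screen=False, history_length=4)
env.restart()
while not env.isTerminal():
    a = random.randrange(4)        # index into env.actions
    reward = env.act(a)
    state = env.get_state()        # (history_length, 84, 84) uint8 stack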
class emulator(object):
    def __init__(self, rom_name, vis, frameskip=1, windowname='preview'):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", frameskip)
        romfile = str(ROM_PATH) + str(rom_name)
        if not os.path.exists(romfile):
            print('No ROM file found at "' + romfile +
                  '".\nAdjust ROM_PATH or double-check the file exists.')
        self.ale.loadROM(romfile)
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        self.windowname = windowname
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i
        # print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" + str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname,
                            flags=cv2.WINDOW_AUTOSIZE)  # window sized to the image (not user-resizable)

    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3,
                                 dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):
        self.ale.reset_game()
        return self.get_image()

    def next(self, action_indx):
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        # scipy.misc.imsave('test.png', nextstate)
        if self.vis:
            cv2.imshow(self.windowname, nextstate)
            if sys.platform == 'darwin':
                # if we don't do this, can hang on OS X
                cv2.waitKey(2)
        return nextstate, reward, self.ale.game_over()
class Atari:
    # Constructor
    def __init__(self, rom_name):
        # Step 1: load the game and set its parameters
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(b"max_num_frames_per_episode")
        self.ale.setInt(b"random_seed", 123)
        self.ale.setInt(b"frame_skip", 4)
        self.ale.loadROM(('game/' + rom_name).encode())
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i

        # Step 2: create the display window
        self.windowname = rom_name
        cv2.startWindowThread()
        cv2.namedWindow(rom_name)

    # Returns a frame from the emulator, already in the format expected
    # by our training algorithm.
    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3,
                                 dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
        return image

    # Simply starts a new game
    def newGame(self):
        self.ale.reset_game()
        return self.get_image()

    # Returns the observation of the state, the reward and the game-over flag
    # after the given action is taken.
    def next(self, action):
        reward = self.ale.act(self.legal_actions[np.argmax(action)])
        nextstate = self.get_image()
        cv2.imshow(self.windowname, nextstate)
        if self.ale.game_over():
            self.newGame()
        return nextstate, reward, self.ale.game_over()
class emulator: def __init__(self, rom_name, vis,windowname='preview'): self.ale = ALEInterface() self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode"); self.ale.setInt("random_seed",123) self.ale.setInt("frame_skip",4) self.ale.loadROM('roms/' + rom_name ) self.legal_actions = self.ale.getMinimalActionSet() self.action_map = dict() self.windowname = windowname for i in range(len(self.legal_actions)): self.action_map[self.legal_actions[i]] = i self.init_frame_number = 0 # print(self.legal_actions) self.screen_width,self.screen_height = self.ale.getScreenDims() print("width/height: " +str(self.screen_width) + "/" + str(self.screen_height)) self.vis = vis if vis: cv2.startWindowThread() cv2.namedWindow(self.windowname) def get_image(self): numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8) self.ale.getScreenRGB(numpy_surface) image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3)) return image def newGame(self): # Instead of resetting the game, we load a checkpoint and start from there. # self.ale.reset_game() self.ale.restoreState(self.ale.decodeState(checkpoints[random.randint(0,99)].astype('uint8'))) self.init_frame_number = self.ale.getFrameNumber() #self.ale.restoreState(self.ale.decodeState(np.reshape(checkpoint,(1009,1)))) return self.get_image() def next(self, action_indx): reward = self.ale.act(action_indx) nextstate = self.get_image() # scipy.misc.imsave('test.png',nextstate) if self.vis: cv2.imshow(self.windowname,nextstate) return nextstate, reward, self.ale.game_over() def get_frame_number(self): return self.ale.getFrameNumber() - self.init_frame_number
class Emulator: def __init__(self, rom_name, vis): self.ale = ALEInterface() self.max_frames_per_episode = self.ale.getInt( "max_num_frames_per_episode") self.ale.setInt("random_seed", 123) self.ale.setInt("frame_skip", 4) self.ale.loadROM('roms/' + rom_name) self.legal_actions = self.ale.getMinimalActionSet() self.action_map = dict() for i in range(len(self.legal_actions)): self.action_map[self.legal_actions[i]] = i #print(self.legal_actions) self.screen_width, self.screen_height = self.ale.getScreenDims() print("width/height: " + str(self.screen_width) + "/" + str(self.screen_height)) self.vis = vis if vis: cv2.startWindowThread() cv2.namedWindow("preview") def get_image(self): numpy_surface = np.zeros(self.screen_height * self.screen_width * 3, dtype=np.uint8) self.ale.getScreenRGB(numpy_surface) image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3)) #added by ben may 2016 print image print '&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& printing' return image def newGame(self): self.ale.reset_game() return self.get_image() def next(self, action_indx): reward = self.ale.act(action_indx) nextstate = self.get_image() # scipy.misc.imsave('test.png',nextstate) # scipy.misc.imsave('test.png',nextstate) if self.vis: cv2.imshow('preview', nextstate) return nextstate, reward, self.ale.game_over()
class emulator:
    def __init__(self, rom_name, vis, windowname='preview'):
        self.ale = ALEInterface()
        # When it starts
        self.ale.setInt("random_seed", 123)
        # Skipping 4 frames
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        print('Actions : %s' % self.legal_actions)
        self.action_map = dict()
        self.windowname = windowname
        # Raw Atari frames are 210 x 160 pixel images
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" + str(self.screen_height))
        # Visualize
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

    def get_image(self):
        # Need to specify data type as uint8
        numpy_surface = np.zeros([self.screen_width * self.screen_height * 3],
                                 dtype=np.uint8)
        # get RGB values
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface, [self.screen_height, self.screen_width, 3])
        return image

    def new_game(self):
        # Reset game and return the initial frame
        self.ale.reset_game()
        return self.get_image()

    def next(self, action_index):
        # Get R(s,a)
        reward = self.ale.act(action_index)
        # Get image pixel value after taking an action
        next_state = self.get_image()
        if self.vis:
            cv2.imshow(self.windowname, next_state)
        # self.ale.game_over() returns True when game is over
        return next_state, reward, self.ale.game_over()
class Atari: def __init__(self, rom_name): self.ale = ALEInterface() self.max_frames_per_episode = self.ale.getInt( "max_num_frames_per_episode") self.ale.setInt("random_seed", 123) self.ale.setInt("frame_skip", 4) self.ale.loadROM('game/' + rom_name) self.screen_width, self.screen_height = self.ale.getScreenDims() self.legal_actions = self.ale.getMinimalActionSet() self.action_map = dict() for i in range(len(self.legal_actions)): self.action_map[self.legal_actions[i]] = i # print len(self.legal_actions) self.windowname = rom_name cv2.startWindowThread() cv2.namedWindow(rom_name) def get_image(self): numpy_surface = np.zeros(self.screen_height * self.screen_width * 3, dtype=np.uint8) self.ale.getScreenRGB(numpy_surface) image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3)) return image def newGame(self): self.ale.reset_game() return self.get_image() def next(self, action): reward = self.ale.act(self.legal_actions[np.argmax(action)]) nextstate = self.get_image() cv2.imshow(self.windowname, nextstate) if self.ale.game_over(): self.newGame() # print "reward %d" % reward return nextstate, reward, self.ale.game_over()
class Atari: def __init__(self,rom_name): self.ale = ALEInterface() self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode") self.ale.setInt("random_seed",123) self.ale.setInt("frame_skip",4) self.ale.loadROM(rom_name) self.screen_width,self.screen_height = self.ale.getScreenDims() self.legal_actions = self.ale.getMinimalActionSet() self.action_map = dict() for i in range(len(self.legal_actions)): self.action_map[self.legal_actions[i]] = i print len(self.legal_actions) self.windowname = rom_name cv2.startWindowThread() cv2.namedWindow(rom_name) def preprocess(self, image): image = cv2.cvtColor(cv2.resize(image, (84, 110)), cv2.COLOR_BGR2GRAY) image = image[26:110,:] ret, image = cv2.threshold(image,1,255,cv2.THRESH_BINARY) return np.reshape(image,(84,84, 1)) def get_image(self): numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8) self.ale.getScreenRGB(numpy_surface) image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3)) return self.preprocess(image) def newGame(self): self.ale.reset_game() return self.get_image() def next(self, action): reward = self.ale.act(self.legal_actions[np.argmax(action)]) nextstate = self.get_image() cv2.imshow(self.windowname,nextstate) if self.ale.game_over(): self.newGame() #print "reward %d" % reward return nextstate, reward, self.ale.game_over()
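# Illustrative call sequence for the Atari wrapper above (not from the original
# source). next() selects the action via np.argmax, so it expects a one-hot
# vector over the minimal action set; 'breakout.bin' is a placeholder ROM name.
import numpy as np

atari = Atari('breakout.bin')
state = atari.newGame()                        # 84x84x1 preprocessed frame
one_hot = np.zeros(len(atari.legal_actions))
one_hot[0] = 1.0                               # e.g. always pick the first action
next_state, reward, game_over = atari.next(one_hot)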
class emulator: def __init__(self, rom_name, vis): if vis: import cv2 self.ale = ALEInterface() self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode"); self.ale.setInt("random_seed",123) self.ale.setInt("frame_skip",4) self.ale.loadROM('roms/' + rom_name ) self.legal_actions = self.ale.getMinimalActionSet() self.action_map = dict() for i in range(len(self.legal_actions)): self.action_map[self.legal_actions[i]] = i # print(self.legal_actions) self.screen_width,self.screen_height = self.ale.getScreenDims() print("width/height: " +str(self.screen_width) + "/" + str(self.screen_height)) self.vis = vis if vis: cv2.startWindowThread() cv2.namedWindow("preview") def get_image(self): numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8) self.ale.getScreenRGB(numpy_surface) image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3)) return image def newGame(self): self.ale.reset_game() return self.get_image() def next(self, action_indx): reward = self.ale.act(action_indx) nextstate = self.get_image() # scipy.misc.imsave('test.png',nextstate) if self.vis: cv2.imshow('preview',nextstate) return nextstate, reward, self.ale.game_over()
class Atari: def __init__(self,rom_name): self.ale = ALEInterface() self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode") self.ale.setInt("random_seed",123) self.ale.setInt("frame_skip",4) self.ale.loadROM('./' +rom_name) self.screen_width,self.screen_height = self.ale.getScreenDims() self.legal_actions = self.ale.getMinimalActionSet() self.action_map = dict() for i in range(len(self.legal_actions)): self.action_map[self.legal_actions[i]] = i #print len(self.legal_actions) self.windowname = rom_name #cv2.startWindowThread() #cv2.namedWindow(rom_name) def get_image(self): numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8) self.ale.getScreenRGB(numpy_surface) image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3)) return image def newGame(self): self.ale.reset_game() return self.get_image() def next(self, action): reward = self.ale.act(self.legal_actions[np.argmax(action)]) nextstate = self.get_image() #cv2.imshow(self.windowname,nextstate) if self.ale.game_over(): self.newGame() #print "reward %d" % reward return nextstate, reward, self.ale.game_over()
class Environment: """docstring for Environment""" BUFFER_LEN = 2 EPISODE_FRAMES = 18000 EPOCH_COUNT = 200 EPOCH_STEPS = 250000 EVAL_EPS = 0.001 FRAMES_SKIP = 4 FRAME_HEIGHT = 84 FRAME_WIDTH = 84 MAX_NO_OP = 30 MAX_REWARD = 1 def __init__(self, rom_name, rng, display_screen = False): self.api = ALEInterface() self.api.setInt('random_seed', rng.randint(333)) self.api.setBool('display_screen', display_screen) self.api.setFloat('repeat_action_probability', 0.0) self.rom_name = rom_name self.display_screen = display_screen self.rng = rng self.repeat = Environment.FRAMES_SKIP self.buffer_len = Environment.BUFFER_LEN self.height = Environment.FRAME_HEIGHT self.width = Environment.FRAME_WIDTH self.episode_steps = Environment.EPISODE_FRAMES / Environment.FRAMES_SKIP self.merge_id = 0 self.max_reward = Environment.MAX_REWARD self.eval_eps = Environment.EVAL_EPS self.log_dir = '' self.network_dir = '' self.api.loadROM('../rom/' + self.rom_name) self.minimal_actions = self.api.getMinimalActionSet() original_width, original_height = self.api.getScreenDims() self.merge_frame = np.zeros((self.buffer_len , original_height , original_width) , dtype = np.uint8) def get_action_count(self): return len(self.minimal_actions) def train(self, agent, store_freq, folder = None, start_epoch = 0): self._open_log_files(agent, folder) obs = np.zeros((self.height, self.width), dtype = np.uint8) epoch_count = Environment.EPOCH_COUNT for epoch in xrange(start_epoch, epoch_count): self.need_reset = True steps_left = Environment.EPOCH_STEPS print "\n" + "=" * 50 print "Epoch #%d" % (epoch + 1) episode = 0 train_start = time.time() while steps_left > 0: num_step, _ = self._run_episode(agent, steps_left, obs) steps_left -= num_step episode += 1 if steps_left == 0 or episode % 10 == 0: print "Finished episode #%d, steps_left = %d" \ % (episode, steps_left) train_end = time.time() valid_values = agent.get_validate_values() eval_values = self.evaluate(agent) test_end = time.time() train_time = train_end - train_start test_time = test_end - train_end step_per_sec = Environment.EPOCH_STEPS * 1. 
/ max(1, train_time) print "\tFinished epoch #%d, episode trained = %d\n" \ "\tValidate values = %.3f, evaluate reward = %.3f\n"\ "\tTrain time = %.0fs, test time = %.0fs, steps/sec = %.4f" \ % (epoch + 1, episode, valid_values, eval_values\ , train_time, test_time, step_per_sec) self._update_log_files(agent, epoch + 1, episode , valid_values, eval_values , train_time, test_time , step_per_sec, store_freq) gc.collect() def evaluate(self, agent, episodes = 30, obs = None): print "\n***Start evaluating" if obs is None: obs = np.zeros((self.height, self.width), dtype = np.uint8) sum_reward = 0.0 sum_step = 0.0 for episode in xrange(episodes): self.need_reset = True step, reward = self._run_episode(agent, self.episode_steps, obs , self.eval_eps, evaluating = True) sum_reward += reward sum_step += step print "Finished episode %d, reward = %d, step = %d" \ % (episode + 1, reward, step) self.need_reset = True print "Average reward per episode = %.4f" % (sum_reward / episodes) print "Average step per episode = %.4f" % (sum_step / episodes) return sum_reward / episodes def _prepare_game(self): if self.need_reset or self.api.game_over(): self.api.reset_game() self.need_reset = False if Environment.MAX_NO_OP > 0: num_no_op = self.rng.randint(Environment.MAX_NO_OP + 1) \ + self.buffer_len for _ in xrange(num_no_op): self.api.act(0) for _ in xrange(self.buffer_len): self._update_buffer() def _run_episode(self, agent, steps_left, obs , eps = 0.0, evaluating = False): self._prepare_game() start_lives = self.api.lives() step_count = 0 sum_reward = 0 is_terminal = False while step_count < steps_left and not is_terminal: self._get_screen(obs) action_id, _ = agent.get_action(obs, eps, evaluating) reward = self._repeat_action(self.minimal_actions[action_id]) reward_clip = reward if self.max_reward > 0: reward_clip = np.clip(reward, -self.max_reward, self.max_reward) life_lost = not evaluating and self.api.lives() < start_lives is_terminal = self.api.game_over() or life_lost \ or step_count + 1 >= steps_left agent.add_experience(obs, is_terminal, action_id, reward_clip , evaluating) sum_reward += reward step_count += 1 return step_count, sum_reward def _update_buffer(self): self.api.getScreenGrayscale(self.merge_frame[self.merge_id, ...]) self.merge_id = (self.merge_id + 1) % self.buffer_len def _repeat_action(self, action): reward = 0 for i in xrange(self.repeat): reward += self.api.act(action) if i + self.buffer_len >= self.repeat: self._update_buffer() return reward def _get_screen(self, resized_frame): self._resize_frame(self.merge_frame.max(axis = 0), resized_frame) def _resize_frame(self, src_frame, dst_frame): cv2.resize(src = src_frame, dst = dst_frame, dsize = (self.width, self.height), interpolation = cv2.INTER_LINEAR) def _open_log_files(self, agent, folder): time_str = time.strftime("_%m-%d-%H-%M", time.localtime()) base_rom_name = os.path.splitext(os.path.basename(self.rom_name))[0] if folder is not None: self.log_dir = folder self.network_dir = self.log_dir + '/network' else: self.log_dir = '../run_results/' + base_rom_name + time_str self.network_dir = self.log_dir + '/network' info_name = get_next_name(self.log_dir, 'info', 'txt') git_name = get_next_name(self.log_dir, 'git-diff', '') try: os.stat(self.log_dir) except OSError: os.makedirs(self.log_dir) try: os.stat(self.network_dir) except OSError: os.makedirs(self.network_dir) with open(os.path.join(self.log_dir, info_name), 'w') as f: f.write('Commit: ' + subprocess.check_output(['git', 'rev-parse' , 'HEAD'])) f.write('Run command: ') 
f.write(' '.join(pipes.quote(x) for x in sys.argv)) f.write('\n\n') f.write(agent.get_info()) write_info(f, Environment) write_info(f, agent.__class__) write_info(f, agent.network.__class__) # From https://github.com/spragunr/deep_q_rl/pull/49/files with open(os.path.join(self.log_dir, git_name), 'w') as f: f.write(subprocess.check_output(['git', 'diff', 'HEAD'])) if folder is not None: return with open(os.path.join(self.log_dir, 'results.csv'), 'w') as f: f.write("epoch,episode_train,validate_values,evaluate_reward"\ ",train_time,test_time,steps_per_second\n") mem = psutil.virtual_memory() with open(os.path.join(self.log_dir, 'memory.csv'), 'w') as f: f.write("epoch,available,free,buffers,cached"\ ",available_readable,used_percent\n") f.write("%d,%d,%d,%d,%d,%s,%.1f\n" % \ (0, mem.available, mem.free, mem.buffers, mem.cached , bytes2human(mem.available), mem.percent)) def _update_log_files(self, agent, epoch, episode, valid_values , eval_values, train_time, test_time, step_per_sec , store_freq): print "Updating log files" with open(self.log_dir + '/results.csv', 'a') as f: f.write("%d,%d,%.4f,%.4f,%d,%d,%.4f\n" % \ (epoch, episode, valid_values, eval_values , train_time, test_time, step_per_sec)) mem = psutil.virtual_memory() with open(self.log_dir + '/memory.csv', 'a') as f: f.write("%d,%d,%d,%d,%d,%s,%.1f\n" % \ (epoch, mem.available, mem.free, mem.buffers, mem.cached , bytes2human(mem.available), mem.percent)) agent.dump_network(self.network_dir + ('/%03d' % (epoch)) + '.npz') if (store_freq >= 0 and epoch >= Environment.EPOCH_COUNT) or \ (store_freq > 0 and (epoch % store_freq == 0)): agent.dump_exp(self.network_dir + '/exp.npz') def _setup_record(self, network_file): file_name, _ = os.path.splitext(os.path.basename(network_file)) time_str = time.strftime("_%m-%d-%H-%M", time.localtime()) img_dir = os.path.dirname(network_file) + '/images_' \ + file_name + time_str rom_name, _ = os.path.splitext(self.rom_name) out_name = os.path.dirname(network_file) + '/' + rom_name + '_' \ + file_name + time_str + '.mov' print out_name try: os.stat(img_dir) except OSError: os.makedirs(img_dir) self.api.setString('record_screen_dir', img_dir) self.api.loadROM('../rom/' + self.rom_name) return img_dir, out_name def record_run(self, agent, network_file, episode_id = 1): if episode_id > 1: self.evaluate(agent, episode_id - 1) system_state = self.api.cloneSystemState() img_dir, out_name = self._setup_record(network_file) if episode_id > 1: self.api.restoreSystemState(system_state) self.evaluate(agent, 1) script = \ """ { ffmpeg -r 60 -i %s/%%06d.png -f mov -c:v libx264 %s } || { avconv -r 60 -i %s/%%06d.png -f mov -c:v libx264 %s } """ % (img_dir, out_name, img_dir, out_name) os.system(script)
# ale.setBool('sound', True) # ale.setBool('display_screen', True) # Load the ROM file ale.loadROM('Breakout.bin') # Get the list of legal actions # legal_actions = ale.getLegalActionSet() legal_actions = ale.getMinimalActionSet() print legal_actions # (screen_width,screen_height) = ale.getScreenDims() # screen_data = np.zeros(screen_width*screen_height,dtype=np.uint32) # ale.getScreenRGB(screen_data) (screen_width, screen_height) = ale.getScreenDims() screen_data = np.zeros(screen_width * screen_height, dtype=np.uint8) print type(ale.getScreen(screen_data)) # Play 10 episodes for episode in xrange(10): total_reward = 0 while not ale.game_over(): a = legal_actions[randrange(len(legal_actions))] # Apply an action and get the resulting reward reward = ale.act(a) print reward total_reward += reward print 'Episode', episode, 'ended with score:', total_reward ale.reset_game()
class ArcadeLearningEnvironment(Environment): """ [Arcade Learning Environment](https://github.com/mgbellemare/Arcade-Learning-Environment) adapter (specification key: `ale`, `arcade_learning_environment`). May require: ```bash sudo apt-get install libsdl1.2-dev libsdl-gfx1.2-dev libsdl-image1.2-dev cmake git clone https://github.com/mgbellemare/Arcade-Learning-Environment.git cd Arcade-Learning-Environment mkdir build && cd build cmake -DUSE_SDL=ON -DUSE_RLGLUE=OFF -DBUILD_EXAMPLES=ON .. make -j 4 cd .. pip3 install . ``` Args: level (string): ALE rom file (<span style="color:#C00000"><b>required</b></span>). loss_of_life_termination: Signals a terminal state on loss of life (<span style="color:#00C000"><b>default</b></span>: false). loss_of_life_reward (float): Reward/Penalty on loss of life (negative values are a penalty) (<span style="color:#00C000"><b>default</b></span>: 0.0). repeat_action_probability (float): Repeats last action with given probability (<span style="color:#00C000"><b>default</b></span>: 0.0). visualize (bool): Whether to visualize interaction (<span style="color:#00C000"><b>default</b></span>: false). frame_skip (int > 0): Number of times to repeat an action without observing (<span style="color:#00C000"><b>default</b></span>: 1). seed (int): Random seed (<span style="color:#00C000"><b>default</b></span>: none). """ def __init__( self, level, life_loss_terminal=False, life_loss_punishment=0.0, repeat_action_probability=0.0, visualize=False, frame_skip=1, seed=None ): from ale_python_interface import ALEInterface self.environment = ALEInterface() self.rom_file = level self.life_loss_terminal = life_loss_terminal self.life_loss_punishment = life_loss_punishment self.environment.setFloat(b'repeat_action_probability', repeat_action_probability) self.environment.setBool(b'display_screen', visualize) self.environment.setInt(b'frame_skip', frame_skip) if seed is not None: self.environment.setInt(b'random_seed', seed) # All set commands must be done before loading the ROM. self.environment.loadROM(rom_file=self.rom_file.encode()) self.available_actions = tuple(self.environment.getLegalActionSet()) # Full list of actions: # No-Op, Fire, Up, Right, Left, Down, Up Right, Up Left, Down Right, Down Left, Up Fire, # Right Fire, Left Fire, Down Fire, Up Right Fire, Up Left Fire, Down Right Fire, Down Left # Fire def __str__(self): return super().__str__() + '({})'.format(self.rom_file) def states(self): width, height = self.environment.getScreenDims() return dict(type='float', shape=(height, width, 3)) def actions(self): return dict(type='int', num_values=len(self.available_actions)) def close(self): self.environment.__del__() self.environment = None def get_states(self): screen = np.copy(self.environment.getScreenRGB(screen_data=self.screen)) screen = screen.astype(dtype=np.float32) / 255.0 return screen def reset(self): self.environment.reset_game() width, height = self.environment.getScreenDims() self.screen = np.empty((height, width, 3), dtype=np.uint8) self.lives = self.environment.lives() return self.get_states() def execute(self, actions): reward = self.environment.act(action=self.available_actions[actions]) terminal = self.environment.game_over() states = self.get_states() next_lives = self.environment.lives() if next_lives < self.lives: if self.life_loss_terminal: terminal = True elif self.life_loss_punishment > 0.0: reward -= self.life_loss_punishment self.lives = next_lives return states, terminal, reward
class GameEnvironment: def __init__(self, settings): self.ale = ALEInterface() self.ale.setBool('display_screen', settings['DISPLAY_SCREEN']) self.ale.setBool('sound', settings['SOUND']) self.ale.setBool('color_averaging', settings['COLOR_AVERAGING']) self.ale.setInt('random_seed', settings['RANDOM_SEED']) self.ale.setInt('frame_skip', settings['FRAME_SKIP']) self.ale.setFloat('repeat_action_probability', settings['REPEAT_ACTION_PROB']) roms_dir = settings['ROMS_DIR'] rom_name = settings['ROM_NAME'] ROM = None if(rom_name.endswith('.bin')): self.name = rom_name[:-4] ROM = rom_name else: self.name = rom_name ROM = rom_name + '.bin' self.ale.loadROM(os.path.join(roms_dir, ROM)) self.random_starts = settings['RANDOM_STARTS'] self.rng = settings['RNG'] if(settings['MINIMAL_ACTION_SET']): self.actions = self.ale.getMinimalActionSet() else: self.actions = self.ale.getLegalActionSet() self.n_actions = len(self.actions) self.width, self.height = self.ale.getScreenDims() self.observation = np.zeros((self.height, self.width), dtype='uint8') self.reward = None self.game_over = None self.terminal = None self.total_lives = None self.init() def init(self): self.restartGame() self.reward = 0 self.game_over = self.gameOver() self.terminal = self.game_over self.total_lives = self.lives() self.step(0) def getState(self): return self.observation, self.reward, self.terminal, self.game_over def step(self, action, training=False): self.reward = self.act(action) self.paint() lives = self.lives() self.game_over = self.gameOver() self.terminal = self.game_over if(training and (lives < self.total_lives)): self.terminal = True self.total_lives = lives return self.getState() def newGame(self): self.init() for i in xrange(self.rng.randint(1, self.random_starts)): self.act(0) terminal = self.gameOver() if(terminal): print "Warning terminal in random init" return self.step(0) def newTestGame(self): self.init() return self.getState() def paint(self): self.ale.getScreenGrayscale(self.observation) def getScreenRGB(self): return self.ale.getScreenRGB() def act(self, action): assert ((action >= 0) and (action < self.n_actions)) return self.ale.act(self.actions[action]) def lives(self): return self.ale.lives() def restartGame(self): self.ale.reset_game() def gameOver(self): return self.ale.game_over()
class MyEnv(Environment): VALIDATION_MODE = 0 def __init__(self, rng, rom="ale/breakout.bin", frame_skip=4, ale_options=[{"key": "random_seed", "value": 0}, {"key": "color_averaging", "value": True}, {"key": "repeat_action_probability", "value": 0.}]): self._mode = -1 self._modeScore = 0.0 self._modeEpisodeCount = 0 self._frameSkip = frame_skip if frame_skip >= 1 else 1 self._randomState = rng self._ale = ALEInterface() for option in ale_options: t = type(option["value"]) if t is int: self._ale.setInt(option["key"], option["value"]) elif t is float: self._ale.setFloat(option["key"], option["value"]) elif t is bool: self._ale.setBool(option["key"], option["value"]) else: raise ValueError("Option {} ({}) is not an int, bool or float.".format(option["key"], t)) self._ale.loadROM(rom) w, h = self._ale.getScreenDims() self._screen = np.empty((h, w), dtype=np.uint8) self._reducedScreen = np.empty((84, 84), dtype=np.uint8) self._actions = self._ale.getMinimalActionSet() def reset(self, mode): if mode == MyEnv.VALIDATION_MODE: if self._mode != MyEnv.VALIDATION_MODE: self._mode = MyEnv.VALIDATION_MODE self._modeScore = 0.0 self._modeEpisodeCount = 0 else: self._modeEpisodeCount += 1 elif self._mode != -1: # and thus mode == -1 self._mode = -1 self._ale.reset_game() for _ in range(self._randomState.randint(15)): self._ale.act(0) self._ale.getScreenGrayscale(self._screen) cv2.resize(self._screen, (84, 84), self._reducedScreen, interpolation=cv2.INTER_NEAREST) return [4 * [84 * [84 * [0]]]] def act(self, action): action = self._actions[action] reward = 0 for _ in range(self._frameSkip): reward += self._ale.act(action) if self.inTerminalState(): break self._ale.getScreenGrayscale(self._screen) cv2.resize(self._screen, (84, 84), self._reducedScreen, interpolation=cv2.INTER_NEAREST) self._modeScore += reward return np.sign(reward) def summarizePerformance(self, test_data_set): if self.inTerminalState() == False: self._modeEpisodeCount += 1 print("== Mean score per episode is {} over {} episodes ==".format(self._modeScore / self._modeEpisodeCount, self._modeEpisodeCount)) def inputDimensions(self): return [(4, 84, 84)] def observationType(self, subject): return np.uint8 def nActions(self): return len(self._actions) def observe(self): return [np.array(self._reducedScreen)] def inTerminalState(self): return self._ale.game_over()
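# Illustrative manual loop for the MyEnv wrapper above (not from the original
# source); normally a deer agent drives these calls. The ROM path is the class
# default and mode -1 denotes training mode.
import numpy as np

rng = np.random.RandomState(0)
env = MyEnv(rng, rom="ale/breakout.bin", frame_skip=4)
env.reset(mode=-1)
while not env.inTerminalState():
    a = rng.randint(env.nActions())
    clipped_reward = env.act(a)    # sign of the reward summed over frame_skip frames
    obs = env.observe()            # list holding the current 84x84 grayscale frame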
class UpdatedAtariEnv(AtariEnv):
    def __init__(self, rom_path, obs_type, frameskip=(2, 5),
                 repeat_action_probability=0., mode=0, difficulty=0):
        """Frameskip should be either a tuple (indicating a random range to
        choose from, with the top value excluded), or an int."""
        utils.EzPickle.__init__(self, rom_path, obs_type)
        assert obs_type in ('ram', 'image')
        self.rom_path = rom_path
        if not os.path.exists(self.rom_path):
            raise IOError('You asked for ROM %s but path %s does not exist'
                          % (os.path.basename(rom_path), self.rom_path))
        self._obs_type = obs_type
        self.frameskip = frameskip

        # Load new ALE interface, instead of atari-py
        self.ale = ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(repeat_action_probability, (float, int)), \
            "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'),
                          repeat_action_probability)

        self.seed()

        # Set mode and difficulty
        self.ale.setMode(mode)
        self.ale.setDifficulty(difficulty)

        self._action_set = self.ale.getMinimalActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))

        (screen_width, screen_height) = self.ale.getScreenDims()
        if self._obs_type == 'ram':
            self.observation_space = spaces.Box(low=0, high=255, shape=(128, ))
        elif self._obs_type == 'image':
            self.observation_space = spaces.Box(low=0, high=255,
                                                shape=(screen_height, screen_width, 3))
        else:
            raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))

    def seed(self, seed=None):
        self.np_random, seed1 = seeding.np_random(seed)
        # Derive a random seed. This gets passed as a uint, but gets
        # checked as an int elsewhere, so we need to keep it below
        # 2**31.
        seed2 = seeding.hash_seed(seed1 + 1) % 2**31
        # Empirically, we need to seed before loading the ROM.
        self.ale.setInt(b'random_seed', seed2)
        # Load game from ROM instead of game path
        self.ale.loadROM(self.rom_path)
        return [seed1, seed2]

    def _get_image(self):
        return self.ale.getScreenRGB()
class AtariEmulator(BaseEnvironment): def __init__(self, rom_addr, random_start=False, random_seed=6, visualize=True, single_life=False): self.ale = ALEInterface() self.ale.setInt(b"random_seed", 2 * random_seed) # For fuller control on explicit action repeat (>= ALE 0.5.0) self.ale.setFloat(b"repeat_action_probability", 0.0) # Disable frame_skip and color_averaging # See: http://is.gd/tYzVpj self.ale.setInt(b"frame_skip", 1) self.ale.setBool(b"color_averaging", False) full_rom_path = rom_addr self.ale.loadROM(str.encode(full_rom_path)) self.legal_actions = self.ale.getMinimalActionSet() self.screen_width, self.screen_height = self.ale.getScreenDims() self.lives = self.ale.lives() self.writer = imageio.get_writer('breakout0.gif', fps=30) self.random_start = random_start self.single_life_episodes = single_life self.call_on_new_frame = visualize # Processed historcal frames that will be fed in to the network # (i.e., four 84x84 images) self.observation_pool = ObservationPool( np.zeros((84, 84, 4), dtype=np.uint8)) self.rgb_screen = np.zeros((self.screen_height, self.screen_width, 3), dtype=np.uint8) self.gray_screen = np.zeros((self.screen_height, self.screen_width, 1), dtype=np.uint8) self.frame_pool = FramePool( np.empty((2, self.screen_height, self.screen_width), dtype=np.uint8), self.__process_frame_pool) def get_legal_actions(self): return self.legal_actions def __get_screen_image(self): """ Get the current frame luminance :return: the current frame """ self.ale.getScreenGrayscale(self.gray_screen) if self.call_on_new_frame: self.ale.getScreenRGB(self.rgb_screen) self.on_new_frame(self.rgb_screen) return np.squeeze(self.gray_screen) def on_new_frame(self, frame): pass def __new_game(self): """ Restart game """ self.ale.reset_game() self.lives = self.ale.lives() if self.random_start: wait = random.randint(0, MAX_START_WAIT) for _ in range(wait): self.ale.act(self.legal_actions[0]) def __process_frame_pool(self, frame_pool): """ Preprocess frame pool """ img = np.amax(frame_pool, axis=0) img = imresize(img, (84, 84), interp='nearest') img = img.astype(np.uint8) return img def __action_repeat(self, a, times=ACTION_REPEAT): """ Repeat action and grab screen into frame pool """ reward = 0 for i in range(times - FRAMES_IN_POOL): reward += self.ale.act(self.legal_actions[a]) # Only need to add the last FRAMES_IN_POOL frames to the frame pool for i in range(FRAMES_IN_POOL): reward += self.ale.act(self.legal_actions[a]) self.frame_pool.new_frame(self.__get_screen_image()) return reward def get_initial_state(self): """ Get the initial state """ self.__new_game() for step in range(4): _ = self.__action_repeat(0) self.observation_pool.new_observation( self.frame_pool.get_processed_frame()) if self.__is_terminal(): raise Exception('This should never happen.') return self.observation_pool.get_pooled_observations() def next(self, action): """ Get the next state, reward, and game over signal """ reward = self.__action_repeat(np.argmax(action)) self.observation_pool.new_observation( self.frame_pool.get_processed_frame()) terminal = self.__is_terminal() self.lives = self.ale.lives() observation = self.observation_pool.get_pooled_observations() return observation, reward, terminal def __is_terminal(self): if self.single_life_episodes: return self.__is_over() or (self.lives > self.ale.lives()) else: return self.__is_over() def __is_over(self): return self.ale.game_over() def get_noop(self): return [1.0, 0.0]
class AtariPlayer(gym.Env): """ A wrapper for ALE emulator, with configurations to mimic DeepMind DQN settings. Info: score: the accumulated reward in the current game gameOver: True when the current game is Over """ def __init__(self, rom_file, viz=0, frame_skip=4, nullop_start=30, live_lost_as_eoe=True, max_num_frames=0): """ Args: rom_file: path to the rom frame_skip: skip every k frames and repeat the action viz: visualization to be done. Set to 0 to disable. Set to a positive number to be the delay between frames to show. Set to a string to be a directory to store frames. nullop_start: start with random number of null ops. live_losts_as_eoe: consider lost of lives as end of episode. Useful for training. max_num_frames: maximum number of frames per episode. """ super(AtariPlayer, self).__init__() if not os.path.isfile(rom_file) and '/' not in rom_file: rom_file = get_dataset_path('atari_rom', rom_file) assert os.path.isfile(rom_file), \ "rom {} not found. Please download at {}".format(rom_file, ROM_URL) try: ALEInterface.setLoggerMode(ALEInterface.Logger.Error) except AttributeError: if execute_only_once(): logger.warn("You're not using latest ALE") # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86 with _ALE_LOCK: self.ale = ALEInterface() self.rng = get_rng(self) self.ale.setInt(b"random_seed", self.rng.randint(0, 30000)) self.ale.setInt(b"max_num_frames_per_episode", max_num_frames) self.ale.setBool(b"showinfo", False) self.ale.setInt(b"frame_skip", 1) self.ale.setBool(b'color_averaging', False) # manual.pdf suggests otherwise. self.ale.setFloat(b'repeat_action_probability', 0.0) # viz setup if isinstance(viz, six.string_types): assert os.path.isdir(viz), viz self.ale.setString(b'record_screen_dir', viz) viz = 0 if isinstance(viz, int): viz = float(viz) self.viz = viz if self.viz and isinstance(self.viz, float): self.windowname = os.path.basename(rom_file) cv2.namedWindow(self.windowname) self.ale.loadROM(rom_file.encode('utf-8')) self.width, self.height = self.ale.getScreenDims() self.actions = self.ale.getMinimalActionSet() self.live_lost_as_eoe = live_lost_as_eoe self.frame_skip = frame_skip self.nullop_start = nullop_start self.action_space = spaces.Discrete(len(self.actions)) self.observation_space = spaces.Box( low=0, high=255, shape=(self.height, self.width, 1), dtype=np.uint8) self._restart_episode() def get_action_meanings(self): return [ACTION_MEANING[i] for i in self.actions] def _grab_raw_image(self): """ :returns: the current 3-channel image """ m = self.ale.getScreenRGB() return m.reshape((self.height, self.width, 3)) def _current_state(self): """ :returns: a gray-scale (h, w, 1) uint8 image """ ret = self._grab_raw_image() # max-pooled over the last screen ret = np.maximum(ret, self.last_raw_screen) if self.viz: if isinstance(self.viz, float): cv2.imshow(self.windowname, ret) cv2.waitKey(int(self.viz * 1000)) ret = ret.astype('float32') # 0.299,0.587.0.114. 
same as rgb2y in torch/image ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)[:, :, np.newaxis] return ret.astype('uint8') # to save some memory def _restart_episode(self): with _ALE_LOCK: self.ale.reset_game() # random null-ops start n = self.rng.randint(self.nullop_start) self.last_raw_screen = self._grab_raw_image() for k in range(n): if k == n - 1: self.last_raw_screen = self._grab_raw_image() self.ale.act(0) def reset(self): if self.ale.game_over(): self._restart_episode() return self._current_state() def step(self, act): oldlives = self.ale.lives() r = 0 for k in range(self.frame_skip): if k == self.frame_skip - 1: self.last_raw_screen = self._grab_raw_image() r += self.ale.act(self.actions[act]) newlives = self.ale.lives() if self.ale.game_over() or \ (self.live_lost_as_eoe and newlives < oldlives): break isOver = self.ale.game_over() if self.live_lost_as_eoe: isOver = isOver or newlives < oldlives info = {'ale.lives': newlives} return self._current_state(), r, isOver, info
class AtariEmulator(BaseEnvironment): def __init__(self, emulator_id, game, resource_folder, random_seed=3, random_start=True, single_life_episodes=False, history_window=1, visualize=False, verbose=0, **unknown): if verbose >= 2: logging.debug('Emulator#{} received unknown args: {}'.format( emulator_id, unknown)) self.emulator_id = emulator_id self.ale = ALEInterface() self.ale.setInt(b"random_seed", random_seed * (emulator_id + 1)) # For fuller control on explicit action repeat (>= ALE 0.5.0) self.ale.setFloat(b"repeat_action_probability", 0.0) # Disable frame_skip and color_averaging # See: http://is.gd/tYzVpj self.ale.setInt(b"frame_skip", 1) self.ale.setBool(b"color_averaging", False) self.ale.setBool(b"display_screen", visualize) full_rom_path = resource_folder + "/" + game + ".bin" self.ale.loadROM(str.encode(full_rom_path)) self.legal_actions = self.ale.getMinimalActionSet() #this env is fixed until firing, so you have to... self._have_to_fire = ('FIRE' in [ ACTION_MEANING[a] for a in self.legal_actions ]) self.screen_width, self.screen_height = self.ale.getScreenDims() self.lives = self.ale.lives() self.random_start = random_start self.single_life_episodes = single_life_episodes self.call_on_new_frame = visualize self.history_window = history_window self.observation_shape = (self.history_window, IMG_SIZE_X, IMG_SIZE_Y) self.rgb_screen = np.zeros((self.screen_height, self.screen_width, 3), dtype=np.uint8) self.gray_screen = np.zeros((self.screen_height, self.screen_width, 1), dtype=np.uint8) # Processed historcal frames that will be fed in to the network (i.e., four 84x84 images) self.history = create_history_observation(self.history_window) #ObservationPool(np.zeros(self.observation_shape, dtype=np.uint8)) self.frame_preprocessor = FramePreprocessor(self.gray_screen.shape, FRAMES_IN_POOL) def get_legal_actions(self): return self.legal_actions def __get_screen_image(self): """ Get the current frame luminance :return: the current frame """ self.ale.getScreenGrayscale(self.gray_screen) if self.call_on_new_frame: self.ale.getScreenRGB(self.rgb_screen) self.on_new_frame(self.rgb_screen) return self.gray_screen def on_new_frame(self, frame): pass def __random_start_reset(self): """ Restart game """ self.ale.reset_game() if self.random_start: wait = random.randint(0, MAX_START_WAIT + 1) for _ in range(wait): self.ale.act(self.get_noop()) if self.__is_over(): self.ale.reset_game() self.lives = self.ale.lives() def __new_game(self): self.__random_start_reset() if self._have_to_fire: #take action on reset for environments that are fixed until firing self.ale.act(self.legal_actions[1]) if self.__is_over(): self.__random_start_reset() self.ale.act(self.legal_actions[2]) if self.__is_over(): self.__random_start_reset() def __action_repeat(self, a, times=ACTION_REPEAT): """ Repeat action and grab screen into frame pool """ reward = 0 for i in range(times - FRAMES_IN_POOL): reward += self.ale.act(self.legal_actions[a]) # Only need to add the last FRAMES_IN_POOL frames to the frame pool for i in range(FRAMES_IN_POOL): reward += self.ale.act(self.legal_actions[a]) self.frame_preprocessor.new_frame(self.__get_screen_image()) return reward def reset(self): """ Get the initial state """ self.__new_game() for step in range(self.history_window): _ = self.__action_repeat(0) self.history.new_observation( self.frame_preprocessor.get_processed()) if self.__is_terminal(): raise Exception('This should never happen.') return self.history.get_state(), None def next(self, action): """ Get the next state, reward, 
and game over signal """ reward = self.__action_repeat(action) self.history.new_observation(self.frame_preprocessor.get_processed()) terminal = self.__is_terminal() self.lives = self.ale.lives() return self.history.get_state(), reward, terminal, None def __is_terminal(self): if self.single_life_episodes: return self.__is_over() or (self.lives > self.ale.lives()) else: return self.__is_over() def __is_over(self): return self.ale.game_over() def get_noop(self): return self.legal_actions[0] def close(self): del self.ale
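# Illustrative sketch (not part of the emulator code above): the same
# "max-pool the most recent grayscale frames, then keep a rolling history
# window" pattern that AtariEmulator delegates to FramePreprocessor and
# create_history_observation. Sizes and names here are assumptions; synthetic
# frames are used so the snippet runs without an ALE installation.
import numpy as np
from collections import deque

HISTORY_WINDOW = 4   # assumed, mirrors history_window above
FRAMES_IN_POOL = 2   # assumed, mirrors FRAMES_IN_POOL above

frame_pool = deque(maxlen=FRAMES_IN_POOL)   # most recent raw frames
history = deque(maxlen=HISTORY_WINDOW)      # pooled frames fed to the network

for t in range(4 * HISTORY_WINDOW):
    frame = np.random.randint(0, 256, size=(210, 160), dtype=np.uint8)  # fake screen
    frame_pool.append(frame)
    # pixel-wise max over the pool removes Atari sprite flicker
    pooled = np.max(np.stack(frame_pool), axis=0)
    history.append(pooled)

state = np.stack(history)        # shape: (HISTORY_WINDOW, 210, 160)
print(state.shape, state.dtype)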
class Emulator: def __init__(self, rom_path, rom_name, visualize, actor_id, rseed, single_life_episodes = False): self.ale = ALEInterface() self.ale.setInt("random_seed", rseed * (actor_id +1)) # For fuller control on explicit action repeat (>= ALE 0.5.0) self.ale.setFloat("repeat_action_probability", 0.0) # Disable frame_skip and color_averaging # See: http://is.gd/tYzVpj self.ale.setInt("frame_skip", 1) self.ale.setBool("color_averaging", False) self.ale.loadROM(rom_path + "/" + rom_name + ".bin") self.legal_actions = self.ale.getMinimalActionSet() self.screen_width,self.screen_height = self.ale.getScreenDims() #self.ale.setBool('display_screen', True) # Processed historcal frames that will be fed in to the network # (i.e., four 84x84 images) self.screen_images_processed = np.zeros((IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES)) self.rgb_screen = np.zeros((self.screen_height,self.screen_width, 3), dtype=np.uint8) self.gray_screen = np.zeros((self.screen_height,self.screen_width,1), dtype=np.uint8) self.frame_pool = np.empty((2, self.screen_height, self.screen_width)) self.current = 0 self.lives = self.ale.lives() self.visualize = visualize self.visualize_processed = False self.windowname = rom_name + ' ' + str(actor_id) if self.visualize: logger.debug("Opening emulator window...") #from skimage import io #io.use_plugin('qt') cv2.startWindowThread() cv2.namedWindow(self.windowname) logger.debug("Emulator window opened") if self.visualize_processed: logger.debug("Opening processed frame window...") cv2.startWindowThread() logger.debug("Processed frame window opened") cv2.namedWindow(self.windowname + "_processed") self.single_life_episodes = single_life_episodes def get_screen_image(self): """ Add screen (luminance) to frame pool """ # [screen_image, screen_image_rgb] = [self.ale.getScreenGrayscale(), # self.ale.getScreenRGB()] self.ale.getScreenGrayscale(self.gray_screen) self.ale.getScreenRGB(self.rgb_screen) self.frame_pool[self.current] = np.squeeze(self.gray_screen) self.current = (self.current + 1) % FRAMES_IN_POOL return self.rgb_screen def new_game(self): """ Restart game """ self.ale.reset_game() self.lives = self.ale.lives() if MAX_START_WAIT < 0: logger.debug("Cannot time travel yet.") sys.exit() elif MAX_START_WAIT > 0: wait = random.randint(0, MAX_START_WAIT) else: wait = 0 for _ in xrange(wait): self.ale.act(self.legal_actions[0]) def process_frame_pool(self): """ Preprocess frame pool """ img = None if BLEND_METHOD == "max_pool": img = np.amax(self.frame_pool, axis=0) #img resize(img[:210, :], (84, 84)) img = cv2.resize(img[:210, :], (84, 84), interpolation=cv2.INTER_LINEAR) img = img.astype(np.float32) img *= (1.0/255.0) return img # Reduce height to 210, if not so #cropped_img = img[:210, :] # Downsample to 110x84 #down_sampled_img = resize(cropped_img, (84, 84)) # Crop to 84x84 playing area #stackable_image = down_sampled_img[:, 26:110] #return stackable_image def action_repeat(self, a): """ Repeat action and grab screen into frame pool """ reward = 0 for i in xrange(ACTION_REPEAT): reward += self.ale.act(self.legal_actions[a]) new_screen_image_rgb = self.get_screen_image() return reward, new_screen_image_rgb def get_reshaped_state(self, state): return np.reshape(state, (1, IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES)) #return np.reshape(self.screen_images_processed, # (1, IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES)) def get_initial_state(self): """ Get the initial state """ self.new_game() for step in xrange(NR_IMAGES): reward, new_screen_image_rgb = self.action_repeat(0) 
            self.screen_images_processed[:, :, step] = self.process_frame_pool()
            self.show_screen(new_screen_image_rgb)
        if self.is_terminal():
            # Retry with a fresh random start; decrementing the module-level
            # MAX_START_WAIT here (as originally written) raises UnboundLocalError.
            return self.get_initial_state()
        return np.copy(self.screen_images_processed)  #get_reshaped_state()

    def next(self, action):
        """ Get the next state, reward, and game over signal """
        reward, new_screen_image_rgb = self.action_repeat(np.argmax(action))
        self.screen_images_processed[:, :, 0:3] = \
            self.screen_images_processed[:, :, 1:4]
        self.screen_images_processed[:, :, 3] = self.process_frame_pool()
        self.show_screen(new_screen_image_rgb)
        terminal = self.is_terminal()
        self.lives = self.ale.lives()
        return np.copy(self.screen_images_processed), reward, terminal
        #get_reshaped_state(), reward, terminal

    def show_screen(self, image):
        """ Show visuals for raw and processed images """
        if self.visualize:
            #io.imshow(image[:210, :], fancy=True)
            cv2.imshow(self.windowname, image[:210, :])
        if self.visualize_processed:
            #io.imshow(self.screen_images_processed[:, :, 3], fancy=True)
            cv2.imshow(self.windowname + "_processed",
                       self.screen_images_processed[:, :, 3])

    def is_terminal(self):
        if self.single_life_episodes:
            return (self.is_over() or (self.lives > self.ale.lives()))
        else:
            return self.is_over()

    def is_over(self):
        return self.ale.game_over()
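# Illustrative sketch of the process_frame_pool() step above: blend the frame
# pool with a pixel-wise max, resize to 84x84 and rescale to [0, 1]. The pool
# is synthetic so the snippet runs without an emulator; cv2 is assumed to be
# available, as it is in the class above.
import numpy as np
import cv2

frame_pool = np.random.randint(0, 256, size=(2, 210, 160)).astype(np.uint8)

img = np.amax(frame_pool, axis=0)                # "max_pool" blend of two frames
img = cv2.resize(img[:210, :], (84, 84), interpolation=cv2.INTER_LINEAR)
img = img.astype(np.float32) * (1.0 / 255.0)     # scale pixel values to [0, 1]
print(img.shape, img.dtype)                      # (84, 84) float32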
if sys.platform == 'darwin':
    pygame.init()
    ale.setBool('sound', False)  # Sound doesn't work on OSX
elif sys.platform.startswith('linux'):
    ale.setBool('sound', FLAGS.sound)
ale.setBool('display_screen', FLAGS.display)

# Load the ROM file
rom_file = str.encode(FLAGS.rom)
ale.loadROM(rom_file)

# Get the list of legal actions
legal_actions = ale.getLegalActionSet()
minimal_actions = ale.getMinimalActionSet()
print("Legal Actions:", legal_actions)
print("Minimal Actions:", minimal_actions)
print("Screen size:", ale.getScreenDims())

# Play 10 episodes
for episode in range(10):
    total_reward = 0
    while not ale.game_over():
        a = legal_actions[randrange(len(legal_actions))]
        # Apply an action and get the resulting reward
        reward = ale.act(a)
        total_reward += reward
        #print ale.getFrameNumber(), ale.getEpisodeFrameNumber(), reward, total_reward
    print('Episode %d ended with score: %d' % (episode, total_reward))
    ale.reset_game()


if __name__ == '__main__':
class AtariSimulator(object): def __init__(self, settings): '''Initiate Arcade Learning Environment (ALE) using Python interface https://github.com/bbitmaster/ale_python_interface/wiki - Set number of frames to be skipped, random seed, ROM and title for display. - Retrieve a set of legal actions and their number. - Retrieve dimensions of the original screen (width/height), and set the dimensions of the cropped screen, together with the padding used to crop the screen rectangle. - Set dimensions of the pygame display that will show visualization of the simulation. (May be cropped --- showing what the learner sees, or not --- showing full Atari screen) - Allocate memory for generated grayscale screenshots. Accepts dims in (height/width) format ''' self.ale = ALEInterface() self.ale.setInt("frame_skip",settings["frame_skip"]) self.ale.setInt("random_seed",settings["seed_simulator"]) self.ale.loadROM(settings["rom_dir"] + '/' + settings["rom"]) self.title = "ALE Simulator: " + str(settings["rom"]) self.actions = self.ale.getLegalActionSet() self.n_actions = self.actions.size self.screen_dims = self.ale.getScreenDims() self.model_dims = settings['model_dims'] self.pad = settings['pad'] print("Original screen width/height: " + str(self.screen_dims[0]) + "/" + str(self.screen_dims[1])) print("Cropped screen width/height: " + str(self.model_dims[0]) + "/" + str(self.model_dims[1])) self.viz_cropped = settings['viz_cropped'] if self.viz_cropped: self.display_dims = (int(self.model_dims[0]*2), int(self.model_dims[1]*2)) else: self.display_dims = (int(self.screen_dims[0]*2), int(self.screen_dims[1]*2)) # preallocate an array to accept ALE screen data (height/width) ! self.screen_data = np.empty((self.screen_dims[1],self.screen_dims[0]),dtype=np.uint8) def get_screenshot(self): '''returns a cropped snapshot of the simulator - store grayscale values in a preallocated array - cut out a square from the rectangle, using provided padding value - downsample to the desired size and transpose from (height/width) to (width/height) ''' self.ale.getScreenGrayscale(self.screen_data) self.tmp = self.screen_data[(self.screen_dims[1]-self.screen_dims[0]-self.pad):(self.screen_dims[1]-self.pad),:] self.frame = spm.imresize(self.tmp,self.model_dims[::-1],interp='nearest').T #, interp='nearest' return self.frame def act(self,action_index): '''function to transition the simulator from s to s' using provided action the action that is provided is in form of an index simulator deals with translating the index into an actual action''' self.last_reward = self.ale.act(self.actions[action_index]) def reward(self): '''return reward - has to be called after the "act" function''' return self.last_reward def episode_over(self): '''return a boolean indicator on whether the game is still running''' return self.ale.game_over() def reset_episode(self): '''reset the game that ended''' self.ale.reset_game() def init_viz_display(self): '''initialize display that will show visualization''' pygame.init() self.screen = pygame.display.set_mode(self.display_dims) if self.title: pygame.display.set_caption(self.title) def refresh_viz_display(self): '''if display is shut down, shut the game down else move the current simulator's frame (cropped or not cropped) into the pygame display, after expanding it 2x along x and y dimensions''' for event in pygame.event.get(): if event.type == pygame.QUIT: exit if self.viz_cropped: self.surface = pygame.surfarray.make_surface(self.frame) # has already been transposed else: self.surface = 
pygame.surfarray.make_surface(self.screen_data.T) self.screen.blit(pygame.transform.scale2x(self.surface),(0,0)) pygame.display.flip()
class ALE(Environment): def __init__(self, rom, frame_skip=1, repeat_action_probability=0.0, loss_of_life_termination=False, loss_of_life_reward=0, display_screen=False, seed=np.random.RandomState()): """ Initialize ALE. Args: rom: Rom filename and directory. frame_skip: Repeat action for n frames. Default 1. repeat_action_probability: Repeats last action with given probability. Default 0. loss_of_life_termination: Signals a terminal state on loss of life. Default False. loss_of_life_reward: Reward/Penalty on loss of life (negative values are a penalty). Default 0. display_screen: Displays the emulator screen. Default False. seed: Random seed """ self.ale = ALEInterface() self.rom = rom self.ale.setBool(b'display_screen', display_screen) self.ale.setInt(b'random_seed', seed.randint(0, 9999)) self.ale.setFloat(b'repeat_action_probability', repeat_action_probability) self.ale.setBool(b'color_averaging', False) self.ale.setInt(b'frame_skip', frame_skip) # all set commands must be done before loading the ROM self.ale.loadROM(rom.encode()) # setup gamescreen object width, height = self.ale.getScreenDims() self.gamescreen = np.empty((height, width, 3), dtype=np.uint8) self.frame_skip = frame_skip # setup action converter # ALE returns legal action indexes, convert these to just numbers self.action_inds = self.ale.getMinimalActionSet() # setup lives self.loss_of_life_reward = loss_of_life_reward self.cur_lives = self.ale.lives() self.loss_of_life_termination = loss_of_life_termination self.life_lost = False def __str__(self): return 'ALE({})'.format(self.rom) def close(self): self.ale = None def reset(self): self.ale.reset_game() self.cur_lives = self.ale.lives() self.life_lost = False # clear gamescreen self.gamescreen = np.empty(self.gamescreen.shape, dtype=np.uint8) return self.current_state def execute(self, action): # convert action to ale action ale_action = self.action_inds[action] # get reward and process terminal & next state rew = self.ale.act(ale_action) if self.loss_of_life_termination or self.loss_of_life_reward != 0: new_lives = self.ale.lives() if new_lives < self.cur_lives: self.cur_lives = new_lives self.life_lost = True rew += self.loss_of_life_reward terminal = self.is_terminal state_tp1 = self.current_state return state_tp1, rew, terminal @property def states(self): return dict(shape=self.gamescreen.shape, type=float) @property def actions(self): return dict(continuous=False, num_actions=len(self.action_inds), names=self.action_names) @property def current_state(self): self.gamescreen = self.ale.getScreenRGB(self.gamescreen) return np.copy(self.gamescreen) @property def is_terminal(self): if self.loss_of_life_termination and self.life_lost: return True else: return self.ale.game_over() @property def action_names(self): action_names = [ 'No-Op', 'Fire', 'Up', 'Right', 'Left', 'Down', 'Up Right', 'Up Left', 'Down Right', 'Down Left', 'Up Fire', 'Right Fire', 'Left Fire', 'Down Fire', 'Up Right Fire', 'Up Left Fire', 'Down Right Fire', 'Down Left Fire' ] return np.asarray(action_names)[self.action_inds]
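# Illustrative sketch of the loss-of-life bookkeeping in ALE.execute() above:
# when the emulator reports fewer lives than before, optionally add a penalty
# to the reward and remember that a life was lost (used by is_terminal). A
# fake lives sequence stands in for self.ale.lives(); the penalty value is an
# assumption.
loss_of_life_reward = -1          # assumed penalty value
cur_lives, life_lost = 3, False

def on_step(reward, new_lives):
    """Return the shaped reward and update the life-tracking flags."""
    global cur_lives, life_lost
    if new_lives < cur_lives:
        cur_lives = new_lives
        life_lost = True
        reward += loss_of_life_reward
    return reward

for rew, lives in [(0, 3), (1, 3), (0, 2), (0, 2)]:
    print(on_step(rew, lives), life_lost)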
class AtariWrapper(): """ ALE wrapper that tries to mimic the options in the DQN paper including the preprocessing (except resizing/cropping) """ action_words = [ 'NOOP', 'UP', 'RIGHT', 'LEFT', 'DOWN', "UPRIGHT", "UPLEFT", "DOWNRIGHT", "DOWNLEFT" ] _action_set = [0, 2, 3, 4, 5, 6, 7, 8, 9] #Valid actions for ALE. #Possible actions are just a list from 0,num_valid_actions #We still need to map from the latter to the former when possible_actions = list(range(len(_action_set))) def __init__(self, rom_path, seed=123, frameskip=4, show_display=False, stack_num_states=4, concatenate_state_every=4): """ Parameters: Frameskip should be either a tuple (indicating a random range to choose from, with the top value exclude), or an int. It's aka action repeat. stack_num_states: Number of dimensions/channels to have. concatenate_state_every: After how many frames should one channel be appended to state. Number is in terms of absolute frames independent of frameskip """ self.stack_num_states = stack_num_states self.concatenate_state_every = concatenate_state_every self.game_path = rom_path if not os.path.exists(self.game_path): raise IOError('You asked for game %s but path %s does not exist' % (game, self.game_path)) self.frameskip = frameskip try: self.ale = ALEInterface() except Exception as e: print( "ALEInterface could not be loaded. ale_python_interface import failed" ) raise e #Set some default options self.ale.setInt(b'random_seed', seed) self.ale.setBool(b'sound', False) self.ale.setBool(b'display_screen', show_display) self.ale.setFloat(b'repeat_action_probability', 0.) #Load the rom self.ale.loadROM(self.game_path) (self.screen_width, self.screen_height) = self.ale.getScreenDims() self.latest_frame_fifo = deque( maxlen=2) #Holds the two closest frames to max. self.state_fifo = deque(maxlen=stack_num_states) def _step(self, a, force_noop=False): """Perform one step of the environment. Automatically repeats the step self.frameskip number of times parameters: force_noop: Force it to perform a no-op ignoring the action supplied. """ assert a in self.possible_actions + [0] if force_noop: action, num_steps = 0, 1 else: action = self._action_set[a] if isinstance(self.frameskip, int): num_steps = self.frameskip else: num_steps = np.random.randint(self.frameskip[0], self.frameskip[1]) reward = 0.0 for i in range(num_steps): reward += self.ale.act(action) cur_frame = self.observe_raw(get_rgb=True) cur_frame_cropped = self.crop_frame(cur_frame) self.latest_frame_fifo.append(cur_frame_cropped) if i % self.concatenate_state_every == 0: curmax_frame = np.amax(self.latest_frame_fifo, axis=0) frame_lumi = self.convert_to_gray(curmax_frame) self.state_fifo.append(frame_lumi) #Transpose so we get HxWxC instead of CxHxW self.current_frame = np.array(np.transpose(self.state_fifo, (1, 2, 0))) return self.current_frame, reward, self.ale.game_over(), { "ale.lives": self.ale.lives() } def step(self, *args, **kwargs): """Performs one step of the environment """ lives_before = self.ale.lives() next_state, reward, done, info = self._step(*args, **kwargs) lives_after = self.ale.lives() # End the episode when a life is lost if lives_before > lives_after: done = True return next_state, reward, done, info def observe_raw(self, get_rgb=False): """Observe either RGB or Gray frames. 
        Initializing fresh arrays each call avoids returning references to
        stale buffers.
        """
        if get_rgb:
            cur_frame_rgb = np.zeros(
                (self.screen_height, self.screen_width, 3), dtype=np.uint8)
            self.ale.getScreenRGB(cur_frame_rgb)
            return cur_frame_rgb
        else:
            cur_frame_gray = np.zeros((self.screen_height, self.screen_width),
                                      dtype=np.uint8)
            self.ale.getScreenGrayscale(cur_frame_gray)
            return cur_frame_gray

    def crop_frame(self, frame):
        """Simply crops a frame. Does nothing by default.
        """
        return frame

    def convert_to_gray(self, img):
        """Get the luminance channel.
        """
        img_f = np.float32(img)
        img_lumi = 0.299*img_f[:,:,0] + \
                   0.587*img_f[:,:,1] + \
                   0.114*img_f[:,:,2]
        return np.uint8(img_lumi)

    def reset(self):
        """Reset the game.
        """
        self.ale.reset_game()
        s = self.observe_raw(get_rgb=True)
        s = self.crop_frame(s)

        #Populate missing frames with blank ones.
        for _ in range(self.stack_num_states - 1):
            self.state_fifo.append(np.zeros(shape=(s.shape[0], s.shape[1])))

        self.latest_frame_fifo.append(s)  #Push the latest frame
        curmax_frame = s
        frame_lumi = self.convert_to_gray(s)
        self.state_fifo.append(frame_lumi)

        self.state = np.transpose(self.state_fifo, (1, 2, 0))
        return self.state

    def get_action_meanings(self):
        """Return in text what the actions correspond to.
        """
        return [ACTION_MEANING[i] for i in self._action_set]

    def save_state(self):
        """Saves the current state and returns an identifier for the saved state.
        """
        return self.ale.cloneSystemState()

    def restore_state(self, ident):
        """Restore game state.

        Restores the saved state of the system and performs a no-op so a new
        frame can be generated in case a restore is followed by an observe().
        """
        self.ale.restoreSystemState(ident)
        self.step(0, force_noop=True)
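# Illustrative sketch of the frameskip handling in AtariWrapper._step() above:
# an int means a fixed action repeat, while a (low, high) tuple means a random
# repeat count drawn per step with the upper bound excluded. A plain counter
# stands in for ale.act() so the snippet runs standalone.
import numpy as np

def num_repeat_steps(frameskip):
    if isinstance(frameskip, int):
        return frameskip
    return np.random.randint(frameskip[0], frameskip[1])  # high is exclusive

total_emulator_frames = 0
for _ in range(10):
    total_emulator_frames += num_repeat_steps((2, 5))  # repeats 2, 3 or 4 frames
print(total_emulator_frames)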
if USE_SDL:
    if sys.platform == 'darwin':
        import pygame
        pygame.init()
        ale.setBool('sound', False)  # Sound doesn't work on OSX
    elif sys.platform.startswith('linux'):
        ale.setBool('sound', True)
    ale.setBool('display_screen', True)

ale.setInt('frame_skip', 1)

# Load the ROM file
ale.loadROM('roms/breakout.bin')
ale.setInt('max_num_frames', 1)

# Get the list of legal actions
legal_actions = ale.getMinimalActionSet()

# getScreenDims() returns (width, height); the grayscale buffer is (height, width)
width, height = ale.getScreenDims()
cnt = 0

# Play 10 episodes
import numpy as np
d = np.empty((height, width), dtype=np.uint8)
for episode in xrange(10):
    total_reward = 0
    while not ale.game_over():
        a = legal_actions[randrange(len(legal_actions))]
        #print legal_actions
        # Apply an action and get the resulting reward
        reward = ale.act(a)
        total_reward += reward
        ale.getScreenGrayscale(d)
        io.imshow(d)
        io.show()
    ale.reset_game()
class AleAgent:
    ##
    # @param processing_cls Class for processing game visual input
    def __init__(self, processing_cls, game_rom=None, encoder_model=None,
                 encoder_weights=None, NFQ_model=None, NFQ_weights=None):
        assert game_rom is not None
        self.game = ALEInterface()
        if encoder_weights is not None and encoder_model is not None:
            self.encoder = Encoder(path_to_model=encoder_model,
                                   path_to_weights=encoder_weights)
        else:
            self.encoder = Encoder()

        self.processor = processing_cls()

        # Get & Set the desired settings
        self.game.setInt('random_seed', 0)
        self.game.setInt('frame_skip', 4)

        # Set USE_SDL to true to display the screen. ALE must be compiled
        # with SDL enabled for this to work. On OSX, pygame init is used to
        # proxy-call SDL_main.
        USE_SDL = True
        if USE_SDL:
            if sys.platform == 'darwin':
                pygame.init()
                self.game.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.game.setBool('sound', False)  # no sound
            self.game.setBool('display_screen', True)

        # Load the ROM file
        self.game.loadROM(game_rom)

        # Get the list of legal actions
        self.legal_actions = self.game.getLegalActionSet()

        # Get actions applicable in current game
        self.minimal_actions = self.game.getMinimalActionSet()

        if NFQ_model is not None and NFQ_weights is not None:
            self.NFQ = NFQ(
                self.encoder.out_dim,
                len(self.minimal_actions),
                model_path=NFQ_model,
                weights_path=NFQ_weights
            )
        else:
            self.NFQ = NFQ(self.encoder.out_dim, len(self.minimal_actions))

        (self.screen_width, self.screen_height) = self.game.getScreenDims()
        self.screen_data = np.zeros(
            (self.screen_height, self.screen_width),
            dtype=np.uint8
        )

    ##
    # Train the agent with reinforcement learning
    def train(self, num_of_episodes=1500, eps=0.995, key_binding=None):
        pygame.init()
        for episode in xrange(num_of_episodes):
            total_reward = 0
            moves = 0
            hits = 0
            print 'Starting episode: ', episode+1

            if key_binding:
                eps = 0.05
            else:
                # use a float literal so the decrement is not rounded to zero
                eps -= 2.0 / num_of_episodes

            self.game.getScreenGrayscale(self.screen_data)
            pooled_data = self.processor.process(self.screen_data)
            next_state = self.encoder.encode(pooled_data)

            while not self.game.game_over():
                current_state = next_state

                x = None
                if key_binding:
                    key_pressed = pygame.key.get_pressed()
                    x = key_binding(key_pressed)

                if x is None:
                    r = np.random.rand()
                    if r < eps:
                        x = np.random.randint(self.minimal_actions.size)
                    else:
                        x = self.NFQ.predict_action(current_state)

                a = self.minimal_actions[x]

                # Apply an action and get the resulting reward
                reward = self.game.act(a)

                # record only every 3 frames
                # if not moves % 3:
                self.game.getScreenGrayscale(self.screen_data)
                pooled_data = self.processor.process(self.screen_data)
                next_state = self.encoder.encode(pooled_data)

                transition = np.append(current_state, x)
                transition = np.append(transition, next_state)
                transition = np.append(transition, reward)
                self.NFQ.add_transition(transition)

                total_reward += reward
                if reward > 0:
                    hits += 1

                moves += 1
                if eps > 0.1:
                    eps -= 0.00001
            # end while

            print 'Epsilon: ', eps
            print 'Episode', episode+1, 'ended with score:', total_reward
            print 'Hits: ', hits

            self.game.reset_game()
            self.NFQ.train()
            hits = 0
            moves = 0
            self.NFQ.save_net()
        # end for

    ##
    # Play the game!
def play(self): total_reward = 0 moves = 1 while not self.game.game_over(): self.game.getScreenGrayscale(self.screen_data) pooled_data = self.processor.process(self.screen_data) current_state = self.encoder.encode(pooled_data) x = self.NFQ.predict_action(current_state) a = self.minimal_actions[x] reward = self.game.act(a) total_reward += reward moves += 1 print 'The game ended with score:', total_reward, ' after: ', moves, ' moves'
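# Illustrative sketch of the epsilon-greedy choice inside AleAgent.train()
# above: with probability eps pick a random index into the minimal action set,
# otherwise ask the value network for the greedy action. The "network" here is
# a stub returning a fixed index, purely so the snippet runs standalone.
import numpy as np

minimal_actions = np.array([0, 1, 3, 4])   # e.g. NOOP, FIRE, RIGHT, LEFT

def predict_action_stub(state):
    return 1                               # stand-in for self.NFQ.predict_action

def choose_action(state, eps):
    if np.random.rand() < eps:
        x = np.random.randint(minimal_actions.size)   # explore
    else:
        x = predict_action_stub(state)                # exploit
    return x, minimal_actions[x]

print(choose_action(state=None, eps=0.1))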
def main():
    if len(sys.argv) < 2:
        dir_rom = '/Users/lguan/Documents/Study/Research/Atari-2600-Roms/T-Z/Tennis.bin'
    else:
        dir_rom = sys.argv[1]

    ale = ALEInterface()

    # Get & Set the desired settings
    ale.setInt(b'random_seed', 123)

    # Set USE_SDL to true to display the screen. ALE must be compiled
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.
    USE_SDL = False
    if USE_SDL:
        # mac OS
        if sys.platform == 'darwin':
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    # Load the ROM file
    rom_file = str.encode(dir_rom)
    print('- Loading ROM - %s' % dir_rom)
    ale.loadROM(rom_file)
    print('- Complete loading ROM')

    (game_surface_width, game_surface_height) = ale.getScreenDims()
    print("game surface width/height: " + str(game_surface_width) + "/" +
          str(game_surface_height))

    (display_width, display_height) = (800, 640)
    print('display width/height', (display_width, display_height))

    # init pygame
    pygame.init()
    display_screen = pygame.display.set_mode((display_width, display_height))
    pygame.display.set_caption("Arcade Learning Environment Player Agent Display")

    # init clock
    clock = pygame.time.Clock()
    is_exit = False

    # Play 10 episodes
    for episode in range(10):
        if is_exit:
            break

        total_reward = 0
        while not ale.game_over() and not is_exit:
            a = getActionFromKeyboard()

            # Apply an action and get the resulting reward
            reward = ale.act(a)
            total_reward += reward

            # clear screen
            display_screen.fill((0, 0, 0))

            # render game surface
            renderGameSurface(ale, display_screen,
                              (game_surface_width, game_surface_height))

            # display related info
            displayRelatedInfo(display_screen, a, total_reward)

            pygame.display.flip()

            # process pygame event queue
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    is_exit = True
                    break
                if event.type == pygame.KEYDOWN and event.key == pygame.K_q:
                    is_exit = True
                    break

            # delay to 60fps
            clock.tick(60.)

        print('Episode %d ended with score: %d' % (episode, total_reward))
        ale.reset_game()
class AtariPlayer(RLEnvironment): """ A wrapper for atari emulator. NOTE: will automatically restart when a real episode ends """ def __init__(self, rom_file, viz=0, height_range=(None,None), frame_skip=4, image_shape=(84, 84), nullop_start=30, live_lost_as_eoe=True): """ :param rom_file: path to the rom :param frame_skip: skip every k frames and repeat the action :param image_shape: (w, h) :param height_range: (h1, h2) to cut :param viz: visualization to be done. Set to 0 to disable. Set to a positive number to be the delay between frames to show. Set to a string to be a directory to store frames. :param nullop_start: start with random number of null ops :param live_losts_as_eoe: consider lost of lives as end of episode. useful for training. """ super(AtariPlayer, self).__init__() if not os.path.isfile(rom_file) and '/' not in rom_file: rom_file = get_dataset_dir('atari_rom', rom_file) assert os.path.isfile(rom_file), \ "rom {} not found. Please download at {}".format(rom_file, ROM_URL) try: ALEInterface.setLoggerMode(ALEInterface.Logger.Warning) except AttributeError: log_once() # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86 with _ALE_LOCK: self.ale = ALEInterface() self.rng = get_rng(self) self.ale.setInt(b"random_seed", self.rng.randint(0, 10000)) self.ale.setBool(b"showinfo", False) self.ale.setInt(b"frame_skip", 1) self.ale.setBool(b'color_averaging', False) # manual.pdf suggests otherwise. self.ale.setFloat(b'repeat_action_probability', 0.0) # viz setup if isinstance(viz, six.string_types): assert os.path.isdir(viz), viz self.ale.setString(b'record_screen_dir', viz) viz = 0 if isinstance(viz, int): viz = float(viz) self.viz = viz if self.viz and isinstance(self.viz, float): self.windowname = os.path.basename(rom_file) cv2.startWindowThread() cv2.namedWindow(self.windowname) self.ale.loadROM(rom_file.encode('utf-8')) self.width, self.height = self.ale.getScreenDims() self.actions = self.ale.getMinimalActionSet() self.live_lost_as_eoe = live_lost_as_eoe self.frame_skip = frame_skip self.nullop_start = nullop_start self.height_range = height_range self.image_shape = image_shape self.current_episode_score = StatCounter() self.restart_episode() def _grab_raw_image(self): """ :returns: the current 3-channel image """ m = self.ale.getScreenRGB() return m.reshape((self.height, self.width, 3)) def current_state(self): """ :returns: a gray-scale (h, w, 1) float32 image """ ret = self._grab_raw_image() # max-pooled over the last screen ret = np.maximum(ret, self.last_raw_screen) if self.viz: if isinstance(self.viz, float): #m = cv2.resize(ret, (1920,1200)) cv2.imshow(self.windowname, ret) time.sleep(self.viz) ret = ret[self.height_range[0]:self.height_range[1],:].astype('float32') # 0.299,0.587.0.114. 
same as rgb2y in torch/image ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY) ret = cv2.resize(ret, self.image_shape) ret = np.expand_dims(ret, axis=2) return ret def get_action_space(self): return DiscreteActionSpace(len(self.actions)) def restart_episode(self): if self.current_episode_score.count > 0: self.stats['score'].append(self.current_episode_score.sum) self.current_episode_score.reset() self.ale.reset_game() # random null-ops start n = self.rng.randint(self.nullop_start) self.last_raw_screen = self._grab_raw_image() for k in range(n): if k == n - 1: self.last_raw_screen = self._grab_raw_image() self.ale.act(0) def action(self, act): """ :param act: an index of the action :returns: (reward, isOver) """ oldlives = self.ale.lives() r = 0 for k in range(self.frame_skip): if k == self.frame_skip - 1: self.last_raw_screen = self._grab_raw_image() r += self.ale.act(self.actions[act]) newlives = self.ale.lives() if self.ale.game_over() or \ (self.live_lost_as_eoe and newlives < oldlives): break self.current_episode_score.feed(r) isOver = self.ale.game_over() if isOver: self.restart_episode() if self.live_lost_as_eoe: isOver = isOver or newlives < oldlives return (r, isOver)
class AtariEnvironment(interfaces.Environment): def __init__(self, atari_rom, frame_skip=4, noop_max=30, terminate_on_end_life=False, random_seed=123, frame_history_length=4, use_gui=False, max_num_frames=500000, repeat_action_probability=0.0, record_screen_dir=None): self.ale = ALEInterface() self.ale.setInt('random_seed', random_seed) self.ale.setInt('frame_skip', 1) self.ale.setFloat('repeat_action_probability', 0.0) self.ale.setInt('max_num_frames_per_episode', max_num_frames) if record_screen_dir is not None: self.ale.setString('record_screen_dir', record_screen_dir) self.ale.loadROM(atari_rom) self.frame_skip = frame_skip self.repeat_action_probability = repeat_action_probability self.noop_max = noop_max self.terminate_on_end_life = terminate_on_end_life self.current_lives = self.ale.lives() self.is_terminal = False self.previous_action = 0 self.num_actions = len(self.ale.getMinimalActionSet()) w, h = self.ale.getScreenDims() self.screen_width = w self.screen_height = h self.zero_last_frames = [ np.zeros((84, 84), dtype=np.uint8), np.zeros((84, 84), dtype=np.uint8) ] self.last_two_frames = copy.copy(self.zero_last_frames) self.zero_history_frames = [ np.zeros((84, 84), dtype=np.uint8) for i in range(0, frame_history_length) ] self.frame_history = copy.copy(self.zero_history_frames) atari_actions = self.ale.getMinimalActionSet() self.atari_to_onehot = dict( list(zip(atari_actions, list(range(len(atari_actions)))))) self.onehot_to_atari = dict( list(zip(list(range(len(atari_actions))), atari_actions))) self.screen_image = np.zeros(self.screen_height * self.screen_width, dtype=np.uint8) self.use_gui = use_gui self.original_frame = np.zeros((h, w), dtype=np.uint8) self.refresh_time = datetime.timedelta(milliseconds=1000 / 60) self.last_refresh = datetime.datetime.now() if (self.use_gui): self.gui_screen = pygame.display.set_mode((w, h)) def getRAM(self, ram=None): return self.ale.getRAM(ram) def _get_frame(self): self.ale.getScreenGrayscale(self.screen_image) image = self.screen_image.reshape( [self.screen_height, self.screen_width, 1]) self.original_frame = image image = cv2.resize(image, (84, 84)) return image def perform_action(self, onehot_index_action): if self.repeat_action_probability > 0: if np.random.uniform() < self.repeat_action_probability: onehot_index_action = self.previous_action self.previous_action = onehot_index_action action = self.onehot_to_atari[onehot_index_action] state, action, reward, next_state, self.is_terminal = self.perform_atari_action( action) return state, onehot_index_action, reward, next_state, self.is_terminal def perform_atari_action(self, atari_action): state = self.get_current_state() reward = self._act(atari_action, self.frame_skip) if self.use_gui: self.refresh_gui() self.frame_history[:-1] = self.frame_history[1:] self.frame_history[-1] = np.max(self.last_two_frames, axis=0) next_state = self.get_current_state() return state, atari_action, reward, next_state, self.is_terminal def _act(self, ale_action, repeat): reward = 0 for i in range(repeat): reward += self.ale.act(ale_action) if i >= repeat - 2: self.last_two_frames = [ self.last_two_frames[1], self._get_frame() ] self.is_terminal = self.ale.game_over() # terminate the episode if current_lives has decreased lives = self.ale.lives() if self.current_lives != lives: if self.current_lives > lives and self.terminate_on_end_life: self.is_terminal = True self.current_lives = lives return reward def get_current_state(self): #return copy.copy(self.frame_history) return [x.copy() for x in 
self.frame_history] def get_actions_for_state(self, state): return [ self.atari_to_onehot[a] for a in self.ale.getMinimalActionSet() ] def reset_environment(self): self.last_two_frames = [self.zero_history_frames[0], self._get_frame()] if self.terminate_on_end_life: if self.ale.game_over(): self.ale.reset_game() else: self.ale.reset_game() self.current_lives = self.ale.lives() if self.noop_max > 0: num_noops = np.random.randint(self.noop_max + 1) self._act(0, num_noops) self.previous_action = 0 self.frame_history = copy.copy(self.zero_history_frames) self.frame_history[-1] = np.max(self.last_two_frames, axis=0) if self.use_gui: self.refresh_gui() def is_current_state_terminal(self): return self.is_terminal def refresh_gui(self): current_time = datetime.datetime.now() if (current_time - self.last_refresh) > self.refresh_time: self.last_refresh = current_time gui_image = np.tile( np.transpose(self.original_frame, axes=(1, 0, 2)), [1, 1, 3]) # gui_image = np.zeros((self.screen_width, self.screen_height, 3), dtype=np.uint8) # channel = np.random.randint(3) # gui_image[:,:,channel] = np.transpose(self.original_frame, axes=(1, 0, 2))[:,:,0] pygame.surfarray.blit_array(self.gui_screen, gui_image) pygame.display.update()
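# Illustrative sketch of the manual sticky-action logic in
# AtariEnvironment.perform_action() above: with probability
# repeat_action_probability the previously chosen action is executed instead
# of the newly requested one. The probability value is an assumption.
import numpy as np

repeat_action_probability = 0.25   # assumed value for illustration
previous_action = 0

def sticky(action):
    global previous_action
    if np.random.uniform() < repeat_action_probability:
        action = previous_action   # ignore the new action, repeat the old one
    previous_action = action
    return action

print([sticky(a) for a in [1, 2, 3, 2, 1]])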
from visualize import Plotter # Create the ale interface and load rom files ale = ALEInterface() #ale.setBool('display_screen', True) #pygame.init() # Load the rom ale.loadROM('/Users/shashwat/Downloads/pong.bin') # These are the set of valid actions in the game legal_actions = ale.getMinimalActionSet() # How to get screen rgb? width, height = ale.getScreenDims() screen_buffer = np.empty((height, width), dtype=np.uint8) # Some common settings HISTORY_LENGTH = 4 MAX_STEPS = 1000000 MAX_EPOCHS = 10 MINIBATCH_SIZE = 32 LONG_PRESS_TIMES = 4 GAMMA = 0.9 EPSILON = 0.1 UPDATE_FREQUENCY = 4 MAX_LIVES = ale.lives() episode_sum = 0 episode_sums = []
class AtariPlayer(RLEnvironment): """ A wrapper for atari emulator. NOTE: will automatically restart when a real episode ends """ def __init__(self, rom_file, viz=0, height_range=(None, None), frame_skip=4, image_shape=(84, 84), nullop_start=30, live_lost_as_eoe=True): """ :param rom_file: path to the rom :param frame_skip: skip every k frames and repeat the action :param image_shape: (w, h) :param height_range: (h1, h2) to cut :param viz: visualization to be done. Set to 0 to disable. Set to a positive number to be the delay between frames to show. Set to a string to be a directory to store frames. :param nullop_start: start with random number of null ops :param live_losts_as_eoe: consider lost of lives as end of episode. useful for training. """ super(AtariPlayer, self).__init__() if not os.path.isfile(rom_file) and '/' not in rom_file: rom_file = os.path.join(get_dataset_dir('atari_rom'), rom_file) assert os.path.isfile(rom_file), \ "rom {} not found. Please download at {}".format(rom_file, ROM_URL) try: ALEInterface.setLoggerMode(ALEInterface.Logger.Warning) except AttributeError: log_once() # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86 with _ALE_LOCK: self.ale = ALEInterface() self.rng = get_rng(self) self.ale.setInt("random_seed", self.rng.randint(0, 10000)) self.ale.setBool("showinfo", False) self.ale.setInt("frame_skip", 1) self.ale.setBool('color_averaging', False) # manual.pdf suggests otherwise. self.ale.setFloat('repeat_action_probability', 0.0) # viz setup if isinstance(viz, six.string_types): assert os.path.isdir(viz), viz self.ale.setString('record_screen_dir', viz) viz = 0 if isinstance(viz, int): viz = float(viz) self.viz = viz if self.viz and isinstance(self.viz, float): self.windowname = os.path.basename(rom_file) cv2.startWindowThread() cv2.namedWindow(self.windowname) self.ale.loadROM(rom_file) self.width, self.height = self.ale.getScreenDims() self.actions = self.ale.getMinimalActionSet() self.live_lost_as_eoe = live_lost_as_eoe self.frame_skip = frame_skip self.nullop_start = nullop_start self.height_range = height_range self.image_shape = image_shape self.current_episode_score = StatCounter() self.restart_episode() def _grab_raw_image(self): """ :returns: the current 3-channel image """ m = self.ale.getScreenRGB() return m.reshape((self.height, self.width, 3)) def current_state(self): """ :returns: a gray-scale (h, w, 1) float32 image """ ret = self._grab_raw_image() # max-pooled over the last screen ret = np.maximum(ret, self.last_raw_screen) if self.viz: if isinstance(self.viz, float): #m = cv2.resize(ret, (1920,1200)) cv2.imshow(self.windowname, ret) time.sleep(self.viz) ret = ret[self.height_range[0]:self.height_range[1], :].astype( 'float32') # 0.299,0.587.0.114. 
same as rgb2y in torch/image ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY) ret = cv2.resize(ret, self.image_shape) ret = np.expand_dims(ret, axis=2) return ret def get_action_space(self): return DiscreteActionSpace(len(self.actions)) def restart_episode(self): if self.current_episode_score.count > 0: self.stats['score'].append(self.current_episode_score.sum) self.current_episode_score.reset() self.ale.reset_game() # random null-ops start n = self.rng.randint(self.nullop_start) self.last_raw_screen = self._grab_raw_image() for k in range(n): if k == n - 1: self.last_raw_screen = self._grab_raw_image() self.ale.act(0) def action(self, act): """ :param act: an index of the action :returns: (reward, isOver) """ oldlives = self.ale.lives() r = 0 for k in range(self.frame_skip): if k == self.frame_skip - 1: self.last_raw_screen = self._grab_raw_image() r += self.ale.act(self.actions[act]) newlives = self.ale.lives() if self.ale.game_over() or \ (self.live_lost_as_eoe and newlives < oldlives): break self.current_episode_score.feed(r) isOver = self.ale.game_over() if isOver: self.restart_episode() if self.live_lost_as_eoe: isOver = isOver or newlives < oldlives return (r, isOver)
class AtariPlayer(gym.Env): """ A wrapper for ALE emulator, with configurations to mimic DeepMind DQN settings. Info: score: the accumulated reward in the current game gameOver: True when the current game is Over """ def __init__(self, rom_file, viz=0, frame_skip=4, nullop_start=30, live_lost_as_eoe=True, max_num_frames=0): """ Args: rom_file: path to the rom frame_skip: skip every k frames and repeat the action viz: visualization to be done. Set to 0 to disable. Set to a positive number to be the delay between frames to show. Set to a string to be a directory to store frames. nullop_start: start with random number of null ops. live_losts_as_eoe: consider lost of lives as end of episode. Useful for training. max_num_frames: maximum number of frames per episode. """ super(AtariPlayer, self).__init__() if not os.path.isfile(rom_file) and '/' not in rom_file: rom_file = get_dataset_path('atari_rom', rom_file) assert os.path.isfile(rom_file), \ "rom {} not found. Please download at {}".format(rom_file, ROM_URL) try: ALEInterface.setLoggerMode(ALEInterface.Logger.Error) except AttributeError: if execute_only_once(): logger.warn("You're not using latest ALE") # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86 with _ALE_LOCK: self.ale = ALEInterface() self.rng = get_rng(self) self.ale.setInt(b"random_seed", self.rng.randint(0, 30000)) self.ale.setInt(b"max_num_frames_per_episode", max_num_frames) self.ale.setBool(b"showinfo", False) self.ale.setInt(b"frame_skip", 1) self.ale.setBool(b'color_averaging', False) # manual.pdf suggests otherwise. self.ale.setFloat(b'repeat_action_probability', 0.0) # viz setup if isinstance(viz, six.string_types): assert os.path.isdir(viz), viz self.ale.setString(b'record_screen_dir', viz) viz = 0 if isinstance(viz, int): viz = float(viz) self.viz = viz if self.viz and isinstance(self.viz, float): self.windowname = os.path.basename(rom_file) cv2.startWindowThread() cv2.namedWindow(self.windowname) self.ale.loadROM(rom_file.encode('utf-8')) self.width, self.height = self.ale.getScreenDims() self.actions = self.ale.getMinimalActionSet() self.live_lost_as_eoe = live_lost_as_eoe self.frame_skip = frame_skip self.nullop_start = nullop_start self.action_space = spaces.Discrete(len(self.actions)) self.observation_space = spaces.Box( low=0, high=255, shape=(self.height, self.width)) self._restart_episode() def get_action_meanings(self): return [ACTION_MEANING[i] for i in self.actions] def _grab_raw_image(self): """ :returns: the current 3-channel image """ m = self.ale.getScreenRGB() return m.reshape((self.height, self.width, 3)) def _current_state(self): """ :returns: a gray-scale (h, w) uint8 image """ ret = self._grab_raw_image() # max-pooled over the last screen ret = np.maximum(ret, self.last_raw_screen) if self.viz: if isinstance(self.viz, float): cv2.imshow(self.windowname, ret) cv2.waitKey(int(self.viz * 1000)) ret = ret.astype('float32') # 0.299,0.587.0.114. 
same as rgb2y in torch/image ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY) return ret.astype('uint8') # to save some memory def _restart_episode(self): with _ALE_LOCK: self.ale.reset_game() # random null-ops start n = self.rng.randint(self.nullop_start) self.last_raw_screen = self._grab_raw_image() for k in range(n): if k == n - 1: self.last_raw_screen = self._grab_raw_image() self.ale.act(0) def _reset(self): if self.ale.game_over(): self._restart_episode() return self._current_state() def _step(self, act): oldlives = self.ale.lives() r = 0 for k in range(self.frame_skip): if k == self.frame_skip - 1: self.last_raw_screen = self._grab_raw_image() r += self.ale.act(self.actions[act]) newlives = self.ale.lives() if self.ale.game_over() or \ (self.live_lost_as_eoe and newlives < oldlives): break isOver = self.ale.game_over() if self.live_lost_as_eoe: isOver = isOver or newlives < oldlives info = {'ale.lives': newlives} return self._current_state(), r, isOver, info
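# Illustrative sketch of the random no-op start used by _restart_episode()
# above: after reset, a random number of no-op actions (action index 0) is
# executed so episodes do not always begin from the exact same frame. A tiny
# stub environment stands in for the ALE.
import random

class StubEnv(object):
    def __init__(self):
        self.frame = 0
    def reset_game(self):
        self.frame = 0
    def act(self, action):
        self.frame += 1

nullop_start = 30          # assumed upper bound, mirrors nullop_start above
env = StubEnv()
env.reset_game()
for _ in range(random.randint(0, nullop_start - 1)):
    env.act(0)             # no-op
print('episode starts at frame', env.frame)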
def getScreenDims(self):
    # Return (height, width) instead of ALE's native (width, height).
    w, h = ALEInterface.getScreenDims(self)
    return h, w
class AtariDriver(object): """ A wrapper for atari emulator. """ def __init__(self, rom_file, frame_skip=1, viz=0): """ :param rom_file: path to the rom :param frame_skip: skip every k frames :param viz: the delay. visualize the game while running. 0 to disable """ self.ale = ALEInterface() self.rng = get_rng(self) self.ale.setInt("random_seed", self.rng.randint(self.rng.randint(0, 1000))) self.ale.setInt("frame_skip", frame_skip) self.ale.loadROM(rom_file) self.width, self.height = self.ale.getScreenDims() self.actions = self.ale.getMinimalActionSet() if isinstance(viz, int): viz = float(viz) self.viz = viz self.romname = os.path.basename(rom_file) if self.viz and isinstance(self.viz, float): cv2.startWindowThread() cv2.namedWindow(self.romname) self._reset() self.last_image = self._grab_raw_image() self.framenum = 0 def _grab_raw_image(self): """ :returns: the current 3-channel image """ m = np.zeros(self.height * self.width * 3, dtype=np.uint8) self.ale.getScreenRGB(m) return m.reshape((self.height, self.width, 3)) def grab_image(self): """ :returns: a gray-scale image, max-pooled over the last frame. """ now = self._grab_raw_image() ret = np.maximum(now, self.last_image) self.last_image = now if self.viz and isinstance(self.viz, float): cv2.imshow(self.romname, ret) time.sleep(self.viz) elif self.viz: cv2.imwrite("{}/{:06d}.jpg".format(self.viz, self.framenum), ret) self.framenum += 1 ret = cv2.cvtColor(ret, cv2.COLOR_BGR2YUV)[:,:,0] ret = ret[36:204,:] # several online repos all use this return ret def get_num_actions(self): """ :returns: the number of legal actions """ return len(self.actions) def _reset(self): self.ale.reset_game() def next(self, act): """ :param act: an index of the action :returns: (next_image, reward, isOver) """ r = self.ale.act(self.actions[act]) s = self.grab_image() isOver = self.ale.game_over() if isOver: self._reset() return (s, r, isOver)
class AtariEnvironment: num_actions = 18 # Use full action set def __init__(self, frame_shape, frame_postprocess=lambda x: x): self.ale = ALEInterface() self.ale.setBool(b"display_screen", cfg.display_screen) self.ale.setInt(b"frame_skip", 1) self.ale.setBool(b"color_averaging", False) self.ale.setInt(b"random_seed", cfg.random_seed) self.ale.setFloat(b"repeat_action_probability", cfg.sticky_prob) self.ale.loadROM(str.encode(cfg.rom)) self.ale.setMode(cfg.mode) self.ale.setDifficulty(cfg.difficulty) self.action_set = self.ale.getLegalActionSet() assert len(self.action_set) == AtariEnvironment.num_actions screen_dims = tuple(reversed(self.ale.getScreenDims())) + (1,) self._frame_buffer = CircularBuffer( cfg.frame_buffer_size, screen_dims, np.uint8 ) self._frame_stack = CircularBuffer( cfg.frame_history_size, frame_shape, np.uint8 ) self._frame_postprocess = frame_postprocess self._episode_count = 0 self.reset(inc_episode_count=False) def _is_terminal(self): return self.ale.game_over() def _get_single_frame(self): stacked_frames = np.concatenate(self._frame_buffer, axis=2) maxed_frame = np.amax(stacked_frames, axis=2) expanded_frame = np.expand_dims(maxed_frame, 3) frame = self._frame_postprocess(expanded_frame) return frame def reset(self, inc_episode_count=True): self._episode_frames = 0 self._episode_reward = 0 if inc_episode_count: self._episode_count += 1 self.ale.reset_game() for _ in range(cfg.frame_buffer_size): self._frame_buffer.append(self.ale.getScreenGrayscale()) for _ in range(cfg.frame_history_size): self._frame_stack.append(self._get_single_frame()) def act(self, action): assert not self._is_terminal() cum_reward = 0 for _ in range(cfg.frame_skip): cum_reward += self.ale.act(self.action_set[action]) self._frame_buffer.append(self.ale.getScreenGrayscale()) self._frame_stack.append(self._get_single_frame()) self._episode_frames += cfg.frame_skip self._episode_reward += cum_reward cum_reward = np.clip(cum_reward, -1, 1) return cum_reward, self.state, self._is_terminal() @property def state(self): assert len(self._frame_buffer) == cfg.frame_buffer_size assert len(self._frame_stack) == cfg.frame_history_size return np.concatenate(self._frame_stack, axis=-1) @property def episode_reward(self): return self._episode_reward @property def episode_frames(self): return self._episode_frames @property def episode_steps(self): return self._episode_frames // cfg.frame_skip @property def episode_count(self): return self._episode_count
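# Illustrative sketch of the reward handling in AtariEnvironment.act() above:
# the raw reward is accumulated over the skipped frames for the episode
# statistics, but the value returned to the learner is clipped to [-1, 1].
import numpy as np

frame_rewards = [0, 0, 4, 0]                 # fake per-frame rewards
cum_reward = sum(frame_rewards)              # used for episode bookkeeping
clipped = np.clip(cum_reward, -1, 1)         # used as the training signal
print(cum_reward, clipped)                   # 4 1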
class ALEEnvironment(BaseEnvironment): """ The :class:`MinimalGameHandler` class takes care of the interface to the ALE and tries to do nothing else. It's meant for advanced users who need fine control over every aspect of the process. It has many functions that are simply wrappers of the underlying ALE but with pythonic names/usage. Parameters ---------- rom : byte string Specifies the directory to load the rom from. Must be a byte string: b'dir_for_rom/rom.bin' display_screen : boolean Default False. Whether or not to show the game. True takes longer to run but can be fun to watch step_cap: int Default None. Maximum number of steps to run in an episode. Breakout can sometimes not return terminal even when game is ended. This fixes that and will return terminal after stepping above this count """ def __init__(self, rom, resize_shape=(84, 84), skip_frame=1, repeat_action_probability=0.0, step_cap=None, loss_of_life_termination=False, loss_of_life_negative_reward=False, grayscale=True, display_screen=False, seed=np.random.RandomState()): # set up emulator self.ale = ALEInterface() if display_screen: self.ale.setBool(b'display_screen', True) self.ale.setInt(b'frame_skip', skip_frame) self.ale.setInt(b'random_seed', seed.randint(0, 9999)) self.ale.setFloat(b'repeat_action_probability', repeat_action_probability) self.ale.setBool(b'color_averaging', False) self.ale.loadROM(rom.encode()) # setup gamescreen object. I think this is faster than recreating an empty each time width, height = self.ale.getScreenDims() channels = 1 if grayscale else 3 self.grayscale = grayscale self.gamescreen = np.empty((height, width, 1), dtype=np.uint8) self.resize_shape = resize_shape self.skip_frame = skip_frame self.step_cap = step_cap self.curr_step_count = 0 # setup action converter # ALE returns legal action indexes, convert these to just numbers self.action_inds = self.ale.getMinimalActionSet() # setup lives self.loss_of_life_negative_reward = loss_of_life_negative_reward self.cur_lives = self.ale.lives() self.loss_of_life_termination = loss_of_life_termination self.life_lost = False def reset(self): self.ale.reset_game() self.cur_lives = self.ale.lives() self.life_lost = False self.curr_step_count = 0 def step(self, action): self.curr_step_count += 1 ale_action = self.action_inds[action] return self._step(ale_action) def _step(self, ale_action): if not self.loss_of_life_termination and not self.loss_of_life_negative_reward: return self.ale.act(ale_action) else: rew = self.ale.act(ale_action) new_lives = self.ale.lives() if new_lives < self.cur_lives: # if loss of life is negative reward subtract 1 from reward if self.loss_of_life_negative_reward: rew -= 1 self.cur_lives = new_lives self.life_lost = True return rew def get_state(self): if self.grayscale: self.gamescreen = self.ale.getScreenGrayscale(self.gamescreen) else: self.gamescreen = self.ale.getScreenRGB(self.gamescreen) # if resize_shape is none then don't resize if self.resize_shape is not None: # if grayscale we remove the last dimmension (channel) if self.grayscale: processedImg = imresize(self.gamescreen[:, :, 0], self.resize_shape) else: processedImg = imresize(self.gamescreen, self.resize_shape) return processedImg def get_state_shape(self): return self.resize_shape def get_terminal(self): if self.loss_of_life_termination and self.life_lost: return True elif self.step_cap is not None and self.curr_step_count > self.step_cap: return True else: return self.ale.game_over() def get_num_actions(self): return len(self.action_inds)
class ALEEnvironment(BaseEnvironment): """ A wrapper of Arcade Learning Environment, which inherits all members of ``BaseEnvironment``. """ # 63 games ADVENTURE = "adventure" AIR_RAID = "air_raid" ALIEN = "alien" AMIDAR = "amidar" ASSAULT = "assault" ASTERIX = "asterix" ASTEROIDS = "asteroids" ATLANTIS = "aslantis" BANK_HEIST = "bank_heist" BATTLE_ZONE = "battle_zone" BEAM_RIDER = "beam_rider" BERZERK = "berzerk" BOWLING = "bowling" BOXING = "boxing" BREAKOUT = "breakout" CARNIVAL = "carnival" CENTIPEDE = "centipede" CHOPPER_COMMAND = "chopper_command" CRAZY_CLIMBER = "crazy_climber" DEFENDER = "defender" DEMON_ATTACK = "demon_attack" DOUBLE_DUNK = "double_dunk" ELEVATOR_ACTION = "elevator_action" ENDURO = "enduro" FISHING_DERBY = "fishing_derby" FREEWAY = "freeway" FROSTBITE = "frostbite" GOPHER = "gopher" GRAVITAR = "gravitar" HERO = "hero" ICE_HOCKEY = "ice_hockey" JAMESBOND = "jamesbond" JOURNEY_ESCAPE = "journey_escape" KABOOM = "kaboom" KANGAROO = "kangaroo" KRULL = "krull" KUNGFU_MASTER = "kung_fu_master" MONTEZUMA = "montezuma_revenge" MS_PACMAN = "ms_pacman" UNKNOWN = "name_this_game" PHOENIX = "phoenix" PITFALL = "pitfall" PONG = "pong" POOYAN = "pooyan" PRIVATE_EYE = "private_eye" QBERT = "qbert" RIVERRAID = "riverraid" ROAD_RUNNER = "road_runner" ROBOTANK = "robotank" SEAQUEST = "seaquest" SKIING = "skiing" SOLARIS = "solaris" SPACE_INVADERS = "space_invaders" STAR_GUNNER = "star_gunner" TENNIS = "tennis" TIME_PILOT = "time_pilot" TUTANKHAM = "tutankham" UP_N_DOWN = "up_n_down" VENTURE = "venture" VIDEO_PINBALL = "video_pinball" WIZARD_OF_WOR = "wizard_of_wor" YARS_REVENGE = "yars_revenge" ZAXXON = "zaxxon" def __init__(self, rom_name, frame_skip=4, repeat_action_probability=0., max_episode_steps=10000, loss_of_life_termination=False, loss_of_life_negative_reward=False, bitwise_max_on_two_consecutive_frames=False, is_render=False, seed=None, startup_policy=None, disable_actions=None, num_of_sub_actions=-1, state_processor=AtariProcessor(resize_shape=(84, 84), convert_to_grayscale=True)): os.environ['SDL_VIDEO_CENTERED'] = '1' file_exist = isfile(ALEEnvironment.get_rom_path(rom_name)) if not file_exist: raise ValueError("Rom not found ! Please put rom " + rom_name + ".bin into: " + ALEEnvironment.get_rom_path()) self.__rom_name = rom_name self.__ale = ALEInterface() if frame_skip < 0: print("Invalid frame_skip param ! Set default frame_skip = 4") self.__frame_skip = 4 else: self.__frame_skip = frame_skip if repeat_action_probability < 0 or repeat_action_probability > 1: raise ValueError("Invalid repeat_action_probability") else: self.__repeat_action_probability = repeat_action_probability self.__max_episode_steps = max_episode_steps self.__loss_of_life_termination = loss_of_life_termination self.__loss_of_life_negative_reward = loss_of_life_negative_reward self.__max_2_frames = bitwise_max_on_two_consecutive_frames # Max 2 frames only work with grayscale self.__grayscale = False if state_processor is not None and type( state_processor ) is AtariProcessor and state_processor.get_grayscale(): self.__grayscale = True if self.__max_2_frames and self.__frame_skip > 1 and self.__grayscale: self.__max_2_frames = True else: self.__max_2_frames = False self.__is_render = is_render self.__processor = state_processor if seed is None or seed <= 0 or seed >= 9999: if seed is not None and (seed < 0 or seed >= 9999): print("Invalid seed ! 
Default seed = randint(0, 9999") self.__seed = np.random.randint(0, 9999) self.__random_seed = True else: self.__random_seed = False self.__seed = seed self.__current_steps = 0 self.__is_life_lost = False self.__is_terminal = False self.__current_lives = 0 self.__action_reduction = num_of_sub_actions self.__scr_width, self.__scr_height, self.__action_set = self.__init_ale( ) self.__prev_buffer = np.empty((self.__scr_height, self.__scr_width, 3), dtype=np.uint8) self.__current_buffer = np.empty( (self.__scr_height, self.__scr_width, 3), dtype=np.uint8) self.__current_state = None self.__prev_state = None self.__startup_policy = startup_policy if disable_actions is None: self.__dis_act = [] else: self.__dis_act = disable_actions if self.__processor is not None and self.__processor.get_number_of_objectives( ) > 1: self.__multi_objs = True else: self.__multi_objs = False def get_processor(self): return self.__processor def __init_ale(self): self.__ale.setBool(b'display_screen', self.__is_render) if self.__max_2_frames and self.__frame_skip > 1: self.__ale.setInt(b'frame_skip', 1) else: self.__ale.setInt(b'frame_skip', self.__frame_skip) self.__ale.setInt(b'random_seed', self.__seed) self.__ale.setFloat(b'repeat_action_probability', self.__repeat_action_probability) self.__ale.setBool(b'color_averaging', False) self.__ale.loadROM( ALEEnvironment.get_rom_path(self.__rom_name).encode()) width, height = self.__ale.getScreenDims() return width, height, self.__ale.getMinimalActionSet() def clone(self): if self.__random_seed: seed = np.random.randint(0, 9999) else: seed = self.__seed return ALEEnvironment(self.__rom_name, self.__frame_skip, self.__repeat_action_probability, self.__max_episode_steps, self.__loss_of_life_termination, self.__loss_of_life_negative_reward, self.__max_2_frames, self.__is_render, seed, self.__startup_policy, self.__dis_act, self.__action_reduction, self.__processor.clone()) def step_all(self, a): if isinstance(a, (list, np.ndarray)): if len(a) <= 0: raise ValueError('Empty action list !') a = a[0] self.__current_steps += 1 act = self.__action_set[a] rew = self._step(act) next_state = self.get_state() _is_terminal = self.is_terminal() return next_state, rew, _is_terminal, self.__current_steps def reset(self): self.__ale.reset_game() self.__current_lives = self.__ale.lives() self.__is_life_lost = False self.__is_terminal = False self.__current_state = None self.__prev_state = None action_space = self.get_action_space() v_range, is_range = action_space.get_range() if len(v_range) > 1: self.step(1) # No op steps if self.__startup_policy is not None: max_steps = int(self.__startup_policy.get_max_steps()) for _ in range(max_steps): act = self.__startup_policy.step(self.get_state(), action_space) self.step(act) # Start training from this point self.__current_steps = 0 # Reset processor if self.__processor is not None: self.__processor.reset() return self.get_state() def _pre_step(self, act): if self.__max_2_frames and self.__frame_skip > 1: rew = 0 for i in range(self.__frame_skip - 2): rew += self.__ale.act(act) self.__prev_buffer = self.__ale.getScreenRGB( self.__prev_buffer) self.__prev_buffer = self.__ale.getScreenRGB(self.__prev_buffer) rew += self.__ale.act(act) self.__current_buffer = self.__ale.getScreenRGB( self.__current_buffer) self.__is_terminal = self.__ale.game_over() if self.__processor is not None: self.__prev_state = self.__processor.process( self.__prev_buffer) self.__current_state = self.__processor.process( self.__current_buffer) else: self.__prev_state = 
self.__prev_buffer self.__current_state = self.__current_buffer self.__current_state = np.maximum.reduce( [self.__prev_state, self.__current_state]) else: rew = self.__ale.act(act) self.__current_buffer = self.__ale.getScreenRGB( self.__current_buffer) self.__is_terminal = self.__ale.game_over() if self.__processor is not None: self.__current_state = self.__processor.process( self.__current_buffer) if self.__multi_objs and self.__processor is not None: all_rewards = self.__processor.get_rewards(rew) return all_rewards else: return rew def _step(self, act): for i in range(len(self.__dis_act)): if act == self.__dis_act[i]: act = 0 if not self.__loss_of_life_termination and not self.__loss_of_life_negative_reward: if not self.__is_terminal: next_lives = self.__ale.lives() if next_lives < self.__current_lives: act = 1 self.__current_lives = next_lives return self._pre_step(act) else: rew = self._pre_step(act) next_lives = self.__ale.lives() if next_lives < self.__current_lives: if self.__loss_of_life_negative_reward: rew -= 1 self.__current_lives = next_lives self.__is_life_lost = True return rew def get_state(self): if not self.__max_2_frames: if self.__processor is not None: return self.__current_state else: return self.__current_buffer else: return self.__current_state def is_terminal(self): if self.__loss_of_life_termination and self.__is_life_lost: return True elif self.__max_episode_steps is not None and self.__current_steps > self.__max_episode_steps: return True else: return self.__is_terminal @staticmethod def get_rom_path(rom=None): if rom is None: return os.path.dirname(os.path.abspath(__file__)) + "/roms/" else: return os.path.dirname( os.path.abspath(__file__)) + "/roms/" + rom + ".bin" @staticmethod def list_all_roms(): return [ f for f in listdir(ALEEnvironment.get_rom_path()) if isfile(join(ALEEnvironment.get_rom_path(), f)) ] def get_state_space(self): if self.__processor is None: shape = self.__current_buffer.shape else: shape = self.__processor.process(self.__current_buffer).shape min_value = np.zeros(shape, dtype=np.uint8) max_value = np.full(shape, 255) return Space(min_value, max_value, True) def get_action_space(self): if self.__action_reduction >= 1: return Space(0, self.__action_reduction - 1, True) else: return Space(0, len(self.__action_set) - 1, True) def step(self, act): if isinstance(act, (list, np.ndarray)): if len(act) <= 0: raise ValueError('Empty action list !') act = act[0] self.__current_steps += 1 act = self.__action_set[act] rew = self._step(act) return rew def get_current_steps(self): return self.__current_steps def is_atari(self): return True def is_render(self): return self.__is_render def get_number_of_objectives(self): if self.__processor is None: return 1 else: return self.__processor.get_number_of_objectives() def get_number_of_agents(self): if self.__processor is None: return 1 else: return self.__processor.get_number_of_agents() def get_state_processor(self): return self.__processor
class AleEnv(): def __init__(self, rom, display_screen, use_env_frame_skip, frame_repeat): self.actions = None self.rom = rom self.display_screen = display_screen self.use_env_frame_skip = use_env_frame_skip self.frame_repeat = frame_repeat def initialize(self): self.ale = ALEInterface() self.ale.setInt("random_seed", random.randint(1, 1000)) if self.display_screen: self.ale.setBool('display_screen', True) if self.use_env_frame_skip: self.ale.setInt('frame_skip', self.frame_repeat) self.ale.setBool('color_averaging', True) self.ale.setFloat('repeat_action_probability', 0) self.ale.loadROM(self.rom) self.actions = self.ale.getMinimalActionSet() print('actions: %s' % self.actions) (self.screen_width, self.screen_height) = self.ale.getScreenDims() print("width/height: " + str(self.screen_width) + "/" + str(self.screen_height)) self.initialized = True def get_actions(self, rom=None): if self.actions is None and rom is not None: ale = ALEInterface() ale.loadROM(rom) self.actions = ale.getMinimalActionSet() return self.actions @property def state_dtype(self): return np.uint8 @property def continuous_action(self): return False def reset_game(self): self.ale.reset_game() def lives(self): return self.ale.lives() def getScreenRGB(self): return self.ale.getScreenRGB() def getState(self, debug_display=False, debug_input=None): screen = self.ale.getScreenGrayscale() if screen is not None and debug_display: debug_input.show(screen.reshape(screen.shape[0], screen.shape[1])) return screen.reshape(self.screen_height, self.screen_width) def act(self, action): return self.ale.act(action) def game_over(self): return self.ale.game_over() def finish(self): return
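AleEnv exposes the raw ALE loop (act / game_over / getState) rather than a gym-style step(). A hedged usage sketch follows; the ROM path, episode count, and the random policy are placeholders.

    # Hypothetical driver for AleEnv; ROM path and episode count are placeholders.
    import random

    env = AleEnv(rom=b'roms/pong.bin', display_screen=False,
                 use_env_frame_skip=True, frame_repeat=4)
    env.initialize()
    actions = env.get_actions()

    for episode in range(3):
        env.reset_game()
        total_reward = 0
        while not env.game_over():
            # act() takes a raw ALE action value from the minimal action set
            total_reward += env.act(random.choice(actions))
            frame = env.getState()         # grayscale (height, width) array
        print('episode %d reward %d' % (episode, total_reward))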
class MyEnv(Environment): VALIDATION_MODE = 0 def __init__(self, rng, rom="ale/breakout.bin", frame_skip=4, ale_options=[{ "key": "random_seed", "value": 0 }, { "key": "color_averaging", "value": True }, { "key": "repeat_action_probability", "value": 0. }]): self._mode = -1 self._modeScore = 0.0 self._modeEpisodeCount = 0 self._frameSkip = frame_skip if frame_skip >= 1 else 1 self._randomState = rng self._ale = ALEInterface() for option in ale_options: t = type(option["value"]) if t is int: self._ale.setInt(option["key"], option["value"]) elif t is float: self._ale.setFloat(option["key"], option["value"]) elif t is bool: self._ale.setBool(option["key"], option["value"]) else: raise ValueError( "Option {} ({}) is not an int, bool or float.".format( option["key"], t)) self._ale.loadROM(rom) w, h = self._ale.getScreenDims() self._screen = np.empty((h, w), dtype=np.uint8) self._reducedScreen = np.empty((84, 84), dtype=np.uint8) self._actions = self._ale.getMinimalActionSet() def reset(self, mode): if mode == MyEnv.VALIDATION_MODE: if self._mode != MyEnv.VALIDATION_MODE: self._mode = MyEnv.VALIDATION_MODE self._modeScore = 0.0 self._modeEpisodeCount = 0 else: self._modeEpisodeCount += 1 elif self._mode != -1: # and thus mode == -1 self._mode = -1 self._ale.reset_game() for _ in range(self._randomState.randint(15)): self._ale.act(0) self._ale.getScreenGrayscale(self._screen) cv2.resize(self._screen, (84, 84), self._reducedScreen, interpolation=cv2.INTER_NEAREST) return [4 * [84 * [84 * [0]]]] def act(self, action): action = self._actions[action] reward = 0 for _ in range(self._frameSkip): reward += self._ale.act(action) if self.inTerminalState(): break self._ale.getScreenGrayscale(self._screen) cv2.resize(self._screen, (84, 84), self._reducedScreen, interpolation=cv2.INTER_NEAREST) self._modeScore += reward return np.sign(reward) def summarizePerformance(self, test_data_set): if self.inTerminalState() == False: self._modeEpisodeCount += 1 print("== Mean score per episode is {} over {} episodes ==".format( self._modeScore / self._modeEpisodeCount, self._modeEpisodeCount)) def inputDimensions(self): return [(4, 84, 84)] def observationType(self, subject): return np.uint8 def nActions(self): return len(self._actions) def observe(self): return [np.array(self._reducedScreen)] def inTerminalState(self): return self._ale.game_over()
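MyEnv is written against the Environment interface of the agent framework that normally drives it (reset/act/observe/inTerminalState), but it can also be stepped by hand. The sketch below reuses the ROM path and the -1 "training" mode value visible above; the seed and step budget are placeholders.

    # Hand-driven loop for MyEnv; seed and step budget are placeholders.
    import numpy as np

    rng = np.random.RandomState(123)
    env = MyEnv(rng, rom="ale/breakout.bin", frame_skip=4)

    env.reset(mode=-1)                     # -1 = training mode, 0 = validation
    for _ in range(500):
        a = rng.randint(env.nActions())    # index into the minimal action set
        clipped = env.act(a)               # act() returns np.sign(summed reward)
        frame = env.observe()[0]           # 84x84 uint8 screen, already resized
        if env.inTerminalState():
            break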
class AtariPlayer(RLEnvironment): """ A wrapper for atari emulator. """ def __init__(self, rom_file, viz=0, height_range=(None, None), frame_skip=4, image_shape=(84, 84), nullop_start=30, live_lost_as_eoe=True): """ :param rom_file: path to the rom :param frame_skip: skip every k frames and repeat the action :param image_shape: (w, h) :param height_range: (h1, h2) to cut :param viz: visualization to be done. Set to 0 to disable. Set to a positive number to be the delay between frames to show. Set to a string to be a directory to store frames. :param nullop_start: start with a random number of null ops :param live_lost_as_eoe: consider loss of lives as end of episode. Useful for training. """ super(AtariPlayer, self).__init__() self.ale = ALEInterface() self.rng = get_rng(self) self.ale.setInt("random_seed", self.rng.randint(0, 10000)) self.ale.setBool("showinfo", False) try: ALEInterface.setLoggerMode(ALEInterface.Logger.Warning) except AttributeError: log_once() self.ale.setInt("frame_skip", 1) self.ale.setBool('color_averaging', False) # manual.pdf suggests otherwise. may need to check self.ale.setFloat('repeat_action_probability', 0.0) # viz setup if isinstance(viz, six.string_types): assert os.path.isdir(viz), viz self.ale.setString('record_screen_dir', viz) viz = 0 if isinstance(viz, int): viz = float(viz) self.viz = viz if self.viz and isinstance(self.viz, float): self.windowname = os.path.basename(rom_file) cv2.startWindowThread() cv2.namedWindow(self.windowname) self.ale.loadROM(rom_file) self.width, self.height = self.ale.getScreenDims() self.actions = self.ale.getMinimalActionSet() self.live_lost_as_eoe = live_lost_as_eoe self.frame_skip = frame_skip self.nullop_start = nullop_start self.height_range = height_range self.image_shape = image_shape self.current_episode_score = StatCounter() self.restart_episode() def _grab_raw_image(self): """ :returns: the current 3-channel image """ m = self.ale.getScreenRGB() return m.reshape((self.height, self.width, 3)) def current_state(self): """ :returns: a gray-scale (h, w, 1) image """ ret = self._grab_raw_image() # max-pooled over the last screen ret = np.maximum(ret, self.last_raw_screen) if self.viz: if isinstance(self.viz, float): cv2.imshow(self.windowname, ret) time.sleep(self.viz) ret = ret[self.height_range[0]:self.height_range[1], :] # 0.299, 0.587, 0.114, 
same as rgb2y in torch/image ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY) ret = cv2.resize(ret, self.image_shape) ret = np.expand_dims(ret, axis=2) return ret def get_num_actions(self): """ :returns: the number of legal actions """ return len(self.actions) def restart_episode(self): if self.current_episode_score.count > 0: self.stats['score'].append(self.current_episode_score.sum) self.current_episode_score.reset() self.ale.reset_game() # random null-ops start n = self.rng.randint(self.nullop_start) self.last_raw_screen = self._grab_raw_image() for k in range(n): if k == n - 1: self.last_raw_screen = self._grab_raw_image() self.ale.act(0) def action(self, act): """ :param act: an index of the action :returns: (reward, isOver) """ oldlives = self.ale.lives() r = 0 for k in range(self.frame_skip): if k == self.frame_skip - 1: self.last_raw_screen = self._grab_raw_image() r += self.ale.act(self.actions[act]) newlives = self.ale.lives() if self.ale.game_over() or \ (self.live_lost_as_eoe and newlives < oldlives): break self.current_episode_score.feed(r) isOver = self.ale.game_over() if isOver: self.restart_episode() if self.live_lost_as_eoe: isOver = isOver or newlives < oldlives return (r, isOver) def get_stat(self): try: return {'avg_score': np.mean(self.stats['score']), 'max_score': float(np.max(self.stats['score'])) } except ValueError: return {}
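AtariPlayer restarts episodes internally when the game ends, so a caller only alternates current_state() and action(). A hedged example follows; the ROM path is a placeholder and the random policy stands in for a real agent.

    # Hypothetical interaction loop for AtariPlayer; the ROM path is a placeholder.
    import random

    player = AtariPlayer('breakout.bin', viz=0, frame_skip=4,
                         image_shape=(84, 84), live_lost_as_eoe=True)
    num_actions = player.get_num_actions()

    for step in range(10000):
        state = player.current_state()          # (84, 84, 1) grayscale uint8
        act = random.randrange(num_actions)
        reward, is_over = player.action(act)    # restarts the episode itself on game over

    print(player.get_stat())                    # e.g. {'avg_score': ..., 'max_score': ...}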