class FastAtariEnv(AtariEnv):
    def __init__(self, game='Breakout', obs_type='image', frameskip=(2, 5),
                 repeat_action_probability=0.):
        self.game_path = atari_py.get_game_path(game)
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = ALEInterface()
        self.viewer = None
        assert isinstance(repeat_action_probability, (float, int)), \
            "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'),
                          repeat_action_probability)
        self._seed()
        (screen_width, screen_height) = self.ale.getScreenDims()
        self._buffer = np.empty((screen_height, screen_width, 3), dtype=np.uint8)

    def _get_image(self):
        # Don't reorder from rgb to bgr as we're converting to greyscale anyway
        self.ale.getScreenRGB(self._buffer)  # says rgb but actually bgr
        return self._buffer
class env_atari:
    def __init__(self, params):
        self.params = params
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', np.random.randint(0, 500))
        self.ale.setFloat('repeat_action_probability', params['repeat_prob'])
        self.ale.setInt(b'frame_skip', params['frameskip'])
        self.ale.setBool('color_averaging', True)
        self.ale.loadROM('roms/' + params['rom'] + '.bin')
        self.actions = self.ale.getMinimalActionSet()
        self.action_space = c_action_space(len(self.actions))
        self.screen_width, self.screen_height = self.ale.getScreenDims()

    def reset(self):
        self.ale.reset_game()
        # Take a random number of no-op actions so episodes don't all start identically
        seed = np.random.randint(0, 7)
        for i in range(seed):
            self.ale.act(0)
        return self.get_image()

    def step(self, action):
        reward = self.ale.act(self.actions[action])
        next_s = self.get_image()
        terminate = self.ale.game_over()
        return next_s, reward, float(terminate), 0

    def get_image(self):
        temp = np.zeros(self.screen_height * self.screen_width * 3, dtype=np.uint8)
        self.ale.getScreenRGB(temp)
        # self.ale.getScreenGrayscale(temp)
        return temp.reshape((self.screen_height, self.screen_width, 3))
class Atari:
    def __init__(self, rom_dir):
        self.ale = ALEInterface()

        # Set settings
        self.ale.setInt("random_seed", 123)
        self.frame_skip = 4
        self.ale.setInt("frame_skip", self.frame_skip)
        self.ale.setBool("display_screen", False)
        self.ale.setBool("sound", True)
        self.record_sound_for_user = True
        self.ale.setBool("record_sound_for_user", self.record_sound_for_user)
        # NOTE recording audio to file still works. But if both file recording and
        # record_sound_for_user are enabled, then only the latter is done
        # self.ale.setString("record_sound_filename", "")

        # Get settings
        self.ale.loadROM(rom_dir)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getLegalActionSet()

        # Action count across all episodes
        self.action_count = 0
        self.start_time = time.time()
        self.reset()

    def reset(self):
        self.ale.reset_game()

    def take_action(self):
        action = self.legal_actions[np.random.randint(self.legal_actions.size)]
        self.ale.act(action)
        self.action_count += 1

    def print_fps(self, delta_t=500):
        if self.action_count % delta_t == 0:
            print('[atari.py] Frames/second: %f' %
                  (self.action_count / (time.time() - self.start_time)))
            print('[atari.py] Overall game frame count:',
                  self.action_count * self.frame_skip)
            print('---------')

    def get_image_and_audio(self):
        np_data_image = np.zeros(self.screen_width * self.screen_height * 3,
                                 dtype=np.uint8)
        if self.record_sound_for_user:
            np_data_audio = np.zeros(self.ale.getAudioSize(), dtype=np.uint8)
            self.ale.getScreenRGBAndAudio(np_data_image, np_data_audio)
            # Also supports independent audio queries if user desires:
            # self.ale.getAudio(np_data_audio)
        else:
            np_data_audio = 0
            self.ale.getScreenRGB(np_data_image)

        return (np.reshape(np_data_image, (self.screen_height, self.screen_width, 3)),
                np.asarray(np_data_audio))
class ALE(object):
    def __init__(self, init_seed, init_rand):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', init_seed)
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.loadROM('./breakout.bin')
        self.action_size = 4
        self.screen = None
        self.reward = 0
        self.terminal = True
        self.init_rand = init_rand

    def setSetting(self, action_repeat, screen_type):
        self.action_repeat = action_repeat
        self.screen_type = screen_type

    def _step(self, action):
        self.reward = self.ale.act(action)
        self.terminal = self.ale.game_over()
        if self.screen_type == 0:
            self.screen = self.ale.getScreenRGB()
        elif self.screen_type == 1:
            self.screen = self.ale.getScreenGrayscale()
        else:
            sys.stderr.write('screen_type error!')
            exit()

    def state(self):
        return self.reward, self.screen, self.terminal

    def act(self, action):
        cumulated_reward = 0
        for _ in range(self.action_repeat):
            self._step(action)
            cumulated_reward += self.reward
            if self.terminal:
                break
        self.reward = cumulated_reward
        return self.state()

    def new_game(self):
        if self.ale.game_over():
            self.ale.reset_game()
        if self.screen_type == 0:
            self.screen = self.ale.getScreenRGB()
        elif self.screen_type == 1:
            self.screen = self.ale.getScreenGrayscale()
        else:
            sys.stderr.write('screen_type error!')
            exit()
        for _ in range(self.init_rand):
            self._step(0)
        return self.screen
class emulator:
    def __init__(self, rom_name, vis):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i
        print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" + str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow("preview")

    def get_image(self):
        # numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8)
        # self.ale.getScreenRGB(numpy_surface)
        # image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
        image = self.ale.getScreenRGB()
        image = np.reshape(image, (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):
        self.ale.reset_game()
        return self.get_image(), 0, False

    def next(self, action_indx):
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        if self.vis:
            cv2.imshow('preview', nextstate)
        return nextstate, reward, self.ale.game_over()

    def train(self):
        for episode in range(10):
            total_reward = 0
            frame_number = 0
            while not self.ale.game_over():
                a = self.legal_actions[random.randrange(len(self.legal_actions))]
                # Apply an action and get the resulting reward
                reward = self.ale.act(a)
                total_reward += reward
                screen = self.ale.getScreenRGB()
                screen = np.array(screen).reshape([self.screen_height, self.screen_width, -1])
                frame_number = self.ale.getEpisodeFrameNumber()
                cv2.imshow("screen", screen / 255.0)
                cv2.waitKey(0)
                self.ale.saveScreenPNG("test_" + str(frame_number) + ".png")
            print('Episode %d ended with score: %d' % (episode, total_reward))
            print('Frame number is : ', frame_number)
            self.ale.reset_game()
class Env():
    def __init__(self, rom_name):
        self.__initALE()
        self.__loadROM(rom_name)
        self.screen_history = []
        self.screens = []

    def __initALE(self):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', randrange(1000))
        self.ale.setInt(b'fragsize', 64)
        self.ale.setInt(b'frame_skip', 1)
        # qq set this back to 0.25?
        self.ale.setFloat(b'repeat_action_probability', 0)
        self.ale.setLoggerMode('error')

    def __loadROM(self, rom_name):
        self.ale.loadROM(rom_name.encode('utf-8'))
        self.actions = self.ale.getMinimalActionSet()
        (width, height) = self.ale.getScreenDims()
        self.screen_data1 = np.empty((height, width, 3), dtype=np.uint8)
        self.screen_data2 = np.empty((height, width, 3), dtype=np.uint8)

    def get_legal_action_count(self):
        return len(self.actions)

    def act(self, action_index):
        action = self.actions[action_index]
        reward = 0
        # perform the action 4 times
        reward += _clip(self.ale.act(action), -1, 1)
        reward += _clip(self.ale.act(action), -1, 1)
        reward += _clip(self.ale.act(action), -1, 1)
        self.ale.getScreenRGB(self.screen_data1)
        reward += _clip(self.ale.act(action), -1, 1)
        self.ale.getScreenRGB(self.screen_data2)
        # return the pixel-wise max of the last two frames (some games only
        # render every other frame)
        screen_data_combined = np.maximum(self.screen_data1, self.screen_data2)
        terminal = self.ale.game_over()
        self.screens.append(preprocess_screen(screen_data_combined))
        phi = get_phi(self.screens)
        return (terminal, reward, phi, self.screen_data2)

    def get_s(self):
        return get_phi(self.screens)

    def reset(self):
        self.ale.reset_game()
        self.screens = []
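# Env above relies on three helpers (_clip, preprocess_screen, get_phi) that are not
# shown in this snippet. The following is a minimal sketch of plausible implementations;
# these are assumptions for illustration, not the original code.
import numpy as np
import cv2

def _clip(reward, lo, hi):
    # Clip a raw ALE reward into [lo, hi], as in DQN-style reward clipping
    return max(lo, min(hi, reward))

def preprocess_screen(screen_rgb):
    # Convert a 210x160x3 frame to an 84x84 grayscale image (a common choice)
    gray = cv2.cvtColor(screen_rgb, cv2.COLOR_RGB2GRAY)
    return cv2.resize(gray, (84, 84), interpolation=cv2.INTER_AREA)

def get_phi(screens):
    # Stack the most recent four preprocessed frames into one state tensor
    return np.stack(screens[-4:], axis=-1)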
class emulator:
    def __init__(self, rom_name, vis, windowname='preview'):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        self.windowname = windowname
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i
        self.init_frame_number = 0
        # print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" + str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3, dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):
        # Instead of resetting the game, we load a checkpoint and start from there.
        # self.ale.reset_game()
        self.ale.restoreState(
            self.ale.decodeState(checkpoints[random.randint(0, 99)].astype('uint8')))
        self.init_frame_number = self.ale.getFrameNumber()
        # self.ale.restoreState(self.ale.decodeState(np.reshape(checkpoint,(1009,1))))
        return self.get_image()

    def next(self, action_indx):
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        # scipy.misc.imsave('test.png',nextstate)
        if self.vis:
            cv2.imshow(self.windowname, nextstate)
        return nextstate, reward, self.ale.game_over()

    def get_frame_number(self):
        return self.ale.getFrameNumber() - self.init_frame_number
class emulator(object):
    def __init__(self, rom_name, vis, frameskip=1, windowname='preview'):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", frameskip)
        romfile = str(ROM_PATH) + str(rom_name)
        if not os.path.exists(romfile):
            print('No ROM file found at "' + romfile +
                  '".\nAdjust ROM_PATH or double-check the file exists.')
        self.ale.loadROM(romfile)
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        self.windowname = windowname
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i
        # print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" + str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname, flags=cv2.WINDOW_AUTOSIZE)  # permit manual resizing

    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3, dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):
        self.ale.reset_game()
        return self.get_image()

    def next(self, action_indx):
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        # scipy.misc.imsave('test.png',nextstate)
        if self.vis:
            cv2.imshow(self.windowname, nextstate)
            if sys.platform == 'darwin':
                # if we don't do this, can hang on OS X
                cv2.waitKey(2)
        return nextstate, reward, self.ale.game_over()
class Atari:
    # Constructor
    def __init__(self, rom_name):
        # Step 1: load the game and set its parameters
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(b"max_num_frames_per_episode")
        self.ale.setInt(b"random_seed", 123)
        self.ale.setInt(b"frame_skip", 4)
        self.ale.loadROM(('game/' + rom_name).encode())
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i

        # Step 2: create the display window
        self.windowname = rom_name
        cv2.startWindowThread()
        cv2.namedWindow(rom_name)

    # Returns an image from the emulator, already in the format expected
    # by our training algorithm.
    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3, dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
        return image

    # Simply restarts the game
    def newGame(self):
        self.ale.reset_game()
        return self.get_image()

    # Returns the observed state information after a given action is taken.
    def next(self, action):
        reward = self.ale.act(self.legal_actions[np.argmax(action)])
        nextstate = self.get_image()
        cv2.imshow(self.windowname, nextstate)
        if self.ale.game_over():
            self.newGame()
        return nextstate, reward, self.ale.game_over()
def main():
    arguments = docopt.docopt(__doc__, version='ALE Demo Version 1.0')

    pygame.init()
    ale = ALEInterface()
    ale.setInt(b'random_seed', 123)
    ale.setBool(b'display_screen', True)
    ale.loadROM(str.encode(arguments['<rom_file>']))

    legal_actions = ale.getLegalActionSet()
    width, height = ale.getScreenDims()
    print(width, height)
    frame = ale.getScreenRGB()
    frame = np.array(frame, dtype=float)

    rewards, num_episodes = [], int(arguments['--iters'] or 5)
    for episode in range(num_episodes):
        total_reward = 0
        while not ale.game_over():
            total_reward += ale.act(random.choice(legal_actions))
        print('Episode %d reward %d.' % (episode, total_reward))
        rewards.append(total_reward)
        ale.reset_game()

    average = sum(rewards) / len(rewards)
    print('Average for %d episodes: %d' % (num_episodes, average))
class Emulate:
    def __init__(self, rom_file, display_screen=False, frame_skip=4, screen_height=84,
                 screen_width=84, repeat_action_probability=0, color_averaging=True,
                 random_seed=0, record_screen_path='screen_pics',
                 record_sound_filename=None, minimal_action_set=True):
        self.ale = ALEInterface()
        if display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', frame_skip)
        self.ale.setFloat('repeat_action_probability', repeat_action_probability)
        self.ale.setBool('color_averaging', color_averaging)

        if random_seed:
            self.ale.setInt('random_seed', random_seed)

        self.ale.loadROM(rom_file)

        if minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
        else:
            self.actions = self.ale.getLegalActionSet()

        self.dims = (screen_width, screen_height)

    def numActions(self):
        return len(self.actions)

    def getActions(self):
        return self.actions

    def restart(self):
        self.ale.reset_game()

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, self.dims)
        return resized

    def getScreenGray(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, self.dims)
        rotated = np.rot90(resized, k=1)
        return rotated

    def getScreenColor(self):
        screen = self.ale.getScreenRGB()
        resized = cv2.resize(screen, self.dims)
        rotated = np.rot90(resized, k=1)
        return rotated

    def isTerminal(self):
        return self.ale.game_over()
class Emulator(object):
    def __init__(self, settings):
        self.ale = ALEInterface()
        self.ale.setInt('frame_skip', settings['frame_skip'])
        self.ale.setInt('random_seed', np.random.RandomState().randint(1000))
        self.ale.setBool('color_averaging', False)
        self.ale.loadROM('roms/' + settings['rom_name'])
        self.actions = self.ale.getMinimalActionSet()
        self.width = settings['screen_width']
        self.height = settings['screen_height']

    def reset(self):
        self.ale.reset_game()

    def image(self):
        screen = self.ale.getScreenGrayscale()
        # cv2.resize expects dsize as (width, height)
        screen = cv2.resize(screen, (self.width, self.height),
                            interpolation=cv2.INTER_LINEAR)
        return np.reshape(screen, (self.height, self.width))

    def full_image(self):
        screen = self.ale.getScreenRGB()
        return screen

    def act(self, action):
        return self.ale.act(self.actions[action])

    def terminal(self):
        return self.ale.game_over()
class Emulator:
    def __init__(self, rom_name, vis):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i
        # print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" + str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow("preview")

    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3, dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
        # added by ben may 2016
        print(image)
        print('&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& printing')
        return image

    def newGame(self):
        self.ale.reset_game()
        return self.get_image()

    def next(self, action_indx):
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        # scipy.misc.imsave('test.png',nextstate)
        if self.vis:
            cv2.imshow('preview', nextstate)
        return nextstate, reward, self.ale.game_over()
class Atari(AtariEnv):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, game='pong', obs_type='ram', frameskip=(2, 5),
                 repeat_action_probability=0.):
        """Frameskip should be either a tuple (indicating a random range to choose from,
        with the top value excluded), or an int."""
        utils.EzPickle.__init__(self, game, obs_type)
        assert obs_type in ('ram', 'image')

        self.game_path = atari_py.get_game_path(game)
        if not os.path.exists(self.game_path):
            raise IOError('You asked for game %s but path %s does not exist' %
                          (game, self.game_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(repeat_action_probability, (float, int)), \
            "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'),
                          repeat_action_probability)

        self._seed()

        (screen_width, screen_height) = self.ale.getScreenDims()
        self._buffer = np.empty((screen_height, screen_width, 3), dtype=np.uint8)

        self._action_set = self.ale.getMinimalActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))

        if self._obs_type == 'ram':
            self.observation_space = spaces.Box(low=np.zeros(128), high=np.zeros(128) + 255)
        elif self._obs_type == 'image':
            self.observation_space = spaces.Box(low=0, high=255,
                                                shape=(screen_height, screen_width, 3))
        else:
            raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))

    def _get_image(self):
        return self.ale.getScreenRGB(self._buffer).copy()
class emulator:
    def __init__(self, rom_name, vis, windowname='preview'):
        self.ale = ALEInterface()
        # When it starts
        self.ale.setInt("random_seed", 123)
        # Skipping 4 frames
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        print('Actions : %s' % self.legal_actions)
        self.action_map = dict()
        self.windowname = windowname
        # Raw atari frames, 210 x 160 pixel images
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" + str(self.screen_height))
        # Visualize
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

    def get_image(self):
        # Need to specify data type as uint8
        numpy_surface = np.zeros([self.screen_width * self.screen_height * 3], dtype=np.uint8)
        # get RGB values
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface, [self.screen_height, self.screen_width, 3])
        return image

    def new_game(self):
        # Reset the game and return the initial image
        self.ale.reset_game()
        return self.get_image()

    def next(self, action_index):
        # Get R(s,a)
        reward = self.ale.act(action_index)
        # Get image pixel values after taking the action
        next_state = self.get_image()
        if self.vis:
            cv2.imshow(self.windowname, next_state)
        # self.ale.game_over() returns True when the game is over
        return next_state, reward, self.ale.game_over()
class Atari:
    def __init__(self, rom_name):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('game/' + rom_name)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i
        # print(len(self.legal_actions))
        self.windowname = rom_name
        cv2.startWindowThread()
        cv2.namedWindow(rom_name)

    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3, dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):
        self.ale.reset_game()
        return self.get_image()

    def next(self, action):
        reward = self.ale.act(self.legal_actions[np.argmax(action)])
        nextstate = self.get_image()
        cv2.imshow(self.windowname, nextstate)
        if self.ale.game_over():
            self.newGame()
        # print("reward %d" % reward)
        return nextstate, reward, self.ale.game_over()
class Atari: def __init__(self,rom_name): self.ale = ALEInterface() self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode") self.ale.setInt("random_seed",123) self.ale.setInt("frame_skip",4) self.ale.loadROM(rom_name) self.screen_width,self.screen_height = self.ale.getScreenDims() self.legal_actions = self.ale.getMinimalActionSet() self.action_map = dict() for i in range(len(self.legal_actions)): self.action_map[self.legal_actions[i]] = i print len(self.legal_actions) self.windowname = rom_name cv2.startWindowThread() cv2.namedWindow(rom_name) def preprocess(self, image): image = cv2.cvtColor(cv2.resize(image, (84, 110)), cv2.COLOR_BGR2GRAY) image = image[26:110,:] ret, image = cv2.threshold(image,1,255,cv2.THRESH_BINARY) return np.reshape(image,(84,84, 1)) def get_image(self): numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8) self.ale.getScreenRGB(numpy_surface) image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3)) return self.preprocess(image) def newGame(self): self.ale.reset_game() return self.get_image() def next(self, action): reward = self.ale.act(self.legal_actions[np.argmax(action)]) nextstate = self.get_image() cv2.imshow(self.windowname,nextstate) if self.ale.game_over(): self.newGame() #print "reward %d" % reward return nextstate, reward, self.ale.game_over()
class Breakout(object):
    steps_between_actions = 4

    def __init__(self):
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', 123)
        self.ale.setBool("display_screen", False)
        self.ale.setBool("sound", False)
        self.ale.loadROM("%s/breakout.bin" % rom_directory)
        self.current_state = [
            self.ale.getScreenRGB(), self.ale.getScreenRGB()
        ]

    def start_episode(self):
        self.ale.reset_game()

    def take_action(self, action):
        assert not self.terminated

        def step():
            reward = self.ale.act(action)
            self.roll_state()
            return reward

        reward = sum(step() for _ in range(self.steps_between_actions))
        return (reward, self.current_state)

    def roll_state(self):
        assert len(self.current_state) == 2
        self.current_state = [self.current_state[1], self.ale.getScreenRGB()]
        assert len(self.current_state) == 2

    @property
    def actions(self):
        return self.ale.getMinimalActionSet()

    @property
    def terminated(self):
        return self.ale.game_over() or self.ale.lives() < 5
class emulator:
    def __init__(self, rom_name, vis):
        if vis:
            import cv2
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i
        # print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" + str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow("preview")

    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3, dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):
        self.ale.reset_game()
        return self.get_image()

    def next(self, action_indx):
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        # scipy.misc.imsave('test.png',nextstate)
        if self.vis:
            import cv2  # local import: cv2 is only required when visualizing
            cv2.imshow('preview', nextstate)
        return nextstate, reward, self.ale.game_over()
class ALEInterfaceWrapper:
    def __init__(self, repeat_action_probability, rng):
        self.internal_action_repeat_prob = repeat_action_probability
        self.prev_action = 0
        self.rng_source = rng
        self.rng = deepcopy(self.rng_source)
        self.ale = ALEInterface()
        '''
        This sets the probability from the default 0.25 to 0.
        It ensures deterministic actions.
        '''
        self.ale.setFloat('repeat_action_probability', 0.0)

    def getScreenRGB(self):
        return self.ale.getScreenRGB()

    def game_over(self):
        return self.ale.game_over()

    def reset_game(self):
        self.ale.reset_game()

    def lives(self):
        return self.ale.lives()

    def getMinimalActionSet(self):
        return self.ale.getMinimalActionSet()

    def setInt(self, key, value):
        self.ale.setInt(key, value)

    def setFloat(self, key, value):
        self.ale.setFloat(key, value)

    def loadROM(self, rom):
        self.ale.loadROM(rom)

    def reset_action_seed(self):
        self.rng = deepcopy(self.rng_source)

    def set_action_seed(self, seed):
        self.rng = np.random.RandomState(seed)

    def act(self, action):
        # Apply sticky actions in Python so the repeat sequence is reproducible
        actual_action = action
        if self.internal_action_repeat_prob > 0:
            if self.rng.uniform(0, 1) < self.internal_action_repeat_prob:
                actual_action = self.prev_action
        self.prev_action = actual_action
        return self.ale.act(actual_action)
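# Minimal usage sketch (assumed, not from the source); the ROM path is a placeholder.
# Because the sticky actions are drawn from a copied RNG, reset_action_seed() lets a
# later run replay the exact same repeat sequence.
rng = np.random.RandomState(42)
env = ALEInterfaceWrapper(repeat_action_probability=0.25, rng=rng)
env.setInt('random_seed', 123)
env.loadROM('./breakout.bin')
actions = env.getMinimalActionSet()
env.reset_action_seed()
while not env.game_over():
    env.act(actions[0])  # stickiness is applied inside act()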
class AleInterface(object):
    def __init__(self, game, args):
        self.game = game
        self.ale = ALEInterface()

        # if sys.platform == 'darwin':
        #     self.ale.setBool('sound', False)  # Sound doesn't work on OSX
        # elif sys.platform.startswith('linux'):
        #     self.ale.setBool('sound', True)
        # self.ale.setBool('display_screen', True)
        # self.ale.setBool('display_screen', args.display_screen)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)
        self.ale.setInt('random_seed', args.random_seed)

        # if rand_seed is not None:
        #     self.ale.setInt('random_seed', rand_seed)

        rom_file = "./roms/%s.bin" % game
        if not os.path.exists(rom_file):
            print("not found rom file:", rom_file)
            sys.exit(-1)
        self.ale.loadROM(rom_file)

        self.actions = self.ale.getMinimalActionSet()

    def get_actions_num(self):
        return len(self.actions)

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def get_screen_gray(self):
        return self.ale.getScreenGrayscale()

    def get_screen_rgb(self):
        return self.ale.getScreenRGB()

    def game_over(self):
        return self.ale.game_over()

    def reset_game(self):
        return self.ale.reset_game()
class Atari: def __init__(self,rom_name): self.ale = ALEInterface() self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode") self.ale.setInt("random_seed",123) self.ale.setInt("frame_skip",4) self.ale.loadROM('./' +rom_name) self.screen_width,self.screen_height = self.ale.getScreenDims() self.legal_actions = self.ale.getMinimalActionSet() self.action_map = dict() for i in range(len(self.legal_actions)): self.action_map[self.legal_actions[i]] = i #print len(self.legal_actions) self.windowname = rom_name #cv2.startWindowThread() #cv2.namedWindow(rom_name) def get_image(self): numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8) self.ale.getScreenRGB(numpy_surface) image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3)) return image def newGame(self): self.ale.reset_game() return self.get_image() def next(self, action): reward = self.ale.act(self.legal_actions[np.argmax(action)]) nextstate = self.get_image() #cv2.imshow(self.windowname,nextstate) if self.ale.game_over(): self.newGame() #print "reward %d" % reward return nextstate, reward, self.ale.game_over()
from random import randrange
import matplotlib.pyplot as plt
from ale_python_interface import ALEInterface

def train():
    ale = ALEInterface()
    ale.setInt('random_seed', 123)
    ale.loadROM('roms/breakout.bin')
    legal_actions = ale.getLegalActionSet()

    total_reward = 0
    while not ale.game_over():
        a = legal_actions[randrange(len(legal_actions))]
        reward = ale.act(a)
        screen = ale.getScreenRGB()
        print(screen)
        plt.imshow(screen)
        plt.show()
        total_reward += reward
        print(total_reward)
    print('Episode end!')
class Env:
    def __init__(self):
        self.ale = ALEInterface()
        rom_name = "roms/Breakout.bin"
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM(rom_name)
        legal_actions = self.ale.getMinimalActionSet()
        self.action_map = {}
        for i in range(len(legal_actions)):
            self.action_map[i] = legal_actions[i]
        self.action_num = len(self.action_map)

    def reset(self):
        state = np.zeros((84, 84, 3), dtype=np.uint8)
        self.ale.reset_game()
        return state

    def step(self, action):
        reward = self.ale.act(self.action_map[action])
        state = self.ale.getScreenRGB()
        done = self.ale.game_over()
        return state, reward, done, ""
class ALEInterfaceWrapper:
    def __init__(self, repeat_action_probability):
        self.internal_action_repeat_prob = repeat_action_probability
        self.prev_action = 0
        self.ale = ALEInterface()
        '''
        Set ALE's action-repeat probability (the ALE default is 0.25).
        Passing 0.0 ensures deterministic actions.
        '''
        self.ale.setFloat('repeat_action_probability', repeat_action_probability)

    def getScreenRGB(self):
        return self.ale.getScreenRGB()

    def game_over(self):
        return self.ale.game_over()

    def reset_game(self):
        self.ale.reset_game()

    def lives(self):
        return self.ale.lives()

    def getMinimalActionSet(self):
        return self.ale.getMinimalActionSet()

    def setInt(self, key, value):
        self.ale.setInt(key, value)

    def setFloat(self, key, value):
        self.ale.setFloat(key, value)

    def loadROM(self, rom):
        self.ale.loadROM(rom)

    def act(self, action):
        actual_action = action
        return self.ale.act(actual_action)
class Game():
    """ Wrapper around the ALEInterface class. """

    def __init__(self, rom_file, sdl=False):
        self.ale = ALEInterface()
        # Setup SDL
        if sdl:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool(b'sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool(b'sound', True)
            self.ale.setBool(b'display_screen', True)
        # Load rom
        self.ale.loadROM(str.encode(rom_file))

    def get_action_set(self):
        return self.ale.getLegalActionSet()

    def get_minimal_action_set(self):
        return self.ale.getMinimalActionSet()

    def game_over(self):
        return self.ale.game_over()

    def act(self, action):
        return self.ale.act(action)

    def reset_game(self):
        self.ale.reset_game()

    def get_frame(self):
        return self.ale.getScreenRGB()
class GameEnvironment:
    def __init__(self, settings):
        self.ale = ALEInterface()
        self.ale.setBool('display_screen', settings['DISPLAY_SCREEN'])
        self.ale.setBool('sound', settings['SOUND'])
        self.ale.setBool('color_averaging', settings['COLOR_AVERAGING'])
        self.ale.setInt('random_seed', settings['RANDOM_SEED'])
        self.ale.setInt('frame_skip', settings['FRAME_SKIP'])
        self.ale.setFloat('repeat_action_probability', settings['REPEAT_ACTION_PROB'])

        roms_dir = settings['ROMS_DIR']
        rom_name = settings['ROM_NAME']
        if rom_name.endswith('.bin'):
            self.name = rom_name[:-4]
            ROM = rom_name
        else:
            self.name = rom_name
            ROM = rom_name + '.bin'
        self.ale.loadROM(os.path.join(roms_dir, ROM))

        self.random_starts = settings['RANDOM_STARTS']
        self.rng = settings['RNG']

        if settings['MINIMAL_ACTION_SET']:
            self.actions = self.ale.getMinimalActionSet()
        else:
            self.actions = self.ale.getLegalActionSet()
        self.n_actions = len(self.actions)

        self.width, self.height = self.ale.getScreenDims()
        self.observation = np.zeros((self.height, self.width), dtype='uint8')
        self.reward = None
        self.game_over = None
        self.terminal = None
        self.total_lives = None
        self.init()

    def init(self):
        self.restartGame()
        self.reward = 0
        self.game_over = self.gameOver()
        self.terminal = self.game_over
        self.total_lives = self.lives()
        self.step(0)

    def getState(self):
        return self.observation, self.reward, self.terminal, self.game_over

    def step(self, action, training=False):
        self.reward = self.act(action)
        self.paint()
        lives = self.lives()
        self.game_over = self.gameOver()
        self.terminal = self.game_over
        # During training, treat a lost life as a terminal state
        if training and (lives < self.total_lives):
            self.terminal = True
        self.total_lives = lives
        return self.getState()

    def newGame(self):
        self.init()
        for i in range(self.rng.randint(1, self.random_starts)):
            self.act(0)
        terminal = self.gameOver()
        if terminal:
            print("Warning terminal in random init")
        return self.step(0)

    def newTestGame(self):
        self.init()
        return self.getState()

    def paint(self):
        self.ale.getScreenGrayscale(self.observation)

    def getScreenRGB(self):
        return self.ale.getScreenRGB()

    def act(self, action):
        assert (action >= 0) and (action < self.n_actions)
        return self.ale.act(self.actions[action])

    def lives(self):
        return self.ale.lives()

    def restartGame(self):
        self.ale.reset_game()

    def gameOver(self):
        return self.ale.game_over()
class AtariDriver(object):
    """ A wrapper for atari emulator. """

    def __init__(self, rom_file, frame_skip=1, viz=0):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames
        :param viz: the delay. visualize the game while running. 0 to disable
        """
        self.ale = ALEInterface()
        self.rng = get_rng(self)

        self.ale.setInt("random_seed", self.rng.randint(0, 1000))
        self.ale.setInt("frame_skip", frame_skip)
        self.ale.loadROM(rom_file)
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        if isinstance(viz, int):
            viz = float(viz)
        self.viz = viz
        self.romname = os.path.basename(rom_file)
        if self.viz and isinstance(self.viz, float):
            cv2.startWindowThread()
            cv2.namedWindow(self.romname)

        self._reset()
        self.last_image = self._grab_raw_image()
        self.framenum = 0

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = np.zeros(self.height * self.width * 3, dtype=np.uint8)
        self.ale.getScreenRGB(m)
        return m.reshape((self.height, self.width, 3))

    def grab_image(self):
        """
        :returns: a gray-scale image, max-pooled over the last frame.
        """
        now = self._grab_raw_image()
        ret = np.maximum(now, self.last_image)
        self.last_image = now
        if self.viz and isinstance(self.viz, float):
            cv2.imshow(self.romname, ret)
            time.sleep(self.viz)
        elif self.viz:
            cv2.imwrite("{}/{:06d}.jpg".format(self.viz, self.framenum), ret)
            self.framenum += 1
        ret = cv2.cvtColor(ret, cv2.COLOR_BGR2YUV)[:, :, 0]
        ret = ret[36:204, :]  # several online repos all use this
        return ret

    def get_num_actions(self):
        """
        :returns: the number of legal actions
        """
        return len(self.actions)

    def _reset(self):
        self.ale.reset_game()

    def next(self, act):
        """
        :param act: an index of the action
        :returns: (next_image, reward, isOver)
        """
        r = self.ale.act(self.actions[act])
        s = self.grab_image()
        isOver = self.ale.game_over()
        if isOver:
            self._reset()
        return (s, r, isOver)
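# Minimal driving loop (assumed, not from the source); requires the get_rng helper
# used by AtariDriver and a ROM at the placeholder path below.
driver = AtariDriver('roms/breakout.bin', frame_skip=4, viz=0)
for _ in range(100):
    act = np.random.randint(driver.get_num_actions())
    image, reward, is_over = driver.next(act)  # cropped, max-pooled gray-scale frame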
class AtariEnvironment:
    def __init__(self, args, outputDir):
        self.outputDir = outputDir
        self.screenCaptureFrequency = args.screen_capture_freq

        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 123456)
        random.seed(123456)
        # Fix https://groups.google.com/forum/#!topic/deep-q-learning/p4FAIaabwlo
        self.ale.setFloat(b'repeat_action_probability', 0.0)

        # Load the ROM file
        self.ale.loadROM(args.rom)

        self.actionSet = self.ale.getMinimalActionSet()
        self.gameNumber = 0
        self.stepNumber = 0
        self.resetGame()

    def getNumActions(self):
        return len(self.actionSet)

    def getState(self):
        return self.state

    def getGameNumber(self):
        return self.gameNumber

    def getFrameNumber(self):
        return self.ale.getFrameNumber()

    def getEpisodeFrameNumber(self):
        return self.ale.getEpisodeFrameNumber()

    def getEpisodeStepNumber(self):
        return self.episodeStepNumber

    def getStepNumber(self):
        return self.stepNumber

    def getGameScore(self):
        return self.gameScore

    def isGameOver(self):
        return self.ale.game_over()

    def step(self, action):
        previousLives = self.ale.lives()
        reward = 0
        isTerminal = 0
        self.stepNumber += 1
        self.episodeStepNumber += 1

        for i in range(4):
            prevScreenRGB = self.ale.getScreenRGB()
            reward += self.ale.act(self.actionSet[action])
            screenRGB = self.ale.getScreenRGB()

            # Detect end of episode, I don't think I'm handling this right in terms
            # of the overall game loop (??)
            if self.ale.lives() < previousLives or self.ale.game_over():
                isTerminal = 1
                break

            if self.gameNumber % self.screenCaptureFrequency == 0:
                dir = self.outputDir + '/screen_cap/game-%06d' % (self.gameNumber)
                if not os.path.isdir(dir):
                    os.makedirs(dir)
                self.ale.saveScreenPNG(dir + '/frame-%06d.png' % (self.getEpisodeFrameNumber()))

        maxedScreen = np.maximum(screenRGB, prevScreenRGB)
        self.state = self.state.stateByAddingScreen(maxedScreen, self.ale.getFrameNumber())
        self.gameScore += reward
        return reward, self.state, isTerminal

    def resetGame(self):
        if self.ale.game_over():
            self.gameNumber += 1
        self.ale.reset_game()
        self.state = State().stateByAddingScreen(self.ale.getScreenRGB(),
                                                 self.ale.getFrameNumber())
        self.gameScore = 0
        self.episodeStepNumber = 0  # environment steps vs ALE frames. Will probably be 4*frame number
class AtariEmulator(BaseEnvironment):
    def __init__(self, emulator_id, game, resource_folder, random_seed=3,
                 random_start=True, single_life_episodes=False, history_window=1,
                 visualize=False, verbose=0, **unknown):
        if verbose >= 2:
            logging.debug('Emulator#{} received unknown args: {}'.format(
                emulator_id, unknown))
        self.emulator_id = emulator_id
        self.ale = ALEInterface()
        self.ale.setInt(b"random_seed", random_seed * (emulator_id + 1))
        # For fuller control on explicit action repeat (>= ALE 0.5.0)
        self.ale.setFloat(b"repeat_action_probability", 0.0)
        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt(b"frame_skip", 1)
        self.ale.setBool(b"color_averaging", False)
        self.ale.setBool(b"display_screen", visualize)
        full_rom_path = resource_folder + "/" + game + ".bin"
        self.ale.loadROM(str.encode(full_rom_path))
        self.legal_actions = self.ale.getMinimalActionSet()
        # this env is fixed until firing, so you have to...
        self._have_to_fire = ('FIRE' in [ACTION_MEANING[a] for a in self.legal_actions])

        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.lives = self.ale.lives()

        self.random_start = random_start
        self.single_life_episodes = single_life_episodes
        self.call_on_new_frame = visualize
        self.history_window = history_window

        self.observation_shape = (self.history_window, IMG_SIZE_X, IMG_SIZE_Y)
        self.rgb_screen = np.zeros((self.screen_height, self.screen_width, 3), dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height, self.screen_width, 1), dtype=np.uint8)

        # Processed historical frames that will be fed in to the network
        # (i.e., four 84x84 images)
        self.history = create_history_observation(self.history_window)
        # ObservationPool(np.zeros(self.observation_shape, dtype=np.uint8))
        self.frame_preprocessor = FramePreprocessor(self.gray_screen.shape, FRAMES_IN_POOL)

    def get_legal_actions(self):
        return self.legal_actions

    def __get_screen_image(self):
        """
        Get the current frame luminance
        :return: the current frame
        """
        self.ale.getScreenGrayscale(self.gray_screen)
        if self.call_on_new_frame:
            self.ale.getScreenRGB(self.rgb_screen)
            self.on_new_frame(self.rgb_screen)
        return self.gray_screen

    def on_new_frame(self, frame):
        pass

    def __random_start_reset(self):
        """ Restart game """
        self.ale.reset_game()
        if self.random_start:
            wait = random.randint(0, MAX_START_WAIT + 1)
            for _ in range(wait):
                self.ale.act(self.get_noop())
            if self.__is_over():
                self.ale.reset_game()
        self.lives = self.ale.lives()

    def __new_game(self):
        self.__random_start_reset()
        if self._have_to_fire:
            # take action on reset for environments that are fixed until firing
            self.ale.act(self.legal_actions[1])
            if self.__is_over():
                self.__random_start_reset()
            self.ale.act(self.legal_actions[2])
            if self.__is_over():
                self.__random_start_reset()

    def __action_repeat(self, a, times=ACTION_REPEAT):
        """ Repeat action and grab screen into frame pool """
        reward = 0
        for i in range(times - FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
        # Only need to add the last FRAMES_IN_POOL frames to the frame pool
        for i in range(FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
            self.frame_preprocessor.new_frame(self.__get_screen_image())
        return reward

    def reset(self):
        """ Get the initial state """
        self.__new_game()
        for step in range(self.history_window):
            _ = self.__action_repeat(0)
            self.history.new_observation(self.frame_preprocessor.get_processed())
        if self.__is_terminal():
            raise Exception('This should never happen.')
        return self.history.get_state(), None

    def next(self, action):
        """ Get the next state, reward, and game over signal """
        reward = self.__action_repeat(action)
        self.history.new_observation(self.frame_preprocessor.get_processed())
        terminal = self.__is_terminal()
        self.lives = self.ale.lives()
        return self.history.get_state(), reward, terminal, None

    def __is_terminal(self):
        if self.single_life_episodes:
            return self.__is_over() or (self.lives > self.ale.lives())
        else:
            return self.__is_over()

    def __is_over(self):
        return self.ale.game_over()

    def get_noop(self):
        return self.legal_actions[0]

    def close(self):
        del self.ale
class ArcadeLearningEnvironment(Environment):
    """
    [Arcade Learning Environment](https://github.com/mgbellemare/Arcade-Learning-Environment)
    adapter (specification key: `ale`, `arcade_learning_environment`).

    May require:
    ```bash
    sudo apt-get install libsdl1.2-dev libsdl-gfx1.2-dev libsdl-image1.2-dev cmake

    git clone https://github.com/mgbellemare/Arcade-Learning-Environment.git
    cd Arcade-Learning-Environment

    mkdir build && cd build
    cmake -DUSE_SDL=ON -DUSE_RLGLUE=OFF -DBUILD_EXAMPLES=ON ..
    make -j 4
    cd ..

    pip3 install .
    ```

    Args:
        level (string): ALE rom file
            (<span style="color:#C00000"><b>required</b></span>).
        life_loss_terminal (bool): Signals a terminal state on loss of life
            (<span style="color:#00C000"><b>default</b></span>: false).
        life_loss_punishment (float): Penalty subtracted from the reward on loss of life
            (<span style="color:#00C000"><b>default</b></span>: 0.0).
        repeat_action_probability (float): Repeats last action with given probability
            (<span style="color:#00C000"><b>default</b></span>: 0.0).
        visualize (bool): Whether to visualize interaction
            (<span style="color:#00C000"><b>default</b></span>: false).
        frame_skip (int > 0): Number of times to repeat an action without observing
            (<span style="color:#00C000"><b>default</b></span>: 1).
        seed (int): Random seed
            (<span style="color:#00C000"><b>default</b></span>: none).
    """

    def __init__(self, level, life_loss_terminal=False, life_loss_punishment=0.0,
                 repeat_action_probability=0.0, visualize=False, frame_skip=1, seed=None):
        from ale_python_interface import ALEInterface

        self.environment = ALEInterface()
        self.rom_file = level

        self.life_loss_terminal = life_loss_terminal
        self.life_loss_punishment = life_loss_punishment

        self.environment.setFloat(b'repeat_action_probability', repeat_action_probability)
        self.environment.setBool(b'display_screen', visualize)
        self.environment.setInt(b'frame_skip', frame_skip)
        if seed is not None:
            self.environment.setInt(b'random_seed', seed)

        # All set commands must be done before loading the ROM.
        self.environment.loadROM(rom_file=self.rom_file.encode())
        self.available_actions = tuple(self.environment.getLegalActionSet())

        # Full list of actions:
        # No-Op, Fire, Up, Right, Left, Down, Up Right, Up Left, Down Right, Down Left,
        # Up Fire, Right Fire, Left Fire, Down Fire, Up Right Fire, Up Left Fire,
        # Down Right Fire, Down Left Fire

    def __str__(self):
        return super().__str__() + '({})'.format(self.rom_file)

    def states(self):
        width, height = self.environment.getScreenDims()
        return dict(type='float', shape=(height, width, 3))

    def actions(self):
        return dict(type='int', num_values=len(self.available_actions))

    def close(self):
        self.environment.__del__()
        self.environment = None

    def get_states(self):
        screen = np.copy(self.environment.getScreenRGB(screen_data=self.screen))
        screen = screen.astype(dtype=np.float32) / 255.0
        return screen

    def reset(self):
        self.environment.reset_game()
        width, height = self.environment.getScreenDims()
        self.screen = np.empty((height, width, 3), dtype=np.uint8)
        self.lives = self.environment.lives()
        return self.get_states()

    def execute(self, actions):
        reward = self.environment.act(action=self.available_actions[actions])
        terminal = self.environment.game_over()
        states = self.get_states()
        next_lives = self.environment.lives()
        if next_lives < self.lives:
            if self.life_loss_terminal:
                terminal = True
            elif self.life_loss_punishment > 0.0:
                reward -= self.life_loss_punishment
            self.lives = next_lives
        return states, terminal, reward
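# Minimal usage sketch (assumed, not from the source); the ROM path is a placeholder.
env = ArcadeLearningEnvironment(level='roms/breakout.bin', frame_skip=4, seed=0)
states = env.reset()                               # float32 screen scaled to [0, 1]
states, terminal, reward = env.execute(actions=0)  # note the (states, terminal, reward) order
env.close()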
# ale.loadROM(rom_file)
#
# # Get the list of legal actions
# legal_actions = ale.getLegalActionSet()
#
# # Play 10 episodes
# for episode in range(10):
#     total_reward = 0
#     while not ale.game_over():
#         a = legal_actions[randrange(len(legal_actions))]
#         # Apply an action and get the resulting reward
#         reward = ale.act(a)
#         total_reward += reward
#     print('Episode %d ended with score: %d' % (episode, total_reward))
#     ale.reset_game()

from ale_python_interface import ALEInterface

ale = ALEInterface()
ale.setBool('display_screen', True)
rom_file = "./roms/breakoutv.bin"
ale.loadROM(rom_file)
ale.reset_game()
ale.getScreenRGB()
ale.reset_game()
ale.act(0)
class AtariPlayer(RLEnvironment):
    """
    A wrapper for atari emulator.
    NOTE: will automatically restart when a real episode ends
    """

    def __init__(self, rom_file, viz=0, height_range=(None, None),
                 frame_skip=4, image_shape=(84, 84), nullop_start=30,
                 live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_lost_as_eoe: consider loss of lives as end of episode. useful for training.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_dir('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            log_once()

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)

            self.ale.setInt(b"random_seed", self.rng.randint(0, 10000))
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w, 1) float32 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                # m = cv2.resize(ret, (1920,1200))
                cv2.imshow(self.windowname, ret)
                time.sleep(self.viz)
        ret = ret[self.height_range[0]:self.height_range[1], :].astype('float32')
        # 0.299, 0.587, 0.114 -- same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)
        ret = np.expand_dims(ret, axis=2)
        return ret

    def get_action_space(self):
        return DiscreteActionSpace(len(self.actions))

    def restart_episode(self):
        if self.current_episode_score.count > 0:
            self.stats['score'].append(self.current_episode_score.sum)
        self.current_episode_score.reset()
        self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        self.current_episode_score.feed(r)
        isOver = self.ale.game_over()
        if isOver:
            self.restart_episode()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives
        return (r, isOver)
class ALEEnvironment(BaseEnvironment):
    """
    The :class:`MinimalGameHandler` class takes care of the interface to the ALE and tries to do nothing else. It's
    meant for advanced users who need fine control over every aspect of the process. It has many functions that are
    simply wrappers of the underlying ALE but with pythonic names/usage.

    Parameters
    ----------
    rom : byte string
        Specifies the rom file to load. Must be a byte string: b'dir_for_rom/rom.bin'
    display_screen : boolean
        Default False. Whether or not to show the game. True takes longer to run but can be fun to watch
    step_cap: int
        Default None. Maximum number of steps to run in an episode. Breakout can sometimes not return terminal
        even when game is ended. This fixes that and will return terminal after stepping above this count
    """

    def __init__(self, rom, resize_shape=(84, 84), skip_frame=1, repeat_action_probability=0.0,
                 step_cap=None, loss_of_life_termination=False, loss_of_life_negative_reward=False,
                 grayscale=True, display_screen=False, seed=np.random.RandomState()):
        # set up emulator
        self.ale = ALEInterface()

        if display_screen:
            self.ale.setBool(b'display_screen', True)

        self.ale.setInt(b'frame_skip', skip_frame)
        self.ale.setInt(b'random_seed', seed.randint(0, 9999))
        self.ale.setFloat(b'repeat_action_probability', repeat_action_probability)
        self.ale.setBool(b'color_averaging', False)

        self.ale.loadROM(rom.encode())

        # setup gamescreen object. I think this is faster than recreating an empty one each time
        width, height = self.ale.getScreenDims()
        channels = 1 if grayscale else 3
        self.grayscale = grayscale
        self.gamescreen = np.empty((height, width, channels), dtype=np.uint8)

        self.resize_shape = resize_shape
        self.skip_frame = skip_frame
        self.step_cap = step_cap
        self.curr_step_count = 0

        # setup action converter
        # ALE returns legal action indexes, convert these to just numbers
        self.action_inds = self.ale.getMinimalActionSet()

        # setup lives
        self.loss_of_life_negative_reward = loss_of_life_negative_reward
        self.cur_lives = self.ale.lives()
        self.loss_of_life_termination = loss_of_life_termination
        self.life_lost = False

    def reset(self):
        self.ale.reset_game()
        self.cur_lives = self.ale.lives()
        self.life_lost = False
        self.curr_step_count = 0

    def step(self, action):
        self.curr_step_count += 1
        ale_action = self.action_inds[action]
        return self._step(ale_action)

    def _step(self, ale_action):
        if not self.loss_of_life_termination and not self.loss_of_life_negative_reward:
            return self.ale.act(ale_action)
        else:
            rew = self.ale.act(ale_action)
            new_lives = self.ale.lives()
            if new_lives < self.cur_lives:
                # if loss of life is negative reward subtract 1 from reward
                if self.loss_of_life_negative_reward:
                    rew -= 1
                self.cur_lives = new_lives
                self.life_lost = True
            return rew

    def get_state(self):
        if self.grayscale:
            self.gamescreen = self.ale.getScreenGrayscale(self.gamescreen)
        else:
            self.gamescreen = self.ale.getScreenRGB(self.gamescreen)
        # if resize_shape is none then don't resize
        if self.resize_shape is not None:
            # if grayscale we remove the last dimension (channel)
            if self.grayscale:
                processedImg = imresize(self.gamescreen[:, :, 0], self.resize_shape)
            else:
                processedImg = imresize(self.gamescreen, self.resize_shape)
        return processedImg

    def get_state_shape(self):
        return self.resize_shape

    def get_terminal(self):
        if self.loss_of_life_termination and self.life_lost:
            return True
        elif self.step_cap is not None and self.curr_step_count > self.step_cap:
            return True
        else:
            return self.ale.game_over()

    def get_num_actions(self):
        return len(self.action_inds)
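# Minimal usage sketch (assumed, not from the source); the ROM path is a placeholder
# and scipy.misc's imresize must be available for get_state().
env = ALEEnvironment('roms/breakout.bin', resize_shape=(84, 84), skip_frame=4)
env.reset()
reward = env.step(0)     # index into the minimal action set
state = env.get_state()  # 84x84 grayscale frame
done = env.get_terminal()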
controller = FittedQController(numActions=len(legal_actions), numFeatures=5000, horizon=100, discountParameter=0.3, epsilon=0.2) # Simulate episodes numEpisodes = 100 counter = 1 # Total number of time steps, used for naming screenshots with the time index they were taken for episode in xrange(numEpisodes): # Initialize reward to zero as we do not have any reward yet reward = 0 total_reward = 0 while not ale.game_over(): # Retrieve and encode current frame frame = ale.getScreenRGB() frame = np.array(frame, dtype=float) frameCode = encodeFrame(frame) # Get action from controller, and pass in frame code and previous reward (frame code is dct of flattened frame, # can be replaced with deep autoencoder, etc.) a = legal_actions[controller.queryForActionAndUpdateExperience(features=frameCode, reward=reward)] # Save screen shot of game #image = png.from_array(frame, 'RGB;8') #imFile = savePath + str(counter) + '.png' #image.save(imFile) #counter += 1 #image = [] # Select an action and send to the ALE. Get reward and add to running tally
class AtariPlayer(RLEnvironment): """ A wrapper for atari emulator. """ def __init__(self, rom_file, viz=0, height_range=(None,None), frame_skip=4, image_shape=(84, 84), nullop_start=30, live_lost_as_eoe=True): """ :param rom_file: path to the rom :param frame_skip: skip every k frames and repeat the action :param image_shape: (w, h) :param height_range: (h1, h2) to cut :param viz: visualization to be done. Set to 0 to disable. Set to a positive number to be the delay between frames to show. Set to a string to be a directory to store frames. :param nullop_start: start with random number of null ops :param live_lost_as_eoe: consider loss of lives as end of episode. useful for training. """ super(AtariPlayer, self).__init__() self.ale = ALEInterface() self.rng = get_rng(self) self.ale.setInt("random_seed", self.rng.randint(0, 10000)) self.ale.setBool("showinfo", False) try: ALEInterface.setLoggerMode(ALEInterface.Logger.Warning) except AttributeError: log_once() self.ale.setInt("frame_skip", 1) self.ale.setBool('color_averaging', False) # manual.pdf suggests otherwise. may need to check self.ale.setFloat('repeat_action_probability', 0.0) # viz setup if isinstance(viz, six.string_types): assert os.path.isdir(viz), viz self.ale.setString('record_screen_dir', viz) viz = 0 if isinstance(viz, int): viz = float(viz) self.viz = viz if self.viz and isinstance(self.viz, float): self.windowname = os.path.basename(rom_file) cv2.startWindowThread() cv2.namedWindow(self.windowname) self.ale.loadROM(rom_file) self.width, self.height = self.ale.getScreenDims() self.actions = self.ale.getMinimalActionSet() self.live_lost_as_eoe = live_lost_as_eoe self.frame_skip = frame_skip self.nullop_start = nullop_start self.height_range = height_range self.image_shape = image_shape self.current_episode_score = StatCounter() self.restart_episode() def _grab_raw_image(self): """ :returns: the current 3-channel image """ m = self.ale.getScreenRGB() return m.reshape((self.height, self.width, 3)) def current_state(self): """ :returns: a gray-scale (h, w, 1) image """ ret = self._grab_raw_image() # max-pooled over the last screen ret = np.maximum(ret, self.last_raw_screen) if self.viz: if isinstance(self.viz, float): cv2.imshow(self.windowname, ret) time.sleep(self.viz) ret = ret[self.height_range[0]:self.height_range[1],:] # 0.299, 0.587, 0.114, same as rgb2y in torch/image
ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY) ret = cv2.resize(ret, self.image_shape) ret = np.expand_dims(ret, axis=2) return ret def get_num_actions(self): """ :returns: the number of legal actions """ return len(self.actions) def restart_episode(self): if self.current_episode_score.count > 0: self.stats['score'].append(self.current_episode_score.sum) self.current_episode_score.reset() self.ale.reset_game() # random null-ops start n = self.rng.randint(self.nullop_start) self.last_raw_screen = self._grab_raw_image() for k in range(n): if k == n - 1: self.last_raw_screen = self._grab_raw_image() self.ale.act(0) def action(self, act): """ :param act: an index of the action :returns: (reward, isOver) """ oldlives = self.ale.lives() r = 0 for k in range(self.frame_skip): if k == self.frame_skip - 1: self.last_raw_screen = self._grab_raw_image() r += self.ale.act(self.actions[act]) newlives = self.ale.lives() if self.ale.game_over() or \ (self.live_lost_as_eoe and newlives < oldlives): break self.current_episode_score.feed(r) isOver = self.ale.game_over() if isOver: self.restart_episode() if self.live_lost_as_eoe: isOver = isOver or newlives < oldlives return (r, isOver) def get_stat(self): try: return {'avg_score': np.mean(self.stats['score']), 'max_score': float(np.max(self.stats['score'])) } except ValueError: return {}
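# Why current_state() takes np.maximum with the previous raw screen (a sketch,
# not from the source): many Atari games draw some sprites only on alternate
# frames, so a pixelwise max over two consecutive frames keeps them visible.
import numpy as np
even_frame = np.zeros((210, 160, 3), dtype=np.uint8)   # sprite not drawn
odd_frame = np.zeros((210, 160, 3), dtype=np.uint8)
odd_frame[100:105, 80:85, :] = 255                     # sprite drawn here
pooled = np.maximum(even_frame, odd_frame)             # sprite survives pooling
assert pooled[102, 82, 0] == 255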
regressor = skflow.TensorFlowDNNRegressor(hidden_units=[20, 10, 10, 15, 20], learning_rate=0.01, verbose=int(sys.argv[2])) regressor.fit(np.random.randn(1601, 1), append([1.], np.zeros((1600, 1)))) # append is presumably numpy.append # regressor = skflow.TensorFlowEstimator.restore('./regressor') def Q(s, a): return regressor.predict(append(s, a)) def detectState(ale): return cv2.resize(ale.getScreenGrayscale(), (40,40)) while True: ale = ALEInterface() ale.loadROM("breakout.bin") actionSet = ale.getMinimalActionSet() while not ale.game_over(): if sys.argv[1] == 'disp': cv2.imshow('', cv2.resize(ale.getScreenRGB(), (600,600))) cv2.waitKey(1) s = detectState(ale) qvals = [] for action in actionSet: qvals.append(Q(s, action)[0]) a = actionSet[qvals.index(max(qvals))] X = append(s, a) r = ale.act(a) s_ = detectState(ale) qvals = [] for action in actionSet: qvals.append(Q(s_, action)[0]) a_ = actionSet[qvals.index(max(qvals))] y = r + g*Q(s_, a_) # one-step SARSA-style target; g is the discount factor, defined elsewhere in the original script regressor.fit(X, y, logdir='/tmp/regressor')
if USE_SDL: if sys.platform == 'darwin': import pygame pygame.init() ale.setBool('sound', False) # Sound doesn't work on OSX elif sys.platform.startswith('linux'): ale.setBool('sound', True) ale.setBool('display_screen', True) # Load the ROM file rom_file = str.encode('data/roms/breakout.bin') ale.loadROM(rom_file) # Get the list of legal actions legal_actions = ale.getLegalActionSet() print(legal_actions) # Play 10 episodes for episode in range(10): total_reward = 0 while not ale.game_over(): a = legal_actions[randrange(len(legal_actions))] screen = ale.getScreenRGB() # Apply an action and get the resulting reward reward = ale.act(a) total_reward += reward print('Episode %d ended with score: %d' % (episode, total_reward)) ale.reset_game() img = Image.fromarray(screen, 'RGB') img.show()
class Emulator: def __init__(self, rom_path, rom_name, visualize, actor_id, rseed, single_life_episodes = False): self.ale = ALEInterface() self.ale.setInt("random_seed", rseed * (actor_id + 1)) # For fuller control on explicit action repeat (>= ALE 0.5.0) self.ale.setFloat("repeat_action_probability", 0.0) # Disable frame_skip and color_averaging # See: http://is.gd/tYzVpj self.ale.setInt("frame_skip", 1) self.ale.setBool("color_averaging", False) self.ale.loadROM(rom_path + "/" + rom_name + ".bin") self.legal_actions = self.ale.getMinimalActionSet() self.screen_width,self.screen_height = self.ale.getScreenDims() #self.ale.setBool('display_screen', True) # Processed historical frames that will be fed in to the network # (i.e., four 84x84 images) self.screen_images_processed = np.zeros((IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES)) self.rgb_screen = np.zeros((self.screen_height,self.screen_width, 3), dtype=np.uint8) self.gray_screen = np.zeros((self.screen_height,self.screen_width,1), dtype=np.uint8) self.frame_pool = np.empty((2, self.screen_height, self.screen_width)) self.current = 0 self.lives = self.ale.lives() self.visualize = visualize self.visualize_processed = False self.windowname = rom_name + ' ' + str(actor_id) if self.visualize: logger.debug("Opening emulator window...") #from skimage import io #io.use_plugin('qt') cv2.startWindowThread() cv2.namedWindow(self.windowname) logger.debug("Emulator window opened") if self.visualize_processed: logger.debug("Opening processed frame window...") cv2.startWindowThread() logger.debug("Processed frame window opened") cv2.namedWindow(self.windowname + "_processed") self.single_life_episodes = single_life_episodes def get_screen_image(self): """ Add screen (luminance) to frame pool """ # [screen_image, screen_image_rgb] = [self.ale.getScreenGrayscale(), # self.ale.getScreenRGB()] self.ale.getScreenGrayscale(self.gray_screen) self.ale.getScreenRGB(self.rgb_screen) self.frame_pool[self.current] = np.squeeze(self.gray_screen) self.current = (self.current + 1) % FRAMES_IN_POOL return self.rgb_screen def new_game(self): """ Restart game """ self.ale.reset_game() self.lives = self.ale.lives() if MAX_START_WAIT < 0: logger.debug("Cannot time travel yet.") sys.exit() elif MAX_START_WAIT > 0: wait = random.randint(0, MAX_START_WAIT) else: wait = 0 for _ in xrange(wait): self.ale.act(self.legal_actions[0]) def process_frame_pool(self): """ Preprocess frame pool """ img = None if BLEND_METHOD == "max_pool": img = np.amax(self.frame_pool, axis=0) # img = resize(img[:210, :], (84, 84)) img = cv2.resize(img[:210, :], (84, 84), interpolation=cv2.INTER_LINEAR) img = img.astype(np.float32) img *= (1.0/255.0) return img # Reduce height to 210, if not so #cropped_img = img[:210, :] # Downsample to 110x84 #down_sampled_img = resize(cropped_img, (84, 84)) # Crop to 84x84 playing area #stackable_image = down_sampled_img[:, 26:110] #return stackable_image def action_repeat(self, a): """ Repeat action and grab screen into frame pool """ reward = 0 for i in xrange(ACTION_REPEAT): reward += self.ale.act(self.legal_actions[a]) new_screen_image_rgb = self.get_screen_image() return reward, new_screen_image_rgb def get_reshaped_state(self, state): return np.reshape(state, (1, IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES)) #return np.reshape(self.screen_images_processed, # (1, IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES)) def get_initial_state(self): """ Get the initial state """ self.new_game() for step in xrange(NR_IMAGES): reward, new_screen_image_rgb = self.action_repeat(0) 
self.screen_images_processed[:, :, step] = self.process_frame_pool() self.show_screen(new_screen_image_rgb) if self.is_terminal(): # without a global declaration the decrement below would raise UnboundLocalError global MAX_START_WAIT MAX_START_WAIT -= 1 return self.get_initial_state() return np.copy(self.screen_images_processed) #get_reshaped_state() def next(self, action): """ Get the next state, reward, and game over signal """ reward, new_screen_image_rgb = self.action_repeat(np.argmax(action)) self.screen_images_processed[:, :, 0:3] = \ self.screen_images_processed[:, :, 1:4] self.screen_images_processed[:, :, 3] = self.process_frame_pool() self.show_screen(new_screen_image_rgb) terminal = self.is_terminal() self.lives = self.ale.lives() return np.copy(self.screen_images_processed), reward, terminal #get_reshaped_state(), reward, terminal def show_screen(self, image): """ Show visuals for raw and processed images """ if self.visualize: #io.imshow(image[:210, :], fancy=True) cv2.imshow(self.windowname, image[:210, :]) if self.visualize_processed: #io.imshow(self.screen_images_processed[:, :, 3], fancy=True) cv2.imshow(self.windowname + "_processed", self.screen_images_processed[:, :, 3]) def is_terminal(self): if self.single_life_episodes: return (self.is_over() or (self.lives > self.ale.lives())) else: return self.is_over() def is_over(self): return self.ale.game_over()
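# Minimal sketch of the frame-pool preprocessing above (cv2 + numpy only;
# the 210x160 shapes are assumptions matching a standard Atari screen):
# blend two grayscale frames by pixelwise max, resize to 84x84, scale to [0, 1].
import cv2
import numpy as np
frame_pool = np.random.randint(0, 256, size=(2, 210, 160)).astype(np.uint8)
img = np.amax(frame_pool, axis=0)                                  # max blend
img = cv2.resize(img[:210, :], (84, 84), interpolation=cv2.INTER_LINEAR)
img = img.astype(np.float32) * (1.0 / 255.0)                       # normalize
assert img.shape == (84, 84) and 0.0 <= img.min() and img.max() <= 1.0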
class AtariPlayer(gym.Env): """ A wrapper for ALE emulator, with configurations to mimic DeepMind DQN settings. Info: score: the accumulated reward in the current game gameOver: True when the current game is Over """ def __init__(self, rom_file, viz=0, frame_skip=4, nullop_start=30, live_lost_as_eoe=True, max_num_frames=0): """ Args: rom_file: path to the rom frame_skip: skip every k frames and repeat the action viz: visualization to be done. Set to 0 to disable. Set to a positive number to be the delay between frames to show. Set to a string to be a directory to store frames. nullop_start: start with random number of null ops. live_lost_as_eoe: consider loss of lives as end of episode. Useful for training. max_num_frames: maximum number of frames per episode. """ super(AtariPlayer, self).__init__() if not os.path.isfile(rom_file) and '/' not in rom_file: rom_file = get_dataset_path('atari_rom', rom_file) assert os.path.isfile(rom_file), \ "rom {} not found. Please download at {}".format(rom_file, ROM_URL) try: ALEInterface.setLoggerMode(ALEInterface.Logger.Error) except AttributeError: if execute_only_once(): logger.warn("You're not using latest ALE") # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86 with _ALE_LOCK: self.ale = ALEInterface() self.rng = get_rng(self) self.ale.setInt(b"random_seed", self.rng.randint(0, 30000)) self.ale.setInt(b"max_num_frames_per_episode", max_num_frames) self.ale.setBool(b"showinfo", False) self.ale.setInt(b"frame_skip", 1) self.ale.setBool(b'color_averaging', False) # manual.pdf suggests otherwise. self.ale.setFloat(b'repeat_action_probability', 0.0) # viz setup if isinstance(viz, six.string_types): assert os.path.isdir(viz), viz self.ale.setString(b'record_screen_dir', viz) viz = 0 if isinstance(viz, int): viz = float(viz) self.viz = viz if self.viz and isinstance(self.viz, float): self.windowname = os.path.basename(rom_file) cv2.namedWindow(self.windowname) self.ale.loadROM(rom_file.encode('utf-8')) self.width, self.height = self.ale.getScreenDims() self.actions = self.ale.getMinimalActionSet() self.live_lost_as_eoe = live_lost_as_eoe self.frame_skip = frame_skip self.nullop_start = nullop_start self.action_space = spaces.Discrete(len(self.actions)) self.observation_space = spaces.Box( low=0, high=255, shape=(self.height, self.width, 1), dtype=np.uint8) self._restart_episode() def get_action_meanings(self): return [ACTION_MEANING[i] for i in self.actions] def _grab_raw_image(self): """ :returns: the current 3-channel image """ m = self.ale.getScreenRGB() return m.reshape((self.height, self.width, 3)) def _current_state(self): """ :returns: a gray-scale (h, w, 1) uint8 image """ ret = self._grab_raw_image() # max-pooled over the last screen ret = np.maximum(ret, self.last_raw_screen) if self.viz: if isinstance(self.viz, float): cv2.imshow(self.windowname, ret) cv2.waitKey(int(self.viz * 1000)) ret = ret.astype('float32') # 0.299, 0.587, 0.114, same as rgb2y in torch/image
ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)[:, :, np.newaxis] return ret.astype('uint8') # to save some memory def _restart_episode(self): with _ALE_LOCK: self.ale.reset_game() # random null-ops start n = self.rng.randint(self.nullop_start) self.last_raw_screen = self._grab_raw_image() for k in range(n): if k == n - 1: self.last_raw_screen = self._grab_raw_image() self.ale.act(0) def reset(self): if self.ale.game_over(): self._restart_episode() return self._current_state() def step(self, act): oldlives = self.ale.lives() r = 0 for k in range(self.frame_skip): if k == self.frame_skip - 1: self.last_raw_screen = self._grab_raw_image() r += self.ale.act(self.actions[act]) newlives = self.ale.lives() if self.ale.game_over() or \ (self.live_lost_as_eoe and newlives < oldlives): break isOver = self.ale.game_over() if self.live_lost_as_eoe: isOver = isOver or newlives < oldlives info = {'ale.lives': newlives} return self._current_state(), r, isOver, info
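# Hedged usage sketch for the gym-style AtariPlayer above; the rom file name
# is an assumption and the class must be importable from this module.
player = AtariPlayer('breakout.bin', frame_skip=4, live_lost_as_eoe=True)
obs = player.reset()                      # (h, w, 1) uint8 observation
done, score = False, 0
while not done:
    act = player.action_space.sample()    # random policy over minimal actions
    obs, r, done, info = player.step(act)
    score += r
print('score: {}, lives left: {}'.format(score, info['ale.lives']))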
def train_agent(gamepath, agent, n_episodes, display_screen, record_weights, reduce_exploration_prob_amount, n_frames_to_skip): """ :description: trains an agent to play a game :type gamepath: string :param gamepath: path to the binary of the game to be played :type agent: subclass RLAlgorithm :param agent: the algorithm/agent that learns to play the game :type n_episodes: int :param n_episodes: number of episodes of the game on which to train """ # load the ale interface to interact with ale = ALEInterface() ale.setInt('random_seed', 42) # display/recording settings, doesn't seem to work currently recordings_dir = './recordings/breakout/' # previously "USE_SDL" if display_screen: if sys.platform == 'darwin': import pygame pygame.init() ale.setBool('sound', False) # Sound doesn't work on OSX #ale.setString("record_screen_dir", recordings_dir); elif sys.platform.startswith('linux'): ale.setBool('sound', True) ale.setBool('display_screen', True) ale.loadROM(gamepath) ale.setInt("frame_skip", n_frames_to_skip) screen_preprocessor = screen_utils.RGBScreenPreprocessor() rewards = [] best_reward = 0 print('starting training...') for episode in xrange(n_episodes): action = 0 reward = 0 newAction = None total_reward = 0 counter = 0 lives = ale.lives() screen = np.zeros((32, 32, 3), dtype=np.int8) state = { "screen" : screen, "objects" : None, "prev_objects": None, "prev_action": 0, "action": 0 } while not ale.game_over(): # if newAction is None then we are training an off-policy algorithm # otherwise, we are training an on policy algorithm if newAction is None: action = agent.getAction(state) else: action = newAction reward += ale.act(action) if ale.lives() < lives: lives = ale.lives() reward -= 1 total_reward += reward new_screen = ale.getScreenRGB() new_screen = screen_preprocessor.preprocess(new_screen) new_state = {"screen": new_screen, "objects": None, "prev_objects": state["objects"], "prev_action": state["action"], "action": action} newAction = agent.incorporateFeedback(state, action, reward, new_state) state = new_state reward = 0 rewards.append(total_reward) if total_reward > best_reward and record_weights: best_reward = total_reward print("Best reward: {}".format(total_reward)) if episode % PRINT_TRAINING_INFO_PERIOD == 0: print '\n############################' print '### training information ###' print("Average reward: {}".format(np.mean(rewards))) print("Last 50: {}".format(np.mean(rewards[-NUM_EPISODES_AVERAGE_REWARD_OVER:]))) print("Exploration probability: {}".format(agent.explorationProb)) print('action: {}'.format(action)) print('size of weights dict: {}'.format(len(agent.weights))) print('current objects: {}'.format(state['objects'])) print('previous objects: {}'.format(state['prev_objects'])) avg_feat_weight = np.mean([v for k,v in agent.weights.iteritems()]) print('average feature weight: {}'.format(avg_feat_weight)) print '############################' print '############################\n' if episode != 0 and episode % RECORD_WEIGHTS_PERIOD == 0 and record_weights: file_utils.save_rewards(rewards, filename='episode-{}-{}-rewards'.format(episode, type(agent).__name__)) file_utils.save_weights(agent.weights, filename='episode-{}-{}-weights'.format(episode, type(agent).__name__)) if agent.explorationProb > MINIMUM_EXPLORATION_EPSILON: agent.explorationProb -= reduce_exploration_prob_amount print('episode: {} ended with score: {}'.format(episode, total_reward)) ale.reset_game() return rewards
class AtariPlayer(RLEnvironment): """ A wrapper for atari emulator. NOTE: will automatically restart when a real episode ends """ def __init__(self, rom_file, viz=0, height_range=(None, None), frame_skip=4, image_shape=(84, 84), nullop_start=30, live_lost_as_eoe=True): """ :param rom_file: path to the rom :param frame_skip: skip every k frames and repeat the action :param image_shape: (w, h) :param height_range: (h1, h2) to cut :param viz: visualization to be done. Set to 0 to disable. Set to a positive number to be the delay between frames to show. Set to a string to be a directory to store frames. :param nullop_start: start with random number of null ops :param live_lost_as_eoe: consider loss of lives as end of episode. useful for training. """ super(AtariPlayer, self).__init__() if not os.path.isfile(rom_file) and '/' not in rom_file: rom_file = os.path.join(get_dataset_dir('atari_rom'), rom_file) assert os.path.isfile(rom_file), \ "rom {} not found. Please download at {}".format(rom_file, ROM_URL) try: ALEInterface.setLoggerMode(ALEInterface.Logger.Warning) except AttributeError: log_once() # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86 with _ALE_LOCK: self.ale = ALEInterface() self.rng = get_rng(self) self.ale.setInt("random_seed", self.rng.randint(0, 10000)) self.ale.setBool("showinfo", False) self.ale.setInt("frame_skip", 1) self.ale.setBool('color_averaging', False) # manual.pdf suggests otherwise. self.ale.setFloat('repeat_action_probability', 0.0) # viz setup if isinstance(viz, six.string_types): assert os.path.isdir(viz), viz self.ale.setString('record_screen_dir', viz) viz = 0 if isinstance(viz, int): viz = float(viz) self.viz = viz if self.viz and isinstance(self.viz, float): self.windowname = os.path.basename(rom_file) cv2.startWindowThread() cv2.namedWindow(self.windowname) self.ale.loadROM(rom_file) self.width, self.height = self.ale.getScreenDims() self.actions = self.ale.getMinimalActionSet() self.live_lost_as_eoe = live_lost_as_eoe self.frame_skip = frame_skip self.nullop_start = nullop_start self.height_range = height_range self.image_shape = image_shape self.current_episode_score = StatCounter() self.restart_episode() def _grab_raw_image(self): """ :returns: the current 3-channel image """ m = self.ale.getScreenRGB() return m.reshape((self.height, self.width, 3)) def current_state(self): """ :returns: a gray-scale (h, w, 1) float32 image """ ret = self._grab_raw_image() # max-pooled over the last screen ret = np.maximum(ret, self.last_raw_screen) if self.viz: if isinstance(self.viz, float): #m = cv2.resize(ret, (1920,1200)) cv2.imshow(self.windowname, ret) time.sleep(self.viz) ret = ret[self.height_range[0]:self.height_range[1], :].astype( 'float32') # 0.299, 0.587, 0.114, same as rgb2y in torch/image
ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY) ret = cv2.resize(ret, self.image_shape) ret = np.expand_dims(ret, axis=2) return ret def get_action_space(self): return DiscreteActionSpace(len(self.actions)) def restart_episode(self): if self.current_episode_score.count > 0: self.stats['score'].append(self.current_episode_score.sum) self.current_episode_score.reset() self.ale.reset_game() # random null-ops start n = self.rng.randint(self.nullop_start) self.last_raw_screen = self._grab_raw_image() for k in range(n): if k == n - 1: self.last_raw_screen = self._grab_raw_image() self.ale.act(0) def action(self, act): """ :param act: an index of the action :returns: (reward, isOver) """ oldlives = self.ale.lives() r = 0 for k in range(self.frame_skip): if k == self.frame_skip - 1: self.last_raw_screen = self._grab_raw_image() r += self.ale.act(self.actions[act]) newlives = self.ale.lives() if self.ale.game_over() or \ (self.live_lost_as_eoe and newlives < oldlives): break self.current_episode_score.feed(r) isOver = self.ale.game_over() if isOver: self.restart_episode() if self.live_lost_as_eoe: isOver = isOver or newlives < oldlives return (r, isOver)
keys |= pressed[pygame.K_UP] keys |= pressed[pygame.K_DOWN] <<1 keys |= pressed[pygame.K_LEFT] <<2 keys |= pressed[pygame.K_RIGHT] <<3 keys |= pressed[pygame.K_z] <<4 a = key_action_tform_table[keys] reward = ale.act(a); cur_time += 1 total_reward += reward #clear screen screen.fill((0,0,0)) #get atari screen pixels and blit them numpy_surface = np.frombuffer(game_surface.get_buffer(),dtype=np.int32) ale.getScreenRGB(numpy_surface) logger.log(a, TYPE_ACTION, cur_time) #if cur_time %2 == 0: logger.log(numpy_surface, TYPE_SCREEN, cur_time) del numpy_surface screen.blit(pygame.transform.scale2x(game_surface),(0,0)) #get RAM ram_size = ale.getRAMSize() ram = np.zeros((ram_size),dtype=np.uint8) ale.getRAM(ram) #Display ram bytes font = pygame.font.SysFont("Ubuntu Mono",32)
def test(session, hist_len=4, discount=0.99, act_rpt=4, upd_freq=4, min_sq_grad=0.01, epsilon=0.05, no_op_max=30, num_tests=1, learning_rate=0.0025, momentum=0.95, sq_momentum=0.95): #Create ALE object if len(sys.argv) < 3: print('Usage: %s rom_file record_screen_dir' % sys.argv[0]) sys.exit() ale = ALEInterface() record_path = sys.argv[2] ale.setString('record_screen_dir', record_path) ale.setString('record_sound_filename', (record_path + '/sound.wav')) ale.setInt('fragsize', 64) cmd = 'mkdir ' cmd += record_path os.system(cmd) # Get & Set the desired settings ale.setInt('random_seed', 123) #Changes repeat action probability from default of 0.25 ale.setFloat('repeat_action_probability', 0.0) # Set USE_SDL to true to display the screen. ALE must be compiled # with SDL enabled for this to work. On OSX, pygame init is used to # proxy-call SDL_main. USE_SDL = False if USE_SDL: if sys.platform == 'darwin': import pygame pygame.init() ale.setBool('sound', False) # Sound doesn't work on OSX elif sys.platform.startswith('linux'): ale.setBool('sound', True) ale.setBool('display_screen', True) # Load the ROM file ale.loadROM(sys.argv[1]) # create DQN agent # learning_rate and momentum are unused parameters (but needed) agent = DQN(ale, session, epsilon, learning_rate, momentum, sq_momentum, hist_len, len(ale.getMinimalActionSet()), None, discount, rom_name(sys.argv[1])) #Store the most recent two images preprocess_stack = deque([], 2) num_episodes = 0 while num_episodes < num_tests: #initialize sequence with initial image seq = list() #We only have one image, we cannot combine two images perform_no_ops(ale, no_op_max, preprocess_stack, seq) #proc_seq.append(pp.preprocess(seq)) total_reward = 0 while not ale.game_over(): state = get_state(seq, hist_len) action = agent.get_action_best_network(state, epsilon) #skip frames by repeating action reward = 0 for i in range(act_rpt): reward = reward + ale.act(action) preprocess_stack.append(ale.getScreenRGB()) seq.append(pp.preprocess(preprocess_stack[0], preprocess_stack[1])) total_reward += reward print('Episode ended with score: %d' % (total_reward)) num_episodes = num_episodes + 1 ale.reset_game()
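# Sketch of the action-repeat (frame-skip) pattern used in the inner loop
# above, factored out standalone (ale is any loaded ALEInterface; assumption):
# one chosen action is repeated act_rpt times and the step rewards are summed.
def repeat_action(ale, action, act_rpt=4):
    reward = 0
    for _ in range(act_rpt):
        reward += ale.act(action)   # one emulator frame per call
    return reward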
class AtariPlayer(gym.Env): """ A wrapper for ALE emulator, with configurations to mimic DeepMind DQN settings. Info: score: the accumulated reward in the current game gameOver: True when the current game is Over """ def __init__(self, rom_file, viz=0, frame_skip=4, nullop_start=30, live_lost_as_eoe=True, max_num_frames=0): """ Args: rom_file: path to the rom frame_skip: skip every k frames and repeat the action viz: visualization to be done. Set to 0 to disable. Set to a positive number to be the delay between frames to show. Set to a string to be a directory to store frames. nullop_start: start with random number of null ops. live_lost_as_eoe: consider loss of lives as end of episode. Useful for training. max_num_frames: maximum number of frames per episode. """ super(AtariPlayer, self).__init__() if not os.path.isfile(rom_file) and '/' not in rom_file: rom_file = get_dataset_path('atari_rom', rom_file) assert os.path.isfile(rom_file), \ "rom {} not found. Please download at {}".format(rom_file, ROM_URL) try: ALEInterface.setLoggerMode(ALEInterface.Logger.Error) except AttributeError: if execute_only_once(): logger.warn("You're not using latest ALE") # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86 with _ALE_LOCK: self.ale = ALEInterface() self.rng = get_rng(self) self.ale.setInt(b"random_seed", self.rng.randint(0, 30000)) self.ale.setInt(b"max_num_frames_per_episode", max_num_frames) self.ale.setBool(b"showinfo", False) self.ale.setInt(b"frame_skip", 1) self.ale.setBool(b'color_averaging', False) # manual.pdf suggests otherwise. self.ale.setFloat(b'repeat_action_probability', 0.0) # viz setup if isinstance(viz, six.string_types): assert os.path.isdir(viz), viz self.ale.setString(b'record_screen_dir', viz) viz = 0 if isinstance(viz, int): viz = float(viz) self.viz = viz if self.viz and isinstance(self.viz, float): self.windowname = os.path.basename(rom_file) cv2.startWindowThread() cv2.namedWindow(self.windowname) self.ale.loadROM(rom_file.encode('utf-8')) self.width, self.height = self.ale.getScreenDims() self.actions = self.ale.getMinimalActionSet() self.live_lost_as_eoe = live_lost_as_eoe self.frame_skip = frame_skip self.nullop_start = nullop_start self.action_space = spaces.Discrete(len(self.actions)) self.observation_space = spaces.Box( low=0, high=255, shape=(self.height, self.width)) self._restart_episode() def get_action_meanings(self): return [ACTION_MEANING[i] for i in self.actions] def _grab_raw_image(self): """ :returns: the current 3-channel image """ m = self.ale.getScreenRGB() return m.reshape((self.height, self.width, 3)) def _current_state(self): """ :returns: a gray-scale (h, w) uint8 image """ ret = self._grab_raw_image() # max-pooled over the last screen ret = np.maximum(ret, self.last_raw_screen) if self.viz: if isinstance(self.viz, float): cv2.imshow(self.windowname, ret) cv2.waitKey(int(self.viz * 1000)) ret = ret.astype('float32') # 0.299, 0.587, 0.114, same as rgb2y in torch/image
ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY) return ret.astype('uint8') # to save some memory def _restart_episode(self): with _ALE_LOCK: self.ale.reset_game() # random null-ops start n = self.rng.randint(self.nullop_start) self.last_raw_screen = self._grab_raw_image() for k in range(n): if k == n - 1: self.last_raw_screen = self._grab_raw_image() self.ale.act(0) def _reset(self): if self.ale.game_over(): self._restart_episode() return self._current_state() def _step(self, act): oldlives = self.ale.lives() r = 0 for k in range(self.frame_skip): if k == self.frame_skip - 1: self.last_raw_screen = self._grab_raw_image() r += self.ale.act(self.actions[act]) newlives = self.ale.lives() if self.ale.game_over() or \ (self.live_lost_as_eoe and newlives < oldlives): break isOver = self.ale.game_over() if self.live_lost_as_eoe: isOver = isOver or newlives < oldlives info = {'ale.lives': newlives} return self._current_state(), r, isOver, info
class AleEnv(object): '''ALE wrapper for RL training game_over_conditions={'points':(-1, 1)}: dict that describes all desired game over conditions each key corresponds to a condition that is checked; the first condition met produces a game over points: int or tuple of integers int: if x < 0, game ends when score is <= x if x >= 0, game ends when score is >= x tuple: game ends if score <= x[0] or score >= x[1] lives: int that ends game when lives <= x frames: int that ends game when total number of frames >= x episodes: int that ends game when num of episodes >= x Use max_num_frames_per_episode to set max episode length ''' # will include timing and hidden functionality in future iterations def __init__(self, rom_file, display_screen=False, sound=False, random_seed=0, game_over_conditions={}, frame_skip=1, repeat_action_probability=0.25, max_num_frames_per_episode=0, min_action_set=False, screen_color='gray', fps=60, output_buffer_size=1, reduce_screen=False): # ALE instance and setup self.ale = ALEInterface() #TODO: check if rom file exists; will crash jupyter kernel otherwise self.ale.loadROM(str.encode(rom_file)) self.ale.setBool(b'sound', sound) self.ale.setBool(b'display_screen', display_screen) if min_action_set: self.legal_actions = self.ale.getMinimalActionSet() else: self.legal_actions = self.ale.getLegalActionSet() self.ale.setInt(b'random_seed', random_seed) self.ale.setInt(b'frame_skip', frame_skip) self.frame_skip = frame_skip self.ale.setFloat(b'repeat_action_probability', repeat_action_probability) self.ale.setInt(b'max_num_frames_per_episode', max_num_frames_per_episode) self.ale.loadROM(str.encode(rom_file)) self.game_over_conditions = game_over_conditions self.screen_color = screen_color self.reduce_screen = reduce_screen self.d_frame = (fps**-1) * self.frame_skip # set up output buffer self.output_buffer_size = output_buffer_size self.queue_size = self.output_buffer_size self._reset_params() def observe(self, flatten=False, expand_dim=False): if flatten is True: out = np.stack(self.output_queue[i] for i in range(self.output_buffer_size)).flatten() if expand_dim is True: return np.expand_dims(np.expand_dims(out, axis=0), axis=1) else: return out else: out = np.stack(self.output_queue[i] for i in range(self.output_buffer_size)) out = np.squeeze(out) if expand_dim is True: return np.expand_dims(np.expand_dims(out, axis=0), axis=1) else: return out @property def width(self): return self.game_screen.shape[1] @property def height(self): return self.game_screen.shape[0] @property def game_over(self): return self._game_over() @property def actions(self): return self.legal_actions @property def lives(self): return self.ale.lives() def _reset_params(self): self.total_points = 0 self.total_frames = 0 self.curr_episode = 1 self.prev_ep_frame_num = -float("inf") if self.screen_color == 'gray' or self.screen_color == 'grey': self.game_screen = np.squeeze(self.ale.getScreenGrayscale()) if self.reduce_screen: self.game_screen = resize(self.game_screen, output_shape=(110, 84)) self.game_screen = self.game_screen[0 + 21:-1 - 4, :] elif self.screen_color == 'rgb' or self.screen_color == 'color': self.game_screen = self.ale.getScreenRGB() if self.reduce_screen: self.game_screen = resize(self.game_screen, output_shape=(110, 84, 3)) self.game_screen = self.game_screen[0 + 21:-1 - 4, :, :] self.output_queue = deque( np.zeros(shape=(self.queue_size - 1, self.height, self.width)), self.queue_size) self.output_queue.appendleft(self.game_screen) def reset(self): self.ale.reset_game() 
self._reset_params() def act(self, action): reward = self.ale.act(self.legal_actions[action]) if self.screen_color == 'gray' or self.screen_color == 'grey': self.game_screen = np.squeeze(self.ale.getScreenGrayscale()) if self.reduce_screen: self.game_screen = resize(self.game_screen, output_shape=(110, 84)) self.game_screen = self.game_screen[0 + 21:-1 - 4, :] elif self.screen_color == 'rgb' or self.screen_color == 'color': self.game_screen = self.ale.getScreenRGB() if self.reduce_screen: self.game_screen = resize(self.game_screen, output_shape=(110, 84, 3)) self.game_screen = self.game_screen[0 + 21:-1 - 4, :, :] self.output_queue.pop() self.output_queue.appendleft(self.game_screen) self.total_points += reward self.total_frames += self.frame_skip if self.ale.getEpisodeFrameNumber() <= self.prev_ep_frame_num: self.curr_episode += 1 self.prev_ep_frame_num = self.ale.getEpisodeFrameNumber() return reward, self.d_frame, self.game_over def _game_over(self): if self.ale.game_over(): return True for cond in self.game_over_conditions: if cond == 'points': if isinstance(self.game_over_conditions[cond], int): if self.total_points >= self.game_over_conditions[cond]: return True elif isinstance(self.game_over_conditions[cond], tuple): if (self.total_points <= self.game_over_conditions[cond][0] or self.total_points >= self.game_over_conditions[cond][1]): return True elif cond == 'lives': if self.lives <= self.game_over_conditions[cond]: return True elif cond == 'frames': if self.total_frames >= self.game_over_conditions[cond]: return True elif cond == 'episodes': if self.curr_episode >= self.game_over_conditions[cond]: return True else: raise RuntimeError("ERROR: Invalid game over condition") return False
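# Hedged usage sketch for AleEnv's game_over_conditions above (the rom path is
# an assumption): stop when the score reaches -1/+1 or after 10000 raw frames.
import numpy as np
env = AleEnv('roms/pong.bin', game_over_conditions={'points': (-1, 1), 'frames': 10000}, frame_skip=4, screen_color='gray', output_buffer_size=4)
while not env.game_over:
    reward, d_frame, done = env.act(np.random.randint(len(env.actions)))
stacked = env.observe()   # the most recent output_buffer_size screens, stacked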
def train(gamepath, n_episodes, display_screen, record_weights, reduce_exploration_prob_amount, n_frames_to_skip, exploration_prob, verbose, discount, learning_rate, load_weights, frozen_target_update_period, use_replay_mem): """ :description: trains an agent to play a game :type gamepath: string :param gamepath: path to the binary of the game to be played :type n_episodes: int :param n_episodes: number of episodes of the game on which to train display_screen : whether or not to display the screen of the game record_weights : whether or not to save the weights of the network reduce_exploration_prob_amount : amount to reduce exploration prob each episode to not reduce exploration_prob set to 0 n_frames_to_skip : how frequently to determine a new action to use exploration_prob : probability of choosing a random action verbose : whether or not to print information about the run periodically discount : discount factor used in learning learning_rate : the scaling factor for the sgd update load_weights : whether or not to load weights for the network (set the files directly below) frozen_target_update_period : the number of episodes between resetting the target of the network """ # load the ale interface to interact with ale = ALEInterface() ale.setInt('random_seed', 42) # display/recording settings, doesn't seem to work currently recordings_dir = './recordings/breakout/' # previously "USE_SDL" if display_screen: if sys.platform == 'darwin': import pygame pygame.init() ale.setBool('sound', False) # Sound doesn't work on OSX #ale.setString("record_screen_dir", recordings_dir); elif sys.platform.startswith('linux'): ale.setBool('sound', True) ale.setBool('display_screen', True) ale.loadROM(gamepath) ale.setInt("frame_skip", n_frames_to_skip) # real actions for breakout are [0,1,3,4] real_actions = ale.getMinimalActionSet() # use a list of actions [0,1,2,3] to index into the array of real actions actions = np.arange(len(real_actions)) # these theano variables are used to define the symbolic input of the network features = T.dvector('features') action = T.lscalar('action') reward = T.dscalar('reward') next_features = T.dvector('next_features') # load weights by file name # currently must be loaded by individual hidden layers if load_weights: hidden_layer_1 = file_utils.load_model('weights/hidden0_replay.pkl') hidden_layer_2 = file_utils.load_model('weights/hidden1_replay.pkl') else: # defining the hidden layer network structure # the n_hid of a prior layer must equal the n_vis of a subsequent layer # for q-learning the output layer must be of len(actions) hidden_layer_1 = HiddenLayer(n_vis=NNET_INPUT_DIMENSION, n_hid=NNET_INPUT_DIMENSION, layer_name='hidden1', activation='relu') hidden_layer_2 = HiddenLayer(n_vis=NNET_INPUT_DIMENSION, n_hid=NNET_INPUT_DIMENSION, layer_name='hidden2', activation='relu') hidden_layer_3 = HiddenLayer(n_vis=NNET_INPUT_DIMENSION, n_hid=len(actions), layer_name='hidden3', activation='relu') # the output layer is currently necessary when using tanh units in the # hidden layer in order to prevent a theano warning # currently the relu unit setting of the hidden and output layers is leaky w/ alpha=0.01 output_layer = OutputLayer(layer_name='output', activation='relu') # pass a list of layers to the constructor of the network (here called "mlp") layers = [hidden_layer_1, hidden_layer_2, hidden_layer_3, output_layer] qnetwork = QNetwork(layers, discount=discount, learning_rate=learning_rate) # this call gets the symbolic output of the network # along with the expected parameter updates 
loss, updates = qnetwork.get_loss_and_updates(features, action, reward, next_features) # this defines the theano symbolic function used to train the network # 1st argument is a list of inputs, here the symbolic variables above # 2nd argument is the symbolic output expected # 3rd argument is the dictionary of parameter updates # 4th argument is the compilation mode train_model = theano.function( [theano.Param(features, default=np.zeros(NNET_INPUT_DIMENSION)), theano.Param(action, default=0), theano.Param(reward, default=0), theano.Param(next_features, default=np.zeros(NNET_INPUT_DIMENSION))], outputs=loss, updates=updates, mode='FAST_RUN') sym_action = qnetwork.get_action(features) get_action = theano.function([features], sym_action) # some containers for collecting information about the training processes rewards = [] losses = [] best_reward = 4 sequence_examples = [] sampled_examples = [] # the preprocessor and feature extractor to use preprocessor = screen_utils.RGBScreenPreprocessor() feature_extractor = feature_extractors.NNetOpenCVBoundingBoxExtractor(max_features=MAX_FEATURES) if use_replay_mem: replay_mem = ReplayMemory() # main training loop, each episode is a full playthrough of the game for episode in xrange(n_episodes): # this implements the frozen target component of the network # by setting the frozen layers of the network to a copy of the current layers if episode % frozen_target_update_period == 0: qnetwork.frozen_layers = copy.deepcopy(qnetwork.layers) # some variables for collecting information about this particular run of the game total_reward = 0 action = 1 counter = 0 reward = 0 loss = 0 previous_param_0 = None # lives here is used for the reward heuristic of subtracting 1 from the reward # when we lose a life. currently commented out this functionality because # i think it might not be helpful. 
lives = ale.lives() # the initial state of the screen and state screen = np.zeros((preprocessor.dim, preprocessor.dim, preprocessor.channels)) state = { "screen" : screen, "objects" : None, "prev_objects": None, "features": np.zeros(MAX_FEATURES)} # start the actual play through of the game while not ale.game_over(): counter += 1 # get the current features, which is the representation of the state provided to # the "agent" (here just the network directly) features = state["features"] # epsilon greedy action selection (note that exploration_prob is reduced by # reduce_exploration_prob_amount after every game) if random.random() < exploration_prob: action = random.choice(actions) else: # to choose an action from the network, we fprop # the current state and take the argmax of the output # layer (i.e., the action that corresponds to the # maximum q value) action = get_action(features) # take the action and receive the reward reward += ale.act(real_actions[action]) # this is commented out because i think it might not be helpful if ale.lives() < lives: lives = ale.lives() reward -= 1 # get the next screen, preprocess it, initialize the next state next_screen = ale.getScreenRGB() next_screen = preprocessor.preprocess(next_screen) next_state = {"screen": next_screen, "objects": None, "prev_objects": state["objects"]} # get the features for the next state next_features = feature_extractor(next_state, action=None) if use_replay_mem: sars_tuple = (features, action, reward, next_features) replay_mem.store(sars_tuple) num_samples = 5 if replay_mem.isFull() else 1 for i in range(0, num_samples): random_train_tuple = replay_mem.sample() loss += train_model(*random_train_tuple) # collect for pca sequence_examples.append(list(sars_tuple[0]) + [sars_tuple[1]] \ + [sars_tuple[2]] + sars_tuple[3]) sequence_examples = sequence_examples[-100:] sampled_examples.append(list(random_train_tuple[0]) + [random_train_tuple[1]] \ + [random_train_tuple[2]] + random_train_tuple[3]) sampled_examples = sampled_examples[-100:] else: # call the train model function loss += train_model(features, action, reward, next_features) # prepare for the next loop through the game next_state["features"] = next_features state = next_state # weird counter value to avoid interaction with any other counter # loop that might be added, not necessary right now if verbose and counter % PRINT_TRAINING_INFO_PERIOD == 0: print('*' * 15 + ' training information ' + '*' * 15) print('episode: {}'.format(episode)) print('reward: \t{}'.format(reward)) print('avg reward: \t{}'.format(np.mean(rewards))) print 'avg reward (last 25): \t{}'.format(np.mean(rewards[-NUM_EPISODES_AVERAGE_REWARD_OVER:])) print('action: \t{}'.format(real_actions[action])) print('exploration prob: {}'.format(exploration_prob)) param_info = [(p.eval(), p.name) for p in qnetwork.get_params()] for index, (val, name) in enumerate(param_info): if previous_param_0 is None and index == 0: previous_param_0 = val print('parameter {} value: \n{}'.format(name, val)) if index == 0: diff = val - previous_param_0 print('difference from previous param {}: \n{}'.format(name, diff)) print('features: \t{}'.format(features)) print('next_features: \t{}'.format(next_features)) scaled_sequence = preprocessing.scale(np.array(sequence_examples)) scaled_sampled = preprocessing.scale(np.array(sampled_examples)) pca = PCA() _ = pca.fit_transform(scaled_sequence) print('variance explained by first component for sequence: {}%'.format(pca. 
\ explained_variance_ratio_[0] * 100)) _ = pca.fit_transform(scaled_sampled) print('variance explained by first component for sampled: {}%'.format(pca. \ explained_variance_ratio_[0] * 100)) print('*' * 52) print('\n') # collect info and total reward and also reset the reward to 0 if we reach this point total_reward += reward reward = 0 # collect stats from this game run losses.append(loss) rewards.append(total_reward) # if we got a best reward, inform the user if total_reward > best_reward: best_reward = total_reward print("best reward!: {}".format(total_reward)) # record the weights if record_weights=True # must record the weights of the indiviual layers # only save hidden layers b/c output layer does not have weights if episode != 0 and episode % RECORD_WEIGHTS_PERIOD == 0 and record_weights: file_utils.save_rewards(rewards) file_utils.save_model(qnetwork.layers[0], 'weights/hidden0_{}.pkl'.format(episode)) file_utils.save_model(qnetwork.layers[1], 'weights/hidden1_{}.pkl'.format(episode)) # reduce exploration policy over time if exploration_prob > MINIMUM_EXPLORATION_EPSILON: exploration_prob -= reduce_exploration_prob_amount # inform user of how the episode went and reset the game print('episode: {} ended with score: {}\tloss: {}'.format(episode, rewards[-1], losses[-1])) ale.reset_game() # return the list of rewards attained return rewards
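# Epsilon-greedy selection as used in the training loop above, factored into a
# standalone sketch (get_action stands for the compiled theano argmax-Q
# function; actions is the index array over the minimal action set):
import random
def epsilon_greedy(get_action, features, actions, exploration_prob):
    if random.random() < exploration_prob:
        return random.choice(list(actions))   # explore: uniform random action
    return get_action(features)               # exploit: argmax_a Q(s, a)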
class ALEEnvironment(BaseEnvironment): """ A wrapper of Arcade Learning Environment, which inherits all members of ``BaseEnvironment``. """ # 63 games ADVENTURE = "adventure" AIR_RAID = "air_raid" ALIEN = "alien" AMIDAR = "amidar" ASSAULT = "assault" ASTERIX = "asterix" ASTEROIDS = "asteroids" ATLANTIS = "atlantis" BANK_HEIST = "bank_heist" BATTLE_ZONE = "battle_zone" BEAM_RIDER = "beam_rider" BERZERK = "berzerk" BOWLING = "bowling" BOXING = "boxing" BREAKOUT = "breakout" CARNIVAL = "carnival" CENTIPEDE = "centipede" CHOPPER_COMMAND = "chopper_command" CRAZY_CLIMBER = "crazy_climber" DEFENDER = "defender" DEMON_ATTACK = "demon_attack" DOUBLE_DUNK = "double_dunk" ELEVATOR_ACTION = "elevator_action" ENDURO = "enduro" FISHING_DERBY = "fishing_derby" FREEWAY = "freeway" FROSTBITE = "frostbite" GOPHER = "gopher" GRAVITAR = "gravitar" HERO = "hero" ICE_HOCKEY = "ice_hockey" JAMESBOND = "jamesbond" JOURNEY_ESCAPE = "journey_escape" KABOOM = "kaboom" KANGAROO = "kangaroo" KRULL = "krull" KUNGFU_MASTER = "kung_fu_master" MONTEZUMA = "montezuma_revenge" MS_PACMAN = "ms_pacman" UNKNOWN = "name_this_game" PHOENIX = "phoenix" PITFALL = "pitfall" PONG = "pong" POOYAN = "pooyan" PRIVATE_EYE = "private_eye" QBERT = "qbert" RIVERRAID = "riverraid" ROAD_RUNNER = "road_runner" ROBOTANK = "robotank" SEAQUEST = "seaquest" SKIING = "skiing" SOLARIS = "solaris" SPACE_INVADERS = "space_invaders" STAR_GUNNER = "star_gunner" TENNIS = "tennis" TIME_PILOT = "time_pilot" TUTANKHAM = "tutankham" UP_N_DOWN = "up_n_down" VENTURE = "venture" VIDEO_PINBALL = "video_pinball" WIZARD_OF_WOR = "wizard_of_wor" YARS_REVENGE = "yars_revenge" ZAXXON = "zaxxon" def __init__(self, rom_name, frame_skip=4, repeat_action_probability=0., max_episode_steps=10000, loss_of_life_termination=False, loss_of_life_negative_reward=False, bitwise_max_on_two_consecutive_frames=False, is_render=False, seed=None, startup_policy=None, disable_actions=None, num_of_sub_actions=-1, state_processor=AtariProcessor(resize_shape=(84, 84), convert_to_grayscale=True)): os.environ['SDL_VIDEO_CENTERED'] = '1' file_exist = isfile(ALEEnvironment.get_rom_path(rom_name)) if not file_exist: raise ValueError("Rom not found ! Please put rom " + rom_name + ".bin into: " + ALEEnvironment.get_rom_path()) self.__rom_name = rom_name self.__ale = ALEInterface() if frame_skip < 0: print("Invalid frame_skip param ! Set default frame_skip = 4") self.__frame_skip = 4 else: self.__frame_skip = frame_skip if repeat_action_probability < 0 or repeat_action_probability > 1: raise ValueError("Invalid repeat_action_probability") else: self.__repeat_action_probability = repeat_action_probability self.__max_episode_steps = max_episode_steps self.__loss_of_life_termination = loss_of_life_termination self.__loss_of_life_negative_reward = loss_of_life_negative_reward self.__max_2_frames = bitwise_max_on_two_consecutive_frames # Max 2 frames only work with grayscale self.__grayscale = False if state_processor is not None and type( state_processor ) is AtariProcessor and state_processor.get_grayscale(): self.__grayscale = True if self.__max_2_frames and self.__frame_skip > 1 and self.__grayscale: self.__max_2_frames = True else: self.__max_2_frames = False self.__is_render = is_render self.__processor = state_processor if seed is None or seed <= 0 or seed >= 9999: if seed is not None and (seed < 0 or seed >= 9999): print("Invalid seed ! Default seed = randint(0, 9999)")
self.__seed = np.random.randint(0, 9999) self.__random_seed = True else: self.__random_seed = False self.__seed = seed self.__current_steps = 0 self.__is_life_lost = False self.__is_terminal = False self.__current_lives = 0 self.__action_reduction = num_of_sub_actions self.__scr_width, self.__scr_height, self.__action_set = self.__init_ale() self.__prev_buffer = np.empty((self.__scr_height, self.__scr_width, 3), dtype=np.uint8) self.__current_buffer = np.empty( (self.__scr_height, self.__scr_width, 3), dtype=np.uint8) self.__current_state = None self.__prev_state = None self.__startup_policy = startup_policy if disable_actions is None: self.__dis_act = [] else: self.__dis_act = disable_actions if self.__processor is not None and self.__processor.get_number_of_objectives() > 1: self.__multi_objs = True else: self.__multi_objs = False def get_processor(self): return self.__processor def __init_ale(self): self.__ale.setBool(b'display_screen', self.__is_render) if self.__max_2_frames and self.__frame_skip > 1: self.__ale.setInt(b'frame_skip', 1) else: self.__ale.setInt(b'frame_skip', self.__frame_skip) self.__ale.setInt(b'random_seed', self.__seed) self.__ale.setFloat(b'repeat_action_probability', self.__repeat_action_probability) self.__ale.setBool(b'color_averaging', False) self.__ale.loadROM( ALEEnvironment.get_rom_path(self.__rom_name).encode()) width, height = self.__ale.getScreenDims() return width, height, self.__ale.getMinimalActionSet() def clone(self): if self.__random_seed: seed = np.random.randint(0, 9999) else: seed = self.__seed return ALEEnvironment(self.__rom_name, self.__frame_skip, self.__repeat_action_probability, self.__max_episode_steps, self.__loss_of_life_termination, self.__loss_of_life_negative_reward, self.__max_2_frames, self.__is_render, seed, self.__startup_policy, self.__dis_act, self.__action_reduction, self.__processor.clone()) def step_all(self, a): if isinstance(a, (list, np.ndarray)): if len(a) <= 0: raise ValueError('Empty action list !') a = a[0] self.__current_steps += 1 act = self.__action_set[a] rew = self._step(act) next_state = self.get_state() _is_terminal = self.is_terminal() return next_state, rew, _is_terminal, self.__current_steps def reset(self): self.__ale.reset_game() self.__current_lives = self.__ale.lives() self.__is_life_lost = False self.__is_terminal = False self.__current_state = None self.__prev_state = None action_space = self.get_action_space() v_range, is_range = action_space.get_range() if len(v_range) > 1: self.step(1) # No op steps if self.__startup_policy is not None: max_steps = int(self.__startup_policy.get_max_steps()) for _ in range(max_steps): act = self.__startup_policy.step(self.get_state(), action_space) self.step(act) # Start training from this point self.__current_steps = 0 # Reset processor if self.__processor is not None: self.__processor.reset() return self.get_state() def _pre_step(self, act): if self.__max_2_frames and self.__frame_skip > 1: rew = 0 for i in range(self.__frame_skip - 2): rew += self.__ale.act(act) self.__prev_buffer = self.__ale.getScreenRGB( self.__prev_buffer) self.__prev_buffer = self.__ale.getScreenRGB(self.__prev_buffer) rew += self.__ale.act(act) self.__current_buffer = self.__ale.getScreenRGB( self.__current_buffer) self.__is_terminal = self.__ale.game_over() if self.__processor is not None: self.__prev_state = self.__processor.process( self.__prev_buffer) self.__current_state = self.__processor.process( self.__current_buffer) else: self.__prev_state = 
self.__prev_buffer self.__current_state = self.__current_buffer self.__current_state = np.maximum.reduce( [self.__prev_state, self.__current_state]) else: rew = self.__ale.act(act) self.__current_buffer = self.__ale.getScreenRGB( self.__current_buffer) self.__is_terminal = self.__ale.game_over() if self.__processor is not None: self.__current_state = self.__processor.process( self.__current_buffer) if self.__multi_objs and self.__processor is not None: all_rewards = self.__processor.get_rewards(rew) return all_rewards else: return rew def _step(self, act): for i in range(len(self.__dis_act)): if act == self.__dis_act[i]: act = 0 if not self.__loss_of_life_termination and not self.__loss_of_life_negative_reward: if not self.__is_terminal: next_lives = self.__ale.lives() if next_lives < self.__current_lives: act = 1 self.__current_lives = next_lives return self._pre_step(act) else: rew = self._pre_step(act) next_lives = self.__ale.lives() if next_lives < self.__current_lives: if self.__loss_of_life_negative_reward: rew -= 1 self.__current_lives = next_lives self.__is_life_lost = True return rew def get_state(self): if not self.__max_2_frames: if self.__processor is not None: return self.__current_state else: return self.__current_buffer else: return self.__current_state def is_terminal(self): if self.__loss_of_life_termination and self.__is_life_lost: return True elif self.__max_episode_steps is not None and self.__current_steps > self.__max_episode_steps: return True else: return self.__is_terminal @staticmethod def get_rom_path(rom=None): if rom is None: return os.path.dirname(os.path.abspath(__file__)) + "/roms/" else: return os.path.dirname( os.path.abspath(__file__)) + "/roms/" + rom + ".bin" @staticmethod def list_all_roms(): return [ f for f in listdir(ALEEnvironment.get_rom_path()) if isfile(join(ALEEnvironment.get_rom_path(), f)) ] def get_state_space(self): if self.__processor is None: shape = self.__current_buffer.shape else: shape = self.__processor.process(self.__current_buffer).shape min_value = np.zeros(shape, dtype=np.uint8) max_value = np.full(shape, 255) return Space(min_value, max_value, True) def get_action_space(self): if self.__action_reduction >= 1: return Space(0, self.__action_reduction - 1, True) else: return Space(0, len(self.__action_set) - 1, True) def step(self, act): if isinstance(act, (list, np.ndarray)): if len(act) <= 0: raise ValueError('Empty action list !') act = act[0] self.__current_steps += 1 act = self.__action_set[act] rew = self._step(act) return rew def get_current_steps(self): return self.__current_steps def is_atari(self): return True def is_render(self): return self.__is_render def get_number_of_objectives(self): if self.__processor is None: return 1 else: return self.__processor.get_number_of_objectives() def get_number_of_agents(self): if self.__processor is None: return 1 else: return self.__processor.get_number_of_agents() def get_state_processor(self): return self.__processor
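# Hedged usage sketch for this ALEEnvironment (assumes breakout.bin sits in
# the package's roms/ folder and that the repo's AtariProcessor and Space
# helpers are importable): run a no-op policy episode through step_all().
env = ALEEnvironment(ALEEnvironment.BREAKOUT, frame_skip=4, max_episode_steps=10000)
state = env.reset()
total = 0
while not env.is_terminal():
    next_state, rew, terminal, steps = env.step_all(0)   # no-op action index
    total += rew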
class AtariGame(Task): ''' RL task based on Arcade Game. ''' def __init__(self, rom_path, num_frames=4, live=False, skip_frame=0, mode='normal'): self.ale = ALEInterface() if live: USE_SDL = True if USE_SDL: if sys.platform == 'darwin': import pygame pygame.init() self.ale.setBool('sound', False) # Sound doesn't work on OSX elif sys.platform.startswith('linux'): self.ale.setBool('sound', True) self.ale.setBool('display_screen', True) self.mode = mode self.live = live self.ale.loadROM(rom_path) self.num_frames = num_frames self.frames = [] self.frame_id = 0 self.cum_reward = 0 self.skip_frame = skip_frame if mode == 'small': img = T.matrix('img') self.max_pool = theano.function([img], max_pool_2d(img, [4, 4])) self.img_shape = (16, 16) else: self.img_shape = (84, 84) # image shape according to DQN Nature paper. while len(self.frames) < self.num_frames: # fill the frame buffer (was hardcoded to 4) self.step(choice(self.valid_actions, 1)[0]) self.reset() def copy(self): import dill as pickle return pickle.loads(pickle.dumps(self)) def reset(self): self.ale.reset_game() self.frame_id = 0 self.cum_reward = 0 if self.skip_frame: for frame_i in range(self.skip_frame): self.step(choice(self.valid_actions, 1)[0]) @property def _curr_frame(self): img = self.ale.getScreenRGB() img = rgb2yuv(img)[:, :, 0] # get Y channel, according to Nature paper. # print 'RAM', self.ale.getRAM() if self.mode == 'small': img = self.max_pool(img) img = imresize(img, self.img_shape, interp='bicubic') return img @property def curr_state(self): ''' return raw pixels. ''' return np.array(self.frames, dtype=floatX) / floatX(255.) # normalize @property def state_shape(self): return self.curr_state.shape @property def num_actions(self): return len(self.valid_actions) @property def valid_actions(self): return self.ale.getLegalActionSet() def step(self, action): reward = self.ale.act(action) if len(self.frames) == self.num_frames: self.frames = self.frames[1:] self.frames.append(self._curr_frame) self.frame_id += 1 #print 'frame_id', self.frame_id self.cum_reward += reward return reward # TODO: scale the gradient up. def is_end(self): if np.abs(self.cum_reward) > 0: return True return self.ale.game_over() def visualize(self, fig=1, fname=None, format='png'): import matplotlib.pyplot as plt fig = plt.figure(fig, figsize=(5,5)) plt.clf() plt.axis('off') #res = plt.imshow(self.ale.getScreenRGB()) res = plt.imshow(self._curr_frame, interpolation='none') if fname: plt.savefig(fname, format=format) else: plt.show() return res
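# Sketch of the state tensor AtariGame.curr_state produces (shapes are
# assumptions for the default 'normal' mode): the last num_frames Y-channel
# screens, stacked and scaled to [0, 1].
import numpy as np
frames = [np.random.randint(0, 256, (84, 84)).astype(np.uint8) for _ in range(4)]
state = np.array(frames, dtype='float32') / 255.
assert state.shape == (4, 84, 84) and state.max() <= 1.0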
class AtariEmulator(BaseEnvironment):
    def __init__(self, rom_addr, random_start=False, random_seed=6,
                 visualize=True, single_life=False):
        self.ale = ALEInterface()
        self.ale.setInt(b"random_seed", 2 * random_seed)
        # For fuller control, repeat actions explicitly (>= ALE 0.5.0)
        self.ale.setFloat(b"repeat_action_probability", 0.0)
        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt(b"frame_skip", 1)
        self.ale.setBool(b"color_averaging", False)
        full_rom_path = rom_addr
        self.ale.loadROM(str.encode(full_rom_path))
        self.legal_actions = self.ale.getMinimalActionSet()
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.lives = self.ale.lives()
        self.writer = imageio.get_writer('breakout0.gif', fps=30)

        self.random_start = random_start
        self.single_life_episodes = single_life
        self.call_on_new_frame = visualize

        # Processed historical frames that will be fed in to the network
        # (i.e., four 84x84 images)
        self.observation_pool = ObservationPool(
            np.zeros((84, 84, 4), dtype=np.uint8))
        self.rgb_screen = np.zeros((self.screen_height, self.screen_width, 3),
                                   dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height, self.screen_width, 1),
                                    dtype=np.uint8)
        self.frame_pool = FramePool(
            np.empty((2, self.screen_height, self.screen_width),
                     dtype=np.uint8),
            self.__process_frame_pool)

    def get_legal_actions(self):
        return self.legal_actions

    def __get_screen_image(self):
        """
        Get the current frame luminance
        :return: the current frame
        """
        self.ale.getScreenGrayscale(self.gray_screen)
        if self.call_on_new_frame:
            self.ale.getScreenRGB(self.rgb_screen)
            self.on_new_frame(self.rgb_screen)
        return np.squeeze(self.gray_screen)

    def on_new_frame(self, frame):
        pass

    def __new_game(self):
        """ Restart game """
        self.ale.reset_game()
        self.lives = self.ale.lives()
        if self.random_start:
            wait = random.randint(0, MAX_START_WAIT)
            for _ in range(wait):
                self.ale.act(self.legal_actions[0])

    def __process_frame_pool(self, frame_pool):
        """ Preprocess frame pool: max over pooled frames, resized to 84x84 """
        img = np.amax(frame_pool, axis=0)
        img = imresize(img, (84, 84), interp='nearest')
        img = img.astype(np.uint8)
        return img

    def __action_repeat(self, a, times=ACTION_REPEAT):
        """ Repeat action and grab screen into frame pool """
        reward = 0
        for i in range(times - FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
        # Only need to add the last FRAMES_IN_POOL frames to the frame pool
        for i in range(FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
            self.frame_pool.new_frame(self.__get_screen_image())
        return reward

    def get_initial_state(self):
        """ Get the initial state """
        self.__new_game()
        for step in range(4):
            _ = self.__action_repeat(0)
            self.observation_pool.new_observation(
                self.frame_pool.get_processed_frame())
        if self.__is_terminal():
            raise Exception('This should never happen.')
        return self.observation_pool.get_pooled_observations()

    def next(self, action):
        """ Get the next state, reward, and game-over signal """
        reward = self.__action_repeat(np.argmax(action))
        self.observation_pool.new_observation(
            self.frame_pool.get_processed_frame())
        terminal = self.__is_terminal()
        self.lives = self.ale.lives()
        observation = self.observation_pool.get_pooled_observations()
        return observation, reward, terminal

    def __is_terminal(self):
        if self.single_life_episodes:
            return self.__is_over() or (self.lives > self.ale.lives())
        else:
            return self.__is_over()

    def __is_over(self):
        return self.ale.game_over()

    def get_noop(self):
        return [1.0, 0.0]
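A sketch of driving AtariEmulator with one-hot action vectors, which is what next() expects (it takes np.argmax of the action). The ROM path is illustrative, and the module-level constants ACTION_REPEAT, FRAMES_IN_POOL, and MAX_START_WAIT, plus the ObservationPool and FramePool helpers, are assumed to be defined elsewhere in this codebase.

import numpy as np

emulator = AtariEmulator('roms/breakout.bin', random_start=True, visualize=False)
state = emulator.get_initial_state()  # (84, 84, 4) uint8 observation stack
num_actions = len(emulator.get_legal_actions())
terminal = False
while not terminal:
    one_hot = np.eye(num_actions)[np.random.randint(num_actions)]
    state, reward, terminal = emulator.next(one_hot)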
class AleEnv(object):
    def __init__(self, rom, display_screen, use_env_frame_skip, frame_repeat):
        self.actions = None
        self.rom = rom
        self.display_screen = display_screen
        self.use_env_frame_skip = use_env_frame_skip
        self.frame_repeat = frame_repeat
        self.initialized = False  # set to True by initialize()

    def initialize(self):
        self.ale = ALEInterface()
        self.ale.setInt("random_seed", random.randint(1, 1000))
        if self.display_screen:
            self.ale.setBool('display_screen', True)
        if self.use_env_frame_skip:
            # Delegate frame skipping and color averaging to ALE itself
            self.ale.setInt('frame_skip', self.frame_repeat)
            self.ale.setBool('color_averaging', True)
        self.ale.setFloat('repeat_action_probability', 0)
        self.ale.loadROM(self.rom)
        self.actions = self.ale.getMinimalActionSet()
        print('actions: %s' % self.actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print('width/height: %s/%s' % (self.screen_width, self.screen_height))
        self.initialized = True

    def get_actions(self, rom=None):
        # Allows querying the action set without initializing this instance
        if self.actions is None and rom is not None:
            ale = ALEInterface()
            ale.loadROM(rom)
            self.actions = ale.getMinimalActionSet()
        return self.actions

    @property
    def state_dtype(self):
        return np.uint8

    @property
    def continuous_action(self):
        return False

    def reset_game(self):
        self.ale.reset_game()

    def lives(self):
        return self.ale.lives()

    def getScreenRGB(self):
        return self.ale.getScreenRGB()

    def getState(self, debug_display=False, debug_input=None):
        screen = self.ale.getScreenGrayscale()
        if screen is not None and debug_display:
            debug_input.show(screen.reshape(screen.shape[0], screen.shape[1]))
        return screen.reshape(self.screen_height, self.screen_width)

    def act(self, action):
        return self.ale.act(action)

    def game_over(self):
        return self.ale.game_over()

    def finish(self):
        return
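A short usage sketch for AleEnv, using only the methods defined above; the ROM path is illustrative. Note the two-phase construction: the ALEInterface is only created once initialize() is called.

import random

env = AleEnv('roms/breakout.bin', display_screen=False,
             use_env_frame_skip=True, frame_repeat=4)
env.initialize()  # creates the ALEInterface and loads the ROM
env.reset_game()
while not env.game_over():
    # Actions are raw ALE action values from the minimal action set
    env.act(random.choice(env.get_actions()))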