class ALE(object):
    """Wrapper around an ALE emulator that loads ``./breakout.bin``.

    The most recent reward, screen and terminal flag are cached on the
    instance and can be read back with :meth:`state`.
    """

    def __init__(self, init_seed, init_rand):
        """Create the emulator and load the ROM.

        Args:
            init_seed: Value written to ALE's ``random_seed`` setting.
            init_rand: Number of action-0 steps :meth:`new_game` performs
                before returning.
        """
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', init_seed)
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.loadROM('./breakout.bin')
        self.action_size = 4
        self.screen = None
        self.reward = 0
        self.terminal = True
        self.init_rand = init_rand

    def setSetting(self, action_repeat, screen_type):
        """Store the action-repeat count and screen type (0 = RGB, 1 = gray)."""
        self.action_repeat = action_repeat
        self.screen_type = screen_type

    def _capture_screen(self):
        """Return the current screen in the format chosen by ``screen_type``.

        Writes a message to stderr and exits with status 1 when
        ``screen_type`` is neither 0 nor 1.
        """
        if self.screen_type == 0:
            return self.ale.getScreenRGB()
        if self.screen_type == 1:
            return self.ale.getScreenGrayscale()
        sys.stderr.write('screen_type error!')
        # sys.exit is always available (the bare ``exit`` helper comes from
        # the ``site`` module) and a non-zero status signals the failure.
        sys.exit(1)

    def _step(self, action):
        """Apply ``action`` once and refresh reward, terminal flag and screen."""
        self.reward = self.ale.act(action)
        self.terminal = self.ale.game_over()
        self.screen = self._capture_screen()

    def state(self):
        """Return the cached ``(reward, screen, terminal)`` tuple."""
        return self.reward, self.screen, self.terminal

    def act(self, action):
        """Repeat ``action`` up to ``action_repeat`` times.

        Stops early when the episode terminates. The cached reward becomes
        the sum over the executed steps.

        Returns:
            The ``(reward, screen, terminal)`` tuple from :meth:`state`.
        """
        cumulated_reward = 0
        for _ in range(self.action_repeat):
            self._step(action)
            cumulated_reward += self.reward
            if self.terminal:
                break
        self.reward = cumulated_reward
        return self.state()

    def new_game(self):
        """Reset the emulator if the game is over and return the screen.

        After the optional reset, ``init_rand`` steps of action 0 are applied.
        """
        if self.ale.game_over():
            self.ale.reset_game()
        self.screen = self._capture_screen()
        for _ in range(self.init_rand):
            self._step(0)
        return self.screen
class emulator:
    """ALE wrapper that loads a ROM from ``roms/`` and can preview frames."""

    def __init__(self, rom_name, vis):
        """Configure ALE, load the ROM and optionally open a preview window.

        Args:
            rom_name: File name of the ROM inside the ``roms/`` directory.
            vis: When truthy, frames are shown in an OpenCV window.
        """
        self.ale = ALEInterface()
        # The setting key was previously misspelled ("max_mum_...").
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        # Maps each emulator action code to its position in legal_actions.
        self.action_map = dict()
        for index, action in enumerate(self.legal_actions):
            self.action_map[action] = index
        print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/"
              + str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow("preview")

    def get_image(self):
        """Return the current RGB screen reshaped to (height, width, 3)."""
        image = self.ale.getScreenRGB()
        image = np.reshape(image, (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):
        """Reset the emulator and return ``(image, 0, False)``."""
        self.ale.reset_game()
        return self.get_image(), 0, False

    def next(self, action_indx):
        """Apply ``action_indx`` and return ``(image, reward, game_over)``.

        The value is passed to ``ale.act`` unchanged.
        """
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        if self.vis:
            cv2.imshow('preview', nextstate)
        return nextstate, reward, self.ale.game_over()

    def train(self):
        """Play ten episodes with random actions, showing and saving frames."""
        for episode in range(10):
            total_reward = 0
            frame_number = 0
            while not self.ale.game_over():
                a = self.legal_actions[
                    random.randrange(len(self.legal_actions))]
                # Apply an action and get the resulting reward
                reward = self.ale.act(a)
                total_reward += reward
                screen = self.ale.getScreenRGB()
                screen = np.array(screen).reshape(
                    [self.screen_height, self.screen_width, -1])
                frame_number = self.ale.getEpisodeFrameNumber()
                cv2.imshow("screen", screen / 255.0)
                # waitKey(0) blocks until a key is pressed.
                cv2.waitKey(0)
                self.ale.saveScreenPNG("test_" + str(frame_number) + ".png")
            print('Episode %d ended with score: %d' % (episode, total_reward))
            print('Frame number is : %d' % frame_number)
            self.ale.reset_game()
class AtariMDP(MDP, Serializable):
    """MDP backed by an ALE emulator, observing either RAM or the image."""

    def __init__(self, rom_path, obs_type=OBS_RAM, frame_skip=4):
        """Load the ROM and remember the observation settings.

        Args:
            rom_path: Path handed to ``ALEInterface.loadROM``.
            obs_type: ``OBS_RAM`` or ``OBS_IMAGE``.
            frame_skip: Number of emulator steps performed per :meth:`step`.
        """
        Serializable.__init__(self, rom_path, obs_type, frame_skip)
        self.options = (rom_path, obs_type, frame_skip)

        self.ale = ALEInterface()
        self.ale.loadROM(rom_path)
        self._rom_path = rom_path
        self._obs_type = obs_type
        self._action_set = self.ale.getMinimalActionSet()
        self.frame_skip = frame_skip

    def get_image(self):
        """Return ``to_rgb(self.ale)``."""
        return to_rgb(self.ale)

    def get_ram(self):
        """Return ``to_ram(self.ale)``."""
        return to_ram(self.ale)

    def game_over(self):
        """Return the emulator's game-over flag."""
        return self.ale.game_over()

    def reset_game(self):
        """Reset the emulator and return whatever ``reset_game`` returns."""
        return self.ale.reset_game()

    @property
    def n_actions(self):
        """Number of actions in the minimal action set."""
        return len(self.action_set)

    def get_obs(self):
        """Return the current observation with a leading axis of size 1."""
        if self._obs_type == OBS_RAM:
            return self.get_ram()[None, :]
        else:
            assert self._obs_type == OBS_IMAGE
            return self.get_image()[None, :, :, :]

    def step(self, a):
        """Repeat action index ``a`` for ``frame_skip`` emulator steps.

        Returns:
            ``(ob, reward, done)``: the observation flattened to shape
            ``(1, -1)``, the summed reward as a one-element array, and the
            emulator's game-over flag.
        """
        reward = 0.0
        action = self.action_set[a]
        # range (not xrange) keeps this working on Python 2 and 3.
        for _ in range(self.frame_skip):
            reward += self.ale.act(action)
        ob = self.get_obs().reshape(1, -1)
        return ob, np.array([reward]), self.ale.game_over()

    def reset(self):
        """Reset the emulator and return the first observation."""
        self.ale.reset_game()
        return self.get_obs()

    @property
    def action_set(self):
        """Minimal action set reported by the emulator."""
        return self._action_set

    def plot(self):
        """Show the current image in an OpenCV window named "atarigame"."""
        import cv2
        cv2.imshow("atarigame", self.get_image())  # pylint: disable=E1101
        cv2.waitKey(10)  # pylint: disable=E1101
class AtariMDP(MDP, Serializable):
    """ALE-backed MDP whose observation is the console RAM or the image."""

    def __init__(self, rom_path, obs_type=OBS_RAM, frame_skip=4):
        """Load the ROM and store the observation settings.

        Args:
            rom_path: Path passed to ``ALEInterface.loadROM``.
            obs_type: ``OBS_RAM`` or ``OBS_IMAGE``.
            frame_skip: Emulator steps executed for each :meth:`step` call.
        """
        Serializable.__init__(self, rom_path, obs_type, frame_skip)
        self.options = (rom_path, obs_type, frame_skip)

        self.ale = ALEInterface()
        self.ale.loadROM(rom_path)
        self._rom_path = rom_path
        self._obs_type = obs_type
        self._action_set = self.ale.getMinimalActionSet()
        self.frame_skip = frame_skip

    def get_image(self):
        """Return the result of ``to_rgb`` applied to the emulator."""
        return to_rgb(self.ale)

    def get_ram(self):
        """Return the result of ``to_ram`` applied to the emulator."""
        return to_ram(self.ale)

    def game_over(self):
        """Report whether the emulator says the game is over."""
        return self.ale.game_over()

    def reset_game(self):
        """Reset the emulator, returning the value of ``reset_game``."""
        return self.ale.reset_game()

    @property
    def n_actions(self):
        """Size of the minimal action set."""
        return len(self.action_set)

    def get_obs(self):
        """Return the current observation with a new leading axis."""
        if self._obs_type == OBS_RAM:
            return self.get_ram()[None, :]
        else:
            assert self._obs_type == OBS_IMAGE
            return self.get_image()[None, :, :, :]

    def step(self, a):
        """Apply action index ``a`` ``frame_skip`` times.

        Returns:
            A tuple ``(ob, reward, done)`` where ``ob`` is reshaped to
            ``(1, -1)``, ``reward`` is a one-element array holding the
            summed reward, and ``done`` is the game-over flag.
        """
        reward = 0.0
        action = self.action_set[a]
        # xrange does not exist on Python 3; range works on both.
        for _ in range(self.frame_skip):
            reward += self.ale.act(action)
        ob = self.get_obs().reshape(1, -1)
        return ob, np.array([reward]), self.ale.game_over()

    def reset(self):
        """Reset the emulator and return the initial observation."""
        self.ale.reset_game()
        return self.get_obs()

    @property
    def action_set(self):
        """The emulator's minimal action set."""
        return self._action_set

    def plot(self):
        """Display the current image in the "atarigame" OpenCV window."""
        import cv2
        cv2.imshow("atarigame", self.get_image())  # pylint: disable=E1101
        cv2.waitKey(10)  # pylint: disable=E1101
class Atari:
    """ALE wrapper that loads a ROM from ``game/`` and displays each frame."""

    def __init__(self, rom_name):
        """Load the game, set its parameters and create the display window.

        Args:
            rom_name: ROM file name inside ``game/``; also the window title.
        """
        # Step 1: load the game and set its parameters.
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            b"max_num_frames_per_episode")
        self.ale.setInt(b"random_seed", 123)
        self.ale.setInt(b"frame_skip", 4)
        self.ale.loadROM(('game/' + rom_name).encode())

        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getMinimalActionSet()
        # Maps each emulator action code to its index in legal_actions.
        self.action_map = dict()
        for index, action in enumerate(self.legal_actions):
            self.action_map[action] = index

        # Step 2: create the display window.
        self.windowname = rom_name
        cv2.startWindowThread()
        cv2.namedWindow(rom_name)

    def get_image(self):
        """Return the current screen as a (height, width, 3) uint8 array."""
        numpy_surface = np.zeros(
            self.screen_height * self.screen_width * 3, dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(
            numpy_surface, (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):
        """Reset the game and return the first frame."""
        self.ale.reset_game()
        return self.get_image()

    def next(self, action):
        """Apply an action and return ``(next_state, reward, terminal)``.

        Args:
            action: Sequence of scores; the arg-max index selects the entry
                of ``legal_actions`` to execute.

        The terminal flag is read *before* the automatic reset. Reading it
        afterwards made the returned flag describe the freshly reset game
        instead of the step that was just taken.
        """
        reward = self.ale.act(self.legal_actions[np.argmax(action)])
        nextstate = self.get_image()
        cv2.imshow(self.windowname, nextstate)
        terminal = self.ale.game_over()
        if terminal:
            self.newGame()
        return nextstate, reward, terminal
class env_atari:
    """Gym-like environment on top of ALE, configured from a params dict."""

    def __init__(self, params):
        """Configure the emulator and load ``roms/<params['rom']>.bin``.

        Reads ``repeat_prob``, ``frameskip`` and ``rom`` from ``params``.
        """
        self.params = params
        emulator = ALEInterface()
        emulator.setInt('random_seed', np.random.randint(0, 500))
        emulator.setFloat('repeat_action_probability', params['repeat_prob'])
        emulator.setInt(b'frame_skip', params['frameskip'])
        emulator.setBool('color_averaging', True)
        emulator.loadROM('roms/' + params['rom'] + '.bin')
        self.ale = emulator

        self.actions = emulator.getMinimalActionSet()
        self.action_space = c_action_space(len(self.actions))
        dims = emulator.getScreenDims()
        self.screen_width, self.screen_height = dims

    def reset(self):
        """Reset the game, apply 0-6 steps of action 0, return the frame."""
        self.ale.reset_game()
        noop_count = np.random.randint(0, 7)
        for _ in range(noop_count):
            self.ale.act(0)
        return self.get_image()

    def step(self, action):
        """Execute action index ``action``.

        Returns:
            ``(frame, reward, terminal_as_float, 0)``.
        """
        reward = self.ale.act(self.actions[action])
        frame = self.get_image()
        finished = float(self.ale.game_over())
        return frame, reward, finished, 0

    def get_image(self):
        """Return the RGB screen as a (height, width, 3) uint8 array."""
        height, width = self.screen_height, self.screen_width
        buf = np.zeros(height * width * 3, dtype=np.uint8)
        self.ale.getScreenRGB(buf)
        return buf.reshape((height, width, 3))
class Emulate:
    """ALE wrapper exposing resized grayscale and colour screens."""

    def __init__(self, rom_file, display_screen=False, frame_skip=4,
                 screen_height=84, screen_width=84,
                 repeat_action_probability=0, color_averaging=True,
                 random_seed=0, record_screen_path='screen_pics',
                 record_sound_filename=None, minimal_action_set=True):
        """Configure the emulator and load ``rom_file``.

        ``record_screen_path`` and ``record_sound_filename`` are accepted
        but not used by this constructor. ``random_seed`` is only applied
        when it is truthy.
        """
        ale = ALEInterface()
        self.ale = ale

        if display_screen:
            platform = sys.platform
            if platform == 'darwin':
                import pygame
                pygame.init()
                ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif platform.startswith('linux'):
                ale.setBool('sound', True)
            ale.setBool('display_screen', True)

        ale.setInt('frame_skip', frame_skip)
        ale.setFloat('repeat_action_probability', repeat_action_probability)
        ale.setBool('color_averaging', color_averaging)
        if random_seed:
            ale.setInt('random_seed', random_seed)

        ale.loadROM(rom_file)

        self.actions = (ale.getMinimalActionSet() if minimal_action_set
                        else ale.getLegalActionSet())
        self.dims = (screen_width, screen_height)

    def numActions(self):
        """Return how many actions are available."""
        return len(self.actions)

    def getActions(self):
        """Return the stored action set."""
        return self.actions

    def restart(self):
        """Reset the emulator."""
        self.ale.reset_game()

    def act(self, action):
        """Execute the action at index ``action`` and return its reward."""
        return self.ale.act(self.actions[action])

    def getScreen(self):
        """Return the grayscale screen resized to ``self.dims``."""
        return cv2.resize(self.ale.getScreenGrayscale(), self.dims)

    def getScreenGray(self):
        """Return the resized grayscale screen rotated by 90 degrees."""
        scaled = cv2.resize(self.ale.getScreenGrayscale(), self.dims)
        return np.rot90(scaled, k=1)

    def getScreenColor(self):
        """Return the resized RGB screen rotated by 90 degrees."""
        scaled = cv2.resize(self.ale.getScreenRGB(), self.dims)
        return np.rot90(scaled, k=1)

    def isTerminal(self):
        """Return the emulator's game-over flag."""
        return self.ale.game_over()
class Emulator(object):
    """ALE wrapper producing fixed-size grayscale frames."""

    FRAME_SKIP = 4
    SCREEN_WIDTH = 84
    SCREEN_HEIGHT = 84

    def __init__(self, rom):
        """Configure frame skipping and load ``roms/<rom>``."""
        self.ale = ALEInterface()
        # Fixed cap; the value is not read from the emulator settings.
        self.max_num_frames_per_episode = 100000
        self.ale.setInt('frame_skip', self.FRAME_SKIP)
        self.ale.loadROM('roms/' + rom)
        self.actions = self.ale.getMinimalActionSet()

    def reset(self):
        """Reset the emulator."""
        self.ale.reset_game()

    def image(self):
        """Return the grayscale screen as a (SCREEN_HEIGHT, SCREEN_WIDTH) array."""
        screen = self.ale.getScreenGrayscale()
        # cv2.resize takes dsize as (width, height); the previous
        # (height, width) order only worked because both are 84.
        screen = cv2.resize(screen, (self.SCREEN_WIDTH, self.SCREEN_HEIGHT))
        return np.reshape(screen, (self.SCREEN_HEIGHT, self.SCREEN_WIDTH))

    def act(self, action):
        """Pass ``action`` to the emulator unchanged and return the reward."""
        return self.ale.act(action)

    def terminal(self):
        """Return the emulator's game-over flag."""
        return self.ale.game_over()
class pyrlcade_environment(object):
    """RAM-observation environment on top of ALE.

    Setup happens in :meth:`init` (not ``__init__``), so it must be called
    explicitly after construction.
    """

    def init(self, rom_file, ale_frame_skip):
        """Create the emulator, load ``rom_file`` and allocate the RAM buffer."""
        emu = ALEInterface()
        self.ale = emu
        self.max_frames_per_episode = emu.getInt("max_num_frames_per_episode")

        emu.set("random_seed", 123)
        emu.set("disable_color_averaging", 1)
        emu.set("frame_skip", ale_frame_skip)
        emu.loadROM(rom_file)

        self.legal_actions = emu.getMinimalActionSet()
        self.ram = np.zeros((emu.getRAMSize()), dtype=np.uint8)
        emu.getRAM(self.ram)
        self.state = emu.getRAM(self.ram)

    def reset_state(self):
        """Reset the emulator."""
        self.ale.reset_game()

    def set_action(self, a):
        """Remember the action that :meth:`step` will execute."""
        self.action = a

    def step(self):
        """Execute the stored action, cache the reward, return game-over."""
        self.reward = self.ale.act(self.action)
        return self.ale.game_over()

    def get_state(self):
        """Refresh the RAM buffer from the emulator and return it."""
        self.ale.getRAM(self.ram)
        return self.ram

    def get_reward(self):
        """Return the reward cached by the last :meth:`step`."""
        return self.reward
class Environment:
    """ALE wrapper configured from a parsed-arguments object."""

    def __init__(self, rom_file, args):
        """Configure the emulator from ``args`` and load ``rom_file``.

        Reads ``display_screen``, ``frame_skip``,
        ``repeat_action_probability``, ``color_averaging``, ``random_seed``,
        ``record_screen_path``, ``record_sound_filename``,
        ``minimal_action_set``, ``screen_height`` and ``screen_width``.
        """
        self.ale = ALEInterface()
        if args.display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability',
                          args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)

        if args.random_seed:
            self.ale.setInt('random_seed', args.random_seed)

        if args.record_screen_path:
            if not os.path.exists(args.record_screen_path):
                logger.info("Creating folder %s", args.record_screen_path)
                os.makedirs(args.record_screen_path)
            logger.info("Recording screens to %s", args.record_screen_path)
            self.ale.setString('record_screen_dir', args.record_screen_path)

        if args.record_sound_filename:
            logger.info("Recording sound to %s", args.record_sound_filename)
            self.ale.setBool('sound', True)
            self.ale.setString('record_sound_filename',
                               args.record_sound_filename)

        self.ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d",
                        len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d",
                        len(self.actions))
        logger.debug("Actions: " + str(self.actions))

        # Stored as (height, width); see getScreen for the resize order.
        self.dims = (args.screen_height, args.screen_width)

    def numActions(self):
        """Return how many actions are available."""
        return len(self.actions)

    def restart(self):
        """Reset the emulator."""
        self.ale.reset_game()

    def act(self, action):
        """Execute the action at index ``action`` and return its reward."""
        reward = self.ale.act(self.actions[action])
        return reward

    def getScreen(self):
        """Return the grayscale screen resized to (height, width) = ``dims``."""
        screen = self.ale.getScreenGrayscale()
        height, width = self.dims
        # cv2.resize expects dsize as (width, height); passing dims directly
        # swapped the axes for non-square targets.
        resized = cv2.resize(screen, (width, height))
        return resized

    def isTerminal(self):
        """Return the emulator's game-over flag."""
        return self.ale.game_over()
class Emulator(object):
    """ALE wrapper configured from a settings dict."""

    def __init__(self, settings):
        """Configure the emulator and load ``roms/<settings['rom_name']>``.

        Reads ``frame_skip``, ``rom_name``, ``screen_width`` and
        ``screen_height`` from ``settings``.
        """
        self.ale = ALEInterface()
        self.ale.setInt('frame_skip', settings['frame_skip'])
        self.ale.setInt('random_seed', np.random.RandomState().randint(1000))
        self.ale.setBool('color_averaging', False)
        self.ale.loadROM('roms/' + settings['rom_name'])
        self.actions = self.ale.getMinimalActionSet()
        self.width = settings['screen_width']
        self.height = settings['screen_height']

    def reset(self):
        """Reset the emulator."""
        self.ale.reset_game()

    def image(self):
        """Return the grayscale screen as a (height, width) array."""
        screen = self.ale.getScreenGrayscale()
        # cv2.resize takes dsize as (width, height); the earlier
        # (height, width) order scrambled non-square outputs on reshape.
        screen = cv2.resize(screen, (self.width, self.height),
                            interpolation=cv2.INTER_LINEAR)
        return np.reshape(screen, (self.height, self.width))

    def full_image(self):
        """Return the unscaled RGB screen."""
        screen = self.ale.getScreenRGB()
        return screen

    def act(self, action):
        """Execute the action at index ``action`` and return its reward."""
        return self.ale.act(self.actions[action])

    def terminal(self):
        """Return the emulator's game-over flag."""
        return self.ale.game_over()
def main():
    """Play random-action episodes of the given ROM and print the rewards.

    Command-line options are parsed from the module docstring with docopt;
    ``--iters`` selects the number of episodes (default 5).
    """
    arguments = docopt.docopt(__doc__, version='ALE Demo Version 1.0')

    pygame.init()

    ale = ALEInterface()
    ale.setInt(b'random_seed', 123)
    ale.setBool(b'display_screen', True)
    ale.loadROM(str.encode(arguments['<rom_file>']))

    legal_actions = ale.getLegalActionSet()

    width, height = ale.getScreenDims()
    print(width, height)

    rewards, num_episodes = [], int(arguments['--iters'] or 5)
    for episode in range(num_episodes):
        total_reward = 0
        while not ale.game_over():
            total_reward += ale.act(random.choice(legal_actions))
        print('Episode %d reward %d.' % (episode, total_reward))
        rewards.append(total_reward)
        ale.reset_game()

    # ``--iters 0`` yields no episodes; avoid dividing by zero.
    average = sum(rewards) / len(rewards) if rewards else 0
    print('Average for %d episodes: %d' % (num_episodes, average))
class ALEGame(object):
    """Game wrapper around the Arcade Learning Environment.

    Maintains ``s_t``, a stack of four preprocessed frames, and ``s_t1``,
    the stack after the most recent action.
    """

    def __init__(self, rand_seed, game_name):
        """Configure the emulator, load the ROM and build the first state."""
        emu = ALEInterface()
        emu.setInt(b'random_seed', rand_seed)
        emu.setFloat(b'repeat_action_probability', 0.0)
        emu.setBool(b'color_averaging', True)
        emu.setInt(b'frame_skip', SKIPED_FRAMES)
        emu.loadROM(game_name.encode('ascii'))
        self.ale = emu

        self.real_actions = emu.getMinimalActionSet()
        self.screen = np.empty((IMAGE_HEIGHT, IMAGE_WIDTH, 1), dtype=np.uint8)
        self.reset()

    def preprocess_image(self, is_to_reshape=False):
        """Grab the grayscale screen and return it scaled to [0, 1].

        The frame is resized to 110x84 and rows 18..101 are kept. With
        ``is_to_reshape`` the result has shape (84, 84, 1).
        """
        self.ale.getScreenGrayscale(self.screen)
        flat = np.reshape(self.screen, (IMAGE_HEIGHT, IMAGE_WIDTH))
        frame = skimage.transform.resize(flat, (110, 84), preserve_range=True)
        frame = frame[18:102, :]
        if is_to_reshape:
            frame = np.reshape(frame, (84, 84, 1))
        frame = frame.astype(np.float32)
        frame *= (1.0 / 255.0)
        return frame

    def reset(self):
        """Reset the game and build the initial four-frame state."""
        self.ale.reset_game()
        self.act(0)
        first = self.preprocess_image()
        self.s_t = np.stack((first,) * 4, axis=2)

    def act(self, action):
        """Send ``action`` to the emulator; cache reward and game-over flag."""
        self.reward = self.ale.act(action)
        self.is_game_over = self.ale.game_over()

    def process_to_next_image(self, action):
        """Execute action index ``action`` and compute the next state."""
        self.act(self.real_actions[action])
        newest = self.preprocess_image(True)
        # Drop the oldest frame and append the newest one.
        self.s_t1 = np.append(self.s_t[:, :, 1:], newest, axis=2)

    def update(self):
        """Make the next state the current state."""
        self.s_t = self.s_t1
def act_with_frame_skip(self, a):
    """Repeat the action at index ``a`` for ``self.frame_skip`` frames.

    Returns:
        ``(reward, game_over)``: the summed reward, and True if the emulator
        reported game over on any frame or, outside test mode, the life
        count dropped below its value at the start of the call.
    """
    reward = 0
    game_over = False
    lives = ALEInterface.lives(self)
    # range (not xrange) keeps this working on Python 2 and 3.
    for _ in range(self.frame_skip):
        reward += ALEInterface.act(self, self.legal_actions[a])
        lost_life = (not self.test_mode
                     and ALEInterface.lives(self) < lives)
        if ALEInterface.game_over(self) or lost_life:
            game_over = True
    return reward, game_over
class AtariEnvironment(Environment):
    """
    Atari Environment Object
    """

    def __init__(self, rom_path, action_repeat=4, death_end=True,
                 width_resize=84, height_resize=84, resize_mod='scale'):
        """Create the emulator, load the ROM and grab the first frame.

        Args:
            rom_path: Path handed to ``ALEInterface.loadROM``.
            action_repeat: Default number of repeats per :meth:`step`.
            death_end: Treat a lost life as terminal.
            width_resize: Target width used by :meth:`get_frame`.
            height_resize: Target height used by :meth:`get_frame`.
            resize_mod: ``'scale'`` or ``'crop'``.
        """
        # super(Environment, self) skipped Environment's own initialiser.
        super(AtariEnvironment, self).__init__()
        self.action_repeat = action_repeat
        self.death_end = death_end
        self.width_resize = width_resize
        self.height_resize = height_resize
        self.resize_mod = resize_mod
        self.display = False

        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()
        # Settings are applied before loadROM so they are in place when the
        # ROM is initialised.
        self.ale.setInt('random_seed', np.random.randint(1000))
        self.ale.setBool('display_screen', self.display)
        self.ale.loadROM(rom_path)
        self.action_set = self.ale.getMinimalActionSet()
        self.num_actions = len(self.action_set)
        self.start_lives = self.ale.lives()
        width, height = self.ale.getScreenDims()
        self.currentScreen = np.empty((height, width), dtype=np.uint8)
        self.reset()

    def reset(self):
        """Reset the game, refresh the screen buffer, clear the terminal flag."""
        self.ale.reset_game()
        self.ale.getScreenGrayscale(self.currentScreen)
        self.terminal = False

    def step(self, action, repeat=None):
        """Repeat action index ``action`` and return the summed reward.

        ``repeat`` defaults to ``action_repeat``. Afterwards the screen
        buffer and ``self.terminal`` are updated.
        """
        repeat = self.action_repeat if repeat is None else repeat
        reward = 0
        for _ in range(repeat):
            reward += self.ale.act(self.action_set[action])
        self.ale.getScreenGrayscale(self.currentScreen)
        lost_life = self.death_end and self.ale.lives() < self.start_lives
        self.terminal = lost_life or self.ale.game_over()
        return reward

    def get_frame(self):
        """Return the current screen resized according to ``resize_mod``.

        Returns None when ``resize_mod`` is neither 'scale' nor 'crop'.
        """
        # NOTE(review): the size tuple is passed as (width, height); confirm
        # the order imresize expects before using non-square targets.
        if self.resize_mod == 'scale':
            return imresize(self.currentScreen,
                            (self.width_resize, self.height_resize),
                            interp='bilinear')
        elif self.resize_mod == 'crop':
            height, width = self.currentScreen.shape
            # Floor division keeps the slice bounds integral on Python 3.
            res = (height - width) // 2
            crop = self.currentScreen[res:(res + width), :]
            return imresize(crop, (self.width_resize, self.height_resize),
                            interp='bilinear')
class Emulator:
    """ALE wrapper configured entirely from module-level constants."""

    def __init__(self):
        """Apply the configured settings and load ``ROM_PATH``."""
        ale = ALEInterface()
        self.ale = ale

        ale.setBool('sound', False)  # sound stays off
        ale.setBool('display_screen', EMULATOR_DISPLAY)
        ale.setInt('frame_skip', FRAME_SKIP)
        ale.setFloat('repeat_action_probability', REPEAT_ACTION_PROBABILITY)
        ale.setBool('color_averaging', COLOR_AVERAGING)
        ale.setInt('random_seed', RANDOM_SEED)
        if RECORD_SCENE_PATH:
            ale.setString('record_screen_dir', RECORD_SCENE_PATH)

        ale.loadROM(ROM_PATH)

        self.actions = ale.getMinimalActionSet()
        logger.info("Actions: " + str(self.actions))
        self.dims = DIMS

    def getActions(self):
        """Return the minimal action set."""
        return self.actions

    def numActions(self):
        """Return the size of the action set."""
        return len(self.actions)

    def restart(self):
        """Reset the emulator."""
        self.ale.reset_game()

    def act(self, action):
        """Execute the action at index ``action`` and return its reward."""
        return self.ale.act(self.actions[action])

    def getScreen(self):
        """Return the grayscale screen resized to ``self.dims``."""
        gray = self.ale.getScreenGrayscale()
        return cv2.resize(gray, self.dims)

    def isTerminal(self):
        """Return the emulator's game-over flag (lost lives are ignored)."""
        return self.ale.game_over()
class Env():
    """ALE environment with a fixed 4x action repeat and clipped rewards."""

    def __init__(self, rom_name):
        """Create the emulator, load ``rom_name`` and clear screen buffers."""
        self.__initALE()
        self.__loadROM(rom_name)
        self.screen_history = []
        self.screens = []

    def __initALE(self):
        """Create and configure the ALE interface."""
        emu = ALEInterface()
        emu.setInt(b'random_seed', randrange(1000))
        emu.setInt(b'fragsize', 64)
        emu.setInt(b'frame_skip', 1)
        # qq set this back to 0.25?
        emu.setFloat(b'repeat_action_probability', 0)
        emu.setLoggerMode('error')
        self.ale = emu

    def __loadROM(self, rom_name):
        """Load the ROM and allocate two RGB screen buffers."""
        self.ale.loadROM(rom_name.encode('utf-8'))
        self.actions = self.ale.getMinimalActionSet()
        width, height = self.ale.getScreenDims()
        shape = (height, width, 3)
        self.screen_data1 = np.empty(shape, dtype=np.uint8)
        self.screen_data2 = np.empty(shape, dtype=np.uint8)

    def get_legal_action_count(self):
        """Return the number of available actions."""
        return len(self.actions)

    def act(self, action_index):
        """Perform the chosen action four times.

        Each per-frame reward is clipped to [-1, 1] before summing. The
        screens after the third and fourth frame are combined with a
        pixel-wise maximum (some games only render every other frame).

        Returns:
            ``(terminal, reward, phi, last_screen)``.
        """
        action = self.actions[action_index]
        total = 0
        for frame in range(4):
            total += _clip(self.ale.act(action), -1, 1)
            if frame == 2:
                self.ale.getScreenRGB(self.screen_data1)
            elif frame == 3:
                self.ale.getScreenRGB(self.screen_data2)

        merged = np.maximum(self.screen_data1, self.screen_data2)
        terminal = self.ale.game_over()

        self.screens.append(preprocess_screen(merged))
        phi = get_phi(self.screens)
        return (terminal, total, phi, self.screen_data2)

    def get_s(self):
        """Return ``get_phi`` of the screens collected so far."""
        return get_phi(self.screens)

    def reset(self):
        """Reset the game and drop all collected screens."""
        self.ale.reset_game()
        self.screens = []
class AtariEnvironment:
    """ALE environment with a 4x action repeat and optional Tk rendering."""

    def __init__(self, rom):
        """Load ``rom``, reset the game and try to set up a Tk window."""
        self.ale = ALEInterface()
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.loadROM(rom_file=rom)
        self.action_space = self.ale.getMinimalActionSet()
        self.obs = self.reset()
        try:
            self.im = Image.fromarray(self.obs)
            self.root = Tk()
            self.tkim = ImageTk.PhotoImage(self.im)
            self.window = Label(image=self.tkim)
            self.window.image = self.tkim
            self.window.pack()
        except AttributeError:
            print("Cannot create rendering attributes")

    def step(self, action):
        """Repeat action index ``action`` for four frames.

        The observation is the pixel-wise maximum of the last two frames.

        Returns:
            ``(obs, reward, done)`` with the reward summed over the frames.
        """
        reward = 0.
        for i in range(4):
            reward += float(self.ale.act(self.action_space[action]))
            if i == 2:
                frame1 = self.ale.getScreenGrayscale()
            if i == 3:
                frame2 = self.ale.getScreenGrayscale()
        self.obs = np.squeeze(np.maximum(frame1, frame2))
        done = self.ale.game_over()
        return self.obs, reward, done

    def reset(self):
        """Reset the game and return the first grayscale observation."""
        self.ale.reset_game()
        self.obs = np.squeeze(self.ale.getScreenGrayscale())
        return self.obs

    def render(self, rate=0.1):
        """Show the current observation in the Tk window, then sleep ``rate`` s."""
        self.im = Image.fromarray(self.obs)
        self.tkim = ImageTk.PhotoImage(self.im)
        self.window.configure(image=self.tkim)
        self.window.image = self.tkim
        self.window.update_idletasks()
        self.window.update()
        time.sleep(rate)

    def sample_action(self):
        """Return a uniformly random valid action index.

        The index range follows the loaded ROM's action set instead of
        assuming exactly four actions.
        """
        return random.randrange(len(self.action_space))
class Atari:
    """ALE wrapper that loads a ROM from ``game/`` and displays each frame."""

    def __init__(self, rom_name):
        """Configure the emulator, load the ROM and open a display window.

        Args:
            rom_name: ROM file name inside ``game/``; also the window title.
        """
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('game/' + rom_name)

        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getMinimalActionSet()
        # Maps each emulator action code to its index in legal_actions.
        self.action_map = dict()
        for index, action in enumerate(self.legal_actions):
            self.action_map[action] = index

        self.windowname = rom_name
        cv2.startWindowThread()
        cv2.namedWindow(rom_name)

    def get_image(self):
        """Return the current screen as a (height, width, 3) uint8 array."""
        numpy_surface = np.zeros(
            self.screen_height * self.screen_width * 3, dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(
            numpy_surface, (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):
        """Reset the game and return the first frame."""
        self.ale.reset_game()
        return self.get_image()

    def next(self, action):
        """Apply an action and return ``(next_state, reward, terminal)``.

        ``action`` is a sequence of scores whose arg-max index selects the
        entry of ``legal_actions`` to execute. The terminal flag is captured
        before the automatic reset; reading it afterwards reported the state
        of the freshly reset game.
        """
        reward = self.ale.act(self.legal_actions[np.argmax(action)])
        nextstate = self.get_image()
        cv2.imshow(self.windowname, nextstate)
        terminal = self.ale.game_over()
        if terminal:
            self.newGame()
        return nextstate, reward, terminal
class Environment:
    """ALE environment returning a stack of four 84x84 float frames."""

    def __init__(self, render=False):
        """Configure the emulator and load the ROM named by ``ENV``."""
        emu = ALEInterface()
        emu.setInt(b'random_seed', 0)
        emu.setFloat(b'repeat_action_probability', 0.0)
        emu.setBool(b'color_averaging', True)
        emu.setInt(b'frame_skip', 4)
        emu.setBool(b'display_screen', render)
        emu.loadROM(ENV.encode('ascii'))
        self.ale = emu
        self._screen = np.empty((210, 160, 1), dtype=np.uint8)
        self._no_op_max = 7

    def _cropped_gray(self):
        """Grab the grayscale screen, resize it and keep rows 18..101."""
        self.ale.getScreenGrayscale(self._screen)
        plane = np.reshape(self._screen, (210, 160))
        plane = cv2.resize(plane, (84, 110))
        return plane[18:102, :]

    def set_render(self, render):
        """Turn the display off when ``render`` is falsy; otherwise do nothing."""
        if render:
            return
        self.ale.setBool(b'display_screen', render)

    def reset(self):
        """Reset the game and return the initial frame stack.

        Up to ``_no_op_max`` steps of action 0 randomise the start. The
        first frame is repeated four times along the last axis.
        """
        self.ale.reset_game()

        # randomize initial state
        if self._no_op_max > 0:
            skips = np.random.randint(0, self._no_op_max + 1)
            for _ in range(skips):
                self.ale.act(0)

        frame = self._cropped_gray().astype(np.float32)
        frame /= 255.0

        self.frame_buffer = np.stack((frame,) * 4, axis=2)
        return self.frame_buffer

    def act(self, action):
        """Execute emulator action ``4 + action``.

        Returns:
            ``(frame_buffer, reward, done, "")`` where the buffer has the
            oldest frame dropped and the newest appended.
        """
        reward = self.ale.act(4 + action)
        done = self.ale.game_over()

        frame = np.reshape(self._cropped_gray(), (84, 84, 1))
        frame = frame.astype(np.float32)
        frame *= (1 / 255.0)

        self.frame_buffer = np.append(
            self.frame_buffer[:, :, 1:], frame, axis=2)
        return self.frame_buffer, reward, done, ""

    def close(self):
        """Switch the emulator display off."""
        self.ale.setBool(b'display_screen', False)
class Atari:
    """ALE wrapper returning binarised 84x84 frames and displaying them."""

    def __init__(self, rom_name):
        """Configure the emulator, load ``rom_name`` and open a window."""
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM(rom_name)

        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getMinimalActionSet()
        # Maps each emulator action code to its index in legal_actions.
        self.action_map = dict()
        for index, action in enumerate(self.legal_actions):
            self.action_map[action] = index

        # Parenthesised form prints the same on Python 2 and 3.
        print(len(self.legal_actions))
        self.windowname = rom_name
        cv2.startWindowThread()
        cv2.namedWindow(rom_name)

    def preprocess(self, image):
        """Resize, convert to gray, crop and binarise a frame.

        Returns an array of shape (84, 84, 1) holding 0 or 255.
        """
        image = cv2.cvtColor(cv2.resize(image, (84, 110)),
                             cv2.COLOR_BGR2GRAY)
        image = image[26:110, :]
        ret, image = cv2.threshold(image, 1, 255, cv2.THRESH_BINARY)
        return np.reshape(image, (84, 84, 1))

    def get_image(self):
        """Return the current screen after :meth:`preprocess`."""
        numpy_surface = np.zeros(
            self.screen_height * self.screen_width * 3, dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(
            numpy_surface, (self.screen_height, self.screen_width, 3))
        return self.preprocess(image)

    def newGame(self):
        """Reset the game and return the first preprocessed frame."""
        self.ale.reset_game()
        return self.get_image()

    def next(self, action):
        """Apply an action and return ``(next_state, reward, terminal)``.

        ``action`` is a sequence of scores whose arg-max index selects the
        entry of ``legal_actions`` to execute. The terminal flag is captured
        before the automatic reset; reading it afterwards reported the state
        of the freshly reset game.
        """
        reward = self.ale.act(self.legal_actions[np.argmax(action)])
        nextstate = self.get_image()
        cv2.imshow(self.windowname, nextstate)
        terminal = self.ale.game_over()
        if terminal:
            self.newGame()
        return nextstate, reward, terminal
class Environment:
    """ALE environment for ``Breakout.bin`` that keeps a frame history."""

    def __init__(self, show_screen, history_length):
        """Configure the emulator, optionally enable display, load the game."""
        self.ale = ALEInterface()
        self.ale.setInt('frame_skip', 4)
        self.history = None
        self.history_length = history_length
        if show_screen:
            self.display_screen()
        self.load_game()
        screen_width, screen_height = self.ale.getScreenDims()
        # Buffer that receives the raw grayscale screen.
        self.screen_data = np.empty((screen_height, screen_width, 1),
                                    dtype=np.uint8)
        self.dims = (84, 84)  # input size for neural network
        # NOTE(review): the original comment read "noop, left, right, fire",
        # which does not appear to match these codes in ALE's action
        # enumeration -- confirm the intended mapping.
        self.actions = [3, 0, 1, 4]

    def display_screen(self):
        """Enable the emulator's display."""
        self.ale.setBool("display_screen", True)

    def turn_on_sound(self):
        """Enable the emulator's sound."""
        self.ale.setBool("sound", True)

    def restart(self):
        """Reset the game."""
        self.ale.reset_game()

    def act(self, action):
        """Execute the action at index ``action`` and return its reward."""
        code = self.actions[action]
        return self.ale.act(code)

    def __get_screen(self):
        """Return the grayscale screen resized to ``self.dims``.

        No thresholding is applied here.
        """
        self.ale.getScreenGrayscale(self.screen_data)
        return cv2.resize(self.screen_data, self.dims)

    def get_state(self):
        """Return the frame history stacked along axis 0.

        On the first call the history is filled with copies of the current
        frame; later calls push the newest frame and drop the oldest.
        """
        frame = self.__get_screen()
        if self.history is None:
            primer = [frame] * (self.history_length - 1)
            self.history = deque(primer, maxlen=self.history_length)
        self.history.append(frame)
        return np.stack(self.history, axis=0)

    def isTerminal(self):
        """Return whether the game is over."""
        return self.ale.game_over()

    def load_game(self):
        """Load the ROM from ``Breakout.bin``."""
        self.ale.loadROM("Breakout.bin")
class emulator:
    """ALE wrapper whose episodes start from a randomly chosen checkpoint."""

    def __init__(self, rom_name, vis, windowname='preview'):
        """Configure the emulator, load ``roms/<rom_name>``, optionally open a window."""
        ale = ALEInterface()
        self.ale = ale
        self.max_frames_per_episode = ale.getInt("max_num_frames_per_episode")
        ale.setInt("random_seed", 123)
        ale.setInt("frame_skip", 4)
        ale.loadROM('roms/' + rom_name)

        self.legal_actions = ale.getMinimalActionSet()
        self.action_map = dict()
        self.windowname = windowname
        # Maps each emulator action code to its index in legal_actions.
        for position, code in enumerate(self.legal_actions):
            self.action_map[code] = position
        self.init_frame_number = 0

        self.screen_width, self.screen_height = ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" +
              str(self.screen_height))

        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

    def get_image(self):
        """Return the RGB screen as a (height, width, 3) uint8 array."""
        height, width = self.screen_height, self.screen_width
        surface = np.zeros(height * width * 3, dtype=np.uint8)
        self.ale.getScreenRGB(surface)
        return np.reshape(surface, (height, width, 3))

    def newGame(self):
        """Restore a random checkpoint (index 0-99) and return the frame.

        The game is not reset. The frame number at restore time is stored
        so :meth:`get_frame_number` can report frames since the restore.
        """
        encoded = checkpoints[random.randint(0, 99)].astype('uint8')
        self.ale.restoreState(self.ale.decodeState(encoded))
        self.init_frame_number = self.ale.getFrameNumber()
        return self.get_image()

    def next(self, action_indx):
        """Pass ``action_indx`` to the emulator unchanged.

        Returns:
            ``(frame, reward, game_over)``.
        """
        reward = self.ale.act(action_indx)
        frame = self.get_image()
        if self.vis:
            cv2.imshow(self.windowname, frame)
        return frame, reward, self.ale.game_over()

    def get_frame_number(self):
        """Return frames elapsed since the last :meth:`newGame`."""
        return self.ale.getFrameNumber() - self.init_frame_number
class emulator(object):
    """ALE wrapper that loads a ROM from ``ROM_PATH`` and can preview frames."""

    def __init__(self, rom_name, vis, frameskip=1, windowname='preview'):
        """Configure the emulator, load the ROM and optionally open a window.

        Args:
            rom_name: ROM file name appended to ``ROM_PATH``.
            vis: When truthy, frames are shown in an OpenCV window.
            frameskip: Value for ALE's ``frame_skip`` setting.
            windowname: Title of the preview window.
        """
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", frameskip)
        romfile = str(ROM_PATH) + str(rom_name)
        if not os.path.exists(romfile):
            # Warn only; loadROM below is still attempted.
            print('No ROM file found at "' + romfile +
                  '".\nAdjust ROM_PATH or double-check the file exists.')
        self.ale.loadROM(romfile)
        self.legal_actions = self.ale.getMinimalActionSet()
        # Maps each emulator action code to its index in legal_actions.
        self.action_map = dict()
        self.windowname = windowname
        for index, action in enumerate(self.legal_actions):
            self.action_map[action] = index

        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" +
              str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            # NOTE(review): the original comment said this flag permits
            # manual resizing; confirm against the OpenCV window-flag docs.
            cv2.namedWindow(self.windowname, flags=cv2.WINDOW_AUTOSIZE)

    def get_image(self):
        """Return the RGB screen as a (height, width, 3) uint8 array."""
        numpy_surface = np.zeros(
            self.screen_height * self.screen_width * 3, dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(
            numpy_surface, (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):
        """Reset the game and return the first frame."""
        self.ale.reset_game()
        return self.get_image()

    def next(self, action_indx):
        """Pass ``action_indx`` to the emulator unchanged.

        Returns:
            ``(frame, reward, game_over)``.
        """
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        if self.vis:
            cv2.imshow(self.windowname, nextstate)
            if sys.platform == 'darwin':
                # if we don't do this, can hang on OS X
                cv2.waitKey(2)
        return nextstate, reward, self.ale.game_over()
class ALEInterfaceWrapper:
    """ALE facade that implements sticky actions with its own RNG."""

    def __init__(self, repeat_action_probability, rng):
        """Create the emulator with ALE's built-in action repeat disabled.

        Args:
            repeat_action_probability: Chance that :meth:`act` replays the
                previous action instead of the requested one.
            rng: Random state; a deep copy is used for sampling so the
                sequence can be restarted with :meth:`reset_action_seed`.
        """
        self.internal_action_repeat_prob = repeat_action_probability
        self.prev_action = 0
        self.rng_source = rng
        self.rng = deepcopy(self.rng_source)
        self.ale = ALEInterface()
        # Setting this to 0 makes the emulator itself execute exactly the
        # action it is given; repeats are decided in act() below.
        self.ale.setFloat('repeat_action_probability', 0.0)

    def getScreenRGB(self):
        """Forward to ``ale.getScreenRGB``."""
        return self.ale.getScreenRGB()

    def game_over(self):
        """Forward to ``ale.game_over``."""
        return self.ale.game_over()

    def reset_game(self):
        """Forward to ``ale.reset_game``."""
        self.ale.reset_game()

    def lives(self):
        """Forward to ``ale.lives``."""
        return self.ale.lives()

    def getMinimalActionSet(self):
        """Forward to ``ale.getMinimalActionSet``."""
        return self.ale.getMinimalActionSet()

    def setInt(self, key, value):
        """Forward to ``ale.setInt``."""
        self.ale.setInt(key, value)

    def setFloat(self, key, value):
        """Forward to ``ale.setFloat``."""
        self.ale.setFloat(key, value)

    def loadROM(self, rom):
        """Forward to ``ale.loadROM``."""
        self.ale.loadROM(rom)

    def reset_action_seed(self):
        """Restart the sticky-action RNG from a copy of the source RNG."""
        self.rng = deepcopy(self.rng_source)

    def set_action_seed(self, seed):
        """Replace the sticky-action RNG with one seeded by ``seed``."""
        self.rng = np.random.RandomState(seed)

    def act(self, action):
        """Execute ``action``, or the previous action on a sticky draw.

        Returns the reward reported by the emulator.
        """
        chosen = action
        sticky_prob = self.internal_action_repeat_prob
        if sticky_prob > 0 and self.rng.uniform(0, 1) < sticky_prob:
            chosen = self.prev_action
        self.prev_action = chosen
        return self.ale.act(chosen)
class AleInterface(object):
    """ALE wrapper that addresses actions by index into the minimal action set."""

    def __init__(self, game, args):
        """Configure ALE from ``args`` and load ``./roms/<game>.bin``.

        Args:
            game: ROM base name (without directory or ``.bin`` suffix).
            args: Object providing ``frame_skip``,
                ``repeat_action_probability``, ``color_averaging`` and
                ``random_seed`` attributes.

        Exits the process with status -1 when the ROM file does not exist.
        """
        self.game = game
        self.ale = ALEInterface()
        # Display and sound settings are intentionally left at ALE defaults.
        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability',
                          args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)
        self.ale.setInt('random_seed', args.random_seed)

        rom_file = "./roms/%s.bin" % game
        if not os.path.exists(rom_file):
            # Formatted so the output is the same on Python 2 and Python 3.
            print("not found rom file: %s" % rom_file)
            sys.exit(-1)
        self.ale.loadROM(rom_file)
        self.actions = self.ale.getMinimalActionSet()

    def get_actions_num(self):
        """Return the number of actions in the minimal action set."""
        return len(self.actions)

    def act(self, action):
        """Execute the action at index ``action`` and return its reward."""
        return self.ale.act(self.actions[action])

    def get_screen_gray(self):
        """Return ``ALEInterface.getScreenGrayscale()``."""
        return self.ale.getScreenGrayscale()

    def get_screen_rgb(self):
        """Return ``ALEInterface.getScreenRGB()``."""
        return self.ale.getScreenRGB()

    def game_over(self):
        """Return ``ALEInterface.game_over()``."""
        return self.ale.game_over()

    def reset_game(self):
        """Reset the game and return whatever ``reset_game`` returns."""
        return self.ale.reset_game()
class Atari:
    """ALE wrapper that takes one-hot/score action vectors and auto-resets."""

    def __init__(self, rom_name):
        """Configure ALE and load ``./<rom_name>``.

        Args:
            rom_name: ROM file name relative to the working directory; also
                stored as ``windowname``.
        """
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('./' + rom_name)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getMinimalActionSet()
        # Maps each ALE action code to its position in the minimal action set.
        self.action_map = {
            action: index for index, action in enumerate(self.legal_actions)
        }
        self.windowname = rom_name

    def get_image(self):
        """Return the current screen as a (height, width, 3) uint8 array."""
        numpy_surface = np.zeros(
            self.screen_height * self.screen_width * 3, dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        return np.reshape(
            numpy_surface, (self.screen_height, self.screen_width, 3))

    def newGame(self):
        """Reset the game and return the first frame."""
        self.ale.reset_game()
        return self.get_image()

    def next(self, action):
        """Apply the arg-max entry of ``action``; return ``(frame, reward, terminal)``.

        The game is reset automatically when the episode ends. The terminal
        flag is read *before* that reset: the original code queried
        ``game_over()`` again after ``newGame()``, i.e. on the freshly reset
        game, so callers presumably never saw a terminal transition.
        """
        reward = self.ale.act(self.legal_actions[np.argmax(action)])
        nextstate = self.get_image()
        terminal = self.ale.game_over()
        if terminal:
            self.newGame()
        return nextstate, reward, terminal
class emulator:
    """ALE wrapper whose episodes start from a randomly chosen saved state.

    ``newGame`` restores one of the module-level ``checkpoints`` instead of
    resetting the game, and frame numbers are reported relative to that
    restore point.
    """

    def __init__(self, rom_name, vis, windowname='preview'):
        """Configure ALE, load ``roms/<rom_name>`` and optionally open a window.

        Args:
            rom_name: ROM file name inside the ``roms/`` directory.
            vis: When truthy, frames are displayed with OpenCV.
            windowname: Name of the OpenCV window used for display.
        """
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        self.windowname = windowname
        # Maps each ALE action code to its position in the minimal action set.
        self.action_map = {
            action: index for index, action in enumerate(self.legal_actions)
        }
        self.init_frame_number = 0
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" +
              str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

    def get_image(self):
        """Return the current screen as a (height, width, 3) uint8 array."""
        numpy_surface = np.zeros(
            self.screen_height * self.screen_width * 3, dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        return np.reshape(
            numpy_surface, (self.screen_height, self.screen_width, 3))

    def newGame(self):
        """Restore a random checkpoint and return the resulting frame.

        Instead of resetting the game, a checkpoint is loaded and play
        continues from there. The index is drawn over the whole
        ``checkpoints`` collection rather than a fixed ``0..99`` range; with
        exactly 100 checkpoints the draw is unchanged.
        """
        index = random.randint(0, len(checkpoints) - 1)
        encoded = checkpoints[index].astype('uint8')
        self.ale.restoreState(self.ale.decodeState(encoded))
        # Remember where this episode starts so frame counts can be relative.
        self.init_frame_number = self.ale.getFrameNumber()
        return self.get_image()

    def next(self, action_indx):
        """Apply an action; return ``(frame, reward, game_over)``."""
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        if self.vis:
            cv2.imshow(self.windowname, nextstate)
        return nextstate, reward, self.ale.game_over()

    def get_frame_number(self):
        """Return frames elapsed since the last ``newGame`` restore."""
        return self.ale.getFrameNumber() - self.init_frame_number
class Emulator:
    """ALE wrapper returning RGB frames, with debug output of every frame."""

    def __init__(self, rom_name, vis):
        """Configure ALE, load ``roms/<rom_name>`` and optionally open a window.

        Args:
            rom_name: ROM file name inside the ``roms/`` directory.
            vis: When truthy, frames are displayed in an OpenCV window
                named ``"preview"``.
        """
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        # Maps each ALE action code to its position in the minimal action set.
        self.action_map = {
            action: index for index, action in enumerate(self.legal_actions)
        }
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" +
              str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow("preview")

    def get_image(self):
        """Return the current screen as a (height, width, 3) uint8 array.

        The frame and a marker line are also printed on every call (debug
        output added May 2016); the single-argument call form prints the
        same text on Python 2 and Python 3.
        """
        numpy_surface = np.zeros(
            self.screen_height * self.screen_width * 3, dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(
            numpy_surface, (self.screen_height, self.screen_width, 3))
        print(image)
        print('&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& printing')
        return image

    def newGame(self):
        """Reset the game and return the first frame."""
        self.ale.reset_game()
        return self.get_image()

    def next(self, action_indx):
        """Apply an action; return ``(frame, reward, game_over)``."""
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        if self.vis:
            cv2.imshow('preview', nextstate)
        return nextstate, reward, self.ale.game_over()
def train():
    """Play one Breakout episode with random actions, showing every frame.

    Each step prints the RGB screen, displays it with matplotlib and prints
    the running score; a final line marks the end of the episode.
    """
    env = ALEInterface()
    env.setInt('random_seed', 123)
    env.loadROM('roms/breakout.bin')
    actions = env.getLegalActionSet()

    score = 0
    while not env.game_over():
        # Uniformly random choice over the full legal action set.
        choice = actions[randrange(len(actions))]
        reward = env.act(choice)
        frame = env.getScreenRGB()
        print(frame)
        plt.imshow(frame)
        plt.show()
        score += reward
        print(score)
    print('Episode end!')
def main():
    """Play 10 random-action episodes of the ROM given on the command line.

    The ROM path is ``sys.argv[1]`` when present; otherwise a hard-coded
    default path is used. The score of every episode is printed.
    """
    default_rom = '/Users/lguan/Documents/Study/Research/Atari-2600-Roms/T-Z/Tennis.bin'
    dir_rom = sys.argv[1] if len(sys.argv) >= 2 else default_rom

    ale = ALEInterface()

    # Get & Set the desired settings
    ale.setInt(b'random_seed', 123)

    # Set USE_SDL to true to display the screen. ALE must be compiled
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.
    USE_SDL = True
    if USE_SDL:
        platform = sys.platform
        if platform == 'darwin':
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    # Load the ROM file (ALE is given the path as bytes)
    encoded_path = str.encode(dir_rom)
    print('- Loading ROM - %s' % dir_rom)
    ale.loadROM(encoded_path)
    print('- Complete loading ROM')

    actions = ale.getMinimalActionSet()

    # Play 10 episodes
    for episode in range(10):
        score = 0
        while not ale.game_over():
            picked = actions[np.random.randint(actions.size)]
            # Apply an action and accumulate the resulting reward
            score += ale.act(picked)
        print('Episode %d ended with score: %d' % (episode, score))
        ale.reset_game()
class emulator:
    """ALE wrapper returning RGB frames, with optional OpenCV preview."""

    def __init__(self, rom_name, vis, windowname='preview'):
        """Configure ALE, load ``roms/<rom_name>`` and optionally open a window.

        Args:
            rom_name: ROM file name inside the ``roms/`` directory.
            vis: When truthy, frames are displayed with OpenCV.
            windowname: Name of the OpenCV window used for display.
        """
        self.ale = ALEInterface()
        self.ale.setInt("random_seed", 123)
        # Skipping 4 frames
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        print('Actions : %s' % self.legal_actions)
        # Maps each ALE action code to its position in the minimal action
        # set. The original created this dict but never filled it.
        self.action_map = {
            action: index for index, action in enumerate(self.legal_actions)
        }
        self.windowname = windowname
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" +
              str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

    def get_image(self):
        """Return the current screen as a (height, width, 3) uint8 array."""
        # The buffer handed to getScreenRGB must be uint8.
        numpy_surface = np.zeros(
            [self.screen_width * self.screen_height * 3], dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        return np.reshape(
            numpy_surface, [self.screen_height, self.screen_width, 3])

    def new_game(self):
        """Reset the game and return the first frame."""
        self.ale.reset_game()
        return self.get_image()

    def next(self, action_index):
        """Apply an action; return ``(frame, reward, game_over)``.

        ``action_index`` is passed to ``ALEInterface.act`` unchanged.
        """
        reward = self.ale.act(action_index)
        next_state = self.get_image()
        if self.vis:
            cv2.imshow(self.windowname, next_state)
        return next_state, reward, self.ale.game_over()
class Breakout(object):
    """Breakout environment whose state is the two most recent RGB screens."""

    # Number of emulator steps executed for every call to take_action().
    steps_between_actions = 4

    def __init__(self):
        """Create the ALE interface and load ``breakout.bin``.

        The ROM is read from the module-level ``rom_directory``.
        """
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', 123)
        self.ale.setBool("display_screen", False)
        self.ale.setBool("sound", False)
        self.ale.loadROM("%s/breakout.bin" % rom_directory)
        self.current_state = [
            self.ale.getScreenRGB(), self.ale.getScreenRGB()
        ]

    def start_episode(self):
        """Reset the game."""
        self.ale.reset_game()

    def take_action(self, action):
        """Repeat ``action`` and return ``(total_reward, current_state)``.

        The action is applied ``steps_between_actions`` times and the state
        is rolled after every step. Must not be called once the episode is
        terminated (checked with ``assert``).
        """
        assert not self.terminated
        reward = 0
        # range() keeps this working on Python 3, where xrange is gone.
        for _ in range(self.steps_between_actions):
            reward += self.ale.act(action)
            self.roll_state()
        return (reward, self.current_state)

    def roll_state(self):
        """Drop the older screen and append the current one."""
        assert len(self.current_state) == 2
        self.current_state = [self.current_state[1], self.ale.getScreenRGB()]
        assert len(self.current_state) == 2

    @property
    def actions(self):
        """The minimal action set reported by ALE."""
        return self.ale.getMinimalActionSet()

    @property
    def terminated(self):
        """True when ALE reports game over or fewer than 5 lives remain."""
        return self.ale.game_over() or self.ale.lives() < 5
class game(object):
    """Small facade over an ``ALEInterface`` running ``ms_pacman.bin``."""

    def __init__(self, display):
        """Create the interface and load the ROM.

        Args:
            display: When truthy, the ALE screen is displayed. ALE must be
                compiled with SDL for this to work; on OSX ``pygame.init()``
                is used to proxy-call SDL_main.
        """
        self.ale = ALEInterface()

        # Get & Set the desired settings
        self.ale.setInt('random_seed', 123)

        if display:
            platform = sys.platform
            if platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        # Load the ROM file
        self.ale.loadROM("ms_pacman.bin")

    def act(self, action):
        """Execute ``action`` and return the reward reported by ALE."""
        reward = self.ale.act(action)
        return reward

    def getState(self):
        """Return ``get_feature`` applied to the current screen."""
        raw_screen = self.ale.getScreen()
        return get_feature(raw_screen)

    def getScreen(self):
        """Return ``ALEInterface.getScreen()``."""
        raw_screen = self.ale.getScreen()
        return raw_screen

    def reset_game(self):
        """Reset the game."""
        self.ale.reset_game()

    def lives(self):
        """Return ``ALEInterface.lives()``."""
        remaining = self.ale.lives()
        return remaining

    def game_over(self):
        """Return ``ALEInterface.game_over()``."""
        finished = self.ale.game_over()
        return finished
class ALEInterfaceWrapper:
    """Thin pass-through proxy around ``ALEInterface``."""

    def __init__(self, repeat_action_probability):
        """Create the wrapped interface.

        Args:
            repeat_action_probability: Value forwarded to ALE's
                ``repeat_action_probability`` setting. The previous comment
                claimed the probability was forced to 0; the code forwards
                the caller's value unchanged.
        """
        # Kept as attributes for callers; act() does not consult them.
        self.internal_action_repeat_prob = repeat_action_probability
        self.prev_action = 0
        self.ale = ALEInterface()
        self.ale.setFloat('repeat_action_probability',
                          repeat_action_probability)

    def getScreenRGB(self):
        """Return ``ALEInterface.getScreenRGB()``."""
        return self.ale.getScreenRGB()

    def game_over(self):
        """Return ``ALEInterface.game_over()``."""
        return self.ale.game_over()

    def reset_game(self):
        """Reset the wrapped game."""
        self.ale.reset_game()

    def lives(self):
        """Return ``ALEInterface.lives()``."""
        return self.ale.lives()

    def getMinimalActionSet(self):
        """Return ``ALEInterface.getMinimalActionSet()``."""
        return self.ale.getMinimalActionSet()

    def setInt(self, key, value):
        """Forward an integer setting to ALE."""
        self.ale.setInt(key, value)

    def setFloat(self, key, value):
        """Forward a float setting to ALE."""
        self.ale.setFloat(key, value)

    def loadROM(self, rom):
        """Load ``rom`` into the wrapped interface."""
        self.ale.loadROM(rom)

    def act(self, action):
        """Execute ``action`` unchanged and return the reward."""
        return self.ale.act(action)
class Env:
    """Gym-like facade over ALE Breakout using indexed actions."""

    def __init__(self):
        """Load ``roms/Breakout.bin`` and build the index-to-action table."""
        self.ale = ALEInterface()
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM("roms/Breakout.bin")
        minimal_set = self.ale.getMinimalActionSet()
        # action index -> ALE action code
        self.action_map = dict(enumerate(minimal_set))
        self.action_num = len(self.action_map)

    def reset(self):
        """Reset the game and return an all-zero (84, 84, 3) uint8 array."""
        self.ale.reset_game()
        return np.zeros((84, 84, 3), dtype=np.uint8)

    def step(self, action):
        """Apply the action at index ``action``.

        Returns:
            ``(screen, reward, done, "")`` where ``screen`` is the value of
            ``getScreenRGB()`` after the action.
        """
        ale_action = self.action_map[action]
        reward = self.ale.act(ale_action)
        screen = self.ale.getScreenRGB()
        finished = self.ale.game_over()
        return screen, reward, finished, ""
class emulator:
    """ALE wrapper returning RGB frames; OpenCV is only needed when ``vis`` is set."""

    def __init__(self, rom_name, vis):
        """Configure ALE, load ``roms/<rom_name>`` and optionally open a window.

        Args:
            rom_name: ROM file name inside the ``roms/`` directory.
            vis: When truthy, frames are displayed in an OpenCV window
                named ``"preview"``; ``cv2`` is imported only in that case.
        """
        if vis:
            # Local import keeps cv2 an optional dependency.
            import cv2
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        # Maps each ALE action code to its position in the minimal action set.
        self.action_map = {
            action: index for index, action in enumerate(self.legal_actions)
        }
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" +
              str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow("preview")

    def get_image(self):
        """Return the current screen as a (height, width, 3) uint8 array."""
        numpy_surface = np.zeros(
            self.screen_height * self.screen_width * 3, dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        return np.reshape(
            numpy_surface, (self.screen_height, self.screen_width, 3))

    def newGame(self):
        """Reset the game and return the first frame."""
        self.ale.reset_game()
        return self.get_image()

    def next(self, action_indx):
        """Apply an action; return ``(frame, reward, game_over)``."""
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        if self.vis:
            # The import in __init__ only binds a name local to __init__, so
            # cv2 has to be imported here as well; otherwise this line
            # raises NameError unless the module imports cv2 globally.
            import cv2
            cv2.imshow('preview', nextstate)
        return nextstate, reward, self.ale.game_over()
class Game():
    """Wrapper around the ALEInterface class."""

    def __init__(self, rom_file, sdl=False):
        """Create the interface and load ``rom_file``.

        Args:
            rom_file: ROM path as text; it is encoded to bytes for ALE.
            sdl: When truthy, enable on-screen display (and sound on Linux).
        """
        self.ale = ALEInterface()

        # Setup SDL
        if sdl:
            platform = sys.platform
            if platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool(b'sound', False)  # Sound doesn't work on OSX
            elif platform.startswith('linux'):
                self.ale.setBool(b'sound', True)
            self.ale.setBool(b'display_screen', True)

        # Load rom
        encoded_path = str.encode(rom_file)
        self.ale.loadROM(encoded_path)

    def get_action_set(self):
        """Return the full legal action set."""
        actions = self.ale.getLegalActionSet()
        return actions

    def get_minimal_action_set(self):
        """Return the minimal action set."""
        actions = self.ale.getMinimalActionSet()
        return actions

    def game_over(self):
        """Return ``ALEInterface.game_over()``."""
        finished = self.ale.game_over()
        return finished

    def act(self, action):
        """Execute ``action`` and return the reward reported by ALE."""
        reward = self.ale.act(action)
        return reward

    def reset_game(self):
        """Reset the game."""
        self.ale.reset_game()

    def get_frame(self):
        """Return ``ALEInterface.getScreenRGB()``."""
        frame = self.ale.getScreenRGB()
        return frame
def get_random_baseline(gamepath):
    """Return the mean score of a uniformly random policy over 10 episodes.

    Args:
        gamepath: Path of the ROM handed to ``ALEInterface.loadROM``.

    Returns:
        The average of the 10 episode scores, as a float.
    """
    ale = ALEInterface()
    ale.setInt('random_seed', 42)

    USE_SDL = True
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    # Load the ROM file
    ale.loadROM(gamepath)

    # Get the list of legal actions
    legal_actions = ale.getLegalActionSet()

    # Play 10 episodes
    rewards = []
    for _ in range(10):
        total_reward = 0
        while not ale.game_over():
            action = legal_actions[randrange(len(legal_actions))]
            total_reward += ale.act(action)
        rewards.append(total_reward)
        ale.reset_game()

    return sum(rewards) / float(len(rewards))
class pyrlcade_environment(object):
    """Environment exposing the emulator RAM as the observable state."""

    def init(self, rom_file, ale_frame_skip):
        """Create the ALE interface, load the ROM and allocate the RAM buffer.

        This is a regular method named ``init`` (not ``__init__``), so it
        has to be called explicitly after construction.

        Args:
            rom_file: ROM path handed to ``loadROM``.
            ale_frame_skip: Value written to the ``frame_skip`` setting.
        """
        ale = ALEInterface()
        self.ale = ale
        self.max_frames_per_episode = ale.getInt("max_num_frames_per_episode")
        ale.set("random_seed", 123)
        ale.set("disable_color_averaging", 1)
        ale.set("frame_skip", ale_frame_skip)
        ale.loadROM(rom_file)
        self.legal_actions = ale.getMinimalActionSet()

        ram_size = ale.getRAMSize()
        self.ram = np.zeros(ram_size, dtype=np.uint8)
        ale.getRAM(self.ram)
        self.state = ale.getRAM(self.ram)

    def reset_state(self):
        """Reset the game."""
        self.ale.reset_game()

    def set_action(self, a):
        """Store the action that the next ``step`` will execute."""
        self.action = a

    def step(self):
        """Execute the stored action, record its reward, return game-over."""
        self.reward = self.ale.act(self.action)
        return self.ale.game_over()

    def get_state(self):
        """Refresh the RAM buffer from the emulator and return it."""
        buffer = self.ram
        self.ale.getRAM(buffer)
        return buffer

    def get_reward(self):
        """Return the reward recorded by the most recent ``step``."""
        return self.reward
def get_random_baseline(gamepath):
    """Estimate a random-policy baseline score for a ROM.

    Ten episodes are played with actions drawn uniformly from the legal
    action set, and the episode scores are averaged.

    Args:
        gamepath: Path of the ROM handed to ``ALEInterface.loadROM``.

    Returns:
        The mean episode score, as a float.
    """
    ale = ALEInterface()
    ale.setInt('random_seed', 42)

    USE_SDL = True
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    # Load the ROM file
    ale.loadROM(gamepath)

    # Get the list of legal actions
    legal_actions = ale.getLegalActionSet()

    # Play 10 episodes (range() works on both Python 2 and Python 3)
    rewards = []
    for _ in range(10):
        total_reward = 0
        while not ale.game_over():
            a = legal_actions[randrange(len(legal_actions))]
            reward = ale.act(a)
            total_reward += reward
        rewards.append(total_reward)
        ale.reset_game()

    avg_reward = sum(rewards) / float(len(rewards))
    return avg_reward
def act(self, a):
    """Execute the action at index ``a``; return ``(reward, game_over)``.

    Outside test mode, losing a life during the step is also reported as
    game over.
    """
    lives_before = ALEInterface.lives(self)
    reward = ALEInterface.act(self, self.legal_actions[a])
    episode_over = ALEInterface.game_over(self)
    if not episode_over and not self.test_mode:
        # Training only: a lost life ends the episode as well.
        episode_over = ALEInterface.lives(self) < lives_before
    return reward, episode_over
# ale.setBool('display_screen', True)

# Load the ROM file
ale.loadROM('Breakout.bin')

# Get the list of legal actions (minimal set rather than the full legal set)
legal_actions = ale.getMinimalActionSet()
print(legal_actions)

# Grab the screen once into a flat uint8 buffer and report the returned type.
(screen_width, screen_height) = ale.getScreenDims()
screen_data = np.zeros(screen_width * screen_height, dtype=np.uint8)
print(type(ale.getScreen(screen_data)))

# Play 10 episodes
for episode in range(10):
    total_reward = 0
    while not ale.game_over():
        a = legal_actions[randrange(len(legal_actions))]
        # Apply an action and get the resulting reward
        reward = ale.act(a)
        print(reward)
        total_reward += reward
    # %-formatting prints the same text on Python 2 and Python 3.
    print('Episode %s ended with score: %s' % (episode, total_reward))
    ale.reset_game()
class AleAgent:
    """Agent that plays an ALE game through an encoder and an NFQ learner.

    Grayscale screens are passed through ``processing_cls().process`` and
    ``Encoder.encode``; the encoded result is the state handed to ``NFQ``.
    """

    ##
    # @param processing_cls Class for processing game visual input
    def __init__(self, processing_cls, game_rom=None, encoder_model=None,
                 encoder_weights=None, NFQ_model=None, NFQ_weights=None):
        """Set up the emulator, the encoder, the processor and the NFQ object.

        Args:
            processing_cls: Class instantiated without arguments; its
                ``process`` method receives the raw grayscale screen.
            game_rom: ROM path; required (checked with ``assert``).
            encoder_model: Optional encoder model path.
            encoder_weights: Optional encoder weights path. The encoder is
                loaded from disk only when both encoder paths are given.
            NFQ_model: Optional NFQ model path.
            NFQ_weights: Optional NFQ weights path. NFQ is loaded from disk
                only when both NFQ paths are given.
        """
        assert game_rom is not None
        self.game = ALEInterface()
        if encoder_weights is not None and encoder_model is not None:
            self.encoder = Encoder(path_to_model=encoder_model,
                                   path_to_weights=encoder_weights)
        else:
            self.encoder = Encoder()

        self.processor = processing_cls()

        # Get & Set the desired settings
        self.game.setInt('random_seed', 0)
        self.game.setInt('frame_skip', 4)

        # Set USE_SDL to true to display the screen. ALE must be compiled
        # with SDL enabled for this to work. On OSX, pygame init is used to
        # proxy-call SDL_main.
        USE_SDL = True

        if USE_SDL:
            if sys.platform == 'darwin':
                pygame.init()
                self.game.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.game.setBool('sound', False)  # no sound
            self.game.setBool('display_screen', True)

        # Load the ROM file
        self.game.loadROM(game_rom)

        # Get the list of legal actions
        self.legal_actions = self.game.getLegalActionSet()

        # Get actions applicable in current game
        self.minimal_actions = self.game.getMinimalActionSet()

        if NFQ_model is not None and NFQ_weights is not None:
            self.NFQ = NFQ(
                self.encoder.out_dim,
                len(self.minimal_actions),
                model_path=NFQ_model,
                weights_path=NFQ_weights
            )
        else:
            self.NFQ = NFQ(self.encoder.out_dim, len(self.minimal_actions))

        (self.screen_width, self.screen_height) = self.game.getScreenDims()
        self.screen_data = np.zeros(
            (self.screen_height, self.screen_width),
            dtype=np.uint8
        )

    ##
    # Initialize the reinforcement learning
    def train(self, num_of_episodes=1500, eps=0.995, key_binding=None):
        """Run epsilon-greedy training episodes.

        After every episode the game is reset, ``NFQ.train()`` is called and
        the network is saved.

        Args:
            num_of_episodes: Number of episodes to play.
            eps: Initial exploration probability.
            key_binding: Optional callable mapping the pygame key state to
                an action index (or ``None``); when given, ``eps`` is fixed
                to 0.05 at the start of every episode.
        """
        pygame.init()
        for episode in range(num_of_episodes):
            total_reward = 0
            hits = 0
            print('Starting episode:  %s' % (episode + 1))

            if key_binding:
                eps = 0.05
            else:
                # NOTE(review): the original `2/num_of_episodes` is integer
                # division under Python 2 (zero for more than two episodes),
                # so only the per-step decay below is effective. `//` keeps
                # that behaviour on Python 3; confirm whether a true linear
                # decay was intended.
                eps -= 2 // num_of_episodes

            self.game.getScreenGrayscale(self.screen_data)
            pooled_data = self.processor.process(self.screen_data)
            next_state = self.encoder.encode(pooled_data)
            while not self.game.game_over():
                current_state = next_state
                x = None

                if key_binding:
                    key_pressed = pygame.key.get_pressed()
                    x = key_binding(key_pressed)

                if x is None:
                    # No manual input: explore with probability eps.
                    r = np.random.rand()
                    if r < eps:
                        x = np.random.randint(self.minimal_actions.size)
                    else:
                        x = self.NFQ.predict_action(current_state)

                a = self.minimal_actions[x]

                # Apply an action and get the resulting reward
                reward = self.game.act(a)

                self.game.getScreenGrayscale(self.screen_data)
                pooled_data = self.processor.process(self.screen_data)
                next_state = self.encoder.encode(pooled_data)

                # Flat record: state, action index, next state, reward.
                transition = np.append(current_state, x)
                transition = np.append(transition, next_state)
                transition = np.append(transition, reward)
                self.NFQ.add_transition(transition)

                total_reward += reward
                if reward > 0:
                    hits += 1

                if eps > 0.1:
                    eps -= 0.00001
            # end while

            print('Epsilon:  %s' % eps)
            print('Episode %s ended with score: %s'
                  % (episode + 1, total_reward))
            print('Hits:  %s' % hits)
            self.game.reset_game()
            self.NFQ.train()
            self.NFQ.save_net()
        # end for

    ##
    # Play the game!
    def play(self):
        """Play one episode greedily with the NFQ policy and print the score."""
        total_reward = 0
        moves = 1
        while not self.game.game_over():
            self.game.getScreenGrayscale(self.screen_data)
            pooled_data = self.processor.process(self.screen_data)
            current_state = self.encoder.encode(pooled_data)

            x = self.NFQ.predict_action(current_state)
            a = self.minimal_actions[x]
            reward = self.game.act(a)
            total_reward += reward
            moves += 1

        print('The game ended with score: %s  after:  %s  moves'
              % (total_reward, moves))
font = pygame.font.SysFont("Ubuntu Mono",30) text = font.render("Total Reward: " + str(total_reward) ,1,(208,255,255)) screen.blit(text,(330,line_pos)) pygame.display.flip() #process pygame event queue exit=False for event in pygame.event.get(): if event.type == pygame.QUIT: exit=True break; if(pressed[pygame.K_q]): exit = True if(exit): logger.close(); break #delay to 60fps clock.tick(60.) if(ale.game_over()): episode_frame_number = ale.getEpisodeFrameNumber() frame_number = ale.getFrameNumber() print("Frame Number: " + str(frame_number) + " Episode Frame Number: " + str(episode_frame_number)) print("Episode " + str(episode) + " ended with score: " + str(total_reward)) ale.reset_game() total_reward = 0.0 episode = episode + 1
class AtariEmulator(BaseEnvironment):
    """ALE-backed environment producing pooled 84x84 grayscale observations."""

    def __init__(self, rom_addr, random_start=False, random_seed=6,
                 visualize=True, single_life=False):
        """Configure ALE, load the ROM and allocate the frame buffers.

        Args:
            rom_addr: ROM path as text; it is encoded to bytes for ALE.
            random_start: When true, a random number of actions using the
                first legal action is executed after every reset.
            random_seed: ALE is seeded with twice this value.
            visualize: When true, ``on_new_frame`` receives every RGB frame.
            single_life: When true, losing a life counts as terminal.
        """
        ale = ALEInterface()
        self.ale = ale
        ale.setInt(b"random_seed", 2 * random_seed)
        # For fuller control on explicit action repeat (>= ALE 0.5.0)
        ale.setFloat(b"repeat_action_probability", 0.0)
        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        ale.setInt(b"frame_skip", 1)
        ale.setBool(b"color_averaging", False)
        ale.loadROM(str.encode(rom_addr))

        self.legal_actions = ale.getMinimalActionSet()
        self.screen_width, self.screen_height = ale.getScreenDims()
        self.lives = ale.lives()
        self.writer = imageio.get_writer('breakout0.gif', fps=30)
        self.random_start = random_start
        self.single_life_episodes = single_life
        self.call_on_new_frame = visualize

        # Processed historical frames that will be fed in to the network
        # (i.e., four 84x84 images)
        self.observation_pool = ObservationPool(
            np.zeros((84, 84, 4), dtype=np.uint8))
        height, width = self.screen_height, self.screen_width
        self.rgb_screen = np.zeros((height, width, 3), dtype=np.uint8)
        self.gray_screen = np.zeros((height, width, 1), dtype=np.uint8)
        self.frame_pool = FramePool(
            np.empty((2, height, width), dtype=np.uint8),
            self.__process_frame_pool)

    def get_legal_actions(self):
        """Return the minimal action set obtained at construction."""
        return self.legal_actions

    def __get_screen_image(self):
        """Refresh the grayscale buffer and return it without its last axis.

        When visualisation is enabled the RGB buffer is refreshed as well
        and handed to ``on_new_frame``.
        """
        self.ale.getScreenGrayscale(self.gray_screen)
        if self.call_on_new_frame:
            self.ale.getScreenRGB(self.rgb_screen)
            self.on_new_frame(self.rgb_screen)
        return np.squeeze(self.gray_screen)

    def on_new_frame(self, frame):
        """Hook called with each RGB frame; does nothing by default."""
        pass

    def __new_game(self):
        """Restart the game, optionally followed by a random warm-up."""
        self.ale.reset_game()
        self.lives = self.ale.lives()
        if not self.random_start:
            return
        warm_up_steps = random.randint(0, MAX_START_WAIT)
        for _ in range(warm_up_steps):
            self.ale.act(self.legal_actions[0])

    def __process_frame_pool(self, frame_pool):
        """Max-blend the pooled frames and resize the result to 84x84 uint8."""
        blended = np.amax(frame_pool, axis=0)
        resized = imresize(blended, (84, 84), interp='nearest')
        return resized.astype(np.uint8)

    def __action_repeat(self, a, times=ACTION_REPEAT):
        """Repeat action index ``a`` ``times`` times; return the summed reward.

        Only the last ``FRAMES_IN_POOL`` frames are pushed into the frame
        pool.
        """
        ale_action = self.legal_actions[a]
        reward = 0
        for _ in range(times - FRAMES_IN_POOL):
            reward += self.ale.act(ale_action)
        for _ in range(FRAMES_IN_POOL):
            reward += self.ale.act(ale_action)
            self.frame_pool.new_frame(self.__get_screen_image())
        return reward

    def get_initial_state(self):
        """Start a new game and return the first pooled observation.

        Raises:
            Exception: If the game is already terminal after the four
                warm-up action repeats.
        """
        self.__new_game()
        for _ in range(4):
            self.__action_repeat(0)
            self.observation_pool.new_observation(
                self.frame_pool.get_processed_frame())
        if self.__is_terminal():
            raise Exception('This should never happen.')
        return self.observation_pool.get_pooled_observations()

    def next(self, action):
        """Apply the arg-max entry of ``action``.

        Returns:
            ``(observation, reward, terminal)``.
        """
        chosen = np.argmax(action)
        reward = self.__action_repeat(chosen)
        self.observation_pool.new_observation(
            self.frame_pool.get_processed_frame())
        # Evaluate terminal before refreshing the stored life count.
        terminal = self.__is_terminal()
        self.lives = self.ale.lives()
        observation = self.observation_pool.get_pooled_observations()
        return observation, reward, terminal

    def __is_terminal(self):
        """Game over, or a lost life when single-life episodes are enabled."""
        if not self.single_life_episodes:
            return self.__is_over()
        return self.__is_over() or (self.lives > self.ale.lives())

    def __is_over(self):
        """Return ``ALEInterface.game_over()``."""
        return self.ale.game_over()

    def get_noop(self):
        """Return the action vector used as a no-op."""
        return [1.0, 0.0]
class AtariEmulator:
    """Atari environment with a rolling frame buffer and blended observations."""

    def __init__(self, args):
        """Initialize Atari environment.

        ``args`` must provide ``buffer_length``, ``screen_dims``,
        ``frame_skip``, ``blend_method``, ``reward_processing``,
        ``max_start_wait``, ``history_length``, ``watch``, ``rom_path`` and
        ``game``. The game is reset at the end of construction.
        """
        # Parameters
        self.buffer_length = args.buffer_length
        self.screen_dims = args.screen_dims
        self.frame_skip = args.frame_skip
        self.blend_method = args.blend_method
        self.reward_processing = args.reward_processing
        self.max_start_wait = args.max_start_wait
        self.history_length = args.history_length
        self.start_frames_needed = self.buffer_length - 1 + (
            (args.history_length - 1) * self.frame_skip)

        # Initialize ALE instance
        self.ale = ALEInterface()
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        if args.watch:
            self.ale.setBool(b'sound', True)
            self.ale.setBool(b'display_screen', True)
        self.ale.loadROM(str.encode(args.rom_path + '/' + args.game + '.bin'))

        self.buffer = np.empty((self.buffer_length, 210, 160))
        self.current = 0
        self.action_set = self.ale.getMinimalActionSet()
        self.lives = self.ale.lives()

        self.reset()

    def get_possible_actions(self):
        """Return list of possible actions for game."""
        return self.action_set

    def get_screen(self):
        """Add the current grayscale screen to the circular frame buffer."""
        self.buffer[self.current] = np.squeeze(self.ale.getScreenGrayscale())
        self.current = (self.current + 1) % self.buffer_length

    def reset(self):
        """Reset the game and return the list of initial state tuples.

        A random number of actions using the first entry of the action set
        is executed first. If the agent is already terminal afterwards,
        ``max_start_wait`` is decreased by one and the reset is retried; the
        process exits once ``max_start_wait`` drops below 0.
        """
        self.ale.reset_game()
        self.lives = self.ale.lives()

        if self.max_start_wait < 0:
            print("ERROR: max start wait decreased beyond 0")
            sys.exit()
        elif self.max_start_wait <= self.start_frames_needed:
            wait = 0
        else:
            wait = random.randint(
                0, self.max_start_wait - self.start_frames_needed)
        for _ in range(wait):
            self.ale.act(self.action_set[0])

        # Fill frame buffer
        self.get_screen()
        for _ in range(self.buffer_length - 1):
            self.ale.act(self.action_set[0])
            self.get_screen()

        # get initial_states
        # NOTE(review): this first entry is a 4-tuple while run_step()
        # returns 5-tuples; confirm consumers expect the mixed lengths.
        state = [(self.preprocess(), 0, 0, False)]
        for _ in range(self.history_length - 1):
            state.append(self.run_step(0))

        # make sure agent hasn't died yet
        if self.isTerminal():
            print("Agent lost during start wait. Decreasing max_start_wait by 1")
            self.max_start_wait -= 1
            return self.reset()

        return state

    def run_step(self, action):
        """Apply action to game and return next screen and reward.

        Returns:
            ``(frame, action, reward, terminal, raw_reward)`` where
            ``reward`` is clipped to [-1, 1] when ``reward_processing`` is
            ``'clip'``.
        """
        raw_reward = 0
        for _ in range(self.frame_skip):
            raw_reward += self.ale.act(self.action_set[action])
            self.get_screen()

        if self.reward_processing == 'clip':
            reward = np.clip(raw_reward, -1, 1)
        else:
            reward = raw_reward

        # Evaluate terminal before refreshing the stored life count.
        terminal = self.isTerminal()
        self.lives = self.ale.lives()

        return (self.preprocess(), action, reward, terminal, raw_reward)

    def preprocess(self):
        """Blend the frame buffer and resize it to ``screen_dims``.

        Raises:
            ValueError: If ``blend_method`` is not ``"max"``. Previously
                ``None`` was handed to ``cv2.resize`` in that case.
        """
        if self.blend_method != "max":
            raise ValueError(
                "unsupported blend_method: %r" % (self.blend_method,))
        img = np.amax(self.buffer, axis=0)
        return cv2.resize(img, self.screen_dims,
                          interpolation=cv2.INTER_LINEAR)

    def isTerminal(self):
        """True on game over or when a life was lost since the last update."""
        return (self.isGameOver() or (self.lives > self.ale.lives()))

    def isGameOver(self):
        """Return ``ALEInterface.game_over()``."""
        return self.ale.game_over()
class ALEEnvironment(Environment):
    """Environment backed by the Arcade Learning Environment.

    In ``'train'`` mode losing a life is treated as a terminal state; the
    ``mode`` attribute itself is not set in this class.
    """

    def __init__(self, rom_file, args):
        """Configure ALE from ``args`` and load ``rom_file``.

        ``args`` must provide ``display_screen``, ``frame_skip``,
        ``repeat_action_probability``, ``color_averaging``, ``random_seed``,
        ``record_screen_path``, ``record_sound_filename``,
        ``minimal_action_set``, ``screen_width`` and ``screen_height``.
        """
        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()
        if args.display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability',
                          args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)

        # `is not None` so that an explicit seed of 0 is honoured; a plain
        # truthiness test silently dropped it.
        if args.random_seed is not None:
            self.ale.setInt('random_seed', args.random_seed)

        if args.record_screen_path:
            if not os.path.exists(args.record_screen_path):
                logger.info("Creating folder %s", args.record_screen_path)
                os.makedirs(args.record_screen_path)
            logger.info("Recording screens to %s", args.record_screen_path)
            self.ale.setString('record_screen_dir', args.record_screen_path)

        if args.record_sound_filename:
            logger.info("Recording sound to %s", args.record_sound_filename)
            self.ale.setBool('sound', True)
            self.ale.setString('record_sound_filename',
                               args.record_sound_filename)

        self.ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d"
                        % len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d"
                        % len(self.actions))
        logger.debug("Actions: " + str(self.actions))

        self.screen_width = args.screen_width
        self.screen_height = args.screen_height

        self.life_lost = False

    def numActions(self):
        """Return the number of available actions."""
        return len(self.actions)

    def restart(self):
        """Start a new episode.

        In test mode, the game is simply initialized. In train mode, if the
        game is in terminal state due to a life loss but not yet game over,
        then only the life loss flag is reset so that the next game starts
        from the current state. Otherwise, the game is simply initialized.
        """
        if (
            self.mode == 'test' or
            not self.life_lost or  # `reset` called in a middle of episode
            self.ale.game_over()  # all lives are lost
        ):
            self.ale.reset_game()
        self.life_lost = False

    def act(self, action):
        """Execute the action at index ``action`` and return its reward.

        Also records whether the life count changed during the step.
        """
        lives = self.ale.lives()
        reward = self.ale.act(self.actions[action])
        self.life_lost = lives != self.ale.lives()
        return reward

    def getScreen(self):
        """Return the grayscale screen resized to the configured dimensions."""
        screen = self.ale.getScreenGrayscale()
        return cv2.resize(screen, (self.screen_width, self.screen_height))

    def isTerminal(self):
        """Game over; in train mode a lost life counts as terminal too."""
        if self.mode == 'train':
            return self.ale.game_over() or self.life_lost
        return self.ale.game_over()
class Environment:
    """Runs training, evaluation and video recording of an agent on an ALE game.

    The class owns the emulator (``self.api``), a small ring buffer holding
    the most recent grayscale screens, and the log/network directories
    written during training.
    """

    BUFFER_LEN = 2
    EPISODE_FRAMES = 18000
    EPOCH_COUNT = 200
    EPOCH_STEPS = 250000
    EVAL_EPS = 0.001
    FRAMES_SKIP = 4
    FRAME_HEIGHT = 84
    FRAME_WIDTH = 84
    MAX_NO_OP = 30
    MAX_REWARD = 1

    def __init__(self, rom_name, rng, display_screen=False):
        """Create the emulator, load ``'../rom/' + rom_name`` and allocate buffers.

        rom_name: file name of the ROM, relative to ``../rom/``.
        rng: random generator exposing ``randint``; used for the emulator
            seed and for the number of initial no-ops.
        display_screen: forwarded to the emulator's ``display_screen`` option.
        """
        self.api = ALEInterface()
        self.api.setInt('random_seed', rng.randint(333))
        self.api.setBool('display_screen', display_screen)
        self.api.setFloat('repeat_action_probability', 0.0)
        self.rom_name = rom_name
        self.display_screen = display_screen
        self.rng = rng
        self.repeat = Environment.FRAMES_SKIP
        self.buffer_len = Environment.BUFFER_LEN
        self.height = Environment.FRAME_HEIGHT
        self.width = Environment.FRAME_WIDTH
        # Floor division keeps the step budget an integer on every Python
        # version (same value as the Python 2 `/` on these ints).
        self.episode_steps = Environment.EPISODE_FRAMES // Environment.FRAMES_SKIP
        self.merge_id = 0
        self.max_reward = Environment.MAX_REWARD
        self.eval_eps = Environment.EVAL_EPS
        self.log_dir = ''
        self.network_dir = ''
        # `_prepare_game` reads this flag; initialise it so an episode can be
        # run before `train`/`evaluate` have set it.
        self.need_reset = True
        self.api.loadROM('../rom/' + self.rom_name)
        self.minimal_actions = self.api.getMinimalActionSet()
        original_width, original_height = self.api.getScreenDims()
        self.merge_frame = np.zeros(
            (self.buffer_len, original_height, original_width),
            dtype=np.uint8)

    def get_action_count(self):
        """Return the number of actions in the minimal action set."""
        return len(self.minimal_actions)

    def train(self, agent, store_freq, folder=None, start_epoch=0):
        """Train ``agent`` from ``start_epoch`` up to ``EPOCH_COUNT`` epochs.

        Every epoch runs episodes until ``EPOCH_STEPS`` steps are consumed,
        then evaluates the agent and appends the results to the log files.

        store_freq: forwarded to ``_update_log_files`` to decide when the
            experience buffer is dumped.
        folder: existing run folder to continue logging into, or None to
            create a fresh one.
        """
        self._open_log_files(agent, folder)
        obs = np.zeros((self.height, self.width), dtype=np.uint8)
        epoch_count = Environment.EPOCH_COUNT

        for epoch in xrange(start_epoch, epoch_count):
            self.need_reset = True
            steps_left = Environment.EPOCH_STEPS

            print("\n" + "=" * 50)
            print("Epoch #%d" % (epoch + 1))
            episode = 0
            train_start = time.time()
            while steps_left > 0:
                num_step, _ = self._run_episode(agent, steps_left, obs)
                steps_left -= num_step
                episode += 1
                if steps_left == 0 or episode % 10 == 0:
                    print("Finished episode #%d, steps_left = %d"
                          % (episode, steps_left))
            train_end = time.time()

            valid_values = agent.get_validate_values()
            eval_values = self.evaluate(agent)
            test_end = time.time()

            train_time = train_end - train_start
            test_time = test_end - train_end
            # max(1, ...) guards the division when an epoch takes under 1s.
            step_per_sec = Environment.EPOCH_STEPS * 1. / max(1, train_time)
            print("\tFinished epoch #%d, episode trained = %d\n"
                  "\tValidate values = %.3f, evaluate reward = %.3f\n"
                  "\tTrain time = %.0fs, test time = %.0fs, steps/sec = %.4f"
                  % (epoch + 1, episode, valid_values, eval_values,
                     train_time, test_time, step_per_sec))

            self._update_log_files(agent, epoch + 1, episode,
                                   valid_values, eval_values,
                                   train_time, test_time,
                                   step_per_sec, store_freq)
            gc.collect()

    def evaluate(self, agent, episodes=30, obs=None):
        """Run ``episodes`` evaluation episodes and return the mean reward.

        obs: optional (height, width) uint8 buffer reused for observations;
            one is allocated when omitted.
        """
        print("\n***Start evaluating")
        if obs is None:
            obs = np.zeros((self.height, self.width), dtype=np.uint8)
        sum_reward = 0.0
        sum_step = 0.0
        for episode in xrange(episodes):
            self.need_reset = True
            step, reward = self._run_episode(agent, self.episode_steps, obs,
                                             self.eval_eps, evaluating=True)
            sum_reward += reward
            sum_step += step
            print("Finished episode %d, reward = %d, step = %d"
                  % (episode + 1, reward, step))
        # Force a reset before whatever episode runs next.
        self.need_reset = True
        print("Average reward per episode = %.4f" % (sum_reward / episodes))
        print("Average step per episode = %.4f" % (sum_step / episodes))
        return sum_reward / episodes

    def _prepare_game(self):
        """Reset the game when required and refill the screen buffer."""
        if self.need_reset or self.api.game_over():
            self.api.reset_game()
            self.need_reset = False
            if Environment.MAX_NO_OP > 0:
                # Random number of no-op actions (action 0) after a reset.
                num_no_op = self.rng.randint(Environment.MAX_NO_OP + 1) \
                    + self.buffer_len
                for _ in xrange(num_no_op):
                    self.api.act(0)

        for _ in xrange(self.buffer_len):
            self._update_buffer()

    def _run_episode(self, agent, steps_left, obs, eps=0.0, evaluating=False):
        """Play one episode and return ``(step_count, sum_reward)``.

        The episode ends on game over, on a lost life (only when not
        evaluating) or once ``steps_left`` steps have been taken. The
        unclipped reward is summed; the clipped reward is what the agent
        receives.
        """
        self._prepare_game()

        start_lives = self.api.lives()
        step_count = 0
        sum_reward = 0
        is_terminal = False
        while step_count < steps_left and not is_terminal:
            self._get_screen(obs)
            action_id, _ = agent.get_action(obs, eps, evaluating)

            reward = self._repeat_action(self.minimal_actions[action_id])
            reward_clip = reward
            if self.max_reward > 0:
                reward_clip = np.clip(reward, -self.max_reward, self.max_reward)

            life_lost = not evaluating and self.api.lives() < start_lives
            is_terminal = self.api.game_over() or life_lost \
                or step_count + 1 >= steps_left

            agent.add_experience(obs, is_terminal, action_id, reward_clip,
                                 evaluating)
            sum_reward += reward
            step_count += 1

        return step_count, sum_reward

    def _update_buffer(self):
        """Write the current grayscale screen into the next ring-buffer slot."""
        self.api.getScreenGrayscale(self.merge_frame[self.merge_id, ...])
        self.merge_id = (self.merge_id + 1) % self.buffer_len

    def _repeat_action(self, action):
        """Apply ``action`` ``self.repeat`` times and return the summed reward.

        Only the last ``buffer_len`` frames are captured into the buffer.
        """
        reward = 0
        for i in xrange(self.repeat):
            reward += self.api.act(action)
            if i + self.buffer_len >= self.repeat:
                self._update_buffer()
        return reward

    def _get_screen(self, resized_frame):
        """Fill ``resized_frame`` with the pixel-wise max of the buffered screens."""
        self._resize_frame(self.merge_frame.max(axis=0), resized_frame)

    def _resize_frame(self, src_frame, dst_frame):
        """Resize ``src_frame`` to (width, height), passing ``dst_frame`` as cv2's ``dst``."""
        cv2.resize(src=src_frame, dst=dst_frame,
                   dsize=(self.width, self.height),
                   interpolation=cv2.INTER_LINEAR)

    def _open_log_files(self, agent, folder):
        """Create the run directories and write the run-description files.

        When ``folder`` is given the run continues there and the CSV files
        are left untouched; otherwise a new timestamped directory is used
        and the CSV headers are written.
        """
        time_str = time.strftime("_%m-%d-%H-%M", time.localtime())
        base_rom_name = os.path.splitext(os.path.basename(self.rom_name))[0]

        if folder is not None:
            self.log_dir = folder
            self.network_dir = self.log_dir + '/network'
        else:
            self.log_dir = '../run_results/' + base_rom_name + time_str
            self.network_dir = self.log_dir + '/network'

        info_name = get_next_name(self.log_dir, 'info', 'txt')
        git_name = get_next_name(self.log_dir, 'git-diff', '')

        try:
            os.stat(self.log_dir)
        except OSError:
            os.makedirs(self.log_dir)

        try:
            os.stat(self.network_dir)
        except OSError:
            os.makedirs(self.network_dir)

        with open(os.path.join(self.log_dir, info_name), 'w') as f:
            f.write('Commit: ' + subprocess.check_output(['git', 'rev-parse',
                                                          'HEAD']))
            f.write('Run command: ')
            f.write(' '.join(pipes.quote(x) for x in sys.argv))
            f.write('\n\n')
            f.write(agent.get_info())
            write_info(f, Environment)
            write_info(f, agent.__class__)
            write_info(f, agent.network.__class__)

        # From https://github.com/spragunr/deep_q_rl/pull/49/files
        with open(os.path.join(self.log_dir, git_name), 'w') as f:
            f.write(subprocess.check_output(['git', 'diff', 'HEAD']))

        # Continuing an existing run: keep its CSV files as they are.
        if folder is not None:
            return

        with open(os.path.join(self.log_dir, 'results.csv'), 'w') as f:
            f.write("epoch,episode_train,validate_values,evaluate_reward"
                    ",train_time,test_time,steps_per_second\n")

        mem = psutil.virtual_memory()
        with open(os.path.join(self.log_dir, 'memory.csv'), 'w') as f:
            f.write("epoch,available,free,buffers,cached"
                    ",available_readable,used_percent\n")
            f.write("%d,%d,%d,%d,%d,%s,%.1f\n" %
                    (0, mem.available, mem.free, mem.buffers, mem.cached,
                     bytes2human(mem.available), mem.percent))

    def _update_log_files(self, agent, epoch, episode, valid_values,
                          eval_values, train_time, test_time, step_per_sec,
                          store_freq):
        """Append one epoch to the CSV logs and dump the network.

        The experience buffer is dumped on the last epoch when
        ``store_freq >= 0``, and every ``store_freq`` epochs when
        ``store_freq > 0``.
        """
        print("Updating log files")
        with open(self.log_dir + '/results.csv', 'a') as f:
            f.write("%d,%d,%.4f,%.4f,%d,%d,%.4f\n" %
                    (epoch, episode, valid_values, eval_values,
                     train_time, test_time, step_per_sec))

        mem = psutil.virtual_memory()
        with open(self.log_dir + '/memory.csv', 'a') as f:
            f.write("%d,%d,%d,%d,%d,%s,%.1f\n" %
                    (epoch, mem.available, mem.free, mem.buffers, mem.cached,
                     bytes2human(mem.available), mem.percent))

        agent.dump_network(self.network_dir + ('/%03d' % (epoch)) + '.npz')

        if (store_freq >= 0 and epoch >= Environment.EPOCH_COUNT) or \
                (store_freq > 0 and (epoch % store_freq == 0)):
            agent.dump_exp(self.network_dir + '/exp.npz')

    def _setup_record(self, network_file):
        """Enable screen recording next to ``network_file`` and reload the ROM.

        Returns ``(img_dir, out_name)``: the directory receiving the frames
        and the path of the movie to produce.
        """
        file_name, _ = os.path.splitext(os.path.basename(network_file))
        time_str = time.strftime("_%m-%d-%H-%M", time.localtime())
        img_dir = os.path.dirname(network_file) + '/images_' \
            + file_name + time_str
        rom_name, _ = os.path.splitext(self.rom_name)
        out_name = os.path.dirname(network_file) + '/' + rom_name + '_' \
            + file_name + time_str + '.mov'
        print(out_name)

        try:
            os.stat(img_dir)
        except OSError:
            os.makedirs(img_dir)

        self.api.setString('record_screen_dir', img_dir)
        self.api.loadROM('../rom/' + self.rom_name)
        return img_dir, out_name

    def record_run(self, agent, network_file, episode_id=1):
        """Record episode number ``episode_id`` of ``agent`` as a movie.

        Earlier episodes are played without recording; the emulator state
        reached after them is restored once recording has been set up
        (which reloads the ROM). The frames are then encoded with ffmpeg,
        falling back to avconv.
        """
        if episode_id > 1:
            self.evaluate(agent, episode_id - 1)
            system_state = self.api.cloneSystemState()

        img_dir, out_name = self._setup_record(network_file)

        if episode_id > 1:
            self.api.restoreSystemState(system_state)

        self.evaluate(agent, 1)

        # Quote both paths so directories containing spaces or shell
        # metacharacters do not break (or alter) the command.
        frames_pattern = pipes.quote(img_dir + '/%06d.png')
        movie_path = pipes.quote(out_name)
        script = (
            "\n{\n"
            "    ffmpeg -r 60 -i %s -f mov -c:v libx264 %s\n"
            "} || {\n"
            "    avconv -r 60 -i %s -f mov -c:v libx264 %s\n"
            "}\n"
        ) % (frames_pattern, movie_path, frames_pattern, movie_path)
        os.system(script)
class AtariGame(Task):
    '''
    RL task based on Arcade Game.

    The state is a sliding window holding the last ``num_frames``
    preprocessed screens.
    '''

    def __init__(self, rom_path, num_frames=4, live=False, skip_frame=0, mode='normal'):
        '''
        rom_path: path handed to the emulator's ``loadROM``.
        num_frames: number of most recent frames kept as the state.
        live: when true, ask the emulator to display the screen.
        skip_frame: number of random actions taken after every reset.
        mode: 'small' max-pools the frame and uses 16x16 images; any other
            value uses 84x84 images.
        '''
        self.ale = ALEInterface()
        # Display setup is driven by `live` directly, so no helper flag can
        # be read before it has been assigned.
        if live:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)
        self.mode = mode
        self.live = live
        self.ale.loadROM(rom_path)
        self.num_frames = num_frames
        self.frames = []
        self.frame_id = 0
        self.cum_reward = 0
        self.skip_frame = skip_frame
        if mode == 'small':
            img = T.matrix('img')
            self.max_pool = theano.function([img], max_pool_2d(img, [4, 4]))
            self.img_shape = (16, 16)
        else:
            self.img_shape = (84, 84)  # image shape according to DQN Nature paper.
        # Warm up until the frame window is full. `step` never lets the
        # window grow beyond `num_frames`, so that is the bound to wait for
        # (a fixed bound of 4 would never be reached when num_frames < 4).
        while len(self.frames) < self.num_frames:
            self.step(choice(self.valid_actions, 1)[0])
        self.reset()

    def copy(self):
        '''Return a deep copy made by a dill serialization round trip.'''
        import dill as pickle
        return pickle.loads(pickle.dumps(self))

    def reset(self):
        '''Reset the game and counters, then take ``skip_frame`` random actions.'''
        self.ale.reset_game()
        self.frame_id = 0
        self.cum_reward = 0
        if self.skip_frame:
            for _ in range(self.skip_frame):
                self.step(choice(self.valid_actions, 1)[0])

    @property
    def _curr_frame(self):
        '''Current screen as a single luminance channel resized to ``img_shape``.'''
        img = self.ale.getScreenRGB()
        img = rgb2yuv(img)[:, :, 0]  # get Y channel, according to Nature paper.
        if self.mode == 'small':
            img = self.max_pool(img)
        img = imresize(img, self.img_shape, interp='bicubic')
        return img

    @property
    def curr_state(self):
        '''
        return raw pixels.
        '''
        return np.array(self.frames, dtype=floatX) / floatX(255.)  # normalize

    @property
    def state_shape(self):
        '''Shape of ``curr_state``.'''
        return self.curr_state.shape

    @property
    def num_actions(self):
        '''Number of legal actions.'''
        return len(self.valid_actions)

    @property
    def valid_actions(self):
        '''The emulator's legal action set (queried on every access).'''
        return self.ale.getLegalActionSet()

    def step(self, action):
        '''Apply ``action``, push the new frame into the window and return the reward.'''
        reward = self.ale.act(action)
        # Drop the oldest frame once the window is full.
        if len(self.frames) == self.num_frames:
            self.frames = self.frames[1:]
        self.frames.append(self._curr_frame)
        self.frame_id += 1
        self.cum_reward += reward
        return reward  # TODO: scale the gradient up.

    def is_end(self):
        '''Return True once any non-zero cumulative reward was seen or the game is over.'''
        if np.abs(self.cum_reward) > 0:
            return True
        return self.ale.game_over()

    def visualize(self, fig=1, fname=None, format='png'):
        '''Show the current frame, or save it to ``fname`` when given.'''
        import matplotlib.pyplot as plt
        fig = plt.figure(fig, figsize=(5, 5))
        plt.clf()
        plt.axis('off')
        res = plt.imshow(self._curr_frame, interpolation='none')
        if fname:
            plt.savefig(fname, format=format)
        else:
            plt.show()
        return res
class GameManager(object):
    """This class takes care of the interactions between an agent and
    a game across episodes, as well as overall logging of performance.
    """

    def __init__(
        self,
        game_name,
        agent,
        results_dir,
        n_epochs=1,
        n_episodes=None,
        n_frames=None,
        remove_old_results_dir=False,
        use_minimal_action_set=True,
        min_time_between_frames=0,
    ):
        """game_name is one of the supported games (there are many), as a string:
        "space_invaders.bin"

        agent is an instance of a subclass of the Agent interface

        results_dir is a string representing a directory in which results and
        logs are placed. If it does not exist, it is created.

        n_epochs is the number of epochs to run; exactly one of n_episodes
        and n_frames must be given and sets the length of each epoch.

        use_minimal_action_set determines whether the agent is offered all
        possible actions, or only those (minimal) that are applicable to the
        specific game.

        min_time_between_frames is the minimum required time in seconds
        between frames. If 0, the game is unrestricted.

        Raises ValueError unless exactly one of n_episodes / n_frames is set.
        """
        self.game_name = game_name
        self.agent = agent
        self.use_minimal_action_set = use_minimal_action_set
        self.min_time_between_frames = min_time_between_frames
        self.n_epochs = n_epochs
        self.n_episodes = n_episodes
        self.n_frames = n_frames
        # Emulator frame counter at the start of the running epoch; the
        # frame budget is measured relative to it.
        self._epoch_start_frame = 0

        # Both None or both set is an error: exactly one must be given.
        if (n_episodes is None) == (n_frames is None):
            raise ValueError("Extacly one of n_episodes and n_frames "
                             "must be defined")

        self.initialize_results_dir(results_dir, remove_old_results_dir)

        self.log = util.logging.Logger(
            ("settings", "step", "episode", "epoch", "overall"),
            "settings",
            os.path.join(self.results_dir, "GameManager.log"),
        )

        self.stats = util.logging.CSVLogger(
            os.path.join(self.results_dir, "stats.log"),
            header="epoch,episode,total_reward,n_frames,wall_time",
            print_items=True,
        )

        self._object_cache = dict()

        self.initialize_ale()
        self.initialize_agent()

        self.dump_settings()

    def initialize_results_dir(self, results_dir, remove_existing=False):
        """Creates the whole path of directories if they do no exist.
        If they do exist, raises an error unless remove_existing is True,
        in which case the existing directory is deleted.
        """
        now = datetime.now().strftime("%Y%m%d-%H-%M")
        # Drop the ROM extension (e.g. ".bin") and append the current time
        # down to the minute. splitext leaves extension-less names intact.
        base_name = os.path.splitext(self.game_name)[0]
        results_dir = os.path.join(results_dir, base_name + now)
        if remove_existing and os.path.exists(results_dir):
            shutil.rmtree(results_dir)
        # Should raise an error if directory exists
        os.makedirs(results_dir)
        self.results_dir = results_dir

    def initialize_ale(self):
        """Create the emulator and load the ROM for ``game_name``."""
        self.ale = ALEInterface()
        self.ale.loadROM(os.path.join(ROM_RELATIVE_LOCATION, self.game_name))

    def initialize_agent(self):
        """Hand the agent its state callbacks, results directory and actions."""
        RSC = namedtuple("RawStateCallbacks", ["raw", "grey", "rgb", "ram"])
        raw_state_callbacks = RSC(self.get_screen,
                                  self.get_screen_grayscale,
                                  self.get_screen_RGB,
                                  self.get_RAM)
        self.agent.set_raw_state_callbacks(raw_state_callbacks)
        self.agent.set_results_dir(self.results_dir)

        if self.use_minimal_action_set:
            actions = self.ale.getMinimalActionSet()
        else:
            actions = self.ale.getLegalActionSet()

        self.agent.set_available_actions(actions)

    def rest(self, already_elapsed):
        """Sleep for whatever remains of ``min_time_between_frames``."""
        rest_time = self.min_time_between_frames - already_elapsed
        if rest_time > 0:
            sleep(rest_time)

    def run(self):
        """Runs self.n_epochs epochs, where the agent's learning is
        reset for each new epoch.
        Each epoch lasts self.n_episodes or self.n_frames, whichever is
        defined.
        """
        self.log.overall("Starting run")
        run_start = time()
        for epoch in range(self.n_epochs):
            self.agent.reset()
            self.n_epoch = epoch
            self._run_epoch()
        self.log.overall("End of run ({:.2f} s)".format(time() - run_start))

    def _run_epoch(self):
        """Run episodes until the epoch's stop condition is met."""
        self.n_episode = 0
        # The emulator's frame counter is not reset between epochs, so
        # remember where this epoch starts.
        self._epoch_start_frame = self.ale.getFrameNumber()

        start = time()
        while not self._stop_condition_met():
            self._run_episode()
            self.n_episode += 1
        wall_time = time() - start

        self.log.epoch("Finished epoch after {:.2f} seconds".format(wall_time))

    def _run_episode(self):
        """Play one episode and append a row to the stats log."""
        self.ale.reset_game()
        self.agent.on_episode_start()

        total_reward = 0
        episode_start = time()

        while (not self.ale.game_over()) and (not self._stop_condition_met()):
            timestep_start = time()

            action = self.agent.select_action()
            reward = self.ale.act(action)
            self.agent.receive_reward(reward)

            total_reward += reward

            self.rest(time() - timestep_start)

        wall_time = time() - episode_start

        self.agent.on_episode_end()

        # Stats format: CSV with epoch, episode, total_reward, n_frames, wall_time
        self.stats.write(
            self.n_epoch,
            self.n_episode,
            total_reward,
            self.ale.getEpisodeFrameNumber(),
            "{:.2f}".format(wall_time)
        )

    def _stop_condition_met(self):
        """Return True once the current epoch has used up its budget.

        The budget is ``n_episodes`` episodes when that is set (including
        0), otherwise ``n_frames`` emulator frames counted from the start
        of the current epoch.
        """
        if self.n_episodes is not None:
            return self.n_episode >= self.n_episodes
        frames_this_epoch = self.ale.getFrameNumber() - self._epoch_start_frame
        return frames_this_epoch >= self.n_frames

    # Methods for state perception
    def get_screen(self):
        """Returns a matrix containing the current game screen in raw pixel data,
        i.e. before conversion to RGB. Handles reuse of np.array object, so it
        will overwrite what is in the old object"""
        return self._cached("raw", self.ale.getScreen)

    def get_screen_grayscale(self):
        """Returns an np.array with the screen grayscale colours.
        Handles reuse of np.array object, so it will overwrite what
        is in the old object.
        """
        return self._cached("gray", self.ale.getScreenGrayscale)

    def get_screen_RGB(self):
        """Returns a numpy array with the screen's RGB colours.
        The first positions contain the red colours, followed by
        the green colours and then the blue colours"""
        return self._cached("rgb", self.ale.getScreenRGB)

    def get_RAM(self):
        """Returns a vector containing current RAM content (byte-level).
        Handles reuse of np.array object, so it will overwrite what
        is in the old object"""
        return self._cached("ram", self.ale.getRAM)

    def _cached(self, key, func):
        """Call ``func`` reusing the object stored under ``key``.

        The first call stores ``func()``; later calls pass the stored
        object to ``func`` so it can be overwritten in place.
        """
        if key in self._object_cache:
            func(self._object_cache[key])
        else:
            self._object_cache[key] = func()

        return self._object_cache[key]

    def dump_settings(self):
        """Write ``get_settings()`` as JSON to ``<results_dir>/settings``."""
        import json

        settings = self.get_settings()
        path = os.path.join(self.results_dir, "settings")
        with open(path, "w") as f:
            json.dump(settings, f, indent=4)

    def get_settings(self):
        """Returns a dict representing the settings needed to
        reproduce this object and its subobjects
        """
        return {
            "game_name": self.game_name,
            "n_epochs": self.n_epochs,
            "n_episodes": self.n_episodes,
            "n_frames": self.n_frames,
            "agent": self.agent.get_settings(),
            "results_dir": self.results_dir,
            "use_minimal_action_set": self.use_minimal_action_set,
        }
class GameState(object):
    """Holds an ALE game and the stack of its four latest preprocessed frames.

    ``s_t`` is the current (84, 84, 4) float32 state; ``process`` builds the
    next state in ``s_t1`` and ``update`` promotes it to ``s_t``.
    """

    def __init__(self, rand_seed, display=False):
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', rand_seed)

        if display:
            self._setup_display()

        self.ale.loadROM(ROM)

        # Raw screen buffer: height=210, width=160
        self.screen = np.empty((210, 160, 1), dtype=np.uint8)

        # Take one step with action 0 so there is a first frame to read.
        no_action = 0
        self.reward = self.ale.act(no_action)
        self.terminal = self.ale.game_over()

        first_frame = self._capture_frame()
        # The initial state repeats the first frame four times.
        self.s_t = np.stack([first_frame] * 4, axis=2)

        # Collect only the actions that are actually used by this game.
        self.real_actions = self.ale.getMinimalActionSet()

    def _capture_frame(self):
        """Read the screen and return it as a float32 (84, 84) array scaled by 1/255."""
        # getScreenGrayscale fills self.screen, whose shape is (210, 160, 1)
        self.ale.getScreenGrayscale(self.screen)
        # Drop the channel axis: (210, 160)
        flat_screen = np.reshape(self.screen, (210, 160))
        # Resize to height=110, width=84 (cv2 takes the size as (width, height))
        small_screen = cv2.resize(flat_screen, (84, 110))
        # Keep rows 18..101, giving an 84x84 crop.
        frame = small_screen[18:102, :].astype(np.float32)
        frame *= (1.0/255.0)
        return frame

    def _setup_display(self):
        """Turn on the emulator display, with sound only on Linux."""
        platform = sys.platform
        if platform == 'darwin':
            import pygame
            pygame.init()
            self.ale.setBool('sound', False)
        elif platform.startswith('linux'):
            self.ale.setBool('sound', True)
        self.ale.setBool('display_screen', True)

    def process(self, action):
        """Apply the action at index ``action`` and build the next state ``s_t1``.

        Updates ``reward`` and ``terminal``; the game is reset when it is over.
        """
        # Convert the index into one of the actions the game actually uses
        # (out of the 18 emulator actions).
        self.reward = self.ale.act(self.real_actions[action])
        self.terminal = self.ale.game_over()

        newest = np.reshape(self._capture_frame(), (84, 84, 1))
        # The newest frame goes first; the oldest of the previous four is dropped.
        self.s_t1 = np.append(newest, self.s_t[:, :, 0:3], axis=2)

        if self.terminal:
            self.ale.reset_game()

    def update(self):
        """Make the most recently processed state the current one."""
        self.s_t = self.s_t1
class AtariPlayer(RLEnvironment):
    """
    A wrapper for atari emulator.
    NOTE: will automatically restart when a real episode ends
    """

    def __init__(self, rom_file, viz=0, height_range=(None,None),
                 frame_skip=4, image_shape=(84, 84), nullop_start=30,
                 live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_lost_as_eoe: consider lost of lives as end of episode. useful for training.
        """
        super(AtariPlayer, self).__init__()
        rom_missing = not os.path.isfile(rom_file)
        if rom_missing and '/' not in rom_file:
            # A bare file name is looked up in the dataset directory.
            rom_file = get_dataset_dir('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            log_once()

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)

            self.ale.setInt(b"random_seed", self.rng.randint(0, 10000))
            self.ale.setBool(b"showinfo", False)
            # Frame skipping is done in `action`, not by the emulator.
            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                # A string is a directory the emulator records frames into.
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            show_window = self.viz and isinstance(self.viz, float)
            if show_window:
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))

        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        rgb = self.ale.getScreenRGB()
        return rgb.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w, 1) float32 image
        """
        # max-pooled over the last screen
        frame = np.maximum(self._grab_raw_image(), self.last_raw_screen)
        if self.viz and isinstance(self.viz, float):
            cv2.imshow(self.windowname, frame)
            time.sleep(self.viz)
        row_start = self.height_range[0]
        row_end = self.height_range[1]
        frame = frame[row_start:row_end, :].astype('float32')
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        gray = cv2.resize(gray, self.image_shape)
        return np.expand_dims(gray, axis=2)

    def get_action_space(self):
        """:returns: a discrete action space with one entry per minimal action"""
        return DiscreteActionSpace(len(self.actions))

    def restart_episode(self):
        """Record the finished episode's score, reset the game and play null-ops."""
        score = self.current_episode_score
        if score.count > 0:
            self.stats['score'].append(score.sum)
        score.reset()
        self.ale.reset_game()

        # random null-ops start
        n_nullops = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        last_index = n_nullops - 1
        for k in range(n_nullops):
            # Keep the screen shown just before the final null-op.
            if k == last_index:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        emulator_action = self.actions[act]
        last_index = self.frame_skip - 1
        total = 0
        for k in range(self.frame_skip):
            # Keep the screen shown just before the final repeated frame.
            if k == last_index:
                self.last_raw_screen = self._grab_raw_image()
            total += self.ale.act(emulator_action)
            newlives = self.ale.lives()
            if self.ale.game_over():
                break
            if self.live_lost_as_eoe and newlives < oldlives:
                break

        self.current_episode_score.feed(total)
        isOver = self.ale.game_over()
        if isOver:
            self.restart_episode()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives
        return total, isOver
class MyEnv(Environment):
    """ALE-backed environment producing 84x84 grayscale observations.

    While in validation mode the environment accumulates the score and the
    number of episodes so that ``summarizePerformance`` can report a mean.
    """

    VALIDATION_MODE = 0

    # Emulator options used when the caller does not pass `ale_options`.
    # Kept out of the signature so no mutable object is shared between calls.
    _DEFAULT_ALE_OPTIONS = (
        {"key": "random_seed", "value": 0},
        {"key": "color_averaging", "value": True},
        {"key": "repeat_action_probability", "value": 0.},
    )

    def __init__(self, rng, rom="ale/breakout.bin", frame_skip=4, ale_options=None):
        """Create the emulator, apply the options and load the ROM.

        rng: random generator exposing ``randint``; used for the number of
            initial no-ops in ``reset``.
        rom: path of the ROM to load.
        frame_skip: number of emulator frames per ``act`` call (at least 1).
        ale_options: iterable of ``{"key": ..., "value": ...}`` dicts whose
            values are int, float or bool; None selects the default options
            (seed 0, color averaging on, repeat action probability 0).

        Raises ValueError for an option value of any other type.
        """
        if ale_options is None:
            ale_options = MyEnv._DEFAULT_ALE_OPTIONS

        self._mode = -1
        self._modeScore = 0.0
        self._modeEpisodeCount = 0

        self._frameSkip = frame_skip if frame_skip >= 1 else 1
        self._randomState = rng
        self._ale = ALEInterface()
        for option in ale_options:
            # Exact type tests on purpose: bool is a subclass of int, so an
            # isinstance check would route booleans to setInt.
            t = type(option["value"])
            if t is int:
                self._ale.setInt(option["key"], option["value"])
            elif t is float:
                self._ale.setFloat(option["key"], option["value"])
            elif t is bool:
                self._ale.setBool(option["key"], option["value"])
            else:
                raise ValueError("Option {} ({}) is not an int, bool or float.".format(option["key"], t))
        self._ale.loadROM(rom)

        w, h = self._ale.getScreenDims()
        self._screen = np.empty((h, w), dtype=np.uint8)
        self._reducedScreen = np.empty((84, 84), dtype=np.uint8)
        self._actions = self._ale.getMinimalActionSet()

    def reset(self, mode):
        """Start a new episode in ``mode`` and return an all-zero initial observation.

        Entering validation mode clears the score and episode counters;
        further resets while in validation mode count one more episode.
        """
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._modeScore = 0.0
                self._modeEpisodeCount = 0
            else:
                self._modeEpisodeCount += 1
        elif self._mode != -1:  # and thus mode == -1
            self._mode = -1

        self._ale.reset_game()
        # Random number of no-op actions (action 0) after the reset.
        for _ in range(self._randomState.randint(15)):
            self._ale.act(0)
        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84), self._reducedScreen, interpolation=cv2.INTER_NEAREST)

        return [4 * [84 * [84 * [0]]]]

    def act(self, action):
        """Repeat the action at index ``action`` and return the sign of the reward.

        The action is applied up to ``frame_skip`` times, stopping early on
        game over; the unclipped reward is added to the mode score.
        """
        action = self._actions[action]

        reward = 0
        for _ in range(self._frameSkip):
            reward += self._ale.act(action)
            if self.inTerminalState():
                break

        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84), self._reducedScreen, interpolation=cv2.INTER_NEAREST)

        self._modeScore += reward
        return np.sign(reward)

    def summarizePerformance(self, test_data_set):
        """Print the mean score per episode accumulated in the current mode."""
        # An episode still running when the summary is requested counts too.
        if not self.inTerminalState():
            self._modeEpisodeCount += 1
        print("== Mean score per episode is {} over {} episodes ==".format(self._modeScore / self._modeEpisodeCount, self._modeEpisodeCount))

    def inputDimensions(self):
        """Return the observation dimensions: one subject of 4 frames of 84x84."""
        return [(4, 84, 84)]

    def observationType(self, subject):
        """Return the dtype of the observations."""
        return np.uint8

    def nActions(self):
        """Return the number of actions in the minimal action set."""
        return len(self._actions)

    def observe(self):
        """Return a copy of the latest reduced screen, wrapped in a list."""
        return [np.array(self._reducedScreen)]

    def inTerminalState(self):
        """Return whether the emulator reports game over."""
        return self._ale.game_over()
class AtariEnvironment:
    """ALE wrapper that tracks the current state, score and step/game counters."""

    def __init__(self, args, outputDir):
        """Create the emulator with fixed seeds, load ``args.rom`` and start a game."""
        self.outputDir = outputDir
        self.screenCaptureFrequency = args.screen_capture_freq

        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 123456)
        random.seed(123456)
        # Fix https://groups.google.com/forum/#!topic/deep-q-learning/p4FAIaabwlo
        self.ale.setFloat(b'repeat_action_probability', 0.0)

        # Load the ROM file
        self.ale.loadROM(args.rom)

        self.actionSet = self.ale.getMinimalActionSet()

        self.gameNumber = 0
        self.stepNumber = 0
        self.resetGame()

    def getNumActions(self):
        """Number of actions in the minimal action set."""
        return len(self.actionSet)

    def getState(self):
        """Current state object."""
        return self.state

    def getGameNumber(self):
        """Index of the current game."""
        return self.gameNumber

    def getFrameNumber(self):
        """Emulator frame number."""
        return self.ale.getFrameNumber()

    def getEpisodeFrameNumber(self):
        """Emulator frame number within the current episode."""
        return self.ale.getEpisodeFrameNumber()

    def getEpisodeStepNumber(self):
        """Number of ``step`` calls since the last ``resetGame``."""
        return self.episodeStepNumber

    def getStepNumber(self):
        """Number of ``step`` calls since construction."""
        return self.stepNumber

    def getGameScore(self):
        """Reward accumulated since the last ``resetGame``."""
        return self.gameScore

    def isGameOver(self):
        """Whether the emulator reports game over."""
        return self.ale.game_over()

    def step(self, action):
        """Repeat the action at index ``action`` for up to 4 frames.

        Returns ``(reward, state, isTerminal)`` where ``isTerminal`` is 1 if a
        life was lost or the game ended during the step, else 0.
        """
        livesAtStart = self.ale.lives()
        totalReward = 0
        terminalFlag = 0
        self.stepNumber += 1
        self.episodeStepNumber += 1

        for _ in range(4):
            screenBefore = self.ale.getScreenRGB()
            totalReward += self.ale.act(self.actionSet[action])
            screenAfter = self.ale.getScreenRGB()

            # Detect end of episode, I don't think I'm handling this right in terms
            # of the overall game loop (??)
            if self.ale.lives() < livesAtStart or self.ale.game_over():
                terminalFlag = 1
                break

            # Save a PNG of every frame of the games selected for capture.
            if self.gameNumber % self.screenCaptureFrequency == 0:
                captureDir = self.outputDir + '/screen_cap/game-%06d' % (self.gameNumber)
                if not os.path.isdir(captureDir):
                    os.makedirs(captureDir)
                self.ale.saveScreenPNG(captureDir + '/frame-%06d.png' % (self.getEpisodeFrameNumber()))

        # Pixel-wise max of the last two screens read by the loop.
        mergedScreen = np.maximum(screenAfter, screenBefore)
        self.state = self.state.stateByAddingScreen(mergedScreen, self.ale.getFrameNumber())
        self.gameScore += totalReward
        return totalReward, self.state, terminalFlag

    def resetGame(self):
        """Reset the emulator and the per-game counters.

        The game number only advances when the previous game was over.
        """
        if self.ale.game_over():
            self.gameNumber += 1
        self.ale.reset_game()
        firstScreen = self.ale.getScreenRGB()
        self.state = State().stateByAddingScreen(firstScreen, self.ale.getFrameNumber())
        self.gameScore = 0
        # environment steps vs ALE frames. Will probably be 4*frame number
        self.episodeStepNumber = 0
class MyEnv(Environment):
    """Environment wrapping an ALE emulator with 84x84 grayscale observations.

    In validation mode the score and the episode count are accumulated and
    reported by ``summarizePerformance``.
    """

    VALIDATION_MODE = 0

    # Options applied when `ale_options` is not supplied. Stored on the class
    # instead of as a list default, which would be shared between calls.
    _DEFAULT_ALE_OPTIONS = (
        {"key": "random_seed", "value": 0},
        {"key": "color_averaging", "value": True},
        {"key": "repeat_action_probability", "value": 0.},
    )

    def __init__(self,
                 rng,
                 rom="ale/breakout.bin",
                 frame_skip=4,
                 ale_options=None):
        """Set up the emulator and the screen buffers.

        rng: random generator exposing ``randint`` (number of no-ops played
            in ``reset``).
        rom: path of the ROM to load.
        frame_skip: emulator frames per ``act`` call; values below 1 are
            treated as 1.
        ale_options: iterable of ``{"key": ..., "value": ...}`` dicts with
            int, float or bool values; None means the default options
            (seed 0, color averaging on, repeat action probability 0).

        Raises ValueError when an option value has another type.
        """
        if ale_options is None:
            ale_options = MyEnv._DEFAULT_ALE_OPTIONS

        self._mode = -1
        self._modeScore = 0.0
        self._modeEpisodeCount = 0

        self._frameSkip = frame_skip if frame_skip >= 1 else 1
        self._randomState = rng
        self._ale = ALEInterface()
        for option in ale_options:
            # The exact type decides the setter; `type(...) is` keeps bool
            # values away from the int branch.
            t = type(option["value"])
            if t is int:
                self._ale.setInt(option["key"], option["value"])
            elif t is float:
                self._ale.setFloat(option["key"], option["value"])
            elif t is bool:
                self._ale.setBool(option["key"], option["value"])
            else:
                raise ValueError(
                    "Option {} ({}) is not an int, bool or float.".format(
                        option["key"], t))
        self._ale.loadROM(rom)

        w, h = self._ale.getScreenDims()
        self._screen = np.empty((h, w), dtype=np.uint8)
        self._reducedScreen = np.empty((84, 84), dtype=np.uint8)
        self._actions = self._ale.getMinimalActionSet()

    def reset(self, mode):
        """Reset the game for ``mode`` and return a zero-filled observation.

        Switching into validation mode zeroes the score and episode count;
        each later reset in validation mode adds one episode.
        """
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._modeScore = 0.0
                self._modeEpisodeCount = 0
            else:
                self._modeEpisodeCount += 1
        elif self._mode != -1:  # and thus mode == -1
            self._mode = -1

        self._ale.reset_game()
        # Play a random number of no-ops (action 0) before the first frame.
        for _ in range(self._randomState.randint(15)):
            self._ale.act(0)
        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84),
                   self._reducedScreen,
                   interpolation=cv2.INTER_NEAREST)

        return [4 * [84 * [84 * [0]]]]

    def act(self, action):
        """Apply the action at index ``action`` and return the reward's sign.

        The emulator action is repeated up to ``frame_skip`` times and stops
        early on game over. The raw reward is added to the mode score.
        """
        action = self._actions[action]

        reward = 0
        for _ in range(self._frameSkip):
            reward += self._ale.act(action)
            if self.inTerminalState():
                break

        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84),
                   self._reducedScreen,
                   interpolation=cv2.INTER_NEAREST)

        self._modeScore += reward
        return np.sign(reward)

    def summarizePerformance(self, test_data_set):
        """Print the mean score per episode for the current mode."""
        # Count the episode in progress when it has not ended yet.
        if not self.inTerminalState():
            self._modeEpisodeCount += 1
        print("== Mean score per episode is {} over {} episodes ==".format(
            self._modeScore / self._modeEpisodeCount,
            self._modeEpisodeCount))

    def inputDimensions(self):
        """Observation dimensions: a single subject of 4 frames of 84x84."""
        return [(4, 84, 84)]

    def observationType(self, subject):
        """dtype of the observations."""
        return np.uint8

    def nActions(self):
        """Size of the minimal action set."""
        return len(self._actions)

    def observe(self):
        """Latest reduced screen, copied and wrapped in a list."""
        return [np.array(self._reducedScreen)]

    def inTerminalState(self):
        """True when the emulator reports game over."""
        return self._ale.game_over()
sess.run(Q_target.b_fc2.assign(Q_train.b_fc2)) if frameCount % saveFrame is 0: if saveModel is True: saver.save(sess, modelPath) if saveData is True: memory.save() if explorationRate > 0.1: explorationRate -= explorationRateDelta if ale.game_over(): cost_average /= (1.0 * (frameCount - frameCountLast)/trainFreq) Q_average /= (1.0 * batchSize*n_actions*(frameCount - frameCountLast)/trainFreq) episode += 1 print 'Epi: %07d Score: %03d Exp: %.2f Frame: %08d Cost: %.6f FPS:%.2f Q: %.2f' \ % (episode, scoreEpisode, explorationRate, frameCount, cost_average, (frameCount - frameCountLast) / (time.time() - t0) ,Q_average ) # print t1s, t2s# t3s, t4s, t5s,t6s,t7s t0 = time.time() ale.reset_game() scoreEpisode = 0.0 cost_average = 0.0 Q_average = 0.0 frameCountLast = frameCount