class Emulate:
    """Convenience wrapper around an ALE emulator instance.

    Configures the emulator from keyword options, loads a ROM and exposes
    helpers for stepping the game and fetching screens resized to ``dims``.
    """

    def __init__(self, rom_file, display_screen=False, frame_skip=4,
                 screen_height=84, screen_width=84,
                 repeat_action_probability=0, color_averaging=True,
                 random_seed=0, record_screen_path='screen_pics',
                 record_sound_filename=None, minimal_action_set=True):
        """Create and configure the emulator, then load ``rom_file``.

        ``record_screen_path`` and ``record_sound_filename`` are accepted but
        never read by this class. A falsy ``random_seed`` leaves the
        emulator's seed setting untouched.
        """
        emulator = ALEInterface()
        self.ale = emulator

        if display_screen:
            platform_name = sys.platform
            if platform_name == 'darwin':
                import pygame
                pygame.init()
                # Sound doesn't work on OSX
                emulator.setBool('sound', False)
            elif platform_name.startswith('linux'):
                emulator.setBool('sound', True)
            emulator.setBool('display_screen', True)

        emulator.setInt('frame_skip', frame_skip)
        emulator.setFloat('repeat_action_probability',
                          repeat_action_probability)
        emulator.setBool('color_averaging', color_averaging)
        if random_seed:
            emulator.setInt('random_seed', random_seed)

        emulator.loadROM(rom_file)

        fetch_actions = (emulator.getMinimalActionSet if minimal_action_set
                         else emulator.getLegalActionSet)
        self.actions = fetch_actions()
        # Passed straight to cv2.resize as its target size.
        self.dims = (screen_width, screen_height)

    def numActions(self):
        """Return how many actions are in the selected action set."""
        return len(self.actions)

    def getActions(self):
        """Return the selected action set."""
        return self.actions

    def restart(self):
        """Reset the emulator to the start of a game."""
        self.ale.reset_game()

    def act(self, action):
        """Apply the action at index ``action`` and return its reward."""
        return self.ale.act(self.actions[action])

    def getScreen(self):
        """Return the grayscale screen resized to ``dims``."""
        return cv2.resize(self.ale.getScreenGrayscale(), self.dims)

    def getScreenGray(self):
        """Return the resized grayscale screen rotated by one quarter turn."""
        shrunk = cv2.resize(self.ale.getScreenGrayscale(), self.dims)
        return np.rot90(shrunk, k=1)

    def getScreenColor(self):
        """Return the resized RGB screen rotated by one quarter turn."""
        shrunk = cv2.resize(self.ale.getScreenRGB(), self.dims)
        return np.rot90(shrunk, k=1)

    def isTerminal(self):
        """Return whether the emulator reports the game as over."""
        return self.ale.game_over()
class ALE(object):
    """Breakout emulator wrapper with action repeat and cached step results.

    ``setSetting`` must be called before ``act`` or ``new_game``: both read
    ``action_repeat`` / ``screen_type``, which only ``setSetting`` assigns.
    """

    def __init__(self, init_seed, init_rand):
        """Configure the emulator and load ``./breakout.bin``.

        Args:
            init_seed: value stored in the emulator's ``random_seed`` setting.
            init_rand: number of action-0 steps ``new_game`` performs.
        """
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', init_seed)
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.loadROM('./breakout.bin')
        self.action_size = 4
        self.screen = None
        self.reward = 0
        self.terminal = True
        self.init_rand = init_rand

    def setSetting(self, action_repeat, screen_type):
        """Set the per-``act`` repeat count and screen type (0 RGB, 1 gray)."""
        self.action_repeat = action_repeat
        self.screen_type = screen_type

    def _capture_screen(self):
        """Cache the current screen in the format chosen by ``screen_type``.

        Exits the process with status 1 on an unknown ``screen_type``.
        """
        if self.screen_type == 0:
            self.screen = self.ale.getScreenRGB()
        elif self.screen_type == 1:
            self.screen = self.ale.getScreenGrayscale()
        else:
            sys.stderr.write('screen_type error!')
            # sys.exit instead of the site-provided exit() helper, and a
            # non-zero status so the failure is visible to the caller.
            sys.exit(1)

    def _step(self, action):
        """Apply ``action`` once and cache reward, terminal flag and screen."""
        self.reward = self.ale.act(action)
        self.terminal = self.ale.game_over()
        self._capture_screen()

    def state(self):
        """Return the cached ``(reward, screen, terminal)`` triple."""
        return self.reward, self.screen, self.terminal

    def act(self, action):
        """Repeat ``action`` up to ``action_repeat`` times.

        Stops early when the game ends. The cached reward becomes the sum of
        the rewards of the steps taken.

        Returns:
            The ``(reward, screen, terminal)`` triple after the last step.
        """
        cumulated_reward = 0
        for _ in range(self.action_repeat):
            self._step(action)
            cumulated_reward += self.reward
            if self.terminal:
                break
        self.reward = cumulated_reward
        return self.state()

    def new_game(self):
        """Reset a finished game, take ``init_rand`` action-0 steps.

        The cached screen is always refreshed before stepping, so the return
        value is a real frame even when ``init_rand`` is 0.

        Returns:
            The cached screen after the initial steps.
        """
        if self.ale.game_over():
            self.ale.reset_game()
        self._capture_screen()
        for _ in range(self.init_rand):
            self._step(0)
        return self.screen
class ALEGame(object):
    """Arcade Learning Environment wrapper that keeps a 4-frame state stack."""

    def __init__(self, rand_seed, game_name):
        """Configure the emulator, load the ROM and build the first state."""
        emulator = ALEInterface()
        emulator.setInt(b'random_seed', rand_seed)
        emulator.setFloat(b'repeat_action_probability', 0.0)
        emulator.setBool(b'color_averaging', True)
        emulator.setInt(b'frame_skip', SKIPED_FRAMES)
        emulator.loadROM(game_name.encode('ascii'))
        self.ale = emulator

        self.real_actions = emulator.getMinimalActionSet()
        # Buffer that getScreenGrayscale fills in place.
        self.screen = np.empty((IMAGE_HEIGHT, IMAGE_WIDTH, 1), dtype=np.uint8)
        self.reset()

    def preprocess_image(self, is_to_reshape=False):
        """Fetch the current screen and return it as a scaled float32 frame.

        The screen is resized to 110x84, rows 18..101 are kept, and the
        values are multiplied by 1/255. With ``is_to_reshape`` the result has
        shape (84, 84, 1) instead of the cropped 2-D frame.
        """
        self.ale.getScreenGrayscale(self.screen)
        flat = np.reshape(self.screen, (IMAGE_HEIGHT, IMAGE_WIDTH))
        shrunk = skimage.transform.resize(flat, (110, 84),
                                          preserve_range=True)
        frame = shrunk[18:102, :]
        if is_to_reshape:
            frame = frame.reshape((84, 84, 1))
        frame = frame.astype(np.float32)
        frame *= (1.0 / 255.0)
        return frame

    def reset(self):
        """Reset the game and fill the state with four copies of one frame."""
        self.ale.reset_game()
        self.act(0)
        first = self.preprocess_image()
        self.s_t = np.stack([first] * 4, axis=2)

    def act(self, action):
        """Apply a raw emulator action and record reward and game-over flag."""
        self.reward = self.ale.act(action)
        self.is_game_over = self.ale.game_over()

    def process_to_next_image(self, action):
        """Act with the action at index ``action`` and build ``s_t1``.

        ``s_t1`` is ``s_t`` with its oldest frame dropped and the new frame
        appended along the last axis.
        """
        self.act(self.real_actions[action])
        newest = self.preprocess_image(True)
        self.s_t1 = np.concatenate((self.s_t[:, :, 1:], newest), axis=2)

    def update(self):
        """Promote the next state ``s_t1`` to the current state ``s_t``."""
        self.s_t = self.s_t1
class AtariEnvironment(Environment):
    """ Atari Environment Object """

    def __init__(self, rom_path, action_repeat=4, death_end=True,
                 width_resize=84, height_resize=84, resize_mod='scale'):
        """Configure the emulator, load ``rom_path`` and reset the game.

        Args:
            rom_path: path of the ROM handed to ``loadROM``.
            action_repeat: default number of emulator steps per ``step``.
            death_end: treat a lost life as terminal.
            width_resize, height_resize: target size passed to ``imresize``.
            resize_mod: ``'scale'`` or ``'crop'``; see ``get_frame``.
        """
        # NOTE(review): super(Environment, self) starts the lookup *after*
        # Environment, so Environment.__init__ itself is skipped. Left as-is
        # because Environment is not visible here; confirm whether
        # super(AtariEnvironment, self) was intended.
        super(Environment, self).__init__()
        self.action_repeat = action_repeat
        self.death_end = death_end
        self.width_resize = width_resize
        self.height_resize = height_resize
        self.resize_mod = resize_mod
        self.display = False

        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()
        # Settings only take effect when applied before the ROM is loaded,
        # so the seed and display flag are set first.
        self.ale.setInt('random_seed', np.random.randint(1000))
        self.ale.setBool('display_screen', self.display)
        self.ale.loadROM(rom_path)

        self.action_set = self.ale.getMinimalActionSet()
        self.num_actions = len(self.action_set)
        self.start_lives = self.ale.lives()

        width, height = self.ale.getScreenDims()
        # Buffer that getScreenGrayscale fills in place.
        self.currentScreen = np.empty((height, width), dtype=np.uint8)
        self.reset()

    def reset(self):
        """Reset the game, refresh the screen buffer and clear ``terminal``."""
        self.ale.reset_game()
        self.ale.getScreenGrayscale(self.currentScreen)
        self.terminal = False

    def step(self, action, repeat=None):
        """Repeat the action at index ``action`` and return the summed reward.

        Args:
            action: index into ``action_set``.
            repeat: number of emulator steps; defaults to ``action_repeat``.

        Afterwards ``terminal`` is true when the game is over, or when
        ``death_end`` is set and fewer lives remain than at start-up.
        """
        repeat = self.action_repeat if repeat is None else repeat
        reward = 0
        for _ in range(repeat):
            reward += self.ale.act(self.action_set[action])
        self.ale.getScreenGrayscale(self.currentScreen)
        self.terminal = (
            (self.death_end and self.ale.lives() < self.start_lives)
            or self.ale.game_over()
        )
        return reward

    def get_frame(self):
        """Return the current screen resized according to ``resize_mod``.

        ``'scale'`` resizes the whole screen. ``'crop'`` first keeps a
        vertically centred band whose height equals the screen width. Any
        other mode yields ``None``.
        """
        if self.resize_mod == 'scale':
            return imresize(self.currentScreen,
                            (self.width_resize, self.height_resize),
                            interp='bilinear')
        elif self.resize_mod == 'crop':
            height, width = self.currentScreen.shape
            # Floor division: a slice index must be an int on Python 3.
            res = (height - width) // 2
            crop = self.currentScreen[res:(res + width), :]
            return imresize(crop,
                            (self.width_resize, self.height_resize),
                            interp='bilinear')
class AtariEnvironment:
    """ALE wrapper with a fixed 4-step action repeat and a Tk preview window."""

    def __init__(self, rom):
        """Load ``rom``, reset the game and try to set up the Tk window.

        If creating the rendering objects raises ``AttributeError`` a message
        is printed and the environment is left without a window.
        """
        self.ale = ALEInterface()
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.loadROM(rom_file=rom)
        self.action_space = self.ale.getMinimalActionSet()
        self.obs = self.reset()
        try:
            self.im = Image.fromarray(self.obs)
            self.root = Tk()
            self.tkim = ImageTk.PhotoImage(self.im)
            self.window = Label(image=self.tkim)
            # Keep a reference on the widget alongside self.tkim.
            self.window.image = self.tkim
            self.window.pack()
        except AttributeError:
            print("Cannot create rendering attributes")

    def step(self, action):
        """Repeat the action at index ``action`` four times.

        The observation is the element-wise maximum of the screens captured
        after the third and fourth emulator steps.

        Returns:
            ``(obs, reward, done)`` where ``reward`` is the float sum of the
            four step rewards and ``done`` is the emulator's game-over flag.
        """
        reward = 0.
        for i in range(4):
            reward += float(self.ale.act(self.action_space[action]))
            if i == 2:
                frame1 = self.ale.getScreenGrayscale()
            if i == 3:
                frame2 = self.ale.getScreenGrayscale()
        self.obs = np.squeeze(np.maximum(frame1, frame2))
        done = self.ale.game_over()
        return self.obs, reward, done

    def reset(self):
        """Reset the game and return the squeezed grayscale screen."""
        self.ale.reset_game()
        self.obs = np.squeeze(self.ale.getScreenGrayscale())
        return self.obs

    def render(self, rate=0.1):
        """Show the current observation in the window, then sleep ``rate`` s."""
        self.im = Image.fromarray(self.obs)
        self.tkim = ImageTk.PhotoImage(self.im)
        self.window.configure(image=self.tkim)
        self.window.image = self.tkim
        self.window.update_idletasks()
        self.window.update()
        time.sleep(rate)

    def sample_action(self):
        """Return a uniformly random valid index into ``action_space``.

        Sampling over the real size of ``action_space`` (instead of a fixed
        0..3) keeps the result valid for ROMs whose minimal action set does
        not have exactly four entries.
        """
        return random.randrange(len(self.action_space))
class Environment:
    """ALE wrapper that serves a rolling buffer of four 84x84 frames."""

    def __init__(self, render=False):
        """Configure the emulator and load the ROM named by ``ENV``."""
        emulator = ALEInterface()
        emulator.setInt(b'random_seed', 0)
        emulator.setFloat(b'repeat_action_probability', 0.0)
        emulator.setBool(b'color_averaging', True)
        emulator.setInt(b'frame_skip', 4)
        emulator.setBool(b'display_screen', render)
        emulator.loadROM(ENV.encode('ascii'))
        self.ale = emulator
        # Buffer that getScreenGrayscale fills in place.
        self._screen = np.empty((210, 160, 1), dtype=np.uint8)
        # Upper bound (inclusive) on the action-0 steps taken after a reset.
        self._no_op_max = 7

    def _grab_cropped_frame(self):
        """Read the screen, resize it with cv2 and keep rows 18..101."""
        self.ale.getScreenGrayscale(self._screen)
        flat = np.reshape(self._screen, (210, 160))
        shrunk = cv2.resize(flat, (84, 110))
        return shrunk[18:102, :]

    def set_render(self, render):
        """Forward ``render`` to the emulator only when it is falsy."""
        if render:
            return
        self.ale.setBool(b'display_screen', render)

    def reset(self):
        """Reset the game and return a buffer of four copies of one frame."""
        self.ale.reset_game()
        # randomize initial state
        if self._no_op_max > 0:
            idle_steps = np.random.randint(0, self._no_op_max + 1)
            for _ in range(idle_steps):
                self.ale.act(0)
        frame = self._grab_cropped_frame().astype(np.float32)
        frame /= 255.0
        self.frame_buffer = np.stack([frame] * 4, axis=2)
        return self.frame_buffer

    def act(self, action):
        """Apply emulator action ``4 + action`` and roll the frame buffer.

        Returns:
            ``(frame_buffer, reward, done, "")``.
        """
        reward = self.ale.act(4 + action)
        done = self.ale.game_over()
        frame = np.reshape(self._grab_cropped_frame(), (84, 84, 1))
        frame = frame.astype(np.float32)
        frame *= (1 / 255.0)
        older = self.frame_buffer[:, :, 1:]
        self.frame_buffer = np.append(older, frame, axis=2)
        return self.frame_buffer, reward, done, ""

    def close(self):
        """Turn the emulator's ``display_screen`` setting off."""
        self.ale.setBool(b'display_screen', False)
class Environment:
    """Breakout wrapper that returns a stack of the most recent frames."""

    def __init__(self, show_screen, history_length):
        """Configure the emulator and load the game.

        Args:
            show_screen: enable the emulator's display when truthy.
            history_length: number of frames stacked by ``get_state``.
        """
        self.ale = ALEInterface()
        self.ale.setInt('frame_skip', 4)
        self.history = None
        self.history_length = history_length
        if show_screen:
            self.display_screen()
        self.load_game()
        (screen_width, screen_height) = self.ale.getScreenDims()
        # Buffer that getScreenGrayscale fills in place.
        self.screen_data = np.empty((screen_height, screen_width, 1),
                                    dtype=np.uint8)
        self.dims = (84, 84)  # input size for neural network
        # Values from ALE's action enumeration, in agent-index order:
        # 3 = RIGHT, 0 = NOOP, 1 = FIRE, 4 = LEFT.
        self.actions = [3, 0, 1, 4]

    def display_screen(self):
        """Enable the emulator's ``display_screen`` setting."""
        self.ale.setBool("display_screen", True)

    def turn_on_sound(self):
        """Enable the emulator's ``sound`` setting."""
        self.ale.setBool("sound", True)

    def restart(self):
        """Reset the game and forget the frames of the previous episode.

        Clearing ``history`` makes the next ``get_state`` rebuild the stack
        from the first frame of the new game instead of mixing in frames
        from the episode that just ended.
        """
        self.ale.reset_game()
        self.history = None

    def act(self, action):
        """Apply the action at index ``action`` and return its reward."""
        return self.ale.act(self.actions[action])

    def __get_screen(self):
        """Return the grayscale screen resized to ``dims``."""
        self.ale.getScreenGrayscale(self.screen_data)
        return cv2.resize(self.screen_data, self.dims)

    def get_state(self):
        """Return the last ``history_length`` frames stacked on axis 0.

        On the first call after construction or ``restart`` the history is
        filled with copies of the current frame.
        """
        frame = self.__get_screen()
        if self.history is None:
            self.history = deque([frame] * (self.history_length - 1),
                                 maxlen=self.history_length)
        self.history.append(frame)
        return np.stack(list(self.history), axis=0)

    def isTerminal(self):
        """Return whether the emulator reports the game as over."""
        return self.ale.game_over()

    def load_game(self):
        """Load ``Breakout.bin`` from the working directory."""
        self.ale.loadROM("Breakout.bin")
class Environment:
    """ALE wrapper configured from an ``args`` namespace."""

    def __init__(self, rom_file, args):
        """Configure the emulator from ``args`` and load ``rom_file``.

        ``args`` is read for: ``display_screen``, ``frame_skip``,
        ``repeat_action_probability``, ``color_averaging``, ``random_seed``,
        ``record_screen_path``, ``record_sound_filename``,
        ``minimal_action_set``, ``screen_height`` and ``screen_width``.
        A falsy ``random_seed`` leaves the emulator's seed setting untouched.
        """
        self.ale = ALEInterface()
        if args.display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability',
                          args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)

        if args.random_seed:
            self.ale.setInt('random_seed', args.random_seed)

        if args.record_screen_path:
            if not os.path.exists(args.record_screen_path):
                logger.info("Creating folder %s", args.record_screen_path)
                os.makedirs(args.record_screen_path)
            logger.info("Recording screens to %s", args.record_screen_path)
            self.ale.setString('record_screen_dir', args.record_screen_path)

        if args.record_sound_filename:
            logger.info("Recording sound to %s", args.record_sound_filename)
            self.ale.setBool('sound', True)
            self.ale.setString('record_sound_filename',
                               args.record_sound_filename)

        self.ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d",
                        len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d",
                        len(self.actions))
        logger.debug("Actions: %s", self.actions)

        # Stored as (height, width); getScreen swaps it for cv2.
        self.dims = (args.screen_height, args.screen_width)

    def numActions(self):
        """Return how many actions are in the selected action set."""
        return len(self.actions)

    def restart(self):
        """Reset the emulator to the start of a game."""
        self.ale.reset_game()

    def act(self, action):
        """Apply the action at index ``action`` and return its reward."""
        reward = self.ale.act(self.actions[action])
        return reward

    def getScreen(self):
        """Return the grayscale screen resized to ``dims``.

        ``cv2.resize`` expects its target size as (width, height), so the
        stored (height, width) pair is swapped before the call. Square sizes
        are unaffected; non-square sizes now come out with the requested
        number of rows and columns.
        """
        screen = self.ale.getScreenGrayscale()
        height, width = self.dims
        return cv2.resize(screen, (width, height))

    def isTerminal(self):
        """Return whether the emulator reports the game as over."""
        return self.ale.game_over()
class Emulator(object):
    """Minimal ALE wrapper with class-level frame-skip and screen size."""

    FRAME_SKIP = 4
    SCREEN_WIDTH = 84
    SCREEN_HEIGHT = 84

    def __init__(self, rom):
        """Configure the emulator and load ``'roms/' + rom``."""
        self.ale = ALEInterface()
        # Fixed value; not read back from the emulator.
        self.max_num_frames_per_episode = 100000
        self.ale.setInt('frame_skip', self.FRAME_SKIP)
        self.ale.loadROM('roms/' + rom)
        self.actions = self.ale.getMinimalActionSet()

    def reset(self):
        """Reset the emulator to the start of a game."""
        self.ale.reset_game()

    def image(self):
        """Return the grayscale screen as a (SCREEN_HEIGHT, SCREEN_WIDTH) array.

        ``cv2.resize`` takes its target size as (width, height); passing the
        pair in that order makes the following reshape a plain shape
        normalisation rather than a reinterpretation of the pixel order.
        """
        screen = self.ale.getScreenGrayscale()
        screen = cv2.resize(screen, (self.SCREEN_WIDTH, self.SCREEN_HEIGHT))
        return np.reshape(screen, (self.SCREEN_HEIGHT, self.SCREEN_WIDTH))

    def act(self, action):
        """Pass ``action`` unchanged to the emulator and return the reward.

        NOTE(review): unlike ``self.actions[action]`` lookups, this expects a
        raw emulator action value, not an index - confirm against callers.
        """
        return self.ale.act(action)

    def terminal(self):
        """Return whether the emulator reports the game as over."""
        return self.ale.game_over()
class Emulator(object):
    """ALE wrapper configured from a ``settings`` mapping."""

    def __init__(self, settings):
        """Configure the emulator and load ``'roms/' + settings['rom_name']``.

        ``settings`` is read for ``frame_skip``, ``rom_name``,
        ``screen_width`` and ``screen_height``. The emulator seed is drawn
        from a fresh ``np.random.RandomState``.
        """
        self.ale = ALEInterface()
        self.ale.setInt('frame_skip', settings['frame_skip'])
        self.ale.setInt('random_seed', np.random.RandomState().randint(1000))
        self.ale.setBool('color_averaging', False)
        self.ale.loadROM('roms/' + settings['rom_name'])
        self.actions = self.ale.getMinimalActionSet()
        self.width = settings['screen_width']
        self.height = settings['screen_height']

    def reset(self):
        """Reset the emulator to the start of a game."""
        self.ale.reset_game()

    def image(self):
        """Return the grayscale screen as a (height, width) array.

        ``cv2.resize`` takes its target size as (width, height); passing the
        pair in that order makes the following reshape a plain shape
        normalisation rather than a reinterpretation of the pixel order.
        """
        screen = self.ale.getScreenGrayscale()
        screen = cv2.resize(screen, (self.width, self.height),
                            interpolation=cv2.INTER_LINEAR)
        return np.reshape(screen, (self.height, self.width))

    def full_image(self):
        """Return the emulator's RGB screen unchanged."""
        return self.ale.getScreenRGB()

    def act(self, action):
        """Apply the action at index ``action`` and return its reward."""
        return self.ale.act(self.actions[action])

    def terminal(self):
        """Return whether the emulator reports the game as over."""
        return self.ale.game_over()
class Emulator:
    """ALE wrapper configured entirely from module-level constants."""

    def __init__(self):
        """Apply the module-level settings and load ``ROM_PATH``."""
        emulator = ALEInterface()
        self.ale = emulator

        # turn off the sound
        emulator.setBool('sound', False)
        emulator.setBool('display_screen', EMULATOR_DISPLAY)
        emulator.setInt('frame_skip', FRAME_SKIP)
        emulator.setFloat('repeat_action_probability',
                          REPEAT_ACTION_PROBABILITY)
        emulator.setBool('color_averaging', COLOR_AVERAGING)
        emulator.setInt('random_seed', RANDOM_SEED)
        if RECORD_SCENE_PATH:
            emulator.setString('record_screen_dir', RECORD_SCENE_PATH)

        emulator.loadROM(ROM_PATH)

        self.actions = emulator.getMinimalActionSet()
        logger.info("Actions: " + str(self.actions))
        self.dims = DIMS

    def getActions(self):
        """Return the minimal action set."""
        return self.actions

    def numActions(self):
        """Return how many actions are in the minimal action set."""
        return len(self.actions)

    def restart(self):
        """Reset the emulator to the start of a game."""
        self.ale.reset_game()

    def act(self, action):
        """Apply the action at index ``action`` and return its reward."""
        return self.ale.act(self.actions[action])

    def getScreen(self):
        """Return the grayscale screen resized to ``dims`` (not normalised)."""
        return cv2.resize(self.ale.getScreenGrayscale(), self.dims)

    def isTerminal(self):
        """Return the emulator's game-over flag; lost lives are not checked."""
        return self.ale.game_over()
class AleInterface(object):
    """Thin wrapper that loads ``./roms/<game>.bin`` into an ALE emulator."""

    def __init__(self, game, args):
        """Configure the emulator from ``args`` and load the game's ROM.

        ``args`` is read for ``frame_skip``, ``repeat_action_probability``,
        ``color_averaging`` and ``random_seed``. Sound and display settings
        are left at the emulator's defaults. If the ROM file does not exist
        a message is printed and the process exits with status -1.
        """
        self.game = game
        self.ale = ALEInterface()

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability',
                          args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)
        self.ale.setInt('random_seed', args.random_seed)

        rom_file = "./roms/%s.bin" % game
        if not os.path.exists(rom_file):
            # Single pre-formatted argument: prints the same text under both
            # Python 2 and Python 3 (the print statement was Python 2 only).
            print("not found rom file: %s" % rom_file)
            sys.exit(-1)
        self.ale.loadROM(rom_file)

        self.actions = self.ale.getMinimalActionSet()

    def get_actions_num(self):
        """Return how many actions are in the minimal action set."""
        return len(self.actions)

    def act(self, action):
        """Apply the action at index ``action`` and return its reward."""
        reward = self.ale.act(self.actions[action])
        return reward

    def get_screen_gray(self):
        """Return the emulator's grayscale screen."""
        return self.ale.getScreenGrayscale()

    def get_screen_rgb(self):
        """Return the emulator's RGB screen."""
        return self.ale.getScreenRGB()

    def game_over(self):
        """Return whether the emulator reports the game as over."""
        return self.ale.game_over()

    def reset_game(self):
        """Reset the game and return whatever the emulator returns."""
        return self.ale.reset_game()
# ale.act(np.random.randint(len(legal_actions))) # ale.act(0) # actionIndex = 0 # else: # rate = explorationRate if not testFlag else testExplorationRate if np.random.rand(1) > explorationRate: [actionIndex, actionValue] = forward([np.transpose(memory.History, [1, 2, 0])], Q_train, all=False) else: actionIndex = np.random.randint(len(legal_actions)) # get action reward = ale.act(legal_actions[actionIndex]) # reward observe = Scale(ale.getScreenGrayscale()) # 0.08s terminal = ale.game_over() memory.add(observe, actionIndex, reward, terminal=terminal) scoreEpisode += reward # t1 = time.time() # t1s += t1 - t00 # training if frameCount >= startLearningFrame and frameCount % trainFreq is 0: #and not testFlag: # while trainStart: pass # trainStart = True train() if frameCount % targetUpdateFreq is 0:
elif sys.platform.startswith('linux'): ale.setBool('sound', True) ale.setBool('display_screen', True) ale.setInt('frame_skip',1) # Load the ROM file ale.loadROM('roms/breakout.bin') ale.setInt('max_num_frames',1) # Get the list of legal actions legal_actions = ale.getMinimalActionSet() a1,a2 = ale.getScreenDims() cnt = 0 # Play 10 episodes import numpy as np d = np.empty((a1,a2),dtype=np.uint8) for episode in xrange(10): total_reward = 0 while not ale.game_over(): a = legal_actions[randrange(len(legal_actions))] #print legal_actions # Apply an action and get the resulting reward reward = ale.act(a); ale.getScreenGrayscale(d) io.imshow(d) io.show() #print reward #print ale.getScreenRGB() #total_reward += reward #print 'Episode', episode, 'ended with score:', total_reward ale.reset_game()
class AtariEnvironment(interfaces.Environment):
    """ALE-backed environment with frame skipping and an 84x84 frame history.

    Agent-facing actions are indices ("onehot" indices) into the emulator's
    minimal action set; the two lookup dicts translate in both directions.
    """

    def __init__(self,
                 atari_rom,
                 frame_skip=4,
                 noop_max=30,
                 terminate_on_end_life=False,
                 random_seed=123,
                 frame_history_length=4,
                 use_gui=False,
                 max_num_frames=500000,
                 repeat_action_probability=0.0,
                 record_screen_dir=None):
        """Configure the emulator, load the ROM and allocate frame buffers.

        The emulator itself runs with frame skip 1 and repeat probability
        0.0; ``frame_skip`` and ``repeat_action_probability`` are applied by
        this class in ``_act`` and ``perform_action``.
        """
        emulator = ALEInterface()
        emulator.setInt('random_seed', random_seed)
        emulator.setInt('frame_skip', 1)
        emulator.setFloat('repeat_action_probability', 0.0)
        emulator.setInt('max_num_frames_per_episode', max_num_frames)
        if record_screen_dir is not None:
            emulator.setString('record_screen_dir', record_screen_dir)
        emulator.loadROM(atari_rom)
        self.ale = emulator

        self.frame_skip = frame_skip
        self.repeat_action_probability = repeat_action_probability
        self.noop_max = noop_max
        self.terminate_on_end_life = terminate_on_end_life
        self.current_lives = self.ale.lives()
        self.is_terminal = False
        self.previous_action = 0
        self.num_actions = len(self.ale.getMinimalActionSet())

        w, h = self.ale.getScreenDims()
        self.screen_width = w
        self.screen_height = h

        # Blank templates; the working lists below are shallow copies.
        self.zero_last_frames = [
            np.zeros((84, 84), dtype=np.uint8),
            np.zeros((84, 84), dtype=np.uint8)
        ]
        self.last_two_frames = list(self.zero_last_frames)
        self.zero_history_frames = [
            np.zeros((84, 84), dtype=np.uint8)
            for _ in range(frame_history_length)
        ]
        self.frame_history = list(self.zero_history_frames)

        atari_actions = self.ale.getMinimalActionSet()
        self.atari_to_onehot = {
            atari: index for index, atari in enumerate(atari_actions)
        }
        self.onehot_to_atari = dict(enumerate(atari_actions))

        # Flat buffer that getScreenGrayscale fills in place.
        self.screen_image = np.zeros(self.screen_height * self.screen_width,
                                     dtype=np.uint8)

        self.use_gui = use_gui
        self.original_frame = np.zeros((h, w), dtype=np.uint8)
        # Minimum interval between two GUI redraws.
        self.refresh_time = datetime.timedelta(milliseconds=1000 / 60)
        self.last_refresh = datetime.datetime.now()
        if self.use_gui:
            self.gui_screen = pygame.display.set_mode((w, h))

    def getRAM(self, ram=None):
        """Forward to the emulator's ``getRAM``."""
        return self.ale.getRAM(ram)

    def _get_frame(self):
        """Read the screen, remember the full-size frame, return it at 84x84."""
        self.ale.getScreenGrayscale(self.screen_image)
        full_size = self.screen_image.reshape(
            [self.screen_height, self.screen_width, 1])
        self.original_frame = full_size
        return cv2.resize(full_size, (84, 84))

    def perform_action(self, onehot_index_action):
        """Perform an agent action, possibly replaced by the previous one.

        With probability ``repeat_action_probability`` the previous action is
        used instead of the requested one.

        Returns:
            ``(state, action_index_used, reward, next_state, is_terminal)``.
        """
        sticky = self.repeat_action_probability
        if sticky > 0 and np.random.uniform() < sticky:
            onehot_index_action = self.previous_action
        self.previous_action = onehot_index_action

        atari_action = self.onehot_to_atari[onehot_index_action]
        state, _, reward, next_state, self.is_terminal = \
            self.perform_atari_action(atari_action)
        return (state, onehot_index_action, reward, next_state,
                self.is_terminal)

    def perform_atari_action(self, atari_action):
        """Perform a raw emulator action for ``frame_skip`` frames.

        The history is shifted by one and its newest entry becomes the
        element-wise maximum of the last two captured frames.

        Returns:
            ``(state, atari_action, reward, next_state, is_terminal)``.
        """
        state = self.get_current_state()
        reward = self._act(atari_action, self.frame_skip)

        if self.use_gui:
            self.refresh_gui()

        self.frame_history[:-1] = self.frame_history[1:]
        self.frame_history[-1] = np.max(self.last_two_frames, axis=0)
        next_state = self.get_current_state()

        return state, atari_action, reward, next_state, self.is_terminal

    def _act(self, ale_action, repeat):
        """Apply ``ale_action`` ``repeat`` times and return the summed reward.

        Frames are captured only on the last two repeats. Afterwards the
        terminal flag and the life counter are refreshed; a lost life marks
        the state terminal when ``terminate_on_end_life`` is set.
        """
        total = 0
        first_captured = repeat - 2
        for i in range(repeat):
            total += self.ale.act(ale_action)
            if i >= first_captured:
                self.last_two_frames = [
                    self.last_two_frames[1],
                    self._get_frame()
                ]

        self.is_terminal = self.ale.game_over()

        # terminate the episode if current_lives has decreased
        remaining = self.ale.lives()
        if self.current_lives > remaining and self.terminate_on_end_life:
            self.is_terminal = True
        self.current_lives = remaining

        return total

    def get_current_state(self):
        """Return copies of every frame in the history."""
        return [frame.copy() for frame in self.frame_history]

    def get_actions_for_state(self, state):
        """Return the agent-side index of every minimal action."""
        return [
            self.atari_to_onehot[a] for a in self.ale.getMinimalActionSet()
        ]

    def reset_environment(self):
        """Start a new episode and rebuild the frame history.

        With ``terminate_on_end_life`` the emulator is reset only when the
        game is actually over; otherwise it is always reset. Up to
        ``noop_max`` action-0 steps are then taken.
        """
        self.last_two_frames = [self.zero_history_frames[0],
                                self._get_frame()]

        if not self.terminate_on_end_life or self.ale.game_over():
            self.ale.reset_game()

        self.current_lives = self.ale.lives()

        if self.noop_max > 0:
            num_noops = np.random.randint(self.noop_max + 1)
            self._act(0, num_noops)

        self.previous_action = 0
        self.frame_history = list(self.zero_history_frames)
        self.frame_history[-1] = np.max(self.last_two_frames, axis=0)

        if self.use_gui:
            self.refresh_gui()

    def is_current_state_terminal(self):
        """Return the cached terminal flag."""
        return self.is_terminal

    def refresh_gui(self):
        """Redraw the pygame window if ``refresh_time`` has elapsed."""
        now = datetime.datetime.now()
        if not (now - self.last_refresh) > self.refresh_time:
            return
        self.last_refresh = now

        # Swap the first two axes and repeat the single channel three times.
        swapped = np.transpose(self.original_frame, axes=(1, 0, 2))
        gui_image = np.tile(swapped, [1, 1, 3])

        pygame.surfarray.blit_array(self.gui_screen, gui_image)
        pygame.display.update()
class ALEEnvironment(BaseEnvironment):
    """
    The :class:`ALEEnvironment` class takes care of the interface to the ALE
    and tries to do nothing else. It's meant for advanced users who need fine
    control over every aspect of the process. It has many functions that are
    simply wrappers of the underlying ALE but with pythonic names/usage.

    Parameters
    ----------
    rom : str
        Path of the rom to load, e.g. 'dir_for_rom/rom.bin'. It is encoded
        to bytes before being handed to the ALE.
    resize_shape : tuple or None
        Default (84, 84). Shape passed to ``imresize`` in ``get_state``.
        None returns the screen without resizing.
    skip_frame : int
        Default 1. Value of the ALE ``frame_skip`` setting.
    repeat_action_probability : float
        Default 0.0. Value of the ALE setting of the same name.
    step_cap: int
        Default None. Maximum number of steps to run in an episode. Breakout
        can sometimes not return terminal even when game is ended. This fixes
        that and will return terminal after stepping above this count
    loss_of_life_termination : boolean
        Default False. Report terminal once a life has been lost.
    loss_of_life_negative_reward : boolean
        Default False. Subtract 1 from the reward of a step that loses a life.
    grayscale : boolean
        Default True. Use the grayscale screen instead of RGB.
    display_screen : boolean
        Default False. Whether or not to show the game. True takes longer to
        run but can be fun to watch
    seed : numpy.random.RandomState
        Source of the integer stored in the ALE ``random_seed`` setting.
    """

    def __init__(self, rom, resize_shape=(84, 84), skip_frame=1,
                 repeat_action_probability=0.0, step_cap=None,
                 loss_of_life_termination=False,
                 loss_of_life_negative_reward=False, grayscale=True,
                 display_screen=False, seed=np.random.RandomState()):
        # set up emulator
        self.ale = ALEInterface()
        if display_screen:
            self.ale.setBool(b'display_screen', True)
        self.ale.setInt(b'frame_skip', skip_frame)
        self.ale.setInt(b'random_seed', seed.randint(0, 9999))
        self.ale.setFloat(b'repeat_action_probability',
                          repeat_action_probability)
        self.ale.setBool(b'color_averaging', False)
        self.ale.loadROM(rom.encode())

        # setup gamescreen object. I think this is faster than recreating an
        # empty each time. The channel count must match the screen type: a
        # single-channel buffer cannot hold an RGB screen.
        width, height = self.ale.getScreenDims()
        channels = 1 if grayscale else 3
        self.grayscale = grayscale
        self.gamescreen = np.empty((height, width, channels), dtype=np.uint8)

        self.resize_shape = resize_shape
        self.skip_frame = skip_frame
        self.step_cap = step_cap
        self.curr_step_count = 0

        # setup action converter
        # ALE returns legal action indexes, convert these to just numbers
        self.action_inds = self.ale.getMinimalActionSet()

        # setup lives
        self.loss_of_life_negative_reward = loss_of_life_negative_reward
        self.cur_lives = self.ale.lives()
        self.loss_of_life_termination = loss_of_life_termination
        self.life_lost = False

    def reset(self):
        """Reset the game, the life tracking and the step counter."""
        self.ale.reset_game()
        self.cur_lives = self.ale.lives()
        self.life_lost = False
        self.curr_step_count = 0

    def step(self, action):
        """Apply the action at index ``action`` and return its reward."""
        self.curr_step_count += 1
        ale_action = self.action_inds[action]
        return self._step(ale_action)

    def _step(self, ale_action):
        """Apply a raw ALE action, tracking lives when either life flag is set.

        Returns the reward, reduced by 1 when a life was lost and
        ``loss_of_life_negative_reward`` is enabled.
        """
        track_lives = (self.loss_of_life_termination
                       or self.loss_of_life_negative_reward)
        if not track_lives:
            return self.ale.act(ale_action)

        rew = self.ale.act(ale_action)
        new_lives = self.ale.lives()
        if new_lives < self.cur_lives:
            # if loss of life is negative reward subtract 1 from reward
            if self.loss_of_life_negative_reward:
                rew -= 1
            self.cur_lives = new_lives
            self.life_lost = True
        return rew

    def get_state(self):
        """Return the current screen, resized unless ``resize_shape`` is None.

        Grayscale screens are returned without the channel dimension.
        """
        if self.grayscale:
            self.gamescreen = self.ale.getScreenGrayscale(self.gamescreen)
            # remove the last dimension (channel)
            frame = self.gamescreen[:, :, 0]
        else:
            self.gamescreen = self.ale.getScreenRGB(self.gamescreen)
            frame = self.gamescreen

        # if resize_shape is none then don't resize
        if self.resize_shape is None:
            return frame
        return imresize(frame, self.resize_shape)

    def get_state_shape(self):
        """Return ``resize_shape`` as given at construction."""
        return self.resize_shape

    def get_terminal(self):
        """Return True on a lost life (if enabled), step cap, or game over."""
        if self.loss_of_life_termination and self.life_lost:
            return True
        elif (self.step_cap is not None
              and self.curr_step_count > self.step_cap):
            return True
        else:
            return self.ale.game_over()

    def get_num_actions(self):
        """Return how many actions are in the minimal action set."""
        return len(self.action_inds)
class MyEnv(Environment):
    """ALE-backed environment exposing an 84x84 reduced grayscale screen."""

    VALIDATION_MODE = 0

    def __init__(self, rng, rom="ale/breakout.bin", frame_skip=4,
                 ale_options=[{"key": "random_seed", "value": 0},
                              {"key": "color_averaging", "value": True},
                              {"key": "repeat_action_probability",
                               "value": 0.}]):
        """Apply ``ale_options`` to a new emulator and load ``rom``.

        Each option is dispatched on the exact type of its value (int, float
        or bool); any other type raises ``ValueError``. ``frame_skip`` values
        below 1 are replaced by 1.
        """
        self._mode = -1
        self._modeScore = 0.0
        self._modeEpisodeCount = 0
        if frame_skip >= 1:
            self._frameSkip = frame_skip
        else:
            self._frameSkip = 1
        self._randomState = rng

        self._ale = ALEInterface()
        setters = {
            int: self._ale.setInt,
            float: self._ale.setFloat,
            bool: self._ale.setBool,
        }
        for option in ale_options:
            value_type = type(option["value"])
            apply_setting = setters.get(value_type)
            if apply_setting is None:
                raise ValueError(
                    "Option {} ({}) is not an int, bool or float.".format(
                        option["key"], value_type))
            apply_setting(option["key"], option["value"])
        self._ale.loadROM(rom)

        w, h = self._ale.getScreenDims()
        # Both buffers are filled in place on every screen refresh.
        self._screen = np.empty((h, w), dtype=np.uint8)
        self._reducedScreen = np.empty((84, 84), dtype=np.uint8)
        self._actions = self._ale.getMinimalActionSet()

    def _refreshReducedScreen(self):
        """Read the grayscale screen and resize it into ``_reducedScreen``."""
        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84), self._reducedScreen,
                   interpolation=cv2.INTER_NEAREST)

    def reset(self, mode):
        """Switch mode bookkeeping, reset the game and take random idle steps.

        Entering validation mode clears the score and episode count; a
        further validation reset increments the episode count.

        Returns:
            A nested list of zeros shaped like one (4, 84, 84) observation.
        """
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode == MyEnv.VALIDATION_MODE:
                self._modeEpisodeCount += 1
            else:
                self._mode = MyEnv.VALIDATION_MODE
                self._modeScore = 0.0
                self._modeEpisodeCount = 0
        elif self._mode != -1:  # and thus mode == -1
            self._mode = -1

        self._ale.reset_game()
        idle_steps = self._randomState.randint(15)
        for _ in range(idle_steps):
            self._ale.act(0)
        self._refreshReducedScreen()

        return [4 * [84 * [84 * [0]]]]

    def act(self, action):
        """Repeat the indexed action up to ``_frameSkip`` times.

        Stops early on a terminal state, adds the raw reward to the mode
        score and returns the sign of the reward.
        """
        ale_action = self._actions[action]

        total = 0
        for _ in range(self._frameSkip):
            total += self._ale.act(ale_action)
            if self.inTerminalState():
                break

        self._refreshReducedScreen()

        self._modeScore += total
        return np.sign(total)

    def summarizePerformance(self, test_data_set):
        """Print the mean score per episode for the current mode.

        An episode still in progress is counted before the mean is taken.
        """
        if not self.inTerminalState():
            self._modeEpisodeCount += 1
        episodes = self._modeEpisodeCount
        print("== Mean score per episode is {} over {} episodes ==".format(
            self._modeScore / episodes, episodes))

    def inputDimensions(self):
        """Return the observation dimensions."""
        return [(4, 84, 84)]

    def observationType(self, subject):
        """Return the dtype of observations."""
        return np.uint8

    def nActions(self):
        """Return how many actions are in the minimal action set."""
        return len(self._actions)

    def observe(self):
        """Return a one-element list holding a copy of the reduced screen."""
        return [np.array(self._reducedScreen)]

    def inTerminalState(self):
        """Return whether the emulator reports the game as over."""
        return self._ale.game_over()
#Set up game environment ale = ALEInterface() ale.setInt(b'random_seed', randrange(0, 256, 1)) #ale.setBool(b'color_averaging', True) ale.loadROM(b"breakout.bin") actions = ale.getMinimalActionSet() interpolator = BilinearInterpolator2D([210, 160], [84, 84]) current_frame = np.empty([210, 160, 1], dtype=np.uint8) next_state = np.empty([84, 84, 1], dtype=np.float32) action_repeats = 20 i = 0 reward = 0 while i < action_repeats and not ale.game_over(): action = choice(actions) reward += ale.act(action) ale.getScreenGrayscale(current_frame) interpolator.interpolate(current_frame, next_state) i += 1 test_state = next_state.transpose(2, 0, 1) import matplotlib.pyplot as plt plt.subplot(1, 2, 1) plt.imshow(current_frame[:, :, 0], interpolation='none', cmap='gray') plt.subplot(1, 2, 2) plt.imshow(test_state[0, :, :], interpolation='none', cmap='gray') plt.show()
class AleInterface(object):
    """Wrapper that loads ``./roms/<game>.bin`` and exposes A/B action sets."""

    def __init__(self, game, args):
        """Configure the emulator from ``args`` and load the game's ROM.

        ``args`` is read for ``frame_skip``, ``repeat_action_probability``,
        ``color_averaging`` and ``random_seed``. Sound and display settings
        are left at the emulator's defaults. If the ROM file does not exist
        a message is printed and the process exits with status -1.
        """
        self.ale = ALEInterface()

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability',
                          args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)
        self.ale.setInt('random_seed', args.random_seed)

        rom_file = "./roms/%s.bin" % game
        if not os.path.exists(rom_file):
            # Single pre-formatted argument: prints the same text under both
            # Python 2 and Python 3 (the print statement was Python 2 only).
            print("not found rom file: %s" % rom_file)
            sys.exit(-1)
        self.ale.loadROM(rom_file)

        self.actions = self.ale.getMinimalActionSet()
        self.actionsB = self.ale.getMinimalActionSetB()

    def get_actions_num(self):
        """Return how many actions are in the first action set."""
        return len(self.actions)

    def get_actions_numB(self):
        """Return how many actions are in the B action set."""
        return len(self.actionsB)

    def act(self, action):
        """Apply the action at index ``action`` and return its reward."""
        reward = self.ale.act(self.actions[action])
        return reward

    def actAB(self, actionA, actionB):
        """Apply one action from each set and return the emulator's result.

        ``actionA`` indexes ``actions`` directly; ``actionB`` is offset by 18
        before indexing ``actionsB``.
        """
        reward = self.ale.actAB(self.actions[actionA],
                                self.actionsB[actionB - 18])
        return reward

    def get_screen_gray(self):
        """Return the emulator's grayscale screen."""
        return self.ale.getScreenGrayscale()

    def get_screen_rgb(self):
        """Return the emulator's RGB screen."""
        return self.ale.getScreenRGB()

    def game_over(self):
        """Return whether the emulator reports the game as over."""
        return self.ale.game_over()

    def reset_game(self):
        """Reset the game and return whatever the emulator returns."""
        return self.ale.reset_game()

    def set_mode(self, mode):
        """Forward ``mode`` to the emulator's ``setMode``."""
        return self.ale.setMode(mode)
class AtariEmulator:
    """ALE wrapper that keeps a small ring buffer of raw frames.

    Frames handed to the agent are the element-wise maximum over the buffer,
    resized to ``screen_dims``. Losing a life is treated as terminal.
    """

    def __init__(self, args):
        ''' Initialize Atari environment '''

        # Parameters
        self.buffer_length = args.buffer_length
        self.screen_dims = args.screen_dims
        self.frame_skip = args.frame_skip
        self.blend_method = args.blend_method
        self.reward_processing = args.reward_processing
        self.max_start_wait = args.max_start_wait
        self.history_length = args.history_length
        # Emulator frames consumed by reset() itself (buffer fill + history).
        self.start_frames_needed = self.buffer_length - 1 + (
            (args.history_length - 1) * self.frame_skip)

        # Initialize ALE instance
        self.ale = ALEInterface()
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        if args.watch:
            self.ale.setBool(b'sound', True)
            self.ale.setBool(b'display_screen', True)
        self.ale.loadROM(str.encode(args.rom_path + '/' + args.game + '.bin'))

        # Ring buffer of raw frames; every slot is written by reset().
        self.buffer = np.empty((self.buffer_length, 210, 160))
        self.current = 0
        self.action_set = self.ale.getMinimalActionSet()
        self.lives = self.ale.lives()

        self.reset()

    def get_possible_actions(self):
        ''' Return list of possible actions for game '''
        return self.action_set

    def get_screen(self):
        ''' Add screen to frame buffer '''
        self.buffer[self.current] = np.squeeze(self.ale.getScreenGrayscale())
        self.current = (self.current + 1) % self.buffer_length

    def reset(self):
        ''' Restart the game and return the list of initial states.

        A random number of action_set[0] steps is taken first (bounded by
        max_start_wait). If the agent is already terminal afterwards,
        max_start_wait is decreased by one and the reset is retried; the
        process exits once max_start_wait has dropped below zero.

        NOTE(review): the first entry is a 4-tuple while the entries
        appended from run_step() are 5-tuples; left unchanged because
        callers may rely on it.
        '''
        self.ale.reset_game()
        self.lives = self.ale.lives()

        if self.max_start_wait < 0:
            print("ERROR: max start wait decreased beyond 0")
            sys.exit()
        elif self.max_start_wait <= self.start_frames_needed:
            wait = 0
        else:
            wait = random.randint(
                0, self.max_start_wait - self.start_frames_needed)
        for _ in range(wait):
            self.ale.act(self.action_set[0])

        # Fill frame buffer
        self.get_screen()
        for _ in range(self.buffer_length - 1):
            self.ale.act(self.action_set[0])
            self.get_screen()

        # get initial_states
        state = [(self.preprocess(), 0, 0, False)]
        for step in range(self.history_length - 1):
            state.append(self.run_step(0))

        # make sure agent hasn't died yet
        if self.isTerminal():
            print("Agent lost during start wait. Decreasing max_start_wait by 1")
            self.max_start_wait -= 1
            return self.reset()

        return state

    def run_step(self, action):
        ''' Apply action to game and return next screen and reward

        Returns (frame, action, reward, terminal, raw_reward), where reward
        is clipped to [-1, 1] when reward_processing == 'clip'.
        '''
        raw_reward = 0
        for step in range(self.frame_skip):
            raw_reward += self.ale.act(self.action_set[action])
            self.get_screen()

        if self.reward_processing == 'clip':
            reward = np.clip(raw_reward, -1, 1)
        else:
            reward = raw_reward

        # Terminal is evaluated against the life count from before this step;
        # only afterwards is the stored count refreshed.
        terminal = self.isTerminal()
        self.lives = self.ale.lives()

        return (self.preprocess(), action, reward, terminal, raw_reward)

    def preprocess(self):
        ''' Preprocess frame for agent

        Raises ValueError for a blend_method other than "max" (previously
        None was passed on to imresize, which failed with an opaque error).
        '''
        if self.blend_method != "max":
            raise ValueError(
                "unsupported blend_method: %r" % (self.blend_method,))
        img = np.amax(self.buffer, axis=0)
        return imresize(img, self.screen_dims)

    def isTerminal(self):
        ''' True when the game is over or a life was lost since self.lives was stored '''
        return (self.isGameOver() or (self.lives > self.ale.lives()))

    def isGameOver(self):
        ''' Return the emulator's game-over flag '''
        return self.ale.game_over()
class AleAgent:
    """Agent that plays an ALE game from encoded, processed grayscale frames.

    Each frame is passed through ``processor.process`` and then
    ``encoder.encode``; actions are chosen by the ``NFQ`` object.
    """

    ##
    # @param processing_cls Class for processing game visual input
    def __init__(self, processing_cls, game_rom=None, encoder_model=None,
                 encoder_weights=None, NFQ_model=None, NFQ_weights=None):
        assert game_rom is not None
        self.game = ALEInterface()
        if encoder_weights is not None and encoder_model is not None:
            self.encoder = Encoder(path_to_model=encoder_model,
                                   path_to_weights=encoder_weights)
        else:
            self.encoder = Encoder()

        self.processor = processing_cls()

        # Get & Set the desired settings
        self.game.setInt('random_seed', 0)
        self.game.setInt('frame_skip', 4)

        # Set USE_SDL to true to display the screen. ALE must be compiled
        # with SDL enabled for this to work. On OSX, pygame init is used to
        # proxy-call SDL_main.
        USE_SDL = True

        if USE_SDL:
            if sys.platform == 'darwin':
                pygame.init()
                self.game.setBool('sound', False)   # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.game.setBool('sound', False)   # no sound

            self.game.setBool('display_screen', True)

        # Load the ROM file
        self.game.loadROM(game_rom)

        # Get the list of legal actions
        self.legal_actions = self.game.getLegalActionSet()

        # Get actions applicable in current game
        self.minimal_actions = self.game.getMinimalActionSet()

        if NFQ_model is not None and NFQ_weights is not None:
            self.NFQ = NFQ(
                self.encoder.out_dim,
                len(self.minimal_actions),
                model_path=NFQ_model,
                weights_path=NFQ_weights
            )
        else:
            self.NFQ = NFQ(self.encoder.out_dim, len(self.minimal_actions))

        (self.screen_width, self.screen_height) = self.game.getScreenDims()
        # Preallocated (height, width) buffer passed to getScreenGrayscale.
        self.screen_data = np.zeros(
            (self.screen_height, self.screen_width),
            dtype=np.uint8
        )

    ##
    # Initialize the reinforcement learning
    #
    # @param num_of_episodes Number of episodes to play
    # @param eps             Starting exploration rate
    # @param key_binding     Optional callable mapping pygame's pressed-key
    #                        state to an action index (or None); when given,
    #                        eps is forced to 0.05 every episode
    def train(self, num_of_episodes=1500, eps=0.995, key_binding=None):
        pygame.init()
        for episode in range(num_of_episodes):
            total_reward = 0
            moves = 0
            hits = 0
            print('Starting episode:  %s' % (episode + 1))

            if key_binding:
                eps = 0.05
            else:
                # NOTE(review): on Python 2 this is integer division (0 for
                # any num_of_episodes > 2, so no per-episode decay); on
                # Python 3 it is true division. Confirm which is intended.
                eps -= 2/num_of_episodes

            self.game.getScreenGrayscale(self.screen_data)
            pooled_data = self.processor.process(self.screen_data)
            next_state = self.encoder.encode(pooled_data)

            while not self.game.game_over():
                current_state = next_state
                x = None

                if key_binding:
                    key_pressed = pygame.key.get_pressed()
                    x = key_binding(key_pressed)

                # No manual action: epsilon-greedy choice.
                if x is None:
                    r = np.random.rand()
                    if r < eps:
                        x = np.random.randint(self.minimal_actions.size)
                    else:
                        x = self.NFQ.predict_action(current_state)

                a = self.minimal_actions[x]

                # Apply an action and get the resulting reward
                reward = self.game.act(a)

                # record only every 3 frames
                # if not moves % 3:
                self.game.getScreenGrayscale(self.screen_data)
                pooled_data = self.processor.process(self.screen_data)
                next_state = self.encoder.encode(pooled_data)

                # Flat transition record: state, action index, next state, reward.
                transition = np.append(current_state, x)
                transition = np.append(transition, next_state)
                transition = np.append(transition, reward)
                self.NFQ.add_transition(transition)

                total_reward += reward
                if reward > 0:
                    hits += 1

                moves += 1
                if eps > 0.1:
                    eps -= 0.00001
            # end while

            print('Epsilon:  %s' % (eps,))
            print('Episode %s ended with score: %s' % (episode + 1, total_reward))
            print('Hits:  %s' % (hits,))
            self.game.reset_game()

            self.NFQ.train()
            self.NFQ.save_net()
        # end for

    ##
    # Play the game!
    # Acts with NFQ.predict_action until the game is over, then prints the
    # score and the move counter (which starts at 1).
    def play(self):
        total_reward = 0
        moves = 1
        while not self.game.game_over():
            self.game.getScreenGrayscale(self.screen_data)
            pooled_data = self.processor.process(self.screen_data)
            current_state = self.encoder.encode(pooled_data)

            x = self.NFQ.predict_action(current_state)
            a = self.minimal_actions[x]
            reward = self.game.act(a)
            total_reward += reward
            moves += 1

        print('The game ended with score: %s  after:  %s  moves'
              % (total_reward, moves))
class ALEEnvironment(Environment):
    """Environment backed by an ``ALEInterface`` emulator.

    Tracks whether the last action cost a life. ``restart`` and
    ``isTerminal`` read ``self.mode``, which is not assigned in this class
    (presumably set by the base class or the caller -- confirm).
    """

    def __init__(self, rom_file, args):
        """Configure the emulator from ``args`` and load ``rom_file``."""
        from ale_python_interface import ALEInterface
        ale = ALEInterface()
        self.ale = ale

        if args.display_screen:
            platform = sys.platform
            if platform == 'darwin':
                import pygame
                pygame.init()
                ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif platform.startswith('linux'):
                ale.setBool('sound', True)
            ale.setBool('display_screen', True)

        ale.setInt('frame_skip', args.frame_skip)
        ale.setFloat('repeat_action_probability', args.repeat_action_probability)
        ale.setBool('color_averaging', args.color_averaging)

        # A falsy seed (None or 0) leaves the emulator's own seed untouched.
        if args.random_seed:
            ale.setInt('random_seed', args.random_seed)

        if args.record_screen_path:
            if not os.path.exists(args.record_screen_path):
                logger.info("Creating folder %s" % args.record_screen_path)
                os.makedirs(args.record_screen_path)
            logger.info("Recording screens to %s", args.record_screen_path)
            ale.setString('record_screen_dir', args.record_screen_path)

        if args.record_sound_filename:
            logger.info("Recording sound to %s", args.record_sound_filename)
            ale.setBool('sound', True)
            ale.setString('record_sound_filename', args.record_sound_filename)

        ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d" % len(self.actions))
        else:
            self.actions = ale.getLegalActionSet()
            logger.info("Using full action set with size %d" % len(self.actions))
        logger.debug("Actions: " + str(self.actions))

        self.screen_width = args.screen_width
        self.screen_height = args.screen_height

        self.life_lost = False

    def numActions(self):
        """Return the size of the active action set."""
        return len(self.actions)

    def restart(self):
        """Start a new episode and clear the life-lost flag.

        The emulator is reset in test mode, when no life was lost (restart
        requested mid-episode), or when the game is over. In train mode
        after a mere life loss only the flag is cleared, so play continues
        from the current emulator state.
        """
        needs_reset = (
            self.mode == 'test'
            or not self.life_lost
            or self.ale.game_over()
        )
        if needs_reset:
            self.ale.reset_game()
        self.life_lost = False

    def act(self, action):
        """Apply ``actions[action]``, record whether the life count changed, return the reward."""
        lives_before = self.ale.lives()
        reward = self.ale.act(self.actions[action])
        self.life_lost = lives_before != self.ale.lives()
        return reward

    def getScreen(self):
        """Return the grayscale screen resized to (screen_width, screen_height)."""
        target_size = (self.screen_width, self.screen_height)
        return cv2.resize(self.ale.getScreenGrayscale(), target_size)

    def isTerminal(self):
        """Game over; in train mode a lost life also counts as terminal."""
        training = self.mode == 'train'
        over = self.ale.game_over()
        return (over or self.life_lost) if training else over
class AtariEnvironment:
    """ALE environment configured from the module-level ``FLAGS``.

    The emulator runs with its own frame skip set to 1; ``act`` repeats the
    action ``FLAGS.frame_skip`` times and observations are the per-pixel
    maximum over the most recent ``FLAGS.frame_buffer_size`` frames.
    """

    def __init__(self, seed=1, record=False):
        """Create and configure the emulator, load the ROM and reset.

        Args:
            seed: value for ALE's ``random_seed`` setting.
            record: when true, force the display on and record screens and
                sound into ``FLAGS.record_dir``.
        """
        self.ale = ALEInterface()
        self.ale.setBool(b'display_screen', FLAGS.display_screen or record)
        self.ale.setInt(b'frame_skip', 1)
        self.ale.setBool(b'color_averaging', False)
        self.ale.setInt(b'random_seed', seed)
        self.ale.setFloat(b'repeat_action_probability', FLAGS.sticky_prob)
        self.ale.setInt(b'max_num_frames_per_episode',
                        FLAGS.max_num_frames_per_episode)

        if record:
            if not tf.gfile.Exists(FLAGS.record_dir):
                tf.gfile.MakeDirs(FLAGS.record_dir)
            self.ale.setBool(b'sound', True)
            self.ale.setString(b'record_screen_dir', str.encode(FLAGS.record_dir))
            self.ale.setString(b'record_sound_filename',
                               str.encode(FLAGS.record_dir + '/sound.wav'))
            self.ale.setInt(b'fragsize', 64)

        self.ale.loadROM(str.encode(FLAGS.rom))

        self.ale.setMode(FLAGS.mode)
        self.ale.setDifficulty(FLAGS.difficulty)

        self.action_set = self.ale.getLegalActionSet()

        # getScreenDims() is unpacked as (width, height) elsewhere in ALE
        # code; reversing and appending 1 gives (height, width, 1).
        screen_dims = tuple(reversed(self.ale.getScreenDims())) + (1,)
        self._frame_buffer = CircularBuffer(FLAGS.frame_buffer_size,
                                            screen_dims, np.uint8)

        self.reset()

    def _is_terminal(self):
        """Return the emulator's game-over flag."""
        return self.ale.game_over()

    def _get_single_frame(self):
        """Return the per-pixel maximum over the buffered frames.

        The buffered frames are concatenated along axis 2 and reduced with
        ``amax``, which yields a 2-D array; a trailing axis is then added
        back. The original used ``expand_dims(..., 3)``, an out-of-range
        axis for a 2-D array that current NumPy rejects.
        """
        stacked_frames = np.concatenate(self._frame_buffer, axis=2)
        maxed_frame = np.amax(stacked_frames, axis=2)
        return np.expand_dims(maxed_frame, axis=2)

    def reset(self):
        """Reset the episode counters and emulator, then refill the frame buffer."""
        self._episode_frames = 0
        self._episode_reward = 0

        self.ale.reset_game()
        for _ in range(FLAGS.frame_buffer_size):
            self._frame_buffer.append(self.ale.getScreenGrayscale())

    def act(self, action):
        """Repeat ``action_set[action]`` for ``FLAGS.frame_skip`` frames.

        Returns:
            (reward, frame, terminal) where reward is the summed reward
            clipped to [-1, 1]. The unclipped sum is added to the episode
            reward. Must not be called once the game is over.
        """
        assert not self._is_terminal()

        cum_reward = 0
        for _ in range(FLAGS.frame_skip):
            cum_reward += self.ale.act(self.action_set[action])
            self._frame_buffer.append(self.ale.getScreenGrayscale())

        self._episode_frames += FLAGS.frame_skip
        self._episode_reward += cum_reward
        cum_reward = np.clip(cum_reward, -1, 1)

        return cum_reward, self._get_single_frame(), self._is_terminal()

    def state(self):
        """Return the current max-blended frame; the buffer must be full."""
        assert len(self._frame_buffer) == FLAGS.frame_buffer_size
        return self._get_single_frame()

    def num_actions(self):
        """Return the number of legal actions."""
        return len(self.action_set)

    def episode_reward(self):
        """Return the unclipped reward accumulated since the last reset."""
        return self._episode_reward

    def episode_frames(self):
        """Return the number of frames played since the last reset."""
        return self._episode_frames

    def frame_skip(self):
        """Return ``FLAGS.frame_skip``."""
        return FLAGS.frame_skip
# Random-play loop that periodically visualizes the encoder output.
# Relies on `ale`, `processor`, `encoder`, `plt`, `np` and `randrange`
# being defined earlier in the script.
legal_actions = ale.getLegalActionSet()
(screen_width, screen_height) = ale.getScreenDims()
#screen_data = np.zeros(screen_width*screen_height, dtype=np.uint32)
# Grayscale frame buffer, (height, width); passed to getScreenGrayscale below.
screen_data = np.zeros((screen_height,screen_width), dtype=np.uint8)
# Not referenced again in the visible part of the script.
pooling_data = np.zeros((40,31), dtype=np.uint8)
# Play 20 episodes
# NOTE(review): no ale.reset_game() is visible after the inner loop; unless it
# follows outside this excerpt, only the first episode actually plays.
for episode in xrange(20):
    total_reward = 0
    i = 0
    while not ale.game_over():
        i = i + 1
        # Every 20th step: grab the screen, process + encode it, and plot it.
        if i % 20 == 0:
            ale.getScreenGrayscale(screen_data)
            pooled_data = processor.process(screen_data)
            encoded_data = encoder.draw(pooled_data)
            plt.figure(figsize=(1, 1), dpi=40)
            plt.imshow(encoded_data.reshape(40, 31))
            plt.show()
        # Pick a uniformly random legal action.
        a = legal_actions[randrange(len(legal_actions))]
        # Apply an action and get the resulting reward
        reward = ale.act(a);
        #print 'Reward acquired: ', reward
        total_reward += reward
class AleEnvironment(Environment):
    """ALE environment producing normalized 84x84 single-channel frames.

    Can optionally record screens to the ``movie`` directory or show the
    raw screen in an OpenCV window, and can treat a lost life as the end
    of an episode. Usable as a context manager; leaving the context closes
    the display window if one was opened.
    """

    def __init__(self, rom_name, record_display=False, show_display=False, id = 0, shrink=False, life_lost_as_end=True, use_grayscale=True):
        """Configure the emulator and load ``rom_name``.

        Args:
            rom_name: path handed to ``loadROM``; also used in the window title.
            record_display: record screens into ``movie``; takes precedence
                over ``show_display``.
            show_display: open an OpenCV window named ``<rom_name>_<id>``.
            id: suffix for the window name.
            shrink: resize straight to 84x84 instead of resizing to 84x110
                and cropping rows 18..101.
            life_lost_as_end: treat a lost life as an end state.
            use_grayscale: read grayscale screens; otherwise read RGB and
                convert after a max with the previous frame.
        """
        super(AleEnvironment, self).__init__()
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', int(np.random.rand() * 100))
        self.ale.setFloat('repeat_action_probability', 0.0)
        self.ale.setBool('color_averaging', False)
        self.record_display = record_display
        self.show_display = show_display
        # Stays None unless a window is actually created below, so that
        # __exit__ knows whether there is anything to destroy.
        self.display_name = None

        if self.record_display:
            self.ale.setString('record_screen_dir', 'movie')
        elif self.show_display:
            self.display_name = rom_name + '_' + str(id)
            cv2.startWindowThread()
            cv2.namedWindow(self.display_name)

        self.ale.loadROM(rom_name)
        self.actions = self.ale.getMinimalActionSet()
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.use_grayscale = use_grayscale
        channels = 1 if self.use_grayscale else 3
        self.screen = np.zeros((self.screen_height, self.screen_width, channels), dtype=np.uint8)
        self.prev_screen = np.zeros((self.screen_height, self.screen_width, 3), dtype=np.uint8)
        self.shrink = shrink
        self.life_lost_as_end = life_lost_as_end
        self.lives_lost = False
        self.lives = self.ale.lives()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Previously this always called destroyWindow(self.display_name),
        # raising AttributeError whenever no window had been created.
        if getattr(self, 'display_name', None) is not None:
            cv2.destroyWindow(self.display_name)

    def act(self, action):
        """Apply ``actions[action]`` and return ``(reward, state)``.

        ``state`` is the preprocessed frame; ``lives_lost`` is updated to
        reflect whether this step reduced the life count.
        """
        reward = self.ale.act(self.actions[action])
        if self.use_grayscale:
            screen = self.ale.getScreenGrayscale(self.screen)
        else:
            current_screen = self.ale.getScreenRGB(self.screen)
            # NOTE(review): if getScreenRGB returns the buffer it was given,
            # prev_screen ends up aliasing self.screen and this maximum has
            # no effect on later steps -- confirm against the ALE binding.
            screen = np.maximum(current_screen, self.prev_screen)
            self.prev_screen = current_screen
            # NOTE(review): these weights put 0.7152 on channel 2 and 0.0722
            # on channel 1, which matches neither RGB nor BGR luma ordering;
            # left unchanged because trained models may depend on it.
            screen = screen[:, :, 0] * 0.2126 + screen[:, :, 1] * 0.0722 + screen[:, :, 2] * 0.7152
            screen = screen.astype(np.uint8)
        screen = np.reshape(screen, (self.screen_height, self.screen_width, 1))
        state = self.preprocess(screen)
        self.lives_lost = self.lives > self.ale.lives()
        self.lives = self.ale.lives()
        return reward, state

    def is_end_state(self):
        """Game over, or (when ``life_lost_as_end``) a life lost on the last step."""
        if self.life_lost_as_end:
            return self.ale.game_over() or self.lives_lost
        else:
            return self.ale.game_over()

    def reset(self):
        """Reset the emulator only if the game is over; always refresh the life tracking."""
        if self.ale.game_over():
            self.ale.reset_game()
        self.lives = self.ale.lives()
        self.lives_lost = False

    def available_actions(self):
        # return available indexes instead of actual action value
        return range(0, len(self.actions))

    def preprocess(self, screen):
        """Optionally display ``screen``, then resize/crop to 84x84 and scale to [0, 1]."""
        if self.show_display and not self.record_display:
            cv2.imshow(self.display_name, screen)
        if self.shrink:
            resized = cv2.resize(screen, (84, 84))
        else:
            resized = cv2.resize(screen, (84, 110))
            resized = resized[18:102, :]
        scaled = resized.astype(np.float32) / 255.0
        return scaled
class AleEnv():
    """Lazily initialized ALE wrapper.

    The constructor only stores configuration; ``initialize`` creates the
    emulator, loads the ROM and fills in ``actions`` and the screen size.
    """

    def __init__(self, rom, display_screen, use_env_frame_skip, frame_repeat):
        """Store the configuration; no emulator is created here."""
        self.actions = None
        self.rom = rom
        self.display_screen = display_screen
        self.use_env_frame_skip = use_env_frame_skip
        self.frame_repeat = frame_repeat
        # Set to True by initialize(); defined here so it can always be read.
        self.initialized = False

    def initialize(self):
        """Create and configure the emulator, load the ROM and print a summary."""
        self.ale = ALEInterface()
        self.ale.setInt("random_seed", random.randint(1, 1000))
        if self.display_screen:
            self.ale.setBool('display_screen', True)

        # Compared with == True on purpose: only True/1 enable the emulator's
        # own frame skip, matching the original behaviour.
        if self.use_env_frame_skip == True:
            self.ale.setInt('frame_skip', self.frame_repeat)
            self.ale.setBool('color_averaging', True)

        self.ale.setFloat('repeat_action_probability', 0)
        self.ale.loadROM(self.rom)
        self.actions = self.ale.getMinimalActionSet()
        # Parenthesized single-argument print works on Python 2 and 3 alike.
        print('actions: %s' % (self.actions,))
        (self.screen_width,self.screen_height) = self.ale.getScreenDims()
        print("width/height: " +str(self.screen_width) + "/" + str(self.screen_height))

        self.initialized = True

    def get_actions(self, rom=None):
        """Return the minimal action set.

        If it is not known yet and ``rom`` is given, a temporary emulator is
        created just to query it. Returns None when neither is available.
        """
        if self.actions is None and rom is not None:
            ale = ALEInterface()
            ale.loadROM(rom)
            self.actions = ale.getMinimalActionSet()
        return self.actions

    @property
    def state_dtype(self):
        """dtype of the arrays returned by ``getState``."""
        return np.uint8

    @property
    def continuous_action(self):
        """Always False: actions are discrete."""
        return False

    def reset_game(self):
        """Reset the emulator."""
        self.ale.reset_game()

    def lives(self):
        """Return the emulator's remaining lives."""
        return self.ale.lives()

    def getScreenRGB(self):
        """Return the current screen from ``getScreenRGB``."""
        return self.ale.getScreenRGB()

    def getState(self, debug_display=False, debug_input=None):
        """Return the grayscale screen reshaped to (screen_height, screen_width).

        When ``debug_display`` is true the screen is also passed to
        ``debug_input.show``; ``debug_input`` must then be provided.
        """
        screen = self.ale.getScreenGrayscale()
        if screen is not None and debug_display:
            debug_input.show(screen.reshape(screen.shape[0], screen.shape[1]))
        return screen.reshape(self.screen_height, self.screen_width)

    def act(self, action):
        """Pass ``action`` straight to the emulator (no index translation) and return the reward."""
        return self.ale.act(action)

    def game_over(self):
        """Return the emulator's game-over flag."""
        return self.ale.game_over()

    def finish(self):
        """No-op."""
        return
class GameState(object):
    """ALE game wrapper that maintains a stack of four 84x84 frames.

    ``s_t`` is the current (84, 84, 4) state; ``process`` computes the
    successor ``s_t1`` along with ``reward`` and ``terminal``; ``update``
    promotes ``s_t1`` to ``s_t``.
    """

    def __init__(self, rand_seed, display=False, no_op_max=7):
        """Configure the emulator, load ``ROM`` and build the initial state."""
        emulator = ALEInterface()
        self.ale = emulator
        emulator.setInt(b'random_seed', rand_seed)
        emulator.setFloat(b'repeat_action_probability', 0.0)
        emulator.setBool(b'color_averaging', True)
        emulator.setInt(b'frame_skip', 4)
        self._no_op_max = no_op_max

        if display:
            self._setup_display()

        emulator.loadROM(ROM.encode('ascii'))

        # Minimal action set; process() maps agent indices through it.
        self.real_actions = emulator.getMinimalActionSet()

        # Raw screen buffer: height=210, width=160, one channel.
        self._screen = np.empty((210, 160, 1), dtype=np.uint8)

        self.reset()

    def _process_frame(self, action, reshape):
        """Step the emulator once and return ``(reward, terminal, frame)``.

        The frame is the grayscale screen resized to height 110 / width 84,
        cropped to rows 18..101 and scaled to float32 in [0, 1]. With
        ``reshape`` it has shape (84, 84, 1), otherwise (84, 84).
        """
        reward = self.ale.act(action)
        terminal = self.ale.game_over()

        # Fills self._screen in place, shape (210, 160, 1).
        self.ale.getScreenGrayscale(self._screen)

        flat_screen = self._screen.reshape(210, 160)
        frame = cv2.resize(flat_screen, (84, 110))[18:102, :]
        if reshape:
            frame = frame.reshape(84, 84, 1)
        frame = frame.astype(np.float32)
        frame *= (1.0/255.0)
        return reward, terminal, frame

    def _setup_display(self):
        """Turn on the emulator display, with sound only on Linux."""
        platform = sys.platform
        if platform == 'darwin':
            import pygame
            pygame.init()
            self.ale.setBool(b'sound', False)
        elif platform.startswith('linux'):
            self.ale.setBool(b'sound', True)
        self.ale.setBool(b'display_screen', True)

    def reset(self):
        """Reset the game, apply random no-ops and rebuild ``s_t``."""
        self.ale.reset_game()

        # Randomize the initial state with 0..no_op_max steps of action 0.
        if self._no_op_max > 0:
            no_op_count = np.random.randint(0, self._no_op_max + 1)
            for _ in range(no_op_count):
                self.ale.act(0)

        _, _, first_frame = self._process_frame(0, False)

        self.reward = 0
        self.terminal = False
        # The first frame is repeated to fill all four history slots.
        self.s_t = np.stack([first_frame] * 4, axis=2)

    def process(self, action):
        """Apply agent action index ``action`` and compute ``s_t1``."""
        # Translate the agent's index into the emulator's action value.
        emulator_action = self.real_actions[action]
        self.reward, self.terminal, new_frame = self._process_frame(emulator_action, True)
        # Drop the oldest frame and append the newest on the channel axis.
        self.s_t1 = np.concatenate((self.s_t[:, :, 1:], new_frame), axis=2)

    def update(self):
        """Make the most recently computed state the current one."""
        self.s_t = self.s_t1
ckpt_file = "checkpoint/" + args.load_checkpoint print("loading: " +'"'+args.load_checkpoint+'"') saver.restore(sess, ckpt_file) global_step = 0 global_episode = 0 logging = True t = time.time() num_episodes = 100000 initial_episode = global_episode sess.run(sync_DQNT_op) for episode in range(global_episode, num_episodes + global_episode): global state state = np.zeros((1, 84, 84, config.buff_size), dtype=np.uint8) state = preprocess(ale.getScreenGrayscale(), state) R = 0 ep_begin_t = time.time() terminal = False pseudo_terminal = False lives = ale.lives() episode_begining_step = global_step while terminal == False: action = e_greedy_action(get_epsilon(), state) reward = ale.act(action_map[action]) clipped_reward = max(-1, min(1, reward)) R += reward pseudo_terminal = False if ale.game_over(): terminal = True if lives != ale.lives() or terminal:
class AtariSimulator(object):
    """ALE simulator with cropped screenshots and an optional pygame view."""

    def __init__(self, settings):
        '''Initiate Arcade Learning Environment (ALE) using Python interface
        https://github.com/bbitmaster/ale_python_interface/wiki

        - Set number of frames to be skipped, random seed, ROM and title for display.
        - Retrieve a set of legal actions and their number.
        - Retrieve dimensions of the original screen (width/height), and set the dimensions
        of the cropped screen, together with the padding used to crop the screen rectangle.
        - Set dimensions of the pygame display that will show visualization of the simulation.
        (May be cropped --- showing what the learner sees, or not --- showing full Atari screen)
        - Allocate memory for generated grayscale screenshots. Accepts dims in (height/width) format

        settings is a dict with the keys "frame_skip", "seed_simulator",
        "rom_dir", "rom", "model_dims", "pad" and "viz_cropped".
        '''
        self.ale = ALEInterface()
        self.ale.setInt("frame_skip", settings["frame_skip"])
        self.ale.setInt("random_seed", settings["seed_simulator"])
        self.ale.loadROM(settings["rom_dir"] + '/' + settings["rom"])

        self.title = "ALE Simulator: " + str(settings["rom"])
        self.actions = self.ale.getLegalActionSet()
        self.n_actions = self.actions.size
        self.screen_dims = self.ale.getScreenDims()
        self.model_dims = settings['model_dims']
        self.pad = settings['pad']

        print("Original screen width/height: " + str(self.screen_dims[0]) + "/" + str(self.screen_dims[1]))
        print("Cropped screen width/height: " + str(self.model_dims[0]) + "/" + str(self.model_dims[1]))

        # The display is twice the size of whichever image it shows.
        self.viz_cropped = settings['viz_cropped']
        if self.viz_cropped:
            self.display_dims = (int(self.model_dims[0]*2), int(self.model_dims[1]*2))
        else:
            self.display_dims = (int(self.screen_dims[0]*2), int(self.screen_dims[1]*2))

        # preallocate an array to accept ALE screen data (height/width) !
        self.screen_data = np.empty((self.screen_dims[1], self.screen_dims[0]), dtype=np.uint8)

    def get_screenshot(self):
        '''returns a cropped snapshot of the simulator
        - store grayscale values in a preallocated array
        - cut out a square from the rectangle, using provided padding value
        - downsample to the desired size and transpose from (height/width) to (width/height)
        '''
        self.ale.getScreenGrayscale(self.screen_data)
        # Square window of side screen_dims[0], ending `pad` rows above the bottom.
        self.tmp = self.screen_data[(self.screen_dims[1]-self.screen_dims[0]-self.pad):(self.screen_dims[1]-self.pad), :]
        self.frame = spm.imresize(self.tmp, self.model_dims[::-1], interp='nearest').T
        return self.frame

    def act(self, action_index):
        '''function to transition the simulator from s to s' using provided action
        the action that is provided is in form of an index
        simulator deals with translating the index into an actual action'''
        self.last_reward = self.ale.act(self.actions[action_index])

    def reward(self):
        '''return reward - has to be called after the "act" function'''
        return self.last_reward

    def episode_over(self):
        '''return True once the game is over, False while it is still running'''
        return self.ale.game_over()

    def reset_episode(self):
        '''reset the game that ended'''
        self.ale.reset_game()

    def init_viz_display(self):
        '''initialize display that will show visualization'''
        pygame.init()
        self.screen = pygame.display.set_mode(self.display_dims)
        if self.title:
            pygame.display.set_caption(self.title)

    def refresh_viz_display(self):
        '''if display is shut down, shut the game down
        else move the current simulator's frame (cropped or not cropped) into the pygame display,
        after expanding it 2x along x and y dimensions

        Raises SystemExit when a pygame QUIT event is received.'''
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                # The original had a bare `exit` expression here, which is a
                # no-op; actually terminate as the docstring describes.
                raise SystemExit

        if self.viz_cropped:
            self.surface = pygame.surfarray.make_surface(self.frame)  # has already been transposed
        else:
            self.surface = pygame.surfarray.make_surface(self.screen_data.T)

        self.screen.blit(pygame.transform.scale2x(self.surface), (0, 0))
        pygame.display.flip()
class AtariEmulator:
    """Pong emulator with a two-frame max-blend buffer.

    Frames handed to the agent are the element-wise maximum over the frame
    buffer, resized to ``screen_dims`` with ``cv2.resize``.
    """

    def __init__(self, dims, history_length):
        ''' Initialize Atari environment '''

        # Parameters
        self.buffer_length = 2  # args.buffer_length
        self.screen_dims = dims
        self.frame_skip = 4  # args.frame_skip
        self.max_start_wait = 30  # args.max_start_wait
        self.history_length = history_length  # args.history_length
        # Emulator frames consumed by reset() itself (buffer fill + history).
        self.start_frames_needed = self.buffer_length - 1 + \
            ((self.history_length - 1) * self.frame_skip)

        # Initialize ALE instance
        self.ale = ALEInterface()
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        # if args.watch:
        #     self.ale.setBool(b'sound', True)
        #     self.ale.setBool(b'display_screen', True)
        self.ale.loadROM(str.encode('../roms/pong.bin'))

        # Uninitialized ring buffer; reset() writes every slot before use.
        self.buffer = np.empty((self.buffer_length, 210, 160))
        self.current = 0
        self.action_set = self.ale.getMinimalActionSet()
        self.lives = self.ale.lives()

        self.reset()

    def get_possible_actions(self):
        ''' Return list of possible actions for game '''
        return self.action_set

    def get_screen(self):
        ''' Add screen to frame buffer '''
        self.buffer[self.current] = np.squeeze(self.ale.getScreenGrayscale())
        self.current = (self.current + 1) % self.buffer_length

    def reset(self):
        ''' Restart the game; return (initial state list, latest frame).

        A random number of action_set[0] steps is taken first (bounded by
        max_start_wait). If the agent is already terminal afterwards,
        max_start_wait is decreased by one and the reset is retried; the
        process exits once max_start_wait has dropped below zero.
        '''
        self.ale.reset_game()
        self.lives = self.ale.lives()

        if self.max_start_wait < 0:
            print("ERROR: max start wait decreased beyond 0")
            sys.exit()
        elif self.max_start_wait <= self.start_frames_needed:
            wait = 0
        else:
            wait = random.randint(
                0, self.max_start_wait - self.start_frames_needed)
        for _ in range(wait):
            self.ale.act(self.action_set[0])

        # Fill frame buffer. The current screen is captured first so that
        # all buffer_length slots hold frames from this episode; without it
        # one slot kept uninitialized or stale data that leaked into the
        # max-blend in preprocess().
        self.get_screen()
        for _ in range(self.buffer_length - 1):
            self.ale.act(self.action_set[0])
            self.get_screen()

        # get initial_states
        frame = self.preprocess()
        state = [(frame, 0, 0, False)]
        # Defined up front so the return below also works when
        # history_length == 1 and the loop body never runs.
        next_frame = frame
        for step in range(self.history_length - 1):
            next_frame, reward, terminal, _ = self.run_step(0)
            state.append((frame, 0, reward, terminal))
            frame = next_frame

        # make sure agent hasn't died yet
        if self.isTerminal():
            print("Agent lost during start wait. Decreasing max_start_wait by 1")
            self.max_start_wait -= 1
            return self.reset()

        return state, next_frame

    def run_step(self, action):
        ''' Apply action to game and return next screen and reward

        Returns (next_frame, reward, terminal, raw_reward) with reward
        clipped to [-1, 1].
        '''
        raw_reward = 0
        for step in range(self.frame_skip):
            raw_reward += self.ale.act(self.action_set[action])
            self.get_screen()

        reward = np.clip(raw_reward, -1, 1)

        terminal = self.isTerminal()
        next_frame = self.preprocess()

        return (next_frame, reward, terminal, raw_reward)

    def preprocess(self):
        ''' Preprocess frame for agent '''
        img = np.amax(self.buffer, axis=0)
        return cv2.resize(img, self.screen_dims, interpolation=cv2.INTER_LINEAR)

    def isTerminal(self):
        ''' True on game over or life loss; on True the stored life count is refreshed '''
        t = self.ale.game_over() or (self.lives > self.ale.lives())
        if t:
            self.lives = self.ale.lives()
        return t
# for frameCount in xrange(maxFrame): life0 = ale.lives() # rate = explorationRate if not testFlag else testExplorationRate # perceive if np.random.rand(1) >explorationRate: actionIndex = forward(memory.History,sess,Q_train) # actionIndex = np.argmax(sess.run(Q_train.y, feed_dict={Q_train.x_image: [memory.History]}),axis=1) else: actionIndex = np.random.randint(n_actions) # get action reward = ale.act(legal_actions[actionIndex]) # reward observe = Scale(ale.getScreenGrayscale()) # 0.08s life1 = ale.lives() if life1 < life0: reward += -1 terminal = True for _ in xrange(np.random.randint(-1,noopMax) + 1): ale.act(0) memory.add(observe, actionIndex, reward, terminal) # if not testFlag: # memory.add(observe,actionIndex,reward,terminal) # else: # memory.addHistory(observe)
class Emulator:
    """Per-actor ALE emulator that keeps a stack of processed frames.

    Relies on module-level configuration: ``IMG_SIZE_X``, ``IMG_SIZE_Y``,
    ``NR_IMAGES``, ``FRAMES_IN_POOL``, ``ACTION_REPEAT``, ``BLEND_METHOD``
    and ``MAX_START_WAIT``.
    """

    def __init__(self, rom_path, rom_name, visualize, actor_id, rseed, single_life_episodes = False):
        """Create the emulator for ``<rom_path>/<rom_name>.bin``.

        Args:
            rom_path: directory containing the ROM.
            rom_name: ROM file name without the ``.bin`` extension.
            visualize: show the raw RGB screen in an OpenCV window.
            actor_id: actor index; scales the seed and labels the window.
            rseed: base random seed.
            single_life_episodes: treat a lost life as terminal.
        """
        self.ale = ALEInterface()

        self.ale.setInt("random_seed", rseed * (actor_id +1))

        # For fuller control on explicit action repeat (>= ALE 0.5.0)
        self.ale.setFloat("repeat_action_probability", 0.0)

        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt("frame_skip", 1)
        self.ale.setBool("color_averaging", False)
        self.ale.loadROM(rom_path + "/" + rom_name + ".bin")
        self.legal_actions = self.ale.getMinimalActionSet()
        self.screen_width,self.screen_height = self.ale.getScreenDims()
        #self.ale.setBool('display_screen', True)

        # Processed historical frames that will be fed in to the network
        # (i.e., four 84x84 images)
        self.screen_images_processed = np.zeros((IMG_SIZE_X, IMG_SIZE_Y,
            NR_IMAGES))
        self.rgb_screen = np.zeros((self.screen_height,self.screen_width, 3), dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height,self.screen_width,1), dtype=np.uint8)

        # NOTE(review): the pool is allocated with 2 slots while
        # get_screen_image() wraps its index at FRAMES_IN_POOL; the two are
        # presumably equal -- confirm.
        self.frame_pool = np.empty((2, self.screen_height, self.screen_width))
        self.current = 0
        self.lives = self.ale.lives()

        self.visualize = visualize
        self.visualize_processed = False
        self.windowname = rom_name + ' ' + str(actor_id)
        if self.visualize:
            logger.debug("Opening emulator window...")
            #from skimage import io
            #io.use_plugin('qt')
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)
            logger.debug("Emulator window opened")

        if self.visualize_processed:
            logger.debug("Opening processed frame window...")
            cv2.startWindowThread()
            logger.debug("Processed frame window opened")
            cv2.namedWindow(self.windowname + "_processed")

        self.single_life_episodes = single_life_episodes

    def get_screen_image(self):
        """ Add screen (luminance) to frame pool """
        # Both calls fill the preallocated buffers in place.
        self.ale.getScreenGrayscale(self.gray_screen)
        self.ale.getScreenRGB(self.rgb_screen)
        self.frame_pool[self.current] = np.squeeze(self.gray_screen)
        self.current = (self.current + 1) % FRAMES_IN_POOL
        return self.rgb_screen

    def new_game(self):
        """ Restart game """
        self.ale.reset_game()
        self.lives = self.ale.lives()
        if MAX_START_WAIT < 0:
            logger.debug("Cannot time travel yet.")
            sys.exit()
        elif MAX_START_WAIT > 0:
            wait = random.randint(0, MAX_START_WAIT)
        else:
            wait = 0
        # Random number of steps with legal_actions[0] before play starts.
        for _ in range(wait):
            self.ale.act(self.legal_actions[0])

    def process_frame_pool(self):
        """ Preprocess frame pool

        Returns the max-pooled frame resized to 84x84 as float32 in [0, 1].
        Raises ValueError for a BLEND_METHOD other than "max_pool"
        (previously this failed later when slicing None).
        """
        if BLEND_METHOD != "max_pool":
            raise ValueError("unsupported BLEND_METHOD: %r" % (BLEND_METHOD,))
        img = np.amax(self.frame_pool, axis=0)

        img = cv2.resize(img[:210, :], (84, 84), interpolation=cv2.INTER_LINEAR)

        img = img.astype(np.float32)
        img *= (1.0/255.0)

        return img

    def action_repeat(self, a):
        """ Repeat action and grab screen into frame pool """
        reward = 0
        for i in range(ACTION_REPEAT):
            reward += self.ale.act(self.legal_actions[a])
            new_screen_image_rgb = self.get_screen_image()
        return reward, new_screen_image_rgb

    def get_reshaped_state(self, state):
        """Reshape ``state`` to (1, IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES)."""
        return np.reshape(state,
            (1, IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES))

    def get_initial_state(self):
        """ Get the initial state

        If the game is already terminal after the warm-up, the module-level
        MAX_START_WAIT is lowered by one and the whole procedure is retried.
        """
        # Declared global: without this the augmented assignment below makes
        # the name local and raises UnboundLocalError.
        global MAX_START_WAIT
        self.new_game()
        for step in range(NR_IMAGES):
            reward, new_screen_image_rgb = self.action_repeat(0)
            self.screen_images_processed[:, :, step] = self.process_frame_pool()
            self.show_screen(new_screen_image_rgb)
        if self.is_terminal():
            MAX_START_WAIT -= 1
            return self.get_initial_state()
        return np.copy(self.screen_images_processed)

    def next(self, action):
        """ Get the next state, reward, and game over signal """
        # `action` is a score/one-hot vector; its argmax is the action index.
        reward, new_screen_image_rgb = self.action_repeat(np.argmax(action))
        # Shift the history left by one frame and append the newest.
        self.screen_images_processed[:, :, 0:3] = \
            self.screen_images_processed[:, :, 1:4]
        self.screen_images_processed[:, :, 3] = self.process_frame_pool()
        self.show_screen(new_screen_image_rgb)
        # Terminal is evaluated against the life count from before this step.
        terminal = self.is_terminal()
        self.lives = self.ale.lives()
        return np.copy(self.screen_images_processed), reward, terminal

    def show_screen(self, image):
        """ Show visuals for raw and processed images """
        if self.visualize:
            #io.imshow(image[:210, :], fancy=True)
            cv2.imshow(self.windowname, image[:210, :])
        if self.visualize_processed:
            #io.imshow(self.screen_images_processed[:, :, 3], fancy=True)
            cv2.imshow(self.windowname + "_processed", self.screen_images_processed[:, :, 3])

    def is_terminal(self):
        """Game over; with single_life_episodes a lost life also counts."""
        if self.single_life_episodes:
            return (self.is_over() or (self.lives > self.ale.lives()))
        else:
            return self.is_over()

    def is_over(self):
        """Return the emulator's game-over flag."""
        return self.ale.game_over()
class AleEnv(object):
    '''ALE wrapper for RL training.

    game_over_conditions={'points':(-1, 1)}: dict that describes all desired
    game over conditions. Each key corresponds to a condition that is
    checked; the first condition met produces a game over.

        points: int or tuple of integers
            int: if x < 0, game ends when score is <= x
                 if x >= 0, game ends when score is >= x
            tuple: game ends if score <= x[0] or score >= x[1]
        lives: int that ends game when lives <= x
        frames: int that ends game when total number of frames >= x
        episodes: int that ends game when num of episodes >= x

    Use max_num_frames_per_episode to set max episode length.
    '''
    # will include timing and hidden functionality in future iterations

    def __init__(self, rom_file, display_screen=False, sound=False,
                 random_seed=0, game_over_conditions=None, frame_skip=1,
                 repeat_action_probability=0.25,
                 max_num_frames_per_episode=0, min_action_set=False,
                 screen_color='gray', fps=60, output_buffer_size=1,
                 reduce_screen=False):
        """Configure the emulator, load the ROM and prime the output queue.

        ``game_over_conditions`` defaults to no extra conditions; ``None`` is
        used as the default so instances never share one mutable dict.
        ``screen_color`` is 'gray'/'grey' or 'rgb'/'color'.
        """
        # ALE instance and setup
        self.ale = ALEInterface()
        # TODO: check if rom file exists; will crash jupyter kernel otherwise
        self.ale.loadROM(str.encode(rom_file))

        self.ale.setBool(b'sound', sound)
        self.ale.setBool(b'display_screen', display_screen)

        if min_action_set:
            self.legal_actions = self.ale.getMinimalActionSet()
        else:
            self.legal_actions = self.ale.getLegalActionSet()

        self.ale.setInt(b'random_seed', random_seed)
        self.ale.setInt(b'frame_skip', frame_skip)
        self.frame_skip = frame_skip
        self.ale.setFloat(b'repeat_action_probability',
                          repeat_action_probability)
        self.ale.setInt(b'max_num_frames_per_episode',
                        max_num_frames_per_episode)

        # The ROM is loaded a second time after the settings were changed.
        # NOTE(review): presumably so the settings above take effect - confirm.
        self.ale.loadROM(str.encode(rom_file))

        if game_over_conditions is None:
            game_over_conditions = {}
        self.game_over_conditions = game_over_conditions
        self.screen_color = screen_color
        self.reduce_screen = reduce_screen
        # Emulated time covered by one call to act().
        self.d_frame = (fps**-1) * self.frame_skip

        # set up output buffer
        self.output_buffer_size = output_buffer_size
        self.queue_size = self.output_buffer_size
        self._reset_params()

    def observe(self, flatten=False, expand_dim=False):
        """Return the newest ``output_buffer_size`` screens as one array.

        With ``flatten=True`` the stacked screens are flattened to 1-D,
        otherwise singleton axes are squeezed away. With ``expand_dim=True``
        two leading axes of length one are prepended to the result.
        """
        # np.stack needs a real sequence; a generator is rejected by
        # current NumPy releases.
        frames = [self.output_queue[i]
                  for i in range(self.output_buffer_size)]
        out = np.stack(frames)
        if flatten is True:
            out = out.flatten()
        else:
            out = np.squeeze(out)

        if expand_dim is True:
            return np.expand_dims(np.expand_dims(out, axis=0), axis=1)
        return out

    @property
    def width(self):
        return self.game_screen.shape[1]

    @property
    def height(self):
        return self.game_screen.shape[0]

    @property
    def game_over(self):
        return self._game_over()

    @property
    def actions(self):
        return self.legal_actions

    @property
    def lives(self):
        return self.ale.lives()

    def _grab_screen(self):
        """Read the current screen in the configured colour mode.

        When ``reduce_screen`` is set the screen is resized to 110x84 and
        rows 21..104 are kept, giving an 84-row image.

        Raises:
            ValueError: if ``screen_color`` is not a supported mode.
        """
        if self.screen_color in ('gray', 'grey'):
            screen = np.squeeze(self.ale.getScreenGrayscale())
            if self.reduce_screen:
                screen = resize(screen, output_shape=(110, 84))
                screen = screen[21:-5, :]
        elif self.screen_color in ('rgb', 'color'):
            screen = self.ale.getScreenRGB()
            if self.reduce_screen:
                screen = resize(screen, output_shape=(110, 84, 3))
                screen = screen[21:-5, :, :]
        else:
            raise ValueError(
                "Unknown screen_color: %r" % (self.screen_color,))
        return screen

    def _reset_params(self):
        """Reset the counters and refill the output queue."""
        self.total_points = 0
        self.total_frames = 0
        self.curr_episode = 1
        self.prev_ep_frame_num = -float("inf")

        self.game_screen = self._grab_screen()

        # Pad with blank frames shaped like the real screen, so stacking
        # also works when the screen carries a colour axis.
        self.output_queue = deque(
            (np.zeros(self.game_screen.shape)
             for _ in range(self.queue_size - 1)),
            self.queue_size)
        self.output_queue.appendleft(self.game_screen)

    def reset(self):
        """Reset the emulator and all bookkeeping."""
        self.ale.reset_game()
        self._reset_params()

    def act(self, action):
        """Apply the action with index ``action`` in ``legal_actions``.

        Returns:
            (reward, d_frame, game_over)
        """
        reward = self.ale.act(self.legal_actions[action])

        self.game_screen = self._grab_screen()
        self.output_queue.pop()
        self.output_queue.appendleft(self.game_screen)

        self.total_points += reward
        self.total_frames += self.frame_skip
        # A frame counter that did not advance means ALE began a new episode.
        if self.ale.getEpisodeFrameNumber() <= self.prev_ep_frame_num:
            self.curr_episode += 1
        self.prev_ep_frame_num = self.ale.getEpisodeFrameNumber()

        return reward, self.d_frame, self.game_over

    def _game_over(self):
        """Return True if ALE or any configured condition ends the game.

        Raises:
            RuntimeError: if ``game_over_conditions`` has an unknown key.
        """
        if self.ale.game_over():
            return True

        for cond in self.game_over_conditions:
            limit = self.game_over_conditions[cond]
            if cond == 'points':
                if isinstance(limit, int):
                    # A negative limit is a floor, a non-negative limit is
                    # a ceiling (see the class docstring).
                    if limit < 0:
                        if self.total_points <= limit:
                            return True
                    elif self.total_points >= limit:
                        return True
                elif isinstance(limit, tuple):
                    if (self.total_points <= limit[0] or
                            self.total_points >= limit[1]):
                        return True
            elif cond == 'lives':
                if self.lives <= limit:
                    return True
            elif cond == 'frames':
                if self.total_frames >= limit:
                    return True
            elif cond == 'episodes':
                if self.curr_episode >= limit:
                    return True
            else:
                raise RuntimeError("ERROR: Invalid game over condition")

        return False
class ALEEnvironment():
    """ALE environment that keeps a frame history and tracks life losses.

    In 'train' mode losing a life is reported as a terminal state; in any
    other mode only a real game over is terminal.
    """

    def __init__(self, config):
        self.history = History3D(config)
        self.history_length = config.history_length
        self.mode = config.mode
        self.life_lost = False
        self.terminal = False
        self.score = 0

        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()

        if config.display_screen:
            platform = sys.platform
            if platform == 'darwin':
                import pygame
                pygame.init()
                # Sound doesn't work on OSX
                self.ale.setBool('sound', False)
            elif platform.startswith('linux'):
                self.ale.setBool('sound', False)
            self.ale.setBool('display_screen', True)

        # Whether to skip frames or not
        self.ale.setInt('frame_skip', config.frame_skip)
        self.ale.setBool('color_averaging', config.color_averaging)

        # Random seed for repeatable experiments.
        if config.random_seed:
            self.ale.setInt('random_seed', config.random_seed)

        screen_dir = config.record_screen_path
        if screen_dir:
            if not os.path.exists(screen_dir):
                os.makedirs(screen_dir)
            self.ale.setString('record_screen_dir', screen_dir)

        sound_file = config.record_sound_filename
        if sound_file:
            self.ale.setBool('sound', True)
            self.ale.setString('record_sound_filename', sound_file)

        self.ale.loadROM(config.rom_file)

        use_minimal = config.minimal_action_set
        self.actions = (self.ale.getMinimalActionSet() if use_minimal
                        else self.ale.getLegalActionSet())

        self.screen_width = config.screen_width
        self.screen_height = config.screen_height

    def numActions(self):
        """Return the number of actions in the active action set."""
        return len(self.actions)

    def new_game(self):
        """Reset and fill the history with the first screen.

        Returns (screen, terminal, list of action indices).
        """
        state, terminal = self.reset()
        repeats = self.history_length + 1
        for _ in range(repeats):
            self.history.add(state)
        action_ids = list(range(len(self.actions)))
        return state, terminal, action_ids

    def reset(self):
        """Start the next episode and return (screen, terminal).

        In test mode the game is simply re-initialised. In train mode, when
        the last terminal state was only a lost life and the game is not
        over, the emulator keeps running and just the life-lost flag is
        cleared, so the next episode continues from the current state.
        """
        continue_after_life_loss = (self.mode != 'test'
                                    and self.life_lost
                                    and not self.ale.game_over())
        if not continue_after_life_loss:
            self.ale.reset_game()
        self.life_lost = False
        return self.getScreen(), self.isTerminal()

    def step(self, action):
        """Apply an action index; return (reward, history, terminal)."""
        lives_before = self.ale.lives()
        reward = self.ale.act(self.actions[action])
        self.life_lost = lives_before != self.ale.lives()
        self.score += reward

        self.current_state = self.getScreen()
        self.history.add(self.current_state)
        self.terminal = self.isTerminal()
        return reward, self.history.get(), self.terminal

    def getScreen(self):
        """Return the grayscale screen scaled to [0, 1] and resized."""
        screen = self.ale.getScreenGrayscale()
        # OpenCV takes the target size as (width, height).
        target_size = (self.screen_width, self.screen_height)
        return cv2.resize(screen / 255., target_size)

    def isTerminal(self):
        """Game over, or (in train mode only) a lost life."""
        if self.mode != 'train':
            return self.ale.game_over()
        return self.ale.game_over() or self.life_lost
class AtariAleEnvironment(object):
    """Gym-like ALE wrapper producing stacks of four 84x84 float frames."""

    def __init__(self, env_name, display=False, no_op_max=7):
        self.ale = ALEInterface()
        seed = 113 * np.random.randint(0, 5)
        self.ale.setInt(b'random_seed', seed)
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.setBool(b'color_averaging', True)
        self.ale.setInt(b'frame_skip', 4)
        self._no_op_max = no_op_max

        if display:
            self._setup_display()

        rom_path = (env_name + '.bin').encode('ascii')
        self.ale.loadROM(rom_path)

        # Collect the minimal action set.
        self.real_actions = self.ale.getMinimalActionSet()

        # Reusable screen buffer: height=210, width=160.
        self._screen = np.empty((210, 160, 1), dtype=np.uint8)

    def _process_frame(self, action, reshape):
        """Run one ALE action and return (reward, terminal, frame).

        The frame is the grayscale screen resized to 84x84 and scaled to
        [0, 1] as float32; with ``reshape`` it gets a trailing axis of
        length one.
        """
        reward = self.ale.act(action)
        terminal = self.ale.game_over()

        # The screen buffer has shape (210, 160, 1); drop the last axis.
        self.ale.getScreenGrayscale(self._screen)
        flat_screen = np.reshape(self._screen, (210, 160))

        # Resize straight to 84x84 (no cropping is applied).
        frame = cv2.resize(flat_screen, (84, 84))
        if reshape:
            frame = np.reshape(frame, (84, 84, 1))

        frame = frame.astype(np.float32)
        frame *= (1.0 / 255.0)
        return reward, terminal, frame

    def _setup_display(self):
        """Turn on the ALE display with platform-specific sound setup."""
        platform = sys.platform
        if platform == 'darwin':
            import pygame
            pygame.init()
            self.ale.setBool(b'sound', False)
        elif platform.startswith('linux'):
            self.ale.setBool(b'sound', True)
        self.ale.setBool(b'display_screen', True)

    def reset(self):
        """Reset the game and return the initial (84, 84, 4) state."""
        self.ale.reset_game()

        # Randomize the initial state with a random number of no-ops.
        if self._no_op_max > 0:
            n_noops = np.random.randint(0, self._no_op_max + 1)
            for _ in range(n_noops):
                self.ale.act(0)

        _, _, first_frame = self._process_frame(0, False)

        self.reward = 0
        self.terminal = False
        self.s_t = np.stack([first_frame] * 4, axis=2)
        return self.s_t

    def step(self, action):
        """Apply an action index; return (state, reward, terminal, None)."""
        # Map the index into the minimal action set to the ALE action.
        ale_action = self.real_actions[action]
        reward, terminal, frame = self._process_frame(ale_action, True)

        self.reward = reward
        self.terminal = terminal
        # Drop the oldest frame and append the newest as the last channel.
        self.s_t = np.append(self.s_t[:, :, 1:], frame, axis=2)

        # The 4th value is the gym "info" slot; kept for consistency.
        return self.s_t, self.reward, self.terminal, None
class ALEEnvironment(Environment):
    """Environment backed by the Arcade Learning Environment.

    All settings come from ``args``; screens are returned as grayscale
    images resized to (screen_width, screen_height).
    """

    def __init__(self, rom_file, args):
        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()

        if args.display_screen:
            platform = sys.platform
            if platform == 'darwin':
                import pygame
                pygame.init()
                # Sound doesn't work on OSX
                self.ale.setBool('sound', False)
            elif platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability',
                          args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)

        if args.random_seed:
            self.ale.setInt('random_seed', args.random_seed)

        screen_dir = args.record_screen_path
        if screen_dir:
            if not os.path.exists(screen_dir):
                logger.info("Creating folder %s" % screen_dir)
                os.makedirs(screen_dir)
            logger.info("Recording screens to %s", screen_dir)
            self.ale.setString('record_screen_dir', screen_dir)

        sound_file = args.record_sound_filename
        if sound_file:
            logger.info("Recording sound to %s", sound_file)
            self.ale.setBool('sound', True)
            self.ale.setString('record_sound_filename', sound_file)

        self.ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d"
                        % len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d"
                        % len(self.actions))
        logger.debug("Actions: " + str(self.actions))

        # OpenCV expects width as first and height as second
        self.dims = (args.screen_width, args.screen_height)

    def numActions(self):
        """Return the size of the active action set."""
        return len(self.actions)

    def restart(self):
        """Reset the emulator to the start of a game."""
        self.ale.reset_game()

    def act(self, action):
        """Apply the action at index ``action`` and return its reward."""
        ale_action = self.actions[action]
        return self.ale.act(ale_action)

    def getScreen(self):
        """Return the grayscale screen resized to ``self.dims``."""
        raw_screen = self.ale.getScreenGrayscale()
        return cv2.resize(raw_screen, self.dims)

    def isTerminal(self):
        """Return True when the game is over."""
        return self.ale.game_over()
class AtariEnvironment:
    """ALE wrapper with max-pooled frames and a stacked frame history.

    Settings are read from the module-level ``cfg`` object. Raw screens go
    into a short frame buffer; each observation is the pixel-wise maximum
    over that buffer, post-processed and appended to the frame stack.
    """

    num_actions = 18  # Use full action set

    def __init__(self, frame_shape, frame_postprocess=lambda x: x):
        """Set up ALE from ``cfg`` and prime both buffers.

        Args:
            frame_shape: shape of one post-processed frame.
            frame_postprocess: callable applied to every max-pooled frame
                (identity by default).
        """
        self.ale = ALEInterface()

        self.ale.setBool(b"display_screen", cfg.display_screen)
        # Frame skipping is done explicitly in act().
        self.ale.setInt(b"frame_skip", 1)
        self.ale.setBool(b"color_averaging", False)
        self.ale.setInt(b"random_seed", cfg.random_seed)
        self.ale.setFloat(b"repeat_action_probability", cfg.sticky_prob)

        self.ale.loadROM(str.encode(cfg.rom))

        self.ale.setMode(cfg.mode)
        self.ale.setDifficulty(cfg.difficulty)

        self.action_set = self.ale.getLegalActionSet()
        assert len(self.action_set) == AtariEnvironment.num_actions

        # getScreenDims() is reversed and given a trailing channel axis.
        screen_dims = tuple(reversed(self.ale.getScreenDims())) + (1,)
        self._frame_buffer = CircularBuffer(
            cfg.frame_buffer_size, screen_dims, np.uint8
        )
        self._frame_stack = CircularBuffer(
            cfg.frame_history_size, frame_shape, np.uint8
        )
        self._frame_postprocess = frame_postprocess

        self._episode_count = 0
        self.reset(inc_episode_count=False)

    def _is_terminal(self):
        return self.ale.game_over()

    def _get_single_frame(self):
        """Return the post-processed pixel-wise max over the frame buffer."""
        stacked_frames = np.concatenate(self._frame_buffer, axis=2)
        maxed_frame = np.amax(stacked_frames, axis=2)
        # maxed_frame is 2-D, so the channel axis must be appended as the
        # last axis; an explicit axis of 3 is out of range for it.
        expanded_frame = np.expand_dims(maxed_frame, axis=-1)
        return self._frame_postprocess(expanded_frame)

    def reset(self, inc_episode_count=True):
        """Reset the game, the episode statistics and both buffers."""
        self._episode_frames = 0
        self._episode_reward = 0
        if inc_episode_count:
            self._episode_count += 1

        self.ale.reset_game()
        for _ in range(cfg.frame_buffer_size):
            self._frame_buffer.append(self.ale.getScreenGrayscale())
        for _ in range(cfg.frame_history_size):
            self._frame_stack.append(self._get_single_frame())

    def act(self, action):
        """Repeat ``action`` for ``cfg.frame_skip`` frames.

        Returns:
            (clipped reward in [-1, 1], state, terminal). The unclipped
            reward is added to ``episode_reward``.
        """
        assert not self._is_terminal()

        cum_reward = 0
        for _ in range(cfg.frame_skip):
            cum_reward += self.ale.act(self.action_set[action])
            self._frame_buffer.append(self.ale.getScreenGrayscale())

        self._frame_stack.append(self._get_single_frame())
        self._episode_frames += cfg.frame_skip
        self._episode_reward += cum_reward
        cum_reward = np.clip(cum_reward, -1, 1)

        return cum_reward, self.state, self._is_terminal()

    @property
    def state(self):
        """Frame stack concatenated along the last axis."""
        assert len(self._frame_buffer) == cfg.frame_buffer_size
        assert len(self._frame_stack) == cfg.frame_history_size
        return np.concatenate(self._frame_stack, axis=-1)

    @property
    def episode_reward(self):
        return self._episode_reward

    @property
    def episode_frames(self):
        return self._episode_frames

    @property
    def episode_steps(self):
        return self._episode_frames // cfg.frame_skip

    @property
    def episode_count(self):
        return self._episode_count
class Environment:
    """Training/evaluation driver around an ALE emulator.

    Runs epochs of training episodes against an agent, evaluates the agent
    after every epoch, writes CSV logs and network dumps, and can record a
    run to a movie file. The code runs on both Python 2.7 and Python 3.
    """

    BUFFER_LEN = 2          # raw frames that are max-merged into one screen
    EPISODE_FRAMES = 18000  # frame budget of one evaluation episode
    EPOCH_COUNT = 200
    EPOCH_STEPS = 250000
    EVAL_EPS = 0.001        # epsilon passed to the agent while evaluating
    FRAMES_SKIP = 4         # emulator frames per agent step
    FRAME_HEIGHT = 84
    FRAME_WIDTH = 84
    MAX_NO_OP = 30          # upper bound of random no-ops after a reset
    MAX_REWARD = 1          # rewards are clipped to [-MAX_REWARD, MAX_REWARD]

    def __init__(self, rom_name, rng, display_screen=False):
        """Create the emulator and load ``'../rom/' + rom_name``.

        Args:
            rom_name: ROM file name below ``../rom/``.
            rng: random generator providing ``randint(n)``.
            display_screen: forwarded to ALE's ``display_screen`` setting.
        """
        self.api = ALEInterface()
        self.api.setInt('random_seed', rng.randint(333))
        self.api.setBool('display_screen', display_screen)
        self.api.setFloat('repeat_action_probability', 0.0)
        self.rom_name = rom_name
        self.display_screen = display_screen
        self.rng = rng
        self.repeat = Environment.FRAMES_SKIP
        self.buffer_len = Environment.BUFFER_LEN
        self.height = Environment.FRAME_HEIGHT
        self.width = Environment.FRAME_WIDTH
        # Floor division keeps this an int on Python 3 as well.
        self.episode_steps = (Environment.EPISODE_FRAMES
                              // Environment.FRAMES_SKIP)
        self.merge_id = 0
        self.max_reward = Environment.MAX_REWARD
        self.eval_eps = Environment.EVAL_EPS
        self.log_dir = ''
        self.network_dir = ''

        self.api.loadROM('../rom/' + self.rom_name)
        self.minimal_actions = self.api.getMinimalActionSet()
        original_width, original_height = self.api.getScreenDims()
        self.merge_frame = np.zeros(
            (self.buffer_len, original_height, original_width),
            dtype=np.uint8)

    def get_action_count(self):
        """Return the size of the minimal action set."""
        return len(self.minimal_actions)

    def train(self, agent, store_freq, folder=None, start_epoch=0):
        """Train ``agent`` for epochs ``start_epoch`` .. ``EPOCH_COUNT - 1``.

        Each epoch runs episodes until ``EPOCH_STEPS`` steps are used up,
        then evaluates the agent and updates the log files.
        """
        self._open_log_files(agent, folder)
        obs = np.zeros((self.height, self.width), dtype=np.uint8)
        epoch_count = Environment.EPOCH_COUNT

        for epoch in range(start_epoch, epoch_count):
            self.need_reset = True
            steps_left = Environment.EPOCH_STEPS

            print("\n" + "=" * 50)
            print("Epoch #%d" % (epoch + 1))
            episode = 0
            train_start = time.time()
            while steps_left > 0:
                num_step, _ = self._run_episode(agent, steps_left, obs)
                steps_left -= num_step
                episode += 1
                if steps_left == 0 or episode % 10 == 0:
                    print("Finished episode #%d, steps_left = %d"
                          % (episode, steps_left))
            train_end = time.time()

            valid_values = agent.get_validate_values()
            eval_values = self.evaluate(agent)
            test_end = time.time()

            train_time = train_end - train_start
            test_time = test_end - train_end
            step_per_sec = Environment.EPOCH_STEPS * 1. / max(1, train_time)
            print(("\tFinished epoch #%d, episode trained = %d\n"
                   "\tValidate values = %.3f, evaluate reward = %.3f\n"
                   "\tTrain time = %.0fs, test time = %.0fs, "
                   "steps/sec = %.4f")
                  % (epoch + 1, episode, valid_values, eval_values,
                     train_time, test_time, step_per_sec))

            self._update_log_files(agent, epoch + 1, episode,
                                   valid_values, eval_values,
                                   train_time, test_time,
                                   step_per_sec, store_freq)
            gc.collect()

    def evaluate(self, agent, episodes=30, obs=None):
        """Run evaluation episodes and return the mean episode reward."""
        print("\n***Start evaluating")
        if obs is None:
            obs = np.zeros((self.height, self.width), dtype=np.uint8)
        sum_reward = 0.0
        sum_step = 0.0
        for episode in range(episodes):
            self.need_reset = True
            step, reward = self._run_episode(agent, self.episode_steps, obs,
                                             self.eval_eps, evaluating=True)
            sum_reward += reward
            sum_step += step
            print("Finished episode %d, reward = %d, step = %d"
                  % (episode + 1, reward, step))
        # Force a fresh game for whatever runs after the evaluation.
        self.need_reset = True
        print("Average reward per episode = %.4f" % (sum_reward / episodes))
        print("Average step per episode = %.4f" % (sum_step / episodes))
        return sum_reward / episodes

    def _prepare_game(self):
        """Reset the game if required and refill the merge buffer."""
        if self.need_reset or self.api.game_over():
            self.api.reset_game()
            self.need_reset = False
            if Environment.MAX_NO_OP > 0:
                num_no_op = (self.rng.randint(Environment.MAX_NO_OP + 1)
                             + self.buffer_len)
                for _ in range(num_no_op):
                    self.api.act(0)

        for _ in range(self.buffer_len):
            self._update_buffer()

    def _run_episode(self, agent, steps_left, obs, eps=0.0,
                     evaluating=False):
        """Play one episode; return (steps taken, unclipped reward sum).

        The episode ends on game over, when the step budget is used up, or
        (outside evaluation) as soon as a life is lost.
        """
        self._prepare_game()

        start_lives = self.api.lives()
        step_count = 0
        sum_reward = 0
        is_terminal = False
        while step_count < steps_left and not is_terminal:
            self._get_screen(obs)
            action_id, _ = agent.get_action(obs, eps, evaluating)

            reward = self._repeat_action(self.minimal_actions[action_id])
            reward_clip = reward
            if self.max_reward > 0:
                reward_clip = np.clip(reward, -self.max_reward,
                                      self.max_reward)

            life_lost = not evaluating and self.api.lives() < start_lives
            is_terminal = (self.api.game_over() or life_lost
                           or step_count + 1 >= steps_left)

            agent.add_experience(obs, is_terminal, action_id, reward_clip,
                                 evaluating)
            sum_reward += reward
            step_count += 1

        return step_count, sum_reward

    def _update_buffer(self):
        """Write the current grayscale screen into the next buffer slot."""
        self.api.getScreenGrayscale(self.merge_frame[self.merge_id, ...])
        self.merge_id = (self.merge_id + 1) % self.buffer_len

    def _repeat_action(self, action):
        """Repeat ``action`` ``self.repeat`` times; return the reward sum.

        Only the screens of the last ``buffer_len`` repeats are stored.
        """
        reward = 0
        for i in range(self.repeat):
            reward += self.api.act(action)
            if i + self.buffer_len >= self.repeat:
                self._update_buffer()
        return reward

    def _get_screen(self, resized_frame):
        """Write the max-merged, resized screen into ``resized_frame``."""
        self._resize_frame(self.merge_frame.max(axis=0), resized_frame)

    def _resize_frame(self, src_frame, dst_frame):
        cv2.resize(src=src_frame, dst=dst_frame,
                   dsize=(self.width, self.height),
                   interpolation=cv2.INTER_LINEAR)

    def _open_log_files(self, agent, folder):
        """Create the log/network directories and write the run info files.

        When ``folder`` is given the run continues in that folder and the
        CSV files are not re-created.
        """
        # shlex.quote on Python 3, pipes.quote on Python 2.
        try:
            from shlex import quote
        except ImportError:
            from pipes import quote

        time_str = time.strftime("_%m-%d-%H-%M", time.localtime())
        base_rom_name = os.path.splitext(os.path.basename(self.rom_name))[0]

        if folder is not None:
            self.log_dir = folder
            self.network_dir = self.log_dir + '/network'
        else:
            self.log_dir = '../run_results/' + base_rom_name + time_str
            self.network_dir = self.log_dir + '/network'

        info_name = get_next_name(self.log_dir, 'info', 'txt')
        git_name = get_next_name(self.log_dir, 'git-diff', '')

        try:
            os.stat(self.log_dir)
        except OSError:
            os.makedirs(self.log_dir)

        try:
            os.stat(self.network_dir)
        except OSError:
            os.makedirs(self.network_dir)

        with open(os.path.join(self.log_dir, info_name), 'w') as f:
            # universal_newlines=True yields text rather than bytes on
            # Python 3, so it can be concatenated and written directly.
            f.write('Commit: ' + subprocess.check_output(
                ['git', 'rev-parse', 'HEAD'], universal_newlines=True))
            f.write('Run command: ')
            f.write(' '.join(quote(x) for x in sys.argv))
            f.write('\n\n')
            f.write(agent.get_info())
            write_info(f, Environment)
            write_info(f, agent.__class__)
            write_info(f, agent.network.__class__)

        # From https://github.com/spragunr/deep_q_rl/pull/49/files
        with open(os.path.join(self.log_dir, git_name), 'w') as f:
            f.write(subprocess.check_output(
                ['git', 'diff', 'HEAD'], universal_newlines=True))

        if folder is not None:
            return

        with open(os.path.join(self.log_dir, 'results.csv'), 'w') as f:
            f.write("epoch,episode_train,validate_values,evaluate_reward"
                    ",train_time,test_time,steps_per_second\n")

        mem = psutil.virtual_memory()
        with open(os.path.join(self.log_dir, 'memory.csv'), 'w') as f:
            f.write("epoch,available,free,buffers,cached"
                    ",available_readable,used_percent\n")
            f.write("%d,%d,%d,%d,%d,%s,%.1f\n" %
                    (0, mem.available, mem.free, mem.buffers, mem.cached,
                     bytes2human(mem.available), mem.percent))

    def _update_log_files(self, agent, epoch, episode, valid_values,
                          eval_values, train_time, test_time, step_per_sec,
                          store_freq):
        """Append one epoch to the CSV logs and dump the network.

        The experience buffer is dumped after the last epoch (when
        ``store_freq >= 0``) and every ``store_freq`` epochs (when > 0).
        """
        print("Updating log files")
        with open(self.log_dir + '/results.csv', 'a') as f:
            f.write("%d,%d,%.4f,%.4f,%d,%d,%.4f\n" %
                    (epoch, episode, valid_values, eval_values,
                     train_time, test_time, step_per_sec))

        mem = psutil.virtual_memory()
        with open(self.log_dir + '/memory.csv', 'a') as f:
            f.write("%d,%d,%d,%d,%d,%s,%.1f\n" %
                    (epoch, mem.available, mem.free, mem.buffers,
                     mem.cached, bytes2human(mem.available), mem.percent))

        agent.dump_network(self.network_dir + ('/%03d' % (epoch)) + '.npz')

        if (store_freq >= 0 and epoch >= Environment.EPOCH_COUNT) or \
                (store_freq > 0 and (epoch % store_freq == 0)):
            agent.dump_exp(self.network_dir + '/exp.npz')

    def _setup_record(self, network_file):
        """Enable screen recording; return (image dir, movie file name)."""
        file_name, _ = os.path.splitext(os.path.basename(network_file))
        time_str = time.strftime("_%m-%d-%H-%M", time.localtime())
        img_dir = (os.path.dirname(network_file) + '/images_'
                   + file_name + time_str)
        rom_name, _ = os.path.splitext(self.rom_name)
        out_name = (os.path.dirname(network_file) + '/' + rom_name + '_'
                    + file_name + time_str + '.mov')
        print(out_name)

        try:
            os.stat(img_dir)
        except OSError:
            os.makedirs(img_dir)

        self.api.setString('record_screen_dir', img_dir)
        # The ROM is reloaded after the recording directory was set.
        self.api.loadROM('../rom/' + self.rom_name)
        return img_dir, out_name

    def record_run(self, agent, network_file, episode_id=1):
        """Record evaluation episode number ``episode_id`` as a movie.

        Earlier episodes are played without recording; the emulator state
        is saved before the ROM reload and restored afterwards.
        """
        if episode_id > 1:
            self.evaluate(agent, episode_id - 1)
            system_state = self.api.cloneSystemState()

        img_dir, out_name = self._setup_record(network_file)

        if episode_id > 1:
            self.api.restoreSystemState(system_state)

        self.evaluate(agent, 1)
        # Try ffmpeg first and fall back to avconv.
        # NOTE(review): the paths are interpolated into a shell command
        # unquoted - confirm network_file never comes from untrusted input.
        script = \
            """
            {
                ffmpeg -r 60 -i %s/%%06d.png -f mov -c:v libx264 %s
            } || {
                avconv -r 60 -i %s/%%06d.png -f mov -c:v libx264 %s
            }
            """ % (img_dir, out_name, img_dir, out_name)
        os.system(script)
class AtariEmulator(BaseEnvironment):
    """ALE-backed emulator with explicit action repeat and frame history."""

    def __init__(self, emulator_id, game, resource_folder, random_seed=3,
                 random_start=True, single_life_episodes=False,
                 history_window=1, visualize=False, verbose=0, **unknown):
        if verbose >= 2:
            logging.debug('Emulator#{} received unknown args: {}'.format(
                emulator_id, unknown))
        self.emulator_id = emulator_id
        self.ale = ALEInterface()
        # Every emulator gets its own seed derived from its id.
        self.ale.setInt(b"random_seed", random_seed * (emulator_id + 1))
        # For fuller control on explicit action repeat (>= ALE 0.5.0)
        self.ale.setFloat(b"repeat_action_probability", 0.0)
        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt(b"frame_skip", 1)
        self.ale.setBool(b"color_averaging", False)
        self.ale.setBool(b"display_screen", visualize)

        rom_path = resource_folder + "/" + game + ".bin"
        self.ale.loadROM(str.encode(rom_path))
        self.legal_actions = self.ale.getMinimalActionSet()
        # Some games stay frozen until FIRE is pressed, so remember whether
        # the action set offers it.
        action_names = [ACTION_MEANING[a] for a in self.legal_actions]
        self._have_to_fire = ('FIRE' in action_names)

        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.lives = self.ale.lives()

        self.random_start = random_start
        self.single_life_episodes = single_life_episodes
        self.call_on_new_frame = visualize
        self.history_window = history_window
        self.observation_shape = (self.history_window, IMG_SIZE_X,
                                  IMG_SIZE_Y)

        rgb_shape = (self.screen_height, self.screen_width, 3)
        gray_shape = (self.screen_height, self.screen_width, 1)
        self.rgb_screen = np.zeros(rgb_shape, dtype=np.uint8)
        self.gray_screen = np.zeros(gray_shape, dtype=np.uint8)

        # Processed historical frames that will be fed in to the network
        # (i.e., four 84x84 images)
        self.history = create_history_observation(self.history_window)
        self.frame_preprocessor = FramePreprocessor(self.gray_screen.shape,
                                                    FRAMES_IN_POOL)

    def get_legal_actions(self):
        return self.legal_actions

    def __get_screen_image(self):
        """
        Get the current frame luminance
        :return: the current frame
        """
        self.ale.getScreenGrayscale(self.gray_screen)
        if self.call_on_new_frame:
            self.ale.getScreenRGB(self.rgb_screen)
            self.on_new_frame(self.rgb_screen)
        return self.gray_screen

    def on_new_frame(self, frame):
        """Hook called with every RGB frame when visualising; no-op here."""
        pass

    def __random_start_reset(self):
        """ Restart game, optionally after a random number of no-ops """
        self.ale.reset_game()
        if self.random_start:
            # NOTE(review): if `random` is the stdlib module, randint is
            # inclusive on both ends, so this can wait MAX_START_WAIT + 1
            # steps - confirm whether that is intended.
            wait = random.randint(0, MAX_START_WAIT + 1)
            for _ in range(wait):
                self.ale.act(self.get_noop())
        if self.__is_over():
            self.ale.reset_game()
        self.lives = self.ale.lives()

    def __new_game(self):
        self.__random_start_reset()

        if not self._have_to_fire:
            return
        # Take action on reset for environments that are fixed until firing.
        for action_index in (1, 2):
            self.ale.act(self.legal_actions[action_index])
            if self.__is_over():
                self.__random_start_reset()

    def __action_repeat(self, a, times=ACTION_REPEAT):
        """ Repeat action and grab screen into frame pool """
        action = self.legal_actions[a]
        total_reward = 0
        for _ in range(times - FRAMES_IN_POOL):
            total_reward += self.ale.act(action)
        # Only need to add the last FRAMES_IN_POOL frames to the frame pool
        for _ in range(FRAMES_IN_POOL):
            total_reward += self.ale.act(action)
            self.frame_preprocessor.new_frame(self.__get_screen_image())
        return total_reward

    def reset(self):
        """ Get the initial state """
        self.__new_game()
        for _ in range(self.history_window):
            self.__action_repeat(0)
            self.history.new_observation(
                self.frame_preprocessor.get_processed())
        if self.__is_terminal():
            raise Exception('This should never happen.')
        return self.history.get_state(), None

    def next(self, action):
        """ Get the next state, reward, and game over signal """
        reward = self.__action_repeat(action)
        self.history.new_observation(
            self.frame_preprocessor.get_processed())
        # Check for terminal before refreshing the stored life count.
        terminal = self.__is_terminal()
        self.lives = self.ale.lives()
        return self.history.get_state(), reward, terminal, None

    def __is_terminal(self):
        if not self.single_life_episodes:
            return self.__is_over()
        return self.__is_over() or (self.lives > self.ale.lives())

    def __is_over(self):
        return self.ale.game_over()

    def get_noop(self):
        return self.legal_actions[0]

    def close(self):
        del self.ale
class Environment:
    """ALE wrapper returning (84, 84, 4) float32 frame stacks.

    It can also collect RGB frames and save them as a GIF.
    """

    def __init__(self, render=False):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 0)
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.setBool(b'color_averaging', True)
        self.ale.setInt(b'frame_skip', 3)
        self.ale.setBool(b'display_screen', render)
        self.ale.loadROM(ENV.encode('ascii'))

        # Reusable grayscale buffer: height=210, width=160.
        self._screen = np.empty((210, 160, 1), dtype=np.uint8)
        self._no_op_max = 7
        self.img_buffer = []

    def set_render(self, render):
        """Switch the display off; a truthy ``render`` changes nothing."""
        if render:
            return
        self.ale.setBool(b'display_screen', render)

    def reset(self):
        """Reset the game and return the initial frame stack."""
        self.ale.reset_game()

        # Randomize the initial state with a random number of no-ops.
        if self._no_op_max > 0:
            n_noops = np.random.randint(0, self._no_op_max + 1)
            for _ in range(n_noops):
                self.ale.act(0)

        self.img_buffer = [self.ale.getScreenRGB()]

        self.ale.getScreenGrayscale(self._screen)
        gray = np.reshape(self._screen, (210, 160))
        # Resize to width=84, height=100 and keep rows 11..94 (84 rows).
        gray = cv2.resize(gray, (84, 100))[11:95, :]
        gray = gray.astype(np.float32)
        gray /= 255.0

        self.frame_buffer = np.stack([gray] * 4, axis=2)
        return self.frame_buffer

    def process(self, action, gif=False):
        """Apply ``1 + action`` in ALE; return (frames, reward, done, "").

        With ``gif`` set the RGB screen is added to the image buffer.
        """
        reward = self.ale.act(1 + action)
        done = self.ale.game_over()

        if gif:
            self.img_buffer.append(self.ale.getScreenRGB())

        self.ale.getScreenGrayscale(self._screen)
        gray = np.reshape(self._screen, (210, 160))
        gray = cv2.resize(gray, (84, 100))
        gray = np.reshape(gray[11:95, :], (84, 84, 1))
        gray = gray.astype(np.float32)
        gray *= (1/255.0)

        # Drop the oldest frame and append the newest as the last channel.
        older_frames = self.frame_buffer[:, :, 1:]
        self.frame_buffer = np.append(older_frames, gray, axis=2)
        return self.frame_buffer, reward, done, ""

    def save_gif(self, path):
        """Save the collected RGB frames to ``path`` and clear the buffer."""
        target_dir = os.path.dirname(path)
        os.makedirs(target_dir, exist_ok=True)
        imageio.mimsave(path, self.img_buffer, duration=0.001)
        self.img_buffer = []

    def close(self):
        """Switch the ALE display off."""
        self.ale.setBool(b'display_screen', False)
class GameState(object):
    """ALE game state holding a stack of four 84x84 float32 frames.

    ``process`` computes the next stack in ``s_t1``; ``update`` makes it
    the current stack ``s_t``.
    """

    def __init__(self, rand_seed, display=False):
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', rand_seed)

        if display:
            self._setup_display()

        self.ale.loadROM(ROM)

        # height=210, width=160
        self.screen = np.empty((210, 160, 1), dtype=np.uint8)

        # Take one no-op step to obtain the first frame.
        no_action = 0
        self.reward = self.ale.act(no_action)
        self.terminal = self.ale.game_over()

        # The screen buffer has shape (210, 160, 1).
        self.ale.getScreenGrayscale(self.screen)
        # Reshape to (210, 160), then resize to height=110, width=84.
        flat_screen = np.reshape(self.screen, (210, 160))
        small_screen = cv2.resize(flat_screen, (84, 110))

        # Keep rows 18..101, which gives an 84x84 frame.
        first_frame = small_screen[18:102, :]
        first_frame = first_frame.astype(np.float32)
        first_frame *= (1.0/255.0)

        self.s_t = np.stack([first_frame] * 4, axis=2)

        # Collect only the actions that are actually used.
        self.real_actions = self.ale.getMinimalActionSet()

    def _setup_display(self):
        """Turn on the ALE display with platform-specific sound setup."""
        platform = sys.platform
        if platform == 'darwin':
            import pygame
            pygame.init()
            self.ale.setBool('sound', False)
        elif platform.startswith('linux'):
            self.ale.setBool('sound', True)
        self.ale.setBool('display_screen', True)

    def process(self, action):
        """Apply an action index and build the next frame stack ``s_t1``.

        Sets ``reward`` and ``terminal``; the game is reset when it ended.
        """
        # Convert the index to the actually used action among the 18.
        real_action = self.real_actions[action]
        self.reward = self.ale.act(real_action)
        self.terminal = self.ale.game_over()

        # The screen buffer has shape (210, 160, 1).
        self.ale.getScreenGrayscale(self.screen)
        # Reshape to (210, 160), then resize to height=110, width=84.
        flat_screen = np.reshape(self.screen, (210, 160))
        small_screen = cv2.resize(flat_screen, (84, 110))

        new_frame = np.reshape(small_screen[18:102, :], (84, 84, 1))
        new_frame = new_frame.astype(np.float32)
        new_frame *= (1.0/255.0)

        # The newest frame goes first; the oldest frame is dropped.
        kept_frames = self.s_t[:, :, 0:3]
        self.s_t1 = np.append(new_frame, kept_frames, axis=2)
        if self.terminal:
            self.ale.reset_game()

    def update(self):
        """Make the frame stack built by ``process`` the current state."""
        self.s_t = self.s_t1
class AtariEmulator(BaseEnvironment):
    """ALE-backed environment with explicit action repeat and frame pooling."""

    def __init__(self, rom_addr, random_start=False, random_seed=6,
                 visualize=True, single_life=False):
        """Create the emulator, load ``rom_addr`` and allocate frame buffers.

        Args:
            rom_addr: path of the ROM file.
            random_start: when true, each new game begins with a random
                number of repeats of the first legal action.
            random_seed: ALE is seeded with twice this value.
            visualize: when true, every grabbed frame is also read as RGB
                and passed to on_new_frame().
            single_life: when true, losing a life counts as terminal.
        """
        self.ale = ALEInterface()
        self.ale.setInt(b"random_seed", 2 * random_seed)

        # For fuller control on explicit action repeat (>= ALE 0.5.0)
        self.ale.setFloat(b"repeat_action_probability", 0.0)

        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt(b"frame_skip", 1)
        self.ale.setBool(b"color_averaging", False)

        self.ale.loadROM(str.encode(rom_addr))
        self.legal_actions = self.ale.getMinimalActionSet()
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.lives = self.ale.lives()

        # NOTE(review): this writer is opened but never used or closed in
        # this class; confirm whether a subclass relies on it.
        self.writer = imageio.get_writer('breakout0.gif', fps=30)

        self.random_start = random_start
        self.single_life_episodes = single_life
        self.call_on_new_frame = visualize

        # Processed historical frames that will be fed in to the network
        # (i.e., four 84x84 images)
        self.observation_pool = ObservationPool(
            np.zeros((84, 84, 4), dtype=np.uint8))

        height, width = self.screen_height, self.screen_width
        self.rgb_screen = np.zeros((height, width, 3), dtype=np.uint8)
        self.gray_screen = np.zeros((height, width, 1), dtype=np.uint8)
        self.frame_pool = FramePool(
            np.empty((2, height, width), dtype=np.uint8),
            self.__process_frame_pool)

    def get_legal_actions(self):
        """Return the minimal action set reported by ALE."""
        return self.legal_actions

    def __get_screen_image(self):
        """Read the current grayscale frame and return it without its channel axis.

        When visualisation is enabled, the RGB frame is read too and handed
        to on_new_frame().
        """
        self.ale.getScreenGrayscale(self.gray_screen)
        if self.call_on_new_frame:
            self.ale.getScreenRGB(self.rgb_screen)
            self.on_new_frame(self.rgb_screen)
        return np.squeeze(self.gray_screen)

    def on_new_frame(self, frame):
        """Hook called with each RGB frame; does nothing by default."""
        pass

    def __new_game(self):
        """Reset the game, refresh the life counter and apply the random start."""
        self.ale.reset_game()
        self.lives = self.ale.lives()
        if not self.random_start:
            return
        first_action = self.legal_actions[0]
        for _ in range(random.randint(0, MAX_START_WAIT)):
            self.ale.act(first_action)

    def __process_frame_pool(self, frame_pool):
        """Collapse pooled frames with a pixel-wise max and shrink to 84x84 uint8."""
        pooled = np.amax(frame_pool, axis=0)
        shrunk = imresize(pooled, (84, 84), interp='nearest')
        return shrunk.astype(np.uint8)

    def __action_repeat(self, a, times=ACTION_REPEAT):
        """Repeat legal action ``a`` and return the summed reward.

        Only the final FRAMES_IN_POOL repeats push their frame into the
        frame pool.
        """
        ale_action = self.legal_actions[a]
        total = 0
        for _ in range(times - FRAMES_IN_POOL):
            total += self.ale.act(ale_action)
        # Only need to add the last FRAMES_IN_POOL frames to the frame pool
        for _ in range(FRAMES_IN_POOL):
            total += self.ale.act(ale_action)
            self.frame_pool.new_frame(self.__get_screen_image())
        return total

    def get_initial_state(self):
        """Start a new game and return the first pooled observation.

        Raises:
            Exception: if the game is already terminal after the four
                warm-up steps.
        """
        self.__new_game()
        for _ in range(4):
            self.__action_repeat(0)
            self.observation_pool.new_observation(
                self.frame_pool.get_processed_frame())
        if self.__is_terminal():
            raise Exception('This should never happen.')
        return self.observation_pool.get_pooled_observations()

    def next(self, action):
        """Apply the arg-max of ``action``; return (observation, reward, terminal)."""
        chosen = np.argmax(action)
        reward = self.__action_repeat(chosen)
        self.observation_pool.new_observation(
            self.frame_pool.get_processed_frame())
        # Terminal is evaluated before the stored life counter is refreshed,
        # so a life lost during this step is detected.
        terminal = self.__is_terminal()
        self.lives = self.ale.lives()
        observation = self.observation_pool.get_pooled_observations()
        return observation, reward, terminal

    def __is_terminal(self):
        """Return whether the episode ended (game over, or life lost in single-life mode)."""
        game_over = self.__is_over()
        if not self.single_life_episodes:
            return game_over
        return game_over or (self.lives > self.ale.lives())

    def __is_over(self):
        """Return ALE's game-over flag."""
        return self.ale.game_over()

    def get_noop(self):
        """Return the action vector whose arg-max is index 0."""
        return [1.0, 0.0]
class AtariEmulator:
    """ALE environment with frame skipping and max-blended frame buffering."""

    def __init__(self, args):
        """Initialize the Atari environment from ``args`` and reset the game.

        ``args`` must provide: buffer_length, screen_dims, frame_skip,
        blend_method, reward_processing, max_start_wait, history_length,
        watch, rom_path and game.
        """
        # Parameters
        self.buffer_length = args.buffer_length
        self.screen_dims = args.screen_dims
        self.frame_skip = args.frame_skip
        self.blend_method = args.blend_method
        self.reward_processing = args.reward_processing
        self.max_start_wait = args.max_start_wait
        self.history_length = args.history_length
        # Emulator frames consumed by reset() itself: filling the buffer
        # plus the steps needed to build the initial history.
        self.start_frames_needed = self.buffer_length - 1 + (
            (args.history_length - 1) * self.frame_skip)

        # Initialize ALE instance
        self.ale = ALEInterface()
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        if args.watch:
            self.ale.setBool(b'sound', True)
            self.ale.setBool(b'display_screen', True)
        self.ale.loadROM(str.encode(args.rom_path + '/' + args.game + '.bin'))

        # Ring buffer of the most recent raw grayscale screens.
        self.buffer = np.empty((self.buffer_length, 210, 160))
        self.current = 0
        self.action_set = self.ale.getMinimalActionSet()
        self.lives = self.ale.lives()

        self.reset()

    def get_possible_actions(self):
        """Return list of possible actions for game."""
        return self.action_set

    def get_screen(self):
        """Store the current grayscale screen in the next ring-buffer slot."""
        self.buffer[self.current] = np.squeeze(self.ale.getScreenGrayscale())
        self.current = (self.current + 1) % self.buffer_length

    def reset(self):
        """Restart the game and return the list of initial transitions.

        The first entry is ``(frame, 0, 0, False)``; the remaining
        ``history_length - 1`` entries are run_step(0) results, which carry
        an extra trailing raw reward. If the agent dies during the start
        wait, ``max_start_wait`` is reduced by one and the reset is retried.
        The process exits once ``max_start_wait`` has dropped below zero.
        """
        self.ale.reset_game()
        self.lives = self.ale.lives()

        if self.max_start_wait < 0:
            print("ERROR: max start wait decreased beyond 0")
            sys.exit()
        elif self.max_start_wait <= self.start_frames_needed:
            wait = 0
        else:
            wait = random.randint(
                0, self.max_start_wait - self.start_frames_needed)
        for _ in range(wait):
            self.ale.act(self.action_set[0])

        # Fill frame buffer
        self.get_screen()
        for _ in range(self.buffer_length - 1):
            self.ale.act(self.action_set[0])
            self.get_screen()

        # get initial_states
        state = [(self.preprocess(), 0, 0, False)]
        for _ in range(self.history_length - 1):
            state.append(self.run_step(0))

        # make sure agent hasn't died yet
        if self.isTerminal():
            print("Agent lost during start wait. Decreasing max_start_wait by 1")
            self.max_start_wait -= 1
            return self.reset()

        return state

    def run_step(self, action):
        """Apply ``action`` for ``frame_skip`` frames.

        Returns:
            ``(frame, action, reward, terminal, raw_reward)`` where
            ``reward`` is clipped to [-1, 1] when ``reward_processing`` is
            ``'clip'`` and equals ``raw_reward`` otherwise.
        """
        raw_reward = 0
        for _ in range(self.frame_skip):
            raw_reward += self.ale.act(self.action_set[action])
            self.get_screen()

        if self.reward_processing == 'clip':
            reward = np.clip(raw_reward, -1, 1)
        else:
            reward = raw_reward

        # Check terminal before refreshing the life counter so that a life
        # lost during this step is detected.
        terminal = self.isTerminal()
        self.lives = self.ale.lives()

        return (self.preprocess(), action, reward, terminal, raw_reward)

    def preprocess(self):
        """Blend the buffered screens and resize them to ``screen_dims``.

        Raises:
            ValueError: if ``blend_method`` is anything other than ``"max"``,
                the only method implemented here. Previously this case
                passed ``None`` to ``cv2.resize``.
        """
        if self.blend_method != "max":
            raise ValueError(
                "Unsupported blend_method %r; only 'max' is implemented"
                % (self.blend_method,))
        img = np.amax(self.buffer, axis=0)
        return cv2.resize(img, self.screen_dims, interpolation=cv2.INTER_LINEAR)

    def isTerminal(self):
        """Return True on game over or when a life was lost since the last update."""
        return (self.isGameOver() or (self.lives > self.ale.lives()))

    def isGameOver(self):
        """Return ALE's game-over flag."""
        return self.ale.game_over()
class GameEnvironment:
    """Settings-driven ALE wrapper exposing grayscale observations and life tracking."""

    def __init__(self, settings):
        """Configure ALE from the ``settings`` mapping, load the ROM and start a game.

        Keys read: DISPLAY_SCREEN, SOUND, COLOR_AVERAGING, RANDOM_SEED,
        FRAME_SKIP, REPEAT_ACTION_PROB, ROMS_DIR, ROM_NAME, RANDOM_STARTS,
        RNG and MINIMAL_ACTION_SET.
        """
        self.ale = ALEInterface()
        self.ale.setBool('display_screen', settings['DISPLAY_SCREEN'])
        self.ale.setBool('sound', settings['SOUND'])
        self.ale.setBool('color_averaging', settings['COLOR_AVERAGING'])
        self.ale.setInt('random_seed', settings['RANDOM_SEED'])
        self.ale.setInt('frame_skip', settings['FRAME_SKIP'])
        self.ale.setFloat('repeat_action_probability',
                          settings['REPEAT_ACTION_PROB'])

        roms_dir = settings['ROMS_DIR']
        rom_name = settings['ROM_NAME']
        # Accept the ROM name with or without its ".bin" extension.
        if rom_name.endswith('.bin'):
            self.name = rom_name[:-4]
            rom_file = rom_name
        else:
            self.name = rom_name
            rom_file = rom_name + '.bin'
        self.ale.loadROM(os.path.join(roms_dir, rom_file))

        self.random_starts = settings['RANDOM_STARTS']
        self.rng = settings['RNG']

        if settings['MINIMAL_ACTION_SET']:
            self.actions = self.ale.getMinimalActionSet()
        else:
            self.actions = self.ale.getLegalActionSet()
        self.n_actions = len(self.actions)

        self.width, self.height = self.ale.getScreenDims()
        # Buffer that paint() asks ALE to fill with the grayscale screen.
        self.observation = np.zeros((self.height, self.width), dtype='uint8')

        self.reward = None
        self.game_over = None
        self.terminal = None
        self.total_lives = None

        self.init()

    def init(self):
        """Restart the game, reset the bookkeeping and take one step with action 0."""
        self.restartGame()
        self.reward = 0
        self.game_over = self.gameOver()
        self.terminal = self.game_over
        self.total_lives = self.lives()
        self.step(0)

    def getState(self):
        """Return ``(observation, reward, terminal, game_over)``."""
        return self.observation, self.reward, self.terminal, self.game_over

    def step(self, action, training=False):
        """Apply ``action`` (an index into ``self.actions``) and return the state.

        When ``training`` is true, losing a life marks the step as terminal
        even though the game itself is not over.
        """
        self.reward = self.act(action)
        self.paint()
        lives = self.lives()
        self.game_over = self.gameOver()
        self.terminal = self.game_over
        if training and (lives < self.total_lives):
            self.terminal = True
        self.total_lives = lives
        return self.getState()

    def newGame(self):
        """Start a game preceded by a random number of action-0 steps.

        The count comes from ``self.rng.randint(1, self.random_starts)``.
        NOTE(review): whether the upper bound is inclusive depends on the
        type of ``rng``, which is not visible here.
        """
        self.init()
        # `range` and the print() call below work on both Python 2 and 3;
        # the previous `xrange` and print statement were Python 2 only.
        for _ in range(self.rng.randint(1, self.random_starts)):
            self.act(0)
            if self.gameOver():
                print("Warning terminal in random init")
        return self.step(0)

    def newTestGame(self):
        """Start a game without random initial steps and return the state."""
        self.init()
        return self.getState()

    def paint(self):
        """Refresh ``self.observation`` with the current grayscale screen."""
        self.ale.getScreenGrayscale(self.observation)

    def getScreenRGB(self):
        """Return the current RGB screen from ALE."""
        return self.ale.getScreenRGB()

    def act(self, action):
        """Send the ALE action at index ``action`` and return its reward.

        Raises:
            AssertionError: if ``action`` is outside ``[0, n_actions)``.
        """
        assert ((action >= 0) and (action < self.n_actions))
        return self.ale.act(self.actions[action])

    def lives(self):
        """Return the number of lives reported by ALE."""
        return self.ale.lives()

    def restartGame(self):
        """Reset the ALE game."""
        self.ale.reset_game()

    def gameOver(self):
        """Return ALE's game-over flag."""
        return self.ale.game_over()
ale.setBool(b'display_screen', True) # load game rom file name_of_the_game = 'space_invaders' game_path = '/home/juna/atari_project/Arcade-Learning-Environment/roms/' + name_of_the_game + '.bin' ale.loadROM(game_path.encode()) minimal_actions = ale.getMinimalActionSet() print('minimal_actions :\n', minimal_actions) screen_data = np.empty((210, 160, 1), dtype=np.uint8) screen_data = None #initialize the state image = ale.getScreenGrayscale(screen_data) image = impre(name_of_the_game, image) state = tc.stack((image, image, image, image), dim=0).unsqueeze(0).type(cpu_dtype) del image memory_buffer = [] # zeros = tc.zeros_like(image) # state_m = tc.zeros(sample_num, 4, 84, 84).type(gpu_dtype) epi_num = 0 epi_reward = 0 frame_num = ale.getFrameNumber() action = None # iteration loop while frame_num < 1e7:
class Emulate:
    """Minimal ALE wrapper returning resized grayscale or colour screens."""

    def __init__(self, rom_file, display_screen=False, frame_skip=4,
                 screen_height=84, screen_width=84,
                 repeat_action_probability=0, color_averaging=True,
                 random_seed=0, record_screen_path='screen_pics',
                 record_sound_filename=None, minimal_action_set=True):
        """Configure ALE and load ``rom_file``.

        ``record_screen_path`` and ``record_sound_filename`` are accepted
        but not used by this class. A ``random_seed`` of 0 leaves ALE's own
        seed setting untouched.
        """
        ale = ALEInterface()
        self.ale = ale

        if display_screen:
            platform = sys.platform
            if platform == 'darwin':
                import pygame
                pygame.init()
                ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif platform.startswith('linux'):
                ale.setBool('sound', True)
            ale.setBool('display_screen', True)

        ale.setInt('frame_skip', frame_skip)
        ale.setFloat('repeat_action_probability', repeat_action_probability)
        ale.setBool('color_averaging', color_averaging)

        if random_seed:
            ale.setInt('random_seed', random_seed)

        ale.loadROM(rom_file)

        self.actions = (ale.getMinimalActionSet() if minimal_action_set
                        else ale.getLegalActionSet())

        # cv2.resize expects its target size as (width, height).
        self.dims = (screen_width, screen_height)

    def numActions(self):
        """Return how many actions are available."""
        return len(self.actions)

    def getActions(self):
        """Return the ALE action set in use."""
        return self.actions

    def restart(self):
        """Reset the game."""
        self.ale.reset_game()

    def act(self, action):
        """Play the action at index ``action`` and return ALE's reward."""
        return self.ale.act(self.actions[action])

    def getScreen(self):
        """Return the grayscale screen resized to ``self.dims``."""
        return cv2.resize(self.ale.getScreenGrayscale(), self.dims)

    def getScreenGray(self):
        """Return the resized grayscale screen rotated by 90 degrees."""
        shrunk = cv2.resize(self.ale.getScreenGrayscale(), self.dims)
        return np.rot90(shrunk, k=1)

    def getScreenColor(self):
        """Return the resized RGB screen rotated by 90 degrees."""
        shrunk = cv2.resize(self.ale.getScreenRGB(), self.dims)
        return np.rot90(shrunk, k=1)

    def isTerminal(self):
        """Return ALE's game-over flag."""
        return self.ale.game_over()
class ALEEnvironment():
    """ALE wrapper with sub-goal tracking and a stacked-frame history.

    Three rectangular goal regions are hard-coded in screen coordinates.
    The class reports when the screen inside a region differs from the
    initial screen, and offers a distance-based shaping reward.
    """

    def __init__(self, rom_file, args):
        """Configure ALE from ``args``, load ``rom_file`` and set up goal state.

        ``args`` must provide: display_screen, frame_skip, color_averaging,
        random_seed, minimal_action_set, screen_width and screen_height.
        """
        self.ale = ALEInterface()
        # Number of frames kept in the stacked history.
        self.histLen = 4

        if args.display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', args.frame_skip)
        # repeat_action_probability is deliberately not set here, so ALE's
        # own default applies.
        self.ale.setBool('color_averaging', args.color_averaging)

        if args.random_seed:
            self.ale.setInt('random_seed', args.random_seed)

        self.ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d" % len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d" % len(self.actions))
        logger.debug("Actions: " + str(self.actions))

        self.screen_width = args.screen_width
        self.screen_height = args.screen_height

        self.mode = "train"
        self.life_lost = False
        # Reference screen used by goalReached(). The attribute name keeps
        # its historical spelling because other code may read it.
        self.initSrcreen = self.getScreen()

        # Each goal is [[x0, y0], [x1, y1]] in resized-screen coordinates.
        self.goalSet = []
        self.goalSet.append([[70, 65], [74, 71]])  # lower right ladder 4
        self.goalSet.append([[11, 58], [15, 66]])  # lower left ladder 3
        self.goalSet.append([[7, 41], [11, 45]])  # key 5
        self.goalCenterLoc = [
            [float(goal[0][0] + goal[1][0]) / 2,
             float(goal[0][1] + goal[1][1]) / 2]
            for goal in self.goalSet
        ]

        self.agentOriginLoc = [42, 33]
        self.agentLastX = 42
        self.agentLastY = 33
        self.reachedGoal = [0, 0, 0]
        self.histState = self.initializeHistState()

    def initializeHistState(self):
        """Return a history made of ``histLen`` copies of the current frame."""
        frames = [self.getState() for _ in range(self.histLen)]
        return np.concatenate(frames, axis=2)

    def numActions(self):
        """Return how many actions are available."""
        return len(self.actions)

    def resetGoalReach(self):
        """Mark every goal as not yet reached."""
        self.reachedGoal = [0, 0, 0]

    def _startLife(self):
        """Clear per-life state and wait 19 no-op steps for the game to settle."""
        self.life_lost = False
        self.reachedGoal = [0, 0, 0]
        for _ in range(19):
            self.act(0)  # wait for initialization
        self.histState = self.initializeHistState()
        self.agentLastX = self.agentOriginLoc[0]
        self.agentLastY = self.agentOriginLoc[1]

    def restart(self):
        """Begin a new episode.

        In test mode, the game is simply initialized. In train mode, if the
        game is in terminal state due to a life loss but not yet game over,
        then only the life loss flag is reset so that the next game starts
        from the current state. Otherwise, the game is simply initialized.
        """
        if (self.mode == 'test' or
                not self.life_lost or  # `reset` called in a middle of episode
                self.ale.game_over()  # all lives are lost
           ):
            self.ale.reset_game()
        self._startLife()

    def beginNextLife(self):
        """Continue from the current emulator state after a life was lost."""
        self._startLife()

    def act(self, action):
        """Play the action at index ``action``, update the history, return the reward."""
        lives = self.ale.lives()
        reward = self.ale.act(self.actions[action])
        self.life_lost = (not lives == self.ale.lives())
        currState = self.getState()
        # Drop the oldest frame and append the newest.
        self.histState = np.concatenate((self.histState[:, :, 1:], currState),
                                        axis=2)
        return reward

    def getScreen(self):
        """Return the grayscale screen resized to (screen_width, screen_height)."""
        screen = self.ale.getScreenGrayscale()
        return cv2.resize(screen, (self.screen_width, self.screen_height))

    def getScreenRGB(self):
        """Return the RGB screen resized to (screen_width, screen_height)."""
        screen = self.ale.getScreenRGB()
        return cv2.resize(screen, (self.screen_width, self.screen_height))

    def getAgentLoc(self):
        """Estimate the agent position as the mean location of matching pixels.

        A position matches when any single colour channel equals the
        corresponding channel of the agent colour [200, 72, 72].
        NOTE(review): this is a per-channel match, not a full-pixel match;
        confirm that is intended. When nothing matches, the last known
        position is returned.

        Returns:
            ``(mean_x, mean_y)``.
        """
        img = self.getScreenRGB()
        man = [200, 72, 72]
        # Same positions as the former `img - mask == 0` test, without
        # building the mask and the unused difference image.
        indxs = np.where(img == np.array(man))
        if (np.shape(indxs[0])[0] == 0):
            mean_x = self.agentLastX
            mean_y = self.agentLastY
        else:
            mean_y = np.sum(indxs[0]) / np.shape(indxs[0])[0]
            mean_x = np.sum(indxs[1]) / np.shape(indxs[1])[0]
        self.agentLastX = mean_x
        self.agentLastY = mean_y
        return (mean_x, mean_y)

    def distanceReward(self, lastGoal, goal):
        """Return a shaping reward for moving from ``lastGoal`` towards ``goal``.

        The value is ``0.001 * (dist_to_last - dist_to_goal) / dist_between_goals``.
        A ``lastGoal`` of -1 means the agent's origin is the starting point.
        """
        if (lastGoal == -1):
            lastGoalCenter = self.agentOriginLoc
        else:
            lastGoalCenter = self.goalCenterLoc[lastGoal]
        goalCenter = self.goalCenterLoc[goal]
        agentX, agentY = self.getAgentLoc()
        dis = np.sqrt((goalCenter[0] - agentX) * (goalCenter[0] - agentX) +
                      (goalCenter[1] - agentY) * (goalCenter[1] - agentY))
        disLast = np.sqrt((lastGoalCenter[0] - agentX) * (lastGoalCenter[0] - agentX) +
                          (lastGoalCenter[1] - agentY) * (lastGoalCenter[1] - agentY))
        disGoals = np.sqrt((goalCenter[0] - lastGoalCenter[0]) * (goalCenter[0] - lastGoalCenter[0]) +
                           (goalCenter[1] - lastGoalCenter[1]) * (goalCenter[1] - lastGoalCenter[1]))
        return 0.001 * (disLast - dis) / disGoals

    def getState(self):
        """Return the resized grayscale screen with a trailing channel axis."""
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        # Use the configured size rather than a fixed 84x84 so that other
        # screen dimensions reshape correctly.
        return np.reshape(resized, (self.screen_height, self.screen_width, 1))

    def getStackedState(self):
        """Return the stacked frame history."""
        return self.histState

    def isTerminal(self):
        """Return True on game over, or on a lost life while in train mode."""
        if self.mode == 'train':
            return self.ale.game_over() or self.life_lost
        return self.ale.game_over()

    def isGameOver(self):
        """Return ALE's game-over flag."""
        return self.ale.game_over()

    def isLifeLost(self):
        """Return whether the most recent act() lost a life."""
        return self.life_lost

    def reset(self):
        """Reset the game and clear the life-lost flag."""
        self.ale.reset_game()
        self.life_lost = False

    def goalReached(self, goal):
        """Return True when the screen inside goal ``goal`` differs enough from the start.

        Pixels in the goal rectangle are compared with the initial screen.
        The goal counts as reached, and is recorded in ``reachedGoal``, when
        more than 30% of 30 pixels differ.
        """
        (x0, y0), (x1, y1) = self.goalSet[goal]
        goalScreen = self.initSrcreen
        stateScreen = self.getScreen()
        count = 0
        # Screens are indexed [row][col]: rows span the goal's second
        # coordinate and columns its first.
        for col in range(x0, x1):
            for row in range(y0, y1):
                if goalScreen[row][col] != stateScreen[row][col]:
                    count = count + 1
        # 30 is total number of pixels of agent
        if float(count) / 30 > 0.3:
            self.reachedGoal[goal] = 1
            return True
        return False

    def goalNotReachedBefore(self, goal):
        """Return False if ``goal`` is already recorded as reached, else True."""
        if (self.reachedGoal[goal] == 1):
            return False
        return True
class ALEEnvironment(Environment):
    """Environment backed by the Arcade Learning Environment emulator."""

    def __init__(self, rom_file, args):
        """Configure ALE from ``args`` and load ``rom_file``.

        ``args`` must provide: display_screen, frame_skip,
        repeat_action_probability, color_averaging, random_seed,
        record_screen_path, record_sound_filename, minimal_action_set,
        screen_width and screen_height.
        """
        from ale_python_interface import ALEInterface
        ale = ALEInterface()
        self.ale = ale

        if args.display_screen:
            ale.setBool('sound', True)
            ale.setBool('display_screen', True)

        ale.setInt('frame_skip', args.frame_skip)
        ale.setFloat('repeat_action_probability', args.repeat_action_probability)
        ale.setBool('color_averaging', args.color_averaging)

        if args.random_seed:
            ale.setInt('random_seed', args.random_seed)

        screen_dir = args.record_screen_path
        if screen_dir:
            if not os.path.exists(screen_dir):
                logger.info("Creating folder %s" % screen_dir)
                os.makedirs(screen_dir)
            logger.info("Recording screens to %s", screen_dir)
            ale.setString('record_screen_dir', screen_dir)

        sound_file = args.record_sound_filename
        if sound_file:
            logger.info("Recording sound to %s", sound_file)
            ale.setBool('sound', True)
            ale.setString('record_sound_filename', sound_file)

        ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d" % len(self.actions))
        else:
            self.actions = ale.getLegalActionSet()
            logger.info("Using full action set with size %d" % len(self.actions))
        logger.debug("Actions: " + str(self.actions))

        self.screen_width = args.screen_width
        self.screen_height = args.screen_height

        self.life_lost = False

    def numActions(self):
        """Return how many actions are available."""
        return len(self.actions)

    def restart(self):
        """Begin a new episode.

        In test mode the game is always reset. In train mode a reset is
        skipped when the previous episode ended only because a life was lost
        and the game is not over, so play continues from the current state.
        ``self.mode`` is not assigned in this class.
        NOTE(review): presumably the base class sets it -- confirm.
        """
        resume_after_life_loss = (
            self.mode != 'test'
            and self.life_lost
            and not self.ale.game_over()
        )
        if not resume_after_life_loss:
            self.ale.reset_game()
        self.life_lost = False

    def act(self, action):
        """Play the action at index ``action``, record life loss, return the reward."""
        lives_before = self.ale.lives()
        reward = self.ale.act(self.actions[action])
        self.life_lost = (not lives_before == self.ale.lives())
        return reward

    def getScreen(self):
        """Return the grayscale screen resized to (screen_width, screen_height)."""
        target = (self.screen_width, self.screen_height)
        return cv2.resize(self.ale.getScreenGrayscale(), target)

    def isTerminal(self):
        """Return True on game over, or on a lost life while in train mode."""
        if self.mode != 'train':
            return self.ale.game_over()
        return self.ale.game_over() or self.life_lost
class MyEnv(Environment):
    """ALE-backed environment exposing an 84x84 grayscale observation.

    Mode -1 is the default mode; ``VALIDATION_MODE`` additionally
    accumulates a score and an episode count for summarizePerformance().
    """

    VALIDATION_MODE = 0

    # Options applied when the caller does not pass ``ale_options``. Held as
    # an immutable tuple so that no list is shared as a default argument.
    _DEFAULT_ALE_OPTIONS = (
        {"key": "random_seed", "value": 0},
        {"key": "color_averaging", "value": True},
        {"key": "repeat_action_probability", "value": 0.},
    )

    def __init__(self, rng, rom="ale/breakout.bin", frame_skip=4,
                 ale_options=None):
        """Create the emulator, apply ``ale_options`` and load ``rom``.

        Args:
            rng: random source; its ``randint`` is used in reset().
            rom: path of the ROM file.
            frame_skip: number of emulator steps per act(); values below 1
                are treated as 1.
            ale_options: iterable of ``{"key": ..., "value": ...}`` dicts.
                ``None`` selects random_seed=0, color_averaging=True and
                repeat_action_probability=0.

        Raises:
            ValueError: if an option value is not exactly an int, float or
                bool.
        """
        if ale_options is None:
            ale_options = self._DEFAULT_ALE_OPTIONS

        self._mode = -1
        self._modeScore = 0.0
        self._modeEpisodeCount = 0
        self._frameSkip = frame_skip if frame_skip >= 1 else 1
        self._randomState = rng

        self._ale = ALEInterface()
        for option in ale_options:
            # Exact type checks on purpose: bool is a subclass of int, so
            # isinstance() would route booleans to setInt.
            t = type(option["value"])
            if t is int:
                self._ale.setInt(option["key"], option["value"])
            elif t is float:
                self._ale.setFloat(option["key"], option["value"])
            elif t is bool:
                self._ale.setBool(option["key"], option["value"])
            else:
                raise ValueError(
                    "Option {} ({}) is not an int, bool or float.".format(
                        option["key"], t))
        self._ale.loadROM(rom)

        w, h = self._ale.getScreenDims()
        self._screen = np.empty((h, w), dtype=np.uint8)
        self._reducedScreen = np.empty((84, 84), dtype=np.uint8)
        self._actions = self._ale.getMinimalActionSet()

    def _refreshObservation(self):
        """Read the grayscale screen and resize it into the 84x84 buffer."""
        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84), self._reducedScreen,
                   interpolation=cv2.INTER_NEAREST)

    def reset(self, mode):
        """Start a new episode in ``mode`` and return an all-zero history.

        Entering validation mode clears the score and episode count; each
        further validation reset adds one episode. The game is then reset
        and advanced by a random number of action-0 steps drawn from
        ``rng.randint(15)``.
        """
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._modeScore = 0.0
                self._modeEpisodeCount = 0
            else:
                self._modeEpisodeCount += 1
        elif self._mode != -1:  # and thus mode == -1
            self._mode = -1

        self._ale.reset_game()
        for _ in range(self._randomState.randint(15)):
            self._ale.act(0)
        self._refreshObservation()

        return [4 * [84 * [84 * [0]]]]

    def act(self, action):
        """Repeat the chosen action for up to ``frame_skip`` steps.

        Stops early on game over. The raw reward is added to the mode score
        and its sign is returned.
        """
        action = self._actions[action]

        reward = 0
        for _ in range(self._frameSkip):
            reward += self._ale.act(action)
            if self.inTerminalState():
                break

        self._refreshObservation()

        self._modeScore += reward
        return np.sign(reward)

    def summarizePerformance(self, test_data_set):
        """Print the mean score per episode for the current mode.

        An episode still in progress is counted before averaging.
        NOTE(review): if the episode count is still zero at this point, the
        division raises ZeroDivisionError; confirm callers cannot hit that.
        """
        if not self.inTerminalState():
            self._modeEpisodeCount += 1
        print("== Mean score per episode is {} over {} episodes ==".format(
            self._modeScore / self._modeEpisodeCount, self._modeEpisodeCount))

    def inputDimensions(self):
        """Return the input shape: a history of four 84x84 frames."""
        return [(4, 84, 84)]

    def observationType(self, subject):
        """Return the dtype of observations (always uint8)."""
        return np.uint8

    def nActions(self):
        """Return how many actions are available."""
        return len(self._actions)

    def observe(self):
        """Return a one-element list holding a copy of the reduced screen."""
        return [np.array(self._reducedScreen)]

    def inTerminalState(self):
        """Return ALE's game-over flag."""
        return self._ale.game_over()
class AtariWrapper():
    """ALE wrapper that tries to mimic the options in the DQN paper,
    including the preprocessing (except resizing/cropping).
    """

    action_words = [
        'NOOP', 'UP', 'RIGHT', 'LEFT', 'DOWN', "UPRIGHT", "UPLEFT",
        "DOWNRIGHT", "DOWNLEFT"
    ]
    _action_set = [0, 2, 3, 4, 5, 6, 7, 8, 9]  # Valid actions for ALE.
    # Callers pass indices 0..len(_action_set)-1; _step() maps each index to
    # the ALE action id stored at that position in _action_set.
    possible_actions = list(range(len(_action_set)))

    def __init__(self, rom_path, seed=123, frameskip=4, show_display=False,
                 stack_num_states=4, concatenate_state_every=4):
        """Load the ROM at ``rom_path`` and prepare the frame queues.

        Parameters:
            frameskip: either an int, or a pair giving a random range to
                choose from with the top value excluded. Also known as
                action repeat.
            stack_num_states: number of channels in the returned state.
            concatenate_state_every: a channel is appended to the state on
                each repeat whose index within the current step is a
                multiple of this value.

        Raises:
            IOError: if ``rom_path`` does not exist.
        """
        self.stack_num_states = stack_num_states
        self.concatenate_state_every = concatenate_state_every

        self.game_path = rom_path
        if not os.path.exists(self.game_path):
            # The message used to reference an undefined name `game`, which
            # turned this error path into a NameError.
            raise IOError('You asked for game %s but path %s does not exist' %
                          (os.path.basename(rom_path), self.game_path))
        self.frameskip = frameskip

        try:
            self.ale = ALEInterface()
        except Exception:
            print(
                "ALEInterface could not be loaded. ale_python_interface import failed"
            )
            raise

        # Set some default options
        self.ale.setInt(b'random_seed', seed)
        self.ale.setBool(b'sound', False)
        self.ale.setBool(b'display_screen', show_display)
        self.ale.setFloat(b'repeat_action_probability', 0.)

        # Load the rom
        self.ale.loadROM(self.game_path)
        (self.screen_width, self.screen_height) = self.ale.getScreenDims()
        # Holds the two most recent frames, which are max-pooled together.
        self.latest_frame_fifo = deque(maxlen=2)
        self.state_fifo = deque(maxlen=stack_num_states)

    def _step(self, a, force_noop=False):
        """Perform one step of the environment.

        Automatically repeats the step ``self.frameskip`` number of times.

        Parameters:
            a: index into ``possible_actions``.
            force_noop: perform a single ALE no-op, ignoring ``a``.

        Returns:
            ``(state, reward, game_over, {"ale.lives": lives})`` where
            ``state`` is an HxWxC array.
        """
        assert a in self.possible_actions + [0]

        if force_noop:
            action, num_steps = 0, 1
        else:
            action = self._action_set[a]
            if isinstance(self.frameskip, int):
                num_steps = self.frameskip
            else:
                num_steps = np.random.randint(self.frameskip[0],
                                              self.frameskip[1])

        reward = 0.0
        for i in range(num_steps):
            reward += self.ale.act(action)
            cur_frame = self.observe_raw(get_rgb=True)
            self.latest_frame_fifo.append(self.crop_frame(cur_frame))

            if i % self.concatenate_state_every == 0:
                # Pixel-wise max over the two most recent frames.
                curmax_frame = np.amax(self.latest_frame_fifo, axis=0)
                self.state_fifo.append(self.convert_to_gray(curmax_frame))

        # Transpose so we get HxWxC instead of CxHxW
        self.current_frame = np.array(
            np.transpose(self.state_fifo, (1, 2, 0)))
        return self.current_frame, reward, self.ale.game_over(), {
            "ale.lives": self.ale.lives()
        }

    def step(self, *args, **kwargs):
        """Perform one step and mark the episode done when a life is lost."""
        lives_before = self.ale.lives()
        next_state, reward, done, info = self._step(*args, **kwargs)
        lives_after = self.ale.lives()

        # End the episode when a life is lost
        if lives_before > lives_after:
            done = True

        return next_state, reward, done, info

    def observe_raw(self, get_rgb=False):
        """Observe either RGB or gray frames.

        A fresh array is allocated on every call so that frames returned
        earlier are never overwritten.
        """
        if get_rgb:
            cur_frame_rgb = np.zeros(
                (self.screen_height, self.screen_width, 3), dtype=np.uint8)
            self.ale.getScreenRGB(cur_frame_rgb)
            return cur_frame_rgb

        cur_frame_gray = np.zeros((self.screen_height, self.screen_width),
                                  dtype=np.uint8)
        self.ale.getScreenGrayscale(cur_frame_gray)
        return cur_frame_gray

    def crop_frame(self, frame):
        """Crop a frame. Returns ``frame`` unchanged by default."""
        return frame

    def convert_to_gray(self, img):
        """Return the luminance channel of an RGB image as uint8."""
        img_f = np.float32(img)
        img_lumi = 0.299*img_f[:,:,0] + \
                    0.587*img_f[:,:,1] + \
                    0.114*img_f[:,:,2]
        return np.uint8(img_lumi)

    def reset(self):
        """Reset the game and return the initial HxWxC state.

        All channels except the last are blank; the last holds the first
        frame's luminance.
        """
        self.ale.reset_game()
        s = self.observe_raw(get_rgb=True)
        s = self.crop_frame(s)

        # Populate missing frames with blank ones. They are uint8 like real
        # frames, so the state dtype matches what _step() returns.
        for _ in range(self.stack_num_states - 1):
            self.state_fifo.append(
                np.zeros(shape=(s.shape[0], s.shape[1]), dtype=np.uint8))

        self.latest_frame_fifo.append(s)

        # Push the latest frame
        self.state_fifo.append(self.convert_to_gray(s))

        self.state = np.transpose(self.state_fifo, (1, 2, 0))
        return self.state

    def get_action_meanings(self):
        """Return in text what the actions correspond to."""
        return [ACTION_MEANING[i] for i in self._action_set]

    def save_state(self):
        """Save the current state and return an identifier for it."""
        return self.ale.cloneSystemState()

    def restore_state(self, ident):
        """Restore a saved game state.

        A no-op step follows the restore so that a new frame is generated
        in case the restore is followed by an observe.
        """
        self.ale.restoreSystemState(ident)
        self.step(0, force_noop=True)