def launch(args, defaults, description):
    """
    Execute a complete training run.

    Args:
        args: Command-line style argument list. When it contains
            ``--nn-file``, the element that follows is handed to
            ``load_params`` and every loaded value overrides the
            upper-cased attribute of the same name on ``defaults``.
        defaults: Object carrying the default parameters as attributes;
            must also provide ``BASE_ROM_PATH``.
        description: Forwarded unchanged to ``process_args``.
    """
    rec_screen = ""
    if "--nn-file" in args:
        saved_params = vars(load_params(args[args.index("--nn-file") + 1]))
        for name, value in saved_params.items():
            try:
                # setattr works for both instances and classes, whereas
                # item assignment on vars(cls) is rejected because a class
                # namespace is exposed read-only.
                setattr(defaults, name.upper(), value)
            except (AttributeError, TypeError):
                print("warning: parameter %s from param file doesn't exist."
                      % name)
        # rec_screen = args[args.index("--nn-file")+1][:-len("last_model.pkl")]+"/frames"

    parameters = process_args(args, defaults, description)

    # Accept the ROM name with or without its ".bin" extension.
    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    parameters.rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    rng = np.random.RandomState(123456)

    # An empty folder name means "no folder" for the trainer.
    folder_name = None if parameters.folder_name == "" else parameters.folder_name

    ale = ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))
    ale.setBool('display_screen', parameters.display_screen)
    ale.setString('record_screen_dir', rec_screen)

    trainer = Q_Learning(model_params=parameters,
                         ale_env=ale,
                         folder_name=folder_name)
    trainer.train()
def __init__(self, game, seed=None, use_sdl=False, n_last_screens=4,
             frame_skip=4, treat_life_lost_as_terminal=True,
             crop_or_scale='scale', max_start_nullops=30,
             record_screen_dir=None):
    """
    Configure an ALE emulator for ``game`` and load its ROM.

    :param game: game name resolved to a ROM path via
        ``atari_py.get_game_path``.
    :param seed: ALE random seed in ``[0, 2**16)``; drawn from numpy's
        global random state when ``None``.
    :param use_sdl: when true, enable the on-screen display (requires the
        ``DISPLAY`` environment variable).
    :param n_last_screens: number of per-screen spaces in
        ``observation_space``; stored on the instance.
    :param frame_skip: stored on the instance.
    :param treat_life_lost_as_terminal: stored on the instance.
    :param crop_or_scale: stored on the instance.
    :param max_start_nullops: stored on the instance.
    :param record_screen_dir: if given, passed to ALE as
        ``record_screen_dir``.
    :raises RuntimeError: if ``use_sdl`` is true and ``DISPLAY`` is unset.
    """
    # Plain settings copied from the arguments.
    self.n_last_screens = n_last_screens
    self.treat_life_lost_as_terminal = treat_life_lost_as_terminal
    self.crop_or_scale = crop_or_scale
    self.max_start_nullops = max_start_nullops

    # atari_py is used only to provide rom files. atari_py has its own
    # ale_python_interface, but it is obsolete.
    rom_location = atari_py.get_game_path(game)

    emulator = ALEInterface()
    if seed is None:
        # Use numpy's random state
        seed = np.random.randint(0, 2**16)
    else:
        assert 0 <= seed < 2 ** 16, \
            "ALE's random seed must be represented by unsigned int"
    emulator.setInt(b'random_seed', seed)
    emulator.setFloat(b'repeat_action_probability', 0.0)
    emulator.setBool(b'color_averaging', False)
    if record_screen_dir is not None:
        emulator.setString(b'record_screen_dir',
                           str(record_screen_dir).encode())
    self.frame_skip = frame_skip

    if use_sdl:
        if 'DISPLAY' not in os.environ:
            raise RuntimeError(
                'Please set DISPLAY environment variable for use_sdl=True')
        # SDL settings below are from the ALE python example
        platform_name = sys.platform
        if platform_name == 'darwin':
            import pygame
            pygame.init()
            emulator.setBool(b'sound', False)  # Sound doesn't work on OSX
        elif platform_name.startswith('linux'):
            emulator.setBool(b'sound', True)
        emulator.setBool(b'display_screen', True)

    emulator.loadROM(str(rom_location).encode())
    assert emulator.getFrameNumber() == 0

    self.ale = emulator
    self.legal_actions = emulator.getMinimalActionSet()
    self.initialize()

    # One discrete action per entry of the minimal action set; the
    # observation is a tuple of n_last_screens 84x84 boxes.
    self.action_space = spaces.Discrete(len(self.legal_actions))
    single_screen_space = spaces.Box(low=0, high=255, shape=(84, 84))
    self.observation_space = spaces.Tuple(
        [single_screen_space] * n_last_screens)
def initializeALE(romFile, rec_dir, use_sdl=False):
    """
    Create an ALE instance set up for screen and sound recording.

    Args:
        romFile: ROM path handed to ``loadROM``.
        rec_dir: Recording directory. Frames are written under
            ``rec_dir + '/'`` and sound to ``rec_dir + '/sound.wav'``.
        use_sdl: Set to true to display the screen. ALE must be compiled
            with SDL enabled for this to work. On OSX, pygame init is used
            to proxy-call SDL_main. Defaults to ``False``, matching the
            previously hard-coded behaviour.

    Returns:
        Tuple ``(ale, actionSet)`` where ``actionSet`` is the minimal
        action set reported by ALE for the loaded ROM.
    """
    ale = ALEInterface()

    ale.setInt("random_seed", 123)
    ale.setFloat("repeat_action_probability", 0.0)
    ale.setInt("frame_skip", 5)

    # Set record flags (same str key style as every other setting here).
    ale.setString("record_screen_dir", rec_dir + '/')
    ale.setString("record_sound_filename", rec_dir + "/sound.wav")
    # We set fragsize to 64 to ensure proper sound sync
    ale.setInt("fragsize", 64)

    if use_sdl:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    ale.loadROM(romFile)
    actionSet = ale.getMinimalActionSet()
    return ale, actionSet
class Environment:
    """
    Thin wrapper around an ALE emulator configured from an ``args`` object.

    Exposes a small action/reward/screen interface; actions are addressed
    by their index in ``self.actions``.
    """

    def __init__(self, rom_file, args):
        """
        Configure ALE from ``args`` and load ``rom_file``.

        Attributes read from ``args``: ``display_screen``, ``frame_skip``,
        ``repeat_action_probability``, ``color_averaging``, ``random_seed``,
        ``record_screen_path``, ``record_sound_filename``,
        ``minimal_action_set``, ``screen_width`` and ``screen_height``.
        """
        self.ale = ALEInterface()
        if args.display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability',
                          args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)

        # A falsy seed (None or 0) leaves ALE's own seed setting untouched.
        if args.random_seed:
            self.ale.setInt('random_seed', args.random_seed)

        if args.record_screen_path:
            if not os.path.exists(args.record_screen_path):
                logger.info("Creating folder %s", args.record_screen_path)
                os.makedirs(args.record_screen_path)
            logger.info("Recording screens to %s", args.record_screen_path)
            self.ale.setString('record_screen_dir', args.record_screen_path)

        if args.record_sound_filename:
            logger.info("Recording sound to %s", args.record_sound_filename)
            # Sound recording needs sound enabled, whatever was set above.
            self.ale.setBool('sound', True)
            self.ale.setString('record_sound_filename',
                               args.record_sound_filename)

        self.ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d",
                        len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d",
                        len(self.actions))
        logger.debug("Actions: " + str(self.actions))

        # OpenCV expects width as first and height as second; the previous
        # (height, width) order transposed non-square screens.
        self.dims = (args.screen_width, args.screen_height)

    def numActions(self):
        """Return the number of available actions."""
        return len(self.actions)

    def restart(self):
        """Reset the emulator to the start of a game."""
        self.ale.reset_game()

    def act(self, action):
        """Apply the action at index ``action`` and return ALE's reward."""
        reward = self.ale.act(self.actions[action])
        return reward

    def getScreen(self):
        """Return the grayscale screen resized to ``self.dims``."""
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, self.dims)
        return resized

    def isTerminal(self):
        """Return ALE's game-over flag."""
        return self.ale.game_over()
class Emulator:
    """
    ALE emulator configured entirely from module-level constants.

    Actions are addressed by their index in the minimal action set.
    """

    def __init__(self):
        """Build the emulator, apply the module settings and load the ROM."""
        emulator = ALEInterface()
        self.ale = emulator

        # turn off the sound
        emulator.setBool('sound', False)
        emulator.setBool('display_screen', EMULATOR_DISPLAY)
        emulator.setInt('frame_skip', FRAME_SKIP)
        emulator.setFloat('repeat_action_probability',
                          REPEAT_ACTION_PROBABILITY)
        emulator.setBool('color_averaging', COLOR_AVERAGING)
        emulator.setInt('random_seed', RANDOM_SEED)

        # Screen recording is optional and driven by RECORD_SCENE_PATH.
        if RECORD_SCENE_PATH:
            emulator.setString('record_screen_dir', RECORD_SCENE_PATH)

        emulator.loadROM(ROM_PATH)

        self.actions = emulator.getMinimalActionSet()
        action_listing = str(self.actions)
        logger.info("Actions: " + action_listing)

        self.dims = DIMS
        # self.start_lives = self.ale.lives()

    def getActions(self):
        """Return the minimal action set obtained at construction."""
        return self.actions

    def numActions(self):
        """Return how many actions are available."""
        action_count = len(self.actions)
        return action_count

    def restart(self):
        """Reset the game."""
        self.ale.reset_game()

    # can be omitted
    def act(self, action):
        """Apply the action at index ``action``; return the reward from ALE."""
        ale_action = self.actions[action]
        return self.ale.act(ale_action)

    def getScreen(self):
        """Return the grayscale screen resized to ``self.dims``."""
        # why grayscale ?
        grayscale = self.ale.getScreenGrayscale()
        # normalize
        # resized /= COLOR_SCALE
        return cv2.resize(grayscale, self.dims)

    def isTerminal(self):
        """Return ALE's game-over flag."""
        # while training deepmind only ends when agent dies
        # terminate = DEATH_END and TRAIN and (self.ale.lives() < self.start_lives)
        return self.ale.game_over()
def init(game, display_screen=False, record_dir=None):
    """
    Create an ALE instance and load ``'<game>.bin'``.

    Args:
        game: ROM base name; ``'{game}.bin'`` is passed to ``loadROM``.
        display_screen: When true, pygame is initialised and ALE's
            ``display_screen`` option is enabled.
        record_dir: If not ``None``, passed to ALE as ``record_screen_dir``.

    Returns:
        The configured ``ALEInterface`` with the ROM loaded.
    """
    if display_screen:
        import pygame
        pygame.init()

    ale = ALEInterface()
    ale.setBool('display_screen', display_screen)
    ale.setInt('random_seed', 123)
    # ALE reads its settings when the ROM is loaded, so this has to be
    # configured before loadROM for the loaded game to honour it.
    ale.setFloat("repeat_action_probability", 0.0)
    if record_dir is not None:
        ale.setString("record_screen_dir", record_dir)

    ale.loadROM('{game}.bin'.format(game=game))
    return ale
def init(display_screen=False, record_dir=None, rom_path='.'):
    """
    Create an ALE instance and load ``space_invaders.bin``.

    Args:
        display_screen: When true, pygame is initialised and ALE's
            ``display_screen`` option is enabled.
        record_dir: If not ``None``, passed to ALE as ``record_screen_dir``.
        rom_path: Directory containing ``space_invaders.bin``. Defaults to
            ``'.'``, the previously hard-coded location.

    Returns:
        The configured ``ALEInterface`` with the ROM loaded.
    """
    if display_screen:
        import pygame
        pygame.init()

    ale = ALEInterface()
    ale.setBool('display_screen', display_screen)
    ale.setInt('random_seed', 123)
    # ALE reads its settings when the ROM is loaded, so this has to be
    # configured before loadROM for the loaded game to honour it.
    ale.setFloat("repeat_action_probability", 0.0)
    if record_dir is not None:
        ale.setString("record_screen_dir", record_dir)

    ale.loadROM(rom_path + '/space_invaders.bin')
    return ale
def __init__(self, rom_path, n_last_screens=4, frame_skip=4,
             treat_life_lost_as_terminal=True, crop_or_scale='scale',
             max_start_nullops=30, record_screen_dir=None, render=False,
             max_episode_length=None, max_time=None):
    """
    Configure an ALE emulator, load the ROM at ``rom_path`` and reset.

    :param rom_path: ROM path; encoded to bytes for ``loadROM``.
    :param record_screen_dir: if given, passed to ALE as
        ``record_screen_dir``.
    :param render: when true, enable ALE's on-screen display.

    The remaining arguments (``n_last_screens``, ``frame_skip``,
    ``treat_life_lost_as_terminal``, ``crop_or_scale``,
    ``max_start_nullops``, ``max_episode_length``, ``max_time``) are only
    stored on the instance here.
    """
    # Settings kept for use by the rest of the class.
    self.frame_skip = frame_skip
    self.n_last_screens = n_last_screens
    self.treat_life_lost_as_terminal = treat_life_lost_as_terminal
    self.crop_or_scale = crop_or_scale
    self.max_start_nullops = max_start_nullops
    self.max_episode_length = max_episode_length
    self.max_time = max_time

    emulator = ALEInterface()

    # Use numpy's random state
    random_seed = np.random.randint(0, 2**16)
    emulator.setInt(b'random_seed', random_seed)
    emulator.setFloat(b'repeat_action_probability', 0.0)
    emulator.setBool(b'color_averaging', False)

    if record_screen_dir is not None:
        encoded_dir = str.encode(record_screen_dir)
        emulator.setString(b'record_screen_dir', encoded_dir)

    if render:
        platform_name = sys.platform
        if platform_name == 'darwin':
            import pygame
            pygame.init()
            emulator.setBool(b'sound', False)  # Sound doesn't work on OSX
        elif platform_name.startswith('linux'):
            emulator.setBool(b'sound', True)
        emulator.setBool(b'display_screen', True)

    encoded_rom = str.encode(rom_path)
    emulator.loadROM(encoded_rom)

    self.ale = emulator
    self.__exceed_max = False
    self.legal_actions = emulator.getMinimalActionSet()
    self.reset()
def __init__(self, rom_filename, seed=None, use_sdl=False,
             n_last_screens=4, frame_skip=4,
             treat_life_lost_as_terminal=True, crop_or_scale='scale',
             max_start_nullops=30, record_screen_dir=None):
    """
    Configure an ALE emulator and load the ROM at ``rom_filename``.

    :param rom_filename: ROM path; encoded to bytes for ``loadROM``.
    :param seed: ALE random seed in ``[0, 2**16)``; drawn from numpy's
        global random state when ``None``.
    :param use_sdl: when true, enable the on-screen display (requires the
        ``DISPLAY`` environment variable).
    :param record_screen_dir: if given, passed to ALE as
        ``record_screen_dir``.
    :raises RuntimeError: if ``use_sdl`` is true and ``DISPLAY`` is unset.

    ``n_last_screens``, ``frame_skip``, ``treat_life_lost_as_terminal``,
    ``crop_or_scale`` and ``max_start_nullops`` are only stored here.
    """
    self.n_last_screens = n_last_screens
    self.treat_life_lost_as_terminal = treat_life_lost_as_terminal
    self.crop_or_scale = crop_or_scale
    self.max_start_nullops = max_start_nullops

    emulator = ALEInterface()

    if seed is None:
        # Use numpy's random state
        seed = np.random.randint(0, 2 ** 16)
    else:
        assert 0 <= seed < 2 ** 16, \
            "ALE's random seed must be represented by unsigned int"

    emulator.setInt(b'random_seed', seed)
    emulator.setFloat(b'repeat_action_probability', 0.0)
    emulator.setBool(b'color_averaging', False)
    if record_screen_dir is not None:
        encoded_dir = str.encode(record_screen_dir)
        emulator.setString(b'record_screen_dir', encoded_dir)
    self.frame_skip = frame_skip

    if use_sdl:
        display_available = 'DISPLAY' in os.environ
        if not display_available:
            raise RuntimeError(
                'Please set DISPLAY environment variable for use_sdl=True')
        # SDL settings below are from the ALE python example
        platform_name = sys.platform
        if platform_name == 'darwin':
            import pygame
            pygame.init()
            emulator.setBool(b'sound', False)  # Sound doesn't work on OSX
        elif platform_name.startswith('linux'):
            emulator.setBool(b'sound', True)
        emulator.setBool(b'display_screen', True)

    emulator.loadROM(str.encode(rom_filename))
    assert emulator.getFrameNumber() == 0

    self.ale = emulator
    self.legal_actions = emulator.getMinimalActionSet()
    self.initialize()
def _init_ale(self):
    """
    Create and configure the ALE instance, load the ROM, cache actions.

    Reads ``play_sound``, ``display_screen``, ``random_seed``,
    ``record_screen_path``, ``record_sound_filename``, ``rom_path`` and
    ``minimal_action_set`` from ``self``; stores the emulator in
    ``self._ale`` and the chosen action set in ``self._actions``.
    """
    ale = ALEInterface()
    ale.setBool('sound', self.play_sound)
    ale.setBool('display_screen', self.display_screen)
    ale.setInt('random_seed', self.random_seed)

    # Frame skip is implemented separately
    ale.setInt('frame_skip', 1)
    ale.setBool('color_averaging', False)
    ale.setFloat('repeat_action_probability', 0.0)
    # Somehow this repeat_action_probability has an unexpected effect on
    # the game: the larger the value, the more frames games take to
    # restart, and at 1.0 games hang completely.
    # We set the default value of 0.0 here, expecting that it has no
    # effect as frame_skip == 1.
    # Action repeating is the agent's concern, so we do not implement an
    # equivalent in our wrapper.

    if self.record_screen_path:
        _LG.info('Recording screens: %s', self.record_screen_path)
        if not os.path.exists(self.record_screen_path):
            os.makedirs(self.record_screen_path)
        ale.setString('record_screen_dir', self.record_screen_path)

    if self.record_sound_filename:
        _LG.info('Recording sound: %s', self.record_sound_filename)
        record_sound_dir = os.path.dirname(self.record_sound_filename)
        # dirname is '' for a bare file name; there is nothing to create
        # then, and os.makedirs('') would raise.
        if record_sound_dir and not os.path.exists(record_sound_dir):
            os.makedirs(record_sound_dir)
        # Sound recording needs sound enabled, whatever play_sound says.
        ale.setBool('sound', True)
        ale.setString('record_sound_filename', self.record_sound_filename)

    ale.loadROM(self.rom_path)

    self._ale = ale
    self._actions = (
        ale.getMinimalActionSet() if self.minimal_action_set else
        ale.getLegalActionSet()
    )
class AtariPlayer(RLEnvironment):
    """
    Wrapper around the ALE Atari emulator.

    NOTE: the emulator is restarted automatically when a real episode ends.
    """

    def __init__(self, rom_file, viz=0, height_range=(None, None),
                 frame_skip=4, image_shape=(84, 84), nullop_start=30,
                 live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_lost_as_eoe: consider loss of a life as end of episode.
            Useful for training.
        """
        super(AtariPlayer, self).__init__()

        # A bare file name that does not exist locally is looked up in the
        # 'atari_rom' dataset directory.
        if '/' not in rom_file and not os.path.isfile(rom_file):
            rom_file = os.path.join(get_dataset_dir('atari_rom'), rom_file)
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            log_once()

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            emulator = ALEInterface()
            self.ale = emulator
            self.rng = get_rng(self)

            emulator.setInt("random_seed", self.rng.randint(0, 10000))
            emulator.setBool("showinfo", False)
            emulator.setInt("frame_skip", 1)
            emulator.setBool('color_averaging', False)
            # manual.pdf suggests otherwise.
            emulator.setFloat('repeat_action_probability', 0.0)

            # viz setup: a string means "record frames into this directory".
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                emulator.setString('record_screen_dir', viz)
                viz = 0
            self.viz = float(viz) if isinstance(viz, int) else viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            emulator.loadROM(rom_file)

        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.image_shape = image_shape
        self.height_range = height_range
        self.nullop_start = nullop_start
        self.frame_skip = frame_skip
        self.live_lost_as_eoe = live_lost_as_eoe

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        rgb = self.ale.getScreenRGB()
        return rgb.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w, 1) float32 image
        """
        # max-pooled over the last screen
        frame = np.maximum(self._grab_raw_image(), self.last_raw_screen)
        if self.viz and isinstance(self.viz, float):
            # m = cv2.resize(ret, (1920,1200))
            cv2.imshow(self.windowname, frame)
            time.sleep(self.viz)
        top, bottom = self.height_range[0], self.height_range[1]
        frame = frame[top:bottom, :].astype('float32')
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        gray = cv2.resize(gray, self.image_shape)
        return np.expand_dims(gray, axis=2)

    def get_action_space(self):
        """Return a discrete action space sized to the minimal action set."""
        return DiscreteActionSpace(len(self.actions))

    def restart_episode(self):
        """Record the finished episode's score, reset, and play null-ops."""
        if self.current_episode_score.count > 0:
            self.stats['score'].append(self.current_episode_score.sum)
        self.current_episode_score.reset()
        self.ale.reset_game()

        # random null-ops start
        nullop_count = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        final_index = nullop_count - 1
        for index in range(nullop_count):
            # Remember the screen shown just before the final null-op.
            if index == final_index:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        lives_before = self.ale.lives()
        total_reward = 0
        final_index = self.frame_skip - 1
        for index in range(self.frame_skip):
            # Remember the screen shown just before the final repeated step.
            if index == final_index:
                self.last_raw_screen = self._grab_raw_image()
            total_reward += self.ale.act(self.actions[act])
            lives_now = self.ale.lives()
            if self.ale.game_over():
                break
            if self.live_lost_as_eoe and lives_now < lives_before:
                break

        self.current_episode_score.feed(total_reward)
        is_over = self.ale.game_over()
        if is_over:
            self.restart_episode()
        if self.live_lost_as_eoe:
            is_over = is_over or lives_now < lives_before
        return (total_reward, is_over)
class AtariEnv(object):
    """
    Minimal ALE wrapper producing resized grayscale observations.

    ``step`` repeats the chosen action for a fixed or randomly drawn number
    of emulator frames and returns ``(state, reward, game_over, {})``.
    """

    def __init__(self, frame_skip=None, repeat_action_probability=0.0,
                 state_shape=(84, 84), rom_path=None, game_name='pong',
                 random_state=None, rendering=False, record_dir=None,
                 obs_showing=False,
                 channel_weights=(0.5870, 0.2989, 0.1140)):
        """
        Args:
            frame_skip: ``None`` for one emulator step per ``step`` call, an
                int for a fixed repeat count, or a pair ``(low, high)``
                passed to ``rng.randint`` on every ``step``.
            repeat_action_probability: Forwarded to ALE.
            state_shape: Target size given to ``cv2.resize``.
            rom_path: Prefix concatenated with ``game_name + '.bin'`` to
                form the ROM path.
            game_name: ROM base name.
            random_state: ``numpy.random.RandomState``; a fixed-seed one
                (1234) is created when ``None``.
            rendering: Enable ALE's on-screen display.
            record_dir: Recording directory; only used when ``rendering``
                is also true.
            obs_showing: Stored on the instance.
            channel_weights: Weights applied to buffer channels 0, 1 and 2
                when building the grayscale image.
                NOTE(review): the defaults put 0.587 on channel 0 and
                0.2989 on channel 1 -- confirm this matches the channel
                order of the buffer.
        """
        self.ale = ALEInterface()
        self.frame_skip = frame_skip
        # Copy so the instance never aliases the caller's (or a shared
        # default) sequence; kept as lists as before.
        self.state_shape = list(state_shape)
        if random_state is None:
            random_state = np.random.RandomState(1234)
        self.rng = random_state
        self.channel_weights = list(channel_weights)

        self.ale.setInt(b'random_seed', self.rng.randint(1000))
        self.ale.setFloat(b'repeat_action_probability',
                          repeat_action_probability)
        self.ale.setBool(b'color_averaging', False)

        if rendering:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool(b'sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool(b'sound', True)
            self.ale.setBool(b'display_screen', True)

        if rendering and record_dir is not None:  # should be before loadROM
            self.ale.setString(b'record_screen_dir', record_dir.encode())
            # The file name must be relative: a leading '/' would make
            # os.path.join discard record_dir entirely.
            sound_file = os.path.join(record_dir, 'sound.wav')
            self.ale.setString(b'record_sound_filename', sound_file.encode())
            # to ensure proper sound sync (see ALE doc)
            self.ale.setInt(b'fragsize', 64)

        self.ale.loadROM(str.encode(rom_path + game_name + '.bin'))
        self.legal_actions = self.ale.getMinimalActionSet()
        self.nb_actions = len(self.legal_actions)
        (self.screen_width, self.screen_height) = self.ale.getScreenDims()
        # Reusable buffer that getScreenRGB fills in place.
        self._buffer = np.empty((self.screen_height, self.screen_width, 3),
                                dtype=np.uint8)
        self.obs_showing = obs_showing

    def reset(self):
        """Reset the game and return the current state."""
        self.ale.reset_game()
        return self.get_state()

    def step(self, action):
        """
        Repeat the action at index ``action`` and return
        ``(state, reward, game_over, info)`` with an empty ``info`` dict.
        """
        reward = 0.0
        if self.frame_skip is None:
            num_steps = 1
        elif isinstance(self.frame_skip, int):
            num_steps = self.frame_skip
        else:
            num_steps = self.rng.randint(self.frame_skip[0],
                                         self.frame_skip[1])
        for _ in range(num_steps):
            reward += self.ale.act(self.legal_actions[action])
        return self.get_state(), reward, self.ale.game_over(), {}

    def _get_image(self):
        """Return the weighted-grayscale screen resized to ``state_shape``."""
        self.ale.getScreenRGB(self._buffer)
        gray = (self.channel_weights[0] * self._buffer[:, :, 0] +
                self.channel_weights[1] * self._buffer[:, :, 1] +
                self.channel_weights[2] * self._buffer[:, :, 2])
        x = cv2.resize(gray, tuple(self.state_shape),
                       interpolation=cv2.INTER_LINEAR)
        return x

    def get_state(self):
        """Return the current observation (see ``_get_image``)."""
        return self._get_image()

    def get_lives(self):
        """Return the number of lives reported by ALE."""
        return self.ale.lives()
class AtariEnvironment(interfaces.Environment):
    # ALE-backed environment that keeps a history of 84x84 grayscale frames.
    # Actions are exposed as indices ("onehot" indices) into ALE's minimal
    # action set; each history entry is the element-wise max of the last
    # two emulator frames.

    def __init__(self, atari_rom, frame_skip=4, noop_max=30, terminate_on_end_life=False, random_seed=123, frame_history_length=4, use_gui=False, max_num_frames=500000, repeat_action_probability=0.0, record_screen_dir=None):
        """Configure ALE, load ``atari_rom`` and allocate the frame buffers.

        ``frame_skip`` and ``repeat_action_probability`` are applied by this
        wrapper (see ``_act`` and ``perform_action``); ALE itself is set to
        frame_skip 1 and repeat probability 0.0.
        """
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', random_seed)
        self.ale.setInt('frame_skip', 1)
        self.ale.setFloat('repeat_action_probability', 0.0)
        self.ale.setInt('max_num_frames_per_episode', max_num_frames)
        if record_screen_dir is not None:
            self.ale.setString('record_screen_dir', record_screen_dir)
        self.ale.loadROM(atari_rom)
        self.frame_skip = frame_skip
        self.repeat_action_probability = repeat_action_probability
        self.noop_max = noop_max
        self.terminate_on_end_life = terminate_on_end_life
        self.current_lives = self.ale.lives()
        self.is_terminal = False
        self.previous_action = 0
        self.num_actions = len(self.ale.getMinimalActionSet())
        w, h = self.ale.getScreenDims()
        self.screen_width = w
        self.screen_height = h
        # All-zero templates used to (re)initialise the frame buffers.
        self.zero_last_frames = [
            np.zeros((84, 84), dtype=np.uint8),
            np.zeros((84, 84), dtype=np.uint8)
        ]
        self.last_two_frames = copy.copy(self.zero_last_frames)
        self.zero_history_frames = [
            np.zeros((84, 84), dtype=np.uint8)
            for i in range(0, frame_history_length)
        ]
        self.frame_history = copy.copy(self.zero_history_frames)
        # Two-way mapping between ALE action codes and their index in the
        # minimal action set.
        atari_actions = self.ale.getMinimalActionSet()
        self.atari_to_onehot = dict(
            list(zip(atari_actions, list(range(len(atari_actions))))))
        self.onehot_to_atari = dict(
            list(zip(list(range(len(atari_actions))), atari_actions)))
        # Flat buffer that getScreenGrayscale fills in place.
        self.screen_image = np.zeros(self.screen_height * self.screen_width, dtype=np.uint8)
        self.use_gui = use_gui
        # NOTE(review): allocated 2-D here but replaced by an (h, w, 1)
        # array in _get_frame; refresh_gui transposes with three axes, so
        # it presumably relies on _get_frame having run first -- confirm.
        self.original_frame = np.zeros((h, w), dtype=np.uint8)
        # Minimum interval between GUI redraws (1/60 s).
        self.refresh_time = datetime.timedelta(milliseconds=1000 / 60)
        self.last_refresh = datetime.datetime.now()
        if (self.use_gui):
            self.gui_screen = pygame.display.set_mode((w, h))

    def getRAM(self, ram=None):
        """Return ALE's RAM, forwarding the optional ``ram`` argument."""
        return self.ale.getRAM(ram)

    def _get_frame(self):
        """Grab the grayscale screen, keep the full-size copy in
        ``original_frame`` and return it resized to 84x84."""
        self.ale.getScreenGrayscale(self.screen_image)
        image = self.screen_image.reshape(
            [self.screen_height, self.screen_width, 1])
        self.original_frame = image
        image = cv2.resize(image, (84, 84))
        return image

    def perform_action(self, onehot_index_action):
        """Apply an action given by index and return
        ``(state, action_index, reward, next_state, is_terminal)``.

        With probability ``repeat_action_probability`` the previous action
        index is used instead of the requested one; the returned index is
        the one actually applied.
        """
        if self.repeat_action_probability > 0:
            if np.random.uniform() < self.repeat_action_probability:
                onehot_index_action = self.previous_action
        self.previous_action = onehot_index_action
        action = self.onehot_to_atari[onehot_index_action]
        state, action, reward, next_state, self.is_terminal = self.perform_atari_action(
            action)
        return state, onehot_index_action, reward, next_state, self.is_terminal

    def perform_atari_action(self, atari_action):
        """Apply an ALE action code for ``frame_skip`` frames and return
        ``(state, atari_action, reward, next_state, is_terminal)``."""
        state = self.get_current_state()
        reward = self._act(atari_action, self.frame_skip)
        if self.use_gui:
            self.refresh_gui()
        # Shift the history left by one and append the max of the last
        # two frames as the newest entry.
        self.frame_history[:-1] = self.frame_history[1:]
        self.frame_history[-1] = np.max(self.last_two_frames, axis=0)
        next_state = self.get_current_state()
        return state, atari_action, reward, next_state, self.is_terminal

    def _act(self, ale_action, repeat):
        """Repeat ``ale_action`` ``repeat`` times, updating
        ``last_two_frames``, ``is_terminal`` and ``current_lives``; return
        the summed reward."""
        reward = 0
        for i in range(repeat):
            reward += self.ale.act(ale_action)
            # Only the frames of the final two repeats are captured.
            if i >= repeat - 2:
                self.last_two_frames = [
                    self.last_two_frames[1], self._get_frame()
                ]
        self.is_terminal = self.ale.game_over()
        # terminate the episode if current_lives has decreased
        lives = self.ale.lives()
        if self.current_lives != lives:
            if self.current_lives > lives and self.terminate_on_end_life:
                self.is_terminal = True
            self.current_lives = lives
        return reward

    def get_current_state(self):
        """Return a list with a copy of every frame in the history."""
        # return copy.copy(self.frame_history)
        return [x.copy() for x in self.frame_history]

    def get_actions_for_state(self, state):
        """Return all action indices; ``state`` is not consulted."""
        return [
            self.atari_to_onehot[a] for a in self.ale.getMinimalActionSet()
        ]

    def reset_environment(self):
        """Start a new episode and rebuild the frame history."""
        # The current screen is captured before any reset happens.
        self.last_two_frames = [self.zero_history_frames[0], self._get_frame()]
        # When a lost life ends the episode, the game itself is only reset
        # once it is actually over; otherwise it is always reset.
        if self.terminate_on_end_life:
            if self.ale.game_over():
                self.ale.reset_game()
        else:
            self.ale.reset_game()
        self.current_lives = self.ale.lives()
        # Play a random number (0..noop_max) of action-0 steps.
        if self.noop_max > 0:
            num_noops = np.random.randint(self.noop_max + 1)
            self._act(0, num_noops)
        self.previous_action = 0
        self.frame_history = copy.copy(self.zero_history_frames)
        self.frame_history[-1] = np.max(self.last_two_frames, axis=0)
        if self.use_gui:
            self.refresh_gui()

    def is_current_state_terminal(self):
        """Return the terminal flag set by the last ``_act`` call."""
        return self.is_terminal

    def refresh_gui(self):
        """Redraw the pygame window if ``refresh_time`` has elapsed since
        the last redraw."""
        current_time = datetime.datetime.now()
        if (current_time - self.last_refresh) > self.refresh_time:
            self.last_refresh = current_time
            # Swap the first two axes and repeat the single channel three
            # times before blitting.
            gui_image = np.tile(
                np.transpose(self.original_frame, axes=(1, 0, 2)), [1, 1, 3])
            # gui_image = np.zeros((self.screen_width, self.screen_height, 3), dtype=np.uint8)
            # channel = np.random.randint(3)
            # gui_image[:,:,channel] = np.transpose(self.original_frame, axes=(1, 0, 2))[:,:,0]
            pygame.surfarray.blit_array(self.gui_screen, gui_image)
            pygame.display.update()
class ALEEnvironment(Environment):
    """
    Environment backed by an ALE emulator configured from ``args``.

    Actions are addressed by their index in ``self.actions``.
    """

    def __init__(self, rom_file, args):
        """
        Configure ALE from ``args`` and load ``rom_file``.

        Attributes read from ``args``: ``display_screen``, ``frame_skip``,
        ``repeat_action_probability``, ``color_averaging``, ``random_seed``,
        ``record_screen_path``, ``record_sound_filename``,
        ``minimal_action_set``, ``screen_width`` and ``screen_height``.
        """
        from ale_python_interface import ALEInterface
        emulator = ALEInterface()
        self.ale = emulator

        if args.display_screen:
            platform_name = sys.platform
            if platform_name == 'darwin':
                import pygame
                pygame.init()
                emulator.setBool('sound', False)  # Sound doesn't work on OSX
            elif platform_name.startswith('linux'):
                emulator.setBool('sound', True)
            emulator.setBool('display_screen', True)

        emulator.setInt('frame_skip', args.frame_skip)
        emulator.setFloat('repeat_action_probability',
                          args.repeat_action_probability)
        emulator.setBool('color_averaging', args.color_averaging)

        # A falsy seed leaves ALE's own seed setting untouched.
        if args.random_seed:
            emulator.setInt('random_seed', args.random_seed)

        screen_dir = args.record_screen_path
        if screen_dir:
            if not os.path.exists(screen_dir):
                logger.info("Creating folder %s" % screen_dir)
                os.makedirs(screen_dir)
            logger.info("Recording screens to %s", screen_dir)
            emulator.setString('record_screen_dir', screen_dir)

        sound_file = args.record_sound_filename
        if sound_file:
            logger.info("Recording sound to %s", sound_file)
            emulator.setBool('sound', True)
            emulator.setString('record_sound_filename', sound_file)

        emulator.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = emulator.getMinimalActionSet()
            logger.info("Using minimal action set with size %d"
                        % len(self.actions))
        else:
            self.actions = emulator.getLegalActionSet()
            logger.info("Using full action set with size %d"
                        % len(self.actions))
        logger.debug("Actions: " + str(self.actions))

        # OpenCV expects width as first and height as second
        self.dims = (args.screen_width, args.screen_height)

    def numActions(self):
        """Return the number of available actions."""
        action_count = len(self.actions)
        return action_count

    def restart(self):
        """Reset the game."""
        self.ale.reset_game()

    def act(self, action):
        """Apply the action at index ``action``; return the reward from ALE."""
        ale_action = self.actions[action]
        return self.ale.act(ale_action)

    def getScreen(self):
        """Return the grayscale screen resized to ``self.dims``."""
        grayscale = self.ale.getScreenGrayscale()
        return cv2.resize(grayscale, self.dims)

    def isTerminal(self):
        """Return ALE's game-over flag."""
        return self.ale.game_over()
class ALEEnvironment():
    """
    ALE-backed environment that feeds screens into a ``History3D`` buffer.

    In ``'train'`` mode a change in the number of lives is treated as
    terminal; in other modes only ALE's game-over flag is.
    """

    def __init__(self, config):
        """
        Configure ALE from ``config`` and load ``config.rom_file``.

        Attributes read from ``config``: ``history_length``, ``mode``,
        ``display_screen``, ``frame_skip``, ``color_averaging``,
        ``random_seed``, ``record_screen_path``, ``record_sound_filename``,
        ``rom_file``, ``minimal_action_set``, ``screen_width`` and
        ``screen_height``; ``config`` itself is passed to ``History3D``.
        """
        self.history = History3D(config)
        self.history_length = config.history_length
        self.mode = config.mode
        self.life_lost = False
        self.terminal = False
        self.score = 0

        from ale_python_interface import ALEInterface
        emulator = ALEInterface()
        self.ale = emulator

        if config.display_screen:
            platform_name = sys.platform
            if platform_name == 'darwin':
                import pygame
                pygame.init()
                emulator.setBool('sound', False)  # Sound doesn't work on OSX
            elif platform_name.startswith('linux'):
                emulator.setBool('sound', False)
            emulator.setBool('display_screen', True)

        # Whether skip frames or not
        emulator.setInt('frame_skip', config.frame_skip)
        emulator.setBool('color_averaging', config.color_averaging)

        # Random seed for repeatable experiments.
        if config.random_seed:
            emulator.setInt('random_seed', config.random_seed)

        screen_dir = config.record_screen_path
        if screen_dir:
            if not os.path.exists(screen_dir):
                os.makedirs(screen_dir)
            emulator.setString('record_screen_dir', screen_dir)

        sound_file = config.record_sound_filename
        if sound_file:
            emulator.setBool('sound', True)
            emulator.setString('record_sound_filename', sound_file)

        emulator.loadROM(config.rom_file)

        self.actions = (emulator.getMinimalActionSet()
                        if config.minimal_action_set
                        else emulator.getLegalActionSet())

        self.screen_width = config.screen_width
        self.screen_height = config.screen_height

    def numActions(self):
        """Return the number of available actions."""
        action_count = len(self.actions)
        return action_count

    def new_game(self):
        """
        Reset, pre-fill the history with the first screen and return
        ``(state, terminal, action_indices)``.
        """
        state, terminal = self.reset()
        fill_count = self.history_length + 1
        for _ in range(fill_count):
            self.history.add(state)
        action_indices = list(range(len(self.actions)))
        return state, terminal, action_indices

    def reset(self):
        """
        Reset the game when needed and return ``(screen, terminal)``.

        In test mode the game is simply initialized. In train mode, if the
        game is terminal because of a life loss but not yet game over, only
        the life-loss flag is cleared so the next game continues from the
        current state. Otherwise the game is simply initialized.
        """
        resume_after_life_loss = (self.mode != 'test'
                                  and self.life_lost
                                  and not self.ale.game_over())
        if not resume_after_life_loss:
            # `reset` called in the middle of an episode, or all lives
            # are lost
            self.ale.reset_game()
        self.life_lost = False
        return self.getScreen(), self.isTerminal()

    def step(self, action):
        """
        Apply the action at index ``action`` and return
        ``(reward, history, terminal)``.
        """
        lives_before = self.ale.lives()
        reward = self.ale.act(self.actions[action])
        self.life_lost = lives_before != self.ale.lives()
        self.score += reward

        self.current_state = self.getScreen()
        self.history.add(self.current_state)
        self.terminal = self.isTerminal()
        return reward, self.history.get(), self.terminal

    def getScreen(self):
        """Return the grayscale screen divided by 255 and resized to
        ``(screen_width, screen_height)``."""
        scaled = self.ale.getScreenGrayscale() / 255.
        target_size = (self.screen_width, self.screen_height)
        return cv2.resize(scaled, target_size)

    def isTerminal(self):
        """Return whether the current state counts as terminal."""
        if self.mode != 'train':
            return self.ale.game_over()
        return self.ale.game_over() or self.life_lost
class AtariPlayer(RLEnvironment):
    """
    A wrapper for atari emulator.
    NOTE: will automatically restart when a real episode ends
    """

    def __init__(self, rom_file, viz=0, height_range=(None, None),
                 frame_skip=4, image_shape=(84, 84), nullop_start=30,
                 live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_lost_as_eoe: consider loss of a life as end of episode.
            Useful for training.
        """
        super(AtariPlayer, self).__init__()
        # A bare file name that does not exist locally is resolved through
        # the 'atari_rom' dataset directory.
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_dir('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            log_once()

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)
            self.ale.setInt(b"random_seed", self.rng.randint(0, 10000))
            self.ale.setBool(b"showinfo", False)
            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                # Keys and the ROM path are passed as bytes, so the
                # directory is encoded the same way as rom_file below.
                self.ale.setString(b'record_screen_dir', viz.encode('utf-8'))
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w, 1) float32 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                # m = cv2.resize(ret, (1920,1200))
                cv2.imshow(self.windowname, ret)
                time.sleep(self.viz)
        # Crop to the configured row range before converting.
        ret = ret[self.height_range[0]:self.height_range[1], :].astype('float32')
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)
        ret = np.expand_dims(ret, axis=2)
        return ret

    def get_action_space(self):
        """Return a discrete action space sized to the minimal action set."""
        return DiscreteActionSpace(len(self.actions))

    def restart_episode(self):
        """Record the finished episode's score, reset the game and play a
        random number of null-ops."""
        if self.current_episode_score.count > 0:
            self.stats['score'].append(self.current_episode_score.sum)
        self.current_episode_score.reset()
        self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            # Remember the screen shown just before the final null-op.
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            # Remember the screen shown just before the final repeated
            # step; current_state max-pools against it.
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        self.current_episode_score.feed(r)
        isOver = self.ale.game_over()
        if isOver:
            self.restart_episode()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives
        return (r, isOver)
class AtariPlayer(gym.Env):
    """
    A wrapper for ALE emulator, with configurations to mimic DeepMind DQN settings.

    Info (returned as the 4th element of :meth:`step`):
        ale.lives: the number of lives ALE reports after the step.
    """

    def __init__(self, rom_file, viz=0,
                 frame_skip=4, nullop_start=30,
                 live_lost_as_eoe=True, max_num_frames=0):
        """
        Args:
            rom_file: path to the rom
            frame_skip: skip every k frames and repeat the action
            viz: visualization to be done.
                Set to 0 to disable.
                Set to a positive number to be the delay between frames to show.
                Set to a string to be a directory to store frames.
            nullop_start: start with random number of null ops.
                A value <= 0 disables the random null-op start.
            live_lost_as_eoe: consider lost of lives as end of episode. Useful for training.
            max_num_frames: maximum number of frames per episode.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_path('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Error)
        except AttributeError:
            if execute_only_once():
                logger.warn("You're not using latest ALE")

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)
            self.ale.setInt(b"random_seed", self.rng.randint(0, 30000))
            self.ale.setInt(b"max_num_frames_per_episode", max_num_frames)
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                # Every other ALE key/value here is passed as bytes; do the
                # same for the directory so a text path works as well.
                record_dir = viz if isinstance(viz, bytes) else viz.encode('utf-8')
                self.ale.setString(b'record_screen_dir', record_dir)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start

        self.action_space = spaces.Discrete(len(self.actions))
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(self.height, self.width, 1), dtype=np.uint8)
        self._restart_episode()

    def get_action_meanings(self):
        """Return the ACTION_MEANING entry for each action in the minimal set."""
        return [ACTION_MEANING[i] for i in self.actions]

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def _current_state(self):
        """
        :returns: a gray-scale (h, w, 1) uint8 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                cv2.waitKey(int(self.viz * 1000))
        ret = ret.astype('float32')
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)[:, :, np.newaxis]
        return ret.astype('uint8')  # to save some memory

    def _restart_episode(self):
        """Reset the game and play a random number of null-op actions."""
        with _ALE_LOCK:
            self.ale.reset_game()

        # random null-ops start; randint(0) would raise, so treat
        # nullop_start <= 0 as "no null-ops".
        n = self.rng.randint(self.nullop_start) if self.nullop_start > 0 else 0
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def reset(self):
        """Restart the emulator only if the game is over; return the current state."""
        if self.ale.game_over():
            self._restart_episode()
        return self._current_state()

    def step(self, act):
        """
        Repeat action index `act` for up to `frame_skip` frames.

        :returns: (state, accumulated reward, isOver, info)
        """
        oldlives = self.ale.lives()
        # Bound up front so the code after the loop is valid even when
        # frame_skip is 0 and the loop body never runs.
        newlives = oldlives
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        isOver = self.ale.game_over()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives

        info = {'ale.lives': newlives}
        return self._current_state(), r, isOver, info
class ALEEnvironment(Environment):
    """Environment backed by an ALE emulator configured from ``args``."""

    def __init__(self, rom_file, args):
        """Create the emulator, apply the options in ``args`` and load ``rom_file``."""
        from ale_python_interface import ALEInterface
        emulator = ALEInterface()
        self.ale = emulator

        if args.display_screen:
            emulator.setBool('sound', True)
            emulator.setBool('display_screen', True)

        emulator.setInt('frame_skip', args.frame_skip)
        emulator.setFloat('repeat_action_probability', args.repeat_action_probability)
        emulator.setBool('color_averaging', args.color_averaging)

        if args.random_seed:
            emulator.setInt('random_seed', args.random_seed)

        screen_dir = args.record_screen_path
        if screen_dir:
            if not os.path.exists(screen_dir):
                logger.info("Creating folder %s", screen_dir)
                os.makedirs(screen_dir)
            logger.info("Recording screens to %s", screen_dir)
            emulator.setString('record_screen_dir', screen_dir)

        sound_file = args.record_sound_filename
        if sound_file:
            logger.info("Recording sound to %s", sound_file)
            emulator.setBool('sound', True)
            emulator.setString('record_sound_filename', sound_file)

        emulator.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = emulator.getMinimalActionSet()
            logger.info("Using minimal action set with size %d", len(self.actions))
        else:
            self.actions = emulator.getLegalActionSet()
            logger.info("Using full action set with size %d", len(self.actions))
        logger.debug("Actions: %s", str(self.actions))

        self.screen_width = args.screen_width
        self.screen_height = args.screen_height

        self.life_lost = False

    def numActions(self):
        """Return the size of the selected action set."""
        return len(self.actions)

    def restart(self):
        """Start a new episode.

        The emulator is reset unless we are outside test mode, the last
        action cost a life, and the game is not over yet; in that one case
        play continues from the current state and only the flag is cleared.
        """
        continue_after_life_loss = (
            self.mode != 'test'
            and self.life_lost
            and not self.ale.game_over()
        )
        if not continue_after_life_loss:
            self.ale.reset_game()
        self.life_lost = False

    def act(self, action):
        """Apply the action at index ``action`` and return ALE's reward."""
        lives_before = self.ale.lives()
        reward = self.ale.act(self.actions[action])
        self.life_lost = self.ale.lives() != lives_before
        return reward

    def getScreen(self):
        """Return the grayscale screen resized to (screen_width, screen_height)."""
        target_size = (self.screen_width, self.screen_height)
        return cv2.resize(self.ale.getScreenGrayscale(), target_size)

    def isTerminal(self):
        """Game over, or (in train mode only) a life was just lost."""
        if self.mode != 'train':
            return self.ale.game_over()
        return self.ale.game_over() or self.life_lost
# Set USE_SDL to true to display the screen. ALE must be compiled
# with SDL enabled for this to work. On OSX, pygame init is used to
# proxy-call SDL_main.
USE_SDL = False
if USE_SDL:
    if sys.platform == 'darwin':
        import pygame
        pygame.init()
        ale.setBool('sound', False)  # Sound doesn't work on OSX
    elif sys.platform.startswith('linux'):
        ale.setBool('sound', True)
    ale.setBool('display_screen', True)

# Directory ALE is told to record screens into.
recordPath = 'record'
ale.setString('record_screen_dir', recordPath)

# Load the ROM file
ale.loadROM(sys.argv[1])

# Get the list of legal actions
legal_actions = ale.getLegalActionSet()

# Maps an index 0..8 to a space-separated 9-wide one-hot string,
# e.g. 2 -> "0 0 1 0 0 0 0 0 0".
one_hot = {
    hot: " ".join("1" if pos == hot else "0" for pos in range(9))
    for hot in range(9)
}
class AtariPlayer(gym.Env):
    """
    A wrapper for ALE emulator, with configurations to mimic DeepMind DQN settings.

    Info (returned as the 4th element of :meth:`_step`):
        ale.lives: the number of lives ALE reports after the step.
    """

    def __init__(self, rom_file, viz=0,
                 frame_skip=4, nullop_start=30,
                 live_lost_as_eoe=True, max_num_frames=0):
        """
        Args:
            rom_file: path to the rom
            frame_skip: skip every k frames and repeat the action
            viz: visualization to be done.
                Set to 0 to disable.
                Set to a positive number to be the delay between frames to show.
                Set to a string to be a directory to store frames.
            nullop_start: start with random number of null ops.
                A value <= 0 disables the random null-op start.
            live_lost_as_eoe: consider lost of lives as end of episode. Useful for training.
            max_num_frames: maximum number of frames per episode.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_path('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Error)
        except AttributeError:
            if execute_only_once():
                logger.warn("You're not using latest ALE")

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)
            self.ale.setInt(b"random_seed", self.rng.randint(0, 30000))
            self.ale.setInt(b"max_num_frames_per_episode", max_num_frames)
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                # Every other ALE key/value here is passed as bytes; do the
                # same for the directory so a text path works as well.
                record_dir = viz if isinstance(viz, bytes) else viz.encode('utf-8')
                self.ale.setString(b'record_screen_dir', record_dir)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start

        self.action_space = spaces.Discrete(len(self.actions))
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(self.height, self.width))
        self._restart_episode()

    def get_action_meanings(self):
        """Return the ACTION_MEANING entry for each action in the minimal set."""
        return [ACTION_MEANING[i] for i in self.actions]

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def _current_state(self):
        """
        :returns: a gray-scale (h, w) uint8 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                cv2.waitKey(int(self.viz * 1000))
        ret = ret.astype('float32')
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        return ret.astype('uint8')  # to save some memory

    def _restart_episode(self):
        """Reset the game and play a random number of null-op actions."""
        with _ALE_LOCK:
            self.ale.reset_game()

        # random null-ops start; randint(0) would raise, so treat
        # nullop_start <= 0 as "no null-ops".
        n = self.rng.randint(self.nullop_start) if self.nullop_start > 0 else 0
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def _reset(self):
        """Restart the emulator only if the game is over; return the current state."""
        if self.ale.game_over():
            self._restart_episode()
        return self._current_state()

    def _step(self, act):
        """
        Repeat action index `act` for up to `frame_skip` frames.

        :returns: (state, accumulated reward, isOver, info)
        """
        oldlives = self.ale.lives()
        # Bound up front so the code after the loop is valid even when
        # frame_skip is 0 and the loop body never runs.
        newlives = oldlives
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        isOver = self.ale.game_over()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives

        info = {'ale.lives': newlives}
        return self._current_state(), r, isOver, info
class Agent():
    """Plays an Atari game through ALE with a DQN-style model.

    The constructor loads ``./roms/<game>.bin``, selects the model class
    from ``agent_type`` ('dqn', 'double', anything else -> dueling) and
    either loads a saved replay memory or creates a fresh one.
    """

    def __init__(self, game, agent_type, display, load_model, record, test):
        self.name = game
        self.agent_type = agent_type
        self.ale = ALEInterface()
        self.ale.setInt(str.encode('random_seed'), np.random.randint(100))
        self.ale.setBool(str.encode('display_screen'), display or record)
        if record:
            self.ale.setString(
                str.encode('record_screen_dir'),
                str.encode('./data/recordings/{}/{}/tmp/'.format(game, agent_type)))

        self.ale.loadROM(str.encode('./roms/{}.bin'.format(self.name)))
        self.action_list = list(self.ale.getMinimalActionSet())
        self.frame_shape = np.squeeze(self.ale.getScreenGrayscale()).shape

        if test:
            self.name += '_test'

        if 'space_invaders' in self.name:
            # Account for blinking bullets
            self.frameskip = 2
        else:
            self.frameskip = 3

        self.frame_buffer = deque(maxlen=4)
        if load_model and not record:
            self.load_replaymemory()
        else:
            self.replay_memory = ReplayMemory(500000, 32)

        model_input_shape = self.frame_shape + (4,)
        model_output_shape = len(self.action_list)

        # All three model classes are constructed with the same arguments.
        if agent_type == 'dqn':
            model_cls = DeepQN
        elif agent_type == 'double':
            model_cls = DoubleDQN
        else:
            model_cls = DuelingDQN
        self.model = model_cls(
            model_input_shape,
            model_output_shape,
            self.action_list,
            self.replay_memory,
            self.name,
            load_model
        )

        print('{} Loaded!'.format(' '.join(self.name.split('_')).title()))
        print('Displaying: ', display)
        print('Frame Shape: ', self.frame_shape)
        print('Frame Skip: ', self.frameskip)
        print('Action Set: ', self.action_list)
        print('Model Input Shape: ', model_input_shape)
        print('Model Output Shape: ', model_output_shape)
        print('Agent: ', agent_type)

    def _grab_frame(self):
        """Return the current grayscale screen with empty dimensions removed."""
        # np.squeeze removes empty dimensions e.g. if shape=(210,160,__)
        return np.squeeze(self.ale.getScreenGrayscale())

    def _prime_frame_buffer(self):
        """Push the current frame once per buffer slot (four times)."""
        for _ in range(4):
            self.frame_buffer.append(self._grab_frame())

    def _repeat_action(self, action):
        """Apply ``action`` ``frameskip + 1`` times; return the summed reward.

        The original loop kept only the reward of the final ``act`` call,
        silently dropping anything scored during the skipped frames.
        """
        total = 0
        for _ in range(self.frameskip + 1):
            total += self.ale.act(action)
        return total

    def _save_checkpoint(self):
        """Persist model weights, hyper-parameters and the replay memory."""
        self.model.save_model()
        self.model.save_hyperparams()
        self.save_replaymemory()

    def training(self, steps):
        '''
        Trains the agent for :steps number of weight updates.

        Returns the average model loss
        '''

        loss = []

        # Initialize frame buffer.
        self._prime_frame_buffer()

        try:
            for step in range(steps):
                gameover = False
                initial_state = np.stack(self.frame_buffer, axis=-1)
                action = self.model.predict_action(initial_state)

                # Backup data
                if step % 5000 == 0:
                    self._save_checkpoint()

                # If using a target model check for weight updates
                if hasattr(self.model, 'tau'):
                    if self.model.tau == 0:
                        self.model.update_target_model()
                        self.model.tau = 10000
                    else:
                        self.model.tau -= 1

                # Frame skipping technique https://danieltakeshi.github.io/2016/11/25/frame-skipping-and-preprocessing-for-deep-q-networks-on-atari-2600-games/
                lives_before = self.ale.lives()
                reward = self._repeat_action(action)
                self.frame_buffer.append(self._grab_frame())

                lives_after = self.ale.lives()

                if lives_after < lives_before:
                    gameover = True  # Taking advice from dude on reddit
                    reward = -1

                if self.ale.game_over():
                    gameover = True
                    reward = -1
                    self.ale.reset_game()

                new_state = np.stack(self.frame_buffer, axis=-1)

                # Experiment with clipping rewards for stability purposes
                reward = np.clip(reward, -1, 1)
                self.replay_memory.add(
                    initial_state,
                    action,
                    reward,
                    gameover,
                    new_state
                )

                loss += self.model.replay_train()
        except BaseException:
            # Save progress on ANY failure (including Ctrl-C), then signal
            # abort with KeyboardInterrupt as callers of this method expect.
            # NOTE(review): this also converts genuine errors into
            # KeyboardInterrupt; the cause is only visible as chained context.
            self._save_checkpoint()
            raise KeyboardInterrupt

        return np.mean(loss, axis=0)

    def simulate_random(self):
        """Play one game with uniformly random actions.

        Returns (total_reward, frames_survived).
        """
        print('Simulating game randomly')
        done = False
        total_reward = 0
        while not done:
            action = np.random.choice(self.ale.getMinimalActionSet())
            reward = self.ale.act(action)
            total_reward += reward
            if self.ale.game_over():
                reward = -1
                done = True
            reward = np.clip(reward, -1, 1)
            if reward != 0:
                print(reward)
        frames_survived = self.ale.getEpisodeFrameNumber()
        self.ale.reset_game()
        return total_reward, frames_survived

    def simulate_intelligent(self, evaluating=False):
        """Play one game using the model's predicted actions.

        Returns (total_score, frames_survived); the score now includes the
        reward of every repeated frame.
        """
        done = False
        total_score = 0

        self._prime_frame_buffer()
        while not done:
            state = np.stack(self.frame_buffer, axis=-1)
            action = self.model.predict_action(state, evaluating)

            # Remember, ale.act returns the increase in game score with this action
            total_score += self._repeat_action(action)

            # Pushes oldest frame out
            self.frame_buffer.append(self._grab_frame())
            if self.ale.game_over():
                done = True

        frames_survived = self.ale.getEpisodeFrameNumber()
        print('   Game Over')
        print('   Frames Survived: ', frames_survived)
        print('   Score: ', total_score)
        print('===========================')
        self.ale.reset_game()
        return total_score, frames_survived

    def save_replaymemory(self):
        """Pickle the replay memory, bz2-compressed, under ./data/<agent_type>/."""
        with bz2.BZ2File('./data/{}/{}_replaymem.obj'.format(self.agent_type, self.name), 'wb') as f:
            pickle.dump(self.replay_memory, f, protocol=pickle.HIGHEST_PROTOCOL)
            print('Saved replay memory at ', datetime.now())

    def load_replaymemory(self):
        """Load the replay memory written by save_replaymemory.

        Raises KeyboardInterrupt when the file does not exist.
        """
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only load replay files this program wrote itself.
        try:
            with bz2.BZ2File('./data/{}/{}_replaymem.obj'.format(self.agent_type, self.name), 'rb') as f:
                self.replay_memory = pickle.load(f)
                print('Loaded replay memory at ', datetime.now())
        except FileNotFoundError:
            print('No replay memory file found')
            raise KeyboardInterrupt
class AleEnvironment(Environment):
    """ALE-backed environment producing 84x84 float frames scaled to [0, 1].

    Usable as a context manager; on exit the preview window (if one was
    opened) is destroyed.
    """

    def __init__(self, rom_name, record_display=False, show_display=False, id = 0, shrink=False, life_lost_as_end=True, use_grayscale=True):
        """
        Args:
            rom_name: path passed to ``ALEInterface.loadROM``.
            record_display: record screens into the 'movie' directory.
            show_display: show frames in an OpenCV window (ignored when
                ``record_display`` is set).
            id: suffix used in the window name.
            shrink: resize straight to 84x84 instead of resize-and-crop.
            life_lost_as_end: treat a lost life as an end state.
            use_grayscale: read ALE's grayscale screen instead of RGB.
        """
        super(AleEnvironment, self).__init__()
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', int(np.random.rand() * 100))
        self.ale.setFloat('repeat_action_probability', 0.0)
        self.ale.setBool('color_averaging', False)
        self.record_display = record_display
        self.show_display = show_display

        if self.record_display:
            self.ale.setString('record_screen_dir', 'movie')
        elif self.show_display:
            self.display_name = rom_name + '_' + str(id)
            cv2.startWindowThread()
            cv2.namedWindow(self.display_name)

        self.ale.loadROM(rom_name)
        self.actions = self.ale.getMinimalActionSet()
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.use_grayscale = use_grayscale
        channels = 1 if self.use_grayscale else 3
        # Buffer handed to ALE's getScreen* calls on every act().
        self.screen = np.zeros(
            (self.screen_height, self.screen_width, channels), dtype=np.uint8)
        self.prev_screen = np.zeros(
            (self.screen_height, self.screen_width, 3), dtype=np.uint8)
        self.shrink = shrink
        self.life_lost_as_end = life_lost_as_end
        self.lives_lost = False
        self.lives = self.ale.lives()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # display_name only exists when a window was created in __init__;
        # without this guard exiting a windowless environment raised
        # AttributeError.
        window = getattr(self, 'display_name', None)
        if window is not None:
            cv2.destroyWindow(window)

    def act(self, action):
        """Apply the action at index ``action``.

        Returns:
            (reward, state) where state is the preprocessed screen.
        """
        reward = self.ale.act(self.actions[action])
        if self.use_grayscale:
            screen = self.ale.getScreenGrayscale(self.screen)
        else:
            current_screen = self.ale.getScreenRGB(self.screen)
            screen = np.maximum(current_screen, self.prev_screen)
            # Copy: ALE is handed self.screen again on the next call, so
            # keeping a reference would make prev_screen always equal the
            # current frame and the max above a no-op.
            self.prev_screen = current_screen.copy()
            # NOTE(review): Rec.709 luma is 0.2126 R + 0.7152 G + 0.0722 B;
            # the channel-1 and channel-2 weights below look swapped. Left
            # unchanged because fixing it changes model inputs - confirm.
            screen = screen[:, :, 0] * 0.2126 + screen[:, :, 1] * 0.0722 + screen[:, :, 2] * 0.7152
            screen = screen.astype(np.uint8)
        screen = np.reshape(screen, (self.screen_height, self.screen_width, 1))
        state = self.preprocess(screen)
        current_lives = self.ale.lives()
        self.lives_lost = self.lives > current_lives
        self.lives = current_lives
        return reward, state

    def is_end_state(self):
        """Game over, or (when life_lost_as_end) the last act lost a life."""
        if self.life_lost_as_end:
            return self.ale.game_over() or self.lives_lost
        else:
            return self.ale.game_over()

    def reset(self):
        """Reset the game if it is over, and clear the life-loss tracking."""
        if self.ale.game_over():
            self.ale.reset_game()
        self.lives = self.ale.lives()
        self.lives_lost = False

    def available_actions(self):
        # return available indexes instead of actual action value
        return range(0, len(self.actions))

    def preprocess(self, screen):
        """Optionally display ``screen``, then resize to 84x84 and scale to [0, 1]."""
        if self.show_display and not self.record_display:
            cv2.imshow(self.display_name, screen)
        if self.shrink:
            resized = cv2.resize(screen, (84, 84))
        else:
            # Resize to 84 wide x 110 high, then keep rows 18..101.
            resized = cv2.resize(screen, (84, 110))
            resized = resized[18:102, :]
        scaled = resized.astype(np.float32) / 255.0
        return scaled
class ALEEnvironment(Environment):
    """ALE-backed environment whose ``step`` drives two action inputs."""

    def __init__(self, rom_file, args):
        """Configure ALE from ``args``, load the ROM, then init the base class."""
        from ale_python_interface import ALEInterface
        ale = ALEInterface()
        self.ale = ale

        # Set ALE configuration
        ale.setInt(b'frame_skip', args.frame_skip)
        ale.setFloat(b'repeat_action_probability', args.repeat_action_probability)
        ale.setBool(b'color_averaging', args.color_averaging)

        if args.random_seed:
            ale.setInt(b'random_seed', args.random_seed)

        screen_dir = args.record_screen_path
        if screen_dir:
            if not os.path.exists(screen_dir):
                os.makedirs(screen_dir)
            ale.setString(b'record_screen_dir', screen_dir.encode())

        sound_file = args.record_sound_filename
        if sound_file:
            ale.setBool(b'sound', True)
            ale.setString(b'record_sound_filename', sound_file.encode())

        # Load ROM
        ale.loadROM(rom_file.encode())

        # Difficulty and mode are applied after the ROM is loaded
        ale.setDifficulty(args.game_difficulty)
        ale.setMode(args.game_mode)

        # Minimal or full legal action set
        self.actions = (ale.getMinimalActionSet() if args.minimal_action_set
                        else ale.getLegalActionSet())

        # Life lost control
        self.life_lost = False

        # Initialize base class
        super(ALEEnvironment, self).__init__(args)

    def action_dim(self):
        """Return the number of available actions."""
        return len(self.actions)

    def reset(self):
        """Start a new episode and return the processed screen.

        The emulator is reset unless we are outside test mode, a life was
        just lost, and the game is not over; then play simply continues
        from the current state.
        """
        keep_playing = (
            self.mode != 'test'
            and self.life_lost
            and not self.ale.game_over()
        )
        if not keep_playing:
            self.ale.reset_game()
        self.life_lost = False

        return self._get_state(self.ale.getScreenRGB())

    def step(self, action, action_b=0, ignore_screen=False):
        """Apply ``action`` plus ``action_b`` (offset by 18) for one step.

        Returns (screen, reward, terminal); screen is None when
        ``ignore_screen`` is set (in case of RobotEnvironment).
        """
        lives_before = self.ale.lives()

        # Act on environment
        reward = self.ale.act(self.actions[action], self.actions[action_b] + 18)

        # Check if life was lost
        self.life_lost = self.ale.lives() != lives_before

        # Terminal: game over, or additionally a lost life while training
        if self.mode == 'train':
            terminal = self.ale.game_over() or self.life_lost
        else:
            terminal = self.ale.game_over()

        screen = None if ignore_screen else self._get_state(self.ale.getScreenRGB())

        # Wait for next frame to start
        self.fps_control.wait_next_frame()

        return screen, reward, terminal
class ALEEnvironment(Environment):
    """ALE-backed environment with optional SDL display, configured from ``args``."""

    def __init__(self, rom_file, args):
        """Create the emulator, apply the options in ``args`` and load ``rom_file``."""
        from ale_python_interface import ALEInterface
        ale = ALEInterface()
        self.ale = ale

        if args.display_screen:
            platform_name = sys.platform
            if platform_name == 'darwin':
                import pygame
                pygame.init()
                ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif platform_name.startswith('linux'):
                ale.setBool('sound', True)
            ale.setBool('display_screen', True)

        ale.setInt('frame_skip', args.frame_skip)
        ale.setFloat('repeat_action_probability', args.repeat_action_probability)
        ale.setBool('color_averaging', args.color_averaging)

        if args.random_seed:
            ale.setInt('random_seed', args.random_seed)

        frames_dir = args.record_screen_path
        if frames_dir:
            if not os.path.exists(frames_dir):
                logger.info("Creating folder %s", frames_dir)
                os.makedirs(frames_dir)
            logger.info("Recording screens to %s", frames_dir)
            ale.setString('record_screen_dir', frames_dir)

        wav_path = args.record_sound_filename
        if wav_path:
            logger.info("Recording sound to %s", wav_path)
            ale.setBool('sound', True)
            ale.setString('record_sound_filename', wav_path)

        ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d", len(self.actions))
        else:
            self.actions = ale.getLegalActionSet()
            logger.info("Using full action set with size %d", len(self.actions))
        logger.debug("Actions: %s", str(self.actions))

        self.screen_width = args.screen_width
        self.screen_height = args.screen_height

        self.life_lost = False

    def numActions(self):
        """Return the size of the selected action set."""
        return len(self.actions)

    def restart(self):
        """Start a new episode.

        Only when not in test mode, with a life just lost and the game
        still running, does play continue from the current state; in every
        other case the emulator is reset. The life-loss flag is cleared
        either way.
        """
        mid_game_life_loss = (
            self.mode != 'test'
            and self.life_lost
            and not self.ale.game_over()
        )
        if not mid_game_life_loss:
            self.ale.reset_game()
        self.life_lost = False

    def act(self, action):
        """Apply the action at index ``action`` and return ALE's reward."""
        previous_lives = self.ale.lives()
        reward = self.ale.act(self.actions[action])
        self.life_lost = self.ale.lives() != previous_lives
        return reward

    def getScreen(self):
        """Return the grayscale screen resized to (screen_width, screen_height)."""
        frame = self.ale.getScreenGrayscale()
        return cv2.resize(frame, (self.screen_width, self.screen_height))

    def isTerminal(self):
        """Game over, or (in train mode only) a life was just lost."""
        if self.mode != 'train':
            return self.ale.game_over()
        return self.ale.game_over() or self.life_lost
def test(session, hist_len=4, discount=0.99, act_rpt=4, upd_freq=4, min_sq_grad=0.01, epsilon=0.05,
    no_op_max=30, num_tests=1, learning_rate=0.0025, momentum=0.95, sq_momentum=0.95):
    """Run ``num_tests`` evaluation episodes of a DQN agent while recording.

    The ROM path is read from ``sys.argv[1]`` and the recording directory
    from ``sys.argv[2]``; screens and ``sound.wav`` are recorded there.
    Prints the score of each episode.

    Args:
        session: passed through to the ``DQN`` constructor.
        hist_len: number of preprocessed frames forming one state.
        discount: passed through to ``DQN``.
        act_rpt: number of times each chosen action is repeated.
        upd_freq, min_sq_grad: unused here.
        epsilon: exploration rate passed to the agent.
        no_op_max: passed to ``perform_no_ops`` at the start of an episode.
        num_tests: number of episodes to play.
        learning_rate, momentum, sq_momentum: required by ``DQN`` but
            unused during testing.
    """
    #Create ALE object
    if len(sys.argv) < 3:
        print('Usage: %s rom_file record_screen_dir' % sys.argv[0])
        sys.exit()

    ale = ALEInterface()

    record_path = sys.argv[2]
    # Create the directory directly rather than through a shell: the old
    # os.system('mkdir ' + path) broke on spaces and executed any shell
    # metacharacters present in the argument.
    if not os.path.isdir(record_path):
        os.makedirs(record_path)

    ale.setString('record_screen_dir', record_path)
    ale.setString('record_sound_filename', (record_path + '/sound.wav'))
    ale.setInt('fragsize', 64)

    # Get & Set the desired settings
    ale.setInt('random_seed', 123)
    #Changes repeat action probability from default of 0.25
    ale.setFloat('repeat_action_probability', 0.0)
    # Set USE_SDL to true to display the screen. ALE must be compiled
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.
    USE_SDL = False
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False) # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    # Load the ROM file
    ale.loadROM(sys.argv[1])

    # create DQN agent
    # learning_rate and momentum are unused parameters (but needed)
    agent = DQN(ale, session, epsilon, learning_rate, momentum, sq_momentum, hist_len, len(ale.getMinimalActionSet()), None, discount, rom_name(sys.argv[1]))

    #Store the most recent two images
    preprocess_stack = deque([], 2)

    num_episodes = 0
    while num_episodes < num_tests:
        #initialize sequence with initial image
        seq = list()
        #We only have one image, we cannot combine two images
        perform_no_ops(ale, no_op_max, preprocess_stack, seq)
        total_reward = 0
        while not ale.game_over():
            state = get_state(seq, hist_len)
            action = agent.get_action_best_network(state, epsilon)
            #skip frames by repeating action
            reward = 0
            for i in range(act_rpt):
                reward = reward + ale.act(action)
                preprocess_stack.append(ale.getScreenRGB())
            seq.append(pp.preprocess(preprocess_stack[0], preprocess_stack[1]))
            total_reward += reward
        print('Episode ended with score: %d' % (total_reward))
        num_episodes = num_episodes + 1
        ale.reset_game()
class Environment:
    """Training/evaluation driver around an ALE emulator (Python 2 code).

    Runs epochs of episodes against an agent, keeps a small buffer of raw
    grayscale frames that are max-merged and resized into observations,
    and writes run information, results and memory usage to log files.
    """

    BUFFER_LEN = 2          # raw frames kept for the max-merge
    EPISODE_FRAMES = 18000  # frame budget of one evaluation episode
    EPOCH_COUNT = 200       # epochs in a full training run
    EPOCH_STEPS = 250000    # agent steps per epoch
    EVAL_EPS = 0.001        # epsilon passed to the agent during evaluation
    FRAMES_SKIP = 4         # emulator frames per agent step
    FRAME_HEIGHT = 84       # observation height
    FRAME_WIDTH = 84        # observation width
    MAX_NO_OP = 30          # upper bound of random no-ops after a reset
    MAX_REWARD = 1          # rewards are clipped to [-MAX_REWARD, MAX_REWARD]

    def __init__(self, rom_name, rng, display_screen=False):
        """Create the emulator, seed it from ``rng`` and load '../rom/<rom_name>'."""
        self.api = ALEInterface()
        self.api.setInt('random_seed', rng.randint(333))
        self.api.setBool('display_screen', display_screen)
        self.api.setFloat('repeat_action_probability', 0.0)
        self.rom_name = rom_name
        self.display_screen = display_screen
        self.rng = rng
        self.repeat = Environment.FRAMES_SKIP
        self.buffer_len = Environment.BUFFER_LEN
        self.height = Environment.FRAME_HEIGHT
        self.width = Environment.FRAME_WIDTH
        self.episode_steps = Environment.EPISODE_FRAMES / Environment.FRAMES_SKIP
        # Index of the buffer slot the next raw frame is written to.
        self.merge_id = 0
        self.max_reward = Environment.MAX_REWARD
        self.eval_eps = Environment.EVAL_EPS
        self.log_dir = ''
        self.network_dir = ''

        self.api.loadROM('../rom/' + self.rom_name)
        self.minimal_actions = self.api.getMinimalActionSet()
        original_width, original_height = self.api.getScreenDims()
        self.merge_frame = np.zeros(
            (self.buffer_len, original_height, original_width),
            dtype=np.uint8)

    def get_action_count(self):
        """Return the size of the minimal action set."""
        return len(self.minimal_actions)

    def train(self, agent, store_freq, folder=None, start_epoch=0):
        """Train ``agent`` from ``start_epoch`` up to EPOCH_COUNT epochs.

        Each epoch runs episodes until EPOCH_STEPS steps are used, then
        evaluates the agent and updates the log files.
        """
        self._open_log_files(agent, folder)
        obs = np.zeros((self.height, self.width), dtype=np.uint8)
        epoch_count = Environment.EPOCH_COUNT

        for epoch in xrange(start_epoch, epoch_count):
            self.need_reset = True
            steps_left = Environment.EPOCH_STEPS

            print "\n" + "=" * 50
            print "Epoch #%d" % (epoch + 1)
            episode = 0
            train_start = time.time()
            while steps_left > 0:
                num_step, _ = self._run_episode(agent, steps_left, obs)
                steps_left -= num_step
                episode += 1
                if steps_left == 0 or episode % 10 == 0:
                    print "Finished episode #%d, steps_left = %d" \
                        % (episode, steps_left)
            train_end = time.time()

            valid_values = agent.get_validate_values()
            eval_values = self.evaluate(agent)
            test_end = time.time()

            train_time = train_end - train_start
            test_time = test_end - train_end
            step_per_sec = Environment.EPOCH_STEPS * 1. / max(1, train_time)
            print "\tFinished epoch #%d, episode trained = %d\n" \
                "\tValidate values = %.3f, evaluate reward = %.3f\n"\
                "\tTrain time = %.0fs, test time = %.0fs, steps/sec = %.4f" \
                % (epoch + 1, episode, valid_values, eval_values\
                    , train_time, test_time, step_per_sec)

            self._update_log_files(agent, epoch + 1, episode,
                                   valid_values, eval_values,
                                   train_time, test_time,
                                   step_per_sec, store_freq)
            gc.collect()

    def evaluate(self, agent, episodes=30, obs=None):
        """Run ``episodes`` evaluation episodes and return the mean reward."""
        print "\n***Start evaluating"
        if obs is None:
            obs = np.zeros((self.height, self.width), dtype=np.uint8)
        sum_reward = 0.0
        sum_step = 0.0
        for episode in xrange(episodes):
            # Force a fresh game for every evaluation episode.
            self.need_reset = True
            step, reward = self._run_episode(agent, self.episode_steps, obs,
                                             self.eval_eps, evaluating=True)
            sum_reward += reward
            sum_step += step
            print "Finished episode %d, reward = %d, step = %d" \
                % (episode + 1, reward, step)
        self.need_reset = True
        print "Average reward per episode = %.4f" % (sum_reward / episodes)
        print "Average step per episode = %.4f" % (sum_step / episodes)
        return sum_reward / episodes

    def _prepare_game(self):
        """Reset the game if needed, play random no-ops, and refill the frame buffer."""
        if self.need_reset or self.api.game_over():
            self.api.reset_game()
            self.need_reset = False
            if Environment.MAX_NO_OP > 0:
                num_no_op = self.rng.randint(Environment.MAX_NO_OP + 1) \
                    + self.buffer_len
                for _ in xrange(num_no_op):
                    self.api.act(0)

        for _ in xrange(self.buffer_len):
            self._update_buffer()

    def _run_episode(self, agent, steps_left, obs, eps=0.0, evaluating=False):
        """Play until a terminal condition or ``steps_left`` steps.

        Returns (step_count, sum_reward) where sum_reward is unclipped.
        """
        self._prepare_game()

        start_lives = self.api.lives()
        step_count = 0
        sum_reward = 0
        is_terminal = False
        while step_count < steps_left and not is_terminal:
            self._get_screen(obs)
            action_id, _ = agent.get_action(obs, eps, evaluating)

            reward = self._repeat_action(self.minimal_actions[action_id])
            reward_clip = reward
            if self.max_reward > 0:
                reward_clip = np.clip(reward, -self.max_reward, self.max_reward)

            # A lost life only ends the episode outside evaluation.
            life_lost = not evaluating and self.api.lives() < start_lives
            is_terminal = self.api.game_over() or life_lost \
                or step_count + 1 >= steps_left

            agent.add_experience(obs, is_terminal, action_id, reward_clip, evaluating)
            sum_reward += reward
            step_count += 1

        return step_count, sum_reward

    def _update_buffer(self):
        """Write the current grayscale screen into the next buffer slot."""
        self.api.getScreenGrayscale(self.merge_frame[self.merge_id, ...])
        self.merge_id = (self.merge_id + 1) % self.buffer_len

    def _repeat_action(self, action):
        """Apply ``action`` ``self.repeat`` times and return the summed reward.

        The last ``buffer_len`` frames are stored in the frame buffer.
        """
        reward = 0
        for i in xrange(self.repeat):
            reward += self.api.act(action)
            if i + self.buffer_len >= self.repeat:
                self._update_buffer()
        return reward

    def _get_screen(self, resized_frame):
        """Max-merge the buffered frames and resize the result into ``resized_frame``."""
        self._resize_frame(self.merge_frame.max(axis=0), resized_frame)

    def _resize_frame(self, src_frame, dst_frame):
        """Resize ``src_frame`` to (width, height), writing into ``dst_frame``."""
        cv2.resize(src=src_frame, dst=dst_frame,
                   dsize=(self.width, self.height),
                   interpolation=cv2.INTER_LINEAR)

    def _open_log_files(self, agent, folder):
        """Create the log/network directories and write the run-info files.

        When ``folder`` is given it is used as the log directory and the
        CSV files are not (re)created; otherwise a time-stamped directory
        under '../run_results/' is used and fresh CSV headers are written.
        """
        time_str = time.strftime("_%m-%d-%H-%M", time.localtime())
        base_rom_name = os.path.splitext(os.path.basename(self.rom_name))[0]

        if folder is not None:
            self.log_dir = folder
            self.network_dir = self.log_dir + '/network'
        else:
            self.log_dir = '../run_results/' + base_rom_name + time_str
            self.network_dir = self.log_dir + '/network'

        info_name = get_next_name(self.log_dir, 'info', 'txt')
        git_name = get_next_name(self.log_dir, 'git-diff', '')

        try:
            os.stat(self.log_dir)
        except OSError:
            os.makedirs(self.log_dir)

        try:
            os.stat(self.network_dir)
        except OSError:
            os.makedirs(self.network_dir)

        with open(os.path.join(self.log_dir, info_name), 'w') as f:
            f.write('Commit: ' + subprocess.check_output(['git', 'rev-parse', 'HEAD']))
            f.write('Run command: ')
            f.write(' '.join(pipes.quote(x) for x in sys.argv))
            f.write('\n\n')
            f.write(agent.get_info())
            write_info(f, Environment)
            write_info(f, agent.__class__)
            write_info(f, agent.network.__class__)

        # From https://github.com/spragunr/deep_q_rl/pull/49/files
        with open(os.path.join(self.log_dir, git_name), 'w') as f:
            f.write(subprocess.check_output(['git', 'diff', 'HEAD']))

        # An explicit folder is given: skip writing new CSV headers.
        if folder is not None:
            return

        with open(os.path.join(self.log_dir, 'results.csv'), 'w') as f:
            f.write("epoch,episode_train,validate_values,evaluate_reward"\
                ",train_time,test_time,steps_per_second\n")

        mem = psutil.virtual_memory()
        with open(os.path.join(self.log_dir, 'memory.csv'), 'w') as f:
            f.write("epoch,available,free,buffers,cached"\
                ",available_readable,used_percent\n")
            f.write("%d,%d,%d,%d,%d,%s,%.1f\n" % \
                (0, mem.available, mem.free, mem.buffers, mem.cached
                , bytes2human(mem.available), mem.percent))

    def _update_log_files(self, agent, epoch, episode, valid_values,
                          eval_values, train_time, test_time, step_per_sec,
                          store_freq):
        """Append this epoch's results and memory stats; dump the network.

        The experience buffer is also dumped after the final epoch (when
        ``store_freq >= 0``) or every ``store_freq`` epochs.
        """
        print "Updating log files"
        with open(self.log_dir + '/results.csv', 'a') as f:
            f.write("%d,%d,%.4f,%.4f,%d,%d,%.4f\n" % \
                (epoch, episode, valid_values, eval_values
                , train_time, test_time, step_per_sec))

        mem = psutil.virtual_memory()
        with open(self.log_dir + '/memory.csv', 'a') as f:
            f.write("%d,%d,%d,%d,%d,%s,%.1f\n" % \
                (epoch, mem.available, mem.free, mem.buffers, mem.cached
                , bytes2human(mem.available), mem.percent))

        agent.dump_network(self.network_dir + ('/%03d' % (epoch)) + '.npz')

        if (store_freq >= 0 and epoch >= Environment.EPOCH_COUNT) or \
                (store_freq > 0 and (epoch % store_freq == 0)):
            agent.dump_exp(self.network_dir + '/exp.npz')

    def _setup_record(self, network_file):
        """Point ALE at a fresh image directory next to ``network_file``.

        The ROM is loaded again after the recording directory is set.
        Returns (img_dir, out_name), the image directory and the '.mov'
        output path.
        """
        file_name, _ = os.path.splitext(os.path.basename(network_file))
        time_str = time.strftime("_%m-%d-%H-%M", time.localtime())
        img_dir = os.path.dirname(network_file) + '/images_' \
            + file_name + time_str
        rom_name, _ = os.path.splitext(self.rom_name)
        out_name = os.path.dirname(network_file) + '/' + rom_name + '_' \
            + file_name + time_str + '.mov'
        print out_name

        try:
            os.stat(img_dir)
        except OSError:
            os.makedirs(img_dir)

        self.api.setString('record_screen_dir', img_dir)
        self.api.loadROM('../rom/' + self.rom_name)
        return img_dir, out_name

    def record_run(self, agent, network_file, episode_id=1):
        """Record one evaluation episode and encode it into a '.mov' file.

        For ``episode_id > 1`` the preceding episodes are played first and
        the emulator state is cloned before, and restored after, recording
        is set up. Encoding tries ffmpeg and falls back to avconv.
        """
        if episode_id > 1:
            self.evaluate(agent, episode_id - 1)
            system_state = self.api.cloneSystemState()

        img_dir, out_name = self._setup_record(network_file)

        if episode_id > 1:
            self.api.restoreSystemState(system_state)

        self.evaluate(agent, 1)
        script = \
            """
            {
                ffmpeg -r 60 -i %s/%%06d.png -f mov -c:v libx264 %s
            } || {
                avconv -r 60 -i %s/%%06d.png -f mov -c:v libx264 %s
            }
            """ % (img_dir, out_name, img_dir, out_name)
        os.system(script)
class Environment:
    """Drive an ALE emulator to train, evaluate and record an agent.

    The environment owns the ``ALEInterface`` instance, turns raw emulator
    screens into resized grayscale observations, runs the epoch/episode
    loops and writes the CSV logs and network snapshots of a run.

    The ``agent`` passed to the public methods is expected to provide
    ``get_action``, ``add_experience``, ``get_validate_values``,
    ``get_info``, ``dump_network``, ``dump_exp`` and a ``network``
    attribute; those are the only members used here.
    """

    # Number of most recent raw screens kept in ``merge_frame``; the
    # observation is the element-wise maximum over them.
    BUFFER_LEN = 2
    # Emulator-frame budget of one evaluation episode.
    EPISODE_FRAMES = 18000
    # Number of training epochs.
    EPOCH_COUNT = 200
    # Agent steps per training epoch.
    EPOCH_STEPS = 250000
    # Exploration rate handed to the agent while evaluating.
    EVAL_EPS = 0.001
    # Emulator frames played for every agent action.
    FRAMES_SKIP = 4
    # Size of the resized observation.
    FRAME_HEIGHT = 84
    FRAME_WIDTH = 84
    # Upper bound of the random number of no-op actions after a reset.
    MAX_NO_OP = 30
    # Rewards given to the agent are clipped to [-MAX_REWARD, MAX_REWARD]
    # (no clipping when this is not positive).
    MAX_REWARD = 1

    def __init__(self, rom_name, rng, display_screen=False):
        """Create the emulator and load ``'../rom/' + rom_name``.

        :param rom_name: ROM file name, relative to ``../rom/``.
        :param rng: random generator exposing ``randint``; used for the
            emulator seed and for the number of start no-ops.
        :param display_screen: forwarded to ALE's ``display_screen``.
        """
        self.api = ALEInterface()
        self.api.setInt('random_seed', rng.randint(333))
        self.api.setBool('display_screen', display_screen)
        self.api.setFloat('repeat_action_probability', 0.0)

        self.rom_name = rom_name
        self.display_screen = display_screen
        self.rng = rng
        self.repeat = Environment.FRAMES_SKIP
        self.buffer_len = Environment.BUFFER_LEN
        self.height = Environment.FRAME_HEIGHT
        self.width = Environment.FRAME_WIDTH
        # Floor division keeps the step cap an integer even when true
        # division is in effect.
        self.episode_steps = (Environment.EPISODE_FRAMES
                              // Environment.FRAMES_SKIP)
        self.merge_id = 0
        self.max_reward = Environment.MAX_REWARD
        self.eval_eps = Environment.EVAL_EPS
        self.log_dir = ''
        self.network_dir = ''
        # _prepare_game() reads this flag; define it up front so that an
        # episode can be run before train()/evaluate() have set it.
        self.need_reset = True

        self.api.loadROM('../rom/' + self.rom_name)
        self.minimal_actions = self.api.getMinimalActionSet()
        original_width, original_height = self.api.getScreenDims()
        self.merge_frame = np.zeros(
            (self.buffer_len, original_height, original_width),
            dtype=np.uint8)

    def get_action_count(self):
        """Return the size of the ROM's minimal action set."""
        return len(self.minimal_actions)

    def train(self, agent, store_freq, folder=None, start_epoch=0):
        """Run the training epochs, evaluating and logging after each one.

        :param agent: the learning agent (see the class docstring).
        :param store_freq: controls when the experience is dumped, see
            ``_update_log_files``.
        :param folder: existing run folder to continue logging into, or
            ``None`` to create a fresh time-stamped one.
        :param start_epoch: zero-based index of the first epoch to run.
        """
        self._open_log_files(agent, folder)
        obs = np.zeros((self.height, self.width), dtype=np.uint8)
        epoch_count = Environment.EPOCH_COUNT

        for epoch in range(start_epoch, epoch_count):
            self.need_reset = True
            steps_left = Environment.EPOCH_STEPS

            print("\n" + "=" * 50)
            print("Epoch #%d" % (epoch + 1))
            episode = 0
            train_start = time.time()
            while steps_left > 0:
                num_step, _ = self._run_episode(agent, steps_left, obs)
                steps_left -= num_step
                episode += 1
                if steps_left == 0 or episode % 10 == 0:
                    print("Finished episode #%d, steps_left = %d"
                          % (episode, steps_left))
            train_end = time.time()

            valid_values = agent.get_validate_values()
            eval_values = self.evaluate(agent)
            test_end = time.time()

            train_time = train_end - train_start
            test_time = test_end - train_end
            # max(1, ...) avoids a division by zero on a degenerate epoch.
            step_per_sec = Environment.EPOCH_STEPS * 1. / max(1, train_time)
            print("\tFinished epoch #%d, episode trained = %d\n"
                  "\tValidate values = %.3f, evaluate reward = %.3f\n"
                  "\tTrain time = %.0fs, test time = %.0fs, steps/sec = %.4f"
                  % (epoch + 1, episode, valid_values, eval_values,
                     train_time, test_time, step_per_sec))

            self._update_log_files(agent, epoch + 1, episode,
                                   valid_values, eval_values,
                                   train_time, test_time,
                                   step_per_sec, store_freq)
            gc.collect()

    def evaluate(self, agent, episodes=30, obs=None):
        """Play ``episodes`` evaluation episodes.

        :param agent: the agent to evaluate.
        :param episodes: number of episodes to play (must be positive).
        :param obs: optional pre-allocated observation buffer of shape
            ``(height, width)``; a new one is created when ``None``.
        :returns: the average unclipped reward per episode.
        """
        print("\n***Start evaluating")
        if obs is None:
            obs = np.zeros((self.height, self.width), dtype=np.uint8)
        sum_reward = 0.0
        sum_step = 0.0
        for episode in range(episodes):
            self.need_reset = True
            step, reward = self._run_episode(agent, self.episode_steps, obs,
                                             self.eval_eps, evaluating=True)
            sum_reward += reward
            sum_step += step
            print("Finished episode %d, reward = %d, step = %d"
                  % (episode + 1, reward, step))
        # Force a reset before whatever episode is run next.
        self.need_reset = True
        print("Average reward per episode = %.4f" % (sum_reward / episodes))
        print("Average step per episode = %.4f" % (sum_step / episodes))
        return sum_reward / episodes

    def _prepare_game(self):
        """Reset the game if required and refill the screen buffer."""
        if self.need_reset or self.api.game_over():
            self.api.reset_game()
            self.need_reset = False
            if Environment.MAX_NO_OP > 0:
                # Random number of no-ops (action 0) after the reset.
                num_no_op = (self.rng.randint(Environment.MAX_NO_OP + 1)
                             + self.buffer_len)
                for _ in range(num_no_op):
                    self.api.act(0)

        # Overwrite every buffer slot with the current screen so the first
        # observation of the episode contains no stale frames.
        for _ in range(self.buffer_len):
            self._update_buffer()

    def _run_episode(self, agent, steps_left, obs, eps=0.0,
                     evaluating=False):
        """Play one episode of at most ``steps_left`` agent steps.

        While training (``evaluating`` false) the loss of a life ends the
        episode as well.

        :returns: ``(step_count, sum_reward)`` where ``sum_reward`` is the
            sum of the unclipped rewards.
        """
        self._prepare_game()

        start_lives = self.api.lives()
        step_count = 0
        sum_reward = 0
        is_terminal = False
        while step_count < steps_left and not is_terminal:
            self._get_screen(obs)
            action_id, _ = agent.get_action(obs, eps, evaluating)

            reward = self._repeat_action(self.minimal_actions[action_id])
            reward_clip = reward
            if self.max_reward > 0:
                reward_clip = np.clip(reward, -self.max_reward,
                                      self.max_reward)

            life_lost = not evaluating and self.api.lives() < start_lives
            # Running out of the step budget is reported as terminal too.
            is_terminal = (self.api.game_over() or life_lost
                           or step_count + 1 >= steps_left)

            agent.add_experience(obs, is_terminal, action_id, reward_clip,
                                 evaluating)
            sum_reward += reward
            step_count += 1

        return step_count, sum_reward

    def _update_buffer(self):
        """Store the current grayscale screen in the next buffer slot."""
        self.api.getScreenGrayscale(self.merge_frame[self.merge_id, ...])
        self.merge_id = (self.merge_id + 1) % self.buffer_len

    def _repeat_action(self, action):
        """Apply ``action`` for ``self.repeat`` frames; return the reward sum.

        Only the screens of the last ``buffer_len`` frames are captured.
        """
        reward = 0
        for i in range(self.repeat):
            reward += self.api.act(action)
            if i + self.buffer_len >= self.repeat:
                self._update_buffer()
        return reward

    def _get_screen(self, resized_frame):
        """Write the max-merged, resized screen into ``resized_frame``."""
        self._resize_frame(self.merge_frame.max(axis=0), resized_frame)

    def _resize_frame(self, src_frame, dst_frame):
        """Resize ``src_frame`` into ``dst_frame`` (linear interpolation)."""
        cv2.resize(src=src_frame, dst=dst_frame,
                   dsize=(self.width, self.height),
                   interpolation=cv2.INTER_LINEAR)

    def _open_log_files(self, agent, folder):
        """Create the run folders and write the run description files.

        An info file (git commit, command line, agent and class info) and
        a ``git diff HEAD`` file are always written. The CSV files are
        only (re)created for a fresh run, i.e. when ``folder`` is ``None``.
        """
        time_str = time.strftime("_%m-%d-%H-%M", time.localtime())
        base_rom_name = os.path.splitext(os.path.basename(self.rom_name))[0]

        if folder is not None:
            self.log_dir = folder
        else:
            self.log_dir = '../run_results/' + base_rom_name + time_str
        self.network_dir = self.log_dir + '/network'

        info_name = get_next_name(self.log_dir, 'info', 'txt')
        git_name = get_next_name(self.log_dir, 'git-diff', '')

        for directory in (self.log_dir, self.network_dir):
            if not os.path.exists(directory):
                os.makedirs(directory)

        with open(os.path.join(self.log_dir, info_name), 'w') as f:
            f.write('Commit: ' + subprocess.check_output(
                ['git', 'rev-parse', 'HEAD']))
            f.write('Run command: ')
            f.write(' '.join(pipes.quote(x) for x in sys.argv))
            f.write('\n\n')
            f.write(agent.get_info())
            write_info(f, Environment)
            write_info(f, agent.__class__)
            write_info(f, agent.network.__class__)

        # From https://github.com/spragunr/deep_q_rl/pull/49/files
        with open(os.path.join(self.log_dir, git_name), 'w') as f:
            f.write(subprocess.check_output(['git', 'diff', 'HEAD']))

        if folder is not None:
            # Continuing an existing run: keep its CSV files untouched.
            return

        with open(os.path.join(self.log_dir, 'results.csv'), 'w') as f:
            f.write("epoch,episode_train,validate_values,evaluate_reward"
                    ",train_time,test_time,steps_per_second\n")

        mem = psutil.virtual_memory()
        with open(os.path.join(self.log_dir, 'memory.csv'), 'w') as f:
            f.write("epoch,available,free,buffers,cached"
                    ",available_readable,used_percent\n")
            f.write("%d,%d,%d,%d,%d,%s,%.1f\n"
                    % (0, mem.available, mem.free, mem.buffers, mem.cached,
                       bytes2human(mem.available), mem.percent))

    def _update_log_files(self, agent, epoch, episode, valid_values,
                          eval_values, train_time, test_time, step_per_sec,
                          store_freq):
        """Append the epoch results to the CSV logs and snapshot the agent.

        The network is dumped to ``<network_dir>/<epoch:03d>.npz`` after
        every epoch. The experience is dumped to
        ``<network_dir>/exp.npz`` once ``epoch`` reaches ``EPOCH_COUNT``
        (if ``store_freq >= 0``) or every ``store_freq`` epochs (if
        ``store_freq > 0``).
        """
        print("Updating log files")
        with open(os.path.join(self.log_dir, 'results.csv'), 'a') as f:
            f.write("%d,%d,%.4f,%.4f,%d,%d,%.4f\n"
                    % (epoch, episode, valid_values, eval_values,
                       train_time, test_time, step_per_sec))

        mem = psutil.virtual_memory()
        with open(os.path.join(self.log_dir, 'memory.csv'), 'a') as f:
            f.write("%d,%d,%d,%d,%d,%s,%.1f\n"
                    % (epoch, mem.available, mem.free, mem.buffers,
                       mem.cached, bytes2human(mem.available), mem.percent))

        agent.dump_network(self.network_dir + ('/%03d' % (epoch)) + '.npz')

        last_epoch_done = store_freq >= 0 and epoch >= Environment.EPOCH_COUNT
        periodic_dump = store_freq > 0 and epoch % store_freq == 0
        if last_epoch_done or periodic_dump:
            agent.dump_exp(self.network_dir + '/exp.npz')

    def _setup_record(self, network_file):
        """Point the emulator at a fresh frame directory and reload the ROM.

        :param network_file: path of the network file being recorded; the
            frame directory and the movie are created next to it.
        :returns: ``(img_dir, out_name)`` - the frame directory and the
            path of the movie to produce.
        """
        file_name, _ = os.path.splitext(os.path.basename(network_file))
        time_str = time.strftime("_%m-%d-%H-%M", time.localtime())
        rom_name, _ = os.path.splitext(self.rom_name)

        # os.path.join keeps the outputs relative when network_file has no
        # directory part; plain '+' concatenation would have produced a
        # path at the filesystem root in that case.
        base_dir = os.path.dirname(network_file)
        img_dir = os.path.join(base_dir, 'images_' + file_name + time_str)
        out_name = os.path.join(
            base_dir, rom_name + '_' + file_name + time_str + '.mov')
        print(out_name)

        if not os.path.exists(img_dir):
            os.makedirs(img_dir)

        self.api.setString('record_screen_dir', img_dir)
        self.api.loadROM('../rom/' + self.rom_name)
        return img_dir, out_name

    @staticmethod
    def _encode_commands(img_dir, out_name):
        """Return the argv lists of the encoders to try, in order."""
        frames = os.path.join(img_dir, '%06d.png')
        options = ['-r', '60', '-i', frames, '-f', 'mov',
                   '-c:v', 'libx264', out_name]
        return [[tool] + options for tool in ('ffmpeg', 'avconv')]

    def record_run(self, agent, network_file, episode_id=1):
        """Record evaluation episode number ``episode_id`` as a movie.

        The ``episode_id - 1`` preceding episodes are played without
        recording. The emulator state reached afterwards is saved and
        restored around ``_setup_record`` (which reloads the ROM), then
        one more episode is played while the emulator dumps its frames,
        and the frames are encoded with ffmpeg (or avconv as a fallback).
        """
        if episode_id > 1:
            self.evaluate(agent, episode_id - 1)
            system_state = self.api.cloneSystemState()

        img_dir, out_name = self._setup_record(network_file)

        if episode_id > 1:
            self.api.restoreSystemState(system_state)

        self.evaluate(agent, 1)

        # Argument lists instead of a shell string: paths containing
        # spaces or shell metacharacters are passed through unchanged.
        for command in self._encode_commands(img_dir, out_name):
            try:
                if subprocess.call(command) == 0:
                    break
            except OSError:
                # Encoder binary not available; try the next one.
                continue
class AtariEnvironment:
    """ALE wrapper configured from the global ``FLAGS``.

    Observations are the element-wise maximum over the grayscale screens
    held in a frame buffer of ``FLAGS.frame_buffer_size`` entries.
    """

    def __init__(self, seed=1, record=False):
        """Configure the emulator, load ``FLAGS.rom`` and reset the game.

        :param seed: value for ALE's ``random_seed`` setting.
        :param record: when true, the screen is displayed and frames and
            sound are recorded into ``FLAGS.record_dir`` (created if
            missing).
        """
        self.ale = ALEInterface()
        self.ale.setBool(b'display_screen', FLAGS.display_screen or record)
        # Frame skipping is done by act() below, not by the emulator.
        self.ale.setInt(b'frame_skip', 1)
        self.ale.setBool(b'color_averaging', False)
        self.ale.setInt(b'random_seed', seed)
        self.ale.setFloat(b'repeat_action_probability', FLAGS.sticky_prob)
        self.ale.setInt(b'max_num_frames_per_episode',
                        FLAGS.max_num_frames_per_episode)

        if record:
            if not tf.gfile.Exists(FLAGS.record_dir):
                tf.gfile.MakeDirs(FLAGS.record_dir)
            self.ale.setBool(b'sound', True)
            self.ale.setString(b'record_screen_dir',
                               str.encode(FLAGS.record_dir))
            self.ale.setString(b'record_sound_filename',
                               str.encode(FLAGS.record_dir + '/sound.wav'))
            self.ale.setInt(b'fragsize', 64)

        self.ale.loadROM(str.encode(FLAGS.rom))
        self.ale.setMode(FLAGS.mode)
        self.ale.setDifficulty(FLAGS.difficulty)
        self.action_set = self.ale.getLegalActionSet()

        # getScreenDims() is reversed and a trailing channel axis of size
        # one is appended to obtain the per-frame buffer shape.
        screen_dims = tuple(reversed(self.ale.getScreenDims())) + (1,)
        self._frame_buffer = CircularBuffer(FLAGS.frame_buffer_size,
                                            screen_dims, np.uint8)
        self.reset()

    def _is_terminal(self):
        """Return whether the emulator reports the game as over."""
        return self.ale.game_over()

    def _get_single_frame(self):
        """Return the element-wise maximum of the buffered frames.

        The buffered frames are concatenated along axis 2 and reduced
        over that axis; ``keepdims=True`` keeps a trailing axis of size
        one, so frames of shape ``(H, W, 1)`` give a result of shape
        ``(H, W, 1)``.
        """
        stacked_frames = np.concatenate(self._frame_buffer, axis=2)
        # The previous np.expand_dims(maxed_frame, 3) asked for axis 3 of
        # a 3-D result, which current NumPy rejects with an AxisError.
        return np.amax(stacked_frames, axis=2, keepdims=True)

    def reset(self):
        """Reset the game, the episode counters and the frame buffer."""
        self._episode_frames = 0
        self._episode_reward = 0
        self.ale.reset_game()
        for _ in range(FLAGS.frame_buffer_size):
            self._frame_buffer.append(self.ale.getScreenGrayscale())

    def act(self, action):
        """Repeat ``action`` for ``FLAGS.frame_skip`` emulator frames.

        :param action: index into ``self.action_set``.
        :returns: ``(reward, frame, terminal)`` where ``reward`` is the
            accumulated reward clipped to ``[-1, 1]``.
        """
        assert not self._is_terminal()

        cum_reward = 0
        for _ in range(FLAGS.frame_skip):
            cum_reward += self.ale.act(self.action_set[action])
            self._frame_buffer.append(self.ale.getScreenGrayscale())

        self._episode_frames += FLAGS.frame_skip
        # The episode total keeps the unclipped reward.
        self._episode_reward += cum_reward
        cum_reward = np.clip(cum_reward, -1, 1)

        return cum_reward, self._get_single_frame(), self._is_terminal()

    def state(self):
        """Return the current max-pooled frame."""
        assert len(self._frame_buffer) == FLAGS.frame_buffer_size
        return self._get_single_frame()

    def num_actions(self):
        """Return the number of actions in the legal action set."""
        return len(self.action_set)

    def episode_reward(self):
        """Return the unclipped reward accumulated since the last reset."""
        return self._episode_reward

    def episode_frames(self):
        """Return the number of frames played since the last reset."""
        return self._episode_frames

    def frame_skip(self):
        """Return ``FLAGS.frame_skip``."""
        return FLAGS.frame_skip
# Set USE_SDL to true to display the screen. ALE must be compilied # with SDL enabled for this to work. On OSX, pygame init is used to # proxy-call SDL_main. USE_SDL = True if USE_SDL: if sys.platform == 'darwin': import pygame pygame.init() ale.setBool('sound', False) # Sound doesn't work on OSX elif sys.platform.startswith('linux'): ale.setBool('sound', True) ale.setBool('display_screen', True) exp_path = "records/exp{}".format(sys.argv[2]) os.mkdir(exp_path) ale.setString("record_screen_dir", exp_path) # Load the ROM file ale.loadROM(sys.argv[1]) print sys.argv[1] #ale.loadROM("../../Roms/ROMS/montezuma_revenge.bin") # Get the list of legal actions legal_actions = ale.getLegalActionSet() one_hot = {0:"1 0 0 0 0 0 0 0 0", 2:"0 1 0 0 0 0 0 0 0", 3:"0 0 1 0 0 0 0 0 0", 4:"0 0 0 1 0 0 0 0 0", 5:"0 0 0 0 1 0 0 0 0", 6:"0 0 0 0 0 1 0 0 0", 7:"0 0 0 0 0 0 1 0 0",
class AtariPlayer(RLEnvironment):
    """
    A wrapper for atari emulator.
    """

    def __init__(self, rom_file, viz=0, height_range=(None, None),
                 frame_skip=4, image_shape=(84, 84), nullop_start=30,
                 live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with a random number of null ops, drawn
            from [0, nullop_start). A value of 0 or less disables them.
        :param live_lost_as_eoe: consider the loss of a life as the end of
            the episode. Useful for training.
        """
        super(AtariPlayer, self).__init__()
        self.ale = ALEInterface()
        self.rng = get_rng(self)

        self.ale.setInt("random_seed", self.rng.randint(0, 10000))
        self.ale.setBool("showinfo", False)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            # This ALE build has no logger-mode API.
            log_once()

        # Frame skipping is done by action() below, not by the emulator.
        self.ale.setInt("frame_skip", 1)
        self.ale.setBool('color_averaging', False)
        # manual.pdf suggests otherwise. may need to check
        self.ale.setFloat('repeat_action_probability', 0.0)

        # viz setup
        if isinstance(viz, six.string_types):
            # A string is a directory in which the emulator stores frames.
            assert os.path.isdir(viz), viz
            self.ale.setString('record_screen_dir', viz)
            viz = 0
        if isinstance(viz, int):
            viz = float(viz)
        self.viz = viz
        if self.viz and isinstance(self.viz, float):
            self.windowname = os.path.basename(rom_file)
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

        self.ale.loadROM(rom_file)
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w, 1) image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                time.sleep(self.viz)
        ret = ret[self.height_range[0]:self.height_range[1], :]
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)
        ret = np.expand_dims(ret, axis=2)
        return ret

    def get_num_actions(self):
        """
        :returns: the number of actions in the minimal action set
        """
        return len(self.actions)

    def restart_episode(self):
        """
        Record the score of the finished episode (if any), reset the game
        and play a random number of null-ops.
        """
        if self.current_episode_score.count > 0:
            self.stats['score'].append(self.current_episode_score.sum)
        self.current_episode_score.reset()
        self.ale.reset_game()

        # random null-ops start
        # randint() rejects an empty range, so only draw when there is one;
        # nullop_start <= 0 then simply means "no null-ops".
        if self.nullop_start > 0:
            n = self.rng.randint(self.nullop_start)
        else:
            n = 0
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                # Keep the screen shown just before the final null-op.
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        # Defined up front so it is bound even if the loop body never runs.
        newlives = oldlives
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                # Keep the screen shown just before the last repeated frame.
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        self.current_episode_score.feed(r)
        isOver = self.ale.game_over()
        if isOver:
            self.restart_episode()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives
        return (r, isOver)

    def get_stat(self):
        """
        :returns: a dict with 'avg_score' and 'max_score' of the recorded
            episodes, or an empty dict when no episode has finished yet
        """
        scores = self.stats['score']
        if len(scores) == 0:
            # Nothing recorded yet; checking first avoids NumPy's
            # empty-slice warning from np.mean().
            return {}
        return {'avg_score': np.mean(scores),
                'max_score': float(np.max(scores))}