import time # set parameters, these are in the paper REPLAY_MEMORY_SIZE = 1000000 REPLAY_START_SIZE = int(REPLAY_MEMORY_SIZE / 50) REPLAY_MINIBATCH_SIZE = 32 AGENT_HISTORY_LENGTH = 4 # TARGET_NETWORK_UPDATE_FREQUENCY = 10000 DISCOUNT_FACTOR = 0.99 INITIAL_EXPLORATION = 1.0 FINAL_EXPLORATION = 0.1 FINAL_EXPLORATION_FRAME = 10000 NUM_EPISODES = 5 # initialize ALE interface ale = atari_py.ALEInterface() pong_path = atari_py.get_game_path('breakout') ale.loadROM(pong_path) legal_actions = ale.getMinimalActionSet() print("legal actions {}".format(legal_actions)) num_of_actions = len(legal_actions) (screen_width, screen_height) = ale.getScreenDims() screen_data = np.zeros((screen_height, screen_width, 3), dtype=np.uint8) # Using RGB state1 = np.zeros((AGENT_HISTORY_LENGTH, screen_height, screen_width, 3), dtype=np.uint8) state2 = np.zeros((AGENT_HISTORY_LENGTH, screen_height, screen_width, 3), dtype=np.uint8) # observe initial state
def __init__(
        self,
        game="pong",
        mode=None,
        difficulty=None,
        obs_type="image",
        frameskip=(2, 5),
        repeat_action_probability=0.0,
        full_action_space=False,
):
    """Build an ALE-backed Atari environment.

    Arguments:
        game: the name of the game ("pong", "Enduro", etc) dont add the "-v0"
        mode: different modes are available for different games.
        difficulty: game difficulty setting (game dependent).
        obs_type: "ram" (128-byte RAM vector) or "image" (RGB screen).
        frameskip: either a tuple (indicating a random range to choose from,
            with the top value exclude), or an int.
        repeat_action_probability: ALE sticky-action probability.
        full_action_space: use the full legal action set instead of the
            game's minimal one.

    Raises:
        IOError: if the ROM for *game* cannot be found.
        error.Error: if *obs_type* is unrecognized.
    """
    # BUG FIX: full_action_space was missing from the EzPickle argument list,
    # so a pickled/deep-copied env silently reverted to the minimal action set.
    utils.EzPickle.__init__(self, game, mode, difficulty, obs_type,
                            frameskip, repeat_action_probability,
                            full_action_space)
    assert obs_type in ("ram", "image")
    self.game = game
    self.game_path = atari_py.get_game_path(game)
    self.game_mode = mode
    self.game_difficulty = difficulty
    if not os.path.exists(self.game_path):
        msg = "You asked for game %s but path %s does not exist"
        raise IOError(msg % (game, self.game_path))
    self._obs_type = obs_type
    self.frameskip = frameskip
    self.ale = atari_py.ALEInterface()
    self.viewer = None
    # Tune (or disable) ALE's action repeat:
    # https://github.com/openai/gym/issues/349
    assert isinstance(
        repeat_action_probability,
        (float, int)), "Invalid repeat_action_probability: {!r}".format(
            repeat_action_probability)
    self.ale.setFloat("repeat_action_probability".encode("utf-8"),
                      repeat_action_probability)
    # seed() also loads the ROM, so it must run before querying action sets.
    self.seed()
    self._action_set = (self.ale.getLegalActionSet()
                        if full_action_space else
                        self.ale.getMinimalActionSet())
    self.action_space = spaces.Discrete(len(self._action_set))
    (screen_width, screen_height) = self.ale.getScreenDims()
    if self._obs_type == "ram":
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            dtype=np.uint8,
                                            shape=(128, ))
    elif self._obs_type == "image":
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(screen_height,
                                                   screen_width, 3),
                                            dtype=np.uint8)
    else:
        # Unreachable given the assert above; kept as a defensive guard.
        raise error.Error("Unrecognized observation type: {}".format(
            self._obs_type))
def __init__(self, game, seed=None, use_sdl=False, n_last_screens=4,
             frame_skip=4, treat_life_lost_as_terminal=True,
             crop_or_scale='scale', max_start_nullops=30,
             record_screen_dir=None):
    """Wrap an ALE game as an environment with frame stacking.

    Args:
        game: ROM name resolved via atari_py.get_game_path.
        seed: ALE random seed in [0, 2**31); drawn from numpy if None.
        use_sdl: display the game window through SDL (needs DISPLAY).
        n_last_screens: number of past screens kept in the observation tuple.
        frame_skip: number of emulator frames per action (>= 1).
        treat_life_lost_as_terminal: end episodes on life loss.
        crop_or_scale: 'crop' or 'scale' preprocessing mode.
        max_start_nullops: max no-op actions at episode start.
        record_screen_dir: if set, ALE records raw screens to this directory.
    """
    assert crop_or_scale in ['crop', 'scale']
    assert frame_skip >= 1
    self.n_last_screens = n_last_screens
    self.treat_life_lost_as_terminal = treat_life_lost_as_terminal
    self.crop_or_scale = crop_or_scale
    self.max_start_nullops = max_start_nullops

    # atari_py is used only to provide rom files. atari_py has its own
    # ale_python_interface, but it is obsolete.
    if not atari_py_available:
        raise RuntimeError(
            'You need to install atari_py>=0.1.1 to use ALE.')
    game_path = atari_py.get_game_path(game)

    ale = atari_py.ALEInterface()
    if seed is not None:
        assert seed >= 0 and seed < 2 ** 31, \
            "ALE's random seed must be in [0, 2 ** 31)."
    else:
        # Use numpy's random state
        seed = np.random.randint(0, 2**31)
    # All ALE options must be set before loadROM for them to take effect.
    ale.setInt(b'random_seed', seed)
    ale.setFloat(b'repeat_action_probability', 0.0)
    ale.setBool(b'color_averaging', False)
    if record_screen_dir is not None:
        ale.setString(b'record_screen_dir',
                      str.encode(str(record_screen_dir)))
    self.frame_skip = frame_skip
    if use_sdl:
        if 'DISPLAY' not in os.environ:
            raise RuntimeError(
                'Please set DISPLAY environment variable for use_sdl=True')
        # SDL settings below are from the ALE python example
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool(b'sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool(b'sound', True)
        ale.setBool(b'display_screen', True)
    ale.loadROM(str.encode(str(game_path)))
    # A fresh ALE instance should start at frame 0.
    assert ale.getFrameNumber() == 0
    self.ale = ale
    self.legal_actions = ale.getMinimalActionSet()
    self.initialize()

    self.action_space = spaces.Discrete(len(self.legal_actions))
    # Each preprocessed screen is 84x84 grayscale; the observation is a
    # tuple of the last n_last_screens of them.
    one_screen_observation_space = spaces.Box(
        low=0,
        high=255,
        shape=(84, 84),
        dtype=np.uint8,
    )
    self.observation_space = spaces.Tuple(
        [one_screen_observation_space] * n_last_screens)
def __init__(self, name):
    """Create an ALE instance and load the ROM for *name*."""
    self.ale = atari_py.ALEInterface()
    rom_file = atari_py.get_game_path(name)
    self.ale.loadROM(rom_file)
def available_modes_for(game):
    """Return the ALE game-mode ids supported by *game*."""
    interface = atari_py.ALEInterface()
    # Fix the seed before loading so the query is deterministic.
    interface.setInt(b"random_seed", 0)
    # load up the game
    rom = atari_py.get_game_path(game)
    interface.loadROM(rom)
    return interface.getAvailableModes()
def __init__(self, monitor, frameskip=(2, 5), repeat_action_probability=0.):
    """Goal-conditioned Atari env driven by a *monitor* object.

    The monitor supplies the game name and the goal set/centers; the env
    tracks which goal is desired and which has been achieved.
    """
    self.game_path = atari_py.get_game_path(monitor.game_name)
    if not os.path.exists(self.game_path):
        raise IOError('You asked for game %s but path %s does not exist' %
                      (monitor.game_name, self.game_path))
    self._obs_type = 'image'  # HACK to image for now.
    self.frameskip = frameskip
    self.ale = atari_py.ALEInterface()
    self.viewer = None
    # added monitor to keep track of things
    self.monitor = monitor
    # Tune (or disable) ALE's action repeat:
    # https://github.com/openai/gym/issues/349
    assert isinstance(
        repeat_action_probability,
        (float, int)), "Invalid repeat_action_probability: {!r}".format(
            repeat_action_probability)
    self.ale.setFloat('repeat_action_probability'.encode('utf-8'),
                      repeat_action_probability)
    # Seeds ALE and loads the ROM; must precede any action/screen queries.
    self.seed_and_load_rom()
    self._action_set = self.ale.getMinimalActionSet()
    self.action_space = spaces.Discrete(len(self._action_set))
    # goals specific
    self._goals_set = monitor.goals_set_small  # 84x84
    self._goals_center = monitor.goals_center
    self.goals_space = spaces.Discrete(len(self._goals_set))
    self.desired_goal = -1  # we set and tell the agent to achieve this desired_goal.
    self.achieved_goal = -1  # we should keep track of which goal it currently achieved.
    self.goals_history = set(
    )  # can keep track of how it achieved the set of goals to the currently achieved_goal
    # we need to calculate whether agent achieve the goal so we need to keep track of agent loc
    # HACK only montezuma_revenge specific right now
    # NOTE(review): [42, 33] looks like the agent's start pixel location in
    # Montezuma's Revenge — confirm against whatever updates agent_last_x/y.
    if monitor.game_name == 'montezuma_revenge':
        self.agent_origin = [42, 33]
        self.agent_last_x = 42
        self.agent_last_y = 33
    # NOTE(review): 'screen_hight' is a typo for 'screen_height' (local only).
    (screen_width, screen_hight) = self.ale.getScreenDims()
    self.init_screen = self.ale.getScreenGrayscale()  # Don't think i will use this
    # NOTE(review): the 'ram' and 'image' branches below build identical
    # image-shaped observation spaces; since _obs_type is hard-coded to
    # 'image' above, the 'ram' branch is currently dead — confirm intent.
    if self._obs_type == 'ram':
        self.observation_space = spaces.Dict({
            'observation':
            spaces.Box(low=0,
                       high=255,
                       shape=(screen_hight, screen_width, 3),
                       dtype=np.uint8),
            'achieved_goal':
            spaces.Discrete(1),
            'desired_goal':
            spaces.Discrete(1)
        })
    elif self._obs_type == 'image':
        self.observation_space = spaces.Dict({
            'observation':
            spaces.Box(low=0,
                       high=255,
                       shape=(screen_hight, screen_width, 3),
                       dtype=np.uint8),
            'achieved_goal':
            spaces.Discrete(1),
            'desired_goal':
            spaces.Discrete(1)
        })
    else:
        raise error.Error('Unrecognized observation type: {}'.format(
            self._obs_type))
def main():
    """Interactive ALE player: WASD to move, ENTER to fire, R to reset.

    Renders the game screen (scaled) plus a live info panel in a pyglet
    window at ~60 updates per second.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('name', type=str)  # ROM name, e.g. 'breakout'
    args = parser.parse_args()

    screen_scale = 2     # display magnification of the raw ALE screen
    info_width = 400     # pixel width of the side info panel
    info_size = 24       # info label font size

    ale = atari_py.ALEInterface()
    ale.loadROM(atari_py.get_game_path(args.name))
    ale_width, ale_height = ale.getScreenDims()

    score = 0

    key = pyglet.window.key
    keys = key.KeyStateHandler()

    def get_action():
        # Map held keys to an ALE action: lr/ud are 0/1/2 axis indices
        # (1 = centered), fire is 0/1; ACTIONS is indexed by the triple.
        lr = 1
        ud = 1
        fire = 1 if keys[key.ENTER] else 0
        if keys[key.A]:
            lr -= 1
        if keys[key.D]:
            lr += 1
        if keys[key.W]:
            ud -= 1
        if keys[key.S]:
            ud += 1
        return ACTIONS[fire, lr, ud]

    def reset_game():
        # Restart the episode and zero the displayed score.
        nonlocal score
        ale.reset_game()
        score = 0

    def update(dt):
        # Per-tick: act, redraw the screen texture, then the info panel.
        nonlocal score
        action = get_action()
        score += ale.act(action)
        # print(score)
        if keys[key.R]:
            reset_game()
        screen = ale.getScreenRGB2()
        # ALE rows are top-to-bottom; pyglet expects bottom-to-top, so flip.
        image = pyglet.image.ImageData(
            ale_width,
            ale_height,
            'RGB',
            np.flip(screen, axis=0).tobytes(),
        )
        image.scale = screen_scale
        texture = image.get_texture()
        # Nearest-neighbour filtering keeps the pixel-art look when scaled.
        gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER,
                           gl.GL_NEAREST)
        texture.width *= screen_scale
        texture.height *= screen_scale
        window.clear()
        texture.blit(0, 0)
        info_strs = [
            "score: {}".format(score),
            "lives: {}".format(ale.lives()),
            "frames: {}".format(ale.getFrameNumber()),
            "fps: {:.2f}".format(pyglet.clock.get_fps()),
        ]
        if ale.game_over():
            info_strs.append("game_over")
        label = pyglet.text.Label("\n".join(info_strs),
                                  font_size=info_size,
                                  x=ale_width * screen_scale + 50,
                                  y=ale_height * screen_scale - 100,
                                  width=info_width - 100,
                                  color=(255, 255, 255, 255),
                                  multiline=True)
        label.draw()

    # Window is wide enough for the scaled screen plus the info panel.
    window = pyglet.window.Window(width=ale_width * screen_scale + info_width,
                                  height=ale_height * screen_scale)
    window.push_handlers(keys)
    pyglet.clock.schedule_interval(update, 1. / 60)
    pyglet.app.run()
def __init__(self, game='pong', obs_type='ram', frameskip=(2, 5),
             repeat_action_probability=0.):
    """Frameskip should be either a tuple (indicating a random range to
    choose from, with the top value exclude), or an int.

    obs_type selects the observation: 'ram' (128-byte vector) or one of
    the image variants ('image', 'color_rev', 'mirror', 'rot90',
    'rot90down14'), all of which share the RGB screen observation space.

    Raises:
        IOError: if the ROM for *game* cannot be found.
        error.Error: if *obs_type* is unrecognized.
    """
    utils.EzPickle.__init__(self, game, obs_type, frameskip,
                            repeat_action_probability)
    assert obs_type in ('ram', 'image', 'color_rev', 'mirror', 'rot90',
                        'rot90down14')

    self.game_path = atari_py.get_game_path(game)
    if not os.path.exists(self.game_path):
        raise IOError('You asked for game %s but path %s does not exist' %
                      (game, self.game_path))
    self._obs_type = obs_type
    self.frameskip = frameskip
    self.ale = atari_py.ALEInterface()
    self.viewer = None

    # Tune (or disable) ALE's action repeat:
    # https://github.com/openai/gym/issues/349
    assert isinstance(
        repeat_action_probability,
        (float, int)), "Invalid repeat_action_probability: {!r}".format(
            repeat_action_probability)
    self.ale.setFloat('repeat_action_probability'.encode('utf-8'),
                      repeat_action_probability)

    # seed() also loads the ROM, so it must run before querying action sets.
    self.seed()

    self._action_set = self.ale.getMinimalActionSet()
    self.action_space = spaces.Discrete(len(self._action_set))

    (screen_width, screen_height) = self.ale.getScreenDims()
    if self._obs_type == 'ram':
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            dtype=np.uint8,
                                            shape=(128, ))
    elif self._obs_type in ('image', 'color_rev', 'mirror', 'rot90',
                            'rot90down14'):
        # All image variants previously had five copy-pasted identical
        # branches; they share one screen-shaped RGB Box.
        # NOTE(review): 'rot90' variants keep the un-rotated (H, W, 3)
        # shape, as in the original code — confirm that is intended.
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(screen_height,
                                                   screen_width, 3),
                                            dtype=np.uint8)
    else:
        # Unreachable given the assert above; kept as a defensive guard.
        raise error.Error('Unrecognized observation type: {}'.format(
            self._obs_type))
def __init__(
        self,
        game="pong",
        frame_skip=4,  # Frames per step (>=1).
        num_img_obs=4,  # Number of (past) frames in observation (>=1) - "frame stacking".
        clip_reward=True,
        episodic_lives=True,
        fire_on_reset=False,
        max_start_noops=30,
        repeat_action_probability=0.,
        horizon=27000,
        no_extrinsic=False,
        no_negative_reward=False,
        normalize_obs=False,
        normalize_obs_steps=10000,
        downsampling_scheme='classical',
        record_freq=0,
        record_dir=None,
        score_multiplier=1.0):
    """Atari environment with frame-skip, frame-stacking and optional
    episode recording.

    Raises:
        IOError: if the ROM for *game* cannot be found.
        ValueError: if *downsampling_scheme* is unrecognized.
    """
    save__init__args(locals(), underscore=True)
    # ALE
    game_path = atari_py.get_game_path(game)
    if not os.path.exists(game_path):
        raise IOError("You asked for game {} but path {} does not "
                      " exist".format(game, game_path))
    self.ale = atari_py.ALEInterface()
    # Sticky-action probability must be set before loadROM to take effect.
    self.ale.setFloat(b'repeat_action_probability',
                      repeat_action_probability)
    self.ale.loadROM(game_path)

    # Spaces
    self._action_set = self.ale.getMinimalActionSet()
    self._action_space = IntBox(low=0, high=len(self._action_set))
    if downsampling_scheme == 'classical':
        self._frame_shape = (84, 84)  # (W, H)
    elif downsampling_scheme == 'new':
        self._frame_shape = (80, 104)
    else:
        # BUG FIX: previously an unknown scheme left _frame_shape unset,
        # deferring the failure to a confusing AttributeError later.
        raise ValueError("Unrecognized downsampling_scheme: {!r}".format(
            downsampling_scheme))
    obs_shape = (num_img_obs, self._frame_shape[1], self._frame_shape[0])
    self._observation_space = IntBox(low=0, high=255, shape=obs_shape,
                                     dtype="uint8")
    self._max_frame = self.ale.getScreenGrayscale()
    self._raw_frame_1 = self._max_frame.copy()
    self._raw_frame_2 = self._max_frame.copy()
    self._obs = np.zeros(shape=obs_shape, dtype="uint8")

    # Settings
    self._has_fire = "FIRE" in self.get_action_meanings()
    self._has_up = "UP" in self.get_action_meanings()
    self._horizon = int(horizon)
    self._multiplier = score_multiplier

    # Recording (disabled by default: record_freq=0, record_dir=None).
    self.record_env = False  # set in samping_process for environment 0
    self._record_episode = False
    self._record_freq = record_freq
    # BUG FIX: os.path.join(None, 'videos') raised TypeError with the
    # default record_dir=None; recording paths are now only built when a
    # directory is actually provided.
    self._video_dir = (os.path.join(record_dir, 'videos')
                       if record_dir is not None else None)
    if "TMPDIR" in os.environ:
        self._frames_dir = os.path.join("{}/frames".format(
            os.path.expandvars("$TMPDIR")))
        pathlib.Path(self._frames_dir).mkdir(exist_ok=True)
    elif self._video_dir is not None:
        self._frames_dir = os.path.join(self._video_dir, 'frames')
    else:
        self._frames_dir = None
    self._episode_number = 0
    self.reset()
def __init__(self, game='Pong', mode=None, difficulty=None,
             obs_type='image', frameskip=(2, 5),
             repeat_action_probability=0., full_action_space=False,
             orientation=True, color=1, size=1, noise=False):
    """Frameskip should be either a tuple (indicating a random range to
    choose from, with the top value exclude), or an int."""
    # Register every constructor argument with EzPickle so copies of the
    # env are rebuilt with identical settings.
    utils.EzPickle.__init__(self, game, mode, difficulty, obs_type,
                            frameskip, repeat_action_probability,
                            orientation, color, size, noise)
    assert obs_type in ('ram', 'image')

    self.game = game
    self.game_path = atari_py.get_game_path(game)
    self.game_mode = mode
    self.game_difficulty = difficulty

    # Visual-variation knobs (applied elsewhere during rendering).
    self.orientation = orientation
    self.color = color
    self.size = size
    self.noise = noise

    if not os.path.exists(self.game_path):
        raise IOError('You asked for game %s but path %s does not exist'
                      % (game, self.game_path))

    self._obs_type = obs_type
    self.frameskip = frameskip
    self.ale = atari_py.ALEInterface()
    self.viewer = None

    # Tune (or disable) ALE's action repeat:
    # https://github.com/openai/gym/issues/349
    assert isinstance(repeat_action_probability, (float, int)), \
        "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
    self.ale.setFloat('repeat_action_probability'.encode('utf-8'),
                      repeat_action_probability)

    # seed() also loads the ROM; it must precede any ALE queries below.
    self.seed()

    if full_action_space:
        self._action_set = self.ale.getLegalActionSet()
    else:
        self._action_set = self.ale.getMinimalActionSet()
    self.action_space = spaces.Discrete(len(self._action_set))

    (width_px, height_px) = self.ale.getScreenDims()

    # Change background color
    if not 1 <= self.color <= 255:
        raise error.Error('Unrecognized background color')

    if self._obs_type == 'ram':
        self.observation_space = spaces.Box(low=0, high=255,
                                            dtype=np.uint8, shape=(128, ))
    elif self._obs_type == 'image':
        self.observation_space = spaces.Box(low=0, high=255,
                                            shape=(height_px, width_px, 3),
                                            dtype=np.uint8)
    else:
        # Defensive: the assert above already restricts obs_type.
        raise error.Error('Unrecognized observation type: {}'.format(
            self._obs_type))