import time

# Hyperparameters for the replay memory, exploration schedule and episode
# count. The original author notes that these values are taken from "the
# paper" (the paper itself is not identified in this file).
REPLAY_MEMORY_SIZE = 1000000
# Derived as 1/50 of the replay memory capacity (20000 with the value above).
REPLAY_START_SIZE = int(REPLAY_MEMORY_SIZE / 50)
REPLAY_MINIBATCH_SIZE = 32
# Number of frames held by each of the state buffers allocated below.
AGENT_HISTORY_LENGTH = 4
# Disabled setting, kept for reference:
# TARGET_NETWORK_UPDATE_FREQUENCY = 10000
DISCOUNT_FACTOR = 0.99
INITIAL_EXPLORATION = 1.0
FINAL_EXPLORATION = 0.1
FINAL_EXPLORATION_FRAME = 10000
NUM_EPISODES = 5

# Initialize the ALE interface and load the game ROM.
ale = atari_py.ALEInterface()
# NOTE(review): the variable is called pong_path, but the ROM requested here
# is 'breakout'.
pong_path = atari_py.get_game_path('breakout')
ale.loadROM(pong_path)
# Use the minimal action set reported by ALE for the loaded game.
legal_actions = ale.getMinimalActionSet()
print("legal actions {}".format(legal_actions))
num_of_actions = len(legal_actions)
(screen_width, screen_height) = ale.getScreenDims()
# Buffer sized for a single screen: (height, width, 3 channels), uint8.
screen_data = np.zeros((screen_height, screen_width, 3),
                       dtype=np.uint8)  # Using RGB

# Two zero-initialized buffers, each holding AGENT_HISTORY_LENGTH screens of
# shape (height, width, 3).
# NOTE(review): presumably the current and next state of a transition --
# confirm against the rest of the script, which is not visible here.
state1 = np.zeros((AGENT_HISTORY_LENGTH, screen_height, screen_width, 3),
                  dtype=np.uint8)
state2 = np.zeros((AGENT_HISTORY_LENGTH, screen_height, screen_width, 3),
                  dtype=np.uint8)

# Observe the initial state (the code for this step is not part of this
# excerpt).
コード例 #2
0
    def __init__(
        self,
        game="pong",
        mode=None,
        difficulty=None,
        obs_type="image",
        frameskip=(2, 5),
        repeat_action_probability=0.0,
        full_action_space=False,
    ):
        """Set up an ALE-backed Atari environment.

        Arguments:
            game: the name of the game ("pong", "Enduro", etc.); don't add
                the "-v0" suffix.
            mode: game mode; different modes are available for different
                games. Stored as ``self.game_mode``.
            difficulty: game difficulty, stored as ``self.game_difficulty``.
            obs_type: either "ram" or "image"; selects the observation space
                built at the end of this constructor.
            frameskip: should be either a tuple (indicating a random range
                to choose from, with the top value excluded), or an int.
            repeat_action_probability: float or int forwarded to ALE's
                "repeat_action_probability" setting.
            full_action_space: when true, use ALE's legal action set;
                otherwise use the minimal action set.

        Raises:
            IOError: if the ROM path resolved for ``game`` does not exist.
            AssertionError: if ``obs_type`` or ``repeat_action_probability``
                fails validation.
        """

        # Hand *every* constructor argument to EzPickle, in signature order.
        # full_action_space used to be omitted, so an environment rebuilt
        # from these recorded arguments (see gym's utils.EzPickle) silently
        # fell back to the default minimal action set.
        utils.EzPickle.__init__(self, game, mode, difficulty, obs_type,
                                frameskip, repeat_action_probability,
                                full_action_space)
        assert obs_type in ("ram", "image")

        self.game = game
        self.game_path = atari_py.get_game_path(game)
        self.game_mode = mode
        self.game_difficulty = difficulty

        if not os.path.exists(self.game_path):
            msg = "You asked for game %s but path %s does not exist"
            raise IOError(msg % (game, self.game_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = atari_py.ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(
            repeat_action_probability,
            (float, int)), "Invalid repeat_action_probability: {!r}".format(
                repeat_action_probability)
        self.ale.setFloat("repeat_action_probability".encode("utf-8"),
                          repeat_action_probability)

        # NOTE(review): seed() is defined outside this excerpt; presumably it
        # also loads the ROM, since the action set is queried right after.
        self.seed()

        self._action_set = (self.ale.getLegalActionSet() if full_action_space
                            else self.ale.getMinimalActionSet())
        self.action_space = spaces.Discrete(len(self._action_set))

        (screen_width, screen_height) = self.ale.getScreenDims()
        if self._obs_type == "ram":
            # RAM observations: a flat vector of 128 uint8 values.
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                dtype=np.uint8,
                                                shape=(128, ))
        elif self._obs_type == "image":
            # Image observations: (height, width, 3) uint8 frames.
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(screen_height,
                                                       screen_width, 3),
                                                dtype=np.uint8)
        else:
            # Only reachable when the assert above is stripped (python -O).
            raise error.Error("Unrecognized observation type: {}".format(
                self._obs_type))
コード例 #3
0
    def __init__(self,
                 game,
                 seed=None,
                 use_sdl=False,
                 n_last_screens=4,
                 frame_skip=4,
                 treat_life_lost_as_terminal=True,
                 crop_or_scale='scale',
                 max_start_nullops=30,
                 record_screen_dir=None):
        """Create and configure an ALE emulator for ``game``.

        Args:
            game: game name passed to ``atari_py.get_game_path``.
            seed: ALE random seed in [0, 2 ** 31); when None, one is drawn
                from numpy's global random state.
            use_sdl: when true, enable ALE's on-screen display (requires
                the DISPLAY environment variable).
            n_last_screens: number of screens in the observation tuple.
            frame_skip: must be >= 1; stored as ``self.frame_skip``.
            treat_life_lost_as_terminal: stored on the instance.
            crop_or_scale: either 'crop' or 'scale'; stored on the instance.
            max_start_nullops: stored on the instance.
            record_screen_dir: when not None, passed to ALE's
                'record_screen_dir' setting.

        Raises:
            RuntimeError: if atari_py is unavailable, or ``use_sdl`` is set
                without a DISPLAY environment variable.
        """
        assert crop_or_scale in ('crop', 'scale')
        assert frame_skip >= 1
        self.n_last_screens = n_last_screens
        self.treat_life_lost_as_terminal = treat_life_lost_as_terminal
        self.crop_or_scale = crop_or_scale
        self.max_start_nullops = max_start_nullops

        # atari_py supplies both the ROM path and the ALE interface used
        # below, so it has to be importable.
        if not atari_py_available:
            raise RuntimeError(
                'You need to install atari_py>=0.1.1 to use ALE.')
        rom_path = atari_py.get_game_path(game)

        emulator = atari_py.ALEInterface()
        if seed is None:
            # No explicit seed: fall back to numpy's random state.
            seed = np.random.randint(0, 2**31)
        else:
            assert 0 <= seed < 2 ** 31, \
                "ALE's random seed must be in [0, 2 ** 31)."

        # All settings are applied before the ROM is loaded further down.
        emulator.setInt(b'random_seed', seed)
        emulator.setFloat(b'repeat_action_probability', 0.0)
        emulator.setBool(b'color_averaging', False)
        if record_screen_dir is not None:
            emulator.setString(b'record_screen_dir',
                               str(record_screen_dir).encode())
        self.frame_skip = frame_skip

        if use_sdl:
            if 'DISPLAY' not in os.environ:
                raise RuntimeError(
                    'Please set DISPLAY environment variable for use_sdl=True')
            # SDL settings below are from the ALE python example.
            platform_name = sys.platform
            if platform_name == 'darwin':
                import pygame
                pygame.init()
                # Sound doesn't work on OSX.
                emulator.setBool(b'sound', False)
            elif platform_name.startswith('linux'):
                emulator.setBool(b'sound', True)
            emulator.setBool(b'display_screen', True)

        emulator.loadROM(str(rom_path).encode())

        # A freshly loaded emulator must not have advanced any frame yet.
        assert emulator.getFrameNumber() == 0

        self.ale = emulator
        self.legal_actions = emulator.getMinimalActionSet()
        self.initialize()

        self.action_space = spaces.Discrete(len(self.legal_actions))
        # Each observation is a tuple of n_last_screens 84x84 uint8 screens.
        screen_space = spaces.Box(low=0, high=255, shape=(84, 84),
                                  dtype=np.uint8)
        self.observation_space = spaces.Tuple(
            [screen_space] * n_last_screens)
コード例 #4
0
 def __init__(self, name):
     """Create an ALE interface and load the ROM of the game ``name``."""
     rom_path = atari_py.get_game_path(name)
     self.ale = emulator = atari_py.ALEInterface()
     emulator.loadROM(rom_path)
コード例 #5
0
 def available_modes_for(game):
     """Return the modes ALE reports as available for ``game``.

     A fresh ALE interface is created, its random seed is set to 0, and the
     game's ROM is loaded before the modes are queried.
     """
     emulator = atari_py.ALEInterface()
     emulator.setInt(b"random_seed", 0)
     rom_path = atari_py.get_game_path(game)
     emulator.loadROM(rom_path)
     return emulator.getAvailableModes()
コード例 #6
0
    def __init__(self,
                 monitor,
                 frameskip=(2, 5),
                 repeat_action_probability=0.):
        """Build a goal-aware ALE environment driven by ``monitor``.

        Args:
            monitor: object providing ``game_name``, ``goals_set_small`` and
                ``goals_center``; it is also kept as ``self.monitor``.
            frameskip: stored as ``self.frameskip``.
            repeat_action_probability: float or int forwarded to ALE's
                'repeat_action_probability' setting.

        Raises:
            IOError: if the ROM path for ``monitor.game_name`` does not
                exist.
        """
        game_name = monitor.game_name
        self.game_path = atari_py.get_game_path(game_name)

        if not os.path.exists(self.game_path):
            raise IOError('You asked for game %s but path %s does not exist' %
                          (game_name, self.game_path))

        # HACK: the observation type is pinned to 'image' for now.
        self._obs_type = 'image'
        self.frameskip = frameskip
        self.ale = atari_py.ALEInterface()
        self.viewer = None
        # The monitor is kept around to track progress.
        self.monitor = monitor

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(
            repeat_action_probability,
            (float, int)), "Invalid repeat_action_probability: {!r}".format(
                repeat_action_probability)
        self.ale.setFloat(b'repeat_action_probability',
                          repeat_action_probability)

        self.seed_and_load_rom()

        self._action_set = self.ale.getMinimalActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))

        # Goal bookkeeping.
        self._goals_set = monitor.goals_set_small  # 84x84
        self._goals_center = monitor.goals_center
        self.goals_space = spaces.Discrete(len(self._goals_set))
        # Goal the agent is asked to reach; -1 until one is set.
        self.desired_goal = -1
        # Goal the agent has currently reached; -1 until one is achieved.
        self.achieved_goal = -1
        # Goals achieved on the way to the current achieved_goal.
        self.goals_history = set()

        # The agent location is tracked to decide whether a goal has been
        # reached. HACK: only montezuma_revenge is supported right now.
        if game_name == 'montezuma_revenge':
            self.agent_origin = [42, 33]
            self.agent_last_x, self.agent_last_y = self.agent_origin

        (screen_width, screen_height) = self.ale.getScreenDims()

        self.init_screen = self.ale.getScreenGrayscale()

        # 'ram' and 'image' share the same observation layout here (the
        # 'ram' case is not expected to be used).
        if self._obs_type not in ('ram', 'image'):
            raise error.Error('Unrecognized observation type: {}'.format(
                self._obs_type))
        frame_space = spaces.Box(low=0,
                                 high=255,
                                 shape=(screen_height, screen_width, 3),
                                 dtype=np.uint8)
        self.observation_space = spaces.Dict({
            'observation': frame_space,
            'achieved_goal': spaces.Discrete(1),
            'desired_goal': spaces.Discrete(1)
        })
コード例 #7
0
def main():
    """Run an interactive, keyboard-driven ALE viewer for one game.

    The game name is read from the command line. Each scheduled update
    (every 1/60 s) applies the action selected by the keyboard state
    (A/D and W/S adjust the two direction indices, ENTER is fire), resets
    the game when R is held, then draws the scaled screen next to a text
    panel with score, lives, frame count and fps.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('name', type=str)
    cli_args = arg_parser.parse_args()

    screen_scale = 2
    info_width = 400
    info_size = 24

    ale = atari_py.ALEInterface()
    ale.loadROM(atari_py.get_game_path(cli_args.name))
    ale_width, ale_height = ale.getScreenDims()
    scaled_width = ale_width * screen_scale
    scaled_height = ale_height * screen_scale
    score = 0

    key = pyglet.window.key
    keys = key.KeyStateHandler()

    def get_action():
        # Both direction indices start at the neutral value 1 and are moved
        # down/up by the pressed keys.
        fire = int(bool(keys[key.ENTER]))
        lr = 1 + bool(keys[key.D]) - bool(keys[key.A])
        ud = 1 + bool(keys[key.S]) - bool(keys[key.W])
        return ACTIONS[fire, lr, ud]

    def update(dt):
        nonlocal score
        score += ale.act(get_action())
        # print(score)
        if keys[key.R]:
            ale.reset_game()
            score = 0

        # The screen rows are flipped vertically before upload.
        frame = ale.getScreenRGB2()
        image = pyglet.image.ImageData(
            ale_width,
            ale_height,
            'RGB',
            np.flip(frame, axis=0).tobytes(),
        )
        image.scale = screen_scale
        texture = image.get_texture()
        # Nearest-neighbour magnification.
        gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER,
                           gl.GL_NEAREST)
        texture.width = texture.width * screen_scale
        texture.height = texture.height * screen_scale
        window.clear()
        texture.blit(0, 0)

        info_lines = [
            "score: {}".format(score),
            "lives: {}".format(ale.lives()),
            "frames: {}".format(ale.getFrameNumber()),
            "fps: {:.2f}".format(pyglet.clock.get_fps()),
        ] + (["game_over"] if ale.game_over() else [])
        pyglet.text.Label("\n".join(info_lines),
                          font_size=info_size,
                          x=scaled_width + 50,
                          y=scaled_height - 100,
                          width=info_width - 100,
                          color=(255, 255, 255, 255),
                          multiline=True).draw()

    # The window is wide enough for the scaled screen plus the info panel.
    window = pyglet.window.Window(width=scaled_width + info_width,
                                  height=scaled_height)
    window.push_handlers(keys)
    pyglet.clock.schedule_interval(update, 1. / 60)
    pyglet.app.run()
コード例 #8
0
    def __init__(self,
                 game='pong',
                 obs_type='ram',
                 frameskip=(2, 5),
                 repeat_action_probability=0.):
        """Set up an ALE-backed environment with extra observation variants.

        Args:
            game: game name passed to ``atari_py.get_game_path``.
            obs_type: one of 'ram', 'image', 'color_rev', 'mirror', 'rot90'
                or 'rot90down14'.
            frameskip: either a tuple (a random range to choose from, with
                the top value excluded) or an int.
            repeat_action_probability: float or int forwarded to ALE's
                'repeat_action_probability' setting.

        Raises:
            IOError: if the ROM path resolved for ``game`` does not exist.
        """

        utils.EzPickle.__init__(self, game, obs_type, frameskip,
                                repeat_action_probability)
        # Every non-RAM variant declares the same image-shaped space below.
        image_like_types = ('image', 'color_rev', 'mirror', 'rot90',
                            'rot90down14')
        assert obs_type in ('ram', 'image', 'color_rev', 'mirror', 'rot90',
                            'rot90down14')

        rom_path = atari_py.get_game_path(game)
        self.game_path = rom_path
        if not os.path.exists(rom_path):
            raise IOError('You asked for game %s but path %s does not exist' %
                          (game, rom_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = atari_py.ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(
            repeat_action_probability,
            (float, int)), "Invalid repeat_action_probability: {!r}".format(
                repeat_action_probability)
        self.ale.setFloat(b'repeat_action_probability',
                          repeat_action_probability)

        self.seed()

        self._action_set = self.ale.getMinimalActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))

        (screen_width, screen_height) = self.ale.getScreenDims()
        if self._obs_type == 'ram':
            # RAM observations: a flat vector of 128 uint8 values.
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                dtype=np.uint8,
                                                shape=(128, ))
        elif self._obs_type in image_like_types:
            # NOTE(review): the rotated variants also declare a
            # (height, width, 3) shape -- confirm this matches the frames
            # actually produced for 'rot90' / 'rot90down14'.
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(screen_height,
                                                       screen_width, 3),
                                                dtype=np.uint8)
        else:
            raise error.Error('Unrecognized observation type: {}'.format(
                self._obs_type))
コード例 #9
0
    def __init__(
            self,
            game="pong",
            frame_skip=4,  # Frames per step (>=1).
            num_img_obs=4,  # Number of (past) frames in observation (>=1) - "frame stacking".
            clip_reward=True,
            episodic_lives=True,
            fire_on_reset=False,
            max_start_noops=30,
            repeat_action_probability=0.,
            horizon=27000,
            no_extrinsic=False,
            no_negative_reward=False,
            normalize_obs=False,
            normalize_obs_steps=10000,
            downsampling_scheme='classical',
            record_freq=0,
            record_dir=None,
            score_multiplier=1.0):
        """Create the ALE emulator, observation buffers and recording paths.

        Args:
            game: game name passed to ``atari_py.get_game_path``.
            frame_skip: frames per step (>=1).
            num_img_obs: number of (past) frames in an observation (>=1).
            repeat_action_probability: forwarded to ALE's
                'repeat_action_probability' setting.
            horizon: converted to int and stored as ``self._horizon``.
            downsampling_scheme: 'classical' (84x84 frames) or 'new'
                (80 wide x 104 high frames).
            record_freq: stored as ``self._record_freq``.
            record_dir: base directory for recordings; videos go to
                ``<record_dir>/videos``. May be None, in which case no
                video directory is configured.
            score_multiplier: stored as ``self._multiplier``.

        The remaining arguments are not used directly in this constructor.
        NOTE(review): presumably ``save__init__args`` stores all arguments
        as ``self._<name>`` for use by other methods -- confirm.

        Raises:
            IOError: if the ROM path resolved for ``game`` does not exist.
            ValueError: if ``downsampling_scheme`` is not recognized.
        """
        # Must remain the first statement so that locals() holds only the
        # constructor arguments.
        save__init__args(locals(), underscore=True)

        # ALE
        game_path = atari_py.get_game_path(game)
        if not os.path.exists(game_path):
            raise IOError("You asked for game {} but path {} does not "
                          "exist".format(game, game_path))
        self.ale = atari_py.ALEInterface()
        self.ale.setFloat(b'repeat_action_probability',
                          repeat_action_probability)
        self.ale.loadROM(game_path)

        # Spaces
        self._action_set = self.ale.getMinimalActionSet()
        self._action_space = IntBox(low=0, high=len(self._action_set))
        if downsampling_scheme == 'classical':
            self._frame_shape = (84, 84)  # (W, H)
        elif downsampling_scheme == 'new':
            self._frame_shape = (80, 104)
        else:
            # Fail with a clear message instead of an AttributeError on the
            # missing _frame_shape just below.
            raise ValueError("Unrecognized downsampling_scheme: {!r}".format(
                downsampling_scheme))
        # Observations are stacked as (num frames, H, W).
        obs_shape = (num_img_obs, self._frame_shape[1], self._frame_shape[0])
        self._observation_space = IntBox(low=0,
                                         high=255,
                                         shape=obs_shape,
                                         dtype="uint8")
        # Three raw grayscale buffers, all starting as copies of the
        # current screen.
        self._max_frame = self.ale.getScreenGrayscale()
        self._raw_frame_1 = self._max_frame.copy()
        self._raw_frame_2 = self._max_frame.copy()
        self._obs = np.zeros(shape=obs_shape, dtype="uint8")

        # Settings
        self._has_fire = "FIRE" in self.get_action_meanings()
        self._has_up = "UP" in self.get_action_meanings()
        self._horizon = int(horizon)
        self._multiplier = score_multiplier

        # Recording
        self.record_env = False  # set in the sampling process for environment 0
        self._record_episode = False
        self._record_freq = record_freq
        # record_dir defaults to None; os.path.join(None, ...) would raise a
        # TypeError, so only build the video path when a directory is given.
        if record_dir is None:
            self._video_dir = None
        else:
            self._video_dir = os.path.join(record_dir, 'videos')
        if "TMPDIR" in os.environ:
            # Prefer a scratch directory under $TMPDIR for frame dumps.
            self._frames_dir = os.path.join("{}/frames".format(
                os.path.expandvars("$TMPDIR")))
            pathlib.Path(self._frames_dir).mkdir(exist_ok=True)
        elif self._video_dir is not None:
            self._frames_dir = os.path.join(self._video_dir, 'frames')
        else:
            # No place to write frames without record_dir or $TMPDIR.
            self._frames_dir = None
        self._episode_number = 0

        self.reset()
0
    def __init__(self,
                 game='Pong',
                 mode=None,
                 difficulty=None,
                 obs_type='image',
                 frameskip=(2, 5),
                 repeat_action_probability=0.,
                 full_action_space=False,
                 orientation=True,
                 color=1,
                 size=1,
                 noise=False):
        """Set up an ALE-backed Atari environment with visual variations.

        Args:
            game: game name passed to ``atari_py.get_game_path``.
            mode: stored as ``self.game_mode``.
            difficulty: stored as ``self.game_difficulty``.
            obs_type: either 'ram' or 'image'.
            frameskip: either a tuple (a random range to choose from, with
                the top value excluded) or an int.
            repeat_action_probability: float or int forwarded to ALE's
                'repeat_action_probability' setting.
            full_action_space: when true, use ALE's legal action set;
                otherwise use the minimal action set.
            orientation, size, noise: stored on the instance; how they are
                applied is implemented outside this constructor.
            color: background color value; must lie in [1, 255]. Stored on
                the instance.

        Raises:
            IOError: if the ROM path resolved for ``game`` does not exist.
            error.Error: if ``color`` is outside [1, 255].
        """

        # The arguments must be passed in the same order as the signature:
        # they are positional, so leaving out full_action_space shifted
        # orientation/color/size/noise by one slot whenever the environment
        # was rebuilt from the recorded arguments (see gym's utils.EzPickle).
        utils.EzPickle.__init__(self, game, mode, difficulty, obs_type,
                                frameskip, repeat_action_probability,
                                full_action_space, orientation, color, size,
                                noise)

        assert obs_type in ('ram', 'image')

        self.game = game
        self.game_path = atari_py.get_game_path(game)
        self.game_mode = mode
        self.game_difficulty = difficulty
        self.orientation = orientation
        self.color = color
        self.size = size
        self.noise = noise

        if not os.path.exists(self.game_path):
            msg = 'You asked for game %s but path %s does not exist'
            raise IOError(msg % (game, self.game_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = atari_py.ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(repeat_action_probability, (float, int)), \
                "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'),
                          repeat_action_probability)

        # NOTE(review): seed() is defined outside this excerpt; presumably it
        # also loads the ROM, since the action set is queried right after.
        self.seed()

        self._action_set = (self.ale.getLegalActionSet() if full_action_space
                            else self.ale.getMinimalActionSet())
        self.action_space = spaces.Discrete(len(self._action_set))

        (screen_width, screen_height) = self.ale.getScreenDims()

        # Validate the requested background color (nothing is changed here).
        if self.color > 255 or self.color < 1:
            raise error.Error('Unrecognized background color')

        if self._obs_type == 'ram':
            # RAM observations: a flat vector of 128 uint8 values.
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                dtype=np.uint8,
                                                shape=(128, ))
        elif self._obs_type == 'image':
            # Image observations: (height, width, 3) uint8 frames.
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(screen_height,
                                                       screen_width, 3),
                                                dtype=np.uint8)
        else:
            # Only reachable when the assert above is stripped (python -O).
            raise error.Error('Unrecognized observation type: {}'.format(
                self._obs_type))