Exemple #1
0
    def _start(self, directory, video_callable=None, force=False, resume=False,
              write_upon_reset=False, uid=None, mode=None):
        """Start monitoring.

        Creates the monitor directory if it is missing, selects the video
        schedule, checks the directory for existing monitor files, registers
        this monitor with ``monitor_closer`` and creates the stats recorder.

        Args:
            directory (str): A per-training run directory where to record stats.
            video_callable (Optional[function, False]): function that takes in the index of the episode and outputs a boolean, indicating whether we should record a video on this episode. The default (for video_callable is None) is to take perfect cubes, capped at 1000. False disables video recording.
            force (bool): Clear out existing training data from this directory (by deleting every file prefixed with "openaigym.").
            resume (bool): Retain the training data already in this directory, which will be merged with our new data
            write_upon_reset (bool): Write the manifest file on each reset. (This is currently a JSON file, so writing it is somewhat expensive.)
            uid (Optional[str]): A unique id used as part of the suffix for the file. By default, uses os.getpid().
            mode (['evaluation', 'training']): Whether this is an evaluation or training episode.

        Raises:
            error.Error: If video_callable is neither callable, None, nor False,
                or if the directory already contains training manifests while
                neither force nor resume is set.
        """
        # Environments without a spec are still monitored, under a placeholder id.
        if self.env.spec is None:
            logger.warning("Trying to monitor an environment which has no 'spec' set. This usually means you did not create it via 'gym.make', and is recommended only for advanced users.")
            env_id = '(unknown)'
        else:
            env_id = self.env.spec.id

        if not os.path.exists(directory):
            logger.info('Creating monitor directory %s', directory)
            # os.makedirs only accepts exist_ok on Python 3.
            if six.PY3:
                os.makedirs(directory, exist_ok=True)
            else:
                os.makedirs(directory)

        # Normalise video_callable into a function of the episode index.
        if video_callable is None:
            video_callable = capped_cubic_video_schedule
        elif video_callable == False:
            video_callable = disable_videos
        elif not callable(video_callable):
            raise error.Error('You must provide a function, None, or False for video_callable, not {}: {}'.format(type(video_callable), video_callable))
        self.video_callable = video_callable

        # Check on whether we need to clear anything
        if force:
            clear_monitor_files(directory)
        elif not resume:
            # Neither clearing nor resuming: refuse to mix with existing data.
            training_manifests = detect_training_manifests(directory)
            if len(training_manifests) > 0:
                raise error.Error('''Trying to write to monitor directory {} with existing monitor files: {}.

 You should use a unique directory for each training run, or use 'force=True' to automatically clear previous monitor files.'''.format(directory, ', '.join(training_manifests[:5])))

        self._monitor_id = monitor_closer.register(self)

        self.enabled = True
        self.directory = os.path.abspath(directory)
        # We use the 'openai-gym' prefix to determine if a file is
        # ours
        self.file_prefix = FILE_PREFIX
        # The infix combines the monitor id with the caller-supplied uid,
        # falling back to the process id when no uid is given.
        self.file_infix = '{}.{}'.format(self._monitor_id, uid if uid else os.getpid())

        self.stats_recorder = stats_recorder.StatsRecorder(directory, '{}.episode_batch.{}'.format(self.file_prefix, self.file_infix), autoreset=self.env_semantics_autoreset, env_id=env_id)

        # NOTE(review): the directory was already created above when missing;
        # this second check looks redundant -- confirm before removing.
        if not os.path.exists(directory): os.mkdir(directory)
        self.write_upon_reset = write_upon_reset

        if mode is not None:
            self._set_mode(mode)
Exemple #2
0
def _upload(training_dir,
            algorithm_id=None,
            writeup=None,
            benchmark_run_id=None,
            api_key=None,
            ignore_open_monitors=False):
    """Upload the data recorded in one training directory and create an evaluation.

    Args:
        training_dir: Directory holding the monitor output to upload.
        algorithm_id: Sent to the evaluation as the algorithm id.
        writeup: Passed through to the evaluation.
        benchmark_run_id: Passed through to the evaluation.
        api_key: Forwarded to the upload and evaluation calls.
        ignore_open_monitors: When true, skip the check for monitors that are
            still open.

    Returns:
        The object returned by ``resource.Evaluation.create``.

    Raises:
        error.Error: If a monitor is still open (and the check is not
            skipped), or if neither an episode batch nor a training video
            was produced by the upload.
    """
    if not ignore_open_monitors:
        open_monitors = monitoring._open_monitors()
        if len(open_monitors) > 0:
            envs = [
                m.env.spec.id if m.env.spec else '(unknown)'
                for m in open_monitors
            ]
            raise error.Error(
                "Still have an open monitor on {}. You must run 'env.close()' before uploading."
                .format(', '.join(envs)))

    env_info, training_episode_batch, training_video = upload_training_data(
        training_dir, api_key=api_key)
    env_id = env_info['env_id']
    training_episode_batch_id = training_video_id = None
    if training_episode_batch:
        training_episode_batch_id = training_episode_batch.id
    if training_video:
        training_video_id = training_video.id

    has_learning_curve = training_episode_batch_id is not None
    has_training_video = training_video_id is not None

    # Validate unconditionally: this check used to live inside a log-level
    # guard, so it was skipped whenever the logger was quieter than INFO.
    if not has_learning_curve and not has_training_video:
        raise error.Error(
            "[%s] You didn't have any recorded training data in %s. Once you've used 'env.monitor.start(training_dir)' to start recording, you need to actually run some rollouts. Please join the community chat on https://gym.openai.com if you have any issues."
            % (env_id, training_dir))

    # logger.info applies its own level filtering, so no explicit guard is needed.
    if has_learning_curve and has_training_video:
        logger.info(
            '[%s] Creating evaluation object from %s with learning curve and training video',
            env_id, training_dir)
    elif has_learning_curve:
        logger.info(
            '[%s] Creating evaluation object from %s with learning curve',
            env_id, training_dir)
    else:
        logger.info(
            '[%s] Creating evaluation object from %s with training video',
            env_id, training_dir)

    evaluation = resource.Evaluation.create(
        training_episode_batch=training_episode_batch_id,
        training_video=training_video_id,
        env=env_info['env_id'],
        algorithm={
            'id': algorithm_id,
        },
        benchmark_run_id=benchmark_run_id,
        writeup=writeup,
        gym_version=env_info['gym_version'],
        api_key=api_key,
    )

    return evaluation
Exemple #3
0
    def __init__(self, env, path=None, metadata=None, enabled=True, base_path=None):
        """Set up a recorder that writes the environment's rendering to a file.

        Args:
            env: Environment to record; its ``metadata`` dict is read for the
                supported render modes, the async flag and the frame rate.
            path: Full output path. Must end in the extension required by the
                recording mode ('.mp4', or '.json' for text-only environments).
            metadata: Optional dict stored as ``self.metadata``; a
                'content_type' entry is written into it.
            enabled: When false, only ``_async`` and ``enabled`` are set and
                the constructor returns immediately.
            base_path: Output path without extension; the required extension
                is appended. Mutually exclusive with ``path``.

        Raises:
            error.Error: If both ``path`` and ``base_path`` are given, or if
                the path has the wrong file extension.
        """
        modes = env.metadata.get('render.modes', [])
        self._async = env.metadata.get('semantics.async')
        self.enabled = enabled

        # Don't bother setting anything else if not enabled
        if not self.enabled:
            return

        # Prefer 'rgb_array' rendering; fall back to 'ansi' text output, and
        # disable recording entirely if neither mode is available.
        self.ansi_mode = False
        if 'rgb_array' not in modes:
            if 'ansi' in modes:
                self.ansi_mode = True
            else:
                logger.info('Disabling video recorder because {} neither supports video mode "rgb_array" nor "ansi".'.format(env))
                # Whoops, turns out we shouldn't be enabled after all
                self.enabled = False
                return

        if path is not None and base_path is not None:
            raise error.Error("You can pass at most one of `path` or `base_path`.")

        self.last_frame = None
        self.env = env

        required_ext = '.json' if self.ansi_mode else '.mp4'
        if path is None:
            if base_path is not None:
                # Base path given, append ext
                path = base_path + required_ext
            else:
                # Otherwise, just generate a unique filename
                with tempfile.NamedTemporaryFile(suffix=required_ext, delete=False) as f:
                    path = f.name
        self.path = path

        path_base, actual_ext = os.path.splitext(self.path)

        if actual_ext != required_ext:
            hint = " HINT: The environment is text-only, therefore we're recording its text output in a structured JSON format." if self.ansi_mode else ''
            raise error.Error("Invalid path given: {} -- must have file extension {}.{}".format(self.path, required_ext, hint))
        # Touch the file in any case, so we know it's present. (This
        # corrects for platform differences: using ffmpeg on OS X, the
        # file is precreated, but not on Linux.)
        touch(path)

        self.frames_per_sec = env.metadata.get('video.frames_per_second', 30)
        self.encoder = None # lazily start the process
        self.broken = False

        # Dump metadata
        # Note: when a metadata dict is passed in, it is stored and modified
        # in place rather than copied.
        self.metadata = metadata or {}
        self.metadata['content_type'] = 'video/vnd.openai.ansivid' if self.ansi_mode else 'video/mp4'
        self.metadata_path = '{}.meta.json'.format(path_base)
        self.write_metadata()

        logger.info('Starting new video recorder writing to %s', self.path)
        self.empty = True
Exemple #4
0
def collapse_env_infos(env_infos, training_dir):
    """Reduce a list of identical env_info entries to a single one.

    Args:
        env_infos: Non-empty sequence of env_info mappings.
        training_dir: Training directory, used only in error messages.

    Returns:
        The first entry of ``env_infos``.

    Raises:
        error.Error: If any entry differs from the first one, or if the first
            entry lacks the 'env_id' or 'gym_version' key.
    """
    assert len(env_infos) > 0

    reference = env_infos[0]

    # Every later entry has to equal the first one.
    mismatched = [info for info in env_infos[1:] if reference != info]
    if mismatched:
        raise error.Error('Found two unequal env_infos: {} and {}. This usually indicates that your training directory {} has commingled results from multiple runs.'.format(reference, mismatched[0], training_dir))

    # The collapsed entry must carry the keys the callers rely on.
    absent = [key for key in ('env_id', 'gym_version') if key not in reference]
    if absent:
        raise error.Error("env_info {} from training directory {} is missing expected key {}. This is unexpected and likely indicates a bug in gym.".format(reference, training_dir, absent[0]))

    return reference
Exemple #5
0
    def __init__(self, player_color, opponent, observation_type,
                 illegal_move_mode, board_size):
        """Configure the environment and build its observation and action spaces.

        Args:
            player_color: Stone color for the agent. Either 'black' or 'white'
            opponent: An opponent policy
            observation_type: State encoding. Only 'image3c' is accepted.
            illegal_move_mode: What to do when the agent makes an illegal move. Choices: 'raise' or 'lose'
            board_size: Side length of the board; must be an int >= 1.

        Raises:
            error.Error: If player_color is not 'black' or 'white', or if the
                observation type is unsupported.
        """
        assert isinstance(
            board_size,
            int) and board_size >= 1, 'Invalid board size: {}'.format(
                board_size)
        self.board_size = board_size

        self._seed()

        # Translate the color name into the pachi_py constant.
        colormap = {
            'black': pachi_py.BLACK,
            'white': pachi_py.WHITE,
        }
        try:
            self.player_color = colormap[player_color]
        except KeyError:
            raise error.Error(
                "player_color must be 'black' or 'white', not {}".format(
                    player_color))

        self.opponent_policy = None
        self.opponent = opponent

        assert observation_type in ['image3c']
        self.observation_type = observation_type

        assert illegal_move_mode in ['lose', 'raise']
        self.illegal_move_mode = illegal_move_mode

        # This check duplicates the assert above; it only takes effect when
        # asserts are disabled (python -O).
        if self.observation_type != 'image3c':
            raise error.Error('Unsupported observation type: {}'.format(
                self.observation_type))

        # The observation space is a Box with bounds 0 and 1, shaped like the
        # encoding of a freshly created board.
        shape = pachi_py.CreateBoard(self.board_size).encode().shape
        self.observation_space = spaces.Box(np.zeros(shape), np.ones(shape))
        # One action for each board position, pass, and resign
        self.action_space = spaces.Discrete(self.board_size**2 + 2)

        # Filled in by _reset()
        self.state = None
        self.done = True
Exemple #6
0
    def spec(self, id):
        """Return the registered spec for the environment ID `id`.

        Raises:
            error.Error: If `id` does not match the expected ID pattern.
            error.DeprecatedEnv: If `id` is not registered but other IDs with
                the same environment name are.
            error.UnregisteredEnv: If nothing with that name is registered.
        """
        parsed = env_id_re.search(id)
        if parsed is None:
            raise error.Error(
                'Attempted to look up malformed environment ID: {}. (Currently all IDs must be of the form {}.)'
                .format(id.encode('utf-8'), env_id_re.pattern))

        try:
            return self.env_specs[id]
        except KeyError:
            # Compare only the name part of the ID (the first regex group)
            # against every registered spec to find other versions of it.
            requested_name = parsed.group(1)
            same_name_ids = []
            for registered_id, registered_spec in self.env_specs.items():
                if requested_name == registered_spec._env_name:
                    same_name_ids.append(registered_id)

            if not same_name_ids:
                raise error.UnregisteredEnv(
                    'No registered env with id: {}'.format(id))
            raise error.DeprecatedEnv(
                'Env {} not found (valid versions include {})'.format(
                    id, same_name_ids))
Exemple #7
0
    def __init__(self, game='pong', obs_type='ram', frameskip=(2, 5), repeat_action_probability=0.):
        """Create the emulator interface and define action/observation spaces.

        Frameskip should be either a tuple (indicating a random range to
        choose from, with the top value excluded), or an int.

        Args:
            game: Name of the game; resolved to a path via atari_py.get_game_path.
            obs_type: 'ram' or 'image'.
            frameskip: See above; stored as-is on the instance.
            repeat_action_probability: float or int, forwarded to the emulator's
                'repeat_action_probability' setting.

        Raises:
            IOError: If the resolved game path does not exist.
            error.Error: If the observation type is not recognised.
        """

        utils.EzPickle.__init__(self, game, obs_type)
        assert obs_type in ('ram', 'image')

        self.game_path = atari_py.get_game_path(game)
        if not os.path.exists(self.game_path):
            raise IOError('You asked for game %s but path %s does not exist'%(game, self.game_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = atari_py.ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(repeat_action_probability, (float, int)), "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'), repeat_action_probability)

        self._seed()

        # Scratch buffer with four channels per pixel, sized to the screen.
        (screen_width, screen_height) = self.ale.getScreenDims()
        self._buffer = np.empty((screen_height, screen_width, 4), dtype=np.uint8)

        self._action_set = self.ale.getMinimalActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))

        # NOTE(review): the screen dimensions were already fetched above; this
        # second call looks redundant -- confirm before removing.
        (screen_width,screen_height) = self.ale.getScreenDims()
        if self._obs_type == 'ram':
            # 128 entries, each bounded by 0 and 255.
            self.observation_space = spaces.Box(low=np.zeros(128), high=np.zeros(128)+255)
        elif self._obs_type == 'image':
            self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3))
        else:
            # Only reachable when the assert on obs_type above is disabled (python -O).
            raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
Exemple #8
0
 def _set_mode(self, mode):
     if mode == 'evaluation':
         type = 'e'
     elif mode == 'training':
         type = 't'
     else:
         raise error.Error('Invalid mode {}: must be "training" or "evaluation"', mode)
     self.stats_recorder.type = type
Exemple #9
0
    def __init__(self, player_color, opponent, observation_type,
                 illegal_move_mode, board_size):
        """Configure the environment and build its action and observation spaces.

        Args:
            player_color: Stone color for the agent. Either 'black' or 'white'
            opponent: An opponent policy
            observation_type: State encoding. Only 'numpy3c' is accepted.
            illegal_move_mode: What to do when the agent makes an illegal move. Choices: 'raise' or 'lose'
            board_size: size of the Hex board; must be an int >= 1.

        Raises:
            error.Error: If player_color is not 'black' or 'white', or if the
                observation type is unsupported.
        """
        assert isinstance(
            board_size,
            int) and board_size >= 1, 'Invalid board size: {}'.format(
                board_size)
        self.board_size = board_size

        # Translate the color name into the HexEnv constant.
        colormap = {
            'black': HexEnv.BLACK,
            'white': HexEnv.WHITE,
        }
        try:
            self.player_color = colormap[player_color]
        except KeyError:
            raise error.Error(
                "player_color must be 'black' or 'white', not {}".format(
                    player_color))

        self.opponent = opponent

        assert observation_type in ['numpy3c']
        self.observation_type = observation_type

        assert illegal_move_mode in ['lose', 'raise']
        self.illegal_move_mode = illegal_move_mode

        # This check duplicates the assert above; it only takes effect when
        # asserts are disabled (python -O).
        if self.observation_type != 'numpy3c':
            raise error.Error('Unsupported observation type: {}'.format(
                self.observation_type))

        # One action for each board position and resign
        self.action_space = spaces.Discrete(self.board_size**2 + 1)
        # The observation space takes its shape from an actual reset
        # observation, with bounds 0 and 1.
        # NOTE(review): reset() runs before _seed() below -- confirm that
        # reset() does not depend on state that _seed() initialises.
        observation = self.reset()
        self.observation_space = spaces.Box(np.zeros(observation.shape),
                                            np.ones(observation.shape))

        self._seed()
Exemple #10
0
    def put(self, contents, encode='json'):
        """Send `contents` as a file in an HTTP POST to ``self.post_url``.

        Args:
            contents: Payload to upload. Serialised with ``json.dumps`` when
                ``encode == 'json'``; sent unchanged when ``encode`` is None.
            encode: Either 'json' or None.

        Raises:
            error.Error: If ``encode`` is neither 'json' nor None, or if the
                response status code is not 204.
        """
        if encode == 'json':
            contents = json.dumps(contents)
        elif encode is not None:
            raise error.Error('Encode request for put must be "json" or None, not {}'.format(encode))

        files = {'file': contents}

        # The request is sent with empty headers; the previously built
        # Content-Type dict was never passed along and has been dropped.
        body, code, _ = api_requestor.http_client.request(
            'post', self.post_url, post_data=self.post_fields, files=files, headers={})
        if code != 204:
            raise error.Error("Upload to S3 failed. If error persists, please contact us at [email protected] this message. S3 returned '{} -- {}'. Tried 'POST {}' with fields {}.".format(code, body, self.post_url, self.post_fields))
Exemple #11
0
 def _reset_opponent(self, board):
     """Rebuild ``self.opponent_policy`` from the configured opponent name.

     Args:
         board: Board handed to the pachi policy factory.

     Raises:
         error.Error: If the opponent name is not a recognised policy.
     """
     opponent = self.opponent
     if opponent == 'random':
         policy = make_random_policy(self.np_random)
     elif opponent == 'pachi:uct:_2400':
         # TODO: strength as argument
         policy = make_pachi_policy(
             board=board,
             engine_type=six.b('uct'),
             pachi_timestr=six.b('_2400'))
     else:
         raise error.Error('Unrecognized opponent policy {}'.format(opponent))
     self.opponent_policy = policy
Exemple #12
0
    def _step(self, action):
        """Play the agent's action, then let the opponent reply.

        Args:
            action: The agent's move; compared against the resign action and
                otherwise passed to ``self.state.act``.

        Returns:
            A tuple ``(observation, reward, done, info)`` where observation is
            the encoded board, reward is one of -1., 0. or 1., and info is
            ``{'state': self.state}``.

        Raises:
            pachi_py.IllegalMove: Re-raised when the move is illegal and
                ``illegal_move_mode`` is 'raise'.
            error.Error: If ``illegal_move_mode`` has an unsupported value.
        """
        assert self.state.color == self.player_color

        # If already terminal, then don't do anything
        if self.done:
            return self.state.board.encode(), 0., True, {'state': self.state}

        # If resigned, then we're done
        if action == _resign_action(self.board_size):
            self.done = True
            return self.state.board.encode(), -1., True, {'state': self.state}

        # Play
        prev_state = self.state
        try:
            self.state = self.state.act(action)
        except pachi_py.IllegalMove:
            if self.illegal_move_mode == 'raise':
                # Re-raise the exception currently being handled.
                six.reraise(*sys.exc_info())
            elif self.illegal_move_mode == 'lose':
                # Automatic loss on illegal move
                self.done = True
                return self.state.board.encode(), -1., True, {
                    'state': self.state
                }
            else:
                raise error.Error('Unsupported illegal move action: {}'.format(
                    self.illegal_move_mode))

        # Opponent play
        if not self.state.board.is_terminal:
            self.state, opponent_resigned = self._exec_opponent_play(
                self.state, prev_state, action)
            # After opponent play, we should be back to the original color
            assert self.state.color == self.player_color

            # If the opponent resigns, then the agent wins
            if opponent_resigned:
                self.done = True
                return self.state.board.encode(), 1., True, {
                    'state': self.state
                }

        # Reward: if nonterminal, then the reward is 0
        if not self.state.board.is_terminal:
            self.done = False
            return self.state.board.encode(), 0., False, {'state': self.state}

        # We're in a terminal state. Reward is 1 if won, -1 if lost
        assert self.state.board.is_terminal
        self.done = True
        # A positive official score counts as a white win, a negative one as a
        # black win; a score of exactly zero yields reward 0.
        white_wins = self.state.board.official_score > 0
        black_wins = self.state.board.official_score < 0
        player_wins = (white_wins and self.player_color == pachi_py.WHITE) or (
            black_wins and self.player_color == pachi_py.BLACK)
        reward = 1. if player_wins else -1. if (
            white_wins or black_wins) else 0.
        return self.state.board.encode(), reward, True, {'state': self.state}
    def __init__(self, env_id, trials, max_timesteps, max_seconds, reward_floor, reward_ceiling):
        """Store the task parameters and require at least one limit.

        Raises:
            error.Error: If both max_timesteps and max_seconds are None.
        """
        # Attributes are assigned before the check because the error message
        # formats `self`.
        self.env_id, self.trials = env_id, trials
        self.max_timesteps, self.max_seconds = max_timesteps, max_seconds
        self.reward_floor, self.reward_ceiling = reward_floor, reward_ceiling

        limits = (max_timesteps, max_seconds)
        if all(limit is None for limit in limits):
            raise error.Error('Must provide at least one of max_timesteps and max_seconds for {}'.format(self))
Exemple #14
0
    def __init__(self,
                 id,
                 entry_point=None,
                 trials=100,
                 reward_threshold=None,
                 local_only=False,
                 kwargs=None,
                 nondeterministic=False,
                 tags=None,
                 max_episode_steps=None,
                 max_episode_seconds=None,
                 timestep_limit=None):
        """Record the registration parameters of one environment ID.

        Args:
            id: Environment ID; must match ``env_id_re``.
            entry_point: Stored as ``_entry_point``.
            trials: Evaluation parameter, stored as-is.
            reward_threshold: Evaluation parameter, stored as-is.
            local_only: Stored as ``_local_only``.
            kwargs: Stored as ``_kwargs``; None becomes an empty dict.
            nondeterministic: Environment property, stored as-is.
            tags: Dict of tags; None becomes an empty dict. A dict passed in
                is stored and modified in place.
            max_episode_steps: Step limit; may be overridden by the legacy tag
                or by ``timestep_limit``.
            max_episode_seconds: Stored as-is.
            timestep_limit: Legacy alias for ``max_episode_steps``.

        Raises:
            error.Error: If ``id`` does not match the expected ID pattern.
        """
        self.id = id
        # Evaluation parameters
        self.trials = trials
        self.reward_threshold = reward_threshold
        # Environment properties
        self.nondeterministic = nondeterministic

        if tags is None:
            tags = {}
        self.tags = tags

        # BACKWARDS COMPAT 2017/1/18
        # A truthy legacy tag value takes precedence over the max_episode_steps argument.
        if tags.get('wrapper_config.TimeLimit.max_episode_steps'):
            max_episode_steps = tags.get(
                'wrapper_config.TimeLimit.max_episode_steps')
            # TODO: Add the following deprecation warning after 2017/02/18
            # warnings.warn("DEPRECATION WARNING wrapper_config.TimeLimit has been deprecated. Replace any calls to `register(tags={'wrapper_config.TimeLimit.max_episode_steps': 200)}` with `register(max_episode_steps=200)`. This change was made 2017/1/31 and is included in gym version 0.8.0. If you are getting many of these warnings, you may need to update universe past version 0.21.3")

        # NOTE(review): the tag is written before the timestep_limit override
        # below, so the tag and self.max_episode_steps can differ when
        # timestep_limit is passed -- confirm this is intended.
        tags['wrapper_config.TimeLimit.max_episode_steps'] = max_episode_steps
        ######

        # BACKWARDS COMPAT 2017/1/31
        if timestep_limit is not None:
            max_episode_steps = timestep_limit
            # TODO: Add the following deprecation warning after 2017/03/01
            # warnings.warn("register(timestep_limit={}) is deprecated. Use register(max_episode_steps={}) instead.".format(timestep_limit, timestep_limit))
        ######

        self.max_episode_steps = max_episode_steps
        self.max_episode_seconds = max_episode_seconds

        # We may make some of these other parameters public if they're
        # useful.
        match = env_id_re.search(id)
        if not match:
            raise error.Error(
                'Attempted to register malformed environment ID: {}. (Currently all IDs must be of the form {}.)'
                .format(id, env_id_re.pattern))
        # The first regex group is kept as the environment name.
        self._env_name = match.group(1)
        self._entry_point = entry_point
        self._local_only = local_only
        self._kwargs = {} if kwargs is None else kwargs
Exemple #15
0
    def before_reset(self):
        """Validate and record the start of a reset.

        Marks the episode as not done and stores the time of the very first
        reset.

        Raises:
            error.Error: If an episode has taken steps and is not yet done.
        """
        assert not self.closed

        # `done` is None until it has been set; only an explicit not-done
        # episode that has already taken steps blocks the reset.
        mid_episode = self.done is not None and not self.done
        if mid_episode and self.steps > 0:
            raise error.Error(
                "Tried to reset environment which is not done. While the monitor is active for {}, you cannot call reset() unless the episode is over."
                .format(self.env_id))

        self.done = False
        first_reset = self.initial_reset_timestamp is None
        if first_reset:
            self.initial_reset_timestamp = time.time()
Exemple #16
0
def np_random(seed=None):
    """Create a seeded ``np.random.RandomState``.

    Args:
        seed: A non-negative integer, or None.

    Returns:
        A tuple ``(rng, seed)`` of the seeded generator and the seed value
        returned by ``_seed``.

    Raises:
        error.Error: If seed is neither None nor a non-negative integer.
    """
    acceptable = seed is None or (isinstance(seed, integer_types)
                                  and 0 <= seed)
    if not acceptable:
        raise error.Error(
            'Seed must be a non-negative integer or omitted, not {}'.format(
                seed))

    seed = _seed(seed)

    # The hashed seed is split into a list of integers before seeding.
    seed_words = _int_list_from_bigint(hash_seed(seed))
    rng = np.random.RandomState()
    rng.seed(seed_words)
    return rng, seed
Exemple #17
0
def _int_list_from_bigint(bigint):
    # Special case 0
    if bigint < 0:
        raise error.Error('Seed must be non-negative, not {}'.format(bigint))
    elif bigint == 0:
        return [0]

    ints = []
    while bigint > 0:
        bigint, mod = divmod(bigint, 2**32)
        ints.append(mod)
    return ints
Exemple #18
0
def upload_training_data(training_dir, api_key=None):
    """Upload the episode statistics and videos found in a training directory.

    Args:
        training_dir: Directory to load monitor results from.
        api_key: Forwarded to the individual upload calls.

    Returns:
        A tuple ``(env_info, training_episode_batch, training_video)``; the
        batch is None when no episodes were recorded and the video is None
        when no videos were recorded.

    Raises:
        error.Error: If no results could be loaded from the directory.
    """
    # Could have multiple manifests
    results = monitoring.load_results(training_dir)
    if not results:
        raise error.Error('''Could not find any manifest files in {}.

(HINT: this usually means you did not yet close() your env.monitor and have not yet exited the process. You should call 'env.monitor.start(training_dir)' at the start of training and 'env.close()' at the end, or exit the process.)'''
                          .format(training_dir))

    (manifests, env_info, data_sources, timestamps, episode_lengths,
     episode_rewards, episode_types, initial_reset_timestamps,
     videos) = [
         results[field] for field in (
             'manifests', 'env_info', 'data_sources', 'timestamps',
             'episode_lengths', 'episode_rewards', 'episode_types',
             'initial_reset_timestamps', 'videos')
     ]

    env_id = env_info['env_id']
    logger.debug('[%s] Uploading data from manifest %s', env_id,
                 ', '.join(manifests))

    # Do the relevant uploads
    training_episode_batch = None
    if len(episode_lengths) > 0:
        training_episode_batch = upload_training_episode_batch(
            data_sources,
            episode_lengths,
            episode_rewards,
            episode_types,
            initial_reset_timestamps,
            timestamps,
            api_key,
            env_id=env_id)

    video_count = len(videos)
    if video_count > MAX_VIDEOS:
        logger.warning(
            '[%s] You recorded videos for %s episodes, but the scoreboard only supports up to %s. We will automatically subsample for you, but you also might wish to adjust your video recording rate.',
            env_id, video_count, MAX_VIDEOS)
        # Keep MAX_VIDEOS evenly spaced entries, always including the first
        # and the last recorded video.
        kept_indices = np.linspace(0, video_count - 1,
                                   MAX_VIDEOS).astype('int')  #pylint: disable=E1101
        videos = [videos[index] for index in kept_indices]

    training_video = None
    if len(videos) > 0:
        training_video = upload_training_video(videos, api_key, env_id=env_id)

    return env_info, training_episode_batch, training_video
def get_display(spec):
    """Turn a display specification (such as :0) into a Display object.

    None is passed through unchanged; a string is handed to
    ``pyglet.canvas.Display``. Pyglet only supports multiple Displays on
    Linux.

    Raises:
        error.Error: If spec is neither None nor a string.
    """
    if spec is None:
        return None

    if not isinstance(spec, six.string_types):
        raise error.Error(
            'Invalid display specification: {}. (Must be a string like :0 or None.)'
            .format(spec))

    return pyglet.canvas.Display(spec)
Exemple #20
0
    def make(self):
        """Instantiate the environment with this spec's kwargs.

        Returns:
            The new environment, with its ``spec`` attribute set to this spec.

        Raises:
            error.Error: If no entry point is set for this spec.
        """
        entry_point = self._entry_point
        if entry_point is None:
            raise error.Error(
                'Attempting to make deprecated env {}. (HINT: is there a newer registered version of this env?)'
                .format(self.id))

        env_factory = load(entry_point)
        environment = env_factory(**self._kwargs)

        # Make the environment aware of which spec it came from.
        environment.spec = self
        return environment
Exemple #21
0
    def _seed(self, seed=None):
        """Reseed the environment's random generator and refresh the opponent policy.

        Args:
            seed: Seed forwarded to ``seeding.np_random``.

        Returns:
            A one-element list holding the seed that was actually used.

        Raises:
            error.Error: If the opponent is a string other than 'random'.
        """
        self.np_random, seed = seeding.np_random(seed)

        # Update the random policy if needed
        opponent = self.opponent
        if not isinstance(opponent, str):
            # A non-string opponent is used directly as the policy.
            self.opponent_policy = opponent
        elif opponent == 'random':
            self.opponent_policy = make_random_policy(self.np_random)
        else:
            raise error.Error('Unrecognized opponent policy {}'.format(
                opponent))

        return [seed]
Exemple #22
0
    def _step(self, action):
        """Apply the agent's action, then let the opponent policy reply.

        Args:
            action: The agent's move.

        Returns:
            A tuple ``(state, reward, done, info)`` where info is
            ``{'state': self.state}``.

        Raises:
            error.Error: If the move is invalid and ``illegal_move_mode`` is
                'raise', or if ``illegal_move_mode`` has an unsupported value.
        """
        assert self.to_play == self.player_color
        # If already terminal, then don't do anything
        if self.done:
            return self.state, 0., True, {'state': self.state}

        # if HexEnv.pass_move(self.board_size, action):
        #     pass
        if HexEnv.resign_move(self.board_size, action):
            return self.state, -1, True, {'state': self.state}
        elif not HexEnv.valid_move(self.state, action):
            if self.illegal_move_mode == 'raise':
                # No exception is being handled here, so a bare `raise` would
                # only surface as an unrelated RuntimeError; report the
                # illegal move explicitly instead.
                raise error.Error('Illegal move action: {}'.format(action))
            elif self.illegal_move_mode == 'lose':
                # Automatic loss on illegal move
                self.done = True
                return self.state, -1., True, {'state': self.state}
            else:
                raise error.Error('Unsupported illegal move action: {}'.format(
                    self.illegal_move_mode))
        else:
            HexEnv.make_move(self.state, action, self.player_color)

        # Opponent play
        a = self.opponent_policy(self.state)

        # if HexEnv.pass_move(self.board_size, action):
        #     pass

        # Making move if there are moves left
        if a is not None:
            if HexEnv.resign_move(self.board_size, a):
                return self.state, 1, True, {'state': self.state}
            else:
                # The opponent plays the other color (1 - player_color).
                HexEnv.make_move(self.state, a, 1 - self.player_color)

        # The game result is negated when the agent plays white; a non-zero
        # result ends the episode.
        reward = HexEnv.game_finished(self.state)
        if self.player_color == HexEnv.WHITE:
            reward = -reward
        self.done = reward != 0
        return self.state, reward, self.done, {'state': self.state}
Exemple #23
0
def _seed(a=None, max_bytes=8):
    """Create a strong random seed.

    Otherwise, Python 2 would seed using the system time, which might be
    non-robust especially in the presence of concurrency.

    Args:
        a (Optional[int, str]): None seeds from an operating system specific randomness source.
        max_bytes: Maximum number of bytes to use in the seed.

    Returns:
        The seed as an integer.

    Raises:
        error.Error: If `a` is neither None, a str, nor an integer.
    """
    # Adapted from https://svn.python.org/projects/python/tags/r32/Lib/random.py
    if a is None:
        return _bigint_from_bytes(os.urandom(max_bytes))

    if isinstance(a, str):
        # Extend the encoded string with its SHA-512 digest, then keep only
        # the first max_bytes bytes.
        encoded = a.encode('utf8')
        material = encoded + hashlib.sha512(encoded).digest()
        return _bigint_from_bytes(material[:max_bytes])

    if isinstance(a, integer_types):
        # Reduce the integer so it fits into max_bytes bytes.
        return a % 2**(8 * max_bytes)

    raise error.Error('Invalid type for seed: {} ({})'.format(type(a), a))
Exemple #24
0
def write_archive(videos, archive_file, env_id=None):
    """Write videos, their metadata files and a manifest into a gzipped tar archive.

    Args:
        videos: Sequence of ``(video_path, metadata_path)`` pairs.
        archive_file: File object the archive is written to.
        env_id: Environment id, used only as a prefix in log and error messages.

    Raises:
        error.Error: If there are more than MAX_VIDEOS videos, a video or
            metadata file does not exist, a base name is duplicated, or a
            base name does not match the expected pattern.
    """
    if len(videos) > MAX_VIDEOS:
        # Arguments follow the message order: actual count first, then the limit.
        raise error.Error(
            '[{}] Trying to upload {} videos, but there is a limit of {} currently. If you actually want to upload this many videos, please email [email protected] with your use-case.'
            .format(env_id, len(videos), MAX_VIDEOS))

    logger.debug('[%s] Preparing an archive of %d videos: %s', env_id,
                 len(videos), videos)

    # Double check that there are no collisions
    basenames = set()
    manifest = {'version': 0, 'videos': []}

    with tarfile.open(fileobj=archive_file, mode='w:gz') as tar:
        for video_path, metadata_path in videos:
            video_name = os.path.basename(video_path)
            metadata_name = os.path.basename(metadata_path)

            if not os.path.exists(video_path):
                raise error.Error(
                    '[{}] No such video file {}. (HINT: Your video recorder may have broken midway through the run. You can check this with `video_recorder.functional`.)'
                    .format(env_id, video_path))
            elif not os.path.exists(metadata_path):
                # Report the metadata path, which is the file that is missing.
                raise error.Error(
                    '[{}] No such metadata file {}. (HINT: this should be automatically created when using a VideoRecorder instance.)'
                    .format(env_id, metadata_path))

            # Do some sanity checking
            if video_name in basenames:
                raise error.Error(
                    '[{}] Duplicated video name {} in video list: {}'.format(
                        env_id, video_name, videos))
            elif metadata_name in basenames:
                raise error.Error(
                    '[{}] Duplicated metadata file name {} in video list: {}'.
                    format(env_id, metadata_name, videos))
            elif not video_name_re.search(video_name):
                raise error.Error(
                    '[{}] Invalid video name {} (must match {})'.format(
                        env_id, video_name, video_name_re.pattern))
            elif not metadata_name_re.search(metadata_name):
                raise error.Error(
                    '[{}] Invalid metadata file name {} (must match {})'.
                    format(env_id, metadata_name, metadata_name_re.pattern))

            # Record that we've seen these names; add to manifest
            basenames.add(video_name)
            basenames.add(metadata_name)
            manifest['videos'].append((video_name, metadata_name))

            # Import the files into the archive
            tar.add(video_path, arcname=video_name, recursive=False)
            tar.add(metadata_path, arcname=metadata_name, recursive=False)

        # The manifest is written to a temporary file so it can be added to
        # the archive by path; the file is removed afterwards in all cases.
        f = tempfile.NamedTemporaryFile(mode='w+', delete=False)
        try:
            json.dump(manifest, f)
            # Close first so the contents are flushed before tar reads the file.
            f.close()
            tar.add(f.name, arcname='manifest.json')
        finally:
            f.close()
            os.remove(f.name)
Exemple #25
0
 def register(self, id, **kwargs):
     """Create an EnvSpec for `id` from `kwargs` and store it.

     Raises:
         error.Error: If `id` is already registered.
     """
     already_registered = id in self.env_specs
     if already_registered:
         raise error.Error('Cannot re-register id: {}'.format(id))

     new_spec = EnvSpec(id, **kwargs)
     self.env_specs[id] = new_spec
Exemple #26
0
def upload(training_dir,
           algorithm_id=None,
           writeup=None,
           tags=None,
           benchmark_id=None,
           api_key=None,
           ignore_open_monitors=False):
    """Send recorded training results to OpenAI Gym.

    Two modes are supported. When ``benchmark_id`` is truthy, every
    sub-directory of ``training_dir`` holding training manifests is uploaded
    as part of a single benchmark run. Otherwise ``training_dir`` is uploaded
    as one stand-alone evaluation.

    Args:
        training_dir (str): Directory containing the results of a training run. In benchmark mode it is walked recursively for Gym manifests.
        algorithm_id (Optional[str]): Algorithm id. Attached to the benchmark run in benchmark mode, or to the evaluation otherwise.
        writeup (Optional[str]): Writeup reference (a Gist URL) forwarded to each evaluation upload.
        tags (Optional[dict]): Jsonable key/values stored with the benchmark run. Ignored (with a warning) for non-benchmark uploads.
        benchmark_id (Optional[str]): Benchmark these evaluations belong to. Selects benchmark mode when truthy.
        api_key (Optional[str]): OpenAI API key, forwarded to each evaluation upload.
        ignore_open_monitors (bool): Forwarded unchanged to each evaluation upload.

    Returns:
        The id of the created benchmark run in benchmark mode, else None.

    Raises:
        error.Error: If ``benchmark_id`` is not a registered benchmark, or if no evaluations are found under ``training_dir``.
    """
    if not benchmark_id:
        # Stand-alone evaluation: tags only make sense for benchmark runs.
        if tags is not None:
            logger.warning("Tags will NOT be uploaded for this submission.")
        evaluation = _upload(training_dir, algorithm_id, writeup, None,
                             api_key, ignore_open_monitors)

        evaluation_banner = """
****************************************************
You successfully uploaded your evaluation on %s to
OpenAI Gym! You can find it at:

    %s

****************************************************
        """.rstrip()
        logger.info(evaluation_banner, evaluation.env, evaluation.web_url())
        return None

    # Benchmark run: collect every directory that contains manifests,
    # together with the environment id each one was recorded for.
    run_dirs = []
    found_env_ids = []
    for dirpath, _, filenames in os.walk(training_dir):
        manifests = monitoring.detect_training_manifests(dirpath,
                                                         files=filenames)
        if not manifests:
            continue
        env_info = monitoring.load_env_info_from_manifests(manifests,
                                                           training_dir)
        found_env_ids.append(env_info['env_id'])
        run_dirs.append(dirpath)

    # Look up the benchmark definition to validate against.
    try:
        spec = benchmark_spec(benchmark_id)
    except error.UnregisteredBenchmark:
        raise error.Error(
            "Invalid benchmark id: {}. Are you using a benchmark registered in gym/benchmarks/__init__.py?"
            .format(benchmark_id))

    # One expected entry per trial of each task.
    # TODO: verify that the number of trials matches
    spec_env_ids = []
    for task in spec.tasks:
        for _ in range(task.trials):
            spec_env_ids.append(task.env_id)

    if not found_env_ids:
        raise error.Error(
            "Could not find any evaluations in {}".format(training_dir))

    # A mismatch is only reported, not rejected; this could be more
    # stringent about mixing evaluations.
    found_sorted = sorted(found_env_ids)
    expected_sorted = sorted(spec_env_ids)
    if found_sorted != expected_sorted:
        logger.info(
            "WARNING: Evaluations do not match spec for benchmark %s. In %s, we found evaluations for %s, expected %s",
            benchmark_id, training_dir, found_sorted, expected_sorted)

    benchmark_run = resource.BenchmarkRun.create(
        benchmark_id=benchmark_id,
        algorithm_id=algorithm_id,
        tags=json.dumps(tags))
    run_id = benchmark_run.id

    # N.B. algorithm_id is deliberately not propagated to the individual
    # evaluations when they are part of a benchmark run.
    for run_dir in run_dirs:
        _upload(run_dir, None, writeup, run_id, api_key,
                ignore_open_monitors)

    benchmark_banner = """
****************************************************
You successfully uploaded your benchmark on %s to
OpenAI Gym! You can find it at:

    %s

****************************************************
        """.rstrip()
    logger.info(benchmark_banner, benchmark_id, benchmark_run.web_url())

    return run_id
Exemple #27
0
 def type(self, type):
     """Set the episode type after validating it.

     Args:
         type (str): 't' for a training episode or 'e' for an evaluation
             episode.

     Raises:
         error.Error: If ``type`` is neither 't' nor 'e'.
     """
     if type not in ('t', 'e'):
         # Interpolate the rejected value into the message. Passing it as a
         # second exception argument would leave the '{}' placeholder
         # unfilled in the reported error.
         raise error.Error(
             'Invalid episode type {}: must be t for training or e for evaluation'
             .format(type))
     self._type = type
Exemple #28
0
 def monitor(self):
     """Deprecated accessor: always raises, pointing at the Monitor wrapper.

     Raises:
         error.Error: Unconditionally, with migration instructions.
     """
     deprecation_notice = "env.monitor has been deprecated as of 12/23/2016. Remove your call to `env.monitor.start(directory)` and instead wrap your env with `env = gym.wrappers.Monitor(env, directory)` to record data."
     raise error.Error(deprecation_notice)