Example #1
# Imports assumed by this snippet. The benchmark scoreboard API used below
# (gym.monitoring, gym.benchmarks.scoring) only exists in older gym releases.
import os
from collections import defaultdict

import gym
import gym.benchmarks.scoring
import gym.monitoring


def benchmark_score_from_local(benchmark_id, training_dir):
    """Score every evaluation found under training_dir against a benchmark spec."""
    spec = gym.benchmark_spec(benchmark_id)

    # Collect every sub-directory that contains Gym monitor manifests.
    directories = []
    for name, _, files in os.walk(training_dir):
        manifests = gym.monitoring.detect_training_manifests(name, files=files)
        if manifests:
            directories.append(name)

    # Score each evaluation directory and group the results by environment id.
    benchmark_results = defaultdict(list)
    for eval_dir in directories:
        results = gym.monitoring.load_results(eval_dir)

        env_id = results['env_info']['env_id']
        benchmark_result = spec.score_evaluation(
            env_id, results['data_sources'],
            results['initial_reset_timestamps'], results['episode_lengths'],
            results['episode_rewards'], results['episode_types'],
            results['timestamps'])
        # from pprint import pprint
        # pprint(benchmark_result)
        benchmark_results[env_id].append(benchmark_result)

    return gym.benchmarks.scoring.benchmark_aggregate_score(
        spec, benchmark_results)
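
A minimal usage sketch for the helper above, assuming an older gym install with
registered benchmarks and a directory of monitor output; the benchmark id and
path are illustrative placeholders, not values taken from the example:

# Hypothetical benchmark id and directory, for illustration only.
aggregate = benchmark_score_from_local('Atari40M', '/tmp/my-training-runs')
# benchmark_aggregate_score returns an aggregate summary of the run;
# printing it is the simplest way to inspect the overall score.
print(aggregate)
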
Example #2
def upload(training_dir,
           algorithm_id=None,
           writeup=None,
           tags=None,
           benchmark_id=None,
           api_key=None,
           ignore_open_monitors=False):
    """Upload the results of training (as automatically recorded by your
    env's monitor) to OpenAI Gym.

    Args:
        training_dir (Optional[str]): A directory containing the results of a training run.
        algorithm_id (Optional[str]): An algorithm id indicating the particular version of the algorithm (including choices of parameters) you are running (visit https://gym.openai.com/algorithms to create an id). If the id doesn't match an existing server id it will create a new algorithm using algorithm_id as the name
        benchmark_id (Optional[str]): The benchmark that these evaluations belong to. Will recursively search through training_dir for any Gym manifests. This feature is currently pre-release.
        writeup (Optional[str]): A Gist URL (of the form https://gist.github.com/<user>/<id>) containing your writeup for this evaluation.
        tags (Optional[dict]): A dictionary of key/values to store with the benchmark run (ignored for nonbenchmark evaluations). Must be jsonable.
        api_key (Optional[str]): Your OpenAI API key. Can also be provided as an environment variable (OPENAI_GYM_API_KEY).
    """

    if benchmark_id:
        # We're uploading a benchmark run.

        # Find every sub-directory that contains monitor manifests and record
        # the env id of each evaluation it holds.
        directories = []
        env_ids = []
        for name, _, files in os.walk(training_dir):
            manifests = monitoring.detect_training_manifests(name, files=files)
            if manifests:
                env_info = monitoring.load_env_info_from_manifests(
                    manifests, training_dir)
                env_ids.append(env_info['env_id'])
                directories.append(name)

        # Validate against benchmark spec
        try:
            spec = benchmark_spec(benchmark_id)
        except error.UnregisteredBenchmark:
            raise error.Error(
                "Invalid benchmark id: {}. Are you using a benchmark registered in gym/benchmarks/__init__.py?"
                .format(benchmark_id))

        # TODO: verify that the number of trials matches
        spec_env_ids = [
            task.env_id for task in spec.tasks for _ in range(task.trials)
        ]

        if not env_ids:
            raise error.Error(
                "Could not find any evaluations in {}".format(training_dir))

        # This could be more stringent about mixing evaluations
        if sorted(env_ids) != sorted(spec_env_ids):
            logger.warning(
                "Evaluations do not match spec for benchmark %s. In %s, we found evaluations for %s, expected %s",
                benchmark_id, training_dir, sorted(env_ids),
                sorted(spec_env_ids))

        benchmark_run = resource.BenchmarkRun.create(benchmark_id=benchmark_id,
                                                     algorithm_id=algorithm_id,
                                                     tags=json.dumps(tags))
        benchmark_run_id = benchmark_run.id

        # Actually do the uploads.
        for training_dir in directories:
            # N.B. we don't propagate algorithm_id to Evaluation if we're running as part of a benchmark
            _upload(training_dir, None, writeup, benchmark_run_id, api_key,
                    ignore_open_monitors)

        logger.info(
            """
****************************************************
You successfully uploaded your benchmark on %s to
OpenAI Gym! You can find it at:

    %s

****************************************************
        """.rstrip(), benchmark_id, benchmark_run.web_url())

        return benchmark_run_id
    else:
        if tags is not None:
            logger.warning("Tags will NOT be uploaded for this submission.")
        # Single evaluation upload (not part of a benchmark run).
        benchmark_run_id = None
        evaluation = _upload(training_dir, algorithm_id, writeup,
                             benchmark_run_id, api_key, ignore_open_monitors)

        logger.info(
            """
****************************************************
You successfully uploaded your evaluation on %s to
OpenAI Gym! You can find it at:

    %s

****************************************************
        """.rstrip(), evaluation.env, evaluation.web_url())

        return None
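
A minimal usage sketch, assuming training_dir already holds monitor output
(e.g. written by gym.wrappers.Monitor) and that OPENAI_GYM_API_KEY is set in
the environment; the directory, algorithm id, and Gist URL are illustrative
placeholders, and the OpenAI Gym scoreboard service this uploads to has since
been retired:

# Hypothetical paths and ids, for illustration only.
upload('/tmp/cartpole-experiment-1',
       algorithm_id='random-agent',
       writeup='https://gist.github.com/<user>/<id>')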