def main(argv): """Main entry point.""" argv = FLAGS(argv) if len(argv) != 1: raise app.UsageError(f"Unknown command line arguments: {argv[1:]}") # Use default logdir of <base>/brute_force/<benchmark> unless told # otherwise. benchmark = benchmark_from_flags() if not benchmark: raise app.UsageError("No benchmark specified.") env = env_from_flags(benchmark) env.reset() benchmark = env.benchmark sanitized_benchmark_name = "/".join(benchmark.split("/")[-2:]) env.close() logs_dir = Path( FLAGS.output_dir or create_logging_dir(f"brute_force/{sanitized_benchmark_name}")) run_brute_force( make_env=lambda: env_from_flags(benchmark_from_flags()), action_names=FLAGS.actions, episode_length=FLAGS.episode_length, outdir=logs_dir, nproc=FLAGS.nproc, )
def main(argv): """Main entry point.""" argv = FLAGS(argv) if len(argv) != 1: raise app.UsageError(f"Unknown command line arguments: {argv[1:]}") # Use default logdir of <base>/brute_force/<benchmark> unless told # otherwise. benchmark = benchmark_from_flags() if not benchmark: raise app.UsageError("No benchmark specified.") with env_from_flags(benchmark) as env: env.reset() logs_dir = Path(FLAGS.output_dir or create_logging_dir( f'brute_force/{os.path.normpath(f"random/{env.benchmark.uri.scheme}/{env.benchmark.uri.path}")}' )) run_brute_force( make_env=lambda: env_from_flags(benchmark_from_flags()), action_names=FLAGS.brute_force_action_list, episode_length=FLAGS.episode_length, outdir=logs_dir, nproc=FLAGS.nproc, )
def main(argv): """Main entry point.""" argv = FLAGS(argv) if len(argv) != 1: raise app.UsageError(f"Unknown command line arguments: {argv[1:]}") # Determine the benchmark that is being analyzed, or use all of them. benchmark = benchmark_from_flags() if benchmark: benchmarks = [benchmark] else: with env_from_flags() as env: benchmarks = islice(env.benchmarks, 100) logs_dir = Path(FLAGS.output_dir or create_logging_dir("benchmark_sensitivity_analysis")) rewards_path = logs_dir / f"benchmarks_{FLAGS.reward}.csv" runtimes_path = logs_dir / f"benchmarks_{FLAGS.reward}_runtimes.csv" run_benchmark_sensitivity_analysis( rewards_path=rewards_path, runtimes_path=runtimes_path, benchmarks=benchmarks, reward=FLAGS.reward, num_trials=FLAGS.num_benchmark_sensitivity_trials, min_steps=FLAGS.min_steps, max_steps=FLAGS.max_steps, nproc=FLAGS.nproc, max_attempts_multiplier=FLAGS.max_benchmark_attempts_multiplier, )
def get_rewards(
    benchmark: Union[Benchmark, str],
    reward_space: str,
    num_trials: int,
    min_steps: int,
    max_steps: int,
    max_attempts_multiplier: int = 5,
) -> SensitivityAnalysisResult:
    """Run random trials to get a list of num_trials episode rewards."""
    rewards, runtimes = [], []
    num_attempts = 0
    while (
        num_attempts < max_attempts_multiplier * num_trials
        and len(rewards) < num_trials
    ):
        num_attempts += 1
        with env_from_flags(benchmark=benchmark) as env:
            env.observation_space = None
            env.reward_space = None
            env.reset(benchmark=benchmark)
            benchmark = env.benchmark
            with Timer() as t:
                reward = run_one_trial(env, reward_space, min_steps, max_steps)
        if reward is not None:
            rewards.append(reward)
            runtimes.append(t.time)
    return SensitivityAnalysisResult(
        name=env.benchmark, runtimes=np.array(runtimes), rewards=np.array(rewards)
    )
def run_action_sensitivity_analysis(
    actions: List[int],
    rewards_path: Path,
    runtimes_path: Path,
    reward_space: str,
    num_trials: int,
    max_warmup_steps: int,
    nproc: int = cpu_count(),
    max_attempts_multiplier: int = 5,
):
    """Estimate the immediate reward of a given list of actions."""
    with env_from_flags() as env:
        action_names = env.action_space.names

    with ThreadPoolExecutor(max_workers=nproc) as executor:
        analysis_futures = {
            executor.submit(
                get_rewards,
                action,
                action_names[action],
                reward_space,
                num_trials,
                max_warmup_steps,
                max_attempts_multiplier,
            )
            for action in actions
        }
        return run_sensitivity_analysis(
            analysis_futures=analysis_futures,
            runtimes_path=runtimes_path,
            rewards_path=rewards_path,
        )
def main(argv): """Main entry point.""" argv = FLAGS(argv) if len(argv) != 1: raise app.UsageError(f"Unknown command line arguments: {argv[1:]}") with env_from_flags() as env: action_names = env.action_space.names if FLAGS.action: actions = [action_names.index(a) for a in FLAGS.action] else: actions = list(range(len(action_names))) logs_dir = Path( FLAGS.output_dir or create_logging_dir("benchmark_sensitivity_analysis") ) rewards_path = logs_dir / f"actions_{FLAGS.reward}.rewards.csv" runtimes_path = logs_dir / f"actions_{FLAGS.reward}.runtimes.csv" run_action_sensitivity_analysis( rewards_path=rewards_path, runtimes_path=runtimes_path, actions=actions, reward=FLAGS.reward, num_trials=FLAGS.num_action_sensitivity_trials, max_warmup_steps=FLAGS.max_warmup_steps, nproc=FLAGS.nproc, max_attempts_multiplier=FLAGS.max_action_attempts_multiplier, )
def get_rewards(
    action: int,
    action_name: str,
    reward_space: str,
    num_trials: int,
    max_warmup_steps: int,
    max_attempts_multiplier: int = 5,
) -> SensitivityAnalysisResult:
    """Run random trials to get a list of num_trials immediate rewards."""
    rewards, runtimes = [], []
    benchmark = benchmark_from_flags()
    num_attempts = 0
    while (
        num_attempts < max_attempts_multiplier * num_trials
        and len(rewards) < num_trials
    ):
        num_attempts += 1
        with env_from_flags(benchmark=benchmark) as env:
            env.observation_space = None
            env.reward_space = None
            env.reset(benchmark=benchmark)
            with Timer() as t:
                reward = run_one_trial(env, reward_space, action, max_warmup_steps)
        if reward is not None:
            rewards.append(reward)
            runtimes.append(t.time)
    return SensitivityAnalysisResult(
        name=action_name, runtimes=np.array(runtimes), rewards=np.array(rewards)
    )
def main(argv): """Main entry point.""" argv = FLAGS(argv) if len(argv) != 1: raise app.UsageError(f"Unknown command line arguments: {argv[1:]}") if FLAGS.ls_benchmark: env = env_from_flags() print("\n".join(sorted(env.benchmarks))) env.close() return if FLAGS.ls_reward: env = env_from_flags() print("\n".join(sorted(env.reward.indices.keys()))) env.close() return assert FLAGS.patience >= 0, "--patience must be >= 0" def make_env(): return env_from_flags(benchmark=benchmark_from_flags()) env = make_env() try: env.reset() if not env.benchmark: raise app.UsageError("No benchmark specified.") finally: env.close() best_reward, _ = random_search( make_env=make_env, outdir=Path(FLAGS.output_dir) if FLAGS.output_dir else None, patience=FLAGS.patience, total_runtime=FLAGS.runtime, nproc=FLAGS.nproc, skip_done=FLAGS.skip_done, ) # Exit with error if --fail_threshold was set and the best reward does not # meet this value. if FLAGS.fail_threshold is not None and best_reward < FLAGS.fail_threshold: print( f"Best reward {best_reward:.3f} below threshold of {FLAGS.fail_threshold}", file=sys.stderr, ) sys.exit(1)
def main(argv): """Main entry point.""" assert len(argv) == 1, f"Unrecognized flags: {argv[1:]}" assert 0 < FLAGS.heading_level <= 4, f"--heading_level must be in range [1,4]" env = env_from_flags() print_service_capabilities(env, base_heading_level=FLAGS.heading_level) env.close()
def main(argv): """Main entry point.""" assert len(argv) == 1, f"Unrecognized flags: {argv[1:]}" with env_from_flags(benchmark=benchmark_from_flags()) as env: step_min = min(FLAGS.step_min, FLAGS.step_max) step_max = max(FLAGS.step_min, FLAGS.step_max) run_random_walk(env=env, step_count=random.randint(step_min, step_max))
def run_random_search(num_episodes, num_steps) -> None:
    """The inner loop of a load test benchmark."""
    with env_from_flags(benchmark=benchmark_from_flags()) as env:
        for _ in range(num_episodes):
            env.reset()
            for _ in range(num_steps):
                _, _, done, _ = env.step(env.action_space.sample())
                if done:
                    break
def make_env(): FLAGS.env = "llvm-v0" if not FLAGS.reward: FLAGS.reward = "IrInstructionCountOz" env = env_from_flags(benchmark=benchmark_from_flags()) env = ConstrainedCommandline(env, flags=FLAGS.flags) env = TimeLimit(env, max_episode_steps=FLAGS.episode_len) env = HistoryObservation(env) return env
def main(argv): """Main entry point.""" argv = FLAGS(argv) if len(argv) != 1: raise app.UsageError(f"Unknown command line arguments: {argv[1:]}") if FLAGS.ls_benchmark: benchmark = benchmark_from_flags() env = env_from_flags(benchmark) print("\n".join(sorted(env.benchmarks))) env.close() return with Timer("Initialized environment"): benchmark = benchmark_from_flags() env = env_from_flags(benchmark) run_manual_env(env)
def main(argv): """Main entry point.""" argv = FLAGS(argv) if len(argv) != 1: raise app.UsageError(f"Unknown command line arguments: {argv[1:]}") if FLAGS.ls_benchmark: benchmark = benchmark_from_flags() env = env_from_flags(benchmark) print("\n".join(sorted(env.benchmarks))) env.close() return with Timer("Initialized environment"): # FIXME Chris, I don't seem to actually get a benchmark benchmark = benchmark_from_flags() env = env_from_flags(benchmark) shell = CompilerGymShell(env) shell.cmdloop()
def main(argv): """Main entry point.""" argv = FLAGS(argv) if len(argv) != 1: raise app.UsageError(f"Unknown command line arguments: {argv[1:]}") with Timer("Initialized environment"): benchmark = benchmark_from_flags() env = env_from_flags(benchmark) shell = CompilerGymShell(env) shell.cmdloop()
def main(argv): """Main entry point.""" argv = FLAGS(argv) if len(argv) != 1: raise app.UsageError(f"Unknown command line arguments: {argv[1:]}") if FLAGS.ls_reward: with env_from_flags() as env: print("\n".join(sorted(env.reward.indices.keys()))) return assert FLAGS.patience >= 0, "--patience must be >= 0" # Create an environment now to catch a startup time error before we launch # a bunch of workers. with env_from_flags() as env: env.reset(benchmark=benchmark_from_flags()) env = random_search( make_env=lambda: env_from_flags(benchmark=benchmark_from_flags()), outdir=Path(FLAGS.output_dir) if FLAGS.output_dir else None, patience=FLAGS.patience, total_runtime=FLAGS.runtime, nproc=FLAGS.nproc, skip_done=FLAGS.skip_done, ) try: # Exit with error if --fail_threshold was set and the best reward does not # meet this value. if ( FLAGS.fail_threshold is not None and env.episode_reward < FLAGS.fail_threshold ): print( f"Best reward {env.episode_reward:.3f} below threshold of {FLAGS.fail_threshold}", file=sys.stderr, ) sys.exit(1) finally: env.close()
def main(argv): """Main entry point.""" argv = FLAGS(argv) if len(argv) != 1: raise app.UsageError(f"Unknown command line arguments: {argv[1:]}") output_dir = Path(FLAGS.output_dir).expanduser().resolve().absolute() assert ( output_dir / logs.METADATA_NAME ).is_file(), f"Invalid --output_dir: {output_dir}" env = env_from_flags() benchmark = benchmark_from_flags() replay_actions_from_logs(env, output_dir, benchmark=benchmark)
def main(argv): """Main entry point.""" assert len(argv) == 1, f"Unrecognized flags: {argv[1:]}" if FLAGS.run_on_port: assert FLAGS.env, "Must specify an --env to run" settings = ConnectionOpts( script_args=["--port", str(FLAGS.run_on_port)]) with gym.make(FLAGS.env, connection_settings=settings) as env: print( f"=== Started a service on port {FLAGS.run_on_port}. Use C-c to terminate. ===" ) signal.pause() with env_from_flags() as env: print_service_capabilities(env)
def __init__(self):
    self._env = env_from_flags(benchmark_from_flags())
    try:
        # Project onto the subset of transformations that have
        # been specified to be used.
        if not FLAGS.actions:
            self._action_indices = list(range(len(self._env.action_space.names)))
        else:
            self._action_indices = [
                self._env.action_space.flags.index(a) for a in FLAGS.actions
            ]
        self._action_names = [
            self._env.action_space.names[a] for a in self._action_indices
        ]
    finally:
        # The program will not terminate until the environment is
        # closed, not even if there is an exception.
        self._env.close()
def main(argv): """Main entry point.""" if len(argv) != 1: raise app.UsageError(f"Unknown command line arguments: {argv[1:]}") with env_from_flags() as env: invalidated_manifest = False for name_or_url in FLAGS.download: env.datasets.install(name_or_url) if FLAGS.download_all: for dataset in env.datasets: dataset.install() for name in FLAGS.activate: activate(env, name) invalidated_manifest = True if FLAGS.activate_all: invalidated_manifest = True for name in FLAGS.deactivate: deactivate(env, name) invalidated_manifest = True if FLAGS.deactivate_all: invalidated_manifest = True for name in FLAGS.delete: delete(env, name) if invalidated_manifest: env.make_manifest_file() print( summarize_datasets(env.datasets), )
def make_env():
    env = env_from_flags(benchmark=benchmark_from_flags())
    if FLAGS.explore_actions:
        env = ConstrainedCommandline(env, flags=FLAGS.explore_actions)
    return env
def main(argv): """Main entry point.""" try: states = list(CompilerEnvStateReader.read_paths(argv[1:])) except ValueError as e: print(e, file=sys.stderr) sys.exit(1) if not states: print( "No inputs to validate. Pass a CSV file path as an argument, or " "use - to read from stdin.", file=sys.stderr, ) sys.exit(1) # Send the states off for validation if FLAGS.debug_force_valid: validation_results = ( ValidationResult( state=state, reward_validated=True, actions_replay_failed=False, reward_validation_failed=False, benchmark_semantics_validated=False, benchmark_semantics_validation_failed=False, walltime=0, ) for state in states ) else: validation_results = validate_states( env_from_flags, states, nproc=FLAGS.nproc, inorder=FLAGS.inorder, ) # Determine the name of the reward space. with env_from_flags() as env: if FLAGS.reward_aggregation == "geomean": def reward_aggregation(a): return geometric_mean(np.clip(a, 0, None)) reward_aggregation_name = "Geometric mean" elif FLAGS.reward_aggregation == "mean": reward_aggregation = arithmetic_mean reward_aggregation_name = "Mean" else: raise app.UsageError( f"Unknown aggregation type: '{FLAGS.reward_aggregation}'" ) if env.reward_space: reward_name = f"{reward_aggregation_name} {env.reward_space.id}" else: reward_name = "" # Determine the maximum column width required for printing tabular output. max_state_name_length = max( len(s) for s in [state_name(s) for s in states] + [ "Mean inference walltime", reward_name, ] ) name_col_width = min(max_state_name_length + 2, 78) error_count = 0 rewards = [] walltimes = [] if FLAGS.summary_only: def intermediate_print(*args, **kwargs): del args del kwargs else: intermediate_print = print def progress_message(i): intermediate_print( f"{i} remaining {plural(i, 'state', 'states')} to validate ... ", end="", flush=True, ) progress_message(len(states)) result_dicts = [] def dump_result_dicst_to_json(): with open(FLAGS.validation_logfile, "w") as f: json.dump(result_dicts, f) for i, result in enumerate(validation_results, start=1): intermediate_print("\r\033[K", to_string(result, name_col_width), sep="") progress_message(len(states) - i) result_dicts.append(result.dict()) if not result.okay(): error_count += 1 elif result.reward_validated and not result.reward_validation_failed: rewards.append(result.state.reward) walltimes.append(result.state.walltime) if not i % 10: dump_result_dicst_to_json() dump_result_dicst_to_json() # Print a summary footer. intermediate_print("\r\033[K----", "-" * name_col_width, "-----------", sep="") print(f"Number of validated results: {emph(len(walltimes))} of {len(states)}") walltime_mean = f"{arithmetic_mean(walltimes):.3f}s" walltime_std = f"{stdev(walltimes):.3f}s" print( f"Mean walltime per benchmark: {emph(walltime_mean)} " f"(std: {emph(walltime_std)})" ) reward = f"{reward_aggregation(rewards):.3f}" reward_std = f"{stdev(rewards):.3f}" print(f"{reward_name}: {emph(reward)} " f"(std: {emph(reward_std)})") if error_count: sys.exit(1)
def make_env():
    return env_from_flags(benchmark=benchmark_from_flags())
def main(argv): """Main entry point.""" if len(argv) != 1: raise app.UsageError(f"Unknown command line arguments: {argv[1:]}") env = env_from_flags() try: if not env.datasets_site_path: raise app.UsageError("Environment has no benchmarks site path") env.datasets_site_path.mkdir(parents=True, exist_ok=True) env.inactive_datasets_site_path.mkdir(parents=True, exist_ok=True) invalidated_manifest = False for name_or_url in FLAGS.download: require(env, name_or_url) if FLAGS.download_all: for dataset in env.available_datasets: require(env, dataset) for name in FLAGS.activate: activate(env, name) invalidated_manifest = True if FLAGS.activate_all: for path in env.inactive_datasets_site_path.iterdir(): activate(env, path.name) invalidated_manifest = True for name in FLAGS.deactivate: deactivate(env, name) invalidated_manifest = True if FLAGS.deactivate_all: for path in env.datasets_site_path.iterdir(): deactivate(env, path.name) invalidated_manifest = True for name in FLAGS.delete: delete(env, name) if invalidated_manifest: env.make_manifest_file() print(f"{env.spec.id} benchmarks site dir: {env.datasets_site_path}") print() print(enumerate_directory("Active Datasets", env.datasets_site_path), ) print( "These benchmarks are ready for use. Deactivate them using `--deactivate=<name>`." ) print() print( enumerate_directory("Inactive Datasets", env.inactive_datasets_site_path)) print("These benchmarks may be activated using `--activate=<name>`.") print() print( tabulate( sorted([( d.name, d.license, humanize.intcomma(d.file_count), humanize.naturalsize(d.size_bytes), ) for d in env.available_datasets.values()]), headers=( "Downloadable Dataset", "License", "#. Benchmarks", "Size on disk", ), )) print( "These benchmarks may be installed using `--download=<name> --activate=<name>`." ) finally: env.close()
def main(argv): """Main entry point.""" assert len(argv) == 1, f"Unrecognized flags: {argv[1:]}" # Parse the input states from the user. states = list(read_states_from_stdin()) # Send the states off for validation validation_results = validate_states( env_from_flags, states, datasets=FLAGS.dataset, nproc=FLAGS.nproc, inorder=FLAGS.inorder, ) # Determine the name of the reward space. env = env_from_flags() try: if env.reward_space: gmean_name = f"Geometric mean {env.reward_space.id}" else: gmean_name = "Geometric mean" finally: env.close() # Determine the maximum column width required for printing tabular output. max_state_name_length = max( len(s) for s in [state_name(s) for s in states] + [ "Mean inference walltime", gmean_name, ]) name_col_width = min(max_state_name_length + 2, 78) error_count = 0 rewards = [] walltimes = [] for result in validation_results: print(to_string(result, name_col_width)) if result.failed: error_count += 1 elif result.reward_validated and not result.reward_validation_failed: rewards.append(result.state.reward) walltimes.append(result.state.walltime) # Print a summary footer. print("----", "-" * name_col_width, "-----------", sep="") print( f"Number of validated results: {emph(len(walltimes))} of {len(states)}" ) walltime_mean = f"{arithmetic_mean(walltimes):.3f}s" walltime_std = f"{stdev(walltimes):.3f}s" print(f"Mean inference walltime: {emph(walltime_mean)} sec / benchmark " f"(std: {emph(walltime_std)})") reward_gmean = f"{geometric_mean(rewards):.3f}" reward_std = f"{stdev(rewards):.3f}" print(f"{gmean_name}: {emph(reward_gmean)} (std: {emph(reward_std)})") if error_count: sys.exit(1)