Code Example #1
File: main.py  Project: kllmia/inference_results_v0.7
def main(main_args, system_id):
    # Turn off MPS in case it's turned on.
    turn_off_mps()

    benchmarks = BENCHMARKS.ALL
    if main_args["benchmarks"] is not None:
        benchmarks = main_args["benchmarks"].split(",")
        for i, benchmark in enumerate(benchmarks):
            benchmarks[i] = BENCHMARKS.alias(benchmark)
    scenarios = SCENARIOS.ALL
    if main_args["scenarios"] is not None:
        scenarios = main_args["scenarios"].split(",")
        for i, scenario in enumerate(scenarios):
            scenarios[i] = SCENARIOS.alias(scenario)

    profile = main_args.get("profile", None)
    power = main_args.get("power", False)

    # Automatically detect architecture and scenarios and load configs
    config_files = main_args["configs"]
    if config_files == "" or config_files is None:
        config_files = find_config_files(benchmarks, scenarios)
        if config_files == "":
            logging.warn("Cannot find any valid configs for the specified benchmark-scenario pairs.")
            return

    logging.info("Using config files: {:}".format(str(config_files)))
    configs = load_configs(config_files)

    for config in configs:
        base_benchmark_conf = flatten_config(config, system_id)
        if base_benchmark_conf is None:
            continue

        base_benchmark_conf["config_name"] = "{:}_{:}_{:}".format(
            system_id,
            base_benchmark_conf["benchmark"],
            base_benchmark_conf["scenario"]
        )
        logging.info("Processing config \"{:}\"".format(base_benchmark_conf["config_name"]))

        # Load config_ver / apply overrides
        conf_vers = main_args.get("config_ver", "default").split(",")

        # Build default first. This is because some config_vers only modify harness args, and the engine is the same as
        # default. In this case, we build default first, and copy it instead of rebuilding it.
        if "default" in conf_vers:
            conf_vers = ["default"] + list(set(conf_vers) - {"default"})
        elif "all" in conf_vers:
            conf_vers = ["default"] + list(base_benchmark_conf.get("config_ver", {}).keys())

        for conf_ver in conf_vers:
            benchmark_conf = dict(base_benchmark_conf)  # Copy the config so we don't modify it

            # These fields are canonical names that refer to certain config versions
            benchmark_conf["accuracy_level"] = "99%"
            benchmark_conf["optimization_level"] = "plugin-enabled"
            benchmark_conf["inference_server"] = "lwis"

            """@etcheng
            NOTE: The original plan was to use a syntax like high_accuracy+triton to be able to combine already defined
            config_vers. However, since high_accuracy, triton, and high_accuracy+triton are likely to all have different
            expected QPS values, it makes more sense to keep high_accuracy_triton as a separate, individual config_ver.

            In the future, perhaps we can make an "extends": [ list of strings ] or { dict of config_ver name ->
            config_key } field in config_vers, so that we can define new config_vers that extend or combine previous
            config_vers.
            """

            equiv_to_default = False

            if conf_ver != "default":
                if "config_ver" not in benchmark_conf or conf_ver not in benchmark_conf["config_ver"]:
                    logging.warn(
                        "--config_ver={:} does not exist in config file '{:}'".format(conf_ver, benchmark_conf["config_name"]))
                    continue
                else:
                    if "high_accuracy" in conf_ver:
                        benchmark_conf["accuracy_level"] = "99.9%"
                    if "ootb" in conf_ver:
                        benchmark_conf["optimization_level"] = "ootb"
                    # "inference_server" is set when we run the harness

                    overrides = benchmark_conf["config_ver"][conf_ver]

                    # Check if this config_ver is equivalent to the default engine
                    gen_eng_argset = set(common_args.GENERATE_ENGINE_ARGS)
                    override_argset = set(overrides.keys())
                    equiv_to_default = (len(gen_eng_argset & override_argset) == 0)

                    benchmark_conf.update(overrides)

            # Update the config_ver key to be the actual string name, not the overrides
            benchmark_conf["config_ver"] = conf_ver

            need_gpu = not main_args["no_gpu"]
            need_dla = not main_args["gpu_only"]

            # Override the system_name if it exists
            if "system_name" in main_args:
                benchmark_conf["system_name"] = main_args["system_name"]

            if main_args["action"] == "generate_engines":
                # Turn on MPS if server scenario and if active_sms is specified.
                benchmark_conf = apply_overrides(benchmark_conf, ["active_sms"])
                active_sms = benchmark_conf.get("active_sms", None)

                copy_from_default = ("default" in conf_vers) and equiv_to_default
                if copy_from_default:
                    logging.info(
                        "config_ver={:} only modifies harness args. Re-using default engine.".format(conf_ver))

                _gen_args = [benchmark_conf]
                _gen_kwargs = {
                    "gpu": need_gpu,
                    "dla": need_dla,
                    "copy_from_default": copy_from_default
                }

                if not main_args["no_child_process"]:
                    if config["scenario"] == SCENARIOS.Server and active_sms is not None and active_sms < 100:
                        with ScopedMPS(active_sms):
                            launch_handle_generate_engine(*_gen_args, **_gen_kwargs)
                    else:
                        launch_handle_generate_engine(*_gen_args, **_gen_kwargs)
                else:
                    handle_generate_engine(*_gen_args, **_gen_kwargs)
            elif main_args["action"] == "run_harness":
                # In case there's a leftover audit.config file from a prior compliance run (or for some other reason),
                # we need to delete it or we risk silent failure.
                auditing.cleanup()

                handle_run_harness(benchmark_conf, need_gpu, need_dla, profile, power)
            elif main_args["action"] == "run_audit_harness":
                logging.info('\n\n\nRunning compliance harness for test ' + main_args['audit_test'] + '\n\n\n')

                # Find the correct audit.config file and move it in current directory
                dest_config = auditing.load(main_args['audit_test'], benchmark_conf['benchmark'])

                # Make sure the log_file override is valid
                os.makedirs("build/compliance_logs", exist_ok=True)

                # Pass audit test name to handle_run_harness via benchmark_conf
                benchmark_conf['audit_test_name'] = main_args['audit_test']

                # Run harness
                handle_run_harness(benchmark_conf, need_gpu, need_dla, profile, power, compliance=True)

                # Cleanup audit.config
                logging.info("AUDIT HARNESS: Cleaning Up audit.config...")
                auditing.cleanup()
            elif main_args["action"] == "run_audit_verification":
                logging.info("Running compliance verification for test " + main_args['audit_test'])
                handle_audit_verification(audit_test_name=main_args['audit_test'], config=benchmark_conf)
                auditing.cleanup()
            elif main_args["action"] == "calibrate":
                # To generate calibration cache, we only need to run each benchmark once.
                # Use offline config.
                if benchmark_conf["scenario"] == SCENARIOS.Offline:
                    handle_calibrate(benchmark_conf)
            elif main_args["action"] == "generate_conf_files":
                handle_run_harness(benchmark_conf, need_gpu, need_dla, generate_conf_files_only=True)
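
A minimal sketch of how this function might be driven: main_args behaves like a dict exposing the keys read above ("benchmarks", "scenarios", "configs", "config_ver", "action", "no_gpu", "gpu_only", "no_child_process", "profile", "power", and "audit_test" for the audit actions). In the project itself these values come from a command-line parser, so the dict literal, the benchmark/scenario strings, and the system_id below are only illustrative assumptions.

# Hypothetical driver; the real project builds main_args from its own CLI parser.
if __name__ == "__main__":
    example_args = {
        "benchmarks": "resnet50",       # comma-separated list, or None to run BENCHMARKS.ALL
        "scenarios": "Offline,Server",  # comma-separated list, or None to run SCENARIOS.ALL
        "configs": "",                  # empty -> find_config_files() locates them automatically
        "config_ver": "default",
        "action": "generate_engines",
        "no_gpu": False,
        "gpu_only": False,
        "no_child_process": False,
        "profile": None,
        "power": False,
        "audit_test": None,             # only read by the run_audit_* actions
    }
    main(example_args, "T4x8")          # system_id string is a placeholder
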
Code Example #2
def main(main_args, system):
    """
    Args:
        main_args: Args parsed from user input.
        system: System to use.
    """
    system_id = system.get_id()

    # Turn off MPS in case it's turned on.
    turn_off_mps()

    # Get user's benchmarks, else run all.
    benchmarks = BENCHMARKS.ALL
    if main_args["benchmarks"] is not None:
        benchmarks = main_args["benchmarks"].split(",")
        benchmarks = [BENCHMARKS.alias(b) for b in benchmarks]

    # Get user's scenarios, else use all.
    scenarios = SCENARIOS.ALL
    if main_args["scenarios"] is not None:
        scenarios = main_args["scenarios"].split(",")
        scenarios = [SCENARIOS.alias(s) for s in scenarios]

    profile = main_args.get("profile", None)
    power = main_args.get("power", False)

    # Automatically find config file paths
    config_files = main_args["configs"]
    if config_files == "" or config_files is None:
        config_files = find_config_files(benchmarks, scenarios)
        if config_files == "":
            logging.warn(
                "Cannot find any valid configs for the specified benchmark-scenario pairs."
            )
            return

    logging.info("Using config files: {:}".format(str(config_files)))
    configs = load_configs(config_files)

    for config in configs:
        base_benchmark_conf = get_system_benchmark_config(config, system_id)
        if base_benchmark_conf is None:
            continue

        base_benchmark_conf["config_name"] = "{:}_{:}_{:}".format(
            system_id, base_benchmark_conf["benchmark"],
            base_benchmark_conf["scenario"])
        logging.info("Processing config \"{:}\"".format(
            base_benchmark_conf["config_name"]))

        # Load config_ver / apply overrides
        conf_vers = main_args.get("config_ver", "default").split(",")

        # Build default first. This is because some config_vers only modify harness args, and the engine is the same as
        # default. In this case, we build default first, and copy it instead of rebuilding it.
        if "default" in conf_vers:
            conf_vers = ["default"] + list(set(conf_vers) - {"default"})
        elif "all" in conf_vers:
            tmp = ["default"] + list(
                base_benchmark_conf.get("config_ver", {}).keys())
            # As per request, 'all' should skip 'maxQ' and 'hetero' config_vers for now. These should only be run
            # when specified directly.
            conf_vers = []
            for s in tmp:
                if "maxq" not in s.lower() and "hetero" not in s.lower():
                    conf_vers.append(s)

        for conf_ver in conf_vers:
            benchmark_conf = dict(base_benchmark_conf)  # Copy the config so we don't modify it

            # These fields are canonical names that refer to certain config versions
            benchmark_conf["accuracy_level"] = "99%"
            benchmark_conf["optimization_level"] = "plugin-enabled"
            benchmark_conf["inference_server"] = "lwis"

            equiv_to_default = False

            if conf_ver != "default":
                if "config_ver" not in benchmark_conf or conf_ver not in benchmark_conf[
                        "config_ver"]:
                    logging.warn(
                        "--config_ver={:} does not exist in config file '{:}'".
                        format(conf_ver, benchmark_conf["config_name"]))
                    continue
                else:
                    if "high_accuracy" in conf_ver:
                        benchmark_conf["accuracy_level"] = "99.9%"
                    if "ootb" in conf_ver:
                        benchmark_conf["optimization_level"] = "ootb"
                    # "inference_server" is set when we run the harness

                    overrides = benchmark_conf["config_ver"][conf_ver]

                    # Enforce Triton check
                    if "triton" in conf_ver.lower() and not overrides.get(
                            "use_triton", False):
                        raise RuntimeError(
                            "conf_ver={} references Triton harness, but 'use_triton' is false"
                            .format(conf_ver))

                    # Check if this config_ver is equivalent to the default engine
                    # RNNT has multiple engines, so disable the equiv_to_default.
                    if benchmark_conf["benchmark"] != BENCHMARKS.RNNT:
                        gen_eng_argset = set(common_args.GENERATE_ENGINE_ARGS)
                        override_argset = set(overrides.keys())
                        equiv_to_default = (len(gen_eng_argset & override_argset) == 0)

                    benchmark_conf.update(overrides)

            # Update the config_ver key to be the actual string name, not the overrides
            benchmark_conf["config_ver"] = conf_ver

            need_gpu = not main_args["no_gpu"]
            need_dla = not main_args["gpu_only"]

            # Override the system_name if it exists
            if "system_name" in main_args:
                benchmark_conf["system_name"] = main_args["system_name"]

            # Check for use_cpu
            if system_id.startswith("Triton_CPU"):
                benchmark_conf["use_cpu"] = True

            # Generate engines.
            if main_args["action"] == "generate_engines":
                # Turn on MPS if server scenario and if active_sms is specified.
                benchmark_conf = apply_overrides(benchmark_conf,
                                                 ["active_sms"])
                active_sms = benchmark_conf.get("active_sms", None)

                copy_from_default = ("default"
                                     in conf_vers) and equiv_to_default
                if copy_from_default:
                    logging.info(
                        "config_ver={:} only modifies harness args. Re-using default engine."
                        .format(conf_ver))

                _gen_args = [benchmark_conf]
                _gen_kwargs = {
                    "gpu": need_gpu,
                    "dla": need_dla,
                    "copy_from_default": copy_from_default
                }

                if not main_args["no_child_process"]:
                    if config["scenario"] == SCENARIOS.Server and active_sms is not None and active_sms < 100:
                        with ScopedMPS(active_sms):
                            launch_handle_generate_engine(*_gen_args, **_gen_kwargs)
                    else:
                        launch_handle_generate_engine(*_gen_args, **_gen_kwargs)
                else:
                    handle_generate_engine(*_gen_args, **_gen_kwargs)

            # Run CPU harness:
            elif main_args["action"] == "run_cpu_harness":
                auditing.cleanup()
                benchmark_conf["use_cpu"] = True
                handle_run_harness(benchmark_conf, False, False, None, power)
            # Run harness.
            elif main_args["action"] == "run_harness":
                # In case there's a leftover audit.config file from a prior compliance run (or for some other reason),
                # we need to delete it or we risk silent failure.
                auditing.cleanup()

                handle_run_harness(benchmark_conf, need_gpu, need_dla, profile,
                                   power)
            elif main_args["action"] == "run_audit_harness" or main_args[
                    "action"] == "run_cpu_audit_harness":
                logging.info('\n\n\nRunning compliance harness for test ' +
                             main_args['audit_test'] + '\n\n\n')

                # Find the correct audit.config file and move it in current directory
                dest_config = auditing.load(main_args['audit_test'],
                                            benchmark_conf['benchmark'])

                # Make sure the log_file override is valid
                os.makedirs("build/compliance_logs", exist_ok=True)

                # Pass audit test name to handle_run_harness via benchmark_conf
                benchmark_conf['audit_test_name'] = main_args['audit_test']

                if main_args["action"] == "run_cpu_audit_harness":
                    need_gpu = False
                    need_dla = False
                    profile = None
                    benchmark_conf["use_cpu"] = True

                # Run harness
                handle_run_harness(benchmark_conf,
                                   need_gpu,
                                   need_dla,
                                   profile,
                                   power,
                                   compliance=True)

                # Cleanup audit.config
                logging.info("AUDIT HARNESS: Cleaning Up audit.config...")
                auditing.cleanup()
            elif main_args["action"] == "run_audit_verification":
                logging.info("Running compliance verification for test " +
                             main_args['audit_test'])
                handle_audit_verification(
                    audit_test_name=main_args['audit_test'],
                    config=benchmark_conf)
                auditing.cleanup()
            elif main_args["action"] == "run_cpu_audit_verification":
                logging.info("Running compliance verification for test " +
                             main_args['audit_test'])
                benchmark_conf["use_cpu"] = True
                handle_audit_verification(
                    audit_test_name=main_args['audit_test'],
                    config=benchmark_conf)
                auditing.cleanup()
            elif main_args["action"] == "calibrate":
                # To generate calibration cache, we only need to run each benchmark once.
                # Use offline config.
                if benchmark_conf["scenario"] == SCENARIOS.Offline:
                    handle_calibrate(benchmark_conf)
            elif main_args["action"] == "generate_conf_files":
                handle_run_harness(benchmark_conf,
                                   need_gpu,
                                   need_dla,
                                   generate_conf_files_only=True)
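
For reference, the config_ver handling shared by both versions reduces to one dictionary merge plus one set intersection: the named config_ver's overrides are applied on top of the base config, and the engine built for "default" can be reused only when none of the overridden keys affects engine generation. A standalone sketch follows; the ENGINE_ARGS list, benchmark name, and QPS values are made-up stand-ins (the real list is common_args.GENERATE_ENGINE_ARGS).

# Standalone sketch of the config_ver merge / equiv_to_default decision (illustrative values only).
ENGINE_ARGS = ["gpu_batch_size", "dla_batch_size", "precision"]  # stand-in for common_args.GENERATE_ENGINE_ARGS

base_conf = {
    "benchmark": "bert",
    "scenario": "Server",
    "gpu_batch_size": 64,
    "server_target_qps": 3000,
    "config_ver": {
        "triton": {"use_triton": True, "server_target_qps": 2800},          # harness-only overrides
        "high_accuracy": {"precision": "fp16", "server_target_qps": 1500},  # touches an engine arg
    },
}

def resolve(conf_ver):
    conf = dict(base_conf)                       # copy so the base config is not modified
    overrides = conf["config_ver"][conf_ver]
    # The default engine can be reused only if no engine-affecting key is overridden.
    equiv_to_default = len(set(ENGINE_ARGS) & set(overrides.keys())) == 0
    conf.update(overrides)
    conf["config_ver"] = conf_ver                # store the name, not the override dict
    return conf, equiv_to_default

print(resolve("triton")[1])         # True  -> copy the engine built for "default"
print(resolve("high_accuracy")[1])  # False -> rebuild the engine for this config_ver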