Example #1
def handle_calibrate(config):
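    """Generate the calibration cache for the benchmark named in config."""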
    benchmark_name = config["benchmark"]

    logging.info("Generating calibration cache for Benchmark \"{:}\"".format(benchmark_name))
    config = apply_overrides(config, common_args.CALIBRATION_ARGS)
    config["dla_core"] = None
    config["force_calibration"] = True
    b = get_benchmark(config)
    b.calibrate()
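
For orientation, a minimal invocation sketch follows; the config keys and values here are assumptions based on the fields this handler reads, not a definitive schema.

# Hypothetical config for illustration only; real configs come from the
# per-benchmark config files loaded elsewhere in the harness.
calibration_config = {
    "benchmark": "ResNet50",
    "scenario": "Offline",
}
# handle_calibrate(calibration_config)  # would force-rebuild the calibration cache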
Example #2
def get_benchmark(conf):
    """Return module of benchmark initialized with config."""

    benchmark_name = conf["benchmark"]
    if benchmark_name == BENCHMARKS.BERT:
        # TODO: currently only BERT uses gpu_inference_streams to generate engines
        conf = apply_overrides(conf, ['gpu_inference_streams'])
        BERTBuilder = import_module(
            "code.bert.tensorrt_sparse.bert_var_seqlen").BERTBuilder
        return BERTBuilder(conf)
    elif benchmark_name == BENCHMARKS.ResNet50:
        ResNet50Builder = import_module(
            "code.resnet50.int4.harness").ResNet50Builder
        return ResNet50Builder(conf)
    else:
        raise ValueError("Unknown benchmark: {:}".format(benchmark_name))
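
The import_module calls above defer loading each builder until it is actually requested, which avoids paying plugin-load time for benchmarks that are not run. Below is a self-contained sketch of the same lazy-dispatch pattern; lazy_builder is a hypothetical helper and the module path is only illustrative.

from importlib import import_module


def lazy_builder(module_path, class_name):
    """Return a factory that imports the builder class only on first use (hypothetical helper)."""
    def factory(conf):
        builder_cls = getattr(import_module(module_path), class_name)
        return builder_cls(conf)
    return factory


# make_bert = lazy_builder("code.bert.tensorrt_sparse.bert_var_seqlen", "BERTBuilder")
# bert_builder = make_bert(conf)  # the import happens here, not at module load time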
Example #3
def handle_generate_engine(config,
                           gpu=True,
                           dla=True,
                           copy_from_default=False):
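    """Build engines for the given benchmark and scenario config."""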

    benchmark_name = config["benchmark"]

    logging.info(
        "Building engines for {:} benchmark in {:} scenario...".format(
            benchmark_name, config["scenario"]))

    start_time = time.time()

    arglist = common_args.GENERATE_ENGINE_ARGS
    config = apply_overrides(config, arglist)

    if dla and "dla_batch_size" in config:
        config["batch_size"] = config["dla_batch_size"]
        logging.info("Building DLA engine for {:}_{:}_{:}".format(
            config["system_id"], benchmark_name, config["scenario"]))
        b = get_benchmark(config)

        if copy_from_default:
            copy_default_engine(b)
        else:
            b.build_engines()

    if gpu and "gpu_batch_size" in config:
        config["batch_size"] = config["gpu_batch_size"]
        config["dla_core"] = None
        logging.info("Building GPU engine for {:}_{:}_{:}".format(
            config["system_id"], benchmark_name, config["scenario"]))
        b = get_benchmark(config)

        if copy_from_default:
            copy_default_engine(b)
        else:
            b.build_engines()

    end_time = time.time()

    logging.info(
        "Finished building engines for {:} benchmark in {:} scenario.".format(
            benchmark_name, config["scenario"]))

    print("Time taken to generate engines: {:} seconds".format(end_time -
                                                               start_time))
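
A hedged sketch of a config that would exercise both engine-build branches above; the key names mirror the lookups in the function (gpu_batch_size, dla_batch_size), while the values are illustrative placeholders.

# Illustrative placeholders, not tuned settings.
engine_config = {
    "benchmark": "ResNet50",
    "scenario": "Offline",
    "system_id": "Xavier",
    "gpu_batch_size": 64,  # presence of this key triggers the GPU engine build
    "dla_batch_size": 32,  # presence of this key triggers the DLA engine build
}
# handle_generate_engine(engine_config, gpu=True, dla=True)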
Example #4
def get_benchmark(conf):
    """Return module of benchmark initialized with config."""

    benchmark_name = conf["benchmark"]

    # Do not use a map. We want to import benchmarks as we need them, because some take
    # time to load due to plugins.
    if benchmark_name == BENCHMARKS.ResNet50:
        ResNet50 = import_module("code.resnet50.tensorrt.ResNet50").ResNet50
        return ResNet50(conf)
    elif benchmark_name == BENCHMARKS.SSDResNet34:
        SSDResNet34 = import_module(
            "code.ssd-resnet34.tensorrt.SSDResNet34").SSDResNet34
        return SSDResNet34(conf)
    elif benchmark_name == BENCHMARKS.SSDMobileNet:
        SSDMobileNet = import_module(
            "code.ssd-mobilenet.tensorrt.SSDMobileNet").SSDMobileNet
        return SSDMobileNet(conf)
    elif benchmark_name == BENCHMARKS.BERT:
        # TODO: currently only BERT uses gpu_inference_streams to generate engines
        conf = apply_overrides(conf, ['gpu_inference_streams'])
        BERTBuilder = import_module(
            "code.bert.tensorrt.bert_var_seqlen").BERTBuilder
        return BERTBuilder(conf)
    elif benchmark_name == BENCHMARKS.RNNT:
        RNNTBuilder = import_module(
            "code.rnnt.tensorrt.rnn-t_builder").RNNTBuilder
        return RNNTBuilder(conf)
    elif benchmark_name == BENCHMARKS.DLRM:
        DLRMBuilder = import_module("code.dlrm.tensorrt.dlrm").DLRMBuilder
        return DLRMBuilder(conf)
    elif benchmark_name == BENCHMARKS.UNET:
        UNETBuilder = import_module(
            "code.3d-unet.tensorrt.3d-unet").UnetBuilder
        return UNETBuilder(conf)
    else:
        raise ValueError("Unknown benchmark: {:}".format(benchmark_name))
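
Several of the package names above (for example code.ssd-resnet34 and code.3d-unet) contain hyphens, so they cannot appear in a plain import statement; import_module with a dotted string is the workaround used here. A minimal sketch, with the path taken from the code above:

from importlib import import_module

# "import code.3d-unet.tensorrt" is a SyntaxError because the name is not a valid
# identifier, but importlib resolves the dotted string by path lookup, so a package
# directory named "3d-unet" on sys.path can still be imported.
# unet_module = import_module("code.3d-unet.tensorrt.3d-unet")
# UnetBuilder = unet_module.UnetBuilder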
Example #5
def main(main_args, system):
    """
    Args:
        main_args: Args parsed from user input.
        system: System to use
    """
    system_id = system.get_id()

    # Turn off MPS in case it's turned on.
    turn_off_mps()

    # Get user's benchmarks, else run all.
    benchmarks = BENCHMARKS.ALL
    if main_args["benchmarks"] is not None:
        benchmarks = main_args["benchmarks"].split(",")
        benchmarks = [BENCHMARKS.alias(b) for b in benchmarks]

    # Get user's scenarios, else use all.
    scenarios = SCENARIOS.ALL
    if main_args["scenarios"] is not None:
        scenarios = main_args["scenarios"].split(",")
        scenarios = [SCENARIOS.alias(s) for s in scenarios]

    profile = main_args.get("profile", None)
    power = main_args.get("power", False)

    # Automatically find config file paths
    config_files = main_args["configs"]
    if config_files == "" or config_files is None:
        config_files = find_config_files(benchmarks, scenarios)
        if config_files == "":
            logging.warn(
                "Cannot find any valid configs for the specified benchmark-scenario pairs."
            )
            return

    logging.info("Using config files: {:}".format(str(config_files)))
    configs = load_configs(config_files)

    for config in configs:
        base_benchmark_conf = get_system_benchmark_config(config, system_id)
        if base_benchmark_conf is None:
            continue

        base_benchmark_conf["config_name"] = "{:}_{:}_{:}".format(
            system_id, base_benchmark_conf["benchmark"],
            base_benchmark_conf["scenario"])
        logging.info("Processing config \"{:}\"".format(
            base_benchmark_conf["config_name"]))

        # Load config_ver / apply overrides
        conf_vers = main_args.get("config_ver", "default").split(",")

        # Build default first. This is because some config_vers only modify harness args, and the engine is the same as
        # default. In this case, we build default first, and copy it instead of rebuilding it.
        if "default" in conf_vers:
            conf_vers = ["default"] + list(set(conf_vers) - {"default"})
        elif "all" in conf_vers:
            conf_vers = ["default"] + list(
                base_benchmark_conf.get("config_ver", {}).keys())

        for conf_ver in conf_vers:
            benchmark_conf = dict(
                base_benchmark_conf)  # Copy the config so we don't modify it

            # These fields are canonical names that refer to certain config versions
            benchmark_conf["accuracy_level"] = "99%"
            benchmark_conf["optimization_level"] = "plugin-enabled"
            benchmark_conf["inference_server"] = "lwis"
            """@etcheng
            NOTE: The original plan was to use a syntax like high_accuracy+triton to be able to combine already defined
            config_vers. However, since high_accuracy, triton, and high_accuracy+triton are likely to all have different
            expected QPS values, it makes more sense to keep high_accuracy_triton as a separate, individual config_ver.

            In the future, perhaps we can make an "extends": [ list of strings ] or { dict of config_ver name ->
            config_key } field in config_vers, so that we can define new config_vers that extend or combine previous
            config_vers.
            """

            equiv_to_default = False

            if conf_ver != "default":
                if "config_ver" not in benchmark_conf or conf_ver not in benchmark_conf[
                        "config_ver"]:
                    logging.warn(
                        "--config_ver={:} does not exist in config file '{:}'".
                        format(conf_ver, benchmark_conf["config_name"]))
                    continue
                else:
                    if "high_accuracy" in conf_ver:
                        benchmark_conf["accuracy_level"] = "99.9%"
                    if "ootb" in conf_ver:
                        benchmark_conf["optimization_level"] = "ootb"
                    # "inference_server" is set when we run the harness

                    overrides = benchmark_conf["config_ver"][conf_ver]

                    # Check if this config_ver is equivalent to the default engine
                    gen_eng_argset = set(common_args.GENERATE_ENGINE_ARGS)
                    override_argset = set(overrides.keys())
                    equiv_to_default = (len(gen_eng_argset
                                            & override_argset) == 0)

                    benchmark_conf.update(overrides)

            # Update the config_ver key to be the actual string name, not the overrides
            benchmark_conf["config_ver"] = conf_ver

            need_gpu = not main_args["no_gpu"]
            need_dla = not main_args["gpu_only"]

            # Override the system_name if it exists
            if "system_name" in main_args:
                benchmark_conf["system_name"] = main_args["system_name"]

            # Generate engines.
            if main_args["action"] == "generate_engines":
                # Turn on MPS if server scenario and if active_sms is specified.
                benchmark_conf = apply_overrides(benchmark_conf,
                                                 ["active_sms"])
                active_sms = benchmark_conf.get("active_sms", None)

                copy_from_default = ("default"
                                     in conf_vers) and equiv_to_default
                if copy_from_default:
                    logging.info(
                        "config_ver={:} only modifies harness args. Re-using default engine."
                        .format(conf_ver))

                _gen_args = [benchmark_conf]
                _gen_kwargs = {
                    "gpu": need_gpu,
                    "dla": need_dla,
                    "copy_from_default": copy_from_default
                }

                if not main_args["no_child_process"]:
                    if (config["scenario"] == SCENARIOS.Server
                            and active_sms is not None and active_sms < 100):
                        with ScopedMPS(active_sms):
                            launch_handle_generate_engine(
                                *_gen_args, **_gen_kwargs)
                    else:
                        launch_handle_generate_engine(*_gen_args,
                                                      **_gen_kwargs)
                else:
                    handle_generate_engine(*_gen_args, **_gen_kwargs)

            # Run harness.
            elif main_args["action"] == "run_harness":
                # Delete any leftover audit.config file from a prior compliance
                # run (or any other source); otherwise we risk a silent failure.
                auditing.cleanup()

                handle_run_harness(benchmark_conf, need_gpu, need_dla, profile,
                                   power)
            elif main_args["action"] == "run_audit_harness":
                logging.info('\n\n\nRunning compliance harness for test ' +
                             main_args['audit_test'] + '\n\n\n')

                # Find the correct audit.config file and move it into the current directory
                dest_config = auditing.load(main_args['audit_test'],
                                            benchmark_conf['benchmark'])

                # Make sure the log_file override is valid
                os.makedirs("build/compliance_logs", exist_ok=True)

                # Pass audit test name to handle_run_harness via benchmark_conf
                benchmark_conf['audit_test_name'] = main_args['audit_test']

                # Run harness
                handle_run_harness(benchmark_conf,
                                   need_gpu,
                                   need_dla,
                                   profile,
                                   power,
                                   compliance=True)

                # Cleanup audit.config
                logging.info("AUDIT HARNESS: Cleaning Up audit.config...")
                auditing.cleanup()
            elif main_args["action"] == "run_audit_verification":
                logging.info("Running compliance verification for test " +
                             main_args['audit_test'])
                handle_audit_verification(
                    audit_test_name=main_args['audit_test'],
                    config=benchmark_conf)
                auditing.cleanup()
            elif main_args["action"] == "calibrate":
                # To generate calibration cache, we only need to run each benchmark once.
                # Use offline config.
                if benchmark_conf["scenario"] == SCENARIOS.Offline:
                    handle_calibrate(benchmark_conf)
            elif main_args["action"] == "generate_conf_files":
                handle_run_harness(benchmark_conf,
                                   need_gpu,
                                   need_dla,
                                   generate_conf_files_only=True)
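
For reference, a sketch of the main_args mapping that main() expects; the keys are taken from the lookups in the function body above, and the values are placeholders rather than recommended settings.

# Keys mirror the main_args[...] accesses in main(); values are illustrative only.
example_main_args = {
    "action": "generate_engines",   # or "run_harness", "calibrate", "run_audit_harness", ...
    "benchmarks": "resnet50,bert",  # None means run every benchmark
    "scenarios": "Offline",         # None means every scenario
    "configs": "",                  # empty/None falls back to find_config_files()
    "config_ver": "default",
    "no_gpu": False,
    "gpu_only": False,
    "no_child_process": False,
    "profile": None,
    "power": False,
    # "audit_test" is also required for the run_audit_* actions.
}
# main(example_main_args, system)  # "system" comes from system detection elsewhere in the harness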
Example #6
def handle_run_harness(config,
                       gpu=True,
                       dla=True,
                       profile=None,
                       power=False,
                       generate_conf_files_only=False,
                       compliance=False):
    """Run harness for given benchmark and scenario."""

    benchmark_name = config["benchmark"]

    logging.info("Running harness for {:} benchmark in {:} scenario...".format(
        benchmark_name, config["scenario"]))

    arglist = common_args.getScenarioBasedHarnessArgs(config["scenario"],
                                                      benchmark_name)

    config = apply_overrides(config, arglist)

    # Validate arguments
    if not dla:
        config["dla_batch_size"] = None
    if not gpu:
        config["gpu_batch_size"] = None

    # If we only want to generate conf_files, then set flag to true
    if generate_conf_files_only:
        config["generate_conf_files_only"] = True
        profile = None
        power = False

    # MLPINF-829: Disable CUDA graphs when there is a profiler
    if profile is not None:
        logging.warn(
            "Due to MLPINF-829, CUDA graphs results in a CUDA illegal memory access when run with a profiler \
                on r460 driver. Force-disabling CUDA graphs.")
        config["use_graphs"] = False

    harness, config = get_harness(config, profile)

    if power:
        try:
            from code.internal.power_measurements import PowerMeasurements
            power_logfile_name = "{}_{}_{}_{}".format(
                config.get("config_name"), config.get("accuracy_level"),
                config.get("optimization_level"),
                config.get("inference_server"))
            power_measurements = PowerMeasurements("{}/{}/{}".format(
                os.getcwd(), "power_measurements", power_logfile_name))
            power_measurements.start()
        except BaseException:
            power_measurements = None

    for key, value in config.items():
        print("{} : {}".format(key, value))
    result = ""

    if compliance:
        # AP: We need to keep the compliance logs separated from the accuracy and
        # perf logs; otherwise it messes up the update_results process.
        config['log_dir'] = os.path.join('build/compliance_logs',
                                         config['audit_test_name'])
        logging.info(
            'AUDIT HARNESS: Overriding log_dir for compliance run. Set to ' +
            config['log_dir'])

    # Launch the harness
    passed = True
    try:
        result = harness.run_harness()
        logging.info("Result: {:}".format(result))
    except Exception as _:
        traceback.print_exc(file=sys.stdout)
        passed = False
    finally:
        if power and power_measurements is not None:
            power_measurements.stop()
    if not passed:
        raise RuntimeError("Run harness failed!")

    if generate_conf_files_only and result == "Generated conf files":
        return

    # Append result to perf result summary log.
    log_dir = config["log_dir"]
    summary_file = os.path.join(log_dir, "perf_harness_summary.json")
    results = {}
    if os.path.exists(summary_file):
        with open(summary_file) as f:
            results = json.load(f)

    config_name = "{:}-{:}-{:}".format(harness.get_system_name(),
                                       config["config_ver"],
                                       config["scenario"])
    if config_name not in results:
        results[config_name] = {}
    results[config_name][benchmark_name] = result

    with open(summary_file, "w") as f:
        json.dump(results, f)

    # Check accuracy from loadgen logs.
    if not compliance:
        # TEST01 fails the accuracy test because it produces fewer predictions than expected
        accuracy = check_accuracy(
            os.path.join(harness.get_full_log_dir(),
                         "mlperf_log_accuracy.json"), config)
        summary_file = os.path.join(log_dir, "accuracy_summary.json")
        results = {}
        if os.path.exists(summary_file):
            with open(summary_file) as f:
                results = json.load(f)

        if config_name not in results:
            results[config_name] = {}
        results[config_name][benchmark_name] = accuracy

        with open(summary_file, "w") as f:
            json.dump(results, f)
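
The two summary files written above share the same layout: results keyed by a "<system_name>-<config_ver>-<scenario>" string and then by benchmark name. A small sketch with placeholder contents:

import json

# Placeholder contents; the outer key follows the "<system_name>-<config_ver>-<scenario>"
# pattern built in handle_run_harness.
example_summary = {
    "A100-SXM4x8-default-Offline": {
        "ResNet50": "result string returned by harness.run_harness()",
    }
}
print(json.dumps(example_summary, indent=2))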