def handle_calibrate(config):
    benchmark_name = config["benchmark"]
    logging.info("Generating calibration cache for Benchmark \"{:}\"".format(benchmark_name))
    config = apply_overrides(config, common_args.CALIBRATION_ARGS)
    config["dla_core"] = None
    config["force_calibration"] = True
    b = get_benchmark(config)
    b.calibrate()
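
# Illustrative sketch (not part of the original flow): how handle_calibrate() might be invoked
# directly for a single benchmark config. The keys touched here are the ones read above; real
# configs are normally produced by load_configs()/get_system_benchmark_config() further below.
def _example_calibrate(base_conf):
    """Hypothetical helper: force-regenerate the calibration cache for one benchmark config."""
    calib_conf = dict(base_conf)                # copy so the caller's config is not mutated
    calib_conf["scenario"] = SCENARIOS.Offline  # calibration is only run from the Offline config
    handle_calibrate(calib_conf)                # applies CALIBRATION_ARGS and sets force_calibration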
def get_benchmark(conf):
    """Return module of benchmark initialized with config."""
    benchmark_name = conf["benchmark"]
    if benchmark_name == BENCHMARKS.BERT:
        # TODO now only BERT uses gpu_inference_streams to generate engines
        conf = apply_overrides(conf, ['gpu_inference_streams'])
        BERTBuilder = import_module("code.bert.tensorrt_sparse.bert_var_seqlen").BERTBuilder
        return BERTBuilder(conf)
    elif benchmark_name == BENCHMARKS.ResNet50:
        ResNet50Builder = import_module("code.resnet50.int4.harness").ResNet50Builder
        return ResNet50Builder(conf)
    else:
        raise ValueError("Unknown benchmark: {:}".format(benchmark_name))
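
# Usage note (illustrative; assumes this definition of get_benchmark() is the one in scope -- a
# second definition covering the full benchmark list appears further below). Only BERT (sparse
# var-seqlen build) and ResNet50 (INT4 harness) are dispatched by this variant, e.g.:
#
#   conf["benchmark"] = BENCHMARKS.BERT   # or BENCHMARKS.ResNet50
#   builder = get_benchmark(conf)         # any other benchmark name raises ValueError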
def handle_generate_engine(config, gpu=True, dla=True, copy_from_default=False):
    benchmark_name = config["benchmark"]
    logging.info("Building engines for {:} benchmark in {:} scenario...".format(
        benchmark_name, config["scenario"]))

    start_time = time.time()

    arglist = common_args.GENERATE_ENGINE_ARGS
    config = apply_overrides(config, arglist)

    if dla and "dla_batch_size" in config:
        config["batch_size"] = config["dla_batch_size"]
        logging.info("Building DLA engine for {:}_{:}_{:}".format(
            config["system_id"], benchmark_name, config["scenario"]))
        b = get_benchmark(config)

        if copy_from_default:
            copy_default_engine(b)
        else:
            b.build_engines()

    if gpu and "gpu_batch_size" in config:
        config["batch_size"] = config["gpu_batch_size"]
        config["dla_core"] = None
        logging.info("Building GPU engine for {:}_{:}_{:}".format(
            config["system_id"], benchmark_name, config["scenario"]))
        b = get_benchmark(config)

        if copy_from_default:
            copy_default_engine(b)
        else:
            b.build_engines()

    end_time = time.time()

    logging.info("Finished building engines for {:} benchmark in {:} scenario.".format(
        benchmark_name, config["scenario"]))
    print("Time taken to generate engines: {:} seconds".format(end_time - start_time))
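
# Illustrative sketch (assumption: "gpu_batch_size"/"dla_batch_size" are already present in the
# config, as the branches above expect). Builds only the GPU engine and skips DLA, without going
# through main()'s child-process wrapper.
def _example_build_gpu_engine_only(conf):
    """Hypothetical helper: build GPU engines for one benchmark config, skipping DLA."""
    handle_generate_engine(conf, gpu=True, dla=False, copy_from_default=False)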
def get_benchmark(conf):
    """Return module of benchmark initialized with config."""
    benchmark_name = conf["benchmark"]

    # Do not use a map. We want to import benchmarks as we need them, because some take
    # time to load due to plugins.
    if benchmark_name == BENCHMARKS.ResNet50:
        ResNet50 = import_module("code.resnet50.tensorrt.ResNet50").ResNet50
        return ResNet50(conf)
    elif benchmark_name == BENCHMARKS.SSDResNet34:
        SSDResNet34 = import_module("code.ssd-resnet34.tensorrt.SSDResNet34").SSDResNet34
        return SSDResNet34(conf)
    elif benchmark_name == BENCHMARKS.SSDMobileNet:
        SSDMobileNet = import_module("code.ssd-mobilenet.tensorrt.SSDMobileNet").SSDMobileNet
        return SSDMobileNet(conf)
    elif benchmark_name == BENCHMARKS.BERT:
        # TODO now only BERT uses gpu_inference_streams to generate engines
        conf = apply_overrides(conf, ['gpu_inference_streams'])
        BERTBuilder = import_module("code.bert.tensorrt.bert_var_seqlen").BERTBuilder
        return BERTBuilder(conf)
    elif benchmark_name == BENCHMARKS.RNNT:
        RNNTBuilder = import_module("code.rnnt.tensorrt.rnn-t_builder").RNNTBuilder
        return RNNTBuilder(conf)
    elif benchmark_name == BENCHMARKS.DLRM:
        DLRMBuilder = import_module("code.dlrm.tensorrt.dlrm").DLRMBuilder
        return DLRMBuilder(conf)
    elif benchmark_name == BENCHMARKS.UNET:
        UNETBuilder = import_module("code.3d-unet.tensorrt.3d-unet").UnetBuilder
        return UNETBuilder(conf)
    else:
        raise ValueError("Unknown benchmark: {:}".format(benchmark_name))
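
# As the comment above notes, builders are imported lazily because some pull in plugins that are
# slow to load. A side benefit of importlib.import_module() is that it accepts package names that
# are not valid Python identifiers ("ssd-resnet34", "3d-unet", "rnn-t_builder"), which a static
# `import` statement cannot express. The same pattern, with a hypothetical module path:
#
#   builder_cls = import_module("code.some-benchmark.tensorrt.builder").Builder  # hypothetical
#   builder = builder_cls(conf)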
def main(main_args, system):
    """
    Args:
        main_args: Args parsed from user input.
        system: System to use
    """
    system_id = system.get_id()

    # Turn off MPS in case it's turned on.
    turn_off_mps()

    # Get user's benchmarks, else run all.
    benchmarks = BENCHMARKS.ALL
    if main_args["benchmarks"] is not None:
        benchmarks = main_args["benchmarks"].split(",")
        benchmarks = [BENCHMARKS.alias(b) for b in benchmarks]

    # Get user's scenarios, else use all.
    scenarios = SCENARIOS.ALL
    if main_args["scenarios"] is not None:
        scenarios = main_args["scenarios"].split(",")
        scenarios = [SCENARIOS.alias(s) for s in scenarios]

    profile = main_args.get("profile", None)
    power = main_args.get("power", False)

    # Automatically find config file paths
    config_files = main_args["configs"]
    if config_files == "" or config_files is None:
        config_files = find_config_files(benchmarks, scenarios)
        if config_files == "":
            logging.warn("Cannot find any valid configs for the specified benchmark-scenario pairs.")
            return

    logging.info("Using config files: {:}".format(str(config_files)))
    configs = load_configs(config_files)

    for config in configs:
        base_benchmark_conf = get_system_benchmark_config(config, system_id)
        if base_benchmark_conf is None:
            continue

        base_benchmark_conf["config_name"] = "{:}_{:}_{:}".format(
            system_id, base_benchmark_conf["benchmark"], base_benchmark_conf["scenario"])
        logging.info("Processing config \"{:}\"".format(base_benchmark_conf["config_name"]))

        # Load config_ver / apply overrides
        conf_vers = main_args.get("config_ver", "default").split(",")

        # Build default first. This is because some config_vers only modify harness args, and the engine is the same as
        # default. In this case, we build default first, and copy it instead of rebuilding it.
        if "default" in conf_vers:
            conf_vers = ["default"] + list(set(conf_vers) - {"default"})
        elif "all" in conf_vers:
            conf_vers = ["default"] + list(base_benchmark_conf.get("config_ver", {}).keys())

        for conf_ver in conf_vers:
            benchmark_conf = dict(base_benchmark_conf)  # Copy the config so we don't modify it

            # These fields are canonical names that refer to certain config versions
            benchmark_conf["accuracy_level"] = "99%"
            benchmark_conf["optimization_level"] = "plugin-enabled"
            benchmark_conf["inference_server"] = "lwis"

            """@etcheng
            NOTE: The original plan was to use a syntax like high_accuracy+triton to be able to combine already defined
            config_vers. However, since high_accuracy, triton, and high_accuracy+triton are likely to all have
            different expected QPS values, it makes more sense to keep high_accuracy_triton as a separate, individual
            config_ver.

            In the future, perhaps we can make an "extends": [ list of strings ] or { dict of config_ver name ->
            config_key } field in config_vers, so that we can define new config_vers that extend or combine previous
            config_vers.
            """

            equiv_to_default = False

            if conf_ver != "default":
                if "config_ver" not in benchmark_conf or conf_ver not in benchmark_conf["config_ver"]:
                    logging.warn("--config_ver={:} does not exist in config file '{:}'".format(
                        conf_ver, benchmark_conf["config_name"]))
                    continue
                else:
                    if "high_accuracy" in conf_ver:
                        benchmark_conf["accuracy_level"] = "99.9%"
                    if "ootb" in conf_ver:
                        benchmark_conf["optimization_level"] = "ootb"
                    # "inference_server" is set when we run the harness

                    overrides = benchmark_conf["config_ver"][conf_ver]

                    # Check if this config_ver is equivalent to the default engine
                    gen_eng_argset = set(common_args.GENERATE_ENGINE_ARGS)
                    override_argset = set(overrides.keys())
                    equiv_to_default = (len(gen_eng_argset & override_argset) == 0)

                    benchmark_conf.update(overrides)

            # Update the config_ver key to be the actual string name, not the overrides
            benchmark_conf["config_ver"] = conf_ver

            need_gpu = not main_args["no_gpu"]
            need_dla = not main_args["gpu_only"]

            # Override the system_name if it exists
            if "system_name" in main_args:
                benchmark_conf["system_name"] = main_args["system_name"]

            # Generate engines.
            if main_args["action"] == "generate_engines":
                # Turn on MPS if server scenario and if active_sms is specified.
                benchmark_conf = apply_overrides(benchmark_conf, ["active_sms"])
                active_sms = benchmark_conf.get("active_sms", None)

                copy_from_default = ("default" in conf_vers) and equiv_to_default
                if copy_from_default:
                    logging.info(
                        "config_ver={:} only modifies harness args. Re-using default engine.".format(conf_ver))

                _gen_args = [benchmark_conf]
                _gen_kwargs = {
                    "gpu": need_gpu,
                    "dla": need_dla,
                    "copy_from_default": copy_from_default
                }

                if not main_args["no_child_process"]:
                    if config["scenario"] == SCENARIOS.Server and active_sms is not None and active_sms < 100:
                        with ScopedMPS(active_sms):
                            launch_handle_generate_engine(*_gen_args, **_gen_kwargs)
                    else:
                        launch_handle_generate_engine(*_gen_args, **_gen_kwargs)
                else:
                    handle_generate_engine(*_gen_args, **_gen_kwargs)

            # Run harness.
            elif main_args["action"] == "run_harness":
                # In case there's a leftover audit.config file from a prior compliance run or other reason,
                # we need to delete it or we risk silent failure.
                auditing.cleanup()

                handle_run_harness(benchmark_conf, need_gpu, need_dla, profile, power)
            elif main_args["action"] == "run_audit_harness":
                logging.info('\n\n\nRunning compliance harness for test ' + main_args['audit_test'] + '\n\n\n')

                # Find the correct audit.config file and move it in current directory
                dest_config = auditing.load(main_args['audit_test'], benchmark_conf['benchmark'])

                # Make sure the log_file override is valid
                os.makedirs("build/compliance_logs", exist_ok=True)

                # Pass audit test name to handle_run_harness via benchmark_conf
                benchmark_conf['audit_test_name'] = main_args['audit_test']

                # Run harness
                handle_run_harness(benchmark_conf, need_gpu, need_dla, profile, power, compliance=True)

                # Cleanup audit.config
                logging.info("AUDIT HARNESS: Cleaning Up audit.config...")
                auditing.cleanup()
            elif main_args["action"] == "run_audit_verification":
                logging.info("Running compliance verification for test " + main_args['audit_test'])
                handle_audit_verification(audit_test_name=main_args['audit_test'], config=benchmark_conf)
                auditing.cleanup()
            elif main_args["action"] == "calibrate":
                # To generate calibration cache, we only need to run each benchmark once.
                # Use offline config.
                if benchmark_conf["scenario"] == SCENARIOS.Offline:
                    handle_calibrate(benchmark_conf)
            elif main_args["action"] == "generate_conf_files":
                handle_run_harness(benchmark_conf, need_gpu, need_dla, generate_conf_files_only=True)
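
# Illustrative sketch (assumption: the surrounding project normally builds main_args via its
# argparse wrapper in common_args and detects `system` elsewhere; the dict below only shows the
# keys that main() reads, and the benchmark/scenario strings are whatever BENCHMARKS.alias()/
# SCENARIOS.alias() accept).
def _example_run_offline_resnet50(system):
    """Hypothetical driver: run the ResNet50 Offline harness with default config_ver."""
    main_args = {
        "action": "run_harness",
        "benchmarks": "resnet50",
        "scenarios": "offline",
        "configs": "",            # empty -> find_config_files() locates configs automatically
        "config_ver": "default",
        "no_gpu": False,
        "gpu_only": True,         # skip DLA engines/harness
        "no_child_process": False,
        "profile": None,
        "power": False,
    }
    main(main_args, system)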
def handle_run_harness(config, gpu=True, dla=True, profile=None, power=False,
                       generate_conf_files_only=False, compliance=False):
    """Run harness for given benchmark and scenario."""
    benchmark_name = config["benchmark"]
    logging.info("Running harness for {:} benchmark in {:} scenario...".format(
        benchmark_name, config["scenario"]))

    arglist = common_args.getScenarioBasedHarnessArgs(config["scenario"], benchmark_name)
    config = apply_overrides(config, arglist)

    # Validate arguments
    if not dla:
        config["dla_batch_size"] = None
    if not gpu:
        config["gpu_batch_size"] = None

    # If we only want to generate conf_files, then set flag to true
    if generate_conf_files_only:
        config["generate_conf_files_only"] = True
        profile = None
        power = False

    # MLPINF-829: Disable CUDA graphs when there is a profiler
    if profile is not None:
        logging.warn("Due to MLPINF-829, CUDA graphs results in a CUDA illegal memory access when run with a "
                     "profiler on r460 driver. Force-disabling CUDA graphs.")
        config["use_graphs"] = False

    harness, config = get_harness(config, profile)

    if power:
        try:
            from code.internal.power_measurements import PowerMeasurements
            power_logfile_name = "{}_{}_{}_{}".format(
                config.get("config_name"), config.get("accuracy_level"),
                config.get("optimization_level"), config.get("inference_server"))
            power_measurements = PowerMeasurements("{}/{}/{}".format(
                os.getcwd(), "power_measurements", power_logfile_name))
            power_measurements.start()
        except BaseException:
            power_measurements = None

    for key, value in config.items():
        print("{} : {}".format(key, value))

    result = ""

    if compliance:
        # AP: We need to keep the compliance logs separated from accuracy and perf,
        # otherwise it messes up the update_results process
        config['log_dir'] = os.path.join('build/compliance_logs', config['audit_test_name'])
        logging.info('AUDIT HARNESS: Overriding log_dir for compliance run. Set to ' + config['log_dir'])

    # Launch the harness
    passed = True
    try:
        result = harness.run_harness()
        logging.info("Result: {:}".format(result))
    except Exception as _:
        traceback.print_exc(file=sys.stdout)
        passed = False
    finally:
        if power and power_measurements is not None:
            power_measurements.stop()
    if not passed:
        raise RuntimeError("Run harness failed!")

    if generate_conf_files_only and result == "Generated conf files":
        return

    # Append result to perf result summary log.
    log_dir = config["log_dir"]
    summary_file = os.path.join(log_dir, "perf_harness_summary.json")
    results = {}
    if os.path.exists(summary_file):
        with open(summary_file) as f:
            results = json.load(f)

    config_name = "{:}-{:}-{:}".format(harness.get_system_name(), config["config_ver"], config["scenario"])
    if config_name not in results:
        results[config_name] = {}
    results[config_name][benchmark_name] = result

    with open(summary_file, "w") as f:
        json.dump(results, f)

    # Check accuracy from loadgen logs.
    if not compliance:
        # TEST01 fails the accuracy test because it produces fewer predictions than expected
        accuracy = check_accuracy(
            os.path.join(harness.get_full_log_dir(), "mlperf_log_accuracy.json"), config)
        summary_file = os.path.join(log_dir, "accuracy_summary.json")
        results = {}
        if os.path.exists(summary_file):
            with open(summary_file) as f:
                results = json.load(f)
        if config_name not in results:
            results[config_name] = {}
        results[config_name][benchmark_name] = accuracy
        with open(summary_file, "w") as f:
            json.dump(results, f)
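
# Illustrative sketch (hypothetical helper, not in the original module): read back the
# perf_harness_summary.json written by handle_run_harness() above and print one line per
# config/benchmark pair. The layout mirrors what the function writes:
# {config_name: {benchmark_name: result}}.
def _example_print_perf_summary(log_dir):
    summary_file = os.path.join(log_dir, "perf_harness_summary.json")
    if not os.path.exists(summary_file):
        return
    with open(summary_file) as f:
        results = json.load(f)
    for config_name, benchmark_results in results.items():
        for benchmark_name, result in benchmark_results.items():
            print("{} / {}: {}".format(config_name, benchmark_name, result))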