def main(main_args, system_id):
    """
    Args:
        main_args: Args parsed from user input.
        system_id: ID string of the system to use
    """
    # Turn off MPS in case it's turned on.
    turn_off_mps()

    benchmarks = BENCHMARKS.ALL
    if main_args["benchmarks"] is not None:
        benchmarks = main_args["benchmarks"].split(",")
        for i, benchmark in enumerate(benchmarks):
            benchmarks[i] = BENCHMARKS.alias(benchmark)

    scenarios = SCENARIOS.ALL
    if main_args["scenarios"] is not None:
        scenarios = main_args["scenarios"].split(",")
        for i, scenario in enumerate(scenarios):
            scenarios[i] = SCENARIOS.alias(scenario)

    profile = main_args.get("profile", None)
    power = main_args.get("power", False)

    # Automatically detect architecture and scenarios and load configs
    config_files = main_args["configs"]
    if config_files == "" or config_files is None:
        config_files = find_config_files(benchmarks, scenarios)
        if config_files == "":
            logging.warn("Cannot find any valid configs for the specified benchmark-scenario pairs.")
            return

    logging.info("Using config files: {:}".format(str(config_files)))
    configs = load_configs(config_files)

    for config in configs:
        base_benchmark_conf = flatten_config(config, system_id)
        if base_benchmark_conf is None:
            continue

        base_benchmark_conf["config_name"] = "{:}_{:}_{:}".format(
            system_id,
            base_benchmark_conf["benchmark"],
            base_benchmark_conf["scenario"]
        )
        logging.info("Processing config \"{:}\"".format(base_benchmark_conf["config_name"]))

        # Load config_ver / apply overrides
        conf_vers = main_args.get("config_ver", "default").split(",")

        # Build default first. This is because some config_vers only modify harness args, and the engine is the
        # same as default. In this case, we build default first, and copy it instead of rebuilding it.
        if "default" in conf_vers:
            conf_vers = ["default"] + list(set(conf_vers) - {"default"})
        elif "all" in conf_vers:
            conf_vers = ["default"] + list(base_benchmark_conf.get("config_ver", {}).keys())

        for conf_ver in conf_vers:
            benchmark_conf = dict(base_benchmark_conf)  # Copy the config so we don't modify it

            # These fields are canonical names that refer to certain config versions
            benchmark_conf["accuracy_level"] = "99%"
            benchmark_conf["optimization_level"] = "plugin-enabled"
            benchmark_conf["inference_server"] = "lwis"

            """@etcheng
            NOTE: The original plan was to use a syntax like high_accuracy+triton to be able to combine already
            defined config_vers. However, since high_accuracy, triton, and high_accuracy+triton are likely to all
            have different expected QPS values, it makes more sense to keep high_accuracy_triton as a separate,
            individual config_ver.

            In the future, perhaps we can make an "extends": [ list of strings ] or { dict of config_ver name ->
            config_key } field in config_vers, so that we can define new config_vers that extend or combine
            previous config_vers.
            """

            equiv_to_default = False

            if conf_ver != "default":
                if "config_ver" not in benchmark_conf or conf_ver not in benchmark_conf["config_ver"]:
                    logging.warn(
                        "--config_ver={:} does not exist in config file '{:}'".format(
                            conf_ver, benchmark_conf["config_name"]))
                    continue
                else:
                    if "high_accuracy" in conf_ver:
                        benchmark_conf["accuracy_level"] = "99.9%"
                    if "ootb" in conf_ver:
                        benchmark_conf["optimization_level"] = "ootb"
                    # "inference_server" is set when we run the harness

                    overrides = benchmark_conf["config_ver"][conf_ver]

                    # Check if this config_ver is equivalent to the default engine
                    gen_eng_argset = set(common_args.GENERATE_ENGINE_ARGS)
                    override_argset = set(overrides.keys())
                    equiv_to_default = (len(gen_eng_argset & override_argset) == 0)

                    benchmark_conf.update(overrides)

            # Update the config_ver key to be the actual string name, not the overrides
            benchmark_conf["config_ver"] = conf_ver

            need_gpu = not main_args["no_gpu"]
            need_dla = not main_args["gpu_only"]

            # Override the system_name if it exists
            if "system_name" in main_args:
                benchmark_conf["system_name"] = main_args["system_name"]

            if main_args["action"] == "generate_engines":
                # Turn on MPS if server scenario and if active_sms is specified.
                benchmark_conf = apply_overrides(benchmark_conf, ["active_sms"])
                active_sms = benchmark_conf.get("active_sms", None)

                copy_from_default = ("default" in conf_vers) and equiv_to_default
                if copy_from_default:
                    logging.info(
                        "config_ver={:} only modifies harness args. Re-using default engine.".format(conf_ver))

                _gen_args = [benchmark_conf]
                _gen_kwargs = {
                    "gpu": need_gpu,
                    "dla": need_dla,
                    "copy_from_default": copy_from_default
                }

                if not main_args["no_child_process"]:
                    if config["scenario"] == SCENARIOS.Server and active_sms is not None and active_sms < 100:
                        with ScopedMPS(active_sms):
                            launch_handle_generate_engine(*_gen_args, **_gen_kwargs)
                    else:
                        launch_handle_generate_engine(*_gen_args, **_gen_kwargs)
                else:
                    handle_generate_engine(*_gen_args, **_gen_kwargs)
            elif main_args["action"] == "run_harness":
                # In case there's a leftover audit.config file from a prior compliance run or other reason,
                # we need to delete it or we risk silent failure.
                auditing.cleanup()
                handle_run_harness(benchmark_conf, need_gpu, need_dla, profile, power)
            elif main_args["action"] == "run_audit_harness":
                logging.info('\n\n\nRunning compliance harness for test ' + main_args['audit_test'] + '\n\n\n')

                # Find the correct audit.config file and move it in current directory
                dest_config = auditing.load(main_args['audit_test'], benchmark_conf['benchmark'])

                # Make sure the log_file override is valid
                os.makedirs("build/compliance_logs", exist_ok=True)

                # Pass audit test name to handle_run_harness via benchmark_conf
                benchmark_conf['audit_test_name'] = main_args['audit_test']

                # Run harness
                handle_run_harness(benchmark_conf, need_gpu, need_dla, profile, power, compliance=True)

                # Cleanup audit.config
                logging.info("AUDIT HARNESS: Cleaning Up audit.config...")
                auditing.cleanup()
            elif main_args["action"] == "run_audit_verification":
                logging.info("Running compliance verification for test " + main_args['audit_test'])
                handle_audit_verification(audit_test_name=main_args['audit_test'], config=benchmark_conf)
                auditing.cleanup()
            elif main_args["action"] == "calibrate":
                # To generate calibration cache, we only need to run each benchmark once.
                # Use offline config.
                if benchmark_conf["scenario"] == SCENARIOS.Offline:
                    handle_calibrate(benchmark_conf)
            elif main_args["action"] == "generate_conf_files":
                handle_run_harness(benchmark_conf, need_gpu, need_dla, generate_conf_files_only=True)
def main(main_args, system):
    """
    Args:
        main_args: Args parsed from user input.
        system: System to use
    """
    system_id = system.get_id()

    # Turn off MPS in case it's turned on.
    turn_off_mps()

    # Get user's benchmarks, else run all.
    benchmarks = BENCHMARKS.ALL
    if main_args["benchmarks"] is not None:
        benchmarks = main_args["benchmarks"].split(",")
        benchmarks = [BENCHMARKS.alias(b) for b in benchmarks]

    # Get user's scenarios, else use all.
    scenarios = SCENARIOS.ALL
    if main_args["scenarios"] is not None:
        scenarios = main_args["scenarios"].split(",")
        scenarios = [SCENARIOS.alias(s) for s in scenarios]

    profile = main_args.get("profile", None)
    power = main_args.get("power", False)

    # Automatically find config file paths
    config_files = main_args["configs"]
    if config_files == "" or config_files is None:
        config_files = find_config_files(benchmarks, scenarios)
        if config_files == "":
            logging.warn("Cannot find any valid configs for the specified benchmark-scenario pairs.")
            return

    logging.info("Using config files: {:}".format(str(config_files)))
    configs = load_configs(config_files)

    for config in configs:
        base_benchmark_conf = get_system_benchmark_config(config, system_id)
        if base_benchmark_conf is None:
            continue

        base_benchmark_conf["config_name"] = "{:}_{:}_{:}".format(
            system_id,
            base_benchmark_conf["benchmark"],
            base_benchmark_conf["scenario"]
        )
        logging.info("Processing config \"{:}\"".format(base_benchmark_conf["config_name"]))

        # Load config_ver / apply overrides
        conf_vers = main_args.get("config_ver", "default").split(",")

        # Build default first. This is because some config_vers only modify harness args, and the engine is the
        # same as default. In this case, we build default first, and copy it instead of rebuilding it.
        if "default" in conf_vers:
            conf_vers = ["default"] + list(set(conf_vers) - {"default"})
        elif "all" in conf_vers:
            tmp = ["default"] + list(base_benchmark_conf.get("config_ver", {}).keys())
            # As per request, 'all' should skip 'maxQ' config_vers for now. MaxQ should only be run when
            # specified directly.
            conf_vers = []
            for s in tmp:
                if "maxq" not in s.lower() and "hetero" not in s.lower():
                    conf_vers.append(s)

        for conf_ver in conf_vers:
            benchmark_conf = dict(base_benchmark_conf)  # Copy the config so we don't modify it

            # These fields are canonical names that refer to certain config versions
            benchmark_conf["accuracy_level"] = "99%"
            benchmark_conf["optimization_level"] = "plugin-enabled"
            benchmark_conf["inference_server"] = "lwis"

            equiv_to_default = False

            if conf_ver != "default":
                if "config_ver" not in benchmark_conf or conf_ver not in benchmark_conf["config_ver"]:
                    logging.warn(
                        "--config_ver={:} does not exist in config file '{:}'".format(
                            conf_ver, benchmark_conf["config_name"]))
                    continue
                else:
                    if "high_accuracy" in conf_ver:
                        benchmark_conf["accuracy_level"] = "99.9%"
                    if "ootb" in conf_ver:
                        benchmark_conf["optimization_level"] = "ootb"
                    # "inference_server" is set when we run the harness

                    overrides = benchmark_conf["config_ver"][conf_ver]

                    # Enforce Triton check
                    if "triton" in conf_ver.lower() and not overrides.get("use_triton", False):
                        raise RuntimeError(
                            "conf_ver={} references Triton harness, but 'use_triton' is false".format(conf_ver))

                    # Check if this config_ver is equivalent to the default engine.
                    # RNNT has multiple engines, so disable the equiv_to_default check.
                    if benchmark_conf["benchmark"] != BENCHMARKS.RNNT:
                        gen_eng_argset = set(common_args.GENERATE_ENGINE_ARGS)
                        override_argset = set(overrides.keys())
                        equiv_to_default = (len(gen_eng_argset & override_argset) == 0)

                    benchmark_conf.update(overrides)

            # Update the config_ver key to be the actual string name, not the overrides
            benchmark_conf["config_ver"] = conf_ver

            need_gpu = not main_args["no_gpu"]
            need_dla = not main_args["gpu_only"]

            # Override the system_name if it exists
            if "system_name" in main_args:
                benchmark_conf["system_name"] = main_args["system_name"]

            # Check for use_cpu
            if system_id.startswith("Triton_CPU"):
                benchmark_conf["use_cpu"] = True

            # Generate engines.
            if main_args["action"] == "generate_engines":
                # Turn on MPS if server scenario and if active_sms is specified.
                benchmark_conf = apply_overrides(benchmark_conf, ["active_sms"])
                active_sms = benchmark_conf.get("active_sms", None)

                copy_from_default = ("default" in conf_vers) and equiv_to_default
                if copy_from_default:
                    logging.info(
                        "config_ver={:} only modifies harness args. Re-using default engine.".format(conf_ver))

                _gen_args = [benchmark_conf]
                _gen_kwargs = {
                    "gpu": need_gpu,
                    "dla": need_dla,
                    "copy_from_default": copy_from_default
                }

                if not main_args["no_child_process"]:
                    if config["scenario"] == SCENARIOS.Server and active_sms is not None and active_sms < 100:
                        with ScopedMPS(active_sms):
                            launch_handle_generate_engine(*_gen_args, **_gen_kwargs)
                    else:
                        launch_handle_generate_engine(*_gen_args, **_gen_kwargs)
                else:
                    handle_generate_engine(*_gen_args, **_gen_kwargs)

            # Run CPU harness.
            elif main_args["action"] == "run_cpu_harness":
                auditing.cleanup()
                benchmark_conf["use_cpu"] = True
                handle_run_harness(benchmark_conf, False, False, None, power)

            # Run harness.
            elif main_args["action"] == "run_harness":
                # In case there's a leftover audit.config file from a prior compliance run or other reason,
                # we need to delete it or we risk silent failure.
                auditing.cleanup()
                handle_run_harness(benchmark_conf, need_gpu, need_dla, profile, power)
            elif main_args["action"] == "run_audit_harness" or main_args["action"] == "run_cpu_audit_harness":
                logging.info('\n\n\nRunning compliance harness for test ' + main_args['audit_test'] + '\n\n\n')

                # Find the correct audit.config file and move it in current directory
                dest_config = auditing.load(main_args['audit_test'], benchmark_conf['benchmark'])

                # Make sure the log_file override is valid
                os.makedirs("build/compliance_logs", exist_ok=True)

                # Pass audit test name to handle_run_harness via benchmark_conf
                benchmark_conf['audit_test_name'] = main_args['audit_test']

                if main_args["action"] == "run_cpu_audit_harness":
                    need_gpu = False
                    need_dla = False
                    profile = None
                    benchmark_conf["use_cpu"] = True

                # Run harness
                handle_run_harness(benchmark_conf, need_gpu, need_dla, profile, power, compliance=True)

                # Cleanup audit.config
                logging.info("AUDIT HARNESS: Cleaning Up audit.config...")
                auditing.cleanup()
            elif main_args["action"] == "run_audit_verification":
                logging.info("Running compliance verification for test " + main_args['audit_test'])
                handle_audit_verification(audit_test_name=main_args['audit_test'], config=benchmark_conf)
                auditing.cleanup()
            elif main_args["action"] == "run_cpu_audit_verification":
                logging.info("Running compliance verification for test " + main_args['audit_test'])
                benchmark_conf["use_cpu"] = True
                handle_audit_verification(audit_test_name=main_args['audit_test'], config=benchmark_conf)
                auditing.cleanup()
            elif main_args["action"] == "calibrate":
                # To generate calibration cache, we only need to run each benchmark once.
                # Use offline config.
                if benchmark_conf["scenario"] == SCENARIOS.Offline:
                    handle_calibrate(benchmark_conf)
            elif main_args["action"] == "generate_conf_files":
                handle_run_harness(benchmark_conf, need_gpu, need_dla, generate_conf_files_only=True)
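

# Hypothetical usage sketch (not this repo's actual entry point): main() takes a dict of parsed
# arguments and a system object exposing get_id(). The keys shown are the ones read above;
# parse_main_args() and detect_system() are illustrative stand-ins for whatever argument parsing
# and system detection the surrounding codebase actually provides.
#
#   main_args = parse_main_args()   # e.g. {"action": "run_harness", "benchmarks": "resnet50",
#                                   #       "scenarios": "Offline", "configs": "", "config_ver": "default",
#                                   #       "no_gpu": False, "gpu_only": False, "no_child_process": False}
#   system = detect_system()        # object whose get_id() returns a system ID string
#   main(main_args, system)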