Example #1
def main(main_args, system_id):
    # Turn off MPS in case it's turned on.
    turn_off_mps()

    benchmarks = BENCHMARKS.ALL
    if main_args["benchmarks"] is not None:
        benchmarks = main_args["benchmarks"].split(",")
        for i, benchmark in enumerate(benchmarks):
            benchmarks[i] = BENCHMARKS.alias(benchmark)
    scenarios = SCENARIOS.ALL
    if main_args["scenarios"] is not None:
        scenarios = main_args["scenarios"].split(",")
        for i, scenario in enumerate(scenarios):
            scenarios[i] = SCENARIOS.alias(scenario)

    profile = main_args.get("profile", None)
    power = main_args.get("power", False)

    # Automatically detect architecture and scenarios and load configs
    config_files = main_args["configs"]
    if config_files == "" or config_files is None:
        config_files = find_config_files(benchmarks, scenarios)
        if config_files == "":
            logging.warn("Cannot find any valid configs for the specified benchmark-scenario pairs.")
            return

    logging.info("Using config files: {:}".format(str(config_files)))
    configs = load_configs(config_files)

    for config in configs:
        base_benchmark_conf = flatten_config(config, system_id)
        if base_benchmark_conf is None:
            continue

        base_benchmark_conf["config_name"] = "{:}_{:}_{:}".format(
            system_id,
            base_benchmark_conf["benchmark"],
            base_benchmark_conf["scenario"]
        )
        logging.info("Processing config \"{:}\"".format(base_benchmark_conf["config_name"]))

        # Load config_ver / apply overrides
        conf_vers = main_args.get("config_ver", "default").split(",")

        # Build default first. This is because some config_vers only modify harness args, and the engine is the same as
        # default. In this case, we build default first, and copy it instead of rebuilding it.
        if "default" in conf_vers:
            conf_vers = ["default"] + list(set(conf_vers) - {"default"})
        elif "all" in conf_vers:
            conf_vers = ["default"] + list(base_benchmark_conf.get("config_ver", {}).keys())

        for conf_ver in conf_vers:
            benchmark_conf = dict(base_benchmark_conf)  # Copy the config so we don't modify it

            # These fields are canonical names that refer to certain config versions
            benchmark_conf["accuracy_level"] = "99%"
            benchmark_conf["optimization_level"] = "plugin-enabled"
            benchmark_conf["inference_server"] = "lwis"

            """@etcheng
            NOTE: The original plan was to use a syntax like high_accuracy+triton to be able to combine already defined
            config_vers. However, since high_accuracy, triton, and high_accuracy+triton are likely to all have different
            expected QPS values, it makes more sense to keep high_accuracy_triton as a separate, individual config_ver.

            In the future, perhaps we can make an "extends": [ list of strings ] or { dict of config_ver name ->
            config_key } field in config_vers, so that we can define new config_vers that extend or combine previous
            config_vers.
            """

            equiv_to_default = False

            if conf_ver != "default":
                if "config_ver" not in benchmark_conf or conf_ver not in benchmark_conf["config_ver"]:
                    logging.warn(
                        "--config_ver={:} does not exist in config file '{:}'".format(conf_ver, benchmark_conf["config_name"]))
                    continue
                else:
                    if "high_accuracy" in conf_ver:
                        benchmark_conf["accuracy_level"] = "99.9%"
                    if "ootb" in conf_ver:
                        benchmark_conf["optimization_level"] = "ootb"
                    # "inference_server" is set when we run the harness

                    overrides = benchmark_conf["config_ver"][conf_ver]

                    # Check if this config_ver is equivalent to the default engine
                    gen_eng_argset = set(common_args.GENERATE_ENGINE_ARGS)
                    override_argset = set(overrides.keys())
                    equiv_to_default = (len(gen_eng_argset & override_argset) == 0)

                    benchmark_conf.update(overrides)

            # Update the config_ver key to be the actual string name, not the overrides
            benchmark_conf["config_ver"] = conf_ver

            need_gpu = not main_args["no_gpu"]
            need_dla = not main_args["gpu_only"]

            # Override the system_name if it exists
            if "system_name" in main_args:
                benchmark_conf["system_name"] = main_args["system_name"]

            if main_args["action"] == "generate_engines":
                # Turn on MPS if server scenario and if active_sms is specified.
                benchmark_conf = apply_overrides(benchmark_conf, ["active_sms"])
                active_sms = benchmark_conf.get("active_sms", None)

                copy_from_default = ("default" in conf_vers) and equiv_to_default
                if copy_from_default:
                    logging.info(
                        "config_ver={:} only modifies harness args. Re-using default engine.".format(conf_ver))

                _gen_args = [benchmark_conf]
                _gen_kwargs = {
                    "gpu": need_gpu,
                    "dla": need_dla,
                    "copy_from_default": copy_from_default
                }

                if not main_args["no_child_process"]:
                    if config["scenario"] == SCENARIOS.Server and active_sms is not None and active_sms < 100:
                        with ScopedMPS(active_sms):
                            launch_handle_generate_engine(*_gen_args, **_gen_kwargs)
                    else:
                        launch_handle_generate_engine(*_gen_args, **_gen_kwargs)
                else:
                    handle_generate_engine(*_gen_args, **_gen_kwargs)
            elif main_args["action"] == "run_harness":
                # In case there's a leftover audit.config file from a prior compliance run (or for any other reason),
                # we need to delete it or we risk silent failure.
                auditing.cleanup()

                handle_run_harness(benchmark_conf, need_gpu, need_dla, profile, power)
            elif main_args["action"] == "run_audit_harness":
                logging.info('\n\n\nRunning compliance harness for test ' + main_args['audit_test'] + '\n\n\n')

                # Find the correct audit.config file and move it into the current directory
                dest_config = auditing.load(main_args['audit_test'], benchmark_conf['benchmark'])

                # Make sure the log_file override is valid
                os.makedirs("build/compliance_logs", exist_ok=True)

                # Pass audit test name to handle_run_harness via benchmark_conf
                benchmark_conf['audit_test_name'] = main_args['audit_test']

                # Run harness
                handle_run_harness(benchmark_conf, need_gpu, need_dla, profile, power, compliance=True)

                # Cleanup audit.config
                logging.info("AUDIT HARNESS: Cleaning Up audit.config...")
                auditing.cleanup()
            elif main_args["action"] == "run_audit_verification":
                logging.info("Running compliance verification for test " + main_args['audit_test'])
                handle_audit_verification(audit_test_name=main_args['audit_test'], config=benchmark_conf)
                auditing.cleanup()
            elif main_args["action"] == "calibrate":
                # To generate calibration cache, we only need to run each benchmark once.
                # Use offline config.
                if benchmark_conf["scenario"] == SCENARIOS.Offline:
                    handle_calibrate(benchmark_conf)
            elif main_args["action"] == "generate_conf_files":
                handle_run_harness(benchmark_conf, need_gpu, need_dla, generate_conf_files_only=True)
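
A minimal sketch of how this entry point might be invoked, assuming main_args is the dictionary produced by the harness's command-line argument parser. The keys shown are exactly the ones the function reads above; the benchmark, scenario, and system_id values are placeholders, not values confirmed by the source.

# Hypothetical invocation of main() above; in the real harness these values
# come from the command-line argument parser.
example_args = {
    "benchmarks": "resnet50",        # comma-separated list, or None to run all
    "scenarios": "Offline,Server",   # comma-separated list, or None to run all
    "configs": "",                   # empty string -> find_config_files() locates them
    "config_ver": "default",         # comma-separated config_ver names, or "all"
    "no_gpu": False,
    "gpu_only": True,
    "no_child_process": False,
    "action": "generate_engines",    # or "run_harness", "calibrate", ...
}
main(example_args, "A100-SXM4x8")    # the system_id string is a placeholder
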
Example #2
    def __init__(self, bench, scen, config_dict, config_funcs=None):
        """ Construct a ConfigGrid

        Args:
            bench (str): The benchmark requested (fuzzy match behavior using BENCHMARKS.alias)
            scen (str): The scenario requested (fuzzy match behavior using SCENARIOS.alias)
            config_dict (Dict[str, List]): A config dictionary. Refer to 'Config Schema' in the README for format
            config_funcs (Dict[str, Callable]): A dictionary of META* functions. Refer to 'Config Schema' in the README for requirements.

        """
        if args.spoof_system_id:
            self.system_id = args.spoof_system_id
        else:
            self.system = get_system()
            self.system_id = self.system.get_id()
        self.benchmark = BENCHMARKS.alias(bench)
        self.scenario = SCENARIOS.alias(scen)
        candidate_configs = find_config_files(benchmarks=[self.benchmark],
                                              scenarios=[self.scenario])
        configs = load_configs(candidate_configs)
        assert len(configs) == 1
        # To work with "extends" and "scales", we need to call into another config helper:
        self.base_config = get_system_benchmark_config(configs[0], self.system_id)
        self.default_config = configs[0]['default']
        griddict = config_dict
        self.no_rebuild_params = None
        # No-op
        self.is_config_valid = lambda x: True
        # No-op
        self.search_callback = None
        self.replay = None
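        # Track which META_* functions have been handled so any unrecognized ones can be reported below.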
        funcs_processed = set()
        if config_funcs:
            if config_funcs.get("META_search_callback"):
                funcs_processed.add("META_search_callback")
                self.search_callback = config_funcs['META_search_callback']
                if not args.use_cached:
                    raise RuntimeError(f"META_search_callback must be used with --use_cached for reproducibility.")
            if config_funcs.get("META_get_no_rebuild_params"):
                funcs_processed.add("META_get_no_rebuild_params")
                norebuild_params = config_funcs.get("META_get_no_rebuild_params")()
                assert isinstance(norebuild_params, list)
                # Make sure these keys all exist in our grid params:
                # But we might not know grid params if a search_callback is being used:
                if self.search_callback is None:
                    missing_keys = set(norebuild_params) - set(griddict.keys())
                    if len(missing_keys) > 0:
                        raise RuntimeError(f"The keys: {missing_keys} were mentioned in META_get_no_rebuild_params, but are not a specified parameter in:\n{griddict.keys()}")
                else:
                    print("WARNING: Not checking get_no_rebuild_params against grid parameters, be careful")
                # For use later, we're gonna turn this into a set:
                self.no_rebuild_params = set(norebuild_params)
            if config_funcs.get("META_is_config_valid"):
                funcs_processed.add("META_is_config_valid")
                self.is_config_valid = config_funcs["META_is_config_valid"]

                # Make sure we aren't scanning other params:
            # Other META handling goes here
            unmatched_funcs = set(config_funcs.keys()) - funcs_processed
            if len(unmatched_funcs) > 0:
                raise RuntimeError(f"Found the following META functions which haven't been implemented, refer to README for proper naming {unmatched_funcs}")

        # Make sure all keys we want to set exist in our config:
        if not args.no_check_keys:
            for grid_key in griddict.keys():
                if grid_key not in self.base_config:
                    print(f"{grid_key} not found in base config")
                    print(f"{self.base_config}")
                    assert False
        # Make sure all values are non-empty lists of something that isn't a list or a dict
        # TODO expand this to something reasonable to help META_bisect
        if "META_search_callback" in funcs_processed:
            print("WARNING: Skipping parameter validation because META_search_callback was provided")
        else:
            for val in griddict.values():
                assert isinstance(val, list)
                #assert len(val) >= 1
                assert all(not isinstance(el, list) and not isinstance(el, dict) for el in val)
        self.grid = griddict
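
A hedged sketch of constructing a ConfigGrid from this class. The benchmark, scenario, grid keys, and META function bodies are illustrative assumptions; per the key check above, any grid keys must exist in the loaded base config.

# Illustrative construction only; the grid keys and META hook bodies are assumptions.
grid = ConfigGrid(
    "resnet50",                                    # fuzzy-matched via BENCHMARKS.alias
    "Offline",                                     # fuzzy-matched via SCENARIOS.alias
    {"gpu_batch_size": [32, 64], "gpu_copy_streams": [1, 2]},
    config_funcs={
        # Assumed semantics: changing only these keys should not trigger an engine rebuild.
        "META_get_no_rebuild_params": lambda: ["gpu_copy_streams"],
        # Assumed call convention: receives a candidate config dict, returns bool.
        "META_is_config_valid": lambda conf: conf["gpu_batch_size"] <= 64,
    },
)
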
Example #3
def main(main_args, system):
    """
    Args:
        main_args: Args parsed from user input.
        system: System to use
    """
    system_id = system.get_id()

    # Turn off MPS in case it's turned on.
    turn_off_mps()

    # Get user's benchmarks, else run all.
    benchmarks = BENCHMARKS.ALL
    if main_args["benchmarks"] is not None:
        benchmarks = main_args["benchmarks"].split(",")
        benchmarks = [BENCHMARKS.alias(b) for b in benchmarks]

    # Get user's scenarios, else use all.
    scenarios = SCENARIOS.ALL
    if main_args["scenarios"] is not None:
        scenarios = main_args["scenarios"].split(",")
        scenarios = [SCENARIOS.alias(s) for s in scenarios]

    profile = main_args.get("profile", None)
    power = main_args.get("power", False)

    # Automatically find config file paths
    config_files = main_args["configs"]
    if config_files == "" or config_files is None:
        config_files = find_config_files(benchmarks, scenarios)
        if config_files == "":
            logging.warn(
                "Cannot find any valid configs for the specified benchmark-scenario pairs."
            )
            return

    logging.info("Using config files: {:}".format(str(config_files)))
    configs = load_configs(config_files)

    for config in configs:
        base_benchmark_conf = get_system_benchmark_config(config, system_id)
        if base_benchmark_conf is None:
            continue

        base_benchmark_conf["config_name"] = "{:}_{:}_{:}".format(
            system_id, base_benchmark_conf["benchmark"],
            base_benchmark_conf["scenario"])
        logging.info("Processing config \"{:}\"".format(
            base_benchmark_conf["config_name"]))

        # Load config_ver / apply overrides
        conf_vers = main_args.get("config_ver", "default").split(",")

        # Build default first. This is because some config_vers only modify harness args, and the engine is the same as
        # default. In this case, we build default first, and copy it instead of rebuilding it.
        if "default" in conf_vers:
            conf_vers = ["default"] + list(set(conf_vers) - {"default"})
        elif "all" in conf_vers:
            tmp = ["default"] + list(
                base_benchmark_conf.get("config_ver", {}).keys())
            # As per request, 'all' skips 'maxQ' and 'hetero' config_vers for now. These should only be run when
            # specified directly.
            conf_vers = []
            for s in tmp:
                if "maxq" not in s.lower() and "hetero" not in s.lower():
                    conf_vers.append(s)

        for conf_ver in conf_vers:
            benchmark_conf = dict(base_benchmark_conf)  # Copy the config so we don't modify it

            # These fields are canonical names that refer to certain config versions
            benchmark_conf["accuracy_level"] = "99%"
            benchmark_conf["optimization_level"] = "plugin-enabled"
            benchmark_conf["inference_server"] = "lwis"

            equiv_to_default = False

            if conf_ver != "default":
                if "config_ver" not in benchmark_conf or conf_ver not in benchmark_conf[
                        "config_ver"]:
                    logging.warn(
                        "--config_ver={:} does not exist in config file '{:}'".
                        format(conf_ver, benchmark_conf["config_name"]))
                    continue
                else:
                    if "high_accuracy" in conf_ver:
                        benchmark_conf["accuracy_level"] = "99.9%"
                    if "ootb" in conf_ver:
                        benchmark_conf["optimization_level"] = "ootb"
                    # "inference_server" is set when we run the harness

                    overrides = benchmark_conf["config_ver"][conf_ver]

                    # Enforce Triton check
                    if "triton" in conf_ver.lower() and not overrides.get(
                            "use_triton", False):
                        raise RuntimeError(
                            "conf_ver={} references Triton harness, but 'use_triton' is false"
                            .format(conf_ver))

                    # Check if this config_ver is equivalent to the default engine
                    # RNNT has multiple engines, so disable the equiv_to_default.
                    if benchmark_conf["benchmark"] != BENCHMARKS.RNNT:
                        gen_eng_argset = set(common_args.GENERATE_ENGINE_ARGS)
                        override_argset = set(overrides.keys())
                        equiv_to_default = (len(gen_eng_argset
                                                & override_argset) == 0)

                    benchmark_conf.update(overrides)

            # Update the config_ver key to be the actual string name, not the overrides
            benchmark_conf["config_ver"] = conf_ver

            need_gpu = not main_args["no_gpu"]
            need_dla = not main_args["gpu_only"]

            # Override the system_name if it exists
            if "system_name" in main_args:
                benchmark_conf["system_name"] = main_args["system_name"]

            # Check for use_cpu
            if system_id.startswith("Triton_CPU"):
                benchmark_conf["use_cpu"] = True

            # Generate engines.
            if main_args["action"] == "generate_engines":
                # Turn on MPS if server scenario and if active_sms is specified.
                benchmark_conf = apply_overrides(benchmark_conf,
                                                 ["active_sms"])
                active_sms = benchmark_conf.get("active_sms", None)

                copy_from_default = ("default"
                                     in conf_vers) and equiv_to_default
                if copy_from_default:
                    logging.info(
                        "config_ver={:} only modifies harness args. Re-using default engine."
                        .format(conf_ver))

                _gen_args = [benchmark_conf]
                _gen_kwargs = {
                    "gpu": need_gpu,
                    "dla": need_dla,
                    "copy_from_default": copy_from_default
                }

                if not main_args["no_child_process"]:
                    if config["scenario"] == SCENARIOS.Server and active_sms is not None and active_sms < 100:
                        with ScopedMPS(active_sms):
                            launch_handle_generate_engine(
                                *_gen_args, **_gen_kwargs)
                    else:
                        launch_handle_generate_engine(*_gen_args,
                                                      **_gen_kwargs)
                else:
                    handle_generate_engine(*_gen_args, **_gen_kwargs)

            # Run CPU harness:
            elif main_args["action"] == "run_cpu_harness":
                auditing.cleanup()
                benchmark_conf["use_cpu"] = True
                handle_run_harness(benchmark_conf, False, False, None, power)
            # Run harness.
            elif main_args["action"] == "run_harness":
                # In case there's a leftover audit.config file from a prior compliance run (or for any other reason),
                # we need to delete it or we risk silent failure.
                auditing.cleanup()

                handle_run_harness(benchmark_conf, need_gpu, need_dla, profile,
                                   power)
            elif main_args["action"] == "run_audit_harness" or main_args[
                    "action"] == "run_cpu_audit_harness":
                logging.info('\n\n\nRunning compliance harness for test ' +
                             main_args['audit_test'] + '\n\n\n')

                # Find the correct audit.config file and move it into the current directory
                dest_config = auditing.load(main_args['audit_test'],
                                            benchmark_conf['benchmark'])

                # Make sure the log_file override is valid
                os.makedirs("build/compliance_logs", exist_ok=True)

                # Pass audit test name to handle_run_harness via benchmark_conf
                benchmark_conf['audit_test_name'] = main_args['audit_test']

                if main_args["action"] == "run_cpu_audit_harness":
                    need_gpu = False
                    need_dla = False
                    profile = None
                    benchmark_conf["use_cpu"] = True

                # Run harness
                handle_run_harness(benchmark_conf,
                                   need_gpu,
                                   need_dla,
                                   profile,
                                   power,
                                   compliance=True)

                # Cleanup audit.config
                logging.info("AUDIT HARNESS: Cleaning Up audit.config...")
                auditing.cleanup()
            elif main_args["action"] == "run_audit_verification":
                logging.info("Running compliance verification for test " +
                             main_args['audit_test'])
                handle_audit_verification(
                    audit_test_name=main_args['audit_test'],
                    config=benchmark_conf)
                auditing.cleanup()
            elif main_args["action"] == "run_cpu_audit_verification":
                logging.info("Running compliance verification for test " +
                             main_args['audit_test'])
                benchmark_conf["use_cpu"] = True
                handle_audit_verification(
                    audit_test_name=main_args['audit_test'],
                    config=benchmark_conf)
                auditing.cleanup()
            elif main_args["action"] == "calibrate":
                # To generate calibration cache, we only need to run each benchmark once.
                # Use offline config.
                if benchmark_conf["scenario"] == SCENARIOS.Offline:
                    handle_calibrate(benchmark_conf)
            elif main_args["action"] == "generate_conf_files":
                handle_run_harness(benchmark_conf,
                                   need_gpu,
                                   need_dla,
                                   generate_conf_files_only=True)
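
This variant of main adds CPU and Triton-CPU handling on top of Example #1. A minimal sketch of exercising the CPU path, assuming the system object comes from get_system() and the benchmark name is illustrative:

# Hypothetical invocation of the CPU harness path; the values are illustrative.
cpu_args = {
    "benchmarks": "bert",
    "scenarios": "Offline",
    "configs": "",
    "config_ver": "default",
    "no_gpu": True,
    "gpu_only": False,
    "no_child_process": True,
    "action": "run_cpu_harness",     # this branch forces benchmark_conf["use_cpu"] = True
}
main(cpu_args, system)               # 'system' is assumed to come from get_system()
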
Example #4
    def __init__(self, bench, scen, config_dict, config_funcs=None):
        """ Construct a ConfigGrid

        Args:
            bench (str): The benchmark requested (fuzzy match behavior using BENCHMARKS.alias)
            scen (str): The scenario requested (fuzzy match behavior using SCENARIOS.alias)
            config_dict (Dict[str, List]): A config dictionary. Refer to 'Config Schema' in the README for format
            config_funcs (Dict[str, Callable]): A dictionary of META* functions. Refer to 'Config Schema' in the README for requirements.

        """
        self.system = get_system()
        self.system_id = self.system.get_id()
        self.benchmark = BENCHMARKS.alias(bench)
        self.scenario = SCENARIOS.alias(scen)
        candidate_configs = find_config_files(benchmarks=[self.benchmark],
                                              scenarios=[self.scenario])
        configs = load_configs(candidate_configs)
        assert len(configs) == 1
        # To work with "extends" and "scales", we need to call into another config helper:
        self.base_config = get_system_benchmark_config(configs[0],
                                                       self.system_id)
        self.default_config = configs[0]['default']
        griddict = config_dict
        self.no_rebuild_params = None
        # No-op
        self.is_config_valid = lambda x: True
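        # Track which META_* functions have been handled so any unrecognized ones can be reported below.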
        funcs_processed = set()
        if config_funcs:
            if config_funcs.get("META_get_no_rebuild_params"):
                funcs_processed.add("META_get_no_rebuild_params")
                norebuild_params = config_funcs.get(
                    "META_get_no_rebuild_params")()
                assert isinstance(norebuild_params, list)
                # Make sure these keys all exist in our grid params:
                missing_keys = set(norebuild_params) - set(griddict.keys())
                if len(missing_keys) > 0:
                    raise RuntimeError(
                        f"The keys: {missing_keys} were mentioned in META_get_no_rebuild_params, but are not a specified parameter in:\n{griddict.keys()}"
                    )
                # For use later, we're gonna turn this into a set:
                self.no_rebuild_params = set(norebuild_params)
            if config_funcs.get("META_is_config_valid"):
                funcs_processed.add("META_is_config_valid")
                self.is_config_valid = config_funcs["META_is_config_valid"]
            # Other META handling goes here
            unmatched_funcs = set(config_funcs.keys()) - funcs_processed
            if len(unmatched_funcs) > 0:
                raise RuntimeError(
                    f"Found the following META functions which haven't been implemented, refer to README for proper naming {unmatched_funcs}"
                )

        # Make sure all keys we want to set exist in our config:
        for grid_key in griddict.keys():
            if grid_key not in self.base_config:
                print(f"{grid_key} not found in base config")
                print(f"{self.base_config}")
                assert False
        # Make sure all values are non-empty lists of something that isn't a list or a dict
        for val in griddict.values():
            assert isinstance(val, list)
            assert len(val) >= 1
            assert all(not isinstance(el, list) and not isinstance(el, dict)
                       for el in val)

        self.grid = griddict
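
The constructor only stores the grid. As an illustration (not part of the class, and not confirmed by the source), downstream code could enumerate the Cartesian product of the stored parameter lists roughly like this:

# Illustrative only: one way to enumerate the candidate configs held in self.grid.
import itertools

def enumerate_grid(grid):
    keys = list(grid.keys())
    for values in itertools.product(*(grid[k] for k in keys)):
        yield dict(zip(keys, values))

# e.g. {"gpu_batch_size": [32, 64], "gpu_copy_streams": [1, 2]} yields four candidate
# configs, each of which could then be screened with is_config_valid before running.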