Example #1
def find_valid_runs(input_list, scenario):
    # Check for query constraints documented in https://github.com/mlperf/inference_policies/blob/master/inference_rules.adoc#scenarios
    QUERY_METRIC_CONSTRAINTS = {
        "Offline": ("effective_samples_per_query", 24576),
        "Server": ("effective_min_query_count", 270336),
        "MultiStream": ("effective_min_query_count", 270336),
        "SingleStream": ("effective_min_query_count", 1024),
    }

    perf_list = []
    perf_power_list = []
    accu_list = []
    for input_file in input_list:
        # Check if this is an Accuracy run or a Performance run. The input files are
        # presumably mlperf_log_accuracy.json paths: performance runs leave that file
        # essentially empty ("[]"), so anything larger indicates an accuracy run.
        if os.path.getsize(input_file) > 4:
            accu_list.append(input_file)
            continue

        # Check for valid perf run
        log_dir = os.path.dirname(input_file)
        scenario_key = QUERY_METRIC_CONSTRAINTS[scenario][0]
        result = from_loadgen_by_keys(log_dir,
                                      ["result_validity", scenario_key])

        is_valid = ("result_validity" in result) and (result["result_validity"]
                                                      == "VALID")
        satisfies_query_constraint = (scenario_key in result) and (float(
            result[scenario_key]) >= QUERY_METRIC_CONSTRAINTS[scenario][1])
        if is_valid and satisfies_query_constraint:
            perf_list.append(input_file)
            if "power" in log_dir:
                perf_power_list.append(input_file)

    return perf_list, perf_power_list, accu_list
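
A note on the shared helper: every example here reads LoadGen results through from_loadgen_by_keys, which is not shown in these excerpts. The sketch below is an assumption about how such a helper could work, not the project's actual implementation; it presumes the ":::MLLOG {json}" line format that recent LoadGen versions write to mlperf_log_detail.txt, and the name from_loadgen_by_keys_sketch is invented here.

import json
import os

def from_loadgen_by_keys_sketch(log_dir, keys):
    # Hedged sketch: scan <log_dir>/mlperf_log_detail.txt for ":::MLLOG" lines
    # and return {key: value} for the requested keys.
    found = {}
    with open(os.path.join(log_dir, "mlperf_log_detail.txt")) as f:
        for line in f:
            if not line.startswith(":::MLLOG"):
                continue
            entry = json.loads(line[len(":::MLLOG"):].strip())
            if entry.get("key") in keys:
                found[entry["key"]] = entry["value"]
    return found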
Example #2
def sort_perf_list(perf_file_list, scenario):
    # Sort performance runs by the scenario's tiebreaker metric and direction
    scenario_criteria = {
        "Offline": ("result_samples_per_second", SortingCriteria.Higher),
        "Server": ("result_99.00_percentile_latency_ns", SortingCriteria.Lower),
        "SingleStream": ("result_90.00_percentile_latency_ns", SortingCriteria.Lower),
        "MultiStream": ("result_99.00_percentile_latency_ns", SortingCriteria.Lower),
    }

    perf_vals = []
    for perf_file in perf_file_list:
        log_dir = os.path.dirname(perf_file)
        scenario_key = scenario_criteria[scenario][0]
        result = from_loadgen_by_keys(log_dir, [scenario_key])
        if len(result) == 0:
            raise Exception("Could not find perf value in file: " +
                            os.path.join(log_dir, "mlperf_log_detail.txt"))

        perf_vals.append((perf_file, float(result[scenario_key])))

    sorted_perf_vals = sorted(
        perf_vals,
        key=lambda k: k[1],
        reverse=(scenario_criteria[scenario][1] == SortingCriteria.Lower))

    return [k[0] for k in sorted_perf_vals]
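
SortingCriteria is referenced above but not defined in this excerpt. A minimal sketch of the shape Example #2 needs is below; this is an assumption, not the project's actual definition.

from enum import Enum, auto

class SortingCriteria(Enum):
    # Higher: larger metric values are better (e.g. samples per second).
    # Lower: smaller metric values are better (e.g. percentile latency).
    Higher = auto()
    Lower = auto()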
Example #3
def traverse_results(results_dir):
    perf_glob = os.path.join(results_dir, "**", "performance", "run_*", "mlperf_log_detail.txt")
    perf_run_logs = glob.glob(perf_glob, recursive=True)

    # Sort the logs so we add each run in order
    perf_run_logs = sorted(perf_run_logs)

    results_tree = Tree()
    for entry in perf_run_logs:
        parts = entry.split("/")
        # Expected layout: results/<system_id>/<benchmark>/<scenario>/performance/<run id>/mlperf_log_detail.txt
        # (the fixed indices below assume results_dir is the relative path "results")
        system_id = parts[1]
        benchmark = parts[2]
        scenario = parts[3]
        log_dir = os.path.dirname(entry)

        scenario_key = scenario_loadgen_log_keys[scenario]
        result = from_loadgen_by_keys(log_dir, [scenario_key])
        if len(result) == 0:
            print("WARNING: Could not find perf value in file: " + entry + ". Using 0")
            perf_number = 0.0
        else:
            perf_number = float(result[scenario_key])

        results_tree.insert([system_id, benchmark, scenario], perf_number, append=True)
    return results_tree
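
scenario_loadgen_log_keys is another external mapping (scenario name to the LoadGen result key to read). Its exact contents are not shown here; the sketch below is inferred from the metric keys used in Example #2 and may differ from the real mapping, in particular for Server, which may report a scheduled-samples-per-second key instead.

# Hedged sketch of the external scenario_loadgen_log_keys mapping, inferred
# from Example #2; the real Server entry may be a throughput key instead.
scenario_loadgen_log_keys = {
    "Offline": "result_samples_per_second",
    "SingleStream": "result_90.00_percentile_latency_ns",
    "MultiStream": "result_99.00_percentile_latency_ns",
    "Server": "result_99.00_percentile_latency_ns",
}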
Example #4
    def run_harness(self):
        flag_dict = self.build_default_flags()
        flag_dict.update(self.build_scenario_specific_flags())

        # Handle engines
        if self.has_gpu:
            flag_dict["gpu_engines"] = self.gpu_engine

        # MLPINF-853: Special handling of --fast. Use min_duration=60000 and, for Offline and MultiStream, min_query_count=1.
        if flag_dict.get("fast", False):
            if "min_duration" not in flag_dict:
                flag_dict["min_duration"] = 60000
            if self.scenario in [SCENARIOS.Offline, SCENARIOS.MultiStream]:
                if "min_query_count" not in flag_dict:
                    flag_dict["min_query_count"] = 1
            flag_dict["fast"] = None

        # Generates the entries in the `measurements/` directory, and updates flag_dict accordingly
        generate_measurements_entry(
            self.get_system_name(),
            self.name,
            self._get_submission_benchmark_name(),
            self.scenario,
            self.args["input_dtype"],
            self.args["precision"],
            flag_dict)

        # Stop here if we are only generating .conf files in measurements
        if self.generate_conf_files_only:
            return "Generated conf files"

        # _build_custom_flags may return either a finished argument string or an
        # updated flag dict; in the latter case, serialize the flags ourselves.
        argstr = self._build_custom_flags(flag_dict)
        if isinstance(argstr, dict):
            argstr = args_to_string(flag_dict)

        # Handle environment variables
        if self.use_jemalloc:
            self.prepend_ld_preload("/usr/lib/x86_64-linux-gnu/libjemalloc.so.2")

        cmd = "{:} {:}".format(self.executable, argstr)
        output = run_command(cmd, get_output=True, custom_env=self.env_vars)

        # Return harness result.
        scenario_key = scenario_loadgen_log_keys[self.scenario]
        results = from_loadgen_by_keys(
            os.path.join(
                self.args["log_dir"],
                self.get_system_name(),
                self._get_submission_benchmark_name(),
                self.scenario),
            ["result_validity", scenario_key])

        if scenario_key not in results:
            result_string = "Cannot find performance result. Maybe you are running in AccuracyOnly mode."
        elif "result_validity" not in results:
            result_string = "{}: {}, Result validity unknown".format(scenario_key, results[scenario_key])
        else:
            result_string = "{}: {}, Result is {}".format(scenario_key, results[scenario_key], results["result_validity"])
        return self._handle_harness_result(result_string)
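
args_to_string is also external to these excerpts. The sketch below captures only the behavior the call site above appears to need (turn the flag dict into a command-line argument string, with None rendered as a bare flag, as the --fast handling suggests); the name args_to_string_sketch and the exact formatting rules are assumptions.

def args_to_string_sketch(flag_dict):
    # Hedged sketch: render {"gpu_batch_size": 64, "fast": None} as
    # "--gpu_batch_size=64 --fast". The real helper may differ.
    parts = []
    for key, value in flag_dict.items():
        if value is None:
            parts.append("--{:}".format(key))
        elif value is False:
            continue
        else:
            parts.append("--{:}={:}".format(key, value))
    return " ".join(parts)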
Example #5
    def __init__(self, directory, verbose=False):
        """ Populate self.data from contents of directory which contains:
        - METAFILE at top level
        - mlperf_log_summary.txt in a run/platform-specific subdirectory.
        """
        search_path = os.path.join(directory, "**/mlperf_log_detail.txt")
        paths = glob.glob(search_path, recursive=True)
        if not paths:
            raise RuntimeError(f"Could not find mlperf_log_detail.txt in: \n{directory}\nDid you mean to run with --noparse?")
        key_set = self.verbose_stat_key_set if verbose else self.scenario_keys_set
        result = from_loadgen_by_keys(os.path.dirname(paths[0]), key_set)
        assert len(result) > 0
        to_ret = {}
        to_ret.update(result)

        with open(os.path.join(directory, METAFILE), 'r') as f:
            extra_stats = json.load(f)['run_info']
        to_ret.update(extra_stats)

        self.data = to_ret
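
METAFILE names a JSON metadata file whose contents are not shown; the constructor only requires a top-level "run_info" object whose entries get merged into self.data. A hypothetical example of writing such a file (the field names and the metadata.json filename are invented for illustration):

import json

example_metadata = {
    "run_info": {
        "system_id": "example_system",  # hypothetical field
        "benchmark": "resnet50",        # hypothetical field
        "scenario": "Offline",          # hypothetical field
    }
}

with open("metadata.json", "w") as f:
    json.dump(example_metadata, f, indent=2)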
Example #6
def main():
    log_dir = common_args.parse_args(["log_dir"])["log_dir"]

    summary_file = os.path.join(log_dir, "perf_harness_summary.json")
    with open(summary_file) as f:
        results = json.load(f)

    print("")
    print(
        "======================= Perf harness results: ======================="
    )
    print("")

    for config_name in results:
        print("{:}:".format(config_name))
        for benchmark in results[config_name]:
            print("    {:}: {:}".format(benchmark,
                                        results[config_name][benchmark]))
        print("")

    summary_file = os.path.join(log_dir, "accuracy_summary.json")
    with open(summary_file) as f:
        results = json.load(f)

    print("")
    print("======================= Accuracy results: =======================")
    print("")

    for config_name in results:
        print("{:}:".format(config_name))
        for benchmark in results[config_name]:
            print("    {:}: {:}".format(benchmark,
                                        results[config_name][benchmark]))
        print("")

    # If this is a power run, we should print out the average power
    if os.path.exists(os.path.join(log_dir, "spl.txt")):
        print("")
        print("======================= Power results: =======================")
        print("")
        for config_name in results:
            print("{:}:".format(config_name))
            for benchmark in results[config_name]:
                # Get power_start and power_end
                detail_logs = glob.glob(os.path.join(log_dir, "**",
                                                     "mlperf_log_detail.txt"),
                                        recursive=True)
                if len(detail_logs) == 0:
                    raise RuntimeError(
                        "Could not find detail logs for power run!")
                elif len(detail_logs) > 1:
                    print(
                        "WARNING: Power harness run contains multiple benchmark-scenario runs. This is not advised."
                    )

                # Select the correct detail log
                scenario = config_name.split("-")[-1]
                detail_log_path = None
                for detail_log in detail_logs:
                    components = detail_log.split("/")
                    if scenario == components[-2] and benchmark == components[-3]:
                        detail_log_path = detail_log
                        break

                if detail_log_path is None:
                    raise RuntimeError(
                        "Could not find mlperf_log_detail.txt for {}-{}".format(
                            benchmark, scenario))

                power_times = from_loadgen_by_keys(
                    os.path.dirname(detail_log_path),
                    ["power_begin", "power_end"])
                power_begin = from_timestamp(power_times["power_begin"])
                power_end = from_timestamp(power_times["power_end"])

                # Read power metrics from spl.txt
                with open(os.path.join(log_dir, "spl.txt")) as f:
                    lines = f.read().split("\n")

                power_vals = []
                for line in lines:
                    data = line.split(",")
                    if len(data) != 12:
                        continue

                    timestamp = data[1]
                    watts = float(data[3])
                    curr_time = from_timestamp(timestamp)

                    if power_begin <= curr_time <= power_end:
                        power_vals.append(watts)
                avg_power = sum(power_vals) / len(power_vals)
                print(
                    "    {}: avg power under load: {:.2f}W with {} power samples"
                    .format(benchmark, avg_power, len(power_vals)))
            print("")