Example #1
def turn_on_mps(active_sms):
    if not is_xavier():
        turn_off_mps()
        cmd = "export CUDA_MPS_ACTIVE_THREAD_PERCENTAGE={:d} && nvidia-cuda-mps-control -d".format(
            active_sms)
        logging.info("Turn on MPS with active_sms = {:d}.".format(active_sms))
        run_command(cmd)
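The MPS helpers in these examples rely on is_xavier() and run_command(), which are defined elsewhere in the repo and not shown here. A minimal sketch of what they are assumed to look like, inferred only from how they are called above (the device-tree check and the exact signature are assumptions, not the actual implementation):

import logging
import subprocess


def is_xavier():
    # Assumption: Xavier boards can be identified from the device-tree model string.
    try:
        with open("/proc/device-tree/model") as f:
            return "Xavier" in f.read()
    except OSError:
        return False


def run_command(cmd, get_output=False, tee=True, custom_env=None):
    # Assumption: run the command through a shell and, when get_output=True,
    # return stdout as a list of lines (callers in these examples iterate over
    # the result). Raises CalledProcessError on a non-zero exit code.
    logging.info("Running command: {:}".format(cmd))
    if not get_output:
        subprocess.check_call(cmd, shell=True, env=custom_env)
        return None
    raw = subprocess.check_output(cmd, shell=True, env=custom_env).decode()
    lines = raw.rstrip("\n").split("\n")
    if tee:
        for line in lines:
            print(line)
    return lines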
Example #2
    def _get_core_temps(cls):
        if cls.system.arch == Architecture.Xavier:
            # Because we don't have nvidia-smi on Xavier, we use sysfs to read out the temperature.
            # The type of each thermal zone is in /sys/devices/virtual/thermal/thermal_zone<N>/type.
            # To avoid spawning a process per zone just to check whether it is the GPU node, we hardcode the GPU_therm node:
            # AGX_Xavier: thermal_zone1
            # Xavier_NX: thermal_zone1
            # NOTE: this may change in subsequent/previous submission models.
            try:
                out_text = run_command("cat /sys/devices/virtual/thermal/thermal_zone1/temp", get_output=True, tee=False)
                # The temperature is in units of milli degC, so scale the result:
                temps = [int(str_temp) / 1000 for str_temp in out_text]
            except Exception as e:
                print("Bad temp reading")
                raise e
        else:
            # Non-Xavier branch
            try:
                out_text = run_command("nvidia-smi --query-gpu=temperature.gpu --format=csv,noheader", get_output=True, tee=False)
                # Multi-GPU instances return a list of strings, one temperature per GPU core
                temps = [int(str_temp) for str_temp in out_text]
            except Exception as e:
                print("Bad temp reading")
                raise e
        return temps
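The comment above hardcodes thermal_zone1 to avoid spawning a process per zone. If you would rather discover the GPU zone at runtime, a hypothetical helper could scan the zone types directly under the sysfs layout described in the comment (this helper is not part of the original code):

import glob
import os


def find_gpu_thermal_zone():
    # Hypothetical: locate the zone whose type names the GPU (e.g. "GPU-therm"
    # on Jetson boards) and return the path of its temp node.
    for type_path in glob.glob("/sys/devices/virtual/thermal/thermal_zone*/type"):
        with open(type_path) as f:
            zone_type = f.read().strip()
        if "GPU" in zone_type:
            return os.path.join(os.path.dirname(type_path), "temp")
    return None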
Example #3
def flac_to_wav(absolute_data_dir, librispeech_path, src, dest):
    wav_file_path = os.path.join(librispeech_path, dest)
    manifest_path = os.path.join(librispeech_path, dest + ".json")

    script_cmd = "cd build/inference/speech_recognition/rnnt && python3 pytorch/utils/convert_librispeech.py --input_dir {:} --dest_dir {:} --output_json {:}".format(
        os.path.join(absolute_data_dir, "LibriSpeech", src), wav_file_path,
        manifest_path)
    run_command(script_cmd)
Example #4
    def run_harness(self):
        flag_dict = self.build_default_flags()
        flag_dict.update(self.build_scenario_specific_flags())

        # Handle engines
        if self.has_gpu:
            flag_dict["gpu_engines"] = self.gpu_engine

        # Generates the entries in the `measurements/` directory, and updates flag_dict accordingly
        generate_measurements_entry(self.get_system_name(), self.name,
                                    self._get_submission_benchmark_name(),
                                    self.scenario, self.args["input_dtype"],
                                    self.args["precision"], flag_dict)

        # Stop here if we are only generating .conf files in measurements
        if self.generate_conf_files_only:
            return "Generated conf files"

        argstr = self._build_custom_flags(flag_dict)
        if type(argstr) is dict:
            argstr = args_to_string(flag_dict)

        # Handle environment variables
        if self.use_jemalloc:
            self.prepend_ld_preload(
                "/usr/lib/x86_64-linux-gnu/libjemalloc.so.2")

        cmd = "{:} {:}".format(self.executable, argstr)
        output = run_command(cmd, get_output=True, custom_env=self.env_vars)

        # Return harness result.
        return self._handle_harness_result(
            self.harness_get_result(output,
                                    scenario_result_regex[self.scenario]))
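Several helpers here (build_default_flags, _build_custom_flags, prepend_ld_preload) are defined elsewhere in the harness. The one worth sketching is args_to_string, which is assumed to flatten flag_dict into a command-line string; the None-means-bare-switch convention below is inferred from how flag_dict["fast"] = None is used in the next example, and the rest is an assumption:

def args_to_string(flag_dict):
    # Assumed behavior: True/None become bare --flags, False flags are dropped,
    # everything else becomes --key=value.
    parts = []
    for key, value in flag_dict.items():
        if value is None or value is True:
            parts.append("--{:}".format(key))
        elif value is False:
            continue
        else:
            parts.append("--{:}={:}".format(key, value))
    return " ".join(parts)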
Example #5
    def run_harness(self):
        flag_dict = self.build_default_flags()
        flag_dict.update(self.build_scenario_specific_flags())

        # Handle engines
        if self.has_gpu:
            flag_dict["gpu_engines"] = self.gpu_engine

        # MLPINF-853: Special handing of --fast. Use min_duration=60000, and if Multistream, use min_query_count=1.
        if flag_dict.get("fast", False):
            if "min_duration" not in flag_dict:
                flag_dict["min_duration"] = 60000
            if self.scenario in [SCENARIOS.Offline, SCENARIOS.MultiStream]:
                if "min_query_count" not in flag_dict:
                    flag_dict["min_query_count"] = 1
            flag_dict["fast"] = None

        # Generates the entries in the `measurements/` directory, and updates flag_dict accordingly
        generate_measurements_entry(
            self.get_system_name(),
            self.name,
            self._get_submission_benchmark_name(),
            self.scenario,
            self.args["input_dtype"],
            self.args["precision"],
            flag_dict)

        # Stop here if we are only generating .conf files in measurements
        if self.generate_conf_files_only:
            return "Generated conf files"

        argstr = self._build_custom_flags(flag_dict)
        if type(argstr) is dict:
            argstr = args_to_string(flag_dict)

        # Handle environment variables
        if self.use_jemalloc:
            self.prepend_ld_preload("/usr/lib/x86_64-linux-gnu/libjemalloc.so.2")

        cmd = "{:} {:}".format(self.executable, argstr)
        output = run_command(cmd, get_output=True, custom_env=self.env_vars)

        # Return harness result.
        scenario_key = scenario_loadgen_log_keys[self.scenario]
        results = from_loadgen_by_keys(
            os.path.join(
                self.args["log_dir"],
                self.get_system_name(),
                self._get_submission_benchmark_name(),
                self.scenario),
            ["result_validity", scenario_key])

        if scenario_key not in results:
            result_string = "Cannot find performance result. Maybe you are running in AccuracyOnly mode."
        elif "result_validity" not in results:
            result_string = "{}: {}, Result validity unknown".format(scenario_key, results[scenario_key])
        else:
            result_string = "{}: {}, Result is {}".format(scenario_key, results[scenario_key], results["result_validity"])
        return self._handle_harness_result(result_string)
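This variant parses the LoadGen result itself instead of delegating to harness_get_result. from_loadgen_by_keys is assumed to read mlperf_log_detail.txt and pull out the requested keys; a rough sketch under that assumption (LoadGen writes detail entries as ':::MLLOG {json}' lines, but the real helper lives elsewhere in the repo):

import json
import os


def from_loadgen_by_keys(log_dir, keys):
    # Assumed behavior: return the last value recorded for each requested key
    # in the LoadGen detail log.
    results = {}
    detail_path = os.path.join(log_dir, "mlperf_log_detail.txt")
    with open(detail_path) as f:
        for line in f:
            if not line.startswith(":::MLLOG"):
                continue
            entry = json.loads(line[len(":::MLLOG"):])
            if entry.get("key") in keys:
                results[entry["key"]] = entry.get("value")
    return results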
Example #6
def verify_test01(harness):
    # Compute path to results dir
    script_path = 'build/inference/compliance/nvidia/TEST01/run_verification.py'
    results_path = os.path.join('results', harness.get_system_name(), harness._get_submission_benchmark_name(), harness.scenario)
    logging.info('AUDIT HARNESS: ' + results_path + '/accuracy' + '\n' + results_path + '/performance')
    verification_command = 'python3 {} --results={} --compliance={} --output_dir={}'.format(
        script_path, results_path, harness.get_full_log_dir(), harness.get_full_log_dir())
    return run_command(verification_command, get_output=True)
Example #7
def verify_test04(harness):
    current_path = harness.get_full_log_dir()  # Might be using TEST04-B instead of TEST04-A
    test04a_path = current_path.replace('TEST04-B', 'TEST04-A')  # Make sure it's TEST04-A
    test04b_path = test04a_path.replace('TEST04-A', 'TEST04-B')  # Make sure it's TEST04-B
    output_path = harness.get_full_log_dir()
    script_path = 'build/inference/compliance/nvidia/TEST04-A/run_verification.py'
    verification_command = 'python3 {} --test4A_dir {} --test4B_dir {} --output_dir {}'.format(
        script_path, test04a_path, test04b_path, output_path)
    return run_command(verification_command, get_output=True)
Example #8
def verify_test01(harness):
    # Compute path to results dir
    script_path = 'build/inference/compliance/nvidia/TEST01/run_verification.py'
    results_path = os.path.join('results', harness.get_system_name(), harness._get_submission_benchmark_name(), harness.scenario)
    logging.info('AUDIT HARNESS: ' + results_path + '/accuracy' + '\n' + results_path + '/performance')
    verification_command = 'python3 {} --results={} --compliance={} --output_dir={}'.format(
        script_path, results_path, harness.get_full_log_dir(), harness.get_full_log_dir())
    try:
        command_result = run_command(verification_command, get_output=True)
    except Exception:
        # Handle TEST01 failure
        logging.info('TEST01 verification failed. Proceeding to fallback approach')
        command_result = 'TEST01 FALLBACK'  # Signal main.py to finish the process
    return command_result
Example #9
def turn_off_mps():
    if not is_xavier() and check_mps_status():
        cmd = "echo quit | nvidia-cuda-mps-control"
        logging.info("Turn off MPS.")
        run_command(cmd)
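check_mps_status() is referenced here but not shown (the variant in Example #11 calls it is_mps_enabled()). A plausible sketch, assuming MPS counts as active when the nvidia-cuda-mps-control daemon is running; the pgrep-based check is an assumption, not the repo's implementation:

import subprocess


def check_mps_status():
    # Assumption: MPS is enabled if the control daemon process exists.
    result = subprocess.run(["pgrep", "nvidia-cuda-mps-control"],
                            stdout=subprocess.DEVNULL,
                            stderr=subprocess.DEVNULL)
    return result.returncode == 0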
Example #10
def preprocess_rnnt(data_dir, preprocessed_data_dir):
    # Use the flac->wav and manifest generation script in the reference repo.
    logging.info(
        "Updating reference repo for convert_librispeech.py script...")
    run_command("make clone_loadgen")

    absolute_data_dir = to_absolute_path(data_dir)
    absolute_preproc_data_dir = to_absolute_path(preprocessed_data_dir)
    librispeech_path = os.path.join(absolute_preproc_data_dir, "LibriSpeech")

    logging.info(
        "Converting flac -> wav and generating manifest.json for test set...")
    flac_to_wav(absolute_data_dir, librispeech_path, "dev-clean",
                "dev-clean-wav")

    logging.info("Converting wav files to npy files for test set...")
    npy_out_path = os.path.join(absolute_preproc_data_dir,
                                "rnnt_dev_clean_512")
    wav_out_path = os.path.join(absolute_preproc_data_dir,
                                "rnnt_dev_clean_500_raw")
    args = Namespace(
        dataset_dir=librispeech_path + "/",
        output_dir=npy_out_path + "/",
        val_manifest=os.path.join(librispeech_path, "dev-clean-wav.json"),
        batch_size=1,
        fp16=False,
        fixed_seq_length=512,
        generate_wav_npy=True,
        fixed_wav_file_length=240000,
        seed=42,
        model_toml="code/rnnt/tensorrt/preprocessing/configs/rnnt.toml",
        max_duration=15.0,
        pad_to=0)
    convert_rnnt_data_main(args)
    shutil.move(os.path.join(npy_out_path, "wav_files"), wav_out_path)

    # Calibration set: 500 sequences selected from train-clean-100
    calibration_file = "build/inference/calibration/LibriSpeech/calibration_files.txt"

    # train-clean-100 is very large, but we only care about the ones in the calibration set
    # Convert the .wav file names to the corresponding .flac files, then transfer the files to a temporary directory
    logging.info("Building calibration set...")
    with open(calibration_file) as f:
        calibration_wavs = f.read().split("\n")

    def wav_to_flac(wav):
        p = wav.split("/")
        p[0] = "train-clean-100"
        p[-1] = p[-1].split(".")[0] + ".flac"
        return p

    calibration_flacs = [
        wav_to_flac(x) for x in calibration_wavs if len(x) > 0
    ]
    calib_dir = "calib_flacs"

    seen_transcripts = set()

    for flac in calibration_flacs:
        new_dir = flac[:-1]
        new_dir[0] = calib_dir
        assert (len(new_dir) == 3)

        new_dir_path = os.path.join(absolute_data_dir, "LibriSpeech", *new_dir)
        os.makedirs(new_dir_path, exist_ok=True)

        flac_path = os.path.join(absolute_data_dir, "LibriSpeech", *flac)
        new_flac_path = os.path.join(new_dir_path, flac[-1])
        logging.info(flac_path + " -> " + new_flac_path)
        shutil.copyfile(flac_path, new_flac_path)

        trans_file = "{:}-{:}.trans.txt".format(new_dir[1], new_dir[2])
        trans_file_src_path = os.path.join(absolute_data_dir, "LibriSpeech",
                                           *flac[:-1], trans_file)
        trans_file_dst_path = os.path.join(new_dir_path, trans_file)

        # Extract transcript for this sample flac
        flac_id = flac[-1].split(".")[0]
        flac_transcript = None
        with open(trans_file_src_path) as transcript_f:
            transcript = transcript_f.read().split("\n")
            for line in transcript:
                if line.startswith(flac_id):
                    flac_transcript = line

        if flac_transcript is None:
            raise ValueError(
                "Invalid flac ID: {:} does not exist in {:}".format(
                    flac_id, trans_file_src_path))

        # Update transcript
        mode = 'a' if trans_file in seen_transcripts else 'w'
        with open(trans_file_dst_path, mode) as f:
            f.write(flac_transcript + "\n")
        seen_transcripts.add(trans_file)

    logging.info(
        "Converting flac -> wav and generating manifest.json for calibration set..."
    )
    flac_to_wav(absolute_data_dir, librispeech_path, calib_dir,
                "train-clean-100-wav")

    logging.info("Converting wav files to npy files for calibration set...")
    npy_out_path = os.path.join(absolute_preproc_data_dir,
                                "rnnt_train_clean_512_fp32")
    wav_out_path = os.path.join(absolute_preproc_data_dir,
                                "rnnt_train_clean_512_wav")
    args = Namespace(
        dataset_dir=librispeech_path + "/",
        output_dir=npy_out_path + "/",
        val_manifest=os.path.join(librispeech_path,
                                  "train-clean-100-wav.json"),
        batch_size=1,
        fp16=False,
        fixed_seq_length=512,
        generate_wav_npy=True,
        fixed_wav_file_length=240000,
        seed=42,
        model_toml="code/rnnt/tensorrt/preprocessing/configs/rnnt.toml",
        max_duration=15.0,
        pad_to=0)
    convert_rnnt_data_main(args)
    shutil.move(os.path.join(npy_out_path, "wav_files"), wav_out_path)

    data_map_dir = to_absolute_path("data_maps/rnnt_train_clean_512")
    os.makedirs(data_map_dir, exist_ok=True)

    data_map_path = os.path.join(data_map_dir, "val_map.txt")
    shutil.copyfile(os.path.join(npy_out_path, "val_map_512.txt"),
                    data_map_path)
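For clarity, this is the path rewrite wav_to_flac performs on each calibration entry; the sample path below is hypothetical but follows the four-component layout that the later assert expects:

# Hypothetical calibration_files.txt-style entry:
sample = "dev-clean-wav/1234/5678/1234-5678-0001.wav"
p = sample.split("/")
p[0] = "train-clean-100"               # redirect to the training split
p[-1] = p[-1].split(".")[0] + ".flac"  # swap the extension back to flac
assert p == ["train-clean-100", "1234", "5678", "1234-5678-0001.flac"]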
Example #11
def turn_off_mps():
    """Turn off MPS."""
    if not is_xavier() and is_mps_enabled():
        cmd = "echo quit | nvidia-cuda-mps-control"
        logging.info("Turn off MPS.")
        run_command(cmd)
Example #12
def main():
    print("Updating Xavier accuracy.txt files...")

    benchmark_list = [
        BENCHMARKS.BERT + "-99", BENCHMARKS.RNNT, BENCHMARKS.UNET + "-99",
        BENCHMARKS.UNET + "-99.9"
    ]
    scenario_list = [SCENARIOS.SingleStream, SCENARIOS.Offline]
    system_list = ["AGX_Xavier_TRT", "Xavier_NX_TRT"]

    # Restore all the mlperf_log_accuracy.json files
    os.makedirs("build/artifacts", exist_ok=True)
    cmd = (
        "python3 scripts/restore_full_accuracy_logs.py --artifactory_username={:} --artifactory_api_key={:} "
        "--systems={:} --benchmarks={:} --scenarios={:} --test_ids= ").format(
            getpass.getuser(), os.environ["ARTIFACTORY_API_KEY"],
            ",".join(system_list), ",".join(benchmark_list),
            ",".join(scenario_list))
    run_command(cmd)

    # Re-compute the accuracies
    for system in system_list:
        for benchmark in benchmark_list:
            for scenario in scenario_list:
                print("Processing {:}-{:}-{:}".format(system, benchmark,
                                                      scenario))
                result_dir = os.path.join("results", system, benchmark,
                                          scenario, "accuracy")
                accuracy_path = os.path.join(result_dir, "accuracy.txt")
                log_path = os.path.join(result_dir, "mlperf_log_accuracy.json")

                # Get the hash for accuracy log
                hash = None
                with open(accuracy_path) as f:
                    for line in f:
                        matches = re.match(r"(hash=[0-9a-fA-F]{64})",
                                           line.rstrip())
                        if matches is None:
                            continue
                        hash = matches.group(1)
                        break
                if hash is None:
                    raise RuntimeError(
                        "Accuracy file {:} does not contain a hash!".format(
                            accuracy_path))

                # Regenerate accuracy.txt
                config = {
                    "benchmark": benchmark.replace("-99.9",
                                                   "").replace("-99", ""),
                    "accuracy_level":
                    "99.9%" if "99.9" in benchmark else "99%",
                    "precision": "int8"
                }
                check_accuracy(log_path, config, True)

                # Add back hash
                with open(accuracy_path, "a") as f:
                    print(hash, file=f)

                print("Done with {:}-{:}-{:}".format(system, benchmark,
                                                     scenario))

    print("Done Xavier accuracy.txt files...")
Example #13
def check_accuracy(log_file, config, is_compliance=False):
    benchmark_name = config["benchmark"]

    accuracy_targets = {
        BENCHMARKS.ResNet50: 76.46,
        BENCHMARKS.SSDResNet34: 20.0,
        BENCHMARKS.SSDMobileNet: 22.0,
        BENCHMARKS.BERT: 90.874,
        BENCHMARKS.DLRM: 80.25,
        BENCHMARKS.RNNT: 100.0 - 7.45225,
        BENCHMARKS.UNET: 0.853
    }
    threshold_ratio = float(config["accuracy_level"][:-1]) / 100

    if not os.path.exists(log_file):
        return "Cannot find accuracy JSON file."
    # Check whether log_file is effectively empty by reading only its first few bytes.
    # The first 4-6 bytes are likely all we need ('', '[]', '[]\r', '[\n]\n', '[\r\n]\r\n', ...),
    # but read 8 bytes to be safe.
    with open(log_file, 'r') as lf:
        first_8B = lf.read(8)
        if not first_8B or ('[' in first_8B and ']' in first_8B):
            return "No accuracy results in PerformanceOnly mode."

    dtype_expand_map = {"fp16": "float16", "fp32": "float32", "int8": "float16"} # Use FP16 output for INT8 mode
    accuracy_regex_map = import_module("build.inference.tools.submission.submission-checker").ACC_PATTERN

    threshold = accuracy_targets[benchmark_name] * threshold_ratio
    if benchmark_name in [BENCHMARKS.ResNet50]:
        cmd = "python3 build/inference/vision/classification_and_detection/tools/accuracy-imagenet.py --mlperf-accuracy-file {:} \
            --imagenet-val-file data_maps/imagenet/val_map.txt --dtype int32 ".format(log_file)
        regex = accuracy_regex_map["acc"]
    elif benchmark_name == BENCHMARKS.SSDResNet34:
        cmd = "python3 build/inference/vision/classification_and_detection/tools/accuracy-coco.py --mlperf-accuracy-file {:} \
            --coco-dir {:} --output-file build/ssd-resnet34-results.json --use-inv-map".format(
            log_file, os.path.join(os.environ.get("PREPROCESSED_DATA_DIR", "build/preprocessed_data"), "coco"))
        regex = accuracy_regex_map["mAP"]
    elif benchmark_name == BENCHMARKS.SSDMobileNet:
        cmd = "python3 build/inference/vision/classification_and_detection/tools/accuracy-coco.py --mlperf-accuracy-file {:} \
            --coco-dir {:} --output-file build/ssd-mobilenet-results.json".format(
            log_file, os.path.join(os.environ.get("PREPROCESSED_DATA_DIR", "build/preprocessed_data"), "coco"))
        regex = accuracy_regex_map["mAP"]
    elif benchmark_name == BENCHMARKS.BERT:
        # Having issues installing tokenizers on Xavier...
        if is_xavier():
            cmd = "python3 code/bert/tensorrt/accuracy-bert.py --mlperf-accuracy-file {:} --squad-val-file {:}".format(
                log_file, os.path.join(os.environ.get("DATA_DIR", "build/data"), "squad", "dev-v1.1.json"))
        else:
            dtype = config["precision"].lower()
            if dtype in dtype_expand_map:
                dtype = dtype_expand_map[dtype]
            val_data_path = os.path.join(
                os.environ.get("DATA_DIR", "build/data"),
                "squad", "dev-v1.1.json")
            vocab_file_path = "build/models/bert/vocab.txt"
            output_prediction_path = os.path.join(os.path.dirname(log_file), "predictions.json")
            cmd = "python3 build/inference/language/bert/accuracy-squad.py " \
                "--log_file {:} --vocab_file {:} --val_data {:} --out_file {:} " \
                "--output_dtype {:}".format(log_file, vocab_file_path, val_data_path, output_prediction_path, dtype)
        regex = accuracy_regex_map["F1"]
    elif benchmark_name == BENCHMARKS.DLRM:
        cmd = "python3 build/inference/recommendation/dlrm/pytorch/tools/accuracy-dlrm.py --mlperf-accuracy-file {:} " \
              "--day-23-file build/data/criteo/day_23 --aggregation-trace-file " \
              "build/preprocessed_data/criteo/full_recalib/sample_partition_trace.txt".format(log_file)
        regex = accuracy_regex_map["AUC"]
    elif benchmark_name == BENCHMARKS.RNNT:
        # Having issues installing librosa on Xavier...
        if is_xavier():
            cmd = "python3 code/rnnt/tensorrt/accuracy.py --loadgen_log {:}".format(log_file)
        else:
            # RNNT output indices are in INT8
            cmd = "python3 build/inference/speech_recognition/rnnt/accuracy_eval.py " \
                "--log_dir {:} --dataset_dir build/preprocessed_data/LibriSpeech/dev-clean-wav " \
                "--manifest build/preprocessed_data/LibriSpeech/dev-clean-wav.json " \
                "--output_dtype int8".format(os.path.dirname(log_file))
        regex = accuracy_regex_map["WER"]
    elif benchmark_name == BENCHMARKS.UNET:
        postprocess_dir = "build/brats_postprocessed_data"
        if not os.path.exists(postprocess_dir):
            os.makedirs(postprocess_dir)
        dtype = config["precision"].lower()
        if dtype in dtype_expand_map:
            dtype = dtype_expand_map[dtype]
        cmd = "python3 build/inference/vision/medical_imaging/3d-unet/accuracy-brats.py --log_file {:} " \
            "--output_dtype {:} --preprocessed_data_dir build/preprocessed_data/brats/brats_reference_preprocessed " \
            "--postprocessed_data_dir {:} " \
            "--label_data_dir build/preprocessed_data/brats/brats_reference_raw/Task043_BraTS2019/labelsTr".format(log_file, dtype, postprocess_dir)
        regex = accuracy_regex_map["DICE"]
        # Having issues installing nnUnet on Xavier...
        if is_xavier():
            logging.warning(
                "Accuracy checking for 3DUnet is not supported on Xavier. Please run the following command on desktop:\n{:}".format(cmd))
            cmd = 'echo "Accuracy: mean = 1.0000, whole tumor = 1.0000, tumor core = 1.0000, enhancing tumor = 1.0000"'
    else:
        raise ValueError("Unknown benchmark: {:}".format(benchmark_name))

    output = run_command(cmd, get_output=True)
    result_regex = re.compile(regex)
    accuracy = None
    with open(os.path.join(os.path.dirname(log_file), "accuracy.txt"), "w") as f:
        for line in output:
            print(line, file=f)
    for line in output:
        result_match = result_regex.match(line)
        if result_match is not None:
            accuracy = float(result_match.group(1))
            break

    accuracy_result = "PASSED" if accuracy is not None and accuracy >= threshold else "FAILED"

    if accuracy_result == "FAILED" and not is_compliance:
        # accuracy may be None if the regex never matched, so format it with {} rather than {:.3f}
        raise RuntimeError(
            "Accuracy = {}, Threshold = {:.3f}. Accuracy test {:}!".format(
                accuracy, threshold, accuracy_result))

    if is_compliance:
        return accuracy  # Needed for numerical comparison

    return "Accuracy = {:.3f}, Threshold = {:.3f}. Accuracy test {:}.".format(
        accuracy, threshold, accuracy_result)
Example #14
def handle_audit_verification(audit_test_name, config):
    # Decouples the verification step from any auditing runs for better maintenance and testing
    logging.info('AUDIT HARNESS: Running verification script...')
    # Prepare log_dir
    config['log_dir'] = os.path.join('build/compliance_logs', audit_test_name)
    # Get a harness object
    harness, config = _generate_harness_object(config=config, profile=None)

    result = None
    if audit_test_name == 'TEST01':
        result = auditing.verify_test01(harness)
        if result == 'TEST01 FALLBACK':
            # Signals a fallback for failed test
            # Process description:
            #   1. Generate baseline_accuracy file
            #   2. Calculate the accuracy of baseline, using the benchmark's accuracy script
            #   3. Use same script to calculate accuracy of compliance run
            #   4. Depending on accuracy level, declare success if two values are within defined tolerance.
            logging.info('main.py notified for fallback handling on TEST01')

            # Run compliance script to generate baseline file
            full_log_dir = harness.get_full_log_dir()
            results_path = os.path.join('results', harness.get_system_name(), harness._get_submission_benchmark_name(), harness.scenario)
            harness_accuracy_log = os.path.join(results_path, 'accuracy/mlperf_log_accuracy.json')
            compliance_accuracy_log = os.path.join(full_log_dir, 'mlperf_log_accuracy.json')
            fallback_command = 'bash build/inference/compliance/nvidia/TEST01/create_accuracy_baseline.sh {} {}'.format(
                harness_accuracy_log,
                compliance_accuracy_log
            )
            # generates new file called mlperf_log_accuracy_baseline.json
            run_command(fallback_command, get_output=True)

            def move_file(src, dst):
                logging.info('Moving file: {} --> {}'.format(src, dst))
                shutil.move(src, dst)

            def copy_file(src, dst):
                logging.info('Copying file: {} --> {}'.format(src, dst))
                shutil.copy(src, dst)

            # Create accuracy and performance directories
            accuracy_dir = os.path.join(full_log_dir, 'TEST01', 'accuracy')
            performance_dir = os.path.join(full_log_dir, 'TEST01', 'performance', 'run_1')
            os.makedirs(accuracy_dir, exist_ok=True)
            os.makedirs(performance_dir, exist_ok=True)

            # Get the accuracy of baseline file
            fallback_result_baseline = check_accuracy('mlperf_log_accuracy_baseline.json', config, is_compliance=True)
            # Move it to the submission dir
            dest_path = os.path.join(accuracy_dir, 'baseline_accuracy.txt')
            move_file('accuracy.txt', dest_path)

            # Get the accuracy of compliance file
            fallback_result_compliance = check_accuracy('{}/mlperf_log_accuracy.json'.format(full_log_dir), config, is_compliance=True)
            # Move it to the submission dir - check_accuracy writes accuracy.txt next to the log file
            # passed as its first argument, so this file will already be located inside get_full_log_dir()
            src_path = os.path.join(full_log_dir, 'accuracy.txt')
            dest_path = os.path.join(accuracy_dir, 'compliance_accuracy.txt')
            move_file(src_path, dest_path)

            # Move the required logs to their correct locations since run_verification.py has failed.
            move_file('verify_accuracy.txt', os.path.join(full_log_dir, 'TEST01', 'verify_accuracy.txt'))
            copy_file(os.path.join(full_log_dir, 'mlperf_log_accuracy.json'), os.path.join(accuracy_dir, 'mlperf_log_accuracy.json'))
            copy_file(os.path.join(full_log_dir, 'mlperf_log_detail.txt'), os.path.join(performance_dir, 'mlperf_log_detail.txt'))
            copy_file(os.path.join(full_log_dir, 'mlperf_log_summary.txt'), os.path.join(performance_dir, 'mlperf_log_summary.txt'))

            # Need to run verify_performance.py script to get verify_performance.txt file.
            verify_performance_command = ("python3 build/inference/compliance/nvidia/TEST01/verify_performance.py -r "
                + results_path + "/performance/run_1/mlperf_log_summary.txt" + " -t "
                + performance_dir + "/mlperf_log_summary.txt | tee " + full_log_dir + "/TEST01/verify_performance.txt")
            run_command(verify_performance_command, get_output=True)

            # Check level of accuracy - this test's tolerance depends on it
            accuracy_level = config["accuracy_level"][:-1]
            if accuracy_level == '99.9':
                logging.info('High Accuracy benchmark detected. Tolerance set to 0.1%')
                if not math.isclose(fallback_result_baseline, fallback_result_compliance, rel_tol=0.001):
                    raise ValueError('TEST01 + Fallback failure: BASELINE ACCURACY: {}, COMPLIANCE_ACCURACY: {}'.format(fallback_result_baseline, fallback_result_compliance))
                else:
                    logging.info('AUDIT HARNESS: Success: TEST01 failure redeemed via fallback approach.')
                    print('TEST PASS')
            elif accuracy_level == '99':
                logging.info('Low Accuracy benchmark detected. Tolerance set to 1%')
                if not math.isclose(fallback_result_baseline, fallback_result_compliance, rel_tol=0.01):
                    raise ValueError('TEST01 + Fallback failure: BASELINE ACCURACY: {}, COMPLIANCE_ACCURACY: {}'.format(fallback_result_baseline, fallback_result_compliance))
                else:
                    logging.info('AUDIT HARNESS: Success: TEST01 failure redeemed via fallback approach.')
                    print('TEST PASS')
            else:
                raise ValueError('Accuracy level not supported: {}'.format(accuracy_level))
    elif audit_test_name == 'TEST04-A' or audit_test_name == 'TEST04-B':
        exclude_list = [BENCHMARKS.BERT, BENCHMARKS.DLRM, BENCHMARKS.RNNT]
        if BENCHMARKS.alias(config['benchmark']) in exclude_list:
            logging.info('TEST04 is not supported for benchmark {}. Ignoring request...'.format(config['benchmark']))
            return None
        result = auditing.verify_test04(harness)
    elif audit_test_name == 'TEST05':
        result = auditing.verify_test05(harness)
    return result
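auditing.verify_test05 is called above but not included in these examples. Judging from verify_test01 and verify_test04, it presumably wraps the TEST05 run_verification.py script in the same way; the sketch below just mirrors the TEST01 pattern, and the actual script's flag names may differ:

def verify_test05(harness):
    # Sketch only: modeled on verify_test01 above; the flag names are assumed.
    script_path = 'build/inference/compliance/nvidia/TEST05/run_verification.py'
    results_path = os.path.join('results', harness.get_system_name(),
                                harness._get_submission_benchmark_name(), harness.scenario)
    verification_command = 'python3 {} --results={} --compliance={} --output_dir={}'.format(
        script_path, results_path, harness.get_full_log_dir(), harness.get_full_log_dir())
    return run_command(verification_command, get_output=True)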
Example #15
def check_accuracy(log_file, config, is_compliance=False):
    """Check accuracy of given benchmark."""

    benchmark_name = config["benchmark"]

    accuracy_targets = {
        BENCHMARKS.BERT: 90.874,
        BENCHMARKS.DLRM: 80.25,
        BENCHMARKS.RNNT: 100.0 - 7.45225,
        BENCHMARKS.ResNet50: 76.46,
        BENCHMARKS.SSDMobileNet: 22.0,
        BENCHMARKS.SSDResNet34: 20.0,
        BENCHMARKS.UNET: 0.853,
    }
    threshold_ratio = float(config["accuracy_level"][:-1]) / 100

    if not os.path.exists(log_file):
        return "Cannot find accuracy JSON file."

    # Check whether log_file is effectively empty by reading only its first few bytes.
    # The first 4-6 bytes are likely all we need ('', '[]', '[]\r', '[\n]\n', '[\r\n]\r\n', ...),
    # but read 8 bytes to be safe.
    with open(log_file, 'r') as lf:
        first_8B = lf.read(8)
        if not first_8B or ('[' in first_8B and ']' in first_8B):
            return "No accuracy results in PerformanceOnly mode."

    dtype_expand_map = {
        "fp16": "float16",
        "fp32": "float32",
        "int8": "float16"
    }  # Use FP16 output for INT8 mode

    # Since submission-checker uses a relative import, but we are running from main.py, we need to surface its directory
    # into sys.path so it can successfully import it.
    # Insert into index 1 so that current working directory still takes precedence.
    sys.path.insert(
        1,
        os.path.join(os.getcwd(), "build", "inference", "tools", "submission"))
    accuracy_regex_map = import_module("submission-checker").ACC_PATTERN

    threshold = accuracy_targets[benchmark_name] * threshold_ratio

    # Every benchmark has its own accuracy script. Prepare commandline with args to the script.
    skip_run_command = False
    if benchmark_name in [BENCHMARKS.ResNet50]:
        cmd = "python3 build/inference/vision/classification_and_detection/tools/accuracy-imagenet.py --mlperf-accuracy-file {:} \
            --imagenet-val-file data_maps/imagenet/val_map.txt --dtype int32 ".format(
            log_file)
        regex = accuracy_regex_map["acc"]
    elif benchmark_name == BENCHMARKS.SSDResNet34:
        cmd = "python3 build/inference/vision/classification_and_detection/tools/accuracy-coco.py --mlperf-accuracy-file {:} \
            --coco-dir {:} --output-file build/ssd-resnet34-results.json --use-inv-map".format(
            log_file,
            os.path.join(
                os.environ.get("PREPROCESSED_DATA_DIR",
                               "build/preprocessed_data"), "coco"))
        regex = accuracy_regex_map["mAP"]
    elif benchmark_name == BENCHMARKS.SSDMobileNet:
        cmd = "python3 build/inference/vision/classification_and_detection/tools/accuracy-coco.py --mlperf-accuracy-file {:} \
            --coco-dir {:} --output-file build/ssd-mobilenet-results.json".format(
            log_file,
            os.path.join(
                os.environ.get("PREPROCESSED_DATA_DIR",
                               "build/preprocessed_data"), "coco"))
        regex = accuracy_regex_map["mAP"]
    elif benchmark_name == BENCHMARKS.BERT:
        # Having issues installing tokenizers on Xavier...
        if is_xavier():
            cmd = "python3 code/bert/tensorrt/accuracy-bert.py --mlperf-accuracy-file {:} --squad-val-file {:}".format(
                log_file,
                os.path.join(os.environ.get("DATA_DIR", "build/data"), "squad",
                             "dev-v1.1.json"))
        else:
            dtype = config["precision"].lower()
            if dtype in dtype_expand_map:
                dtype = dtype_expand_map[dtype]
            val_data_path = os.path.join(
                os.environ.get("DATA_DIR", "build/data"), "squad",
                "dev-v1.1.json")
            vocab_file_path = "build/models/bert/vocab.txt"
            if 'CPU' in config['config_name']:
                vocab_file_path = "build/data/squad/vocab.txt"
            output_prediction_path = os.path.join(os.path.dirname(log_file),
                                                  "predictions.json")
            cmd = "python3 build/inference/language/bert/accuracy-squad.py " \
                "--log_file {:} --vocab_file {:} --val_data {:} --out_file {:} " \
                "--output_dtype {:}".format(log_file, vocab_file_path, val_data_path, output_prediction_path, dtype)
        regex = accuracy_regex_map["F1"]
    elif benchmark_name == BENCHMARKS.DLRM:
        cmd = "python3 build/inference/recommendation/dlrm/pytorch/tools/accuracy-dlrm.py --mlperf-accuracy-file {:} " \
              "--day-23-file build/data/criteo/day_23 --aggregation-trace-file " \
              "build/preprocessed_data/criteo/full_recalib/sample_partition_trace.txt".format(log_file)
        regex = accuracy_regex_map["AUC"]
    elif benchmark_name == BENCHMARKS.RNNT:
        # Having issues installing librosa on Xavier...
        if is_xavier():
            cmd = "python3 code/rnnt/tensorrt/accuracy.py --loadgen_log {:}".format(
                log_file)
        else:
            # RNNT output indices are in INT8
            cmd = "python3 build/inference/speech_recognition/rnnt/accuracy_eval.py " \
                "--log_dir {:} --dataset_dir build/preprocessed_data/LibriSpeech/dev-clean-wav " \
                "--manifest build/preprocessed_data/LibriSpeech/dev-clean-wav.json " \
                "--output_dtype int8".format(os.path.dirname(log_file))
        regex = accuracy_regex_map["WER"]
    elif benchmark_name == BENCHMARKS.UNET:
        postprocess_dir = "build/brats_postprocessed_data"
        if not os.path.exists(postprocess_dir):
            os.makedirs(postprocess_dir)
        dtype = config["precision"].lower()
        if dtype in dtype_expand_map:
            dtype = dtype_expand_map[dtype]
        cmd = "python3 build/inference/vision/medical_imaging/3d-unet/accuracy-brats.py --log_file {:} " \
            "--output_dtype {:} --preprocessed_data_dir build/preprocessed_data/brats/brats_reference_preprocessed " \
            "--postprocessed_data_dir {:} " \
            "--label_data_dir build/preprocessed_data/brats/brats_reference_raw/Task043_BraTS2019/labelsTr".format(log_file, dtype, postprocess_dir)
        regex = accuracy_regex_map["DICE"]
        # Having issues installing nnUnet on Xavier...
        if is_xavier():
            # Internally, run on another node to process the accuracy.
            try:
                cmd = cmd.replace(os.getcwd(), ".", 1)
                temp_cmd = "ssh -oBatchMode=yes computelab-frontend-02 \"timeout 1200 srun --gres=gpu:ga100:1 -t 20:00 " \
                    "bash -c 'cd {:} && make prebuild DOCKER_COMMAND=\\\"{:}\\\"'\"".format(os.getcwd(), cmd)
                full_output = run_command(temp_cmd, get_output=True)
                start_line_idx = -1
                end_line_idx = -1
                for (line_idx, line) in enumerate(full_output):
                    if "Please cite the following paper when using nnUNet:" in line:
                        start_line_idx = line_idx
                    if "Done!" in line:
                        end_line_idx = line_idx
                assert start_line_idx != -1 and end_line_idx != -1, "Failed in accuracy checking"
                output = full_output[start_line_idx:end_line_idx + 1]
                skip_run_command = True
            except Exception as e:
                logging.warning(
                    "Accuracy checking for 3DUnet is not supported on Xavier. Please run the following command on desktop:\n{:}"
                    .format(cmd))
                output = [
                    "Accuracy: mean = 1.0000, whole tumor = 1.0000, tumor core = 1.0000, enhancing tumor = 1.0000"
                ]
                skip_run_command = True
    else:
        raise ValueError("Unknown benchmark: {:}".format(benchmark_name))

    # Run benchmark's accuracy script and parse output for result.
    if not skip_run_command:
        output = run_command(cmd, get_output=True)
    result_regex = re.compile(regex)
    accuracy = None
    with open(os.path.join(os.path.dirname(log_file), "accuracy.txt"),
              "w") as f:
        for line in output:
            print(line, file=f)
    for line in output:
        result_match = result_regex.match(line)
        if result_match is not None:
            accuracy = float(result_match.group(1))
            break

    accuracy_result = "PASSED" if accuracy is not None and accuracy >= threshold else "FAILED"

    if accuracy_result == "FAILED" and not is_compliance:
        # accuracy may be None if the regex never matched, so format it with {} rather than {:.3f}
        raise RuntimeError(
            "Accuracy = {}, Threshold = {:.3f}. Accuracy test {:}!".format(
                accuracy, threshold, accuracy_result))

    if is_compliance:
        return accuracy  # Needed for numerical comparison

    return "Accuracy = {:.3f}, Threshold = {:.3f}. Accuracy test {:}.".format(
        accuracy, threshold, accuracy_result)
Example #16
def tee(cmd):
    # The return value is unused, but we must request output (get_output=True) to get the tee effect
    run_command(cmd, get_output=True, tee=True)