def run_3dunet_accuracy(engine_file, batch_size, num_images, verbose=False):
    """Run preprocessed BraTS validation volumes through a 3D-UNet engine.

    The engine's first-input dtype/format (as reported by ``get_input_format``)
    selects which preprocessed ``.npy`` directory is read. Images are fed to the
    engine in batches and basic mean/std statistics of the first input and first
    output of every batch are printed.

    Args:
        engine_file: Path of the serialized engine handed to ``EngineRunner``.
        batch_size: Number of images per inference call.
        num_images: Number of images to run; ``None`` means 67. Values larger
            than the number of entries in the validation map are clamped.
        verbose: When true, log the run configuration and per-batch timing.

    Returns:
        Always ``1.0``; no real accuracy metric is computed yet.

    Raises:
        ValueError: If the engine input dtype/format combination has no
            matching preprocessed data directory.
    """
    runner = EngineRunner(engine_file, verbose=verbose)
    input_dtype, input_format = get_input_format(runner.engine)

    if verbose:
        logging.info("Running UNET accuracy test with:")
        logging.info(" engine_file: {:}".format(engine_file))
        logging.info(" batch_size: {:}".format(batch_size))
        logging.info(" num_images: {:}".format(num_images))
        logging.info(" input_dtype: {:}".format(input_dtype))
        logging.info(" input_format: {:}".format(input_format))

    # Map the engine input layout to the name of the preprocessed data folder.
    # Left as None when the combination is unsupported so it can be reported
    # explicitly instead of surfacing later as an UnboundLocalError.
    format_string = None
    if input_dtype == trt.DataType.FLOAT:
        format_string = "fp32"
    elif input_dtype == trt.DataType.INT8:
        if input_format == trt.TensorFormat.LINEAR:
            format_string = "int8_linear"
    elif input_dtype == trt.DataType.HALF:
        if input_format == trt.TensorFormat.LINEAR:
            format_string = "fp16_linear"
        elif input_format == trt.TensorFormat.DHWC8:
            format_string = "fp16_dhwc8"
    if format_string is None:
        raise ValueError(
            "Unsupported 3D-UNet input dtype/format combination: {:} / {:}".format(
                input_dtype, input_format))

    image_dir = os.path.join(
        os.getenv("PREPROCESSED_DATA_DIR", "build/preprocessed_data"),
        "brats", "brats_npy", format_string)

    if num_images is None:
        num_images = 67

    # The first whitespace-separated token of every non-blank line is the image name.
    image_list = []
    with open("data_maps/brats/val_map.txt") as f:
        for line in f:
            fields = line.split()
            if fields:
                image_list.append(fields[0])

    # Asking for more images than the map provides would make the stacked batch
    # smaller than the batch size reported to the runner.
    if num_images > len(image_list):
        logging.warning(
            "Requested {:} images but the validation map only lists {:}; using {:}.".format(
                num_images, len(image_list), len(image_list)))
        num_images = len(image_list)

    # Collected for a future accuracy check; not consumed yet.
    predictions = []
    batch_idx = 0
    for image_idx in range(0, num_images, batch_size):
        # The last batch may be partial.
        actual_batch_size = min(batch_size, num_images - image_idx)
        batch_names = image_list[image_idx:image_idx + actual_batch_size]
        batch_images = np.ascontiguousarray(np.stack(
            [np.load(os.path.join(image_dir, name + ".npy")) for name in batch_names]))

        start_time = time.time()
        outputs = runner([batch_images], actual_batch_size)

        # Quick sanity statistics of the first input volume and the first output.
        first_input = batch_images[0].astype(np.float32)
        first_output = outputs[0].astype(np.float32)
        print(np.mean(first_input))
        print(np.std(first_input))
        print(np.mean(first_output))
        print(np.std(first_output))

        if verbose:
            logging.info("Batch {:d} (Size {:}) >> Inference time: {:f}".format(
                batch_idx, actual_batch_size, time.time() - start_time))

        predictions.extend(outputs[0][:actual_batch_size])
        batch_idx += 1

    logging.warning("3D-Unet standalone accuracy checker does not have accuracy checking yet! Always return 1.0")
    return 1.0
def postprocess_2(self):
    """Configure input 0 and replace the network outputs with a TopK index.

    Applies ``self.input_dtype`` / ``self.input_format`` to the first network
    input, logs every layer together with its device type, unmarks all current
    outputs, appends a TopK(MAX, k=1) layer to the output of the last layer
    and marks the TopK index tensor as the only network output.

    Raises:
        Exception: If the network does not end up with exactly one output.
    """
    net = self.network

    # Set input dtype and format
    first_input = net.get_input(0)
    if self.input_dtype == "int8":
        first_input.dtype = trt.int8
        first_input.dynamic_range = (-128, 127)
    if self.input_format == "linear":
        first_input.allowed_formats = 1 << int(trt.TensorFormat.LINEAR)
    elif self.input_format == "chw4":
        first_input.allowed_formats = 1 << int(trt.TensorFormat.CHW4)

    # Dump every layer so the log shows where each one is placed.
    layer_count = net.num_layers
    logging.debug(layer_count)
    for idx in range(layer_count):
        current = net.get_layer(idx)
        placement = self.builder_config.get_device_type(current)
        logging.info("({:}) Layer '{:}' -> Type: {:} ON {:}".format(
            idx, current.name, current.type, placement))

    # Drop all existing outputs; the TopK index below becomes the only one.
    while net.num_outputs > 0:
        stale_output = net.get_output(0)
        logging.info("Unmarking output: {:}".format(stale_output.name))
        net.unmark_output(stale_output)

    # Add top-k on the output of the final layer.
    tail_layer = net.get_layer(layer_count - 1)
    topk_layer = net.add_topk(tail_layer.get_output(0),
                              trt.TopKOperation.MAX, 1, 2)
    topk_layer.name = "topk_layer"
    topk_values = topk_layer.get_output(0)
    topk_indices = topk_layer.get_output(1)
    topk_values.name = "topk_layer_output_value"
    topk_indices.name = "topk_layer_output_index"
    net.mark_output(topk_indices)

    if net.num_outputs == 1:
        return
    logging.warning(
        "num outputs should be 1 after unmarking! Has {:}".format(
            net.num_outputs))
    raise Exception
def postprocess(self, useConvForFC=False):
    """Rewrite the tail of the parsed network into pool -> FC -> TopK.

    Steps performed on ``self.network``:
      1. Apply ``self.input_dtype`` / ``self.input_format`` to input 0.
      2. Scan all layers (logging each one) for the layer whose name contains
         "MatMul"; take its kernel/bias without the first output row, and
         locate the reduce layer (index i-13) and last activation (index i-14).
      3. Unmark every existing output.
      4. Add a 7x7 average pooling on the last activation, a new FC (or 1x1
         convolution when ``useConvForFC`` is true) and a TopK(MAX, k=1) layer,
         then mark the TopK index tensor as the only output.

    Args:
        useConvForFC: Build the classifier as a 1x1 convolution instead of a
            fully connected layer.

    Raises:
        RuntimeError: If no layer with "MatMul" in its name is found.
        AssertionError: If the matched layers do not have the expected types.
        Exception: If the network does not end up with exactly one output.
    """
    # Set input dtype and format
    input_tensor = self.network.get_input(0)
    if self.input_dtype == "int8":
        input_tensor.dtype = trt.int8
        input_tensor.dynamic_range = (-128, 127)
    if self.input_format == "linear":
        input_tensor.allowed_formats = 1 << int(trt.TensorFormat.LINEAR)
    elif self.input_format == "chw4":
        input_tensor.allowed_formats = 1 << int(trt.TensorFormat.CHW4)

    # Get the layers we care about. These stay None when the FC layer is
    # absent so that case can be reported before the network is modified.
    fc_kernel = None
    fc_bias = None
    last_conv_layer = None

    nb_layers = self.network.num_layers
    logging.debug(nb_layers)
    for i in range(nb_layers):
        layer = self.network.get_layer(i)
        logging.info("({:}) Layer '{:}' -> Type: {:} ON {:}".format(
            i, layer.name, layer.type,
            self.builder_config.get_device_type(layer)))

        # Detect the FC layer.
        if "MatMul" in layer.name:
            fc_layer = layer
            assert fc_layer.type == trt.LayerType.FULLY_CONNECTED
            # Re-type the generic ILayer so kernel/bias become accessible.
            fc_layer.__class__ = trt.IFullyConnectedLayer
            # Drop the first of the 1001 output rows (and its bias).
            fc_kernel = fc_layer.kernel.reshape(1001, 2048)[1:, :]
            fc_bias = fc_layer.bias[1:]

            # (i-13)th layer should be reduction.
            reduce_layer = self.network.get_layer(i - 13)
            assert reduce_layer.type == trt.LayerType.REDUCE
            reduce_layer.__class__ = trt.IReduceLayer

            # (i-14)th layer should be the last ReLU
            last_conv_layer = self.network.get_layer(i - 14)
            assert last_conv_layer.type == trt.LayerType.ACTIVATION
            last_conv_layer.__class__ = trt.IActivationLayer

    # Fail before touching the outputs: without the FC layer there is nothing
    # to rebuild, and continuing would crash on unbound names after the
    # network outputs had already been removed.
    if last_conv_layer is None:
        raise RuntimeError(
            "Could not find a layer with 'MatMul' in its name; "
            "cannot rebuild the classifier tail of the network.")

    # Unmark the old output since we are going to add new layers for the final part of the network.
    while self.network.num_outputs > 0:
        logging.info("Unmarking output: {:}".format(
            self.network.get_output(0).name))
        self.network.unmark_output(self.network.get_output(0))

    # Replace the reduce layer with pooling layer
    pool_layer_new = self.network.add_pooling(
        last_conv_layer.get_output(0), trt.PoolingType.AVERAGE, (7, 7))
    pool_layer_new.name = "squeeze_replaced"
    pool_layer_new.get_output(0).name = "squeeze_replaced_output"

    # Add fc layer
    fc_kernel = fc_kernel.flatten()
    if useConvForFC:
        fc_layer_new = self.network.add_convolution(
            pool_layer_new.get_output(0), fc_bias.size, (1, 1), fc_kernel,
            fc_bias)
    else:
        fc_layer_new = self.network.add_fully_connected(
            pool_layer_new.get_output(0), fc_bias.size, fc_kernel, fc_bias)
    fc_layer_new.name = "fc_replaced"
    fc_layer_new.get_output(0).name = "fc_replaced_output"

    # Add topK layer.
    topk_layer = self.network.add_topk(fc_layer_new.get_output(0),
                                       trt.TopKOperation.MAX, 1, 2)
    topk_layer.name = "topk_layer"
    topk_layer.get_output(0).name = "topk_layer_output_value"
    topk_layer.get_output(1).name = "topk_layer_output_index"

    # Mark the new output.
    self.network.mark_output(topk_layer.get_output(1))

    if self.network.num_outputs != 1:
        logging.warning(
            "num outputs should be 1 after unmarking! Has {:}".format(
                self.network.num_outputs))
        raise Exception
def initialize(self):
    """Build the TRT network from the ONNX model and attach a TopK output.

    Queries the system, creates an explicit-batch network, parses the ONNX
    file at ``self.model_path`` into it, configures input 0, replaces all
    outputs with the index output of a TopK(MAX, k=1) layer appended to the
    last layer, optionally runs ``self.autosinian_optimize()`` and finally
    sets ``self.initialized``.

    Raises:
        RuntimeError: If the ONNX parser reports failure.
        Exception: If the network does not end up with exactly one output.
    """
    # Query system id for architecture
    self.system = get_system()
    self.gpu_arch = self.system.arch

    # Create network.
    explicit_batch_flag = 1 << int(
        trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    self.network = self.builder.create_network(explicit_batch_flag)

    # Parse from onnx file.
    onnx_parser = trt.OnnxParser(self.network, self.logger)
    with open(self.model_path, "rb") as model_file:
        serialized_model = model_file.read()
    parsed_ok = onnx_parser.parse(serialized_model)
    if not parsed_ok:
        first_error = onnx_parser.get_error(0).desc()
        raise RuntimeError(
            "ofa_autusinian onnx model processing failed! Error: {:}".format(
                first_error))

    # Set input dtype and format
    first_input = self.network.get_input(0)
    if self.input_dtype == "int8":
        first_input.dtype = trt.int8
        # Input scale is stored as the big-endian float32 bit pattern below.
        (input_scale,) = struct.unpack('!f', bytes.fromhex('3caa5293'))
        first_input.dynamic_range = (-input_scale * 127.0,
                                     input_scale * 127.0)
    if self.input_format == "linear":
        first_input.allowed_formats = 1 << int(trt.TensorFormat.LINEAR)
    elif self.input_format == "chw4":
        first_input.allowed_formats = 1 << int(trt.TensorFormat.CHW4)

    # Remember the layer count before adding anything: the TopK layer is
    # attached to what is currently the last layer.
    layer_count = self.network.num_layers

    while self.network.num_outputs > 0:
        stale_output = self.network.get_output(0)
        logging.info("Unmarking output: {:}".format(stale_output.name))
        self.network.unmark_output(stale_output)

    # Add top-k
    tail_layer = self.network.get_layer(layer_count - 1)
    topk_layer = self.network.add_topk(tail_layer.get_output(0),
                                       trt.TopKOperation.MAX, 1, 2)
    topk_layer.name = "topk_layer"
    topk_values = topk_layer.get_output(0)
    topk_indices = topk_layer.get_output(1)
    topk_values.name = "topk_layer_output_value"
    topk_indices.name = "topk_layer_output_index"
    self.network.mark_output(topk_indices)

    if self.network.num_outputs != 1:
        logging.warning(
            "num outputs should be 1 after unmarking! Has {:}".format(
                self.network.num_outputs))
        raise Exception

    # The extra optimization pass only applies to already-calibrated INT8
    # builds with batch size above 1.
    if self.precision == "int8" and self.batch_size > 1:
        if not self.need_calibration:
            self.autosinian_optimize()

    self.initialized = True
def check_accuracy(log_file, config, is_compliance=False):
    """Run the benchmark's accuracy script on an accuracy log and grade it.

    Args:
        log_file: Path of the accuracy JSON file. The script output is also
            written to ``accuracy.txt`` in the same directory.
        config: Mapping providing at least ``"benchmark"`` and
            ``"accuracy_level"`` (its last character is stripped before
            conversion to a percentage, e.g. presumably ``"99%"``).
            ``"precision"`` is additionally read for BERT and 3D-UNet.
        is_compliance: When true, return the raw accuracy value and never
            raise on a failed check.

    Returns:
        A human-readable status string, or, when ``is_compliance`` is true,
        the parsed accuracy as a float (``None`` if no result line matched).

    Raises:
        ValueError: If the benchmark is unknown.
        RuntimeError: If the accuracy is below threshold (or could not be
            parsed) and ``is_compliance`` is false.
    """
    benchmark_name = config["benchmark"]
    threshold_ratio = float(config["accuracy_level"][:-1]) / 100

    if not os.path.exists(log_file):
        return "Cannot find accuracy JSON file."

    # checking if log_file is empty by just reading first several bytes
    # indeed, first 4B~6B is likely all we need to check: '', '[]', '[]\r', '[\n]\n', '[\r\n]\r\n', ...
    # but checking 8B for safety
    with open(log_file, 'r') as lf:
        first_8B = lf.read(8)
    if not first_8B or ('[' in first_8B and ']' in first_8B):
        return "No accuracy results in PerformanceOnly mode."

    accuracy_targets = {
        BENCHMARKS.ResNet50: 76.46,
        BENCHMARKS.SSDResNet34: 20.0,
        BENCHMARKS.SSDMobileNet: 22.0,
        BENCHMARKS.BERT: 90.874,
        BENCHMARKS.DLRM: 80.25,
        BENCHMARKS.RNNT: 100.0 - 7.45225,
        BENCHMARKS.UNET: 0.853
    }
    # Report unknown benchmarks with the intended error instead of a KeyError
    # from the threshold lookup below.
    if benchmark_name not in accuracy_targets:
        raise ValueError("Unknown benchmark: {:}".format(benchmark_name))

    dtype_expand_map = {"fp16": "float16", "fp32": "float32", "int8": "float16"}  # Use FP16 output for INT8 mode

    accuracy_regex_map = import_module("build.inference.tools.submission.submission-checker").ACC_PATTERN
    threshold = accuracy_targets[benchmark_name] * threshold_ratio

    preprocessed_dir = os.environ.get("PREPROCESSED_DATA_DIR", "build/preprocessed_data")
    squad_val_file = os.path.join(os.environ.get("DATA_DIR", "build/data"), "squad", "dev-v1.1.json")

    # Every benchmark has its own accuracy script; build its command line.
    if benchmark_name == BENCHMARKS.ResNet50:
        cmd = (
            "python3 build/inference/vision/classification_and_detection/tools/accuracy-imagenet.py "
            "--mlperf-accuracy-file {:} "
            "--imagenet-val-file data_maps/imagenet/val_map.txt --dtype int32 "
        ).format(log_file)
        regex = accuracy_regex_map["acc"]
    elif benchmark_name == BENCHMARKS.SSDResNet34:
        cmd = (
            "python3 build/inference/vision/classification_and_detection/tools/accuracy-coco.py "
            "--mlperf-accuracy-file {:} "
            "--coco-dir {:} --output-file build/ssd-resnet34-results.json --use-inv-map"
        ).format(log_file, os.path.join(preprocessed_dir, "coco"))
        regex = accuracy_regex_map["mAP"]
    elif benchmark_name == BENCHMARKS.SSDMobileNet:
        cmd = (
            "python3 build/inference/vision/classification_and_detection/tools/accuracy-coco.py "
            "--mlperf-accuracy-file {:} "
            "--coco-dir {:} --output-file build/ssd-mobilenet-results.json"
        ).format(log_file, os.path.join(preprocessed_dir, "coco"))
        regex = accuracy_regex_map["mAP"]
    elif benchmark_name == BENCHMARKS.BERT:
        # Having issue installing tokenizers on Xavier...
        if is_xavier():
            cmd = "python3 code/bert/tensorrt/accuracy-bert.py --mlperf-accuracy-file {:} --squad-val-file {:}".format(
                log_file, squad_val_file)
        else:
            dtype = config["precision"].lower()
            dtype = dtype_expand_map.get(dtype, dtype)
            vocab_file_path = "build/models/bert/vocab.txt"
            output_prediction_path = os.path.join(os.path.dirname(log_file), "predictions.json")
            cmd = (
                "python3 build/inference/language/bert/accuracy-squad.py "
                "--log_file {:} --vocab_file {:} --val_data {:} --out_file {:} "
                "--output_dtype {:}"
            ).format(log_file, vocab_file_path, squad_val_file, output_prediction_path, dtype)
        regex = accuracy_regex_map["F1"]
    elif benchmark_name == BENCHMARKS.DLRM:
        cmd = (
            "python3 build/inference/recommendation/dlrm/pytorch/tools/accuracy-dlrm.py --mlperf-accuracy-file {:} "
            "--day-23-file build/data/criteo/day_23 --aggregation-trace-file "
            "build/preprocessed_data/criteo/full_recalib/sample_partition_trace.txt"
        ).format(log_file)
        regex = accuracy_regex_map["AUC"]
    elif benchmark_name == BENCHMARKS.RNNT:
        # Having issue installing librosa on Xavier...
        if is_xavier():
            cmd = "python3 code/rnnt/tensorrt/accuracy.py --loadgen_log {:}".format(log_file)
        else:
            # RNNT output indices are in INT8
            cmd = (
                "python3 build/inference/speech_recognition/rnnt/accuracy_eval.py "
                "--log_dir {:} --dataset_dir build/preprocessed_data/LibriSpeech/dev-clean-wav "
                "--manifest build/preprocessed_data/LibriSpeech/dev-clean-wav.json "
                "--output_dtype int8"
            ).format(os.path.dirname(log_file))
        regex = accuracy_regex_map["WER"]
    elif benchmark_name == BENCHMARKS.UNET:
        postprocess_dir = "build/brats_postprocessed_data"
        if not os.path.exists(postprocess_dir):
            os.makedirs(postprocess_dir)
        dtype = config["precision"].lower()
        dtype = dtype_expand_map.get(dtype, dtype)
        cmd = (
            "python3 build/inference/vision/medical_imaging/3d-unet/accuracy-brats.py --log_file {:} "
            "--output_dtype {:} --preprocessed_data_dir build/preprocessed_data/brats/brats_reference_preprocessed "
            "--postprocessed_data_dir {:} "
            "--label_data_dir build/preprocessed_data/brats/brats_reference_raw/Task043_BraTS2019/labelsTr"
        ).format(log_file, dtype, postprocess_dir)
        regex = accuracy_regex_map["DICE"]
        # Having issue installing nnUnet on Xavier...
        if is_xavier():
            logging.warning(
                "Accuracy checking for 3DUnet is not supported on Xavier. Please run the following command on desktop:\n{:}".format(cmd))
            # Substitute a command that prints a perfect score in the expected format.
            cmd = 'echo "Accuracy: mean = 1.0000, whole tumor = 1.0000, tumor core = 1.0000, enhancing tumor = 1.0000"'
    else:
        raise ValueError("Unknown benchmark: {:}".format(benchmark_name))

    # Run the accuracy script, keep its full output next to the log file and
    # take the accuracy from the first line matching the benchmark's pattern.
    output = run_command(cmd, get_output=True)
    result_regex = re.compile(regex)
    with open(os.path.join(os.path.dirname(log_file), "accuracy.txt"), "w") as f:
        for line in output:
            print(line, file=f)

    accuracy = None
    for line in output:
        result_match = result_regex.match(line)
        if result_match is not None:
            accuracy = float(result_match.group(1))
            break

    passed = accuracy is not None and accuracy >= threshold
    accuracy_result = "PASSED" if passed else "FAILED"
    # A missing result cannot be formatted with ':.3f'; report it as N/A so the
    # failure surfaces as the intended RuntimeError/message, not a TypeError.
    accuracy_text = "N/A" if accuracy is None else "{:.3f}".format(accuracy)

    if not passed and not is_compliance:
        raise RuntimeError(
            "Accuracy = {:}, Threshold = {:.3f}. Accuracy test {:}!".format(
                accuracy_text, threshold, accuracy_result))

    if is_compliance:
        return accuracy  # Needed for numerical comparison

    return "Accuracy = {:}, Threshold = {:.3f}. Accuracy test {:}.".format(
        accuracy_text, threshold, accuracy_result)
def check_accuracy(log_file, config, is_compliance=False):
    """Check accuracy of given benchmark.

    Builds the command line of the benchmark's accuracy script, runs it,
    stores its output in ``accuracy.txt`` next to ``log_file`` and compares
    the parsed accuracy with the benchmark target scaled by the configured
    accuracy level.

    Args:
        log_file: Path of the accuracy JSON file.
        config: Mapping providing at least ``"benchmark"`` and
            ``"accuracy_level"`` (its last character is stripped before
            conversion to a percentage, e.g. presumably ``"99%"``).
            ``"precision"`` is read for BERT and 3D-UNet, ``"config_name"``
            for BERT.
        is_compliance: When true, return the raw accuracy value and never
            raise on a failed check.

    Returns:
        A human-readable status string, or, when ``is_compliance`` is true,
        the parsed accuracy as a float (``None`` if no result line matched).

    Raises:
        ValueError: If the benchmark is unknown.
        RuntimeError: If the accuracy is below threshold (or could not be
            parsed) and ``is_compliance`` is false.
    """
    benchmark_name = config["benchmark"]
    threshold_ratio = float(config["accuracy_level"][:-1]) / 100

    if not os.path.exists(log_file):
        return "Cannot find accuracy JSON file."

    # checking if log_file is empty by just reading first several bytes
    # indeed, first 4B~6B is likely all we need to check: '', '[]', '[]\r', '[\n]\n', '[\r\n]\r\n', ...
    # but checking 8B for safety
    with open(log_file, 'r') as lf:
        first_8B = lf.read(8)
    if not first_8B or ('[' in first_8B and ']' in first_8B):
        return "No accuracy results in PerformanceOnly mode."

    accuracy_targets = {
        BENCHMARKS.BERT: 90.874,
        BENCHMARKS.DLRM: 80.25,
        BENCHMARKS.RNNT: 100.0 - 7.45225,
        BENCHMARKS.ResNet50: 76.46,
        BENCHMARKS.SSDMobileNet: 22.0,
        BENCHMARKS.SSDResNet34: 20.0,
        BENCHMARKS.UNET: 0.853,
    }
    # Report unknown benchmarks with the intended error instead of a KeyError
    # from the threshold lookup below.
    if benchmark_name not in accuracy_targets:
        raise ValueError("Unknown benchmark: {:}".format(benchmark_name))

    dtype_expand_map = {
        "fp16": "float16",
        "fp32": "float32",
        "int8": "float16"
    }  # Use FP16 output for INT8 mode

    # Since submission-checker uses a relative import, but we are running from main.py, we need to surface its directory
    # into sys.path so it can successfully import it.
    # Insert into index 1 so that current working directory still takes precedence.
    # Only insert once so repeated calls do not keep growing sys.path.
    submission_dir = os.path.join(os.getcwd(), "build", "inference", "tools", "submission")
    if submission_dir not in sys.path:
        sys.path.insert(1, submission_dir)
    accuracy_regex_map = import_module("submission-checker").ACC_PATTERN

    threshold = accuracy_targets[benchmark_name] * threshold_ratio

    preprocessed_dir = os.environ.get("PREPROCESSED_DATA_DIR", "build/preprocessed_data")
    squad_val_file = os.path.join(os.environ.get("DATA_DIR", "build/data"), "squad", "dev-v1.1.json")

    # Every benchmark has its own accuracy script. Prepare commandline with args to the script.
    # `output` stays None unless a branch already produced the script output itself.
    output = None
    if benchmark_name == BENCHMARKS.ResNet50:
        cmd = (
            "python3 build/inference/vision/classification_and_detection/tools/accuracy-imagenet.py "
            "--mlperf-accuracy-file {:} "
            "--imagenet-val-file data_maps/imagenet/val_map.txt --dtype int32 "
        ).format(log_file)
        regex = accuracy_regex_map["acc"]
    elif benchmark_name == BENCHMARKS.SSDResNet34:
        cmd = (
            "python3 build/inference/vision/classification_and_detection/tools/accuracy-coco.py "
            "--mlperf-accuracy-file {:} "
            "--coco-dir {:} --output-file build/ssd-resnet34-results.json --use-inv-map"
        ).format(log_file, os.path.join(preprocessed_dir, "coco"))
        regex = accuracy_regex_map["mAP"]
    elif benchmark_name == BENCHMARKS.SSDMobileNet:
        cmd = (
            "python3 build/inference/vision/classification_and_detection/tools/accuracy-coco.py "
            "--mlperf-accuracy-file {:} "
            "--coco-dir {:} --output-file build/ssd-mobilenet-results.json"
        ).format(log_file, os.path.join(preprocessed_dir, "coco"))
        regex = accuracy_regex_map["mAP"]
    elif benchmark_name == BENCHMARKS.BERT:
        # Having issue installing tokenizers on Xavier...
        if is_xavier():
            cmd = "python3 code/bert/tensorrt/accuracy-bert.py --mlperf-accuracy-file {:} --squad-val-file {:}".format(
                log_file, squad_val_file)
        else:
            dtype = config["precision"].lower()
            dtype = dtype_expand_map.get(dtype, dtype)
            vocab_file_path = "build/models/bert/vocab.txt"
            if 'CPU' in config['config_name']:
                vocab_file_path = "build/data/squad/vocab.txt"
            output_prediction_path = os.path.join(os.path.dirname(log_file), "predictions.json")
            cmd = (
                "python3 build/inference/language/bert/accuracy-squad.py "
                "--log_file {:} --vocab_file {:} --val_data {:} --out_file {:} "
                "--output_dtype {:}"
            ).format(log_file, vocab_file_path, squad_val_file, output_prediction_path, dtype)
        regex = accuracy_regex_map["F1"]
    elif benchmark_name == BENCHMARKS.DLRM:
        cmd = (
            "python3 build/inference/recommendation/dlrm/pytorch/tools/accuracy-dlrm.py --mlperf-accuracy-file {:} "
            "--day-23-file build/data/criteo/day_23 --aggregation-trace-file "
            "build/preprocessed_data/criteo/full_recalib/sample_partition_trace.txt"
        ).format(log_file)
        regex = accuracy_regex_map["AUC"]
    elif benchmark_name == BENCHMARKS.RNNT:
        # Having issue installing librosa on Xavier...
        if is_xavier():
            cmd = "python3 code/rnnt/tensorrt/accuracy.py --loadgen_log {:}".format(log_file)
        else:
            # RNNT output indices are in INT8
            cmd = (
                "python3 build/inference/speech_recognition/rnnt/accuracy_eval.py "
                "--log_dir {:} --dataset_dir build/preprocessed_data/LibriSpeech/dev-clean-wav "
                "--manifest build/preprocessed_data/LibriSpeech/dev-clean-wav.json "
                "--output_dtype int8"
            ).format(os.path.dirname(log_file))
        regex = accuracy_regex_map["WER"]
    elif benchmark_name == BENCHMARKS.UNET:
        postprocess_dir = "build/brats_postprocessed_data"
        if not os.path.exists(postprocess_dir):
            os.makedirs(postprocess_dir)
        dtype = config["precision"].lower()
        dtype = dtype_expand_map.get(dtype, dtype)
        cmd = (
            "python3 build/inference/vision/medical_imaging/3d-unet/accuracy-brats.py --log_file {:} "
            "--output_dtype {:} --preprocessed_data_dir build/preprocessed_data/brats/brats_reference_preprocessed "
            "--postprocessed_data_dir {:} "
            "--label_data_dir build/preprocessed_data/brats/brats_reference_raw/Task043_BraTS2019/labelsTr"
        ).format(log_file, dtype, postprocess_dir)
        regex = accuracy_regex_map["DICE"]
        # Having issue installing nnUnet on Xavier...
        if is_xavier():
            # Internally, run on another node to process the accuracy.
            try:
                cmd = cmd.replace(os.getcwd(), ".", 1)
                temp_cmd = (
                    "ssh -oBatchMode=yes computelab-frontend-02 \"timeout 1200 srun --gres=gpu:ga100:1 -t 20:00 "
                    "bash -c 'cd {:} && make prebuild DOCKER_COMMAND=\\\"{:}\\\"'\""
                ).format(os.getcwd(), cmd)
                full_output = run_command(temp_cmd, get_output=True)
                # Keep only the span from the nnUNet banner to the last "Done!" line.
                start_line_idx = -1
                end_line_idx = -1
                for line_idx, line in enumerate(full_output):
                    if "Please cite the following paper when using nnUNet:" in line:
                        start_line_idx = line_idx
                    if "Done!" in line:
                        end_line_idx = line_idx
                # Explicit raise (not assert) so the check survives `python -O`.
                if start_line_idx == -1 or end_line_idx == -1:
                    raise RuntimeError("Failed in accuracy checking")
                output = full_output[start_line_idx:end_line_idx + 1]
            except Exception as e:
                # Best-effort: any failure of the remote run falls back to a
                # placeholder result, but the cause is recorded in the log.
                logging.warning("Remote 3DUnet accuracy run failed: {:}".format(e))
                logging.warning(
                    "Accuracy checking for 3DUnet is not supported on Xavier. Please run the following command on desktop:\n{:}"
                    .format(cmd))
                output = [
                    "Accuracy: mean = 1.0000, whole tumor = 1.0000, tumor core = 1.0000, enhancing tumor = 1.0000"
                ]
    else:
        raise ValueError("Unknown benchmark: {:}".format(benchmark_name))

    # Run benchmark's accuracy script and parse output for result.
    if output is None:
        output = run_command(cmd, get_output=True)
    result_regex = re.compile(regex)
    with open(os.path.join(os.path.dirname(log_file), "accuracy.txt"), "w") as f:
        for line in output:
            print(line, file=f)

    accuracy = None
    for line in output:
        result_match = result_regex.match(line)
        if result_match is not None:
            accuracy = float(result_match.group(1))
            break

    passed = accuracy is not None and accuracy >= threshold
    accuracy_result = "PASSED" if passed else "FAILED"
    # A missing result cannot be formatted with ':.3f'; report it as N/A so the
    # failure surfaces as the intended RuntimeError/message, not a TypeError.
    accuracy_text = "N/A" if accuracy is None else "{:.3f}".format(accuracy)

    if not passed and not is_compliance:
        raise RuntimeError(
            "Accuracy = {:}, Threshold = {:.3f}. Accuracy test {:}!".format(
                accuracy_text, threshold, accuracy_result))

    if is_compliance:
        return accuracy  # Needed for numerical comparison

    return "Accuracy = {:}, Threshold = {:.3f}. Accuracy test {:}.".format(
        accuracy_text, threshold, accuracy_result)