def onnx_to_int8(anno, prefix):
    """Build an INT8-calibrated TensorRT engine from "model.onnx" and write it to disk.

    anno   -- annotation list consumed by the calibration batch stream
    prefix -- image directory prefix handed to the batch stream
    """
    # Configure the ONNX parser for an FP32 source model.
    config = onnxparser.create_onnxconfig()
    config.set_model_file_name("model.onnx")
    config.set_model_dtype(trt.infer.DataType.FLOAT)
    config.set_print_layer_info(False)

    # Parse the ONNX file and convert it into a TensorRT network.
    parser = onnxparser.create_onnxparser(config)
    parser.parse(config.get_model_file_name(), config.get_model_dtype())
    parser.convert_to_trtnetwork()
    network = parser.get_trtnetwork()

    # Entropy calibrator fed by 5 batches built from the annotation file.
    # calibration_files = create_calibration_dataset()
    stream = calibrator.ImageBatchStream(5, anno, prefix)
    entropy_cal = calibrator.PythonEntropyCalibrator(["data"], stream)

    logger = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)
    engine_builder = trt.infer.create_infer_builder(logger)
    engine_builder.set_max_batch_size(16)
    engine_builder.set_max_workspace_size(1 << 20)
    engine_builder.set_int8_calibrator(entropy_cal)
    engine_builder.set_int8_mode(True)

    # Build, serialize and persist the engine, then release TensorRT objects.
    # NOTE(review): output name "engin.bin" looks like a typo for "engine.bin"
    # -- confirm against whatever loads this file before renaming.
    engine = engine_builder.build_cuda_engine(network)
    blob = engine.serialize()
    trt.utils.write_engine_to_file("engin.bin", blob)
    engine.destroy()
    engine_builder.destroy()
def onnx_infer(anno, prefix):
    """Run "model.onnx" through TensorRT and print top-1 accuracy over an annotation file.

    anno   -- text file, one "<image_path> <label>" pair per line
    prefix -- directory prepended to every image path
    """
    # Parser configuration: FP32 model, quiet layer reporting.
    config = onnxparser.create_onnxconfig()
    config.set_model_file_name("model.onnx")
    config.set_model_dtype(trt.infer.DataType.FLOAT)
    config.set_print_layer_info(False)

    # Parse the ONNX file and pull out the TensorRT network.
    parser = onnxparser.create_onnxparser(config)
    parser.parse(config.get_model_file_name(), config.get_model_dtype())
    parser.convert_to_trtnetwork()
    network = parser.get_trtnetwork()

    # Build an FP32 engine and an execution context for it.
    logger = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)
    engine_builder = trt.infer.create_infer_builder(logger)
    engine_builder.set_max_batch_size(16)
    engine = engine_builder.build_cuda_engine(network)
    context = engine.create_execution_context()

    # Score every annotated image and accumulate top-1 accuracy.
    total = 0
    correct = 0
    for line in open(anno).readlines():
        path, gt = line.strip().split(' ')
        gt = int(gt)
        img = read_image_chw(os.path.join(prefix, path))
        output = infer(context, img, 10, 1)
        conf, pred = Tensor(output).topk(1, dim=0)
        pred = int(pred.data[0])
        if pred == gt:
            correct += 1
        total += 1
    print(correct / total)
def main():
    """Parse MODEL with the ONNX parser, build a TensorRT engine and classify one random test digit."""
    path = dir_path = os.path.dirname(os.path.realpath(__file__))

    # Create onnx_config for the sample model (FP32 by default).
    config = onnxparser.create_onnxconfig()
    config.set_model_file_name(MODEL)
    config.set_model_dtype(trt.infer.DataType_kFLOAT)

    # Parse the ONNX file and convert it into a TensorRT network.
    parser = onnxparser.create_onnxparser(config)
    assert (parser)
    parser.parse(config.get_model_file_name(), config.get_model_dtype())
    parser.report_parsing_info()
    parser.convert_to_trtnetwork()
    network = parser.get_trtnetwork()
    assert (network)

    # Build the engine; honour FP16 if the model asks for it, refuse INT8.
    builder = trt.infer.create_infer_builder(G_LOGGER)
    if config.get_model_dtype() == trt.infer.DataType_kHALF:
        builder.set_fp16_mode(True)
    elif config.get_model_dtype() == trt.infer.DataType_kINT8:
        print("Int8 Model not supported")
        sys.exit()
    engine = builder.build_cuda_engine(network)
    assert (engine)

    # Pick one random .pgm digit and normalize it for inference.
    rand_file = randint(0, 9)
    img = get_testcase(DATA + str(rand_file) + '.pgm')
    data = normalize(img)
    print("Test case: " + str(rand_file))
    if data.size == 0:
        msg = "The input tensor is of zero size - please check your path to the input or the file type"
        G_LOGGER.log(trt.infer.Logger.Severity_kERROR, msg)

    out = infer(engine, data, 1)
    print("Prediction: " + str(np.argmax(out)))

    # clean up
    engine.destroy()
    network.destroy()
    parser.destroy()
def onnx_2_float32():
    """Convert args.onnx_model_name to a serialized FP32 TensorRT engine at args.trt32_model_name."""
    # Parser configuration: FP32 model, quiet layer reporting.
    config = onnxparser.create_onnxconfig()
    config.set_model_file_name(args.onnx_model_name)
    config.set_model_dtype(trt.infer.DataType.FLOAT)
    config.set_print_layer_info(False)

    # Parse the ONNX file and pull out the TensorRT network.
    parser = onnxparser.create_onnxparser(config)
    parser.parse(config.get_model_file_name(), config.get_model_dtype())
    parser.convert_to_trtnetwork()
    network = parser.get_trtnetwork()

    # Build the FP32 engine.
    logger = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)
    engine_builder = trt.infer.create_infer_builder(logger)
    engine_builder.set_max_batch_size(16)
    engine = engine_builder.build_cuda_engine(network)

    # Serialize to disk, then release TensorRT objects.
    blob = engine.serialize()
    trt.utils.write_engine_to_file(args.trt32_model_name, blob)
    engine.destroy()
    engine_builder.destroy()
def run_onnx(onnx_file, data_type, bs, inp):
    """Parse an ONNX model, build a TensorRT engine at the requested precision,
    and time inference on it.

    onnx_file -- path to the ONNX model file
    data_type -- requested precision (mapped through convert_to_datatype)
    bs        -- batch size forwarded to time_inference
    inp       -- input data forwarded to time_inference
    """
    # Create onnx_config
    apex = onnxparser.create_onnxconfig()
    apex.set_model_file_name(onnx_file)
    apex.set_model_dtype(convert_to_datatype(data_type))

    # create parser and convert the model into a TensorRT network
    trt_parser = onnxparser.create_onnxparser(apex)
    assert (trt_parser)
    data_type = apex.get_model_dtype()
    onnx_filename = apex.get_model_file_name()
    trt_parser.parse(onnx_filename, data_type)
    trt_parser.report_parsing_info()
    trt_parser.convert_to_trtnetwork()
    trt_network = trt_parser.get_trtnetwork()
    assert (trt_network)

    # create infer builder (max_batch_size / max_workspace_size come from
    # module-level configuration)
    trt_builder = trt.infer.create_infer_builder(G_LOGGER)
    trt_builder.set_max_batch_size(max_batch_size)
    trt_builder.set_max_workspace_size(max_workspace_size)
    if (apex.get_model_dtype() == trt.infer.DataType_kHALF):
        print(
            "------------------- Running FP16 -----------------------------")
        trt_builder.set_half2_mode(True)
    elif (apex.get_model_dtype() == trt.infer.DataType_kINT8):
        print(
            "------------------- Running INT8 -----------------------------")
        trt_builder.set_int8_mode(True)
    else:
        print(
            "------------------- Running FP32 -----------------------------")
    print("----- Builder is Done -----")
    print("----- Creating Engine -----")
    trt_engine = trt_builder.build_cuda_engine(trt_network)
    print("----- Engine is built -----")
    # BUG FIX: the original passed the undefined name `engine`, which raised
    # NameError before any timing ran; the built engine is `trt_engine`.
    time_inference(trt_engine, bs, inp)
def run_onnx(onnx_file, data_type, bs, inp):
    """Parse an ONNX model, build a TensorRT engine at the requested precision,
    and time inference on it.

    onnx_file -- path to the ONNX model file
    data_type -- requested precision (mapped through convert_to_datatype)
    bs        -- batch size forwarded to time_inference
    inp       -- input data forwarded to time_inference
    """
    # Create onnx_config
    apex = onnxparser.create_onnxconfig()
    apex.set_model_file_name(onnx_file)
    apex.set_model_dtype(convert_to_datatype(data_type))

    # create parser and convert the model into a TensorRT network
    trt_parser = onnxparser.create_onnxparser(apex)
    assert(trt_parser)
    data_type = apex.get_model_dtype()
    onnx_filename = apex.get_model_file_name()
    trt_parser.parse(onnx_filename, data_type)
    trt_parser.report_parsing_info()
    trt_parser.convert_to_trtnetwork()
    trt_network = trt_parser.get_trtnetwork()
    assert(trt_network)

    # create infer builder (max_batch_size / max_workspace_size come from
    # module-level configuration)
    trt_builder = trt.infer.create_infer_builder(G_LOGGER)
    trt_builder.set_max_batch_size(max_batch_size)
    trt_builder.set_max_workspace_size(max_workspace_size)
    if (apex.get_model_dtype() == trt.infer.DataType_kHALF):
        print("------------------- Running FP16 -----------------------------")
        trt_builder.set_half2_mode(True)
    elif (apex.get_model_dtype() == trt.infer.DataType_kINT8):
        print("------------------- Running INT8 -----------------------------")
        trt_builder.set_int8_mode(True)
    else:
        print("------------------- Running FP32 -----------------------------")
    print("----- Builder is Done -----")
    print("----- Creating Engine -----")
    trt_engine = trt_builder.build_cuda_engine(trt_network)
    print("----- Engine is built -----")
    # BUG FIX: the original passed the undefined name `engine`, which raised
    # NameError before any timing ran; the built engine is `trt_engine`.
    time_inference(trt_engine, bs, inp)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow.python.ops import data_flow_ops
import tensorrt as trt
from tensorrt.parsers import onnxparser

# Build the parser configuration: FP32 model, verbose per-layer reporting.
apex = onnxparser.create_onnxconfig()  # create config object
apex.set_model_file_name("../model/onnx/hep_frozen_bs_64.onnx")
apex.set_model_dtype(trt.infer.DataType.FLOAT)
apex.set_print_layer_info(True)

# Create the parser and run the ONNX file through it.
trt_parser = onnxparser.create_onnxparser(apex)
data_type = apex.get_model_dtype()
onnx_filename = apex.get_model_file_name()
trt_parser.parse(onnx_filename, data_type)

# Retrieve the TensorRT network from the parser.
trt_parser.convert_to_trtnetwork()
trt_network = trt_parser.get_trtnetwork()
def sample_onnx_parser():
    """Command-line driver: parse an ONNX model, build a TensorRT engine,
    classify one input image and report the top-K predictions.

    Reads all options from the command line; see the argparse definitions below.
    """
    # ---- command-line options ----
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", "--file_format", default="ascii",
                        choices=["ascii", "ppm"], type=str,
                        help="input file format. ASCII if not specified.")
    parser.add_argument("-i", "--image_file", type=str, required=True,
                        help="Image or ASCII file")
    parser.add_argument("-r", "--reference_file", type=str, required=True,
                        help="Reference files with correct labels")
    parser.add_argument("-k", "--topK", type=str, required=True,
                        help="Top K values predictions to print")
    parser.add_argument("-m", "--model_file", type=str, required=True,
                        help="ONNX Model file")
    parser.add_argument("-d", "--data_type", default=32, type=int,
                        choices=[8, 16, 32],
                        help="Supported data type i.e. 8, 16, 32 bit")
    parser.add_argument("-b", "--max_batch_size", default=32, type=int,
                        help="Maximum batch size")
    parser.add_argument("-w", "--max_workspace_size", default=1024 * 1024,
                        type=int, help="Maximum workspace size")
    parser.add_argument("-v", "--add_verbosity", action="store_true")
    parser.add_argument("-q", "--reduce_verbosity", action="store_true")
    parser.add_argument("-l", "--print_layer_info", action="store_true")
    args = parser.parse_args()

    # Normalize string-valued arguments.
    file_format = str.strip(args.file_format)
    image_file = str.strip(args.image_file)
    reference_file = str.strip(args.reference_file)
    model_file = str.strip(args.model_file)
    topK = int(args.topK)
    max_batch_size = args.max_batch_size
    max_workspace_size = args.max_workspace_size
    data_type = args.data_type
    add_verbosity = args.add_verbosity
    reduce_verbosity = args.reduce_verbosity
    print_layer_info = args.print_layer_info

    # Echo the effective configuration.
    print("Input Arguments: ")
    print("file_format", file_format)
    print("image_file", image_file)
    print("reference_file", reference_file)
    print("topK", str(topK))
    print("model_file", model_file)
    print("data_type", data_type)
    print("max_workspace_size", max_workspace_size)
    print("max_batch_size", max_batch_size)
    print("add_verbosity", add_verbosity)
    print("reduce_verbosity", reduce_verbosity)
    print("print_layer_info", print_layer_info)

    # Create onnx_config reflecting the requested precision and verbosity.
    apex = onnxparser.create_onnxconfig()
    apex.set_model_file_name(model_file)
    apex.set_model_dtype(convert_to_datatype(data_type))
    if print_layer_info:
        apex.set_print_layer_info(True)
    if add_verbosity:
        apex.add_verbosity()
    if reduce_verbosity:
        apex.reduce_verbosity()

    # Inference below runs a single image at a time.
    batch_size = 1

    # Create the parser and convert the model into a TensorRT network.
    trt_parser = onnxparser.create_onnxparser(apex)
    assert(trt_parser)
    data_type = apex.get_model_dtype()
    onnx_filename = apex.get_model_file_name()
    trt_parser.parse(onnx_filename, data_type)
    trt_parser.report_parsing_info()
    trt_parser.convert_to_trtnetwork()
    trt_network = trt_parser.get_trtnetwork()
    assert(trt_network)

    # Create the infer builder; FP16 is honoured, INT8 is refused.
    trt_builder = trt.infer.create_infer_builder(G_LOGGER)
    trt_builder.set_max_batch_size(max_batch_size)
    trt_builder.set_max_workspace_size(max_workspace_size)
    if (apex.get_model_dtype() == trt.infer.DataType_kHALF):
        print("------------------- Running FP16 -----------------------------")
        trt_builder.set_fp16_mode(True)
    elif (apex.get_model_dtype() == trt.infer.DataType_kINT8):
        print("Int8 Model not supported")
        sys.exit()
    else:
        print("------------------- Running FP32 -----------------------------")
    print("----- Builder is Done -----")
    print("----- Creating Engine -----")
    trt_engine = trt_builder.build_cuda_engine(trt_network)
    print("----- Engine is built -----")

    # Build the input tensor from the image file.
    input_img = prepare_input(image_file, trt_engine, file_format)
    if input_img.size == 0:
        msg = "sampleONNX the input tensor is of zero size - please check your path to the input or the file type"
        G_LOGGER.log(trt.infer.Logger.Severity_kERROR, msg)

    # Run inference and post-process the predictions.
    trt_context = trt_engine.create_execution_context()
    output = inference_image(trt_context, input_img, batch_size)
    process_output(output, file_format, reference_file, topK)

    # Clean up TensorRT objects.
    trt_parser.destroy()
    trt_network.destroy()
    trt_context.destroy()
    trt_engine.destroy()
    trt_builder.destroy()
    # NOTE(review): this string literal was split across lines in the source;
    # reconstructed as a single line -- confirm if a newline was intended.
    print("&&&& PASSED Onnx Parser Tested Successfully")