def main():
    # Parse command line arguments.
    args = parse_commandline_arguments()
    _, data_files = common.find_sample_data(
        description="Runs a Caffe MNIST network in Int8 mode",
        subfolder="mnist",
        find_files=[
            "t10k-images-idx3-ubyte", "t10k-labels-idx1-ubyte",
            "train-images-idx3-ubyte", ModelData.DEPLOY_PATH,
            ModelData.MODEL_PATH
        ])
    [test_set, test_labels, train_set, deploy_file, model_file] = data_files

    engine = None
    trt_engine_path = get_engine_path(args.precision)
    trt_runtime = trt.Runtime(TRT_LOGGER)
    # Inference batch size can be different from calibration batch size.
    batch_size = 32

    if not os.path.exists(trt_engine_path):
        # Build a TensorRT engine.
        engine = build_int8_engine(deploy_file,
                                   model_file,
                                   batch_size,
                                   trt_engine_datatype=args.trt_engine_datatype)
        # Save the engine to file.
        buf = engine.serialize()
        with open(trt_engine_path, 'wb') as f:
            f.write(buf)

    # If we get here and no engine was built, the file with the engine exists, so we can load it.
    if not engine:
        print("Loading cached TensorRT engine from {}".format(trt_engine_path))
        with open(trt_engine_path, 'rb') as f:
            engine_data = f.read()
        engine = trt_runtime.deserialize_cuda_engine(engine_data)

    with engine.create_execution_context() as context:
        # Batch size for inference can be different than the batch size used for calibration.
        check_accuracy(context,
                       batch_size,
                       test_set=load_mnist_data(test_set),
                       test_labels=load_mnist_labels(test_labels))
def main():
    data_path = common.find_sample_data(
        description="Runs an MNIST network using a UFF model file",
        subfolder="mnist")
    model_file = ModelData.MODEL_FILE

    t1 = time.perf_counter()
    with build_engine(model_file) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        # with open('/home/nvidia/procedure/lenet5.engine', 'wb') as f:
        #     f.write(engine.serialize())
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            case_num = load_normalized_test_case(
                data_path, pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference(context,
                                           bindings=bindings,
                                           inputs=inputs,
                                           outputs=outputs,
                                           stream=stream)
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
    t2 = time.perf_counter()
    print("use_time:" + str(t2 - t1))
def main():
    # Set the data path to the directory that contains the trained models and test images for inference.
    data_path, data_files = common.find_sample_data(
        description="Runs a ResNet152 on Cars dataset network with a TensorRT inference engine.",
        subfolder="cars_restnet152",
        find_files=[
            "00001.jpg", "00002.jpg", "00003.jpg", "00004.jpg", "00005.jpg",
            "00006.jpg", ModelData.MODEL_PATH, "cars_labels.txt"
        ])
    # Get test images, models and labels.
    test_images = data_files[0:6]
    onnx_model_file, labels_file = data_files[6:]
    labels = open(labels_file, 'r').read().split('\n')
    # print(onnx_model_file)

    # Build a TensorRT engine.
    with build_engine_onnx(onnx_model_file) as engine:
        # Inference is the same regardless of which parser is used to build the engine,
        # since the model architecture is the same.
        # Allocate buffers and create a CUDA stream.
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        # Contexts are used to perform inference.
        with engine.create_execution_context() as context:
            # Load a normalized test case into the host input page-locked buffer.
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, h_input)
            # Run the engine. The output will be a 1D tensor of length 1000, where each value
            # represents the probability that the image corresponds to that label.
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            # We use the highest probability as our prediction. Its index corresponds to the predicted label.
            # pred = labels[np.argmax(h_output)]
            print(h_output.shape)
def main():
    # Get data files for the model.
    data_paths, [deploy_file, model_file, mean_proto] = common.find_sample_data(
        description="Runs an MNIST network using a Caffe model file",
        subfolder="mnist",
        find_files=[
            "mnist.prototxt", "mnist.caffemodel", "mnist_mean.binaryproto"
        ])
    # Cache the engine in a temporary directory.
    engine_path = os.path.join(tempfile.gettempdir(), "mnist.engine")

    with get_engine(deploy_file, model_file, engine_path) as engine, \
            engine.create_execution_context() as context:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        mean = retrieve_mean(mean_proto)
        # For more information on performing inference, refer to the introductory samples.
        inputs[0].host, case_num = load_normalized_test_case(data_paths, mean)
        # The common.do_inference function will return a list of outputs - we only have one in this case.
        [output] = common.do_inference(context,
                                       bindings=bindings,
                                       inputs=inputs,
                                       outputs=outputs,
                                       stream=stream)
        pred = np.argmax(output)
        print("Test Case: " + str(case_num))
        print("Prediction: " + str(pred))
def main():
    _, data_files = common.find_sample_data(
        description="Runs a Caffe MNIST network in Int8 mode",
        subfolder="mnist",
        find_files=[
            "t10k-images-idx3-ubyte", "t10k-labels-idx1-ubyte",
            "train-images-idx3-ubyte", ModelData.DEPLOY_PATH,
            ModelData.MODEL_PATH
        ])
    [test_set, test_labels, train_set, deploy_file, model_file] = data_files

    # Now we create a calibrator and give it the location of our calibration data.
    # We also allow it to cache calibration data for faster engine building.
    calibration_cache = "mnist_calibration.cache"
    calib = MNISTEntropyCalibrator(test_set, cache_file=calibration_cache)

    # Inference batch size can be different from calibration batch size.
    batch_size = 32
    with build_int8_engine(deploy_file, model_file, calib, batch_size) as engine, \
            engine.create_execution_context() as context:
        # Batch size for inference can be different than the batch size used for calibration.
        check_accuracy(context,
                       batch_size,
                       test_set=load_mnist_data(test_set),
                       test_labels=load_mnist_labels(test_labels))
def main():
    # Set the data path to the directory that contains the trained models and test images for inference.
    _, data_files = common.find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=[
            "binoculars.jpeg", "reflex_camera.jpeg", "tabby_tiger_cat.jpg",
            ModelData.MODEL_PATH, ModelData.DEPLOY_PATH, "class_labels.txt"
        ])
    # Get test images, models and labels.
    test_images = data_files[0:3]
    caffe_model_file, caffe_deploy_file, labels_file = data_files[3:]
    labels = open(labels_file, 'r').read().split('\n')

    # Build a TensorRT engine.
    with build_engine_caffe(caffe_model_file, caffe_deploy_file) as engine:
        # Inference is the same regardless of which parser is used to build the engine,
        # since the model architecture is the same.
        # Allocate buffers and create a CUDA stream.
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        # Contexts are used to perform inference.
        with engine.create_execution_context() as context:
            # Load a normalized test case into the host input page-locked buffer.
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, h_input)
            # Run the engine. The output will be a 1D tensor of length 1000, where each value
            # represents the probability that the image corresponds to that label.
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            # We use the highest probability as our prediction. Its index corresponds to the predicted label.
            pred = labels[np.argmax(h_output)]
            if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)
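# The ResNet50-style samples above call load_normalized_test_case without showing it.
# The sketch below approximates that helper under the assumption that ModelData.INPUT_SHAPE
# is a CHW tuple such as (3, 224, 224) and ModelData.DTYPE is a TensorRT data type; the
# exact normalization constants used by the original helper may differ.
import numpy as np
import tensorrt as trt
from PIL import Image


def load_normalized_test_case(test_image, pagelocked_buffer):
    # Convert the image to CHW float data, scale it to [-1, 1], and copy it into
    # the page-locked host buffer that feeds the engine's input binding.
    def normalize_image(image):
        c, h, w = ModelData.INPUT_SHAPE  # assumed (3, 224, 224)
        image_arr = np.asarray(
            image.resize((w, h), Image.ANTIALIAS)).transpose([2, 0, 1]).astype(
                trt.nptype(ModelData.DTYPE)).ravel()
        # Assumed normalization: shift pixel values from [0, 255] to [-1, 1].
        return (image_arr / 255.0 - 0.5) * 2.0

    np.copyto(pagelocked_buffer, normalize_image(Image.open(test_image)))
    # Return the image path so callers can report which test case was used.
    return test_image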
def main():
    x = "/home/dgxuser125/rt-kennan/Swish3/build/libswish.so"
    ctypes.CDLL(x)
    data_paths, _ = common.find_sample_data(
        description="Runs an MNIST network using a UFF model file",
        subfolder="mnist")
    model_path = os.environ.get("MODEL_PATH") or os.path.join(
        os.path.dirname(__file__), "models")
    model_file = os.path.join(model_path, ModelData.MODEL_FILE)

    with build_engine(model_file) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            # Start measuring time.
            inference_start_time = time.time()
            for i in range(1000):
                case_num = load_normalized_test_case(
                    data_paths, pagelocked_buffer=inputs[0].host)
                [output] = common.do_inference(context,
                                               bindings=bindings,
                                               inputs=inputs,
                                               outputs=outputs,
                                               stream=stream)
                pred = np.argmax(output)
                # print("Test Case: " + str(case_num))
                # print("Prediction: " + str(pred))
            end_time = time.time()
            print("time taken for one input with tensorrt: ",
                  (end_time - inference_start_time) / 1000)
def main():
    data_path, data_files = common.find_sample_data(
        description="Runs a Caffe MNIST network in Int8 mode",
        subfolder="mnist",
        find_files=["batches", ModelData.DEPLOY_PATH, ModelData.MODEL_PATH])
    [batch_data_dir, deploy_file, model_file] = data_files

    # Now we create a calibrator and give it the location of our calibration data.
    # We also allow it to cache calibration data for faster engine building.
    calibration_cache = "mnist_calibration.cache"
    calib = calibrator.MNISTEntropyCalibrator(batch_data_dir,
                                              cache_file=calibration_cache)
    # We will use the calibrator batch size across the board.
    # This is not a requirement, but in this case it is convenient.
    batch_size = calib.get_batch_size()

    with build_int8_engine(deploy_file, model_file, calib) as engine, \
            engine.create_execution_context() as context:
        # Allocate engine buffers.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Do inference for the whole batch. We have to specify the batch size here,
        # as common.do_inference otherwise uses a default batch size of 1.
        inputs[0].host, labels = load_random_batch(calib)
        [output] = common.do_inference(context,
                                       bindings=bindings,
                                       inputs=inputs,
                                       outputs=outputs,
                                       stream=stream,
                                       batch_size=batch_size)
        # Next we need to reshape the output to Nx10 (10 probabilities, one per digit),
        # where N is the batch size.
        output = output.reshape(batch_size, 10)
        validate_output(output, labels)
def main():
    # Get data files for the model.
    data_path, [deploy_file, model_file, mean_proto] = common.find_sample_data(
        description="Runs an MNIST network using a Caffe model file",
        subfolder="mnist",
        find_files=[
            "mnist.prototxt", "mnist.caffemodel", "mnist_mean.binaryproto"
        ])

    with build_engine(deploy_file, model_file) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        mean = retrieve_mean(mean_proto)
        with engine.create_execution_context() as context:
            case_num = load_normalized_test_case(data_path, inputs[0].host, mean)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference(context,
                                           bindings=bindings,
                                           inputs=inputs,
                                           outputs=outputs,
                                           stream=stream)
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
    # After the engine is destroyed, we destroy the plugin.
    # This function is exposed through the binding code in plugin/pyFullyConnected.cpp.
    fc_factory.destroy_plugin()
def main():
    _, _ = common.find_sample_data(
        description="Runs an MNIST network using a PyTorch model",
        subfolder="mnist")
    # Train the PyTorch model.
    mnist_model = model.MnistModel()
    mnist_model.learn()
    weights = mnist_model.get_weights()

    # Do inference with TensorRT.
    with build_engine(weights) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            case_num = load_random_test_case(mnist_model,
                                             pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference(context,
                                           bindings=bindings,
                                           inputs=inputs,
                                           outputs=outputs,
                                           stream=stream)
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
def main():
    # Set the data path to the directory that contains the trained models and test images for inference.
    # data_path, data_files = common.find_sample_data(
    #     description="Runs a ResNet50 network with a TensorRT inference engine.",
    #     find_files=[ModelData.MODEL_PATH, ModelData.DEPLOY_PATH])
    data_path, data_files, precision = common.find_sample_data(
        find_files=[".caffemodel", ".prototxt"])
    # Get test images, models and labels.
    # test_images = data_files[0:3]
    # caffe_model_file, caffe_deploy_file, labels_file = data_files[3:]
    caffe_model_file, caffe_deploy_file = data_files[:]
    # labels = open(labels_file, 'r').read().split('\n')

    # Build a TensorRT engine.
    with build_engine_caffe(caffe_model_file, caffe_deploy_file, precision) as engine:
        # Inference is the same regardless of which parser is used to build the engine,
        # since the model architecture is the same.
        # Allocate buffers and create a CUDA stream.
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        # Contexts are used to perform inference.
        with engine.create_execution_context() as context:
            # Load a normalized test case into the host input page-locked buffer.
            # test_image = random.choice(test_images)
            # test_case = load_normalized_test_case(test_image, h_input)
            # Run the engine. The output will be a 1D tensor of length 1000, where each value
            # represents the probability that the image corresponds to that label.
            do_inference(context, h_input, d_input, h_output, d_output, stream)
def main():
    # Parse the sample data to get the paths of the data files.
    _, data_files = common.find_sample_data(
        description="Runs a Caffe MNIST network in Int8 mode",
        subfolder="mnist",
        find_files=[
            "t10k-images-idx3-ubyte", "t10k-labels-idx1-ubyte",
            "train-images-idx3-ubyte", ModelData.DEPLOY_PATH,
            ModelData.MODEL_PATH
        ],
        err_msg="Please follow the README to download the MNIST dataset")
    # Unpack the data files into the corresponding variables.
    [test_set, test_labels, train_set, deploy_file, model_file] = data_files

    # Now we create a calibrator and give it the location of our calibration data.
    # We also allow it to cache calibration data for faster engine building.
    calibration_cache = "mnist_calibration.cache"
    # See calibrator.py for the MNISTEntropyCalibrator implementation.
    calib = MNISTEntropyCalibrator(test_set, cache_file=calibration_cache)

    # Inference batch size can be different from calibration batch size.
    batch_size = 32
    # build_int8_engine is implemented in this file.
    with build_int8_engine(deploy_file, model_file, calib, batch_size) as engine, \
            engine.create_execution_context() as context:
        # Batch size for inference can be different than the batch size used for calibration.
        # check_accuracy is implemented in this file.
        check_accuracy(context,
                       batch_size,
                       test_set=load_mnist_data(test_set),
                       test_labels=load_mnist_labels(test_labels))
def main():
    data_paths, _ = common.find_sample_data(
        description="Runs an MNIST network using a UFF model file",
        subfolder="mnist")
    model_path = os.environ.get("MODEL_PATH") or os.path.join(
        os.path.dirname(__file__), "models")
    model_file = os.path.join(model_path, ModelData.MODEL_FILE)

    with build_engine(model_file) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            case_num = load_normalized_test_case(
                data_paths, pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference(context,
                                           bindings=bindings,
                                           inputs=inputs,
                                           outputs=outputs,
                                           stream=stream)
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
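# The MNIST samples above rely on common.allocate_buffers and common.do_inference,
# which are not reproduced in this file. The following is a rough sketch of what those
# helpers typically look like in the pre-8.x TensorRT Python samples, using PyCUDA;
# the HostDeviceMem name and exact signatures are assumptions, not taken from this file.
import numpy as np
import pycuda.autoinit  # noqa: F401  (creates a CUDA context on import)
import pycuda.driver as cuda
import tensorrt as trt


class HostDeviceMem(object):
    # Pairs a page-locked host array with its device allocation for one binding.
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem


def allocate_buffers(engine):
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Page-locked host memory plus a matching device buffer for each binding.
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        bindings.append(int(device_mem))
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream


def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    # Copy inputs to the GPU, run the engine asynchronously, copy results back,
    # then synchronize the stream and return the host-side output arrays.
    for inp in inputs:
        cuda.memcpy_htod_async(inp.device, inp.host, stream)
    context.execute_async(batch_size=batch_size, bindings=bindings,
                          stream_handle=stream.handle)
    for out in outputs:
        cuda.memcpy_dtoh_async(out.host, out.device, stream)
    stream.synchronize()
    return [out.host for out in outputs]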
def main():
    # Set the data path to the directory that contains the trained models and test images for inference.
    # Parse the sample data.
    _, data_files = common.find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=[
            "binoculars.jpeg", "reflex_camera.jpeg", "tabby_tiger_cat.jpg",
            ModelData.MODEL_PATH, "class_labels.txt"
        ])
    # Get test images, models and labels.
    test_images = data_files[0:3]
    onnx_model_file, labels_file = data_files[3:]
    labels = open(labels_file, 'r').read().split('\n')

    # Build a TensorRT engine.
    # build_engine_onnx is implemented in this file.
    with build_engine_onnx(onnx_model_file) as engine:
        # Inference is the same regardless of which parser is used to build the engine,
        # since the model architecture is the same.
        # Allocate buffers and create a CUDA stream.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Contexts are used to perform inference.
        with engine.create_execution_context() as context:
            # Load a normalized test case into the host input page-locked buffer.
            test_image = random.choice(test_images)
            # load_normalized_test_case is implemented in this file; it copies the
            # test data into host memory.
            test_case = load_normalized_test_case(test_image, inputs[0].host)
            # Run the engine. The output will be a 1D tensor of length 1000, where each value
            # represents the probability that the image corresponds to that label.
            # See common.py for do_inference_v2.
            trt_outputs = common.do_inference_v2(context,
                                                 bindings=bindings,
                                                 inputs=inputs,
                                                 outputs=outputs,
                                                 stream=stream)
            # We use the highest probability as our prediction. Its index corresponds to the predicted label.
            pred = labels[np.argmax(trt_outputs[0])]
            if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)
def main():
    data_path = common.find_sample_data(
        description="Runs a network using a UFF model file", subfolder=".")
    model_file = ModelData.MODEL_FILE

    with build_engine(model_file) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            input_tests = ["img.ppm", "ones.ppm", "orange.ppm", "panda.ppm"]
            for input_test in input_tests:
                load_test_case(input_test, pagelocked_buffer=inputs[0].host)
                [output] = common.do_inference(context,
                                               bindings=bindings,
                                               inputs=inputs,
                                               outputs=outputs,
                                               stream=stream)
                print(output)
def main():
    data_path, data_files = common.find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=[
            "binoculars.jpeg", "reflex_camera.jpeg", "tabby_tiger_cat.jpg",
            ModelData.MODEL_PATH, "class_labels.txt"
        ])
    test_images = data_files[0:3]
    onnx_model_file, labels_file = data_files[3:]
    labels = open(labels_file, 'r').read().split('\n')

    with build_engine_onnx(onnx_model_file) as engine:
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        with engine.create_execution_context() as context:
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, h_input)
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            pred = labels[np.argmax(h_output)]
            print("Recognized " + test_case + " as " + pred)
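# build_engine_onnx is referenced above but not defined here. A minimal sketch follows,
# assuming the implicit-batch, pre-8.x TensorRT Python API that the rest of these
# snippets use (builder.max_workspace_size, build_cuda_engine); common.GiB and
# TRT_LOGGER are the same helpers the other snippets already assume.
def build_engine_onnx(model_file):
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = common.GiB(1)
        # Parse the ONNX model to populate the TensorRT network definition.
        with open(model_file, 'rb') as model:
            if not parser.parse(model.read()):
                for i in range(parser.num_errors):
                    print(parser.get_error(i))
                return None
        return builder.build_cuda_engine(network)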
def build_int8_engine(deploy_file,
                      model_file,
                      batch_size=32,
                      trt_engine_datatype=trt.DataType.FLOAT):
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.CaffeParser() as parser:
        # We set the builder batch size to be the same as the calibrator's, as we use the same batches
        # during inference. Note that this is not required in general, and inference batch size is
        # independent of calibration batch size.
        builder.max_batch_size = batch_size
        builder.max_workspace_size = common.GiB(1)
        if trt_engine_datatype == trt.DataType.HALF:
            builder.fp16_mode = True
        elif trt_engine_datatype == trt.DataType.INT8:
            # Now we create a calibrator and give it the location of our calibration data.
            # We also allow it to cache calibration data for faster engine building.
            _, [calib_data] = common.find_sample_data(
                description="Runs a Caffe MNIST network in Int8 mode",
                subfolder="mnist",
                find_files=["t10k-images-idx3-ubyte"])
            calibration_cache = "mnist_calibration.cache"
            builder.int8_mode = True
            builder.int8_calibrator = MNISTEntropyCalibrator(
                calib_data, cache_file=calibration_cache)
        # Parse the Caffe model.
        model_tensors = parser.parse(deploy=deploy_file,
                                     model=model_file,
                                     network=network,
                                     dtype=ModelData.DTYPE)
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        # Build the engine and run INT8 calibration (when enabled).
        return builder.build_cuda_engine(network)
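# build_int8_engine above depends on MNISTEntropyCalibrator, which lives in a separate
# calibrator module. The sketch below shows the general shape of such a calibrator; for
# brevity it assumes the calibration images are already loaded as a float32 NumPy array
# of shape (N, 1, 28, 28), rather than being read from the raw IDX file the snippets pass.
import os

import numpy as np
import pycuda.autoinit  # noqa: F401
import pycuda.driver as cuda
import tensorrt as trt


class MNISTEntropyCalibrator(trt.IInt8EntropyCalibrator2):
    def __init__(self, calibration_data, cache_file, batch_size=32):
        trt.IInt8EntropyCalibrator2.__init__(self)
        self.cache_file = cache_file
        self.data = calibration_data  # assumed float32 array, shape (N, 1, 28, 28)
        self.batch_size = batch_size
        self.current_index = 0
        # Device memory large enough for one calibration batch.
        self.device_input = cuda.mem_alloc(self.data[0].nbytes * self.batch_size)

    def get_batch_size(self):
        return self.batch_size

    def get_batch(self, names):
        # Returning None tells TensorRT that the calibration data is exhausted.
        if self.current_index + self.batch_size > self.data.shape[0]:
            return None
        batch = self.data[self.current_index:self.current_index + self.batch_size]
        cuda.memcpy_htod(self.device_input, np.ascontiguousarray(batch.ravel()))
        self.current_index += self.batch_size
        return [int(self.device_input)]

    def read_calibration_cache(self):
        # Reuse a previously written cache so repeated engine builds skip calibration.
        if os.path.exists(self.cache_file):
            with open(self.cache_file, "rb") as f:
                return f.read()

    def write_calibration_cache(self, cache):
        with open(self.cache_file, "wb") as f:
            f.write(cache)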
def main():
    _, _ = common.find_sample_data(
        description="Runs an MNIST network using a PyTorch model",
        subfolder="mnist")
    # Train the PyTorch model.
    mnist_model = model.MnistModel()
    mnist_model.learn()
    weights = mnist_model.get_weights()

    # Do inference with TensorRT.
    with build_engine(weights) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print("Output Before Engine Refit")
        check_output(engine, inputs, outputs, bindings, stream, mnist_model)

        # Refit the engine with the actual trained weights for the conv_1 layer.
        with trt.Refitter(engine, TRT_LOGGER) as refitter:
            # To get a list of all refittable layers and associated WeightsRoles
            # in the network, use refitter.get_all().
            # Set the actual weights for the conv_1 layer. Since it consists of
            # kernel weights and bias weights, set each of them by specifying
            # the WeightsRole.
            refitter.set_weights("conv_1", trt.WeightsRole.KERNEL,
                                 weights['conv1.weight'].numpy())
            refitter.set_weights("conv_1", trt.WeightsRole.BIAS,
                                 weights['conv1.bias'].numpy())
            # Get a description of missing weights. This should return empty
            # lists in this case.
            [missingLayers, weightRoles] = refitter.get_missing()
            assert len(missingLayers) == 0, \
                "Refitter found missing weights. Call set_weights() for all missing weights"
            # Refit the engine with the new weights. This will return True if
            # the refit operation succeeded.
            assert refitter.refit_cuda_engine()

        print("Output After Engine Refit")
        assert check_output(engine, inputs, outputs, bindings, stream, mnist_model)
def main():
    # Parse the sample data and get the data paths.
    # See common.py for the implementation of find_sample_data.
    data_paths, _ = common.find_sample_data(
        description="Runs an MNIST network using a UFF model file",
        subfolder="mnist")
    # Get the path of the model file.
    model_path = os.environ.get("MODEL_PATH") or os.path.join(
        os.path.dirname(__file__), "models")
    model_file = os.path.join(model_path, ModelData.MODEL_FILE)

    # Build the engine.
    with build_engine(model_file) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        # allocate_buffers (see common.py) returns the lists of host/device buffers,
        # the binding list and a CUDA stream.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # create_execution_context creates an IExecutionContext instance.
        with engine.create_execution_context() as context:
            # load_normalized_test_case (implemented in this file) loads a test sample
            # into the provided page-locked buffer.
            case_num = load_normalized_test_case(
                data_paths, pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            # Run inference; see common.py for do_inference.
            [output] = common.do_inference(context,
                                           bindings=bindings,
                                           inputs=inputs,
                                           outputs=outputs,
                                           stream=stream)
            # Post-process: the prediction is the index of the highest output score.
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
def main():
    global args
    args = parser.parse_args()

    # Set the data path to the directory that contains the trained models and test images for inference.
    _, data_files = common.find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=[
            "binoculars.jpeg", "reflex_camera.jpeg", "tabby_tiger_cat.jpg",
            "class_labels.txt"
        ])
    labels_file = data_files[3]
    labels = open(labels_file, 'r').read().split('\n')

    # Data loading.
    # All pre-trained models expect input images normalized in the same way,
    # i.e. mini-batches of 3-channel RGB images of shape (3 x H x W), where H and
    # W are expected to be at least 224. The images have to be loaded into a
    # range of [0, 1] and then normalized using mean = [0.485, 0.456, 0.406] and
    # std = [0.229, 0.224, 0.225].
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    imagenet_data = datasets.ImageNet(args.data,
                                      split='train',
                                      transform=transforms.Compose([
                                          transforms.Resize(256),
                                          transforms.CenterCrop(224),
                                          transforms.ToTensor(),
                                          normalize,
                                      ]),
                                      download=False)
    # print("size of Imagenet data is {}".format(len(imagenet_data)))
    data_loader = torch.utils.data.DataLoader(
        imagenet_data,
        batch_size=args.batch_size,
        shuffle=False,
        # num_workers=args.workers,
        num_workers=0,
        pin_memory=True)

    with get_resnet50_engine(ModelData.MODEL_PATH) as engine:
        # Allocate buffers and create a CUDA stream.
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        # Contexts are used to perform inference.
        with engine.create_execution_context() as context:
            # Load a normalized test case into the host input page-locked buffer.
            run(data_loader, engine)
            # run(0, engine)
            # return
            # Define the loss function (criterion).
            # criterion = nn.CrossEntropyLoss().cuda()
            # validate(data_loader, resnet50)
    return
def __init__(self,
             trt_engine_path,
             uff_model_path,
             trt_engine_datatype=trt.DataType.FLOAT,
             batch_size=1):
    """Initializes TensorRT objects needed for model inference.

    Args:
        trt_engine_path (str): path where the TensorRT engine should be stored
        uff_model_path (str): path of the .uff model
        trt_engine_datatype (trt.DataType): requested precision of the TensorRT engine used for inference
        batch_size (int): batch size for which the engine should be optimized
    """
    # We first load all custom plugins shipped with TensorRT;
    # some of them will be needed during inference.
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')

    # Initialize the runtime needed for loading a TensorRT engine from file.
    self.trt_runtime = trt.Runtime(TRT_LOGGER)
    # TRT engine placeholder.
    self.trt_engine = None

    # Display requested engine settings to stdout.
    print("TensorRT inference engine settings:")
    print(" * Inference precision - {}".format(trt_engine_datatype))
    print(" * Max batch size - {}\n".format(batch_size))

    # If the engine is not cached, we need to build it.
    if not os.path.exists(trt_engine_path):
        # This function uses the supplied .uff file along with the UffParser to
        # build a TensorRT engine. For more details, check the implementation.
        # Set up for calibration (calib stays None for non-INT8 precisions).
        calib = None
        if trt_engine_datatype == trt.DataType.INT8:
            with open(PATHS.get_voc_image_set_path(), 'r') as f:
                voc_image_numbers = f.readlines()
                voc_image_numbers = [line.strip() for line in voc_image_numbers]
            total_imgs = len(voc_image_numbers)
            voc_names = []
            calibration_cache = "ssd_calibration_eval.cache"
            for n in range(total_imgs):
                voc_names.append(voc_image_numbers[n] + ".jpg")
            _, calib_data = common.find_sample_data(
                description="Runs a ResNet50 network in Int8 mode",
                subfolder="JPEGImages",
                find_files=voc_names)
            calib = VOCEntropyCalibrator(calib_data,
                                         total_imgs,
                                         cache_file=calibration_cache)
        self.trt_engine = engine_utils.build_engine(
            uff_model_path,
            calib,
            TRT_LOGGER,
            trt_engine_datatype=trt_engine_datatype,
            batch_size=batch_size)
        # Save the engine to file.
        engine_utils.save_engine(self.trt_engine, trt_engine_path)

    # If we get here and no engine was built, the file with the engine exists, so we can load it.
    if not self.trt_engine:
        print("Loading cached TensorRT engine from {}".format(trt_engine_path))
        self.trt_engine = engine_utils.load_engine(self.trt_runtime,
                                                   trt_engine_path)

    # This allocates memory for network inputs/outputs on both CPU and GPU.
    self.inputs, self.outputs, self.bindings, self.stream = \
        engine_utils.allocate_buffers(self.trt_engine)

    # An execution context is needed for inference.
    self.context = self.trt_engine.create_execution_context()

    # Allocate memory for multiple usage [e.g. multiple batch inference].
    input_volume = trt.volume(model_utils.ModelData.INPUT_SHAPE)
    self.numpy_array = np.zeros((self.trt_engine.max_batch_size, input_volume))
def main():
    data_root = '/home/cvrr/opt/TensorRT-5.0.2.6/python/data/cars_resnet152/tiny-imagenet-200/val/'
    image_root = '/home/cvrr/opt/TensorRT-5.0.2.6/python/data/cars_resnet152/tiny-imagenet-200/val/images/'
    text_file = open(data_root + "val_annotations.txt", "r")
    anno = [line.split("\t") for line in text_file.readlines()]
    label_file = open(data_root + "labels.txt", "r")
    find_labels = [line.split(" ") for line in label_file.readlines()]
    # cars_annos_all = scipy.io.loadmat(data_root + 'cars_train_annos.mat')
    # cars_annos = cars_annos_all['annotations']
    # cars_annos = np.transpose(cars_annos)

    # Set the data path to the directory that contains the trained models and test images for inference.
    data_path, data_files = common.find_sample_data(
        description="Runs a ResNet152 on Cars dataset network with a TensorRT inference engine.",
        subfolder="cars_resnet152",
        find_files=[
            "00001.jpg", "00002.jpg", "00003.jpg", "00004.jpg", "00005.jpg",
            "00006.jpg", "00007.jpg", "00008.jpg", "00009.jpg", "00010.jpg",
            ModelData.MODEL_PATH, "cars_labels.txt"
        ])
    # Get test images, models and labels.
    test_images = data_files[0:10]
    onnx_model_file, labels_file = data_files[10:]
    labels = open(labels_file, 'r').read().split('\n')
    print(len(labels))
    # print(labels)

    # Add the weight of the last layer.
    fc_weights = np.load(
        '/home/cvrr/opt/TensorRT-5.0.2.6/python/data/cars_resnet152/last_layer_weights.npy'
    )  # (196, 2048)
    # print(onnx_model_file)

    # Build a TensorRT engine (here: deserialize a previously built engine from file).
    engine_name = 'resnet152v2.engine'
    with open(engine_name, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
        # with build_engine_onnx(onnx_model_file) as engine:
        # Inference is the same regardless of which parser is used to build the engine,
        # since the model architecture is the same.
        # Allocate buffers and create a CUDA stream.
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        # Contexts are used to perform inference.
        with engine.create_execution_context() as context:
            t1 = time.time()
            right_count = 0
            for i in range(1000):
                test_image = image_root + 'val_%d.JPEG' % (i)
                true_label = anno[i][1]
                # bbox_x1 = cars_annos[i][0][0][0][0]
                # bbox_y1 = cars_annos[i][0][1][0][0]
                # bbox_x2 = cars_annos[i][0][2][0][0]
                # bbox_y2 = cars_annos[i][0][3][0][0]
                # true_label = int(cars_annos[i][0][4][0][0])
                print('true label', true_label)
                image = Image.open(test_image).convert('RGB')
                # image = image.crop([max(0, bbox_x1 - 16), max(0, bbox_y1 - 16),
                #                     min(image.size[0], bbox_x2 + 16), min(image.size[1], bbox_y2 + 16)])
                # image.show()
                c, h, w = ModelData.INPUT_SHAPE
                image_arr = np.asarray(image.resize(
                    (w, h), Image.ANTIALIAS)).transpose([2, 0, 1]).astype(
                        trt.nptype(ModelData.DTYPE)).ravel()
                a = (image_arr / 255.0 - 0.45) / 0.225
                np.copyto(h_input, a)
                # h, w = img.size
                # dim_diff = np.abs(h - w)
                # print(test_image)
                # for test_image in test_images:
                # Load a normalized test case into the host input page-locked buffer.
                # test_image = random.choice(test_images)
                # test_case = load_normalized_test_case(test_image, h_input)
                # Run the engine. The output will be a 1D tensor of length 1000, where each value
                # represents the probability that the image corresponds to that label.
                do_inference(context, h_input, d_input, h_output, d_output, stream)
                # print(h_output.shape)
                # We use the highest probability as our prediction. Its index corresponds to the predicted label.
                # output = fc_weights @ h_output
                output = h_output
                # print(output)
                print('prediction:', np.argmax(output))
                pred = find_labels[np.argmax(output)][0]
                if true_label == pred:
                    right_count += 1
                    # print('prediction:', pred)
            print(right_count)
            t2 = time.time()
            print('total time:', t2 - t1)