def main():
    """Load the Clip plugin, check that the model file exists, and build an engine.

    Raises:
        IOError: If CLIP_PLUGIN_LIBRARY or MODEL_PATH is not an existing file.
    """
    message_layout = "\n{}\n{}\n{}\n"

    # Loading the shared object also registers the Clip plugin with the
    # TensorRT PluginRegistry (REGISTER_TENSORRT_PLUGIN macro in the plugin
    # implementation; see plugin/clipPlugin.cpp).
    plugin_present = os.path.isfile(CLIP_PLUGIN_LIBRARY)
    if not plugin_present:
        raise IOError(message_layout.format(
            "Failed to load library ({}).".format(CLIP_PLUGIN_LIBRARY),
            "Please build the Clip sample plugin.",
            "For more information, see the included README.md"))
    ctypes.CDLL(CLIP_PLUGIN_LIBRARY)

    # The pretrained model must already be on disk.
    model_present = os.path.isfile(MODEL_PATH)
    if not model_present:
        raise IOError(message_layout.format(
            "Failed to load model file ({}).".format(MODEL_PATH),
            "Please use 'python lenet5.py' to train and save the model.",
            "For more information, see the included README.md"))

    with build_engine(MODEL_PATH) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # NOTE(review): the original whitespace was lost, so the nesting of
        # this statement is inferred -- confirm it belongs in this function.
        trt_rok = gs.create_plugin_node(name="trt_rok", op="RegionOfKeypoints_TRT", region_shape=5)
def main():
    """Run one MNIST test case through a Caffe-based engine and print the result."""
    # Locate the sample data directory and the three model files.
    wanted_files = [
        "mnist.prototxt",
        "mnist.caffemodel",
        "mnist_mean.binaryproto",
    ]
    data_paths, [deploy_file, model_file, mean_proto] = common.find_sample_data(
        description="Runs an MNIST network using a Caffe model file",
        subfolder="mnist",
        find_files=wanted_files)

    # The engine path handed to get_engine points into the temp directory.
    temp_dir = tempfile.gettempdir()
    engine_path = os.path.join(temp_dir, "mnist.engine")

    with get_engine(deploy_file, model_file, engine_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        mean = retrieve_mean(mean_proto)
        # The loader returns the host input array together with the case id.
        inputs[0].host, case_num = load_normalized_test_case(data_paths, mean)
        # A single output is expected, hence the one-element unpacking.
        results = common.do_inference(
            context,
            bindings=bindings,
            inputs=inputs,
            outputs=outputs,
            stream=stream)
        [output] = results
        pred = np.argmax(output)
        print("Test Case: " + str(case_num))
        print("Prediction: " + str(pred))
def get_buffer(engine, img_np):
    """Allocate engine buffers and install ``img_np`` as the first host input.

    Returns:
        The ``(inputs, outputs, bindings, stream)`` tuple produced by
        ``common.allocate_buffers`` with ``inputs[0].host`` set to ``img_np``.
    """
    buffers = common.allocate_buffers(engine)
    inputs, outputs, bindings, stream = buffers
    # The image replaces whatever host array was allocated for input 0.
    inputs[0].host = img_np
    return inputs, outputs, bindings, stream
def main():
    """Build a UFF MNIST engine, run one test case, and report the elapsed time.

    The timer starts before the engine is built, so the reported ``use_time``
    covers engine construction, buffer allocation and inference.
    """
    data_path = common.find_sample_data(
        description="Runs an MNIST network using a UFF model file",
        subfolder="mnist")
    model_file = ModelData.MODEL_FILE

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed intervals.
    t1 = time.perf_counter()
    with build_engine(model_file) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            case_num = load_normalized_test_case(
                data_path, pagelocked_buffer=inputs[0].host)
            # do_inference returns a list of outputs; only one is expected.
            [output] = common.do_inference(
                context,
                bindings=bindings,
                inputs=inputs,
                outputs=outputs,
                stream=stream)
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
    t2 = time.perf_counter()
    print("use_time:" + str(t2 - t1))
def predict(self, preprocessed_image):
    """Run inference on one preprocessed image and return the reshaped outputs.

    Args:
        preprocessed_image: Array whose shape must equal
            ``(1, self.channels, self.model_height, self.model_width)``.

    Returns:
        The list returned by ``do_inference`` with element 0 reshaped to
        ``(1, -1, 1, 4)`` and element 1 reshaped to ``(1, -1, len(self.labels))``.

    Raises:
        AssertionError: If the image shape does not match the model shape.
    """
    expected_shape = (
        1,
        self.channels,
        self.model_height,
        self.model_width,
    )
    batch = preprocessed_image
    assert expected_shape == batch.shape, "Image must be resized to model shape"

    if self.is_fp16:
        batch = batch.astype(np.float16)

    # self.cfx is pushed for the duration of the inference call and always
    # popped afterwards, even when inference raises.
    self.cfx.push()
    try:
        inputs, outputs, bindings, stream = common.allocate_buffers(self.engine)
        inputs[0].host = batch
        trt_outputs = do_inference(
            self.context,
            bindings=bindings,
            inputs=inputs,
            outputs=outputs,
            stream=stream,
        )
    finally:
        self.cfx.pop()

    class_count = len(self.labels)
    trt_outputs[0] = trt_outputs[0].reshape(1, -1, 1, 4)
    trt_outputs[1] = trt_outputs[1].reshape(1, -1, class_count)
    return trt_outputs
def main():
    """Build an engine from the UFF MNIST model, run one test case, print the prediction."""
    data_paths, _ = common.find_sample_data(
        description="Runs an MNIST network using a UFF model file",
        subfolder="mnist")

    # A non-empty MODEL_PATH environment variable wins; otherwise fall back
    # to the "models" directory next to this file.
    model_dir = os.environ.get("MODEL_PATH") or os.path.join(
        os.path.dirname(__file__), "models")
    model_file = os.path.join(model_dir, ModelData.MODEL_FILE)

    with build_engine(model_file) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            # The test case is written into the host buffer of input 0.
            case_num = load_normalized_test_case(
                data_paths, pagelocked_buffer=inputs[0].host)
            # Only one output is expected from do_inference.
            results = common.do_inference(
                context,
                bindings=bindings,
                inputs=inputs,
                outputs=outputs,
                stream=stream)
            [output] = results
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
def main():
    """Load the Clip plugin, build an engine from MODEL_PATH, and classify one test case.

    Raises:
        IOError: If CLIP_PLUGIN_LIBRARY or MODEL_PATH is not an existing file.
    """
    message_layout = "\n{}\n{}\n{}\n"

    # Loading the shared object also registers the Clip plugin with the
    # TensorRT PluginRegistry (REGISTER_TENSORRT_PLUGIN macro in the plugin
    # implementation; see plugin/clipPlugin.cpp).
    plugin_present = os.path.isfile(CLIP_PLUGIN_LIBRARY)
    if not plugin_present:
        raise IOError(message_layout.format(
            "Failed to load library ({}).".format(CLIP_PLUGIN_LIBRARY),
            "Please build the Clip sample plugin.",
            "For more information, see the included README.md"))
    ctypes.CDLL(CLIP_PLUGIN_LIBRARY)

    # The pretrained model must already be on disk.
    model_present = os.path.isfile(MODEL_PATH)
    if not model_present:
        raise IOError(message_layout.format(
            "Failed to load model file ({}).".format(MODEL_PATH),
            "Please use 'python lenet5.py' to train and save the model.",
            "For more information, see the included README.md"))

    with build_engine(MODEL_PATH) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            print("\n=== Testing ===")
            test_case = load_normalized_test_case(inputs[0].host)
            print("Loading Test Case: " + str(test_case))
            # Only one output is expected from do_inference.
            results = common.do_inference(
                context,
                bindings=bindings,
                inputs=inputs,
                outputs=outputs,
                stream=stream)
            [pred] = results
            print("Prediction: " + str(np.argmax(pred)))
def main():
    """Train the MNIST model, build an engine from its weights, and classify one case.

    Raises:
        IOError: If CLIP_PLUGIN_LIBRARY is not an existing file.
    """
    # NOTE(review): only the existence of the plugin library is checked here;
    # this function does not load it with ctypes -- confirm it is loaded
    # elsewhere so the plugin is registered with the TensorRT PluginRegistry.
    plugin_present = os.path.isfile(CLIP_PLUGIN_LIBRARY)
    if not plugin_present:
        raise IOError("\n{}\n{}\n{}\n".format(
            "Failed to load library ({}).".format(CLIP_PLUGIN_LIBRARY),
            "Please build the Clip sample plugin.",
            "For more information, see the included README.md"))

    # Train on MNIST and pull out the learned weights.
    mnist_model = MnistModel()
    mnist_model.learn()
    weights = mnist_model.get_weights()

    # Inference with TensorRT.
    with build_engine(weights) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            case_num = load_random_test_case(
                mnist_model, pagelocked_buffer=inputs[0].host)
            # Only one output is expected from do_inference.
            results = common.do_inference(
                context,
                bindings=bindings,
                inputs=inputs,
                outputs=outputs,
                stream=stream)
            [output] = results
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
def main():
    """Feed all-ones inputs through the constructed network and print outputs and timing."""
    engine = init_construct_network()
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)

    # Flat element counts derived from the module-level dimensions.
    first_size = x1 * y1 * z1
    second_size = x2 * y2 * z2

    with engine.create_execution_context() as context:
        print(np.ones(first_size, np.float32).reshape(-1))
        np.copyto(inputs[0].host, np.ones(first_size, np.float32).reshape(-1))
        np.copyto(inputs[1].host, np.ones(second_size, np.float32).reshape(-1))

        # Wall-clock time of the inference call only.
        time_start = time.time()
        output = common.do_inference(
            context,
            bindings=bindings,
            inputs=inputs,
            outputs=outputs,
            stream=stream)
        time_end = time.time()
        print("time ", time_end - time_start)

        print(output[0].reshape(first_size))
        data = output[0].reshape(first_size)
        print(data[0])
        print(output[1].reshape(first_size))
        print("ok")
def main():
    """Classify one MNIST test case with a Caffe-based engine, then destroy the plugin."""
    # Locate the sample data directory and the three model files.
    wanted_files = [
        "mnist.prototxt",
        "mnist.caffemodel",
        "mnist_mean.binaryproto",
    ]
    data_path, [deploy_file, model_file, mean_proto] = common.find_sample_data(
        description="Runs an MNIST network using a Caffe model file",
        subfolder="mnist",
        find_files=wanted_files)

    with build_engine(deploy_file, model_file) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        mean = retrieve_mean(mean_proto)
        with engine.create_execution_context() as context:
            case_num = load_normalized_test_case(data_path, inputs[0].host, mean)
            # Only one output is expected from do_inference.
            results = common.do_inference(
                context,
                bindings=bindings,
                inputs=inputs,
                outputs=outputs,
                stream=stream)
            [output] = results
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))

    # The plugin is destroyed only after the engine's ``with`` block has
    # exited. destroy_plugin is exposed through the binding code in
    # plugin/pyFullyConnected.cpp.
    fc_factory.destroy_plugin()
def check_accuracy(context, batch_size, test_set, test_labels):
    """Run the test set through the engine in batches and print the accuracy.

    Args:
        context: Execution context; its ``engine`` attribute is used to
            allocate the buffers.
        batch_size: Number of samples submitted per inference call. The last
            batch is smaller when the test set size is not a multiple of it.
        test_set: Array of samples, indexed along its first axis.
        test_labels: Labels aligned with ``test_set``.

    Prints a progress line every tenth batch and the total accuracy at the
    end. When the test set is empty a notice is printed instead of dividing
    by zero.
    """
    inputs, outputs, bindings, stream = common.allocate_buffers(context.engine)
    sample_count = test_set.shape[0]
    num_correct = 0
    num_total = 0

    for batch_num, start_idx in enumerate(range(0, sample_count, batch_size), start=1):
        if batch_num % 10 == 0:
            print("Validating batch {:}".format(batch_num))

        # The final batch may be short when sample_count is not divisible
        # by batch_size.
        end_idx = min(start_idx + batch_size, sample_count)
        effective_batch_size = end_idx - start_idx

        inputs[0].host = test_set[start_idx:end_idx]
        [output] = common.do_inference(
            context,
            bindings=bindings,
            inputs=inputs,
            outputs=outputs,
            stream=stream,
            batch_size=effective_batch_size)

        # Let NumPy infer the row count instead of hard-coding 32, so the
        # code works for any output buffer that holds rows of 10 scores.
        # Only the first effective_batch_size rows belong to this batch.
        scores = output.reshape(-1, 10)[0:effective_batch_size]
        preds = np.argmax(scores, axis=1)
        labels = test_labels[start_idx:end_idx]
        num_total += effective_batch_size
        num_correct += np.count_nonzero(np.equal(preds, labels))

    if num_total == 0:
        print("No test samples provided; accuracy not computed.")
        return

    percent_correct = 100 * num_correct / float(num_total)
    print("Total Accuracy: {:}%".format(percent_correct))
def main():
    """Classify a randomly chosen sample image with an ONNX-built ResNet50 engine.

    Prints whether the predicted label (words joined by underscores) occurs
    in the base name of the chosen image file.
    """
    # Locate the test images, the ONNX model and the label file.
    _, data_files = common.find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=[
            "binoculars.jpeg",
            "reflex_camera.jpeg",
            "tabby_tiger_cat.jpg",
            ModelData.MODEL_PATH,
            "class_labels.txt",
        ])
    test_images = data_files[0:3]
    onnx_model_file, labels_file = data_files[3:]

    # Read the labels through a context manager so the file handle is
    # closed deterministically.
    with open(labels_file, 'r') as label_stream:
        labels = label_stream.read().split('\n')

    with build_engine_onnx(onnx_model_file) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            # Load a normalized test case into the host buffer of input 0.
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, inputs[0].host)
            trt_outputs = common.do_inference_v2(
                context,
                bindings=bindings,
                inputs=inputs,
                outputs=outputs,
                stream=stream)
            # The index of the largest score selects the predicted label.
            pred = labels[np.argmax(trt_outputs[0])]
            image_stem = os.path.splitext(os.path.basename(test_case))[0]
            if "_".join(pred.split()) in image_stem:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)
def main():
    """Build an INT8 Caffe MNIST engine with a calibrator and validate one random batch."""
    _, data_files = common.find_sample_data(
        description="Runs a Caffe MNIST network in Int8 mode",
        subfolder="mnist",
        find_files=["batches", ModelData.DEPLOY_PATH, ModelData.MODEL_PATH])
    [batch_data_dir, deploy_file, model_file] = data_files

    # The calibrator is given the calibration data location and a cache file
    # name for its calibration data.
    calibration_cache = "mnist_calibration.cache"
    calib = calibrator.MNISTEntropyCalibrator(
        batch_data_dir, cache_file=calibration_cache)

    # The calibrator's batch size is reused for inference. This is a
    # convenience, not a requirement.
    batch_size = calib.get_batch_size()

    with build_int8_engine(deploy_file, model_file, calib) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)

        # The batch size is passed explicitly to do_inference.
        inputs[0].host, labels = load_random_batch(calib)
        results = common.do_inference(
            context,
            bindings=bindings,
            inputs=inputs,
            outputs=outputs,
            stream=stream,
            batch_size=batch_size)
        [output] = results

        # Reshape to one row of 10 values per sample before validation.
        output = output.reshape(batch_size, 10)
        validate_output(output, labels)
def main():
    """Load or build the engine, publish its buffers as globals, and time ten inferences."""
    global INPUTS, OUTPUTS, BINDINGS, STREAM, CONTEXT

    # Both second arguments are absolute paths.
    onnx_file_path = os.path.join(
        sys.path[0],
        '/home/wlz/catkin_ws/src/krrt-planner/opnet/models/no_surf_80_32.onnx')
    engine_file_path = os.path.join(
        sys.path[0],
        '/home/wlz/catkin_ws/src/krrt-planner/opnet/models/no_surf_80_32.trt')

    # Random float32 input with a leading dimension of 1 and the remaining
    # dimensions taken from SHAPE[1:4].
    sample = np.random.randn(1, SHAPE[1], SHAPE[2], SHAPE[3]).astype(np.float32)

    with get_engine(onnx_file_path, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        print("GET ENGINE SUCCEED")
        # The context and buffers are stored in module-level globals.
        CONTEXT = context
        INPUTS, OUTPUTS, BINDINGS, STREAM = common.allocate_buffers(engine)

        # Time ten consecutive inference calls. The results are discarded.
        time0 = time.time()
        for _ in range(10):
            trt_inference(sample)
        print("prepocess time: %fs" % (time.time() - time0))
    print('done')
def main():
    """Classify the images in ``car_rec_test`` with a Caffe-built engine and print labels.

    The batch size (16) and the number of scores per image (427) are fixed
    by the calls below.
    """
    test_image_list = os.listdir('car_rec_test')
    print(test_image_list)

    engine_file_path = "car_rec.trt"
    caffe_model_file = ModelData.MODEL_PATH
    caffe_deploy_file = ModelData.DEPLOY_PATH
    labels_file = ModelData.LABEL_PATH

    # Read the labels through a context manager so the file handle is
    # closed deterministically.
    with open(labels_file, 'r') as label_stream:
        labels = label_stream.read().split('\n')

    with build_engine_caffe(caffe_model_file, caffe_deploy_file, engine_file_path) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            # Called for its effect on ``inputs``; the return value is unused.
            load_normalized_test_cases(test_image_list, inputs)
            trt_outputs = common.do_inference(
                context, bindings, inputs, outputs, stream, 16)

            # One row of 427 scores per image in the batch of 16.
            outs = trt_outputs[0].reshape(16, 427)
            print(outs)
            for scores in outs:
                print(labels[np.argmax(scores)])
def main(args):
    """Run the detector on one image and draw the resulting boxes.

    Args:
        args: Object providing ``engine_path``, ``model_dir`` and
            ``image_path`` attributes.

    Raises:
        IOError: If the image at ``args.image_path`` cannot be read.
    """
    with get_engine(args.engine_path, args.model_dir) as engine:
        with engine.create_execution_context() as context:
            origin_img = cv2.imread(args.image_path)
            # cv2.imread signals failure by returning None rather than
            # raising; fail here with a clear message instead of crashing
            # later inside preprocessing.
            if origin_img is None:
                raise IOError(
                    "Failed to read image ({}).".format(args.image_path))

            t1 = time.time()
            img, (ratio_h, ratio_w) = preprocess(origin_img)
            cv2.imwrite("processed.jpg", img)
            h, w, _ = img.shape

            # HWC -> CHW, then flatten to 1D for the host buffer.
            img = img.transpose((2, 0, 1))
            img = img.ravel()

            # The input binding shape depends on the preprocessed image
            # size, so it is set before buffers are allocated.
            context.set_binding_shape(0, (1, 3, h, w))
            inputs, outputs, bindings, stream = common.allocate_buffers(
                engine, context)
            np.copyto(inputs[0].host, img)

            # Only one output is expected from do_inference_v2.
            [output] = common.do_inference_v2(
                context,
                bindings=bindings,
                inputs=inputs,
                outputs=outputs,
                stream=stream)

            # 1D -> CHW (6 channels at quarter resolution) -> HWC.
            output = np.reshape(output, (6, h // 4, w // 4))
            output = output.transpose(1, 2, 0)

            boxes = postprocess(origin_img, output, ratio_h, ratio_w)
            t2 = time.time()
            print("total cost %fms" % ((t2 - t1) * 1000))
            draw_result(origin_img, boxes)
def main():
    """Time one MNIST prediction with TensorRT and one with the model from ``model.create_model``."""
    # The sample data directory is hard-coded to a local TensorRT install.
    data_path = '/home/ai/tensorrt_tar/TensorRT-5.1.5.0/data/mnist'

    # A non-empty MODEL_PATH environment variable wins; otherwise fall back
    # to the "models" directory next to this file.
    model_dir = os.environ.get("MODEL_PATH") or os.path.join(
        os.path.dirname(__file__), "models")
    model_file = os.path.join(model_dir, ModelData.MODEL_FILE)

    with build_engine(model_file) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            case_num = load_normalized_test_case(
                data_path, pagelocked_buffer=inputs[0].host)

            # TensorRT path: only one output is expected from do_inference.
            start1 = time.time()
            results = common.do_inference(
                context,
                bindings=bindings,
                inputs=inputs,
                outputs=outputs,
                stream=stream)
            [output] = results
            pred = np.argmax(output)
            print("Prediction: " + str(pred), 'time is :', time.time() - start1)

            # Second path: model built by model.create_model().
            model1 = model.create_model()
            model1.load_weights("models/lenet5.pb")
            start2 = time.time()
            # NOTE(review): this passes the buffer object inputs[0] itself,
            # not its .host array -- confirm that is what predict() expects.
            output = model1.predict(inputs[0])
            pred = np.argmax(output)
            print("Prediction: " + str(pred), 'time is :', time.time() - start2)
def predict(inp: Image, metadata):
    """Detect objects in ``inp`` and return the image with bounding boxes drawn.

    Args:
        inp: Image handed to ``preprocessor.process``.
        metadata: Mapping that receives the inference latency under the key
            ``'TensorRT Inference Latency (s)'``.

    Returns:
        The result of ``draw_bboxes`` for the raw image and the detections.
    """
    image_raw, image = preprocessor.process(inp)
    shape_orig_WH = image_raw.size

    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    print('Running inference on image')
    # The preprocessed image becomes the host array of input 0.
    inputs[0].host = image

    # Only the do_inference call itself is timed.
    started = perf_counter()
    raw_outputs = common.do_inference(
        context,
        bindings=bindings,
        inputs=inputs,
        outputs=outputs,
        stream=stream)
    finished = perf_counter()
    metadata['TensorRT Inference Latency (s)'] = (finished - started)

    # Give each output the matching shape from output_shapes.
    trt_outputs = []
    for flat_output, shape in zip(raw_outputs, output_shapes):
        trt_outputs.append(flat_output.reshape(shape))

    # Post-process into boxes, classes and scores, then draw them.
    postprocessor = PostprocessYOLO(**postprocessor_args)
    boxes, classes, scores = postprocessor.process(trt_outputs, shape_orig_WH)
    obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)
    return obj_detected_img
def main():
    """Train the PyTorch MNIST model, build an engine from its weights, and classify one case."""
    # Command-line argument parser.
    common.add_help(description="Runs an MNIST network using a PyTorch model")

    # Create a model instance, train it, and fetch the resulting weights.
    mnist_model = model.MnistModel()
    mnist_model.learn()
    weights = mnist_model.get_weights()

    # Inference with TensorRT; build_engine is implemented in this file.
    with build_engine(weights) as engine:
        # Allocate the buffers.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            # load_random_test_case (see this file) picks a random test
            # sample and copies it into the host buffer of input 0.
            case_num = load_random_test_case(
                mnist_model, pagelocked_buffer=inputs[0].host)
            # Run inference; only one output is expected.
            results = common.do_inference(
                context,
                bindings=bindings,
                inputs=inputs,
                outputs=outputs,
                stream=stream)
            [output] = results
            # Final result.
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
def main3():
    """Run ``04.jpg`` through the ResNet50 engine, print timing and argmax, return the outputs.

    Returns:
        The value returned by ``common.do_inference``.
    """
    # Constructed as in the original; not referenced again in this function.
    device = torch.device('cuda:0')
    path = '04.jpg'
    onnx_file_path = './models/ResNet50.onnx'
    engine_file_path = './models/ResNet50.trt'

    with get_engine(onnx_file_path, engine_file_path) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            # Fill the host buffer of input 0 from the image file.
            load_normalized_test_case(path, inputs[0].host)
            start1 = time.time()
            output = common.do_inference(
                context,
                bindings=bindings,
                inputs=inputs,
                outputs=outputs,
                stream=stream)
            print('processing time3 is', time.time() - start1, np.argmax(output))
            return output
def main(engine_path, image_path, image_size):
    """Load the engine, read the image at ``image_path``, and hand both to ``detect``.

    Args:
        engine_path: Path passed to ``get_engine``.
        image_path: Path of the image read with ``cv2.imread``.
        image_size: Forwarded unchanged to ``detect``.
    """
    with get_engine(engine_path) as engine:
        with engine.create_execution_context() as context:
            # The whole tuple from allocate_buffers is passed on as one object.
            buffers = common.allocate_buffers(engine)
            image_src = cv2.imread(image_path)
            detect(engine, context, buffers, image_src, image_size)
def main():
    """Benchmark the plate-detection engine on one image and print per-run and average times."""
    engine_file_path = 'plate_detection.trt'
    input_image_path = '../cat.jpg'
    input_resolution_plate_detection_HW = (325, 325)

    preprocessor = PreprocessYOLO(input_resolution_plate_detection_HW)
    image_raw, image = preprocessor.process(input_image_path)
    print(image.shape)

    with get_engine_from_bin(engine_file_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print('Running inference on image {}...'.format(input_image_path))
        # The preprocessed image becomes the host array of input 0.
        inputs[0].host = image

        # One run before the timed loop.
        trt_outputs = common.do_inference(
            context,
            bindings=bindings,
            inputs=inputs,
            outputs=outputs,
            stream=stream,
            batch_size=1)

        # Timed loop: n_time_inference runs, each measured individually.
        total_time = 0
        n_time_inference = 10000
        for run_index in range(n_time_inference):
            t1 = time.time()
            trt_outputs = common.do_inference(
                context,
                bindings=bindings,
                inputs=inputs,
                outputs=outputs,
                stream=stream,
                batch_size=1)
            t2 = time.time()
            delta_time = t2 - t1
            total_time += delta_time
            print('inference-{} cost: {}ms'.format(str(run_index + 1), delta_time * 1000))

        avg_time_original_model = total_time / n_time_inference
        print("average inference time: {}ms".format(avg_time_original_model * 1000))
        print(trt_outputs[0].shape)
        print(trt_outputs[1].shape)
def main():
    """Run one training image through the jetracer engine and print the first output's shape."""
    # ONNX graph and serialized engine locations.
    onnx_file_path = '/models/run09/jetracer.onnx'
    engine_file_path = '/models/run09/jetracer.trt'
    ino = 378

    with get_engine(onnx_file_path, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print('Running inference on image')

        # Read the image, move channels first (HWC -> CHW) and add the batch
        # dimension expected by the reshape below.
        pixels = cv2.imread(f'/models/train_data/Images/{ino:03d}.jpg')
        image = pixels.transpose(2, 0, 1).reshape(1, 3, 320, 640)

        # The host input is a C-ordered float16 copy of the image.
        inputs[0].host = np.array(image, dtype=np.float16, order='C')
        trt_outputs = common.do_inference_v2(
            context,
            bindings=bindings,
            inputs=inputs,
            outputs=outputs,
            stream=stream)
        print(trt_outputs[0].shape)
def main():
    """Load the swish library, build the UFF MNIST engine, and time 1000 inference runs."""
    # The shared library is loaded before the engine is built.
    x = "/home/dgxuser125/rt-kennan/Swish3/build/libswish.so"
    ctypes.CDLL(x)

    data_paths, _ = common.find_sample_data(
        description="Runs an MNIST network using a UFF model file",
        subfolder="mnist")

    # A non-empty MODEL_PATH environment variable wins; otherwise fall back
    # to the "models" directory next to this file.
    model_dir = os.environ.get("MODEL_PATH") or os.path.join(
        os.path.dirname(__file__), "models")
    model_file = os.path.join(model_dir, ModelData.MODEL_FILE)

    with build_engine(model_file) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            # The measured interval covers loading each test case as well
            # as the inference call.
            inference_start_time = time.time()
            for _ in range(1000):
                case_num = load_normalized_test_case(
                    data_paths, pagelocked_buffer=inputs[0].host)
                results = common.do_inference(
                    context,
                    bindings=bindings,
                    inputs=inputs,
                    outputs=outputs,
                    stream=stream)
                [output] = results
                pred = np.argmax(output)
            end_time = time.time()
            print("time taken for one input with tenosrrt: ",
                  (end_time - inference_start_time) / 1000)
def main():
    """Train the PyTorch MNIST model, build an engine from its weights, and classify one case.

    The engine and the execution context are managed with ``with`` so both
    are released when inference finishes or raises.
    """
    common.add_help(description="Runs an MNIST network using a PyTorch model")

    # Train the PyTorch model and fetch its weights.
    mnist_model = model.MnistModel()
    mnist_model.learn()
    weights = mnist_model.get_weights()

    # Inference with TensorRT.
    with build_engine(weights) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            case_num = load_random_test_case(
                mnist_model, pagelocked_buffer=inputs[0].host)
            # do_inference returns a list of outputs; only one is expected.
            [output] = common.do_inference(
                context,
                bindings=bindings,
                inputs=inputs,
                outputs=outputs,
                stream=stream)
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
def main():
    """Classify camera frames with the VGG16 engine and show them with an FPS overlay.

    Frames come from a GStreamer pipeline. The loop ends when Esc (key code
    27) is pressed or when a frame cannot be read. The camera and the
    display windows are released even if an error occurs.
    """
    # Load the labels through a context manager so the file is closed.
    with open('class_labels.txt') as label_file:
        labels = [line.rstrip('\n') for line in label_file]

    # Load the engine.
    trt_engine = './vgg16_32.trt'
    Cifar10_engine = load_engine(trt_engine)

    dispW = 1280
    dispH = 720
    flip = 0
    fpsReport = 0
    camSet = (
        'nvarguscamerasrc ! video/x-raw(memory:NVMM), width=1280, height=720, '
        'format=NV12, framerate=60/1 '
        '! nvvidconv flip-method=' + str(flip)
        + ' ! video/x-raw, width=' + str(dispW)
        + ', height=' + str(dispH)
        + ', format=BGRx ! videoconvert !video/x-raw, format=BGR ! appsink'
    )
    cap = cv2.VideoCapture(camSet, cv2.CAP_GSTREAMER)
    timeStamp = time.time()
    font = cv2.FONT_HERSHEY_SIMPLEX

    # Buffers and the execution context do not depend on the frame, so they
    # are created once instead of on every iteration.
    inputs, outputs, bindings, stream = common.allocate_buffers(Cifar10_engine)
    try:
        with Cifar10_engine.create_execution_context() as context:
            while True:
                grabbed, frame = cap.read()
                # cap.read() reports failure through its first return value;
                # stop instead of crashing on a missing frame.
                if not grabbed:
                    break

                frame = frame.astype('float32')
                frameRGB = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                # Resize to 32x32, scale by 1/255, HWC -> CHW, flatten.
                img = cv2.resize(frameRGB, (32, 32)) / 255
                img = img.transpose((2, 0, 1)).flatten()
                inputs[0].host = img

                # Inference.
                trt_outputs = common.do_inference(
                    context,
                    bindings=bindings,
                    inputs=inputs,
                    outputs=outputs,
                    stream=stream)
                pred = trt_outputs[0].argmax(-1)

                # Exponentially smoothed frames-per-second estimate.
                dt = time.time() - timeStamp
                fps = 1 / dt
                fpsReport = .9 * fpsReport + .1 * fps
                timeStamp = time.time()

                # Filled banner sized to the label, then the overlay text.
                cv2.rectangle(frame, (0, 0),
                              (350 + len(labels[pred]) * 30, 80),
                              (0, 0, 255), -1)
                cv2.putText(frame,
                            str(round(fpsReport, 1)) + 'fps' + ', ' + labels[pred],
                            (0, 60), font, 2, (0, 255, 255), 3)
                # The frame is float32, so it is scaled by 1/255 for display.
                cv2.imshow('stream', frame / 255)
                if cv2.waitKey(1) == 27:
                    break
    finally:
        cap.release()
        cv2.destroyAllWindows()
def init():
    """Create the logger, engine, execution context and buffers as module-level globals."""
    global inputs, outputs, bindings, stream, engine, TRT_LOGGER, context

    TRT_LOGGER = trt.Logger()

    # ONNX source and serialized engine file names passed to get_engine.
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = "yolov3.trt"
    engine = get_engine(TRT_LOGGER, onnx_file_path, engine_file_path)

    context = engine.create_execution_context()
    allocated = common.allocate_buffers(engine)
    inputs, outputs, bindings, stream = allocated
def main(win_title):
    """Classify camera frames with the TensorRT engine and display them with an overlay.

    Args:
        win_title: Title of the OpenCV window used for display.

    The loop ends when ``q`` is pressed or when a frame cannot be read. The
    camera and the display windows are released even if an error occurs.
    """
    print('load trt engine')
    trt_path = 'engine.trt'
    engine = load_engine(trt_runtime, trt_path)

    print('load labels')
    label = get_label('keras_models/labels.txt')

    print('allocate buffers')
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)

    print('create execution context')
    context = engine.create_execution_context()

    print('start stream')
    fps = -1  # Placeholder shown until the first frame has been timed.
    GSTREAMER_PIPELINE = 'nvarguscamerasrc ! video/x-raw(memory:NVMM), width=1920, height=1080, format=(string)NV12, framerate=60/1 ! nvvidconv flip-method=0 ! video/x-raw, width=640, height=480, format=(string)BGRx ! videoconvert ! video/x-raw, format=(string)BGR ! appsink'
    cap = cv2.VideoCapture(GSTREAMER_PIPELINE, cv2.CAP_GSTREAMER)

    try:
        while True:
            t_start = time.time()
            ret, frame = cap.read()
            # cap.read() reports failure through its first return value;
            # stop instead of preprocessing a missing frame.
            if not ret:
                break

            inputs[0].host = preprocess(frame)
            trt_outputs = common.do_inference(
                context,
                bindings=bindings,
                inputs=inputs,
                outputs=outputs,
                stream=stream)

            # The first output holds the per-label scores.
            preds = trt_outputs[0]
            idx = np.argmax(preds)
            result = label[idx]

            info = '{} : {:.3f} , FPS {}'.format(result, preds[idx], fps)
            cv2.putText(frame, info, (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (0, 0, 255), 4)
            cv2.imshow(win_title, frame)
            if cv2.waitKey(1) == ord('q'):
                break

            # The FPS of this iteration is shown on the next frame.
            fps = int(1 / (time.time() - t_start))
    finally:
        cap.release()
        cv2.destroyAllWindows()
    print('Quit')
def __init__(self, onnx_path, engine_path, dimx=80, dimy=80, dimz=48):
    """Create the CUDA context, engine, execution context and buffers.

    Args:
        onnx_path: Forwarded to ``self.get_engine``.
        engine_path: Forwarded to ``self.get_engine``.
        dimx, dimy, dimz: Stored, after a leading 1, as ``self.output_shape``.
    """
    self.output_shape = [1, dimx, dimy, dimz]
    self.trt_logger = trt.Logger(trt.Logger.INFO)

    # A context is made on device 0 before the engine is obtained.
    self.cuda_ctx = cuda.Device(0).make_context()
    self.engine = self.get_engine(onnx_path, engine_path)
    self.context = self.engine.create_execution_context()

    allocated = common.allocate_buffers(self.engine)
    self.inputs, self.outputs, self.bindings, self.stream = allocated
    print("BUIND ENGINE SUCCEED")
def _infer(self, input_data):
    """Allocate fresh buffers, run inference on ``input_data``, and return the outputs.

    Args:
        input_data: Array installed as the host array of input 0.

    Returns:
        The value returned by ``common.do_inference``.
    """
    allocated = common.allocate_buffers(self.engine)
    inputs, outputs, bindings, stream = allocated
    # The data becomes the host array of input 0 before do_inference runs.
    inputs[0].host = input_data
    results = common.do_inference(
        self.context,
        bindings=bindings,
        inputs=inputs,
        outputs=outputs,
        stream=stream)
    return results