def main():
    engine_file_path = 'plate_detection.trt'
    input_image_path = '../cat.jpg'
    input_resolution_plate_detection_HW = (325, 325)
    preprocessor = PreprocessYOLO(input_resolution_plate_detection_HW)
    image_raw, image = preprocessor.process(input_image_path)
    print(image.shape)
    trt_outputs = []
    with get_engine_from_bin(engine_file_path) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Do inference
        print('Running inference on image {}...'.format(input_image_path))
        # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
        inputs[0].host = image
        trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream, batch_size=1)
        # Time inference over n_time_inference runs and report the average.
        total_time = 0
        n_time_inference = 10000
        for i in range(n_time_inference):
            t1 = time.time()
            trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream, batch_size=1)
            t2 = time.time()
            delta_time = t2 - t1
            total_time += delta_time
            print('inference-{} cost: {}ms'.format(str(i + 1), delta_time * 1000))
        avg_time_original_model = total_time / n_time_inference
        print("average inference time: {}ms".format(avg_time_original_model * 1000))
        print(trt_outputs[0].shape)
        print(trt_outputs[1].shape)
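# Note: the snippets in this collection rely on the allocate_buffers()/do_inference() helpers from the
# common.py module shipped with NVIDIA's TensorRT Python samples. As a reference, here is a minimal
# sketch of those helpers for the implicit-batch API (TensorRT <= 7 with PyCUDA); the common.py bundled
# with your TensorRT release may differ in detail.
import pycuda.autoinit  # noqa: F401 -- creates a CUDA context on import
import pycuda.driver as cuda
import tensorrt as trt


class HostDeviceMem(object):
    """Pairs a pagelocked host buffer with its device buffer."""

    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem


def allocate_buffers(engine):
    """Allocate host/device buffers for every binding and create a CUDA stream."""
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        bindings.append(int(device_mem))
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream


def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    """Copy inputs to the GPU, execute the engine, copy outputs back, and return them."""
    for inp in inputs:
        cuda.memcpy_htod_async(inp.device, inp.host, stream)
    context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle)
    for out in outputs:
        cuda.memcpy_dtoh_async(out.host, out.device, stream)
    stream.synchronize()
    return [out.host for out in outputs]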
def inference(network, engine, input_vars):
    input_vars = [input_vars[0]]
    inputs, outputs, bindings, stream = allocate_buffers(engine)
    with engine.create_execution_context() as context:
        for i, input_var in enumerate(input_vars):
            np.copyto(inputs[i].host, input_var.numpy().reshape(-1))
        do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
    return outputs
def inference(network, builder, input_vars):
    if type(input_vars) != tuple:
        input_vars = [input_vars]
    builder.max_workspace_size = 256 << 20
    engine = builder.build_cuda_engine(network)
    inputs, outputs, bindings, stream = allocate_buffers(engine)
    for i, input_var in enumerate(input_vars):
        np.copyto(inputs[i].host, input_var.numpy().reshape(-1))
    with engine.create_execution_context() as context:
        do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
    return outputs
def main():
    model_path = "model/trt/deconv-conv.trt"
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    runtime = trt.Runtime(TRT_LOGGER)
    f = open(model_path, "rb")
    engine = runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()
    f.close()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Report whether each binding is an input or an output.
        if engine.binding_is_input(binding):
            print("input_size: ", size, "dtype: ", dtype)
        else:
            print("output_size: ", size, "dtype: ", dtype)
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    length = input_shape[0] * input_shape[1] * input_shape[2]
    data = np.zeros(length, dtype=np.float32)
    data[:] = 1.0
    inputs[0].host = data.reshape(input_shape)
    print(inputs[0].host[0][0][:10])
    outputs[0].host = np.zeros(output_shape, dtype=np.float32)
    trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
    print(trt_outputs[0][0][0][:10])
    print("starting...")
    starttime = time.time()
    for i in range(200):
        trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
    endtime = time.time()
    print(endtime - starttime)
    print(trt_outputs[0][0][0][:10])
def main():
    # Command-line argument parser
    common.add_help(description="Runs an MNIST network using a PyTorch model")
    # Train the PyTorch model: create a model instance and train it
    mnist_model = model.MnistModel()
    mnist_model.learn()
    # Fetch the trained weights
    weights = mnist_model.get_weights()
    # Do inference with TensorRT.
    # build_engine is implemented in this file.
    with build_engine(weights) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            # load_random_test_case (implemented in this file) loads a random test
            # sample and copies it into host (pagelocked) memory.
            case_num = load_random_test_case(mnist_model, pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            # Post-process: take the argmax as the predicted digit.
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
def check_accuracy(context, batch_size, test_set, test_labels):
    inputs, outputs, bindings, stream = common.allocate_buffers(context.engine)
    num_correct = 0
    num_total = 0
    batch_num = 0
    for start_idx in range(0, test_set.shape[0], batch_size):
        batch_num += 1
        if batch_num % 10 == 0:
            print("Validating batch {:}".format(batch_num))
        # If the number of images in the test set is not divisible by the batch size,
        # the last batch will be smaller. This logic handles that case.
        end_idx = min(start_idx + batch_size, test_set.shape[0])
        effective_batch_size = end_idx - start_idx
        # Do inference for every batch.
        inputs[0].host = test_set[start_idx:start_idx + effective_batch_size]
        [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream, batch_size=effective_batch_size)
        # Use argmax to get predictions and then check accuracy
        preds = np.argmax(output.reshape(32, 10)[0:effective_batch_size], axis=1)
        labels = test_labels[start_idx:start_idx + effective_batch_size]
        num_total += effective_batch_size
        num_correct += np.count_nonzero(np.equal(preds, labels))
    percent_correct = 100 * num_correct / float(num_total)
    print("Total Accuracy: {:}%".format(percent_correct))
def infer_batch(self, image_paths):
    # Verify that the supplied batch size is not too big
    max_batch_size = self.trt_engine.max_batch_size
    actual_batch_size = len(image_paths)
    if actual_batch_size > max_batch_size:
        raise ValueError(
            "image_paths list bigger ({}) than engine max batch size ({})".format(
                actual_batch_size, max_batch_size))
    # Load all images to CPU...
    imgs = self._load_imgs(image_paths)
    # ...copy them into appropriate place into memory...
    # (self.inputs was returned earlier by allocate_buffers())
    np.copyto(self.inputs[0].host, imgs.ravel())
    # Run the engine.
    # self._do_inference(self.context, self.h_input, self.d_input, self.h_output, self.d_output, self.stream)
    out = engine_common.do_inference(self.context, self.bindings, self.inputs, self.outputs, self.stream, batch_size=1)
    # pred = labels[np.argmax(h_output)]
    # print(pred)
    return out
def main3():
    device = torch.device('cuda:0')
    path = '04.jpg'
    onnx_file_path = './models/ResNet50.onnx'
    engine_file_path = './models/ResNet50.trt'
    with get_engine(onnx_file_path, engine_file_path) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            load_normalized_test_case(path, inputs[0].host)
            start1 = time.time()
            trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            output = trt_outputs
            print('processing time3 is', time.time() - start1, np.argmax(output))
    return output
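# get_engine() as used in main3() above is not defined in this collection. A minimal sketch of a
# typical build-or-load helper, modeled on the pre-8.x TensorRT ONNX samples: parse the ONNX file
# and build an engine on the first run, cache the serialized plan, and deserialize it afterwards.
# The workspace size and the explicit-batch flag are illustrative assumptions.
def get_engine(onnx_file_path, engine_file_path):
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    if os.path.exists(engine_file_path):
        # Reuse the cached, serialized engine instead of rebuilding it.
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    explicit_batch = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network(explicit_batch) as network, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = 1 << 28  # 256 MiB
        with open(onnx_file_path, "rb") as model:
            if not parser.parse(model.read()):
                for i in range(parser.num_errors):
                    print(parser.get_error(i))
                raise RuntimeError("Failed to parse the ONNX file.")
        engine = builder.build_cuda_engine(network)
        # Cache the serialized plan so subsequent runs can skip the build step.
        with open(engine_file_path, "wb") as f:
            f.write(engine.serialize())
        return engine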
def main():
    data_paths, _ = common.find_sample_data(
        description="Runs an MNIST network using a UFF model file", subfolder="mnist")
    model_path = os.environ.get("MODEL_PATH") or os.path.join(os.path.dirname(__file__), "models")
    model_file = os.path.join(model_path, ModelData.MODEL_FILE)
    with build_engine(model_file) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            case_num = load_normalized_test_case(data_paths, pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
def infer(self, cv_image):
    """Infers model on given image.

    Args:
        cv_image (np.ndarray): BGR image to run the object detection model on
    """
    model_input_width = model_utils.ModelData.get_input_width()
    model_input_height = model_utils.ModelData.get_input_height()
    image_resized = cv2.resize(cv_image, (model_input_width, model_input_height))
    img_np = cv2.cvtColor(image_resized, cv2.COLOR_BGR2RGB)
    img_np = img_np.transpose((2, 0, 1)).astype(np.float32)
    img_np = (2.0 / 255.0) * img_np - 1.0
    img = img_np.ravel()
    # Load image into CPU
    # img = self._load_img(image_path)
    # Copy it into appropriate place into memory
    # (self.inputs was returned earlier by allocate_buffers())
    np.copyto(self.inputs[0].host, img.ravel())
    # Fetch output from the model
    [detection_out, keepCount_out] = common.do_inference(self.context, bindings=self.bindings, inputs=self.inputs, outputs=self.outputs, stream=self.stream)
    # And return results
    return detection_out, keepCount_out
def main():
    # data_path, _ = common.find_sample_data(description="Runs an MNIST network using a UFF model file", subfolder="mnist")
    data_path = '/home/ai/tensorrt_tar/TensorRT-5.1.5.0/data/mnist'
    model_path = os.environ.get("MODEL_PATH") or os.path.join(os.path.dirname(__file__), "models")
    model_file = os.path.join(model_path, ModelData.MODEL_FILE)
    with build_engine(model_file) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            case_num = load_normalized_test_case(data_path, pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            start1 = time.time()
            [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            pred = np.argmax(output)
            print("Prediction: " + str(pred), 'time is :', time.time() - start1)
    model1 = model.create_model()
    model1.load_weights("models/lenet5.pb")
    start2 = time.time()
    output = model1.predict(inputs[0])
    pred = np.argmax(output)
    print("Prediction: " + str(pred), 'time is :', time.time() - start2)
def inference_batch(self, images):
    batch_size = len(images)
    img_batch = np.zeros((self.batch_size, 3, self.img_size, self.img_size))
    for i, img in enumerate(images):
        img = cv2.resize(img, (self.img_size, self.img_size))
        img = img[..., ::-1]  # BGR to RGB
        img = img.transpose(2, 0, 1)
        img = np.array(img, dtype=np.float32)
        img = (img - 127.5) / 128
        img_batch[i] = img
    img_batch = np.array(img_batch, dtype=np.float32, order='C')
    self.inputs[0].host = img_batch
    feat_batch = common.do_inference(self.context, bindings=self.bindings, inputs=self.inputs, outputs=self.outputs, stream=self.stream, batch_size=batch_size)
    feats = np.asarray([
        self.l2_norm_numpy(feat_batch[0][i * self.feat_size:(i + 1) * self.feat_size])
        for i in range(batch_size)
    ])
    return feats
def inference_batch(self, images, batch_size):
    """Multi-batch inference."""
    img_batch = np.zeros((batch_size, 3, self.img_size, self.img_size))
    for i, img in enumerate(images):
        img = cv2.resize(img, (self.img_size, self.img_size))
        # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.transpose(2, 0, 1)
        img_batch[i] = img
    img_batch = np.array(img_batch, dtype=np.float32, order='C')
    self.inputs[0].host = img_batch
    fb = common.do_inference(self.context, bindings=self.bindings, inputs=self.inputs, outputs=self.outputs, stream=self.stream, batch_size=batch_size)
    fb = fb[0]
    atts = [{
        'gender': np.argmax(fb[i * self.feat_size:i * self.feat_size + 2]),
        'age': np.sum(
            np.argmax(fb[i * self.feat_size + 2:i * self.feat_size + 202].reshape((100, 2)), axis=1))
    } for i in range(batch_size)]
    return atts
def main():
    # Get data files for the model.
    data_paths, [deploy_file, model_file, mean_proto] = common.find_sample_data(
        description="Runs an MNIST network using a Caffe model file",
        subfolder="mnist",
        find_files=["mnist.prototxt", "mnist.caffemodel", "mnist_mean.binaryproto"])
    # Cache the engine in a temporary directory.
    engine_path = os.path.join(tempfile.gettempdir(), "mnist.engine")
    with get_engine(deploy_file, model_file, engine_path) as engine, engine.create_execution_context() as context:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        mean = retrieve_mean(mean_proto)
        # For more information on performing inference, refer to the introductory samples.
        inputs[0].host, case_num = load_normalized_test_case(data_paths, mean)
        # The common.do_inference function will return a list of outputs - we only have one in this case.
        [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
        pred = np.argmax(output)
        print("Test Case: " + str(case_num))
        print("Prediction: " + str(pred))
def main():
    # Set the data path to the directory that contains the trained models and test images for inference.
    # data_path, data_files = common.find_sample_data(description="Runs a ResNet50 network with a TensorRT inference engine.", subfolder="resnet50", find_files=["binoculars.jpeg", "reflex_camera.jpeg", "tabby_tiger_cat.jpg", ModelData.MODEL_PATH, ModelData.DEPLOY_PATH, "class_labels.txt"])
    # Get test images, models and labels.
    # test_images = data_files[0:3]
    # test_image = "0.jpg"
    test_image_list = os.listdir('car_rec_test')
    print(test_image_list)
    engine_file_path = "car_rec.trt"
    caffe_model_file, caffe_deploy_file, labels_file = [ModelData.MODEL_PATH, ModelData.DEPLOY_PATH, ModelData.LABEL_PATH]
    labels = open(labels_file, 'r').read().split('\n')
    # Build a TensorRT engine.
    with build_engine_caffe(caffe_model_file, caffe_deploy_file, engine_file_path) as engine:
        # Inference is the same regardless of which parser is used to build the engine, since the model architecture is the same.
        # Allocate buffers and create a CUDA stream.
        # h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Contexts are used to perform inference.
        with engine.create_execution_context() as context:
            # Load a normalized test case into the host input page-locked buffer.
            # test_image = random.choice(test_images)
            # test_case = load_normalized_test_case(test_image, h_input)
            test_cases = load_normalized_test_cases(test_image_list, inputs)
            # Run the engine. The output will be a 1D tensor of length 1000, where each value represents the
            # probability that the image corresponds to that label
            # do_inference(context, h_input, d_input, h_output, d_output, stream)
            trt_outputs = common.do_inference(context, bindings, inputs, outputs, stream, 16)
            outs = trt_outputs[0].reshape(16, 427)
            print(outs)
            for x in range(0, len(outs)):
                pred = labels[np.argmax(outs[x])]
                print(pred)
            pass
def main():
    common.add_help(description="Runs an MNIST network using a PyTorch model")
    # Train the PyTorch model
    mnist_model = model.MnistModel()
    mnist_model.learn()
    weights = mnist_model.get_weights()
    # Do inference with TensorRT.
    engine = build_engine(weights)
    # Build an engine, allocate buffers and create a stream.
    # For more information on buffer allocation, refer to the introductory samples.
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    context = engine.create_execution_context()
    case_num = load_random_test_case(mnist_model, pagelocked_buffer=inputs[0].host)
    # For more information on performing inference, refer to the introductory samples.
    # The common.do_inference function will return a list of outputs - we only have one in this case.
    [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
    pred = np.argmax(output)
    print("Test Case: " + str(case_num))
    print("Prediction: " + str(pred))
def main():
    data_path, data_files = common.find_sample_data(
        description="Runs a Caffe MNIST network in Int8 mode",
        subfolder="mnist",
        find_files=["batches", ModelData.DEPLOY_PATH, ModelData.MODEL_PATH])
    [batch_data_dir, deploy_file, model_file] = data_files
    # Now we create a calibrator and give it the location of our calibration data.
    # We also allow it to cache calibration data for faster engine building.
    calibration_cache = "mnist_calibration.cache"
    calib = calibrator.MNISTEntropyCalibrator(batch_data_dir, cache_file=calibration_cache)
    # We will use the calibrator batch size across the board.
    # This is not a requirement, but in this case it is convenient.
    batch_size = calib.get_batch_size()
    with build_int8_engine(deploy_file, model_file, calib) as engine, engine.create_execution_context() as context:
        # Allocate engine buffers.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Do inference for the whole batch. We have to specify batch size here,
        # as common.do_inference uses a default batch size otherwise.
        inputs[0].host, labels = load_random_batch(calib)
        [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream, batch_size=batch_size)
        # Next we need to reshape the output to Nx10 (10 probabilities, one per digit), where N is batch size.
        output = output.reshape(batch_size, 10)
        validate_output(output, labels)
def predict(inp: Image, metadata):
    image_raw, image = preprocessor.process(inp)
    shape_orig_WH = image_raw.size
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    # Do inference
    print('Running inference on image')
    # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
    inputs[0].host = image
    a = perf_counter()
    trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
    b = perf_counter()
    metadata['TensorRT Inference Latency (s)'] = (b - a)
    trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]
    postprocessor = PostprocessYOLO(**postprocessor_args)
    # Run the post-processing algorithms on the TensorRT outputs and get the bounding box details of detected objects
    boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH))
    # Draw the bounding boxes onto the original input image and save it as a PNG file
    obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)
    return obj_detected_img
def main():
    # By doing this, you will also register the Clip plugin with the TensorRT
    # PluginRegistry through use of the macro REGISTER_TENSORRT_PLUGIN present
    # in the plugin implementation. Refer to plugin/clipPlugin.cpp for more details.
    if not os.path.isfile(CLIP_PLUGIN_LIBRARY):
        raise IOError("\n{}\n{}\n{}\n".format(
            "Failed to load library ({}).".format(CLIP_PLUGIN_LIBRARY),
            "Please build the Clip sample plugin.",
            "For more information, see the included README.md"))
    # Train MNIST data and get weights.
    mnist_model = MnistModel()
    mnist_model.learn()
    weights = mnist_model.get_weights()
    # Do inference with TensorRT.
    with build_engine(weights) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            case_num = load_random_test_case(mnist_model, pagelocked_buffer=inputs[0].host)
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
def main():
    data_path = common.find_sample_data(
        description="Runs an MNIST network using a UFF model file", subfolder="mnist")
    model_file = ModelData.MODEL_FILE
    t1 = time.clock()
    with build_engine(model_file) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        # with open('/home/nvidia/procedure/lenet5.engine', 'wb') as f:
        #     f.write(engine.serialize())
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            case_num = load_normalized_test_case(data_path, pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
    t2 = time.clock()
    print("use_time:" + str(t2 - t1))
def get_trt_test_accuracy(engine, inputs, outputs, bindings, stream, mnist_model):
    # Create an IExecutionContext for the engine.
    with engine.create_execution_context() as context:
        # Counters for the accuracy statistics.
        correct = 0
        total = 0
        # Run inference on every sample.
        # Technically this could be batched, however this only comprises a fraction of total
        # time spent in the test.
        # get_all_test_samples is implemented in model.py.
        for test_img, test_name in mnist_model.get_all_test_samples():
            # load_img_to_input_buffer (implemented in this file) copies the image
            # into the pagelocked host input buffer.
            load_img_to_input_buffer(test_img, pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            # do_inference is implemented in common.py.
            [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            # Post-process: take the argmax as the predicted digit.
            pred = np.argmax(output)
            correct += (test_name == pred)
            total += 1
        accuracy = float(correct) / total
        print("Got {} correct predictions out of {} ({:.1f}%)".format(correct, total, 100 * accuracy))
        return accuracy
def infer(self, image_path):
    """Infers model on given image.

    Args:
        image_path (str): image to run object detection model on
    """
    # Load image into CPU
    img = self._load_img(image_path)
    # Copy it into appropriate place into memory
    # (self.inputs was returned earlier by allocate_buffers())
    np.copyto(self.inputs[0].host, img.ravel())
    # When inferring on a single image, we measure inference
    # time to output it to the user
    inference_start_time = time.time()
    # Fetch output from the model
    [detection_out, keepCount_out] = common.do_inference(self.context, bindings=self.bindings, inputs=self.inputs, outputs=self.outputs, stream=self.stream)
    # Output inference time
    print("TensorRT inference time: {} ms".format(
        int(round((time.time() - inference_start_time) * 1000))))
    # And return results
    return detection_out, keepCount_out
def infer_batch(self, image_paths):
    """Infers model on batch of same sized images resized to fit the model.

    Args:
        image_paths (str): paths to images, that will be packed into batch
            and fed into model
    """
    # Verify if the supplied batch size is not too big
    max_batch_size = self.trt_engine.max_batch_size
    actual_batch_size = len(image_paths)
    if actual_batch_size > max_batch_size:
        raise ValueError(
            "image_paths list bigger ({}) than engine max batch size ({})".format(
                actual_batch_size, max_batch_size))
    # Load all images to CPU...
    imgs = self._load_imgs(image_paths)
    # ...copy them into appropriate place into memory...
    # (self.inputs was returned earlier by allocate_buffers())
    np.copyto(self.inputs[0].host, imgs.ravel())
    # ...fetch model outputs...
    [detection_out, keep_count_out] = common.do_inference(self.context, bindings=self.bindings, inputs=self.inputs, outputs=self.outputs, stream=self.stream, batch_size=max_batch_size)
    # ...and return results.
    return detection_out, keep_count_out
def main():
    # Load the shared object file containing the Clip plugin implementation.
    # By doing this, you will also register the Clip plugin with the TensorRT
    # PluginRegistry through use of the macro REGISTER_TENSORRT_PLUGIN present
    # in the plugin implementation. Refer to plugin/clipPlugin.cpp for more details.
    if not os.path.isfile(CLIP_PLUGIN_LIBRARY):
        raise IOError("\n{}\n{}\n{}\n".format(
            "Failed to load library ({}).".format(CLIP_PLUGIN_LIBRARY),
            "Please build the Clip sample plugin.",
            "For more information, see the included README.md"))
    ctypes.CDLL(CLIP_PLUGIN_LIBRARY)
    # Load pretrained model
    if not os.path.isfile(MODEL_PATH):
        raise IOError("\n{}\n{}\n{}\n".format(
            "Failed to load model file ({}).".format(MODEL_PATH),
            "Please use 'python lenet5.py' to train and save the model.",
            "For more information, see the included README.md"))
    # Build an engine and retrieve the image mean from the model.
    with build_engine(MODEL_PATH) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            print("\n=== Testing ===")
            test_case = load_normalized_test_case(inputs[0].host)
            print("Loading Test Case: " + str(test_case))
            # The common do_inference function will return a list of outputs - we only have one in this case.
            [pred] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            print("Prediction: " + str(np.argmax(pred)))
def get_trt_test_accuracy(engine, inputs, outputs, bindings, stream, mnist_model):
    with engine.create_execution_context() as context:
        correct = 0
        total = 0
        # Run inference on every sample.
        # Technically this could be batched, however this only comprises a fraction of total
        # time spent in the test.
        for test_img, test_name in mnist_model.get_all_test_samples():
            load_img_to_input_buffer(test_img, pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            pred = np.argmax(output)
            correct += (test_name == pred)
            total += 1
        accuracy = float(correct) / total
        print("Got {} correct predictions out of {} ({:.1f}%)".format(correct, total, 100 * accuracy))
        return accuracy
def main():
    # Get data files for the model.
    data_path, [deploy_file, model_file, mean_proto] = common.find_sample_data(
        description="Runs an MNIST network using a Caffe model file",
        subfolder="mnist",
        find_files=["mnist.prototxt", "mnist.caffemodel", "mnist_mean.binaryproto"])
    with build_engine(deploy_file, model_file) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        mean = retrieve_mean(mean_proto)
        with engine.create_execution_context() as context:
            case_num = load_normalized_test_case(data_path, inputs[0].host, mean)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
    # After the engine is destroyed, we destroy the plugin.
    # This function is exposed through the binding code in plugin/pyFullyConnected.cpp.
    fc_factory.destroy_plugin()
def main():
    x = "/home/dgxuser125/rt-kennan/Swish3/build/libswish.so"
    ctypes.CDLL(x)
    data_paths, _ = common.find_sample_data(
        description="Runs an MNIST network using a UFF model file", subfolder="mnist")
    model_path = os.environ.get("MODEL_PATH") or os.path.join(os.path.dirname(__file__), "models")
    model_file = os.path.join(model_path, ModelData.MODEL_FILE)
    with build_engine(model_file) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            # Start measuring time
            inference_start_time = time.time()
            for i in range(1000):
                case_num = load_normalized_test_case(data_paths, pagelocked_buffer=inputs[0].host)
                [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
                pred = np.argmax(output)
                # print("Test Case: " + str(case_num))
                # print("Prediction: " + str(pred))
            end_time = time.time()
            print("time taken for one input with tensorrt: ", (end_time - inference_start_time) / 1000)
def main():
    engine = init_construct_network()
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    with engine.create_execution_context() as context:
        print(np.ones((x1 * y1 * z1), np.float32).reshape(-1))
        np.copyto(inputs[0].host, np.ones((x1 * y1 * z1), np.float32).reshape(-1))
        np.copyto(inputs[1].host, np.ones((x2 * y2 * z2), np.float32).reshape(-1))
        time_start = time.time()
        output = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
        time_end = time.time()
        print("time ", time_end - time_start)
        print(output[0].reshape((x1 * y1 * z1)))
        data = output[0].reshape((x1 * y1 * z1))
        print(data[0])
        print(output[1].reshape((x1 * y1 * z1)))
        print("ok")
def main():
    # Load labels
    labels = [line.rstrip('\n') for line in open('class_labels.txt')]
    # Load engine
    trt_engine = './vgg16_32.trt'
    Cifar10_engine = load_engine(trt_engine)
    dispW = 1280
    dispH = 720
    flip = 0
    fpsReport = 0
    camSet = ('nvarguscamerasrc ! video/x-raw(memory:NVMM), width=1280, height=720, format=NV12, framerate=60/1 '
              '! nvvidconv flip-method=' + str(flip) + ' ! video/x-raw, width=' + str(dispW) + ', height=' + str(dispH) +
              ', format=BGRx ! videoconvert ! video/x-raw, format=BGR ! appsink')
    cap = cv2.VideoCapture(camSet, cv2.CAP_GSTREAMER)
    timeStamp = time.time()
    font = cv2.FONT_HERSHEY_SIMPLEX
    while True:
        _, frame = cap.read()
        frame = frame.astype('float32')
        frameRGB = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        img = cv2.resize(frameRGB, (32, 32)) / 255
        img = img.transpose((2, 0, 1)).flatten()
        # Allocate buffers for inputs and outputs
        inputs, outputs, bindings, stream = common.allocate_buffers(Cifar10_engine)
        inputs[0].host = img
        # Inference
        with Cifar10_engine.create_execution_context() as context:
            trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
        pred = trt_outputs[0].argmax(-1)
        # FPS
        dt = time.time() - timeStamp
        fps = 1 / dt
        fpsReport = .9 * fpsReport + .1 * fps
        timeStamp = time.time()
        cv2.rectangle(frame, (0, 0), (350 + len(labels[pred]) * 30, 80), (0, 0, 255), -1)
        cv2.putText(frame, str(round(fpsReport, 1)) + 'fps' + ', ' + labels[pred], (0, 60), font, 2, (0, 255, 255), 3)
        cv2.imshow('stream', frame / 255)
        if cv2.waitKey(1) == 27:
            break
    cap.release()
    cv2.destroyAllWindows()
def main(win_title):
    # Load the TensorRT engine
    print('load trt engine')
    trt_path = 'engine.trt'
    engine = load_engine(trt_runtime, trt_path)
    print('load labels')
    label = get_label('keras_models/labels.txt')
    # Allocate buffers
    print('allocate buffers')
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    print('create execution context')
    context = engine.create_execution_context()
    print('start stream')
    fps = -1
    GSTREAMER_PIPELINE = 'nvarguscamerasrc ! video/x-raw(memory:NVMM), width=1920, height=1080, format=(string)NV12, framerate=60/1 ! nvvidconv flip-method=0 ! video/x-raw, width=640, height=480, format=(string)BGRx ! videoconvert ! video/x-raw, format=(string)BGR ! appsink'
    cap = cv2.VideoCapture(GSTREAMER_PIPELINE, cv2.CAP_GSTREAMER)
    while True:
        t_start = time.time()
        ret, frame = cap.read()
        size = (224, 224)
        inputs[0].host = preprocess(frame)
        # with engine.create_execution_context() as context:
        trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
        preds = trt_outputs[0]
        idx = np.argmax(preds)
        result = label[idx]
        info = '{} : {:.3f} , FPS {}'.format(result, preds[idx], fps)
        cv2.putText(frame, info, (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 4)
        cv2.imshow(win_title, frame)
        if cv2.waitKey(1) == ord('q'):
            break
        fps = int(1 / (time.time() - t_start))
    cap.release()
    cv2.destroyAllWindows()
    print('Quit')