def main():
    common.add_help(description="Runs an MNIST network using a PyTorch model")
    # Train the PyTorch model
    mnist_model = model.MnistModel()
    mnist_model.learn()
    weights = mnist_model.get_weights()
    # Do inference with TensorRT.
    engine = build_engine(weights)

    # Build an engine, allocate buffers and create a stream.
    # For more information on buffer allocation, refer to the introductory samples.
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    context = engine.create_execution_context()

    case_num = load_random_test_case(mnist_model, pagelocked_buffer=inputs[0].host)
    # For more information on performing inference, refer to the introductory samples.
    # The common.do_inference function will return a list of outputs - we only have one in this case.
    [output] = common.do_inference_v2(context, bindings=bindings, inputs=inputs,
                                      outputs=outputs, stream=stream)
    pred = np.argmax(output)
    print("Test Case: " + str(case_num))
    print("Prediction: " + str(pred))
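# All of the examples in this section lean on the same `common` helper module. For
# orientation, below is a minimal sketch of what `common.allocate_buffers` and
# `common.do_inference_v2` typically look like, modeled on the common.py shipped with the
# TensorRT Python samples; the exact code varies by TensorRT version, so treat this as an
# assumption rather than the canonical implementation.
import pycuda.driver as cuda
import pycuda.autoinit  # noqa: F401 - creates a default CUDA context
import tensorrt as trt

class HostDeviceMem:
    """Pairs a page-locked host buffer with its device allocation."""
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

def allocate_buffers(engine):
    """Allocate host/device buffers for every binding and create a CUDA stream."""
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        bindings.append(int(device_mem))
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream

def do_inference_v2(context, bindings, inputs, outputs, stream):
    """Async H2D copies, enqueue execution, async D2H copies, then synchronize."""
    for inp in inputs:
        cuda.memcpy_htod_async(inp.device, inp.host, stream)
    context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
    for out in outputs:
        cuda.memcpy_dtoh_async(out.host, out.device, stream)
    stream.synchronize()
    # Return only the host outputs (flat arrays; callers reshape them).
    return [out.host for out in outputs]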
def main(): """Create a TensorRT engine for ONNX-based model and run inference.""" # Try to load a previously generated network graph in ONNX format: onnx_file_path = '/models/run09/jetracer.onnx' engine_file_path = '/models/run09/jetracer.trt' ino = 378 # Do inference with TensorRT trt_outputs = [] with get_engine(onnx_file_path, engine_file_path ) as engine, engine.create_execution_context() as context: inputs, outputs, bindings, stream = common.allocate_buffers(engine) # Do inference print('Running inference on image') # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing. image = cv2.imread( f'/models/train_data/Images/{ino:03d}.jpg').transpose( 2, 0, 1).reshape(1, 3, 320, 640) inputs[0].host = np.array(image, dtype=np.float16, order='C') trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream) # Before doing post-processing, we need to reshape the outputs as the common.do_inference will give us flat arrays. #mask = trt_outputs.reshape(320,640).numpy()[0][0]>0.4 print(trt_outputs[0].shape)
def predict(self, img, min_scale=736):
    # with self.engine.create_execution_context() as context:
    img = self.resize_image(img, min_scale=min_scale)
    self.load_normalized_test_case(img, self.inputs[0].host)
    trt_outputs = common.do_inference_v2(self.context, bindings=self.bindings,
                                         inputs=self.inputs, outputs=self.outputs,
                                         stream=self.stream)
    preds = trt_outputs[0].reshape(1, 2, 736, 736)
    mask = preds[0, 0, ...]
    batch = {'shape': [(736, 736)]}
    box_list, score_list = SegDetectorRepresenter(
        thresh=0.5, box_thresh=0.7, max_candidates=1000, unclip_ratio=1.5)(batch, preds)
    box_list, score_list = box_list[0], score_list[0]
    is_output_polygon = False
    if len(box_list) > 0:
        if is_output_polygon:
            idx = [x.sum() > 0 for x in box_list]
            box_list = [box_list[i] for i, v in enumerate(idx) if v]
            score_list = [score_list[i] for i, v in enumerate(idx) if v]
        else:
            # Drop boxes whose coordinates are all zero.
            idx = box_list.reshape(box_list.shape[0], -1).sum(axis=1) > 0
            box_list, score_list = box_list[idx], score_list[idx]
    else:
        box_list, score_list = [], []
    return mask, box_list, score_list
def main(args):
    with get_engine(args.engine_path, args.model_dir) as engine:
        with engine.create_execution_context() as context:
            origin_img = cv2.imread(args.image_path)
            t1 = time.time()
            img, (ratio_h, ratio_w) = preprocess(origin_img)
            cv2.imwrite("processed.jpg", img)
            h, w, _ = img.shape
            # hwc to chw
            img = img.transpose((2, 0, 1))
            # flatten the image into a 1D array
            img = img.ravel()
            context.set_binding_shape(0, (1, 3, h, w))
            # allocate buffers and create a stream.
            inputs, outputs, bindings, stream = common.allocate_buffers(engine, context)
            # copy to pagelocked memory
            np.copyto(inputs[0].host, img)
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference_v2(context, bindings=bindings, inputs=inputs,
                                              outputs=outputs, stream=stream)
            # reshape 1D array to chw
            output = np.reshape(output, (6, h // 4, w // 4))
            # transpose chw to hwc
            output = output.transpose(1, 2, 0)
            boxes = postprocess(origin_img, output, ratio_h, ratio_w)
            t2 = time.time()
            print("total cost %fms" % ((t2 - t1) * 1000))
            draw_result(origin_img, boxes)
def main():
    onnx_file_path = 'test3.onnx'
    engine_file_path = "model_engine.trt"
    input_image_path = "../yoloF_test/YOLOF/datasets/coco/val2017/000000000285.jpg"
    image_raw = Image.open(input_image_path)
    w, h = image_raw.size
    w_, h_ = resize_im(w, h, scale=800, max_scale=4000)
    print(w_, h_)
    image_resized = image_raw.resize((w_, h_), resample=Image.BICUBIC)
    image_resized = np.array(image_resized, dtype=np.int32, order='C')
    output_shapes = [(1, 512, 28, 25)]

    trt_outputs = []
    with get_engine(onnx_file_path, engine_file_path) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = allocate_buffers2(engine, w_, h_)
        # Do inference
        print('Running inference on image {}...'.format(input_image_path))
        # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
        inputs[0].host = image_resized
        context.set_binding_shape(0, (1, 3, h_, w_))
        trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs,
                                             outputs=outputs, stream=stream)
        # Before doing post-processing, we need to reshape the outputs as common.do_inference will give us flat arrays.
        trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]
        print(trt_outputs[0])
def main():
    # Set the data path to the directory that contains the trained models and test images for inference.
    _, data_files = common.find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=["binoculars.jpeg", "reflex_camera.jpeg", "tabby_tiger_cat.jpg",
                    ModelData.MODEL_PATH, "class_labels.txt"])
    # Get test images, models and labels.
    test_images = data_files[0:3]
    onnx_model_file, labels_file = data_files[3:]
    labels = open(labels_file, 'r').read().split('\n')

    # Build a TensorRT engine.
    with build_engine_onnx(onnx_model_file) as engine:
        # Inference is the same regardless of which parser is used to build the engine,
        # since the model architecture is the same.
        # Allocate buffers and create a CUDA stream.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Contexts are used to perform inference.
        with engine.create_execution_context() as context:
            # Load a normalized test case into the host input page-locked buffer.
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, inputs[0].host)
            # Run the engine. The output will be a 1D tensor of length 1000, where each value
            # represents the probability that the image corresponds to that label.
            trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs,
                                                 outputs=outputs, stream=stream)
            # We use the highest probability as our prediction. Its index corresponds to the predicted label.
            pred = labels[np.argmax(trt_outputs[0])]
            if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)
def trt_inference(input):
    global INPUTS, OUTPUTS, BINDINGS, STREAM, CONTEXT
    INPUTS[0].host = input
    trt_outputs = common.do_inference_v2(CONTEXT, bindings=BINDINGS, inputs=INPUTS,
                                         outputs=OUTPUTS, stream=STREAM)
    output = trt_outputs[0].reshape(SHAPE[1], SHAPE[2], SHAPE[3])
    return output
def iter_inf(ds_path, batch_size, model):
    dataloader = data_loader(ds_path, batch_size, (224, 224))
    num_cls = len(glob.glob(os.path.join(ds_path, '*')))
    rslts = {}
    rslts['eval_num'] = len(glob.glob(os.path.join(ds_path, '*/*')))
    print("ds_path", ds_path)
    print("rslts eval_num", rslts['eval_num'])
    correct, counter, inf_time = 0, 0, 0.0
    batch_num = rslts['eval_num'] // batch_size + 1
    remainder = rslts['eval_num'] % batch_size
    print("[ INFO ] Building engine.")
    with build_engine(batch_size, model) as engine, \
            engine.create_execution_context() as context:
        rslts['warm_up_start'] = time.time()
        # For multi profile
        # context.active_optimization_profile = 0
        context.set_binding_shape(0, (batch_size, 3, 224, 224))
        print("[ INFO ] Inference start.")
        pbar = tqdm.tqdm(dataloader)
        for batch in pbar:
            imgs, labels = batch
            inputs, outputs, bindings, stream = common.allocate_buffers(engine)
            inputs[0].host = imgs
            counter += 1
            inf_start = time.time()
            trt_outputs = common.do_inference_v2(context, bindings, inputs, outputs, stream)
            inf_time += time.time() - inf_start
            splited_outputs = ([[output[i:i + num_cls] for i in range(0, len(output), num_cls)]
                                for output in trt_outputs])
            preds = [np.argsort(splited_output)[:, -1] for splited_output in splited_outputs]
            # FIXME Only count first output here
            if counter != batch_num:
                correct += np.sum(np.equal(preds[0], labels))
            else:
                correct += np.sum(np.equal(preds[0][:remainder], labels[:remainder]))
    rslts['inf_time'] = inf_time
    rslts['end'] = time.time()
    rslts['correct'] = correct
    print("[ INFO ] Inference done.")
    return rslts
def infer_img():
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference."""
    # Try to load a previously generated YOLOv3-608 network graph in ONNX format:
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = "yolov3.trt"
    # Download a dog image and save it to the following file path:
    input_image_path = download_file(
        'dog.jpg',
        'https://github.com/pjreddie/darknet/raw/f86901f6177dfc6116360a13cc06ab680e0c86b0/data/dog.jpg',
        checksum_reference=None)

    # Two-dimensional tuple with the target network's (spatial) input resolution in HW order
    input_resolution_yolov3_HW = (608, 608)
    # Create a pre-processor object by specifying the required input resolution for YOLOv3
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    # Load an image from the specified input path, and return it together with a pre-processed version
    image_raw, image = preprocessor.process(input_image_path)
    # Store the shape of the original input image in WH format; we will need it later
    shape_orig_WH = image_raw.size

    # Output shapes expected by the post-processor
    output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]
    # Do inference with TensorRT
    trt_outputs = []
    with get_engine(onnx_file_path, engine_file_path) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Do inference
        print('Running inference on image {}...'.format(input_image_path))
        # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
        inputs[0].host = image
        trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs,
                                             outputs=outputs, stream=stream)

    # Before doing post-processing, we need to reshape the outputs as common.do_inference gives us flat arrays.
    trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]

    postprocessor_args = {
        # A list of 3 three-dimensional tuples for the YOLO masks
        "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        # A list of 9 two-dimensional tuples for the YOLO anchors
        "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                         (59, 119), (116, 90), (156, 198), (373, 326)],
        "obj_threshold": 0.6,   # Threshold for object coverage, float value between 0 and 1
        "nms_threshold": 0.5,   # Threshold for the non-max suppression algorithm, float value between 0 and 1
        "yolo_input_resolution": input_resolution_yolov3_HW}

    postprocessor = PostprocessYOLO(**postprocessor_args)
    # Run the post-processing algorithms on the TensorRT outputs and get the bounding boxes of detected objects
    boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH))
    # Draw the bounding boxes onto the original input image and display it
    im = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)
    im = np.asarray(im)[..., ::-1]  # RGB -> BGR for OpenCV display
    cv2.imshow("det", im)
    cv2.waitKey()
    cv2.destroyAllWindows()
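# Several of these examples call a get_engine(onnx_file_path, engine_file_path) helper
# that is not shown. A minimal sketch, modeled on the TensorRT 7-era ONNX samples: the
# builder flags and workspace size below are assumptions, not the source's exact values.
import os
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

def get_engine(onnx_file_path, engine_file_path=""):
    """Load a serialized engine if it exists, otherwise build one from the ONNX file and cache it."""
    if os.path.exists(engine_file_path):
        # Deserialize a previously built engine.
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    # Build a new engine from the ONNX network definition.
    explicit_batch = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network(explicit_batch) as network, \
            trt.OnnxParser(network, TRT_LOGGER) as parser, \
            builder.create_builder_config() as config:
        config.max_workspace_size = 1 << 28  # 256 MiB; tune for your model
        with open(onnx_file_path, "rb") as model:
            if not parser.parse(model.read()):
                for i in range(parser.num_errors):
                    print(parser.get_error(i))
                raise RuntimeError("Failed to parse the ONNX file.")
        engine = builder.build_engine(network, config)
        if engine is None:
            raise RuntimeError("Engine build failed.")
        with open(engine_file_path, "wb") as f:
            f.write(engine.serialize())
        return engine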
def main():
    onnx_file_path = 'bidaf-modified.onnx'
    engine_file_path = "bidaf.trt"

    # Input
    context = 'A quick brown fox jumps over the lazy dog.'
    query = 'What color is the fox?'
    cw_str, _ = preprocess(context)
    # Get ravelled data
    cw, cc, qw, qc = get_inputs(context, query)

    # Do inference with TensorRT
    refit_weights = np.load("Parameter576_B_0.npy")
    fake_weights = np.ones_like(refit_weights)
    engine = get_engine(onnx_file_path, engine_file_path)
    refitter = trt.Refitter(engine, TRT_LOGGER)
    # Use a distinct name for the execution context, so it does not shadow the text `context` above.
    exec_context = engine.create_execution_context()
    for weights, answer_correct in [(fake_weights, False), (refit_weights, True)]:
        print("Refitting engine...")
        # To get a list of all refittable weights' names in the network, use refitter.get_all_weights().
        # Refit named weights via set_named_weights.
        refitter.set_named_weights('Parameter576_B_0', weights)
        # Get missing weights names. This should return empty lists in this case.
        missing_weights = refitter.get_missing_weights()
        assert len(missing_weights) == 0, \
            "Refitter found missing weights. Call set_named_weights() or set_weights() for all missing weights"
        # Refit the engine with the new weights. This will return True if the refit operation succeeded.
        assert refitter.refit_cuda_engine()
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print("Doing inference...")
        # Set host inputs. The common.do_inference_v2 function will copy the inputs to the GPU before executing.
        inputs[0].host = cw
        inputs[1].host = cc
        inputs[2].host = qw
        inputs[3].host = qc
        trt_outputs = common.do_inference_v2(exec_context, bindings=bindings, inputs=inputs,
                                             outputs=outputs, stream=stream)
        start = trt_outputs[0].item()  # scalar start index
        end = trt_outputs[1].item()    # scalar end index
        answer = [w.encode() for w in cw_str[start:end + 1].reshape(-1)]
        assert answer_correct == (answer == [b'brown'])
    print("Passed")
def main():
    for bs in BATCH_SIZEs:
        onnx_file_path = 'lab5_model.onnx'
        engine_file_path = ("model%d.trt" % (bs))
        # Do inference with TensorRT
        test_loader = torch.utils.data.DataLoader(test_set, batch_size=bs, shuffle=False)
        trt_outputs = []
        with get_engine(onnx_file_path, engine_file_path, bs) as engine, \
                engine.create_execution_context() as context:
            inputs, outputs, bindings, stream = common.allocate_buffers(engine)
            # Do inference
            print('Running inference')
            print('Batch size: %d' % (bs))
            right = 0
            number = 0
            start_time = time.time()
            # Set host input to the images. The common.do_inference function will copy the input to the GPU before executing.
            for i, (images, labels) in enumerate(test_loader, 0):
                images = images.numpy()
                inputs[0].host = images
                [results] = common.do_inference_v2(context, bindings=bindings, inputs=inputs,
                                                   outputs=outputs, stream=stream)
                for j in range(len(labels)):
                    result = results[j * 11:(j + 1) * 11]
                    pred = argmax(result)
                    if (pred == labels[j]):
                        right += 1
                    number += 1
            latency = time.time() - start_time
            print('Time elapsed: %.4f' % (latency))
            print(right / number)
            latencies.append(latency)
            FPSs.append(len(test_set) / (latency))
    plt.subplot(2, 1, 1)
    plt.plot(BATCH_SIZEs, latencies)
    plt.ylabel('latency (s)')
    plt.subplot(2, 1, 2)
    plt.plot(BATCH_SIZEs, FPSs)
    plt.xlabel('batch size')
    plt.ylabel('FPS')
    plt.savefig('./test.png')
def infer_cam():
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference."""
    # Try to load a previously generated YOLOv3-608 network graph in ONNX format:
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = 'yolov3.trt'
    # Two-dimensional tuple with the target network's (spatial) input resolution in HW order
    input_resolution_yolov3_HW = (608, 608)
    # Output shapes expected by the post-processor
    output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]
    # Arguments for the post-processor
    postprocessor_args = {
        # A list of 3 three-dimensional tuples for the YOLO masks
        "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        # A list of 9 two-dimensional tuples for the YOLO anchors
        "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                         (59, 119), (116, 90), (156, 198), (373, 326)],
        "obj_threshold": 0.6,  # Threshold for object coverage, float value between 0 and 1
        "nms_threshold": 0.5,  # Threshold for the non-max suppression algorithm, float value between 0 and 1
        "yolo_input_resolution": input_resolution_yolov3_HW}

    cap = cv2.VideoCapture(0)
    trt_outputs = []
    # Do inference with TensorRT
    with get_engine(onnx_file_path, engine_file_path) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        while True:
            ret, frame = cap.read()
            assert ret
            # Load an image from the captured frame, and return it together with a pre-processed version
            preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
            image_raw, image = preprocessor.process(frame)
            # Store the shape of the original input image in WH format; we will need it later
            shape_orig_WH = image_raw.size
            t = time()
            # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
            inputs[0].host = image
            trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs,
                                                 outputs=outputs, stream=stream)
            t = time() - t
            fps = 1 / t
            print("infer: %.2fms, fps: %.2f" % (t * 1000, fps))
            # Before doing post-processing, we need to reshape the outputs as common.do_inference gives us flat arrays.
            trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]
            postprocessor = PostprocessYOLO(**postprocessor_args)
            # Run the post-processing algorithms on the TensorRT outputs and get the bounding boxes of detected objects
            boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH))
            im = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)
            im = np.asarray(im)[..., ::-1]
            cv2.putText(im, "%.2f" % fps, (12, 12), 3, 1, (0, 255, 0))
            cv2.imshow("det", im)
            if cv2.waitKey(5) == 27:
                break
    cap.release()
    cv2.destroyAllWindows()
def main():
    # Set the data path to the directory that contains the trained models and test images for inference.
    kDEFAULT_DATA_ROOT = os.path.join(os.sep, "data")
    # Get test images, models and labels.
    labels_file = './labels.txt'
    labels = open(labels_file, 'r').read().split('\n')
    onnx_model_file = './weights/resnet152_f.onnx'
    classes_folder = glob.glob('./arranged_data_final/val/*')
    all_images = [glob.glob(classes_folder[i] + '/*') for i in range(len(classes_folder))]
    merged_images = list(itertools.chain.from_iterable(all_images))

    with open('./weights/resnet152.engine', 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
        # Allocate buffers and create a CUDA stream.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            # Load a normalized test case into the host input page-locked buffer.
            start_time = time.time()
            count = 0
            for test_image in merged_images:
                test_case = load_normalized_image(test_image, inputs[0].host)
                # Run the engine. The output will be a 1D tensor of length 1000, where each value
                # represents the probability that the image corresponds to that label.
                trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs,
                                                     outputs=outputs, stream=stream)
                # We use the highest probability as our prediction. Its index corresponds to the predicted label.
                pred = labels[np.argmax(trt_outputs[0])]
                # if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
                #     print("Correctly recognized " + test_case + " as " + pred)
                # else:
                #     print("Incorrectly recognized " + test_case + " as " + pred)
                count = count + 1
            end_time = time.time()
            print('Total time=', end_time - start_time)
            print('Total images processed=', count)
            print('Frames Per Second with TensorRT Engine =', count / (end_time - start_time))
def predict(self, preprocessed_image):
    np_image = np.array(preprocessed_image, dtype=np.float32)[np.newaxis, :, :, (2, 1, 0)]  # RGB -> BGR
    np_image = np.ascontiguousarray(np.rollaxis(np_image, 3, 1))
    assert (
        1,
        3,
        self.model_height,
        self.model_width,
    ) == np_image.shape, "Image must be resized to model shape"
    if self.is_fp16:
        np_image = np_image.astype(np.float16)

    self.cfx.push()
    try:
        inputs, outputs, bindings, stream = common.allocate_buffers(self.engine)
        # Do inference
        inputs[0].host = np_image
        trt_outputs = common.do_inference_v2(
            self.context,
            bindings=bindings,
            inputs=inputs,
            outputs=outputs,
            stream=stream,
        )
    finally:
        self.cfx.pop()  # very important

    # Before doing post-processing, we need to reshape the outputs as common.do_inference gives us flat arrays.
    # There should be nothing to 'round' here. If there is, we made a mistake earlier.
    output_shapes = [(
        1,
        (len(self.labels) + 5) * 5,
        int(self.model_height / 32),
        int(self.model_width / 32),
    )]
    trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]
    trt_outputs = np.squeeze(trt_outputs).transpose((1, 2, 0)).astype(np.float32)
    return trt_outputs
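# The predict() above pushes/pops `self.cfx` around each inference call. A minimal sketch
# of how such a per-instance CUDA context might be set up with pycuda, so inference can be
# driven from worker threads; the class and method names here are illustrative assumptions,
# not taken from the source.
import pycuda.driver as cuda

class ThreadSafePredictor:
    def __init__(self, engine):
        cuda.init()
        # One CUDA context per predictor instance; predict() wraps work in push()/pop().
        self.cfx = cuda.Device(0).make_context()
        self.engine = engine
        self.context = engine.create_execution_context()

    def destroy(self):
        # Release the context when the predictor is no longer needed.
        self.cfx.pop()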
def main():
    model_path = 'weights/bts_nyu_320_mem.trt'
    input_image_path = 'images/NYU0937.jpg'
    input_resolution = (320, 320)
    vs = WebcamVideoStream().start()
    accum_time = 0
    curr_fps = 0
    fps = "FPS: ??"
    with get_engine(model_path) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        while True:
            prev_time = time.time()
            frame = vs.read()
            image = preprocess(frame, input_resolution)
            inputs[0].host = image
            trt_outputs = common.do_inference_v2(context, bindings, inputs, outputs, stream)[-1]
            vis = postprocess(trt_outputs, input_resolution)
            curr_time = time.time()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time = accum_time + exec_time
            curr_fps = curr_fps + 1
            if accum_time > 1:
                accum_time = accum_time - 1
                fps = "FPS: " + str(curr_fps)
                print(fps)
                curr_fps = 0
            cv2.imshow('frame', vis)
            if cv2.waitKey(1) == ord('q'):
                break
    cv2.destroyAllWindows()
    vs.stop()
def main(): """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference.""" # Try to load a previously generated YOLOv3-608 network graph in ONNX format: onnx_file_path = "./models_trained/797-AG-BC.onnx" engine_file_path = "./models_trained/797-AG-BC.trt" onnx_file_path = "./models_trained/544-CH-CA.onnx" engine_file_path = "./models_trained/544-CH-CA.trt" # Download a dog image and save it to the following file path: input_image_path = "./imgs_prueba_clasificacion/AG_BC1.png" input_image_path = "./imgs_prueba_deteccion/CH_CA.png" imagen = Image.open(input_image_path) mean = np.array([0.5, 0.5, 0.5]) std = np.array([0.5, 0.5, 0.5]) #loader = transforms.Compose( #transforms.Resize(128), transforms.ToTensor(), transforms.Normalize(std, mean)]) loader = transforms.Compose( [transforms.ToTensor(), transforms.Normalize(std, mean)]) imagen = loader(imagen).float() #imagen = imagen.unsqueeze(0) image = imagen.numpy() # Do inference with TensorRT trt_outputs = [] with get_engine(onnx_file_path, engine_file_path ) as engine, engine.create_execution_context() as context: inputs, outputs, bindings, stream = common.allocate_buffers(engine) # Do inference print('Running inference on image {}...'.format(input_image_path)) # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing. inputs[0].host = image trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream) print(trt_outputs)
def __call__(self, input):
    # Copy the input image into the host input buffer.
    np.copyto(self.inputs[0].host, input.ravel())
    # When inferring on a single image, we measure inference time to report it to the user.
    inference_start_time = time.time()
    # Fetch output from the model
    [heatmaps, pafs] = common.do_inference_v2(self.context, bindings=self.bindings,
                                              inputs=self.inputs, outputs=self.outputs,
                                              stream=self.stream)
    # Output inference time
    # print("TensorRT inference time: {} ms".format(
    #     int(round((time.time() - inference_start_time) * 1000))))
    # And return results
    return pafs, heatmaps
def inference(data_dir, engine_path, long_side_size=1024):
    filenames = glob.glob(data_dir + '/*g')[:10]
    times = []
    TRT_LOGGER = trt.Logger()
    trt_runtime = trt.Runtime(TRT_LOGGER)
    engine = load_engine(trt_runtime, engine_path)
    with engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        for filename in tqdm(filenames):
            ori_image = cv2.imread(filename)
            image, im_scales = scale(ori_image, long_size=long_side_size)
            image, shape = preprocess(image)
            inputs[0].host = image
            t1 = time.time()
            trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs,
                                                 outputs=outputs, stream=stream)
            dur = time.time() - t1
            times.append(dur)
            gaussian_map = trt_outputs[0].reshape((shape[0] // 4, shape[1] // 4))
            boxes, scores = postprocess(gaussian_map)
            polys = []
            if boxes is not None:
                boxes = boxes[:, :8].reshape((-1, 4, 2)) * 1. / im_scales
                for box in boxes:
                    box = sort_poly(box.astype(np.int32))
                    polys.append(box)
            polys = np.array(polys, dtype=np.float32).reshape((-1, 8))
            result_im = draw_polys(ori_image, polys)
            cv2.imwrite("res/{}.jpg".format(filename.split('/')[-1].split('.')[0]), result_im)
    print("mean_time:", np.mean(times))
def main(): model_path = "alexnet.trt" TRT_LOGGER = trt.Logger(trt.Logger.WARNING) runtime = trt.Runtime(TRT_LOGGER) f = open(model_path, "rb") engine = runtime.deserialize_cuda_engine(f.read()) context = engine.create_execution_context() f.close() for binding in engine: size = trt.volume( engine.get_binding_shape(binding)) * engine.max_batch_size dtype = trt.nptype(engine.get_binding_dtype(binding)) # Append to the appropriate list. if engine.binding_is_input(binding): print("input_size: ", size, "dtype: ", dtype) else: print("output_size: ", size, "dtype: ", dtype) inputs, outputs, bindings, stream = common.allocate_buffers(engine) length = input_shape[0] * input_shape[1] * input_shape[2] * input_shape[3] data = np.zeros(length, dtype=np.float32) data[:] = 1.0 inputs[0].host = data.reshape(input_shape) print(inputs[0].host[0][0][0][:10]) outputs[0].host = np.zeros(output_shape, dtype=np.float32) trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream) print(trt_outputs[0].shape) print(trt_outputs[0][0][0][:10])
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='enable verbose output (for debugging)')
    parser.add_argument('-m', '--model', type=str, default='model')
    args = parser.parse_args()

    trt_file_path = '%s.trt' % args.model
    if not os.path.isfile(trt_file_path):
        raise SystemExit('ERROR: file (%s) not found!' % trt_file_path)
    engine = load_engine(trt_file_path, args.verbose)
    h_inputs, h_outputs, bindings, stream = common.allocate_buffers(engine)
    cap = cv2.VideoCapture(2)
    with engine.create_execution_context() as context:
        while True:
            _, frame = cap.read()
            t1 = time.time()
            img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            img = Image.fromarray(img)
            img = img_transforms(img).numpy()
            h_inputs[0].host = img
            t3 = time.time()
            trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=h_inputs,
                                                 outputs=h_outputs, stream=stream)
            t4 = time.time()
            out_j = trt_outputs[0].reshape(101, 56, 4)
            prob = scipy.special.softmax(out_j[:-1, :, :], axis=0)
            idx = np.arange(100) + 1
            idx = idx.reshape(-1, 1, 1)
            loc = np.sum(prob * idx, axis=0)
            out_j = np.argmax(out_j, axis=0)
            loc[out_j == 100] = 0
            out_j = loc
            # import pdb; pdb.set_trace()
            vis = frame
            for i in range(out_j.shape[1]):
                if np.sum(out_j[:, i] != 0) > 2:
                    for k in range(out_j.shape[0]):
                        if out_j[k, i] > 0:
                            ppp = (int(out_j[k, i] * col_sample_w * img_w / 800) - 1,
                                   int(img_h * (row_anchor[k] / 288)) - 1)
                            cv2.circle(vis, ppp, img_w // 300, color[i], -1)
            t2 = time.time()
            print('Inference time', (t4 - t3) * 1000)
            print('FPS', int(1 / ((t2 - t1))))
            cv2.imshow("OUTPUT", vis)
            cv2.waitKey(1)
def main():
    # Model
    label_file_path = 'models/starwars.names'
    engine_file_path = "models/starwars_yolov3_fp16.trt"

    # Label list
    all_classes = img_utils.load_label_classes(label_file_path)
    num_classes = len(all_classes)

    trt_runtime = trt.Runtime(TRT_LOGGER)
    print(f"Loading TensorRT engine {engine_file_path}")
    trt_engine = common.load_engine(trt_runtime, engine_file_path)
    inputs, outputs, bindings, streams = common.allocate_buffers(trt_engine)
    context = trt_engine.create_execution_context()

    new_width = 416
    new_height = 416
    input_shape = (new_width, new_height)
    output_shapes = [(1, -1, new_height // 32, new_width // 32),
                     (1, -1, new_height // 16, new_width // 16),
                     (1, -1, new_height // 8, new_width // 8)]

    # Open webcam
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Cannot open the webcam")
        exit()

    while cap.isOpened():
        # Read frame from webcam
        status, frame = cap.read()
        if not status:
            print("Cannot read a frame")
            exit()

        # fps = cap.get(cv2.CAP_PROP_FPS)
        # print(f"Frames per second using video.get(cv2.CAP_PROP_FPS): {fps}")

        image_resized = cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_AREA)
        image_resized = np.array(image_resized, dtype=np.float32, order='C')
        image_resized /= 255.0
        image_processed = np.transpose(image_resized, [2, 0, 1])
        image_processed = np.expand_dims(image_processed, axis=0)
        image_processed = np.array(image_processed, dtype=np.float32, order='C')

        inputs[0].host = image_processed
        trt_outputs = common.do_inference_v2(
            context, bindings=bindings, inputs=inputs, outputs=outputs, stream=streams)
        trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]

        resolution_raw = (int(frame.shape[1]), int(frame.shape[0]))
        bboxes = process_outputs(trt_outputs, num_classes, resolution_raw, input_shape, 0.6)

        # Loop through detected bounding boxes
        for bbox in bboxes:
            # Get corner points of the detection rectangle
            coor = np.array(bbox[:4], dtype=np.int32)
            (startX, startY) = coor[0], coor[1]
            (endX, endY) = coor[2], coor[3]
            # Draw rectangle over the detected object
            cv2.rectangle(frame, (int(startX + 20), int(startY + 20)),
                          (int(endX - 20), int(endY - 20)), (0, 255, 0), 2)
            # Get label with max accuracy
            score = bbox[4]
            score = '%.2f' % score
            class_ind = int(bbox[5])
            class_name = all_classes[class_ind]
            print(f"Detected {class_name} LEGO figure with confidence {score}")
            # Write label and confidence above the detection rectangle
            cv2.putText(frame, class_name, (startX, startY),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

        # Display output
        cv2.imshow("LEGO Star Wars Object Detection", frame)

        # Press "Q" to stop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Release resources
    cap.release()
    cv2.destroyAllWindows()
def predict(self, input_path='dog.jpg', output_save_root='./output', write_txt=False):
    '''
    :param input_path: input: a single image path, an image folder, or a single video file path
    :param output_save_root: everything is saved into this folder; videos are saved as mp4
    :param write_txt: save the predicted box coordinates / class / confidence as txt
    :return:
    '''
    # Decide whether the input is an image, a folder, or a video.
    is_video = False
    path = input_path
    if os.path.isdir(path):
        # Image folder
        img_names = os.listdir(path)
        img_names = [name for name in img_names
                     if name.split('.')[-1] in self.img_formats]
    elif os.path.isfile(path):
        # Split '/hme/ai/111.jpg' -> ('/hme/ai', '111.jpg')
        path, img_name = os.path.split(path)
        # Flag videos
        if img_name.split('.')[-1] in self.vid_formats:
            is_video = True
        else:
            assert img_name.split('.')[-1] in self.img_formats, "must be a single image path"
            img_names = [img_name]
    else:
        print("Invalid input!!!" * 3)

    # Create the output folder
    check_path(output_save_root)

    # Handle video input
    if is_video:
        assert img_name.count('.') == 1, "the video name must contain exactly one '.'"
        # Open the video
        cap = cv2.VideoCapture(os.path.join(path, img_name))
        # Get the video's fps, width and height
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        num = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))  # total number of frames
        # Create the output video
        video_save_path = os.path.join(output_save_root, img_name.split('.')[0] + '_pred.mp4')
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        video_writer = cv2.VideoWriter(video_save_path, fourcc=fourcc, fps=fps,
                                       frameSize=(width, height))
    else:
        num = len(img_names)  # number of images

    # Inference (device 0 by default)
    inputs, outputs, bindings, stream = common.allocate_buffers(self.engine)
    # Do inference
    for i in range(num):
        # Pre-processing
        if is_video:
            cap.set(cv2.CAP_PROP_POS_FRAMES, i)  # seek to the given frame
            _, image = cap.read()  # read a BGR frame
            image_raw, image = self.preprocessor.process(image)
        else:
            # The default input is an image path
            image_raw, image = self.preprocessor.process(os.path.join(path, img_names[i]))
        # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
        inputs[0].host = image
        trt_outputs = common.do_inference_v2(self.context, bindings=bindings, inputs=inputs,
                                             outputs=outputs, stream=stream)
        # Reshape the output list; this could live one level up, but we may re-feed images
        trt_outputs = [output.reshape(shape)
                       for output, shape in zip(trt_outputs, self.output_shapes)]
        # Post-processing, handled two ways: the original YOLOv4 prediction, or the YOLOv5-style prediction
        # Original image size in WH, since the image was read with PIL
        shape_orig_WH = image_raw.size
        # The post-processing can handle batch >= 1, but this class is written for batch = 1
        outputs_pred = self.postprocessor.process(trt_outputs, shape_orig_WH)
        # TODO write the predicted box coordinates / class / confidence to txt
        # Draw the boxes; since this is a single image, no for loop is needed
        boxes, classes, scores = outputs_pred[0][0]
        obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, self.all_categories)
        # Videos are saved by frame index, images by name; videos rarely exceed 5 digits of frames
        # TODO write the video predictions into the output video
        if is_video:
            obj_detected_img.save(os.path.join(output_save_root, str(i).zfill(5)))
        else:
            obj_detected_img.save(os.path.join(output_save_root, img_names[i]))

    # Release the capture if the input was a video
    if is_video:
        cap.release()
        cv2.destroyAllWindows()
if __name__ == '__main__':
    onnx_model_file = "grid_sample.onnx"
    export_onnx_model(onnx_model_file)
    modify_onnx(onnx_model_file)
    # Build a TensorRT engine.
    with build_engine_onnx(onnx_model_file) as engine:
        # Inference is the same regardless of which parser is used to build the engine,
        # since the model architecture is the same.
        # Allocate buffers and create a CUDA stream.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine, True, 2)
        # Contexts are used to perform inference.
        with engine.create_execution_context() as context:
            # test 1: float16 input; via nvprof, you can see the __half-instantiated template function is called
            # test 2: the batch dims of input and grid are -1; set the context binding shapes and feed matching data
            input = input_rand[0:2, :, :, :].astype('float16')
            grid = grid_rand[0:2, :, :, :].astype('float16')
            context.set_binding_shape(0, (2, 1, 4, 4))
            context.set_binding_shape(1, (2, 4, 4, 2))
            inputs[0].host = input
            inputs[1].host = grid
            trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs,
                                                 outputs=outputs, stream=stream)
            print(trt_outputs)
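# The dynamic-batch test above only works if the engine was built with an optimization
# profile covering the runtime shapes passed to set_binding_shape(). A minimal sketch of
# registering such a profile at build time; the binding names and min/opt/max dims below
# are assumptions chosen to match this grid_sample example.
import tensorrt as trt

def add_dynamic_profile(builder, config):
    profile = builder.create_optimization_profile()
    # One set_shape() call per dynamic input: (min, opt, max) shapes.
    profile.set_shape("input", min=(1, 1, 4, 4), opt=(2, 1, 4, 4), max=(4, 1, 4, 4))
    profile.set_shape("grid", min=(1, 4, 4, 2), opt=(2, 4, 4, 2), max=(4, 4, 4, 2))
    config.add_optimization_profile(profile)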
def main(): """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference.""" # Try to load a previously generated YOLOv3-608 network graph in ONNX format: onnx_file_path = 'yolov3.onnx' engine_file_path = "yolov3.trt" input_image_path = "demo/test2.jpg" # Two-dimensional tuple with the target network's (spatial) input resolution in HW ordered input_resolution_yolov3_HW = (608, 608) # Create a pre-processor object by specifying the required input resolution for YOLOv3 preprocessor = PreprocessYOLO(input_resolution_yolov3_HW) # Load an image from the specified input path, and return it together with a pre-processed version image_raw, image = preprocessor.process(input_image_path) # Store the shape of the original input image in WH format, we will need it for later shape_orig_WH = image_raw.shape[:2] # Output shapes expected by the post-processor output_shapes = [(batch_size, 180, 19, 19), (batch_size, 180, 38, 38), (batch_size, 180, 76, 76)] # Do inference with TensorRT trt_outputs = [] with get_engine(onnx_file_path, engine_file_path ) as engine, engine.create_execution_context() as context: t1 = time.time() print(engine.max_batch_size) inputs, outputs, bindings, stream = common.allocate_buffers(engine) # Do inference print('Running inference on image {}...'.format(input_image_path)) # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing. inputs[0].host = image trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream) t2 = time.time() # Before doing post-processing, we need to reshape the outputs as the common.do_inference will give us flat arrays. trt_outputs = [ output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes) ] postprocessor_args = { "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)], # A list of 3 three-dimensional tuples for the YOLO masks "yolo_anchors": [ (10, 13), (16, 30), (33, 23), (30, 61), (62, 45), # A list of 9 two-dimensional tuples for the YOLO anchors (59, 119), (116, 90), (156, 198), (373, 326) ], "obj_threshold": 0.1, # Threshold for object coverage, float value between 0 and 1 "nms_threshold": 0.6, # Threshold for non-max suppression algorithm, float value between 0 and 1 "yolo_input_resolution": input_resolution_yolov3_HW, "batch_size": batch_size } postprocessor = PostprocessYOLO(**postprocessor_args) # Run the post-processing algorithms on the TensorRT outputs and get the bounding box details of detected objects boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH)) t3 = time.time() # Draw the bounding boxes onto the original input image and save it as a PNG file obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES) output_image_path = 'result.jpg' cv2.imwrite(output_image_path, obj_detected_img) print('Saved image with bounding boxes of detected objects to {}.'.format( output_image_path)) print(f"model time: {t2 - t1}s, process time: {t3 - t2}s.")
def main(): """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference.""" # Try to load a previously generated YOLOv3-608 network graph in ONNX format: #获取onnx模型和相应引擎文件的路径 onnx_file_path = 'yolov3.onnx' engine_file_path = "yolov3.trt" # Download a dog image and save it to the following file path: #下载相关的图片数据 input_image_path = common.download_file( 'dog.jpg', 'https://github.com/pjreddie/darknet/raw/f86901f6177dfc6116360a13cc06ab680e0c86b0/data/dog.jpg', checksum_reference=None) # Two-dimensional tuple with the target network's (spatial) input resolution in HW ordered #网络的输入图片weidth和height input_resolution_yolov3_HW = (608, 608) # Create a pre-processor object by specifying the required input resolution for YOLOv3 #PreprocessYOLO参考data_processing.py的实现 #加载图片并进行相应的预处理 preprocessor = PreprocessYOLO(input_resolution_yolov3_HW) # Load an image from the specified input path, and return it together with a pre-processed version #从相应路径加载一张图片,将加载的原图和预处理后的图像一起返回 #具体参考data_processing.py的实现 image_raw, image = preprocessor.process(input_image_path) # Store the shape of the original input image in WH format, we will need it for later #存储原始图片的维度 shape_orig_WH = image_raw.size # Output shapes expected by the post-processor #输出层的维度 output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)] # Do inference with TensorRT #进行trt的推理 trt_outputs = [] #get_engine参考本文件的实现 #获取引擎文件并创建相关的推理上下文 with get_engine(onnx_file_path, engine_file_path ) as engine, engine.create_execution_context() as context: #分配相应的内存缓冲区 inputs, outputs, bindings, stream = common.allocate_buffers(engine) # Do inference print('Running inference on image {}...'.format(input_image_path)) # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing. #将相应数据传到主机内存 inputs[0].host = image #进行相应的推理 trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream) # Before doing post-processing, we need to reshape the outputs as the common.do_inference will give us flat arrays. #得到推理的输出 trt_outputs = [ output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes) ] postprocessor_args = { "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)], # A list of 3 three-dimensional tuples for the YOLO masks "yolo_anchors": [ (10, 13), (16, 30), (33, 23), (30, 61), (62, 45), # A list of 9 two-dimensional tuples for the YOLO anchors (59, 119), (116, 90), (156, 198), (373, 326) ], "obj_threshold": 0.6, # Threshold for object coverage, float value between 0 and 1 "nms_threshold": 0.5, # Threshold for non-max suppression algorithm, float value between 0 and 1 "yolo_input_resolution": input_resolution_yolov3_HW } #接下来就是相关的后处理内容了 postprocessor = PostprocessYOLO(**postprocessor_args) # Run the post-processing algorithms on the TensorRT outputs and get the bounding box details of detected objects boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH)) # Draw the bounding boxes onto the original input image and save it as a PNG file obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES) output_image_path = 'dog_bboxes.png' obj_detected_img.save(output_image_path, 'PNG') print('Saved image with bounding boxes of detected objects to {}.'.format( output_image_path))
def main(): """Create a TensorRT engine for ONNX-based YOLOv3 """ # Try to load a previously generated YOLOv3-608 network graph in ONNX format: onnx_file_path = ONNX_FILE_PATH engine_file_path = ENGINE_FILE_PATH # Download a dog image and save it to the following file path: input_image_path = TEST_IMAGE # Two-dimensional tuple with the target network's (spatial) input resolution in HW ordered input_resolution_yolov3_HW = (320, 800) # Create a pre-processor object by specifying the required input resolution for YOLOv3 preprocessor = PreprocessYOLO(input_resolution_yolov3_HW) # Load an image from the specified input path, and return it together with a pre-processed version image_raw, image = preprocessor.process(input_image_path) # Store the shape of the original input image in WH format, we will need it for later shape_orig_WH = image_raw.size # Output shapes expected by the post-processor #output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)] output_shapes = get_outshape(3, 800, 320) # Do inference with TensorRT trt_outputs = [] with get_engine(onnx_file_path, engine_file_path ) as engine, engine.create_execution_context() as context: inputs, outputs, bindings, stream = common.allocate_buffers(engine) # Do inference print('Running inference on image {}...'.format(input_image_path)) # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing. inputs[0].host = image trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream) # Before doing post-processing, we need to reshape the outputs as the common.do_inference will give us flat arrays. trt_outputs = [ output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes) ] postprocessor_args = { "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)], # A list of 3 three-dimensional tuples for the YOLO masks "yolo_anchors": [ (10, 13), (16, 30), (33, 23), (30, 61), (62, 45), # A list of 9 two-dimensional tuples for the YOLO anchors (59, 119), (116, 90), (156, 198), (373, 326) ], "obj_threshold": 0.6, # Threshold for object coverage, float value between 0 and 1 "nms_threshold": 0.5, # Threshold for non-max suppression algorithm, float value between 0 and 1 "yolo_input_resolution": input_resolution_yolov3_HW } postprocessor = PostprocessYOLO(**postprocessor_args) # Run the post-processing algorithms on the TensorRT outputs and get the bounding box details of detected objects boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH)) # Draw the bounding boxes onto the original input image and save it as a PNG file obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES) output_image_path = 'onnx_trans_test.png' obj_detected_img.save(output_image_path, 'PNG') print('Saved image with bounding boxes of detected objects to {}.'.format( output_image_path))
def main(): """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference.""" # Try to load a previously generated YOLOv3-608 network graph in ONNX format: onnx_file_path = './yolov3.onnx' engine_file_path = "yolov3.trt" data_path = "./data/unrel.data" data = parse_data_cfg(data_path) nc = int(data['classes']) # number of classes path = data['valid'] # path to test images names = load_classes(data['names']) # class names iouv = torch.linspace(0.5, 0.95, 1, dtype=torch.float32) # iou vector for [email protected]:0.95 niou = 1 conf_thres = 0.001 iou_thres = 0.6 verbose = True # Genearte custom dataloader img_size = 448 # copy form pytorch src batch_size = 16 dataset = LoadImagesAndLabels(path, img_size, batch_size, rect=True) batch_size = min(batch_size, len(dataset)) dataloader = data_loader(dataset, batch_size, img_size) # Output shapes expected by the post-processor output_shapes = [(16, 126, 14, 14), (16, 126, 28, 28), (16, 126, 56, 56)] # Do inference with TensorRT trt_outputs = [] with get_engine(onnx_file_path, engine_file_path ) as engine, engine.create_execution_context() as context: inputs, outputs, bindings, stream = common.allocate_buffers(engine) s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', '[email protected]', 'F1') p, r, f1, mp, mr, map, mf1, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0. pbar = tqdm.tqdm(dataloader, desc=s) stats, ap, ap_class = [], [], [] seen = 0 for batch_i, (imgs, targets, paths, shapes) in enumerate(pbar): imgs = imgs.astype(np.float32) / 255.0 nb, _, height, width = imgs.shape # batch size, channels, height, width whwh = np.array([width, height, width, height]) inputs[0].host = imgs postprocessor_args = { "yolo_masks": [ (6, 7, 8), (3, 4, 5), (0, 1, 2) ], # A list of 3 three-dimensional tuples for the YOLO masks "yolo_anchors": [ (10, 13), (16, 30), (33, 23), (30, 61), ( 62, 45 ), # A list of 9 two-dimensional tuples for the YOLO anchors (59, 119), (116, 90), (156, 198), (373, 326) ], "num_classes": 37, "stride": [32, 16, 8] } postprocessor = PostprocessYOLO(**postprocessor_args) # Do layers before yolo t = time.time() trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream) trt_outputs = [ output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes) ] trt_outputs = [ np.ascontiguousarray( otpt[:, :, :int(imgs.shape[2] * (2**i) / 32), :int(imgs.shape[3] * (2**i) / 32)], dtype=np.float32) for i, otpt in enumerate(trt_outputs) ] output_list = postprocessor.process(trt_outputs) t0 += time.time() - t inf_out = torch.cat(output_list, 1) t = time.time() output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres) # nms t1 += time.time() - t # Statistics per image for si, pred in enumerate(output): labels = targets[targets[:, 0] == si, 1:] nl = len(labels) tcls = labels[:, 0].tolist() if nl else [] # target class seen += 1 if pred is None: if nl: stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls)) continue # Assign all predictions as incorrect correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool) if nl: detected = [] # target indices tcls_tensor = labels[:, 0] # target boxes tbox = xywh2xyxy(labels[:, 1:5]) * whwh tbox = tbox.type(torch.float32) # Per target class for cls in torch.unique(tcls_tensor): ti = (cls == tcls_tensor).nonzero().view( -1) # prediction indices pi = (cls == pred[:, 5]).nonzero().view( -1) # target indices # Search for detections if pi.shape[0]: # Prediction to target ious 
ious, i = box_iou(pred[pi, :4], tbox[ti]).max( 1) # best ious, indices # Append detections for j in (ious > iouv[0]).nonzero(): d = ti[i[j]] # detected target if d not in detected: detected.append(d) correct[pi[j]] = ious[ j] > iouv # iou_thres is 1xn if len( detected ) == nl: # all targets already located in image break # Append statistics (correct, conf, pcls, tcls) stats.append( (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls)) # Plot images if batch_i < 1: f = 'test_batch%g_gt.jpg' % batch_i # filename plot_images(imgs, targets, paths=paths, names=names, fname=f) # ground truth f = 'test_batch%g_pred.jpg' % batch_i plot_images(imgs, output_to_target(output, width, height), paths=paths, names=names, fname=f) # predictions # Compute statistics stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy if len(stats): p, r, ap, f1, ap_class = ap_per_class(*stats) if niou > 1: p, r, ap, f1 = p[:, 0], r[:, 0], ap.mean( 1), ap[:, 0] # [P, R, [email protected]:0.95, [email protected]] mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean() nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class else: nt = torch.zeros(1) # Print results pf = '%20s' + '%10.3g' * 6 # print format print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1)) # Print results per class if verbose and nc > 1 and len(stats): for i, c in enumerate(ap_class): print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i])) # Print speeds if verbose: t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + ( img_size, img_size, batch_size) # tuple print( 'Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)
def main():
    parser = argparse.ArgumentParser(description="TensorRT model inference")
    parser.add_argument("--model", "-m", required=True, type=str, help="TensorRT model path")
    parser.add_argument("--input_shape", "-in", required=True, type=int, nargs="+", help="input shape")
    parser.add_argument("--output_shape", "-out", required=True, type=int, nargs="+", help="output shape")
    args = parser.parse_args()

    # model_path = "9_16_22_7.model.trt"
    model_path = args.model
    input_shape = tuple(args.input_shape)
    output_shape = tuple(args.output_shape)
    print("input_shape: ", input_shape)
    print("output_shape: ", output_shape)

    TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE)
    runtime = trt.Runtime(TRT_LOGGER)
    with open(model_path, "rb") as f:
        engine = runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()

    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Print the size and dtype of each binding.
        if engine.binding_is_input(binding):
            print("input_size: ", size, "dtype: ", dtype)
        else:
            print("output_size: ", size, "dtype: ", dtype)

    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    length = input_shape[0] * input_shape[1] * input_shape[2] * input_shape[3]
    data = np.zeros(length, dtype=np.float32)
    data[:] = 1.0
    inputs[0].host = data.reshape(input_shape)
    print(inputs[0].host[0][0][0][:10])
    outputs[0].host = np.zeros(output_shape, dtype=np.float32)

    trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs,
                                         outputs=outputs, stream=stream)
    print(trt_outputs[0].shape)
    print(trt_outputs[0])

    print("starting...")
    starttime = time.time()
    for i in range(1000 * 2 * 10):
        trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs,
                                             outputs=outputs, stream=stream)
        # time.sleep(10)
        # print(trt_outputs[0])
    endtime = time.time()
    print(endtime - starttime)
    print(trt_outputs[0])
def main(): """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference.""" # Try to load a previously generated YOLOv3-608 network graph in ONNX format: onnx_file_path = 'yolov3.onnx' engine_file_path = "yolov3.trt" # Download a dog image and save it to the following file path: #input_image_path = 'images/dog.jpg' # Two-dimensional tuple with the target network's (spatial) input resolution in HW ordered input_resolution_yolov3_HW = (416, 416) # Create a pre-processor object by specifying the required input resolution for YOLOv3 preprocessor = PreprocessYOLO(input_resolution_yolov3_HW) # Load an image from the specified input path, and return it together with a pre-processed version #image_raw, image = preprocessor.process(input_image_path) # Store the shape of the original input image in WH format, we will need it for later #shape_orig_WH = image_raw.size # Output shapes expected by the post-processor #output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)] output_shapes = [(1, 255, 13, 13), (1, 255, 26, 26), (1, 255, 52, 52)] # Do inference with TensorRT trt_outputs = [] # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing. postprocessor_args = { "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)], # A list of 3 three-dimensional tuples for the YOLO masks "yolo_anchors": [ (10, 13), (16, 30), (33, 23), (30, 61), (62, 45), # A list of 9 two-dimensional tuples for the YOLO anchors (59, 119), (116, 90), (156, 198), (373, 326) ], "obj_threshold": 0.6, # Threshold for object coverage, float value between 0 and 1 "nms_threshold": 0.5, # Threshold for non-max suppression algorithm, float value between 0 and 1 "yolo_input_resolution": input_resolution_yolov3_HW } capture = cv2.VideoCapture(r"D:\b站下载视频\飙车.mp4") fourcc = cv2.VideoWriter_fourcc(*'XVID') fps = capture.get(cv2.CAP_PROP_FPS) size = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))) out = cv2.VideoWriter('camera_test.mp4', fourcc, fps, size) fps = 0 while (True): t1 = time.time() ref, frame = capture.read() # 格式转变,BGRtoRGB frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # 转变成Image frame = Image.fromarray(np.uint8(frame)) image_raw, image = preprocessor2.process(frame) shape_orig_WH = image_raw.size with get_engine( onnx_file_path, engine_file_path ) as engine, engine.create_execution_context() as context: inputs, outputs, bindings, stream = common.allocate_buffers(engine) # Do inference print( 'Running inference on image {}...'.format('input_image_path')) inputs[0].host = image trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream) # Before doing post-processing, we need to reshape the outputs as the common.do_inference will give us flat arrays. 
trt_outputs = [ output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes) ] # postprocessor_args = {"yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)], # A list of 3 three-dimensional tuples for the YOLO masks # "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), # A list of 9 two-dimensional tuples for the YOLO anchors # (59, 119), (116, 90), (156, 198), (373, 326)], # "obj_threshold": 0.6, # Threshold for object coverage, float value between 0 and 1 # "nms_threshold": 0.5, # Threshold for non-max suppression algorithm, float value between 0 and 1 # "yolo_input_resolution": input_resolution_yolov3_HW} postprocessor = PostprocessYOLO(**postprocessor_args) # Run the post-processing algorithms on the TensorRT outputs and get the bounding box details of detected objects boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH)) # Draw the bounding boxes onto the original input image and save it as a PNG file obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES) frame = cv2.cvtColor(obj_detected_img, cv2.COLOR_RGB2BGR) fps = (fps + (1. / (time.time() - t1))) / 2 print("fps= %.2f" % (fps)) frame = cv2.putText(frame, "fps= %.2f" % (fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) out.write(frame) cv2.imshow("video", frame) c = cv2.waitKey(1) & 0xff if c == 27: capture.release() break # output_image_path = 'dog_bboxes.png' # obj_detected_img.save(output_image_path, 'PNG') print('Saved image with bounding boxes of detected objects to {}.'. format('output_image_path'))