def predict(inp: Image, metadata):
    image_raw, image = preprocessor.process(inp)
    shape_orig_WH = image_raw.size
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    # Do inference
    print('Running inference on image')
    # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
    inputs[0].host = image
    a = perf_counter()
    trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs,
                                      outputs=outputs, stream=stream)
    b = perf_counter()
    metadata['TensorRT Inference Latency (s)'] = b - a
    # Reshape the flat arrays returned by common.do_inference into the expected output shapes.
    trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]
    postprocessor = PostprocessYOLO(**postprocessor_args)
    # Run the post-processing algorithms on the TensorRT outputs and get the bounding box details of detected objects.
    boxes, classes, scores = postprocessor.process(trt_outputs, shape_orig_WH)
    # Draw the bounding boxes onto the original input image.
    obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)
    return obj_detected_img
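Almost every snippet in this section relies on the fact that common.do_inference returns each output binding as a flat one-dimensional array. A minimal, self-contained sketch of that reshape step (plain NumPy, no TensorRT required; random data stands in for real network output, and the shapes are the standard YOLOv3-608 head shapes):

import numpy as np

# Flat host buffers, as do_inference would hand them back.
output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]
flat_outputs = [np.random.rand(int(np.prod(s))).astype(np.float32) for s in output_shapes]

# The reshape used throughout this section: one flat array per YOLO head.
trt_outputs = [out.reshape(shape) for out, shape in zip(flat_outputs, output_shapes)]
for t in trt_outputs:
    print(t.shape)  # (1, 255, 19, 19), then (1, 255, 38, 38), then (1, 255, 76, 76)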
def _post_process(self, output):
    # start = timer()
    # A = TestRunner.mem_to_tensor(output[0], self.output_shapes[0])
    # B = TestRunner.mem_to_tensor(output[1], self.output_shapes[1])
    # C = TestRunner.mem_to_tensor(output[2], self.output_shapes[2])
    # tensor_creation_time = timer() - start
    # print(f"Creating tensor {round(tensor_creation_time * 1000)} [ms]")
    # print(A.size(), B.size(), C.size())
    # anchors_A = [self.anchors[i] for i in self.yolo_masks[0]]
    # anchors_B = [self.anchors[i] for i in self.yolo_masks[1]]
    # anchors_C = [self.anchors[i] for i in self.yolo_masks[2]]
    # print(anchors_A, anchors_B, anchors_C)
    # output_A = self._forward_yolo_output(A, anchors_A)
    # output_B = self._forward_yolo_output(B, anchors_B)
    # output_C = self._forward_yolo_output(C, anchors_C)
    # print(output_A.size(), output_B.size(), output_C.size())
    # full_output = torch.cat((output_A, output_B, output_C), 1)
    # print(full_output.size())
    # w, h = self.raw_image.size
    # pad_h, pad_w, ratio = calculate_padding(h, w, self.image_height, self.image_width)
    # for detections in full_output:
    #     detections = detections[detections[:, 4] > self.conf_thres]
    #     box_corner = torch.zeros((detections.shape[0], 4), device=detections.device)
    #     xy = detections[:, 0:2]
    #     wh = detections[:, 2:4] / 2
    #     box_corner[:, 0:2] = xy - wh
    #     box_corner[:, 2:4] = xy + wh
    #     probabilities = detections[:, 4]
    #     nms_indices = nms(box_corner, probabilities, self.nms_thres)
    #     main_box_corner = box_corner[nms_indices]
    #     probabilities_nms = probabilities[nms_indices]
    #     if nms_indices.shape[0] == 0:
    #         continue
    #     BB_list = []
    #     for i in range(len(main_box_corner)):
    #         x0 = main_box_corner[i, 0].to('cpu').item() / ratio - pad_w
    #         y0 = main_box_corner[i, 1].to('cpu').item() / ratio - pad_h
    #         x1 = main_box_corner[i, 2].to('cpu').item() / ratio - pad_w
    #         y1 = main_box_corner[i, 3].to('cpu').item() / ratio - pad_h
    #         # draw.rectangle((x0, y0, x1, y1), outline="red")
    #         # print("BB ", i, "| x = ", x0, "y = ", y0, "w = ", x1 - x0, "h = ", y1 - y0, "probability = ", probabilities_nms[i].item())
    #         BB = [round(x0), round(y0), round(y1 - y0), round(x1 - x0)]  # x, y, h, w
    #         BB_list.append(BB)
    #     return BB_list, probabilities_nms

    # Before doing post-processing, we need to reshape the outputs, as common.do_inference returns flat arrays.
    output = [output.reshape(shape) for output, shape in zip(output, self.output_shapes)]
    postprocessor_args = {
        "yolo_masks": self.yolo_masks,        # A list of 3 three-dimensional tuples for the YOLO masks
        "yolo_anchors": self.anchors,         # A list of 9 two-dimensional tuples for the YOLO anchors
        "obj_threshold": 0.5,                 # Threshold for object coverage, float value between 0 and 1
        "nms_threshold": 0.25,                # Threshold for non-max suppression, float value between 0 and 1
        "yolo_input_resolution": self.input_resolution,
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)
    # Run the post-processing algorithms on the TensorRT outputs and get the bounding box details of detected objects.
    return postprocessor.process(output, self.raw_image.size)
def main(FLAGS):
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference."""
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = "yolov3.trt"
    input_image_path = 'debug_image/test1.jpg'
    input_resolution_yolov3_HW = (608, 608)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    image_raw, image = preprocessor.process(input_image_path)
    shape_orig_WH = image_raw.size
    trt_outputs = []
    with get_engine(onnx_file_path, FLAGS, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = allocate_buffers(engine)
        # print('Running inference on image {}...'.format(input_image_path))
        max_batch_size = engine.max_batch_size
        # Replicate the single preprocessed image into a batch of 36.
        image = np.tile(image, [36, 1, 1, 1])
        inputs[0].host = image
        inf_batch = 36
        trt_outputs = do_inference(context, bindings=bindings, inputs=inputs,
                                   outputs=outputs, stream=stream, batch_size=inf_batch)
        output_shapes = [(max_batch_size, 255, 19, 19),
                         (max_batch_size, 255, 38, 38),
                         (max_batch_size, 255, 76, 76)]
        trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]
        postprocessor_args = {
            "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
            "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                             (59, 119), (116, 90), (156, 198), (373, 326)],
            "obj_threshold": 0.6,
            "nms_threshold": 0.5,
            "yolo_input_resolution": input_resolution_yolov3_HW,
        }
        postprocessor = PostprocessYOLO(**postprocessor_args)
        # Regroup the outputs from layer-major order to image-major order, then
        # post-process each image in the batch independently.
        feat_batch = [[trt_outputs[j][i] for j in range(len(trt_outputs))]
                      for i in range(len(trt_outputs[0]))]
        for idx, layers in enumerate(feat_batch):
            boxes, classes, scores = postprocessor.process(layers, shape_orig_WH)
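The feat_batch double list comprehension above transposes the outputs from layer-major ([layer][image]) to image-major ([image][layer]) order so each image can be post-processed on its own. A small NumPy sketch of the same regrouping, with zero arrays standing in for real feature maps:

import numpy as np

batch = 4
# Three YOLO heads, each holding the whole batch (layer-major).
trt_outputs = [np.zeros((batch, 255, s, s)) for s in (19, 38, 76)]

# Transpose to image-major: one list of three per-image feature maps per image.
feat_batch = [[trt_outputs[j][i] for j in range(len(trt_outputs))]
              for i in range(len(trt_outputs[0]))]
print(len(feat_batch), [f.shape for f in feat_batch[0]])
# 4 [(255, 19, 19), (255, 38, 38), (255, 76, 76)]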
def process_multi(img_path, yolo, engine, context):
    image = cv2.imread(img_path)
    # Warm-up pass, then time a second pass of the TensorFlow pipeline.
    img_persons_new, boxes_new, trans = yolo.process_image(image)
    start_tf = time.time()
    img_persons_new, boxes_new, trans = yolo.process_image(image)
    img = draw(image, boxes_new)
    cv2.imwrite('img.jpg', img)
    print('process time for tf is', time.time() - start_tf)

    input_resolution_yolov3_HW = (608, 608)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    image_raw, image = preprocessor.process(img_path)
    shape_orig_WH = image_raw.size
    # Output shapes expected by the post-processor
    output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]
    # Do inference with TensorRT
    trt_outputs = []
    inputs, outputs, bindings, stream = common_utils.allocate_buffers(engine)
    inputs[0].host = image
    # Warm-up pass, then time a second pass of the TensorRT pipeline.
    trt_outputs = common_utils.do_inference(context, bindings=bindings, inputs=inputs,
                                            outputs=outputs, stream=stream)
    start_trt = time.time()
    trt_outputs = common_utils.do_inference(context, bindings=bindings, inputs=inputs,
                                            outputs=outputs, stream=stream)
    # Before doing post-processing, reshape the outputs: common.do_inference returns flat arrays.
    trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]
    print('process time for trt is', time.time() - start_trt)

    post_trt = time.time()
    postprocessor_args = {
        "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],   # A list of 3 three-dimensional tuples for the YOLO masks
        "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),  # A list of 9 two-dimensional tuples for the YOLO anchors
                         (59, 119), (116, 90), (156, 198), (373, 326)],
        "obj_threshold": 0.6,   # Threshold for object coverage, float value between 0 and 1
        "nms_threshold": 0.5,   # Threshold for non-max suppression, float value between 0 and 1
        "yolo_input_resolution": input_resolution_yolov3_HW,
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)
    # Run the post-processing algorithms on the TensorRT outputs and get the bounding box details of detected objects.
    boxes, classes, scores = postprocessor.process(trt_outputs, shape_orig_WH)
    # Draw the bounding boxes onto the original input image and save it as a PNG file.
    obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)
    obj_detected_img.save('out_boxes.png', 'PNG')
    print('process time for trt post is', time.time() - post_trt)
def main(): """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference.""" # Try to load a previously generated YOLOv3-608 network graph in ONNX format: onnx_file_path = '/home/nvidia/Documents/Projects/Fabric_defect_detection/YOLO/fast_yolo.onnx' engine_file_path = "/home/nvidia/Documents/Projects/Fabric_defect_detection/YOLO/fast_yolo.trt" # Download a dog image and save it to the following file path: input_image_path = "/home/nvidia/Documents/Projects/Fabric_defect_detection/YOLO/sample.png" # Two-dimensional tuple with the target network's (spatial) input resolution in HW ordered input_resolution_yolov3_HW = (352, 352) # Create a pre-processor object by specifying the required input resolution for YOLOv3 preprocessor = PreprocessYOLO(input_resolution_yolov3_HW) # Load an image from the specified input path, and return it together with a pre-processed version image_raw, image = preprocessor.process(input_image_path) # Store the shape of the original input image in WH format, we will need it for later shape_orig_WH = image_raw.size # Output shapes expected by the post-processor output_shapes = [(1, 18, 11, 11)] # Do inference with TensorRT trt_outputs = [] with get_engine(onnx_file_path, engine_file_path) as engine, engine.create_execution_context() as context: inputs, outputs, bindings, stream = common.allocate_buffers(engine) # Do inference print('Running inference on image {}...'.format(input_image_path)) # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing. inputs[0].host = image # start = time.time() trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream) # print("time: %.2f s" %(time.time()-start)) # print(trt_outputs) # Before doing post-processing, we need to reshape the outputs as the common.do_inference will give us flat arrays. trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)] postprocessor_args = {"yolo_masks": [(0, 1, 2)], # A list of 3 three-dimensional tuples for the YOLO masks "yolo_anchors": [(188,15), (351,16), (351,30)], # A list of 9 two-dimensional tuples for the YOLO anchors], "obj_threshold": 0.5, # Threshold for object coverage, float value between 0 and 1 "nms_threshold": 0.2, # Threshold for non-max suppression algorithm, float value between 0 and 1 "yolo_input_resolution": input_resolution_yolov3_HW} postprocessor = PostprocessYOLO(**postprocessor_args) # Run the post-processing algorithms on the TensorRT outputs and get the bounding box details of detected objects boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH)) # Draw the bounding boxes onto the original input image and save it as a PNG file # obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES) # output_image_path = 'dog_bboxes.png' # obj_detected_img.save(output_image_path, 'PNG') # print('Saved image with bounding boxes of detected objects to {}.'.format(output_image_path)) return boxes, classes, scores
def main(): """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference.""" # Try to load a previously generated YOLOv3-608 network graph in ONNX format: onnx_file_path = 'yolov3.onnx' engine_file_path = "yolov3.trt" # Download a dog image and save it to the following file path: input_image_path = 'dog.jpg' # Two-dimensional tuple with the target network's (spatial) input resolution in HW ordered input_resolution_yolov3_HW = (608, 608) # Create a pre-processor object by specifying the required input resolution for YOLOv3 preprocessor = PreprocessYOLO(input_resolution_yolov3_HW) # Load an image from the specified input path, and return it together with a pre-processed version image_raw, image = preprocessor.process(input_image_path) # Store the shape of the original input image in WH format, we will need it for later shape_orig_WH = image_raw.size # Output shapes expected by the post-processor output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)] # Do inference with TensorRT trt_outputs = [] with get_engine(onnx_file_path, engine_file_path) as engine, engine.create_execution_context() as context: inputs, outputs, bindings, stream = common.allocate_buffers(engine) # Do inference print('Running inference on image {}...'.format(input_image_path)) # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing. inputs[0].host = image #for input in inputs: #print(input.host) trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream) # Before doing post-processing, we need to reshape the outputs as the common.do_inference will give us flat arrays. trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)] postprocessor_args = {"yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)], # A list of 3 three-dimensional tuples for the YOLO masks "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), # A list of 9 two-dimensional tuples for the YOLO anchors (59, 119), (116, 90), (156, 198), (373, 326)], "obj_threshold": 0.6, # Threshold for object coverage, float value between 0 and 1 "nms_threshold": 0.5, # Threshold for non-max suppression algorithm, float value between 0 and 1 "yolo_input_resolution": input_resolution_yolov3_HW} postprocessor = PostprocessYOLO(**postprocessor_args) # Run the post-processing algorithms on the TensorRT outputs and get the bounding box details of detected objects boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH)) # Draw the bounding boxes onto the original input image and save it as a PNG file obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES) output_image_path = 'dog_bboxes.png' obj_detected_img.save(output_image_path, 'PNG') print('Saved image with bounding boxes of detected objects to {}.'.format(output_image_path))
def main():
    ENGINE_FILE_PATH = "ped3_416.trt"
    INPUT_LIST_FILE = './ped_list.txt'
    INPUT_SIZE = 416
    filenames = []
    with open(INPUT_LIST_FILE, 'r') as list_file:
        for line in list_file.readlines():
            filenames.append(line.strip())
    input_resolution_yolov3_HW = (INPUT_SIZE, INPUT_SIZE)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    # Two-head (tiny-style) configuration: 6 anchors split across 2 masks.
    postprocessor_args = {
        "yolo_masks": [(3, 4, 5), (0, 1, 2)],
        "yolo_anchors": [(8, 34), (14, 60), (23, 94), (39, 149), (87, 291), (187, 472)],
        "obj_threshold": 0.1,
        "nms_threshold": 0.3,
        "yolo_input_resolution": input_resolution_yolov3_HW,
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)
    output_shapes = output_shapes_dic[str(INPUT_SIZE)]
    with get_engine(ENGINE_FILE_PATH) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        for filename in filenames:
            image_raw, image = preprocessor.process(filename)
            shape_orig_WH = image_raw.size
            trt_outputs = []
            # Do inference
            print('Running inference on image {}...'.format(filename))
            inputs[0].host = image
            t1 = time.time()
            trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs,
                                              outputs=outputs, stream=stream)
            t2 = time.time()
            c_time = t2 - t1
            print(c_time)
            trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]
            boxes, classes, scores = postprocessor.process(trt_outputs, shape_orig_WH)
            if len(boxes) != 0:
                obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)
            else:
                obj_detected_img = image_raw
            savename = filename.split('/')[-1].split('.')[0]
            output_image_path = './images_results/' + savename + '_' + str(INPUT_SIZE) + '.png'
            obj_detected_img.save(output_image_path, 'PNG')
            print('Saved image with bounding boxes of detected objects to {}.'.format(output_image_path))
def infer_cam():
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference on a webcam stream."""
    # Try to load a previously generated YOLOv3-608 network graph in ONNX format:
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = 'yolov3.trt'
    # Two-dimensional tuple with the target network's (spatial) input resolution in HW order
    input_resolution_yolov3_HW = (608, 608)
    # Output shapes expected by the post-processor
    output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]
    postprocessor_args = {
        "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],   # A list of 3 three-dimensional tuples for the YOLO masks
        "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),  # A list of 9 two-dimensional tuples for the YOLO anchors
                         (59, 119), (116, 90), (156, 198), (373, 326)],
        "obj_threshold": 0.6,   # Threshold for object coverage, float value between 0 and 1
        "nms_threshold": 0.5,   # Threshold for non-max suppression, float value between 0 and 1
        "yolo_input_resolution": input_resolution_yolov3_HW,
    }
    cap = cv2.VideoCapture(0)
    trt_outputs = []
    # Do inference with TensorRT
    with get_engine(onnx_file_path, engine_file_path) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        while True:
            ret, frame = cap.read()
            assert ret
            # Preprocess the frame and keep the raw version for drawing.
            preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
            image_raw, image = preprocessor.process(frame)
            # Store the shape of the original input image in WH format; we will need it later.
            shape_orig_WH = image_raw.size
            t = time()
            # Set host input to the image. common.do_inference_v2 copies the input to the GPU before executing.
            inputs[0].host = image
            trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs,
                                                 outputs=outputs, stream=stream)
            t = time() - t
            fps = 1 / t
            print("infer: %.2fms, fps: %.2f" % (t * 1000, fps))
            # Reshape the flat arrays returned by do_inference before post-processing.
            trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]
            postprocessor = PostprocessYOLO(**postprocessor_args)
            # Run the post-processing algorithms on the TensorRT outputs and get the bounding boxes
            boxes, classes, scores = postprocessor.process(trt_outputs, shape_orig_WH)
            im = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)
            # RGB -> BGR for OpenCV; copy into a contiguous, writable array so cv2.putText can draw on it.
            im = np.ascontiguousarray(np.asarray(im)[..., ::-1])
            cv2.putText(im, "%.2f" % fps, (12, 12), 3, 1, (0, 255, 0))
            cv2.imshow("det", im)
            if cv2.waitKey(5) == 27:  # Esc to quit
                break
    cap.release()
    cv2.destroyAllWindows()
def batch_show(image_path, image_save_path, onnx_file_path, engine_file_path):
    img_list = gb.glob(image_path + r"/*.png")
    # Two-dimensional tuple with the target network's (spatial) input resolution in HW order
    input_resolution_yolov3_HW = (352, 352)
    # Create a pre-processor object by specifying the required input resolution for YOLOv3
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    # Create a post-processor object (single-head model: one mask, three anchors)
    postprocessor_args = {
        "yolo_masks": [(0, 1, 2)],
        "yolo_anchors": [(188, 15), (351, 16), (351, 30)],
        "obj_threshold": 0.5,   # Threshold for object coverage, float value between 0 and 1
        "nms_threshold": 0.2,   # Threshold for non-max suppression, float value between 0 and 1
        "yolo_input_resolution": input_resolution_yolov3_HW,
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)
    # Here the post-processor is given the network input resolution: the inputs are
    # assumed to already be 352x352.
    shape_orig_WH = input_resolution_yolov3_HW
    # Output shapes expected by the post-processor
    output_shapes = [(1, 18, 11, 11)]
    # Do inference with TensorRT
    total_time, trt_outputs = 0, []
    with get_engine(onnx_file_path, engine_file_path) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Set host input to each image in turn. common.do_inference copies the input to the GPU before executing.
        for i, img_file in enumerate(img_list):
            image_raw, image = preprocessor.process(img_file)
            inputs[0].host = image
            trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs,
                                              outputs=outputs, stream=stream)
            # Reshape the flat arrays returned by do_inference before post-processing.
            trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]
            # Run the post-processing algorithms on the TensorRT outputs and get the bounding boxes
            boxes, classes, scores = postprocessor.process(trt_outputs, shape_orig_WH)
            image_show = draw_bboxes(image_raw, boxes, scores, classes, ['defect'], bbox_color='yellow')
            # Save the marked image under its original basename. (The original split the
            # path with os.path.split/os.path.splitext in a roundabout way; os.path.basename
            # is the equivalent result.)
            save_name = os.path.join(image_save_path, os.path.basename(img_file))
            image_show.save(save_name)
            print("Image", save_name, "saved.")
def main(): """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference.""" args = parser.parse_args() # Try to load a previously generated YOLOv3-608 network graph in ONNX format: onnx_file_path = 'yolov3.onnx' engine_file_path = "yolov3.trt" cam = cv.VideoCapture(args.video) # img = cv.imread("dog.jpg") input_resolution_yolov3_HW = (608, 608) preprocessor = PreprocessYOLO(input_resolution_yolov3_HW) postprocessor_args = {"yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)], "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), (59, 119), (116, 90), (156, 198), (373, 326)], "obj_threshold": 0.6, "nms_threshold": 0.5, "yolo_input_resolution": input_resolution_yolov3_HW} postprocessor = PostprocessYOLO(**postprocessor_args) with get_engine(onnx_file_path, engine_file_path) as engine, engine.create_execution_context() as context: inputs, outputs, bindings, stream = common.allocate_buffers(engine) while True: _ret, img = cam.read() if(_ret is False): break image_raw, image = preprocessor.process_image(img) shape_orig_WH = image_raw.size output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)] trt_outputs = [] inputs[0].host = image trt_outputs = common.do_inference( context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream) trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)] boxes, classes, scores = postprocessor.process( trt_outputs, (shape_orig_WH)) if(boxes is None): continue obj_detected_img = draw_bboxes( image_raw, boxes, scores, classes, ALL_CATEGORIES) det_img = np.array(obj_detected_img) cv.imshow("frame", det_img) cv.waitKey(5)
class YoloTRT(object):
    def __init__(self):
        super().__init__()
        # Input resolution
        self.preprocessor = PreprocessYOLO((608, 608))
        self.trt = TensorRT("yolov3.trt")
        postprocessor_args = {
            "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
            "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                             (59, 119), (116, 90), (156, 198), (373, 326)],
            "obj_threshold": 0.6,
            "nms_threshold": 0.5,
            "yolo_input_resolution": (608, 608),
        }
        self.postprocessor = PostprocessYOLO(**postprocessor_args)

    # def _preprocess(self, input_array: np.ndarray) -> np.ndarray:
    #     # in: <NHWC> raw image batch, out: <NCHW> resized <N,3,608,608>
    #     return self.preprocessor.process(input_array)

    def _inference(self, input: np.ndarray) -> list:
        # in: <NCHW> <N,3,608,608>, out: [(N, 255, 19, 19), (N, 255, 38, 38), (N, 255, 76, 76)]
        trt_outputs = self.trt.inference(input)  # (was commented out; trt_outputs must be assigned here)
        output_shapes = [(self.trt.max_batch_size, 255, 19, 19),
                         (self.trt.max_batch_size, 255, 38, 38),
                         (self.trt.max_batch_size, 255, 76, 76)]
        trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]
        return trt_outputs

    # def _postprocess(self, feat_batch, shape_orig_WH: tuple):
    #     return [[self.postprocessor.process(feat, shape_orig)]
    #             for feat, shape_orig in zip(feat_batch, shape_orig_WH)]

    @profile
    def inference(self, input_array: np.ndarray):
        # input_array: <N,H,W,C> raw image batch
        pre = self.preprocessor.process(input_array)   # <NHWC> -> <NCHW> resized <N,3,608,608>
        trt_outputs = self._inference(pre)             # [(N, 255, 19, 19), (N, 255, 38, 38), (N, 255, 76, 76)]
        # Regroup from layer-major to image-major order.
        feat_batch = [[trt_outputs[j][i] for j in range(len(trt_outputs))]
                      for i in range(len(trt_outputs[0]))]
        post = [[self.postprocessor.process(feat, input_array.shape)] for feat in feat_batch]
        # Trim padding entries when the batch is smaller than max_batch_size.
        post = post[:len(input_array)]
        return post    # [[bbox, score, categories, confidences], ...]
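A minimal usage sketch for the class above, assuming the TensorRT wrapper class, a built yolov3.trt engine, and the PreprocessYOLO/PostprocessYOLO helpers exist as in the snippet's repository; the dummy batch is illustrative, and @profile assumes the script is run under line_profiler's kernprof:

import numpy as np

# Hypothetical usage; the engine file and helper classes come from the snippet's repo.
detector = YoloTRT()
dummy_batch = np.zeros((2, 720, 1280, 3), dtype=np.uint8)  # <N,H,W,C> raw images
detections = detector.inference(dummy_batch)
for per_image in detections:
    print(per_image)  # [[bbox, score, categories, confidences]]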
    "obj_threshold": 0.6,   # Threshold for object coverage, in [0, 1]
    "nms_threshold": 0.5,   # Threshold for NMS, in [0, 1]
    "yolo_input_resolution": input_resolution_yolov3_HW,
}
# Create an instance of the post-processing class
postprocessor = PostprocessYOLO(**postprocessor_args)
print("saving...")
for idx in range(len(results)):
    trt_results = [
        results[idx]["082_convolutional"][0],
        results[idx]["094_convolutional"][0],
        results[idx]["106_convolutional"][0],
    ]
    trt_outputs = [output.reshape(shape) for output, shape in zip(trt_results, output_shapes)]
    # Run the post-processing algorithm and get the bounding boxes of detected objects
    boxes, classes, scores = postprocessor.process(trt_outputs, shape_orig_WH[idx])
    obj_detected_img = draw_bboxes(image_raws[idx], boxes, scores, classes, ALL_CATEGORIES)
    output_image_path = FLAGS.output + "{0}".format(filenames[idx].split("/")[-1])
    print(output_image_path)
    obj_detected_img.save(output_image_path)
def main(): """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference.""" # Try to load a previously generated YOLOv3-608 network graph in ONNX format: onnx_file_path = './yolov3.onnx' engine_file_path = "yolov3.trt" data_path = "./data/unrel.data" data = parse_data_cfg(data_path) nc = int(data['classes']) # number of classes path = data['valid'] # path to test images names = load_classes(data['names']) # class names iouv = torch.linspace(0.5, 0.95, 1, dtype=torch.float32) # iou vector for [email protected]:0.95 niou = 1 conf_thres = 0.001 iou_thres = 0.6 verbose = True # Genearte custom dataloader img_size = 448 # copy form pytorch src batch_size = 16 dataset = LoadImagesAndLabels(path, img_size, batch_size, rect=True) batch_size = min(batch_size, len(dataset)) dataloader = data_loader(dataset, batch_size, img_size) # Output shapes expected by the post-processor output_shapes = [(16, 126, 14, 14), (16, 126, 28, 28), (16, 126, 56, 56)] # Do inference with TensorRT trt_outputs = [] with get_engine(onnx_file_path, engine_file_path ) as engine, engine.create_execution_context() as context: inputs, outputs, bindings, stream = common.allocate_buffers(engine) s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', '[email protected]', 'F1') p, r, f1, mp, mr, map, mf1, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0. pbar = tqdm.tqdm(dataloader, desc=s) stats, ap, ap_class = [], [], [] seen = 0 for batch_i, (imgs, targets, paths, shapes) in enumerate(pbar): imgs = imgs.astype(np.float32) / 255.0 nb, _, height, width = imgs.shape # batch size, channels, height, width whwh = np.array([width, height, width, height]) inputs[0].host = imgs postprocessor_args = { "yolo_masks": [ (6, 7, 8), (3, 4, 5), (0, 1, 2) ], # A list of 3 three-dimensional tuples for the YOLO masks "yolo_anchors": [ (10, 13), (16, 30), (33, 23), (30, 61), ( 62, 45 ), # A list of 9 two-dimensional tuples for the YOLO anchors (59, 119), (116, 90), (156, 198), (373, 326) ], "num_classes": 37, "stride": [32, 16, 8] } postprocessor = PostprocessYOLO(**postprocessor_args) # Do layers before yolo t = time.time() trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream) trt_outputs = [ output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes) ] trt_outputs = [ np.ascontiguousarray( otpt[:, :, :int(imgs.shape[2] * (2**i) / 32), :int(imgs.shape[3] * (2**i) / 32)], dtype=np.float32) for i, otpt in enumerate(trt_outputs) ] output_list = postprocessor.process(trt_outputs) t0 += time.time() - t inf_out = torch.cat(output_list, 1) t = time.time() output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres) # nms t1 += time.time() - t # Statistics per image for si, pred in enumerate(output): labels = targets[targets[:, 0] == si, 1:] nl = len(labels) tcls = labels[:, 0].tolist() if nl else [] # target class seen += 1 if pred is None: if nl: stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls)) continue # Assign all predictions as incorrect correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool) if nl: detected = [] # target indices tcls_tensor = labels[:, 0] # target boxes tbox = xywh2xyxy(labels[:, 1:5]) * whwh tbox = tbox.type(torch.float32) # Per target class for cls in torch.unique(tcls_tensor): ti = (cls == tcls_tensor).nonzero().view( -1) # prediction indices pi = (cls == pred[:, 5]).nonzero().view( -1) # target indices # Search for detections if pi.shape[0]: # Prediction to target ious 
ious, i = box_iou(pred[pi, :4], tbox[ti]).max( 1) # best ious, indices # Append detections for j in (ious > iouv[0]).nonzero(): d = ti[i[j]] # detected target if d not in detected: detected.append(d) correct[pi[j]] = ious[ j] > iouv # iou_thres is 1xn if len( detected ) == nl: # all targets already located in image break # Append statistics (correct, conf, pcls, tcls) stats.append( (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls)) # Plot images if batch_i < 1: f = 'test_batch%g_gt.jpg' % batch_i # filename plot_images(imgs, targets, paths=paths, names=names, fname=f) # ground truth f = 'test_batch%g_pred.jpg' % batch_i plot_images(imgs, output_to_target(output, width, height), paths=paths, names=names, fname=f) # predictions # Compute statistics stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy if len(stats): p, r, ap, f1, ap_class = ap_per_class(*stats) if niou > 1: p, r, ap, f1 = p[:, 0], r[:, 0], ap.mean( 1), ap[:, 0] # [P, R, [email protected]:0.95, [email protected]] mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean() nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class else: nt = torch.zeros(1) # Print results pf = '%20s' + '%10.3g' * 6 # print format print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1)) # Print results per class if verbose and nc > 1 and len(stats): for i, c in enumerate(ap_class): print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i])) # Print speeds if verbose: t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + ( img_size, img_size, batch_size) # tuple print( 'Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)
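The per-class matching above hinges on box_iou, which compares predictions to targets in (x1, y1, x2, y2) form. A tiny self-contained sketch of that computation (plain PyTorch; the actual box_iou comes from the ultralytics-style utils the snippet imports):

import torch

def box_iou(box1, box2):
    """IoU matrix between two sets of boxes in (x1, y1, x2, y2) format."""
    area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
    area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
    lt = torch.max(box1[:, None, :2], box2[:, :2])   # intersection top-left
    rb = torch.min(box1[:, None, 2:], box2[:, 2:])   # intersection bottom-right
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    return inter / (area1[:, None] + area2 - inter)

pred = torch.tensor([[0., 0., 10., 10.]])
tgt = torch.tensor([[5., 5., 15., 15.]])
print(box_iou(pred, tgt))  # tensor([[0.1429]]) -- 25 / (100 + 100 - 25)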
def read_queue(queue):
    # 3. Load model
    # Initialize
    TRT_LOGGER = trt.Logger(trt.Logger.INFO)
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')
    runtime = trt.Runtime(TRT_LOGGER)
    # Create the engine
    with open('model.bin', 'rb') as f:
        buf = f.read()
        engine = runtime.deserialize_cuda_engine(buf)
    # Create the buffers
    host_inputs = []
    cuda_inputs = []
    host_outputs = []
    cuda_outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        host_mem = cuda.pagelocked_empty(size, np.float32)
        cuda_mem = cuda.mem_alloc(host_mem.nbytes)
        bindings.append(int(cuda_mem))
        if engine.binding_is_input(binding):
            host_inputs.append(host_mem)
            cuda_inputs.append(cuda_mem)
        else:
            host_outputs.append(host_mem)
            cuda_outputs.append(cuda_mem)
    context = engine.create_execution_context()

    batch_size = 1
    input_size = 416
    output_shapes_416 = [(batch_size, 54, 13, 13), (batch_size, 54, 26, 26), (batch_size, 54, 52, 52)]
    output_shapes_480 = [(batch_size, 54, 15, 15), (batch_size, 54, 30, 30), (batch_size, 54, 60, 60)]
    output_shapes_544 = [(batch_size, 54, 17, 17), (batch_size, 54, 34, 34), (batch_size, 54, 68, 68)]
    output_shapes_608 = [(batch_size, 54, 19, 19), (batch_size, 54, 38, 38), (batch_size, 54, 76, 76)]  # 608 / 8 = 76 (the original's 72 looks like a typo)
    output_shapes_dic = {'416': output_shapes_416, '480': output_shapes_480,
                         '544': output_shapes_544, '608': output_shapes_608}
    output_shapes = output_shapes_dic[str(input_size)]
    input_resolution_yolov3_HW = (input_size, input_size)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    postprocessor_args = {
        "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        "yolo_anchors": [(4, 7), (7, 15), (13, 25), (25, 42), (41, 67),
                         (75, 94), (91, 162), (158, 205), (250, 332)],
        "obj_threshold": 0.5,
        "nms_threshold": 0.35,
        "yolo_input_resolution": input_resolution_yolov3_HW,
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)
    inputs, outputs, bindings, stream = allocate_buffers(engine)
    print('3.Load model successful.')
    print('Everything is ready.')

    # Serial command and log message for each predicted class index
    # (replaces the original long if/elif chain, same behavior).
    label_actions = {
        0: ("c", 'plate front.'),
        1: ("d", 'plate back.'),
        2: ("f", 'bowl front.'),
        3: ("e", 'bowl back.'),
        4: ("g", 'glass cup side.'),
        5: ("g", 'glass cup back.'),
        6: ("g", 'glass cup front.'),
        7: ("i", 'teacup side.'),
        8: ("j", 'teacup back.'),
        9: ("k", 'teacup front.'),
        10: ("g", 'cup side.'),
        11: ("g", 'cup back.'),
        12: ("g", 'cup front.'),
    }

    num = 0
    while cap.isOpened() and ser.isOpen():
        if queue.empty():
            continue
        frame = queue.get()
        images = []
        image_raw, image = preprocessor.process(frame)
        images.append(image)
        num = num + 1
        images_batch = np.concatenate(images, axis=0)
        inputs[0].host = images_batch
        # t1 = time.time()
        trt_outputs = do_inference(context, bindings=bindings, inputs=inputs,
                                   outputs=outputs, stream=stream, batch_size=batch_size)
        trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]
        shape_orig_WH = image_raw.size
        boxes, classes, scores = postprocessor.process(trt_outputs, shape_orig_WH, 0)
        # t2 = time.time()
        # print("time consumption:", t2 - t1)
        print(boxes, scores, classes)
        images.clear()
        if np.all(scores == 0):
            ser.write("h".encode("utf-8"))
            print('exception.')
            continue
        index = np.nonzero(classes)
        label = classes[index[0]]
        cv2.imwrite('tmp/' + str(num) + '.jpg', frame)
        command, message = label_actions.get(int(label), ("h", 'exception.'))
        ser.write(command.encode("utf-8"))
        print(message)
def main():
    args = build_argparser().parse_args()
    model_xml = args.model
    model_bin = os.path.splitext(model_xml)[0] + ".bin"

    # ------------- 1. Plugin initialization for specified device and load extensions library if specified -------------
    log.info("Creating Inference Engine...")
    ie = IECore()
    if args.cpu_extension and 'CPU' in args.device:
        ie.add_extension(args.cpu_extension, "CPU")

    # -------------------- 2. Reading the IR generated by the Model Optimizer (.xml and .bin files) --------------------
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    net = IENetwork(model=model_xml, weights=model_bin)

    # ---------------------------------- 3. Load CPU extension for support of specific layers --------------------------
    if "CPU" in args.device:
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
        if len(not_supported_layers) != 0:
            log.error("Following layers are not supported by the plugin for specified device {}:\n {}".
                      format(args.device, ', '.join(not_supported_layers)))
            log.error("Please try to specify cpu extensions library path in sample's command line parameters using -l "
                      "or --cpu_extension command line argument")
            sys.exit(1)
    assert len(net.inputs.keys()) == 1, "Sample supports only YOLO V3 based single input topologies"

    # ---------------------------------------------- 4. Preparing inputs -----------------------------------------------
    log.info("Preparing inputs")
    input_blob = next(iter(net.inputs))
    output_blob = next(iter(net.outputs))
    batch_size = 16
    img_size = 448
    data_path = "/usr/src/app/data/unrel.data"
    data = parse_data_cfg(data_path)
    nc = 37  # int(data['classes'])
    path = data['valid']
    names = load_classes(data['names'])
    iouv = torch.linspace(0.5, 0.95, 10, dtype=torch.float32)  # IoU vector for mAP@0.5:0.95
    iouv = iouv[0].view(1)
    niou = iouv.numel()  # 1
    conf_thres = 0.001
    iou_thres = 0.6
    verbose = True
    dataset = LoadImagesAndLabels(path, img_size, batch_size, rect=False)
    batch_size = min(batch_size, len(dataset))
    dataloader = data_loader(dataset, batch_size, img_size)
    # Output shapes expected by the post-processor
    output_shapes = [(16, 126, 14, 14), (16, 126, 28, 28), (16, 126, 56, 56)]
    n, c, h, w = net.inputs[input_blob].shape

    # ----------------------------------------- 5. Loading model to the plugin -----------------------------------------
    log.info("Loading model to the plugin")
    exec_net = ie.load_network(network=net, device_name=args.device)

    log.info("Starting inference...")
    s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@0.5', 'F1')
    p, r, f1, mp, mr, map, mf1, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    pbar = tqdm.tqdm(dataloader, desc=s)
    stats, ap, ap_class = [], [], []
    seen = 0
    print('HERE0')
    for batch_i, (imgs, targets, paths, shapes) in enumerate(pbar):
        imgs = imgs.astype(np.float32) / 255.0
        # print(imgs)
        nb, _, height, width = imgs.shape  # batch size, channels, height, width
        # print(height, width)
        whwh = np.array([width, height, width, height])
        # print('HERE1')
        postprocessor_args = {
            "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],   # A list of 3 three-dimensional tuples for the YOLO masks
            "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),  # A list of 9 two-dimensional tuples for the YOLO anchors
                             (59, 119), (116, 90), (156, 198), (373, 326)],
            "num_classes": 37,
            "stride": [32, 16, 8],
        }
        postprocessor = PostprocessYOLO(**postprocessor_args)
        # Start inference
        t = time()
        res = exec_net.infer(inputs={input_blob: imgs})
        # Reorder the output blobs to match the post-processor's expected head order.
        res0 = list(res.values())[0]
        res1 = list(res.values())[1]
        res2 = list(res.values())[2]
        res = [res2, res0, res1]
        # res = [output.reshape(shape) for output, shape in zip(res0, output_shapes)]
        # Crop each head to the actual feature-map size for this image resolution.
        res = [np.ascontiguousarray(otpt[:, :, :int(imgs.shape[2] * (2 ** i) / 32),
                                         :int(imgs.shape[3] * (2 ** i) / 32)], dtype=np.float32)
               for i, otpt in enumerate(res)]
        output_list = postprocessor.process(res)
        t0 += time() - t
        inf_out = torch.cat(output_list, 1)
        print(inf_out.shape)
        print(inf_out)
        """
def main(): """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference.""" # Try to load a previously generated YOLOv3-608 network graph in ONNX format: #获取onnx模型和相应引擎文件的路径 onnx_file_path = 'yolov3.onnx' engine_file_path = "yolov3.trt" # Download a dog image and save it to the following file path: #下载相关的图片数据 input_image_path = common.download_file( 'dog.jpg', 'https://github.com/pjreddie/darknet/raw/f86901f6177dfc6116360a13cc06ab680e0c86b0/data/dog.jpg', checksum_reference=None) # Two-dimensional tuple with the target network's (spatial) input resolution in HW ordered #网络的输入图片weidth和height input_resolution_yolov3_HW = (608, 608) # Create a pre-processor object by specifying the required input resolution for YOLOv3 #PreprocessYOLO参考data_processing.py的实现 #加载图片并进行相应的预处理 preprocessor = PreprocessYOLO(input_resolution_yolov3_HW) # Load an image from the specified input path, and return it together with a pre-processed version #从相应路径加载一张图片,将加载的原图和预处理后的图像一起返回 #具体参考data_processing.py的实现 image_raw, image = preprocessor.process(input_image_path) # Store the shape of the original input image in WH format, we will need it for later #存储原始图片的维度 shape_orig_WH = image_raw.size # Output shapes expected by the post-processor #输出层的维度 output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)] # Do inference with TensorRT #进行trt的推理 trt_outputs = [] #get_engine参考本文件的实现 #获取引擎文件并创建相关的推理上下文 with get_engine(onnx_file_path, engine_file_path ) as engine, engine.create_execution_context() as context: #分配相应的内存缓冲区 inputs, outputs, bindings, stream = common.allocate_buffers(engine) # Do inference print('Running inference on image {}...'.format(input_image_path)) # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing. #将相应数据传到主机内存 inputs[0].host = image #进行相应的推理 trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream) # Before doing post-processing, we need to reshape the outputs as the common.do_inference will give us flat arrays. #得到推理的输出 trt_outputs = [ output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes) ] postprocessor_args = { "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)], # A list of 3 three-dimensional tuples for the YOLO masks "yolo_anchors": [ (10, 13), (16, 30), (33, 23), (30, 61), (62, 45), # A list of 9 two-dimensional tuples for the YOLO anchors (59, 119), (116, 90), (156, 198), (373, 326) ], "obj_threshold": 0.6, # Threshold for object coverage, float value between 0 and 1 "nms_threshold": 0.5, # Threshold for non-max suppression algorithm, float value between 0 and 1 "yolo_input_resolution": input_resolution_yolov3_HW } #接下来就是相关的后处理内容了 postprocessor = PostprocessYOLO(**postprocessor_args) # Run the post-processing algorithms on the TensorRT outputs and get the bounding box details of detected objects boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH)) # Draw the bounding boxes onto the original input image and save it as a PNG file obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES) output_image_path = 'dog_bboxes.png' obj_detected_img.save(output_image_path, 'PNG') print('Saved image with bounding boxes of detected objects to {}.'.format( output_image_path))
def main():
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference."""
    #########################################################################
    # $ python3 onnx_to_tensorrt.py v3 608
    #########################################################################
    dir_onnx = sys.argv[1]
    fn_onnx = sys.argv[2]
    onnx_file_path = os.path.join(dir_onnx, fn_onnx)
    # print('fn_onnx : ', fn_onnx); exit()
    t1, t2 = get_exact_file_name_from_path(fn_onnx).split('_')
    v_yolo = t1[4:]
    said = int(t2)
    # print('v_yolo : ', v_yolo, ', said : ', said); exit()
    # said = int(sys.argv[2])
    # Try to load a previously generated YOLOv3-608 network graph in ONNX format:
    # onnx_file_path = 'yolo{}_{}.onnx'.format(v_yolo, said)
    engine_file_path = os.path.join(dir_onnx, 'yolo{}_{}.trt'.format(v_yolo, said))
    # Download a dog image and save it to the following file path:
    input_image_path = download_file(
        'dog.jpg',
        'https://github.com/pjreddie/darknet/raw/f86901f6177dfc6116360a13cc06ab680e0c86b0/data/dog.jpg',
        checksum_reference=None)
    # Two-dimensional tuple with the target network's (spatial) input resolution in HW order
    input_resolution_yolov3_HW = (said, said)
    # Create a pre-processor object by specifying the required input resolution for YOLOv3
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    # Load an image from the specified input path, and return it together with a pre-processed version
    image_raw, image = preprocessor.process(input_image_path)
    # Store the shape of the original input image in WH format; we will need it later
    shape_orig_WH = image_raw.size
    # print('image_raw.size : ', image_raw.size); print('image.size : ', image.size); exit()
    # Output shapes expected by the post-processor
    # output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]
    output_shapes = get_output_shapes(v_yolo, said)
    # Do inference with TensorRT
    trt_outputs = []
    with get_engine(onnx_file_path, engine_file_path) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Do inference
        print('Running inference on image {}...'.format(input_image_path))
        # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
        inputs[0].host = image
        # print('len(inputs) : ', len(inputs))    # 1
        # print('len(outputs) : ', len(outputs))  # 2 for v3-tiny, 3 for v3
        # print('type(stream) : ', type(stream))
        # print('type(outputs[0]) : ', type(outputs[0]))
        # print('type(outputs[1]) : ', type(outputs[1]))
        # start = time.time()
        trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs,
                                          outputs=outputs, stream=stream)
        # print("time: %.2f s" % (time.time() - start))
        # print('len(trt_outputs) : ', len(trt_outputs))
        # print('trt_outputs[0].shape : ', trt_outputs[0].shape)
        # print('trt_outputs[1].shape : ', trt_outputs[1].shape)
        print(trt_outputs)
    # Before doing post-processing, reshape the outputs: common.do_inference returns flat arrays.
    trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]
    postprocessor_args = get_postprocessor_args(v_yolo, input_resolution_yolov3_HW)
    # print('postprocessor_args : ', postprocessor_args); exit()
    # postprocessor_args = {"yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],       # A list of 3 three-dimensional tuples for the YOLO masks
    #                       "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),  # A list of 9 two-dimensional tuples for the YOLO anchors
    #                                        (59, 119), (116, 90), (156, 198), (373, 326)],
    #                       "obj_threshold": 0.6,   # Threshold for object coverage, float value between 0 and 1
    #                       "nms_threshold": 0.5,   # Threshold for non-max suppression, float value between 0 and 1
    #                       "yolo_input_resolution": input_resolution_yolov3_HW}
    postprocessor = PostprocessYOLO(**postprocessor_args)
    # Run the post-processing algorithms on the TensorRT outputs and get the bounding box details of detected objects
    boxes, classes, scores = postprocessor.process(trt_outputs, shape_orig_WH)
    # Draw the bounding boxes onto the original input image and save it as a PNG file
    obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)
    output_image_path = 'dog_bboxes_{}_{}.png'.format(v_yolo, said)
    obj_detected_img.save(output_image_path, 'PNG')
    print('Saved image with bounding boxes of detected objects to {}.'.format(output_image_path))
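The script above calls get_output_shapes and get_postprocessor_args, whose definitions are not included in this snippet. A plausible, purely illustrative reconstruction under the assumption that v_yolo distinguishes a full three-head model from a two-head tiny variant; the mask and anchor tables are the standard darknet yolov3/yolov3-tiny cfg values, not anything taken from the missing code:

def get_output_shapes(v_yolo, said, num_classes=80, batch=1):
    # Hypothetical: two heads for tiny, three otherwise.
    filters = 3 * (5 + num_classes)  # 255 for COCO
    strides = (32, 16) if 'tiny' in v_yolo else (32, 16, 8)
    return [(batch, filters, said // s, said // s) for s in strides]

def get_postprocessor_args(v_yolo, input_resolution_HW):
    # Hypothetical: standard darknet masks/anchors for each variant.
    if 'tiny' in v_yolo:
        masks = [(3, 4, 5), (0, 1, 2)]
        anchors = [(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)]
    else:
        masks = [(6, 7, 8), (3, 4, 5), (0, 1, 2)]
        anchors = [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                   (59, 119), (116, 90), (156, 198), (373, 326)]
    return {"yolo_masks": masks, "yolo_anchors": anchors,
            "obj_threshold": 0.6, "nms_threshold": 0.5,
            "yolo_input_resolution": input_resolution_HW}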
def main(): """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference.""" # Try to load a previously generated YOLOv3-608 network graph in ONNX format: if COCO: onnx_file_path = os.path.join('./engine/onnx/', 'yolov3-' + str(SIZE) + '.onnx') engine_file_path = os.path.join('./engine/trt/', 'yolov3-' + str(SIZE) + BUILD + '.trt') else: onnx_file_path = os.path.join('./engine/onnx/', 'yolov3-voc-' + str(SIZE) + '.onnx') engine_file_path = os.path.join( './engine/trt/', 'yolov3-voc-' + str(SIZE) + BUILD + '.trt') # onnx_file_path = "./engine/yolov3-608.onnx" # engine_file_path = "./engine/yolov3-608-voc-f32.trt" # loop over images if COCO: test_images_file = './coco/5k.txt' #for coco else: test_images_file = './VOC/data/dataset/voc_test.txt' #for voc with open(test_images_file, 'r') as f: txt = f.readlines() test_images = [line.strip() for line in txt] timeRecSave = [] input_resolution_yolov3_HW = (SIZE, SIZE) predicted_dir_path = './mAP/predicted' if os.path.exists(predicted_dir_path): shutil.rmtree(predicted_dir_path) os.mkdir(predicted_dir_path) # ground_truth_dirs_path = './mAP/ground-truth' # if os.path.exists(ground_truth_dir_path): # shutil.rmtree(ground_truth_dir_path) # os.mkdir(ground_truth_dir_path) with get_engine(onnx_file_path, engine_file_path ) as engine, engine.create_execution_context() as context: inputs, outputs, bindings, stream = common.allocate_buffers(engine) for idx, input_image_path in enumerate(test_images): #print("image path = ", input_image_path) filename = os.path.split(input_image_path)[1] #print("filename = ",filename) # try: # label_file = './coco/labels/val2014/' + os.path.splitext(filename)[0]+'.txt' # with open(label_file, 'r') as f: # labels = f.readlines() # except: # continue # Create a pre-processor object by specifying the required input resolution for YOLOv3 preprocessor = PreprocessYOLO(input_resolution_yolov3_HW) # Load an image from the specified input path, and return it together with a pre-processed version image_raw, image = preprocessor.process(input_image_path) # Store the shape of the original input image in WH format, we will need it for later # print("image shape = ", image.shape) # print("image data = ") # print(image) shape_orig_WH = image_raw.size # print("image_raw.size = ", image_raw.size) # print("image_raw.shape = ", image_raw.shape) # Output shapes expected by the post-processor # output_shapes = [(1, 255, 10, 10), (1, 255, 20, 20), (1, 255, 40, 40)] #for 320 # output_shapes = [(1, 255, 13, 13), (1, 255, 26, 26), (1, 255, 52, 52)] #for 416 output_shapes = [(1, int(OUT), int(SIZE / 32), int(SIZE / 32)), (1, int(OUT), int(SIZE / 16), int(SIZE / 16)), (1, int(OUT), int(SIZE / 8), int(SIZE / 8)) ] #for 608 # Do inference with TensorRT trt_outputs = [] # with get_engine(onnx_file_path, engine_file_path) as engine, engine.create_execution_context() as context: # inputs, outputs, bindings, stream = common.allocate_buffers(engine) # Do inference # print('Running inference on image {}...'.format(input_image_path)) # if idx==0 else 0 # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing. 
inputs[0].host = image # start = time.time() trt_outputs, timeRec = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream) # print("time: %.2f s" %(time.time()-start)) # print(trt_outputs) timeRecSave.append(timeRec) print('%d, Image %s, Recognition Time %0.3f seconds' % (idx, filename, timeRec)) # # Before the post-processing, we need to reshape the outputs as the common.do_inference will give us flat arrays. trt_outputs = [ output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes) ] # A list of 3 three-dimensional tuples for the YOLO masks # A list of 9 two-dimensional tuples for the YOLO anchors postprocessor_args = {"yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)], \ "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), \ (59, 119), (116, 90), (156, 198), (373, 326)],\ # Threshold for object coverage, float value between 0 and 1 "obj_threshold": 0.6,\ # Threshold for non-max suppression algorithm, float value between 0 and 1 "nms_threshold": 0.5,\ "yolo_input_resolution": input_resolution_yolov3_HW} postprocessor = PostprocessYOLO(**postprocessor_args) # # Run the post-processing algorithms on the TensorRT outputs and get the bounding box details of detected objects boxes, classes, scores = postprocessor.process( trt_outputs, (shape_orig_WH)) # Draw the bounding boxes onto the original input image and save it as a PNG file if PRINT_RESULTS: obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES) output_image_path = './results/yolo_' + filename obj_detected_img.save(output_image_path) print( 'Saved image with bounding boxes of detected objects to {}.' .format(output_image_path)) predict_result_path = os.path.join(predicted_dir_path, str(idx) + '.txt') # ground_truth_path = os.path.join(ground_truth_dir_path, str(idx) + '.txt') with open(predict_result_path, 'w') as f: if boxes is not None: for box, score, category_idx in zip( boxes, scores, classes): x_coord, y_coord, width, height = box box = [ x_coord, y_coord, x_coord + width, y_coord + height ] # fit YunYang1994's mAP calculation input format category = ALL_CATEGORIES[category_idx] category = "".join(category.split()) # print("score info = ", score, score.type) box = list(map(int, box)) xmin, ymin, xmax, ymax = list(map(str, box)) # bbox_mess = ' '.join([category, score, xmin, ymin, xmax, ymax]) + '\n' bbox_mess = ' '.join([ category, "{:.4f}".format(score), xmin, ymin, xmax, ymax ]) + '\n' # print(bbox_mess) f.write(bbox_mess) timeRecMean = np.mean(timeRecSave) print('The mean recognition time is {0:0.3f} seconds'.format(timeRecMean)) # %% Visualization of results if PRINT_RESULTS: np.save('results/timeRecognition.npy', timeRecSave) plt.figure(figsize=(8, 5)) plt.plot(timeRecSave, label='Recg_time') plt.ylim([0, 0.05]) plt.xlabel('Test image number'), plt.ylabel('Time [second]'), plt.title( 'Recognition time of Yolov3_DarkNet_ONNX_TensorRT_GPU_coco_test_2017' ) plt.hlines(y=timeRecMean, xmin=0, xmax=len(test_images), linewidth=3, color='r', label='Mean') plt.savefig( 'results/Yolov3_DarkNet_ONNX_TensorRT_GPU_coco_test_2017.png', bbox_inches='tight') plt.show()
class Detect:
    def __init__(self, yaml_path):
        # yaml_path: path to the parameter configuration file
        with open(yaml_path, 'r', encoding='utf-8') as f:
            self.param_dict = yaml.load(f, Loader=yaml.FullLoader)
        # Get the engine, then create the execution context from it
        self.engine = get_engine(self.param_dict['onnx_path'],
                                 self.param_dict['engine_path'],
                                 self.param_dict['input_shape'],
                                 self.param_dict['int8_calibration'])
        self.context = self.engine.create_execution_context()
        # YOLO data preprocessing (PreprocessYOLO class)
        assert len(self.param_dict['input_shape']) == 4, "input_shape must have 4 dimensions"
        batch, _, height, width = self.param_dict['input_shape']
        self.preprocessor = PreprocessYOLO((height, width))
        # Generate the prior anchors [x, y, w, h, f_w, f_h]: xy are the feature-map
        # column/row coordinates, wh is the anchor, f_wh is the feature-map size
        self.prior_anchors = PriorBox(cfg=self.param_dict).forward()
        # Miscellaneous configuration
        # Label names
        self.all_categories = load_label_categories(self.param_dict['label_file_path'])
        classes_num = len(self.all_categories)
        # TensorRT output shape
        stride = self.param_dict['stride']
        num_anchors = self.param_dict['num_anchors']
        grid_num = ((height // stride[0]) * (width // stride[0]) * num_anchors[0]
                    + (height // stride[1]) * (width // stride[1]) * num_anchors[1]
                    + (height // stride[2]) * (width // stride[2]) * num_anchors[2])
        self.output_shapes = [(batch, grid_num, classes_num + 5)]
        self.img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng']  # acceptable image suffixes
        self.vid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv']  # acceptable video suffixes
        # YOLO post-processing: YOLOv4 concatenates the 3 outputs into [N, AHW*3, classes_num+5];
        # this handles both the original YOLOv4-style and the newer YOLOv5-style predictions
        self.postprocessor = PostprocessYOLO(self.prior_anchors, self.param_dict)

    def predict(self, input_path='dog.jpg', output_save_root='./output', write_txt=False):
        '''
        :param input_path: a single image path, an image folder, or a single video file path
        :param output_save_root: everything is saved into this folder; videos are saved as mp4
        :param write_txt: save the predicted box coordinates / class / confidence as txt
        :return:
        '''
        # Work out whether the input is an image, a folder, or a video
        is_video = False
        path = input_path
        if os.path.isdir(path):
            # Image folder
            img_names = os.listdir(path)
            img_names = [name for name in img_names if name.split('.')[-1] in self.img_formats]
        elif os.path.isfile(path):
            # Split '/home/ai/111.jpg' -> ('/home/ai', '111.jpg')
            path, img_name = os.path.split(path)
            # Flag videos
            if img_name.split('.')[-1] in self.vid_formats:
                is_video = True
            else:
                assert img_name.split('.')[-1] in self.img_formats, "must be a single image path"
                img_names = [img_name]
        else:
            print("Invalid input!!!" * 3)
        # Create the output folder
        check_path(output_save_root)
        # Handle video input
        if is_video:
            assert img_name.count('.') == 1, "a video name must contain exactly one '.'"
            # Open the video
            cap = cv2.VideoCapture(os.path.join(path, img_name))
            # Get the video's fps, width, and height
            fps = int(cap.get(cv2.CAP_PROP_FPS))
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            num = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))  # total number of frames
            # Create the output video
            video_save_path = os.path.join(output_save_root, img_name.split('.')[0] + '_pred.mp4')
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            video_writer = cv2.VideoWriter(video_save_path, fourcc=fourcc, fps=fps,
                                           frameSize=(width, height))
        else:
            num = len(img_names)  # number of images
        # Inference (device 0 by default)
        inputs, outputs, bindings, stream = common.allocate_buffers(self.engine)
        # Do inference
        for i in range(num):
            # Preprocessing
            if is_video:
                cap.set(cv2.CAP_PROP_POS_FRAMES, i)  # seek to the requested frame
                # cap.read() returns a (ret, frame) tuple; the input is a BGR frame matrix
                # (the original assigned the whole tuple to image, which was a bug)
                ret, image = cap.read()
                image_raw, image = self.preprocessor.process(image)
            else:
                # By default the input is an image path
                image_raw, image = self.preprocessor.process(os.path.join(path, img_names[i]))
            # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
            inputs[0].host = image
            trt_outputs = common.do_inference_v2(self.context, bindings=bindings, inputs=inputs,
                                                 outputs=outputs, stream=stream)
            # Reshape the outputs; this could live one level up, but the input image changes every iteration
            trt_outputs = [output.reshape(shape)
                           for output, shape in zip(trt_outputs, self.output_shapes)]
            # Post-processing, handling both prediction styles: original YOLOv4 and YOLOv5-like
            # Original image size in WH order, since the image was read with PIL
            shape_orig_WH = image_raw.size
            # The post-processor can handle batch >= 1, but this class is written for batch = 1
            outputs_pred = self.postprocessor.process(trt_outputs, shape_orig_WH)
            # TODO: write the predicted box coordinates / class / confidence to txt
            # Draw the boxes; only a single image here, so no for-loop is needed
            boxes, classes, scores = outputs_pred[0][0]
            obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, self.all_categories)
            # Video frames are saved by frame index, images by name; videos rarely exceed 5 digits
            # TODO: write the video predictions into the output video
            if is_video:
                # An extension is added so PIL can infer the output format.
                obj_detected_img.save(os.path.join(output_save_root, str(i).zfill(5) + '.png'))
            else:
                obj_detected_img.save(os.path.join(output_save_root, img_names[i]))
        # Release the capture if the input was a video
        if is_video:
            cap.release()
            cv2.destroyAllWindows()
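A minimal usage sketch for the Detect class above, assuming a YAML config containing the keys the constructor reads (onnx_path, engine_path, input_shape, int8_calibration, stride, num_anchors, label_file_path) plus the repository's helpers (get_engine, PriorBox, check_path, draw_bboxes); the config path is illustrative:

# Hypothetical usage; 'config.yaml' is an assumed file, not part of the original code.
detector = Detect('config.yaml')
detector.predict(input_path='dog.jpg',        # single image, folder, or video
                 output_save_root='./output',
                 write_txt=False)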
def main():
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference."""
    parser = argparse.ArgumentParser(
        prog='ONNX to TensorRT conversion',
        description='Convert the Yolo ONNX model to TensorRT')
    parser.add_argument('--input_size', help='Input size model', default='416')
    parser.add_argument('--onnx_file_path',
                        help='ONNX model\'s path (.onnx)',
                        default='../../model_data/Suspect/yolov3-suspect.onnx')
    parser.add_argument('--engine_file_path',
                        help='TensorRT engine\'s path (.trt)',
                        default='trt_model/yolov3-suspect_2_fp32.trt')
    parser.add_argument('--num_classes', help='Number of classes', default='3')
    parser.add_argument('--dataset_path',
                        help='Path of the folder Dataset',
                        default='../../Datasets/Suspect/images-416/')
    parser.add_argument(
        '--pred_dataset_path',
        help='Output path of Yolo predictions',
        default='../../Datasets/Suspect/Predictions/TensorRT/Yolo-Tiny-128/')
    parser.add_argument(
        '--result_images_path',
        help='Path of images with predicted bounding boxes',
        default='../../Datasets/Suspect/Images_result/TensorRT/Yolo-Tiny-128/')
    args = parser.parse_args()

    input_size = int(args.input_size)
    onnx_file_path = args.onnx_file_path
    engine_file_path = args.engine_file_path
    num_classes = int(args.num_classes)
    test_dataset_path = args.dataset_path
    save_path = args.result_images_path
    pred_dataset_path = args.pred_dataset_path
    fp16_on = False
    batch_size = 2
    # Second output dimension = (5 + num_classes) * 3 (255 for COCO, 33 for keypoints, ...)
    filters = (4 + 1 + num_classes) * 3
    output_shapes_416 = [(batch_size, filters, 13, 13), (batch_size, filters, 26, 26)]
    output_shapes_480 = [(batch_size, filters, 15, 15), (batch_size, filters, 30, 30)]
    output_shapes_544 = [(batch_size, filters, 17, 17), (batch_size, filters, 34, 34)]
    output_shapes_608 = [(batch_size, filters, 19, 19), (batch_size, filters, 38, 38)]
    output_shapes_dic = {
        '416': output_shapes_416,
        '480': output_shapes_480,
        '544': output_shapes_544,
        '608': output_shapes_608
    }

    filenames = glob.glob(os.path.join(test_dataset_path, '*.jpg'))
    nums = len(filenames)
    input_resolution_yolov3_HW = (input_size, input_size)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    output_shapes = output_shapes_dic[str(input_size)]
    postprocessor_args = {
        # "yolo_masks": [(3, 4, 5), (0, 1, 2)],  # tiny
        "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        # "yolo_anchors": [(10,14), (23,27), (37,58), (81,82), (135,169), (344,319)],  # tiny-yolov3
        "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                         (59, 119), (116, 90), (156, 198), (373, 326)],  # YoloV3
        "obj_threshold": 0.5,
        "nms_threshold": 0.35,
        "yolo_input_resolution": input_resolution_yolov3_HW
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Do inference with TensorRT
    filenames_batch = []
    images = []
    images_raw = []
    trt_outputs = []
    index = 0
    moy_inf_time = 0
    with get_engine(onnx_file_path, batch_size, fp16_on, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        for filename in filenames:
            filenames_batch.append(filename)
            image_raw, image = preprocessor.process(filename)
            images_raw.append(image_raw)
            images.append(image)
            index += 1
            # Wait until a full batch has been collected; the last, possibly
            # partial batch goes through when index == nums
            if index != nums and len(images_raw) != batch_size:
                continue
            inputs, outputs, bindings, stream = common.allocate_buffers(engine)
            images_batch = np.concatenate(images, axis=0)
            inputs[0].host = images_batch
            t1 = time.time()
            trt_outputs = common.do_inference(context, bindings=bindings,
                                              inputs=inputs, outputs=outputs,
                                              stream=stream, batch_size=batch_size)
            t2 = time.time()
            t_inf = int(round((t2 - t1) * 1000))
            moy_inf_time += t_inf
            print(len(trt_outputs))
            trt_outputs = [
                output.reshape(shape)
                for output, shape in zip(trt_outputs, output_shapes)
            ]
            for i in range(len(filenames_batch)):
                fname = filenames_batch[i].split('/')
                fname = fname[-1].split('.')[0]
                print(fname)
                name_ann = os.path.join(pred_dataset_path, fname)
                annotation_path = name_ann + '.txt'
                # Remove any stale annotation file from a previous run
                if os.path.isfile(annotation_path):
                    os.remove(annotation_path)
                    print("Delete !")
                img_raw = images_raw[i]
                shape_orig_WH = img_raw.size
                print("SHAPE : ", shape_orig_WH)
                boxes, classes, scores = postprocessor.process(
                    trt_outputs, shape_orig_WH, i)
                if boxes is not None:
                    print("boxes size:", len(boxes))
                else:
                    continue
                # Draw the bounding boxes onto the original input image and save it as a PNG file
                obj_detected_img = draw_bboxes(img_raw, boxes, scores, classes,
                                               ALL_CATEGORIES, annotation_path)
                output_image_path = save_path + fname + '_' + str(input_size) + '_bboxes.png'
                obj_detected_img.save(output_image_path, 'PNG')
                print('Saved image with bounding boxes of detected objects to {}.'
                      .format(output_image_path))
            filenames_batch = []
            images = []
            images_raw = []
            trt_outputs = []
    print(len(filenames))
    moy_inf_time = moy_inf_time / len(filenames)
    print("Average inference time (per image): ", moy_inf_time, "ms")
    fps = 1 / moy_inf_time * 1000
    print("FPS : ", fps)
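# A minimal sketch of the batch-collection logic above, factored into a helper
# (hypothetical name `iter_batches`): yield full batches of `batch_size`, plus
# a final partial batch when the file count is not a multiple of the batch size.
def iter_batches(filenames, batch_size):
    batch = []
    for name in filenames:
        batch.append(name)
        if len(batch) == batch_size:
            yield batch
            batch = []
    if batch:  # trailing partial batch
        yield batch

# e.g. 5 files with batch_size=2 -> [f0, f1], [f2, f3], [f4]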
def main():
    # Parse the command line parameters
    parser = argparse.ArgumentParser(
        description='Tiny YOLO v2 Object Detector')
    parser.add_argument('--camera', '-c', \
        type=int, default=0, metavar='CAMERA_NUM', \
        help='Camera number')
    parser.add_argument('--csi', \
        action='store_true', \
        help='Use CSI camera')
    parser.add_argument('--width', \
        type=int, default=1280, metavar='WIDTH', \
        help='Capture width')
    parser.add_argument('--height', \
        type=int, default=720, metavar='HEIGHT', \
        help='Capture height')
    parser.add_argument('--objth', \
        type=float, default=0.6, metavar='OBJ_THRESH', \
        help='Threshold of object confidence score (between 0 and 1)')
    parser.add_argument('--nmsth', \
        type=float, default=0.3, metavar='NMS_THRESH', \
        help='Threshold of NMS algorithm (between 0 and 1)')
    parser.add_argument('--host', \
        type=str, default='localhost', metavar='MQTT_HOST', \
        help='MQTT remote broker IP address')
    parser.add_argument('--topic', \
        type=str, metavar='MQTT_TOPIC', \
        help='MQTT topic to be published on')
    parser.add_argument('--port', \
        type=int, default=1883, metavar='MQTT_PORT', \
        help='MQTT port number')
    parser.add_argument('--novout', \
        action='store_true', \
        help='No video output')
    args = parser.parse_args()

    client = None
    if args.topic is not None:
        client = init_mqtt(args.host, args.port)

    if args.csi or (args.camera < 0):
        if args.camera < 0:
            args.camera = 0
        # Open the MIPI-CSI camera
        gst_cmd = appbase.GST_STR_CSI \
            % (args.width, args.height, appbase.FPS, args.camera, args.width, args.height)
        cap = cv2.VideoCapture(gst_cmd, cv2.CAP_GSTREAMER)
    else:
        # Open the V4L2 camera
        cap = cv2.VideoCapture(args.camera)
        # Set the capture parameters
        #cap.set(cv2.CAP_PROP_FPS, FPS)    # Comment-out for OpenCV 4.1
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)

    # Get the actual frame size
    # OpenCV 4.1 does not get the correct frame size
    #act_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    #act_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    act_width = args.width
    act_height = args.height
    frame_info = 'Frame:%dx%d' % (act_width, act_height)

    # Download the label data
    categories = appbase.download_label()

    # Configure the post-processing
    postprocessor_args = {
        # YOLO masks (Tiny YOLO v2 has only a single scale.)
        "yolo_masks": [(0, 1, 2, 3, 4)],
        # YOLO anchors
        "yolo_anchors": [(1.08, 1.19), (3.42, 4.41), (6.63, 11.38),
                         (9.42, 5.11), (16.62, 10.52)],
        # Threshold of object confidence score (between 0 and 1)
        "obj_threshold": args.objth,
        # Threshold of NMS algorithm (between 0 and 1)
        "nms_threshold": args.nmsth,
        # Input image resolution
        "yolo_input_resolution": appbase.INPUT_RES,
        # Number of object classes
        "num_categories": len(categories)
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Image shape expected by the post-processing
    output_shapes = [(1, 125, 13, 13)]

    # Download the Tiny YOLO v2 ONNX model
    onnx_file_path = appbase.download_model()
    # Define the file name of the locally saved TensorRT plan
    engine_file_path = 'model.trt'

    time_list = np.zeros(10)

    # Load the model into TensorRT
    with get_engine(onnx_file_path, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        # Allocate buffer memory for TensorRT
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        fps = 0.0
        frame_count = 0
        print('video capture started')
        try:
            while True:
                # Get the frame start time for FPS calculation
                start_time = time.time()
                # Capture a frame
                ret, img = cap.read()
                if ret != True:
                    continue
                # Reshape the captured image for Tiny YOLO v2
                rs_img = cv2.resize(img, appbase.INPUT_RES)
                rs_img = cv2.cvtColor(rs_img, cv2.COLOR_BGRA2RGB)
                src_img = appbase.reshape_image(rs_img)
                # Execute an inference in TensorRT
                inputs[0].host = src_img
                trt_outputs = common.do_inference(context, bindings=bindings, \
                    inputs=inputs, outputs=outputs, stream=stream)
                # Reshape the network output for the post-processing
                trt_outputs = [output.reshape(shape) \
                    for output, shape in zip(trt_outputs, output_shapes)]
                # Calculate the bounding boxes
                boxes, classes, scores \
                    = postprocessor.process(trt_outputs, (act_width, act_height))
                if boxes is not None:
                    publish_bboxes(client, args.topic, frame_count, \
                        img, boxes, scores, classes, categories)
                if not args.novout:
                    # Draw the bounding boxes
                    if boxes is not None:
                        appbase.draw_bboxes(img, boxes, scores, classes, categories)
                    if frame_count > 10:
                        fps_info = '{0}{1:.2f}'.format('FPS:', fps)
                        msg = '%s %s' % (frame_info, fps_info)
                        appbase.draw_message(img, msg)
                    # Show the results
                    cv2.imshow(appbase.WINDOW_NAME, img)
                    # Check if the ESC key was pressed to terminate this application
                    key = cv2.waitKey(20)
                    if key == 27:  # ESC
                        break
                    # Check if the window was closed
                    if cv2.getWindowProperty(appbase.WINDOW_NAME,
                                             cv2.WND_PROP_AUTOSIZE) < 0:
                        break
                # Calculate the average FPS value of the last ten frames
                elapsed_time = time.time() - start_time
                time_list = np.append(time_list, elapsed_time)
                time_list = np.delete(time_list, 0)
                avg_time = np.average(time_list)
                fps = 1.0 / avg_time
                frame_count += 1
        except KeyboardInterrupt:
            print('exiting..')

    # Release the capture object
    cap.release()
    if not args.novout:
        cv2.destroyAllWindows()
    if client is not None:
        client.disconnect()
def main(self, image_name):
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference."""
    # output_shapes_416 = [(self.batch_size, 18, 13, 13), (self.batch_size, 18, 26, 26)]
    output_shapes_416 = [(self.batch_size, 255, 13, 13), (self.batch_size, 255, 26, 26)]
    output_shapes_480 = [(self.batch_size, 18, 15, 15), (self.batch_size, 18, 30, 30)]
    output_shapes_544 = [(self.batch_size, 18, 17, 17), (self.batch_size, 18, 34, 34)]
    output_shapes_608 = [(self.batch_size, 18, 19, 19), (self.batch_size, 18, 38, 38)]
    output_shapes_dic = {
        '416': output_shapes_416,
        '480': output_shapes_480,
        '544': output_shapes_544,
        '608': output_shapes_608
    }

    input_resolution_yolov3_HW = (self.input_size, self.input_size)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    output_shapes = output_shapes_dic[str(self.input_size)]
    postprocessor_args = {
        "yolo_masks": [(3, 4, 5), (0, 1, 2)],
        "yolo_anchors": [(10, 13), (17, 23), (26, 29), (46, 75), (72, 167), (179, 323)],
        "obj_threshold": 0.5,
        "nms_threshold": 0.35,
        "yolo_input_resolution": input_resolution_yolov3_HW
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Do inference with TensorRT; the engine and context are owned by the class,
    # so only a single image is processed per call
    filenames_batch = []
    images = []
    images_raw = []
    trt_outputs = []

    filename = image_name
    filenames_batch.append(filename)
    image_raw, image = preprocessor.process(filename)
    images_raw.append(image_raw)
    images.append(image)

    inputs, outputs, bindings, stream = common.allocate_buffers(self.engine)
    images_batch = np.concatenate(images, axis=0)
    inputs[0].host = images_batch
    t1 = time.time()
    trt_outputs = common.do_inference(self.context, bindings=bindings,
                                      inputs=inputs, outputs=outputs,
                                      stream=stream, batch_size=self.batch_size)
    t2 = time.time()
    t_inf = t2 - t1
    print("time spent:", t_inf)
    print(len(trt_outputs))
    trt_outputs = [
        output.reshape(shape)
        for output, shape in zip(trt_outputs, output_shapes)
    ]
    for i in range(len(filenames_batch)):
        fname = filenames_batch[i].split('/')
        fname = fname[-1].split('.')[0]
        img_raw = images_raw[i]
        shape_orig_WH = img_raw.size
        boxes, classes, scores = postprocessor.process(trt_outputs, shape_orig_WH, i)
        # Draw the bounding boxes onto the original input image and save it as a PNG file
        obj_detected_img = self.draw_bboxes(img_raw, boxes, scores, classes,
                                            ALL_CATEGORIES)
        if not os.path.exists(self.save_path):
            os.makedirs(self.save_path)
        output_image_path = self.save_path + fname + '_' + str(self.input_size) + '_bboxes.png'
        obj_detected_img.save(output_image_path, 'PNG')
        print('Saved image with bounding boxes of detected objects to {}.'
              .format(output_image_path))
def main():
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference."""
    # Try to load a previously generated YOLOv3-608 network graph in ONNX format:
    input_size = 608
    batch_size = 1
    fp16_on = True
    onnx_file_path = 'ped3_' + str(input_size) + '_' + str(batch_size) + '.onnx'
    engine_file_path = 'ped3_' + str(input_size) + '_' + str(batch_size) + '.trt'
    input_file_list = './ped_list.txt'
    IMAGE_PATH = './images/'
    save_path = './img_re/'
    output_shapes_416 = [(batch_size, 18, 13, 13), (batch_size, 18, 26, 26)]
    output_shapes_480 = [(batch_size, 18, 15, 15), (batch_size, 18, 30, 30)]
    output_shapes_544 = [(batch_size, 18, 17, 17), (batch_size, 18, 34, 34)]
    output_shapes_608 = [(batch_size, 18, 19, 19), (batch_size, 18, 38, 38)]
    output_shapes_dic = {
        '416': output_shapes_416,
        '480': output_shapes_480,
        '544': output_shapes_544,
        '608': output_shapes_608
    }

    with open(input_file_list, 'r') as f:
        filenames = [line.strip() for line in f.readlines()]
    # filenames = glob.glob(os.path.join(IMAGE_PATH, '*.jpg'))
    nums = len(filenames)

    input_resolution_yolov3_HW = (input_size, input_size)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    output_shapes = output_shapes_dic[str(input_size)]
    postprocessor_args = {
        "yolo_masks": [(3, 4, 5), (0, 1, 2)],
        "yolo_anchors": [(8, 34), (14, 60), (23, 94), (39, 149), (87, 291), (187, 472)],
        "obj_threshold": 0.1,
        "nms_threshold": 0.3,
        "yolo_input_resolution": input_resolution_yolov3_HW
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Do inference with TensorRT
    filenames_batch = []
    images = []
    images_raw = []
    trt_outputs = []
    index = 0
    with get_engine(onnx_file_path, batch_size, fp16_on, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        for filename in filenames:
            filenames_batch.append(filename)
            image_raw, image = preprocessor.process(filename)
            images_raw.append(image_raw)
            images.append(image)
            index += 1
            # Wait until a full batch has been collected; the last, possibly
            # partial batch goes through when index == nums
            if index != nums and len(images_raw) != batch_size:
                continue
            inputs, outputs, bindings, stream = common.allocate_buffers(engine)
            images_batch = np.concatenate(images, axis=0)
            inputs[0].host = images_batch
            t1 = time.time()
            trt_outputs = common.do_inference(context, bindings=bindings,
                                              inputs=inputs, outputs=outputs,
                                              stream=stream, batch_size=batch_size)
            t2 = time.time()
            t_inf = t2 - t1
            print(t_inf)
            print(len(trt_outputs))
            trt_outputs = [
                output.reshape(shape)
                for output, shape in zip(trt_outputs, output_shapes)
            ]
            for i in range(len(filenames_batch)):
                fname = filenames_batch[i].split('/')
                fname = fname[-1].split('.')[0]
                img_raw = images_raw[i]
                shape_orig_WH = img_raw.size
                boxes, classes, scores = postprocessor.process(
                    trt_outputs, shape_orig_WH, i)
                # Draw the bounding boxes onto the original input image and save it as a PNG file
                obj_detected_img = draw_bboxes(img_raw, boxes, scores, classes,
                                               ALL_CATEGORIES)
                output_image_path = save_path + fname + '_' + str(input_size) + '_bboxes.png'
                obj_detected_img.save(output_image_path, 'PNG')
                print('Saved image with bounding boxes of detected objects to {}.'
                      .format(output_image_path))
            filenames_batch = []
            images = []
            images_raw = []
            trt_outputs = []
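# The per-resolution tables hard-coded in the last two functions all follow one
# pattern: each YOLOv3 head emits (batch, (num_classes + 5) * 3, size // stride,
# size // stride). A sketch deriving the table instead of hard-coding it; the
# helper name is hypothetical, and num_classes=1 reproduces the "18" tables.
def make_output_shapes(batch_size, num_classes, input_size, strides=(32, 16)):
    filters = (num_classes + 5) * 3
    return [(batch_size, filters, input_size // s, input_size // s)
            for s in strides]

# make_output_shapes(1, 1, 608) -> [(1, 18, 19, 19), (1, 18, 38, 38)]
# make_output_shapes(1, 1, 416) -> [(1, 18, 13, 13), (1, 18, 26, 26)]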
def main():
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference."""
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = 'yolo_in8.trt'
    cfg_file_path = "yolov3.cfg"
    input_image_path = download_file(
        'dog.jpg',
        'https://github.com/pjreddie/darknet/raw/f86901f6177dfc6116360a13cc06ab680e0c86b0/data/dog.jpg',
        checksum_reference=None)

    # Two-dimensional tuple with the target network's (spatial) input resolution in HW order
    input_resolution_yolov3_HW = (608, 608)
    # Create a pre-processor object by specifying the required input resolution for YOLOv3
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    # Load an image from the specified input path, and return it together with a pre-processed version
    image_raw, image = preprocessor.process(input_image_path)
    # Store the shape of the original input image in WH format, we will need it for later
    shape_orig_WH = image_raw.size

    # Output shapes
    output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]
    middle_output_shapes = []

    # Calibrator definition
    calibration_dataset_loc = "calibration_dataset/"
    calibration_cache = "yolo_calibration.cache"
    calib = calibra.PythonEntropyCalibrator(calibration_dataset_loc,
                                            cache_file=calibration_cache)

    # Define the layer outputs you want to visualize
    output_layer_name = [
        "001_convolutional", "002_convolutional", "003_convolutional",
        "005_shortcut", "006_convolutional"
    ]
    # Get the filter count of each named layer
    filter_num = get_filter_num(cfg_file_path, output_layer_name)

    # Do inference with TensorRT
    trt_outputs = []
    with build_int8_engine(onnx_file_path, calib, cfg_file_path, output_layer_name,
                           engine_file_path) as engine, \
            engine.create_execution_context() as context:
        start = time.time()
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Do inference
        print('Running inference on image {}...'.format(input_image_path))
        # Set host input to the image. The common.do_inference function will copy the input
        # to the GPU before executing.
        # If batch size != 1 you can use load_random_batch to run a test inference;
        # here a single image is used as the test set.
        # inputs[0].host = load_random_batch(calib)
        inputs[0].host = image
        trt_outputs = common.do_inference(context, bindings=bindings,
                                          inputs=inputs, outputs=outputs,
                                          stream=stream, batch_size=1)
        # Before doing post-processing, we need to reshape the outputs as
        # common.do_inference gives us flat arrays.
        end = time.time()
        print("Inference costs %.02f sec." % (end - start))

        # Recover the intermediate layer shapes from the flat output lengths
        for i, output in enumerate(trt_outputs[:len(filter_num)]):
            # The length of each intermediate output should be filter_num * h * w
            if "convolutional" in output_layer_name[i]:
                h = int(math.sqrt(output.shape[0] / filter_num[i]))
                w = h
            else:
                h = int(math.sqrt(output.shape[0] / filter_num[i] / 2))
                w = 2 * h
            middle_output_shapes.append((1, filter_num[i], w, h))
        # Reshape
        middle_output = [
            output.reshape(shape)
            for output, shape in zip(trt_outputs[:len(filter_num)],
                                     middle_output_shapes)
        ]
        # Save each intermediate output as a grayscale image
        for name, output in zip(output_layer_name, middle_output):
            w, h = output.shape[2], output.shape[3]
            img = misc.toimage(output.sum(axis=1).reshape(w, h))
            img.save("{}.tiff".format(name))
        print("Saving middle output {}".format(output_layer_name))

        trt_outputs = [
            output.reshape(shape)
            for output, shape in zip(trt_outputs[len(filter_num):], output_shapes)
        ]
        postprocessor_args = {
            # A list of 3 three-dimensional tuples for the YOLO masks
            "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
            # A list of 9 two-dimensional tuples for the YOLO anchors
            "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                             (59, 119), (116, 90), (156, 198), (373, 326)],
            # Threshold for object coverage, float value between 0 and 1
            "obj_threshold": 0.6,
            # Threshold for non-max suppression algorithm, float value between 0 and 1
            "nms_threshold": 0.5,
            "yolo_input_resolution": input_resolution_yolov3_HW
        }
        postprocessor = PostprocessYOLO(**postprocessor_args)
        # Run the post-processing algorithms on the TensorRT outputs and get the
        # bounding box details of detected objects
        boxes, classes, scores = postprocessor.process(trt_outputs, shape_orig_WH)
        # Draw the bounding boxes onto the original input image and save it as a PNG file
        obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes,
                                       ALL_CATEGORIES)
        output_image_path = 'dog_bboxes.png'
        obj_detected_img.save(output_image_path, 'PNG')
        print('Saved image with bounding boxes of detected objects to {}.'
              .format(output_image_path))
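# A worked example of the shape recovery above, with assumed numbers (not taken
# from a real run): a convolutional layer with 64 filters on a square 304x304
# grid flattens to 64 * 304 * 304 values, so h = sqrt(N / filters) recovers 304.
import math

filters = 64
flat_len = filters * 304 * 304
h = int(math.sqrt(flat_len / filters))
assert (h, h) == (304, 304)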
def main(width=608, height=608, batch_size=1, dataset='coco_label.txt',
         int8mode=False, calib_file='yolo_calibration.cache',
         onnx_file='yolov3.onnx', engine_file='yolov3.trt',
         image_file='dog.jpg', result_file='dog_bboxes.png'):
    """Load the labels of the corresponding dataset, then create a TensorRT engine
    for ONNX-based YOLOv3-608 and run inference."""
    label_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), dataset)
    all_categories = load_label_categories(label_file_path)
    classes = len(all_categories)

    # Try to load a previously generated YOLOv3-608 network graph in ONNX format:
    onnx_file_path = onnx_file
    engine_file_path = engine_file
    input_image_path = image_file
    # Two-dimensional tuple with the target network's (spatial) input resolution in HW order
    input_resolution_yolov3_HW = (height, width)
    # Create a pre-processor object by specifying the required input resolution for YOLOv3
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    # Load an image from the specified input path, and return it together with a pre-processed version
    image_raw, image = preprocessor.process(input_image_path, batch_size)
    # Store the shape of the original input image in WH format, we will need it for later
    shape_orig_WH = image_raw.size
    # Output shapes expected by the post-processor
    output_shapes = [(batch_size, (classes + 5) * 3, height // 32, width // 32),
                     (batch_size, (classes + 5) * 3, height // 16, width // 16),
                     (batch_size, (classes + 5) * 3, height // 8, width // 8)]
    # Do inference with TensorRT
    with get_engine(onnx_file_path, width, height, batch_size, engine_file_path,
                    int8mode, calib_file) as engine, \
            engine.create_execution_context() as context:
        start = time.time()
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Do inference
        print('Running inference on image {}...'.format(input_image_path))
        # Set host input to the image. The common.do_inference function will copy the input
        # to the GPU before executing.
        inputs[0].host = image
        trt_outputs = common.do_inference(context, bindings=bindings,
                                          inputs=inputs, outputs=outputs,
                                          stream=stream)
        end = time.time()
        print("Inference costs %.03f sec." % (end - start))

    # Before doing post-processing, we need to reshape the outputs as
    # common.do_inference gives us flat arrays.
    trt_outputs = [
        output.reshape(shape)
        for output, shape in zip(trt_outputs, output_shapes)
    ]
    postprocessor_args = {
        # A list of 3 three-dimensional tuples for the YOLO masks
        "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        # A list of 9 two-dimensional tuples for the YOLO anchors
        "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                         (59, 119), (116, 90), (156, 198), (373, 326)],
        # Threshold for object coverage, float value between 0 and 1
        "obj_threshold": 0.6,
        # Threshold for non-max suppression algorithm, float value between 0 and 1
        "nms_threshold": 0.5,
        "yolo_input_resolution": input_resolution_yolov3_HW
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)
    # Run the post-processing algorithms on the TensorRT outputs and get the
    # bounding box details of detected objects; only the first image of the
    # batch is post-processed here
    trt_outputs_1 = [np.expand_dims(trt_outputs[0][0], axis=0),
                     np.expand_dims(trt_outputs[1][0], axis=0),
                     np.expand_dims(trt_outputs[2][0], axis=0)]
    boxes, classes, scores = postprocessor.process(trt_outputs_1, shape_orig_WH, classes)
    # Draw the bounding boxes onto the original input image and save it as a PNG file
    obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, all_categories)
    output_image_path = result_file
    obj_detected_img.save(output_image_path, 'PNG')
    print('Saved image with bounding boxes of detected objects to {}.'
          .format(output_image_path))
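# A sketch of the batch-slicing step above: indexing [0] drops the batch axis,
# and np.expand_dims restores it, so the post-processor still sees a batch of 1.
import numpy as np

batched = np.zeros((4, 255, 19, 19))         # batch of 4
single = np.expand_dims(batched[0], axis=0)  # -> (1, 255, 19, 19)
assert single.shape == (1, 255, 19, 19)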
def myinfer(image, context, inputs, outputs, bindings, stream):
    # Two-dimensional tuple with the target network's (spatial) input resolution in HW order
    input_resolution_yolov3_HW = (416, 416)
    # Create a pre-processor object by specifying the required input resolution for YOLOv3
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    # Pre-process the input frame; this variant's pre-processor returns an array,
    # so the original shape is read in HW order
    image_raw, image = preprocessor.process(image)
    shape_orig_HW = image_raw.shape[:2]
    H, W = shape_orig_HW
    # Output shapes expected by the post-processor
    output_shapes = [(1, 255, 13, 13), (1, 255, 26, 26)]
    # Do inference with TensorRT
    # Set host input to the image. The common.do_inference function will copy the input
    # to the GPU before executing.
    inputs[0].host = image
    trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs,
                                      outputs=outputs, stream=stream)
    # Before doing post-processing, we need to reshape the outputs as
    # common.do_inference gives us flat arrays.
    trt_outputs = [
        output.reshape(shape)
        for output, shape in zip(trt_outputs, output_shapes)
    ]
    postprocessor_args = {
        # A list of 3 three-dimensional tuples for the YOLO masks
        "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        # A list of 9 two-dimensional tuples for the YOLO anchors
        "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                         (59, 119), (116, 90), (156, 198), (373, 326)],
        # Threshold for object coverage, float value between 0 and 1
        "obj_threshold": 0.6,
        # Threshold for non-max suppression algorithm, float value between 0 and 1
        "nms_threshold": 0.2,
        "yolo_input_resolution": input_resolution_yolov3_HW
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)
    # Run the post-processing algorithms on the TensorRT outputs and get the
    # bounding box details of detected objects
    boxes, classes, scores = postprocessor.process(trt_outputs, shape_orig_HW)
    # Draw the bounding boxes onto the original input image
    if boxes is not None:
        obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes,
                                       ALL_CATEGORIES)
        cv2.imshow("test", obj_detected_img)
    # Normalize the box coordinates to [0, 1] relative to the original image size
    if boxes is not None:
        boxes[:, 0] = boxes[:, 0] / W
        boxes[:, 1] = boxes[:, 1] / H
        boxes[:, 2] = boxes[:, 2] / W
        boxes[:, 3] = boxes[:, 3] / H
    return boxes, classes, scores
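# A hedged sketch of a caller for myinfer: the engine, context and buffers are
# created once and reused across frames. It assumes the two-argument get_engine
# used by the Tiny YOLO v2 snippets; file names are placeholders.
with get_engine('yolov3-tiny.onnx', 'yolov3-tiny.trt') as engine, \
        engine.create_execution_context() as context:
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    cap = cv2.VideoCapture(0)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        boxes, classes, scores = myinfer(frame, context, inputs, outputs,
                                         bindings, stream)
        if cv2.waitKey(1) & 0xFF == 27:  # ESC
            break
    cap.release()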
def main(inputSize):
    """Create a TensorRT engine for ONNX-based YOLOv3 and stream annotated camera frames."""
    global vs, outputFrame, lock, t0, t1, fps, sess, input_name, label_name, PAR_Model

    cuda.init()
    device = cuda.Device(0)
    onnx_file_path = 'yolov3-{}.onnx'.format(inputSize)
    engine_file_path = 'yolov3-{}.trt'.format(inputSize)
    h, w = (inputSize, inputSize)
    # Two-dimensional tuple with the target network's (spatial) input resolution in HW order
    input_resolution_yolov3_HW = (inputSize, inputSize)
    # Create a pre-processor object by specifying the required input resolution for YOLOv3
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    # Output shapes expected by the post-processor
    output_shapes = [(1, 255, h // 32, w // 32),
                     (1, 255, h // 16, w // 16),
                     (1, 255, h // 8, w // 8)]
    # output_shapes = [(1, 255, 13, 13), (1, 255, 26, 26)]  # tiny variant
    # Do inference with TensorRT
    ctx = make_default_context()  # Create a CUDA context
    postprocessor_args = {
        "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                         (59, 119), (116, 90), (156, 198), (373, 326)],
        "obj_threshold": 0.5,
        "nms_threshold": 0.35,
        "yolo_input_resolution": input_resolution_yolov3_HW
    }
    # Tiny YOLO v3 alternative:
    # postprocessor_args = {"yolo_masks": [(3, 4, 5), (0, 1, 2)],
    #                       "yolo_anchors": [(10,14), (23,27), (37,58),
    #                                        (81,82), (135,169), (344,319)],
    #                       "obj_threshold": 0.4,
    #                       "nms_threshold": 0.5,
    #                       "yolo_input_resolution": input_resolution_yolov3_HW}
    postprocessor = PostprocessYOLO(**postprocessor_args)

    with get_engine(onnx_file_path, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        print("performing inference")
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        while True:
            trt_outputs = []
            T0 = time.time()
            ret, image_raw = cap.read()
            if image_raw is not None:
                image_raw, image = preprocessor.process(image_raw)
                shape_orig_WH = image_raw.size
                inputs[0].host = image
                T1 = time.time()
                t0 = time.time()
                trt_outputs = common.do_inference(context, bindings=bindings,
                                                  inputs=inputs, outputs=outputs,
                                                  stream=stream)
                trt_outputs = [
                    output.reshape(shape)
                    for output, shape in zip(trt_outputs, output_shapes)
                ]
                T2 = time.time()
                # Here we have the YOLO output
                boxes, classes, scores = postprocessor.process(trt_outputs,
                                                               shape_orig_WH)
                t1 = time.time()
                t_inf = t1 - t0
                fps = 1 / t_inf
                draw = True
                if boxes is None:
                    print("no bboxes")
                    draw = False
                if classes is None:
                    print("no classes")
                    draw = False
                if scores is None:
                    print("no scores")
                    draw = False
                if draw:
                    obj_detected_img = draw_bboxes(image_raw, bboxes=boxes,
                                                   confidences=scores,
                                                   categories=classes,
                                                   all_categories=ALL_CATEGORIES)
                else:
                    obj_detected_img = image_raw
                # Now stream this image
                T3 = time.time()
                total = T3 - T0
                # print("Total time per frame: {:.3f}s (~{:.2f}FPS)".format(total, 1 / total))
                # print("Pre process: {:.2f}%".format((T1 - T0) / total))
                # print("Inference: {:.2f}%".format((T2 - T1) / total))
                # print("Post process: {:.2f}%".format((T3 - T2) / total))
                with lock:
                    outputFrame = np.array(obj_detected_img)
    ctx.pop()
def main():
    # Parse the command line parameters
    parser = argparse.ArgumentParser(description='Tiny YOLO v2 Object Detector')
    parser.add_argument('--camera', '-c', \
        type=int, default=0, metavar='CAMERA_NUM', \
        help='Camera number, use any negative integer for MIPI-CSI camera')
    parser.add_argument('--width', \
        type=int, default=1280, metavar='WIDTH', \
        help='Capture width')
    parser.add_argument('--height', \
        type=int, default=720, metavar='HEIGHT', \
        help='Capture height')
    parser.add_argument('--objth', \
        type=float, default=0.6, metavar='OBJ_THRESH', \
        help='Threshold of object confidence score (between 0 and 1)')
    parser.add_argument('--nmsth', \
        type=float, default=0.3, metavar='NMS_THRESH', \
        help='Threshold of NMS algorithm (between 0 and 1)')
    args = parser.parse_args()

    if args.camera < 0:
        # Open the MIPI-CSI camera
        gst_cmd = GST_STR_CSI \
            % (args.width, args.height, FPS, args.width, args.height)
        cap = cv2.VideoCapture(gst_cmd, cv2.CAP_GSTREAMER)
    else:
        # Open the V4L2 camera
        cap = cv2.VideoCapture(args.camera)
        # Set the capture parameters
        #cap.set(cv2.CAP_PROP_FPS, FPS)    # Comment-out for OpenCV 4.1
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)

    # Get the actual frame size
    # OpenCV 4.1 does not get the correct frame size
    #act_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    #act_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    act_width = args.width
    act_height = args.height
    frame_info = 'Frame:%dx%d' % (act_width, act_height)

    # Download the label data
    categories = download_label()

    # Configure the post-processing
    postprocessor_args = {
        # YOLO masks (Tiny YOLO v2 has only a single scale.)
        "yolo_masks": [(0, 1, 2, 3, 4)],
        # YOLO anchors
        "yolo_anchors": [(1.08, 1.19), (3.42, 4.41), (6.63, 11.38),
                         (9.42, 5.11), (16.62, 10.52)],
        # Threshold of object confidence score (between 0 and 1)
        "obj_threshold": args.objth,
        # Threshold of NMS algorithm (between 0 and 1)
        "nms_threshold": args.nmsth,
        # Input image resolution
        "yolo_input_resolution": INPUT_RES,
        # Number of object classes
        "num_categories": len(categories)
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Image shape expected by the post-processing
    output_shapes = [(1, 125, 13, 13)]

    # Download the Tiny YOLO v2 ONNX model
    onnx_file_path = download_model()
    # Define the file name of the locally saved TensorRT plan
    engine_file_path = 'model.trt'

    time_list = np.zeros(10)

    # Load the model into TensorRT
    with get_engine(onnx_file_path, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        # Allocate buffer memory for TensorRT
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        fps = 0.0
        frame_count = 0
        while True:
            # Get the frame start time for FPS calculation
            start_time = time.time()
            # Capture a frame
            ret, img = cap.read()
            if ret != True:
                continue
            # Reshape the captured image for Tiny YOLO v2
            rs_img = cv2.resize(img, INPUT_RES)
            rs_img = cv2.cvtColor(rs_img, cv2.COLOR_BGRA2RGB)
            src_img = reshape_image(rs_img)
            # Execute an inference in TensorRT
            inputs[0].host = src_img
            trt_outputs = common.do_inference(context, bindings=bindings, \
                inputs=inputs, outputs=outputs, stream=stream)
            # Reshape the network output for the post-processing
            trt_outputs = [output.reshape(shape) \
                for output, shape in zip(trt_outputs, output_shapes)]
            # Calculate the bounding boxes
            boxes, classes, scores \
                = postprocessor.process(trt_outputs, (act_width, act_height))
            # Draw the bounding boxes
            if boxes is not None:
                draw_bboxes(img, boxes, scores, classes, categories)
            # c.okihara 2020-07-22 (3/3)
            # Turn on enable_GPIO and turn off the buzzer GPIO if no bounding
            # boxes were detected
            else:
                GPIO.output(Buz_pin, GPIO.LOW)
                GPIO.output(enable_pin, GPIO.HIGH)
            if frame_count > 10:
                fps_info = '{0}{1:.2f}'.format('FPS:', fps)
                msg = '%s %s' % (frame_info, fps_info)
                draw_message(img, msg)
            # Show the results
            cv2.imshow(WINDOW_NAME, img)
            # Check if the ESC key was pressed to terminate this application
            key = cv2.waitKey(20)
            if key == 27:  # ESC
                break
            # Calculate the average FPS value of the last ten frames
            elapsed_time = time.time() - start_time
            time_list = np.append(time_list, elapsed_time)
            time_list = np.delete(time_list, 0)
            avg_time = np.average(time_list)
            fps = 1.0 / avg_time
            frame_count += 1

    # Release the capture object
    cap.release()
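# The FPS smoothing above keeps a fixed-size window by appending the newest
# frame time and dropping the oldest. A deque does the same with less copying;
# this is a hypothetical alternative, not what the scripts themselves use.
from collections import deque

times = deque(maxlen=10)
times.append(0.033)                    # per-frame elapsed seconds
fps = 1.0 / (sum(times) / len(times))  # average FPS over the window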
def main():
    """Create a TensorRT engine for ONNX-based YOLOv3 and run inference on a video."""
    # Try to load a previously generated YOLOv3 network graph in ONNX format:
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = "yolov3.trt"
    # Two-dimensional tuple with the target network's (spatial) input resolution in HW order
    input_resolution_yolov3_HW = (416, 416)
    # Create a pre-processor object by specifying the required input resolution for YOLOv3
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    # Output shapes expected by the post-processor
    # output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]  # 608x608
    output_shapes = [(1, 255, 13, 13), (1, 255, 26, 26), (1, 255, 52, 52)]
    # Do inference with TensorRT
    trt_outputs = []
    postprocessor_args = {
        # A list of 3 three-dimensional tuples for the YOLO masks
        "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        # A list of 9 two-dimensional tuples for the YOLO anchors
        "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                         (59, 119), (116, 90), (156, 198), (373, 326)],
        # Threshold for object coverage, float value between 0 and 1
        "obj_threshold": 0.6,
        # Threshold for non-max suppression algorithm, float value between 0 and 1
        "nms_threshold": 0.5,
        "yolo_input_resolution": input_resolution_yolov3_HW
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)

    capture = cv2.VideoCapture(r"D:\b站下载视频\飙车.mp4")
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    fps = capture.get(cv2.CAP_PROP_FPS)
    size = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    out = cv2.VideoWriter('camera_test.mp4', fourcc, fps, size)
    fps = 0
    while True:
        t1 = time.time()
        ret, frame = capture.read()
        if not ret:  # end of the video
            break
        # Convert BGR to RGB
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Convert to a PIL Image
        frame = Image.fromarray(np.uint8(frame))
        image_raw, image = preprocessor.process(frame)
        shape_orig_WH = image_raw.size
        with get_engine(onnx_file_path, engine_file_path) as engine, \
                engine.create_execution_context() as context:
            inputs, outputs, bindings, stream = common.allocate_buffers(engine)
            # Do inference
            print('Running inference on frame...')
            inputs[0].host = image
            trt_outputs = common.do_inference_v2(context, bindings=bindings,
                                                 inputs=inputs, outputs=outputs,
                                                 stream=stream)
            # Before doing post-processing, we need to reshape the outputs as
            # common.do_inference gives us flat arrays.
            trt_outputs = [
                output.reshape(shape)
                for output, shape in zip(trt_outputs, output_shapes)
            ]
            # Run the post-processing algorithms on the TensorRT outputs and get
            # the bounding box details of detected objects
            boxes, classes, scores = postprocessor.process(trt_outputs, shape_orig_WH)
            # Draw the bounding boxes onto the original input image
            obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes,
                                           ALL_CATEGORIES)
            # draw_bboxes returns a PIL image; convert back to a BGR array for OpenCV
            frame = cv2.cvtColor(np.array(obj_detected_img), cv2.COLOR_RGB2BGR)
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("fps= %.2f" % fps)
            frame = cv2.putText(frame, "fps= %.2f" % fps, (0, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            out.write(frame)
            cv2.imshow("video", frame)
            c = cv2.waitKey(1) & 0xff
            if c == 27:  # ESC
                capture.release()
                break
    out.release()
    cv2.destroyAllWindows()
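# The loop above re-opens the engine and re-allocates the buffers for every
# frame, which dominates the per-frame cost. A sketch of the usual
# restructuring, reusing the names defined in the function above: create the
# engine, context and buffers once, then loop over frames.
with get_engine(onnx_file_path, engine_file_path) as engine, \
        engine.create_execution_context() as context:
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    while True:
        ret, frame = capture.read()
        if not ret:
            break
        # ... preprocess, inputs[0].host = image, do_inference_v2, postprocess ...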