def infer_img():
    """Run YOLOv3-608 through a TensorRT engine on a sample image and show the result.

    The engine comes from ``get_engine('yolov3.onnx', 'yolov3.trt')``. The
    sample dog image is fetched with ``download_file``, pre-processed to
    608x608, pushed through the network, post-processed into boxes, classes
    and scores, and the annotated picture is displayed in an OpenCV window
    named "det" until a key is pressed.
    """
    # Model files: ONNX graph and the TensorRT engine file handed to get_engine.
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = "yolov3.trt"

    # Sample picture used for the demo.
    input_image_path = download_file(
        'dog.jpg',
        'https://github.com/pjreddie/darknet/raw/f86901f6177dfc6116360a13cc06ab680e0c86b0/data/dog.jpg',
        checksum_reference=None)

    # Spatial input resolution of the network, in (H, W) order.
    net_resolution_hw = (608, 608)
    image_raw, image = PreprocessYOLO(net_resolution_hw).process(input_image_path)

    # Size of the untouched input image (WH order per the original comment);
    # the post-processor needs it to scale boxes back.
    orig_size_wh = image_raw.size

    # Shapes the post-processor expects for the three output tensors.
    head_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]

    trt_outputs = []
    with get_engine(onnx_file_path, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print('Running inference on image {}...'.format(input_image_path))
        # The host-side input buffer is replaced by the pre-processed image;
        # the inference helper is expected to copy it to the GPU itself.
        inputs[0].host = image
        trt_outputs = common.do_inference_v2(
            context,
            bindings=bindings,
            inputs=inputs,
            outputs=outputs,
            stream=stream)

    # The inference helper hands back flat arrays; restore the tensor shapes.
    trt_outputs = [
        flat.reshape(dims) for flat, dims in zip(trt_outputs, head_shapes)
    ]

    postprocessor = PostprocessYOLO(
        # Three mask triples, one per output tensor.
        yolo_masks=[(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        # Nine (w, h) anchor pairs.
        yolo_anchors=[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                      (59, 119), (116, 90), (156, 198), (373, 326)],
        # Object-coverage threshold, float in [0, 1].
        obj_threshold=0.6,
        # Non-max-suppression threshold, float in [0, 1].
        nms_threshold=0.5,
        yolo_input_resolution=net_resolution_hw)

    # Turn raw network outputs into detections on the original image.
    boxes, classes, scores = postprocessor.process(trt_outputs, orig_size_wh)

    # Draw the detections, reverse the channel axis for OpenCV and display
    # the picture until a key is pressed.
    annotated = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)
    annotated = np.asarray(annotated)[..., ::-1]
    cv2.imshow("det", annotated)
    cv2.waitKey()
    cv2.destroyAllWindows()
def main():
    """Build an INT8 TensorRT engine for YOLOv3-608, dump probe layers and detect objects.

    Steps:
      1. Download and pre-process the sample dog image (608x608 input).
      2. Create an entropy calibrator over ``calibration_dataset/`` and build
         the engine with ``build_int8_engine``, which also receives the list
         of intermediate layer names to expose.
      3. Run one inference and report the elapsed wall-clock time.
      4. Reshape the first ``len(filter_num)`` outputs as intermediate feature
         maps and save each one as ``<layer name>.tiff``.
      5. Reshape the remaining outputs as the three YOLO heads, post-process
         them and save the annotated image to ``dog_bboxes.png``.
    """
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = 'yolo_in8.trt'
    cfg_file_path = "yolov3.cfg"

    input_image_path = download_file(
        'dog.jpg',
        'https://github.com/pjreddie/darknet/raw/f86901f6177dfc6116360a13cc06ab680e0c86b0/data/dog.jpg',
        checksum_reference=None)

    # Spatial input resolution of the network, in (H, W) order.
    input_resolution_yolov3_HW = (608, 608)
    image_raw, image = PreprocessYOLO(input_resolution_yolov3_HW).process(
        input_image_path)
    # Size of the original image (WH order per the original comment), needed
    # later by the post-processor.
    shape_orig_WH = image_raw.size

    # Shapes of the three regular YOLO output tensors.
    output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]

    # INT8 calibrator: calibration images plus a cache file.
    calibration_dataset_loc = "calibration_dataset/"
    calibration_cache = "yolo_calibration.cache"
    calib = calibra.PythonEntropyCalibrator(
        calibration_dataset_loc, cache_file=calibration_cache)

    # Intermediate layers whose outputs should be visualised.
    output_layer_name = [
        "001_convolutional",
        "002_convolutional",
        "003_convolutional",
        "005_shortcut",
        "006_convolutional",
    ]
    # Filter count of each probed layer, looked up from the cfg file.
    filter_num = get_filter_num(cfg_file_path, output_layer_name)
    probe_count = len(filter_num)

    trt_outputs = []
    with build_int8_engine(onnx_file_path, calib, cfg_file_path,
                           output_layer_name, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        # The timer starts before buffer allocation, so the reported time
        # covers allocation as well as the inference call.
        start = time.time()
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print('Running inference on image {}...'.format(input_image_path))
        # A single image is used as the test input. For batch sizes other
        # than 1 the original author suggests load_random_batch(calib).
        inputs[0].host = image
        trt_outputs = common.do_inference(
            context,
            bindings=bindings,
            inputs=inputs,
            outputs=outputs,
            stream=stream,
            batch_size=1)

    end = time.time()
    print("Inference costs %.02f sec." % (end - start))

    # The leading outputs belong to the probed layers. Each is flat, so its
    # spatial size is derived from element count / filter count.
    middle_output_shapes = []
    for idx, flat in enumerate(trt_outputs[:probe_count]):
        per_filter = flat.shape[0] / filter_num[idx]
        if "convolutional" in output_layer_name[idx]:
            # Treated as square: filter_num * h * h elements.
            h = int(math.sqrt(per_filter))
            w = h
        else:
            # Non-convolutional layers are treated as twice as wide as high.
            h = int(math.sqrt(per_filter / 2))
            w = 2 * h
        middle_output_shapes.append((1, filter_num[idx], w, h))

    middle_output = [
        flat.reshape(dims)
        for flat, dims in zip(trt_outputs[:probe_count], middle_output_shapes)
    ]

    # Sum every probed feature map over its channel axis and save it as an
    # image.
    # NOTE(review): `misc` is presumably scipy.misc; `toimage` was removed in
    # SciPy 1.2 - confirm the SciPy version this script is pinned to.
    for name, fmap in zip(output_layer_name, middle_output):
        w, h = fmap.shape[2], fmap.shape[3]
        img = misc.toimage(fmap.sum(axis=1).reshape(w, h))
        img.save("{}.tiff".format(name))
    print("Saveing middle output {}".format(output_layer_name))

    # The remaining outputs are the YOLO heads; restore their tensor shapes.
    trt_outputs = [
        flat.reshape(dims)
        for flat, dims in zip(trt_outputs[probe_count:], output_shapes)
    ]

    postprocessor = PostprocessYOLO(
        # Three mask triples, one per output tensor.
        yolo_masks=[(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        # Nine (w, h) anchor pairs.
        yolo_anchors=[
            (10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
            (59, 119), (116, 90), (156, 198), (373, 326),
        ],
        # Object-coverage threshold, float in [0, 1].
        obj_threshold=0.6,
        # Non-max-suppression threshold, float in [0, 1].
        nms_threshold=0.5,
        yolo_input_resolution=input_resolution_yolov3_HW)

    # Convert the network outputs into boxes, classes and scores.
    boxes, classes, scores = postprocessor.process(trt_outputs, shape_orig_WH)

    # Draw the detections on the original image and store it as PNG.
    obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes,
                                   ALL_CATEGORIES)
    output_image_path = 'dog_bboxes.png'
    obj_detected_img.save(output_image_path, 'PNG')
    print('Saved image with bounding boxes of detected objects to {}.'.format(
        output_image_path))
def main():
    """Create a TensorRT engine for an ONNX-based YOLO model and run inference.

    Command line::

        python3 <this script> <dir_onnx> <fn_onnx>

    ``dir_onnx`` is the directory holding the ONNX file and ``fn_onnx`` is its
    file name. The name (without directory/extension, as returned by
    ``get_exact_file_name_from_path``) must consist of exactly two
    ``_``-separated parts: the first four characters of the first part are
    dropped to obtain the YOLO version and the second part is parsed as the
    integer input size, e.g. presumably ``yolov3_608.onnx`` -> version
    ``v3``, size ``608``.

    The TensorRT engine file ``yolo<version>_<size>.trt`` is placed in
    ``dir_onnx``; the annotated result is written to
    ``dog_bboxes_<version>_<size>.png`` in the current directory.

    Raises:
        SystemExit: if fewer than two command-line arguments are supplied.
        ValueError: if the ONNX file name does not split into exactly two
            ``_``-separated parts or the size part is not an integer.
    """
    # Fail with a usage message instead of a bare IndexError.
    if len(sys.argv) < 3:
        prog = sys.argv[0] if sys.argv else 'onnx_to_tensorrt.py'
        sys.exit('Usage: python3 {} <dir_onnx> <fn_onnx>'.format(prog))

    dir_onnx = sys.argv[1]
    fn_onnx = sys.argv[2]
    onnx_file_path = os.path.join(dir_onnx, fn_onnx)

    # Derive YOLO version and network input size from the file name.
    t1, t2 = get_exact_file_name_from_path(fn_onnx).split('_')
    v_yolo = t1[4:]
    said = int(t2)

    # The engine file lives next to the ONNX file.
    engine_file_path = os.path.join(
        dir_onnx, 'yolo{}_{}.trt'.format(v_yolo, said))

    # Download a dog image and save it to the following file path:
    input_image_path = download_file(
        'dog.jpg',
        'https://github.com/pjreddie/darknet/raw/f86901f6177dfc6116360a13cc06ab680e0c86b0/data/dog.jpg',
        checksum_reference=None)

    # Two-dimensional tuple with the network's (spatial) input resolution,
    # HW ordered; the network input is square.
    input_resolution_yolov3_HW = (said, said)
    # Create a pre-processor for that resolution, then load the image and get
    # it back together with its pre-processed version.
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    image_raw, image = preprocessor.process(input_image_path)
    # Size of the original input image (WH order per the original comment),
    # needed later by the post-processor.
    shape_orig_WH = image_raw.size

    # Output shapes expected by the post-processor for this model variant.
    output_shapes = get_output_shapes(v_yolo, said)

    # Do inference with TensorRT
    trt_outputs = []
    with get_engine(onnx_file_path, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print('Running inference on image {}...'.format(input_image_path))
        # Set host input to the image. The inference helper is expected to
        # copy the input to the GPU before executing.
        inputs[0].host = image
        trt_outputs = common.do_inference(
            context,
            bindings=bindings,
            inputs=inputs,
            outputs=outputs,
            stream=stream)
        print(trt_outputs)

    # Before post-processing, reshape the outputs: the inference helper
    # returns flat arrays.
    trt_outputs = [
        output.reshape(shape)
        for output, shape in zip(trt_outputs, output_shapes)
    ]

    # Masks, anchors and thresholds depend on the YOLO variant.
    postprocessor_args = get_postprocessor_args(v_yolo,
                                                input_resolution_yolov3_HW)
    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Run the post-processing algorithms on the TensorRT outputs and get the
    # bounding box details of detected objects.
    boxes, classes, scores = postprocessor.process(trt_outputs, shape_orig_WH)

    # Draw the bounding boxes onto the original input image and save it as a
    # PNG file.
    obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes,
                                   ALL_CATEGORIES)
    output_image_path = 'dog_bboxes_{}_{}.png'.format(v_yolo, said)
    obj_detected_img.save(output_image_path, 'PNG')
    print('Saved image with bounding boxes of detected objects to {}.'.format(
        output_image_path))