Example #1
0
    def _post_process(self, output):
        """Convert raw inference output into YOLO detections.

        ``output`` is the sequence of flat arrays produced by inference, one
        per output layer.  Each array is reshaped to the matching entry of
        ``self.output_shapes`` and the result is passed to ``PostprocessYOLO``
        together with the size of ``self.raw_image``.

        The object threshold (0.5) and NMS threshold (0.25) are fixed here.

        Returns whatever ``PostprocessYOLO.process`` returns.
        """
        # Inference hands back flat arrays; restore every layer's real shape
        # before post-processing.
        reshaped_layers = [
            flat.reshape(layer_shape)
            for flat, layer_shape in zip(output, self.output_shapes)
        ]

        postprocessor = PostprocessYOLO(
            yolo_masks=self.yolo_masks,            # masks selecting the anchors of each output layer
            yolo_anchors=self.anchors,             # two-dimensional anchor tuples
            obj_threshold=0.5,                     # object coverage threshold, float in [0, 1]
            nms_threshold=0.25,                    # non-max suppression threshold, float in [0, 1]
            yolo_input_resolution=self.input_resolution,
        )

        # Run the post-processing on the reshaped outputs to obtain the
        # details of the detected objects.
        return postprocessor.process(reshaped_layers, self.raw_image.size)
Example #2
0
def predict(inp: Image, metadata):
    """Run YOLO detection on ``inp`` and return the image with boxes drawn.

    Uses the module-level ``preprocessor``, ``engine``, ``context``,
    ``output_shapes`` and ``postprocessor_args``.

    Args:
        inp: the input image handed to ``preprocessor.process``.
        metadata: mutable mapping; the wall-clock duration of the TensorRT
            call is stored under ``'TensorRT Inference Latency (s)'``.

    Returns:
        The result of ``draw_bboxes`` for the detected objects.
    """
    image_raw, image = preprocessor.process(inp)
    original_size_wh = image_raw.size
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)

    print('Running inference on image')
    # The host input is set here; common.do_inference copies it to the GPU
    # before executing.
    inputs[0].host = image

    started = perf_counter()
    flat_outputs = common.do_inference(
        context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
    finished = perf_counter()
    metadata['TensorRT Inference Latency (s)'] = (finished - started)

    # do_inference yields flat arrays; give each its expected shape.
    layer_outputs = [
        flat.reshape(layer_shape)
        for flat, layer_shape in zip(flat_outputs, output_shapes)
    ]

    # Post-process the TensorRT outputs into boxes, class ids and scores.
    detector = PostprocessYOLO(**postprocessor_args)
    boxes, classes, scores = detector.process(layer_outputs, original_size_wh)

    # Draw the detections onto the original input image.
    return draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)
Example #3
0
def main(FLAGS):
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run batched inference.

    The test image is replicated 36 times, pushed through the engine in one
    batch, and every image's layer outputs are post-processed in turn.
    ``FLAGS`` is forwarded unchanged to ``get_engine``.
    """
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = "yolov3.trt"
    input_image_path = 'debug_image/test1.jpg'
    input_resolution_yolov3_HW = (608, 608)
    inf_batch = 36  # how many copies of the image go through the engine

    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    image_raw, image = preprocessor.process(input_image_path)
    shape_orig_WH = image_raw.size

    with get_engine(onnx_file_path, FLAGS, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = allocate_buffers(engine)

        max_batch_size = engine.max_batch_size
        # Repeat the single pre-processed image along the batch axis.
        inputs[0].host = np.tile(image, [inf_batch, 1, 1, 1])

        flat_outputs = do_inference(context,
                                    bindings=bindings,
                                    inputs=inputs,
                                    outputs=outputs,
                                    stream=stream,
                                    batch_size=inf_batch)

    # The outputs are reshaped using the engine's maximum batch size.
    output_shapes = [(max_batch_size, 255, grid, grid) for grid in (19, 38, 76)]
    per_layer = [
        flat.reshape(layer_shape)
        for flat, layer_shape in zip(flat_outputs, output_shapes)
    ]

    postprocessor = PostprocessYOLO(
        yolo_masks=[(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        yolo_anchors=[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                      (59, 119), (116, 90), (156, 198), (373, 326)],
        obj_threshold=0.6,
        nms_threshold=0.5,
        yolo_input_resolution=input_resolution_yolov3_HW,
    )

    # Regroup from "one array per layer" to "one list of layers per image".
    feat_batch = []
    for image_index in range(len(per_layer[0])):
        feat_batch.append([layer[image_index] for layer in per_layer])

    for layers in feat_batch:
        boxes, classes, scores = postprocessor.process(layers, shape_orig_WH)
Example #4
0
 def __init__(self):
     """Create the 608x608 pre-processor, the TensorRT wrapper and the YOLO post-processor."""
     super().__init__()
     # Network input resolution, shared by pre- and post-processing.
     resolution = (608, 608)
     self.preprocessor = PreprocessYOLO(resolution)
     self.trt = TensorRT("yolov3.trt")
     self.postprocessor = PostprocessYOLO(
         yolo_masks=[(6, 7, 8), (3, 4, 5), (0, 1, 2)],
         yolo_anchors=[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                       (59, 119), (116, 90), (156, 198), (373, 326)],
         obj_threshold=0.6,
         nms_threshold=0.5,
         yolo_input_resolution=resolution,
     )
def main():
    """Run the ``fast_yolo`` TensorRT engine on one sample image.

    Returns:
        The ``(boxes, classes, scores)`` triple produced by
        ``PostprocessYOLO.process``.
    """
    project_dir = '/home/nvidia/Documents/Projects/Fabric_defect_detection/YOLO/'
    onnx_file_path = project_dir + 'fast_yolo.onnx'
    engine_file_path = project_dir + 'fast_yolo.trt'
    input_image_path = project_dir + 'sample.png'

    # Spatial input resolution of the network, in HW order.
    input_resolution_yolov3_HW = (352, 352)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    # The pre-processor returns the loaded image together with its
    # pre-processed version.
    image_raw, image = preprocessor.process(input_image_path)
    # Size of the original image in WH order, needed by the post-processor.
    shape_orig_WH = image_raw.size

    # This network has a single output layer.
    output_shapes = [(1, 18, 11, 11)]

    with get_engine(onnx_file_path, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print('Running inference on image {}...'.format(input_image_path))
        # common.do_inference copies the host input to the GPU before
        # executing.
        inputs[0].host = image
        flat_outputs = common.do_inference(
            context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)

    # common.do_inference returns flat arrays; reshape before post-processing.
    layer_outputs = [
        flat.reshape(layer_shape)
        for flat, layer_shape in zip(flat_outputs, output_shapes)
    ]

    postprocessor = PostprocessYOLO(
        yolo_masks=[(0, 1, 2)],                            # one mask for the single output layer
        yolo_anchors=[(188, 15), (351, 16), (351, 30)],    # three two-dimensional anchors
        obj_threshold=0.5,                                 # object coverage threshold, float in [0, 1]
        nms_threshold=0.2,                                 # non-max suppression threshold, float in [0, 1]
        yolo_input_resolution=input_resolution_yolov3_HW,
    )

    # Post-process the TensorRT outputs into the bounding box details of the
    # detected objects.
    boxes, classes, scores = postprocessor.process(layer_outputs, shape_orig_WH)
    return boxes, classes, scores
Example #6
0
def process_multi(img_path, yolo, engine, context):
    """Time the ``yolo`` detector and the TensorRT YOLOv3 pipeline on one image.

    Three timings are printed: the ``yolo.process_image`` path (including
    drawing and writing ``img.jpg``), the TensorRT inference plus reshape, and
    the TensorRT post-processing (including drawing and writing
    ``out_boxes.png``).

    Args:
        img_path: path of the image to process.
        yolo: object exposing ``process_image(image)`` that returns a
            3-tuple whose second element is the detected boxes.
        engine: TensorRT engine used to allocate the I/O buffers.
        context: TensorRT execution context used for inference.
    """
    frame = cv2.imread(img_path)

    # The first call is deliberately left out of the measurement
    # (presumably a warm-up run - confirm with the author).
    yolo.process_image(frame)
    start_tf = time.time()
    _, boxes_new, _ = yolo.process_image(frame)
    img = draw(frame, boxes_new)
    cv2.imwrite('img.jpg', img)
    print('process time for tf is', time.time() - start_tf)

    input_resolution_yolov3_HW = (608, 608)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    image_raw, image = preprocessor.process(img_path)
    shape_orig_WH = image_raw.size

    # Output shapes expected by the post-processor
    output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]

    inputs, outputs, bindings, stream = common_utils.allocate_buffers(engine)
    inputs[0].host = image
    # As above, one untimed run precedes the measured one.
    common_utils.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
    start_trt = time.time()
    trt_outputs = common_utils.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)

    # do_inference returns flat arrays; reshape them before post-processing.
    trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]
    print('process time for trt is', time.time() - start_trt)

    post_trt = time.time()
    postprocessor_args = {
        "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],  # 3 three-dimensional tuples
        "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),  # 9 two-dimensional tuples
                         (59, 119), (116, 90), (156, 198), (373, 326)],
        "obj_threshold": 0.6,  # object coverage threshold, float in [0, 1]
        "nms_threshold": 0.5,  # non-max suppression threshold, float in [0, 1]
        "yolo_input_resolution": input_resolution_yolov3_HW,
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Run the post-processing on the TensorRT outputs to get the bounding
    # box details of the detected objects.
    boxes, classes, scores = postprocessor.process(trt_outputs, shape_orig_WH)
    # Draw the boxes onto the original input image and save it as PNG.
    obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)
    obj_detected_img.save('out_boxes.png', 'PNG')
    print('process time for trt post is', time.time() - post_trt)
Example #7
0
def main():
    """Run YOLOv3-608 through TensorRT on ``dog.jpg`` and save the annotated image.

    The engine is obtained from ``get_engine`` using ``yolov3.onnx`` and
    ``yolov3.trt``; the result is written to ``dog_bboxes.png``.
    """
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = "yolov3.trt"
    input_image_path = 'dog.jpg'
    output_image_path = 'dog_bboxes.png'

    # Spatial input resolution of the network, in HW order.
    input_resolution_yolov3_HW = (608, 608)
    # Expected shape of each of the three output layers.
    output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]

    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    # Returns the loaded image together with its pre-processed version.
    image_raw, image = preprocessor.process(input_image_path)
    # Size of the original image in WH order, needed for post-processing.
    shape_orig_WH = image_raw.size

    with get_engine(onnx_file_path, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print('Running inference on image {}...'.format(input_image_path))
        # common.do_inference copies the host input to the GPU before
        # executing.
        inputs[0].host = image
        flat_outputs = common.do_inference(
            context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)

    # common.do_inference returns flat arrays; reshape before post-processing.
    layer_outputs = [
        flat.reshape(layer_shape)
        for flat, layer_shape in zip(flat_outputs, output_shapes)
    ]

    postprocessor = PostprocessYOLO(
        yolo_masks=[(6, 7, 8), (3, 4, 5), (0, 1, 2)],                   # 3 three-dimensional tuples
        yolo_anchors=[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),  # 9 two-dimensional tuples
                      (59, 119), (116, 90), (156, 198), (373, 326)],
        obj_threshold=0.6,                                               # object coverage threshold, float in [0, 1]
        nms_threshold=0.5,                                               # non-max suppression threshold, float in [0, 1]
        yolo_input_resolution=input_resolution_yolov3_HW,
    )

    # Post-process the TensorRT outputs into the bounding box details.
    boxes, classes, scores = postprocessor.process(layer_outputs, shape_orig_WH)

    # Draw the boxes onto the original input image and save it as PNG.
    annotated = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)
    annotated.save(output_image_path, 'PNG')
    print('Saved image with bounding boxes of detected objects to {}.'.format(output_image_path))
def main():
    """Run the ``ped3_416`` TensorRT engine on every image listed in ``./ped_list.txt``.

    Each image is pre-processed, run through the engine (the inference time
    is printed), post-processed, and saved with its detections drawn to
    ``./images_results/<name>_416.png``.  Images without detections are
    saved unchanged.
    """
    ENGINE_FILE_PATH = "ped3_416.trt"
    INPUT_LIST_FILE = './ped_list.txt'
    INPUT_SIZE = 416

    # One image path per line; blank lines are skipped so they do not reach
    # the pre-processor as empty file names.
    with open(INPUT_LIST_FILE, 'r') as list_file:
        filenames = [line.strip() for line in list_file if line.strip()]

    input_resolution_yolov3_HW = (INPUT_SIZE, INPUT_SIZE)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    postprocessor_args = {"yolo_masks": [(3, 4, 5), (0, 1, 2)],
                          "yolo_anchors": [(8, 34), (14, 60), (23, 94), (39, 149), (87, 291), (187, 472)],
                          "obj_threshold": 0.1,
                          "nms_threshold": 0.3,
                          "yolo_input_resolution": input_resolution_yolov3_HW}
    postprocessor = PostprocessYOLO(**postprocessor_args)
    output_shapes = output_shapes_dic[str(INPUT_SIZE)]

    with get_engine(ENGINE_FILE_PATH) as engine, engine.create_execution_context() as context:
        # Buffers are allocated once and reused for every image.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        for filename in filenames:
            image_raw, image = preprocessor.process(filename)
            shape_orig_WH = image_raw.size

            print('Running inference on image {}...'.format(filename))
            inputs[0].host = image
            t1 = time.time()
            trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            t2 = time.time()
            print(t2 - t1)
            trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]

            boxes, classes, scores = postprocessor.process(trt_outputs, shape_orig_WH)
            # Guard against both "no detections" conventions: an empty
            # result, or None (presumably what the post-processor returns
            # when nothing is found - verify against PostprocessYOLO).
            if boxes is not None and len(boxes) != 0:
                obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)
            else:
                obj_detected_img = image_raw

            # Output name: text before the first '.' of the file's base name.
            savename = filename.split('/')[-1].split('.')[0]
            output_image_path = './images_results/' + savename + '_' + str(INPUT_SIZE) + '.png'
            obj_detected_img.save(output_image_path, 'PNG')
            print('Saved image with bounding boxes of detected objects to {}.'.format(output_image_path))
Example #9
0
def infer_cam():
    """Run YOLOv3-608 through TensorRT on frames from camera 0 and display them.

    Every frame is pre-processed, run through the engine, post-processed and
    shown in a window named ``det`` with the measured FPS drawn on it.  The
    loop ends when ESC (key code 27) is pressed.

    Raises:
        RuntimeError: if a frame cannot be read from the camera.
    """
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = 'yolov3.trt'
    # Spatial input resolution of the network, in HW order.
    input_resolution_yolov3_HW = (608, 608)
    # Output shapes expected by the post-processor
    output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]
    postprocessor_args = {"yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],                    # 3 three-dimensional tuples
                          "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),  # 9 two-dimensional tuples
                                           (59, 119), (116, 90), (156, 198), (373, 326)],
                          "obj_threshold": 0.6,                                               # object coverage threshold, float in [0, 1]
                          "nms_threshold": 0.5,                                               # non-max suppression threshold, float in [0, 1]
                          "yolo_input_resolution": input_resolution_yolov3_HW}

    # Both processors depend only on constants, so build them once instead
    # of once per frame.
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    postprocessor = PostprocessYOLO(**postprocessor_args)

    cap = cv2.VideoCapture(0)
    try:
        with get_engine(onnx_file_path, engine_file_path) as engine, \
                engine.create_execution_context() as context:
            inputs, outputs, bindings, stream = common.allocate_buffers(engine)
            while True:
                ret, frame = cap.read()
                if not ret:
                    # An explicit exception instead of `assert`, which is
                    # stripped when Python runs with -O.
                    raise RuntimeError("could not read a frame from camera 0")

                # Returns the frame together with its pre-processed version.
                image_raw, image = preprocessor.process(frame)
                # Size of the original frame in WH order, needed later.
                shape_orig_WH = image_raw.size

                t = time()
                # do_inference_v2 is given the host input set here.
                inputs[0].host = image
                trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
                t = time() - t
                # Avoid ZeroDivisionError if the clock did not advance.
                fps = 1 / t if t > 0 else float('inf')
                print("infer: %.2fms, fps: %.2f" % (t * 1000, fps))

                # Inference returns flat arrays; reshape before post-processing.
                trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]

                boxes, classes, scores = postprocessor.process(trt_outputs, shape_orig_WH)
                if boxes is None:
                    # Presumably nothing was detected in this frame (verify
                    # against PostprocessYOLO); show the frame unannotated.
                    im = image_raw
                else:
                    im = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)

                # Reversing the channel axis yields a negative-stride view,
                # which OpenCV drawing functions cannot write into; make a
                # contiguous, writable copy first.
                im = np.ascontiguousarray(np.asarray(im)[..., ::-1])
                cv2.putText(im, "%.2f" % fps, (12, 12), 3, 1, (0, 255, 0))
                cv2.imshow("det", im)

                if cv2.waitKey(5) == 27:
                    break
    finally:
        # Release the camera and close the window even when an error
        # interrupts the loop.
        cap.release()
        cv2.destroyAllWindows()
def batch_show(image_path, image_save_path, onnx_file_path, engine_file_path):
    """Detect defects in every ``*.png`` under ``image_path`` and save annotated copies.

    Args:
        image_path: directory that is searched (non-recursively) for PNG files.
        image_save_path: directory the annotated images are written to, each
            under the same file name as its source image.
        onnx_file_path: forwarded to ``get_engine``.
        engine_file_path: forwarded to ``get_engine``.
    """
    img_list = gb.glob(image_path + r"/*.png")

    # Spatial input resolution of the network, in HW order.
    input_resolution_yolov3_HW = (352, 352)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    postprocessor_args = {"yolo_masks": [(0, 1, 2)],                       # one mask for the single output layer
                          "yolo_anchors": [(188, 15), (351, 16), (351, 30)],  # three two-dimensional anchors
                          "obj_threshold": 0.5,                            # object coverage threshold, float in [0, 1]
                          "nms_threshold": 0.2,                            # non-max suppression threshold, float in [0, 1]
                          "yolo_input_resolution": input_resolution_yolov3_HW}
    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Output shapes expected by the post-processor
    output_shapes = [(1, 18, 11, 11)]

    with get_engine(onnx_file_path, engine_file_path) as engine, engine.create_execution_context() as context:
        # Buffers are allocated once and reused for every image.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        for img_file in img_list:
            image_raw, image = preprocessor.process(img_file)
            # common.do_inference copies the host input to the GPU before
            # executing.
            inputs[0].host = image
            trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)

            # common.do_inference returns flat arrays; reshape before
            # post-processing.
            trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]

            # Pass this image's own size (WH order) so the boxes line up
            # with the image they are drawn on, whatever its dimensions.
            boxes, classes, scores = postprocessor.process(trt_outputs, image_raw.size)
            if boxes is None:
                # Presumably no detections (verify against PostprocessYOLO);
                # save the image unannotated.
                image_show = image_raw
            else:
                image_show = draw_bboxes(image_raw, boxes, scores, classes, ['defect'], bbox_color='yellow')

            # Keep the source file name; only the directory changes.
            save_name = os.path.join(image_save_path, os.path.basename(img_file))
            image_show.save(save_name)
            print("Image", save_name, "saved.")
Example #11
0
    def __init__(self, yaml_path):
        """Load the YAML parameter file and build the detection pipeline.

        Args:
            yaml_path: path of the parameter configuration file.
        """
        with open(yaml_path, 'r', encoding='utf-8') as f:
            # NOTE(review): yaml.FullLoader is used here; confirm the
            # configuration file always comes from a trusted source.
            self.param_dict = yaml.load(f, Loader=yaml.FullLoader)
        params = self.param_dict

        # Obtain the engine; the execution context is created from it.
        self.engine = get_engine(params['onnx_path'],
                                 params['engine_path'],
                                 params['input_shape'],
                                 params['int8_calibration'])
        self.context = self.engine.create_execution_context()

        # YOLO pre-processing (PreprocessYOLO).  The assert message says
        # "input_shape must have 4 dimensions".
        assert len(params['input_shape']) == 4, "input_shape必须是4个维度"
        batch, _, height, width = params['input_shape']
        self.preprocessor = PreprocessYOLO((height, width))

        # Generate the prior anchors [x, y, w, h, f_w, f_h]: xy are the
        # column/row coordinates on the feature map, wh is the anchor size,
        # f_wh is the feature-map size.
        self.prior_anchors = PriorBox(cfg=params).forward()

        # Label names.
        self.all_categories = load_label_categories(params['label_file_path'])
        classes_num = len(self.all_categories)

        # Shape of the TensorRT output: the number of predictions is the sum
        # over the three scales of (grid cells at that stride) * (anchors).
        stride = params['stride']
        num_anchors = params['num_anchors']
        grid_num = sum(
            (height // stride[scale]) * (width // stride[scale]) * num_anchors[scale]
            for scale in range(3)
        )
        self.output_shapes = [(batch, grid_num, (classes_num + 5))]

        # Acceptable image suffixes.
        self.img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng']
        # Acceptable video suffixes.
        self.vid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv']

        # YOLO post-processing.  YOLOv4 concatenates its 3 outputs into
        # [N, AHW*3, classes_num+5]; the post-processor can handle either the
        # original YOLOv4 predictions or the newer YOLOv5-style predictions.
        self.postprocessor = PostprocessYOLO(self.prior_anchors, params)
def main():
    """Run YOLOv3-608 through TensorRT on every frame of the video ``args.video``.

    Frames are read until the capture reports failure.  Each frame with
    detections is shown, boxes drawn, in a window named ``frame``; frames for
    which the post-processor returns ``None`` boxes are skipped.
    """
    args = parser.parse_args()
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = "yolov3.trt"

    cam = cv.VideoCapture(args.video)
    try:
        input_resolution_yolov3_HW = (608, 608)
        # Loop-invariant: expected shape of each output layer.
        output_shapes = [(1, 255, 19, 19),
                         (1, 255, 38, 38), (1, 255, 76, 76)]

        preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
        postprocessor_args = {"yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
                              "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                                               (59, 119), (116, 90), (156, 198), (373, 326)],
                              "obj_threshold": 0.6,
                              "nms_threshold": 0.5,
                              "yolo_input_resolution": input_resolution_yolov3_HW}
        postprocessor = PostprocessYOLO(**postprocessor_args)

        with get_engine(onnx_file_path, engine_file_path) as engine, \
                engine.create_execution_context() as context:
            inputs, outputs, bindings, stream = common.allocate_buffers(engine)
            while True:
                _ret, img = cam.read()
                if not _ret:
                    # No more frames (or the capture failed): stop.
                    break
                image_raw, image = preprocessor.process_image(img)
                shape_orig_WH = image_raw.size

                inputs[0].host = image
                trt_outputs = common.do_inference(
                    context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
                # Inference returns flat arrays; reshape before post-processing.
                trt_outputs = [output.reshape(shape)
                               for output, shape in zip(trt_outputs, output_shapes)]

                boxes, classes, scores = postprocessor.process(
                    trt_outputs, shape_orig_WH)
                if boxes is None:
                    continue

                obj_detected_img = draw_bboxes(
                    image_raw, boxes, scores, classes, ALL_CATEGORIES)
                det_img = np.array(obj_detected_img)
                cv.imshow("frame", det_img)
                cv.waitKey(5)
    finally:
        # Always give the video source back, even if processing fails.
        cam.release()
Example #13
0
class YoloTRT(object):
    """YOLOv3-608 detector: pre-processing, TensorRT inference, post-processing."""

    def __init__(self):
        """Create the 608x608 pre-processor, the TensorRT wrapper and the post-processor."""
        super().__init__()
        # resolution
        self.preprocessor = PreprocessYOLO((608, 608))
        self.trt = TensorRT("yolov3.trt")
        postprocessor_args = {"yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
                              "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                                               (59, 119), (116, 90), (156, 198), (373, 326)],
                              "obj_threshold": 0.6,
                              "nms_threshold": 0.5,
                              "yolo_input_resolution": (608, 608)}
        self.postprocessor = PostprocessYOLO(**postprocessor_args)

    def _inference(self, input: np.ndarray) -> list:
        """Run the engine on a pre-processed batch and reshape its outputs.

        Args:
            input: pre-processed batch, <N, 3, 608, 608> per the original
                author's notes.

        Returns:
            Three arrays, one per output layer, each reshaped with the
            engine's ``max_batch_size`` as its leading dimension.
        """
        trt_outputs = self.trt.inference(input)
        batch = self.trt.max_batch_size
        output_shapes = [(batch, 255, 19, 19),
                         (batch, 255, 38, 38),
                         (batch, 255, 76, 76)]
        return [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]

    @profile
    def inference(self, input_array: np.ndarray):
        """Detect objects in a batch of raw images.

        Args:
            input_array: raw image batch, <N, H, W, C> per the original
                author's notes.

        Returns:
            A list with one single-element list per input image; the element
            is the result of ``PostprocessYOLO.process`` for that image.
        """
        pre = self.preprocessor.process(input_array)
        trt_outputs = self._inference(pre)

        # The outputs carry max_batch_size slots, but only the first
        # len(input_array) of them are returned.  Post-process just those
        # instead of processing every slot and discarding the surplus.
        wanted = min(len(input_array), len(trt_outputs[0]))
        return [
            [self.postprocessor.process([layer[i] for layer in trt_outputs],
                                        input_array.shape)]
            for i in range(wanted)
        ]
Example #14
0
def main():
    """Build or load a TensorRT YOLO engine and run it on a sample dog image.

    Command line: ``sys.argv[1]`` is the directory that holds the ONNX model
    and ``sys.argv[2]`` is the model's file name.  The string returned by
    ``get_exact_file_name_from_path`` must split on ``_`` into exactly two
    parts: the first part minus its first four characters is used as the YOLO
    version tag, the second part is parsed as the integer input size
    (presumably a name such as ``yolov3_608.onnx`` -- confirm against the
    helper).

    The engine file ``yolo<version>_<size>.trt`` lives next to the ONNX model,
    and the annotated image is saved as ``dog_bboxes_<version>_<size>.png`` in
    the current working directory.
    """
    model_dir = sys.argv[1]
    model_name = sys.argv[2]
    onnx_file_path = os.path.join(model_dir, model_name)

    prefix, size_text = get_exact_file_name_from_path(model_name).split('_')
    v_yolo = prefix[4:]
    side = int(size_text)

    engine_file_path = os.path.join(model_dir,
                                    'yolo{}_{}.trt'.format(v_yolo, side))

    # Fetch the sample image (pinned to a specific darknet commit).
    input_image_path = download_file(
        'dog.jpg',
        'https://github.com/pjreddie/darknet/raw/f86901f6177dfc6116360a13cc06ab680e0c86b0/data/dog.jpg',
        checksum_reference=None)

    # Network input resolution, height first.
    input_hw = (side, side)
    preprocessor = PreprocessYOLO(input_hw)
    image_raw, image = preprocessor.process(input_image_path)
    # Size of the untouched image (width, height); the post-processor needs it.
    original_wh = image_raw.size

    output_shapes = get_output_shapes(v_yolo, side)

    with get_engine(onnx_file_path, engine_file_path) as engine:
        with engine.create_execution_context() as context:
            inputs, outputs, bindings, stream = common.allocate_buffers(engine)
            print('Running inference on image {}...'.format(input_image_path))
            # do_inference copies this host buffer to the GPU before executing.
            inputs[0].host = image
            flat_outputs = common.do_inference(context,
                                               bindings=bindings,
                                               inputs=inputs,
                                               outputs=outputs,
                                               stream=stream)
            print(flat_outputs)

    # do_inference returns flat arrays; restore the per-scale tensor shapes.
    feature_maps = []
    for flat, shape in zip(flat_outputs, output_shapes):
        feature_maps.append(flat.reshape(shape))

    postprocessor = PostprocessYOLO(
        **get_postprocessor_args(v_yolo, input_hw))
    boxes, classes, scores = postprocessor.process(feature_maps, original_wh)

    # Draw the detections on the original image and save the result as PNG.
    annotated = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)
    output_image_path = 'dog_bboxes_{}_{}.png'.format(v_yolo, side)
    annotated.save(output_image_path, 'PNG')
    print('Saved image with bounding boxes of detected objects to {}.'.format(
        output_image_path))
Example #15
0
def main():
    """Run batched TensorRT YOLOv3 inference over a folder of JPEG images.

    Command-line options select the network input size, the ONNX/engine
    paths, the number of classes, the image folder and the two output
    folders.  For every image with detections an annotated PNG is written to
    ``--result_images_path``; the annotation file path under
    ``--pred_dataset_path`` is handed to ``draw_bboxes`` after any stale file
    of that name has been deleted.  The mean inference time per image and the
    resulting FPS are printed at the end.

    Raises:
        KeyError: if ``--input_size`` is not one of 416, 480, 544 or 608.
    """
    parser = argparse.ArgumentParser(
        prog='ONNX to TensorRT conversion',
        description='Convert the Yolo ONNX model to TensorRT')

    parser.add_argument('--input_size', help='Input size model', default='416')

    parser.add_argument('--onnx_file_path',
                        help='ONNX model\'s path (.onnx)',
                        default='../../model_data/Suspect/yolov3-suspect.onnx')

    parser.add_argument('--engine_file_path',
                        help='TensorRT engine\'s path (.trt)',
                        default='trt_model/yolov3-suspect_2_fp32.trt')

    parser.add_argument('--num_classes', help='Number of classes', default='3')

    parser.add_argument('--dataset_path',
                        help='Path of the folder Dataset',
                        default='../../Datasets/Suspect/images-416/')

    parser.add_argument(
        '--pred_dataset_path',
        help='Output path of Yolo predictions',
        default='../../Datasets/Suspect/Predictions/TensorRT/Yolo-Tiny-128/')

    parser.add_argument(
        '--result_images_path',
        help='Path of images with predict bounding box',
        default='../../Datasets/Suspect/Images_result/TensorRT/Yolo-Tiny-128/')

    args = parser.parse_args()

    input_size = int(args.input_size)
    onnx_file_path = args.onnx_file_path
    engine_file_path = args.engine_file_path
    num_classes = int(args.num_classes)
    test_dataset_path = args.dataset_path
    save_path = args.result_images_path
    pred_dataset_path = args.pred_dataset_path

    fp16_on = False
    batch_size = 2

    # Channels per output: 3 anchors x (4 box coords + objectness + classes),
    # e.g. 255 for the 80 COCO classes.
    filters = (4 + 1 + num_classes) * 3

    # One shape per detection scale (strides 32, 16 and 8).  The masks and
    # anchors below describe the three-scale YOLOv3 layout, so all three
    # scales are listed; zip() further down simply ignores the last shape
    # when the engine only has two outputs (tiny variants).
    output_shapes_dic = {
        str(size): [(batch_size, filters, size // stride, size // stride)
                    for stride in (32, 16, 8)]
        for size in (416, 480, 544, 608)
    }

    filenames = glob.glob(os.path.join(test_dataset_path, '*.jpg'))
    nums = len(filenames)

    input_resolution_yolov3_HW = (input_size, input_size)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    output_shapes = output_shapes_dic[str(input_size)]

    postprocessor_args = {
        # tiny-YOLOv3 would use: [(3, 4, 5), (0, 1, 2)]
        "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        # tiny-YOLOv3 would use:
        # [(10,14), (23,27), (37,58), (81,82), (135,169), (344,319)]
        "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                         (59, 119), (116, 90), (156, 198), (373, 326)],
        "obj_threshold": 0.5,
        "nms_threshold": 0.35,
        "yolo_input_resolution": input_resolution_yolov3_HW,
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Images accumulated for the batch currently being filled.
    filenames_batch = []
    images = []
    images_raw = []
    # Sum of per-batch inference times (whole milliseconds).
    moy_inf_time = 0

    with get_engine(onnx_file_path, batch_size, fp16_on, engine_file_path
                    ) as engine, engine.create_execution_context() as context:
        for index, filename in enumerate(filenames, start=1):
            filenames_batch.append(filename)
            image_raw, image = preprocessor.process(filename)
            images_raw.append(image_raw)
            images.append(image)

            # Keep collecting until the batch is full or this is the last image.
            if index != nums and len(images_raw) != batch_size:
                continue

            # NOTE(review): buffers are re-allocated for every batch; confirm
            # whether they can be allocated once before the loop instead.
            inputs, outputs, bindings, stream = common.allocate_buffers(engine)
            inputs[0].host = np.concatenate(images, axis=0)
            t1 = time.time()
            trt_outputs = common.do_inference(context,
                                              bindings=bindings,
                                              inputs=inputs,
                                              outputs=outputs,
                                              stream=stream,
                                              batch_size=batch_size)
            t2 = time.time()
            moy_inf_time += int(round((t2 - t1) * 1000))

            print(len(trt_outputs))
            # do_inference returns flat arrays; restore the per-scale shapes.
            trt_outputs = [
                output.reshape(shape)
                for output, shape in zip(trt_outputs, output_shapes)
            ]

            for i, (batch_filename, img_raw) in enumerate(
                    zip(filenames_batch, images_raw)):
                # Base name up to the first dot, e.g. "a/b/img.jpg" -> "img".
                fname = batch_filename.split('/')[-1].split('.')[0]
                print(fname)
                annotation_path = os.path.join(pred_dataset_path,
                                               fname) + '.txt'
                # Remove a stale annotation file left by a previous run.
                if os.path.isfile(annotation_path):
                    os.remove(annotation_path)
                    print("Delete !")

                shape_orig_WH = img_raw.size
                print("SHAPE : ", shape_orig_WH)

                boxes, classes, scores = postprocessor.process(
                    trt_outputs, shape_orig_WH, i)

                # No detections for this image: nothing to draw or save.
                if boxes is None:
                    continue
                print("boxes size:", len(boxes))

                # Draw the boxes on the original image and save it as PNG.
                obj_detected_img = draw_bboxes(img_raw, boxes, scores, classes,
                                               ALL_CATEGORIES, annotation_path)
                output_image_path = save_path + fname + '_' + str(
                    input_size) + '_bboxes.png'
                obj_detected_img.save(output_image_path, 'PNG')
                print(
                    'Saved image with bounding boxes of detected objects to {}.'
                    .format(output_image_path))

            # Start a fresh batch.
            filenames_batch = []
            images = []
            images_raw = []

    print(len(filenames))
    if not filenames:
        # Avoid dividing by zero when the dataset folder has no JPEG files.
        print("No '*.jpg' images found in {}; no timing to report.".format(
            test_dataset_path))
        return

    moy_inf_time = moy_inf_time / len(filenames)
    print("Moyenne temps d'inférence (par image) : ", moy_inf_time, "ms")
    # Per-batch times are rounded to whole milliseconds, so the mean can be 0.
    fps = 1 / moy_inf_time * 1000 if moy_inf_time > 0 else float('inf')
    print("FPS : ", fps)
def main():
    """Run batched TensorRT inference for the two-scale pedestrian model.

    Image paths are read, one per line, from ``./ped_list.txt``.  Images are
    pre-processed, grouped into batches of ``batch_size``, run through the
    engine built from ``ped3_<size>_<batch>.onnx`` and post-processed; an
    annotated PNG per image is written below ``./img_re/``.

    The body is indented with spaces only: the previous mix of tabs and
    spaces is rejected by Python 3 with ``TabError``.
    """
    input_size = 608
    batch_size = 1
    fp16_on = True
    onnx_file_path = 'ped3_' + str(input_size) + '_' + str(batch_size) + '.onnx'
    engine_file_path = 'ped3_' + str(input_size) + '_' + str(batch_size) + '.trt'
    input_file_list = './ped_list.txt'
    save_path = './img_re/'

    # Output tensor shapes (two scales) for each supported input size.
    output_shapes_416 = [(batch_size, 18, 13, 13), (batch_size, 18, 26, 26)]
    output_shapes_480 = [(batch_size, 18, 15, 15), (batch_size, 18, 30, 30)]
    output_shapes_544 = [(batch_size, 18, 17, 17), (batch_size, 18, 34, 34)]
    output_shapes_608 = [(batch_size, 18, 19, 19), (batch_size, 18, 38, 38)]
    output_shapes_dic = {'416': output_shapes_416, '480': output_shapes_480,
                         '544': output_shapes_544, '608': output_shapes_608}

    with open(input_file_list, 'r') as f:
        filenames = [line.strip() for line in f]

    nums = len(filenames)

    input_resolution_yolov3_HW = (input_size, input_size)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    output_shapes = output_shapes_dic[str(input_size)]

    postprocessor_args = {"yolo_masks": [(3, 4, 5), (0, 1, 2)],
                          "yolo_anchors": [(8, 34), (14, 60), (23, 94),
                                           (39, 149), (87, 291), (187, 472)],
                          "obj_threshold": 0.1,
                          "nms_threshold": 0.3,
                          "yolo_input_resolution": input_resolution_yolov3_HW}
    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Images accumulated for the batch currently being filled.
    filenames_batch = []
    images = []
    images_raw = []

    with get_engine(onnx_file_path, batch_size, fp16_on, engine_file_path
                    ) as engine, engine.create_execution_context() as context:
        for index, filename in enumerate(filenames, start=1):
            filenames_batch.append(filename)
            image_raw, image = preprocessor.process(filename)
            images_raw.append(image_raw)
            images.append(image)

            # Keep collecting until the batch is full or this is the last image.
            if index != nums and len(images_raw) != batch_size:
                continue

            inputs, outputs, bindings, stream = common.allocate_buffers(engine)
            inputs[0].host = np.concatenate(images, axis=0)
            t1 = time.time()
            trt_outputs = common.do_inference(context, bindings=bindings,
                                              inputs=inputs, outputs=outputs,
                                              stream=stream,
                                              batch_size=batch_size)
            t2 = time.time()
            t_inf = t2 - t1
            print(t_inf)
            print(len(trt_outputs))
            # do_inference returns flat arrays; restore the per-scale shapes.
            trt_outputs = [output.reshape(shape)
                           for output, shape in zip(trt_outputs, output_shapes)]

            print('test')
            for i, (batch_filename, img_raw) in enumerate(
                    zip(filenames_batch, images_raw)):
                # Base name up to the first dot, e.g. "a/b/img.jpg" -> "img".
                fname = batch_filename.split('/')[-1].split('.')[0]
                shape_orig_WH = img_raw.size
                # NOTE(review): if process() can return None boxes when
                # nothing is detected, confirm draw_bboxes accepts that.
                boxes, classes, scores = postprocessor.process(
                    trt_outputs, shape_orig_WH, i)
                # Draw the boxes on the original image and save it as PNG.
                obj_detected_img = draw_bboxes(img_raw, boxes, scores, classes,
                                               ALL_CATEGORIES)
                output_image_path = save_path + fname + '_' + str(
                    input_size) + '_bboxes.png'
                obj_detected_img.save(output_image_path, 'PNG')
                print('Saved image with bounding boxes of detected objects to {}.'.format(output_image_path))

            # Start a fresh batch.
            filenames_batch = []
            images = []
            images_raw = []
def read_queue(queue):
    """Classify frames taken from ``queue`` and send a command over serial.

    Loads the serialized TensorRT engine from ``model.bin``, then, while the
    module-level ``cap`` and ``ser`` objects both report being open, takes
    frames from ``queue``, runs detection on each and writes a one-letter
    command to ``ser`` that depends on the detected class.  Frames that reach
    the class lookup are also saved to ``tmp/<n>.jpg``.

    Args:
        queue: Object exposing ``empty()`` and ``get()`` that yields frames
            accepted by ``PreprocessYOLO.process``.
    """
    # Load the engine.
    TRT_LOGGER = trt.Logger(trt.Logger.INFO)
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')
    runtime = trt.Runtime(TRT_LOGGER)

    with open('model.bin', 'rb') as f:
        engine = runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()

    batch_size = 1
    input_size = 416
    # Output shapes per input size: one entry per scale (strides 32, 16, 8).
    # The finest 608 grid is 608 / 8 = 76 (it was mistyped as 72).
    output_shapes_416 = [(batch_size, 54, 13, 13), (batch_size, 54, 26, 26), (batch_size, 54, 52, 52)]
    output_shapes_480 = [(batch_size, 54, 15, 15), (batch_size, 54, 30, 30), (batch_size, 54, 60, 60)]
    output_shapes_544 = [(batch_size, 54, 17, 17), (batch_size, 54, 34, 34), (batch_size, 54, 68, 68)]
    output_shapes_608 = [(batch_size, 54, 19, 19), (batch_size, 54, 38, 38), (batch_size, 54, 76, 76)]
    output_shapes_dic = {'416': output_shapes_416, '480': output_shapes_480,
                         '544': output_shapes_544, '608': output_shapes_608}

    output_shapes = output_shapes_dic[str(input_size)]
    input_resolution_yolov3_HW = (input_size, input_size)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)

    postprocessor_args = {"yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
                          "yolo_anchors": [(4, 7), (7, 15), (13, 25),
                                           (25, 42), (41, 67), (75, 94),
                                           (91, 162), (158, 205), (250, 332)],
                          "obj_threshold": 0.5,
                          "nms_threshold": 0.35,
                          "yolo_input_resolution": input_resolution_yolov3_HW}
    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Single buffer allocation.  A second, hand-written allocation used to
    # precede this call, but every buffer it created was replaced here, so it
    # only consumed page-locked and device memory.
    inputs, outputs, bindings, stream = allocate_buffers(engine)
    print('3.Load model successful.')
    print('Everything is ready.')

    # (serial command, console message), indexed by class id.
    actions = (
        ("c", 'plate front.'),
        ("d", 'plate back.'),
        ("f", 'bowl front.'),
        ("e", 'bowl back.'),
        ("g", 'glass cup side.'),
        ("g", 'glass cup back.'),
        ("g", 'glass cup front.'),
        ("i", 'teacup side.'),
        ("j", 'teacup back.'),
        ("k", 'teacup front.'),
        ("g", 'cup side.'),
        ("g", 'cup back.'),
        ("g", 'cup front.'),
    )

    num = 0
    while cap.isOpened() and ser.isOpen():
        # NOTE(review): this polls the queue without sleeping or blocking.
        if queue.empty():
            continue
        frame = queue.get()
        image_raw, image = preprocessor.process(frame)
        num = num + 1
        inputs[0].host = np.concatenate([image], axis=0)
        trt_outputs = do_inference(context, bindings=bindings, inputs=inputs,
                                   outputs=outputs, stream=stream,
                                   batch_size=batch_size)
        # do_inference returns flat arrays; restore the per-scale shapes.
        trt_outputs = [output.reshape(shape)
                       for output, shape in zip(trt_outputs, output_shapes)]
        shape_orig_WH = image_raw.size
        boxes, classes, scores = postprocessor.process(trt_outputs,
                                                       shape_orig_WH, 0)
        print(boxes, scores, classes)

        if np.all(scores == 0):
            ser.write("h".encode("utf-8"))
            print('exception.')
            continue

        # NOTE(review): assuming `classes` is a NumPy array, np.nonzero()
        # returns the positions of the non-zero class ids, so `label` holds
        # only non-zero ids: class 0 can never match below, and more than one
        # non-zero id makes the comparisons ambiguous.  Presumably a single
        # best detection was intended -- confirm before changing.
        index = np.nonzero(classes)
        label = classes[index[0]]
        cv2.imwrite('tmp/' + str(num) + '.jpg', frame)

        for class_id, (command, message) in enumerate(actions):
            if label == class_id:
                ser.write(command.encode("utf-8"))
                print(message)
                break
        else:
            # No known class matched.
            ser.write("h".encode("utf-8"))
            print('exception.')
def main():
    """Evaluate a TensorRT YOLOv3 engine on a validation set.

    Reads the dataset description from ``./data/unrel.data``, runs every
    batch through the engine built from ``./yolov3.onnx``, applies the YOLO
    post-processing and non-max suppression, matches predictions to the
    ground-truth boxes at IoU 0.5 and prints precision, recall, mAP@0.5 and
    F1 (overall and per class) followed by timing figures.  Ground-truth and
    prediction plots are written for the first batch.
    """
    onnx_file_path = './yolov3.onnx'
    engine_file_path = "yolov3.trt"
    data_path = "./data/unrel.data"

    data = parse_data_cfg(data_path)
    nc = int(data['classes'])  # number of classes
    path = data['valid']  # path to test images
    names = load_classes(data['names'])  # class names

    # IoU thresholds used for matching.  With a single step linspace yields
    # only its start value, i.e. the one threshold 0.5 (mAP@0.5).
    iouv = torch.linspace(0.5, 0.95, 1, dtype=torch.float32)
    niou = 1

    conf_thres = 0.001
    iou_thres = 0.6
    verbose = True

    # Custom data loader
    img_size = 448
    batch_size = 16

    dataset = LoadImagesAndLabels(path, img_size, batch_size, rect=True)
    batch_size = min(batch_size, len(dataset))
    dataloader = data_loader(dataset, batch_size, img_size)

    # Engine output shapes before cropping to the actual batch image size.
    output_shapes = [(16, 126, 14, 14), (16, 126, 28, 28), (16, 126, 56, 56)]

    with get_engine(onnx_file_path, engine_file_path
                    ) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Column header, shown as the progress-bar description.
        s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                     'mAP@0.5', 'F1')
        p, r, f1, mp, mr, mean_ap, mf1, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
        pbar = tqdm.tqdm(dataloader, desc=s)
        stats, ap, ap_class = [], [], []
        seen = 0

        for batch_i, (imgs, targets, paths, shapes) in enumerate(pbar):

            imgs = imgs.astype(np.float32) / 255.0
            _, _, height, width = imgs.shape  # batch size, channels, height, width
            whwh = np.array([width, height, width, height])

            inputs[0].host = imgs

            # NOTE(review): the post-processor is rebuilt with identical
            # arguments for every batch; confirm whether it could be created
            # once before the loop.
            postprocessor_args = {
                # Three anchor-index triples, one per output scale.
                "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
                # Nine (width, height) anchors.
                "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61),
                                 (62, 45), (59, 119), (116, 90), (156, 198),
                                 (373, 326)],
                "num_classes": 37,
                "stride": [32, 16, 8]
            }

            postprocessor = PostprocessYOLO(**postprocessor_args)

            # Network layers up to the YOLO heads run inside TensorRT.
            t = time.time()
            trt_outputs = common.do_inference_v2(context,
                                                 bindings=bindings,
                                                 inputs=inputs,
                                                 outputs=outputs,
                                                 stream=stream)

            trt_outputs = [
                output.reshape(shape)
                for output, shape in zip(trt_outputs, output_shapes)
            ]

            # Crop each scale to the grid that matches this batch's image
            # size (height / 32, / 16, / 8 and likewise for the width).
            trt_outputs = [
                np.ascontiguousarray(
                    otpt[:, :, :int(imgs.shape[2] * (2**i) /
                                    32), :int(imgs.shape[3] * (2**i) / 32)],
                    dtype=np.float32) for i, otpt in enumerate(trt_outputs)
            ]

            output_list = postprocessor.process(trt_outputs)

            t0 += time.time() - t

            inf_out = torch.cat(output_list, 1)
            t = time.time()
            output = non_max_suppression(inf_out,
                                         conf_thres=conf_thres,
                                         iou_thres=iou_thres)  # nms
            t1 += time.time() - t

            # Statistics per image
            for si, pred in enumerate(output):
                labels = targets[targets[:, 0] == si, 1:]
                nl = len(labels)
                tcls = labels[:, 0].tolist() if nl else []  # target class
                seen += 1

                if pred is None:
                    if nl:
                        stats.append((torch.zeros(0, niou, dtype=torch.bool),
                                      torch.Tensor(), torch.Tensor(), tcls))
                    continue

                # Assign all predictions as incorrect
                correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool)
                if nl:
                    detected = []  # target indices
                    tcls_tensor = labels[:, 0]

                    # target boxes
                    tbox = xywh2xyxy(labels[:, 1:5]) * whwh
                    tbox = tbox.type(torch.float32)

                    # Per target class
                    for cls in torch.unique(tcls_tensor):
                        ti = (cls == tcls_tensor).nonzero().view(
                            -1)  # target indices
                        pi = (cls == pred[:, 5]).nonzero().view(
                            -1)  # prediction indices

                        # Search for detections
                        if pi.shape[0]:
                            # Prediction to target ious
                            ious, i = box_iou(pred[pi, :4], tbox[ti]).max(
                                1)  # best ious, indices

                            # Append detections
                            for j in (ious > iouv[0]).nonzero():
                                d = ti[i[j]]  # detected target
                                if d not in detected:
                                    detected.append(d)
                                    correct[pi[j]] = ious[
                                        j] > iouv  # iou_thres is 1xn
                                    if len(
                                            detected
                                    ) == nl:  # all targets already located in image
                                        break

                # Append statistics (correct, conf, pcls, tcls)
                stats.append(
                    (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

            # Plot images
            if batch_i < 1:
                f = 'test_batch%g_gt.jpg' % batch_i  # filename
                plot_images(imgs, targets, paths=paths, names=names,
                            fname=f)  # ground truth
                f = 'test_batch%g_pred.jpg' % batch_i
                plot_images(imgs,
                            output_to_target(output, width, height),
                            paths=paths,
                            names=names,
                            fname=f)  # predictions

        # Compute statistics
        stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
        if len(stats):
            p, r, ap, f1, ap_class = ap_per_class(*stats)
            if niou > 1:
                p, r, ap, f1 = p[:, 0], r[:, 0], ap.mean(
                    1), ap[:, 0]  # [P, R, AP@0.5:0.95, AP@0.5]
            mp, mr, mean_ap, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
            nt = np.bincount(stats[3].astype(np.int64),
                             minlength=nc)  # number of targets per class
        else:
            nt = torch.zeros(1)

        # Print results
        pf = '%20s' + '%10.3g' * 6  # print format
        print(pf % ('all', seen, nt.sum(), mp, mr, mean_ap, mf1))

        # Print results per class
        if verbose and nc > 1 and len(stats):
            for i, c in enumerate(ap_class):
                print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))

        # Print speeds
        if verbose:
            t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (
                img_size, img_size, batch_size)  # tuple
            print(
                'Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g'
                % t)
Example #19
0
class Detect:
    """YOLO detector running on a TensorRT engine, configured from a YAML file.

    ``__init__`` builds the engine, the execution context and the pre-/post-
    processing helpers; ``predict`` runs detection on a single image, a
    directory of images or a video file and saves the annotated results.
    """

    def __init__(self, yaml_path):
        """Read the YAML configuration and set up everything needed for inference.

        :param yaml_path: path of the YAML parameter file. Keys read directly
            here: ``onnx_path``, ``engine_path``, ``input_shape``,
            ``int8_calibration``, ``label_file_path``, ``stride`` and
            ``num_anchors``. The full dict is also handed to ``PriorBox`` and
            ``PostprocessYOLO``.
        """
        with open(yaml_path, 'r', encoding='utf-8') as f:
            self.param_dict = yaml.load(f, Loader=yaml.FullLoader)

        # Obtain the engine; the execution context is created from it.
        self.engine = get_engine(self.param_dict['onnx_path'],
                                 self.param_dict['engine_path'],
                                 self.param_dict['input_shape'],
                                 self.param_dict['int8_calibration'])
        self.context = self.engine.create_execution_context()

        # YOLO pre-processing is parameterised by the network's (height, width).
        assert len(self.param_dict['input_shape']) == 4, "input_shape必须是4个维度"
        batch, _, height, width = self.param_dict['input_shape']
        self.preprocessor = PreprocessYOLO((height, width))

        # Pre-computed prior anchors. Per the original author's note each entry
        # is [x, y, w, h, f_w, f_h]: x/y are feature-map column/row indices,
        # w/h the anchor size and f_w/f_h the feature-map size.
        self.prior_anchors = PriorBox(cfg=self.param_dict).forward()

        # Label names
        self.all_categories = load_label_categories(
            self.param_dict['label_file_path'])
        classes_num = len(self.all_categories)

        # Shape of the single TensorRT output: the three detection scales are
        # concatenated, so the number of rows is the sum of
        # (cells per scale) * (anchors per scale).
        stride = self.param_dict['stride']
        num_anchors = self.param_dict['num_anchors']
        grid_num = sum(
            (height // stride[k]) * (width // stride[k]) * num_anchors[k]
            for k in range(3))
        self.output_shapes = [(batch, grid_num, (classes_num + 5))]

        # Acceptable image suffixes
        self.img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng']
        # Acceptable video suffixes
        self.vid_formats = [
            'mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv'
        ]

        # YOLO post-processing. Per the original note, YOLOv4 concatenates the
        # three outputs into [N, AHW*3, classes_num+5], and the post-processor
        # can decode either the original YOLOv4 or the YOLOv5-style prediction.
        self.postprocessor = PostprocessYOLO(self.prior_anchors,
                                             self.param_dict)

    def predict(self,
                input_path='dog.jpg',
                output_save_root='./output',
                write_txt=False):
        '''
        Run detection on an image, a directory of images or a video file.

        Annotated images are saved under their original file name. Video
        frames are currently saved as individual ``NNNNN.png`` images; the
        ``*_pred.mp4`` writer is opened and released but frames are not yet
        written to it (see the TODO below).

        :param input_path: path of a single image, a directory of images, or a
            single video file
        :param output_save_root: directory that receives every result
        :param write_txt: reserved for dumping box/class/score to a txt file;
            not implemented yet
        :return: None. If ``input_path`` is neither a file nor a directory a
            message is printed and nothing else happens.
        '''
        # Work out whether the input is a directory, an image or a video.
        is_video = False
        path = input_path
        if os.path.isdir(path):
            # Directory of images; suffixes are matched case-insensitively.
            img_names = [
                name for name in os.listdir(path)
                if name.split('.')[-1].lower() in self.img_formats
            ]
        elif os.path.isfile(path):
            # '/hme/ai/111.jpg' -> ('/hme/ai', '111.jpg')
            path, img_name = os.path.split(path)
            suffix = img_name.split('.')[-1].lower()
            if suffix in self.vid_formats:
                is_video = True
            else:
                assert suffix in self.img_formats, "必须是单张图像路径"
                img_names = [img_name]
        else:
            print("输入无效!!!" * 3)
            # Nothing to process; bail out instead of failing later on the
            # never-assigned image list.
            return

        # Create the output directory
        check_path(output_save_root)

        cap = None
        video_writer = None
        if is_video:
            assert img_name.count('.') == 1, "视频名字必须只有1个 . "

            # Open the video and read fps / width / height / frame count
            cap = cv2.VideoCapture(os.path.join(path, img_name))
            fps = int(cap.get(cv2.CAP_PROP_FPS))
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            num = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))  # total frames
            # Create the output video
            video_save_path = os.path.join(
                output_save_root,
                img_name.split('.')[0] + '_pred.mp4')
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            video_writer = cv2.VideoWriter(video_save_path,
                                           fourcc=fourcc,
                                           fps=fps,
                                           frameSize=(width, height))
        else:
            num = len(img_names)  # number of images

        try:
            # Inference buffers are allocated once and reused for every input.
            inputs, outputs, bindings, stream = common.allocate_buffers(
                self.engine)
            for i in range(num):
                # Pre-processing
                if is_video:
                    # read() returns (success, frame) and advances to the next
                    # frame by itself, so no explicit seek is needed.
                    ok, frame = cap.read()
                    if not ok:
                        # The reported frame count can exceed what is decodable.
                        break
                    # The preprocessor receives the BGR frame matrix
                    image_raw, image = self.preprocessor.process(frame)
                else:
                    # The preprocessor receives an image path
                    image_raw, image = self.preprocessor.process(
                        os.path.join(path, img_names[i]))

                # Set host input to the image; do_inference_v2 copies it to the
                # GPU before executing.
                inputs[0].host = image
                trt_outputs = common.do_inference_v2(self.context,
                                                     bindings=bindings,
                                                     inputs=inputs,
                                                     outputs=outputs,
                                                     stream=stream)

                # The engine returns flat arrays; restore the expected shapes.
                trt_outputs = [
                    output.reshape(shape)
                    for output, shape in zip(trt_outputs, self.output_shapes)
                ]

                # Original image size as (W, H); image_raw exposes it via .size
                shape_orig_WH = image_raw.size

                # The post-processor can handle batch >= 1, but this class
                # only ever feeds it a single image.
                outputs_pred = self.postprocessor.process(trt_outputs,
                                                          shape_orig_WH)

                # TODO write box coordinates / class / confidence to txt

                # Only one image per call, so take the first result directly.
                boxes, classes, scores = outputs_pred[0][0]
                obj_detected_img = draw_bboxes(image_raw, boxes, scores,
                                               classes, self.all_categories)

                if is_video:
                    # TODO write the annotated frames into video_writer.
                    # Until then each frame is saved as a zero-padded PNG; an
                    # explicit extension is required for the image format to
                    # be inferred from the file name.
                    obj_detected_img.save(
                        os.path.join(output_save_root,
                                     str(i).zfill(5) + '.png'))
                else:
                    obj_detected_img.save(
                        os.path.join(output_save_root, img_names[i]))
        finally:
            # Always give back the video resources, even when inference fails.
            if video_writer is not None:
                video_writer.release()
            if cap is not None:
                cap.release()
                cv2.destroyAllWindows()
def main():
    """Run an OpenVINO IR YOLO model over a validation set, batch by batch.

    The visible part parses the command line, loads the IR network, builds the
    validation data loader and, for every batch, runs inference followed by
    YOLO post-processing. NOTE(review): the function is cut off inside the
    batch loop in this file, so the metric accumulators initialised below are
    presumably consumed by code that is not visible here.
    """
    args = build_argparser().parse_args()
    model_xml = args.model
    # The weights file is expected next to the .xml with a .bin extension.
    model_bin = os.path.splitext(model_xml)[0] + ".bin"

    # ------------- 1. Plugin initialization for specified device and load extensions library if specified -------------
    log.info("Creating Inference Engine...")
    ie = IECore()
    if args.cpu_extension and 'CPU' in args.device:
        ie.add_extension(args.cpu_extension, "CPU")

    # -------------------- 2. Reading the IR generated by the Model Optimizer (.xml and .bin files) --------------------
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    net = IENetwork(model=model_xml, weights=model_bin)

    # ---------------------------------- 3. Load CPU extension for support specific layer ------------------------------
    # Abort early when the CPU plugin cannot execute every layer of the network.
    if "CPU" in args.device:
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
        if len(not_supported_layers) != 0:
            log.error("Following layers are not supported by the plugin for specified device {}:\n {}".
                      format(args.device, ', '.join(not_supported_layers)))
            log.error("Please try to specify cpu extensions library path in sample's command line parameters using -l "
                      "or --cpu_extension command line argument")
            sys.exit(1)

    assert len(net.inputs.keys()) == 1, "Sample supports only YOLO V3 based single input topologies"

    # ---------------------------------------------- 4. Preparing inputs -----------------------------------------------
    log.info("Preparing inputs")
    input_blob = next(iter(net.inputs))
    output_blob = next(iter(net.outputs))

    # Evaluation settings are hard-coded rather than taken from the command line.
    batch_size = 16
    img_size = 448
    data_path = "/usr/src/app/data/unrel.data"
    data = parse_data_cfg(data_path)
    nc = 37#int(data['classes'])
    path = data['valid']
    names = load_classes(data['names'])

    iouv = torch.linspace(0.5, 0.95, 10, dtype=torch.float32)  # iou vector for mAP@0.5:0.95
    # Keep only the first threshold (0.5), so niou ends up being 1.
    iouv = iouv[0].view(1)
    niou = iouv.numel()#1

    conf_thres = 0.001
    iou_thres = 0.6
    verbose = True

    dataset = LoadImagesAndLabels(path, img_size, batch_size, rect=False)
    # Never ask for a batch larger than the dataset itself.
    batch_size = min(batch_size, len(dataset))
    dataloader = data_loader(dataset, batch_size, img_size)

    # Output shapes expected by the post-processor
    # NOTE(review): only referenced by a commented-out line in the visible
    # part; 126 matches 3 * (37 + 5) and 14/28/56 match 448 / 32, 16, 8.
    output_shapes = [(16, 126, 14, 14), (16, 126, 28, 28), (16, 126, 56, 56)]
    n, c, h, w = net.inputs[input_blob].shape

    log.info("Loading model to the plugin")
    exec_net = ie.load_network(network=net, device_name=args.device)

    # ----------------------------------------- 5. Loading model to the plugin -----------------------------------------
    log.info("Starting inference...")
    # Column header, shown as the progress-bar description.
    s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', '[email protected]', 'F1')
    # Metric and timing accumulators; t0 collects inference + post-processing time below.
    p, r, f1, mp, mr, map, mf1, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    pbar = tqdm.tqdm(dataloader, desc=s)
    stats, ap, ap_class =  [], [], []
    seen = 0
    print('HERE0')  # debug marker
    for batch_i, (imgs, targets, paths, shapes) in enumerate(pbar):
        # Scale pixel values from [0, 255] to [0, 1].
        imgs = imgs.astype(np.float32) / 255.0
        #print(imgs)

        nb, _, height, width = imgs.shape  # batch size, channels, height, width
        #print(height,width)
        whwh = np.array([width, height, width, height])
        #print('HERE1')
        # NOTE(review): the post-processor is rebuilt with identical arguments
        # on every batch.
        postprocessor_args = {"yolo_masks":   [(6, 7, 8), (3, 4, 5), (0, 1, 2)],                  # A list of 3 three-dimensional tuples for the YOLO masks
                               "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),  # A list of 9 two-dimensional tuples for the YOLO anchors
                                                  (59, 119), (116, 90), (156, 198), (373, 326)],
                               "num_classes": 37,
                               "stride":[32, 16, 8]}
        postprocessor = PostprocessYOLO(**postprocessor_args)
        # Start inference
        t = time()
        #print(imgs)
        res = exec_net.infer(inputs={input_blob: imgs})
        #print(res)
        res0 = list(res.values())[0]
        res1 = list(res.values())[1]
        res2 = list(res.values())[2]
        # Reorder the three outputs; presumably so that they run from the
        # coarsest to the finest grid, as the crop below assumes - TODO confirm.
        res = [res2,res0,res1]
        #print(res0)

        #print(res[0].shape,res[1].shape,res[2].shape)
        #print('HERE2')
        #res = [output.reshape(shape) for output, shape in zip(res0, output_shapes)]
        # Crop output i to (H * 2**i / 32, W * 2**i / 32) cells and make it a
        # contiguous float32 array.
        res = [np.ascontiguousarray(otpt[:, :, :int(imgs.shape[2]*(2**i)/32), :int(imgs.shape[3]*(2**i)/32)], dtype=np.float32) for i, otpt in enumerate(res)]
        #print(res[0].shape,res[1].shape,res[2].shape)
        output_list = postprocessor.process(res)
        #print('HERE2.25')
        #print(output_list[0].shape,output_list[1].shape,output_list[2].shape)
        #print('HERE2.5')
        t0 += time() - t

        # Join the per-scale predictions along dimension 1.
        inf_out = torch.cat(output_list, 1)

        print(inf_out.shape)
        print(inf_out)
        """
Example #21
0
def main():
    """Run INT8 YOLOv3-608 inference with TensorRT and dump intermediate layers.

    Steps: download and pre-process the test image, build an INT8 engine that
    additionally exposes the layers listed in ``output_layer_name``, run one
    inference, save each exposed layer as a grey-scale ``.tiff`` image, then
    post-process the regular YOLO outputs and save the image with boxes drawn.
    """
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = 'yolo_in8.trt'
    cfg_file_path = "yolov3.cfg"

    input_image_path = download_file(
        'dog.jpg',
        'https://github.com/pjreddie/darknet/raw/f86901f6177dfc6116360a13cc06ab680e0c86b0/data/dog.jpg',
        checksum_reference=None)

    # Network input resolution, (height, width)
    input_resolution_yolov3_HW = (608, 608)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    # image_raw is the untouched picture, image the network-ready version
    image_raw, image = preprocessor.process(input_image_path)
    # Original size in (width, height) order, needed by the post-processor
    shape_orig_WH = image_raw.size

    # Shapes of the three regular YOLO outputs
    output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]

    # INT8 calibrator
    calib = calibra.PythonEntropyCalibrator("calibration_dataset/",
                                            cache_file="yolo_calibration.cache")

    # Layers whose output should be visualised, and their filter counts
    output_layer_name = [
        "001_convolutional",
        "002_convolutional",
        "003_convolutional",
        "005_shortcut",
        "006_convolutional",
    ]
    filter_num = get_filter_num(cfg_file_path, output_layer_name)

    # Inference
    with build_int8_engine(onnx_file_path, calib, cfg_file_path,
                           output_layer_name,
                           engine_file_path) as engine, \
            engine.create_execution_context() as context:
        start = time.time()
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print('Running inference on image {}...'.format(input_image_path))
        # do_inference copies the host input to the GPU before executing.
        # For batch sizes other than 1, load_random_batch(calib) could supply
        # a test batch instead of the single image used here.
        inputs[0].host = image
        trt_outputs = common.do_inference(context,
                                          bindings=bindings,
                                          inputs=inputs,
                                          outputs=outputs,
                                          stream=stream,
                                          batch_size=1)
    end = time.time()
    print("Inference costs %.02f sec." % (end - start))

    # The first len(filter_num) outputs are the intermediate layers, the rest
    # the regular YOLO heads. All of them come back as flat arrays.
    n_mid = len(filter_num)
    raw_middle = trt_outputs[:n_mid]
    raw_final = trt_outputs[n_mid:]

    # A flat intermediate output holds filter_num * h * w values; recover the
    # spatial size from its length.
    middle_output_shapes = []
    for idx, flat in enumerate(raw_middle):
        if "convolutional" in output_layer_name[idx]:
            side = int(math.sqrt(flat.shape[0] / filter_num[idx]))
            dims = (side, side)
        else:
            side = int(math.sqrt(flat.shape[0] / filter_num[idx] / 2))
            dims = (2 * side, side)
        middle_output_shapes.append((1, filter_num[idx]) + dims)

    middle_output = [
        flat.reshape(shape)
        for flat, shape in zip(raw_middle, middle_output_shapes)
    ]

    # Sum over the channel axis and store every layer as a grey image
    for layer, fmap in zip(output_layer_name, middle_output):
        rows, cols = fmap.shape[2], fmap.shape[3]
        grey = misc.toimage(fmap.sum(axis=1).reshape(rows, cols))
        grey.save("{}.tiff".format(layer))
    print("Saveing middle output {}".format(output_layer_name))

    trt_outputs = [
        flat.reshape(shape) for flat, shape in zip(raw_final, output_shapes)
    ]

    postprocessor = PostprocessYOLO(
        # Three 3-tuples of anchor indices, one per output
        yolo_masks=[(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        # Nine (a, b) anchor pairs
        yolo_anchors=[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                      (59, 119), (116, 90), (156, 198), (373, 326)],
        # Object-coverage threshold, between 0 and 1
        obj_threshold=0.6,
        # Non-max-suppression threshold, between 0 and 1
        nms_threshold=0.5,
        yolo_input_resolution=input_resolution_yolov3_HW)

    # Decode the detections, draw them on the original image and save as PNG
    boxes, class_ids, scores = postprocessor.process(trt_outputs,
                                                     shape_orig_WH)
    obj_detected_img = draw_bboxes(image_raw, boxes, scores, class_ids,
                                   ALL_CATEGORIES)
    output_image_path = 'dog_bboxes.png'
    obj_detected_img.save(output_image_path, 'PNG')
    print('Saved image with bounding boxes of detected objects to {}.'.format(
        output_image_path))
def main(width=608, height=608, batch_size=1, dataset='coco_label.txt', int8mode=False, calib_file='yolo_calibration.cache',
         onnx_file='yolov3.onnx', engine_file='yolov3.trt', image_file='dog.jpg', result_file='dog_bboxes.png'):
    """Detect objects in one image with a TensorRT YOLOv3 engine and save the result.

    The label file ``dataset`` is looked up next to this script. The image is
    pre-processed to ``(height, width)``, run through the engine obtained from
    ``get_engine`` and post-processed; the image with boxes drawn on it is
    written to ``result_file`` as PNG.
    """
    # Labels of the chosen dataset live in the same directory as this file
    script_dir = os.path.dirname(os.path.realpath(__file__))
    all_categories = load_label_categories(os.path.join(script_dir, dataset))
    num_classes = len(all_categories)

    # Network input resolution, (height, width)
    input_resolution_yolov3_HW = (height, width)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    # image_raw is the untouched picture, image the network-ready version
    image_raw, image = preprocessor.process(image_file, batch_size)
    # Original size in (width, height) order, needed by the post-processor
    shape_orig_WH = image_raw.size

    # One output per down-sampling factor, each with (classes + 5) * 3 channels
    channels = (num_classes + 5) * 3
    output_shapes = [(batch_size, channels, height // factor, width // factor)
                     for factor in (32, 16, 8)]

    with get_engine(onnx_file, width, height, batch_size, engine_file,
                    int8mode, calib_file) as engine, \
            engine.create_execution_context() as context:
        start = time.time()
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print('Running inference on image {}...'.format(image_file))
        # do_inference copies the host input to the GPU before executing
        inputs[0].host = image
        flat_outputs = common.do_inference(context,
                                           bindings=bindings,
                                           inputs=inputs,
                                           outputs=outputs,
                                           stream=stream)
    end = time.time()
    print("Inference costs %.03f sec." % (end - start))

    # do_inference returns flat arrays; restore the expected shapes
    trt_outputs = [flat.reshape(shape)
                   for flat, shape in zip(flat_outputs, output_shapes)]

    postprocessor = PostprocessYOLO(
        # Three 3-tuples of anchor indices, one per output
        yolo_masks=[(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        # Nine (a, b) anchor pairs
        yolo_anchors=[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                      (59, 119), (116, 90), (156, 198), (373, 326)],
        # Object-coverage threshold, between 0 and 1
        obj_threshold=0.6,
        # Non-max-suppression threshold, between 0 and 1
        nms_threshold=0.5,
        yolo_input_resolution=input_resolution_yolov3_HW)

    # Only the first element of the batch is post-processed; it is re-wrapped
    # so that each output keeps a leading batch axis of length one.
    first_of_batch = [np.expand_dims(trt_outputs[k][0], axis=0)
                      for k in range(3)]
    boxes, class_ids, scores = postprocessor.process(first_of_batch,
                                                     shape_orig_WH,
                                                     num_classes)

    # Draw the boxes on the original image and save it as PNG
    obj_detected_img = draw_bboxes(image_raw, boxes, scores, class_ids,
                                   all_categories)
    obj_detected_img.save(result_file, 'PNG')
    print('Saved image with bounding boxes of detected objects to {}.'.format(result_file))
# Module-level configuration shared by the helpers below.
# NOTE(review): `anchors` holds six pairs while `postprocessor_args` below
# lists nine anchors with three masks - confirm which set is intended.
anchors = np.array([
    (10, 14), (23, 27), (37, 58),
    (81, 82), (135, 169), (344, 319),
])
classes_num = 80
score_threshold = 0.5

# Network input resolution, (height, width)
input_resolution_yolov3_HW = (416, 416)
# One output per down-sampling factor: 13x13, 26x26 and 52x52 cells
output_shapes = [(1, 255, 416 // factor, 416 // factor)
                 for factor in (32, 16, 8)]

postprocessor_args = {
    # Three 3-tuples of anchor indices, one per output
    "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
    # Nine (a, b) anchor pairs
    "yolo_anchors": [
        (10, 13), (16, 30), (33, 23),
        (30, 61), (62, 45), (59, 119),
        (116, 90), (156, 198), (373, 326),
    ],
    # Object-coverage threshold, between 0 and 1
    "obj_threshold": 0.6,
    # Non-max-suppression threshold, between 0 and 1
    "nms_threshold": 0.2,
    "yolo_input_resolution": input_resolution_yolov3_HW,
}

preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
postprocessor = PostprocessYOLO(**postprocessor_args)



def draw_bboxes(image_raw, bboxes, confidences, categories, all_categories, bbox_color='blue'):
    """Draw the bounding boxes on the original input image and return it.

    Keyword arguments:
    image_raw -- a raw PIL Image
    bboxes -- NumPy array containing the bounding box coordinates of N objects, with shape (N,4).
    categories -- NumPy array containing the corresponding category for each object,
    with shape (N,)
    confidences -- NumPy array containing the corresponding confidence for each object,
    with shape (N,)
    all_categories -- a list of all categories in the correct ordered (required for looking up
    the category name)
Example #24
0
def myinfer(image, context, inputs, outputs, bindings, stream):
    """Run one YOLO inference on ``image`` and return normalised detections.

    ``context``, ``inputs``, ``outputs``, ``bindings`` and ``stream`` are the
    TensorRT execution context and buffers handed to ``common.do_inference``.

    Returns ``(boxes, classes, scores)`` exactly as produced by the
    post-processor, except that when ``boxes`` is not ``None`` its first four
    columns are divided by the original image width/height (columns 0 and 2
    by the width, 1 and 3 by the height). In that case the annotated image is
    also shown in an OpenCV window named "test".
    """
    # Network input resolution, (height, width)
    input_resolution_yolov3_HW = (416, 416)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    # image_raw is the original picture, image the network-ready version
    image_raw, image = preprocessor.process(image)
    # Original size in (height, width) order, needed later
    shape_orig_HW = image_raw.shape[:2]
    H, W = shape_orig_HW

    # Shapes the flat engine outputs are restored to
    output_shapes = [(1, 255, 13, 13), (1, 255, 26, 26)]

    # do_inference copies the host input to the GPU before executing
    inputs[0].host = image
    flat_outputs = common.do_inference(context,
                                       bindings=bindings,
                                       inputs=inputs,
                                       outputs=outputs,
                                       stream=stream)
    trt_outputs = [
        flat.reshape(shape)
        for flat, shape in zip(flat_outputs, output_shapes)
    ]

    postprocessor = PostprocessYOLO(
        # Three 3-tuples of anchor indices
        yolo_masks=[(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        # Nine (a, b) anchor pairs
        yolo_anchors=[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                      (59, 119), (116, 90), (156, 198), (373, 326)],
        # Object-coverage threshold, between 0 and 1
        obj_threshold=0.6,
        # Non-max-suppression threshold, between 0 and 1
        nms_threshold=0.2,
        yolo_input_resolution=input_resolution_yolov3_HW)

    boxes, classes, scores = postprocessor.process(trt_outputs,
                                                   shape_orig_HW)
    if boxes is None:
        # Nothing detected: hand the post-processor result back untouched
        return boxes, classes, scores

    # Show the detections on the original image
    obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes,
                                   ALL_CATEGORIES)
    cv2.imshow("test", obj_detected_img)

    # Normalise the four box columns by the original image extent
    for column, extent in enumerate((W, H, W, H)):
        boxes[:, column] = boxes[:, column] / extent
    return boxes, classes, scores
    # postprocess(results[idx], result_filenames[idx], FLAGS.batch_size)
    # '''后处理'''
    postprocessor_args = {
        "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                         (59, 119), (116, 90), (156, 198), (373, 326)],
        "obj_threshold":
        0.6,  # 对象覆盖的阈值,[0,1]之间
        "nms_threshold":
        0.5,  # nms的阈值,[0,1]之间
        "yolo_input_resolution":
        input_resolution_yolov3_HW
    }
    # 创建后处理类的实例
    postprocessor = PostprocessYOLO(**postprocessor_args)

    print("saving...")
    for idx in range(len(results)):
        trt_results = [
            results[idx]["082_convolutional"][0],
            results[idx]["094_convolutional"][0],
            results[idx]["106_convolutional"][0]
        ]

        trt_outputs = [
            output.reshape(shape)
            for output, shape in zip(trt_results, output_shapes)
        ]
        # 运行后处理算法,并得到检测到对象的bounding box
        boxes, classes, scores = postprocessor.process(trt_outputs,
Example #26
0
def main():
    """Run YOLOv3-tiny detection on a video file with TensorRT and display it.

    Frames are read from a hard-coded video file, resized to 640x360,
    collected into batches of ``batch_size``, run through the engine and shown
    in an OpenCV window with an FPS overlay that is refreshed every
    ``fps_display_interval`` seconds. Press ``q`` to stop early. The measured
    frame rates and their min / max / mean are printed at the end.
    """
    input_size = 416
    batch_size = 1
    fp16_on = False
    onnx_file_path = '../../model_data/Suspect/yolov3-tiny-suspect.onnx'
    engine_file_path = 'trt_model/yolov3-tiny-suspect_1_fp32.trt'

    num_classes = 3
    # Second output dimension = (5 + number of classes) * 3
    # (255 for COCO, 33 for an 6-class model, ...)
    filters = (4 + 1 + num_classes) * 3

    # The two outputs have input_size / 32 and input_size / 16 cells per side
    # (13/26 for 416, 15/30 for 480, 17/34 for 544, 19/38 for 608).
    output_shapes = [
        (batch_size, filters, input_size // factor, input_size // factor)
        for factor in (32, 16)
    ]

    font = cv2.FONT_HERSHEY_SIMPLEX

    cap = cv2.VideoCapture("../../Datasets/test_suspect.mp4")
    # Setting CAP_PROP_FRAME_WIDTH / HEIGHT does not work on files, so frames
    # are resized explicitly further down.
    print("Width : ", int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)))
    print("Height : ", int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

    fps_display_interval = 5  # seconds
    frame_rate = 0
    frame_count = 0
    frame_rate_tab = []
    start_time = time.time()

    nums = 1000000

    input_resolution_yolov3_HW = (input_size, input_size)

    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)

    postprocessor_args = {
        "yolo_masks": [(3, 4, 5), (0, 1, 2)],
        # tiny-yolov3-416 anchors
        "yolo_anchors": [(10, 14), (23, 27), (37, 58), (81, 82), (135, 169),
                         (344, 319)],
        "obj_threshold": 0.5,
        "nms_threshold": 0.35,
        "yolo_input_resolution": input_resolution_yolov3_HW
    }

    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Per-batch accumulators
    frames_batch = []  # resized BGR frames, used when nothing is detected
    images = []        # network-ready inputs
    images_raw = []    # raw images handed to draw_bboxes
    index = 0

    with get_engine(onnx_file_path, batch_size, fp16_on, engine_file_path
                    ) as engine, engine.create_execution_context() as context:
        # The buffers only depend on the engine: allocate them once and reuse
        # them for every batch instead of re-allocating per frame.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    # End of the video (or a read error)
                    break

                frame_stream = cv2.resize(frame, (640, 360),
                                          interpolation=cv2.INTER_AREA)
                image_raw, image = preprocessor.process_frame(frame_stream)
                frames_batch.append(frame_stream)
                images_raw.append(image_raw)
                images.append(image)
                index += 1
                # Keep collecting until the batch is full
                if index != nums and len(images_raw) != batch_size:
                    continue

                images_batch = np.concatenate(images, axis=0)
                # Every frame is resized to the same size, so one shape
                # serves the whole batch.
                shape_orig_WH = image_raw.size
                inputs[0].host = images_batch
                trt_outputs = common.do_inference(context,
                                                  bindings=bindings,
                                                  inputs=inputs,
                                                  outputs=outputs,
                                                  stream=stream,
                                                  batch_size=batch_size)
                # do_inference returns flat arrays; restore the shapes
                trt_outputs = [
                    output.reshape(shape)
                    for output, shape in zip(trt_outputs, output_shapes)
                ]
                for i, batch_frame in enumerate(frames_batch):
                    boxes, classes, scores = postprocessor.process_frame2(
                        trt_outputs, shape_orig_WH, i)

                    # Refresh the displayed frame rate once per interval
                    end_time = time.time()
                    if (end_time - start_time) > fps_display_interval:
                        frame_rate = int(frame_count / (end_time - start_time))
                        frame_rate_tab.append(frame_rate)
                        start_time = time.time()
                        frame_count = 0

                    frame_count += 1

                    if boxes is None:
                        # No detection: show the i-th frame unchanged
                        det_img = batch_frame
                    else:
                        # Draw on the i-th image of the batch, not on the
                        # most recently read one.
                        obj_detected_img = draw_bboxes(images_raw[i], boxes,
                                                       scores, classes,
                                                       ALL_CATEGORIES)
                        det_img = np.array(obj_detected_img)
                    cv2.putText(det_img,
                                str(frame_rate) + " fps", (500, 50),
                                font,
                                1, (255, 0, 0),
                                thickness=3,
                                lineType=2)
                    cv2.imshow("frame", det_img)

                # Start a fresh batch
                frames_batch = []
                images = []
                images_raw = []

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Release the capture and the window even if inference fails
            cap.release()
            cv2.destroyAllWindows()

        print(frame_rate_tab)
        if frame_rate_tab:
            moy_FPS = np.mean(frame_rate_tab)
            print("FPS min : ", min(frame_rate_tab))
            print("FPS max : ", max(frame_rate_tab))
            print("FPS moyen :", moy_FPS)
        else:
            # The run was shorter than one measuring interval, so there is
            # nothing to aggregate (min()/max() would raise on an empty list).
            print("No FPS measurement: run shorter than",
                  fps_display_interval, "seconds")
def main(inputSize):
    """Run YOLOv3 TensorRT inference on frames from ``cap`` and publish them.

    Obtains an engine through ``get_engine`` for the files
    ``yolov3-<inputSize>.onnx`` / ``yolov3-<inputSize>.trt`` and then loops
    without a termination condition: a frame is read from the module-level
    ``cap``, pre-processed, run through the network, post-processed into
    boxes / classes / scores, annotated, and finally stored in the global
    ``outputFrame`` while holding the module-level ``lock``.

    Args:
        inputSize: Side length of the square network input. It is also
            substituted into the ONNX and engine file names.

    Side effects:
        Rebinds the globals ``t0``, ``t1``, ``fps`` and ``outputFrame`` for
        every processed frame. The CUDA context created here is popped when
        the function is left, including when an exception propagates.
    """
    # Only names that are assigned below need a ``global`` declaration;
    # ``cap`` and ``lock`` are merely read from module scope.
    global outputFrame, t0, t1, fps

    cuda.init()
    onnx_file_path = 'yolov3-{}.onnx'.format(inputSize)
    engine_file_path = 'yolov3-{}.trt'.format(inputSize)
    h, w = (inputSize, inputSize)
    # Target network's (spatial) input resolution, HW ordered
    input_resolution_yolov3_HW = (inputSize, inputSize)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)

    # Shapes the flat inference outputs are reshaped to before they are
    # handed to the post-processor (input size divided by 32, 16 and 8).
    output_shapes = [(1, 255, h // 32, w // 32), (1, 255, h // 16, w // 16),
                     (1, 255, h // 8, w // 8)]

    ctx = make_default_context()
    try:
        postprocessor = PostprocessYOLO(
            yolo_masks=[(6, 7, 8), (3, 4, 5), (0, 1, 2)],
            yolo_anchors=[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                          (59, 119), (116, 90), (156, 198), (373, 326)],
            obj_threshold=0.5,
            nms_threshold=0.35,
            yolo_input_resolution=input_resolution_yolov3_HW)

        with get_engine(onnx_file_path, engine_file_path) as engine, \
                engine.create_execution_context() as context:
            print("performing inference")
            inputs, outputs, bindings, stream = common.allocate_buffers(engine)

            while True:
                _, image_raw = cap.read()
                if image_raw is None:
                    # NOTE(review): a failed read is retried immediately and
                    # indefinitely, exactly as before; if ``cap`` can reach
                    # end-of-stream this spins - confirm whether that is
                    # intended.
                    continue

                image_raw, image = preprocessor.process(image_raw)
                shape_orig_WH = image_raw.size
                inputs[0].host = image

                # t0/t1 bracket inference plus post-processing.
                t0 = time.time()
                trt_outputs = common.do_inference(context,
                                                  bindings=bindings,
                                                  inputs=inputs,
                                                  outputs=outputs,
                                                  stream=stream)
                trt_outputs = [
                    output.reshape(shape)
                    for output, shape in zip(trt_outputs, output_shapes)
                ]
                boxes, classes, scores = postprocessor.process(
                    trt_outputs, shape_orig_WH)
                t1 = time.time()
                fps = 1 / (t1 - t0)

                # Report every missing result and fall back to the
                # un-annotated frame if any of them is absent.
                missing = [
                    label
                    for label, value in (("bboxes", boxes),
                                         ("classes", classes),
                                         ("scores", scores))
                    if value is None
                ]
                for label in missing:
                    print("no " + label)

                if missing:
                    obj_detected_img = image_raw
                else:
                    obj_detected_img = draw_bboxes(
                        image_raw,
                        bboxes=boxes,
                        confidences=scores,
                        categories=classes,
                        all_categories=ALL_CATEGORIES)

                # Hand the frame over under the lock shared with readers of
                # ``outputFrame``.
                with lock:
                    outputFrame = np.array(obj_detected_img)
    finally:
        # The loop above only ends through an exception, so the context has
        # to be released here rather than after the loop.
        ctx.pop()
Example #28
0
def main():
    """Run the Tiny YOLO v2 detector on a live camera stream.

    Parses the command line, opens the camera (through a GStreamer pipeline
    when ``--csi`` is given or the camera number is negative, otherwise by
    camera index), loads the model through ``get_engine`` and then processes
    frames until ESC is pressed, the preview window is closed or Ctrl-C is
    received. Detections are handed to ``publish_bboxes`` and, unless
    ``--novout`` is given, drawn on the frame and displayed.

    Raises:
        RuntimeError: If the camera could not be opened.
    """
    # Parse the command line parameters
    parser = argparse.ArgumentParser(
        description='Tiny YOLO v2 Object Detector')
    parser.add_argument('--camera', '-c',
                        type=int, default=0, metavar='CAMERA_NUM',
                        help='Camera number')
    parser.add_argument('--csi',
                        action='store_true',
                        help='Use CSI camera')
    parser.add_argument('--width',
                        type=int, default=1280, metavar='WIDTH',
                        help='Capture width')
    parser.add_argument('--height',
                        type=int, default=720, metavar='HEIGHT',
                        help='Capture height')
    parser.add_argument('--objth',
                        type=float, default=0.6, metavar='OBJ_THRESH',
                        help='Threshold of object confidence score (between 0 and 1)')
    parser.add_argument('--nmsth',
                        type=float, default=0.3, metavar='NMS_THRESH',
                        help='Threshold of NMS algorithm (between 0 and 1)')
    parser.add_argument('--host',
                        type=str, default='localhost', metavar='MQTT_HOST',
                        help='MQTT remote broker IP address')
    parser.add_argument('--topic',
                        type=str, metavar='MQTT_TOPIC',
                        help='MQTT topic to be published on')
    parser.add_argument('--port',
                        type=int, default=1883, metavar='MQTT_PORT',
                        help='MQTT port number')
    parser.add_argument('--novout',
                        action='store_true',
                        help='No video output')
    args = parser.parse_args()

    # An MQTT client is only created when a topic was given.
    client = None
    if args.topic is not None:
        client = init_mqtt(args.host, args.port)

    if args.csi or (args.camera < 0):
        if args.camera < 0:
            args.camera = 0
        # Open the MIPI-CSI camera
        gst_cmd = appbase.GST_STR_CSI % (args.width, args.height,
                                         appbase.FPS, args.camera,
                                         args.width, args.height)
        cap = cv2.VideoCapture(gst_cmd, cv2.CAP_GSTREAMER)
    else:
        # Open the V4L2 camera
        cap = cv2.VideoCapture(args.camera)
        # Set the capture parameters
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)

    try:
        # Without this check a camera that never opened makes the read loop
        # below spin forever on failed reads.
        if not cap.isOpened():
            raise RuntimeError('Could not open camera %d' % args.camera)

        # The requested size is used as the frame size; the original note
        # says OpenCV 4.1 does not report the correct value.
        act_width = args.width
        act_height = args.height
        frame_info = 'Frame:%dx%d' % (act_width, act_height)

        # Download the label data
        categories = appbase.download_label()

        # Configure the post-processing
        postprocessor = PostprocessYOLO(
            # YOLO masks (Tiny YOLO v2 has only single scale.)
            yolo_masks=[(0, 1, 2, 3, 4)],
            # YOLO anchors
            yolo_anchors=[(1.08, 1.19), (3.42, 4.41), (6.63, 11.38),
                          (9.42, 5.11), (16.62, 10.52)],
            # Threshold of object confidence score (between 0 and 1)
            obj_threshold=args.objth,
            # Threshold of NMS algorithm (between 0 and 1)
            nms_threshold=args.nmsth,
            # Input image resolution
            yolo_input_resolution=appbase.INPUT_RES,
            # Number of object classes
            num_categories=len(categories))

        # Shape the flat network output is reshaped to for post-processing
        output_shapes = [(1, 125, 13, 13)]

        # Download the Tiny YOLO v2 ONNX model
        onnx_file_path = appbase.download_model()

        # File name of the locally saved TensorRT plan
        engine_file_path = 'model.trt'

        # Durations of the last ten frames. It starts as zeros, so the
        # average is too small until ten real samples have been recorded;
        # the FPS overlay is therefore only drawn once frame_count > 10.
        time_list = np.zeros(10)

        # Load the model into TensorRT
        with get_engine(onnx_file_path, engine_file_path) as engine, \
                engine.create_execution_context() as context:

            # Allocate buffer memory for TensorRT
            inputs, outputs, bindings, stream = common.allocate_buffers(engine)

            fps = 0.0
            frame_count = 0

            print('video capture started')

            try:
                while True:
                    # Frame start time for the FPS calculation
                    start_time = time.time()

                    # Capture a frame; a failed read is simply retried
                    ret, img = cap.read()
                    if not ret:
                        continue

                    # Reshape the captured image for Tiny YOLO v2
                    rs_img = cv2.resize(img, appbase.INPUT_RES)
                    rs_img = cv2.cvtColor(rs_img, cv2.COLOR_BGRA2RGB)
                    src_img = appbase.reshape_image(rs_img)

                    # Execute an inference in TensorRT
                    inputs[0].host = src_img
                    trt_outputs = common.do_inference(
                        context, bindings=bindings, inputs=inputs,
                        outputs=outputs, stream=stream)

                    # Reshape the network output for the post-processing
                    trt_outputs = [
                        output.reshape(shape)
                        for output, shape in zip(trt_outputs, output_shapes)
                    ]

                    # Calculate the bounding boxes
                    boxes, classes, scores = postprocessor.process(
                        trt_outputs, (act_width, act_height))

                    if boxes is not None:
                        # NOTE(review): called even when no MQTT client was
                        # created (client is None) - presumably handled
                        # inside publish_bboxes; confirm.
                        publish_bboxes(client, args.topic, frame_count,
                                       img, boxes, scores, classes,
                                       categories)

                    if not args.novout:
                        # Draw the bounding boxes
                        if boxes is not None:
                            appbase.draw_bboxes(img, boxes, scores, classes,
                                                categories)
                        if frame_count > 10:
                            fps_info = '{0}{1:.2f}'.format('FPS:', fps)
                            msg = '%s %s' % (frame_info, fps_info)
                            appbase.draw_message(img, msg)

                        # Show the results
                        cv2.imshow(appbase.WINDOW_NAME, img)

                        # ESC terminates the application
                        key = cv2.waitKey(20)
                        if key == 27:  # ESC
                            break

                        # Stop as well when the window was closed
                        if cv2.getWindowProperty(
                                appbase.WINDOW_NAME,
                                cv2.WND_PROP_AUTOSIZE) < 0:
                            break

                    # Average FPS over the last ten frames: drop the oldest
                    # sample and append the newest one.
                    elapsed_time = time.time() - start_time
                    time_list = np.append(time_list[1:], elapsed_time)
                    fps = 1.0 / np.average(time_list)

                    frame_count += 1

            except KeyboardInterrupt:
                print('exitting..')
    finally:
        # Release everything even when an error ends the run.
        cap.release()

        if not args.novout:
            cv2.destroyAllWindows()

        if client is not None:
            client.disconnect()
def main():
    """Run the Tiny YOLO v2 detector on a camera stream and drive GPIO pins.

    Parses the command line, opens the camera (through a GStreamer pipeline
    when the camera number is negative, otherwise by camera index), loads
    the model through ``get_engine`` and then processes frames until ESC is
    pressed. Frames with detections get their boxes drawn; for frames
    without detections ``Buz_pin`` is driven LOW and ``enable_pin`` HIGH.

    Raises:
        RuntimeError: If the camera could not be opened.
    """
    # Parse the command line parameters
    parser = argparse.ArgumentParser(description='Tiny YOLO v2 Object Detector')
    parser.add_argument('--camera', '-c',
                        type=int, default=0, metavar='CAMERA_NUM',
                        help='Camera number, use any negative integer for MIPI-CSI camera')
    parser.add_argument('--width',
                        type=int, default=1280, metavar='WIDTH',
                        help='Capture width')
    parser.add_argument('--height',
                        type=int, default=720, metavar='HEIGHT',
                        help='Capture height')
    parser.add_argument('--objth',
                        type=float, default=0.6, metavar='OBJ_THRESH',
                        help='Threshold of object confidence score (between 0 and 1)')
    parser.add_argument('--nmsth',
                        type=float, default=0.3, metavar='NMS_THRESH',
                        help='Threshold of NMS algorithm (between 0 and 1)')
    args = parser.parse_args()

    if args.camera < 0:
        # Open the MIPI-CSI camera
        gst_cmd = GST_STR_CSI % (args.width, args.height, FPS,
                                 args.width, args.height)
        cap = cv2.VideoCapture(gst_cmd, cv2.CAP_GSTREAMER)
    else:
        # Open the V4L2 camera
        cap = cv2.VideoCapture(args.camera)
        # Set the capture parameters
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)

    try:
        # Without this check a camera that never opened makes the read loop
        # below spin forever on failed reads.
        if not cap.isOpened():
            raise RuntimeError('Could not open camera %d' % args.camera)

        # The requested size is used as the frame size; the original note
        # says OpenCV 4.1 does not report the correct value.
        act_width = args.width
        act_height = args.height
        frame_info = 'Frame:%dx%d' % (act_width, act_height)

        # Download the label data
        categories = download_label()

        # Configure the post-processing
        postprocessor = PostprocessYOLO(
            # YOLO masks (Tiny YOLO v2 has only single scale.)
            yolo_masks=[(0, 1, 2, 3, 4)],
            # YOLO anchors
            yolo_anchors=[(1.08, 1.19), (3.42, 4.41), (6.63, 11.38),
                          (9.42, 5.11), (16.62, 10.52)],
            # Threshold of object confidence score (between 0 and 1)
            obj_threshold=args.objth,
            # Threshold of NMS algorithm (between 0 and 1)
            nms_threshold=args.nmsth,
            # Input image resolution
            yolo_input_resolution=INPUT_RES,
            # Number of object classes
            num_categories=len(categories))

        # Shape the flat network output is reshaped to for post-processing
        output_shapes = [(1, 125, 13, 13)]

        # Download the Tiny YOLO v2 ONNX model
        onnx_file_path = download_model()

        # File name of the locally saved TensorRT plan
        engine_file_path = 'model.trt'

        # Durations of the last ten frames. It starts as zeros, so the
        # average is too small until ten real samples have been recorded;
        # the FPS overlay is therefore only drawn once frame_count > 10.
        time_list = np.zeros(10)

        # Load the model into TensorRT
        with get_engine(onnx_file_path, engine_file_path) as engine, \
                engine.create_execution_context() as context:

            # Allocate buffer memory for TensorRT
            inputs, outputs, bindings, stream = common.allocate_buffers(engine)

            fps = 0.0
            frame_count = 0

            while True:
                # Frame start time for the FPS calculation
                start_time = time.time()

                # Capture a frame; a failed read is simply retried
                ret, img = cap.read()
                if not ret:
                    continue

                # Reshape the captured image for Tiny YOLO v2
                rs_img = cv2.resize(img, INPUT_RES)
                rs_img = cv2.cvtColor(rs_img, cv2.COLOR_BGRA2RGB)
                src_img = reshape_image(rs_img)

                # Execute an inference in TensorRT
                inputs[0].host = src_img
                trt_outputs = common.do_inference(
                    context, bindings=bindings, inputs=inputs,
                    outputs=outputs, stream=stream)

                # Reshape the network output for the post-processing
                trt_outputs = [
                    output.reshape(shape)
                    for output, shape in zip(trt_outputs, output_shapes)
                ]

                # Calculate the bounding boxes
                boxes, classes, scores = postprocessor.process(
                    trt_outputs, (act_width, act_height))

                if boxes is not None:
                    # Draw the bounding boxes
                    draw_bboxes(img, boxes, scores, classes, categories)
                else:
                    # c.okihara 2020-07-22 (3/3): no bounding boxes were
                    # found, so switch the buzzer pin off and the enable
                    # pin on.
                    GPIO.output(Buz_pin, GPIO.LOW)
                    GPIO.output(enable_pin, GPIO.HIGH)

                if frame_count > 10:
                    fps_info = '{0}{1:.2f}'.format('FPS:', fps)
                    msg = '%s %s' % (frame_info, fps_info)
                    draw_message(img, msg)

                # Show the results
                cv2.imshow(WINDOW_NAME, img)

                # ESC terminates the application
                key = cv2.waitKey(20)
                if key == 27:  # ESC
                    break

                # Average FPS over the last ten frames: drop the oldest
                # sample and append the newest one.
                elapsed_time = time.time() - start_time
                time_list = np.append(time_list[1:], elapsed_time)
                fps = 1.0 / np.average(time_list)

                frame_count += 1
    finally:
        # Release the capture object and the preview window even when an
        # error ends the run.
        cap.release()
        cv2.destroyAllWindows()
Example #30
0
def main():
    """Run YOLOv3 through TensorRT over a test list and dump the detections.

    The module-level switches ``COCO``, ``SIZE`` and ``BUILD`` select the
    ONNX / engine files and the image list. For every listed image the
    detections are written to ``./mAP/predicted/<index>.txt`` as
    ``category score xmin ymin xmax ymax`` lines. The mean of the times
    returned by ``common.do_inference`` is printed at the end and, when
    ``PRINT_RESULTS`` is set, annotated images, the raw timings and a timing
    plot are stored under ``results/``.

    Side effects:
        Deletes and recreates ``./mAP/predicted``.
    """
    # Model files and image list depend on the selected dataset.
    if COCO:
        model_stem = 'yolov3-' + str(SIZE)
        test_images_file = './coco/5k.txt'  # for coco
    else:
        model_stem = 'yolov3-voc-' + str(SIZE)
        test_images_file = './VOC/data/dataset/voc_test.txt'  # for voc
    onnx_file_path = os.path.join('./engine/onnx/', model_stem + '.onnx')
    engine_file_path = os.path.join('./engine/trt/',
                                    model_stem + BUILD + '.trt')

    # Blank lines (e.g. a trailing newline at the end of the list) would
    # otherwise be treated as an empty image path.
    with open(test_images_file, 'r') as f:
        test_images = [line.strip() for line in f if line.strip()]

    timeRecSave = []

    input_resolution_yolov3_HW = (SIZE, SIZE)

    # Start from an empty prediction directory; makedirs also creates the
    # parent directory when it does not exist yet.
    predicted_dir_path = './mAP/predicted'
    if os.path.exists(predicted_dir_path):
        shutil.rmtree(predicted_dir_path)
    os.makedirs(predicted_dir_path)

    if PRINT_RESULTS:
        # Annotated images, timings and the plot are all written here.
        os.makedirs('./results', exist_ok=True)

    # None of the following depends on the individual image, so it is built
    # once instead of once per iteration.
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    # Shapes the flat inference outputs are reshaped to for post-processing
    # (input size divided by 32, 16 and 8).
    output_shapes = [(1, int(OUT), int(SIZE / 32), int(SIZE / 32)),
                     (1, int(OUT), int(SIZE / 16), int(SIZE / 16)),
                     (1, int(OUT), int(SIZE / 8), int(SIZE / 8))]
    postprocessor = PostprocessYOLO(
        # A list of 3 three-dimensional tuples for the YOLO masks
        yolo_masks=[(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        # A list of 9 two-dimensional tuples for the YOLO anchors
        yolo_anchors=[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                      (59, 119), (116, 90), (156, 198), (373, 326)],
        # Threshold for object coverage, float value between 0 and 1
        obj_threshold=0.6,
        # Threshold for non-max suppression algorithm, float value between
        # 0 and 1
        nms_threshold=0.5,
        yolo_input_resolution=input_resolution_yolov3_HW)

    with get_engine(onnx_file_path, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)

        for idx, input_image_path in enumerate(test_images):
            filename = os.path.split(input_image_path)[1]

            # Load the image and get it back together with its
            # pre-processed version.
            image_raw, image = preprocessor.process(input_image_path)
            # Size of the original image, needed by the post-processor
            shape_orig_WH = image_raw.size

            # Set the host input to the image and run the inference; this
            # project's do_inference also returns the recognition time.
            inputs[0].host = image
            trt_outputs, timeRec = common.do_inference(context,
                                                       bindings=bindings,
                                                       inputs=inputs,
                                                       outputs=outputs,
                                                       stream=stream)
            timeRecSave.append(timeRec)
            print('%d, Image %s, Recognition Time %0.3f seconds' %
                  (idx, filename, timeRec))

            # The outputs are reshaped before the post-processing.
            trt_outputs = [
                output.reshape(shape)
                for output, shape in zip(trt_outputs, output_shapes)
            ]

            # Bounding box details of the detected objects
            boxes, classes, scores = postprocessor.process(
                trt_outputs, shape_orig_WH)

            # Draw the bounding boxes onto the original image and save it
            if PRINT_RESULTS:
                if boxes is None:
                    # Nothing was detected: keep the untouched image rather
                    # than handing None to draw_bboxes.
                    # NOTE(review): assumes image_raw offers .save() like
                    # the object returned by draw_bboxes - confirm.
                    obj_detected_img = image_raw
                else:
                    obj_detected_img = draw_bboxes(image_raw, boxes, scores,
                                                   classes, ALL_CATEGORIES)
                output_image_path = './results/yolo_' + filename
                obj_detected_img.save(output_image_path)
                print(
                    'Saved image with bounding boxes of detected objects to {}.'
                    .format(output_image_path))

            predict_result_path = os.path.join(predicted_dir_path,
                                               str(idx) + '.txt')

            # The file is created even when nothing was detected, so every
            # image index has a prediction file.
            with open(predict_result_path, 'w') as f:
                if boxes is not None:
                    for box, score, category_idx in zip(
                            boxes, scores, classes):
                        x_coord, y_coord, width, height = box
                        # Convert (x, y, w, h) to corner coordinates to fit
                        # YunYang1994's mAP calculation input format.
                        corners = [
                            x_coord, y_coord, x_coord + width, y_coord + height
                        ]
                        # Whitespace inside the category name is removed so
                        # the line stays space-separated.
                        category = "".join(ALL_CATEGORIES[category_idx].split())
                        xmin, ymin, xmax, ymax = (
                            str(int(value)) for value in corners)
                        f.write(' '.join([
                            category, "{:.4f}".format(score), xmin, ymin, xmax,
                            ymax
                        ]) + '\n')

    # An empty image list has no mean; avoid numpy's empty-slice warning.
    timeRecMean = np.mean(timeRecSave) if timeRecSave else float('nan')
    print('The mean recognition time is {0:0.3f} seconds'.format(timeRecMean))

    # Visualization of results
    if PRINT_RESULTS:
        np.save('results/timeRecognition.npy', timeRecSave)
        plt.figure(figsize=(8, 5))
        plt.plot(timeRecSave, label='Recg_time')
        plt.ylim([0, 0.05])
        plt.xlabel('Test image number')
        plt.ylabel('Time [second]')
        plt.title(
            'Recognition time of Yolov3_DarkNet_ONNX_TensorRT_GPU_coco_test_2017'
        )
        plt.hlines(y=timeRecMean,
                   xmin=0,
                   xmax=len(test_images),
                   linewidth=3,
                   color='r',
                   label='Mean')
        plt.savefig(
            'results/Yolov3_DarkNet_ONNX_TensorRT_GPU_coco_test_2017.png',
            bbox_inches='tight')
        plt.show()