def main():
    """Benchmark a serialized plate-detection TensorRT engine on a single image.

    The image is pre-processed once, one untimed inference is executed, and
    then 10000 timed inferences are run.  The per-run latency, the average
    latency and the shapes of the first two engine outputs are printed.
    """
    engine_file_path = 'plate_detection.trt'
    input_image_path = '../cat.jpg'

    # Network input resolution in (height, width) order.
    input_resolution_plate_detection_HW = (325, 325)
    _, image = PreprocessYOLO(input_resolution_plate_detection_HW).process(input_image_path)
    print(image.shape)

    with get_engine_from_bin(engine_file_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        infer_kwargs = dict(bindings=bindings, inputs=inputs, outputs=outputs,
                            stream=stream, batch_size=1)

        print('Running inference on image {}...'.format(input_image_path))
        # The pre-processed image becomes the host-side input buffer.
        inputs[0].host = image

        # First pass is deliberately left out of the timing statistics.
        trt_outputs = common.do_inference(context, **infer_kwargs)

        # Timed passes: 10000 runs, each measured individually.
        n_time_inference = 10000
        total_time = 0
        for run_number in range(1, n_time_inference + 1):
            started = time.time()
            trt_outputs = common.do_inference(context, **infer_kwargs)
            delta_time = time.time() - started
            total_time += delta_time
            print('inference-{} cost: {}ms'.format(run_number, delta_time*1000))

        avg_time_original_model = total_time / n_time_inference
        print("average inference time: {}ms".format(avg_time_original_model*1000))
        for output in trt_outputs[:2]:
            print(output.shape)
Beispiel #2
0
def main(FLAGS):
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run batched inference.

    A single test image is pre-processed, replicated ``inf_batch`` times and
    pushed through the engine in one call.  The three flat outputs are reshaped
    to ``(max_batch_size, 255, S, S)`` and every executed sample is handed to
    ``PostprocessYOLO.process``.

    Args:
        FLAGS: Options object forwarded unchanged to ``get_engine``.

    Note:
        The per-sample detections are computed but currently neither returned
        nor stored; the function returns ``None``.
    """
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = "yolov3.trt"
    input_image_path = 'debug_image/test1.jpg'
    input_resolution_yolov3_HW = (608, 608)
    # Single source of truth for the number of samples sent to the engine
    # (previously duplicated as a literal in np.tile and in do_inference).
    inf_batch = 36

    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    image_raw, image = preprocessor.process(input_image_path)

    # Original image size; named WH by the author, used to scale boxes back.
    shape_orig_WH = image_raw.size

    with get_engine(onnx_file_path, FLAGS, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = allocate_buffers(engine)

        max_batch_size = engine.max_batch_size
        # Replicate the one pre-processed image along the batch axis.
        inputs[0].host = np.tile(image, [inf_batch, 1, 1, 1])

        trt_outputs = do_inference(context,
                                   bindings=bindings,
                                   inputs=inputs,
                                   outputs=outputs,
                                   stream=stream,
                                   batch_size=inf_batch)

    # The flat outputs are reshaped with the engine's max batch size, not with
    # inf_batch, so the leading dimension may exceed the executed sample count.
    output_shapes = [(max_batch_size, 255, 19, 19),
                     (max_batch_size, 255, 38, 38),
                     (max_batch_size, 255, 76, 76)]
    trt_outputs = [
        output.reshape(shape)
        for output, shape in zip(trt_outputs, output_shapes)
    ]

    postprocessor_args = {
        "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                         (59, 119), (116, 90), (156, 198), (373, 326)],
        "obj_threshold": 0.6,
        "nms_threshold": 0.5,
        "yolo_input_resolution": input_resolution_yolov3_HW,
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Regroup from "per output layer" to "per sample": one list of three
    # feature maps for each batch slot.
    feat_batch = [list(layers) for layers in zip(*trt_outputs)]

    # Only the first inf_batch slots were fed to the engine; slots beyond that
    # (when max_batch_size > inf_batch) hold no results of this call.
    for layers in feat_batch[:inf_batch]:
        boxes, classes, scores = postprocessor.process(layers, (shape_orig_WH))
    def read_batch_file(self, filename):
        """Pre-process a group of image files into one batch array.

        Args:
            filename: Iterable of image paths (one entry per sample, despite
                the singular parameter name).

        Returns:
            A NumPy array whose shape is forced to
            ``(self.batch_size, 3, 608, 608)``.

        Raises:
            ValueError: Raised by NumPy when the stacked data cannot be viewed
                with that shape (e.g. the number of paths does not match
                ``self.batch_size``).
        """
        input_resolution_yolov3_HW = (608, 608)
        # One pre-processor serves every image; it used to be rebuilt per file.
        preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
        # process() returns a pair; index 1 is the network-ready image.
        batch = np.array(
            [preprocessor.process(img_path)[1] for img_path in filename])
        batch.shape = self.batch_size, 3, 608, 608

        return batch
def main():
    """Run the fast-YOLO fabric-defect engine on one sample image.

    The sample is pre-processed to the 352x352 network input, executed through
    the engine obtained from ``get_engine`` and the single flat output is
    reshaped to ``(1, 18, 11, 11)`` before being decoded.

    Returns:
        The ``(boxes, classes, scores)`` triple unpacked from
        ``PostprocessYOLO.process``.
    """
    # Model artefacts and the image to analyse.
    onnx_file_path = '/home/nvidia/Documents/Projects/Fabric_defect_detection/YOLO/fast_yolo.onnx'
    engine_file_path = "/home/nvidia/Documents/Projects/Fabric_defect_detection/YOLO/fast_yolo.trt"
    input_image_path = "/home/nvidia/Documents/Projects/Fabric_defect_detection/YOLO/sample.png"

    # Network input resolution, (height, width).
    input_resolution_yolov3_HW = (352, 352)
    image_raw, image = PreprocessYOLO(input_resolution_yolov3_HW).process(input_image_path)
    # Size of the untouched input image; needed to map boxes back onto it.
    shape_orig_WH = image_raw.size

    with get_engine(onnx_file_path, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print('Running inference on image {}...'.format(input_image_path))
        # The pre-processed image becomes the host-side input buffer.
        inputs[0].host = image
        flat_outputs = common.do_inference(
            context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)

    # do_inference yields flat arrays; give each one its expected shape.
    output_shapes = [(1, 18, 11, 11)]
    trt_outputs = [flat.reshape(dims) for flat, dims in zip(flat_outputs, output_shapes)]

    postprocessor = PostprocessYOLO(
        yolo_masks=[(0, 1, 2)],                           # one mask triple: single output scale
        yolo_anchors=[(188, 15), (351, 16), (351, 30)],   # the three anchors that mask selects
        obj_threshold=0.5,                                # object-coverage threshold
        nms_threshold=0.2,                                # non-max-suppression threshold
        yolo_input_resolution=input_resolution_yolov3_HW,
    )

    boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH))
    return boxes, classes, scores
Beispiel #5
0
 def __init__(self):
     """Set up the pre-processor, the TensorRT wrapper and the post-processor.

     All three are configured for a 608x608 network input and the engine file
     ``yolov3.trt``.
     """
     super().__init__()
     # Network input resolution shared by pre- and post-processing.
     network_resolution = (608, 608)
     self.preprocessor = PreprocessYOLO(network_resolution)
     self.trt = TensorRT("yolov3.trt")
     self.postprocessor = PostprocessYOLO(
         yolo_masks=[(6, 7, 8), (3, 4, 5), (0, 1, 2)],
         yolo_anchors=[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                       (59, 119), (116, 90), (156, 198), (373, 326)],
         obj_threshold=0.6,
         nms_threshold=0.5,
         yolo_input_resolution=network_resolution,
     )
Beispiel #6
0
def process_multi(img_path, yolo, engine,context):
    """Time two detection paths on the same image and write their results.

    First ``yolo.process_image`` is run on the image read by OpenCV and the
    drawn result is written to ``img.jpg``.  Then the image is pre-processed
    for a 608x608 YOLOv3 TensorRT engine, inferred, post-processed and the
    annotated image is written to ``out_boxes.png``.  Three timings are
    printed: the ``yolo`` path (labelled "tf"), TensorRT inference plus
    reshape, and TensorRT post-processing plus drawing/saving.

    Args:
        img_path: Path of the image to process.
        yolo: Object exposing ``process_image(image)`` that returns a 3-tuple
            whose second element is passed to ``draw``.
        engine: Engine handed to ``common_utils.allocate_buffers``.
        context: Execution context handed to ``common_utils.do_inference``.
    """
    # NOTE: this first timestamp is overwritten below before it is ever read.
    start_tf=time.time()
    image=cv2.imread(img_path)

    # The call is made twice and the timer is restarted in between, so only
    # the second call is measured (presumably the first one is a warm-up).
    img_persons_new, boxes_new, trans=yolo.process_image(image)
    start_tf = time.time()
    img_persons_new, boxes_new, trans = yolo.process_image(image)
    img=draw(image,boxes_new)
    cv2.imwrite('img.jpg',img)
    # The reported time covers process_image, drawing and writing img.jpg.
    print('process time for tf is',time.time()-start_tf)

    # NOTE: overwritten again right before the timed inference call below.
    start_trt=time.time()



    # Network input resolution in (height, width) order.
    input_resolution_yolov3_HW = (608, 608)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    # From here on `image` is the pre-processed network input, no longer the
    # array returned by cv2.imread.
    image_raw, image = preprocessor.process(img_path)
    shape_orig_WH = image_raw.size

    # Output shapes expected by the post-processor
    output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]
    # Do inference with TensorRT
    trt_outputs = []

    inputs, outputs, bindings, stream = common_utils.allocate_buffers(engine)
    inputs[0].host = image
    # Same pattern as above: an untimed first inference, then the timer is
    # restarted and the second inference is the one that is measured.
    trt_outputs = common_utils.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
    start_trt = time.time()
    trt_outputs = common_utils.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)

    # Before doing post-processing, we need to reshape the outputs as the common.do_inference will give us flat arrays.
    trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]
    # The reported TensorRT time includes the reshape above.
    print('process time for trt is', time.time() - start_trt)
    # Separate timer for post-processing, drawing and saving.
    post_trt=time.time()
    postprocessor_args = {"yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
                          # A list of 3 three-dimensional tuples for the YOLO masks
                          "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                                           # A list of 9 two-dimensional tuples for the YOLO anchors
                                           (59, 119), (116, 90), (156, 198), (373, 326)],
                          "obj_threshold": 0.6,  # Threshold for object coverage, float value between 0 and 1
                          "nms_threshold": 0.5,
                          # Threshold for non-max suppression algorithm, float value between 0 and 1
                          "yolo_input_resolution": input_resolution_yolov3_HW}

    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Run the post-processing algorithms on the TensorRT outputs and get the bounding box details of detected objects
    boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH))
    # Draw the bounding boxes onto the original input image and save it as a PNG file
    obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)
    obj_detected_img.save('out_boxes.png', 'PNG')
    print('process time for trt post is', time.time() - post_trt)
Beispiel #7
0
def main():
    """Detect objects in ``dog.jpg`` with a YOLOv3-608 TensorRT engine.

    The image is pre-processed to 608x608, run through the engine obtained
    from ``get_engine``, decoded by ``PostprocessYOLO`` and the annotated
    image is saved as ``dog_bboxes.png``.
    """
    # Model artefacts and the picture to analyse.
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = "yolov3.trt"
    input_image_path = 'dog.jpg'
    output_image_path = 'dog_bboxes.png'

    # Network input resolution, (height, width).
    input_resolution_yolov3_HW = (608, 608)
    image_raw, image = PreprocessYOLO(input_resolution_yolov3_HW).process(input_image_path)
    # Size of the untouched input image; needed to map boxes back onto it.
    shape_orig_WH = image_raw.size

    with get_engine(onnx_file_path, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print('Running inference on image {}...'.format(input_image_path))
        # The pre-processed image becomes the host-side input buffer.
        inputs[0].host = image
        flat_outputs = common.do_inference(
            context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)

    # do_inference yields flat arrays; give each one the shape of its scale.
    output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]
    trt_outputs = [flat.reshape(dims) for flat, dims in zip(flat_outputs, output_shapes)]

    postprocessor = PostprocessYOLO(
        yolo_masks=[(6, 7, 8), (3, 4, 5), (0, 1, 2)],     # one anchor-index triple per output
        yolo_anchors=[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                      (59, 119), (116, 90), (156, 198), (373, 326)],
        obj_threshold=0.6,                                # object-coverage threshold
        nms_threshold=0.5,                                # non-max-suppression threshold
        yolo_input_resolution=input_resolution_yolov3_HW,
    )

    boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH))

    # Annotate the original image and store it as PNG.
    obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)
    obj_detected_img.save(output_image_path, 'PNG')
    print('Saved image with bounding boxes of detected objects to {}.'.format(output_image_path))
def main():
    """Run the pedestrian engine over every image listed in ``ped_list.txt``.

    Each listed image is pre-processed to 416x416, inferred with the engine
    loaded from ``ped3_416.trt`` (the inference time in seconds is printed),
    post-processed, and saved to ``./images_results/<stem>_416.png`` with the
    detections drawn on it.  Images without detections are saved unchanged.

    Changes compared with the previous revision:
      * blank lines in the list file are skipped instead of being treated as
        an empty file name;
      * a ``None`` result from the post-processor is treated as "no boxes"
        instead of raising ``TypeError`` in ``len()``;
      * the output stem is the file name minus its last extension, so
        ``a.1.jpg`` and ``a.2.jpg`` no longer collapse to the same output.
    """
    import os  # local import: only needed to derive the output file name

    ENGINE_FILE_PATH = "ped3_416.trt"
    INPUT_LIST_FILE = './ped_list.txt'
    INPUT_SIZE = 416

    # One image path per line; ignore empty lines.
    with open(INPUT_LIST_FILE, 'r') as list_file:
        filenames = [line.strip() for line in list_file if line.strip()]

    input_resolution_yolov3_HW = (INPUT_SIZE, INPUT_SIZE)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    postprocessor_args = {"yolo_masks": [(3, 4, 5), (0, 1, 2)],
                          "yolo_anchors": [(8, 34), (14, 60), (23, 94),
                                           (39, 149), (87, 291), (187, 472)],
                          "obj_threshold": 0.1,
                          "nms_threshold": 0.3,
                          "yolo_input_resolution": input_resolution_yolov3_HW}
    postprocessor = PostprocessYOLO(**postprocessor_args)
    # Output shapes are looked up by input size in a table defined elsewhere.
    output_shapes = output_shapes_dic[str(INPUT_SIZE)]

    with get_engine(ENGINE_FILE_PATH) as engine, engine.create_execution_context() as context:
        # Buffers are allocated once and reused for every image.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        for filename in filenames:
            image_raw, image = preprocessor.process(filename)
            shape_orig_WH = image_raw.size

            print('Running inference on image {}...'.format(filename))
            inputs[0].host = image
            t1 = time.time()
            trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            t2 = time.time()
            print(t2 - t1)

            # do_inference yields flat arrays; restore the expected shapes.
            trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]

            boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH))
            if boxes is not None and len(boxes) != 0:
                obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)
            else:
                # Nothing detected: save the input image as it is.
                obj_detected_img = image_raw

            savename = os.path.splitext(os.path.basename(filename))[0]
            output_image_path = './images_results/' + savename + '_' + str(INPUT_SIZE) + '.png'
            obj_detected_img.save(output_image_path, 'PNG')
            print('Saved image with bounding boxes of detected objects to {}.'.format(output_image_path))
Beispiel #9
0
def infer_cam():
    """Run YOLOv3-608 TensorRT inference on camera 0 and display the detections.

    Frames are read from ``cv2.VideoCapture(0)``, pre-processed to 608x608,
    inferred, post-processed and shown in a window titled ``det`` together
    with the measured inference FPS.  Pressing ESC ends the loop.

    Raises:
        RuntimeError: If a frame cannot be read from the camera.  (This used
            to be an ``assert``, which is stripped under ``python -O``.)

    The camera and the OpenCV windows are released in a ``finally`` block, so
    they are cleaned up even when an exception ends the loop.
    """
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = 'yolov3.trt'
    # Network input resolution, (height, width).
    input_resolution_yolov3_HW = (608, 608)
    # Shapes the flat engine outputs are reshaped to before post-processing.
    output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]
    postprocessor_args = {"yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
                          "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                                           (59, 119), (116, 90), (156, 198), (373, 326)],
                          "obj_threshold": 0.6,
                          "nms_threshold": 0.5,
                          "yolo_input_resolution": input_resolution_yolov3_HW}

    # Both helpers are configured from constants only, so build them once
    # instead of once per frame.
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    postprocessor = PostprocessYOLO(**postprocessor_args)

    cap = cv2.VideoCapture(0)
    try:
        with get_engine(onnx_file_path, engine_file_path) as engine, \
                engine.create_execution_context() as context:
            inputs, outputs, bindings, stream = common.allocate_buffers(engine)
            while True:
                ret, frame = cap.read()
                if not ret:
                    raise RuntimeError("failed to read a frame from camera 0")

                image_raw, image = preprocessor.process(frame)
                # Size of the raw frame; needed to map boxes back onto it.
                shape_orig_WH = image_raw.size

                t = time()
                inputs[0].host = image
                trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
                t = time() - t
                # A coarse clock can report a zero interval; avoid dividing by it.
                fps = 1 / t if t > 0 else float('inf')
                print("infer: %.2fms, fps: %.2f" % (t*1000, fps))

                # do_inference_v2 yields flat arrays; restore the expected shapes.
                trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]
                boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH))

                # NOTE(review): the stock post-processor is believed to return
                # None when nothing is detected - confirm; in that case the
                # raw frame is shown unannotated.
                if boxes is None:
                    im = image_raw
                else:
                    im = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)

                # Reverse the channel axis for OpenCV display.  The reversed
                # slice is a negative-stride view, so copy it into a contiguous
                # array before cv2.putText draws into it in place.
                im = np.ascontiguousarray(np.asarray(im)[..., ::-1])
                cv2.putText(im, "%.2f" % fps, (12, 12), 3, 1, (0, 255, 0))
                cv2.imshow("det", im)

                if cv2.waitKey(5) == 27:  # ESC
                    break
    finally:
        cap.release()
        cv2.destroyAllWindows()
Beispiel #10
0
def main():
    """
    Time 100 inference runs of the yolov3-tiny TensorRT engine on ``cat.jpg``.

    Besides the per-run latency, the function prints the pre-processed image
    shape, a description of the allocated input/output buffers and the first
    two outputs of the last run.
    """
    onnx_file_path = 'yolov3-tiny.onnx'
    engine_file_path = 'yolov3-tiny.trt'
    input_image_path = 'cat.jpg'

    # Network input resolution, (height, width).
    input_resolution_plate_detection_HW = (416, 416)
    _, image = PreprocessYOLO(input_resolution_plate_detection_HW).process(input_image_path)
    print(image.shape)

    with get_engine(onnx_file_path, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)

        # Dump the buffer layout for inspection.
        print('length of inputs is: ', len(inputs))
        print('inputs[0] is: \n', inputs[0])
        print('length of outputs is: ', len(outputs))
        print('outputs[0] is: \n', outputs[0])
        print('outputs[1] is: \n', outputs[1])

        print('Running inference on image {}...'.format(input_image_path))
        # The pre-processed image becomes the host-side input buffer.
        inputs[0].host = image

        for buffer_pair in inputs:
            print(buffer_pair.device)
            print(buffer_pair.host.shape)

        infer_kwargs = dict(bindings=bindings, inputs=inputs, outputs=outputs,
                            stream=stream, batch_size=1)
        for _ in range(100):
            started = time.time()
            trt_outputs = common.do_inference(context, **infer_kwargs)
            finished = time.time()
            print('inference cost: ', (finished - started) * 1000, 'ms')

        print(trt_outputs[0])
        print(trt_outputs[1])
def batch_show(image_path, image_save_path, onnx_file_path, engine_file_path):
    """Run the defect detector on every PNG in a folder and save annotated copies.

    Args:
        image_path: Directory that is globbed for ``*.png`` files.
        image_save_path: Directory the annotated images are written to; each
            output keeps the file name of its input.
        onnx_file_path: ONNX model path forwarded to ``get_engine``.
        engine_file_path: Engine path forwarded to ``get_engine``.

    Changes compared with the previous revision:
      * The output name is taken with ``os.path.basename``.  The old code
        bound the results of ``os.path.split`` / ``os.path.splitext`` to the
        wrong names and only produced the right path when the input directory
        name contained no dot.
      * Boxes are scaled to the size of the image they are drawn on
        (``image_raw.size``) rather than to the fixed network resolution; for
        352x352 inputs the result is unchanged.
    """
    img_list = gb.glob(image_path + r"/*.png")

    # Network input resolution, (height, width).
    input_resolution_yolov3_HW = (352, 352)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    postprocessor_args = {"yolo_masks": [(0, 1, 2)],                       # one mask triple: single output scale
                          "yolo_anchors": [(188, 15), (351, 16), (351, 30)],  # the three anchors that mask selects
                          "obj_threshold": 0.5,                            # object-coverage threshold
                          "nms_threshold": 0.2,                            # non-max-suppression threshold
                          "yolo_input_resolution": input_resolution_yolov3_HW}
    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Shape the single flat engine output is reshaped to.
    output_shapes = [(1, 18, 11, 11)]

    with get_engine(onnx_file_path, engine_file_path) as engine, engine.create_execution_context() as context:
        # Buffers are allocated once and reused for every image.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        for img_file in img_list:
            image_raw, image = preprocessor.process(img_file)
            # Size of the image the boxes will be drawn on.
            shape_orig_WH = image_raw.size

            inputs[0].host = image
            trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)

            # do_inference yields flat arrays; restore the expected shapes.
            trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]

            boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH))
            image_show = draw_bboxes(image_raw, boxes, scores, classes, ['defect'], bbox_color='yellow')

            # Save the annotated image under the same file name.
            save_name = os.path.join(image_save_path, os.path.basename(img_file))
            image_show.save(save_name)
            print("Image", save_name, "saved.")
Beispiel #12
0
    def __init__(self, yaml_path):
        """Build the detector from a YAML configuration file.

        Args:
            yaml_path: Path of the YAML parameter file.  The keys read here
                are ``onnx_path``, ``engine_path``, ``input_shape``,
                ``int8_calibration``, ``label_file_path``, ``stride`` and
                ``num_anchors``.

        Raises:
            AssertionError: If ``input_shape`` does not have four entries.
        """
        # NOTE: yaml.load with FullLoader is meant for trusted config files.
        with open(yaml_path, 'r', encoding='utf-8') as f:
            self.param_dict = yaml.load(f, Loader=yaml.FullLoader)
        cfg = self.param_dict

        # Engine first; the execution context is created from it.
        self.engine = get_engine(cfg['onnx_path'],
                                 cfg['engine_path'],
                                 cfg['input_shape'],
                                 cfg['int8_calibration'])
        self.context = self.engine.create_execution_context()

        # YOLO pre-processing works on the (height, width) part of the input shape.
        assert len(cfg['input_shape']) == 4, "input_shape必须是4个维度"
        batch, _, height, width = cfg['input_shape']
        self.preprocessor = PreprocessYOLO((height, width))

        # Pre-computed anchors [x, y, w, h, f_w, f_h]: x/y are the column/row
        # on the feature map, w/h the anchor size, f_w/f_h the feature-map size.
        self.prior_anchors = PriorBox(cfg=cfg).forward()

        # Label names and class count.
        self.all_categories = load_label_categories(cfg['label_file_path'])
        classes_num = len(self.all_categories)

        # Shape of the TensorRT output: the predictions of the three scales
        # are counted together.
        stride = cfg['stride']
        num_anchors = cfg['num_anchors']
        grid_num = sum(
            (height // stride[level]) * (width // stride[level]) * num_anchors[level]
            for level in range(3)
        )
        self.output_shapes = [(batch, grid_num, (classes_num + 5))]

        # Acceptable image suffixes.
        self.img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng']
        # Acceptable video suffixes.
        self.vid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv']

        # YOLO post-processing: yolov4 concatenates its 3 outputs into
        # [N, AHW*3, classes_num+5]; the post-processor can tell the original
        # yolov4 prediction style from the newer yolov5 one.
        self.postprocessor = PostprocessYOLO(self.prior_anchors, cfg)
def main():
    """Run YOLOv3-608 TensorRT inference on a video and display the detections.

    The video source is taken from ``args.video`` (parsed by the module-level
    ``parser``).  Each frame is pre-processed, inferred and post-processed;
    frames with detections are shown in a window titled ``frame``.  The loop
    ends when no further frame can be read.

    The capture is now released in a ``finally`` block so it is freed both at
    the end of the stream and when an exception interrupts the loop.
    """
    args = parser.parse_args()
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = "yolov3.trt"

    cam = cv.VideoCapture(args.video)

    # Network input resolution, (height, width).
    input_resolution_yolov3_HW = (608, 608)
    # Shapes the flat engine outputs are reshaped to; constant, so built once
    # rather than on every frame.
    output_shapes = [(1, 255, 19, 19),
                     (1, 255, 38, 38), (1, 255, 76, 76)]

    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    postprocessor_args = {"yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
                          "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                                           (59, 119), (116, 90), (156, 198), (373, 326)],
                          "obj_threshold": 0.6,
                          "nms_threshold": 0.5,
                          "yolo_input_resolution": input_resolution_yolov3_HW}
    postprocessor = PostprocessYOLO(**postprocessor_args)

    try:
        with get_engine(onnx_file_path, engine_file_path) as engine, \
                engine.create_execution_context() as context:
            inputs, outputs, bindings, stream = common.allocate_buffers(engine)
            while True:
                _ret, img = cam.read()
                if not _ret:
                    # End of stream or read failure.
                    break

                image_raw, image = preprocessor.process_image(img)
                # Size of the raw frame; needed to map boxes back onto it.
                shape_orig_WH = image_raw.size

                inputs[0].host = image
                trt_outputs = common.do_inference(
                    context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
                # do_inference yields flat arrays; restore the expected shapes.
                trt_outputs = [output.reshape(shape)
                               for output, shape in zip(trt_outputs, output_shapes)]

                boxes, classes, scores = postprocessor.process(
                    trt_outputs, (shape_orig_WH))
                if boxes is None:
                    # Nothing detected: the frame is skipped (not displayed).
                    continue

                obj_detected_img = draw_bboxes(
                    image_raw, boxes, scores, classes, ALL_CATEGORIES)
                # NOTE(review): the array is shown as-is; if draw_bboxes returns
                # an RGB image the colours will appear swapped in imshow - confirm.
                det_img = np.array(obj_detected_img)
                cv.imshow("frame", det_img)
                cv.waitKey(5)
    finally:
        cam.release()
Beispiel #14
0
class YoloTRT(object):
    """YOLOv3-608 detector backed by the TensorRT engine ``yolov3.trt``.

    Bundles a ``PreprocessYOLO`` instance, a ``TensorRT`` engine wrapper and a
    ``PostprocessYOLO`` instance, and exposes :meth:`inference` as the single
    entry point for a batch of images.
    """

    def __init__(self):
        """Create the pre-processor, engine wrapper and post-processor."""
        super().__init__()
        # resolution
        self.preprocessor = PreprocessYOLO((608, 608))
        self.trt = TensorRT("yolov3.trt")
        postprocessor_args = {"yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
                              "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                                               (59, 119), (116, 90), (156, 198), (373, 326)],
                              "obj_threshold": 0.6,
                              "nms_threshold": 0.5,
                              "yolo_input_resolution": (608, 608)}
        self.postprocessor = PostprocessYOLO(**postprocessor_args)

    # def _preprocess(self, input_array:np.ndarray) -> np.ndarray: #
    #     return self.preprocessor.process(input_array) # in: <NHWC> raw image batch , out: <NCHW> resized <N,3,608,608>

    def _inference(self, input: np.ndarray) -> list:
        """Run the engine and reshape its flat outputs.

        Args:
            input: Pre-processed batch handed to ``self.trt.inference``
                (the author's comments describe it as NCHW, N x 3 x 608 x 608).

        Returns:
            Three arrays shaped ``(B, 255, 19, 19)``, ``(B, 255, 38, 38)`` and
            ``(B, 255, 76, 76)`` where ``B`` is ``self.trt.max_batch_size``.
        """
        trt_outputs = self.trt.inference(input)
        max_batch = self.trt.max_batch_size
        output_shapes = [(max_batch, 255, 19, 19),
                         (max_batch, 255, 38, 38),
                         (max_batch, 255, 76, 76)]

        return [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]

    # def _postprocess(self, feat_batch, shape_orig_WH:tuple):
    #     return [[self.postprocessor.process(feat,shape_orig)]for feat, shape_orig in zip(feat_batch,shape_orig_WH)]

    @profile
    def inference(self, input_array:np.ndarray): # img_array <N,H,W,C>
        """Detect objects in a batch of images.

        Args:
            input_array: Raw image batch (described by the author as NHWC).

        Returns:
            A list with one entry per input image; each entry is a
            single-element list holding the result of
            ``PostprocessYOLO.process`` for that image.
        """
        pre = self.preprocessor.process(input_array) # in: <NHWC> raw image batch , out: <NCHW> resized <N,3,608,608>
        trt_outputs = self._inference(pre) # out: [(B, 255, 19, 19), (B, 255, 38, 38), (B, 255, 76, 76)], B = max batch size

        # Regroup from "per output layer" to "per sample".
        feat_batch = [list(feats) for feats in zip(*trt_outputs)]
        # The outputs carry max_batch_size slots, but only the first
        # len(input_array) belong to this call.  Trim before post-processing
        # so the surplus slots are no longer decoded and then thrown away.
        feat_batch = feat_batch[:len(input_array)]

        # NOTE(review): the whole input_array.shape is passed as the original
        # image shape; confirm that is what PostprocessYOLO.process expects.
        post = [[self.postprocessor.process(feat, input_array.shape)] for feat in feat_batch]
        return post
        filenames = [
            os.path.join(FLAGS.image_filename, f)
            for f in os.listdir(FLAGS.image_filename)
            if os.path.isfile(os.path.join(FLAGS.image_filename, f))
        ]
    else:
        filenames = [FLAGS.image_filename]

    filenames.sort()

    # Preprocess the images into input data according to model

    # yolov3网络的输入size,HW顺序
    input_resolution_yolov3_HW = (608, 608)
    # 创建一个预处理来处理任意图片,以符合yolov3的输入
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    shape_orig_WH = []

    # requirements
    image_data = []
    image_raws = []

    for filename in filenames:
        image_raw, image = preprocessor.process(filename)
        image_data.append(image[0])
        image_raws.append(image_raw)
        shape_orig_WH.append(image_raw.size)

    # Send requests of FLAGS.batch_size images. If the number of
    # images isn't an exact multiple of FLAGS.batch_size then just
    # start over with the first images until the batch is filled.
Beispiel #16
0
class Detect:
    """YOLO detector backed by a TensorRT engine and configured from a YAML file.

    ``__init__`` builds the engine, execution context, pre-processor, prior
    anchors and post-processor. ``predict`` runs detection on a single image,
    a folder of images or a single video file and saves annotated images.
    """

    def __init__(self, yaml_path):
        """Load the configuration and build everything needed for inference.

        :param yaml_path: path of the YAML parameter file. Keys read here:
            ``onnx_path``, ``engine_path``, ``input_shape``,
            ``int8_calibration``, ``label_file_path``, ``stride`` and
            ``num_anchors``; the whole dict is also handed to ``PriorBox``
            and ``PostprocessYOLO``.
        """
        # NOTE(review): only load trusted configuration files here; consider
        # yaml.safe_load if the configuration is plain data.
        with open(yaml_path, 'r', encoding='utf-8') as f:
            self.param_dict = yaml.load(f, Loader=yaml.FullLoader)

        # Obtain the engine; the execution context is created from it afterwards.
        self.engine = get_engine(self.param_dict['onnx_path'],
                                 self.param_dict['engine_path'],
                                 self.param_dict['input_shape'],
                                 self.param_dict['int8_calibration'])
        self.context = self.engine.create_execution_context()

        # YOLO pre-processing works on the (height, width) of the network input.
        assert len(self.param_dict['input_shape']) == 4, "input_shape必须是4个维度"
        batch, _, height, width = self.param_dict['input_shape']
        self.preprocessor = PreprocessYOLO((height, width))

        # Pre-computed anchors [x, y, w, h, f_w, f_h]: xy are the column/row
        # coordinates on the feature map, wh the anchor size and f_wh the
        # feature-map size (description taken from the original comment).
        self.prior_anchors = PriorBox(cfg=self.param_dict).forward()

        # Label names.
        self.all_categories = load_label_categories(
            self.param_dict['label_file_path'])
        classes_num = len(self.all_categories)

        # Shape of the single TensorRT output: the predictions of the three
        # scales are concatenated along the second axis.
        stride = self.param_dict['stride']
        num_anchors = self.param_dict['num_anchors']
        grid_num = sum(
            (height // stride[k]) * (width // stride[k]) * num_anchors[k]
            for k in range(3))
        self.output_shapes = [(batch, grid_num, (classes_num + 5))]

        self.img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff',
                            'dng']  # acceptable image suffixes
        self.vid_formats = [
            'mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv'
        ]  # acceptable video suffixes

        # YOLO post-processing. Per the original comment, yolov4 concatenates
        # the 3 outputs into [N, AHW*3, classes_num+5] and the post-processor
        # distinguishes original yolov4 decoding from the yolov5-style one.
        self.postprocessor = PostprocessYOLO(self.prior_anchors,
                                             self.param_dict)

    @staticmethod
    def _suffix(name):
        """Return the lower-cased text after the last '.' of ``name``."""
        return name.split('.')[-1].lower()

    def predict(self,
                input_path='dog.jpg',
                output_save_root='./output',
                write_txt=False):
        '''
        Run detection on ``input_path`` and save the annotated results.

        :param input_path: a single image path, a folder of images, or a
            single video file path. Suffixes are matched case-insensitively.
        :param output_save_root: folder receiving all results. Images keep
            their file name; video frames are saved as ``<frame index>.png``.
        :param write_txt: meant to save box / class / confidence as txt;
            not implemented yet, currently unused.
        :return: None. An invalid ``input_path`` is reported on stdout and
            nothing is processed.
        '''
        # Work out whether the input is an image folder, an image or a video.
        is_video = False
        path = input_path
        if os.path.isdir(path):
            # Folder of images.
            img_names = [
                name for name in os.listdir(path)
                if self._suffix(name) in self.img_formats
            ]
        elif os.path.isfile(path):
            # '/hme/ai/111.jpg' -> ('/hme/ai', '111.jpg')
            path, img_name = os.path.split(path)
            if self._suffix(img_name) in self.vid_formats:
                is_video = True
            else:
                assert self._suffix(img_name) in self.img_formats, "必须是单张图像路径"
                img_names = [img_name]
        else:
            print("输入无效!!!" * 3)
            # Nothing to process; continuing would fail on undefined names.
            return

        # Create the output folder.
        check_path(output_save_root)

        cap = None
        video_writer = None
        try:
            if is_video:
                assert img_name.count('.') == 1, "视频名字必须只有1个 . "

                cap = cv2.VideoCapture(os.path.join(path, img_name))
                # Video properties: fps, frame size and total number of frames.
                fps = int(cap.get(cv2.CAP_PROP_FPS))
                width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                num = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                # Output video (always mp4).
                # TODO: predictions are not written to this video yet.
                video_save_path = os.path.join(
                    output_save_root,
                    img_name.split('.')[0] + '_pred.mp4')
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                video_writer = cv2.VideoWriter(video_save_path,
                                               fourcc=fourcc,
                                               fps=fps,
                                               frameSize=(width, height))
            else:
                num = len(img_names)  # number of images

            # Inference buffers are allocated once and reused for every input.
            inputs, outputs, bindings, stream = common.allocate_buffers(
                self.engine)
            for i in range(num):
                # Pre-processing.
                if is_video:
                    cap.set(cv2.CAP_PROP_POS_FRAMES, i)  # seek to frame i
                    # read() returns (success flag, frame); only the frame
                    # may be handed to the pre-processor.
                    ok, frame = cap.read()
                    if not ok:
                        # The reported frame count can exceed what is decodable.
                        break
                    # The pre-processor receives the BGR frame array.
                    image_raw, image = self.preprocessor.process(frame)
                else:
                    # The pre-processor receives an image path.
                    image_raw, image = self.preprocessor.process(
                        os.path.join(path, img_names[i]))

                # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
                inputs[0].host = image
                trt_outputs = common.do_inference_v2(self.context,
                                                     bindings=bindings,
                                                     inputs=inputs,
                                                     outputs=outputs,
                                                     stream=stream)

                # The engine returns flat arrays; restore the expected shapes.
                trt_outputs = [
                    output.reshape(shape)
                    for output, shape in zip(trt_outputs, self.output_shapes)
                ]

                # Original image size in WH order (image_raw is a PIL image
                # according to the original comment).
                shape_orig_WH = image_raw.size

                # The post-processor supports batch >= 1, but this class only
                # ever feeds a single image.
                outputs_pred = self.postprocessor.process(trt_outputs,
                                                          shape_orig_WH)

                # TODO: write box coordinates / class / confidence to txt.

                # Draw the boxes; only one image per call, so no loop needed.
                boxes, classes, scores = outputs_pred[0][0]
                obj_detected_img = draw_bboxes(image_raw, boxes, scores,
                                               classes, self.all_categories)

                if is_video:
                    # Frames are stored by zero-padded index. The explicit
                    # extension is required: save() infers the image format
                    # from the file name.
                    obj_detected_img.save(
                        os.path.join(output_save_root,
                                     str(i).zfill(5) + '.png'))
                else:
                    obj_detected_img.save(
                        os.path.join(output_save_root, img_names[i]))
        finally:
            # Release video resources even when processing fails midway.
            if cap is not None:
                cap.release()
            if video_writer is not None:
                video_writer.release()
            if is_video:
                cv2.destroyAllWindows()
def read_queue(queue):
    """Detect objects in queued frames and report the result over serial.

    Loads the serialized TensorRT engine from ``model.bin``, then, while the
    module-level ``cap`` and ``ser`` objects are open, takes frames from
    ``queue``, runs YOLOv3 inference and writes a one-letter command for the
    best-scoring detection to ``ser``. Frames with a detection are also saved
    to ``tmp/<n>.jpg``.

    :param queue: queue-like object providing ``get(timeout=...)`` that
        yields frames accepted by ``PreprocessYOLO.process``.
    """
    # The parameter name only shadows the module inside this function; the
    # import statement itself still resolves the standard-library module.
    from queue import Empty as QueueEmpty

    # 3.load model
    TRT_LOGGER = trt.Logger(trt.Logger.INFO)
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')
    runtime = trt.Runtime(TRT_LOGGER)

    with open('model.bin', 'rb') as f:
        engine = runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()

    batch_size = 1
    input_size = 416
    # The three output scales have strides 32, 16 and 8; deriving the grid
    # sizes avoids per-resolution tables (608 // 8 is 76, not 72).
    output_shapes = [(batch_size, 54, input_size // stride, input_size // stride)
                     for stride in (32, 16, 8)]
    input_resolution_yolov3_HW = (input_size, input_size)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)

    postprocessor_args = {"yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
                          "yolo_anchors": [(4, 7), (7, 15), (13, 25), (25, 42), (41, 67), (75, 94),
                                           (91, 162), (158, 205), (250, 332)],
                          "obj_threshold": 0.5,
                          "nms_threshold": 0.35,
                          "yolo_input_resolution": input_resolution_yolov3_HW}
    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Single allocation of host/device buffers for the whole session.
    inputs, outputs, bindings, stream = allocate_buffers(engine)
    print('3.Load model successful.')
    print('Everything is ready.')

    # class id -> (serial command, log message)
    commands = {
        0: ("c", 'plate front.'),
        1: ("d", 'plate back.'),
        2: ("f", 'bowl front.'),
        3: ("e", 'bowl back.'),
        4: ("g", 'glass cup side.'),
        5: ("g", 'glass cup back.'),
        6: ("g", 'glass cup front.'),
        7: ("i", 'teacup side.'),
        8: ("j", 'teacup back.'),
        9: ("k", 'teacup front.'),
        10: ("g", 'cup side.'),
        11: ("g", 'cup back.'),
        12: ("g", 'cup front.'),
    }
    fallback = ("h", 'exception.')

    num = 0
    while cap.isOpened() and ser.isOpen():
        # Block briefly instead of spinning on empty(); the timeout keeps the
        # loop condition re-evaluated regularly.
        try:
            frame = queue.get(timeout=0.05)
        except QueueEmpty:
            continue

        image_raw, image = preprocessor.process(frame)
        num = num + 1
        inputs[0].host = np.ascontiguousarray(image)
        trt_outputs = do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream, batch_size=batch_size)
        trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]
        shape_orig_WH = image_raw.size
        boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH), 0)
        print(boxes, scores, classes)

        # No usable detection. NOTE(review): the None case assumes the
        # post-processor may return None when nothing is found — confirm.
        if boxes is None or scores is None or np.all(scores == 0):
            ser.write(fallback[0].encode("utf-8"))
            print(fallback[1])
            continue

        # Report the class of the highest-scoring detection as a plain int so
        # that class 0 is reachable and several detections stay unambiguous.
        label = int(classes[int(np.argmax(scores))])
        cv2.imwrite('tmp/' + str(num) + '.jpg', frame)

        command, message = commands.get(label, fallback)
        ser.write(command.encode("utf-8"))
        print(message)
import common,cv2

# Six (width, height) anchor pairs.
# NOTE(review): these look like the tiny-YOLOv3 anchors, while
# postprocessor_args below carries nine anchors — confirm which are used.
anchors = np.array([
    (10, 14), (23, 27), (37, 58),
    (81, 82), (135, 169), (344, 319),
])
classes_num = 80
score_threshold = 0.5

# Network input resolution in HW order and the matching shapes of the three
# engine outputs; 255 equals (classes_num + 5) * 3.
input_resolution_yolov3_HW = (416, 416)
output_shapes = [(1, 255, 13, 13), (1, 255, 26, 26), (1, 255, 52, 52)]

postprocessor_args = dict(
    # A list of 3 three-dimensional tuples for the YOLO masks
    yolo_masks=[(6, 7, 8), (3, 4, 5), (0, 1, 2)],
    # A list of 9 two-dimensional tuples for the YOLO anchors
    yolo_anchors=[
        (10, 13), (16, 30), (33, 23),
        (30, 61), (62, 45), (59, 119),
        (116, 90), (156, 198), (373, 326),
    ],
    # Threshold for object coverage, float value between 0 and 1
    obj_threshold=0.6,
    # Threshold for non-max suppression algorithm, float value between 0 and 1
    nms_threshold=0.2,
    yolo_input_resolution=input_resolution_yolov3_HW,
)

# Shared pre-/post-processor instances built from the settings above.
preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
postprocessor = PostprocessYOLO(**postprocessor_args)



def draw_bboxes(image_raw, bboxes, confidences, categories, all_categories, bbox_color='blue'):
    """Draw the bounding boxes on the original input image and return it.

    Keyword arguments:
    image_raw -- a raw PIL Image
    bboxes -- NumPy array containing the bounding box coordinates of N objects, with shape (N,4).
    categories -- NumPy array containing the corresponding category for each object,
    with shape (N,)
    confidences -- NumPy array containing the corresponding confidence for each object,
    with shape (N,)
    all_categories -- a list of all categories in the correct ordered (required for looking up
Beispiel #19
0
def main():
    """Run batched YOLOv3 TensorRT inference over a folder of JPEG images.

    Command-line options select the input size, model paths, number of
    classes and the dataset / output folders. For each image the detections
    are drawn with ``draw_bboxes`` and saved as PNG into
    ``--result_images_path``; the average inference time per image and the
    resulting FPS are printed at the end.
    """
    parser = argparse.ArgumentParser(
        prog='ONNX to TensorRT conversion',
        description='Convert the Yolo ONNX model to TensorRT')

    parser.add_argument('--input_size', help='Input size model', default='416')

    parser.add_argument('--onnx_file_path',
                        help='ONNX model\'s path (.onnx)',
                        default='../../model_data/Suspect/yolov3-suspect.onnx')

    parser.add_argument('--engine_file_path',
                        help='TensorRT engine\'s path (.trt)',
                        default='trt_model/yolov3-suspect_2_fp32.trt')

    parser.add_argument('--num_classes', help='Number of classes', default='3')

    parser.add_argument('--dataset_path',
                        help='Path of the folder Dataset',
                        default='../../Datasets/Suspect/images-416/')

    parser.add_argument(
        '--pred_dataset_path',
        help='Output path of Yolo predictions',
        default='../../Datasets/Suspect/Predictions/TensorRT/Yolo-Tiny-128/')

    parser.add_argument(
        '--result_images_path',
        help='Path of images with predict bounding box',
        default='../../Datasets/Suspect/Images_result/TensorRT/Yolo-Tiny-128/')

    args = parser.parse_args()

    input_size = int(args.input_size)
    onnx_file_path = args.onnx_file_path
    engine_file_path = args.engine_file_path
    num_classes = int(args.num_classes)
    test_dataset_path = args.dataset_path
    save_path = args.result_images_path
    pred_dataset_path = args.pred_dataset_path

    fp16_on = False
    batch_size = 2

    # Channels per output: (box 4 + objectness 1 + classes) * 3 anchors.
    filters = (4 + 1 + num_classes) * 3

    # For tiny-YOLOv3 use masks [(3, 4, 5), (0, 1, 2)] and the six anchors
    # (10,14), (23,27), (37,58), (81,82), (135,169), (344,319).
    yolo_masks = [(6, 7, 8), (3, 4, 5), (0, 1, 2)]

    # One output shape per configured mask, using strides 32, 16 and 8.
    # Only two shapes used to be listed although three masks are configured,
    # so a third engine output was never reshaped; zip() below still copes
    # with an engine that exposes fewer outputs.
    output_shapes = [
        (batch_size, filters, input_size // stride, input_size // stride)
        for stride, _ in zip((32, 16, 8), yolo_masks)
    ]

    filenames = glob.glob(os.path.join(test_dataset_path, '*.jpg'))
    nums = len(filenames)

    input_resolution_yolov3_HW = (input_size, input_size)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)

    postprocessor_args = {
        "yolo_masks": yolo_masks,
        "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                         (59, 119), (116, 90), (156, 198),
                         (373, 326)],  # YoloV3
        "obj_threshold": 0.5,
        "nms_threshold": 0.35,
        "yolo_input_resolution": input_resolution_yolov3_HW,
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)

    # The annotated images are written here; make sure the folder exists.
    os.makedirs(save_path, exist_ok=True)

    # Do inference with TensorRT
    filenames_batch = []
    images = []
    images_raw = []
    total_inf_time_ms = 0.0

    with get_engine(onnx_file_path, batch_size, fp16_on, engine_file_path
                    ) as engine, engine.create_execution_context() as context:
        # Allocate the host/device buffers once and reuse them for every
        # batch instead of allocating new ones per batch.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)

        for index, filename in enumerate(filenames, start=1):
            filenames_batch.append(filename)
            image_raw, image = preprocessor.process(filename)
            images_raw.append(image_raw)
            images.append(image)

            # Keep collecting until the batch is full or the last file is reached.
            if index != nums and len(images_raw) != batch_size:
                continue

            inputs[0].host = np.concatenate(images, axis=0)
            t1 = time.perf_counter()
            trt_outputs = common.do_inference(context,
                                              bindings=bindings,
                                              inputs=inputs,
                                              outputs=outputs,
                                              stream=stream,
                                              batch_size=batch_size)
            t2 = time.perf_counter()
            total_inf_time_ms += (t2 - t1) * 1000

            print(len(trt_outputs))
            # do_inference returns flat arrays; restore the expected shapes.
            trt_outputs = [
                output.reshape(shape)
                for output, shape in zip(trt_outputs, output_shapes)
            ]

            for i, (batch_filename, img_raw) in enumerate(
                    zip(filenames_batch, images_raw)):
                fname = os.path.basename(batch_filename).split('.')[0]
                print(fname)
                annotation_path = os.path.join(pred_dataset_path, fname) + '.txt'
                # Drop a stale annotation file from a previous run.
                if os.path.isfile(annotation_path):
                    os.remove(annotation_path)
                    print("Delete !")

                shape_orig_WH = img_raw.size
                print("SHAPE : ", shape_orig_WH)

                boxes, classes, scores = postprocessor.process(
                    trt_outputs, (shape_orig_WH), i)

                # Nothing detected in this image: nothing to draw.
                if boxes is None:
                    continue
                print("boxes size:", len(boxes))

                # Draw the bounding boxes onto the original input image and save it as a PNG file
                obj_detected_img = draw_bboxes(img_raw, boxes, scores, classes,
                                               ALL_CATEGORIES, annotation_path)
                output_image_path = os.path.join(
                    save_path, fname + '_' + str(input_size) + '_bboxes.png')
                obj_detected_img.save(output_image_path, 'PNG')
                print(
                    'Saved image with bounding boxes of detected objects to {}.'
                    .format(output_image_path))

            filenames_batch = []
            images = []
            images_raw = []

    print(len(filenames))
    if not filenames:
        # Avoid dividing by zero when the dataset folder holds no JPEG.
        print("No image found in", test_dataset_path)
        return

    moy_inf_time = total_inf_time_ms / len(filenames)
    print("Moyenne temps d'inférence (par image) : ", moy_inf_time, "ms")
    if moy_inf_time > 0:
        fps = 1 / moy_inf_time * 1000
        print("FPS : ", fps)
Beispiel #20
0
def main():
    """Benchmark TensorRT YOLOv3-608 inference plus PyTorch YOLO decoding.

    One image is pre-processed once, then inference and the YOLO-layer
    decoding (``predict_transform`` + ``dynamic_write_results``) are repeated
    ``n_runs`` times; per-run and average timings are printed.
    """
    # Try to load a previously generated YOLOv3-608 network graph in ONNX format:
    onnx_file_path = 'yolov3-608.onnx'
    engine_file_path = "yolov3-608.trt"
    input_image_path = "./images/b.jpg"

    # Two-dimensional tuple with the target network's (spatial) input resolution in HW ordered
    input_resolution_yolov3_HW = (608, 608)

    # Create a pre-processor object by specifying the required input resolution for YOLOv3
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)

    # Only the pre-processed array is needed for this benchmark.
    _, image = preprocessor.process(input_image_path)

    # Output shapes expected by the post-processing
    output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]
    # output_shapes = [(1, 255, 13, 13), (1, 255, 26, 26), (1, 255, 52, 52)]

    # NOTE(review): this tensor is never used; presumably it forces PyTorch
    # to initialise CUDA before the engine is created — confirm before removing.
    a = torch.cuda.FloatTensor()

    # Settings of the PyTorch YOLO layer; they do not change between runs.
    inp_dim = 608
    num_classes = 80
    CUDA = True
    yolo_anchors = [[(116, 90), (156, 198), (373, 326)],
                    [(30, 61), (62, 45), (59, 119)],
                    [(10, 13), (16, 30), (33, 23)]]

    n_runs = 10
    total_inference_time = 0
    total_yolo_time = 0

    # Do inference with TensorRT
    with get_engine(onnx_file_path, engine_file_path
                    ) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        for _ in range(n_runs):
            print('Running inference on image {}...'.format(input_image_path))
            # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
            inference_start = time.time()
            inputs[0].host = image
            trt_outputs = common.do_inference(context,
                                              bindings=bindings,
                                              inputs=inputs,
                                              outputs=outputs,
                                              stream=stream)
            inference_end = time.time()
            total_inference_time += inference_end - inference_start
            print('inference time : %f' % (inference_end - inference_start))

            # Do yolo_layer with pytorch
            detections = None
            yolo_start = time.time()
            for output, shape, anchors in zip(trt_outputs, output_shapes,
                                              yolo_anchors):
                trt_output = torch.from_numpy(output.reshape(shape)).cuda()
                trt_output = trt_output.data
                trt_output = predict_transform(trt_output, inp_dim, anchors,
                                               num_classes, CUDA)

                # An int result is skipped rather than concatenated.
                if isinstance(trt_output, int):
                    continue

                if detections is None:
                    detections = trt_output
                else:
                    detections = torch.cat((detections, trt_output), 1)

            # Guard against every scale having been skipped, which would
            # otherwise leave ``detections`` undefined.
            if detections is not None:
                dynamic_write_results(detections,
                                      0.5,
                                      num_classes,
                                      nms=True,
                                      nms_conf=0.45)  #0.008
            yolo_end = time.time()
            total_yolo_time += yolo_end - yolo_start
            print('yolo time : %f' % (yolo_end - yolo_start))
            print('all time : %f' % (yolo_end - inference_start))

        average_yolo_time = total_yolo_time / n_runs
        average_inference_time = total_inference_time / n_runs
        print("--------------------------------------------------------")
        print('average yolo time : %f' % (average_yolo_time))
        print('average inference time : %f' % (average_inference_time))
        print("--------------------------------------------------------")
def main(width=608, height=608, batch_size=1, dataset='coco_label.txt', int8mode=False, calib_file='yolo_calibration.cache',
         onnx_file='yolov3.onnx', engine_file='yolov3.trt', image_file='dog.jpg', result_file='dog_bboxes.png'):
    """Detect objects in one image with a TensorRT YOLOv3 engine and save the result.

    The label file ``dataset`` is looked up next to this script. The image
    is pre-processed to ``(height, width)``, run through the engine obtained
    from ``get_engine``, post-processed, and the image with drawn boxes is
    written to ``result_file`` as PNG.
    """
    # Labels of the dataset; their count determines the output channel size.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    all_categories = load_label_categories(os.path.join(script_dir, dataset))
    classes = len(all_categories)

    # Network input resolution in HW order.
    input_resolution_yolov3_HW = (height, width)

    # Raw image plus its pre-processed version; the raw size (WH) is needed
    # later by the post-processor.
    image_raw, image = PreprocessYOLO(input_resolution_yolov3_HW).process(image_file, batch_size)
    shape_orig_WH = image_raw.size

    # Output shapes expected by the post-processor, one per stride.
    channels = (classes + 5) * 3
    output_shapes = [(batch_size, channels, height // stride, width // stride)
                     for stride in (32, 16, 8)]

    # Do inference with TensorRT
    with get_engine(onnx_file, width, height, batch_size, engine_file, int8mode, calib_file) as engine, \
            engine.create_execution_context() as context:
        start = time.time()
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print('Running inference on image {}...'.format(image_file))
        # The host input is copied to the GPU by common.do_inference.
        inputs[0].host = image
        trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
    end = time.time()
    print("Inference costs %.03f sec." % (end - start))

    # common.do_inference returns flat arrays; give them their real shapes.
    trt_outputs = [flat.reshape(shape) for flat, shape in zip(trt_outputs, output_shapes)]

    postprocessor = PostprocessYOLO(
        # A list of 3 three-dimensional tuples for the YOLO masks
        yolo_masks=[(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        # A list of 9 two-dimensional tuples for the YOLO anchors
        yolo_anchors=[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                      (59, 119), (116, 90), (156, 198), (373, 326)],
        # Threshold for object coverage, float value between 0 and 1
        obj_threshold=0.6,
        # Threshold for non-max suppression algorithm, float value between 0 and 1
        nms_threshold=0.5,
        yolo_input_resolution=input_resolution_yolov3_HW,
    )

    # Only the first image of the batch is post-processed; each of the three
    # outputs gets its leading batch axis back with length 1.
    first_image_outputs = [np.expand_dims(trt_outputs[k][0], axis=0) for k in range(3)]
    boxes, classes, scores = postprocessor.process(first_image_outputs, (shape_orig_WH), classes)

    # Draw the bounding boxes onto the original input image and save it as a PNG file
    annotated = draw_bboxes(image_raw, boxes, scores, classes, all_categories)
    annotated.save(result_file, 'PNG')
    print('Saved image with bounding boxes of detected objects to {}.'.format(result_file))
Beispiel #22
0
 def _preprocess(self):
     """Pre-process ``self.raw_image`` for the network input resolution.

     Builds a ``PreprocessYOLO`` for ``self.input_resolution`` and returns
     whatever its ``process`` method yields for ``self.raw_image``.
     """
     # Timestamp taken at entry; it is not used any further in this method.
     started_at = timer()
     return PreprocessYOLO(self.input_resolution).process(self.raw_image)
def main():
    """Run batched TensorRT YOLOv3 inference on the images listed in a file.

    Image paths are read from ``./ped_list.txt`` (one per line). Images are
    processed in batches of ``batch_size``; detections are drawn with
    ``draw_bboxes`` and saved as PNG files below ``./img_re/``.
    """
    input_size = 608
    batch_size = 1
    fp16_on = True
    onnx_file_path = 'ped3_' + str(input_size) + '_' + str(batch_size) + '.onnx'
    engine_file_path = 'ped3_' + str(input_size) + '_' + str(batch_size) + '.trt'
    input_file_list = './ped_list.txt'
    save_path = './img_re/'

    # Two output scales (strides 32 and 16) with 18 channels each; this
    # reproduces the former per-resolution table for 416/480/544/608.
    output_shapes = [(batch_size, 18, input_size // stride, input_size // stride)
                     for stride in (32, 16)]

    with open(input_file_list, 'r') as f:
        filenames = [line.strip() for line in f]

    nums = len(filenames)

    input_resolution_yolov3_HW = (input_size, input_size)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)

    postprocessor_args = {"yolo_masks": [(3, 4, 5), (0, 1, 2)],
                          "yolo_anchors": [(8, 34), (14, 60), (23, 94), (39, 149), (87, 291), (187, 472)],
                          "obj_threshold": 0.1,
                          "nms_threshold": 0.3,
                          "yolo_input_resolution": input_resolution_yolov3_HW}
    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Do inference with TensorRT
    filenames_batch = []
    images = []
    images_raw = []
    with get_engine(onnx_file_path, batch_size, fp16_on, engine_file_path) as engine, engine.create_execution_context() as context:
        # Allocate the host/device buffers once and reuse them for every
        # batch instead of allocating new ones per batch.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)

        for index, filename in enumerate(filenames, start=1):
            filenames_batch.append(filename)
            image_raw, image = preprocessor.process(filename)
            images_raw.append(image_raw)
            images.append(image)

            # Keep collecting until the batch is full or the last file is reached.
            if index != nums and len(images_raw) != batch_size:
                continue

            inputs[0].host = np.concatenate(images, axis=0)
            t1 = time.time()
            trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream, batch_size=batch_size)
            t2 = time.time()
            t_inf = t2 - t1
            print(t_inf)
            print(len(trt_outputs))
            # do_inference returns flat arrays; restore the expected shapes.
            trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]

            print('test')
            for i, (batch_filename, img_raw) in enumerate(zip(filenames_batch, images_raw)):
                fname = batch_filename.split('/')[-1].split('.')[0]
                shape_orig_WH = img_raw.size
                boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH), i)
                # NOTE(review): assumes the post-processor returns None when
                # nothing is detected, as the sibling batch runner checks — confirm.
                if boxes is None:
                    continue
                # Draw the bounding boxes onto the original input image and save it as a PNG file
                obj_detected_img = draw_bboxes(img_raw, boxes, scores, classes, ALL_CATEGORIES)
                output_image_path = save_path + fname + '_' + str(input_size) + '_bboxes.png'
                obj_detected_img.save(output_image_path, 'PNG')
                print('Saved image with bounding boxes of detected objects to {}.'.format(output_image_path))

            filenames_batch = []
            images = []
            images_raw = []
Beispiel #24
0
def main():
    """Run YOLOv3-tiny TensorRT inference on a video file and display the results.

    Frames are read from a hard-coded video file, resized to 640x360,
    pre-processed and sent to the engine in batches of ``batch_size``.  Every
    processed frame is shown in an OpenCV window with the most recent frame
    rate drawn on it; pressing ``q`` stops the loop.  A frame-rate sample is
    recorded every ``fps_display_interval`` seconds and the samples are
    summarised (min / max / mean) once the loop ends.
    """
    input_size = 416
    batch_size = 1
    fp16_on = False
    onnx_file_path = '../../model_data/Suspect/yolov3-tiny-suspect.onnx'
    engine_file_path = 'trt_model/yolov3-tiny-suspect_1_fp32.trt'

    num_classes = 3
    # Second dimension of every output tensor: (5 + number of classes) * 3
    # (255 for COCO, 33 for "key", ...).
    filters = (4 + 1 + num_classes) * 3

    # Expected output shapes per supported input size.  The two grids are the
    # input size divided by 32 and by 16 (13x13 and 26x26 for 416x416).
    output_shapes_dic = {
        str(size): [(batch_size, filters, size // 32, size // 32),
                    (batch_size, filters, size // 16, size // 16)]
        for size in (416, 480, 544, 608)
    }
    output_shapes = output_shapes_dic[str(input_size)]

    font = cv2.FONT_HERSHEY_SIMPLEX

    cap = cv2.VideoCapture("../../Datasets/test_suspect.mp4")
    # NOTE: forcing CAP_PROP_FRAME_WIDTH / CAP_PROP_FRAME_HEIGHT does not work
    # on file sources, so every frame is resized explicitly in the loop below.
    print("Width : ", int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)))
    print("Height : ", int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

    fps_display_interval = 5  # seconds
    frame_rate = 0
    frame_count = 0
    frame_rate_tab = []
    start_time = time.time()

    # A batch is also flushed when this many frames have been read in total.
    nums = 1000000

    input_resolution_yolov3_HW = (input_size, input_size)

    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)

    postprocessor_args = {
        "yolo_masks": [(3, 4, 5), (0, 1, 2)],
        "yolo_anchors": [(10, 14), (23, 27), (37, 58), (81, 82), (135, 169),
                         (344, 319)],  # tiny-yolov3-416
        "obj_threshold": 0.5,
        "nms_threshold": 0.35,
        "yolo_input_resolution": input_resolution_yolov3_HW,
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Frames collected for the batch currently being assembled.
    filenames_batch = []  # resized frames, shown when nothing is detected
    images = []           # pre-processed network inputs
    images_raw = []       # raw images returned by the pre-processor
    index = 0

    try:
        with get_engine(onnx_file_path, batch_size, fp16_on, engine_file_path) as engine, \
                engine.create_execution_context() as context:
            # The buffers do not depend on the frame, so allocate them once
            # instead of once per batch.
            inputs, outputs, bindings, stream = common.allocate_buffers(engine)

            while True:
                ret, frame = cap.read()
                if not ret:
                    # No more frames could be read.
                    break

                frame_stream = cv2.resize(frame, (640, 360),
                                          interpolation=cv2.INTER_AREA)
                filenames_batch.append(frame_stream)
                image_raw, image = preprocessor.process_frame(frame_stream)
                images_raw.append(image_raw)
                images.append(image)
                index += 1
                if index != nums and len(images_raw) != batch_size:
                    # Batch not complete yet: read the next frame.
                    continue

                inputs[0].host = np.concatenate(images, axis=0)
                trt_outputs = common.do_inference(context,
                                                  bindings=bindings,
                                                  inputs=inputs,
                                                  outputs=outputs,
                                                  stream=stream,
                                                  batch_size=batch_size)
                # do_inference returns flat arrays; restore the tensor shapes.
                trt_outputs = [
                    output.reshape(shape)
                    for output, shape in zip(trt_outputs, output_shapes)
                ]

                # Use the i-th frame of the batch for the i-th result so that
                # batches larger than one are drawn on the right image.
                for i, (shown_frame, raw) in enumerate(
                        zip(filenames_batch, images_raw)):
                    boxes, classes, scores = postprocessor.process_frame2(
                        trt_outputs, raw.size, i)

                    end_time = time.time()
                    elapsed = end_time - start_time
                    if elapsed > fps_display_interval:
                        frame_rate = int(frame_count / elapsed)
                        frame_rate_tab.append(frame_rate)
                        start_time = time.time()
                        frame_count = 0

                    frame_count += 1

                    if boxes is None:
                        det_img = shown_frame
                    else:
                        det_img = np.array(
                            draw_bboxes(raw, boxes, scores, classes,
                                        ALL_CATEGORIES))
                    cv2.putText(det_img,
                                str(frame_rate) + " fps", (500, 50),
                                font,
                                1, (255, 0, 0),
                                thickness=3,
                                lineType=2)
                    cv2.imshow("frame", det_img)

                filenames_batch = []
                images = []
                images_raw = []

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

            print(frame_rate_tab)
            if frame_rate_tab:
                moy_FPS = np.mean(frame_rate_tab)
                print("FPS min : ", min(frame_rate_tab))
                print("FPS max : ", max(frame_rate_tab))
                print("FPS moyen :", moy_FPS)
            else:
                # min()/max() raise ValueError on an empty list, which happens
                # when the run ends before the first sampling interval.
                print("No FPS sample collected (run shorter than {}s).".format(
                    fps_display_interval))
    finally:
        # Release the capture and close the window even when inference fails.
        cap.release()
        cv2.destroyAllWindows()
Beispiel #25
0
def myinfer(image, context, inputs, outputs, bindings, stream):
    """Detect objects in ``image`` with an already prepared TensorRT context.

    Args:
        image: Input handed to ``PreprocessYOLO.process``.
            NOTE(review): the raw image that comes back is read through
            ``.shape``, so this presumably is an array-based pre-processor -
            confirm against the PreprocessYOLO used here.
        context: TensorRT execution context used for the inference call.
        inputs, outputs, bindings, stream: Buffers and stream passed through
            to ``common.do_inference``.

    Returns:
        The ``(boxes, classes, scores)`` triple produced by
        ``PostprocessYOLO.process``.  When ``boxes`` is not ``None`` its first
        four columns are divided by the raw image width, height, width and
        height respectively before it is returned.
    """
    # Spatial input resolution of the network, in (height, width) order.
    input_resolution_yolov3_HW = (416, 416)
    image_raw, image = PreprocessYOLO(input_resolution_yolov3_HW).process(image)

    # Size of the raw image in (height, width) order; needed for
    # post-processing and for normalising the boxes at the end.
    shape_orig_HW = image_raw.shape[:2]
    height, width = shape_orig_HW

    # Shapes used to un-flatten the engine outputs.
    # NOTE(review): only two shapes are listed while three masks / nine
    # anchors are configured below; zip() ignores any further engine output -
    # confirm this is intended.
    output_shapes = [(1, 255, 13, 13), (1, 255, 26, 26)]

    # common.do_inference copies the host input to the GPU before executing.
    inputs[0].host = image
    flat_outputs = common.do_inference(context,
                                       bindings=bindings,
                                       inputs=inputs,
                                       outputs=outputs,
                                       stream=stream)
    trt_outputs = []
    for flat, shape in zip(flat_outputs, output_shapes):
        trt_outputs.append(flat.reshape(shape))

    postprocessor = PostprocessYOLO(
        **{
            # Three three-element tuples selecting the anchors per output.
            "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
            # Nine (width, height) anchor pairs.
            "yolo_anchors": [(10, 13), (16, 30), (33, 23),
                             (30, 61), (62, 45), (59, 119),
                             (116, 90), (156, 198), (373, 326)],
            # Threshold for object coverage, between 0 and 1.
            "obj_threshold": 0.6,
            # Threshold for non-max suppression, between 0 and 1.
            "nms_threshold": 0.2,
            "yolo_input_resolution": input_resolution_yolov3_HW,
        })

    boxes, classes, scores = postprocessor.process(trt_outputs, shape_orig_HW)

    if boxes is None:
        return boxes, classes, scores

    # Show the detections, then normalise the box columns in place.
    obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes,
                                   ALL_CATEGORIES)
    cv2.imshow("test", obj_detected_img)
    for column, extent in enumerate((width, height, width, height)):
        boxes[:, column] = boxes[:, column] / extent
    return boxes, classes, scores
Beispiel #26
0
def main():
    """Run an INT8 YOLOv3-608 TensorRT engine on a sample image.

    The engine is obtained from ``build_int8_engine`` with an entropy
    calibrator and with a few intermediate layers exposed as extra outputs.
    Each intermediate output is saved as ``<layer name>.tiff``; the three
    regular YOLO outputs are post-processed and the resulting image is written
    to ``dog_bboxes.png``.
    """
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = 'yolo_in8.trt'
    cfg_file_path = "yolov3.cfg"

    input_image_path = download_file(
        'dog.jpg',
        'https://github.com/pjreddie/darknet/raw/f86901f6177dfc6116360a13cc06ab680e0c86b0/data/dog.jpg',
        checksum_reference=None)
    # Spatial input resolution of the network, in (height, width) order.
    input_resolution_yolov3_HW = (608, 608)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    # The raw image is kept for drawing, the processed one is the engine input.
    image_raw, image = preprocessor.process(input_image_path)
    # Size of the original image (named WH by the original code).
    shape_orig_WH = image_raw.size

    # Shapes of the three regular YOLO outputs.
    output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]

    # Calibrator used for INT8 quantisation.
    calibration_dataset_loc = "calibration_dataset/"
    calibration_cache = "yolo_calibration.cache"
    calib = calibra.PythonEntropyCalibrator(calibration_dataset_loc,
                                            cache_file=calibration_cache)

    # Intermediate layers whose output should be visualised.
    output_layer_name = [
        "001_convolutional", "002_convolutional", "003_convolutional",
        "005_shortcut", "006_convolutional"
    ]
    # Number of filters of each of those layers, read from the cfg file.
    filter_num = get_filter_num(cfg_file_path, output_layer_name)
    n_middle = len(filter_num)

    with build_int8_engine(onnx_file_path, calib, cfg_file_path,
                           output_layer_name, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        start = time.time()
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print('Running inference on image {}...'.format(input_image_path))
        # common.do_inference copies the host input to the GPU before
        # executing.  A single image is used as the test set here.
        inputs[0].host = image
        trt_outputs = common.do_inference(context,
                                          bindings=bindings,
                                          inputs=inputs,
                                          outputs=outputs,
                                          stream=stream,
                                          batch_size=1)
    end = time.time()
    print("Inference costs %.02f sec." % (end - start))

    # The first len(filter_num) outputs are the intermediate layers.  They are
    # flat, so recover their shapes: filter_num * h * h elements for a
    # convolutional layer, otherwise the width is taken as twice the height.
    middle_output_shapes = []
    for i, output in enumerate(trt_outputs[:n_middle]):
        if "convolutional" in output_layer_name[i]:
            h = int(math.sqrt(output.shape[0] / filter_num[i]))
            w = h
        else:
            h = int(math.sqrt(output.shape[0] / filter_num[i] / 2))
            w = 2 * h
        middle_output_shapes.append((1, filter_num[i], w, h))

    middle_output = [
        output.reshape(shape)
        for output, shape in zip(trt_outputs[:n_middle], middle_output_shapes)
    ]
    # Save every intermediate output, summed over its channels, as an image.
    # NOTE(review): misc.toimage is not available in recent SciPy releases -
    # confirm the SciPy version this script is meant to run with.
    for name, output in zip(output_layer_name, middle_output):
        w, h = output.shape[2], output.shape[3]
        img = misc.toimage(output.sum(axis=1).reshape(w, h))
        img.save("{}.tiff".format(name))
    print("Saveing middle output {}".format(output_layer_name))

    # The remaining outputs are the regular YOLO outputs.
    trt_outputs = [
        output.reshape(shape)
        for output, shape in zip(trt_outputs[n_middle:], output_shapes)
    ]

    postprocessor_args = {
        # Three three-element tuples selecting the anchors per output.
        "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        # Nine (width, height) anchor pairs.
        "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                         (59, 119), (116, 90), (156, 198), (373, 326)],
        # Threshold for object coverage, between 0 and 1.
        "obj_threshold": 0.6,
        # Threshold for non-max suppression, between 0 and 1.
        "nms_threshold": 0.5,
        "yolo_input_resolution": input_resolution_yolov3_HW,
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)

    boxes, classes, scores = postprocessor.process(trt_outputs, shape_orig_WH)

    output_image_path = 'dog_bboxes.png'
    if boxes is None:
        # Nothing passed the thresholds: there is nothing to draw, so the
        # image is saved unchanged (presumably a PIL image like the result of
        # draw_bboxes - confirm).
        print('No objects detected in {}; saving the image without annotations.'
              .format(input_image_path))
        obj_detected_img = image_raw
    else:
        obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes,
                                       ALL_CATEGORIES)
    obj_detected_img.save(output_image_path, 'PNG')
    print('Saved image with bounding boxes of detected objects to {}.'.format(
        output_image_path))
def main(inputSize):
    """Run YOLOv3 TensorRT inference in an endless loop and publish the frames.

    Frames are read from the module-level ``cap``.  Every processed frame
    (annotated when something was detected) is stored in the global
    ``outputFrame`` while holding ``lock``.  The globals ``t0``, ``t1`` and
    ``fps`` are updated with the timing of the latest inference plus
    post-processing step.

    Args:
        inputSize: Side length of the square network input.  It also selects
            the ``yolov3-<inputSize>.onnx`` / ``yolov3-<inputSize>.trt`` files.
    """
    global vs, outputFrame, lock, t0, t1, fps, sess, input_name, label_name, PAR_Model

    cuda.init()  # Initialize CUDA
    onnx_file_path = 'yolov3-{}.onnx'.format(inputSize)
    engine_file_path = 'yolov3-{}.trt'.format(inputSize)
    # Spatial input resolution of the network, in (height, width) order.
    input_resolution_yolov3_HW = (inputSize, inputSize)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)

    # Output shapes expected by the post-processor: one grid per divisor.
    output_shapes = [(1, 255, inputSize // divisor, inputSize // divisor)
                     for divisor in (32, 16, 8)]

    ctx = make_default_context()  # Create CUDA context
    try:
        postprocessor_args = {
            "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
            "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                             (59, 119), (116, 90), (156, 198), (373, 326)],
            "obj_threshold": 0.5,
            "nms_threshold": 0.35,
            "yolo_input_resolution": input_resolution_yolov3_HW,
        }
        postprocessor = PostprocessYOLO(**postprocessor_args)

        with get_engine(onnx_file_path, engine_file_path) as engine, \
                engine.create_execution_context() as context:
            print("performing inference")
            inputs, outputs, bindings, stream = common.allocate_buffers(engine)
            while True:
                _, image_raw = cap.read()
                if image_raw is None:
                    # NOTE(review): a missing frame is retried immediately
                    # without sleeping - confirm busy-waiting is acceptable.
                    continue

                image_raw, image = preprocessor.process(image_raw)
                shape_orig_WH = image_raw.size
                inputs[0].host = image

                t0 = time.time()
                trt_outputs = common.do_inference(context,
                                                  bindings=bindings,
                                                  inputs=inputs,
                                                  outputs=outputs,
                                                  stream=stream)
                # do_inference returns flat arrays; restore the tensor shapes.
                trt_outputs = [
                    output.reshape(shape)
                    for output, shape in zip(trt_outputs, output_shapes)
                ]
                boxes, classes, scores = postprocessor.process(
                    trt_outputs, shape_orig_WH)
                t1 = time.time()
                t_inf = t1 - t0
                fps = 1 / t_inf

                # Only draw when the post-processor returned a full result.
                draw = True
                if boxes is None:
                    print("no bboxes")
                    draw = False
                if classes is None:
                    print("no classes")
                    draw = False
                if scores is None:
                    print("no scores")
                    draw = False

                if draw:
                    obj_detected_img = draw_bboxes(
                        image_raw,
                        bboxes=boxes,
                        confidences=scores,
                        categories=classes,
                        all_categories=ALL_CATEGORIES)
                else:
                    obj_detected_img = image_raw

                # Publish the frame for whoever streams it.
                with lock:
                    outputFrame = np.array(obj_detected_img)
    finally:
        # Always release the CUDA context, including when the endless loop is
        # interrupted or an error propagates.
        ctx.pop()
Beispiel #28
0
def main():
    """Run a YOLO TensorRT engine on a sample image and save the detections.

    Command line::

        python3 <script> <onnx_dir> <onnx_file_name>

    ``<onnx_file_name>`` must reduce to ``<name>_<size>`` (one underscore)
    once ``get_exact_file_name_from_path`` has been applied to it.  The first
    four characters of ``<name>`` are dropped to obtain the YOLO version tag
    and ``<size>`` is the integer side length of the square network input, so
    e.g. ``yolov3_608`` yields version ``v3`` and size 608.
    The engine file ``yolo<version>_<size>.trt`` is looked up in ``<onnx_dir>``
    and the result is written to ``dog_bboxes_<version>_<size>.png``.
    """
    if len(sys.argv) < 3:
        # Fail with a usage hint instead of a bare IndexError.
        sys.exit('Usage: {} <onnx_dir> <onnx_file_name>'.format(sys.argv[0]))

    dir_onnx = sys.argv[1]
    fn_onnx = sys.argv[2]
    onnx_file_path = os.path.join(dir_onnx, fn_onnx)

    name_part, size_part = get_exact_file_name_from_path(fn_onnx).split('_')
    v_yolo = name_part[4:]
    said = int(size_part)

    engine_file_path = os.path.join(dir_onnx,
                                    'yolo{}_{}.trt'.format(v_yolo, said))

    # Download a dog image and remember where it was saved.
    input_image_path = download_file(
        'dog.jpg',
        'https://github.com/pjreddie/darknet/raw/f86901f6177dfc6116360a13cc06ab680e0c86b0/data/dog.jpg',
        checksum_reference=None)

    # Spatial input resolution of the network, in (height, width) order.
    input_resolution_yolov3_HW = (said, said)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    # The raw image is kept for drawing, the processed one is the engine input.
    image_raw, image = preprocessor.process(input_image_path)
    # Size of the original image (named WH by the original code).
    shape_orig_WH = image_raw.size
    # Output shapes expected by the post-processor for this model and size.
    output_shapes = get_output_shapes(v_yolo, said)

    with get_engine(onnx_file_path, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print('Running inference on image {}...'.format(input_image_path))
        # common.do_inference copies the host input to the GPU before executing.
        inputs[0].host = image
        trt_outputs = common.do_inference(context,
                                          bindings=bindings,
                                          inputs=inputs,
                                          outputs=outputs,
                                          stream=stream)
        print(trt_outputs)

    # do_inference returns flat arrays; restore the tensor shapes.
    trt_outputs = [
        output.reshape(shape)
        for output, shape in zip(trt_outputs, output_shapes)
    ]

    postprocessor_args = get_postprocessor_args(v_yolo,
                                                input_resolution_yolov3_HW)
    postprocessor = PostprocessYOLO(**postprocessor_args)

    boxes, classes, scores = postprocessor.process(trt_outputs, shape_orig_WH)

    output_image_path = 'dog_bboxes_{}_{}.png'.format(v_yolo, said)
    if boxes is None:
        # Nothing passed the thresholds: there is nothing to draw, so the
        # image is saved unchanged (presumably a PIL image like the result of
        # draw_bboxes - confirm).
        print('No objects detected in {}; saving the image without annotations.'
              .format(input_image_path))
        obj_detected_img = image_raw
    else:
        obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes,
                                       ALL_CATEGORIES)
    obj_detected_img.save(output_image_path, 'PNG')
    print('Saved image with bounding boxes of detected objects to {}.'.format(
        output_image_path))
Beispiel #29
0
# Directory holding the test dataset; the TESTDATADIR environment variable
# takes precedence over the built-in default.
data_dir = os.environ.get('TESTDATADIR',
                          '/tmp/dataset-nctu/clothes/clothes_test')


def get_engine(engine_file_path="clothes.trt"):
    """Deserialize and return the TensorRT engine stored in a file.

    Args:
        engine_file_path: Path of the serialized engine file to read.

    Returns:
        The engine produced by ``runtime.deserialize_cuda_engine``.
    """
    print("Reading engine from file {}".format(engine_file_path))
    with open(engine_file_path, "rb") as engine_file:
        with trt.Runtime(TRT_LOGGER) as runtime:
            serialized_engine = engine_file.read()
            return runtime.deserialize_cuda_engine(serialized_engine)


# Network input resolution handed to the pre- and post-processor.
input_HW = (416, 416)

# Shapes used for the three engine outputs (13x13, 26x26 and 52x52 grids).
output_shapes = [
    (1, 255, 13, 13),
    (1, 255, 26, 26),
    (1, 255, 52, 52),
]

preprocessor = PreprocessYOLO(input_HW)

# Settings of the YOLO post-processor.
postprocessor_args = {
    "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
    "yolo_anchors": [
        (10, 13), (16, 30), (33, 23),
        (30, 61), (62, 45), (59, 119),
        (116, 90), (156, 198), (373, 326),
    ],
    "obj_threshold": 0.5,
    "nms_threshold": 0.2,
    "yolo_input_resolution": input_HW,
}
postprocessor = PostprocessYOLO(**postprocessor_args)

# Numeric constants; presumably for an F-beta style metric computed
# elsewhere in the file - TODO confirm.
eps = 1e-6
beta = 2.0
beta_s = beta ** 2  # beta squared
Beispiel #30
0
def _resolve_dataset_paths():
    """Return (onnx_file_path, engine_file_path, test_images_file).

    The module-level ``COCO`` flag selects between the COCO and the VOC
    variant; ``SIZE`` and ``BUILD`` are embedded in the model file names.
    """
    if COCO:
        model_stem = 'yolov3-' + str(SIZE)
        test_images_file = './coco/5k.txt'  # for coco
    else:
        model_stem = 'yolov3-voc-' + str(SIZE)
        test_images_file = './VOC/data/dataset/voc_test.txt'  # for voc
    onnx_file_path = os.path.join('./engine/onnx/', model_stem + '.onnx')
    engine_file_path = os.path.join('./engine/trt/',
                                    model_stem + BUILD + '.trt')
    return onnx_file_path, engine_file_path, test_images_file


def _write_predictions(predict_result_path, boxes, scores, classes):
    """Write one '<category> <score> <xmin> <ymin> <xmax> <ymax>' line per box.

    The file is always created; it stays empty when ``boxes`` is None.
    """
    lines = []
    if boxes is not None:
        for box, score, category_idx in zip(boxes, scores, classes):
            x_coord, y_coord, width, height = box
            # Convert (x, y, w, h) to corner form to fit YunYang1994's mAP
            # calculation input format; truncate to int after the addition.
            corners = [
                int(value) for value in (x_coord, y_coord, x_coord + width,
                                         y_coord + height)
            ]
            # Category names must not contain whitespace because the line is
            # space separated.
            category = "".join(ALL_CATEGORIES[category_idx].split())
            fields = [category, "{:.4f}".format(score)]
            fields.extend(str(value) for value in corners)
            lines.append(' '.join(fields) + '\n')

    with open(predict_result_path, 'w') as f:
        f.writelines(lines)


def _save_timing_report(time_rec_save, time_rec_mean, n_images):
    """Save the per-image recognition times and plot them with their mean."""
    np.save('results/timeRecognition.npy', time_rec_save)
    plt.figure(figsize=(8, 5))
    plt.plot(time_rec_save, label='Recg_time')
    plt.ylim([0, 0.05])
    plt.xlabel('Test image number')
    plt.ylabel('Time [second]')
    plt.title(
        'Recognition time of Yolov3_DarkNet_ONNX_TensorRT_GPU_coco_test_2017'
    )
    plt.hlines(y=time_rec_mean,
               xmin=0,
               xmax=n_images,
               linewidth=3,
               color='r',
               label='Mean')
    plt.savefig(
        'results/Yolov3_DarkNet_ONNX_TensorRT_GPU_coco_test_2017.png',
        bbox_inches='tight')
    plt.show()


def main():
    """Run YOLOv3 TensorRT inference over a list of test images.

    For every image path listed in the COCO or VOC test file (selected by the
    module-level ``COCO`` flag) the image is pre-processed, run through the
    engine obtained from ``get_engine``, post-processed, and its detections
    are written to ``./mAP/predicted/<index>.txt``. The per-image time
    reported by ``common.do_inference`` is collected and its mean printed.
    When ``PRINT_RESULTS`` is set, annotated images, the raw timings and a
    timing plot are additionally stored under ``results/``.
    """
    onnx_file_path, engine_file_path, test_images_file = \
        _resolve_dataset_paths()

    # Skip blank lines so a trailing newline in the list file does not turn
    # into an empty image path.
    with open(test_images_file, 'r') as f:
        test_images = [line.strip() for line in f if line.strip()]

    time_rec_save = []
    input_resolution_yolov3_HW = (SIZE, SIZE)

    # Start from an empty prediction directory; makedirs also creates the
    # parent './mAP' directory when it does not exist yet.
    predicted_dir_path = './mAP/predicted'
    if os.path.exists(predicted_dir_path):
        shutil.rmtree(predicted_dir_path)
    os.makedirs(predicted_dir_path)

    # Annotated images, timings and the plot are all written below
    # 'results', so make sure it exists before the first save.
    if PRINT_RESULTS and not os.path.isdir('./results'):
        os.makedirs('./results')

    # The objects below do not depend on the individual image, so they are
    # built once instead of once per loop iteration.
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)

    # Output shapes expected by the post-processor: one per detection scale,
    # i.e. the input size divided by 32, 16 and 8
    # (e.g. 13/26/52 for SIZE=416, 10/20/40 for SIZE=320).
    output_shapes = [(1, int(OUT), int(SIZE // stride), int(SIZE // stride))
                     for stride in (32, 16, 8)]

    postprocessor = PostprocessYOLO(
        # A list of 3 three-dimensional tuples for the YOLO masks
        yolo_masks=[(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        # A list of 9 two-dimensional tuples for the YOLO anchors
        yolo_anchors=[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                      (59, 119), (116, 90), (156, 198), (373, 326)],
        # Threshold for object coverage, float value between 0 and 1
        obj_threshold=0.6,
        # Threshold for non-max suppression algorithm, float value between 0 and 1
        nms_threshold=0.5,
        yolo_input_resolution=input_resolution_yolov3_HW)

    with get_engine(onnx_file_path, engine_file_path
                    ) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)

        for idx, input_image_path in enumerate(test_images):
            filename = os.path.basename(input_image_path)

            # Load the image and get it back together with a pre-processed
            # version of it.
            image_raw, image = preprocessor.process(input_image_path)
            # Store the shape of the original input image in WH format, the
            # post-processor needs it later.
            shape_orig_WH = image_raw.size

            # Set host input to the image. The common.do_inference function
            # will copy the input to the GPU before executing.
            inputs[0].host = image
            trt_outputs, time_rec = common.do_inference(context,
                                                        bindings=bindings,
                                                        inputs=inputs,
                                                        outputs=outputs,
                                                        stream=stream)
            time_rec_save.append(time_rec)
            print('%d, Image %s, Recognition Time %0.3f seconds' %
                  (idx, filename, time_rec))

            # Before the post-processing, the outputs are reshaped because
            # common.do_inference gives us flat arrays.
            trt_outputs = [
                output.reshape(shape)
                for output, shape in zip(trt_outputs, output_shapes)
            ]

            # Run the post-processing algorithms on the TensorRT outputs and
            # get the bounding box details of detected objects.
            boxes, classes, scores = postprocessor.process(
                trt_outputs, (shape_orig_WH))

            # Draw the bounding boxes onto the original input image and save it.
            # NOTE(review): boxes can be None here (see the guard in
            # _write_predictions) -- confirm draw_bboxes copes with that.
            if PRINT_RESULTS:
                obj_detected_img = draw_bboxes(image_raw, boxes, scores,
                                               classes, ALL_CATEGORIES)
                output_image_path = './results/yolo_' + filename
                obj_detected_img.save(output_image_path)
                print(
                    'Saved image with bounding boxes of detected objects to {}.'
                    .format(output_image_path))

            predict_result_path = os.path.join(predicted_dir_path,
                                               str(idx) + '.txt')
            _write_predictions(predict_result_path, boxes, scores, classes)

    # Without any processed image there is nothing to average or plot
    # (np.mean of an empty list would yield nan and a runtime warning).
    if not time_rec_save:
        print('No images were processed; skipping timing statistics.')
        return

    time_rec_mean = np.mean(time_rec_save)
    print('The mean recognition time is {0:0.3f} seconds'.format(
        time_rec_mean))

    # %%    Visualization of results
    if PRINT_RESULTS:
        _save_timing_report(time_rec_save, time_rec_mean, len(test_images))