Ejemplo n.º 1
0
    def __init__(self, trt_deploy_path, trt_engine_path, trt_model_path, trt_engine_datatype=trt.DataType.FLOAT, batch_size=1):
        """Initializes TensorRT objects needed for model inference.

        When no file exists at ``trt_engine_path`` the engine is built with
        ``engine_utils.build_engine`` and saved to that path; otherwise the
        cached engine is loaded from it. Afterwards the input/output buffers,
        an execution context and a reusable host-side batch array are
        allocated.

        Args:
            trt_deploy_path (str): forwarded as the first argument of
                ``engine_utils.build_engine`` (presumably the Caffe deploy
                definition -- TODO confirm against engine_utils)
            trt_engine_path (str): path where TensorRT engine should be stored
            trt_model_path (str): path of caffe model
            trt_engine_datatype (trt.DataType):
                requested precision of TensorRT engine used for inference
            batch_size (int): batch size for which engine
                should be optimized for
        """

        # We first load all custom plugins shipped with TensorRT,
        # some of them will be needed during inference
        trt.init_libnvinfer_plugins(TRT_LOGGER, '')

        # Initialize runtime needed for loading TensorRT engine from file
        self.trt_runtime = trt.Runtime(TRT_LOGGER)
        # TRT engine placeholder
        self.trt_engine = None

        # Look up the module-level DATATYPE table for the requested precision
        self.datatype = DATATYPE[trt_engine_datatype]
        # Display requested engine settings to stdout
        print("TensorRT inference engine settings:")
        print("  * Inference precision - {}".format(trt_engine_datatype))
        print("  * Max batch size - {}\n".format(batch_size))

        # If engine is not cached, we need to build it
        if not os.path.exists(trt_engine_path):
            # For more details, check implementation
            # NOTE(review): build_engine's failure mode is not visible here;
            # if it can return None, save_engine below receives None -- confirm.
            self.trt_engine = engine_utils.build_engine(
                trt_deploy_path, trt_model_path, TRT_LOGGER,
                trt_engine_datatype=trt_engine_datatype,
                batch_size=batch_size)
            print("self.trt_engine:",self.trt_engine)
            # Save the engine to file
            engine_utils.save_engine(self.trt_engine, trt_engine_path)

        # If we get here, the file with engine exists, so we can load it.
        # Compare against the None placeholder explicitly instead of relying
        # on the truthiness of an engine object.
        if self.trt_engine is None:
            print("Loading cached TensorRT engine from {}".format(
                trt_engine_path))
            self.trt_engine = engine_utils.load_engine(
                self.trt_runtime, trt_engine_path)

        # This allocates memory for network inputs/outputs on both CPU and GPU
        self.inputs, self.outputs, self.bindings, self.stream = common.allocate_buffers(self.trt_engine)

        # Execution context is needed for inference
        self.context = self.trt_engine.create_execution_context()

        # Allocate memory for multiple usage [e.g. multiple batch inference]:
        # one flattened input row per slot of the engine's max batch size
        input_volume = trt.volume(model_utils.ModelData.INPUT_SHAPE)
        print("input_volume:",input_volume)
        print("self.trt_engine.max_batch_size:",self.trt_engine.max_batch_size)
        self.numpy_array = np.zeros((self.trt_engine.max_batch_size, input_volume))
def main_yolov3tiny_test():
    """Run one YOLOv3-tiny TensorRT inference and print detection count and timings.

    Loads the engine from a hard-coded path with ``load_engine``, feeds it
    the sample returned by ``get_sample()``, decodes the first two engine
    outputs with ``YOLO_NP``, applies ``non_max_suppression_np`` and
    ``rescale_boxes``, then prints the number of detections and two elapsed
    times.

    NOTE(review): the start timestamp is taken before ``get_sample()``, so
    the 'raw_foward' figure also covers sample loading, context creation and
    the host copy -- confirm this is intended.
    """
    # One (decoder, raw-output shape) pair per detection head, listed in the
    # order the engine outputs are consumed: 13x13 grid first, then 26x26.
    coarse_anchors = [(81, 82), (135, 169), (344, 319)]
    fine_anchors = [(10, 14), (23, 27), (37, 58)]
    heads = (
        (YOLO_NP(coarse_anchors, 2, 416), (1, 21, 13, 13)),
        (YOLO_NP(fine_anchors, 2, 416), (1, 21, 26, 26)),
    )

    # Author's note on this engine file: 128 float16 0.4s
    engine_path = './weights/yolov3-mytiny_98_0.96_warehouse_3.trt'
    engine = load_engine(engine_path)
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)

    started = time.time()
    img, org_size = get_sample()

    with engine.create_execution_context() as context:
        # Stage the sample in the first input's host buffer, then run the engine.
        np.copyto(inputs[0].host, img)
        raw_outputs = common.do_inference(
            context,
            bindings=bindings,
            inputs=inputs,
            outputs=outputs,
            stream=stream,
        )
        forward_done = time.time()

        # Reshape every flat output first, then decode each head in order.
        shaped = [
            raw_outputs[idx].reshape(shape)
            for idx, (_, shape) in enumerate(heads)
        ]
        decoded = [head(tensor) for (head, _), tensor in zip(heads, shaped)]

        merged = np.concatenate(decoded, 1)
        kept = non_max_suppression_np(merged, 0.5, 0.4)[0]
        detections = rescale_boxes(np.array(kept), 416, org_size)
        finished = time.time()

        print('detect res ', len(detections))
        print('raw_foward', forward_done - started)
        print('with nms', finished - started)
Ejemplo n.º 3
0
    def __init__(self, cfg, engine_file_path):
        """Set up the tracker, the TensorRT engine and the YOLO pre/post-processors.

        Args:
            cfg: configuration object; stored on the instance and passed to
                ``build_tracker``.
            engine_file_path: passed to ``get_engine`` to obtain the
                TensorRT engine.
        """
        self.cfg = cfg

        # Tracker, built with CUDA enabled.
        self.deepsort = build_tracker(cfg, use_cuda=True)

        # TensorRT engine, its execution context and the I/O buffers.
        self.engine = get_engine(engine_file_path)
        self.context = self.engine.create_execution_context()
        buffers = common.allocate_buffers(self.engine)
        self.inputs, self.outputs, self.bindings, self.stream = buffers

        # Network input resolution for yolov3-416, shared by the
        # pre-processor and the post-processor arguments.
        input_hw = (416, 416)
        self.input_resolution_yolov3_HW = input_hw
        self.preprocessor = PreprocessYOLO(input_hw)

        # One output shape per square grid size.
        self.output_shapes = [(1, 255, grid, grid) for grid in (13, 26, 52)]

        # A list of 9 two-dimensional tuples for the YOLO anchors.
        anchors = [
            (10, 13), (16, 30), (33, 23),
            (30, 61), (62, 45), (59, 119),
            (116, 90), (156, 198), (373, 326),
        ]
        self.postprocessor_args = dict(
            # A list of 3 three-dimensional tuples for the YOLO masks.
            yolo_masks=[(6, 7, 8), (3, 4, 5), (0, 1, 2)],
            yolo_anchors=anchors,
            # Threshold for object coverage, float value between 0 and 1.
            obj_threshold=0.6,
            # Threshold for non-max suppression algorithm, float value
            # between 0 and 1.
            nms_threshold=0.5,
            yolo_input_resolution=input_hw,
        )

        self.postprocessor = PostprocessYOLO(**self.postprocessor_args)
Ejemplo n.º 4
0
def main():
    args = parse_args()
    cfg = CFG(args)
    '''
	tensorrt.DataType.FLOAT 	tensorrt.float32
	tensorrt.DataType.HALF 		tensorrt.float16
	tensorrt.DataType.INT32		tensorrt.int32
	tensorrt.DataType.INT8 		tensorrt.int8
	'''

    # assert os.path.exists(args.model_path)

    output_shapes = (64, 21, 10, 16)

    input_img = cv2.imread('trump.jpg')  # BGR  , HWC
    ori_shape = input_img.shape
    print(ori_shape)

    input_img = input_img[:, :, [2, 1, 0]]  # BGR - RGB  , HWC

    # bgr = input_img[:,:,::-1] # RGB - BGR  , HWC
    # cv2.imwrite("testing/test2.jpg",bgr)

    batch_img = list(np.tile(input_img, [64, 1, 1, 1]))

    # pre-processing
    print(1, 64, batch_img[0].shape)
    batch_img = batch_resize(batch_img)
    print(2, batch_img.shape)
    batch_img = normalize(batch_img)
    print(3, batch_img.shape)

    # TensorRT
    batch_img = np.array(batch_img, dtype=np.float32, order='C')
    with get_engine(
            args, cfg) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)

        inputs[0].host = batch_img

        trt_outputs = common.do_inference(context,
                                          bindings=bindings,
                                          inputs=inputs,
                                          outputs=outputs,
                                          stream=stream,
                                          batch_size=args.batch_size)

        print(trt_outputs)

    trt_outputs = trt_outputs[0].reshape(output_shapes)
    np.save('trt_outputs.npy', trt_outputs)
    print(trt_outputs.shape)
    rs = trt_outputs[0]
    print(rs.shape)

    # om = torch.argmax(out.squeeze(), dim=0).detach().cpu().numpy()

    om = np.argmax(rs, axis=0)
    print(om.shape)

    rgb = decode_segmap(om)

    bgr = rgb[:, :, ::-1]  # RGB - BGR
    # rgb = rgb[...,[2,0,1]] # RGB2BGR

    print('rgb', bgr.shape)
    frame = cv2.resize(bgr, (ori_shape[0], ori_shape[1]),
                       interpolation=cv2.INTER_LINEAR)
    frame = np.transpose(frame, (1, 0, 2))  # BGR  , HWC
    cv2.imwrite("testing/test.jpg", frame)

    # import matplotlib.pyplot as plt
    # plt.imshow(rgb); plt.show()
    exit()

    # batch_img = np.ascontiguousarray(batch_img)
    # temp_img = temp_img.flatten()

    # get_engine(args,cfg)

    # print(trt_outputs)
    # Before doing post-processing, we need to reshape the outputs as the common.do_inference will give us flat arrays.
    # print(trt_outputs.shape)
    # for trt_output in trt_outputs:
    # 	print(trt_output)

    # om = np.argmax(trt_outputs)

    # with open('testing/colors.txt') as infile:
    # 	classes = [line.split('\n')[0]for line in infile.readlines()]
    # 	classes = np.array([[int(x)for x in shape.split(" ")] for shape in classes])
    # print(classes.shape)

    for idx, _class in enumerate(classes):
        '''
		print(idx, _class)
		# frame = np.array([np.ones((10,16))* RGB for RGB in _class])
		# print(trt_outputs[idx])
		frame = np.multiply(trt_outputs[idx],_class.reshape(3,1,1))  # RGB  , CHW
		
		print(frame.shape)
		print(frame)
		# frame = np.dot(frame,trt_outputs[0][idx])
		# print(frame)
	# for idx,value in enumerate(trt_outputs[0]):
		frame = np.transpose(frame,(1,2,0)) # RGB  , HWC
		print(frame.shape, ori_shape)
		frame = cv2.resize(frame, (ori_shape[0],ori_shape[1]), interpolation=cv2.INTER_LINEAR)

		# frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
		frame = frame[...,[2,0,1]]

		# normalise
		frame *= (255.0/frame.max())

		print(frame)
		# cv2.imwrite("testing/layer_{}.jpg".format(idx),frame)
		'''
        temp = cv2.resize(trt_outputs[idx], (ori_shape[1], ori_shape[0]),
                          interpolation=cv2.INTER_LINEAR)
        # temp += 100
        # print(temp.max(),temp.min())
        # cv2.imwrite("testing/layer_{}.jpg".format(idx),temp)
    # cv2.imwrite("testing/test.jpg",input_img[0])

    # trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]

    # print(load_onnx_model2)
    '''
	# Template
	TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
	builder = trt.Builder(TRT_LOGGER)

	network = builder.create_network()
	dataLayer = network.add_input('data',trt.DataType.FLOAT,(c,h,w))
	# Add network layer
	network.mark_output(outputLayer.get_output(0))

	engine = builder.build_cuda_engine(network)
	context = engine.create_execution_context()
	context.execute_async(bindings=[d_input,d_output])
	'''
    '''
def main_yolov3_test():
    anchors = [
        [(116, 90), (156, 198), (373, 326)],  # 13*13 上预测最大的
        [(30, 61), (62, 45), (59, 119)],  # 26*26 上预测次大的
        [(10, 13), (16, 30), (33, 23)],  # 13*13 上预测最小的
    ]
    yolo1 = YOLO_NP(anchors[0], 2, 416)
    yolo2 = YOLO_NP(anchors[1], 2, 416)
    yolo3 = YOLO_NP(anchors[2], 2, 416)

    img, org_size = get_sample()
    # img1,org_size1 = get_sample()

    # print(sum(img-img1))
    #
    # time.sleep(100000)

    print(img.shape)
    # trt_engine = './weights/yolov3-myyolov3_99_0.96_warehouse_2.trt' # 128 2.6s一张
    # trt_engine = './weights/yolov3-myyolov3_99_0.96_warehouse_3.trt'  # 256 3.0s
    # trt_engine = './weights/yolov3-myyolov3_99_0.96_warehouse_4.trt'  # 64 2.8s
    trt_engine = './weights/yolov3-myyolov3_99_0.96_warehouse_5.trt'  # 128 float16 0.4s

    #
    engine = load_engine(trt_engine)
    #
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    #
    with engine.create_execution_context() as context:
        # case_num = load_random_test_case(mnist_model, pagelocked_buffer=inputs[0].host)
        # For more information on performing inference, refer to the introductory samples.
        # The common.do_inference function will return a list of outputs - we only have one in this case.
        np.copyto(inputs[0].host, img)
        t1 = time.time()
        res = common.do_inference(context,
                                  bindings=bindings,
                                  inputs=inputs,
                                  outputs=outputs,
                                  stream=stream)
        t2 = time.time()
        # print(len(res))
        # print(res[0].shape,res[1].shape,res[2].shape)

        o1 = res[0].reshape((1, 21, 13, 13))
        o2 = res[1].reshape((1, 21, 26, 26))
        o3 = res[2].reshape((1, 21, 52, 52))

        yolo_output1 = yolo1(o1)
        yolo_output2 = yolo2(o2)
        yolo_output3 = yolo3(o3)

        detections = np.concatenate([yolo_output1, yolo_output2, yolo_output3],
                                    1)
        # print(detections.shape)

        detections = non_max_suppression_np(detections, 0.5, 0.4)[0]

        # print('org_size',org_size)
        detections = rescale_boxes(np.array(detections), 416, org_size)
        t3 = time.time()

        # print('detect res ', len(detections))
        # print(detections)

        print('raw_foward', t2 - t1)
        print('with nms', t3 - t1)