Example #1
def main():
    common.add_help(description="Runs an MNIST network using a PyTorch model")
    # Train the PyTorch model
    mnist_model = model.MnistModel()
    mnist_model.learn()
    weights = mnist_model.get_weights()
    # Do inference with TensorRT.
    engine = build_engine(weights)

    # Build an engine, allocate buffers and create a stream.
    # For more information on buffer allocation, refer to the introductory samples.
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    context = engine.create_execution_context()

    case_num = load_random_test_case(mnist_model,
                                     pagelocked_buffer=inputs[0].host)
    # For more information on performing inference, refer to the introductory samples.
    # The common.do_inference function will return a list of outputs - we only have one in this case.
    [output] = common.do_inference_v2(context,
                                      bindings=bindings,
                                      inputs=inputs,
                                      outputs=outputs,
                                      stream=stream)
    pred = np.argmax(output)
    print("Test Case: " + str(case_num))
    print("Prediction: " + str(pred))
Example #2
def main():
    """Create a TensorRT engine for ONNX-based model and run inference."""

    # Try to load a previously generated network graph in ONNX format:
    onnx_file_path = '/models/run09/jetracer.onnx'
    engine_file_path = '/models/run09/jetracer.trt'
    ino = 378
    # Do inference with TensorRT
    trt_outputs = []
    with get_engine(onnx_file_path, engine_file_path
                    ) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Do inference
        print('Running inference on image')
        # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
        image = cv2.imread(
            f'/models/train_data/Images/{ino:03d}.jpg').transpose(
                2, 0, 1).reshape(1, 3, 320, 640)
        inputs[0].host = np.array(image, dtype=np.float16, order='C')
        trt_outputs = common.do_inference_v2(context,
                                             bindings=bindings,
                                             inputs=inputs,
                                             outputs=outputs,
                                             stream=stream)

    # Before doing post-processing, we need to reshape the outputs as the common.do_inference will give us flat arrays.
    #mask = trt_outputs.reshape(320,640).numpy()[0][0]>0.4
    print(trt_outputs[0].shape)
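Several of the ONNX-based examples also rely on a get_engine(onnx_file_path, engine_file_path) helper that is not shown. A hedged sketch of how such a helper is commonly written, assuming a TensorRT 7/8-style builder API: reuse a serialized engine file if one exists, otherwise parse the ONNX model, build the engine, and cache it.

import os
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

def get_engine(onnx_file_path, engine_file_path):
    # Deserialize a cached engine if one already exists on disk.
    if os.path.exists(engine_file_path):
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())

    # Otherwise parse the ONNX file and build a new engine.
    explicit_batch = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network(explicit_batch) as network, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:
        config = builder.create_builder_config()
        config.max_workspace_size = 1 << 30  # 1 GiB
        with open(onnx_file_path, "rb") as model:
            if not parser.parse(model.read()):
                for i in range(parser.num_errors):
                    print(parser.get_error(i))
                raise RuntimeError("Failed to parse the ONNX file")
        engine = builder.build_engine(network, config)
        # Cache the engine so the next run can skip the build step.
        with open(engine_file_path, "wb") as f:
            f.write(engine.serialize())
        return engine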
Example #3
 def predict(self, img, min_scale=736):
     # with self.engine.create_execution_context() as context:
     img = self.resize_image(img, min_scale=min_scale)
     self.load_normalized_test_case(img, self.inputs[0].host)
     trt_outputs = common.do_inference_v2(self.context,
                                          bindings=self.bindings,
                                          inputs=self.inputs,
                                          outputs=self.outputs,
                                          stream=self.stream)
     preds = trt_outputs[0].reshape(1, 2, 736, 736)
     mask = preds[0, 0, ...]
     batch = {'shape': [(736, 736)]}
     box_list, score_list = SegDetectorRepresenter(thresh=0.5,
                                                   box_thresh=0.7,
                                                   max_candidates=1000,
                                                   unclip_ratio=1.5)(batch,
                                                                     preds)
     box_list, score_list = box_list[0], score_list[0]
     is_output_polygon = False
     if len(box_list) > 0:
         if is_output_polygon:
             idx = [x.sum() > 0 for x in box_list]
             box_list = [box_list[i] for i, v in enumerate(idx) if v]
             score_list = [score_list[i] for i, v in enumerate(idx) if v]
         else:
             idx = box_list.reshape(box_list.shape[0],
                                    -1).sum(axis=1) > 0  # drop boxes whose coordinates are all zero
             box_list, score_list = box_list[idx], score_list[idx]
     else:
         box_list, score_list = [], []
     return mask, box_list, score_list
Example #4
def main(args):
    with get_engine(args.engine_path, args.model_dir) as engine:
        with engine.create_execution_context() as context:
            origin_img = cv2.imread(args.image_path)
            t1 = time.time()
            img, (ratio_h, ratio_w) = preprocess(origin_img)
            cv2.imwrite("processed.jpg", img)
            h, w, _ = img.shape
            # hwc to chw
            img = img.transpose((2, 0, 1))
            # flatten the image into a 1D array
            img = img.ravel()
            context.set_binding_shape(0, (1, 3, h, w))
            # allocate buffers and create a stream.
            inputs, outputs, bindings, stream = common.allocate_buffers(
                engine, context)
            # copy to pagelocked memory
            np.copyto(inputs[0].host, img)
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference_v2(context,
                                              bindings=bindings,
                                              inputs=inputs,
                                              outputs=outputs,
                                              stream=stream)
            # reshape 1D array to chw
            output = np.reshape(output, (6, h // 4, w // 4))
            # transpose chw to hwc
            output = output.transpose(1, 2, 0)
            boxes = postprocess(origin_img, output, ratio_h, ratio_w)
            t2 = time.time()
            print("total cost %fms" % ((t2 - t1) * 1000))
            draw_result(origin_img, boxes)
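Example #4 above (and Example #5 below) sets the input binding shape on the execution context at runtime, so buffer sizes have to be queried from the context rather than from the engine. A possible dynamic-shape variant of the allocator, under the same assumptions as the sketch after Example #1; the function name allocate_buffers_dynamic is hypothetical.

import pycuda.autoinit  # noqa: F401
import pycuda.driver as cuda
import tensorrt as trt

class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

def allocate_buffers_dynamic(engine, context):
    # context.get_binding_shape reflects the shapes set via set_binding_shape,
    # so the buffers match the actual input resolution of this run.
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for i in range(engine.num_bindings):
        size = trt.volume(context.get_binding_shape(i))
        dtype = trt.nptype(engine.get_binding_dtype(i))
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        bindings.append(int(device_mem))
        if engine.binding_is_input(i):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream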
Example #5
def main():
    onnx_file_path = 'test3.onnx'
    engine_file_path = "model_engine.trt"
    input_image_path="../yoloF_test/YOLOF/datasets/coco/val2017/000000000285.jpg"
    image_raw=Image.open(input_image_path)
    w, h = image_raw.size
    w_, h_ = resize_im(w, h, scale=800, max_scale=4000)
    print(w_,h_)
    image_resized=image_raw.resize((w_,h_),resample=Image.BICUBIC)
    image_resized = np.array(image_resized, dtype=np.int32, order='C')
    
    output_shapes = [(1, 512, 28, 25)]
    trt_outputs = []
    inputs = []
    with get_engine(onnx_file_path, engine_file_path) as engine, engine.create_execution_context() as context:
        inputs,outputs, bindings, stream = allocate_buffers2(engine,w_, h_)
        # Do inference
        print('Running inference on image {}...'.format(input_image_path))
        # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
        inputs[0].host = image_resized
        context.set_binding_shape(0, (1, 3, h_, w_))
        trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)

    # Before doing post-processing, we need to reshape the outputs as the common.do_inference will give us flat arrays.
    trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]
    print(trt_outputs[0])
Example #6
def main():
    # Set the data path to the directory that contains the trained models and test images for inference.
    _, data_files = common.find_sample_data(description="Runs a ResNet50 network with a TensorRT inference engine.", subfolder="resnet50", find_files=["binoculars.jpeg", "reflex_camera.jpeg", "tabby_tiger_cat.jpg", ModelData.MODEL_PATH, "class_labels.txt"])
    # Get test images, models and labels.
    test_images = data_files[0:3]
    onnx_model_file, labels_file = data_files[3:]
    labels = open(labels_file, 'r').read().split('\n')

    # Build a TensorRT engine.
    with build_engine_onnx(onnx_model_file) as engine:
        # Inference is the same regardless of which parser is used to build the engine, since the model architecture is the same.
        # Allocate buffers and create a CUDA stream.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Contexts are used to perform inference.
        with engine.create_execution_context() as context:
            # Load a normalized test case into the host input page-locked buffer.
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, inputs[0].host)
            # Run the engine. The output will be a 1D tensor of length 1000, where each value represents the
            # probability that the image corresponds to that label
            trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            # We use the highest probability as our prediction. Its index corresponds to the predicted label.
            pred = labels[np.argmax(trt_outputs[0])]
            if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)
Example #7
def trt_inference(input):
    global INPUTS, OUTPUTS, BINDINGS, STREAM, CONTEXT
    INPUTS[0].host = input
    trt_outputs = common.do_inference_v2(CONTEXT,
                                         bindings=BINDINGS,
                                         inputs=INPUTS,
                                         outputs=OUTPUTS,
                                         stream=STREAM)

    output = trt_outputs[0].reshape(SHAPE[1], SHAPE[2], SHAPE[3])
    return output
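Example #7 assumes the INPUTS, OUTPUTS, BINDINGS, STREAM and CONTEXT globals (and SHAPE) were initialized once elsewhere. One way that setup might look, reusing the allocate_buffers helper sketched after Example #1; the engine path and SHAPE value here are hypothetical placeholders.

import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
SHAPE = (1, 3, 224, 224)  # hypothetical output shape used by trt_inference
INPUTS, OUTPUTS, BINDINGS, STREAM, CONTEXT = None, None, None, None, None

def init_trt(engine_path="model.trt"):  # hypothetical engine file name
    global INPUTS, OUTPUTS, BINDINGS, STREAM, CONTEXT
    with open(engine_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
    CONTEXT = engine.create_execution_context()
    INPUTS, OUTPUTS, BINDINGS, STREAM = allocate_buffers(engine)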
Example #8
def iter_inf(ds_path, batch_size, model):
    dataloader = data_loader(ds_path, batch_size, (224, 224))
    num_cls = len(glob.glob(os.path.join(ds_path, '*')))
    rslts = {}
    rslts['eval_num'] = len(glob.glob(os.path.join(ds_path, '*/*')))
    print("ds_path",ds_path)
    print("rslts eval_num",rslts['eval_num'])
    correct, counter, inf_time = 0, 0, 0.0
    batch_num = rslts['eval_num'] // batch_size + 1
    remainder = rslts['eval_num'] % batch_size

    print("[ INFO ] Building engine.")
    with build_engine(batch_size, model) as engine, \
            engine.create_execution_context() as context:

        rslts['warm_up_start'] = time.time()
        # For multi profile
        # context.active_optimization_profile = 0
        context.set_binding_shape(0, (batch_size, 3, 224, 224))
        print("[ INFO ] Inference start.")
        pbar = tqdm.tqdm(dataloader)
        for batch in pbar:
            imgs, labels = batch

            inputs, outputs, bindings, stream = common.allocate_buffers(engine)
            inputs[0].host = imgs
            counter += 1

            inf_start = time.time()
            trt_outputs = common.do_inference_v2(
                    context, bindings, inputs, outputs, stream)
            inf_time += time.time() - inf_start

            splited_outputs = ([[output[i:i+num_cls]
                                 for i in range(0, len(output), num_cls)]
                                for output in trt_outputs])
            preds = [np.argsort(splited_output)[:, -1]
                     for splited_output in splited_outputs]

            # FIXME Only count first output here
            if counter != batch_num:
                correct += np.sum(np.equal(preds[0], labels))
            else:
                correct += np.sum(
                        np.equal(preds[0][:remainder], labels[:remainder]))

    rslts['inf_time'] = inf_time
    rslts['end'] = time.time()
    rslts['correct'] = correct
    print("[ INFO ] Inference done.")

    return rslts
Example #9
def infer_img():
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference."""

    # Try to load a previously generated YOLOv3-608 network graph in ONNX format:
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = "yolov3.trt"
    # Download a dog image and save it to the following file path:
    input_image_path = download_file('dog.jpg',
        'https://github.com/pjreddie/darknet/raw/f86901f6177dfc6116360a13cc06ab680e0c86b0/data/dog.jpg', checksum_reference=None)

    # Two-dimensional tuple with the target network's (spatial) input resolution in HW ordered
    input_resolution_yolov3_HW = (608, 608)
    # Create a pre-processor object by specifying the required input resolution for YOLOv3
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    # Load an image from the specified input path, and return it together with  a pre-processed version
    image_raw, image = preprocessor.process(input_image_path)
    # Store the shape of the original input image in WH format, we will need it for later
    shape_orig_WH = image_raw.size

    # Output shapes expected by the post-processor
    output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]
    # Do inference with TensorRT
    trt_outputs = []
    with get_engine(onnx_file_path, engine_file_path) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Do inference
        print('Running inference on image {}...'.format(input_image_path))
        # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
        inputs[0].host = image
        trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)

    # Before doing post-processing, we need to reshape the outputs as the common.do_inference will give us flat arrays.
    trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]

    postprocessor_args = {"yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],                    # A list of 3 three-dimensional tuples for the YOLO masks
                          "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),  # A list of 9 two-dimensional tuples for the YOLO anchors
                                           (59, 119), (116, 90), (156, 198), (373, 326)],
                          "obj_threshold": 0.6,                                               # Threshold for object coverage, float value between 0 and 1
                          "nms_threshold": 0.5,                                               # Threshold for non-max suppression algorithm, float value between 0 and 1
                          "yolo_input_resolution": input_resolution_yolov3_HW}

    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Run the post-processing algorithms on the TensorRT outputs and get the bounding box details of detected objects
    boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH))
    # Draw the bounding boxes onto the original input image and save it as a PNG file
    im = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)

    im = np.asarray(im)[...,::-1]; cv2.imshow("det",im)
    cv2.waitKey(); cv2.destroyAllWindows()
Example #10
def main():
    onnx_file_path = 'bidaf-modified.onnx'
    engine_file_path = "bidaf.trt"

    # input
    context = 'A quick brown fox jumps over the lazy dog.'
    query = 'What color is the fox?'
    cw_str, _ = preprocess(context)
    # get ravelled data
    cw, cc, qw, qc = get_inputs(context, query)

    # Do inference with TensorRT
    refit_weights = np.load("Parameter576_B_0.npy")
    fake_weights = np.ones_like(refit_weights)
    engine = get_engine(onnx_file_path, engine_file_path)
    refitter = trt.Refitter(engine, TRT_LOGGER)
    context = engine.create_execution_context()

    for weights, answer_correct in [(fake_weights, False), (refit_weights, True)]:
        print("Refitting engine...")
        # To get a list of all refittable weights' names
        # in the network, use refitter.get_all_weights().

        # Refit named weights via set_named_weights
        refitter.set_named_weights('Parameter576_B_0', weights)
        # Get missing weights names. This should return empty
        # lists in this case.
        missing_weights = refitter.get_missing_weights()
        assert len(
            missing_weights) == 0, "Refitter found missing weights. Call set_named_weights() or set_weights() for all missing weights"
        # Refit the engine with the new weights. This will return True if
        # the refit operation succeeded.
        assert refitter.refit_cuda_engine()

        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print("Doing inference...")
        # Do inference
        # Set host input. The common.do_inference_v2 function will copy the input to the GPU before executing.
        inputs[0].host = cw
        inputs[1].host = cc
        inputs[2].host = qw
        inputs[3].host = qc
        trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)

        start = np.asscalar(trt_outputs[0])
        end = np.asscalar(trt_outputs[1])
        answer = [w.encode() for w in cw_str[start:end + 1].reshape(-1)]
        assert answer_correct == (answer == [b'brown'])
    print("Passed")
Example #11
def main():
    for bs in BATCH_SIZEs:
        onnx_file_path = 'lab5_model.onnx'
        engine_file_path = ("model%d.trt" % (bs))
        # Do inference with TensorRT

        test_loader = torch.utils.data.DataLoader(test_set,
                                                  batch_size=bs,
                                                  shuffle=False)
        trt_outputs = []
        with get_engine(
                onnx_file_path, engine_file_path,
                bs) as engine, engine.create_execution_context() as context:
            inputs, outputs, bindings, stream = common.allocate_buffers(engine)
            # Do inference
            print('Running inference ')
            print('Batch size: %d' % (bs))
            right = 0
            number = 0
            start_time = time.time()
            # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
            for i, (images, labels) in enumerate(test_loader, 0):
                images = images.numpy()
                inputs[0].host = images
                [results] = common.do_inference_v2(context,
                                                   bindings=bindings,
                                                   inputs=inputs,
                                                   outputs=outputs,
                                                   stream=stream)
                for j in range(len(labels)):
                    result = results[j * 11:(j + 1) * 11]
                    pred = argmax(result)
                    if (pred == labels[j]):
                        right += 1
                    number += 1
            latency = time.time() - start_time
            print('Time elapsed: %.4f' % (latency))
            print(right / number)
            latencies.append(latency)
            FPSs.append(len(test_set) / (latency))

    plt.subplot(2, 1, 1)
    plt.plot(BATCH_SIZEs, latencies)
    plt.ylabel('latency (s)')
    plt.subplot(2, 1, 2)
    plt.plot(BATCH_SIZEs, FPSs)
    plt.xlabel('batch size')
    plt.ylabel('FPS')
    plt.savefig('./test.png')
Example #12
def infer_cam():
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference."""
    # Try to load a previously generated YOLOv3-608 network graph in ONNX format:
    onnx_file_path = 'yolov3.onnx'; engine_file_path = 'yolov3.trt'
    # Two-dimensional tuple with the target network's (spatial) input resolution in HW ordered
    input_resolution_yolov3_HW = (608, 608)
    # Output shapes expected by the post-processor
    output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]
    # Create a pre-processor object by specifying the required input resolution for YOLOv3
    postprocessor_args = {"yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],                    # A list of 3 three-dimensional tuples for the YOLO masks
                          "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),  # A list of 9 two-dimensional tuples for the YOLO anchors
                                           (59, 119), (116, 90), (156, 198), (373, 326)],
                          "obj_threshold": 0.6,                                               # Threshold for object coverage, float value between 0 and 1
                          "nms_threshold": 0.5,                                               # Threshold for non-max suppression algorithm, float value between 0 and 1
                          "yolo_input_resolution": input_resolution_yolov3_HW}

    cap = cv2.VideoCapture(0)
    trt_outputs = [] # Do inference with TensorRT
    with get_engine(onnx_file_path, engine_file_path) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        while True:
            ret, frame = cap.read(); assert ret
            # Load an image from the specified input path, and return it together with  a pre-processed version
            preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
            image_raw, image = preprocessor.process(frame)
            # Store the shape of the original input image in WH format, we will need it for later
            shape_orig_WH = image_raw.size; t = time()

            # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
            inputs[0].host = image
            trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            t = time()-t; fps = 1/t; print("infer: %.2fms, fps: %.2f" % (t*1000, fps))

            # Before doing post-processing, we need to reshape the outputs as the common.do_inference will give us flat arrays.
            trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]

            postprocessor = PostprocessYOLO(**postprocessor_args)
            # Run the post-processing algorithms on the TensorRT outputs and get the bounding box details of detected objects
            boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH))

            im = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)

            im = np.asarray(im)[...,::-1]
            cv2.putText(im, "%.2f"%fps, (12,12), 3, 1, (0,255,0))
            cv2.imshow("det",im)

            if cv2.waitKey(5) == 27: break
    cap.release(); cv2.destroyAllWindows()
Example #13
def main():
    # Set the data path to the directory that contains the trained models and test images for inference.
    kDEFAULT_DATA_ROOT = os.path.join(os.sep, "data")
    # Get test images, models and labels.

    labels_file = './labels.txt'
    labels = open(labels_file, 'r').read().split('\n')
    onnx_model_file = './weights/resnet152_f.onnx'

    classes_folder = glob.glob('./arranged_data_final/val/*')
    all_images = [
        glob.glob(classes_folder[i] + '/*') for i in range(len(classes_folder))
    ]
    merged_images = list(itertools.chain.from_iterable(all_images))

    with open('./weights/resnet152.engine',
              'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())

    # Allocate buffers and create a CUDA stream.
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    with engine.create_execution_context() as context:
        # Load a normalized test case into the host input page-locked buffer.
        start_time = time.time()
        count = 0
        for test_image in merged_images:
            test_case = load_normalized_image(test_image, inputs[0].host)
            # Run the engine. The output will be a 1D tensor of length 1000, where each value represents the
            # probability that the image corresponds to that label
            trt_outputs = common.do_inference_v2(context,
                                                 bindings=bindings,
                                                 inputs=inputs,
                                                 outputs=outputs,
                                                 stream=stream)
            # We use the highest probability as our prediction. Its index corresponds to the predicted label.
            pred = labels[np.argmax(trt_outputs[0])]

            # if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
            #     print("Correctly recognized " + test_case + " as " + pred)
            # else:
            #     print("Incorrectly recognized " + test_case + " as " + pred)
            count = count + 1

        end_time = time.time()
        print('Total time=', end_time - start_time)
        print('Total images processed=', count)
        print('Frames Per Seconds with Tensorrt Engine =',
              count / (end_time - start_time))
Example #14
    def predict(self, preprocessed_image):
        np_image = np.array(preprocessed_image,
                            dtype=np.float32)[np.newaxis, :, :,
                                              (2, 1, 0)]  # RGB -> BGR
        np_image = np.ascontiguousarray(np.rollaxis(np_image, 3, 1))
        assert (
            1,
            3,
            self.model_height,
            self.model_width,
        ) == np_image.shape, "Image must be resized to model shape"

        if self.is_fp16:
            np_image = np_image.astype(np.float16)

        self.cfx.push()
        try:
            inputs, outputs, bindings, stream = common.allocate_buffers(
                self.engine)
            # Do inference
            inputs[0].host = np_image
            trt_outputs = common.do_inference_v2(
                self.context,
                bindings=bindings,
                inputs=inputs,
                outputs=outputs,
                stream=stream,
            )
        finally:
            self.cfx.pop()  # very important
        # Before doing post-processing, we need to reshape the outputs as the common.do_inference will give us flat arrays.
        # There should be nothing to 'round' here. If there is, we made a mistake earlier
        output_shapes = [(
            1,
            (len(self.labels) + 5) * 5,
            int(self.model_height / 32),
            int(self.model_width / 32),
        )]
        trt_outputs = [
            output.reshape(shape)
            for output, shape in zip(trt_outputs, output_shapes)
        ]
        trt_outputs = np.squeeze(trt_outputs).transpose(
            (1, 2, 0)).astype(np.float32)
        return trt_outputs
Example #15
def main():
    model_path = 'weights/bts_nyu_320_mem.trt'
    input_image_path = 'images/NYU0937.jpg'
    input_resolution = (320, 320)

    vs = WebcamVideoStream().start()
    accum_time = 0
    curr_fps = 0
    fps = "FPS: ??"

    with get_engine(model_path) as engine, engine.create_execution_context(
    ) as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)

        while True:
            prev_time = time.time()

            frame = vs.read()
            image = preprocess(frame, input_resolution)

            inputs[0].host = image

            trt_outputs = common.do_inference_v2(context, bindings, inputs,
                                                 outputs, stream)[-1]

            vis = postprocess(trt_outputs, input_resolution)

            curr_time = time.time()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time = accum_time + exec_time
            curr_fps = curr_fps + 1
            if accum_time > 1:
                accum_time = accum_time - 1
                fps = "FPS: " + str(curr_fps)
                print(fps)
                curr_fps = 0

            cv2.imshow('frame', vis)

            if cv2.waitKey(1) == ord('q'):
                break

        cv2.destroyAllWindows()
        vs.stop()
Example #16
def main():
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference."""

    # Try to load a previously generated YOLOv3-608 network graph in ONNX format:
    onnx_file_path = "./models_trained/797-AG-BC.onnx"
    engine_file_path = "./models_trained/797-AG-BC.trt"
    onnx_file_path = "./models_trained/544-CH-CA.onnx"
    engine_file_path = "./models_trained/544-CH-CA.trt"

    # Path to the input image:

    input_image_path = "./imgs_prueba_clasificacion/AG_BC1.png"
    input_image_path = "./imgs_prueba_deteccion/CH_CA.png"

    imagen = Image.open(input_image_path)
    mean = np.array([0.5, 0.5, 0.5])
    std = np.array([0.5, 0.5, 0.5])

    #loader = transforms.Compose(
    #transforms.Resize(128), transforms.ToTensor(), transforms.Normalize(std, mean)])
    loader = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize(std, mean)])
    imagen = loader(imagen).float()
    #imagen = imagen.unsqueeze(0)
    image = imagen.numpy()

    # Do inference with TensorRT
    trt_outputs = []

    with get_engine(onnx_file_path, engine_file_path
                    ) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Do inference
        print('Running inference on image {}...'.format(input_image_path))
        # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
        inputs[0].host = image
        trt_outputs = common.do_inference_v2(context,
                                             bindings=bindings,
                                             inputs=inputs,
                                             outputs=outputs,
                                             stream=stream)
        print(trt_outputs)
Example #17
    def __call__(self, input):
        # init image to input location.
        np.copyto(self.inputs[0].host, input.ravel())

        # When infering on single image, we measure inference
        # time to output it to the user
        inference_start_time = time.time()

        # Fetch output from the model
        [heatmaps, pafs] = common.do_inference_v2(self.context,
                                                  bindings=self.bindings,
                                                  inputs=self.inputs,
                                                  outputs=self.outputs,
                                                  stream=self.stream)

        # Output inference time
        # print("TensorRT inference time: {} ms".format(
        #     int(round((time.time() - inference_start_time) * 1000))))

        # And return results
        return pafs, heatmaps
Example #18
def inference(data_dir, engine_path, long_side_size=1024):
    filenames = glob.glob(data_dir + '/*g')[:10]
    times = []
    TRT_LOGGER = trt.Logger()
    trt_runtime = trt.Runtime(TRT_LOGGER)
    engine = load_engine(trt_runtime, engine_path)

    with engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        for filename in tqdm(filenames):
            ori_image = cv2.imread(filename)
            image, im_scales = scale(ori_image, long_size=long_side_size)
            image, shape = preprocess(image)
            inputs[0].host = image

            t1 = time.time()
            trt_outputs = common.do_inference_v2(context,
                                                 bindings=bindings,
                                                 inputs=inputs,
                                                 outputs=outputs,
                                                 stream=stream)
            dur = time.time() - t1
            times.append(dur)
            gaussian_map = trt_outputs[0].reshape(
                (shape[0] // 4, shape[1] // 4))
            boxes, scores = postprocess(gaussian_map)

            polys = []
            if boxes is not None:
                boxes = boxes[:, :8].reshape((-1, 4, 2)) * 1. / im_scales
                for box in boxes:
                    box = sort_poly(box.astype(np.int32))
                    polys.append(box)
                polys = np.array(polys, dtype=np.float32).reshape((-1, 8))
                result_im = draw_polys(ori_image, polys)
                cv2.imwrite(
                    "res/{}.jpg".format(filename.split('/')[-1].split('.')[0]),
                    result_im)

    print("mean_time:", np.mean(times))
Example #19
def main():
    model_path = "alexnet.trt"

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    runtime = trt.Runtime(TRT_LOGGER)
    f = open(model_path, "rb")
    engine = runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()
    f.close()

    for binding in engine:
        size = trt.volume(
            engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            print("input_size: ", size, "dtype: ", dtype)
        else:
            print("output_size: ", size, "dtype: ", dtype)

    inputs, outputs, bindings, stream = common.allocate_buffers(engine)

    length = input_shape[0] * input_shape[1] * input_shape[2] * input_shape[3]
    data = np.zeros(length, dtype=np.float32)
    data[:] = 1.0
    inputs[0].host = data.reshape(input_shape)
    print(inputs[0].host[0][0][0][:10])

    outputs[0].host = np.zeros(output_shape, dtype=np.float32)
    trt_outputs = common.do_inference_v2(context,
                                         bindings=bindings,
                                         inputs=inputs,
                                         outputs=outputs,
                                         stream=stream)
    print(trt_outputs[0].shape)
    print(trt_outputs[0][0][0][:10])
Example #20
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-v', '--verbose', action='store_true',
        help='enable verbose output (for debugging)')
    parser.add_argument(
        '-m', '--model', type=str, default='model', 
        )
    args = parser.parse_args() 



    trt_file_path = '%s.trt' % args.model
    if not os.path.isfile(trt_file_path):
        raise SystemExit('ERROR: file (%s) not found!' % trt_file_path)
    engine_file_path = '%s.trt' % args.model
    engine = load_engine(trt_file_path, args.verbose)

    h_inputs, h_outputs, bindings, stream = common.allocate_buffers(engine)


    cap = cv2.VideoCapture(2)
    with engine.create_execution_context() as context:
        while True:
            _,frame = cap.read()
            t1 = time.time()
            img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            img = Image.fromarray(img)
            img = img_transforms(img).numpy()

            h_inputs[0].host = img
            t3 = time.time()
            trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=h_inputs, outputs=h_outputs, stream=stream)
            t4 = time.time()
            

            out_j = trt_outputs[0].reshape(101, 56, 4)
            
            prob = scipy.special.softmax(out_j[:-1, :, :], axis=0)


            idx = np.arange(100) + 1
            idx = idx.reshape(-1, 1, 1)

            loc = np.sum(prob * idx, axis=0)
            out_j = np.argmax(out_j, axis=0)
            loc[out_j == 100] = 0
            out_j = loc

            # import pdb; pdb.set_trace()
            vis = frame
            for i in range(out_j.shape[1]):
                if np.sum(out_j[:, i] != 0) > 2:
                    for k in range(out_j.shape[0]):
                        if out_j[k, i] > 0:
                            ppp = (int(out_j[k, i] * col_sample_w * img_w / 800) - 1, int(img_h * (row_anchor[k]/288)) - 1 )
                            cv2.circle(vis,ppp,  img_w//300 ,color[i],-1)

            t2 = time.time()
            print('Inference time', (t4-t3)*1000)
            print('FPS', int(1/((t2-t1))))
            cv2.imshow("OUTPUT", vis)
            cv2.waitKey(1)
Example #21
def main():
    # model
    label_file_path = 'models/starwars.names'
    engine_file_path = "models/starwars_yolov3_fp16.trt"

    # label list
    all_classes = img_utils.load_label_classes(label_file_path)
    num_classes = len(all_classes)

    trt_runtime = trt.Runtime(TRT_LOGGER)

    print(f"Lade TensorRT Engine {engine_file_path}")
    trt_engine = common.load_engine(trt_runtime, engine_file_path)

    inputs, outputs, bindings, streams = common.allocate_buffers(trt_engine)
    context = trt_engine.create_execution_context()

    new_width = 416
    new_height = 416
    input_shape = (new_width, new_height)
    output_shapes = [(1, -1, new_height // 32, new_width // 32),
                     (1, -1, new_height // 16, new_width // 16),
                     (1, -1, new_height //  8, new_width //  8)]

    # open webcam
    cap = cv2.VideoCapture(0)

    if not cap.isOpened():
        print("Kann die Webcam, nicht öffnen")
        exit()

    while cap.isOpened():
        
        # read frame from webcam 
        status, frame = cap.read()

        if not status:
            print("Kann kein Bild laden")
            exit()

        #fps = cap.get(cv2.CAP_PROP_FPS)
        #print(f"Frames per second using video.get(cv2.CAP_PROP_FPS) : {fps}")


        image_resized = cv2.resize(frame, (new_width, new_height), interpolation = cv2.INTER_AREA)
        image_resized = np.array(image_resized, dtype=np.float32, order='C')
        image_resized /= 255.0
        image_processed = np.transpose(image_resized, [2, 0, 1])
        image_processed = np.expand_dims(image_processed, axis=0)
        image_processed = np.array(image_processed, dtype=np.float32, order='C')


        inputs[0].host = image_processed

        trt_outputs = common.do_inference_v2(
           context,
           bindings=bindings,
           inputs=inputs,
           outputs=outputs,
           stream=streams
        )

        trt_outputs  = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]

        resolution_raw = (int(frame.shape[1]), int(frame.shape[0]))

        bboxes = process_outputs(trt_outputs, num_classes, resolution_raw, input_shape, 0.6)
        

        # loop through detected bounding boxes
        for bbox in bboxes:
            # get corner points of the bounding box
            coor = np.array(bbox[:4], dtype=np.int32)
            (startX, startY) = coor[0], coor[1]
            (endX, endY) = coor[2], coor[3]

            # draw rectangle over the detected object
            cv2.rectangle(frame, (int(startX+20),int(startY+20)), (int(endX-20),int(endY-20)), (0,255,0), 2)

            # get label with max accuracy
            score = bbox[4]
            score = '%.2f' % score

            class_ind = int(bbox[5])
            class_name = all_classes[class_ind]

            print(f"{class_name} LEGO Figur erkannt zu {score}%")

            # write label and confidence above the bounding box
            cv2.putText(frame, class_name, (startX, startY),  cv2.FONT_HERSHEY_SIMPLEX,
                0.7, (0, 255, 0), 2)

        # display output
        cv2.imshow("LEGO Star Wars Object Detection", frame)

        # press "Q" to stop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # release resources
    cap.release()
    cv2.destroyAllWindows()
Example #22
    def predict(self,
                input_path='dog.jpg',
                output_save_root='./output',
                write_txt=False):
        '''
        :param input_path: a single image path, an image folder, or a single video file path
        :param output_save_root: folder where all results are saved; video results are written as mp4
        :param write_txt: save the predicted box coordinates, class and confidence as txt
        :return:
        '''
        # Decide whether the input is a single image, a folder of images, or a video
        is_video = False
        path = input_path
        if os.path.isdir(path):
            # image folder
            img_names = os.listdir(path)
            img_names = [
                name for name in img_names
                if name.split('.')[-1] in self.img_formats
            ]
        elif os.path.isfile(path):
            # split '/hme/ai/111.jpg' -> ('/hme/ai', '111.jpg')
            path, img_name = os.path.split(path)
            # flag video input
            if img_name.split('.')[-1] in self.vid_formats:
                is_video = True
            else:
                assert img_name.split('.')[-1] in self.img_formats, "must be a single image path"
                img_names = [img_name]
        else:
            print("输入无效!!!" * 3)

        # create the output folder
        check_path(output_save_root)
        # handle video input
        if is_video:
            assert img_name.count('.') == 1, "the video file name must contain exactly one '.'"

            # open the video
            cap = cv2.VideoCapture(os.path.join(path, img_name))
            # get the video's fps, width and height
            fps = int(cap.get(cv2.CAP_PROP_FPS))
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            num = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))  # total number of frames
            # create the output video writer
            video_save_path = os.path.join(
                output_save_root,
                img_name.split('.')[0] + '_pred.mp4')
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            video_writer = cv2.VideoWriter(video_save_path,
                                           fourcc=fourcc,
                                           fps=fps,
                                           frameSize=(width, height))
        else:
            num = len(img_names)  # number of images

        # inference (defaults to GPU 0)
        inputs, outputs, bindings, stream = common.allocate_buffers(
            self.engine)
        # Do inference
        for i in range(num):
            # preprocessing
            if is_video:
                cap.set(cv2.CAP_PROP_POS_FRAMES, i)  # seek to the given frame
                _, image = cap.read()  # cap.read() returns (ret, frame)
                # the input here is a BGR frame matrix
                image_raw, image = self.preprocessor.process(image)
            else:
                # otherwise the input is an image path
                image_raw, image = self.preprocessor.process(
                    os.path.join(path, img_names[i]))

            # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
            inputs[0].host = image
            trt_outputs = common.do_inference_v2(self.context,
                                                 bindings=bindings,
                                                 inputs=inputs,
                                                 outputs=outputs,
                                                 stream=stream)

            # reshape each flat output; this could live one level up, but the input image changes every iteration
            trt_outputs = [
                output.reshape(shape)
                for output, shape in zip(trt_outputs, self.output_shapes)
            ]

            # post-processing; two variants are handled: the original yolov4 predictions and the yolov5-style ones
            # original image size in WH order, since the image was loaded with PIL
            shape_orig_WH = image_raw.size

            # the post-processor can handle batch >= 1, but this class only supports batch = 1
            outputs_pred = self.postprocessor.process(trt_outputs,
                                                      shape_orig_WH)

            # TODO write the predicted box coordinates, class and confidence to txt

            # draw boxes; only a single image is handled here, so no batch loop is needed
            boxes, classes, scores = outputs_pred[0][0]
            obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes,
                                           self.all_categories)

            # video frames are saved by frame index, images by name; frame counts rarely exceed 5 digits
            # TODO write the per-frame predictions into the output video
            if is_video:
                obj_detected_img.save(
                    os.path.join(output_save_root,
                                 str(i).zfill(5)))
            else:
                obj_detected_img.save(
                    os.path.join(output_save_root, img_names[i]))

        # release the capture if the input was a video
        if is_video:
            cap.release()
            cv2.destroyAllWindows()
Example #23

if __name__ == '__main__':

    onnx_model_file = "grid_sample.onnx"
    export_onnx_model(onnx_model_file)
    modify_onnx(onnx_model_file)

    # Build a TensorRT engine.
    with build_engine_onnx(onnx_model_file) as engine:
        # Inference is the same regardless of which parser is used to build the engine, since the model architecture is the same.
        # Allocate buffers and create a CUDA stream.
        inputs, outputs, bindings, stream = common.allocate_buffers(
            engine, True, 2)
        # Contexts are used to perform inference.
        with engine.create_execution_context() as context:
            # test 1. float16 input, via nvprof, you can see __half populated template function is called
            # test 2. Dims of input and grid is -1 on batch dim. Set context binding shape and feed proper data
            input = input_rand[0:2, :, :, :].astype('float16')
            grid = grid_rand[0:2, :, :, :].astype('float16')
            context.set_binding_shape(0, (2, 1, 4, 4))
            context.set_binding_shape(1, (2, 4, 4, 2))
            inputs[0].host = input
            inputs[1].host = grid
            trt_outputs = common.do_inference_v2(context,
                                                 bindings=bindings,
                                                 inputs=inputs,
                                                 outputs=outputs,
                                                 stream=stream)
            print(trt_outputs)
Example #24
def main():
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference."""

    # Try to load a previously generated YOLOv3-608 network graph in ONNX format:
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = "yolov3.trt"
    input_image_path = "demo/test2.jpg"
    # Two-dimensional tuple with the target network's (spatial) input resolution in HW ordered
    input_resolution_yolov3_HW = (608, 608)
    # Create a pre-processor object by specifying the required input resolution for YOLOv3
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    # Load an image from the specified input path, and return it together with  a pre-processed version
    image_raw, image = preprocessor.process(input_image_path)
    # Store the shape of the original input image in WH format, we will need it for later
    shape_orig_WH = image_raw.shape[:2]

    # Output shapes expected by the post-processor
    output_shapes = [(batch_size, 180, 19, 19), (batch_size, 180, 38, 38),
                     (batch_size, 180, 76, 76)]
    # Do inference with TensorRT
    trt_outputs = []
    with get_engine(onnx_file_path, engine_file_path
                    ) as engine, engine.create_execution_context() as context:
        t1 = time.time()
        print(engine.max_batch_size)
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Do inference
        print('Running inference on image {}...'.format(input_image_path))
        # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
        inputs[0].host = image
        trt_outputs = common.do_inference_v2(context,
                                             bindings=bindings,
                                             inputs=inputs,
                                             outputs=outputs,
                                             stream=stream)
    t2 = time.time()
    # Before doing post-processing, we need to reshape the outputs as the common.do_inference will give us flat arrays.
    trt_outputs = [
        output.reshape(shape)
        for output, shape in zip(trt_outputs, output_shapes)
    ]

    postprocessor_args = {
        "yolo_masks":
        [(6, 7, 8), (3, 4, 5),
         (0, 1, 2)],  # A list of 3 three-dimensional tuples for the YOLO masks
        "yolo_anchors": [
            (10, 13),
            (16, 30),
            (33, 23),
            (30, 61),
            (62,
             45),  # A list of 9 two-dimensional tuples for the YOLO anchors
            (59, 119),
            (116, 90),
            (156, 198),
            (373, 326)
        ],
        "obj_threshold":
        0.1,  # Threshold for object coverage, float value between 0 and 1
        "nms_threshold":
        0.6,  # Threshold for non-max suppression algorithm, float value between 0 and 1
        "yolo_input_resolution":
        input_resolution_yolov3_HW,
        "batch_size":
        batch_size
    }

    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Run the post-processing algorithms on the TensorRT outputs and get the bounding box details of detected objects
    boxes, classes, scores = postprocessor.process(trt_outputs,
                                                   (shape_orig_WH))
    t3 = time.time()
    # Draw the bounding boxes onto the original input image and save it as a PNG file
    obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes,
                                   ALL_CATEGORIES)
    output_image_path = 'result.jpg'
    cv2.imwrite(output_image_path, obj_detected_img)
    print('Saved image with bounding boxes of detected objects to {}.'.format(
        output_image_path))
    print(f"model time: {t2 - t1}s, process time: {t3 - t2}s.")
Example #25
def main():
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference."""

    # Try to load a previously generated YOLOv3-608 network graph in ONNX format:
    # paths to the ONNX model and the corresponding engine file
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = "yolov3.trt"
    # Download a dog image and save it to the following file path:
    # download the test image
    input_image_path = common.download_file(
        'dog.jpg',
        'https://github.com/pjreddie/darknet/raw/f86901f6177dfc6116360a13cc06ab680e0c86b0/data/dog.jpg',
        checksum_reference=None)

    # Two-dimensional tuple with the target network's (spatial) input resolution in HW ordered
    # the network's input width and height
    input_resolution_yolov3_HW = (608, 608)
    # Create a pre-processor object by specifying the required input resolution for YOLOv3
    # PreprocessYOLO is implemented in data_processing.py
    # it loads the image and applies the required preprocessing
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    # Load an image from the specified input path, and return it together with  a pre-processed version
    # load an image from the given path and return the raw image together with a preprocessed version
    # see data_processing.py for details
    image_raw, image = preprocessor.process(input_image_path)
    # Store the shape of the original input image in WH format, we will need it for later
    # store the original image dimensions
    shape_orig_WH = image_raw.size

    # Output shapes expected by the post-processor
    # output layer shapes
    output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]
    # Do inference with TensorRT
    # run inference with TensorRT
    trt_outputs = []
    # get_engine is defined in this file
    # it loads/builds the engine and creates the execution context
    with get_engine(onnx_file_path, engine_file_path
                    ) as engine, engine.create_execution_context() as context:
        # allocate the host/device buffers
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Do inference
        print('Running inference on image {}...'.format(input_image_path))
        # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
        # copy the input data into host memory
        inputs[0].host = image
        # run the inference
        trt_outputs = common.do_inference_v2(context,
                                             bindings=bindings,
                                             inputs=inputs,
                                             outputs=outputs,
                                             stream=stream)

    # Before doing post-processing, we need to reshape the outputs as the common.do_inference will give us flat arrays.
    # reshape the inference outputs
    trt_outputs = [
        output.reshape(shape)
        for output, shape in zip(trt_outputs, output_shapes)
    ]

    postprocessor_args = {
        "yolo_masks":
        [(6, 7, 8), (3, 4, 5),
         (0, 1, 2)],  # A list of 3 three-dimensional tuples for the YOLO masks
        "yolo_anchors": [
            (10, 13),
            (16, 30),
            (33, 23),
            (30, 61),
            (62,
             45),  # A list of 9 two-dimensional tuples for the YOLO anchors
            (59, 119),
            (116, 90),
            (156, 198),
            (373, 326)
        ],
        "obj_threshold":
        0.6,  # Threshold for object coverage, float value between 0 and 1
        "nms_threshold":
        0.5,  # Threshold for non-max suppression algorithm, float value between 0 and 1
        "yolo_input_resolution":
        input_resolution_yolov3_HW
    }
    # the rest is post-processing
    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Run the post-processing algorithms on the TensorRT outputs and get the bounding box details of detected objects
    boxes, classes, scores = postprocessor.process(trt_outputs,
                                                   (shape_orig_WH))
    # Draw the bounding boxes onto the original input image and save it as a PNG file
    obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes,
                                   ALL_CATEGORIES)
    output_image_path = 'dog_bboxes.png'
    obj_detected_img.save(output_image_path, 'PNG')
    print('Saved image with bounding boxes of detected objects to {}.'.format(
        output_image_path))
Example #26
def main():
    """Create a TensorRT engine for ONNX-based YOLOv3 """

    # Try to load a previously generated YOLOv3-608 network graph in ONNX format:
    onnx_file_path = ONNX_FILE_PATH
    engine_file_path = ENGINE_FILE_PATH
    # Path to the test image:
    input_image_path = TEST_IMAGE

    # Two-dimensional tuple with the target network's (spatial) input resolution in HW ordered
    input_resolution_yolov3_HW = (320, 800)
    # Create a pre-processor object by specifying the required input resolution for YOLOv3
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    # Load an image from the specified input path, and return it together with  a pre-processed version
    image_raw, image = preprocessor.process(input_image_path)
    # Store the shape of the original input image in WH format, we will need it for later
    shape_orig_WH = image_raw.size

    # Output shapes expected by the post-processor
    #output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]
    output_shapes = get_outshape(3, 800, 320)
    # Do inference with TensorRT
    trt_outputs = []
    with get_engine(onnx_file_path, engine_file_path
                    ) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Do inference
        print('Running inference on image {}...'.format(input_image_path))
        # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
        inputs[0].host = image
        trt_outputs = common.do_inference_v2(context,
                                             bindings=bindings,
                                             inputs=inputs,
                                             outputs=outputs,
                                             stream=stream)

    # Before doing post-processing, we need to reshape the outputs as the common.do_inference will give us flat arrays.
    trt_outputs = [
        output.reshape(shape)
        for output, shape in zip(trt_outputs, output_shapes)
    ]

    postprocessor_args = {
        "yolo_masks":
        [(6, 7, 8), (3, 4, 5),
         (0, 1, 2)],  # A list of 3 three-dimensional tuples for the YOLO masks
        "yolo_anchors": [
            (10, 13),
            (16, 30),
            (33, 23),
            (30, 61),
            (62,
             45),  # A list of 9 two-dimensional tuples for the YOLO anchors
            (59, 119),
            (116, 90),
            (156, 198),
            (373, 326)
        ],
        "obj_threshold":
        0.6,  # Threshold for object coverage, float value between 0 and 1
        "nms_threshold":
        0.5,  # Threshold for non-max suppression algorithm, float value between 0 and 1
        "yolo_input_resolution":
        input_resolution_yolov3_HW
    }

    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Run the post-processing algorithms on the TensorRT outputs and get the bounding box details of detected objects
    boxes, classes, scores = postprocessor.process(trt_outputs,
                                                   (shape_orig_WH))
    # Draw the bounding boxes onto the original input image and save it as a PNG file
    obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes,
                                   ALL_CATEGORIES)
    output_image_path = 'onnx_trans_test.png'
    obj_detected_img.save(output_image_path, 'PNG')
    print('Saved image with bounding boxes of detected objects to {}.'.format(
        output_image_path))
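# The obj_threshold / nms_threshold values above drive the post-processing step.
# A minimal, self-contained sketch (not the PostprocessYOLO implementation) of how
# greedy non-max suppression with an IoU threshold typically works:
import numpy as np

def nms_sketch(boxes, scores, iou_threshold=0.5):
    """boxes: (N, 4) array of [x1, y1, x2, y2]; scores: (N,) confidences."""
    order = np.argsort(-scores)  # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # IoU of the best remaining box against all the others
        x1 = np.maximum(boxes[i, 0], boxes[order[1:], 0])
        y1 = np.maximum(boxes[i, 1], boxes[order[1:], 1])
        x2 = np.minimum(boxes[i, 2], boxes[order[1:], 2])
        y2 = np.minimum(boxes[i, 3], boxes[order[1:], 3])
        inter = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)
        area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
        area_r = (boxes[order[1:], 2] - boxes[order[1:], 0]) * \
                 (boxes[order[1:], 3] - boxes[order[1:], 1])
        iou = inter / (area_i + area_r - inter)
        # Drop boxes that overlap the kept box above the threshold
        order = order[1:][iou <= iou_threshold]
    return keep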
def main():
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference."""

    # Try to load a previously generated YOLOv3-608 network graph in ONNX format:
    onnx_file_path = './yolov3.onnx'
    engine_file_path = "yolov3.trt"
    data_path = "./data/unrel.data"

    data = parse_data_cfg(data_path)
    nc = int(data['classes'])  # number of classes
    path = data['valid']  # path to test images
    names = load_classes(data['names'])  # class names

    iouv = torch.linspace(0.5, 0.95, 1,
                          dtype=torch.float32)  # IoU vector for mAP@0.5:0.95; with a single step only IoU=0.5 is evaluated
    niou = 1

    conf_thres = 0.001
    iou_thres = 0.6
    verbose = True

    # Generate custom dataloader
    img_size = 448  # copied from the PyTorch source
    batch_size = 16

    dataset = LoadImagesAndLabels(path, img_size, batch_size, rect=True)
    batch_size = min(batch_size, len(dataset))
    dataloader = data_loader(dataset, batch_size, img_size)

    # Output shapes expected by the post-processor
    output_shapes = [(16, 126, 14, 14), (16, 126, 28, 28), (16, 126, 56, 56)]
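    # 126 channels per scale follows from 3 anchors * (5 + num_classes); with the
    # 37 classes used below, 3 * (5 + 37) = 126. The 14/28/56 grids correspond to
    # the 448x448 input at strides 32/16/8, and 16 is the batch size.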

    # Do inference with TensorRT
    trt_outputs = []
    with get_engine(onnx_file_path, engine_file_path
                    ) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                     'mAP@0.5', 'F1')
        p, r, f1, mp, mr, map, mf1, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
        pbar = tqdm.tqdm(dataloader, desc=s)
        stats, ap, ap_class = [], [], []
        seen = 0

        for batch_i, (imgs, targets, paths, shapes) in enumerate(pbar):

            imgs = imgs.astype(np.float32) / 255.0
            nb, _, height, width = imgs.shape  # batch size, channels, height, width
            whwh = np.array([width, height, width, height])

            inputs[0].host = imgs
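            # Assumption: the engine was built with a static (16, 3, 448, 448) input,
            # so the host buffer expects a contiguous float32 batch of exactly that shape.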

            postprocessor_args = {
                # A list of 3 three-dimensional tuples for the YOLO masks
                "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
                # A list of 9 two-dimensional tuples for the YOLO anchors
                "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                                 (59, 119), (116, 90), (156, 198), (373, 326)],
                "num_classes": 37,
                "stride": [32, 16, 8]
            }

            postprocessor = PostprocessYOLO(**postprocessor_args)

            # Run all layers up to the YOLO heads with TensorRT (YOLO decoding happens in post-processing)
            t = time.time()
            trt_outputs = common.do_inference_v2(context,
                                                 bindings=bindings,
                                                 inputs=inputs,
                                                 outputs=outputs,
                                                 stream=stream)

            trt_outputs = [
                output.reshape(shape)
                for output, shape in zip(trt_outputs, output_shapes)
            ]

            trt_outputs = [
                np.ascontiguousarray(
                    otpt[:, :, :int(imgs.shape[2] * (2**i) /
                                    32), :int(imgs.shape[3] * (2**i) / 32)],
                    dtype=np.float32) for i, otpt in enumerate(trt_outputs)
            ]
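            # The slices above trim each scale back to the grid that matches the
            # (possibly rectangular) letterboxed batch: stride 32 for i=0, 16 for
            # i=1 and 8 for i=2, i.e. height/(32 >> i) x width/(32 >> i) cells.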

            output_list = postprocessor.process(trt_outputs)

            t0 += time.time() - t

            inf_out = torch.cat(output_list, 1)
            t = time.time()
            output = non_max_suppression(inf_out,
                                         conf_thres=conf_thres,
                                         iou_thres=iou_thres)  # nms
            t1 += time.time() - t

            # Statistics per image
            for si, pred in enumerate(output):
                labels = targets[targets[:, 0] == si, 1:]
                nl = len(labels)
                tcls = labels[:, 0].tolist() if nl else []  # target class
                seen += 1

                if pred is None:
                    if nl:
                        stats.append((torch.zeros(0, niou, dtype=torch.bool),
                                      torch.Tensor(), torch.Tensor(), tcls))
                    continue

                # Assign all predictions as incorrect
                correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool)
                if nl:
                    detected = []  # target indices
                    tcls_tensor = labels[:, 0]

                    # target boxes
                    tbox = xywh2xyxy(labels[:, 1:5]) * whwh
                    tbox = tbox.type(torch.float32)

                    # Per target class
                    for cls in torch.unique(tcls_tensor):
                        ti = (cls == tcls_tensor).nonzero().view(
                            -1)  # target indices
                        pi = (cls == pred[:, 5]).nonzero().view(
                            -1)  # prediction indices

                        # Search for detections
                        if pi.shape[0]:
                            # Prediction to target ious
                            ious, i = box_iou(pred[pi, :4], tbox[ti]).max(
                                1)  # best ious, indices

                            # Append detections
                            for j in (ious > iouv[0]).nonzero():
                                d = ti[i[j]]  # detected target
                                if d not in detected:
                                    detected.append(d)
                                    correct[pi[j]] = ious[
                                        j] > iouv  # iou_thres is 1xn
                                    if len(
                                            detected
                                    ) == nl:  # all targets already located in image
                                        break

                # Append statistics (correct, conf, pcls, tcls)
                stats.append(
                    (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

            # Plot images
            if batch_i < 1:
                f = 'test_batch%g_gt.jpg' % batch_i  # filename
                plot_images(imgs, targets, paths=paths, names=names,
                            fname=f)  # ground truth
                f = 'test_batch%g_pred.jpg' % batch_i
                plot_images(imgs,
                            output_to_target(output, width, height),
                            paths=paths,
                            names=names,
                            fname=f)  # predictions

        # Compute statistics
        stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
        if len(stats):
            p, r, ap, f1, ap_class = ap_per_class(*stats)
            if niou > 1:
                p, r, ap, f1 = p[:, 0], r[:, 0], ap.mean(
                    1), ap[:, 0]  # [P, R, mAP@0.5:0.95, mAP@0.5]
            mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
            nt = np.bincount(stats[3].astype(np.int64),
                             minlength=nc)  # number of targets per class
        else:
            nt = torch.zeros(1)

        # Print results
        pf = '%20s' + '%10.3g' * 6  # print format
        print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1))

        # Print results per class
        if verbose and nc > 1 and len(stats):
            for i, c in enumerate(ap_class):
                print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))

        # Print speeds
        if verbose:
            t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (
                img_size, img_size, batch_size)  # tuple
            print(
                'Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g'
                % t)
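# ap_per_class is not shown in this snippet; a minimal sketch (an assumption, not the
# actual implementation) of how per-class average precision can be derived from the
# accumulated (correct, conf, pred_cls, target_cls) statistics at a single IoU threshold:
import numpy as np

def ap_sketch(correct, conf, pred_cls, target_cls):
    order = np.argsort(-conf)                    # most confident predictions first
    correct, pred_cls = correct[order], pred_cls[order]
    aps = []
    for c in np.unique(target_cls):
        n_gt = (target_cls == c).sum()           # ground-truth boxes of this class
        mask = pred_cls == c
        if n_gt == 0 or not mask.any():
            aps.append(0.0)
            continue
        tp = np.cumsum(correct[mask])            # cumulative true positives
        fp = np.cumsum(1 - correct[mask])        # cumulative false positives
        recall = tp / n_gt
        precision = tp / (tp + fp)
        aps.append(np.trapz(precision, recall))  # area under the P-R curve
    return np.array(aps)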
Exemple #28
0
def main():
    parser = argparse.ArgumentParser(description="TensorRT model inference")
    parser.add_argument("--model",
                        "-m",
                        required=True,
                        type=str,
                        help="TensorRT model path")
    parser.add_argument("--input_shape",
                        "-in",
                        required=True,
                        type=int,
                        nargs="+",
                        help="input shape")
    parser.add_argument("--output_shape",
                        "-out",
                        required=True,
                        type=int,
                        nargs="+",
                        help="output shape")
    args = parser.parse_args()

    # model_path="9_16_22_7.model.trt"
    model_path = args.model
    input_shape = tuple(args.input_shape)
    output_shape = tuple(args.output_shape)
    print("input_shape: ", input_shape)
    print("output_shape: ", output_shape)

    TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE)
    runtime = trt.Runtime(TRT_LOGGER)
    with open(model_path, "rb") as f:
        engine = runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()

    for binding in engine:
        size = trt.volume(
            engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            print("input_size: ", size, "dtype: ", dtype)
        else:
            print("output_size: ", size, "dtype: ", dtype)

    inputs, outputs, bindings, stream = common.allocate_buffers(engine)

    length = input_shape[0] * input_shape[1] * input_shape[2] * input_shape[3]
    data = np.zeros(length, dtype=np.float32)
    data[:] = 1.0
    inputs[0].host = data.reshape(input_shape)
    print(inputs[0].host[0][0][0][:10])

    outputs[0].host = np.zeros(output_shape, dtype=np.float32)
    trt_outputs = common.do_inference_v2(context,
                                         bindings=bindings,
                                         inputs=inputs,
                                         outputs=outputs,
                                         stream=stream)
    print(trt_outputs[0].shape)
    print(trt_outputs[0])

    print("starting...")
    starttime = time.time()
    for i in range(1000 * 2 * 10):
        trt_outputs = common.do_inference_v2(context,
                                             bindings=bindings,
                                             inputs=inputs,
                                             outputs=outputs,
                                             stream=stream)
        # time.sleep(10)
        # print(trt_outputs[0])
    endtime = time.time()
    print(endtime - starttime)

    print(trt_outputs[0])
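    # A small follow-up sketch: turn the elapsed wall-clock time above into
    # per-inference latency and throughput (20,000 iterations were timed).
    iters = 1000 * 2 * 10
    elapsed = endtime - starttime
    print("latency: %.3f ms/inference, throughput: %.1f inferences/s" %
          (elapsed / iters * 1e3, iters / elapsed))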
Exemple #29
0
def main():
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference."""

    # Try to load a previously generated YOLOv3-608 network graph in ONNX format:
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = "yolov3.trt"
    # Download a dog image and save it to the following file path:
    #input_image_path = 'images/dog.jpg'

    # Two-dimensional tuple with the target network's (spatial) input resolution in HW order
    input_resolution_yolov3_HW = (416, 416)
    # Create a pre-processor object by specifying the required input resolution for YOLOv3
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    # Load an image from the specified input path, and return it together with a pre-processed version
    #image_raw, image = preprocessor.process(input_image_path)
    # Store the shape of the original input image in WH format; we will need it later
    #shape_orig_WH = image_raw.size

    # Output shapes expected by the post-processor
    #output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]
    output_shapes = [(1, 255, 13, 13), (1, 255, 26, 26), (1, 255, 52, 52)]
    # Do inference with TensorRT
    trt_outputs = []

    # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
    postprocessor_args = {
        # A list of 3 three-dimensional tuples for the YOLO masks
        "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        # A list of 9 two-dimensional tuples for the YOLO anchors
        "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                         (59, 119), (116, 90), (156, 198), (373, 326)],
        # Threshold for object coverage, float value between 0 and 1
        "obj_threshold": 0.6,
        # Threshold for non-max suppression algorithm, float value between 0 and 1
        "nms_threshold": 0.5,
        "yolo_input_resolution": input_resolution_yolov3_HW
    }

    capture = cv2.VideoCapture(r"D:\b站下载视频\飙车.mp4")

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    fps = capture.get(cv2.CAP_PROP_FPS)
    size = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    out = cv2.VideoWriter('camera_test.mp4', fourcc, fps, size)
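    # Note: 'XVID' is an AVI-oriented fourcc; when paired with an .mp4 container some
    # OpenCV/FFmpeg builds warn and silently fall back to 'mp4v'. Using 'mp4v' directly
    # (or writing to an .avi file) keeps the codec and container consistent.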
    fps = 0
    while True:
        t1 = time.time()
        ref, frame = capture.read()
        if not ref:  # end of the video stream
            break
        # Convert the frame from BGR to RGB
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Convert the array into a PIL Image
        frame = Image.fromarray(np.uint8(frame))
        # preprocessor2 is assumed to be a PreprocessYOLO variant (defined outside this
        # snippet) whose process() accepts a PIL Image instead of a file path.
        image_raw, image = preprocessor2.process(frame)
        shape_orig_WH = image_raw.size
        with get_engine(
                onnx_file_path, engine_file_path
        ) as engine, engine.create_execution_context() as context:
            inputs, outputs, bindings, stream = common.allocate_buffers(engine)

            # Do inference
            print('Running inference on the current video frame...')

            inputs[0].host = image
            trt_outputs = common.do_inference_v2(context,
                                                 bindings=bindings,
                                                 inputs=inputs,
                                                 outputs=outputs,
                                                 stream=stream)

        # Before doing post-processing, we need to reshape the outputs as the common.do_inference will give us flat arrays.
        trt_outputs = [
            output.reshape(shape)
            for output, shape in zip(trt_outputs, output_shapes)
        ]

        # postprocessor_args = {"yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],                    # A list of 3 three-dimensional tuples for the YOLO masks
        #                       "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),  # A list of 9 two-dimensional tuples for the YOLO anchors
        #                                        (59, 119), (116, 90), (156, 198), (373, 326)],
        #                       "obj_threshold": 0.6,                                               # Threshold for object coverage, float value between 0 and 1
        #                       "nms_threshold": 0.5,                                               # Threshold for non-max suppression algorithm, float value between 0 and 1
        #                       "yolo_input_resolution": input_resolution_yolov3_HW}

        postprocessor = PostprocessYOLO(**postprocessor_args)

        # Run the post-processing algorithms on the TensorRT outputs and get the bounding box details of detected objects
        boxes, classes, scores = postprocessor.process(trt_outputs,
                                                       (shape_orig_WH))
        # Draw the bounding boxes onto the original input image and save it as a PNG file
        obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes,
                                       ALL_CATEGORIES)

        frame = cv2.cvtColor(np.array(obj_detected_img), cv2.COLOR_RGB2BGR)
        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %.2f" % (fps))
        frame = cv2.putText(frame, "fps= %.2f" % (fps), (0, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        out.write(frame)
        cv2.imshow("video", frame)

        c = cv2.waitKey(1) & 0xff
        if c == 27:
            capture.release()
            break
        # output_image_path = 'dog_bboxes.png'
        # obj_detected_img.save(output_image_path, 'PNG')
        # print('Saved image with bounding boxes of detected objects to {}.'.format(output_image_path))
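    # After the capture loop exits, finalize the output video and close the preview
    # window; without out.release() the written MP4 may be left with an incomplete index.
    out.release()
    cv2.destroyAllWindows()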