Example #1
def main():
    # Load the shared object file containing the Clip plugin implementation.
    # By doing this, you will also register the Clip plugin with the TensorRT
    # PluginRegistry through use of the macro REGISTER_TENSORRT_PLUGIN present
    # in the plugin implementation. Refer to plugin/clipPlugin.cpp for more details.
    if not os.path.isfile(CLIP_PLUGIN_LIBRARY):
        raise IOError("\n{}\n{}\n{}\n".format(
            "Failed to load library ({}).".format(CLIP_PLUGIN_LIBRARY),
            "Please build the Clip sample plugin.",
            "For more information, see the included README.md"))
    ctypes.CDLL(CLIP_PLUGIN_LIBRARY)

    # Load pretrained model
    if not os.path.isfile(MODEL_PATH):
        raise IOError("\n{}\n{}\n{}\n".format(
            "Failed to load model file ({}).".format(MODEL_PATH),
            "Please use 'python lenet5.py' to train and save the model.",
            "For more information, see the included README.md"))

    # Build an engine and run inference.
    with build_engine(MODEL_PATH) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            test_case = load_normalized_test_case(inputs[0].host)
            print("Loading Test Case: " + str(test_case))
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [pred] = common.do_inference(context,
                                         bindings=bindings,
                                         inputs=inputs,
                                         outputs=outputs,
                                         stream=stream)
            print("Prediction: " + str(np.argmax(pred)))


# graphsurgeon mapping: declare a node that the UFF converter will turn into
# the "RegionOfKeypoints_TRT" plugin.
trt_rok = gs.create_plugin_node(name="trt_rok",
                                op="RegionOfKeypoints_TRT",
                                region_shape=5)
def main():
    # Get data files for the model.
    data_paths, [
        deploy_file, model_file, mean_proto
    ] = common.find_sample_data(
        description="Runs an MNIST network using a Caffe model file",
        subfolder="mnist",
        find_files=[
            "mnist.prototxt", "mnist.caffemodel", "mnist_mean.binaryproto"
        ])

    # Cache the engine in a temporary directory.
    engine_path = os.path.join(tempfile.gettempdir(), "mnist.engine")
    with get_engine(deploy_file, model_file, engine_path
                    ) as engine, engine.create_execution_context() as context:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        mean = retrieve_mean(mean_proto)
        # For more information on performing inference, refer to the introductory samples.
        inputs[0].host, case_num = load_normalized_test_case(data_paths, mean)
        # The common.do_inference function will return a list of outputs - we only have one in this case.
        [output] = common.do_inference(context,
                                       bindings=bindings,
                                       inputs=inputs,
                                       outputs=outputs,
                                       stream=stream)
        pred = np.argmax(output)
        print("Test Case: " + str(case_num))
        print("Prediction: " + str(pred))
def get_buffer(engine, img_np):
    # allocate buffers
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    # load data
    inputs[0].host = img_np

    return inputs, outputs, bindings, stream
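
The `common` module these snippets import is the helper that ships with the NVIDIA TensorRT Python samples. The sketch below shows roughly what its `allocate_buffers` helper does in the older, implicit-batch sample code: for every binding it pairs a page-locked host array with a device allocation and returns those pairs together with the raw device pointers and a CUDA stream. Details vary between TensorRT releases, so treat this as an approximation rather than the exact implementation.

import pycuda.autoinit  # noqa: F401 -- creates a default CUDA context
import pycuda.driver as cuda
import tensorrt as trt


class HostDeviceMem(object):
    """Pairs a page-locked host buffer with its device counterpart."""

    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem


def allocate_buffers(engine):
    """Allocate host/device buffers for every binding of an implicit-batch engine."""
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        host_mem = cuda.pagelocked_empty(size, dtype)  # page-locked host array
        device_mem = cuda.mem_alloc(host_mem.nbytes)   # matching device buffer
        bindings.append(int(device_mem))               # device pointer passed to execute_*
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream
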
def main():
    data_path = common.find_sample_data(
        description="Runs an MNIST network using a UFF model file",
        subfolder="mnist")
    model_file = ModelData.MODEL_FILE
    t1 = time.perf_counter()  # time.clock() was removed in Python 3.8

    with build_engine(model_file) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        #with open('/home/nvidia/procedure/lenet5.engine','wb') as f:
        #f.write(engine.serialize())
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            case_num = load_normalized_test_case(
                data_path, pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference(context,
                                           bindings=bindings,
                                           inputs=inputs,
                                           outputs=outputs,
                                           stream=stream)
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
            t2 = time.perf_counter()
            print("use_time:" + str(t2 - t1))
Example #5
    def predict(self, preprocessed_image):
        np_image = preprocessed_image
        assert (
            1,
            self.channels,
            self.model_height,
            self.model_width,
        ) == np_image.shape, "Image must be resized to model shape"

        if self.is_fp16:
            np_image = np_image.astype(np.float16)

        self.cfx.push()
        try:
            inputs, outputs, bindings, stream = common.allocate_buffers(self.engine)
            # Do inference
            inputs[0].host = np_image
            trt_outputs = do_inference(
                self.context,
                bindings=bindings,
                inputs=inputs,
                outputs=outputs,
                stream=stream,
            )
        finally:
            self.cfx.pop()  # very important
        # logger.debug('Len of outputs: ', len(trt_outputs))
        num_classes = len(self.labels)
        trt_outputs[0] = trt_outputs[0].reshape(1, -1, 1, 4)
        trt_outputs[1] = trt_outputs[1].reshape(1, -1, num_classes)
        return trt_outputs
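
The `self.cfx.push()` / `self.cfx.pop()` pair above wraps inference in a dedicated CUDA context so `predict` can be called from a thread other than the one that built the engine. A minimal constructor for such a predictor might look like the sketch below; the attribute names (`cfx`, `engine`, `context`, `is_fp16`, `channels`, `model_height`, `model_width`, `labels`) simply mirror the ones used above, the context creation follows the `cuda.Device(0).make_context()` pattern from Example #29 further down, and the engine is assumed to be a serialized .trt/.engine file.

import pycuda.driver as cuda
import tensorrt as trt


class TrtPredictor(object):
    def __init__(self, engine_path, labels, channels, model_height, model_width):
        cuda.init()
        # Dedicated CUDA context; predict() pushes/pops it around every call.
        self.cfx = cuda.Device(0).make_context()
        self.labels = labels
        self.channels = channels
        self.model_height = model_height
        self.model_width = model_width
        runtime = trt.Runtime(trt.Logger(trt.Logger.INFO))
        with open(engine_path, "rb") as f:
            self.engine = runtime.deserialize_cuda_engine(f.read())
        # Assume an FP16 engine exposes a half-precision input binding.
        self.is_fp16 = self.engine.get_binding_dtype(0) == trt.DataType.HALF
        self.context = self.engine.create_execution_context()
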
Example #6
def main():
    data_paths, _ = common.find_sample_data(
        description="Runs an MNIST network using a UFF model file",
        subfolder="mnist")
    model_path = os.environ.get("MODEL_PATH") or os.path.join(
        os.path.dirname(__file__), "models")
    model_file = os.path.join(model_path, ModelData.MODEL_FILE)

    with build_engine(model_file) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            case_num = load_normalized_test_case(
                data_paths, pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference(context,
                                           bindings=bindings,
                                           inputs=inputs,
                                           outputs=outputs,
                                           stream=stream)
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
def main():
    # Load the shared object file containing the Clip plugin implementation.
    # By doing this, you will also register the Clip plugin with the TensorRT
    # PluginRegistry through use of the macro REGISTER_TENSORRT_PLUGIN present
    # in the plugin implementation. Refer to plugin/clipPlugin.cpp for more details.
    if not os.path.isfile(CLIP_PLUGIN_LIBRARY):
        raise IOError("\n{}\n{}\n{}\n".format(
            "Failed to load library ({}).".format(CLIP_PLUGIN_LIBRARY),
            "Please build the Clip sample plugin.",
            "For more information, see the included README.md"))
    ctypes.CDLL(CLIP_PLUGIN_LIBRARY)

    # Load pretrained model
    if not os.path.isfile(MODEL_PATH):
        raise IOError("\n{}\n{}\n{}\n".format(
            "Failed to load model file ({}).".format(MODEL_PATH),
            "Please use 'python lenet5.py' to train and save the model.",
            "For more information, see the included README.md"))

    # Build an engine and run inference.
    with build_engine(MODEL_PATH) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            print("\n=== Testing ===")
            test_case = load_normalized_test_case(inputs[0].host)
            print("Loading Test Case: " + str(test_case))
            # The common do_inference function will return a list of outputs - we only have one in this case.
            [pred] = common.do_inference(context,
                                         bindings=bindings,
                                         inputs=inputs,
                                         outputs=outputs,
                                         stream=stream)
            print("Prediction: " + str(np.argmax(pred)))
def main():
    # Load the shared object file containing the Clip plugin implementation.
    # By doing this, you will also register the Clip plugin with the TensorRT
    # PluginRegistry through use of the macro REGISTER_TENSORRT_PLUGIN present
    # in the plugin implementation. Refer to plugin/clipPlugin.cpp for more details.
    if not os.path.isfile(CLIP_PLUGIN_LIBRARY):
        raise IOError("\n{}\n{}\n{}\n".format(
            "Failed to load library ({}).".format(CLIP_PLUGIN_LIBRARY),
            "Please build the Clip sample plugin.",
            "For more information, see the included README.md"))
    ctypes.CDLL(CLIP_PLUGIN_LIBRARY)

    # Train MNIST data and get weights.
    mnist_model = MnistModel()
    mnist_model.learn()
    weights = mnist_model.get_weights()

    # Do inference with TensorRT.
    with build_engine(weights) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            case_num = load_random_test_case(mnist_model,
                                             pagelocked_buffer=inputs[0].host)
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference(context,
                                           bindings=bindings,
                                           inputs=inputs,
                                           outputs=outputs,
                                           stream=stream)
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
Example #9
def main():

    engine = init_construct_network()
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)

    with engine.create_execution_context() as context:
        print(np.ones((x1 * y1 * z1), np.float32).reshape(-1))
        np.copyto(inputs[0].host,
                  np.ones((x1 * y1 * z1), np.float32).reshape(-1))
        np.copyto(inputs[1].host,
                  np.ones((x2 * y2 * z2), np.float32).reshape(-1))

        time_start = time.time()
        output = common.do_inference(context,
                                     bindings=bindings,
                                     inputs=inputs,
                                     outputs=outputs,
                                     stream=stream)
        time_end = time.time()
        print("time ", time_end - time_start)
        print(output[0].reshape((x1 * y1 * z1)))
        data = output[0].reshape((x1 * y1 * z1))
        print(data[0])
        print(output[1].reshape((x1 * y1 * z1)))

    print("ok")
Example #10
def main():
    # Get data files for the model.
    data_path, [deploy_file, model_file, mean_proto] = common.find_sample_data(
        description="Runs an MNIST network using a Caffe model file",
        subfolder="mnist",
        find_files=[
            "mnist.prototxt", "mnist.caffemodel", "mnist_mean.binaryproto"
        ])

    with build_engine(deploy_file, model_file) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        mean = retrieve_mean(mean_proto)
        with engine.create_execution_context() as context:
            case_num = load_normalized_test_case(data_path, inputs[0].host,
                                                 mean)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference(context,
                                           bindings=bindings,
                                           inputs=inputs,
                                           outputs=outputs,
                                           stream=stream)
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))

    # After the engine is destroyed, we destroy the plugin. This function is exposed through the binding code in plugin/pyFullyConnected.cpp.
    fc_factory.destroy_plugin()
def check_accuracy(context, batch_size, test_set, test_labels):
    inputs, outputs, bindings, stream = common.allocate_buffers(context.engine)

    num_correct = 0
    num_total = 0

    batch_num = 0
    for start_idx in range(0, test_set.shape[0], batch_size):
        batch_num += 1
        if batch_num % 10 == 0:
            print("Validating batch {:}".format(batch_num))
        # If the number of images in the test set is not divisible by the batch size, the last batch will be smaller.
        # This logic is used for handling that case.
        end_idx = min(start_idx + batch_size, test_set.shape[0])
        effective_batch_size = end_idx - start_idx

        # Do inference for every batch.
        inputs[0].host = test_set[start_idx:start_idx + effective_batch_size]
        [output] = common.do_inference(context,
                                       bindings=bindings,
                                       inputs=inputs,
                                       outputs=outputs,
                                       stream=stream,
                                       batch_size=effective_batch_size)

        # Use argmax to get predictions and then check accuracy.
        # The reshape to (32, 10) assumes the engine was built with a max batch size of 32;
        # only the rows belonging to this (possibly smaller) batch are kept.
        preds = np.argmax(output.reshape(32, 10)[0:effective_batch_size],
                          axis=1)
        labels = test_labels[start_idx:start_idx + effective_batch_size]
        num_total += effective_batch_size
        num_correct += np.count_nonzero(np.equal(preds, labels))

    percent_correct = 100 * num_correct / float(num_total)
    print("Total Accuracy: {:}%".format(percent_correct))
def main():
    # Set the data path to the directory that contains the trained models and test images for inference.
    _, data_files = common.find_sample_data(description="Runs a ResNet50 network with a TensorRT inference engine.", subfolder="resnet50", find_files=["binoculars.jpeg", "reflex_camera.jpeg", "tabby_tiger_cat.jpg", ModelData.MODEL_PATH, "class_labels.txt"])
    # Get test images, models and labels.
    test_images = data_files[0:3]
    onnx_model_file, labels_file = data_files[3:]
    labels = open(labels_file, 'r').read().split('\n')

    # Build a TensorRT engine.
    with build_engine_onnx(onnx_model_file) as engine:
        # Inference is the same regardless of which parser is used to build the engine, since the model architecture is the same.
        # Allocate buffers and create a CUDA stream.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Contexts are used to perform inference.
        with engine.create_execution_context() as context:
            # Load a normalized test case into the host input page-locked buffer.
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, inputs[0].host)
            # Run the engine. The output will be a 1D tensor of length 1000, where each value represents the
            # probability that the image corresponds to that label
            trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            # We use the highest probability as our prediction. Its index corresponds to the predicted label.
            pred = labels[np.argmax(trt_outputs[0])]
            if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)
Example #13
def main():
    data_path, data_files = common.find_sample_data(
        description="Runs a Caffe MNIST network in Int8 mode",
        subfolder="mnist",
        find_files=["batches", ModelData.DEPLOY_PATH, ModelData.MODEL_PATH])
    [batch_data_dir, deploy_file, model_file] = data_files

    # Now we create a calibrator and give it the location of our calibration data.
    # We also allow it to cache calibration data for faster engine building.
    calibration_cache = "mnist_calibration.cache"
    calib = calibrator.MNISTEntropyCalibrator(batch_data_dir,
                                              cache_file=calibration_cache)
    # We will use the calibrator batch size across the board.
    # This is not a requirement, but in this case it is convenient.
    batch_size = calib.get_batch_size()

    with build_int8_engine(
            deploy_file, model_file,
            calib) as engine, engine.create_execution_context() as context:
        # Allocate engine buffers.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)

        # Do inference for the whole batch. We have to specify the batch size here, as common.do_inference defaults to a batch size of 1.
        inputs[0].host, labels = load_random_batch(calib)
        [output] = common.do_inference(context,
                                       bindings=bindings,
                                       inputs=inputs,
                                       outputs=outputs,
                                       stream=stream,
                                       batch_size=batch_size)
        # Next we need to reshape the output to Nx10 (10 probabilities, one per digit), where N is batch size.
        output = output.reshape(batch_size, 10)
        validate_output(output, labels)
Example #14
def main():
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference."""
    global INPUTS, OUTPUTS, BINDINGS, STREAM, CONTEXT

    # Try to load a previously generated YOLOv3-608 network graph in ONNX format:

    onnx_file_path = os.path.join(
        sys.path[0],
        '/home/wlz/catkin_ws/src/krrt-planner/opnet/models/no_surf_80_32.onnx')
    engine_file_path = os.path.join(
        sys.path[0],
        '/home/wlz/catkin_ws/src/krrt-planner/opnet/models/no_surf_80_32.trt')

    input = np.random.randn(1, SHAPE[1], SHAPE[2], SHAPE[3]).astype(np.float32)
    # Output shapes expected by the post-processor
    output_shapes = [SHAPE]
    # Do inference with TensorRT
    trt_outputs = []
    with get_engine(onnx_file_path, engine_file_path
                    ) as engine, engine.create_execution_context() as context:
        print("GET ENGINE SUCCEED")

        CONTEXT = context
        INPUTS, OUTPUTS, BINDINGS, STREAM = common.allocate_buffers(engine)
        # inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Do inference
        time0 = time.time()
        for i in range(10):
            # print('Running inference')
            output = trt_inference(input)
        print("prepocess time: %fs" % (time.time() - time0))

    print('done')
def main():
    # Set the data path to the directory that contains the trained models and test images for inference.
    # data_path, data_files = common.find_sample_data(description="Runs a ResNet50 network with a TensorRT inference engine.", subfolder="resnet50", find_files=["binoculars.jpeg", "reflex_camera.jpeg", "tabby_tiger_cat.jpg", ModelData.MODEL_PATH, ModelData.DEPLOY_PATH, "class_labels.txt"])
    # Get test images, models and labels.
    # test_images = data_files[0:3]
    # test_image = "0.jpg"
    test_image_list = os.listdir('car_rec_test')
    print(test_image_list)
    engine_file_path = "car_rec.trt"
    caffe_model_file, caffe_deploy_file, labels_file = [ModelData.MODEL_PATH, ModelData.DEPLOY_PATH, ModelData.LABEL_PATH]
    labels = open(labels_file, 'r').read().split('\n')

    # Build a TensorRT engine.
    with build_engine_caffe(caffe_model_file, caffe_deploy_file, engine_file_path) as engine:
        # Inference is the same regardless of which parser is used to build the engine, since the model architecture is the same.
        # Allocate buffers and create a CUDA stream.
        # h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Contexts are used to perform inference.
        with engine.create_execution_context() as context:
            # Load a normalized test case into the host input page-locked buffer.
            # test_image = random.choice(test_images)
            # test_case = load_normalized_test_case(test_image, h_input)
            test_cases = load_normalized_test_cases(test_image_list, inputs)
            # Run the engine. The output will be a 1D tensor of length 1000, where each value represents the
            # probability that the image corresponds to that label
            # do_inference(context, h_input, d_input, h_output, d_output, stream)
            trt_outputs = common.do_inference(context, bindings, inputs, outputs, stream, 16)
            outs = trt_outputs[0].reshape(16,427)
            print(outs)
            for x in range(0, len(outs)):
                pred = labels[np.argmax(outs[x])]
                print(pred)
Example #16
def main(args):
    with get_engine(args.engine_path, args.model_dir) as engine:
        with engine.create_execution_context() as context:
            origin_img = cv2.imread(args.image_path)
            t1 = time.time()
            img, (ratio_h, ratio_w) = preprocess(origin_img)
            cv2.imwrite("processed.jpg", img)
            h, w, _ = img.shape
            # hwc to chw
            img = img.transpose((2, 0, 1))
            # flatten the image into a 1D array
            img = img.ravel()
            context.set_binding_shape(0, (1, 3, h, w))
            # allocate buffers and create a stream.
            inputs, outputs, bindings, stream = common.allocate_buffers(
                engine, context)
            # copy to pagelocked memory
            np.copyto(inputs[0].host, img)
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference_v2(context,
                                              bindings=bindings,
                                              inputs=inputs,
                                              outputs=outputs,
                                              stream=stream)
            # reshape 1D array to chw
            output = np.reshape(output, (6, h // 4, w // 4))
            # transpose chw to hwc
            output = output.transpose(1, 2, 0)
            boxes = postprocess(origin_img, output, ratio_h, ratio_w)
            t2 = time.time()
            print("total cost %fms" % ((t2 - t1) * 1000))
            draw_result(origin_img, boxes)
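
Unlike most of the other examples, the one above builds an explicit-batch engine with dynamic shapes: it calls `context.set_binding_shape` first, passes the context into `allocate_buffers` so buffers can be sized from the resolved shapes, and then uses `common.do_inference_v2`, which has no `batch_size` argument. Roughly, and under the same assumptions as the earlier helper sketches:

def do_inference_v2(context, bindings, inputs, outputs, stream):
    # Host-to-device copies for every input binding.
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
    # Explicit-batch execution; shapes come from context.set_binding_shape().
    context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
    # Device-to-host copies for every output binding.
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    stream.synchronize()
    return [out.host for out in outputs]
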
def main():
    #data_path, _ = common.find_sample_data(description="Runs an MNIST network using a UFF model file", subfolder="mnist")
    data_path = '/home/ai/tensorrt_tar/TensorRT-5.1.5.0/data/mnist'
    model_path = os.environ.get("MODEL_PATH") or os.path.join(
        os.path.dirname(__file__), "models")
    model_file = os.path.join(model_path, ModelData.MODEL_FILE)

    with build_engine(model_file) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            case_num = load_normalized_test_case(
                data_path, pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            start1 = time.time()
            [output] = common.do_inference(context,
                                           bindings=bindings,
                                           inputs=inputs,
                                           outputs=outputs,
                                           stream=stream)
            pred = np.argmax(output)

            print("Prediction: " + str(pred), 'time is :',
                  time.time() - start1)
            model1 = model.create_model()
            model1.load_weights("models/lenet5.pb")
            start2 = time.time()
            output = model1.predict(inputs[0])
            pred = np.argmax(output)

            print("Prediction: " + str(pred), 'time is :',
                  time.time() - start2)
Example #18
def predict(inp: Image, metadata):
    image_raw, image = preprocessor.process(inp)
    shape_orig_WH = image_raw.size
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    # Do inference
    print('Running inference on image')
    # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
    inputs[0].host = image
    a = perf_counter()
    trt_outputs = common.do_inference(context,
                                      bindings=bindings,
                                      inputs=inputs,
                                      outputs=outputs,
                                      stream=stream)
    b = perf_counter()
    metadata['TensorRT Inference Latency (s)'] = (b - a)
    trt_outputs = [
        output.reshape(shape)
        for output, shape in zip(trt_outputs, output_shapes)
    ]
    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Run the post-processing algorithms on the TensorRT outputs and get the bounding box details of detected objects
    boxes, classes, scores = postprocessor.process(trt_outputs,
                                                   (shape_orig_WH))
    # Draw the bounding boxes onto the original input image and save it as a PNG file
    obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes,
                                   ALL_CATEGORIES)
    return obj_detected_img
Example #19
def main():
    # Command-line argument parser
    common.add_help(description="Runs an MNIST network using a PyTorch model")
    # Train the PyTorch model
    # Create a model instance
    mnist_model = model.MnistModel()
    # Train it
    mnist_model.learn()
    # Get the trained weights
    weights = mnist_model.get_weights()
    # Do inference with TensorRT.
    # build_engine is implemented further down in this file
    with build_engine(weights) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        # Allocate buffer memory
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            # load_random_test_case is implemented further down in this file
            # Load a random test case and copy it to page-locked host memory
            case_num = load_random_test_case(mnist_model,
                                             pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            # Run inference
            [output] = common.do_inference(context,
                                           bindings=bindings,
                                           inputs=inputs,
                                           outputs=outputs,
                                           stream=stream)
            # Get the final result
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
def main3():
    device = torch.device('cuda:0')
    path = '04.jpg'

    onnx_file_path = './models/ResNet50.onnx'
    engine_file_path = './models/ResNet50.trt'

    with get_engine(
            onnx_file_path,
            engine_file_path,
    ) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            load_normalized_test_case(path, inputs[0].host)
            start1 = time.time()
            trt_outputs = common.do_inference(context,
                                              bindings=bindings,
                                              inputs=inputs,
                                              outputs=outputs,
                                              stream=stream)
    output = trt_outputs

    print('processing time3 is', time.time() - start1, np.argmax(output))

    return output
Example #21
def main(engine_path, image_path, image_size):
    with get_engine(engine_path) as engine, engine.create_execution_context(
    ) as context:
        buffers = common.allocate_buffers(engine)
        image_src = cv2.imread(image_path)

        detect(engine, context, buffers, image_src, image_size)
def main():
    engine_file_path = 'plate_detection.trt'
    input_image_path = '../cat.jpg'

    input_resolution_plate_detection_HW = (325, 325)
    preprocessor = PreprocessYOLO(input_resolution_plate_detection_HW)
    image_raw, image = preprocessor.process(input_image_path)
    print(image.shape)

    trt_outputs = []
    with get_engine_from_bin(engine_file_path) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)

        # Do inference
        print('Running inference on image {}...'.format(input_image_path))
        # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
        inputs[0].host = image

        trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream, batch_size=1)

        # Run inference n_time_inference times and report the average latency.
        total_time = 0
        n_time_inference = 10000

        for i in range(n_time_inference):
            t1 = time.time()
            trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream, batch_size=1)
            t2 = time.time()
            delta_time = t2 - t1
            total_time += delta_time
            print('inference-{} cost: {}ms'.format(str(i+1), delta_time*1000))
        avg_time_original_model = total_time / n_time_inference
        print("average inference time: {}ms".format(avg_time_original_model*1000))
        print(trt_outputs[0].shape)
        print(trt_outputs[1].shape)
def main():
    """Create a TensorRT engine for ONNX-based model and run inference."""

    # Try to load a previously generated network graph in ONNX format:
    onnx_file_path = '/models/run09/jetracer.onnx'
    engine_file_path = '/models/run09/jetracer.trt'
    ino = 378
    # Do inference with TensorRT
    trt_outputs = []
    with get_engine(onnx_file_path, engine_file_path
                    ) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Do inference
        print('Running inference on image')
        # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
        image = cv2.imread(
            f'/models/train_data/Images/{ino:03d}.jpg').transpose(
                2, 0, 1).reshape(1, 3, 320, 640)
        inputs[0].host = np.array(image, dtype=np.float16, order='C')
        trt_outputs = common.do_inference_v2(context,
                                             bindings=bindings,
                                             inputs=inputs,
                                             outputs=outputs,
                                             stream=stream)

    # Before doing post-processing, we need to reshape the outputs as the common.do_inference will give us flat arrays.
    #mask = trt_outputs.reshape(320,640).numpy()[0][0]>0.4
    print(trt_outputs[0].shape)
def main():
    x = "/home/dgxuser125/rt-kennan/Swish3/build/libswish.so"
    ctypes.CDLL(x)
    data_paths, _ = common.find_sample_data(
        description="Runs an MNIST network using a UFF model file",
        subfolder="mnist")
    model_path = os.environ.get("MODEL_PATH") or os.path.join(
        os.path.dirname(__file__), "models")
    model_file = os.path.join(model_path, ModelData.MODEL_FILE)
    with build_engine(model_file) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            # Start measuring time
            inference_start_time = time.time()
            for i in range(1000):
                case_num = load_normalized_test_case(
                    data_paths, pagelocked_buffer=inputs[0].host)
                [output] = common.do_inference(context,
                                               bindings=bindings,
                                               inputs=inputs,
                                               outputs=outputs,
                                               stream=stream)
                pred = np.argmax(output)
                # print("Test Case: " + str(case_num))
                # print("Prediction: " + str(pred))
            end_time = time.time()
            print("time taken for one input with tenosrrt: ",
                  (end_time - inference_start_time) / 1000)
Example #25
def main():
    common.add_help(description="Runs an MNIST network using a PyTorch model")
    # Train the PyTorch model
    mnist_model = model.MnistModel()
    mnist_model.learn()
    weights = mnist_model.get_weights()
    # Do inference with TensorRT.
    engine = build_engine(weights)

    # Build an engine, allocate buffers and create a stream.
    # For more information on buffer allocation, refer to the introductory samples.
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    context = engine.create_execution_context()

    case_num = load_random_test_case(mnist_model,
                                     pagelocked_buffer=inputs[0].host)
    # For more information on performing inference, refer to the introductory samples.
    # The common.do_inference function will return a list of outputs - we only have one in this case.
    [output] = common.do_inference(context,
                                   bindings=bindings,
                                   inputs=inputs,
                                   outputs=outputs,
                                   stream=stream)
    pred = np.argmax(output)
    print("Test Case: " + str(case_num))
    print("Prediction: " + str(pred))
Example #26
def main():
    # load label
    labels = [line.rstrip('\n') for line in open('class_labels.txt')]

    # load engine
    trt_engine = './vgg16_32.trt'
    Cifar10_engine = load_engine(trt_engine)

    dispW = 1280
    dispH = 720
    flip = 0
    fpsReport = 0
    camSet = 'nvarguscamerasrc ! video/x-raw(memory:NVMM), width=1280, height=720, format=NV12, framerate=60/1 \
              ! nvvidconv flip-method=' + str(
        flip) + ' ! video/x-raw, width=' + str(dispW) + ', height=' + str(
            dispH) + ',\
               format=BGRx ! videoconvert !video/x-raw, format=BGR ! appsink'

    cap = cv2.VideoCapture(camSet, cv2.CAP_GSTREAMER)
    timeStamp = time.time()
    font = cv2.FONT_HERSHEY_SIMPLEX
    while True:
        _, frame = cap.read()
        frame = frame.astype('float32')
        frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = cv2.resize(frameRGB, (32, 32)) / 255
        img = img.transpose((2, 0, 1)).flatten()

        # Allocate buffers for inputs and outputs
        inputs, outputs, bindings, stream = common.allocate_buffers(
            Cifar10_engine)
        inputs[0].host = img

        # inference
        with Cifar10_engine.create_execution_context() as context:
            trt_outputs = common.do_inference(context,
                                              bindings=bindings,
                                              inputs=inputs,
                                              outputs=outputs,
                                              stream=stream)
        pred = trt_outputs[0].argmax(-1)

        # fps
        dt = time.time() - timeStamp
        fps = 1 / dt
        fpsReport = .9 * fpsReport + .1 * fps
        timeStamp = time.time()
        cv2.rectangle(frame, (0, 0), (350 + len(labels[pred]) * 30, 80),
                      (0, 0, 255), -1)
        cv2.putText(frame,
                    str(round(fpsReport, 1)) + 'fps' + ', ' + labels[pred],
                    (0, 60), font, 2, (0, 255, 255), 3)

        cv2.imshow('stream', frame / 255)
        if cv2.waitKey(1) == 27:
            break
    cap.release()
    cv2.destroyAllWindows()
Example #27
def init():

    global inputs, outputs, bindings, stream, engine, TRT_LOGGER, context
    TRT_LOGGER = trt.Logger()
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = "yolov3.trt"
    engine = get_engine(TRT_LOGGER, onnx_file_path, engine_file_path)
    context = engine.create_execution_context()
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
def main(win_title):

    # load trt engine
    print('load trt engine')
    trt_path = 'engine.trt'
    engine = load_engine(trt_runtime, trt_path)

    print('load labels')
    label = get_label('keras_models/labels.txt')

    # allocate buffers
    print('allocate buffers')
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)

    print('create execution context')
    context = engine.create_execution_context()

    print('start stream')
    fps = -1
    GSTREAMER_PIPELINE = 'nvarguscamerasrc ! video/x-raw(memory:NVMM), width=1920, height=1080, format=(string)NV12, framerate=60/1 ! nvvidconv flip-method=0 ! video/x-raw, width=640, height=480, format=(string)BGRx ! videoconvert ! video/x-raw, format=(string)BGR ! appsink'

    cap = cv2.VideoCapture(GSTREAMER_PIPELINE, cv2.CAP_GSTREAMER)
    while (1):

        t_start = time.time()
        ret, frame = cap.read()
        size = (224, 224)
        inputs[0].host = preprocess(frame)

        # with engine.create_execution_context() as context:
        trt_outputs = common.do_inference(context,
                                          bindings=bindings,
                                          inputs=inputs,
                                          outputs=outputs,
                                          stream=stream)

        preds = trt_outputs[0]

        idx = np.argmax(preds)

        result = label[idx]

        info = '{} : {:.3f} , FPS {}'.format(result, preds[idx], fps)

        cv2.putText(frame, info, (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (0, 0, 255), 4)

        cv2.imshow(win_title, frame)

        if cv2.waitKey(1) == ord('q'):
            break

        fps = int(1 / (time.time() - t_start))

    cap.release()
    cv2.destroyAllWindows()
    print('Quit')
Example #29
    def __init__(self, onnx_path, engine_path, dimx=80, dimy=80, dimz=48):

        self.output_shape = [1, dimx, dimy, dimz]
        self.trt_logger = trt.Logger(trt.Logger.INFO)
        self.cuda_ctx = cuda.Device(0).make_context()
        self.engine = self.get_engine(onnx_path, engine_path)
        self.context = self.engine.create_execution_context()
        self.inputs, self.outputs, self.bindings, self.stream = common.allocate_buffers(
            self.engine)
        print("BUIND ENGINE SUCCEED")
Example #30
    def _infer(self, input_data):
        inputs, outputs, bindings, stream = common.allocate_buffers(
            self.engine)
        # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
        inputs[0].host = input_data
        return common.do_inference(self.context,
                                   bindings=bindings,
                                   inputs=inputs,
                                   outputs=outputs,
                                   stream=stream)