Code Example #1
File: sample.py  Project: LeeWoongkyu/python
def main():

    # Parse command line arguments
    args = parse_commandline_arguments()
    
    _, data_files = common.find_sample_data(
        description="Runs a Caffe MNIST network in Int8 mode",
        subfolder="mnist",
        find_files=[
            "t10k-images-idx3-ubyte", "t10k-labels-idx1-ubyte",
            "train-images-idx3-ubyte", ModelData.DEPLOY_PATH,
            ModelData.MODEL_PATH
        ])
    [test_set, test_labels, train_set, deploy_file, model_file] = data_files

    engine = None
    trt_engine_path = get_engine_path(args.precision)
    trt_runtime = trt.Runtime(TRT_LOGGER)
    

    # Inference batch size can be different from calibration batch size.
    batch_size = 32
    
    if not os.path.exists(trt_engine_path):
        # Build a TensorRT engine.
        engine = build_int8_engine(deploy_file, model_file, batch_size, trt_engine_datatype=args.trt_engine_datatype)
        # Save the engine to file
        buf = engine.serialize()
        with open(trt_engine_path, 'wb') as f:
            f.write(buf)

    # If we get here, the file with engine exists, so we can load it
    if not engine:
        print("Loading cached TensorRT engine from {}".format(trt_engine_path))
        with open(trt_engine_path, 'rb') as f:
            engine_data = f.read()
        engine = trt_runtime.deserialize_cuda_engine(engine_data)    

    with engine.create_execution_context() as context:
        # Batch size for inference can be different than batch size used for calibration.
        check_accuracy(context, batch_size, test_set=load_mnist_data(test_set), test_labels=load_mnist_labels(test_labels))
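Example #1 calls parse_commandline_arguments() and get_engine_path(), which are not shown. A hypothetical minimal sketch that is consistent with how main() uses them; the flag names and the engine file naming scheme are assumptions, not part of the original sample:

import argparse
import tensorrt as trt

def parse_commandline_arguments():
    parser = argparse.ArgumentParser(description="Caffe MNIST Int8 sample")
    # Map a simple precision string onto the TensorRT datatype main() expects.
    parser.add_argument("-p", "--precision", choices=["fp32", "fp16", "int8"], default="fp32")
    args = parser.parse_args()
    args.trt_engine_datatype = {
        "fp32": trt.DataType.FLOAT,
        "fp16": trt.DataType.HALF,
        "int8": trt.DataType.INT8,
    }[args.precision]
    return args

def get_engine_path(precision):
    # One cached engine file per precision, so later runs can skip the build.
    return "mnist_{}.engine".format(precision)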
Code Example #2
def main():
    data_path = common.find_sample_data(
        description="Runs an MNIST network using a UFF model file",
        subfolder="mnist")
    model_file = ModelData.MODEL_FILE
    t1 = time.perf_counter()  # time.clock() was removed in Python 3.8

    with build_engine(model_file) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        # with open('/home/nvidia/procedure/lenet5.engine', 'wb') as f:
        #     f.write(engine.serialize())
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            case_num = load_normalized_test_case(
                data_path, pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference(context,
                                           bindings=bindings,
                                           inputs=inputs,
                                           outputs=outputs,
                                           stream=stream)
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
            t2 = time.perf_counter()
            print("use_time:" + str(t2 - t1))
Code Example #3
def main():
    # Set the data path to the directory that contains the trained models and test images for inference.
    data_path, data_files = common.find_sample_data(
        description=
        "Runs a ResNet152 on Cars dataset network with a TensorRT inference engine.",
        subfolder="cars_restnet152",
        find_files=[
            "00001.jpg", "00002.jpg", "00003.jpg", "00004.jpg", "00005.jpg",
            "00006.jpg", ModelData.MODEL_PATH, "cars_labels.txt"
        ])
    # Get test images, models and labels.
    test_images = data_files[0:6]
    onnx_model_file, labels_file = data_files[6:]
    labels = open(labels_file, 'r').read().split('\n')
    #     print(onnx_model_file)
    # Build a TensorRT engine.
    with build_engine_onnx(onnx_model_file) as engine:
        # Inference is the same regardless of which parser is used to build the engine, since the model architecture is the same.
        # Allocate buffers and create a CUDA stream.
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        # Contexts are used to perform inference.
        with engine.create_execution_context() as context:
            # Load a normalized test case into the host input page-locked buffer.
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, h_input)
            # Run the engine. The output will be a 1D tensor of length 1000, where each value represents the
            # probability that the image corresponds to that label
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            # We use the highest probability as our prediction. Its index corresponds to the predicted label.
            #             pred = labels[np.argmax(h_output)]
            print(h_output.shape)
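The image-classification examples (#3, #6, #11, #16) instead use a local allocate_buffers/do_inference pair with exactly one input and one output binding. A sketch along the lines of the introductory samples, again assuming pycuda and the legacy bindings API; ModelData.DTYPE comes from the surrounding sample file:

import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt

def allocate_buffers(engine):
    # One page-locked host buffer and one device buffer per binding (input=0, output=1).
    h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(ModelData.DTYPE))
    h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(ModelData.DTYPE))
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream

def do_inference(context, h_input, d_input, h_output, d_output, stream):
    # Copy the input to the GPU, run the engine asynchronously, copy the result back.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    context.execute_async(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    stream.synchronize()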
Code Example #4
def main():
    # Get data files for the model.
    data_paths, [
        deploy_file, model_file, mean_proto
    ] = common.find_sample_data(
        description="Runs an MNIST network using a Caffe model file",
        subfolder="mnist",
        find_files=[
            "mnist.prototxt", "mnist.caffemodel", "mnist_mean.binaryproto"
        ])

    # Cache the engine in a temporary directory.
    engine_path = os.path.join(tempfile.gettempdir(), "mnist.engine")
    with get_engine(deploy_file, model_file, engine_path
                    ) as engine, engine.create_execution_context() as context:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        mean = retrieve_mean(mean_proto)
        # For more information on performing inference, refer to the introductory samples.
        inputs[0].host, case_num = load_normalized_test_case(data_paths, mean)
        # The common.do_inference function will return a list of outputs - we only have one in this case.
        [output] = common.do_inference(context,
                                       bindings=bindings,
                                       inputs=inputs,
                                       outputs=outputs,
                                       stream=stream)
        pred = np.argmax(output)
        print("Test Case: " + str(case_num))
        print("Prediction: " + str(pred))
Code Example #5
def main():
    _, data_files = common.find_sample_data(
        description="Runs a Caffe MNIST network in Int8 mode",
        subfolder="mnist",
        find_files=[
            "t10k-images-idx3-ubyte", "t10k-labels-idx1-ubyte",
            "train-images-idx3-ubyte", ModelData.DEPLOY_PATH,
            ModelData.MODEL_PATH
        ])
    [test_set, test_labels, train_set, deploy_file, model_file] = data_files

    # Now we create a calibrator and give it the location of our calibration data.
    # We also allow it to cache calibration data for faster engine building.
    calibration_cache = "mnist_calibration.cache"
    calib = MNISTEntropyCalibrator(test_set, cache_file=calibration_cache)

    # Inference batch size can be different from calibration batch size.
    batch_size = 32
    with build_int8_engine(
            deploy_file, model_file, calib, batch_size
    ) as engine, engine.create_execution_context() as context:
        # Batch size for inference can be different than batch size used for calibration.
        check_accuracy(context,
                       batch_size,
                       test_set=load_mnist_data(test_set),
                       test_labels=load_mnist_labels(test_labels))
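MNISTEntropyCalibrator, used by the INT8 examples (#1, #5, #8, #12), is defined in the samples' calibrator.py. A condensed sketch of the calibrator pattern, assuming pycuda and that the calibration images have already been loaded into an (N, 28*28) float32 array; the real class also parses the raw MNIST files and shuffles them:

import os
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt

class MNISTEntropyCalibrator(trt.IInt8EntropyCalibrator2):
    def __init__(self, calibration_data, cache_file, batch_size=64):
        trt.IInt8EntropyCalibrator2.__init__(self)
        self.cache_file = cache_file
        self.batch_size = batch_size
        # 'calibration_data' is assumed here to already be an (N, 28*28) float32 array.
        self.data = calibration_data
        self.current_index = 0
        self.device_input = cuda.mem_alloc(self.data[0].nbytes * self.batch_size)

    def get_batch_size(self):
        return self.batch_size

    def get_batch(self, names):
        if self.current_index + self.batch_size > self.data.shape[0]:
            return None  # signals that the calibration data is exhausted
        batch = np.ascontiguousarray(
            self.data[self.current_index:self.current_index + self.batch_size].ravel())
        cuda.memcpy_htod(self.device_input, batch)
        self.current_index += self.batch_size
        return [int(self.device_input)]

    def read_calibration_cache(self):
        # Reuse a cached calibration table if one exists, to skip recalibration.
        if os.path.exists(self.cache_file):
            with open(self.cache_file, "rb") as f:
                return f.read()

    def write_calibration_cache(self, cache):
        with open(self.cache_file, "wb") as f:
            f.write(cache)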
Code Example #6
def main():
    # Set the data path to the directory that contains the trained models and test images for inference.
    _, data_files = common.find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=[
            "binoculars.jpeg", "reflex_camera.jpeg", "tabby_tiger_cat.jpg",
            ModelData.MODEL_PATH, ModelData.DEPLOY_PATH, "class_labels.txt"
        ])
    # Get test images, models and labels.
    test_images = data_files[0:3]
    caffe_model_file, caffe_deploy_file, labels_file = data_files[3:]
    labels = open(labels_file, 'r').read().split('\n')

    # Build a TensorRT engine.
    with build_engine_caffe(caffe_model_file, caffe_deploy_file) as engine:
        # Inference is the same regardless of which parser is used to build the engine, since the model architecture is the same.
        # Allocate buffers and create a CUDA stream.
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        # Contexts are used to perform inference.
        with engine.create_execution_context() as context:
            # Load a normalized test case into the host input page-locked buffer.
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, h_input)
            # Run the engine. The output will be a 1D tensor of length 1000, where each value represents the
            # probability that the image corresponds to that label
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            # We use the highest probability as our prediction. Its index corresponds to the predicted label.
            pred = labels[np.argmax(h_output)]
            if "_".join(pred.split()) in os.path.splitext(
                    os.path.basename(test_case))[0]:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)
Code Example #7
def main():
    x = "/home/dgxuser125/rt-kennan/Swish3/build/libswish.so"
    ctypes.CDLL(x)
    data_paths, _ = common.find_sample_data(
        description="Runs an MNIST network using a UFF model file",
        subfolder="mnist")
    model_path = os.environ.get("MODEL_PATH") or os.path.join(
        os.path.dirname(__file__), "models")
    model_file = os.path.join(model_path, ModelData.MODEL_FILE)
    with build_engine(model_file) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            # Start measuring time
            inference_start_time = time.time()
            for i in range(1000):
                case_num = load_normalized_test_case(
                    data_paths, pagelocked_buffer=inputs[0].host)
                [output] = common.do_inference(context,
                                               bindings=bindings,
                                               inputs=inputs,
                                               outputs=outputs,
                                               stream=stream)
                pred = np.argmax(output)
                # print("Test Case: " + str(case_num))
                # print("Prediction: " + str(pred))
            end_time = time.time()
            print("time taken for one input with tenosrrt: ",
                  (end_time - inference_start_time) / 1000)
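Example #7 makes the custom Swish plugin visible to TensorRT by loading its shared library with ctypes before the engine is built. A slightly more portable sketch of that step; the SWISH_PLUGIN_PATH environment variable is an assumption, not part of the original:

import ctypes
import os
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

# Let the plugin path be overridden instead of hard-coding a user-specific location.
plugin_library = os.environ.get("SWISH_PLUGIN_PATH", "./libswish.so")
ctypes.CDLL(plugin_library)

# Also register TensorRT's built-in plugins so the parser can resolve plugin nodes.
trt.init_libnvinfer_plugins(TRT_LOGGER, "")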
Code Example #8
def main():
    data_path, data_files = common.find_sample_data(
        description="Runs a Caffe MNIST network in Int8 mode",
        subfolder="mnist",
        find_files=["batches", ModelData.DEPLOY_PATH, ModelData.MODEL_PATH])
    [batch_data_dir, deploy_file, model_file] = data_files

    # Now we create a calibrator and give it the location of our calibration data.
    # We also allow it to cache calibration data for faster engine building.
    calibration_cache = "mnist_calibration.cache"
    calib = calibrator.MNISTEntropyCalibrator(batch_data_dir,
                                              cache_file=calibration_cache)
    # We will use the calibrator batch size across the board.
    # This is not a requirement, but in this case it is convenient.
    batch_size = calib.get_batch_size()

    with build_int8_engine(
            deploy_file, model_file,
            calib) as engine, engine.create_execution_context() as context:
        # Allocate engine buffers.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)

        # Do inference for the whole batch. We have to specify the batch size here, as common.do_inference otherwise uses its default batch size of 1.
        inputs[0].host, labels = load_random_batch(calib)
        [output] = common.do_inference(context,
                                       bindings=bindings,
                                       inputs=inputs,
                                       outputs=outputs,
                                       stream=stream,
                                       batch_size=batch_size)
        # Next we need to reshape the output to Nx10 (10 probabilities, one per digit), where N is batch size.
        output = output.reshape(batch_size, 10)
        validate_output(output, labels)
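load_random_batch() and validate_output() in Example #8 are defined elsewhere in that sample. A hypothetical sketch of the validation step, just to make explicit what the Nx10 reshape is for:

import numpy as np

def validate_output(output, labels):
    # output has shape (batch_size, 10): one score vector per digit image.
    preds = np.argmax(output, axis=1)
    accuracy = np.mean(preds == labels)
    print("Batch accuracy: {:.2%}".format(accuracy))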
Code Example #9
def main():
    # Get data files for the model.
    data_path, [deploy_file, model_file, mean_proto] = common.find_sample_data(
        description="Runs an MNIST network using a Caffe model file",
        subfolder="mnist",
        find_files=[
            "mnist.prototxt", "mnist.caffemodel", "mnist_mean.binaryproto"
        ])

    with build_engine(deploy_file, model_file) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        mean = retrieve_mean(mean_proto)
        with engine.create_execution_context() as context:
            case_num = load_normalized_test_case(data_path, inputs[0].host,
                                                 mean)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference(context,
                                           bindings=bindings,
                                           inputs=inputs,
                                           outputs=outputs,
                                           stream=stream)
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))

    # After the engine is destroyed, we destroy the plugin. This function is exposed through the binding code in plugin/pyFullyConnected.cpp.
    fc_factory.destroy_plugin()
Code Example #10
File: sample.py  Project: HiYx/pytorch-cifar100
def main():
    _, _ = common.find_sample_data(
        description="Runs an MNIST network using a PyTorch model",
        subfolder="mnist")
    # Train the PyTorch model
    mnist_model = model.MnistModel()
    mnist_model.learn()
    weights = mnist_model.get_weights()
    # Do inference with TensorRT.
    with build_engine(weights) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            case_num = load_random_test_case(mnist_model,
                                             pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference(context,
                                           bindings=bindings,
                                           inputs=inputs,
                                           outputs=outputs,
                                           stream=stream)
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
Code Example #11
def main():
    # Set the data path to the directory that contains the trained models and test images for inference.
    #data_path, data_files = common.find_sample_data(description="Runs a ResNet50 network with a TensorRT inference engine.", find_files=[ModelData.MODEL_PATH, ModelData.DEPLOY_PATH])

    data_path, data_files, precision = common.find_sample_data(find_files=[".caffemodel", ".prototxt"])

    # Get test images, models and labels.
    #test_images = data_files[0:3]
    #caffe_model_file, caffe_deploy_file, labels_file = data_files[3:]
    caffe_model_file, caffe_deploy_file = data_files[:]
    #labels = open(labels_file, 'r').read().split('\n')

    # Build a TensorRT engine.
    with build_engine_caffe(caffe_model_file, caffe_deploy_file, precision) as engine:
        # Inference is the same regardless of which parser is used to build the engine, since the model architecture is the same.
        # Allocate buffers and create a CUDA stream.
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        # Contexts are used to perform inference.
        with engine.create_execution_context() as context:
            # Load a normalized test case into the host input page-locked buffer.
            #test_image = random.choice(test_images)
            #test_case = load_normalized_test_case(test_image, h_input)
            # Run the engine. The output will be a 1D tensor of length 1000, where each value represents the
            # probability that the image corresponds to that label
            do_inference(context, h_input, d_input, h_output, d_output, stream)
Code Example #12
File: sample.py  Project: laiou/trt_sample_code
def main():
    # Parse the sample data to get the paths of the corresponding data files
    _, data_files = common.find_sample_data(
        description="Runs a Caffe MNIST network in Int8 mode",
        subfolder="mnist",
        find_files=[
            "t10k-images-idx3-ubyte", "t10k-labels-idx1-ubyte",
            "train-images-idx3-ubyte", ModelData.DEPLOY_PATH,
            ModelData.MODEL_PATH
        ],
        err_msg="Please follow the README to download the MNIST dataset")
    # Assign the corresponding variables
    [test_set, test_labels, train_set, deploy_file, model_file] = data_files

    # Now we create a calibrator and give it the location of our calibration data.
    # We also allow it to cache calibration data for faster engine building.
    # Create a calibrator instance
    calibration_cache = "mnist_calibration.cache"
    # See calibrator.py for the implementation of MNISTEntropyCalibrator
    calib = MNISTEntropyCalibrator(test_set, cache_file=calibration_cache)

    # Inference batch size can be different from calibration batch size.
    # The inference batch size can differ from the calibration batch size
    batch_size = 32
    # See build_int8_engine in this file
    with build_int8_engine(
            deploy_file, model_file, calib, batch_size
    ) as engine, engine.create_execution_context() as context:
        # Batch size for inference can be different than batch size used for calibration.
        # See check_accuracy in this file
        check_accuracy(context,
                       batch_size,
                       test_set=load_mnist_data(test_set),
                       test_labels=load_mnist_labels(test_labels))
Code Example #13
def main():
    data_paths, _ = common.find_sample_data(
        description="Runs an MNIST network using a UFF model file",
        subfolder="mnist")
    model_path = os.environ.get("MODEL_PATH") or os.path.join(
        os.path.dirname(__file__), "models")
    model_file = os.path.join(model_path, ModelData.MODEL_FILE)

    with build_engine(model_file) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            case_num = load_normalized_test_case(
                data_paths, pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference(context,
                                           bindings=bindings,
                                           inputs=inputs,
                                           outputs=outputs,
                                           stream=stream)
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
Code Example #14
def main():
    # Set the data path to the directory that contains the trained models and test images for inference.
    # Parse the sample data
    _, data_files = common.find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=[
            "binoculars.jpeg", "reflex_camera.jpeg", "tabby_tiger_cat.jpg",
            ModelData.MODEL_PATH, "class_labels.txt"
        ])
    # Get test images, models and labels.
    # Get the test images, labels, and model files
    test_images = data_files[0:3]
    onnx_model_file, labels_file = data_files[3:]
    labels = open(labels_file, 'r').read().split('\n')

    # Build a TensorRT engine.
    # Build the corresponding TensorRT engine
    # See build_engine_onnx in this file
    with build_engine_onnx(onnx_model_file) as engine:
        # Inference is the same regardless of which parser is used to build the engine, since the model architecture is the same.
        # Allocate buffers and create a CUDA stream.
        # Allocate memory for the buffers
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Contexts are used to perform inference.
        # Create the inference execution context
        with engine.create_execution_context() as context:
            # Load a normalized test case into the host input page-locked buffer.
            # Load the test data
            test_image = random.choice(test_images)
            # See load_normalized_test_case in this file
            # Load the data into host memory
            test_case = load_normalized_test_case(test_image, inputs[0].host)
            # Run the engine. The output will be a 1D tensor of length 1000, where each value represents the
            # probability that the image corresponds to that label
            # See common.py for do_inference_v2
            trt_outputs = common.do_inference_v2(context,
                                                 bindings=bindings,
                                                 inputs=inputs,
                                                 outputs=outputs,
                                                 stream=stream)
            # We use the highest probability as our prediction. Its index corresponds to the predicted label.
            # Get the final output
            pred = labels[np.argmax(trt_outputs[0])]
            if "_".join(pred.split()) in os.path.splitext(
                    os.path.basename(test_case))[0]:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)
Code Example #15
def main():
    data_path = common.find_sample_data(
        description="Runs a network using a UFF model file", subfolder=".")
    model_file = ModelData.MODEL_FILE

    with build_engine(model_file) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            input_tests = ["img.ppm", "ones.ppm", "orange.ppm", "panda.ppm"]
            for input_test in input_tests:
                load_test_case(input_test, pagelocked_buffer=inputs[0].host)
                [output] = common.do_inference(context,
                                               bindings=bindings,
                                               inputs=inputs,
                                               outputs=outputs,
                                               stream=stream)
                print(output)
Code Example #16
File: onnx_yolov3.py  Project: MoonBunnyZZZ/trt-debug
def main():
    data_path, data_files = common.find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.", subfolder="resnet50",
        find_files=["binoculars.jpeg", "reflex_camera.jpeg", "tabby_tiger_cat.jpg", ModelData.MODEL_PATH,
                    "class_labels.txt"])
    test_images = data_files[0:3]
    onnx_model_file, labels_file = data_files[3:]
    labels = open(labels_file, 'r').read().split('\n')

    with build_engine_onnx(onnx_model_file) as engine:
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        with engine.create_execution_context() as context:
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, h_input)
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            pred = labels[np.argmax(h_output)]
            print("Recognized " + test_case + " as " + pred)
Code Example #17
File: sample.py  Project: LeeWoongkyu/python
def build_int8_engine(deploy_file, model_file, batch_size=32, trt_engine_datatype=trt.DataType.FLOAT):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        # We set the builder batch size to be the same as the calibrator's, as we use the same batches
        # during inference. Note that this is not required in general, and inference batch size is
        # independent of calibration batch size.
        builder.max_batch_size = batch_size
        builder.max_workspace_size = common.GiB(1)
        if trt_engine_datatype == trt.DataType.HALF:
            builder.fp16_mode = True
        elif trt_engine_datatype == trt.DataType.INT8:
            # Now we create a calibrator and give it the location of our calibration data.
            # We also allow it to cache calibration data for faster engine building.
            _, [calib_data] = common.find_sample_data(
                description="Runs a Caffe MNIST network in Int8 mode",
                subfolder="mnist",
                find_files=["t10k-images-idx3-ubyte"])
            calibration_cache = "mnist_calibration.cache"
            builder.int8_mode = True
            builder.int8_calibrator = MNISTEntropyCalibrator(calib_data, cache_file=calibration_cache)
        # Parse Caffe model
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        # Build engine and do int8 calibration.
        return builder.build_cuda_engine(network)
Code Example #18
File: sample.py  Project: brandy2/TensorRT-6.0.1.5
def main():
    _, _ = common.find_sample_data(
        description="Runs an MNIST network using a PyTorch model",
        subfolder="mnist")
    # Train the PyTorch model
    mnist_model = model.MnistModel()
    mnist_model.learn()
    weights = mnist_model.get_weights()
    # Do inference with TensorRT.
    with build_engine(weights) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print("Output Before Engine Refit")
        check_output(engine, inputs, outputs, bindings, stream, mnist_model)

        # Refit the engine with the actual trained weights for the conv_1 layer.
        with trt.Refitter(engine, TRT_LOGGER) as refitter:
            # To get a list of all refittable layers and associated weightRoles
            # in the network, use refitter.get_all()
            # Set the actual weights for the conv_1 layer. Since it consists of
            # kernel weights and bias weights, set each of them by specifying
            # the WeightsRole.
            refitter.set_weights("conv_1", trt.WeightsRole.KERNEL,
                                 weights['conv1.weight'].numpy())
            refitter.set_weights("conv_1", trt.WeightsRole.BIAS,
                                 weights['conv1.bias'].numpy())
            # Get description of missing weights. This should return empty
            # lists in this case.
            [missingLayers, weightRoles] = refitter.get_missing()
            assert len(
                missingLayers
            ) == 0, "Refitter found missing weights. Call set_weights() for all missing weights"
            # Refit the engine with the new weights. This will return True if
            # the refit operation succeeded.
            assert refitter.refit_cuda_engine()

        print("Output After Engine Refit")
        assert check_output(engine, inputs, outputs, bindings, stream,
                            mnist_model)
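Refitting in Example #18 only succeeds if the engine was built as refittable. One way to request that at build time, assuming a TensorRT version with IBuilderConfig; populate_network_fn here stands for whatever code adds the layers, e.g. the build_engine body sketched after Example #10:

import tensorrt as trt
import common

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

def build_refittable_engine(populate_network_fn, weights):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, \
            builder.create_builder_config() as config:
        config.max_workspace_size = common.GiB(1)
        # Mark the engine as refittable so trt.Refitter can swap weights later.
        config.set_flag(trt.BuilderFlag.REFIT)
        populate_network_fn(network, weights)  # assumed helper that adds the layers
        return builder.build_engine(network, config)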
Code Example #19
File: sample.py  Project: laiou/trt_sample_code
def main():
    # Parse the sample data and get the data paths
    # See common.py for the implementation of find_sample_data
    data_paths, _ = common.find_sample_data(
        description="Runs an MNIST network using a UFF model file",
        subfolder="mnist")
    # Get the path of the model file
    model_path = os.environ.get("MODEL_PATH") or os.path.join(
        os.path.dirname(__file__), "models")

    model_file = os.path.join(model_path, ModelData.MODEL_FILE)
    # Build the corresponding engine
    with build_engine(model_file) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        # See common.py for allocate_buffers
        # Get the lists of host/device buffers and the corresponding bindings
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # create_execution_context creates a new IExecutionContext instance
        with engine.create_execution_context() as context:
            # See this file for the implementation of load_normalized_test_case
            # Load the test data into the provided buffer
            case_num = load_normalized_test_case(
                data_paths, pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            # Run inference
            # See common.py for the implementation of do_inference
            [output] = common.do_inference(context,
                                           bindings=bindings,
                                           inputs=inputs,
                                           outputs=outputs,
                                           stream=stream)
            # Get the final prediction, i.e. the post-processing step
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
Code Example #20
def main():
    global args
    args = parser.parse_args()

    # Set the data path to the directory that contains the trained models and test images for inference.
    _, data_files = common.find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=[
            "binoculars.jpeg", "reflex_camera.jpeg", "tabby_tiger_cat.jpg",
            "class_labels.txt"
        ])
    labels_file = data_files[3]
    labels = open(labels_file, 'r').read().split('\n')

    # data loading
    #
    # All pre-trained models expect input images normalized in the same way,
    # i.e. mini-batches of 3-channel RGB images of shape (3 x H x W), where H and
    # W are expected to be at least 224. The images have to be loaded into a
    # range of [0, 1] and then normalized using mean = [0.485, 0.456, 0.406] and
    # std = [0.229, 0.224, 0.225]
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    imagenet_data = datasets.ImageNet(args.data,
                                      split='train',
                                      transform=transforms.Compose([
                                          transforms.Resize(256),
                                          transforms.CenterCrop(224),
                                          transforms.ToTensor(),
                                          normalize,
                                      ]),
                                      download=False)

    # print("size of Imagenet data is {}".format(len(imagenet_data)))

    data_loader = torch.utils.data.DataLoader(
        imagenet_data,
        batch_size=args.batch_size,
        shuffle=False,
        # num_workers=args.workers,
        num_workers=0,
        pin_memory=True)

    with get_resnet50_engine(ModelData.MODEL_PATH) as engine:
        # Allocate buffers and create a CUDA stream.
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        # Contexts are used to perform inference.
        with engine.create_execution_context() as context:
            # Load a normalized test case into the host input page-locked buffer.
            run(data_loader, engine)
            # run(0, engine)

    # return
    # define loss function (criterion)
    # criterion = nn.CrossEntropyLoss().cuda()

    # validate(data_loader, resnet50)

    return
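run(data_loader, engine) in Example #20 is not shown. A purely hypothetical sketch of such an evaluation loop, reusing the single-binding allocate_buffers/do_inference helpers sketched earlier and reporting top-1 accuracy; everything beyond the call signature is an assumption:

import numpy as np

def run(data_loader, engine):
    # Allocate one input/output buffer pair; the sketched buffers hold a single image.
    h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
    with engine.create_execution_context() as context:
        correct = total = 0
        for images, targets in data_loader:
            for image, target in zip(images, targets):
                np.copyto(h_input, image.numpy().ravel())
                do_inference(context, h_input, d_input, h_output, d_output, stream)
                correct += int(np.argmax(h_output) == int(target))
                total += 1
        print("Top-1 accuracy: {:.2%}".format(correct / total))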
Code Example #21
    def __init__(self,
                 trt_engine_path,
                 uff_model_path,
                 trt_engine_datatype=trt.DataType.FLOAT,
                 batch_size=1):
        """Initializes TensorRT objects needed for model inference.

        Args:
            trt_engine_path (str): path where TensorRT engine should be stored
            uff_model_path (str): path of .uff model
            trt_engine_datatype (trt.DataType):
                requested precision of TensorRT engine used for inference
            batch_size (int): batch size for which engine
                should be optimized for
        """

        # We first load all custom plugins shipped with TensorRT,
        # some of them will be needed during inference
        trt.init_libnvinfer_plugins(TRT_LOGGER, '')

        # Initialize runtime needed for loading TensorRT engine from file
        self.trt_runtime = trt.Runtime(TRT_LOGGER)
        # TRT engine placeholder
        self.trt_engine = None

        # Display requested engine settings to stdout
        print("TensorRT inference engine settings:")
        print("  * Inference precision - {}".format(trt_engine_datatype))
        print("  * Max batch size - {}\n".format(batch_size))

        # If engine is not cached, we need to build it
        if not os.path.exists(trt_engine_path):
            # This function uses the supplied .uff file
            # together with the UffParser to build the TensorRT
            # engine. For more details, check the implementation.

            # Set up for calibration
            if trt_engine_datatype == trt.DataType.INT8:
                with open(PATHS.get_voc_image_set_path(), 'r') as f:
                    voc_image_numbers = f.readlines()
                    voc_image_numbers = [
                        line.strip() for line in voc_image_numbers
                    ]
                total_imgs = len(voc_image_numbers)
                voc_names = []

                calibration_cache = "ssd_calibration_eval.cache"

                for n in range(total_imgs):
                    voc_names.append(voc_image_numbers[n] + ".jpg")

                _, calib_data = common.find_sample_data(
                    description="Runs a ResNet50 network in Int8 mode",
                    subfolder="JPEGImages",
                    find_files=voc_names)
                calib = VOCEntropyCalibrator(calib_data,
                                             total_imgs,
                                             cache_file=calibration_cache)

            self.trt_engine = engine_utils.build_engine(
                uff_model_path,
                calib,
                TRT_LOGGER,
                trt_engine_datatype=trt_engine_datatype,
                batch_size=batch_size)
            # Save the engine to file
            engine_utils.save_engine(self.trt_engine, trt_engine_path)

        # If we get here, the file with engine exists, so we can load it
        if not self.trt_engine:
            print("Loading cached TensorRT engine from {}".format(
                trt_engine_path))
            self.trt_engine = engine_utils.load_engine(self.trt_runtime,
                                                       trt_engine_path)

        # This allocates memory for network inputs/outputs on both CPU and GPU
        self.inputs, self.outputs, self.bindings, self.stream = \
            engine_utils.allocate_buffers(self.trt_engine)

        # Execution context is needed for inference
        self.context = self.trt_engine.create_execution_context()

        # Allocate memory for multiple usage [e.g. multiple batch inference]
        input_volume = trt.volume(model_utils.ModelData.INPUT_SHAPE)
        self.numpy_array = np.zeros(
            (self.trt_engine.max_batch_size, input_volume))
def main():
    data_root = '/home/cvrr/opt/TensorRT-5.0.2.6/python/data/cars_resnet152/tiny-imagenet-200/val/'
    image_root = '/home/cvrr/opt/TensorRT-5.0.2.6/python/data/cars_resnet152/tiny-imagenet-200/val/images/'
    text_file = open(data_root + "val_annotations.txt", "r")
    anno = [line.split("\t") for line in text_file.readlines()]

    label_file = open(data_root + "labels.txt", "r")
    find_labels = [line.split(" ") for line in label_file.readlines()]
    # cars_annos_all = scipy.io.loadmat(data_root + 'cars_train_annos.mat')
    # cars_annos = cars_annos_all['annotations']
    # cars_annos = np.transpose(cars_annos)

    # Set the data path to the directory that contains the trained models and test images for inference.
    data_path, data_files = common.find_sample_data(
        description=
        "Runs a ResNet152 on Cars dataset network with a TensorRT inference engine.",
        subfolder="cars_resnet152",
        find_files=[
            "00001.jpg", "00002.jpg", "00003.jpg", "00004.jpg", "00005.jpg",
            "00006.jpg", "00007.jpg", "00008.jpg", "00009.jpg", "00010.jpg",
            ModelData.MODEL_PATH, "cars_labels.txt"
        ])
    # Get test images, models and labels.
    test_images = data_files[0:10]
    onnx_model_file, labels_file = data_files[10:]
    labels = open(labels_file, 'r').read().split('\n')
    print(len(labels))
    # print(labels)
    # add the weight of the last layer
    fc_weights = np.load(
        '/home/cvrr/opt/TensorRT-5.0.2.6/python/data/cars_resnet152/last_layer_weights.npy'
    )  # (196, 2048)
    #     print(onnx_model_file)
    # Build a TensorRT engine.
    engine_name = 'resnet152v2.engine'
    with open(engine_name, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
    # with build_engine_onnx(onnx_model_file) as engine:
    # Inference is the same regardless of which parser is used to build the engine, since the model architecture is the same.
    # Allocate buffers and create a CUDA stream.
    h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
    # Contexts are used to perform inference.
    with engine.create_execution_context() as context:
        t1 = time.time()
        right_count = 0

        for i in range(1000):
            test_image = image_root + 'val_%d.JPEG' % (i)
            true_label = anno[i][1]
            # bbox_x1 = cars_annos[i][0][0][0][0]
            # bbox_y1 = cars_annos[i][0][1][0][0]
            # bbox_x2 = cars_annos[i][0][2][0][0]
            # bbox_y2 = cars_annos[i][0][3][0][0]
            # true_label = int(cars_annos[i][0][4][0][0])
            print('true label', true_label)
            image = Image.open(test_image).convert('RGB')
            # image = image.crop([max(0 , bbox_x1 - 16), max(0, bbox_y1 - 16), min(image.size[0], bbox_x2 + 16), min(image.size[1], bbox_y2 + 16)])
            # image.show()
            c, h, w = ModelData.INPUT_SHAPE
            image_arr = np.asarray(image.resize(
                (w, h), Image.ANTIALIAS)).transpose([2, 0, 1]).astype(
                    trt.nptype(ModelData.DTYPE)).ravel()
            a = (image_arr / 255.0 - 0.45) / 0.225
            np.copyto(h_input, a)
            # h, w = img.size
            # dim_diff = np.abs(h - w)
            # print(test_image)
            # for test_image in test_images:
            # Load a normalized test case into the host input page-locked buffer.
            # test_image = random.choice(test_images)
            # test_case = load_normalized_test_case(test_image, h_input)
            # Run the engine. The output will be a 1D tensor of length 1000, where each value represents the
            # probability that the image corresponds to that label
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            # print(h_output.shape)
            # We use the highest probability as our prediction. Its index corresponds to the predicted label.
            # output = fc_weights @ h_output
            output = h_output
            # print(output)
            print('prediction:', np.argmax(output))
            pred = find_labels[np.argmax(output)][0]
            if true_label == pred:
                right_count += 1
            # print('prediction:', pred)
        print(right_count)
        t2 = time.time()
        print('total time:', t2 - t1)