def main():
    engine_file_path = 'plate_detection.trt'
    input_image_path = '../cat.jpg'
    input_resolution_plate_detection_HW = (325, 325)
    preprocessor = PreprocessYOLO(input_resolution_plate_detection_HW)
    image_raw, image = preprocessor.process(input_image_path)
    print(image.shape)
    trt_outputs = []
    with get_engine_from_bin(engine_file_path) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Do inference
        print('Running inference on image {}...'.format(input_image_path))
        # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
        inputs[0].host = image
        trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream, batch_size=1)
        # Time inference over n_time_inference runs and report the average.
        total_time = 0
        n_time_inference = 10000
        for i in range(n_time_inference):
            t1 = time.time()
            trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream, batch_size=1)
            t2 = time.time()
            delta_time = t2 - t1
            total_time += delta_time
            print('inference-{} cost: {}ms'.format(str(i + 1), delta_time * 1000))
        avg_time_original_model = total_time / n_time_inference
        print("average inference time: {}ms".format(avg_time_original_model * 1000))
        print(trt_outputs[0].shape)
        print(trt_outputs[1].shape)
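# Note: the snippets in this collection rely on the allocate_buffers()/do_inference() helpers from the
# common.py module shipped with NVIDIA's TensorRT Python samples. As a reference, here is a minimal
# sketch of those helpers for the implicit-batch API (TensorRT <= 7 with PyCUDA); the common.py bundled
# with your TensorRT release may differ in detail.
import pycuda.autoinit  # noqa: F401 -- creates a CUDA context on import
import pycuda.driver as cuda
import tensorrt as trt


class HostDeviceMem(object):
    """Pairs a pagelocked host buffer with its device buffer."""

    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem


def allocate_buffers(engine):
    """Allocate host/device buffers for every binding and create a CUDA stream."""
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        bindings.append(int(device_mem))
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream


def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    """Copy inputs to the GPU, execute the engine, copy outputs back, and return them."""
    for inp in inputs:
        cuda.memcpy_htod_async(inp.device, inp.host, stream)
    context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle)
    for out in outputs:
        cuda.memcpy_dtoh_async(out.host, out.device, stream)
    stream.synchronize()
    return [out.host for out in outputs]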
def inference(network, engine, input_vars):
    input_vars = [input_vars[0]]
    inputs, outputs, bindings, stream = allocate_buffers(engine)
    with engine.create_execution_context() as context:
        for i, input_var in enumerate(input_vars):
            np.copyto(inputs[i].host, input_var.numpy().reshape(-1))
        do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
    return outputs
def inference(network, builder, input_vars):
    if type(input_vars) != tuple:
        input_vars = [input_vars]
    builder.max_workspace_size = 256 << 20
    engine = builder.build_cuda_engine(network)
    inputs, outputs, bindings, stream = allocate_buffers(engine)
    for i, input_var in enumerate(input_vars):
        np.copyto(inputs[i].host, input_var.numpy().reshape(-1))
    with engine.create_execution_context() as context:
        do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
    return outputs
def main():
    model_path = "model/trt/deconv-conv.trt"
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    runtime = trt.Runtime(TRT_LOGGER)
    f = open(model_path, "rb")
    engine = runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()
    f.close()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Report whether each binding is an input or an output.
        if engine.binding_is_input(binding):
            print("input_size: ", size, "dtype: ", dtype)
        else:
            print("output_size: ", size, "dtype: ", dtype)
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    length = input_shape[0] * input_shape[1] * input_shape[2]
    data = np.zeros(length, dtype=np.float32)
    data[:] = 1.0
    inputs[0].host = data.reshape(input_shape)
    print(inputs[0].host[0][0][:10])
    outputs[0].host = np.zeros(output_shape, dtype=np.float32)
    trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
    print(trt_outputs[0][0][0][:10])
    print("starting...")
    starttime = time.time()
    for i in range(200):
        trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
    endtime = time.time()
    print(endtime - starttime)
    print(trt_outputs[0][0][0][:10])
def main():
    # Command-line argument parser
    common.add_help(description="Runs an MNIST network using a PyTorch model")
    # Train the PyTorch model: create a model instance and train it
    mnist_model = model.MnistModel()
    mnist_model.learn()
    # Fetch the trained weights
    weights = mnist_model.get_weights()
    # Do inference with TensorRT.
    # build_engine is implemented in this file.
    with build_engine(weights) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            # load_random_test_case (implemented in this file) loads a random test
            # sample and copies it into host (pagelocked) memory.
            case_num = load_random_test_case(mnist_model, pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            # Post-process: take the argmax as the predicted digit.
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
def check_accuracy(context, batch_size, test_set, test_labels):
    inputs, outputs, bindings, stream = common.allocate_buffers(context.engine)
    num_correct = 0
    num_total = 0
    batch_num = 0
    for start_idx in range(0, test_set.shape[0], batch_size):
        batch_num += 1
        if batch_num % 10 == 0:
            print("Validating batch {:}".format(batch_num))
        # If the number of images in the test set is not divisible by the batch size,
        # the last batch will be smaller. This logic handles that case.
        end_idx = min(start_idx + batch_size, test_set.shape[0])
        effective_batch_size = end_idx - start_idx
        # Do inference for every batch.
        inputs[0].host = test_set[start_idx:start_idx + effective_batch_size]
        [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream, batch_size=effective_batch_size)
        # Use argmax to get predictions and then check accuracy
        preds = np.argmax(output.reshape(32, 10)[0:effective_batch_size], axis=1)
        labels = test_labels[start_idx:start_idx + effective_batch_size]
        num_total += effective_batch_size
        num_correct += np.count_nonzero(np.equal(preds, labels))
    percent_correct = 100 * num_correct / float(num_total)
    print("Total Accuracy: {:}%".format(percent_correct))
def infer_batch(self, image_paths):
    # Verify that the supplied batch size is not too big
    max_batch_size = self.trt_engine.max_batch_size
    actual_batch_size = len(image_paths)
    if actual_batch_size > max_batch_size:
        raise ValueError(
            "image_paths list bigger ({}) than engine max batch size ({})".format(
                actual_batch_size, max_batch_size))
    # Load all images to CPU...
    imgs = self._load_imgs(image_paths)
    # ...copy them into appropriate place into memory...
    # (self.inputs was returned earlier by allocate_buffers())
    np.copyto(self.inputs[0].host, imgs.ravel())
    # Run the engine.
    # self._do_inference(self.context, self.h_input, self.d_input, self.h_output, self.d_output, self.stream)
    out = engine_common.do_inference(self.context, self.bindings, self.inputs, self.outputs, self.stream, batch_size=1)
    # pred = labels[np.argmax(h_output)]
    # print(pred)
    return out
def main3():
    device = torch.device('cuda:0')
    path = '04.jpg'
    onnx_file_path = './models/ResNet50.onnx'
    engine_file_path = './models/ResNet50.trt'
    with get_engine(onnx_file_path, engine_file_path) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            load_normalized_test_case(path, inputs[0].host)
            start1 = time.time()
            trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            output = trt_outputs
            print('processing time3 is', time.time() - start1, np.argmax(output))
    return output
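# get_engine() as used in main3() above is not defined in this collection. A minimal sketch of a
# typical build-or-load helper, modeled on the pre-8.x TensorRT ONNX samples: parse the ONNX file
# and build an engine on the first run, cache the serialized plan, and deserialize it afterwards.
# The workspace size and the explicit-batch flag are illustrative assumptions.
def get_engine(onnx_file_path, engine_file_path):
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    if os.path.exists(engine_file_path):
        # Reuse the cached, serialized engine instead of rebuilding it.
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    explicit_batch = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network(explicit_batch) as network, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = 1 << 28  # 256 MiB
        with open(onnx_file_path, "rb") as model:
            if not parser.parse(model.read()):
                for i in range(parser.num_errors):
                    print(parser.get_error(i))
                raise RuntimeError("Failed to parse the ONNX file.")
        engine = builder.build_cuda_engine(network)
        # Cache the serialized plan so subsequent runs can skip the build step.
        with open(engine_file_path, "wb") as f:
            f.write(engine.serialize())
        return engine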
def main():
    data_paths, _ = common.find_sample_data(
        description="Runs an MNIST network using a UFF model file", subfolder="mnist")
    model_path = os.environ.get("MODEL_PATH") or os.path.join(os.path.dirname(__file__), "models")
    model_file = os.path.join(model_path, ModelData.MODEL_FILE)
    with build_engine(model_file) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            case_num = load_normalized_test_case(data_paths, pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
def infer(self, cv_image):
    """Infers model on given image.

    Args:
        cv_image (np.ndarray): BGR image to run the object detection model on
    """
    model_input_width = model_utils.ModelData.get_input_width()
    model_input_height = model_utils.ModelData.get_input_height()
    image_resized = cv2.resize(cv_image, (model_input_width, model_input_height))
    img_np = cv2.cvtColor(image_resized, cv2.COLOR_BGR2RGB)
    img_np = img_np.transpose((2, 0, 1)).astype(np.float32)
    img_np = (2.0 / 255.0) * img_np - 1.0
    img = img_np.ravel()
    # Load image into CPU
    # img = self._load_img(image_path)
    # Copy it into appropriate place into memory
    # (self.inputs was returned earlier by allocate_buffers())
    np.copyto(self.inputs[0].host, img.ravel())
    # Fetch output from the model
    [detection_out, keepCount_out] = common.do_inference(self.context, bindings=self.bindings, inputs=self.inputs, outputs=self.outputs, stream=self.stream)
    # And return results
    return detection_out, keepCount_out
def main():
    # data_path, _ = common.find_sample_data(description="Runs an MNIST network using a UFF model file", subfolder="mnist")
    data_path = '/home/ai/tensorrt_tar/TensorRT-5.1.5.0/data/mnist'
    model_path = os.environ.get("MODEL_PATH") or os.path.join(os.path.dirname(__file__), "models")
    model_file = os.path.join(model_path, ModelData.MODEL_FILE)
    with build_engine(model_file) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            case_num = load_normalized_test_case(data_path, pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            start1 = time.time()
            [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            pred = np.argmax(output)
            print("Prediction: " + str(pred), 'time is :', time.time() - start1)
    model1 = model.create_model()
    model1.load_weights("models/lenet5.pb")
    start2 = time.time()
    output = model1.predict(inputs[0])
    pred = np.argmax(output)
    print("Prediction: " + str(pred), 'time is :', time.time() - start2)
def inference_batch(self, images):
    batch_size = len(images)
    img_batch = np.zeros((self.batch_size, 3, self.img_size, self.img_size))
    for i, img in enumerate(images):
        img = cv2.resize(img, (self.img_size, self.img_size))
        img = img[..., ::-1]  # BGR to RGB
        img = img.transpose(2, 0, 1)
        img = np.array(img, dtype=np.float32)
        img = (img - 127.5) / 128
        img_batch[i] = img
    img_batch = np.array(img_batch, dtype=np.float32, order='C')
    self.inputs[0].host = img_batch
    feat_batch = common.do_inference(self.context, bindings=self.bindings, inputs=self.inputs, outputs=self.outputs, stream=self.stream, batch_size=batch_size)
    feats = np.asarray([
        self.l2_norm_numpy(feat_batch[0][i * self.feat_size:(i + 1) * self.feat_size])
        for i in range(batch_size)
    ])
    return feats
def inference_batch(self, images, batch_size):
    """Multi-batch inference."""
    img_batch = np.zeros((batch_size, 3, self.img_size, self.img_size))
    for i, img in enumerate(images):
        img = cv2.resize(img, (self.img_size, self.img_size))
        # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.transpose(2, 0, 1)
        img_batch[i] = img
    img_batch = np.array(img_batch, dtype=np.float32, order='C')
    self.inputs[0].host = img_batch
    fb = common.do_inference(self.context, bindings=self.bindings, inputs=self.inputs, outputs=self.outputs, stream=self.stream, batch_size=batch_size)
    fb = fb[0]
    atts = [{
        'gender': np.argmax(fb[i * self.feat_size:i * self.feat_size + 2]),
        'age': np.sum(
            np.argmax(fb[i * self.feat_size + 2:i * self.feat_size + 202].reshape((100, 2)), axis=1))
    } for i in range(batch_size)]
    return atts
def main():
    # Get data files for the model.
    data_paths, [deploy_file, model_file, mean_proto] = common.find_sample_data(
        description="Runs an MNIST network using a Caffe model file",
        subfolder="mnist",
        find_files=["mnist.prototxt", "mnist.caffemodel", "mnist_mean.binaryproto"])
    # Cache the engine in a temporary directory.
    engine_path = os.path.join(tempfile.gettempdir(), "mnist.engine")
    with get_engine(deploy_file, model_file, engine_path) as engine, engine.create_execution_context() as context:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        mean = retrieve_mean(mean_proto)
        # For more information on performing inference, refer to the introductory samples.
        inputs[0].host, case_num = load_normalized_test_case(data_paths, mean)
        # The common.do_inference function will return a list of outputs - we only have one in this case.
        [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
        pred = np.argmax(output)
        print("Test Case: " + str(case_num))
        print("Prediction: " + str(pred))
def main():
    # Set the data path to the directory that contains the trained models and test images for inference.
    # data_path, data_files = common.find_sample_data(description="Runs a ResNet50 network with a TensorRT inference engine.", subfolder="resnet50", find_files=["binoculars.jpeg", "reflex_camera.jpeg", "tabby_tiger_cat.jpg", ModelData.MODEL_PATH, ModelData.DEPLOY_PATH, "class_labels.txt"])
    # Get test images, models and labels.
    # test_images = data_files[0:3]
    # test_image = "0.jpg"
    test_image_list = os.listdir('car_rec_test')
    print(test_image_list)
    engine_file_path = "car_rec.trt"
    caffe_model_file, caffe_deploy_file, labels_file = [ModelData.MODEL_PATH, ModelData.DEPLOY_PATH, ModelData.LABEL_PATH]
    labels = open(labels_file, 'r').read().split('\n')
    # Build a TensorRT engine.
    with build_engine_caffe(caffe_model_file, caffe_deploy_file, engine_file_path) as engine:
        # Inference is the same regardless of which parser is used to build the engine, since the model architecture is the same.
        # Allocate buffers and create a CUDA stream.
        # h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Contexts are used to perform inference.
        with engine.create_execution_context() as context:
            # Load a normalized test case into the host input page-locked buffer.
            # test_image = random.choice(test_images)
            # test_case = load_normalized_test_case(test_image, h_input)
            test_cases = load_normalized_test_cases(test_image_list, inputs)
            # Run the engine. The output will be a 1D tensor of length 1000, where each value represents the
            # probability that the image corresponds to that label
            # do_inference(context, h_input, d_input, h_output, d_output, stream)
            trt_outputs = common.do_inference(context, bindings, inputs, outputs, stream, 16)
            outs = trt_outputs[0].reshape(16, 427)
            print(outs)
            for x in range(0, len(outs)):
                pred = labels[np.argmax(outs[x])]
                print(pred)
            pass
def main():
    common.add_help(description="Runs an MNIST network using a PyTorch model")
    # Train the PyTorch model
    mnist_model = model.MnistModel()
    mnist_model.learn()
    weights = mnist_model.get_weights()
    # Do inference with TensorRT.
    engine = build_engine(weights)
    # Build an engine, allocate buffers and create a stream.
    # For more information on buffer allocation, refer to the introductory samples.
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    context = engine.create_execution_context()
    case_num = load_random_test_case(mnist_model, pagelocked_buffer=inputs[0].host)
    # For more information on performing inference, refer to the introductory samples.
    # The common.do_inference function will return a list of outputs - we only have one in this case.
    [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
    pred = np.argmax(output)
    print("Test Case: " + str(case_num))
    print("Prediction: " + str(pred))
def main():
    data_path, data_files = common.find_sample_data(
        description="Runs a Caffe MNIST network in Int8 mode",
        subfolder="mnist",
        find_files=["batches", ModelData.DEPLOY_PATH, ModelData.MODEL_PATH])
    [batch_data_dir, deploy_file, model_file] = data_files
    # Now we create a calibrator and give it the location of our calibration data.
    # We also allow it to cache calibration data for faster engine building.
    calibration_cache = "mnist_calibration.cache"
    calib = calibrator.MNISTEntropyCalibrator(batch_data_dir, cache_file=calibration_cache)
    # We will use the calibrator batch size across the board.
    # This is not a requirement, but in this case it is convenient.
    batch_size = calib.get_batch_size()
    with build_int8_engine(deploy_file, model_file, calib) as engine, engine.create_execution_context() as context:
        # Allocate engine buffers.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Do inference for the whole batch. We have to specify batch size here,
        # as common.do_inference uses a default batch size otherwise.
        inputs[0].host, labels = load_random_batch(calib)
        [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream, batch_size=batch_size)
        # Next we need to reshape the output to Nx10 (10 probabilities, one per digit), where N is batch size.
        output = output.reshape(batch_size, 10)
        validate_output(output, labels)
def predict(inp: Image, metadata):
    image_raw, image = preprocessor.process(inp)
    shape_orig_WH = image_raw.size
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    # Do inference
    print('Running inference on image')
    # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
    inputs[0].host = image
    a = perf_counter()
    trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
    b = perf_counter()
    metadata['TensorRT Inference Latency (s)'] = (b - a)
    trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]
    postprocessor = PostprocessYOLO(**postprocessor_args)
    # Run the post-processing algorithms on the TensorRT outputs and get the bounding box details of detected objects
    boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH))
    # Draw the bounding boxes onto the original input image and save it as a PNG file
    obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)
    return obj_detected_img
def main():
    # By doing this, you will also register the Clip plugin with the TensorRT
    # PluginRegistry through use of the macro REGISTER_TENSORRT_PLUGIN present
    # in the plugin implementation. Refer to plugin/clipPlugin.cpp for more details.
    if not os.path.isfile(CLIP_PLUGIN_LIBRARY):
        raise IOError("\n{}\n{}\n{}\n".format(
            "Failed to load library ({}).".format(CLIP_PLUGIN_LIBRARY),
            "Please build the Clip sample plugin.",
            "For more information, see the included README.md"))
    # Train MNIST data and get weights.
    mnist_model = MnistModel()
    mnist_model.learn()
    weights = mnist_model.get_weights()
    # Do inference with TensorRT.
    with build_engine(weights) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            case_num = load_random_test_case(mnist_model, pagelocked_buffer=inputs[0].host)
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
def main():
    data_path = common.find_sample_data(
        description="Runs an MNIST network using a UFF model file", subfolder="mnist")
    model_file = ModelData.MODEL_FILE
    t1 = time.clock()
    with build_engine(model_file) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        # with open('/home/nvidia/procedure/lenet5.engine', 'wb') as f:
        #     f.write(engine.serialize())
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            case_num = load_normalized_test_case(data_path, pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
    t2 = time.clock()
    print("use_time:" + str(t2 - t1))
def get_trt_test_accuracy(engine, inputs, outputs, bindings, stream, mnist_model):
    # Create an IExecutionContext for the engine.
    with engine.create_execution_context() as context:
        # Counters for the accuracy statistics.
        correct = 0
        total = 0
        # Run inference on every sample.
        # Technically this could be batched, however this only comprises a fraction of total
        # time spent in the test.
        # get_all_test_samples is implemented in model.py.
        for test_img, test_name in mnist_model.get_all_test_samples():
            # load_img_to_input_buffer (implemented in this file) copies the image
            # into the pagelocked host input buffer.
            load_img_to_input_buffer(test_img, pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            # do_inference is implemented in common.py.
            [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            # Post-process: take the argmax as the predicted digit.
            pred = np.argmax(output)
            correct += (test_name == pred)
            total += 1
        accuracy = float(correct) / total
        print("Got {} correct predictions out of {} ({:.1f}%)".format(correct, total, 100 * accuracy))
        return accuracy
def infer(self, image_path):
    """Infers model on given image.

    Args:
        image_path (str): image to run object detection model on
    """
    # Load image into CPU
    img = self._load_img(image_path)
    # Copy it into appropriate place into memory
    # (self.inputs was returned earlier by allocate_buffers())
    np.copyto(self.inputs[0].host, img.ravel())
    # When inferring on a single image, we measure inference
    # time to output it to the user
    inference_start_time = time.time()
    # Fetch output from the model
    [detection_out, keepCount_out] = common.do_inference(self.context, bindings=self.bindings, inputs=self.inputs, outputs=self.outputs, stream=self.stream)
    # Output inference time
    print("TensorRT inference time: {} ms".format(
        int(round((time.time() - inference_start_time) * 1000))))
    # And return results
    return detection_out, keepCount_out
def infer_batch(self, image_paths):
    """Infers model on batch of same sized images resized to fit the model.

    Args:
        image_paths (str): paths to images, that will be packed into batch
            and fed into model
    """
    # Verify if the supplied batch size is not too big
    max_batch_size = self.trt_engine.max_batch_size
    actual_batch_size = len(image_paths)
    if actual_batch_size > max_batch_size:
        raise ValueError(
            "image_paths list bigger ({}) than engine max batch size ({})".format(
                actual_batch_size, max_batch_size))
    # Load all images to CPU...
    imgs = self._load_imgs(image_paths)
    # ...copy them into appropriate place into memory...
    # (self.inputs was returned earlier by allocate_buffers())
    np.copyto(self.inputs[0].host, imgs.ravel())
    # ...fetch model outputs...
    [detection_out, keep_count_out] = common.do_inference(self.context, bindings=self.bindings, inputs=self.inputs, outputs=self.outputs, stream=self.stream, batch_size=max_batch_size)
    # ...and return results.
    return detection_out, keep_count_out
def main():
    # Load the shared object file containing the Clip plugin implementation.
    # By doing this, you will also register the Clip plugin with the TensorRT
    # PluginRegistry through use of the macro REGISTER_TENSORRT_PLUGIN present
    # in the plugin implementation. Refer to plugin/clipPlugin.cpp for more details.
    if not os.path.isfile(CLIP_PLUGIN_LIBRARY):
        raise IOError("\n{}\n{}\n{}\n".format(
            "Failed to load library ({}).".format(CLIP_PLUGIN_LIBRARY),
            "Please build the Clip sample plugin.",
            "For more information, see the included README.md"))
    ctypes.CDLL(CLIP_PLUGIN_LIBRARY)
    # Load pretrained model
    if not os.path.isfile(MODEL_PATH):
        raise IOError("\n{}\n{}\n{}\n".format(
            "Failed to load model file ({}).".format(MODEL_PATH),
            "Please use 'python lenet5.py' to train and save the model.",
            "For more information, see the included README.md"))
    # Build an engine and retrieve the image mean from the model.
    with build_engine(MODEL_PATH) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            print("\n=== Testing ===")
            test_case = load_normalized_test_case(inputs[0].host)
            print("Loading Test Case: " + str(test_case))
            # The common do_inference function will return a list of outputs - we only have one in this case.
            [pred] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            print("Prediction: " + str(np.argmax(pred)))
def get_trt_test_accuracy(engine, inputs, outputs, bindings, stream, mnist_model):
    with engine.create_execution_context() as context:
        correct = 0
        total = 0
        # Run inference on every sample.
        # Technically this could be batched, however this only comprises a fraction of total
        # time spent in the test.
        for test_img, test_name in mnist_model.get_all_test_samples():
            load_img_to_input_buffer(test_img, pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            pred = np.argmax(output)
            correct += (test_name == pred)
            total += 1
        accuracy = float(correct) / total
        print("Got {} correct predictions out of {} ({:.1f}%)".format(correct, total, 100 * accuracy))
        return accuracy
def main():
    # Get data files for the model.
    data_path, [deploy_file, model_file, mean_proto] = common.find_sample_data(
        description="Runs an MNIST network using a Caffe model file",
        subfolder="mnist",
        find_files=["mnist.prototxt", "mnist.caffemodel", "mnist_mean.binaryproto"])
    with build_engine(deploy_file, model_file) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        mean = retrieve_mean(mean_proto)
        with engine.create_execution_context() as context:
            case_num = load_normalized_test_case(data_path, inputs[0].host, mean)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
    # After the engine is destroyed, we destroy the plugin.
    # This function is exposed through the binding code in plugin/pyFullyConnected.cpp.
    fc_factory.destroy_plugin()
def main():
    x = "/home/dgxuser125/rt-kennan/Swish3/build/libswish.so"
    ctypes.CDLL(x)
    data_paths, _ = common.find_sample_data(
        description="Runs an MNIST network using a UFF model file", subfolder="mnist")
    model_path = os.environ.get("MODEL_PATH") or os.path.join(os.path.dirname(__file__), "models")
    model_file = os.path.join(model_path, ModelData.MODEL_FILE)
    with build_engine(model_file) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            # Start measuring time
            inference_start_time = time.time()
            for i in range(1000):
                case_num = load_normalized_test_case(data_paths, pagelocked_buffer=inputs[0].host)
                [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
                pred = np.argmax(output)
                # print("Test Case: " + str(case_num))
                # print("Prediction: " + str(pred))
            end_time = time.time()
            print("time taken for one input with tensorrt: ", (end_time - inference_start_time) / 1000)
def main():
    engine = init_construct_network()
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    with engine.create_execution_context() as context:
        print(np.ones((x1 * y1 * z1), np.float32).reshape(-1))
        np.copyto(inputs[0].host, np.ones((x1 * y1 * z1), np.float32).reshape(-1))
        np.copyto(inputs[1].host, np.ones((x2 * y2 * z2), np.float32).reshape(-1))
        time_start = time.time()
        output = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
        time_end = time.time()
        print("time ", time_end - time_start)
        print(output[0].reshape((x1 * y1 * z1)))
        data = output[0].reshape((x1 * y1 * z1))
        print(data[0])
        print(output[1].reshape((x1 * y1 * z1)))
        print("ok")
def main():
    # Load labels
    labels = [line.rstrip('\n') for line in open('class_labels.txt')]
    # Load engine
    trt_engine = './vgg16_32.trt'
    Cifar10_engine = load_engine(trt_engine)
    dispW = 1280
    dispH = 720
    flip = 0
    fpsReport = 0
    camSet = ('nvarguscamerasrc ! video/x-raw(memory:NVMM), width=1280, height=720, format=NV12, framerate=60/1 '
              '! nvvidconv flip-method=' + str(flip) + ' ! video/x-raw, width=' + str(dispW) + ', height=' + str(dispH) +
              ', format=BGRx ! videoconvert ! video/x-raw, format=BGR ! appsink')
    cap = cv2.VideoCapture(camSet, cv2.CAP_GSTREAMER)
    timeStamp = time.time()
    font = cv2.FONT_HERSHEY_SIMPLEX
    while True:
        _, frame = cap.read()
        frame = frame.astype('float32')
        frameRGB = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        img = cv2.resize(frameRGB, (32, 32)) / 255
        img = img.transpose((2, 0, 1)).flatten()
        # Allocate buffers for inputs and outputs
        inputs, outputs, bindings, stream = common.allocate_buffers(Cifar10_engine)
        inputs[0].host = img
        # Inference
        with Cifar10_engine.create_execution_context() as context:
            trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
        pred = trt_outputs[0].argmax(-1)
        # FPS
        dt = time.time() - timeStamp
        fps = 1 / dt
        fpsReport = .9 * fpsReport + .1 * fps
        timeStamp = time.time()
        cv2.rectangle(frame, (0, 0), (350 + len(labels[pred]) * 30, 80), (0, 0, 255), -1)
        cv2.putText(frame, str(round(fpsReport, 1)) + 'fps' + ', ' + labels[pred], (0, 60), font, 2, (0, 255, 255), 3)
        cv2.imshow('stream', frame / 255)
        if cv2.waitKey(1) == 27:
            break
    cap.release()
    cv2.destroyAllWindows()
def main(win_title):
    # Load the TensorRT engine
    print('load trt engine')
    trt_path = 'engine.trt'
    engine = load_engine(trt_runtime, trt_path)
    print('load labels')
    label = get_label('keras_models/labels.txt')
    # Allocate buffers
    print('allocate buffers')
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    print('create execution context')
    context = engine.create_execution_context()
    print('start stream')
    fps = -1
    GSTREAMER_PIPELINE = 'nvarguscamerasrc ! video/x-raw(memory:NVMM), width=1920, height=1080, format=(string)NV12, framerate=60/1 ! nvvidconv flip-method=0 ! video/x-raw, width=640, height=480, format=(string)BGRx ! videoconvert ! video/x-raw, format=(string)BGR ! appsink'
    cap = cv2.VideoCapture(GSTREAMER_PIPELINE, cv2.CAP_GSTREAMER)
    while True:
        t_start = time.time()
        ret, frame = cap.read()
        size = (224, 224)
        inputs[0].host = preprocess(frame)
        # with engine.create_execution_context() as context:
        trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
        preds = trt_outputs[0]
        idx = np.argmax(preds)
        result = label[idx]
        info = '{} : {:.3f} , FPS {}'.format(result, preds[idx], fps)
        cv2.putText(frame, info, (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 4)
        cv2.imshow(win_title, frame)
        if cv2.waitKey(1) == ord('q'):
            break
        fps = int(1 / (time.time() - t_start))
    cap.release()
    cv2.destroyAllWindows()
    print('Quit')