Example #1
# trtexec --verbose --onnx=resnet50.dynamic_shape.onnx --saveEngine=resnet50.dynamic_shape.trt --optShapes=input:1x3x1080x1920 --minShapes=input:1x3x1080x1920 --maxShapes=input:1x3x1080x1920

import os
import time
import numpy as np
import torch
from trt_lite import TrtLite

input_data = torch.randn(1, 3, 800, 1216, dtype=torch.float16, device='cuda')
nRound = 10

for engine_file_path in ['fcos101.trt', 'fcos101_fp16.trt']:
    if not os.path.exists(engine_file_path):
        print('Engine file', engine_file_path,
              'doesn\'t exist. Please run trtexec and re-run this script.')
        exit(1)

    print('====', engine_file_path, '====')
    trt = TrtLite(engine_file_path=engine_file_path)
    trt.print_info()
    # Bind input 0 to its runtime shape, query I/O metadata and allocate device buffers
    i2shape = {0: (1, 3, 800, 1216)}
    io_info = trt.get_io_info(i2shape)
    d_buffers = trt.allocate_io_buffers(i2shape, True)
    output_data_trt = np.zeros(io_info[1][2], dtype=np.float32)

    # Warm-up run: feed the input tensor, then copy the result back to the host
    d_buffers[0] = input_data
    trt.execute([t.data_ptr() for t in d_buffers], i2shape)
    output_data_trt = d_buffers[1].cpu().numpy()

    # Time nRound inference runs and report the average
    torch.cuda.synchronize()
    t0 = time.time()
    for i in range(nRound):
        trt.execute([t.data_ptr() for t in d_buffers], i2shape)
    torch.cuda.synchronize()
    print('Average inference time:', (time.time() - t0) / nRound)
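
Since the loop above runs both the FP32 and the FP16 engine on the same input, a natural follow-up is to compare their outputs. The sketch below is not part of the original example; fp32_out and fp16_out are placeholder names for the two output_data_trt arrays kept from the two iterations.

def compare_outputs(fp32_out, fp16_out, rtol=1e-2, atol=1e-2):
    # Report the largest absolute difference and whether the FP16 result stays
    # within the given tolerances of the FP32 reference.
    diff = np.abs(fp32_out.astype(np.float32) - fp16_out.astype(np.float32))
    print('max abs diff:', diff.max())
    print('allclose:', np.allclose(fp32_out, fp16_out, rtol=rtol, atol=atol))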
Example #2
def run_engine_dynamic(save_and_load=False):
    # Assumes np (numpy), cuda (pycuda.driver), reduce (functools), TrtLite and a
    # build_engine_dynamic() builder function are available in the enclosing module.
    input_shape = (1, 1, 5, 5)
    n = reduce(lambda x, y: x * y, input_shape)
    input_data = np.asarray(range(n), dtype=np.float32).reshape(input_shape)
    output_data = np.zeros(input_shape, dtype=np.float32)

    # Build the engine from the build_engine_dynamic callback; optionally
    # round-trip it through a serialized engine file first.
    trt = TrtLite(build_engine_dynamic)
    if save_and_load:
        trt.save_to_file("out.trt")
        trt = TrtLite(engine_file_path="out.trt")
    trt.print_info()

    i2shape = {0: input_shape}
    d_buffers = trt.allocate_io_buffers(i2shape, True)

    # Copy the input to the device, run inference, copy the output back
    cuda.memcpy_htod(d_buffers[0], input_data)
    trt.execute(d_buffers, i2shape)
    cuda.memcpy_dtoh(output_data, d_buffers[1])

    print(output_data)
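
Example #2 relies on a build_engine_dynamic callback that is not shown on this page. Purely as an illustration, such a builder might look roughly like the sketch below; the identity network, the tensor name 'input' and the shape ranges are assumptions, and the exact callback contract TrtLite expects may differ.

import tensorrt as trt

def build_engine_dynamic():
    # Hypothetical builder: a trivial identity network with one dynamic-shape
    # input named 'input', with an optimization profile covering (1, 1, 5, 5).
    logger = trt.Logger(trt.Logger.INFO)
    builder = trt.Builder(logger)
    network = builder.create_network(
        1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    config = builder.create_builder_config()

    x = network.add_input('input', trt.float32, (-1, 1, -1, -1))
    identity = network.add_identity(x)
    network.mark_output(identity.get_output(0))

    profile = builder.create_optimization_profile()
    profile.set_shape('input', (1, 1, 1, 1), (1, 1, 5, 5), (4, 1, 64, 64))
    config.add_optimization_profile(profile)

    return builder.build_engine(network, config)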
Example #3
def run_engine_dynamic(input_data):
    # Assumes np (numpy), cuda (pycuda.driver), time and TrtLite are imported.
    # Uncomment the next three lines to rebuild and save the engine instead of
    # loading the prebuilt edvr.trt:
    #trt = TrtLite(build_engine_dynamic)
    #trt.print_info()
    #trt.save_to_file("edvr.trt")

    trt = TrtLite(engine_file_path="edvr.trt")
    trt.print_info()

    io_info = trt.get_io_info({})
    if io_info is None:
        return
    print(io_info)
    # Allocate matching host buffers (False) and device buffers (True)
    h_buffers = trt.allocate_io_buffers({}, False)
    d_buffers = trt.allocate_io_buffers({}, True)

    h_buffers[0][:] = input_data

    # info[1] is True for input bindings: upload the inputs, then do a warm-up run
    for i, info in enumerate(io_info):
        if info[1]:
            cuda.memcpy_htod(d_buffers[i], h_buffers[i])
    trt.execute(d_buffers, {})

    nRound = 10
    cuda.Context.synchronize()
    t0 = time.time()
    for i in range(nRound):
        trt.execute(d_buffers, {})
    cuda.Context.synchronize()
    print('Prediction time: ', (time.time() - t0) / nRound)

    # Download every output binding back to its host buffer
    for i, info in enumerate(io_info):
        if not info[1]:
            cuda.memcpy_dtoh(h_buffers[i], d_buffers[i])

    # Save all outputs, keyed by binding name, for offline inspection
    name2tensor = {
        info[0]: h_buffers[i]
        for i, info in enumerate(io_info) if not info[1]
    }
    np.savez('out.npz', **name2tensor)
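
To inspect what Example #3 wrote to disk, the saved archive can be reloaded with numpy; out.npz and the binding names come from the code above.

import numpy as np

# Print the name, shape and dtype of every output binding saved by Example #3
data = np.load('out.npz')
for name in data.files:
    print(name, data[name].shape, data[name].dtype)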
Example #4
    import pycuda.driver as cuda
    import tensorrt

    class PyTorchTensorHolder(cuda.PointerHolderBase):
        """Wraps a PyTorch CUDA tensor so pycuda memcpy calls can use its device pointer."""
        def __init__(self, tensor):
            super(PyTorchTensorHolder, self).__init__()
            self.tensor = tensor

        def get_pointer(self):
            return self.tensor.data_ptr()

    # np, os, TrtLite, and the input image (a CUDA tensor) with its h/w dimensions
    # are assumed to be defined in the enclosing scope of this snippet.
    tensorrt.init_libnvinfer_plugins(None, "")
    #engine_file_path = 'panoptic_fcn_fp16.trt'
    for engine_file_path in ['panoptic_fcn.trt', 'panoptic_fcn_fp16.trt']:
        if not os.path.exists(engine_file_path):
            print('Engine file', engine_file_path, 'does not exist, skipping.')
            continue
        print('==' + engine_file_path + '==')
        trt = TrtLite(engine_file_path=engine_file_path)
        trt.print_info()
        i2shape = {0: (1, 3, h, w)}
        io_info = trt.get_io_info(i2shape)

        # print(io_info)
        # print(io_info[1])
        # print(io_info[1][2])
        d_buffers = trt.allocate_io_buffers(i2shape, True)

        # Host-side output buffers, sized from the engine's I/O metadata
        scores_out = np.zeros(io_info[1][2], dtype=np.float32)
        pred_inst_out = np.zeros(io_info[2][2], dtype=np.int32)
        classes_out = np.zeros(io_info[3][2], dtype=np.float32)

        # Copy the input image (a CUDA tensor) device-to-device into binding 0, then run
        cuda.memcpy_dtod(d_buffers[0], PyTorchTensorHolder(image), image.nelement() * image.element_size())
        trt.execute(d_buffers, i2shape)
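
Example #4 stops right after trt.execute. An assumed continuation, mirroring the device-to-host copies in Example #3, would go inside the loop and fill the host output arrays allocated above.

        # Assumed continuation (not in the original snippet): download the
        # outputs into the host arrays allocated above
        cuda.memcpy_dtoh(scores_out, d_buffers[1])
        cuda.memcpy_dtoh(pred_inst_out, d_buffers[2])
        cuda.memcpy_dtoh(classes_out, d_buffers[3])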