Example #1
    def __init__(self, engine_file_path):
        # Create a CUDA context on this device.
        self.cfx = cuda.Device(0).make_context()
        stream = cuda.Stream()
        TRT_LOGGER = trt.Logger(trt.Logger.INFO)
        runtime = trt.Runtime(TRT_LOGGER)

        # Deserialize the engine from file
        with open(engine_file_path, "rb") as f:
            engine = runtime.deserialize_cuda_engine(f.read())
        context = engine.create_execution_context()

        host_inputs = []
        cuda_inputs = []
        host_outputs = []
        cuda_outputs = []
        bindings = []

        for binding in engine:
            size = trt.volume(
                engine.get_binding_shape(binding)) * engine.max_batch_size
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            # Allocate host and device buffers
            host_mem = cuda.pagelocked_empty(size, dtype)
            cuda_mem = cuda.mem_alloc(host_mem.nbytes)
            # Append the device buffer to device bindings.
            bindings.append(int(cuda_mem))
            # Append to the appropriate list.
            if engine.binding_is_input(binding):
                host_inputs.append(host_mem)
                cuda_inputs.append(cuda_mem)
            else:
                host_outputs.append(host_mem)
                cuda_outputs.append(cuda_mem)

        # Store
        self.stream = stream
        self.context = context
        self.engine = engine
        self.host_inputs = host_inputs
        self.cuda_inputs = cuda_inputs
        self.host_outputs = host_outputs
        self.cuda_outputs = cuda_outputs
        self.bindings = bindings
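Example #1 stores only the buffers; the matching inference method is not shown. A minimal sketch of one (assuming a single input and a single output; the `infer` name and the preprocessed-input argument are hypothetical) would push the per-thread CUDA context, copy data across, and run the engine:

    def infer(self, input_image):
        # Bind the CUDA context created in __init__ to this thread.
        self.cfx.push()
        try:
            # Flatten the preprocessed image into the page-locked input buffer.
            np.copyto(self.host_inputs[0], input_image.ravel())
            cuda.memcpy_htod_async(self.cuda_inputs[0], self.host_inputs[0], self.stream)
            # Implicit-batch execution, matching the engine.max_batch_size sizing above.
            self.context.execute_async(batch_size=1, bindings=self.bindings,
                                       stream_handle=self.stream.handle)
            cuda.memcpy_dtoh_async(self.host_outputs[0], self.cuda_outputs[0], self.stream)
            self.stream.synchronize()
            return self.host_outputs[0]
        finally:
            self.cfx.pop()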
Example #2
 def __init__(self, model):
     print('setting up Yolov5s-simple.trt processor')
     # load tensorrt engine
     TRT_LOGGER = trt.Logger(trt.Logger.INFO)
     TRTbin = '{0}/models/{1}'.format(os.path.dirname(__file__), model)
     with open(TRTbin, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
         engine = runtime.deserialize_cuda_engine(f.read())
     self.context = engine.create_execution_context()
     # allocate memory
     inputs, outputs, bindings = [], [], []
     stream = cuda.Stream()
     for binding in engine:
         size = trt.volume(engine.get_binding_shape(binding))
         dtype = trt.nptype(engine.get_binding_dtype(binding))
         host_mem = cuda.pagelocked_empty(size, dtype)
         device_mem = cuda.mem_alloc(host_mem.nbytes)
         bindings.append(int(device_mem))
         if engine.binding_is_input(binding):
             inputs.append({'host': host_mem, 'device': device_mem})
         else:
             outputs.append({'host': host_mem, 'device': device_mem})
     # save to class
     self.inputs = inputs
     self.outputs = outputs
     self.bindings = bindings
     self.stream = stream
     # post processing config
     filters = (80 + 5) * 3
     self.output_shapes = [(1, 3, 80, 80, 85), (1, 3, 40, 40, 85),
                           (1, 3, 20, 20, 85)]
     self.strides = np.array([8., 16., 32.])
     anchors = np.array([
         [[10, 13], [16, 30], [33, 23]],
         [[30, 61], [62, 45], [59, 119]],
         [[116, 90], [156, 198], [373, 326]],
     ])
     self.nl = len(anchors)
     self.nc = 80  # classes
     self.no = self.nc + 5  # outputs per anchor
     self.na = len(anchors[0])
     a = anchors.copy().astype(np.float32)
     a = a.reshape(self.nl, -1, 2)
     self.anchors = a.copy()
     self.anchor_grid = a.copy().reshape(self.nl, 1, -1, 1, 1, 2)
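Example #2 likewise only sets state up; a companion execution step for its dict-style buffers might look like the following (a sketch: the `infer_once` name is hypothetical, and a single input plus an explicit-batch engine are assumed):

 def infer_once(self, img):
     # Copy the preprocessed image into the pinned host buffer, then onto the GPU.
     np.copyto(self.inputs[0]['host'], img.ravel())
     cuda.memcpy_htod_async(self.inputs[0]['device'], self.inputs[0]['host'], self.stream)
     # Explicit-batch execution (shapes are baked into the engine).
     self.context.execute_async_v2(bindings=self.bindings, stream_handle=self.stream.handle)
     for out in self.outputs:
         cuda.memcpy_dtoh_async(out['host'], out['device'], self.stream)
     self.stream.synchronize()
     return [out['host'] for out in self.outputs]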
Example #3
def allocate_buffers(engine, profile_id):
    """Allocate device memory for I/O bindings of engine and return them."""

    d_inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    if engine.has_implicit_batch_dimension:
        max_batch_size = engine.max_batch_size
    else:
        shape = engine.get_binding_shape(0)
        if -1 in list(shape):
            batch_dim = list(shape).index(-1)
            max_batch_size = engine.get_profile_shape(0, 0)[2][batch_dim]
        else:
            max_batch_size = shape[0]
    nb_bindings_per_profile = engine.num_bindings // engine.num_optimization_profiles
    bindings = [0 for i in range(engine.num_bindings)]
    for binding in range(profile_id * nb_bindings_per_profile, (profile_id + 1) * nb_bindings_per_profile):
        logging.info("Binding {:}".format(binding))
        dtype = engine.get_binding_dtype(binding)
        format = engine.get_binding_format(binding)
        shape = engine.get_binding_shape(binding)
        if format == trt.TensorFormat.CHW4:
            shape[-3] = ((shape[-3] - 1) // 4 + 1) * 4
        elif format == trt.TensorFormat.DHWC8:
            shape[-4] = ((shape[-4] - 1) // 8 + 1) * 8
        if not engine.has_implicit_batch_dimension:
            if -1 in list(shape):
                batch_dim = list(shape).index(-1)
                shape[batch_dim] = max_batch_size
            size = trt.volume(shape)
        else:
            size = trt.volume(shape) * max_batch_size
        # Allocate device buffers
        device_mem = cuda.mem_alloc(size * dtype.itemsize)
        # Append device buffer to device bindings.
        bindings[binding] = int(device_mem)
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            d_inputs.append(device_mem)
        else:
            host_mem = cuda.pagelocked_empty(size, trt.nptype(dtype))
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return d_inputs, outputs, bindings, stream
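Many of these snippets pass their buffers around in a small `HostDeviceMem` container. Where it isn't shown, a minimal definition consistent with the ones embedded in Examples #18 and #19 below is:

class HostDeviceMem(object):
    """Pairs a page-locked host array with its device allocation."""
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __repr__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)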
Example #4
 def __init__(self, model, anchor_nums, nc, anchors, output_shapes,
              img_size):
     # load tensorrt engine
     self.cfx = cuda.Device(0).make_context()
     TRT_LOGGER = trt.Logger(trt.Logger.INFO)
     TRTbin = model
     # print('trtbin', TRTbin)
     runtime = trt.Runtime(TRT_LOGGER)
     with open(TRTbin, 'rb') as f:
         engine = runtime.deserialize_cuda_engine(f.read())
     self.context = engine.create_execution_context()
     # allocate memory
     inputs, outputs, bindings = [], [], []
     stream = cuda.Stream()
     for binding in engine:
         size = trt.volume(
             engine.get_binding_shape(binding)) * engine.max_batch_size
         dtype = trt.nptype(engine.get_binding_dtype(binding))
         host_mem = cuda.pagelocked_empty(size, dtype)
         device_mem = cuda.mem_alloc(host_mem.nbytes)
         bindings.append(int(device_mem))
         if engine.binding_is_input(binding):
             inputs.append({'host': host_mem, 'device': device_mem})
         else:
             outputs.append({'host': host_mem, 'device': device_mem})
     # save to class
     self.inputs = inputs
     self.outputs = outputs
     self.bindings = bindings
     self.stream = stream
     self.anchor_nums = anchor_nums
     self.nc = nc  # classes
     self.no = self.nc + 5  # outputs per anchor
     # post processing config
     self.output_shapes = output_shapes
     self.strides = np.array([8., 16., 32.])
     self.na = len(anchors[0])
     self.nl = len(anchors)
     self.img_size = img_size
     a = anchors.copy().astype(np.float32)
     a = a.reshape(self.nl, -1, 2)
     self.anchors = a.copy()
     self.anchor_grid = a.copy().reshape(self.nl, 1, -1, 1, 1, 2)
Example #5
    def allocate_buffers(cls, engine):
        inputs = []
        outputs = []
        bindings = []
        stream = cuda.Stream()
        for binding in engine:
            size = trt.volume(engine.get_binding_shape(binding)) \
                   * engine.max_batch_size
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            # Allocate host and device buffers
            host_memory = cuda.pagelocked_empty(size, dtype)
            device_memory = cuda.mem_alloc(host_memory.nbytes)
            bindings.append(int(device_memory))
            if engine.binding_is_input(binding):
                inputs.append(HostDeviceMemory(host_memory, device_memory))
            else:
                outputs.append(HostDeviceMemory(host_memory, device_memory))

        return inputs, outputs, bindings, stream
Example #6
def allocate_buffers(engine):
    """Allocates host and device buffer for TRT engine inference.

    This function is similar to the one in ../../common.py, but
    converts network outputs (which are np.float32) appropriately
    before writing them to the Python buffer. This is needed since
    UFF plugins don't support output type description, and
    in our particular case, we use the NMS plugin as the network output.

    Args:
        engine (trt.ICudaEngine): uff engine

    Returns:
        inputs [HostDeviceMem]: engine input memory
        outputs [HostDeviceMem]: engine output memory
        bindings [int]: buffer to device bindings
        stream (cuda.Stream): cuda stream for engine inference synchronization
    """
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()

    # The current NMS implementation in TRT only supports DataType.FLOAT, but
    # it may change in the future, which could break this sample
    # when using lower precision [e.g. the NMS output would not be np.float32
    # anymore, even though this is assumed in binding_to_type]

    for binding in engine:
        size = trt.volume(
            engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream
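Buffer lists shaped like this are normally consumed by the `do_inference` helper from the TensorRT samples' common.py; a sketch of it for the implicit-batch API used here:

def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    # Transfer input data to the GPU.
    for inp in inputs:
        cuda.memcpy_htod_async(inp.device, inp.host, stream)
    # Run inference over the implicit batch dimension.
    context.execute_async(batch_size=batch_size, bindings=bindings,
                          stream_handle=stream.handle)
    # Transfer predictions back from the GPU.
    for out in outputs:
        cuda.memcpy_dtoh_async(out.host, out.device, stream)
    # Synchronize the stream before the host reads the results.
    stream.synchronize()
    return [out.host for out in outputs]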
Example #7
def allocate_buffers(engine):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream
Example #8
    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        builder = trt.Builder(TRT_LOGGER)
        network = builder.create_network(common.EXPLICIT_BATCH)
        parser = trt.OnnxParser(network, TRT_LOGGER)
        runtime = trt.Runtime(TRT_LOGGER)

        # Parse model file
        print('Loading ONNX file from path {}...'.format(onnx_file_path))
        with open(onnx_file_path, 'rb') as model:
            print('Beginning ONNX file parsing')
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        print('Completed parsing of ONNX file')

        # Print input info
        print('Network inputs:')
        for i in range(network.num_inputs):
            tensor = network.get_input(i)
            print(tensor.name, trt.nptype(tensor.dtype), tensor.shape)

        network.get_input(0).shape = [10, 1]
        network.get_input(1).shape = [10, 1, 1, 16]
        network.get_input(2).shape = [6, 1]
        network.get_input(3).shape = [6, 1, 1, 16]

        config = builder.create_builder_config()
        config.set_flag(trt.BuilderFlag.REFIT)
        config.max_workspace_size = 1 << 28  # 256MiB

        print(
            'Building an engine from file {}; this may take a while...'.format(
                onnx_file_path))
        plan = builder.build_serialized_network(network, config)
        engine = runtime.deserialize_cuda_engine(plan)
        print("Completed creating Engine")

        with open(engine_file_path, "wb") as f:
            f.write(plan)
        return engine
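The indentation suggests this `build_engine` is nested inside a loader; in the TensorRT samples the surrounding function caches the serialized plan, roughly like this (a sketch; it assumes `onnx_file_path`, `engine_file_path`, and `TRT_LOGGER` are in scope and `os` is imported):

def get_engine(onnx_file_path, engine_file_path):
    """Hypothetical wrapper: load a cached plan if present, else build one."""
    if os.path.exists(engine_file_path):
        # Reuse the serialized engine instead of rebuilding it.
        print('Reading engine from file {}'.format(engine_file_path))
        with open(engine_file_path, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    return build_engine()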
Example #9
    def allocate_buffers(self):
        self.inputs = []
        self.outputs = []
        self.bindings = []
        self.stream = cuda.Stream()

        for binding in self.engine:
            size = trt.volume(self.engine.get_binding_shape(binding)) * self.max_batch_size
            dtype = trt.nptype(self.engine.get_binding_dtype(binding))
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)

            self.bindings.append(int(device_mem))
            # print(bindings)
            # Append to the appropriate list.
            if self.engine.binding_is_input(binding):
                self.inputs.append(HostDeviceMem(host_mem, device_mem))
            else:
                self.outputs.append(HostDeviceMem(host_mem, device_mem))
Example #10
    def get_layer_weights(self, layer_name):
        """
        Get the weights of a constant layer as a numpy array

        :param layer_name: string
        :return: the weights as a numpy array (for a constant or identity layer), otherwise the layer's output tensor
        """

        layer = self._ops[layer_name]
        if isinstance(layer, trt.IConstantLayer):
            return np.reshape(self._ops[layer_name].weights,
                              self._ops_output[layer_name].shape)
        elif isinstance(layer, trt.IIdentityLayer):
            cast_to_dtype = trt.nptype(layer.precision)
            const_layer_name = layer.get_input(0).name
            weights = self.get_layer_weights(const_layer_name)
            return weights.astype(cast_to_dtype)
        else:
            return layer.get_output(0)
Example #11
    def __init__(self, trt_path):
        # get model name
        self._model_name = os.path.basename(trt_path)
        self._model_name = self._model_name[:self._model_name.rfind(".")]

        # create engine
        self.trt_path = trt_path
        self.logger = trt.Logger()
        self.runtime = trt.Runtime(self.logger)
        with open(trt_path, "rb") as f:
            self.engine = self.runtime.deserialize_cuda_engine(f.read())

        # create context and buffer
        self.context = self.engine.create_execution_context()
        self.stream = cuda.Stream()
        bindings = []
        host_input = device_input = host_output = device_output = None

        for binding in self.engine:
            binding_idx = self.engine.get_binding_index(binding)
            print(f"binding name {binding}, idx {binding_idx}")
            size = trt.volume(self.context.get_binding_shape(binding_idx))
            dtype = trt.nptype(self.engine.get_binding_dtype(binding))
            if self.engine.binding_is_input(binding):
                print(size)
                host_input = np.empty(size, dtype=np.float32)
                device_input = cuda.mem_alloc(host_input.nbytes)
                bindings.append(int(device_input))
            else:
                host_output = cuda.pagelocked_empty(size, dtype)
                device_output = cuda.mem_alloc(host_output.nbytes)
                bindings.append(int(device_output))

        assert device_input is not None
        assert device_output is not None
        assert len(bindings) == 2

        self.bindings = bindings
        self.device_input = device_input
        self.host_input = host_input
        self.device_output = device_output
        self.host_output = host_output
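With exactly one input/output pair, the inference path in Example #11 stays short; a hedged sketch (the `infer` method name is an assumption, and the input is a numpy array matching the engine's input size):

    def infer(self, input_array):
        # Fill the flat float32 host input and copy it to the device.
        np.copyto(self.host_input, input_array.ravel())
        cuda.memcpy_htod_async(self.device_input, self.host_input, self.stream)
        # Explicit-batch execution with exactly the two bindings asserted above.
        self.context.execute_async_v2(bindings=self.bindings,
                                      stream_handle=self.stream.handle)
        cuda.memcpy_dtoh_async(self.host_output, self.device_output, self.stream)
        self.stream.synchronize()
        return self.host_output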
Example #12
def build_engine(onnx_file_path, engine_file_path, calib=None):
    EXPLICIT_BATCH = 1 << int(
        trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)

    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
        (EXPLICIT_BATCH)) as network, trt.OnnxParser(network,
                                                     TRT_LOGGER) as parser:
        with open(onnx_file_path, 'rb') as model:
            print('Beginning ONNX file parsing')
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        print('Completed parsing of ONNX file')

        print('Network inputs:')

        for i in range(network.num_inputs):
            tensor = network.get_input(i)
            print(tensor.name, trt.nptype(tensor.dtype), tensor.shape)

        config = builder.create_builder_config()
        config.max_workspace_size = 1 << 20

        if calib:
            config.set_flag(trt.BuilderFlag.INT8)
            config.int8_calibrator = calib
        else:
            config.set_flag(trt.BuilderFlag.FP16)  # builder.fp16_mode is ignored with config-based builds

        print(
            'Building an engine from file {}; this may take a while...'.format(
                onnx_file_path))

        engine = builder.build_engine(network, config)
        print("Completed creating Engine. Writing file to: {}".format(
            engine_file_path))

        with open(engine_file_path, "wb") as f:
            f.write(engine.serialize())
        return engine
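Hypothetical call sites for this builder (paths and the calibrator are placeholders):

engine = build_engine('model.onnx', 'model_fp16.trt')  # FP16 build
# engine = build_engine('model.onnx', 'model_int8.trt', calib=my_calibrator)  # INT8 build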
Example #13
def run(batchSize, nRow, nCol):
    print("test", batchSize, nRow, nCol)
    logger = trt.Logger(trt.Logger.ERROR)
    trt.init_libnvinfer_plugins(logger, '')
    ctypes.cdll.LoadLibrary(soFilePath)

    engine = buildEngine(logger, nRow, nCol)
    if engine is None:
        print("Failed building engine!")
        return None
    print("Succeeded building engine!")

    context = engine.create_execution_context()
    stream = cuda.Stream()

    condition = np.array(np.random.randint(0, 2, [batchSize, nRow, nCol]),
                         dtype=np.int32)
    inputX = np.full([batchSize, nRow, nCol], 1, dtype=np.float32)
    inputY = np.full([batchSize, nRow, nCol], -1, dtype=np.float32)
    inputH0 = np.ascontiguousarray(condition.reshape(-1))
    inputH1 = np.ascontiguousarray(inputX.reshape(-1))
    inputH2 = np.ascontiguousarray(inputY.reshape(-1))
    inputD0 = cuda.mem_alloc(inputH0.nbytes)
    inputD1 = cuda.mem_alloc(inputH1.nbytes)
    inputD2 = cuda.mem_alloc(inputH2.nbytes)
    outputH0 = np.empty((batchSize, ) + tuple(engine.get_binding_shape(3)),
                        dtype=trt.nptype(engine.get_binding_dtype(3)))
    outputD0 = cuda.mem_alloc(outputH0.nbytes)

    cuda.memcpy_htod_async(inputD0, inputH0, stream)
    cuda.memcpy_htod_async(inputD1, inputH1, stream)
    cuda.memcpy_htod_async(inputD2, inputH2, stream)
    context.execute_async(
        batchSize,
        [int(inputD0), int(inputD1),
         int(inputD2), int(outputD0)], stream.handle)
    cuda.memcpy_dtoh_async(outputH0, outputD0, stream)
    stream.synchronize()

    outputH0CPU = whereCPU(condition, inputX, inputY)
    print("Check result:",
          ["True" if np.all(outputH0 == outputH0CPU) else "False"][0])
Example #14
 def _allocate_buffers(self):
     """ nvidia function to allocate memory on the device """
     inputs = []
     outputs = []
     bindings = []
     stream = cuda.Stream()
     for binding in self.engine:
         size = (
             trt.volume(self.engine.get_binding_shape(binding))
             * self.engine.max_batch_size
         )
         dtype = trt.nptype(self.engine.get_binding_dtype(binding))
         host_mem = cuda.pagelocked_empty(size, dtype)
         device_mem = cuda.mem_alloc(host_mem.nbytes)
         bindings.append(int(device_mem))
         if self.engine.binding_is_input(binding):
             inputs.append(HostDeviceMem(host_mem, device_mem))
         else:
             outputs.append(HostDeviceMem(host_mem, device_mem))
     return inputs, outputs, bindings, stream
Example #15
    def allocate_buffers(self):
        d_inputs, outputs, bindings = [], [], []
        stream = cuda.Stream()
        # FIXME: Clarify this with Po-han. What if we want a batch size smaller than max_batch_size from builder?
        # max_batch_size = self.engine.max_batch_size if self.engine.has_implicit_batch_dimension else self.engine.get_profile_shape(0, 0)[2][0]
        if self.engine.has_implicit_batch_dimension:
            max_batch_size = min(self.engine.max_batch_size,
                                 self.args.batch_size)
        else:
            max_batch_size = self.args.batch_size

        for binding in self.engine:
            logging.info("Binding {:}".format(binding))
            desc = self.engine.get_binding_format_desc(
                self.engine.get_binding_index(binding))
            logging.info("    Binding info {:} with shape {:}".format(
                desc,
                self.engine.get_binding_shape(
                    self.engine.get_binding_index(binding))))
            dtype = self.engine.get_binding_dtype(binding)
            format = self.engine.get_binding_format(
                self.engine.get_binding_index(binding))
            shape = self.engine.get_binding_shape(binding)
            if format == trt.TensorFormat.CHW4:
                shape[-3] = ((shape[-3] - 1) // 4 + 1) * 4
            if not self.engine.has_implicit_batch_dimension:
                shape[0] = max_batch_size
                size = trt.volume(shape)
            else:
                size = trt.volume(shape) * max_batch_size
            # Allocate host and device buffers
            device_mem = cuda.mem_alloc(size * dtype.itemsize)
            # Append the device buffer to device bindings.
            bindings.append(int(device_mem))
            # Append to the appropriate list.
            if self.engine.binding_is_input(binding):
                d_inputs.append(device_mem)
            else:
                host_mem = cuda.pagelocked_empty(size, trt.nptype(dtype))
                outputs.append(HostDeviceMem(host_mem, device_mem))
        return d_inputs, outputs, bindings, stream
Example #16
def allocate_buffers(engine):
    print(engine.get_binding_shape(0))
    print(engine.get_binding_shape(1))
    print(engine.get_binding_shape(2))
    #print(engine.get_binding_shape(3))
    #bindings = []
    # Determine dimensions and create page-locked memory buffers (i.e. won't be swapped to disk) to hold host inputs/outputs.
    h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(ModelData.DTYPE))
    h_output_1 = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(ModelData.DTYPE))
    h_output_2 = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(2)), dtype=trt.nptype(ModelData.DTYPE))
    #h_output_3 = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(3)), dtype=trt.nptype(ModelData.DTYPE))
    # Allocate device memory for inputs and outputs.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output_1 = cuda.mem_alloc(h_output_1.nbytes)
    d_output_2 = cuda.mem_alloc(h_output_2.nbytes)
    #d_output_3 = cuda.mem_alloc(h_output_3.nbytes)

    # Create a stream in which to copy inputs/outputs and run inference.
    stream = cuda.Stream()
    return h_input, d_input, [h_output_1,h_output_2] , [d_output_1,d_output_2], stream
Example #17
    def _init_buffers(self):
        self._stream = cuda.Stream()
        self._input_buffers = []
        self._output_buffers = []
        self._bindings = []

        for binding in self._engine:
            dtype = np.dtype(trt.nptype(self._engine.get_binding_dtype(binding)))
            shape = self._engine.get_binding_shape(binding)
            size = trt.volume(shape) * self._engine.max_batch_size
            device = cuda.mem_alloc(size * dtype.itemsize)
            self._bindings.append(int(device))

            if self._engine.binding_is_input(binding):
                host = None  # will be added during inference
                buffer = MemoryBuffer(dtype, shape, host, device)
                self._input_buffers.append(buffer)
            else:
                host = cuda.pagelocked_empty(size, dtype)
                buffer = MemoryBuffer(dtype, shape, host, device)
                self._output_buffers.append(buffer)
Example #18
def allocate_buffers(engine, batch_size=None):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()

    class HostDeviceMem(object):
        def __init__(self, host_mem, device_mem):
            self.host = host_mem
            self.device = device_mem

        def __str__(self):
            return "Host:\n" + str(self.host) + "\nDevice:\n" + str(
                self.device)

        def __repr__(self):
            return self.__str__()

    for binding in engine:
        dims = engine.get_binding_shape(binding)
        # print('buff--dims:', dims)
        if dims[0] == -1:
            assert batch_size is not None
            dims[0] = batch_size
        # engine.max_batch_size is the maximum batch size which can be used for inference.
        size = trt.volume(dims) * engine.max_batch_size
        # print("size:",size)
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        if engine.binding_is_input(
                binding):  # Determine whether a binding is an input binding.
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream
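A possible call site for this dynamic-batch variant (the batch size is an assumption):

inputs, outputs, bindings, stream = allocate_buffers(engine, batch_size=8)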
Example #19
def allocate_buffers(engine):
    class HostDeviceMem(object):
        def __init__(self, host_mem, device_mem):
            """
            host_mem: cpu memory
            device_mem: gpu memory
            """
            self.host = host_mem
            self.device = device_mem

        def __str__(self):
            return "Host:\n" + str(self.host) + "\nDevice:\n" + str(
                self.device)

        def __repr__(self):
            return self.__str__()

    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for binding in engine:
        # print(binding)  # the names of the bound inputs/outputs
        # print(engine.get_binding_shape(binding))  # get_binding_shape returns the binding's shape
        size = trt.volume(
            engine.get_binding_shape(binding)) * engine.max_batch_size
        # trt.volume computes the number of elements in the shape
        # size = trt.volume(engine.get_binding_shape(binding))  # use this line instead for a fixed-batch-size ONNX model
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # get_binding_dtype returns the binding's data type;
        # nptype maps it to the equivalent numpy dtype
        # allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)  # allocate page-locked host memory
        device_mem = cuda.mem_alloc(host_mem.nbytes)  # allocate device memory
        # print(int(device_mem))  # the device address of this binding's buffer
        bindings.append(int(device_mem))
        # append to the appropriate list
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream
Example #20
 def __init__(self, model_path):
     # load model
     with open(model_path, 'rb') as f:
         with trt.Runtime(trt.Logger(trt.Logger.ERROR)) as runtime:
             self.engine = runtime.deserialize_cuda_engine(f.read())
     # allocate buffers
     self.inputs = list()
     self.outputs = list()
     self.bindings = list()
     self.stream = cuda.Stream()
     for binding in self.engine:
         shape = self.engine.get_binding_shape(binding)
         size = trt.volume(shape) * self.engine.max_batch_size
         dtype = trt.nptype(self.engine.get_binding_dtype(binding))
         host_mem = cuda.pagelocked_empty(size, dtype)
         device_mem = cuda.mem_alloc(host_mem.nbytes)
         self.bindings.append(int(device_mem))
         if self.engine.binding_is_input(binding):
             self.inputs.append([host_mem, device_mem])
         else:
             self.outputs.append([host_mem, device_mem])
     self.context = self.engine.create_execution_context()
Example #21
def main():
    model_path = "model/trt/conv_3d_2.trt"

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    runtime = trt.Runtime(TRT_LOGGER)
    with open(model_path, "rb") as f:
        engine = runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()

    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            print("input_size: ", size, "dtype: ", dtype)
        else:
            print("output_size: ", size, "dtype: ", dtype)

    inputs, outputs, bindings, stream = common.allocate_buffers(engine)

    length = input_shape[0] * input_shape[1] * input_shape[2] * input_shape[3]
    data = np.zeros(length, dtype=np.float32)
    data[:] = 1.0
    inputs[0].host = data.reshape(input_shape)
    print(inputs[0].host[0][0][0][:10])

    outputs[0].host = np.zeros(output_shape, dtype=np.float32)
    trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
    print(trt_outputs[0][0][0][0][:10])

    print("starting...")
    starttime = time.time()
    for i in range(1000):
        trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
    endtime = time.time()
    print(endtime - starttime)

    print(trt_outputs[0][0][0][0][:10])
Example #22
    def allocate_buffer(self):
        bindings = []
        inputs = []
        outputs = []

        for binding in self.engine:
            size = trt.volume(self.engine.get_binding_shape(
                binding)) * self.engine.max_batch_size
            dtype = trt.nptype(self.engine.get_binding_dtype(binding))

            # Allocate host and device buffers
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)

            # Append the device buffer to device bindings.
            bindings.append(int(device_mem))
            if self.engine.binding_is_input(binding):
                inputs.append(HostDeviceMem(host_mem, device_mem))
            else:
                outputs.append(HostDeviceMem(host_mem, device_mem))

        return inputs, outputs, bindings
Example #23
    def get_input_metadata(self):
        inputs = OrderedDict()
        active_profile = self.context.active_optimization_profile
        bindings_per_profile = len(
            self.engine) // self.engine.num_optimization_profiles
        logging.debug(
            "Total # of Profiles: {:}, Bindings Per Profile: {:}, Active Profile: {:}"
            .format(self.engine.num_optimization_profiles,
                    bindings_per_profile, active_profile))

        start_binding = bindings_per_profile * active_profile
        end_binding = start_binding + bindings_per_profile
        logging.info("Start Binding: {:}, End Binding: {:}".format(
            start_binding, end_binding))

        for binding in range(start_binding, end_binding):
            if self.engine.binding_is_input(binding):
                inputs[self.engine[binding]] = (
                    trt.nptype(self.engine.get_binding_dtype(binding)),
                    list(self.engine.get_binding_shape(binding)),
                )
        return inputs
Example #24
def allocate_buffers(engine):
    """
    Allocates all buffers required for an engine, i.e. host/device inputs/outputs.

    Parameters
    ----------
    engine : tensorrt.ICudaEngine
        An ICudaEngine for executing inference on a built network

    Returns
    -------
    list
        All input HostDeviceMem of an engine
    list
        All output HostDeviceMem of an engine
    GPU bindings
        Device bindings
    GPU stream
        A stream is a sequence of commands (possibly issued by different host threads) that execute in order
    """
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream
Example #25
def allocate_buffers(engine):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:  # each binding is a string name (e.g. 'data', 'prob'); the engine is indexable: engine[0] == 'data', engine[1] == 'prob'
        size = trt.volume(
            engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(
            size, dtype)  # e.g. a flattened (784,) image array serving as the host buffer
        device_mem = cuda.mem_alloc(
            host_mem.nbytes)  # a DeviceAllocation object; int(device_mem) gives its device address
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(
                host_mem, device_mem))  # of the two bindings, 'data' lands in inputs and 'prob' in outputs
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream
Example #26
def allocate_buffers(engine):
    # Initialize the lists and stream that the loop below fills in.
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(
            engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)

        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))

    # input_output_path = "/home/nsathish/Efficient_object_detection/mmdetection/buffer/ssd_pickle.dat"
    # buffers=[inputs,outputs,bindings]
    # with open(input_output_path, "wb") as f:
    #     pickle.dump(buffers, f, pickle.HIGHEST_PROTOCOL)
    return inputs, outputs, bindings, stream
Example #27
def allocate_buffers(engine):
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for binding in engine:
        # print(binding)  # the names of the bound inputs/outputs
        # print(engine.get_binding_shape(binding))  # get_binding_shape returns the binding's shape
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        # trt.volume computes the number of elements in the shape
        # size = trt.volume(engine.get_binding_shape(binding))  # use this line instead for a fixed-batch-size ONNX model
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # get_binding_dtype returns the binding's data type;
        # nptype maps it to the equivalent numpy dtype
        # allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)  # allocate page-locked host memory
        device_mem = cuda.mem_alloc(host_mem.nbytes)  # allocate device memory
        # print(int(device_mem))  # the device address of this binding's buffer
        bindings.append(int(device_mem))
        # append to the appropriate list
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream
Example #28
    def __init__(self, model):
        # load tensorrt engine
        TRT_LOGGER = trt.Logger(trt.Logger.INFO)
        TRTbin = model
        print('trtbin', TRTbin)
        with open(TRTbin, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
            engine = runtime.deserialize_cuda_engine(f.read())
        self.context = engine.create_execution_context()
        # allocate memory
        inputs, outputs, bindings = [], [], []
        stream = cuda.Stream()
        for binding in engine:
            size = trt.volume(engine.get_binding_shape(binding))
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            bindings.append(int(device_mem))
            if engine.binding_is_input(binding):
                inputs.append({'host': host_mem, 'device': device_mem})
            else:
                outputs.append({'host': host_mem, 'device': device_mem})
        # save to class
        self.inputs = inputs
        self.outputs = outputs
        self.bindings = bindings
        self.stream = stream

        self.no = 12
        self.output_shapes = [(1, 3, 56, 56, self.no), (1, 3, 28, 28, self.no),
                              (1, 3, 14, 14, self.no)]
        self.names = [
            'angular_leafspot', 'anthracnose_fruit_rot', 'blossom_blight',
            'gray_mold', 'leaf_spot', 'powdery_mildew_fruit',
            'powdery_mildew_leaf'
        ]

        self.img_size = 448
Example #29
    def build(self):
        assert os.path.exists(self.engine_file), "Engine file doesn't exist"

        runtime = trt.Runtime(TrtModel.TRT_LOGGER)
        with open(self.engine_file, 'rb') as engine_file:
            self.engine = runtime.deserialize_cuda_engine(engine_file.read())

        if self.engine is None:
            raise RuntimeError('Unable to load the engine file')

        self.context = self.engine.create_execution_context()
        self.stream = cp.cuda.Stream(non_blocking=True)

        self.max_batch_size = self.engine.get_profile_shape(0, 0)[2][0]
        for binding in self.engine:
            shape = self.engine.get_binding_shape(binding)
            if shape[0] == -1:
                shape = (self.max_batch_size, ) + shape[1:]

            size = trt.volume(shape)
            dtype = trt.nptype(self.engine.get_binding_dtype(binding))
            buffer = HostDeviceMem(size, dtype)

            self.bindings.append(buffer.devptr)
            if self.engine.binding_is_input(binding):
                self.input = buffer
                self.input_shapes.append(
                    self.engine.get_binding_shape(binding))
            else:
                self.outputs.append(buffer)
                self.out_shapes.append(self.engine.get_binding_shape(binding))
                self.out_names.append(binding)

        assert self.input is not None

        self.start = cp.cuda.Event()
        self.end = cp.cuda.Event()
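Here `HostDeviceMem` is constructed from a size and dtype and exposes a `devptr`, which points at a CuPy-backed implementation; a minimal sketch, assuming CuPy pinned host memory:

class HostDeviceMem:
    """Hypothetical CuPy-backed pair of pinned host array and device buffer."""
    def __init__(self, size, dtype):
        nbytes = size * np.dtype(dtype).itemsize
        pinned = cp.cuda.alloc_pinned_memory(nbytes)
        # View the pinned allocation as a host numpy array.
        self.host = np.frombuffer(pinned, dtype, size)
        # Raw device allocation plus the integer pointer the bindings list needs.
        self.device = cp.cuda.alloc(nbytes)
        self.devptr = self.device.ptr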
Example #30
def load_input(img_path, host_buffer):
    print('load input')
    c, h, w = INPUT_SHAPE
    img = cv2.imread(img_path)
    mean = [123.829891747, 127.351147446, 110.256170154]
    stdv = [0.016895854, 0.017222115, 0.014714524]  # reciprocal per-channel std (1/sigma)

    img = cv2.resize(img, (w, h))  # cv2.resize expects (width, height)

    img = np.swapaxes(img, 0, 2)  # HWC -> CWH
    img = np.swapaxes(img, 1, 2)  # CWH -> CHW
    img = np.array(img, dtype=float)

    mean = np.array(mean, dtype=float)
    stdv = np.array(stdv, dtype=float)
    img[0, :, :] -= mean[0]
    img[1, :, :] -= mean[1]
    img[2, :, :] -= mean[2]
    img[0, :, :] *= stdv[0]
    img[1, :, :] *= stdv[1]
    img[2, :, :] *= stdv[2]
    dtype = trt.nptype(DTYPE)
    img_array = np.asarray(img).astype(dtype).ravel()
    np.copyto(host_buffer, img_array)
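A hypothetical caller pairing this loader with a page-locked host buffer (INPUT_SHAPE and DTYPE come from the example's globals; the image path is a placeholder):

h_input = cuda.pagelocked_empty(trt.volume(INPUT_SHAPE), dtype=trt.nptype(DTYPE))
load_input('sample.jpg', h_input)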