Ejemplo n.º 1
0
def allocate_buffers_torch(engine: trt.ICudaEngine, device):
    """Allocate a page-locked host array and a torch device tensor per binding.

    Every binding yielded by iterating ``engine`` gets a buffer whose shape is
    the binding shape prefixed with ``engine.max_batch_size``.

    Args:
        engine: Engine whose bindings are iterated.
        device: Passed as ``device=`` to ``torch.empty`` for the device-side
            tensors.

    Returns:
        A tuple ``(inputs, outputs, bindings)``. ``inputs`` and ``outputs`` are
        lists of ``HostDeviceMem(host, device_tensor, binding, index)`` records
        split by ``engine.binding_is_input``; ``bindings`` holds the
        ``data_ptr()`` of every device tensor in iteration order.
    """
    import torch

    np_to_torch = np_to_torch_dtype_map()
    inputs, outputs, bindings = [], [], []

    for position, binding in enumerate(engine):
        binding_shape = engine.get_binding_shape(binding)
        max_batch = engine.max_batch_size
        element_count = trt.volume(binding_shape) * max_batch
        np_dtype = trt.nptype(engine.get_binding_dtype(binding))
        full_shape = [max_batch] + list(binding_shape)

        # Host side: flat page-locked allocation viewed with the batched shape.
        host_buf = cuda.pagelocked_empty(element_count, np_dtype)
        host_buf = host_buf.reshape(full_shape)

        # Device side: torch tensor with the matching shape and dtype.
        device_buf = torch.empty(*host_buf.shape,
                                 device=device,
                                 dtype=np_to_torch[host_buf.dtype])

        # The raw device address is what goes into the bindings list.
        bindings.append(device_buf.data_ptr())

        target = inputs if engine.binding_is_input(binding) else outputs
        target.append(HostDeviceMem(host_buf, device_buf, binding, position))

    return inputs, outputs, bindings
Ejemplo n.º 2
0
def allocate_buffers(engine: trt.ICudaEngine, batch_size: int):
    """Allocate page-locked host and CUDA device buffers for every binding.

    The element count of each buffer is ``batch_size`` times the absolute
    value of the binding's volume.

    Args:
        engine: Engine whose bindings are iterated.
        batch_size: Multiplier applied to each binding's volume.

    Returns:
        A tuple ``(inputs, outputs, dbindings, stream)``. ``inputs`` and
        ``outputs`` are lists of ``HostDeviceMem(host, device)`` records split
        by ``engine.binding_is_input``; ``dbindings`` holds ``int(device)`` for
        every binding in iteration order; ``stream`` is a new ``cuda.Stream``.
    """
    print('Allocating buffers ...')

    stream = cuda.Stream()
    inputs, outputs, dbindings = [], [], []

    for binding in engine:
        volume = trt.volume(engine.get_binding_shape(binding))
        element_count = batch_size * abs(volume)
        np_dtype = trt.nptype(engine.get_binding_dtype(binding))

        # Host buffer first; the device buffer is sized from its byte count.
        host_buf = cuda.pagelocked_empty(element_count, np_dtype)
        device_buf = cuda.mem_alloc(host_buf.nbytes)

        # The integer form of the device allocation goes into the bindings.
        dbindings.append(int(device_buf))

        target = inputs if engine.binding_is_input(binding) else outputs
        target.append(HostDeviceMem(host_buf, device_buf))

    return inputs, outputs, dbindings, stream
Ejemplo n.º 3
0
    def __init__(
        self,
        engine: trt.ICudaEngine,
        idx_or_name: Union[int, str],
        max_batch_size: int,
        device: str,
    ):
        """Resolve one engine binding and record its dtype, shape and device.

        Args:
            engine: Engine queried for the binding's name, index, dtype,
                shape and input/output role.
            idx_or_name: Binding name (string) or binding index.
            max_batch_size: Value used as the first dimension of the stored
                shape, replacing the first dimension reported by the engine.
            device: Passed to ``torch.device``.

        Raises:
            IndexError: If a name resolves to index -1, or if the engine
                returns ``None`` as the name for the given index.
        """
        looked_up_by_index = not isinstance(idx_or_name, six.string_types)
        if looked_up_by_index:
            self.index = idx_or_name
            self.name = engine.get_binding_name(self.index)
            if self.name is None:
                raise IndexError(f"Binding index out of range: {self.index}")
        else:
            self.name = idx_or_name
            self.index = engine.get_binding_index(self.name)
            if self.index == -1:
                raise IndexError(f"Binding name not found: {self.name}")

        trt_dtype = engine.get_binding_dtype(self.index)
        self._dtype = TYPE_TRT_2_TORCH[trt_dtype]

        # Drop the engine's leading dimension and substitute max_batch_size.
        trailing_dims = tuple(engine.get_binding_shape(self.index))[1:]
        self._shape = (max_batch_size, ) + trailing_dims

        self._device = torch.device(device)
        self._is_input = engine.binding_is_input(self.index)

        # Inputs start without data; non-inputs get a zero-filled tensor.
        self._binding_data = (None if self.is_input else torch.zeros(
            size=self.shape, dtype=self.dtype, device=self.device))
Ejemplo n.º 4
0
def run_trt_engine(context: trt.IExecutionContext, engine: trt.ICudaEngine,
                   h_tensors: dict):
    """Run a TRT model.

    The model output is written in place inside the arrays provided in
    h_tensors['outputs'].

    Args:
        context (trt.IExecutionContext): Context on which dynamic shapes are
            set and on which ``execute_v2`` is called.
        engine (trt.ICudaEngine): Engine used to map tensor names to binding
            indices and to query binding shapes.
        h_tensors (dict): A dictionary with keys "inputs" and "outputs" whose
            values are dictionaries with tensor names as keys and
            numpy.ndarrays as values.

    Raises:
        IndexError: If a tensor name is not a binding name of the engine.
        RuntimeError: If ``context.execute_v2`` reports failure.
    """

    def binding_index(name):
        # get_binding_index signals "not found" with -1. Used directly as a
        # list index, -1 would silently overwrite the last binding slot.
        idx = engine.get_binding_index(name)
        if idx == -1:
            raise IndexError(f"Binding name not found: {name}")
        return idx

    h_inputs = h_tensors['inputs']
    h_outputs = h_tensors['outputs']

    # Resolve every name up front so a bad name fails before any GPU work.
    input_indices = {name: binding_index(name) for name in h_inputs}
    output_indices = {name: binding_index(name) for name in h_outputs}

    # Allocate GPU memory, one buffer per host tensor, sized by its byte count.
    d_inputs = {
        name: cuda.mem_alloc(h_tensor.nbytes)
        for name, h_tensor in h_inputs.items()
    }
    d_outputs = {
        name: cuda.mem_alloc(h_tensor.nbytes)
        for name, h_tensor in h_outputs.items()
    }

    # Copy input buffers to GPU.
    for name, h_tensor in h_inputs.items():
        cuda.memcpy_htod(d_inputs[name], h_tensor)

    # Initialise bindings list; slots are filled by binding index below.
    bindings = [None] * engine.num_bindings

    # Populate input bindings and fix any dynamic shapes from the host data.
    for name, h_tensor in h_inputs.items():
        idx = input_indices[name]
        bindings[idx] = int(d_inputs[name])
        if engine.is_shape_binding(idx) and is_shape_dynamic(
                context.get_shape(idx)):
            context.set_shape_input(idx, h_tensor)
        elif is_shape_dynamic(engine.get_binding_shape(idx)):
            context.set_binding_shape(idx, h_tensor.shape)

    # Populate output bindings.
    for name, idx in output_indices.items():
        bindings[idx] = int(d_outputs[name])

    # Run engine. A falsy result means execution failed; do not copy back
    # device memory whose contents are undefined in that case.
    if not context.execute_v2(bindings=bindings):
        raise RuntimeError("TensorRT engine execution failed")

    # Copy output buffers to CPU.
    for name, h_tensor in h_outputs.items():
        cuda.memcpy_dtoh(h_tensor, d_outputs[name])