Example #1
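This snippet relies on the TensorRT and PyCUDA Python bindings and on a helper
is_shape_dynamic that is not shown in the excerpt. Below is a minimal set of
prerequisites, with is_shape_dynamic sketched under the assumption that it only
reports whether a shape contains a dynamic (-1) dimension:

import pycuda.autoinit  # noqa: F401 -- creates a CUDA context on import
import pycuda.driver as cuda
import tensorrt as trt


def is_shape_dynamic(shape):
    # Assumed helper: a shape is dynamic if any dimension is unknown (-1).
    return any(dim is None or dim < 0 for dim in shape)

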
def run_trt_engine(context: trt.IExecutionContext, engine: trt.ICudaEngine,
                   h_tensors: dict):
    """Run a TRT model.

  The model output is written in place inside the tensors provided in h_tensors['outputs'].

  Args:
      context (trt.IExecutionContext): 
      engine (trt.ICudaEngine): 
      h_tensors (dict): A dictionary with keys "inputs" and "outputs" and values which are another 
      dictionaries with tensor names as keys and numpy.ndarrays as values.
  """
    # Allocate GPU memory.
    d_tensors = {}
    d_tensors['inputs'] = {
        k: cuda.mem_alloc(v.nbytes)
        for k, v in h_tensors['inputs'].items()
    }
    d_tensors['outputs'] = {
        k: cuda.mem_alloc(v.nbytes)
        for k, v in h_tensors['outputs'].items()
    }

    # Copy input buffers to GPU.
    for h_tensor, d_tensor in zip(h_tensors['inputs'].values(),
                                  d_tensors['inputs'].values()):
        cuda.memcpy_htod(d_tensor, h_tensor)

    # Initialise bindings list.
    bindings = [None] * engine.num_bindings

    # Populate bindings list.
    for (name, h_tensor), (_, d_tensor) in zip(h_tensors['inputs'].items(),
                                               d_tensors['inputs'].items()):
        idx = engine.get_binding_index(name)
        bindings[idx] = int(d_tensor)
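        # Shape-tensor inputs receive their values via set_shape_input; regular
        # inputs with dynamic dimensions only need their concrete shape set.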
        if engine.is_shape_binding(idx) and is_shape_dynamic(
                context.get_shape(idx)):
            context.set_shape_input(idx, h_tensor)
        elif is_shape_dynamic(engine.get_binding_shape(idx)):
            context.set_binding_shape(idx, h_tensor.shape)

    for name, d_tensor in d_tensors['outputs'].items():
        idx = engine.get_binding_index(name)
        bindings[idx] = int(d_tensor)

    # Run engine.
    context.execute_v2(bindings=bindings)

    # Copy output buffers to CPU.
    for h_tensor, d_tensor in zip(h_tensors['outputs'].values(),
                                  d_tensors['outputs'].values()):
        cuda.memcpy_dtoh(h_tensor, d_tensor)
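
A hypothetical usage sketch, continuing from the imports above; the engine file
name, tensor names, shapes, and dtypes are placeholders rather than values from
the original code:

import numpy as np

# Deserialize a prebuilt engine (the path is a placeholder).
with open("model.plan", "rb") as f, trt.Runtime(trt.Logger()) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
context = engine.create_execution_context()

# Host buffers: outputs are overwritten in place by run_trt_engine.
h_tensors = {
    "inputs": {"input": np.random.rand(1, 3, 224, 224).astype(np.float32)},
    "outputs": {"output": np.zeros((1, 1000), dtype=np.float32)},
}
run_trt_engine(context, engine, h_tensors)
print(h_tensors["outputs"]["output"])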
Example #2
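This constructor belongs to a binding-wrapper class whose definition is not
shown in the excerpt. It assumes six, torch, and a module-level mapping
TYPE_TRT_2_TORCH from TensorRT dtypes to torch dtypes; the sketch below shows
plausible prerequisites, with the mapping's contents and the class name being
assumptions:

import six
import torch
import tensorrt as trt
from typing import Union

# Assumed mapping from TensorRT data types to torch dtypes.
TYPE_TRT_2_TORCH = {
    trt.DataType.FLOAT: torch.float32,
    trt.DataType.HALF: torch.float16,
    trt.DataType.INT8: torch.int8,
    trt.DataType.INT32: torch.int32,
}


class Binding:  # hypothetical name; the original class definition is not shown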
    def __init__(
        self,
        engine: trt.ICudaEngine,
        idx_or_name: Union[int, str],
        max_batch_size: int,
        device: str,
    ):
        if isinstance(idx_or_name, six.string_types):
            self.name = idx_or_name
            self.index = engine.get_binding_index(self.name)
            if self.index == -1:
                raise IndexError(f"Binding name not found: {self.name}")
        else:
            self.index = idx_or_name
            self.name = engine.get_binding_name(self.index)
            if self.name is None:
                raise IndexError(f"Binding index out of range: {self.index}")

        self._dtype = TYPE_TRT_2_TORCH[engine.get_binding_dtype(self.index)]
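        # Keep the engine's binding shape but force the leading (batch)
        # dimension to max_batch_size.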
        self._shape = (max_batch_size, ) + tuple(
            engine.get_binding_shape(self.index))[1:]
        self._device = torch.device(device)
        self._is_input = engine.binding_is_input(self.index)
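        # Only output bindings get a pre-allocated torch buffer here; inputs
        # are expected to have their data supplied later.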
        if self.is_input:
            self._binding_data = None
        else:
            self._binding_data = torch.zeros(size=self.shape,
                                             dtype=self.dtype,
                                             device=self.device)
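
The constructor reads back self.is_input, self.shape, self.dtype, and
self.device, so the full class presumably exposes the private attributes as
read-only properties; a minimal sketch of what those could look like (not part
of the excerpt):

    @property
    def is_input(self) -> bool:
        return self._is_input

    @property
    def shape(self) -> tuple:
        return self._shape

    @property
    def dtype(self) -> torch.dtype:
        return self._dtype

    @property
    def device(self) -> torch.device:
        return self._device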