def allocate_buffers(engine: trt.ICudaEngine, batch_size: int):
    """Allocate paired pagelocked-host / device buffers for every engine binding.

    Returns a 4-tuple ``(inputs, outputs, dbindings, stream)``:
    ``inputs`` and ``outputs`` are lists of :class:`HostDeviceMem` pairs,
    ``dbindings`` holds the device pointers (as ints) in binding order,
    and ``stream`` is a freshly created CUDA stream.
    """
    print('Allocating buffers ...')
    inputs, outputs, dbindings = [], [], []
    stream = cuda.Stream()
    for binding in engine:
        # abs() guards against dynamic dimensions reported as -1 by TensorRT.
        elem_count = batch_size * abs(trt.volume(engine.get_binding_shape(binding)))
        np_dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Pagelocked host staging buffer and a matching device allocation.
        host_mem = cuda.pagelocked_empty(elem_count, np_dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Device pointers are collected in binding order for execution.
        dbindings.append(int(device_mem))
        # Route the host/device pair to the input or output list.
        pair = HostDeviceMem(host_mem, device_mem)
        if engine.binding_is_input(binding):
            inputs.append(pair)
        else:
            outputs.append(pair)
    return inputs, outputs, dbindings, stream
def get_binding_idxs(engine: trt.ICudaEngine, profile_index: int):
    """Split one optimization profile's binding indices into inputs and outputs.

    Prints binding metadata for the chosen profile, then returns
    ``(input_binding_idxs, output_binding_idxs)``.
    """
    # Bindings are laid out contiguously, one equally sized group per profile.
    per_profile = engine.num_bindings // engine.num_optimization_profiles
    start_binding = profile_index * per_profile
    end_binding = start_binding + per_profile

    print("Engine/Binding Metadata")
    print("\tNumber of optimization profiles: {}".format(
        engine.num_optimization_profiles))
    print("\tNumber of bindings per profile: {}".format(per_profile))
    print("\tFirst binding for profile {}: {}".format(profile_index,
                                                      start_binding))
    print("\tLast binding for profile {}: {}".format(profile_index,
                                                     end_binding - 1))

    # Partition the profile's binding range by direction.
    profile_range = range(start_binding, end_binding)
    input_binding_idxs = [i for i in profile_range
                          if engine.binding_is_input(i)]
    output_binding_idxs = [i for i in profile_range
                           if not engine.binding_is_input(i)]
    return input_binding_idxs, output_binding_idxs
def allocate_buffers_torch(engine: trt.ICudaEngine, device):
    """Allocate pagelocked host buffers paired with torch tensors on *device*.

    Returns ``(inputs, outputs, bindings)`` where ``inputs``/``outputs`` are
    lists of :class:`HostDeviceMem` (host array, device tensor, binding name,
    binding index) and ``bindings`` holds the device data pointers in binding
    order.
    """
    import torch
    dtype_map = np_to_torch_dtype_map()
    inputs, outputs, bindings = [], [], []
    for index, binding in enumerate(engine):
        binding_shape = engine.get_binding_shape(binding)
        elem_count = trt.volume(binding_shape) * engine.max_batch_size
        np_dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Prepend the (max) batch dimension to the per-sample binding shape.
        full_shape = [engine.max_batch_size] + list(binding_shape)
        # Pagelocked host staging array plus a torch tensor on the target device.
        host_mem = cuda.pagelocked_empty(elem_count, np_dtype).reshape(full_shape)
        device_mem = torch.empty(*host_mem.shape,
                                 device=device,
                                 dtype=dtype_map[host_mem.dtype])
        # Raw device pointer, collected in binding order for execution.
        bindings.append(device_mem.data_ptr())
        pair = HostDeviceMem(host_mem, device_mem, binding, index)
        if engine.binding_is_input(binding):
            inputs.append(pair)
        else:
            outputs.append(pair)
    return inputs, outputs, bindings
def __init__(
    self,
    engine: trt.ICudaEngine,
    idx_or_name: Union[int, str],
    max_batch_size: int,
    device: str,
):
    """Resolve a binding by index or name and prepare its tensor metadata.

    Outputs get a preallocated zero tensor on *device*; inputs leave the
    binding data unset until bound by the caller.
    """
    if isinstance(idx_or_name, six.string_types):
        # Name given: resolve to index; TensorRT reports -1 for unknown names.
        self.name = idx_or_name
        self.index = engine.get_binding_index(self.name)
        if self.index == -1:
            raise IndexError(f"Binding name not found: {self.name}")
    else:
        # Index given: resolve to name; TensorRT reports None when out of range.
        self.index = idx_or_name
        self.name = engine.get_binding_name(self.index)
        if self.name is None:
            raise IndexError(f"Binding index out of range: {self.index}")

    self._dtype = TYPE_TRT_2_TORCH[engine.get_binding_dtype(self.index)]
    # Replace the binding's batch dimension with max_batch_size, keep the rest.
    self._shape = (max_batch_size, ) + tuple(
        engine.get_binding_shape(self.index))[1:]
    self._device = torch.device(device)
    self._is_input = engine.binding_is_input(self.index)
    # NOTE: properties below read the private fields assigned just above.
    self._binding_data = (
        None
        if self.is_input
        else torch.zeros(size=self.shape, dtype=self.dtype, device=self.device)
    )
def get_binding_idxs(engine: trt.ICudaEngine, profile_index: int):
    """Return ``(input_binding_idxs, output_binding_idxs)`` for one profile.

    :param engine: deserialized TensorRT engine to inspect.
    :param profile_index: optimization profile whose bindings are wanted.
    :return: two lists of binding indices — inputs first, then outputs.
    """
    # Each profile owns a contiguous, equally sized slice of the bindings.
    per_profile = engine.num_bindings // engine.num_optimization_profiles
    first = profile_index * per_profile
    last = first + per_profile

    # Partition that slice by binding direction.
    input_binding_idxs = []
    output_binding_idxs = []
    for idx in range(first, last):
        target = (input_binding_idxs
                  if engine.binding_is_input(idx)
                  else output_binding_idxs)
        target.append(idx)
    return input_binding_idxs, output_binding_idxs