def __init__(
    self,
    engine: trt.ICudaEngine,
    idx_or_name: Union[int, str],
    max_batch_size: int,
    device: str,
):
    """Resolve a TensorRT engine binding and prepare its device buffer.

    Args:
        engine: engine whose binding is being wrapped.
        idx_or_name: the binding's name (str) or its index (int).
        max_batch_size: value used for the leading (batch) dimension of
            the buffer shape; the remaining dims come from the engine.
        device: torch device string (e.g. "cuda:0") for the buffer.

    Raises:
        IndexError: if the binding name is unknown or the index is out
            of range.
    """
    # The file already uses f-strings (Python 3.6+), so a plain `str`
    # check replaces the redundant six.string_types.
    if isinstance(idx_or_name, str):
        self.name = idx_or_name
        self.index = engine.get_binding_index(self.name)
        if self.index == -1:
            raise IndexError(f"Binding name not found: {self.name}")
    else:
        self.index = idx_or_name
        self.name = engine.get_binding_name(self.index)
        if self.name is None:
            raise IndexError(f"Binding index out of range: {self.index}")

    self._dtype = TYPE_TRT_2_TORCH[engine.get_binding_dtype(self.index)]
    # Replace the engine's (possibly dynamic) batch dimension with
    # max_batch_size; remaining dims are taken from the binding shape.
    self._shape = (max_batch_size, ) + tuple(
        engine.get_binding_shape(self.index))[1:]
    self._device = torch.device(device)
    self._is_input = engine.binding_is_input(self.index)
    if self.is_input:
        # Input data is supplied by the caller at execution time.
        self._binding_data = None
    else:
        # Pre-allocate a zeroed output buffer on the target device.
        self._binding_data = torch.zeros(size=self.shape,
                                         dtype=self.dtype,
                                         device=self.device)
def get_random_inputs(
    engine: trt.ICudaEngine,
    context: trt.IExecutionContext,
    input_binding_idxs: List[int],
    seed: int = 42,
):
    """Generate random float32 host arrays for each input binding.

    For a fixed input shape the binding shape is used directly; for a
    dynamic shape the kOPT shape from the active optimization profile is
    chosen.

    Args:
        engine: engine providing binding names and profile shapes.
        context: execution context providing current binding shapes.
        input_binding_idxs: binding indices of the engine's inputs.
        seed: RNG seed, for reproducible inputs.

    Returns:
        List of np.float32 ndarrays, one per input binding.
    """
    # Input data for inference
    host_inputs = []
    print("Generating Random Inputs")
    print("\tUsing random seed: {}".format(seed))
    # Use a local RandomState instead of np.random.seed so we do not
    # clobber NumPy's global RNG state for the rest of the process; with
    # the same seed it produces the identical sequence of draws.
    rng = np.random.RandomState(seed)
    for binding_index in input_binding_idxs:
        # If input shape is fixed, we'll just use it
        input_shape = context.get_binding_shape(binding_index)
        input_name = engine.get_binding_name(binding_index)
        print("\tInput [{}] shape: {}".format(input_name, input_shape))
        # If input shape is dynamic, we'll arbitrarily select one of the
        # the min/opt/max shapes from our optimization profile
        if is_dynamic(input_shape):
            profile_index = context.active_optimization_profile
            profile_shapes = engine.get_profile_shape(profile_index, binding_index)
            print("\tProfile Shapes for [{}]: [kMIN {} | kOPT {} | kMAX {}]".
                  format(input_name, *profile_shapes))
            # 0=min, 1=opt, 2=max, or choose any shape, (min <= shape <= max)
            input_shape = profile_shapes[1]
            print(
                "\tInput [{}] shape was dynamic, setting inference shape to {}"
                .format(input_name, input_shape))
        # random_sample is the RandomState equivalent of np.random.random.
        host_inputs.append(rng.random_sample(input_shape).astype(np.float32))
    return host_inputs
def setup_binding_shapes(engine: trt.ICudaEngine,
                         context: trt.IExecutionContext,
                         host_inputs, input_binding_idxs,
                         output_binding_idxs):
    """Propagate input shapes to the context and allocate output buffers.

    Args:
        engine: engine owning the bindings (kept for interface
            compatibility; not otherwise used here).
        context: execution context whose binding shapes are set/queried.
        host_inputs: host-side input arrays, aligned with
            input_binding_idxs.
        input_binding_idxs: binding indices of the inputs.
        output_binding_idxs: binding indices of the outputs.

    Returns:
        Tuple (host_outputs, device_outputs): per-output np.float32 host
        buffers and matching device allocations.
    """
    # Explicitly set the dynamic input shapes, so the dynamic output
    # shapes can be computed internally
    for host_input, binding_index in zip(host_inputs, input_binding_idxs):
        context.set_binding_shape(binding_index, host_input.shape)
    assert context.all_binding_shapes_specified

    host_outputs = []
    device_outputs = []
    for binding_index in output_binding_idxs:
        output_shape = context.get_binding_shape(binding_index)
        # Allocate buffers to hold output results after copying back to host
        buffer = np.empty(output_shape, dtype=np.float32)
        host_outputs.append(buffer)
        # Allocate output buffers on device
        device_outputs.append(cuda.mem_alloc(buffer.nbytes))
    # NOTE(review): a dead statement building `utput_names` (typo for
    # `output_names`) was removed — the list was never used or returned.
    return host_outputs, device_outputs