def add_fused_bottom_mlp(self, plugin_name, input_tensor, input_size, num_channels, names):
    """Build the bottom-MLP part of the DLRM network as a single fused plugin.

    Args:
        plugin_name (str): Name of the fused MLP plugin to look up in the registry.
        input_tensor (ITensor): Input to the MLP. NOTE(review): not referenced in this
            body — presumably consumed by the caller when adding the plugin layer; confirm.
        input_size (int): Number of numerical input features.
        num_channels (list): Number of channels for each FC layer in the MLP.
        names (list): Weight-dict name prefix of each FC layer in the MLP.

    Returns:
        tuple: ``(plugin, output_tensor_name)`` — the created plugin (``None`` if no
        creator named ``plugin_name`` is registered) and the name of the last FC
        layer's ReLU output tensor (``""`` if no layers were processed).
    """
    plugin = None
    output_tensor_name = ""
    # Per-tensor dynamic ranges from the calibration cache; used to quantize the
    # intermediate activations inside the fused plugin.
    dynamic_range_dict = self.parse_calibration()
    for plugin_creator in trt.get_plugin_registry().plugin_creator_list:
        if plugin_creator.name == plugin_name:
            plugin_fields = [trt.PluginField("inputChannels", np.array([input_size], dtype=np.int32), trt.PluginFieldType.INT32)]
            for i, _ in enumerate(num_channels):
                weights = self.weights[names[i] + ".weight"].numpy()
                # Zero-pad the weight matrix when the network input is wider than
                # the checkpoint's weight matrix (e.g. padded numerical features).
                input_size_suggested_by_weights = weights.shape[1]
                if input_size > input_size_suggested_by_weights:
                    weights = np.concatenate((weights, np.zeros((weights.shape[0], input_size - input_size_suggested_by_weights), dtype=weights.dtype)), 1)
                plugin_fields.append(trt.PluginField("weights" + str(i), weights, trt.PluginFieldType.FLOAT32))
                plugin_fields.append(trt.PluginField("biases" + str(i), self.weights[names[i] + ".bias"].numpy(), trt.PluginFieldType.FLOAT32))
                output_tensor_name = names[i] + ".relu.output"
                # The last layer's output range is handled outside the fused plugin,
                # so dynamic ranges are only attached for intermediate layers.
                if i != len(num_channels) - 1:
                    plugin_fields.append(trt.PluginField("dynamicRange" + str(i), np.array([dynamic_range_dict[output_tensor_name]], dtype=np.float32), trt.PluginFieldType.FLOAT32))
            plugin = plugin_creator.create_plugin(name=plugin_name, field_collection=trt.PluginFieldCollection(plugin_fields))
    return plugin, output_tensor_name
def get_dlrm_interactions_plugin(self, plugin_name, tableOffsets, interactionsOutputInterleaved):
    """Create a plugin layer for the DLRM Interactions plugin and return it.

    DLRM Interactions plugin takes two inputs: from bottom MLP and categorical input
    and looks up their embeddings. Since DLRM embeddings can be larger than GPU memory,
    the plugin keeps the most frequently used embeddings on GPU and rest on host and
    manages the lookup with good performance.

    Args:
        plugin_name (str): Registry name of the interactions plugin creator.
        tableOffsets: Per-table row offsets, passed through as an INT32 plugin field.
        interactionsOutputInterleaved (bool): Whether the plugin should emit its
            output in interleaved layout (encoded as 1/0).

    Returns:
        The created plugin, or None if no creator named plugin_name is registered.
    """
    plugin = None
    for plugin_creator in trt.get_plugin_registry().plugin_creator_list:
        if plugin_creator.name == plugin_name:
            embeddingSize_field = trt.PluginField("embeddingSize", np.array([self.embedding_size], dtype=np.int32), trt.PluginFieldType.INT32)
            embeddingRows_field = trt.PluginField("embeddingRows", np.array([self.embedding_rows_total], dtype=np.int32), trt.PluginFieldType.INT32)
            # 0 = full precision (while calibrating), 1 = fp16 I/O, 2 = int8 I/O.
            reducedPrecisionIO_field = trt.PluginField("reducedPrecisionIO", np.array(
                [0 if self.need_calibration else (1 if self.precision == "fp16" else 2)], dtype=np.int32), trt.PluginFieldType.INT32)
            embeddingWeightsOnGpuPart_field = trt.PluginField("embeddingWeightsOnGpuPart", np.array([self.embedding_weights_on_gpu_part], dtype=np.float32), trt.PluginFieldType.FLOAT32)
            interactionsOutputInterleaved_field = trt.PluginField("interactionsOutputInterleaved", np.array([1 if interactionsOutputInterleaved else 0], dtype=np.int32), trt.PluginFieldType.INT32)
            tableOffsets_field = trt.PluginField("tableOffsets", tableOffsets, trt.PluginFieldType.INT32)
            # File paths are marshalled as int8 byte arrays (CHAR plugin fields).
            embeddingWeightsFilepath_field = trt.PluginField("embeddingWeightsFilepath", np.array(list(self.embedding_weights_binary_filepath.encode()), dtype=np.int8), trt.PluginFieldType.CHAR)
            if self.use_row_frequencies:
                rowFrequenciesFilepath_field = trt.PluginField("rowFrequenciesFilepath", np.array(list(self.row_frequencies_binary_filepath.encode()), dtype=np.int8), trt.PluginFieldType.CHAR)
            else:
                # No frequency data: pass an empty path so the plugin skips the GPU/host split.
                rowFrequenciesFilepath_field = trt.PluginField("rowFrequenciesFilepath", np.array(list("".encode()),
                                                               dtype=np.int8), trt.PluginFieldType.CHAR)
            output_padding_field = trt.PluginField("outputPaddingGranularity", np.array([self.output_padding], dtype=np.int32), trt.PluginFieldType.INT32)
            field_collection = trt.PluginFieldCollection([embeddingSize_field, embeddingRows_field, reducedPrecisionIO_field, embeddingWeightsOnGpuPart_field,
                                                          interactionsOutputInterleaved_field, output_padding_field, tableOffsets_field,
                                                          embeddingWeightsFilepath_field, rowFrequenciesFilepath_field])
            plugin = plugin_creator.create_plugin(name=plugin_name, field_collection=field_collection)
    return plugin
def create_torchunfold_plugin(layer_name, kernel_size, dilation, padding, stride):
    """Create a TorchUnfoldPluginDynamic plugin.

    Each of kernel_size / dilation / padding / stride may be a scalar int, in which
    case it is expanded to an (h, w) pair before being packed as an INT32 field.
    """
    creator = trt.get_plugin_registry().get_plugin_creator(
        'TorchUnfoldPluginDynamic', '1', '')
    pfc = trt.PluginFieldCollection()
    # Field order matters to the plugin; keep it as declared here.
    for field_name, value in (('kernel_size', kernel_size),
                              ('dilation', dilation),
                              ('padding', padding),
                              ('stride', stride)):
        if isinstance(value, int):
            value = (value, value)
        pfc.append(trt.PluginField(field_name,
                                   np.array(value, dtype=np.int32),
                                   trt.PluginFieldType.INT32))
    return creator.create_plugin(layer_name, pfc)
def create_torchembedding_plugin(layer_name, weight):
    """Create a TorchEmbeddingPluginDynamic plugin carrying the embedding table.

    The table dimensions (rows = num_embeddings, cols = embedding_dim) are derived
    from weight.shape and passed alongside the float32 weight data.
    """
    registry = trt.get_plugin_registry()
    creator = registry.get_plugin_creator('TorchEmbeddingPluginDynamic', '1', '')
    rows = weight.shape[0]
    cols = weight.shape[1]
    fields = trt.PluginFieldCollection()
    fields.append(trt.PluginField("num_embeddings",
                                  np.array([rows], dtype=np.int32),
                                  trt.PluginFieldType.INT32))
    fields.append(trt.PluginField("embedding_dim",
                                  np.array([cols], dtype=np.int32),
                                  trt.PluginFieldType.INT32))
    fields.append(trt.PluginField("weight",
                                  np.array(weight, dtype=np.float32),
                                  trt.PluginFieldType.FLOAT32))
    return creator.create_plugin(layer_name, fields)
def create_deformable_pool_plugin(layer_name, out_size, spatial_scale, sampling_ratio, gamma):
    """Create a DeformablePoolPluginDynamic plugin.

    A scalar out_size is expanded to [out_size, out_size] before packing.
    """
    creator = trt.get_plugin_registry().get_plugin_creator(
        'DeformablePoolPluginDynamic', '1', '')
    if not isinstance(out_size, Iterable):
        out_size = [out_size, out_size]
    fields = trt.PluginFieldCollection()
    fields.append(trt.PluginField('out_size',
                                  np.array(out_size, dtype=np.int32),
                                  trt.PluginFieldType.INT32))
    fields.append(trt.PluginField('spatial_scale',
                                  np.array([spatial_scale], dtype=np.float32),
                                  trt.PluginFieldType.FLOAT32))
    fields.append(trt.PluginField('sampling_ratio',
                                  np.array([sampling_ratio], dtype=np.int32),
                                  trt.PluginFieldType.INT32))
    fields.append(trt.PluginField('gamma',
                                  np.array([gamma], dtype=np.float32),
                                  trt.PluginFieldType.FLOAT32))
    return creator.create_plugin(layer_name, fields)
def get_trt_plugin(plugin_name: str, field_collection: List[TRTPluginFieldCollection], version: str, plugin_namespace: str = "") -> TRTPlugin:
    """
    Get a TensorRT plugin based on the given parameters.

    Args:
        plugin_name (str): Name of the plugin.
        field_collection (List[TRTPluginFieldCollection]): Parameters that needed to create a
            plugin using the plugin creator.
        version (str): Version of the plugin.
        plugin_namespace (str): Namespace of the plugin.

    Returns:
        A TensorRT plugin that can be added to TensorRT network as Plugin layer.

    Raises:
        AssertionError: If no creator is registered for the name/version/namespace,
            or if plugin creation returns None.
    """
    plugin_registry = trt.get_plugin_registry()
    plugin_creator = plugin_registry.get_plugin_creator(
        plugin_name, version, plugin_namespace)
    # Fixed typo in the error message: "Unabled" -> "Unable".
    assert plugin_creator, f"Unable to find plugin creator with name {plugin_name}"
    plugin = plugin_creator.create_plugin(name=plugin_name, field_collection=field_collection)
    assert plugin is not None, f"Plugin: {plugin_name} could not be fetched"
    return plugin
def get_plugin_creator(plugin_name):
    """Return the registered plugin creator named plugin_name, or None.

    If several creators share the name, the last one in registry order wins
    (mirrors the original scan-without-break behavior).
    """
    registry = tensorrt.get_plugin_registry()
    matches = [creator for creator in registry.plugin_creator_list
               if creator.name == plugin_name]
    return matches[-1] if matches else None
def create_carafefeaturereassemble_plugin(layer_name, scale_factor, up_kernel, up_group, type_id=trt.DataType.FLOAT):
    """Create a CarafeFeatureReassemblePluginDynamic plugin.

    All four parameters are packed as single-element INT32 fields in the
    order scale_factor, up_kernel, up_group, type_id.
    """
    creator = trt.get_plugin_registry().get_plugin_creator(
        'CarafeFeatureReassemblePluginDynamic', '1', '')
    pfc = trt.PluginFieldCollection()
    for field_name, value in (("scale_factor", scale_factor),
                              ("up_kernel", up_kernel),
                              ("up_group", up_group),
                              ("type_id", type_id)):
        pfc.append(trt.PluginField(field_name,
                                   np.array([value], dtype=np.int32),
                                   trt.PluginFieldType.INT32))
    return creator.create_plugin(layer_name, pfc)
def create_roipool_plugin(layer_name, out_size, featmap_strides, roi_scale_factor, finest_scale):
    """Create a RoiPoolPluginDynamic plugin for multi-level RoI pooling."""
    creator = trt.get_plugin_registry().get_plugin_creator(
        'RoiPoolPluginDynamic', '1', '')
    fields = trt.PluginFieldCollection()
    fields.append(trt.PluginField('out_size',
                                  np.array([out_size], dtype=np.int32),
                                  trt.PluginFieldType.INT32))
    # Per-level strides travel as a float32 vector for this plugin.
    fields.append(trt.PluginField('featmap_strides',
                                  np.array(featmap_strides).astype(np.float32),
                                  trt.PluginFieldType.FLOAT32))
    fields.append(trt.PluginField('roi_scale_factor',
                                  np.array([roi_scale_factor], dtype=np.float32),
                                  trt.PluginFieldType.FLOAT32))
    fields.append(trt.PluginField('finest_scale',
                                  np.array([finest_scale], dtype=np.int32),
                                  trt.PluginFieldType.INT32))
    return creator.create_plugin(layer_name, fields)
def getSortPlugin():
    """Instantiate the SortPlugin with descending=0; return None if it is not registered."""
    for creator in trt.get_plugin_registry().plugin_creator_list:
        if creator.name != 'SortPlugin':
            continue
        descending = trt.PluginField("descending",
                                     np.array([0], dtype=np.int32),
                                     trt.PluginFieldType.INT32)
        return creator.create_plugin(creator.name,
                                     trt.PluginFieldCollection([descending]))
    return None
def create_roiextractor_plugin(layer_name, out_size, sample_num, featmap_strides, roi_scale_factor, finest_scale):
    """Create a RoiExtractorPluginDynamic plugin for multi-level RoI feature extraction."""
    registry = trt.get_plugin_registry()
    creator = registry.get_plugin_creator('RoiExtractorPluginDynamic', '1', '')
    fields = trt.PluginFieldCollection()
    # Scalar int parameters, packed in plugin-expected order.
    for field_name, value in (("out_size", out_size), ("sample_num", sample_num)):
        fields.append(trt.PluginField(field_name,
                                      np.array([value], dtype=np.int32),
                                      trt.PluginFieldType.INT32))
    # Per-level strides are an int32 vector here.
    fields.append(trt.PluginField("featmap_strides",
                                  np.array(featmap_strides, dtype=np.int32),
                                  trt.PluginFieldType.INT32))
    fields.append(trt.PluginField("roi_scale_factor",
                                  np.array([roi_scale_factor], dtype=np.float32),
                                  trt.PluginFieldType.FLOAT32))
    fields.append(trt.PluginField("finest_scale",
                                  np.array([finest_scale], dtype=np.int32),
                                  trt.PluginFieldType.INT32))
    return creator.create_plugin(layer_name, fields)
def getLayerNormPlugin(epsilon=1e-5):
    """Instantiate the LayerNorm plugin; return None if no 'LayerNorm' creator is registered.

    Fix: the original body referenced `epsilon` without defining it — a NameError
    unless a module-level global of that name happened to exist. It is now an explicit
    parameter (matching the parameterized variant of this helper elsewhere in the file).
    The 1e-5 default is the conventional LayerNorm epsilon — NOTE(review): confirm no
    caller relied on a differently-valued module global.

    Args:
        epsilon (float): Numerical-stability constant passed to the plugin.
    """
    for creator in trt.get_plugin_registry().plugin_creator_list:
        if creator.name == 'LayerNorm':
            eps_field = trt.PluginField('epsilon', np.float32(epsilon), trt.PluginFieldType.FLOAT32)
            return creator.create_plugin(creator.name, trt.PluginFieldCollection([eps_field]))
    return None
def create_meshgrid_plugin(layer_name, num_inputs, slice_dims=(2, 3), starts=(0., 0.), strides=(1., 1.)):
    """Create a MeshGridPluginDynamic plugin.

    Fix: the defaults were mutable lists (shared across calls — the classic
    mutable-default pitfall); immutable tuples produce identical np.array payloads.

    Args:
        layer_name (str): Name for the created plugin layer.
        num_inputs (int): Number of inputs to the meshgrid.
        slice_dims (sequence of int): Dimensions to slice, packed as INT32.
        starts (sequence of float): Start values per dimension, packed as FLOAT32.
        strides (sequence of float): Stride values per dimension, packed as FLOAT32.
    """
    creator = trt.get_plugin_registry().get_plugin_creator(
        'MeshGridPluginDynamic', '1', '')
    pfc = trt.PluginFieldCollection()
    pf_num_inputs = trt.PluginField(
        "num_inputs", np.array([int(num_inputs)], dtype=np.int32),
        trt.PluginFieldType.INT32)
    pfc.append(pf_num_inputs)
    pf_slice_dims = trt.PluginField("slice_dims",
                                    np.array(slice_dims, dtype=np.int32),
                                    trt.PluginFieldType.INT32)
    pfc.append(pf_slice_dims)
    pf_starts = trt.PluginField("starts", np.array(starts, dtype=np.float32),
                                trt.PluginFieldType.FLOAT32)
    pfc.append(pf_starts)
    pf_strides = trt.PluginField("strides", np.array(strides, dtype=np.float32),
                                 trt.PluginFieldType.FLOAT32)
    pfc.append(pf_strides)
    return creator.create_plugin(layer_name, pfc)
def getTopKAveragePlugin(nTopK, maxTopK):
    """Instantiate TopKAveragePlugin with nTopK / maxTopK; return None if unregistered."""
    for creator in trt.get_plugin_registry().plugin_creator_list:
        if creator.name != 'TopKAveragePlugin':
            continue
        fields = [
            trt.PluginField("nTopK", np.array([nTopK], dtype=np.int32),
                            trt.PluginFieldType.INT32),
            trt.PluginField("maxTopK", np.array([maxTopK], dtype=np.int32),
                            trt.PluginFieldType.INT32),
        ]
        return creator.create_plugin('TopKAveragePlugin',
                                     trt.PluginFieldCollection(fields))
    return None
def initialize(self):
    """Build the TensorRT network for ResNet50 from the ONNX model and prepare it per-arch.

    Parses the ONNX file, sanitizes tensor names, post-processes the network,
    detects the GPU architecture from the system id, and (for non-calibrating
    int8 GPU runs) applies calibration-driven layer fusions.
    """
    # Create network.
    self.network = self.builder.create_network(
        1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    # Parse from onnx file.
    parser = trt.OnnxParser(self.network, self.logger)
    with open(self.model_path, "rb") as f:
        model = f.read()
    success = parser.parse(model)
    if not success:
        raise RuntimeError(
            "ResNet50 onnx model parsing failed! Error: {:}".format(
                parser.get_error(0).desc()))
    nb_layers = self.network.num_layers
    for i in range(nb_layers):
        layer = self.network.get_layer(i)
        # ':' in tensor names will screw up calibration cache parsing (which uses ':' as a delimiter)
        for j in range(layer.num_inputs):
            tensor = layer.get_input(j)
            tensor.name = tensor.name.replace(":", "_")
        for j in range(layer.num_outputs):
            tensor = layer.get_output(j)
            tensor.name = tensor.name.replace(":", "_")
    # Post-process the TRT network (int8 uses conv-as-FC).
    self.postprocess(useConvForFC=(self.precision == "int8"))
    # Query system id and map it to a GPU architecture name.
    self.system_id = get_system_id()
    self.gpu_arch = None
    if "T4" in self.system_id or "TitanRTX" in self.system_id:
        self.gpu_arch = "Turing"
    elif "V100" in self.system_id or "DGX-2H" in self.system_id:
        self.gpu_arch = "Volta"
    elif "Xavier" in self.system_id:
        self.gpu_arch = "Xavier"
    elif "A100" in self.system_id:
        self.gpu_arch = "Ampere"
    # Turn on the res2-full-fusion mega kernel for Turing and Ampere.
    self.use_res2_mega_kernel = (self.gpu_arch == "Turing") or (self.gpu_arch == "Ampere")
    if self.device_type == "gpu" and self.precision == "int8" and not self.need_calibration:
        # Read calibration cache and fuse layers accordingly.
        self.registry = trt.get_plugin_registry()
        parse_calibration(self.network, self.cache_file)
        if not self.use_res2_mega_kernel:
            # Pairwise branch fusions when the mega kernel is unavailable.
            fuse_br1_br2c_onnx(self.registry, self.network)
            fuse_br2b_br2c_onnx(self.registry, self.network)
        else:
            fuse_res2_onnx(self.registry, self.network)
    self.fix_layer_names()
    self.initialized = True
def convert_grid_sample(ctx):
    """torch2trt converter that lowers an op via the GatherElementsPlugins TRT plugin.

    NOTE(review): despite the name, this reads (input, dim, index) from
    ctx.method_args and emits a gather-elements plugin, not a grid_sample —
    confirm against the converter registration site.
    """
    # Unpack converter context.
    source = ctx.method_args[0]
    gather_dim = ctx.method_args[1]
    index = ctx.method_args[2]
    output = ctx.method_return
    # Normalize a possibly-negative dim against the input rank.
    gather_dim = convert_dim(gather_dim, source.dim())

    # Materialize TRT tensors for both inputs.
    source_trt = add_missing_trt_tensors(ctx.network, [source])[0]
    index_trt = add_missing_trt_tensors(ctx.network, [index])[0]

    # Build the plugin layer.
    creator = trt.get_plugin_registry().get_plugin_creator(
        'GatherElementsPlugins', '1')
    assert creator is not None, 'Has no GatherElementsPlugins version 1'
    dim_field = trt.PluginField(name='dim',
                                data=np.array([gather_dim], dtype=np.int32),
                                type=trt.PluginFieldType.INT32)
    plugin = creator.create_plugin('GatherElementsPlugins',
                                   trt.PluginFieldCollection([dim_field]))
    layer = ctx.network.add_plugin_v2([source_trt, index_trt], plugin)

    # Bind the layer output back onto the torch tensor.
    output._trt = layer.get_output(0)
def create_layernorm_plugin(layer_name, normalized_shape, W, B, eps=1e-5, type_id=trt.DataType.FLOAT):
    """Create a LayerNormPluginDynamic plugin with scale W and bias B."""
    creator = trt.get_plugin_registry().get_plugin_creator(
        'LayerNormPluginDynamic', '1', '')
    fields = trt.PluginFieldCollection()
    fields.append(trt.PluginField("normalized_shape",
                                  np.array(normalized_shape, dtype=np.int32),
                                  trt.PluginFieldType.INT32))
    fields.append(trt.PluginField("eps",
                                  np.array([eps], dtype=np.float32),
                                  trt.PluginFieldType.FLOAT32))
    # Scale and bias buffers are handed over as raw FLOAT32 fields.
    fields.append(trt.PluginField("W", W, trt.PluginFieldType.FLOAT32))
    fields.append(trt.PluginField("B", B, trt.PluginFieldType.FLOAT32))
    fields.append(trt.PluginField("type_id",
                                  np.array([type_id], dtype=np.int32),
                                  trt.PluginFieldType.INT32))
    return creator.create_plugin(layer_name, fields)
def get_plugin_creator(self, plugin_name):
    """Initialize the standard TRT plugins and return the creator named plugin_name.

    Returns None when no creator matches; with duplicate names the last one
    in registry order wins (mirrors the original scan-without-break behavior).
    """
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')
    matches = [creator for creator in trt.get_plugin_registry().plugin_creator_list
               if creator.name == plugin_name]
    return matches[-1] if matches else None
def getLodPreGruPlugin(datatype):
    """Instantiate LodPreGruPlugin with the given datatype code; return None if unregistered."""
    for creator in trt.get_plugin_registry().plugin_creator_list:
        if creator.name != "LodPreGruPlugin":
            continue
        dtype_field = trt.PluginField("datatype",
                                      np.array([datatype], dtype=np.int32),
                                      trt.PluginFieldType.INT32)
        return creator.create_plugin(creator.name,
                                     trt.PluginFieldCollection([dtype_field]))
    return None
def getAddScalarPlugin(scalar):
    """Instantiate the AddScalar plugin with the given scalar; return None if unregistered."""
    for creator in trt.get_plugin_registry().plugin_creator_list:
        if creator.name != 'AddScalar':
            continue
        scalar_field = trt.PluginField("scalar", np.float32(scalar),
                                       trt.PluginFieldType.FLOAT32)
        return creator.create_plugin(creator.name,
                                     trt.PluginFieldCollection([scalar_field]))
    return None
def getReducePlugin(isSum):
    """Instantiate ReducePlugin; isSum is coerced to an int32 flag. Returns None if unregistered."""
    for creator in trt.get_plugin_registry().plugin_creator_list:
        if creator.name != 'ReducePlugin':
            continue
        is_sum_field = trt.PluginField("isSum",
                                       np.array([int(isSum)], dtype=np.int32),
                                       trt.PluginFieldType.INT32)
        return creator.create_plugin(creator.name,
                                     trt.PluginFieldCollection([is_sum_field]))
    return None
def getRandomPlugin():
    """Instantiate RandomPlugin seeded with cuRandSeed; return None if unregistered.

    NOTE(review): relies on a module-level global `cuRandSeed` not visible in this
    block — confirm it is defined elsewhere in the module.
    """
    for creator in trt.get_plugin_registry().plugin_creator_list:
        if creator.name != 'RandomPlugin':
            continue
        seed_field = trt.PluginField("seed",
                                     np.array([cuRandSeed], dtype=np.int32),
                                     trt.PluginFieldType.INT32)
        return creator.create_plugin(creator.name,
                                     trt.PluginFieldCollection([seed_field]))
    return None
def getLayerNormPlugin(epsilon):
    """Instantiate the LayerNorm plugin with the given epsilon; return None if unregistered."""
    for creator in trt.get_plugin_registry().plugin_creator_list:
        if creator.name != 'LayerNorm':
            continue
        eps_field = trt.PluginField('epsilon', np.float32(epsilon),
                                    trt.PluginFieldType.FLOAT32)
        return creator.create_plugin(creator.name,
                                     trt.PluginFieldCollection([eps_field]))
    return None
def get_plugin_creator(plugin_name):
    """Init standard TRT plugins and return the creator named plugin_name, or None.

    Uses the module-level `logger` for plugin initialization. With duplicate names
    the last creator in registry order wins (mirrors the original behavior).
    """
    trt.init_libnvinfer_plugins(logger, '')
    matches = [creator for creator in trt.get_plugin_registry().plugin_creator_list
               if creator.name == plugin_name]
    return matches[-1] if matches else None
def get_plugin_creator(plugin_name, logger):
    """Get the TensorRT plugin creator.

    Initializes the standard plugin library, then returns the first registered
    creator whose name matches plugin_name, or None if there is no match.
    """
    trt.init_libnvinfer_plugins(logger, '')
    registry = trt.get_plugin_registry()
    return next((creator for creator in registry.plugin_creator_list
                 if creator.name == plugin_name), None)
def getResizePlugin():
    """Instantiate ResizePlugin with output size (hOut, wOut); return None if unregistered.

    NOTE(review): relies on module-level globals `hOut` and `wOut` not visible in this
    block — confirm they are defined elsewhere in the module.
    """
    for creator in trt.get_plugin_registry().plugin_creator_list:
        if creator.name != 'ResizePlugin':
            continue
        fields = [
            trt.PluginField("hOut", np.array([hOut], dtype=np.int32),
                            trt.PluginFieldType.INT32),
            trt.PluginField("wOut", np.array([wOut], dtype=np.int32),
                            trt.PluginFieldType.INT32),
        ]
        return creator.create_plugin(creator.name,
                                     trt.PluginFieldCollection(fields))
    return None
def load_plugins():
    """Import torch2trt's plugin module (triggering registration side effects) and
    re-register its creators under the 'torch2trt' namespace."""
    import torch2trt.plugins
    registry = trt.get_plugin_registry()
    # Snapshot first: register_creator may mutate the registry we are scanning.
    candidates = [creator for creator in registry.plugin_creator_list
                  if creator.plugin_namespace == 'torch2trt']
    for creator in candidates:
        registry.register_creator(creator, 'torch2trt')
def create_gridanchordynamic_plugin(layer_name, base_size, stride, scales=(1.,), ratios=(1.,), scale_major=True, center_x=-1, center_y=-1, base_anchors=None):
    """Create a GridAnchorDynamicPluginDynamic plugin.

    Fix: scales/ratios defaults were np.ndarray objects built at import time —
    the shared-mutable-default pitfall. Immutable tuples yield identical payloads
    through np.array(...).astype(np.float32).

    Args:
        layer_name (str): Name for the created plugin layer.
        base_size (int): Base anchor size.
        stride (int): Anchor stride.
        scales (sequence of float): Anchor scales (FLOAT32 field).
        ratios (sequence of float): Anchor aspect ratios (FLOAT32 field).
        scale_major (bool): Scale-major ordering flag, encoded as int32.
        center_x (int): Anchor center x; -1 for the plugin's default.
        center_y (int): Anchor center y; -1 for the plugin's default.
        base_anchors: Optional explicit base anchors; attached only when not None.
    """
    creator = trt.get_plugin_registry().get_plugin_creator(
        'GridAnchorDynamicPluginDynamic', '1', '')
    pfc = trt.PluginFieldCollection()
    pf_base_size = trt.PluginField("base_size",
                                   np.array([base_size], dtype=np.int32),
                                   trt.PluginFieldType.INT32)
    pfc.append(pf_base_size)
    pf_stride = trt.PluginField("stride", np.array([stride], dtype=np.int32),
                                trt.PluginFieldType.INT32)
    pfc.append(pf_stride)
    pf_scales = trt.PluginField("scales", np.array(scales).astype(np.float32),
                                trt.PluginFieldType.FLOAT32)
    pfc.append(pf_scales)
    pf_ratios = trt.PluginField("ratios", np.array(ratios).astype(np.float32),
                                trt.PluginFieldType.FLOAT32)
    pfc.append(pf_ratios)
    pf_scale_major = trt.PluginField(
        "scale_major", np.array([int(scale_major)], dtype=np.int32),
        trt.PluginFieldType.INT32)
    pfc.append(pf_scale_major)
    pf_center_x = trt.PluginField("center_x",
                                  np.array([center_x], dtype=np.int32),
                                  trt.PluginFieldType.INT32)
    pfc.append(pf_center_x)
    pf_center_y = trt.PluginField("center_y",
                                  np.array([center_y], dtype=np.int32),
                                  trt.PluginFieldType.INT32)
    pfc.append(pf_center_y)
    if base_anchors is not None:
        pf_base_anchors = trt.PluginField(
            "base_anchors", np.array(base_anchors).astype(np.float32),
            trt.PluginFieldType.FLOAT32)
        pfc.append(pf_base_anchors)
    return creator.create_plugin(layer_name, pfc)
def getCuBLASGemmPlugin(weight):
    """Instantiate CuBLASGemm with a weight matrix; return None if unregistered.

    weight is expected to be 2-D: k = weight.shape[0], n = weight.shape[1] are
    passed alongside the float32 weight data.
    """
    for creator in trt.get_plugin_registry().plugin_creator_list:
        if creator.name != 'CuBLASGemm':
            continue
        fields = [
            trt.PluginField("weight", np.float32(weight),
                            trt.PluginFieldType.FLOAT32),
            trt.PluginField("k", np.int32(weight.shape[0]),
                            trt.PluginFieldType.INT32),
            trt.PluginField("n", np.int32(weight.shape[1]),
                            trt.PluginFieldType.INT32),
        ]
        return creator.create_plugin(creator.name,
                                     trt.PluginFieldCollection(fields))
    return None
def load_plugins():
    """Load the torch2trt native library and re-register its plugin creators
    under the 'torch2trt' namespace.

    NOTE(review): the .so path is hard-coded to /opt/torch2trt — confirm this
    matches the deployment layout.
    """
    ctypes.CDLL('/opt/torch2trt/torch2trt/libtorch2trt.so')
    registry = trt.get_plugin_registry()
    # Snapshot first: register_creator may mutate the registry we are scanning.
    candidates = [creator for creator in registry.plugin_creator_list
                  if creator.plugin_namespace == 'torch2trt']
    for creator in candidates:
        registry.register_creator(creator, 'torch2trt')