def create_torchunfold_plugin(layer_name, kernel_size, dilation, padding,
                              stride):
    creator = trt.get_plugin_registry().get_plugin_creator(
        'TorchUnfoldPluginDynamic', '1', '')
    pfc = trt.PluginFieldCollection()

    if isinstance(kernel_size, int):
        kernel_size = (kernel_size, kernel_size)
    pf_kernel_size = trt.PluginField('kernel_size',
                                     np.array(kernel_size, dtype=np.int32),
                                     trt.PluginFieldType.INT32)
    pfc.append(pf_kernel_size)

    if isinstance(dilation, int):
        dilation = (dilation, dilation)
    pf_dilation = trt.PluginField('dilation',
                                  np.array(dilation, dtype=np.int32),
                                  trt.PluginFieldType.INT32)
    pfc.append(pf_dilation)

    if isinstance(padding, int):
        padding = (padding, padding)
    pf_padding = trt.PluginField('padding',
                                 np.array(padding, dtype=np.int32),
                                 trt.PluginFieldType.INT32)
    pfc.append(pf_padding)

    if isinstance(stride, int):
        stride = (stride, stride)
    pf_stride = trt.PluginField('stride',
                                np.array(stride, dtype=np.int32),
                                trt.PluginFieldType.INT32)
    pfc.append(pf_stride)

    return creator.create_plugin(layer_name, pfc)

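# Usage sketch for the helper above (hedged): assumes an already-built
# INetworkDefinition and input ITensor, with a 3x3 unfold configuration
# chosen purely for illustration. Wiring follows the same add_plugin_v2
# pattern used throughout this collection.
def add_torchunfold(network, input_tensor):
    plugin = create_torchunfold_plugin('unfold', kernel_size=3, dilation=1,
                                       padding=1, stride=1)
    layer = network.add_plugin_v2([input_tensor], plugin)
    return layer.get_output(0)
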
def skipln(prefix, config, init_dict, network, input_tensor, skip, bias=None):
    """ Add the skip layer """
    idims = input_tensor.shape
    assert len(idims) == 5
    hidden_size = idims[2]

    pf_ld = trt.PluginField("ld", np.array([hidden_size], np.int32),
                            trt.PluginFieldType.INT32)
    wbeta = init_dict[prefix + "bias"]
    pf_beta = trt.PluginField("beta", wbeta.numpy(),
                              trt.PluginFieldType.FLOAT32)
    wgamma = init_dict[prefix + "weight"]
    pf_gamma = trt.PluginField("gamma", wgamma.numpy(),
                               trt.PluginFieldType.FLOAT32)
    pf_type = trt.PluginField(
        "type_id", np.array([1 if config.use_fp16 else 0], np.int32),
        trt.PluginFieldType.INT32)

    fields = [pf_ld, pf_beta, pf_gamma, pf_type]
    # Test for None explicitly; truth-testing a tensor is ambiguous.
    if bias is not None:
        pf_bias = trt.PluginField("bias", bias.numpy(),
                                  trt.PluginFieldType.FLOAT32)
        fields.append(pf_bias)

    pfc = trt.PluginFieldCollection(fields)
    skipln_plug = skln_plg_creator.create_plugin("skipln", pfc)

    skipln_inputs = [input_tensor, skip]
    layer = network.add_plugin_v2(skipln_inputs, skipln_plug)
    return layer

def create_deformable_pool_plugin(layer_name, out_size, spatial_scale,
                                  sampling_ratio, gamma):
    creator = trt.get_plugin_registry().get_plugin_creator(
        'DeformablePoolPluginDynamic', '1', '')

    if not isinstance(out_size, Iterable):
        out_size = [out_size, out_size]

    pfc = trt.PluginFieldCollection()

    pf_out_size = trt.PluginField('out_size',
                                  np.array(out_size, dtype=np.int32),
                                  trt.PluginFieldType.INT32)
    pfc.append(pf_out_size)

    pf_spatial_scale = trt.PluginField(
        'spatial_scale', np.array([spatial_scale], dtype=np.float32),
        trt.PluginFieldType.FLOAT32)
    pfc.append(pf_spatial_scale)

    pf_sampling_ratio = trt.PluginField(
        'sampling_ratio', np.array([sampling_ratio], dtype=np.int32),
        trt.PluginFieldType.INT32)
    pfc.append(pf_sampling_ratio)

    pf_gamma = trt.PluginField('gamma', np.array([gamma], dtype=np.float32),
                               trt.PluginFieldType.FLOAT32)
    pfc.append(pf_gamma)

    return creator.create_plugin(layer_name, pfc)

def get_dlrm_interactions_plugin(self, plugin_name, tableOffsets,
                                 interactionsOutputInterleaved):
    """Create a plugin layer for the DLRM Interactions plugin and return it.

    The DLRM Interactions plugin takes two inputs, the bottom-MLP output and
    the categorical input, and looks up the embeddings for the latter. Since
    DLRM embeddings can be larger than GPU memory, the plugin keeps the most
    frequently used embeddings on the GPU and the rest on the host, and
    manages the lookup with good performance.
    """
    plugin = None
    for plugin_creator in trt.get_plugin_registry().plugin_creator_list:
        if plugin_creator.name == plugin_name:
            embeddingSize_field = trt.PluginField(
                "embeddingSize",
                np.array([self.embedding_size], dtype=np.int32),
                trt.PluginFieldType.INT32)
            embeddingRows_field = trt.PluginField(
                "embeddingRows",
                np.array([self.embedding_rows_total], dtype=np.int32),
                trt.PluginFieldType.INT32)
            # 0 during calibration, 1 for fp16, otherwise 2.
            reducedPrecisionIO_field = trt.PluginField(
                "reducedPrecisionIO",
                np.array([0 if self.need_calibration else
                          (1 if self.precision == "fp16" else 2)],
                         dtype=np.int32),
                trt.PluginFieldType.INT32)
            embeddingWeightsOnGpuPart_field = trt.PluginField(
                "embeddingWeightsOnGpuPart",
                np.array([self.embedding_weights_on_gpu_part],
                         dtype=np.float32),
                trt.PluginFieldType.FLOAT32)
            interactionsOutputInterleaved_field = trt.PluginField(
                "interactionsOutputInterleaved",
                np.array([1 if interactionsOutputInterleaved else 0],
                         dtype=np.int32),
                trt.PluginFieldType.INT32)
            tableOffsets_field = trt.PluginField(
                "tableOffsets", tableOffsets, trt.PluginFieldType.INT32)
            embeddingWeightsFilepath_field = trt.PluginField(
                "embeddingWeightsFilepath",
                np.array(list(self.embedding_weights_binary_filepath.encode()),
                         dtype=np.int8),
                trt.PluginFieldType.CHAR)
            if self.use_row_frequencies:
                rowFrequenciesFilepath_field = trt.PluginField(
                    "rowFrequenciesFilepath",
                    np.array(list(self.row_frequencies_binary_filepath.encode()),
                             dtype=np.int8),
                    trt.PluginFieldType.CHAR)
            else:
                rowFrequenciesFilepath_field = trt.PluginField(
                    "rowFrequenciesFilepath",
                    np.array(list("".encode()), dtype=np.int8),
                    trt.PluginFieldType.CHAR)
            output_padding_field = trt.PluginField(
                "outputPaddingGranularity",
                np.array([self.output_padding], dtype=np.int32),
                trt.PluginFieldType.INT32)
            field_collection = trt.PluginFieldCollection([
                embeddingSize_field, embeddingRows_field,
                reducedPrecisionIO_field, embeddingWeightsOnGpuPart_field,
                interactionsOutputInterleaved_field, output_padding_field,
                tableOffsets_field, embeddingWeightsFilepath_field,
                rowFrequenciesFilepath_field
            ])
            plugin = plugin_creator.create_plugin(
                name=plugin_name, field_collection=field_collection)
    return plugin

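# Usage sketch for the helper above (hedged): the plugin consumes the two
# inputs named in the docstring. The plugin name, self.table_offsets, and
# the tensor names are illustrative assumptions, not from the original code.
def add_interactions_layer(self, network, bottom_mlp_out, categorical_in):
    plugin = self.get_dlrm_interactions_plugin(
        "DLRM_INTERACTIONS_TRT", self.table_offsets,
        interactionsOutputInterleaved=False)
    layer = network.add_plugin_v2([bottom_mlp_out, categorical_in], plugin)
    return layer.get_output(0)
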
def get_trt_plugin(plugin_name):
    plugin = None
    for plugin_creator in PLUGIN_CREATORS:
        if (plugin_creator.name == "Normalize_TRT") and \
                (plugin_name == "Normalize_TRT"):
            nbWeights = trt.PluginField("nbWeights",
                                        np.array([1], dtype=np.int32),
                                        trt.PluginFieldType.INT32)
            eps = trt.PluginField("eps",
                                  np.array([0.00001], dtype=np.float32),
                                  trt.PluginFieldType.FLOAT32)
            weights = trt.PluginField('weights',
                                      np.array([1] * 16, dtype=np.float32),
                                      trt.PluginFieldType.FLOAT32)
            field_collection = trt.PluginFieldCollection(
                [weights, eps, nbWeights])
            plugin = plugin_creator.create_plugin(
                name=plugin_name, field_collection=field_collection)
            break
        elif (plugin_creator.name == "CustomGeluPluginDynamic") and \
                (plugin_name == "CustomGeluPluginDynamic"):
            type_id = trt.PluginField("type_id", np.array([0], np.int32),
                                      trt.PluginFieldType.INT32)
            bias = trt.PluginField("bias", np.array([[[1]]], np.float32),
                                   trt.PluginFieldType.FLOAT32)
            field_collection = trt.PluginFieldCollection([type_id, bias])
            plugin = plugin_creator.create_plugin(
                name=plugin_name, field_collection=field_collection)
            break
    return plugin

def create_roiextractor_plugin(layer_name, out_size, sample_num,
                               featmap_strides, roi_scale_factor,
                               finest_scale):
    creator = trt.get_plugin_registry().get_plugin_creator(
        'RoiExtractorPluginDynamic', '1', '')
    pfc = trt.PluginFieldCollection()

    pf_out_size = trt.PluginField("out_size",
                                  np.array([out_size], dtype=np.int32),
                                  trt.PluginFieldType.INT32)
    pfc.append(pf_out_size)

    pf_sample_num = trt.PluginField("sample_num",
                                    np.array([sample_num], dtype=np.int32),
                                    trt.PluginFieldType.INT32)
    pfc.append(pf_sample_num)

    pf_featmap_strides = trt.PluginField(
        "featmap_strides", np.array(featmap_strides, dtype=np.int32),
        trt.PluginFieldType.INT32)
    pfc.append(pf_featmap_strides)

    pf_roi_scale_factor = trt.PluginField(
        "roi_scale_factor", np.array([roi_scale_factor], dtype=np.float32),
        trt.PluginFieldType.FLOAT32)
    pfc.append(pf_roi_scale_factor)

    pf_finest_scale = trt.PluginField("finest_scale",
                                      np.array([finest_scale], dtype=np.int32),
                                      trt.PluginFieldType.INT32)
    pfc.append(pf_finest_scale)

    return creator.create_plugin(layer_name, pfc)

def create_carafefeaturereassemble_plugin(layer_name, scale_factor, up_kernel,
                                          up_group,
                                          type_id=trt.DataType.FLOAT):
    creator = trt.get_plugin_registry().get_plugin_creator(
        'CarafeFeatureReassemblePluginDynamic', '1', '')
    pfc = trt.PluginFieldCollection()

    pf_scale_factor = trt.PluginField("scale_factor",
                                      np.array([scale_factor], dtype=np.int32),
                                      trt.PluginFieldType.INT32)
    pfc.append(pf_scale_factor)

    pf_up_kernel = trt.PluginField("up_kernel",
                                   np.array([up_kernel], dtype=np.int32),
                                   trt.PluginFieldType.INT32)
    pfc.append(pf_up_kernel)

    pf_up_group = trt.PluginField("up_group",
                                  np.array([up_group], dtype=np.int32),
                                  trt.PluginFieldType.INT32)
    pfc.append(pf_up_group)

    # Convert the trt.DataType enum to a plain int before building the array.
    pf_type_id = trt.PluginField("type_id",
                                 np.array([int(type_id)], dtype=np.int32),
                                 trt.PluginFieldType.INT32)
    pfc.append(pf_type_id)

    return creator.create_plugin(layer_name, pfc)

def attention_layer_opt(prefix, config, init_dict, network, input_tensor,
                        imask):
    """ Add the attention layer """
    assert len(input_tensor.shape) == 5
    B, S, hidden_size, _, _ = input_tensor.shape
    num_heads = config.num_attention_heads
    head_size = int(hidden_size / num_heads)

    Wall = init_dict[prefix + WQKV]
    Ball = init_dict[prefix + BQKV]

    # FC_attention
    if config.use_int8:
        mult_all = network.add_convolution(input_tensor, 3 * hidden_size,
                                           (1, 1), Wall, Ball)
    else:
        mult_all = network.add_fully_connected(input_tensor, 3 * hidden_size,
                                               Wall, Ball)

    if config.use_qat:
        dr_qkv = max(
            init_dict[prefix + 'self_qv_a_input_quantizer_amax'],
            init_dict[prefix + 'self_qv_b_input_quantizer_amax'],
            init_dict[prefix + 'self_av_b_input_quantizer_amax'],
        )
        set_output_range(mult_all, dr_qkv)
    set_output_name(mult_all, prefix, "qkv_mult")

    has_mask = imask is not None

    # QKV2CTX
    dtype = trt.float32
    if config.use_fp16:
        dtype = trt.float16
    # Multi-head attention doesn't use INT8 inputs and output by default
    # unless it is specified.
    if config.use_int8 and config.use_int8_multihead and not config.is_calib_mode:
        dtype = trt.int8

    pf_type = trt.PluginField("type_id", np.array([int(dtype)], np.int32),
                              trt.PluginFieldType.INT32)
    pf_hidden_size = trt.PluginField("hidden_size",
                                     np.array([hidden_size], np.int32),
                                     trt.PluginFieldType.INT32)
    pf_num_heads = trt.PluginField("num_heads",
                                   np.array([num_heads], np.int32),
                                   trt.PluginFieldType.INT32)
    pf_has_mask = trt.PluginField("has_mask", np.array([has_mask], np.int32),
                                  trt.PluginFieldType.INT32)
    if config.use_qat:
        dr_probs = init_dict[prefix + 'self_av_a_input_quantizer_amax']
        dq_probs = dr_probs / 127.0
        pf_dq_probs = trt.PluginField("dq_probs",
                                      np.array([dq_probs], np.float32),
                                      trt.PluginFieldType.FLOAT32)
        pfc = trt.PluginFieldCollection([pf_hidden_size, pf_num_heads,
                                         pf_has_mask, pf_type, pf_dq_probs])
    else:
        pfc = trt.PluginFieldCollection([pf_hidden_size, pf_num_heads,
                                         pf_has_mask, pf_type])
    qkv2ctx_plug = qkv2_plg_creator.create_plugin("qkv2ctx", pfc)

    qkv_in = [mult_all.get_output(0)]
    if has_mask:
        qkv_in.append(imask)
    qkv2ctx = network.add_plugin_v2(qkv_in, qkv2ctx_plug)

    if config.use_qat:
        dr_ctx = init_dict[prefix + 'output_dense_input_amax']
        set_output_range(qkv2ctx, dr_ctx)
    set_output_name(qkv2ctx, prefix, "context_layer")
    return qkv2ctx

def add_fused_bottom_mlp(self, plugin_name, input_tensor, input_size,
                         num_channels, names):
    """Add the MLP part of the DLRM network as a fused plugin for better perf.

    Returns the fused MLP plugin and the name of its final output tensor.

    Args:
        plugin_name (str): Name of fused MLP plugin to use.
        input_tensor (ITensor): Input to MLP.
        input_size (int): Number of numerical features.
        num_channels (list): List of number of channels for each FC layer in MLP.
        names (list): List of names of each FC layer in MLP.
    """
    plugin = None
    output_tensor_name = ""
    dynamic_range_dict = self.parse_calibration()
    for plugin_creator in trt.get_plugin_registry().plugin_creator_list:
        if plugin_creator.name == plugin_name:
            plugin_fields = []
            plugin_fields.append(trt.PluginField(
                "inputChannels", np.array([input_size], dtype=np.int32),
                trt.PluginFieldType.INT32))
            for i, _ in enumerate(num_channels):
                weights = self.weights[names[i] + ".weight"].numpy()
                # Zero-pad the weights if the declared input size is wider
                # than the checkpoint's weight matrix.
                input_size_suggested_by_weights = weights.shape[1]
                if input_size > input_size_suggested_by_weights:
                    weights = np.concatenate(
                        (weights,
                         np.zeros((weights.shape[0],
                                   input_size - input_size_suggested_by_weights),
                                  dtype=weights.dtype)), 1)
                plugin_fields.append(trt.PluginField(
                    "weights" + str(i), weights,
                    trt.PluginFieldType.FLOAT32))
                plugin_fields.append(trt.PluginField(
                    "biases" + str(i),
                    self.weights[names[i] + ".bias"].numpy(),
                    trt.PluginFieldType.FLOAT32))
                output_tensor_name = names[i] + ".relu.output"
                if i != len(num_channels) - 1:
                    plugin_fields.append(trt.PluginField(
                        "dynamicRange" + str(i),
                        np.array([dynamic_range_dict[output_tensor_name]],
                                 dtype=np.float32),
                        trt.PluginFieldType.FLOAT32))
            plugin = plugin_creator.create_plugin(
                name=plugin_name,
                field_collection=trt.PluginFieldCollection(plugin_fields))
    return plugin, output_tensor_name

def create_meshgrid_plugin(layer_name,
                           num_inputs,
                           slice_dims=(2, 3),
                           starts=(0., 0.),
                           strides=(1., 1.)):
    creator = trt.get_plugin_registry().get_plugin_creator(
        'MeshGridPluginDynamic', '1', '')
    pfc = trt.PluginFieldCollection()

    pf_num_inputs = trt.PluginField(
        "num_inputs", np.array([int(num_inputs)], dtype=np.int32),
        trt.PluginFieldType.INT32)
    pfc.append(pf_num_inputs)

    pf_slice_dims = trt.PluginField("slice_dims",
                                    np.array(slice_dims, dtype=np.int32),
                                    trt.PluginFieldType.INT32)
    pfc.append(pf_slice_dims)

    pf_starts = trt.PluginField("starts", np.array(starts, dtype=np.float32),
                                trt.PluginFieldType.FLOAT32)
    pfc.append(pf_starts)

    pf_strides = trt.PluginField("strides",
                                 np.array(strides, dtype=np.float32),
                                 trt.PluginFieldType.FLOAT32)
    pfc.append(pf_strides)

    return creator.create_plugin(layer_name, pfc)

def create_torchembedding_plugin(layer_name, weight):
    creator = trt.get_plugin_registry().get_plugin_creator(
        'TorchEmbeddingPluginDynamic', '1', '')
    pfc = trt.PluginFieldCollection()

    num_embeddings = weight.shape[0]
    embedding_dim = weight.shape[1]

    pf_num_embeddings = trt.PluginField(
        "num_embeddings", np.array([num_embeddings], dtype=np.int32),
        trt.PluginFieldType.INT32)
    pfc.append(pf_num_embeddings)

    pf_embedding_dim = trt.PluginField(
        "embedding_dim", np.array([embedding_dim], dtype=np.int32),
        trt.PluginFieldType.INT32)
    pfc.append(pf_embedding_dim)

    pf_weight = trt.PluginField("weight", np.array(weight, dtype=np.float32),
                                trt.PluginFieldType.FLOAT32)
    pfc.append(pf_weight)

    return creator.create_plugin(layer_name, pfc)

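# Usage sketch (hedged): the weight matrix is baked into the plugin as a
# field, so the layer's only network input is an int32 index tensor; the
# lookup semantics are presumed to mirror torch.nn.Embedding.
def add_embedding(network, indices, weight):
    plugin = create_torchembedding_plugin('embedding', weight)
    return network.add_plugin_v2([indices], plugin).get_output(0)
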
def getTopKAveragePlugin(nTopK, maxTopK):
    for c in trt.get_plugin_registry().plugin_creator_list:
        if c.name == 'TopKAveragePlugin':
            p0 = trt.PluginField("nTopK", np.array([nTopK], dtype=np.int32),
                                 trt.PluginFieldType.INT32)
            p1 = trt.PluginField("maxTopK",
                                 np.array([maxTopK], dtype=np.int32),
                                 trt.PluginFieldType.INT32)
            return c.create_plugin('TopKAveragePlugin',
                                   trt.PluginFieldCollection([p0, p1]))
    return None

def skipln(prefix, init_dict, network, input_tensor, skip):
    """ Add the skip layer """
    idims = input_tensor.shape
    assert len(idims) == 5
    hidden_size = idims[2]

    # type_id is an INT32 field, so back it with an int32 array.
    pf_type_id = trt.PluginField("type_id", np.array([0], dtype=np.int32),
                                 trt.PluginFieldType.INT32)
    pf_ld = trt.PluginField("ld", np.array([hidden_size], np.int32),
                            trt.PluginFieldType.INT32)
    wbeta = init_dict[prefix + "beta"]
    pf_beta = trt.PluginField("beta", wbeta.numpy(),
                              trt.PluginFieldType.FLOAT32)
    wgamma = init_dict[prefix + "gamma"]
    pf_gamma = trt.PluginField("gamma", wgamma.numpy(),
                               trt.PluginFieldType.FLOAT32)

    pfc = trt.PluginFieldCollection([pf_type_id, pf_ld, pf_beta, pf_gamma])
    skipln_plug = skln_plg_creator.create_plugin("skipln", pfc)

    skipln_inputs = [input_tensor, skip]
    layer = network.add_plugin_v2(skipln_inputs, skipln_plug)
    return layer

def skipln(prefix, config, init_dict, network, input_tensor, skip, bias=None):
    """ Add the skip layer """
    idims = input_tensor.shape
    assert len(idims) == 5
    hidden_size = idims[2]

    dtype = trt.float32
    if config.use_fp16:
        dtype = trt.float16
    # Skip layernorm doesn't use INT8 inputs and output by default unless it
    # is specified.
    if config.use_int8 and config.use_int8_skipln and not config.is_calib_mode:
        dtype = trt.int8

    pf_ld = trt.PluginField("ld", np.array([hidden_size], np.int32),
                            trt.PluginFieldType.INT32)
    wbeta = init_dict[prefix + "beta"]
    pf_beta = trt.PluginField("beta", wbeta.numpy(),
                              trt.PluginFieldType.FLOAT32)
    wgamma = init_dict[prefix + "gamma"]
    pf_gamma = trt.PluginField("gamma", wgamma.numpy(),
                               trt.PluginFieldType.FLOAT32)
    pf_type = trt.PluginField("type_id", np.array([int(dtype)], np.int32),
                              trt.PluginFieldType.INT32)

    fields = [pf_ld, pf_beta, pf_gamma, pf_type]
    # Test for None explicitly; truth-testing a tensor is ambiguous.
    if bias is not None:
        pf_bias = trt.PluginField("bias", bias.numpy(),
                                  trt.PluginFieldType.FLOAT32)
        fields.append(pf_bias)

    pfc = trt.PluginFieldCollection(fields)
    skipln_plug = skln_plg_creator.create_plugin("skipln", pfc)

    skipln_inputs = [input_tensor, skip]
    layer = network.add_plugin_v2(skipln_inputs, skipln_plug)
    return layer

def create_roipool_plugin(layer_name, out_size, featmap_strides,
                          roi_scale_factor, finest_scale):
    creator = trt.get_plugin_registry().get_plugin_creator(
        'RoiPoolPluginDynamic', '1', '')
    pfc = trt.PluginFieldCollection()

    pf_out_size = trt.PluginField('out_size',
                                  np.array([out_size], dtype=np.int32),
                                  trt.PluginFieldType.INT32)
    pfc.append(pf_out_size)

    pf_featmap_strides = trt.PluginField(
        'featmap_strides', np.array(featmap_strides).astype(np.float32),
        trt.PluginFieldType.FLOAT32)
    pfc.append(pf_featmap_strides)

    pf_roi_scale_factor = trt.PluginField(
        'roi_scale_factor', np.array([roi_scale_factor], dtype=np.float32),
        trt.PluginFieldType.FLOAT32)
    pfc.append(pf_roi_scale_factor)

    pf_finest_scale = trt.PluginField('finest_scale',
                                      np.array([finest_scale], dtype=np.int32),
                                      trt.PluginFieldType.INT32)
    pfc.append(pf_finest_scale)

    return creator.create_plugin(layer_name, pfc)

def attention_layer_opt(prefix, config, init_dict, network, input_tensor,
                        imask):
    """ Add the attention layer """
    assert len(input_tensor.shape) == 5
    B, S, hidden_size, _, _ = input_tensor.shape
    num_heads = config.num_attention_heads
    head_size = int(hidden_size / num_heads)

    Wall = init_dict[prefix + WQKV]
    Ball = init_dict[prefix + BQKV]

    mult_all = network.add_fully_connected(input_tensor, 3 * hidden_size,
                                           Wall, Ball)
    set_layer_name(mult_all, prefix, "qkv_mult")

    has_mask = imask is not None

    pf_hidden_size = trt.PluginField("hidden_size",
                                     np.array([hidden_size], np.int32),
                                     trt.PluginFieldType.INT32)
    pf_num_heads = trt.PluginField("num_heads",
                                   np.array([num_heads], np.int32),
                                   trt.PluginFieldType.INT32)
    pf_S = trt.PluginField("S", np.array([S], np.int32),
                           trt.PluginFieldType.INT32)
    pf_has_mask = trt.PluginField("has_mask", np.array([has_mask], np.int32),
                                  trt.PluginFieldType.INT32)
    pfc = trt.PluginFieldCollection([pf_hidden_size, pf_num_heads, pf_S,
                                     pf_has_mask])
    qkv2ctx_plug = qkv2_plg_creator.create_plugin("qkv2ctx", pfc)

    qkv_in = [mult_all.get_output(0), imask]
    qkv2ctx = network.add_plugin_v2(qkv_in, qkv2ctx_plug)
    set_layer_name(qkv2ctx, prefix, "context_layer")
    return qkv2ctx

def create_layernorm_plugin(layer_name,
                            normalized_shape,
                            W,
                            B,
                            eps=1e-5,
                            type_id=trt.DataType.FLOAT):
    creator = trt.get_plugin_registry().get_plugin_creator(
        'LayerNormPluginDynamic', '1', '')
    pfc = trt.PluginFieldCollection()

    pf_normalized_shape = trt.PluginField(
        "normalized_shape", np.array(normalized_shape, dtype=np.int32),
        trt.PluginFieldType.INT32)
    pfc.append(pf_normalized_shape)

    pf_eps = trt.PluginField("eps", np.array([eps], dtype=np.float32),
                             trt.PluginFieldType.FLOAT32)
    pfc.append(pf_eps)

    pf_W = trt.PluginField("W", W, trt.PluginFieldType.FLOAT32)
    pfc.append(pf_W)

    pf_B = trt.PluginField("B", B, trt.PluginFieldType.FLOAT32)
    pfc.append(pf_B)

    # Convert the trt.DataType enum to a plain int before building the array.
    pf_type_id = trt.PluginField("type_id",
                                 np.array([int(type_id)], dtype=np.int32),
                                 trt.PluginFieldType.INT32)
    pfc.append(pf_type_id)

    return creator.create_plugin(layer_name, pfc)

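# Usage sketch (hedged): an identity-initialized layernorm over the last
# dimension. Assumes an existing network and an ITensor whose trailing
# dimension equals hidden_size; names are illustrative.
def add_layernorm(network, x, hidden_size):
    weight = np.ones(hidden_size, dtype=np.float32)   # gamma
    bias = np.zeros(hidden_size, dtype=np.float32)    # beta
    plugin = create_layernorm_plugin('layernorm', [hidden_size], weight,
                                     bias, eps=1e-5)
    return network.add_plugin_v2([x], plugin).get_output(0)
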
def skipln(prefix, config, init_dict, network, input_tensor, skip):
    """ Add the skip layer """
    hidden_size = config.hidden_size
    dtype = config.get_trt_dtype()

    pf_ld = trt.PluginField("ld", np.array([hidden_size], np.int32),
                            trt.PluginFieldType.INT32)
    wbeta = init_dict[prefix + "beta"]
    pf_beta = trt.PluginField("beta", wbeta.numpy(),
                              trt.PluginFieldType.FLOAT32)
    wgamma = init_dict[prefix + "gamma"]
    pf_gamma = trt.PluginField("gamma", wgamma.numpy(),
                               trt.PluginFieldType.FLOAT32)
    pf_type = trt.PluginField("type_id", np.array([int(dtype)], np.int32),
                              trt.PluginFieldType.INT32)

    if config.use_int8 and config.interleaved:
        pfc = trt.PluginFieldCollection([pf_beta, pf_gamma])
        skipln_plug = skln_plg_creator3.create_plugin("skipln", pfc)
    else:
        pfc = trt.PluginFieldCollection([pf_ld, pf_beta, pf_gamma, pf_type])
        skipln_plug = skln_plg_creator2.create_plugin("skipln", pfc)

    skipln_inputs = [input_tensor, skip]
    layer = network.add_plugin_v2(skipln_inputs, skipln_plug)
    return layer

def layer_normalization(network, input, odim, model_dict, prefix):
    # Fetch the creator from the global plugin registry.
    creator = trt.get_plugin_registry().get_plugin_creator(
        "LayerNormalization_TRT", "")
    gamma = trt.PluginField("gamma", model_dict[prefix + ".weight"],
                            trt.PluginFieldType.FLOAT32)
    beta = trt.PluginField("beta", model_dict[prefix + ".bias"],
                           trt.PluginFieldType.FLOAT32)
    pfc = trt.PluginFieldCollection([gamma, beta])
    plugin = creator.create_plugin(name="LayerNormalization_TRT",
                                   field_collection=pfc)
    bottom = network.add_plugin_v2(inputs=[input], plugin=plugin)
    return bottom

def custom_fc(config, network, input_tensor, out_dims, W):
    pf_out_dims = trt.PluginField("out_dims",
                                  np.array([out_dims], dtype=np.int32),
                                  trt.PluginFieldType.INT32)
    pf_W = trt.PluginField("W", W.numpy(), trt.PluginFieldType.FLOAT32)
    pf_type = trt.PluginField("type_id",
                              np.array([1 if config.use_fp16 else 0],
                                       np.int32),
                              trt.PluginFieldType.INT32)
    pfc = trt.PluginFieldCollection([pf_out_dims, pf_W, pf_type])
    fc_plugin = fc_plg_creator.create_plugin("fcplugin", pfc)

    plug_inputs = [input_tensor]
    out_dense = network.add_plugin_v2(plug_inputs, fc_plugin)
    return out_dense

def get_trt_plugin(plugin_name, scale_factor=2, align_corners=False):
    plugin = None
    for plugin_creator in PLUGIN_CREATORS:
        if plugin_creator.name == plugin_name:
            scale_factor_field = trt.PluginField(
                "scaleFactor", np.array([scale_factor], dtype=np.int8),
                trt.PluginFieldType.INT8)
            align_corners_field = trt.PluginField(
                "alignCorners", np.array([int(align_corners)], dtype=np.int8),
                trt.PluginFieldType.INT8)
            field_collection = trt.PluginFieldCollection(
                [align_corners_field, scale_factor_field])
            plugin = plugin_creator.create_plugin(
                name=plugin_name, field_collection=field_collection)
    return plugin

def getResizePlugin():
    for c in trt.get_plugin_registry().plugin_creator_list:
        if c.name == 'ResizePlugin':
            # hOut and wOut come from the enclosing module scope.
            p0 = trt.PluginField("hOut", np.array([hOut], dtype=np.int32),
                                 trt.PluginFieldType.INT32)
            p1 = trt.PluginField("wOut", np.array([wOut], dtype=np.int32),
                                 trt.PluginFieldType.INT32)
            return c.create_plugin(c.name, trt.PluginFieldCollection([p0, p1]))
    return None

def getCuBLASGemmPlugin(weight):
    for c in trt.get_plugin_registry().plugin_creator_list:
        if c.name == 'CuBLASGemm':
            parameterList = []
            parameterList.append(trt.PluginField(
                "weight", np.float32(weight), trt.PluginFieldType.FLOAT32))
            # Pass the GEMM dimensions as one-element int32 arrays.
            parameterList.append(trt.PluginField(
                "k", np.array([weight.shape[0]], dtype=np.int32),
                trt.PluginFieldType.INT32))
            parameterList.append(trt.PluginField(
                "n", np.array([weight.shape[1]], dtype=np.int32),
                trt.PluginFieldType.INT32))
            return c.create_plugin(c.name,
                                   trt.PluginFieldCollection(parameterList))
    return None

def getGruPlugin(nDimInput: int, nDimHidden: int, weightX: np.ndarray,
                 weightH: np.ndarray, bias: np.ndarray):
    for c in trt.get_plugin_registry().plugin_creator_list:
        if c.name == "GruPlugin":
            p0 = trt.PluginField("nDimInput",
                                 np.array([nDimInput], dtype=np.int32),
                                 trt.PluginFieldType.INT32)
            p1 = trt.PluginField("nDimHidden",
                                 np.array([nDimHidden], dtype=np.int32),
                                 trt.PluginFieldType.INT32)
            p2 = trt.PluginField("WeightX", weightX,
                                 trt.PluginFieldType.FLOAT32)
            p3 = trt.PluginField("WeightH", weightH,
                                 trt.PluginFieldType.FLOAT32)
            p4 = trt.PluginField("Bias", bias, trt.PluginFieldType.FLOAT32)
            return c.create_plugin(
                c.name, trt.PluginFieldCollection([p0, p1, p2, p3, p4]))
    return None

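# Usage sketch (hedged): assumes the plugin consumes a single input tensor
# and that the fp32 weight/bias arrays match what the plugin expects; all
# names here are illustrative.
def add_gru(network, x, n_dim_input, n_dim_hidden, weight_x, weight_h, bias):
    plugin = getGruPlugin(n_dim_input, n_dim_hidden, weight_x, weight_h, bias)
    if plugin is None:
        raise RuntimeError('GruPlugin not found in the plugin registry')
    return network.add_plugin_v2([x], plugin).get_output(0)
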
def add_nms(self, input_tensors):
    shareLocation = trt.PluginField("shareLocation",
                                    np.array([1], dtype=np.int32),
                                    trt.PluginFieldType.INT32)
    backgroundLabelId = trt.PluginField("backgroundLabelId",
                                        np.array([-1], dtype=np.int32),
                                        trt.PluginFieldType.INT32)
    numClasses = trt.PluginField("numClasses", np.array([3], dtype=np.int32),
                                 trt.PluginFieldType.INT32)
    topK = trt.PluginField("topK", np.array([300], dtype=np.int32),
                           trt.PluginFieldType.INT32)
    keepTopK = trt.PluginField("keepTopK", np.array([100], dtype=np.int32),
                               trt.PluginFieldType.INT32)
    scoreThreshold = trt.PluginField("scoreThreshold",
                                     np.array([0.65], dtype=np.float32),
                                     trt.PluginFieldType.FLOAT32)
    iouThreshold = trt.PluginField("iouThreshold",
                                   np.array([0.5], dtype=np.float32),
                                   trt.PluginFieldType.FLOAT32)
    isNormalized = trt.PluginField("isNormalized",
                                   np.array([1], dtype=np.int32),
                                   trt.PluginFieldType.INT32)
    clipBoxes = trt.PluginField("clipBoxes", np.array([1], dtype=np.int32),
                                trt.PluginFieldType.INT32)

    field_collection = trt.PluginFieldCollection([
        shareLocation, backgroundLabelId, numClasses, topK, keepTopK,
        scoreThreshold, iouThreshold, isNormalized, clipBoxes
    ])
    nms = nmsCreator.create_plugin(name='BatchedNMS_TRT',
                                   field_collection=field_collection)
    return self.network.add_plugin_v2(
        inputs=[input_tensors.get_output(x) for x in range(2)], plugin=nms)

def getMMTPlugin(h, dim_t, weight):
    for c in trt.get_plugin_registry().plugin_creator_list:
        if c.name == "MMTPlugin":
            p0 = trt.PluginField("w", np.array([weight], dtype=np.float32),
                                 trt.PluginFieldType.FLOAT32)
            p1 = trt.PluginField("h", np.array([h], dtype=np.int32),
                                 trt.PluginFieldType.INT32)
            p2 = trt.PluginField("dim_t", np.array([dim_t], dtype=np.int32),
                                 trt.PluginFieldType.INT32)
            return c.create_plugin(c.name,
                                   trt.PluginFieldCollection([p0, p1, p2]))
    return None

def get_trt_plugin(plugin_name):
    plugin = None
    for plugin_creator in PLUGIN_CREATORS:
        if plugin_creator.name == plugin_name:
            # Collect the parameters; see the TensorRT plugin API reference
            # for the meaning of each field:
            # https://docs.nvidia.com/deeplearning/sdk/tensorrt-api/c_api/_nv_infer_plugin_8h.html#af308dcae61dab659073bc91c6ba63a7e
            # min_value and max_value come from the enclosing scope.
            clip_min_field = trt.PluginField(
                "clipMin", np.array([min_value], dtype=np.float32),
                trt.PluginFieldType.FLOAT32)
            clip_max_field = trt.PluginField(
                "clipMax", np.array([max_value], dtype=np.float32),
                trt.PluginFieldType.FLOAT32)
            field_collection = trt.PluginFieldCollection(
                [clip_min_field, clip_max_field])
            plugin = plugin_creator.create_plugin(
                name=plugin_name, field_collection=field_collection)
    return plugin

def src_attention(network, input, encoder, n_head, odim, dtype, model_dict,
                  prefix):
    q = FC(network, input, odim, odim, model_dict,
           prefix + ".src_attn.linear_q")
    creator = trt.get_plugin_registry().get_plugin_creator(
        "SrcAttention_TRT", "")
    # Pack the scalar arguments into a single int32 field.
    args = trt.PluginField("args",
                           np.array([n_head, odim, int(dtype)],
                                    dtype=np.int32),
                           trt.PluginFieldType.INT32)
    kw = trt.PluginField("kweight",
                         model_dict[prefix + ".src_attn.linear_k.weight"],
                         trt.PluginFieldType.FLOAT32)
    kb = trt.PluginField("kbias",
                         model_dict[prefix + ".src_attn.linear_k.bias"],
                         trt.PluginFieldType.FLOAT32)
    vw = trt.PluginField("vweight",
                         model_dict[prefix + ".src_attn.linear_v.weight"],
                         trt.PluginFieldType.FLOAT32)
    vb = trt.PluginField("vbias",
                         model_dict[prefix + ".src_attn.linear_v.bias"],
                         trt.PluginFieldType.FLOAT32)
    pfc = trt.PluginFieldCollection([args, kw, kb, vw, vb])
    plugin = creator.create_plugin(name="SrcAttention_TRT",
                                   field_collection=pfc)
    bottom = network.add_plugin_v2(inputs=[q, encoder],
                                   plugin=plugin).get_output(0)
    out = FC(network, bottom, odim, odim, model_dict,
             prefix + ".src_attn.linear_out")
    return out

def getLayerNormPlugin():
    for c in trt.get_plugin_registry().plugin_creator_list:
        if c.name == 'LayerNorm':
            # epsilon comes from the enclosing module scope; pass it as a
            # one-element float32 array.
            p0 = trt.PluginField('epsilon',
                                 np.array([epsilon], dtype=np.float32),
                                 trt.PluginFieldType.FLOAT32)
            return c.create_plugin(c.name, trt.PluginFieldCollection([p0]))
    return None

def build_engine(shape):
    plugin_creator = get_plugin_creator('AddPlugin')
    if plugin_creator is None:
        print('Plugin not found. Exiting')
        exit()

    builder = trt.Builder(logger)
    builder.max_batch_size = 1024
    builder.max_workspace_size = 1 << 20
    builder.fp16_mode = use_fp16

    network = builder.create_network()
    tensor = network.add_input('data', trt.DataType.FLOAT, shape)
    # Chain ten copies of the plugin, each adding 10.0 to its input.
    for _ in range(10):
        tensor = network.add_plugin_v2([tensor], plugin_creator.create_plugin(
            'AddPlugin',
            trt.PluginFieldCollection([
                trt.PluginField('valueToAdd',
                                np.array([10.0], dtype=np.float32),
                                trt.PluginFieldType.FLOAT32)
            ]))).get_output(0)

    network.mark_output(tensor)
    return builder.build_cuda_engine(network)

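# Runtime sketch (hedged): executes the engine built above with the
# implicit-batch, pycuda-style workflow that matches these older builder
# APIs. Buffer handling is illustrative; input must be a contiguous fp32
# array of the network's input shape.
def run_engine(engine, input_array):
    import pycuda.autoinit  # noqa: F401  (creates a CUDA context)
    import pycuda.driver as cuda

    output = np.empty_like(input_array)  # AddPlugin preserves shape/dtype
    d_input = cuda.mem_alloc(input_array.nbytes)
    d_output = cuda.mem_alloc(output.nbytes)
    with engine.create_execution_context() as context:
        cuda.memcpy_htod(d_input, input_array)
        context.execute(1, [int(d_input), int(d_output)])
        cuda.memcpy_dtoh(output, d_output)
    return output
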