Example #1
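Every example on this page uses np and trt without showing the imports. A reasonable reconstruction of the shared preamble (the Iterable import is only needed by create_deformable_pool_plugin further down):

import numpy as np
import tensorrt as trt
from collections.abc import Iterable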
def create_torchunfold_plugin(layer_name, kernel_size, dilation, padding,
                              stride):

    creator = trt.get_plugin_registry().get_plugin_creator(
        'TorchUnfoldPluginDynamic', '1', '')

    pfc = trt.PluginFieldCollection()

    if isinstance(kernel_size, int):
        kernel_size = (kernel_size, kernel_size)
    pf_kernel_size = trt.PluginField('kernel_size',
                                     np.array(kernel_size, dtype=np.int32),
                                     trt.PluginFieldType.INT32)
    pfc.append(pf_kernel_size)

    if isinstance(dilation, int):
        dilation = (dilation, dilation)
    pf_dilation = trt.PluginField('dilation',
                                  np.array(dilation, dtype=np.int32),
                                  trt.PluginFieldType.INT32)
    pfc.append(pf_dilation)

    if isinstance(padding, int):
        padding = (padding, padding)
    pf_padding = trt.PluginField('padding', np.array(padding, dtype=np.int32),
                                 trt.PluginFieldType.INT32)
    pfc.append(pf_padding)

    if isinstance(stride, int):
        stride = (stride, stride)
    pf_stride = trt.PluginField('stride', np.array(stride, dtype=np.int32),
                                trt.PluginFieldType.INT32)
    pfc.append(pf_stride)

    return creator.create_plugin(layer_name, pfc)
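A minimal usage sketch, assuming the library that registers TorchUnfoldPluginDynamic has already been loaded and that network and input_tensor are an existing INetworkDefinition and ITensor (both names hypothetical):

unfold_plugin = create_torchunfold_plugin('unfold', kernel_size=3,
                                          dilation=1, padding=1, stride=1)
# a plugin is wired into the network like any other layer
unfold_layer = network.add_plugin_v2([input_tensor], unfold_plugin)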
Example #2
def skipln(prefix, config, init_dict, network, input_tensor, skip, bias=None):
    """
    Add the skip layer
    """
    idims = input_tensor.shape
    assert len(idims) == 5
    hidden_size = idims[2]

    pf_ld = trt.PluginField("ld", np.array([hidden_size], np.int32),
                            trt.PluginFieldType.INT32)
    wbeta = init_dict[prefix + "bias"]
    pf_beta = trt.PluginField("beta", wbeta.numpy(),
                              trt.PluginFieldType.FLOAT32)
    wgamma = init_dict[prefix + "weight"]
    pf_gamma = trt.PluginField("gamma", wgamma.numpy(),
                               trt.PluginFieldType.FLOAT32)
    pf_type = trt.PluginField(
        "type_id", np.array([1 if config.use_fp16 else 0], np.int32),
        trt.PluginFieldType.INT32)

    fields = [pf_ld, pf_beta, pf_gamma, pf_type]

    if bias is not None:
        pf_bias = trt.PluginField("bias", bias.numpy(),
                                  trt.PluginFieldType.FLOAT32)
        fields.append(pf_bias)

    pfc = trt.PluginFieldCollection(fields)
    skipln_plug = skln_plg_creator.create_plugin("skipln", pfc)

    skipln_inputs = [input_tensor, skip]
    layer = network.add_plugin_v2(skipln_inputs, skipln_plug)
    return layer
Example #3
def create_deformable_pool_plugin(layer_name, out_size, spatial_scale,
                                  sampling_ratio, gamma):

    creator = trt.get_plugin_registry().get_plugin_creator(
        'DeformablePoolPluginDynamic', '1', '')

    if not isinstance(out_size, Iterable):
        out_size = [out_size, out_size]

    pfc = trt.PluginFieldCollection()

    pf_out_size = trt.PluginField('out_size', np.array(out_size,
                                                       dtype=np.int32),
                                  trt.PluginFieldType.INT32)
    pfc.append(pf_out_size)

    pf_spatial_scale = trt.PluginField(
        'spatial_scale', np.array([spatial_scale], dtype=np.float32),
        trt.PluginFieldType.FLOAT32)
    pfc.append(pf_spatial_scale)

    pf_sampling_ratio = trt.PluginField(
        'sampling_ratio', np.array([sampling_ratio], dtype=np.int32),
        trt.PluginFieldType.INT32)
    pfc.append(pf_sampling_ratio)

    pf_gamma = trt.PluginField('gamma', np.array([gamma], dtype=np.float32),
                               trt.PluginFieldType.FLOAT32)
    pfc.append(pf_gamma)

    return creator.create_plugin(layer_name, pfc)
Example #4
    def get_dlrm_interactions_plugin(self, plugin_name, tableOffsets, interactionsOutputInterleaved):
        """Create a plugin layer for the DLRM Interactions plugin and return it.

        DLRM Interactions plugin takes two inputs, the bottom-MLP output and the categorical-feature input, and looks up the embeddings.
        Since DLRM embeddings can be larger than GPU memory, the plugin keeps the most frequently used embeddings on the GPU
        and the rest on the host, and manages the lookup with good performance.
        """

        plugin = None
        for plugin_creator in trt.get_plugin_registry().plugin_creator_list:
            if plugin_creator.name == plugin_name:
                embeddingSize_field = trt.PluginField("embeddingSize", np.array([self.embedding_size], dtype=np.int32), trt.PluginFieldType.INT32)
                embeddingRows_field = trt.PluginField("embeddingRows", np.array([self.embedding_rows_total], dtype=np.int32), trt.PluginFieldType.INT32)
                reducedPrecisionIO_field = trt.PluginField("reducedPrecisionIO", np.array(
                    [0 if self.need_calibration else (1 if self.precision == "fp16" else 2)], dtype=np.int32), trt.PluginFieldType.INT32)
                embeddingWeightsOnGpuPart_field = trt.PluginField("embeddingWeightsOnGpuPart", np.array([self.embedding_weights_on_gpu_part], dtype=np.float32), trt.PluginFieldType.FLOAT32)
                interactionsOutputInterleaved_field = trt.PluginField("interactionsOutputInterleaved", np.array([1 if interactionsOutputInterleaved else 0], dtype=np.int32), trt.PluginFieldType.INT32)
                tableOffsets_field = trt.PluginField("tableOffsets", tableOffsets, trt.PluginFieldType.INT32)
                embeddingWeightsFilepath_field = trt.PluginField("embeddingWeightsFilepath", np.array(list(self.embedding_weights_binary_filepath.encode()), dtype=np.int8), trt.PluginFieldType.CHAR)
                if self.use_row_frequencies:
                    rowFrequenciesFilepath_field = trt.PluginField("rowFrequenciesFilepath", np.array(list(self.row_frequencies_binary_filepath.encode()), dtype=np.int8), trt.PluginFieldType.CHAR)
                else:
                    rowFrequenciesFilepath_field = trt.PluginField("rowFrequenciesFilepath", np.array(list("".encode()), dtype=np.int8), trt.PluginFieldType.CHAR)

                output_padding_field = trt.PluginField("outputPaddingGranularity", np.array([self.output_padding], dtype=np.int32), trt.PluginFieldType.INT32)

                field_collection = trt.PluginFieldCollection([embeddingSize_field, embeddingRows_field, reducedPrecisionIO_field, embeddingWeightsOnGpuPart_field,
                                                              interactionsOutputInterleaved_field, output_padding_field, tableOffsets_field, embeddingWeightsFilepath_field, rowFrequenciesFilepath_field])
                plugin = plugin_creator.create_plugin(name=plugin_name, field_collection=field_collection)
        return plugin
Example #5
def get_trt_plugin(plugin_name):
    plugin = None
    for plugin_creator in PLUGIN_CREATORS:
        if (plugin_creator.name == "Normalize_TRT") and \
                (plugin_name == "Normalize_TRT"):
            nbWeights = trt.PluginField("nbWeights",
                                        np.array([1], dtype=np.int32),
                                        trt.PluginFieldType.INT32)
            eps = trt.PluginField("eps", np.array([0.00001], dtype=np.float32),
                                  trt.PluginFieldType.FLOAT32)
            weights = trt.PluginField('weights',
                                      np.array([1] * 16, dtype=np.float32),
                                      trt.PluginFieldType.FLOAT32)
            field_collection = trt.PluginFieldCollection(
                [weights, eps, nbWeights])
            plugin = plugin_creator.create_plugin(
                name=plugin_name, field_collection=field_collection)
            break
        elif (plugin_creator.name
              == "CustomGeluPluginDynamic") and (plugin_name
                                                 == "CustomGeluPluginDynamic"):
            type_id = trt.PluginField("type_id", np.array([0], np.int32),
                                      trt.PluginFieldType.INT32)
            bias = trt.PluginField("bias", np.array([[[1]]], np.float32),
                                   trt.PluginFieldType.FLOAT32)
            field_collection = trt.PluginFieldCollection([type_id, bias])
            plugin = plugin_creator.create_plugin(
                name=plugin_name, field_collection=field_collection)
            break
    return plugin
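PLUGIN_CREATORS is a free variable in this example. A plausible setup, assuming the standard TensorRT plugin library is initialized first (TRT_LOGGER is a hypothetical trt.Logger instance); Normalize_TRT is one of the plugins that init_libnvinfer_plugins registers:

trt.init_libnvinfer_plugins(TRT_LOGGER, '')
PLUGIN_CREATORS = trt.get_plugin_registry().plugin_creator_list
norm_plugin = get_trt_plugin('Normalize_TRT')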
Example #6
def create_roiextractor_plugin(layer_name, out_size, sample_num,
                               featmap_strides, roi_scale_factor,
                               finest_scale):

    creator = trt.get_plugin_registry().get_plugin_creator(
        'RoiExtractorPluginDynamic', '1', '')

    pfc = trt.PluginFieldCollection()

    pf_out_size = trt.PluginField("out_size",
                                  np.array([out_size], dtype=np.int32),
                                  trt.PluginFieldType.INT32)
    pfc.append(pf_out_size)

    pf_sample_num = trt.PluginField("sample_num",
                                    np.array([sample_num], dtype=np.int32),
                                    trt.PluginFieldType.INT32)
    pfc.append(pf_sample_num)

    pf_featmap_strides = trt.PluginField(
        "featmap_strides", np.array(featmap_strides, dtype=np.int32),
        trt.PluginFieldType.INT32)
    pfc.append(pf_featmap_strides)

    pf_roi_scale_factor = trt.PluginField(
        "roi_scale_factor", np.array([roi_scale_factor], dtype=np.float32),
        trt.PluginFieldType.FLOAT32)
    pfc.append(pf_roi_scale_factor)

    pf_finest_scale = trt.PluginField("finest_scale",
                                      np.array([finest_scale], dtype=np.int32),
                                      trt.PluginFieldType.INT32)
    pfc.append(pf_finest_scale)

    return creator.create_plugin(layer_name, pfc)
Example #7
def create_carafefeaturereassemble_plugin(layer_name,
                                          scale_factor,
                                          up_kernel,
                                          up_group,
                                          type_id=trt.DataType.FLOAT):

    creator = trt.get_plugin_registry().get_plugin_creator(
        'CarafeFeatureReassemblePluginDynamic', '1', '')

    pfc = trt.PluginFieldCollection()

    pf_scale_factor = trt.PluginField("scale_factor",
                                      np.array([scale_factor], dtype=np.int32),
                                      trt.PluginFieldType.INT32)
    pfc.append(pf_scale_factor)

    pf_up_kernel = trt.PluginField("up_kernel",
                                   np.array([up_kernel], dtype=np.int32),
                                   trt.PluginFieldType.INT32)
    pfc.append(pf_up_kernel)

    pf_up_group = trt.PluginField("up_group",
                                  np.array([up_group], dtype=np.int32),
                                  trt.PluginFieldType.INT32)
    pfc.append(pf_up_group)

    pf_type_id = trt.PluginField("type_id", np.array([type_id],
                                                     dtype=np.int32),
                                 trt.PluginFieldType.INT32)
    pfc.append(pf_type_id)

    return creator.create_plugin(layer_name, pfc)
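The type_id fields used throughout these examples encode a trt.DataType as a plain int32; the enum converts directly to the integer the plugins expect:

int(trt.DataType.FLOAT)  # 0
int(trt.DataType.HALF)   # 1
int(trt.DataType.INT8)   # 2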
Example #8
def attention_layer_opt(prefix, config, init_dict, network, input_tensor, imask):
    """
    Add the attention layer
    """
    assert(len(input_tensor.shape) == 5)
    B, S, hidden_size, _, _ = input_tensor.shape
    num_heads = config.num_attention_heads
    head_size = int(hidden_size / num_heads)

    Wall = init_dict[prefix + WQKV]
    Ball = init_dict[prefix + BQKV]

    # FC_attention
    if config.use_int8:
        mult_all = network.add_convolution(input_tensor, 3 * hidden_size, (1, 1), Wall, Ball)
    else:
        mult_all = network.add_fully_connected(input_tensor, 3 * hidden_size, Wall, Ball)

    if config.use_qat:
        dr_qkv = max(
            init_dict[prefix + 'self_qv_a_input_quantizer_amax'],
            init_dict[prefix + 'self_qv_b_input_quantizer_amax'],
            init_dict[prefix + 'self_av_b_input_quantizer_amax'],
        )
        set_output_range(mult_all, dr_qkv)
    set_output_name(mult_all, prefix, "qkv_mult")

    has_mask = imask is not None

    # QKV2CTX
    dtype = trt.float32
    if config.use_fp16:
        dtype = trt.float16
    # Multi-head attention doesn't use INT8 inputs and outputs by default unless specified.
    if config.use_int8 and config.use_int8_multihead and not config.is_calib_mode:
        dtype = trt.int8

    pf_type = trt.PluginField("type_id", np.array([int(dtype)], np.int32), trt.PluginFieldType.INT32)
    pf_hidden_size = trt.PluginField("hidden_size", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32)
    pf_num_heads = trt.PluginField("num_heads", np.array([num_heads], np.int32), trt.PluginFieldType.INT32)
    pf_has_mask = trt.PluginField("has_mask", np.array([has_mask], np.int32), trt.PluginFieldType.INT32)
    if config.use_qat:
        dr_probs = init_dict[prefix + 'self_av_a_input_quantizer_amax']
        dq_probs = dr_probs / 127.0
        pf_dq_probs =  trt.PluginField("dq_probs", np.array([dq_probs], np.float32), trt.PluginFieldType.FLOAT32)
        pfc = trt.PluginFieldCollection([pf_hidden_size, pf_num_heads, pf_has_mask, pf_type, pf_dq_probs])
    else:
        pfc = trt.PluginFieldCollection([pf_hidden_size, pf_num_heads, pf_has_mask, pf_type])
    qkv2ctx_plug = qkv2_plg_creator.create_plugin("qkv2ctx", pfc)

    qkv_in = [mult_all.get_output(0)]
    if has_mask:
        qkv_in.append(imask)
    qkv2ctx = network.add_plugin_v2(qkv_in, qkv2ctx_plug)

    if config.use_qat:
        dr_ctx = init_dict[prefix + 'output_dense_input_amax']
        set_output_range(qkv2ctx, dr_ctx)
    set_output_name(qkv2ctx, prefix, "context_layer")
    return qkv2ctx
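qkv2_plg_creator is assumed to exist at module level. In the TensorRT BERT demo it is obtained roughly as below; the plugin name and version string may differ between TensorRT releases:

qkv2_plg_creator = trt.get_plugin_registry().get_plugin_creator(
    'CustomQKVToContextPluginDynamic', '1', '')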
Example #9
    def add_fused_bottom_mlp(self, plugin_name, input_tensor, input_size, num_channels, names):
        """Add the MLP part of DLRM network as a fused plugin for better perf. Return the last FC layer in MLP.

        Args:
            plugin_name (str): Name of fused MLP plugin to use.
            input_tensor (ITensor): Input to MLP.
            input_size (int): Number of numerical features.
            num_channels (list): List of number of channels for each FC layer in MLP.
            names (list): List of names of each FC layer in MLP.
        """

        plugin = None
        output_tensor_name = ""
        dynamic_range_dict = self.parse_calibration()
        for plugin_creator in trt.get_plugin_registry().plugin_creator_list:
            if plugin_creator.name == plugin_name:
                plugin_fields = []
                plugin_fields.append(trt.PluginField("inputChannels", np.array([input_size], dtype=np.int32), trt.PluginFieldType.INT32))
                for i, _ in enumerate(num_channels):
                    weights = self.weights[names[i] + ".weight"].numpy()
                    input_size_suggested_by_weights = weights.shape[1]
                    if input_size > input_size_suggested_by_weights:
                        weights = np.concatenate((weights, np.zeros((weights.shape[0], input_size - input_size_suggested_by_weights), dtype=weights.dtype)), 1)
                    plugin_fields.append(trt.PluginField("weights" + str(i), weights, trt.PluginFieldType.FLOAT32))
                    plugin_fields.append(trt.PluginField("biases" + str(i), self.weights[names[i] + ".bias"].numpy(), trt.PluginFieldType.FLOAT32))
                    output_tensor_name = names[i] + ".relu.output"
                    if i != len(num_channels) - 1:
                        plugin_fields.append(trt.PluginField("dynamicRange" + str(i), np.array([dynamic_range_dict[output_tensor_name]], dtype=np.float32), trt.PluginFieldType.FLOAT32))
                plugin = plugin_creator.create_plugin(name=plugin_name, field_collection=trt.PluginFieldCollection(plugin_fields))
        return plugin, output_tensor_name
Example #10
def create_meshgrid_plugin(layer_name,
                           num_inputs,
                           slice_dims=[2, 3],
                           starts=[0., 0.],
                           strides=[1., 1.]):

    creator = trt.get_plugin_registry().get_plugin_creator(
        'MeshGridPluginDynamic', '1', '')

    pfc = trt.PluginFieldCollection()

    pf_num_inputs = trt.PluginField(
        "num_inputs", np.array([int(num_inputs)], dtype=np.int32),
        trt.PluginFieldType.INT32)
    pfc.append(pf_num_inputs)

    pf_slice_dims = trt.PluginField("slice_dims",
                                    np.array(slice_dims, dtype=np.int32),
                                    trt.PluginFieldType.INT32)
    pfc.append(pf_slice_dims)

    pf_starts = trt.PluginField("starts", np.array(starts, dtype=np.float32),
                                trt.PluginFieldType.FLOAT32)
    pfc.append(pf_starts)

    pf_strides = trt.PluginField("strides", np.array(strides,
                                                     dtype=np.float32),
                                 trt.PluginFieldType.FLOAT32)
    pfc.append(pf_strides)

    return creator.create_plugin(layer_name, pfc)
Example #11
def create_torchembedding_plugin(layer_name, weight):

    creator = trt.get_plugin_registry().get_plugin_creator(
        'TorchEmbeddingPluginDynamic', '1', '')

    pfc = trt.PluginFieldCollection()

    num_embeddings = weight.shape[0]
    embedding_dim = weight.shape[1]

    pf_num_embeddings = trt.PluginField(
        "num_embeddings", np.array([num_embeddings], dtype=np.int32),
        trt.PluginFieldType.INT32)
    pfc.append(pf_num_embeddings)

    pf_embedding_dim = trt.PluginField(
        "embedding_dim", np.array([embedding_dim], dtype=np.int32),
        trt.PluginFieldType.INT32)
    pfc.append(pf_embedding_dim)

    pf_weight = trt.PluginField("weight", np.array(weight, dtype=np.float32),
                                trt.PluginFieldType.FLOAT32)
    pfc.append(pf_weight)

    return creator.create_plugin(layer_name, pfc)
Example #12
def getTopKAveragePlugin(nTopK, maxTopK):
    for c in trt.get_plugin_registry().plugin_creator_list:
        if c.name == 'TopKAveragePlugin':
            p0 = trt.PluginField("nTopK", np.array([nTopK], dtype=np.int32), trt.PluginFieldType.INT32)
            p1 = trt.PluginField("maxTopK", np.array([maxTopK], dtype=np.int32), trt.PluginFieldType.INT32)
            return c.create_plugin('TopKAveragePlugin', trt.PluginFieldCollection([p0, p1]))
    return None
Example #13
def skipln(prefix, init_dict, network, input_tensor, skip):
    """
    Add the skip layer
    """
    idims = input_tensor.shape
    assert len(idims) == 5
    hidden_size = idims[2]

    pf_type_id = trt.PluginField("type_id", np.array([0], dtype=np.int32),
                                 trt.PluginFieldType.INT32)
    pf_ld = trt.PluginField("ld", np.array([hidden_size], np.int32),
                            trt.PluginFieldType.INT32)
    wbeta = init_dict[prefix + "beta"]
    pf_beta = trt.PluginField("beta", wbeta.numpy(),
                              trt.PluginFieldType.FLOAT32)
    wgamma = init_dict[prefix + "gamma"]
    pf_gamma = trt.PluginField("gamma", wgamma.numpy(),
                               trt.PluginFieldType.FLOAT32)

    pfc = trt.PluginFieldCollection([pf_type_id, pf_ld, pf_beta, pf_gamma])
    skipln_plug = skln_plg_creator.create_plugin("skipln", pfc)

    skipln_inputs = [input_tensor, skip]
    layer = network.add_plugin_v2(skipln_inputs, skipln_plug)
    return layer
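skln_plg_creator is likewise a module-level assumption; a sketch of the lookup as done in the BERT demo (the version string is a guess):

skln_plg_creator = trt.get_plugin_registry().get_plugin_creator(
    'CustomSkipLayerNormPluginDynamic', '1', '')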
Example #14
def skipln(prefix, config, init_dict, network, input_tensor, skip, bias=None):
    """
    Add the skip layer
    """
    idims = input_tensor.shape
    assert len(idims) == 5
    hidden_size = idims[2]

    dtype = trt.float32
    if config.use_fp16:
        dtype = trt.float16
    # Skip layernorm doesn't use INT8 inputs and outputs by default unless specified.
    if config.use_int8 and config.use_int8_skipln and not config.is_calib_mode:
        dtype = trt.int8

    pf_ld = trt.PluginField("ld", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32)
    wbeta = init_dict[prefix + "beta"]
    pf_beta = trt.PluginField("beta", wbeta.numpy(), trt.PluginFieldType.FLOAT32)
    wgamma = init_dict[prefix + "gamma"]
    pf_gamma = trt.PluginField("gamma", wgamma.numpy(), trt.PluginFieldType.FLOAT32)
    pf_type = trt.PluginField("type_id", np.array([int(dtype)], np.int32), trt.PluginFieldType.INT32)

    fields = [pf_ld, pf_beta, pf_gamma, pf_type]

    if bias is not None:
        pf_bias = trt.PluginField("bias", bias.numpy(), trt.PluginFieldType.FLOAT32)
        fields.append(pf_bias)

    pfc = trt.PluginFieldCollection(fields)
    skipln_plug = skln_plg_creator.create_plugin("skipln", pfc)

    skipln_inputs = [input_tensor, skip]
    layer = network.add_plugin_v2(skipln_inputs, skipln_plug)
    return layer
Example #15
def create_roipool_plugin(layer_name, out_size, featmap_strides,
                          roi_scale_factor, finest_scale):

    creator = trt.get_plugin_registry().get_plugin_creator(
        'RoiPoolPluginDynamic', '1', '')

    pfc = trt.PluginFieldCollection()

    pf_out_size = trt.PluginField('out_size',
                                  np.array([out_size], dtype=np.int32),
                                  trt.PluginFieldType.INT32)
    pfc.append(pf_out_size)

    pf_featmap_strides = trt.PluginField(
        'featmap_strides',
        np.array(featmap_strides).astype(np.float32),
        trt.PluginFieldType.FLOAT32)
    pfc.append(pf_featmap_strides)

    pf_roi_scale_factor = trt.PluginField(
        'roi_scale_factor', np.array([roi_scale_factor], dtype=np.float32),
        trt.PluginFieldType.FLOAT32)
    pfc.append(pf_roi_scale_factor)

    pf_finest_scale = trt.PluginField('finest_scale',
                                      np.array([finest_scale], dtype=np.int32),
                                      trt.PluginFieldType.INT32)
    pfc.append(pf_finest_scale)

    return creator.create_plugin(layer_name, pfc)
Example #16
def attention_layer_opt(prefix, config, init_dict, network, input_tensor, imask):
    """
    Add the attention layer
    """
    assert len(input_tensor.shape) == 5
    B, S, hidden_size, _, _ = input_tensor.shape
    num_heads = config.num_attention_heads
    head_size = int(hidden_size / num_heads)

    Wall = init_dict[prefix + WQKV]
    Ball = init_dict[prefix + BQKV]

    mult_all = network.add_fully_connected(input_tensor, 3 * hidden_size, Wall, Ball)
    set_layer_name(mult_all, prefix, "qkv_mult")

    has_mask = imask is not None

    pf_hidden_size = trt.PluginField("hidden_size", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32,)
    pf_num_heads = trt.PluginField("num_heads", np.array([num_heads], np.int32), trt.PluginFieldType.INT32)
    pf_S = trt.PluginField("S", np.array([S], np.int32), trt.PluginFieldType.INT32)
    pf_has_mask = trt.PluginField("has_mask", np.array([has_mask], np.int32), trt.PluginFieldType.INT32)

    pfc = trt.PluginFieldCollection([pf_hidden_size, pf_num_heads, pf_S, pf_has_mask])
    qkv2ctx_plug = qkv2_plg_creator.create_plugin("qkv2ctx", pfc)

    qkv_in = [mult_all.get_output(0), imask]
    qkv2ctx = network.add_plugin_v2(qkv_in, qkv2ctx_plug)
    set_layer_name(qkv2ctx, prefix, "context_layer")
    return qkv2ctx
Example #17
def create_layernorm_plugin(layer_name,
                            normalized_shape,
                            W,
                            B,
                            eps=1e-5,
                            type_id=trt.DataType.FLOAT):

    creator = trt.get_plugin_registry().get_plugin_creator(
        'LayerNormPluginDynamic', '1', '')

    pfc = trt.PluginFieldCollection()

    pf_normalized_shape = trt.PluginField(
        "normalized_shape", np.array(normalized_shape, dtype=np.int32),
        trt.PluginFieldType.INT32)
    pfc.append(pf_normalized_shape)

    pf_eps = trt.PluginField("eps", np.array([eps], dtype=np.float32),
                             trt.PluginFieldType.FLOAT32)
    pfc.append(pf_eps)

    pf_W = trt.PluginField("W", W, trt.PluginFieldType.FLOAT32)
    pfc.append(pf_W)

    pf_B = trt.PluginField("B", B, trt.PluginFieldType.FLOAT32)
    pfc.append(pf_B)

    pf_type_id = trt.PluginField("type_id", np.array([type_id],
                                                     dtype=np.int32),
                                 trt.PluginFieldType.INT32)
    pfc.append(pf_type_id)

    return creator.create_plugin(layer_name, pfc)
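A hedged usage sketch: W and B must be float32 arrays whose element count matches normalized_shape, and network/input_tensor are assumed to exist:

weight = np.ones(768, dtype=np.float32)
bias = np.zeros(768, dtype=np.float32)
ln_plugin = create_layernorm_plugin('ln0', [768], weight, bias)
ln_layer = network.add_plugin_v2([input_tensor], ln_plugin)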
Example #18
def skipln(prefix, config, init_dict, network, input_tensor, skip):
    """
    Add the skip layer
    """
    hidden_size = config.hidden_size
    dtype = config.get_trt_dtype()

    pf_ld = trt.PluginField("ld", np.array([hidden_size], np.int32),
                            trt.PluginFieldType.INT32)
    wbeta = init_dict[prefix + "beta"]
    pf_beta = trt.PluginField("beta", wbeta.numpy(),
                              trt.PluginFieldType.FLOAT32)
    wgamma = init_dict[prefix + "gamma"]
    pf_gamma = trt.PluginField("gamma", wgamma.numpy(),
                               trt.PluginFieldType.FLOAT32)
    pf_type = trt.PluginField("type_id", np.array([int(dtype)], np.int32),
                              trt.PluginFieldType.INT32)

    if config.use_int8 and config.interleaved:
        pfc = trt.PluginFieldCollection([pf_beta, pf_gamma])
        skipln_plug = skln_plg_creator3.create_plugin("skipln", pfc)
    else:
        pfc = trt.PluginFieldCollection([pf_ld, pf_beta, pf_gamma, pf_type])
        skipln_plug = skln_plg_creator2.create_plugin("skipln", pfc)

    skipln_inputs = [input_tensor, skip]
    layer = network.add_plugin_v2(skipln_inputs, skipln_plug)
    return layer
Example #19
def layer_normalization(network, input, odim, model_dict, prefix):
    # look up the custom layer-norm plugin in the global registry
    creator = trt.get_plugin_registry().get_plugin_creator(
        "LayerNormalization_TRT", "")
    gamma = trt.PluginField("gamma", model_dict[prefix + ".weight"],
                            trt.PluginFieldType.FLOAT32)
    beta = trt.PluginField("beta", model_dict[prefix + ".bias"],
                           trt.PluginFieldType.FLOAT32)
    pfc = trt.PluginFieldCollection([gamma, beta])
    plugin = creator.create_plugin(name="LayerNormalization_TRT",
                                   field_collection=pfc)
    bottom = network.add_plugin_v2(inputs=[input], plugin=plugin)
    return bottom
Example #20
def custom_fc(config, network, input_tensor, out_dims, W):
    pf_out_dims = trt.PluginField("out_dims", np.array([out_dims], dtype=np.int32), trt.PluginFieldType.INT32)
    pf_W = trt.PluginField("W", W.numpy(), trt.PluginFieldType.FLOAT32)
    pf_type = trt.PluginField("type_id", np.array([1 if config.use_fp16 else 0], np.int32), trt.PluginFieldType.INT32)
    pfc = trt.PluginFieldCollection([pf_out_dims, pf_W, pf_type])
    fc_plugin = fc_plg_creator.create_plugin("fcplugin", pfc)
    plug_inputs = [input_tensor]
    out_dense = network.add_plugin_v2(plug_inputs, fc_plugin)
    return out_dense
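fc_plg_creator is another module-level assumption, presumably resolved like the other BERT-demo creators (name and version are guesses):

fc_plg_creator = trt.get_plugin_registry().get_plugin_creator(
    'CustomFCPluginDynamic', '1', '')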
Example #21
def get_trt_plugin(plugin_name, scale_factor=2, align_corners=False):
    plugin = None
    for plugin_creator in PLUGIN_CREATORS:
        if plugin_creator.name == plugin_name:
            scale_factor_field = trt.PluginField("scaleFactor", np.array([scale_factor], dtype=np.int8), trt.PluginFieldType.INT8)
            align_corners_field = trt.PluginField("alignCorners", np.array([int(align_corners)], dtype=np.int8), trt.PluginFieldType.INT8)
            field_collection = trt.PluginFieldCollection([align_corners_field, scale_factor_field])
            plugin = plugin_creator.create_plugin(name=plugin_name, field_collection=field_collection)
    return plugin
Example #22
def getResizePlugin():
    for c in trt.get_plugin_registry().plugin_creator_list:
        if c.name == 'ResizePlugin':
            p0 = trt.PluginField("hOut", np.array([hOut], dtype=np.int32),
                                 trt.PluginFieldType.INT32)
            p1 = trt.PluginField("wOut", np.array([wOut], dtype=np.int32),
                                 trt.PluginFieldType.INT32)
            return c.create_plugin(c.name, trt.PluginFieldCollection([p0, p1]))
    return None
Example #23
def getCuBLASGemmPlugin(weight):
    for c in trt.get_plugin_registry().plugin_creator_list:
        #print(c.name)
        if c.name == 'CuBLASGemm':
            parameterList = []
            parameterList.append(trt.PluginField("weight", np.float32(weight), trt.PluginFieldType.FLOAT32))
            parameterList.append(trt.PluginField("k", np.int32(weight.shape[0]), trt.PluginFieldType.INT32))
            parameterList.append(trt.PluginField("n", np.int32(weight.shape[1]), trt.PluginFieldType.INT32))
            return c.create_plugin(c.name, trt.PluginFieldCollection(parameterList))
    return None
Example #24
def getGruPlugin(nDimInput: int, nDimHidden: int, weightX: np.ndarray, weightH: np.ndarray, bias: np.ndarray):
    for c in trt.get_plugin_registry().plugin_creator_list:
        if c.name == "GruPlugin":
            p0 = trt.PluginField("nDimInput", np.array([nDimInput], dtype=np.int32), trt.PluginFieldType.INT32)
            p1 = trt.PluginField("nDimHidden", np.array([nDimHidden], dtype=np.int32), trt.PluginFieldType.INT32)
            p2 = trt.PluginField("WeightX", weightX, trt.PluginFieldType.FLOAT32)
            p3 = trt.PluginField("WeightH", weightH, trt.PluginFieldType.FLOAT32)
            p4 = trt.PluginField("Bias", bias, trt.PluginFieldType.FLOAT32)
            return c.create_plugin(c.name, trt.PluginFieldCollection([p0, p1, p2, p3, p4]))
    return None
Example #25
    def add_nms(self, input_tensors):
        shareLocation = trt.PluginField("shareLocation",
                                        np.array([1], dtype=np.int32),
                                        trt.PluginFieldType.INT32)
        backgroundLabelId = trt.PluginField("backgroundLabelId",
                                            np.array([-1], dtype=np.int32),
                                            trt.PluginFieldType.INT32)
        numClasses = trt.PluginField("numClasses", np.array([3],
                                                            dtype=np.int32),
                                     trt.PluginFieldType.INT32)
        topK = trt.PluginField("topK", np.array([300], dtype=np.int32),
                               trt.PluginFieldType.INT32)
        keepTopK = trt.PluginField("keepTopK", np.array([100], dtype=np.int32),
                                   trt.PluginFieldType.INT32)
        scoreThreshold = trt.PluginField("scoreThreshold",
                                         np.array([0.65], dtype=np.float32),
                                         trt.PluginFieldType.FLOAT32)
        iouThreshold = trt.PluginField("iouThreshold",
                                       np.array([0.5], dtype=np.float32),
                                       trt.PluginFieldType.FLOAT32)
        isNormalized = trt.PluginField("isNormalized",
                                       np.array([1], dtype=np.int32),
                                       trt.PluginFieldType.INT32)
        clipBoxes = trt.PluginField("clipBoxes", np.array([1], dtype=np.int32),
                                    trt.PluginFieldType.INT32)

        field_collection = trt.PluginFieldCollection([
            shareLocation, backgroundLabelId, numClasses, topK, keepTopK,
            scoreThreshold, iouThreshold, isNormalized, clipBoxes
        ])
        nms = nmsCreator.create_plugin(name='BatchedNMS_TRT',
                                       field_collection=field_collection)

        return self.network.add_plugin_v2(
            inputs=[input_tensors.get_output(x) for x in range(2)], plugin=nms)
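nmsCreator is not defined in the snippet. BatchedNMS_TRT ships with TensorRT's standard plugin library, so a plausible setup is (TRT_LOGGER hypothetical):

trt.init_libnvinfer_plugins(TRT_LOGGER, '')
nmsCreator = trt.get_plugin_registry().get_plugin_creator(
    'BatchedNMS_TRT', '1', '')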
Example #26
def getMMTPlugin(h, dim_t, weight):
    for c in trt.get_plugin_registry().plugin_creator_list:
        if c.name == "MMTPlugin":
            p0 = trt.PluginField("w", np.array([weight], dtype=np.float32),
                                 trt.PluginFieldType.FLOAT32)
            p1 = trt.PluginField("h", np.array([h], dtype=np.int32),
                                 trt.PluginFieldType.INT32)
            p2 = trt.PluginField("dim_t", np.array([dim_t], dtype=np.int32),
                                 trt.PluginFieldType.INT32)
            return c.create_plugin(c.name,
                                   trt.PluginFieldCollection([p0, p1, p2]))
    return None
Example #27
def get_trt_plugin(plugin_name):
    plugin = None
    for plugin_creator in PLUGIN_CREATORS:
        if plugin_creator.name == plugin_name:
            # Collect the parameters; for the meaning of each field see
            # https://docs.nvidia.com/deeplearning/sdk/tensorrt-api/c_api/_nv_infer_plugin_8h.html#af308dcae61dab659073bc91c6ba63a7e
            clip_min_field = trt.PluginField("clipMin", np.array([min_value], dtype=np.float32),
                                             trt.PluginFieldType.FLOAT32)
            clip_max_field = trt.PluginField("clipMax", np.array([max_value], dtype=np.float32),
                                             trt.PluginFieldType.FLOAT32)
            field_collection = trt.PluginFieldCollection([clip_min_field, clip_max_field])
            plugin = plugin_creator.create_plugin(name=plugin_name, field_collection=field_collection)
    return plugin
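min_value and max_value are captured from the enclosing scope. A minimal driver sketch with hypothetical bounds, reusing the PLUGIN_CREATORS setup shown earlier; Clip_TRT is the registered name of TensorRT's standard clip plugin:

min_value, max_value = 0.0, 6.0  # hypothetical ReLU6-style bounds
clip_plugin = get_trt_plugin('Clip_TRT')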
Example #28
def src_attention(network, input, encoder, n_head, odim, dtype, model_dict, prefix):
    q = FC(network, input, odim, odim, model_dict, prefix + ".src_attn.linear_q")

    creator = trt.get_plugin_registry().get_plugin_creator("SrcAttention_TRT", "")
    args = trt.PluginField("args", np.array([n_head, odim, int(dtype)], dtype=np.int32),
                           trt.PluginFieldType.INT32)
    kw = trt.PluginField("kweight", model_dict[prefix + ".src_attn.linear_k.weight"],
                         trt.PluginFieldType.FLOAT32)
    kb = trt.PluginField("kbias", model_dict[prefix + ".src_attn.linear_k.bias"],
                         trt.PluginFieldType.FLOAT32)
    vw = trt.PluginField("vweight", model_dict[prefix + ".src_attn.linear_v.weight"],
                         trt.PluginFieldType.FLOAT32)
    vb = trt.PluginField("vbias", model_dict[prefix + ".src_attn.linear_v.bias"],
                         trt.PluginFieldType.FLOAT32)
    pfc = trt.PluginFieldCollection([args, kw, kb, vw, vb])
    plugin = creator.create_plugin(name="SrcAttention_TRT", field_collection=pfc)
    bottom = network.add_plugin_v2(inputs=[q, encoder], plugin=plugin).get_output(0)
    out = FC(network, bottom, odim, odim, model_dict, prefix + ".src_attn.linear_out")
    return out
Example #29
def getLayerNormPlugin():
    for c in trt.get_plugin_registry().plugin_creator_list:
        #print(c.name)
        if c.name == 'LayerNorm':
            p0 = trt.PluginField('epsilon', np.float32(epsilon), trt.PluginFieldType.FLOAT32)
            return c.create_plugin(c.name, trt.PluginFieldCollection([p0]))
    return None
Example #30
def build_engine(shape):
    plugin_creator = get_plugin_creator('AddPlugin')
    if plugin_creator is None:
        print('Plugin not found. Exiting')
        exit()

    builder = trt.Builder(logger)
    builder.max_batch_size = 1024
    builder.max_workspace_size = 1 << 20
    builder.fp16_mode = use_fp16
    network = builder.create_network()

    tensor = network.add_input('data', trt.DataType.FLOAT, shape)
    for _ in range(10):
        tensor = network.add_plugin_v2([tensor],
                                       plugin_creator.create_plugin(
                                           'AddPlugin',
                                           trt.PluginFieldCollection([
                                               trt.PluginField(
                                                   'valueToAdd',
                                                   np.array([10.0],
                                                            dtype=np.float32),
                                                   trt.PluginFieldType.FLOAT32)
                                           ]))).get_output(0)

    network.mark_output(tensor)
    return builder.build_cuda_engine(network)
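The get_plugin_creator helper is not shown in the source; a minimal sketch consistent with how the other examples search the registry (AddPlugin itself is a custom plugin that must be compiled and loaded separately):

def get_plugin_creator(plugin_name):
    # search the global registry for a creator with the requested name
    for c in trt.get_plugin_registry().plugin_creator_list:
        if c.name == plugin_name:
            return c
    return None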