def update_to_sparse_transformer(self,
                                 max_position,
                                 sparsity_config=SparsityConfig(num_heads=4,
                                                                seq_len=1024)):
    self.extend_position_embedding(max_position)
    self.replace_model_self_attention_with_sparse_self_attention(max_position, sparsity_config)
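# Hypothetical call site: a minimal sketch assuming update_to_sparse_transformer has been
# attached to a Hugging Face style BERT model class as above. `MyPatchedBertModel` is a
# placeholder name for that patched class, and the SparsityConfig import path may differ
# across DeepSpeed versions.
from deepspeed.ops.sparse_attention import SparsityConfig

model = MyPatchedBertModel.from_pretrained('bert-base-uncased')  # hypothetical patched class
model.update_to_sparse_transformer(
    max_position=4096,
    sparsity_config=SparsityConfig(num_heads=16, seq_len=4096))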
def replace_model_self_attention_with_sparse_self_attention(
        model,
        max_position,
        # SparsityConfig parameters need to be set accordingly
        sparsity_config=SparsityConfig(num_heads=4,
                                       seq_len=1024)):
    """This function replaces the self attention layers in the model encoder with sparse self attention.

    It currently supports BERT and RoBERTa models and can easily be extended to other models
    by following similar steps.
    For sparsity_config, refer to the SparsityConfig class.

    Arguments:
        model: required: a transformer model
        max_position: required: an integer determining the new position embedding size
        sparsity_config: optional: this parameter determines the sparsity pattern configuration;
            it is based on the SparsityConfig class

    Return:
        model: updated model in which the self attention layers have been replaced with the
            DeepSpeed Sparse Self Attention layer.
    """
    if hasattr(model, 'bert'):
        model.config.max_position_embeddings = max_position
        replace_self_attention_layer_with_sparse_self_attention_layer(
            model.config,
            model.bert.encoder.layer,
            sparsity_config)
    elif hasattr(model, 'roberta'):
        model.config.max_position_embeddings = max_position + 2
        replace_self_attention_layer_with_sparse_self_attention_layer(
            model.config,
            model.roberta.encoder.layer,
            sparsity_config)
    else:
        raise ValueError(
            'Please extend "replace_model_self_attention_with_sparse_self_attention" to support '
            'your model type. It currently only supports "bert" & "roberta"!')
    return model
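# A minimal usage sketch, assuming a Hugging Face BERT checkpoint; the exact import paths
# may vary across transformers/DeepSpeed versions. Note that the position embedding matrix
# must also be extended to max_position, as done by update_to_sparse_transformer above.
from transformers import BertForSequenceClassification
from deepspeed.ops.sparse_attention import SparsityConfig

model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
model = replace_model_self_attention_with_sparse_self_attention(
    model,
    max_position=1024,
    sparsity_config=SparsityConfig(num_heads=model.config.num_attention_heads,
                                   seq_len=1024))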
def replace_self_attention_layer_with_sparse_self_attention_layer(
        config,
        layers,
        # SparsityConfig parameters need to be set accordingly
        sparsity_config=SparsityConfig(num_heads=4,
                                       seq_len=1024)):
    """This function replaces the self attention layers in the given attention layers with sparse self attention.

    For sparsity_config, refer to the SparsityConfig class.

    Arguments:
        config: required: transformer model config
        layers: required: transformer model attention layers
        sparsity_config: optional: this parameter determines the sparsity pattern configuration;
            it is based on the SparsityConfig class

    Return:
        layers: updated attention layers in which the self attention layers have been replaced
            with the DeepSpeed Sparse Self Attention layer.
    """
    for layer in layers:
        deepspeed_sparse_self_attn = BertSparseSelfAttention(config, sparsity_config)
        deepspeed_sparse_self_attn.query = layer.attention.self.query
        deepspeed_sparse_self_attn.key = layer.attention.self.key
        deepspeed_sparse_self_attn.value = layer.attention.self.value
        layer.attention.self = deepspeed_sparse_self_attn

    return layers
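# Hypothetical sanity check after replacement: a sketch assuming a Hugging Face BERT model
# and that BertSparseSelfAttention/SparsityConfig are importable from DeepSpeed's sparse
# attention ops. Because the existing query/key/value projections are reassigned onto the
# new module, the pretrained weights are preserved.
from transformers import BertForSequenceClassification
from deepspeed.ops.sparse_attention import BertSparseSelfAttention, SparsityConfig

model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
layers = replace_self_attention_layer_with_sparse_self_attention_layer(
    model.config,
    model.bert.encoder.layer,
    SparsityConfig(num_heads=model.config.num_attention_heads, seq_len=512))
assert all(isinstance(layer.attention.self, BertSparseSelfAttention) for layer in layers)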
def replace_model_self_attention_with_sparse_self_attention(
        self,
        max_position,
        # SparsityConfig parameters need to be set accordingly
        sparsity_config=SparsityConfig(num_heads=4,
                                       seq_len=1024)):
    """This method replaces the self attention layers in this model's encoder with sparse self attention.

    It can easily be extended to other models by following similar steps.
    For sparsity_config, refer to the SparsityConfig class.

    Arguments:
        max_position: required: an integer determining the new position embedding size
        sparsity_config: optional: this parameter determines the sparsity pattern configuration;
            it is based on the SparsityConfig class

    Return:
        None; the model's self attention layers are replaced in place with the DeepSpeed
            Sparse Self Attention layer.
    """
    self.bert.config.max_position_embeddings = max_position
    self.replace_self_attention_layer_with_sparse_self_attention_layer(
        self.bert.config,
        self.bert.encoder.layer,
        sparsity_config)
def __init__(self,
             # SparsityConfig parameters need to be set accordingly
             sparsity_config=SparsityConfig(num_heads=4),
             key_padding_mask_mode='add',
             attn_mask_mode='mul'):
    """Initialize the sparse self attention layer.

    Arguments:
        sparsity_config: optional: this parameter determines the sparsity pattern configuration;
            it is based on the SparsityConfig class.
        key_padding_mask_mode: optional: a string determining whether the key padding mask is
            added (`add`) or multiplied (`mul`).
        attn_mask_mode: optional: a string determining whether the attention mask is
            added (`add`) or multiplied (`mul`).
    """
    super().__init__()

    # sparsity information
    self.sparsity_config = sparsity_config

    # mask modes
    self.key_padding_mask_mode = key_padding_mask_mode
    self.attn_mask_mode = attn_mask_mode
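# A minimal construction sketch, assuming SparseSelfAttention and FixedSparsityConfig
# (one of the SparsityConfig subclasses) are available from DeepSpeed's sparse attention
# ops; names and signatures may differ across versions.
from deepspeed.ops.sparse_attention import SparseSelfAttention, FixedSparsityConfig

sparse_attn = SparseSelfAttention(
    sparsity_config=FixedSparsityConfig(num_heads=16),
    key_padding_mask_mode='add',  # key padding mask is added to the attention scores
    attn_mask_mode='mul')         # attention mask is multiplied into the attention scores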
def replace_model_self_attention_with_sparse_self_attention(
        self,
        max_position,
        sparsity_config=SparsityConfig(num_heads=4)):
    self.config.max_position_embeddings = max_position
    tbs.replace_self_attention_layer_with_sparse_self_attention_layer(
        self.config,
        self.encoder.layer,
        sparsity_config)