def build(model):
        #
        # We propose five bug generation methods for TEXTBUGGER:
        #
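        # CompositeTransformation applies every sub-transformation below to the
        # input and returns the union of their candidate perturbations.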
        transformation = CompositeTransformation(
            [
                # (1) Insert: Insert a space into the word.
                # Generally, words are segmented by spaces in English. Therefore,
                # we can deceive classifiers by inserting spaces into words.
                WordSwapRandomCharacterInsertion(
                    random_one=True,
                    letters_to_insert=" ",
                    skip_first_char=True,
                    skip_last_char=True,
                ),
                # (2) Delete: Delete a random character of the word except for the first
                # and the last character.
                WordSwapRandomCharacterDeletion(
                    random_one=True, skip_first_char=True, skip_last_char=True
                ),
                # (3) Swap: Swap random two adjacent letters in the word but do not
                # alter the first or last letter. This is a common occurrence when
                # typing quickly and is easy to implement.
                WordSwapNeighboringCharacterSwap(
                    random_one=True, skip_first_char=True, skip_last_char=True
                ),
                # (4) Substitute-C (Sub-C): Replace characters with visually similar
                # characters (e.g., replacing “o” with “0”, “l” with “1”, “a” with “@”)
                # or adjacent characters in the keyboard (e.g., replacing “m” with “n”).
                WordSwapHomoglyphSwap(),
                # (5) Substitute-W (Sub-W): Replace a word with its topk nearest
                # neighbors in a context-aware word vector space. Specifically,
                # we use the pre-trained GloVe model [30] provided by Stanford
                # for word embedding and set topk = 5 in the experiment.
                WordSwapEmbedding(max_candidates=5),
            ]
        )

        constraints = [RepeatModification(), StopwordModification()]
        # In our experiment, we first use the Universal Sentence
        # Encoder [7], a model trained on a number of natural language
        # prediction tasks that require modeling the meaning of word
        # sequences, to encode sentences into high dimensional vectors.
        # Then, we use the cosine similarity to measure the semantic
        # similarity between original texts and adversarial texts.
        # ... "Furthermore, the semantic similarity threshold \eps is set
        # as 0.8 to guarantee a good trade-off between quality and
        # strength of the generated adversarial text."
        constraints.append(UniversalSentenceEncoder(threshold=0.8))
        #
        # Goal is untargeted classification
        #
        goal_function = UntargetedClassification(model)
        #
        # Greedily swap words with "Word Importance Ranking".
        #
        search_method = GreedyWordSwapWIR(wir_method="delete")

        return Attack(goal_function, constraints, transformation, search_method)
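A minimal usage sketch for the recipe above, assuming TextAttack >= 0.3 and a
HuggingFace sequence-classification model; the checkpoint name and example text
are illustrative, not part of the recipe:

import transformers
from textattack.models.wrappers import HuggingFaceModelWrapper

hf_model = transformers.AutoModelForSequenceClassification.from_pretrained(
    "textattack/bert-base-uncased-imdb"  # illustrative checkpoint
)
tokenizer = transformers.AutoTokenizer.from_pretrained(
    "textattack/bert-base-uncased-imdb"
)
model = HuggingFaceModelWrapper(hf_model, tokenizer)

attack = build(model)
# attack a single (text, ground-truth label) pair
result = attack.attack("This movie was a wonderful surprise.", 1)
print(result)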
Example #2
def build(model, max_num_word_swaps=1):
    # a combination of 4 different character-based transforms;
    # ignore the first and last letter of each word, as in the paper
    transformation = CompositeTransformation([
        WordSwapNeighboringCharacterSwap(random_one=False,
                                         skip_first_char=True,
                                         skip_last_char=True),
        WordSwapRandomCharacterDeletion(random_one=False,
                                        skip_first_char=True,
                                        skip_last_char=True),
        WordSwapRandomCharacterInsertion(random_one=False,
                                         skip_first_char=True,
                                         skip_last_char=True),
        WordSwapQWERTY(random_one=False,
                       skip_first_char=True,
                       skip_last_char=True),
    ])
    # only edit words of length >= 4, and edit at most max_num_word_swaps words.
    # note that we also do not edit the same word twice, so
    # max_num_word_swaps is really the max number of character
    # changes that can be made. The paper looks at 1- and 2-char attacks.
    constraints = [
        MinWordLength(min_length=4),
        StopwordModification(),
        MaxWordsPerturbed(max_num_words=max_num_word_swaps),
        RepeatModification(),
    ]
    # untargeted attack
    goal_function = UntargetedClassification(model)
    search_method = GreedySearch()
    return Attack(goal_function, constraints, transformation, search_method)
def DeepWordBugGao2018(model, use_all_transformations=True):
    """
        Gao, Lanchantin, Soffa, Qi.
        
        Black-box Generation of Adversarial Text Sequences to Evade Deep Learning 
        Classifiers.
        
        https://arxiv.org/abs/1801.04354
    """
    #
    # Swap characters out from words. Choose the best of four potential transformations.
    #
    if use_all_transformations:
        # We propose four similar methods:
        transformation = CompositeTransformation([
            # (1) Swap: Swap two adjacent letters in the word.
            WordSwapNeighboringCharacterSwap(),
            # (2) Substitution: Substitute a letter in the word with a random letter.
            WordSwapRandomCharacterSubstitution(),
            # (3) Deletion: Delete a random letter from the word.
            WordSwapRandomCharacterDeletion(),
            # (4) Insertion: Insert a random letter in the word.
            WordSwapRandomCharacterInsertion(),
        ])
    else:
        # We use the Combined Score and the Substitution Transformer to generate
        # adversarial samples, with the maximum edit distance difference of 30
        # (ϵ = 30).
        transformation = WordSwapRandomCharacterSubstitution()
    #
    # Don't modify the same word twice or stopwords
    #
    constraints = [RepeatModification(), StopwordModification()]
    #
    # In these experiments, we hold the maximum difference
    # on edit distance (ϵ) to a constant 30 for each sample.
    #
    constraints.append(LevenshteinEditDistance(30))
    #
    # Goal is untargeted classification
    #
    goal_function = UntargetedClassification(model)
    #
    # Greedily swap words with "Word Importance Ranking".
    #
    search_method = GreedyWordSwapWIR()

    return Attack(goal_function, constraints, transformation, search_method)
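To run a recipe over a whole dataset rather than a single example, TextAttack's
Attacker helper can drive it; a sketch under the same assumptions as above
(model already wrapped, dataset name illustrative):

from textattack import Attacker, AttackArgs
from textattack.datasets import HuggingFaceDataset

attack = DeepWordBugGao2018(model)
dataset = HuggingFaceDataset("rotten_tomatoes", split="test")  # illustrative
Attacker(attack, dataset, AttackArgs(num_examples=10)).attack_dataset()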
def Pruthi2019(model, max_num_word_swaps=1):
    """
        An implementation of the attack used in "Combating Adversarial
        Misspellings with Robust Word Recognition", Pruthi et al., 2019.

        This attack focuses on a small number of character-level changes
        that simulate common typos. It combines:
            - Swapping neighboring characters
            - Deleting characters
            - Inserting characters
            - Swapping characters for adjacent keys on a QWERTY keyboard.

        https://arxiv.org/abs/1905.11268

        :param model: Model to attack.
        :param max_num_word_swaps: Maximum number of modifications to allow.
    """
    # a combination of 4 different character-based transforms
    # ignore the first and last letter of each word, as in the paper
    transformation = CompositeTransformation(
        [
            WordSwapNeighboringCharacterSwap(
                random_one=False, skip_first_char=True, skip_last_char=True
            ),
            WordSwapRandomCharacterDeletion(
                random_one=False, skip_first_char=True, skip_last_char=True
            ),
            WordSwapRandomCharacterInsertion(
                random_one=False, skip_first_char=True, skip_last_char=True
            ),
            WordSwapQWERTY(random_one=False, skip_first_char=True, skip_last_char=True),
        ]
    )
    # only edit words of length >= 4, and edit at most max_num_word_swaps words.
    # note that we also do not edit the same word twice, so
    # max_num_word_swaps is really the max number of character
    # changes that can be made. The paper looks at 1- and 2-char attacks.
    constraints = [
        MinWordLength(min_length=4),
        StopwordModification(),
        MaxWordsPerturbed(max_num_words=max_num_word_swaps),
        RepeatModification(),
    ]
    # untargeted attack
    goal_function = UntargetedClassification(model)
    search_method = GreedySearch()
    return Attack(goal_function, constraints, transformation, search_method)
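Since the paper evaluates both one- and two-character attacks, the budget is
controlled through the max_num_word_swaps argument; for example:

one_char_attack = Pruthi2019(model, max_num_word_swaps=1)
two_char_attack = Pruthi2019(model, max_num_word_swaps=2)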
Example #5
def __init__(self, **kwargs):
    from textattack.transformations import (
        CompositeTransformation,
        WordSwapNeighboringCharacterSwap,
        WordSwapRandomCharacterDeletion,
        WordSwapRandomCharacterInsertion,
        WordSwapRandomCharacterSubstitution,
    )

    transformation = CompositeTransformation([
        # (1) Swap: Swap two adjacent letters in the word.
        WordSwapNeighboringCharacterSwap(),
        # (2) Substitution: Substitute a letter in the word with a random letter.
        WordSwapRandomCharacterSubstitution(),
        # (3) Deletion: Delete a random letter from the word.
        WordSwapRandomCharacterDeletion(),
        # (4) Insertion: Insert a random letter in the word.
        WordSwapRandomCharacterInsertion(),
    ])
    super().__init__(transformation, constraints=DEFAULT_CONSTRAINTS, **kwargs)
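DEFAULT_CONSTRAINTS comes from the surrounding module and is not shown in this
snippet; a plausible stand-in, assuming the usual repeat/stopword pair used by
the other recipes here, would be:

from textattack.constraints.pre_transformation import (
    RepeatModification,
    StopwordModification,
)

# Hypothetical stand-in for the module-level DEFAULT_CONSTRAINTS referenced above.
DEFAULT_CONSTRAINTS = [RepeatModification(), StopwordModification()]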
Example #6
def build(model, use_all_transformations=True, ensemble: bool = False):
        #
        # Swap characters out from words. Choose the best of four potential transformations.
        #
        if use_all_transformations:
            # We propose four similar methods:
            transformation = CompositeTransformation(
                [
                    # (1) Swap: Swap two adjacent letters in the word.
                    WordSwapNeighboringCharacterSwap(),
                    # (2) Substitution: Substitute a letter in the word with a random letter.
                    WordSwapRandomCharacterSubstitution(),
                    # (3) Deletion: Delete a random letter from the word.
                    WordSwapRandomCharacterDeletion(),
                    # (4) Insertion: Insert a random letter in the word.
                    WordSwapRandomCharacterInsertion(),
                ]
            )
        else:
            # We use the Combined Score and the Substitution Transformer to generate
            # adversarial samples, with the maximum edit distance difference of 30
            # (ϵ = 30).
            transformation = WordSwapRandomCharacterSubstitution()
        #
        # Don't modify the same word twice or stopwords
        #
        constraints = [RepeatModification(), StopwordModification()]
        #
        # In these experiments, we hold the maximum difference
        # on edit distance (ϵ) to a constant 30 for each sample.
        #
        constraints.append(LevenshteinEditDistance(30))
        #
        # Goal is untargeted classification
        #
        goal_function = UntargetedClassification(model)
        #
        # Greedily swap words with "Word Importance Ranking".
        #
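        # Note: the `ensemble` flag is specific to this variant; upstream
        # TextAttack's GreedyWordSwapWIR does not accept this argument.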
        search_method = GreedyWordSwapWIR(ensemble=ensemble)

        return Attack(goal_function, constraints, transformation, search_method)
def DeepWordBugGao2018(model, use_all_transformations=True):
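    # Note: this variant targets an early TextAttack API in which the search
    # method (GreedyWordSwapWIR) was constructed directly as the attack object.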
    #
    # Swap characters out from words. Choose the best of four potential transformations.
    #
    if use_all_transformations:
        # We propose four similar methods:
        transformation = CompositeTransformation([
            # (1) Swap: Swap two adjacent letters in the word.
            WordSwapNeighboringCharacterSwap(),
            # (2) Substitution: Substitute a letter in the word with a random letter.
            WordSwapRandomCharacterSubstitution(),
            # (3) Deletion: Delete a random letter from the word.
            WordSwapRandomCharacterDeletion(),
            # (4) Insertion: Insert a random letter in the word.
            WordSwapRandomCharacterInsertion()
        ])
    else:
        # We use the Combined Score and the Substitution Transformer to generate
        # adversarial samples, with the maximum edit distance difference of 30
        # (ϵ = 30).
        transformation = WordSwapRandomCharacterSubstitution()
    #
    # In these experiments, we hold the maximum difference
    # on edit distance (ϵ) to a constant 30 for each sample.
    #
    constraints = [LevenshteinEditDistance(30)]
    #
    # Goal is untargeted classification
    #
    goal_function = UntargetedClassification(model)
    #
    # Greedily swap words with "Word Importance Ranking".
    #
    attack = GreedyWordSwapWIR(goal_function,
                               transformation=transformation,
                               constraints=constraints,
                               max_depth=None)

    return attack