Code example #1
def build_baegarg2019(model_wrapper, threshold_cosine=0.936338023, query_budget=None, max_candidates=50):
    """
    Modified from https://github.com/QData/TextAttack/blob/04b7c6f79bdb5301b360555bd5458c15aa2b8695/textattack/attack_recipes/bae_garg_2019.py
    """
    transformation = WordSwapMaskedLM(
        method="bae", max_candidates=max_candidates, min_confidence=0.0
    )
    constraints = [RepeatModification(), StopwordModification()]

    constraints.append(PartOfSpeech(allow_verb_noun_swap=True))

    use_constraint = UniversalSentenceEncoder(
        threshold=threshold_cosine,
        metric="cosine",
        compare_against_original=True,
        window_size=15,
        skip_text_shorter_than_window=True,
    )
    constraints.append(use_constraint)
    goal_function = UntargetedClassification(model_wrapper)
    if query_budget is not None:
        goal_function.query_budget = query_budget
    search_method = GreedyWordSwapWIR(wir_method="delete")

    return Attack(goal_function, constraints, transformation, search_method)
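These snippets assume TextAttack's public modules are already in scope. A minimal usage sketch for the recipe above, assuming TextAttack >= 0.3 and a HuggingFace sentiment classifier (the model and dataset names are illustrative, not from the original source):

import transformers
from textattack import Attack, Attacker, AttackArgs
from textattack.constraints.grammaticality import PartOfSpeech
from textattack.constraints.pre_transformation import (
    RepeatModification, StopwordModification)
from textattack.constraints.semantics.sentence_encoders import (
    UniversalSentenceEncoder)
from textattack.datasets import HuggingFaceDataset
from textattack.goal_functions import UntargetedClassification
from textattack.models.wrappers import HuggingFaceModelWrapper
from textattack.search_methods import GreedyWordSwapWIR
from textattack.transformations import WordSwapMaskedLM

# Wrap a trained classifier and its tokenizer for black-box querying.
model = transformers.AutoModelForSequenceClassification.from_pretrained(
    "textattack/bert-base-uncased-imdb")
tokenizer = transformers.AutoTokenizer.from_pretrained(
    "textattack/bert-base-uncased-imdb")
model_wrapper = HuggingFaceModelWrapper(model, tokenizer)

# Build the BAE recipe defined above and attack ten test examples.
attack = build_baegarg2019(model_wrapper, query_budget=500)
dataset = HuggingFaceDataset("imdb", split="test")
Attacker(attack, dataset, AttackArgs(num_examples=10)).attack_dataset()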
Code example #2
    def build(model_wrapper):
        #
        # Swap words with their synonyms extracted based on the HowNet.
        #
        transformation = WordSwapHowNet()
        #
        # Don't modify the same word twice or stopwords
        #
        constraints = [RepeatModification(), StopwordModification()]
        #
        #
        # During entailment, we should only edit the hypothesis - keep the premise
        # the same.
        #
        input_column_modification = InputColumnModification(
            ["premise", "hypothesis"], {"premise"})
        constraints.append(input_column_modification)
        #
        # Use untargeted classification for the demo; this can be switched to a targeted goal.
        #
        goal_function = UntargetedClassification(model_wrapper)
        #
        # Perform word substitution with a Particle Swarm Optimization (PSO) algorithm.
        #
        search_method = ParticleSwarmOptimization(pop_size=60, max_iters=20)

        return Attack(goal_function, constraints, transformation,
                      search_method)
Code example #3
    def build(model, max_num_word_swaps=1):
        # a combination of 4 different character-based transforms
        # ignore the first and last letter of each word, as in the paper
        transformation = CompositeTransformation([
            WordSwapNeighboringCharacterSwap(random_one=False,
                                             skip_first_char=True,
                                             skip_last_char=True),
            WordSwapRandomCharacterDeletion(random_one=False,
                                            skip_first_char=True,
                                            skip_last_char=True),
            WordSwapRandomCharacterInsertion(random_one=False,
                                             skip_first_char=True,
                                             skip_last_char=True),
            WordSwapQWERTY(random_one=False,
                           skip_first_char=True,
                           skip_last_char=True),
        ])
        # only edit words of length >= 4, and edit at most max_num_word_swaps words.
        # Note that we also do not edit the same word twice, so
        # max_num_word_swaps is really the max number of character
        # changes that can be made. The paper looks at 1- and 2-char attacks.
        constraints = [
            MinWordLength(min_length=4),
            StopwordModification(),
            MaxWordsPerturbed(max_num_words=max_num_word_swaps),
            RepeatModification(),
        ]
        # untargeted attack
        goal_function = UntargetedClassification(model)
        search_method = GreedySearch()
        return Attack(goal_function, constraints, transformation, search_method)
Code example #4
    def build(model, ensemble: bool = False):
        # [from correspondence with the author]
        # Candidate size K is set to 48 for all data-sets.
        transformation = WordSwapMaskedLM(method="bert-attack",
                                          max_candidates=48)
        #
        # Don't modify the same word twice or stopwords.
        #
        constraints = [RepeatModification(), StopwordModification()]

        # "We only take ε percent of the most important words since we tend to keep
        # perturbations minimum."
        #
        # [from correspondence with the author]
        # "Word percentage allowed to change is set to 0.4 for most data-sets, this
        # parameter is trivial since most attacks only need a few changes. This
        # epsilon is only used to avoid too much queries on those very hard samples."
        constraints.append(MaxWordsPerturbed(max_percent=0.4))

        # "As used in TextFooler (Jin et al., 2019), we also use Universal Sentence
        # Encoder (Cer et al., 2018) to measure the semantic consistency between the
        # adversarial sample and the original sequence. To balance between semantic
        # preservation and attack success rate, we set up a threshold of semantic
        # similarity score to filter the less similar examples."
        #
        # [from correspondence with author]
        # "Over the full texts, after generating all the adversarial samples, we filter
        # out low USE score samples. Thus the success rate is lower but the USE score
        # can be higher. (actually USE score is not a golden metric, so we simply
        # measure the USE score over the final texts for a comparison with TextFooler).
        # For datasets like IMDB, we set a higher threshold between 0.4-0.7; for
        # datasets like MNLI, we set threshold between 0-0.2."
        #
        # Since the threshold in the real world can't be determined from the training
        # data, the TextAttack implementation uses a fixed threshold - determined to
        # be 0.2 to be most fair.
        use_constraint = UniversalSentenceEncoder(
            threshold=0.2,
            metric="cosine",
            compare_against_original=True,
            window_size=None,
        )
        constraints.append(use_constraint)
        #
        # Goal is untargeted classification.
        #
        goal_function = UntargetedClassification(model)
        #
        # "We first select the words in the sequence which have a high significance
        # influence on the final output logit. Let S = [w0, ··· , wi ··· ] denote
        # the input sentence, and oy(S) denote the logit output by the target model
        # for correct label y, the importance score Iwi is defined as
        # Iwi = oy(S) − oy(S\wi), where S\wi = [w0, ··· , wi−1, [MASK], wi+1, ···]
        # is the sentence after replacing wi with [MASK]. Then we rank all the words
        # according to the ranking score Iwi in descending order to create word list
        # L."
        search_method = GreedyWordSwapWIR(wir_method="unk", ensemble=ensemble)

        return Attack(goal_function, constraints, transformation,
                      search_method)
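The quoted importance score is straightforward to sketch outside the library. A toy illustration of I_wi = o_y(S) − o_y(S\wi), where predict_logit is a hypothetical black-box helper that returns the target model's logit for the correct label y:

def word_importance_ranking(words, predict_logit):
    # o_y(S): logit of the correct label on the unperturbed sentence.
    base = predict_logit(" ".join(words))
    scores = []
    for i in range(len(words)):
        # S \ w_i: the sentence with w_i replaced by [MASK].
        masked = words[:i] + ["[MASK]"] + words[i + 1:]
        # I_wi = o_y(S) - o_y(S \ w_i)
        scores.append(base - predict_logit(" ".join(masked)))
    # The word list L: indices ranked by importance, descending.
    return sorted(range(len(words)), key=lambda i: scores[i], reverse=True)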
Code example #5
def Checklist2020(model):
    """An implementation of the attack used in "Beyond Accuracy: Behavioral
    Testing of NLP models with CheckList", Ribeiro et al., 2020.".
    This attack focuses on a number of attacks used in the Invariance Testing
    Method:
        - Contraction
        - Extension
        - Changing Names, Number, Location
        - possibly negation (not yet implemented)
    The idea is to alter elements of the sentence without actually changing the semantic of the sentence
    https://arxiv.org/abs/2005.04118
    :param model: Model to attack.
    :param max_num_word_swaps: Maximum number of modifications to allow.
    """

    transformation = CompositeTransformation(
        [
            # WordSwapExtend(),
            WordSwapContract(),
            WordSwapChangeName(),
            # WordSwapChangeNumber(),
            WordSwapChangeLocation(),
        ]
    )

    # Need this constraint to prevent Extend and Contract from modifying each other's changes and forming an infinite loop.
    constraints = [RepeatModification()]

    # Untargeted attack & GreedySearch
    goal_function = UntargetedClassification(model)
    search_method = GreedySearch()

    return Attack(goal_function, constraints, transformation, search_method)
Code example #6
    def build(model):
        transformation = WordSwapWordNet()
        #
        # Don't modify the same word twice.
        # (StopwordModification is disabled in this variant.)
        #
        constraints = [RepeatModification()]
        # constraints.append(StopwordModification())
        #
        # During entailment, we should only edit the hypothesis - keep the premise
        # the same.
        #
        input_column_modification = InputColumnModification(
            ["premise", "hypothesis"], {"premise"}
        )
        constraints.append(input_column_modification)
        # constraints.append(PartOfSpeech())
        #
        # Goal is untargeted classification.
        #
        goal_function = UntargetedClassification(model)
        #
        # Search with the project's custom prioritized word-ranking method
        # (`PriotirizedSearch` is the class name used in the source project).
        #
        search_method = PriotirizedSearch()

        return Attack(goal_function, constraints, transformation, search_method)
Code example #7
    def build(model):
        #
        #  we propose five bug generation methods for TEXTBUGGER:
        #
        transformation = CompositeTransformation(
            [
                # (1) Insert: Insert a space into the word.
                # Generally, words are segmented by spaces in English. Therefore,
                # we can deceive classifiers by inserting spaces into words.
                WordSwapRandomCharacterInsertion(
                    random_one=True,
                    letters_to_insert=" ",
                    skip_first_char=True,
                    skip_last_char=True,
                ),
                # (2) Delete: Delete a random character of the word except for the first
                # and the last character.
                WordSwapRandomCharacterDeletion(
                    random_one=True, skip_first_char=True, skip_last_char=True
                ),
                # (3) Swap: Swap random two adjacent letters in the word but do not
                # alter the first or last letter. This is a common occurrence when
                # typing quickly and is easy to implement.
                WordSwapNeighboringCharacterSwap(
                    random_one=True, skip_first_char=True, skip_last_char=True
                ),
                # (4) Substitute-C (Sub-C): Replace characters with visually similar
                # characters (e.g., replacing “o” with “0”, “l” with “1”, “a” with “@”)
                # or adjacent characters in the keyboard (e.g., replacing “m” with “n”).
                WordSwapHomoglyphSwap(),
                # (5) Substitute-W
                # (Sub-W): Replace a word with its topk nearest neighbors in a
                # context-aware word vector space. Specifically, we use the pre-trained
                # GloVe model [30] provided by Stanford for word embedding and set
                # topk = 5 in the experiment.
                WordSwapEmbedding(max_candidates=5),
            ]
        )

        constraints = [RepeatModification(), StopwordModification()]
        # In our experiment, we first use the Universal Sentence
        # Encoder [7], a model trained on a number of natural language
        # prediction tasks that require modeling the meaning of word
        # sequences, to encode sentences into high dimensional vectors.
        # Then, we use the cosine similarity to measure the semantic
        # similarity between original texts and adversarial texts.
        # ... "Furthermore, the semantic similarity threshold \eps is set
        # as 0.8 to guarantee a good trade-off between quality and
        # strength of the generated adversarial text."
        constraints.append(UniversalSentenceEncoder(threshold=0.8))
        #
        # Goal is untargeted classification
        #
        goal_function = UntargetedClassification(model)
        #
        # Greedily swap words with "Word Importance Ranking".
        #
        search_method = GreedyWordSwapWIR(wir_method="delete")

        return Attack(goal_function, constraints, transformation, search_method)
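To preview what these five bug generators produce before attacking a model, TextAttack's Augmenter can apply the composite transformation directly to raw text. A minimal sketch, reusing the `transformation` built above (the sample sentence is illustrative):

from textattack.augmentation import Augmenter

# Generate candidate "bugs" with no model in the loop.
augmenter = Augmenter(transformation=transformation,
                      transformations_per_example=5)
print(augmenter.augment("The quick brown fox jumps over the lazy dog"))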
Code example #8
File: pwws_ren_2019.py (Project: yyht/TextAttack)
    def build(model):
        transformation = WordSwapWordNet()
        constraints = [RepeatModification(), StopwordModification()]
        goal_function = UntargetedClassification(model)
        # Search over words based on a combination of their saliency score
        # and how effective the word swap is.
        search_method = GreedyWordSwapWIR("weighted-saliency")
        return Attack(goal_function, constraints, transformation,
                      search_method)
Code example #9
def GeneticAlgorithmAlzantot2018(model):
    """Alzantot, M., Sharma, Y., Elgohary, A., Ho, B., Srivastava, M.B., &
    Chang, K. (2018).

    Generating Natural Language Adversarial Examples.

    https://arxiv.org/abs/1804.07998
    """
    #
    # Swap words with their embedding nearest-neighbors.
    #
    # Embedding: Counter-fitted Paragram Embeddings.
    #
    # "[We] fix the hyperparameter values to S = 60, N = 8, K = 4, and δ = 0.5"
    #
    transformation = WordSwapEmbedding(max_candidates=8)
    #
    # Don't modify the same word twice or stopwords
    #
    constraints = [RepeatModification(), StopwordModification()]
    #
    # During entailment, we should only edit the hypothesis - keep the premise
    # the same.
    #
    input_column_modification = InputColumnModification(
        ["premise", "hypothesis"], {"premise"})
    constraints.append(input_column_modification)
    #
    # Maximum words perturbed percentage of 20%
    #
    constraints.append(MaxWordsPerturbed(max_percent=0.2))
    #
    # Maximum word embedding euclidean distance of 0.5.
    #
    constraints.append(
        WordEmbeddingDistance(max_mse_dist=0.5,
                              compare_against_original=False))
    #
    # Language Model
    #
    constraints.append(
        Google1BillionWordsLanguageModel(top_n_per_index=4,
                                         compare_against_original=False))
    #
    # Goal is untargeted classification
    #
    goal_function = UntargetedClassification(model)
    #
    # Perform word substitution with a genetic algorithm.
    #
    search_method = GeneticAlgorithm(pop_size=60,
                                     max_iters=20,
                                     post_crossover_check=False)

    return Attack(goal_function, constraints, transformation, search_method)
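The counter-fitted nearest-neighbor substitution that WordSwapEmbedding performs can be sketched in a few lines. A toy illustration, assuming `vocab` maps each word to its row index in `vectors` in insertion order (both hypothetical):

import numpy as np

def nearest_neighbors(word, vocab, vectors, k=8):
    # Toy version of WordSwapEmbedding: return the k words whose
    # (counter-fitted) vectors have the highest cosine similarity to `word`.
    v = vectors[vocab[word]]
    sims = vectors @ v / (np.linalg.norm(vectors, axis=1) * np.linalg.norm(v))
    order = np.argsort(-sims)
    words = list(vocab)
    return [words[i] for i in order if words[i] != word][:k]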
Code example #10
    def build(model):
        #
        # Swap words with their embedding nearest-neighbors.
        #
        # Embedding: Counter-fitted Paragram Embeddings.
        #
        # "[We] fix the hyperparameter values to S = 60, N = 8, K = 4, and δ = 0.5"
        #
        transformation = WordSwapEmbedding(max_candidates=8)
        #
        # Don't modify the same word twice or stopwords
        #
        constraints = [RepeatModification(), StopwordModification()]
        #
        # During entailment, we should only edit the hypothesis - keep the premise
        # the same.
        #
        input_column_modification = InputColumnModification(
            ["premise", "hypothesis"], {"premise"})
        constraints.append(input_column_modification)
        #
        # Maximum words perturbed percentage of 20%
        #
        constraints.append(MaxWordsPerturbed(max_percent=0.2))
        #
        # Maximum word embedding euclidean distance of 0.5.
        #
        constraints.append(
            WordEmbeddingDistance(max_mse_dist=0.5,
                                  compare_against_original=False))
        #
        # Language Model
        #
        # constraints.append(
        #     Google1BillionWordsLanguageModel(
        #         top_n_per_index=4, compare_against_original=False
        #     )
        # )
        #
        # Goal is untargeted classification
        #
        goal_function = UntargetedClassification(model)
        #
        # Perform word substitution with a genetic algorithm.
        #
        search_method = AlzantotGeneticAlgorithm(pop_size=60,
                                                 max_iters=20,
                                                 post_crossover_check=False)

        return Attack(goal_function, constraints, transformation,
                      search_method)
Code example #11
def TextFoolerJin2019Adjusted(model, SE_thresh=0.98, sentence_encoder='bert'):
    #
    # Swap words with their embedding nearest-neighbors. 
    #
    # Embedding: Counter-fitted PARAGRAM-SL999 vectors.
    #
    # 50 nearest-neighbors with a cosine similarity of at least 0.5.
    # (The paper claims 0.7, but analysis of the code and some empirical
    # results show that it's definitely 0.5.)
    #
    transformation = WordSwapEmbedding(max_candidates=50, textfooler_stopwords=True)
    #
    # Minimum word embedding cosine similarity of 0.9.
    #
    constraints = []
    constraints.append(
            WordEmbeddingDistance(min_cos_sim=0.9)
    )
    #
    # Sentence encoder constraint (BERT or Universal Sentence Encoder) with a
    # minimum cosine similarity of SE_thresh.
    #
    if sentence_encoder == 'bert':
        se_constraint = BERT(threshold=SE_thresh,
            metric='cosine', compare_with_original=False, window_size=15,
            skip_text_shorter_than_window=False)
    else:
        se_constraint = UniversalSentenceEncoder(threshold=SE_thresh,
            metric='cosine', compare_with_original=False, window_size=15,
            skip_text_shorter_than_window=False)
    constraints.append(se_constraint)
    #
    # Do grammar checking
    #
    constraints.append(
            LanguageTool(0)
    )
    
    #
    # Untargeted attack   
    #
    goal_function = UntargetedClassification(model)

    #
    # Greedily swap words with "Word Importance Ranking".
    #
    attack = GreedyWordSwapWIR(goal_function, transformation=transformation,
        constraints=constraints, max_depth=None)
    
    return attack
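Note this snippet uses a deprecated TextAttack API in which the search method wrapped the goal function directly. On current versions the same pieces compose into an Attack object; a minimal sketch of the equivalent ending, under that assumption:

    # Modern equivalent (sketch): search methods no longer take the goal
    # function in their constructor.
    search_method = GreedyWordSwapWIR()
    return Attack(goal_function, constraints, transformation, search_method)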
Code example #12
def Kuleshov2017(model):
    """
        Kuleshov, V. et al. 
        
        Generating Natural Language Adversarial Examples. 
        
        https://openreview.net/pdf?id=r1QZ3zbAZ.
    """
    #
    # "Specifically, in all experiments, we used a target of τ = 0.7,
    # a neighborhood size of N = 15, and parameters λ_1 = 0.2 and δ = 0.5; we set
    # the syntactic bound to λ_2 = 2 nats for sentiment analysis"

    #
    # Word swap with top-15 counter-fitted embedding neighbors.
    #
    transformation = WordSwapEmbedding(max_candidates=15)
    #
    # Don't modify the same word twice or stopwords
    #
    constraints = [RepeatModification(), StopwordModification()]
    #
    # Maximum of 50% of words perturbed (δ in the paper).
    #
    constraints.append(MaxWordsPerturbed(max_percent=0.5))
    #
    # Maximum thought vector Euclidean distance of λ_1 = 0.2. (eq. 4)
    #
    constraints.append(
        ThoughtVector(embedding_type='paragramcf',
                      threshold=0.2,
                      metric='max_euclidean'))
    #
    #
    # Maximum language model log-probability difference of λ_2 = 2. (eq. 5)
    #
    constraints.append(GPT2(max_log_prob_diff=2.0))
    #
    # Goal is untargeted classification: reduce original probability score
    # to below τ = 0.7 (Algorithm 1).
    #
    goal_function = UntargetedClassification(model, target_max_score=0.7)
    #
    # Perform word substitution with greedy search.
    #
    search_method = GreedySearch()

    return Attack(goal_function, constraints, transformation, search_method)
Code example #13
def DeepWordBugGao2018(model, use_all_transformations=True):
    """
        Gao, Lanchantin, Soffa, Qi.
        
        Black-box Generation of Adversarial Text Sequences to Evade Deep Learning 
        Classifiers.
        
        https://arxiv.org/abs/1801.04354
    """
    #
    # Swap characters out from words. Choose the best of four potential transformations.
    #
    if use_all_transformations:
        # We propose four similar methods:
        transformation = CompositeTransformation([
            # (1) Swap: Swap two adjacent letters in the word.
            WordSwapNeighboringCharacterSwap(),
            # (2) Substitution: Substitute a letter in the word with a random letter.
            WordSwapRandomCharacterSubstitution(),
            # (3) Deletion: Delete a random letter from the word.
            WordSwapRandomCharacterDeletion(),
            # (4) Insertion: Insert a random letter in the word.
            WordSwapRandomCharacterInsertion(),
        ])
    else:
        # We use the Combined Score and the Substitution Transformer to generate
        # adversarial samples, with the maximum edit distance difference of 30
        # (ϵ = 30).
        transformation = WordSwapRandomCharacterSubstitution()
    #
    # Don't modify the same word twice or stopwords
    #
    constraints = [RepeatModification(), StopwordModification()]
    #
    # In these experiments, we hold the maximum difference
    # on edit distance (ϵ) to a constant 30 for each sample.
    #
    constraints.append(LevenshteinEditDistance(30))
    #
    # Goal is untargeted classification
    #
    goal_function = UntargetedClassification(model)
    #
    # Greedily swap words with "Word Importance Ranking".
    #
    search_method = GreedyWordSwapWIR()

    return Attack(goal_function, constraints, transformation, search_method)
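The ϵ = 30 bound is a plain Levenshtein distance between the original and perturbed texts. A self-contained sketch of the quantity that LevenshteinEditDistance(30) bounds:

def levenshtein(a: str, b: str) -> int:
    # Classic dynamic-programming edit distance over insertions,
    # deletions, and substitutions.
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        cur = [i]
        for j, cb in enumerate(b, 1):
            cur.append(min(prev[j] + 1,                  # deletion
                           cur[j - 1] + 1,               # insertion
                           prev[j - 1] + (ca != cb)))    # substitution
        prev = cur
    return prev[-1]

# DeepWordBug only accepts a perturbed text while this stays <= 30.
assert levenshtein("kitten", "sitting") == 3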
Code example #14
def TextFoolerJin2019(model):
    #
    # Swap words with their embedding nearest-neighbors.
    #
    # Embedding: Counter-fitted PARAGRAM-SL999 vectors.
    #
    # 50 nearest-neighbors with a cosine similarity of at least 0.5.
    # (The paper claims 0.7, but analysis of the code and some empirical
    # results show that it's definitely 0.5.)
    #
    transformation = WordSwapEmbedding(max_candidates=50,
                                       textfooler_stopwords=True)
    #
    # Minimum word embedding cosine similarity of 0.5.
    #
    constraints = []
    constraints.append(WordEmbeddingDistance(min_cos_sim=0.5))
    #
    # Only replace words with the same part of speech (or nouns with verbs)
    #
    constraints.append(PartOfSpeech(allow_verb_noun_swap=True))
    #
    # Universal Sentence Encoder with a minimum angular similarity of ε = 0.7.
    #
    # In the TextFooler code, they forget to divide the angle between the two
    # embeddings by pi. So if the original threshold was that 1 - sim >= 0.7, the
    # new threshold is 1 - (0.3) / pi = 0.90445
    #
    use_constraint = UniversalSentenceEncoder(
        threshold=0.904458599,
        metric='angular',
        compare_with_original=False,
        window_size=15,
        skip_text_shorter_than_window=True)
    constraints.append(use_constraint)
    #
    # Goal is untargeted classification
    #
    goal_function = UntargetedClassification(model)
    #
    # Greedily swap words with "Word Importance Ranking".
    #
    attack = GreedyWordSwapWIR(goal_function,
                               transformation=transformation,
                               constraints=constraints,
                               max_depth=None)

    return attack
Code example #15
File: pruthi_2019.py (Project: sherlockyyc/TextAttack)
def Pruthi2019(model, max_num_word_swaps=1):
    """
        An implementation of the attack used in "Combating Adversarial
        Misspellings with Robust Word Recognition", Pruthi et al., 2019.

        This attack focuses on a small number of character-level changes
        that simulate common typos. It combines:
            - Swapping neighboring characters
            - Deleting characters
            - Inserting characters
            - Swapping characters for adjacent keys on a QWERTY keyboard.

        https://arxiv.org/abs/1905.11268

        :param model: Model to attack.
        :param max_num_word_swaps: Maximum number of modifications to allow.
    """
    # a combination of 4 different character-based transforms
    # ignore the first and last letter of each word, as in the paper
    transformation = CompositeTransformation(
        [
            WordSwapNeighboringCharacterSwap(
                random_one=False, skip_first_char=True, skip_last_char=True
            ),
            WordSwapRandomCharacterDeletion(
                random_one=False, skip_first_char=True, skip_last_char=True
            ),
            WordSwapRandomCharacterInsertion(
                random_one=False, skip_first_char=True, skip_last_char=True
            ),
            WordSwapQWERTY(random_one=False, skip_first_char=True, skip_last_char=True),
        ]
    )
    # only edit words of length >= 4, edit max_num_word_swaps words.
    # note that we also are not editing the same word twice, so
    # max_num_word_swaps is really the max number of character
    # changes that can be made. The paper looks at 1 and 2 char attacks.
    constraints = [
        MinWordLength(min_length=4),
        StopwordModification(),
        MaxWordsPerturbed(max_num_words=max_num_word_swaps),
        RepeatModification(),
    ]
    # untargeted attack
    goal_function = UntargetedClassification(model)
    search_method = GreedySearch()
    return Attack(goal_function, constraints, transformation, search_method)
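A minimal sketch of running this recipe on a single example, assuming TextAttack >= 0.3 (where Attack.attack takes the raw input and its ground-truth label) and a `model_wrapper` built as in the earlier usage sketch:

attack = Pruthi2019(model_wrapper, max_num_word_swaps=2)
# 1 is the assumed index of the correct (e.g., positive) class.
result = attack.attack("I really enjoyed this movie.", 1)
print(result)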
Code example #16
def HotFlipEbrahimi2017(model):
    """
        Ebrahimi, J. et al. (2017)
        
        HotFlip: White-Box Adversarial Examples for Text Classification
        
        https://arxiv.org/abs/1712.06751
        
        This is a reproduction of the HotFlip word-level attack (section 5 of the 
        paper).
    """
    #
    # "HotFlip ... uses the gradient with respect to a one-hot input
    # representation to efficiently estimate which individual change has the
    # highest estimated loss."
    transformation = WordSwapGradientBased(model, top_n=1)
    #
    # Don't modify the same word twice or stopwords
    #
    constraints = [RepeatModification(), StopwordModification()]
    #
    # 0. "We were able to create only 41 examples (2% of the correctly-
    # classified instances of the SST test set) with one or two flips."
    #
    constraints.append(MaxWordsPerturbed(max_num_words=2))
    #
    # 1. "The cosine similarity between the embedding of words is bigger than a
    #   threshold (0.8)."
    #
    constraints.append(WordEmbeddingDistance(min_cos_sim=0.8))
    #
    # 2. "The two words have the same part-of-speech."
    #
    constraints.append(PartOfSpeech())
    #
    # Goal is untargeted classification
    #
    goal_function = UntargetedClassification(model)
    #
    # "HotFlip ... uses a beam search to find a set of manipulations that work
    # well together to confuse a classifier ... The adversary uses a beam size
    # of 10."
    #
    search_method = BeamSearch(beam_width=10)

    return Attack(goal_function, constraints, transformation, search_method)
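Unlike the black-box recipes above, HotFlip is white-box: WordSwapGradientBased needs gradients with respect to the input, so the model wrapper must implement get_grad. A minimal sketch, assuming a trained PyTorch classifier and tokenizer (both names are illustrative):

from textattack.models.wrappers import PyTorchModelWrapper

# PyTorchModelWrapper (like HuggingFaceModelWrapper) exposes get_grad();
# purely black-box wrappers will fail when the transformation asks for
# gradients. `pytorch_model` and `tokenizer` are hypothetical objects.
model_wrapper = PyTorchModelWrapper(pytorch_model, tokenizer)
attack = HotFlipEbrahimi2017(model_wrapper)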
Code example #17
def build_attack(model_wrapper, target_class=-1):
    """
    Same as bert-attack, except:
    - it uses TargetedClassification instead of UntargetedClassification when target_class != -1
    - it uses "bae" instead of "bert-attack" because of bert-attack's issues with subtokens
    Modified from https://github.com/QData/TextAttack/blob/36dfce6bdab933bdeed3a2093ae411e93018ebbf/textattack/attack_recipes/bert_attack_li_2020.py
    """

    # transformation = WordSwapMaskedLM(method="bert-attack", max_candidates=48)
    transformation = WordSwapMaskedLM(method="bae", max_candidates=100)
    constraints = [RepeatModification(), StopwordModification()]
    constraints.append(MaxWordsPerturbed(max_percent=0.4))

    use_constraint = UniversalSentenceEncoder(
        threshold=0.2,
        metric="cosine",
        compare_against_original=True,
        window_size=None,
    )
    constraints.append(use_constraint)
    if target_class == -1:
        goal_function = UntargetedClassification(model_wrapper)
    else:
        # We modify the goal
        goal_function = TargetedClassification(model_wrapper, target_class=target_class)
    search_method = GreedyWordSwapWIR(wir_method="unk")

    return Attack(goal_function, constraints, transformation, search_method)


# def build_attack_2(model_wrapper, target_class):
#     """
#     Same as HotFlipEbrahimi2017 attack except:
#     - it is TargetedClassification instead of Untargeted
#     """
#     transformation = WordSwapGradientBased(model_wrapper, top_n=1)
#     constraints = [RepeatModification(), StopwordModification()]
#     constraints.append(MaxWordsPerturbed(max_num_words=2))
#     constraints.append(WordEmbeddingDistance(min_cos_sim=0.8))
#     constraints.append(PartOfSpeech())
#     goal_function = TargetedClassification(model_wrapper)
    
#     search_method = BeamSearch(beam_width=10)

#     return Attack(goal_function, constraints, transformation, search_method)
Code example #18
def PSOZang2020(model):
    """
        Zang, Y., Yang, C., Qi, F., Liu, Z., Zhang, M., Liu, Q., & Sun, M. (2019).
        
        Word-level Textual Adversarial Attacking as Combinatorial Optimization.
        
        https://www.aclweb.org/anthology/2020.acl-main.540.pdf

        Methodology description quoted from the paper:

        "We propose a novel word substitution-based textual attack model, which reforms
        both the aforementioned two steps. In the first step, we adopt a sememe-based word
        substitution strategy, which can generate more candidate adversarial examples with
        better semantic preservation. In the second step, we utilize particle swarm optimization
        (Eberhart and Kennedy, 1995) as the adversarial example searching algorithm."

        And "Following the settings in Alzantot et al. (2018), we set the max iteration time G to 20."
    """
    #
    # Swap words with their synonyms extracted based on the HowNet.
    #
    transformation = WordSwapHowNet()
    #
    # Don't modify the same word twice or stopwords
    #
    constraints = [RepeatModification(), StopwordModification()]
    #
    #
    # During entailment, we should only edit the hypothesis - keep the premise
    # the same.
    #
    input_column_modification = InputColumnModification(
        ["premise", "hypothesis"], {"premise"}
    )
    constraints.append(input_column_modification)
    #
    # Use untargeted classification for the demo; this can be switched to a targeted goal.
    #
    goal_function = UntargetedClassification(model)
    #
    # Perform word substitution with a Particle Swarm Optimization (PSO) algorithm.
    #
    search_method = ParticleSwarmOptimization(pop_size=60, max_iters=20)

    return Attack(goal_function, constraints, transformation, search_method)
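The InputColumnModification above only has an effect on paired inputs. A minimal sketch of pointing this recipe at an NLI dataset, assuming a hypothetical `nli_model_wrapper`; the recipe then perturbs only the hypothesis column:

from textattack import Attacker, AttackArgs
from textattack.datasets import HuggingFaceDataset

# SNLI examples are (premise, hypothesis) pairs; the recipe's
# InputColumnModification keeps the premise frozen.
dataset = HuggingFaceDataset("snli", split="test")
attack = PSOZang2020(nli_model_wrapper)
Attacker(attack, dataset, AttackArgs(num_examples=5)).attack_dataset()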
Code example #19
def IGAWang2019(model):
    """
        Xiaosen Wang, Hao Jin, Kun He (2019). 
        
        Natural Language Adversarial Attack and Defense in Word Level. 
        
        http://arxiv.org/abs/1909.06723 
    """
    #
    # Swap words with their embedding nearest-neighbors.
    # Embedding: Counter-fitted Paragram Embeddings.
    # "[We] fix the hyperparameter value to N = Unrestricted (50)."
    #
    transformation = WordSwapEmbedding(max_candidates=50)
    #
    # Don't modify the stopwords
    #
    constraints = [StopwordModification()]
    #
    # Maximum words perturbed percentage of 20%
    #
    constraints.append(MaxWordsPerturbed(max_percent=0.2))
    #
    # Maximum word embedding euclidean distance δ of 0.5.
    #
    constraints.append(
        WordEmbeddingDistance(max_mse_dist=0.5,
                              compare_against_original=False))
    #
    # Goal is untargeted classification
    #
    goal_function = UntargetedClassification(model)
    #
    # Perform word substitution with an improved genetic algorithm.
    # "[We] fix the hyperparameter values to S = 60, M = 20, λ = 5."
    #
    search_method = GeneticAlgorithm(
        pop_size=60,
        max_iters=20,
        improved_genetic_algorithm=True,
        max_replace_times_per_index=5,
        post_crossover_check=False,
    )

    return Attack(goal_function, constraints, transformation, search_method)
Code example #20
File: kuleshov_2017.py (Project: zxlzr/TextAttack)
def Kuleshov2017(model):
    #
    # "Specifically, in all experiments, we used a target of τ = 0.7,
    # a neighborhood size of N = 15, and parameters λ_1 = 0.2 and δ = 0.5; we set
    # the syntactic bound to λ_2 = 2 nats for sentiment analysis"
    
    #
    # Word swap with top-15 counter-fitted embedding neighbors.
    #
    transformation = WordSwapEmbedding(max_candidates=15)
    #
    # Maximum of 50% of words perturbed (δ in the paper).
    #
    constraints = []
    constraints.append(
            WordsPerturbed(max_percent=0.5)
    )
    #
    # Maximum thought vector Euclidean distance of λ_1 = 0.2. (eq. 4)
    #
    constraints.append(
        ThoughtVector(embedding_type='paragramcf', threshold=0.2, metric='max_euclidean')
    )
    #
    #
    # Maximum language model log-probability difference of λ_2 = 2. (eq. 5)
    #
    constraints.append(
        GPT2(max_log_prob_diff=2.0)
    )
    #
    # Goal is untargeted classification: reduce original probability score 
    # to below τ = 0.7 (Algorithm 1).
    #
    goal_function = UntargetedClassification(model, target_max_score=0.7)
    #
    # Perform word substitution with a greedy word swap.
    #
    attack = GreedyWordSwap(goal_function, constraints=constraints,
        transformation=transformation)
    
    return attack


Code example #21
    def build(model_wrapper, mlm=False):
        """Build attack recipe.

        Args:
            model_wrapper (:class:`~textattack.models.wrappers.ModelWrapper`):
                Model wrapper containing both the model and the tokenizer.
            mlm (:obj:`bool`, `optional`, defaults to :obj:`False`):
                If :obj:`True`, load `A2T-MLM` attack. Otherwise, load regular `A2T` attack.

        Returns:
            :class:`~textattack.Attack`: A2T attack.
        """
        constraints = [RepeatModification(), StopwordModification()]
        input_column_modification = InputColumnModification(
            ["premise", "hypothesis"], {"premise"})
        constraints.append(input_column_modification)
        constraints.append(PartOfSpeech(allow_verb_noun_swap=False))
        constraints.append(MaxModificationRate(max_rate=0.1, min_threshold=4))
        sent_encoder = BERT(model_name="stsb-distilbert-base",
                            threshold=0.9,
                            metric="cosine")
        constraints.append(sent_encoder)

        if mlm:
            transformation = WordSwapMaskedLM(
                method="bae",
                max_candidates=20,
                min_confidence=0.0,
                batch_size=16)
        else:
            transformation = WordSwapEmbedding(max_candidates=20)
            constraints.append(WordEmbeddingDistance(min_cos_sim=0.8))

        #
        # Goal is untargeted classification
        #
        goal_function = UntargetedClassification(model_wrapper,
                                                 model_batch_size=32)
        #
        # Greedily swap words with "Word Importance Ranking".
        #
        search_method = GreedyWordSwapWIR(wir_method="gradient")

        return Attack(goal_function, constraints, transformation,
                      search_method)
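A2T is intended as a fast attack for adversarial training rather than for evaluation alone. A minimal sketch of plugging the recipe into textattack.Trainer, assuming the v0.3+ Trainer API; the datasets and all argument values are illustrative:

import textattack

attack = build(model_wrapper)
training_args = textattack.TrainingArgs(
    num_epochs=3,
    num_train_adv_examples=1000,  # adversarial examples generated per epoch
)
# `train_dataset` and `eval_dataset` are hypothetical textattack.datasets
# objects for a classification task.
trainer = textattack.Trainer(
    model_wrapper, "classification", attack,
    train_dataset, eval_dataset, training_args,
)
trainer.train()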
Code example #22
    def build(model):
        transformation = CompositeTransformation([
            WordSwapExtend(),
            WordSwapContract(),
            WordSwapChangeName(),
            WordSwapChangeNumber(),
            WordSwapChangeLocation(),
        ])

        # Need this constraint to prevent Extend and Contract from modifying each other's changes and forming an infinite loop.
        constraints = [RepeatModification()]

        # Untargeted attack & GreedySearch
        goal_function = UntargetedClassification(model)
        search_method = GreedySearch()

        return Attack(goal_function, constraints, transformation,
                      search_method)
Code example #23
def Alzantot2018Adjusted(model, SE_thresh=0.98, sentence_encoder='bert'):
    #
    # Swap words with their embedding nearest-neighbors. 
    #
    # Embedding: Counter-fitted PARAGRAM-SL999 vectors.
    #
    # "[We] fix the hyperparameter values to S = 60, N = 8, K = 4, and δ = 0.5"
    #
    transformation = WordSwapEmbedding(max_candidates=50, textfooler_stopwords=True)
    #
    # Minimum word embedding cosine similarity of 0.9.
    #
    constraints = []
    constraints.append(
            WordEmbeddingDistance(min_cos_sim=0.9)
    )
    #
    # Sentence encoder constraint (BERT or Universal Sentence Encoder) with a
    # minimum cosine similarity of SE_thresh.
    #
    if sentence_encoder == 'bert':
        se_constraint = BERT(threshold=SE_thresh,
            metric='cosine', compare_with_original=False, window_size=15,
            skip_text_shorter_than_window=False)
    else:
        se_constraint = UniversalSentenceEncoder(threshold=SE_thresh,
            metric='cosine', compare_with_original=False, window_size=15,
            skip_text_shorter_than_window=False)
    constraints.append(se_constraint)
    #
    # Do grammar checking
    #
    constraints.append(
            LanguageTool(0)
    )
    #
    # Goal is untargeted classification
    #
    goal_function = UntargetedClassification(model)
    #
    # Perform word substitution with a genetic algorithm.
    #
    attack = GeneticAlgorithm(goal_function, transformation=transformation, 
        constraints=constraints, pop_size=60, max_iters=20)
    return attack
Code example #24
    def build(model,
              freq_dict,
              delta=None,
              type=None,
              gamma=None,
              candidates=8):

        transformation = CompositeTransformation([
            WordSwapEmbedding(max_candidates=candidates),
            WordSwapWordNet(),
        ])

        #
        # Don't modify the same word twice.
        #
        constraints = [RepeatModification()]
        #
        # During entailment, we should only edit the hypothesis - keep the premise
        # the same.
        #
        input_column_modification = InputColumnModification(
            ["premise", "hypothesis"], {"premise"})
        constraints.append(input_column_modification)

        # constraints.append(PartOfSpeech())
        #
        # Goal is untargeted classification
        #
        goal_function = UntargetedClassification(model)
        #
        # Search with the project's custom frequency-dictionary-based method
        # (`VocabularySearch` is defined in the source project).
        #
        search_method = VocabularySearch(freq_dict)

        return Attack(goal_function, constraints, transformation,
                      search_method)
Code example #25
def PWWSRen2019(model):
    """An implementation of Probability Weighted Word Saliency from "Generating
    Natural Language Adversarial Examples through Probability Weighted Word
    Saliency", Ren et al., 2019.

    Words are prioritized for a synonym-swap transformation based on
    a combination of their saliency score and maximum word-swap effectiveness.
    Note that this implementation does not include the Named
    Entity adversarial swap from the original paper, because it requires
    access to the full dataset and ground truth labels in advance.

    https://www.aclweb.org/anthology/P19-1103/
    """
    transformation = WordSwapWordNet()
    constraints = [RepeatModification(), StopwordModification()]
    goal_function = UntargetedClassification(model)
    # Search over words based on a combination of their saliency score and how effective the word swap is.
    search_method = GreedyWordSwapWIR("pwws")
    return Attack(goal_function, constraints, transformation, search_method)
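The "pwws" method combines a word-saliency term with the best swap's effectiveness. A toy sketch of the saliency half (the probability drop when a word is blanked to an unknown token), where predict_proba is a hypothetical helper returning the probability of the true class:

def word_saliency(words, predict_proba, unk_token="<unk>"):
    # P(y | S): true-class probability on the unperturbed sentence.
    base = predict_proba(" ".join(words))
    saliency = []
    for i in range(len(words)):
        blanked = words[:i] + [unk_token] + words[i + 1:]
        # S(w_i) = P(y | S) - P(y | S with w_i -> unk)
        saliency.append(base - predict_proba(" ".join(blanked)))
    return saliency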
Code example #26
def Alzantot2018(model):
    """
        Alzantot, M., Sharma, Y., Elgohary, A., Ho, B., Srivastava, M.B., & Chang, K. (2018). 
        
        Generating Natural Language Adversarial Examples. 
        
        https://arxiv.org/abs/1804.07998
    """
    #
    # Swap words with their embedding nearest-neighbors.
    #
    # Embedding: Counter-fitted Paragram Embeddings.
    #
    # "[We] fix the hyperparameter values to S = 60, N = 8, K = 4, and δ = 0.5"
    #
    transformation = WordSwapEmbedding(max_candidates=8)
    #
    # Don't modify the same word twice or stopwords
    #
    constraints = [RepeatModification(), StopwordModification()]
    #
    # Maximum words perturbed percentage of 20%
    #
    constraints.append(MaxWordsPerturbed(max_percent=0.2))
    #
    # Maximum word embedding euclidean distance of 0.5.
    #
    constraints.append(WordEmbeddingDistance(max_mse_dist=0.5))
    #
    # Language Model
    #
    constraints.append(Google1BillionWordsLanguageModel(top_n_per_index=4))
    #
    # Goal is untargeted classification
    #
    goal_function = UntargetedClassification(model)
    #
    # Perform word substitution with a genetic algorithm.
    #
    search_method = GeneticAlgorithm(pop_size=60, max_iters=20)

    return Attack(goal_function, constraints, transformation, search_method)
Code example #27
    def build(model, use_all_transformations=True, ensemble: bool=False):
        #
        # Swap characters out from words. Choose the best of four potential transformations.
        #
        if use_all_transformations:
            # We propose four similar methods:
            transformation = CompositeTransformation(
                [
                    # (1) Swap: Swap two adjacent letters in the word.
                    WordSwapNeighboringCharacterSwap(),
                    # (2) Substitution: Substitute a letter in the word with a random letter.
                    WordSwapRandomCharacterSubstitution(),
                    # (3) Deletion: Delete a random letter from the word.
                    WordSwapRandomCharacterDeletion(),
                    # (4) Insertion: Insert a random letter in the word.
                    WordSwapRandomCharacterInsertion(),
                ]
            )
        else:
            # We use the Combined Score and the Substitution Transformer to generate
            # adversarial samples, with the maximum edit distance difference of 30
            # (ϵ = 30).
            transformation = WordSwapRandomCharacterSubstitution()
        #
        # Don't modify the same word twice or stopwords
        #
        constraints = [RepeatModification(), StopwordModification()]
        #
        # In these experiments, we hold the maximum difference
        # on edit distance (ϵ) to a constant 30 for each sample.
        #
        constraints.append(LevenshteinEditDistance(30))
        #
        # Goal is untargeted classification
        #
        goal_function = UntargetedClassification(model)
        #
        # Greedily swap words with "Word Importance Ranking".
        #
        search_method = GreedyWordSwapWIR(ensemble=ensemble)

        return Attack(goal_function, constraints, transformation, search_method)
Code example #28
def DeepWordBugGao2018(model, use_all_transformations=True):
    #
    # Swap characters out from words. Choose the best of four potential transformations.
    #
    if use_all_transformations:
        # We propose four similar methods:
        transformation = CompositeTransformation([
            # (1) Swap: Swap two adjacent letters in the word.
            WordSwapNeighboringCharacterSwap(),
            # (2) Substitution: Substitute a letter in the word with a random letter.
            WordSwapRandomCharacterSubstitution(),
            # (3) Deletion: Delete a random letter from the word.
            WordSwapRandomCharacterDeletion(),
            # (4) Insertion: Insert a random letter in the word.
            WordSwapRandomCharacterInsertion()
        ])
    else:
        # We use the Combined Score and the Substitution Transformer to generate
        # adversarial samples, with the maximum edit distance difference of 30
        # (ϵ = 30).
        transformation = WordSwapRandomCharacterSubstitution()
    #
    # In these experiments, we hold the maximum difference
    # on edit distance (ϵ) to a constant 30 for each sample.
    #
    constraints = [LevenshteinEditDistance(30)]
    #
    # Goal is untargeted classification
    #
    goal_function = UntargetedClassification(model)
    #
    # Greedily swap words with "Word Importance Ranking".
    #
    attack = GreedyWordSwapWIR(goal_function,
                               transformation=transformation,
                               constraints=constraints,
                               max_depth=None)

    return attack
Code example #29
def HotFlipEbrahimi2017(model):
    #
    # "HotFlip ... uses the gradient with respect to a one-hot input
    # representation to efficiently estimate which individual change has the
    # highest estimated loss."
    transformation = GradientBasedWordSwap(model,
                                           top_n=1,
                                           replace_stopwords=False)
    constraints = []
    #
    # 0. "We were able to create only 41 examples (2% of the correctly-
    # classified instances of the SST test set) with one or two flips."
    #
    constraints.append(WordsPerturbed(max_num_words=2))
    #
    # 1. "The cosine similarity between the embedding of words is bigger than a
    #   threshold (0.8)."
    #
    constraints.append(WordEmbeddingDistance(min_cos_sim=0.8))
    #
    # 2. "The two words have the same part-of-speech."
    #
    constraints.append(PartOfSpeech())
    #
    # Goal is untargeted classification
    #
    goal_function = UntargetedClassification(model)
    #
    # "HotFlip ... uses a beam search to find a set of manipulations that work
    # well together to confuse a classifier ... The adversary uses a beam size
    # of 10."
    #
    attack = BeamSearch(goal_function,
                        constraints=constraints,
                        transformation=transformation,
                        beam_width=10)

    return attack
Code example #30
File: iga_wang_2019.py (Project: qiyanjun/TextAttack)
    def build(model_wrapper):
        #
        # Swap words with their embedding nearest-neighbors.
        # Embedding: Counter-fitted Paragram Embeddings.
        # "[We] fix the hyperparameter value to N = Unrestricted (50)."
        #
        transformation = WordSwapEmbedding(max_candidates=50)
        #
        # Don't modify the stopwords
        #
        constraints = [StopwordModification()]
        #
        # Maximum words perturbed percentage of 20%
        #
        constraints.append(MaxWordsPerturbed(max_percent=0.2))
        #
        # Maximum word embedding euclidean distance δ of 0.5.
        #
        constraints.append(
            WordEmbeddingDistance(max_mse_dist=0.5,
                                  compare_against_original=False))
        #
        # Goal is untargeted classification
        #
        goal_function = UntargetedClassification(model_wrapper)
        #
        # Perform word substitution with an improved genetic algorithm.
        # "[We] fix the hyperparameter values to S = 60, M = 20, λ = 5."
        #
        search_method = ImprovedGeneticAlgorithm(
            pop_size=60,
            max_iters=20,
            max_replace_times_per_index=5,
            post_crossover_check=False,
        )

        return Attack(goal_function, constraints, transformation,
                      search_method)