def build(model):
    #
    # We propose five bug generation methods for TEXTBUGGER:
    #
    transformation = CompositeTransformation(
        [
            # (1) Insert: Insert a space into the word.
            # Generally, words are segmented by spaces in English. Therefore,
            # we can deceive classifiers by inserting spaces into words.
            WordSwapRandomCharacterInsertion(
                random_one=True,
                letters_to_insert=" ",
                skip_first_char=True,
                skip_last_char=True,
            ),
            # (2) Delete: Delete a random character of the word except for the
            # first and the last character.
            WordSwapRandomCharacterDeletion(
                random_one=True, skip_first_char=True, skip_last_char=True
            ),
            # (3) Swap: Swap two random adjacent letters in the word, but do not
            # alter the first or last letter. This is a common occurrence when
            # typing quickly and is easy to implement.
            WordSwapNeighboringCharacterSwap(
                random_one=True, skip_first_char=True, skip_last_char=True
            ),
            # (4) Substitute-C (Sub-C): Replace characters with visually similar
            # characters (e.g., replacing "o" with "0", "l" with "1", "a" with "@")
            # or adjacent characters on the keyboard (e.g., replacing "m" with "n").
            WordSwapHomoglyphSwap(),
            # (5) Substitute-W (Sub-W): Replace a word with its top-k nearest
            # neighbors in a context-aware word vector space. Specifically, we
            # use the pre-trained GloVe model [30] provided by Stanford for word
            # embedding and set topk = 5 in the experiment.
            WordSwapEmbedding(max_candidates=5),
        ]
    )
    # Don't modify the same word twice or modify stopwords.
    constraints = [RepeatModification(), StopwordModification()]
    # In our experiment, we first use the Universal Sentence
    # Encoder [7], a model trained on a number of natural language
    # prediction tasks that require modeling the meaning of word
    # sequences, to encode sentences into high-dimensional vectors.
    # Then, we use the cosine similarity to measure the semantic
    # similarity between original texts and adversarial texts.
    # ... "Furthermore, the semantic similarity threshold ε is set
    # as 0.8 to guarantee a good trade-off between quality and
    # strength of the generated adversarial text."
    constraints.append(UniversalSentenceEncoder(threshold=0.8))
    #
    # Goal is untargeted classification.
    #
    goal_function = UntargetedClassification(model)
    #
    # Greedily swap words with "Word Importance Ranking".
    #
    search_method = GreedyWordSwapWIR(wir_method="delete")

    return Attack(goal_function, constraints, transformation, search_method)
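# --- Usage sketch (illustrative, not part of the recipe) --------------------
# A minimal sketch of running the TextBugger recipe above, assuming it is
# exposed as TextBuggerLi2018 in textattack.attack_recipes (as in upstream
# TextAttack). The checkpoint "textattack/bert-base-uncased-imdb" and the
# IMDB dataset are illustrative choices, not requirements of the recipe.
import transformers

from textattack import Attacker
from textattack.attack_recipes import TextBuggerLi2018
from textattack.datasets import HuggingFaceDataset
from textattack.models.wrappers import HuggingFaceModelWrapper

tokenizer = transformers.AutoTokenizer.from_pretrained(
    "textattack/bert-base-uncased-imdb"
)
hf_model = transformers.AutoModelForSequenceClassification.from_pretrained(
    "textattack/bert-base-uncased-imdb"
)
model_wrapper = HuggingFaceModelWrapper(hf_model, tokenizer)

attack = TextBuggerLi2018.build(model_wrapper)
dataset = HuggingFaceDataset("imdb", split="test")
Attacker(attack, dataset).attack_dataset()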
def build(model, max_num_word_swaps=1):
    # a combination of 4 different character-based transforms
    # ignore the first and last letter of each word, as in the paper
    transformation = CompositeTransformation(
        [
            WordSwapNeighboringCharacterSwap(
                random_one=False, skip_first_char=True, skip_last_char=True
            ),
            WordSwapRandomCharacterDeletion(
                random_one=False, skip_first_char=True, skip_last_char=True
            ),
            WordSwapRandomCharacterInsertion(
                random_one=False, skip_first_char=True, skip_last_char=True
            ),
            WordSwapQWERTY(random_one=False, skip_first_char=True, skip_last_char=True),
        ]
    )
    # only edit words of length >= 4, edit at most max_num_word_swaps words.
    # note that we also are not editing the same word twice, so
    # max_num_word_swaps is really the max number of character
    # changes that can be made. The paper looks at 1 and 2 char attacks.
    constraints = [
        MinWordLength(min_length=4),
        StopwordModification(),
        MaxWordsPerturbed(max_num_words=max_num_word_swaps),
        RepeatModification(),
    ]
    # untargeted attack
    goal_function = UntargetedClassification(model)
    search_method = GreedySearch()

    return Attack(goal_function, constraints, transformation, search_method)
def DeepWordBugGao2018(model, use_all_transformations=True):
    """Gao, Lanchantin, Soffa, Qi.

    Black-box Generation of Adversarial Text Sequences to Evade Deep
    Learning Classifiers.

    https://arxiv.org/abs/1801.04354
    """
    #
    # Swap characters out from words. Choose the best of four potential transformations.
    #
    if use_all_transformations:
        # We propose four similar methods:
        transformation = CompositeTransformation(
            [
                # (1) Swap: Swap two adjacent letters in the word.
                WordSwapNeighboringCharacterSwap(),
                # (2) Substitution: Substitute a letter in the word with a random letter.
                WordSwapRandomCharacterSubstitution(),
                # (3) Deletion: Delete a random letter from the word.
                WordSwapRandomCharacterDeletion(),
                # (4) Insertion: Insert a random letter in the word.
                WordSwapRandomCharacterInsertion(),
            ]
        )
    else:
        # We use the Combined Score and the Substitution Transformer to generate
        # adversarial samples, with the maximum edit distance difference of 30
        # (ϵ = 30).
        transformation = WordSwapRandomCharacterSubstitution()
    #
    # Don't modify the same word twice or stopwords
    #
    constraints = [RepeatModification(), StopwordModification()]
    #
    # In these experiments, we hold the maximum difference
    # on edit distance (ϵ) to a constant 30 for each sample.
    #
    constraints.append(LevenshteinEditDistance(30))
    #
    # Goal is untargeted classification
    #
    goal_function = UntargetedClassification(model)
    #
    # Greedily swap words with "Word Importance Ranking".
    #
    search_method = GreedyWordSwapWIR()

    return Attack(goal_function, constraints, transformation, search_method)
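# --- CompositeTransformation sketch (illustrative) ---------------------------
# A small sketch of what CompositeTransformation does in the recipe above:
# each sub-transformation proposes candidate texts, and the composite returns
# the pooled set, from which the search method later picks the best candidate.
# In upstream TextAttack, transformations are callable on an AttackedText;
# the sample sentence here is arbitrary.
from textattack.shared import AttackedText
from textattack.transformations import (
    CompositeTransformation,
    WordSwapNeighboringCharacterSwap,
    WordSwapRandomCharacterDeletion,
)

demo = CompositeTransformation(
    [WordSwapNeighboringCharacterSwap(), WordSwapRandomCharacterDeletion()]
)
candidates = demo(AttackedText("the quick brown fox"))
print(len(candidates), [c.text for c in candidates[:5]])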
def Pruthi2019(model, max_num_word_swaps=1):
    """An implementation of the attack used in "Combating Adversarial
    Misspellings with Robust Word Recognition", Pruthi et al., 2019.

    This attack focuses on a small number of character-level changes
    that simulate common typos. It combines:
        - Swapping neighboring characters
        - Deleting characters
        - Inserting characters
        - Swapping characters for adjacent keys on a QWERTY keyboard.

    https://arxiv.org/abs/1905.11268

    :param model: Model to attack.
    :param max_num_word_swaps: Maximum number of modifications to allow.
    """
    # a combination of 4 different character-based transforms
    # ignore the first and last letter of each word, as in the paper
    transformation = CompositeTransformation(
        [
            WordSwapNeighboringCharacterSwap(
                random_one=False, skip_first_char=True, skip_last_char=True
            ),
            WordSwapRandomCharacterDeletion(
                random_one=False, skip_first_char=True, skip_last_char=True
            ),
            WordSwapRandomCharacterInsertion(
                random_one=False, skip_first_char=True, skip_last_char=True
            ),
            WordSwapQWERTY(random_one=False, skip_first_char=True, skip_last_char=True),
        ]
    )
    # only edit words of length >= 4, edit at most max_num_word_swaps words.
    # note that we also are not editing the same word twice, so
    # max_num_word_swaps is really the max number of character
    # changes that can be made. The paper looks at 1 and 2 char attacks.
    constraints = [
        MinWordLength(min_length=4),
        StopwordModification(),
        MaxWordsPerturbed(max_num_words=max_num_word_swaps),
        RepeatModification(),
    ]
    # untargeted attack
    goal_function = UntargetedClassification(model)
    search_method = GreedySearch()

    return Attack(goal_function, constraints, transformation, search_method)
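# --- Budget sweep sketch (illustrative) --------------------------------------
# A short sketch, assuming the recipe above is available as Pruthi2019 in
# textattack.attack_recipes and that model_wrapper is a TextAttack model
# wrapper (e.g., the HuggingFaceModelWrapper from the earlier sketch). It runs
# the 1- and 2-character budgets that the paper examines against a single,
# made-up input with ground-truth label 1.
from textattack.attack_recipes import Pruthi2019

for budget in (1, 2):
    attack = Pruthi2019.build(model_wrapper, max_num_word_swaps=budget)
    result = attack.attack("this movie was surprisingly good", 1)
    print(budget, type(result).__name__)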
def __init__(self, **kwargs):
    # Note: the original import list named WordSwapNeighboringCharacterSwap
    # twice; the duplicate has been removed here.
    from textattack.transformations import (
        CompositeTransformation,
        WordSwapNeighboringCharacterSwap,
        WordSwapRandomCharacterDeletion,
        WordSwapRandomCharacterInsertion,
        WordSwapRandomCharacterSubstitution,
    )

    transformation = CompositeTransformation(
        [
            # (1) Swap: Swap two adjacent letters in the word.
            WordSwapNeighboringCharacterSwap(),
            # (2) Substitution: Substitute a letter in the word with a random letter.
            WordSwapRandomCharacterSubstitution(),
            # (3) Deletion: Delete a random letter from the word.
            WordSwapRandomCharacterDeletion(),
            # (4) Insertion: Insert a random letter in the word.
            WordSwapRandomCharacterInsertion(),
        ]
    )
    super().__init__(transformation, constraints=DEFAULT_CONSTRAINTS, **kwargs)
def build(model, use_all_transformations=True, ensemble: bool = False):
    #
    # Swap characters out from words. Choose the best of four potential transformations.
    #
    if use_all_transformations:
        # We propose four similar methods:
        transformation = CompositeTransformation(
            [
                # (1) Swap: Swap two adjacent letters in the word.
                WordSwapNeighboringCharacterSwap(),
                # (2) Substitution: Substitute a letter in the word with a random letter.
                WordSwapRandomCharacterSubstitution(),
                # (3) Deletion: Delete a random letter from the word.
                WordSwapRandomCharacterDeletion(),
                # (4) Insertion: Insert a random letter in the word.
                WordSwapRandomCharacterInsertion(),
            ]
        )
    else:
        # We use the Combined Score and the Substitution Transformer to generate
        # adversarial samples, with the maximum edit distance difference of 30
        # (ϵ = 30).
        transformation = WordSwapRandomCharacterSubstitution()
    #
    # Don't modify the same word twice or stopwords
    #
    constraints = [RepeatModification(), StopwordModification()]
    #
    # In these experiments, we hold the maximum difference
    # on edit distance (ϵ) to a constant 30 for each sample.
    #
    constraints.append(LevenshteinEditDistance(30))
    #
    # Goal is untargeted classification
    #
    goal_function = UntargetedClassification(model)
    #
    # Greedily swap words with "Word Importance Ranking".
    #
    search_method = GreedyWordSwapWIR(ensemble=ensemble)

    return Attack(goal_function, constraints, transformation, search_method)
def DeepWordBugGao2018(model, use_all_transformations=True):
    #
    # Swap characters out from words. Choose the best of four potential transformations.
    #
    if use_all_transformations:
        # We propose four similar methods:
        transformation = CompositeTransformation(
            [
                # (1) Swap: Swap two adjacent letters in the word.
                WordSwapNeighboringCharacterSwap(),
                # (2) Substitution: Substitute a letter in the word with a random letter.
                WordSwapRandomCharacterSubstitution(),
                # (3) Deletion: Delete a random letter from the word.
                WordSwapRandomCharacterDeletion(),
                # (4) Insertion: Insert a random letter in the word.
                WordSwapRandomCharacterInsertion(),
            ]
        )
    else:
        # We use the Combined Score and the Substitution Transformer to generate
        # adversarial samples, with the maximum edit distance difference of 30
        # (ϵ = 30).
        transformation = WordSwapRandomCharacterSubstitution()
    #
    # In these experiments, we hold the maximum difference
    # on edit distance (ϵ) to a constant 30 for each sample.
    #
    constraints = [LevenshteinEditDistance(30)]
    #
    # Goal is untargeted classification
    #
    goal_function = UntargetedClassification(model)
    #
    # Greedily swap words with "Word Importance Ranking".
    # (This snippet follows the older TextAttack API, in which the search
    # method itself is constructed as the attack object.)
    #
    attack = GreedyWordSwapWIR(
        goal_function,
        transformation=transformation,
        constraints=constraints,
        max_depth=None,
    )

    return attack