Example #1
def build(model, max_num_word_swaps=1):
    # a combination of 4 different character-based transforms
    # ignore the first and last letter of each word, as in the paper
    transformation = CompositeTransformation([
        WordSwapNeighboringCharacterSwap(random_one=False,
                                         skip_first_char=True,
                                         skip_last_char=True),
        WordSwapRandomCharacterDeletion(random_one=False,
                                        skip_first_char=True,
                                        skip_last_char=True),
        WordSwapRandomCharacterInsertion(random_one=False,
                                         skip_first_char=True,
                                         skip_last_char=True),
        WordSwapQWERTY(random_one=False,
                       skip_first_char=True,
                       skip_last_char=True),
    ])
    # only edit words of length >= 4; edit at most max_num_word_swaps words.
    # note that we also do not edit the same word twice, so
    # max_num_word_swaps is really the max number of character
    # changes that can be made. The paper looks at 1- and 2-char attacks.
    constraints = [
        MinWordLength(min_length=4),
        StopwordModification(),
        MaxWordsPerturbed(max_num_words=max_num_word_swaps),
        RepeatModification(),
    ]
    # untargeted attack
    goal_function = UntargetedClassification(model)
    search_method = GreedySearch()
    return Attack(goal_function, constraints, transformation, search_method)
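The builder above returns a textattack.Attack that can be run like any recipe. A minimal usage sketch, assuming TextAttack ≥ 0.3 and transformers are installed (the model name and dataset are illustrative choices, not part of the recipe):

import transformers
import textattack

# wrap a HuggingFace classifier for TextAttack (illustrative model choice)
model = transformers.AutoModelForSequenceClassification.from_pretrained(
    "textattack/bert-base-uncased-rotten-tomatoes")
tokenizer = transformers.AutoTokenizer.from_pretrained(
    "textattack/bert-base-uncased-rotten-tomatoes")
model_wrapper = textattack.models.wrappers.HuggingFaceModelWrapper(model, tokenizer)

# build the attack defined above and run it on a few dataset examples
attack = build(model_wrapper, max_num_word_swaps=2)
dataset = textattack.datasets.HuggingFaceDataset("rotten_tomatoes", split="test")
attacker = textattack.Attacker(attack, dataset, textattack.AttackArgs(num_examples=5))
attacker.attack_dataset()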
Example #2
    def build(model):
        #
        #  we propose five bug generation methods for TEXTBUGGER:
        #
        transformation = CompositeTransformation(
            [
                # (1) Insert: Insert a space into the word.
                # Generally, words are segmented by spaces in English. Therefore,
                # we can deceive classifiers by inserting spaces into words.
                WordSwapRandomCharacterInsertion(
                    random_one=True,
                    letters_to_insert=" ",
                    skip_first_char=True,
                    skip_last_char=True,
                ),
                # (2) Delete: Delete a random character of the word except for the first
                # and the last character.
                WordSwapRandomCharacterDeletion(
                    random_one=True, skip_first_char=True, skip_last_char=True
                ),
                # (3) Swap: Swap two random adjacent letters in the word, but do not
                # alter the first or last letter. This is a common occurrence when
                # typing quickly and is easy to implement.
                WordSwapNeighboringCharacterSwap(
                    random_one=True, skip_first_char=True, skip_last_char=True
                ),
                # (4) Substitute-C (Sub-C): Replace characters with visually similar
                # characters (e.g., replacing “o” with “0”, “l” with “1”, “a” with “@”)
                # or adjacent characters in the keyboard (e.g., replacing “m” with “n”).
                WordSwapHomoglyphSwap(),
                # (5) Substitute-W
                # (Sub-W): Replace a word with its topk nearest neighbors in a
                # context-aware word vector space. Specifically, we use the pre-trained
                # GloVe model [30] provided by Stanford for word embedding and set
                # topk = 5 in the experiment.
                WordSwapEmbedding(max_candidates=5),
            ]
        )

        constraints = [RepeatModification(), StopwordModification()]
        # In our experiment, we first use the Universal Sentence
        # Encoder [7], a model trained on a number of natural language
        # prediction tasks that require modeling the meaning of word
        # sequences, to encode sentences into high dimensional vectors.
        # Then, we use the cosine similarity to measure the semantic
        # similarity between original texts and adversarial texts.
        # ... "Furthermore, the semantic similarity threshold \eps is set
        # as 0.8 to guarantee a good trade-off between quality and
        # strength of the generated adversarial text."
        constraints.append(UniversalSentenceEncoder(threshold=0.8))
        #
        # Goal is untargeted classification
        #
        goal_function = UntargetedClassification(model)
        #
        # Greedily swap words with "Word Importance Ranking".
        #
        search_method = GreedyWordSwapWIR(wir_method="delete")

        return Attack(goal_function, constraints, transformation, search_method)
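To preview what these bug types generate, a transformation can be called directly on an AttackedText outside of any search. A rough sketch using two of the character-level components (the others work the same way, though WordSwapEmbedding downloads the counter-fitted embeddings on first use):

from textattack.shared import AttackedText
from textattack.transformations import (
    CompositeTransformation,
    WordSwapNeighboringCharacterSwap,
    WordSwapRandomCharacterDeletion,
)

# a two-transform composite, to keep the candidate list small
transformation = CompositeTransformation([
    WordSwapNeighboringCharacterSwap(random_one=True, skip_first_char=True,
                                     skip_last_char=True),
    WordSwapRandomCharacterDeletion(random_one=True, skip_first_char=True,
                                    skip_last_char=True),
])

text = AttackedText("the quick brown fox jumps over the lazy dog")
for candidate in transformation(text)[:10]:
    print(candidate.text)  # each candidate perturbs exactly one word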
Example #3
def Checklist2020(model):
    """An implementation of the attack used in "Beyond Accuracy: Behavioral
    Testing of NLP models with CheckList", Ribeiro et al., 2020.

    This attack focuses on a number of attacks used in the Invariance Testing
    Method:
        - Contraction
        - Extension
        - Changing Names, Numbers, Locations
        - possibly negation (not yet implemented)

    The idea is to alter elements of the sentence without actually changing
    the semantics of the sentence.

    https://arxiv.org/abs/2005.04118

    :param model: Model to attack.
    """

    transformation = CompositeTransformation(
        [
            # WordSwapExtend(),
            WordSwapContract(),
            WordSwapChangeName(),
            # WordSwapChangeNumber(),
            WordSwapChangeLocation(),
        ]
    )

    # Need this constraint to prevent extend and contract from modifying each
    # other's changes and forming an infinite loop
    constraints = [RepeatModification()]

    # Untargeted attack & GreedySearch
    goal_function = UntargetedClassification(model)
    search_method = GreedySearch()

    return Attack(goal_function, constraints, transformation, search_method)
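Given a wrapped model (see the model_wrapper built in the earlier usage sketch), recipes can also be run on one example at a time with Attack.attack in TextAttack ≥ 0.3; the input text and gold label below are illustrative:

attack = Checklist2020(model_wrapper)

# attack a single (text, gold label) pair; returns an AttackResult
result = attack.attack("I arrived in London on June 3 with James.", 1)
print(result)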
Example #4
    def __init__(self,
                 model="distilroberta-base",
                 tokenizer="distilroberta-base",
                 **kwargs):
        import transformers

        from textattack.transformations import (
            CompositeTransformation,
            WordInsertionMaskedLM,
            WordMergeMaskedLM,
            WordSwapMaskedLM,
        )

        # load a masked language model (the transformations fill masked positions)
        shared_masked_lm = transformers.AutoModelForMaskedLM.from_pretrained(
            model)
        shared_tokenizer = transformers.AutoTokenizer.from_pretrained(
            tokenizer)

        transformation = CompositeTransformation([
            WordSwapMaskedLM(
                method="bae",
                masked_language_model=shared_masked_lm,
                tokenizer=shared_tokenizer,
                max_candidates=50,
                min_confidence=5e-4,
            ),
            WordInsertionMaskedLM(
                masked_language_model=shared_masked_lm,
                tokenizer=shared_tokenizer,
                max_candidates=50,
                min_confidence=0.0,
            ),
            WordMergeMaskedLM(
                masked_language_model=shared_masked_lm,
                tokenizer=shared_tokenizer,
                max_candidates=50,
                min_confidence=5e-3,
            ),
        ])

        use_constraint = UniversalSentenceEncoder(
            threshold=0.7,
            metric="cosine",
            compare_against_original=True,
            window_size=15,
            skip_text_shorter_than_window=True,
        )

        constraints = DEFAULT_CONSTRAINTS + [use_constraint]

        super().__init__(transformation, constraints=constraints, **kwargs)
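The UniversalSentenceEncoder constraint keeps a candidate only if its USE embedding stays within the cosine-similarity threshold of the original (here 0.7, over a 15-word window). A standalone sketch of that check, assuming tensorflow_hub is installed; the TFHub URL is the public USE v4 module, not necessarily the exact encoder TextAttack downloads:

import numpy as np
import tensorflow_hub as hub

embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")

def use_cosine(a, b):
    # encode both sentences and compare them in embedding space
    ea, eb = embed([a, b]).numpy()
    return float(np.dot(ea, eb) / (np.linalg.norm(ea) * np.linalg.norm(eb)))

sim = use_cosine("the movie was great", "the movie was gr3at")
print(sim, sim >= 0.7)  # a candidate is kept only if it clears the threshold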
Example #5
def DeepWordBugGao2018(model, use_all_transformations=True):
    """Gao, Lanchantin, Soffa, Qi.

    Black-box Generation of Adversarial Text Sequences to Evade Deep Learning
    Classifiers.

    https://arxiv.org/abs/1801.04354
    """
    #
    # Swap characters out from words. Choose the best of four potential transformations.
    #
    if use_all_transformations:
        # We propose four similar methods:
        transformation = CompositeTransformation([
            # (1) Swap: Swap two adjacent letters in the word.
            WordSwapNeighboringCharacterSwap(),
            # (2) Substitution: Substitute a letter in the word with a random letter.
            WordSwapRandomCharacterSubstitution(),
            # (3) Deletion: Delete a random letter from the word.
            WordSwapRandomCharacterDeletion(),
            # (4) Insertion: Insert a random letter in the word.
            WordSwapRandomCharacterInsertion(),
        ])
    else:
        # We use the Combined Score and the Substitution Transformer to generate
        # adversarial samples, with the maximum edit distance difference of 30
        # (ϵ = 30).
        transformation = WordSwapRandomCharacterSubstitution()
    #
    # Don't modify the same word twice or stopwords
    #
    constraints = [RepeatModification(), StopwordModification()]
    #
    # In these experiments, we hold the maximum difference
    # on edit distance (ϵ) to a constant 30 for each sample.
    #
    constraints.append(LevenshteinEditDistance(30))
    #
    # Goal is untargeted classification
    #
    goal_function = UntargetedClassification(model)
    #
    # Greedily swap words with "Word Importance Ranking".
    #
    search_method = GreedyWordSwapWIR()

    return Attack(goal_function, constraints, transformation, search_method)
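LevenshteinEditDistance(30) bounds the character-level edit distance between the original and the perturbed text, matching the paper's ε = 30 budget. For reference, a minimal implementation of the quantity being bounded:

def levenshtein(a: str, b: str) -> int:
    # classic dynamic program over prefix lengths
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        curr = [i]
        for j, cb in enumerate(b, 1):
            curr.append(min(prev[j] + 1,                 # deletion
                            curr[j - 1] + 1,             # insertion
                            prev[j - 1] + (ca != cb)))   # substitution
        prev = curr
    return prev[len(b)]

assert levenshtein("kitten", "sitting") == 3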
Example #6
def Pruthi2019(model, max_num_word_swaps=1):
    """
        An implementation of the attack used in "Combating Adversarial
        Misspellings with Robust Word Recognition", Pruthi et al., 2019.

        This attack focuses on a small number of character-level changes
        that simulate common typos. It combines:
            - Swapping neighboring characters
            - Deleting characters
            - Inserting characters
            - Swapping characters for adjacent keys on a QWERTY keyboard.

        https://arxiv.org/abs/1905.11268

        :param model: Model to attack.
        :param max_num_word_swaps: Maximum number of modifications to allow.
    """
    # a combination of 4 different character-based transforms
    # ignore the first and last letter of each word, as in the paper
    transformation = CompositeTransformation(
        [
            WordSwapNeighboringCharacterSwap(
                random_one=False, skip_first_char=True, skip_last_char=True
            ),
            WordSwapRandomCharacterDeletion(
                random_one=False, skip_first_char=True, skip_last_char=True
            ),
            WordSwapRandomCharacterInsertion(
                random_one=False, skip_first_char=True, skip_last_char=True
            ),
            WordSwapQWERTY(random_one=False, skip_first_char=True, skip_last_char=True),
        ]
    )
    # only edit words of length >= 4; edit at most max_num_word_swaps words.
    # note that we also do not edit the same word twice, so
    # max_num_word_swaps is really the max number of character
    # changes that can be made. The paper looks at 1- and 2-char attacks.
    constraints = [
        MinWordLength(min_length=4),
        StopwordModification(),
        MaxWordsPerturbed(max_num_words=max_num_word_swaps),
        RepeatModification(),
    ]
    # untargeted attack
    goal_function = UntargetedClassification(model)
    search_method = GreedySearch()
    return Attack(goal_function, constraints, transformation, search_method)
Example #7
def __init__(self, **kwargs):
    from textattack.transformations import (
        CompositeTransformation,
        WordSwapNeighboringCharacterSwap,
        WordSwapRandomCharacterDeletion,
        WordSwapRandomCharacterInsertion,
        WordSwapRandomCharacterSubstitution,
    )
    transformation = CompositeTransformation([
        # (1) Swap: Swap two adjacent letters in the word.
        WordSwapNeighboringCharacterSwap(),
        # (2) Substitution: Substitute a letter in the word with a random letter.
        WordSwapRandomCharacterSubstitution(),
        # (3) Deletion: Delete a random letter from the word.
        WordSwapRandomCharacterDeletion(),
        # (4) Insertion: Insert a random letter in the word.
        WordSwapRandomCharacterInsertion(),
    ])
    super().__init__(transformation, constraints=DEFAULT_CONSTRAINTS, **kwargs)
Example #8
    def build(model):
        transformation = CompositeTransformation([
            WordSwapExtend(),
            WordSwapContract(),
            WordSwapChangeName(),
            WordSwapChangeNumber(),
            WordSwapChangeLocation(),
        ])

        # Need this constraint to prevent extend and contract from modifying
        # each other's changes and forming an infinite loop
        constraints = [RepeatModification()]

        # Untargeted attack & GreedySearch
        goal_function = UntargetedClassification(model)
        search_method = GreedySearch()

        return Attack(goal_function, constraints, transformation,
                      search_method)
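The RepeatModification constraint matters here because WordSwapExtend and WordSwapContract are inverses: without it, a search could bounce the same phrase between the two forms indefinitely. A toy illustration of that cycle, using a hypothetical one-entry contraction map:

CONTRACT = {"it is": "it's"}                   # WordSwapContract, in miniature
EXTEND = {v: k for k, v in CONTRACT.items()}   # WordSwapExtend, in miniature

s = "it is raining"
s = s.replace("it is", CONTRACT["it is"])  # -> "it's raining"
s = s.replace("it's", EXTEND["it's"])      # -> "it is raining": back where we started
print(s)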
Example #9
    def build(model, use_all_transformations=True, ensemble: bool = False):
        #
        # Swap characters out from words. Choose the best of four potential transformations.
        #
        if use_all_transformations:
            # We propose four similar methods:
            transformation = CompositeTransformation(
                [
                    # (1) Swap: Swap two adjacent letters in the word.
                    WordSwapNeighboringCharacterSwap(),
                    # (2) Substitution: Substitute a letter in the word with a random letter.
                    WordSwapRandomCharacterSubstitution(),
                    # (3) Deletion: Delete a random letter from the word.
                    WordSwapRandomCharacterDeletion(),
                    # (4) Insertion: Insert a random letter in the word.
                    WordSwapRandomCharacterInsertion(),
                ]
            )
        else:
            # We use the Combined Score and the Substitution Transformer to generate
            # adversarial samples, with the maximum edit distance difference of 30
            # (ϵ = 30).
            transformation = WordSwapRandomCharacterSubstitution()
        #
        # Don't modify the same word twice or stopwords
        #
        constraints = [RepeatModification(), StopwordModification()]
        #
        # In these experiments, we hold the maximum difference
        # on edit distance (ϵ) to a constant 30 for each sample.
        #
        constraints.append(LevenshteinEditDistance(30))
        #
        # Goal is untargeted classification
        #
        goal_function = UntargetedClassification(model)
        #
        # Greedily swap words with "Word Importance Ranking".
        #
        search_method = GreedyWordSwapWIR(ensemble=ensemble)

        return Attack(goal_function, constraints, transformation, search_method)
Example #10
    def __init__(self, **kwargs):
        from textattack.transformations import (
            CompositeTransformation,
            WordSwapChangeLocation,
            WordSwapChangeName,
            WordSwapChangeNumber,
            WordSwapContract,
            WordSwapExtend,
        )

        transformation = CompositeTransformation([
            WordSwapChangeNumber(),
            WordSwapChangeLocation(),
            WordSwapChangeName(),
            WordSwapExtend(),
            WordSwapContract(),
        ])

        constraints = [DEFAULT_CONSTRAINTS[0]]

        super().__init__(transformation, constraints=constraints, **kwargs)
Example #11
def DeepWordBugGao2018(model, use_all_transformations=True):
    #
    # Swap characters out from words. Choose the best of four potential transformations.
    #
    if use_all_transformations:
        # We propose four similar methods:
        transformation = CompositeTransformation([
            # (1) Swap: Swap two adjacent letters in the word.
            WordSwapNeighboringCharacterSwap(),
            # (2) Substitution: Substitute a letter in the word with a random letter.
            WordSwapRandomCharacterSubstitution(),
            # (3) Deletion: Delete a random letter from the word.
            WordSwapRandomCharacterDeletion(),
            # (4) Insertion: Insert a random letter in the word.
            WordSwapRandomCharacterInsertion()
        ])
    else:
        # We use the Combined Score and the Substitution Transformer to generate
        # adversarial samples, with the maximum edit distance difference of 30
        # (ϵ = 30).
        transformation = WordSwapRandomCharacterSubstitution()
    #
    # In these experiments, we hold the maximum difference
    # on edit distance (ϵ) to a constant 30 for each sample.
    #
    constraints = [LevenshteinEditDistance(30)]
    #
    # Goal is untargeted classification
    #
    goal_function = UntargetedClassification(model)
    #
    # Greedily swap words with "Word Importance Ranking".
    #
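    # (Note: this snippet appears to target a legacy TextAttack API, where the
    # search method was constructed directly as the attack object.)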
    attack = GreedyWordSwapWIR(goal_function,
                               transformation=transformation,
                               constraints=constraints,
                               max_depth=None)

    return attack
Example #12
    def build(model):
        # "This paper presents CLARE, a ContextuaLized AdversaRial Example generation model
        # that produces fluent and grammatical outputs through a mask-then-infill procedure.
        # CLARE builds on a pre-trained masked language model and modifies the inputs in a context-aware manner.
        # We propose three contex-tualized  perturbations, Replace, Insert and Merge, allowing for generating outputs of
        # varied lengths."
        #
        # "We  experiment  with  a  distilled  version  of RoBERTa (RoBERTa_{distill}; Sanh et al., 2019)
        # as the masked language model for contextualized infilling."
        # Because BAE and CLARE both use similar replacement papers, we use BAE's replacement method here.

        shared_masked_lm = transformers.AutoModelForCausalLM.from_pretrained(
            "distilroberta-base")
        shared_tokenizer = transformers.AutoTokenizer.from_pretrained(
            "distilroberta-base")
        transformation = CompositeTransformation([
            WordSwapMaskedLM(
                method="bae",
                masked_language_model=shared_masked_lm,
                tokenizer=shared_tokenizer,
                max_candidates=50,
                min_confidence=5e-4,
            ),
            WordInsertionMaskedLM(
                masked_language_model=shared_masked_lm,
                tokenizer=shared_tokenizer,
                max_candidates=50,
                min_confidence=0.0,
            ),
            WordMergeMaskedLM(
                masked_language_model=shared_masked_lm,
                tokenizer=shared_tokenizer,
                max_candidates=50,
                min_confidence=5e-3,
            ),
        ])

        #
        # Don't modify the same word twice or stopwords.
        #
        constraints = [RepeatModification(), StopwordModification()]

        # "A  common  choice  of sim(·,·) is to encode sentences using neural networks,
        # and calculate their cosine similarity in the embedding space (Jin et al., 2020)."
        # The original implementation uses similarity of 0.7.
        use_constraint = UniversalSentenceEncoder(
            threshold=0.7,
            metric="cosine",
            compare_against_original=True,
            window_size=15,
            skip_text_shorter_than_window=True,
        )
        constraints.append(use_constraint)

        # Goal is untargeted classification.
        # "The score is then the negative probability of predicting the gold label from f, using [x_{adv}] as the input"
        goal_function = UntargetedClassification(model)

        # "To achieve this,  we iteratively apply the actions,
        #  and first select those minimizing the probability of outputting the gold label y from f."
        #
        # "Only one of the three actions can be applied at each position, and we select the one with the highest score."
        #
        # "Actions are iteratively applied to the input, until an adversarial example is found or a limit of actions T
        # is reached.
        #  Each step selects the highest-scoring action from the remaining ones."
        #
        search_method = GreedySearch()

        return Attack(goal_function, constraints, transformation,
                      search_method)
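The mask-then-infill step that WordSwapMaskedLM performs can be previewed with a plain transformers fill-mask pipeline. A quick sketch of what the masked LM proposes for a single position (top_k is the call-time argument in recent transformers versions):

from transformers import pipeline

fill = pipeline("fill-mask", model="distilroberta-base")
# RoBERTa-style models use "<mask>" as the mask token
for pred in fill("The movie was <mask> from start to finish.", top_k=5):
    print(f"{pred['token_str']!r:>12}  p={pred['score']:.3f}")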
Example #13
import torch

import textattack
from textattack.constraints.pre_transformation import (
	RepeatModification,
	StopwordModification,
)
from textattack.constraints.semantics.sentence_encoders import (
	UniversalSentenceEncoder,
)
from textattack.goal_functions import UntargetedClassification
from textattack.models.wrappers import HuggingFaceModelWrapper
from textattack.search_methods import GreedySearch
from textattack.shared import Attack
from textattack.transformations import (
	CompositeTransformation,
	WordInsertionMaskedLM,
	WordMergeMaskedLM,
	WordSwapMaskedLM,
)
from transformers import (
	AutoConfig,
	AutoModelForMaskedLM,
	AutoModelForSequenceClassification,
	BertConfig,
	BertTokenizer,
)

# NOTE: import paths assume a TextAttack version that still provides
# Attack.attack_dataset; set_env_variables is defined elsewhere in this
# script (which is adapted from TextAttack's run_attack_parallel).


def attack_from_queue(args, in_queue, out_queue):
	# infer this worker's GPU index from the pool process identity
	gpu_id = torch.multiprocessing.current_process()._identity[0] - 2
	set_env_variables(gpu_id)

	config = BertConfig.from_pretrained("hfl/chinese-macbert-base")
	config.output_attentions = False
	config.output_token_type_ids = False
	# config.max_length = 30
	tokenizer = BertTokenizer.from_pretrained("hfl/chinese-macbert-base", config=config)

	config = AutoConfig.from_pretrained(
		'./models/roberta/chinese-roberta-wwm-ext-OCNLI-2021-01-05-23-46-02-975289', num_labels=3
	)
	#  for normal
	model = AutoModelForSequenceClassification.from_pretrained(
		'./models/roberta/chinese-roberta-wwm-ext-OCNLI-2021-01-05-23-46-02-975289',
		config=config,
	)
	
	model_wrapper = HuggingFaceModelWrapper(model, tokenizer, batch_size=24)

	# for normal
	# shared_masked_lm = BertModel.from_pretrained(
	# 		"bert-base-chinese"
	# 	)
	# for mask!!!
	shared_masked_lm = AutoModelForMaskedLM.from_pretrained(
			"bert-base-chinese"
		)
	shared_tokenizer = BertTokenizer.from_pretrained(
			"bert-base-chinese"
	)
	transformation = CompositeTransformation(
		[
			WordSwapMaskedLM(
				method="bae",
				masked_language_model=shared_masked_lm,
				tokenizer=shared_tokenizer,
				max_candidates=5,
				min_confidence=5e-4,
			),
			WordInsertionMaskedLM(
				masked_language_model=shared_masked_lm,
				tokenizer=shared_tokenizer,
				max_candidates=5,
				min_confidence=0.0,
			),
			WordMergeMaskedLM(
				masked_language_model=shared_masked_lm,
				tokenizer=shared_tokenizer,
				max_candidates=5,
				min_confidence=5e-3,
			),
		]
	)
	

	# goal function
	goal_function = UntargetedClassification(model_wrapper)
	# constraints
	stopwords = set(
		["个", "关于", "之上", "across", "之后", "afterwards", "再次", "against", "ain", "全部", "几乎", "单独", "along", "早已", "也", "虽然", "是", "among", "amongst", "一个", "和", "其他", "任何", "anyhow", "任何人", "anything", "anyway", "anywhere", "are", "aren", "没有", "around", "as", "at", "后", "been", "之前", "beforehand", "behind", "being", "below", "beside", "besides", "之間", "beyond", "皆是", "但", "by", "可以", "不可以", "是", "不是", "couldn't", "d", "didn", "didn't", "doesn", "doesn't", "don", "don't", "down", "due", "之間", "either", "之外", "elsewhere", "空", "足夠", "甚至", "ever", "任何人", "everything", "everywhere", "except", "first", "for", "former", "formerly", "from", "hadn", "hadn't", "hasn", "hasn't", "haven", "haven't", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "him", "himself", "his", "how", "however", "hundred", "i", "if", "in", "indeed", "into", "is", "isn", "isn't", "it", "it's", "its", "itself", "just", "latter", "latterly", "least", "ll", "may", "me", "meanwhile", "mightn", "mightn't", "mine", "more", "moreover", "most", "mostly", "must", "mustn", "mustn't", "my", "myself", "namely", "needn", "needn't", "neither", "never", "nevertheless", "next", "no", "nobody", "none", "noone", "nor", "not", "nothing", "now", "nowhere", "o", "of", "off", "on", "once", "one", "only", "onto", "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "per", "please", "s", "same", "shan", "shan't", "she", "she's", "should've", "shouldn", "shouldn't", "somehow", "something", "sometime", "somewhere", "such", "t", "than", "that", "that'll", "the", "their", "theirs", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "therefore", "therein", "thereupon", "these", "they", "this", "those", "through", "throughout", "thru", "thus", "to", "too", "toward", "towards", "under", "unless", "until", "up", "upon", "used", "ve", "was", "wasn", "wasn't", "we", "were", "weren", "weren't", "what", "whatever", "when", "whence", "whenever", "where", "whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever", "whether", "which", "while", "whither", "who", "whoever", "whole", "whom", "whose", "why", "with", "within", "without", "won", "won't", "would", "wouldn", "wouldn't", "y", "yet", "you", "you'd", "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves"]
	)
	# pass the custom Chinese/English stopword set to the constraint
	constraints = [RepeatModification(), StopwordModification(stopwords=stopwords)]
	# input_column_modification = InputColumnModification(
	#         ["premise", "hypothesis"], {"premise"}
	# )
	# constraints.append(input_column_modification)
	# constraints.append(WordEmbeddingDistance(min_cos_sim=0.5))
	use_constraint = UniversalSentenceEncoder(
		threshold=0.7,
		metric="cosine",
		compare_against_original=True,
		window_size=15,
		skip_text_shorter_than_window=True,
	)
	constraints.append(use_constraint)
	# constraints = [
	#     MaxWordsPerturbed(5),
	# ]
	# transformation
	# transformation = WordSwapMaskedLM(method="bae", max_candidates=50, min_confidence=0.0)
	# transformation = WordSwapEmbedding(max_candidates=10)
	# transformation = WordDeletion()
	# search methods
	# search_method = GreedyWordSwapWIR(wir_method="delete")
	search_method = GreedySearch()

	
	textattack.shared.utils.set_seed(args.random_seed)
	attack = Attack(goal_function, constraints, transformation, search_method)
	# attack = parse_attack_from_args(args)
	if gpu_id == 0:
		print(attack, "\n")
	while not in_queue.empty():
		try:
			i, text, output = in_queue.get()
			results_gen = attack.attack_dataset([(text, output)])
			result = next(results_gen)
			out_queue.put((i, result))
		except Exception as e:
			out_queue.put(e)
			exit()
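This worker is meant to be driven by TextAttack's parallel-attack pattern: fill an input queue, start one pool worker per GPU with attack_from_queue as the initializer, then drain the output queue. A rough driver sketch under those assumptions (args and dataset are placeholders, not names defined in the original script):

if __name__ == "__main__":
    # placeholders: `args` must provide .random_seed; `dataset` yields (text, label)
    num_gpus = torch.cuda.device_count()
    in_queue = torch.multiprocessing.Queue()
    out_queue = torch.multiprocessing.Queue()

    num_items = 0
    for i, (text, label) in enumerate(dataset):
        in_queue.put((i, text, label))
        num_items += 1

    # one pool worker per GPU; each runs attack_from_queue until in_queue drains
    pool = torch.multiprocessing.Pool(num_gpus, attack_from_queue,
                                      (args, in_queue, out_queue))
    for _ in range(num_items):
        item = out_queue.get(block=True)
        if isinstance(item, Exception):
            raise item  # a worker hit an error and forwarded it
        i, result = item
        print(i, result)
    pool.terminate()
    pool.join()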