def main(): print("Load model") tokenizer = transformers.AutoTokenizer.from_pretrained( "echarlaix/bert-base-uncased-sst2-acc91.1-d37-hybrid") model = transformers.AutoModelForSequenceClassification.from_pretrained( "echarlaix/bert-base-uncased-sst2-acc91.1-d37-hybrid", num_labels=2, output_hidden_states=False) victim = OpenAttack.classifiers.TransformersClassifier( model, tokenizer, model.bert.embeddings.word_embeddings) print("New Attacker") attacker = OpenAttack.attackers.PWWSAttacker() dataset = datasets.load_dataset( "sst", split="train[:20]").map(function=dataset_mapping) print("Start attack") attack_eval = OpenAttack.AttackEval( attacker, victim, metrics=[ OpenAttack.metric.EditDistance(), OpenAttack.metric.ModificationRate() ]) attack_eval.eval(dataset, visualize=True)
def main():
    def dataset_mapping(x):
        return {
            "x": x["sentence"],
            "y": 1 if x["label"] > 0.5 else 0,
        }

    print("Word-Level Attacks")
    # attacker = OpenAttack.attackers.HotFlipAttacker()
    attacker = OpenAttack.attackers.PWWSAttacker()

    print("Build model")
    victim = make_model()

    dataset = datasets.load_dataset(
        "sst", split="train[:20]").map(function=dataset_mapping)

    print("Start attack")
    attack_eval = OpenAttack.AttackEval(
        attacker, victim,
        metrics=[
            OpenAttack.metric.Fluency(),
            OpenAttack.metric.GrammaticalErrors(),
            OpenAttack.metric.SemanticSimilarity(),
            OpenAttack.metric.EditDistance(),
            OpenAttack.metric.ModificationRate()
        ])
    attack_eval.eval(dataset, visualize=True, progress_bar=True)
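# `make_model()` is not defined in this snippet. Based on how the other
# examples build their victims, it plausibly wraps a fine-tuned Transformers
# checkpoint in a TransformersClassifier. A hedged sketch; the checkpoint
# name is borrowed from the first example and is an assumption here:
def make_model():
    tokenizer = transformers.AutoTokenizer.from_pretrained(
        "echarlaix/bert-base-uncased-sst2-acc91.1-d37-hybrid")
    model = transformers.AutoModelForSequenceClassification.from_pretrained(
        "echarlaix/bert-base-uncased-sst2-acc91.1-d37-hybrid",
        num_labels=2, output_hidden_states=False)
    return OpenAttack.classifiers.TransformersClassifier(
        model, tokenizer, model.bert.embeddings.word_embeddings)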
# NOTE: assumes a module-level OpenAttack tokenizer, e.g.
#   tokenizer = OpenAttack.text_process.tokenizer.PunctTokenizer()
def attack(classifier, dataset, attacker=None):
    # Instantiate the default attacker lazily rather than as a default
    # argument, which would be evaluated once at definition time.
    if attacker is None:
        attacker = OpenAttack.attackers.PWWSAttacker()
    attack_eval = OpenAttack.AttackEval(attacker, classifier)

    # Keep only the samples the classifier already gets right.
    correct_samples = [
        inst for inst in dataset
        if classifier.get_pred([inst["x"]])[0] == inst["y"]
    ]
    accuracy = len(correct_samples) / len(dataset)

    adversarial_samples = {"x": [], "y": [], "tokens": []}
    for result in tqdm.tqdm(attack_eval.ieval(correct_samples),
                            total=len(correct_samples)):
        if result["success"]:
            adversarial_samples["x"].append(result["result"])
            adversarial_samples["y"].append(result["data"]["y"])
            adversarial_samples["tokens"].append(
                tokenizer.tokenize(result["result"], pos_tagging=False))

    attack_success_rate = len(adversarial_samples["x"]) / len(correct_samples)
    print("Accuracy: %lf%%\nAttack success rate: %lf%%"
          % (accuracy * 100, attack_success_rate * 100))
    return datasets.Dataset.from_dict(adversarial_samples)
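# Example use of attack() to harvest adversarial samples, e.g. for
# adversarial training. The victim and mapped dataset follow the pattern of
# the surrounding snippets; PWWS is the default attacker above:
victim = OpenAttack.loadVictim("BERT.SST")
sst = datasets.load_dataset("sst", split="train[:20]").map(function=dataset_mapping)
adv_dataset = attack(victim, sst)
print(adv_dataset)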
def main():
    # Load a fine-tuned sentiment analysis model from Transformers
    # (you can also use our fine-tuned Victim.BERT.SST).
    print("Load model")
    tokenizer = transformers.AutoTokenizer.from_pretrained(
        "echarlaix/bert-base-uncased-sst2-acc91.1-d37-hybrid")
    model = transformers.AutoModelForSequenceClassification.from_pretrained(
        "echarlaix/bert-base-uncased-sst2-acc91.1-d37-hybrid",
        num_labels=2, output_hidden_states=False)
    victim = OpenAttack.classifiers.TransformersClassifier(
        model, tokenizer, model.bert.embeddings.word_embeddings)

    print("New Attacker")
    attacker = OpenAttack.attackers.PWWSAttacker()

    # Create your dataset here.
    dataset = datasets.Dataset.from_dict({
        "x": [
            "I hate this movie.",
            "I like this apple.",
        ],
        "y": [
            0,  # 0 for negative
            1,  # 1 for positive
        ]
    })

    print("Start attack")
    attack_eval = OpenAttack.AttackEval(
        attacker, victim,
        metrics=[
            OpenAttack.metric.EditDistance(),
            OpenAttack.metric.ModificationRate()
        ])
    attack_eval.eval(dataset, visualize=True)
def main():
    victim = OpenAttack.loadVictim("BERT.SST")
    dataset = datasets.load_dataset(
        "sst", split="train[:10]").map(function=dataset_mapping)
    attacker = MyAttacker()
    attack_eval = OpenAttack.AttackEval(attacker, victim)
    attack_eval.eval(dataset, visualize=True)
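# `MyAttacker` is a user-defined attacker whose definition is not part of
# this snippet. A minimal hedged sketch of what such a class could look
# like, assuming OpenAttack's ClassificationAttacker interface with an
# attack(victim, input_, goal) hook where goal.check() tests whether a
# candidate fools the victim; the adjacent-word-swap strategy is purely
# illustrative:
class MyAttacker(OpenAttack.attackers.ClassificationAttacker):
    def attack(self, victim, input_, goal):
        tokens = input_.split(" ")
        # Try swapping each pair of adjacent words until one fools the victim.
        for i in range(len(tokens) - 1):
            candidate = " ".join(
                tokens[:i] + [tokens[i + 1], tokens[i]] + tokens[i + 2:])
            if goal.check(candidate, victim.get_pred([candidate])[0]):
                return candidate
        return None  # no adversarial sample found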
def main():
    victim = OpenAttack.loadVictim("BERT.SST")
    dataset = datasets.load_dataset(
        "sst", split="train[:20]").map(function=dataset_mapping)
    attacker = OpenAttack.attackers.PWWSAttacker()
    attack_eval = OpenAttack.AttackEval(attacker, victim,
                                        metrics=[SentenceLength()])
    attack_eval.eval(dataset, visualize=True)
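# `SentenceLength` is a custom metric whose definition is not included in
# this snippet. A hedged sketch, assuming OpenAttack's AttackMetric base
# class with an after_attack(input, adversarial_sample) hook that returns
# the value to report:
class SentenceLength(OpenAttack.AttackMetric):
    NAME = "Sentence Length"

    def after_attack(self, input, adversarial_sample):
        if adversarial_sample is not None:
            # Report the word count of the adversarial sample.
            return len(adversarial_sample.split(" "))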
def main():
    # Load some examples of SST-2 for evaluation.
    dataset = datasets.load_dataset(
        "sst", split="train[:20]").map(function=dataset_mapping)
    # Choose the customized classifier as the victim model.
    victim = MyClassifier()
    # Choose PWWS as the attacker and initialize it with default parameters.
    attacker = OpenAttack.attackers.PWWSAttacker()
    # Prepare for attacking.
    attack_eval = OpenAttack.AttackEval(attacker, victim)
    # Launch attacks and print attack results.
    attack_eval.eval(dataset, visualize=True)
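# `MyClassifier` is the customized victim referenced above; its definition
# is not part of this snippet. A hedged sketch, assuming OpenAttack's
# Classifier interface (get_pred/get_prob over a batch of sentences). The
# rule-based VADER scorer is just an easy stand-in model and requires
# nltk.download("vader_lexicon"):
import numpy as np
from nltk.sentiment.vader import SentimentIntensityAnalyzer

class MyClassifier(OpenAttack.Classifier):
    def __init__(self):
        self.model = SentimentIntensityAnalyzer()

    def get_pred(self, input_):
        return self.get_prob(input_).argmax(axis=1)

    def get_prob(self, input_):
        ret = []
        for sent in input_:
            # Turn VADER's pos/neg scores into a two-class distribution.
            res = self.model.polarity_scores(sent)
            prob = (res["pos"] + 1e-6) / (res["neg"] + res["pos"] + 2e-6)
            ret.append(np.array([1 - prob, prob]))
        return np.array(ret)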
def main(): victim = OpenAttack.loadVictim("BERT.SST") # Victim.BiLSTM.SST is a pytorch model which is trained on Dataset.SST. It uses Glove vectors for word representation. # The load operation returns a PytorchClassifier that can be further used for Attacker and AttackEval. dataset = datasets.load_dataset("sst", split="train[:20]").map(function=dataset_mapping) # Dataset.SST.sample is a list of 1k sentences sampled from test dataset of Dataset.SST. attacker = OpenAttack.attackers.GeneticAttacker() # After this step, we’ve initialized a GeneticAttacker and uses the default configuration during attack process. attack_eval = OpenAttack.AttackEval(attacker, victim) # DefaultAttackEval is the default implementation for AttackEval which supports seven basic metrics. attack_eval.eval(dataset, visualize=True, num_workers=4)
def main(): print("New Attacker") attacker = OpenAttack.attackers.PWWSAttacker(lang="chinese") print("Building model") victim = OpenAttack.loadVictim("BERT.AMAZON_ZH").to("cuda:0") print("Loading dataset") dataset = datasets.load_dataset( "amazon_reviews_multi", 'zh', split="train[:20]").map(function=dataset_mapping) print("Start attack") attack_eval = OpenAttack.AttackEval(attacker, victim) attack_eval.eval(dataset, visualize=True, progress_bar=True)
def main(): victim = OpenAttack.loadVictim("BERT.SST") # BERT.SST is a pytorch model which is fine-tuned on SST-2. It uses Glove vectors for word representation. # The load operation returns a PytorchClassifier that can be further used for Attacker and AttackEval. dataset = datasets.load_dataset( "sst", split="train[:20]").map(function=dataset_mapping) # We load the sst-2 dataset using `datasets` package, and map the fields. attacker = OpenAttack.attackers.PWWSAttacker() # After this step, we’ve initialized a PWWSAttacker and uses the default configuration during attack process. attack_eval = OpenAttack.AttackEval(attacker, victim) # Use the default implementation for AttackEval which supports seven basic metrics. attack_eval.eval(dataset, visualize=True)
def main(): print("New Attacker") attacker = OpenAttack.attackers.PWWSAttacker(lang="chinese") print("Building model") clsf = OpenAttack.loadVictim("BERT.AMAZON_ZH") print("Loading dataset") dataset = datasets.load_dataset("amazon_reviews_multi",'zh',split="train[:20]").map(function=dataset_mapping) print("Start attack") attack_eval = OpenAttack.AttackEval(attacker, clsf, metrics=[ OpenAttack.metric.Fluency(), OpenAttack.metric.GrammaticalErrors(), OpenAttack.metric.EditDistance(), OpenAttack.metric.ModificationRate() ]) attack_eval.eval(dataset, visualize=True, progress_bar=True)
def main(): print("Load model") tokenizer = transformers.AutoTokenizer.from_pretrained("roberta-large-mnli") model = transformers.AutoModelForSequenceClassification.from_pretrained("roberta-large-mnli", output_hidden_states=False) victim = OpenAttack.classifiers.TransformersClassifier(model, tokenizer, model.roberta.embeddings.word_embeddings) victim = NLIWrapper(victim) print("New Attacker") attacker = OpenAttack.attackers.PWWSAttacker() dataset = datasets.load_dataset("glue", "mnli", split="train[:20]").map(function=dataset_mapping) print("Start attack") attack_eval = OpenAttack.AttackEval(attacker, victim, metrics = [ OpenAttack.metric.EditDistance(), OpenAttack.metric.ModificationRate() ]) attack_eval.eval(dataset, visualize=True)
def main():
    import multiprocessing
    if multiprocessing.get_start_method() != "spawn":
        multiprocessing.set_start_method("spawn", force=True)

    dataset = datasets.load_dataset(
        "sst", split="train[:100]").map(function=dataset_mapping)
    clsf = OpenAttack.loadVictim("BERT.SST")  # .to("cuda:0")

    attackers = get_attackers(dataset, clsf)

    for attacker in attackers:
        print(attacker.__class__.__name__)
        try:
            print(
                OpenAttack.AttackEval(attacker, clsf).eval(dataset, progress_bar=True)
            )
        except Exception as e:
            # Report the failure and continue benchmarking the rest.
            print(e)
        print("\n")
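# `get_attackers` is defined elsewhere; it evidently returns the list of
# attackers to benchmark. A hedged sketch using a few attackers OpenAttack
# ships. The exact roster of the original helper is unknown, and some
# attackers take the dataset or victim as constructor arguments, which this
# sketch ignores:
def get_attackers(dataset, victim):
    return [
        OpenAttack.attackers.PWWSAttacker(),
        OpenAttack.attackers.GeneticAttacker(),
        OpenAttack.attackers.TextBuggerAttacker(),
        OpenAttack.attackers.DeepWordBugAttacker(),
        OpenAttack.attackers.PSOAttacker(),
    ]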
def main(): print("New Attacker") attacker = OpenAttack.attackers.PWWSAttacker() print("Build model") clsf = OpenAttack.loadVictim("BERT.SST") dataset = datasets.load_dataset("sst", split="train[:100]").map(function=dataset_mapping) print("Start attack") attack_eval = OpenAttack.AttackEval( attacker, clsf, metrics=[ OpenAttack.metric.Fluency(), OpenAttack.metric.GrammaticalErrors(), OpenAttack.metric.SemanticSimilarity(), OpenAttack.metric.EditDistance(), OpenAttack.metric.ModificationRate() ] ) attack_eval.eval(dataset, visualize=True, progress_bar=True)
def main(): print("New Attacker") #attacker = OpenAttack.attackers.PWWSAttacker() print("Build model") #clsf = OpenAttack.loadVictim("BERT.SST") tokenizer = transformers.AutoTokenizer.from_pretrained("./data/Victim.BERT.SST") model = transformers.AutoModelForSequenceClassification.from_pretrained("./data/Victim.BERT.SST", num_labels=2, output_hidden_states=True) clsf = OpenAttack.classifiers.TransformersClassifier(model, tokenizer=tokenizer, max_length=100, embedding_layer=model.bert.embeddings.word_embeddings) dataset = datasets.load_dataset("sst", split="train[:5]").map(function=dataset_mapping) print("New Attacker") attacker = OpenAttack.attackers.GEOAttacker(data=dataset) print("Start attack") attack_eval = OpenAttack.AttackEval( attacker, clsf, metrics=[ OpenAttack.metric.Fluency(), OpenAttack.metric.GrammaticalErrors(), OpenAttack.metric.SemanticSimilarity(), OpenAttack.metric.EditDistance(), OpenAttack.metric.ModificationRate() ] ) attack_eval.eval(dataset, visualize=True, progress_bar=True)
def main():
    import multiprocessing
    import time
    if multiprocessing.get_start_method() != "spawn":
        multiprocessing.set_start_method("spawn", force=True)

    dataset = datasets.load_dataset(
        "amazon_reviews_multi", "zh", split="train[:5]").map(dataset_mapping)
    clsf = OpenAttack.loadVictim("BERT.AMAZON_ZH").to("cuda:0")

    attackers = get_attackers_on_chinese(dataset, clsf)

    for attacker in attackers:
        print(attacker.__class__.__name__)
        try:
            st = time.perf_counter()
            print(
                OpenAttack.AttackEval(attacker, clsf, language="chinese").eval(
                    dataset, progress_bar=True),
                time.perf_counter() - st
            )
        except Exception as e:
            # Report the failure and continue with the remaining attackers.
            print(e)
        print("\n")
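# Like `get_attackers` above, `get_attackers_on_chinese` is defined
# elsewhere. A hedged sketch: presumably the same idea restricted to
# attackers that support lang="chinese". Only PWWS is confirmed to accept
# that flag by the snippets above; which others the original helper
# includes is an assumption:
def get_attackers_on_chinese(dataset, victim):
    return [
        OpenAttack.attackers.PWWSAttacker(lang="chinese"),
        OpenAttack.attackers.GeneticAttacker(lang="chinese"),
        OpenAttack.attackers.TextBuggerAttacker(lang="chinese"),
    ]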