def find_top_k_words_with_tag(k, tag):
    """Return the k most frequent non-stopword Brown-corpus words with POS `tag`.

    Only occurrences followed by a plausible modification target count:
    noun-like tags after an ADJ, VERB after an ADV.

    Args:
        k: number of top words to return.
        tag: 'ADJ' or 'ADV'; any other value yields an empty list.

    Returns:
        List of up to k lowercased word strings, most frequent first.
    """
    stop_words = criteria.get_stopwords()
    # (word, tag) bigrams over the Brown corpus, words lowercased.
    # nltk.bigrams yields a one-shot generator; exactly one branch below
    # consumes it, so a single pass is enough.
    bigrams = nltk.bigrams(
        (x[0].lower(), x[1])
        for x in nltk.corpus.brown.tagged_words(tagset='universal'))

    # Keep the first token of each bigram when its tag matches `tag` and the
    # next token's tag is in the allowed set.  Sets (instead of a bare string
    # or tuple) make the membership test explicit and avoid the accidental
    # substring semantics of `x in 'VERB'`.
    tagged = []
    if tag == 'ADJ':
        # NOTE(review): 'PROPN' is not part of NLTK's classic universal
        # tagset (proper nouns map to 'NOUN'), so it never matches; kept for
        # compatibility — it is harmless.
        next_tags = {'PROPN', 'NOUN', 'PRON'}
        tagged = [
            x[0] for x in bigrams
            if x[0][1] == 'ADJ' and x[1][1] in next_tags
        ]
    elif tag == 'ADV':
        next_tags = {'VERB'}
        tagged = [
            x[0] for x in bigrams
            if x[0][1] == 'ADV' and x[1][1] in next_tags
        ]
    # tagged.extend([x[1] for x in bigrams if x[1][1] == 'ADV' and x[0][1] in next_tags])

    # Rank by frequency, ignoring stopwords.  FreqDist entries here are
    # ((word, tag), count) pairs, hence x[0][0] extracts the word itself.
    freq = nltk.FreqDist(x for x in tagged if x[0] not in stop_words) \
        .most_common(k)
    return [x[0][0] for x in freq]
def main():
    """Command-line entry point: attack a text classifier with contextual
    (masked-LM based) word substitutions and log the results.

    Side effects: creates ``args.output_dir`` if needed, appends a summary
    line to ``<output_dir>/results_log`` and writes successful adversarial
    examples to ``<output_dir>/adversaries.txt``.
    """
    parser = argparse.ArgumentParser()

    ## Required parameters
    parser.add_argument("--dataset_path", type=str, required=True,
                        help="Which dataset to attack.")
    parser.add_argument("--nclasses", type=int, default=2,
                        help="How many classes for classification.")
    parser.add_argument("--target_model", type=str, required=True,
                        choices=['wordLSTM', 'bert', 'wordCNN'],
                        help="Target models for text classification: fasttext, charcnn, word level lstm "
                             "For NLI: InferSent, ESIM, bert-base-uncased")
    parser.add_argument("--target_model_path", type=str, required=True,
                        help="pre-trained target model path")
    parser.add_argument("--word_embeddings_path", type=str, default='',
                        help="path to the word embeddings for the target model")
    parser.add_argument("--counter_fitting_embeddings_path", type=str, required=True,
                        help="path to the counter-fitting embeddings we used to find synonyms")
    parser.add_argument("--counter_fitting_cos_sim_path", type=str, default='',
                        help="pre-compute the cosine similarity scores based on the counter-fitting embeddings")
    parser.add_argument("--USE_cache_path", type=str, required=True,
                        help="Path to the USE encoder cache.")
    parser.add_argument("--output_dir", type=str, default='adv_results',
                        help="The output directory where the attack results will be written.")

    ## Model hyperparameters
    parser.add_argument("--sim_score_window", default=15, type=int,
                        help="Text length or token number to compute the semantic similarity score")
    parser.add_argument("--import_score_threshold", default=-1., type=float,
                        help="Required mininum importance score.")
    parser.add_argument("--sim_score_threshold", default=0.85, type=float,
                        help="Required minimum semantic similarity score.")
    parser.add_argument("--synonym_num", default=50, type=int,
                        help="Number of synonyms to extract")
    parser.add_argument("--batch_size", default=32, type=int,
                        help="Batch size to get prediction")
    parser.add_argument("--data_size", default=100, type=int,
                        help="Data size to create adversaries")
    parser.add_argument("--perturb_ratio", default=0., type=float,
                        help="Whether use random perturbation for ablation study")
    parser.add_argument("--max_seq_length", default=128, type=int,
                        help="max sequence length for BERT target model")

    args = parser.parse_args()

    # Warn (but continue) if results already exist in the output directory.
    if os.path.exists(args.output_dir) and os.listdir(args.output_dir):
        print("Output directory ({}) already exists and is not empty.".format(args.output_dir))
    else:
        os.makedirs(args.output_dir, exist_ok=True)

    # get data to attack
    texts, labels = dataloader.read_corpus(args.dataset_path)
    data = list(zip(texts, labels))
    data = data[:args.data_size]  # choose how many samples for adversary
    print("Data import finished!")

    # construct the target model and load its checkpoint
    print("Building Model...")
    if args.target_model == 'wordLSTM':
        model = Model(args.word_embeddings_path, nclasses=args.nclasses).cuda()
        checkpoint = torch.load(args.target_model_path, map_location='cuda:0')
        model.load_state_dict(checkpoint)
    elif args.target_model == 'wordCNN':
        model = Model(args.word_embeddings_path, nclasses=args.nclasses,
                      hidden_size=100, cnn=True).cuda()
        checkpoint = torch.load(args.target_model_path, map_location='cuda:0')
        model.load_state_dict(checkpoint)
    elif args.target_model == 'bert':
        model = NLI_infer_BERT(args.target_model_path, nclasses=args.nclasses,
                               max_seq_length=args.max_seq_length)
    predictor = model.text_pred
    print("Model built!")

    # Masked language model that proposes contextual synonym candidates.
    maskedLM = Contextual_synonyms_BERT(args.target_model_path,
                                        max_seq_length=args.max_seq_length)
    maskedLM_predictor = maskedLM  # .text_pred
    print("Masked LM BERT built!")

    # prepare synonym extractor:
    # build word<->index dictionaries from the embedding file
    idx2word = {}
    word2idx = {}
    print("Building vocab...")
    with open(args.counter_fitting_embeddings_path, 'r') as ifile:
        for line in ifile:
            word = line.split()[0]
            if word not in idx2word:
                idx2word[len(idx2word)] = word
                word2idx[word] = len(idx2word) - 1

    print("Building cos sim matrix...")
    if args.counter_fitting_cos_sim_path:
        # load pre-computed cosine similarity matrix if provided
        print('Load pre-computed cosine similarity matrix from {}'.format(args.counter_fitting_cos_sim_path))
        cos_sim = np.load(args.counter_fitting_cos_sim_path)
    else:
        # calculate the cosine similarity matrix from the raw embeddings
        print('Start computing the cosine similarity matrix!')
        embeddings = []
        with open(args.counter_fitting_embeddings_path, 'r') as ifile:
            for line in ifile:
                embedding = [float(num) for num in line.strip().split()[1:]]
                embeddings.append(embedding)
        embeddings = np.array(embeddings)
        product = np.dot(embeddings, embeddings.T)
        norm = np.linalg.norm(embeddings, axis=1, keepdims=True)
        cos_sim = product / np.dot(norm, norm.T)
    print("Cos sim import finished!")

    # build the semantic similarity module
    use = USE(args.USE_cache_path)

    # start attacking
    orig_failures = 0.
    adv_failures = 0.
    changed_rates = []
    nums_queries = []
    orig_texts = []
    adv_texts = []
    true_labels = []
    new_labels = []
    log_file = open(os.path.join(args.output_dir, 'results_log'), 'a')
    stop_words_set = criteria.get_stopwords()
    print('Start attacking!')

    for idx, (text, true_label) in enumerate(data):
        if idx % 2 == 0:
            print('{} samples out of {} have been finished!'.format(idx, args.data_size))
        if args.perturb_ratio > 0.:
            # ablation: random perturbation instead of the contextual attack
            new_text, num_changed, orig_label, \
                new_label, num_queries = random_attack(text, true_label, predictor,
                                                       args.perturb_ratio, stop_words_set,
                                                       word2idx, idx2word, cos_sim,
                                                       sim_predictor=use,
                                                       sim_score_threshold=args.sim_score_threshold,
                                                       import_score_threshold=args.import_score_threshold,
                                                       sim_score_window=args.sim_score_window,
                                                       synonym_num=args.synonym_num,
                                                       batch_size=args.batch_size)
        else:
            new_text, num_changed, orig_label, \
                new_label, num_queries = contextual_attack(text, true_label, predictor,
                                                           maskedLM_predictor, stop_words_set,
                                                           word2idx, idx2word, cos_sim,
                                                           sim_predictor=use,
                                                           sim_score_threshold=args.sim_score_threshold,
                                                           import_score_threshold=args.import_score_threshold,
                                                           sim_score_window=args.sim_score_window,
                                                           synonym_num=args.synonym_num,
                                                           batch_size=args.batch_size)

        # orig_label != true_label means the clean model already misclassified
        # this sample; only correctly-classified samples count queries.
        if true_label != orig_label:
            orig_failures += 1
        else:
            nums_queries.append(num_queries)
        if true_label != new_label:
            adv_failures += 1

        print("OLD TEXT: " + ' '.join(text))
        print("NEW TEXT: " + new_text)
        print("NUM CHANGED: " + str(num_changed))
        sys.stdout.flush()

        changed_rate = 1.0 * num_changed / len(text)
        # keep only genuine successes: clean prediction was right, attacked wrong
        if true_label == orig_label and true_label != new_label:
            changed_rates.append(changed_rate)
            orig_texts.append(' '.join(text))
            adv_texts.append(new_text)
            true_labels.append(true_label)
            new_labels.append(new_label)

    # BUGFIX: accuracies were previously divided by a hard-coded 1000 even
    # though only len(data) (<= args.data_size, default 100) samples were
    # attacked, producing wrong statistics.  Divide by the actual count.
    num_samples = len(data)
    message = 'For target model {}: original accuracy: {:.3f}%, adv accuracy: {:.3f}%, ' \
              'avg changed rate: {:.3f}%, num of queries: {:.1f}\n'.format(args.target_model,
                                                                           (1 - orig_failures / num_samples) * 100,
                                                                           (1 - adv_failures / num_samples) * 100,
                                                                           np.mean(changed_rates) * 100,
                                                                           np.mean(nums_queries))
    print(message)
    log_file.write(message)
    log_file.close()

    with open(os.path.join(args.output_dir, 'adversaries.txt'), 'w') as ofile:
        for orig_text, adv_text, true_label, new_label in zip(orig_texts, adv_texts, true_labels, new_labels):
            ofile.write('orig sent ({}):\t{}\nadv sent ({}):\t{}\n\n'.format(true_label, orig_text, new_label, adv_text))
def main():
    """Attack entry point (ablated variant): loads data and a target model,
    runs the attack over each sample and dumps the adversarial examples with
    joblib.

    NOTE(review): this function reads a module-level ``args`` (including an
    ``args.save`` output name) that is never defined here — presumably parsed
    at module scope; confirm before running.
    """
    # Warn (but continue) when the output directory already holds results.
    if os.path.exists(args.output_dir) and os.listdir(args.output_dir):
        print("Output directory ({}) already exists and is not empty.".format(
            args.output_dir))
    else:
        os.makedirs(args.output_dir, exist_ok=True)

    # get data to attack; truncate to the first `data_size` samples
    texts, labels = dataloader.read_corpus(args.dataset_path)
    data = list(zip(texts, labels))
    data = data[:args.data_size]  # choose how many samples for adversary
    print("Data import finished!")

    # construct the target model and load its checkpoint onto GPU 0
    print("Building Model...")
    if args.target_model == 'wordLSTM':
        model = Model(args.word_embeddings_path, nclasses=args.nclasses).cuda()
        checkpoint = torch.load(args.target_model_path, map_location='cuda:0')
        model.load_state_dict(checkpoint)
    elif args.target_model == 'wordCNN':
        model = Model(args.word_embeddings_path,
                      nclasses=args.nclasses,
                      hidden_size=100,
                      cnn=True).cuda()
        checkpoint = torch.load(args.target_model_path, map_location='cuda:0')
        model.load_state_dict(checkpoint)
    elif args.target_model == 'bert':
        model = NLI_infer_BERT(args.target_model_path,
                               nclasses=args.nclasses,
                               max_seq_length=args.max_seq_length)
    predictor = model.text_pred
    print("Model built!")

    # prepare synonym extractor
    # build dictionary via the embedding file
    # NOTE(review): the vocab / cosine-similarity construction below is
    # commented out, so word2idx/idx2word stay empty and `cos_sim` is never
    # bound — the random_attack branch further down would raise NameError
    # when args.perturb_ratio > 0.  Verify this path is intentionally dead.
    idx2word = {}
    word2idx = {}

    # print("Building vocab...")
    # with open(args.counter_fitting_embeddings_path, 'r') as ifile:
    #     for line in ifile:
    #         word = line.split()[0]
    #         if word not in idx2word:
    #             idx2word[len(idx2word)] = word
    #             word2idx[word] = len(idx2word) - 1
    # print("Building cos sim matrix...")
    # if args.counter_fitting_cos_sim_path:
    #     # load pre-computed cosine similarity matrix if provided
    #     print('Load pre-computed cosine similarity matrix from {}'.format(args.counter_fitting_cos_sim_path))
    #     cos_sim = np.load(args.counter_fitting_cos_sim_path)
    # else:
    #     # calculate the cosine similarity matrix
    #     print('Start computing the cosine similarity matrix!')
    #     embeddings = []
    #     with open(args.counter_fitting_embeddings_path, 'r') as ifile:
    #         for line in ifile:
    #             embedding = [float(num) for num in line.strip().split()[1:]]
    #             embeddings.append(embedding)
    #     embeddings = np.array(embeddings)
    #     product = np.dot(embeddings, embeddings.T)
    #     norm = np.linalg.norm(embeddings, axis=1, keepdims=True)
    #     cos_sim = product / np.dot(norm, norm.T)
    # print("Cos sim import finished!")

    # build the semantic similarity module
    # use = USE(args.USE_cache_path)
    use = None  # USE similarity disabled in this variant

    # start attacking; running counters for the final summary
    orig_failures = 0.
    adv_failures = 0.
    changed_rates = []
    nums_queries = []
    orig_texts = []
    adv_texts = []
    true_labels = []
    new_labels = []
    log_file = open(os.path.join(args.output_dir, 'results_log'), 'a')
    stop_words_set = criteria.get_stopwords()
    print('Start attacking!')
    for idx, (text, true_label) in enumerate(data):
        if idx % 20 == 0:
            print('{} samples out of {} have been finished!'.format(
                idx, args.data_size))
        if args.perturb_ratio > 0.:
            # ablation path — see NOTE above: `cos_sim` is undefined here
            new_text, num_changed, orig_label, \
                new_label, num_queries = random_attack(text, true_label, predictor,
                                                       args.perturb_ratio, stop_words_set,
                                                       word2idx, idx2word, cos_sim,
                                                       sim_predictor=use,
                                                       sim_score_threshold=args.sim_score_threshold,
                                                       import_score_threshold=args.import_score_threshold,
                                                       sim_score_window=args.sim_score_window,
                                                       synonym_num=args.synonym_num,
                                                       batch_size=args.batch_size)
        else:
            new_text, num_changed, orig_label, \
                new_label, num_queries = attack(text, true_label, predictor, model,
                                                batch_size=args.batch_size)
        print(true_label, orig_label, new_label)
        print("orig texts:", text)
        # orig_label != true_label: clean model already misclassified
        if true_label != orig_label:
            orig_failures += 1
            print("orig failure")
        else:
            nums_queries.append(num_queries)
        # Success here is judged by orig_label vs new_label (label flipped).
        if orig_label != new_label:
            adv_failures += 1
            print(
                f"attack successful: {adv_failures}/{idx + 1}={adv_failures / (idx + 1)}"
            )
        changed_rate = 1.0 * num_changed / len(text)
        if orig_label != new_label:
            changed_rates.append(changed_rate)
            orig_texts.append(text)
            adv_texts.append(new_text)
            true_labels.append(true_label)
            new_labels.append(new_label)
    # Summary over args.data_size samples; appended to the log file.
    message = 'For target model {}: original accuracy: {:.3f}%, adv accuracy: {:.3f}%, ' \
              'avg changed rate: {:.3f}%, num of queries: {:.1f}\n'.format(args.target_model,
                                                                           (1-orig_failures/args.data_size)*100,
                                                                           (1-adv_failures/args.data_size)*100,
                                                                           np.mean(changed_rates)*100,
                                                                           np.mean(nums_queries))
    print(message)
    log_file.write(message)
    # with open(os.path.join(args.output_dir, 'adversaries.txt'), 'w') as ofile:
    #     for orig_text, adv_text, true_label, new_label in zip(orig_texts, adv_texts, true_labels, new_labels):
    #         ofile.write('orig sent ({}):\t{}\nadv sent ({}):\t{}\n\n'.format(true_label, orig_text, new_label, adv_text))

    # Persist all collected adversaries in one joblib archive.
    adv_data = {
        'adv_text': adv_texts,
        'orig_text': orig_texts,
        'true_labels': true_labels,
        'new_labels': new_labels
    }
    import joblib
    joblib.dump(adv_data, os.path.join(args.output_dir, args.save))
def main():
    """Attack entry point (wordCNN/wordLSTM/BERT/GCP variant): parses CLI
    options, resolves the target-model checkpoint path, attacks the first
    100 samples and writes results to a log file plus an adversaries file.

    NOTE(review): several values look like leftover scaffolding —
    ``predictor = 1`` ends up as the "target model" field of the summary
    message, the dataset is hard-truncated to 100 samples, and the output
    filename 'adversaries_AG_1-1000_lstm_coarse_POS.txt' is hard-coded.
    """
    parser = argparse.ArgumentParser()
    ## Required parameters
    parser.add_argument("--dataset_path",
                        type=str,
                        required=True,
                        help="Which dataset to attack.")
    parser.add_argument(
        "--target_model",
        type=str,
        required=True,
        choices=['wordLSTM', 'bert', 'wordCNN', 'gcp'],
        help=
        "Target models for text classification: wordcnn, word Lstm or GCP NLP API "
    )
    parser.add_argument("--gcp_nlp_json_link",
                        type=str,
                        required=False,
                        default='',
                        help="Link to GCP NLP API json key file")
    parser.add_argument("--target_model_path",
                        type=str,
                        required=False,
                        default='',
                        help="pre-trained target model path")
    parser.add_argument("--nclasses",
                        type=int,
                        default=2,
                        help="How many classes for classification.")
    parser.add_argument(
        "--word_embeddings_path",
        type=str,
        default='word_embeddings_path/glove.6B.200d.txt',
        help="path to the word embeddings for the target model")
    parser.add_argument(
        "--counter_fitting_embeddings_path",
        type=str,
        required=False,
        default="counter_fitting_embedding/counter-fitted-vectors.txt",
        help="path to the counter-fitting embeddings used to find synonyms")
    parser.add_argument(
        "--output_dir",
        type=str,
        default='adv_results',
        help="The output directory where the attack results will be written.")
    parser.add_argument("--pos_filter",
                        type=str,
                        default='coarse',
                        help="pos filter mask: either 'fine' or 'coarse")
    args = parser.parse_args()
    output_dir = args.output_dir

    # download data to be attacked; hard-truncated to the first 100 samples
    data = download_attack_data(args.dataset_path)
    data = data[:100]

    # find word2idx and idx2word dicts from the counter-fitted embeddings
    embeddings, word2idx_vocab, idx2word_vocab = generate_embedding_mat(
        args.counter_fitting_embeddings_path)

    # compute cosine similarity matrix of words in text
    cos_sim, word2idx_rev, idx2word_rev = generateCosSimMat.csim_matrix(
        data, embeddings, word2idx_vocab)

    gcp_attack = False  # NOTE(review): never read afterwards

    # Resolve the checkpoint path per model family; a default path derived
    # from the dataset name is used unless --target_model_path is given.
    if args.target_model == "wordCNN":
        default_model_path = "saved_models/wordCNN/"
        if 'imdb' in args.dataset_path:
            default_model_path += 'imdb'
        elif 'mr' in args.dataset_path:
            default_model_path += 'mr'
        cmodel = Model(args.word_embeddings_path,
                       nclasses=args.nclasses,
                       hidden_size=100,
                       cnn=True).cuda()
    elif args.target_model == "wordLSTM":
        default_model_path = "saved_models/wordLSTM/"
        if 'imdb' in args.dataset_path:
            default_model_path += 'imdb'
        elif 'mr' in args.dataset_path:
            default_model_path += 'mr'
        elif 'ag' in args.dataset_path:
            default_model_path += 'ag'
        cmodel = Model(args.word_embeddings_path,
                       nclasses=args.nclasses,
                       cnn=False).cuda()
    elif args.target_model == "bert":
        default_model_path = "saved_models/bert/"
        if 'imdb' in args.dataset_path:
            default_model_path += 'imdb'
        elif 'mr' in args.dataset_path:
            default_model_path += 'mr'
        if args.target_model_path:
            cmodel = LoadPretrainedBert.loadPretrainedModel(
                args.target_model_path, nclasses=args.nclasses)
        else:
            cmodel = LoadPretrainedBert.loadPretrainedModel(
                default_model_path, nclasses=args.nclasses)
    elif args.target_model == "gcp":
        cmodel = None  # remote API — no local model object

    # word models load a state dict; bert/gcp are fully built above
    if args.target_model != 'bert' and args.target_model != 'gcp':
        # load checkpoints
        if args.target_model_path:
            print("target model path")
            checkpoint = torch.load(args.target_model_path,
                                    map_location=torch.device('cuda:0'))
        else:
            checkpoint = torch.load(default_model_path,
                                    map_location=torch.device('cuda:0'))
        cmodel.load_state_dict(checkpoint)

    # running counters / result accumulators
    orig_failures = 0.
    adv_failures = 0.
    changed_rates = []
    nums_queries = []
    orig_texts = []
    adv_texts = []
    true_labels = []
    new_labels = []
    sem_sim = []  # NOTE(review): never filled
    log_file = open(os.path.join(args.output_dir, 'results_log'), 'a')
    stop_words_set = criteria.get_stopwords()
    true_label = 1
    predictor = 1  # NOTE(review): placeholder; appears in the summary message
    sim_score_threshold = 0.5
    perturb_ratio = 0.4  # NOTE(review): unused below
    size = len(data)
    print('Start attacking!')
    ct = 0
    #tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
    pos_filter = args.pos_filter
    random_atk = False  # NOTE(review): unused below
    for idx, (text, true_label) in enumerate(data):
        ct += 1
        print(idx)
        if idx % 20 == 0:
            print('{} samples out of {} have been finished!'.format(idx, size))
        new_text, num_changed, orig_label, \
            new_label, num_queries = attack(cmodel, args.gcp_nlp_json_link, text, true_label,
                                            stop_words_set, word2idx_rev, idx2word_rev,
                                            idx2word_vocab, cos_sim, pos_filter,
                                            synonym_num=80,
                                            sim_score_threshold=sim_score_threshold,
                                            syn_sim=0.65)
        #print(text)
        # clean-model misclassification vs. query bookkeeping
        if true_label != orig_label:
            orig_failures += 1
        else:
            nums_queries.append(num_queries)
        if true_label != new_label:
            adv_failures += 1

        # change rate relative to the word-token count of the original text
        # NOTE(review): tokenizer is rebuilt every iteration — could be hoisted
        tokenizer = RegexpTokenizer(r'\w+')
        text_tokens = tokenizer.tokenize(text)
        changed_rate = 1.0 * num_changed / len(text_tokens)

        # record only genuine successes (clean right, attacked wrong)
        if true_label == orig_label and true_label != new_label:
            changed_rates.append(changed_rate)
            orig_texts.append(text)
            adv_texts.append(new_text)
            true_labels.append(true_label)
            new_labels.append(new_label)

    message = 'For target model {}: original accuracy: {:.3f}%, adv accuracy: {:.3f}%, ' \
              'avg changed rate: {:.3f}%, Avg num of queries: {:.1f}\n, Median num of queries:{:.1f} \n'.format(predictor,
              (1-orig_failures/size)*100, (1-adv_failures/size)*100, np.mean(changed_rates)*100,
              np.mean(nums_queries), np.median(nums_queries))
    print(message)
    log_file.write(message)
    i = 1  # NOTE(review): unused
    with open(
            os.path.join(args.output_dir,
                         'adversaries_AG_1-1000_lstm_coarse_POS.txt'),
            'w') as ofile:
        for orig_text, adv_text, true_label, new_label in zip(
                orig_texts, adv_texts, true_labels, new_labels):
            ofile.write('orig sent ({}):\t{}\nadv sent ({}):\t{}\n\n'.format(
                true_label, orig_text, new_label, adv_text))
def main():
    """Attack entry point (word-pruning / query-budget variant): attacks up
    to ``--data_size`` samples, logs per-run CSVs under ``results_context/``
    and writes adversaries to ``<output_dir>/adversaries.txt``.

    NOTE(review): the final accuracies divide by a hard-coded 1000, which is
    only correct when --data_size is left at its default of 1000; also,
    ``cos_sim`` is bound only on the "compute" branch below, so the
    random_attack ablation raises NameError when a pre-computed similarity
    file is supplied together with --perturb_ratio > 0.
    """
    parser = argparse.ArgumentParser()
    ## Required parameters
    parser.add_argument("--dataset_path",
                        type=str,
                        required=True,
                        help="Which dataset to attack.")
    parser.add_argument("--nclasses",
                        type=int,
                        default=2,
                        help="How many classes for classification.")
    parser.add_argument(
        "--target_model",
        type=str,
        required=True,
        choices=['wordLSTM', 'bert', 'wordCNN'],
        help=
        "Target models for text classification: fasttext, charcnn, word level lstm "
        "For NLI: InferSent, ESIM, bert-base-uncased")
    parser.add_argument("--target_model_path",
                        type=str,
                        required=True,
                        help="pre-trained target model path")
    parser.add_argument(
        "--word_embeddings_path",
        type=str,
        default='',
        help="path to the word embeddings for the target model")
    parser.add_argument(
        "--counter_fitting_embeddings_path",
        type=str,
        required=True,
        help="path to the counter-fitting embeddings we used to find synonyms")
    parser.add_argument(
        "--counter_fitting_cos_sim_path",
        type=str,
        default='',
        help=
        "pre-compute the cosine similarity scores based on the counter-fitting embeddings"
    )
    parser.add_argument("--USE_cache_path",
                        type=str,
                        required=True,
                        help="Path to the USE encoder cache.")
    parser.add_argument(
        "--output_dir",
        type=str,
        default='adv_results',
        help="The output directory where the attack results will be written.")
    ## Model hyperparameters
    parser.add_argument(
        "--sim_score_window",
        default=15,
        type=int,
        help=
        "Text length or token number to compute the semantic similarity score")
    parser.add_argument("--import_score_threshold",
                        default=-1.,
                        type=float,
                        help="Required mininum importance score.")
    parser.add_argument("--sim_score_threshold",
                        default=0.7,
                        type=float,
                        help="Required minimum semantic similarity score.")
    parser.add_argument("--synonym_num",
                        default=5000,
                        type=int,
                        help="Number of synonyms to extract")
    parser.add_argument("--batch_size",
                        default=32,
                        type=int,
                        help="Batch size to get prediction")
    parser.add_argument("--data_size",
                        default=1000,
                        type=int,
                        help="Data size to create adversaries")
    parser.add_argument(
        "--perturb_ratio",
        default=0.,
        type=float,
        help="Whether use random perturbation for ablation study")
    parser.add_argument("--max_seq_length",
                        default=128,
                        type=int,
                        help="max sequence length for BERT target model")
    parser.add_argument("--target_dataset",
                        default="imdb",
                        type=str,
                        help="Dataset Name")
    parser.add_argument("--fuzz", default=0, type=int, help="Word Pruning Value")
    parser.add_argument("--top_k_words",
                        default=1000000,
                        type=int,
                        help="Top K Words")
    parser.add_argument("--allowed_qrs",
                        default=1000000,
                        type=int,
                        help="Allowerd qrs")
    args = parser.parse_args()

    if os.path.exists(args.output_dir) and os.listdir(args.output_dir):
        print("Output directory ({}) already exists and is not empty.".format(
            args.output_dir))
    else:
        os.makedirs(args.output_dir, exist_ok=True)

    # get data to attack; truncated to the first `data_size` samples
    texts, labels = dataloader.read_corpus(args.dataset_path, csvf=False)
    data = list(zip(texts, labels))
    data = data[:args.data_size]  # choose how many samples for adversary
    print("Data import finished!")

    # construct the target model and load its checkpoint
    print("Building Model...")
    if args.target_model == 'wordLSTM':
        model = Model(args.word_embeddings_path, nclasses=args.nclasses).cuda()
        checkpoint = torch.load(args.target_model_path, map_location='cuda:0')
        model.load_state_dict(checkpoint)
    elif args.target_model == 'wordCNN':
        model = Model(args.word_embeddings_path,
                      nclasses=args.nclasses,
                      hidden_size=150,
                      cnn=True).cuda()
        checkpoint = torch.load(args.target_model_path, map_location='cuda:0')
        model.load_state_dict(checkpoint)
    elif args.target_model == 'bert':
        model = NLI_infer_BERT(args.target_model_path,
                               nclasses=args.nclasses,
                               max_seq_length=args.max_seq_length)
    predictor = model.text_pred
    print("Model built!")

    # prepare synonym extractor:
    # build word<->index maps AND keep each word's raw embedding vector
    idx2word = {}
    word2idx = {}
    sim_lis = []
    word_embedding = defaultdict(list)
    print("Building vocab...")
    with open(args.counter_fitting_embeddings_path, 'r') as ifile:
        for line in ifile:
            word = line.split()[0]
            embedding = [float(num) for num in line.strip().split()[1:]]
            if word not in idx2word:
                idx2word[len(idx2word)] = word
                word2idx[word] = len(idx2word) - 1
                word_embedding[word] = embedding

    print("Building cos sim matrix...")
    if args.counter_fitting_cos_sim_path:
        # pre-computed similarity is unpickled into sim_lis (not cos_sim)
        print('Load pre-computed cosine similarity matrix from {}'.format(
            args.counter_fitting_cos_sim_path))
        with open(args.counter_fitting_cos_sim_path, "rb") as fp:
            sim_lis = pickle.load(fp)
        # load pre-computed cosine similarity matrix if provided
        #print('Load pre-computed cosine similarity matrix from {}'.format(args.counter_fitting_cos_sim_path))
        #cos_sim = np.load(args.counter_fitting_cos_sim_path)
    else:
        # compute cos_sim by normalizing rows then a single matmul
        print('Start computing the cosine similarity matrix!')
        embeddings = []
        with open(args.counter_fitting_embeddings_path, 'r') as ifile:
            for line in ifile:
                embedding = [float(num) for num in line.strip().split()[1:]]
                embeddings.append(embedding)
        embeddings = np.array(embeddings)
        print(embeddings.T.shape)
        norm = np.linalg.norm(embeddings, axis=1, keepdims=True)
        embeddings = np.asarray(embeddings / norm, "float64")
        cos_sim = np.dot(embeddings, embeddings.T)
    print("Cos sim import finished!")

    # build the semantic similarity module
    use = USE(args.USE_cache_path)

    # start attacking; counters and per-sample accumulators
    orig_failures = 0.
    adv_failures = 0.
    avg = 0.  # NOTE(review): never updated; printed as 0.0 at the end
    changed_rates = []
    nums_queries = []
    orig_texts = []
    adv_texts = []
    true_labels = []
    new_labels = []
    wrds = []
    s_queries = []
    f_queries = []
    sims_final = []  # NOTE(review): never filled
    success = []
    results = []
    fails = []
    scrs = []
    # fixed result-file layout keyed by model and dataset name
    log_file = "results_context/" + args.target_model + "/" + args.target_dataset + "/log.txt"
    result_file = "results_context/" + args.target_model + "/" + args.target_dataset + "/results.csv"
    wts_file = "tfidf_weights/" + "tfidf-" + args.target_dataset + ".csv"
    fail_file = "fails_tfidf/" + args.target_model + "/" + args.target_dataset + "/fails.csv"
    stop_words_set = criteria.get_stopwords()
    print('Start attacking!')
    for idx, (text, true_label) in enumerate(data):
        #print(text)
        #print(true_label)
        if idx % 20 == 0:
            print(str(idx) + " Samples Done")
            print(len(success))
            print(np.mean(changed_rates))
        if args.perturb_ratio > 0.:
            # ablation path — see NOTE in the docstring about cos_sim
            new_text, num_changed, orig_label, \
                new_label, num_queries = random_attack(text, true_label, predictor,
                                                       args.perturb_ratio, stop_words_set,
                                                       word2idx, idx2word, cos_sim,
                                                       sim_predictor=use,
                                                       sim_score_threshold=args.sim_score_threshold,
                                                       import_score_threshold=args.import_score_threshold,
                                                       sim_score_window=args.sim_score_window,
                                                       synonym_num=args.synonym_num,
                                                       batch_size=args.batch_size)
        else:
            new_text, num_changed, orig_label, \
                new_label, num_queries, pwrds, perts = attack(args.fuzz, args.top_k_words,
                                                              args.allowed_qrs, wts_file, idx,
                                                              text, true_label, predictor,
                                                              stop_words_set, word2idx, idx2word,
                                                              sim_lis, word_embedding,
                                                              sim_predictor=use,
                                                              sim_score_threshold=args.sim_score_threshold,
                                                              import_score_threshold=args.import_score_threshold,
                                                              sim_score_window=args.sim_score_window,
                                                              synonym_num=args.synonym_num,
                                                              batch_size=args.batch_size)
        scrs.append(perts)
        # clean-model misclassification vs. query bookkeeping
        if true_label != orig_label:
            orig_failures += 1
        else:
            nums_queries.append(num_queries)
        if true_label != new_label:
            adv_failures += 1
        #f_queries.append(num_queries)
        changed_rate = 1.0 * num_changed / len(text)
        # genuine success: clean prediction right, attacked prediction wrong
        if true_label == orig_label and true_label != new_label:
            temp = []
            s_queries.append(num_queries)
            success.append(idx)
            changed_rates.append(changed_rate)
            orig_texts.append(' '.join(text))
            adv_texts.append(new_text)
            true_labels.append(true_label)
            new_labels.append(new_label)
            wrds.append(pwrds)
            temp.append(idx)
            temp.append(' '.join(text))
            temp.append(new_text)
            temp.append(num_queries)
            temp.append(changed_rate * 100)
            results.append(temp)
            print("Attacked: " + str(idx))
        # failed attack (prediction unchanged): record for the fails CSV
        if true_label == orig_label and true_label == new_label:
            f_queries.append(num_queries)
            temp1 = []
            temp1.append(idx)
            temp1.append(' '.join(text))
            temp1.append(new_text)
            temp1.append(num_queries)
            fails.append(temp1)
    # NOTE(review): /1000 is hard-coded — correct only for data_size == 1000
    message = 'For target model {} on dataset window size {} with WP val {} top words {} qrs {} : ' \
              'original accuracy: {:.3f}%, adv accuracy: {:.3f}%, ' \
              'avg changed rate: {:.3f}%, num of queries: {:.1f}\n'.format(args.target_model,
                                                                           args.sim_score_window,
                                                                           args.fuzz,
                                                                           args.top_k_words, args.allowed_qrs,
                                                                           (1-orig_failures/1000)*100,
                                                                           (1-adv_failures/1000)*100,
                                                                           np.mean(changed_rates)*100,
                                                                           np.mean(nums_queries))
    print(message)
    log = open(log_file, 'a')
    log.write(message)
    with open(result_file, 'w') as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerows(results)
    #with open('scores-mr.csv','w') as csvfile:
    #    csvwriter = csv.writer(csvfile)
    #    csvwriter.writerows(scrs)
    # with open(fail_file,'w') as csvfile:
    #     csvwriter = csv.writer(csvfile)
    #     csvwriter.writerows(fails)
    # writing the data rows
    print(avg)
    print(len(f_queries))
    print(f_queries)
    with open(os.path.join(args.output_dir, 'adversaries.txt'), 'w') as ofile:
        for orig_text, adv_text, true_label, new_label in zip(
                orig_texts, adv_texts, true_labels, new_labels):
            ofile.write('orig sent ({}):\t{}\nadv sent ({}):\t{}\n\n'.format(
                true_label, orig_text, new_label, adv_text))
def main():
    """Attack entry point (BERTInference variant): attacks each sample,
    prints original vs. adversarial text, and tracks running attack time,
    average change rate and success rate on stdout only (nothing written
    to --output_dir despite the flag being accepted).
    """
    parser = argparse.ArgumentParser()
    ## Required parameters
    parser.add_argument("--dataset_path",
                        type=str,
                        required=True,
                        help="Which dataset to attack.")
    parser.add_argument("--nclasses",
                        type=int,
                        default=2,
                        help="How many classes for classification.")
    parser.add_argument(
        "--target_model",
        type=str,
        required=True,
        choices=['wordLSTM', 'bert', 'wordCNN'],
        help=
        "Target models for text classification: fasttext, charcnn, word level lstm "
        "For NLI: InferSent, ESIM, bert-base-uncased")
    parser.add_argument("--target_model_path",
                        type=str,
                        required=True,
                        help="pre-trained target model path")
    parser.add_argument(
        "--word_embeddings_path",
        type=str,
        default='',
        help="path to the word embeddings for the target model")
    parser.add_argument(
        "--counter_fitting_embeddings_path",
        type=str,
        required=True,
        help="path to the counter-fitting embeddings we used to find synonyms")
    parser.add_argument(
        "--counter_fitting_cos_sim_path",
        type=str,
        default='',
        help=
        "pre-compute the cosine similarity scores based on the counter-fitting embeddings"
    )
    parser.add_argument("--USE_cache_path",
                        type=str,
                        required=True,
                        help="Path to the USE encoder cache.")
    parser.add_argument(
        "--output_dir",
        type=str,
        default='adv_results',
        help="The output directory where the attack results will be written.")
    ## Model hyperparameters
    parser.add_argument(
        "--sim_score_window",
        default=15,
        type=int,
        help=
        "Text length or token number to compute the semantic similarity score")
    parser.add_argument("--import_score_threshold",
                        default=-1.,
                        type=float,
                        help="Required mininum importance score.")
    parser.add_argument("--sim_score_threshold",
                        default=0.7,
                        type=float,
                        help="Required minimum semantic similarity score.")
    parser.add_argument("--synonym_num",
                        default=50,
                        type=int,
                        help="Number of synonyms to extract")
    parser.add_argument("--batch_size",
                        default=32,
                        type=int,
                        help="Batch size to get prediction")
    parser.add_argument("--data_size",
                        default=1000,
                        type=int,
                        help="Data size to create adversaries")
    parser.add_argument(
        "--perturb_ratio",
        default=0.,
        type=float,
        help="Whether use random perturbation for ablation study")
    parser.add_argument("--max_seq_length",
                        default=128,
                        type=int,
                        help="max sequence length for BERT target model")
    args = parser.parse_args()

    # get data to attack; truncated to the first `data_size` samples
    texts, labels = dataloader.read_corpus(args.dataset_path)
    data = list(zip(texts, labels))
    data = data[:args.data_size]  # choose how many samples for adversary
    print("Data import finished!")

    # construct the model (this variant always uses BERTInference,
    # regardless of --target_model)
    print("Building Model...")
    model = BERTInference(args.target_model_path,
                          nclasses=args.nclasses,
                          max_seq_length=args.max_seq_length)
    predictor = model.text_pred
    print("Model built!")

    # prepare synonym extractor:
    # build word<->index maps from the embedding file
    idx2word = {}
    word2idx = {}
    print("Building vocab...")
    with open(args.counter_fitting_embeddings_path, 'r') as ifile:
        for line in ifile:
            word = line.split()[0]
            if word not in idx2word:
                idx2word[len(idx2word)] = word
                word2idx[word] = len(idx2word) - 1

    # NOTE(review): unlike the sibling variants, this one always loads the
    # pre-computed similarity file and has no compute fallback, so an empty
    # --counter_fitting_cos_sim_path fails here.
    print("Building cos sim matrix...")
    cos_sim = np.load(args.counter_fitting_cos_sim_path)
    print("Cos sim import finished!")

    # build the semantic similarity module
    # use = UniversalSentenceEncoder(args.USE_cache_path)
    use = UniversalSentenceEncoder()

    stop_words_set = criteria.get_stopwords()
    print('Start attacking!')
    changed_rates = []
    total_time, success, total = 0, 0, 0
    for idx, (text, true_label) in enumerate(data):
        tick = time.time()
        new_text, num_changed, orig_label, \
            new_label, num_queries = attack(text, true_label, predictor, stop_words_set,
                                            word2idx, idx2word, cos_sim, sim_predictor=use,
                                            sim_score_threshold=args.sim_score_threshold,
                                            import_score_threshold=args.import_score_threshold,
                                            sim_score_window=args.sim_score_window,
                                            synonym_num=args.synonym_num,
                                            batch_size=args.batch_size)
        old_text = ' '.join(text)
        print(f"Original: {old_text}")
        print()
        print(f"New: {new_text}")
        print("--------------------------------------------------------------")
        changed_rate = 1.0 * num_changed / len(text)
        # success: clean prediction was right and the attack flipped it;
        # only successful attacks contribute to total_time and changed_rates
        if true_label == orig_label and true_label != new_label:
            changed_rates.append(changed_rate)
            tock = time.time()
            total_time += tock - tick
            success += 1
        total += 1
        print(
            f"Time: {total_time}\tAvg. Change Rate: {np.mean(changed_rates)*100}\tSuccess Rate: {(success / total) * 100}"
        )
def main():
    """Run TextFooler + TextBugger attacks against a fine-tuned BERT grader,
    collect successful adversarial rewrites ("flips"), then mine frequent
    token-replacement rules from those flips and keep the rules that apply
    broadly across the dataset.

    Side effects: loads model/data from hard-coded relative paths, prints
    progress, and writes four .npy result files to the working directory.

    NOTE(review): this file defines `main()` twice; this second definition
    shadows the earlier one at import time — confirm which entry point is
    intended.
    """
    # Own data.
    # NOTE(review): allow_pickle=True deserializes arbitrary pickled objects —
    # only safe because the file is locally produced, not untrusted input.
    val_data = np.load(
        '../bachelor-thesis/models/bert_scientsBank/correct_sciEntsBank_val.npy',
        allow_pickle=True)
    # Own model: 3-way sequence classifier fine-tuned from bert-base-uncased.
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased',
                                                          num_labels=3)
    model.load_state_dict(
        torch.load('../bachelor-thesis/models/bert_sciEntsBank/model.pt'))
    model.cuda()
    model.eval()
    # Data derived from correct model predictions; each element of val_data
    # appears to be (answer pair, prediction) — TODO confirm layout.
    # All cases kept here are the ones with prediction == 0 ("incorrect").
    data = [separate_answers(x[0]) for x in val_data if x[1] == 0]

    # --- TextFooler part ---
    # Prepare synonym extractor: build word<->index maps from the
    # counter-fitted embedding file (first token per line is the word).
    idx2word = {}
    word2idx = {}
    stop_words_set = criteria.get_stopwords()
    print("Building vocab...")
    with open("../TextFooler/data/counter-fitted-vectors.txt", 'r',
              encoding="utf8") as ifile:
        for line in ifile:
            word = line.split()[0]
            if word not in idx2word:
                idx2word[len(idx2word)] = word
                word2idx[word] = len(idx2word) - 1
    print("Building cos sim matrix...")
    # Precomputed pairwise cosine similarities between counter-fitted vectors.
    cos_sim = np.load("../TextFooler/data/cos_sim_counter_fitting.npy",
                      allow_pickle=True)
    print("Cos sim import finished!")
    # Universal Sentence Encoder wrapper used as the semantic-similarity judge.
    use = USE("use")
    print('Start attacking!')
    orig_scores = {}  # NOTE(review): written nowhere below — appears unused.
    # Maps a student's answer (as string) -> list of adversarial rewrites.
    flips = collections.defaultdict(lambda: [])
    # Find flips in data; per-attack bookkeeping of successes and query counts.
    adversary_successes = {}
    adversary_count = {}
    # Was used to track top adjectives/adverbs
    # main_tracker_adv = {}
    # main_tracker_adj = {}
    for i, inst in enumerate(data):
        print("Data instances finished: ", i)
        adversaries = []
        num_tf_changed, num_tf_queries, tf_adversaries = text_fooler(
            inst, 0, model, stop_words_set, word2idx, idx2word, cos_sim,
            sim_predictor=use, sim_score_threshold=0.7,
            import_score_threshold=-1., sim_score_window=4, synonym_num=50,
            batch_size=16)
        # Uncomment for textfooler only
        query_num, success_num, bug_adversaries = text_bugger(inst, 0, model)
        # Was used to track top adjectives and adversaries
        """, tracker_adj, tracker_adv"""
        # All adversaries from both attacks, pooled.
        adversaries.extend(tf_adversaries)
        adversaries.extend(bug_adversaries)
        # Was used to track top adjectives and adversaries
        """
        for key in tracker_adj:
            main_tracker_adj[key] = main_tracker_adj.get(key, 0) + tracker_adj[key]
        for key in tracker_adv:
            main_tracker_adv[key] = main_tracker_adv.get(key, 0) + tracker_adv[key]
        """
        if len(adversaries) > 0:
            # inst[1] presumably is the student's answer token list — verify
            # against separate_answers.
            flips[list_to_string(inst[1])].extend(adversaries)
        adversary_successes['tf'] = adversary_successes.get(
            'tf', 0) + num_tf_changed
        adversary_count['tf'] = adversary_count.get('tf', 0) + num_tf_queries
        # Merge per-bug-type counters returned by text_bugger.
        for key in query_num:
            adversary_successes[key] = adversary_successes.get(
                key, 0) + success_num.get(key, 0)
            adversary_count[key] = adversary_count.get(
                key, 0) + query_num.get(key, 0)
    # Was used to track top adjectives and adversaries
    # np.save("adv_result.npy", main_tracker_adv)
    # np.save("adj_result.npy", main_tracker_adj)
    np.save("adversary_successes_tf.npy", adversary_successes)
    np.save("adversary_count_tf.npy", adversary_count)
    # Rule miner over all student answers (SEARs-style replace rules).
    tr2 = replace_rules.TextToReplaceRules(
        nlp, [list_to_string(x[1]) for x in data], [],
        min_freq=0.005, min_flip=0.005, ngram_size=2)
    # Finding frequent rules: deduplicate by rule hash, remember which flip
    # index each rule came from.
    frequent_rules = []
    rule_idx = {}    # rule hash -> dense index into frequent_rules
    rule_flips = {}  # dense index -> list of flip indices that produced it
    for z, f in enumerate(flips):
        # f is the student's answer
        # flips[f] flips for given student's answer
        rules = tr2.compute_rules(f, [list_to_string(x) for x in flips[f]],
                                  use_pos=True, use_tags=False)
        for rs in rules:
            for r in rs:
                if r.hash() not in rule_idx:
                    i = len(rule_idx)
                    rule_idx[r.hash()] = i
                    rule_flips[i] = []
                    frequent_rules.append(r)
                i = rule_idx[r.hash()]
                rule_flips[i].append(z)
        if z % 1000 == 0:
            print("Done with flip nr. ", z)
    # Tokenize the student's answers once, for rule application.
    tokenized_stud_ans = tokenizer.tokenize(
        [list_to_string(x[1]) for x in data])
    model_preds = {}  # cache: rewritten text -> predicted label
    print("Number of frequent rules: ", len(frequent_rules))
    a = time.time()
    # NOTE(review): rule_flips is rebuilt from scratch here; the mining-phase
    # contents above are discarded intentionally (indices now mean data rows).
    rule_flips = {}
    rule_other_texts = {}
    rule_other_flips = {}
    rule_applies = {}
    for i, r in enumerate(frequent_rules):
        if i % 100 == 0:
            print("Nr. of rules applied: ", i)
        # Get indices, where rule can be applied
        idxs = list(tr2.get_rule_idxs(r))
        to_apply = [tokenized_stud_ans[x] for x in idxs]
        applies, nt = r.apply_to_texts(to_apply, fix_apostrophe=False)
        # Find indices, where rule has been applied (map back to data rows).
        applies = [idxs[x] for x in applies]
        # Only query the model for rewrites not seen before.
        to_compute = [x for x in zip(applies, nt) if x[1] not in model_preds]
        if to_compute:
            # New predicts
            new_labels = []
            for compute in to_compute:
                j, new_stud = compute
                # Get reference answer for sequence classification
                orig_instance = data[j]
                logits = predict(model, orig_instance[0], new_stud, 0)
                new_label = int(np.argmax(logits))
                new_labels.append(new_label)
            for x, y in zip(to_compute, new_labels):
                model_preds[x[1]] = y
        new_labels = np.array([model_preds[x] for x in nt])
        # Label 2 appears to be the target ("flipped") class — TODO confirm
        # against the label encoding used in training.
        where_flipped = np.where(new_labels == 2)[0]
        flips = sorted([applies[x] for x in where_flipped])
        rule_flips[i] = flips
        rule_other_texts[i] = nt
        rule_other_flips[i] = where_flipped
        rule_applies[i] = applies
    print("Time used for applying rules: ", time.time() - a)
    # Keep rules that flipped more than 1% of the dataset.
    threshold = int(0.01 * len(data))
    really_frequent_rules_idx = [
        i for i in range(len(rule_flips)) if len(rule_flips[i]) > threshold
    ]
    # test = [frequent_rules[i] for i in really_frequent_rules_idx if frequent_rules[i].hash().split()[1] == '->']
    # test_2 = [i.hash() for i in test if i.hash()[:4] == 'text']
    print("Amount of really frequent rules: ", len(really_frequent_rules_idx))
    print("Done!")
    high_number_rules = [
        frequent_rules[idx] for idx in really_frequent_rules_idx
    ]
    np.save("frequent_rules.npy", high_number_rules)