def apply_rules(dataset_path, model_path):
    data = np.load(dataset_path, allow_pickle=True)
    test_data = [separate_answers(x[0]) for x in data if int(x[1]) == 0]
    top_rules = np.load("final_rules.npy", allow_pickle=True)
    tr2 = replace_rules.TextToReplaceRules(nlp, [x[1] for x in test_data], [],
                                           min_freq=0.005, min_flip=0.005, ngram_size=2)
    # Own model
    model = AlbertForSequenceClassification.from_pretrained(pretrained_weights, num_labels=3)
    model.load_state_dict(torch.load(model_path))
    model.cuda()
    model.eval()

    tokenized_stud_ans = tokenizer.tokenize([x[1] for x in test_data])
    model_preds = {}
    rule_flip_amount = {}
    data_id_flipped = {}
    a = time.time()
    for rule in top_rules:
        idxs = list(tr2.get_rule_idxs(rule))
        to_apply = [tokenized_stud_ans[x] for x in idxs]
        applies, nt = rule.apply_to_texts(to_apply, fix_apostrophe=False)
        # Find indices, where rule has been applied
        applies = [idxs[x] for x in applies]
        to_compute = [x for x in zip(applies, nt) if x[1] not in model_preds]
        if to_compute:
            # New predicts
            new_labels = []
            for compute in to_compute:
                j, new_stud = compute
                # Get reference answer for sequence classification
                orig_instance = test_data[j]
                logits = predict(model, orig_instance[0], new_stud, 0)
                new_label = int(np.argmax(logits))
                new_labels.append(new_label)
            for x, y in zip(to_compute, new_labels):
                model_preds[x[1]] = y
        new_labels = np.array([model_preds[x] for x in nt])
        where_flipped = np.where(new_labels == 2)[0]
        flips = sorted([applies[x] for x in where_flipped])
        rule_flip_amount[rule.hash()] = len(flips)
        data_id_flipped[rule.hash()] = list(where_flipped)
        # print("Done with " + rule.hash())

    # Top 10 rules
    top_10 = [x.replace("text_", "").replace("pos_", "")
              for x in list({k: v for k, v in sorted(rule_flip_amount.items(),
                                                     key=lambda item: item[1],
                                                     reverse=True)})[:10]]
    np.save(model_path[:model_path.rfind("/") + 1] + "top_10.npy", top_10)
    print("Time used for applying rules: ", time.time() - a)
    print("Total amount of adversaries:", sum(list(rule_flip_amount.values())))
    print("Total amount of afflicted data instances:",
          len(set(np.concatenate(list(data_id_flipped.values())).ravel().tolist())))
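# ---------------------------------------------------------------------------
# `predict` (used above and in main() below) is defined elsewhere in this repo.
# The function below is only a sketch of the behaviour assumed here: encode the
# reference answer together with the (possibly perturbed) student answer, run
# the sequence-classification model and return its logits. The tokenizer
# checkpoint, max length and device handling are assumptions, not the original
# implementation.
# ---------------------------------------------------------------------------
def predict_sketch(model, reference_answer, student_answer, _label,
                   tokenizer_name='albert-base-v2', max_length=256):
    import torch  # assumed to already be imported at module level
    from transformers import AutoTokenizer  # assumed dependency
    tok = AutoTokenizer.from_pretrained(tokenizer_name)
    enc = tok(reference_answer, student_answer, truncation=True,
              max_length=max_length, return_tensors='pt')
    enc = {k: v.to(next(model.parameters()).device) for k, v in enc.items()}
    with torch.no_grad():
        out = model(**enc)
    # transformers >= 4 returns an output object with .logits; older versions a tuple
    logits = out.logits if hasattr(out, 'logits') else out[0]
    return logits.squeeze(0).cpu().numpy()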
    return fs


orig_scores = {}
flips = collections.defaultdict(lambda: [])
# Find flips in data
for i, inst in enumerate(data):
    if i % 1 == 0:
        print("Data instance nr: ", i)
    fs = create_possible_flips(inst, model, topk=100, threshold=-10)
    # Key for the flips is the student's answer
    flips[list_to_string(inst[1])].extend([x[0] for x in fs])

tr2 = replace_rules.TextToReplaceRules(nlp, [list_to_string(x[1]) for x in data], [],
                                       min_freq=0.005, min_flip=0.00, ngram_size=4)

# Finding frequent rules
frequent_rules = []
rule_idx = {}
rule_flips = {}
for z, f in enumerate(flips):
    # f is the student's answer
    # flips[f] flips for given student's answer
    rules = tr2.compute_rules(f, flips[f], use_pos=True, use_tags=False)
    for rs in rules:
        for r in rs:
            if r.hash() not in rule_idx:
                i = len(rule_idx)
                rule_idx[r.hash()] = i
def main():
    # Own data
    val_data = np.load(
        '../bachelor-thesis/models/bert_scientsBank/correct_sciEntsBank_val.npy',
        allow_pickle=True)
    # Own model
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)
    model.load_state_dict(
        torch.load('../bachelor-thesis/models/bert_sciEntsBank/model.pt'))
    model.cuda()
    model.eval()
    # Instances the model predicted correctly, as tuples of reference answer,
    # student's answer and prediction; keep only label 0, i.e. incorrect student answers.
    data = [separate_answers(x[0]) for x in val_data if x[1] == 0]

    # TextFooler part
    # Prepare synonym extractor: build dictionary via the embedding file
    idx2word = {}
    word2idx = {}
    stop_words_set = criteria.get_stopwords()
    print("Building vocab...")
    with open("../TextFooler/data/counter-fitted-vectors.txt", 'r', encoding="utf8") as ifile:
        for line in ifile:
            word = line.split()[0]
            if word not in idx2word:
                idx2word[len(idx2word)] = word
                word2idx[word] = len(idx2word) - 1

    print("Building cos sim matrix...")
    cos_sim = np.load("../TextFooler/data/cos_sim_counter_fitting.npy", allow_pickle=True)
    print("Cos sim import finished!")
    use = USE("use")

    print('Start attacking!')
    orig_scores = {}
    flips = collections.defaultdict(lambda: [])
    # Find flips in data
    adversary_successes = {}
    adversary_count = {}
    # Was used to track top adjectives/adverbs
    # main_tracker_adv = {}
    # main_tracker_adj = {}
    for i, inst in enumerate(data):
        print("Data instances finished: ", i)
        adversaries = []
        num_tf_changed, num_tf_queries, tf_adversaries = text_fooler(
            inst, 0, model, stop_words_set, word2idx, idx2word, cos_sim,
            sim_predictor=use, sim_score_threshold=0.7, import_score_threshold=-1.,
            sim_score_window=4, synonym_num=50, batch_size=16)
        # Uncomment for textfooler only
        query_num, success_num, bug_adversaries = text_bugger(inst, 0, model)
        # Was used to track top adjectives/adverbs (extra return values):
        # ..., tracker_adj, tracker_adv
        # All adversaries
        adversaries.extend(tf_adversaries)
        adversaries.extend(bug_adversaries)
        # Was used to track top adjectives/adverbs:
        # for key in tracker_adj:
        #     main_tracker_adj[key] = main_tracker_adj.get(key, 0) + tracker_adj[key]
        # for key in tracker_adv:
        #     main_tracker_adv[key] = main_tracker_adv.get(key, 0) + tracker_adv[key]
        if len(adversaries) > 0:
            flips[list_to_string(inst[1])].extend(adversaries)
        adversary_successes['tf'] = adversary_successes.get('tf', 0) + num_tf_changed
        adversary_count['tf'] = adversary_count.get('tf', 0) + num_tf_queries
        for key in query_num:
            adversary_successes[key] = adversary_successes.get(key, 0) + success_num.get(key, 0)
            adversary_count[key] = adversary_count.get(key, 0) + query_num.get(key, 0)

    # Was used to track top adjectives/adverbs
    # np.save("adv_result.npy", main_tracker_adv)
    # np.save("adj_result.npy", main_tracker_adj)
    np.save("adversary_successes_tf.npy", adversary_successes)
    np.save("adversary_count_tf.npy", adversary_count)

    tr2 = replace_rules.TextToReplaceRules(
        nlp, [list_to_string(x[1]) for x in data], [],
        min_freq=0.005, min_flip=0.005, ngram_size=2)

    # Finding frequent rules
    frequent_rules = []
    rule_idx = {}
    rule_flips = {}
    for z, f in enumerate(flips):
        # f is the student's answer
        # flips[f] flips for given student's answer
        rules = tr2.compute_rules(f, [list_to_string(x) for x in flips[f]],
                                  use_pos=True, use_tags=False)
        for rs in rules:
            for r in rs:
                if r.hash() not in rule_idx:
                    i = len(rule_idx)
                    rule_idx[r.hash()] = i
                    rule_flips[i] = []
                    frequent_rules.append(r)
                i = rule_idx[r.hash()]
                rule_flips[i].append(z)
        if z % 1000 == 0:
            print("Done with flip nr. ", z)
", z) # Tokenize the student's answers tokenized_stud_ans = tokenizer.tokenize( [list_to_string(x[1]) for x in data]) model_preds = {} print("Number of frequent rules: ", len(frequent_rules)) a = time.time() rule_flips = {} rule_other_texts = {} rule_other_flips = {} rule_applies = {} for i, r in enumerate(frequent_rules): if i % 100 == 0: print("Nr. of rules applied: ", i) # Get indices, where rule can be applied idxs = list(tr2.get_rule_idxs(r)) to_apply = [tokenized_stud_ans[x] for x in idxs] applies, nt = r.apply_to_texts(to_apply, fix_apostrophe=False) # Find indices, where rule has been applied applies = [idxs[x] for x in applies] to_compute = [x for x in zip(applies, nt) if x[1] not in model_preds] if to_compute: # New predicts new_labels = [] for compute in to_compute: j, new_stud = compute # Get reference answer for sequence classification orig_instance = data[j] logits = predict(model, orig_instance[0], new_stud, 0) new_label = int(np.argmax(logits)) new_labels.append(new_label) for x, y in zip(to_compute, new_labels): model_preds[x[1]] = y new_labels = np.array([model_preds[x] for x in nt]) where_flipped = np.where(new_labels == 2)[0] flips = sorted([applies[x] for x in where_flipped]) rule_flips[i] = flips rule_other_texts[i] = nt rule_other_flips[i] = where_flipped rule_applies[i] = applies print("Time used for applying rules: ", time.time() - a) threshold = int(0.01 * len(data)) really_frequent_rules_idx = [ i for i in range(len(rule_flips)) if len(rule_flips[i]) > threshold ] # test = [frequent_rules[i] for i in really_frequent_rules_idx if frequent_rules[i].hash().split()[1] == '->'] # test_2 = [i.hash() for i in test if i.hash()[:4] == 'text'] print("Amount of really frequent rules: ", len(really_frequent_rules_idx)) print("Done!") high_number_rules = [ frequent_rules[idx] for idx in really_frequent_rules_idx ] np.save("frequent_rules.npy", high_number_rules)