def get_qualitative_examples():
    """Log examples where spell correction undoes a successful attack.

    Reads (line, tag) pairs from ``data/classes/test_retokenized.txt`` via
    ``read_valid_lines``, shuffles them and inspects at most 200 of them.
    For each example the model already classifies correctly, every adversary
    yielded by ``attacks.all_one_attack`` is tried; an adversary is logged
    when it changes the model's prediction AND the prediction on
    ``checker.correct_string(adversary)`` matches the gold tag again.

    Returns:
        None. All output goes through ``log.pr`` / ``log.pr_red`` /
        ``log.pr_green``.
    """
    lines, tags = read_valid_lines("data/classes/test_retokenized.txt")
    examples = list(zip(lines, tags))
    if not examples:
        # Unzipping an empty list (`lines, tags = zip(*c)`) raises ValueError,
        # so an empty dataset is handled explicitly instead.
        return None
    random.shuffle(examples)

    # Iterating a sliced list (rather than a re-zipped iterator) also lets
    # tqdm see the length of the work.
    for line, tag in tqdm(examples[:200]):
        w_i, c_i = get_word_and_char_indices(line)

        # check if model prediction is incorrect, if yes, find next example...
        model_prediction = predict(w_i, c_i)
        gold = t2i[tag]
        if gold != model_prediction:
            # already incorrect, not interesting...
            continue

        for idx, adversary in attacks.all_one_attack(line):
            w_i, c_i = get_word_and_char_indices(adversary)
            if predict(w_i, c_i) == gold:
                # this example doesn't break the model...
                continue

            corrected_string = checker.correct_string(adversary)
            w_i, c_i = get_word_and_char_indices(corrected_string)
            if predict(w_i, c_i) != gold:
                # after correction the tag isn't correct...
                continue

            log.pr(" -------------- ")
            log.pr("Original line = %s" % (line))
            log.pr("Original label = %s" % (tag))
            log.pr_red("Adversary = %s" % (adversary))
            log.pr_green("Correction = %s" % (corrected_string))
            log.pr(" -------------- ")

    return None
def check_against_spell_mistakes(filename):
    """Report model accuracy under a greedy multi-round attack.

    Reads (line, tag) pairs from *filename* via ``read_valid_lines`` and
    shuffles them. Examples the model already misclassifies are counted as
    failures at round 0. Every other example is attacked for up to
    ``NUM_ATTACKS`` rounds: each round generates adversaries of the current
    lowest-confidence text (optionally passed through
    ``checker.correct_string`` when ``params['is_spell_check']`` is set) and
    stops at the first adversary whose prediction differs from the gold tag.
    If none flips the prediction, the lowest-confidence adversary seen so far
    seeds the next round.

    NOTE(review): a later ``def check_against_spell_mistakes`` in this file
    rebinds this name, so this definition is shadowed once the module has
    finished loading.

    Args:
        filename: path handed to ``read_valid_lines``.

    Returns:
        None. Cumulative accuracy per round is logged with ``log.pr_red``;
        when ``params['analyse']`` is set, the collected ``error_analyser``
        dict is pickled to ``error_analyser_<timestamp>.p``.

    Raises:
        ValueError: if ``type_of_attack`` contains none of
            'drop', 'swap', 'key', 'add', 'all'.
    """
    lines, tags = read_valid_lines(filename)
    examples = list(zip(lines, tags))
    if not examples:
        # Empty input used to crash on `zip(*c)` unpacking (and would divide
        # by zero in the report below); there is nothing to evaluate.
        return None
    random.shuffle(examples)

    # if in small (or COMPUTATION HEAVY) modes
    if params['small']:
        examples = examples[:200]
    if params['small'] and params['sc_atd']:
        examples = examples[:99]
    num_examples = len(examples)

    # Slot 0 counts examples that are wrong before any attack; slot k counts
    # examples first broken in attack round k.
    inc_count_per_attack = [0.0 for _ in range(NUM_ATTACKS + 1)]
    error_analyser = {}

    for line, tag in tqdm(examples):
        w_i, c_i = get_word_and_char_indices(line)
        if params['is_spell_check']:
            w_i, c_i = get_word_and_char_indices(checker.correct_string(line))

        # check if model prediction is incorrect, if yes, continue
        model_prediction = predict(w_i, c_i)
        gold = t2i[tag]
        if gold != model_prediction:
            # already incorrect, no attack needed
            inc_count_per_attack[0] += 1.0
            continue

        worst_example = line
        worst_confidence = 1.0
        worst_idx = -1
        ignore_indices = set()

        for attack_count in range(1, 1 + NUM_ATTACKS):
            # The position chosen in the previous round is excluded from the
            # next one (the initial -1 is added as-is, as before).
            ignore_indices.add(worst_idx)

            if 'drop' in type_of_attack:
                gen_attacks = attacks.drop_one_attack(
                    worst_example, ignore_indices,
                    include_ends=params['include_ends'])
            elif 'swap' in type_of_attack:
                gen_attacks = attacks.swap_one_attack(
                    worst_example, include_ends=params['include_ends'])
            elif 'key' in type_of_attack:
                gen_attacks = attacks.key_one_attack(
                    worst_example, ignore_indices,
                    include_ends=params['include_ends'])
            elif 'add' in type_of_attack:
                gen_attacks = attacks.add_one_attack(
                    worst_example, ignore_indices,
                    include_ends=params['include_ends'])
            elif 'all' in type_of_attack:
                gen_attacks = attacks.all_one_attack(
                    worst_example, ignore_indices,
                    include_ends=params['include_ends'])
            else:
                # Previously this fell through with `gen_attacks` unbound.
                raise ValueError(
                    "unsupported type_of_attack: %r" % (type_of_attack,))

            found_incorrect = False
            for idx, adversary in gen_attacks:
                original_adv = adversary
                if params['is_spell_check']:
                    adversary = checker.correct_string(adversary)
                w_i, c_i = get_word_and_char_indices(adversary)
                adv_pred = predict(w_i, c_i)
                confidence = get_confidence(w_i, c_i)

                if confidence < worst_confidence:
                    worst_confidence = confidence
                    worst_idx = idx
                    worst_example = adversary

                if adv_pred != gold:
                    # found incorrect prediction
                    found_incorrect = True
                    break

            if found_incorrect:
                inc_count_per_attack[attack_count] += 1.0
                if params['analyse']:
                    error_analyser[line] = {
                        'adversary': original_adv.split()[idx],
                        'correction': adversary.split()[idx],
                        'idx': idx,
                    }
                break

    for num in range(NUM_ATTACKS + 1):
        broken_so_far = sum(inc_count_per_attack[:num + 1])
        log.pr_red(
            'adversarial accuracy of the model after %d attacks = %.2f' %
            (num, 100. * (1 - broken_so_far / num_examples)))

    if params['analyse']:
        curr_time = datetime.datetime.now().strftime("%B_%d_%I:%M%p")
        # Context manager so the pickle file is flushed and closed.
        with open("error_analyser_" + str(curr_time) + ".p", 'wb') as out_file:
            pickle.dump(error_analyser, out_file)

    return None
def _spell_correct_tokens(text):
    """Return *text* with each space-separated token run through ``spell.correction``."""
    # Presumably `spell` is a pyspellchecker SpellChecker; some releases
    # return None when no candidate exists (TODO confirm), which would make
    # " ".join fail, so the original token is kept in that case.
    return " ".join(spell.correction(token) or token
                    for token in text.split(" "))


def _single_edit_attacks(example, ignore_indices):
    """Generate adversaries of *example* for the configured ``type_of_attack``."""
    include_ends = params['include_ends']
    if 'drop' in type_of_attack:
        return attacks.drop_one_attack(example, ignore_indices,
                                       include_ends=include_ends)
    if 'swap' in type_of_attack:
        # swap_one_attack is the only generator called without ignore indices.
        return attacks.swap_one_attack(example, include_ends=include_ends)
    if 'key' in type_of_attack:
        return attacks.key_one_attack(example, ignore_indices,
                                      include_ends=include_ends)
    if 'add' in type_of_attack:
        return attacks.add_one_attack(example, ignore_indices,
                                      include_ends=include_ends)
    if 'all' in type_of_attack:
        return attacks.all_one_attack(example, ignore_indices,
                                      include_ends=include_ends)
    # Previously this case left `gen_attacks` unbound.
    raise ValueError("unsupported type_of_attack: %r" % (type_of_attack,))


def _greedy_attack(start_text, tag, to_indices):
    """Attack *start_text* for up to NUM_ATTACKS rounds.

    Returns ``(attack_count, idx, raw_adversary, evaluated_adversary)`` for
    the first adversary whose prediction differs from *tag*, or None when no
    round breaks the model. ``evaluated_adversary`` is the spell-corrected
    text when ``params['is_spell_check']`` is set, else the raw adversary.
    """
    worst_example = start_text
    worst_confidence = 1.0
    worst_idx = -1
    ignore_indices = set()

    for attack_count in range(1, 1 + NUM_ATTACKS):
        # Exclude the position picked in the previous round (initially -1).
        ignore_indices.add(worst_idx)
        for idx, adversary in _single_edit_attacks(worst_example,
                                                   ignore_indices):
            original_adv = adversary
            if params['is_spell_check']:
                # Correct the adversary itself. The single-sentence path used
                # to correct `line[0]` (first character of the clean line)
                # and the pair path discarded the result via
                # `adversary = adversary`.
                adversary = _spell_correct_tokens(adversary)
            w_i, c_i = to_indices(adversary)
            adv_pred = predict(w_i, c_i)
            confidence = get_confidence(w_i, c_i)

            # Remember the adversary the model is least sure about; it seeds
            # the next round if nothing flips the prediction in this one.
            if confidence < worst_confidence:
                worst_confidence = confidence
                worst_idx = idx
                worst_example = adversary

            if adv_pred != tag:
                # found incorrect prediction
                return attack_count, idx, original_adv, adversary

    return None


def check_against_spell_mistakes(filename):
    """Report model accuracy under a greedy multi-round attack.

    Two input modes are selected by the module-level ``singles`` flag:

    * truthy: ``read_valid_lines_single`` yields (line, tag) pairs where each
      line is a string; the pairs are shuffled.
    * falsy: each element of ``lines`` is an indexable record whose item 0 is
      the text that gets attacked and whose item 2 is the tag; order is kept.
      NOTE(review): this mode also reads through ``read_valid_lines_single``
      -- confirm that is the intended reader for record-shaped input.

    Examples the model already gets wrong are counted at round 0; the others
    are attacked for up to ``NUM_ATTACKS`` rounds. With
    ``params['is_spell_check']`` set, both the clean text and every adversary
    are token-wise corrected with ``spell.correction`` before prediction.

    Args:
        filename: path handed to ``read_valid_lines_single``.

    Returns:
        None. Cumulative accuracy per round is logged with ``log.pr_red``;
        when ``params['analyse']`` is set, ``error_analyser`` is pickled to
        ``error_analyser_<timestamp>.p``.

    Raises:
        ValueError: if ``type_of_attack`` contains none of
            'drop', 'swap', 'key', 'add', 'all'.
    """
    single_mode = bool(singles)

    lines, tags = read_valid_lines_single(filename)

    # NOTE(review): this model is loaded and moved to the GPU but never used
    # below; only a disabled masked-LM correction path referenced it. Kept so
    # the function's side effects are unchanged -- consider removing.
    spell_check_model = BertForMaskedLM.from_pretrained(
        "bert-base-uncased", cache_dir="/data/kashyap_data/")
    spell_check_model.eval()
    spell_check_model.to('cuda')

    if single_mode:
        pairs = list(zip(lines, tags))
        random.shuffle(pairs)
        lines = [text for text, _ in pairs]
        tags = [label for _, label in pairs]

    # if in small (or COMPUTATION HEAVY) modes
    if params['small']:
        lines = lines[:200]
        tags = tags[:200]
    if params['small'] and params['sc_atd']:
        lines = lines[:99]
        tags = tags[:99]

    num_examples = len(lines)
    if num_examples == 0:
        # Empty input used to crash (tuple unpacking of `zip(*c)` or the
        # division in the report); there is nothing to evaluate.
        return None

    # Slot 0 counts examples that are wrong before any attack; slot k counts
    # examples first broken in attack round k.
    inc_count_per_attack = [0.0 for _ in range(NUM_ATTACKS + 1)]
    error_analyser = {}

    examples = zip(lines, tags) if single_mode else lines
    for example in tqdm(examples):
        if single_mode:
            line, tag = example
            w_i, c_i = get_word_and_char_indices(line)
            if params['is_spell_check']:
                line = _spell_correct_tokens(line)
                w_i, c_i = get_word_and_char_indices(line)
            attack_seed = line
            to_indices = get_word_and_char_indices
        else:
            line = example
            tag = line[2]
            w_i, c_i = get_word_and_char_indices(line)
            if params['is_spell_check']:
                # The record is updated in place, as before.
                line[0] = _spell_correct_tokens(line[0])
                w_i, c_i = get_word_and_char_indices(line)
            attack_seed = line[0]  # could make random choice
            to_indices = get_word_and_char_indices_adv

        # check if model prediction is incorrect, if yes, continue
        model_prediction = predict(w_i, c_i)
        if tag != model_prediction:
            # already incorrect, no attack needed
            inc_count_per_attack[0] += 1.0
            continue

        outcome = _greedy_attack(attack_seed, tag, to_indices)
        if outcome is None:
            continue

        attack_count, idx, original_adv, adversary = outcome
        inc_count_per_attack[attack_count] += 1.0
        if params['analyse']:
            # NOTE(review): when `singles` is falsy, `line` is the whole
            # record; if records are lists this key is unhashable -- confirm
            # the record type.
            error_analyser[line] = {
                'adversary': original_adv.split()[idx],
                'correction': adversary.split()[idx],
                'idx': idx,
            }

    for num in range(NUM_ATTACKS + 1):
        broken_so_far = sum(inc_count_per_attack[:num + 1])
        log.pr_red(
            'adversarial accuracy of the model after %d attacks = %.2f' %
            (num, 100. * (1 - broken_so_far / num_examples)))

    if params['analyse']:
        curr_time = datetime.datetime.now().strftime("%B_%d_%I:%M%p")
        # Context manager so the pickle file is flushed and closed.
        with open("error_analyser_" + str(curr_time) + ".p", 'wb') as out_file:
            pickle.dump(error_analyser, out_file)

    return None