import copy

# `nlp` (the spaCy pipeline), `config`, `text_to_vector`, `doc_process`,
# `get_embedding_dict`, `_compile_perturbed_tokens` and `BU_MHS` are assumed to
# be provided by the surrounding project modules.


def evaluate_word_saliency(doc, grad_guide, tokenizer, input_y, dataset, level):
    word_saliency_list = []

    # Zero out the encoding of the current word and measure the change in the
    # classification probability.
    if level == 'word':
        max_len = config.word_max_len[dataset]
        text = [doc[position].text for position in range(len(doc))]
        text = ' '.join(text)
        origin_vector = text_to_vector(text, tokenizer, dataset)
        origin_prob = grad_guide.predict_prob(input_vector=origin_vector)
        for position in range(len(doc)):
            if position >= max_len:
                break
            # Get x_i^(\hat): the input with the word at `position` zeroed out.
            without_word_vector = copy.deepcopy(origin_vector)
            without_word_vector[0][position] = 0
            prob_without_word = grad_guide.predict_prob(
                input_vector=without_word_vector)

            # Calculate S(x, w_i) defined in Eq.(6).
            word_saliency = origin_prob[input_y] - prob_without_word[input_y]
            word_saliency_list.append(
                (position, doc[position], word_saliency, doc[position].tag_))

    elif level == 'char':
        max_len = config.char_max_len[dataset]
        embedding_dic = get_embedding_dict()
        origin_vector = doc_process(doc.text.lower(), embedding_dic,
                                    dataset).reshape(1, max_len)
        origin_prob = grad_guide.predict_prob(input_vector=origin_vector)

        find_a_word = False
        word_position = 0
        without_word_vector = copy.deepcopy(origin_vector)
        for i, c in enumerate(doc.text):
            if i >= max_len:
                break
            if c != ' ':
                # Zero the characters of the current word.
                without_word_vector[0][i] = 0
            else:
                # A full word has been masked: measure the probability change.
                find_a_word = True
                prob_without_word = grad_guide.predict_prob(
                    input_vector=without_word_vector)
                word_saliency = origin_prob[input_y] - prob_without_word[input_y]
                word_saliency_list.append(
                    (word_position, doc[word_position], word_saliency,
                     doc[word_position].tag_))
                word_position += 1
            if find_a_word:
                without_word_vector = copy.deepcopy(origin_vector)
                find_a_word = False

    position_word_list = []
    for word in word_saliency_list:
        position_word_list.append((word[0], word[1]))

    return position_word_list, word_saliency_list
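The word-level branch above implements the saliency score of Eq.(6): the drop in the true-class probability when a word's token id is zeroed out. The toy sketch below illustrates that masking-and-difference pattern with a stand-in probability function; `toy_prob`, `WEIGHTS`, the token ids and all numbers are hypothetical and purely illustrative, not part of the project.

```python
import numpy as np

WEIGHTS = np.array([0.05, 0.30, 0.10, 0.25])   # made-up per-position importance

def toy_prob(vector):
    # Hypothetical classifier: every unmasked position adds its weight to the
    # true-class probability on top of a 0.2 base. Purely illustrative.
    p_true = 0.2 + float(WEIGHTS[vector != 0].sum())
    return np.array([1.0 - p_true, p_true])

true_y = 1
x = np.array([12, 7, 55, 3])                   # encoded sentence, one id per word
origin_prob = toy_prob(x)

saliency = []
for position in range(len(x)):
    without_word = x.copy()
    without_word[position] = 0                 # "remove" the word at this position
    prob_without = toy_prob(without_word)
    saliency.append(origin_prob[true_y] - prob_without[true_y])  # S(x, w_i), Eq.(6)

print(saliency)   # approx. [0.05, 0.30, 0.10, 0.25] -> position 1 is most salient
```

In `evaluate_word_saliency` the same pattern runs against the real model through `grad_guide.predict_prob`, and the resulting `(position, word, saliency, POS-tag)` tuples later determine the order in which words are considered for substitution.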
def adversarial_paraphrase(input_text, true_y, grad_guide, tokenizer, dataset,
                           dataset_dict, word_candidate, level, verbose=True):
    '''
    Compute a perturbation, greedily choosing the synonym that causes the most
    significant change in the classification probability after replacement.

    :return perturbed_text: generated adversarial example
    :return perturbed_y: predicted class of perturbed_text
    :return sub_rate: word replacement rate shown in Table 3
    :return change_tuple_list: list of substituted words
    '''

    def halt_condition_fn(perturbed_text):
        '''
        Halt if the model output is changed.
        '''
        perturbed_vector = None
        if level == 'word':
            perturbed_vector = text_to_vector(perturbed_text, tokenizer, dataset)
        elif level == 'char':
            max_len = config.char_max_len[dataset]
            perturbed_vector = doc_process(perturbed_text, get_embedding_dict(),
                                           dataset).reshape(1, max_len)
        adv_y = grad_guide.predict_classes(input_vector=perturbed_vector)
        if adv_y != true_y:
            return True
        else:
            return False

    def heuristic_fn(text, candidate):
        '''
        Return the difference between the classification probability of the
        original word and the candidate substitute synonym, as defined in
        Eq.(4) and Eq.(5).
        '''
        doc = nlp(text)
        origin_vector = None
        perturbed_vector = None
        if level == 'word':
            origin_vector = text_to_vector(text, tokenizer, dataset)
            perturbed_tokens = _compile_perturbed_tokens(doc, [candidate])
            perturbed_doc = nlp(' '.join(perturbed_tokens))
            perturbed_vector = text_to_vector(perturbed_doc.text, tokenizer, dataset)
        elif level == 'char':
            max_len = config.char_max_len[dataset]
            origin_vector = doc_process(text, get_embedding_dict(),
                                        dataset).reshape(1, max_len)
            # At char level the candidate is applied to the original input_text
            # (a closure variable) rather than to `text`.
            perturbed_tokens = _compile_perturbed_tokens(nlp(input_text), [candidate])
            perturbed_text = ' '.join(perturbed_tokens)
            perturbed_vector = doc_process(perturbed_text, get_embedding_dict(),
                                           dataset).reshape(1, max_len)
        origin_prob = grad_guide.predict_prob(input_vector=origin_vector)
        perturbed_prob = grad_guide.predict_prob(input_vector=perturbed_vector)
        delta_p = origin_prob[true_y] - perturbed_prob[true_y]
        return delta_p

    def origin_perturbed_vector_fn(text, substitute):
        doc = nlp(text)
        origin_vector = None
        perturbed_vector = None
        if level == 'word':
            origin_vector = text_to_vector(text, tokenizer, dataset)
            perturbed_tokens = _compile_perturbed_tokens(doc, substitute)
            perturbed_doc = nlp(' '.join(perturbed_tokens))
            perturbed_vector = text_to_vector(perturbed_doc.text, tokenizer, dataset)
        elif level == 'char':
            max_len = config.char_max_len[dataset]
            origin_vector = doc_process(text, get_embedding_dict(),
                                        dataset).reshape(1, max_len)
            perturbed_tokens = _compile_perturbed_tokens(nlp(input_text), substitute)
            perturbed_text = ' '.join(perturbed_tokens)
            perturbed_vector = doc_process(perturbed_text, get_embedding_dict(),
                                           dataset).reshape(1, max_len)
        return origin_vector, perturbed_vector

    def delta_P_fn(origin_vector, perturbed_vector):
        '''
        Return the difference between the classification probability of the
        clean text and the perturbed text.
        '''
        origin_prob = grad_guide.predict_prob(input_vector=origin_vector)
        perturbed_prob = grad_guide.predict_prob(input_vector=perturbed_vector)
        delta_P = origin_prob[true_y] - perturbed_prob[true_y]
        return delta_P

    doc = nlp(input_text)

    # BU-MHS
    position_word_list, word_saliency_list = evaluate_word_saliency(
        doc, grad_guide, tokenizer, true_y, dataset, level)
    perturbed_text, sub_word, sub_rate, NE_rate, change_tuple_list = BU_MHS(
        doc, true_y, dataset, dataset_dict, word_candidate,
        word_saliency_list=word_saliency_list,
        heuristic_fn=heuristic_fn,
        halt_condition_fn=halt_condition_fn,
        origin_perturbed_vector_fn=origin_perturbed_vector_fn,
        delta_P_fn=delta_P_fn,
        verbose=verbose)

    origin_vector = perturbed_vector = None
    if level == 'word':
        origin_vector = text_to_vector(input_text, tokenizer, dataset)
        perturbed_vector = text_to_vector(perturbed_text, tokenizer, dataset)
    elif level == 'char':
        max_len = config.char_max_len[dataset]
        origin_vector = doc_process(input_text, get_embedding_dict(),
                                    dataset).reshape(1, max_len)
        perturbed_vector = doc_process(perturbed_text, get_embedding_dict(),
                                       dataset).reshape(1, max_len)
    perturbed_y = grad_guide.predict_classes(input_vector=perturbed_vector)

    if verbose:
        origin_prob = grad_guide.predict_prob(input_vector=origin_vector)
        perturbed_prob = grad_guide.predict_prob(input_vector=perturbed_vector)
        raw_score = origin_prob[true_y] - perturbed_prob[true_y]
        print('Prob before: ', origin_prob[true_y], '. Prob after: ',
              perturbed_prob[true_y], '. Prob shift: ', raw_score)

    return perturbed_text, perturbed_y, sub_word, sub_rate, NE_rate, change_tuple_list
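The routines above only touch the victim model through `grad_guide.predict_prob` and `grad_guide.predict_classes`. The sketch below is a minimal stand-in with that interface, useful for smoke-testing the attack loop; the class name `MockGradGuide`, its constructor argument, and the commented driver call are assumptions for illustration, not the project's actual wrapper.

```python
import numpy as np

class MockGradGuide:
    """Hypothetical stand-in for the project's model wrapper, exposing only
    the two methods used by the attack code above."""

    def __init__(self, predict_fn):
        # predict_fn maps an encoded input batch to class probabilities,
        # e.g. a Keras model's `predict` method.
        self.predict_fn = predict_fn

    def predict_prob(self, input_vector):
        # 1-D probability vector over classes for a single example.
        return np.asarray(self.predict_fn(input_vector)).squeeze()

    def predict_classes(self, input_vector):
        # Arg-max label for a single example.
        return int(np.argmax(self.predict_prob(input_vector)))

# Illustrative driver (tokenizer, dataset_dict and word_candidate are
# placeholders for the project's own objects):
#
# grad_guide = MockGradGuide(model.predict)
# perturbed_text, perturbed_y, sub_word, sub_rate, NE_rate, changes = adversarial_paraphrase(
#     input_text, true_y=1, grad_guide=grad_guide, tokenizer=tokenizer,
#     dataset='imdb', dataset_dict=dataset_dict, word_candidate=word_candidate,
#     level='word')
```

Because `halt_condition_fn` stops the greedy substitution as soon as `predict_classes` no longer returns `true_y`, the attack makes only as many replacements as the candidate pool requires, which is what keeps the reported `sub_rate` low.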