def get_tokenize_sentences(premises, hypothesis):
    """Joins each (premise, hypothesis) pair and tokenizes it into one word list."""
    all_sentences = []
    for premise, hyp in zip(premises, hypothesis):
        sentence = premise + " " + hyp
        tokens = words_from_text(sentence)
        all_sentences.append(tokens)
    return all_sentences
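# Usage sketch for get_tokenize_sentences, with made-up inputs. It assumes
# words_from_text (as in textattack.shared.utils) splits a string into its
# word tokens, so each premise/hypothesis pair becomes one flat token list.
premises = ["A man inspects a uniform."]
hypothesis = ["The man is sleeping."]
tokenized = get_tokenize_sentences(premises, hypothesis)
# -> [["A", "man", "inspects", "a", "uniform", "The", "man", "is", "sleeping"]]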
def _get_thought_vector(self, text):
    """Averages the embeddings of all the words in ``text`` into a "thought
    vector"."""
    embeddings = []
    for word in utils.words_from_text(text):
        embedding = self.word_embedding[word]
        if embedding is not None:  # out-of-vocab words do not have embeddings
            embeddings.append(embedding)
    embeddings = torch.tensor(embeddings)
    return torch.mean(embeddings, dim=0)
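# Minimal standalone sketch of the "thought vector" idea above: average the
# embeddings of the in-vocabulary words. The toy embedding dict is an
# assumption for illustration; the real class looks words up in
# self.word_embedding.
import torch

toy_embedding = {
    "the": torch.tensor([0.1, 0.2]),
    "cat": torch.tensor([0.4, 0.0]),
}

def thought_vector(text):
    embeddings = [toy_embedding[w] for w in text.lower().split() if w in toy_embedding]
    return torch.stack(embeddings).mean(dim=0)  # element-wise mean over words

thought_vector("the cat")  # -> tensor([0.2500, 0.1000])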
def _get_replacement_words_by_grad(self, attacked_text, indices_to_replace):
    """Returns a list of all possible words to replace with, based on the
    model's gradient.

    Arguments:
        attacked_text (AttackedText): The full text input to perturb.
        indices_to_replace (iterable[int]): Indices of the words to replace.
    """
    lookup_table = self.model.get_input_embeddings().weight.data.cpu()

    grad_output = self.model_wrapper.get_grad(attacked_text.tokenizer_input)
    emb_grad = torch.tensor(grad_output["gradient"])
    text_ids = grad_output["ids"]

    # Grad differences between all flips and the original word (eq. 1 from paper).
    vocab_size = lookup_table.size(0)
    diffs = torch.zeros(len(indices_to_replace), vocab_size)
    indices_to_replace = list(indices_to_replace)

    for j, word_idx in enumerate(indices_to_replace):
        # Make sure the word is in bounds.
        if word_idx >= len(emb_grad):
            continue
        # Get the grad w.r.t the one-hot index of the word.
        b_grads = lookup_table.mv(emb_grad[word_idx]).squeeze()
        a_grad = b_grads[text_ids[word_idx]]
        diffs[j] = b_grads - a_grad

    # Don't change to the pad token.
    diffs[:, self.tokenizer.pad_token_id] = float("-inf")

    # Find best indices within the 2-d tensor by flattening.
    word_idxs_sorted_by_grad = (-diffs).flatten().argsort()

    candidates = []
    num_words_in_text, num_words_in_vocab = diffs.shape
    for idx in word_idxs_sorted_by_grad.tolist():
        idx_in_diffs = idx // num_words_in_vocab
        idx_in_vocab = idx % num_words_in_vocab
        idx_in_sentence = indices_to_replace[idx_in_diffs]
        word = self.tokenizer.convert_id_to_word(idx_in_vocab)
        if (not utils.has_letter(word)) or (len(utils.words_from_text(word)) != 1):
            # Skip tokens that contain no letters (e.g. pure punctuation)
            # or that split into more than one word.
            continue
        candidates.append((word, idx_in_sentence))
        if len(candidates) == self.top_n:
            break

    return candidates
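# Minimal sketch of the first-order scoring above (eq. 1 in HotFlip-style
# attacks): for a position with embedding gradient g, the estimated loss
# change of swapping to vocab word w is (E[w] - E[orig]) . g. Shapes and
# values here are made up for illustration.
import torch

vocab_size, emb_dim = 5, 3
E = torch.randn(vocab_size, emb_dim)  # embedding lookup table
g = torch.randn(emb_dim)              # d(loss)/d(embedding) at one position
orig_id = 2                           # current token id at that position

scores = E.mv(g)                      # E[w] . g for every vocab word w
diffs = scores - scores[orig_id]      # (E[w] - E[orig]) . g
best_swap = diffs.argmax().item()     # word with the largest estimated loss increase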
def _get_transformations(self, current_text, indices_to_modify):
    """Returns transformed texts with words replaced by masked-LM predictions,
    using either the BERT-Attack or BAE method."""
    indices_to_modify = list(indices_to_modify)
    if self.method == "bert-attack":
        current_inputs = self._encode_text(current_text.text)
        with torch.no_grad():
            pred_probs = self._language_model(**current_inputs)[0][0]
        top_probs, top_ids = torch.topk(pred_probs, self.max_candidates)
        id_preds = top_ids.cpu()
        masked_lm_logits = pred_probs.cpu()

        transformed_texts = []
        for i in indices_to_modify:
            word_at_index = current_text.words[i]
            replacement_words = self._bert_attack_replacement_words(
                current_text,
                i,
                id_preds=id_preds,
                masked_lm_logits=masked_lm_logits,
            )
            for r in replacement_words:
                if r != word_at_index:
                    transformed_texts.append(current_text.replace_word_at_index(i, r))
        return transformed_texts
    elif self.method == "bae":
        replacement_words = self._bae_replacement_words(current_text, indices_to_modify)
        transformed_texts = []
        for i in range(len(replacement_words)):
            index_to_modify = indices_to_modify[i]
            word_at_index = current_text.words[index_to_modify]
            for word in replacement_words[i]:
                if word != word_at_index and len(utils.words_from_text(word)) == 1:
                    transformed_texts.append(
                        current_text.replace_word_at_index(index_to_modify, word)
                    )
        return transformed_texts
    else:
        raise ValueError(f"Unrecognized value {self.method} for `self.method`.")
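# Minimal sketch of the masked-LM lookup both branches above rely on: mask
# one position and take the top-k token predictions as candidate
# replacements. The model name and k are assumptions for illustration.
import torch
from transformers import AutoModelForMaskedLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelForMaskedLM.from_pretrained("bert-base-uncased")

text = "The movie was [MASK] ."
inputs = tokenizer(text, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits[0]

# Find the masked position and read off its top-k predicted tokens.
mask_pos = (inputs["input_ids"][0] == tokenizer.mask_token_id).nonzero().item()
top_ids = torch.topk(logits[mask_pos], k=5).indices.tolist()
print(tokenizer.convert_ids_to_tokens(top_ids))  # candidate replacement words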
def get_words_cached(s):
    return np.array(words_from_text(s))
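# The name suggests memoization, but no cache appears in the snippet above.
# A minimal sketch of how such caching could be wired up (an assumption, not
# confirmed by this code; the import path is also assumed):
import functools

import numpy as np

from textattack.shared.utils import words_from_text  # assumed import path


@functools.lru_cache(maxsize=2**14)
def get_words_cached(s):
    # lru_cache requires hashable arguments; strings qualify. Callers should
    # treat the returned array as read-only, since it is shared by the cache.
    return np.array(words_from_text(s))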
def _get_replacement_words_by_grad(self, attacked_text, indices_to_replace):
    """Returns a list of all possible words to replace with, based on the
    model's gradient.

    Arguments:
        attacked_text (AttackedText): The full text input to perturb.
        indices_to_replace (iterable[int]): Indices of the words to replace.
    """
    self.model.train()
    self.model.emb_layer.embedding.weight.requires_grad = True

    lookup_table = self.model.lookup_table.to(utils.device)
    lookup_table_transpose = lookup_table.transpose(0, 1)

    # Get word IDs.
    text_ids = self.model.tokenizer.encode(attacked_text.tokenizer_input)

    # Set a backward hook on the word embeddings for input x.
    emb_hook = Hook(self.model.word_embeddings, backward=True)

    self.model.zero_grad()
    predictions = self._call_model(text_ids)
    original_label = predictions.argmax()
    y_true = torch.Tensor([original_label]).long().to(utils.device)
    loss = self.loss(predictions, y_true)
    loss.backward()

    # Grad w.r.t. the word embeddings.
    emb_grad = emb_hook.output[0].to(utils.device).squeeze()

    # Grad differences between all flips and the original word (eq. 1 from paper).
    vocab_size = lookup_table.size(0)
    diffs = torch.zeros(len(indices_to_replace), vocab_size)
    indices_to_replace = list(indices_to_replace)
    for j, word_idx in enumerate(indices_to_replace):
        # Get the grad w.r.t the one-hot index of the word.
        b_grads = emb_grad[word_idx].view(1, -1).mm(lookup_table_transpose).squeeze()
        a_grad = b_grads[text_ids[word_idx]]
        diffs[j] = b_grads - a_grad

    # Don't change to the pad token.
    diffs[:, self.model.tokenizer.pad_id] = float("-inf")

    # Find best indices within the 2-d tensor by flattening.
    word_idxs_sorted_by_grad = (-diffs).flatten().argsort()

    candidates = []
    num_words_in_text, num_words_in_vocab = diffs.shape
    for idx in word_idxs_sorted_by_grad.tolist():
        idx_in_diffs = idx // num_words_in_vocab
        idx_in_vocab = idx % num_words_in_vocab
        idx_in_sentence = indices_to_replace[idx_in_diffs]
        word = self.model.tokenizer.convert_id_to_word(idx_in_vocab)
        if (not utils.has_letter(word)) or (len(utils.words_from_text(word)) != 1):
            # Skip tokens that contain no letters (e.g. pure punctuation)
            # or that split into more than one word.
            continue
        candidates.append((word, idx_in_sentence))
        if len(candidates) == self.top_n:
            break

    # Restore evaluation mode and the embedding layer's original trainability.
    self.model.eval()
    self.model.emb_layer.embedding.weight.requires_grad = (
        self.model.emb_layer_trainable
    )

    return candidates
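# The Hook used above is not shown in this snippet. A minimal sketch of a
# backward hook that records the gradients flowing through a module, inferred
# from how emb_hook.output[0] is read (an assumption about its interface):
import torch

class Hook:
    def __init__(self, module, backward=False):
        if backward:
            # For backward hooks, hook_fn receives (module, grad_input,
            # grad_output), so self.output holds d(loss)/d(module output).
            self.hook = module.register_full_backward_hook(self.hook_fn)
        else:
            self.hook = module.register_forward_hook(self.hook_fn)

    def hook_fn(self, module, input, output):
        self.input = input
        self.output = output

    def close(self):
        self.hook.remove()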