def _get_replacement_words_by_grad(self, text, indices_to_replace):
    """ Returns a list of all possible replacement words, ranked by the
        model's gradient.

        Arguments:
            text (TokenizedText): The full text input to perturb.
            indices_to_replace (list[int]): Indices of the words to replace.
    """
    self.model.train()

    lookup_table = self.model.lookup_table.to(utils.get_device())
    lookup_table_transpose = lookup_table.transpose(0, 1)

    # Set a backward hook on the word embeddings for input x.
    emb_hook = Hook(self.model.word_embeddings, backward=True)

    self.model.zero_grad()
    predictions = self._call_model(text)
    original_label = predictions.argmax()
    y_true = torch.Tensor([original_label]).long().to(utils.get_device())
    loss = self.loss(predictions, y_true)
    loss.backward()

    # Gradient w.r.t. the word embeddings.
    emb_grad = emb_hook.output[0].to(utils.get_device()).squeeze()

    # Gradient differences between all flips and the original word (eq. 1 from the paper).
    vocab_size = lookup_table.size(0)
    diffs = torch.zeros(len(indices_to_replace), vocab_size)
    indices_to_replace = list(indices_to_replace)
    for j, word_idx in enumerate(indices_to_replace):
        # Get the gradient w.r.t. the one-hot index of the word.
        b_grads = emb_grad[word_idx].view(1, -1).mm(lookup_table_transpose).squeeze()
        a_grad = b_grads[text.ids[0][word_idx]]
        diffs[j] = b_grads - a_grad

    # Don't change a word to the pad token.
    diffs[:, self.model.tokenizer.pad_id] = float('-inf')

    # Find the best indices within the 2-D tensor by flattening it.
    word_idxs_sorted_by_grad = (-diffs).flatten().argsort()

    candidates = []
    num_words_in_text, num_words_in_vocab = diffs.shape
    for idx in word_idxs_sorted_by_grad.tolist():
        idx_in_diffs = idx // num_words_in_vocab
        idx_in_vocab = idx % num_words_in_vocab
        idx_in_sentence = indices_to_replace[idx_in_diffs]
        word = self.model.tokenizer.convert_id_to_word(idx_in_vocab)
        if not utils.has_letter(word):
            # Skip candidates with no letters (e.g. pure punctuation or numbers).
            continue
        candidates.append((word, idx_in_sentence))
        if len(candidates) == self.top_n:
            break

    self.model.eval()
    return candidates
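# A minimal, self-contained sketch of the ranking step above (eq. 1): for one word
# position, the first-order score of swapping in vocabulary word w is
# grad_e . (E[w] - E[orig]), computed for the whole vocabulary at once as
# (E @ grad_e) - (E @ grad_e)[orig]. The helper name and all tensor values below
# are illustrative assumptions, not part of the library.
import torch

def rank_flips_by_grad(emb_grad_at_pos, embedding_matrix, original_word_id, top_n=3):
    """ emb_grad_at_pos: (emb_dim,) gradient of the loss w.r.t. the embedding at one position.
        embedding_matrix: (vocab_size, emb_dim) word embedding lookup table.
    """
    # Score every vocabulary word with a single matrix-vector product.
    b_grads = embedding_matrix @ emb_grad_at_pos           # (vocab_size,)
    diffs = b_grads - b_grads[original_word_id]            # eq. 1: grad . (e_w - e_orig)
    # A larger diff means a larger estimated increase in loss, so sort descending.
    return torch.argsort(diffs, descending=True)[:top_n]

# Toy usage: vocabulary of 5 words, 4-dimensional embeddings.
emb = torch.randn(5, 4)
grad = torch.randn(4)
print(rank_flips_by_grad(grad, emb, original_word_id=2))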
def get_mse_dist(self, a, b):
    """ Returns the MSE distance between the embeddings of the words with IDs a and b. """
    a, b = min(a, b), max(a, b)
    try:
        mse_dist = self.mse_dist_mat[a][b]
    except KeyError:
        e1 = self.word_embeddings[a]
        e2 = self.word_embeddings[b]
        e1 = torch.tensor(e1).to(utils.get_device())
        e2 = torch.tensor(e2).to(utils.get_device())
        mse_dist = torch.sum((e1 - e2) ** 2)
        self.mse_dist_mat[a][b] = mse_dist
    return mse_dist
def get_log_probs_at_index(self, text_list, word_index):
    """ Gets the probability of the word at index `word_index` according to GPT-2.

        Assumes that all items in `text_list` share the same prefix up until `word_index`.
    """
    prefix = text_list[0].text_until_word_index(word_index)

    if not utils.has_letter(prefix):
        # The language model's perplexity is not defined for a word without a
        # prefix. If the prefix is empty, just return a log-probability of 0.0.
        return torch.zeros(len(text_list), dtype=torch.float)

    token_ids = self.tokenizer.encode(prefix)
    tokens_tensor = torch.tensor([token_ids])
    tokens_tensor = tokens_tensor.to(utils.get_device())

    with torch.no_grad():
        outputs = self.model(tokens_tensor)
    predictions = outputs[0]

    probs = []
    for tokenized_text in text_list:
        next_word_ids = self.tokenizer.encode(
            tokenized_text.words[word_index])
        next_word_prob = predictions[0, -1, next_word_ids[0]]
        probs.append(next_word_prob)

    return probs
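# A minimal sketch of the next-word lookup above, outside the class: run GPT-2 on a
# prefix once and read off the score assigned to each candidate word's first
# sub-token. The 'gpt2' checkpoint matches the GPT-2 constraint defined later in this
# section; the softmax and the leading-space handling are illustrative assumptions,
# since the method above compares raw prediction scores.
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.eval()

prefix = "The quick brown fox jumps over the"
tokens_tensor = torch.tensor([tokenizer.encode(prefix)])
with torch.no_grad():
    predictions = model(tokens_tensor)[0]            # (1, seq_len, vocab_size) logits
next_token_probs = torch.softmax(predictions[0, -1], dim=0)

for candidate in ["lazy", "sleepy"]:
    first_subtoken_id = tokenizer.encode(" " + candidate)[0]
    print(candidate, next_token_probs[first_subtoken_id].item())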
def get_cos_sim(self, a, b):
    """ Returns the cosine similarity between the embeddings of the words with IDs a and b. """
    if isinstance(a, str):
        a = self.word_embedding_word2index[a]
    if isinstance(b, str):
        b = self.word_embedding_word2index[b]
    a, b = min(a, b), max(a, b)
    try:
        cos_sim = self.cos_sim_mat[a][b]
    except KeyError:
        e1 = self.word_embeddings[a]
        e2 = self.word_embeddings[b]
        e1 = torch.tensor(e1).to(utils.get_device())
        e2 = torch.tensor(e2).to(utils.get_device())
        cos_sim = torch.nn.CosineSimilarity(dim=0)(e1, e2)
        self.cos_sim_mat[a][b] = cos_sim
    return cos_sim
def __init__(self, model_path, num_labels=2, entailment=False):
    model_file_path = utils.download_if_needed(model_path)
    self.model = BertForSequenceClassification.from_pretrained(
        model_file_path, num_labels=num_labels)
    self.model.to(utils.get_device())
    self.model.eval()
    if entailment:
        self.tokenizer = BERTEntailmentTokenizer()
    else:
        self.tokenizer = BERTTokenizer(model_file_path)
def __init__(self, mode='english_to_german', max_length=20, num_beams=1,
             early_stopping=True):
    self.model = AutoModelWithLMHead.from_pretrained("t5-base")
    self.model.to(utils.get_device())
    self.model.eval()
    self.tokenizer = T5Tokenizer(mode)
    self.max_length = max_length
    self.num_beams = num_beams
    self.early_stopping = early_stopping
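# A sketch of how the generation parameters stored above are typically consumed:
# T5 treats translation as text-to-text, so the input is prefixed with a task string
# and decoded with `generate`. Using the raw Hugging Face tokenizer and the literal
# prefix below is an assumption for illustration; the class above wraps its own
# T5Tokenizer(mode).
import torch
from transformers import AutoModelWithLMHead, AutoTokenizer

model = AutoModelWithLMHead.from_pretrained("t5-base")
tokenizer = AutoTokenizer.from_pretrained("t5-base")
model.eval()

text = "translate English to German: The house is wonderful."
input_ids = tokenizer.encode(text, return_tensors="pt")
with torch.no_grad():
    output_ids = model.generate(input_ids, max_length=20, num_beams=1,
                                early_stopping=True)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))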
def _score_list(self, x, x_adv_list):
    """ Returns the metric similarity between the embedding of the text and a
        list of perturbed texts.

        Args:
            x (str): The original text.
            x_adv_list (list(str)): A list of perturbed texts.

        Returns:
            A list with the similarity between the original text and each
            perturbed text in :obj:`x_adv_list`. If `x_adv_list` is empty, an
            empty tensor is returned.
    """
    # Return an empty tensor if x_adv_list is empty.
    # This prevents us from calling .repeat(x, 0), which throws an
    # error on machines with multiple GPUs (pytorch 1.2).
    if len(x_adv_list) == 0:
        return torch.tensor([])

    if self.window_size:
        x_list_text = []
        x_adv_list_text = []
        for x_adv in x_adv_list:
            modified_index = x_adv.attack_attrs['modified_word_index']
            x_list_text.append(
                x.text_window_around_index(modified_index, self.window_size))
            x_adv_list_text.append(
                x_adv.text_window_around_index(modified_index,
                                               self.window_size))
        embeddings = self.encode(x_list_text + x_adv_list_text)
        original_embeddings = torch.tensor(
            embeddings[:len(x_adv_list)]).to(utils.get_device())
        perturbed_embeddings = torch.tensor(
            embeddings[len(x_adv_list):]).to(utils.get_device())
    else:
        x_text = x.text
        x_adv_list_text = [x_adv.text for x_adv in x_adv_list]
        embeddings = self.encode([x_text] + x_adv_list_text)

        if isinstance(embeddings[0], torch.Tensor):
            original_embedding = embeddings[0].to(utils.get_device())
        else:
            # If the embedding is not yet a tensor, make it one.
            original_embedding = torch.tensor(embeddings[0]).to(
                utils.get_device())

        if isinstance(embeddings, list):
            # If `encode` did not return a Tensor of all embeddings, combine
            # them into a tensor.
            perturbed_embeddings = torch.stack(embeddings[1:]).to(
                utils.get_device())
        else:
            perturbed_embeddings = torch.tensor(embeddings[1:]).to(
                utils.get_device())

        # Repeat the original embedding to the size of the perturbed embeddings.
        original_embeddings = original_embedding.unsqueeze(dim=0).repeat(
            len(perturbed_embeddings), 1)

    return self.sim_metric(original_embeddings, perturbed_embeddings)
def _sim_score(self, starting_text, transformed_text):
    """ Returns the metric similarity between the embeddings of the starting
        text and the transformed text.

        Args:
            starting_text: The ``TokenizedText`` to use as a starting point.
            transformed_text: A transformed ``TokenizedText``.

        Returns:
            The similarity between the starting and transformed text using the metric.
    """
    try:
        modified_index = next(
            iter(transformed_text.attack_attrs['newly_modified_indices']))
    except KeyError:
        raise KeyError(
            'Cannot apply sentence encoder constraint without `newly_modified_indices`'
        )
    starting_text_window = starting_text.text_window_around_index(
        modified_index, self.window_size)
    transformed_text_window = transformed_text.text_window_around_index(
        modified_index, self.window_size)
    starting_embedding, transformed_embedding = self.model.encode(
        [starting_text_window, transformed_text_window])
    starting_embedding = torch.tensor(starting_embedding).to(
        utils.get_device())
    transformed_embedding = torch.tensor(transformed_embedding).to(
        utils.get_device())
    starting_embedding = torch.unsqueeze(starting_embedding, dim=0)
    transformed_embedding = torch.unsqueeze(transformed_embedding, dim=0)
    return self.sim_metric(starting_embedding, transformed_embedding)
def sim_score(self, x, x_adv):
    """ Returns the metric similarity between the embeddings of the text and
        the perturbed text.

        Args:
            x (str): The original text.
            x_adv (str): The perturbed text.

        Returns:
            The similarity between the original and perturbed text using the metric.
    """
    original_embedding, perturbed_embedding = self.model.encode([x, x_adv])
    original_embedding = torch.tensor(original_embedding).to(
        utils.get_device())
    perturbed_embedding = torch.tensor(perturbed_embedding).to(
        utils.get_device())
    original_embedding = torch.unsqueeze(original_embedding, dim=0)
    perturbed_embedding = torch.unsqueeze(perturbed_embedding, dim=0)
    return self.sim_metric(original_embedding, perturbed_embedding)
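# A small illustration of why the embeddings above are unsqueezed to shape (1, emb_dim):
# a batched similarity metric such as torch.nn.CosineSimilarity(dim=1) (an assumption
# here; the actual metric depends on the constraint's configuration) compares rows,
# so single embeddings are wrapped in a batch of one. The 768-dimensional toy vectors
# are illustrative only.
import torch

sim_metric = torch.nn.CosineSimilarity(dim=1)
original_embedding = torch.randn(768).unsqueeze(dim=0)      # shape (1, 768)
perturbed_embedding = torch.randn(768).unsqueeze(dim=0)     # shape (1, 768)
print(sim_metric(original_embedding, perturbed_embedding))  # tensor of shape (1,)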
def __init__(self, threshold=0.7, metric='cosine', **kwargs):
    super().__init__(threshold=threshold, metric=metric, **kwargs)
    self.model = SentenceTransformer('bert-base-nli-stsb-mean-tokens')
    self.model.to(utils.get_device())
def load_from_disk(self, model_folder_path):
    self.load_state_dict(load_cached_state_dict(model_folder_path))
    self.to(utils.get_device())
    self.eval()
def hook_fn(self, module, input, output):
    self.input = [x.to(utils.get_device()) for x in input]
    self.output = [x.to(utils.get_device()) for x in output]
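# `hook_fn` above is presumably registered by the `Hook` wrapper used in the
# gradient-based replacement method earlier (`Hook(module, backward=True)`).
# A minimal sketch of how such a wrapper might attach the callback is given below;
# the constructor signature is inferred from that call and is an assumption, not the
# library's definitive implementation.
import torch

class Hook:
    def __init__(self, module, backward=False):
        if backward:
            # Called during backprop with the gradients flowing through the module.
            self.hook = module.register_backward_hook(self.hook_fn)
        else:
            # Called during the forward pass with the module's inputs and outputs.
            self.hook = module.register_forward_hook(self.hook_fn)

    def hook_fn(self, module, input, output):
        self.input = input
        self.output = output

    def close(self):
        # Detach the hook when it is no longer needed.
        self.hook.remove()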
def load_cached_state_dict(model_folder_path):
    model_folder_path = utils.download_if_needed(model_folder_path)
    model_path = os.path.join(model_folder_path, 'model.bin')
    state_dict = torch.load(model_path, map_location=utils.get_device())
    return state_dict
def load_from_disk(self, model_folder_path):
    self.load_state_dict(load_cached_state_dict(model_folder_path))
    self.word_embeddings = self.emb_layer.embedding
    self.lookup_table = self.emb_layer.embedding.weight.data
    self.to(utils.get_device())
    self.eval()
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.model = self.get_infersent_model()
    self.model.to(utils.get_device())
def __init__(self, **kwargs):
    self.model = GPT2LMHeadModel.from_pretrained('gpt2')
    self.model.to(utils.get_device())
    self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    super().__init__(**kwargs)
def _score_list(self, starting_text, transformed_texts):
    """ Returns the metric similarity between the embedding of the starting
        text and a list of transformed texts.

        Args:
            starting_text: The ``TokenizedText`` to use as a starting point.
            transformed_texts: A list of transformed ``TokenizedText``\s.

        Returns:
            A list with the similarity between the ``starting_text`` and each of
            ``transformed_texts``. If ``transformed_texts`` is empty, an empty
            tensor is returned.
    """
    # Return an empty tensor if transformed_texts is empty.
    # This prevents us from calling .repeat(x, 0), which throws an
    # error on machines with multiple GPUs (pytorch 1.2).
    if len(transformed_texts) == 0:
        return torch.tensor([])

    if self.window_size:
        starting_text_windows = []
        transformed_text_windows = []
        for transformed_text in transformed_texts:
            # @TODO make this work when multiple indices have been modified.
            try:
                modified_index = next(
                    iter(transformed_text.attack_attrs['newly_modified_indices']))
            except KeyError:
                raise KeyError(
                    'Cannot apply sentence encoder constraint without `newly_modified_indices`'
                )
            starting_text_windows.append(
                starting_text.text_window_around_index(
                    modified_index, self.window_size))
            transformed_text_windows.append(
                transformed_text.text_window_around_index(
                    modified_index, self.window_size))
        embeddings = self.encode(starting_text_windows +
                                 transformed_text_windows)
        starting_embeddings = torch.tensor(
            embeddings[:len(transformed_texts)]).to(utils.get_device())
        transformed_embeddings = torch.tensor(
            embeddings[len(transformed_texts):]).to(utils.get_device())
    else:
        starting_raw_text = starting_text.text
        transformed_raw_texts = [t.text for t in transformed_texts]
        embeddings = self.encode([starting_raw_text] + transformed_raw_texts)

        if isinstance(embeddings[0], torch.Tensor):
            starting_embedding = embeddings[0].to(utils.get_device())
        else:
            # If the embedding is not yet a tensor, make it one.
            starting_embedding = torch.tensor(embeddings[0]).to(
                utils.get_device())

        if isinstance(embeddings, list):
            # If `encode` did not return a Tensor of all embeddings, combine
            # them into a tensor.
            transformed_embeddings = torch.stack(embeddings[1:]).to(
                utils.get_device())
        else:
            transformed_embeddings = torch.tensor(embeddings[1:]).to(
                utils.get_device())

        # Repeat the starting embedding to the size of the transformed embeddings.
        starting_embeddings = starting_embedding.unsqueeze(dim=0).repeat(
            len(transformed_embeddings), 1)

    return self.sim_metric(starting_embeddings, transformed_embeddings)