def _get_replacement_words_by_grad(self, text, indices_to_replace):
        """ Returns returns a list containing all possible words to replace
            `word` with, based off of the model's gradient.
            
            Arguments:
                text (TokenizedText): The full text input to perturb
                word_index (int): index of the word to replace
        """
        self.model.train()

        lookup_table = self.model.lookup_table.to(utils.get_device())
        lookup_table_transpose = lookup_table.transpose(0, 1)

        # set backward hook on the word embeddings for input x
        emb_hook = Hook(self.model.word_embeddings, backward=True)

        self.model.zero_grad()
        predictions = self._call_model(text)
        original_label = predictions.argmax()
        y_true = torch.Tensor([original_label]).long().to(utils.get_device())
        loss = self.loss(predictions, y_true)
        loss.backward()

        # grad w.r.t to word embeddings
        emb_grad = emb_hook.output[0].to(utils.get_device()).squeeze()

        # grad differences between all flips and original word (eq. 1 from paper)
        vocab_size = lookup_table.size(0)
        diffs = torch.zeros(len(indices_to_replace), vocab_size)
        indices_to_replace = list(indices_to_replace)
        for j, word_idx in enumerate(indices_to_replace):
            # Get the grad w.r.t the one-hot index of the word.
            b_grads = emb_grad[word_idx].view(
                1, -1).mm(lookup_table_transpose).squeeze()
            a_grad = b_grads[text.ids[0][word_idx]]
            diffs[j] = b_grads - a_grad

        # Don't change to the pad token.
        diffs[:, self.model.tokenizer.pad_id] = float('-inf')

        # Find best indices within 2-d tensor by flattening.
        word_idxs_sorted_by_grad = (-diffs).flatten().argsort()

        candidates = []
        num_words_in_text, num_words_in_vocab = diffs.shape
        for idx in word_idxs_sorted_by_grad.tolist():
            idx_in_diffs = idx // num_words_in_vocab
            idx_in_vocab = idx % (num_words_in_vocab)
            idx_in_sentence = indices_to_replace[idx_in_diffs]
            word = self.model.tokenizer.convert_id_to_word(idx_in_vocab)
            if not utils.has_letter(word):
                # Do not consider words that contain no letters (e.g. numbers or punctuation).
                continue
            candidates.append((word, idx_in_sentence))
            if len(candidates) == self.top_n:
                break

        self.model.eval()
        return candidates
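The scoring loop above implements a first-order estimate of how much the loss changes when the word at `word_idx` is swapped for each vocabulary entry: the gradient with respect to the word's embedding is dotted with every row of the embedding table, and the dot product for the original word is subtracted (the "eq. 1" referenced in the comment). A minimal, self-contained sketch of that step on a toy embedding table (all names below are illustrative, not part of the module above):

import torch

# Toy setup: a vocabulary of 5 words with embedding dimension 3.
lookup_table = torch.randn(5, 3)       # rows are word embeddings
emb_grad = torch.randn(3)              # dL/d(embedding) at one position
original_word_id = 2

# Estimated loss change for flipping to each vocabulary word:
# grad . e_candidate - grad . e_original  (eq. 1).
b_grads = emb_grad @ lookup_table.t()
diffs = b_grads - b_grads[original_word_id]

best_flip = diffs.argmax().item()
print(best_flip, diffs[best_flip].item())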
 def get_mse_dist(self, a, b):
     """ Returns the MSE distance of words with IDs a and b."""
     a, b = min(a, b), max(a, b)
     try:
         mse_dist = self.mse_dist_mat[a][b]
     except KeyError:
         e1 = self.word_embeddings[a]
         e2 = self.word_embeddings[b]
         e1 = torch.tensor(e1).to(utils.get_device())
         e2 = torch.tensor(e2).to(utils.get_device())
         mse_dist = torch.sum((e1 - e2)**2)
         self.mse_dist_mat[a][b] = mse_dist
     return mse_dist
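The try/except caching above relies on `self.mse_dist_mat` being a nested mapping, e.g. a `defaultdict(dict)` keyed by the smaller ID first; that is an assumption here, since the container is not shown in the snippet. A standalone sketch of the same pattern with illustrative names:

import torch
from collections import defaultdict

word_embeddings = torch.randn(10, 4)   # toy embedding matrix
mse_dist_mat = defaultdict(dict)       # outer key: smaller ID, inner key: larger ID

def mse_dist(a, b):
    a, b = min(a, b), max(a, b)
    if b not in mse_dist_mat[a]:
        e1, e2 = word_embeddings[a], word_embeddings[b]
        mse_dist_mat[a][b] = torch.sum((e1 - e2) ** 2)
    return mse_dist_mat[a][b]

print(mse_dist(3, 7))                  # computed once, then served from the cache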
Example 3
    def get_log_probs_at_index(self, text_list, word_index):
        """ Gets the probability of the word at index `word_index` according
            to GPT-2. Assumes that all items in `text_list`
            have the same prefix up until `word_index`.
        """
        prefix = text_list[0].text_until_word_index(word_index)

        if not utils.has_letter(prefix):
            # The language model's probability is not defined for a word
            # without a meaningful prefix. If the prefix contains no letters,
            # just return a log-probability of 0.0 for every item.
            return torch.zeros(len(text_list), dtype=torch.float)

        token_ids = self.tokenizer.encode(prefix)
        tokens_tensor = torch.tensor([token_ids])
        tokens_tensor = tokens_tensor.to(utils.get_device())

        with torch.no_grad():
            outputs = self.model(tokens_tensor)
        predictions = outputs[0]

        probs = []
        for tokenized_text in text_list:
            next_word_ids = self.tokenizer.encode(
                tokenized_text.words[word_index])
            next_word_prob = predictions[0, -1, next_word_ids[0]]
            probs.append(next_word_prob)

        return probs
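For context, the same next-word scoring can be reproduced directly with Hugging Face's GPT-2 outside the class. The snippet below is a hedged sketch of that usage (the prefix and candidate words are made up); like the method above, it reads the model's raw score at the candidate's first token ID:

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2').eval()

prefix = "The movie was surprisingly"
candidates = ["good", "terrible"]

tokens_tensor = torch.tensor([tokenizer.encode(prefix)])
with torch.no_grad():
    predictions = model(tokens_tensor)[0]        # (1, seq_len, vocab_size)

for word in candidates:
    word_id = tokenizer.encode(' ' + word)[0]    # leading space matters for GPT-2's BPE
    print(word, predictions[0, -1, word_id].item())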
 def get_cos_sim(self, a, b):
     """ Returns the cosine similarity of words with IDs a and b."""
     if isinstance(a, str):
         a = self.word_embedding_word2index[a]
     if isinstance(b, str):
         b = self.word_embedding_word2index[b]
     a, b = min(a, b), max(a, b)
     try:
         cos_sim = self.cos_sim_mat[a][b]
     except KeyError:
         e1 = self.word_embeddings[a]
         e2 = self.word_embeddings[b]
         e1 = torch.tensor(e1).to(utils.get_device())
         e2 = torch.tensor(e2).to(utils.get_device())
         cos_sim = torch.nn.CosineSimilarity(dim=0)(e1, e2)
         self.cos_sim_mat[a][b] = cos_sim
     return cos_sim
 def __init__(self, model_path, num_labels=2, entailment=False):
     model_file_path = utils.download_if_needed(model_path)
     self.model = BertForSequenceClassification.from_pretrained(
         model_file_path, num_labels=num_labels)
     self.model.to(utils.get_device())
     self.model.eval()
     if entailment:
         self.tokenizer = BERTEntailmentTokenizer()
     else:
         self.tokenizer = BERTTokenizer(model_file_path)
Example 6
 def __init__(self,
              mode='english_to_german',
              max_length=20,
              num_beams=1,
              early_stopping=True):
     self.model = AutoModelWithLMHead.from_pretrained("t5-base")
     self.model.to(utils.get_device())
     self.model.eval()
     self.tokenizer = T5Tokenizer(mode)
     self.max_length = max_length
     self.num_beams = num_beams
     self.early_stopping = early_stopping
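A hedged usage sketch of the underlying Hugging Face model for English-to-German translation, calling the library directly with the standard T5 task prefix rather than the module's own `T5Tokenizer` wrapper (the input sentence is illustrative):

import torch
from transformers import T5ForConditionalGeneration, AutoTokenizer

model = T5ForConditionalGeneration.from_pretrained("t5-base").eval()
tokenizer = AutoTokenizer.from_pretrained("t5-base")

text = "translate English to German: The house is wonderful."
input_ids = tokenizer(text, return_tensors="pt").input_ids
with torch.no_grad():
    output_ids = model.generate(input_ids, max_length=20, num_beams=1)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))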
Example 7
    def _score_list(self, x, x_adv_list):
        """
        Returns the metric similarity between the embedding of the text and a list
        of perturbed text. 

        Args:
            x (str): The original text
            x_adv_list (list(str)): A list of perturbed texts

        Returns:
            A list with the similarity between the original text and each perturbed text in :obj:`x_adv_list`. 
            If x_adv_list is empty, an empty tensor is returned

        """
        # Return an empty tensor if x_adv_list is empty.
        # This prevents us from calling .repeat(x, 0), which throws an
        # error on machines with multiple GPUs (pytorch 1.2).
        if len(x_adv_list) == 0: return torch.tensor([])

        if self.window_size:
            x_list_text = []
            x_adv_list_text = []
            for x_adv in x_adv_list:
                modified_index = x_adv.attack_attrs['modified_word_index']
                x_list_text.append(
                    x.text_window_around_index(modified_index,
                                               self.window_size))
                x_adv_list_text.append(
                    x_adv.text_window_around_index(modified_index,
                                                   self.window_size))
            embeddings = self.encode(x_list_text + x_adv_list_text)
            original_embeddings = torch.tensor(
                embeddings[:len(x_adv_list)]).to(utils.get_device())
            perturbed_embeddings = torch.tensor(
                embeddings[len(x_adv_list):]).to(utils.get_device())
        else:
            x_text = x.text
            x_adv_list_text = [x_adv.text for x_adv in x_adv_list]
            embeddings = self.encode([x_text] + x_adv_list_text)
            if isinstance(embeddings[0], torch.Tensor):
                original_embedding = embeddings[0].to(utils.get_device())
            else:
                # If the embedding is not yet a tensor, make it one.
                original_embedding = torch.tensor(embeddings[0]).to(
                    utils.get_device())

            if isinstance(embeddings, list):
                # If `encode` did not return a Tensor of all embeddings, combine
                # into a tensor.
                perturbed_embeddings = torch.stack(embeddings[1:]).to(
                    utils.get_device())
            else:
                perturbed_embeddings = torch.tensor(embeddings[1:]).to(
                    utils.get_device())

            # Repeat original embedding to size of perturbed embedding.
            original_embeddings = original_embedding.unsqueeze(dim=0).repeat(
                len(perturbed_embeddings), 1)

        return self.sim_metric(original_embeddings, perturbed_embeddings)
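The final comparison boils down to repeating the single original embedding so it lines up row-for-row with the perturbed embeddings and applying `sim_metric` (cosine similarity by default, per the constraint's constructor) batch-wise. A toy illustration with made-up dimensions:

import torch

original = torch.randn(768)                   # one encoded original text
perturbed = torch.randn(3, 768)               # three encoded perturbations

originals = original.unsqueeze(dim=0).repeat(len(perturbed), 1)
sim_metric = torch.nn.CosineSimilarity(dim=1)
print(sim_metric(originals, perturbed))       # one similarity score per perturbation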
Example 8
    def _sim_score(self, starting_text, transformed_text):
        """ 
        Returns the metric similarity between the embedding of the starting text and the 
        transformed text.

        Args:
            starting_text: The ``TokenizedText`` to use as a starting point.
            transformed_text: A transformed ``TokenizedText``.

        Returns:
            The similarity between the starting and transformed text using the metric. 
        """
        try:
            modified_index = next(
                iter(transformed_text.attack_attrs['newly_modified_indices']))
        except KeyError:
            raise KeyError(
                'Cannot apply sentence encoder constraint without `newly_modified_indices`'
            )
        starting_text_window = starting_text.text_window_around_index(
            modified_index, self.window_size)

        transformed_text_window = transformed_text.text_window_around_index(
            modified_index, self.window_size)

        starting_embedding, transformed_embedding = self.model.encode(
            [starting_text_window, transformed_text_window])

        starting_embedding = torch.tensor(starting_embedding).to(
            utils.get_device())
        transformed_embedding = torch.tensor(transformed_embedding).to(
            utils.get_device())

        starting_embedding = torch.unsqueeze(starting_embedding, dim=0)
        transformed_embedding = torch.unsqueeze(transformed_embedding, dim=0)

        return self.sim_metric(starting_embedding, transformed_embedding)
Example 9
    def sim_score(self, x, x_adv):
        """ 
        Returns the metric similarity between embeddings of the text and 
        the perturbed text. 

        Args:
            x (str): The original text
            x_adv (str): The perturbed text

        Returns:
            The similarity between the original and perturbed text using the metric. 

        """
        original_embedding, perturbed_embedding = self.model.encode([x, x_adv])

        original_embedding = torch.tensor(original_embedding).to(
            utils.get_device())
        perturbed_embedding = torch.tensor(perturbed_embedding).to(
            utils.get_device())

        original_embedding = torch.unsqueeze(original_embedding, dim=0)
        perturbed_embedding = torch.unsqueeze(perturbed_embedding, dim=0)

        return self.sim_metric(original_embedding, perturbed_embedding)
Example 10
 def __init__(self, threshold=0.7, metric='cosine', **kwargs):
     super().__init__(threshold=threshold, metric=metric, **kwargs)
     self.model = SentenceTransformer('bert-base-nli-stsb-mean-tokens')
     self.model.to(utils.get_device())
 def load_from_disk(self, model_folder_path):
     self.load_state_dict(load_cached_state_dict(model_folder_path))
     self.to(utils.get_device())
     self.eval()
Example 12
 def hook_fn(self, module, input, output):
     self.input = [x.to(utils.get_device()) for x in input]
     self.output = [x.to(utils.get_device()) for x in output]
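The `Hook` helper used in the gradient example registers a function like this `hook_fn` on a module so that inputs/outputs (or, for backward hooks, gradients) are captured as they flow through. A minimal sketch of how such a class might be wired up (assumed shape, not the project's exact implementation):

import torch

class Hook:
    def __init__(self, module, backward=False):
        if backward:
            self.handle = module.register_full_backward_hook(self.hook_fn)
        else:
            self.handle = module.register_forward_hook(self.hook_fn)

    def hook_fn(self, module, input, output):
        # For forward hooks these are the module's input/output tensors;
        # for backward hooks they are grad_input/grad_output.
        self.input = input
        self.output = output

layer = torch.nn.Linear(4, 2)
hook = Hook(layer, backward=True)
x = torch.randn(3, 4, requires_grad=True)
layer(x).sum().backward()
print(hook.output[0].shape)   # gradient w.r.t. the layer's output: (3, 2)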
Example 13
def load_cached_state_dict(model_folder_path):
    model_folder_path = utils.download_if_needed(model_folder_path)
    model_path = os.path.join(model_folder_path, 'model.bin')
    state_dict = torch.load(model_path, map_location=utils.get_device())
    return state_dict
Example 14
 def load_from_disk(self, model_folder_path):
     self.load_state_dict(load_cached_state_dict(model_folder_path))
     self.word_embeddings = self.emb_layer.embedding
     self.lookup_table = self.emb_layer.embedding.weight.data
     self.to(utils.get_device())
     self.eval()
Example 15
 def __init__(self, *args, **kwargs):
     super().__init__(*args, **kwargs)
     self.model = self.get_infersent_model()
     self.model.to(utils.get_device())
Example 16
 def __init__(self, **kwargs):
     self.model = GPT2LMHeadModel.from_pretrained('gpt2')
     self.model.to(utils.get_device())
     self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
     super().__init__(**kwargs)
Example 17
    def _score_list(self, starting_text, transformed_texts):
        """
        Returns the metric similarity between the embedding of the starting text and a list
        of transformed texts. 

        Args:
            starting_text: The ``TokenizedText`` to use as a starting point.
            transformed_texts: A list of transformed ``TokenizedText``\s.

        Returns:
            A list with the similarity between the ``starting_text`` and each of 
                ``transformed_texts``. If ``transformed_texts`` is empty, 
                an empty tensor is returned
        """
        # Return an empty tensor if transformed_texts is empty.
        # This prevents us from calling .repeat(x, 0), which throws an
        # error on machines with multiple GPUs (pytorch 1.2).
        if len(transformed_texts) == 0: return torch.tensor([])

        if self.window_size:
            starting_text_windows = []
            transformed_text_windows = []
            for transformed_text in transformed_texts:
                #@TODO make this work when multiple indices have been modified
                try:
                    modified_index = next(
                        iter(transformed_text.
                             attack_attrs['newly_modified_indices']))
                except KeyError:
                    raise KeyError(
                        'Cannot apply sentence encoder constraint without `newly_modified_indices`'
                    )
                starting_text_windows.append(
                    starting_text.text_window_around_index(
                        modified_index, self.window_size))
                transformed_text_windows.append(
                    transformed_text.text_window_around_index(
                        modified_index, self.window_size))
            embeddings = self.encode(starting_text_windows +
                                     transformed_text_windows)
            starting_embeddings = torch.tensor(
                embeddings[:len(transformed_texts)]).to(utils.get_device())
            transformed_embeddings = torch.tensor(
                embeddings[len(transformed_texts):]).to(utils.get_device())
        else:
            starting_raw_text = starting_text.text
            transformed_raw_texts = [t.text for t in transformed_texts]
            embeddings = self.encode([starting_raw_text] +
                                     transformed_raw_texts)
            if isinstance(embeddings[0], torch.Tensor):
                starting_embedding = embeddings[0].to(utils.get_device())
            else:
                # If the embedding is not yet a tensor, make it one.
                starting_embedding = torch.tensor(embeddings[0]).to(
                    utils.get_device())

            if isinstance(embeddings, list):
                # If `encode` did not return a Tensor of all embeddings, combine
                # into a tensor.
                transformed_embeddings = torch.stack(embeddings[1:]).to(
                    utils.get_device())
            else:
                transformed_embeddings = torch.tensor(embeddings[1:]).to(
                    utils.get_device())

            # Repeat the starting embedding to match the number of transformed embeddings.
            starting_embeddings = starting_embedding.unsqueeze(dim=0).repeat(
                len(transformed_embeddings), 1)

        return self.sim_metric(starting_embeddings, transformed_embeddings)
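When `window_size` is set, only a fixed window of words around the modified index is encoded and compared. A toy illustration of what such a window looks like (the helper below is illustrative, not the project's `text_window_around_index`):

def window_around_index(words, index, window_size):
    # Return up to `window_size` words centered on `index` (illustrative only).
    half = window_size // 2
    start = max(0, index - half)
    end = min(len(words), start + window_size)
    start = max(0, end - window_size)
    return ' '.join(words[start:end])

words = 'the quick brown fox jumps over the lazy dog'.split()
print(window_around_index(words, 4, 3))   # 'fox jumps over'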