Example #1
0
def get_tokenize_sentences(premises, hypothesis):
    all_sentences = []

    for premise, hyp in zip(premises, hypothesis):
        # Join each premise/hypothesis pair and split it into word tokens.
        text = premise + " " + hyp
        tokens = words_from_text(text)
        all_sentences.append(tokens)

    return all_sentences
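A minimal usage sketch; the `words_from_text` import path and the sample sentences below are assumptions, not part of the snippet.

from textattack.shared.utils import words_from_text  # assumed import path

premises = ["A man is playing a guitar on stage."]
hypotheses = ["A person is performing music."]

# Each entry pairs a premise with its hypothesis, joined and split into word tokens.
tokenized = get_tokenize_sentences(premises, hypotheses)
print(tokenized[0])  # e.g. ['A', 'man', 'is', 'playing', ...]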
Example #2
0
    def _get_thought_vector(self, text):
        """Averages the embeddings of all the words in ``text`` into a
        "thought vector"."""
        embeddings = []
        for word in utils.words_from_text(text):
            embedding = self.word_embedding[word]
            if embedding is not None:  # out-of-vocab words do not have embeddings
                embeddings.append(embedding)
        embeddings = torch.tensor(embeddings)
        # Mean-pool the collected word embeddings into a single vector.
        return torch.mean(embeddings, dim=0)
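The same mean-pooling idea in isolation; the toy embedding table below is a hypothetical stand-in for `self.word_embedding`.

import torch

# Hypothetical stand-in for self.word_embedding: maps word -> vector, or None if OOV.
toy_embedding = {"good": torch.tensor([1.0, 0.0]), "movie": torch.tensor([0.0, 1.0])}

def thought_vector(text, embedding=toy_embedding):
    vectors = [embedding.get(word) for word in text.lower().split()]
    vectors = [v for v in vectors if v is not None]  # drop out-of-vocab words
    return torch.stack(vectors).mean(dim=0)

print(thought_vector("a good movie"))  # tensor([0.5000, 0.5000])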
Example #3
0
    def _get_replacement_words_by_grad(self, attacked_text,
                                       indices_to_replace):
        """Returns returns a list containing all possible words to replace
        `word` with, based off of the model's gradient.

        Arguments:
            attacked_text (AttackedText): The full text input to perturb
            word_index (int): index of the word to replace
        """

        lookup_table = self.model.get_input_embeddings().weight.data.cpu()

        grad_output = self.model_wrapper.get_grad(
            attacked_text.tokenizer_input)
        emb_grad = torch.tensor(grad_output["gradient"])
        text_ids = grad_output["ids"]
        # grad differences between all flips and original word (eq. 1 from paper)
        vocab_size = lookup_table.size(0)
        diffs = torch.zeros(len(indices_to_replace), vocab_size)
        indices_to_replace = list(indices_to_replace)

        for j, word_idx in enumerate(indices_to_replace):
            # Make sure the word is in bounds.
            if word_idx >= len(emb_grad):
                continue
            # Get the grad w.r.t the one-hot index of the word.
            b_grads = lookup_table.mv(emb_grad[word_idx]).squeeze()
            a_grad = b_grads[text_ids[word_idx]]
            diffs[j] = b_grads - a_grad

        # Don't change to the pad token.
        diffs[:, self.tokenizer.pad_token_id] = float("-inf")

        # Find best indices within 2-d tensor by flattening.
        word_idxs_sorted_by_grad = (-diffs).flatten().argsort()

        candidates = []
        num_words_in_text, num_words_in_vocab = diffs.shape
        for idx in word_idxs_sorted_by_grad.tolist():
            idx_in_diffs = idx // num_words_in_vocab
            idx_in_vocab = idx % (num_words_in_vocab)
            idx_in_sentence = indices_to_replace[idx_in_diffs]
            word = self.tokenizer.convert_id_to_word(idx_in_vocab)
            if (not utils.has_letter(word)) or (len(
                    utils.words_from_text(word)) != 1):
                # Skip candidates with no letters (e.g. punctuation or numbers)
                # or that split into more than one word.
                continue
            candidates.append((word, idx_in_sentence))
            if len(candidates) == self.top_n:
                break

        return candidates
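A hedged sketch of how the returned `(word, index)` pairs might be consumed; `transformation` and `attacked_text` are assumed instances of the class above and of TextAttack's AttackedText, respectively.

# Rank replacements over every word position, then apply each candidate.
candidates = transformation._get_replacement_words_by_grad(
    attacked_text, range(len(attacked_text.words)))
transformed_texts = [
    attacked_text.replace_word_at_index(index, word)
    for word, index in candidates
]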
Example #4
0
    def _get_transformations(self, current_text, indices_to_modify):
        indices_to_modify = list(indices_to_modify)
        if self.method == "bert-attack":
            current_inputs = self._encode_text(current_text.text)
            with torch.no_grad():
                pred_probs = self._language_model(**current_inputs)[0][0]
            top_probs, top_ids = torch.topk(pred_probs, self.max_candidates)
            id_preds = top_ids.cpu()
            masked_lm_logits = pred_probs.cpu()

            transformed_texts = []

            for i in indices_to_modify:
                word_at_index = current_text.words[i]
                replacement_words = self._bert_attack_replacement_words(
                    current_text,
                    i,
                    id_preds=id_preds,
                    masked_lm_logits=masked_lm_logits,
                )

                for r in replacement_words:
                    if r != word_at_index:
                        transformed_texts.append(
                            current_text.replace_word_at_index(i, r))

            return transformed_texts

        elif self.method == "bae":
            replacement_words = self._bae_replacement_words(
                current_text, indices_to_modify)
            transformed_texts = []
            for i in range(len(replacement_words)):
                index_to_modify = indices_to_modify[i]
                word_at_index = current_text.words[index_to_modify]
                for word in replacement_words[i]:
                    if word != word_at_index and len(
                            utils.words_from_text(word)) == 1:
                        transformed_texts.append(
                            current_text.replace_word_at_index(
                                index_to_modify, word))
            return transformed_texts
        else:
            raise ValueError(
                f"Unrecognized value {self.method} for `self.method`.")
Example #5
0
def get_words_cached(s):
    # Split ``s`` into words and return them as a numpy array.
    return np.array(words_from_text(s))
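The name suggests the result is memoized; a hedged sketch of adding that with `functools.lru_cache` (the decorator and cache size are assumptions, not shown in the snippet):

import functools

# Assumed: wrap the function so repeated strings are only tokenized once.
get_words_cached = functools.lru_cache(maxsize=2**14)(get_words_cached)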
Example #6
0
    def _get_replacement_words_by_grad(self, attacked_text, indices_to_replace):
        """Returns returns a list containing all possible words to replace
        `word` with, based off of the model's gradient.

        Arguments:
            attacked_text (AttackedText): The full text input to perturb
            word_index (int): index of the word to replace
        """
        self.model.train()
        self.model.emb_layer.embedding.weight.requires_grad = True

        lookup_table = self.model.lookup_table.to(utils.device)
        lookup_table_transpose = lookup_table.transpose(0, 1)

        # get word IDs
        text_ids = self.model.tokenizer.encode(attacked_text.tokenizer_input)

        # set backward hook on the word embeddings for input x
        emb_hook = Hook(self.model.word_embeddings, backward=True)

        self.model.zero_grad()
        predictions = self._call_model(text_ids)
        original_label = predictions.argmax()
        y_true = torch.Tensor([original_label]).long().to(utils.device)
        loss = self.loss(predictions, y_true)
        loss.backward()

        # grad w.r.t to word embeddings
        emb_grad = emb_hook.output[0].to(utils.device).squeeze()

        # grad differences between all flips and original word (eq. 1 from paper)
        vocab_size = lookup_table.size(0)
        diffs = torch.zeros(len(indices_to_replace), vocab_size)
        indices_to_replace = list(indices_to_replace)
        for j, word_idx in enumerate(indices_to_replace):
            # Get the grad w.r.t the one-hot index of the word.
            b_grads = (
                emb_grad[word_idx].view(1, -1).mm(lookup_table_transpose).squeeze()
            )
            a_grad = b_grads[text_ids[word_idx]]
            diffs[j] = b_grads - a_grad

        # Don't change to the pad token.
        diffs[:, self.model.tokenizer.pad_id] = float("-inf")

        # Find best indices within 2-d tensor by flattening.
        word_idxs_sorted_by_grad = (-diffs).flatten().argsort()

        candidates = []
        num_words_in_text, num_words_in_vocab = diffs.shape
        for idx in word_idxs_sorted_by_grad.tolist():
            idx_in_diffs = idx // num_words_in_vocab
            idx_in_vocab = idx % (num_words_in_vocab)
            idx_in_sentence = indices_to_replace[idx_in_diffs]
            word = self.model.tokenizer.convert_id_to_word(idx_in_vocab)
            if (not utils.has_letter(word)) or (len(utils.words_from_text(word)) != 1):
                # Skip candidates with no letters (e.g. punctuation or numbers)
                # or that split into more than one word.
                continue
            candidates.append((word, idx_in_sentence))
            if len(candidates) == self.top_n:
                break

        self.model.eval()
        self.model.emb_layer.embedding.weight.requires_grad = (
            self.model.emb_layer_trainable
        )
        return candidates
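The `Hook` helper used above is not shown in the snippet; a hedged sketch of a backward hook that records the gradient flowing through a module, which is what `emb_hook.output[0]` reads back:

import torch.nn as nn

class Hook:
    """Records a module's (grad_)input and (grad_)output when it fires."""

    def __init__(self, module: nn.Module, backward=False):
        if backward:
            # Called during backprop with (module, grad_input, grad_output).
            self.handle = module.register_full_backward_hook(self.hook_fn)
        else:
            # Called during the forward pass with (module, input, output).
            self.handle = module.register_forward_hook(self.hook_fn)

    def hook_fn(self, module, input, output):
        self.input = input
        self.output = output

    def close(self):
        self.handle.remove()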