Example #1
# Assumes module-level imports: `import torch` and (in TextAttack)
# `from textattack.shared.validators import validate_model_gradient_word_swap_compatibility`.
def __init__(self, model, top_n=1, replace_stopwords=False):
    # Confirm we know how to compute gradients for this model architecture.
    validate_model_gradient_word_swap_compatibility(model)
    # Verify every attribute the gradient-based swap relies on.
    if not hasattr(model, 'word_embeddings'):
        raise ValueError(
            'Model needs word embedding matrix for gradient-based word swap')
    if not hasattr(model, 'lookup_table'):
        raise ValueError(
            'Model needs lookup table for gradient-based word swap')
    if not hasattr(model, 'zero_grad'):
        raise ValueError(
            'Model needs `zero_grad()` for gradient-based word swap')
    if not hasattr(model.tokenizer, 'convert_id_to_word'):
        raise ValueError(
            'Tokenizer needs `convert_id_to_word()` for gradient-based word swap')
    if not hasattr(model.tokenizer, 'pad_id'):
        raise ValueError(
            'Tokenizer needs `pad_id` for gradient-based word swap')
    if not hasattr(model.tokenizer, 'oov_id'):
        raise ValueError(
            'Tokenizer needs `oov_id` for gradient-based word swap')
    self.loss = torch.nn.CrossEntropyLoss()
    self.model = model
    self.pad_id = self.model.tokenizer.pad_id
    self.oov_id = self.model.tokenizer.oov_id
    self.top_n = top_n
    self.replace_stopwords = replace_stopwords
    if replace_stopwords:
        # Stopwords may be swapped too, so nothing needs to be filtered out.
        self.stopwords = set()
    else:
        # Lazy import: nltk is only needed when stopwords are filtered.
        from nltk.corpus import stopwords
        self.stopwords = set(stopwords.words('english'))
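The lazy `nltk` import above means the English stopword corpus must already be
available when `replace_stopwords=False`; a one-time setup with the standard
nltk API covers this:

import nltk

# Fetch the corpus that backs `stopwords.words('english')` above.
nltk.download('stopwords')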
Example #2

# Same module-level imports as Example #1. This variant drops the stopword
# option and flags the transformation as white-box via `is_black_box`.
def __init__(self, model, top_n=1):
    validate_model_gradient_word_swap_compatibility(model)
    if not hasattr(model, 'word_embeddings'):
        raise ValueError(
            'Model needs word embedding matrix for gradient-based word swap')
    if not hasattr(model, 'lookup_table'):
        raise ValueError(
            'Model needs lookup table for gradient-based word swap')
    if not hasattr(model, 'zero_grad'):
        raise ValueError(
            'Model needs `zero_grad()` for gradient-based word swap')
    if not hasattr(model.tokenizer, 'convert_id_to_word'):
        raise ValueError(
            'Tokenizer needs `convert_id_to_word()` for gradient-based word swap')
    if not hasattr(model.tokenizer, 'pad_id'):
        raise ValueError(
            'Tokenizer needs `pad_id` for gradient-based word swap')
    if not hasattr(model.tokenizer, 'oov_id'):
        raise ValueError(
            'Tokenizer needs `oov_id` for gradient-based word swap')
    self.loss = torch.nn.CrossEntropyLoss()
    self.model = model
    self.pad_id = self.model.tokenizer.pad_id
    self.oov_id = self.model.tokenizer.oov_id
    self.top_n = top_n
    # Gradient access makes this a white-box transformation.
    self.is_black_box = False
Example #3
# Assumes module-level imports: `import textattack` and (in TextAttack)
# `from textattack.shared.validators import validate_model_gradient_word_swap_compatibility`.
def __init__(self, model_wrapper, top_n=1):
    # Unwrap model wrappers. Need raw model for gradient.
    if not isinstance(model_wrapper, textattack.models.wrappers.ModelWrapper):
        raise TypeError(f"Got invalid model wrapper type {type(model_wrapper)}")
    self.model = model_wrapper.model
    self.model_wrapper = model_wrapper
    self.tokenizer = self.model_wrapper.tokenizer
    # Make sure we know how to compute the gradient for this model.
    validate_model_gradient_word_swap_compatibility(self.model)
    # Make sure this model has all of the required properties.
    if not hasattr(self.model, "get_input_embeddings"):
        raise ValueError(
            "Model needs word embedding matrix for gradient-based word swap"
        )
    # The tokenizer must expose a usable `pad_token_id`.
    if getattr(self.tokenizer, "pad_token_id", None) is None:
        raise ValueError(
            "Tokenizer needs to have `pad_token_id` for gradient-based word swap"
        )

    self.top_n = top_n
    self.is_black_box = False
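For context, a minimal usage sketch for this wrapper-based constructor, assuming
the snippet is TextAttack's WordSwapGradientBased transformation (the class name
and import paths below are assumptions; the example itself does not show them):

import transformers
from textattack.models.wrappers import HuggingFaceModelWrapper
from textattack.transformations import WordSwapGradientBased

# Any HuggingFace sequence-classification model and its tokenizer.
model = transformers.AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
tokenizer = transformers.AutoTokenizer.from_pretrained("bert-base-uncased")

# The wrapper gives the transformation access to the raw model (for gradients)
# and to the tokenizer (for `pad_token_id`).
model_wrapper = HuggingFaceModelWrapper(model, tokenizer)
transformation = WordSwapGradientBased(model_wrapper, top_n=5)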
Example #4

# A transitional variant: it takes a model wrapper like Example #3 but still
# validates the older `word_embeddings`/`lookup_table`/`pad_id` interface.
# Same module-level imports as above, plus `import torch`.
def __init__(self, model_wrapper, top_n=1):
    # Unwrap model wrappers. Need raw model for gradient.
    if not isinstance(model_wrapper, textattack.models.wrappers.ModelWrapper):
        raise TypeError(f"Got invalid model wrapper type {type(model_wrapper)}")
    self.model = model_wrapper.model
    self.model_wrapper = model_wrapper
    self.tokenizer = self.model_wrapper.tokenizer
    # Make sure we know how to compute the gradient for this model.
    validate_model_gradient_word_swap_compatibility(self.model)
    # Make sure this model has all of the required properties.
    if not hasattr(self.model, "word_embeddings"):
        raise ValueError(
            "Model needs word embedding matrix for gradient-based word swap")
    if not hasattr(self.model, "lookup_table"):
        raise ValueError(
            "Model needs lookup table for gradient-based word swap")
    if not hasattr(self.model, "zero_grad"):
        raise ValueError(
            "Model needs `zero_grad()` for gradient-based word swap")
    if not hasattr(self.tokenizer, "convert_id_to_word"):
        raise ValueError(
            "Tokenizer needs `convert_id_to_word()` for gradient-based word swap")
    if not hasattr(self.tokenizer, "pad_id"):
        raise ValueError(
            "Tokenizer needs `pad_id` for gradient-based word swap")
    if not hasattr(self.tokenizer, "oov_id"):
        raise ValueError(
            "Tokenizer needs `oov_id` for gradient-based word swap")
    self.loss = torch.nn.CrossEntropyLoss()
    self.pad_id = self.model_wrapper.tokenizer.pad_id
    self.oov_id = self.model_wrapper.tokenizer.oov_id
    self.top_n = top_n
    # Gradient access makes this a white-box transformation.
    self.is_black_box = False