def __init__(self, model, top_n=1, replace_stopwords=False):
    validate_model_gradient_word_swap_compatibility(model)
    # Make sure the model and tokenizer expose everything the
    # gradient-based word swap needs.
    if not hasattr(model, 'word_embeddings'):
        raise ValueError(
            'Model needs word embedding matrix for gradient-based word swap'
        )
    if not hasattr(model, 'lookup_table'):
        raise ValueError(
            'Model needs lookup table for gradient-based word swap')
    if not hasattr(model, 'zero_grad'):
        raise ValueError(
            'Model needs `zero_grad()` for gradient-based word swap')
    if not hasattr(model.tokenizer, 'convert_id_to_word'):
        raise ValueError(
            'Tokenizer needs `convert_id_to_word()` for gradient-based word swap'
        )
    if not hasattr(model.tokenizer, 'pad_id'):
        raise ValueError(
            'Tokenizer needs `pad_id` for gradient-based word swap')
    if not hasattr(model.tokenizer, 'oov_id'):
        raise ValueError(
            'Tokenizer needs `oov_id` for gradient-based word swap')
    self.loss = torch.nn.CrossEntropyLoss()
    self.model = model
    self.pad_id = self.model.tokenizer.pad_id
    self.oov_id = self.model.tokenizer.oov_id
    self.top_n = top_n
    self.replace_stopwords = replace_stopwords
    if replace_stopwords:
        # An empty stopword set means stopwords may be swapped like any
        # other word.
        self.stopwords = set()
    else:
        # Deferred import so nltk is only required when stopwords are
        # excluded from replacement.
        from nltk.corpus import stopwords
        self.stopwords = set(stopwords.words('english'))

def __init__(self, model, top_n=1):
    validate_model_gradient_word_swap_compatibility(model)
    # Make sure the model and tokenizer expose everything the
    # gradient-based word swap needs.
    if not hasattr(model, 'word_embeddings'):
        raise ValueError(
            'Model needs word embedding matrix for gradient-based word swap'
        )
    if not hasattr(model, 'lookup_table'):
        raise ValueError(
            'Model needs lookup table for gradient-based word swap')
    if not hasattr(model, 'zero_grad'):
        raise ValueError(
            'Model needs `zero_grad()` for gradient-based word swap')
    if not hasattr(model.tokenizer, 'convert_id_to_word'):
        raise ValueError(
            'Tokenizer needs `convert_id_to_word()` for gradient-based word swap'
        )
    if not hasattr(model.tokenizer, 'pad_id'):
        raise ValueError(
            'Tokenizer needs `pad_id` for gradient-based word swap')
    if not hasattr(model.tokenizer, 'oov_id'):
        raise ValueError(
            'Tokenizer needs `oov_id` for gradient-based word swap')
    self.loss = torch.nn.CrossEntropyLoss()
    self.model = model
    self.pad_id = self.model.tokenizer.pad_id
    self.oov_id = self.model.tokenizer.oov_id
    self.top_n = top_n
    # This transformation reads model gradients, so it is white-box.
    self.is_black_box = False

def __init__(self, model_wrapper, top_n=1):
    # Unwrap model wrappers. Need raw model for gradient.
    if not isinstance(model_wrapper, textattack.models.wrappers.ModelWrapper):
        raise TypeError(f"Got invalid model wrapper type {type(model_wrapper)}")
    self.model = model_wrapper.model
    self.model_wrapper = model_wrapper
    self.tokenizer = self.model_wrapper.tokenizer
    # Make sure we know how to compute the gradient for this model.
    validate_model_gradient_word_swap_compatibility(self.model)
    # Make sure this model has all of the required properties.
    if not hasattr(self.model, "get_input_embeddings"):
        raise ValueError(
            "Model needs word embedding matrix for gradient-based word swap"
        )
    # Reject tokenizers whose `pad_token_id` is missing or unset.
    if (
        not hasattr(self.tokenizer, "pad_token_id")
        or self.tokenizer.pad_token_id is None
    ):
        raise ValueError(
            "Tokenizer needs to have `pad_token_id` for gradient-based word swap"
        )
    self.top_n = top_n
    # This transformation reads model gradients, so it is white-box.
    self.is_black_box = False

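# A hedged usage sketch for the version directly above, assuming this
# `__init__` belongs to a TextAttack transformation class such as
# `WordSwapGradientBased`. The model name and recipe below are illustrative
# assumptions, not taken from this module; `HuggingFaceModelWrapper` is
# TextAttack's standard wrapper and satisfies both the `isinstance` check
# and the `pad_token_id` validation.
import transformers
import textattack

model = transformers.AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased"
)
tokenizer = transformers.AutoTokenizer.from_pretrained("bert-base-uncased")
model_wrapper = textattack.models.wrappers.HuggingFaceModelWrapper(
    model, tokenizer
)
# `top_n` bounds how many gradient-ranked replacement words are proposed
# per position.
transformation = textattack.transformations.WordSwapGradientBased(
    model_wrapper, top_n=5
)
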
def __init__(self, model_wrapper, top_n=1):
    # Unwrap model wrappers. Need raw model for gradient.
    if not isinstance(model_wrapper, textattack.models.wrappers.ModelWrapper):
        raise TypeError(f"Got invalid model wrapper type {type(model_wrapper)}")
    self.model = model_wrapper.model
    self.model_wrapper = model_wrapper
    self.tokenizer = self.model_wrapper.tokenizer
    # Make sure we know how to compute the gradient for this model.
    validate_model_gradient_word_swap_compatibility(self.model)
    # Make sure this model has all of the required properties.
    if not hasattr(self.model, "word_embeddings"):
        raise ValueError(
            "Model needs word embedding matrix for gradient-based word swap"
        )
    if not hasattr(self.model, "lookup_table"):
        raise ValueError(
            "Model needs lookup table for gradient-based word swap")
    if not hasattr(self.model, "zero_grad"):
        raise ValueError(
            "Model needs `zero_grad()` for gradient-based word swap")
    if not hasattr(self.tokenizer, "convert_id_to_word"):
        raise ValueError(
            "Tokenizer needs `convert_id_to_word()` for gradient-based word swap"
        )
    if not hasattr(self.tokenizer, "pad_id"):
        raise ValueError(
            "Tokenizer needs `pad_id` for gradient-based word swap")
    if not hasattr(self.tokenizer, "oov_id"):
        raise ValueError(
            "Tokenizer needs `oov_id` for gradient-based word swap")
    self.loss = torch.nn.CrossEntropyLoss()
    self.pad_id = self.model_wrapper.tokenizer.pad_id
    self.oov_id = self.model_wrapper.tokenizer.oov_id
    self.top_n = top_n
    self.is_black_box = False

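# A minimal sketch of the HotFlip-style candidate scoring that the checks
# above enable, using the embedding lookup table and loss gradients the
# constructor validates. The helper name `_top_swap_candidates` and its
# signature are assumptions for illustration, not part of this module.
import torch

def _top_swap_candidates(embedding_matrix, emb_grads, token_ids, top_n):
    """Rank vocabulary entries by first-order estimated loss increase.

    embedding_matrix: (vocab_size, dim) weight of the input embedding layer.
    emb_grads: (seq_len, dim) gradient of the loss w.r.t. each current
        token's embedding.
    token_ids: (seq_len,) ids of the current tokens.
    """
    # Directional derivative of the loss toward every vocabulary embedding.
    vocab_scores = emb_grads @ embedding_matrix.T             # (seq_len, vocab)
    # Subtract each position's score for its current token, so each entry
    # estimates the loss gain from swapping that token out.
    current = vocab_scores.gather(1, token_ids.unsqueeze(1))  # (seq_len, 1)
    swap_gain = vocab_scores - current
    # Keep the `top_n` highest-scoring replacement ids per position.
    return swap_gain.topk(top_n, dim=1).indices               # (seq_len, top_n)

# Quick shape check with dummy tensors.
vocab, dim, seq_len = 30522, 768, 8
emb = torch.randn(vocab, dim)
grads = torch.randn(seq_len, dim)
ids = torch.randint(0, vocab, (seq_len,))
print(_top_swap_candidates(emb, grads, ids, top_n=5).shape)  # torch.Size([8, 5])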