def __init__(self, model, use_cache=True):
    """Create a goal function around ``model``.

    Validates that this goal-function class is compatible with the model's
    class, then initializes query bookkeeping and an optional LRU cache of
    model outputs.

    Args:
        model: the victim model this goal function will query.
        use_cache: when True, memoize model calls in an LRU cache.
    """
    validators.validate_model_goal_function_compatibility(
        self.__class__, model.__class__
    )
    self.model = model
    self.use_cache = use_cache
    self.num_queries = 0
    # Cache model outputs so repeated queries on the same input are free.
    self._call_model_cache = (
        lru.LRU(utils.config('MODEL_CACHE_SIZE')) if use_cache else None
    )
def batch_model_predict(model, inputs, batch_size=utils.config("MODEL_BATCH_SIZE")):
    """Run ``model_predict`` over ``inputs`` in fixed-size batches.

    Args:
        model: model passed through to ``model_predict``.
        inputs: sequence of model inputs.
        batch_size: number of inputs per forward pass.

    Returns:
        A single tensor (batches concatenated along dim 0) when the
        per-batch outputs are tensors; otherwise the raw list of
        per-batch outputs.
    """
    batch_outputs = []
    for start in range(0, len(inputs), batch_size):
        chunk = inputs[start:start + batch_size]
        batch_outputs.append(model_predict(model, chunk))
    try:
        return torch.cat(batch_outputs, dim=0)
    except TypeError:
        # A TypeError occurs when the lists in ``batch_outputs`` are full of
        # strings instead of numbers. If this is the case, just return the
        # regular list.
        return batch_outputs
def __init__(
    self,
    goal_function=None,
    constraints=None,
    transformation=None,
    search_method=None,
):
    """
    Initialize an attack object. Attacks can be run multiple times.

    Args:
        goal_function: goal function used to score model outputs.
        constraints: optional list of constraints; pre-transformation
            constraints are separated out automatically. Defaults to no
            constraints. (Changed from a mutable ``[]`` default to ``None``
            to avoid the shared-default-argument pitfall.)
        transformation: produces candidate perturbations of an input.
        search_method: strategy for searching over transformations.

    Raises:
        NameError: if goal_function, search_method, or transformation is missing.
        ValueError: if the search method is incompatible with the transformation.
    """
    self.goal_function = goal_function
    if not self.goal_function:
        raise NameError(
            "Cannot instantiate attack without self.goal_function for predictions"
        )
    self.search_method = search_method
    if not self.search_method:
        raise NameError("Cannot instantiate attack without search method")
    self.transformation = transformation
    if not self.transformation:
        raise NameError("Cannot instantiate attack without transformation")
    self.is_black_box = getattr(transformation, "is_black_box", True)
    if not self.search_method.check_transformation_compatibility(
        self.transformation
    ):
        # Bug fix: this message was a plain string, so the {…} placeholders
        # were emitted literally; it must be an f-string.
        raise ValueError(
            f"SearchMethod {self.search_method} incompatible with transformation {self.transformation}"
        )
    # Fresh lists per instance; split constraints by whether they apply
    # before or after the transformation.
    self.constraints = []
    self.pre_transformation_constraints = []
    for constraint in constraints or []:
        if isinstance(
            constraint,
            textattack.constraints.pre_transformation.
            PreTransformationConstraint,
        ):
            self.pre_transformation_constraints.append(constraint)
        else:
            self.constraints.append(constraint)
    self.constraints_cache = lru.LRU(utils.config("CONSTRAINT_CACHE_SIZE"))
    # Give search method access to functions for getting transformations and evaluating them
    self.search_method.get_transformations = self.get_transformations
    self.search_method.get_goal_results = self.goal_function.get_results
def __init__(self, goal_function, transformation, constraints=None, is_black_box=True):
    """
    Initialize an attack object. Attacks can be run multiple times.

    Args:
        goal_function: goal function used to score model outputs; must be
            provided, and is also consulted for a tokenizer fallback.
        transformation: produces candidate perturbations of an input.
        constraints: optional list of constraints. Defaults to an empty
            list. (Changed from a mutable ``[]`` default to ``None`` — the
            old default list was assigned directly to ``self.constraints``,
            so every default-constructed attack shared one mutable list.)
        is_black_box: whether the attack treats the model as a black box.

    Raises:
        NameError: if goal_function is missing, or no tokenizer is available.
    """
    self.goal_function = goal_function
    if not self.goal_function:
        raise NameError(
            'Cannot instantiate attack without self.goal_function for predictions'
        )
    # Fall back to the model's tokenizer when a subclass has not set one.
    if not hasattr(self, 'tokenizer'):
        if hasattr(self.goal_function.model, 'tokenizer'):
            self.tokenizer = self.goal_function.model.tokenizer
        else:
            raise NameError('Cannot instantiate attack without tokenizer')
    self.transformation = transformation
    # Give each instance its own list when no constraints were supplied;
    # an explicitly passed list is kept as-is (caller aliasing preserved).
    if constraints is None:
        constraints = []
    self.constraints = constraints
    self.is_black_box = is_black_box
    self.constraints_cache = lru.LRU(utils.config('CONSTRAINT_CACHE_SIZE'))
def __init__(
    self, model, tokenizer=None, use_cache=True, query_budget=float("inf")
):
    """Set up the goal function: model, tokenizer, cache, and query budget.

    Args:
        model: the victim model; must be compatible with this goal-function
            class per ``validators``.
        tokenizer: tokenizer exposing ``encode()``; when omitted, the
            model's own ``tokenizer`` attribute is used.
        use_cache: when True, memoize model calls in an LRU cache.
        query_budget: maximum number of model queries allowed.

    Raises:
        NameError: when no tokenizer is supplied and the model has none.
        TypeError: when the tokenizer lacks an ``encode()`` method.
    """
    validators.validate_model_goal_function_compatibility(
        self.__class__, model.__class__
    )
    self.model = model
    self.tokenizer = tokenizer
    # Borrow the model's tokenizer if the caller did not provide one.
    if not self.tokenizer:
        if hasattr(self.model, "tokenizer"):
            self.tokenizer = self.model.tokenizer
        else:
            raise NameError("Cannot instantiate goal function without tokenizer")
    if not hasattr(self.tokenizer, "encode"):
        raise TypeError("Tokenizer must contain `encode()` method")
    self.use_cache = use_cache
    self.num_queries = 0
    self.query_budget = query_budget
    self._call_model_cache = (
        lru.LRU(utils.config("MODEL_CACHE_SIZE")) if use_cache else None
    )
def __init__(self, goal_function=None, constraints=None, transformation=None,
             search_method=None):
    """
    Initialize an attack object. Attacks can be run multiple times.

    Args:
        goal_function: goal function used to score model outputs; its model
            is also consulted for a tokenizer fallback.
        constraints: optional list of constraints; pre-transformation
            constraints are separated out automatically. Defaults to no
            constraints. (Changed from a mutable ``[]`` default to ``None``
            to avoid the shared-default-argument pitfall.)
        transformation: produces candidate perturbations of an input.
        search_method: strategy for searching over transformations.

    Raises:
        NameError: if goal_function or a tokenizer is missing.
        ValueError: if the search method is incompatible with the transformation.
    """
    self.search_method = search_method
    self.goal_function = goal_function
    if not self.goal_function:
        raise NameError(
            'Cannot instantiate attack without self.goal_function for predictions'
        )
    # Fall back to the model's tokenizer when a subclass has not set one.
    if not hasattr(self, 'tokenizer'):
        if hasattr(self.goal_function.model, 'tokenizer'):
            self.tokenizer = self.goal_function.model.tokenizer
        else:
            raise NameError('Cannot instantiate attack without tokenizer')
    self.transformation = transformation
    self.is_black_box = getattr(transformation, 'is_black_box', True)
    if not self.search_method.check_transformation_compatibility(
            self.transformation):
        # Bug fix: this message was a plain string, so the {…} placeholders
        # were emitted literally; it must be an f-string.
        raise ValueError(
            f'SearchMethod {self.search_method} incompatible with transformation {self.transformation}'
        )
    # Fresh lists per instance; split constraints by whether they apply
    # before or after the transformation.
    self.constraints = []
    self.pre_transformation_constraints = []
    for constraint in constraints or []:
        if isinstance(constraint, PreTransformationConstraint):
            self.pre_transformation_constraints.append(constraint)
        else:
            self.constraints.append(constraint)
    self.constraints_cache = lru.LRU(utils.config('CONSTRAINT_CACHE_SIZE'))
    # Give search method access to functions for getting transformations and evaluating them
    self.search_method.get_transformations = self.get_transformations
    self.search_method.get_goal_results = self.goal_function.get_results
def _call_model_uncached(self, tokenized_text_list,
                         batch_size=utils.config('MODEL_BATCH_SIZE')):
    """
    Queries model and returns outputs for a list of TokenizedText objects.

    Inputs are stacked into an id tensor, moved to the model's device, and
    fed through the model in batches of ``batch_size``; the raw per-batch
    outputs are handed to ``self._process_model_outputs``.
    """
    if not tokenized_text_list:
        return []
    # Resolve the device from the (possibly wrapped) underlying model.
    inner_model = self.model.model if hasattr(self.model, 'model') else self.model
    model_device = next(inner_model.parameters()).device
    #
    # shape of `ids` is (n, m, d)
    #   - n: number of elements in `tokenized_text_list`
    #   - m: number of vectors per element
    #       ex: most classification models take a single vector, so m=1
    #       ex: some entailment models take three vectors, so m=3
    #   - d: dimensionality of each vector
    #       (a typical model might set d=128 or d=256)
    ids = torch.tensor([t.ids for t in tokenized_text_list]).to(model_device)
    num_fields = ids.shape[1]
    outputs = []
    for start in range(0, len(tokenized_text_list), batch_size):
        chunk = ids[start:start + batch_size]
        # One tensor per input field, each of shape (batch, d).
        fields = [chunk[:, field, :] for field in range(num_fields)]
        with torch.no_grad():
            preds = self.model(*fields)
        # Some models return a tuple; the first element holds the predictions.
        if isinstance(preds, tuple):
            preds = preds[0]
        outputs.append(preds)
    return self._process_model_outputs(tokenized_text_list, outputs)