def __init__(self, model: PreTrainedModel, tokenizer: BatchTokenizer,
             sim_matrix_provider: SimilarityMatrixProvider,
             method: str = 'max', clean_special: bool = True,
             argmax_only: bool = False):
    """Store the model, tokenizer and scoring options on the instance.

    Args:
        model: Model passed to ``LongBatchEncoder``; the device of its
            first parameter becomes ``self.device``.
        tokenizer: Batch tokenizer passed to ``LongBatchEncoder``. Its
            ``.tokenizer`` attribute is handed to ``SpecialTokensCleaner``.
        sim_matrix_provider: Stored as ``self.sim_matrix_provider``.
        method: Scoring method name; must be a member of ``self.methods``.
        clean_special: Stored as ``self.clean_special`` (presumably toggles
            use of ``self.cleaner`` -- confirm against the scoring code).
        argmax_only: Stored as ``self.argmax_only``.

    Raises:
        AssertionError: If ``method`` is not in ``self.methods``.
    """
    # Raise explicitly instead of using an ``assert`` statement so the check
    # is not stripped under ``python -O``; the exception type is unchanged.
    if method not in self.methods:
        raise AssertionError('inappropriate scoring method')

    self.model = model
    self.tokenizer = tokenizer
    self.encoder = LongBatchEncoder(model, tokenizer)
    self.sim_matrix_provider = sim_matrix_provider
    self.method = method
    self.clean_special = clean_special
    self.cleaner = SpecialTokensCleaner(tokenizer.tokenizer)

    # A model without parameters yields None here; do not dereference it.
    first_param = next(self.model.parameters(), None)
    self.device = first_param.device if first_param is not None else None
    self.argmax_only = argmax_only
def __init__(self, model: PreTrainedModel, tokenizer: PreTrainedTokenizer,
             sim_matrix_provider: SimilarityMatrixProvider,
             method: str = 'max', clean_special: bool = True,
             argmax_only: bool = False):
    """Store the model, tokenizer and scoring options on the instance.

    Args:
        model: Model passed to ``LongBatchEncoder``. Its
            ``config.max_position_embeddings`` is used as the encoder's
            ``max_seq_length`` and the device of its first parameter
            becomes ``self.device``.
        tokenizer: Tokenizer passed to ``LongBatchEncoder`` and used to
            build ``SpecialTokensCleaner``.
        sim_matrix_provider: Stored as ``self.sim_matrix_provider``.
        method: Scoring method name; must be a member of ``self.methods``.
        clean_special: Stored as ``self.clean_special`` (presumably toggles
            use of ``self.cleaner`` -- confirm against the scoring code).
        argmax_only: Stored as ``self.argmax_only``.

    Raises:
        AssertionError: If ``method`` is not in ``self.methods``.
    """
    # Raise explicitly instead of using an ``assert`` statement so the check
    # is not stripped under ``python -O``; the exception type is unchanged.
    if method not in self.methods:
        raise AssertionError('inappropriate scoring method')

    self.model = model
    max_seq_length = self.model.config.max_position_embeddings
    self.tokenizer = tokenizer
    self.batch_encoder = LongBatchEncoder(
        model, tokenizer, max_seq_length=max_seq_length
    )
    self.sim_matrix_provider = sim_matrix_provider
    self.method = method
    self.clean_special = clean_special

    # NOTE(review): the parameter is annotated as PreTrainedTokenizer, which
    # does not normally expose a ``.tokenizer`` attribute. Unwrap it when
    # present (wrapper objects), otherwise use the tokenizer itself.
    self.cleaner = SpecialTokensCleaner(
        getattr(tokenizer, 'tokenizer', tokenizer)
    )

    # A model without parameters yields None here; do not dereference it.
    first_param = next(self.model.parameters(), None)
    self.device = first_param.device if first_param is not None else None
    self.argmax_only = argmax_only