Exemple #1
0
 def __init__(self,
              model: PreTrainedModel,
              tokenizer: BatchTokenizer,
              sim_matrix_provider: SimilarityMatrixProvider,
              method: str = 'max',
              clean_special: bool = True,
              argmax_only: bool = False):
     """Initialise the instance from a model, a tokenizer and a similarity provider.

     Args:
         model: Model passed to ``LongBatchEncoder``; the device of its
             first parameter is recorded as ``self.device``.
         tokenizer: Tokenizer wrapper passed to ``LongBatchEncoder``; its
             ``.tokenizer`` attribute is handed to ``SpecialTokensCleaner``.
         sim_matrix_provider: Stored unchanged on the instance.
         method: Scoring method name; must be a member of ``self.methods``.
         clean_special: Stored unchanged; presumably toggles use of
             ``self.cleaner`` elsewhere -- confirm against the callers.
         argmax_only: Stored unchanged; not interpreted here.

     Raises:
         AssertionError: If ``method`` is not in ``self.methods``.
         ValueError: If ``model`` exposes no parameters, so no device can
             be inferred.
     """
     # Explicit raise instead of `assert`: asserts are stripped under
     # `python -O`, which would silently disable this validation. The
     # exception type is kept as AssertionError for backward compatibility.
     if method not in self.methods:
         raise AssertionError('inappropriate scoring method')
     self.model = model
     self.tokenizer = tokenizer
     self.encoder = LongBatchEncoder(model, tokenizer)
     self.sim_matrix_provider = sim_matrix_provider
     self.method = method
     self.clean_special = clean_special
     self.cleaner = SpecialTokensCleaner(tokenizer.tokenizer)
     # The `None` default used to be dereferenced straight away, which
     # produced an opaque "'NoneType' object has no attribute 'device'".
     first_param = next(self.model.parameters(), None)
     if first_param is None:
         raise ValueError('model has no parameters; cannot infer its device')
     self.device = first_param.device
     self.argmax_only = argmax_only
Exemple #2
0
    def __init__(self,
                 model: PreTrainedModel,
                 tokenizer: PreTrainedTokenizer,
                 sim_matrix_provider: SimilarityMatrixProvider,
                 method: str = 'max',
                 clean_special: bool = True,
                 argmax_only: bool = False):
        """Initialise the instance from a model, a tokenizer and a similarity provider.

        Args:
            model: Model passed to ``LongBatchEncoder``. Its
                ``config.max_position_embeddings`` is used as the encoder's
                ``max_seq_length`` and the device of its first parameter is
                recorded as ``self.device``.
            tokenizer: Tokenizer passed to ``LongBatchEncoder``; its
                ``.tokenizer`` attribute is handed to ``SpecialTokensCleaner``.
            sim_matrix_provider: Stored unchanged on the instance.
            method: Scoring method name; must be a member of ``self.methods``.
            clean_special: Stored unchanged; presumably toggles use of
                ``self.cleaner`` elsewhere -- confirm against the callers.
            argmax_only: Stored unchanged; not interpreted here.

        Raises:
            AssertionError: If ``method`` is not in ``self.methods``.
            ValueError: If ``model`` exposes no parameters, so no device can
                be inferred.
        """
        import logging

        # Explicit raise instead of `assert`: asserts are stripped under
        # `python -O`, which would silently disable this validation. The
        # exception type is kept as AssertionError for backward compatibility.
        if method not in self.methods:
            raise AssertionError('inappropriate scoring method')
        self.model = model
        max_seq_length = self.model.config.max_position_embeddings
        self.tokenizer = tokenizer
        self.batch_encoder = LongBatchEncoder(model, tokenizer, max_seq_length=max_seq_length)
        self.sim_matrix_provider = sim_matrix_provider
        self.method = method
        self.clean_special = clean_special
        # NOTE(review): `tokenizer` is annotated as PreTrainedTokenizer but is
        # accessed through a `.tokenizer` attribute here, which suggests a
        # wrapper type is actually expected -- confirm and fix the annotation.
        self.cleaner = SpecialTokensCleaner(tokenizer.tokenizer)
        # The `None` default used to be dereferenced straight away, which
        # produced an opaque "'NoneType' object has no attribute 'device'".
        first_param = next(self.model.parameters(), None)
        if first_param is None:
            raise ValueError('model has no parameters; cannot infer its device')
        self.device = first_param.device
        self.argmax_only = argmax_only

        # Formerly an unconditional print(); kept as a DEBUG log record so
        # constructing the object no longer writes to stdout.
        logging.getLogger(__name__).debug('tokenizer class: %s', self.tokenizer.__class__)