# Example #1
 def test_encodes_lm_like(self):
     """Check that GPT-style language models yield a per-sequence embedding
     taken from the first token (one 768-dim vector per input text)."""
     from transformers import AutoModelForPreTraining

     gpt_style_models = ("gpt2", "openai-gpt")
     for model_name in gpt_style_models:
         # Stub out the pretrained-weights download with a lightweight mock.
         mocked_loader = patch.object(
             AutoModelForPreTraining,
             'from_pretrained',
             return_value=MockPtModel(model_name),
         )
         with mocked_loader:
             encoder = TransformerTorchEncoder(
                 pretrained_model_name_or_path=model_name,
                 pooling_strategy='auto',
                 metas={},
             )
             embeddings = encoder.encode(self.texts)
             # Two input texts, hidden size 768.
             assert embeddings.shape == (2, 768)
# Example #2
 def test_encodes_bert_like(self):
     """Check that BERT-style models yield a per-sequence embedding taken
     from the first ([CLS]-like) token (one 768-dim vector per input text)."""
     from transformers import AutoModelForPreTraining

     bert_style_models = (
         "bert-base-uncased",
         "google/electra-base-discriminator",
         "roberta-base",
     )
     for model_name in bert_style_models:
         # Stub out the pretrained-weights download with a lightweight mock.
         mocked_loader = patch.object(
             AutoModelForPreTraining,
             'from_pretrained',
             return_value=MockPtModel(model_name),
         )
         with mocked_loader:
             encoder = TransformerTorchEncoder(
                 pretrained_model_name_or_path=model_name,
                 pooling_strategy='auto',
                 metas={},
             )
             embeddings = encoder.encode(self.texts)
             # Two input texts, hidden size 768.
             assert embeddings.shape == (2, 768)
# Example #3
 def _get_encoder(self, metas):
     """Build a BERT-backed encoder (default pooling) for the given metas."""
     encoder = TransformerTorchEncoder(
         pretrained_model_name_or_path='bert-base-uncased',
         metas=metas,
     )
     return encoder
# Example #4
 def _get_encoder(self, metas):
     """Build an XLNet-backed encoder with 'min' pooling for the given metas.

     Bug fix: the keyword was misspelled ``polling_strategy`` — the sibling
     tests spell it ``pooling_strategy`` — so the intended 'min' pooling was
     never applied (the encoder would either reject or ignore the unknown
     keyword).
     """
     return TransformerTorchEncoder(
         pooling_strategy='min',
         pretrained_model_name_or_path='xlnet-base-cased',
         metas=metas)