Example #1
    def run_tsdae(self,
                  train_sentences: list,
                  batch_size: int = 32,
                  learning_rate: float = 3e-5,
                  num_epochs: int = 1,
                  model_output_path: str = '.',
                  weight_decay: float = 0.0,
                  use_amp: bool = True,
                  scheduler: str = 'constantlr'):
        """
Set use_amp to True if your GPU supports FP16 cores
        """
        train_dataset = datasets.DenoisingAutoEncoderDataset(train_sentences)
        train_dataloader = DataLoader(train_dataset,
                                      batch_size=batch_size,
                                      shuffle=True,
                                      drop_last=True)
        train_loss = losses.DenoisingAutoEncoderLoss(self.model,
                                                     tie_encoder_decoder=True)
        self.model.fit(train_objectives=[(train_dataloader, train_loss)],
                       epochs=num_epochs,
                       weight_decay=weight_decay,
                       scheduler=scheduler,
                       optimizer_params={'lr': learning_rate},
                       show_progress_bar=True,
                       output_path=model_output_path,
                       use_amp=use_amp)
        print("Finished training. You can now encode.")
        print(f"Model saved at {model_output_path}")
Example #2
import sys
import logging

from torch.utils.data import DataLoader
from sentence_transformers import SentenceTransformer, models, losses, datasets, evaluation

# Collect training sentences, skipping ids reserved for the dev/test sets
# (corpus and dev_test_ids come from the AskUbuntu data loading not shown in this excerpt)
train_sentences = []
for id, sentence in corpus.items():
    if id not in dev_test_ids:
        train_sentences.append(sentence)

logging.info("{} train sentences".format(len(train_sentences)))

################# Initialize an SBERT model #################
model_name = sys.argv[1] if len(sys.argv) >= 2 else 'bert-base-uncased'
batch_size = 8  # training batch size used by the DataLoader below

word_embedding_model = models.Transformer(model_name)
# Apply 'cls' pooling to get one fixed-size sentence vector
pooling_model = models.Pooling(
    word_embedding_model.get_word_embedding_dimension(), 'cls')
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])
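
# Illustrative sanity check (not part of the original script): the pipeline
# maps any sentence to a single fixed-size vector, 768-d for bert-base-uncased
logging.info("Embedding shape: {}".format(model.encode("a test sentence").shape))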

################# Train and evaluate the model (one epoch on AskUbuntu takes about 1 hour) #################
# We wrap our training sentences in the DenoisingAutoEncoderDataset to add deletion noise on the fly
train_dataset = datasets.DenoisingAutoEncoderDataset(train_sentences)
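# Each dataset item is an InputExample that pairs a noise-corrupted copy of a
# sentence (random token deletion, ~60% of tokens by default) with the original
# text the decoder must learn to reconstruct.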
train_dataloader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              drop_last=True)
train_loss = losses.DenoisingAutoEncoderLoss(model,
                                             decoder_name_or_path=model_name,
                                             tie_encoder_decoder=True)

# Create a dev evaluator (dev_dataset: AskUbuntu dev reranking samples, loaded earlier in the script)
dev_evaluator = evaluation.RerankingEvaluator(dev_dataset,
                                              name='AskUbuntu dev')

logging.info("Dev performance before training")
dev_evaluator(model)
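
From here the original script would launch training; a sketch of that call, reusing the objects built above and mirroring the hyperparameters of Example #1 (the output path and exact values are illustrative):

model.fit(train_objectives=[(train_dataloader, train_loss)],
          evaluator=dev_evaluator,
          epochs=1,
          weight_decay=0,
          scheduler='constantlr',
          optimizer_params={'lr': 3e-5},
          output_path='output/askubuntu-tsdae',
          show_progress_bar=True,
          use_amp=True)

logging.info("Dev performance after training")
dev_evaluator(model)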