# Names of the classes in ``losses`` that ``get_loss`` knows how to build.
# Every one of them is constructed with a single ``model=`` keyword argument.
_SUPPORTED_LOSS_TYPES = (
    'BatchAllTripletLoss',
    'BatchHardSoftMarginTripletLoss',
    'BatchHardTripletLoss',
    'BatchSemiHardTripletLoss',
    'ContrastiveLoss',
    'CosineSimilarityLoss',
    'MegaBatchMarginLoss',
    'MultipleNegativesRankingLoss',
    'OnlineContrastiveLoss',
)


def get_loss(loss_type, model):
    """Build the loss object named by ``loss_type`` for ``model``.

    Args:
        loss_type: Name of a class in ``losses``; must be one of the entries
            in ``_SUPPORTED_LOSS_TYPES``.
        model: Forwarded to the loss constructor as ``model=model``.

    Returns:
        A new instance of the requested loss class.

    Raises:
        ValueError: If ``loss_type`` is not one of the supported names. The
            message names the rejected value and lists the accepted ones.
    """
    # Tuple membership compares with ``==`` (no hashing), so an unhashable or
    # non-string ``loss_type`` ends up in the ValueError branch instead of
    # raising TypeError.
    if loss_type not in _SUPPORTED_LOSS_TYPES:
        valid_names = ', '.join(_SUPPORTED_LOSS_TYPES)
        raise ValueError(
            'Invalid loss type {!r}; expected one of: {}'.format(
                loss_type, valid_names
            )
        )

    # The class is looked up lazily, only after the name has been validated.
    loss_cls = getattr(losses, loss_type)
    return loss_cls(model=model)
examples=train_set, model=model, provide_positive=False, #For BatchHardTripletLoss, we must set provide_positive and provide_negative to False provide_negative=False, ) train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=train_batch_size) ### Triplet losses #################### ### There are 4 triplet loss variants: ### - BatchHardTripletLoss ### - BatchHardSoftMarginTripletLoss ### - BatchSemiHardTripletLoss ### - BatchAllTripletLoss ####################################### train_loss = losses.BatchAllTripletLoss(model=model) #train_loss = losses.BatchHardTripletLoss(sentence_embedder=model) #train_loss = losses.BatchHardSoftMarginTripletLoss(sentence_embedder=model) #train_loss = losses.BatchSemiHardTripletLoss(sentence_embedder=model) logging.info("Read TREC val dataset") dev_evaluator = TripletEvaluator.from_input_examples(dev_set, name='dev') logging.info("Performance before fine-tuning:") dev_evaluator(model) warmup_steps = int( len(train_dataset) * num_epochs / train_batch_size * 0.1 ) # 10% of train data