def test_LabelAccuracyEvaluator(self):
    """Tests that the LabelAccuracyEvaluator can be loaded correctly"""
    model = SentenceTransformer('paraphrase-distilroberta-base-v1')

    # Download AllNLI once; reuse the cached copy on later runs.
    nli_dataset_path = 'datasets/AllNLI.tsv.gz'
    if not os.path.exists(nli_dataset_path):
        util.http_get('https://sbert.net/datasets/AllNLI.tsv.gz', nli_dataset_path)

    label2int = {"contradiction": 0, "entailment": 1, "neutral": 2}

    # Collect up to 100 labelled sentence pairs from the 'train' split.
    dev_samples = []
    with gzip.open(nli_dataset_path, 'rt', encoding='utf8') as nli_file:
        for record in csv.DictReader(nli_file, delimiter='\t', quoting=csv.QUOTE_NONE):
            if record['split'] != 'train':
                continue
            dev_samples.append(
                InputExample(texts=[record['sentence1'], record['sentence2']],
                             label=label2int[record['label']]))
            if len(dev_samples) >= 100:
                break

    train_loss = losses.SoftmaxLoss(
        model=model,
        sentence_embedding_dimension=model.get_sentence_embedding_dimension(),
        num_labels=len(label2int))

    dev_dataloader = DataLoader(dev_samples, shuffle=False, batch_size=16)
    evaluator = evaluation.LabelAccuracyEvaluator(dev_dataloader, softmax_model=train_loss)
    acc = evaluator(model)
    assert acc > 0.2
def test_multiclass(self):
    """Smoke-test MulticlassEvaluator over a 3-class SoftmaxLoss head.

    Builds a tiny transformer + mean-pooling bi-encoder, wraps it in a
    3-label SoftmaxLoss, and runs the evaluator over three one-example
    batches.  Only verifies that evaluation runs end to end; no score is
    asserted because the untrained tiny model's output is arbitrary.
    """
    transformer = models.Transformer('prajjwal1/bert-tiny')
    model = SentenceTransformer(modules=[
        transformer,
        models.Pooling(transformer.get_word_embedding_dimension())
    ])
    softmax_loss = losses.SoftmaxLoss(
        model, transformer.get_word_embedding_dimension(), num_labels=3)
    samples = [
        InputExample(texts=["Hello Word, a first test sentence",
                            "Hello Word, a other test sentence"], label=0),
        InputExample(texts=["Hello Word, a second test sentence",
                            "Hello Word, a other test sentence"], label=1),
        InputExample(texts=["Hello Word, a third test sentence",
                            "Hello Word, a other test sentence"], label=2),
    ]
    dataloader = DataLoader(samples, batch_size=1)
    evaluator = MulticlassEvaluator(dataloader, softmax_model=softmax_loss)
    # Fix: removed dead trailing statement `i = 0` (leftover debug anchor).
    result = evaluator(model)  # noqa: F841 — kept for debugger inspection
def train(hp):
    """Train the advanced blocking model

    Store the trained model in hp.model_fn.

    Args:
        hp (Namespace): the hyperparameters

    Returns:
        None
    """
    lm_checkpoints = {
        'distilbert': 'distilbert-base-uncased',
        'bert': 'bert-base-uncased',
        'albert': 'albert-base-v2',
    }

    # Sentence encoder: transformer backbone followed by mean pooling.
    encoder = models.Transformer(lm_checkpoints[hp.lm])
    pooler = models.Pooling(encoder.get_word_embedding_dimension(),
                            pooling_mode_mean_tokens=True,
                            pooling_mode_cls_token=False,
                            pooling_mode_max_tokens=False)
    model = SentenceTransformer(modules=[encoder, pooler])

    # Load the training and validation data.
    reader = Reader()
    train_examples = SentencesDataset(examples=reader.get_examples(hp.train_fn),
                                      model=model)
    train_loader = DataLoader(train_examples, shuffle=True,
                              batch_size=hp.batch_size)
    loss = losses.SoftmaxLoss(
        model=model,
        sentence_embedding_dimension=model.get_sentence_embedding_dimension(),
        num_labels=2)

    valid_examples = SentencesDataset(examples=reader.get_examples(hp.valid_fn),
                                      model=model)
    valid_loader = DataLoader(valid_examples, shuffle=False,
                              batch_size=hp.batch_size)
    evaluator = EmbeddingSimilarityEvaluator(valid_loader)

    # 10% of train data for warm-up.
    warmup_steps = math.ceil(len(train_loader) * hp.n_epochs / hp.batch_size * 0.1)

    # Start from a clean output directory.
    if os.path.exists(hp.model_fn):
        import shutil
        shutil.rmtree(hp.model_fn)

    # Train the model.
    model.fit(train_objectives=[(train_loader, loss)],
              evaluator=evaluator,
              epochs=hp.n_epochs,
              evaluation_steps=1000,
              warmup_steps=warmup_steps,
              output_path=hp.model_fn,
              fp16=hp.fp16,
              fp16_opt_level='O2')
def load_model(self, text_model_path, classifier_path):
    """
    Method used for pretrained model loading
    """
    # Restore the sentence encoder and the saved classification head.
    self.model = SentenceTransformer(text_model_path)
    self.classification_model = torch.load(classifier_path)
    # Rebuild the softmax loss wrapper, then swap in the loaded classifier
    # so evaluation uses the pretrained weights.
    loss = losses.SoftmaxLoss(
        model=self.model,
        sentence_embedding_dimension=self.model.get_sentence_embedding_dimension(),
        num_labels=len(self.label2int))
    loss.classifier = self.classification_model
    self.train_loss_nli = loss
def train_sbert(model_name, model_save_path):
    """Fine-tune a BERT bi-encoder on AllNLI, then score it on STSbenchmark."""
    batch_size = 16
    nli_reader, sts_reader = load_dataset()
    train_num_labels = nli_reader.get_num_labels()

    # Use BERT for mapping tokens to embeddings, with mean pooling to get
    # one fixed sized sentence vector.
    bert = models.BERT(model_name)
    pooler = models.Pooling(bert.get_word_embedding_dimension(),
                            pooling_mode_mean_tokens=True,
                            pooling_mode_cls_token=False,
                            pooling_mode_max_tokens=False)
    model = SentenceTransformer(modules=[bert, pooler])

    # Convert the dataset to a DataLoader ready for training.
    logging.info("Read AllNLI train dataset")
    train_loader = DataLoader(
        SentencesDataset(nli_reader.get_examples('train.gz'), model=model),
        shuffle=True, batch_size=batch_size)
    train_loss = losses.SoftmaxLoss(
        model=model,
        sentence_embedding_dimension=model.get_sentence_embedding_dimension(),
        num_labels=train_num_labels)

    logging.info("Read STSbenchmark dev dataset")
    dev_loader = DataLoader(
        SentencesDataset(examples=sts_reader.get_examples('sts-dev.csv'), model=model),
        shuffle=False, batch_size=batch_size)
    evaluator = EmbeddingSimilarityEvaluator(dev_loader)

    # Configure the training; 10% of train data for warm-up.
    num_epochs = 1
    warmup_steps = math.ceil(len(train_loader) * num_epochs * 0.1)
    logging.info("Warmup-steps: {}".format(warmup_steps))

    model.fit(train_objectives=[(train_loader, train_loss)],
              evaluator=evaluator,
              epochs=num_epochs,
              evaluation_steps=1000,
              warmup_steps=warmup_steps,
              output_path=model_save_path
              )

    # Reload the stored model and evaluate it on the STS test split.
    model = SentenceTransformer(model_save_path)
    test_loader = DataLoader(
        SentencesDataset(examples=sts_reader.get_examples("sts-test.csv"), model=model),
        shuffle=False, batch_size=batch_size)
    model.evaluate(EmbeddingSimilarityEvaluator(test_loader))
def initialize_model(self):
    """Build the sentence encoder (transformer + mean pooling) and its NLI softmax loss."""
    # Use BERT for mapping tokens to embeddings.
    encoder = models.Transformer(self.base_model, max_seq_length=128)
    # Apply mean pooling to get one fixed sized sentence vector.
    pooler = models.Pooling(encoder.get_word_embedding_dimension(),
                            pooling_mode_mean_tokens=True,
                            pooling_mode_cls_token=False,
                            pooling_mode_max_tokens=False)
    self.model = SentenceTransformer(modules=[encoder, pooler])
    self.train_loss_nli = losses.SoftmaxLoss(
        model=self.model,
        sentence_embedding_dimension=self.model.get_sentence_embedding_dimension(),
        num_labels=len(self.label2int))
def main():
    # Entry point: fine-tunes a sentence model with a softmax head on
    # reviewer-score data, one training shard at a time.
    # NOTE(review): `parser`, `build_model`, `NUM_LABELS`, `TRAIN_BATCH_SIZE`,
    # `num_epochs` and `model_save_path` are not defined in this block —
    # presumably module-level globals; verify before reuse.
    args = parser.parse_args()
    model = build_model()
    train_loss = losses.SoftmaxLoss(
        model=model,
        sentence_embedding_dimension=model.get_sentence_embedding_dimension(),
        num_labels=NUM_LABELS)
    # score_map[(dataset, pair_index)][review_index] -> score
    score_map = collections.defaultdict(lambda : collections.defaultdict())
    with open(args.inputdir + "/scores.pickle", 'rb') as f:
        scores = pickle.load(f)
    for key, score_list in scores.items():
        dataset, pair_index = key
        # Skip held-out test datasets.
        if 'test' in dataset:
            continue
        for rev_i, score in enumerate(score_list):
            score_map[key][rev_i] = score
    # Dev samples come from six fixed shards, flattened into one list.
    dev_samples = sum([
        build_samples(args.inputdir, "traindev_dev", i, score_map)
        for i in range(6)
    ], [])
    dev_evaluator = BasicEvaluator.from_input_examples(
        dev_samples, model, batch_size=TRAIN_BATCH_SIZE, name='sts-dev')
    for epoch_i in range(num_epochs):
        num_examples = len(glob.glob(args.inputdir +"/traindev_train/*")) - 2
        # NOTE(review): the computed shard count is immediately overridden
        # with a hard-coded 20 — looks like leftover debugging; confirm.
        num_examples = 20
        for example_i in range(num_examples):
            train_loader = build_dataloader(args.inputdir, "traindev_train",
                                            example_i, score_map,
                                            TRAIN_BATCH_SIZE)
            warmup_steps = math.ceil(len(train_loader) * 0.1) #10% of train data for warm-up
            model.fit(train_objectives=[(train_loader, train_loss)],
                      evaluator=dev_evaluator,
                      epochs=1,
                      evaluation_steps=1000,
                      warmup_steps=warmup_steps,
                      output_path=model_save_path)
            # Debug pass: print the loss over the shard just trained on.
            for input_ids, labels in train_loader:
                print(train_loss(input_ids, None))
def test_train_nli(self):
    """Train a distilbert bi-encoder on the NLI samples (AMP) and check the STSb score."""
    transformer = models.Transformer('distilbert-base-uncased')
    pooling = models.Pooling(transformer.get_word_embedding_dimension())
    model = SentenceTransformer(modules=[transformer, pooling])

    dataset = SentencesDataset(self.nli_train_samples, model=model)
    loader = DataLoader(dataset, shuffle=True, batch_size=16)
    loss = losses.SoftmaxLoss(
        model=model,
        sentence_embedding_dimension=model.get_sentence_embedding_dimension(),
        num_labels=3)

    model.fit(train_objectives=[(loader, loss)],
              evaluator=None,
              epochs=1,
              warmup_steps=int(len(loader) * 0.1),
              use_amp=True)

    # Minimum expected Spearman score on the STSb test set.
    self.evaluate_stsb_test(model, 50.0)
def fine_tune(cfg):
    """
    Function to finetune a model with Infodemic-specific data.
    :param cfg: configuration dictionary (keys: 'model', 'model_output')
    :return: none
    """
    model = SentenceTransformer(cfg['model'])
    # data reading dependent on data format, see https://github.com/UKPLab/sentence-transformers/blob/master/examples/training/sts/training_stsbenchmark_continue_training.py
    # for an example at lines 48-62
    # TODO(review): train_samples is still a placeholder — SentencesDataset
    # cannot consume None; supply real InputExample data before running.
    train_samples = None
    train_ds = SentencesDataset(train_samples, model)
    train_dl = DataLoader(train_ds)
    # Fix: SoftmaxLoss requires sentence_embedding_dimension; calling it with
    # only (model, num_labels=3) raises a TypeError. This matches how every
    # other SoftmaxLoss in this file is constructed.
    train_loss = losses.SoftmaxLoss(
        model,
        sentence_embedding_dimension=model.get_sentence_embedding_dimension(),
        num_labels=3)
    # list of evaluators at https://github.com/UKPLab/sentence-transformers/tree/master/sentence_transformers/evaluation
    evaluator = None
    model.fit(train_objectives=[(train_dl, train_loss)],
              evaluator=evaluator,
              epochs=30,
              evaluation_steps=1000,
              output_path=cfg['model_output'])
# --- Test-set prediction setup (top-level statements) ---
# NOTE(review): `labels_file`, `pred_file`, `curr_dir`, `TestAGBReader` and
# `torch` come from earlier in the file / imports not visible in this chunk.

# Remove stale prediction outputs from a previous run.
if os.path.isfile(labels_file):
    os.remove(os.path.join(curr_dir, "prediction_labels.csv"))
if os.path.isfile(pred_file):
    os.remove(os.path.join(curr_dir, "prediction_results.csv"))

# Model path
model_save_path = curr_dir
batch_size = 24
agb_reader = TestAGBReader('datasets/og-test')
train_num_labels = agb_reader.get_num_labels()
model = SentenceTransformer(model_save_path, device="cpu")
train_loss = losses.SoftmaxLoss(model=model,
                                sentence_embedding_dimension=model.get_sentence_embedding_dimension(),
                                num_labels=train_num_labels)
# Load the trained softmax classifier head saved alongside the model.
train_loss.classifier = torch.load(os.path.join(model_save_path, "2_Softmax/pytorch_model.bin"))
print("test")
test_dir = "/data/daumiller/sentence-transformers/examples/datasets/og-test"
for fn in sorted(os.listdir(test_dir)):
    examples = agb_reader.get_examples(fn)
    if not examples:
        continue
    # Hack to avoid problems with docs almost as long as batch size
    if len(examples) == batch_size + 1:
        batch_size_used = batch_size - 3
    else:
        batch_size_used = batch_size
    test_data = SentencesDataset(examples=examples, model=model, shorten=True)
    # NOTE(review): the loop body appears to continue beyond this chunk.
def main():
    """Train an SBERT bi-encoder on masked relation data and save it to disk."""
    parser = argparse.ArgumentParser(description='Start training with SBERT')
    parser.add_argument('--model_path', type=str, help='Path to trained model folder ./models/[MODEL_NAME]')
    parser.add_argument('--dataset', type=str, default='few_rel', help='Name dataset')
    parser.add_argument('--mask_method', type=str, default='bracket', help='Type of masking')
    parser.add_argument('--num_epochs', type=int, default=15, help='Number epochs')
    parser.add_argument('--num_samples', type=int, default=-1, help='Number of samples for test run, default -1 means all data')
    parser.add_argument('--max_seq_length', type=int, default=256, help='Max token length for BERT')
    args = parser.parse_args()

    model_path = args.model_path
    dataset = args.dataset
    mask_method = args.mask_method
    num_samples = args.num_samples
    max_seq_length=args.max_seq_length
    num_epochs = args.num_epochs
    evaluation_steps = 1000 # Frequency of evaluation results
    warmup_steps = 1000 # warm up steps
    sentence_out_embedding_dimension = 256

    # Normalise the model path and derive the model name from its folder.
    if model_path.endswith('/'):
        model_path = model_path[:-1]
    model_name = model_path.split('/')[-1]

    path_train_data = f'./data/train_samples/{dataset}_train_{mask_method}_train.csv'
    path_eval_data = f'./data/train_samples/{dataset}_val_{mask_method}_test.csv'

    # A limited-sample run is saved under a separate "_test" directory.
    if num_samples>0:
        model_save_path = f'./trained_models/{model_name}_sbert_bi_{dataset}_test/'
    else:
        model_save_path = f'./trained_models/{model_name}_sbert_bi_{dataset}/'

    ### Define the model
    word_embedding_model = models.Transformer(model_path, max_seq_length=max_seq_length)

    ### Add special tokens - this helps us add tokens like Doc or query or Entity1 / Entity2
    # but in our case we already added that to the model prior
    #tokens = ["[DOC]", "[QRY]"]
    #word_embedding_model.tokenizer.add_tokens(tokens, special_tokens=True)
    #word_embedding_model.auto_model.resize_token_embeddings(len(word_embedding_model.tokenizer))

    pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension())
    # Dense projection down to the target sentence embedding size.
    dense_model = models.Dense(in_features=pooling_model.get_sentence_embedding_dimension(),
                               out_features=sentence_out_embedding_dimension,
                               activation_function=nn.Tanh())

    # Model pipeline
    model = SentenceTransformer(modules=[word_embedding_model, pooling_model, dense_model])

    # Prep DataLoader
    train_examples = load_train_sbert(path_train_data, num_samples)
    train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16)

    # Prep Evaluator
    sentences1, sentences2, scores = load_eval_sbert(path_eval_data, num_samples)
    #evaluator = evaluation.EmbeddingSimilarityEvaluator(sentences1, sentences2, scores)
    evaluator = evaluation.BinaryClassificationEvaluator(sentences1, sentences2, scores)

    #train_loss = losses.CosineSimilarityLoss(model)
    train_loss = losses.SoftmaxLoss(model, sentence_embedding_dimension= sentence_out_embedding_dimension, num_labels = 2)

    #Tune the model
    model.fit(train_objectives=[(train_dataloader, train_loss)],
              evaluator=evaluator,
              epochs=num_epochs,
              evaluation_steps=evaluation_steps,
              warmup_steps=warmup_steps,
              output_path=model_save_path)
def trainer(
    model: SBERTPredictor,
    tokenizer,
    df_train,
    df_val,
    epochs: int = 1,
    learning_rate: float = 1e-5,
    batch_size: int = 16,
    embedding_epochs: int = None,
    enable_class_weights: bool = True,
):
    """Train the SBERT model using a training data loader and a validation
    dataloader.

    :param model: SBERTPredicor model
    :type model: SBERT_Predictor
    :param tokenizer: tokenizer used in SBERT model
    :param df_train: train dataframe
    :type df_train: pd.DataFrame()
    :param df_val: validation dataframe
    :type df_val: pd.DataFrame()
    :param epochs: numer of epochs
    :type epochs: int
    :param learning_rate: learning rate
    :type learning_rate: float
    :param batch_size: batch size to be used for training
    :type batch_size: int
    :param embedding_epochs: epochs intended for the embedding stage;
        defaults to `epochs` when None
    :param enable_class_weights: weight the classifier loss by class frequency
    """
    if embedding_epochs is None:
        embedding_epochs = epochs

    # Embedding stage data: NLI-style pairs from train+val.
    nli_reader = NLIDataReader(df_train.append(df_val))
    train_num_labels = nli_reader.get_num_labels()
    train_data = SentencesDataset(nli_reader.get_examples(),
                                  model=model.embedding_model)
    train_data.label_type = torch.long
    # some bug in sentence_transformer library causes it to be identified as
    # float by default
    train_dataloader_embed = DataLoader(train_data,
                                        shuffle=True,
                                        batch_size=batch_size)
    train_loss_embed = losses.SoftmaxLoss(
        model=model.embedding_model,
        sentence_embedding_dimension=model.embedding_model.
        get_sentence_embedding_dimension(),
        num_labels=train_num_labels)
    val_nli_reader = NLIDataReader(df_val)
    dev_data = SentencesDataset(val_nli_reader.get_examples(),
                                model=model.embedding_model)
    dev_data.label_type = torch.long
    evaluator = EmbeddingSimilarityEvaluator(
        sentences1=df_val["sentence1"].values,
        sentences2=df_val["sentence2"].values,
        scores=df_val["label"].values / 2.,
        batch_size=batch_size)
    warmup_steps = math.ceil(
        len(train_dataloader_embed) * epochs / batch_size *
        0.1)  # 10% of train data for warm-up

    # now to train the final layer
    train_dataset = ClassifierDataset(df_train, tokenizer=tokenizer)
    val_dataset = ClassifierDataset(df_val, tokenizer=tokenizer)
    if enable_class_weights is False:
        class_weights = None
    else:
        class_weights = train_dataset.class_weights()
    train_dataloader = DataLoader(dataset=train_dataset,
                                  batch_size=batch_size,
                                  collate_fn=collate_fn,
                                  shuffle=True)
    val_dataloader = DataLoader(dataset=val_dataset,
                                batch_size=1,
                                collate_fn=collate_fn)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Fix: when enable_class_weights is False, class_weights is None and the
    # original `class_weights.to(device)` raised AttributeError. A None
    # weight is valid for CrossEntropyLoss (unweighted).
    criterion = nn.CrossEntropyLoss(
        weight=class_weights.to(device) if class_weights is not None else None)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    model.to(device)
    print("------TRAINING STARTS----------")  # noqa: T001
    # train embedding layer
    # NOTE(review): `embedding_epochs` is computed above but fit() runs with
    # epochs=1 exactly as the original did — confirm whether it should be used.
    unfreeze_layer(model.embedding_model)
    model.embedding_model.fit(
        train_objectives=[(train_dataloader_embed, train_loss_embed)],
        evaluator=evaluator,
        epochs=1,
        evaluation_steps=1000,
        warmup_steps=warmup_steps,
    )
    # train the Transformer layer
    freeze_layer(model.embedding_model)
    x, y = format_create(df=df_train, model=model)
    x_test, y_test = format_create(df=df_val, model=model)
    if model.logistic_model is True:
        model.logisticregression.fit(x, y)
        print(
            classification_report(
                y_test, model.logisticregression.predict(x_test)))  # noqa: T001
    else:
        accuracy_stats = {
            "train": [],
            "val": [],
        }
        loss_stats = {
            "train": [],
            "val": [],
        }
        for e in range(epochs):
            train_epoch_loss = 0
            train_epoch_acc = 0
            model.train()
            for sentence1, sentence2, label in tqdm(train_dataloader):
                label = label.to(device)
                optimizer.zero_grad()
                y_train_pred = model(sentence1, sentence2)
                train_loss = criterion(y_train_pred, label)
                train_acc = multi_acc(y_train_pred, label)
                train_loss.backward()
                optimizer.step()
                train_epoch_loss += train_loss.item()
                train_epoch_acc += train_acc.item()
            # VALIDATION
            with torch.no_grad():
                val_epoch_loss = 0
                val_epoch_acc = 0
                model.eval()
                for sentence1, sentence2, label in val_dataloader:
                    label = label.to(device)
                    y_val_pred = model(sentence1, sentence2)
                    val_loss = criterion(y_val_pred, label)
                    val_acc = multi_acc(y_val_pred, label)
                    val_epoch_loss += val_loss.item()
                    val_epoch_acc += val_acc.item()
            loss_stats['train'].append(train_epoch_loss / len(train_dataloader))
            loss_stats['val'].append(val_epoch_loss / len(val_dataloader))
            accuracy_stats['train'].append(train_epoch_acc / len(train_dataloader))
            accuracy_stats['val'].append(val_epoch_acc / len(val_dataloader))
            print(
                f"Epoch {e+0:03}: | Train Loss: {train_epoch_loss/len(train_dataloader):.5f} \
| Val Loss: {val_epoch_loss / len(val_dataloader):.5f} \
| Train Acc: {train_epoch_acc/len(train_dataloader):.3f} \
| Val Acc: {val_epoch_acc/len(val_dataloader):.3f}"
            )  # noqa: T001
    print("---------TRAINING ENDED------------")  # noqa: T001
# --- Dataset loading (top-level statements) ---
# NOTE(review): `nli_dataset_path`, `sts_dataset_path`, `label2int`, `model`
# and `batch_size` are defined earlier in the file (not visible in this chunk).

# Collect all AllNLI training pairs with integer labels.
train_nli_samples = []
with gzip.open(nli_dataset_path, 'rt', encoding='utf8') as fIn:
    reader = csv.DictReader(fIn, delimiter='\t', quoting=csv.QUOTE_NONE)
    for row in reader:
        if row['split'] == 'train':
            label_id = label2int[row['label']]
            train_nli_samples.append(
                InputExample(texts=[row['sentence1'], row['sentence2']],
                             label=label_id))

train_data_nli = SentencesDataset(train_nli_samples, model=model)
train_dataloader_nli = DataLoader(train_data_nli, shuffle=True,
                                  batch_size=batch_size)
train_loss_nli = losses.SoftmaxLoss(
    model=model,
    sentence_embedding_dimension=model.get_sentence_embedding_dimension(),
    num_labels=len(label2int))

logging.info("Read STSbenchmark train dataset")
train_sts_samples = []
dev_sts_samples = []
test_sts_samples = []
with gzip.open(sts_dataset_path, 'rt', encoding='utf8') as fIn:
    reader = csv.DictReader(fIn, delimiter='\t', quoting=csv.QUOTE_NONE)
    for row in reader:
        score = float(row['score']) / 5.0  # Normalize score to range 0 ... 1
        inp_example = InputExample(texts=[row['sentence1'], row['sentence2']],
                                   label=score)
        if row['split'] == 'dev':
            dev_sts_samples.append(inp_example)
        # NOTE(review): handling for the 'train'/'test' splits appears to
        # continue beyond this chunk.
def train_nli():
    """Train a SentenceTransformer on AllNLI and evaluate with label accuracy."""
    #### Just some code to print debug information to stdout
    logging.basicConfig(format='%(asctime)s - %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S',
                        level=logging.INFO,
                        handlers=[LoggingHandler()])
    #### /print debug information to stdout

    #You can specify any huggingface/transformers pre-trained model here, for example, bert-base-uncased, roberta-base, xlm-roberta-base
    #model_name = sys.argv[1] if len(sys.argv) > 1 else 'bert-base-uncased'
    model_name = 'pretrained_model/bert-base-uncased'

    # Read the dataset
    train_batch_size = 6
    nli_reader = NLIDataReader('./examples/datasets/AllNLI')
    sts_reader = STSBenchmarkDataReader('./examples/datasets/stsbenchmark')
    train_num_labels = nli_reader.get_num_labels()
    # Timestamped output folder so repeated runs never collide.
    model_save_path = 'output/training_nli_'+model_name.replace("/", "-")+'-'+datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

    # Use Huggingface/transformers model (like BERT, RoBERTa, XLNet, XLM-R) for mapping tokens to embeddings
    word_embedding_model = models.Transformer(model_name)

    # Apply mean pooling to get one fixed sized sentence vector
    pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(),
                                   pooling_mode_mean_tokens=True,
                                   pooling_mode_cls_token=False,
                                   pooling_mode_max_tokens=False)

    model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

    # Convert the dataset to a DataLoader ready for training
    logging.info("Read AllNLI train dataset")
    train_dataset = SentencesDataset(nli_reader.get_examples('train.gz'), model=model)
    train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=train_batch_size)
    train_loss = losses.SoftmaxLoss(model=model, sentence_embedding_dimension=model.get_sentence_embedding_dimension(), num_labels=train_num_labels)

    logging.info("Read STSbenchmark dev dataset")
    dev_data = SentencesDataset(examples=sts_reader.get_examples('sts-dev.csv'), model=model)
    dev_dataloader = DataLoader(dev_data, shuffle=False, batch_size=train_batch_size)
    # NOTE(review): the evaluator wraps a *fresh* Softmax_label head rather
    # than the `train_loss` head being trained — confirm this is intended.
    evaluator = LabelAccuracyEvaluator(dev_dataloader,softmax_model = Softmax_label(model = model, sentence_embedding_dimension = model.get_sentence_embedding_dimension(), num_labels = train_num_labels))

    # Configure the training
    num_epochs = 1
    warmup_steps = math.ceil(len(train_dataset) * num_epochs / train_batch_size * 0.1) #10% of train data for warm-up
    logging.info("Warmup-steps: {}".format(warmup_steps))

    # Train the model
    model.fit(train_objectives=[(train_dataloader, train_loss)],
              evaluator=evaluator,
              epochs=num_epochs,
              evaluation_steps=100,
              warmup_steps=warmup_steps,
              output_path=model_save_path
              )

    ##############################################################################
    #
    # Load the stored model and evaluate its performance on STS benchmark dataset
    #
    ##############################################################################

    #model = SentenceTransformer(model_save_path)
    test_data = SentencesDataset(examples=sts_reader.get_examples("sts-test.csv"), model=model)
    test_dataloader = DataLoader(test_data, shuffle=False, batch_size=train_batch_size)
    # NOTE(review): the final evaluation reuses the dev-set label-accuracy
    # `evaluator`, not one over `test_dataloader`; the commented-out line
    # below suggests a test-set similarity evaluator was the earlier intent.
    #evaluator = EmbeddingSimilarityEvaluator(test_dataloader)
    model.evaluate(evaluator)
def train_self():
    """Train a no-pooling SentenceTransformer variant on a local custom dataset."""
    train_batch_size = 8
    num_epochs = 50
    device = 'cuda:0'  # NOTE(review): assigned but not visibly used in this block
    train_num_labels = 6
    evaluation_steps = 1000
    local = True

    #### Just some code to print debug information to stdout
    logging.basicConfig(format='%(asctime)s - %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S',
                        level=logging.INFO,
                        handlers=[LoggingHandler()])

    # model_name = sys.argv[1] if len(sys.argv) > 1 else 'bert-base-uncased'
    #model_name = 'bert-base-chinese'
    model_name = './pretrained_model/bert-base-chinese'
    #train_batch_size = config.train_batch_size
    self_reader = Self_csv_DataReader('./self_dataset',local = local)
    #train_num_labels = config.train_num_labels
    # Timestamped output folder so repeated runs never collide.
    model_save_path = 'output/training_nli_'+model_name.replace("/", "-")+'-'+datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

    # Use Huggingface/transformers model (like BERT, RoBERTa, XLNet, XLM-R) for mapping tokens to embeddings
    word_embedding_model = models.Transformer(model_name,cache_dir = './pretrained_model')

    # Apply mean pooling to get one fixed sized sentence vector
    # NOTE(review): the pooling module is built but excluded from the model
    # below (commented out), so the model works on word embeddings directly —
    # consistent with SoftmaxLoss/evaluator using get_word_embedding_dimension().
    pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(),
                                   pooling_mode_mean_tokens=True,
                                   pooling_mode_cls_token=False,
                                   pooling_mode_max_tokens=False)

    model = SentenceTransformer_NoPooling(modules=[word_embedding_model])#, pooling_model])

    # Convert the dataset to a DataLoader ready for training
    logging.info("Read self train dataset")
    train_dataset = SentencesDataset(examples=self_reader.get_examples("train.csv"), model=model)
    train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=train_batch_size)
    train_loss = losses.SoftmaxLoss(model=model, sentence_embedding_dimension=model.get_word_embedding_dimension(), num_labels=train_num_labels)

    logging.info("Read self dev dataset")
    dev_data = SentencesDataset(examples=self_reader.get_examples('dev.csv'), model=model)
    dev_dataloader = DataLoader(dev_data, shuffle=False, batch_size=train_batch_size)
    # NOTE(review): the evaluator wraps a *fresh* Softmax_label head rather
    # than the `train_loss` head being trained — confirm this is intended.
    evaluator = LabelAccuracyEvaluator(dev_dataloader,softmax_model = Softmax_label(model = model, sentence_embedding_dimension = model.get_word_embedding_dimension(), num_labels = train_num_labels))

    warmup_steps = math.ceil(len(train_dataset) * num_epochs / train_batch_size * 0.1) #10% of train data for warm-up
    logging.info("Warmup-steps: {}".format(warmup_steps))

    # Train the model
    model.fit(train_objectives=[(train_dataloader, train_loss)],
              evaluator=evaluator,
              epochs=num_epochs,
              evaluation_steps=evaluation_steps,
              warmup_steps=warmup_steps,
              output_path=model_save_path
              )