def main(test_input_dir, model_dir, test_upper_bound, result_save_dir):
    """Evaluate a series of saved classifier checkpoints on a test set.

    For each checkpoint ``checkpoint_{i}.pt`` (i = 1 .. test_upper_bound) in
    ``model_dir``, loads the parameters into a fresh LinearClassifier, runs
    evaluation on the test dataloader, and writes the accuracy plus the
    predicted/correct label pairs into text files under ``result_save_dir``.
    """
    # Build the test dataloader (20 answer options per example).
    logger.info("Create test dataloader from {}.".format(test_input_dir))
    test_dataset = create_dataset(test_input_dir, num_examples=-1, num_options=20)
    test_dataloader = DataLoader(
        test_dataset, batch_size=4, shuffle=False, drop_last=True)

    # Instantiate the classifier on top of the pretrained Japanese BERT.
    logger.info("Create a classifier model.")
    classifier_model = LinearClassifier.from_pretrained(
        "cl-tohoku/bert-base-japanese-whole-word-masking")
    classifier_model.to(device)

    # Make sure the output directory exists before writing results.
    logger.info("Results will be saved in {}.".format(result_save_dir))
    os.makedirs(result_save_dir, exist_ok=True)

    logger.info("Start model evaluation.")
    for i in range(test_upper_bound):
        # Checkpoints are 1-indexed on disk.
        model_filepath = os.path.join(
            model_dir, "checkpoint_{}.pt".format(i + 1))
        logger.info("Load model parameters from {}.".format(model_filepath))

        parameters = torch.load(model_filepath, map_location=device)
        classifier_model.load_state_dict(parameters)

        pred_labels, correct_labels, accuracy = evaluate(
            classifier_model, test_dataloader)
        logger.info("Accuracy: {}".format(accuracy))

        # Persist the accuracy and the per-example label pairs as text files.
        res_filepath = os.path.join(
            result_save_dir, "result_test_{}.txt".format(i + 1))
        labels_filepath = os.path.join(
            result_save_dir, "labels_test_{}.txt".format(i + 1))

        with open(res_filepath, "w") as w:
            w.write("Accuracy: {}\n".format(accuracy))
        with open(labels_filepath, "w") as w:
            for pred_label, correct_label in zip(pred_labels, correct_labels):
                w.write("{} {}\n".format(pred_label, correct_label))

    logger.info("Finished model evaluation.")
def main(batch_size, num_epochs, lr, train_input_dir, dev1_input_dir, result_save_dir):
    """Train the classifier and evaluate it on the dev1 set after each epoch.

    Trains a LinearClassifier (built on the pretrained Japanese BERT) with
    AdamW and a linear warmup schedule. After every epoch, saves a checkpoint
    ``checkpoint_{epoch}.pt`` into ``result_save_dir``, evaluates on the dev1
    dataloader, and writes the accuracy plus predicted/correct label pairs to
    text files.
    """
    logger.info("batch_size: {} num_epochs: {} lr: {}".format(
        batch_size, num_epochs, lr))

    # Training data uses 4 answer options; dev1 uses 20.
    logger.info("Create train dataloader from {}.".format(train_input_dir))
    train_dataset = create_dataset(train_input_dir, num_examples=-1, num_options=4)
    train_dataloader = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

    logger.info("Create dev1 dataloader from {}.".format(dev1_input_dir))
    dev1_dataset = create_dataset(dev1_input_dir, num_examples=-1, num_options=20)
    dev1_dataloader = DataLoader(
        dev1_dataset, batch_size=4, shuffle=False, drop_last=True)

    # Instantiate the classifier on top of the pretrained Japanese BERT.
    logger.info("Create a classifier model.")
    classifier_model = LinearClassifier.from_pretrained(
        "cl-tohoku/bert-base-japanese-whole-word-masking")
    classifier_model.to(device)

    # AdamW with a linear schedule over the full training run (no warmup).
    optimizer = AdamW(classifier_model.parameters(), lr=lr, eps=1e-8)
    total_steps = len(train_dataloader) * num_epochs
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=total_steps)

    # Make sure the output directory exists before writing anything.
    os.makedirs(result_save_dir, exist_ok=True)

    logger.info("Start model training.")
    for epoch in range(num_epochs):
        logger.info("===== Epoch {}/{} =====".format(epoch + 1, num_epochs))

        mean_loss = train(classifier_model, optimizer, scheduler, train_dataloader)
        logger.info("Mean loss: {}".format(mean_loss))

        # Checkpoint after every epoch (1-indexed filenames).
        checkpoint_filepath = os.path.join(
            result_save_dir, "checkpoint_{}.pt".format(epoch + 1))
        torch.save(classifier_model.state_dict(), checkpoint_filepath)

        pred_labels, correct_labels, accuracy = evaluate(
            classifier_model, dev1_dataloader)
        logger.info("Accuracy: {}".format(accuracy))

        # Persist the accuracy and the per-example label pairs as text files.
        res_filepath = os.path.join(
            result_save_dir, "result_eval_{}.txt".format(epoch + 1))
        labels_filepath = os.path.join(
            result_save_dir, "labels_eval_{}.txt".format(epoch + 1))

        with open(res_filepath, "w") as w:
            w.write("Accuracy: {}\n".format(accuracy))
        with open(labels_filepath, "w") as w:
            for pred_label, correct_label in zip(pred_labels, correct_labels):
                w.write("{} {}\n".format(pred_label, correct_label))

    logger.info("Finished model training.")