def main(test_file, pretrained_file, batch_size=32):
    """
    Test the ESIM model with pretrained weights on some dataset.

    The model hyper-parameters (vocabulary size, embedding dimension, hidden
    size and number of classes) are not arguments: they are read back from
    the shapes of the weights stored in the checkpoint.

    Args:
        test_file: The path to a file containing preprocessed NLI data.
        pretrained_file: The path to a checkpoint produced by the
            'train_model' script. Its "model" entry must hold the state
            dict of an ESIM model.
        batch_size: The size of the batches used for testing.
            Defaults to 32.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    print(20 * "=", " Preparing for testing ", 20 * "=")

    # map_location lets a checkpoint saved on a GPU be loaded on a CPU-only
    # host (and the other way round).
    checkpoint = torch.load(pretrained_file, map_location=device)
    state_dict = checkpoint["model"]

    # Retrieving model parameters from checkpoint.
    vocab_size = state_dict["_word_embedding.weight"].size(0)
    embedding_dim = state_dict["_word_embedding.weight"].size(1)
    hidden_size = state_dict["_projection.0.weight"].size(0)
    num_classes = state_dict["_classification.4.weight"].size(0)

    print("\t* Loading test data...")
    # NOTE: pickle.load can execute arbitrary code; only pass trusted files.
    with open(test_file, "rb") as pkl:
        test_data = NLIDataset(pickle.load(pkl))

    test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)

    print("\t* Building model...")
    model = ESIM(vocab_size,
                 embedding_dim,
                 hidden_size,
                 num_classes=num_classes,
                 device=device).to(device)
    model.load_state_dict(state_dict)

    print(20 * "=",
          " Testing ESIM model on device: {} ".format(device),
          20 * "=")
    # Local names deliberately differ from 'accuracy_score' & co. so that
    # they cannot shadow same-named metric functions imported by the file.
    (batch_time, total_time, accuracy,
     acc_score, precision, recall, f1) = test(model, test_loader)

    # Implicit string concatenation instead of a backslash continuation
    # inside the literal, so no stray indentation ends up in the message.
    report = ("-> Average batch processing time: {:.4f}s, total test time:"
              " {:.4f}s, accuracy: {:.4f}%, accuracy_score: {:.4f}%,"
              " precision_score: {:.4f}%, recall_score: {:.4f}%,"
              " f1_score: {:.4f}%")
    print(report.format(batch_time,
                        total_time,
                        accuracy * 100,
                        acc_score * 100,
                        precision * 100,
                        recall * 100,
                        f1 * 100))
def main(train_file,
         valid_file,
         test_file,
         embeddings_file,
         target_dir,
         hidden_size=300,
         dropout=0.5,
         num_classes=3,
         epochs=64,
         batch_size=32,
         lr=0.0004,
         patience=5,
         max_grad_norm=10.0,
         checkpoint_model0=None,
         checkpoint_model1=None,
         finetuning=False):
    """
    Train a TOP model alongside an ESIM model (Quora dataset script).

    Two models are built and handed to 'train'/'validate' as a list:
    model[0] is an ESIM model (built with dropout 0) and model[1] is a TOP
    model. Unless 'finetuning' is set, only the parameters of model[1] are
    given to the optimizer. Checkpoints are written for model[1] only, and
    the loss curves are saved to 'quora_loss.png' in the current working
    directory.

    Args:
        train_file: A path to some preprocessed data that must be used to
            train the model.
        valid_file: A path to some preprocessed data that must be used to
            validate the model. Validation accuracy drives the learning
            rate scheduler and early stopping.
        test_file: A path to some preprocessed data that is evaluated after
            every epoch, for reporting only.
        embeddings_file: A path to some preprocessed word embeddings that
            must be used to initialise the models.
        target_dir: The path to a directory where the trained model must
            be saved.
        hidden_size: The size of the hidden layers in the models.
            Defaults to 300.
        dropout: The dropout rate to use in the TOP model. Defaults to 0.5.
        num_classes: The number of classes in the output of the models.
            Defaults to 3.
        epochs: The maximum number of epochs for training. Defaults to 64.
        batch_size: The size of the batches for training. Defaults to 32.
        lr: The learning rate for the optimizer. Defaults to 0.0004.
        patience: The patience to use for early stopping. Defaults to 5.
        max_grad_norm: Forwarded to 'train' (presumably the gradient
            clipping threshold). Defaults to 10.0.
        checkpoint_model0: A checkpoint whose weights are loaded into the
            ESIM model. Its "best_score" also becomes the initial best
            score. Defaults to None.
        checkpoint_model1: A checkpoint from which to continue training the
            TOP model (weights, optimizer state, epoch and loss history).
            If None and 'checkpoint_model0' was given, the TOP model is
            initialised with every ESIM weight whose name it shares.
            Defaults to None.
        finetuning: If True, the ESIM parameters are optimised together
            with the TOP parameters. Defaults to False.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    print(20 * "=", " Preparing for training ", 20 * "=")

    os.makedirs(target_dir, exist_ok=True)

    # -------------------- Data loading ------------------- #
    # NOTE: pickle.load can execute arbitrary code; only pass trusted files.
    print("\t* Loading training data...")
    with open(train_file, "rb") as pkl:
        train_data = NLIDataset(pickle.load(pkl))

    train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)

    print("\t* Loading validation data...")
    with open(valid_file, "rb") as pkl:
        valid_data = NLIDataset(pickle.load(pkl))

    valid_loader = DataLoader(valid_data, shuffle=False, batch_size=batch_size)

    print("\t* Loading test data...")
    with open(test_file, "rb") as pkl:
        test_data = NLIDataset(pickle.load(pkl))

    test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)

    # -------------------- Model definition ------------------- #
    print("\t* Building model...")
    with open(embeddings_file, "rb") as pkl:
        embeddings = torch.tensor(pickle.load(pkl), dtype=torch.float)\
                     .to(device)

    model0 = ESIM(embeddings.shape[0],
                  embeddings.shape[1],
                  hidden_size,
                  embeddings=embeddings,
                  dropout=0,
                  num_classes=num_classes,
                  device=device).to(device)

    model1 = TOP(embeddings.shape[0],
                 embeddings.shape[1],
                 hidden_size,
                 embeddings=embeddings,
                 dropout=dropout,
                 num_classes=num_classes,
                 device=device).to(device)

    # 'train' and 'validate' receive both models as a list.
    model = [model0, model1]

    # -------------------- Preparation for training ------------------- #
    criterion = nn.CrossEntropyLoss()
    if finetuning:
        trainable = itertools.chain(model[0].parameters(),
                                    model[1].parameters())
    else:
        trainable = model[1].parameters()
    optimizer = torch.optim.Adam(trainable, lr=lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode="max",
                                                           factor=0.5,
                                                           patience=0)

    best_score = 0.0
    start_epoch = 1

    # Data for loss curves plot.
    epochs_count = []
    train_losses = []
    valid_losses = []

    # Loading the ESIM weights if a checkpoint was given as argument.
    # map_location makes GPU-saved checkpoints loadable on a CPU-only host.
    esim_state = None
    if checkpoint_model0:
        checkpoint = torch.load(checkpoint_model0, map_location=device)
        # NOTE(review): the ESIM checkpoint's best score is reused as the
        # starting best score for this run - confirm this is intended.
        best_score = checkpoint["best_score"]
        print("\t* Training will continue on existing model from epoch {}..."
              .format(start_epoch))
        esim_state = checkpoint["model"]
        model[0].load_state_dict(esim_state)

    # Continuing training of the TOP model from a checkpoint if one was
    # given; otherwise seeding it from the ESIM weights when available.
    if checkpoint_model1:
        checkpoint = torch.load(checkpoint_model1, map_location=device)
        start_epoch = checkpoint["epoch"] + 1
        best_score = checkpoint["best_score"]

        print("\t* Training will continue on existing model from epoch {}..."
              .format(start_epoch))

        model[1].load_state_dict(checkpoint["model"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        epochs_count = checkpoint["epochs_count"]
        train_losses = checkpoint["train_losses"]
        valid_losses = checkpoint["valid_losses"]
    elif esim_state is not None:
        # Guarded: with no ESIM checkpoint there is nothing to transfer
        # (the unguarded version read an unbound 'checkpoint' variable).
        model_dict = model[1].state_dict()
        model_dict.update({name: weight
                           for name, weight in esim_state.items()
                           if name in model_dict})
        model[1].load_state_dict(model_dict)

    # -------------------- Training epochs ------------------- #
    print("\n",
          20 * "=",
          "Training ESIM model on device: {}".format(device),
          20 * "=")

    patience_counter = 0
    for epoch in range(start_epoch, epochs + 1):
        epochs_count.append(epoch)

        print("* Training epoch {}:".format(epoch))
        epoch_time, train_loss, train_accuracy = train(model,
                                                       train_loader,
                                                       optimizer,
                                                       criterion,
                                                       epoch,
                                                       max_grad_norm)

        train_losses.append(train_loss)
        print("-> Training time: {:.4f}s, loss = {:.4f}, accuracy: {:.4f}%"
              .format(epoch_time, train_loss, (train_accuracy * 100)))

        print("* Validation for epoch {}:".format(epoch))
        epoch_time, valid_loss, valid_accuracy = validate(model,
                                                          valid_loader,
                                                          criterion)

        valid_losses.append(valid_loss)
        print("-> Valid. time: {:.4f}s, loss: {:.4f}, accuracy: {:.4f}%\n"
              .format(epoch_time, valid_loss, (valid_accuracy * 100)))

        # The test set is evaluated for reporting only; it influences
        # neither the scheduler nor early stopping.
        print("* test for epoch {}:".format(epoch))
        epoch_time, test_loss, test_accuracy = validate(model,
                                                        test_loader,
                                                        criterion)
        print("-> test. time: {:.4f}s, loss: {:.4f}, accuracy: {:.4f}%\n"
              .format(epoch_time, test_loss, (test_accuracy * 100)))

        sys.stdout.flush()  # Flush the output.

        # Update the optimizer's learning rate with the scheduler.
        scheduler.step(valid_accuracy)

        # Early stopping on validation accuracy.
        improved = not valid_accuracy < best_score
        if improved:
            best_score = valid_accuracy
            patience_counter = 0
        else:
            patience_counter += 1

        # Only the TOP model (model[1]) is checkpointed, together with the
        # optimizer state so that training can be resumed from any file.
        state = {"epoch": epoch,
                 "model": model[1].state_dict(),
                 "best_score": best_score,
                 "optimizer": optimizer.state_dict(),
                 "epochs_count": epochs_count,
                 "train_losses": train_losses,
                 "valid_losses": valid_losses}

        # Save the best model.
        if improved:
            torch.save(state, os.path.join(target_dir, "best_model1.pth.tar"))

        # Save the model at each epoch.
        torch.save(state,
                   os.path.join(target_dir,
                                "esim_model1{}.pth.tar".format(epoch)))

        if patience_counter >= patience:
            print("-> Early stopping: patience limit reached, stopping...")
            break

    # Plotting of the loss curves for the train and validation sets.
    fig = plt.figure()
    plt.plot(epochs_count, train_losses, "-r")
    plt.plot(epochs_count, valid_losses, "-b")
    plt.xlabel("epoch")
    plt.ylabel("loss")
    plt.legend(["Training loss", "Validation loss"])
    plt.title("Cross entropy loss")
    fig.savefig('quora_loss.png')
def main(train_file,
         valid_file,
         test_file,
         embeddings_file,
         target_dir,
         hidden_size=300,
         dropout=0.5,
         num_classes=3,
         epochs=64,
         batch_size=32,
         lr=0.0004,
         patience=5,
         max_grad_norm=10.0,
         checkpoint_model0=None,
         checkpoint_model1=None,
         finetuning=False):
    """
    Run the ESIM/TOP model pair on two datasets and write submission files.

    Despite sharing its signature with the training script, this function
    performs no training. It builds an ESIM model (model[0]) and a TOP
    model (model[1]), restores their weights from the given checkpoints,
    then calls 'test' on the validation and test loaders. The results are
    written to 'matched_submission.csv' and 'mismatched_submission.csv'
    respectively, in the current working directory.

    Args:
        train_file: Unused; kept for signature compatibility.
        valid_file: A path to preprocessed data whose results are written
            to 'matched_submission.csv'.
        test_file: A path to preprocessed data whose results are written
            to 'mismatched_submission.csv'.
        embeddings_file: A path to some preprocessed word embeddings used
            to build the models.
        target_dir: A directory that is created if it does not exist.
        hidden_size: The size of the hidden layers in the models.
            Defaults to 300.
        dropout: The dropout rate to use in the TOP model. Defaults to 0.5.
        num_classes: The number of classes in the output of the models.
            Defaults to 3.
        epochs: Unused; kept for signature compatibility.
        batch_size: The size of the batches used for evaluation.
            Defaults to 32.
        lr: Unused; kept for signature compatibility.
        patience: Unused; kept for signature compatibility.
        max_grad_norm: Unused; kept for signature compatibility.
        checkpoint_model0: A checkpoint whose weights are loaded into the
            ESIM model. Defaults to None.
        checkpoint_model1: A checkpoint whose weights are loaded into the
            TOP model. If None and 'checkpoint_model0' was given, the TOP
            model receives every ESIM weight whose name it shares.
            Defaults to None.
        finetuning: Unused; kept for signature compatibility.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    print(20 * "=", " Preparing for training ", 20 * "=")

    # NOTE(review): target_dir is created but the CSV files below are
    # written to the current working directory - confirm this is intended.
    os.makedirs(target_dir, exist_ok=True)

    # NOTE: pickle.load can execute arbitrary code; only pass trusted files.
    print("\t* Loading validation data...")
    with open(valid_file, "rb") as pkl:
        valid_data = NLIDataset(pickle.load(pkl))

    valid_loader = DataLoader(valid_data, shuffle=False, batch_size=batch_size)

    print("\t* Loading test data...")
    with open(test_file, "rb") as pkl:
        test_data = NLIDataset(pickle.load(pkl))

    test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)

    # -------------------- Model definition ------------------- #
    print("\t* Building model...")
    with open(embeddings_file, "rb") as pkl:
        embeddings = torch.tensor(pickle.load(pkl), dtype=torch.float)\
                     .to(device)

    model0 = ESIM(embeddings.shape[0],
                  embeddings.shape[1],
                  hidden_size,
                  embeddings=embeddings,
                  dropout=0,
                  num_classes=num_classes,
                  device=device).to(device)

    model1 = TOP(embeddings.shape[0],
                 embeddings.shape[1],
                 hidden_size,
                 embeddings=embeddings,
                 dropout=dropout,
                 num_classes=num_classes,
                 device=device).to(device)

    # 'test' receives both models as a list.
    model = [model0, model1]

    # -------------------- Preparation for testing ------------------- #
    criterion = nn.CrossEntropyLoss()

    # Only used in the progress messages below.
    start_epoch = 1

    # Restoring the ESIM weights if a checkpoint was given as argument.
    # map_location makes GPU-saved checkpoints loadable on a CPU-only host.
    esim_state = None
    if checkpoint_model0:
        checkpoint = torch.load(checkpoint_model0, map_location=device)
        print("\t* Training will continue on existing model from epoch {}..."
              .format(start_epoch))
        esim_state = checkpoint["model"]
        model[0].load_state_dict(esim_state)

    if checkpoint_model1:
        checkpoint = torch.load(checkpoint_model1, map_location=device)
        start_epoch = checkpoint["epoch"] + 1
        print("\t* Training will continue on existing model from epoch {}..."
              .format(start_epoch))
        model[1].load_state_dict(checkpoint["model"])
    elif esim_state is not None:
        # Guarded: with no ESIM checkpoint there is nothing to transfer
        # (the unguarded version read an unbound 'checkpoint' variable).
        model_dict = model[1].state_dict()
        model_dict.update({name: weight
                           for name, weight in esim_state.items()
                           if name in model_dict})
        model[1].load_state_dict(model_dict)

    # 'test' is expected to return an object exposing 'to_csv'
    # (presumably a pandas DataFrame - defined elsewhere in the project).
    data = test(model, valid_loader, criterion)
    data.to_csv('matched_submission.csv', index=False)

    data = test(model, test_loader, criterion)
    data.to_csv('mismatched_submission.csv', index=False)
def main(train_file,
         valid_file,
         test_file,
         embeddings_file,
         target_dir,
         hidden_size=300,
         dropout=0.5,
         num_classes=3,
         epochs=64,
         batch_size=32,
         lr=0.0004,
         patience=5,
         max_grad_norm=10.0,
         checkpoint=None):
    """
    Compute the validation accuracy of an ESIM model.

    Despite sharing its signature with the training script, this function
    performs no training: it builds an ESIM model, optionally restores its
    weights from a checkpoint, runs 'validate' on the validation data and
    prints the resulting accuracy.

    Args:
        train_file: Unused; kept for signature compatibility.
        valid_file: A path to some preprocessed data that must be used to
            validate the model.
        test_file: Unused; kept for signature compatibility.
        embeddings_file: A path to some preprocessed word embeddings that
            must be used to initialise the model.
        target_dir: A directory that is created if it does not exist.
        hidden_size: The size of the hidden layers in the model.
            Defaults to 300.
        dropout: The dropout rate to use in the model. Defaults to 0.5.
        num_classes: The number of classes in the output of the model.
            Defaults to 3.
        epochs: Unused; kept for signature compatibility.
        batch_size: The size of the batches used for validation.
            Defaults to 32.
        lr: Unused; kept for signature compatibility.
        patience: Unused; kept for signature compatibility.
        max_grad_norm: Unused; kept for signature compatibility.
        checkpoint: The path to a checkpoint whose weights are loaded into
            the model. If None, the freshly initialised model is
            evaluated. Defaults to None.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    print(20 * "=", " Preparing for training ", 20 * "=")

    os.makedirs(target_dir, exist_ok=True)

    # -------------------- Data loading ------------------- #
    print("\t* Loading validation data...")
    # NOTE: pickle.load can execute arbitrary code; only pass trusted files.
    with open(valid_file, "rb") as pkl:
        valid_data = NLIDataset(pickle.load(pkl))

    # No shuffling: evaluation does not need it, and a fixed order keeps
    # runs reproducible and consistent with the other evaluation scripts.
    valid_loader = DataLoader(valid_data, shuffle=False, batch_size=batch_size)

    # -------------------- Model definition ------------------- #
    print("\t* Building model...")
    with open(embeddings_file, "rb") as pkl:
        embeddings = torch.tensor(pickle.load(pkl), dtype=torch.float)\
                     .to(device)

    model = ESIM(embeddings.shape[0],
                 embeddings.shape[1],
                 hidden_size,
                 embeddings=embeddings,
                 dropout=dropout,
                 num_classes=num_classes,
                 device=device).to(device)

    # Restoring the weights from a checkpoint if one was given as argument.
    if checkpoint:
        # Bound to a new name so the 'checkpoint' argument keeps meaning
        # "path"; map_location makes GPU-saved files loadable on CPU.
        state = torch.load(checkpoint, map_location=device)
        start_epoch = state["epoch"] + 1

        print("\t* Training will continue on existing model from epoch {}..."
              .format(start_epoch))

        model.load_state_dict(state["model"])

    # Compute the accuracy on the validation set.
    _, valid_accuracy = validate(model, valid_loader)
    print("\t* Validation accuracy: {:.4f}%".format(valid_accuracy * 100))
def main(test_files, pretrained_file, labeldict, output_dir, batch_size=32):
    """
    Test the ESIM model with pretrained weights on the MultiNLI dataset.

    The model hyper-parameters are read back from the shapes of the
    weights stored in the checkpoint. Predictions are written to
    'matched_predictions.csv' and 'mismatched_predictions.csv' inside
    'output_dir', each with a "pairID,gold_label" header line.

    Args:
        test_files: A mapping with the keys "matched" and "mismatched"
            holding the paths to the preprocessed MNLI test sets.
        pretrained_file: The path to a checkpoint produced by the
            'train_mnli' script.
        labeldict: A dictionary associating labels (classes) to integer
            values. It is forwarded to 'predict'.
        output_dir: The path to a directory where the predictions of the
            model must be saved. It is created if it does not exist.
        batch_size: The size of the batches used for testing.
            Defaults to 32.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    print(20 * "=", " Preparing for testing ", 20 * "=")

    output_dir = os.path.normpath(output_dir)
    os.makedirs(output_dir, exist_ok=True)

    # map_location lets a checkpoint saved on a GPU be loaded on a CPU-only
    # host (and the other way round).
    checkpoint = torch.load(pretrained_file, map_location=device)
    state_dict = checkpoint['model']

    # Retrieve model parameters from the checkpoint.
    vocab_size = state_dict['_word_embedding.weight'].size(0)
    embedding_dim = state_dict['_word_embedding.weight'].size(1)
    hidden_size = state_dict['_projection.0.weight'].size(0)
    num_classes = state_dict['_classification.4.weight'].size(0)

    print("\t* Loading test data...")
    # Both sets are loaded before the model is built, matched first.
    # NOTE: pickle.load can execute arbitrary code; only pass trusted files.
    splits = ("matched", "mismatched")
    loaders = {}
    for split in splits:
        with open(os.path.normpath(test_files[split]), 'rb') as pkl:
            split_data = NLIDataset(pickle.load(pkl))
        loaders[split] = DataLoader(split_data,
                                    shuffle=False,
                                    batch_size=batch_size)

    print("\t* Building model...")
    model = ESIM(vocab_size,
                 embedding_dim,
                 hidden_size,
                 num_classes=num_classes,
                 device=device).to(device)

    model.load_state_dict(state_dict)

    print(20 * "=",
          " Prediction on MNLI with ESIM model on device: {} ".format(device),
          20 * "=")

    for split in splits:
        print("\t* Prediction for {} test set...".format(split))
        # 'predict' is iterated as a mapping from pair ID to label string.
        predictions = predict(model, loaders[split], labeldict)

        output_path = os.path.join(output_dir,
                                   "{}_predictions.csv".format(split))
        with open(output_path, 'w') as output_f:
            output_f.write("pairID,gold_label\n")
            for pair_id in predictions:
                output_f.write(pair_id + "," + predictions[pair_id] + "\n")