Code Example #1
File: test_quora.py (Project: rzhangpku/VAA)
# Standard imports; NLIDataset, ESIM and test are project-local and must be
# imported from the VAA project's own modules.
import pickle

import torch
from torch.utils.data import DataLoader


def main(test_file, pretrained_file, batch_size=32):
    """
    Test the ESIM model with pretrained weights on some dataset.

    Args:
        test_file: The path to a file containing preprocessed NLI data.
        pretrained_file: The path to a checkpoint produced by the
            'train_model' script.
        vocab_size: The number of words in the vocabulary of the model
            being tested.
        embedding_dim: The size of the embeddings in the model.
        hidden_size: The size of the hidden layers in the model. Must match
            the size used during training. Defaults to 300.
        num_classes: The number of classes in the output of the model. Must
            match the value used during training. Defaults to 3.
        batch_size: The size of the batches used for testing. Defaults to 32.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    print(20 * "=", " Preparing for testing ", 20 * "=")

    # map_location lets a GPU-trained checkpoint be loaded on a CPU-only host.
    checkpoint = torch.load(pretrained_file, map_location=device)

    # Retrieving model parameters from checkpoint.
    vocab_size = checkpoint["model"]["_word_embedding.weight"].size(0)
    embedding_dim = checkpoint["model"]["_word_embedding.weight"].size(1)
    hidden_size = checkpoint["model"]["_projection.0.weight"].size(0)
    num_classes = checkpoint["model"]["_classification.4.weight"].size(0)

    print("\t* Loading test data...")
    with open(test_file, "rb") as pkl:
        test_data = NLIDataset(pickle.load(pkl))

    test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)

    print("\t* Building model...")
    model = ESIM(vocab_size,
                 embedding_dim,
                 hidden_size,
                 num_classes=num_classes,
                 device=device).to(device)

    model.load_state_dict(checkpoint["model"])

    print(20 * "=",
          " Testing ESIM model on device: {} ".format(device),
          20 * "=")
    (batch_time, total_time, accuracy, accuracy_score,
     precision_score, recall_score, f1_score) = test(model, test_loader)

    print("-> Average batch processing time: {:.4f}s, total test time: {:.4f}s, "
          "accuracy: {:.4f}%, accuracy_score: {:.4f}%, precision_score: {:.4f}%, "
          "recall_score: {:.4f}%, f1_score: {:.4f}%"
          .format(batch_time, total_time, accuracy * 100, accuracy_score * 100,
                  precision_score * 100, recall_score * 100, f1_score * 100))
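
For context, a minimal invocation sketch follows; the paths below are placeholders rather than files shipped with the project.

if __name__ == "__main__":
    # Hypothetical paths: point these at your own preprocessed test data
    # and at a checkpoint produced by the training script.
    main("data/preprocessed/quora/test_data.pkl",
         "checkpoints/quora/best.pth.tar",
         batch_size=32)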
Code Example #2
File: top_esim_quora.py (Project: rzhangpku/VAA)
# Standard imports; NLIDataset, ESIM, TOP, train and validate are
# project-local and must be imported from the VAA project's own modules.
import itertools
import os
import pickle
import sys

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils.data import DataLoader


def main(train_file,
         valid_file,
         test_file,
         embeddings_file,
         target_dir,
         hidden_size=300,
         dropout=0.5,
         num_classes=3,
         epochs=64,
         batch_size=32,
         lr=0.0004,
         patience=5,
         max_grad_norm=10.0,
         checkpoint_model0=None,
         checkpoint_model1=None,
         finetuning=False):
    """
    Train the ESIM model on the Quora dataset.

    Args:
        train_file: A path to some preprocessed data that must be used
            to train the model.
        valid_file: A path to some preprocessed data that must be used
            to validate the model.
        embeddings_file: A path to some preprocessed word embeddings that
            must be used to initialise the model.
        target_dir: The path to a directory where the trained model must
            be saved.
        hidden_size: The size of the hidden layers in the model. Defaults
            to 300.
        dropout: The dropout rate to use in the model. Defaults to 0.5.
        num_classes: The number of classes in the output of the model.
            Defaults to 3.
        epochs: The maximum number of epochs for training. Defaults to 64.
        batch_size: The size of the batches for training. Defaults to 32.
        lr: The learning rate for the optimizer. Defaults to 0.0004.
        patience: The patience to use for early stopping. Defaults to 5.
        checkpoint: A checkpoint from which to continue training. If None,
            training starts from scratch. Defaults to None.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    print(20 * "=", " Preparing for training ", 20 * "=")

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    # -------------------- Data loading ------------------- #
    print("\t* Loading training data...")
    with open(train_file, "rb") as pkl:
        train_data = NLIDataset(pickle.load(pkl))

    train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)

    print("\t* Loading validation data...")
    with open(valid_file, "rb") as pkl:
        valid_data = NLIDataset(pickle.load(pkl))

    valid_loader = DataLoader(valid_data, shuffle=False, batch_size=batch_size)

    print("\t* Loading test data...")
    with open(test_file, "rb") as pkl:
        test_data = NLIDataset(pickle.load(pkl))

    test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)

    # -------------------- Model definition ------------------- #
    print("\t* Building model...")
    with open(embeddings_file, "rb") as pkl:
        embeddings = torch.tensor(pickle.load(pkl), dtype=torch.float)\
            .to(device)

    model = []
    model0 = ESIM(embeddings.shape[0],
                  embeddings.shape[1],
                  hidden_size,
                  embeddings=embeddings,
                  dropout=0,
                  num_classes=num_classes,
                  device=device).to(device)
    model1 = TOP(embeddings.shape[0],
                 embeddings.shape[1],
                 hidden_size,
                 embeddings=embeddings,
                 dropout=dropout,
                 num_classes=num_classes,
                 device=device).to(device)
    model.append(model0)
    model.append(model1)

    # -------------------- Preparation for training  ------------------- #
    criterion = nn.CrossEntropyLoss()
    if finetuning:
        optimizer = torch.optim.Adam(itertools.chain(model[0].parameters(),
                                                     model[1].parameters()),
                                     lr=lr)
    else:
        optimizer = torch.optim.Adam(model[1].parameters(), lr=lr)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode="max",
                                                           factor=0.5,
                                                           patience=0)

    best_score = 0.0
    start_epoch = 1

    # Data for loss curves plot.
    epochs_count = []
    train_losses = []
    valid_losses = []

    # Continuing training from a checkpoint if one was given as argument.
    if checkpoint_model0:
        checkpoint = torch.load(checkpoint_model0)
        # start_epoch = checkpoint["epoch"] + 1
        best_score = checkpoint["best_score"]

        print("\t* Training will continue on existing model from epoch {}...".
              format(start_epoch))

        model[0].load_state_dict(checkpoint["model"])
        # optimizer.load_state_dict(checkpoint["optimizer"])
        # epochs_count = checkpoint["epochs_count"]
        # train_losses = checkpoint["train_losses"]
        # valid_losses = checkpoint["valid_losses"]
    if checkpoint_model1:
        checkpoint = torch.load(checkpoint_model1)
        start_epoch = checkpoint["epoch"] + 1
        best_score = checkpoint["best_score"]

        print("\t* Training will continue on existing model from epoch {}...".
              format(start_epoch))

        model[1].load_state_dict(checkpoint["model"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        epochs_count = checkpoint["epochs_count"]
        train_losses = checkpoint["train_losses"]
        valid_losses = checkpoint["valid_losses"]
    elif checkpoint_model0:
        # No checkpoint for the TOP model: initialise its shared layers
        # from the pretrained ESIM checkpoint loaded above. This branch
        # requires checkpoint_model0 to be given.
        model_dict = model1.state_dict()
        pretrained_dict = {
            k: v
            for k, v in checkpoint["model"].items() if k in model_dict
        }
        model_dict.update(pretrained_dict)
        model1.load_state_dict(model_dict)

    # Compute loss and accuracy before starting (or resuming) training.
    # _, valid_loss, valid_accuracy = validate(model,
    #                                          valid_loader,
    #                                          criterion)
    # print("\t* Validation loss before training: {:.4f}, accuracy: {:.4f}%"
    #       .format(valid_loss, (valid_accuracy*100)))
    #
    # _, valid_loss, valid_accuracy = validate(model,
    #                                          test_loader,
    #                                          criterion)
    # print("\t* test loss before training: {:.4f}, accuracy: {:.4f}%"
    #       .format(valid_loss, (valid_accuracy*100)))

    # -------------------- Training epochs ------------------- #
    print("\n", 20 * "=", "Training ESIM model on device: {}".format(device),
          20 * "=")

    patience_counter = 0
    for epoch in range(start_epoch, epochs + 1):
        epochs_count.append(epoch)

        print("* Training epoch {}:".format(epoch))
        epoch_time, epoch_loss, epoch_accuracy = train(model, train_loader,
                                                       optimizer, criterion,
                                                       epoch, max_grad_norm)

        train_losses.append(epoch_loss)
        print("-> Training time: {:.4f}s, loss = {:.4f}, accuracy: {:.4f}%".
              format(epoch_time, epoch_loss, (epoch_accuracy * 100)))

        print("* Validation for epoch {}:".format(epoch))
        epoch_time, epoch_loss, epoch_accuracy = validate(
            model, valid_loader, criterion)

        valid_losses.append(epoch_loss)
        print("-> Valid. time: {:.4f}s, loss: {:.4f}, accuracy: {:.4f}%\n".
              format(epoch_time, epoch_loss, (epoch_accuracy * 100)))

        print("* test for epoch {}:".format(epoch))
        epoch_time, epoch_loss, test_accuracy = validate(
            model, test_loader, criterion)

        print(
            "-> test. time: {:.4f}s, loss: {:.4f}, accuracy: {:.4f}%\n".format(
                epoch_time, epoch_loss, (test_accuracy * 100)))

        sys.stdout.flush()  # Flush the output buffer.
        # Update the optimizer's learning rate with the scheduler.
        scheduler.step(epoch_accuracy)

        # Early stopping on validation accuracy.
        if epoch_accuracy < best_score:
            patience_counter += 1
        else:
            best_score = epoch_accuracy
            patience_counter = 0
            # Save the best model. The optimizer is not saved to avoid having
            # a checkpoint file that is too heavy to be shared. To resume
            # training from the best model, use the 'esim_*.pth.tar'
            # checkpoints instead.

            # torch.save({"epoch": epoch,
            #             "model": model[0].state_dict(),
            #             "best_score": best_score,
            #             "epochs_count": epochs_count,
            #             "train_losses": train_losses,
            #             "valid_losses": valid_losses},
            #            os.path.join(target_dir, "best_model0.pth.tar"))

            torch.save(
                {
                    "epoch": epoch,
                    "model": model[1].state_dict(),
                    "best_score": best_score,
                    "optimizer": optimizer.state_dict(),
                    "epochs_count": epochs_count,
                    "train_losses": train_losses,
                    "valid_losses": valid_losses
                }, os.path.join(target_dir, "best_model1.pth.tar"))

        # Save the model at each epoch.
        # torch.save({"epoch": epoch,
        #             "model": model[0].state_dict(),
        #             "best_score": best_score,
        #             "optimizer": optimizer.state_dict(),
        #             "epochs_count": epochs_count,
        #             "train_losses": train_losses,
        #             "valid_losses": valid_losses},
        #            os.path.join(target_dir, "esim_model0{}.pth.tar".format(epoch)))

        torch.save(
            {
                "epoch": epoch,
                "model": model[1].state_dict(),
                "best_score": best_score,
                "optimizer": optimizer.state_dict(),
                "epochs_count": epochs_count,
                "train_losses": train_losses,
                "valid_losses": valid_losses
            }, os.path.join(target_dir, "esim_model1{}.pth.tar".format(epoch)))

        if patience_counter >= patience:
            print("-> Early stopping: patience limit reached, stopping...")
            break

    # Plotting of the loss curves for the train and validation sets.
    fig = plt.figure()
    plt.plot(epochs_count, train_losses, "-r")
    plt.plot(epochs_count, valid_losses, "-b")
    plt.xlabel("epoch")
    plt.ylabel("loss")
    plt.legend(["Training loss", "Validation loss"])
    plt.title("Cross entropy loss")
    fig.savefig('quora_loss.png')
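
A minimal invocation sketch for this entry point; all paths are placeholders, and num_classes=2 reflects that Quora paraphrase detection is a binary task (the signature's default of 3 comes from three-way NLI).

if __name__ == "__main__":
    # Hypothetical paths: substitute your own preprocessed Quora data,
    # embeddings and pretrained ESIM checkpoint.
    main("data/preprocessed/quora/train_data.pkl",
         "data/preprocessed/quora/dev_data.pkl",
         "data/preprocessed/quora/test_data.pkl",
         "data/preprocessed/quora/embeddings.pkl",
         "checkpoints/quora",
         num_classes=2,  # Quora paraphrase detection is binary
         checkpoint_model0="checkpoints/quora/best_esim.pth.tar")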
Code Example #3
File: top_esim_mnli_test.py (Project: rzhangpku/VAA)
# Standard imports; NLIDataset, ESIM, TOP and test are project-local and
# must be imported from the VAA project's own modules.
import os
import pickle

import torch
import torch.nn as nn
from torch.utils.data import DataLoader


def main(train_file,
         valid_file,
         test_file,
         embeddings_file,
         target_dir,
         hidden_size=300,
         dropout=0.5,
         num_classes=3,
         epochs=64,
         batch_size=32,
         lr=0.0004,
         patience=5,
         max_grad_norm=10.0,
         checkpoint_model0=None,
         checkpoint_model1=None,
         finetuning=False):
    """
    Train the ESIM model on the Quora dataset.

    Args:
        train_file: A path to some preprocessed data that must be used
            to train the model.
        valid_file: A path to some preprocessed data that must be used
            to validate the model.
        embeddings_file: A path to some preprocessed word embeddings that
            must be used to initialise the model.
        target_dir: The path to a directory where the trained model must
            be saved.
        hidden_size: The size of the hidden layers in the model. Defaults
            to 300.
        dropout: The dropout rate to use in the model. Defaults to 0.5.
        num_classes: The number of classes in the output of the model.
            Defaults to 3.
        epochs: The maximum number of epochs for training. Defaults to 64.
        batch_size: The size of the batches for training. Defaults to 32.
        lr: The learning rate for the optimizer. Defaults to 0.0004.
        patience: The patience to use for early stopping. Defaults to 5.
        checkpoint: A checkpoint from which to continue training. If None,
            training starts from scratch. Defaults to None.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    print(20 * "=", " Preparing for training ", 20 * "=")

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)


    print("\t* Loading validation data...")
    with open(valid_file, "rb") as pkl:
        valid_data = NLIDataset(pickle.load(pkl))

    valid_loader = DataLoader(valid_data, shuffle=False, batch_size=batch_size)

    print("\t* Loading test data...")
    with open(test_file, "rb") as pkl:
        test_data = NLIDataset(pickle.load(pkl))

    test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)

    # -------------------- Model definition ------------------- #
    print("\t* Building model...")
    with open(embeddings_file, "rb") as pkl:
        embeddings = torch.tensor(pickle.load(pkl), dtype=torch.float)\
            .to(device)

    model = []
    model0 = ESIM(embeddings.shape[0],
                  embeddings.shape[1],
                  hidden_size,
                  embeddings=embeddings,
                  dropout=0,
                  num_classes=num_classes,
                  device=device).to(device)
    model1 = TOP(embeddings.shape[0],
                 embeddings.shape[1],
                 hidden_size,
                 embeddings=embeddings,
                 dropout=dropout,
                 num_classes=num_classes,
                 device=device).to(device)
    model.append(model0)
    model.append(model1)

    # -------------------- Preparation for testing ------------------- #
    criterion = nn.CrossEntropyLoss()

    start_epoch = 1


    # Load the pretrained weights from the given checkpoints.
    if checkpoint_model0:
        checkpoint = torch.load(checkpoint_model0)
        # start_epoch = checkpoint["epoch"] + 1

        print("\t* Training will continue on existing model from epoch {}..."
              .format(start_epoch))

        model[0].load_state_dict(checkpoint["model"])
    if checkpoint_model1:
        checkpoint = torch.load(checkpoint_model1)
        start_epoch = checkpoint["epoch"] + 1

        print("\t* Training will continue on existing model from epoch {}..."
              .format(start_epoch))

        model[1].load_state_dict(checkpoint["model"])
    elif checkpoint_model0:
        # No checkpoint for the TOP model: initialise its shared layers from
        # the pretrained ESIM checkpoint loaded above. This branch requires
        # checkpoint_model0 to be given.
        model_dict = model1.state_dict()
        pretrained_dict = {k: v for k, v in checkpoint["model"].items()
                           if k in model_dict}
        model_dict.update(pretrained_dict)
        model1.load_state_dict(model_dict)

    # Generate predictions and save them as submission files.
    data = test(model, valid_loader, criterion)
    data.to_csv('matched_submission.csv', index=False)

    data = test(model, test_loader, criterion)
    data.to_csv('mismatched_submission.csv', index=False)
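
A sketch of a possible invocation; as the loaders above show, the matched test set is passed as valid_file and the mismatched set as test_file. All paths are placeholders, and both checkpoints must be supplied.

if __name__ == "__main__":
    # Hypothetical paths: train_file is unused by this script.
    main("",
         "data/preprocessed/mnli/matched_test_data.pkl",
         "data/preprocessed/mnli/mismatched_test_data.pkl",
         "data/preprocessed/mnli/embeddings.pkl",
         "results/mnli",
         checkpoint_model0="checkpoints/mnli/best_esim.pth.tar",
         checkpoint_model1="checkpoints/mnli/best_model1.pth.tar")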
Code Example #4
# Standard imports; NLIDataset, ESIM and validate are project-local and
# must be imported from the VAA project's own modules.
import os
import pickle

import torch
from torch.utils.data import DataLoader


def main(train_file,
         valid_file,
         test_file,
         embeddings_file,
         target_dir,
         hidden_size=300,
         dropout=0.5,
         num_classes=3,
         epochs=64,
         batch_size=32,
         lr=0.0004,
         patience=5,
         max_grad_norm=10.0,
         checkpoint=None):
    """
    Train the ESIM model on the Quora dataset.

    Args:
        train_file: A path to some preprocessed data that must be used
            to train the model.
        valid_file: A path to some preprocessed data that must be used
            to validate the model.
        test_file: A path to some preprocessed data that must be used
            to test the model.
        embeddings_file: A path to some preprocessed word embeddings that
            must be used to initialise the model.
        target_dir: The path to a directory where the trained model must
            be saved.
        hidden_size: The size of the hidden layers in the model. Defaults
            to 300.
        dropout: The dropout rate to use in the model. Defaults to 0.5.
        num_classes: The number of classes in the output of the model.
            Defaults to 3.
        epochs: The maximum number of epochs for training. Defaults to 64.
        batch_size: The size of the batches for training. Defaults to 32.
        lr: The learning rate for the optimizer. Defaults to 0.0004.
        patience: The patience to use for early stopping. Defaults to 5.
        max_grad_norm: The maximum norm used for gradient clipping.
            Defaults to 10.0.
        checkpoint: A checkpoint from which to continue training. If None,
            training starts from scratch. Defaults to None.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    print(20 * "=", " Preparing for training ", 20 * "=")

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    # -------------------- Data loading ------------------- #
    # print("\t* Loading training data...")
    # with open(train_file, "rb") as pkl:
    #     train_data = NLIDataset(pickle.load(pkl))
    #
    # train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)

    print("\t* Loading validation data...")
    with open(valid_file, "rb") as pkl:
        valid_data = NLIDataset(pickle.load(pkl))

    valid_loader = DataLoader(valid_data, shuffle=True, batch_size=batch_size)

    # print("\t* Loading test data...")
    # with open(test_file, "rb") as pkl:
    #     test_data = NLIDataset(pickle.load(pkl))
    #
    # test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)

    # -------------------- Model definition ------------------- #
    print("\t* Building model...")
    with open(embeddings_file, "rb") as pkl:
        embeddings = torch.tensor(pickle.load(pkl), dtype=torch.float)\
            .to(device)

    model = ESIM(embeddings.shape[0],
                 embeddings.shape[1],
                 hidden_size,
                 embeddings=embeddings,
                 dropout=dropout,
                 num_classes=num_classes,
                 device=device).to(device)

    # Continuing training from a checkpoint if one was given as argument.
    if checkpoint:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint["epoch"] + 1

        print("\t* Training will continue on existing model from epoch {}...".
              format(start_epoch))

        model.load_state_dict(checkpoint["model"])

    # Compute the model's accuracy on the validation set before starting
    # (or resuming) training.
    _, valid_accuracy = validate(model, valid_loader)
    print("\t* Validation accuracy: {:.4f}%".format(valid_accuracy * 100))
Code Example #5
# Standard imports; NLIDataset, ESIM and predict are project-local and must
# be imported from the project's own modules.
import os
import pickle

import torch
from torch.utils.data import DataLoader


def main(test_files, pretrained_file, labeldict, output_dir, batch_size=32):
    """
    Test the ESIM model with pretrained weights on the MultiNLI dataset.

    Args:
        test_files: The paths to the preprocessed matched and mismatched MNLI
            test sets.
        pretrained_file: The path to a checkpoint produced by the
            'train_mnli' script.
        labeldict: A dictionary associating labels (classes) to integer values.
        output_dir: The path to a directory where the predictions of the model
            must be saved.
        batch_size: The size of the batches used for testing. Defaults to 32.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    print(20 * "=", " Preparing for testing ", 20 * "=")

    output_dir = os.path.normpath(output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # map_location lets a GPU-trained checkpoint be loaded on a CPU-only host.
    checkpoint = torch.load(pretrained_file, map_location=device)

    # Retrieve model parameters from the checkpoint.
    vocab_size = checkpoint['model']['_word_embedding.weight'].size(0)
    embedding_dim = checkpoint['model']['_word_embedding.weight'].size(1)
    hidden_size = checkpoint['model']['_projection.0.weight'].size(0)
    num_classes = checkpoint['model']['_classification.4.weight'].size(0)

    print("\t* Loading test data...")
    with open(os.path.normpath(test_files["matched"]), 'rb') as pkl:
        matched_test_data = NLIDataset(pickle.load(pkl))
    with open(os.path.normpath(test_files["mismatched"]), 'rb') as pkl:
        mismatched_test_data = NLIDataset(pickle.load(pkl))

    matched_test_loader = DataLoader(matched_test_data,
                                     shuffle=False,
                                     batch_size=batch_size)
    mismatched_test_loader = DataLoader(mismatched_test_data,
                                        shuffle=False,
                                        batch_size=batch_size)

    print("\t* Building model...")
    model = ESIM(vocab_size,
                 embedding_dim,
                 hidden_size,
                 num_classes=num_classes,
                 device=device).to(device)

    model.load_state_dict(checkpoint['model'])

    print(20 * "=",
          " Prediction on MNLI with ESIM model on device: {} ".format(device),
          20 * "=")

    print("\t* Prediction for matched test set...")
    predictions = predict(model, matched_test_loader, labeldict)

    with open(os.path.join(output_dir, "matched_predictions.csv"),
              'w') as output_f:
        output_f.write("pairID,gold_label\n")
        for pair_id in predictions:
            output_f.write(pair_id + "," + predictions[pair_id] + "\n")

    print("\t* Prediction for mismatched test set...")
    predictions = predict(model, mismatched_test_loader, labeldict)

    with open(os.path.join(output_dir, "mismatched_predictions.csv"),
              'w') as output_f:
        output_f.write("pairID,gold_label\n")
        for pair_id in predictions:
            output_f.write(pair_id + "," + predictions[pair_id] + "\n")
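
A sketch of a possible invocation; the label mapping shown is the conventional three-class NLI encoding, and the paths are placeholders.

if __name__ == "__main__":
    # Hypothetical paths and label dictionary.
    test_files = {"matched": "data/preprocessed/mnli/matched_test_data.pkl",
                  "mismatched": "data/preprocessed/mnli/mismatched_test_data.pkl"}
    labeldict = {"entailment": 0, "neutral": 1, "contradiction": 2}
    main(test_files,
         "checkpoints/mnli/best.pth.tar",
         labeldict,
         "results/mnli")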