Code example #1
File: test.py / Project: maeda6uiui-backup/AIO2
def main(test_input_dir, model_dir, test_upper_bound, result_save_dir):
    #Create a dataloader.
    logger.info("Create test dataloader from {}.".format(test_input_dir))
    test_dataset = create_dataset(test_input_dir,
                                  num_examples=-1,
                                  num_options=20)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=4,
                                 shuffle=False,
                                 drop_last=True)

    #Create a classifier model.
    logger.info("Create a classifier model.")
    classifier_model = LinearClassifier.from_pretrained(
        "cl-tohoku/bert-base-japanese-whole-word-masking")
    classifier_model.to(device)

    #Create a directory to save the results in.
    logger.info("Results will be saved in {}.".format(result_save_dir))
    os.makedirs(result_save_dir, exist_ok=True)

    logger.info("Start model evaluation.")
    for i in range(test_upper_bound):
        model_filepath = os.path.join(model_dir,
                                      "checkpoint_{}.pt".format(i + 1))
        logger.info("Load model parameters from {}.".format(model_filepath))

        parameters = torch.load(model_filepath, map_location=device)
        classifier_model.load_state_dict(parameters)

        pred_labels, correct_labels, accuracy = evaluate(
            classifier_model, test_dataloader)
        logger.info("Accuracy: {}".format(accuracy))

        #Save results as text files.
        res_filepath = os.path.join(result_save_dir,
                                    "result_test_{}.txt".format(i + 1))
        labels_filepath = os.path.join(result_save_dir,
                                       "labels_test_{}.txt".format(i + 1))

        with open(res_filepath, "w") as w:
            w.write("Accuracy: {}\n".format(accuracy))

        with open(labels_filepath, "w") as w:
            for pred_label, correct_label in zip(pred_labels, correct_labels):
                w.write("{} {}\n".format(pred_label, correct_label))

    logger.info("Finished model evaluation.")
Code example #2
File: modeling.py / Project: maeda6uiui-backup/AIO2
def main(batch_size, num_epochs, lr, train_input_dir, dev1_input_dir,
         result_save_dir):
    logger.info("batch_size: {} num_epochs: {} lr: {}".format(
        batch_size, num_epochs, lr))

    #Create dataloaders.
    logger.info("Create train dataloader from {}.".format(train_input_dir))
    train_dataset = create_dataset(train_input_dir,
                                   num_examples=-1,
                                   num_options=4)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  drop_last=True)

    logger.info("Create dev1 dataloader from {}.".format(dev1_input_dir))
    dev1_dataset = create_dataset(dev1_input_dir,
                                  num_examples=-1,
                                  num_options=20)
    dev1_dataloader = DataLoader(dev1_dataset,
                                 batch_size=4,
                                 shuffle=False,
                                 drop_last=True)

    #Create a classifier model.
    logger.info("Create a classifier model.")
    classifier_model = LinearClassifier.from_pretrained(
        "cl-tohoku/bert-base-japanese-whole-word-masking")
    classifier_model.to(device)

    #Create an optimizer and a scheduler.
    optimizer = AdamW(classifier_model.parameters(), lr=lr, eps=1e-8)
    total_steps = len(train_dataloader) * num_epochs
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=0,
                                                num_training_steps=total_steps)

    #Create a directory to save the results in.
    os.makedirs(result_save_dir, exist_ok=True)

    logger.info("Start model training.")
    for epoch in range(num_epochs):
        logger.info("===== Epoch {}/{} =====".format(epoch + 1, num_epochs))

        mean_loss = train(classifier_model, optimizer, scheduler,
                          train_dataloader)
        logger.info("Mean loss: {}".format(mean_loss))

        #Save model parameters.
        checkpoint_filepath = os.path.join(
            result_save_dir, "checkpoint_{}.pt".format(epoch + 1))
        torch.save(classifier_model.state_dict(), checkpoint_filepath)

        pred_labels, correct_labels, accuracy = evaluate(
            classifier_model, dev1_dataloader)
        logger.info("Accuracy: {}".format(accuracy))

        #Save results as text files.
        res_filepath = os.path.join(result_save_dir,
                                    "result_eval_{}.txt".format(epoch + 1))
        labels_filepath = os.path.join(result_save_dir,
                                       "labels_eval_{}.txt".format(epoch + 1))

        with open(res_filepath, "w") as w:
            w.write("Accuracy: {}\n".format(accuracy))

        with open(labels_filepath, "w") as w:
            for pred_label, correct_label in zip(pred_labels, correct_labels):
                w.write("{} {}\n".format(pred_label, correct_label))

    logger.info("Finished model training.")