)

        if base_dev_acc < valid_acc:
            is_best = True
            base_dev_acc = valid_acc

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_loss': valid_loss,
                'best_dev_accuracy': valid_acc
            }, is_best, MODEL_PATH)

    print("Best validation set accuracy for the epochs is : {}".format(
        base_dev_acc))

    print("Prediction on Test :::::::")
    model = load_check_point(model, MODEL_PATH)
    test_loss, test_accuracy = evaluate(model,
                                        testdl,
                                        criterion,
                                        device=DEVICE)

    print(f"Test accuracy of the trained model is {test_accuracy * 100:.2f}%")
    print("\n\n")
    print("FINISH")
    print(
        "############################################################################"
    )
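
Both examples call save_checkpoint and load_check_point, which are not defined on this page. A minimal sketch of compatible helpers, assuming a plain torch.save-based checkpoint format (the helpers in the original repository may differ):

import shutil

import torch


def save_checkpoint(state, is_best, path):
    # Always persist the latest epoch; keep a separate copy of the best one.
    torch.save(state, path)
    if is_best:
        shutil.copyfile(path, path + ".best")


def load_check_point(model, path):
    # Restore the weights of the best checkpoint written by save_checkpoint.
    checkpoint = torch.load(path + ".best", map_location="cpu")
    model.load_state_dict(checkpoint["state_dict"])
    return model
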
Example #2
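This example also references a tokenizer, a save_dict_to_disk helper, and print_number_of_trainable_parameters, none of which are shown. A hedged sketch, assuming character-level tokenization (consistent with VDCNN's character-level inputs and the char_length=1024 default) and a pickle-based vocabulary dump; the original helpers may differ:

import pickle


def tokenizer(text):
    # VDCNN consumes character sequences, so split the raw string into characters.
    return list(text.lower())


def save_dict_to_disk(obj, path):
    # Persist the vocabulary so the same mapping can be reused at inference time.
    with open(path, "wb") as f:
        pickle.dump(obj, f)


def print_number_of_trainable_parameters(model):
    n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Number of trainable parameters: {n_params}")
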
def main(csv_folder_path,
         save_path_for_model,
         save_path_for_vocab,
         n_classes=2,
         epochs=1,
         embedding_dim=16,
         char_length=1024,
         train_file_name="train.csv",
         val_file_name="test.csv",
         train_batch_size=128,
         val_batch_size=64,
         use_shortcut=False,
         depth=9,
         pool_type="max_pool",
         k_max_k=8,
         linear_layer_size=2048,
         use_batch_norm_for_linear=False,
         linear_dropout=0.4,
         metric="accuracy",
         device=DEVICE,
         print_stats_at_step=50,
         max_grad_norm=1.0):
    """
    Main loop for training and evaluating on test
    Args:
        n_classes: Mention number of classes in the data
        epochs: Mention the number of epochs to train the data on.
        embedding_dim: Mention the dimension of embedding.
        char_length: Fix the sentence length for each sentence.
        save_path_for_model: Mention the path for saving the model
        save_path_for_vocab: Mention the path for saving the model related files.
        device: Mention the device to be used cuda or cpu,
        csv_folder_path: Mention the folder path where train, test and validation csv files are stored.
        train_file_name: Mention the train csv file name.
        val_file_name: Mention the validation csv file name
        train_batch_size: Mention the batch size for training the data.
        val_batch_size: Mention the batch size for validation the data.
        use_shortcut: Mention whether to use shortcuts or not while training.
        depth: Mention the maximum depth for the model.
        pool_type: Mention whether to use k_max or max_pool for pooling operations.
        k_max_k: Mention the value of K for K_max pool in just before linear layers.
        linear_layer_size: Mention the size of the linear layer to be used in model.
        use_batch_norm_for_linear: Mention whether to use batch norm or dropout to regularize linear layers.
        linear_dropout: Mention the drop out fraction amount if using dropout for regularization in linear layers.
        metric: Mention which metric to use for saving the models.
        print_stats_at_step: number of steps to update display statistics
        max_grad_norm: max grad norm
    Returns:
        type: description
    """
    text_field = data.Field(sequential=True,
                            use_vocab=True,
                            fix_length=char_length,
                            tokenize=tokenizer,
                            batch_first=True)
    if n_classes <= 2:
        label_field = data.Field(sequential=False,
                                 use_vocab=False,
                                 is_target=True,
                                 dtype=torch.float)
    else:
        label_field = data.Field(
            sequential=False,
            use_vocab=False,
            is_target=True,
        )
    csv_fields = [
        ("label", label_field),
        ("text", text_field),
    ]
    trainds, valds = data.TabularDataset.splits(path=csv_folder_path,
                                                format="csv",
                                                train=train_file_name,
                                                validation=val_file_name,
                                                fields=csv_fields,
                                                skip_header=True)
    text_field.build_vocab(trainds)
    # label_field has use_vocab=False, so it needs no vocabulary.

    train_dl, valid_dl = data.BucketIterator.splits(
        datasets=(trainds, valds),
        batch_sizes=(train_batch_size, val_batch_size),
        sort_key=lambda x: len(x.text),  # bucket by length to minimise padding
        repeat=False,
        device=device)
    del trainds, valds
    save_dict_to_disk(text_field.vocab, save_path_for_vocab)
    # train_dl = BatchWrapper(traindl, "text", "label")
    # valid_dl = BatchWrapper(valdl, "text", "label")
    # del traindl, valdl
    vocab_size = len(text_field.vocab.stoi)
    # Binary classification uses a single logit with BCEWithLogitsLoss;
    # multi-class classification uses one logit per class with CrossEntropyLoss.
    out_dim = n_classes - 1 if n_classes <= 2 else n_classes
    model = get_vdcnn(depth,
                      embedding_dim,
                      vocab_size,
                      out_dim,
                      shortcut=use_shortcut,
                      pool_type=pool_type,
                      final_k_max_k=k_max_k,
                      linear_layer_size=linear_layer_size,
                      use_batch_norm_for_linear=use_batch_norm_for_linear,
                      linear_dropout=linear_dropout)
    criterion = nn.BCEWithLogitsLoss() if n_classes <= 2 else nn.CrossEntropyLoss()
    print(model)
    print_number_of_trainable_parameters(model)
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    model.to(device)
    criterion.to(device)
    base_dev_metric = 0.0
    for epoch in range(epochs):
        if n_classes <= 2:
            train_loss, train_acc, train_f1 = train_epoch(
                model, train_dl, optimizer, criterion, n_classes,
                print_stats_at_step, max_grad_norm)
            valid_loss, valid_acc, valid_f1 = evaluate_epoch(
                model, valid_dl, criterion, n_classes, print_stats_at_step)
            print(
                f'| Epoch: {epoch + 1:02} | Train Loss: {train_loss:.3f} | Train F1: {train_f1 * 100:.2f} | Train Acc: {train_acc * 100:.2f}% |'
            )
            print(
                f'| Epoch: {epoch + 1:02} | Val. Loss: {valid_loss:.3f} | Val. F1: {valid_f1 * 100:.2f} | Val. Acc: {valid_acc * 100:.2f}% |'
            )
            is_best = False
            if metric == "accuracy":
                if base_dev_metric < valid_acc:
                    is_best = True
                    base_dev_metric = valid_acc
            else:
                if base_dev_metric < valid_f1:
                    is_best = True
                    base_dev_metric = valid_f1
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_loss': valid_loss,
                    'best_dev_accuracy': valid_acc
                }, is_best, save_path_for_model)
        else:
            train_loss, train_acc = train_epoch(model, train_dl, optimizer,
                                                criterion, n_classes,
                                                print_stats_at_step,
                                                max_grad_norm)
            valid_loss, valid_acc = evaluate_epoch(model, valid_dl, criterion,
                                                   n_classes,
                                                   print_stats_at_step)
            print(
                f'| Epoch: {epoch + 1:02} | Train Loss: {train_loss:.3f} | Train Acc: {train_acc * 100:.2f}% |'
            )
            print(
                f'| Epoch: {epoch + 1:02} | Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc * 100:.2f}% |'
            )
            is_best = False
            if base_dev_metric < valid_acc:
                is_best = True
                base_dev_metric = valid_acc
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_loss': valid_loss,
                    'best_dev_accuracy': valid_acc
                }, is_best, save_path_for_model)

    # Reload the best checkpoint and evaluate it on the held-out set
    # (val_file_name, which defaults to test.csv here).
    model = load_check_point(model, save_path_for_model)

    if n_classes <= 2:
        test_loss, test_acc, test_f1 = evaluate_epoch(model, valid_dl,
                                                      criterion, n_classes,
                                                      print_stats_at_step)
        print(
            f'| TEST RESULT | Test. Loss: {test_loss:.3f} | Test. F1: {test_f1 * 100:.2f} | Test. Acc: {test_acc * 100:.2f}% |'
        )
    else:
        test_loss, test_acc = evaluate_epoch(model, valid_dl, criterion,
                                             n_classes, print_stats_at_step)
        print(
            f'| TEST RESULT | Test. Loss: {test_loss:.3f} | Test. Acc: {test_acc * 100:.2f}% |'
        )

    print("DONE .......................... :D")