Example #1

#Imports (omitted from the original snippet; restored here).
#numpy/pandas/torch are standard; the BERT import assumes the older
#pytorch_pretrained_bert package, whose from_pretrained forwards the extra
#positional output_dim to the classifier head as num_labels.
import numpy as np
import pandas as pd
import torch.nn.functional as F
import torch.optim as optim
from pytorch_pretrained_bert import BertForSequenceClassification

#Assumed project-local helpers: models (model definitions), get_data_bert,
#get_data_features/get_data, train_and_evaluate, evaluate_model,
#load_checkpoint, log_scalars, and the torch device object.
import models

def main(output_dim, train_bs, val_bs, test_bs, num_epochs, max_seq_length,
         learning_rate, warmup_proportion, early_stopping_criteria, num_layers,
         hidden_dim, bidirectional, dropout, filter_sizes, embedding_file,
         model_name, use_mongo, _run):

    #Logger
    directory = f"results/{_run._id}/"

    #Batch sizes
    batch_sizes = [int(train_bs), int(val_bs), int(test_bs)]
    batch_size = int(train_bs)
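    #get_data_bert takes per-split batch sizes; the non-BERT loader below
    #only accepts a single (train) batch size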

    if "BERT" in model_name:  #Default = False, if BERT model is used then use_bert is set to True
        use_bert = True
    else:
        use_bert = False

    #Data
    if use_bert:
        train_dataloader, val_dataloader, test_dataloader = get_data_bert(
            int(max_seq_length), batch_sizes)
    else:
        embedding_dim, vocab_size, embedding_matrix, train_dataloader, val_dataloader, test_dataloader = get_data_features(
            int(max_seq_length),
            embedding_file=embedding_file,
            batch_size=batch_size)

    #Model
    if model_name == "MLP":
        model = models.MLP(embedding_matrix, embedding_dim, vocab_size,
                           int(hidden_dim), dropout, output_dim)
    if model_name == "MLP_Features":
        model = models.MLP_Features(embedding_matrix, embedding_dim,
                                    vocab_size, int(hidden_dim), 13, dropout,
                                    output_dim)
        print(model)
    elif model_name == "CNN":
        model = models.CNN(embedding_matrix, embedding_dim, vocab_size,
                           dropout, filter_sizes, output_dim)
        print(model)
    elif model_name == "LSTM":
        model = models.LSTM(embedding_matrix, embedding_dim, vocab_size,
                            int(hidden_dim), dropout, int(num_layers),
                            bidirectional, output_dim)
        print(model)
    elif model_name == "LSTMAttention":
        model = models.LSTMAttention(embedding_matrix, embedding_dim,
                                     vocab_size, int(hidden_dim), dropout,
                                     int(num_layers), bidirectional,
                                     output_dim)
        print(model)
    elif model_name == "BERT":
        model = BertForSequenceClassification.from_pretrained(
            "bert-base-uncased", output_dim)
        print(model)
    elif model_name == "BERTLinear":
        model = models.BertLinear(hidden_dim, dropout, output_dim)
        print(model)
    elif model_name == "BERTLSTM":
        model = models.BertLSTM(hidden_dim, dropout, output_dim)
        print(model)

    model = model.to(device)

    #Loss and optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = F.cross_entropy
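    #F.cross_entropy applies log_softmax internally, so every model here is
    #expected to return raw (unnormalized) logits of shape [batch, output_dim]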

    #Training and evaluation
    print(f'Training and evaluating for {num_epochs} epochs...')
    train_metrics, val_metrics = train_and_evaluate(
        num_epochs, model, optimizer, loss_fn, train_dataloader,
        val_dataloader, early_stopping_criteria, directory, use_bert,
        use_mongo)
    train_metrics.to_csv(directory + "train_metrics.csv")
    val_metrics.to_csv(directory + "val_metrics.csv")

    #Test
    print('Testing...')
    load_checkpoint(directory + "best_model.pth.tar", model)

    test_metrics = evaluate_model(model, optimizer, loss_fn, test_dataloader,
                                  device, use_bert)
    if use_mongo:
        log_scalars(test_metrics, "Test")

    test_metrics_df = pd.DataFrame(test_metrics)
    print(test_metrics)
    test_metrics_df.to_csv(directory + "test_metrics.csv")

    id_nummer = f'{_run._id}'

    results = {
        'id': id_nummer,
        'loss': np.round(np.mean(val_metrics['loss']), 4),
        'accuracy': test_metrics['accuracy'],
        'recall': test_metrics['recall'],
        'precision': test_metrics['precision'],
        'f1': test_metrics['f1'],
        'learning_rate': learning_rate,
        'hidden_dim': hidden_dim,
        'status': 'ok'
    }

    return results
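
Both versions of main receive a Sacred _run object and read their
hyperparameters as injected arguments, so they are presumably registered as
Sacred experiment entry points. A minimal sketch of the wiring that is assumed
but not shown (the experiment name and config defaults are illustrative
placeholders, not the authors' values):

from sacred import Experiment
from sacred.observers import FileStorageObserver

ex = Experiment("offenseval-classification")  #hypothetical experiment name
#_run._id (used for the results directories above) is only assigned once an
#observer is attached
ex.observers.append(FileStorageObserver("runs"))

@ex.config
def config():
    model_name = "LSTM"    #illustrative defaults
    output_dim = 2
    learning_rate = 1e-3

@ex.automain
def run_experiment(model_name, output_dim, learning_rate, _run):
    #Sacred injects config values and the _run object by parameter name,
    #which is how main(...) above obtains _run._id
    print(_run._id, model_name, output_dim, learning_rate)
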
Example #2

def main(output_dim, train_bs, val_bs, test_bs, num_epochs, max_seq_length,
         learning_rate, warmup_proportion, early_stopping_criteria, num_layers,
         hidden_dim, bidirectional, dropout, filter_sizes, embedding_file,
         model_name, use_mongo, vm, subtask, _run):

    #Logger
    directory_checkpoints = f"results/checkpoints/{_run._id}/"
    directory = f"results/{_run._id}/"

    #Batch sizes
    batch_sizes = [int(train_bs), int(val_bs), int(test_bs)]
    batch_size = int(train_bs)

    if "BERT" in model_name:  #Default = False, if BERT model is used then use_bert is set to True
        use_bert = True
    else:
        use_bert = False

    if vm == "google":
        directory = f"results-bert-google/{_run._id}/"
    elif vm == "aws":
        directory = f"results-bert-aws/{_run._id}/"

    #Data
    if use_bert:
        train_dataloader, val_dataloader, test_dataloader = get_data_bert(
            int(max_seq_length), batch_sizes, subtask)
    else:
        embedding_dim, vocab_size, embedding_matrix, train_dataloader, val_dataloader, test_dataloader = get_data(
            int(max_seq_length),
            embedding_file=embedding_file,
            batch_size=batch_size,
            subtask=subtask)

    #Model
    if model_name == "MLP":
        model = models.MLP(embedding_matrix, embedding_dim, vocab_size,
                           int(hidden_dim), dropout, output_dim)
    if model_name == "MLP_Features":
        model = models.MLP_Features(embedding_matrix, embedding_dim,
                                    vocab_size, int(hidden_dim), 14, dropout,
                                    output_dim)
        print(model)
    elif model_name == "CNN":
        model = models.CNN(embedding_matrix, embedding_dim, vocab_size,
                           dropout, filter_sizes, output_dim)
        print(model)
    elif model_name == "LSTM":
        model = models.LSTM(embedding_matrix, embedding_dim, vocab_size,
                            int(hidden_dim), dropout, int(num_layers),
                            bidirectional, output_dim)
        print(model)
    elif model_name == "LSTMAttention":
        model = models.LSTMAttention(embedding_matrix, embedding_dim,
                                     vocab_size, int(hidden_dim), dropout,
                                     int(num_layers), bidirectional,
                                     output_dim)
        print(model)
    elif model_name == "BERTFreeze":
        model = BertForSequenceClassification.from_pretrained(
            "bert-base-uncased", output_dim)
        for param in model.bert.parameters():
            param.requires_grad = False
            print(param)
            print(param.requires_grad)
        print(model)
    elif model_name == "BERT":
        model = BertForSequenceClassification.from_pretrained(
            "bert-base-uncased", output_dim)
        print(model)
    elif model_name == "BERTLinear":
        model = models.BertLinear(hidden_dim, dropout, output_dim)
        print(model)
    elif model_name == "BERTLinearFreeze":
        model = models.BertLinearFreeze(hidden_dim, dropout, output_dim)
        print(model)
    elif model_name == "BERTLinearFreezeEmbeddings":
        model = models.BertLinearFreezeEmbeddings(hidden_dim, dropout,
                                                  output_dim)
        print(model)
    elif model_name == "BERTLSTM":
        model = models.BertLSTM(hidden_dim, dropout, bidirectional, output_dim)
        print(model)
    elif model_name == "BERTNonLinear":
        model = models.BertNonLinear(dropout, output_dim)
        print(model)
    elif model_name == "BERTNorm":
        model = models.BertNorm(dropout, output_dim)
        print(model)

    model = model.to(device)

    #Loss and optimizer
    #optimizer = optim.Adam([{'params': model.parameters(), 'weight_decay': 0.1}], lr=learning_rate)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = F.cross_entropy

    #Scheduler
    #scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[5, 50], gamma=0.1)

    #Training and evaluation
    print(f'Training and evaluating for {num_epochs} epochs...')
    train_metrics, val_metrics = train_and_evaluate(
        num_epochs, model, optimizer, loss_fn, train_dataloader,
        val_dataloader, early_stopping_criteria, directory_checkpoints,
        use_bert, use_mongo)
    train_metrics.to_csv(directory + "train_metrics.csv")
    val_metrics.to_csv(directory + "val_metrics.csv")

    #Test
    print('Testing...')
    load_checkpoint(directory_checkpoints + "best_model.pth.tar", model)

    #Add artifacts
    #ex.add_artifact(directory+"best_model.pth.tar")
    #ex.add_artifact(directory+"last_model.pth.tar")

    test_metrics = evaluate_model(model, optimizer, loss_fn, test_dataloader,
                                  device, use_bert)
    if use_mongo:
        log_scalars(test_metrics, "Test")

    test_metrics_df = pd.DataFrame(test_metrics)
    #test_metrics_df = pd.DataFrame(test_metrics, index=["NOT","OFF"])
    print(test_metrics)
    test_metrics_df.to_csv(directory + "test_metrics.csv")

    id_nummer = f'{_run._id}'

    results = {
        'id': id_nummer,
        'loss': np.round(np.mean(val_metrics['loss']), 4),
        'accuracy': test_metrics['accuracy'],
        'recall': test_metrics['recall'],
        'precision': test_metrics['precision'],
        'f1': test_metrics['f1'],
        'learning_rate': learning_rate,
        'hidden_dim': hidden_dim,
        'status': 'ok'
    }

    return results
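
Both examples restore the best checkpoint with load_checkpoint(path, model)
before testing. That helper is project-local and not shown; a minimal sketch of
a compatible implementation, assuming train_and_evaluate saves checkpoints as a
dict with a "state_dict" key (the exact layout is a guess, not confirmed by the
source):

import torch

def load_checkpoint(checkpoint_path, model):
    #Load onto CPU first so the checkpoint can be restored regardless of the
    #device it was saved from; load_state_dict then copies the weights into
    #the model's existing (possibly CUDA) parameters
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    model.load_state_dict(checkpoint["state_dict"])
    return checkpoint

If the project instead saves the raw state_dict, a plain
model.load_state_dict(torch.load(checkpoint_path)) would be sufficient.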