def main(output_dim, train_bs, val_bs, test_bs, num_epochs, max_seq_length,
         learning_rate, warmup_proportion, early_stopping_criteria, num_layers,
         hidden_dim, bidirectional, dropout, filter_sizes, embedding_file,
         model_name, use_mongo, _run):
    # Logger
    directory = f"results/{_run._id}/"

    # Batch sizes
    batch_sizes = [int(train_bs), int(val_bs), int(test_bs)]
    batch_size = int(train_bs)

    # Default = False; if a BERT model is used, use_bert is set to True
    if "BERT" in model_name:
        use_bert = True
    else:
        use_bert = False

    # Data
    if use_bert:
        train_dataloader, val_dataloader, test_dataloader = get_data_bert(
            int(max_seq_length), batch_sizes)
    else:
        (embedding_dim, vocab_size, embedding_matrix, train_dataloader,
         val_dataloader, test_dataloader) = get_data_features(
            int(max_seq_length), embedding_file=embedding_file,
            batch_size=batch_size)

    # Model
    if model_name == "MLP":
        model = models.MLP(embedding_matrix, embedding_dim, vocab_size,
                           int(hidden_dim), dropout, output_dim)
    elif model_name == "MLP_Features":
        model = models.MLP_Features(embedding_matrix, embedding_dim, vocab_size,
                                    int(hidden_dim), 13, dropout, output_dim)
        print(model)
    elif model_name == "CNN":
        model = models.CNN(embedding_matrix, embedding_dim, vocab_size, dropout,
                           filter_sizes, output_dim)
        print(model)
    elif model_name == "LSTM":
        model = models.LSTM(embedding_matrix, embedding_dim, vocab_size,
                            int(hidden_dim), dropout, int(num_layers),
                            bidirectional, output_dim)
        print(model)
    elif model_name == "LSTMAttention":
        model = models.LSTMAttention(embedding_matrix, embedding_dim, vocab_size,
                                     int(hidden_dim), dropout, int(num_layers),
                                     bidirectional, output_dim)
        print(model)
    elif model_name == "BERT":
        model = BertForSequenceClassification.from_pretrained(
            "bert-base-uncased", output_dim)
        print(model)
    elif model_name == "BERTLinear":
        model = models.BertLinear(hidden_dim, dropout, output_dim)
        print(model)
    elif model_name == "BERTLSTM":
        model = models.BertLSTM(hidden_dim, dropout, output_dim)
        print(model)

    model = model.to(device)

    # Loss and optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = F.cross_entropy

    # Training and evaluation
    print('Training and evaluation for {} epochs...'.format(num_epochs))
    train_metrics, val_metrics = train_and_evaluate(
        num_epochs, model, optimizer, loss_fn, train_dataloader, val_dataloader,
        early_stopping_criteria, directory, use_bert, use_mongo)
    train_metrics.to_csv(directory + "train_metrics.csv")
    val_metrics.to_csv(directory + "val_metrics.csv")

    # Test
    print('Testing...')
    load_checkpoint(directory + "best_model.pth.tar", model)
    test_metrics = evaluate_model(model, optimizer, loss_fn, test_dataloader,
                                  device, use_bert)
    if use_mongo:
        log_scalars(test_metrics, "Test")
    test_metrics_df = pd.DataFrame(test_metrics)
    print(test_metrics)
    test_metrics_df.to_csv(directory + "test_metrics.csv")

    id_nummer = f'{_run._id}'
    results = {
        'id': id_nummer,
        'loss': np.round(np.mean(val_metrics['loss']), 4),
        'accuracy': test_metrics['accuracy'],
        'recall': test_metrics['recall'],
        'precision': test_metrics['precision'],
        'f1': test_metrics['f1'],
        'learning_rate': learning_rate,
        'hidden_dim': hidden_dim,
        'status': 'ok'
    }
    return results
# Variant of main() that additionally takes `vm` and `subtask` arguments:
# `subtask` selects the classification subtask passed to the data loaders, and
# `vm` switches the results directory for runs on different virtual machines.
def main(output_dim, train_bs, val_bs, test_bs, num_epochs, max_seq_length,
         learning_rate, warmup_proportion, early_stopping_criteria, num_layers,
         hidden_dim, bidirectional, dropout, filter_sizes, embedding_file,
         model_name, use_mongo, vm, subtask, _run):
    # Logger
    directory_checkpoints = f"results/checkpoints/{_run._id}/"
    directory = f"results/{_run._id}/"

    # Batch sizes
    batch_sizes = [int(train_bs), int(val_bs), int(test_bs)]
    batch_size = int(train_bs)

    # Default = False; if a BERT model is used, use_bert is set to True
    if "BERT" in model_name:
        use_bert = True
    else:
        use_bert = False

    if vm == "google":
        directory = f"results-bert-google/{_run._id}/"
    elif vm == "aws":
        directory = f"results-bert-aws/{_run._id}/"

    # Data
    if use_bert:
        train_dataloader, val_dataloader, test_dataloader = get_data_bert(
            int(max_seq_length), batch_sizes, subtask)
    else:
        (embedding_dim, vocab_size, embedding_matrix, train_dataloader,
         val_dataloader, test_dataloader) = get_data(
            int(max_seq_length), embedding_file=embedding_file,
            batch_size=batch_size, subtask=subtask)

    # Model
    if model_name == "MLP":
        model = models.MLP(embedding_matrix, embedding_dim, vocab_size,
                           int(hidden_dim), dropout, output_dim)
    elif model_name == "MLP_Features":
        model = models.MLP_Features(embedding_matrix, embedding_dim, vocab_size,
                                    int(hidden_dim), 14, dropout, output_dim)
        print(model)
    elif model_name == "CNN":
        model = models.CNN(embedding_matrix, embedding_dim, vocab_size, dropout,
                           filter_sizes, output_dim)
        print(model)
    elif model_name == "LSTM":
        model = models.LSTM(embedding_matrix, embedding_dim, vocab_size,
                            int(hidden_dim), dropout, int(num_layers),
                            bidirectional, output_dim)
        print(model)
    elif model_name == "LSTMAttention":
        model = models.LSTMAttention(embedding_matrix, embedding_dim, vocab_size,
                                     int(hidden_dim), dropout, int(num_layers),
                                     bidirectional, output_dim)
        print(model)
    elif model_name == "BERTFreeze":
        model = BertForSequenceClassification.from_pretrained(
            "bert-base-uncased", output_dim)
        # Freeze the BERT encoder so only the classification head is trained
        for param in model.bert.parameters():
            param.requires_grad = False
            print(param)
            print(param.requires_grad)
        print(model)
    elif model_name == "BERT":
        model = BertForSequenceClassification.from_pretrained(
            "bert-base-uncased", output_dim)
        print(model)
    elif model_name == "BERTLinear":
        model = models.BertLinear(hidden_dim, dropout, output_dim)
        print(model)
    elif model_name == "BERTLinearFreeze":
        model = models.BertLinearFreeze(hidden_dim, dropout, output_dim)
        print(model)
    elif model_name == "BERTLinearFreezeEmbeddings":
        model = models.BertLinearFreezeEmbeddings(hidden_dim, dropout,
                                                  output_dim)
        print(model)
    elif model_name == "BERTLSTM":
        model = models.BertLSTM(hidden_dim, dropout, bidirectional, output_dim)
        print(model)
    elif model_name == "BERTNonLinear":
        model = models.BertNonLinear(dropout, output_dim)
        print(model)
    elif model_name == "BERTNorm":
        model = models.BertNorm(dropout, output_dim)
        print(model)

    model = model.to(device)

    # Loss and optimizer
    # optimizer = optim.Adam([{'params': model.parameters(), 'weight_decay': 0.1}], lr=learning_rate)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = F.cross_entropy

    # Scheduler
    # scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[5, 50], gamma=0.1)

    # Training and evaluation
    print('Training and evaluation for {} epochs...'.format(num_epochs))
    train_metrics, val_metrics = train_and_evaluate(
        num_epochs, model, optimizer, loss_fn, train_dataloader, val_dataloader,
        early_stopping_criteria, directory_checkpoints, use_bert, use_mongo)
    train_metrics.to_csv(directory + "train_metrics.csv")
    val_metrics.to_csv(directory + "val_metrics.csv")

    # Test
    print('Testing...')
    load_checkpoint(directory_checkpoints + "best_model.pth.tar", model)

    # Add artifacts
    # ex.add_artifact(directory + "best_model.pth.tar")
    # ex.add_artifact(directory + "last_model.pth.tar")

    test_metrics = evaluate_model(model, optimizer, loss_fn, test_dataloader,
                                  device, use_bert)
    if use_mongo:
        log_scalars(test_metrics, "Test")

    test_metrics_df = pd.DataFrame(test_metrics)
    # test_metrics_df = pd.DataFrame(test_metrics, index=["NOT", "OFF"])
    print(test_metrics)
    test_metrics_df.to_csv(directory + "test_metrics.csv")

    id_nummer = f'{_run._id}'
    results = {
        'id': id_nummer,
        'loss': np.round(np.mean(val_metrics['loss']), 4),
        'accuracy': test_metrics['accuracy'],
        'recall': test_metrics['recall'],
        'precision': test_metrics['precision'],
        'f1': test_metrics['f1'],
        'learning_rate': learning_rate,
        'hidden_dim': hidden_dim,
        'status': 'ok'
    }
    return results
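
# The `_run` argument, the `use_mongo` flag, and the commented-out `ex.add_artifact`
# calls indicate that main() is driven by a Sacred experiment defined elsewhere in
# this repository. A minimal, hypothetical sketch of that wiring is shown below;
# the experiment name and MongoDB URL are assumptions, not taken from this code:
#
#     from sacred import Experiment
#     from sacred.observers import MongoObserver
#
#     ex = Experiment("offenseval")  # assumed experiment name
#     ex.observers.append(MongoObserver.create(url="localhost:27017"))  # assumed URL
#
#     @ex.main
#     def main(output_dim, train_bs, ..., _run):
#         ...
#
#     ex.run_commandline()
#
# Sacred then injects the configured hyperparameters by name, assigns the fresh
# _run._id used for the results directories above, and stores the returned
# `results` dict as the run result (its 'loss' and 'status': 'ok' keys also match
# the return format used by hyperopt-style hyperparameter search).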