test_data[:, y_column], preprocessing=preprocessing.process_text) test_loader = DataLoader(test_set, batch_size, collate_fn=collate_fn_cf) print('Creating model...') embeddings = ELMoForManyLangs(**embed_params) model = RNNClassifier(embeddings, encoder_params, **model_params).to(device) optimizer = torch.optim.Adam(model.parameters()) weights = class_weigths(train_set.labels).to(device) criterion = torch.nn.NLLLoss(weight=weights) trainer = ClassificationTrainer(model, criterion, optimizer, device) print('Training...') best_macro_f1 = None gold_labels = test_set.labels.astype(int) for epoch in range(training_params['n_epochs']): trainer.model = torch.load('checkpoints/best_valid_f1_model') test_loss, predicted, model_predictions, labels = trainer.evaluate_model( test_loader) print( '----------------------------------------------------Test results----------------------------------------------------' ) print('| Loss: {} | Acc: {}% |'.format(test_loss, accuracy_score(labels, predicted)))
# Show the class-frequency histogram of each split before training starts.
print(get_histogram_data(train_set.labels))
print(get_histogram_data(valid_set.labels))
print(get_histogram_data(test_set.labels))

print('Creating model...')
# ELMo embeddings feed an RNN classifier; the loss is class-weighted to
# counter label imbalance in the training split.
embeddings = ELMoForManyLangs(**embed_params)
model = RNNClassifier(embeddings, encoder_params, **model_params).to(device)
optimizer = torch.optim.Adam(model.parameters())
# NOTE(review): helper name carries a typo ("weigths") — defined elsewhere, kept as-is.
weights = class_weigths(train_set.labels).to(device)
criterion = torch.nn.NLLLoss(weight=weights)
trainer = ClassificationTrainer(model, criterion, optimizer, device)

print('Training...')
best_macro_f1 = None
gold_labels = test_set.labels.astype(int)
for epoch in range(training_params['n_epochs']):
    # One pass over the training data, then validation metrics per epoch.
    train_loss = trainer.train_model(train_loader)
    valid_loss, predicted, model_predictions, labels = trainer.evaluate_model(valid_loader)
    print('| Epoch: {} | Train Loss: {:2.5f} | Val. Loss: {:2.5f} | Val. Acc: {:2.5f} | Val. Macro F1: {:2.5f} | Val. Micro F1: {:2.5f} |'
          .format(epoch + 1, train_loss, valid_loss,
                  accuracy_score(labels, predicted),
                  f1_score(labels, predicted, average='macro'),
                  f1_score(labels, predicted, average='micro')))
    # Macro F1 is the model-selection metric (cf. best_macro_f1 above).
    macro_f1 = f1_score(labels, predicted, average='macro')
# Batching loaders for all five splits; only the training split is shuffled.
train_loader = DataLoader(train_set, batch_size, shuffle=True, collate_fn=collate_fn_cf)
valid_loader = DataLoader(valid_set, batch_size, collate_fn=collate_fn_cf)
test_loader = DataLoader(test_set, batch_size, collate_fn=collate_fn_cf)
test_loader_B = DataLoader(test_set_B, batch_size, collate_fn=collate_fn_cf)
valid_loader_B = DataLoader(valid_set_B, batch_size, collate_fn=collate_fn_cf)

print('Creating model...')
# NOTE(review): helper name carries a typo ("weigths") — defined elsewhere, kept as-is.
weights = class_weigths(train_set.labels).to(device)
criterion = torch.nn.CrossEntropyLoss(weight=weights)
embeddings = ELMo(**embed_params)
model = RNNClassifier(embeddings, encoder_params, **model_params).to(device)
optimizer = torch.optim.Adam(model.parameters())
# The trainer starts without a model; each ensemble checkpoint is loaded
# into it inside the loop below.
trainer = ClassificationTrainer(None, criterion, optimizer, device)

print('Evaluate...')
gold_labels = test_set.labels.astype(int)
predictions = []
losses = []

import pandas as pd  # NOTE(review): mid-script import, apparently unused here — confirm before removing

# Evaluate every checkpoint of the ensemble on the test split, collecting
# each model's probability outputs and loss.
for model_name in ensemble_models:
    trainer.model = torch.load('checkpoints/' + model_name)
    test_loss, predicted, model_predictions, labels = trainer.evaluate_model(test_loader)
    predictions.append(model_predictions)
    losses.append(test_loss)
    # save_predictions(name='submissions/' + model_name, predictions=predicted, original_data=test_data)
    # save_predictions_with_probabilities(name='submissions/' + model_name + '_full', predictions=predicted, original_data=test_data, labels=gold_labels, probabilities=model_predictions)