Example #1
0
                                 test_data[:, y_column],
                                 preprocessing=preprocessing.process_text)
# Batched loader over the test split; no shuffling, same collate function
# as the training pipeline so batch structure matches.
test_loader = DataLoader(test_set, batch_size, collate_fn=collate_fn_cf)

print('Creating model...')

# Contextual ELMo embeddings; embed_params is defined outside this chunk.
embeddings = ELMoForManyLangs(**embed_params)

# RNN classifier stacked on the embeddings, moved to the target device.
model = RNNClassifier(embeddings, encoder_params, **model_params).to(device)

# Adam with library defaults (lr=1e-3, betas=(0.9, 0.999)).
optimizer = torch.optim.Adam(model.parameters())

# Per-class weights to counter label imbalance in the training labels.
# NOTE(review): helper name is misspelled ("weigths"); it is defined
# elsewhere in the project, so the call is left untouched here.
weights = class_weigths(train_set.labels).to(device)
# NLLLoss expects log-probabilities from the model; a later chunk in this
# file uses CrossEntropyLoss (raw logits) instead — confirm which output
# the RNNClassifier actually produces.
criterion = torch.nn.NLLLoss(weight=weights)

trainer = ClassificationTrainer(model, criterion, optimizer, device)

print('Training...')
# NOTE(review): best_macro_f1 is initialised but never read or updated in
# this visible chunk — presumably leftover from checkpoint-selection logic.
best_macro_f1 = None
# Integer gold labels for the test split (used for scoring elsewhere).
gold_labels = test_set.labels.astype(int)

# Test-set evaluation loop using the best-validation-F1 checkpoint.
# NOTE(review): the same checkpoint file is loaded on every iteration, so
# all n_epochs passes evaluate an identical model — the load (and likely
# the whole loop) could be hoisted to a single evaluation; confirm
# evaluate_model has no state side effects before restructuring.
# NOTE(review): torch.load without map_location will fail if the
# checkpoint was saved on a different device than the current one.
for epoch in range(training_params['n_epochs']):
    trainer.model = torch.load('checkpoints/best_valid_f1_model')

    test_loss, predicted, model_predictions, labels = trainer.evaluate_model(
        test_loader)
    print(
        '----------------------------------------------------Test results----------------------------------------------------'
    )
    # NOTE(review): accuracy_score returns a fraction in [0, 1], so the
    # '%' suffix in the output is misleading (value is not multiplied by 100).
    print('| Loss: {} | Acc: {}% |'.format(test_loss,
                                           accuracy_score(labels, predicted)))
# Label-distribution diagnostics for all three splits.
print(get_histogram_data(train_set.labels))
print(get_histogram_data(valid_set.labels))
print(get_histogram_data(test_set.labels))

print('Creating model...')

# Second, near-identical model setup (this file appears to concatenate
# several script variants; the names below shadow the earlier ones).
embeddings = ELMoForManyLangs(**embed_params)

model = RNNClassifier(embeddings, encoder_params, **model_params).to(device)

# Adam with library defaults.
optimizer = torch.optim.Adam(model.parameters())

# Class weights against label imbalance (misspelled project helper,
# defined elsewhere — left as-is).
weights = class_weigths(train_set.labels).to(device)
# NLLLoss: model is expected to emit log-probabilities.
criterion = torch.nn.NLLLoss(weight=weights)

trainer = ClassificationTrainer(model, criterion, optimizer, device)

print('Training...')
# Tracks the best validation macro-F1 seen so far; the comparison/update
# step is not visible in this chunk.
best_macro_f1 = None
gold_labels = test_set.labels.astype(int)

# Main train/validate loop over n_epochs.
for epoch in range(training_params['n_epochs']):

    # One pass over the training data; assumed to return the epoch's
    # training loss — confirm against ClassificationTrainer.
    train_loss = trainer.train_model(train_loader)
    valid_loss, predicted, model_predictions, labels = trainer.evaluate_model(valid_loader)

    # Per-epoch validation metrics (accuracy, macro/micro F1).
    print('| Epoch: {} | Train Loss: {:2.5f} | Val. Loss: {:2.5f} | Val. Acc: {:2.5f} | Val. Macro F1: {:2.5f} | Val. Micro F1: {:2.5f} |'
          .format(epoch + 1, train_loss, valid_loss, accuracy_score(labels, predicted),
                  f1_score(labels, predicted, average='macro'), f1_score(labels, predicted, average='micro')))

    # NOTE(review): macro_f1 recomputes the value already printed above and
    # is never used afterwards — the "if macro_f1 > best_macro_f1: save
    # checkpoint" step appears to be missing from this chunk.
    macro_f1 = f1_score(labels, predicted, average='macro')
# DataLoaders for every split, including the task-B variants; only the
# training loader shuffles.
train_loader = DataLoader(train_set, batch_size, shuffle=True, collate_fn=collate_fn_cf)
valid_loader = DataLoader(valid_set, batch_size, collate_fn=collate_fn_cf)
test_loader = DataLoader(test_set, batch_size, collate_fn=collate_fn_cf)
test_loader_B = DataLoader(test_set_B, batch_size, collate_fn=collate_fn_cf)
valid_loader_B = DataLoader(valid_set_B, batch_size, collate_fn=collate_fn_cf)

print('Creating model...')
# Class weights (misspelled project helper, defined elsewhere — left as-is).
# CrossEntropyLoss here expects raw logits, unlike the NLLLoss setups
# earlier in this file — the two configurations are inconsistent; confirm
# which matches the classifier's output.
weights = class_weigths(train_set.labels).to(device)
criterion = torch.nn.CrossEntropyLoss(weight=weights)
# Plain ELMo here (earlier chunks use ELMoForManyLangs).
embeddings = ELMo(**embed_params)
model = RNNClassifier(embeddings, encoder_params, **model_params).to(device)

optimizer = torch.optim.Adam(model.parameters())

# model=None: the actual models are installed one at a time in the
# ensemble loop below via trainer.model = torch.load(...).
trainer = ClassificationTrainer(None, criterion, optimizer, device)

print('Evaluate...')
gold_labels = test_set.labels.astype(int)
# Per-model prediction arrays and test losses, filled by the loop below.
predictions = []
losses = []
import pandas as pd
# Ensemble evaluation: load each saved checkpoint, score it on the test
# set, and collect its per-sample predictions and overall loss.
# NOTE(review): torch.load without map_location assumes the checkpoints
# were saved on a device compatible with the current one.
for model_name in ensemble_models:
    trainer.model = torch.load('checkpoints/' + model_name)

    test_loss, predicted, model_predictions, labels = trainer.evaluate_model(test_loader)
    predictions.append(model_predictions)
    losses.append(test_loss)
#     save_predictions(name='submissions/' + model_name, predictions=predicted, original_data=test_data)
#     save_predictions_with_probabilities(name='submissions/' + model_name + '_full', predictions=predicted, original_data=test_data, labels=gold_labels, probabilities=model_predictions)