from utils.data import remap_data_to_2, remap_data_to_3, remap_data_to_3_from_text, get_histogram, save_predictions

# --- Stage 1: read the full table and remap it -------------------------------
print('Loading dataset...')
preprocessing = Preprocessing()

# The text-based remapping is the one in use; remap_data_to_3(data) is the
# alternative that was tried here before.
data = remap_data_to_3_from_text(load_data_file_without_split(**data_params))

# Histogram over column 2 of the remapped table, printed before splitting.
# NOTE(review): presumably column 2 is the label column -- confirm it matches
# data_params['y_column'].
print(get_histogram_data(data[:, 2]))

# --- Stage 2: split, then wrap every partition in a dataset ------------------
train_data, valid_data, test_data = split_data(data)
x_column = data_params['x_column']
y_column = data_params['y_column']

# Built in train -> valid -> test order, all with the same text preprocessing.
train_set, valid_set, test_set = (
    ClassificationDataset(
        partition[:, x_column],
        partition[:, y_column],
        preprocessing=preprocessing.process_text,
    )
    for partition in (train_data, valid_data, test_data)
)

# One vocabulary object built from the vocabularies of all three datasets.
vocab = ConcatVocabulary([train_set.vocab, valid_set.vocab, test_set.vocab])

# --- Stage 3: batch loaders --------------------------------------------------
# Train and validation loaders shuffle; the test loader keeps its default.
train_loader = DataLoader(
    train_set, batch_size, shuffle=True, collate_fn=collate_fn_cf
)
valid_loader = DataLoader(
    valid_set, batch_size, shuffle=True, collate_fn=collate_fn_cf
)
test_loader = DataLoader(
    test_set, batch_size, collate_fn=collate_fn_cf
)

# Per-partition label histograms, in the same order the datasets were built.
for subset in (train_set, valid_set, test_set):
    print(get_histogram_data(subset.labels))

print('Creating model...')
# Example no. 2
# 0
# Script: load and remap the dataset, build the train/test datasets, then
# create the ELMo-backed RNN classifier together with its optimizer, weighted
# loss and trainer.
print('Loading dataset...')
preprocessing = Preprocessing()

data = load_data_file_without_split(**data_params)

# The text-based remapping is the one in use; remap_data_to_3(data) is the
# alternative that was tried here before.
data = remap_data_to_3_from_text(data)

# Histogram over column 2 of the remapped table, printed before splitting.
# NOTE(review): presumably column 2 is the label column -- confirm it matches
# data_params['y_column'].
print(get_histogram_data(data[:, 2]))

train_data, valid_data, test_data = split_data(data)
x_column, y_column = data_params['x_column'], data_params['y_column']

# The training partition must be wrapped in a dataset too: the loss weights
# below are derived from `train_set.labels`, and without this definition the
# script stops with a NameError before the trainer is created.
train_set = ClassificationDataset(train_data[:, x_column],
                                  train_data[:, y_column],
                                  preprocessing=preprocessing.process_text)

test_set = ClassificationDataset(test_data[:, x_column],
                                 test_data[:, y_column],
                                 preprocessing=preprocessing.process_text)
test_loader = DataLoader(test_set, batch_size, collate_fn=collate_fn_cf)

print('Creating model...')

embeddings = ELMoForManyLangs(**embed_params)

model = RNNClassifier(embeddings, encoder_params, **model_params).to(device)

# Adam with its default hyper-parameters over all model parameters.
optimizer = torch.optim.Adam(model.parameters())

# Per-class weights computed from the training labels and moved to the same
# device as the model, then handed to the negative log-likelihood loss.
weights = class_weigths(train_set.labels).to(device)
criterion = torch.nn.NLLLoss(weight=weights)

trainer = ClassificationTrainer(model, criterion, optimizer, device)