Esempio n. 1
0
                                            embedding_dim)

# Build the batch iterators for the training and validation datasets.
# Training batches are shuffled; the sort key handed to the BucketIterator is
# the combined length of an example's Text1 and Text2 fields.
# NOTE(review): `data` is presumably the (legacy) torchtext data module --
# confirm against the imports, which are outside this excerpt.
train_iter = data.BucketIterator(
    dataset=train,
    batch_size=batch_size,
    sort_key=lambda x: len(x.Text1) + len(x.Text2),
    shuffle=True,
    device=device,
    repeat=False)
# Validation batches come from a plain Iterator with shuffling disabled.
valid_iter = data.Iterator(dataset=valid,
                           batch_size=batch_size,
                           device=device,
                           shuffle=False,
                           repeat=False)

# Wrap both iterators in the project's BatchWrapper, naming the Text1, Text2
# and Label fields.
# NOTE(review): presumably the wrapper yields those fields per batch in the
# listed order -- verify in datahelper.BatchWrapper.
train_dl = datahelper.BatchWrapper(train_iter, ["Text1", "Text2", "Label"])
valid_dl = datahelper.BatchWrapper(valid_iter, ["Text1", "Text2", "Label"])
print('Reading data done.')


def predict_on(model, data_dl, loss_func, device, model_state_path=None):
    if model_state_path:
        model.load_state_dict(torch.load(model_state_path))
        print('Start predicting...')

    model.eval()
    res_list = []
    label_list = []
    loss = 0

    for text1, text2, label in data_dl:
Esempio n. 2
0
    infile = "../../data/train.tsv"
    outfile = "../../data/predict.tsv"

# Prediction script: declare the fields, load the datasets, build the
# vocabulary and embedding matrix, then run the saved model on the test set.
print('Reading data..')
# Load a custom user dictionary into jieba before any text is tokenized.
jieba.load_userdict("../txt/dict.txt")
# Id and Label are non-sequential fields that bypass the vocabulary; Text is a
# lowercased sequence tokenized with jieba.lcut, using <BOS>/<EOS> as the
# init/eos tokens and <PAD> as the padding token.
ID = data.Field(sequential=False, batch_first=True, use_vocab=False)
TEXT = data.Field(sequential=True, lower=True, eos_token='<EOS>', init_token='<BOS>',
                  pad_token='<PAD>', fix_length=None, batch_first=True, use_vocab=True, tokenize=jieba.lcut)
LABEL = data.Field(sequential=False, batch_first=True, use_vocab=False)

# The training TSV is loaded with four declared fields (Id, Text1, Text2, Label).
train = data.TabularDataset(
        path='../../data/train.tsv', format='tsv',
        fields=[('Id', ID), ('Text1', TEXT), ('Text2', TEXT), ('Label', LABEL)])

# The file to predict on is read from `infile` and declares no Label field.
test = data.TabularDataset(
        path=infile, format='tsv',
    fields=[('Id', ID), ('Text1', TEXT), ('Text2', TEXT)])

# The vocabulary is built from the training dataset only, with min_freq=3.
TEXT.build_vocab(train, min_freq=3)
print('Building vocabulary Finished.')
# NOTE(review): presumably returns an embedding matrix aligned with
# TEXT.vocab.itos, loaded from the 300d binary file -- confirm in datahelper.
word_matrix = datahelper.wordlist_to_matrix("../txt/embedding_300d.bin", TEXT.vocab.itos, device, embedding_dim)

# Test batches are served unshuffled; the wrapper names Id, Text1 and Text2.
test_iter = data.Iterator(dataset=test, batch_size=batch_size, device=device, shuffle=False, repeat=False)
test_dl = datahelper.BatchWrapper(test_iter, ["Id", "Text1", "Text2"])

# Instantiate the wide_deep model with the vocabulary size and the embedding
# matrix, then move it to the target device.
MODEL = wide_deep(len(TEXT.vocab), embedding_dim, hidden_dim, batch_size, word_matrix, bidirectional=bidirectional)
MODEL.to(device)

# NOTE(review): arguments here are (model, dataloader, output path, checkpoint
# path) -- confirm they match the predict_on definition in scope for this script.
predict_on(MODEL, test_dl, outfile, '../model_save/wide_deep.pth')