Example #1
0
def _read_split(tsv_path):
    """Load one TSV split using the Id / Text1 / Text2 / Label column layout.

    Both splits are read with ``skip_header=True``; Text1 and Text2 are
    processed by the same ``TEXT`` field.
    """
    columns = [
        ('Id', ID),
        ('Text1', TEXT),
        ('Text2', TEXT),
        ('Label', LABEL),
    ]
    return data.TabularDataset(path=tsv_path,
                               format='tsv',
                               fields=columns,
                               skip_header=True)


def _pair_length(example):
    """Sort key for bucketing: combined length of both text columns."""
    return len(example.Text1) + len(example.Text2)


train = _read_split('../../data/train.tsv')
valid = _read_split('../../data/valid.tsv')

# The vocabulary is built from the training split only.
TEXT.build_vocab(train, min_freq=3)
print('Building vocabulary Finished.')

# NOTE(review): presumably yields an embedding matrix aligned with
# TEXT.vocab.itos -- confirm against datahelper.wordlist_to_matrix.
word_matrix = datahelper.wordlist_to_matrix(
    "../txt/embedding_300d.bin",
    TEXT.vocab.itos,
    device,
    embedding_dim,
)

# Options common to the training and validation iterators.
_iter_options = {
    'batch_size': batch_size,
    'device': device,
    'repeat': False,
}

# Training batches are shuffled and bucketed by combined text length;
# validation batches are produced without shuffling.
train_iter = data.BucketIterator(dataset=train,
                                 sort_key=_pair_length,
                                 shuffle=True,
                                 **_iter_options)
valid_iter = data.Iterator(dataset=valid,
                           shuffle=False,
                           **_iter_options)
Example #2
0
# Label field: non-sequential and used without a vocabulary.
LABEL = data.Field(use_vocab=False, sequential=False, batch_first=True)

# Columns present in both files; the training file adds a Label column,
# while the file read from `infile` is loaded without one.
_input_columns = [('Id', ID), ('Text1', TEXT), ('Text2', TEXT)]

train = data.TabularDataset(
    path='./train.tsv',
    format='tsv',
    fields=_input_columns + [('Label', LABEL)],
)

test = data.TabularDataset(
    path=infile,
    format='tsv',
    fields=list(_input_columns),
)

# The vocabulary is built from the training data only, never from `test`.
TEXT.build_vocab(train, min_freq=3)
print('Building vocabulary Finished.')

# NOTE(review): presumably yields an embedding matrix aligned with
# TEXT.vocab.itos -- confirm against datahelper.wordlist_to_matrix.
_vocab_words = TEXT.vocab.itos
word_matrix = datahelper.wordlist_to_matrix(
    "./data/embedding_300d.txt",
    _vocab_words,
    device,
    embedding_dim,
)

# Iterate the test set without shuffling and without repeating.
test_iter = data.Iterator(
    dataset=test,
    batch_size=batch_size,
    shuffle=False,
    repeat=False,
    device=device,
)
test_dl = datahelper.BatchWrapper(test_iter, ["Id", "Text1", "Text2"])

# The model's first argument is the vocabulary size.
_vocab_size = len(TEXT.vocab)
MODEL = LSTM_angel(
    _vocab_size,
    embedding_dim,
    hidden_dim,
    batch_size,
    word_matrix,
    bidirectional=bidirectional,
)