Exemple #1
0
def test_minbatch_randomized_from_corpus(test_conf, test_corp):
    """Check the index splits returned by ``MinBatch.randomized_from_corpus``.

    Two properties are verified:

    * the first call yields train indices of shape ``(4, 2)`` and test
      indices of shape ``(2, 2)``;
    * no test index produced by a second call equals any train index
      produced by the first call.

    NOTE(review): the third argument (``2``) is presumably the batch size,
    judging by the trailing dimension of the asserted shapes -- confirm
    against ``MinBatch``.
    """
    # Only the index arrays are inspected; the batch objects are discarded.
    first_train_idxs, first_test_idxs, _, _ = MinBatch.randomized_from_corpus(
        test_conf, test_corp, 2
    )
    assert first_train_idxs.shape == (4, 2)
    assert first_test_idxs.shape == (2, 2)

    _, second_test_idxs, _, _ = MinBatch.randomized_from_corpus(
        test_conf, test_corp, 2
    )

    # Flatten both index arrays so every individual index can be compared.
    second_test_flat = second_test_idxs.reshape(4)
    first_train_flat = first_train_idxs.reshape(8)
    for test_idx in second_test_flat:
        assert all(test_idx != train_idx for train_idx in first_train_flat)
Exemple #2
0
def test_minbatch_from_corpus(test_conf, test_corp):
    """Check the batches that ``MinBatch.from_corpus`` builds from fixed indices.

    One batch is built from corpus indices ``[1, 3]`` and another from
    ``[0, 2]``. The ids returned by ``data_batch_at`` / ``teach_batch_at``
    are converted back to tokens with ``test_corp.ids_to_tokens`` and
    compared against the expected tokens.

    NOTE(review): the integer passed to ``data_batch_at`` / ``teach_batch_at``
    is presumably a token position within the sequences -- confirm against
    ``MinBatch``.
    """
    train_batches = MinBatch.from_corpus(test_conf, test_corp, [[1, 3]])
    test_batches = MinBatch.from_corpus(test_conf, test_corp, [[0, 2]])

    def tokens_of(ids):
        # Materialise the ids as a list before handing them to the corpus.
        return test_corp.ids_to_tokens(list(ids))

    # Sentences noted by the original author for indices 1 and 3:
    #   I'm James.
    #   He hasn't
    expected_data = (
        (1, ["i",  "he"]),
        (2, ["am", "has"]),
    )
    for position, tokens in expected_data:
        assert tokens_of(train_batches[0].data_batch_at(position)) == tokens

    # Sentences noted by the original author for indices 0 and 2:
    #   <sj>James</sj> <v>is</v> a teacher.
    #   I haven't
    expected_teach = (
        (1, ["<sj>",    "i"]),
        (2, ["james",   "have"]),
        (7, ["a",       "<pad>"]),
        (8, ["teacher", "<pad>"]),
    )
    for position, tokens in expected_teach:
        assert tokens_of(test_batches[0].teach_batch_at(position)) == tokens