Esempio n. 1
0
def test_walk_graph():
    """Verify the walks and word frequencies returned for the test graph.

    Calls ``corpus.walk_graph`` on the module-level fixtures with
    ``walk_length=3`` and ``num_walks=2`` and compares both return values
    against hard-coded expectations.
    """
    # The expected output is the same three walks listed twice
    # (presumably one round per requested walk -- confirm against
    # corpus.walk_graph).
    one_round = [['A', 'B', ''], ['B', '', ''], ['C', 'A', 'B']]
    expected_walks = one_round * 2
    # These counts equal the number of times each label occurs in
    # expected_walks.
    expected_freq = {'A': 4, 'B': 6, 'C': 2}

    walks, freq = corpus.walk_graph(
        TEST_CSR, TEST_LABELS, walk_length=3, num_walks=2)

    assert np.array_equal(walks, expected_walks)
    assert freq == expected_freq
Esempio n. 2
0
def test_train_skipgram():
    """Train a skip-gram model on a corpus built from the test graph.

    Generates walks with ``corpus.walk_graph``, writes them to a temporary
    file through ``corpus.build_corpus``, trains via
    ``skipgram.train_model`` and checks the vocabulary size, window and
    vector size reported by the resulting model.
    """
    length, n_walks = 4, 2
    dim, win = 50, 5
    # Passed to the trainer as corpus_count: num_walks times the label count.
    n_sentences = n_walks * len(TEST_LABELS)
    walks, freq = corpus.walk_graph(TEST_CSR, TEST_LABELS, length, n_walks)

    with tempfile.NamedTemporaryFile() as tmp:
        corpus.build_corpus(walks, outpath=tmp.name)
        trained = skipgram.train_model(
            tmp.name,
            size=dim,
            window=win,
            word_freq=freq,
            corpus_count=n_sentences,
        )
        # Three vocabulary entries are expected; the walk test in this file
        # reports frequencies for exactly 'A', 'B' and 'C'.
        assert len(trained.wv.vocab) == 3
        assert trained.window == win
        assert trained.vector_size == dim
Esempio n. 3
0
def test_build_corpus():
    """Check that ``corpus.build_corpus`` returns the ``outpath`` it was given."""
    with tempfile.NamedTemporaryFile() as tmp:
        target = tmp.name
        # Only the walks are needed here; the second return value is unused.
        walks, _ = corpus.walk_graph(TEST_CSR, TEST_LABELS)
        returned_path = corpus.build_corpus(walks, outpath=target)
        assert returned_path == target