def test_walk_graph():
    """Check the walks and word frequencies returned by ``corpus.walk_graph``.

    Runs ``walk_graph`` on the module-level test graph with
    ``walk_length=3`` and ``num_walks=2`` and compares both return values
    against hard-coded expectations.
    """
    # The expected output is the same three walks listed twice
    # (presumably one round per ``num_walks`` -- confirm against walk_graph).
    single_round = [['A', 'B', ''], ['B', '', ''], ['C', 'A', 'B']]
    expected_walks = single_round * 2
    expected_freq = {'A': 4, 'B': 6, 'C': 2}

    random_walks, word_freq = corpus.walk_graph(
        TEST_CSR,
        TEST_LABELS,
        walk_length=3,
        num_walks=2,
    )

    assert np.array_equal(random_walks, expected_walks)
    assert word_freq == expected_freq
def test_train_skipgram():
    """Train a skipgram model on a corpus built from the test graph.

    Generates walks with ``corpus.walk_graph``, writes them to a temporary
    file via ``corpus.build_corpus``, trains with ``skipgram.train_model``
    and checks the resulting model's vocabulary size, window and vector
    size.
    """
    walk_length = 4
    num_walks = 2
    embedding_size = 50
    context_window = 5
    corpus_count = num_walks * len(TEST_LABELS)

    walks, frequencies = corpus.walk_graph(
        TEST_CSR, TEST_LABELS, walk_length, num_walks
    )

    # The corpus file must still exist while the model is trained, so the
    # training call and the checks stay inside the ``with`` block.
    with tempfile.NamedTemporaryFile() as tmp:
        corpus_file = tmp.name
        corpus.build_corpus(walks, outpath=corpus_file)
        model = skipgram.train_model(
            corpus_file,
            size=embedding_size,
            window=context_window,
            word_freq=frequencies,
            corpus_count=corpus_count,
        )

        assert len(model.wv.vocab) == 3
        assert model.window == context_window
        assert model.vector_size == embedding_size
def test_build_corpus():
    """Check that ``corpus.build_corpus`` returns the path it was given.

    Walks are generated from the test graph with ``walk_graph``'s default
    settings and written to a temporary file; the returned path must equal
    the ``outpath`` argument.
    """
    with tempfile.NamedTemporaryFile() as tmp:
        target_path = tmp.name
        # Only the walks are needed here; the frequency table is discarded.
        walks, _ = corpus.walk_graph(TEST_CSR, TEST_LABELS)
        returned_path = corpus.build_corpus(walks, outpath=target_path)
        assert returned_path == target_path