Example #1
0
def generate(n_docs=300, n_words=10, n_sent_length=5, n_hidden=8):
    """Build an LDA2Vec model plus flattened fake training data.

    The four arguments are forwarded unchanged to ``fake_data.fake_data``;
    ``n_words`` and ``n_hidden`` are additionally passed to ``LDA2Vec``.

    Returns
    -------
    model : LDA2Vec
        Constructed with ``n_words``, ``n_hidden``, the per-word-id counts
        and ``n_samples=1``.
    words_flat : ndarray
        The generated word ids, flattened to 1-D.
    doc_ids : ndarray of int32
        Row index of the generated array repeated ``n_sent_length`` times,
        i.e. one document id per entry of ``words_flat``.
    """
    words = fake_data.fake_data(n_docs, n_words, n_sent_length, n_hidden)
    words_flat = words.ravel()
    # np.repeat without ``axis`` already returns a flat array, so no extra
    # ravel is needed.
    doc_ids = np.repeat(np.arange(words.shape[0]).astype('int32'),
                        n_sent_length)
    # Count every id in [0, n_words), including ids that never occur.
    # np.unique(..., return_counts=True) only reports ids that are present,
    # which yields a shorter, misaligned vector whenever a word is missing
    # from the sample.
    # NOTE(review): assumes word ids are non-negative and that LDA2Vec
    # indexes ``counts`` by word id -- confirm against fake_data / LDA2Vec.
    counts = np.bincount(words_flat, minlength=n_words)
    model = LDA2Vec(n_words, n_hidden, counts, n_samples=1)
    return model, words_flat, doc_ids
Example #2
0
def generate(n_docs=300, n_words=100, n_sent_length=5, n_hidden=8):
    """Create an LDA2Vec model and the flat fake corpus used to feed it.

    All four arguments are handed to ``fake_data.fake_data``; ``n_words``
    and ``n_hidden`` also parameterize the ``LDA2Vec`` constructor.

    Returns
    -------
    model : LDA2Vec
        Built from ``n_words``, ``n_hidden``, the per-word-id counts and
        ``n_samples=1``.
    words_flat : ndarray
        1-D view/copy of the generated word-id array.
    doc_ids : ndarray of int32
        For each entry of ``words_flat``, the row (document) index it came
        from: each row index is repeated ``n_sent_length`` times.
    """
    words = fake_data.fake_data(n_docs, n_words, n_sent_length, n_hidden)
    words_flat = words.ravel()
    # The result of np.repeat (no ``axis``) is already one-dimensional.
    doc_ids = np.repeat(
        np.arange(words.shape[0]).astype('int32'), n_sent_length)
    # With a vocabulary of ``n_words`` ids and a limited number of sampled
    # tokens, some ids may never appear. np.unique would silently drop them
    # and return fewer than ``n_words`` counts, shifting every later entry;
    # bincount with ``minlength`` keeps one slot per id (zero if unseen).
    # NOTE(review): assumes word ids are non-negative and that LDA2Vec
    # indexes ``counts`` by word id -- confirm against fake_data / LDA2Vec.
    counts = np.bincount(words_flat, minlength=n_words)
    model = LDA2Vec(n_words, n_hidden, counts, n_samples=1)
    return model, words_flat, doc_ids
def test_fake_data():
    """Check dtype, leading dimensions and value ceiling of the fake data."""
    n_docs, n_words, n_sent_length, n_hidden = 100, 10, 5, 2
    data = fake_data.fake_data(n_docs, n_words, n_sent_length, n_hidden)

    # Word ids are expected to be stored as 32-bit integers.
    assert data.dtype == np.dtype('int32')

    # One row per document, one column per position in the sentence.
    n_rows, n_cols = data.shape[0], data.shape[1]
    assert n_rows == n_docs
    assert n_cols == n_sent_length

    # No generated id may exceed the largest valid vocabulary index.
    largest_id = np.max(data)
    assert largest_id <= n_words - 1