Python LDA Examples

Programming Language: Python

Namespace/Package Name: river.preprocessing

Method/Function: LDA

Examples at hotexamples.com: 6

Python LDA - 6 examples found. These are the top-rated real-world Python examples of river.preprocessing.LDA extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

def test_prunning_vocabulary():
    """
    Check the components produced when vocabulary pruning is enabled.

    Vocabulary pruning is available to improve accuracy and limit memory usage.
    It is configured with the integer parameters ``vocab_prune_interval`` and
    ``maximum_size_vocabulary``. Every document of DOC_SET is learned and
    transformed in one step, and the resulting component values are compared
    with REFERENCE_COMPONENTS_WITH_PRUNNING.
    """
    model = preprocessing.LDA(
        n_components=2,
        number_of_documents=60,
        vocab_prune_interval=2,
        maximum_size_vocabulary=3,
        seed=42,
    )

    # Each document becomes a bag of words in which every token maps to 1.
    outputs = [
        model.learn_transform_one({word: 1 for word in text.split(" ")})
        for text in DOC_SET
    ]

    for position, output in enumerate(outputs):
        expected = REFERENCE_COMPONENTS_WITH_PRUNNING[position]
        assert np.array_equal(a1=list(output.values()), a2=expected)

Example #2

Show file

def test_five_components():
    """
    Check that the components computed for five dimensions match the reference.

    Every document of DOC_SET is learned and transformed in one step, and the
    resulting component values are compared with REFERENCE_FIVE_COMPONENTS.
    """
    n_components = 5
    model = preprocessing.LDA(
        n_components=n_components,
        number_of_documents=60,
        maximum_size_vocabulary=100,
        alpha_beta=100,
        alpha_theta=0.5,
        seed=42,
    )

    # Each document becomes a bag of words in which every token maps to 1.
    outputs = [
        model.learn_transform_one({word: 1 for word in text.split(" ")})
        for text in DOC_SET
    ]

    for position, output in enumerate(outputs):
        expected = REFERENCE_FIVE_COMPONENTS[position]
        assert np.array_equal(a1=list(output.values()), a2=expected)

Example #3

Show file

def test_statistics_two_components():
    """
    Check the statistics online LDA extracts per document with two components.

    For each document of DOC_SET the words are indexed, the statistics are
    computed and recorded, and the model weights are updated with them before
    the next document is processed. The recorded statistics are then compared,
    component by component, with REFERENCE_STATISTICS_TWO_COMPONENTS.
    """
    n_components = 2
    model = preprocessing.LDA(n_components, number_of_documents=60, seed=42)

    collected = []
    for text in DOC_SET:
        tokens = text.split(" ")

        # Update the word index first, then look up each token's index.
        model._update_indexes(word_list=tokens)
        token_ids = [model.word_to_index[token] for token in tokens]

        # Only the first element of the returned pair is checked by this test.
        doc_stats, _ = model._compute_statistics_components(
            words_indexes_list=token_ids
        )
        collected.append(doc_stats)

        model._update_weights(statistics=doc_stats)

    for doc_position, doc_stats in enumerate(collected):
        expected = REFERENCE_STATISTICS_TWO_COMPONENTS[doc_position]
        for topic in range(n_components):
            assert np.array_equal(a1=doc_stats[topic], a2=expected[topic])

Example #4

Show file

def test_extraction_words_ids():
    """
    Check that document words are indexed and that the indexes can be read back.

    Each document of DOC_SET is split on spaces, passed to ``_update_indexes``,
    and its words are then looked up in ``word_to_index``. The collected
    indexes must equal the expected per-document lists.
    """
    expected = [
        [1, 2],
        [1, 3, 4],
        [1, 2, 5],
        [1, 3],
        [1, 2, 6],
    ]

    model = preprocessing.LDA(2, number_of_documents=5, seed=42)

    observed = []
    for text in DOC_SET:
        tokens = text.split(" ")
        # The index is updated before the lookup so every token has an entry.
        model._update_indexes(word_list=tokens)
        observed.append([model.word_to_index[token] for token in tokens])

    assert observed == expected

Example #5

Show file

File: test_lda.py Project: xianghu-xiaokachekkk/river

def test_learn_transform():
    """
    Assert that learn_one followed by transform_one returns the expected output.

    Each document of DOC_SET is turned into a bag of words (token -> 1), learned
    with ``learn_one`` and then transformed with ``transform_one``. The component
    values obtained for every document are compared with
    REFERENCE_LEARN_ONE_PREDICT_ONE.
    """

    lda = preprocessing.LDA(n_components=2,
                            number_of_documents=60,
                            vocab_prune_interval=2,
                            maximum_size_vocabulary=3,
                            seed=42)
    components_list = []

    for document in DOC_SET:
        tokens = {token: 1 for token in document.split(' ')}

        # learn_one is called only for its side effect on the model. Its return
        # value is deliberately not rebound to `lda`: if learn_one returns None
        # (NOTE(review): believed to be the case in recent river releases —
        # confirm), rebinding would break the transform_one call below.
        lda.learn_one(x=tokens)

        components_list.append(lda.transform_one(x=tokens))

    for index, component in enumerate(components_list):
        assert np.array_equal(a1=list(component.values()),
                              a2=REFERENCE_LEARN_ONE_PREDICT_ONE[index])

Example #6

Show file

File: test_lda.py Project: xianghu-xiaokachekkk/river

def test_statistics_five_components():
    """
    Check the statistics online LDA extracts per document with five components.

    For each document of DOC_SET the words are indexed, the statistics are
    computed and recorded, and the model weights are updated with them before
    the next document is processed. The recorded statistics are then compared,
    component by component, with REFERENCE_STATISTICS_FIVE_COMPONENTS.
    """
    n_components = 5
    model = preprocessing.LDA(n_components=n_components,
                              number_of_documents=60,
                              maximum_size_vocabulary=100,
                              alpha_beta=100,
                              alpha_theta=0.5,
                              seed=42)

    collected = []
    for text in DOC_SET:
        tokens = text.split(' ')

        # Update the word index first, then look up each token's index.
        model._update_indexes(word_list=tokens)
        token_ids = [model.word_to_index[token] for token in tokens]

        # Only the first element of the returned pair is checked by this test.
        doc_stats, _ = model._compute_statistics_components(
            words_indexes_list=token_ids,
        )
        collected.append(doc_stats)

        model._update_weights(statistics=doc_stats)

    for doc_position, doc_stats in enumerate(collected):
        expected = REFERENCE_STATISTICS_FIVE_COMPONENTS[doc_position]
        for topic in range(n_components):
            assert np.array_equal(a1=doc_stats[topic], a2=expected[topic])