Python LDA._update_indexes Examples

Programming Language: Python

Namespace/Package Name: creme.decomposition

Class/Type: LDA

Method/Function: _update_indexes

Examples at hotexamples.com: 5

Python LDA._update_indexes - 5 examples found. These are the top-rated real-world Python examples of creme.decomposition.LDA._update_indexes extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

LDA(11)

_update_indexes(5)

fit_transform_one(4)

_compute_statistics_components(3)

_update_weights(3)

_get_text(2)

fit_one(2)

preprocess(2)

process_text(2)

tokenizer(2)

transform_one(2)

Example #1

Show file

File: test_.py Project: zie225/creme

def test_statistics_two_components():
    """
    Check that online LDA yields the expected statistics for each document.

    Every document of DOC_SET is tokenized, indexed and fed to a
    two-component model; the statistics computed for each document are then
    compared against REFERENCE_STATISTICS_TWO_COMPONENTS.
    """
    # Seed the global NumPy RNG before the model is built.
    np.random.seed(42)

    n_components = 2
    lda = LDA(n_components, number_of_documents=60)

    collected = []
    for document in DOC_SET:
        raw_text = lda._get_text(document)
        cleaned_text = lda.preprocess(raw_text)
        tokens = lda.tokenizer(cleaned_text)

        # The vocabulary must be updated before tokens are looked up.
        lda._update_indexes(word_list=tokens)
        token_ids = [lda.word_to_index[token] for token in tokens]

        doc_statistics, _ = lda._compute_statistics_components(
            words_indexes_list=token_ids,
        )
        collected.append(doc_statistics)

        lda._update_weights(statistics=doc_statistics)

    for position, doc_statistics in enumerate(collected):
        for topic in range(n_components):
            observed = doc_statistics[topic]
            expected = REFERENCE_STATISTICS_TWO_COMPONENTS[position][topic]
            assert np.array_equal(a1=observed, a2=expected)

Example #2

Show file

File: test_.py Project: zie225/creme

def test_extraction_words_ids():
    """
    Check that input words are split into tokens.
    Check that indexes are updated and can be looked up per word.
    """
    expected_ids = [
        [1, 2],
        [1, 3, 4],
        [1, 2, 5],
        [1, 3],
        [1, 2, 6],
    ]

    np.random.seed(42)
    lda = LDA(2, number_of_documents=5)

    observed_ids = []
    for document in DOC_SET:
        raw_text = lda._get_text(document)
        cleaned_text = lda.preprocess(raw_text)
        tokens = lda.tokenizer(cleaned_text)

        # The vocabulary must be updated before tokens are looked up.
        lda._update_indexes(word_list=tokens)

        token_ids = [lda.word_to_index[token] for token in tokens]
        observed_ids.append(token_ids)

    assert observed_ids == expected_ids

Example #3

Show file

File: test_.py Project: zeta1999/creme

def test_extraction_words_ids():
    """
    Check that input words are split into tokens.
    Check that indexes are updated and can be looked up per word.
    """
    expected_ids = [
        [1, 2],
        [1, 3, 4],
        [1, 2, 5],
        [1, 3],
        [1, 2, 6],
    ]

    lda = LDA(2, number_of_documents=5, seed=42)

    observed_ids = []
    for document in DOC_SET:
        tokens = document.split(' ')

        # The vocabulary must be updated before tokens are looked up.
        lda._update_indexes(word_list=tokens)

        token_ids = [lda.word_to_index[token] for token in tokens]
        observed_ids.append(token_ids)

    assert observed_ids == expected_ids

Example #4

Show file

File: test_.py Project: zeta1999/creme

def test_statistics_two_components():
    """
    Check that online LDA yields the expected statistics for each document.

    Every document of DOC_SET is split on spaces, indexed and fed to a
    two-component model; the statistics computed for each document are then
    compared against REFERENCE_STATISTICS_TWO_COMPONENTS.
    """
    n_components = 2
    lda = LDA(n_components, number_of_documents=60, seed=42)

    collected = []
    for document in DOC_SET:
        tokens = document.split(' ')

        # The vocabulary must be updated before tokens are looked up.
        lda._update_indexes(word_list=tokens)
        token_ids = [lda.word_to_index[token] for token in tokens]

        doc_statistics, _ = lda._compute_statistics_components(
            words_indexes_list=token_ids,
        )
        collected.append(doc_statistics)

        lda._update_weights(statistics=doc_statistics)

    for position, doc_statistics in enumerate(collected):
        for topic in range(n_components):
            observed = doc_statistics[topic]
            expected = REFERENCE_STATISTICS_TWO_COMPONENTS[position][topic]
            assert np.array_equal(a1=observed, a2=expected)

Example #5

Show file

def test_statistics_five_components():
    """
    Check that online LDA yields the expected statistics for each document.

    Every document of DOC_SET is processed, indexed and fed to a
    five-component model; the statistics computed for each document are then
    compared against REFERENCE_STATISTICS_FIVE_COMPONENTS.
    """
    # Seed the global NumPy RNG before the model is built.
    np.random.seed(42)

    n_components = 5
    model_options = dict(
        n_components=n_components,
        number_of_documents=60,
        maximum_size_vocabulary=100,
        alpha_beta=100,
        alpha_theta=0.5,
    )
    lda = LDA(**model_options)

    collected = []
    for document in DOC_SET:
        tokens = lda.process_text(document)

        # The vocabulary must be updated before tokens are looked up.
        lda._update_indexes(word_list=tokens)
        token_ids = [lda.word_to_index[token] for token in tokens]

        doc_statistics, _ = lda._compute_statistics_components(
            words_indexes_list=token_ids,
        )
        collected.append(doc_statistics)

        lda._update_weights(statistics=doc_statistics)

    for position, doc_statistics in enumerate(collected):
        for topic in range(n_components):
            observed = doc_statistics[topic]
            expected = REFERENCE_STATISTICS_FIVE_COMPONENTS[position][topic]
            assert np.array_equal(a1=observed, a2=expected)