Python LDA.tokenizer Examples

Programming Language: Python

Namespace/Package Name: creme.decomposition

Class/Type: LDA

Method/Function: tokenizer

Examples at hotexamples.com: 2

Python LDA.tokenizer - 2 examples found. These are the top-rated real-world Python examples of creme.decomposition.LDA.tokenizer extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

LDA(11)

_update_indexes(5)

fit_transform_one(4)

_compute_statistics_components(3)

_update_weights(3)

_get_text(2)

fit_one(2)

preprocess(2)

process_text(2)

tokenizer(2)

transform_one(2)

Example #1

Show file

File: test_.py Project: zie225/creme

def test_statistics_two_components():
    '''
    Check the per-document statistics produced by a two-component online LDA.

    Every document of DOC_SET is tokenized, registered in the model's word
    index, and turned into statistics that are then used to update the model
    weights. Once all documents have been processed, the statistics recorded
    for each document and each component must equal the matching entry of
    REFERENCE_STATISTICS_TWO_COMPONENTS.
    '''
    n_components = 2

    # Seed NumPy's global generator before building the model; presumably the
    # LDA implementation draws from it, which keeps the run reproducible.
    np.random.seed(42)

    lda = LDA(n_components, number_of_documents=60)

    collected = []

    for document in DOC_SET:
        raw_text = lda._get_text(document)
        tokens = lda.tokenizer(lda.preprocess(raw_text))

        # The tokens must be registered before they can be looked up below.
        lda._update_indexes(word_list=tokens)
        token_ids = [lda.word_to_index[token] for token in tokens]

        doc_statistics, _ = lda._compute_statistics_components(
            words_indexes_list=token_ids,
        )
        collected.append(doc_statistics)

        lda._update_weights(statistics=doc_statistics)

    for doc_number, doc_statistics in enumerate(collected):
        for topic in range(n_components):
            observed = doc_statistics[topic]
            expected = REFERENCE_STATISTICS_TWO_COMPONENTS[doc_number][topic]
            assert np.array_equal(a1=observed, a2=expected)

Example #2

Show file

File: test_.py Project: zie225/creme

def test_extraction_words_ids():
    '''
    Check that documents are split into words and that word indexes are kept.

    Each document of DOC_SET is tokenized and its words are registered in the
    model's word index. The indexes looked up for every document, in order,
    must match the expected lists below.
    '''
    expected_ids = [
        [1, 2],
        [1, 3, 4],
        [1, 2, 5],
        [1, 3],
        [1, 2, 6],
    ]

    np.random.seed(42)

    lda = LDA(2, number_of_documents=5)

    observed_ids = []

    for document in DOC_SET:
        raw_text = lda._get_text(document)
        tokens = lda.tokenizer(lda.preprocess(raw_text))

        # Register the tokens first so that every lookup below succeeds.
        lda._update_indexes(word_list=tokens)

        token_ids = [lda.word_to_index[token] for token in tokens]
        observed_ids.append(token_ids)

    assert observed_ids == expected_ids