Example #1
0
def test_statistics_two_components():
    """
    Check that the online LDA extracts the expected statistics for each
    document in the stream, with two components.
    """
    n_components = 2

    np.random.seed(42)

    lda = LDA(n_components, number_of_documents=60)

    collected = []

    for document in DOC_SET:
        # Same preprocessing pipeline the model applies internally.
        tokens = lda.tokenizer(lda.preprocess(lda._get_text(document)))
        lda._update_indexes(word_list=tokens)
        indexes = [lda.word_to_index[token] for token in tokens]

        stats, _ = lda._compute_statistics_components(
            words_indexes_list=indexes)

        collected.append(stats)
        lda._update_weights(statistics=stats)

    for position, stats in enumerate(collected):
        for topic in range(n_components):
            assert np.array_equal(
                a1=stats[topic],
                a2=REFERENCE_STATISTICS_TWO_COMPONENTS[position][topic],
            )
Example #2
0
def test_five_components():
    """
    Check that the computed components are identical to the original
    implementation when using five dimensions.
    """
    np.random.seed(42)

    n_components = 5

    model = LDA(
        n_components=n_components,
        number_of_documents=60,
        maximum_size_vocabulary=100,
        alpha_beta=100,
        alpha_theta=0.5,
    )

    # Fit-transform each document in stream order.
    outputs = [model.fit_transform_one(document) for document in DOC_SET]

    for position, output in enumerate(outputs):
        assert np.array_equal(
            a1=list(output.values()),
            a2=REFERENCE_FIVE_COMPONENTS[position],
        )
Example #3
0
def test_five_components():
    """
    Check that the computed components are identical to the original
    implementation when using five dimensions (seeded model, token-dict input).
    """
    n_components = 5

    model = LDA(
        n_components=n_components,
        number_of_documents=60,
        maximum_size_vocabulary=100,
        alpha_beta=100,
        alpha_theta=0.5,
        seed=42,
    )

    outputs = []
    for document in DOC_SET:
        # Unit-count bag-of-words representation of the document.
        bag = {word: 1 for word in document.split(' ')}
        outputs.append(model.fit_transform_one(bag))

    for position, output in enumerate(outputs):
        assert np.array_equal(
            a1=list(output.values()),
            a2=REFERENCE_FIVE_COMPONENTS[position],
        )
Example #4
0
def test_extraction_words_ids():
    """
    Check that input words are split on spaces and that the word-to-index
    mapping is updated and retrievable for every document.
    """
    lda = LDA(2, number_of_documents=5, seed=42)

    extracted = []
    for document in DOC_SET:
        tokens = document.split(' ')
        lda._update_indexes(word_list=tokens)
        extracted.append([lda.word_to_index[token] for token in tokens])

    expected = [
        [1, 2],
        [1, 3, 4],
        [1, 2, 5],
        [1, 3],
        [1, 2, 6],
    ]
    assert extracted == expected
Example #5
0
def test_extraction_words_ids():
    """
    Check that input words are split via the model's text processing and
    that the word-to-index mapping is updated and retrievable.
    """
    np.random.seed(42)

    lda = LDA(2, number_of_documents=5)

    extracted = []
    for document in DOC_SET:
        tokens = lda.process_text(document)
        lda._update_indexes(word_list=tokens)
        extracted.append([lda.word_to_index[token] for token in tokens])

    expected = [
        [1, 2],
        [1, 3, 4],
        [1, 2, 5],
        [1, 3],
        [1, 2, 6],
    ]
    assert extracted == expected
Example #6
0
def test_statistics_two_components():
    """
    Check that the online LDA extracts the expected statistics for each
    document in the stream, with two components (seeded model).
    """
    n_components = 2

    lda = LDA(n_components, number_of_documents=60, seed=42)

    collected = []

    for document in DOC_SET:
        tokens = document.split(' ')
        lda._update_indexes(word_list=tokens)
        indexes = [lda.word_to_index[token] for token in tokens]

        stats, _ = lda._compute_statistics_components(
            words_indexes_list=indexes)

        collected.append(stats)
        lda._update_weights(statistics=stats)

    for position, stats in enumerate(collected):
        for topic in range(n_components):
            assert np.array_equal(
                a1=stats[topic],
                a2=REFERENCE_STATISTICS_TWO_COMPONENTS[position][topic],
            )
Example #7
0
def test_prunning_vocabulary():
    """
    Vocabulary pruning is available to improve accuracy and limit memory
    usage. It is controlled by the vocab_prune_interval (int) and
    maximum_size_vocabulary (int) parameters.
    """
    np.random.seed(42)

    model = LDA(
        n_components=2,
        number_of_documents=60,
        vocab_prune_interval=2,
        maximum_size_vocabulary=3,
    )

    outputs = [model.fit_transform_one(x=document) for document in DOC_SET]

    for position, output in enumerate(outputs):
        assert np.array_equal(
            a1=list(output.values()),
            a2=REFERENCE_COMPONENTS_WITH_PRUNNING[position],
        )
Example #8
0
def test_fit_transform():
    """
    Check that calling fit_one followed by transform_one returns the
    expected output (seeded model, token-dict input).
    """
    model = LDA(
        n_components=2,
        number_of_documents=60,
        vocab_prune_interval=2,
        maximum_size_vocabulary=3,
        seed=42,
    )

    outputs = []
    for document in DOC_SET:
        # Unit-count bag-of-words representation of the document.
        bag = {word: 1 for word in document.split(' ')}
        # fit_one returns the fitted model; rebind to mirror the fluent API.
        model = model.fit_one(x=bag)
        outputs.append(model.transform_one(x=bag))

    for position, output in enumerate(outputs):
        assert np.array_equal(
            a1=list(output.values()),
            a2=REFERENCE_FIT_ONE_PREDICT_ONE[position],
        )
Example #9
0
def test_fit_transform():
    """
    Check that calling fit_one followed by transform_one returns the
    expected output.
    """
    np.random.seed(42)

    model = LDA(
        n_components=2,
        number_of_documents=60,
        vocab_prune_interval=2,
        maximum_size_vocabulary=3,
    )

    outputs = []
    for document in DOC_SET:
        # fit_one returns the fitted model; rebind to mirror the fluent API.
        model = model.fit_one(x=document)
        outputs.append(model.transform_one(x=document))

    for position, output in enumerate(outputs):
        assert np.array_equal(
            a1=list(output.values()),
            a2=REFERENCE_FIT_ONE_PREDICT_ONE[position],
        )
Example #10
0
def test_prunning_vocabulary():
    """
    Vocabulary pruning is available to improve accuracy and limit memory
    usage. It is controlled by the vocab_prune_interval (int) and
    maximum_size_vocabulary (int) parameters (seeded model, token-dict input).
    """
    model = LDA(
        n_components=2,
        number_of_documents=60,
        vocab_prune_interval=2,
        maximum_size_vocabulary=3,
        seed=42,
    )

    outputs = []
    for document in DOC_SET:
        # Unit-count bag-of-words representation of the document.
        bag = {word: 1 for word in document.split(' ')}
        outputs.append(model.fit_transform_one(bag))

    for position, output in enumerate(outputs):
        assert np.array_equal(
            a1=list(output.values()),
            a2=REFERENCE_COMPONENTS_WITH_PRUNNING[position],
        )
Example #11
0
def test_statistics_five_components():
    """
    Check that the online LDA extracts the expected statistics for each
    document in the stream, with five components.
    """
    np.random.seed(42)

    n_components = 5

    lda = LDA(
        n_components=n_components,
        number_of_documents=60,
        maximum_size_vocabulary=100,
        alpha_beta=100,
        alpha_theta=0.5,
    )

    collected = []

    for document in DOC_SET:
        tokens = lda.process_text(document)
        lda._update_indexes(word_list=tokens)
        indexes = [lda.word_to_index[token] for token in tokens]

        stats, _ = lda._compute_statistics_components(
            words_indexes_list=indexes)

        collected.append(stats)
        lda._update_weights(statistics=stats)

    for position, stats in enumerate(collected):
        for topic in range(n_components):
            assert np.array_equal(
                a1=stats[topic],
                a2=REFERENCE_STATISTICS_FIVE_COMPONENTS[position][topic],
            )