def test_statistics_two_components():
    """
    Check the per-document statistics produced by online LDA with two components.

    Every document of DOC_SET is split on single spaces, registered in the
    model's indexes, and passed to ``_compute_statistics_components``; the
    model weights are then updated before the next document is handled. The
    collected statistics are finally compared, component by component,
    against REFERENCE_STATISTICS_TWO_COMPONENTS.

    NOTE(review): another function with this exact name is defined later in
    this file; the later definition rebinds the name, so this one is
    presumably not collected by the test runner -- confirm which is intended.
    """
    topic_count = 2
    model = LDA(topic_count, number_of_documents=60, seed=42)

    collected = []
    for document in DOC_SET:
        tokens = document.split(' ')
        model._update_indexes(word_list=tokens)
        token_ids = [model.word_to_index[token] for token in tokens]
        # Only the first element of the returned pair is checked by this test.
        doc_statistics, _ = model._compute_statistics_components(
            words_indexes_list=token_ids,
        )
        collected.append(doc_statistics)
        # The weights are refreshed before the next document is processed.
        model._update_weights(statistics=doc_statistics)

    # Compare against the reference only after the whole corpus has been seen.
    for doc_position, doc_statistics in enumerate(collected):
        for topic in range(topic_count):
            assert np.array_equal(
                a1=doc_statistics[topic],
                a2=REFERENCE_STATISTICS_TWO_COMPONENTS[doc_position][topic],
            )
def test_statistics_two_components():
    """
    Check the per-document statistics produced by online LDA with two components.

    NumPy's global random generator is seeded with 42 before the model is
    built. Each document of DOC_SET then goes through the model's own
    ``_get_text`` -> ``preprocess`` -> ``tokenizer`` chain, is registered in
    the indexes, and is passed to ``_compute_statistics_components``; the
    weights are updated before the next document is handled. The collected
    statistics are finally compared, component by component, against
    REFERENCE_STATISTICS_TWO_COMPONENTS.

    NOTE(review): an earlier function in this file carries the same name;
    this later definition is the one bound to the name -- confirm the
    duplicate is intentional.
    """
    np.random.seed(42)
    topic_count = 2
    model = LDA(topic_count, number_of_documents=60)

    collected = []
    for document in DOC_SET:
        # Same pipeline as the nested call, spelled out one step at a time.
        raw_text = model._get_text(document)
        cleaned_text = model.preprocess(raw_text)
        tokens = model.tokenizer(cleaned_text)

        model._update_indexes(word_list=tokens)
        token_ids = [model.word_to_index[token] for token in tokens]
        # Only the first element of the returned pair is checked by this test.
        doc_statistics, _ = model._compute_statistics_components(
            words_indexes_list=token_ids,
        )
        collected.append(doc_statistics)
        # The weights are refreshed before the next document is processed.
        model._update_weights(statistics=doc_statistics)

    # Compare against the reference only after the whole corpus has been seen.
    for doc_position, doc_statistics in enumerate(collected):
        for topic in range(topic_count):
            assert np.array_equal(
                a1=doc_statistics[topic],
                a2=REFERENCE_STATISTICS_TWO_COMPONENTS[doc_position][topic],
            )
def test_statistics_five_components():
    """
    Check the per-document statistics produced by online LDA with five components.

    NumPy's global random generator is seeded with 42 before the model is
    built with explicit vocabulary-size and alpha settings. Each document of
    DOC_SET is tokenized with the model's ``process_text``, registered in the
    indexes, and passed to ``_compute_statistics_components``; the weights are
    updated before the next document is handled. The collected statistics are
    finally compared, component by component, against
    REFERENCE_STATISTICS_FIVE_COMPONENTS.
    """
    np.random.seed(42)
    topic_count = 5
    model = LDA(
        n_components=topic_count,
        number_of_documents=60,
        maximum_size_vocabulary=100,
        alpha_beta=100,
        alpha_theta=0.5,
    )

    collected = []
    for document in DOC_SET:
        tokens = model.process_text(document)
        model._update_indexes(word_list=tokens)
        token_ids = [model.word_to_index[token] for token in tokens]
        # Only the first element of the returned pair is checked by this test.
        doc_statistics, _ = model._compute_statistics_components(
            words_indexes_list=token_ids,
        )
        collected.append(doc_statistics)
        # The weights are refreshed before the next document is processed.
        model._update_weights(statistics=doc_statistics)

    # Compare against the reference only after the whole corpus has been seen.
    for doc_position, doc_statistics in enumerate(collected):
        for topic in range(topic_count):
            assert np.array_equal(
                a1=doc_statistics[topic],
                a2=REFERENCE_STATISTICS_FIVE_COMPONENTS[doc_position][topic],
            )