def test_five_components(): """ Assert that components computed are identical to the original version for n dimensions. """ n_components = 5 lda = LDA(n_components=n_components, number_of_documents=60, maximum_size_vocabulary=100, alpha_beta=100, alpha_theta=0.5, seed=42) components_list = [] for document in DOC_SET: tokens = {token: 1 for token in document.split(' ')} components_list.append(lda.fit_transform_one(tokens)) for index, component in enumerate(components_list): assert np.array_equal( a1=list(component.values()), a2=REFERENCE_FIVE_COMPONENTS[index], )
def test_prunning_vocabulary(): """ Vocabulary prunning is available to improve accuracy and limit memory usage. You can perform vocabulary prunning with parameters vocab_prune_interval (int) and maximum_size_vocabulary (int). """ lda = LDA(n_components=2, number_of_documents=60, vocab_prune_interval=2, maximum_size_vocabulary=3, seed=42) components_list = [] for document in DOC_SET: tokens = {token: 1 for token in document.split(' ')} components_list.append(lda.fit_transform_one(tokens)) for index, component in enumerate(components_list): assert np.array_equal(a1=list(component.values()), a2=REFERENCE_COMPONENTS_WITH_PRUNNING[index])