예제 #1
0
def test_cal_likelihood():
    """Check that `_cal_likelihood` returns a value below zero.

    Two matrices are built with `_rand_mat`, a zero-filled array of shape
    (len(test_vectorized_output), global_term_count) is passed through
    `_cal_p_dw`, and the likelihood computed from the result is asserted
    to be negative.
    """
    n_docs = len(test_vectorized_output)
    n_terms = test_vectorized_output.global_term_count
    topics = range(ntopics)

    # Build dz before zw, in case `_rand_mat` draws from shared random state.
    dz = _rand_mat(n_docs, ntopics)
    zw = _rand_mat(ntopics, n_terms)

    # NOTE(review): 0.8 is forwarded as-is; presumably a tempering factor --
    # confirm against `_cal_p_dw`.
    p_dw = _cal_p_dw(
        words_in_docs,
        word_cts_in_docs,
        topics,
        zw,
        dz,
        0.8,
        np.zeros((n_docs, n_terms)),
    )

    nt.assert_less(
        _cal_likelihood(words_in_docs, word_cts_in_docs, p_dw),
        0,
    )
예제 #2
0
파일: test_plsa.py 프로젝트: yusaks83/topik
def test_cal_likelihood():
    """Assert that the likelihood from `_cal_likelihood` is negative.

    The inputs are two `_rand_mat` matrices and a zero-initialised array
    that `_cal_p_dw` is asked to populate.
    """
    doc_count = len(test_vectorized_output)
    term_count = test_vectorized_output.global_term_count
    p_dw_shape = (doc_count, term_count)

    # dz is created first, then zw, matching the order callers may rely on
    # if `_rand_mat` consumes shared random state.
    dz = _rand_mat(doc_count, ntopics)
    zw = _rand_mat(ntopics, term_count)

    empty_p_dw = np.zeros(p_dw_shape)
    # NOTE(review): the literal 0.8 is passed straight to `_cal_p_dw`; its
    # meaning is not visible here -- confirm against that function.
    filled_p_dw = _cal_p_dw(words_in_docs, word_cts_in_docs, range(ntopics),
                            zw, dz, 0.8, empty_p_dw)

    result = _cal_likelihood(words_in_docs, word_cts_in_docs, filled_p_dw)
    nt.assert_less(result, 0)
예제 #3
0
def test_em():
    """Run one `_e_step` / `_m_step` pass and check row normalisation.

    After the M step, every row of the returned `zw` and `dz` must sum to
    (almost) 1.
    """
    n_docs = len(test_vectorized_output)
    n_terms = test_vectorized_output.global_term_count
    topics = range(ntopics)

    # Build dz before zw, in case `_rand_mat` draws from shared random state.
    dz = _rand_mat(n_docs, ntopics)
    zw = _rand_mat(ntopics, n_terms)

    # NOTE(review): 0.8 is forwarded unchanged to both helpers; presumably a
    # tempering factor -- confirm against `_cal_p_dw` / `_e_step`.
    p_dw = _cal_p_dw(
        words_in_docs, word_cts_in_docs, topics, zw, dz, 0.8,
        np.zeros((n_docs, n_terms)),
    )
    dw_z = _e_step(
        words_in_docs, np.zeros((n_docs, n_terms, ntopics)), topics, zw, dz,
        0.8, p_dw,
    )
    zw, dz = _m_step(words_in_docs, word_cts_in_docs, topics, zw, dw_z, dz)

    # Rows of zw are checked first, then rows of dz.
    for matrix in (zw, dz):
        for row in matrix:
            nt.assert_almost_equal(sum(row), 1)
예제 #4
0
파일: test_plsa.py 프로젝트: yusaks83/topik
def test_em():
    """Check that one EM iteration leaves `zw` and `dz` row-normalised.

    The test seeds two matrices with `_rand_mat`, computes `p_dw` via
    `_cal_p_dw`, runs `_e_step` followed by `_m_step`, and asserts that each
    row of both returned matrices sums to approximately 1.
    """
    doc_count = len(test_vectorized_output)
    term_count = test_vectorized_output.global_term_count
    p_dw_shape = (doc_count, term_count)
    dw_z_shape = (doc_count, term_count, ntopics)

    # dz is created first, then zw, in case `_rand_mat` consumes shared
    # random state.
    dz = _rand_mat(doc_count, ntopics)
    dw_z = np.zeros(dw_z_shape)
    p_dw = np.zeros(p_dw_shape)
    zw = _rand_mat(ntopics, term_count)

    # NOTE(review): the literal 0.8 goes to both `_cal_p_dw` and `_e_step`;
    # its meaning is not visible here -- confirm against those functions.
    p_dw = _cal_p_dw(words_in_docs, word_cts_in_docs, range(ntopics),
                     zw, dz, 0.8, p_dw)
    dw_z = _e_step(words_in_docs, dw_z, range(ntopics), zw, dz, 0.8, p_dw)
    updated = _m_step(words_in_docs, word_cts_in_docs, range(ntopics),
                      zw, dw_z, dz)
    zw, dz = updated

    # Collect rows so that zw rows are asserted before dz rows.
    rows = [topic for topic in zw] + [doc for doc in dz]
    for row in rows:
        nt.assert_almost_equal(sum(row), 1)