def test_lda_recalculate_beta():
    """Check beta re-estimation on a two-document, two-topic toy corpus.

    Runs ``lm.lda_recalculate_beta`` on documents given as (word, count)
    pairs and again on their ``lm.doc_to_array`` form, then runs
    ``lm.lda_recalculate_log_beta`` on log-space inputs.  All three runs
    must fill the output buffer with the same hand-computed matrix.
    """
    n_topics = 2
    n_words = 3

    # Each document is a list of (word id, count) pairs.
    doc0 = [(0, 3), (1, 1)]
    doc1 = [(1, 3), (2, 2), (0, 1)]
    text = [doc0, doc1]

    def one_hot_phi(doc, topic_of_token):
        """Return a (tokens x topics) matrix with a single 1 in each listed row."""
        n_tokens = sum(count for _, count in doc)
        phi_doc = np.zeros((n_tokens, n_topics))
        for row, topic in enumerate(topic_of_token):
            phi_doc[row][topic] = 1
        return phi_doc

    # Hard topic assignment for every token of each document.
    phi = [
        one_hot_phi(doc0, [0, 0, 1, 1]),
        one_hot_phi(doc1, [1, 1, 1, 0, 1, 0]),
    ]

    # Expected result if phi rows follow each document's tokens in order
    # (every word repeated `count` times): topic 0 then owns words
    # {0: 3, 2: 1} and topic 1 owns {0: 1, 1: 4, 2: 1}; rows are normalised.
    answer = np.array([[0.75, 0.0, 0.25],
                       [1.0/6, 2.0/3, 1.0/6]])

    # Uninitialised scratch values; the routine under test has to replace them.
    beta = np.empty((n_topics, n_words), dtype=float)

    def recalculated(corpus):
        """Run the update on a fresh copy of ``beta`` and verify the result."""
        out = beta.copy()
        assert out.shape == (n_topics, n_words)
        lm.lda_recalculate_beta(corpus, out, phi)
        assert out.shape == (n_topics, n_words)
        assert not same(beta, out)
        assert same(out, answer)
        return out

    recalculated(text)

    # now test on docarray
    out = recalculated([lm.doc_to_array(t) for t in text])

    # test log space
    log_out = np.log(out)
    log_phi = [np.log(p) for p in phi]
    assert log_out.shape == (n_topics, n_words)
    lm.lda_recalculate_log_beta(text, log_out, log_phi)
    assert log_out.shape == (n_topics, n_words)
    assert not same(beta, np.exp(log_out))
    assert same(np.exp(log_out), answer)
def partial_slda_m_step(var):
    """Run the M-step of the partial sLDA model on ``var``.

    Reads ``var.documents``, ``var.beta``, ``var.phi``, ``var.eta`` and
    ``var.y``; stores the value returned by
    ``topiclib.partial_slda_recalculate_eta_sigma`` in ``var.sigma_squared``.

    NOTE(review): the return value of ``lda_recalculate_beta`` is discarded,
    so ``var.beta`` is presumably updated in place (and likewise ``var.eta``
    by the eta/sigma routine) -- confirm against ``topiclib``.
    """
    # print(...) with one argument emits the same text on Python 2 and 3.
    print('updating betas..')
    # update betaD for documents first
    topiclib.lda_recalculate_beta(var.documents, var.beta, var.phi)

    print('eta sigma...')
    # update response variable gaussian global parameters
    var.sigma_squared = topiclib.partial_slda_recalculate_eta_sigma(
        var.eta, var.y, var.phi)
def _tail_start(length, count):
    """Return the index at which the last ``count`` of ``length`` entries begin.

    Unlike the ``[-count:]`` idiom this yields an empty tail when ``count``
    is 0 (``[-0:]`` would select everything); for ``count > 0`` the two agree.
    """
    return max(length - count, 0)


def tlc_m_step(var):
    """Run the M-step of the TLC model on ``var``.

    Three groups of topics are re-estimated, each from two corpora whose phi
    matrices are scaled by that corpus's multiplier before being passed to
    ``topiclib.lda_recalculate_beta``:

    * ``var.beta[:Ku]``       from documents + comments
    * ``var.beta[Ku:Ku+Ks]``  from comments + labeled
    * the last ``Kb`` rows    from labeled + background

    Finally ``var.sigma_squared`` is set from
    ``topiclib.partial_slda_recalculate_eta_sigma(var.eta, var.y, var.phiL)``.

    NOTE(review): the beta slices are passed without using a return value,
    so ``var.beta`` is presumably a NumPy array updated in place through
    those views -- confirm against ``topiclib``.
    """
    # print(...) with one argument emits the same text on Python 2 and 3.
    print('updating betas..')
    Ku, Ks, Kb = var.Ku, var.Ks, var.Kb

    print('update unlabeled document topics..')
    dc = var.documents + var.comments
    # Comments contribute only their first Ku columns here; every phi is
    # weighted by its corpus multiplier.
    phi_dc = ([d*var.document_multiplier for d in var.phiD]
              + [var.comment_multiplier*p[:, :Ku] for p in var.phiC])
    topiclib.lda_recalculate_beta(dc, var.beta[:Ku], phi_dc)

    print('update sentiment topics...')
    cl = var.comments + var.labeled
    # Last Ks columns of each comment phi, first Ks columns of each labeled phi.
    phi_cl = ([var.comment_multiplier*p[:, _tail_start(p.shape[1], Ks):]
               for p in var.phiC]
              + [var.labeled_multiplier*p[:, :Ks] for p in var.phiL])
    topiclib.lda_recalculate_beta(cl, var.beta[Ku:Ku+Ks], phi_cl)

    print('update background topics...')
    lb = var.labeled + var.background
    # Last Kb columns of each labeled phi, all columns of each background phi.
    phi_lb = ([var.labeled_multiplier*p[:, _tail_start(p.shape[1], Kb):]
               for p in var.phiL]
              + [var.background_multiplier*b for b in var.phiB])
    # With Kb == 0 a plain beta[-Kb:] would hand the whole of beta to the
    # background update; the clamped start keeps the slice empty instead.
    background_beta = var.beta[_tail_start(var.beta.shape[0], Kb):]
    topiclib.lda_recalculate_beta(lb, background_beta, phi_lb)

    print('eta sigma...')
    # update response variable gaussian global parameters
    var.sigma_squared = topiclib.partial_slda_recalculate_eta_sigma(
        var.eta, var.y, var.phiL)
def lda_m_step(var):
    """Run the M-step of plain LDA on ``var``.

    Passes ``var.documents``, ``var.beta`` and ``var.phi`` to
    ``topiclib.lda_recalculate_beta``.

    NOTE(review): the call's return value is discarded, so ``var.beta`` is
    presumably updated in place -- confirm against ``topiclib``.
    """
    # print(...) with one argument emits the same text on Python 2 and 3.
    print('updating betas..')
    topiclib.lda_recalculate_beta(var.documents, var.beta, var.phi)