Exemple #1
0
def test_lda_recalculate_beta():
    """Check beta re-estimation (plain and log space) on a two-document corpus.

    Each document is a list of (word, count) pairs and each phi matrix has
    one row per token (the sum of the counts) and one column per topic.
    The same expected matrix is checked three times: with the raw documents,
    with the documents converted through ``lm.doc_to_array``, and with the
    log-space variant ``lm.lda_recalculate_log_beta``.
    """
    K = 2
    W = 3

    doc0 = [(0,3), (1,1)]
    doc1 = [(1,3), (2,2), (0,1)]
    text = [doc0,doc1]

    # Deterministic sentinel instead of uninitialised memory: the "result
    # changed" assertions below must not depend on whatever np.empty returns.
    beta = -np.ones((K,W), dtype=float)
    out = beta.copy()

    phi0 = np.zeros((sum([d[1] for d in doc0]), K))
    # rows 0-1 are assigned to topic index 0, rows 2-3 to topic index 1.
    # Assuming rows follow the (word, count) order of doc0, that is
    # word 0 twice for topic 0, and word 0 once plus word 1 once for topic 1.
    phi0[0][0] = 1
    phi0[1][0] = 1
    phi0[2][1] = 1
    phi0[3][1] = 1

    phi1 = np.zeros((sum([d[1] for d in doc1]), K))
    phi1[0][1] = 1
    phi1[1][1] = 1
    phi1[2][1] = 1
    phi1[3][0] = 1
    phi1[4][1] = 1
    phi1[5][0] = 1

    phi = [phi0, phi1]

    # per-topic word counts from the assignments above, normalised per row
    answer = np.array([[0.75, 0.0, 0.25],
                       [1.0/6, 2.0/3, 1.0/6]])

    assert out.shape == (2,3)
    lm.lda_recalculate_beta(text, out, phi)
    assert out.shape == (2,3)

    assert not same(beta, out)
    assert same(out, answer)

    # now test on docarray
    out = beta.copy()
    assert out.shape == (2,3)
    lm.lda_recalculate_beta([lm.doc_to_array(t) for t in text], out, phi)
    assert out.shape == (2,3)

    assert not same(beta, out)
    assert same(out, answer)


    # test log space
    # Start from a uniform beta rather than from the already-correct result
    # above; otherwise an implementation that does nothing would still pass.
    log_out = np.log(np.ones((K,W), dtype=float) / W)
    # the hard 0/1 assignments become -inf/0 in log space (log(0) is intended)
    log_phi = [np.log(p) for p in phi]
    assert log_out.shape == (2,3)
    assert not same(np.exp(log_out), answer)
    lm.lda_recalculate_log_beta(text, log_out, log_phi)
    assert log_out.shape == (2,3)

    assert not same(beta, np.exp(log_out))
    assert same(np.exp(log_out), answer)
Exemple #2
0
def partial_slda_m_step(var):
    """Run the M-step: refresh the topic-word betas, then sigma squared.

    `var` is the model state object. This reads `var.documents`, `var.beta`,
    `var.phi`, `var.eta` and `var.y`, and stores the value returned by
    `topiclib.partial_slda_recalculate_eta_sigma` on `var.sigma_squared`.
    The return value of `topiclib.lda_recalculate_beta` is ignored, so
    `var.beta` is presumably updated in place (confirm in topiclib).
    """
    ### M-step: ###
    # single-argument parenthesised print: same output as the print statement
    print('updating betas..')
    # betas for the documents come first
    topiclib.lda_recalculate_beta(var.documents, var.beta, var.phi)

    print('eta sigma...')
    # then the global gaussian parameters of the response variable
    new_sigma_squared = topiclib.partial_slda_recalculate_eta_sigma(var.eta, var.y, var.phi)
    var.sigma_squared = new_sigma_squared
Exemple #3
0
def tlc_m_step(var):
    """Run the M-step over the three topic groups, then update sigma squared.

    `var.beta` is updated slice by slice through
    `topiclib.lda_recalculate_beta`:

    * rows `[:Ku]` from documents + comments,
    * rows `[Ku:Ku+Ks]` from comments + labeled,
    * rows `[-Kb:]` from labeled + background.

    Every phi matrix is scaled by the multiplier of its corpus before being
    passed on. Finally `var.sigma_squared` receives the value returned by
    `topiclib.partial_slda_recalculate_eta_sigma(var.eta, var.y, var.phiL)`.

    NOTE(review): the `-Ks:` and `-Kb:` slices select *everything* when the
    corresponding count is 0; confirm that zero-sized groups never occur.
    """
    ### M-step: ###
    # single-argument parenthesised print: same output as the print statement
    print('updating betas..')
    n_unlabeled, n_sentiment, n_background = var.Ku, var.Ks, var.Kb

    print('update unlabeled document topics..')
    unlabeled_corpus = var.documents + var.comments
    doc_phi = [d*var.document_multiplier for d in var.phiD]
    comment_phi_head = [var.comment_multiplier*p[:,:n_unlabeled] for p in var.phiC]
    topiclib.lda_recalculate_beta(
        unlabeled_corpus,
        var.beta[:n_unlabeled],
        doc_phi + comment_phi_head,
    )

    print('update sentiment topics...')
    sentiment_corpus = var.comments + var.labeled
    comment_phi_tail = [var.comment_multiplier*p[:,-n_sentiment:] for p in var.phiC]
    labeled_phi_head = [var.labeled_multiplier*p[:,:n_sentiment] for p in var.phiL]
    topiclib.lda_recalculate_beta(
        sentiment_corpus,
        var.beta[n_unlabeled:n_unlabeled+n_sentiment],
        comment_phi_tail + labeled_phi_head,
    )

    print('update background topics...')
    background_corpus = var.labeled + var.background
    labeled_phi_tail = [var.labeled_multiplier*p[:,-n_background:] for p in var.phiL]
    background_phi = [var.background_multiplier*b for b in var.phiB]
    topiclib.lda_recalculate_beta(
        background_corpus,
        var.beta[-n_background:],
        labeled_phi_tail + background_phi,
    )

    print('eta sigma...')
    # global gaussian parameters of the response variable (labeled phi only)
    new_sigma_squared = topiclib.partial_slda_recalculate_eta_sigma(var.eta, var.y, var.phiL)
    var.sigma_squared = new_sigma_squared
Exemple #4
0
def lda_m_step(var):
    """Run the M-step for plain LDA: re-estimate the topic-word betas.

    Passes `var.documents`, `var.beta` and `var.phi` to
    `topiclib.lda_recalculate_beta`. Its return value is ignored, so
    `var.beta` is presumably updated in place (confirm in topiclib).
    """
    # single-argument parenthesised print: same output as the print statement
    print('updating betas..')
    topiclib.lda_recalculate_beta(
        var.documents,
        var.beta,
        var.phi,
    )