Beispiel #1
0
def lda_recalculate_beta(text, beta, phi):
    """
    Re-estimate the topic-word matrix from the per-document phi matrices.

    Update rule:
        beta[k, w]  proportional to  sum_d sum_n 1(w_{d,n} == w) * phi_d[n, k]

    text: sequence of documents, indexed in step with phi. If text[0] is a
          numpy array, every document is iterated directly as a sequence
          of word indexes; otherwise every document is walked through
          iterwords(), which yields (n, word, count) triples (the count is
          not used here).
    beta: (K x W) matrix; it is zeroed and refilled in place.
    phi:  D-length list of (N x K) matrices.

    Returns the same beta object after graphlib.row_normalize(beta) has
    been called on it (the return value of that call is ignored).
    """
    num_topics, _ = beta.shape

    # Start from a clean slate; the accumulation below writes into beta itself.
    beta[:, :] = 0.0

    # The document representation is decided once, from the first document.
    docs_are_arrays = isinstance(text[0], np.ndarray)

    for d, phi_d in enumerate(phi):
        ensure(phi_d.shape[1] == num_topics)
        if docs_are_arrays:
            for n, word in enumerate(text[d]):
                beta[:, word] += phi_d[n, :]
        else:
            for n, word, _count in iterwords(text[d]):
                beta[:, word] += phi_d[n, :]

    graphlib.row_normalize(beta)
    return beta
Beispiel #2
0
def test_row_normalize():
    """Exercise graphlib.row_normalize on a uniform and a two-row matrix."""
    # A 3x4 matrix of ones is expected to come back as all 0.25.
    uniform = np.ones((3, 4))
    normalized = graphlib.row_normalize(uniform)
    expected = np.ones(uniform.shape) * 0.25
    assert same(normalized, expected)

    # Second case only checks that the call leaves the shape untouched.
    two_rows = np.array([
        [0.75, 0.0, 0.25],
        [0.16666667, 0.66666667, 0.16666667],
    ])
    graphlib.row_normalize(two_rows)
    assert two_rows.shape == (2, 3)
Beispiel #3
0
def test_slda_update_phi():
    """
    Check lm.slda_update_phi against a hand-built expected phi.

    Update being tested:
        phi_{d,n}  proportional to  exp{ E[log theta | gamma]
                                        + E[log p(w_n | beta_{1:K})]
                                        + (y / (N sigma^2)) eta
                                        - [2 (eta^T phi_{d,-n}) eta + (eta o eta)]
                                          / (2 N^2 sigma^2) }
    where E[log p(w_n | beta_{1:K})] = log beta^T w_n.
    """
    gamma = np.array([3, 4, 5])
    text = [(0, 1), (1, 1)]
    beta = np.array([
        [0.75, 0.25],
        [0.40, 0.60],
        [0.10, 0.90],
    ])
    y_d = -0.5
    eta = np.array([-2.5, 1.6, 0.1])
    sigma_squared = 0.8
    phi = np.array([
        [0.65, 0.25, 0.10],
        [0.09, 0.78, 0.13],
    ])

    # Pre-computed pieces of the exponent shared by both rows.
    eta_squared = np.array([6.25, 2.56, 0.01])       # eta o eta
    e_log_theta = np.array([-1.51987734, -1.18654401154, -0.93654401154401])
    response = np.array([0.78125, -0.5, -0.03125])   # (y / (N sigma^2)) * eta

    def unnormalized_row(word_probs, other_row):
        # -0.15625 is -1 / (2 N^2 sigma^2) for N = 2, sigma^2 = 0.8.
        log_word = np.log(np.array(word_probs))
        penalty = -0.15625 * ((2 * (np.dot(eta, other_row)) * eta) + eta_squared)
        return np.exp(e_log_theta + log_word + response + penalty)

    row0 = unnormalized_row([0.75, 0.40, 0.10], phi[1])
    row0 /= np.sum(row0)  # normalize before it feeds into the next row

    # Rows are updated sequentially, so the second row must see the freshly
    # updated first row rather than the old phi[0].
    row1 = unnormalized_row([0.25, 0.60, 0.90], row0)

    answer = np.array([row0, row1])
    graphlib.row_normalize(answer)

    out = phi.copy()
    lm.slda_update_phi(text, out, gamma, beta, y_d, eta, sigma_squared)
    assert same(out, answer)

    # The fast (array-document) update gives a slightly different second row.
    fast_answer = answer.copy()
    fast_answer[1, :] = np.array([0.03422278, 0.26873478, 0.69704244])

    out = phi.copy()
    docarray = lm.doc_to_array([(0, 1), (1, 1)])
    lm.slda_update_phi(docarray, out, gamma, beta, y_d, eta, sigma_squared)

    assert same(out, fast_answer)
Beispiel #4
0
def test_calculate_EZZT():
    """Compare the E[Z Z^T] helpers in lm against a fixed matrix and each other."""
    # Hard-coded expected matrix for the module-level phi1/phi2 fixtures.
    diag_top = 8.0 / 9.0
    off_top = 2.0 / 9.0
    off_bottom = 3.0 / 2.0
    expected = (1.0 / 25) * np.array([
        [diag_top, off_top, off_top, 1, 1],
        [off_top, diag_top, off_top, 1, 1],
        [off_top, off_top, diag_top, 1, 1],
        [1, 1, 1, 3, off_bottom],
        [1, 1, 1, off_bottom, 3],
    ])

    # Via the combined ("big") phi matrix.
    combined = lm.calculate_big_phi(phi1[0], phi2[0])
    assert same(lm.calculate_EZZT(combined), expected)

    # Directly from the two small phis.
    assert same(lm.calculate_EZZT_from_small_phis(phi1[0], phi2[0]), expected)

    # Log-space variant should return the log of the same matrix.
    log_result = lm.calculate_EZZT_from_small_log_phis(log_phi1, log_phi2)
    assert same(log_result, np.log(expected))

    # Now a harder, non-uniform pair of matrices derived from `expected`.
    first = expected.copy()
    first[0, 0] = 5
    first[1, 1] = 9
    first = graphlib.row_normalize(first)
    second = first.copy()  # snapshot taken before `first` is perturbed again
    first[1, 1] = 2
    first[1, 0] = 1
    first = graphlib.row_normalize(first)

    # The big-phi route is the reference for the small-phi routes.
    reference = lm.calculate_EZZT(lm.calculate_big_phi(first, second))
    assert same(lm.calculate_EZZT_from_small_phis(first, second), reference)

    # Same answer expected in log space.
    log_result = lm.calculate_EZZT_from_small_log_phis(np.log(first), np.log(second))
    assert same(log_result, np.log(reference))
Beispiel #5
0
    b[4,1] = 8

    out = lm.calculate_big_phi(a, b)
    answer = np.array([
                       [1, 1, 5, 0, 0, 0, 0,],
                       [1, 1, 1, 0, 0, 0, 0,],
                       [0, 0, 0, 1, 1, 1, 1,],
                       [0, 0, 0, 1, 1, 1, 1,],
                       [0, 0, 0, 1, 1, 1, 1,],
                       [0, 0, 0, 1, 1, 1, 1,],
                       [0, 0, 0, 1, 8, 1, 1,],
                       [0, 0, 0, 1, 1, 1, 1,],
                      ])
    assert same(out, answer)

# Shared fixtures for the E[Z] / E[Z Z^T] tests: single-element lists holding
# a row-normalized all-ones matrix (2x3 and 3x2), plus their element-wise logs.
phi1 = [graphlib.row_normalize(np.ones((2, 3)))]
phi2 = [graphlib.row_normalize(np.ones((3, 2)))]
log_phi1 = np.log(phi1[0])
log_phi2 = np.log(phi2[0])

def test_calculate_EZ():
    big_phi = lm.calculate_big_phi(phi1[0], phi2[0])
    out = lm.calculate_EZ(big_phi)
    answer = (1.0 / 25) * np.array([30.0/9, 30.0/9, 30.0/9, 7.5, 7.5])
    assert same(out, answer)

    out = lm.calculate_EZ_from_small_phis(phi1[0], phi2[0])
    assert same(out, answer)

    # now test log phis
    big_log_phi = lm.calculate_big_log_phi(log_phi1, log_phi2)
    out = lm.calculate_EZ_from_big_log_phi(big_log_phi)