def kl_divergence(A, B):
    """
    Kullback-Leibler divergence between the zero-mean Gaussians whose
    (singular) covariance matrices are A and B, written as half of a Stein loss.

    @param A: doubly centered symmetric nxn matrix of rank n-1
    @param B: doubly centered symmetric nxn matrix of rank n-1
    """
    assert_symmetric(A)
    assert_symmetric(B)
    n = A.shape[0]
    # pseudoinverse of B, obtained by augmenting, inverting, and restoring
    B_pinv = restored(inv(augmented(B)))
    stein_loss = (
            trace(dot(B_pinv, A)) -
            (log_pdet(B_pinv) + log_pdet(A)) - (n-1))
    return 0.5 * stein_loss
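
# The function above relies on helpers (assert_symmetric, augmented, restored,
# log_pdet) that are not shown in this snippet.  The definitions below are one
# plausible reading based on the J/n augmentation trick for doubly centered
# rank n-1 matrices; treat them as an illustrative assumption, not the
# original implementations.
import numpy
from numpy import dot, trace
from numpy.linalg import inv, slogdet
from numpy.testing import assert_allclose

def assert_symmetric(M, tol=1e-10):
    assert_allclose(M, M.T, atol=tol)

def augmented(M):
    # adding J/n makes the doubly centered rank n-1 matrix invertible
    n = M.shape[0]
    return M + numpy.ones((n, n)) / n

def restored(M):
    # subtracting J/n again gives the pseudoinverse: pinv(B) = inv(B + J/n) - J/n
    n = M.shape[0]
    return M - numpy.ones((n, n)) / n

def log_pdet(M):
    # log pseudo-determinant: the augmentation replaces the zero eigenvalue by one
    return slogdet(augmented(M))[1]

# quick sanity check: the divergence of a distribution from itself is zero
n = 4
H = numpy.eye(n) - numpy.ones((n, n)) / n            # centering projector
X = numpy.random.rand(n, n)
A_test = dot(H, dot(dot(X, X.T) + numpy.eye(n), H))  # doubly centered, rank n-1
assert_allclose(kl_divergence(A_test, A_test), 0, atol=1e-8)
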
def clever_cross_entropy_trees(B, nleaves, va, vb):
    """
    Try being a little more clever.

    @param B: augmented incidence matrix
    @param nleaves: number of leaves
    @param va: augmented reference point edge variances
    @param vb: augmented test point edge variances
    """

    # deduce some quantities assuming an unrooted bifurcating tree
    ninternal = nleaves - 2
    nvertices = nleaves + ninternal
    nedges = nvertices - 1

    # define an index for taking schur complements
    n = nvertices
    k = nleaves + 1

    # Construct the full Laplacian matrix plus J/n.
    # Take a block of the diagonal, corresponding to the inverse
    # of a schur complement.
    Wa = diag(reciprocal(va))
    La_plus = dot(B.T, dot(Wa, B))
    print(La_plus)
    print(scipy.linalg.eigh(La_plus))
    Laa = La_plus[:k, :k]
    Lab = La_plus[:k, k:]
    Lba = La_plus[k:, :k]
    Lbb = La_plus[k:, k:]
    L_schur_plus = Laa - dot(Lab, dot(inv(Lbb), Lba))
    assert_allclose(inv(L_schur_plus), inv(La_plus)[:k, :k])
    A = inv(La_plus)[:k, :k]
    print(scipy.linalg.eigh(A))

    # Construct the Schur complement of the test point matrix.
    Wb = diag(reciprocal(vb))
    L_plus = dot(B.T, dot(Wb, B))
    Laa = L_plus[:k, :k]
    Lab = L_plus[:k, k:]
    Lba = L_plus[k:, :k]
    Lbb = L_plus[k:, k:]
    L_schur_plus = Laa - dot(Lab, dot(inv(Lbb), Lba))
    B_inv = L_schur_plus
    #return 0.5 * ((n-1) * LOG2PI + trace(dot(B_inv, A)) - log(det(B_inv)))
    return 0.5 * (n * LOG2PI + trace(dot(B_inv, A) - 1) - log(det(B_inv)))
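
# Standalone illustration of the identity asserted inside the function above:
# for a positive definite matrix partitioned into blocks, the inverse of the
# Schur complement Laa - Lab inv(Lbb) Lba equals the corresponding block of the
# full inverse.  This is only a sketch with an arbitrary SPD matrix,
# independent of the tree construction.
import numpy
from numpy import dot
from numpy.linalg import inv
from numpy.testing import assert_allclose

n, k = 6, 3
X = numpy.random.rand(n, n)
L = dot(X, X.T) + n * numpy.eye(n)          # symmetric positive definite
Laa, Lab = L[:k, :k], L[:k, k:]
Lba, Lbb = L[k:, :k], L[k:, k:]
L_schur = Laa - dot(Lab, dot(inv(Lbb), Lba))
assert_allclose(inv(L_schur), inv(L)[:k, :k])
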
Example #3
    def test_pullback_gradient(self):
        (D,M,N) = 3,3,2
        P = M*N
        A = UTPM(numpy.zeros((D,P,M,M)))

        A0 = numpy.random.rand(M,N)

        for m in range(M):
            for n in range(N):
                p = m*N + n
                A.data[0,p,:M,:N] = A0
                A.data[1,p,m,n] = 1.

        cg = CGraph()
        A = Function(A)
        Q,R = qr(A)
        B = dot(Q,R)
        y = trace(B)

        cg.independentFunctionList = [A]
        cg.dependentFunctionList = [y]

        # print cg

        # print y.x.data

        g1  =  y.x.data[1]
        g11 =  y.x.data[2]

        # print g1
        ybar = y.x.zeros_like()
        ybar.data[0,:] = 1.
        cg.pullback([ybar])


        for m in range(M):
            for n in range(N):
                p = m*N + n

                #check gradient
                assert_array_almost_equal(y.x.data[1,p], A.xbar.data[0,p,m,n])

                #check hessian
                assert_array_almost_equal(0, A.xbar.data[1,p,m,n])
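
# What the assertions above amount to: Q R reconstructs A exactly, so
# trace(dot(Q, R)) equals trace(A), whose gradient is the identity matrix and
# whose Hessian vanishes.  A plain-numpy finite-difference sketch of the same
# fact, independent of the UTPM pullback (illustrative only).
import numpy

def f(A):
    Q, R = numpy.linalg.qr(A)
    return numpy.trace(numpy.dot(Q, R))

M = 3
A0 = numpy.random.rand(M, M)
eps = 1e-6
grad = numpy.zeros((M, M))
for i in range(M):
    for j in range(M):
        E = numpy.zeros((M, M))
        E[i, j] = eps
        grad[i, j] = (f(A0 + E) - f(A0 - E)) / (2 * eps)
numpy.testing.assert_array_almost_equal(grad, numpy.eye(M))
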
Example #4
    def test_outer(self):
        x = numpy.arange(4)

        cg = algopy.CGraph()
        x = algopy.Function(x)
        x1 = x[:x.size//2]
        x2 = x[x.size//2:]
        y = algopy.trace(algopy.outer(x1,x2))
        cg.trace_off()
        cg.independentFunctionList = [x]
        cg.dependentFunctionList = [y]

        cg2 = algopy.CGraph()
        x = algopy.Function(x)
        x1 = x[:x.size//2]
        x2 = x[x.size//2:]
        z = algopy.dot(x1,x2)
        cg2.trace_off()
        cg2.independentFunctionList = [x]
        cg2.dependentFunctionList = [z]


        assert_array_almost_equal(cg.jacobian(numpy.arange(4)), cg2.jacobian(numpy.arange(4)))
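
# Why the two graphs have the same Jacobian: trace(outer(x1, x2)) is just
# dot(x1, x2), since only the diagonal of the outer product contributes to the
# trace.  A quick plain-numpy check of that identity (illustrative only).
import numpy
x = numpy.arange(4.0)
x1, x2 = x[:2], x[2:]
assert numpy.allclose(numpy.trace(numpy.outer(x1, x2)), numpy.dot(x1, x2))
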
Example #5
    def test_pullback_gradient2(self):
        (D,P,M,N) = 3,9,3,3
        A = UTPM(numpy.zeros((D,P,M,M)))

        A0 = numpy.random.rand(M,N)
        for m in range(M):
            for n in range(N):
                p = m*N + n
                A.data[0,p,:M,:N] = A0
                A.data[1,p,m,n] = 1.

        cg = CGraph()
        A = Function(A)
        B = inv(A)
        y = trace(B)

        cg.independentFunctionList = [A]
        cg.dependentFunctionList = [y]

        ybar = y.x.zeros_like()
        ybar.data[0,:] = 1.
        cg.pullback([ybar])

        g1  =  y.x.data[1]
        g2 = A.xbar.data[0,0].ravel()

        assert_array_almost_equal(g1, g2)

        tmp = []
        for m in range(M):
            for n in range(N):
                p = m*N + n
                tmp.append( A.xbar.data[1,p,m,n])

        h1 = y.x.data[2]
        h2 = numpy.array(tmp)
        assert_array_almost_equal(2*h1, h2)
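
# The gradient being checked above: for f(A) = trace(inv(A)), the derivative
# with respect to A is -(inv(A) dot inv(A)).T.  A plain-numpy finite-difference
# sketch of that formula, independent of the UTPM machinery (illustrative only).
import numpy

def f(A):
    return numpy.trace(numpy.linalg.inv(A))

M = 3
A0 = numpy.random.rand(M, M) + M * numpy.eye(M)     # keep A well conditioned
A0_inv = numpy.linalg.inv(A0)
analytic = -numpy.dot(A0_inv, A0_inv).T
eps = 1e-6
numeric = numpy.zeros((M, M))
for i in range(M):
    for j in range(M):
        E = numpy.zeros((M, M))
        E[i, j] = eps
        numeric[i, j] = (f(A0 + E) - f(A0 - E)) / (2 * eps)
numpy.testing.assert_array_almost_equal(numeric, analytic)
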
Example #6
xdata[0] = [1, 2, 3]
xdata[1] = dirs

# STEP 3: compute function F in UTP arithmetic
x = algopy.UTPM(xdata)
y = eval_F(x)

# STEP 4: use polarization identity to build univariate Taylor polynomial
#         of the Jacobian J
Jdata = numpy.zeros((D - 1, N, N, N))
count, count2 = 0, 0

# build J_0
for n in range(N):
    Jdata[0, :, :, n] = y.data[1, 2 * n * (N + 1), :] / 2.

# build J_1
count = 0
for n1 in range(N):
    for n2 in range(N):
        Jdata[1, n2, :, n1] = 0.5 * (y.data[2, count] - y.data[2, count + 1])
        count += 2

# initialize UTPM instance
J = algopy.UTPM(Jdata)

# STEP 5: evaluate Phi in UTP arithmetic
Phi = algopy.trace(algopy.dot(J.T, J))
print('Phi=', Phi)
print('gradient of Phi =', Phi.data[1, :])
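
# The polarization identity from STEP 4, shown in isolation on a scalar
# function: with directions v = e_i + e_j and w = e_i - e_j, the degree-2
# Taylor coefficients satisfy H[i, j] = 0.5 * (coeff_v - coeff_w).  A minimal
# algopy sketch with the made-up function f(x) = x0*x0*x1, whose mixed second
# derivative is 2*x0 (illustrative assumption, not part of the example above).
import numpy
import algopy

x0 = numpy.array([3.0, 5.0])
D, P, N = 3, 2, 2
xdata = numpy.zeros((D, P, N))
xdata[0] = x0
xdata[1, 0] = [1.0, 1.0]     # direction e_0 + e_1
xdata[1, 1] = [1.0, -1.0]    # direction e_0 - e_1
x = algopy.UTPM(xdata)
y = x[0] * x[0] * x[1]
H01 = 0.5 * (y.data[2, 0] - y.data[2, 1])
print('H[0,1] =', H01, 'expected', 2 * x0[0])
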
Example #7
D, M, N = 2, 3, 3
P = M * N

# generate badly conditioned matrix A

A = UTPM(numpy.zeros((D,P,M,N)))
A.data[0,:] = numpy.random.rand(*(M,N))

for m in range(M):
    for n in range(N):
        p = m*N + n
        A.data[1,p,m,n] = 1.


cg = CGraph()
A = Function(A)
y = trace(inv(A))
cg.trace_off()

cg.independentFunctionList = [A]
cg.dependentFunctionList = [y]
    
ybar = y.x.zeros_like()
ybar.data[0,:] = 1.
cg.pullback([ybar])

# check gradient
g_forward = numpy.zeros(N*N)
g_reverse = numpy.zeros(N*N)

for m in range(M):
    for n in range(N):
        p = m * N + n
        g_forward[p] = y.x.data[1, p]
        g_reverse[p] = A.xbar.data[0, p, m, n]

numpy.testing.assert_array_almost_equal(g_forward, g_reverse)
Example #8
# STEP 3: compute function F in UTP arithmetic
x = algopy.UTPM(xdata)
y = eval_F(x)

# STEP 4: use polarization identity to build univariate Taylor polynomial
#         of the Jacobian J
Jdata = numpy.zeros((D-1, N, N, N))
count, count2 = 0,0

# build J_0
for n in range(N):
    Jdata[0,:,:,n] = y.data[1, 2*n*(N+1), :]/2.
    
# build J_1
count = 0
for n1 in range(N):
    for n2 in range(N):
        Jdata[1,n2,:,n1] = 0.5*( y.data[2, count] - y.data[2, count+1])
        count += 2

# initialize UTPM instance
J = algopy.UTPM(Jdata)

# STEP 5: evaluate Phi in UTP arithmetic
Phi = algopy.trace(algopy.dot(J.T, J))
print('Phi=', Phi)
print('gradient of Phi =', Phi.data[1, :])


D, M, N = 2, 3, 3
P = M * N

# generate badly conditioned matrix A

A = UTPM(numpy.zeros((D, P, M, N)))
A.data[0, :] = numpy.random.rand(*(M, N))

for m in range(M):
    for n in range(N):
        p = m * N + n
        A.data[1, p, m, n] = 1.

cg = CGraph()
A = Function(A)
y = trace(inv(A))
cg.trace_off()

cg.independentFunctionList = [A]
cg.dependentFunctionList = [y]

ybar = y.x.zeros_like()
ybar.data[0, :] = 1.
cg.pullback([ybar])

# check gradient
g_forward = numpy.zeros(N * N)
g_reverse = numpy.zeros(N * N)

for m in range(M):
    for n in range(N):
        p = m * N + n
        g_forward[p] = y.x.data[1, p]
        g_reverse[p] = A.xbar.data[0, p, m, n]

numpy.testing.assert_array_almost_equal(g_forward, g_reverse)