def kl_divergence(A, B):
    """
    Kullback-Leibler divergence, computed as half of Stein's loss.
    @param A: doubly centered symmetric nxn matrix of rank n-1
    @param B: doubly centered symmetric nxn matrix of rank n-1
    """
    assert_symmetric(A)
    assert_symmetric(B)
    n = A.shape[0]
    B_pinv = restored(inv(augmented(B)))
    stein_loss = (
            trace(dot(B_pinv, A)) -
            (log_pdet(B_pinv) + log_pdet(A)) -
            (n - 1))
    return 0.5 * stein_loss
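# kl_divergence relies on helpers that are not shown above. A minimal sketch of
# plausible implementations follows; the names come from the snippet, but these
# bodies are assumptions, not the original code. The key assumption is that
# "augmenting" a doubly centered rank n-1 matrix M means adding J/n (J the
# all-ones matrix), in which case inv(M + J/n) == pinv(M) + J/n, and "restoring"
# just subtracts J/n again.
import numpy
from numpy import dot, trace
from numpy.linalg import inv


def assert_symmetric(M, tol=1e-12):
    # check symmetry up to numerical noise
    assert numpy.allclose(M, M.T, atol=tol)


def augmented(M):
    # assumed: add J/n so the doubly centered rank n-1 matrix becomes invertible
    n = M.shape[0]
    return M + numpy.ones((n, n)) / n


def restored(M):
    # assumed: undo the augmentation, recovering the pseudoinverse
    n = M.shape[0]
    return M - numpy.ones((n, n)) / n


def log_pdet(M, tol=1e-12):
    # assumed: log pseudo-determinant, i.e. the sum of logs of the nonzero eigenvalues
    w = numpy.linalg.eigvalsh(M)
    return float(numpy.log(w[w > tol]).sum())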
def clever_cross_entropy_trees(B, nleaves, va, vb):
    """
    Try being a little more clever.
    @param B: augmented incidence matrix
    @param nleaves: number of leaves
    @param va: augmented reference point edge variances
    @param vb: augmented test point edge variances
    """
    # deduce some quantities assuming an unrooted bifurcating tree
    ninternal = nleaves - 2
    nvertices = nleaves + ninternal
    nedges = nvertices - 1
    # define an index for taking schur complements
    n = nvertices
    k = nleaves + 1
    # Construct the full Laplacian matrix plus J/n.
    # Take a block of the diagonal, corresponding to the inverse
    # of a schur complement.
    Wa = diag(reciprocal(va))
    La_plus = dot(B.T, dot(Wa, B))
    print(La_plus)
    print(scipy.linalg.eigh(La_plus))
    Laa = La_plus[:k, :k]
    Lab = La_plus[:k, k:]
    Lba = La_plus[k:, :k]
    Lbb = La_plus[k:, k:]
    L_schur_plus = Laa - dot(Lab, dot(inv(Lbb), Lba))
    assert_allclose(inv(L_schur_plus), inv(La_plus)[:k, :k])
    A = inv(La_plus)[:k, :k]
    print(scipy.linalg.eigh(A))
    # Construct the Schur complement of the test point matrix.
    Wb = diag(reciprocal(vb))
    L_plus = dot(B.T, dot(Wb, B))
    Laa = L_plus[:k, :k]
    Lab = L_plus[:k, k:]
    Lba = L_plus[k:, :k]
    Lbb = L_plus[k:, k:]
    L_schur_plus = Laa - dot(Lab, dot(inv(Lbb), Lba))
    B_inv = L_schur_plus
    #return 0.5 * ((n-1) * LOG2PI + trace(dot(B_inv, A)) - log(det(B_inv)))
    return 0.5 * (n * LOG2PI + trace(dot(B_inv, A) - 1) - log(det(B_inv)))
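# Names assumed by clever_cross_entropy_trees; this header is hypothetical, the
# original imports are not shown. The assert_allclose inside the function checks
# the standard block-inverse identity: the leading k x k block of inv(L) equals
# the inverse of the Schur complement Laa - Lab * inv(Lbb) * Lba.
import math
import numpy
import scipy.linalg
from numpy import diag, dot, log, reciprocal, trace
from numpy.linalg import det, inv
from numpy.testing import assert_allclose

LOG2PI = math.log(2 * math.pi)  # assumed constant: log(2*pi)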
def test_pullback_gradient(self):
    (D,M,N) = 3,3,2
    P = M*N
    A = UTPM(numpy.zeros((D,P,M,M)))
    A0 = numpy.random.rand(M,N)
    for m in range(M):
        for n in range(N):
            p = m*N + n
            A.data[0,p,:M,:N] = A0
            A.data[1,p,m,n] = 1.

    cg = CGraph()
    A = Function(A)
    Q,R = qr(A)
    B = dot(Q,R)
    y = trace(B)
    cg.independentFunctionList = [A]
    cg.dependentFunctionList = [y]
    # print cg
    # print y.x.data

    g1 = y.x.data[1]
    g11 = y.x.data[2]
    # print g1

    ybar = y.x.zeros_like()
    ybar.data[0,:] = 1.
    cg.pullback([ybar])

    for m in range(M):
        for n in range(N):
            p = m*N + n
            # check gradient
            assert_array_almost_equal(y.x.data[1,p], A.xbar.data[0,p,m,n])
            # check hessian
            assert_array_almost_equal(0, A.xbar.data[1,p,m,n])
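# The algopy-based tests above and below assume a test-module header along
# these lines; this is an assumption, the original imports are not shown.
import numpy
import algopy
from numpy.testing import assert_array_almost_equal
from algopy import CGraph, Function, UTPM, dot, inv, qr, trace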
def test_outer(self):
    x = numpy.arange(4)

    cg = algopy.CGraph()
    x = algopy.Function(x)
    x1 = x[:x.size//2]
    x2 = x[x.size//2:]
    # trace(outer(x1, x2)) == dot(x1, x2), so both graphs compute the same scalar
    y = algopy.trace(algopy.outer(x1,x2))
    cg.trace_off()
    cg.independentFunctionList = [x]
    cg.dependentFunctionList = [y]

    cg2 = algopy.CGraph()
    x = algopy.Function(x)
    x1 = x[:x.size//2]
    x2 = x[x.size//2:]
    z = algopy.dot(x1,x2)
    cg2.trace_off()
    cg2.independentFunctionList = [x]
    cg2.dependentFunctionList = [z]

    assert_array_almost_equal(cg.jacobian(numpy.arange(4)),
                              cg2.jacobian(numpy.arange(4)))
def test_pullback_gradient2(self):
    (D,P,M,N) = 3,9,3,3
    A = UTPM(numpy.zeros((D,P,M,M)))
    A0 = numpy.random.rand(M,N)
    for m in range(M):
        for n in range(N):
            p = m*N + n
            A.data[0,p,:M,:N] = A0
            A.data[1,p,m,n] = 1.

    cg = CGraph()
    A = Function(A)
    B = inv(A)
    y = trace(B)
    cg.independentFunctionList = [A]
    cg.dependentFunctionList = [y]

    ybar = y.x.zeros_like()
    ybar.data[0,:] = 1.
    cg.pullback([ybar])

    g1 = y.x.data[1]
    g2 = A.xbar.data[0,0].ravel()
    assert_array_almost_equal(g1, g2)

    tmp = []
    for m in range(M):
        for n in range(N):
            p = m*N + n
            tmp.append(A.xbar.data[1,p,m,n])
    h1 = y.x.data[2]
    h2 = numpy.array(tmp)
    # the order-2 Taylor coefficient carries a factor 1/2, hence 2*h1 == h2
    assert_array_almost_equal(2*h1, h2)
xdata[0] = [1, 2, 3]
xdata[1] = dirs

# STEP 3: compute function F in UTP arithmetic
x = algopy.UTPM(xdata)
y = eval_F(x)

# STEP 4: use polarization identity to build univariate Taylor polynomial
# of the Jacobian J
Jdata = numpy.zeros((D - 1, N, N, N))
count, count2 = 0, 0

# build J_0
for n in range(N):
    Jdata[0, :, :, n] = y.data[1, 2 * n * (N + 1), :] / 2.

# build J_1
count = 0
for n1 in range(N):
    for n2 in range(N):
        Jdata[1, n2, :, n1] = 0.5 * (y.data[2, count] - y.data[2, count + 1])
        count += 2

# initialize UTPM instance
J = algopy.UTPM(Jdata)

# STEP 5: evaluate Phi in UTP arithmetic
Phi = algopy.trace(algopy.dot(J.T, J))
print('Phi=', Phi)
print('gradient of Phi =', Phi.data[1, :])
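# Polarization identity behind STEP 4 (assuming dirs holds the e_{n1}+e_{n2}
# and e_{n1}-e_{n2} direction pairs in consecutive slots, as the
# y.data[2, count] / y.data[2, count + 1] indexing suggests):
#
#     d^2 F / (dx_{n1} dx_{n2})
#         = 1/4 * ( d^2/dt^2 F(x + t*(e_{n1} + e_{n2}))
#                 - d^2/dt^2 F(x + t*(e_{n1} - e_{n2})) )
#
# The factor 0.5 in the code combines with the 1/2 already contained in the
# order-2 Taylor coefficients y.data[2] to produce this 1/4.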
D, M, N = 2, 3, 3
P = M * N

# generate badly conditioned matrix A
A = UTPM(numpy.zeros((D, P, M, N)))
A.data[0, :] = numpy.random.rand(*(M, N))

for m in range(M):
    for n in range(N):
        p = m * N + n
        A.data[1, p, m, n] = 1.

cg = CGraph()
A = Function(A)
y = trace(inv(A))
cg.trace_off()

cg.independentFunctionList = [A]
cg.dependentFunctionList = [y]

ybar = y.x.zeros_like()
ybar.data[0, :] = 1.
cg.pullback([ybar])

# check gradient
g_forward = numpy.zeros(N * N)
g_reverse = numpy.zeros(N * N)

for m in range(M):
    for n in range(N):