def centered_tree_covariance(B, nleaves, v):
    """
    @param B: rows of this unweighted incidence matrix are edges
    @param nleaves: number of leaves
    @param v: vector of edge variances
    """
    #TODO: track the block multiplication through the schur complement
    W = diag(reciprocal(v))
    L = dot(B.T, dot(W, B))
    #print('full laplacian matrix:')
    #print(L)
    #print()
    nvertices = v.shape[0]
    ninternal = nvertices - nleaves
    Laa = L[:nleaves, :nleaves]
    Lab = L[:nleaves, nleaves:]
    Lba = L[nleaves:, :nleaves]
    Lbb = L[nleaves:, nleaves:]
    L_schur = Laa - dot(Lab, dot(inv(Lbb), Lba))
    L_schur_pinv = restored(inv(augmented(L_schur)))
    #print('schur laplacian matrix:')
    #print(L_schur)
    #print()
    #print('pinv of schur laplacian matrix:')
    #print(L_schur_pinv)
    #print()
    return L_schur_pinv
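# The helpers restored() and augmented() used throughout these snippets are not
# defined here. A minimal sketch of one way they could be implemented, assuming
# the nullspace of the centered matrix is spanned by the all-ones vector, so
# that pinv(L) == inv(L + J/n) - J/n (the identity that demo_covariances()
# below checks against scipy.linalg.pinvh):
import numpy as np
import scipy.linalg

def augmented(L):
    # hypothetical helper: shift off the nullspace by adding J/n
    n = L.shape[0]
    return L + np.ones((n, n)) / n

def restored(M):
    # hypothetical helper: undo the J/n shift after inversion
    n = M.shape[0]
    return M - np.ones((n, n)) / n

# sanity check on a doubly centered symmetric matrix of rank n-1
n = 5
H = np.eye(n) - np.ones((n, n)) / n
A = np.random.rand(n, n)
L = np.dot(H, np.dot(np.dot(A, A.T), H))
assert np.allclose(restored(np.linalg.inv(augmented(L))), scipy.linalg.pinvh(L))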
def f(x):
    A = zeros((2, 2), dtype=x)
    A[0, 0] = numpy.log(x[0] * x[1])
    A[0, 1] = numpy.log(x[1]) + exp(x[0])
    A[1, 0] = sin(x[0])**2 + abs(cos(x[0]))**3.1
    A[1, 1] = x[0]**cos(x[1])
    return log(dot(x.T, dot(inv(A), x)))
def Cfcn(F1p_list, out=None, work=None):
    from numpy import sum, zeros
    from algopy import inv, dot, zeros

    # check input arguments
    Nex = len(F1p_list)
    Np = F1p_list[0].shape[1]

    # create temporary matrix M if not provided
    # to store M = [[J_1^T J_1, J_2^T], [J_2, 0]]
    if work is None:
        work = zeros((Np, Np), dtype=F1p_list[0])
    M = work

    # Step 1: compute M = J_1^T J_1
    for nex in range(Nex):
        M += dot(F1p_list[nex].T, F1p_list[nex])

    # Step 2: invert M and prepare output
    if out is None:
        out = inv(M)
    else:
        out[...] = inv(M)
    return out
def eval_covariance_matrix_naive(J1, J2):
    M, N = J1.shape
    K, N = J2.shape
    tmp = zeros((N + K, N + K), dtype=J1)
    tmp[:N, :N] = dot(J1.T, J1)
    tmp[:N, N:] = J2.T
    tmp[N:, :N] = J2
    return inv(tmp)[:N, :N]
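# A plain NumPy sketch of the same bordered-matrix construction, with made-up
# shapes, plus a sanity check: the extracted block is the covariance of an
# estimate constrained by the Jacobian J2, so it should satisfy J2 C = 0.
import numpy as np

M, N, K = 10, 6, 2
J1 = np.random.rand(M, N)
J2 = np.random.rand(K, N)
tmp = np.zeros((N + K, N + K))
tmp[:N, :N] = np.dot(J1.T, J1)
tmp[:N, N:] = J2.T
tmp[N:, :N] = J2
C = np.linalg.inv(tmp)[:N, :N]
assert np.allclose(np.dot(J2, C), 0)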
def clever_cross_entropy_trees(B, nleaves, va, vb):
    """
    Try being a little more clever.
    @param B: augmented incidence matrix
    @param nleaves: number of leaves
    @param va: augmented reference point edge variances
    @param vb: augmented test point edge variances
    """
    # deduce some quantities assuming an unrooted bifurcating tree
    ninternal = nleaves - 2
    nvertices = nleaves + ninternal
    nedges = nvertices - 1
    # define an index for taking schur complements
    n = nvertices
    k = nleaves + 1
    # Construct the full Laplacian matrix plus J/n.
    # Take a block of the diagonal, corresponding to the inverse
    # of a schur complement.
    Wa = diag(reciprocal(va))
    La_plus = dot(B.T, dot(Wa, B))
    print(La_plus)
    print(scipy.linalg.eigh(La_plus))
    Laa = La_plus[:k, :k]
    Lab = La_plus[:k, k:]
    Lba = La_plus[k:, :k]
    Lbb = La_plus[k:, k:]
    L_schur_plus = Laa - dot(Lab, dot(inv(Lbb), Lba))
    assert_allclose(inv(L_schur_plus), inv(La_plus)[:k, :k])
    A = inv(La_plus)[:k, :k]
    print(scipy.linalg.eigh(A))
    # Construct the Schur complement of the test point matrix.
    Wb = diag(reciprocal(vb))
    L_plus = dot(B.T, dot(Wb, B))
    Laa = L_plus[:k, :k]
    Lab = L_plus[:k, k:]
    Lba = L_plus[k:, :k]
    Lbb = L_plus[k:, k:]
    L_schur_plus = Laa - dot(Lab, dot(inv(Lbb), Lba))
    B_inv = L_schur_plus
    #return 0.5 * ((n-1) * LOG2PI + trace(dot(B_inv, A)) - log(det(B_inv)))
    return 0.5 * (n * LOG2PI + trace(dot(B_inv, A) - 1) - log(det(B_inv)))
def cross_entropy(A, B):
    # return differential_entropy(A) + kl_divergence(A, B)
    # Note that trace(dot(A, B)) == sum(A * B)
    assert_symmetric(A)
    assert_symmetric(B)
    n = A.shape[0]
    B_pinv = restored(inv(augmented(B)))
    #return 0.5 * ((n-1) * LOG2PI + trace(dot(B_pinv, A)) + log_pdet(B))
    return 0.5 * ((n-1) * LOG2PI + (B_pinv * A).sum() + log_pdet(B))
def demo_covariances():
    # define the dimensionality
    n = 4
    H = centering(n)
    # sample a random symmetric matrix
    A_raw = np.random.rand(n, n)
    A = np.dot(A_raw, A_raw.T)
    assert_allclose(H, centering_like(A))
    # check the matrix centering code
    HAH_slow = np.dot(H, np.dot(A, H))
    HAH_fast = doubly_centered(A)
    assert_allclose(HAH_slow, HAH_fast)
    # check the pseudoinversion
    HAH = HAH_slow
    HAH_pinv_direct = scipy.linalg.pinvh(HAH)
    HAH_pinv_clever = restored(inv(augmented(HAH)))
    assert_allclose(HAH_pinv_direct, HAH_pinv_clever)
    # check the logarithm of the pseudo determinant
    logpdet_direct = np.log(scipy.linalg.eigvalsh(HAH)[1:]).sum()
    logpdet_clever = log_pdet(HAH)
    assert_allclose(logpdet_direct, logpdet_clever)
    # check the log likelihood
    print('average log likelihoods:')
    for nsamples in (100, 1000, 10000, 100000):
        X = np.random.multivariate_normal(np.zeros(n), HAH, size=nsamples)
        ll = log_likelihoods(HAH, X).mean()
        print(ll)
    print()
    # check differential entropy
    print('differential entropy:')
    print(differential_entropy(HAH))
    print()
    # make another covariance matrix
    B_raw = A_raw + 0.5 * np.random.rand(n, n)
    B = np.dot(B_raw, B_raw.T)
    HBH = doubly_centered(B)
    # check another expected log likelihood
    print('average log likelihoods:')
    for nsamples in (100, 1000, 10000, 100000):
        X = np.random.multivariate_normal(np.zeros(n), HAH, size=nsamples)
        ll = log_likelihoods(HBH, X).mean()
        print(ll)
    print()
    # check cross entropy
    print('cross entropy from A to B:')
    print(cross_entropy(HAH, HBH))
    print()
def log_likelihoods(A, xs):
    """
    @param A: doubly centered symmetric nxn matrix of rank n-1
    @param xs: vectors of data
    """
    #NOTE: this formula is wrong on wikipedia
    assert_symmetric(A)
    n = A.shape[0]
    A_pinv = restored(inv(augmented(A)))
    a = (n-1) * LOG2PI + log_pdet(A)
    bs = np.array([dot(x, dot(A_pinv, x)) for x in xs])
    return -0.5 * (a + bs)
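# log_likelihoods() relies on helpers (restored, augmented, log_pdet) defined
# elsewhere. A hedged cross-check of the same degenerate-Gaussian formula,
# written only with scipy primitives: the covariance is doubly centered
# (rank n-1) and the data point is centered so that it lies in the support.
import numpy as np
import scipy.linalg
import scipy.stats

n = 6
H = np.eye(n) - np.ones((n, n)) / n
A = np.random.rand(n, n)
S = np.dot(H, np.dot(np.dot(A, A.T), H))        # singular covariance, rank n-1
x = np.dot(H, np.random.randn(n))               # point inside the support

S_pinv = scipy.linalg.pinvh(S)
log_pdet = np.log(scipy.linalg.eigvalsh(S)[1:]).sum()
ll_direct = -0.5 * (
        (n - 1) * np.log(2 * np.pi) + log_pdet + np.dot(x, np.dot(S_pinv, x)))
ll_scipy = scipy.stats.multivariate_normal.logpdf(
        x, mean=np.zeros(n), cov=S, allow_singular=True)
assert np.allclose(ll_direct, ll_scipy)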
def kl_divergence(A, B):
    """
    @param A: doubly centered symmetric nxn matrix of rank n-1
    @param B: doubly centered symmetric nxn matrix of rank n-1
    """
    assert_symmetric(A)
    assert_symmetric(B)
    n = A.shape[0]
    B_pinv = restored(inv(augmented(B)))
    stein_loss = (
            trace(dot(B_pinv, A)) -
            (log_pdet(B_pinv) + log_pdet(A)) -
            (n-1))
    return 0.5 * stein_loss
def test_pullback_solve_inv_comparison(self):
    """simple check that the reverse mode of solve(A, Id)
    computes the same solution as inv(A)
    """
    (D, P, N) = 3, 7, 10
    A_data = numpy.random.rand(D, P, N, N)

    # make A_data sufficiently regular
    for p in range(P):
        for n in range(N):
            A_data[0, p, n, n] += (N + 1)

    A = UTPM(A_data)

    # method 1: computation of the inverse matrix by solving
    # an extended linear system
    # tracing
    cg1 = CGraph()
    A = Function(A)
    Id = numpy.eye(N)
    Ainv1 = solve(A, Id)
    cg1.trace_off()
    cg1.independentFunctionList = [A]
    cg1.dependentFunctionList = [Ainv1]

    # reverse
    Ainvbar = UTPM(numpy.random.rand(*(D, P, N, N)))
    cg1.pullback([Ainvbar])

    # method 2: direct inversion
    # tracing
    cg2 = CGraph()
    A = Function(A.x)
    Ainv2 = inv(A)
    cg2.trace_off()
    cg2.independentFunctionList = [A]
    cg2.dependentFunctionList = [Ainv2]

    # reverse
    cg2.pullback([Ainvbar])

    Abar1 = cg1.independentFunctionList[0].xbar
    Abar2 = cg2.independentFunctionList[0].xbar

    assert_array_almost_equal(Abar1.data, Abar2.data)
def test_pullback_gradient2(self):
    (D, P, M, N) = 3, 9, 3, 3
    A = UTPM(numpy.zeros((D, P, M, M)))
    A0 = numpy.random.rand(M, N)

    for m in range(M):
        for n in range(N):
            p = m * N + n
            A.data[0, p, :M, :N] = A0
            A.data[1, p, m, n] = 1.

    cg = CGraph()
    A = Function(A)
    B = inv(A)
    y = trace(B)
    cg.independentFunctionList = [A]
    cg.dependentFunctionList = [y]

    ybar = y.x.zeros_like()
    ybar.data[0, :] = 1.
    cg.pullback([ybar])

    g1 = y.x.data[1]
    g2 = A.xbar.data[0, 0].ravel()
    assert_array_almost_equal(g1, g2)

    tmp = []
    for m in range(M):
        for n in range(N):
            p = m * N + n
            tmp.append(A.xbar.data[1, p, m, n])

    h1 = y.x.data[2]
    h2 = numpy.array(tmp)
    assert_array_almost_equal(2 * h1, h2)
def check_multivariate_normal_log_likelihood():
    # sample a full rank covariance matrix and some data
    n = 5
    x = np.random.randn(n)
    X = np.random.randn(n, n)
    A = dot(X.T, X)
    # get the log likelihood directly
    a = n * LOG2PI + np.linalg.slogdet(A)[1]
    b = dot(x, dot(inv(A), x))
    ll_direct = -0.5 * (a + b)
    # get the log likelihood using scipy multivariate distributions
    ll_scipy = scipy.stats.multivariate_normal.logpdf(
            x, mean=np.zeros(n), cov=A)
    print('log likelihood computed directly:')
    print(ll_direct)
    print()
    print('log likelihood computed using scipy:')
    print(ll_scipy)
    print()
cg1.dependentFunctionList = [C]
print('covariance matrix: C =\n', C)
print('check that Q2.T spans the nullspace of J2:\n', dot(J2, Q2.T))

# METHOD 2: image space method (potentially numerically unstable)
cg2 = CGraph()
J1 = Function(J1.x)
J2 = Function(J2.x)
M = Function(UTPM(numpy.zeros((D, P, N + K, N + K))))
M[:N, :N] = dot(J1.T, J1)
M[:N, N:] = J2.T
M[N:, :N] = J2
C2 = inv(M)[:N, :N]
cg2.trace_off()
cg2.independentFunctionList = [J1, J2]
cg2.dependentFunctionList = [C2]
print('covariance matrix: C =\n', C2)
print('difference between image and nullspace method:\n', C - C2)

Cbar = UTPM(numpy.random.rand(D, P, N, N))
cg1.pullback([Cbar])
cg2.pullback([Cbar])
print('J1\n', cg2.independentFunctionList[0].xbar - cg1.independentFunctionList[0].xbar)
print('J2\n', cg2.independentFunctionList[1].xbar - cg1.independentFunctionList[1].xbar)
x = UTPM(numpy.zeros((D, P, M, 1)))
y = UTPM(numpy.zeros((D, P, M, 1)))
x.data[0, 0, :, 0] = [1, 1, 1, 1, 1]
x.data[1, 0, :, 0] = [1, 1, 1, 1, 1]
y.data[0, 0, :, 0] = [1, 2, 1, 2, 1]
y.data[1, 0, :, 0] = [1, 2, 1, 2, 1]

alpha = 10**-5
A = dot(x, x.T) + alpha * dot(y, y.T)
A = A[:, :2]

# Method 1: Naive approach
Apinv = dot(inv(dot(A.T, A)), A.T)
print('naive approach: A Apinv A - A = 0 \n', dot(dot(A, Apinv), A) - A)
print('naive approach: Apinv A Apinv - Apinv = 0 \n', dot(dot(Apinv, A), Apinv) - Apinv)
print('naive approach: (Apinv A)^T - Apinv A = 0 \n', dot(Apinv, A).T - dot(Apinv, A))
print('naive approach: (A Apinv)^T - A Apinv = 0 \n', dot(A, Apinv).T - dot(A, Apinv))

# Method 2: Using the differentiated QR decomposition
Q, R = qr(A)
tmp1 = solve(R.T, A.T)
tmp2 = solve(R, tmp1)
Apinv = tmp2
print('QR approach: A Apinv A - A = 0 \n', dot(dot(A, Apinv), A) - A)
print('QR approach: Apinv A Apinv - Apinv = 0 \n', dot(dot(Apinv, A), Apinv) - Apinv)
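# The QR trick above, restated in plain NumPy: for a matrix with full column
# rank, A^+ = (A^T A)^{-1} A^T = R^{-1} R^{-T} A^T, so the pseudoinverse can be
# obtained from two triangular solves against the QR factor R. Checked against
# numpy.linalg.pinv on a small random example.
import numpy as np

A = np.random.rand(5, 2)
Q, R = np.linalg.qr(A)
Apinv = np.linalg.solve(R, np.linalg.solve(R.T, A.T))
assert np.allclose(Apinv, np.linalg.pinv(A))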
D, M, N = 2, 3, 3
P = M * N

# generate badly conditioned matrix A
A = UTPM(numpy.zeros((D, P, M, N)))
A.data[0, :] = numpy.random.rand(*(M, N))

for m in range(M):
    for n in range(N):
        p = m * N + n
        A.data[1, p, m, n] = 1.

cg = CGraph()
A = Function(A)
y = trace(inv(A))
cg.trace_off()
cg.independentFunctionList = [A]
cg.dependentFunctionList = [y]

ybar = y.x.zeros_like()
ybar.data[0, :] = 1.
cg.pullback([ybar])

# check gradient
g_forward = numpy.zeros(N * N)
g_reverse = numpy.zeros(N * N)

for m in range(M):
    for n in range(N):
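# The snippet above is truncated before the actual gradient comparison. As a
# stand-in, here is a NumPy-only check of the same quantity: the gradient of
# f(A) = trace(inv(A)) is -(inv(A) inv(A))^T, compared entrywise against
# central finite differences (this is not the algopy forward/reverse check
# itself, just the closed form it should agree with).
import numpy as np

N = 3
A = np.random.rand(N, N) + N * np.eye(N)        # keep A well conditioned

def f(X):
    return np.trace(np.linalg.inv(X))

A_inv = np.linalg.inv(A)
g_analytic = -np.dot(A_inv, A_inv).T

eps = 1e-6
g_fd = np.zeros((N, N))
for m in range(N):
    for n in range(N):
        E = np.zeros((N, N))
        E[m, n] = eps
        g_fd[m, n] = (f(A + E) - f(A - E)) / (2 * eps)

assert np.allclose(g_analytic, g_fd, atol=1e-6)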