def testRank(self):
    """
    Check Util.rank on random matrices whose rank is known from their shape.

    A dense random matrix almost surely has full rank, i.e. the smaller of
    its two dimensions, and the Gram matrix X X^T has the same rank as X.
    """
    X = numpy.random.rand(10, 1)
    self.assertEqual(Util.rank(X), 1)

    X = numpy.random.rand(10, 12)
    self.assertEqual(Util.rank(X), 10)

    X = numpy.random.rand(31, 12)
    self.assertEqual(Util.rank(X), 12)

    # K is 31 x 31 but has rank 12 only, so this is the rank-deficient
    # square case: the assertion must be made on K, not on X again.
    K = numpy.dot(X, X.T)
    self.assertEqual(Util.rank(K), 12)
def testEigenRemove(self):
    """
    Exercise EigenUpdater.eigenRemove on the eigendecomposition of
    C = A^H A for random A.

    For each k the returned V must have orthonormal columns, and whenever
    k exceeds rank(C) the product V diag(pi) V^H must reproduce the leading
    (n-p) x (n-p) block of C to within the tolerance.
    """
    tol = 10**-6

    for trial in range(10):
        m = numpy.random.randint(5, 10)
        n = numpy.random.randint(5, 10)
        # Number of trailing rows/columns dropped from C
        p = numpy.random.randint(1, 5)
        numKept = n - p

        A = numpy.random.randn(m, n)
        C = A.conj().T.dot(A)
        lastError = 100

        omega, Q = numpy.linalg.eigh(C)
        rebuilt = (Q * omega).dot(Q.conj().T)
        self.assertTrue(numpy.linalg.norm(C - rebuilt) < tol)

        Cprime = C[0:numKept, 0:numKept]

        for k in range(1, 9):
            pi, V, K, Y1, Y2, omega2 = EigenUpdater.eigenRemove(
                omega, Q, numKept, k, debug=True)

            # Columns of V should be orthonormal
            gramV = V.conj().T.dot(V)
            identity = numpy.eye(V.shape[1])
            self.assertTrue(numpy.linalg.norm(gramV - identity) < tol)

            # Rank-k estimate of the reduced matrix
            C_k = (V * pi).dot(V.conj().T)
            error = numpy.linalg.norm(Cprime - C_k)
            if Util.rank(C) < k:
                self.assertTrue(error <= tol)
            lastError = error
def addRows(U, s, V, B, k=None):
    """
    Find the SVD of a matrix [A ; B] where A = U diag(s) V.T. Uses the QR
    decomposition to find an orthogonal basis on B.

    :param U: The left singular vectors of A
    :param s: The singular values of A
    :param V: The right singular vectors of A
    :param B: The matrix to append to A
    :param k: The number of largest singular values to keep in the result.
        Defaults to the number of columns of U.
    :return: A tuple (Utilde, Stilde, Vtilde) for the rank-k approximation
        of [A ; B].
    :raises ValueError: If the shapes of U, s, V and B are inconsistent.
    """
    if V.shape[0] != B.shape[1]:
        # The check is on V (rows) against B (columns), so name V here.
        raise ValueError("V must have same number of rows as B cols")
    if s.shape[0] != U.shape[1]:
        raise ValueError("Number of cols of U must be the same size as s")
    if s.shape[0] != V.shape[1]:
        raise ValueError("Number of cols of V must be the same size as s")

    if k is None:
        k = U.shape[1]

    m, p = U.shape
    r = B.shape[0]

    # Part of B.T left after subtracting V V.T B.T, i.e. its projection
    # onto the columns of V (assumes V has orthonormal columns).
    C = B.T - V.dot(V.T).dot(B.T)
    Q, R = numpy.linalg.qr(C)

    # Keep only as many QR factors as the numerical rank of C
    rPrime = Util.rank(C)
    Q = Q[:, 0:rPrime]
    R = R[0:rPrime, :]

    # Small middle matrix D such that [A ; B] = G D H.T
    D = numpy.c_[numpy.diag(s), numpy.zeros((p, rPrime))]
    E = numpy.c_[B.dot(V), R.T]
    D = numpy.r_[D, E]

    G1 = numpy.c_[U, numpy.zeros((m, r))]
    G2 = numpy.c_[numpy.zeros((r, p)), numpy.eye(r)]
    G = numpy.r_[G1, G2]

    H = numpy.c_[V, Q]

    # Sanity checks: G and H have orthonormal columns and the factorisation
    # reproduces [A ; B]. These raise AssertionError on failure.
    nptst.assert_array_almost_equal(G.T.dot(G), numpy.eye(G.shape[1]))
    nptst.assert_array_almost_equal(H.T.dot(H), numpy.eye(H.shape[1]))
    nptst.assert_array_almost_equal(
        G.dot(D).dot(H.T), numpy.r_[(U * s).dot(V.T), B])

    Uhat, sHat, Vhat = numpy.linalg.svd(D, full_matrices=False)
    # Indices of the k largest singular values, in descending order
    inds = numpy.flipud(numpy.argsort(sHat))[0:k]
    Uhat, sHat, Vhat = Util.indSvd(Uhat, sHat, Vhat, inds)

    #The best rank k approximation of [A ; B]
    Utilde = G.dot(Uhat)
    Stilde = sHat
    Vtilde = H.dot(Vhat)

    return Utilde, Stilde, Vtilde
def addRows(U, s, V, B, k=None):
    """
    Find the SVD of a matrix [A ; B] where A = U diag(s) V.T. Uses the QR
    decomposition to find an orthogonal basis on B.

    :param U: The left singular vectors of A
    :param s: The singular values of A
    :param V: The right singular vectors of A
    :param B: The matrix to append to A
    :param k: The number of largest singular values to keep in the result.
        Defaults to the number of columns of U.
    :return: A tuple (Utilde, Stilde, Vtilde) for the rank-k approximation
        of [A ; B].
    :raises ValueError: If the shapes of U, s, V and B are inconsistent.
    """
    if V.shape[0] != B.shape[1]:
        # The check is on V (rows) against B (columns), so name V here.
        raise ValueError("V must have same number of rows as B cols")
    if s.shape[0] != U.shape[1]:
        raise ValueError("Number of cols of U must be the same size as s")
    if s.shape[0] != V.shape[1]:
        raise ValueError("Number of cols of V must be the same size as s")

    if k is None:
        k = U.shape[1]

    m, p = U.shape
    r = B.shape[0]

    # Part of B.T left after subtracting V V.T B.T, i.e. its projection
    # onto the columns of V (assumes V has orthonormal columns).
    C = B.T - V.dot(V.T).dot(B.T)
    Q, R = numpy.linalg.qr(C)

    # Keep only as many QR factors as the numerical rank of C
    rPrime = Util.rank(C)
    Q = Q[:, 0:rPrime]
    R = R[0:rPrime, :]

    # Small middle matrix D such that [A ; B] = G D H.T
    D = numpy.c_[numpy.diag(s), numpy.zeros((p, rPrime))]
    E = numpy.c_[B.dot(V), R.T]
    D = numpy.r_[D, E]

    G1 = numpy.c_[U, numpy.zeros((m, r))]
    G2 = numpy.c_[numpy.zeros((r, p)), numpy.eye(r)]
    G = numpy.r_[G1, G2]

    H = numpy.c_[V, Q]

    # Sanity checks: G and H have orthonormal columns and the factorisation
    # reproduces [A ; B]. These raise AssertionError on failure.
    nptst.assert_array_almost_equal(G.T.dot(G), numpy.eye(G.shape[1]))
    nptst.assert_array_almost_equal(H.T.dot(H), numpy.eye(H.shape[1]))
    nptst.assert_array_almost_equal(
        G.dot(D).dot(H.T), numpy.r_[(U * s).dot(V.T), B])

    Uhat, sHat, Vhat = numpy.linalg.svd(D, full_matrices=False)
    # Indices of the k largest singular values, in descending order
    inds = numpy.flipud(numpy.argsort(sHat))[0:k]
    Uhat, sHat, Vhat = Util.indSvd(Uhat, sHat, Vhat, inds)

    #The best rank k approximation of [A ; B]
    Utilde = G.dot(Uhat)
    Stilde = sHat
    Vtilde = H.dot(Vhat)

    return Utilde, Stilde, Vtilde
def testEigenAdd2(self):
    """
    Exercise EigenUpdater.eigenAdd2 on C = AA + Y1 Y2^H + Y2 Y1^H with
    AA = A^H A, for random A, Y1 and Y2.

    For each k this checks that V has orthonormal columns, that the
    estimate is exact when rank(C) equals k, that D reproduces the rank-k
    part of AA, and that D DUD D^H reproduces the added term.
    """
    tol = 10**-6

    for trial in range(10):
        m = numpy.random.randint(5, 10)
        n = numpy.random.randint(5, 10)
        p = numpy.random.randint(5, 10)

        A = numpy.random.randn(m, n)
        Y1 = numpy.random.randn(n, p)
        Y2 = numpy.random.randn(n, p)

        AA = A.conj().T.dot(A)
        Y1Y2 = Y1.dot(Y2.conj().T)
        lastError = 100

        omega, Q = numpy.linalg.eigh(AA)
        residual = AA - (Q * omega).dot(Q.conj().T)
        self.assertTrue(numpy.linalg.norm(residual) < tol)

        C = AA + Y1Y2 + Y1Y2.conj().T

        for k in range(1, 9):
            pi, V, D, DUD = EigenUpdater.eigenAdd2(
                omega, Q, Y1, Y2, k, debug=True)

            # Columns of V should be orthonormal
            gramV = V.conj().T.dot(V)
            self.assertTrue(
                numpy.linalg.norm(gramV - numpy.eye(V.shape[1])) < tol)

            # Exact recovery is expected when k matches the rank of C
            C_k = (V * pi).dot(V.conj().T)
            error = numpy.linalg.norm(C - C_k)
            if Util.rank(C) == k:
                self.assertTrue(error <= tol)
            lastError = error

            # D scaled by the top-k eigenvalues must equal AA_k
            topInds = numpy.flipud(numpy.argsort(omega))[0:k]
            omega_k, Q_k = Util.indEig(omega, Q, topInds)
            padding = numpy.zeros((1, max(D.shape[1] - k, 0)))
            scaling = numpy.c_[omega_k[numpy.newaxis, :], padding]
            DomegakD = (D * scaling).dot(D.conj().T)
            AAk = (Q_k * omega_k).dot(Q_k.conj().T)
            self.assertTrue(numpy.linalg.norm(AAk - DomegakD) < tol)

            # The added term must be reproduced by D DUD D^H
            DUDfull = D.dot(DUD).dot(D.conj().T)
            self.assertTrue(
                numpy.linalg.norm(Y1Y2 + Y1Y2.conj().T - DUDfull) < tol)
def testEigenRemove(self):
    """
    Exercise EigenUpdater.eigenRemove on the eigendecomposition of
    C = A^H A for random A.

    For each k the returned V must have orthonormal columns, and whenever
    k exceeds rank(C) the product V diag(pi) V^H must reproduce the leading
    (n-p) x (n-p) block of C to within the tolerance.
    """
    tol = 10**-6

    for trial in range(10):
        m = numpy.random.randint(5, 10)
        n = numpy.random.randint(5, 10)
        # Number of trailing rows/columns dropped from C
        p = numpy.random.randint(1, 5)
        numKept = n - p

        A = numpy.random.randn(m, n)
        C = A.conj().T.dot(A)
        lastError = 100

        omega, Q = numpy.linalg.eigh(C)
        rebuilt = (Q * omega).dot(Q.conj().T)
        self.assertTrue(numpy.linalg.norm(C - rebuilt) < tol)

        Cprime = C[0:numKept, 0:numKept]

        for k in range(1, 9):
            pi, V, K, Y1, Y2, omega2 = EigenUpdater.eigenRemove(
                omega, Q, numKept, k, debug=True)

            # Columns of V should be orthonormal
            gramV = V.conj().T.dot(V)
            identity = numpy.eye(V.shape[1])
            self.assertTrue(numpy.linalg.norm(gramV - identity) < tol)

            # Rank-k estimate of the reduced matrix
            C_k = (V * pi).dot(V.conj().T)
            error = numpy.linalg.norm(Cprime - C_k)
            if Util.rank(C) < k:
                self.assertTrue(error <= tol)
            lastError = error
def testEigenAdd(self):
    """
    Exercise EigenUpdater.eigenAdd on AA + YY, where AA = A^H A
    (symmetrised) and YY = Y^H Y for random A and Y.

    For each k the number of returned eigenvalues must be
    min(k, rank(AA + YY)), V must have orthonormal columns, and the
    approximation error must not grow (beyond the tolerance) as k increases.
    """
    tol = 10**-3

    for trial in range(3):
        numCols = numpy.random.randint(5, 10)
        numXRows = numpy.random.randint(5, 10)
        numYRows = numpy.random.randint(5, 10)

        A = numpy.random.rand(numXRows, numCols)
        Y = numpy.random.rand(numYRows, numCols)

        AA = A.conj().T.dot(A)
        AA = (AA + AA.conj().T) / 2
        YY = Y.conj().T.dot(Y)

        lastError = 1000
        upperK = min((numXRows, numCols))

        for k in range(1, upperK):
            # eigh is appropriate because AA is Hermitian
            omega, Q = numpy.linalg.eigh(AA)
            pi, V = EigenUpdater.eigenAdd(omega, Q, Y, k)
            Pi = numpy.diag(pi)

            t = min(k, Util.rank(AA + YY))
            self.assertTrue(pi.shape[0] == t)
            gramV = V.conj().T.dot(V)
            self.assertTrue(numpy.linalg.norm(gramV - numpy.eye(t)) < tol)

            # Rank-k truncation of AA by eigenvalue magnitude
            topInds = numpy.flipud(numpy.argsort(numpy.abs(omega)))[0:k]
            Q = Q[:, topInds]
            omega = omega[topInds]
            AAk = Q.dot(numpy.diag(omega)).dot(Q.conj().T)
            AAkpYY = AAk + YY

            AApYYEst = V.dot(Pi.dot(V.conj().T))
            error = numpy.linalg.norm(AApYYEst - (AA + YY))
            self.assertTrue(lastError - error >= -tol)
            lastError = error
def testEigenAdd2(self):
    """
    Exercise EigenUpdater.eigenAdd2 on C = AA + Y1 Y2^H + Y2 Y1^H with
    AA = A^H A, for random A, Y1 and Y2.

    For each k this checks that V has orthonormal columns, that the
    estimate is exact when rank(C) equals k, that D reproduces the rank-k
    part of AA, and that D DUD D^H reproduces the added term.
    """
    tol = 10**-6

    for trial in range(10):
        m = numpy.random.randint(5, 10)
        n = numpy.random.randint(5, 10)
        p = numpy.random.randint(5, 10)

        A = numpy.random.randn(m, n)
        Y1 = numpy.random.randn(n, p)
        Y2 = numpy.random.randn(n, p)

        AA = A.conj().T.dot(A)
        Y1Y2 = Y1.dot(Y2.conj().T)
        lastError = 100

        omega, Q = numpy.linalg.eigh(AA)
        residual = AA - (Q * omega).dot(Q.conj().T)
        self.assertTrue(numpy.linalg.norm(residual) < tol)

        C = AA + Y1Y2 + Y1Y2.conj().T

        for k in range(1, 9):
            pi, V, D, DUD = EigenUpdater.eigenAdd2(
                omega, Q, Y1, Y2, k, debug=True)

            # Columns of V should be orthonormal
            gramV = V.conj().T.dot(V)
            self.assertTrue(
                numpy.linalg.norm(gramV - numpy.eye(V.shape[1])) < tol)

            # Exact recovery is expected when k matches the rank of C
            C_k = (V * pi).dot(V.conj().T)
            error = numpy.linalg.norm(C - C_k)
            if Util.rank(C) == k:
                self.assertTrue(error <= tol)
            lastError = error

            # D scaled by the top-k eigenvalues must equal AA_k
            topInds = numpy.flipud(numpy.argsort(omega))[0:k]
            omega_k, Q_k = Util.indEig(omega, Q, topInds)
            padding = numpy.zeros((1, max(D.shape[1] - k, 0)))
            scaling = numpy.c_[omega_k[numpy.newaxis, :], padding]
            DomegakD = (D * scaling).dot(D.conj().T)
            AAk = (Q_k * omega_k).dot(Q_k.conj().T)
            self.assertTrue(numpy.linalg.norm(AAk - DomegakD) < tol)

            # The added term must be reproduced by D DUD D^H
            DUDfull = D.dot(DUD).dot(D.conj().T)
            self.assertTrue(
                numpy.linalg.norm(Y1Y2 + Y1Y2.conj().T - DUDfull) < tol)
def testEigenAdd(self):
    """
    Exercise EigenUpdater.eigenAdd on AA + YY, where AA = A^H A
    (symmetrised) and YY = Y^H Y for random A and Y.

    For each k the number of returned eigenvalues must be
    min(k, rank(AA + YY)), V must have orthonormal columns, and the
    approximation error must not grow (beyond the tolerance) as k increases.
    """
    tol = 10**-3

    for trial in range(3):
        numCols = numpy.random.randint(5, 10)
        numXRows = numpy.random.randint(5, 10)
        numYRows = numpy.random.randint(5, 10)

        A = numpy.random.rand(numXRows, numCols)
        Y = numpy.random.rand(numYRows, numCols)

        AA = A.conj().T.dot(A)
        AA = (AA + AA.conj().T) / 2
        YY = Y.conj().T.dot(Y)

        lastError = 1000
        upperK = min((numXRows, numCols))

        for k in range(1, upperK):
            # eigh is appropriate because AA is Hermitian
            omega, Q = numpy.linalg.eigh(AA)
            pi, V = EigenUpdater.eigenAdd(omega, Q, Y, k)
            Pi = numpy.diag(pi)

            t = min(k, Util.rank(AA + YY))
            self.assertTrue(pi.shape[0] == t)
            gramV = V.conj().T.dot(V)
            self.assertTrue(numpy.linalg.norm(gramV - numpy.eye(t)) < tol)

            # Rank-k truncation of AA by eigenvalue magnitude
            topInds = numpy.flipud(numpy.argsort(numpy.abs(omega)))[0:k]
            Q = Q[:, topInds]
            omega = omega[topInds]
            AAk = Q.dot(numpy.diag(omega)).dot(Q.conj().T)
            AAkpYY = AAk + YY

            AApYYEst = V.dot(Pi.dot(V.conj().T))
            error = numpy.linalg.norm(AApYYEst - (AA + YY))
            self.assertTrue(lastError - error >= -tol)
            lastError = error
import numpy
import scipy.sparse
from apgl.graph import GraphUtils
from sandbox.util.Util import Util

# Script: print the rank of the change in the normalised Laplacian when a
# single symmetric pair of entries of a random weight matrix is set to 1.
numpy.set_printoptions(suppress=True, precision=3)

n = 10

# Random symmetric weight matrix W1 = R^T R
W1 = scipy.sparse.rand(n, n, 0.5).todense()
W1 = W1.T.dot(W1)

# W2 differs from W1 only in entries (1, 2) and (2, 1)
W2 = W1.copy()
W2[1, 2] = 1
W2[2, 1] = 1

for label, weights in (("W1=", W1), ("W2=", W2)):
    print(label + str(weights))

L1 = GraphUtils.normalisedLaplacianSym(scipy.sparse.csr_matrix(W1))
L2 = GraphUtils.normalisedLaplacianSym(scipy.sparse.csr_matrix(W2))
deltaL = L2 - L1

for label, laplacian in (("L1=", L1), ("L2=", L2), ("deltaL=", deltaL)):
    print(label + str(laplacian.todense()))

print("rank(deltaL)=" + str(Util.rank(deltaL.todense())))
def testEigenConcat(self):
    """
    Exercise EigenUpdater.eigenConcat and
    EigenUpdater.lazyEigenConcatAsUpdate on ABBA = [A B]^H [A B] for
    random A and B, starting from the eigendecomposition of the AA block.

    For each k this checks the number of eigenvalues returned, that the
    eigenvector matrices have orthonormal columns, that D2 D2UD2 D2^H
    equals ABBA with its AA block zeroed, that the first estimate is exact
    when rank(ABBA) equals k, and that the error of the second estimate
    does not grow (beyond the tolerance) with k.
    """
    tol = 10**-6

    for trial in range(3):
        m = numpy.random.randint(10, 20)
        n = numpy.random.randint(5, 10)
        p = numpy.random.randint(5, 10)

        # Real-valued inputs only
        A = numpy.random.randn(m, n)
        B = numpy.random.randn(m, p)

        AcB = numpy.c_[A, B]
        ABBA = AcB.conj().T.dot(AcB)

        # Blocks of the Gram matrix
        AA = ABBA[0:n, 0:n]
        AB = ABBA[0:n, n:]
        BB = ABBA[n:, n:]

        lastError = 1000
        lastError2 = 1000

        for k in range(1, n):
            # First estimate: eigenConcat
            omega, Q = numpy.linalg.eig(AA)
            pi, V = EigenUpdater.eigenConcat(omega, Q, AB, BB, k)
            ABBAEst = V.dot(numpy.diag(pi)).dot(V.conj().T)

            t = min(k, Util.rank(ABBA))
            self.assertTrue(pi.shape[0] == t)
            gramV = V.conj().T.dot(V)
            self.assertTrue(numpy.linalg.norm(gramV - numpy.eye(t)) < tol)

            # Second estimate: lazyEigenConcatAsUpdate
            omega, Q = numpy.linalg.eig(AA)
            pi2, V2, D2, D2UD2 = EigenUpdater.lazyEigenConcatAsUpdate(
                omega, Q, AB, BB, k, debug=True)
            ABBAEst2 = V2.dot(numpy.diag(pi2)).dot(V2.conj().T)

            # The update term is ABBA with the AA block zeroed out
            U = ABBA.copy()
            U[0:n, 0:n] = 0
            updateEst = D2.dot(D2UD2).dot(D2.conj().T)
            self.assertTrue(numpy.linalg.norm(U - updateEst) < tol)

            t = min(k, Util.rank(ABBA))
            gramV2 = V2.conj().T.dot(V2)
            self.assertTrue(
                numpy.linalg.norm(gramV2 - numpy.eye(pi2.shape[0])) < tol)

            # Rank-k estimate from the eigendecomposition of the full matrix
            sfull, Vfull = numpy.linalg.eig(ABBA)
            topInds = numpy.flipud(numpy.argsort(numpy.abs(sfull)))[0:k]
            Vfull = Vfull[:, topInds]
            sfull = sfull[topInds]
            ABBAEstfull = Vfull.dot(numpy.diag(sfull)).dot(Vfull.conj().T)

            # Errors of the two estimates
            error = numpy.linalg.norm(ABBAEst - ABBA)
            if Util.rank(ABBA) == k:
                self.assertTrue(error <= tol)
            lastError = error

            error = numpy.linalg.norm(ABBAEst2 - ABBA)
            self.assertTrue(error <= lastError2 + tol)
            lastError2 = error
def clusterFromIterator(self, graphListIterator, verbose=False):
    """
    Find a set of clusters for each graph given by the iterator.

    For every weight matrix an eigendecomposition is computed with the
    algorithm named by self.alg, the leading self.k1 eigenvectors are
    normalised and whitened, and k-means is run on their rows. The first
    graph is clustered from scratch; later graphs seed k-means with
    centroids derived from the previous clustering.

    The difference between a weight matrix and the previous one should be
    positive.

    :param graphListIterator: An iterable of symmetric weight matrices.
    :param verbose: If true, timings and eigenvector quality measures are
        returned alongside the clusters.
    :return: The list of cluster assignments, one entry per graph. If
        verbose is true, a tuple (clustersList, times, eigenQuality) where
        times has one row per graph holding the decomposition time and the
        k-means time, and eigenQuality is a dict with keys "boundList" and
        "sinThetaList".
    :raises ValueError: If self.alg is not a recognised algorithm name.
    """
    clustersList = []
    decompositionTimeList = []
    kMeansTimeList = []
    boundList = []
    sinThetaList = []
    i = 0

    # NOTE(review): nesting below is reconstructed from the statement
    # order; confirm against the original layout.
    for subW in graphListIterator:
        if __debug__:
            Parameter.checkSymmetric(subW)

        if self.logStep and i % self.logStep == 0:
            logging.debug("Graph index: " + str(i))
        logging.debug("Clustering graph of size " + str(subW.shape))
        # efficientNystrom works on subW directly, so ABBA is not built
        if self.alg!="efficientNystrom":
            ABBA = GraphUtils.shiftLaplacian(subW)

        # --- Eigen value decomposition ---
        startTime = time.time()
        if self.alg=="IASC":
            if i % self.T != 0:
                # Approximate update: omega and Q are the values left over
                # from the previous iteration.
                omega, Q = self.approxUpdateEig(subW, ABBA, omega, Q)

                if self.computeBound:
                    inds = numpy.flipud(numpy.argsort(omega))
                    Q = Q[:, inds]
                    omega = omega[inds]
                    # omegaKbot and AKbot were set by the most recent exact
                    # decomposition (the else branch below)
                    bounds = self.pertBound(omega, Q, omegaKbot, AKbot, self.k2)
                    #boundList.append([i, bounds[0], bounds[1]])

                    #Now use accurate values of norm of R and delta
                    rank = Util.rank(ABBA.todense())
                    gamma, U = scipy.sparse.linalg.eigsh(ABBA, rank-1, which="LM", ncv = ABBA.shape[0])
                    #logging.debug("gamma=" + str(gamma))
                    bounds2 = self.realBound(omega, Q, gamma, AKbot, self.k2)
                    boundList.append([bounds[0], bounds[1], bounds2[0], bounds2[1]])
            else:
                # Every self.T-th graph (including the first) gets an exact
                # decomposition
                logging.debug("Computing exact eigenvectors")
                self.storeInformation(subW, ABBA)

                if self.computeBound:
                    #omega, Q = scipy.sparse.linalg.eigsh(ABBA, min(self.k2*2, ABBA.shape[0]-1), which="LM", ncv = min(10*self.k2, ABBA.shape[0]))
                    rank = Util.rank(ABBA.todense())
                    omega, Q = scipy.sparse.linalg.eigsh(ABBA, rank-1, which="LM", ncv = ABBA.shape[0])
                    inds = numpy.flipud(numpy.argsort(omega))
                    # Eigenpairs beyond the self.k2 largest, and the matrix
                    # they reconstruct
                    omegaKbot = omega[inds[self.k2:]]
                    QKbot = Q[:, inds[self.k2:]]
                    AKbot = (QKbot*omegaKbot).dot(QKbot.T)

                    omegaSort = numpy.flipud(numpy.sort(omega))
                    # No bound is computed for an exact step; keep the list
                    # aligned with four placeholder zeros
                    boundList.append([0]*4)
                else:
                    omega, Q = scipy.sparse.linalg.eigsh(ABBA, min(self.k2, ABBA.shape[0]-1), which="LM", ncv = min(10*self.k2, ABBA.shape[0]))

        elif self.alg == "nystrom":
            omega, Q = Nystrom.eigpsd(ABBA, self.k3)
        elif self.alg == "exact":
            omega, Q = scipy.sparse.linalg.eigsh(ABBA, min(self.k1, ABBA.shape[0]-1), which="LM", ncv = min(15*self.k1, ABBA.shape[0]))
        elif self.alg == "efficientNystrom":
            omega, Q = EfficientNystrom.eigWeight(subW, self.k2, self.k1)
        elif self.alg == "randomisedSvd":
            Q, omega, R = RandomisedSVD.svd(ABBA, self.k4)
        else:
            raise ValueError("Invalid Algorithm: " + str(self.alg))

        if self.computeSinTheta:
            # Norm of the product between the exact eigenvectors outside
            # the top self.k1 and the approximate top self.k1 eigenvectors.
            # NOTE(review): ABBA is not assigned when alg is
            # "efficientNystrom"; confirm computeSinTheta is never combined
            # with that algorithm.
            omegaExact, QExact = scipy.linalg.eigh(ABBA.todense())
            inds = numpy.flipud(numpy.argsort(omegaExact))
            QExactKbot = QExact[:, inds[self.k1:]]
            inds = numpy.flipud(numpy.argsort(omega))
            QApproxK = Q[:,inds[:self.k1]]
            sinThetaList.append(scipy.linalg.norm(QExactKbot.T.dot(QApproxK)))

        decompositionTimeList.append(time.time()-startTime)

        if self.alg=="IASC":
            self.storeInformation(subW, ABBA)

        # --- Kmeans ---
        startTime = time.time()
        inds = numpy.flipud(numpy.argsort(omega))

        standardiser = Standardiser()
        #For some very strange reason we get an overflow when computing the
        #norm of the rows of Q even though its elements are bounded by 1.
        #We'll ignore it for now
        # NOTE(review): if FloatingPointError is caught, V is not assigned
        # in this iteration, so the whiten call below uses the previous
        # iteration's V or fails on the first graph - confirm intended
        # fallback. logging.warn is also a deprecated alias of
        # logging.warning.
        try:
            V = standardiser.normaliseArray(Q[:, inds[0:self.k1]].real.T).T
        except FloatingPointError as e:
            logging.warn("FloatingPointError: " + str(e))
        V = VqUtils.whiten(V)
        if i == 0:
            centroids, distortion = vq.kmeans(V, self.k1, iter=self.nb_iter_kmeans)
        else:
            # Seed k-means from the previous graph's cluster assignments
            centroids = self.findCentroids(V, clusters[:subW.shape[0]])
            if centroids.shape[0] < self.k1:
                # Top up with randomly chosen rows of V
                nb_missing_centroids = self.k1 - centroids.shape[0]
                random_centroids = V[numpy.random.randint(0, V.shape[0], nb_missing_centroids),:]
                centroids = numpy.vstack((centroids, random_centroids))
            centroids, distortion = vq.kmeans(V, centroids) #iter can only be 1
        clusters, distortion = vq.vq(V, centroids)
        kMeansTimeList.append(time.time()-startTime)

        clustersList.append(clusters)

        #logging.debug("subW.shape: " + str(subW.shape))
        #logging.debug("len(clusters): " + str(len(clusters)))
        #from sandbox.util.ProfileUtils import ProfileUtils
        #logging.debug("Total memory usage: " + str(ProfileUtils.memory()/10**6) + "MB")
        # Display the local variables once reported memory exceeds 10**9
        if ProfileUtils.memory() > 10**9:
            ProfileUtils.memDisplay(locals())

        i += 1

    if verbose:
        eigenQuality = {"boundList" : boundList, "sinThetaList" : sinThetaList}
        return clustersList, numpy.array((decompositionTimeList, kMeansTimeList)).T, eigenQuality
    else:
        return clustersList
import numpy
import scipy.sparse
from apgl.graph import GraphUtils
from sandbox.util.Util import Util

# Script: print the rank of the change in the normalised Laplacian when a
# single symmetric pair of entries of a random weight matrix is set to 1.
numpy.set_printoptions(suppress=True, precision=3)

n = 10

# Random symmetric weight matrix W1 = R^T R
W1 = scipy.sparse.rand(n, n, 0.5).todense()
W1 = W1.T.dot(W1)

# W2 differs from W1 only in entries (1, 2) and (2, 1)
W2 = W1.copy()
W2[1, 2] = 1
W2[2, 1] = 1

for label, weights in (("W1=", W1), ("W2=", W2)):
    print(label + str(weights))

L1 = GraphUtils.normalisedLaplacianSym(scipy.sparse.csr_matrix(W1))
L2 = GraphUtils.normalisedLaplacianSym(scipy.sparse.csr_matrix(W2))
deltaL = L2 - L1

for label, laplacian in (("L1=", L1), ("L2=", L2), ("deltaL=", deltaL)):
    print(label + str(laplacian.todense()))

print("rank(deltaL)=" + str(Util.rank(deltaL.todense())))
def testEigenConcat(self):
    """
    Exercise EigenUpdater.eigenConcat and
    EigenUpdater.lazyEigenConcatAsUpdate on ABBA = [A B]^H [A B] for
    random A and B, starting from the eigendecomposition of the AA block.

    For each k this checks the number of eigenvalues returned, that the
    eigenvector matrices have orthonormal columns, that D2 D2UD2 D2^H
    equals ABBA with its AA block zeroed, that the first estimate is exact
    when rank(ABBA) equals k, and that the error of the second estimate
    does not grow (beyond the tolerance) with k.
    """
    tol = 10**-6

    for trial in range(3):
        m = numpy.random.randint(10, 20)
        n = numpy.random.randint(5, 10)
        p = numpy.random.randint(5, 10)

        # Real-valued inputs only
        A = numpy.random.randn(m, n)
        B = numpy.random.randn(m, p)

        AcB = numpy.c_[A, B]
        ABBA = AcB.conj().T.dot(AcB)

        # Blocks of the Gram matrix
        AA = ABBA[0:n, 0:n]
        AB = ABBA[0:n, n:]
        BB = ABBA[n:, n:]

        lastError = 1000
        lastError2 = 1000

        for k in range(1, n):
            # First estimate: eigenConcat
            omega, Q = numpy.linalg.eig(AA)
            pi, V = EigenUpdater.eigenConcat(omega, Q, AB, BB, k)
            ABBAEst = V.dot(numpy.diag(pi)).dot(V.conj().T)

            t = min(k, Util.rank(ABBA))
            self.assertTrue(pi.shape[0] == t)
            gramV = V.conj().T.dot(V)
            self.assertTrue(numpy.linalg.norm(gramV - numpy.eye(t)) < tol)

            # Second estimate: lazyEigenConcatAsUpdate
            omega, Q = numpy.linalg.eig(AA)
            pi2, V2, D2, D2UD2 = EigenUpdater.lazyEigenConcatAsUpdate(
                omega, Q, AB, BB, k, debug=True)
            ABBAEst2 = V2.dot(numpy.diag(pi2)).dot(V2.conj().T)

            # The update term is ABBA with the AA block zeroed out
            U = ABBA.copy()
            U[0:n, 0:n] = 0
            updateEst = D2.dot(D2UD2).dot(D2.conj().T)
            self.assertTrue(numpy.linalg.norm(U - updateEst) < tol)

            t = min(k, Util.rank(ABBA))
            gramV2 = V2.conj().T.dot(V2)
            self.assertTrue(
                numpy.linalg.norm(gramV2 - numpy.eye(pi2.shape[0])) < tol)

            # Rank-k estimate from the eigendecomposition of the full matrix
            sfull, Vfull = numpy.linalg.eig(ABBA)
            topInds = numpy.flipud(numpy.argsort(numpy.abs(sfull)))[0:k]
            Vfull = Vfull[:, topInds]
            sfull = sfull[topInds]
            ABBAEstfull = Vfull.dot(numpy.diag(sfull)).dot(Vfull.conj().T)

            # Errors of the two estimates
            error = numpy.linalg.norm(ABBAEst - ABBA)
            if Util.rank(ABBA) == k:
                self.assertTrue(error <= tol)
            lastError = error

            error = numpy.linalg.norm(ABBAEst2 - ABBA)
            self.assertTrue(error <= lastError2 + tol)
            lastError2 = error
# NOTE(review): fragment - X, X1, X2, numExamples1, numExamples2 and k are
# defined outside the visible source.

# Centre both blocks by subtracting the mean of X taken along axis 1
Xhat1 = X1 - numpy.outer(numpy.mean(X, 1), numpy.ones(numExamples1))
Xhat2 = X2 - numpy.outer(numpy.mean(X, 1), numpy.ones(numExamples2))
Xhat = numpy.c_[Xhat1, Xhat2]

# Gram matrix of the concatenated data and its three distinct blocks
sigma = numpy.dot(Xhat.T, Xhat)
sigma1 = numpy.dot(Xhat1.T, Xhat1)
sigma2 = numpy.dot(Xhat1.T, Xhat2)
sigma3 = numpy.dot(Xhat2.T, Xhat2)

# Eigenpairs of sigma1, indexed in descending eigenvalue order
d, U = numpy.linalg.eig(sigma1)
inds = numpy.flipud(numpy.argsort(d))
indsk = inds[0:k]

#rank k approximation of sigma1
sigma1k = numpy.dot(U[:, indsk], numpy.dot(numpy.diag(d[indsk]), U[:, indsk].T ))

ell = Util.rank(sigma1)
# sqrt(eigenvalue)-scaled top-k eigenvectors, zero-padded to ell rows
Ptilde1 = numpy.dot(numpy.diag(numpy.sqrt(d[indsk])), U[:, indsk].T)
Ptilde1 = numpy.r_[Ptilde1, numpy.zeros((ell-k, numExamples1))]

# diag(d ** -0.5) over the ell largest eigenvalues, and their eigenvectors
LambdaTildeSq = numpy.diag(d[inds[0:ell]] ** -0.5)
Utilde = U[:, inds[0:ell]]
Q1 = numpy.dot(LambdaTildeSq, numpy.dot(Utilde.T, sigma2))
Q2 = numpy.zeros((numExamples2, numExamples1))
#Q3 is zero which is odd
Q3 = scipy.linalg.sqrtm(sigma3 - numpy.dot(Q1.T, Q1))

Ptilde2 = numpy.r_[Ptilde1, Q2]
Y = numpy.r_[Q1, Q3]
def clusterFromIterator(self, graphListIterator, verbose=False):
    """
    Find a set of clusters for each graph given by the iterator.

    For every weight matrix an eigendecomposition is computed with the
    algorithm named by self.alg, the leading self.k1 eigenvectors are
    normalised and whitened, and k-means is run on their rows. The first
    graph is clustered from scratch; later graphs seed k-means with
    centroids derived from the previous clustering.

    The difference between a weight matrix and the previous one should be
    positive.

    :param graphListIterator: An iterable of symmetric weight matrices.
    :param verbose: If true, timings and eigenvector quality measures are
        returned alongside the clusters.
    :return: The list of cluster assignments, one entry per graph. If
        verbose is true, a tuple (clustersList, times, eigenQuality) where
        times has one row per graph holding the decomposition time and the
        k-means time, and eigenQuality is a dict with keys "boundList" and
        "sinThetaList".
    :raises ValueError: If self.alg is not a recognised algorithm name.
    """
    clustersList = []
    decompositionTimeList = []
    kMeansTimeList = []
    boundList = []
    sinThetaList = []
    i = 0

    # NOTE(review): nesting below is reconstructed from the statement
    # order; confirm against the original layout.
    for subW in graphListIterator:
        if __debug__:
            Parameter.checkSymmetric(subW)

        if self.logStep and i % self.logStep == 0:
            logging.debug("Graph index: " + str(i))
        logging.debug("Clustering graph of size " + str(subW.shape))
        # efficientNystrom works on subW directly, so ABBA is not built
        if self.alg != "efficientNystrom":
            ABBA = GraphUtils.shiftLaplacian(subW)

        # --- Eigen value decomposition ---
        startTime = time.time()
        if self.alg == "IASC":
            if i % self.T != 0:
                # Approximate update: omega and Q are the values left over
                # from the previous iteration.
                omega, Q = self.approxUpdateEig(subW, ABBA, omega, Q)

                if self.computeBound:
                    inds = numpy.flipud(numpy.argsort(omega))
                    Q = Q[:, inds]
                    omega = omega[inds]
                    # omegaKbot and AKbot were set by the most recent exact
                    # decomposition (the else branch below)
                    bounds = self.pertBound(omega, Q, omegaKbot, AKbot,
                                            self.k2)
                    #boundList.append([i, bounds[0], bounds[1]])

                    #Now use accurate values of norm of R and delta
                    rank = Util.rank(ABBA.todense())
                    gamma, U = scipy.sparse.linalg.eigsh(ABBA,
                                                         rank - 1,
                                                         which="LM",
                                                         ncv=ABBA.shape[0])
                    #logging.debug("gamma=" + str(gamma))
                    bounds2 = self.realBound(omega, Q, gamma, AKbot,
                                             self.k2)
                    boundList.append(
                        [bounds[0], bounds[1], bounds2[0], bounds2[1]])
            else:
                # Every self.T-th graph (including the first) gets an exact
                # decomposition
                logging.debug("Computing exact eigenvectors")
                self.storeInformation(subW, ABBA)

                if self.computeBound:
                    #omega, Q = scipy.sparse.linalg.eigsh(ABBA, min(self.k2*2, ABBA.shape[0]-1), which="LM", ncv = min(10*self.k2, ABBA.shape[0]))
                    rank = Util.rank(ABBA.todense())
                    omega, Q = scipy.sparse.linalg.eigsh(ABBA,
                                                         rank - 1,
                                                         which="LM",
                                                         ncv=ABBA.shape[0])
                    inds = numpy.flipud(numpy.argsort(omega))
                    # Eigenpairs beyond the self.k2 largest, and the matrix
                    # they reconstruct
                    omegaKbot = omega[inds[self.k2:]]
                    QKbot = Q[:, inds[self.k2:]]
                    AKbot = (QKbot * omegaKbot).dot(QKbot.T)

                    omegaSort = numpy.flipud(numpy.sort(omega))
                    # No bound is computed for an exact step; keep the list
                    # aligned with four placeholder zeros
                    boundList.append([0] * 4)
                else:
                    omega, Q = scipy.sparse.linalg.eigsh(
                        ABBA,
                        min(self.k2, ABBA.shape[0] - 1),
                        which="LM",
                        ncv=min(10 * self.k2, ABBA.shape[0]))

        elif self.alg == "nystrom":
            omega, Q = Nystrom.eigpsd(ABBA, self.k3)
        elif self.alg == "exact":
            omega, Q = scipy.sparse.linalg.eigsh(
                ABBA,
                min(self.k1, ABBA.shape[0] - 1),
                which="LM",
                ncv=min(15 * self.k1, ABBA.shape[0]))
        elif self.alg == "efficientNystrom":
            omega, Q = EfficientNystrom.eigWeight(subW, self.k2, self.k1)
        elif self.alg == "randomisedSvd":
            Q, omega, R = RandomisedSVD.svd(ABBA, self.k4)
        else:
            raise ValueError("Invalid Algorithm: " + str(self.alg))

        if self.computeSinTheta:
            # Norm of the product between the exact eigenvectors outside
            # the top self.k1 and the approximate top self.k1 eigenvectors.
            # NOTE(review): ABBA is not assigned when alg is
            # "efficientNystrom"; confirm computeSinTheta is never combined
            # with that algorithm.
            omegaExact, QExact = scipy.linalg.eigh(ABBA.todense())
            inds = numpy.flipud(numpy.argsort(omegaExact))
            QExactKbot = QExact[:, inds[self.k1:]]
            inds = numpy.flipud(numpy.argsort(omega))
            QApproxK = Q[:, inds[:self.k1]]
            sinThetaList.append(
                scipy.linalg.norm(QExactKbot.T.dot(QApproxK)))

        decompositionTimeList.append(time.time() - startTime)

        if self.alg == "IASC":
            self.storeInformation(subW, ABBA)

        # --- Kmeans ---
        startTime = time.time()
        inds = numpy.flipud(numpy.argsort(omega))

        standardiser = Standardiser()
        #For some very strange reason we get an overflow when computing the
        #norm of the rows of Q even though its elements are bounded by 1.
        #We'll ignore it for now
        # NOTE(review): if FloatingPointError is caught, V is not assigned
        # in this iteration, so the whiten call below uses the previous
        # iteration's V or fails on the first graph - confirm intended
        # fallback. logging.warn is also a deprecated alias of
        # logging.warning.
        try:
            V = standardiser.normaliseArray(Q[:, inds[0:self.k1]].real.T).T
        except FloatingPointError as e:
            logging.warn("FloatingPointError: " + str(e))
        V = VqUtils.whiten(V)
        if i == 0:
            centroids, distortion = vq.kmeans(V,
                                              self.k1,
                                              iter=self.nb_iter_kmeans)
        else:
            # Seed k-means from the previous graph's cluster assignments
            centroids = self.findCentroids(V, clusters[:subW.shape[0]])
            if centroids.shape[0] < self.k1:
                # Top up with randomly chosen rows of V
                nb_missing_centroids = self.k1 - centroids.shape[0]
                random_centroids = V[numpy.random.randint(
                    0, V.shape[0], nb_missing_centroids), :]
                centroids = numpy.vstack((centroids, random_centroids))
            centroids, distortion = vq.kmeans(
                V, centroids)  #iter can only be 1
        clusters, distortion = vq.vq(V, centroids)
        kMeansTimeList.append(time.time() - startTime)

        clustersList.append(clusters)

        #logging.debug("subW.shape: " + str(subW.shape))
        #logging.debug("len(clusters): " + str(len(clusters)))
        #from sandbox.util.ProfileUtils import ProfileUtils
        #logging.debug("Total memory usage: " + str(ProfileUtils.memory()/10**6) + "MB")
        # Display the local variables once reported memory exceeds 10**9
        if ProfileUtils.memory() > 10**9:
            ProfileUtils.memDisplay(locals())

        i += 1

    if verbose:
        eigenQuality = {
            "boundList": boundList,
            "sinThetaList": sinThetaList
        }
        return clustersList, numpy.array(
            (decompositionTimeList, kMeansTimeList)).T, eigenQuality
    else:
        return clustersList
# NOTE(review): fragment - X, X2, Xhat1, numExamples1, numExamples2 and k
# are defined outside the visible source.

# Centre the second block by subtracting the mean of X taken along axis 1
Xhat2 = X2 - numpy.outer(numpy.mean(X, 1), numpy.ones(numExamples2))
Xhat = numpy.c_[Xhat1, Xhat2]

# Gram matrix of the concatenated data and its three distinct blocks
sigma = numpy.dot(Xhat.T, Xhat)
sigma1 = numpy.dot(Xhat1.T, Xhat1)
sigma2 = numpy.dot(Xhat1.T, Xhat2)
sigma3 = numpy.dot(Xhat2.T, Xhat2)

# Eigenpairs of sigma1, indexed in descending eigenvalue order
d, U = numpy.linalg.eig(sigma1)
inds = numpy.flipud(numpy.argsort(d))
indsk = inds[0:k]

#rank k approximation of sigma1
sigma1k = numpy.dot(U[:, indsk],
                    numpy.dot(numpy.diag(d[indsk]), U[:, indsk].T))

ell = Util.rank(sigma1)
# sqrt(eigenvalue)-scaled top-k eigenvectors, zero-padded to ell rows
Ptilde1 = numpy.dot(numpy.diag(numpy.sqrt(d[indsk])), U[:, indsk].T)
Ptilde1 = numpy.r_[Ptilde1, numpy.zeros((ell - k, numExamples1))]

# diag(d ** -0.5) over the ell largest eigenvalues, and their eigenvectors
LambdaTildeSq = numpy.diag(d[inds[0:ell]]**-0.5)
Utilde = U[:, inds[0:ell]]
Q1 = numpy.dot(LambdaTildeSq, numpy.dot(Utilde.T, sigma2))
Q2 = numpy.zeros((numExamples2, numExamples1))
#Q3 is zero which is odd
Q3 = scipy.linalg.sqrtm(sigma3 - numpy.dot(Q1.T, Q1))

Ptilde2 = numpy.r_[Ptilde1, Q2]
Y = numpy.r_[Q1, Q3]