def clusterFromIterator(self, graphListIterator, verbose=False):
    """
    Find a set of clusters for each of the graphs given by the iterator.

    For every weight matrix an eigendecomposition is computed with the
    algorithm selected by self.alg ("IASC", "nystrom", "exact",
    "efficientNystrom" or "randomisedSvd"). The self.k1 eigenvectors with
    the largest eigenvalues are then normalised, whitened and clustered with
    k-means. For every graph after the first, k-means is seeded with
    centroids derived from the previous graph's cluster assignment.

    The difference between a weight matrix and the previous one should be
    positive.

    :param graphListIterator: an iterable of symmetric weight matrices.

    :param verbose: if True, timings and eigenvector quality measures are
        returned in addition to the clusters.

    :return: the list of cluster assignments (one array per graph). If
        verbose is True a tuple (clustersList, times, eigenQuality) is
        returned, where times is an array with one row per graph holding
        [decomposition time, k-means time] and eigenQuality is a dict with
        the keys "boundList" and "sinThetaList".

    :raises ValueError: if self.alg is not one of the known algorithms.
    """
    clustersList = []
    decompositionTimeList = []
    kMeansTimeList = []
    boundList = []
    sinThetaList = []

    for i, subW in enumerate(graphListIterator):
        if __debug__:
            Parameter.checkSymmetric(subW)

        if self.logStep and i % self.logStep == 0:
            logging.debug("Graph index: %s", i)
        logging.debug("Clustering graph of size %s", subW.shape)

        # NOTE(review): ABBA is not computed for "efficientNystrom", yet the
        # computeSinTheta branch below reads it -- confirm that combination
        # is never used, or compute ABBA there as well.
        if self.alg != "efficientNystrom":
            ABBA = GraphUtils.shiftLaplacian(subW)

        # --- Eigen value decomposition ---
        startTime = time.time()
        if self.alg == "IASC":
            if i % self.T != 0:
                # Cheap update of the previous iteration's decomposition
                omega, Q = self.approxUpdateEig(subW, ABBA, omega, Q)

                if self.computeBound:
                    inds = numpy.flipud(numpy.argsort(omega))
                    Q = Q[:, inds]
                    omega = omega[inds]
                    bounds = self.pertBound(omega, Q, omegaKbot, AKbot, self.k2)

                    # Now use accurate values of norm of R and delta
                    rank = Util.rank(ABBA.todense())
                    gamma, _ = scipy.sparse.linalg.eigsh(
                        ABBA, rank - 1, which="LM", ncv=ABBA.shape[0])
                    bounds2 = self.realBound(omega, Q, gamma, AKbot, self.k2)
                    boundList.append(
                        [bounds[0], bounds[1], bounds2[0], bounds2[1]])
            else:
                # Every self.T-th graph (including the first) is decomposed
                # from scratch.
                logging.debug("Computing exact eigenvectors")
                self.storeInformation(subW, ABBA)

                if self.computeBound:
                    rank = Util.rank(ABBA.todense())
                    omega, Q = scipy.sparse.linalg.eigsh(
                        ABBA, rank - 1, which="LM", ncv=ABBA.shape[0])
                    inds = numpy.flipud(numpy.argsort(omega))
                    # Part of the spectrum beyond the k2 largest
                    # eigenvalues; reused by the bounds on later iterations.
                    omegaKbot = omega[inds[self.k2:]]
                    QKbot = Q[:, inds[self.k2:]]
                    AKbot = (QKbot * omegaKbot).dot(QKbot.T)

                    boundList.append([0] * 4)
                else:
                    omega, Q = scipy.sparse.linalg.eigsh(
                        ABBA, min(self.k2, ABBA.shape[0] - 1), which="LM",
                        ncv=min(10 * self.k2, ABBA.shape[0]))
        elif self.alg == "nystrom":
            omega, Q = Nystrom.eigpsd(ABBA, self.k3)
        elif self.alg == "exact":
            omega, Q = scipy.sparse.linalg.eigsh(
                ABBA, min(self.k1, ABBA.shape[0] - 1), which="LM",
                ncv=min(15 * self.k1, ABBA.shape[0]))
        elif self.alg == "efficientNystrom":
            omega, Q = EfficientNystrom.eigWeight(subW, self.k2, self.k1)
        elif self.alg == "randomisedSvd":
            Q, omega, R = RandomisedSVD.svd(ABBA, self.k4)
        else:
            raise ValueError("Invalid Algorithm: " + str(self.alg))

        if self.computeSinTheta:
            # Compare the approximate top-k1 eigenvectors with the exact
            # eigenvectors outside the top k1.
            omegaExact, QExact = scipy.linalg.eigh(ABBA.todense())
            inds = numpy.flipud(numpy.argsort(omegaExact))
            QExactKbot = QExact[:, inds[self.k1:]]
            inds = numpy.flipud(numpy.argsort(omega))
            QApproxK = Q[:, inds[:self.k1]]
            sinThetaList.append(
                scipy.linalg.norm(QExactKbot.T.dot(QApproxK)))

        decompositionTimeList.append(time.time() - startTime)

        if self.alg == "IASC":
            self.storeInformation(subW, ABBA)

        # --- Kmeans ---
        startTime = time.time()
        inds = numpy.flipud(numpy.argsort(omega))

        standardiser = Standardiser()
        # For some very strange reason we get an overflow when computing the
        # norm of the rows of Q even though its elements are bounded by 1.
        # When that happens, fall back to the un-normalised eigenvectors so
        # that V is always defined for the current graph (previously V was
        # left unset, or stale from the preceding iteration).
        try:
            V = standardiser.normaliseArray(Q[:, inds[0:self.k1]].real.T).T
        except FloatingPointError as e:
            logging.warning("FloatingPointError: " + str(e))
            V = Q[:, inds[0:self.k1]].real
        V = VqUtils.whiten(V)

        if i == 0:
            centroids, distortion = vq.kmeans(
                V, self.k1, iter=self.nb_iter_kmeans)
        else:
            # Seed with the centroids implied by the previous clustering
            centroids = self.findCentroids(V, clusters[:subW.shape[0]])
            if centroids.shape[0] < self.k1:
                # Top up with randomly chosen rows of V
                nb_missing_centroids = self.k1 - centroids.shape[0]
                random_centroids = V[numpy.random.randint(
                    0, V.shape[0], nb_missing_centroids), :]
                centroids = numpy.vstack((centroids, random_centroids))
            centroids, distortion = vq.kmeans(V, centroids)  # iter can only be 1
        clusters, distortion = vq.vq(V, centroids)
        kMeansTimeList.append(time.time() - startTime)

        clustersList.append(clusters)

        if ProfileUtils.memory() > 10**9:
            ProfileUtils.memDisplay(locals())

    if verbose:
        eigenQuality = {
            "boundList": boundList,
            "sinThetaList": sinThetaList,
        }
        times = numpy.array((decompositionTimeList, kMeansTimeList)).T
        return clustersList, times, eigenQuality
    else:
        return clustersList
def eigenAdd2(omega, Q, Y1, Y2, k, debug=False):
    """
    Compute an approximation of the eigendecomposition A^*A + Y1Y2^* +Y2Y1^*
    in which Y1, Y2 are low rank matrices, Y1^*Y2=0 and A^*A = Q Omega Q*. We
    use the rank-k approximation of A^*A: Q_k Omega_k Q_k^* and then find
    [A^*A_k + Y1Y2^* + Y2Y1^*].

    :param omega: 1-d numpy array of eigenvalues of A^*A.

    :param Q: numpy array whose columns are the matching eigenvectors.

    :param Y1: numpy array with the same number of rows as Q.

    :param Y2: numpy array with the same shape as Y1.

    :param k: the number of largest eigenvalues of A^*A to keep.

    :param debug: if True, additionally return the basis D and the
        symmetrised update expressed in that basis.

    :return: pi, V, the eigenvalues whose magnitude exceeds
        EigenUpdater.tol and the corresponding eigenvectors of
        [A^*A_k + Y1Y2^* + Y2Y1^*]. If debug is True the tuple
        (pi, V, D, D^*(Y1Y2^* + Y2Y1^*)D) is returned instead.

    :raises ValueError: if omega is not 1-d or the shapes of omega, Q, Y1
        and Y2 are inconsistent.
    """
    Parameter.checkInt(k, 0, float('inf'))
    Parameter.checkClass(omega, numpy.ndarray)
    Parameter.checkClass(Q, numpy.ndarray)
    Parameter.checkClass(Y1, numpy.ndarray)
    Parameter.checkClass(Y2, numpy.ndarray)
    if not numpy.isrealobj(omega) or not numpy.isrealobj(Q):
        logging.warning("Eigenvalues or eigenvectors are not real")
    if not numpy.isrealobj(Y1) or not numpy.isrealobj(Y2):
        logging.warning("Y1 or Y2 are not real")
    if omega.ndim != 1:
        raise ValueError("omega must be 1-d array")
    if omega.shape[0] != Q.shape[1]:
        raise ValueError("Must have same number of eigenvalues and eigenvectors")
    if Q.shape[0] != Y1.shape[0]:
        raise ValueError("Q must have the same number of rows as Y1 rows")
    if Q.shape[0] != Y2.shape[0]:
        raise ValueError("Q must have the same number of rows as Y2 rows")
    if Y1.shape[1] != Y2.shape[1]:
        raise ValueError("Y1 must have the same number of columns as Y2 columns")

    if __debug__:
        Parameter.checkArray(omega, softCheck=True, arrayInfo="omega as input in eigenAdd2()")
        Parameter.checkArray(Q, softCheck=True, arrayInfo="Q as input in eigenAdd2()")
        Parameter.checkOrthogonal(Q, tol=EigenUpdater.tol, softCheck=True, arrayInfo="Q as input in eigenAdd2()")
        Parameter.checkArray(Y1, softCheck=True, arrayInfo="Y1 as input in eigenAdd2()")
        Parameter.checkArray(Y2, softCheck=True, arrayInfo="Y2 as input in eigenAdd2()")

    # Get first k eigenvectors/values of A^*A
    omega, Q = Util.indEig(omega, Q, numpy.flipud(numpy.argsort(omega))[0:k])

    # Component of Y1 outside the span of Q, and its SVD restricted to
    # singular values above the tolerance.
    QY1 = Q.conj().T.dot(Y1)
    Y1bar = Y1 - Q.dot(QY1)

    P1bar, sigma1Bar, Q1bar = Util.safeSvd(Y1bar)
    inds = numpy.arange(sigma1Bar.shape[0])[numpy.abs(sigma1Bar) > EigenUpdater.tol]
    P1bar, sigma1Bar, Q1bar = Util.indSvd(P1bar, sigma1Bar, Q1bar, inds)

    # checks on SVD decomposition of Y1bar
    if __debug__:
        Parameter.checkArray(QY1, softCheck=True, arrayInfo="QY1 in eigenAdd2()")
        Parameter.checkArray(Y1bar, softCheck=True, arrayInfo="Y1bar in eigenAdd2()")
        Parameter.checkArray(P1bar, softCheck=True, arrayInfo="P1bar in eigenAdd2()")
        if not Parameter.checkOrthogonal(P1bar, tol=EigenUpdater.tol, softCheck=True, arrayInfo="P1bar in eigenAdd2()", investigate=True):
            print("corresponding sigma: ", sigma1Bar)
        Parameter.checkArray(sigma1Bar, softCheck=True, arrayInfo="sigma1Bar in eigenAdd2()")
        Parameter.checkArray(Q1bar, softCheck=True, arrayInfo="Q1bar in eigenAdd2()")
        if not Parameter.checkOrthogonal(Q1bar, tol=EigenUpdater.tol, softCheck=True, arrayInfo="Q1bar in eigenAdd2()"):
            print("corresponding sigma: ", sigma1Bar)

    del Y1bar

    # Component of Y2 outside the span of both Q and P1bar, and its SVD.
    P1barY2 = P1bar.conj().T.dot(Y2)
    QY2 = Q.conj().T.dot(Y2)
    Y2bar = Y2 - Q.dot(QY2) - P1bar.dot(P1barY2)

    P2bar, sigma2Bar, Q2bar = Util.safeSvd(Y2bar)
    inds = numpy.arange(sigma2Bar.shape[0])[numpy.abs(sigma2Bar) > EigenUpdater.tol]
    P2bar, sigma2Bar, Q2bar = Util.indSvd(P2bar, sigma2Bar, Q2bar, inds)

    # checks on SVD decomposition of Y2bar
    if __debug__:
        Parameter.checkArray(P1barY2, softCheck=True, arrayInfo="P1barY2 in eigenAdd2()")
        Parameter.checkArray(QY2, softCheck=True, arrayInfo="QY2 in eigenAdd2()")
        Parameter.checkArray(Y2bar, softCheck=True, arrayInfo="Y2bar in eigenAdd2()")
        Parameter.checkArray(P2bar, softCheck=True, arrayInfo="P2bar in eigenAdd2()")
        Parameter.checkOrthogonal(P2bar, tol=EigenUpdater.tol, softCheck=True, arrayInfo="P2bar in eigenAdd2()")
        Parameter.checkArray(sigma2Bar, softCheck=True, arrayInfo="sigma2Bar in eigenAdd2()")
        Parameter.checkArray(Q2bar, softCheck=True, arrayInfo="Q2bar in eigenAdd2()")
        Parameter.checkOrthogonal(Q2bar, tol=EigenUpdater.tol, softCheck=True, arrayInfo="Q2bar in eigenAdd2()")

    del Y2bar

    r = omega.shape[0]
    p = Y1.shape[1]
    p1 = sigma1Bar.shape[0]
    p2 = sigma2Bar.shape[0]

    # Basis in which the updated matrix is expressed as the small matrix F
    D = numpy.c_[Q, P1bar, P2bar]
    del P1bar
    del P2bar

    # rem: A*s = A.dot(diag(s)) ; A*s[:,new] = diag(s).dot(A)
    DStarY1 = numpy.r_[QY1, sigma1Bar[:, numpy.newaxis] * Q1bar.conj().T, numpy.zeros((p2, p))]
    DStarY2 = numpy.r_[QY2, P1barY2, sigma2Bar[:, numpy.newaxis] * Q2bar.conj().T]
    DStarY1Y2StarD = DStarY1.dot(DStarY2.conj().T)

    del DStarY1
    del DStarY2

    # F = diag(omega) padded with zeros, plus the symmetrised update
    F = numpy.zeros((r + p1 + p2, r + p1 + p2))
    diagInds = numpy.arange(r)
    F[diagInds, diagInds] = omega
    F = F + DStarY1Y2StarD + DStarY1Y2StarD.conj().T

    # A check to make sure DFD^T is AA_k + Y1Y2 + Y2Y1
    # assert numpy.linalg.norm(D.dot(F).dot(D.T) - Q.dot(numpy.diag(omega).dot(Q.T)) - Y1.dot(Y2.T) - Y2.dot(Y1.T)) < 10**-6

    # checks on F
    if __debug__:
        Parameter.checkArray(DStarY1Y2StarD, softCheck=True, arrayInfo="DStarY1Y2StarD in eigenAdd2()")
        Parameter.checkArray(F, softCheck=True, arrayInfo="F in eigenAdd2()")
        Parameter.checkSymmetric(F, tol=EigenUpdater.tol, softCheck=True, arrayInfo="F in eigenAdd2()")

    pi, H = scipy.linalg.eigh(F)
    # remove too small eigenvalues
    pi, H = Util.indEig(pi, H, numpy.arange(pi.shape[0])[numpy.abs(pi) > EigenUpdater.tol])

    V = D.dot(H)

    if __debug__:
        if not Parameter.checkOrthogonal(D, tol=EigenUpdater.tol, softCheck=True, investigate=True, arrayInfo="D in eigenAdd2()"):
            print("pi:\n", pi)
        if not Parameter.checkOrthogonal(H, tol=EigenUpdater.tol, softCheck=True, investigate=True, arrayInfo="H in eigenAdd2()"):
            print("pi:\n", pi)

    if ProfileUtils.memory() > 10**9:
        ProfileUtils.memDisplay(locals())

    if debug:
        return pi, V, D, DStarY1Y2StarD + DStarY1Y2StarD.conj().T
    else:
        return pi, V
def safeSvd(A, eps=10**-8, tol=10**-8):
    """
    Compute the SVD of a matrix using scipy.linalg.svd, and if convergence
    fails revert to Util.svd.

    The fallback to Util.svd (the EVD based method) is taken when
    scipy.linalg.svd raises LinAlgError, when dropping the singular values
    that are not greater than tol leaves a decomposition that no longer
    reproduces A, or (only when __debug__ is set) when the output fails
    Parameter.checkArray.

    :param A: the matrix to decompose.

    :param eps: passed through to Util.svd when the fallback is used.

    :param tol: singular values not greater than this are dropped; also
        passed through to Util.svd.

    :return: P, sigma, Qh such that A is approximately (P*sigma).dot(Qh).
    """
    # check input matrix
    if __debug__:
        if not Parameter.checkArray(A, softCheck=True):
            logging.info("... in Util.safeSvd")

    # Set as soon as the scipy result is found to be unusable
    useEvd = False

    # run scipy.linalg.svd
    try:
        P, sigma, Qh = scipy.linalg.svd(A, full_matrices=False)
    except scipy.linalg.LinAlgError as e:
        logging.warning(str(e))
        useEvd = True

    if not useEvd:
        # --- only when the SVD decomposition comes from scipy.linalg.svd ---
        # clean output singular values (sometimes scipy.linalg.svd returns
        # NaN or negative singular values, let's remove them)
        inds = numpy.arange(sigma.shape[0])[sigma > tol]
        if inds.shape[0] < sigma.shape[0]:
            P, sigma, Q = Util.indSvd(P, sigma, Qh, inds)
            Qh = Q.conj().T
            # an expensive check but we really need it
            # rem: A*s = A.dot(diag(s)) ; A*s[:,new] = diag(s).dot(A)
            if not numpy.allclose(A, (P * sigma).dot(Qh)):
                logging.warning(" After cleaning singular values from scipy.linalg.svd, the SVD decomposition is too far from the original matrix")
                useEvd = True

    # check scipy.linalg.svd output matrices (expensive)
    if __debug__ and not useEvd:
        badAnswerFromScipySvd = False
        if not Parameter.checkArray(P, softCheck=True, arrayInfo="P in Util.safeSvd()"):
            badAnswerFromScipySvd = True
        if not Parameter.checkArray(sigma, softCheck=True, arrayInfo="sigma in Util.safeSvd()"):
            badAnswerFromScipySvd = True
        if not Parameter.checkArray(Qh, softCheck=True, arrayInfo="Qh in Util.safeSvd()"):
            badAnswerFromScipySvd = True
        if badAnswerFromScipySvd:
            logging.warning(" After cleaning singular values from scipy.linalg.svd, the SVD decomposition still contains 'NaN', 'inf' or complex values")
            useEvd = True

    if useEvd:
        logging.warning(" Using EVD method to compute the SVD.")
        P, sigma, Qh = Util.svd(A, eps, tol)

        # check Util.svd output matrices (expensive); problems are only
        # reported here since there is no further fallback
        if __debug__:
            badAnswerFromUtilSvd = False
            if not Parameter.checkArray(P, softCheck=True):
                logging.info("... in P in Util.safeSvd")
                badAnswerFromUtilSvd = True
            if not Parameter.checkArray(sigma, softCheck=True):
                logging.info("... in sigma in Util.safeSvd")
                badAnswerFromUtilSvd = True
            if not Parameter.checkArray(Qh, softCheck=True):
                logging.info("... in Q in Util.safeSvd")
                badAnswerFromUtilSvd = True
            if badAnswerFromUtilSvd:
                logging.warning(" SVD decomposition obtained from EVD decomposition contains 'NaN', 'inf' or complex values")

    from sandbox.util.ProfileUtils import ProfileUtils
    if ProfileUtils.memory() > 10**9:
        ProfileUtils.memDisplay(locals())

    return P, sigma, Qh
def safeSvd(A, eps=10**-8, tol=10**-8):
    """
    Compute the SVD of a matrix using scipy.linalg.svd, and if convergence
    fails revert to Util.svd.

    The fallback to Util.svd (the EVD based method) is taken when
    scipy.linalg.svd raises LinAlgError, when dropping the singular values
    that are not greater than tol leaves a decomposition that no longer
    reproduces A, or (only when __debug__ is set) when the output fails
    Parameter.checkArray.

    :param A: the matrix to decompose.

    :param eps: passed through to Util.svd when the fallback is used.

    :param tol: singular values not greater than this are dropped; also
        passed through to Util.svd.

    :return: P, sigma, Qh such that A is approximately (P*sigma).dot(Qh).
    """
    # check input matrix
    if __debug__:
        if not Parameter.checkArray(A, softCheck=True):
            logging.info("... in Util.safeSvd")

    # Set as soon as the scipy result is found to be unusable
    useEvd = False

    # run scipy.linalg.svd
    try:
        P, sigma, Qh = scipy.linalg.svd(A, full_matrices=False)
    except scipy.linalg.LinAlgError as e:
        logging.warning(str(e))
        useEvd = True

    if not useEvd:
        # --- only when the SVD decomposition comes from scipy.linalg.svd ---
        # clean output singular values (sometimes scipy.linalg.svd returns
        # NaN or negative singular values, let's remove them)
        inds = numpy.arange(sigma.shape[0])[sigma > tol]
        if inds.shape[0] < sigma.shape[0]:
            P, sigma, Q = Util.indSvd(P, sigma, Qh, inds)
            Qh = Q.conj().T
            # an expensive check but we really need it
            # rem: A*s = A.dot(diag(s)) ; A*s[:,new] = diag(s).dot(A)
            if not numpy.allclose(A, (P * sigma).dot(Qh)):
                logging.warning(" After cleaning singular values from scipy.linalg.svd, the SVD decomposition is too far from the original matrix")
                useEvd = True

    # check scipy.linalg.svd output matrices (expensive)
    if __debug__ and not useEvd:
        badAnswerFromScipySvd = False
        if not Parameter.checkArray(P, softCheck=True, arrayInfo="P in Util.safeSvd()"):
            badAnswerFromScipySvd = True
        if not Parameter.checkArray(sigma, softCheck=True, arrayInfo="sigma in Util.safeSvd()"):
            badAnswerFromScipySvd = True
        if not Parameter.checkArray(Qh, softCheck=True, arrayInfo="Qh in Util.safeSvd()"):
            badAnswerFromScipySvd = True
        if badAnswerFromScipySvd:
            logging.warning(" After cleaning singular values from scipy.linalg.svd, the SVD decomposition still contains 'NaN', 'inf' or complex values")
            useEvd = True

    if useEvd:
        logging.warning(" Using EVD method to compute the SVD.")
        P, sigma, Qh = Util.svd(A, eps, tol)

        # check Util.svd output matrices (expensive); problems are only
        # reported here since there is no further fallback
        if __debug__:
            badAnswerFromUtilSvd = False
            if not Parameter.checkArray(P, softCheck=True):
                logging.info("... in P in Util.safeSvd")
                badAnswerFromUtilSvd = True
            if not Parameter.checkArray(sigma, softCheck=True):
                logging.info("... in sigma in Util.safeSvd")
                badAnswerFromUtilSvd = True
            if not Parameter.checkArray(Qh, softCheck=True):
                logging.info("... in Q in Util.safeSvd")
                badAnswerFromUtilSvd = True
            if badAnswerFromUtilSvd:
                logging.warning(" SVD decomposition obtained from EVD decomposition contains 'NaN', 'inf' or complex values")

    from sandbox.util.ProfileUtils import ProfileUtils
    if ProfileUtils.memory() > 10**9:
        ProfileUtils.memDisplay(locals())

    return P, sigma, Qh
def clusterFromIterator(self, graphListIterator, verbose=False):
    """
    Find a set of clusters for each of the graphs given by the iterator.

    For every weight matrix an eigendecomposition is computed with the
    algorithm selected by self.alg ("IASC", "nystrom", "exact",
    "efficientNystrom" or "randomisedSvd"). The self.k1 eigenvectors with
    the largest eigenvalues are then normalised, whitened and clustered with
    k-means. For every graph after the first, k-means is seeded with
    centroids derived from the previous graph's cluster assignment.

    The difference between a weight matrix and the previous one should be
    positive.

    :param graphListIterator: an iterable of symmetric weight matrices.

    :param verbose: if True, timings and eigenvector quality measures are
        returned in addition to the clusters.

    :return: the list of cluster assignments (one array per graph). If
        verbose is True a tuple (clustersList, times, eigenQuality) is
        returned, where times is an array with one row per graph holding
        [decomposition time, k-means time] and eigenQuality is a dict with
        the keys "boundList" and "sinThetaList".

    :raises ValueError: if self.alg is not one of the known algorithms.
    """
    clustersList = []
    decompositionTimeList = []
    kMeansTimeList = []
    boundList = []
    sinThetaList = []

    for i, subW in enumerate(graphListIterator):
        if __debug__:
            Parameter.checkSymmetric(subW)

        if self.logStep and i % self.logStep == 0:
            logging.debug("Graph index: %s", i)
        logging.debug("Clustering graph of size %s", subW.shape)

        # NOTE(review): ABBA is not computed for "efficientNystrom", yet the
        # computeSinTheta branch below reads it -- confirm that combination
        # is never used, or compute ABBA there as well.
        if self.alg != "efficientNystrom":
            ABBA = GraphUtils.shiftLaplacian(subW)

        # --- Eigen value decomposition ---
        startTime = time.time()
        if self.alg == "IASC":
            if i % self.T != 0:
                # Cheap update of the previous iteration's decomposition
                omega, Q = self.approxUpdateEig(subW, ABBA, omega, Q)

                if self.computeBound:
                    inds = numpy.flipud(numpy.argsort(omega))
                    Q = Q[:, inds]
                    omega = omega[inds]
                    bounds = self.pertBound(omega, Q, omegaKbot, AKbot, self.k2)

                    # Now use accurate values of norm of R and delta
                    rank = Util.rank(ABBA.todense())
                    gamma, _ = scipy.sparse.linalg.eigsh(
                        ABBA, rank - 1, which="LM", ncv=ABBA.shape[0])
                    bounds2 = self.realBound(omega, Q, gamma, AKbot, self.k2)
                    boundList.append(
                        [bounds[0], bounds[1], bounds2[0], bounds2[1]])
            else:
                # Every self.T-th graph (including the first) is decomposed
                # from scratch.
                logging.debug("Computing exact eigenvectors")
                self.storeInformation(subW, ABBA)

                if self.computeBound:
                    rank = Util.rank(ABBA.todense())
                    omega, Q = scipy.sparse.linalg.eigsh(
                        ABBA, rank - 1, which="LM", ncv=ABBA.shape[0])
                    inds = numpy.flipud(numpy.argsort(omega))
                    # Part of the spectrum beyond the k2 largest
                    # eigenvalues; reused by the bounds on later iterations.
                    omegaKbot = omega[inds[self.k2:]]
                    QKbot = Q[:, inds[self.k2:]]
                    AKbot = (QKbot * omegaKbot).dot(QKbot.T)

                    boundList.append([0] * 4)
                else:
                    omega, Q = scipy.sparse.linalg.eigsh(
                        ABBA, min(self.k2, ABBA.shape[0] - 1), which="LM",
                        ncv=min(10 * self.k2, ABBA.shape[0]))
        elif self.alg == "nystrom":
            omega, Q = Nystrom.eigpsd(ABBA, self.k3)
        elif self.alg == "exact":
            omega, Q = scipy.sparse.linalg.eigsh(
                ABBA, min(self.k1, ABBA.shape[0] - 1), which="LM",
                ncv=min(15 * self.k1, ABBA.shape[0]))
        elif self.alg == "efficientNystrom":
            omega, Q = EfficientNystrom.eigWeight(subW, self.k2, self.k1)
        elif self.alg == "randomisedSvd":
            Q, omega, R = RandomisedSVD.svd(ABBA, self.k4)
        else:
            raise ValueError("Invalid Algorithm: " + str(self.alg))

        if self.computeSinTheta:
            # Compare the approximate top-k1 eigenvectors with the exact
            # eigenvectors outside the top k1.
            omegaExact, QExact = scipy.linalg.eigh(ABBA.todense())
            inds = numpy.flipud(numpy.argsort(omegaExact))
            QExactKbot = QExact[:, inds[self.k1:]]
            inds = numpy.flipud(numpy.argsort(omega))
            QApproxK = Q[:, inds[:self.k1]]
            sinThetaList.append(
                scipy.linalg.norm(QExactKbot.T.dot(QApproxK)))

        decompositionTimeList.append(time.time() - startTime)

        if self.alg == "IASC":
            self.storeInformation(subW, ABBA)

        # --- Kmeans ---
        startTime = time.time()
        inds = numpy.flipud(numpy.argsort(omega))

        standardiser = Standardiser()
        # For some very strange reason we get an overflow when computing the
        # norm of the rows of Q even though its elements are bounded by 1.
        # When that happens, fall back to the un-normalised eigenvectors so
        # that V is always defined for the current graph (previously V was
        # left unset, or stale from the preceding iteration).
        try:
            V = standardiser.normaliseArray(Q[:, inds[0:self.k1]].real.T).T
        except FloatingPointError as e:
            logging.warning("FloatingPointError: " + str(e))
            V = Q[:, inds[0:self.k1]].real
        V = VqUtils.whiten(V)

        if i == 0:
            centroids, distortion = vq.kmeans(
                V, self.k1, iter=self.nb_iter_kmeans)
        else:
            # Seed with the centroids implied by the previous clustering
            centroids = self.findCentroids(V, clusters[:subW.shape[0]])
            if centroids.shape[0] < self.k1:
                # Top up with randomly chosen rows of V
                nb_missing_centroids = self.k1 - centroids.shape[0]
                random_centroids = V[numpy.random.randint(
                    0, V.shape[0], nb_missing_centroids), :]
                centroids = numpy.vstack((centroids, random_centroids))
            centroids, distortion = vq.kmeans(V, centroids)  # iter can only be 1
        clusters, distortion = vq.vq(V, centroids)
        kMeansTimeList.append(time.time() - startTime)

        clustersList.append(clusters)

        if ProfileUtils.memory() > 10**9:
            ProfileUtils.memDisplay(locals())

    if verbose:
        eigenQuality = {
            "boundList": boundList,
            "sinThetaList": sinThetaList,
        }
        times = numpy.array((decompositionTimeList, kMeansTimeList)).T
        return clustersList, times, eigenQuality
    else:
        return clustersList