Esempio n. 1
0
    def testMatrixApprox(self):
        """
        Check Nystrom.matrixApprox on a random 10 x 10 Gram matrix, both as a
        dense array and as a CSR matrix. Passing 10 as the second argument must
        give an error below tol and below the error obtained with 5 sampled
        indices, and the dense and sparse results must agree for the same
        indices.
        """
        tol = 10**-6
        A = numpy.random.rand(10, 10)
        A = A.dot(A.T)

        def sampleInds(size):
            #Sorted random subset of the row/column indices of A
            return numpy.sort(numpy.random.permutation(A.shape[0])[0:size])

        #Dense matrix
        partialApprox = Nystrom.matrixApprox(A, sampleInds(5))
        fullApprox = Nystrom.matrixApprox(A, 10)
        fullError = numpy.linalg.norm(A - fullApprox)
        self.assertTrue(fullError < numpy.linalg.norm(A - partialApprox))
        self.assertTrue(fullError < tol)

        #Same checks on a sparse copy of the matrix
        As = scipy.sparse.csr_matrix(A)
        partialApprox = Nystrom.matrixApprox(As, sampleInds(5))
        fullApprox = Nystrom.matrixApprox(As, 10)
        fullError = SparseUtils.norm(As - fullApprox)
        self.assertTrue(fullError < SparseUtils.norm(As - partialApprox))
        self.assertTrue(fullError < tol)

        #Dense and sparse inputs must produce the same approximation
        for numInds in range(1, 9):
            inds = sampleInds(numInds)
            sparseApprox = Nystrom.matrixApprox(As, inds)
            denseApprox = Nystrom.matrixApprox(A, inds)

            gap = numpy.linalg.norm(denseApprox - numpy.array(sparseApprox.todense()))
            self.assertTrue(gap < tol)
Esempio n. 2
0
    def testEig(self):
        """
        Exercise Nystrom.eig on symmetric matrices obtained by shifting the
        eigenvalues of a random 10 x 10 Gram matrix, first down by 1 and then
        up by 1.
        """
        tol = 10**-5

        def shiftedGram(shift):
            #Random Gram matrix with every eigenvalue moved by shift
            G = numpy.random.rand(10, 10)
            G = G.dot(G.T)
            w, U = numpy.linalg.eig(G)
            return U.dot(numpy.diag(w + shift)).dot(U.T)

        def reconstruct(matrix, cols):
            #Rebuild the matrix from the eigenpairs returned by Nystrom.eig
            lmbda, V = Nystrom.eig(matrix, cols)
            return V.dot(numpy.diag(lmbda)).dot(V.T)

        #Eigenvalues shifted down by 1, so the matrix need not be positive definite
        A = shiftedGram(-1)
        self.assertTrue(numpy.linalg.norm(A - reconstruct(A, 10)) < tol)

        #No accuracy is asserted for index subsets here; the calls only have
        #to complete without raising
        for n in range(2, 11):
            inds = numpy.sort(numpy.random.permutation(A.shape[0])[0:n])
            reconstruct(A, inds)
            Nystrom.matrixApprox(A, inds)

        #Eigenvalues shifted up by 1: positive definite matrix
        A = shiftedGram(1)
        normA = numpy.linalg.norm(A)

        #The error of the approximation must stay below the norm of A
        for n in range(2, 11):
            inds = numpy.sort(numpy.random.permutation(A.shape[0])[0:n])
            AHat = reconstruct(A, inds)
            Nystrom.matrixApprox(A, inds)
            self.assertTrue(numpy.linalg.norm(A - AHat) < normA)
Esempio n. 3
0
    def testEigpsd2(self):
        """
        Check Nystrom.eigpsd when the input is a sparse (CSR) copy of a
        symmetric positive definite 10 x 10 matrix. With n = 10 the
        reconstruction V diag(lmbda) V^T must match the matrix to within tol;
        for index subsets of size 2 to 10 its error must be below the norm of
        the matrix and equal that of Nystrom.matrixApprox on the dense matrix.
        """
        #These tests are on sparse matrices 
        tol = 10**-5

        #Gram matrix with its eigenvalues shifted up by 1
        A = numpy.random.rand(10, 10)
        A = A.dot(A.T)
        w, U = numpy.linalg.eig(A)
        A = U.dot(numpy.diag(w+1)).dot(U.T)
        As = scipy.sparse.csr_matrix(A)
        normA = numpy.linalg.norm(A)

        n = 10
        lmbda, V = Nystrom.eigpsd(As, n)
        AHat = scipy.sparse.csr_matrix(V.dot(numpy.diag(lmbda)).dot(V.T))
        self.assertTrue(numpy.linalg.norm(A - AHat) < tol)

        for n in range(2, 11):
            inds = numpy.sort(numpy.random.permutation(A.shape[0])[0:n])
            lmbda, V = Nystrom.eigpsd(As, inds)
            AHat = V.dot(numpy.diag(lmbda)).dot(V.T)
            AHat2 = Nystrom.matrixApprox(A, inds)
            self.assertTrue(numpy.linalg.norm(A - AHat) < normA)
            #assertAlmostEquals is a deprecated alias that no longer exists in Python 3.12
            self.assertAlmostEqual(numpy.linalg.norm(A - AHat), numpy.linalg.norm(A - AHat2))
Esempio n. 4
0
    def testEigpsd3(self):
        """
        Compare Nystrom.eigpsd with Nystrom.matrixApprox on a 1000 x 1000
        symmetric matrix for 11 sample sizes spread between 2 and max_k. For
        every sample size the error of the eigpsd reconstruction must be below
        the norm of the matrix and agree with the matrixApprox error to the
        number of decimal places derived from tol*norm(A).
        """
        # These tests are on big matrices
        tol = 10**-3
        n = 1000        # size of the matrices
        m = 100          # rank of the matrices
        max_k = int(m*1.1)     # maximum rank of the approximation

        # relevant matrix 
        Arel = numpy.random.rand(m, m)
        Arel = Arel.dot(Arel.T)
        w, U = numpy.linalg.eigh(Arel)
        Arel = U.dot(numpy.diag(w+1)).dot(U.T)
        tolArel = tol*numpy.linalg.norm(Arel)

        # big matrix 
        P = numpy.random.rand(n, n)
        A = P.dot(scipy.linalg.block_diag(Arel, numpy.identity(n-m)/numpy.sqrt(n-m)*tolArel/10)).dot(P.T)
        
        #Resulting matrix is really badly conditioned so we reduce the largest eigenvalue 
        lmbda, V = numpy.linalg.eigh(A) 
        inds = numpy.argsort(lmbda)
        lmbda[inds[-1]] = 5000 
        A = (V*lmbda).dot(V.T)

        #These only depend on A, so compute them once rather than per iteration
        normA = numpy.linalg.norm(A)
        tolA = tol*normA
        places = -int(numpy.log10(tolA))

        #Integer division gives the same sample sizes under Python 2 and 3
        for k in [2 + i*(max_k-2)//10 for i in range(11)]:
            inds = numpy.sort(numpy.random.permutation(A.shape[0])[0:k])
            lmbda, V = Nystrom.eigpsd(A, inds)
            AHat = V.dot(numpy.diag(lmbda)).dot(V.T)
            AHat2 = Nystrom.matrixApprox(A, inds)

            a = numpy.linalg.norm(A - AHat)
            b = numpy.linalg.norm(A - AHat2)
            self.assertTrue(a < normA)
            #assertAlmostEquals is a deprecated alias that no longer exists in Python 3.12
            self.assertAlmostEqual(a, b, places=places, msg= "both approximations differ: " + str(a) + " != " + str(b) + " within " + str(places) + " places (with rank " + str(k) + " approximation)")
Esempio n. 5
0
def run(): 
    """
    Build a graph from the weight matrix returned by createDataset, then print
    and plot the eigenvalues of its symmetric normalised Laplacian as computed
    by numpy.linalg.eigh and by Nystrom.eigpsd with n = 100.
    """
    W = createDataset()

    graph = SparseGraph(GeneralVertexList(W.shape[0]))
    graph.setWeightMatrix(W)
    L = graph.normalisedLaplacianSym()

    omega, Q = numpy.linalg.eigh(L)
    omega2, Q2 = Nystrom.eigpsd(L, 100)

    for spectrum in (omega, omega2):
        print(spectrum)

    #Both spectra are drawn on the same figure against their index
    plt.figure(1)
    for spectrum in (omega, omega2):
        plt.plot(numpy.arange(spectrum.shape[0]), spectrum)
    plt.show()

#run()
Esempio n. 6
0
    def testEigpsd(self):
        """
        Check Nystrom.eigpsd on dense symmetric 10 x 10 matrices. With n = 10
        the reconstruction V diag(lmbda) V^T must match the matrix to within
        tol; for index subsets of size 2 to 10 its error must be below the
        norm of the matrix and equal the error of Nystrom.matrixApprox.
        """
        tol = 10**-3

        #Gram matrix with its eigenvalues shifted up by 1 (positive definite)
        A = numpy.random.rand(10, 10)
        A = A.dot(A.T)
        w, U = numpy.linalg.eig(A)
        A = U.dot(numpy.diag(w+1)).dot(U.T)

        n = 10
        lmbda, V = Nystrom.eigpsd(A, n)
        AHat = V.dot(numpy.diag(lmbda)).dot(V.T)
        self.assertTrue(numpy.linalg.norm(A - AHat) < tol)

        #Approximation is checked for every subset size from 2 to 10
        for n in range(2, 11):
            inds = numpy.sort(numpy.random.permutation(A.shape[0])[0:n])
            lmbda, V = Nystrom.eigpsd(A, inds)
            AHat = V.dot(numpy.diag(lmbda)).dot(V.T)
            AHat2 = Nystrom.matrixApprox(A, inds)
            self.assertTrue(numpy.linalg.norm(A - AHat) < numpy.linalg.norm(A))
            #assertAlmostEquals is a deprecated alias that no longer exists in Python 3.12
            self.assertAlmostEqual(numpy.linalg.norm(A - AHat), numpy.linalg.norm(A - AHat2))

        #Now let's test on positive semi-definite
        #NOTE(review): the matrix is rebuilt from w+1, so the eigenvalue zeroed
        #here becomes 1 and A stays positive definite. Rebuilding from w would
        #give a singular matrix - confirm which one was intended.
        w[9] = 0
        A = U.dot(numpy.diag(w+1)).dot(U.T)

        n = 10
        lmbda, V = Nystrom.eigpsd(A, n)
        AHat = V.dot(numpy.diag(lmbda)).dot(V.T)
        self.assertTrue(numpy.linalg.norm(A - AHat) < tol)

        #Approximation is checked for every subset size from 2 to 10
        for n in range(2, 11):
            inds = numpy.sort(numpy.random.permutation(A.shape[0])[0:n])
            lmbda, V = Nystrom.eigpsd(A, inds)
            AHat = V.dot(numpy.diag(lmbda)).dot(V.T)
            AHat2 = Nystrom.matrixApprox(A, inds)
            self.assertTrue(numpy.linalg.norm(A - AHat) < numpy.linalg.norm(A))
            self.assertAlmostEqual(numpy.linalg.norm(A - AHat), numpy.linalg.norm(A - AHat2))
Esempio n. 7
0
                lastL = initialL
                lastOmegas = [initialOmega]*len(IASCL)
                lastQs = [initialQ]*len(IASCL)
            
            #Compute exact eigenvalues 
            omega, Q = numpy.linalg.eigh(L.todense())
            inds = numpy.flipud(numpy.argsort(omega))
            omega, Q = omega[inds], Q[:, inds]
            omegak, Qk = omega[0:k], Q[:, 0:k]
            omegakbot, Qkbot = omega[k:], Q[:, k:]
               

            #Nystrom method 
            print("Running Nystrom")
            for j, nystromN in enumerate(nystromNs):  
                omega2, Q2 = Nystrom.eigpsd(L, nystromN)
                inds = numpy.flipud(numpy.argsort(omega2))
                omega2, Q2 = omega2[inds], Q2[:, inds]
                omega2k, Q2k = omega2[0:k], Q2[:, 0:k]
                
#                errors[i, j] += computeBound(L, omega, Q, omega2k, Q2k, k)
                errors[i, j] += computeSinTheta(Qkbot, Q2k)
            

            #Randomised SVD method 
            print("Running Random SVD")
            for j, r in enumerate(randSVDVecs):  
                Q4, omega4, R4 = RandomisedSVD.svd(L, r)
                inds = numpy.flipud(numpy.argsort(omega4))
                omega4, Q4 = omega4[inds], Q4[:, inds]
                omega4k, Q4k = omega4[0:k], Q4[:, 0:k]
Esempio n. 8
0
# Script fragment: omega, Q, Qk, L, L2, k, nystromNs, errors and innerProds
# are defined earlier in the script (not shown here).
print(omega)
print(Q)

# Dense eigendecomposition of L2, sorted by increasing eigenvalue; the first
# k eigenpairs after sorting are kept.
omegaHat, Qhat = numpy.linalg.eigh(L2.todense())
inds = numpy.argsort(omegaHat)
omegaHat, Qhat = omegaHat[inds], Qhat[:, inds]
omegaHatk, Qhatk = omegaHat[0:k], Qhat[:, 0:k] 


print(computeInnerProd(Qk, Qhatk))

# For every Nystrom sample size, approximate the eigenpairs of L and of L2 and
# record the results in row i of errors / innerProds.
for i, nystromN in enumerate(nystromNs):
    print(nystromN)
    #omega2, Q2 = numpy.linalg.eigh(L.todense())
    omega2, Q2 = Nystrom.eigpsd(L, int(nystromN))
    # Eigenpairs of L are sorted by decreasing eigenvalue before truncating to k
    inds = numpy.flipud(numpy.argsort(omega2))
    omega2, Q2 = omega2[inds], Q2[:, inds]
    omega2k, Q2k = omega2[0:k], Q2[:, 0:k]

    errors[i, 0] = computeBound(L, omega, Q, omega2k, Q2k, k)
    innerProds[i, 0] = computeInnerProd(Qk, Q2k)
    
    #omega2, Q2 = numpy.linalg.eigh(L2.todense())
    omega2, Q2 = Nystrom.eigpsd(L2, int(nystromN))
    # NOTE(review): eigenpairs of L2 are sorted by increasing eigenvalue (same
    # order as omegaHat above), unlike the decreasing order used for L -
    # confirm this difference is intended.
    inds = numpy.argsort(omega2)
    omega2, Q2 = omega2[inds], Q2[:, inds]
    omega2k, Q2k = omega2[0:k], Q2[:, 0:k]
    print(omega2)

    errors[i, 1] = computeBound(L2, omegaHat, Qhat, omega2k, Q2k, k)
    def clusterFromIterator(self, graphListIterator, verbose=False):
        """
        Find a set of clusters for each of the graphs given by the iterator.

        For every weight matrix an eigendecomposition is computed with the
        algorithm selected by self.alg ("IASC", "nystrom", "exact",
        "efficientNystrom" or "randomisedSvd"). The eigenvectors of the
        self.k1 largest eigenvalues are normalised and whitened, and k-means
        is run on the result. From the second graph onwards k-means is
        initialised with centroids derived from the previous clustering.

        The difference between a weight matrix and the previous one should be
        positive.

        :param graphListIterator: iterable of weight matrices (checked for symmetry when __debug__ is set).

        :param verbose: if True, timings and bounds are returned as well as the clusters.

        :return: the list of cluster assignments, one entry per graph. If verbose is True, a tuple (clustersList, times, boundList) where times is an array with one row per graph holding the decomposition and k-means times, and boundList holds the bounds recorded on approximate IASC steps when self.computeBound is set.

        :raises ValueError: if self.alg is not one of the algorithms above.
        """
        clustersList = []
        decompositionTimeList = [] 
        kMeansTimeList = [] 
        boundList = []

        for i, subW in enumerate(graphListIterator):
            if __debug__:
                Parameter.checkSymmetric(subW)

            if self.logStep and i % self.logStep == 0:
                logging.debug("Graph index: " + str(i))
            logging.debug("Clustering graph of size " + str(subW.shape))
            if self.alg!="efficientNystrom": 
                ABBA = GraphUtils.shiftLaplacian(subW)

            # --- Eigen value decomposition ---
            startTime = time.time()
            if self.alg=="IASC": 
                if i % self.T != 0:
                    #Approximate update from the eigenpairs of the previous graph
                    omega, Q = self.approxUpdateEig(subW, ABBA, omega, Q)   
                    
                    if self.computeBound:
                        inds = numpy.flipud(numpy.argsort(omega))
                        Q = Q[:, inds]
                        omega = omega[inds]
                        #omegaKbot and AKbot come from the last exact step
                        bounds = self.pertBound(omega, Q, omegaKbot, AKbot, self.k2)
                        #boundList.append([i, bounds[0], bounds[1]])
                        
                        #Now use accurate values of norm of R and delta   
                        rank = Util.rank(ABBA.todense())
                        gamma, U = scipy.sparse.linalg.eigsh(ABBA, rank-1, which="LM", ncv = ABBA.shape[0])
                        #logging.debug("gamma=" + str(gamma))
                        bounds2 = self.realBound(omega, Q, gamma, AKbot, self.k2)                  
                        boundList.append([i, bounds[0], bounds[1], bounds2[0], bounds2[1]])      
                else: 
                    #Every self.T graphs (including the first) the decomposition is recomputed
                    logging.debug("Computing exact eigenvectors")
                    self.storeInformation(subW, ABBA)

                    if self.computeBound: 
                        #omega, Q = scipy.sparse.linalg.eigsh(ABBA, min(self.k2*2, ABBA.shape[0]-1), which="LM", ncv = min(10*self.k2, ABBA.shape[0]))
                        rank = Util.rank(ABBA.todense())
                        omega, Q = scipy.sparse.linalg.eigsh(ABBA, rank-1, which="LM", ncv = ABBA.shape[0])
                        inds = numpy.flipud(numpy.argsort(omega))
                        #Part of the decomposition beyond the self.k2 largest eigenvalues
                        omegaKbot = omega[inds[self.k2:]]  
                        QKbot = Q[:, inds[self.k2:]] 
                        AKbot = (QKbot*omegaKbot).dot(QKbot.T)
                    else: 
                        omega, Q = scipy.sparse.linalg.eigsh(ABBA, min(self.k2, ABBA.shape[0]-1), which="LM", ncv = min(10*self.k2, ABBA.shape[0]))
                            
            elif self.alg == "nystrom":
                omega, Q = Nystrom.eigpsd(ABBA, self.k3)
            elif self.alg == "exact": 
                omega, Q = scipy.sparse.linalg.eigsh(ABBA, min(self.k1, ABBA.shape[0]-1), which="LM", ncv = min(15*self.k1, ABBA.shape[0]))
            elif self.alg == "efficientNystrom":
                omega, Q = EfficientNystrom.eigWeight(subW, self.k2, self.k1)
            elif self.alg == "randomisedSvd": 
                Q, omega, R = RandomisedSVD.svd(ABBA, self.k4)
            else:
                raise ValueError("Invalid Algorithm: " + str(self.alg))

            decompositionTimeList.append(time.time()-startTime)                  
                  
            if self.alg=="IASC":
                self.storeInformation(subW, ABBA)
            
            # --- Kmeans ---
            startTime = time.time()
            inds = numpy.flipud(numpy.argsort(omega))

            standardiser = Standardiser()
            topEigVecs = Q[:, inds[0:self.k1]].real.T
            #For some very strange reason we get an overflow when computing the
            #norm of the rows of Q even though its elements are bounded by 1.
            #The error is logged and the normalisation is repeated with numpy
            #floating point errors suppressed, so that V always belongs to the
            #current graph (previously V was left undefined or stale here).
            try:
                V = standardiser.normaliseArray(topEigVecs).T
            except FloatingPointError as e:
                logging.warning("FloatingPointError: " + str(e))
                with numpy.errstate(all="ignore"):
                    V = standardiser.normaliseArray(topEigVecs).T
            V = VqUtils.whiten(V)
            if i == 0:
                centroids, distortion = vq.kmeans(V, self.k1, iter=self.nb_iter_kmeans)
            else:
                #Start from the centroids implied by the previous clustering
                centroids = self.findCentroids(V, clusters[:subW.shape[0]])
                if centroids.shape[0] < self.k1:
                    #Fill up to self.k1 centroids with randomly chosen rows of V
                    nb_missing_centroids = self.k1 - centroids.shape[0]
                    random_centroids = V[numpy.random.randint(0, V.shape[0], nb_missing_centroids),:]
                    centroids = numpy.vstack((centroids, random_centroids))
                centroids, distortion = vq.kmeans(V, centroids) #iter can only be 1
            clusters, distortion = vq.vq(V, centroids)
            kMeansTimeList.append(time.time()-startTime)

            clustersList.append(clusters)

            #logging.debug("subW.shape: " + str(subW.shape))
            #logging.debug("len(clusters): " + str(len(clusters)))
            #from apgl.util.ProfileUtils import ProfileUtils
            #logging.debug("Total memory usage: " + str(ProfileUtils.memory()/10**6) + "MB")
            if ProfileUtils.memory() > 10**9:
                ProfileUtils.memDisplay(locals())

        if verbose:
            return clustersList, numpy.array((decompositionTimeList, kMeansTimeList)).T, boundList
        else:
            return clustersList