コード例 #1
0
ファイル: SparseUtilsTest.py プロジェクト: charanpald/sandbox
 def testResize(self):
     """
     Check that SparseUtils.resize keeps the overlapping entries of a
     sparse matrix both when shrinking it and when growing it.
     """
     numRows = 10
     numCols = 10

     A = scipy.sparse.rand(numRows, numCols, density=0.1, format="csr")

     # Shrinking: the result must equal the top-left corner of A.
     smallShape = (5, 5)
     B = SparseUtils.resize(A, smallShape)

     self.assertEqual(B.shape, smallShape)
     for i in range(smallShape[0]):
         for j in range(smallShape[1]):
             self.assertEqual(B[i, j], A[i, j])

     # Growing: every entry of A is kept and no new nonzeros appear.
     largeShape = (15, 15)
     B = SparseUtils.resize(A, largeShape)

     self.assertEqual(B.shape, largeShape)
     self.assertEqual(B.nnz, A.nnz)
     for i in range(numRows):
         for j in range(numCols):
             self.assertEqual(B[i, j], A[i, j])
コード例 #2
0
ファイル: SparseUtilsTest.py プロジェクト: rezaarmand/sandbox
    def testResize(self):
        """
        Check that SparseUtils.resize keeps the overlapping entries of a
        sparse matrix both when shrinking it and when growing it.
        """
        numRows = 10
        numCols = 10

        A = scipy.sparse.rand(numRows, numCols, density=0.1, format="csr")

        # Shrinking: the result must equal the top-left corner of A.
        smallShape = (5, 5)
        B = SparseUtils.resize(A, smallShape)

        self.assertEqual(B.shape, smallShape)
        for i in range(smallShape[0]):
            for j in range(smallShape[1]):
                self.assertEqual(B[i, j], A[i, j])

        # Growing: every entry of A is kept and no new nonzeros appear.
        largeShape = (15, 15)
        B = SparseUtils.resize(A, largeShape)

        self.assertEqual(B.shape, largeShape)
        self.assertEqual(B.nnz, A.nnz)
        for i in range(numRows):
            for j in range(numCols):
                self.assertEqual(B[i, j], A[i, j])
コード例 #3
0
    def cluster(self, graphIterator, verbose=False):
        """
        Find a set of clusters for each graph in a sequence of graphs.

        The eigen-decomposition of the Laplacian returned by
        GraphUtils.normalisedLaplacianRw is recomputed from scratch on every
        ``self.T``-th graph (including the first one). For the graphs in
        between, the previous decomposition is updated using the difference
        between the current and the previous weight matrix.

        :param graphIterator: iterable yielding one weight matrix W per graph
            (used here through ``shape``, ``copy()`` and sparse arithmetic).
        :param verbose: if True, also return timings and quality measures.
        :return: the list of cluster assignments, one entry per graph. When
            ``verbose`` is True, a tuple ``(clustersList, times, eigenQuality)``
            where ``times`` has one row per graph holding the decomposition
            and k-means durations, and ``eigenQuality`` is a dict with the
            keys "boundList" (never filled in by this method) and
            "sinThetaList" (filled in only if ``self.computeSinTheta`` is set).
        """
        clustersList = []
        decompositionTimeList = []
        kMeansTimeList = []
        boundList = []
        sinThetaList = []
        numpy.random.seed(self.seed)

        for graphIndex, W in enumerate(graphIterator):
            logging.debug("Graph index:%s", graphIndex)

            startTime = time.time()
            if graphIndex % self.T != 0:
                # --- Figure out the similarity changes in existing edges ---
                n = lastW.shape[0]
                deltaW = W.copy()
                if n > W.shape[0]:
                    # Vertices are removed: pad the new graph to the old size
                    deltaW = SparseUtils.resize(deltaW, lastW.shape)
                elif n < W.shape[0]:
                    # Vertices added: rebuild the old graph with the new shape
                    lastWInds = lastW.nonzero()
                    lastWVal = numpy.zeros(len(lastWInds[0]))
                    for ind, (i, j) in enumerate(
                            zip(lastWInds[0], lastWInds[1])):
                        lastWVal[ind] = lastW[i, j]
                    lastW = scipy.sparse.csr_matrix((lastWVal, lastWInds),
                                                    shape=W.shape)
                deltaW = deltaW - lastW

                # --- Update the decomposition ---
                if n < W.shape[0]:
                    # Give the new vertices zero rows in the eigenvectors
                    Q = numpy.r_[Q,
                                 numpy.zeros(
                                     (W.shape[0] - Q.shape[0], Q.shape[1]))]
                lmbda, Q = self.__updateEigenSystem(lmbda, Q, deltaW, lastW)

                # --- resize the decomposition if the graph is losing vertices ---
                if n > W.shape[0]:
                    Q = Q[0:W.shape[0], :]
            else:
                logging.debug("Recomputing eigensystem")
                # We want to solve the generalized eigen problem $L.v = lambda.D.v$
                # with L and D hermitians.
                # scipy.sparse.linalg does not actually solve this problem (it
                # solves it, forgetting about hermitian information, from version
                # 0.11)
                # So we will solve $D^{-1}.L.v = lambda.v$, where $D^{-1}.L$ is
                # no more hermitian.
                L = GraphUtils.normalisedLaplacianRw(W)
                lmbda, Q = scipy.sparse.linalg.eigs(
                    L,
                    min(self.k, L.shape[0] - 1),
                    which="SM",
                    ncv=min(20 * self.k, L.shape[0]),
                    v0=numpy.random.rand(L.shape[0]))
                # eigs returns complex arrays; only the real parts are kept
                lmbda = lmbda.real
                Q = Q.real

            if self.computeSinTheta:
                # Dense eigen-decomposition of the same Laplacian, used as the
                # reference for the approximate one held in (lmbda, Q)
                L = GraphUtils.normalisedLaplacianRw(W)
                lmbdaExact, QExact = scipy.linalg.eig(L.todense())
                lmbdaExact = lmbdaExact.real
                QExact = QExact.real
                indsExact = numpy.argsort(lmbdaExact)
                # Reference eigenvectors beyond the k smallest eigenvalues
                QExactKbot = QExact[:, indsExact[self.k:]]
                inds = numpy.argsort(lmbda)
                # Approximate eigenvectors of the k smallest eigenvalues
                QApproxK = Q[:, inds[:self.k]]
                sinThetaList.append(
                    scipy.linalg.norm(QExactKbot.T.dot(QApproxK)))

            decompositionTimeList.append(time.time() - startTime)

            # Now do actual clustering
            startTime = time.time()
            V = VqUtils.whiten(Q)
            centroids, distortion = vq.kmeans(V, self.k, iter=self.kmeansIter)
            clusters, distortion = vq.vq(V, centroids)
            clustersList.append(clusters)
            kMeansTimeList.append(time.time() - startTime)

            # Kept for the incremental update on the next graph
            lastW = W.copy()

        if verbose:
            eigenQuality = {
                "boundList": boundList,
                "sinThetaList": sinThetaList
            }
            return clustersList, numpy.array(
                (decompositionTimeList, kMeansTimeList)).T, eigenQuality
        else:
            return clustersList
コード例 #4
0
    def cluster(self, graphIterator, verbose=False):
        """
        Find a set of clusters for each graph in a sequence of graphs.

        The eigen-decomposition of the Laplacian returned by
        GraphUtils.normalisedLaplacianRw is recomputed from scratch on every
        ``self.T``-th graph (including the first one). For the graphs in
        between, the previous decomposition is updated using the difference
        between the current and the previous weight matrix.

        :param graphIterator: iterable yielding one weight matrix W per graph
            (used here through ``shape``, ``copy()`` and sparse arithmetic).
        :param verbose: if True, also return timings and quality measures.
        :return: the list of cluster assignments, one entry per graph. When
            ``verbose`` is True, a tuple ``(clustersList, times, eigenQuality)``
            where ``times`` has one row per graph holding the decomposition
            and k-means durations, and ``eigenQuality`` is a dict with the
            keys "boundList" (never filled in by this method) and
            "sinThetaList" (filled in only if ``self.computeSinTheta`` is set).
        """
        clustersList = []
        decompositionTimeList = []
        kMeansTimeList = []
        boundList = []
        sinThetaList = []
        numpy.random.seed(self.seed)

        for graphIndex, W in enumerate(graphIterator):
            logging.debug("Graph index:%s", graphIndex)

            startTime = time.time()
            if graphIndex % self.T != 0:
                # --- Figure out the similarity changes in existing edges ---
                n = lastW.shape[0]
                deltaW = W.copy()
                if n > W.shape[0]:
                    # Vertices are removed: pad the new graph to the old size
                    deltaW = SparseUtils.resize(deltaW, lastW.shape)
                elif n < W.shape[0]:
                    # Vertices added: rebuild the old graph with the new shape
                    lastWInds = lastW.nonzero()
                    lastWVal = numpy.zeros(len(lastWInds[0]))
                    for ind, (i, j) in enumerate(zip(lastWInds[0], lastWInds[1])):
                        lastWVal[ind] = lastW[i, j]
                    lastW = scipy.sparse.csr_matrix((lastWVal, lastWInds), shape=W.shape)
                deltaW = deltaW - lastW

                # --- Update the decomposition ---
                if n < W.shape[0]:
                    # Give the new vertices zero rows in the eigenvectors
                    Q = numpy.r_[Q, numpy.zeros((W.shape[0] - Q.shape[0], Q.shape[1]))]
                lmbda, Q = self.__updateEigenSystem(lmbda, Q, deltaW, lastW)

                # --- resize the decomposition if the graph is losing vertices ---
                if n > W.shape[0]:
                    Q = Q[0:W.shape[0], :]
            else:
                logging.debug("Recomputing eigensystem")
                # We want to solve the generalized eigen problem $L.v = lambda.D.v$
                # with L and D hermitians.
                # scipy.sparse.linalg does not actually solve this problem (it
                # solves it, forgetting about hermitian information, from version
                # 0.11)
                # So we will solve $D^{-1}.L.v = lambda.v$, where $D^{-1}.L$ is
                # no more hermitian.
                L = GraphUtils.normalisedLaplacianRw(W)
                lmbda, Q = scipy.sparse.linalg.eigs(
                    L,
                    min(self.k, L.shape[0] - 1),
                    which="SM",
                    ncv=min(20 * self.k, L.shape[0]),
                    v0=numpy.random.rand(L.shape[0]))
                # eigs returns complex arrays; only the real parts are kept
                lmbda = lmbda.real
                Q = Q.real

            if self.computeSinTheta:
                # Dense eigen-decomposition of the same Laplacian, used as the
                # reference for the approximate one held in (lmbda, Q)
                L = GraphUtils.normalisedLaplacianRw(W)
                lmbdaExact, QExact = scipy.linalg.eig(L.todense())
                lmbdaExact = lmbdaExact.real
                QExact = QExact.real
                indsExact = numpy.argsort(lmbdaExact)
                # Reference eigenvectors beyond the k smallest eigenvalues
                QExactKbot = QExact[:, indsExact[self.k:]]
                inds = numpy.argsort(lmbda)
                # Approximate eigenvectors of the k smallest eigenvalues
                QApproxK = Q[:, inds[:self.k]]
                sinThetaList.append(scipy.linalg.norm(QExactKbot.T.dot(QApproxK)))

            decompositionTimeList.append(time.time() - startTime)

            # Now do actual clustering
            startTime = time.time()
            V = VqUtils.whiten(Q)
            centroids, distortion = vq.kmeans(V, self.k, iter=self.kmeansIter)
            clusters, distortion = vq.vq(V, centroids)
            clustersList.append(clusters)
            kMeansTimeList.append(time.time() - startTime)

            # Kept for the incremental update on the next graph
            lastW = W.copy()

        if verbose:
            eigenQuality = {"boundList": boundList, "sinThetaList": sinThetaList}
            return clustersList, numpy.array((decompositionTimeList, kMeansTimeList)).T, eigenQuality
        else:
            return clustersList