예제 #1
0
    def testMatrixApprox(self):
        """
        Exercise Nystrom.matrixApprox on a random 10 x 10 matrix of the form
        B.dot(B.T), first as a dense ndarray and then as a scipy CSR matrix.

        For each representation, the approximation requested with the integer
        10 must be closer to the matrix than the one built from 5 randomly
        drawn indices, and its error must be below the tolerance. Finally the
        dense and sparse approximations must agree for identical index sets.
        """
        tol = 1e-6
        A = numpy.random.rand(10, 10)
        A = A.dot(A.T)
        numRows = A.shape[0]

        # Dense input: 5 sampled indices versus the integer argument 10
        subsetInds = numpy.sort(numpy.random.permutation(numRows)[:5])
        subsetApprox = Nystrom.matrixApprox(A, subsetInds)
        completeApprox = Nystrom.matrixApprox(A, 10)
        completeError = numpy.linalg.norm(A - completeApprox)
        self.assertTrue(completeError < numpy.linalg.norm(A - subsetApprox))
        self.assertTrue(completeError < tol)

        # Same comparison with the matrix held in CSR form
        As = scipy.sparse.csr_matrix(A)
        subsetInds = numpy.sort(numpy.random.permutation(numRows)[:5])
        subsetApprox = Nystrom.matrixApprox(As, subsetInds)
        completeApprox = Nystrom.matrixApprox(As, 10)
        completeError = SparseUtils.norm(As - completeApprox)
        self.assertTrue(completeError < SparseUtils.norm(As - subsetApprox))
        self.assertTrue(completeError < tol)

        # Dense and sparse code paths must give the same approximation
        for numInds in range(1, 9):
            sampledInds = numpy.sort(
                numpy.random.permutation(numRows)[:numInds])
            sparseApprox = Nystrom.matrixApprox(As, sampledInds)
            denseApprox = Nystrom.matrixApprox(A, sampledInds)

            gap = numpy.linalg.norm(
                denseApprox - numpy.array(sparseApprox.todense()))
            self.assertTrue(gap < tol)
    def setWeightMatrix(self, W):
        """
        Set the weight matrix of this graph. Requires as input an ndarray or
        a scipy sparse matrix whose shape is (numVertices, numVertices), where
        numVertices is the size of the vertex list. Edges are represented by
        non-zero entries.

        The matrix is stored in ``self.W`` as a new dense ndarray; sparse
        input is converted to dense first.

        :param W: The weight matrix to use.
        :type W: :class:`ndarray` or :class:`scipy.sparse` matrix
        :raises ValueError: if W has the wrong shape, or if the graph is
            undirected and W is not symmetric.
        """
        numVertices = self.vList.getNumVertices()
        if W.shape != (numVertices, numVertices):
            raise ValueError("Weight matrix has wrong shape : " + str(W.shape))

        isSparse = scipy.sparse.issparse(W)

        if self.undirected:
            if isSparse:
                symmetric = SparseUtils.equals(W, W.T)
            elif isinstance(W, numpy.ndarray):
                # isinstance (rather than an exact type comparison) so that
                # ndarray subclasses are checked for symmetry as well.
                symmetric = not (W != W.T).any()
            else:
                # Other input types are not checked.
                symmetric = True

            if not symmetric:
                raise ValueError(
                    "Weight matrix of undirected graph must be symmetric")

        if isSparse:
            W = W.todense()

        # numpy.array copies, so the graph never aliases the caller's array.
        self.W = numpy.array(W)
예제 #3
0
def recommend(learner):
    """
    Take a list of coauthors and read in the complete graph into a sparse
    matrix X such that X_ij = k means author i has worked with j, k times. Then
    run cross-validated matrix factorisation on the resulting matrix.

    The matrix is read with scipy.io.mmread from the "erasm/Toy" file under
    the output directory, truncated to its first 50 rows and passed through
    preprocess. Its non-zero entries are split into 5 folds; for each fold
    computeTestError is called (in a process pool) on the tuple
    (trainX, testX, learner). The mean of the returned errors is logged and
    saved with numpy.savez under "results_" + learner.name().

    :param learner: the learner handed to computeTestError; it must provide
        a name() method used to build the results file name.
    """
    outputDir = PathDefaults.getOutputDir() + "erasm/"
    matrixFileName = outputDir + "Toy"

    numExamples = 50
    numFolds = 5

    X = scipy.io.mmread(matrixFileName)
    X = scipy.sparse.csr_matrix(X)
    logging.debug("Loaded matrix " + str(X.shape) + " with " + str(X.getnnz()) + " non zeros")
    X = X[0:numExamples, :]
    # The second value returned by preprocess is not needed here.
    X, _ = preprocess(X)

    # Take out some ratings to form a training set: the non-zero entries are
    # shuffled and then split into folds by position.
    rowInds, colInds = X.nonzero()
    randInds = numpy.random.permutation(rowInds.shape[0])
    indexList = Sampling.crossValidation(numFolds, rowInds.shape[0])

    paramList = []
    for trnIdx, tstIdx in indexList:
        trainInds = randInds[trnIdx]
        testInds = randInds[tstIdx]

        trainX = SparseUtils.selectMatrix(X, rowInds[trainInds], colInds[trainInds]).tocsr()
        testX = SparseUtils.selectMatrix(X, rowInds[testInds], colInds[testInds]).tocsr()

        paramList.append((trainX, testX, learner))

    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
    try:
        results = pool.map(computeTestError, paramList)
    finally:
        # Always release the worker processes, even if a fold fails.
        pool.close()
        pool.join()

    testErrors = numpy.array(results)
    meanTestErrors = testErrors.mean()
    logging.debug("Test errors = " + str(meanTestErrors))

    errorFileName = outputDir + "results_" + learner.name()
    numpy.savez(errorFileName, meanTestErrors)
    logging.debug("Saved results as " + errorFileName)
예제 #4
0
    def testDiag(self):
        """
        Check that SparseUtils.diag returns, entry by entry, the diagonal of
        a random 10 x 10 CSR matrix.
        """
        numRows = 10
        numCols = 10
        A = scipy.sparse.rand(numRows, numCols, 0.5, "csr")

        d = SparseUtils.diag(A)

        # assertEqual is used because the assertEquals alias is deprecated.
        for i in range(numRows):
            self.assertEqual(d[i], A[i, i])
예제 #5
0
 def testResize(self):
     """
     Check SparseUtils.resize when shrinking and when growing a random
     10 x 10 CSR matrix: the result has the requested shape and matches
     the original on the overlapping region.
     """
     numRows = 10
     numCols = 10

     A = scipy.sparse.rand(numRows, numCols, 0.1, "csr")

     # Shrink: the top-left 5 x 5 corner must be preserved.
     B = SparseUtils.resize(A, (5, 5))

     # assertEqual is used because the assertEquals alias is deprecated.
     self.assertEqual(B.shape, (5, 5))
     for i in range(5):
         for j in range(5):
             self.assertEqual(B[i, j], A[i, j])

     # Grow: no entries may be added or lost, and the original block
     # must be unchanged.
     B = SparseUtils.resize(A, (15, 15))

     self.assertEqual(B.shape, (15, 15))
     self.assertEqual(B.nnz, A.nnz)
     for i in range(numRows):
         for j in range(numCols):
             self.assertEqual(B[i, j], A[i, j])
    def getNumEdges(self):
        """
        Count the edges of this graph from the non-zero entries of self.W.

        For an undirected graph each off-diagonal edge appears twice in W and
        each self-loop once, so the diagonal non-zeros are added before
        halving. The sum is always even for a symmetric W, hence floor
        division is exact and the result stays an integer.

        :returns: the total number of edges in this graph.
        :rtype: int
        """
        if self.getNumVertices() == 0:
            return 0

        # Note that self.W.getnnz() doesn't seem to work correctly
        numNonZeros = self.W.nonzero()[0].shape[0]
        if self.undirected:
            numLoops = numpy.sum(SparseUtils.diag(self.W) != 0)
            # "//" keeps the count integral; "/" yields a float on Python 3.
            return int((numNonZeros + numLoops) // 2)

        return numNonZeros
예제 #7
0
    def testEquals(self):
        """
        Check SparseUtils.equals on CSR matrices: identical matrices compare
        equal, any differing entry makes them unequal, and two empty matrices
        of the same shape compare equal.
        """
        entries = [[4, 2, 1], [6, 3, 9], [3, 6, 0]]
        left = scipy.sparse.csr_matrix(numpy.array(entries))
        right = scipy.sparse.csr_matrix(numpy.array(entries))

        self.assertTrue(SparseUtils.equals(left, right))

        # Change a stored value on the left only
        left[0, 1] = 5
        self.assertFalse(SparseUtils.equals(left, right))

        # Restore the left and change the same entry on the right instead
        left[0, 1] = 2
        right[0, 1] = 5
        self.assertFalse(SparseUtils.equals(left, right))

        # Fill an entry of the left matrix that was previously zero
        left[2, 2] = -1
        self.assertFalse(SparseUtils.equals(left, right))

        # Test two empty graphs
        emptyLeft = scipy.sparse.csr_matrix((5, 5))
        emptyRight = scipy.sparse.csr_matrix((5, 5))

        self.assertTrue(SparseUtils.equals(emptyLeft, emptyRight))
예제 #8
0
    def testSelectMatrix(self):
        """
        Check that SparseUtils.selectMatrix, asked for every entry of the
        first row of a random 10 x 10 CSR matrix, returns a matrix whose
        first row matches the original and whose other rows are zero.
        """
        numRows = 10
        numCols = 10
        A = scipy.sparse.rand(numRows, numCols, 0.5, "csr")

        # Select first row. Indices are built as integers: a float array of
        # zeros is not a valid index array.
        rowInds = numpy.zeros(numCols, dtype=int)
        colInds = numpy.arange(numCols)

        newA = SparseUtils.selectMatrix(A, rowInds, colInds)

        # assertEqual is used because the assertEquals alias is deprecated.
        for i in range(numCols):
            self.assertEqual(A[0, i], newA[0, i])

        for i in range(1, numRows):
            for j in range(numCols):
                self.assertEqual(newA[i, j], 0)
예제 #9
0
    def testNorm(self):
        """
        Check SparseUtils.norm on ten random 10 x 10 CSR matrices against two
        references: the square root of the sum of squared entries computed
        by hand, and numpy.linalg.norm of the dense copy.
        """
        numRows = 10
        numCols = 10

        for _ in range(10):
            A = scipy.sparse.rand(numRows, numCols, 0.1, "csr")

            norm = SparseUtils.norm(A)

            # Independent reference computed entry by entry.
            norm2 = 0
            for i in range(numRows):
                for j in range(numCols):
                    norm2 += A[i, j]**2

            norm2 = numpy.sqrt(norm2)
            norm3 = numpy.linalg.norm(numpy.array(A.todense()))
            # assertAlmostEqual replaces the deprecated assertAlmostEquals.
            self.assertAlmostEqual(norm, norm2)
            self.assertAlmostEqual(norm, norm3)
    def setWeightMatrix(self, W):
        """
        Set the weight matrix of this graph. Requires as input an ndarray or
        a scipy sparse matrix whose shape is (numVertices, numVertices), where
        numVertices is the size of the vertex list. Edges are represented by
        non-zero entries.

        The matrix is stored in ``self.W`` as a new dense ndarray; sparse
        input is converted to dense first.

        :param W: The weight matrix to use.
        :type W: :class:`ndarray` or :class:`scipy.sparse` matrix
        :raises ValueError: if W has the wrong shape, or if the graph is
            undirected and W is not symmetric.
        """
        numVertices = self.vList.getNumVertices()
        if W.shape != (numVertices, numVertices):
            raise ValueError("Weight matrix has wrong shape : " + str(W.shape))

        isSparse = scipy.sparse.issparse(W)

        if self.undirected:
            if isSparse:
                symmetric = SparseUtils.equals(W, W.T)
            elif isinstance(W, numpy.ndarray):
                # isinstance (rather than an exact type comparison) so that
                # ndarray subclasses are checked for symmetry as well.
                symmetric = not (W != W.T).any()
            else:
                # Other input types are not checked.
                symmetric = True

            if not symmetric:
                raise ValueError("Weight matrix of undirected graph must be symmetric")

        if isSparse:
            W = W.todense()

        # numpy.array copies, so the graph never aliases the caller's array.
        self.W = numpy.array(W)
예제 #11
0
    def cluster(self, graphIterator, verbose=False):
        """
        Find a set of clusters for each graph produced by graphIterator.

        For every graph whose index is a multiple of self.T the eigensystem
        of GraphUtils.normalisedLaplacianRw(W) is recomputed from scratch
        with scipy.sparse.linalg.eigs. For the other graphs the previous
        eigensystem is updated from the change in weights relative to the
        previous graph. The rows of the whitened eigenvector matrix are then
        clustered with k-means into self.k clusters.

        :param graphIterator: an iterable of weight matrices W. Each must
            support copy(), shape and nonzero(); presumably scipy sparse
            matrices - confirm against callers.
        :param verbose: if True, also return timing information.
        :returns: a list with one array of cluster indices per graph. If
            verbose is True, a tuple of that list, an array with one row
            (decomposition time, k-means time) per graph, and boundList,
            which this method never fills.
        """
        clustersList = []
        decompositionTimeList = []
        kMeansTimeList = []
        boundList = []
        numpy.random.seed(self.seed)

        for graphIndex, W in enumerate(graphIterator):
            logging.debug("Graph index:" + str(graphIndex))

            startTime = time.time()
            if graphIndex % self.T != 0:
                # --- Figure out the similarity changes in existing edges ---
                n = lastW.shape[0]
                deltaW = W.copy()
                # Vertices are removed: pad the new matrix to the old shape
                if n > W.shape[0]:
                    deltaW = SparseUtils.resize(deltaW, lastW.shape)

                # Vertices added: rebuild the old matrix with the new shape
                elif n < W.shape[0]:
                    lastWInds = lastW.nonzero()
                    lastWVal = numpy.zeros(len(lastWInds[0]))
                    for pos, (i, j) in enumerate(zip(lastWInds[0], lastWInds[1])):
                        lastWVal[pos] = lastW[i, j]
                    lastW = scipy.sparse.csr_matrix((lastWVal, lastWInds), shape=W.shape)
                deltaW = deltaW - lastW

                # --- Update the decomposition ---
                if n < W.shape[0]:
                    # Give the new vertices zero rows in the eigenvectors
                    Q = numpy.r_[Q, numpy.zeros((W.shape[0]-Q.shape[0], Q.shape[1]))]
                lmbda, Q = self.__updateEigenSystem(lmbda, Q, deltaW, lastW)

                # --- resize the decomposition if the graph is losing vertices ---
                if n > W.shape[0]:
                    Q = Q[0:W.shape[0], :]
            else:
                logging.debug("Recomputing eigensystem")
                # We want to solve the generalized eigenproblem
                # $L.v = lambda.D.v$ with L and D Hermitian.
                # scipy.sparse.linalg does not actually solve this problem
                # (from version 0.11 it solves it but ignores the Hermitian
                # information).
                # So we solve $D^{-1}.L.v = lambda.v$ instead, where
                # $D^{-1}.L$ is no longer Hermitian.
                L = GraphUtils.normalisedLaplacianRw(W)
                lmbda, Q = scipy.sparse.linalg.eigs(L, min(self.k, L.shape[0]-1), which="SM", ncv = min(20*self.k, L.shape[0]), v0=numpy.random.rand(L.shape[0]))

                # eigs returns complex values; only the real parts are kept
                lmbda = lmbda.real
                Q = Q.real
            decompositionTimeList.append(time.time()-startTime)

            # Now do actual clustering
            startTime = time.time()
            V = VqUtils.whiten(Q)
            centroids, distortion = vq.kmeans(V, self.k, iter=self.kmeansIter)
            clusters, distortion = vq.vq(V, centroids)
            clustersList.append(clusters)
            kMeansTimeList.append(time.time()-startTime)

            # Kept for the incremental update on the next graph
            lastW = W.copy()

        if verbose:
            return clustersList, numpy.array((decompositionTimeList, kMeansTimeList)).T, boundList
        else:
            return clustersList