def testIncrementEigenSystem(self):
        """
        Compare the incremental eigen-update against an exact recomputation.

        A random graph on 10 vertices is generated, the k smallest generalised
        eigenpairs of (L, D) are extracted, and the weight of edge (5, 7) is
        increased by deltaW. The approximation returned by
        NingSpectralClustering.incrementEigenSystem is then compared with the
        eigenpairs recomputed from the updated matrices. The errors are only
        logged at debug level; nothing is asserted.
        """
        print("< testIncrementEigenSystem >")
        numVertices = 10
        graph = SparseGraph(GeneralVertexList(numVertices))

        p = 0.4
        generator = ErdosRenyiGenerator(p)
        graph = generator.generate(graph)

        W = graph.getWeightMatrix()
        L = graph.laplacianMatrix()
        degrees = graph.outDegreeSequence()
        D = numpy.diag(degrees)

        lmbda1, Q1 = scipy.linalg.eig(L, D)
        lmbda1 = lmbda1.real
        #Rescale each column q of Q1 by (q^T D q)^-0.5
        Q1 = Q1.dot(numpy.diag(numpy.diag(Q1.T.dot(D).dot(Q1))**-0.5))

        #Keep the k smallest eigenvalues and their eigenvectors
        k = 3
        inds = numpy.argsort(lmbda1)[0:k]
        lmbda1, Q1 = Util.indEig(lmbda1, Q1, inds)

        #Similarity change: the weight of edge (5, 7) grows by deltaW
        deltaW = 0.5

        clusterer = NingSpectralClustering(k)
        lmbda2Approx, Q2Approx = clusterer.incrementEigenSystem(lmbda1, Q1, scipy.sparse.csr_matrix(W), 5, 7, deltaW)

        #Compute real eigenvectors then compare against these
        Lhat = L.copy()
        Lhat[5,5] += deltaW
        Lhat[7,7] += deltaW
        Lhat[5,7] -= deltaW
        Lhat[7,5] -= deltaW
        Dhat = numpy.diag(numpy.diag(Lhat))
        lmbda2, Q2 = scipy.linalg.eig(Lhat, Dhat)
        #Rank the eigenvalues of the updated system themselves: eig returns
        #them in no guaranteed order, so the indices computed for the first
        #decomposition cannot be reused here.
        inds2 = numpy.argsort(lmbda2.real)[0:k]
        lmbda2, Q2 = Util.indEig(lmbda2, Q2, inds2)
        lmbda2 = lmbda2.real

        #Rescale every eigenvector column to unit Euclidean norm
        Q2Approx = Q2Approx.dot(numpy.diag(numpy.diag(Q2Approx.T.dot(Q2Approx))**-0.5))
        Q2 = Q2.dot(numpy.diag(numpy.sum(Q2**2, 0)**-0.5))
        Q1 = Q1.dot(numpy.diag(numpy.sum(Q1**2, 0)**-0.5))

        #Errors in the eigenvalues: approximation first, then the baseline
        #obtained by simply keeping the old eigenvalues
        logging.debug("Eigenvalue Errors")
        logging.debug(numpy.linalg.norm(lmbda2 - lmbda2Approx))
        logging.debug(numpy.linalg.norm(lmbda2 - lmbda1))

        #Compute error according to the paper: sum over the k columns of
        #1 - (inner product of exact and compared eigenvector)^2
        error = numpy.sum(1 - numpy.diag(Q2.T.dot(Q2Approx))**2)
        error2 = numpy.sum(1 - numpy.diag(Q2.T.dot(Q1))**2)
        logging.debug("Eigenvector Errors")
        logging.debug(error)
        logging.debug(error2)
 def testDebug(self): 
     if not os.path.isfile("lmbda.npy"):
         print "blop"
         return
     print "< debugging Nings approach >"
     lmbda = numpy.load("lmbda.npy")
     Q = numpy.load("Q.npy")
     W = numpy.load("W.npy")
     i = numpy.load("i.npy")
     j = numpy.load("j.npy")
     deltaW = numpy.load("deltaW.npy")
     
     clusterer = NingSpectralClustering(Q.shape[1])
     clusterer.incrementEigenSystem(lmbda, Q, W, i, j, deltaW)
     print "</ debugging Nings approach >"
Example #3
0
 
# Values of k2 for which an IASC clusterer is constructed below.
k2s = [3, 6, 12, 24]
 
# saveResults, ps and numGraphs are not defined in this fragment; they are
# expected to come from earlier in the script.
if saveResults: 
    numClusters = 3
    k1 = numClusters
    
    # k3 is passed to the "nystrom" clusterer and k4 to the "randomisedSvd"
    # clusterer below.
    k3 = 90
    k4 = 90 
    T = 8 # index of iteration where exact decomposition is computed
    # One clusterer per algorithm under comparison; IASC gets one instance
    # for every k2 in k2s.
    exactClusterer = IterativeSpectralClustering(k1, alg="exact")
    iascClusterers = []
    for k2 in k2s: 
        iascClusterers.append(IterativeSpectralClustering(k1, k2, alg="IASC", T=T)) 
    nystromClusterer = IterativeSpectralClustering(k1, k3=k3, alg="nystrom")
    ningsClusterer = NingSpectralClustering(k1, T=T)
    randSvdCluster = IterativeSpectralClustering(k1, k4=k4, alg="randomisedSvd")
    
    numRepetitions = 50
    #numRepetitions = 2
    # NOTE(review): do_Nings is not read anywhere in the visible fragment;
    # presumably it toggles the Ning clusterer further down - confirm.
    do_Nings = True
    
    # Zero-initialised result arrays, one per method, with axes
    # (ps.shape[0], numGraphs, numRepetitions); the IASC array carries an
    # extra trailing axis of length len(k2s).
    clustErrApprox = numpy.zeros((ps.shape[0], numGraphs, numRepetitions, len(k2s)))
    clustErrExact = numpy.zeros((ps.shape[0], numGraphs, numRepetitions))
    clustErrNings = numpy.zeros((ps.shape[0], numGraphs, numRepetitions))
    clustErrNystrom = numpy.zeros((ps.shape[0], numGraphs, numRepetitions))
    clustErrRandSvd = numpy.zeros((ps.shape[0], numGraphs, numRepetitions))
    
    # NOTE(review): the loop body visible here only calls
    # Util.printIteration; the fragment appears to be cut off before the
    # actual per-repetition work.
    for r in range(numRepetitions):
        Util.printIteration(r, 1, numRepetitions)
    
    return float(error) * 2 / (numVertices) / (numVertices - 1)


# =========================================================================
# =========================================================================
# run
# =========================================================================
# =========================================================================
# Number of iterations in the half-open range [startingIteration, endingIteration)
numIter = len(range(args.startingIteration, args.endingIteration))

logging.info("compute clusters")

# One clusterer per algorithm, all sharing k1 and all computing sin(theta)
exactClusterer = IterativeSpectralClustering(
    args.k1, alg="exact", computeSinTheta=True)
approxClusterer = IterativeSpectralClustering(
    args.k1, args.k2, T=args.exactFreq, alg="IASC", computeSinTheta=True)
nystromClusterer = IterativeSpectralClustering(
    args.k1, k3=args.k3, alg="nystrom", computeSinTheta=True)
RSvdClusterer = IterativeSpectralClustering(
    args.k1, k4=args.k4, alg="randomisedSvd", computeSinTheta=True)
ningsClusterer = NingSpectralClustering(
    args.k1, T=args.exactFreq, computeSinTheta=True)

# Same k-means iteration count for every clusterer
for _clusterer in (exactClusterer, approxClusterer, nystromClusterer,
                   RSvdClusterer, ningsClusterer):
    _clusterer.nb_iter_kmeans = 20
del _clusterer

# computeBound is only implemented for the IASC clusterer; it is deliberately
# not set on the exact, nystrom, randomised SVD and Ning clusterers.
approxClusterer.computeBound = args.computeBound

    def testIncrementEigenSystem2(self):
        """
        We use the example from the paper to see if the error in the eigenvalues
        and eigenvectors decreases.

        A weighted graph on 10 vertices is built, its k smallest generalised
        eigenpairs of (L, D) are computed, and edge (0, 4) of weight 0.5 is
        removed (deltaW = -0.5). The incremental approximation is compared
        with the eigenpairs recomputed from the updated matrices. The errors
        are only logged at debug level; nothing is asserted.
        """
        print("< testIncrementEigenSystem2 >")

        numVertices = 10
        graph = SparseGraph(GeneralVertexList(numVertices))

        graph.addEdge(0, 1, 0.7)
        graph.addEdge(1, 2, 0.4)
        graph.addEdge(2, 3, 0.3)
        graph.addEdge(1, 3, 0.1)
        graph.addEdge(0, 4, 0.5)
        graph.addEdge(3, 4, 0.4)
        graph.addEdge(4, 5, 0.8)
        graph.addEdge(3, 5, 0.3)
        graph.addEdge(6, 5, 0.4)
        graph.addEdge(5, 9, 0.5)
        graph.addEdge(6, 9, 0.3)
        graph.addEdge(6, 7, 0.1)
        graph.addEdge(6, 8, 0.6)
        graph.addEdge(7, 8, 0.7)
        graph.addEdge(9, 8, 0.7)

        W = graph.getWeightMatrix()
        L = graph.laplacianWeightMatrix()
        degrees = numpy.sum(W, 0)
        D = numpy.diag(degrees)

        k = 3
        lmbda1, Q1 = scipy.linalg.eig(L, D)
        inds = numpy.argsort(lmbda1)[0:k]
        lmbda1, Q1 = Util.indEig(lmbda1, Q1, inds)
        lmbda1 = lmbda1.real

        #Remove edge 0, 4: its weight is 0.5, hence a change of -0.5
        deltaW = -0.5

        clusterer = NingSpectralClustering(k)
        lmbda2Approx, Q2Approx = clusterer.incrementEigenSystem(lmbda1, Q1, scipy.sparse.csr_matrix(W), 0, 4, deltaW)

        #Compute real eigenvectors then compare against these. The change is
        #applied to the Laplacian entries directly, as in
        #testIncrementEigenSystem; the previous all-zero update vector left
        #Lhat equal to L.
        Lhat = L.copy()
        Lhat[0,0] += deltaW
        Lhat[4,4] += deltaW
        Lhat[0,4] -= deltaW
        Lhat[4,0] -= deltaW
        Dhat = numpy.diag(numpy.diag(Lhat))
        lmbda2, Q2 = scipy.linalg.eig(Lhat, Dhat)
        #Rank the eigenvalues of the updated system themselves: eig returns
        #them in no guaranteed order, so the earlier indices cannot be reused.
        inds2 = numpy.argsort(lmbda2.real)[0:k]
        lmbda2, Q2 = Util.indEig(lmbda2, Q2, inds2)

        #Rescale every eigenvector column to unit Euclidean norm
        Q2Approx = Q2Approx.dot(numpy.diag(numpy.diag(Q2Approx.T.dot(Q2Approx))**-0.5))
        Q2 = Q2.dot(numpy.diag(numpy.sum(Q2**2, 0)**-0.5))
        Q1 = Q1.dot(numpy.diag(numpy.sum(Q1**2, 0)**-0.5))

        #Compute error according to the paper
        #2 iterations works best - 3 seems to be worse!!!
        #errors1: exact vs. incremental approximation
        #errors2: exact vs. the unchanged old eigenvectors (baseline)
        errors1 = 1 - numpy.diag(Q2.T.dot(Q2Approx))**2
        errors2 = 1 - numpy.diag(Q2.T.dot(Q1))**2
        logging.debug("Eigenvector Errors")
        logging.debug(errors1)
        logging.debug(errors2)
    def testCluster(self):
        """
        Run NingSpectralClustering.cluster on three short graph sequences.

        The sequences are (1) growing leading sub-blocks of one weight matrix
        that is edited in place, (2) snapshots of a graph to which edges are
        added, and (3) shrinking sub-blocks of the final matrix. Assertions
        are made only on the third run: the last two clusterings must agree
        on the shared vertices, and the first clustering must equal one
        computed directly from the random-walk normalised Laplacian.
        """
        print("< testCluster >")
        numVertices = 8
        graph = SparseGraph(GeneralVertexList(numVertices))

        graph.addEdge(0, 1)
        graph.addEdge(0, 2)
        graph.addEdge(1, 2)

        graph.addEdge(3, 4)
        graph.addEdge(3, 5)
        graph.addEdge(4, 5)

        graph.addEdge(0, 3)

        W = graph.getWeightMatrix()

        #W is edited in place between snapshots, so each snapshot is copied:
        #6x6 block, then 7x7 after linking 1-6, then all of W after linking 4-7
        graphIterator = []
        graphIterator.append(W[0:6, 0:6].copy())
        W[1, 6] += 1
        W[6, 1] += 1
        graphIterator.append(W[0:7, 0:7].copy())
        W[4, 7] += 1
        W[7, 4] += 1
        graphIterator.append(W.copy())
        graphIterator = iter(graphIterator)

        k = 2
        clusterer = NingSpectralClustering(k)
        clustersList = clusterer.cluster(toSparseGraphListIterator(graphIterator))

        #Why are the bottom rows of Q still zero?

        #Try example in which only edges change
        numVertices = 7
        graph = SparseGraph(GeneralVertexList(numVertices))

        graph.addEdge(0, 1)
        graph.addEdge(0, 2)
        graph.addEdge(1, 2)

        graph.addEdge(3, 4)

        WList = []
        W = graph.getWeightMatrix()
        WList.append(W[0:5, 0:5].copy())

        graph.addEdge(3, 5)
        graph.addEdge(4, 5)
        W = graph.getWeightMatrix()
        WList.append(W[0:6, 0:6].copy())

        graph.addEdge(0, 6)
        graph.addEdge(1, 6)
        graph.addEdge(2, 6)
        W = graph.getWeightMatrix()
        WList.append(W[0:7, 0:7].copy())

        iterator = iter(WList)
        clustersList = clusterer.cluster(toSparseGraphListIterator(iterator))

        #Seems to work, amazingly
        #print(clustersList)

        #Try removing rows/cols
        W2 = W[0:5, 0:5]
        W3 = W[0:4, 0:4]
        WList = [W, W2, W3]
        iterator = iter(WList)
        clustersList = clusterer.cluster(toSparseGraphListIterator(iterator))

        #nptst.assert_array_equal(clustersList[0][0:5], clustersList[1])
        nptst.assert_array_equal(clustersList[1][0:4], clustersList[2])

        #Make sure 1st clustering (without updates) is correct
        L = GraphUtils.normalisedLaplacianRw(scipy.sparse.csr_matrix(W))
        numpy.random.seed(21)
        lmbda, Q = scipy.sparse.linalg.eigs(L, min(k, L.shape[0]-1), which="SM", ncv = min(20*k, L.shape[0]), v0=numpy.random.rand(L.shape[0]))


        V = VqUtils.whiten(Q)
        centroids, distortion = vq.kmeans(V, k, iter=20)
        clusters, distortion = vq.vq(V, centroids)

        #This should be equal but the eigenvector computation is unstable
        #even with repeated runs (and no way to set the seed)
        nptst.assert_array_equal(clusters, clustersList[0])
    def testIncrementalEigenSystem3(self):
        """
        Test case where we add a vertex and need to increase size of eigenvectors.

        The k smallest generalised eigenpairs are computed for the subgraph
        on the first 7 vertices. The eigenvectors are padded with one extra
        row, and incrementEigenSystem is asked to add edge (4, 7) with weight
        1. The result is compared with the eigenvectors of the full 8-vertex
        graph. The errors are only logged at debug level; nothing is asserted.
        """
        print("< testIncrementEigenSystem3 >")
        numVertices = 8
        graph = SparseGraph(GeneralVertexList(numVertices))

        graph.addEdge(0, 1)
        graph.addEdge(0, 2)
        graph.addEdge(1, 2)
        graph.addEdge(3, 4)
        graph.addEdge(3, 5)
        graph.addEdge(4, 5)
        graph.addEdge(0, 3)
        graph.addEdge(1, 6)
        graph.addEdge(4, 7)

        subgraph = graph.subgraph(range(7))

        W1 = subgraph.getWeightMatrix()
        L1 = subgraph.laplacianWeightMatrix()
        degrees1 = numpy.sum(W1, 0)
        D1 = numpy.diag(degrees1)

        W2 = graph.getWeightMatrix()
        L2 = graph.laplacianWeightMatrix()
        degrees2 = numpy.sum(W2, 0)
        D2 = numpy.diag(degrees2)

        k = 3
        lmbda1, Q1 = scipy.linalg.eig(L1, D1)
        inds = numpy.argsort(lmbda1)[0:k]
        lmbda1, Q1 = Util.indEig(lmbda1, Q1, inds)
        lmbda1 = lmbda1.real

        #Weight matrix of the subgraph padded with a zero row and column for
        #the vertex that is about to be connected
        W1hat = numpy.r_[numpy.c_[W1, numpy.zeros(numVertices-1)], numpy.zeros((1, numVertices))]

        #Exact eigenpairs of the full graph. They are kept in their own
        #variables so that lmbda1 still holds the eigenvalues belonging to Q1
        #when it is handed to incrementEigenSystem.
        lmbda2, Q2 = scipy.linalg.eig(L2, D2)
        inds2 = numpy.argsort(lmbda2)[0:k]
        lmbda2, Q2 = Util.indEig(lmbda2, Q2, inds2)
        lmbda2 = lmbda2.real

        #Increase size of eigenvector - not clear how to do this
        #(here a row of ones is appended for the new vertex)
        Q1 = numpy.r_[Q1, numpy.ones((1, Q1.shape[1]))]

        clusterer = NingSpectralClustering(k)
        lmbda2Approx, Q2Approx = clusterer.incrementEigenSystem(lmbda1, Q1, scipy.sparse.csr_matrix(W1hat), 4, 7, 1)

        #Rescale every eigenvector column to unit Euclidean norm
        Q2Approx = Q2Approx.dot(numpy.diag(numpy.diag(Q2Approx.T.dot(Q2Approx))**-0.5))
        Q2 = Q2.dot(numpy.diag(numpy.sum(Q2**2, 0)**-0.5))
        Q1 = Q1.dot(numpy.diag(numpy.sum(Q1**2, 0)**-0.5))

        #Setting the last value of the eigenvectors to zero seems to improve
        #over setting them to 1, but the last eigenvector has a huge error.
        errors1 = 1 - numpy.diag(Q2.T.dot(Q2Approx))**2
        errors2 = 1 - numpy.diag(Q2.T.dot(Q1))**2
        logging.debug("Eigenvector Errors for added vertex")
        logging.debug(errors1)
        logging.debug(errors2)
Example #8
0
#k3s = [3]
#k4s = [3]
 
if saveResults: 
    numClusters = 3
    k1 = numClusters
    
    T = 8 # index of iteration where exact decomposition is computed
    exactClusterer = IterativeSpectralClustering(k1, alg="exact", computeSinTheta=True)
    iascClusterers = []
    for k2 in k2s: 
        iascClusterers.append(IterativeSpectralClustering(k1, k2, alg="IASC", computeSinTheta=True, T=T)) 
    nystromClusterers = []
    for k3 in k3s: 
        nystromClusterers.append(IterativeSpectralClustering(k1, k3=k3, alg="nystrom", computeSinTheta=True))
    ningsClusterer = NingSpectralClustering(k1, T=T, computeSinTheta=True)
    randSvdClusterers = []
    for k4 in k4s: 
        randSvdClusterers.append(IterativeSpectralClustering(k1, k4=k4, alg="randomisedSvd", computeSinTheta=True))
    
    numRepetitions = 50
#    numRepetitions = 2
    do_Nings = True
    
    clustErrApprox = numpy.zeros((ps.shape[0], numGraphs, numRepetitions, len(k2s)))
    clustErrExact = numpy.zeros((ps.shape[0], numGraphs, numRepetitions))
    clustErrNings = numpy.zeros((ps.shape[0], numGraphs, numRepetitions))
    clustErrNystrom = numpy.zeros((ps.shape[0], numGraphs, numRepetitions, len(k3s)))
    clustErrRandSvd = numpy.zeros((ps.shape[0], numGraphs, numRepetitions, len(k4s)))
    sinThetaApprox = numpy.zeros((ps.shape[0], numGraphs, numRepetitions, len(k2s)))
    sinThetaExact = numpy.zeros((ps.shape[0], numGraphs, numRepetitions))