def testFindCentroids(self):
        V = numpy.random.rand(10, 3)
        clusters = numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

        k1 = 2
        k2 = 2
        clusterer = IterativeSpectralClustering(k1, k2)
        centroids = clusterer.findCentroids(V, clusters)

        centroids2 = numpy.zeros((2, 3))
        centroids2[0, :] = numpy.mean(V[0:5, :], 0)
        centroids2[1, :] = numpy.mean(V[5:, :], 0)

        tol = 10**-6
        self.assertTrue(numpy.linalg.norm(centroids - centroids2) < tol)
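
# A minimal NumPy sketch of what the test above checks: the centroids are the
# per-cluster means of the rows of V. The helper name find_centroids is an
# assumption for illustration, not the library's findCentroids implementation.
import numpy

def find_centroids(V, clusters):
    labels = numpy.unique(clusters)
    centroids = numpy.zeros((labels.shape[0], V.shape[1]))
    for j, label in enumerate(labels):
        # average the rows of V assigned to this cluster label
        centroids[j, :] = numpy.mean(V[clusters == label, :], 0)
    return centroids
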
def cluster():
    k1 = 20 # number of clusters to learn
    k2 = 40 # number of eigenvectors kept

    dataDir = PathDefaults.getDataDir() + "cluster/"
    graphIterator = getBemolGraphIterator(dataDir)
    #===========================================
    # cluster
    print("compute clusters")
    clusterer = IterativeSpectralClustering(k1, k2)
    clustersList = clusterer.clusterFromIterator(graphIterator, True)

    for clusters in clustersList:
        print(clusters)
class IterativeSpectralClusteringProfile(object):
    def __init__(self):
        numVertices = 1000
        graph = SparseGraph(GeneralVertexList(numVertices))

        p = 0.1
        generator = ErdosRenyiGenerator(p)
        graph = generator.generate(graph)

        subgraphIndicesList = []
        for i in range(100, numVertices, 10):
            subgraphIndicesList.append(range(i))

        k1 = 5
        k2 = 100 

        self.graph = graph
        self.subgraphIndicesList = subgraphIndicesList
        self.clusterer = IterativeSpectralClustering(k1, k2, T=10, alg="IASC")


    def profileClusterFromIterator(self):
        iterator = IncreasingSubgraphListIterator(self.graph, self.subgraphIndicesList)
        dataDir = PathDefaults.getDataDir() + "cluster/"
        #iterator = getBemolGraphIterator(dataDir)
        
        def run(): 
            clusterList, timeList, boundList = self.clusterer.clusterFromIterator(iterator, verbose=True)
            print(timeList.cumsum(0))
            
        ProfileUtils.profile('run()', globals(), locals())
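
# A minimal driver for the profile class above, shown as a sketch only: the
# __main__ guard below is an assumption and does not appear in the original script.
if __name__ == "__main__":
    profiler = IterativeSpectralClusteringProfile()
    profiler.profileClusterFromIterator()
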
    def testClusterOnPurchases(self):
        #Create a list of purchases and cluster it
        numProd = 30
        numUser = 30
        numPurchasesPerDate = 10
        numDate = 10
        
        numPurchase = numPurchasesPerDate * numDate
        listProd = numpy.random.randint(0, numProd, numPurchase)
        listUser = numpy.random.randint(0, numUser, numPurchase)
        # week 3 repeats week 0 (same products and users)
        listProd[numPurchasesPerDate*3:numPurchasesPerDate*4] = listProd[:numPurchasesPerDate]
        listUser[numPurchasesPerDate*3:numPurchasesPerDate*4] = listUser[:numPurchasesPerDate]
        listWeek = sorted(list(range(numDate)) * numPurchasesPerDate)
        listYear = [2011] * numPurchase
        purchasesList = [list(tup) for tup in zip(listProd, listUser, listWeek, listYear)]
#        print(purchasesList)
        
        k1 = 10
        k2 = 10 
        clusterer = IterativeSpectralClustering(k1, k2)
        #Test full computation of eigenvectors 
        graphIterator = DatedPurchasesGraphListIterator(purchasesList)
        clustersList = clusterer.clusterFromIterator(graphIterator, False)


        for i in range(len(clustersList)):
            clusters = clustersList[i]
#            self.assertEquals(len(subgraphIndicesList[i]), len(clusters))
            print(clusters)

        #Now test approximation of eigenvectors 
        graphIterator = DatedPurchasesGraphListIterator(purchasesList)
        clustersList = clusterer.clusterFromIterator(graphIterator, True)

        for i in range(len(clustersList)):
            clusters = clustersList[i]
            print(clusters)
    def testIncreasingSubgraphListIterator(self):
        #Create a small graph and try the iterator increasing the number of vertices.
        numVertices = 50
        graph = SparseGraph(GeneralVertexList(numVertices))

        ell = 2 
        m = 2 
        generator = BarabasiAlbertGenerator(ell, m)
        graph = generator.generate(graph)

        indices = numpy.random.permutation(numVertices)
        subgraphIndicesList = [indices[0:5], indices]

        graphIterator = IncreasingSubgraphListIterator(graph, subgraphIndicesList)

        #Try a much longer sequence of vertices
        subgraphIndicesList = []
        for i in range(10, numVertices):
            subgraphIndicesList.append(range(i))

        graphIterator = IncreasingSubgraphListIterator(graph, subgraphIndicesList)

        k1 = 3
        k2 = 6
        clusterer = IterativeSpectralClustering(k1, k2)
        clustersList = clusterer.clusterFromIterator(graphIterator)

        #Now test the Nystrom method
        graphIterator = IncreasingSubgraphListIterator(graph, subgraphIndicesList)
        clusterer = IterativeSpectralClustering(k1, alg="nystrom")
        clustersList = clusterer.clusterFromIterator(graphIterator)
        
        #Test efficient Nystrom method 
        graphIterator = IncreasingSubgraphListIterator(graph, subgraphIndicesList)
        clusterer = IterativeSpectralClustering(k1, alg="efficientNystrom")
        clustersList = clusterer.clusterFromIterator(graphIterator)
detectionIndex = fInds["detectDate"]
vertexArray = graph.getVertexList().getVertices()
detections = vertexArray[:, detectionIndex]

startYear = 1900
daysInMonth = 30
monthStep = 3
dayList = list(range(int(numpy.min(detections)), int(numpy.max(detections)), daysInMonth*monthStep))
dayList.append(numpy.max(detections))

subgraphIndicesList = []
subgraphIndicesList.append(range(graph.getNumVertices()))

k1 = 25
k2 = 2*k1
clusterer = IterativeSpectralClustering(k1, k2)
clusterer.nb_iter_kmeans = 20

logging.info("Running exact method")
iterator = IncreasingSubgraphListIterator(graph, subgraphIndicesList)
clusterListExact, timeListExact, boundList = clusterer.clusterFromIterator(iterator, False, verbose=True)

clusters = clusterListExact[0]

subgraphIndicesList = []
#minGraphSize = 100
minGraphSize = 500

#Generate subgraph indices list
for i in dayList:
    logging.info("Date: " + str(DateUtils.getDateStrFromDay(i, startYear)))
 
k2s = [3, 6, 12, 24]
 
if saveResults: 
    numClusters = 3
    k1 = numClusters
    
    k3 = 90
    k4 = 90 
    T = 8 # index of iteration where exact decomposition is computed
    exactClusterer = IterativeSpectralClustering(k1, alg="exact")
    iascClusterers = []
    for k2 in k2s: 
        iascClusterers.append(IterativeSpectralClustering(k1, k2, alg="IASC", T=T)) 
    nystromClusterer = IterativeSpectralClustering(k1, k3=k3, alg="nystrom")
    ningsClusterer = NingSpectralClustering(k1, T=T)
    randSvdCluster = IterativeSpectralClustering(k1, k4=k4, alg="randomisedSvd")
    
    numRepetitions = 50
    #numRepetitions = 2
    do_Nings = True
    
    clustErrApprox = numpy.zeros((ps.shape[0], numGraphs, numRepetitions, len(k2s)))
    clustErrExact = numpy.zeros((ps.shape[0], numGraphs, numRepetitions))
    clustErrNings = numpy.zeros((ps.shape[0], numGraphs, numRepetitions))
    clustErrNystrom = numpy.zeros((ps.shape[0], numGraphs, numRepetitions))
"""
for W in graphIterator: 
    graph = SparseGraph(GeneralVertexList(W.shape[0]))
    graph.setWeightMatrixSparse(W) 
    components = graph.findConnectedComponents()
    print(graph)
    
    
    L = GraphUtils.shiftLaplacian(graph.getSparseWeightMatrix())
    
    u, V = numpy.linalg.eig(L.todense())
    inds = numpy.argsort(u)
    u = u[inds]
    
    k = 20 
    print((u[0:k]**2).sum())
    print((u[k:]**2).sum())
"""

numGraphs = len(subgraphIndicesList)

k1 = 3 
k2 = 3

clusterer = IterativeSpectralClustering(k1, k2)
clusterer.nb_iter_kmeans = 20
clusterer.computeBound = True 
clusterList, timeList, boundList = clusterer.clusterFromIterator(graphIterator, verbose=True)

boundList = numpy.array(boundList)
print(boundList)
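
# The cumulative timings can be inspected in the same way as in the profiling
# class above; converting timeList with numpy.array is an assumption about the
# type returned by clusterFromIterator.
timeList = numpy.array(timeList)
print(timeList.cumsum(0))
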
    def testClusterOnIncreasingGraphs(self):
        #Create a large graph and try the clustering.
        numClusters = 3
        ClusterSize = 30
        numFeatures = 0
        
        pNoise = 0
        pClust = 1

        numVertices = numClusters*ClusterSize
        vList = VertexList(numVertices, numFeatures)
        graph = SparseGraph(vList)

#        ell = 2 
#        m = 2 
#        generator = BarabasiAlbertGenerator(ell, m)
#        graph = generator.generate(graph)
        #Generate matrix of probabilities
        W = numpy.ones((numVertices, numVertices))*pNoise
        for i in range(numClusters):
            W[ClusterSize*i:ClusterSize*(i+1), ClusterSize*i:ClusterSize*(i+1)] = pClust
        P = numpy.random.rand(numVertices, numVertices)
        W = numpy.array(P < W, float)
        upTriInds = numpy.triu_indices(numVertices)
        W[upTriInds] = 0
        W = W + W.T
        graph = SparseGraph(vList)
        graph.setWeightMatrix(W)

        indices = numpy.random.permutation(numVertices)
        subgraphIndicesList = [indices[0:numVertices//2], indices]

        k1 = numClusters
        k2 = 10
        clusterer = IterativeSpectralClustering(k1, k2)
        #Test full computation of eigenvectors
        graphIterator = IncreasingSubgraphListIterator(graph, subgraphIndicesList)
        clustersList = clusterer.clusterFromIterator(graphIterator, False)

        self.assertEqual(len(clustersList), len(subgraphIndicesList))

        for i in range(len(clustersList)):
            clusters = clustersList[i]
            self.assertEqual(len(subgraphIndicesList[i]), len(clusters))
            #print(clusters)

        #Test full computation of eigenvectors with iterator
        graphIterator = IncreasingSubgraphListIterator(graph, subgraphIndicesList)
        clustersList = clusterer.clusterFromIterator(graphIterator, False)

        self.assertEqual(len(clustersList), len(subgraphIndicesList))

        for i in range(len(clustersList)):
            clusters = clustersList[i]
            self.assertEqual(len(subgraphIndicesList[i]), len(clusters))
            #print(clusters)

        #Now test approximation of eigenvectors with iterator
        graphIterator = IncreasingSubgraphListIterator(graph, subgraphIndicesList)
        clustersList2 = clusterer.clusterFromIterator(graphIterator)

        for i in range(len(clustersList2)):
            clusters = clustersList2[i]
            self.assertEqual(len(subgraphIndicesList[i]), len(clusters))
            #print(clusters)

        #Test case where 2 graphs are identical
        subgraphIndicesList = []
        subgraphIndicesList.append(range(graph.getNumVertices()))
        subgraphIndicesList.append(range(graph.getNumVertices()))

        graphIterator = IncreasingSubgraphListIterator(graph, subgraphIndicesList)
        clustersList = clusterer.clusterFromIterator(graphIterator, True)
def computeClusterError(learnedClustering, numVertices, clust_size):
    # pairwise clustering error (header and pair loops assumed; not in the extract)
    error = 0
    for v1 in range(numVertices):
        for v2 in range(v1):
            same_cl = (v1 // clust_size) == (v2 // clust_size)
            same_learned_cl = learnedClustering[v1] == learnedClustering[v2]
            error += same_cl != same_learned_cl

    return float(error) * 2 / (numVertices) / (numVertices - 1)
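
# Hypothetical sanity check for the reconstructed error measure above: a learned
# clustering identical to the ground-truth block structure should give zero error
# (clust_size is assumed to divide numVertices).
import numpy
perfectClustering = numpy.arange(90) // 30
assert computeClusterError(perfectClustering, 90, 30) == 0.0
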


# =========================================================================
# =========================================================================
# run
# =========================================================================
# =========================================================================
numIter = len(range(args.startingIteration, args.endingIteration))

logging.info("compute clusters")
exactClusterer = IterativeSpectralClustering(args.k1, alg="exact", computeSinTheta=True)
approxClusterer = IterativeSpectralClustering(args.k1, args.k2, T=args.exactFreq, alg="IASC", computeSinTheta=True)
nystromClusterer = IterativeSpectralClustering(args.k1, k3=args.k3, alg="nystrom", computeSinTheta=True)
RSvdClusterer = IterativeSpectralClustering(args.k1, k4=args.k4, alg="randomisedSvd", computeSinTheta=True)
ningsClusterer = NingSpectralClustering(args.k1, T=args.exactFreq, computeSinTheta=True)


exactClusterer.nb_iter_kmeans = 20
approxClusterer.nb_iter_kmeans = 20
nystromClusterer.nb_iter_kmeans = 20
RSvdClusterer.nb_iter_kmeans = 20
ningsClusterer.nb_iter_kmeans = 20

# exactClusterer.computeBound = args.computeBound        # computeBound not implemented for exactClusterer
approxClusterer.computeBound = args.computeBound
# nystromClusterer.computeBound = args.computeBound      # computeBound not implemented for nystromClusterer
    def runExperiment(self):
        """
        Run the selected clustering experiments and save results
        """
        
        if self.algoArgs.runIASC:
            logging.debug("Running approximate method")
            
            for k2 in self.algoArgs.k2s: 
                logging.debug("k2=" + str(k2))
                clusterer = IterativeSpectralClustering(self.algoArgs.k1, k2=k2, T=self.algoArgs.T, alg="IASC", logStep=self.logStep)
                clusterer.nb_iter_kmeans = 20
                clusterer.computeBound = self.algoArgs.computeBound
                iterator = self.getIterator()
                clusterList, timeList, boundList = clusterer.clusterFromIterator(iterator, verbose=True)
    
                resultsFileName = self.resultsDir + "ResultsIASC_k1=" + str(self.algoArgs.k1) + "_k2=" + str(k2) + "_T=" + str(self.algoArgs.T) + ".npz"
                self.recordResults(clusterList, timeList, resultsFileName)

        if self.algoArgs.runExact:
            logging.debug("Running exact method")
            clusterer = IterativeSpectralClustering(self.algoArgs.k1, alg="exact", logStep=self.logStep)
            clusterer.nb_iter_kmeans = 20
            iterator = self.getIterator()
            clusterList, timeList, boundList = clusterer.clusterFromIterator(iterator, verbose=True)

            resultsFileName = self.resultsDir + "ResultsExact_k1=" + str(self.algoArgs.k1) + ".npz"
            self.recordResults(clusterList, timeList, resultsFileName)

        if self.algoArgs.runNystrom:
            logging.debug("Running Nystrom method")
            
            for k3 in self.algoArgs.k3s: 
                logging.debug("k3=" + str(k3))
                clusterer = IterativeSpectralClustering(self.algoArgs.k1, k3=k3, alg="nystrom", logStep=self.logStep)
                clusterer.nb_iter_kmeans = 20
                clusterer.computeBound = self.algoArgs.computeBound
                iterator = self.getIterator()
                clusterList, timeList, boundList = clusterer.clusterFromIterator(iterator, verbose=True)
    
                resultsFileName = self.resultsDir + "ResultsNystrom_k1="+ str(self.algoArgs.k1) + "_k3=" + str(k3) + ".npz"
                self.recordResults(clusterList, timeList, resultsFileName)
                
        if self.algoArgs.runRandomisedSvd:
            logging.debug("Running randomised SVD method")
            
            for k4 in self.algoArgs.k4s: 
                logging.debug("k4=" + str(k4))
                clusterer = IterativeSpectralClustering(self.algoArgs.k1, k4=k4, alg="randomisedSvd", logStep=self.logStep)
                clusterer.nb_iter_kmeans = 20
                clusterer.computeBound = self.algoArgs.computeBound
                iterator = self.getIterator()
                clusterList, timeList, boundList = clusterer.clusterFromIterator(iterator, verbose=True)
    
                resultsFileName = self.resultsDir + "ResultsRandomisedSvd_k1="+ str(self.algoArgs.k1) + "_k4=" + str(k4) + ".npz"
                self.recordResults(clusterList, timeList, resultsFileName)
                
        if self.algoArgs.runEfficientNystrom:
            logging.debug("Running efficient Nystrom method")
            
            for k3 in self.algoArgs.k3s: 
                logging.debug("k3=" + str(k3))
                clusterer = IterativeSpectralClustering(self.algoArgs.k1, k3=k3, alg="efficientNystrom", logStep=self.logStep)
                clusterer.nb_iter_kmeans = 20
                clusterer.computeBound = self.algoArgs.computeBound
                iterator = self.getIterator()
                clusterList, timeList, boundList = clusterer.clusterFromIterator(iterator, verbose=True)
    
                resultsFileName = self.resultsDir + "ResultsEfficientNystrom_k1="+ str(self.algoArgs.k1) + "_k3=" + str(k3) + ".npz"
                self.recordResults(clusterList, timeList, resultsFileName)

        if self.algoArgs.runModularity: 
            logging.info("Running modularity clustering")
            clusterer = IterativeModularityClustering(self.algoArgs.k1)
            iterator = self.getIterator()

            clusterList, timeList, boundList = clusterer.clusterFromIterator(iterator, verbose=True)

            resultsFileName = self.resultsDir + "ResultsModularity_k1=" + str(self.algoArgs.k1) + ".npz"
            self.recordResults(clusterList, timeList, resultsFileName)

        if self.algoArgs.runNing:
            logging.info("Running Nings method")
            iterator = self.getIterator()
            clusterer = NingSpectralClustering(self.algoArgs.k1, T=self.algoArgs.T)
            clusterList, timeList, boundList = clusterer.cluster(iterator, verbose=True)

            resultsFileName = self.resultsDir + "ResultsNing_k1=" + str(self.algoArgs.k1) + "_T=" + str(self.algoArgs.T) + ".npz"
            self.recordResults(clusterList, timeList, resultsFileName)

        logging.info("All done: see you around!")
"""
 
k2s = [3, 6, 12, 24, 150]
k3s = [3, 24, 90]
k4s = [3, 24]
# debug of IASC
#k2s = [3, 6, 12, 24, 150]
#k3s = [3]
#k4s = [3]
 
if saveResults: 
    numClusters = 3
    k1 = numClusters
    
    T = 8 # index of iteration where exact decomposition is computed
    exactClusterer = IterativeSpectralClustering(k1, alg="exact", computeSinTheta=True)
    iascClusterers = []
    for k2 in k2s: 
        iascClusterers.append(IterativeSpectralClustering(k1, k2, alg="IASC", computeSinTheta=True, T=T)) 
    nystromClusterers = []
    for k3 in k3s: 
        nystromClusterers.append(IterativeSpectralClustering(k1, k3=k3, alg="nystrom", computeSinTheta=True))
    ningsClusterer = NingSpectralClustering(k1, T=T, computeSinTheta=True)
    randSvdClusterers = []
    for k4 in k4s: 
        randSvdClusterers.append(IterativeSpectralClustering(k1, k4=k4, alg="randomisedSvd", computeSinTheta=True))
    
    numRepetitions = 50
#    numRepetitions = 2
    do_Nings = True