def getClusters(self):
    """Run Louvain passes until a pass no longer merges any cluster.

    The working matrix starts as ``csr_matrix(self.similarityMatrixBuilder)``.
    Each pass calls ``LouvainClusterer.getOnePassLouvainCommunities`` on it;
    when the pass yields fewer clusters than the matrix has rows, the matrix
    is rebuilt with ``LouvainClusterer.buildNewSimilarityMatrix`` and another
    pass is run.  A banner with the pass number is printed before each pass.

    Returns:
        ``np.array(realClusters)`` where ``realClusters`` is the label vector
        of the first pass, subsequently handed to
        ``LouvainClusterer.updateRealClusters`` after every later pass.
    """
    matrix = csr_matrix(self.similarityMatrixBuilder)
    realClusters = None
    passNumber = 1
    while True:
        # Single-argument parenthesised print emits the same text on
        # Python 2 and Python 3.
        print("-" * 40)
        print("PASS #{0}".format(passNumber))
        passNumber += 1

        clustersNumber, clusters = (
            LouvainClusterer.getOnePassLouvainCommunities(matrix))

        if realClusters is None:
            # First pass: its labels are the starting assignment.
            realClusters = clusters
        else:
            # The return value is discarded, so this helper is expected to
            # update realClusters in place -- TODO confirm in its definition.
            LouvainClusterer.updateRealClusters(realClusters, clusters)

        # As many clusters as rows: nothing was merged, so stop.
        if clustersNumber == matrix.shape[0]:
            break
        matrix = LouvainClusterer.buildNewSimilarityMatrix(
            matrix, clusters, clustersNumber)
    return np.array(realClusters)
def getClusters(self):
    """Cluster the similarity matrix by repeating Louvain passes.

    Starting from ``csr_matrix(self.similarityMatrixBuilder)``, one pass is
    run per loop iteration.  The loop ends as soon as a pass returns a
    cluster count equal to the number of rows of the matrix it was given;
    otherwise the matrix is replaced by the one built from the clusters just
    found and the loop continues.  Progress banners are printed to stdout.

    Returns:
        The accumulated label vector wrapped in ``np.array``.
    """
    matrix = csr_matrix(self.similarityMatrixBuilder)
    realClusters = []
    firstRound = True
    passIndex = 1
    while True:
        # print(...) with one argument behaves identically on Python 2 and 3.
        print("-" * 40)
        print("PASS #{0}".format(passIndex))
        passIndex += 1

        clustersNumber, clusters = \
            LouvainClusterer.getOnePassLouvainCommunities(matrix)

        if firstRound:
            firstRound = False
            realClusters = clusters
        else:
            # Result of the call is ignored; realClusters is presumably
            # modified in place by the helper (verify against its code).
            LouvainClusterer.updateRealClusters(realClusters, clusters)

        if clustersNumber == matrix.shape[0]:
            # The pass did not reduce the number of clusters: we are done.
            return np.array(realClusters)

        matrix = LouvainClusterer.buildNewSimilarityMatrix(
            matrix, clusters, clustersNumber)
def getClusters(self):
    """Iterate Louvain passes until the number of clusters stops shrinking.

    Returns:
        ``np.array`` of the accumulated cluster labels (``realClusters``).
    """
    matrix = csr_matrix(self.similarityMatrixBuilder)
    realClusters = None
    passNumber = 1
    finished = False
    while not finished:
        # Parenthesised single-argument print: same output on Python 2 and 3.
        print("-" * 40)
        print("PASS #{0}".format(passNumber))
        passNumber += 1

        # Get the number of clusters and the mapping vector computed from
        # the current similarity matrix.
        clustersNumber, clusters = (
            LouvainClusterer.getOnePassLouvainCommunities(matrix))

        # Load the cluster data into realClusters.
        if realClusters is None:
            realClusters = clusters
        else:
            # Return value is ignored, so the helper is expected to update
            # realClusters in place -- TODO confirm.
            LouvainClusterer.updateRealClusters(realClusters, clusters)

        # Check whether the pass managed to reduce the number of clusters.
        # If it did, iterate again on a rebuilt working matrix, and keep
        # going until the number of clusters can no longer be reduced.
        if clustersNumber == matrix.shape[0]:
            finished = True
        else:
            matrix = LouvainClusterer.buildNewSimilarityMatrix(
                matrix, clusters, clustersNumber)
    return np.array(realClusters)
def getOnePassLouvainCommunities(matrix):
    """Run one Louvain pass over ``matrix``.

    Every row index starts in its own cluster.  Nodes are then visited in
    index order, repeatedly, and moved to the cluster returned by
    ``LouvainClusterer.getArgMaxModularity`` whenever that call returns
    something other than ``-1``; sweeping stops after a full sweep with no
    move.  Finally the surviving labels are renumbered ``0..k-1`` in order
    of first appearance.

    Args:
        matrix: sparse matrix exposing ``shape``, ``getrow`` and ``getcol``
            (a ``csr_matrix`` in the visible callers).

    Returns:
        ``(clustersNumber, clusters)``: the number of distinct clusters and
        the per-node label array.
    """
    matrixSize = matrix.shape[0]
    # Per-node weight: entries of its row plus entries of its column.
    sumsOfWeights = np.array([
        sum(matrix.getrow(i).data) + sum(matrix.getcol(i).data)
        for i in range(matrixSize)
    ])
    totalSumOfWeight = sum(sumsOfWeights)
    clusters = np.arange(matrixSize)

    iteration = 1
    modified = True
    while modified:
        # Single-argument print(...) is valid and identical on Python 2 and 3.
        print(" ITER #{0}".format(iteration))
        iteration += 1
        modified = False
        for i in range(matrixSize):
            newCluster = LouvainClusterer.getArgMaxModularity(
                matrix, clusters, sumsOfWeights, totalSumOfWeight, i)
            if newCluster != -1:
                modified = True
                clusters[i] = newCluster

    # Compact the labels to consecutive ids, in order of first appearance.
    newClusterIdentifiers = {}
    for i in range(len(clusters)):
        label = clusters[i]
        if label not in newClusterIdentifiers:
            newClusterIdentifiers[label] = len(newClusterIdentifiers)
        clusters[i] = newClusterIdentifiers[label]
    clustersNumber = len(newClusterIdentifiers)

    print("End of a Pass")
    print("-" * 40)
    return clustersNumber, clusters
def getArgMaxModularity(matrix, clusters, sumsOfWeights, totalSumOfWeight, i):
    """Return the neighbouring cluster that most improves node ``i``.

    The per-cluster values come from
    ``LouvainClusterer.getDeltaModularityCalculElements`` and are indexed by
    cluster id.  For every index ``j`` stored in row ``i`` and then in column
    ``i`` of ``matrix``, the gain is the value of ``j``'s cluster minus the
    value of ``i``'s current cluster.  Only strictly larger gains replace the
    current best, so the first neighbour reaching the maximum wins.

    Returns:
        The cluster id of the best neighbour, or ``-1`` if there is no
        optimization of modularity (no neighbour has a positive gain).
    """
    perCluster = LouvainClusterer.getDeltaModularityCalculElements(
        matrix, clusters, sumsOfWeights, totalSumOfWeight, i)
    baseline = perCluster[clusters[i]]

    bestGain = 0
    bestNeighbour = -1
    # Row neighbours are scanned before column neighbours.
    for neighbours in (matrix.getrow(i).indices, matrix.getcol(i).indices):
        for j in neighbours:
            gain = perCluster[clusters[j]] - baseline
            if gain > bestGain:
                bestGain = gain
                bestNeighbour = j

    if bestGain == 0:
        return -1
    return clusters[bestNeighbour]
def getArgMaxModularity(matrix, clusters, sumsOfWeights, totalSumOfWeight, i):
    """Pick the cluster node ``i`` should move to, or ``-1`` to stay put.

    This function returns -1 if there is no optimization of modularity.

    Candidates are the clusters of the indices stored in row ``i`` and in
    column ``i`` of ``matrix`` (rows first).  A candidate is scored by the
    difference between its entry and the entry of ``i``'s current cluster in
    the structure returned by
    ``LouvainClusterer.getDeltaModularityCalculElements``.
    """
    scores = LouvainClusterer.getDeltaModularityCalculElements(
        matrix, clusters, sumsOfWeights, totalSumOfWeight, i)
    currentScore = scores[clusters[i]]

    topDelta = 0
    topCluster = -1

    for j in matrix.getrow(i).indices:
        candidate = clusters[j]
        delta = scores[candidate] - currentScore
        # Strict comparison: ties keep the earlier candidate.
        if delta > topDelta:
            topDelta, topCluster = delta, candidate

    for j in matrix.getcol(i).indices:
        candidate = clusters[j]
        delta = scores[candidate] - currentScore
        if delta > topDelta:
            topDelta, topCluster = delta, candidate

    # topCluster is still -1 when no candidate had a positive delta.
    return topCluster
def getOnePassLouvainCommunities(matrix):
    """Perform a single Louvain pass and return ``(clustersNumber, clusters)``.

    Steps:
      1. compute, for each index, the sum of its row entries plus its column
         entries, and the total of those sums;
      2. start with one cluster per index and sweep over the indices,
         reassigning index ``i`` to whatever
         ``LouvainClusterer.getArgMaxModularity`` returns unless that is
         ``-1``; repeat sweeps until one makes no reassignment;
      3. renumber the remaining labels consecutively from 0.

    An iteration counter and end-of-pass banner are printed to stdout.
    """
    matrixSize = matrix.shape[0]

    rowAndColumnSums = []
    for i in range(matrixSize):
        rowAndColumnSums.append(
            sum(matrix.getrow(i).data) + sum(matrix.getcol(i).data))
    sumsOfWeights = np.array(rowAndColumnSums)
    totalSumOfWeight = sum(sumsOfWeights)

    clusters = np.arange(matrixSize)

    sweep = 1
    modified = True
    while modified:
        # print(...) with one argument prints the same on Python 2 and 3.
        print(" ITER #{0}".format(sweep))
        sweep += 1
        modified = False
        for i in range(matrixSize):
            newCluster = LouvainClusterer.getArgMaxModularity(
                matrix, clusters, sumsOfWeights, totalSumOfWeight, i)
            if newCluster == -1:
                continue
            modified = True
            clusters[i] = newCluster

    # Map each surviving label to a dense id, assigned at first appearance.
    newClusterIdentifiers = {}
    clustersNumber = 0
    for i in range(len(clusters)):
        oldLabel = clusters[i]
        if oldLabel not in newClusterIdentifiers:
            newClusterIdentifiers[oldLabel] = clustersNumber
            clustersNumber += 1
        clusters[i] = newClusterIdentifiers[oldLabel]

    print("End of a Pass")
    print("-" * 40)
    return clustersNumber, clusters
def getSimilarityOf2Clusters(matrix, clusters, clusterI, clusterJ):
    """Sum the matrix entries linking members of two clusters.

    Args:
        matrix: passed through to ``LouvainClusterer.getFromMatrix``.
        clusters: array of cluster labels, one per matrix index.
        clusterI: label whose members supply the first index.
        clusterJ: label whose members supply the second index.

    Returns:
        The sum of ``LouvainClusterer.getFromMatrix(matrix, k, l)`` over every
        ``k`` labelled ``clusterI`` and every ``l`` labelled ``clusterJ``;
        ``0`` when either cluster has no member.
    """
    membersOfI = np.where(clusters == clusterI)[0]
    membersOfJ = np.where(clusters == clusterJ)[0]
    # Same left-to-right accumulation order as nested loops starting from 0.
    return sum(
        LouvainClusterer.getFromMatrix(matrix, k, l)
        for k in membersOfI
        for l in membersOfJ
    )
def buildNewSimilarityMatrix(matrix, clusters, clustersNumber):
    """Build the cluster-level similarity matrix for the next pass.

    Entry ``(i, j)`` of the result is
    ``LouvainClusterer.getSimilarityOf2Clusters(matrix, clusters, i, j)``.
    Only pairs with ``i <= j`` are filled, so the result holds the diagonal
    and the upper triangle.

    Args:
        matrix: similarity matrix of the current level.
        clusters: cluster label of each row of ``matrix``.
        clustersNumber: number of distinct labels; the result is
            ``clustersNumber x clustersNumber``.

    Returns:
        The new matrix converted to ``csr_matrix``.
    """
    # Builtin ``float`` is the dtype the removed ``np.float`` alias stood
    # for; the alias raises AttributeError on NumPy >= 1.24.
    nextMatrix = dok_matrix((clustersNumber, clustersNumber), dtype=float)
    for i in range(clustersNumber):
        for j in range(i, clustersNumber):
            nextMatrix[i, j] = LouvainClusterer.getSimilarityOf2Clusters(
                matrix, clusters, i, j)
    return csr_matrix(nextMatrix)
def getSimilarityOf2Clusters(matrix, clusters, clusterI, clusterJ):
    """Return the total weight between cluster ``clusterI`` and ``clusterJ``.

    Each index labelled ``clusterI`` in ``clusters`` is paired with each index
    labelled ``clusterJ``, and the values that
    ``LouvainClusterer.getFromMatrix`` yields for those pairs are added up,
    starting from ``0``.
    """
    rowMembers = np.where(clusters == clusterI)[0]
    columnMembers = np.where(clusters == clusterJ)[0]

    total = 0
    for rowIndex in rowMembers:
        for columnIndex in columnMembers:
            total = total + LouvainClusterer.getFromMatrix(
                matrix, rowIndex, columnIndex)
    return total
def getOnePassLouvainCommunities(matrix):
    """Run one Louvain pass on ``matrix``.

    Returns:
        ``(clustersNumber, clusters)`` where ``clusters`` gives, for each
        matrix index, its cluster id in ``0..clustersNumber-1``.
    """
    matrixSize = matrix.shape[0]

    # Sum the row values and the column values for each element of the
    # matrix.  sumsOfWeights -> array of matrixSize values.
    sumsOfWeights = np.array([
        sum(matrix.getrow(i).data) + sum(matrix.getcol(i).data)
        for i in range(matrixSize)
    ])
    # totalSumOfWeight -> sum of all the values of sumsOfWeights.
    totalSumOfWeight = sum(sumsOfWeights)

    # One entry per element to represent the clusters: at the start there
    # are as many clusters as there are elements in the matrix.
    clusters = np.arange(matrixSize)

    # Loop that agglomerates the elements with one another; it ends after a
    # full sweep in which no element changed cluster.
    iteration = 1
    modified = True
    while modified:
        # Single-argument print(...) gives the same output on Python 2 and 3.
        print(" ITER #{0}".format(iteration))
        iteration += 1
        modified = False
        for i in range(matrixSize):
            newCluster = LouvainClusterer.getArgMaxModularity(
                matrix, clusters, sumsOfWeights, totalSumOfWeight, i)
            if newCluster != -1:
                modified = True
                clusters[i] = newCluster

    # Reduce the clusters vector to obtain the real clusters.
    # newClusterIdentifiers maps the labels found in the clusters vector to
    # the new consecutive cluster ids, assigned in order of first appearance.
    newClusterIdentifiers = {}
    for position, label in enumerate(clusters):
        if label not in newClusterIdentifiers:
            newClusterIdentifiers[label] = len(newClusterIdentifiers)
        clusters[position] = newClusterIdentifiers[label]
    clustersNumber = len(newClusterIdentifiers)

    print("End of a Pass")
    print("-" * 40)
    return clustersNumber, clusters
def buildNewSimilarityMatrix(matrix,clusters,clustersNumber) : nextMatrix=dok_matrix((clustersNumber,clustersNumber),dtype=np.float) for i in range(0,clustersNumber) : for j in range(i,clustersNumber) : nextMatrix[i,j]=LouvainClusterer.getSimilarityOf2Clusters(matrix,clusters,i,j) return csr_matrix(nextMatrix)