def main():
    edges = [[6, 4, 5, 1], [0, 5, 2], [1, 3, 5], [2, 6, 4, 5], [3, 6, 0, 5],
             [2, 3, 4, 0, 1], [3, 4, 0, 10, 11, 7], [6, 11, 12, 8], [7, 12, 9],
             [8, 12, 13, 10], [6, 9, 11, 12], [6, 7, 10, 12], [11, 10, 7, 8, 9],
             [9]]
    vertice_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
    communities = []
    for i in range(14):
        v = ScanVertex(i, None, edges[i])
        communities.append(v)
    for i in range(14):
        neighbors = []
        for j in edges[i]:
            neighbors.append(communities[j])
        communities[i].neighbors = neighbors
    # Initialize: seed two vertices with distinct labels.
    communities[12].value = 'A'
    communities[5].value = 'B'
    print(id(communities))
    # for v in communities: print(v.id, v.value)
    print('running scan pregel')
    p = Pregel(communities, num_workers)
    p.run()
    print(id(communities))
    for v in communities:
        print(v.id, v.value)

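# All of the snippets in this section assume a single-machine Pregel framework
# in the style of Michael Nielsen's pregel.py, with Pregel, Vertex, and
# module-level globals such as num_workers defined elsewhere. As a point of
# reference, here is a minimal sketch of the Vertex base class such a
# framework provides; the exact attribute names are assumptions inferred from
# how the snippets below use them.
class Vertex:
    """Minimal vertex interface assumed by the snippets in this section."""

    def __init__(self, id, value, out_vertices):
        self.id = id                        # vertex identifier
        self.value = value                  # per-vertex state
        self.out_vertices = out_vertices    # neighbours we can message
        self.incoming_messages = []         # (vertex, message) pairs received
        self.outgoing_messages = []         # (vertex, message) pairs to send
        self.active = True                  # set False to vote to halt
        self.superstep = 0                  # maintained by the framework

    def update(self):
        # Subclasses override this; the framework calls it once per superstep.
        raise NotImplementedError
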
def pagerank_pregel(vertices): """Computes the pagerank vector associated to vertices, using Pregel.""" p = Pregel(vertices,num_workers,num_iterations) p.run() for vertex in p.vertices: print "#%s: %s" % (vertex.id, vertex.value) print "Sum=%f" % sum(v.value for v in p.vertices)
def main(filename):
    read_vertices(vertices, filename)
    read_edges(vertices, filename)
    p = Pregel(vertices.values(), num_workers)
    p.run()
    for vertex in p.vertices:
        print "#%s: %s" % (vertex.id, vertex.value)

def shortest_path_pregel(vertices):
    """Computes the single-source shortest path using Pregel."""
    p = Pregel(vertices, num_workers)
    p.run()
    # We present our result as a dict to conform to NetworkX.
    # NetworkX will only include values for the reachable nodes, so we
    # check for finite values.
    return {vertex.id: vertex.value
            for vertex in p.vertices
            if np.isfinite(vertex.value)}

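# The ShortestPathVertex class used above is not shown. A plausible sketch,
# assuming unit edge weights and a hypothetical is_source flag marking the
# source vertex, is the classic Pregel relaxation:
import numpy as np

class ShortestPathVertex(Vertex):
    """Hypothetical vertex: value is the best-known distance from the source
    (np.inf while unreached)."""

    def update(self):
        if self.superstep == 0:
            self.value = 0.0 if self.is_source else np.inf
            new_value = self.value
        else:
            # Best distance offered by any neighbour this superstep.
            new_value = min((msg for (_, msg) in self.incoming_messages),
                            default=np.inf)
        improved = new_value < self.value
        if improved:
            self.value = new_value
        if (self.superstep == 0 or improved) and np.isfinite(self.value):
            # Relax: offer each neighbour our distance plus one unit edge.
            self.outgoing_messages = [(v, self.value + 1)
                                      for v in self.out_vertices]
        else:
            self.outgoing_messages = []
            self.active = False  # no improvement: vote to halt
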
def main(filename):
    global vertices
    global num_vertices
    # Read the graph from the file, using the MaxValueVertex constructor.
    vertices = read_graph(filename, MaxValueVertex)
    # Fill the vertices with random values.
    for v in vertices.values():
        v.value = randint(1, len(vertices) * 2)
    # Run the computation, capped at max_supersteps iterations.
    p = Pregel(vertices.values(), num_workers, max_supersteps)
    p.run()
    print "Completed in %d supersteps" % p.superstep
    for vertex in p.vertices:
        print "#%s: %s" % (vertex.id, vertex.value)

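# MaxValueVertex is defined elsewhere; it follows the classic maximum-value
# example from the Pregel paper. A minimal sketch consistent with how main()
# uses it:
class MaxValueVertex(Vertex):
    """Every vertex converges on the largest value in its component."""

    def update(self):
        # Largest value seen so far: ours or any neighbour's.
        new_value = max([msg for (_, msg) in self.incoming_messages]
                        + [self.value])
        if self.superstep == 0 or new_value > self.value:
            # Improved (or first superstep): adopt it and tell the neighbours.
            self.value = new_value
            self.outgoing_messages = [(v, self.value)
                                      for v in self.out_vertices]
        else:
            self.outgoing_messages = []
            self.active = False  # stable: vote to halt
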
def randomWalk(G, A, t):
    N = A.shape[0]
    vertices = [0] * N
    for i in range(N):
        vertex = RandomWalkVertex(i, 0, [])
        vertex.t = t
        vertex.num_vertices = N
        vertices[i] = vertex
    vertices = np.array(vertices)
    for i in range(N):
        A_i = A[i]
        for j in range(N):
            if A_i[j] == 1:
                vertices[i].out_vertices.append(vertices[j])
        vertices[i].value = A_i
    p = Pregel(vertices, 8)
    p.run()
    return np.array([vertex.value for vertex in p.vertices])

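# The module-level RandomWalkVertex used by randomWalk() is also defined
# elsewhere. A plausible sketch, consistent with how randomWalk() uses it
# (value holds this vertex's current distribution row, iterated for t
# supersteps over an undirected graph with self-loops), would be:
class RandomWalkVertex(Vertex):
    """Hypothetical vertex: each superstep replaces value with the
    degree-normalised sum of the neighbours' rows, one step of the walk."""

    def update(self):
        if self.superstep > 0:
            incoming = [msg for (_, msg) in self.incoming_messages]
            if incoming:
                # value <- (1/deg) * sum of neighbour rows.
                self.value = np.sum(incoming, axis=0) / len(self.out_vertices)
        if self.superstep < self.t:
            # Broadcast our row so the neighbours can take the next step.
            self.outgoing_messages = [(v, self.value)
                                      for v in self.out_vertices]
        else:
            self.outgoing_messages = []
            self.active = False  # t steps taken: halt
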
def main():
    vertices = [PageRankVertex(j, 1.0 / num_vertices, [])
                for j in range(num_vertices)]
    X = [vertices[j].x for j in range(num_vertices)]
    Y = [vertices[j].y for j in range(num_vertices)]
    create_edges(vertices)
    pr_test = pagerank_test(vertices)
    # print("Test computation of pagerank:\n%s" % pr_test)
    p = Pregel(vertices, num_workers)
    pr_pregel = pagerank_pregel(p)
    # print("Pregel computation of pagerank:\n%s" % pr_pregel)
    diff = pr_pregel - pr_test
    # print("Difference between the two pagerank vectors:\n%s" % diff)
    print("The norm of the difference is: %s" % np.linalg.norm(diff))
    plt.show()

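# The PageRankVertex used above is essentially the vertex from Nielsen's
# pregel examples, extended with x/y coordinates for plotting (not shown
# here). A sketch of the update rule, assuming num_iterations and
# num_vertices are the usual module globals:
class PageRankVertex(Vertex):
    def update(self):
        if self.superstep < num_iterations:
            # Damped PageRank update over the incoming probability mass.
            self.value = (0.15 / num_vertices
                          + 0.85 * sum(msg for (_, msg)
                                       in self.incoming_messages))
            # Split our mass evenly over the out-edges.
            outgoing_pagerank = self.value / len(self.out_vertices)
            self.outgoing_messages = [(v, outgoing_pagerank)
                                      for v in self.out_vertices]
        else:
            self.outgoing_messages = []
            self.active = False
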
def pagerank_pregel(vertices): """Computes the pagerank vector associated to vertices, using Pregel.""" p = Pregel(vertices,num_workers) p.run() return mat([vertex.value for vertex in p.vertices]).transpose()
def pagerank_pregel(vertices): """Computes the pagerank vector associated to vertices, using Pregel.""" p = Pregel(vertices, num_workers) p.run() return mat([vertex.value for vertex in p.vertices]).transpose()
# Assumed imports for this snippet; Vertex and Pregel are provided by the
# accompanying single-machine Pregel module.
from heapq import heappop, heappush, merge

import networkx as nx
import numpy as np

from pregel import Pregel, Vertex


def walktrap(G, t, tRW):
    for vertex in G.nodes:
        G.add_edge(vertex, vertex)
    G = nx.convert_node_labels_to_integers(G)
    N = G.number_of_nodes()
    A = np.array(nx.to_numpy_matrix(G))
    Dx = np.zeros((N, N))
    P = np.zeros((N, N))
    for i, A_row in enumerate(A):
        d_i = np.sum(A_row)
        P[i] = A_row / d_i
        Dx[i, i] = d_i ** (-0.5)
    P_t = randomWalk(G, A, tRW)
    # Weight of all the edges, excluding self-edges.
    G_total_weight = G.number_of_edges() - N

    class RandomWalkVertex(Vertex):
        def modularity(self):
            return (self.internal_weight
                    - (self.total_weight * self.total_weight
                       / G_total_weight)) / G_total_weight

        def custom_init(self, id, t=200):
            self.id = id
            self.community = str(id)
            self.communityMembers = set([])
            self.history = [str(id)]
            self.internal_weight = 0
            self.total_weight = self.internal_weight + (len([
                id for id, edge in enumerate(A[self.id])
                if edge == 1 and id != self.id
            ]) / 2)
            self.vert = set([id])
            self.P_c = P_t[self.id]
            self.size = 1
            self.min_sigma_heap = []
            self.t = t
            self.neighbourCommu = {}
            self.minDeltaSigma = None
            self.defunctCommunities = set([])
            self.modularities = [self.modularity()]
            self.events = [0]
            self.sentFusion = False

        def update(self):
            if self.superstep == 0:
                self.outgoing_messages = [
                    (vertex, ("delta", self.community,
                              self.min_sigma_heap[0], self.communityMembers))
                    for vertex in set(self.out_vertices
                                      + list(self.communityMembers))
                ]
            elif self.superstep < self.t:
                self.min_sigma_heap.sort()
                types = [x[1][0] for x in self.incoming_messages]
                if "fusion" in types:
                    # A merge has to be carried out here.
                    self.sentFusion = False
                    numMessage = types.index("fusion")
                    (_, otherId, otherCommu, otherSize, otherP_c,
                     otherInternal_weight, otherTotal_weight, otherVert,
                     otherNeighbourCommu, deltaSigma, otherCommunityMembers,
                     otherMinSigmaHeap, otherDefunct) = \
                        self.incoming_messages[numMessage][1]
                    # Start by merging all the information that is easy to
                    # share between the two communities.
                    oldSize = self.size
                    oldCommu = self.community
                    self.defunctCommunities = \
                        self.defunctCommunities.union(otherDefunct)
                    self.defunctCommunities.add(self.community)
                    self.defunctCommunities.add(otherCommu)
                    self.communityMembers = \
                        self.communityMembers.union(otherCommunityMembers)
                    self.communityMembers.add(
                        self.incoming_messages[numMessage][0])
                    self.community = (min(self.community, otherCommu) + "_"
                                      + max(self.community, otherCommu))
                    self.history.append(self.community)
                    self.size = self.size + otherSize
                    self.P_c = (oldSize * self.P_c
                                + otherSize * otherP_c) / self.size
                    oldVert = self.vert
                    self.vert = self.vert.union(otherVert)
                    two_commu_weight = 0
                    for v1 in oldVert:
                        for id, edge in enumerate(A[v1]):
                            if edge == 1 and id in otherVert:
                                two_commu_weight += 1
                    self.internal_weight = (self.internal_weight
                                            + otherInternal_weight
                                            + two_commu_weight)
                    self.total_weight = self.total_weight + otherTotal_weight
                    oldNeighbourCommu = self.neighbourCommu
                    self.neighbourCommu = {**self.neighbourCommu,
                                           **otherNeighbourCommu}
                    self.min_sigma_heap = list(
                        merge(self.min_sigma_heap, otherMinSigmaHeap))
                    self.events.append(self.superstep)
                    self.modularities.append(self.modularity())
                    # Now update the distances to the neighbouring communities.
                    self.outgoing_messages = []
                    deltaS = heappop(self.min_sigma_heap)[0]
                    for C_id in list(self.neighbourCommu):
                        if C_id.community != self.community:
                            # If C_id neighbours both merged communities,
                            # compute delta_sigma from the two old distances.
                            if (C_id in oldNeighbourCommu
                                    and C_id in otherNeighbourCommu):
                                infoC1C = oldNeighbourCommu[C_id]
                                infoC2C = otherNeighbourCommu[C_id]
                                delta_sigma_C1C = infoC1C[0]
                                delta_sigma_C2C = infoC2C[0]
                                ds = ((oldSize + int(infoC2C[1]))
                                      * delta_sigma_C1C
                                      / (self.size + int(infoC2C[1]))
                                      + ((otherSize + int(infoC2C[1]))
                                         * delta_sigma_C2C
                                         - int(infoC2C[1]) * deltaS)
                                      / (self.size + int(infoC2C[1])))
                                self.neighbourCommu[C_id] = \
                                    (ds, self.community, C_id.community)
                                delta_sigma = (ds,
                                               min(self.community,
                                                   C_id.community),
                                               max(self.community,
                                                   C_id.community))
                                # Push this distance onto the distance heap.
                                if delta_sigma not in self.min_sigma_heap:
                                    heappush(self.min_sigma_heap, delta_sigma)
                            # Otherwise C_id neighbours only one of the two
                            # merged communities.
                            else:
                                ds = np.sum(np.square(
                                    np.matmul(Dx, C_id.P_c)
                                    - np.matmul(Dx, self.P_c))) \
                                    * C_id.size * self.size \
                                    / ((C_id.size + self.size) * N)
                                delta_sigma = (ds,
                                               min(self.community,
                                                   C_id.community),
                                               max(self.community,
                                                   C_id.community))
                                self.neighbourCommu[C_id] = delta_sigma
                                if delta_sigma not in self.min_sigma_heap:
                                    heappush(self.min_sigma_heap, delta_sigma)
                            # Tell the neighbours about the new distance.
                            self.outgoing_messages.append(
                                (C_id, ("synchroF", self.community,
                                        (ds, self.community, C_id.community),
                                        self.size, self.P_c)))
                            # And tell them that the two old communities are
                            # now defunct.
                            self.outgoing_messages.append(
                                (C_id, ("defunct", oldCommu)))
                            self.outgoing_messages.append(
                                (C_id, ("defunct", otherCommu)))
                    # Remove from the heap every pair that involves one of
                    # the two merged communities.
                    for ds in self.min_sigma_heap:
                        if ds[1] == oldCommu or ds[2] == otherCommu:
                            self.min_sigma_heap.remove(ds)
                            self.minDeltaSigma = None
                        elif ds[1] == ds[2]:
                            self.min_sigma_heap.remove(ds)
                            self.minDeltaSigma = None
                # End of the merge handling.
                else:
                    self.outgoing_messages = []
                    hasSynchro = False
                    deltaSigmaChanged = False
                    # First process the announcements of defunct communities.
                    for (vertex, message) in [
                            x for x in self.incoming_messages
                            if x[1][0] == "defunct"]:
                        for ds in self.min_sigma_heap:
                            if ds[1] == message[1] or ds[2] == message[1]:
                                self.min_sigma_heap.remove(ds)
                                self.minDeltaSigma = None
                        self.defunctCommunities.add(message[1])
                    # Then handle the synchronisations.
                    for (vertex, message) in [
                            x for x in self.incoming_messages
                            if x[1][0][:7] == "synchro"]:
                        if message[0] == "synchroF":
                            # synchroF => we must send an internal
                            # synchronisation message in turn.
                            ds = message[2]
                            if ds[1] in self.defunctCommunities:
                                self.outgoing_messages.append(
                                    (vertex, ("defunct", ds[1])))
                            elif ds[2] in self.defunctCommunities:
                                self.outgoing_messages.append(
                                    (vertex, ("defunct", ds[2])))
                            else:
                                if ds not in self.min_sigma_heap:
                                    heappush(self.min_sigma_heap, ds)
                                    hasSynchro = True
                                for member in self.communityMembers:
                                    self.outgoing_messages.append(
                                        (member, ("synchro", message[1],
                                                  message[2], message[3],
                                                  message[4])))
                        if message[0] == "synchro":
                            # synchro (without F) => no need to propagate the
                            # synchronisation further.
                            ds = message[2]
                            if ds[1] in self.defunctCommunities:
                                self.outgoing_messages.append(
                                    (vertex, ("defunct", ds[1])))
                            elif ds[2] in self.defunctCommunities:
                                self.outgoing_messages.append(
                                    (vertex, ("defunct", ds[2])))
                            else:
                                if ds not in self.min_sigma_heap:
                                    heappush(self.min_sigma_heap, ds)
                                    hasSynchro = True
                    # Now the deltas, i.e. the shared information about which
                    # communities are the closest.
                    for (vertex, message) in [
                            x for x in self.incoming_messages
                            if x[1][0] == "delta"]:
                        ds = message[2]
                        if ds[1] in self.defunctCommunities:
                            self.outgoing_messages.append(
                                (vertex, ("defunct", ds[1])))
                        elif ds[2] in self.defunctCommunities:
                            self.outgoing_messages.append(
                                (vertex, ("defunct", ds[2])))
                        else:
                            if ds not in self.min_sigma_heap:
                                heappush(self.min_sigma_heap, ds)
                    # If we were told to hold for one step.
                    for (vertex, message) in [
                            x for x in self.incoming_messages
                            if x[1][0] == "hold"]:
                        deltaSigmaChanged = True
                        hasSynchro = True
                    # If the distance heap is not empty, refresh which
                    # communities are currently the closest.
                    if self.min_sigma_heap != []:
                        try:
                            newMin = min(self.minDeltaSigma,
                                         self.min_sigma_heap[0])
                            deltaSigmaChanged = newMin != self.minDeltaSigma
                            self.minDeltaSigma = newMin
                        except (TypeError, IndexError):
                            try:
                                newMin = self.min_sigma_heap[0]
                                deltaSigmaChanged = newMin != self.minDeltaSigma
                                self.minDeltaSigma = newMin
                            except IndexError:
                                deltaSigmaChanged = True
                    if deltaSigmaChanged or hasSynchro:
                        self.outgoing_messages += [
                            (vertex, "hold")
                            for vertex in self.communityMembers
                        ]
                    # The modulo tests run the system in a fixed cycle of
                    # phases, so that all the nodes stay in step.
                    # Phase: share the deltas.
                    if self.superstep % 5 == 0 and self.min_sigma_heap != []:
                        self.outgoing_messages += [
                            (vertex, ("delta", self.community,
                                      self.minDeltaSigma,
                                      self.communityMembers))
                            for vertex in set(self.out_vertices
                                              + list(self.communityMembers))
                        ]
                    # Phase: synchronise information inside the community.
                    if (self.superstep % 5 == 1 and deltaSigmaChanged
                            and self.min_sigma_heap != []):
                        self.outgoing_messages += [
                            (vertex, ("synchro", self.community,
                                      self.minDeltaSigma,
                                      self.communityMembers))
                            for vertex in set(self.out_vertices
                                              + list(self.communityMembers))
                        ]
                    # If nothing has changed recently, assume the closest pair
                    # of communities is stable. If we are one of those two
                    # communities, initiate a merge by sending a message.
                    if (self.min_sigma_heap != [] and not hasSynchro
                            and self.superstep % 10 == 8
                            and self.community in self.minDeltaSigma[1:]):
                        if str(self.minDeltaSigma[1]) == str(self.community):
                            otherCommu = str(self.minDeltaSigma[2])
                        else:
                            otherCommu = str(self.minDeltaSigma[1])
                        out_message = ("fusion", self.id, self.community,
                                       self.size, self.P_c,
                                       self.internal_weight,
                                       self.total_weight, self.vert,
                                       self.neighbourCommu,
                                       self.minDeltaSigma,
                                       self.communityMembers,
                                       self.min_sigma_heap,
                                       self.defunctCommunities)
                        self.outgoing_messages += [
                            (vertex, out_message)
                            for vertex in self.allVertices
                            if vertex.community == otherCommu
                        ]
                        self.sentFusion = True
            else:
                self.active = False

    vertices = [0] * N
    for i in range(N):
        vertex = RandomWalkVertex(i, 0, [])
        vertex.custom_init(i, t)
        vertices[i] = vertex
    vertices = np.array(vertices)
    for vertex in vertices:
        vertex.allVertices = vertices
    # Build the edges and the initial inter-community distances.
    for i in range(N):
        A_i = A[i]
        for j in range(N):
            if A_i[j] == 1:
                vertices[i].out_vertices.append(vertices[j])
                if i != j:
                    ds = (0.5 / N) * np.sum(np.square(
                        np.matmul(Dx, P_t[i]) - np.matmul(Dx, P_t[j])))
                    delta_sigma = (ds, min(str(i), str(j)),
                                   max(str(i), str(j)))
                    if delta_sigma not in vertices[i].min_sigma_heap:
                        heappush(vertices[i].min_sigma_heap, delta_sigma)
                    vertices[i].neighbourCommu[vertices[j]] = delta_sigma
    p = Pregel(vertices, 8)
    p.run()
    # Dates of the merges, needed to extract the information in order.
    dateEvents = []
    for vertex in vertices:
        dateEvents += vertex.events
    dateEvents = sorted(list(set(dateEvents)))
    # Table giving the modularity after each new merge.
    modularities = []
    for event in dateEvents:
        temp = 0
        for vertex in vertices:
            # print(vertex.community, vertex.min_sigma_heap)
            try:
                index = next(i for i, v in enumerate(vertex.events)
                             if v >= event)
                temp += vertex.modularities[index]
            except StopIteration:
                pass
        modularities.append(temp)
    print("Merge dates:", dateEvents)
    # Point at which the modularity is maximal.
    Qmax_index = np.argmax(modularities)
    print("Q is maximal after merge number", Qmax_index,
          "out of", len(dateEvents))
    timeMax = dateEvents[Qmax_index]
    # Partition (the set of communities) at the point of maximal modularity.
    partition = set([])
    dicCommunities = {}  # Maps each community to the nodes it contains.
    for vertex in vertices:
        try:
            index = next(i for i, v in enumerate(vertex.events)
                         if v > timeMax) - 1
        except StopIteration:
            index = len(vertex.events) - 1
        partition.add(vertex.history[index])
        if vertex.history[index] not in dicCommunities:
            dicCommunities[vertex.history[index]] = [vertex]
        else:
            dicCommunities[vertex.history[index]].append(vertex)
    # List of all the partitions after each new merge.
    allPartition = []
    for timeMax in dateEvents:
        tempPartition = set([])
        for vertex in vertices:
            try:
                index = next(i for i, v in enumerate(vertex.events)
                             if v > timeMax) - 1
            except StopIteration:
                index = len(vertex.events) - 1
            tempPartition.add(vertex.history[index])
        allPartition.append(tempPartition)
    return dicCommunities, partition, modularities, allPartition

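# A possible invocation of walktrap(), e.g. on Zachary's karate club graph;
# the t and tRW values below are illustrative (t bounds the number of
# supersteps, tRW is the random-walk length):
if __name__ == "__main__":
    G = nx.karate_club_graph()
    dicCommunities, partition, modularities, allPartition = walktrap(G, 200, 3)
    print(len(partition), "communities at the modularity maximum")
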
def pregel_pagerank(vertices):
    p = Pregel(vertices, num_workers)
    p.run()
    return mat([vertex.value for vertex in p.vertices]).transpose()