Python Pregel 예제들, pregel.Pregel Python 예제들

예제 #1

0

파일 보기

def main():
    """Build a small fixed graph, seed two labels and run Pregel over it.

    One ``ScanVertex`` is created per entry of the adjacency list, each
    vertex's ``neighbors`` attribute is then pointed at the actual vertex
    objects, vertices 12 and 5 receive the initial values ``'A'`` and
    ``'B'``, and the result of the Pregel run is printed as ``id value``
    pairs.  ``ScanVertex``, ``Pregel`` and ``num_workers`` come from the
    enclosing module.
    """
    # Adjacency list: edges[i] holds the ids of the vertices adjacent to i.
    edges = [[6, 4, 5, 1], [0, 5, 2], [1, 3, 5], [2, 6, 4, 5], [3, 6, 0, 5],
             [2, 3, 4, 0, 1], [3, 4, 0, 10, 11, 7], [6, 11, 12, 8],
             [7, 12, 9], [8, 12, 13, 10], [6, 9, 11, 12], [6, 7, 10, 12],
             [11, 10, 7, 8, 9], [9]]

    # The vertex count is derived from the adjacency list so the two cannot
    # drift apart when the graph is edited.
    communities = [ScanVertex(i, None, adjacent)
                   for i, adjacent in enumerate(edges)]

    # Second pass: every vertex now exists, so ids can be resolved to
    # vertex objects.
    for vertex, adjacent in zip(communities, edges):
        vertex.neighbors = [communities[j] for j in adjacent]

    # Initialise the two seed labels.
    communities[12].value = 'A'
    communities[5].value = 'B'
    print(id(communities))
    print('running scan pregel')
    p = Pregel(communities, num_workers)
    p.run()
    # Printed again after the run so it can be compared with the value above.
    print(id(communities))
    for v in communities:
        print(v.id, v.value)

예제 #2

0

파일 보기

파일: pagerank.py 프로젝트: adkozlov/big_data-2016

def pagerank_pregel(vertices):
    """Run Pregel over ``vertices`` and print the resulting vertex values.

    ``num_workers`` and ``num_iterations`` are read from module scope and
    passed straight to ``Pregel``.  After the run, one ``#<id>: <value>``
    line is printed per vertex, followed by the sum of all values.
    """
    p = Pregel(vertices, num_workers, num_iterations)
    p.run()
    # print(...) with a single pre-formatted argument emits the same text
    # under Python 2 and Python 3; the bare print statement is a
    # SyntaxError on Python 3.
    for vertex in p.vertices:
        print("#%s: %s" % (vertex.id, vertex.value))
    print("Sum=%f" % sum(v.value for v in p.vertices))

예제 #3

0

파일 보기

파일: sssp.py 프로젝트: adkozlov/big_data-2016

def main(filename):
    """Load a graph from ``filename``, run Pregel on it and print each vertex.

    ``read_vertices`` and ``read_edges`` fill the module-level ``vertices``
    mapping; its values are handed to ``Pregel`` together with the
    module-level ``num_workers``.  One ``#<id>: <value>`` line is printed
    per vertex after the run.
    """
    read_vertices(vertices, filename)
    read_edges(vertices, filename)

    # list(...) keeps the argument a real list on Python 3, where
    # dict.values() is a view; on Python 2 it is simply a copy.
    p = Pregel(list(vertices.values()), num_workers)
    p.run()
    # Parenthesised single-argument print works on Python 2 and Python 3.
    for vertex in p.vertices:
        print("#%s: %s" % (vertex.id, vertex.value))

예제 #4

0

파일 보기

def main(filename):
    """Read the graph stored in ``filename``, run Pregel and print the result.

    The module-level ``vertices`` mapping is populated by ``read_vertices``
    and ``read_edges``.  Its values are passed to ``Pregel`` along with the
    module-level ``num_workers``, and each vertex is printed as
    ``#<id>: <value>`` once the run has finished.
    """
    read_vertices(vertices, filename)
    read_edges(vertices, filename)

    # Materialise the values: Python 3 returns a view from dict.values(),
    # whereas Python 2 returned a list.
    p = Pregel(list(vertices.values()), num_workers)
    p.run()
    for vertex in p.vertices:
        # Single-argument print(...) is valid and equivalent on Python 2
        # and 3.
        print("#%s: %s" % (vertex.id, vertex.value))

예제 #5

0

파일 보기

파일: pagerank.py 프로젝트: adkozlov/big_data-2016

def pagerank_pregel(vertices):
    """Run Pregel over ``vertices`` and report every vertex value.

    ``Pregel`` is constructed with the module-level ``num_workers`` and
    ``num_iterations``.  The function prints ``#<id>: <value>`` for each
    vertex and finally ``Sum=<total>`` for the sum of all values; it
    returns nothing.
    """
    p = Pregel(vertices, num_workers, num_iterations)
    p.run()
    for vertex in p.vertices:
        # The Python 2 print statement does not parse on Python 3; this
        # form produces identical output on both.
        print("#%s: %s" % (vertex.id, vertex.value))
    print("Sum=%f" % sum(v.value for v in p.vertices))

예제 #6

0

파일 보기

파일: shortest_path.py 프로젝트: jmmcd/Pregel

def shortest_path_pregel(vertices):
    """Run Pregel over ``vertices`` and return the finite vertex values.

    Returns a dict mapping ``vertex.id`` to ``vertex.value`` for every
    vertex whose value is finite after the run.
    """
    engine = Pregel(vertices, num_workers)
    engine.run()

    # The dict shape mirrors what NetworkX returns, and NetworkX lists
    # reachable nodes only -- hence non-finite values are left out.
    distances = {}
    for node in engine.vertices:
        if not np.isfinite(node.value):
            continue
        distances[node.id] = node.value
    return distances

예제 #7

0

파일 보기

def shortest_path_pregel(vertices):
    """Return ``{vertex.id: vertex.value}`` for finite values after a Pregel run."""
    runner = Pregel(vertices, num_workers)
    runner.run()
    # Results are shaped like NetworkX's: a dict that contains reachable
    # nodes only, so entries whose value is not finite are filtered out.
    finite_pairs = ((node.id, node.value)
                    for node in runner.vertices
                    if np.isfinite(node.value))
    return dict(finite_pairs)

예제 #8

0

파일 보기

def main(filename):
    """Read a graph, assign random vertex values and run Pregel on it.

    Rebinds the module-level ``vertices`` to the mapping returned by
    ``read_graph``.  Every vertex value is set to a random integer in
    ``[1, 2 * len(vertices)]`` before the run.  Afterwards the number of
    supersteps and one ``#<id>: <value>`` line per vertex are printed.
    """
    global vertices
    # Declared global by the original code; it is not assigned in this
    # function.
    global num_vertices
    # Read the graph from the file, using the MaxValueVertex constructor.
    vertices = read_graph(filename, MaxValueVertex)

    # Fill the vertices with random values.
    for v in vertices.values():
        v.value = randint(1, len(vertices) * 2)

    # Run the computation, passing max_supersteps as the iteration limit.
    # list(...) keeps the argument a real list on Python 3, where
    # dict.values() is a view.
    p = Pregel(list(vertices.values()), num_workers, max_supersteps)
    p.run()
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Completed in %d supersteps" % p.superstep)
    for vertex in p.vertices:
        print("#%s: %s" % (vertex.id, vertex.value))

예제 #9

0

파일 보기

def randomWalk(G, A, t):
    """Run Pregel over one ``RandomWalkVertex`` per row of ``A``.

    Each vertex gets ``t`` and the vertex count as attributes, an
    out-edge to every column ``j`` where its row of ``A`` equals 1, and
    that row as its initial value.  Returns a NumPy array of the vertex
    values after the run.  ``G`` is accepted but not used here.
    """
    size = A.shape[0]

    nodes = []
    for idx in range(size):
        node = RandomWalkVertex(idx, 0, [])
        node.t = t
        node.num_vertices = size
        nodes.append(node)
    nodes = np.array(nodes)

    for idx in range(size):
        row = A[idx]
        current = nodes[idx]
        for col in range(size):
            if row[col] != 1:
                continue
            current.out_vertices.append(nodes[col])
        # The adjacency row itself is the vertex's starting value.
        current.value = row

    engine = Pregel(nodes, 8)
    engine.run()
    results = [node.value for node in engine.vertices]
    return np.array(results)

예제 #10

0

파일 보기

파일: pagerank.py 프로젝트: ChrisArnault/GraphX

def main():
    """Compare the reference and Pregel pagerank vectors and print their distance.

    Builds ``num_vertices`` ``PageRankVertex`` objects with a uniform
    initial value, wires them with ``create_edges``, computes the vector
    with ``pagerank_test`` and with ``pagerank_pregel``, prints the norm
    of the difference and finally calls ``plt.show()``.
    """
    vertices = []
    for j in range(num_vertices):
        vertices.append(PageRankVertex(j, 1.0/num_vertices, []))
    # Vertex coordinates; collected here but not read again in this function.
    X = [v.x for v in vertices]
    Y = [v.y for v in vertices]

    create_edges(vertices)

    # Reference result, computed before the Pregel engine is built.
    pr_test = pagerank_test(vertices)

    engine = Pregel(vertices, num_workers)
    pr_pregel = pagerank_pregel(engine)

    diff = pr_pregel - pr_test
    distance = np.linalg.norm(diff)
    print("The norm of the difference is: %s" % distance)

    plt.show()

예제 #11

0

파일 보기

파일: pagerank.py 프로젝트: jmmcd/Pregel

def pagerank_pregel(vertices):
    """Run Pregel over ``vertices`` and return their values as a transposed ``mat``.

    The values are collected in ``p.vertices`` order, wrapped with ``mat``
    and transposed before being returned.
    """
    engine = Pregel(vertices, num_workers)
    engine.run()

    ranks = []
    for node in engine.vertices:
        ranks.append(node.value)
    return mat(ranks).transpose()

예제 #12

0

파일 보기

def pagerank_pregel(vertices):
    """Return the post-run vertex values as ``mat(values).transpose()``."""
    runner = Pregel(vertices, num_workers)
    runner.run()
    # Gather the values first, then build and flip the matrix in two steps.
    values = [node.value for node in runner.vertices]
    as_row = mat(values)
    return as_row.transpose()

예제 #13

0

파일 보기

def walktrap(G, t, tRW):
    """Merge vertices of ``G`` into communities with a Pregel message loop.

    The function adds a self-loop to every node of the graph passed in
    (this mutates the caller's ``G``), relabels the nodes to integers,
    builds the adjacency matrix ``A`` and obtains ``P_t`` from
    ``randomWalk(G, A, tRW)``.  It then defines a local vertex class whose
    ``update`` exchanges "delta", "synchro"/"synchroF", "defunct", "hold"
    and "fusion" messages, runs ``Pregel`` with 8 workers, and
    post-processes each vertex's ``events``/``history``/``modularities``.

    Args:
        G: graph object used with the ``networkx`` calls below.
        t: stored on every vertex as ``self.t``; a vertex deactivates once
            ``self.superstep`` is no longer below it.
        tRW: forwarded unchanged as the third argument of ``randomWalk``.

    Returns:
        A 4-tuple ``(dicCommunities, partition, modularities, allPartition)``:
        a dict from community label to the list of vertices in it at the
        time of maximal summed modularity, the set of those labels, the list
        of summed modularities per event date, and the list of label sets
        for every event date.
    """

    # Give every node a self-loop.
    for vertex in G.nodes:
        G.add_edge(vertex, vertex)

    G = nx.convert_node_labels_to_integers(G)
    N = G.number_of_nodes()

    # NOTE(review): nx.to_numpy_matrix is reported as removed in NetworkX 3.0
    # -- confirm the pinned NetworkX version.
    A = np.array(nx.to_numpy_matrix(G))

    # Dx is diagonal with d_i ** -0.5; P holds the rows of A divided by their
    # sum.  P is filled here but not read again in this function.
    Dx = np.zeros((N, N))
    P = np.zeros((N, N))
    for i, A_row in enumerate(A):
        d_i = np.sum(A_row)
        P[i] = A_row / d_i
        Dx[i, i] = d_i**(-0.5)

    P_t = randomWalk(G, A, tRW)

    # Weight of all the edges excluding self-edges
    G_total_weight = G.number_of_edges() - N

    class RandomWalkVertex(Vertex):
        # Vertex subclass local to walktrap(); its methods close over A, P_t,
        # Dx, N and G_total_weight from the enclosing call.

        def modularity(self):
            """Return (internal_weight - total_weight**2 / W) / W, W = G_total_weight."""
            return (self.internal_weight -
                    (self.total_weight * self.total_weight /
                     G_total_weight)) / G_total_weight

        def custom_init(self, id, t=200):
            """Initialise the per-vertex community state for vertex ``id``.

            The vertex starts as its own community, labelled ``str(id)``,
            with size 1 and ``P_c`` taken from row ``id`` of ``P_t``.
            """
            self.id = id
            self.community = str(id)
            self.communityMembers = set([])
            self.history = [str(id)]
            self.internal_weight = 0
            # Half the number of entries equal to 1 in this vertex's row of A,
            # not counting the self-loop.
            self.total_weight = self.internal_weight + (len([
                id for id, edge in enumerate(A[self.id])
                if edge == 1 and id != self.id
            ]) / 2)
            self.vert = set([id])
            self.P_c = P_t[self.id]
            self.size = 1
            self.min_sigma_heap = []
            self.t = t
            self.neighbourCommu = {}
            self.minDeltaSigma = None
            self.defunctCommunities = set([])
            self.modularities = [self.modularity()]
            self.events = [0]
            self.sentFusion = False

        def update(self):
            """Process this superstep's incoming messages and queue outgoing ones.

            Superstep 0 broadcasts the head of the distance heap.  While
            ``superstep < t`` the vertex either performs a merge (when a
            "fusion" message arrived) or handles the other message types
            and possibly requests a merge.  Afterwards it deactivates.
            """
            if self.superstep == 0:
                # NOTE(review): min_sigma_heap[0] raises IndexError when the
                # heap is empty, which is the case for a vertex whose only
                # edge is its self-loop -- confirm such inputs cannot occur.
                self.outgoing_messages = [
                    (vertex, ("delta", self.community, self.min_sigma_heap[0],
                              self.communityMembers))
                    for vertex in set(self.out_vertices +
                                      list(self.communityMembers))
                ]

            elif self.superstep < self.t:
                self.min_sigma_heap.sort()

                # Message kind is the first element of each payload.
                # NOTE(review): "hold" messages are sent below as a bare
                # string, so for them x[1][0] is the character "h".
                types = [x[1][0] for x in self.incoming_messages]

                if "fusion" in types:
                    # A merge has to be carried out here
                    self.sentFusion = False
                    # Only the first "fusion" message of the superstep is used.
                    numMessage = types.index("fusion")
                    _, otherId, otherCommu, otherSize, otherP_c, otherInternal_weight, otherTotal_weight, otherVert, otherNeighbourCommu, deltaSigma, otherCommunityMembers, otherMinSigmaHeap, otherDefunct = self.incoming_messages[
                        numMessage][1]

                    # Start by merging all the information that is easy to share between the two communities
                    oldSize = self.size
                    oldCommu = self.community
                    self.defunctCommunities = self.defunctCommunities.union(
                        otherDefunct)
                    self.defunctCommunities.add(self.community)
                    self.defunctCommunities.add(otherCommu)
                    self.communityMembers = self.communityMembers.union(
                        otherCommunityMembers)
                    # Element 0 of the message entry (presumably the sending
                    # vertex -- confirm against the Pregel implementation).
                    self.communityMembers.add(
                        self.incoming_messages[numMessage][0])
                    # New label: the two old labels joined by "_", smaller
                    # string first.
                    self.community = (min(self.community, otherCommu) + "_" +
                                      max(self.community, otherCommu))
                    self.history.append(self.community)
                    self.size = self.size + otherSize
                    # Size-weighted average of the two P_c vectors.
                    self.P_c = (oldSize * self.P_c +
                                otherSize * otherP_c) / self.size
                    oldVert = self.vert
                    self.vert = self.vert.union(otherVert)
                    # Count entries equal to 1 in A between the two old
                    # vertex sets.
                    two_commu_weight = 0
                    for v1 in oldVert:
                        for id, edge in enumerate(A[v1]):
                            if edge == 1 and id in otherVert:
                                two_commu_weight += 1
                    self.internal_weight = self.internal_weight + otherInternal_weight + two_commu_weight
                    self.total_weight = self.total_weight + otherTotal_weight
                    oldNeighbourCommu = self.neighbourCommu
                    # On duplicate keys the other community's entry wins.
                    self.neighbourCommu = {
                        **self.neighbourCommu,
                        **otherNeighbourCommu
                    }
                    # merge/heappop/heappush are presumably heapq's -- the
                    # import is outside this block.
                    self.min_sigma_heap = list(
                        merge(self.min_sigma_heap, otherMinSigmaHeap))

                    self.events.append(self.superstep)
                    self.modularities.append(self.modularity())

                    # Now update the distances to the neighbouring communities
                    self.outgoing_messages = []
                    deltaS = heappop(self.min_sigma_heap)[0]
                    for C_id in [x for x in self.neighbourCommu]:
                        if C_id.community != self.community:
                            # Compute delta_sigma when C_id neighbours both merged communities
                            if (C_id in [
                                    x for x in oldNeighbourCommu
                            ]) and (C_id in [x for x in otherNeighbourCommu]):
                                infoC1C = oldNeighbourCommu[C_id]
                                infoC2C = otherNeighbourCommu[C_id]
                                delta_sigma_C1C = infoC1C[0]
                                delta_sigma_C2C = infoC2C[0]
                                # NOTE(review): the tuples stored in
                                # neighbourCommu carry a community label at
                                # index 1, so int(infoC2C[1]) would raise
                                # ValueError for a merged label containing
                                # "_" -- confirm whether a size was intended.
                                ds = (((oldSize + int(infoC2C[1])) *
                                       (delta_sigma_C1C) /
                                       (self.size + int(infoC2C[1])) +
                                       ((otherSize + int(infoC2C[1])) *
                                        (delta_sigma_C2C) -
                                        (int(infoC2C[1]) * deltaS)) /
                                       (self.size + int(infoC2C[1]))))
                                self.neighbourCommu[C_id] = (ds,
                                                             self.community,
                                                             C_id.community)

                                delta_sigma = (ds,
                                               min(self.community,
                                                   C_id.community),
                                               max(self.community,
                                                   C_id.community))

                                # Add this distance to the heap of distances
                                if delta_sigma not in self.min_sigma_heap:
                                    heappush(self.min_sigma_heap, delta_sigma)
                            # Otherwise C_id neighbours only one of the two communities that merged
                            else:
                                # Recomputed directly from the two P_c vectors.
                                ds = np.sum(
                                    np.square(
                                        np.matmul(Dx, C_id.P_c) -
                                        np.matmul(Dx, self.P_c))
                                ) * C_id.size * self.size / (
                                    (C_id.size + self.size) * N)

                                delta_sigma = (ds,
                                               min(self.community,
                                                   C_id.community),
                                               max(self.community,
                                                   C_id.community))
                                self.neighbourCommu[C_id] = delta_sigma
                                if delta_sigma not in self.min_sigma_heap:
                                    heappush(self.min_sigma_heap, delta_sigma)

                            # Tell the neighbours about the new distance
                            self.outgoing_messages.append(
                                (C_id, ("synchroF", self.community,
                                        (ds, self.community, C_id.community),
                                        self.size, self.P_c)))
                            # And tell the neighbours that the two old communities are now obsolete
                            self.outgoing_messages.append(
                                (C_id, ("defunct", oldCommu)))
                            self.outgoing_messages.append(
                                (C_id, ("defunct", otherCommu)))

                    # Remove from the heap every pair that contains one of the two communities that merged
                    # NOTE(review): the list is mutated while being iterated,
                    # so entries following a removed one are skipped; also
                    # oldCommu is only tested at index 1 and otherCommu only
                    # at index 2 -- confirm this is intended.
                    for ds in self.min_sigma_heap:
                        if ds[1] == oldCommu or ds[2] == otherCommu:
                            self.min_sigma_heap.remove(ds)
                            self.minDeltaSigma = None
                        elif ds[1] == ds[2]:
                            self.min_sigma_heap.remove(ds)
                            self.minDeltaSigma = None
                    # End of the merge part
                else:
                    self.outgoing_messages = []
                    hasSynchro = False
                    deltaSigmaChanged = False

                    # Start by looking at the announcements of obsolete communities
                    for (vertex, message) in [
                            x for x in self.incoming_messages
                            if x[1][0] == "defunct"
                    ]:
                        # NOTE(review): removal during iteration can skip the
                        # entry that follows each removed one.
                        for ds in self.min_sigma_heap:
                            if ds[1] == message[1] or ds[2] == message[1]:
                                self.min_sigma_heap.remove(ds)
                                self.minDeltaSigma = None
                        self.defunctCommunities.add(message[1])

                    # Then handle the synchronisations
                    for (vertex, message) in [
                            x for x in self.incoming_messages
                            if x[1][0][:7] == "synchro"
                    ]:
                        if message[
                                0] == "synchroF":  # synchroF => we must send back an internal synchronisation message
                            ds = message[2]
                            # A distance naming an obsolete community is
                            # answered with a "defunct" notice instead of
                            # being stored.
                            if ds[1] in self.defunctCommunities:
                                self.outgoing_messages.append(
                                    (vertex, ("defunct", ds[1])))
                            elif ds[2] in self.defunctCommunities:
                                self.outgoing_messages.append(
                                    (vertex, ("defunct", ds[2])))

                            else:
                                if ds not in self.min_sigma_heap:
                                    heappush(self.min_sigma_heap, ds)
                                    hasSynchro = True
                                for member in self.communityMembers:
                                    self.outgoing_messages.append(
                                        (member,
                                         ("synchro", message[1], message[2],
                                          message[3], message[4])))

                        if message[
                                0] == "synchro":  # synchro (without F) => no need to propagate the synchronisation
                            ds = message[2]
                            if ds[1] in self.defunctCommunities:
                                self.outgoing_messages.append(
                                    (vertex, ("defunct", ds[1])))
                            elif ds[2] in self.defunctCommunities:
                                self.outgoing_messages.append(
                                    (vertex, ("defunct", ds[2])))
                            else:
                                if ds not in self.min_sigma_heap:
                                    heappush(self.min_sigma_heap, ds)
                                    hasSynchro = True

                    # Here the deltas, i.e. the sharing of information about which communities are the closest
                    for (vertex, message) in [
                            x for x in self.incoming_messages
                            if x[1][0] == "delta"
                    ]:
                        ds = message[2]
                        if ds[1] in self.defunctCommunities:
                            self.outgoing_messages.append(
                                (vertex, ("defunct", ds[1])))
                        elif ds[2] in self.defunctCommunities:
                            self.outgoing_messages.append(
                                (vertex, ("defunct", ds[2])))
                        else:
                            if ds not in self.min_sigma_heap:
                                heappush(self.min_sigma_heap, ds)

                    # If we received the order to wait one step
                    # NOTE(review): "hold" is sent below as the bare string
                    # "hold", so x[1][0] is "h" and this filter does not
                    # match those messages -- confirm.
                    for (vertex, message) in [
                            x for x in self.incoming_messages
                            if x[1][0] == "hold"
                    ]:
                        deltaSigmaChanged = True
                        hasSynchro = True

                    # If our heap of distances is not empty, update which communities are the closest
                    if self.min_sigma_heap != []:
                        try:
                            # min() raises TypeError on Python 3 while
                            # minDeltaSigma is still None; the handler below
                            # then takes the heap head directly.
                            newMin = min(self.minDeltaSigma,
                                         self.min_sigma_heap[0])
                            deltaSigmaChanged = newMin != self.minDeltaSigma
                            self.minDeltaSigma = newMin
                        except (TypeError, IndexError) as e:
                            try:
                                newMin = self.min_sigma_heap[0]
                                deltaSigmaChanged = newMin != self.minDeltaSigma
                                self.minDeltaSigma = newMin
                            except IndexError:
                                deltaSigmaChanged = True

                    if deltaSigmaChanged or hasSynchro:
                        self.outgoing_messages += [
                            (vertex, "hold")
                            for vertex in self.communityMembers
                        ]

                    # The modulo tests let us run the system in 4 phases, so that all the nodes stay in step
                    # Here we share the deltas
                    if self.superstep % 5 == 0 and self.min_sigma_heap != []:
                        self.outgoing_messages += [
                            (vertex,
                             ("delta", self.community, self.minDeltaSigma,
                              self.communityMembers))
                            for vertex in set(self.out_vertices +
                                              list(self.communityMembers))
                        ]

                    # Here we synchronise the information inside the community
                    if self.superstep % 5 == 1 and deltaSigmaChanged and self.min_sigma_heap != []:
                        self.outgoing_messages += [
                            (vertex,
                             ("synchro", self.community, self.minDeltaSigma,
                              self.communityMembers))
                            for vertex in set(self.out_vertices +
                                              list(self.communityMembers))
                        ]

                    # If there was no recent change, we assume the pair of closest communities is stable
                    # If we are one of these two closest communities, we therefore start a merge by sending a message
                    if self.min_sigma_heap != [] and not hasSynchro and self.superstep % 10 == 8 and self.community in self.minDeltaSigma[
                            1:]:
                        # The partner is whichever label of the pair is not
                        # our own.
                        if str(self.minDeltaSigma[1]) == str(self.community):
                            otherCommu = str(self.minDeltaSigma[2])
                        else:
                            otherCommu = str(self.minDeltaSigma[1])
                        # Field order must match the tuple unpacking in the
                        # "fusion" branch above.
                        out_message = ("fusion", self.id, self.community,
                                       self.size, self.P_c,
                                       self.internal_weight, self.total_weight,
                                       self.vert, self.neighbourCommu,
                                       self.minDeltaSigma,
                                       self.communityMembers,
                                       self.min_sigma_heap,
                                       self.defunctCommunities)

                        self.outgoing_messages += [
                            (vertex, out_message)
                            for vertex in self.allVertices
                            if vertex.community == otherCommu
                        ]
                        self.sentFusion = True
            else:
                self.active = False

    # One vertex per node; each vertex also gets a reference to the full
    # vertex array (read in update() when addressing a "fusion" message).
    vertices = [0] * N
    for i in range(N):
        vertex = RandomWalkVertex(i, 0, [])
        vertex.custom_init(i, t)
        vertices[i] = vertex
    vertices = np.array(vertices)
    for vertex in vertices:
        vertex.allVertices = vertices

    # Generate the nodes
    for i in range(N):
        A_i = A[i]
        for j in range(N):
            if A_i[j] == 1:
                vertices[i].out_vertices.append(vertices[j])
                if i != j:
                    # Initial distance between the single-vertex communities
                    # i and j, computed from rows i and j of P_t.
                    ds = (0.5 / N) * np.sum(
                        np.square(
                            np.matmul(Dx, P_t[i]) - np.matmul(Dx, P_t[j])))

                    # Labels are compared as strings, so the ordering is
                    # lexicographic ("10" < "2").
                    delta_sigma = (ds, min(str(i),
                                           str(j)), max(str(i), str(j)))
                    if delta_sigma not in vertices[i].min_sigma_heap:
                        heappush(vertices[i].min_sigma_heap, delta_sigma)
                    vertices[i].neighbourCommu[vertices[j]] = delta_sigma

    p = Pregel(vertices, 8)
    p.run()

    # dates of the merges, needed to extract the information in order
    dateEvents = []
    for vertex in vertices:
        dateEvents += vertex.events
    dateEvents = sorted(list(set(dateEvents)))

    # array giving the modularity at each new merge
    modularities = []
    for event in dateEvents:
        temp = 0
        for vertex in vertices:
            # print(vertex.community, vertex.min_sigma_heap)
            # Use the vertex's first recorded event at or after this date;
            # a vertex with no such event contributes nothing.
            try:
                index = next(i for i, v in enumerate(vertex.events)
                             if v >= event)
                temp += vertex.modularities[index]
            except StopIteration:
                pass
        modularities.append(temp)

    print("Date des fusions : ", dateEvents)
    Qmax_index = np.argmax(
        modularities)  # Moment at which the modularity is maximal
    print("On a un Q maximal après la fusion numéro : ", Qmax_index,
          " sur un total de ", len(dateEvents))
    timeMax = dateEvents[Qmax_index]

    partition = set(
        []
    )  # Partition (set of communities) at the moment the modularity is maximal
    dicCommunities = {}  # Gives the nodes in each community
    for vertex in vertices:
        # Index of the vertex's last event not later than timeMax.
        # NOTE(review): the bare except also hides errors other than the
        # StopIteration raised when no later event exists.
        try:
            index = next(
                i for i, v in enumerate(vertex.events) if v > timeMax) - 1
        except:
            index = len(vertex.events) - 1
        partition.add(vertex.history[index])
        if vertex.history[index] not in dicCommunities:
            dicCommunities[vertex.history[index]] = [vertex]
        else:
            dicCommunities[vertex.history[index]].append(vertex)

    allPartition = [
    ]  # List of all the partitions at each new merge
    # Reuses (and overwrites) the timeMax name as the loop variable.
    for timeMax in dateEvents:
        tempPartition = set([])
        for vertex in vertices:
            try:
                index = next(
                    i for i, v in enumerate(vertex.events) if v > timeMax) - 1
            except:
                index = len(vertex.events) - 1
            tempPartition.add(vertex.history[index])
        allPartition.append(tempPartition)

    return dicCommunities, partition, modularities, allPartition

예제 #14

0

파일 보기

def pregel_pagerank(vertices):
    """Run Pregel over ``vertices`` and return ``mat`` of their values, transposed."""
    engine = Pregel(vertices, num_workers)
    engine.run()
    # Collect the values in p.vertices order before wrapping them.
    collected = list(map(lambda node: node.value, engine.vertices))
    result = mat(collected)
    return result.transpose()