Beispiel #1
0
 def __init__(self, vertex_jrdd, edge_jrdd,
              partition_strategy=PartitionStrategy.EdgePartition1D):
     """
     Wrap the supplied vertex and edge RDDs and record the graph settings.

     :param vertex_jrdd:        RDD of vertices; its ``context`` attribute
                                is also stored as ``_jsc``
     :param edge_jrdd:          RDD of edges
     :param partition_strategy: Value stored as ``_partition_strategy``
     """
     # Vertices: wrapped together with their own context and a batched
     # pickle serializer.
     vertex_context = vertex_jrdd.context
     vertex_serializer = BatchedSerializer(PickleSerializer())
     self._vertex_jrdd = VertexRDD(
         vertex_jrdd, vertex_context, vertex_serializer)

     # Edges: same wrapping, using the edge RDD's own context and a
     # separate serializer instance.
     edge_context = edge_jrdd.context
     edge_serializer = BatchedSerializer(PickleSerializer())
     self._edge_jrdd = EdgeRDD(
         edge_jrdd, edge_context, edge_serializer)

     self._partition_strategy = partition_strategy
     self._jsc = vertex_jrdd.context
Beispiel #2
0
class Graph(object):
    """
    Pairs a vertex RDD with an edge RDD and exposes graph operations on them.

    Several operations are still unimplemented stubs (marked ``TODO``) that
    always return ``None``. The analytics methods delegate to a JVM-side
    ``PythonGraph`` object reached through the context stored by
    ``__init__``.
    """

    def __init__(self, vertex_jrdd, edge_jrdd,
                 partition_strategy=PartitionStrategy.EdgePartition1D):
        """
        Wrap the given RDDs and remember the partition strategy.

        :param vertex_jrdd:        RDD holding the vertices; its ``context``
                                   is kept as the graph's context (``_jsc``)
        :param edge_jrdd:          RDD holding the edges
        :param partition_strategy: Strategy stored on the graph; defaults to
                                   ``PartitionStrategy.EdgePartition1D``
        """
        self._vertex_jrdd = VertexRDD(vertex_jrdd, vertex_jrdd.context,
                                      BatchedSerializer(PickleSerializer()))
        self._edge_jrdd = EdgeRDD(edge_jrdd, edge_jrdd.context,
                                  BatchedSerializer(PickleSerializer()))
        self._partition_strategy = partition_strategy
        self._jsc = vertex_jrdd.context

    def _python_graph(self):
        """
        Return the JVM-side ``org.apache.PythonGraph`` handle.

        :return: The ``PythonGraph`` attribute resolved through the JVM view
                 of the stored context
        """
        # ``_jsc`` is the only context attribute ``__init__`` assigns, so
        # every JVM lookup has to go through it.
        return self._jsc._jvm.org.apache.PythonGraph

    def persist(self, storageLevel):
        """
        Persist both the vertex and the edge RDD with ``storageLevel``.

        :param storageLevel: Storage level forwarded to both RDDs
        :return: None
        """
        self._vertex_jrdd.persist(storageLevel)
        self._edge_jrdd.persist(storageLevel)

    def cache(self):
        """
        Call ``cache()`` on both the vertex and the edge RDD.

        :return: None
        """
        self._vertex_jrdd.cache()
        self._edge_jrdd.cache()

    def vertices(self):
        """
        :return: The wrapped vertex RDD
        """
        return self._vertex_jrdd

    def edges(self):
        """
        :return: The wrapped edge RDD
        """
        return self._edge_jrdd

    def numEdges(self):
        """
        :return: Result of ``count()`` on the edge RDD
        """
        return self._edge_jrdd.count()

    def numVertices(self):
        """
        :return: Result of ``count()`` on the vertex RDD
        """
        return self._vertex_jrdd.count()

    # TODO
    def partitionBy(self, partitionStrategy):
        """Not implemented yet; always returns ``None``."""
        return None

    # TODO
    def inDegrees(self):
        """Not implemented yet; always returns ``None``."""
        return None

    # TODO
    def outDegrees(self):
        """Not implemented yet; always returns ``None``."""
        return None

    # TODO
    def degrees(self):
        """Not implemented yet; always returns ``None``."""
        return None

    def triplets(self):
        """
        Ask a newly created JVM-side ``PythonGraph`` for its triplets.

        :return: Result of ``PythonGraph.triplets()``, or ``None`` when the
                 stored context is not a ``SparkContext``
        """
        if not isinstance(self._jsc, SparkContext):
            return None
        # The JVM view is reached through ``_jvm``, the same attribute the
        # other JVM calls in this class use.
        # NOTE(review): this path is ``org.apache.spark.PythonGraph`` while
        # the analytics methods use ``org.apache.PythonGraph`` -- confirm
        # which package the JVM class really lives in.
        py_graph = self._jsc._jvm.org.apache.spark.PythonGraph()
        return py_graph.triplets()

    # TODO
    def unpersistVertices(self, blocking=True):
        """Not implemented yet; always returns ``None``."""
        return None

    def mapVertices(self, f):
        """
        Apply ``f`` to the vertex RDD through ``mapValues``.

        :param f: Function forwarded to ``mapValues``
        :return: Result of ``mapValues(f)`` on the vertex RDD
        """
        return self._vertex_jrdd.mapValues(f)

    def mapEdges(self, f):
        """
        Apply ``f`` to the edge RDD through ``mapValues``.

        :param f: Function forwarded to ``mapValues``
        :return: Result of ``mapValues(f)`` on the edge RDD
        """
        # Must target the edge RDD; mapping the vertex RDD here would make
        # this method a duplicate of mapVertices.
        return self._edge_jrdd.mapValues(f)

    # TODO
    def mapTriplets(self, f):
        """Not implemented yet; always returns ``None``."""
        return None

    # TODO
    def subgraph(self, epred, pred):
        """Not implemented yet; always returns ``None``."""
        return None

    # TODO
    def groupEdges(self, mergeFunc):
        """Not implemented yet; always returns ``None``."""
        return None

    # TODO
    def joinVertices(self, mapFunc):
        """Not implemented yet; always returns ``None``."""
        return None

    # TODO
    def outerJoinVertices(self, mapFunc):
        """Not implemented yet; always returns ``None``."""
        return None

    # TODO
    def collectNeighborIds(self, edgeDirection):
        """Not implemented yet; always returns ``None``."""
        return None

    # TODO
    def collectNeighbors(self, edgeDirection):
        """Not implemented yet; always returns ``None``."""
        return None

    # TODO
    def mapReduceTriplets(self, mapFunc, reduceFunc):
        """Not implemented yet; always returns ``None``."""
        return None

    def pagerank(self, num_iterations, reset_probability=0.15):
        """
        Run PageRank through the JVM-side ``PythonGraph``.

        Users can specify terminating conditions as number of
        iterations or the Random reset probability or alpha

        NOTE(review): only the two parameters below are forwarded; the
        vertex and edge RDDs of this graph are not passed to the JVM call.
        Confirm how ``PythonGraph`` obtains the graph data.

        :param num_iterations:    Number of iterations for the
                                  algorithm to terminate
        :param reset_probability: Random reset probability
        :return: Result of ``asJavaRDD()`` on the value the JVM call returns
        """
        py_graph = self._python_graph().pagerank(num_iterations,
                                                 reset_probability)
        return py_graph.asJavaRDD()

    def connected_components(self):
        """
        Call ``connectedComponents()`` on the JVM-side ``PythonGraph``.

        :return: Result of ``asJavaRDD()`` on the value the JVM call returns
        """
        py_graph = self._python_graph().connectedComponents()
        return py_graph.asJavaRDD()

    def reverse(self):
        """
        Call ``reverse()`` on the JVM-side ``PythonGraph``.

        :return: Result of ``asJavaRDD()`` on the value the JVM call returns
        """
        py_graph = self._python_graph().reverse()
        return py_graph.asJavaRDD()

    def apply(self, f):
        """
        Wrap ``f`` in a per-iterator mapper and pass it to ``PythonGraph``.

        NOTE(review): the mapper is a plain Python callable handed to a
        JVM call -- confirm the JVM side is able to accept it.

        :param f: Function applied lazily to every item of an iterator
        :return: Result of ``asJavaRDD()`` on the value the JVM call returns
        """
        def func(iterator):
            # Lazy element-wise mapping; a generator expression behaves the
            # same on Python 2 and 3, unlike ``itertools.imap``.
            return (f(item) for item in iterator)
        py_graph = self._python_graph().apply(func)
        return py_graph.asJavaRDD()

    # TODO
    def triangleCount(self):
        """Not implemented yet; always returns ``None``."""
        return None

    # TODO
    def stronglyConnectedComponents(self, iterations):
        """Not implemented yet; always returns ``None``."""
        return None

    # TODO
    def Pregel(self, initial_message, vertex_program, send_message, combine_message):
        """Not implemented yet; always returns ``None``."""
        return None