Esempio n. 1
0
# print("\n strong connected component")
# result = g.stronglyConnectedComponents(maxIter=10)
# result.select("id", "component").orderBy("component").show()

# Page rank
# Only the section banner below is live: every PageRank call in this section
# is commented out, so the banner is printed but no ranking is computed.
print("\n Page rank")
# # run until convergence to tol
# results = g.pageRank(resetProbability=0.15, tol=0.01)
# results.vertices.select("id", "pagerank").show()
# results.edges.select("src", "dst", "weight").show()

## Run personalized PageRank for source vertices ["a", "b", "c", "d"] in parallel
# results4 = g.parallelPersonalizedPageRank(resetProbability=0.15, sourceIds=["a", "b", "c", "d"], maxIter=10)

# Shortest paths: ask the graph `g` for the paths from every vertex to the
# landmark vertices listed below, then display each vertex id next to its
# "distances" column.
# NOTE(review): "landmards" in the banner is a typo for "landmarks"; the
# string is left untouched here so the script's output does not change.
print("\n shortest paths from each node to landmards node")
landmark_ids = ["a", "d"]
results = g.shortestPaths(landmarks=landmark_ids)
(
    results
    .select("id", "distances")
    .show()
)

# # Saving and Loading GraphFrames
# g.vertices.write.parquet("hdfs://myLocation/vertices")
# g.edges.write.parquet("hdfs://myLocation/edges")
#
# # Load the vertices and edges back.
# sameV = sqlContext.read.parquet("hdfs://myLocation/vertices")
# sameE = sqlContext.read.parquet("hdfs://myLocation/edges")

# message passing via AggregateMessages
# For each user, sum the ages of the adjacent users.
# The two expressions below are the per-edge messages; they are presumably
# handed to the g.aggregateMessages(...) call that follows (its remaining
# arguments are cut off in this view -- TODO confirm).
# Message intended for an edge's source vertex: the destination's "age".
msgToSrc = AM.dst["age"]
# Message intended for an edge's destination vertex: the source's "age".
msgToDst = AM.src["age"]
agg = g.aggregateMessages(sum(AM.msg).alias("summedAges"),
    # |  a|  b|      follow|
    # |  b|  c|      follow|
    # |  c|  d|      follow|
    # |  d|  e|      follow|
    # |  b|  e|      follow|
    # |  c|  e|      follow|
    # |  e|  f|      follow|
    # +---+---+------------+

    # Step-3: Create a GraphFrame. Using GraphFrames API, a graph
    # is built as an instance of a GraphFrame, which is a pair of
    # vertices (as `v`) and edges (as `e`):
    graph = GraphFrame(v, e)
    print("graph=", graph)
    # GraphFrame(v:[id: string, name: string ... 1 more field],
    #            e:[src: string, dst: string ... 1 more field])

    # Computes shortest paths for landmarks ["a", "f"]
    #
    results = graph.shortestPaths(landmarks=["a", "f"])
    print("results=", results)
    #
    print("results.show()=")
    results.show(truncate=False)
    #
    print('results.select("id", "distances").show()=')
    results.select("id", "distances").show()

    # done!
    spark.stop()