# Demo section: runs a few GraphFrames algorithms against the graph `g`
# (built earlier in the script) and prints their results. Disabled
# experiments are kept as commented-out code for reference.

# print("\n strong connected component")
# result = g.stronglyConnectedComponents(maxIter=10)
# result.select("id", "component").orderBy("component").show()

# Page rank
# Only the section header is printed; the PageRank calls below are disabled.
print("\n Page rank")
# # run until convergence to tol
# results = g.pageRank(resetProbability=0.15, tol=0.01)
# results.vertices.select("id", "pagerank").show()
# results.edges.select("src", "dst", "weight").show()

## Run PageRank personalized for vertex ["a", "b", "c", "d"] in parallel
# results4 = g.parallelPersonalizedPageRank(resetProbability=0.15, sourceIds=["a", "b", "c", "d"], maxIter=10)

# Shortest paths from every vertex to the landmark vertices "a" and "d".
print("\n shortest paths from each node to landmards node")
results = g.shortestPaths(landmarks=["a", "d"])
results.select("id", "distances").show()

# # Saving and Loading GraphFrames
# g.vertices.write.parquet("hdfs://myLocation/vertices")
# g.edges.write.parquet("hdfs://myLocation/edges")
#
# # Load the vertices and edges back.
# sameV = sqlContext.read.parquet("hdfs://myLocation/vertices")
# sameE = sqlContext.read.parquet("hdfs://myLocation/edges")

# message passing via AggregateMessages
# For each user, sum the ages of the adjacent users.
msgToSrc = AM.dst["age"]  # message for the source vertex: the destination's age
msgToDst = AM.src["age"]  # message for the destination vertex: the source's age
# NOTE(review): the aggregateMessages call below is cut off in this view --
# its remaining arguments and closing parenthesis are not visible -- so it is
# kept verbatim as a comment instead of being completed by guesswork.
# Presumably it passes msgToSrc / msgToDst; confirm against the full file
# before re-enabling it.
# agg = g.aggregateMessages(sum(AM.msg).alias("summedAges"),
# |  a|  b|      follow|
# |  b|  c|      follow|
# |  c|  d|      follow|
# |  d|  e|      follow|
# |  b|  e|      follow|
# |  c|  e|      follow|
# |  e|  f|      follow|
# +---+---+------------+

# Step-3: Create a GraphFrame. Using GraphFrames API, a graph
# is built as an instance of a GraphFrame, which is a pair of
# vertices (as `v`) and edges (as `e`):
graph = GraphFrame(v, e)
print("graph=", graph)
# Sample output:
# GraphFrame(v:[id: string, name: string ... 1 more field],
#            e:[src: string, dst: string ... 1 more field])

# Computes shortest paths for landmarks ["a", "f"]
results = graph.shortestPaths(landmarks=["a", "f"])
print("results=", results)

# Show every column of the result without truncating cell contents.
print("results.show()=")
results.show(truncate=False)

# Show only each vertex id together with its distances column.
print('results.select("id", "distances").show()=')
results.select("id", "distances").show()

# done! Stop the Spark session used by this script.
spark.stop()