# Weighted in-degree statistics; produced only when the flag is set.
if graph_statistics.getIndeg():
    output_rdd = deg.statistics_compute(D_w, 'weighted_in')
    # Format each record with ut.toTSVLine and collapse to a single
    # partition before writing the text output under output_file_path.
    temp = output_rdd.map(ut.toTSVLine).coalesce(1)
    temp.saveAsTextFile(output_file_path + 'in_degree_weighted')

# Weighted total-degree statistics; produced only when the flag is set.
if graph_statistics.getTotaldge():
    output_rdd = deg.statistics_compute(D_w, 'weighted_total')
    # Same single-partition TSV export as above, different target name.
    temp = output_rdd.map(ut.toTSVLine).coalesce(1)
    temp.saveAsTextFile(output_file_path + 'total_degree_weighted')

# ---------------------------------------------------------------------
# PageRank
# ---------------------------------------------------------------------
pr = PageRank()

if graph_statistics.getPR():
    # NOTE(review): 32 and 0.85 are presumably the iteration count and
    # the damping factor -- confirm against
    # PageRank.statistics_compute_weighted.
    output_rdd = pr.statistics_compute_weighted(D_w, 32, 0.85, debug_mod)
    # Single-partition TSV export of the PageRank result.
    temp = output_rdd.map(ut.toTSVLine).coalesce(1)
    temp.saveAsTextFile(output_file_path + 'pagerank_weighted')

# Disabled: export of the raw edge list as comma-separated lines.
# Edges_rdd = sc.parallelize(Edges)
# temp = Edges_rdd.map(lambda x: ",".join(map(str,x))).coalesce(1)
# temp.saveAsTextFile(output_file_path+'edges')
# NOTE(review): this section starts mid-flow; the condition guarding the
# next few statements (if any) lies above the visible region -- confirm
# the nesting level against the full file.
# Compute the weighted degree-vs-count data from output_rdd and N.
centers, counts = deg.deg_vs_count_weight(output_rdd, N)
# Turn both result sequences into RDDs and pair them element-wise.
centers = sc.parallelize(centers)
counts = sc.parallelize(counts)
deg_vs_count_rdd = centers.zip(counts)
# Format each pair with ut.toTSVLine and collapse to a single partition
# before writing the text output under output_file_path.
temp = deg_vs_count_rdd.map(ut.toTSVLine).coalesce(1)
temp.saveAsTextFile(output_file_path + 'total_degree_vs_count_weighted')

# ---------------------------------------------------------------------
# PageRank
# ---------------------------------------------------------------------
pr = PageRank()

if graph_statistics.getPR():
    # NOTE(review): Iter and 0.85 are presumably the iteration count and
    # the damping factor -- confirm against
    # PageRank.statistics_compute_weighted.
    pr_rdd = pr.statistics_compute_weighted(D_w, Iter, 0.85, debug_mod)
    # Single-partition TSV export of the weighted PageRank result.
    temp = pr_rdd.map(ut.toTSVLine).coalesce(1)
    temp.saveAsTextFile(output_file_path + 'pagerank_weighted')

if graph_statistics.getPR_vs_Count():
    # NOTE(review): this calls the unweighted statistics_compute on D,
    # yet the result is written to 'pr_vs_count_weighted'. Possibly
    # statistics_compute_weighted(D_w, ...) was intended -- verify
    # before changing.
    pr_rdd = pr.statistics_compute(D, Iter, 0.85, debug_mod)
    centers, counts = pr.pr_vs_count(pr_rdd, N)
    # Pair the two result sequences element-wise as RDDs.
    centers = sc.parallelize(centers)
    counts = sc.parallelize(counts)
    pr_vs_count = centers.zip(counts)
    # Single-partition TSV export of the PageRank-vs-count pairs.
    temp = pr_vs_count.map(ut.toTSVLine).coalesce(1)
    temp.saveAsTextFile(output_file_path + 'pr_vs_count_weighted')