예제 #1
0
        # Weighted in-degree output; runs only when graph_statistics.getIndeg()
        # is truthy.
        if graph_statistics.getIndeg():
            # Ask the degree helper for the 'weighted_in' statistic over D_w.
            # The result is used as a Spark RDD below (map/coalesce/save).
            output_rdd = deg.statistics_compute(D_w, 'weighted_in')
            
            # generate outputs to hdfs
            # Every record is formatted by ut.toTSVLine, then the RDD is
            # coalesced to one partition so the save writes a single part file.
            temp = output_rdd.map(ut.toTSVLine).coalesce(1)
            # The target is output_file_path with the name appended directly;
            # no path separator is inserted here, so output_file_path is
            # presumably expected to end with one -- TODO confirm.
            temp.saveAsTextFile(output_file_path+'in_degree_weighted')
           
        # Weighted total-degree output; runs only when the getTotaldge() flag
        # is truthy.
        # NOTE(review): the getter is spelled `getTotaldge` here -- verify it
        # matches the name actually defined on graph_statistics.
        if graph_statistics.getTotaldge():
            # Ask the degree helper for the 'weighted_total' statistic over D_w.
            output_rdd = deg.statistics_compute(D_w, 'weighted_total')
            
            # generate outputs to hdfs
            # Every record is formatted by ut.toTSVLine, then the RDD is
            # coalesced to one partition so the save writes a single part file.
            temp = output_rdd.map(ut.toTSVLine).coalesce(1)
            # Name is appended directly to output_file_path (no separator added).
            temp.saveAsTextFile(output_file_path+'total_degree_weighted')
   
        '''
        PageRank
        '''       
        # The PageRank helper is constructed unconditionally, even when the
        # getPR() flag below turns out to be falsy.
        pr = PageRank() 
        
        # Weighted PageRank output; runs only when graph_statistics.getPR()
        # is truthy.
        if graph_statistics.getPR():
            # Positional arguments: D_w, 32, 0.85 and the debug_mod flag.
            # Presumably 32 is the iteration count and 0.85 the damping
            # factor -- TODO confirm against PageRank.statistics_compute_weighted.
            output_rdd = pr.statistics_compute_weighted(D_w, 32, 0.85, debug_mod)
            
            # generate outputs to hdfs
            # Every record is formatted by ut.toTSVLine, then the RDD is
            # coalesced to one partition so the save writes a single part file.
            temp = output_rdd.map(ut.toTSVLine).coalesce(1)
            # Name is appended directly to output_file_path (no separator added).
            temp.saveAsTextFile(output_file_path+'pagerank_weighted')
    
#      
#     Edges_rdd = sc.parallelize(Edges)
#     temp = Edges_rdd.map(lambda x: ",".join(map(str,x))).coalesce(1)
#     temp.saveAsTextFile(output_file_path+'edges')
예제 #2
0
            [centers, counts] = deg.deg_vs_count_weight(output_rdd, N)
            centers = sc.parallelize(centers)
            counts = sc.parallelize(counts)
            deg_vs_count_rdd = centers.zip(counts)

            # generate outputs to hdfs
            temp = deg_vs_count_rdd.map(ut.toTSVLine).coalesce(1)
            temp.saveAsTextFile(output_file_path +
                                'total_degree_vs_count_weighted')
        '''
        PageRank
        '''
        # The PageRank helper is constructed unconditionally; it is used by
        # the flag-guarded branch below.
        pr = PageRank()

        # Weighted PageRank output; runs only when graph_statistics.getPR()
        # is truthy.
        if graph_statistics.getPR():
            # Positional arguments: D_w, Iter, 0.85 and the debug_mod flag.
            # Presumably Iter is the iteration count and 0.85 the damping
            # factor -- TODO confirm against PageRank.statistics_compute_weighted.
            pr_rdd = pr.statistics_compute_weighted(D_w, Iter, 0.85, debug_mod)

            # generate outputs to hdfs
            # Every record is formatted by ut.toTSVLine, then the RDD is
            # coalesced to one partition so the save writes a single part file.
            temp = pr_rdd.map(ut.toTSVLine).coalesce(1)
            # Name is appended directly to output_file_path (no separator added).
            temp.saveAsTextFile(output_file_path + 'pagerank_weighted')

        # PageRank-vs-count output; runs only when the getPR_vs_Count() flag
        # is truthy.
        if graph_statistics.getPR_vs_Count():
            # NOTE(review): this calls pr.statistics_compute on D, yet the
            # output below is named 'pr_vs_count_weighted'. Confirm whether
            # pr.statistics_compute_weighted(D_w, ...) was intended.
            pr_rdd = pr.statistics_compute(D, Iter, 0.85, debug_mod)
            # pr.pr_vs_count returns a pair that is unpacked into centers and
            # counts; presumably histogram bin centers and per-bin counts,
            # with N controlling the binning -- TODO confirm.
            [centers, counts] = pr.pr_vs_count(pr_rdd, N)
            # Both names are rebound from the returned values to RDDs built
            # with sc.parallelize so they can be paired with zip.
            centers = sc.parallelize(centers)
            counts = sc.parallelize(counts)
            # RDD.zip pairs elements positionally and needs matching partition
            # layouts on both sides; assumes centers and counts have equal
            # length -- TODO confirm.
            pr_vs_count = centers.zip(counts)

            # generate outputs to hdfs
            # Every (center, count) pair is formatted by ut.toTSVLine, then the
            # RDD is coalesced to one partition so the save writes a single
            # part file.
            temp = pr_vs_count.map(ut.toTSVLine).coalesce(1)
            # Name is appended directly to output_file_path (no separator added).
            temp.saveAsTextFile(output_file_path + 'pr_vs_count_weighted')