Example #1
    def test_rank_results(self):
        from pygrank.algorithms.pagerank import PageRank as Ranker
        from pygrank.algorithms.utils import preprocessor
        G = create_test_graph()
        test_result = Ranker(to_scipy=preprocessor('col')).rank(G)
        nx_result = nx.pagerank_scipy(G)
        # mean absolute difference between pygrank and networkx scores
        abs_diffs = sum(abs(test_result[v] - nx_result[v]) for v in nx_result.keys()) / len(nx_result)
        self.assertAlmostEqual(abs_diffs, 0, places=16, msg="PageRank compliance with nx results")
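This test method (and the two that follow) belongs to a unittest.TestCase and relies on module-level context the listing omits: the time and networkx imports and a small create_test_graph() helper. A minimal sketch of that missing context, assuming any small graph is acceptable and an older networkx release in which nx.pagerank_scipy still exists, could look like this (the edge list is purely illustrative, not the helper actually used by pygrank's tests):

import time
import unittest  # the test methods belong to a unittest.TestCase subclass (not shown)
import networkx as nx  # nx.pagerank_scipy was removed in networkx 3.0; an older release is assumed

def create_test_graph():
    # hypothetical stand-in for the helper referenced by the test snippets
    G = nx.DiGraph()
    G.add_edges_from([("A", "B"), ("B", "C"), ("C", "A"), ("C", "D"), ("D", "A"), ("B", "D")])
    return G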
Example #2
    def test_immutability_speedup(self):
        from pygrank.algorithms.pagerank import PageRank as Ranker
        from pygrank.algorithms.utils import preprocessor
        import scipy.stats
        nx_time = list()
        test_time = list()
        repeats = 50
        G = create_test_graph()
        # time repeated runs without hashing the normalized adjacency matrix
        ranker = Ranker(to_scipy=preprocessor('col'))
        tic = time.perf_counter()  # time.clock() was removed in Python 3.8
        for _ in range(repeats):
            ranker.rank(G)
        unhashed_time = time.perf_counter() - tic
        # with assume_immutability=True the normalization outcome is hashed and reused across runs
        ranker = Ranker(to_scipy=preprocessor('col', assume_immutability=True))
        tic = time.perf_counter()
        for _ in range(repeats):
            ranker.rank(G)
        hashed_time = time.perf_counter() - tic
        self.assertLessEqual(hashed_time, unhashed_time, msg="Hashing speedup")
Example #3
    def test_rank_time(self):
        from pygrank.algorithms.pagerank import PageRank as Ranker
        from pygrank.algorithms.utils import preprocessor
        import scipy.stats
        nx_time = list()
        test_time = list()
        repeats = 50
        for _ in range(repeats):
            G = create_test_graph()
            # time a pygrank PageRank run on a fresh graph
            tic = time.perf_counter()
            Ranker(to_scipy=preprocessor('col')).rank(G)
            test_time.append(time.perf_counter() - tic)
            # time the networkx implementation on the same graph
            tic = time.perf_counter()
            nx.pagerank_scipy(G)
            nx_time.append(time.perf_counter() - tic)
        self.assertLessEqual(scipy.stats.ttest_ind(nx_time, test_time)[1], 0.001, msg="PageRank time comparable to nx with p-value<0.001")
Example #4
    # convert both score vectors to ordinal ranks and inspect their correlation visually
    from scipy.stats import rankdata
    ranks = rankdata(ranks)
    ground_truth = rankdata(ground_truth)
    plt.scatter(ground_truth, ranks)
    plt.grid()
    plt.show()
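The scatter of the two rankdata outputs is a visual check of rank correlation; the same relationship can be summarized numerically with a Spearman coefficient, which appears to be what the result_spearmans table below collects. A small sketch, assuming ranks and ground_truth are the aligned array-like vectors from the snippet above:

from scipy.stats import spearmanr

# illustrative only: spearmanr rank-transforms internally, so it accepts either
# the raw scores or the rankdata outputs shown above
rho, p_value = spearmanr(ground_truth, ranks)
print('Spearman correlation', rho, 'p-value', p_value)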


# CHANGE THE FOLLOWING BLOCK TO SELECT DATASET
specific_ids = [1723] # community ids
dataset = 'snap_amazon' # dataset
dataset_name = dataset


G, groups = import_SNAP_data(dataset, specific_ids=specific_ids)  # dataset options: snap_amazon, snap_dblp
pre = preprocessor('col', assume_immutability=True)  # a preprocessor that hashes the normalization outcome for faster repeated runs of the same algorithms
pre(G)  # run the preprocessor once so that hashing does not affect later time measurements

result_spearmans = ""
result_iterations = ""

for group_number in range(len(groups)):
    for alpha in [0.85, 0.90, 0.95, 0.99, 0.995, 0.999]:
        result_spearmans += dataset_name+"-"+str(specific_ids[group_number])+" & & "+(str(alpha)[1:])
        result_iterations += dataset_name+"-"+str(specific_ids[group_number])+" & & "+(str(alpha)[1:])
        seeds = {v:1 for v in groups[group_number]}
        ground_truth_ranker = PageRank(alpha=alpha, to_scipy=pre, tol=1.E-20, max_iters=30000, use_quotient=False)
        ground_truth_ranks = ground_truth_ranker.rank(G, seeds)
        result_iterations += " & "+str(ground_truth_ranker.convergence.iteration)
        print("Found ground truth ranks ("+str(ground_truth_ranker.convergence.iteration)+" iterations)")
        compared_rankers = list()
Example #5
max_iters = 10000
for dataset_name in datasets:
    G, groups = import_SNAP_data(
        dataset_name,
        min_group_size=5000)  # 12000 for dblp, 5000 for amazon
    group_sets = [set(group) for group in groups.values()]
    for group in group_sets:
        print(len(group))
    # homophily: fraction of edges whose endpoints share at least one community
    count = sum(
        1 for u, v in G.edges()
        if sum(1 for group in group_sets if u in group and v in group) > 0)
    print('Homophily', count / float(G.number_of_edges()))
    seeds = [0.001, 0.01, 0.1, 0.25, 0.5]
    print('Number of groups', len(groups))
    for seed in seeds:
        pre = preprocessor('col', assume_immutability=True)
        preL = preprocessor('symmetric', assume_immutability=True)
        pre(G)
        tol = 1.E-6
        base_algorithms = {
            "PPRL 0.85": pygrank.algorithms.pagerank.PageRank(
                alpha=0.85, to_scipy=preL, max_iters=max_iters, tol=tol),
            "PPRL 0.90": pygrank.algorithms.pagerank.PageRank(
                alpha=0.9, to_scipy=preL, max_iters=max_iters, tol=tol),
            "PPRL 0.95":