Example #1
 def test_heat_kernel_locality(self):
     from pygrank.algorithms.pagerank import PageRank
     from pygrank.algorithms.pagerank import HeatKernel
     G = create_test_graph()
     personalization = {"A": 1, "B": 1}
     pagerank = PageRank().rank(G, personalization)
     heatkernel = HeatKernel().rank(G, personalization)
     self.assertLess(pagerank['D']/sum(pagerank.values()), heatkernel['D']/sum(heatkernel.values()), msg="HeatKernel more local than PageRank")
     self.assertLess(heatkernel['I']/sum(heatkernel.values()), pagerank['I']/sum(pagerank.values()), msg="HeatKernel more local than PageRank")
Example #2
 def test_immutability_speedup(self):
     from pygrank.algorithms.pagerank import PageRank as Ranker
     from pygrank.algorithms.utils import preprocessor
     import scipy.stats
     import time
     nx_time = list()
     test_time = list()
     repeats = 50
     G = create_test_graph()
     ranker = Ranker(to_scipy=preprocessor('col'))
     tic = time.perf_counter()  # time repeated runs without caching
     for _ in range(repeats):
         ranker.rank(G)
     unhashed_time = time.perf_counter() - tic
     ranker = Ranker(to_scipy=preprocessor('col', assume_immutability=True))
     tic = time.perf_counter()  # time repeated runs with the cached (hashed) preprocessing
     for _ in range(repeats):
         ranker.rank(G)
     hashed_time = time.perf_counter() - tic
     self.assertLessEqual(hashed_time, unhashed_time, msg="Hashing speedup")
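A minimal standalone sketch of the caching pattern exercised by this test, reusing the same create_test_graph helper: with assume_immutability=True the preprocessor hashes the graph and reuses the normalized adjacency matrix, so it can be warmed once outside any timed region (Example #6 below calls pre(G) for the same reason).

# Sketch only: mirrors the helpers and parameters shown in the test above.
from pygrank.algorithms.pagerank import PageRank
from pygrank.algorithms.utils import preprocessor

G = create_test_graph()                              # same helper as in the tests above
pre = preprocessor('col', assume_immutability=True)  # caches the normalized adjacency matrix
pre(G)                                               # warm the cache once, outside timed code
ranks = PageRank(to_scipy=pre).rank(G)               # repeated .rank calls reuse the cached matrix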
Example #3
    def test_use_quotient_filter(self):
        from pygrank.algorithms.pagerank import PageRank
        from pygrank.algorithms.postprocess import Normalize

        G = create_test_graph()
        personalization = {"A": 1, "B": 1}

        ranks1 = PageRank(use_quotient=True).rank(G, personalization)
        ranks2 = PageRank(use_quotient=Normalize(method="sum")).rank(
            G, personalization)

        err = sum(abs(ranks1[v] - ranks2[v]) for v in G)
        self.assertAlmostEqual(
            err, 0, places=15,
            msg="use_quotient=Normalize(method='sum') should yield the same results as use_quotient=True (albeit a little slower)")
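For reference, method="sum" normalization simply divides every score by the total, which is also what the hand-written ratios in Example #1 compute; a tiny standalone sketch (plain Python, no pygrank needed):

ranks = {"A": 2.0, "B": 1.0, "C": 1.0}
total = sum(ranks.values())
print({v: r / total for v, r in ranks.items()})  # {'A': 0.5, 'B': 0.25, 'C': 0.25}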
Example #4
 def __call__(self, G, r):
     from pygrank.algorithms.pagerank import PageRank
     import networkx as nx
     G = G.to_directed()
     ranks = PageRank().rank(G, {r: 1})
     ranks = {v: ranks[v] / G.degree(v) for v in G}
     # Find the largest drop between consecutive (descending) rank values and
     # use the rank at which that drop occurs as the threshold.
     max_gap = 0
     threshold = 0
     prev_rank = None
     for v, rank in sorted(ranks.items(),
                           key=lambda item: item[1],
                           reverse=True):
         if prev_rank is not None:
             gap = prev_rank - rank
             if gap > max_gap:
                 max_gap = gap
                 threshold = rank
         prev_rank = rank
     T = nx.DiGraph()
     T.add_node(r)
     for u, v in G.edges():
         # keep only edges whose endpoints are both ranked at or above the threshold
         if ranks[u] >= threshold and ranks[v] >= threshold:
             T.add_edge(u, v)
     return nx.ego_graph(T, r, radius=1000000)
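The threshold above is just the rank value at the largest consecutive drop when ranks are sorted in descending order; a small standalone sketch with made-up numbers illustrates the selection loop:

ranks = {"A": 0.50, "B": 0.45, "C": 0.44, "D": 0.10, "E": 0.09}
max_gap, threshold, prev_rank = 0, 0, None
for v, rank in sorted(ranks.items(), key=lambda item: item[1], reverse=True):
    if prev_rank is not None and prev_rank - rank > max_gap:
        max_gap, threshold = prev_rank - rank, rank
    prev_rank = rank
print(threshold)  # 0.1 -- the largest drop is between C (0.44) and D (0.10)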
Example #5
    def test_venuerank(self):
        from pygrank.algorithms.pagerank import PageRank
        from pygrank.algorithms.postprocess import Ordinals
        G = nx.fast_gnp_random_graph(600, 0.001, seed=1)
        ranker1 = PageRank(alpha=0.9,
                           max_iters=10000,
                           converge_to_eigenvectors=True,
                           tol=1.E-12)
        ranks1 = ranker1.rank(G, personalization={0: 1, 1: 1})
        ranker2 = PageRank(alpha=0.99, max_iters=10000, tol=1.E-12)
        ranks2 = ranker2.rank(G, personalization={0: 1, 1: 1})
        self.assertLess(
            ranker1.convergence.iteration,
            ranker2.convergence.iteration / 10,
            msg="converge_to_eigenvectors (VenueRank) should be much faster in difficult-to-rank graphs")

        from scipy.stats import spearmanr
        corr = spearmanr(list(Ordinals().transform(ranks1).values()),
                         list(Ordinals().transform(ranks2).values()))
        self.assertAlmostEqual(corr[0], 1., 4)
Example #6
dataset_name = dataset


G, groups = import_SNAP_data(dataset, specific_ids=specific_ids)  # SNAP dataset (e.g. amazon or dblp)
pre = preprocessor('col', assume_immutability=True)  # hashes the normalized adjacency matrix for faster reruns of the same algorithms
pre(G)  # run the preprocessor once up front so it does not affect later time measurements

result_spearmans = ""
result_iterations = ""

for group_number in range(len(groups)):
    for alpha in [0.85, 0.90, 0.95, 0.99, 0.995, 0.999]:
        result_spearmans += dataset_name+"-"+str(specific_ids[group_number])+" & & "+(str(alpha)[1:])
        result_iterations += dataset_name+"-"+str(specific_ids[group_number])+" & & "+(str(alpha)[1:])
        seeds = {v:1 for v in groups[group_number]}
        ground_truth_ranker = PageRank(alpha=alpha, to_scipy=pre, tol=1.E-20, max_iters=30000, use_quotient=False)
        ground_truth_ranks = ground_truth_ranker.rank(G, seeds)
        result_iterations += " & "+str(ground_truth_ranker.convergence.iteration)
        print("Found ground truth ranks ("+str(ground_truth_ranker.convergence.iteration)+" iterations)")
        compared_rankers = list()
        for tol in [1.E-6, 1.E-7, 1.E-8, 1.E-9, 1.E-10, 1.E-11, 1.E-12]:
            compared_rankers.append(PageRank(alpha=alpha, to_scipy=pre, tol=tol, max_iters=30000, use_quotient=False))
        compared_rankers.append(PageRank(alpha=alpha, to_scipy=pre, tol=tol, max_iters=estimate_mixing(alpha), error_type="iters"))
        compared_rankers.append(PageRank(alpha=alpha, to_scipy=pre, use_quotient=False, convergence=RankOrderConvergenceManager(alpha, confidence=0.99, criterion="fraction_of_walks")))
        compared_rankers.append(PageRank(alpha=alpha, to_scipy=pre, use_quotient=False, convergence=RankOrderConvergenceManager(alpha, confidence=0.98, criterion="rank_gap")))
        for ranker in compared_rankers:
            ranks = ranker.rank(G, seeds)
            sp = spearmanr(list(ranks.values()), list(ground_truth_ranks.values()))
            #show_correlations(list(ranks.values()), list(ground_truth_ranks.values()))
            #print(sp[0])
            result_spearmans += " & "+str(-int(np.log10(1-sp[0])*10)/10.)
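The last column can be read as "digits of agreement": -log10(1 - rho) truncated to one decimal, so a Spearman correlation of 0.999 with the ground-truth ranks reports roughly 3. A standalone check of that arithmetic (assumed interpretation, not part of the original script):

import numpy as np

rho = 0.999                                   # example Spearman correlation
digits = -int(np.log10(1. - rho) * 10) / 10.  # roughly 3 digits of agreement (floating point may yield 2.9)
print(digits)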
Example #7
    return (sum(accuracy) / len(accuracy),
            sum(treatment) / len(treatment),
            sum(mistreatment) / len(mistreatment),
            sum(treatment_overtrained) / len(treatment_overtrained))


datasets = [
    "facebook 0", "facebook 686", "facebook 0 extreme", "facebook 686 extreme",
    "twitter extreme", "amazon"
]
for dataset in datasets:
    print('%', dataset)
    points = 10

    ppr = PageRank(alpha=0.99,
                   max_iters=10000,
                   tol=1.E-9,
                   assume_immutability=True,
                   normalization="symmetric")
    #ppr = HeatKernel(t=5, max_iters=10000, tol=1.E-9, assume_immutability=True, normalization="symmetric")
    seeds = [(1. + i) / points for i in range(points)
             if 0.1 <= (1. + i) / points <= 0.9]
    seeds = seeds[:3]

    algorithms = {
        "None": ppr,
        "Mult": FairPostprocessor(ppr, "B"),
        "LFRPO": FairPostprocessor(ppr, "O"),
        "Sweep": Normalize(Sweep(ppr)),
        "FP": Normalize(FairPersonalizer(ppr)),
        "CFP": Normalize(FairPersonalizer(ppr, .80, pRule_weight=10)),
        "SweepLFRPO": Normalize(FairPostprocessor(Sweep(ppr), "O")),