The examples below are drawn from the pygrank test suite and example scripts. Each snippet assumes import pygrank as pg; test snippets additionally use pytest and the supported_backends() helper from the project's own test modules where shown.

Code Example #1
File: test_autotune.py Project: maniospas/pygrank
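# HopTuner tuned by AUC, with and without an autoregressive correction:
# the autoregression=5 variant should retain more than 90% of the baseline AUC.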
def test_hoptuner_autorgression():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.01)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(measure=pg.AUC).rank(training))
    auc3 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(measure=pg.AUC, autoregression=5).rank(training))
    assert auc3 > auc1*0.9
Code Example #2
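# Sweep chained after PageRank (via >>) should beat plain PageRank, and should
# match the equivalent log-transformed LinearSweep pipeline within pg.epsilon();
# pg.Sweep() << "a" must raise. Runs once per supported backend.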
def test_sweep_streaming():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=0.1)
        auc1 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            (pg.PageRank() >> pg.Sweep()).rank(graph, {v: 1 for v in training}))
        auc2 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            pg.PageRank().rank(graph, {v: 1 for v in training}))
        auc3 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            pg.PageRank() >> pg.Transformer(pg.log) >> pg.LinearSweep()
            | pg.to_signal(graph, {v: 1 for v in training}))
        assert auc1 > auc2
        assert abs(auc1 - auc3) < pg.epsilon()

    with pytest.raises(Exception):
        pg.Sweep() << "a"
Code Example #3
File: test_autotune.py Project: maniospas/pygrank
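# Passing an explicit GenericGraphFilter factory with krylov_dims=10 should match
# HopTuner's built-in krylov_dims shorthand on the arnoldi basis.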
def test_hoptuner_explicit_algorithm():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(lambda params: pg.GenericGraphFilter(params, krylov_dims=10), basis="arnoldi", measure=pg.AUC).rank(training))
    auc2 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(basis="arnoldi", krylov_dims=10, measure=pg.AUC).rank(training))
    assert abs(auc1-auc2) < 0.005
Code Example #4
File: test_autotune.py Project: maniospas/pygrank
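# A tuned LowPassRecursiveGraphFilter should reach at least 80% of the AUC of a
# tuned GenericGraphFilter.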
def test_lowpass_tuning():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.1)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.ParameterTuner(lambda params: pg.GenericGraphFilter(params)).rank(training))
    auc2 = pg.AUC(evaluation, exclude=training)(pg.ParameterTuner(lambda params: pg.LowPassRecursiveGraphFilter(params)).rank(training))
    assert auc2 > auc1*0.8
Code Example #5
File: test_autotune.py Project: maniospas/pygrank
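# AlgorithmSelection should score within one standard deviation of the best
# demo filter's AUC.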
def test_autotune_methods():
    import numpy as np
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}))
    aucs = [pg.AUC(evaluation, exclude=training)(ranker.rank(training)) for ranker in pg.create_demo_filters().values()]
    auc2 = pg.AUC(evaluation, exclude=training)(pg.AlgorithmSelection().rank(training))
    assert max(aucs)-np.std(aucs) <= auc2
Code Example #6
File: test_autotune.py Project: maniospas/pygrank
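# Manually tuning PageRank's damping factor over [0.5, 0.99] should not
# underperform the default PageRank.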
def test_autotune_manual():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.PageRank().rank(training))
    alg2 = pg.ParameterTuner(lambda params: pg.PageRank(params[0]), max_vals=[0.99], min_vals=[0.5]).tune(training)
    auc2 = pg.AUC(evaluation, exclude=training)(alg2.rank(training))
    assert auc1 <= auc2
Code Example #7
File: test_autotune.py Project: maniospas/pygrank
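# A default ParameterTuner (with a shared optimization_dict cache) should be
# competitive with both hand-picked PageRank and HeatKernel.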
def test_autotune():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.PageRank().rank(training))
    auc2 = pg.AUC(evaluation, exclude=training)(pg.HeatKernel().rank(training))
    auc3 = pg.AUC(evaluation, exclude=training)(pg.ParameterTuner(optimization_dict=dict()).rank(training))
    assert min(auc1, auc2) <= auc3 and max(auc1, auc2)*0.9 <= auc3
Code Example #8
File: test_autotune.py Project: maniospas/pygrank
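# HopTuner on the arnoldi basis should produce identical AUC regardless of the
# tuning backend (numpy, pytorch, tensorflow).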
def test_hoptuner_arnoldi_backends():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(basis="arnoldi", measure=pg.AUC).rank(training))
    auc2 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(basis="arnoldi", measure=pg.AUC, tuning_backend="pytorch").rank(training))
    auc3 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(basis="arnoldi", measure=pg.AUC, tuning_backend="tensorflow").rank(training))
    assert auc1 == auc2
    assert auc1 == auc3
Code Example #9
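# Chebyshev coefficients should not trail the default coefficient type by more
# than 0.1 AUC.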
def test_chebyshev():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    #  do not test with tensorflow, as it can be too slow
    training, evaluation = pg.split(pg.to_signal(graph, {v: 1 for v in group}))
    tuned_auc = pg.AUC(evaluation, training).evaluate(
        pg.ParameterTuner().rank(graph, training))
    tuned_chebyshev_auc = pg.AUC(evaluation, training).evaluate(
        pg.ParameterTuner(coefficient_type="chebyshev").rank(graph, training))
    assert (tuned_auc - tuned_chebyshev_auc) < 0.1
Code Example #10
File: test_autotune.py Project: maniospas/pygrank
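# Every tuner should give nearly backend-independent results when tuned with
# KLDivergence on the pytorch and tensorflow backends.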
def test_autotune_backends():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    for tuner in [pg.HopTuner, pg.AlgorithmSelection, pg.ParameterTuner]:
        auc3 = pg.AUC(evaluation, exclude=training)(tuner(measure=pg.KLDivergence, tuning_backend="pytorch").rank(training))
        auc2 = pg.AUC(evaluation, exclude=training)(tuner(measure=pg.KLDivergence, tuning_backend="tensorflow").rank(training))
        auc1 = pg.AUC(evaluation, exclude=training)(tuner(measure=pg.KLDivergence).rank(training))
        # TODO: maybe fix KLDivergence implementation to not be affected by backend.epsilon()
        assert abs(auc1-auc2) < 0.005  # different results due to different backend.epsilon()
        assert abs(auc1-auc3) < 0.005
Code Example #11
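# The arnoldi basis should closely reproduce the default HopTuner's AUC.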
def test_hoptuner_arnoldi():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(measure=pg.AUC).rank(training))
    auc2 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(basis="arnoldi", measure=pg.AUC).rank(training))
    assert abs(auc1 - auc2) < 0.005
Code Example #12
File: test_measures.py Project: MKLab-ITI/pygrank
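# AUC and NDCG should agree on which of two filters ranks better, and both must
# raise when k exceeds the number of nodes.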
def test_auc_ndcg_compliance():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    training, test = pg.split(group, 0.5)
    for _ in supported_backends():
        scores1 = pg.PageRank()(graph, training)
        scores2 = pg.HeatKernel()(graph, training)
        AUC1 = pg.AUC(test, exclude=training)(scores1)
        AUC2 = pg.AUC(test, exclude=training)(scores2)
        NDCG1 = float(pg.NDCG(test, exclude=training)(scores1))
        NDCG2 = float(pg.NDCG(test, exclude=training)(scores2))
        assert (AUC1 < AUC2) == (NDCG1 < NDCG2)
        with pytest.raises(Exception):
            pg.AUC(test, exclude=test, k=len(graph) + 1)(scores2)
        with pytest.raises(Exception):
            pg.NDCG(test, exclude=training, k=len(graph) + 1)(scores2)
Code Example #13
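# Sweep post-processing should improve PageRank's AUC on every supported backend.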
def test_sweep():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=0.1)
        auc1 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            pg.Sweep(pg.PageRank()).rank(graph, {v: 1 for v in training}))
        auc2 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            pg.PageRank().rank(graph, {v: 1 for v in training}))
        assert auc1 > auc2
Code Example #14
File: link_prediction.py Project: maniospas/pygrank
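import random

import pygrank as pg


# Leave-one-out link prediction: for every node with at least 10 neighbors, hide
# one random incident edge, rank the graph from that node's seed signal, and
# accumulate AUC, precision, recall, and F1 of the top-10 predictions before
# restoring the removed edge.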
def evaluate(graph, algorithm):
    tprs = list()
    ppvs = list()
    f1s = list()
    aucs = list()
    for node in list(graph):
        neighbors = list(graph.neighbors(node))
        if len(neighbors) < 10:
            continue
        training = pg.to_signal(graph, {node: 1})
        test = pg.to_signal(graph, {neighbor: 1 for neighbor in neighbors})
        for neighbor in random.sample(neighbors, 1):
            assert graph.has_edge(node, neighbor)
            graph.remove_edge(node, neighbor)
            assert not graph.has_edge(node, neighbor)
            assert not graph.has_edge(neighbor, node)
        result = (training >> algorithm) * (1 - training)
        aucs.append(pg.AUC(test, exclude=training)(result))
        top = result >> pg.Top(10) >> pg.Threshold()
        prec = pg.PPV(test, exclude=training)(top)
        rec = pg.TPR(test, exclude=training)(top)
        ppvs.append(prec)
        tprs.append(rec)
        f1s.append(pg.safe_div(2 * prec * rec, prec + rec))
        # restore the edges removed above from the saved neighbor list
        # (iterating graph.neighbors(node) could never see the removed edge)
        for neighbor in neighbors:
            if not graph.has_edge(node, neighbor):
                graph.add_edge(node, neighbor)
        print(
            f"\r{algorithm.cite()}\t AUC {sum(aucs) / len(aucs):.3f}\t f1 {sum(f1s) / len(f1s):.3f}\t prec {sum(ppvs) / len(ppvs):.3f}\t rec {sum(tprs)/len(tprs):.3f}\t",
            end="")
    print()
Code Example #15
File: test_benchmarks.py Project: maniospas/pygrank
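# Supervised (AUC-based) and unsupervised (modularity-based) algorithm selection
# should end up within 0.05 mean AUC of each other.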
def test_algorithm_selection():
    for _ in supported_backends():
        _, graph, communities = next(pg.load_datasets_multiple_communities(
            ["bigraph"], max_group_number=3))
        train, test = pg.split(communities, 0.05)  # 5% of community members are known
        algorithms = pg.create_variations(pg.create_demo_filters(), pg.Normalize)

        supervised_algorithm = pg.AlgorithmSelection(
            algorithms.values(), measure=pg.AUC, tuning_backend="numpy")
        print(supervised_algorithm.cite())
        modularity_algorithm = pg.AlgorithmSelection(
            algorithms.values(), fraction_of_training=1,
            measure=pg.Modularity().as_supervised_method(), tuning_backend="numpy")

        supervised_aucs = list()
        modularity_aucs = list()
        for seeds, members in zip(train.values(), test.values()):
            measure = pg.AUC(members, exclude=seeds)
            supervised_aucs.append(measure(supervised_algorithm(graph, seeds)))
            modularity_aucs.append(measure(modularity_algorithm(graph, seeds)))

        assert abs(sum(supervised_aucs) / len(supervised_aucs)
                   - sum(modularity_aucs) / len(modularity_aucs)) < 0.05
Code Example #16
File: test_measures.py Project: MKLab-ITI/pygrank
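# best_direction() is 1 for measures that should be maximized and -1 for those
# that should be minimized (here, only Conductance).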
def test_best_direction():
    assert pg.Conductance().best_direction() == -1
    assert pg.Density().best_direction() == 1
    assert pg.Modularity().best_direction() == 1
    assert pg.AUC([1, 2, 3]).best_direction() == 1
    assert pg.Cos([1, 2, 3]).best_direction() == 1
    assert pg.Dot([1, 2, 3]).best_direction() == 1
    assert pg.TPR([1, 2, 3]).best_direction() == 1
    assert pg.TNR([1, 2, 3]).best_direction() == 1
Code Example #17
File: test_measures.py Project: MKLab-ITI/pygrank
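# Aggregations of clipped AUCs: GM yields sqrt(0.5 * 0.9), AM yields 0.7,
# Disparity yields 0.9 - 0.5 = 0.4, and Parity is its complement (they sum to 1).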
def test_aggregated():
    y1 = [1, 1, 0]
    y2 = [1, 0, 0]
    y3 = [1, 1, 0]
    for _ in supported_backends():
        # TODO: investigate why not exactly the same always (numerical precision should be lower for numpy)
        epsilon = 1.E-6
        assert abs(float(pg.GM().add(pg.AUC(y1), max_val=0.5).add(pg.AUC(y2), min_val=0.9).evaluate(y3)) - 0.45 ** 0.5) < epsilon
        assert abs(float(pg.AM().add(pg.AUC(y1), max_val=0.5).add(pg.AUC(y2), min_val=0.9).evaluate(y3)) - 0.7) < epsilon
        assert abs(float(pg.Disparity().add(pg.AUC(y1), max_val=0.5).add(pg.AUC(y2), min_val=0.9).evaluate(y3)) - 0.4) < epsilon
        assert abs(float(pg.Disparity().add(pg.AUC(y1), max_val=0.5).add(pg.AUC(y2), min_val=0.9).evaluate(y3))
                   + float(pg.Parity().add(pg.AUC(y1), max_val=0.5).add(pg.AUC(y2), min_val=0.9).evaluate(y3) - 1)) < epsilon
Code Example #18
File: test_measures.py Project: MKLab-ITI/pygrank
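# Degenerate inputs: zero vectors score 0, constant ground truth makes AUC raise,
# empty graphs give infinite conductance and zero density/modularity, and the KL
# divergence of a signal with itself is 0.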
def test_edge_cases():
    assert pg.pRule([0])([0]) == 0
    assert pg.Cos([0])([0]) == 0
    with pytest.raises(Exception):
        pg.Measure()([0, 1, 0])
    with pytest.raises(Exception):
        pg.AUC([0, 0, 0])([0, 1, 0])
    with pytest.raises(Exception):
        pg.AUC([1, 1, 1])([0, 1, 0])
    with pytest.raises(Exception):
        pg.KLDivergence([0], exclude={"A": 1})([1])
    with pytest.raises(Exception):
        pg.Conductance(next(pg.load_datasets_graph(["graph5"])),
                       max_rank=0.5)([1, 1, 1, 1, 1])
    import networkx as nx
    for _ in supported_backends():
        assert pg.Conductance(nx.Graph())([]) == float("inf")  # this is indeed correct in python
        assert pg.Density(nx.Graph())([]) == 0
        assert pg.Modularity(nx.Graph())([]) == 0
        assert pg.KLDivergence([0, 1, 0])([0, 1, 0]) == 0
        assert pg.MKLDivergence([0, 1, 0])([0, 1, 0]) == 0
        assert pg.KLDivergence([0])([-1]) == 0
Code Example #19
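# Fragment of a larger benchmark script (compare Code Example #15): after picking
# algorithms by supervised AUC and by modularity, it also picks one by LinkAUC
# (pg.LinkAssessment) and reports the mean AUC of all three selections.
# NOTE: the snippet starts mid-call; the opening assignment below is inferred
# from the matching modularity_algorithm construction in Code Example #15.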
modularity_algorithm = pg.AlgorithmSelection(
    algorithms.values(),
    fraction_of_training=1,
    measure=pg.Modularity().as_supervised_method())

linkauc_algorithm = None
best_evaluation = 0
linkAUC = pg.LinkAssessment(graph, similarity="cos", hops=1)  # LinkAUC, because emails systemically exhibit homophily
for algorithm in algorithms.values():
    evaluation = linkAUC.evaluate({
        community: algorithm(graph, seeds)
        for community, seeds in train.items()
    })
    if evaluation > best_evaluation:
        best_evaluation = evaluation
        linkauc_algorithm = algorithm

supervised_aucs = list()
modularity_aucs = list()
linkauc_aucs = list()
for seeds, members in zip(train.values(), test.values()):
    measure = pg.AUC(members, exclude=seeds)
    supervised_aucs.append(measure(supervised_algorithm(graph, seeds)))
    modularity_aucs.append(measure(modularity_algorithm(graph, seeds)))
    linkauc_aucs.append(measure(linkauc_algorithm(graph, seeds)))

print("Supervised", sum(supervised_aucs) / len(supervised_aucs))
print("Modularity", sum(modularity_aucs) / len(modularity_aucs))
print("LinkAUC", sum(modularity_aucs) / len(modularity_aucs))