Example #1
0
def test_autorefs():
    """
    Checks the citation text that algorithms generate for themselves.

    Every citation must contain at least one ``\\cite{`` reference, and the number of distinct
    citations must equal the number of algorithms minus four (see the note before the last assertion).
    """
    pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    # keyword arguments shared by every base algorithm
    shared = dict(preprocessor=pre, tol=1.E-9, max_iters=1000)
    base_algorithms = {
        "ppr.85": pg.PageRank(.85, **shared),
        "ppr.99": pg.PageRank(.99, **shared),
        "hk3": pg.HeatKernel(3, **shared),
        "hk5": pg.HeatKernel(5, **shared),
        "hk5'": pg.HeatKernel(5, **shared),  # configured exactly like "hk5" on purpose
    }
    wrappers = {
        "+Sweep": pg.Sweep,
        "+SO": pg.SeedOversampling,
        "+BSO": pg.BoostedSeedOversampling,
    }
    algs = base_algorithms | pg.create_variations(base_algorithms, wrappers)

    unique_citations = set()
    for algorithm in algs.values():
        text = algorithm.cite()
        assert "\\cite{" in text
        unique_citations.add(text)

    # "hk5'" duplicates "hk5"; presumably each of the three wrappers adds one more duplicated
    # pair, which accounts for the four citations that are expected to coincide.
    assert len(unique_citations) == len(algs) - 4
Example #2
0
def test_unsupervised_vs_auc():
    """
    Checks which unsupervised measure scores algorithms most similarly to AUC.

    Many filter variations are benchmarked on "graph9" under every measure below. For each measure
    the absolute Spearman correlation between its scores and the AUC scores is computed, and the
    largest correlation among the non-supervised measures is asserted to be the "LinkAUCdot" one.
    A timing benchmark is run first and its scores are asserted to sum to a positive value.
    """
    def fresh_datasets():
        # called anew for every benchmark run
        return pg.load_datasets_multiple_communities(["graph9"])

    algorithms = pg.create_variations(pg.create_many_filters(), pg.create_many_variation_types())
    elapsed = pg.benchmark_scores(pg.benchmark(algorithms, fresh_datasets(), pg.Time))
    assert sum(elapsed) > 0

    # NOTE(review): "exlude" looks like a misspelling of "exclude"; it is left untouched in case
    # the library binds these arguments by keyword - confirm before renaming.
    measures = {
        "AUC": lambda ground_truth, exlude: pg.MultiSupervised(pg.AUC, ground_truth, exlude),
        "NDCG": lambda ground_truth, exlude: pg.MultiSupervised(pg.NDCG, ground_truth, exlude),
        "Density": lambda graph: pg.MultiUnsupervised(pg.Density, graph),
        "Conductance": lambda graph: pg.MultiUnsupervised(
            pg.Conductance(autofix=True).as_unsupervised_method(), graph),
        "Modularity": lambda graph: pg.MultiUnsupervised(
            pg.Modularity(max_positive_samples=5).as_unsupervised_method(), graph),
        "CCcos": lambda graph: pg.ClusteringCoefficient(
            graph, similarity="cos", max_positive_samples=5),
        "CCdot": lambda graph: pg.ClusteringCoefficient(
            graph, similarity="dot", max_positive_samples=5),
        "LinkAUCcos": lambda graph: pg.LinkAssessment(
            graph, similarity="cos", max_positive_samples=5),
        "LinkAUCdot": lambda graph: pg.LinkAssessment(
            graph, similarity="dot", max_positive_samples=5),
        "HopAUCcos": lambda graph: pg.LinkAssessment(
            graph, similarity="cos", hops=2, max_positive_samples=5),
        "HopAUCdot": lambda graph: pg.LinkAssessment(
            graph, similarity="dot", hops=2, max_positive_samples=5),
    }

    # kept as an explicit loop: the original author observed that pytest gets slow
    # when this is written as a comprehension
    scores = {}
    for name, measure in measures.items():
        scores[name] = pg.benchmark_scores(pg.benchmark(algorithms, fresh_datasets(), measure))

    supervised = {"AUC", "NDCG"}
    correlation_with_auc = {
        name: abs(pg.SpearmanCorrelation(scores["AUC"])(measure_scores))
        for name, measure_scores in scores.items()
    }
    best_unsupervised = max(
        value for name, value in correlation_with_auc.items() if name not in supervised)
    assert best_unsupervised == correlation_with_auc["LinkAUCdot"]
Example #3
0
def test_algorithm_selection():
    """
    Checks that selecting an algorithm by modularity performs about as well as selecting by AUC.

    For every item yielded by ``supported_backends()`` (presumably each iteration activates one
    backend - confirm against its definition), two ``pg.AlgorithmSelection`` instances are built
    over the same normalized demo filters: one guided by AUC and one by modularity. Both are run
    on the seeds of each "bigraph" community and the mean AUCs are asserted to differ by less
    than 0.05.
    """
    def mean(values):
        return sum(values) / len(values)

    for _ in supported_backends():
        datasets = pg.load_datasets_multiple_communities(["bigraph"], max_group_number=3)
        _, graph, communities = next(datasets)
        # 5% of community members are known
        train, test = pg.split(communities, 0.05)
        candidates = pg.create_variations(pg.create_demo_filters(), pg.Normalize)

        auc_selector = pg.AlgorithmSelection(
            candidates.values(), measure=pg.AUC, tuning_backend="numpy")
        print(auc_selector.cite())
        modularity_selector = pg.AlgorithmSelection(
            candidates.values(),
            fraction_of_training=1,
            measure=pg.Modularity().as_supervised_method(),
            tuning_backend="numpy")

        supervised_aucs = []
        modularity_aucs = []
        for seeds, members in zip(train.values(), test.values()):
            # evaluation excludes the seeds that were given to the algorithms
            auc = pg.AUC(members, exclude=seeds)
            supervised_aucs.append(auc(auc_selector(graph, seeds)))
            modularity_aucs.append(auc(modularity_selector(graph, seeds)))

        assert abs(mean(supervised_aucs) - mean(modularity_aucs)) < 0.05
Example #4
0
    "facebook0", "facebook686", "log4j", "ant", "eucore", "citeseer", "dblp"
]
seed_fractions = [0.3, 0.5]
pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")

# keyword arguments shared by every base filter
_base_filter_args = {"preprocessor": pre, "max_iters": 10000, "tol": 1.E-6}
filters = {
    "ppr0.85": pg.PageRank(alpha=0.85, **_base_filter_args),
    "ppr0.99": pg.PageRank(alpha=0.99, **_base_filter_args),
    "hk3": pg.HeatKernel(t=3, **_base_filter_args),
    "hk7": pg.HeatKernel(t=7, **_base_filter_args),
}
# wrap every base filter in pg.Tautology (empty name suffix) and in pg.Sweep ("+Sweep" suffix)
filters = pg.create_variations(filters, {"": pg.Tautology, "+Sweep": pg.Sweep})

for name, filter in filters.items():
    print("=====", name, "=====")
    algorithms = {
        "None":
        filter,
        "Mult":
        pg.AdHocFairness(filter, "B"),
        "LFPRO":
        pg.AdHocFairness(filter, "O"),
        #"FBuck-C": pg.FairPersonalizer(filter, .8, pRule_weight=10, max_residual=1, error_type=pg.Mabs, parameter_buckets=0),
        "FPers-C":
        pg.FairPersonalizer(filter,
                            .8,
                            pRule_weight=10,
Example #5
0
import pygrank as pg
datasets = ["EUCore", "Amazon"]
pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
# keyword arguments shared by all filters and tuners below
settings = dict(preprocessor=pre, tol=1.E-9, max_iters=1000)

# base graph filters
algs = {
    "ppr.85": pg.PageRank(.85, **settings),
    "ppr.99": pg.PageRank(.99, **settings),
    "hk3": pg.HeatKernel(3, **settings),
    "hk5": pg.HeatKernel(5, **settings),
}

# keep the base filters and add a "+Sweep" variation of each
sweep_variations = pg.create_variations(algs, {"+Sweep": pg.Sweep})
algs = algs | sweep_variations
loader = pg.load_datasets_one_community(datasets)

# tuning / selection based algorithms
algs["tuned"] = pg.ParameterTuner(**settings)
algs["selected"] = pg.AlgorithmSelection(pg.create_demo_filters(**settings).values())
algs["tuned+Sweep"] = pg.ParameterTuner(
    ranker_generator=lambda params: pg.Sweep(pg.GenericGraphFilter(params, **settings)))

# prints a list of algorithm citations
for candidate in algs.values():
    print(candidate.cite())

# print the AUC benchmark with " & " between cells and "\\" ending each line
results = pg.benchmark(algs, loader, pg.AUC, fraction_of_training=.5)
pg.benchmark_print(results, delimiter=" & ", end_line="\\\\")
Example #6
0
import pygrank as pg

# materialized as a list so the same datasets can be reused by every benchmark below
loader = list(pg.load_datasets_multiple_communities(["bigraph", "cora", "citeseer"]))
algorithms = pg.create_variations(pg.create_demo_filters(), pg.create_many_variation_types())
# add normalization to all algorithms
algorithms = pg.create_variations(algorithms, pg.Normalize)
print("Algorithms", len(algorithms))

# NOTE(review): "exlude" looks like a misspelling of "exclude"; it is left untouched in case
# the library binds these arguments by keyword - confirm before renaming.
measures = {
    "AUC": lambda ground_truth, exlude: pg.MultiSupervised(pg.AUC, ground_truth, exlude),
    "NDCG": lambda ground_truth, exlude: pg.MultiSupervised(pg.NDCG, ground_truth, exlude),
    "Density": lambda graph: pg.MultiUnsupervised(pg.Density, graph),
    "Modularity": lambda graph: pg.MultiUnsupervised(pg.Modularity, graph),
    "LinkCC": lambda graph: pg.ClusteringCoefficient(graph, similarity="dot"),
    "LinkAUCcos": lambda graph: pg.LinkAssessment(graph, similarity="cos"),
    "HopAUCdot": lambda graph: pg.LinkAssessment(graph, similarity="dot", hops=2),
}

# benchmark scores of all algorithms under each measure
scores = {
    name: pg.benchmark_scores(pg.benchmark(algorithms, loader, measure))
    for name, measure in measures.items()
}

# absolute Spearman correlation of each measure's scores with the AUC and NDCG scores
evaluations_vs_auc, evaluations_vs_ndcg = {}, {}
for name, measure_scores in scores.items():
    evaluations_vs_auc[name] = abs(pg.SpearmanCorrelation(scores["AUC"])(measure_scores))
    evaluations_vs_ndcg[name] = abs(pg.SpearmanCorrelation(scores["NDCG"])(measure_scores))

header = [("Measure", "AUC corr", "NDCG corr")]
rows = [(name, evaluations_vs_auc[name], evaluations_vs_ndcg[name]) for name in measures]
pg.benchmark_print(header + rows)
Example #7
0
import pygrank as pg
_, graph, community = next(pg.load_datasets_one_community(["EUCore"]))

# t is the number of hops away HeatKernel places maximal importance on
heat_kernel = pg.HeatKernel(t=5, normalization="symmetric", renormalize=True)

# ignored nodes assumed to be zeroes
personalization = dict.fromkeys(community, 1.)

base_algorithms = {
    "HK5": heat_kernel,
    "HK5+Oversampling": pg.SeedOversampling(heat_kernel),
}
# keep the base algorithms and add a "+Sweep" variation of each
swept = pg.create_variations(base_algorithms, {"+Sweep": pg.Sweep})
# wrap everything in pg.Normalize without changing the names
algorithms = pg.create_variations(base_algorithms | swept, {"": pg.Normalize})

conductance = pg.Conductance()
for label, ranker in algorithms.items():
    # the ranker returns a dict-like pg.GraphSignal
    print(label, conductance(ranker(graph, personalization)))
Example #8
0
import pygrank as pg
# Take the first dataset yielded for "EUCore" (max_group_number=3 presumably caps the number
# of communities that are loaded - confirm against the loader).
_, graph, communities = next(
    pg.load_datasets_multiple_communities(["EUCore"], max_group_number=3))
train, test = pg.split(communities, 0.05)  # 5% of community members are known
# Candidate algorithms: the demo filters, each wrapped in pg.Normalize.
algorithms = pg.create_variations(pg.create_demo_filters(), pg.Normalize)

# Algorithm selection guided by AUC.
supervised_algorithm = pg.AlgorithmSelection(algorithms.values(),
                                             measure=pg.AUC)
print(supervised_algorithm.cite())
# Algorithm selection guided by modularity, with fraction_of_training=1.
modularity_algorithm = pg.AlgorithmSelection(
    algorithms.values(),
    fraction_of_training=1,
    measure=pg.Modularity().as_supervised_method())

# Manual selection: keep the candidate with the highest LinkAUC evaluation over the outputs
# it produces for all training seeds. linkauc_algorithm stays None if no candidate's
# evaluation exceeds the initial best_evaluation of 0.
linkauc_algorithm = None
best_evaluation = 0
linkAUC = pg.LinkAssessment(
    graph, similarity="cos",
    hops=1)  # LinkAUC, because emails systemically exhibit homophily
for algorithm in algorithms.values():
    # one output per community, keyed by community
    evaluation = linkAUC.evaluate({
        community: algorithm(graph, seeds)
        for community, seeds in train.items()
    })
    if evaluation > best_evaluation:
        best_evaluation = evaluation
        linkauc_algorithm = algorithm

# Empty accumulators, one per selection strategy; presumably filled by code that follows
# this excerpt.
supervised_aucs = list()
modularity_aucs = list()
linkauc_aucs = list()