Beispiel #1
0
def run_bfs(property_graph: PropertyGraph, input_args, source_node_file):
    """Run BFS once per source node, then validate and report each result.

    Each run writes its output to the node property ``"NewProp"``, checks it
    with ``check_schema`` and ``analytics.bfs_assert_valid``, prints the
    ``analytics.BfsStatistics`` summary and removes the property again.

    Args:
        property_graph: Graph handed to every ``analytics`` call.
        input_args: Mapping providing the default start node under the
            ``"source_node"`` key.
        source_node_file: Path to a text file with one integer node id per
            line. When it is the empty string a single BFS is run from
            ``input_args["source_node"]`` instead.

    Raises:
        FileNotFoundError: If ``source_node_file`` is non-empty but missing.
        ValueError: If a non-blank line of the file is not an integer.
    """
    property_name = "NewProp"
    start_node = input_args["source_node"]

    if source_node_file != "":
        if not os.path.exists(source_node_file):
            # Fail with the descriptive message up front; previously it was
            # only printed and open() crashed right afterwards anyway.
            raise FileNotFoundError(f"Source node file doesn't exist: {source_node_file}")
        with open(source_node_file, "r") as fi:
            # Skip blank lines (e.g. a trailing empty line): not node ids.
            sources = [int(line) for line in fi if line.strip()]
        runs = [(f"bfs on {source}", source) for source in sources]
    else:
        runs = [("bfs", start_node)]

    # Each entry pairs the timer label with the node the BFS starts from.
    for label, source in runs:
        with time_block(label):
            analytics.bfs(property_graph, source, property_name)

        check_schema(property_graph, property_name)

        analytics.bfs_assert_valid(property_graph, property_name)

        stats = analytics.BfsStatistics(property_graph, property_name)
        print(f"STATS:\n{stats}")
        # Drop the output so the next run can reuse the same property name.
        property_graph.remove_node_property(property_name)
Beispiel #2
0
def run_bc(property_graph: PropertyGraph, input_args, source_node_file):
    """Run betweenness centrality over batches of source nodes.

    Each run writes its output to the node property ``"NewProp"``, checks it
    with ``check_schema``, prints the
    ``analytics.BetweennessCentralityStatistics`` summary and removes the
    property again.

    Args:
        property_graph: Graph handed to every ``analytics`` call.
        input_args: Mapping providing the default start node under the
            ``"source_node"`` key.
        source_node_file: Path to a text file with one integer node id per
            line; the ids are processed in consecutive batches of four. When
            it is the empty string a single run is made with
            ``input_args["source_node"]`` as the only source.

    Raises:
        FileNotFoundError: If ``source_node_file`` is non-empty but missing.
        ValueError: If a non-blank line of the file is not an integer.
    """
    property_name = "NewProp"
    start_node = input_args["source_node"]

    bc_plan = analytics.BetweennessCentralityPlan.level()

    batch_size = 4
    sources_from_file = source_node_file != ""
    if sources_from_file:
        if not os.path.exists(source_node_file):
            # Fail with the descriptive message up front; previously it was
            # only printed and open() crashed right afterwards anyway.
            raise FileNotFoundError(f"Source node file doesn't exist: {source_node_file}")
        # The context manager closes the handle, which the bare
        # open(...).readlines() never did.
        with open(source_node_file, "r") as fi:
            # Skip blank lines (e.g. a trailing empty line): not node ids.
            sources = [int(line) for line in fi if line.strip()]
        batches = [sources[start : start + batch_size] for start in range(0, len(sources), batch_size)]
    else:
        batches = [[start_node]]

    for sources_to_use in batches:
        # Only the file-driven mode announces the batch it is working on.
        if sources_from_file:
            print(f"Using source: {sources_to_use}")
        with time_block("betweenness centrality"):
            analytics.betweenness_centrality(property_graph, property_name, sources_to_use, bc_plan)

        check_schema(property_graph, property_name)

        stats = analytics.BetweennessCentralityStatistics(property_graph, property_name)
        print(f"STATS:\n{stats}")
        # Drop the output so the next batch can reuse the same property name.
        property_graph.remove_node_property(property_name)
Beispiel #3
0
def degree_assortativity_coefficient(
    graph: PropertyGraph,
    source_degree_type: DegreeType = DegreeType.OUT,
    destination_degree_type: DegreeType = DegreeType.IN,
    weight=None,
):
    """
    Calculates and returns the degree assortativity of a given graph.
    Parameters:
       * graph: the PropertyGraph to be analyzed
       * source_degree_type: description of degree type to consider for the source node on an edge
            expected values are DegreeType.IN or DegreeType.OUT
       * destination_degree_type: description the degree type to consider for the destination node on an edge
            expected values are DegreeType.IN or DegreeType.OUT
       * weight (optional): edge property to use if using weighted degrees
    Returns:
       The accumulated product of deviations divided by the square root of
       the product of the two accumulated squared deviations.

    The node properties "temp_DegreeType.IN" and "temp_DegreeType.OUT" are
    added to the graph for the duration of the call and removed before it
    returns, whether or not the computation succeeds.

    NOTE(review): if either squared-deviation sum reduces to zero the final
    division has a zero denominator - confirm the desired handling.
    """
    in_degree_property = "temp_DegreeType.IN"
    out_degree_property = "temp_DegreeType.OUT"

    # get the tables associated with the degree types of the source and destination nodes
    calculate_degree(graph, in_degree_property, out_degree_property, weight)

    try:
        # Looked up inside the try block so that an unexpected degree type
        # (no matching temporary property) still triggers the cleanup below.
        # The names rely on str(DegreeType.X) rendering as "DegreeType.X".
        source_degree = graph.get_node_property("temp_" + str(source_degree_type))
        destination_degree = graph.get_node_property(
            "temp_" + str(destination_degree_type))

        # Calculate the average in and out degrees of graph
        # (with respect to number of edges, not number of nodes)
        num_edges = graph.num_edges()
        source_average, destination_average = average_degree(
            graph, num_edges, source_degree, destination_degree)

        # Calculate the numerator (product of deviation from mean)
        # and the factors of the denominator (square deviation from mean)
        product_of_dev = GAccumulator[float](0)
        square_of_source_dev = GAccumulator[float](0)
        square_of_destination_dev = GAccumulator[float](0)
        do_all(
            range(graph.num_nodes()),
            degree_assortativity_coefficient_operator(
                graph,
                source_degree,
                source_average,
                destination_degree,
                destination_average,
                product_of_dev,
                square_of_source_dev,
                square_of_destination_dev,
            ),
            steal=True,
            loop_name="degree assortativity coefficient calculation",
        )
        return product_of_dev.reduce() / sqrt(
            square_of_source_dev.reduce() * square_of_destination_dev.reduce())
    finally:
        # Always drop the temporary degree properties added above.
        graph.remove_node_property(in_degree_property)
        graph.remove_node_property(out_degree_property)
0
def run_cc(property_graph: PropertyGraph, _input_args):
    """Time one connected-components run, then validate and report it.

    The result is written to the node property ``"NewProp"``, which is
    removed again before returning. ``_input_args`` is not used.
    """
    output_property = "NewProp"

    # Only the algorithm itself is inside the timed region.
    with time_block("connected components"):
        analytics.connected_components(property_graph, output_property)

    # Verification: schema check first, then the library's own validity check.
    check_schema(property_graph, output_property)
    analytics.connected_components_assert_valid(property_graph, output_property)

    summary = analytics.ConnectedComponentsStatistics(property_graph, output_property)
    print(f"STATS:\n{summary}")

    # Leave the graph as it was handed in.
    property_graph.remove_node_property(output_property)
Beispiel #5
0
def run_kcore(property_graph: PropertyGraph, _input_args):
    """Time one k-core run with k fixed at 10, then validate and report it.

    The result is written to the node property ``"NewProp"``, which is
    removed again before returning. ``_input_args`` is not used.
    """
    core_number = 10
    output_property = "NewProp"

    # Only the algorithm itself is inside the timed region.
    with time_block("k-core"):
        analytics.k_core(property_graph, core_number, output_property)

    # Verification: schema check first, then the library's own validity check.
    check_schema(property_graph, output_property)
    analytics.k_core_assert_valid(property_graph, core_number, output_property)

    summary = analytics.KCoreStatistics(property_graph, core_number, output_property)
    print(f"STATS:\n{summary}")

    # Leave the graph as it was handed in.
    property_graph.remove_node_property(output_property)
Beispiel #6
0
def run_louvain(property_graph: PropertyGraph, input_args):
    """Time one Louvain clustering run, then validate and report it.

    The edge property to use is read from ``input_args["edge_wt"]``. The
    result is written to the node property ``"NewProp"``, which is removed
    again before returning.
    """
    output_property = "NewProp"
    weight_property = input_args["edge_wt"]

    # Plan construction is part of the timed region, like the run itself.
    with time_block("louvain"):
        plan = analytics.LouvainClusteringPlan.do_all(False, 0.0001, 0.0001, 10000, 100)
        analytics.louvain_clustering(property_graph, weight_property, output_property, plan)

    # Verification: schema check first, then the library's own validity check.
    check_schema(property_graph, output_property)
    analytics.louvain_clustering_assert_valid(property_graph, weight_property, output_property)

    summary = analytics.LouvainClusteringStatistics(property_graph, weight_property, output_property)
    print(f"STATS:\n{summary}")

    # Leave the graph as it was handed in.
    property_graph.remove_node_property(output_property)
Beispiel #7
0
def run_jaccard(property_graph: PropertyGraph, input_args):
    """Time one Jaccard run against a reference node, then validate and report it.

    The reference node is read from ``input_args["source_node"]``. The
    result is written to the node property ``"NewProp"``, which is removed
    again before returning.

    Raises:
        AssertionError: If the value stored for the reference node itself
            is not equal to 1.
    """
    output_property = "NewProp"
    reference_node = input_args["source_node"]

    # Only the algorithm itself is inside the timed region.
    with time_block(f"jaccard on {reference_node}"):
        analytics.jaccard(property_graph, reference_node, output_property)

    check_schema(property_graph, output_property)

    # Sanity check on the raw output: the reference node compared with
    # itself must come out as 1.
    scores = property_graph.get_node_property(output_property).to_numpy()
    self_score = scores[reference_node]
    assert self_score == 1

    analytics.jaccard_assert_valid(property_graph, reference_node, output_property)

    summary = analytics.JaccardStatistics(property_graph, reference_node, output_property)
    print(f"STATS:\n{summary}")

    # Leave the graph as it was handed in.
    property_graph.remove_node_property(output_property)
Beispiel #8
0
def run_bc(property_graph: PropertyGraph, input_args, source_node_file,
           num_sources):
    """Run betweenness centrality over batches of ``num_sources`` sources.

    Each run writes its output to the node property ``"NewProp"``, checks it
    with ``check_schema``, prints the
    ``analytics.BetweennessCentralityStatistics`` summary and removes the
    property again.

    Args:
        property_graph: Graph handed to every ``analytics`` call.
        input_args: Mapping providing the default start node under the
            ``"source_node"`` key.
        source_node_file: Path to a text file with one integer node id per
            line. When it is the empty string a single run is made with
            ``input_args["source_node"]`` as the only source.
        num_sources: Number of sources per run when reading from the file.
            Batches start at offsets 0, num_sources, 2 * num_sources, ...
            and the last one wraps around to the beginning of the list when
            the list length is not a multiple of ``num_sources``.

    Raises:
        FileNotFoundError: If ``source_node_file`` is non-empty but missing.
        ValueError: If a non-blank line of the file is not an integer.
        AssertionError: If ``num_sources`` is not in ``1..len(sources)``.
    """
    property_name = "NewProp"
    start_node = input_args["source_node"]

    bc_plan = analytics.BetweennessCentralityPlan.level()

    if source_node_file != "":
        if not os.path.exists(source_node_file):
            # Fail with the descriptive message up front; previously it was
            # only printed and open() crashed right afterwards anyway.
            raise FileNotFoundError(
                f"Source node file doesn't exist: {source_node_file}")
        with open(source_node_file, "r") as fi:
            # Skip blank lines (e.g. a trailing empty line): not node ids.
            all_sources = [int(line) for line in fi if line.strip()]

        # A zero batch size used to surface as a ZeroDivisionError.
        assert 0 < num_sources <= len(all_sources), (
            f"num_sources must be between 1 and {len(all_sources)}, "
            f"got {num_sources}")

        # Build every batch from the full list. The previous code rebound
        # `sources` to the truncated batch inside the loop, so every run
        # after the first reused the first batch.
        batches = []
        for start_idx in range(0, len(all_sources), num_sources):
            rotated_sources = all_sources[start_idx:] + all_sources[:start_idx]
            batches.append(rotated_sources[:num_sources])
    else:
        batches = [[start_node]]

    for sources in batches:
        print(f"Using sources: {sources}")
        with time_block("betweenness centrality"):
            analytics.betweenness_centrality(property_graph, property_name,
                                             sources, bc_plan)

        check_schema(property_graph, property_name)

        stats = analytics.BetweennessCentralityStatistics(
            property_graph, property_name)
        print(f"STATS:\n{stats}")
        # Drop the output so the next batch can reuse the same property name.
        property_graph.remove_node_property(property_name)
Beispiel #9
0
def run_sssp(property_graph: PropertyGraph, input_args, source_node_file):
    """Run SSSP once per source node, then validate and report each result.

    Each run writes its output to the node property ``"NewProp"``, checks it
    with ``check_schema`` and ``analytics.sssp_assert_valid``, prints the
    ``analytics.SsspStatistics`` summary and removes the property again.

    Args:
        property_graph: Graph handed to every ``analytics`` call.
        input_args: Mapping providing ``"source_node"`` (default start
            node), ``"edge_wt"`` (edge property passed to sssp),
            ``"sssp_delta"`` (argument for the plan) and ``"name"`` (used to
            pick the plan: names containing "kron" or "urand" get
            ``delta_step_fusion``, all others ``delta_step``).
        source_node_file: Path to a text file with one integer node id per
            line. When it is the empty string a single run is made from
            ``input_args["source_node"]`` instead.

    Raises:
        FileNotFoundError: If ``source_node_file`` is non-empty but missing.
        ValueError: If a non-blank line of the file is not an integer.
    """
    property_name = "NewProp"
    start_node = input_args["source_node"]
    edge_prop_name = input_args["edge_wt"]

    # Build only the plan that is actually used instead of creating the
    # default one and then discarding it.
    delta = input_args["sssp_delta"]
    if "kron" in input_args["name"] or "urand" in input_args["name"]:
        sssp_plan = analytics.SsspPlan.delta_step_fusion(delta)
    else:
        sssp_plan = analytics.SsspPlan.delta_step(delta)

    if source_node_file != "":
        if not os.path.exists(source_node_file):
            # Fail with the descriptive message up front; previously it was
            # only printed and open() crashed right afterwards anyway.
            raise FileNotFoundError(
                f"Source node file doesn't exist: {source_node_file}")
        with open(source_node_file, "r") as fi:
            # Skip blank lines (e.g. a trailing empty line): not node ids.
            sources = [int(line) for line in fi if line.strip()]
        runs = [(f"sssp on {source}", source) for source in sources]
    else:
        runs = [("sssp", start_node)]

    # Each entry pairs the timer label with the node the search starts from.
    for label, source in runs:
        with time_block(label):
            analytics.sssp(property_graph, source, edge_prop_name,
                           property_name, sssp_plan)

        check_schema(property_graph, property_name)

        analytics.sssp_assert_valid(property_graph, source, edge_prop_name,
                                    property_name)

        stats = analytics.SsspStatistics(property_graph, property_name)
        print(f"STATS:\n{stats}")
        # Drop the output so the next run can reuse the same property name.
        property_graph.remove_node_property(property_name)
Beispiel #10
0
def run_pagerank(property_graph: PropertyGraph, _input_args):
    """Time one PageRank run, then validate and report it.

    Uses a ``pull_topological`` plan built from the fixed arguments
    ``(0.000001, 1000, 0.85)``. The result is written to the node property
    ``"NewProp"``, which is removed again before returning. ``_input_args``
    is not used.
    """
    output_property = "NewProp"

    # Fixed plan arguments, passed positionally in this order.
    convergence_tolerance = 0.000001
    iteration_limit = 1000
    damping = 0.85
    plan = analytics.PagerankPlan.pull_topological(
        convergence_tolerance, iteration_limit, damping
    )

    # Only the algorithm itself is inside the timed region.
    with time_block("pagerank"):
        analytics.pagerank(property_graph, output_property, plan)

    # Verification: schema check first, then the library's own validity check.
    check_schema(property_graph, output_property)
    analytics.pagerank_assert_valid(property_graph, output_property)

    summary = analytics.PagerankStatistics(property_graph, output_property)
    print(f"STATS:\n{summary}")

    # Leave the graph as it was handed in.
    property_graph.remove_node_property(output_property)