Example #1
def degree_assortativity_coefficient(
    graph: PropertyGraph,
    source_degree_type: DegreeType = DegreeType.OUT,
    destination_degree_type: DegreeType = DegreeType.IN,
    weight=None,
):
    """
    Calculates and returns the degree assortativity of a given graph.
    Parameters:
       * graph: the PropertyGraph to be analyzed
       * source_degree_type: the degree type to consider for the source node of each edge;
            expected values are DegreeType.IN or DegreeType.OUT
       * destination_degree_type: the degree type to consider for the destination node of each edge;
            expected values are DegreeType.IN or DegreeType.OUT
       * weight (optional): edge property to use when computing weighted degrees
    """
    # get the tables associated with the degree types of the source and destination nodes
    calculate_degree(graph, "temp_DegreeType.IN", "temp_DegreeType.OUT",
                     weight)
    source_degree = graph.get_node_property("temp_" + str(source_degree_type))
    destination_degree = graph.get_node_property("temp_" +
                                                 str(destination_degree_type))

    try:
        # Calculate the average in and out degrees of graph
        # (with respect to number of edges, not number of nodes)
        num_edges = graph.num_edges()
        source_average, destination_average = average_degree(
            graph, num_edges, source_degree, destination_degree)

        # Calculate the numerator (product of deviation from mean)
        # and the factors of the denominator (square deviation from mean)
        product_of_dev = GAccumulator[float](0)
        square_of_source_dev = GAccumulator[float](0)
        square_of_destination_dev = GAccumulator[float](0)
        do_all(
            range(graph.num_nodes()),
            degree_assortativity_coefficient_operator(
                graph,
                source_degree,
                source_average,
                destination_degree,
                destination_average,
                product_of_dev,
                square_of_source_dev,
                square_of_destination_dev,
            ),
            steal=True,
            loop_name="degree assortativity coefficient calculation",
        )
        return product_of_dev.reduce() / sqrt(
            square_of_source_dev.reduce() * square_of_destination_dev.reduce())
    finally:
        graph.remove_node_property("temp_DegreeType.IN")
        graph.remove_node_property("temp_DegreeType.OUT")
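The accumulators in Example #1 compute the Pearson correlation between the degree of the source node and the degree of the destination node over every edge: the numerator sums the products of the two deviations from their means, and the denominator is the square root of the product of the summed squared deviations. Below is a minimal usage sketch (not taken from the original examples); the input path is hypothetical, and PropertyGraph, DegreeType, and katana.local are assumed to be available exactly as in the surrounding examples.

def example_degree_assortativity_usage():
    import katana.local

    katana.local.initialize()

    # Hypothetical input path; replace with a real property graph directory.
    graph = PropertyGraph("path/to/property_graph")

    # Default: correlate each edge's source OUT degree with its destination IN degree.
    r = degree_assortativity_coefficient(graph)

    # Weighted variant, assuming the graph has an edge property named "weight".
    r_weighted = degree_assortativity_coefficient(
        graph,
        source_degree_type=DegreeType.OUT,
        destination_degree_type=DegreeType.IN,
        weight="weight",
    )

    print("Degree assortativity:", r, "weighted:", r_weighted)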
Example #2
def main():
    import argparse

    import katana.local
    from katana.galois import set_active_threads

    katana.local.initialize()

    parser = argparse.ArgumentParser()
    parser.add_argument("--startNode", type=int, default=0)
    parser.add_argument("--propertyName", type=str, default="NewProperty")
    parser.add_argument("--reportNode", type=int, default=1)
    parser.add_argument("--noverify", action="store_true", default=False)
    parser.add_argument("--threads", "-t", type=int, default=1)
    parser.add_argument("input", type=str)

    args = parser.parse_args()

    print("Using threads:", set_active_threads(args.threads))

    graph = PropertyGraph(args.input)

    bfs_sync_pg(graph, args.startNode, args.propertyName)

    print("Node {}: {}".format(
        args.reportNode,
        graph.get_node_property(args.propertyName)[args.reportNode]))

    if not args.noverify:
        numNodeProperties = len(graph.node_schema())
        newPropertyId = numNodeProperties - 1
        verify_bfs(graph, args.startNode, newPropertyId)
Example #3
def main():
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--startNode", type=int, default=0)
    parser.add_argument("--propertyName", type=str, default="NewProperty")
    parser.add_argument("--edgeWeightProperty", type=str, required=True)
    parser.add_argument("--shift", type=int, default=6)
    parser.add_argument("--reportNode", type=int, default=1)
    parser.add_argument("--noverify", action="store_true", default=False)
    parser.add_argument("--threads", "-t", type=int, default=1)
    parser.add_argument("input", type=str)
    args = parser.parse_args()

    print("Using threads:", setActiveThreads(args.threads))

    graph = PropertyGraph(args.input)

    sssp(graph, args.startNode, args.edgeWeightProperty, args.shift,
         args.propertyName)

    print("Node {}: {}".format(
        args.reportNode,
        graph.get_node_property(args.propertyName)[args.reportNode]))

    if not args.noverify:
        numNodeProperties = len(graph.node_schema())
        newPropertyId = numNodeProperties - 1
        verify_sssp(graph, args.startNode, newPropertyId)
Example #4
def verify_sssp(graph: PropertyGraph, _source_i: int, property_id: int):
    prop_array = graph.get_node_property(property_id)
    not_visited = GAccumulator[int](0)
    max_dist = GReduceMax[int]()
    # TODO(amp): Remove / 4
    infinity = dtype_info(dtype_of_pyarrow_array(prop_array)).max / 4

    do_all(
        range(len(prop_array)),
        not_visited_operator(infinity, not_visited, prop_array),
        loop_name="not_visited_op",
    )

    if not_visited.reduce() > 0:
        print(
            not_visited.reduce(),
            " unvisited nodes; this is an error if graph is strongly connected",
        )

    do_all(
        range(len(prop_array)),
        max_dist_operator(infinity, max_dist, prop_array),
        steal=True,
        loop_name="max_dist_operator",
    )

    print("Max distance:", max_dist.reduce())
Example #5
def main():
    import argparse

    import katana.local

    katana.local.initialize()

    parser = argparse.ArgumentParser()
    parser.add_argument("--baseNode", type=int, default=0)
    parser.add_argument("--reportNode", type=int, default=1)
    parser.add_argument("--propertyName", type=str, default="NewProperty")
    parser.add_argument("--threads", "-t", type=int, default=1)
    parser.add_argument("input", type=str)
    args = parser.parse_args()

    print("Using threads:", set_active_threads(args.threads))

    g = PropertyGraph(args.input)

    timer = StatTimer("Jaccard (Property Graph) Numba")
    timer.start()
    jaccard(g, args.baseNode, args.propertyName)
    timer.stop()
    # del timer

    print("Node {}: {}".format(args.reportNode, g.get_node_property(args.propertyName)[args.reportNode]))
Example #6
def main():
    import argparse

    import katana.local

    katana.local.initialize()

    parser = argparse.ArgumentParser()
    parser.add_argument("--propertyName", type=str, default="NewProperty")
    parser.add_argument("--noverify", action="store_true", default=False)
    parser.add_argument("--threads", "-t", type=int, default=1)
    parser.add_argument("--kcore", "-k", type=int, default=100)
    parser.add_argument("--reportNode", type=int, default=0)
    parser.add_argument("input", type=str)
    args = parser.parse_args()

    print("Using threads:", set_active_threads(args.threads))

    graph = PropertyGraph(args.input)

    kcore_async(graph, args.kcore, args.propertyName)

    print("Node {}: {}".format(
        args.reportNode,
        graph.get_node_property(args.propertyName)[args.reportNode]))

    if not args.noverify:
        verify_kcore(graph, args.propertyName, args.kcore)
Example #7
def main():
    import argparse

    import katana.local

    katana.local.initialize()

    parser = argparse.ArgumentParser()
    parser.add_argument("--propertyName", type=str, default="NewProperty")
    parser.add_argument("--maxIterations", type=int, default=100)
    parser.add_argument("--tolerance", type=float, default=1.0e-3)
    parser.add_argument("--noverify", action="store_true", default=False)
    parser.add_argument("--printTopN", type=int, default=10)
    parser.add_argument("--threads", "-t", type=int, default=1)
    parser.add_argument("--reportNode", type=int, default=0)
    parser.add_argument("input", type=str)
    args = parser.parse_args()

    print("Using threads:", set_active_threads(args.threads))

    graph = PropertyGraph(args.input)

    pagerank_pull_sync_residual(graph, args.maxIterations, args.tolerance,
                                args.propertyName)

    print("Node {}: {}".format(
        args.reportNode,
        graph.get_node_property(args.propertyName)[args.reportNode]))

    if not args.noverify:
        verify_pr(graph, args.propertyName, args.printTopN)
Example #8
def main():
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--algoType", type=str, default="push")
    parser.add_argument("--propertyName", type=str, default="NewProperty")
    parser.add_argument("--reportNode", type=int, default=1)
    parser.add_argument("--noverify", action="store_true", default=False)
    parser.add_argument("--threads", "-t", type=int, default=1)
    parser.add_argument("input", type=str)
    args = parser.parse_args()

    print("Using threads:", setActiveThreads(args.threads))

    graph = PropertyGraph(args.input)

    if args.algoType == "push":
        cc_push_topo(graph, args.propertyName)
    else:
        cc_pull_topo(graph, args.propertyName)

    print("Node {}: {}".format(
        args.reportNode,
        graph.get_node_property(args.propertyName)[args.reportNode]))

    if not args.noverify:
        numNodeProperties = len(graph.node_schema())
        newPropertyId = numNodeProperties - 1
        verify_cc(graph, newPropertyId)
Example #9
def verify_bfs(graph: PropertyGraph, _source_i: int, property_id: int):
    chunk_array = graph.get_node_property(property_id)
    not_visited = GAccumulator[int](0)
    max_dist = GReduceMax[int]()

    do_all(
        range(len(chunk_array)),
        not_visited_operator(not_visited, chunk_array),
        loop_name="not_visited_op",
    )

    if not_visited.reduce() > 0:
        print(
            not_visited.reduce(),
            " unvisited nodes; this is an error if graph is strongly connected",
        )

    do_all(
        range(len(chunk_array)),
        max_dist_operator(max_dist, chunk_array),
        steal=True,
        loop_name="max_dist_operator",
    )

    print("BFS Max distance:", max_dist.reduce())
Example #10
def test_local_clustering_coefficient():
    property_graph = PropertyGraph(
        get_input("propertygraphs/rmat15_cleaned_symmetric"))

    local_clustering_coefficient(property_graph, "output")
    property_graph: PropertyGraph
    out = property_graph.get_node_property("output")

    assert out[-1].as_py() == 0
    assert not np.any(np.isnan(out))
Example #11
def verify_cc(graph: PropertyGraph, property_id: int):
    chunk_array = graph.get_node_property(property_id)
    num_components = GAccumulator[int](0)

    do_all(
        range(len(chunk_array)),
        verify_cc_operator(num_components, chunk_array),
        loop_name="num_components",
    )

    print("Number of components are : ", num_components.reduce())
Example #12
def test_load_graphml_write():
    input_file = Path(
        __file__
    ).parent.parent.parent / "tools" / "graph-convert" / "test-inputs" / "movies.graphml"
    pg = PropertyGraph.from_graphml(input_file)
    pg.mark_all_properties_persistent()
    with TemporaryDirectory() as tmpdir:
        pg.write(tmpdir)
        del pg
        property_graph = PropertyGraph(tmpdir)
        assert property_graph.path == f"file://{tmpdir}"
    assert property_graph.get_node_property(0)[1].as_py() == "Keanu Reeves"
Example #13
def verify_kcore(graph: PropertyGraph, property_name: str, k_core_num: int):
    """Check output sanity"""
    chunk_array = graph.get_node_property(property_name)
    alive_nodes = GAccumulator[float](0)

    do_all(
        range(len(chunk_array)),
        sanity_check_operator(alive_nodes, chunk_array, k_core_num),
        steal=True,
        loop_name="sanity_check_operator",
    )

    print("Number of nodes in the", k_core_num, "-core is", alive_nodes.reduce())
Example #14
def test_jaccard_sorted(property_graph: PropertyGraph):
    sort_all_edges_by_dest(property_graph)

    property_name = "NewProp"
    compare_node = 0

    jaccard(property_graph, compare_node, property_name, JaccardPlan.sorted())

    jaccard_assert_valid(property_graph, compare_node, property_name)

    similarities: np.ndarray = property_graph.get_node_property(property_name).to_numpy()
    assert similarities[compare_node] == 1
    assert similarities[1917] == approx(0.28571428)
    assert similarities[2812] == approx(0.01428571)
Example #15
def test_assert_valid(property_graph: PropertyGraph):
    with raises(AssertionError):
        bfs_assert_valid(property_graph, "workFrom")
    property_name = "NewProp"
    start_node = 0

    bfs(property_graph, start_node, property_name)

    v = property_graph.get_node_property(property_name).to_numpy().copy()
    v[0] = 100
    property_graph.add_node_property(table({"Prop2": v}))

    with raises(AssertionError):
        bfs_assert_valid(property_graph, "Prop2")
Example #16
def run_jaccard(property_graph: PropertyGraph, input_args):
    property_name = "NewProp"
    compare_node = input_args["source_node"]

    with time_block(f"jaccard on {compare_node}"):
        analytics.jaccard(property_graph, compare_node, property_name)

    check_schema(property_graph, property_name)

    similarities: np.ndarray = property_graph.get_node_property(property_name).to_numpy()
    assert similarities[compare_node] == 1

    analytics.jaccard_assert_valid(property_graph, compare_node, property_name)

    stats = analytics.JaccardStatistics(property_graph, compare_node, property_name)
    print(f"STATS:\n{stats}")
    property_graph.remove_node_property(property_name)
Example #17
def test_bfs(property_graph: PropertyGraph):
    property_name = "NewProp"
    start_node = 0

    bfs(property_graph, start_node, property_name)

    node_schema: Schema = property_graph.node_schema()
    num_node_properties = len(node_schema)
    new_property_id = num_node_properties - 1
    assert node_schema.names[new_property_id] == property_name

    assert property_graph.get_node_property(property_name)[start_node].as_py() == 0

    bfs_assert_valid(property_graph, property_name)

    stats = BfsStatistics(property_graph, property_name)

    assert stats.source_node == start_node
    assert stats.max_distance == 7

    # Verify with numba implementation of verifier as well
    verify_bfs(property_graph, start_node, new_property_id)
Example #18
def test_busy_wait(property_graph: PropertyGraph):
    set_busy_wait()
    property_name = "NewProp"
    start_node = 0

    bfs(property_graph, start_node, property_name)

    node_schema: Schema = property_graph.node_schema()
    num_node_properties = len(node_schema)
    new_property_id = num_node_properties - 1
    assert node_schema.names[new_property_id] == property_name

    assert property_graph.get_node_property(
        property_name)[start_node].as_py() == 0

    bfs_assert_valid(property_graph, start_node, property_name)

    BfsStatistics(property_graph, property_name)

    # Verify with numba implementation of verifier as well
    verify_bfs(property_graph, start_node, new_property_id)
    set_busy_wait(0)
Example #19
def test_sssp(property_graph: PropertyGraph):
    property_name = "NewProp"
    weight_name = "workFrom"
    start_node = 0

    sssp(property_graph, start_node, weight_name, property_name)

    node_schema: Schema = property_graph.node_schema()
    num_node_properties = len(node_schema)
    new_property_id = num_node_properties - 1
    assert node_schema.names[new_property_id] == property_name

    assert property_graph.get_node_property(property_name)[start_node].as_py() == 0

    sssp_assert_valid(property_graph, start_node, weight_name, property_name)

    stats = SsspStatistics(property_graph, property_name)

    print(stats)
    assert stats.max_distance == 2011.0

    # Verify with numba implementation of verifier
    verify_sssp(property_graph, start_node, new_property_id)
Example #20
def test_jaccard(property_graph: PropertyGraph):
    property_name = "NewProp"
    compare_node = 0

    jaccard(property_graph, compare_node, property_name)

    node_schema: Schema = property_graph.node_schema()
    num_node_properties = len(node_schema)
    new_property_id = num_node_properties - 1
    assert node_schema.names[new_property_id] == property_name

    jaccard_assert_valid(property_graph, compare_node, property_name)

    stats = JaccardStatistics(property_graph, compare_node, property_name)

    assert stats.max_similarity == approx(1)
    assert stats.min_similarity == approx(0)
    assert stats.average_similarity == approx(0.000637853)

    similarities: np.ndarray = property_graph.get_node_property(property_name).to_numpy()
    assert similarities[compare_node] == 1
    assert similarities[1917] == approx(0.28571428)
    assert similarities[2812] == approx(0.01428571)
Example #21
def verify_pr(graph: PropertyGraph, property_name: str, topn: int):
    """Check output sanity"""
    chunk_array = graph.get_node_property(property_name)
    sum_rank = GAccumulator[float](0)
    max_rank = GReduceMax[float]()
    min_rank = GReduceMin[float]()

    do_all(
        range(len(chunk_array)),
        sanity_check_operator(sum_rank, max_rank, min_rank, chunk_array),
        steal=True,
        loop_name="sanity_check_operator",
    )

    print("Max rank is ", max_rank.reduce())
    print("Min rank is ", min_rank.reduce())
    print("rank sum is ", sum_rank.reduce())

    # Print top N ranked nodes
    if topn > 0:
        np_array = np.array(chunk_array, dtype=np.float64)
        arr = np_array.argsort()[-topn:][::-1]
        for i in arr:
            print(np_array[i], " : ", i, "\n")