Example #1
0
def kcore_async(graph: PropertyGraph, k_core_num, property_name):
    """
    Run the asynchronous k-core loops and store the resulting array on the graph.

    A per-node array is filled by `compute_degree_count_operator`, an initial
    worklist is populated by `setup_initial_worklist_operator`, and the worklist
    is then drained by `compute_async_kcore_operator`. The array is finally
    added to `graph` as a node property.

    Parameters:
        graph: the PropertyGraph to process; a node property is added to it
        k_core_num: the k value handed to the worklist and k-core operators
        property_name: name of the node property that receives the array;
            also appended to the StatTimer label
    """
    node_count = graph.num_nodes()
    worklist = InsertBag[np.uint64]()
    current_degree = LargeArray[np.uint64](node_count, AllocationPolicy.INTERLEAVED)

    timer = StatTimer("Kcore: Property Graph Numba: " + property_name)
    timer.start()

    # Phase 1: let the degree operator fill the per-node array.
    degree_op = compute_degree_count_operator(graph, current_degree.as_numpy())
    do_all(range(node_count), degree_op, steal=True)

    # Phase 2: seed the worklist from the freshly computed array.
    seed_op = setup_initial_worklist_operator(worklist, current_degree.as_numpy(), k_core_num)
    do_all(range(node_count), seed_op, steal=True)

    # Phase 3: process the worklist with conflict detection switched off.
    kcore_op = compute_async_kcore_operator(graph, current_degree.as_numpy(), k_core_num)
    for_each(worklist, kcore_op, steal=True, disable_conflict_detection=True)

    timer.stop()
    # Publish the per-node array as a new node property of the graph.
    graph.add_node_property(pyarrow.table({property_name: current_degree}))
Example #2
0
def cc_push_topo(graph: PropertyGraph, property_name):
    """
    Run the push-style topology-driven component loop and store the result.

    Two uint32 arrays (current and old component ids) are initialised by
    `initialize_cc_push_operator`. `cc_push_topo_operator` is then applied to
    every node repeatedly until a round leaves the `changed` reducer false.
    The current-id array is added to `graph` as a node property.

    Parameters:
        graph: the PropertyGraph to process; a node property is added to it
        property_name: name of the node property that receives the ids;
            also appended to the StatTimer label
    """
    print("Executing Push algo\n")
    node_count = graph.num_nodes()

    timer = StatTimer("CC: Property Graph Numba: " + property_name)
    timer.start()
    # Component id per node: the value being written and the previous value.
    comp_current = np.empty(node_count, dtype=np.uint32)
    comp_old = np.empty(node_count, dtype=np.uint32)

    # Let the initialisation operator populate both arrays.
    init_op = initialize_cc_push_operator(graph, comp_current, comp_old)
    do_all(range(node_count), init_op, steal=True, loop_name="initialize_cc_push")

    # Seed the reducer with True so the first round always runs, then keep
    # iterating for as long as a round reports a change.
    changed = GReduceLogicalOr()
    changed.update(True)
    while changed.reduce():
        changed.reset()
        round_op = cc_push_topo_operator(graph, changed, comp_current, comp_old)
        do_all(range(node_count), round_op, steal=True, loop_name="cc_push_topo")

    timer.stop()
    # Publish the component assignment as a new node property of the graph.
    graph.add_node_property(pyarrow.table({property_name: comp_current}))
Example #3
0
def verify_sssp(graph: PropertyGraph, _source_i: int, property_id: int):
    """
    Print summary statistics for an SSSP result stored as a node property.

    `not_visited_operator` is run over every index of the property to
    accumulate a count, and `max_dist_operator` is run to accumulate a maximum.
    A message is printed when the count is positive; the maximum is always
    printed.

    Parameters:
        graph: the PropertyGraph holding the SSSP output
        _source_i: unused
        property_id: identifier passed to graph.get_node_property
    """
    distances = graph.get_node_property(property_id)
    entry_count = len(distances)
    unvisited_counter = GAccumulator[int](0)
    farthest = GReduceMax[int]()
    # TODO(amp): Remove / 4
    infinity = dtype_info(dtype_of_pyarrow_array(distances)).max / 4

    count_op = not_visited_operator(infinity, unvisited_counter, distances)
    do_all(range(entry_count), count_op, loop_name="not_visited_op")

    unvisited = unvisited_counter.reduce()
    if unvisited > 0:
        print(
            unvisited, " unvisited nodes; this is an error if graph is strongly connected",
        )

    max_op = max_dist_operator(infinity, farthest, distances)
    do_all(range(entry_count), max_op, steal=True, loop_name="max_dist_operator")

    print("Max distance:", farthest.reduce())
Example #4
0
def verify_bfs(graph: PropertyGraph, _source_i: int, property_id: int):
    """
    Print summary statistics for a BFS result stored as a node property.

    `not_visited_operator` is run over every index of the property to
    accumulate a count, and `max_dist_operator` is run to accumulate a maximum.
    A message is printed when the count is positive; the maximum is always
    printed.

    Parameters:
        graph: the PropertyGraph holding the BFS output
        _source_i: unused
        property_id: identifier passed to graph.get_node_property
    """
    levels = graph.get_node_property(property_id)
    entry_count = len(levels)
    unvisited_counter = GAccumulator[int](0)
    farthest = GReduceMax[int]()

    count_op = not_visited_operator(unvisited_counter, levels)
    do_all(range(entry_count), count_op, loop_name="not_visited_op")

    unvisited = unvisited_counter.reduce()
    if unvisited > 0:
        print(
            unvisited, " unvisited nodes; this is an error if graph is strongly connected",
        )

    max_op = max_dist_operator(farthest, levels)
    do_all(range(entry_count), max_op, steal=True, loop_name="max_dist_operator")

    print("BFS Max distance:", farthest.reduce())
Example #5
0
def bfs_sync_pg(graph: PropertyGraph, source, property_name):
    """
    Run the level-synchronous BFS loop and store the distance array on the graph.

    Two bags act as the current and the next frontier. Each round swaps them,
    clears the next one, bumps the level counter and applies
    `bfs_sync_operator_pg` to every entry of the current frontier. The loop
    stops once a round leaves the next frontier empty.

    Parameters:
        graph: the PropertyGraph to traverse; a node property is added to it
        source: the value pushed as the first frontier entry and passed to
            `initialize`
        property_name: name of the node property that receives the distances;
            also appended to the StatTimer label
    """
    level = 0

    frontier = InsertBag[np.uint64]()
    upcoming = InsertBag[np.uint64]()

    timer = StatTimer("BFS Property Graph Numba: " + property_name)
    timer.start()
    distance = np.empty(len(graph), dtype=np.uint32)
    initialize(graph, source, distance)
    upcoming.push(source)
    while not upcoming.empty():
        # What was queued in the previous round becomes the current frontier.
        frontier.swap(upcoming)
        upcoming.clear()
        level += 1
        level_op = bfs_sync_operator_pg(graph, upcoming, level, distance)
        do_all(frontier, level_op, steal=True, loop_name="bfs_sync_pg")
    timer.stop()

    graph.add_node_property(pyarrow.table({property_name: distance}))
Example #6
0
def calculate_degree(graph: PropertyGraph,
                     in_degree_property,
                     out_degree_property,
                     weight_property=None):
    """
    Compute in and out degrees (optionally weighted) and attach them to the graph.

    Two node properties are added to the given graph, one holding the in
    degree and one holding the out degree. The function returns nothing.
    Parameters:
        graph: a PropertyGraph
        in_degree_property: the property name for the in degree
        out_degree_property: the property name for the out degree
        weight_property: an edge property to use in calculating the weighted
            degree; when falsy the unweighted operator is used
    """
    node_count = graph.num_nodes()
    nout = LargeArray[np.uint64](node_count, AllocationPolicy.INTERLEAVED)
    nin = LargeArray[np.uint64](node_count, AllocationPolicy.INTERLEAVED)

    # Only the in-degree array gets an explicit initialisation pass.
    do_all(range(node_count), initialize_in_degree(nin.as_numpy()), steal=False)

    # Choose the weighted operator only when an edge weight property was given.
    if weight_property:
        count_operator = count_weighted_in_and_out_degree(
            graph,
            nout.as_numpy(),
            nin.as_numpy(),
            graph.get_edge_property(weight_property),
        )
    else:
        count_operator = count_in_and_out_degree(graph, nout.as_numpy(), nin.as_numpy())
    do_all(range(node_count), count_operator, steal=True)

    degree_columns = {in_degree_property: nin, out_degree_property: nout}
    graph.add_node_property(pyarrow.table(degree_columns))
Example #7
0
def test_do_all(modes):
    """do_all with a compiled operator stores i + 1 at index i for i in range(10)."""
    size = 10

    @do_all_operator()
    def store_successor(out, i):
        out[i] = i + 1

    result = np.zeros(size, dtype=int)
    do_all(range(size), store_successor(result), **modes)
    expected = np.arange(1, size + 1)
    assert np.allclose(result, expected)
Example #8
0
def test_do_all_wrong_closure():
    """Handing a for_each operator to do_all must raise TypeError."""

    @for_each_operator()
    def for_each_style_op(out, i, ctx):
        out[i] = i + 1

    target = np.zeros(10, dtype=int)
    # Binding the closure stays inside the context manager, as either the
    # binding or the do_all call may be the one that raises.
    with pytest.raises(TypeError):
        do_all(range(10), for_each_style_op(target))
Example #9
0
def test_atomic_min_parallel(dtype, threads_many):
    """atomic_min applied with every i in range(1000) leaves 0 in the cell."""

    @do_all_operator()
    def lower_cell(out, i):
        atomic_min(out, 0, i)

    cell = np.array([500], dtype=dtype)
    do_all(range(1000), lower_cell(cell), steal=False)
    smallest_input = 0
    assert cell[0] == smallest_input
Example #10
0
def test_atomic_add_parallel_largearray(threads_many):
    """atomic_add into slot 0 of a LargeArray view accumulates sum(range(1000))."""
    size = 1000

    @do_all_operator()
    def add_to_first(out, i):
        atomic_add(out, 0, i)

    cells = LargeArray[int]()
    cells.allocateBlocked(size)
    do_all(range(size), add_to_first(cells.as_numpy()), steal=False)
    expected_total = sum(range(size))  # 499500
    assert cells[0] == expected_total
Example #11
0
def test_do_all_python(modes):
    """do_all accepts a plain Python callable and invokes it for every item."""
    running_sum = 0

    def accumulate(item):
        nonlocal running_sum
        running_sum += item

    do_all(range(10), accumulate, **modes)
    expected_sum = sum(range(10))  # 45
    assert running_sum == expected_sum
Example #12
0
def test_GReduceLogicalAnd_parallel(threads_many):
    """A logical-and reducer fed `i % 3 == 0` for i in range(1000) reduces to False."""
    reducer = GReduceLogicalAnd()

    @do_all_operator()
    def feed(acc, i):
        acc.update(i % 3 == 0)

    do_all(range(1000), feed(reducer), steal=False)
    outcome = reducer.reduce()
    assert outcome == False
Example #13
0
def test_GReduceMin_parallel(threads_many):
    """A float min reducer fed (i - 500) / 10 for i in range(1000) reduces to -50.0."""
    reducer = GReduceMin[float]()

    @do_all_operator()
    def feed(acc, i):
        acc.update((i - 500) / 10)

    do_all(range(1000), feed(reducer), steal=False)
    smallest = reducer.reduce()
    assert smallest == -50.0
Example #14
0
def test_GAccumulator_parallel(threads_many):
    """An int accumulator fed every i in range(1000) reduces to their sum."""
    accumulator = GAccumulator[int]()

    @do_all_operator()
    def feed(acc, i):
        acc.update(i)

    do_all(range(1000), feed(accumulator), steal=False)
    expected_total = sum(range(1000))  # 499500
    assert accumulator.reduce() == expected_total
Example #15
0
def test_GReduceMax_parallel(threads_many):
    """An int max reducer fed abs(500 - i) for i in range(1000) reduces to 500."""
    reducer = GReduceMax[int]()

    @do_all_operator()
    def feed(acc, i):
        acc.update(abs(500 - i))

    do_all(range(1000), feed(reducer), steal=False)
    largest = reducer.reduce()
    assert largest == 500
Example #16
0
def degree_assortativity_coefficient(
    graph: PropertyGraph,
    source_degree_type: DegreeType = DegreeType.OUT,
    destination_degree_type: DegreeType = DegreeType.IN,
    weight=None,
):
    """
    Calculates and returns the degree assortativity of a given graph.

    Two temporary node properties holding the in and out degrees are added to
    the graph for the duration of the call. They are removed again before the
    function exits, including when any step after their creation raises.

    Parameters:
       * graph: the PropertyGraph to be analyzed
       * source_degree_type: description of degree type to consider for the source node on an edge
            expected values are DegreeType.IN or DegreeType.OUT
       * destination_degree_type: description the degree type to consider for the destination node on an edge
            expected values are DegreeType.IN or DegreeType.OUT
       * weight (optional): edge property to use if using weighted degrees
    Returns:
        The accumulated product of deviations divided by the square root of
        the product of the two accumulated squared deviations.
    """
    # Build the temporary property names from the enum members themselves so
    # the names created here always agree with the "temp_" + str(...) lookups
    # performed below.
    in_degree_name = "temp_" + str(DegreeType.IN)
    out_degree_name = "temp_" + str(DegreeType.OUT)

    # get the tables associated with the degree types of the source and destination nodes
    calculate_degree(graph, in_degree_name, out_degree_name, weight)

    # Everything after the temporary properties exist runs under try/finally,
    # so a failing lookup cannot leave them behind on the graph.
    try:
        source_degree = graph.get_node_property("temp_" + str(source_degree_type))
        destination_degree = graph.get_node_property("temp_" + str(destination_degree_type))

        # Calculate the average in and out degrees of graph
        # (with respect to number of edges, not number of nodes)
        num_edges = graph.num_edges()
        source_average, destination_average = average_degree(
            graph, num_edges, source_degree, destination_degree)

        # Calculate the numerator (product of deviation from mean)
        # and the factors of the denominator (square deviation from mean)
        product_of_dev = GAccumulator[float](0)
        square_of_source_dev = GAccumulator[float](0)
        square_of_destination_dev = GAccumulator[float](0)
        do_all(
            range(graph.num_nodes()),
            degree_assortativity_coefficient_operator(
                graph,
                source_degree,
                source_average,
                destination_degree,
                destination_average,
                product_of_dev,
                square_of_source_dev,
                square_of_destination_dev,
            ),
            steal=True,
            loop_name="degree assortativity coefficient calculation",
        )
        # NOTE(review): the denominator is zero when either squared-deviation
        # sum is zero; behaviour in that case depends on which `sqrt` the
        # module imports -- confirm whether callers need a guard.
        return product_of_dev.reduce() / sqrt(
            square_of_source_dev.reduce() * square_of_destination_dev.reduce())
    finally:
        graph.remove_node_property(in_degree_name)
        graph.remove_node_property(out_degree_name)
0
def verify_cc(graph: PropertyGraph, property_id: int):
    """
    Print the value accumulated by `verify_cc_operator` over a node property.

    Parameters:
        graph: the PropertyGraph holding the component assignment
        property_id: identifier passed to graph.get_node_property
    """
    components = graph.get_node_property(property_id)
    component_counter = GAccumulator[int](0)

    counting_op = verify_cc_operator(component_counter, components)
    do_all(range(len(components)), counting_op, loop_name="num_components")

    print("Number of components are : ", component_counter.reduce())
Example #18
0
def test_InsertBag_parallel_opaque():
    """Structured records pushed into an InsertBag in parallel keep x == y / 2."""
    record_fields = [("x", np.float32), ("y", np.int16)]
    record_dtype = np.dtype(record_fields, align=True)
    records = InsertBag[record_dtype]()

    @do_all_operator()
    def push_record(bag, i):
        bag.push((i / 2.0, i))

    do_all(range(1000), push_record(records), steal=False)
    for record in records:
        assert record.x == pytest.approx(record.y / 2.0)
Example #19
0
def test_LargeArray_numpy_parallel(typ):
    """Writes made through the numpy view of a LargeArray are visible in the array."""
    size = 1000
    storage = LargeArray[typ]()
    storage.allocateInterleaved(size)

    @do_all_operator()
    def assign_then_increment(arr, i):
        arr[i] = i
        arr[i] += 1

    do_all(range(size), assign_then_increment(storage.as_numpy()), steal=False)
    expected = list(range(1, size + 1))
    assert list(storage) == expected
Example #20
0
def test_InsertBag_parallel(typ):
    """Pushing every i in range(1000) twice yields each value exactly two times."""
    collected = InsertBag[typ]()

    @do_all_operator()
    def push_twice(bag, i):
        bag.push(i)
        bag.push(i)

    do_all(range(1000), push_twice(collected), steal=False)
    # Bag iteration order is not asserted on; compare the sorted contents.
    expected = [value for value in range(1000) for _ in range(2)]
    assert sorted(collected) == expected
Example #21
0
def test_LargeArray_parallel(typ):
    """A LargeArray passed directly to an operator supports get/set in parallel."""
    size = 1000
    storage = LargeArray[typ]()
    storage.allocateInterleaved(size)

    @do_all_operator()
    def set_then_increment(arr, i):
        # TODO: Use __setitem__
        arr.set(i, i)
        arr.set(i, arr.get(i) + 1)

    do_all(range(size), set_then_increment(storage), steal=False)
    expected = list(range(1, size + 1))
    assert list(storage) == expected
Example #22
0
def jaccard(g, key_node, property_name):
    """
    Run `jaccard_operator` for every node against `key_node` and store the output.

    A boolean mask marks the destination of every edge of `key_node`. The
    mask, the number of those edges and an output array are handed to
    `jaccard_operator`, which is applied to each item of `g`. The output array
    is then added to `g` as a node property.

    Parameters:
        g: the graph to process; a node property is added to it
        key_node: the node every other node is compared against
        property_name: name of the node property that receives the output
    """
    node_count = len(g)
    key_neighbors = np.zeros(node_count, dtype=bool)
    output = np.empty(node_count, dtype=float)

    # Flag every node that is the destination of one of key_node's edges.
    for edge in g.edges(key_node):
        key_neighbors[g.get_edge_dest(edge)] = True

    key_edge_count = len(g.edges(key_node))
    scoring_op = jaccard_operator(g, key_neighbors, key_edge_count, output)
    do_all(g, scoring_op, steal=True, loop_name="jaccard")

    g.add_node_property(pyarrow.table({property_name: output}))
Example #23
0
def verify_kcore(graph: PropertyGraph, property_name: str, k_core_num: int):
    """
    Print the value accumulated by `sanity_check_operator` over a k-core result.

    Parameters:
        graph: the PropertyGraph holding the k-core output
        property_name: node property passed to graph.get_node_property
        k_core_num: the k value handed to the operator and echoed in the message
    """
    node_values = graph.get_node_property(property_name)
    alive_counter = GAccumulator[float](0)

    check_op = sanity_check_operator(alive_counter, node_values, k_core_num)
    do_all(range(len(node_values)), check_op, steal=True, loop_name="sanity_check_operator")

    print("Number of nodes in the", k_core_num, "-core is", alive_counter.reduce())
Example #24
0
def test_simple_barrier(threads_many):
    """Every item is recorded once before any item is recorded a second time."""
    _ = threads_many
    thread_count = get_active_threads()
    barrier = SimpleBarrier(thread_count)
    events = []

    def record_around_barrier(item):
        events.append(item)
        barrier.wait()
        events.append(item)

    do_all(range(thread_count), record_around_barrier)
    everyone = set(range(thread_count))
    before_barrier = events[:thread_count]
    after_barrier = events[thread_count:]
    assert set(before_barrier) == everyone
    assert set(after_barrier) == everyone
Example #25
0
def test_fast_barrier_in_numba(threads_many):
    """Same check as the simple-barrier test, using the fast barrier and a do_all operator."""
    _ = threads_many
    barrier = get_fast_barrier()
    events = []

    @do_all_operator()
    def record_around_barrier(item):
        events.append(item)
        barrier.wait()
        events.append(item)

    thread_count = get_active_threads()
    do_all(range(thread_count), record_around_barrier)
    everyone = set(range(thread_count))
    before_barrier = events[:thread_count]
    after_barrier = events[thread_count:]
    assert set(before_barrier) == everyone
    assert set(after_barrier) == everyone
Example #26
0
def average_degree(graph: PropertyGraph, num_edges: int, source_degree,
                   destination_degree):
    """
    Compute the per-edge average of the source and destination degree sums.

    `sum_degree_operator` accumulates two totals over all nodes of the graph;
    each total is then divided by `num_edges`.

    Returns a tuple in the form
    (average degree for source, average degree for destination).
    """
    node_range = range(graph.num_nodes())
    source_total = GAccumulator[np.uint64](0)
    destination_total = GAccumulator[np.uint64](0)

    summing_op = sum_degree_operator(
        graph, source_degree, source_total, destination_degree, destination_total,
    )
    do_all(node_range, summing_op, steal=True)

    source_average = source_total.reduce() / num_edges
    destination_average = destination_total.reduce() / num_edges
    return (source_average, destination_average)
Example #27
0
def test_do_all_specific_type(modes, typ):
    """do_all over an InsertBag[typ] compiles the operator for that element type."""
    from katana.datastructures import InsertBag

    size = 1000

    @do_all_operator()
    def f(out, i):
        out[int(i)] = i

    items = InsertBag[typ]()
    for value in range(size):
        items.push(value)

    out = np.zeros(size, dtype=typ)
    do_all(items, f(out), **modes)
    assert np.allclose(out, np.arange(size))
    # Check that the operator was actually compiled for the correct type
    first_signature = list(f.inspect_llvm().keys())[0]
    item_type = first_signature[1][0]
    assert item_type == from_dtype(np.dtype(typ))
Example #28
0
def test_LargeArray_numpy_parallel_opaque():
    """Field writes through the numpy view of a structured LargeArray are visible."""
    size = 1000
    record_fields = [("x", np.float32), ("y", np.int16)]
    record_dtype = np.dtype(record_fields, align=True)
    storage = LargeArray[record_dtype]()
    storage.allocateInterleaved(size)

    @do_all_operator()
    def fill_record(arr, i):
        arr[i].x = i
        arr[i].y = i
        arr[i].x += 1.1

    do_all(range(size), fill_record(storage.as_numpy()), steal=False)

    # Verify both through iteration and through indexing.
    for index, record in enumerate(storage):
        assert record.x == pytest.approx(index + 1.1)
        assert record.y == index
        assert storage[index].x == pytest.approx(index + 1.1)
        assert storage[index].y == index
Example #29
0
def test_do_all_opaque(modes):
    """do_all over a bag of structured records stores each x at index y."""
    from katana.datastructures import InsertBag

    @do_all_operator()
    def scatter(out, s):
        out[s.y] = s.x

    record_dtype = np.dtype([("x", np.float32), ("y", np.int8)], align=True)
    records = InsertBag[record_dtype]()
    for record in ((1.1, 0), (2.1, 1), (3.1, 3)):
        records.push(record)

    out = np.zeros(4, dtype=float)
    do_all(records, scatter(out), **modes)
    # Index 2 is never written and must keep its initial zero.
    expected = np.array([1.1, 2.1, 0, 3.1])
    assert np.allclose(out, expected)
Example #30
0
def pagerank_pull_sync_residual(graph: PropertyGraph, maxIterations, tolerance, property_name):
    """
    Run the pull-style residual PageRank loops and store the rank array on the graph.

    Four per-node arrays (rank, out-degree, delta, residual) are allocated and
    initialised. Each iteration runs the delta operator followed by the
    residual operator over all nodes. Iteration stops after `maxIterations`
    rounds or once a round leaves the `changed` reducer false.

    Parameters:
        graph: the PropertyGraph to process; a node property is added to it
        maxIterations: upper bound on the number of iterations
        tolerance: value handed to the delta operator
        property_name: name of the node property that receives the ranks;
            also appended to the StatTimer label
    """
    node_count = graph.num_nodes()

    rank = LargeArray[float](node_count, AllocationPolicy.INTERLEAVED)
    nout = LargeArray[np.uint64](node_count, AllocationPolicy.INTERLEAVED)
    delta = LargeArray[float](node_count, AllocationPolicy.INTERLEAVED)
    residual = LargeArray[float](node_count, AllocationPolicy.INTERLEAVED)

    # Let the initialisation operator populate all four arrays.
    init_op = initialize_residual_operator(
        rank.as_numpy(), nout.as_numpy(), delta.as_numpy(), residual.as_numpy(),
    )
    do_all(range(node_count), init_op, steal=True, loop_name="initialize_pagerank_pull_residual")

    # Fill in the out-degree of every node.
    out_degree_op = compute_out_deg_operator(graph, nout.as_numpy())
    do_all(range(node_count), out_degree_op, steal=True, loop_name="Compute_out_degree")

    print("Out-degree of 0: ", nout[0])

    # The reducer starts out True so the first iteration always runs.
    changed = GReduceLogicalOr(True)
    iterations = 0
    timer = StatTimer("Pagerank: Property Graph Numba: " + property_name)
    timer.start()
    while iterations < maxIterations and changed.reduce():
        print("Iter: ", iterations, "\n")
        changed.reset()
        iterations += 1

        delta_op = compute_pagerank_pull_delta_operator(
            rank.as_numpy(), nout.as_numpy(), delta.as_numpy(), residual.as_numpy(), tolerance, changed,
        )
        do_all(range(node_count), delta_op, steal=True, loop_name="pagerank_delta")

        residual_op = compute_pagerank_pull_residual_operator(graph, delta.as_numpy(), residual.as_numpy())
        do_all(range(node_count), residual_op, steal=True, loop_name="pagerank")

    timer.stop()
    # Publish the ranks as a new node property of the graph.
    graph.add_node_property(pyarrow.table({property_name: rank}))