def verify_sssp(graph: Graph, _source_i: int, property_id: int):
    """
    Print sanity statistics for the node property identified by `property_id`.

    Two parallel passes run over the property values: the first hands them to
    `not_visited_operator` together with a sum reducer, the second hands them to
    `max_dist_operator` together with a max reducer. The reduced values are printed.

    Parameters:
        graph: the Graph holding the property
        _source_i: accepted but not used here
        property_id: identifier passed to `graph.get_node_property`
    """
    distances = graph.get_node_property(property_id)
    unvisited_counter = ReduceSum[int](0)
    longest = ReduceMax[int]()

    # TODO(amp): Remove / 4
    value_limits = dtype_info(dtype_of_pyarrow_array(distances))
    infinity = value_limits.max / 4

    do_all(
        range(len(distances)),
        not_visited_operator(infinity, unvisited_counter, distances),
        loop_name="not_visited_op",
    )

    if unvisited_counter.reduce() > 0:
        print(
            unvisited_counter.reduce(),
            " unvisited nodes; this is an error if graph is strongly connected",
        )

    do_all(
        range(len(distances)),
        max_dist_operator(infinity, longest, distances),
        steal=True,
        loop_name="max_dist_operator",
    )

    print("Max distance:", longest.reduce())
def test_out_edges(graph):
    """
    Check out-edge and out-degree queries on the fixture graph, first from plain
    Python and then from inside compiled `do_all` operators.
    """
    likes_type = graph.edge_types.atomic_types["LIKES"]

    # Direct (interpreted) queries.
    assert len(graph.out_edge_ids()) == graph.num_edges()
    assert len(graph.out_edge_ids(1)) == 1
    assert len(graph.out_edge_ids(26352, graph.edge_types.atomic_types["LIKES"])) == 27
    assert graph.out_degree(26352) == 103
    assert graph.out_degree(26352, graph.edge_types.atomic_types["LIKES"]) == 27

    likes_id = likes_type.id

    # Same checks inside an operator, on a graph view with edge type lookup.
    @do_all_operator()
    def typed_checks(graph, out, i):  # pylint: disable=unused-argument
        out[0] = len(graph.out_edge_ids()) == graph.num_edges()
        out[1] = len(graph.out_edge_ids_for_node(26352)) == 103
        out[2] = len(graph.out_edge_ids_for_node_and_type(26352, likes_id)) == 27
        out[3] = graph.out_degree(26352) == 103
        out[4] = graph.out_degree_for_type(26352, likes_id) == 27

    typed_results = np.zeros(shape=(5,), dtype=int)
    do_all(range(0, 1), typed_checks(graph.with_edge_type_lookup(), typed_results))
    assert all(typed_results)

    # Type-independent checks inside an operator, on the plain graph.
    @do_all_operator()
    def untyped_checks(graph, out, i):  # pylint: disable=unused-argument
        out[0] = len(graph.out_edge_ids()) == graph.num_edges()
        out[1] = len(graph.out_edge_ids_for_node(26352)) == 103
        out[2] = graph.out_degree(26352) == 103

    untyped_results = np.zeros(shape=(3,), dtype=int)
    do_all(range(0, 1), untyped_checks(graph, untyped_results))
    assert all(untyped_results)
def calculate_degree(graph: Graph, in_degree_property, out_degree_property, weight_property=None):
    """
    Compute the (optionally weighted) in and out degrees of a graph.

    The graph is modified in place: two node properties are added, one holding
    the in degrees and one holding the out degrees. Nothing is returned.

    Parameters:
        graph: a Graph
        in_degree_property: name of the node property that receives the in degrees
        out_degree_property: name of the node property that receives the out degrees
        weight_property: an edge property used for the weighted degree; any falsy
            value (the default is None) selects the unweighted count
    """
    node_count = graph.num_nodes()
    out_degrees = NUMAArray[np.uint64](node_count, AllocationPolicy.INTERLEAVED)
    in_degrees = NUMAArray[np.uint64](node_count, AllocationPolicy.INTERLEAVED)

    do_all(range(node_count), initialize_in_degree(in_degrees.as_numpy()), steal=False)

    # Pick the weighted operator only when an edge weight property was supplied.
    if weight_property:
        degree_operator = count_weighted_in_and_out_degree(
            graph,
            out_degrees.as_numpy(),
            in_degrees.as_numpy(),
            graph.get_edge_property(weight_property),
        )
    else:
        degree_operator = count_in_and_out_degree(graph, out_degrees.as_numpy(), in_degrees.as_numpy())

    do_all(range(node_count), degree_operator, steal=True)

    degree_columns = {in_degree_property: in_degrees, out_degree_property: out_degrees}
    graph.add_node_property(pyarrow.table(degree_columns))
def kcore_async(graph: Graph, k_core_num, property_name):
    """
    Run the asynchronous k-core computation and store the resulting per-node
    degree array on the graph as node property `property_name`.

    Parameters:
        graph: the Graph to process; a node property is added to it
        k_core_num: the k value handed to the worklist setup and k-core operators
        property_name: name of the node property to create
    """
    node_count = graph.num_nodes()
    worklist = InsertBag[np.uint64]()
    degrees = NUMAArray[np.uint64](node_count, AllocationPolicy.INTERLEAVED)

    timer = StatTimer("Kcore: Property Graph Numba: " + property_name)
    timer.start()

    # Phase 1: fill in the degree counts.
    do_all(
        range(node_count),
        compute_degree_count_operator(graph, degrees.as_numpy()),
        steal=True,
    )

    # Phase 2: seed the worklist.
    do_all(
        range(node_count),
        setup_initial_worklist_operator(worklist, degrees.as_numpy(), k_core_num),
        steal=True,
    )

    # Phase 3: process the worklist until it is drained.
    for_each(
        worklist,
        compute_async_kcore_operator(graph, degrees.as_numpy(), k_core_num),
        steal=True,
        disable_conflict_detection=True,
    )

    timer.stop()

    # Publish the final degree array as a new node property.
    result_table = pyarrow.table({property_name: degrees})
    graph.add_node_property(result_table)
def bfs_sync_pg(graph: Graph, source, property_name):
    """
    Run a level-synchronous BFS from `source` and store the distance array on the
    graph as node property `property_name`.

    Parameters:
        graph: the Graph to traverse; a node property is added to it
        source: the start node, pushed onto the first frontier
        property_name: name of the node property to create
    """
    level = 0
    frontier = InsertBag[np.uint64]()
    upcoming = InsertBag[np.uint64]()

    timer = StatTimer("BFS Property Graph Numba: " + property_name)
    timer.start()

    distance = np.empty((graph.num_nodes(),), dtype=np.uint32)
    initialize(graph, source, distance)
    upcoming.push(source)

    # Each round consumes the current frontier and fills the next one.
    while not upcoming.empty():
        frontier.swap(upcoming)
        upcoming.clear()
        level += 1
        do_all(
            frontier,
            bfs_sync_operator_pg(graph, upcoming, level, distance),
            steal=True,
            loop_name="bfs_sync_pg",
        )

    timer.stop()

    result_table = pyarrow.table({property_name: distance})
    graph.add_node_property(result_table)
def cc_push_topo(graph: Graph, property_name):
    """
    Run the push-style connected components loop and store the component array on
    the graph as node property `property_name`.

    Parameters:
        graph: the Graph to process; a node property is added to it
        property_name: name of the node property to create
    """
    print("Executing Push algo\n")

    node_count = graph.num_nodes()

    timer = StatTimer("CC: Property Graph Numba: " + property_name)
    timer.start()

    # Current and previous component id assignment, one slot per node.
    comp_current = np.empty((node_count,), dtype=np.uint32)
    comp_old = np.empty((node_count,), dtype=np.uint32)

    do_all(
        range(node_count),
        initialize_cc_push_operator(graph, comp_current, comp_old),
        steal=True,
        loop_name="initialize_cc_push",
    )

    # Keep sweeping over all nodes for as long as a sweep reports a change.
    any_update = ReduceLogicalOr()
    any_update.update(True)
    while any_update.reduce():
        any_update.reset()
        do_all(
            range(node_count),
            cc_push_topo_operator(graph, any_update, comp_current, comp_old),
            steal=True,
            loop_name="cc_push_topo",
        )

    timer.stop()

    # Publish the component assignment as a new node property.
    result_table = pyarrow.table({property_name: comp_current})
    graph.add_node_property(result_table)
def verify_bfs(graph: Graph, _source_i: int, property_id):
    """
    Print sanity statistics for the node property identified by `property_id`.

    One parallel pass feeds the values to `not_visited_operator` with a sum
    reducer, a second feeds them to `max_dist_operator` with a max reducer; the
    reduced values are printed.

    Parameters:
        graph: the Graph holding the property
        _source_i: accepted but not used here
        property_id: identifier passed to `graph.get_node_property`
    """
    distances = graph.get_node_property(property_id)
    unvisited_counter = ReduceSum[int](0)
    longest = ReduceMax[int]()

    do_all(
        range(len(distances)),
        not_visited_operator(unvisited_counter, distances),
        loop_name="not_visited_op",
    )

    if unvisited_counter.reduce() > 0:
        print(
            unvisited_counter.reduce(),
            " unvisited nodes; this is an error if graph is strongly connected",
        )

    do_all(
        range(len(distances)),
        max_dist_operator(longest, distances),
        steal=True,
        loop_name="max_dist_operator",
    )

    print("BFS Max distance:", longest.reduce())
def test_do_all(modes):
    """A compiled operator run through `do_all` writes i + 1 into slot i."""

    @do_all_operator()
    def store_successor(out, i):
        out[i] = i + 1

    results = np.zeros(10, dtype=int)
    do_all(range(10), store_successor(results), **modes)

    expected = np.array(range(1, 11))
    assert np.allclose(results, expected)
def test_atomic_min_parallel(dtype, threads_many):
    """`atomic_min` applied for every i in range(1000) leaves 0 in the single slot."""

    @do_all_operator()
    def take_min(out, i):
        atomic_min(out, 0, i)

    smallest = np.array([500], dtype=dtype)
    do_all(range(1000), take_min(smallest), steal=False)

    assert smallest[0] == 0
def test_atomic_add_parallel_numaarray(threads_many):
    """`atomic_add` over range(1000) into slot 0 of a NUMAArray yields 499500."""

    @do_all_operator()
    def accumulate(out, i):
        atomic_add(out, 0, i)

    # NOTE(review): the expected total presumably relies on slot 0 starting at
    # zero after allocateBlocked - confirm.
    totals = NUMAArray[int]()
    totals.allocateBlocked(1000)

    do_all(range(1000), accumulate(totals.as_numpy()), steal=False)

    assert totals[0] == 499500
def verify_cc(graph: Graph, property_id: int):
    """
    Print the value that `verify_cc_operator` accumulates over the node property
    identified by `property_id`.

    Parameters:
        graph: the Graph holding the property
        property_id: identifier passed to `graph.get_node_property`
    """
    components = graph.get_node_property(property_id)
    component_counter = ReduceSum[int](0)

    do_all(
        range(len(components)),
        verify_cc_operator(component_counter, components),
        loop_name="num_components",
    )

    print("Number of components are : ", component_counter.reduce())
def test_do_all_wrong_closure():
    """Passing a `for_each` operator closure to `do_all` raises TypeError."""

    @for_each_operator()
    def wrong_kind(out, i, ctx):  # pylint: disable=unused-argument
        out[i] = i + 1

    results = np.zeros(10, dtype=int)
    closure = wrong_kind(results)

    with pytest.raises(TypeError):
        do_all(range(10), closure)
def test_ReduceSum_parallel(threads_many):
    """Summing range(1000) through a ReduceSum[int] gives 499500."""
    total = ReduceSum[int]()

    @do_all_operator()
    def add_item(acc, i):
        acc.update(i)

    do_all(range(1000), add_item(total), steal=False)

    assert total.reduce() == 499500
def test_ReduceMax_parallel(threads_many):
    """The maximum of abs(500 - i) for i in range(1000), via ReduceMax[int], is 500."""
    largest = ReduceMax[int]()

    @do_all_operator()
    def offer_distance(acc, i):
        acc.update(abs(500 - i))

    do_all(range(1000), offer_distance(largest), steal=False)

    assert largest.reduce() == 500
def test_do_all_python(modes, lock):
    """A plain Python callable run through `do_all` sums range(10) to 45."""
    total = 0

    def add_item(i):
        nonlocal total
        # The update is guarded by the lock fixture.
        with lock:
            total += i

    do_all(range(10), add_item, **modes)

    assert total == 45
def test_ReduceMin_parallel(threads_many):
    """The minimum of (i - 500) / 10 for i in range(1000), via ReduceMin[float], is -50.0."""
    smallest = ReduceMin[float]()

    @do_all_operator()
    def offer_scaled(acc, i):
        acc.update((i - 500) / 10)

    do_all(range(1000), offer_scaled(smallest), steal=False)

    assert smallest.reduce() == -50.0
def test_ReduceLogicalOr_parallel(threads_many):
    """OR-ing `i % 3 == 0` over range(1000) through ReduceLogicalOr reduces to True."""
    any_multiple = ReduceLogicalOr()

    @do_all_operator()
    def offer_flag(acc, i):
        acc.update(i % 3 == 0)

    do_all(range(1000), offer_flag(any_multiple), steal=False)

    assert any_multiple.reduce() is True
def degree_assortativity_coefficient(
    graph: Graph,
    source_degree_type: DegreeType = DegreeType.OUT,
    destination_degree_type: DegreeType = DegreeType.IN,
    weight=None,
):
    """
    Calculates and returns the degree assortativity of a given graph.

    Two temporary node properties holding the in and out degrees are added to
    the graph for the duration of the call and removed again before returning,
    including when an error occurs after they were added.

    Parameters:
        * graph: the Graph to be analyzed
        * source_degree_type: the degree type to consider for the source node on
          an edge; expected values are DegreeType.IN or DegreeType.OUT
        * destination_degree_type: the degree type to consider for the destination
          node on an edge; expected values are DegreeType.IN or DegreeType.OUT
        * weight (optional): edge property to use if using weighted degrees

    Returns:
        The sum of products of deviations divided by the square root of the
        product of the two sums of squared deviations.
        NOTE(review): if either squared-deviation sum is zero this division
        presumably fails - confirm how callers expect that case to be handled.
    """
    temp_prefix = "temp_"
    # Derive the temporary names from the enum members themselves so they always
    # agree with the lookups below, whatever str(DegreeType.X) evaluates to.
    in_degree_name = temp_prefix + str(DegreeType.IN)
    out_degree_name = temp_prefix + str(DegreeType.OUT)

    calculate_degree(graph, in_degree_name, out_degree_name, weight)

    try:
        # Look the degree columns up inside the try block so that the temporary
        # properties are still removed if a lookup fails (e.g. unexpected type).
        source_degree = graph.get_node_property(temp_prefix + str(source_degree_type))
        destination_degree = graph.get_node_property(temp_prefix + str(destination_degree_type))

        # Average source and destination degrees
        # (with respect to number of edges, not number of nodes).
        num_edges = graph.num_edges()
        source_average, destination_average = average_degree(
            graph, num_edges, source_degree, destination_degree
        )

        # Numerator (product of deviation from mean) and the two factors of the
        # denominator (square deviation from mean).
        product_of_dev = ReduceSum[float](0)
        square_of_source_dev = ReduceSum[float](0)
        square_of_destination_dev = ReduceSum[float](0)

        do_all(
            range(graph.num_nodes()),
            degree_assortativity_coefficient_operator(
                graph,
                source_degree,
                source_average,
                destination_degree,
                destination_average,
                product_of_dev,
                square_of_source_dev,
                square_of_destination_dev,
            ),
            steal=True,
            loop_name="degree assortativity coefficient calculation",
        )

        return product_of_dev.reduce() / sqrt(
            square_of_source_dev.reduce() * square_of_destination_dev.reduce()
        )
    finally:
        graph.remove_node_property(in_degree_name)
        graph.remove_node_property(out_degree_name)
def test_NUMAArray_numpy_parallel(typ):
    """Writing through the numpy view of a NUMAArray in parallel yields 1..1000."""
    numa = NUMAArray[typ]()
    numa.allocateInterleaved(1000)

    @do_all_operator()
    def store_and_bump(arr, i):
        arr[i] = i
        arr[i] += 1

    do_all(range(1000), store_and_bump(numa.as_numpy()), steal=False)

    expected = list(range(1, 1001))
    assert list(numa) == expected
def test_InsertBag_parallel_opaque():
    """Structured records pushed into an InsertBag in parallel keep x == y / 2."""
    record_fields = [("x", np.float32), ("y", np.int16)]
    dt = np.dtype(record_fields, align=True)
    records = InsertBag[dt]()

    @do_all_operator()
    def push_record(bag, i):
        bag.push((i / 2.0, i))

    do_all(range(1000), push_record(records), steal=False)

    for record in records:
        assert record.x == pytest.approx(record.y / 2.0)
def test_InsertBag_parallel(typ):
    """Every i in range(1000) pushed twice in parallel appears exactly twice in the bag."""
    bag = InsertBag[typ]()

    @do_all_operator()
    def push_twice(bag, i):
        bag.push(i)
        bag.push(i)

    do_all(range(1000), push_twice(bag), steal=False)

    contents = sorted(bag)
    expected = [value for i in range(1000) for value in (i, i)]
    assert contents == expected
def jaccard(g, key_node, property_name):
    """
    Run `jaccard_operator` over every node of `g` relative to `key_node` and store
    the output array on the graph as node property `property_name`.

    Parameters:
        g: the graph; a node property is added to it
        key_node: the node whose out-neighbors form the reference set
        property_name: name of the node property to create
    """
    # Boolean mask marking the destinations of key_node's out edges.
    is_key_neighbor = np.zeros(g.num_nodes(), dtype=bool)
    scores = np.empty(g.num_nodes(), dtype=float)

    for edge in g.out_edge_ids(key_node):
        is_key_neighbor[g.out_edge_dst(edge)] = True

    key_edge_count = len(g.out_edge_ids(key_node))
    do_all(
        g,
        jaccard_operator(g, is_key_neighbor, key_edge_count, scores),
        steal=True,
        loop_name="jaccard",
    )

    result_table = pyarrow.table({property_name: scores})
    g.add_node_property(result_table)
def verify_kcore(graph: Graph, property_name: str, k_core_num: int):
    """
    Check output sanity: print the value that `sanity_check_operator` accumulates
    over node property `property_name` for the given `k_core_num`.
    """
    degrees = graph.get_node_property(property_name)
    alive_counter = ReduceSum[float](0)

    do_all(
        range(len(degrees)),
        sanity_check_operator(alive_counter, degrees, k_core_num),
        steal=True,
        loop_name="sanity_check_operator",
    )

    print("Number of nodes in the", k_core_num, "-core is", alive_counter.reduce())
def test_NUMAArray_parallel(typ):
    """Writing a NUMAArray via get/set inside an operator yields 1..1000."""
    numa = NUMAArray[typ]()
    numa.allocateInterleaved(1000)

    @do_all_operator()
    def store_and_bump(arr, i):
        # TODO: Use __setitem__
        arr.set(i, i)
        arr.set(i, arr.get(i) + 1)

    do_all(range(1000), store_and_bump(numa), steal=False)

    expected = list(range(1, 1001))
    assert list(numa) == expected
def test_simple_barrier(threads_many):  # pylint: disable=unused-argument
    """
    Each item is recorded once before and once after `barrier.wait()`; the first
    `threads` records and the remaining records must each cover every item.
    """
    threads = get_active_threads()
    barrier = SimpleBarrier(threads)
    seen = []

    def record_around_barrier(v):
        seen.append(v)
        barrier.wait()
        seen.append(v)

    do_all(range(threads), record_around_barrier)

    everyone = set(range(threads))
    before_barrier = seen[:threads]
    after_barrier = seen[threads:]
    assert set(before_barrier) == everyone
    assert set(after_barrier) == set(range(threads))
def test_fast_barrier_in_numba(threads_many):  # pylint: disable=unused-argument
    """
    Inside a compiled operator, every slot of `a` is written before the barrier
    and summed after it, so each slot of `b` must equal the thread count.
    """
    barrier = get_fast_barrier()
    threads = get_active_threads()
    flags = np.zeros(threads, dtype=int)
    sums = np.zeros(threads, dtype=int)

    @do_all_operator()
    def mark_then_sum(a, b, i):
        a[i] = 1
        barrier.wait()
        b[i] = a.sum()

    do_all(range(threads), mark_then_sum(flags, sums))

    ones = np.ones(threads)
    assert np.all(flags == ones)
    assert np.all(sums == np.ones(threads) * threads)
def average_degree(graph: Graph, num_edges: int, source_degree, destination_degree):
    """
    Compute the average degree for the source and for the destination side.

    Both sums are accumulated by `sum_degree_operator` over all nodes and then
    divided by `num_edges`.

    Returns a tuple of the form
    (average degree for source, average degree for destination).
    """
    source_total = ReduceSum[np.uint64](0)
    destination_total = ReduceSum[np.uint64](0)

    summing_operator = sum_degree_operator(
        graph,
        source_degree,
        source_total,
        destination_degree,
        destination_total,
    )
    do_all(range(graph.num_nodes()), summing_operator, steal=True)

    source_average = source_total.reduce() / num_edges
    destination_average = destination_total.reduce() / num_edges
    return (source_average, destination_average)
def test_do_all_opaque(modes):
    """`do_all` over an InsertBag of structured records stores x at index y."""
    from katana.local import InsertBag

    @do_all_operator()
    def scatter(out, s):
        out[s.y] = s.x

    dt = np.dtype([("x", np.float32), ("y", np.int8)], align=True)

    records = InsertBag[dt]()
    for record in ((1.1, 0), (2.1, 1), (3.1, 3)):
        records.push(record)

    results = np.zeros(4, dtype=float)
    do_all(records, scatter(results), **modes)

    # Index 2 is never written and keeps its initial zero.
    expected = np.array([1.1, 2.1, 0, 3.1])
    assert np.allclose(results, expected)
def test_NUMAArray_numpy_parallel_opaque():
    """
    Structured records written in parallel through the numpy view of a NUMAArray
    are readable both by iteration and by indexing.
    """
    record_fields = [("x", np.float32), ("y", np.int16)]
    dt = np.dtype(record_fields, align=True)
    numa = NUMAArray[dt]()
    numa.allocateInterleaved(1000)

    @do_all_operator()
    def fill_record(arr, i):
        arr[i].x = i
        arr[i].y = i
        arr[i].x += 1.1

    do_all(range(1000), fill_record(numa.as_numpy()), steal=False)

    for index, record in enumerate(numa):
        # Via iteration.
        assert record.x == pytest.approx(index + 1.1)
        assert record.y == index
        # Via indexing.
        assert numa[index].x == pytest.approx(index + 1.1)
        assert numa[index].y == index
def test_do_all_specific_type(modes, typ):
    """
    `do_all` over an InsertBag[typ] passes items of that type to the operator;
    when JIT is enabled the compiled signature is checked as well.
    """
    from katana.local import InsertBag

    @do_all_operator()
    def f(out, i):
        out[int(i)] = i

    items = InsertBag[typ]()
    for value in range(1000):
        items.push(value)

    results = np.zeros(1000, dtype=typ)
    do_all(items, f(results), **modes)
    assert np.allclose(results, np.array(range(1000)))

    if not numba.config.DISABLE_JIT:
        # Check that the operator was actually compiled for the correct type
        # [0][1][0] = [first overload][second argument][first possible type]
        # I'm not sure why the last indexing is used. It always seems to be a 1-tuple, but numba makes it. *shrug*
        first_overload = list(f.inspect_llvm().keys())[0]
        item_type = first_overload[1][0]
        assert item_type == from_dtype(np.dtype(typ))