def main():
    """Command-line driver for the synchronous BFS example.

    Loads the graph named by the positional ``input`` argument, runs
    ``bfs_sync_pg`` from ``--startNode``, prints the value stored for
    ``--reportNode`` and, unless ``--noverify`` is given, runs ``verify_bfs``
    on the last property in the loaded node schema.
    """
    import argparse

    import katana.local
    from katana import set_active_threads

    katana.local.initialize()

    cli = argparse.ArgumentParser()
    cli.add_argument("--startNode", type=int, default=0)
    cli.add_argument("--propertyName", type=str, default="NewProperty")
    cli.add_argument("--reportNode", type=int, default=1)
    cli.add_argument("--noverify", action="store_true", default=False)
    cli.add_argument("--threads", "-t", type=int, default=1)
    cli.add_argument("input", type=str)
    opts = cli.parse_args()

    print("Using threads:", set_active_threads(opts.threads))

    graph = Graph(opts.input)
    bfs_sync_pg(graph, opts.startNode, opts.propertyName)

    reported_value = graph.get_node_property(opts.propertyName)[opts.reportNode]
    print("Node {}: {}".format(opts.reportNode, reported_value))

    if opts.noverify:
        return

    # Verification looks the property up by the name of the last schema entry.
    last_property_name = graph.loaded_node_schema()[-1].name
    verify_bfs(graph, opts.startNode, last_property_name)
def cc_push_topo(graph: Graph, property_name):
    """Label connected components with the push-style, topology-driven loop.

    Runs ``cc_push_topo_operator`` over every node repeatedly until a full
    pass reports no change, then stores the component ids on ``graph`` as the
    node property ``property_name``. Nothing is returned.
    """
    print("Executing Push algo\n")
    node_count = graph.num_nodes()

    stat_timer = StatTimer("CC: Property Graph Numba: " + property_name)
    stat_timer.start()

    # Component id per node (current and previously recorded). np.empty leaves
    # both uninitialized; the initialization pass below receives them.
    comp_current = np.empty((node_count,), dtype=np.uint32)
    comp_old = np.empty((node_count,), dtype=np.uint32)

    do_all(
        range(node_count),
        initialize_cc_push_operator(graph, comp_current, comp_old),
        steal=True,
        loop_name="initialize_cc_push",
    )

    # Seed the flag with True so that the first pass always runs.
    any_update = ReduceLogicalOr()
    any_update.update(True)
    while any_update.reduce():
        any_update.reset()
        do_all(
            range(node_count),
            cc_push_topo_operator(graph, any_update, comp_current, comp_old),
            steal=True,
            loop_name="cc_push_topo",
        )

    stat_timer.stop()

    # Publish the component assignment as a new node property.
    component_table = pyarrow.table({property_name: comp_current})
    graph.add_node_property(component_table)
def main():
    """Command-line driver for the Jaccard similarity example.

    Loads the graph named by the positional ``input`` argument, times a call
    to ``jaccard`` relative to ``--baseNode`` and prints the value stored for
    ``--reportNode`` under ``--propertyName``.
    """
    import argparse

    import katana.local

    katana.local.initialize()

    cli = argparse.ArgumentParser()
    cli.add_argument("--baseNode", type=int, default=0)
    cli.add_argument("--reportNode", type=int, default=1)
    cli.add_argument("--propertyName", type=str, default="NewProperty")
    cli.add_argument("--threads", "-t", type=int, default=1)
    cli.add_argument("input", type=str)
    opts = cli.parse_args()

    print("Using threads:", set_active_threads(opts.threads))

    g = Graph(opts.input)

    stat_timer = StatTimer("Jaccard (Property Graph) Numba")
    stat_timer.start()
    jaccard(g, opts.baseNode, opts.propertyName)
    stat_timer.stop()
    # Drop the timer explicitly before reporting, as the original does.
    del stat_timer

    reported_value = g.get_node_property(opts.propertyName)[opts.reportNode]
    print("Node {}: {}".format(opts.reportNode, reported_value))
def bfs_sync_pg(graph: Graph, source, property_name):
    """Run a level-synchronous BFS from ``source`` and store the distances.

    Processes the graph one frontier at a time: each round swaps the pending
    bag into the active one, bumps the level counter and applies
    ``bfs_sync_operator_pg`` to every node in the active frontier. The
    resulting distance array is added to ``graph`` as the node property
    ``property_name``. Nothing is returned.
    """
    level = 0
    frontier = InsertBag[np.uint64]()
    pending = InsertBag[np.uint64]()

    stat_timer = StatTimer("BFS Property Graph Numba: " + property_name)
    stat_timer.start()

    node_count = graph.num_nodes()
    distance = np.empty((node_count,), dtype=np.uint32)
    initialize(graph, source, distance)

    pending.push(source)
    while not pending.empty():
        # The nodes discovered in the previous round become the work of this one.
        frontier.swap(pending)
        pending.clear()
        level += 1
        operator = bfs_sync_operator_pg(graph, pending, level, distance)
        do_all(frontier, operator, steal=True, loop_name="bfs_sync_pg")

    stat_timer.stop()

    graph.add_node_property(pyarrow.table({property_name: distance}))
def test_connected_components():
    """Check connected-components statistics on rmat10, symmetric and not.

    The symmetric dataset is checked against fixed expected numbers; the
    non-symmetric dataset must then produce the same statistics.
    """
    # The graph is already symmetric, which the final True argument states.
    symmetric_graph = Graph(get_rdg_dataset("rmat10_symmetric"))
    connected_components(symmetric_graph, "output_sym", True)
    stats_sym = ConnectedComponentsStatistics(symmetric_graph, "output_sym")

    assert stats_sym.total_components == 69
    assert stats_sym.total_non_trivial_components == 1
    assert stats_sym.largest_component_size == 956
    assert stats_sym.largest_component_ratio == approx(0.933594)

    connected_components_assert_valid(symmetric_graph, "output_sym")

    # The graph is not symmetric, which the final False argument states; the
    # connected components routine will create an undirected view for the
    # computation.
    directed_graph = Graph(get_rdg_dataset("rmat10"))
    connected_components(directed_graph, "output", False)
    stats = ConnectedComponentsStatistics(directed_graph, "output")

    assert stats.total_components == stats_sym.total_components
    assert stats.total_non_trivial_components == stats_sym.total_non_trivial_components
    assert stats.largest_component_size == stats_sym.largest_component_size
    assert stats.largest_component_ratio == stats_sym.largest_component_ratio
def main():
    """Command-line driver for the single-source shortest path example.

    Loads the graph named by the positional ``input`` argument, runs ``sssp``
    from ``--startNode`` using the required ``--edgeWeightProperty`` and
    ``--shift``, prints the value stored for ``--reportNode`` and, unless
    ``--noverify`` is given, runs ``verify_sssp`` on the index of the last
    property in the loaded node schema.
    """
    import argparse

    import katana.local

    katana.local.initialize()

    cli = argparse.ArgumentParser()
    cli.add_argument("--startNode", type=int, default=0)
    cli.add_argument("--propertyName", type=str, default="NewProperty")
    cli.add_argument("--edgeWeightProperty", type=str, required=True)
    cli.add_argument("--shift", type=int, default=6)
    cli.add_argument("--reportNode", type=int, default=1)
    cli.add_argument("--noverify", action="store_true", default=False)
    cli.add_argument("--threads", "-t", type=int, default=1)
    cli.add_argument("input", type=str)
    opts = cli.parse_args()

    print("Using threads:", set_active_threads(opts.threads))

    graph = Graph(opts.input)
    sssp(graph, opts.startNode, opts.edgeWeightProperty, opts.shift, opts.propertyName)

    reported_value = graph.get_node_property(opts.propertyName)[opts.reportNode]
    print("Node {}: {}".format(opts.reportNode, reported_value))

    if opts.noverify:
        return

    # Verification addresses the property by the index of the last schema entry.
    last_property_index = len(graph.loaded_node_schema()) - 1
    verify_sssp(graph, opts.startNode, last_property_index)
def kcore_async(graph: Graph, k_core_num, property_name):
    """Run the asynchronous k-core computation and store the degree counts.

    Three phases run in order: ``compute_degree_count_operator`` fills the
    per-node degree array, ``setup_initial_worklist_operator`` fills the
    initial worklist, and ``compute_async_kcore_operator`` is applied to the
    worklist with ``for_each``. The degree array is then added to ``graph`` as
    the node property ``property_name``. Nothing is returned.
    """
    node_count = graph.num_nodes()
    initial_worklist = InsertBag[np.uint64]()
    current_degree = NUMAArray[np.uint64](node_count, AllocationPolicy.INTERLEAVED)

    stat_timer = StatTimer("Kcore: Property Graph Numba: " + property_name)
    stat_timer.start()

    all_nodes = range(node_count)

    # Phase 1: per-node degree counts.
    do_all(
        all_nodes,
        compute_degree_count_operator(graph, current_degree.as_numpy()),
        steal=True,
    )

    # Phase 2: build the initial worklist from the degree counts.
    do_all(
        all_nodes,
        setup_initial_worklist_operator(initial_worklist, current_degree.as_numpy(), k_core_num),
        steal=True,
    )

    # Phase 3: process the worklist; the operator may push further work.
    for_each(
        initial_worklist,
        compute_async_kcore_operator(graph, current_degree.as_numpy(), k_core_num),
        steal=True,
        disable_conflict_detection=True,
    )

    stat_timer.stop()

    # Publish the final degree counts as a new node property.
    degree_table = pyarrow.table({property_name: current_degree})
    graph.add_node_property(degree_table)
def main():
    """Command-line driver for the asynchronous k-core example.

    Loads the graph named by the positional ``input`` argument, runs
    ``kcore_async`` with ``--kcore``, prints the value stored for
    ``--reportNode`` and, unless ``--noverify`` is given, runs
    ``verify_kcore`` on the written property.
    """
    import argparse

    import katana.local

    katana.local.initialize()

    cli = argparse.ArgumentParser()
    cli.add_argument("--propertyName", type=str, default="NewProperty")
    cli.add_argument("--noverify", action="store_true", default=False)
    cli.add_argument("--threads", "-t", type=int, default=1)
    cli.add_argument("--kcore", "-k", type=int, default=100)
    cli.add_argument("--reportNode", type=int, default=0)
    cli.add_argument("input", type=str)
    opts = cli.parse_args()

    print("Using threads:", set_active_threads(opts.threads))

    graph = Graph(opts.input)
    kcore_async(graph, opts.kcore, opts.propertyName)

    reported_value = graph.get_node_property(opts.propertyName)[opts.reportNode]
    print("Node {}: {}".format(opts.reportNode, reported_value))

    if opts.noverify:
        return
    verify_kcore(graph, opts.propertyName, opts.kcore)
def count_in_and_out_degree(graph: Graph, nout, nin, nid):
    """Count the edges of ``nid`` and credit each destination's in-degree.

    Writes the number of edges returned by ``graph.edge_ids(nid)`` to
    ``nout[nid]`` and adds 1 to ``nin[dst]`` for the destination of each edge.
    """
    edges_seen = 0
    for edge_id in graph.edge_ids(nid):
        # Several source nodes can share a destination, so the in-degree slot
        # is updated with atomic_add rather than a plain increment.
        atomic_add(nin, graph.get_edge_dest(edge_id), 1)
        edges_seen += 1
    nout[nid] = edges_seen
def calculate_degree(graph: Graph, in_degree_property, out_degree_property, weight_property=None):
    """
    Compute the in and out degree of every node, optionally weighted.

    The given graph is modified: two node properties are added to it, one
    holding the in degree and one holding the out degree. Nothing is returned.

    Parameters:
        graph: a Graph
        in_degree_property: name of the node property that receives the in degree
        out_degree_property: name of the node property that receives the out degree
        weight_property: name of an edge property whose values are summed
            instead of counting edges; when falsy, plain edge counts are used
    """
    node_count = graph.num_nodes()
    nout = NUMAArray[np.uint64](node_count, AllocationPolicy.INTERLEAVED)
    nin = NUMAArray[np.uint64](node_count, AllocationPolicy.INTERLEAVED)

    all_nodes = range(node_count)
    do_all(all_nodes, initialize_in_degree(nin.as_numpy()), steal=False)

    # Pick the weighted operator only when an edge weight property was named.
    if weight_property:
        count_operator = count_weighted_in_and_out_degree(
            graph, nout.as_numpy(), nin.as_numpy(), graph.get_edge_property(weight_property)
        )
    else:
        count_operator = count_in_and_out_degree(graph, nout.as_numpy(), nin.as_numpy())
    do_all(all_nodes, count_operator, steal=True)

    degree_table = pyarrow.table({in_degree_property: nin, out_degree_property: nout})
    graph.add_node_property(degree_table)
def main():
    """Command-line driver for the connected-components example.

    Loads the graph named by the positional ``input`` argument and runs
    ``cc_push_topo`` when ``--algoType`` is exactly ``"push"``; any other
    value runs ``cc_pull_topo``. Prints the value stored for ``--reportNode``
    and, unless ``--noverify`` is given, runs ``verify_cc`` on the index of
    the last property in the loaded node schema.
    """
    import argparse

    import katana.local

    katana.local.initialize()

    cli = argparse.ArgumentParser()
    cli.add_argument("--algoType", type=str, default="push")
    cli.add_argument("--propertyName", type=str, default="NewProperty")
    cli.add_argument("--reportNode", type=int, default=1)
    cli.add_argument("--noverify", action="store_true", default=False)
    cli.add_argument("--threads", "-t", type=int, default=1)
    cli.add_argument("input", type=str)
    opts = cli.parse_args()

    print("Using threads:", set_active_threads(opts.threads))

    graph = Graph(opts.input)

    # Anything other than "push" falls through to the pull variant.
    run_components = cc_push_topo if opts.algoType == "push" else cc_pull_topo
    run_components(graph, opts.propertyName)

    reported_value = graph.get_node_property(opts.propertyName)[opts.reportNode]
    print("Node {}: {}".format(opts.reportNode, reported_value))

    if opts.noverify:
        return

    # Verification addresses the property by the index of the last schema entry.
    last_property_index = len(graph.loaded_node_schema()) - 1
    verify_cc(graph, last_property_index)
def main():
    """Command-line driver for the pull-style residual PageRank example.

    Loads the graph named by the positional ``input`` argument, runs
    ``pagerank_pull_sync_residual`` with ``--maxIterations`` and
    ``--tolerance``, prints the value stored for ``--reportNode`` and, unless
    ``--noverify`` is given, runs ``verify_pr`` with ``--printTopN``.
    """
    import argparse

    import katana.local

    katana.local.initialize()

    cli = argparse.ArgumentParser()
    cli.add_argument("--propertyName", type=str, default="NewProperty")
    cli.add_argument("--maxIterations", type=int, default=100)
    cli.add_argument("--tolerance", type=float, default=1.0e-3)
    cli.add_argument("--noverify", action="store_true", default=False)
    cli.add_argument("--printTopN", type=int, default=10)
    cli.add_argument("--threads", "-t", type=int, default=1)
    cli.add_argument("--reportNode", type=int, default=0)
    cli.add_argument("input", type=str)
    opts = cli.parse_args()

    print("Using threads:", set_active_threads(opts.threads))

    graph = Graph(opts.input)
    pagerank_pull_sync_residual(graph, opts.maxIterations, opts.tolerance, opts.propertyName)

    reported_value = graph.get_node_property(opts.propertyName)[opts.reportNode]
    print("Node {}: {}".format(opts.reportNode, reported_value))

    if opts.noverify:
        return
    verify_pr(graph, opts.propertyName, opts.printTopN)
def cc_pull_topo_operator(graph: Graph, changed, comp_current: np.ndarray, nid):
    """Lower the component id of ``nid`` to the smallest id among its neighbors.

    For each out-edge of ``nid``, if the destination holds a smaller component
    id, that id is copied into ``comp_current[nid]`` and ``changed`` is
    updated with True.
    """
    for edge in graph.out_edge_ids_for_node(nid):
        neighbor = graph.out_edge_dst(edge)
        # Pull only when the neighbor's id is strictly smaller than ours.
        if comp_current[neighbor] < comp_current[nid]:
            comp_current[nid] = comp_current[neighbor]
            # Record that this pass modified at least one node.
            changed.update(True)
def test_sort_nodes_by_degree(graph: Graph):
    """After sorting, sampled nodes must appear in non-increasing degree order.

    Node 0 is expected to have exactly 103 edges; every following node up to
    ``NODES_TO_SAMPLE`` must have no more edges than its predecessor.
    """
    sort_nodes_by_degree(graph)

    previous_degree = len(graph.edge_ids(0))
    assert previous_degree == 103

    for node in range(1, NODES_TO_SAMPLE):
        degree = len(graph.edge_ids(node))
        assert degree <= previous_degree
        previous_degree = degree
def compute_async_kcore_operator(graph: Graph, current_degree, k_core_num, nid, ctx):
    """Propagate the removal of node ``nid`` to its neighbors.

    Subtracts 1 from the degree count of every out-neighbor of ``nid``. A
    neighbor whose count was exactly ``k_core_num`` before the subtraction is
    pushed onto ``ctx`` so it is processed in turn.
    """
    for edge in graph.out_edge_ids_for_node(nid):
        neighbor = graph.out_edge_dst(edge)
        # atomic_sub's result is compared as the pre-decrement value.
        degree_before = atomic_sub(current_degree, neighbor, 1)
        # A count that was exactly k has just dropped below k.
        if k_core_num == degree_before:
            ctx.push(neighbor)
def count_weighted_in_and_out_degree(graph: Graph, nout, nin, weight_array, nid):
    """Accumulate the weighted out-degree of ``nid`` and weighted in-degrees.

    For every out-edge of ``nid`` the edge's entry in ``weight_array`` is
    added to ``nin[dst]`` for the edge destination and to a running total
    that is finally written to ``nout[nid]``.
    """
    weighted_out = 0
    for edge_id in graph.out_edge_ids(nid):
        edge_weight = weight_array[edge_id]
        # Several source nodes can share a destination, so the in-degree slot
        # is updated with atomic_add rather than a plain addition.
        atomic_add(nin, graph.out_edge_dst(edge_id), edge_weight)
        weighted_out += edge_weight
    nout[nid] = weighted_out
def create_distance_array(g: Graph, source, length_property):
    """Build the initial distance array for a shortest-path run.

    The array has one entry per node and the dtype of the edge property
    ``length_property``. Every entry is set to a quarter of that dtype's
    maximum, used here as "infinity", except ``source`` which is set to 0.

    Returns:
        the initialized numpy array
    """
    weight_dtype = dtype_of_pyarrow_array(g.get_edge_property(length_property))
    distances = np.empty(g.num_nodes(), dtype=weight_dtype)

    # TODO(amp): Remove / 4
    unreachable = dtype_info(distances.dtype).max / 4
    distances[:] = unreachable
    distances[source] = 0
    return distances
def test_local_clustering_coefficient():
    """Local clustering coefficient on rmat15: last node is 0, no NaN values."""
    graph = Graph(get_input("propertygraphs/rmat15_cleaned_symmetric"))
    local_clustering_coefficient(graph, "output")

    coefficients = graph.get_node_property("output")

    last_value = coefficients[-1].as_py()
    assert last_value == 0
    assert not np.any(np.isnan(coefficients))
def compute_pagerank_pull_residual_operator(graph: Graph, delta, residual, nid):
    """Pull the positive deltas of the neighbors of ``nid`` into its residual.

    Sums ``delta[dst]`` over the out-edge destinations of ``nid`` whose delta
    is greater than zero. ``residual[nid]`` is overwritten with that sum only
    when the sum itself is greater than zero; otherwise it is left untouched.
    """
    pulled = 0
    for edge in graph.out_edge_ids_for_node(nid):
        neighbor = graph.out_edge_dst(edge)
        # Only strictly positive deltas contribute.
        if delta[neighbor] > 0:
            pulled += delta[neighbor]

    if pulled > 0:
        residual[nid] = pulled
def test_load_graphml_write():
    """Round-trip the movies GraphML file through write() and a fresh Graph.

    The converted graph is written to a temporary directory, reloaded from
    there, and checked for its path and for the ``name`` property of node 1.
    """
    graphml_path = Path(get_misc_dataset("graph-convert/movies.graphml"))
    converted = from_graphml(graphml_path)

    with TemporaryDirectory() as tmpdir:
        converted.write(tmpdir)
        # Drop the in-memory graph so the checks below use only what was written.
        del converted

        reloaded = Graph(tmpdir)
        assert reloaded.path == f"file://{tmpdir}"

        second_name = reloaded.get_node_property("name")[1].as_py()
        assert second_name == "Keanu Reeves"
def cc_push_topo_operator(graph: Graph, changed, comp_current: np.ndarray, comp_old: np.ndarray, nid):
    """Push the component id of ``nid`` to its neighbors if it has changed.

    When the recorded id ``comp_old[nid]`` is larger than the current id
    ``comp_current[nid]``, the recorded id is brought up to date, ``changed``
    is updated with True, and the current id is offered to every out-neighbor
    through ``atomic_min``. Otherwise nothing happens.
    """
    # Nothing to do unless the id dropped since it was last recorded.
    if not (comp_old[nid] > comp_current[nid]):
        return

    comp_old[nid] = comp_current[nid]
    # Record that this pass modified at least one node.
    changed.update(True)

    for edge in graph.out_edge_ids_for_node(nid):
        neighbor = graph.out_edge_dst(edge)
        # The id is re-read for every edge; each neighbor keeps the minimum of
        # its own id and ours.
        atomic_min(comp_current, neighbor, comp_current[nid])
def sssp_operator(g: Graph, dists: np.ndarray, edge_weights, item, ctx: UserContext):
    """Relax every edge leaving the node of work item ``item``.

    The item is skipped when ``dists`` already holds a smaller distance for
    ``item.src`` than ``item.dist``. Otherwise each neighbor's distance is
    lowered with ``atomic_min`` to the distance through ``item.src``, and a
    new ``(node, distance)`` item is pushed for every neighbor whose distance
    actually improved.
    """
    origin = item.src
    # Stale item: a shorter distance for this node has already been recorded.
    if dists[origin] < item.dist:
        return

    for edge in g.edge_ids(origin):
        neighbor = g.get_edge_dest(edge)
        candidate = edge_weights[edge] + dists[origin]
        # atomic_min's result is compared as the previously stored distance.
        previous = atomic_min(dists, neighbor, candidate)
        if previous > candidate:
            ctx.push((neighbor, candidate))
def test_load_graphml_write():
    """Round-trip the movies GraphML file through write() and a fresh Graph.

    The input is located under the directory named by the
    ``KATANA_SOURCE_DIR`` environment variable. The converted graph is written
    to a temporary directory, reloaded from there, and checked for its path
    and for node property 0 of node 1.
    """
    source_root = Path(os.environ["KATANA_SOURCE_DIR"])
    graphml_path = source_root.joinpath("tools", "graph-convert", "test-inputs", "movies.graphml")
    converted = from_graphml(graphml_path)

    with TemporaryDirectory() as tmpdir:
        converted.write(tmpdir)
        # Drop the in-memory graph so the checks below use only what was written.
        del converted

        reloaded = Graph(tmpdir)
        assert reloaded.path == f"file://{tmpdir}"

        second_name = reloaded.get_node_property(0)[1].as_py()
        assert second_name == "Keanu Reeves"
def bfs_sync_operator_pg(
    graph: Graph,
    next_level: InsertBag[np.uint64],
    next_level_number: int,
    distance: np.ndarray,
    nid,
):
    """Visit the unvisited out-neighbors of ``nid`` for the current BFS level.

    Every out-neighbor whose distance still equals ``distance_infinity`` gets
    distance ``next_level_number`` and is pushed onto ``next_level``.
    """
    for edge in graph.out_edge_ids(nid):
        neighbor = graph.out_edge_dst(edge)
        # Skip neighbors that already carry a distance.
        if distance[neighbor] != distance_infinity:
            continue
        distance[neighbor] = next_level_number
        next_level.push(neighbor)
def sum_degree_operator(
    graph: Graph,
    source_degree,
    sum_source: ReduceSum[np.uint64],
    destination_degree,
    sum_destination: ReduceSum[np.uint64],
    nid,
):
    """Add the endpoint degrees of every out-edge of ``nid`` to two sums.

    For each out-edge, ``source_degree[nid]`` is added to ``sum_source`` and
    the destination's entry of ``destination_degree`` is added to
    ``sum_destination``; a node therefore contributes its source degree once
    per out-edge.
    """
    for edge_id in graph.out_edge_ids_for_node(nid):
        sum_source.update(source_degree[nid])
        target = graph.out_edge_dst(edge_id)
        sum_destination.update(destination_degree[target])
def pagerank_pull_sync_residual(graph: Graph, maxIterations, tolerance, property_name):
    """Run pull-style residual PageRank and store the ranks on the graph.

    After initializing the per-node arrays and the out-degrees, the function
    iterates at most ``maxIterations`` times. Each iteration runs the delta
    operator followed by the residual operator, and the loop stops early once
    the ``changed`` flag reduces to false. The rank array is added to
    ``graph`` as the node property ``property_name``. Nothing is returned.
    """
    node_count = graph.num_nodes()

    rank = NUMAArray[float](node_count, AllocationPolicy.INTERLEAVED)
    nout = NUMAArray[np.uint64](node_count, AllocationPolicy.INTERLEAVED)
    delta = NUMAArray[float](node_count, AllocationPolicy.INTERLEAVED)
    residual = NUMAArray[float](node_count, AllocationPolicy.INTERLEAVED)

    # Numpy views handed to the operators.
    rank_view = rank.as_numpy()
    nout_view = nout.as_numpy()
    delta_view = delta.as_numpy()
    residual_view = residual.as_numpy()

    all_nodes = range(node_count)

    # Initialize the per-node arrays.
    do_all(
        all_nodes,
        initialize_residual_operator(rank_view, nout_view, delta_view, residual_view),
        steal=True,
        loop_name="initialize_pagerank_pull_residual",
    )

    # Compute the out-degree of every node.
    do_all(
        all_nodes,
        compute_out_deg_operator(graph, nout_view),
        steal=True,
        loop_name="Compute_out_degree",
    )

    print("Out-degree of 0: ", nout[0])

    # Starts out True so that the first iteration always runs.
    changed = ReduceOr(True)
    iterations = 0
    stat_timer = StatTimer("Pagerank: Property Graph Numba: " + property_name)
    stat_timer.start()
    while iterations < maxIterations and changed.reduce():
        print("Iter: ", iterations, "\n")
        changed.reset()
        iterations += 1

        delta_operator = compute_pagerank_pull_delta_operator(
            rank_view, nout_view, delta_view, residual_view, tolerance, changed
        )
        do_all(all_nodes, delta_operator, steal=True, loop_name="pagerank_delta")

        residual_operator = compute_pagerank_pull_residual_operator(graph, delta_view, residual_view)
        do_all(all_nodes, residual_operator, steal=True, loop_name="pagerank")

    stat_timer.stop()

    # Publish the ranks as a new node property.
    graph.add_node_property(pyarrow.table({property_name: rank}))
def test_assert_valid(graph: Graph):
    """bfs_assert_valid must reject a non-BFS property and a corrupted result."""
    start_node = 0
    bfs_property = "NewProp"

    # An existing property that was not produced by BFS must fail validation.
    with raises(AssertionError):
        bfs_assert_valid(graph, start_node, "workFrom")

    bfs(graph, start_node, bfs_property)

    # Copy the BFS output, overwrite the first entry and store the copy as a
    # second property; validating that property must fail as well.
    corrupted = graph.get_node_property(bfs_property).to_numpy().copy()
    corrupted[0] = 100
    graph.add_node_property(table({"Prop2": corrupted}))

    with raises(AssertionError):
        bfs_assert_valid(graph, start_node, "Prop2")
def verify_sssp(graph: Graph, _source_i: int, property_id: int):
    """Print sanity statistics for an SSSP result stored as a node property.

    Runs ``not_visited_operator`` and ``max_dist_operator`` over the property,
    passing a quarter of the property dtype's maximum as "infinity". A notice
    is printed when the unvisited count is positive, and the maximum distance
    is always printed. ``_source_i`` is accepted but not used.
    """
    distances = graph.get_node_property(property_id)
    unvisited = ReduceSum[int](0)
    farthest = ReduceMax[int]()

    # TODO(amp): Remove / 4
    infinity = dtype_info(dtype_of_pyarrow_array(distances)).max / 4

    entries = range(len(distances))

    do_all(
        entries,
        not_visited_operator(infinity, unvisited, distances),
        loop_name="not_visited_op",
    )

    if unvisited.reduce() > 0:
        print(
            unvisited.reduce(),
            " unvisited nodes; this is an error if graph is strongly connected",
        )

    do_all(
        entries,
        max_dist_operator(infinity, farthest, distances),
        steal=True,
        loop_name="max_dist_operator",
    )

    print("Max distance:", farthest.reduce())
def verify_bfs(graph: Graph, _source_i: int, property_id):
    """Print sanity statistics for a BFS result stored as a node property.

    Runs ``not_visited_operator`` and ``max_dist_operator`` over the property.
    A notice is printed when the unvisited count is positive, and the maximum
    distance is always printed. ``_source_i`` is accepted but not used.
    """
    distances = graph.get_node_property(property_id)
    unvisited = ReduceSum[int](0)
    farthest = ReduceMax[int]()

    entries = range(len(distances))

    do_all(
        entries,
        not_visited_operator(unvisited, distances),
        loop_name="not_visited_op",
    )

    if unvisited.reduce() > 0:
        print(
            unvisited.reduce(),
            " unvisited nodes; this is an error if graph is strongly connected",
        )

    do_all(
        entries,
        max_dist_operator(farthest, distances),
        steal=True,
        loop_name="max_dist_operator",
    )

    print("BFS Max distance:", farthest.reduce())
def initialize(graph: Graph, source: int, distance: np.ndarray):
    """Reset the BFS distance array: 0 for ``source``, infinity elsewhere.

    Only the first ``graph.num_nodes()`` entries of ``distance`` are written.
    If ``source`` is not among those node ids, every entry ends up as
    ``distance_infinity``.
    """
    node_count = graph.num_nodes()
    for node in range(node_count):
        if node != source:
            distance[node] = distance_infinity
        else:
            distance[node] = 0