def main():
    """Command-line driver: run BFS on a property graph and optionally verify it.

    Parses the command line, sets the active thread count, loads the graph
    named by the positional ``input`` argument, runs either the Cython or the
    Numba BFS, prints the value stored for ``--reportNode`` and, unless
    ``--noverify`` is given, runs the matching verifier.
    """
    import argparse

    from galois.shmem import setActiveThreads

    cli = argparse.ArgumentParser()
    cli.add_argument("--startNode", type=int, default=0)
    cli.add_argument("--propertyName", type=str, default="NewProperty")
    cli.add_argument("--reportNode", type=int, default=1)
    cli.add_argument("--noverify", action="store_true", default=False)
    cli.add_argument("--cython", action="store_true", default=False)
    cli.add_argument("--threads", "-t", type=int, default=1)
    cli.add_argument("input", type=str)
    opts = cli.parse_args()

    active_threads = setActiveThreads(opts.threads)
    print("Using threads:", active_threads)

    graph = PropertyGraph(opts.input)

    # Pick the implementation once, then call it with the common arguments.
    run_bfs = cython_bfs if opts.cython else bfs_sync_pg
    run_bfs(graph, opts.startNode, opts.propertyName)

    reported_value = graph.get_node_property(opts.propertyName)[opts.reportNode]
    print("Node {}: {}".format(opts.reportNode, reported_value))

    if opts.noverify:
        return

    # The verifier is handed the index of the last node property in the
    # schema, i.e. the one assumed to have just been appended by the BFS run.
    newest_property_id = len(graph.node_schema()) - 1
    run_verify = cython_verify_bfs if opts.cython else verify_bfs
    run_verify(graph, opts.startNode, newest_property_id)
def cc_push_topo(graph: PropertyGraph, property_name):
    """Compute component ids with the push-style topology-driven loop.

    Two ``uint32`` arrays (current and old component id per node) are
    initialised by ``initialize_cc_push_operator`` and then repeatedly
    processed by ``cc_push_topo_operator`` until a full pass reports no
    change. The current-component array is finally attached to ``graph`` as a
    node property called ``property_name``.
    """
    print("Executing Push algo\n")

    node_count = graph.num_nodes()

    timer = StatTimer("CC: Property Graph Numba: " + property_name)
    timer.start()

    # Per-node component id assignment (current value and previous value).
    comp_current = np.empty(node_count, dtype=np.uint32)
    comp_old = np.empty(node_count, dtype=np.uint32)

    # Fill both arrays with their starting values.
    init_op = initialize_cc_push_operator(graph, comp_current, comp_old)
    do_all(
        range(node_count), init_op, steal=True, loop_name="initialize_cc_push",
    )

    # Keep sweeping over all nodes for as long as a sweep updated something.
    changed = GReduceLogicalOr()
    changed.update(True)
    while changed.reduce():
        changed.reset()
        sweep_op = cc_push_topo_operator(graph, changed, comp_current, comp_old)
        do_all(
            range(node_count), sweep_op, steal=True, loop_name="cc_push_topo",
        )

    timer.stop()

    # Publish the component assignment as a new node property.
    result_table = pyarrow.table({property_name: comp_current})
    graph.add_node_property(result_table)
def bfs_sync_pg(graph: PropertyGraph, source, property_name):
    """Run level-synchronous BFS from ``source`` and store the distances.

    A ``uint32`` distance array with one slot per node is prepared by
    ``initialize``; each round swaps the frontier bags, bumps the level number
    and applies ``bfs_sync_operator_pg`` to the current frontier. The loop
    ends when no node was pushed for the next level. The distance array is
    added to ``graph`` as node property ``property_name``.
    """
    level = 0
    frontier = InsertBag[np.uint64]()
    next_frontier = InsertBag[np.uint64]()

    timer = StatTimer("BFS Property Graph Numba: " + property_name)
    timer.start()

    distance = np.empty(len(graph), dtype=np.uint32)
    initialize(graph, source, distance)
    next_frontier.push(source)

    while not next_frontier.empty():
        # What was collected last round becomes this round's work list.
        frontier.swap(next_frontier)
        next_frontier.clear()
        level += 1
        level_op = bfs_sync_operator_pg(graph, next_frontier, level, distance)
        do_all(
            frontier, level_op, steal=True, loop_name="bfs_sync_pg",
        )

    timer.stop()

    result_table = pyarrow.table({property_name: distance})
    graph.add_node_property(result_table)
def verify_sssp(graph: PropertyGraph, _source_i: int, property_id: int):
    """Print basic sanity figures for an SSSP result property.

    Runs ``not_visited_operator`` and ``max_dist_operator`` over every entry
    of node property ``property_id``; reports the unvisited count when it is
    positive and always prints the reduced maximum distance. ``_source_i`` is
    accepted but not used.
    """
    chunk_array = graph.get_node_property(property_id)
    entry_count = len(chunk_array)

    not_visited = GAccumulator[int](0)
    max_dist = GReduceMax[int]()

    unvisited_op = not_visited_operator(graph.num_nodes(), not_visited, chunk_array)
    do_all(range(entry_count), unvisited_op, loop_name="not_visited_op")

    if not_visited.reduce() > 0:
        print(
            not_visited.reduce(),
            " unvisited nodes; this is an error if graph is strongly connected",
        )

    farthest_op = max_dist_operator(graph.num_nodes(), max_dist, chunk_array)
    do_all(
        range(entry_count), farthest_op, steal=True, loop_name="max_dist_operator",
    )

    print("Max distance:", max_dist.reduce())
def main():
    """Command-line driver: run SSSP on a property graph and optionally verify it.

    ``--edgeWeightProperty`` is mandatory. After the run, the value stored for
    ``--reportNode`` is printed; unless ``--noverify`` is given, ``verify_sssp``
    is run on the last node property in the schema.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--startNode", type=int, default=0)
    cli.add_argument("--propertyName", type=str, default="NewProperty")
    cli.add_argument("--edgeWeightProperty", type=str, required=True)
    cli.add_argument("--shift", type=int, default=6)
    cli.add_argument("--reportNode", type=int, default=1)
    cli.add_argument("--noverify", action="store_true", default=False)
    cli.add_argument("--threads", "-t", type=int, default=1)
    cli.add_argument("input", type=str)
    opts = cli.parse_args()

    active_threads = setActiveThreads(opts.threads)
    print("Using threads:", active_threads)

    graph = PropertyGraph(opts.input)

    sssp(graph, opts.startNode, opts.edgeWeightProperty, opts.shift, opts.propertyName)

    reported_value = graph.get_node_property(opts.propertyName)[opts.reportNode]
    print("Node {}: {}".format(opts.reportNode, reported_value))

    if opts.noverify:
        return

    # Index of the last node property, assumed to be the one sssp just added.
    newest_property_id = len(graph.node_schema()) - 1
    verify_sssp(graph, opts.startNode, newest_property_id)
def main():
    """Command-line driver: run connected components and optionally verify.

    ``--algoType push`` selects ``cc_push_topo``; any other value selects
    ``cc_pull_topo``. The value stored for ``--reportNode`` is printed and,
    unless ``--noverify`` is given, ``verify_cc`` is run on the last node
    property in the schema.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--algoType", type=str, default="push")
    cli.add_argument("--propertyName", type=str, default="NewProperty")
    cli.add_argument("--reportNode", type=int, default=1)
    cli.add_argument("--noverify", action="store_true", default=False)
    cli.add_argument("--threads", "-t", type=int, default=1)
    cli.add_argument("input", type=str)
    opts = cli.parse_args()

    active_threads = setActiveThreads(opts.threads)
    print("Using threads:", active_threads)

    graph = PropertyGraph(opts.input)

    # Everything that is not literally "push" falls through to the pull variant.
    run_cc = cc_push_topo if opts.algoType == "push" else cc_pull_topo
    run_cc(graph, opts.propertyName)

    reported_value = graph.get_node_property(opts.propertyName)[opts.reportNode]
    print("Node {}: {}".format(opts.reportNode, reported_value))

    if opts.noverify:
        return

    # Index of the last node property, assumed to be the one just added.
    newest_property_id = len(graph.node_schema()) - 1
    verify_cc(graph, newest_property_id)
def cc_pull_topo_operator(graph: PropertyGraph, changed, comp_current: np.ndarray, nid):
    """Pull the smallest component id seen on ``nid``'s edges into ``nid``.

    For every edge returned by ``graph.edges(nid)``, if the destination's
    entry in ``comp_current`` is smaller than ``nid``'s, it is copied into
    ``nid``'s slot and ``changed`` is updated with ``True``.
    """
    for edge in graph.edges(nid):
        neighbor = graph.get_edge_dst(edge)
        # Adopt the neighbor's component id when it is the smaller one.
        if comp_current[nid] > comp_current[neighbor]:
            comp_current[nid] = comp_current[neighbor]
            # Record that this sweep modified at least one node.
            changed.update(True)
def compute_pagerank_pull_residual_operator(graph: PropertyGraph, delta, residual, nid):
    """Accumulate the positive deltas found along ``nid``'s edges.

    Sums ``delta[dst]`` over the destinations of ``graph.edges(nid)`` whose
    delta is greater than zero, and stores the total in ``residual[nid]`` only
    when that total is itself positive.
    """
    total = 0
    for edge in graph.edges(nid):
        neighbor = graph.get_edge_dst(edge)
        if delta[neighbor] > 0:
            total += delta[neighbor]

    # Leave residual[nid] untouched when nothing positive was collected.
    if total > 0:
        residual[nid] = total
def compute_async_kcore_operator(graph: PropertyGraph, current_degree, k_core_num, nid, ctx):
    """Propagate the removal of node ``nid`` to the destinations of its edges.

    Each destination's entry in ``current_degree`` is decremented by one via
    ``atomic_sub``. When the value returned by ``atomic_sub`` equals
    ``k_core_num`` the destination is pushed onto ``ctx``.
    """
    for edge in graph.edges(nid):
        neighbor = graph.get_edge_dst(edge)
        # presumably atomic_sub returns the value held before the decrement,
        # so equality with k_core_num marks the drop below the threshold.
        degree_before = atomic_sub(current_degree, neighbor, 1)
        if degree_before == k_core_num:
            # Newly dead node: schedule it so its own edges get processed.
            ctx.push(neighbor)
def cc_push_topo_operator(graph: PropertyGraph, changed, comp_current: np.ndarray, comp_old: np.ndarray, nid):
    """Push ``nid``'s component id to the destinations of its edges.

    Work is done only when ``comp_old[nid]`` is greater than
    ``comp_current[nid]``: the old value is brought up to date, ``changed`` is
    updated with ``True`` and ``atomic_min`` is applied to every destination's
    slot in ``comp_current``.
    """
    # NOTE(review): the source was flattened onto one line; the push loop is
    # reconstructed as part of the `if` body (push only after a change) --
    # confirm against the original layout.
    if comp_old[nid] > comp_current[nid]:
        comp_old[nid] = comp_current[nid]
        # Record that this sweep modified at least one node.
        changed.update(True)
        for edge in graph.edges(nid):
            neighbor = graph.get_edge_dst(edge)
            # Re-read nid's component id on every edge, then lower the
            # neighbor's id to it if it is smaller.
            atomic_min(comp_current, neighbor, comp_current[nid])
def bfs_sync_operator_pg(
    graph: PropertyGraph, next_level: InsertBag[np.uint64], next_level_number: int, distance: np.ndarray, nid,
):
    """Expand one frontier node during level-synchronous BFS.

    Every destination of ``graph.edges(nid)`` whose ``distance`` entry still
    equals ``graph.num_nodes()`` is assigned ``next_level_number`` and pushed
    onto ``next_level``.
    """
    # NOTE(review): "unvisited" is detected here as distance == num_nodes,
    # while initialize() in this file writes distance_infinity -- confirm the
    # two sentinels agree.
    unvisited_marker = graph.num_nodes()

    for edge in graph.edges(nid):
        neighbor = graph.get_edge_dst(edge)
        if distance[neighbor] == unvisited_marker:
            distance[neighbor] = next_level_number
            next_level.push(neighbor)
def sssp_operator(g: PropertyGraph, dists: np.ndarray, edge_weights, item, ctx: UserContext):
    """Relax the edges of the node named by work item ``item``.

    The item is dropped when ``dists[item.src]`` is already smaller than
    ``item.dist``. Otherwise, for each edge, the candidate distance (edge
    weight plus the source's current distance) is written with ``atomic_min``
    and a new ``(dst, distance)`` item is pushed when the candidate is smaller
    than the value ``atomic_min`` returned.
    """
    # Stale request: a shorter distance for this node was recorded meanwhile.
    if dists[item.src] < item.dist:
        return

    for edge in g.edges(item.src):
        target = g.get_edge_dst(edge)
        candidate = edge_weights[edge] + dists[item.src]
        previous = atomic_min(dists, target, candidate)
        if candidate < previous:
            ctx.push((target, candidate))
def test_assert_valid(property_graph: PropertyGraph):
    """``bfs_assert_valid`` must raise ``AssertionError`` on non-BFS properties.

    Checked twice: on the pre-existing ``"workFrom"`` property, and on a copy
    of a real BFS result whose first entry was overwritten with 100.
    """
    with raises(AssertionError):
        bfs_assert_valid(property_graph, "workFrom")

    start_node = 0
    property_name = "NewProp"
    bfs(property_graph, start_node, property_name)

    # Corrupt a private copy of the BFS output and store it as "Prop2".
    corrupted = property_graph.get_node_property(property_name).to_numpy().copy()
    corrupted[0] = 100
    property_graph.add_node_property(table({"Prop2": corrupted}))

    with raises(AssertionError):
        bfs_assert_valid(property_graph, "Prop2")
def pagerank_pull_sync_residual(graph: PropertyGraph, maxIterations, tolerance, property_name):
    """Run the residual-based synchronous PageRank loop and store the ranks.

    Allocates four interleaved ``LargeArray`` buffers (rank, out-degree,
    delta, residual), initialises them, computes the out-degrees, and then
    alternates the delta and residual operators until either
    ``maxIterations`` rounds have run or a round leaves ``changed`` unset. The
    rank buffer is added to ``graph`` as node property ``property_name``.
    """
    node_count = graph.num_nodes()

    rank = LargeArray[float](node_count, AllocationPolicy.INTERLEAVED)
    nout = LargeArray[np.uint64](node_count, AllocationPolicy.INTERLEAVED)
    delta = LargeArray[float](node_count, AllocationPolicy.INTERLEAVED)
    residual = LargeArray[float](node_count, AllocationPolicy.INTERLEAVED)

    # Set the starting values of all four buffers.
    init_op = initialize_residual_operator(
        rank.as_numpy(), nout.as_numpy(), delta.as_numpy(), residual.as_numpy(),
    )
    do_all(
        range(node_count), init_op, steal=True, loop_name="initialize_pagerank_pull_residual",
    )

    # Fill in the out-degree of every node.
    out_degree_op = compute_out_deg_operator(graph, nout.as_numpy())
    do_all(
        range(node_count), out_degree_op, steal=True, loop_name="Compute_out_degree",
    )

    print("Out-degree of 0: ", nout[0])

    changed = GReduceLogicalOr(True)
    iterations = 0

    timer = StatTimer("Pagerank: Property Graph Numba: " + property_name)
    timer.start()

    while iterations < maxIterations and changed.reduce():
        print("Iter: ", iterations, "\n")
        changed.reset()
        iterations += 1

        delta_op = compute_pagerank_pull_delta_operator(
            rank.as_numpy(), nout.as_numpy(), delta.as_numpy(), residual.as_numpy(), tolerance, changed,
        )
        do_all(
            range(node_count), delta_op, steal=True, loop_name="pagerank_delta",
        )

        residual_op = compute_pagerank_pull_residual_operator(graph, delta.as_numpy(), residual.as_numpy())
        do_all(
            range(node_count), residual_op, steal=True, loop_name="pagerank",
        )

    timer.stop()

    # Publish the ranks as a new node property.
    result_table = pyarrow.table({property_name: rank})
    graph.add_node_property(result_table)
def test_bfs(property_graph: PropertyGraph):
    """BFS appends its result as the last node property with distance 0 at the source.

    After the schema and start-node checks, the result is also handed to the
    Numba verifier ``verify_bfs``.
    """
    start_node = 0
    property_name = "NewProp"

    bfs(property_graph, start_node, property_name)

    node_schema: Schema = property_graph.node_schema()
    new_property_id = len(node_schema) - 1
    assert node_schema.names[new_property_id] == property_name

    source_distance = property_graph.get_node_property(property_name)[start_node]
    assert source_distance.as_py() == 0

    # Cross-check with the Numba implementation of the verifier.
    verify_bfs(property_graph, start_node, new_property_id)
def initialize(graph: PropertyGraph, source: int, distance: np.ndarray):
    """Fill the first ``graph.num_nodes()`` slots of ``distance``.

    The slot for ``source`` gets 0; every other slot gets the module-level
    ``distance_infinity``.
    """
    node_count = graph.num_nodes()
    for node in range(node_count):
        distance[node] = 0 if node == source else distance_infinity
def create_distance_array(g: PropertyGraph, source, length_property):
    """Build the initial SSSP distance array for graph ``g``.

    The array has ``len(g)`` entries and the dtype reported by
    ``dtype_of_pyarrow_array`` for edge property ``length_property``. Every
    entry starts at an "infinite" distance except ``source``, which is 0.

    For integer dtypes the infinity is the largest value representable in
    that dtype. The previous code always assigned the ``uint64`` maximum and
    relied on silent wrap-around for narrower dtypes, which NumPy 2 rejects
    with ``OverflowError`` (and which produced -1 for signed dtypes on older
    NumPy). For unsigned dtypes the resulting value is unchanged. Non-integer
    dtypes keep the original ``uint64`` maximum.

    Returns:
        The freshly allocated NumPy distance array.
    """
    dist_dtype = np.dtype(dtype_of_pyarrow_array(g.get_edge_property(length_property)))
    if np.issubdtype(dist_dtype, np.integer):
        inf_distance = np.iinfo(dist_dtype).max
    else:
        inf_distance = numba.types.uint64.maxval
    a = np.full(len(g), inf_distance, dtype=dist_dtype)
    a[source] = 0
    return a
def verify_cc(graph: PropertyGraph, property_id: int):
    """Print the component count reduced from node property ``property_id``.

    ``verify_cc_operator`` is applied to every index of the property and the
    reduced value of its accumulator is printed.
    """
    chunk_array = graph.get_node_property(property_id)
    num_components = GAccumulator[int](0)

    count_op = verify_cc_operator(num_components, chunk_array)
    do_all(range(len(chunk_array)), count_op, loop_name="num_components")

    print("Number of components are : ", num_components.reduce())
def test_load_garbage_file():
    """Opening a file with junk content as a ``PropertyGraph`` raises ``TsubaError``.

    The temporary file is created with ``delete=False`` so it still exists
    after being closed; it is removed in the ``finally`` clause.
    """
    garbage = NamedTemporaryFile(delete=False)
    try:
        # Write the junk bytes and close the handle before loading.
        with garbage:
            garbage.write(b"Test")
        with pytest.raises(TsubaError):
            PropertyGraph(garbage.name)
    finally:
        os.unlink(garbage.name)
def sssp(graph: PropertyGraph, source, length_property, shift, property_name):
    """Run SSSP from ``source`` and store the distances on ``graph``.

    The distance array comes from ``create_distance_array``. A ``for_each``
    loop seeded with ``(source, 0)`` applies ``sssp_operator`` using an
    ``OrderedByIntegerMetric`` worklist built from ``obim_indexer(shift)``.
    The elapsed time is printed and the distances are added as node property
    ``property_name``.
    """
    dists = create_distance_array(graph, source, length_property)

    # Seed the worklist with the source at distance zero.
    init_bag = InsertBag[UpdateRequest]()
    init_bag.push((source, 0))

    total_timer = StatTimer("Total SSSP")
    total_timer.start()

    relax_op = sssp_operator(graph, dists, graph.get_edge_property(length_property))
    priority_worklist = OrderedByIntegerMetric(obim_indexer(shift))
    for_each(
        init_bag, relax_op, worklist=priority_worklist, disable_conflict_detection=True, loop_name="SSSP",
    )

    total_timer.stop()
    print("Elapsed time: ", total_timer.get(), "milliseconds.")

    result_table = pyarrow.table({property_name: dists})
    graph.add_node_property(result_table)
def test_sssp(property_graph: PropertyGraph):
    """SSSP over the ``"workFrom"`` weights yields a valid, appended property.

    Checks the schema position of the new property, the zero distance at the
    start node, ``sssp_assert_valid``, the expected ``max_distance`` statistic,
    and finally the Numba verifier ``verify_sssp``.
    """
    start_node = 0
    weight_name = "workFrom"
    property_name = "NewProp"

    sssp(property_graph, start_node, weight_name, property_name)

    node_schema: Schema = property_graph.node_schema()
    new_property_id = len(node_schema) - 1
    assert node_schema.names[new_property_id] == property_name

    source_distance = property_graph.get_node_property(property_name)[start_node]
    assert source_distance.as_py() == 0

    sssp_assert_valid(property_graph, start_node, weight_name, property_name)

    stats = SsspStatistics(property_graph, property_name)
    assert stats.max_distance == 4294967295.0

    # Cross-check with the Numba implementation of the verifier.
    verify_sssp(property_graph, start_node, new_property_id)
def test_bfs(property_graph: PropertyGraph):
    """BFS result passes schema, validity, statistics and verifier checks.

    Expects the new property to be last in the node schema, distance 0 at the
    start node, ``BfsStatistics`` to report the start node as source with a
    maximum distance of 7, and ``verify_bfs`` to accept the result.
    """
    start_node = 0
    property_name = "NewProp"

    bfs(property_graph, start_node, property_name)

    node_schema: Schema = property_graph.node_schema()
    new_property_id = len(node_schema) - 1
    assert node_schema.names[new_property_id] == property_name

    source_distance = property_graph.get_node_property(property_name)[start_node]
    assert source_distance.as_py() == 0

    bfs_assert_valid(property_graph, property_name)

    stats = BfsStatistics(property_graph, property_name)
    assert stats.source_node == start_node
    assert stats.max_distance == 7

    # Cross-check with the Numba implementation of the verifier as well.
    verify_bfs(property_graph, start_node, new_property_id)
def verify_kcore(graph: PropertyGraph, property_name: str, k_core_num: int):
    """Print the node count reduced by the k-core sanity check.

    ``sanity_check_operator`` is run over every index of node property
    ``property_name`` with a float accumulator, and the reduced value is
    printed together with ``k_core_num``.
    """
    chunk_array = graph.get_node_property(property_name)
    alive_nodes = GAccumulator[float](0)

    check_op = sanity_check_operator(alive_nodes, chunk_array, k_core_num)
    do_all(
        range(len(chunk_array)), check_op, steal=True, loop_name="sanity_check_operator",
    )

    print("Number of nodes in the", k_core_num, "-core is", alive_nodes.reduce())
def kcore_async(graph: PropertyGraph, k_core_num, property_name):
    """Run the asynchronous k-core computation and store the final degrees.

    Three phases run between timer start and stop: per-node degree counting,
    construction of the initial worklist, and a ``for_each`` loop applying
    ``compute_async_kcore_operator``. The degree buffer is then added to
    ``graph`` as node property ``property_name``.
    """
    node_count = graph.num_nodes()
    initial_worklist = InsertBag[np.uint64]()
    current_degree = LargeArray[np.uint64](node_count, AllocationPolicy.INTERLEAVED)

    timer = StatTimer("Kcore: Property Graph Numba: " + property_name)
    timer.start()

    # Phase 1: record every node's degree.
    degree_op = compute_degree_count_operator(graph, current_degree.as_numpy())
    do_all(range(node_count), degree_op, steal=True)

    # Phase 2: collect the nodes that seed the worklist.
    seed_op = setup_initial_worklist_operator(initial_worklist, current_degree.as_numpy(), k_core_num)
    do_all(range(node_count), seed_op, steal=True)

    # Phase 3: process the worklist until it drains.
    kcore_op = compute_async_kcore_operator(graph, current_degree.as_numpy(), k_core_num)
    for_each(
        initial_worklist, kcore_op, steal=True, disable_conflict_detection=True,
    )

    timer.stop()

    # Publish the remaining degrees as a new node property.
    result_table = pyarrow.table({property_name: current_degree})
    graph.add_node_property(result_table)
def main():
    """Command-line driver: run the asynchronous k-core and optionally verify.

    ``--kcore``/``-k`` (default 100) is the k value. The value stored for
    ``--reportNode`` is printed; unless ``--noverify`` is given,
    ``verify_kcore`` is run on the result property.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--propertyName", type=str, default="NewProperty")
    cli.add_argument("--noverify", action="store_true", default=False)
    cli.add_argument("--threads", "-t", type=int, default=1)
    cli.add_argument("--kcore", "-k", type=int, default=100)
    cli.add_argument("--reportNode", type=int, default=0)
    cli.add_argument("input", type=str)
    opts = cli.parse_args()

    active_threads = setActiveThreads(opts.threads)
    print("Using threads:", active_threads)

    graph = PropertyGraph(opts.input)

    kcore_async(graph, opts.kcore, opts.propertyName)

    reported_value = graph.get_node_property(opts.propertyName)[opts.reportNode]
    print("Node {}: {}".format(opts.reportNode, reported_value))

    if opts.noverify:
        return

    verify_kcore(graph, opts.propertyName, opts.kcore)
def main():
    """Command-line driver: time a Jaccard run and print one node's value.

    Loads the graph named by the positional ``input`` argument, runs
    ``jaccard`` relative to ``--baseNode`` inside a ``StatTimer``, and prints
    the value stored for ``--reportNode`` in the result property.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--baseNode", type=int, default=0)
    cli.add_argument("--reportNode", type=int, default=1)
    cli.add_argument("--propertyName", type=str, default="NewProperty")
    cli.add_argument("--threads", "-t", type=int, default=1)
    cli.add_argument("input", type=str)
    opts = cli.parse_args()

    active_threads = setActiveThreads(opts.threads)
    print("Using threads:", active_threads)

    g = PropertyGraph(opts.input)

    # Only the jaccard call itself is timed.
    timer = StatTimer("Jaccard (Property Graph) Numba")
    timer.start()
    jaccard(g, opts.baseNode, opts.propertyName)
    timer.stop()

    reported_value = g.get_node_property(opts.propertyName)[opts.reportNode]
    print("Node {}: {}".format(opts.reportNode, reported_value))
def main():
    """Command-line driver: run residual PageRank and optionally verify it.

    Runs ``pagerank_pull_sync_residual`` with ``--maxIterations`` and
    ``--tolerance``, prints the value stored for ``--reportNode`` and, unless
    ``--noverify`` is given, calls ``verify_pr`` with ``--printTopN``.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--propertyName", type=str, default="NewProperty")
    cli.add_argument("--maxIterations", type=int, default=100)
    cli.add_argument("--tolerance", type=float, default=1.0e-3)
    cli.add_argument("--noverify", action="store_true", default=False)
    cli.add_argument("--printTopN", type=int, default=10)
    cli.add_argument("--threads", "-t", type=int, default=1)
    cli.add_argument("--reportNode", type=int, default=0)
    cli.add_argument("input", type=str)
    opts = cli.parse_args()

    active_threads = setActiveThreads(opts.threads)
    print("Using threads:", active_threads)

    graph = PropertyGraph(opts.input)

    pagerank_pull_sync_residual(graph, opts.maxIterations, opts.tolerance, opts.propertyName)

    reported_value = graph.get_node_property(opts.propertyName)[opts.reportNode]
    print("Node {}: {}".format(opts.reportNode, reported_value))

    if opts.noverify:
        return

    verify_pr(graph, opts.propertyName, opts.printTopN)
def test_bfs(property_graph: PropertyGraph):
    """The Numba BFS (``bfs_sync_pg``) passes verifier, statistics and validity checks.

    The result is looked up as the last property in the node schema for
    ``verify_bfs``; ``BfsStatistics`` must report the start node as source and
    a maximum distance of 7.
    """
    property_name = "NewProp"
    start_node = 0

    bfs_sync_pg(property_graph, start_node, property_name)

    new_property_id = len(property_graph.node_schema()) - 1
    verify_bfs(property_graph, start_node, new_property_id)

    stats = BfsStatistics(property_graph, property_name)
    assert stats.source_node == start_node
    assert stats.max_distance == 7

    bfs_assert_valid(property_graph, property_name)
def verify_pr(graph: PropertyGraph, property_name: str, topn: int):
    """Print sanity statistics for the rank property ``property_name``.

    ``sanity_check_operator`` is run over every index of the property with a
    sum accumulator and max/min reducers, and the three reduced values are
    printed. When ``topn`` is positive, the ``topn`` largest values are also
    printed in descending order together with their indices.

    Args:
        graph: Graph holding the rank property.
        property_name: Name of the node property to inspect.
        topn: Number of highest-ranked entries to print; nothing extra is
            printed when it is 0 or negative.
    """
    chunk_array = graph.get_node_property(property_name)
    sum_rank = GAccumulator[float](0)
    max_rank = GReduceMax[float]()
    min_rank = GReduceMin[float]()

    do_all(
        range(len(chunk_array)),
        sanity_check_operator(sum_rank, max_rank, min_rank, chunk_array),
        steal=True,
        loop_name="sanity_check_operator",
    )

    print("Max rank is ", max_rank.reduce())
    print("Min rank is ", min_rank.reduce())
    print("rank sum is ", sum_rank.reduce())

    # Print top N ranked nodes
    if topn > 0:
        # np.float was only an alias of the builtin float (float64) and was
        # removed in NumPy 1.24; np.float64 is the same dtype on all versions.
        np_array = np.array(chunk_array, dtype=np.float64)
        # argsort is ascending: take the last topn indices and reverse them.
        arr = np_array.argsort()[-topn:][::-1]
        for i in arr:
            print(np_array[i], " : ", i, "\n")
def compute_degree_count_operator(graph: PropertyGraph, current_degree, nid):
    """
    Store the number of edges of node ``nid`` in ``current_degree[nid]``.

    The count is ``len(graph.edges(nid))``; per the original note, the graph
    is taken to be symmetric, so this out-edge count also serves as the
    in-edge count.
    """
    out_edges = graph.edges(nid)
    current_degree[nid] = len(out_edges)