def degree_assortativity_coefficient(
    graph: PropertyGraph,
    source_degree_type: DegreeType = DegreeType.OUT,
    destination_degree_type: DegreeType = DegreeType.IN,
    weight=None,
):
    """
    Calculates and returns the degree assortativity of a given graph.

    Parameters:
    * graph: the PropertyGraph to be analyzed
    * source_degree_type: degree type to consider for the source node on an edge;
      expected values are DegreeType.IN or DegreeType.OUT
    * destination_degree_type: degree type to consider for the destination node on an edge;
      expected values are DegreeType.IN or DegreeType.OUT
    * weight (optional): edge property to use if using weighted degrees
    """
    # get the tables associated with the degree types of the source and destination nodes
    calculate_degree(graph, "temp_DegreeType.IN", "temp_DegreeType.OUT", weight)
    source_degree = graph.get_node_property("temp_" + str(source_degree_type))
    destination_degree = graph.get_node_property("temp_" + str(destination_degree_type))

    try:
        # Calculate the average in and out degrees of graph
        # (with respect to number of edges, not number of nodes)
        num_edges = graph.num_edges()
        source_average, destination_average = average_degree(graph, num_edges, source_degree, destination_degree)

        # Calculate the numerator (product of deviation from mean)
        # and the factors of the denominator (square deviation from mean)
        product_of_dev = GAccumulator[float](0)
        square_of_source_dev = GAccumulator[float](0)
        square_of_destination_dev = GAccumulator[float](0)
        do_all(
            range(graph.num_nodes()),
            degree_assortativity_coefficient_operator(
                graph,
                source_degree,
                source_average,
                destination_degree,
                destination_average,
                product_of_dev,
                square_of_source_dev,
                square_of_destination_dev,
            ),
            steal=True,
            loop_name="degree assortativity coefficient calculation",
        )
        return product_of_dev.reduce() / sqrt(square_of_source_dev.reduce() * square_of_destination_dev.reduce())
    finally:
        graph.remove_node_property("temp_DegreeType.IN")
        graph.remove_node_property("temp_DegreeType.OUT")

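# Usage sketch (not part of the library source): a minimal, hedged example of calling
# degree_assortativity_coefficient above. The graph path "path/to/property_graph" and the
# edge property name "value" are placeholders/assumptions; substitute names from your data.
def example_degree_assortativity():
    graph = PropertyGraph("path/to/property_graph")  # hypothetical input location
    # Default: correlate source out-degree with destination in-degree, unweighted.
    r = degree_assortativity_coefficient(graph)
    # Weighted variant, assuming an edge property named "value" exists on the graph.
    r_weighted = degree_assortativity_coefficient(graph, DegreeType.OUT, DegreeType.IN, weight="value")
    print("assortativity:", r, "weighted:", r_weighted)
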
def main():
    import argparse

    import katana.local
    from katana.galois import set_active_threads

    katana.local.initialize()

    parser = argparse.ArgumentParser()
    parser.add_argument("--startNode", type=int, default=0)
    parser.add_argument("--propertyName", type=str, default="NewProperty")
    parser.add_argument("--reportNode", type=int, default=1)
    parser.add_argument("--noverify", action="store_true", default=False)
    parser.add_argument("--threads", "-t", type=int, default=1)
    parser.add_argument("input", type=str)
    args = parser.parse_args()

    print("Using threads:", set_active_threads(args.threads))

    graph = PropertyGraph(args.input)

    bfs_sync_pg(graph, args.startNode, args.propertyName)

    print("Node {}: {}".format(args.reportNode, graph.get_node_property(args.propertyName)[args.reportNode]))

    if not args.noverify:
        numNodeProperties = len(graph.node_schema())
        newPropertyId = numNodeProperties - 1
        verify_bfs(graph, args.startNode, newPropertyId)

def main():
    import argparse

    import katana.local
    from katana.galois import set_active_threads

    katana.local.initialize()

    parser = argparse.ArgumentParser()
    parser.add_argument("--startNode", type=int, default=0)
    parser.add_argument("--propertyName", type=str, default="NewProperty")
    parser.add_argument("--edgeWeightProperty", type=str, required=True)
    parser.add_argument("--shift", type=int, default=6)
    parser.add_argument("--reportNode", type=int, default=1)
    parser.add_argument("--noverify", action="store_true", default=False)
    parser.add_argument("--threads", "-t", type=int, default=1)
    parser.add_argument("input", type=str)
    args = parser.parse_args()

    print("Using threads:", set_active_threads(args.threads))

    graph = PropertyGraph(args.input)

    sssp(graph, args.startNode, args.edgeWeightProperty, args.shift, args.propertyName)

    print("Node {}: {}".format(args.reportNode, graph.get_node_property(args.propertyName)[args.reportNode]))

    if not args.noverify:
        numNodeProperties = len(graph.node_schema())
        newPropertyId = numNodeProperties - 1
        verify_sssp(graph, args.startNode, newPropertyId)

def verify_sssp(graph: PropertyGraph, _source_i: int, property_id: int):
    prop_array = graph.get_node_property(property_id)
    not_visited = GAccumulator[int](0)
    max_dist = GReduceMax[int]()
    # TODO(amp): Remove / 4
    infinity = dtype_info(dtype_of_pyarrow_array(prop_array)).max / 4

    do_all(
        range(len(prop_array)),
        not_visited_operator(infinity, not_visited, prop_array),
        loop_name="not_visited_op",
    )

    if not_visited.reduce() > 0:
        print(not_visited.reduce(), " unvisited nodes; this is an error if graph is strongly connected")

    do_all(
        range(len(prop_array)),
        max_dist_operator(infinity, max_dist, prop_array),
        steal=True,
        loop_name="max_dist_operator",
    )

    print("Max distance:", max_dist.reduce())

def main():
    import argparse

    import katana.local

    katana.local.initialize()

    parser = argparse.ArgumentParser()
    parser.add_argument("--baseNode", type=int, default=0)
    parser.add_argument("--reportNode", type=int, default=1)
    parser.add_argument("--propertyName", type=str, default="NewProperty")
    parser.add_argument("--threads", "-t", type=int, default=1)
    parser.add_argument("input", type=str)
    args = parser.parse_args()

    print("Using threads:", set_active_threads(args.threads))

    g = PropertyGraph(args.input)

    timer = StatTimer("Jaccard (Property Graph) Numba")
    timer.start()
    jaccard(g, args.baseNode, args.propertyName)
    timer.stop()
    # del timer

    print("Node {}: {}".format(args.reportNode, g.get_node_property(args.propertyName)[args.reportNode]))

def main():
    import argparse

    import katana.local

    katana.local.initialize()

    parser = argparse.ArgumentParser()
    parser.add_argument("--propertyName", type=str, default="NewProperty")
    parser.add_argument("--noverify", action="store_true", default=False)
    parser.add_argument("--threads", "-t", type=int, default=1)
    parser.add_argument("--kcore", "-k", type=int, default=100)
    parser.add_argument("--reportNode", type=int, default=0)
    parser.add_argument("input", type=str)
    args = parser.parse_args()

    print("Using threads:", set_active_threads(args.threads))

    graph = PropertyGraph(args.input)

    kcore_async(graph, args.kcore, args.propertyName)

    print("Node {}: {}".format(args.reportNode, graph.get_node_property(args.propertyName)[args.reportNode]))

    if not args.noverify:
        verify_kcore(graph, args.propertyName, args.kcore)

def main():
    import argparse

    import katana.local

    katana.local.initialize()

    parser = argparse.ArgumentParser()
    parser.add_argument("--propertyName", type=str, default="NewProperty")
    parser.add_argument("--maxIterations", type=int, default=100)
    parser.add_argument("--tolerance", type=float, default=1.0e-3)
    parser.add_argument("--noverify", action="store_true", default=False)
    parser.add_argument("--printTopN", type=int, default=10)
    parser.add_argument("--threads", "-t", type=int, default=1)
    parser.add_argument("--reportNode", type=int, default=0)
    parser.add_argument("input", type=str)
    args = parser.parse_args()

    print("Using threads:", set_active_threads(args.threads))

    graph = PropertyGraph(args.input)

    pagerank_pull_sync_residual(graph, args.maxIterations, args.tolerance, args.propertyName)

    print("Node {}: {}".format(args.reportNode, graph.get_node_property(args.propertyName)[args.reportNode]))

    if not args.noverify:
        verify_pr(graph, args.propertyName, args.printTopN)

def main():
    import argparse

    import katana.local
    from katana.galois import set_active_threads

    katana.local.initialize()

    parser = argparse.ArgumentParser()
    parser.add_argument("--algoType", type=str, default="push")
    parser.add_argument("--propertyName", type=str, default="NewProperty")
    parser.add_argument("--reportNode", type=int, default=1)
    parser.add_argument("--noverify", action="store_true", default=False)
    parser.add_argument("--threads", "-t", type=int, default=1)
    parser.add_argument("input", type=str)
    args = parser.parse_args()

    print("Using threads:", set_active_threads(args.threads))

    graph = PropertyGraph(args.input)

    if args.algoType == "push":
        cc_push_topo(graph, args.propertyName)
    else:
        cc_pull_topo(graph, args.propertyName)

    print("Node {}: {}".format(args.reportNode, graph.get_node_property(args.propertyName)[args.reportNode]))

    if not args.noverify:
        numNodeProperties = len(graph.node_schema())
        newPropertyId = numNodeProperties - 1
        verify_cc(graph, newPropertyId)

def verify_bfs(graph: PropertyGraph, _source_i: int, property_id: int):
    chunk_array = graph.get_node_property(property_id)
    not_visited = GAccumulator[int](0)
    max_dist = GReduceMax[int]()

    do_all(
        range(len(chunk_array)),
        not_visited_operator(not_visited, chunk_array),
        loop_name="not_visited_op",
    )

    if not_visited.reduce() > 0:
        print(not_visited.reduce(), " unvisited nodes; this is an error if graph is strongly connected")

    do_all(
        range(len(chunk_array)),
        max_dist_operator(max_dist, chunk_array),
        steal=True,
        loop_name="max_dist_operator",
    )

    print("BFS Max distance:", max_dist.reduce())

def test_local_clustering_coefficient():
    property_graph = PropertyGraph(get_input("propertygraphs/rmat15_cleaned_symmetric"))

    local_clustering_coefficient(property_graph, "output")

    property_graph: PropertyGraph
    out = property_graph.get_node_property("output")

    assert out[-1].as_py() == 0
    assert not np.any(np.isnan(out))

def verify_cc(graph: PropertyGraph, property_id: int):
    chunk_array = graph.get_node_property(property_id)
    num_components = GAccumulator[int](0)

    do_all(
        range(len(chunk_array)),
        verify_cc_operator(num_components, chunk_array),
        loop_name="num_components",
    )

    print("Number of components:", num_components.reduce())

def test_load_graphml_write():
    input_file = Path(__file__).parent.parent.parent / "tools" / "graph-convert" / "test-inputs" / "movies.graphml"
    pg = PropertyGraph.from_graphml(input_file)
    pg.mark_all_properties_persistent()
    with TemporaryDirectory() as tmpdir:
        pg.write(tmpdir)
        del pg
        property_graph = PropertyGraph(tmpdir)
        assert property_graph.path == f"file://{tmpdir}"
        assert property_graph.get_node_property(0)[1].as_py() == "Keanu Reeves"

def verify_kcore(graph: PropertyGraph, property_name: str, k_core_num: int):
    """Check output sanity"""
    chunk_array = graph.get_node_property(property_name)
    alive_nodes = GAccumulator[float](0)

    do_all(
        range(len(chunk_array)),
        sanity_check_operator(alive_nodes, chunk_array, k_core_num),
        steal=True,
        loop_name="sanity_check_operator",
    )

    print("Number of nodes in the", k_core_num, "-core is", alive_nodes.reduce())

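# Fixture sketch (assumption, not from the original test suite): the tests below take a
# `property_graph` argument, presumably supplied by a pytest fixture in conftest.py.
# A minimal sketch, assuming get_input (used in test_local_clustering_coefficient above)
# resolves test datasets; the dataset name "propertygraphs/ldbc_003" is a guess inferred
# from the asserted values and may differ in the actual suite.
import pytest


@pytest.fixture
def property_graph() -> PropertyGraph:
    return PropertyGraph(get_input("propertygraphs/ldbc_003"))
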
def test_jaccard_sorted(property_graph: PropertyGraph):
    sort_all_edges_by_dest(property_graph)

    property_name = "NewProp"
    compare_node = 0

    jaccard(property_graph, compare_node, property_name, JaccardPlan.sorted())

    jaccard_assert_valid(property_graph, compare_node, property_name)

    similarities: np.ndarray = property_graph.get_node_property(property_name).to_numpy()
    assert similarities[compare_node] == 1
    assert similarities[1917] == approx(0.28571428)
    assert similarities[2812] == approx(0.01428571)

def test_assert_valid(property_graph: PropertyGraph):
    with raises(AssertionError):
        bfs_assert_valid(property_graph, "workFrom")

    property_name = "NewProp"
    start_node = 0

    bfs(property_graph, start_node, property_name)

    v = property_graph.get_node_property(property_name).to_numpy().copy()
    v[0] = 100
    property_graph.add_node_property(table({"Prop2": v}))

    with raises(AssertionError):
        bfs_assert_valid(property_graph, "Prop2")

def run_jaccard(property_graph: PropertyGraph, input_args):
    property_name = "NewProp"
    compare_node = input_args["source_node"]

    with time_block(f"jaccard on {compare_node}"):
        analytics.jaccard(property_graph, compare_node, property_name)

    check_schema(property_graph, property_name)

    similarities: np.ndarray = property_graph.get_node_property(property_name).to_numpy()
    assert similarities[compare_node] == 1

    analytics.jaccard_assert_valid(property_graph, compare_node, property_name)

    stats = analytics.JaccardStatistics(property_graph, compare_node, property_name)
    print(f"STATS:\n{stats}")

    property_graph.remove_node_property(property_name)

def test_bfs(property_graph: PropertyGraph):
    property_name = "NewProp"
    start_node = 0

    bfs(property_graph, start_node, property_name)

    node_schema: Schema = property_graph.node_schema()
    num_node_properties = len(node_schema)
    new_property_id = num_node_properties - 1
    assert node_schema.names[new_property_id] == property_name

    assert property_graph.get_node_property(property_name)[start_node].as_py() == 0

    bfs_assert_valid(property_graph, property_name)

    stats = BfsStatistics(property_graph, property_name)

    assert stats.source_node == start_node
    assert stats.max_distance == 7

    # Verify with numba implementation of verifier as well
    verify_bfs(property_graph, start_node, new_property_id)

def test_busy_wait(property_graph: PropertyGraph):
    set_busy_wait()

    property_name = "NewProp"
    start_node = 0

    bfs(property_graph, start_node, property_name)

    node_schema: Schema = property_graph.node_schema()
    num_node_properties = len(node_schema)
    new_property_id = num_node_properties - 1
    assert node_schema.names[new_property_id] == property_name

    assert property_graph.get_node_property(property_name)[start_node].as_py() == 0

    bfs_assert_valid(property_graph, start_node, property_name)

    BfsStatistics(property_graph, property_name)

    # Verify with numba implementation of verifier as well
    verify_bfs(property_graph, start_node, new_property_id)

    set_busy_wait(0)

def test_sssp(property_graph: PropertyGraph):
    property_name = "NewProp"
    weight_name = "workFrom"
    start_node = 0

    sssp(property_graph, start_node, weight_name, property_name)

    node_schema: Schema = property_graph.node_schema()
    num_node_properties = len(node_schema)
    new_property_id = num_node_properties - 1
    assert node_schema.names[new_property_id] == property_name

    assert property_graph.get_node_property(property_name)[start_node].as_py() == 0

    sssp_assert_valid(property_graph, start_node, weight_name, property_name)

    stats = SsspStatistics(property_graph, property_name)
    print(stats)
    assert stats.max_distance == 2011.0

    # Verify with numba implementation of verifier
    verify_sssp(property_graph, start_node, new_property_id)

def test_jaccard(property_graph: PropertyGraph):
    property_name = "NewProp"
    compare_node = 0

    jaccard(property_graph, compare_node, property_name)

    node_schema: Schema = property_graph.node_schema()
    num_node_properties = len(node_schema)
    new_property_id = num_node_properties - 1
    assert node_schema.names[new_property_id] == property_name

    jaccard_assert_valid(property_graph, compare_node, property_name)

    stats = JaccardStatistics(property_graph, compare_node, property_name)
    assert stats.max_similarity == approx(1)
    assert stats.min_similarity == approx(0)
    assert stats.average_similarity == approx(0.000637853)

    similarities: np.ndarray = property_graph.get_node_property(property_name).to_numpy()
    assert similarities[compare_node] == 1
    assert similarities[1917] == approx(0.28571428)
    assert similarities[2812] == approx(0.01428571)

def verify_pr(graph: PropertyGraph, property_name: str, topn: int):
    """Check output sanity"""
    chunk_array = graph.get_node_property(property_name)
    sum_rank = GAccumulator[float](0)
    max_rank = GReduceMax[float]()
    min_rank = GReduceMin[float]()

    do_all(
        range(len(chunk_array)),
        sanity_check_operator(sum_rank, max_rank, min_rank, chunk_array),
        steal=True,
        loop_name="sanity_check_operator",
    )

    print("Max rank is ", max_rank.reduce())
    print("Min rank is ", min_rank.reduce())
    print("Rank sum is ", sum_rank.reduce())

    # Print top N ranked nodes
    if topn > 0:
        # np.float was removed from recent NumPy releases; use the builtin float dtype instead
        np_array = np.array(chunk_array, dtype=float)
        arr = np_array.argsort()[-topn:][::-1]
        for i in arr:
            print(np_array[i], " : ", i, "\n")