def run_bfs(property_graph: PropertyGraph, input_args, source_node_file):
    """Run BFS and validate/report the result.

    Parameters:
    * property_graph: graph to analyze; a temporary node property "NewProp"
      is created for each run and removed afterwards
    * input_args: dict providing the default "source_node"
    * source_node_file: optional path to a file with one source node id per
      line; when non-empty, BFS runs once per listed source

    Raises:
    * FileNotFoundError: if source_node_file is given but does not exist
    """
    property_name = "NewProp"
    start_node = input_args["source_node"]

    if source_node_file:
        # Fail fast with a clear error instead of printing a warning and
        # then crashing on open() anyway (the original behavior).
        if not os.path.exists(source_node_file):
            raise FileNotFoundError(f"Source node file doesn't exist: {source_node_file}")
        with open(source_node_file, "r") as fi:
            sources = [int(line) for line in fi]
        for source in sources:
            with time_block(f"bfs on {source}"):
                analytics.bfs(property_graph, int(source), property_name)
            check_schema(property_graph, property_name)
            analytics.bfs_assert_valid(property_graph, property_name)
            stats = analytics.BfsStatistics(property_graph, property_name)
            print(f"STATS:\n{stats}")
            property_graph.remove_node_property(property_name)
    else:
        with time_block("bfs"):
            analytics.bfs(property_graph, start_node, property_name)
        check_schema(property_graph, property_name)
        analytics.bfs_assert_valid(property_graph, property_name)
        stats = analytics.BfsStatistics(property_graph, property_name)
        print(f"STATS:\n{stats}")
        property_graph.remove_node_property(property_name)
def run_bc(property_graph: PropertyGraph, input_args, source_node_file):
    """Run level-synchronous betweenness centrality and report statistics.

    Sources come either from source_node_file (processed in batches of 4)
    or, when that is empty, from input_args["source_node"]. A temporary
    node property "NewProp" holds the scores and is removed after each run.

    NOTE(review): this function is redefined later in this file; the later
    definition (which takes a num_sources parameter) wins at import time.

    Raises:
    * FileNotFoundError: if source_node_file is given but does not exist
    """
    property_name = "NewProp"
    start_node = input_args["source_node"]
    bc_plan = analytics.BetweennessCentralityPlan.level()
    batch_size = 4  # sources per betweenness-centrality run

    if source_node_file:
        # Fail fast instead of printing a warning and crashing on open().
        if not os.path.exists(source_node_file):
            raise FileNotFoundError(f"Source node file doesn't exist: {source_node_file}")
        # Context manager closes the file (the original leaked the handle).
        with open(source_node_file, "r") as fi:
            sources = fi.readlines()
        for batch_start in range(0, len(sources), batch_size):
            # Distinct loop/comprehension names: the original reused `i` for
            # both, relying on comprehension scoping to avoid a bug.
            sources_to_use = [int(s) for s in sources[batch_start : batch_start + batch_size]]
            print(f"Using source: {sources_to_use}")
            with time_block("betweenness centrality"):
                analytics.betweenness_centrality(property_graph, property_name, sources_to_use, bc_plan)
            check_schema(property_graph, property_name)
            stats = analytics.BetweennessCentralityStatistics(property_graph, property_name)
            print(f"STATS:\n{stats}")
            property_graph.remove_node_property(property_name)
    else:
        sources = [start_node]
        with time_block("betweenness centrality"):
            analytics.betweenness_centrality(property_graph, property_name, sources, bc_plan)
        check_schema(property_graph, property_name)
        stats = analytics.BetweennessCentralityStatistics(property_graph, property_name)
        print(f"STATS:\n{stats}")
        property_graph.remove_node_property(property_name)
def degree_assortativity_coefficient( graph: PropertyGraph, source_degree_type: DegreeType = DegreeType.OUT, destination_degree_type: DegreeType = DegreeType.IN, weight=None, ): """ Calculates and returns the degree assortativity of a given graph. Paramaters: * graph: the PropertyGraph to be analyzed * source_degree_type: description of degree type to consider for the source node on an edge expected values are DegreeType.IN or DegreeType.OUT * destination_degree_type: description the degree type to consider for the destination node on an edge expected values are DegreeType.IN or DegreeType.OUT * weight (optional): edge property to use if using weighted degrees """ # get the tables associated with the degree types of the source and destination nodes calculate_degree(graph, "temp_DegreeType.IN", "temp_DegreeType.OUT", weight) source_degree = graph.get_node_property("temp_" + str(source_degree_type)) destination_degree = graph.get_node_property("temp_" + str(destination_degree_type)) try: # Calculate the average in and out degrees of graph # (with respect to number of edges, not number of nodes) num_edges = graph.num_edges() source_average, destination_average = average_degree( graph, num_edges, source_degree, destination_degree) # Calculate the numerator (product of deviation from mean) # and the factors of the denominator (square deviation from mean) product_of_dev = GAccumulator[float](0) square_of_source_dev = GAccumulator[float](0) square_of_destination_dev = GAccumulator[float](0) do_all( range(graph.num_nodes()), degree_assortativity_coefficient_operator( graph, source_degree, source_average, destination_degree, destination_average, product_of_dev, square_of_source_dev, square_of_destination_dev, ), steal=True, loop_name="degree assortativity coefficient calculation", ) return product_of_dev.reduce() / sqrt( square_of_source_dev.reduce() * square_of_destination_dev.reduce()) finally: graph.remove_node_property("temp_DegreeType.IN") 
graph.remove_node_property("temp_DegreeType.OUT")
def run_cc(property_graph: PropertyGraph, _input_args):
    """Run connected components, validate the labeling, and print statistics.

    A temporary node property "NewProp" holds the component ids and is
    removed before returning.
    """
    output_property = "NewProp"
    with time_block("connected components"):
        analytics.connected_components(property_graph, output_property)
    check_schema(property_graph, output_property)
    analytics.connected_components_assert_valid(property_graph, output_property)
    summary = analytics.ConnectedComponentsStatistics(property_graph, output_property)
    print(f"STATS:\n{summary}")
    property_graph.remove_node_property(output_property)
def run_kcore(property_graph: PropertyGraph, _input_args, k: int = 10):
    """Run k-core decomposition, validate it, and print statistics.

    Parameters:
    * property_graph: graph to analyze; a temporary node property "NewProp"
      is created and removed before returning
    * _input_args: unused, kept for a uniform run_* signature
    * k: core threshold; defaults to 10, the value previously hard-coded,
      so existing callers are unaffected
    """
    property_name = "NewProp"
    with time_block("k-core"):
        analytics.k_core(property_graph, k, property_name)
    check_schema(property_graph, property_name)
    analytics.k_core_assert_valid(property_graph, k, property_name)
    stats = analytics.KCoreStatistics(property_graph, k, property_name)
    print(f"STATS:\n{stats}")
    property_graph.remove_node_property(property_name)
def run_louvain(property_graph: PropertyGraph, input_args):
    """Run Louvain clustering over the weighted edges, validate the
    communities, and print statistics.

    The edge-weight property name comes from input_args["edge_wt"]; the
    cluster ids are written to a temporary node property "NewProp" that is
    removed before returning.
    """
    output_property = "NewProp"
    weight_property = input_args["edge_wt"]
    with time_block("louvain"):
        plan = analytics.LouvainClusteringPlan.do_all(False, 0.0001, 0.0001, 10000, 100)
        analytics.louvain_clustering(property_graph, weight_property, output_property, plan)
    check_schema(property_graph, output_property)
    analytics.louvain_clustering_assert_valid(property_graph, weight_property, output_property)
    summary = analytics.LouvainClusteringStatistics(property_graph, weight_property, output_property)
    print(f"STATS:\n{summary}")
    property_graph.remove_node_property(output_property)
def run_jaccard(property_graph: PropertyGraph, input_args):
    """Run Jaccard similarity against input_args["source_node"], check the
    node is maximally similar to itself, validate, and print statistics.

    Scores land in a temporary node property "NewProp" that is removed
    before returning.
    """
    output_property = "NewProp"
    base_node = input_args["source_node"]
    with time_block(f"jaccard on {base_node}"):
        analytics.jaccard(property_graph, base_node, output_property)
    check_schema(property_graph, output_property)
    scores: np.ndarray = property_graph.get_node_property(output_property).to_numpy()
    # A node is always fully similar to itself.
    assert scores[base_node] == 1
    analytics.jaccard_assert_valid(property_graph, base_node, output_property)
    summary = analytics.JaccardStatistics(property_graph, base_node, output_property)
    print(f"STATS:\n{summary}")
    property_graph.remove_node_property(output_property)
def run_bc(property_graph: PropertyGraph, input_args, source_node_file, num_sources):
    """Run level-synchronous betweenness centrality in batches of
    num_sources sources and report statistics per batch.

    Parameters:
    * property_graph: graph to analyze; scores go to a temporary node
      property "NewProp" removed after each run
    * input_args: dict providing the fallback "source_node"
    * source_node_file: optional path with one source node id per line
    * num_sources: sources per batch; must not exceed the file's line count

    Raises:
    * FileNotFoundError: if source_node_file is given but does not exist
    """
    property_name = "NewProp"
    start_node = input_args["source_node"]
    bc_plan = analytics.BetweennessCentralityPlan.level()

    if source_node_file:
        # Fail fast instead of printing a warning and crashing on open().
        if not os.path.exists(source_node_file):
            raise FileNotFoundError(f"Source node file doesn't exist: {source_node_file}")
        with open(source_node_file, "r") as fi:
            all_sources = [int(line) for line in fi]
        assert num_sources <= len(all_sources)
        # Ceiling division: enough runs to cover every listed source.
        runs = (len(all_sources) + num_sources - 1) // num_sources
        for run in range(runs):
            start_idx = (num_sources * run) % len(all_sources)
            # Rotate so each run draws a fresh window of num_sources ids,
            # wrapping around the end of the list.
            rotated = all_sources[start_idx:] + all_sources[:start_idx]
            # BUG FIX: the original assigned this window back to the full
            # source list, so every run after the first rotated an
            # already-truncated list (and the modulo drifted with it).
            sources = rotated[:num_sources]
            print(f"Using sources: {sources}")
            with time_block("betweenness centrality"):
                analytics.betweenness_centrality(property_graph, property_name, sources, bc_plan)
            check_schema(property_graph, property_name)
            stats = analytics.BetweennessCentralityStatistics(property_graph, property_name)
            print(f"STATS:\n{stats}")
            property_graph.remove_node_property(property_name)
    else:
        sources = [start_node]
        print(f"Using sources: {sources}")
        with time_block("betweenness centrality"):
            analytics.betweenness_centrality(property_graph, property_name, sources, bc_plan)
        check_schema(property_graph, property_name)
        stats = analytics.BetweennessCentralityStatistics(property_graph, property_name)
        print(f"STATS:\n{stats}")
        property_graph.remove_node_property(property_name)
def run_sssp(property_graph: PropertyGraph, input_args, source_node_file):
    """Run single-source shortest paths (delta-stepping) and validate/report.

    Parameters:
    * property_graph: graph to analyze; distances go to a temporary node
      property "NewProp" removed after each run
    * input_args: dict providing "source_node", "edge_wt", "sssp_delta",
      and "name" (used to pick the plan variant)
    * source_node_file: optional path with one source node id per line;
      when non-empty, SSSP runs once per listed source

    Raises:
    * FileNotFoundError: if source_node_file is given but does not exist
    """
    property_name = "NewProp"
    start_node = input_args["source_node"]
    edge_prop_name = input_args["edge_wt"]
    sssp_plan = analytics.SsspPlan.delta_step(input_args["sssp_delta"])
    # kron/urand inputs use the fused delta-step variant instead.
    if "kron" in input_args["name"] or "urand" in input_args["name"]:
        sssp_plan = analytics.SsspPlan.delta_step_fusion(input_args["sssp_delta"])

    if source_node_file:
        # Fail fast with a clear error instead of printing a warning and
        # then crashing on open() anyway (the original behavior).
        if not os.path.exists(source_node_file):
            raise FileNotFoundError(f"Source node file doesn't exist: {source_node_file}")
        with open(source_node_file, "r") as fi:
            sources = [int(line) for line in fi]
        for source in sources:
            with time_block(f"sssp on {source}"):
                analytics.sssp(property_graph, source, edge_prop_name, property_name, sssp_plan)
            check_schema(property_graph, property_name)
            analytics.sssp_assert_valid(property_graph, source, edge_prop_name, property_name)
            stats = analytics.SsspStatistics(property_graph, property_name)
            print(f"STATS:\n{stats}")
            property_graph.remove_node_property(property_name)
    else:
        with time_block("sssp"):
            analytics.sssp(property_graph, start_node, edge_prop_name, property_name, sssp_plan)
        check_schema(property_graph, property_name)
        analytics.sssp_assert_valid(property_graph, start_node, edge_prop_name, property_name)
        stats = analytics.SsspStatistics(property_graph, property_name)
        print(f"STATS:\n{stats}")
        property_graph.remove_node_property(property_name)
def run_pagerank(property_graph: PropertyGraph, _input_args):
    """Run PageRank with the pull-topological plan, validate the scores,
    and print statistics; the temporary "NewProp" property is removed
    before returning."""
    output_property = "NewProp"
    # Convergence and teleport parameters for the pull-topological plan.
    tolerance = 0.000001
    max_iteration = 1000
    alpha = 0.85
    plan = analytics.PagerankPlan.pull_topological(tolerance, max_iteration, alpha)
    with time_block("pagerank"):
        analytics.pagerank(property_graph, output_property, plan)
    check_schema(property_graph, output_property)
    analytics.pagerank_assert_valid(property_graph, output_property)
    summary = analytics.PagerankStatistics(property_graph, output_property)
    print(f"STATS:\n{summary}")
    property_graph.remove_node_property(output_property)