def test_connected_components():
    """Check connected-components statistics on rmat10, symmetric and not.

    The symmetric dataset is checked against hard-coded expected statistics
    and passed to ``connected_components_assert_valid``. The non-symmetric
    dataset must then produce the same statistics as the symmetric run.
    """
    graph = Graph(get_rdg_dataset("rmat10_symmetric"))

    # The graph is already symmetric; the last bool argument (True) says so.
    connected_components(graph, "output_sym", True)

    stats_sym = ConnectedComponentsStatistics(graph, "output_sym")
    assert stats_sym.total_components == 69
    assert stats_sym.total_non_trivial_components == 1
    assert stats_sym.largest_component_size == 956
    assert stats_sym.largest_component_ratio == approx(0.933594)

    connected_components_assert_valid(graph, "output_sym")

    # The graph is not symmetric; the last bool argument (False) says so.
    # The connected components routine will create an undirected view for
    # the computation.
    graph = Graph(get_rdg_dataset("rmat10"))
    connected_components(graph, "output", False)

    stats = ConnectedComponentsStatistics(graph, "output")
    assert stats.total_components == stats_sym.total_components
    assert stats.total_non_trivial_components == stats_sym.total_non_trivial_components
    assert stats.largest_component_size == stats_sym.largest_component_size
    # The ratio is a float: compare with a tolerance, as is done for the
    # hard-coded expected ratio above, instead of exact equality.
    assert stats.largest_component_ratio == approx(stats_sym.largest_component_ratio)
def test_louvain_clustering():
    """Run Louvain clustering on rmat10_symmetric and rmat10 and validate both.

    For each dataset the clustering is computed from the "value" property,
    validated with ``louvain_clustering_assert_valid``, and the statistics
    object is constructed (its contents are not inspected).
    """
    # (dataset name, output property, last bool argument passed to the routine)
    runs = (
        ("rmat10_symmetric", "output_sym", True),
        ("rmat10", "output", False),
    )
    for dataset, output_property, last_flag in runs:
        loaded = Graph(get_rdg_dataset(dataset))
        louvain_clustering(loaded, "value", output_property, last_flag)
        louvain_clustering_assert_valid(loaded, "value", output_property)
        # Only checks that the statistics can be computed without raising.
        LouvainClusteringStatistics(loaded, "value", output_property)
def test_cdlp():
    """Run cdlp on rmat10 and rmat10_symmetric and check community statistics.

    Both datasets are expected to produce the same hard-coded statistics.
    """
    # (dataset name, last bool argument passed to cdlp) -- the flag is True
    # only for the symmetric dataset; presumably it marks the input as
    # already symmetric (TODO confirm against the cdlp signature).
    runs = (("rmat10", False), ("rmat10_symmetric", True))
    for dataset, last_flag in runs:
        loaded = Graph(get_rdg_dataset(dataset))
        cdlp(loaded, "output", 10, last_flag)

        community_stats = CdlpStatistics(loaded, "output")
        assert community_stats.total_communities == 69
        assert community_stats.total_non_trivial_communities == 1
        assert community_stats.largest_community_size == 956
        assert community_stats.largest_community_ratio == approx(0.933594)
def test_k_truss_fail():
    """Expect GaloisError when k_truss is called with 2 and then with 1."""
    loaded = Graph(get_rdg_dataset("rmat10_symmetric"))
    # (second argument to k_truss, output property) for each rejected call.
    rejected_calls = ((2, "output"), (1, "output2"))
    for k, output_property in rejected_calls:
        with raises(GaloisError):
            k_truss(loaded, k, output_property)
def test_triangle_count_presorted():
    """Count triangles with a node-iteration plan on a graph sorted up front."""
    loaded = Graph(get_rdg_dataset("rmat15_cleaned_symmetric"))

    # Sort before counting; the plan below is created with relabeling=False
    # and edges_sorted=True.
    sort_nodes_by_degree(loaded)
    sort_all_edges_by_dest(loaded)

    plan = TriangleCountPlan.node_iteration(relabeling=False, edges_sorted=True)
    triangles = triangle_count(loaded, plan)
    assert triangles == 282617
def test_local_clustering_coefficient():
    """Compute local clustering coefficients and sanity-check the output.

    Checks that the last entry of the "output" node property is 0 and that
    the property contains no NaN values.
    """
    graph: Graph = Graph(get_rdg_dataset("rmat15_cleaned_symmetric"))
    local_clustering_coefficient(graph, "output")

    coefficients = graph.get_node_property("output")
    last_value = coefficients[-1].as_py()
    assert last_value == 0
    assert not np.any(np.isnan(coefficients))
def test_k_truss():
    """Run k_truss with 10 on rmat10_symmetric and check the surviving edges."""
    k = 10
    output_property = "output"
    loaded = Graph(get_rdg_dataset("rmat10_symmetric"))

    k_truss(loaded, k, output_property)

    truss_stats = KTrussStatistics(loaded, k, output_property)
    assert truss_stats.number_of_edges_left == 13339

    k_truss_assert_valid(loaded, k, output_property)
def test_leiden_clustering():
    """Run Leiden clustering on rmat10_symmetric and rmat10 and compare them.

    Each run is validated with ``leiden_clustering_assert_valid``; the
    cluster statistics of the two runs must then agree field by field.
    """
    loaded = Graph(get_rdg_dataset("rmat10_symmetric"))
    leiden_clustering(loaded, "value", "output_sym", True)
    leiden_clustering_assert_valid(loaded, "value", "output_sym")
    stats_sym = LeidenClusteringStatistics(loaded, "value", "output_sym")

    loaded = Graph(get_rdg_dataset("rmat10"))
    leiden_clustering(loaded, "value", "output", False)
    leiden_clustering_assert_valid(loaded, "value", "output")
    stats = LeidenClusteringStatistics(loaded, "value", "output")

    # Compared in this order so the first mismatch reported stays the same.
    compared_fields = ("n_clusters", "n_non_trivial_clusters", "largest_cluster_size")
    for field in compared_fields:
        assert getattr(stats, field) == getattr(stats_sym, field)
def test_k_core():
    """Run k_core with 10 on rmat10_symmetric and rmat10 and compare core sizes.

    The symmetric run is checked against a hard-coded node count and
    validated; the non-symmetric run must report the same node count.
    """
    k = 10

    # Symmetric input: the last bool argument (True) tells k_core so.
    loaded = Graph(get_rdg_dataset("rmat10_symmetric"))
    k_core(loaded, k, "output_sym", True)

    stats_sym = KCoreStatistics(loaded, k, "output_sym")
    assert stats_sym.number_of_nodes_in_kcore == 438
    k_core_assert_valid(loaded, k, "output_sym")

    # Non-symmetric input: the last bool argument (False) tells k_core so,
    # and the routine will create an undirected view for the computation.
    loaded = Graph(get_rdg_dataset("rmat10"))
    k_core(loaded, k, "output", False)

    stats = KCoreStatistics(loaded, k, "output")
    assert stats.number_of_nodes_in_kcore == stats_sym.number_of_nodes_in_kcore
def test_independent_set():
    """Compute independent sets with the default plan and the pull plan.

    Each result is validated with ``independent_set_assert_valid``; the
    statistics objects are only constructed, not inspected.
    """
    loaded = Graph(get_rdg_dataset("rmat10_symmetric"))

    # First run: no plan argument is given.
    independent_set(loaded, "output")
    IndependentSetStatistics(loaded, "output")
    independent_set_assert_valid(loaded, "output")

    # Second run: explicit pull plan, written to a separate property.
    pull_plan = IndependentSetPlan.pull()
    independent_set(loaded, "output2", pull_plan)
    IndependentSetStatistics(loaded, "output2")
    independent_set_assert_valid(loaded, "output2")
def test_subgraph_extraction():
    """Extract a four-node subgraph and compare its edges to the source graph.

    The expected adjacency is built from the original graph: for each
    selected node, the destinations of its out-edges that are also selected,
    expressed as positions within ``nodes``. The extracted graph must have
    one node per selected node, 6 edges, and matching per-node destinations.
    """
    graph = Graph(get_rdg_dataset("rmat15_cleaned_symmetric"))
    sort_all_edges_by_dest(graph)
    nodes = [1, 3, 11, 120]

    # Position of every selected node within `nodes`. Built once so each
    # edge destination is fetched a single time and resolved without
    # rescanning the list.
    position_of = {node: position for position, node in enumerate(nodes)}

    expected_edges = []
    for node in nodes:
        destinations = (graph.get_edge_dst(e) for e in graph.out_edge_ids(node))
        expected_edges.append([position_of[dst] for dst in destinations if dst in position_of])

    pg = subgraph_extraction(graph, nodes)
    assert isinstance(pg, Graph)
    assert pg.num_nodes() == len(nodes)
    assert pg.num_edges() == 6

    # Node i of the extracted graph is compared against the i-th selected node.
    for i, expected in enumerate(expected_edges):
        out_edges = pg.out_edge_ids(i)
        assert len(out_edges) == len(expected)
        assert [pg.get_edge_dst(e) for e in out_edges] == expected
def test_triangle_count():
    """Count triangles with several plans and check they all agree.

    Also checks that the out-edge destinations of node 0 are the same after
    the default, node-iteration and edge-iteration counts as before them.
    """
    expected_triangles = 282617
    loaded = Graph(get_rdg_dataset("rmat15_cleaned_symmetric"))

    # Snapshot of node 0's out-edge destinations before any counting.
    first_node_dests_before = [loaded.get_edge_dst(e) for e in loaded.out_edge_ids(0)]

    assert triangle_count(loaded) == expected_triangles
    assert triangle_count(loaded, TriangleCountPlan.node_iteration()) == expected_triangles
    assert triangle_count(loaded, TriangleCountPlan.edge_iteration()) == expected_triangles

    first_node_dests_after = [loaded.get_edge_dst(e) for e in loaded.out_edge_ids(0)]
    assert first_node_dests_after == first_node_dests_before

    # Sort the edges explicitly, then run the ordered count with edges_sorted=True.
    sort_all_edges_by_dest(loaded)
    ordered_plan = TriangleCountPlan.ordered_count(edges_sorted=True)
    assert triangle_count(loaded, ordered_plan) == expected_triangles
def test_storage_format_unchanged_local():
    """
    Load up a known good rdg, store a copy, and compare the storage format
    of the known good rdg to our stored copy.

    Attempts to catch the following situations:
    1) the storage_format_version was changed but the rdg test datasets have
       not been updated
    2) the on disk storage format was changed, but the storage_format_version
       was not
    3) unstable storage format changes that are in use without the unstable
       storage format flag

    TODO(emcginnis): this test would be best if we had some way to create a
    'maximal' RDG, aka one with as many optional features present as possible.
    The current 'maximal' input requires the developer to be aware of all
    optional storage format features, which is not realistic/sustainable.
    """
    orig_rdg = get_rdg_dataset("ldbc_003_maximal")
    orig_graph = Graph(orig_rdg)

    new_rdg = tempfile.mkdtemp()
    orig_graph.write(new_rdg)

    # ensure we can load it, so we can say it is sort of sane
    Graph(new_rdg)

    orig_rdg_path = pathlib.Path(orig_rdg)
    new_rdg_path = pathlib.Path(new_rdg)

    # Adjacent string literals are joined with no separator, so each fragment
    # below carries its own trailing space where the sentence continues.
    assert get_storage_format_version(orig_rdg_path) == get_storage_format_version(new_rdg_path), (
        "storage_format_version mismatch between the known good rdg and the generated rdg. "
        "Ensure that the rdgs in test-datasets/rdg_datasets have been updated to use the "
        "newest supported storage_format_version."
    )

    assert validate_rdg_storage_format_match(orig_rdg_path, new_rdg_path), (
        "storage format mismatch between the known good rdg and the generated rdg. "
        "This usually is due to one of the following: \n"
        "1) The storage format was changed, but the storage_format_version was not bumped up \n"
        "2) An unstable feature is not properly gated behind the unstable storage format flag, "
        "resulting in the unstable feature getting added to stable RDGs."
    )

    # only cleanup the temp rdg on success to make debugging failures easier
    shutil.rmtree(new_rdg)
def pg_rmat15_cleaned_symmetric():
    """Initialize katana.local and return the rmat15_cleaned_symmetric graph."""
    # Initialization happens before the dataset is resolved and loaded.
    katana.local.initialize()
    dataset_path = get_rdg_dataset("rmat15_cleaned_symmetric")
    return Graph(dataset_path)
def graph():
    """Return a Graph loaded from the "ldbc_003" RDG dataset."""
    return Graph(get_rdg_dataset("ldbc_003"))