Esempio n. 1
0
def test_connected_components():
    """Check connected components on the symmetric and directed rmat10 graphs.

    The symmetric run is pinned to known statistics and validated; the
    directed run must reproduce the same statistics.
    """
    graph = Graph(get_rdg_dataset("rmat10_symmetric"))

    # Graph is already symmetric. Last bool argument (True)
    # indicates that.
    connected_components(graph, "output_sym", True)

    stats_sym = ConnectedComponentsStatistics(graph, "output_sym")

    assert stats_sym.total_components == 69
    assert stats_sym.total_non_trivial_components == 1
    assert stats_sym.largest_component_size == 956
    assert stats_sym.largest_component_ratio == approx(0.933594)

    connected_components_assert_valid(graph, "output_sym")

    # Graph is not symmetric. Last bool argument (False)
    # indicates that. Connected components routine will create
    # undirected view for computation.
    graph = Graph(get_rdg_dataset("rmat10"))

    connected_components(graph, "output", False)

    stats = ConnectedComponentsStatistics(graph, "output")

    assert stats.total_components == stats_sym.total_components
    assert stats.total_non_trivial_components == stats_sym.total_non_trivial_components
    assert stats.largest_component_size == stats_sym.largest_component_size
    # The ratio is a float: compare with a tolerance, like the pinned value
    # above, instead of relying on bit-exact equality.
    assert stats.largest_component_ratio == approx(stats_sym.largest_component_ratio)
Esempio n. 2
0
def test_louvain_clustering():
    """Run Louvain clustering on both rmat10 variants and validate each result.

    No statistic values are pinned; the test only requires that clustering,
    validation and statistics construction complete without raising.
    """
    # (dataset name, output property name, is-symmetric flag)
    cases = (
        ("rmat10_symmetric", "output_sym", True),
        ("rmat10", "output", False),
    )
    for dataset, output_property, is_symmetric in cases:
        graph = Graph(get_rdg_dataset(dataset))

        louvain_clustering(graph, "value", output_property, is_symmetric)

        louvain_clustering_assert_valid(graph, "value", output_property)

        # Built only to check that statistics can be computed.
        LouvainClusteringStatistics(graph, "value", output_property)
Esempio n. 3
0
def test_cdlp():
    """Run cdlp on both rmat10 variants and pin the community statistics.

    Both the directed and the symmetric dataset are expected to yield the
    same community statistics.
    """
    # (dataset name, last bool argument passed to cdlp)
    variants = (("rmat10", False), ("rmat10_symmetric", True))
    for dataset, is_symmetric in variants:
        graph = Graph(get_rdg_dataset(dataset))
        cdlp(graph, "output", 10, is_symmetric)

        community_stats = CdlpStatistics(graph, "output")
        assert community_stats.total_communities == 69
        assert community_stats.total_non_trivial_communities == 1
        assert community_stats.largest_community_size == 956
        assert community_stats.largest_community_ratio == approx(0.933594)
Esempio n. 4
0
def test_k_truss_fail():
    """Check that k_truss raises GaloisError for k values of 2 and 1."""
    graph = Graph(get_rdg_dataset("rmat10_symmetric"))

    # Each rejected k gets its own output property name.
    rejected = ((2, "output"), (1, "output2"))
    for k, output_property in rejected:
        with raises(GaloisError):
            k_truss(graph, k, output_property)
Esempio n. 5
0
def test_triangle_count_presorted():
    """Count triangles on a graph that the test has sorted up front.

    Nodes are sorted by degree and edges by destination before counting, and
    the node-iteration plan is built with relabeling off and edges_sorted on.
    """
    graph = Graph(get_rdg_dataset("rmat15_cleaned_symmetric"))
    sort_nodes_by_degree(graph)
    sort_all_edges_by_dest(graph)

    plan = TriangleCountPlan.node_iteration(relabeling=False, edges_sorted=True)
    triangles = triangle_count(graph, plan)

    assert triangles == 282617
Esempio n. 6
0
def test_local_clustering_coefficient():
    """Compute local clustering coefficients and sanity-check the output.

    The last entry of the "output" node property must be 0 and no entry may
    be NaN.
    """
    graph: Graph = Graph(get_rdg_dataset("rmat15_cleaned_symmetric"))
    local_clustering_coefficient(graph, "output")

    coefficients = graph.get_node_property("output")

    last_value = coefficients[-1].as_py()
    assert last_value == 0
    assert not np.any(np.isnan(coefficients))
Esempio n. 7
0
def test_k_truss():
    """Run k_truss with k=10 on rmat10_symmetric and pin the surviving edge count."""
    k = 10
    output_property = "output"
    graph = Graph(get_rdg_dataset("rmat10_symmetric"))

    k_truss(graph, k, output_property)

    truss_stats = KTrussStatistics(graph, k, output_property)
    assert truss_stats.number_of_edges_left == 13339

    k_truss_assert_valid(graph, k, output_property)
Esempio n. 8
0
def test_leiden_clustering():
    """Run Leiden clustering on both rmat10 variants and compare statistics.

    Each run is validated, and the directed run must report the same cluster
    counts and largest cluster size as the symmetric run.
    """
    symmetric_graph = Graph(get_rdg_dataset("rmat10_symmetric"))
    leiden_clustering(symmetric_graph, "value", "output_sym", True)
    leiden_clustering_assert_valid(symmetric_graph, "value", "output_sym")
    stats_sym = LeidenClusteringStatistics(symmetric_graph, "value", "output_sym")

    directed_graph = Graph(get_rdg_dataset("rmat10"))
    leiden_clustering(directed_graph, "value", "output", False)
    leiden_clustering_assert_valid(directed_graph, "value", "output")
    stats = LeidenClusteringStatistics(directed_graph, "value", "output")

    compared_fields = (
        "n_clusters",
        "n_non_trivial_clusters",
        "largest_cluster_size",
    )
    for field in compared_fields:
        assert getattr(stats, field) == getattr(stats_sym, field)
Esempio n. 9
0
def test_k_core():
    """Run k_core with k=10 on both rmat10 variants and compare the core sizes.

    The symmetric run is pinned to a known node count and validated; the
    directed run must report the same node count.
    """
    k = 10

    # Symmetric input: the trailing True tells k_core the graph is
    # already symmetric.
    symmetric_graph = Graph(get_rdg_dataset("rmat10_symmetric"))
    k_core(symmetric_graph, k, "output_sym", True)
    stats_sym = KCoreStatistics(symmetric_graph, k, "output_sym")

    assert stats_sym.number_of_nodes_in_kcore == 438

    k_core_assert_valid(symmetric_graph, k, "output_sym")

    # Non-symmetric input: the trailing False indicates that, and the
    # k_core routine will create an undirected view for computation.
    directed_graph = Graph(get_rdg_dataset("rmat10"))
    k_core(directed_graph, k, "output", False)
    stats = KCoreStatistics(directed_graph, k, "output")

    assert stats.number_of_nodes_in_kcore == stats_sym.number_of_nodes_in_kcore
Esempio n. 10
0
def test_independent_set():
    """Run independent_set with the default plan and the pull plan.

    For each run, statistics are constructed and the result is validated;
    no statistic values are pinned.
    """
    graph = Graph(get_rdg_dataset("rmat10_symmetric"))

    # First run: no plan argument is passed.
    default_property = "output"
    independent_set(graph, default_property)
    IndependentSetStatistics(graph, default_property)
    independent_set_assert_valid(graph, default_property)

    # Second run: explicit pull plan, written to a separate property.
    pull_property = "output2"
    independent_set(graph, pull_property, IndependentSetPlan.pull())
    IndependentSetStatistics(graph, pull_property)
    independent_set_assert_valid(graph, pull_property)
Esempio n. 11
0
def test_subgraph_extraction():
    """Extract a four-node subgraph and compare its adjacency to the source graph.

    The expected adjacency is built from the original graph: for each selected
    node, the destinations that are also selected, expressed as positions
    within ``nodes``.
    """
    graph = Graph(get_rdg_dataset("rmat15_cleaned_symmetric"))
    sort_all_edges_by_dest(graph)
    nodes = [1, 3, 11, 120]

    expected_edges = []
    for src in nodes:
        destinations = [graph.get_edge_dst(e) for e in graph.out_edge_ids(src)]
        # Keep only edges that stay inside the selection, renumbered to the
        # position of the destination within `nodes`.
        expected_edges.append(
            [nodes.index(dst) for dst in destinations if dst in nodes])

    pg = subgraph_extraction(graph, nodes)

    assert isinstance(pg, Graph)
    assert pg.num_nodes() == len(nodes)
    assert pg.num_edges() == 6

    for new_id, expected in zip(range(len(expected_edges)), expected_edges):
        assert len(pg.out_edge_ids(new_id)) == len(expected)
        actual = [pg.get_edge_dst(e) for e in pg.out_edge_ids(new_id)]
        assert actual == expected
Esempio n. 12
0
def test_triangle_count():
    """Count triangles with several plans and pin the result for each.

    Also checks that the destinations of node 0's out-edges are unchanged
    after the default, node-iteration and edge-iteration runs.
    """
    expected_triangles = 282617
    graph = Graph(get_rdg_dataset("rmat15_cleaned_symmetric"))

    # Snapshot taken before any counting so it can be compared afterwards.
    first_node_dests_before = [
        graph.get_edge_dst(e) for e in graph.out_edge_ids(0)
    ]

    assert triangle_count(graph) == expected_triangles

    # Plans are created just before use, in the same order as listed.
    plan_factories = (
        TriangleCountPlan.node_iteration,
        TriangleCountPlan.edge_iteration,
    )
    for make_plan in plan_factories:
        assert triangle_count(graph, make_plan()) == expected_triangles

    first_node_dests_after = [
        graph.get_edge_dst(e) for e in graph.out_edge_ids(0)
    ]
    assert first_node_dests_after == first_node_dests_before

    sort_all_edges_by_dest(graph)
    ordered_plan = TriangleCountPlan.ordered_count(edges_sorted=True)
    assert triangle_count(graph, ordered_plan) == expected_triangles
Esempio n. 13
0
def test_storage_format_unchanged_local():
    """
    load up a known good rdg, store a copy
    compare the storage format of the known good rdg to our stored copy
    Attempts to catch the following situations:
    1) the storage_format_version was changed but the rdg test datasets have not been updated
    2) the on disk storage format was changed, but the storage_format_version was not
    3) unstable storage format changes that are in use without the unstable storage format flag

    TODO(emcginnis): this test would be best if we had some way to create a 'maximal' RDG, aka one with as many
        optional features present as possible
        the current 'maximal' input requires the developer to be aware of all optional storage format features
        which is not realistic/sustainable
    """
    orig_rdg = get_rdg_dataset("ldbc_003_maximal")
    orig_graph = Graph(orig_rdg)
    new_rdg = tempfile.mkdtemp()
    orig_graph.write(new_rdg)
    # ensure we can load it, so we can say it is sort of sane
    Graph(new_rdg)

    orig_rdg_path = pathlib.Path(orig_rdg)
    new_rdg_path = pathlib.Path(new_rdg)

    # NOTE: adjacent string literals are concatenated with no separator, so
    # each fragment that continues a sentence must end with an explicit space.
    assert get_storage_format_version(
        orig_rdg_path
    ) == get_storage_format_version(new_rdg_path), (
        "storage_format_version mismatch between the known good rdg and the generated rdg. Ensure that the rdgs in "
        "test-datasets/rdg_datasets have been updated to use the newest supported storage_format_version."
    )

    assert validate_rdg_storage_format_match(orig_rdg_path, new_rdg_path), (
        "storage format mismatch between the known good rdg and the generated rdg. "
        "This usually is due to one of the following: \n"
        "1) The storage format was changed, but the storage_format_version was not bumped up \n"
        "2) An unstable feature is not properly gated behind the unstable storage format flag, "
        "resulting in the unstable feature getting added to stable RDGs.")

    # only cleanup the temp rdg on success to make debugging failures easier
    shutil.rmtree(new_rdg)
Esempio n. 14
0
def pg_rmat15_cleaned_symmetric():
    """Call katana.local.initialize() and return the rmat15_cleaned_symmetric graph."""
    katana.local.initialize()
    return Graph(get_rdg_dataset("rmat15_cleaned_symmetric"))
Esempio n. 15
0
def graph():
    """Return a Graph loaded from the ldbc_003 rdg dataset."""
    return Graph(get_rdg_dataset("ldbc_003"))