Example #1
0
def _calc_bc_subset_fixed(G, Gnx, normalized, weight, k, seed, result_dtype):
    """Run cugraph edge BC twice — once with ``k`` as an int (internal
    sampling) and once with the explicitly pre-sampled sources — and
    return one frame with 'cu_bc' and 'ref_bc' columns side by side."""
    assert isinstance(k, int), (
        "This test is meant for verifying coherence "
        "when k is given as an int"
    )
    # In the fixed set we compare cu_bc against itself as we random.seed(seed)
    # on the same seed and then sample on the number of vertices themselves
    if seed is None:
        seed = 123  # random.seed(None) uses time, but we want same sources
    random.seed(seed)  # It will be called again in cugraph's call
    sources = random.sample(range(G.number_of_vertices()), k)

    shared_kwargs = dict(
        normalized=normalized,
        weight=weight,
        result_dtype=result_dtype,
    )

    # The first call is going to proceed to the random sampling in the same
    # fashion as the lines above
    df = cugraph.edge_betweenness_centrality(G, k=k, seed=seed,
                                             **shared_kwargs)

    # The second call is going to process source that were already sampled
    # We set seed to None as k : int, seed : not none should not be normal
    # behavior
    df2 = cugraph.edge_betweenness_centrality(G, k=sources, seed=None,
                                              **shared_kwargs)

    sorted_df = (
        df.sort_values(["src", "dst"])
          .rename(columns={"betweenness_centrality": "cu_bc"}, copy=False)
          .reset_index(drop=True)
    )
    sorted_df2 = (
        df2.sort_values(["src", "dst"])
           .rename(columns={"betweenness_centrality": "ref_bc"}, copy=False)
           .reset_index(drop=True)
    )

    # Columns line up row-for-row because both frames share the same
    # (src, dst) sort order.
    return cudf.concat([sorted_df, sorted_df2["ref_bc"]],
                       axis=1,
                       sort=False)
Example #2
0
def _calc_bc_full(G, Gnx, normalized, weight, k, seed, result_dtype):
    """Compute edge BC with cugraph and networkx and merge the results
    on (src, dst) into columns 'cu_bc' and 'ref_bc'."""
    df = cugraph.edge_betweenness_centrality(G,
                                             k=k,
                                             normalized=normalized,
                                             weight=weight,
                                             seed=seed,
                                             result_dtype=result_dtype)

    # The caller asked for a specific dtype; make sure cugraph honored it.
    assert df["betweenness_centrality"].dtype == result_dtype, \
        "'betweenness_centrality' column has not the expected type"

    nx_bc_dict = nx.edge_betweenness_centrality(
        Gnx, k=k, normalized=normalized, seed=seed, weight=weight
    )

    # Reference frame from networkx, renamed so the merged result is
    # unambiguous.
    nx_df = generate_nx_result(
        nx_bc_dict, type(Gnx) is nx.DiGraph
    ).rename(columns={"betweenness_centrality": "ref_bc"}, copy=False)

    merged_df = (
        df.merge(nx_df, on=['src', 'dst'])
          .rename(columns={"betweenness_centrality": "cu_bc"}, copy=False)
          .reset_index(drop=True)
    )

    return merged_df
Example #3
0
def _calc_bc_full(G, Gnx, normalized, weight, k, seed, result_dtype):
    """Compute edge BC with cugraph and networkx; align by sorting on
    (src, dst) and concatenate the reference column alongside."""
    df = cugraph.edge_betweenness_centrality(G,
                                             k=k,
                                             normalized=normalized,
                                             weight=weight,
                                             seed=seed,
                                             result_dtype=result_dtype)
    # The caller asked for a specific dtype; make sure cugraph honored it.
    assert df["betweenness_centrality"].dtype == result_dtype, \
        "'betweenness_centrality' column has not the expected type"
    nx_bc_dict = nx.edge_betweenness_centrality(
        Gnx, k=k, normalized=normalized, seed=seed, weight=weight
    )

    # Reference frame from networkx with the column renamed for clarity.
    nx_df = generate_nx_result(
        nx_bc_dict, type(Gnx) is nx.DiGraph
    ).rename(columns={"betweenness_centrality": "ref_bc"}, copy=False)

    sorted_df = (
        df.sort_values(["src", "dst"])
          .rename(columns={"betweenness_centrality": "cu_bc"}, copy=False)
          .reset_index(drop=True)
    )

    # Rows are assumed to line up after the (src, dst) sort.
    sorted_df = cudf.concat([sorted_df, nx_df["ref_bc"]], axis=1, sort=False)
    return sorted_df
def test_edge_betweenness_centrality_nx(
        graph_file,
        directed,
        edgevals
):
    """Compare cugraph's NetworkX-compatible edge BC against NetworkX.

    An edge counts as a mismatch when the same edge key differs by more
    than 0.01 in centrality; the test passes when fewer than 1% of the
    edges mismatch.
    """
    prepare_test()

    Gnx = utils.generate_nx_graph_from_file(graph_file, directed, edgevals)

    nx_bc = nx.edge_betweenness_centrality(Gnx)
    cu_bc = cugraph.edge_betweenness_centrality(Gnx)

    # Calculating mismatch: sort both result dicts by edge key so entries
    # at the same index refer to the same edge.
    networkx_bc = sorted(nx_bc.items(), key=lambda x: x[0])
    cugraph_bc = sorted(cu_bc.items(), key=lambda x: x[0])
    err = 0
    assert len(cugraph_bc) == len(networkx_bc)
    for i in range(len(cugraph_bc)):
        if (
            abs(cugraph_bc[i][1] - networkx_bc[i][1]) > 0.01
            and cugraph_bc[i][0] == networkx_bc[i][0]
        ):
            err = err + 1
            # Fix: originally printed the cugraph value twice, which made
            # the mismatch report useless; show cugraph vs networkx.
            print(f"{cugraph_bc[i][1]} and {networkx_bc[i][1]}")
    print("Mismatches:", err)
    assert err < (0.01 * len(cugraph_bc))
def _calc_bc_subset(G, Gnx, normalized, weight, k, seed, result_dtype):
    """Compare cugraph edge BC on an explicit source list against
    networkx's seeded k-sampling, merged on (src, dst)."""
    # NOTE: Networkx API does not allow passing a list of vertices
    # And the sampling is operated on Gnx.nodes() directly
    # We first mimic acquisition of the nodes to compare with same sources
    random.seed(seed)  # It will be called again in nx's call
    sources = random.sample(Gnx.nodes(), k)

    # NOTE: Since we sampled the Networkx graph, the sources are already
    # external ids, so we don't need to translate to external ids for
    # cugraph

    df = cugraph.edge_betweenness_centrality(G,
                                             k=sources,
                                             normalized=normalized,
                                             weight=weight,
                                             result_dtype=result_dtype)

    nx_bc_dict = nx.edge_betweenness_centrality(Gnx,
                                                k=k,
                                                normalized=normalized,
                                                weight=weight,
                                                seed=seed)

    nx_df = generate_nx_result(
        nx_bc_dict, type(Gnx) is nx.DiGraph
    ).rename(columns={"betweenness_centrality": "ref_bc"}, copy=False)

    merged_df = (
        df.merge(nx_df, on=['src', 'dst'])
          .rename(columns={"betweenness_centrality": "cu_bc"}, copy=False)
          .reset_index(drop=True)
    )

    return merged_df
def _calc_bc_subset_fixed(G, Gnx, normalized, weight, k, seed, result_dtype):
    """Verify cugraph's internal sampling: run edge BC with ``k`` as an
    int, then with the same sources reproduced via ``random.seed(seed)``,
    and return the two results merged on (src, dst) as 'cu_bc'/'ref_bc'.
    """
    assert isinstance(k, int), (
        "This test is meant for verifying coherence "
        "when k is given as an int"
    )
    # In the fixed set we compare cu_bc against itself as we random.seed(seed)
    # on the same seed and then sample on the number of vertices themselves
    if seed is None:
        seed = 123  # random.seed(None) uses time, but we want same sources
    random.seed(seed)  # It will be called again in cugraph's call
    sources = random.sample(range(G.number_of_vertices()), k)

    # Sampling was done on internal (renumbered) vertex ids; translate the
    # sources back to external ids before handing them to the public API.
    if G.renumbered:
        sources_df = cudf.DataFrame({'src': sources})
        sources = G.unrenumber(sources_df, 'src')['src'].to_pandas().tolist()

    # The first call is going to proceed to the random sampling in the same
    # fashion as the lines above
    df = cugraph.edge_betweenness_centrality(
        G,
        k=k,
        normalized=normalized,
        weight=weight,
        seed=seed,
        result_dtype=result_dtype,
    ).rename(
        columns={"betweenness_centrality": "cu_bc"}, copy=False
    )

    # The second call is going to process source that were already sampled
    # We set seed to None as k : int, seed : not none should not be normal
    # behavior
    df2 = cugraph.edge_betweenness_centrality(
        G,
        k=sources,
        normalized=normalized,
        weight=weight,
        seed=None,
        result_dtype=result_dtype,
    ).rename(
        columns={"betweenness_centrality": "ref_bc"}, copy=False
    ).reset_index(drop=True)

    # Inner merge on edge keys pairs each edge's two centrality values.
    merged_df = df.merge(df2, on=['src', 'dst']).reset_index(drop=True)

    return merged_df
Example #7
0
def _calc_bc_subset(G, Gnx, normalized, weight, k, seed, result_dtype):
    """Compare cugraph edge BC on an explicit source list against
    networkx's seeded k-sampling; align by sorting on (src, dst) and
    concatenate the reference column alongside."""
    # NOTE: Networkx API does not allow passing a list of vertices
    # And the sampling is operated on Gnx.nodes() directly
    # We first mimic acquisition of the nodes to compare with same sources
    random.seed(seed)  # It will be called again in nx's call
    sources = random.sample(Gnx.nodes(), k)

    df = cugraph.edge_betweenness_centrality(G,
                                             k=sources,
                                             normalized=normalized,
                                             weight=weight,
                                             result_dtype=result_dtype)

    nx_bc_dict = nx.edge_betweenness_centrality(
        Gnx, k=k, normalized=normalized, weight=weight, seed=seed
    )

    # Reference frame from networkx with the column renamed for clarity.
    nx_df = generate_nx_result(
        nx_bc_dict, type(Gnx) is nx.DiGraph
    ).rename(columns={"betweenness_centrality": "ref_bc"}, copy=False)

    sorted_df = (
        df.sort_values(["src", "dst"])
          .rename(columns={"betweenness_centrality": "cu_bc"}, copy=False)
          .reset_index(drop=True)
    )

    # Rows are assumed to line up after the (src, dst) sort.
    sorted_df = cudf.concat([sorted_df, nx_df["ref_bc"]], axis=1, sort=False)

    return sorted_df