Example #1
def _calc_bc_subset_fixed(
    G, Gnx, normalized, weight, endpoints, k, seed, result_dtype
):
    assert isinstance(k, int), (
        "This test is meant for verifying coherence "
        "when k is given as an int"
    )
    # In the fixed-set case we compare cu_bc against itself: we call
    # random.seed(seed) with the same seed here and then sample over the
    # graph's vertex range, mirroring what cugraph does internally
    if seed is None:
        seed = 123  # random.seed(None) uses time, but we want same sources
    random.seed(seed)  # It will be called again in cugraph's call
    sources = random.sample(range(G.number_of_vertices()), k)

    if G.renumbered:
        sources_df = cudf.DataFrame({'src': sources})
        sources = G.unrenumber(sources_df, 'src')['src'].to_pandas().tolist()

    # The first call performs the random sampling in the same fashion as the
    # lines above
    df = cugraph.betweenness_centrality(
        G,
        k=k,
        normalized=normalized,
        weight=weight,
        endpoints=endpoints,
        seed=seed,
        result_dtype=result_dtype,
    )
    sorted_df = df.sort_values("vertex").rename(
        columns={"betweenness_centrality": "cu_bc"}, copy=False
    ).reset_index(drop=True)

    # The second call processes the sources that were already sampled above.
    # We set seed to None because passing an explicit source list together
    # with a seed is not expected usage
    df2 = cugraph.betweenness_centrality(
        G,
        k=sources,
        normalized=normalized,
        weight=weight,
        endpoints=endpoints,
        seed=None,
        result_dtype=result_dtype,
    )
    sorted_df2 = df2.sort_values("vertex").rename(
        columns={"betweenness_centrality": "ref_bc"}, copy=False
    ).reset_index(drop=True)

    merged_sorted_df = cudf.concat(
        [sorted_df, sorted_df2["ref_bc"]], axis=1, sort=False
    )

    return merged_sorted_df
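A minimal usage sketch for the helper above, assuming a small hand-built edge list; the graph construction mirrors the cugraph.DiGraph / from_cudf_edgelist pattern used in Example #7, and the tolerance is illustrative:

import numpy as np
import cudf
import cugraph

# Small directed cycle as illustrative data.
edges = cudf.DataFrame({"src": [0, 1, 2, 3, 4], "dst": [1, 2, 3, 4, 0]})
G = cugraph.DiGraph()
G.from_cudf_edgelist(edges, source="src", destination="dst")

# Compare the int-k path against the explicit-source-list path.
merged = _calc_bc_subset_fixed(
    G, None, normalized=True, weight=None, endpoints=False,
    k=2, seed=42, result_dtype=np.float64,
)
# Both columns should agree, since the same sources were sampled.
assert (merged["cu_bc"] - merged["ref_bc"]).abs().max() < 1e-6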
Example #2
def test_betweenness_centrality_nx(
        graph_file,
        directed,
        edgevals
):
    prepare_test()

    Gnx = utils.generate_nx_graph_from_file(graph_file, directed, edgevals)

    nx_bc = nx.betweenness_centrality(Gnx)
    cu_bc = cugraph.betweenness_centrality(Gnx)

    # Calculating mismatch
    networkx_bc = sorted(nx_bc.items(), key=lambda x: x[0])
    cugraph_bc = sorted(cu_bc.items(), key=lambda x: x[0])
    err = 0
    assert len(cugraph_bc) == len(networkx_bc)
    for i in range(len(cugraph_bc)):
        # Both lists are sorted by vertex id, so entries must align
        assert cugraph_bc[i][0] == networkx_bc[i][0]
        if abs(cugraph_bc[i][1] - networkx_bc[i][1]) > 0.01:
            err = err + 1
            print(f"{cugraph_bc[i][1]} and {networkx_bc[i][1]}")
    print("Mismatches:", err)
    assert err < (0.01 * len(cugraph_bc))
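The mismatch loop above can be factored into a small helper; a hedged sketch with a hypothetical count_mismatches name, assuming both dicts are keyed by the same vertices:

def count_mismatches(cu_bc, nx_bc, epsilon=0.01):
    # Hypothetical helper: count vertices whose scores differ by more
    # than epsilon; assumes both dicts share the same vertex keys.
    assert cu_bc.keys() == nx_bc.keys()
    return sum(1 for v in cu_bc if abs(cu_bc[v] - nx_bc[v]) > epsilon)

With this helper, the body of the test reduces to err = count_mismatches(cu_bc, nx_bc) followed by assert err < 0.01 * len(cu_bc).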
Example #3
def _calc_bc_full(
    G, Gnx, normalized, weight, endpoints, k, seed, result_dtype
):
    df = cugraph.betweenness_centrality(
        G,
        k=k,
        normalized=normalized,
        weight=weight,
        endpoints=endpoints,
        result_dtype=result_dtype,
    )
    assert (
        df["betweenness_centrality"].dtype == result_dtype
    ), "'betweenness_centrality' column does not have the expected type"
    nx_bc = nx.betweenness_centrality(
        Gnx, k=k, normalized=normalized, weight=weight, endpoints=endpoints
    )

    sorted_df = df.sort_values("vertex").rename(
        columns={"betweenness_centrality": "cu_bc"}, copy=False
    ).reset_index(drop=True)
    _, nx_bc = zip(*sorted(nx_bc.items()))
    nx_df = cudf.DataFrame({"ref_bc": nx_bc})

    merged_sorted_df = cudf.concat([sorted_df, nx_df], axis=1, sort=False)

    return merged_sorted_df
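A sketch of how the merged frame returned above might be validated; the helper name and tolerance are assumptions, and the column names cu_bc / ref_bc come from the function itself:

def assert_scores_close(merged_df, epsilon=0.01):
    # Hypothetical check: flag rows where the cuGraph and NetworkX
    # scores diverge beyond epsilon.
    diff = (merged_df["cu_bc"] - merged_df["ref_bc"]).abs()
    n_bad = int((diff > epsilon).sum())
    assert n_bad == 0, f"{n_bad} vertices exceed tolerance {epsilon}"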
Example #4
def _calc_bc_subset(
    G, Gnx, normalized, weight, endpoints, k, seed, result_dtype
):
    # NOTE: The NetworkX API does not allow passing a list of vertices;
    # sampling operates on Gnx.nodes() directly. We first mimic the node
    # acquisition so both libraries use the same sources
    random.seed(seed)  # It will be called again in nx's call
    # list() guards against Python 3.11+, where random.sample rejects
    # non-sequence node views
    sources = random.sample(list(Gnx.nodes()), k)
    df = cugraph.betweenness_centrality(
        G,
        k=sources,
        normalized=normalized,
        weight=weight,
        endpoints=endpoints,
        result_dtype=result_dtype,
    )
    sorted_df = df.sort_values("vertex").rename(
        columns={"betweenness_centrality": "cu_bc"}, copy=False
    ).reset_index(drop=True)

    nx_bc = nx.betweenness_centrality(
        Gnx,
        k=k,
        normalized=normalized,
        weight=weight,
        endpoints=endpoints,
        seed=seed,
    )

    _, nx_bc = zip(*sorted(nx_bc.items()))
    nx_df = cudf.DataFrame({"ref_bc": nx_bc})

    merged_sorted_df = cudf.concat([sorted_df, nx_df], axis=1, sort=False)

    return merged_sorted_df
Example #5
def calc_between_centralities(dg):
    # Return betweenness scores ordered by descending centrality
    between_centers = cnx.betweenness_centrality(dg)
    return dict(
        sorted(between_centers.items(), key=lambda x: x[1], reverse=True)
    )
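Because dicts preserve insertion order (Python 3.7+), the returned mapping is already ranked; a short usage sketch, assuming a graph dg has been built elsewhere:

ranked = calc_between_centralities(dg)
# The first items are the most central vertices.
top5 = list(ranked.items())[:5]
for vertex, score in top5:
    print(f"{vertex}: {score:.4f}")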
Example #6
def _calc_bc_subset_fixed(G, Gnx, normalized, weight, endpoints, k, seed,
                          result_dtype):
    assert isinstance(k, int), "This test is meant for verifying coherence " \
                               "when k is given as an int"
    # In the fixed-set case we compare cu_bc against itself: we call
    # random.seed(seed) with the same seed here and then sample over the
    # graph's vertex range, mirroring what cugraph does internally
    if seed is None:
        seed = 123  # random.seed(None) uses time, but we want same sources
    random.seed(seed)  # It will be called again in cugraph's call
    sources = random.sample(range(G.number_of_vertices()), k)
    # The first call performs the random sampling in the same fashion as the
    # lines above
    df = cugraph.betweenness_centrality(G,
                                        k=k,
                                        normalized=normalized,
                                        weight=weight,
                                        endpoints=endpoints,
                                        seed=seed,
                                        result_dtype=result_dtype)
    # The second call processes the sources that were already sampled above.
    # We set seed to None because passing an explicit source list together
    # with a seed is not expected usage
    df2 = cugraph.betweenness_centrality(G,
                                         k=sources,
                                         normalized=normalized,
                                         weight=weight,
                                         endpoints=endpoints,
                                         seed=None,
                                         result_dtype=result_dtype)
    cu_bc = {
        key: score
        for key, score in zip(df['vertex'].to_array(),
                              df['betweenness_centrality'].to_array())
    }
    cu_bc2 = {
        key: score
        for key, score in zip(df2['vertex'].to_array(),
                              df2['betweenness_centrality'].to_array())
    }

    return cu_bc, cu_bc2
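Since the two dicts returned above come from the same sampled sources, they can be compared entry by entry; a minimal sketch, assuming G and Gnx are already built and with an assumed tolerance:

import numpy as np

cu_bc, cu_bc2 = _calc_bc_subset_fixed(G, Gnx, True, None, False,
                                      k=2, seed=42,
                                      result_dtype=np.float64)
for vertex, score in cu_bc.items():
    assert vertex in cu_bc2
    assert abs(score - cu_bc2[vertex]) < 1e-6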
Example #7
def calc_betweenness_centrality(graph_file, normalized=True):
    cu_M = utils.read_csv_file(graph_file)
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1')

    df = cugraph.betweenness_centrality(G, normalized=normalized)

    NM = utils.read_csv_for_nx(graph_file)
    Gnx = nx.from_pandas_edgelist(NM,
                                  create_using=nx.DiGraph(),
                                  source='0',
                                  target='1')
    nb = nx.betweenness_centrality(Gnx, normalized=normalized)
    pdf = [nb[k] for k in sorted(nb.keys())]
    df['nx'] = pdf
    df = df.rename(columns={'betweenness_centrality': 'cu'})
    return df
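A usage sketch for the helper above; the dataset path is hypothetical and must point at an existing CSV edge list:

df = calc_betweenness_centrality("datasets/karate.csv", normalized=True)
# Largest absolute gap between the cuGraph ('cu') and NetworkX ('nx') scores.
max_err = (df["cu"] - df["nx"]).abs().max()
assert max_err < 0.01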
Example #8
def _calc_bc_full(G, Gnx, normalized, weight, endpoints, k, seed,
                  result_dtype):
    df = cugraph.betweenness_centrality(G,
                                        normalized=normalized,
                                        weight=weight,
                                        endpoints=endpoints,
                                        result_dtype=result_dtype)
    assert df['betweenness_centrality'].dtype == result_dtype, \
        "'betweenness_centrality' column does not have the expected type"
    nx_bc = nx.betweenness_centrality(Gnx,
                                      normalized=normalized,
                                      weight=weight,
                                      endpoints=endpoints)

    cu_bc = {
        key: score
        for key, score in zip(df['vertex'].to_array(),
                              df['betweenness_centrality'].to_array())
    }
    return cu_bc, nx_bc
Example #9
def _calc_bc_subset(G, Gnx, normalized, weight, endpoints, k, seed,
                    result_dtype):
    # NOTE: The NetworkX API does not allow passing a list of vertices;
    # sampling operates on Gnx.nodes() directly. We first mimic the node
    # acquisition so both libraries use the same sources
    random.seed(seed)  # It will be called again in nx's call
    # list() guards against Python 3.11+, where random.sample rejects
    # non-sequence node views
    sources = random.sample(list(Gnx.nodes()), k)
    df = cugraph.betweenness_centrality(G,
                                        normalized=normalized,
                                        weight=weight,
                                        endpoints=endpoints,
                                        k=sources,
                                        result_dtype=result_dtype)
    # Pass weight and endpoints through so the NetworkX reference matches
    # the cugraph call above
    nx_bc = nx.betweenness_centrality(Gnx,
                                      normalized=normalized,
                                      weight=weight,
                                      endpoints=endpoints,
                                      k=k,
                                      seed=seed)
    cu_bc = {
        key: score
        for key, score in zip(df['vertex'].to_array(),
                              df['betweenness_centrality'].to_array())
    }
    return cu_bc, nx_bc
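Finally, a sketch of consuming the (cu_bc, nx_bc) pair returned by the dict-based variants above; the graph objects and tolerance are assumptions:

import numpy as np

cu_bc, nx_bc = _calc_bc_subset(G, Gnx, normalized=True, weight=None,
                               endpoints=False, k=4, seed=42,
                               result_dtype=np.float64)
for vertex, nx_score in sorted(nx_bc.items()):
    assert abs(cu_bc[vertex] - nx_score) < 0.01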