예제 #1
0
def test_katz_centrality_multi_column(graph_file):
    """Compare Katz centrality on a two-column-vertex graph with the
    single-column result for the same edge list.

    The edge list is read from ``graph_file``. A second pair of vertex
    columns is derived by adding 1000 to the first pair, and a graph is
    built from both pairs. The single-column scores are passed as
    ``nstart`` to the multi-column run, and the top-10 vertices of both
    results must be equal.
    """
    gc.collect()

    edges = utils.read_csv_file(graph_file)
    edges.rename(columns={'0': 'src_0', '1': 'dst_0'}, inplace=True)
    # Second vertex column: the same ids shifted by a constant offset.
    edges['src_1'] = edges['src_0'] + 1000
    edges['dst_1'] = edges['dst_0'] + 1000

    # Graph whose vertices are identified by two columns.
    multi_col_graph = cugraph.DiGraph()
    multi_col_graph.from_cudf_edgelist(
        edges,
        source=["src_0", "src_1"],
        destination=["dst_0", "dst_1"],
    )

    # Reference graph using only the original single-column ids.
    single_col_graph = cugraph.DiGraph()
    single_col_graph.from_cudf_edgelist(
        edges, source="src_0", destination="dst_0"
    )

    expected = cugraph.katz_centrality(
        single_col_graph, alpha=None, max_iter=1000
    ).sort_values("vertex").reset_index(drop=True)

    # Starting values for the multi-column run, keyed by both id columns.
    nstart = cudf.DataFrame()
    nstart['vertex_0'] = expected['vertex']
    nstart['vertex_1'] = nstart['vertex_0'] + 1000
    nstart['values'] = expected['katz_centrality']

    result = cugraph.katz_centrality(
        multi_col_graph, nstart=nstart, alpha=None, max_iter=1000
    )
    result = result.sort_values("0_vertex").reset_index(drop=True)
    # Align the column name with the single-column output for comparison.
    result.rename(columns={'0_vertex': 'vertex'}, inplace=True)

    top_result = topKVertices(result, "katz_centrality", 10)
    top_expected = topKVertices(expected, "katz_centrality", 10)

    assert top_result.equals(top_expected)
예제 #2
0
def test_katz_centrality_nx(graph_file):
    """Compare cuGraph and NetworkX Katz centrality on a NetworkX DiGraph.

    NetworkX is given ``alpha = 1 / (max_out_degree + 1)`` while cuGraph is
    called with ``alpha=None``. The test passes when fewer than 10% of the
    vertices have scores that differ by more than 0.1.
    """
    gc.collect()

    nx_edges = utils.read_csv_for_nx(graph_file)
    Gnx = nx.from_pandas_edgelist(
        nx_edges, source="0", target="1", create_using=nx.DiGraph()
    )

    G = cugraph.utilities.convert_from_nx(Gnx)
    top_degree_row = G.degrees().nlargest(n=1, columns="out_degree")
    max_out_degree = top_degree_row["out_degree"].iloc[0]
    katz_alpha = 1 / (max_out_degree + 1)

    nk = nx.katz_centrality(Gnx, alpha=katz_alpha)
    ck = cugraph.katz_centrality(Gnx, alpha=None, max_iter=1000)

    # Order both results by vertex so they can be compared pairwise.
    nk = sorted(nk.items(), key=lambda item: item[0])
    ck = sorted(ck.items(), key=lambda item: item[0])
    assert len(ck) == len(nk)

    # NOTE(review): a pair is only counted when the vertex ids match, so
    # misaligned entries are never reported as mismatches.
    err = sum(
        1
        for (c_vertex, c_score), (n_vertex, n_score) in zip(ck, nk)
        if abs(c_score - n_score) > 0.1 and c_vertex == n_vertex
    )
    print("Mismatches:", err)
    assert err < (0.1 * len(ck))
예제 #3
0
def katz_centrality(G, as_dict=False):
    """Return Katz centrality scores arranged against ``G.nodes``.

    Scores are computed by ``cugraph.katz_centrality`` on the ``W``
    attribute of ``timestep_cache()``, not on ``G`` itself; ``G`` only
    supplies the node sequence.

    Parameters
    ----------
    G : object exposing ``nodes``
        Source of the node sequence used for the output.
    as_dict : bool, optional
        When true, return a dict mapping each entry of ``G.nodes`` to its
        score instead of the bare score sequence.
    """
    scores_by_node = cugraph.katz_centrality(timestep_cache().W)
    scored_nodes = np.array(list(scores_by_node.keys()))
    graph_nodes = np.array(G.nodes)
    raw_scores = np.array(list(scores_by_node.values()))
    # Presumably `ordered` realigns the scores from the order cuGraph
    # returned them in to the order of `G.nodes` -- helper is defined
    # elsewhere, TODO confirm.
    aligned_scores = ordered(graph_nodes, scored_nodes, raw_scores)
    if not as_dict:
        return aligned_scores
    return dict(zip(graph_nodes, aligned_scores))
예제 #4
0
def calc_katz(graph_file):
    """Compute Katz centrality with cuGraph and NetworkX for one edge list.

    Both libraries use ``alpha = 1 / (max_out_degree + 1)``, where the
    maximum out-degree is taken from the cuGraph graph.

    Parameters
    ----------
    graph_file : path of the edge-list file, read through
        ``utils.read_csv_file`` and ``utils.read_csv_for_nx``.

    Returns
    -------
    The cuGraph result frame with its score column renamed to ``cu_katz``
    and the NetworkX scores added as ``nx_katz``.
    """
    M = utils.read_csv_file(graph_file)
    G = cugraph.Graph()
    G.add_edge_list(M['0'], M['1'])

    largest_out_degree = G.degrees().nlargest(n=1, columns='out_degree')
    # Use positional access: the single row kept by nlargest is not
    # guaranteed to carry index label 0.
    largest_out_degree = largest_out_degree['out_degree'].iloc[0]
    katz_alpha = 1 / (largest_out_degree + 1)

    k = cugraph.katz_centrality(G, katz_alpha, max_iter=1000)

    NM = utils.read_csv_for_nx(graph_file)
    NM = NM.tocsr()
    Gnx = nx.DiGraph(NM)
    nk = nx.katz_centrality(Gnx, alpha=katz_alpha)
    # One-row frame transposed so the scores form column 0, indexed by node.
    pdf = pd.DataFrame(nk, index=[0]).T
    k['nx_katz'] = pdf[0]
    # `columns=` is required: a bare mapper targets index labels under
    # pandas-style rename semantics and would leave the column unchanged.
    k = k.rename(columns={'katz_centrality': 'cu_katz'})
    return k
예제 #5
0
def calc_katz(graph_file):
    """Compute Katz centrality with cuGraph and NetworkX for one edge list.

    Both libraries use ``alpha = 1 / (max_out_degree + 1)``, where the
    maximum out-degree is taken from the cuGraph graph.

    Parameters
    ----------
    graph_file : path of the edge-list file, read through
        ``utils.read_csv_file`` and ``utils.read_csv_for_nx``.

    Returns
    -------
    The cuGraph result frame, sorted by ``vertex``, with its score column
    renamed to ``cu_katz`` and the NetworkX scores added as ``nx_katz``.
    """
    cu_M = utils.read_csv_file(graph_file)
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1')

    largest_out_degree = G.degrees().nlargest(n=1, columns='out_degree')
    largest_out_degree = largest_out_degree['out_degree'].iloc[0]
    katz_alpha = 1/(largest_out_degree + 1)

    k_df = cugraph.katz_centrality(G, katz_alpha, max_iter=1000)
    # The NetworkX scores below are attached positionally in sorted-vertex
    # order, so put the cuGraph rows in that same order first.
    k_df = k_df.sort_values('vertex').reset_index(drop=True)

    NM = utils.read_csv_for_nx(graph_file)
    Gnx = nx.from_pandas_edgelist(NM, create_using=nx.DiGraph(),
                                  source='0', target='1')
    nk = nx.katz_centrality(Gnx, alpha=katz_alpha)
    pdf = [nk[node] for node in sorted(nk)]
    k_df['nx_katz'] = pdf
    # `columns=` is required: a bare mapper targets index labels under
    # pandas-style rename semantics and would leave the column unchanged.
    k_df = k_df.rename(columns={'katz_centrality': 'cu_katz'})
    return k_df
예제 #6
0
def calc_katz(graph_file):
    """Compute Katz centrality with cuGraph and NetworkX for one edge list.

    cuGraph is called with ``alpha=None``; NetworkX is given
    ``alpha = 1 / (max_out_degree + 1)``, with the maximum out-degree taken
    from the cuGraph graph.

    Parameters
    ----------
    graph_file : path of the edge-list file, read through
        ``utils.read_csv_file`` and ``utils.read_csv_for_nx``.

    Returns
    -------
    The cuGraph result frame, sorted by ``vertex``, with its score column
    renamed to ``cu_katz`` and the NetworkX scores added as ``nx_katz``.
    """
    edges = utils.read_csv_file(graph_file)
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(edges, source="0", destination="1")

    top_degree_row = G.degrees().nlargest(n=1, columns="out_degree")
    max_out_degree = top_degree_row["out_degree"].iloc[0]
    katz_alpha = 1 / (max_out_degree + 1)

    cu_scores = cugraph.katz_centrality(G, alpha=None, max_iter=1000)
    # Sorted by vertex so the positional assignment below lines up.
    cu_scores = cu_scores.sort_values("vertex").reset_index(drop=True)

    nx_edges = utils.read_csv_for_nx(graph_file)
    Gnx = nx.from_pandas_edgelist(
        nx_edges, source="0", target="1", create_using=nx.DiGraph()
    )
    nx_scores = nx.katz_centrality(Gnx, alpha=katz_alpha)
    # NetworkX scores listed in ascending node order.
    cu_scores["nx_katz"] = [nx_scores[node] for node in sorted(nx_scores)]
    return cu_scores.rename(
        columns={"katz_centrality": "cu_katz"}, copy=False
    )
예제 #7
0
def katz(G, alpha=None):
    """Return ``cugraph.katz_centrality(G, alpha)``.

    ``alpha`` is forwarded positionally and defaults to ``None``.
    """
    scores = cugraph.katz_centrality(G, alpha)
    return scores