コード例 #1
0
ファイル: test_graph.py プロジェクト: rapidsai/cugraph
def test_to_undirected(graph_file):
    """Check ``Graph.to_undirected`` against NetworkX on the same edge list.

    The edge list is filtered to rows whose source id is <= the destination
    id, loaded into a directed cugraph Graph and a NetworkX DiGraph, and both
    are converted to undirected graphs. The undirected cugraph graph must
    report the same node/edge counts as NetworkX and contain every edge in
    both orientations.
    """
    # Read the data and make it directed by dropping some edges: only rows
    # with source <= destination are kept.
    cu_M = utils.read_csv_file(graph_file)
    keep_gpu = cu_M["0"] <= cu_M["1"]
    cu_M = cu_M[keep_gpu].reset_index(drop=True)

    M = utils.read_csv_for_nx(graph_file)
    keep_cpu = M["0"] <= M["1"]
    M = M[keep_cpu]
    assert len(cu_M) == len(M)

    # Directed cugraph graph built from the filtered edge list.
    DiG = cugraph.Graph(directed=True)
    DiG.from_cudf_edgelist(cu_M, source="0", destination="1")

    # Equivalent NetworkX directed graph.
    DiGnx = nx.from_pandas_edgelist(
        M, source="0", target="1", create_using=nx.DiGraph()
    )

    # In the directed graph only the stored orientation may exist.
    for _, edge in cu_M.to_pandas().iterrows():
        src, dst = edge['0'], edge['1']
        assert DiG.has_edge(src, dst)
        assert not DiG.has_edge(dst, src)

    G = DiG.to_undirected()
    Gnx = DiGnx.to_undirected()

    assert not G.is_directed()
    assert G.number_of_nodes() == Gnx.number_of_nodes()
    assert G.number_of_edges() == Gnx.number_of_edges()

    # After conversion both orientations must be reported.
    for _, edge in cu_M.to_pandas().iterrows():
        src, dst = edge['0'], edge['1']
        assert G.has_edge(src, dst)
        assert G.has_edge(dst, src)
コード例 #2
0
ファイル: test_graph.py プロジェクト: rapidsai/cugraph
def test_invalid_has_node():
    """Check that ``has_node`` is False for ids absent from the graph.

    The graph holds the single edge (1, 2); the ids -1, 0 and
    ``number_of_nodes() + 1`` must all be reported as missing.
    """
    edge_df = cudf.DataFrame([[1, 2]], columns=["src", "dst"])
    G = cugraph.Graph()
    G.from_cudf_edgelist(edge_df, source="src", destination="dst")

    for missing_id in (-1, 0, G.number_of_nodes() + 1):
        assert not G.has_node(missing_id)
コード例 #3
0
ファイル: test_graph.py プロジェクト: rapidsai/cugraph
def test_degrees_functionality(graph_file):
    """Compare ``Graph.degrees()`` with NetworkX in/out degrees.

    Builds a directed cugraph Graph and a NetworkX DiGraph from the same
    file, then counts the vertices whose in-degree or out-degree differs
    between the two libraries. Both counts must be zero.
    """
    cu_M = utils.read_csv_file(graph_file)
    M = utils.read_csv_for_nx(graph_file)

    G = cugraph.Graph(directed=True)
    G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2")

    Gnx = nx.from_pandas_edgelist(
        M, source="0", target="1", create_using=nx.DiGraph()
    )

    df = G.degrees()

    nx_in_degree = Gnx.in_degree()
    nx_out_degree = Gnx.out_degree()

    in_mismatches = 0
    out_mismatches = 0

    for row in range(len(df)):
        # NetworkX degree views are looked up by vertex id, not by row.
        vertex = df["vertex"][row]
        if df["in_degree"][row] != nx_in_degree[vertex]:
            in_mismatches += 1
        if df["out_degree"][row] != nx_out_degree[vertex]:
            out_mismatches += 1

    assert in_mismatches == 0
    assert out_mismatches == 0
コード例 #4
0
def setup_function():
    """Run garbage collection and reset the module-level ``DiGraph_inst``.

    ``DiGraph_inst`` is the directed Graph instance used by calls that need
    a Graph type or instance when a test works on a directed graph.
    """
    global DiGraph_inst

    gc.collect()
    DiGraph_inst = cugraph.Graph(directed=True)  # noqa: F841
コード例 #5
0
ファイル: utils.py プロジェクト: goncaloperes/cugraph
def generate_mg_batch_cugraph_graph_from_file(graph_file, directed=True):
    """Build a cugraph graph from a dask_cudf edge list read from a file.

    Parameters
    ----------
    graph_file
        Path handed to ``read_dask_cudf_csv_file``.
    directed : bool
        When true a ``cugraph.DiGraph`` is created, otherwise a
        ``cugraph.Graph``.

    Returns
    -------
    The graph populated through ``from_dask_cudf_edgelist``.
    """
    client = get_client()
    # Persist the distributed dataframe on the client before loading it.
    ddf = client.persist(read_dask_cudf_csv_file(graph_file))

    if directed:
        G = cugraph.DiGraph()
    else:
        G = cugraph.Graph()
    G.from_dask_cudf_edgelist(ddf)
    return G
コード例 #6
0
ファイル: test_paths.py プロジェクト: jpurviance/cugraph
def graphs(request):
    """Yield the same weighted graph as cugraph, NetworkX and CuPy objects.

    ``request.param`` is written to a temporary CSV file, which is then read
    back by NetworkX and cudf. A CuPy COO matrix is assembled from the cudf
    edge list with every edge inserted in both directions.

    NOTE(review): this generator is presumably used as a parametrized pytest
    fixture; the decorator is not visible here.

    Yields
    ------
    tuple
        ``(cugraph_G, nx_G, cupy_df)``.
    """
    with NamedTemporaryFile(mode="w+", suffix=".csv") as csv_tmp:
        csv_tmp.writelines(request.param)
        csv_tmp.seek(0)
        csv_path = csv_tmp.name

        nx_G = nx.read_weighted_edgelist(csv_path, delimiter=',')
        cudf_df = cudf.read_csv(
            csv_path,
            names=["src", "dst", "data"],
            delimiter=",",
            dtype=["int32", "int32", "float64"],
        )
        cugraph_G = cugraph.Graph()
        cugraph_G.from_cudf_edgelist(
            cudf_df, source="src", destination="dst", edge_attr="data"
        )

        # Build the COO triplets, adding each edge in both directions.
        row_ids = []
        col_ids = []
        values = []
        for pos in range(cudf_df.shape[0]):
            src = cudf_df.iloc[pos]["src"]
            dst = cudf_df.iloc[pos]["dst"]
            wgt = cudf_df.iloc[pos]["data"]
            row_ids.extend([src, dst])
            col_ids.extend([dst, src])
            values.extend([wgt, wgt])
        row_ids = cupy.array(row_ids)
        col_ids = cupy.array(col_ids)
        values = cupy.array(values)

        # The matrix is square and sized by the largest vertex id seen.
        largest_vertex = max(cupy.amax(row_ids), cupy.amax(col_ids))
        dim = largest_vertex + 1
        cupy_df = cupy_coo_matrix((values, (row_ids, col_ids)),
                                  shape=(dim, dim))

        yield cugraph_G, nx_G, cupy_df
コード例 #7
0
ファイル: test_graph.py プロジェクト: rapidsai/cugraph
def test_add_edge_or_adj_list_after_add_edge_or_adj_list(graph_file):
    """Check that a populated Graph refuses to load a second graph.

    A single graph object must not store two different graphs, so once an
    edge list or an adjacency list is present, loading either representation
    again has to raise.
    """
    nx_edges = utils.read_csv_for_nx(graph_file)
    src_col = nx_edges["0"]
    dst_col = nx_edges["1"]

    df = cudf.DataFrame()
    df["src"] = cudf.Series(src_col)
    df["dst"] = cudf.Series(dst_col)

    # CSR form of the same edges, used as the adjacency-list input.
    N = max(max(src_col), max(dst_col)) + 1
    csr = scipy.sparse.csr_matrix(
        (nx_edges.weight, (src_col, dst_col)), shape=(N, N)
    )
    offsets = cudf.Series(csr.indptr)
    indices = cudf.Series(csr.indices)

    G = cugraph.Graph(directed=True)

    # With an edge list already stored, both loaders must fail.
    G.from_cudf_edgelist(df, source="src", destination="dst")
    with pytest.raises(Exception):
        G.from_cudf_edgelist(df, source="src", destination="dst")
    with pytest.raises(Exception):
        G.from_cudf_adjlist(offsets, indices, None)
    G.delete_edge_list()

    # With an adjacency list already stored, both loaders must fail.
    G.from_cudf_adjlist(offsets, indices, None)
    with pytest.raises(Exception):
        G.from_cudf_edgelist(df, source="src", destination="dst")
    with pytest.raises(Exception):
        G.from_cudf_adjlist(offsets, indices, None)
    G.delete_adj_list()
コード例 #8
0
def test_woverlap(graph_file):
    """Compare the GPU and CPU overlap coefficients on two-hop vertex pairs.

    The vertex pairs come from ``get_two_hop_neighbors`` sorted by
    ``first``/``second``. Results must have the same length, agree on NaN
    positions, and otherwise differ by less than 1e-6.
    """
    gc.collect()

    nx_edges = utils.read_csv_for_nx(graph_file)
    N = max(max(nx_edges["0"]), max(nx_edges["1"])) + 1
    M = scipy.sparse.csr_matrix(
        (nx_edges.weight, (nx_edges["0"], nx_edges["1"])), shape=(N, N)
    )

    cu_M = utils.read_csv_file(graph_file)
    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source="0", destination="1")

    two_hop = G.get_two_hop_neighbors()
    pairs = two_hop.sort_values(["first", "second"]).reset_index(drop=True)

    cu_coeff = cugraph_call(cu_M, pairs)
    cpu_coeff = cpu_call(M, pairs["first"], pairs["second"])
    assert len(cu_coeff) == len(cpu_coeff)

    for idx in range(len(cu_coeff)):
        if np.isnan(cpu_coeff[idx]):
            assert np.isnan(cu_coeff[idx])
        elif np.isnan(cu_coeff[idx]):
            # Only the GPU value is NaN here; an equality check against NaN
            # is expected to be false, so this reports the mismatch.
            assert cpu_coeff[idx] == cu_coeff[idx]
        else:
            assert abs(cpu_coeff[idx] - cu_coeff[idx]) < 1.0e-6
コード例 #9
0
def test_force_atlas2_multi_column_pos_list(graph_file, score, max_iter,
                                            barnes_hut_optimize):
    """Run ForceAtlas2 on a multi-column graph seeded with initial positions.

    A first layout is computed through ``cugraph_call`` without a position
    list. The edge list is then widened to two-column vertex ids (the second
    column is the first plus 1000), and ``cugraph.force_atlas2`` is run again
    with the first layout as ``pos_list``. The trustworthiness of the final
    layout must exceed ``score``.

    NOTE(review): the ``barnes_hut_optimize`` argument is not forwarded; both
    layout calls pass ``barnes_hut_optimize=False``.
    """
    cu_M = utils.read_csv_file(graph_file)
    test_callback = TestCallback()

    # Settings shared by both layout runs.
    layout_options = {
        "outbound_attraction_distribution": True,
        "lin_log_mode": False,
        "prevent_overlapping": False,
        "edge_weight_influence": 1.0,
        "jitter_tolerance": 1.0,
        "barnes_hut_optimize": False,
        "barnes_hut_theta": 0.5,
        "scaling_ratio": 2.0,
        "strong_gravity_mode": False,
        "gravity": 1.0,
        "callback": test_callback,
    }

    pos = cugraph_call(cu_M, max_iter=max_iter, pos_list=None,
                       **layout_options)

    # Turn the single-column vertex ids into two-column ids.
    cu_M.rename(columns={'0': 'src_0', '1': 'dst_0'}, inplace=True)
    cu_M['src_1'] = cu_M['src_0'] + 1000
    cu_M['dst_1'] = cu_M['dst_0'] + 1000

    G = cugraph.Graph()
    G.from_cudf_edgelist(
        cu_M,
        source=["src_0", "src_1"],
        destination=["dst_0", "dst_1"],
        edge_attr="2",
    )

    # Initial positions keyed by the same two-column vertex ids.
    pos_list = cudf.DataFrame()
    pos_list['vertex_0'] = pos['vertex']
    pos_list['vertex_1'] = pos_list['vertex_0'] + 1000
    pos_list['x'] = pos['x']
    pos_list['y'] = pos['y']

    cu_pos = cugraph.force_atlas2(G, max_iter=max_iter, pos_list=pos_list,
                                  **layout_options)

    cu_pos = cu_pos.sort_values('0_vertex')
    matrix_file = graph_file.with_suffix(".mtx")
    M = scipy.io.mmread(matrix_file).todense()
    cu_trust = trustworthiness(M, cu_pos[["x", "y"]].to_pandas())
    print(cu_trust, score)
    assert cu_trust > score
コード例 #10
0
def test_dask_pagerank(dask_client):
    """Compare dask (multi-GPU) PageRank with single-GPU PageRank on karate.

    Both graphs are directed and built from the same ``karate.csv`` file.
    The two results are merged on ``vertex`` and every pair of scores must
    agree within ``tol * 1.1``.
    """
    pandas.set_option("display.max_rows", 10000)

    input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix()
    chunksize = dcg.get_chunksize(input_data_path)

    def _csv_options():
        # Fresh lists on every call so the two readers never share objects.
        return {
            "delimiter": " ",
            "names": ["src", "dst", "value"],
            "dtype": ["int32", "int32", "float32"],
        }

    ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize,
                             **_csv_options())
    df = cudf.read_csv(input_data_path, **_csv_options())

    g = cugraph.Graph(directed=True)
    g.from_cudf_edgelist(df, "src", "dst")

    dg = cugraph.Graph(directed=True)
    dg.from_dask_cudf_edgelist(ddf, "src", "dst")

    expected_pr = cugraph.pagerank(g)
    result_pr = dcg.pagerank(dg).compute()

    tol = 1.0e-05

    assert len(expected_pr) == len(result_pr)

    compare_pr = expected_pr.merge(
        result_pr, on="vertex", suffixes=["_local", "_dask"]
    )
    local_scores = compare_pr["pagerank_local"]
    dask_scores = compare_pr["pagerank_dask"]

    # Count the vertices whose two scores differ by more than the tolerance.
    err = sum(
        1
        for row in range(len(compare_pr))
        if abs(local_scores.iloc[row] - dask_scores.iloc[row]) > tol * 1.1
    )
    print("Mismatches:", err)
    assert err == 0
コード例 #11
0
ファイル: run_benchmarks.py プロジェクト: zeta1999/cugraph
def createGraph(edgelist_gdf, auto_csr):
    """Create a cugraph Graph from the ``src``/``dst``/``val`` columns.

    When ``auto_csr`` equals 0, the adjacency list and the transposed
    adjacency list views are requested explicitly before returning.
    """
    src = edgelist_gdf["src"]
    dst = edgelist_gdf["dst"]
    val = edgelist_gdf["val"]

    G = cugraph.Graph()
    G.add_edge_list(src, dst, val)

    if auto_csr != 0:
        return G

    G.view_adj_list()
    G.view_transposed_adj_list()
    return G
コード例 #12
0
def get_shortest_paths(edges_df, point_of_interest):
    """Compute single-source shortest travel times from a vertex.

    Parameters
    ----------
    edges_df
        Edge list with ``src``, ``dst`` and ``time`` columns; ``time`` is
        used as the edge weight.
    point_of_interest
        Source vertex handed to ``cugraph.traversal.sssp``.

    Returns
    -------
    DataFrame with the columns ``time`` (the SSSP distance) and ``vertex``,
    in that order.
    """
    G_gpu = cugraph.Graph()
    G_gpu.from_cudf_edgelist(
        edges_df, source='src', destination='dst', edge_attr='time'
    )
    shortest_paths = cugraph.traversal.sssp(G_gpu, point_of_interest)
    # Select and rename by column name rather than by position, so the
    # labels stay correct whatever column order sssp returns. The
    # 'predecessor' column is dropped by not selecting it.
    shortest_paths = shortest_paths[['distance', 'vertex']].rename(
        columns={'distance': 'time'}
    )
    return shortest_paths
コード例 #13
0
ファイル: test_graph.py プロジェクト: rapidsai/cugraph
def test_graph_init_with_multigraph():
    """
    Verify that a Graph can only be initialized from a valid MultiGraph:
    a NetworkX MultiGraph must raise TypeError, while cugraph multigraph
    instances must be accepted without any exception.
    """
    with pytest.raises(TypeError):
        cugraph.Graph(m_graph=nx.MultiGraph())

    gdf = cudf.DataFrame({"src": [0, 1, 2], "dst": [1, 2, 3]})

    # MultiDiGraph is deprecated, but should still work.
    for multigraph_cls in (cugraph.MultiGraph, cugraph.MultiDiGraph):
        multigraph = multigraph_cls()
        multigraph.from_cudf_edgelist(gdf, source="src", destination="dst")
        cugraph.Graph(m_graph=multigraph)
コード例 #14
0
def generate_cugraph_graph_from_file(graph_file,
                                     directed=True,
                                     edgevals=False):
    """Read an edge-list file and load it into a cugraph graph.

    Parameters
    ----------
    graph_file
        Path handed to ``read_csv_file``.
    directed : bool
        Create a ``cugraph.DiGraph`` when true, else a ``cugraph.Graph``.
    edgevals : bool
        When true, column ``'2'`` is loaded as the edge attribute.

    Returns
    -------
    The populated graph.
    """
    cu_M = read_csv_file(graph_file)

    if directed:
        G = cugraph.DiGraph()
    else:
        G = cugraph.Graph()

    edgelist_kwargs = {"source": '0', "destination": '1'}
    if edgevals:
        edgelist_kwargs["edge_attr"] = '2'
    G.from_cudf_edgelist(cu_M, **edgelist_kwargs)
    return G
コード例 #15
0
def test_ktruss_subgraph_Graph(graph_file, nx_ground_truth):
    """Compare the cugraph 5-truss subgraph of an undirected Graph against
    the supplied NetworkX ground truth via ``compare_k_truss``."""
    gc.collect()

    k = 5
    edges = utils.read_csv_file(graph_file)

    G = cugraph.Graph()
    G.from_cudf_edgelist(edges, source="0", destination="1", edge_attr="2")

    compare_k_truss(cugraph.ktruss_subgraph(G, k), k, nx_ground_truth)
コード例 #16
0
ファイル: test_graph.py プロジェクト: jpurviance/cugraph
def test_has_node(graph_file):
    """Check that every vertex id appearing in the edge list is reported by
    ``Graph.has_node``."""
    cu_M = utils.read_csv_file(graph_file)

    # Unique vertex ids taken from both endpoint columns.
    endpoints = cudf.concat([cu_M["0"], cu_M["1"]])
    nodes = endpoints.unique()

    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source="0", destination="1")

    # Iterate over a host copy of the ids.
    for vertex in nodes.values_host:
        assert G.has_node(vertex)
コード例 #17
0
ファイル: test_graph.py プロジェクト: stjordanis/cugraph
def test_transpose_from_adj_list(graph_file):
    """Check that the transposed adjacency list built by cugraph matches the
    CSR form of the transposed SciPy matrix."""
    csr = read_mtx_file(graph_file + '.mtx').tocsr()

    G = cugraph.Graph()
    G.add_adj_list(cudf.Series(csr.indptr), cudf.Series(csr.indices), None)
    G.add_transposed_adj_list()

    # Reference transpose computed on the host.
    expected = csr.transpose().tocsr()

    toff, tind = G.view_transposed_adj_list()
    assert compare_series(tind, expected.indices)
    assert compare_offsets(toff, expected.indptr)
コード例 #18
0
def cugraph_call(M, edgevals=False):
    """Run ``cugraph.triangles`` on a graph built from a sparse matrix.

    ``M`` is converted to COO form; its row and column indices become the
    edge list. The matrix data is passed as edge values unless ``edgevals``
    is exactly ``False``.
    """
    coo = M.tocoo()
    rows = cudf.Series(coo.row)
    cols = cudf.Series(coo.col)
    values = None if edgevals is False else cudf.Series(coo.data)

    G = cugraph.Graph()
    G.add_edge_list(rows, cols, values)
    return cugraph.triangles(G)
コード例 #19
0
ファイル: test_graph.py プロジェクト: jpurviance/cugraph
def test_has_edge(graph_file):
    """Check that an undirected Graph reports every loaded edge in both
    orientations through ``has_edge``."""
    cu_M = utils.read_csv_file(graph_file)
    # Keep only rows whose source id is <= the destination id.
    keep = cu_M["0"] <= cu_M["1"]
    cu_M = cu_M[keep].reset_index(drop=True)

    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source="0", destination="1")

    for _, edge in cu_M.to_pandas().iterrows():
        src, dst = edge['0'], edge['1']
        assert G.has_edge(src, dst)
        assert G.has_edge(dst, src)
コード例 #20
0
def createGraph(edgelist_gdf, createDiGraph, renumber, symmetrized):
    """Create a cugraph graph from a ``src``/``dst``/``val`` edge list.

    Parameters
    ----------
    edgelist_gdf
        Edge list dataframe with ``src``, ``dst`` and ``val`` columns.
    createDiGraph
        When truthy a ``cugraph.DiGraph`` is created; otherwise a
        ``cugraph.Graph`` constructed with ``symmetrized=symmetrized``.
    renumber
        Forwarded to ``from_cudf_edgelist``.
    symmetrized
        Only used for the ``cugraph.Graph`` case.
    """
    G = (cugraph.DiGraph() if createDiGraph
         else cugraph.Graph(symmetrized=symmetrized))
    G.from_cudf_edgelist(
        edgelist_gdf,
        source="src",
        destination="dst",
        edge_attr="val",
        renumber=renumber,
    )
    return G
コード例 #21
0
ファイル: test_sorensen.py プロジェクト: rapidsai/cugraph
def test_sorensen_two_hop_edge_vals(read_csv):
    """Run the two-hop Sorensen comparison on weighted graphs built from the
    same data in NetworkX and cugraph."""
    nx_edges, cu_edges = read_csv

    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_edges, source="0", destination="1",
                         edge_attr="2")

    Gnx = nx.from_pandas_edgelist(
        nx_edges,
        source="0",
        target="1",
        edge_attr="weight",
        create_using=nx.Graph(),
    )

    compare_sorensen_two_hop(G, Gnx)
コード例 #22
0
def cugraph_k_truss_subgraph(graph_file, k, directed):
    """Return the k-truss subgraph of the graph stored in ``graph_file``.

    ``directed`` selects between a ``cugraph.DiGraph`` and a
    ``cugraph.Graph`` so that the result can be compared with a NetworkX
    graph of the same type.
    """
    edges = utils.read_csv_file(graph_file)
    G = cugraph.DiGraph() if directed else cugraph.Graph()
    G.from_cudf_edgelist(edges, source='0', destination='1', edge_attr='2')
    return cugraph.ktruss_subgraph(G, k)
コード例 #23
0
def test_modularity_clustering(managed, pool, graph_file, partitions):
    """Check that the cugraph partitioning scores lower than a random one.

    RMM is re-initialized with the requested ``managed``/``pool`` settings.
    The check is then run twice: once on a graph built from an adjacency
    list and once on a graph built from an edge list.
    """
    gc.collect()

    # Restart RMM with the requested allocator configuration.
    rmm.finalize()
    rmm_config.use_managed_memory = managed
    rmm_config.use_pool_allocator = pool
    rmm_config.initial_pool_size = 2 << 27
    rmm.initialize()

    assert (rmm.is_initialized())

    # Host CSR matrix for the adjacency list, device frame for the edges.
    M = utils.read_csv_for_nx(graph_file).tocsr()
    cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False)

    row_offsets = cudf.Series(M.indptr)
    col_indices = cudf.Series(M.indices)

    sources = cu_M['0']
    destinations = cu_M['1']

    G_adj = cugraph.Graph()
    G_adj.add_adj_list(row_offsets, col_indices)
    G_edge = cugraph.Graph()
    G_edge.add_edge_list(sources, destinations)

    # For each representation, the score of the computed partitioning must
    # be below the score of the random assignment.
    for graph in (G_adj, G_edge):
        cu_vid, cu_score = cugraph_call(graph, partitions)
        rand_vid, rand_score = random_call(graph, partitions)
        assert cu_score < rand_score
コード例 #24
0
ファイル: test_graph.py プロジェクト: stjordanis/cugraph
def test_view_edge_list_from_adj_list(graph_file):
    """Check that the edge list viewed from an adjacency-list graph matches
    the COO rows and columns of the source matrix."""
    csr = read_mtx_file(graph_file + '.mtx').tocsr()

    G = cugraph.Graph()
    G.add_adj_list(cudf.Series(csr.indptr), cudf.Series(csr.indices), None)
    src2, dst2 = G.view_edge_list()

    # Reference edge list taken from the COO form of the same matrix.
    coo = csr.tocoo()
    assert compare_series(coo.row, src2)
    assert compare_series(coo.col, dst2)
コード例 #25
0
ファイル: test_jaccard.py プロジェクト: rapidsai/cugraph
def test_jaccard_two_hop(read_csv):
    """Run the two-hop Jaccard comparison on unweighted graphs built from
    the same data in NetworkX and cugraph."""
    nx_edges, cu_edges = read_csv

    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_edges, source="0", destination="1")

    Gnx = nx.from_pandas_edgelist(
        nx_edges, source="0", target="1", create_using=nx.Graph()
    )

    compare_jaccard_two_hop(G, Gnx)
コード例 #26
0
ファイル: test_k_core.py プロジェクト: hieuqtran/cugraph
def calc_k_cores(graph_file):
    """Compute the k-core of a graph with both cugraph and NetworkX.

    Returns
    -------
    tuple
        ``(ck, nk)``: the cugraph result followed by the NetworkX result.
    """
    # cugraph side: undirected Graph built from the edge columns.
    edges = utils.read_csv_file(graph_file)
    G = cugraph.Graph()
    G.add_edge_list(edges['0'], edges['1'])
    ck = cugraph.k_core(G)

    # NetworkX side: DiGraph built from the CSR form of the same file.
    csr = utils.read_csv_for_nx(graph_file).tocsr()
    nk = nx.k_core(nx.DiGraph(csr))

    return ck, nk
コード例 #27
0
def test_has_node(graph_file):
    """Check that every vertex id appearing in the edge list is reported by
    ``Graph.has_node``."""
    gc.collect()

    cu_M = utils.read_csv_file(graph_file)
    nodes = cudf.concat([cu_M['0'], cu_M['1']]).unique()

    # cugraph add_edge_list
    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1')

    # A cudf Series cannot be iterated directly (cudf raises TypeError), so
    # copy the ids to the host before looping.
    for n in nodes.to_pandas():
        assert G.has_node(n)
コード例 #28
0
def cugraph_call(M, edgevals=False):
    """Run ``cugraph.triangles`` on a graph built from an edge-list frame.

    Columns ``'0'`` and ``'1'`` of ``M`` become the ``src`` and ``dst``
    columns. The ``'weight'`` column is loaded as the edge attribute only
    when ``edgevals`` is exactly ``True``.
    """
    G = cugraph.Graph()

    cu_M = cudf.DataFrame()
    cu_M['src'] = cudf.Series(M['0'])
    cu_M['dst'] = cudf.Series(M['1'])

    edgelist_kwargs = {"source": 'src', "destination": 'dst'}
    if edgevals is True:
        cu_M['weights'] = cudf.Series(M['weight'])
        edgelist_kwargs["edge_attr"] = 'weights'

    G.from_cudf_edgelist(cu_M, **edgelist_kwargs)
    return cugraph.triangles(G)
コード例 #29
0
def convert_from_nx(nxG, weight=None, do_renumber=True):
    """
    Convert a NetworkX Graph or DiGraph into a cugraph Graph.

    nxG: NetworkX graph; a DiGraph produces a directed cugraph Graph.
    weight: weight column name. Only used if
    nxG.is_weighted() is True
    do_renumber: forwarded as ``renumber`` to ``from_cudf_edgelist``.

    Raises TypeError when nxG is neither a NetworkX Graph nor a DiGraph.
    """
    # DiGraph is tested before Graph, as in the original ordering.
    if isinstance(nxG, nx.classes.digraph.DiGraph):
        G = cugraph.Graph(directed=True)
    elif isinstance(nxG, nx.classes.graph.Graph):
        G = cugraph.Graph()
    else:
        raise TypeError(
            f"nxG must be either a NetworkX Graph or DiGraph, got {type(nxG)}")

    # Choose the edge-list frame and its column names, then load it once.
    if nx.is_weighted(nxG) is False:
        _gdf = convert_unweighted_to_gdf(nxG)
        src_col, dst_col, attr_col = "src", "dst", None
    elif weight is None:
        _gdf = convert_weighted_unnamed_to_gdf(nxG)
        src_col, dst_col, attr_col = "source", "target", 'weight'
    else:
        _gdf = convert_weighted_named_to_gdf(nxG, weight)
        src_col, dst_col, attr_col = "src", "dst", 'weight'

    G.from_cudf_edgelist(_gdf,
                         source=src_col,
                         destination=dst_col,
                         edge_attr=attr_col,
                         renumber=do_renumber)
    return G
コード例 #30
0
def test_dask_katz_centrality(dask_client):
    """Compare dask Katz centrality with NetworkX on the karate dataset.

    ``alpha`` is set to ``1 / (largest out-degree + 1)``. The dask result
    and the NetworkX result are merged on ``vertex`` and every pair of
    scores must agree within ``tol * 1.1``.
    """
    import networkx as nx
    import pandas as pd
    from cugraph.tests import utils

    gc.collect()

    input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH /
                       "karate.csv").as_posix()
    print(f"dataset={input_data_path}")
    chunksize = dcg.get_chunksize(input_data_path)

    ddf = dask_cudf.read_csv(
        input_data_path,
        chunksize=chunksize,
        delimiter=" ",
        names=["src", "dst", "value"],
        dtype=["int32", "int32", "float32"],
    )

    dg = cugraph.Graph(directed=True)
    dg.from_dask_cudf_edgelist(ddf, "src", "dst")

    # alpha is derived from the largest out-degree in the graph.
    out_degrees = dg.out_degree().compute()
    top_row = out_degrees.nlargest(n=1, columns="degree")
    largest_out_degree = top_row["degree"].iloc[0]
    katz_alpha = 1 / (largest_out_degree + 1)

    mg_res = dcg.katz_centrality(dg, alpha=katz_alpha, tol=1e-6).compute()

    # Reference values from NetworkX on the same directed graph.
    NM = utils.read_csv_for_nx(input_data_path)
    Gnx = nx.from_pandas_edgelist(
        NM, create_using=nx.DiGraph(), source="0", target="1"
    )
    nk = nx.katz_centrality(Gnx, alpha=katz_alpha)
    pdf = pd.DataFrame(nk.items(), columns=['vertex', 'katz_centrality'])
    exp_res = cudf.DataFrame(pdf)

    tol = 1.0e-05

    compare_res = exp_res.merge(
        mg_res, on="vertex", suffixes=["_local", "_dask"]
    )
    local_scores = compare_res["katz_centrality_local"]
    dask_scores = compare_res["katz_centrality_dask"]

    # Count the vertices whose two scores differ by more than the tolerance.
    err = sum(
        1
        for row in range(len(compare_res))
        if abs(local_scores.iloc[row] - dask_scores.iloc[row]) > tol * 1.1
    )
    assert err == 0