Python read_csv_file 예제들, cugraph.tests.utils.read_csv_file Python 예제들

예제 #1

0

파일 보기

def test_louvain_with_edgevals(graph_file):
    """Compare weighted cugraph Louvain output with the NetworkX reference.

    When ``is_device_version_less_than((7, 0))`` is true, the cugraph call
    is expected to raise ``RuntimeError`` and nothing else is checked.
    Otherwise both partitionings must cover the same vertices and the
    cugraph modularity must be within tolerance of the reference scores.
    """
    gc.collect()

    if is_device_version_less_than((7, 0)):
        gpu_edges = utils.read_csv_file(graph_file)
        with pytest.raises(RuntimeError):
            gpu_parts, gpu_mod = cugraph_call(gpu_edges)
        return

    cpu_edges = utils.read_csv_for_nx(graph_file)
    gpu_edges = utils.read_csv_file(graph_file)
    gpu_parts, gpu_mod = cugraph_call(gpu_edges, edgevals=True)
    ref_parts = networkx_call(cpu_edges)

    # One reference graph is used to score both partitionings.
    ref_graph = nx.from_pandas_edgelist(
        cpu_edges,
        source="0",
        target="1",
        edge_attr="weight",
        create_using=nx.Graph(),
    )

    gpu_parts = gpu_parts.to_pandas()
    vertex_to_partition = dict(
        zip(gpu_parts["vertex"], gpu_parts["partition"])
    )

    assert set(ref_parts.keys()) == set(vertex_to_partition.keys())

    gpu_mod_rescored = community.modularity(vertex_to_partition, ref_graph)
    ref_mod = community.modularity(ref_parts, ref_graph)

    assert len(gpu_parts) == len(ref_parts)
    assert gpu_mod > (0.82 * ref_mod)
    assert abs(gpu_mod - gpu_mod_rescored) < 0.0001

예제 #2

0

파일 보기

파일: test_hits.py 프로젝트: jpurviance/cugraph

def test_hits(graph_file, max_iter, tol):
    """Check cugraph HITS hub/authority scores against NetworkX.

    The NetworkX scores are attached to the cugraph result as extra columns
    and every row must agree within the fixed tolerances used in the
    query expressions below.
    """
    gc.collect()

    cpu_edges = utils.read_csv_for_nx(graph_file)
    ref_hubs, ref_authorities = networkx_call(cpu_edges, max_iter, tol)

    gpu_edges = utils.read_csv_file(graph_file)
    result = cugraph_call(gpu_edges, max_iter, tol)

    hubs_frame = pd.DataFrame.from_dict(ref_hubs, orient="index")
    hubs_frame = hubs_frame.sort_index()
    result["nx_hubs"] = cudf.Series.from_pandas(hubs_frame[0])

    authorities_frame = pd.DataFrame.from_dict(ref_authorities,
                                               orient="index")
    authorities_frame = authorities_frame.sort_index()
    result["nx_authorities"] = cudf.Series.from_pandas(authorities_frame[0])

    # Each expression selects rows that are out of tolerance; none may match.
    out_of_tolerance = (
        'hubs - nx_hubs > 0.00001',
        'hubs - nx_hubs < -0.00001',
        'authorities - nx_authorities > 0.0001',
        'authorities - nx_authorities < -0.0001',
    )
    offenders = [result.query(expr) for expr in out_of_tolerance]

    for rows in offenders:
        assert len(rows) == 0

예제 #3

0

파일 보기

def test_multigraph(graph_file):
    """Check that a cugraph MultiDiGraph matches the NetworkX equivalent.

    Node and edge counts must agree, and the sorted edge lists (weights
    rounded to 3 decimals) must be equal.
    """
    # FIXME: Migrate to new test fixtures for Graph setup once available
    gpu_edges = utils.read_csv_file(graph_file)
    G = cugraph.MultiDiGraph()
    G.from_cudf_edgelist(
        gpu_edges, source="0", destination="1", edge_attr="2"
    )

    cpu_edges = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True)
    Gnx = nx.from_pandas_edgelist(
        cpu_edges,
        source="0",
        target="1",
        edge_attr="weight",
        create_using=nx.MultiDiGraph(),
    )

    assert G.number_of_edges() == Gnx.number_of_edges()
    assert G.number_of_nodes() == Gnx.number_of_nodes()

    sort_keys = ["source", "target"]

    # cugraph side: rename to the NetworkX column names, round, then sort.
    cuedges = cugraph.to_pandas_edgelist(G)
    cuedges.rename(
        columns={"src": "source", "dst": "target", "weights": "weight"},
        inplace=True,
    )
    cuedges["weight"] = cuedges["weight"].round(decimals=3)
    cuedges = cuedges.sort_values(by=sort_keys).reset_index(drop=True)

    # NetworkX side: cast to the dtypes named below, sort, then round.
    nx_dtypes = {"source": "int32", "target": "int32", "weight": "float32"}
    nxedges = nx.to_pandas_edgelist(Gnx).astype(dtype=nx_dtypes)
    nxedges = nxedges.sort_values(by=sort_keys).reset_index(drop=True)
    nxedges["weight"] = nxedges["weight"].round(decimals=3)

    assert nxedges.equals(cuedges[["source", "target", "weight"]])

예제 #4

0

파일 보기

def test_to_undirected(graph_file):
    """Check ``DiGraph.to_undirected`` against NetworkX.

    Only edges with source <= destination are kept from the input, so the
    starting graph is directed.  After conversion, node and edge counts must
    match NetworkX and every cugraph edge must exist in the NetworkX graph.
    """
    gc.collect()

    gpu_edges = utils.read_csv_file(graph_file)
    keep_gpu = gpu_edges["0"] <= gpu_edges["1"]
    gpu_edges = gpu_edges[keep_gpu].reset_index(drop=True)

    cpu_edges = utils.read_csv_for_nx(graph_file)
    cpu_edges = cpu_edges[cpu_edges["0"] <= cpu_edges["1"]]
    assert len(gpu_edges) == len(cpu_edges)

    # Build the directed graphs on both sides.
    DiG = cugraph.DiGraph()
    DiG.from_cudf_edgelist(gpu_edges, source="0", destination="1")
    DiGnx = nx.from_pandas_edgelist(
        cpu_edges, source="0", target="1", create_using=nx.DiGraph()
    )

    G = DiG.to_undirected()
    Gnx = DiGnx.to_undirected()

    assert G.number_of_nodes() == Gnx.number_of_nodes()
    assert G.number_of_edges() == Gnx.number_of_edges()

    edgelist_df = G.edgelist.edgelist_df

    for row_number in range(len(edgelist_df)):
        src = edgelist_df.iloc[row_number]["src"]
        dst = edgelist_df.iloc[row_number]["dst"]
        assert Gnx.has_edge(src, dst)

예제 #5

0

파일 보기

def test_symmetrize_unweighted(graph_file):
    """Symmetrize the unweighted edge list and validate it with ``compare``."""
    gc.collect()

    edges = utils.read_csv_file(graph_file)

    sym_src, sym_dst = cugraph.symmetrize(edges["0"], edges["1"])

    # The symmetrized output should contain every input edge, in both
    # directions.  The original and symmetrized columns are staged in data
    # frames and handed to `compare`, which performs the check; the two
    # None arguments sit where value columns would go (presumably unused
    # for the unweighted case).
    orig_df = cudf.DataFrame()
    orig_df["src"] = edges["0"]
    orig_df["dst"] = edges["1"]

    sym_df = cudf.DataFrame()
    sym_df["src_s"] = sym_src
    sym_df["dst_s"] = sym_dst

    compare(orig_df["src"], orig_df["dst"], None,
            sym_df["src_s"], sym_df["dst_s"], None)

예제 #6

0

파일 보기

def calc_cg_core_number(graph_file):
    """Return ``cugraph.core_number`` for the graph read from *graph_file*.

    The edge list is loaded with ``utils.read_csv_file`` and columns "0"
    and "1" are used as source and destination.
    """
    edges = utils.read_csv_file(graph_file)

    graph = cugraph.Graph()
    graph.from_cudf_edgelist(edges, source="0", destination="1")

    return cugraph.core_number(graph)

예제 #7

0

파일 보기

파일: test_jaccard.py 프로젝트: mickeyouyou/cugraph

def test_jaccard_edgevals(managed, pool, graph_file):
    """Compare cugraph Jaccard coefficients (with edge values) to NetworkX.

    A mismatch is counted only when the coefficients differ by more than
    the tolerance AND both results describe the same (source, destination)
    pair at that position.
    """
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)
    assert rmm.is_initialized()

    cpu_edges = utils.read_csv_for_nx(graph_file)
    gpu_edges = utils.read_csv_file(graph_file)
    cu_src, cu_dst, cu_coeff = cugraph_call(gpu_edges, edgevals=True)
    nx_src, nx_dst, nx_coeff = networkx_call(cpu_edges)

    tol = 1.0e-06
    mismatches = 0

    assert len(cu_coeff) == len(nx_coeff)
    for i in range(len(cu_coeff)):
        # Keep the "greater than" form so NaN differences are not counted.
        if not (abs(cu_coeff[i] - nx_coeff[i]) > tol*1.1):
            continue
        if cu_src[i] == nx_src[i] and cu_dst[i] == nx_dst[i]:
            mismatches += 1

    print("Mismatches:  %d" % mismatches)
    assert mismatches == 0

예제 #8

0

파일 보기

파일: test_graph.py 프로젝트: jpurviance/cugraph

def test_edges_for_Graph(graph_file):
    """Check that ``Graph.edges()`` agrees with the NetworkX edge view.

    Edge counts must match, and after ordering each NetworkX edge as
    (smaller, larger) the sorted edge lists must be equal (dtype ignored).
    """
    cu_M = utils.read_csv_file(graph_file)

    # NetworkX reference built from the first two columns.
    endpoints = cu_M.to_pandas()[['0', '1']]
    nx_graph = nx.from_pandas_edgelist(
        endpoints, source='0', target='1', create_using=nx.Graph
    )
    nx_edges = nx_graph.edges()

    # Passing the column names as lists forces the renumber_from_cudf path.
    G = cugraph.from_cudf_edgelist(
        cu_M, source=['0'], destination=['1'], create_using=cugraph.Graph
    )
    cu_edge_list = G.edges()

    assert len(nx_edges) == len(cu_edge_list)
    assert nx_graph.number_of_edges() == G.number_of_edges()

    # Put the smaller endpoint first so both sides use the same orientation.
    oriented = [
        [edge[1], edge[0]] if edge[0] > edge[1] else [edge[0], edge[1]]
        for edge in nx_edges
    ]
    nx_edge_list = cudf.DataFrame(oriented, columns=['src', 'dst'])

    sort_keys = ['src', 'dst']
    expected = nx_edge_list.sort_values(by=sort_keys).reset_index(drop=True)
    actual = cu_edge_list.sort_values(by=sort_keys).reset_index(drop=True)
    assert_eq(expected, actual, check_dtype=False)

예제 #9

0

파일 보기

def test_jaccard_two_hop_edge_vals(graph_file):
    """Compare Jaccard on two-hop neighbor pairs between cugraph and NetworkX.

    The pairs are sorted by ("first", "second") before being handed to
    NetworkX, and the cugraph result is sorted by ("source", "destination"),
    so the two coefficient sequences are compared position by position.
    """
    gc.collect()

    cpu_edges = utils.read_csv_for_nx(graph_file)
    gpu_edges = utils.read_csv_file(graph_file)

    Gnx = nx.from_pandas_edgelist(
        cpu_edges,
        source="0",
        target="1",
        edge_attr="weight",
        create_using=nx.Graph(),
    )
    G = cugraph.Graph()
    G.from_cudf_edgelist(
        gpu_edges, source="0", destination="1", edge_attr="2"
    )

    pairs = G.get_two_hop_neighbors()
    pairs = pairs.sort_values(["first", "second"]).reset_index(drop=True)

    nx_pairs = [
        (pairs["first"].iloc[i], pairs["second"].iloc[i])
        for i in range(len(pairs))
    ]
    nx_coeff = [p for u, v, p in nx.jaccard_coefficient(Gnx, nx_pairs)]

    df = cugraph.jaccard(G, pairs)
    df = df.sort_values(by=["source", "destination"]).reset_index(drop=True)

    assert len(nx_coeff) == len(df)
    for i in range(len(df)):
        assert abs(nx_coeff[i] - df["jaccard_coeff"].iloc[i]) < 1.0e-6

예제 #10

0

파일 보기

def test_overlap(graph_file):
    """Compare cugraph overlap coefficients with the CPU reference.

    Coefficients are compared position by position: a NaN on the CPU side
    requires a NaN on the GPU side, a NaN only on the GPU side fails, and
    everything else must agree to within 1e-6.
    """
    gc.collect()

    Mnx = utils.read_csv_for_nx(graph_file)
    n_vertices = max(max(Mnx["0"]), max(Mnx["1"])) + 1
    M = scipy.sparse.csr_matrix(
        (Mnx.weight, (Mnx["0"], Mnx["1"])), shape=(n_vertices, n_vertices)
    )

    cu_M = utils.read_csv_file(graph_file)
    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source="0", destination="1")
    pairs = G.get_two_hop_neighbors()
    pairs = pairs.sort_values(["first", "second"]).reset_index(drop=True)

    cu_coeff = cugraph_call(cu_M, pairs)
    cpu_coeff = cpu_call(M, pairs["first"], pairs["second"])

    assert len(cu_coeff) == len(cpu_coeff)
    for i in range(len(cu_coeff)):
        if np.isnan(cpu_coeff[i]):
            assert np.isnan(cu_coeff[i])
        elif np.isnan(cu_coeff[i]):
            # NaN never compares equal, so this branch always fails.
            assert cpu_coeff[i] == cu_coeff[i]
        else:
            assert abs(cpu_coeff[i] - cu_coeff[i]) < 1.0e-6

예제 #11

0

파일 보기

파일: test_woverlap.py 프로젝트: zeta1999/cugraph

def test_woverlap(managed, pool, graph_file):
    """Compare cugraph weighted-overlap coefficients with the CPU reference.

    RMM is reinitialized with the requested allocator settings first.  A
    NaN on the CPU side requires a NaN on the GPU side, a NaN only on the
    GPU side fails, and all other values must agree to within 1e-6.
    """
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)
    assert rmm.is_initialized()

    Mnx = utils.read_csv_for_nx(graph_file)
    n_vertices = max(max(Mnx['0']), max(Mnx['1'])) + 1
    M = scipy.sparse.csr_matrix(
        (Mnx.weight, (Mnx['0'], Mnx['1'])), shape=(n_vertices, n_vertices)
    )

    cu_M = utils.read_csv_file(graph_file)
    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1')
    pairs = G.get_two_hop_neighbors()

    cu_coeff = cugraph_call(cu_M, pairs)
    cpu_coeff = cpu_call(M, pairs['first'], pairs['second'])

    assert len(cu_coeff) == len(cpu_coeff)
    for i in range(len(cu_coeff)):
        if np.isnan(cpu_coeff[i]):
            assert np.isnan(cu_coeff[i])
        elif np.isnan(cu_coeff[i]):
            # NaN never compares equal, so this branch always fails.
            assert cpu_coeff[i] == cu_coeff[i]
        else:
            assert abs(cpu_coeff[i] - cu_coeff[i]) < 1.0e-6

예제 #12

0

파일 보기

def test_louvain(graph_file):
    """Compare cugraph Louvain output with the NetworkX reference.

    Both partitionings must cover the same vertices, the cugraph modularity
    must exceed 82% of the reference modularity, and it must match the
    modularity recomputed from the cugraph partitioning to within 1e-4.
    """
    gc.collect()

    cpu_edges = utils.read_csv_for_nx(graph_file)
    gpu_edges = utils.read_csv_file(graph_file)
    gpu_parts, gpu_mod = cugraph_call(gpu_edges)
    ref_parts = networkx_call(cpu_edges)

    # One reference graph is used to score both partitionings.
    ref_graph = nx.from_pandas_edgelist(
        cpu_edges,
        source="0",
        target="1",
        edge_attr="weight",
        create_using=nx.Graph(),
    )

    gpu_parts = gpu_parts.to_pandas()
    vertex_to_partition = dict(
        zip(gpu_parts["vertex"], gpu_parts["partition"])
    )

    assert set(ref_parts.keys()) == set(vertex_to_partition.keys())

    gpu_mod_rescored = community.modularity(vertex_to_partition, ref_graph)
    ref_mod = community.modularity(ref_parts, ref_graph)

    assert len(gpu_parts) == len(ref_parts)
    assert gpu_mod > (0.82 * ref_mod)
    assert abs(gpu_mod - gpu_mod_rescored) < 0.0001

예제 #13

0

파일 보기

파일: test_graph.py 프로젝트: mickeyouyou/cugraph

def test_degrees_functionality(managed, pool, graph_file):
    """Compare ``DiGraph.degrees()`` with NetworkX in/out degrees.

    Row ``i`` of the cugraph result is compared with the NetworkX degree
    looked up by the same key ``i``.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    M = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)

    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source='0', target='1', edge_attr='2')

    Gnx = nx.DiGraph(M)

    df = G.degrees()
    nx_in_degree = Gnx.in_degree()
    nx_out_degree = Gnx.out_degree()

    in_mismatches = 0
    out_mismatches = 0
    for row in range(len(df)):
        if df['in_degree'][row] != nx_in_degree[row]:
            in_mismatches += 1
        if df['out_degree'][row] != nx_out_degree[row]:
            out_mismatches += 1

    assert in_mismatches == 0
    assert out_mismatches == 0

예제 #14

0

파일 보기

파일: test_graph.py 프로젝트: mickeyouyou/cugraph

def test_add_edge_list_to_adj_list(managed, pool, graph_file):
    """Check the adjacency list cugraph derives from an edge list.

    The expected offsets and indices come from the CSR form of the
    reference matrix; the adjacency-list values are expected to be None.

    Raises:
        TypeError: if the reference matrix is None or not square.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    cu_M = utils.read_csv_file(graph_file)

    M = utils.read_csv_for_nx(graph_file).tocsr()
    if M is None:
        raise TypeError('Could not read the input graph')
    n_rows, n_cols = M.shape[0], M.shape[1]
    if n_rows != n_cols:
        raise TypeError('Shape is not square')

    expected_offsets = M.indptr
    expected_indices = M.indices

    # Load the edge list into cugraph and view it as an adjacency list.
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source='0', target='1')
    offsets_cu, indices_cu, values_cu = G.view_adj_list()

    assert compare_offsets(offsets_cu, expected_offsets)
    assert compare_series(indices_cu, expected_indices)
    assert values_cu is None

예제 #15

0

파일 보기

파일: test_graph.py 프로젝트: zeta1999/cugraph

def test_Graph_from_MultiGraph(managed, pool, graph_file):
    """Check that a Graph built from a MultiGraph matches a direct build.

    Two extra constant-valued float32 columns ("3" and "4") are added so
    the MultiGraph carries several edge attributes.  A Graph is then
    created from that MultiGraph using edge attribute "2" and its edge list
    must equal that of a Graph built directly from the same data frame.
    """
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)

    assert rmm.is_initialized()

    cu_M = utils.read_csv_file(graph_file)

    # create dataframe for MultiGraph
    cu_M['3'] = cudf.Series([2.0] * len(cu_M), dtype=np.float32)
    cu_M['4'] = cudf.Series([3.0] * len(cu_M), dtype=np.float32)

    # initialize MultiGraph
    G_multi = cugraph.MultiGraph()
    G_multi.from_cudf_edgelist(cu_M,
                               source='0',
                               destination='1',
                               edge_attr=['2', '3', '4'])

    # initialize Graph
    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1', edge_attr='2')

    # create Graph from MultiGraph
    G_from_multi = cugraph.Graph(G_multi, edge_attr='2')

    # `==` between two DataFrames is element-wise and does not produce a
    # single boolean, so asserting on it never checks equality as intended.
    # `.equals()` returns one bool for the whole frame.
    expected_edges = G.edgelist.edgelist_df
    actual_edges = G_from_multi.edgelist.edgelist_df
    assert expected_edges.equals(actual_edges)

예제 #16

0

파일 보기

파일: test_graph.py 프로젝트: hyperbolic2346/cugraph

def test_to_directed(graph_file):
    """Check ``Graph.to_directed`` against NetworkX.

    Only edges with source <= destination are kept from the input.  After
    conversion, node and edge counts must match NetworkX, and every input
    edge must be present in both directions in the undirected graph and in
    the directed graph produced from it.
    """
    gc.collect()

    cu_M = utils.read_csv_file(graph_file)
    cu_M = cu_M[cu_M["0"] <= cu_M["1"]].reset_index(drop=True)
    M = utils.read_csv_for_nx(graph_file)
    M = M[M["0"] <= M["1"]]
    assert len(cu_M) == len(M)

    # cugraph add_edge_list
    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source="0", destination="1")
    Gnx = nx.from_pandas_edgelist(M,
                                  source="0",
                                  target="1",
                                  create_using=nx.Graph())

    DiG = G.to_directed()
    DiGnx = Gnx.to_directed()

    assert DiG.number_of_nodes() == DiGnx.number_of_nodes()
    assert DiG.number_of_edges() == DiGnx.number_of_edges()

    for index, row in cu_M.to_pandas().iterrows():
        assert G.has_edge(row['0'], row['1'])
        assert G.has_edge(row['1'], row['0'])
        # The conversion result is the object under test: checking only `G`
        # would leave the edges of `DiG` unverified.
        assert DiG.has_edge(row['0'], row['1'])
        assert DiG.has_edge(row['1'], row['0'])

예제 #17

0

파일 보기

def test_filter_unreachable(graph_file, source):
    """Check that ``filter_unreachable`` drops max-distance rows from SSSP.

    After filtering, no row may have a distance equal to the maximum value
    of the distance dtype, and at least one row must remain.
    """
    gc.collect()

    cu_M = utils.read_csv_file(graph_file)

    edge_count = str(len(cu_M))
    print("sources size = " + edge_count)
    print("destinations size = " + edge_count)

    # Build the weighted directed graph that SSSP runs on.
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2")

    print("cugraph Solving... ")
    started = time.time()

    df = cugraph.sssp(G, source)

    elapsed = time.time() - started
    print("Time : " + str(elapsed))

    reachable_df = cugraph.filter_unreachable(df)

    # Pick the numpy limits helper that matches the distance dtype.
    if np.issubdtype(df["distance"].dtype, np.integer):
        limits = np.iinfo
    elif np.issubdtype(df["distance"].dtype, np.inexact):
        limits = np.finfo
    else:
        limits = None

    if limits is not None:
        # `inf` must stay a local of this function: query() reads it by
        # name through the "@inf" reference.
        inf = limits(reachable_df["distance"].dtype).max  # noqa: F841
        assert len(reachable_df.query("distance == @inf")) == 0

    assert len(reachable_df) != 0

예제 #18

0

파일 보기

파일: test_overlap.py 프로젝트: lookuptables/cugraph

def test_overlap(managed, pool, graph_file):
    """Compare cugraph overlap coefficients with the CPU reference.

    RMM is finalized and re-initialized with the requested settings.  The
    graph is loaded from ``graph_file + '.mtx'`` (adjacency list) and
    ``graph_file + '.csv'`` (edge list); coefficients must agree to 1e-6.
    """
    gc.collect()

    rmm.finalize()
    rmm_cfg.use_managed_memory = managed
    rmm_cfg.use_pool_allocator = pool
    rmm.initialize()
    assert rmm.is_initialized()

    M = utils.read_mtx_file(graph_file + '.mtx').tocsr()
    cu_M = utils.read_csv_file(graph_file + '.csv')

    offsets = cudf.Series(M.indptr)
    indices = cudf.Series(M.indices)
    G = cugraph.Graph()
    G.add_adj_list(offsets, indices, None)
    pairs = G.get_two_hop_neighbors()

    first, second = pairs['first'], pairs['second']
    cu_coeff = cugraph_call(cu_M, first, second)
    cpu_coeff = cpu_call(M, pairs['first'], pairs['second'])

    assert len(cu_coeff) == len(cpu_coeff)
    for i in range(len(cu_coeff)):
        assert abs(cpu_coeff[i] - cu_coeff[i]) < 1.0e-6

예제 #19

0

파일 보기

파일: test_graph.py 프로젝트: jpurviance/cugraph

def test_to_undirected(graph_file):
    """Check ``DiGraph.to_undirected`` edge by edge and against NetworkX.

    Before conversion each kept edge must exist only in its original
    direction; after conversion it must exist in both directions, and the
    node and edge counts must match NetworkX.
    """
    # Keep only edges with source <= destination so the input is directed.
    gpu_edges = utils.read_csv_file(graph_file)
    gpu_edges = gpu_edges[gpu_edges["0"] <= gpu_edges["1"]]
    gpu_edges = gpu_edges.reset_index(drop=True)

    cpu_edges = utils.read_csv_for_nx(graph_file)
    cpu_edges = cpu_edges[cpu_edges["0"] <= cpu_edges["1"]]
    assert len(gpu_edges) == len(cpu_edges)

    DiG = cugraph.DiGraph()
    DiG.from_cudf_edgelist(gpu_edges, source="0", destination="1")

    DiGnx = nx.from_pandas_edgelist(cpu_edges,
                                    source="0",
                                    target="1",
                                    create_using=nx.DiGraph())

    for _, edge in gpu_edges.to_pandas().iterrows():
        src, dst = edge['0'], edge['1']
        assert DiG.has_edge(src, dst)
        assert not DiG.has_edge(dst, src)

    G = DiG.to_undirected()
    Gnx = DiGnx.to_undirected()

    assert G.number_of_nodes() == Gnx.number_of_nodes()
    assert G.number_of_edges() == Gnx.number_of_edges()

    for _, edge in gpu_edges.to_pandas().iterrows():
        src, dst = edge['0'], edge['1']
        assert G.has_edge(src, dst)
        assert G.has_edge(dst, src)

예제 #20

0

파일 보기

파일: test_symmetrize.py 프로젝트: zeta1999/cugraph

def test_symmetrize_unweighted(managed, pool, graph_file):
    """Symmetrize the unweighted edge list and validate it with ``compare``.

    RMM is reinitialized with the requested allocator settings first; the
    edge list is read from ``graph_file + '.csv'``.
    """
    gc.collect()

    rmm.reinitialize(managed_memory=managed, pool_allocator=pool)
    assert rmm.is_initialized()

    edges = utils.read_csv_file(graph_file + '.csv')

    sym_src, sym_dst = cugraph.symmetrize(edges['0'], edges['1'])

    # The symmetrized output should contain every input edge, in both
    # directions.  The original and symmetrized columns are staged in data
    # frames and handed to `compare`, which performs the check; the two
    # None arguments sit where value columns would go (presumably unused
    # for the unweighted case).
    orig_df = cudf.DataFrame()
    orig_df['src'] = edges['0']
    orig_df['dst'] = edges['1']

    sym_df = cudf.DataFrame()
    sym_df['src_s'] = sym_src
    sym_df['dst_s'] = sym_dst

    compare(
        orig_df['src'],
        orig_df['dst'],
        None,
        sym_df['src_s'],
        sym_df['dst_s'],
        None,
    )

예제 #21

0

파일 보기

def test_strong_cc(managed, pool, graph_file):
    """Compare strongly connected components between cugraph and NetworkX.

    The number of components and the sorted list of component sizes must
    match.
    """
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)
    assert rmm.is_initialized()

    cpu_edges = utils.read_csv_for_nx(graph_file)
    netx_labels = networkx_strong_call(cpu_edges)

    gpu_edges = utils.read_csv_file(graph_file)
    cugraph_labels = cugraph_strong_call(gpu_edges)

    # NetworkX yields a list of components, each a collection of vertex
    # indices, whereas cugraph yields one component label per vertex, so
    # the two results are compared through counts rather than directly.
    assert len(netx_labels) == get_n_uniqs(cugraph_labels)

    # Component sizes, smallest first, on both sides.
    nx_component_sizes = sorted(len(component) for component in netx_labels)
    cg_component_sizes = sorted(get_uniq_counts(cugraph_labels))

    assert nx_component_sizes == cg_component_sizes

예제 #22

0

파일 보기

def test_modularity_clustering_with_edgevals(graph_file, partitions):
    """Compare clustering scores against a random assignment.

    The same graph is built twice, once from an adjacency list and once
    from an edge list.  For each, the score returned by ``cugraph_call``
    must be lower than the score returned by ``random_call``.
    """
    # Load both representations of the graph.
    csr = utils.read_mtx_file(graph_file).tocsr()
    cu_M = utils.read_csv_file(graph_file + '.csv', read_weights_in_sp=False)

    G_adj = cugraph.Graph()
    G_adj.add_adj_list(
        cudf.Series(csr.indptr),
        cudf.Series(csr.indices),
        cudf.Series(csr.data),
    )

    G_edge = cugraph.Graph()
    G_edge.add_edge_list(cu_M['0'], cu_M['1'], cu_M['2'])

    # Same check for each representation, adjacency list first.
    for graph in (G_adj, G_edge):
        cu_vid, cu_score = cugraph_call(graph, partitions)
        rand_vid, rand_score = random_call(graph, partitions)

        # The clustering result must score lower than the random assignment.
        assert cu_score < rand_score

예제 #23

0

파일 보기

def test_force_atlas2(graph_file, score, max_iter, barnes_hut_optimize):
    """Check that the Force Atlas 2 layout reaches a trustworthiness score.

    Trustworthiness score can be used for Force Atlas 2 as the algorithm
    optimizes modularity. The final layout will result in different
    communities being drawn out. We consider here the n x n adjacency matrix
    of the graph as an embedding of the nodes in high dimension. The results
    of force atlas 2 corresponds to the layout in a 2d space. Here we check
    that nodes belonging to the same community or neighbors are close to
    each other in the final embedding. Thresholds are based on the best
    score that is achieved after 500 iterations on a given graph.

    Args:
        graph_file: path to the CSV edge list; the matching ``.mtx`` file is
            found by replacing the last four characters of this path.
        score: threshold the trustworthiness must exceed.
        max_iter: iteration count forwarded to the layout call.
        barnes_hut_optimize: forwarded to the layout call.
    """
    cu_M = utils.read_csv_file(graph_file)
    cu_pos = cugraph_call(cu_M,
                          max_iter=max_iter,
                          pos_list=None,
                          outbound_attraction_distribution=True,
                          lin_log_mode=False,
                          prevent_overlapping=False,
                          edge_weight_influence=1.0,
                          jitter_tolerance=1.0,
                          # Forward the test parameter; it used to be
                          # hard-coded to False and silently ignored.
                          barnes_hut_optimize=barnes_hut_optimize,
                          barnes_hut_theta=0.5,
                          scaling_ratio=2.0,
                          strong_gravity_mode=False,
                          gravity=1.0)

    # The dense adjacency matrix acts as the high-dimensional embedding.
    matrix_file = graph_file[:-4] + '.mtx'
    M = scipy.io.mmread(matrix_file)
    M = M.todense()
    cu_trust = trustworthiness(M, cu_pos[['x', 'y']].to_pandas())
    print(cu_trust, score)
    assert cu_trust > score

예제 #24

0

파일 보기

def test_sssp_edgevals(managed, pool, graph_file, source):
    """Compare weighted SSSP results between cugraph and NetworkX.

    For each vertex whose cugraph distance is not ``max_val`` the distance
    must match NetworkX and (except for the source) must equal the
    predecessor's distance plus the connecting edge weight.  Vertices at
    ``max_val`` must be absent from the NetworkX result.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    M = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)
    cu_paths, max_val = cugraph_call(cu_M, source, edgevals=True)
    nx_paths, Gnx = networkx_call(M, source, edgevals=True)

    err = 0
    for vid in cu_paths:
        # NOTE : If distance type is float64 then cu_paths[vid][0]
        # should be compared against np.finfo(np.float64).max)
        if not (cu_paths[vid][0] != max_val):
            # Unreachable in cugraph: NetworkX must not list the vertex.
            if vid in nx_paths.keys():
                err += 1
            continue

        if cu_paths[vid][0] != nx_paths[vid]:
            err += 1

        if vid != source:
            # Predecessor distance + edge weight must equal this distance.
            pred = cu_paths[vid][1]
            edge_weight = Gnx[pred][vid]['weight']
            if cu_paths[pred][0] + edge_weight != cu_paths[vid][0]:
                err += 1

    assert err == 0

예제 #25

0

파일 보기

def test_degrees_functionality(graph_file):
    """Compare ``DiGraph.degrees()`` with NetworkX in/out degrees.

    Each row of the cugraph result is matched to NetworkX through the value
    in its "vertex" column.
    """
    gc.collect()

    cpu_edges = utils.read_csv_for_nx(graph_file)
    gpu_edges = utils.read_csv_file(graph_file)

    G = cugraph.DiGraph()
    G.from_cudf_edgelist(
        gpu_edges, source="0", destination="1", edge_attr="2"
    )

    Gnx = nx.from_pandas_edgelist(
        cpu_edges, source="0", target="1", create_using=nx.DiGraph()
    )

    df = G.degrees()
    nx_in_degree = Gnx.in_degree()
    nx_out_degree = Gnx.out_degree()

    in_mismatches = 0
    out_mismatches = 0
    for row in range(len(df)):
        if df["in_degree"][row] != nx_in_degree[df["vertex"][row]]:
            in_mismatches += 1
        if df["out_degree"][row] != nx_out_degree[df["vertex"][row]]:
            out_mismatches += 1

    assert in_mismatches == 0
    assert out_mismatches == 0

예제 #26

0

파일 보기

def test_pagerank(managed, pool, graph_file, max_iter, tol, alpha,
                  personalization_perc, has_guess):
    """Compare cugraph PageRank with NetworkX.

    When ``has_guess == 1`` the NetworkX result is passed to cugraph as the
    starting guess and ``max_iter`` is reduced to 5.  A mismatch is counted
    when the scores at a position differ by more than ``tol * 1.1`` and the
    vertex ids at that position are equal; fewer than 1% of the entries may
    mismatch.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    M = utils.read_csv_for_nx(graph_file)
    networkx_pr, networkx_prsn = networkx_call(
        M, max_iter, tol, alpha, personalization_perc
    )

    cu_nstart = None
    if has_guess == 1:
        cu_nstart = cudify(networkx_pr)
        max_iter = 5
    cu_prsn = cudify(networkx_prsn)
    cu_M = utils.read_csv_file(graph_file)
    cugraph_pr = cugraph_call(cu_M, max_iter, tol, alpha, cu_prsn, cu_nstart)

    # Order the reference (vertex, score) pairs by vertex id.
    networkx_pr = sorted(networkx_pr.items(), key=lambda item: item[0])

    mismatches = 0
    assert len(cugraph_pr) == len(networkx_pr)
    for i in range(len(cugraph_pr)):
        score_differs = (
            abs(cugraph_pr[i][1] - networkx_pr[i][1]) > tol * 1.1
        )
        if score_differs and cugraph_pr[i][0] == networkx_pr[i][0]:
            mismatches += 1
    print("Mismatches:", mismatches)
    assert mismatches < (0.01 * len(cugraph_pr))

예제 #27

0

파일 보기

def test_bipartite_api(graph_file):
    """Exercise adding a bipartite node set and reading the sets back.

    Only the add/retrieve API is tested; the datasets currently used are
    not truly bipartite.
    """
    gc.collect()

    cu_M = utils.read_csv_file(graph_file)
    nodes = cudf.concat([cu_M['0'], cu_M['1']]).unique()

    # First half of the unique nodes forms set 1; the rest forms set 2.
    half = int(len(nodes) / 2)
    set1_exp = cudf.Series(nodes[0:half])
    leftover = set(nodes.values_host) - set(set1_exp.values_host)
    set2_exp = cudf.Series(leftover)

    G = cugraph.Graph()
    assert not G.is_bipartite()

    # Register one partition, then load the edges.
    G.add_nodes_from(set1_exp, bipartite='set1')
    G.from_cudf_edgelist(cu_M, source='0', destination='1')

    # The graph reports bipartite because a partition was registered via
    # add_nodes_from().
    assert G.is_bipartite()

    set1, set2 = G.sets()

    # set1 must be the registered partition, set2 everything else.
    assert set1.equals(set1_exp)
    assert set2.equals(set2_exp)

예제 #28

0

파일 보기

파일: test_jaccard.py 프로젝트: thomcom/cugraph

def test_jaccard_two_hop_edge_vals(managed, pool, graph_file):
    """Compare Jaccard on two-hop neighbor pairs between cugraph and NetworkX.

    RMM is reinitialized with the requested allocator settings first.
    Coefficients are compared by index label ``i`` and must agree to 1e-6.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    cpu_edges = utils.read_csv_for_nx(graph_file)
    gpu_edges = utils.read_csv_file(graph_file)

    Gnx = nx.from_pandas_edgelist(
        cpu_edges,
        source='0',
        target='1',
        edge_attr='weight',
        create_using=nx.Graph(),
    )
    G = cugraph.Graph()
    G.from_cudf_edgelist(
        gpu_edges, source='0', destination='1', edge_attr='2'
    )
    pairs = G.get_two_hop_neighbors()

    nx_pairs = [
        (pairs['first'][i], pairs['second'][i]) for i in range(len(pairs))
    ]
    nx_coeff = [p for u, v, p in nx.jaccard_coefficient(Gnx, nx_pairs)]

    df = cugraph.jaccard(G, pairs)
    # NOTE(review): the index is not reset after sorting, so the `[i]`
    # lookups below use the pre-sort labels -- confirm this is intended.
    df = df.sort_values(by=['source', 'destination'])

    assert len(nx_coeff) == len(df)
    for i in range(len(df)):
        assert abs(nx_coeff[i] - df['jaccard_coeff'][i]) < 1.0e-6

예제 #29

0

파일 보기

def test_Graph_from_MultiGraph(graph_file):
    """Check edge counts when collapsing multigraphs into simple graphs.

    The undirected case converts a MultiGraph to a Graph and the directed
    case converts a MultiDiGraph to a DiGraph; in both cases the cugraph
    edge count must equal the count of the NetworkX conversion.
    """
    # FIXME: Migrate to new test fixtures for Graph setup once available
    cuM = utils.read_csv_file(graph_file)
    GM = cugraph.MultiGraph()
    GM.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2")
    nxM = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True)
    GnxM = nx.from_pandas_edgelist(
        nxM,
        source="0",
        target="1",
        edge_attr="weight",
        create_using=nx.MultiGraph(),
    )

    G = cugraph.Graph(GM)
    Gnx = nx.Graph(GnxM)
    assert Gnx.number_of_edges() == G.number_of_edges()

    GdM = cugraph.MultiDiGraph()
    GdM.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2")
    # The reference for the directed case must itself be directed.  Building
    # it as an undirected MultiGraph makes nx.DiGraph() emit both directions
    # of every edge, which only matches cugraph on symmetric input.
    GnxdM = nx.from_pandas_edgelist(
        nxM,
        source="0",
        target="1",
        edge_attr="weight",
        create_using=nx.MultiDiGraph(),
    )
    Gd = cugraph.DiGraph(GdM)
    Gnxd = nx.DiGraph(GnxdM)
    assert Gnxd.number_of_edges() == Gd.number_of_edges()

예제 #30

0

파일 보기

def test_overlap_edge_vals(managed, pool, graph_file):
    """Compare cugraph overlap coefficients (with edge values) to the CPU.

    RMM is finalized and re-initialized with the requested settings.  A
    NaN on the CPU side requires a NaN on the GPU side, a NaN only on the
    GPU side fails, and all other values must agree to within 1e-6.
    """
    gc.collect()

    rmm.finalize()
    rmm_config.use_managed_memory = managed
    rmm_config.use_pool_allocator = pool
    rmm_config.initial_pool_size = 2 << 27
    rmm.initialize()
    assert rmm.is_initialized()

    M = utils.read_csv_for_nx(graph_file).tocsr().sorted_indices()
    cu_M = utils.read_csv_file(graph_file)

    offsets = cudf.Series(M.indptr)
    indices = cudf.Series(M.indices)
    G = cugraph.Graph()
    G.add_adj_list(offsets, indices, None)
    pairs = G.get_two_hop_neighbors()

    cu_coeff = cugraph_call(
        cu_M, pairs['first'], pairs['second'], edgevals=True
    )
    cpu_coeff = cpu_call(M, pairs['first'], pairs['second'])

    assert len(cu_coeff) == len(cpu_coeff)
    for i in range(len(cu_coeff)):
        if np.isnan(cpu_coeff[i]):
            assert np.isnan(cu_coeff[i])
        elif np.isnan(cu_coeff[i]):
            # NaN never compares equal, so this branch always fails.
            assert cpu_coeff[i] == cu_coeff[i]
        else:
            assert abs(cpu_coeff[i] - cu_coeff[i]) < 1.0e-6