Beispiel #1
0
def test_woverlap(managed, pool, graph_file):
    """Check ``cugraph_call`` against ``cpu_call`` on two-hop neighbor pairs.

    The graph in ``graph_file`` is loaded as a CSR adjacency list, the
    two-hop pairs are taken from the resulting cugraph Graph, and both
    implementations are evaluated on those pairs.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    M = utils.read_csv_for_nx(graph_file).tocsr().sorted_indices()
    cu_M = utils.read_csv_file(graph_file)

    G = cugraph.Graph()
    G.from_cudf_adjlist(cudf.Series(M.indptr), cudf.Series(M.indices), None)
    pairs = G.get_two_hop_neighbors()

    cu_coeff = cugraph_call(cu_M, pairs['first'], pairs['second'])
    cpu_coeff = cpu_call(M, pairs['first'], pairs['second'])
    assert len(cu_coeff) == len(cpu_coeff)

    for idx in range(len(cu_coeff)):
        expected = cpu_coeff[idx]
        actual = cu_coeff[idx]
        if np.isnan(expected):
            # NaN on the CPU side must be matched by NaN on the GPU side.
            assert np.isnan(actual)
        elif np.isnan(actual):
            # Only the GPU value is NaN: this comparison cannot succeed,
            # so the mismatch is reported as a failure.
            assert expected == actual
        else:
            assert abs(expected - actual) < 1.0e-6
Beispiel #2
0
def test_rmm_getinfo_uninitialized():
    """Check that ``rmm.get_info()`` raises ``rmm.RMMError`` once finalized.

    RMM is finalized first, the failing call is verified, and RMM is
    re-initialized afterwards.
    """
    rmm._finalize()

    try:
        with pytest.raises(rmm.RMMError):
            rmm.get_info()
    finally:
        # Re-initialize even when the expectation above fails, so this
        # test never leaves RMM finalized for whatever runs next.
        rmm.reinitialize()
Beispiel #3
0
def test_rmm_pool_cupy_allocator_with_stream(stream):
    """Exercise ``rmm.rmm_cupy_allocator`` inside a CuPy stream context.

    ``stream`` selects the stream: the string ``"null"`` picks CuPy's null
    stream, any other value creates a fresh stream. The test is skipped
    when CuPy cannot be imported.
    """
    cupy = pytest.importorskip("cupy")

    rmm.reinitialize(pool_allocator=True)
    cupy.cuda.set_allocator(rmm.rmm_cupy_allocator)

    stream_cls = cupy.cuda.stream.Stream
    stream = stream_cls.null if stream == "null" else stream_cls()

    with stream:
        # Non-empty request: a real pointer backed by an RMM DeviceBuffer.
        # The same name is rebound below so this allocation is released
        # before RMM is re-initialized at the end of the test.
        m = rmm.rmm_cupy_allocator(42)
        assert m.mem.size == 42
        assert m.mem.ptr != 0
        assert isinstance(m.mem._owner, rmm.DeviceBuffer)

        # Zero-byte request: a null pointer, still owned by a DeviceBuffer.
        m = rmm.rmm_cupy_allocator(0)
        assert m.mem.size == 0
        assert m.mem.ptr == 0
        assert isinstance(m.mem._owner, rmm.DeviceBuffer)

        arr = cupy.arange(10)
        assert isinstance(arr.data.mem._owner, rmm.DeviceBuffer)

    # Every allocation known to the RMM pool has to be dropped before
    # rmm.reinitialize(); otherwise it may segfault.
    del arr

    rmm.reinitialize()
Beispiel #4
0
def test_jaccard_two_hop_edge_vals(managed, pool, graph_file):
    """Compare cugraph Jaccard coefficients with NetworkX on two-hop pairs.

    Both graphs are built from edge lists read from ``graph_file``; the
    cugraph graph carries column ``'2'`` as its edge attribute.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    M = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)

    Gnx = nx.from_pandas_edgelist(
        M,
        source='0',
        target='1',
        edge_attr='weight',
        create_using=nx.Graph(),
    )
    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1', edge_attr='2')
    pairs = G.get_two_hop_neighbors()

    # NetworkX reference: one coefficient per (first, second) pair.
    nx_pairs = [
        (pairs['first'][row], pairs['second'][row])
        for row in range(len(pairs))
    ]
    nx_coeff = [coeff for _, _, coeff in nx.jaccard_coefficient(Gnx, nx_pairs)]

    df = cugraph.jaccard(G, pairs).sort_values(by=['source', 'destination'])

    assert len(nx_coeff) == len(df)
    for row in range(len(df)):
        assert abs(nx_coeff[row] - df['jaccard_coeff'][row]) < 1.0e-6
Beispiel #5
0
def test_jaccard_two_hop_edge_vals(managed, pool, graph_file):
    """Compare cugraph Jaccard coefficients with NetworkX on two-hop pairs.

    The cugraph graph is built from the CSR adjacency list (including the
    stored values) of the matrix read from ``graph_file``.
    """
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)
    assert rmm.is_initialized()

    M = utils.read_csv_for_nx(graph_file).tocsr()
    Gnx = nx.DiGraph(M).to_undirected()

    G = cugraph.Graph()
    row_offsets = cudf.Series(M.indptr)
    col_indices = cudf.Series(M.indices)
    values = cudf.Series(M.data)
    G.from_cudf_adjlist(row_offsets, col_indices, values)
    pairs = G.get_two_hop_neighbors()

    # NetworkX reference: one coefficient per (first, second) pair.
    nx_pairs = [
        (pairs['first'][row], pairs['second'][row])
        for row in range(len(pairs))
    ]
    nx_coeff = [coeff for _, _, coeff in nx.jaccard_coefficient(Gnx, nx_pairs)]

    df = cugraph.jaccard(G, pairs['first'], pairs['second'])

    assert len(nx_coeff) == len(df)
    for row in range(len(df)):
        assert abs(nx_coeff[row] - df['jaccard_coeff'][row]) < 1.0e-6
Beispiel #6
0
def test_symmetrize_unweighted(managed, pool, graph_file):
    """Symmetrize an unweighted edge list and validate it with ``compare``.

    The edge list is read from ``graph_file + '.csv'``; columns ``'0'`` and
    ``'1'`` are used as sources and destinations.
    """
    gc.collect()

    rmm.reinitialize(managed_memory=managed, pool_allocator=pool)
    assert rmm.is_initialized()

    cu_M = utils.read_csv_file(graph_file + '.csv')
    sym_sources, sym_destinations = cugraph.symmetrize(cu_M['0'], cu_M['1'])

    # The symmetrized output should contain every edge of the input, in
    # both directions. Both edge lists are packed into data frames and the
    # columns are handed to compare(); no weights are involved, hence the
    # None placeholders.
    sym_df = cudf.DataFrame()
    sym_df['src_s'] = sym_sources
    sym_df['dst_s'] = sym_destinations

    orig_df = cudf.DataFrame()
    orig_df['src'] = cu_M['0']
    orig_df['dst'] = cu_M['1']

    compare(
        orig_df['src'], orig_df['dst'], None,
        sym_df['src_s'], sym_df['dst_s'], None,
    )
Beispiel #7
0
def test_reinitialize_max_pool_size_exceeded():
    """Check that exceeding ``maximum_pool_size`` raises ``MemoryError``.

    The pool is capped at ``1 << 23`` bytes and a buffer is then resized to
    ``1 << 24`` bytes, which must fail.
    """
    rmm.reinitialize(pool_allocator=True,
                     initial_pool_size=0,
                     maximum_pool_size=1 << 23)
    try:
        with pytest.raises(MemoryError):
            rmm.DeviceBuffer().resize(1 << 24)
    finally:
        # Restore the default configuration even when the expectation
        # above fails, so the size-capped pool does not leak into
        # whatever runs next.
        rmm.reinitialize()
Beispiel #8
0
def test_renumber_files(managed, pool, graph_file):
    """Renumber translated vertex ids and map them back via ``numbering``.

    Every source/destination id read from ``graph_file`` is shifted by a
    constant, renumbered with ``cugraph.renumber``, and then recovered
    through the returned numbering.
    """
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)
    assert rmm.is_initialized()

    M = utils.read_csv_for_nx(graph_file)
    sources = cudf.Series(M['0'])
    destinations = cudf.Series(M['1'])

    translate = 1000
    source_translated = cudf.Series([vid + translate for vid in sources])
    dest_translated = cudf.Series([vid + translate for vid in destinations])

    src, dst, numbering = cugraph.renumber(source_translated, dest_translated)

    for row in range(len(sources)):
        # numbering maps a renumbered id back to the translated id.
        assert sources[row] == (numbering[src[row]] - translate)
        assert destinations[row] == (numbering[dst[row]] - translate)
Beispiel #9
0
def test_multi_column_unrenumbering(managed, pool, graph_file):
    """Check PageRank output for multi-column vertex identifiers.

    A graph keyed by the column pairs ``('0', '00')`` / ``('1', '11')`` is
    compared against the same graph keyed by single columns, with the
    expected second column reconstructed from the translation offset.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    translate = 100
    cu_M = utils.read_csv_file(graph_file)
    cu_M['00'] = cu_M['0'] + translate
    cu_M['11'] = cu_M['1'] + translate

    # Multi-column vertices.
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, ['0', '00'], ['1', '11'])
    result_multi = (
        cugraph.pagerank(G)
        .sort_values(by='0')
        .reset_index(drop=True)
    )

    # Single-column vertices on the same edges.
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, '0', '1')
    result_single = cugraph.pagerank(G)

    result_exp = cudf.DataFrame()
    result_exp['0'] = result_single['vertex']
    result_exp['1'] = result_single['vertex'] + translate
    result_exp['pagerank'] = result_single['pagerank']

    assert result_multi.equals(result_exp)
Beispiel #10
0
    def setup(self, worker=None):
        if self.nbytes is not None:
            import rmm

            rmm.reinitialize(pool_allocator=True,
                             managed_memory=False,
                             initial_pool_size=self.nbytes)
Beispiel #11
0
 def _rmm_pool():
     """Re-initialize RMM with a pool sized from ``device_pool_size``."""
     # RMM may require the pool size to be a multiple of 256, so round
     # device_pool_size down to the nearest such multiple.
     aligned_pool_size = (device_pool_size // 256) * 256
     rmm.reinitialize(
         pool_allocator=True,
         initial_pool_size=aligned_pool_size,
     )
Beispiel #12
0
def test_to_undirected(managed, pool, graph_file):
    """Check ``DiGraph.to_undirected()`` against NetworkX.

    Only edges with ``source <= destination`` are kept on both sides
    before the directed graphs are built and converted.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    cu_M = utils.read_csv_file(graph_file)
    cu_M = cu_M[cu_M['0'] <= cu_M['1']].reset_index(drop=True)
    M = utils.read_csv_for_nx(graph_file)
    M = M[M['0'] <= M['1']]
    assert len(cu_M) == len(M)

    # Build the directed graphs from the filtered edge lists.
    DiG = cugraph.DiGraph()
    DiG.from_cudf_edgelist(cu_M, source='0', destination='1')
    DiGnx = nx.from_pandas_edgelist(
        M,
        source='0',
        target='1',
        create_using=nx.DiGraph(),
    )

    G = DiG.to_undirected()
    Gnx = DiGnx.to_undirected()

    assert G.number_of_nodes() == Gnx.number_of_nodes()
    assert G.number_of_edges() == Gnx.number_of_edges()

    # Every edge stored by cugraph must exist in the NetworkX graph.
    edgelist_df = G.edgelist.edgelist_df
    for pos in range(len(edgelist_df)):
        edge = edgelist_df.iloc[pos]
        assert Gnx.has_edge(edge['src'], edge['dst'])
Beispiel #13
0
def test_edge_cut_clustering(managed, pool, graph_file, partitions):
    """Check that partitioning scores a lower edge cut than random assignment.

    The graph is read from ``graph_file`` as an edge list and both
    ``cugraph_call`` and ``random_call`` are evaluated on it with the
    requested number of ``partitions``.
    """
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)

    assert (rmm.is_initialized())

    # Read in the graph and get a cugraph object
    cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False)
    G_edge = cugraph.DiGraph()
    G_edge.from_cudf_edgelist(cu_M, source='0', destination='1')

    # Get the edge_cut score for partitioning versus random assignment;
    # the vertex assignments themselves are not inspected.
    _, cu_score = cugraph_call(G_edge, partitions)
    _, rand_score = random_call(G_edge, partitions)

    # Assert that the partitioning has better (lower) edge_cut than the
    # random assignment
    print(cu_score, rand_score)
    assert cu_score < rand_score
Beispiel #14
0
def test_louvain_with_edgevals(managed, pool, graph_file):
    """Compare weighted Louvain results from cugraph with the NetworkX path.

    The cugraph modularity must exceed 82% of the reference modularity and
    must agree with the modularity recomputed from the cugraph partition
    on the NetworkX graph.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    M = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)
    cu_parts, cu_mod = cugraph_call(cu_M, edgevals=True)
    nx_parts = networkx_call(M)

    # Graph used to recompute modularity scores for the comparison.
    Gnx = nx.from_pandas_edgelist(
        M,
        source='0',
        target='1',
        edge_attr='weight',
        create_using=nx.Graph(),
    )

    # vertex -> partition mapping derived from the cugraph result.
    cu_map = {0: 0}
    for row in range(len(cu_parts)):
        cu_map[cu_parts['vertex'][row]] = cu_parts['partition'][row]
    assert set(nx_parts.keys()) == set(cu_map.keys())

    cu_mod_nx = community.modularity(cu_map, Gnx)
    nx_mod = community.modularity(nx_parts, Gnx)

    assert len(cu_parts) == len(nx_parts)
    assert cu_mod > (.82 * nx_mod)
    assert abs(cu_mod - cu_mod_nx) < .0001
Beispiel #15
0
def test_woverlap(managed, pool, graph_file):
    """Check ``cugraph_call`` against ``cpu_call`` on two-hop neighbor pairs.

    The CPU reference works on a square CSR matrix assembled from the edge
    list in ``graph_file``; the cugraph graph is built from the same file.
    """
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)
    assert rmm.is_initialized()

    Mnx = utils.read_csv_for_nx(graph_file)
    # Matrix dimension: largest vertex id seen in either column, plus one.
    N = max(max(Mnx['0']), max(Mnx['1'])) + 1
    M = scipy.sparse.csr_matrix(
        (Mnx.weight, (Mnx['0'], Mnx['1'])),
        shape=(N, N),
    )

    cu_M = utils.read_csv_file(graph_file)
    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1')
    pairs = G.get_two_hop_neighbors()

    cu_coeff = cugraph_call(cu_M, pairs)
    cpu_coeff = cpu_call(M, pairs['first'], pairs['second'])
    assert len(cu_coeff) == len(cpu_coeff)

    for idx in range(len(cu_coeff)):
        expected = cpu_coeff[idx]
        actual = cu_coeff[idx]
        if np.isnan(expected):
            # NaN on the CPU side must be matched by NaN on the GPU side.
            assert np.isnan(actual)
        elif np.isnan(actual):
            # Only the GPU value is NaN: this comparison cannot succeed,
            # so the mismatch is reported as a failure.
            assert expected == actual
        else:
            assert abs(expected - actual) < 1.0e-6
Beispiel #16
0
def test_add_adj_list_to_edge_list(managed, pool, graph_file):
    """Load a graph from an adjacency list and check its edge-list view.

    The expected sources and destinations come from the COO form of the
    same CSR matrix that supplies the adjacency list.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    Mnx = utils.read_csv_for_nx(graph_file)
    # Matrix dimension: largest vertex id seen in either column, plus one.
    N = max(max(Mnx['0']), max(Mnx['1'])) + 1
    Mcsr = scipy.sparse.csr_matrix(
        (Mnx.weight, (Mnx['0'], Mnx['1'])),
        shape=(N, N),
    )

    offsets = cudf.Series(Mcsr.indptr)
    indices = cudf.Series(Mcsr.indices)

    Mcoo = Mcsr.tocoo()
    sources_exp = cudf.Series(Mcoo.row)
    destinations_exp = cudf.Series(Mcoo.col)

    # Adjacency list in, edge list out.
    G = cugraph.DiGraph()
    G.from_cudf_adjlist(offsets, indices, None)
    edgelist = G.view_edge_list()

    sources_cu = np.array(edgelist['src'])
    destinations_cu = np.array(edgelist['dst'])
    assert compare_series(sources_cu, sources_exp)
    assert compare_series(destinations_cu, destinations_exp)
Beispiel #17
0
def init_once():
    """Initialize UCX and pick the host/device buffer factories.

    Only the first call does any work; later calls return immediately
    because the module-level ``ucp`` is already set. The first call:

    * imports ``ucp`` and initializes it with the scrubbed UCX options,
    * binds the module-level ``host_array`` (NumPy if importable, otherwise
      ``bytearray``) and ``device_array`` (RMM if importable, otherwise
      Numba, otherwise a stub that raises ``RuntimeError`` when called),
    * re-initializes RMM with a memory pool when the ``rmm.pool-size``
      dask config value is set.

    Raises
    ------
    ImportError
        If ``rmm.pool-size`` is configured but RMM cannot be imported.
    """
    global ucp, host_array, device_array
    if ucp is not None:
        return

    import ucp as _ucp

    ucp = _ucp

    # remove/process dask.ucx flags for valid ucx options
    ucx_config = _scrub_ucx_config()

    ucp.init(options=ucx_config, env_takes_precedence=True)

    # Find the function, `host_array()`, to use when allocating new host arrays
    try:
        import numpy

        host_array = lambda n: numpy.empty((n,), dtype="u1")
    except ImportError:
        host_array = lambda n: bytearray(n)

    # Find the function, `device_array()`, to use when allocating new CUDA
    # arrays
    try:
        import rmm

        if hasattr(rmm, "DeviceBuffer"):
            device_array = lambda n: rmm.DeviceBuffer(size=n)
        else:  # pre-0.11.0
            import numba.cuda

            def rmm_device_array(n):
                a = rmm.device_array(n, dtype="u1")
                weakref.finalize(a, numba.cuda.current_context)
                return a

            device_array = rmm_device_array
    except ImportError:
        try:
            import numba.cuda

            def numba_device_array(n):
                a = numba.cuda.device_array((n,), dtype="u1")
                weakref.finalize(a, numba.cuda.current_context)
                return a

            device_array = numba_device_array
        except ImportError:

            def device_array(n):
                raise RuntimeError(
                    "In order to send/recv CUDA arrays, Numba or RMM is required"
                )

    pool_size_str = dask.config.get("rmm.pool-size")
    if pool_size_str is not None:
        # Import again here instead of relying on the attempt above: when
        # that import failed, the function-local name ``rmm`` was never
        # bound and using it would raise UnboundLocalError. Importing here
        # surfaces a clear ImportError instead.
        import rmm

        pool_size = parse_bytes(pool_size_str)
        rmm.reinitialize(
            pool_allocator=True, managed_memory=False, initial_pool_size=pool_size
        )
Beispiel #18
0
def test_delete_edge_list_delete_adj_list(managed, pool, graph_file):
    """Check that graph views raise once the loaded data has been deleted.

    The graph is loaded first from an edge list and then from an adjacency
    list; after each deletion the other view is expected to raise.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    Mnx = utils.read_csv_for_nx(graph_file)
    df = cudf.DataFrame()
    df['src'] = cudf.Series(Mnx['0'])
    df['dst'] = cudf.Series(Mnx['1'])

    # Matrix dimension: largest vertex id seen in either column, plus one.
    N = max(max(Mnx['0']), max(Mnx['1'])) + 1
    Mcsr = scipy.sparse.csr_matrix(
        (Mnx.weight, (Mnx['0'], Mnx['1'])),
        shape=(N, N),
    )
    offsets = cudf.Series(Mcsr.indptr)
    indices = cudf.Series(Mcsr.indices)

    G = cugraph.DiGraph()

    # Edge list loaded, then deleted: the adjacency view must raise.
    G.from_cudf_edgelist(df, source='src', destination='dst')
    G.delete_edge_list()
    with pytest.raises(Exception):
        G.view_adj_list()

    # Adjacency list loaded, then deleted: the edge-list view must raise.
    G.from_cudf_adjlist(offsets, indices, None)
    G.delete_adj_list()
    with pytest.raises(Exception):
        G.view_edge_list()
Beispiel #19
0
def test_sssp_edgevals(managed, pool, graph_file, source):
    """Compare weighted SSSP results from cugraph with the NetworkX path.

    For every vertex reported by cugraph the distance is compared with the
    reference, and the predecessor relation is checked for consistency.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    M = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)
    cu_paths, max_val = cugraph_call(cu_M, source, edgevals=True)
    nx_paths, Gnx = networkx_call(M, source, edgevals=True)

    # Count mismatches.
    err = 0
    for vid in cu_paths:
        # NOTE : If distance type is float64 then cu_paths[vid][0]
        # should be compared against np.finfo(np.float64).max)
        if cu_paths[vid][0] == max_val:
            # cugraph reports the vertex as unreachable, so the reference
            # must not know a path to it either.
            if vid in nx_paths.keys():
                err += 1
            continue

        # Reachable vertex: distances must agree.
        if cu_paths[vid][0] != nx_paths[vid]:
            err += 1
        # check pred dist + edge_weight = current dist
        if vid != source:
            pred = cu_paths[vid][1]
            edge_weight = Gnx[pred][vid]['weight']
            if cu_paths[pred][0] + edge_weight != cu_paths[vid][0]:
                err += 1

    assert err == 0
Beispiel #20
0
def test_Graph_from_MultiGraph(managed, pool, graph_file):
    """Check that a Graph built from a MultiGraph matches a direct Graph.

    A MultiGraph carrying the edge attributes ``'2'``, ``'3'`` and ``'4'``
    is reduced to a Graph on attribute ``'2'`` and its edge list is compared
    with that of a Graph built directly from the same data.
    """
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)

    assert (rmm.is_initialized())

    cu_M = utils.read_csv_file(graph_file)

    # create dataframe for MultiGraph
    cu_M['3'] = cudf.Series([2.0] * len(cu_M), dtype=np.float32)
    cu_M['4'] = cudf.Series([3.0] * len(cu_M), dtype=np.float32)

    # initialize MultiGraph
    G_multi = cugraph.MultiGraph()
    G_multi.from_cudf_edgelist(cu_M,
                               source='0',
                               destination='1',
                               edge_attr=['2', '3', '4'])

    # initialize Graph
    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1', edge_attr='2')

    # create Graph from MultiGraph
    G_from_multi = cugraph.Graph(G_multi, edge_attr='2')

    # ``==`` on two data frames is element-wise, so asserting on its result
    # does not express "the frames are equal"; use equals() for a single
    # boolean answer.
    assert G.edgelist.edgelist_df.equals(G_from_multi.edgelist.edgelist_df)
Beispiel #21
0
def test_pagerank(managed, pool, graph_file, max_iter, tol, alpha,
                  personalization_perc, has_guess):
    """Compare cugraph PageRank with the NetworkX reference.

    When ``has_guess`` is 1 the reference result is fed to cugraph as the
    starting guess and the iteration count is cut to 5. Fewer than 1% of
    the vertices may differ by more than ``tol * 1.1``.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    M = utils.read_csv_for_nx(graph_file)
    networkx_pr, networkx_prsn = networkx_call(
        M, max_iter, tol, alpha, personalization_perc
    )

    cu_nstart = None
    if has_guess == 1:
        cu_nstart = cudify(networkx_pr)
        max_iter = 5
    cu_prsn = cudify(networkx_prsn)
    cu_M = utils.read_csv_file(graph_file)
    cugraph_pr = cugraph_call(cu_M, max_iter, tol, alpha, cu_prsn, cu_nstart)

    # Count mismatches against the reference, ordered by vertex id.
    networkx_pr = sorted(networkx_pr.items(), key=lambda item: item[0])
    err = 0
    assert len(cugraph_pr) == len(networkx_pr)
    for idx in range(len(cugraph_pr)):
        if abs(cugraph_pr[idx][1] - networkx_pr[idx][1]) > tol * 1.1:
            # Only count the difference when both entries refer to the
            # same vertex.
            if cugraph_pr[idx][0] == networkx_pr[idx][0]:
                err += 1
    print("Mismatches:", err)
    assert err < (0.01 * len(cugraph_pr))
Beispiel #22
0
def test_add_edge_list_to_adj_list(managed, pool, graph_file):
    """Load a graph from an edge list and check its adjacency-list view.

    The expected offsets and indices come from the CSR form of the matrix
    read from ``graph_file``; no edge values are expected back.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    cu_M = utils.read_csv_file(graph_file)

    M = utils.read_csv_for_nx(graph_file).tocsr()
    if M is None:
        raise TypeError('Could not read the input graph')
    if M.shape[0] != M.shape[1]:
        raise TypeError('Shape is not square')

    offsets_exp, indices_exp = M.indptr, M.indices

    # Edge list in, adjacency list out.
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source='0', target='1')
    offsets_cu, indices_cu, values_cu = G.view_adj_list()

    assert compare_offsets(offsets_cu, offsets_exp)
    assert compare_series(indices_cu, indices_exp)
    assert values_cu is None
Beispiel #23
0
def test_jaccard_edgevals(managed, pool, graph_file):
    """Compare weighted-graph Jaccard coefficients with the NetworkX path.

    A coefficient counts as a mismatch when it differs by more than
    ``tol * 1.1`` on an edge that both results list at the same position.
    """
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)
    assert rmm.is_initialized()

    M = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)
    cu_src, cu_dst, cu_coeff = cugraph_call(cu_M, edgevals=True)
    nx_src, nx_dst, nx_coeff = networkx_call(M)

    # Count mismatches.
    tol = 1.0e-06
    err = 0

    assert len(cu_coeff) == len(nx_coeff)
    for idx in range(len(cu_coeff)):
        if abs(cu_coeff[idx] - nx_coeff[idx]) > tol*1.1:
            if cu_src[idx] == nx_src[idx] and cu_dst[idx] == nx_dst[idx]:
                err += 1

    print("Mismatches:  %d" % err)
    assert err == 0
Beispiel #24
0
def test_add_adj_list_to_edge_list(managed, pool, graph_file):
    """Load a graph from an adjacency list and check its edge-list view.

    The expected sources and destinations come from the COO form of the
    same matrix that supplies the CSR adjacency list.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    M = utils.read_csv_for_nx(graph_file).tocsr()
    if M is None:
        raise TypeError('Could not read the input graph')
    if M.shape[0] != M.shape[1]:
        raise TypeError('Shape is not square')

    offsets = cudf.Series(M.indptr)
    indices = cudf.Series(M.indices)

    M = M.tocoo()
    sources_exp = cudf.Series(M.row)
    destinations_exp = cudf.Series(M.col)

    # Adjacency list in, edge list out.
    G = cugraph.DiGraph()
    G.from_cudf_adjlist(offsets, indices, None)
    edgelist = G.view_edge_list()

    sources_cu = np.array(edgelist['src'])
    destinations_cu = np.array(edgelist['dst'])
    assert compare_series(sources_cu, sources_exp)
    assert compare_series(destinations_cu, destinations_exp)
Beispiel #25
0
def test_renumber_files_multi_col(managed, pool, graph_file):
    """Renumber two-column vertex ids and map them back via ``numbering``.

    Each vertex is identified by its translated id together with its
    original id; the original id is then recovered from column ``'0'`` of
    the returned numbering.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    M = utils.read_csv_for_nx(graph_file)
    sources = cudf.Series(M['0'])
    destinations = cudf.Series(M['1'])

    translate = 1000

    gdf = cudf.DataFrame()
    gdf['src_old'] = sources
    gdf['dst_old'] = destinations
    gdf['src'] = sources + translate
    gdf['dst'] = destinations + translate

    src, dst, numbering = cugraph.renumber_from_cudf(
        gdf, ['src', 'src_old'], ['dst', 'dst_old']
    )

    for row in range(len(gdf)):
        # Column '0' of numbering holds the translated id.
        assert sources[row] == (numbering['0'][src[row]] - translate)
        assert destinations[row] == (numbering['0'][dst[row]] - translate)
Beispiel #26
0
def test_delete_edge_list_delete_adj_list(managed, pool, graph_file):
    """Check that graph views raise once the loaded data has been deleted.

    The graph is loaded first from an edge list and then from an adjacency
    list; after each deletion the other view is expected to raise.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    M = utils.read_csv_for_nx(graph_file)
    df = cudf.DataFrame()
    df['src'] = cudf.Series(M.row)
    df['dst'] = cudf.Series(M.col)

    M = M.tocsr()
    if M is None:
        raise TypeError('Could not read the input graph')
    if M.shape[0] != M.shape[1]:
        raise TypeError('Shape is not square')

    offsets = cudf.Series(M.indptr)
    indices = cudf.Series(M.indices)

    G = cugraph.DiGraph()

    # Edge list loaded, then deleted: the adjacency view must raise.
    G.from_cudf_edgelist(df, source='src', target='dst')
    G.delete_edge_list()
    with pytest.raises(Exception):
        G.view_adj_list()

    # Adjacency list loaded, then deleted: the edge-list view must raise.
    G.from_cudf_adjlist(offsets, indices, None)
    G.delete_adj_list()
    with pytest.raises(Exception):
        G.view_edge_list()
Beispiel #27
0
def test_strong_cc(managed, pool, graph_file):
    """Compare strongly connected components from cugraph and NetworkX.

    Both the number of components and the sorted list of component sizes
    must agree.
    """
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)
    assert rmm.is_initialized()

    M = utils.read_csv_for_nx(graph_file)
    netx_labels = networkx_strong_call(M)

    cu_M = utils.read_csv_file(graph_file)
    cugraph_labels = cugraph_strong_call(cu_M)

    # NetX returns a list of components, each component being a
    # collection (set{}) of vertex indices, while cugraph returns a
    # component label for each vertex.
    nx_n_components = len(netx_labels)
    cg_n_components = get_n_uniqs(cugraph_labels)
    assert nx_n_components == cg_n_components

    # Component sizes in ascending order on both sides.
    lst_nx_components_lens = sorted(len(component) for component in netx_labels)
    lst_cg_components_lens = sorted(get_uniq_counts(cugraph_labels))
    assert lst_nx_components_lens == lst_cg_components_lens
Beispiel #28
0
def test_degrees_functionality(managed, pool, graph_file):
    """Compare ``DiGraph.degrees()`` with NetworkX in- and out-degrees."""
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    M = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)

    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source='0', target='1', edge_attr='2')
    Gnx = nx.DiGraph(M)

    df = G.degrees()
    nx_in_degree = Gnx.in_degree()
    nx_out_degree = Gnx.out_degree()

    # Count the rows whose degree disagrees with the NetworkX value.
    err_in_degree = 0
    err_out_degree = 0
    for vertex in range(len(df)):
        if df['in_degree'][vertex] != nx_in_degree[vertex]:
            err_in_degree += 1
        if df['out_degree'][vertex] != nx_out_degree[vertex]:
            err_out_degree += 1

    assert err_in_degree == 0
    assert err_out_degree == 0
Beispiel #29
0
def set_allocator(
    allocator="default",
    pool=False,
    initial_pool_size=None,
    enable_logging=False,
):
    """
    Configure the GPU memory allocator by re-initializing RMM.

    Call this only once, before any cudf objects are created.

    Parameters
    ----------
    allocator : {"default", "managed"}
        "default" selects the default allocator, "managed" selects the
        managed memory allocator. Any value other than "managed" is
        treated like "default".
    pool : bool
        Whether to enable the memory pool.
    initial_pool_size : int
        Size of the memory pool in bytes. With ``None`` (the default),
        1/2 of total GPU memory is used. Ignored when ``pool=False``.
    enable_logging : bool, optional
        Whether to enable logging (default ``False``). Enabling it
        introduces performance overhead.
    """
    rmm.reinitialize(
        pool_allocator=pool,
        managed_memory=(allocator == "managed"),
        initial_pool_size=initial_pool_size,
        logging=enable_logging,
    )
Beispiel #30
0
def test_ecg_clustering(managed,
                        pool,
                        graph_file,
                        min_weight,
                        ensemble_size):
    """Check the ECG clustering score against the golden score.

    The score returned by ``cugraph_call`` must exceed 95% of the value
    returned by ``golden_call`` for the same ``graph_file``.
    """
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)
    assert rmm.is_initialized()

    # Read in the graph and build a cugraph object carrying edge values.
    cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False)
    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1', edge_attr='2')

    # Score of the computed partitioning versus the golden reference; the
    # number of partitions is returned as well but not checked.
    cu_score, _ = cugraph_call(G, min_weight, ensemble_size)
    golden_score = golden_call(graph_file)

    assert cu_score > (.95 * golden_score)