def test_woverlap(managed, pool, graph_file):
    """Compare cuGraph weighted-overlap coefficients with the CPU reference.

    The graph is read with ``utils.read_csv_for_nx``, converted to CSR with
    sorted indices, and loaded into a ``cugraph.Graph`` as an adjacency
    list. Coefficients are evaluated on the graph's two-hop neighbor pairs.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    csr = utils.read_csv_for_nx(graph_file).tocsr().sorted_indices()
    cu_M = utils.read_csv_file(graph_file)

    offsets = cudf.Series(csr.indptr)
    indices = cudf.Series(csr.indices)
    G = cugraph.Graph()
    G.from_cudf_adjlist(offsets, indices, None)
    pairs = G.get_two_hop_neighbors()

    first, second = pairs['first'], pairs['second']
    cu_coeff = cugraph_call(cu_M, first, second)
    cpu_coeff = cpu_call(csr, first, second)

    assert len(cu_coeff) == len(cpu_coeff)
    for idx in range(len(cu_coeff)):
        expected = cpu_coeff[idx]
        actual = cu_coeff[idx]
        if np.isnan(expected):
            # A NaN reference value must be matched by a NaN GPU value.
            assert np.isnan(actual)
        elif np.isnan(actual):
            # Only the GPU value is NaN: this comparison reports the mismatch.
            assert expected == actual
        else:
            assert abs(expected - actual) < 1.0e-6
def test_rmm_getinfo_uninitialized():
    """Check that ``rmm.get_info()`` raises ``RMMError`` after finalization.

    RMM is re-initialized in a ``finally`` clause so that a failing
    expectation does not leave the allocator finalized for later tests.
    """
    rmm._finalize()
    try:
        with pytest.raises(rmm.RMMError):
            rmm.get_info()
    finally:
        # Restore a usable allocator whether or not the check above passed.
        rmm.reinitialize()
def test_rmm_pool_cupy_allocator_with_stream(stream):
    """Exercise the RMM CuPy allocator with a pool, under a CuPy stream.

    ``stream`` selects the stream: the string ``"null"`` uses CuPy's null
    stream, any other value creates a fresh ``cupy.cuda.stream.Stream``.
    The test is skipped when CuPy cannot be imported.
    """
    cupy = pytest.importorskip("cupy")

    rmm.reinitialize(pool_allocator=True)
    cupy.cuda.set_allocator(rmm.rmm_cupy_allocator)

    stream = (
        cupy.cuda.stream.Stream.null
        if stream == "null"
        else cupy.cuda.stream.Stream()
    )

    with stream:
        # Non-empty allocation: sized as requested, non-null pointer.
        alloc = rmm.rmm_cupy_allocator(42)
        assert alloc.mem.size == 42
        assert alloc.mem.ptr != 0
        assert isinstance(alloc.mem._owner, rmm.DeviceBuffer)

        # Zero-byte allocation: reported with a null pointer. The same name
        # is rebound on purpose so the 42-byte allocation is dropped here.
        alloc = rmm.rmm_cupy_allocator(0)
        assert alloc.mem.size == 0
        assert alloc.mem.ptr == 0
        assert isinstance(alloc.mem._owner, rmm.DeviceBuffer)

        arr = cupy.arange(10)
        assert isinstance(arr.data.mem._owner, rmm.DeviceBuffer)

    # Deleting all allocations known by the RMM pool is required
    # before rmm.reinitialize(), otherwise it may segfault.
    del arr

    rmm.reinitialize()
def test_jaccard_two_hop_edge_vals(managed, pool, graph_file):
    """Check Jaccard scores on two-hop pairs of a weighted graph vs NetworkX.

    Both graphs are built from edge lists; the cuGraph graph carries column
    ``'2'`` as its edge attribute.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    nx_edges = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)

    Gnx = nx.from_pandas_edgelist(
        nx_edges,
        source='0',
        target='1',
        edge_attr='weight',
        create_using=nx.Graph(),
    )
    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1', edge_attr='2')
    pairs = G.get_two_hop_neighbors()

    # NetworkX needs the vertex pairs as a list of (u, v) tuples.
    nx_pairs = [
        (pairs['first'][i], pairs['second'][i]) for i in range(len(pairs))
    ]
    nx_coeff = [
        score for _, _, score in nx.jaccard_coefficient(Gnx, nx_pairs)
    ]

    df = cugraph.jaccard(G, pairs).sort_values(by=['source', 'destination'])

    assert len(nx_coeff) == len(df)
    for i in range(len(df)):
        assert abs(nx_coeff[i] - df['jaccard_coeff'][i]) < 1.0e-6
def test_jaccard_two_hop_edge_vals(managed, pool, graph_file):
    """Check Jaccard scores on two-hop pairs (adjacency-list input).

    The graph is loaded into cuGraph from CSR offsets, indices and values;
    the NetworkX reference is the undirected view of the same matrix.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    csr = utils.read_csv_for_nx(graph_file).tocsr()
    Gnx = nx.DiGraph(csr).to_undirected()

    G = cugraph.Graph()
    G.from_cudf_adjlist(
        cudf.Series(csr.indptr),
        cudf.Series(csr.indices),
        cudf.Series(csr.data),
    )
    pairs = G.get_two_hop_neighbors()

    # NetworkX needs the vertex pairs as a list of (u, v) tuples.
    nx_pairs = [
        (pairs['first'][i], pairs['second'][i]) for i in range(len(pairs))
    ]
    nx_coeff = [
        score for _, _, score in nx.jaccard_coefficient(Gnx, nx_pairs)
    ]

    df = cugraph.jaccard(G, pairs['first'], pairs['second'])

    assert len(nx_coeff) == len(df)
    for i in range(len(df)):
        assert abs(nx_coeff[i] - df['jaccard_coeff'][i]) < 1.0e-6
def test_symmetrize_unweighted(managed, pool, graph_file):
    """Symmetrize an unweighted edge list and validate it with ``compare``.

    The input is read from ``graph_file + '.csv'``.
    """
    gc.collect()

    rmm.reinitialize(managed_memory=managed, pool_allocator=pool)
    assert rmm.is_initialized()

    cu_M = utils.read_csv_file(graph_file + '.csv')
    src_col, dst_col = cu_M['0'], cu_M['1']

    sym_sources, sym_destinations = cugraph.symmetrize(src_col, dst_col)

    # The symmetrized output should contain every input edge, in both
    # directions. Both edge lists are placed in data frames and handed to
    # `compare`, which performs the check; no weights are involved, hence
    # the `None` value arguments.
    sym_df = cudf.DataFrame()
    sym_df['src_s'] = sym_sources
    sym_df['dst_s'] = sym_destinations

    orig_df = cudf.DataFrame()
    orig_df['src'] = cu_M['0']
    orig_df['dst'] = cu_M['1']

    compare(
        orig_df['src'], orig_df['dst'], None,
        sym_df['src_s'], sym_df['dst_s'], None,
    )
def test_reinitialize_max_pool_size_exceeded():
    """Check that growing past ``maximum_pool_size`` raises ``MemoryError``.

    The pool is capped at ``1 << 23`` bytes and a buffer is resized to
    ``1 << 24`` bytes. RMM is re-initialized with default settings in a
    ``finally`` clause so the capped pool never leaks into later tests.
    """
    rmm.reinitialize(
        pool_allocator=True,
        initial_pool_size=0,
        maximum_pool_size=1 << 23,
    )
    try:
        with pytest.raises(MemoryError):
            rmm.DeviceBuffer().resize(1 << 24)
    finally:
        # Drop the size cap whether or not the expectation held.
        rmm.reinitialize()
def test_renumber_files(managed, pool, graph_file):
    """Renumber translated vertex ids and verify the mapping round-trips.

    Every source/destination id is shifted by a constant before
    renumbering; mapping the renumbered ids back through ``numbering`` and
    removing the shift must reproduce the original ids.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    edges = utils.read_csv_for_nx(graph_file)
    sources = cudf.Series(edges['0'])
    destinations = cudf.Series(edges['1'])

    translate = 1000
    shifted_src = cudf.Series([x + translate for x in sources])
    shifted_dst = cudf.Series([x + translate for x in destinations])

    src, dst, numbering = cugraph.renumber(shifted_src, shifted_dst)

    for row in range(len(sources)):
        assert sources[row] == (numbering[src[row]] - translate)
        assert destinations[row] == (numbering[dst[row]] - translate)
def test_multi_column_unrenumbering(managed, pool, graph_file):
    """Check PageRank output for multi-column vertex ids.

    A second id column is derived by adding a constant offset to each
    vertex column. PageRank on the two-column graph, sorted by column
    ``'0'``, must equal the single-column result extended with the same
    offset column.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    translate = 100
    cu_M = utils.read_csv_file(graph_file)
    cu_M['00'] = cu_M['0'] + translate
    cu_M['11'] = cu_M['1'] + translate

    # Multi-column vertices: (original id, translated id).
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, ['0', '00'], ['1', '11'])
    multi_scores = cugraph.pagerank(G)
    result_multi = multi_scores.sort_values(by='0').reset_index(drop=True)

    # Single-column reference.
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, '0', '1')
    result_single = cugraph.pagerank(G)

    result_exp = cudf.DataFrame()
    result_exp['0'] = result_single['vertex']
    result_exp['1'] = result_single['vertex'] + translate
    result_exp['pagerank'] = result_single['pagerank']

    assert result_multi.equals(result_exp)
def setup(self, worker=None): if self.nbytes is not None: import rmm rmm.reinitialize(pool_allocator=True, managed_memory=False, initial_pool_size=self.nbytes)
def _rmm_pool():
    """Re-initialize RMM with a pool allocator sized from ``device_pool_size``."""
    # RMM may require the pool size to be a multiple of 256, so the
    # requested size is rounded down to the nearest such multiple.
    aligned_pool_size = (device_pool_size // 256) * 256
    rmm.reinitialize(
        pool_allocator=True,
        initial_pool_size=aligned_pool_size,
    )
def test_to_undirected(managed, pool, graph_file):
    """Check ``DiGraph.to_undirected`` against the NetworkX equivalent.

    Only edges with ``source <= destination`` are kept before building the
    directed graphs; the undirected results must agree on node count, edge
    count, and edge membership.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    cu_M = utils.read_csv_file(graph_file)
    cu_M = cu_M[cu_M['0'] <= cu_M['1']].reset_index(drop=True)

    nx_edges = utils.read_csv_for_nx(graph_file)
    nx_edges = nx_edges[nx_edges['0'] <= nx_edges['1']]
    assert len(cu_M) == len(nx_edges)

    # Build the directed graphs from the filtered edge lists.
    DiG = cugraph.DiGraph()
    DiG.from_cudf_edgelist(cu_M, source='0', destination='1')
    DiGnx = nx.from_pandas_edgelist(
        nx_edges, source='0', target='1', create_using=nx.DiGraph()
    )

    G = DiG.to_undirected()
    Gnx = DiGnx.to_undirected()

    assert G.number_of_nodes() == Gnx.number_of_nodes()
    assert G.number_of_edges() == Gnx.number_of_edges()

    # Every cuGraph edge must also be present in the NetworkX graph.
    edgelist_df = G.edgelist.edgelist_df
    for pos in range(len(edgelist_df)):
        u = edgelist_df.iloc[pos]['src']
        v = edgelist_df.iloc[pos]['dst']
        assert Gnx.has_edge(u, v)
def test_edge_cut_clustering(managed, pool, graph_file, partitions):
    """Check that cuGraph partitioning gives a lower edge cut than random.

    The graph is loaded from an edge list into a ``cugraph.DiGraph`` and
    split into ``partitions`` parts by ``cugraph_call``; ``random_call``
    provides the random-assignment baseline score.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    # Read in the graph and get a cugraph object
    cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False)

    G_edge = cugraph.DiGraph()
    G_edge.from_cudf_edgelist(cu_M, source='0', destination='1')

    # Get the edge_cut score for partitioning versus random assignment
    cu_vid, cu_score = cugraph_call(G_edge, partitions)
    rand_vid, rand_score = random_call(G_edge, partitions)

    # Assert that the partitioning has better edge_cut than the random
    # assignment
    print(cu_score, rand_score)
    assert cu_score < rand_score
def test_louvain_with_edgevals(managed, pool, graph_file):
    """Compare cuGraph Louvain (with edge weights) against the NetworkX path.

    The cuGraph partitioning is scored with ``community.modularity`` on the
    NetworkX graph. That score must match the modularity cuGraph reports,
    and cuGraph's modularity must exceed 82% of the reference modularity.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    nx_edges = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)

    cu_parts, cu_mod = cugraph_call(cu_M, edgevals=True)
    nx_parts = networkx_call(nx_edges)

    # Calculating modularity scores for comparison
    Gnx = nx.from_pandas_edgelist(
        nx_edges,
        source='0',
        target='1',
        edge_attr='weight',
        create_using=nx.Graph(),
    )

    # vertex -> partition mapping for the cuGraph result; the mapping is
    # seeded with vertex 0 before the rows are copied in.
    cu_map = {0: 0}
    for row in range(len(cu_parts)):
        vertex = cu_parts['vertex'][row]
        cu_map[vertex] = cu_parts['partition'][row]

    assert set(nx_parts.keys()) == set(cu_map.keys())

    cu_mod_nx = community.modularity(cu_map, Gnx)
    nx_mod = community.modularity(nx_parts, Gnx)

    assert len(cu_parts) == len(nx_parts)
    assert cu_mod > (.82 * nx_mod)
    assert abs(cu_mod - cu_mod_nx) < .0001
def test_woverlap(managed, pool, graph_file):
    """Compare cuGraph weighted-overlap coefficients with the CPU reference.

    The CPU side works on a square SciPy CSR matrix built from the edge
    list; the cuGraph side works on a ``cugraph.Graph`` built from the same
    file. Coefficients are evaluated on the graph's two-hop neighbor pairs.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    Mnx = utils.read_csv_for_nx(graph_file)
    # The matrix side length is one more than the largest vertex id.
    num_verts = max(max(Mnx['0']), max(Mnx['1'])) + 1
    adjacency = scipy.sparse.csr_matrix(
        (Mnx.weight, (Mnx['0'], Mnx['1'])),
        shape=(num_verts, num_verts),
    )

    cu_M = utils.read_csv_file(graph_file)
    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1')
    pairs = G.get_two_hop_neighbors()

    cu_coeff = cugraph_call(cu_M, pairs)
    cpu_coeff = cpu_call(adjacency, pairs['first'], pairs['second'])

    assert len(cu_coeff) == len(cpu_coeff)
    for idx in range(len(cu_coeff)):
        expected = cpu_coeff[idx]
        actual = cu_coeff[idx]
        if np.isnan(expected):
            # A NaN reference value must be matched by a NaN GPU value.
            assert np.isnan(actual)
        elif np.isnan(actual):
            # Only the GPU value is NaN: this comparison reports the mismatch.
            assert expected == actual
        else:
            assert abs(expected - actual) < 1.0e-6
def test_add_adj_list_to_edge_list(managed, pool, graph_file):
    """Load a graph from an adjacency list and check the derived edge list.

    The expected sources/destinations come from converting the same CSR
    matrix to COO with SciPy.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    Mnx = utils.read_csv_for_nx(graph_file)
    # The matrix side length is one more than the largest vertex id.
    num_verts = max(max(Mnx['0']), max(Mnx['1'])) + 1
    Mcsr = scipy.sparse.csr_matrix(
        (Mnx.weight, (Mnx['0'], Mnx['1'])),
        shape=(num_verts, num_verts),
    )

    offsets = cudf.Series(Mcsr.indptr)
    indices = cudf.Series(Mcsr.indices)

    Mcoo = Mcsr.tocoo()
    sources_exp = cudf.Series(Mcoo.row)
    destinations_exp = cudf.Series(Mcoo.col)

    # Load the adjacency list, then read it back as an edge list.
    G = cugraph.DiGraph()
    G.from_cudf_adjlist(offsets, indices, None)
    edgelist = G.view_edge_list()

    sources_cu = np.array(edgelist['src'])
    destinations_cu = np.array(edgelist['dst'])

    assert compare_series(sources_cu, sources_exp)
    assert compare_series(destinations_cu, destinations_exp)
def init_once():
    """Initialize UCX-Py and pick the host/device array allocators.

    The first call imports ``ucp``, initializes it with the scrubbed UCX
    configuration, and sets the module globals ``host_array`` and
    ``device_array``. Later calls return immediately because the global
    ``ucp`` is already set.

    If the Dask config key ``rmm.pool-size`` is set, an RMM memory pool of
    that size is created.

    Raises
    ------
    ImportError
        If ``rmm.pool-size`` is configured but RMM cannot be imported.
    """
    global ucp, host_array, device_array
    if ucp is not None:
        return

    import ucp as _ucp

    ucp = _ucp

    # remove/process dask.ucx flags for valid ucx options
    ucx_config = _scrub_ucx_config()

    ucp.init(options=ucx_config, env_takes_precedence=True)

    # Find the function, `host_array()`, to use when allocating new host arrays
    try:
        import numpy

        host_array = lambda n: numpy.empty((n,), dtype="u1")
    except ImportError:
        host_array = lambda n: bytearray(n)

    # Find the function, `device_array()`, to use when allocating new CUDA
    # arrays
    try:
        import rmm

        if hasattr(rmm, "DeviceBuffer"):
            device_array = lambda n: rmm.DeviceBuffer(size=n)
        else:  # pre-0.11.0
            import numba.cuda

            def rmm_device_array(n):
                a = rmm.device_array(n, dtype="u1")
                weakref.finalize(a, numba.cuda.current_context)
                return a

            device_array = rmm_device_array
    except ImportError:
        try:
            import numba.cuda

            def numba_device_array(n):
                a = numba.cuda.device_array((n,), dtype="u1")
                weakref.finalize(a, numba.cuda.current_context)
                return a

            device_array = numba_device_array

        except ImportError:

            def device_array(n):
                raise RuntimeError(
                    "In order to send/recv CUDA arrays, Numba or RMM is required"
                )

    pool_size_str = dask.config.get("rmm.pool-size")
    if pool_size_str is not None:
        # `rmm` is a function-local name that stays unbound when the import
        # above failed. Import it again here so a missing RMM surfaces as a
        # clear ImportError instead of an UnboundLocalError.
        import rmm

        pool_size = parse_bytes(pool_size_str)
        rmm.reinitialize(
            pool_allocator=True, managed_memory=False, initial_pool_size=pool_size
        )
def test_delete_edge_list_delete_adj_list(managed, pool, graph_file):
    """Check that views raise after the underlying graph data is deleted.

    An edge list is loaded and deleted, after which ``view_adj_list`` must
    raise. An adjacency list is then loaded and deleted, after which
    ``view_edge_list`` must raise.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    Mnx = utils.read_csv_for_nx(graph_file)

    df = cudf.DataFrame()
    df['src'] = cudf.Series(Mnx['0'])
    df['dst'] = cudf.Series(Mnx['1'])

    # The matrix side length is one more than the largest vertex id.
    num_verts = max(max(Mnx['0']), max(Mnx['1'])) + 1
    Mcsr = scipy.sparse.csr_matrix(
        (Mnx.weight, (Mnx['0'], Mnx['1'])),
        shape=(num_verts, num_verts),
    )
    offsets = cudf.Series(Mcsr.indptr)
    indices = cudf.Series(Mcsr.indices)

    G = cugraph.DiGraph()

    # Edge list: load, delete, then the adjacency view must fail.
    G.from_cudf_edgelist(df, source='src', destination='dst')
    G.delete_edge_list()
    with pytest.raises(Exception):
        G.view_adj_list()

    # Adjacency list: load, delete, then the edge view must fail.
    G.from_cudf_adjlist(offsets, indices, None)
    G.delete_adj_list()
    with pytest.raises(Exception):
        G.view_edge_list()
def test_sssp_edgevals(managed, pool, graph_file, source):
    """Validate weighted SSSP distances and predecessors against NetworkX.

    A vertex counts as a mismatch when its distance differs from the
    reference, when its predecessor's distance plus the connecting edge
    weight does not equal its own distance, or when cuGraph reports it as
    unreachable although the reference has a path to it.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    nx_edges = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)

    cu_paths, max_val = cugraph_call(cu_M, source, edgevals=True)
    nx_paths, Gnx = networkx_call(nx_edges, source, edgevals=True)

    # Calculating mismatch
    err = 0
    for vid in cu_paths:
        # NOTE : If distance type is float64 then cu_paths[vid][0]
        # should be compared against np.finfo(np.float64).max)
        if cu_paths[vid][0] == max_val:
            # Reported unreachable: the reference must not know the vertex.
            if vid in nx_paths.keys():
                err += 1
            continue

        # Reachable vertex: the distance must match the reference.
        if cu_paths[vid][0] != nx_paths[vid]:
            err += 1

        # check pred dist + edge_weight = current dist
        if vid != source:
            pred = cu_paths[vid][1]
            edge_weight = Gnx[pred][vid]['weight']
            if cu_paths[pred][0] + edge_weight != cu_paths[vid][0]:
                err += 1

    assert err == 0
def test_Graph_from_MultiGraph(managed, pool, graph_file):
    """Check that a Graph built from a MultiGraph matches a direct Graph.

    A ``cugraph.MultiGraph`` is created with three edge attributes; a
    ``cugraph.Graph`` derived from it using only attribute ``'2'`` must
    have the same edge list as a ``cugraph.Graph`` built directly from the
    edge list with attribute ``'2'``.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    cu_M = utils.read_csv_file(graph_file)

    # create dataframe for MultiGraph
    cu_M['3'] = cudf.Series([2.0] * len(cu_M), dtype=np.float32)
    cu_M['4'] = cudf.Series([3.0] * len(cu_M), dtype=np.float32)

    # initialize MultiGraph
    G_multi = cugraph.MultiGraph()
    G_multi.from_cudf_edgelist(
        cu_M, source='0', destination='1', edge_attr=['2', '3', '4']
    )

    # initialize Graph
    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1', edge_attr='2')

    # create Graph from MultiGraph
    G_from_multi = cugraph.Graph(G_multi, edge_attr='2')

    # `==` on two data frames is element-wise and yields a frame, whose
    # truth value does not express equality. `equals` gives one boolean.
    assert G.edgelist.edgelist_df.equals(G_from_multi.edgelist.edgelist_df)
def test_pagerank(managed, pool, graph_file, max_iter, tol, alpha,
                  personalization_perc, has_guess):
    """Compare cuGraph PageRank scores with the NetworkX reference.

    When ``has_guess`` equals 1, the NetworkX result is passed to cuGraph
    as the initial guess and the iteration limit is reduced to 5. Fewer
    than 1% of the vertices may differ by more than ``tol * 1.1``.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    nx_edges = utils.read_csv_for_nx(graph_file)
    networkx_pr, networkx_prsn = networkx_call(
        nx_edges, max_iter, tol, alpha, personalization_perc
    )

    cu_nstart = None
    if has_guess == 1:
        cu_nstart = cudify(networkx_pr)
        max_iter = 5
    cu_prsn = cudify(networkx_prsn)

    cu_M = utils.read_csv_file(graph_file)
    cugraph_pr = cugraph_call(
        cu_M, max_iter, tol, alpha, cu_prsn, cu_nstart
    )

    # Calculating mismatch
    networkx_pr = sorted(networkx_pr.items(), key=lambda x: x[0])
    assert len(cugraph_pr) == len(networkx_pr)

    err = 0
    for i in range(len(cugraph_pr)):
        # A row is a mismatch when the scores differ beyond the tolerance
        # and both rows refer to the same vertex.
        if abs(cugraph_pr[i][1] - networkx_pr[i][1]) > tol * 1.1:
            if cugraph_pr[i][0] == networkx_pr[i][0]:
                err += 1
    print("Mismatches:", err)
    assert err < (0.01 * len(cugraph_pr))
def test_add_edge_list_to_adj_list(managed, pool, graph_file):
    """Load a graph from an edge list and check the derived adjacency list.

    The expected offsets and indices come from the CSR form of the same
    input; the adjacency view must not carry values.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    cu_M = utils.read_csv_file(graph_file)

    csr = utils.read_csv_for_nx(graph_file).tocsr()
    if csr is None:
        raise TypeError('Could not read the input graph')
    num_rows, num_cols = csr.shape[0], csr.shape[1]
    if num_rows != num_cols:
        raise TypeError('Shape is not square')

    offsets_exp = csr.indptr
    indices_exp = csr.indices

    # Load the edge list, then read it back as an adjacency list.
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source='0', target='1')
    offsets_cu, indices_cu, values_cu = G.view_adj_list()

    assert compare_offsets(offsets_cu, offsets_exp)
    assert compare_series(indices_cu, indices_exp)
    assert values_cu is None
def test_jaccard_edgevals(managed, pool, graph_file):
    """Compare cuGraph Jaccard coefficients (with edge values) to NetworkX.

    A row counts as a mismatch when the coefficients differ by more than
    ``tol * 1.1`` and both rows describe the same (source, destination)
    pair. No mismatches are allowed.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    nx_edges = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)

    cu_src, cu_dst, cu_coeff = cugraph_call(cu_M, edgevals=True)
    nx_src, nx_dst, nx_coeff = networkx_call(nx_edges)

    # Calculating mismatch
    tol = 1.0e-06
    err = 0

    assert len(cu_coeff) == len(nx_coeff)
    for i in range(len(cu_coeff)):
        if abs(cu_coeff[i] - nx_coeff[i]) > tol*1.1:
            if cu_src[i] == nx_src[i] and cu_dst[i] == nx_dst[i]:
                err += 1

    print("Mismatches: %d" % err)
    assert err == 0
def test_add_adj_list_to_edge_list(managed, pool, graph_file):
    """Load a graph from an adjacency list and check the derived edge list.

    The expected sources/destinations come from converting the same CSR
    matrix to COO.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    csr = utils.read_csv_for_nx(graph_file).tocsr()
    if csr is None:
        raise TypeError('Could not read the input graph')
    num_rows, num_cols = csr.shape[0], csr.shape[1]
    if num_rows != num_cols:
        raise TypeError('Shape is not square')

    offsets = cudf.Series(csr.indptr)
    indices = cudf.Series(csr.indices)

    coo = csr.tocoo()
    sources_exp = cudf.Series(coo.row)
    destinations_exp = cudf.Series(coo.col)

    # Load the adjacency list, then read it back as an edge list.
    G = cugraph.DiGraph()
    G.from_cudf_adjlist(offsets, indices, None)
    edgelist = G.view_edge_list()

    sources_cu = np.array(edgelist['src'])
    destinations_cu = np.array(edgelist['dst'])

    assert compare_series(sources_cu, sources_exp)
    assert compare_series(destinations_cu, destinations_exp)
def test_renumber_files_multi_col(managed, pool, graph_file):
    """Renumber two-column vertex ids and verify the mapping round-trips.

    Each vertex is identified by (translated id, original id). Looking the
    renumbered ids up in column ``'0'`` of ``numbering`` and removing the
    translation must reproduce the original ids.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    edges = utils.read_csv_for_nx(graph_file)
    sources = cudf.Series(edges['0'])
    destinations = cudf.Series(edges['1'])

    translate = 1000

    gdf = cudf.DataFrame()
    gdf['src_old'] = sources
    gdf['dst_old'] = destinations
    gdf['src'] = sources + translate
    gdf['dst'] = destinations + translate

    src_cols = ['src', 'src_old']
    dst_cols = ['dst', 'dst_old']
    src, dst, numbering = cugraph.renumber_from_cudf(gdf, src_cols, dst_cols)

    for row in range(len(gdf)):
        assert sources[row] == (numbering['0'][src[row]] - translate)
        assert destinations[row] == (numbering['0'][dst[row]] - translate)
def test_delete_edge_list_delete_adj_list(managed, pool, graph_file):
    """Check that views raise after the underlying graph data is deleted.

    An edge list is loaded and deleted, after which ``view_adj_list`` must
    raise. An adjacency list is then loaded and deleted, after which
    ``view_edge_list`` must raise.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    coo = utils.read_csv_for_nx(graph_file)

    df = cudf.DataFrame()
    df['src'] = cudf.Series(coo.row)
    df['dst'] = cudf.Series(coo.col)

    csr = coo.tocsr()
    if csr is None:
        raise TypeError('Could not read the input graph')
    num_rows, num_cols = csr.shape[0], csr.shape[1]
    if num_rows != num_cols:
        raise TypeError('Shape is not square')

    offsets = cudf.Series(csr.indptr)
    indices = cudf.Series(csr.indices)

    G = cugraph.DiGraph()

    # Edge list: load, delete, then the adjacency view must fail.
    G.from_cudf_edgelist(df, source='src', target='dst')
    G.delete_edge_list()
    with pytest.raises(Exception):
        G.view_adj_list()

    # Adjacency list: load, delete, then the edge view must fail.
    G.from_cudf_adjlist(offsets, indices, None)
    G.delete_adj_list()
    with pytest.raises(Exception):
        G.view_edge_list()
def test_strong_cc(managed, pool, graph_file):
    """Compare strongly connected components from cuGraph and NetworkX.

    Both the number of components and the sorted list of component sizes
    must agree.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    nx_input = utils.read_csv_for_nx(graph_file)
    netx_labels = networkx_strong_call(nx_input)

    cu_M = utils.read_csv_file(graph_file)
    cugraph_labels = cugraph_strong_call(cu_M)

    # NetX returns a list of components, each component being a
    # collection (set{}) of vertex indices;
    #
    # while cugraph returns a component label for each vertex;
    nx_n_components = len(netx_labels)
    cg_n_components = get_n_uniqs(cugraph_labels)
    assert nx_n_components == cg_n_components

    # Component sizes in ascending order, on both sides.
    lst_nx_components_lens = sorted(len(comp) for comp in netx_labels)
    lst_cg_components_lens = sorted(get_uniq_counts(cugraph_labels))

    assert lst_nx_components_lens == lst_cg_components_lens
def test_degrees_functionality(managed, pool, graph_file):
    """Compare ``DiGraph.degrees()`` in/out degrees with NetworkX."""
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    nx_input = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)

    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source='0', target='1', edge_attr='2')

    Gnx = nx.DiGraph(nx_input)

    df = G.degrees()

    nx_in_degree = Gnx.in_degree()
    nx_out_degree = Gnx.out_degree()

    # Count the rows whose degree differs from the NetworkX value.
    err_in_degree = 0
    err_out_degree = 0
    for row in range(len(df)):
        if df['in_degree'][row] != nx_in_degree[row]:
            err_in_degree += 1
        if df['out_degree'][row] != nx_out_degree[row]:
            err_out_degree += 1

    assert err_in_degree == 0
    assert err_out_degree == 0
def set_allocator(
    allocator="default",
    pool=False,
    initial_pool_size=None,
    enable_logging=False,
):
    """
    Configure the GPU memory allocator by re-initializing RMM.

    This function should be run only once, before any cudf objects are
    created.

    allocator : {"default", "managed"}
        "default": use default allocator.
        "managed": use managed memory allocator.
        Any value other than "managed" selects the default allocator.
    pool : bool
        Enable memory pool.
    initial_pool_size : int
        Memory pool size in bytes. If ``None`` (default), 1/2 of total
        GPU memory is used. If ``pool=False``, this argument is ignored.
    enable_logging : bool, optional
        Enable logging (default ``False``).
        Enabling this option will introduce performance overhead.
    """
    rmm.reinitialize(
        pool_allocator=pool,
        managed_memory=(allocator == "managed"),
        initial_pool_size=initial_pool_size,
        logging=enable_logging,
    )
def test_ecg_clustering(managed, pool, graph_file, min_weight, ensemble_size):
    """Check that the ECG clustering score reaches 95% of the golden score.

    ``cugraph_call`` provides the score for the graph and ``golden_call``
    provides the reference score for the same file.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    # Read in the graph and get a cugraph object
    cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False)
    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1', edge_attr='2')

    # Score of the cuGraph clustering and the reference score for this file
    cu_score, num_parts = cugraph_call(G, min_weight, ensemble_size)
    golden_score = golden_call(graph_file)

    # The clustering must reach more than 95% of the golden score.
    threshold = .95 * golden_score
    assert cu_score > threshold