def test_renumber_ips():
    """Renumber integer-encoded IP strings and check the round trip.

    Builds a four-row edge list of dotted-quad address strings, converts
    both columns to integers with ``.str.ip2int()``, renumbers them with
    ``cugraph.renumber`` and asserts that looking each renumbered id up in
    the returned numbering yields the original integer value.
    """
    pdf = pd.DataFrame({
        'source_list': [
            '192.168.1.1',
            '172.217.5.238',
            '216.228.121.209',
            '192.16.31.23',
        ],
        'dest_list': [
            '172.217.5.238',
            '216.228.121.209',
            '192.16.31.23',
            '192.168.1.1',
        ],
    })
    gdf = cudf.from_pandas(pdf)

    # Add an integer-encoded companion column for each address column.
    for text_col, int_col in (('source_list', 'source_as_int'),
                              ('dest_list', 'dest_as_int')):
        gdf[int_col] = gdf[text_col].str.ip2int()

    source_ints = gdf['source_as_int']
    dest_ints = gdf['dest_as_int']
    src, dst, numbering = cugraph.renumber(source_ints, dest_ints)

    row_count = len(gdf)
    for row in range(row_count):
        # numbering[new_id] must give back the pre-renumbering value.
        assert numbering[src[row]] == source_ints[row]
        assert numbering[dst[row]] == dest_ints[row]
def test_renumber_files(managed, pool, graph_file):
    """Renumber a shifted edge list read from ``graph_file``.

    RMM is reinitialized with the given ``managed`` / ``pool`` settings
    first. The edge list's '0' and '1' columns are shifted by a constant
    offset, renumbered with ``cugraph.renumber``, and the returned
    numbering is checked to map every renumbered id back to the shifted
    original.

    NOTE(review): ``managed``, ``pool`` and ``graph_file`` are presumably
    supplied by pytest parametrization/fixtures defined outside this view.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,  # 2 << 27 == 268435456
    )
    assert rmm.is_initialized()

    edges = utils.read_csv_for_nx(graph_file)
    sources = cudf.Series(edges['0'])
    destinations = cudf.Series(edges['1'])

    # Shift every vertex id so the inputs are not already zero-based.
    translate = 1000
    shifted_sources = cudf.Series([vertex + translate for vertex in sources])
    shifted_destinations = cudf.Series(
        [vertex + translate for vertex in destinations]
    )

    src, dst, numbering = cugraph.renumber(shifted_sources,
                                           shifted_destinations)

    edge_count = len(sources)
    for edge in range(edge_count):
        # Undo the shift on the looked-up value and compare with the input.
        assert sources[edge] == (numbering[src[edge]] - translate)
        assert destinations[edge] == (numbering[dst[edge]] - translate)
def test_renumber_negative():
    """Renumber an edge list containing a negative vertex id.

    The source column includes ``-20``; after ``cugraph.renumber`` the
    returned numbering must map every renumbered source and destination
    id back to the original value.
    """
    source_list = [4, 6, 8, -20, 1]
    dest_list = [1, 29, 35, 0, 77]

    frame = pd.DataFrame({
        'source_list': source_list,
        'dest_list': dest_list,
    })
    gdf = cudf.DataFrame.from_pandas(frame[['source_list', 'dest_list']])

    src, dst, numbering = cugraph.renumber(gdf['source_list'],
                                           gdf['dest_list'])

    # Both lists have the same length, so zip visits every row.
    for row, (want_src, want_dst) in enumerate(zip(source_list, dest_list)):
        assert want_src == numbering[src[row]]
        assert want_dst == numbering[dst[row]]
def test_renumber_files(graph_file):
    """Renumber a shifted edge list read from ``graph_file``.

    The edge list's '0' and '1' columns are shifted by a constant offset,
    renumbered with ``cugraph.renumber``, and the returned numbering is
    checked to map every renumbered id back to the shifted original.

    NOTE(review): ``graph_file`` is presumably supplied by pytest
    parametrization defined outside this view.
    """
    gc.collect()

    edges = utils.read_csv_for_nx(graph_file)
    sources = cudf.Series(edges['0'])
    destinations = cudf.Series(edges['1'])

    # Shift every vertex id so the inputs are not already zero-based.
    translate = 1000
    shifted_sources = cudf.Series([vertex + translate for vertex in sources])
    shifted_destinations = cudf.Series(
        [vertex + translate for vertex in destinations]
    )

    src, dst, numbering = cugraph.renumber(shifted_sources,
                                           shifted_destinations)

    edge_count = len(sources)
    for edge in range(edge_count):
        # Undo the shift on the looked-up value and compare with the input.
        assert sources[edge] == (numbering[src[edge]] - translate)
        assert destinations[edge] == (numbering[dst[edge]] - translate)
def load_cugraph(lang, year, usecols=[0, 2],
                 dtype=["int32", "str", "int32", "str"]):
    """Load an edge list into a ``cugraph.DiGraph`` with renumbered ids.

    The tab-separated file named by ``generate_file_name(lang, year)`` is
    read with ``cudf.read_csv``; its ``page_id_from`` / ``page_id_to``
    columns are renumbered with ``cugraph.renumber`` and used as the
    ``src`` / ``dst`` columns of the graph's edge list.

    Args:
        lang: Forwarded to ``generate_file_name``.
        year: Forwarded to ``generate_file_name``.
        usecols: Forwarded to ``cudf.read_csv``.
        dtype: Forwarded to ``cudf.read_csv``.

    Returns:
        A ``(graph, renumbering_map)`` tuple: the populated ``DiGraph`` and
        the third value returned by ``cugraph.renumber``.
    """
    # NOTE: the list defaults are shared between calls; they are only
    # forwarded to cudf.read_csv here and never modified by this function.
    import cugraph
    import cudf

    edges = cudf.read_csv(
        generate_file_name(lang, year),
        usecols=usecols,
        dtype=dtype,
        sep="\t",
    )

    source_col, dest_col, renumbering_map = cugraph.renumber(
        cudf.Series(edges["page_id_from"]),
        cudf.Series(edges["page_id_to"]),
    )

    edge_df = cudf.DataFrame()
    for column_name, column in (("src", source_col), ("dst", dest_col)):
        edge_df.add_column(column_name, column)

    graph = cugraph.DiGraph()
    graph.from_cudf_edgelist(edge_df, source="src", target="dst")
    return graph, renumbering_map
def test_renumber_files(managed, pool, graph_file):
    """Renumber a shifted edge list read from a Matrix Market file.

    RMM is finalized, reconfigured through ``rmm_cfg`` with the given
    ``managed`` / ``pool`` settings and initialized again. The ``row`` and
    ``col`` arrays of the object returned by ``utils.read_mtx_file`` are
    shifted by a constant offset, renumbered with ``cugraph.renumber``, and
    the returned numbering is checked to map every renumbered id back to
    the shifted original.

    NOTE(review): ``managed``, ``pool`` and ``graph_file`` are presumably
    supplied by pytest parametrization/fixtures defined outside this view.
    """
    gc.collect()

    # Restart RMM so the requested allocator settings take effect.
    rmm.finalize()
    rmm_cfg.use_managed_memory = managed
    rmm_cfg.use_pool_allocator = pool
    rmm.initialize()
    assert rmm.is_initialized()

    matrix = utils.read_mtx_file(graph_file)
    sources = cudf.Series(matrix.row)
    destinations = cudf.Series(matrix.col)

    # Shift every vertex id so the inputs are not already zero-based.
    translate = 1000
    shifted_sources = cudf.Series([vertex + translate for vertex in sources])
    shifted_destinations = cudf.Series(
        [vertex + translate for vertex in destinations]
    )

    src, dst, numbering = cugraph.renumber(shifted_sources,
                                           shifted_destinations)

    edge_count = len(sources)
    for edge in range(edge_count):
        # Undo the shift on the looked-up value and compare with the input.
        assert sources[edge] == (numbering[src[edge]] - translate)
        assert destinations[edge] == (numbering[dst[edge]] - translate)