def test_renumber_files_multi_col(managed, pool, graph_file):
    """Renumber edges keyed by two columns and map the new ids back.

    The edge list read from ``graph_file`` is stored twice in one frame:
    once unchanged (``src_old``/``dst_old``) and once shifted by a fixed
    offset (``src``/``dst``).  Renumbering is requested on the column pairs
    ``['src', 'src_old']`` and ``['dst', 'dst_old']``.  For every row, the
    value found in the ``'0'`` column of the returned numbering at the
    renumbered id, minus the offset, must equal the original vertex id.

    Parameters
    ----------
    managed, pool
        Forwarded to ``rmm.reinitialize`` as ``managed_memory`` and
        ``pool_allocator``.
    graph_file
        Passed to ``utils.read_csv_for_nx`` to load the edge list.
    """
    gc.collect()

    # 2 << 27 == 2**28 (256 MiB if the pool size is expressed in bytes).
    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    edges = utils.read_csv_for_nx(graph_file)
    sources = cudf.Series(edges['0'])
    destinations = cudf.Series(edges['1'])

    translate = 1000

    gdf = cudf.DataFrame()
    gdf['src_old'] = sources
    gdf['dst_old'] = destinations
    gdf['src'] = sources + translate
    gdf['dst'] = destinations + translate

    src, dst, numbering = cugraph.renumber_from_cudf(
        gdf,
        ['src', 'src_old'],
        ['dst', 'dst_old'],
    )

    # Column '0' of the numbering corresponds to the first key column given
    # above (the translated one), hence the subtraction before comparing.
    for row in range(len(gdf)):
        recovered_src = numbering['0'][src[row]] - translate
        assert sources[row] == recovered_src

        recovered_dst = numbering['0'][dst[row]] - translate
        assert destinations[row] == recovered_dst
def test_renumber_ips_cols():
    """Renumber edges whose endpoints are IPv4 addresses converted to ints.

    Four dotted-quad source/destination pairs are loaded into a cuDF frame,
    converted with the string accessor's ``ip2int()``, and renumbered on the
    resulting integer columns.  For every row, looking the renumbered id up
    in the ``'0'`` column of the returned numbering must give back the
    integer form of the original address.
    """
    source_list = [
        '192.168.1.1',
        '172.217.5.238',
        '216.228.121.209',
        '192.16.31.23',
    ]
    dest_list = [
        '172.217.5.238',
        '216.228.121.209',
        '192.16.31.23',
        '192.168.1.1',
    ]

    pdf = pd.DataFrame({'source_list': source_list, 'dest_list': dest_list})
    gdf = cudf.from_pandas(pdf)

    # Integer representation of each address; these are the renumber keys.
    gdf['source_as_int'] = gdf['source_list'].str.ip2int()
    gdf['dest_as_int'] = gdf['dest_list'].str.ip2int()

    src, dst, number_df = cugraph.renumber_from_cudf(
        gdf,
        ['source_as_int'],
        ['dest_as_int'],
    )

    for row in range(len(gdf)):
        mapped_src = number_df['0'][src[row]]
        assert mapped_src == gdf['source_as_int'][row]

        mapped_dst = number_df['0'][dst[row]]
        assert mapped_dst == gdf['dest_as_int'][row]
def test_renumber_negative_col():
    """Renumber a small edge list that contains a negative vertex id.

    The source column includes ``-20``.  After renumbering, looking each
    renumbered id up in the ``'0'`` column of the returned numbering must
    give back the original source and destination values, row by row.
    """
    source_list = [4, 6, 8, -20, 1]
    dest_list = [1, 29, 35, 0, 77]

    df = pd.DataFrame({
        'source_list': source_list,
        'dest_list': dest_list,
    })
    gdf = cudf.DataFrame.from_pandas(df[['source_list', 'dest_list']])

    src, dst, numbering = cugraph.renumber_from_cudf(
        gdf,
        ['source_list'],
        ['dest_list'],
    )

    # The expected values come from the host-side lists; the renumbered ids
    # are still fetched by position from the returned series.
    expected_pairs = zip(source_list, dest_list)
    for row, (expected_src, expected_dst) in enumerate(expected_pairs):
        assert expected_src == numbering['0'][src[row]]
        assert expected_dst == numbering['0'][dst[row]]
def test_renumber_files_col(graph_file):
    """Renumber a single-column edge list shifted by a fixed offset.

    The edge list read from ``graph_file`` is shifted by ``translate`` and
    renumbered on the ``src``/``dst`` columns.  For every row, the value at
    position ``src[i]`` (resp. ``dst[i]``) of the ``'0'`` column of the
    returned numbering, minus the offset, must equal the original id.

    Parameters
    ----------
    graph_file
        Passed to ``utils.read_csv_for_nx`` to load the edge list.
    """
    gc.collect()

    edges = utils.read_csv_for_nx(graph_file)
    sources = cudf.Series(edges['0'])
    destinations = cudf.Series(edges['1'])

    translate = 1000

    # NOTE(review): the shifted columns are built element by element rather
    # than with `sources + translate`; kept as is because the resulting
    # dtype may differ between the two forms -- confirm before changing.
    shifted_sources = [x + translate for x in sources]
    shifted_destinations = [x + translate for x in destinations]

    gdf = cudf.DataFrame()
    gdf['src'] = cudf.Series(shifted_sources)
    gdf['dst'] = cudf.Series(shifted_destinations)

    src, dst, numbering = cugraph.renumber_from_cudf(gdf, ['src'], ['dst'])

    # Positional (`.iloc`) lookup into the numbering column.
    for row in range(len(gdf)):
        recovered_src = numbering['0'].iloc[src[row]] - translate
        assert sources[row] == recovered_src

        recovered_dst = numbering['0'].iloc[dst[row]] - translate
        assert destinations[row] == recovered_dst