def test_mg_renumber(graph_file, client_connection):
    """Round-trip two-column vertex ids through NumberMap on a dask frame.

    The edge list read from ``graph_file`` is kept as ``src_old``/``dst_old``
    and also shifted by 1000 into ``src``/``dst``.  Each endpoint is then
    described by a column pair, renumbered to an internal id, and mapped
    back; the recovered columns ``"0"`` and ``"1"`` must equal the original
    pair.

    ``client_connection`` is not referenced in the body; presumably it is a
    fixture that provides the dask client -- confirm against conftest.
    """
    gc.collect()

    edge_list = utils.read_csv_for_nx(graph_file)
    src_ids = cudf.Series(edge_list["0"])
    dst_ids = cudf.Series(edge_list["1"])

    offset = 1000

    edges = cudf.DataFrame()
    edges["src_old"] = src_ids
    edges["dst_old"] = dst_ids
    edges["src"] = src_ids + offset
    edges["dst"] = dst_ids + offset

    # Distribute the edge list over two partitions before renumbering.
    dist_edges = dask.dataframe.from_pandas(edges, npartitions=2)

    number_map = NumberMap()
    number_map.from_dataframe(
        dist_edges, ["src", "src_old"], ["dst", "dst_old"]
    )

    # Attach the internal id for the source pair first, then the
    # destination pair on top of that result.
    with_src_id = number_map.add_internal_vertex_id(
        dist_edges, "src_id", ["src", "src_old"]
    )
    with_both_ids = number_map.add_internal_vertex_id(
        with_src_id, "dst_id", ["dst", "dst_old"]
    )

    src_roundtrip = number_map.from_internal_vertex_id(
        with_both_ids, "src_id"
    ).compute()
    dst_roundtrip = number_map.from_internal_vertex_id(
        with_both_ids, "dst_id"
    ).compute()

    for recovered, expected in (("0", "src"), ("1", "src_old")):
        assert (
            src_roundtrip[recovered]
            .to_pandas()
            .equals(src_roundtrip[expected].to_pandas())
        )

    for recovered, expected in (("0", "dst"), ("1", "dst_old")):
        assert (
            dst_roundtrip[recovered]
            .to_pandas()
            .equals(dst_roundtrip[expected].to_pandas())
        )
def test_renumber_files_col(graph_file):
    """Round-trip single-column vertex ids through NumberMap.

    Source and destination ids read from ``graph_file`` are shifted by 1000,
    renumbered to internal ids, and mapped back.  The recovered column
    ``"0"`` must equal the shifted ``src`` / ``dst`` column respectively.
    """
    gc.collect()

    edge_list = utils.read_csv_for_nx(graph_file)
    src_ids = cudf.Series(edge_list["0"])
    dst_ids = cudf.Series(edge_list["1"])

    offset = 1000

    # The offset is applied element by element on host values, then the
    # shifted lists are wrapped back into cudf Series.
    shifted_src = [vertex + offset for vertex in src_ids.values_host]
    shifted_dst = [vertex + offset for vertex in dst_ids.values_host]

    edges = cudf.DataFrame()
    edges['src'] = cudf.Series(shifted_src)
    edges['dst'] = cudf.Series(shifted_dst)

    number_map = NumberMap()
    number_map.from_dataframe(edges, ["src"], ["dst"])

    # Add the source internal id first, then the destination id on top.
    with_src_id = number_map.add_internal_vertex_id(edges, "src_id", ["src"])
    with_both_ids = number_map.add_internal_vertex_id(
        with_src_id, "dst_id", ["dst"]
    )

    src_roundtrip = number_map.from_internal_vertex_id(
        with_both_ids, "src_id"
    )
    dst_roundtrip = number_map.from_internal_vertex_id(
        with_both_ids, "dst_id"
    )

    assert src_roundtrip["src"].equals(src_roundtrip["0"])
    assert dst_roundtrip["dst"].equals(dst_roundtrip["0"])
def test_renumber_files_multi_col(graph_file):
    """Round-trip two-column vertex ids through NumberMap.

    The edge list read from ``graph_file`` is kept as ``src_old``/``dst_old``
    and also shifted by 1000 into ``src``/``dst``.  Each endpoint is
    described by a column pair, renumbered to an internal id, and mapped
    back; the recovered columns ``"0"`` and ``"1"`` must equal the original
    pair.
    """
    gc.collect()

    edge_list = utils.read_csv_for_nx(graph_file)
    src_ids = cudf.Series(edge_list["0"])
    dst_ids = cudf.Series(edge_list["1"])

    offset = 1000

    edges = cudf.DataFrame()
    edges["src_old"] = src_ids
    edges["dst_old"] = dst_ids
    edges["src"] = src_ids + offset
    edges["dst"] = dst_ids + offset

    number_map = NumberMap()
    number_map.from_dataframe(edges, ["src", "src_old"], ["dst", "dst_old"])

    # Attach the internal id for the source pair first, then the
    # destination pair on top of that result.
    with_src_id = number_map.add_internal_vertex_id(
        edges, "src_id", ["src", "src_old"]
    )
    with_both_ids = number_map.add_internal_vertex_id(
        with_src_id, "dst_id", ["dst", "dst_old"]
    )

    src_roundtrip = number_map.from_internal_vertex_id(
        with_both_ids, "src_id"
    )
    dst_roundtrip = number_map.from_internal_vertex_id(
        with_both_ids, "dst_id"
    )

    for expected, recovered in (("src", "0"), ("src_old", "1")):
        assert src_roundtrip[expected].equals(src_roundtrip[recovered])

    for expected, recovered in (("dst", "0"), ("dst_old", "1")):
        assert dst_roundtrip[expected].equals(dst_roundtrip[recovered])
def test_renumber_series(graph_file):
    """Round-trip vertex ids through NumberMap objects built from Series.

    Source and destination ids read from ``graph_file`` are shifted by 1000.
    A separate NumberMap is built from each Series, the Series is renumbered
    to an internal id, and then mapped back.  In each result the ``"0_y"``
    column must equal the ``"0_x"`` column.
    """
    gc.collect()

    edge_list = utils.read_csv_for_nx(graph_file)
    src_ids = cudf.Series(edge_list["0"])
    dst_ids = cudf.Series(edge_list["1"])

    offset = 1000

    # The offset is applied element by element on host values, then the
    # shifted lists are wrapped back into cudf Series.
    shifted_src = [vertex + offset for vertex in src_ids.values_host]
    shifted_dst = [vertex + offset for vertex in dst_ids.values_host]

    frame = cudf.DataFrame()
    frame["src"] = cudf.Series(shifted_src)
    frame["dst"] = cudf.Series(shifted_dst)

    # One independent number map per endpoint column.
    src_map = NumberMap()
    src_map.from_series(frame["src"])

    dst_map = NumberMap()
    dst_map.from_series(frame["dst"])

    src_with_id = src_map.add_internal_vertex_id(frame["src"], "src_id")
    dst_with_id = dst_map.add_internal_vertex_id(frame["dst"], "dst_id")

    src_roundtrip = src_map.from_internal_vertex_id(src_with_id, "src_id")
    dst_roundtrip = dst_map.from_internal_vertex_id(dst_with_id, "dst_id")

    assert src_roundtrip["0_y"].equals(src_roundtrip["0_x"])
    assert dst_roundtrip["0_y"].equals(dst_roundtrip["0_x"])