def cugraph_weak_call(cu_M):
    """Run cuGraph weakly connected components and group vertices by label.

    Parameters
    ----------
    cu_M
        Edge list handed to ``DiGraph.from_cudf_edgelist``; its source
        column is named ``"0"`` and its destination column ``"1"``.

    Returns
    -------
    collections.defaultdict
        Maps every component label to the list of vertices that carry it,
        in the row order of the cuGraph result.
    """
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source="0", destination="1")

    # Only the component computation itself is timed, not the graph build.
    t1 = time.time()
    df = cugraph.weakly_connected_components(G)
    t2 = time.time() - t1
    print("Time : " + str(t2))

    # Convert the result to a host-side frame once and walk both columns in
    # lockstep, instead of doing two indexed lookups into the result frame
    # for every row.
    host_df = df.to_pandas()
    label_vertex_dict = defaultdict(list)
    for label, vertex in zip(
        host_df["labels"].values, host_df["vertices"].values
    ):
        label_vertex_dict[label].append(vertex)
    return label_vertex_dict
def cugraph_weak_call(cu_M):
    """Time cuGraph weakly connected components and return sorted labels.

    Builds a directed graph from ``cu_M`` (source column ``'0'``,
    destination column ``'1'``), prints the elapsed time of the component
    computation, and returns the ``'labels'`` column of the result as a
    sorted list.
    """
    # cugraph weakly-connected-components call
    graph = cugraph.DiGraph()
    graph.from_cudf_edgelist(cu_M, source='0', destination='1')

    # Time only the algorithm, not the graph construction.
    started = time.time()
    components = cugraph.weakly_connected_components(graph)
    elapsed = time.time() - started
    print('Time : ' + str(elapsed))

    return sorted(components['labels'].to_array())
def cugraph_weak_call(cu_M):
    """Compute weakly connected components with cuGraph, grouped by label.

    Parameters
    ----------
    cu_M
        Edge list passed to ``DiGraph.from_cudf_edgelist``; the source
        column is ``'0'`` and the destination column is ``'1'``.

    Returns
    -------
    collections.defaultdict
        ``{label: [vertex, ...]}`` built from the ``'labels'`` and
        ``'vertices'`` columns of the cuGraph result, in result row order.
    """
    # cugraph weakly-connected-components call
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1')

    # The timer brackets the algorithm only.
    t1 = time.time()
    df = cugraph.weakly_connected_components(G)
    t2 = time.time() - t1
    print('Time : ' + str(t2))

    # One bulk conversion to a host frame replaces the per-row indexed
    # lookups (two per row) that the index-based loop performed.
    host_df = df.to_pandas()
    label_vertex_dict = defaultdict(list)
    pairs = zip(host_df['labels'].values, host_df['vertices'].values)
    for label, vertex in pairs:
        label_vertex_dict[label].append(vertex)
    return label_vertex_dict
def cugraph_weak_call(cu_M):
    """Time cuGraph weakly connected components and return sorted labels.

    The edge list is taken from columns ``'0'`` (sources) and ``'1'``
    (destinations) of ``cu_M`` and added to a ``cugraph.Graph`` with no
    third (value) argument. The elapsed time of the component computation
    is printed; the ``'labels'`` column of the result is returned as a
    sorted list.
    """
    # cugraph weakly-connected-components call, fed from the device columns
    graph = cugraph.Graph()
    graph.add_edge_list(cu_M['0'], cu_M['1'], None)

    # Time only the algorithm, not the graph construction.
    started = time.time()
    components = cugraph.weakly_connected_components(graph)
    elapsed = time.time() - started
    print('Time : ' + str(elapsed))

    return sorted(components['labels'].to_array())
def test_weak_cc_nx(graph_file):
    """Check cuGraph WCC on a NetworkX graph against NetworkX itself.

    Only the number of components is compared: the count of component sets
    produced by NetworkX must equal the number of distinct labels cuGraph
    assigns.
    """
    gc.collect()

    edges = utils.read_csv_for_nx(graph_file)
    nx_graph = nx.from_pandas_edgelist(
        edges, source="0", target="1", create_using=nx.DiGraph()
    )

    # NetworkX yields one vertex set per component; sorting materialises
    # the generator so it can be counted.
    nx_components = sorted(nx.weakly_connected_components(nx_graph))

    # The cuGraph result is loaded into a two-column frame (one row per
    # key of the returned mapping) and the distinct labels are counted.
    cu_labels = cugraph.weakly_connected_components(nx_graph)
    label_frame = pd.DataFrame.from_dict(cu_labels, orient='index')
    label_frame = label_frame.reset_index()
    label_frame.columns = ["vertex", "labels"]
    cu_component_count = label_frame["labels"].nunique()

    assert len(nx_components) == cu_component_count
def test_dask_wcc(client_connection):
    """Compare dask-based WCC labels against the single-frame cuGraph run.

    The same CSV edge list is loaded once through ``dask_cudf`` and once
    through ``cudf``; weakly connected components are computed on both
    graphs and the results are joined on ``vertex``. Every group of
    vertices sharing a local label must share a single dask label.
    """
    gc.collect()

    # FIXME: update this to allow dataset to be parameterized and have dataset
    # part of test param id (see other tests)
    input_data_path = r"../datasets/netscience.csv"
    print(f"dataset={input_data_path}")

    # Distributed copy of the edge list.
    chunksize = dcg.get_chunksize(input_data_path)
    dask_edges = dask_cudf.read_csv(
        input_data_path,
        chunksize=chunksize,
        delimiter=" ",
        names=["src", "dst", "value"],
        dtype=["int32", "int32", "float32"],
    )

    # Single-frame copy of the same edge list.
    local_edges = cudf.read_csv(
        input_data_path,
        delimiter=" ",
        names=["src", "dst", "value"],
        dtype=["int32", "int32", "float32"],
    )

    local_graph = cugraph.DiGraph()
    local_graph.from_cudf_edgelist(local_edges, "src", "dst", renumber=True)

    dask_graph = cugraph.DiGraph()
    dask_graph.from_dask_cudf_edgelist(dask_edges, "src", "dst")

    local_result = cugraph.weakly_connected_components(local_graph)
    dask_result = dcg.weakly_connected_components(dask_graph).compute()

    # Label values may differ between the two runs, so labels are compared
    # per group rather than by value.
    merged = local_result.merge(
        dask_result, on="vertex", suffixes=["_local", "_dask"]
    )

    # NOTE(review): this verifies that each local component maps to exactly
    # one dask label; it does not verify that distinct local components
    # receive distinct dask labels.
    for local_label in merged['labels_local'].unique().values.tolist():
        in_component = merged['labels_local'] == local_label
        dask_side = merged[in_component]['labels_dask']
        assert (dask_side.iloc[0] == dask_side).all()
def test_dask_wcc(dask_client):
    """Compare dask-based WCC labels against the single-frame cuGraph run.

    Reads ``netscience.csv`` from ``RAPIDS_DATASET_ROOT_DIR_PATH`` through
    both ``dask_cudf`` and ``cudf``, runs weakly connected components on
    each graph, joins the results on ``vertex`` and asserts that all
    vertices sharing a local label also share one dask label.
    """
    gc.collect()

    dataset = RAPIDS_DATASET_ROOT_DIR_PATH / "netscience.csv"
    input_data_path = dataset.as_posix()
    print(f"dataset={input_data_path}")

    # Distributed copy of the edge list.
    chunksize = dcg.get_chunksize(input_data_path)
    dask_edges = dask_cudf.read_csv(
        input_data_path,
        chunksize=chunksize,
        delimiter=" ",
        names=["src", "dst", "value"],
        dtype=["int32", "int32", "float32"],
    )

    # Single-frame copy of the same edge list.
    local_edges = cudf.read_csv(
        input_data_path,
        delimiter=" ",
        names=["src", "dst", "value"],
        dtype=["int32", "int32", "float32"],
    )

    local_graph = cugraph.DiGraph()
    local_graph.from_cudf_edgelist(local_edges, "src", "dst", renumber=True)

    dask_graph = cugraph.DiGraph()
    dask_graph.from_dask_cudf_edgelist(dask_edges, "src", "dst")

    local_result = cugraph.weakly_connected_components(local_graph)
    dask_result = dcg.weakly_connected_components(dask_graph).compute()

    # Label values may differ between the two runs, so labels are compared
    # per group rather than by value.
    merged = local_result.merge(
        dask_result, on="vertex", suffixes=["_local", "_dask"]
    )

    # NOTE(review): this verifies that each local component maps to exactly
    # one dask label; it does not verify that distinct local components
    # receive distinct dask labels.
    for local_label in merged['labels_local'].unique().values.tolist():
        in_component = merged['labels_local'] == local_label
        dask_side = merged[in_component]['labels_dask']
        assert (dask_side.iloc[0] == dask_side).all()
def wcc(G):
    """Return ``cugraph.weakly_connected_components`` applied to ``G``."""
    components = cugraph.weakly_connected_components(G)
    return components
def connected_components(graph: CuGraph) -> CuDFNodeMap:
    """Label each vertex of ``graph`` with its weakly connected component.

    Runs ``cugraph.weakly_connected_components`` on ``graph.value``,
    indexes the result by its ``"vertices"`` column and wraps the
    ``"labels"`` column in a ``CuDFNodeMap``.
    """
    components = cugraph.weakly_connected_components(graph.value)
    labels_by_vertex = components.set_index("vertices")["labels"]
    return CuDFNodeMap(labels_by_vertex)