def test_mg_edge_betweenness_centrality(
    graph_file,
    directed,
    subset_size,
    normalized,
    weight,
    subset_seed,
    result_dtype,
    mg_device_count,
):
    """Check multi-GPU batch edge betweenness centrality scores.

    Calls ``prepare_test()`` and ``skip_if_not_enough_devices`` first, then,
    inside an ``MGContext`` opened with ``p2p=True``, obtains a result from
    ``calc_edge_betweenness_centrality`` (with ``multi_gpu_batch=True``) and
    hands it to ``compare_scores`` to compare the ``"cu_bc"`` and
    ``"ref_bc"`` keys using ``DEFAULT_EPSILON``.
    """
    prepare_test()
    skip_if_not_enough_devices(mg_device_count)

    # Keyword options forwarded unchanged to the scoring helper; the test
    # parameters ``subset_size`` and ``subset_seed`` map onto ``k``/``seed``.
    scoring_options = {
        "directed": directed,
        "normalized": normalized,
        "k": subset_size,
        "weight": weight,
        "seed": subset_seed,
        "result_dtype": result_dtype,
        "multi_gpu_batch": True,
    }

    with MGContext(number_of_devices=mg_device_count, p2p=True):
        scores = calc_edge_betweenness_centrality(
            graph_file, **scoring_options
        )
        compare_scores(
            scores,
            first_key="cu_bc",
            second_key="ref_bc",
            epsilon=DEFAULT_EPSILON,
        )
def test_enable_batch_adjlist_replication_weights(graph_file, directed,
                                                  mg_device_count):
    """Check that a weighted adjacency list is replicated to every worker.

    Builds a graph (``DiGraph`` when ``directed`` is truthy, ``Graph``
    otherwise) from a space-delimited ``src dst value`` edge list, enables
    batch mode inside an ``MGContext``, and asserts that the offsets,
    indices and weights stored per worker in ``G.batch_adjlists`` equal
    the graph's own adjacency list.
    """
    skip_if_not_enough_devices(mg_device_count)

    edges = cudf.read_csv(
        graph_file,
        delimiter=' ',
        names=['src', 'dst', 'value'],
        dtype=['int32', 'int32', 'float32'],
    )
    if directed:
        graph = cugraph.DiGraph()
    else:
        graph = cugraph.Graph()
    graph.from_cudf_edgelist(edges, source='src', destination='dst',
                             edge_attr='value')

    with MGContext(mg_device_count):
        graph.enable_batch()
        graph.view_adj_list()

        local_adjlist = graph.adjlist
        expected_offsets = local_adjlist.offsets
        expected_indices = local_adjlist.indices
        expected_weights = local_adjlist.weights

        for worker in graph.batch_adjlists:
            rep_offsets, rep_indices, rep_weights = \
                graph.batch_adjlists[worker]
            # Checked in order; the first mismatch fails the test.
            checks = (
                (expected_offsets, rep_offsets,
                 "Replication of adjlist offsets failed"),
                (expected_indices, rep_indices,
                 "Replication of adjlist indices failed"),
                (expected_weights, rep_weights,
                 "Replication of adjlist weights failed"),
            )
            for expected, replica, message in checks:
                assert expected.equals(replica.result()), message
def test_enable_batch_adjlist_replication_no_weights(graph_file, directed,
                                                     mg_device_count):
    """Check that an unweighted adjacency list is replicated to every worker.

    Builds a graph (``DiGraph`` when ``directed`` is truthy, ``Graph``
    otherwise) from a space-delimited ``src dst`` edge list, enables batch
    mode inside an ``MGContext`` opened with ``p2p=True``, and asserts that
    the per-worker offsets and indices in ``G.batch_adjlists`` equal the
    graph's own, and that both the local and replicated weights are
    ``None``.
    """
    gc.collect()
    skip_if_not_enough_devices(mg_device_count)

    edges = cudf.read_csv(
        graph_file,
        delimiter=" ",
        names=["src", "dst"],
        dtype=["int32", "int32"],
    )
    if directed:
        graph = cugraph.DiGraph()
    else:
        graph = cugraph.Graph()
    graph.from_cudf_edgelist(edges, source="src", destination="dst")

    with MGContext(number_of_devices=mg_device_count, p2p=True):
        graph.enable_batch()
        graph.view_adj_list()

        local_adjlist = graph.adjlist
        expected_offsets = local_adjlist.offsets
        expected_indices = local_adjlist.indices
        weights = local_adjlist.weights

        for worker in graph.batch_adjlists:
            rep_offsets, rep_indices, rep_weights = \
                graph.batch_adjlists[worker]
            # Checked in order; the first mismatch fails the test.
            checks = (
                (expected_offsets, rep_offsets,
                 "Replication of adjlist offsets failed"),
                (expected_indices, rep_indices,
                 "Replication of adjlist indices failed"),
            )
            for expected, replica, message in checks:
                assert expected.equals(replica.result()), message
            # No edge attribute was loaded, so no weights are expected on
            # either side.
            assert weights is None and rep_weights is None
def test_enable_batch_edgelist_replication(graph_file, directed,
                                           mg_device_count):
    """Check that the graph's edge list is replicated to every worker.

    Enables batch mode inside an ``MGContext`` and asserts that the result
    of each entry in ``G.batch_edgelists`` equals the graph's
    ``edgelist.edgelist_df``.
    """
    skip_if_not_enough_devices(mg_device_count)
    graph = utils.generate_cugraph_graph_from_file(graph_file, directed)

    with MGContext(mg_device_count):
        graph.enable_batch()
        expected_df = graph.edgelist.edgelist_df
        for worker in graph.batch_edgelists:
            replica = graph.batch_edgelists[worker]
            replicated_df = replica.result()
            assert expected_df.equals(replicated_df), \
                "Replication of edgelist failed"
def test_replicate_cudf_dataframe_no_weights(input_data_path,
                                             mg_device_count):
    """Check that a two-column DataFrame is replicated to every worker.

    Reads a space-delimited ``src dst`` file, calls
    ``replication.replicate_cudf_dataframe`` inside an ``MGContext``, and
    asserts that the result held for each worker equals the source frame.
    """
    # NOTE(review): a second function with this exact name appears later in
    # the visible source; if both live in the same module the later one
    # replaces this definition and this version is never run -- confirm.
    skip_if_not_enough_devices(mg_device_count)

    source_df = cudf.read_csv(
        input_data_path,
        delimiter=' ',
        names=['src', 'dst'],
        dtype=['int32', 'int32'],
    )

    with MGContext(mg_device_count):
        worker_to_futures = replication.replicate_cudf_dataframe(source_df)
        for worker in worker_to_futures:
            replica = worker_to_futures[worker]
            replicated_df = replica.result()
            assert source_df.equals(replicated_df), \
                "There is a mismatch in one of the replications"
# Test: build a graph from `graph_file`, open an MGContext, assert that
# `G.batch_enabled` is False before `G.enable_batch()` and True after it,
# assert that `G.batch_edgelists` is not None, and call `view_edge_list`,
# `view_adj_list` and `view_transposed_adj_list` on the graph.
# NOTE(review): this definition has lost its line breaks and indentation in
# the visible source, so it cannot be determined from here whether the three
# trailing view_* calls run inside or after the `with MGContext(...)` block
# (the test name suggests they may be meant to run without a context) --
# confirm against the original file before reformatting this function.
def test_enable_batch_context_no_context_views(graph_file, directed, mg_device_count): skip_if_not_enough_devices(mg_device_count) G = utils.generate_cugraph_graph_from_file(graph_file, directed) with MGContext(mg_device_count): assert G.batch_enabled is False, "Internal property should be False" G.enable_batch() assert G.batch_enabled is True, "Internal property should be True" assert G.batch_edgelists is not None, "The graph should have " \ "been created with an " \ "edgelist" G.view_edge_list() G.view_adj_list() G.view_transposed_adj_list()
def test_replicate_cudf_dataframe_no_weights(input_data_path,
                                             mg_device_count):
    """Check that a two-column DataFrame is replicated to every worker.

    Reads a space-delimited ``src dst`` file, calls
    ``replication.replicate_cudf_dataframe`` inside an ``MGContext`` opened
    with ``p2p=True``, and asserts that the result held for each worker
    equals the source frame.
    """
    # NOTE(review): an earlier function with this exact name appears in the
    # visible source; if both live in the same module this definition
    # replaces the earlier one -- confirm that is intended.
    gc.collect()
    skip_if_not_enough_devices(mg_device_count)

    source_df = cudf.read_csv(
        input_data_path,
        delimiter=" ",
        names=["src", "dst"],
        dtype=["int32", "int32"],
    )

    with MGContext(number_of_devices=mg_device_count, p2p=True):
        worker_to_futures = replication.replicate_cudf_dataframe(source_df)
        for worker in worker_to_futures:
            replica = worker_to_futures[worker]
            replicated_df = replica.result()
            assert source_df.equals(replicated_df), \
                "There is a mismatch in one of the replications"
def test_replicate_cudf_series(input_data_path, mg_device_count):
    """Check that each column of a DataFrame is replicated to every worker.

    Reads a space-delimited ``src dst value`` file and, inside an
    ``MGContext``, calls ``replication.replicate_cudf_series`` on every
    column in turn, asserting that the result held for each worker equals
    the source series.
    """
    # NOTE(review): a second function with this exact name appears later in
    # the visible source; if both live in the same module the later one
    # replaces this definition and this version is never run -- confirm.
    skip_if_not_enough_devices(mg_device_count)

    source_df = cudf.read_csv(
        input_data_path,
        delimiter=' ',
        names=['src', 'dst', 'value'],
        dtype=['int32', 'int32', 'float32'],
    )

    with MGContext(mg_device_count):
        for column in source_df.columns.values:
            series = source_df[column]
            worker_to_futures = replication.replicate_cudf_series(series)
            for worker in worker_to_futures:
                replica = worker_to_futures[worker]
                replicated_series = replica.result()
                assert series.equals(replicated_series), \
                    "There is a mismatch in one of the replications"
            # FIXME: If we do not clear this dictionary, when comparing
            # results for the 2nd column, one of the workers still
            # has a value from the 1st column
            worker_to_futures = {}
def test_enable_batch_context_then_views(graph_file, directed,
                                         mg_device_count):
    """Check batch state as views are requested after enabling batch mode.

    Inside an ``MGContext`` opened with ``p2p=True`` this asserts, in order:
    ``batch_enabled`` is False before ``enable_batch()`` and True after;
    ``batch_edgelists`` is not None; ``batch_adjlists`` is None until
    ``view_adj_list()`` is called; and ``batch_transposed_adjlists`` is None
    until ``view_transposed_adj_list()`` is called.
    """
    gc.collect()
    skip_if_not_enough_devices(mg_device_count)
    graph = utils.generate_cugraph_graph_from_file(graph_file, directed)

    with MGContext(number_of_devices=mg_device_count, p2p=True):
        # Batch mode is off until explicitly enabled.
        assert graph.batch_enabled is False, \
            "Internal property should be False"
        graph.enable_batch()
        assert graph.batch_enabled is True, \
            "Internal property should be True"
        assert graph.batch_edgelists is not None, \
            "The graph should have been created with an edgelist"

        # The adjacency list batch appears only once the view is requested.
        assert graph.batch_adjlists is None
        graph.view_adj_list()
        assert graph.batch_adjlists is not None

        # Likewise for the transposed adjacency list.
        assert graph.batch_transposed_adjlists is None
        graph.view_transposed_adj_list()
        assert graph.batch_transposed_adjlists is not None
def test_replicate_cudf_series(input_data_path, mg_device_count):
    """Check that each column of a DataFrame is replicated to every worker.

    Reads a space-delimited ``src dst value`` file and, inside an
    ``MGContext`` opened with ``p2p=True``, calls
    ``replication.replicate_cudf_series`` on every column in turn, asserting
    that the result held for each worker equals the source series.
    """
    # NOTE(review): an earlier function with this exact name appears in the
    # visible source; if both live in the same module this definition
    # replaces the earlier one -- confirm that is intended.
    gc.collect()
    skip_if_not_enough_devices(mg_device_count)

    source_df = cudf.read_csv(
        input_data_path,
        delimiter=" ",
        names=["src", "dst", "value"],
        dtype=["int32", "int32", "float32"],
    )

    with MGContext(number_of_devices=mg_device_count, p2p=True):
        for column in source_df.columns.values:
            series = source_df[column]
            worker_to_futures = replication.replicate_cudf_series(series)
            for worker in worker_to_futures:
                replica = worker_to_futures[worker]
                replicated_series = replica.result()
                assert series.equals(replicated_series), \
                    "There is a mismatch in one of the replications"
            # FIXME: If we do not clear this dictionary, when comparing
            # results for the 2nd column, one of the workers still
            # has a value from the 1st column
            worker_to_futures = {}