def compare_bfs(graph_file, directed=True, return_sp_counter=False, seed=42):
    """
    Generate both cugraph and reference BFS traversals and compare them.

    Parameters
    ----------
    graph_file : string
        Path to COO Graph representation in .csv format
    directed : bool, optional, default=True
        Indicates whether the graph is directed or not
    return_sp_counter : bool, optional, default=False
        Return shortest path counters from traversal if True
    seed : int, list, or None, optional, default=42
        int: value for random seed to obtain a single starting vertex
        list: explicit starting vertices to verify from
        None: verify starting from every vertex of the graph

    Returns
    -------
    None
    """
    G, Gnx = utils.build_cu_and_nx_graphs(graph_file, directed)

    # NOTE: We need to take 2 different paths for verification as the nx
    #       functions used as reference return dictionaries that might
    #       not contain all the vertices while the cugraph version returns
    #       a cudf.DataFrame with all the vertices; also some verification
    #       becomes slow with the data transfer.
    # The chosen comparison function is the same for every branch/iteration,
    # so select it once up front.
    compare_func = _compare_bfs_spc if return_sp_counter else _compare_bfs

    if isinstance(seed, int):
        # Seed for reproducibility
        random.seed(seed)
        # random.sample requires a sequence (not a NodeView) on Python 3.11+
        start_vertex = random.sample(list(Gnx.nodes()), 1)[0]
        compare_func(G, Gnx, start_vertex)
    elif isinstance(seed, list):
        # For other verifications: compare from each provided start vertex
        for start_vertex in seed:
            compare_func(G, Gnx, start_vertex)
    elif seed is None:
        # Same here, it is only to run full checks from every vertex
        for start_vertex in Gnx:
            compare_func(G, Gnx, start_vertex)
    else:
        # Unknown type given to seed
        raise NotImplementedError("Invalid type for seed")
def get_cu_nx_graph_datasets_unrenumbered(request):
    """Build the (cugraph, networkx) graph pair from the fixture's parameters."""
    graph_args = request.param
    return utils.build_cu_and_nx_graphs(*graph_args)
def get_cu_nx_graph_datasets_small(request):
    """Build the (cugraph, networkx) graph pair from the fixture's parameters."""
    graph_args = request.param
    return utils.build_cu_and_nx_graphs(*graph_args)
def calc_edge_betweenness_centrality(
    graph_file,
    directed=True,
    k=None,
    normalized=False,
    weight=None,
    seed=None,
    result_dtype=np.float64,
    use_k_full=False,
    multi_gpu_batch=False,
    edgevals=False,
):
    """
    Generate both cugraph and networkx edge betweenness centrality.

    Parameters
    ----------
    graph_file : string
        Path to COO Graph representation in .csv format
    directed : bool, optional, default=True
    k : int or None, optional, default=None
        int: Number of sources to sample from
        None: All sources are used to compute
    normalized : bool
        True: Normalize Betweenness Centrality scores
        False: Scores are left unnormalized
    weight : cudf.DataFrame
        Not supported as of 06/2020
    seed : int or None, optional, default=None
        Seed for random sampling of the starting point
    result_dtype : numpy.dtype
        Expected type of the result, either np.float32 or np.float64
    use_k_full : bool
        When True, if k is None replaces k by the number of sources of the
        Graph
    multi_gpu_batch : bool
        When True, enable mg batch after constructing the graph
    edgevals : bool
        When True, enable tests with weighted graph, should be ignored
        during computation.

    Returns
    -------
    sorted_df : cudf.DataFrame
        Contains 'src', 'dst', 'cu_bc' and 'ref_bc' columns, where 'cu_bc'
        and 'ref_bc' are the two betweenness centrality scores to compare.
        The dataframe is expected to be sorted based on 'src' then 'dst',
        so that we can use cupy.isclose to compare the scores.
    """
    # build_cu_and_nx_graphs either returns both graphs or raises, so no
    # None pre-initialization / post-assert is needed.
    G, Gnx = utils.build_cu_and_nx_graphs(
        graph_file, directed=directed, edgevals=edgevals
    )
    if multi_gpu_batch:
        G.enable_batch()

    # Pick the comparison strategy based on the sampling parameters.
    if k is not None and seed is not None:
        calc_func = _calc_bc_subset
    elif k is not None:
        calc_func = _calc_bc_subset_fixed
    else:
        # We proceed to a comparison using every source
        if use_k_full:
            k = Gnx.number_of_nodes()
        calc_func = _calc_bc_full

    sorted_df = calc_func(
        G,
        Gnx,
        k=k,
        normalized=normalized,
        weight=weight,
        seed=seed,
        result_dtype=result_dtype,
    )
    return sorted_df
def calc_betweenness_centrality(
    graph_file,
    directed=True,
    k=None,
    normalized=False,
    weight=None,
    endpoints=False,
    seed=None,
    result_dtype=np.float64,
    use_k_full=False,
):
    """
    Generate both cugraph and networkx betweenness centrality.

    Parameters
    ----------
    graph_file : string
        Path to COO Graph representation in .csv format
    directed : bool, optional, default=True
    k : int or None, optional, default=None
        int: Number of sources to sample from
        None: All sources are used to compute
    normalized : bool
        True: Normalize Betweenness Centrality scores
        False: Scores are left unnormalized
    weight : cudf.DataFrame
        Not supported as of 06/2020
    endpoints : bool
        True: Endpoints are included when computing scores
        False: Endpoints are not considered
    seed : int or None, optional, default=None
        Seed for random sampling of the starting point
    result_dtype : numpy.dtype
        Expected type of the result, either np.float32 or np.float64
    use_k_full : bool
        When True, if k is None replaces k by the number of sources of the
        Graph

    Returns
    -------
    sorted_df : cudf.DataFrame
        Contains 'vertex', 'cu_bc' and 'ref_bc' columns, where 'cu_bc' and
        'ref_bc' are the two betweenness centrality scores to compare.
        The dataframe is expected to be sorted based on 'vertex', so that
        we can use cupy.isclose to compare the scores.
    """
    G, Gnx = utils.build_cu_and_nx_graphs(graph_file, directed=directed)

    # Pick the comparison strategy based on the sampling parameters
    # (every branch assigns calc_func, so no None pre-initialization).
    if k is not None and seed is not None:
        calc_func = _calc_bc_subset
    elif k is not None:
        calc_func = _calc_bc_subset_fixed
    else:
        # We proceed to a comparison using every source
        if use_k_full:
            k = Gnx.number_of_nodes()
        calc_func = _calc_bc_full

    sorted_df = calc_func(
        G,
        Gnx,
        k=k,
        normalized=normalized,
        weight=weight,
        endpoints=endpoints,
        seed=seed,
        result_dtype=result_dtype,
    )
    return sorted_df