Example #1
0
def compare_bfs(graph_file, directed=True, return_sp_counter=False, seed=42):
    """ Generate both cugraph and reference bfs traversal and compare them

    Parameters
    -----------
    graph_file : string
        Path to COO Graph representation in .csv format

    directed : bool, optional, default=True
        Indicates whether the graph is directed or not

    return_sp_counter : bool, optional, default=False
        When True the comparison helper that also checks the shortest path
        counters is used

    seed : int, list or None, optional, default=42
        int:  Value for random seed used to draw a single starting vertex
        list: Explicit starting vertices, each one of them is compared
        None: Every vertex obtained by iterating the reference graph is
              used as a starting vertex (full check)

    Returns
    -------
    None
        Nothing is returned, the selected comparison helper is called once
        per starting vertex.

    Raises
    ------
    NotImplementedError
        If seed is neither an int, a list nor None
    """
    G, Gnx = utils.build_cu_and_nx_graphs(graph_file, directed)

    if isinstance(seed, int):
        # Seed for reproducibility
        random.seed(seed)
        # random.sample only accepts a sequence as population (sets and
        # set-like views are rejected starting with Python 3.11), so the
        # nodes are materialized in a list first. On older interpreters this
        # draws the same vertex as sampling the view directly.
        start_vertices = random.sample(list(Gnx.nodes()), 1)
    elif isinstance(seed, list):  # For other Verifications
        start_vertices = seed
    elif seed is None:  # Same here, it is only to run full checks
        start_vertices = Gnx
    else:  # Unknown type given to seed
        raise NotImplementedError("Invalid type for seed")

    # NOTE: We need to take 2 different paths for verification as the nx
    #       functions used as reference return dictionaries that might
    #       not contain all the vertices while the cugraph version returns
    #       a cudf.DataFrame with all the vertices, also some verifications
    #       become slow with the data transfer
    compare_func = _compare_bfs_spc if return_sp_counter else _compare_bfs

    for start_vertex in start_vertices:
        compare_func(G, Gnx, start_vertex)
Example #2
0
def get_cu_nx_graph_datasets_unrenumbered(request):
    """Forward the parameters carried by ``request.param`` to
    ``utils.build_cu_and_nx_graphs`` and return whatever it builds.
    """
    build_args = request.param
    return utils.build_cu_and_nx_graphs(*build_args)
Example #3
0
def get_cu_nx_graph_datasets_small(request):
    """Forward the parameters carried by ``request.param`` to
    ``utils.build_cu_and_nx_graphs`` and return whatever it builds.
    """
    build_args = request.param
    return utils.build_cu_and_nx_graphs(*build_args)
Example #4
0
def calc_edge_betweenness_centrality(
    graph_file,
    directed=True,
    k=None,
    normalized=False,
    weight=None,
    seed=None,
    result_dtype=np.float64,
    use_k_full=False,
    multi_gpu_batch=False,
    edgevals=False,
):
    """ Compute the edge betweenness centrality with both cugraph and the
    reference implementation

    The graphs are built from graph_file, then the work is dispatched to one
    of three helpers depending on k and seed:
    - k and seed are both given: _calc_bc_subset
    - only k is given: _calc_bc_subset_fixed
    - k is None: _calc_bc_full

    Parameters
    ----------
    graph_file : string
        Path to COO Graph representation in .csv format

    directed : bool, optional, default=True
        Forwarded to the graph construction

    k : int or None, optional, default=None
        int:  Number of sources to sample from
        None: All sources are used to compute

    normalized : bool, optional, default=False
        True: Normalize Betweenness Centrality scores
        False: Scores are left unnormalized

    weight : cudf.DataFrame, optional, default=None
        Not supported as of 06/2020

    seed : int or None, optional, default=None
        Seed for random sampling of the starting point

    result_dtype : numpy.dtype, optional, default=np.float64
        Expected type of the result, either np.float32 or np.float64

    use_k_full : bool, optional, default=False
        When True and k is None, k is replaced by the number of nodes of
        the reference graph

    multi_gpu_batch : bool, optional, default=False
        When True, enable_batch() is called on the cugraph Graph right after
        its construction

    edgevals : bool, optional, default=False
        When True, enable tests with weighted graph, should be ignored
        during computation.

    Returns
    -------
    sorted_df : cudf.DataFrame
        Contains 'src', 'dst', 'cu_bc' and 'ref_bc' columns, where 'cu_bc'
        and 'ref_bc' are the two betweenness centrality scores to compare.
        The dataframe is expected to be sorted based on 'src' then 'dst',
        so that we can use cupy.isclose to compare the scores.
    """
    G, Gnx = utils.build_cu_and_nx_graphs(
        graph_file, directed=directed, edgevals=edgevals
    )
    assert G is not None and Gnx is not None

    if multi_gpu_batch:
        G.enable_batch()

    # Select the helper matching the requested sampling mode
    if k is None:
        # Comparison using every source
        if use_k_full:
            k = Gnx.number_of_nodes()
        calc_func = _calc_bc_full
    elif seed is None:
        calc_func = _calc_bc_subset_fixed
    else:
        calc_func = _calc_bc_subset

    options = {
        "k": k,
        "normalized": normalized,
        "weight": weight,
        "seed": seed,
        "result_dtype": result_dtype,
    }
    return calc_func(G, Gnx, **options)
Example #5
0
def calc_betweenness_centrality(
    graph_file,
    directed=True,
    k=None,
    normalized=False,
    weight=None,
    endpoints=False,
    seed=None,
    result_dtype=np.float64,
    use_k_full=False,
):
    """ Compute the betweenness centrality with both cugraph and the
    reference implementation

    The graphs are built from graph_file, then the work is dispatched to one
    of three helpers depending on k and seed:
    - k and seed are both given: _calc_bc_subset
    - only k is given: _calc_bc_subset_fixed
    - k is None: _calc_bc_full

    Parameters
    ----------
    graph_file : string
        Path to COO Graph representation in .csv format

    directed : bool, optional, default=True
        Forwarded to the graph construction

    k : int or None, optional, default=None
        int:  Number of sources to sample from
        None: All sources are used to compute

    normalized : bool, optional, default=False
        True: Normalize Betweenness Centrality scores
        False: Scores are left unnormalized

    weight : cudf.DataFrame, optional, default=None
        Not supported as of 06/2020

    endpoints : bool, optional, default=False
        True: Endpoints are included when computing scores
        False: Endpoints are not considered

    seed : int or None, optional, default=None
        Seed for random sampling of the starting point

    result_dtype : numpy.dtype, optional, default=np.float64
        Expected type of the result, either np.float32 or np.float64

    use_k_full : bool, optional, default=False
        When True and k is None, k is replaced by the number of nodes of
        the reference graph

    Returns
    -------
    sorted_df : cudf.DataFrame
        Contains 'vertex', 'cu_bc' and 'ref_bc' columns, where 'cu_bc'
        and 'ref_bc' are the two betweenness centrality scores to compare.
        The dataframe is expected to be sorted based on 'vertex', so that we
        can use cupy.isclose to compare the scores.
    """
    G, Gnx = utils.build_cu_and_nx_graphs(graph_file, directed=directed)

    # Select the helper matching the requested sampling mode
    if k is None:
        # Comparison using every source
        if use_k_full:
            k = Gnx.number_of_nodes()
        calc_func = _calc_bc_full
    elif seed is None:
        calc_func = _calc_bc_subset_fixed
    else:
        calc_func = _calc_bc_subset

    options = {
        "k": k,
        "normalized": normalized,
        "weight": weight,
        "endpoints": endpoints,
        "seed": seed,
        "result_dtype": result_dtype,
    }
    return calc_func(G, Gnx, **options)