Beispiel #1
0
def ego_graph(G, n, radius=1, center=True, undirected=False, distance=None):
    """
    Compute the induced subgraph of neighbors centered at node n,
    within a given radius.

    Parameters
    ----------
    G : cugraph.Graph, networkx.Graph, CuPy or SciPy sparse matrix
        Graph or matrix object, which should contain the connectivity
        information. Edge weights, if present, should be single or double
        precision floating point values.
    n : integer
        A single node
    radius: integer, optional
        Include all neighbors of distance<=radius from n.
    center: bool, optional
        Defaults to True. False is not supported (the argument is accepted
        but not used by this function).
    undirected: bool, optional
        Defaults to False. True is not supported (the argument is accepted
        but not used by this function).
    distance: key, optional
        Distances are counted in hops from n. Other cases are not supported
        (the argument is accepted but not used by this function).

    Returns
    -------
    G_ego : cuGraph.Graph or networkx.Graph
        A graph built from the edge list of the ego network around n.
        It is created with the same graph class as the (converted) input and
        then converted back to the input's type.
        The networkx graph will not have all attributes copied over

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv',
    ...                   delimiter = ' ',
    ...                   dtype=['int32', 'int32', 'float32'],
    ...                   header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(M, source='0', destination='1')
    >>> ego = cugraph.ego_graph(G, 1, radius=2)

    """

    (G, input_type) = ensure_cugraph_obj(G, nx_weight_attr="weight")
    # Build the result with the same graph class as the input graph.
    result_graph = type(G)()

    # Use the same truthiness test before and after the call so the seed
    # lookup and the result un-renumbering can never get out of sync.
    if G.renumbered:
        n = G.lookup_internal_vertex_id(cudf.Series([n]))

    # The second return value (offsets) is not needed here: the edges are
    # returned as a single graph.
    df, _ = egonet_wrapper.egonet(G, n, radius)

    if G.renumbered:
        df = G.unrenumber(df, "src")
        df = G.unrenumber(df, "dst")

    # Only ask for the weight column when the input graph carried weights.
    if G.edgelist.weights:
        result_graph.from_cudf_edgelist(
            df, source="src", destination="dst", edge_attr="weight"
        )
    else:
        result_graph.from_cudf_edgelist(df, source="src", destination="dst")
    return _convert_graph_to_output_type(result_graph, input_type)
Beispiel #2
0
def batched_ego_graphs(G,
                       seeds,
                       radius=1,
                       center=True,
                       undirected=False,
                       distance=None):
    """
    Compute the induced subgraph of neighbors for each node in seeds
    within a given radius.

    Parameters
    ----------
    G : cugraph.Graph, networkx.Graph, CuPy or SciPy sparse matrix
        Graph or matrix object, which should contain the connectivity
        information. Edge weights, if present, should be single or double
        precision floating point values.
    seeds : cudf.Series or list or cudf.DataFrame
        Specifies the seeds of the induced egonet subgraphs.
    radius: integer, optional
        Include all neighbors of distance<=radius from each seed.
    center: bool, optional
        Defaults to True. False is not supported (the argument is accepted
        but not used by this function).
    undirected: bool, optional
        Defaults to False. True is not supported (the argument is accepted
        but not used by this function).
    distance: key, optional
        Distances are counted in hops from each seed. Other cases are not
        supported (the argument is accepted but not used by this function).

    Returns
    -------
    ego_edge_lists : cudf.DataFrame or pandas.DataFrame
        GPU data frame containing all induced sources identifiers,
        destination identifiers, edge weights
    seeds_offsets: cudf.Series
        Series containing the starting offset in the returned edge list
        for each seed.
    """

    (G, input_type) = ensure_cugraph_obj(G, nx_weight_attr="weight")

    # Use the same truthiness test before and after the call so the seed
    # lookup and the result un-renumbering can never get out of sync.
    if G.renumbered:
        if isinstance(seeds, cudf.DataFrame):
            # DataFrame seeds: every column takes part in the lookup.
            seeds = G.lookup_internal_vertex_id(seeds, seeds.columns)
        else:
            seeds = G.lookup_internal_vertex_id(cudf.Series(seeds))

    df, offsets = egonet_wrapper.egonet(G, seeds, radius)

    if G.renumbered:
        # preserve_order is requested because the offsets index into the
        # rows of df.
        df = G.unrenumber(df, "src", preserve_order=True)
        df = G.unrenumber(df, "dst", preserve_order=True)

    return _convert_df_series_to_output_type(df, offsets, input_type)
Beispiel #3
0
def bfs(G,
        start=None,
        return_sp_counter=None,
        i_start=None,
        directed=None,
        return_predecessors=None):
    """Find the distances and predecessors for a breadth first traversal of a
    graph.

    Parameters
    ----------
    G : cugraph.Graph, networkx.Graph, CuPy or SciPy sparse matrix
        Graph or matrix object, which should contain the connectivity
        information. Edge weights, if present, should be single or double
        precision floating point values.

    start : Integer
        The index of the graph vertex from which the traversal begins

    return_sp_counter : bool, optional, default=False
        Indicates if shortest path counters should be returned

    i_start : Integer, optional
        Identical to start, added for API compatibility. Only start or i_start
        can be set, not both.

    directed : bool, optional
        NOTE
            For non-Graph-type (eg. sparse matrix) values of G only. Raises
            TypeError if used with a Graph object.

        If True (default), then convert the input matrix to a cugraph.DiGraph,
        otherwise a cugraph.Graph object will be used.

    return_predecessors : bool, optional
        NOTE(review): accepted in the signature but neither validated nor
        used anywhere in this function; the predecessor output is always
        produced.

    Returns
    -------
    Return value type is based on the input type.  If G is a cugraph.Graph,
    returns:

       cudf.DataFrame
          df['vertex'] vertex IDs

          df['distance'] path distance for each vertex from the starting vertex

          df['predecessor'] for each i'th position in the column, the vertex ID
          immediately preceding the vertex at position i in the 'vertex' column

          df['sp_counter'] for each i'th position in the column, the number of
          shortest paths leading to the vertex at position i in the 'vertex'
          column (Only if return_sp_counter is True)

    If G is a networkx.Graph, returns:

       pandas.DataFrame with contents equivalent to the cudf.DataFrame
       described above.

    If G is a CuPy or SciPy matrix, returns:
       a 2-tuple of CuPy ndarrays (if CuPy matrix input) or Numpy ndarrays (if
       SciPy matrix input) representing:

       distance: cupy or numpy ndarray
          ndarray of shortest distances between source and vertex.

       predecessor: cupy or numpy ndarray
          ndarray of predecessors of a vertex on the path from source, which
          can be used to reconstruct the shortest paths.

       ...or if return_sp_counter is True, returns a 3-tuple with the above two
       arrays plus:

       sp_counter: cupy or numpy ndarray
          ndarray of number of shortest paths leading to each vertex.

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    ...                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(M, source='0', destination='1')
    >>> df = cugraph.bfs(G, 0)

    """
    (start, return_sp_counter, directed) = \
        _ensure_args(G, start, return_sp_counter, i_start, directed)

    # FIXME: allow nx_weight_attr to be specified
    (G, input_type) = ensure_cugraph_obj(
        G,
        nx_weight_attr="weight",
        matrix_graph_type=DiGraph if directed else Graph)

    # Exact type comparison on purpose: anything that is not precisely a
    # Graph (subclasses included) is traversed as directed.
    is_directed = type(G) is not Graph

    # Use the same truthiness test before and after the call so the start
    # lookup and the result un-renumbering can never get out of sync.
    if G.renumbered:
        start = G.lookup_internal_vertex_id(cudf.Series([start]))[0]

    df = bfs_wrapper.bfs(G, start, is_directed, return_sp_counter)

    if G.renumbered:
        df = G.unrenumber(df, "vertex")
        df = G.unrenumber(df, "predecessor")
        # Nulls left in the predecessor column after un-renumbering are set
        # to -1. Assign the filled column back explicitly instead of calling
        # fillna(inplace=True) on the df["predecessor"] temporary, which only
        # works if that temporary aliases the frame's own column.
        df["predecessor"] = df["predecessor"].fillna(-1)

    return _convert_df_to_output_type(df, input_type)
Beispiel #4
0
def sssp(G,
         source=None,
         method=None,
         directed=None,
         return_predecessors=None,
         unweighted=None,
         overwrite=None,
         indices=None):
    """
    Compute single-source shortest paths from `source` to every vertex.

    The result holds, for each vertex, its distance from the source and its
    predecessor on the shortest path. Vertices that are unreachable will
    have a distance of infinity denoted by the maximum value of the data type
    and the predecessor set as -1. The source vertex's predecessor is also
    set to -1. Graphs with negative weight cycles are not supported.

    Parameters
    ----------
    G : cugraph.Graph, networkx.Graph, CuPy or SciPy sparse matrix
        Graph or matrix object, which should contain the connectivity
        information. Edge weights, if present, should be single or double
        precision floating point values.
    source : int
        Index of the source vertex. When the graph is renumbered a
        cudf.DataFrame is also accepted; its columns are used for the vertex
        lookup and the first resulting id is taken.
    method, directed, return_predecessors, unweighted, overwrite, indices :
        optional
        Passed to `_ensure_args` together with G and source; the values it
        returns for `directed` and `return_predecessors` are the ones used
        below. `directed` selects DiGraph vs Graph when G is a matrix.
        NOTE(review): these look like SciPy-compatibility arguments - confirm
        against `_ensure_args`.

    Returns
    -------
    Return value type is based on the input type.  If G is a cugraph.Graph,
    returns:

       cudf.DataFrame
          df['vertex']
              vertex id

          df['distance']
              gives the path distance from the starting vertex

          df['predecessor']
              the vertex it was reached from

    If G is a networkx.Graph, returns:

       pandas.DataFrame with contents equivalent to the cudf.DataFrame
       described above.

    If G is a CuPy or SciPy matrix, returns:
       a 2-tuple of CuPy ndarrays (if CuPy matrix input) or Numpy ndarrays (if
       SciPy matrix input) representing:

       distance: cupy or numpy ndarray
          ndarray of shortest distances between source and vertex.

       predecessor: cupy or numpy ndarray
          ndarray of predecessors of a vertex on the path from source, which
          can be used to reconstruct the shortest paths.

    Raises
    ------
    ValueError
        If the (looked-up) source is cudf.NA.

    Examples
    --------
    >>> M = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ',
    ...                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(M, source='0', destination='1')
    >>> distances = cugraph.sssp(G, 0)

    """
    source, directed, return_predecessors = _ensure_args(
        G, source, method, directed, return_predecessors, unweighted,
        overwrite, indices)

    # FIXME: allow nx_weight_attr to be specified
    graph_type = DiGraph if directed else Graph
    G, input_type = ensure_cugraph_obj(
        G, nx_weight_attr="weight", matrix_graph_type=graph_type)

    if G.renumbered:
        # Translate the caller's vertex id into the graph's internal id.
        if isinstance(source, cudf.DataFrame):
            internal_ids = G.lookup_internal_vertex_id(source, source.columns)
            source = internal_ids.iloc[0]
        else:
            internal_ids = G.lookup_internal_vertex_id(cudf.Series([source]))
            source = internal_ids[0]

    if source is cudf.NA:
        message = "Starting vertex should be between 0 to number of vertices"
        raise ValueError(message)

    df = sssp_wrapper.sssp(G, source)

    if G.renumbered:
        # Map both id columns back to the caller's vertex ids, then replace
        # any nulls in the frame with -1.
        for column in ("vertex", "predecessor"):
            df = G.unrenumber(df, column)
        df.fillna(-1, inplace=True)

    return _convert_df_to_output_type(df, input_type, return_predecessors)
Beispiel #5
0
def weakly_connected_components(G,
                                directed=None,
                                connection=None,
                                return_labels=None):
    """
    Generate the Weakly Connected Components and attach a component label to
    each vertex.

    Parameters
    ----------
    G : cugraph.Graph, networkx.Graph, CuPy or SciPy sparse matrix

        Graph or matrix object, which should contain the connectivity
        information (edge weights are not used for this algorithm). If using a
        graph object, the graph can be either directed or undirected where an
        undirected edge is represented by a directed edge in both directions.
        The adjacency list will be computed if not already present.  The number
        of vertices should fit into a 32b int.

    directed : bool, optional

        NOTE
            For non-Graph-type (eg. sparse matrix) values of G only.
            Raises TypeError if used with a Graph object.

        If True (default), then convert the input matrix to a cugraph.DiGraph
        and only move from point i to point j along paths csgraph[i, j]. If
        False, the input matrix is converted to a cugraph.Graph and the
        algorithm can progress from point i to j along csgraph[i, j] or
        csgraph[j, i].

    connection : str, optional (default=None)

        Added for SciPy compatibility, can only be specified for non-Graph-type
        (eg. sparse matrix) values of G only (raises TypeError if used with a
        Graph object), and can only be set to "weak" for this API.

    return_labels : bool, optional

        NOTE
            For non-Graph-type (eg. sparse matrix) values of G only. Raises
            TypeError if used with a Graph object.

        If True (default), then return the labels for each of the connected
        components.

    Returns
    -------
    Return value type is based on the input type.  If G is a cugraph.Graph,
    returns:

       cudf.DataFrame
           GPU data frame containing two cudf.Series of size V: the vertex
           identifiers and the corresponding component identifier.

           df['vertex']
               Contains the vertex identifier
           df['labels']
               The component identifier

    If G is a networkx.Graph, returns:

       python dictionary, where keys are vertices and values are the component
       identifiers.

    If G is a CuPy or SciPy matrix, returns:

       CuPy ndarray (if CuPy matrix input) or Numpy ndarray (if SciPy matrix
       input) of shape (<num vertices>, 2), where column 0 contains component
       identifiers and column 1 contains vertices.

    Examples
    --------
    >>> M = cudf.read_csv(datasets_path / 'karate.csv',
    ...                   delimiter = ' ',
    ...                   dtype=['int32', 'int32', 'float32'],
    ...                   header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr=None)
    >>> df = cugraph.weakly_connected_components(G)

    """
    # Argument validation is delegated to the shared helper, which is told
    # which API it is validating for.
    directed, connection, return_labels = _ensure_args(
        "weakly_connected_components", G, directed, connection, return_labels)

    # FIXME: allow nx_weight_attr to be specified
    graph_type = DiGraph if directed else Graph
    G, input_type = ensure_cugraph_obj(
        G, nx_weight_attr="weight", matrix_graph_type=graph_type)

    labels_df = connectivity_wrapper.weakly_connected_components(G)

    # Map internal vertex ids back to the caller's ids when needed.
    if G.renumbered:
        labels_df = G.unrenumber(labels_df, "vertex")

    return _convert_df_to_output_type(labels_df, input_type, return_labels)
Beispiel #6
0
def bfs(G, start, return_sp_counter=False):
    """
    Find the distances and predecessors for a breadth first traversal of a
    graph.

    Parameters
    ----------
    G : cuGraph.Graph, NetworkX.Graph, or CuPy sparse COO matrix
        cuGraph graph descriptor with connectivity information. Edge weights,
        if present, should be single or double precision floating point values.

    start : Integer
        The index of the graph vertex from which the traversal begins

    return_sp_counter : bool, optional, default=False
        Indicates if shortest path counters should be returned

    Returns
    -------
    Return value type is based on the input type.  If G is a cugraph.Graph,
    returns:

       cudf.DataFrame
          df['vertex'] vertex IDs

          df['distance'] path distance for each vertex from the starting vertex

          df['predecessor'] for each i'th position in the column, the vertex ID
          immediately preceding the vertex at position i in the 'vertex' column

          df['sp_counter'] for each i'th position in the column, the number of
          shortest paths leading to the vertex at position i in the 'vertex'
          column (Only if return_sp_counter is True)

    If G is a networkx.Graph, returns:

       pandas.DataFrame with contents equivalent to the cudf.DataFrame
       described above.

    If G is a CuPy sparse COO matrix, returns a tuple of cupy.ndarray:

       distance: cupy.ndarray
          ndarray of shortest distances between source and vertex.

       predecessor: cupy.ndarray
          ndarray of predecessors of a vertex on the path from source, which
          can be used to reconstruct the shortest paths.

       sp_counter: cupy.ndarray
          ndarray of number of shortest paths leading to each vertex (only if
          return_sp_counter is True)

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    ...                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(M, source='0', destination='1')
    >>> df = cugraph.bfs(G, 0)
    """
    # FIXME: allow nx_weight_attr to be specified
    (G, input_type) = ensure_cugraph_obj(G,
                                         nx_weight_attr="weight",
                                         matrix_graph_type=Graph)

    # Exact type comparison on purpose: anything that is not precisely a
    # Graph (subclasses included) is traversed as directed.
    directed = type(G) is not Graph

    # Use the same truthiness test before and after the call so the start
    # lookup and the result un-renumbering can never get out of sync.
    if G.renumbered:
        start = G.lookup_internal_vertex_id(cudf.Series([start]))[0]

    df = bfs_wrapper.bfs(G, start, directed, return_sp_counter)

    if G.renumbered:
        df = G.unrenumber(df, "vertex")
        df = G.unrenumber(df, "predecessor")
        # Nulls left in the predecessor column after un-renumbering are set
        # to -1. Assign the filled column back explicitly instead of calling
        # fillna(inplace=True) on the df["predecessor"] temporary, which only
        # works if that temporary aliases the frame's own column.
        df["predecessor"] = df["predecessor"].fillna(-1)

    return _convert_df_to_output_type(df, input_type)
Beispiel #7
0
def sssp(G, source):
    """
    Compute the distance and predecessors for shortest paths from the specified
    source to all the vertices in the graph. The distances column will store
    the distance from the source to each vertex. The predecessors column will
    store each vertex's predecessor in the shortest path. Vertices that are
    unreachable will have a distance of infinity denoted by the maximum value
    of the data type and the predecessor set as -1. The source vertex's
    predecessor is also set to -1. Graphs with negative weight cycles are not
    supported.

    Parameters
    ----------
    G : cuGraph.Graph, NetworkX.Graph, or CuPy sparse COO matrix
        cuGraph graph descriptor with connectivity information. Edge weights,
        if present, should be single or double precision floating point values.
    source : int
        Index of the source vertex.

    Returns
    -------
    Return value type is based on the input type.  If G is a cugraph.Graph,
    returns:

       cudf.DataFrame
          df['vertex']
              vertex id

          df['distance']
              gives the path distance from the starting vertex

          df['predecessor']
              the vertex it was reached from

    If G is a networkx.Graph, returns:

       pandas.DataFrame with contents equivalent to the cudf.DataFrame
       described above.

    If G is a CuPy sparse COO matrix, returns a 2-tuple of cupy.ndarray:

       distance: cupy.ndarray
          ndarray of shortest distances between source and vertex.

       predecessor: cupy.ndarray
          ndarray of predecessors of a vertex on the path from source, which
          can be used to reconstruct the shortest paths.

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    ...                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(M, source='0', destination='1')
    >>> distances = cugraph.sssp(G, 0)
    """
    # FIXME: allow nx_weight_attr to be specified
    (G, input_type) = ensure_cugraph_obj(G,
                                         nx_weight_attr="weight",
                                         matrix_graph_type=Graph)

    # Translate the caller's vertex id into the graph's internal id.
    if G.renumbered:
        source = G.lookup_internal_vertex_id(cudf.Series([source]))[0]

    df = sssp_wrapper.sssp(G, source)

    if G.renumbered:
        df = G.unrenumber(df, "vertex")
        df = G.unrenumber(df, "predecessor")
        # Nulls left in the predecessor column after un-renumbering are set
        # to -1. Assign the filled column back explicitly instead of calling
        # fillna(inplace=True) on the df["predecessor"] temporary, which only
        # works if that temporary aliases the frame's own column.
        df["predecessor"] = df["predecessor"].fillna(-1)

    return _convert_df_to_output_type(df, input_type)