Esempio n. 1
0
def from_cudf_edgelist(df,
                       source='source',
                       destination='destination',
                       edge_attr=None,
                       create_using=Graph,
                       renumber=True):
    """
    Build and return a new graph from an edge list stored in a
    cudf.DataFrame.

    Provided for NetworkX compatibility: this is the RAPIDS counterpart of
    NetworkX's from_pandas_edgelist().  Multiple source or destination
    columns are not supported, but renumbering is.

    Parameters
    ----------
    df : cudf.DataFrame
        DataFrame whose columns hold the edge source vertices, the
        destination vertices (NetworkX calls these "targets") and,
        optionally, the edge weights.
    source : string or integer
        Key used to select the source column of ``df``.
    destination : string or integer
        Key used to select the destination column of ``df``.
    edge_attr : string or integer, optional
        Key used to select the weight column of ``df``.  ``None`` (the
        default) means no weight column is passed on.
    create_using : cuGraph.Graph
        Graph class to instantiate; must be exactly ``Graph`` (the default)
        or ``DiGraph``.
    renumber : bool
        Should be True when the source and destination indices are not in
        the range 0 to V, where V is the number of vertices.

    Returns
    -------
    G : cugraph.Graph or cugraph.DiGraph
        A new instance of ``create_using`` populated from ``df``.

    Raises
    ------
    Exception
        If ``create_using`` is neither ``Graph`` nor ``DiGraph``.

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    >>>                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.from_cudf_edgelist(M, source='0', destination='1',
    >>>                                edge_attr='2')

    """
    # Only the two concrete graph classes are accepted (identity check, so
    # subclasses and instances are rejected as well).
    if create_using is not Graph and create_using is not DiGraph:
        raise Exception("create_using supports Graph and DiGraph")

    graph = create_using()
    graph.from_cudf_edgelist(df,
                             source=source,
                             destination=destination,
                             edge_attr=edge_attr,
                             renumber=renumber)
    return graph
Esempio n. 2
0
def _minimum_spanning_tree_subgraph(G):
    """
    Return a new Graph holding the minimum spanning tree edges of G.

    Parameters
    ----------
    G : cugraph.Graph
        Input graph.  Its type must be exactly ``Graph``; anything else is
        rejected as not undirected.

    Returns
    -------
    tree : cugraph.Graph
        Graph built from the "src", "dst" and "weight" columns of the
        DataFrame returned by the minimum spanning tree wrapper.

    Raises
    ------
    Exception
        If ``type(G)`` is not ``Graph``.
    """
    tree = Graph()
    if type(G) is not Graph:
        raise Exception("input graph must be undirected")

    edges = minimum_spanning_tree_wrapper.minimum_spanning_tree(G)

    # Map internal vertex ids back to the caller's ids when G was renumbered.
    if G.renumbered:
        for column in ("src", "dst"):
            edges = G.unrenumber(edges, column)

    tree.from_cudf_edgelist(
        edges, source="src", destination="dst", edge_attr="weight"
    )
    return tree
Esempio n. 3
0
def from_numpy_matrix(A, create_using=Graph):
    """
    Create a graph from a numpy matrix that holds an adjacency matrix.

    Parameters
    ----------
    A : numpy matrix
        Adjacency matrix, handed unchanged to the new graph's
        ``from_numpy_matrix`` method.
    create_using : cugraph.Graph or cugraph.DiGraph
        Pass ``cugraph.DiGraph`` for a directed graph or ``cugraph.Graph``
        (the default) for an undirected one.

    Returns
    -------
    G : cugraph.Graph or cugraph.DiGraph
        A new instance of ``create_using`` initialized from ``A``.

    Raises
    ------
    Exception
        If ``create_using`` is neither ``Graph`` nor ``DiGraph``.
    """
    # Identity check: only the two concrete classes themselves are accepted.
    if create_using is not Graph and create_using is not DiGraph:
        raise Exception("create_using supports Graph and DiGraph")

    graph = create_using()
    graph.from_numpy_matrix(A)
    return graph
Esempio n. 4
0
def from_pandas_adjacency(df, create_using=Graph):
    """
    Create a graph from a pandas adjacency matrix.

    Parameters
    ----------
    df : pandas adjacency matrix
        Adjacency data, handed unchanged to the new graph's
        ``from_pandas_adjacency`` method.
    create_using : cugraph.Graph or cugraph.DiGraph
        Pass ``cugraph.DiGraph`` for a directed graph or ``cugraph.Graph``
        (the default) for an undirected one.

    Returns
    -------
    G : cugraph.Graph or cugraph.DiGraph
        A new instance of ``create_using`` initialized from ``df``.

    Raises
    ------
    Exception
        If ``create_using`` is neither ``Graph`` nor ``DiGraph``.
    """
    # Identity check: only the two concrete classes themselves are accepted.
    if create_using is not Graph and create_using is not DiGraph:
        raise Exception("create_using supports Graph and DiGraph")

    graph = create_using()
    graph.from_pandas_adjacency(df)
    return graph
Esempio n. 5
0
def from_cudf_edgelist(df,
                       source='source',
                       destination='destination',
                       edge_attr=None,
                       create_using=Graph,
                       renumber=True):
    """
    Build and return a new graph from an edge list stored in a
    cudf.DataFrame.

    Provided for NetworkX compatibility: this is the RAPIDS counterpart of
    NetworkX's from_pandas_edgelist().

    Parameters
    ----------
    df : cudf.DataFrame
        DataFrame whose columns hold the edge source vertices, the
        destination vertices (NetworkX calls these "targets") and,
        optionally, the edge weights.
    source : string or integer
        Key used to select the source column of ``df``.
    destination : string or integer
        Key used to select the destination column of ``df``.
    edge_attr : string or integer, optional
        Key used to select the weight column of ``df``.  ``None`` (the
        default) means no weight column is passed on.
    create_using : cugraph.Graph or cugraph.DiGraph
        Graph class to instantiate; must be exactly ``Graph`` (the default)
        or ``DiGraph``.
    renumber : bool
        Forwarded unchanged to the new graph's ``from_cudf_edgelist``
        method.  Default is True.

    Returns
    -------
    G : cugraph.Graph or cugraph.DiGraph
        A new instance of ``create_using`` populated from ``df``.

    Raises
    ------
    Exception
        If ``create_using`` is neither ``Graph`` nor ``DiGraph``.

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    >>>                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.from_cudf_edgelist(M, source='0', destination='1',
    >>>                                edge_attr='2')
    """
    # Reject anything that is not one of the two concrete graph classes.
    if create_using is not Graph and create_using is not DiGraph:
        raise Exception("create_using supports Graph and DiGraph")

    graph = create_using()
    graph.from_cudf_edgelist(df,
                             source=source,
                             destination=destination,
                             edge_attr=edge_attr,
                             renumber=renumber)
    return graph
Esempio n. 6
0
def k_core(G, k=None, core_number=None):
    """
    Compute the k-core of the graph G based on the out degree of its nodes.

    A k-core of a graph is a maximal subgraph that contains nodes of degree
    k or more.  Graphs with self-loops or parallel edges are not supported.

    Parameters
    ----------
    G : cuGraph.Graph
        cuGraph graph descriptor with connectivity information.  The graph
        should contain undirected edges, each represented as a pair of
        directed edges (one per direction).  Edge weights may be present but
        do not take part in the k-core calculation.
    k : int, optional
        Order of the core; must not be negative.  When None, the main core
        is returned (k is set to the largest value in
        ``core_number['values']``).
    core_number : cudf.DataFrame, optional
        Precomputed core numbers of the nodes of G, made of two cudf.Series
        of size V.  When None, the core numbers are computed internally.

        core_number['vertex'] : cudf.Series
            Contains the vertex identifiers
        core_number['values'] : cudf.Series
            Contains the core number of vertices

    Returns
    -------
    KCoreGraph : cuGraph.Graph
        K Core of the input graph

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    >>>                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> sources = cudf.Series(M['0'])
    >>> destinations = cudf.Series(M['1'])
    >>> G = cugraph.Graph()
    >>> G.add_edge_list(sources, destinations, None)
    >>> KCoreGraph = cugraph.k_core(G)
    """

    core_graph = Graph()

    if core_number is None:
        # The wrapper names its result column "core_number"; the rest of
        # this function (and the k-core wrapper call) uses "values".
        computed = core_number_wrapper.core_number(G.graph_ptr)
        core_number = computed.rename(columns={"core_number": "values"})

    order = core_number['values'].max() if k is None else k

    k_core_wrapper.k_core(
        G.graph_ptr, core_graph.graph_ptr, order, core_number
    )
    return core_graph
Esempio n. 7
0
def _maximum_spanning_tree_subgraph(G):
    """
    Return a new Graph holding the maximum spanning tree edges of G.

    The maximum spanning tree is obtained by running the minimum spanning
    tree wrapper on G with every adjacency-list weight negated, then
    flipping the sign of the resulting "weight" column back.

    G's weights are swapped out only for the duration of the wrapper call.
    They are restored in a ``finally`` block, so a failure inside the
    wrapper no longer leaves the caller's graph with negated weights.

    Parameters
    ----------
    G : cugraph.Graph
        Input graph.  Its type must be exactly ``Graph``; anything else is
        rejected as not undirected.

    Returns
    -------
    mst_subgraph : cugraph.Graph
        Graph built from the "src", "dst" and "weight" columns of the
        spanning tree DataFrame.

    Raises
    ------
    Exception
        If ``type(G)`` is not ``Graph``.
    """
    if type(G) is not Graph:
        raise Exception("input graph must be undirected")

    original_weights = G.adjlist.weights
    weighted = original_weights is not None

    if weighted:
        G.adjlist.weights = original_weights.mul(-1)
    try:
        mst_df = minimum_spanning_tree_wrapper.minimum_spanning_tree(G)
    finally:
        # Always hand the caller's graph back with its original weights,
        # even when the wrapper raises.
        if weighted:
            G.adjlist.weights = original_weights

    if weighted:
        # Undo the negation on the result so it reports the real weights.
        mst_df["weight"] = mst_df["weight"].mul(-1)

    # Map internal vertex ids back to the caller's ids when G was renumbered.
    if G.renumbered:
        mst_df = G.unrenumber(mst_df, "src")
        mst_df = G.unrenumber(mst_df, "dst")

    mst_subgraph = Graph()
    mst_subgraph.from_cudf_edgelist(mst_df,
                                    source="src",
                                    destination="dst",
                                    edge_attr="weight")
    return mst_subgraph
def subgraph(G, vertices):
    """
    Extract the subgraph of G induced by the given vertices.

    Works for both directed and undirected graphs.  No edge traversal takes
    place: the result simply keeps every edge whose two endpoints are both
    contained in ``vertices``.

    Parameters
    ----------
    G : cugraph.Graph
        cuGraph graph descriptor
    vertices : cudf.Series
        Specifies the vertices of the induced subgraph

    Returns
    -------
    Sg : cugraph.Graph
        A graph object containing the subgraph induced by the given vertex
        set.

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    >>>                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> sources = cudf.Series(M['0'])
    >>> destinations = cudf.Series(M['1'])
    >>> G = cugraph.Graph()
    >>> G.add_edge_list(sources, destinations, None)
    >>> verts = numpy.zeros(3, dtype=numpy.int32)
    >>> verts[0] = 0
    >>> verts[1] = 1
    >>> verts[2] = 2
    >>> sverts = cudf.Series(verts)
    >>> Sg = cugraph.subgraph(G, sverts)
    """

    # Validate the vertex series before any graph object is created.
    null_check(vertices)

    induced = Graph()
    source_ptr = G.graph_ptr
    subgraph_extraction_wrapper.subgraph(
        source_ptr, vertices, induced.graph_ptr
    )
    return induced
Esempio n. 9
0
def from_cudf_edgelist(df, source='source', target='target', weight=None):
    """
    Build and return a new Graph from an edge list stored in a
    cudf.DataFrame.

    Provided for NetworkX compatibility: this is the RAPIDS counterpart of
    NetworkX's from_pandas_edgelist().

    Parameters
    ----------
    df : cudf.DataFrame
        DataFrame whose columns hold the edge source vertices, the
        destination vertices (NetworkX calls these "targets") and,
        optionally, the edge weights.
    source : string or integer
        Key used to select the source column of ``df``.
    target : string or integer
        Key used to select the destination (target) column of ``df``.
    weight : string or integer, optional
        Key used to select the weight column of ``df``.  When ``None`` (the
        default) the edges are added without weights.

    Returns
    -------
    G : cugraph.Graph
        A new Graph whose edge list was added from the selected columns.

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    >>>                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.from_cudf_edgelist(M, source='0', target='1', weight='2')
    """

    graph = Graph()

    # Source and destination are always passed; the weight column is
    # appended as a third positional argument only when requested.
    columns = [df[source], df[target]]
    if weight is not None:
        columns.append(df[weight])

    graph.add_edge_list(*columns)
    return graph
Esempio n. 10
0
def ktruss_subgraph(G, k, use_weights=True):
    """
    Return the k-truss subgraph of a graph for a specific k.

    The k-truss of a graph is a subgraph in which every edge takes part in
    at least (k - 2) triangles.  K-trusses are used to find tightly knit
    groups of vertices in a graph.  A k-truss is a relaxation of a k-clique
    and was defined in [1].  Finding cliques is computationally demanding,
    and finding the maximal k-clique is known to be NP-Hard.

    In contrast, finding a k-truss is computationally tractable because its
    key building block, triangle counting, can be executed in polynomial
    time.  It typically takes many iterations of triangle counting to find
    the k-truss of a graph, yet these iterations operate on a weakly
    monotonically shrinking graph, so the k-truss can be found in a fairly
    reasonable amount of time.  The solution in cuGraph is based on a GPU
    algorithm first shown in [2] and uses the triangle counting algorithm
    from [3].

    [1] Cohen, J.,
    "Trusses: Cohesive subgraphs for social network analysis"
    National security agency technical report, 2008

    [2] O. Green, J. Fox, E. Kim, F. Busato, et al.
    "Quickly Finding a Truss in a Haystack"
    IEEE High Performance Extreme Computing Conference (HPEC), 2017
    https://doi.org/10.1109/HPEC.2017.8091038

    [3] O. Green, P. Yalamanchili, L.M. Munguia,
    "Fast Triangle Counting on GPU"
    Irregular Applications: Architectures and Algorithms (IA3), 2014


    Parameters
    ----------
    G : cuGraph.Graph
        cuGraph graph descriptor with connectivity information.  k-trusses
        are defined only for undirected graphs, since they are defined in
        terms of undirected triangles.

    k : int
        The desired k to be used for extracting the k-truss subgraph.

    use_weights : Bool
        Whether the output should contain the edge weights if G has them.

    Returns
    -------
    G_truss : cuGraph.Graph
        A cugraph graph descriptor with the k-truss subgraph for the given k.

    Raises
    ------
    Exception
        If ``type(G)`` is not ``Graph``.

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    >>>                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(M, source='0', destination='1')
    >>> k_subgraph = cugraph.ktruss_subgraph(G, 3)
    """

    truss = Graph()
    if type(G) is not Graph:
        raise Exception("input graph must be undirected")

    edges = ktruss_subgraph_wrapper.ktruss_subgraph(G, k, use_weights)

    # Map internal vertex ids back to the caller's ids when G was renumbered.
    if G.renumbered:
        for column in ("src", "dst"):
            edges = G.unrenumber(edges, column)

    # The weight column is requested only when the input graph's edge list
    # reports weights.
    edgelist_kwargs = {"source": "src", "destination": "dst"}
    if G.edgelist.weights:
        edgelist_kwargs["edge_attr"] = "weight"
    truss.from_cudf_edgelist(edges, **edgelist_kwargs)

    return truss
Esempio n. 11
0
def from_pandas_edgelist(df,
                         source="source",
                         destination="destination",
                         edge_attr=None,
                         create_using=Graph,
                         renumber=True):
    """
    Create a new graph from an edge list stored in a pandas DataFrame.

    ``source`` names the source column and ``destination`` names the
    destination column.  If weights are present, ``edge_attr`` names the
    weights column.

    By default, renumbering is enabled to map the source and destination
    vertices into an index in the range [0, V) where V is the number of
    vertices.  If the input vertices are a single column of integers in the
    range [0, V), renumbering can be disabled and the original external
    vertex ids will be used.

    Parameters
    ----------
    df : pandas.DataFrame
        A DataFrame that contains edge information
    source : str or array-like
        source column name or array of column names
    destination : str or array-like
        destination column name or array of column names
    edge_attr : str or None
        the weights column name. Default is None
    renumber : bool
        Indicate whether or not to renumber the source and destination
        vertex IDs. Default is True.
    create_using: cugraph.DiGraph or cugraph.Graph
        Indicate whether to create a directed or undirected graph

    Returns
    -------
    G : cugraph.DiGraph or cugraph.Graph
        graph containing edges from the pandas edgelist

    Raises
    ------
    Exception
        If ``create_using`` is neither ``Graph`` nor ``DiGraph``.

    Examples
    --------
    >>> df = pandas.read_csv('datasets/karate.csv', delimiter=' ',
    >>>                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.from_pandas_edgelist(df, source='0', destination='1',
    >>>                                  edge_attr='2', renumber=False)
    """
    # Identity check: only the two concrete classes themselves are accepted.
    if create_using is not Graph and create_using is not DiGraph:
        raise Exception("create_using supports Graph and DiGraph")

    graph = create_using()
    graph.from_pandas_edgelist(df,
                               source=source,
                               destination=destination,
                               edge_attr=edge_attr,
                               renumber=renumber)
    return graph