예제 #1
0
def from_cudf_edgelist(df,
                       source='source',
                       destination='destination',
                       edge_attr=None,
                       create_using=Graph,
                       renumber=True):
    """
    Build and return a new graph from an edge list held in a cudf.DataFrame.

    This is the RAPIDS counterpart of NetworkX's from_pandas_edge_list() and
    exists for NetworkX compatibility.  Multiple source or destination
    columns are not supported; vertex renumbering is supported.

    Parameters
    ----------
    df : cudf.DataFrame
        Edge list with a column of source vertices, a column of destination
        vertices ("target" in NetworkX's terminology) and, optionally, a
        column of weights.

    source : string or integer, optional (default='source')
        Label used to index the source column of ``df``.

    destination : string or integer, optional (default='destination')
        Label used to index the destination (NetworkX "target") column of
        ``df``.

    edge_attr : string or integer, optional (default=None)
        Label used to index the weight column of ``df``.  May be ``None``.

    create_using : cuGraph.Graph, optional (default=cugraph.Graph)
        The type of graph to create.  Only the ``Graph`` and ``DiGraph``
        classes themselves are accepted.

    renumber : bool, optional (default=True)
        Should be True when the source and destination indices are not in
        the range 0 to V, where V is the number of vertices.

    Returns
    -------
    G : cugraph.Graph or cugraph.DiGraph
        A new instance of ``create_using`` populated from ``df``.

    Raises
    ------
    Exception
        If ``create_using`` is neither ``Graph`` nor ``DiGraph``.

    Examples
    --------
    >>> M = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ',
    ...                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.from_cudf_edgelist(M, source='0', destination='1',
    ...                                edge_attr='2')

    """
    # Identity comparison: only the two classes themselves pass, so
    # instances and subclasses are rejected.
    if create_using is not Graph and create_using is not DiGraph:
        raise Exception("create_using supports Graph and DiGraph")

    graph = create_using()
    graph.from_cudf_edgelist(df,
                             source=source,
                             destination=destination,
                             edge_attr=edge_attr,
                             renumber=renumber)
    return graph
예제 #2
0
def _minimum_spanning_tree_subgraph(G):
    """
    Return a new Graph built from the minimum spanning tree edges of G.

    Parameters
    ----------
    G : Graph
        Input graph.  Its type must be exactly ``Graph``.

    Returns
    -------
    Graph
        A new graph created from the "src", "dst" and "weight" columns of
        the edge list returned by the minimum spanning tree wrapper.

    Raises
    ------
    Exception
        If ``type(G)`` is not ``Graph``.
    """
    tree = Graph()
    # Exact type comparison (not isinstance): anything whose type is not
    # Graph itself, subclasses included, is rejected.
    if type(G) is not Graph:
        raise Exception("input graph must be undirected")

    tree_edges = minimum_spanning_tree_wrapper.minimum_spanning_tree(G)

    if G.renumbered:
        # Undo the input graph's renumbering on both endpoint columns.
        for endpoint in ("src", "dst"):
            tree_edges = G.unrenumber(tree_edges, endpoint)

    tree.from_cudf_edgelist(tree_edges,
                            source="src",
                            destination="dst",
                            edge_attr="weight")
    return tree
예제 #3
0
def from_numpy_matrix(A, create_using=Graph):
    """
    Create a new graph from a numpy matrix holding an adjacency matrix.

    Parameters
    ----------
    A : numpy matrix
        The adjacency matrix to build the graph from.

    create_using : cugraph.DiGraph or cugraph.Graph, optional (default=Graph)
        Use cugraph.DiGraph for a directed graph and cugraph.Graph for an
        undirected graph.

    Returns
    -------
    G : cugraph.DiGraph or cugraph.Graph
        A new instance of ``create_using`` initialized from ``A``.

    Raises
    ------
    Exception
        If ``create_using`` is neither ``Graph`` nor ``DiGraph``.
    """
    # Only the two classes themselves are accepted (identity comparison).
    if create_using is not Graph and create_using is not DiGraph:
        raise Exception("create_using supports Graph and DiGraph")

    graph = create_using()
    graph.from_numpy_matrix(A)
    return graph
예제 #4
0
def from_pandas_adjacency(df, create_using=Graph):
    """
    Create a new graph from a pandas adjacency matrix.

    Parameters
    ----------
    df : pandas adjacency matrix
        The adjacency information to build the graph from.

    create_using : cugraph.DiGraph or cugraph.Graph, optional (default=Graph)
        Use cugraph.DiGraph for a directed graph and cugraph.Graph for an
        undirected graph.

    Returns
    -------
    G : cugraph.DiGraph or cugraph.Graph
        A new instance of ``create_using`` initialized from ``df``.

    Raises
    ------
    Exception
        If ``create_using`` is neither ``Graph`` nor ``DiGraph``.
    """
    # Only the two classes themselves are accepted (identity comparison).
    if create_using is not Graph and create_using is not DiGraph:
        raise Exception("create_using supports Graph and DiGraph")

    graph = create_using()
    graph.from_pandas_adjacency(df)
    return graph
예제 #5
0
def from_numpy_array(A, create_using=Graph):
    """
    Create a new graph from a numpy array holding an adjacency matrix.

    Parameters
    ----------
    A : numpy.array
        A Numpy array that contains adjacency information

    create_using: cugraph.DiGraph or cugraph.Graph, optional (default=Graph)
        Indicate whether to create a directed or undirected graph

    Returns
    -------
    G : cugraph.DiGraph or cugraph.Graph
        A new instance of ``create_using`` initialized from ``A``.

    Raises
    ------
    Exception
        If ``create_using`` is neither ``Graph`` nor ``DiGraph``.
    """
    # Only the two classes themselves are accepted (identity comparison).
    if create_using is not Graph and create_using is not DiGraph:
        raise Exception("create_using supports Graph and DiGraph")

    graph = create_using()
    graph.from_numpy_array(A)
    return graph
예제 #6
0
def from_pandas_adjacency(df, create_using=Graph):
    """
    Create a new graph from a pandas adjacency matrix.

    Parameters
    ----------
    df : pandas.DataFrame
        A DataFrame that contains edge information

    create_using: cugraph.DiGraph or cugraph.Graph, optional (default=Graph)
        Indicate whether to create a directed or undirected graph

    Returns
    -------
    G : cugraph.DiGraph or cugraph.Graph
        A new instance of ``create_using`` initialized from ``df``.

    Raises
    ------
    Exception
        If ``create_using`` is neither ``Graph`` nor ``DiGraph``.
    """
    # Only the two classes themselves are accepted (identity comparison).
    if create_using is not Graph and create_using is not DiGraph:
        raise Exception("create_using supports Graph and DiGraph")

    graph = create_using()
    graph.from_pandas_adjacency(df)
    return graph
예제 #7
0
def _maximum_spanning_tree_subgraph(G):
    """
    Return a new Graph built from the maximum spanning tree edges of G.

    The maximum spanning tree is obtained by negating the weights of G,
    running the minimum spanning tree wrapper, and negating the resulting
    weights again.  The weights of G are modified in place for the duration
    of the call and are restored before returning, including when the
    wrapper raises.

    Parameters
    ----------
    G : Graph
        Input graph.  Its type must be exactly ``Graph``.

    Returns
    -------
    Graph
        A new graph created from the "src", "dst" and "weight" columns of
        the resulting edge list.

    Raises
    ------
    Exception
        If ``type(G)`` is not ``Graph``.
    """
    # Exact type comparison (not isinstance): anything whose type is not
    # Graph itself, subclasses included, is rejected.
    if type(G) is not Graph:
        raise Exception("input graph must be undirected")

    # Flip the sign of the weights so the minimum spanning tree solver
    # selects the heaviest edges.
    if G.adjlist.weights is not None:
        G.adjlist.weights = G.adjlist.weights.mul(-1)

    try:
        mst_df = minimum_spanning_tree_wrapper.minimum_spanning_tree(G)
    finally:
        # Revert to the original weights even if the solver failed, so the
        # caller's graph is never left with negated weights.
        weighted = G.adjlist.weights is not None
        if weighted:
            G.adjlist.weights = G.adjlist.weights.mul(-1)

    if weighted:
        mst_df["weight"] = mst_df["weight"].mul(-1)

    if G.renumbered:
        # Undo the input graph's renumbering on both endpoint columns.
        mst_df = G.unrenumber(mst_df, "src")
        mst_df = G.unrenumber(mst_df, "dst")

    mst_subgraph = Graph()
    mst_subgraph.from_cudf_edgelist(
        mst_df, source="src", destination="dst", edge_attr="weight"
    )
    return mst_subgraph
예제 #8
0
def from_pandas_edgelist(df,
                         source="source",
                         destination="destination",
                         edge_attr=None,
                         create_using=Graph,
                         renumber=True):
    """
    Create a new graph from an edge list held in a pandas DataFrame.

    ``source`` names the source column and ``destination`` names the
    destination column.  If weights are present, ``edge_attr`` names the
    weights column.

    By default, renumbering is enabled to map the source and destination
    vertices into an index in the range [0, V) where V is the number
    of vertices.  If the input vertices are a single column of integers
    in the range [0, V), renumbering can be disabled and the original
    external vertex ids will be used.

    Parameters
    ----------
    df : pandas.DataFrame
        A DataFrame that contains edge information
    source : str or array-like
        source column name or array of column names
    destination : str or array-like
        destination column name or array of column names
    edge_attr : str or None
        the weights column name. Default is None
    create_using: cugraph.DiGraph or cugraph.Graph
        Indicate whether to create a directed or undirected graph
    renumber : bool
        Indicate whether or not to renumber the source and destination
        vertex IDs. Default is True.

    Returns
    -------
    G : cugraph.DiGraph or cugraph.Graph
        graph containing edges from the pandas edgelist

    Raises
    ------
    Exception
        If ``create_using`` is neither ``Graph`` nor ``DiGraph``.

    Examples
    --------
    >>> df = pandas.read_csv('datasets/karate.csv', delimiter=' ',
    ...                      header=None, names=['0', '1', '2'],
    ...                      dtype={'0': 'int32', '1': 'int32',
    ...                             '2': 'float32'})
    >>> G = cugraph.from_pandas_edgelist(df, source='0', destination='1',
    ...                                  edge_attr='2', renumber=False)
    """
    # Only the two classes themselves are accepted (identity comparison).
    if create_using is not Graph and create_using is not DiGraph:
        raise Exception("create_using supports Graph and DiGraph")

    graph = create_using()
    graph.from_pandas_edgelist(df,
                               source=source,
                               destination=destination,
                               edge_attr=edge_attr,
                               renumber=renumber)
    return graph
예제 #9
0
def from_pandas_edgelist(df,
                         source="source",
                         destination="destination",
                         edge_attr=None,
                         create_using=Graph,
                         renumber=True):
    """
    Create a new graph from an edge list held in a pandas DataFrame.

    ``source`` names the source column and ``destination`` names the
    destination column.  If weights are present, ``edge_attr`` names the
    weights column.

    By default, renumbering is enabled to map the source and destination
    vertices into an index in the range [0, V) where V is the number
    of vertices.  If the input vertices are a single column of integers
    in the range [0, V), renumbering can be disabled and the original
    external vertex ids will be used.

    Parameters
    ----------
    df : pandas.DataFrame
        A DataFrame that contains edge information

    source : str or array-like, optional (default='source')
        source column name or array of column names

    destination : str or array-like, optional (default='destination')
        destination column name or array of column names

    edge_attr : str or None, optional (default=None)
        the weights column name.

    create_using: cugraph.DiGraph or cugraph.Graph, optional (default=Graph)
        Indicate whether to create a directed or undirected graph

    renumber : bool, optional (default=True)
        Indicate whether or not to renumber the source and destination
        vertex IDs.

    Returns
    -------
    G : cugraph.DiGraph or cugraph.Graph
        graph containing edges from the pandas edgelist

    Raises
    ------
    Exception
        If ``create_using`` is neither ``Graph`` nor ``DiGraph``.

    Examples
    --------
    >>> #  Download dataset from
    >>> #  https://github.com/rapidsai/cugraph/datasets/...
    >>> df = pd.read_csv(datasets_path / 'karate.csv', delimiter=' ',
    ...                  header=None, names=["0", "1", "2"],
    ...                  dtype={"0": "int32", "1": "int32", "2": "float32"})
    >>> G = cugraph.from_pandas_edgelist(df, source='0', destination='1',
    ...                                  edge_attr='2', renumber=False)

    """
    # Only the two classes themselves are accepted (identity comparison).
    if create_using is not Graph and create_using is not DiGraph:
        raise Exception("create_using supports Graph and DiGraph")

    graph = create_using()
    graph.from_pandas_edgelist(df,
                               source=source,
                               destination=destination,
                               edge_attr=edge_attr,
                               renumber=renumber)
    return graph
예제 #10
0
def ktruss_subgraph(G, k, use_weights=True):
    """
    Returns the K-Truss subgraph of a graph for a specific k.

    The k-truss of a graph is a subgraph where each edge is part of at least
    (k-2) triangles. K-trusses are used for finding tightly knit groups of
    vertices in a graph. A k-truss is a relaxation of a k-clique in the graph
    and was defined in [1]. Finding cliques is computationally demanding and
    finding the maximal k-clique is known to be NP-Hard.

    In contrast, finding a k-truss is computationally tractable as its
    key building block, namely triangle counting, can be executed
    in polynomial time. Typically, it takes many iterations of triangle
    counting to find the k-truss of a graph. Yet these iterations operate
    on a weakly monotonically shrinking graph.
    Therefore, finding the k-truss of a graph can be done in a fairly
    reasonable amount of time. The solution in cuGraph is based on a
    GPU algorithm first shown in [2] and uses the triangle counting algorithm
    from [3].

    [1] Cohen, J.,
    "Trusses: Cohesive subgraphs for social network analysis"
    National security agency technical report, 2008

    [2] O. Green, J. Fox, E. Kim, F. Busato, et al.
    "Quickly Finding a Truss in a Haystack"
    IEEE High Performance Extreme Computing Conference (HPEC), 2017
    https://doi.org/10.1109/HPEC.2017.8091038

    [3] O. Green, P. Yalamanchili, L.M. Munguia,
    "Fast Triangle Counting on GPU"
    Irregular Applications: Architectures and Algorithms (IA3), 2014


    Parameters
    ----------
    G : cuGraph.Graph
        cuGraph graph descriptor with connectivity information. k-Trusses are
        defined only for undirected graphs, as they are defined in terms of
        undirected triangles in a graph.

    k : int
        The desired k to be used for extracting the k-truss subgraph.

    use_weights : bool, optional (default=True)
        Whether the output should contain the edge weights if G has them.

    Returns
    -------
    G_truss : cuGraph.Graph
        A cugraph graph descriptor with the k-truss subgraph for the given k.

    Raises
    ------
    Exception
        If ``type(G)`` is not ``Graph``.

    Examples
    --------
    >>> gdf = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    ...                     dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(gdf, source='0', destination='1')
    >>> k_subgraph = cugraph.ktruss_subgraph(G, 3)
    """

    truss_graph = Graph()
    # Exact type comparison (not isinstance): anything whose type is not
    # Graph itself, subclasses included, is rejected.
    if type(G) is not Graph:
        raise Exception("input graph must be undirected")

    truss_edges = ktruss_subgraph_wrapper.ktruss_subgraph(G, k, use_weights)
    if G.renumbered:
        # Undo the input graph's renumbering on both endpoint columns.
        for endpoint in ("src", "dst"):
            truss_edges = G.unrenumber(truss_edges, endpoint)

    edgelist_args = {"source": "src", "destination": "dst"}
    # NOTE(review): the weight column is requested whenever G has weights,
    # regardless of use_weights -- confirm the wrapper still returns a
    # "weight" column when use_weights is False.
    if G.edgelist.weights:
        edgelist_args["edge_attr"] = "weight"

    truss_graph.from_cudf_edgelist(truss_edges, **edgelist_args)
    return truss_graph