def from_cudf_edgelist(df, source='source', destination='destination',
                       edge_attr=None, create_using=Graph, renumber=True):
    """
    Build and return a new graph from an edge list stored in a
    cudf.DataFrame.

    Provided for NetworkX compatibility (the RAPIDS counterpart of
    NetworkX's from_pandas_edgelist()). Multiple source or destination
    columns are not supported, but renumbering is.

    Parameters
    ----------
    df : cudf.DataFrame
        Holds the columns with the edge source vertices, the destination
        (or target, in NetworkX's terminology) vertices and, optionally,
        the weights.
    source : string or integer
        Index of the source column in ``df``.
    destination : string or integer
        Index of the destination (NetworkX: target) column in ``df``.
    edge_attr : string or integer, optional
        Index of the weight column in ``df``. May be ``None``.
    create_using : cuGraph.Graph
        The graph class to instantiate; must be exactly ``Graph`` or
        ``DiGraph``. Default is cugraph.Graph.
    renumber : bool
        Should be True when the source and destination indices are not in
        the range 0 to V, where V is the number of vertices.

    Returns
    -------
    G : cugraph.Graph or cugraph.DiGraph
        A newly created graph populated from ``df``.

    Raises
    ------
    Exception
        If ``create_using`` is neither ``Graph`` nor ``DiGraph``.

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    ...                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.from_cudf_edgelist(M, source='0', destination='1',
    ...                                edge_attr='2')
    """
    # Identity comparison on purpose: only the two classes themselves are
    # accepted, not instances and not other classes.
    if create_using is not Graph and create_using is not DiGraph:
        raise Exception("create_using supports Graph and DiGraph")

    graph = create_using()
    graph.from_cudf_edgelist(
        df,
        source=source,
        destination=destination,
        edge_attr=edge_attr,
        renumber=renumber,
    )
    return graph
def _minimum_spanning_tree_subgraph(G):
    """
    Return a new Graph holding the minimum spanning tree edges of G.

    The edges come from ``minimum_spanning_tree_wrapper`` as a frame with
    "src", "dst" and "weight" columns; when G was renumbered, both endpoint
    columns are mapped back to the original vertex ids before the result
    graph is built.

    Raises
    ------
    Exception
        If ``type(G)`` is not exactly ``Graph``.
    """
    tree = Graph()
    # Exact type comparison: anything that is not precisely Graph
    # (including subclasses) is rejected.
    if type(G) is not Graph:
        raise Exception("input graph must be undirected")

    edges = minimum_spanning_tree_wrapper.minimum_spanning_tree(G)

    if G.renumbered:
        # Translate internal vertex ids back to the caller's ids.
        for endpoint in ("src", "dst"):
            edges = G.unrenumber(edges, endpoint)

    tree.from_cudf_edgelist(
        edges, source="src", destination="dst", edge_attr="weight"
    )
    return tree
def from_numpy_matrix(A, create_using=Graph):
    """
    Create a graph from a numpy matrix that contains an adjacency matrix.

    Parameters
    ----------
    A : numpy matrix
        The adjacency matrix; handed unchanged to the new graph's
        ``from_numpy_matrix`` method.
    create_using : cugraph.Graph or cugraph.DiGraph
        Pass cugraph.DiGraph for a directed graph and cugraph.Graph for an
        undirected graph. Default is cugraph.Graph.

    Returns
    -------
    G : cugraph.Graph or cugraph.DiGraph
        The newly created graph.

    Raises
    ------
    Exception
        If ``create_using`` is neither ``Graph`` nor ``DiGraph``.
    """
    # Only the two classes themselves are accepted (identity comparison).
    if create_using is not Graph and create_using is not DiGraph:
        raise Exception("create_using supports Graph and DiGraph")

    graph = create_using()
    graph.from_numpy_matrix(A)
    return graph
def from_pandas_adjacency(df, create_using=Graph):
    """
    Create a graph from a pandas adjacency matrix.

    Parameters
    ----------
    df : pandas adjacency matrix
        Handed unchanged to the new graph's ``from_pandas_adjacency``
        method.
    create_using : cugraph.Graph or cugraph.DiGraph
        Pass cugraph.DiGraph for a directed graph and cugraph.Graph for an
        undirected graph. Default is cugraph.Graph.

    Returns
    -------
    G : cugraph.Graph or cugraph.DiGraph
        The newly created graph.

    Raises
    ------
    Exception
        If ``create_using`` is neither ``Graph`` nor ``DiGraph``.
    """
    # Only the two classes themselves are accepted (identity comparison).
    if create_using is not Graph and create_using is not DiGraph:
        raise Exception("create_using supports Graph and DiGraph")

    graph = create_using()
    graph.from_pandas_adjacency(df)
    return graph
def from_cudf_edgelist(df, source='source', destination='destination',
                       edge_attr=None, create_using=Graph, renumber=True):
    """
    Return a new graph created from the edge list representation.

    Added for NetworkX compatibility (this is the RAPIDS version of
    NetworkX's from_pandas_edgelist()).

    Parameters
    ----------
    df : cudf.DataFrame
        Contains the columns storing edge source vertices, destination (or
        target, following NetworkX's terminology) vertices, and (optional)
        weights.
    source : string or integer
        Used to index the source column.
    destination : string or integer
        Used to index the destination (NetworkX: target) column.
    edge_attr : string or integer, optional
        Used to index the weight column. May be ``None``.
    create_using : cugraph.Graph or cugraph.DiGraph
        Graph class to instantiate; must be exactly ``Graph`` or
        ``DiGraph``. Default is cugraph.Graph.
    renumber : bool
        Forwarded as-is to the graph's ``from_cudf_edgelist`` method.
        Default is True.

    Returns
    -------
    G : cugraph.Graph or cugraph.DiGraph
        The newly created graph.

    Raises
    ------
    Exception
        If ``create_using`` is neither ``Graph`` nor ``DiGraph``.

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    ...                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.from_cudf_edgelist(M, source='0', destination='1',
    ...                                edge_attr='2')
    """
    supported = create_using is Graph or create_using is DiGraph
    if not supported:
        raise Exception("create_using supports Graph and DiGraph")

    new_graph = create_using()
    new_graph.from_cudf_edgelist(
        df,
        source=source,
        destination=destination,
        edge_attr=edge_attr,
        renumber=renumber,
    )
    return new_graph
def k_core(G, k=None, core_number=None):
    """
    Compute the k-core of the graph G based on the out degree of its nodes.

    A k-core of a graph is a maximal subgraph that contains nodes of degree
    k or more. This call does not support a graph with self-loops and
    parallel edges.

    Parameters
    ----------
    G : cuGraph.Graph
        cuGraph graph descriptor with connectivity information. The graph
        should contain undirected edges where undirected edges are
        represented as directed edges in both directions. While this graph
        can contain edge weights, they don't participate in the calculation
        of the k-core.
    k : int, optional
        Order of the core. This value must not be negative. If set to None,
        the main core is returned (k becomes the largest core number).
    core_number : cudf.DataFrame, optional
        Precomputed core number of the nodes of the graph G containing two
        cudf.Series of size V: the vertex identifiers and the corresponding
        core number values. If set to None, the core numbers of the nodes
        are calculated internally.

        core_number['vertex'] : cudf.Series
            Contains the vertex identifiers
        core_number['values'] : cudf.Series
            Contains the core number of vertices

    Returns
    -------
    KCoreGraph : cuGraph.Graph
        K Core of the input graph

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    ...                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> sources = cudf.Series(M['0'])
    >>> destinations = cudf.Series(M['1'])
    >>> G = cugraph.Graph()
    >>> G.add_edge_list(sources, destinations, None)
    >>> KCoreGraph = cugraph.k_core(G)
    """
    core_graph = Graph()

    if core_number is None:
        # Internally computed frames use a "core_number" column; rename it
        # to "values" so both paths expose the same column name below.
        computed = core_number_wrapper.core_number(G.graph_ptr)
        core_number = computed.rename(columns={"core_number": "values"})

    if k is None:
        # No order requested: use the maximum core number.
        k = core_number['values'].max()

    k_core_wrapper.k_core(G.graph_ptr, core_graph.graph_ptr, k, core_number)
    return core_graph
def _maximum_spanning_tree_subgraph(G):
    """
    Return a new Graph holding the maximum spanning tree edges of G.

    The maximum spanning tree is obtained by negating the adjacency-list
    weights of G in place, running the minimum spanning tree wrapper, and
    then negating both G's weights and the resulting "weight" column back.

    G's weights are restored in a ``finally`` clause so that an exception
    raised by the wrapper cannot leave the caller's graph with flipped
    weights.

    Parameters
    ----------
    G : cugraph.Graph
        Input graph; ``type(G)`` must be exactly ``Graph``.

    Returns
    -------
    mst_subgraph : cugraph.Graph
        Graph built from the "src", "dst" and "weight" columns returned by
        the wrapper (unrenumbered first when ``G.renumbered`` is set).

    Raises
    ------
    Exception
        If ``type(G)`` is not exactly ``Graph``.
    """
    mst_subgraph = Graph()
    if type(G) is not Graph:
        raise Exception("input graph must be undirected")

    # Remember whether we flipped the weights so the restore step mirrors
    # exactly what was done here.
    negated = G.adjlist.weights is not None
    if negated:
        G.adjlist.weights = G.adjlist.weights.mul(-1)

    try:
        mst_df = minimum_spanning_tree_wrapper.minimum_spanning_tree(G)
    finally:
        # Revert to original weights even when the wrapper raises.
        if negated:
            G.adjlist.weights = G.adjlist.weights.mul(-1)

    if negated:
        # The tree was computed on negated weights; flip the result back.
        mst_df["weight"] = mst_df["weight"].mul(-1)

    if G.renumbered:
        mst_df = G.unrenumber(mst_df, "src")
        mst_df = G.unrenumber(mst_df, "dst")

    mst_subgraph.from_cudf_edgelist(
        mst_df, source="src", destination="dst", edge_attr="weight"
    )
    return mst_subgraph
def subgraph(G, vertices):
    """
    Compute a subgraph of the existing graph including only the specified
    vertices.

    This algorithm works for both directed and undirected graphs. It does
    not actually traverse the edges; it simply pulls out any edges that are
    incident on vertices that are both contained in the vertices list.

    Parameters
    ----------
    G : cugraph.Graph
        cuGraph graph descriptor
    vertices : cudf.Series
        Specifies the vertices of the induced subgraph. Passed through
        ``null_check`` before use.

    Returns
    -------
    Sg : cugraph.Graph
        A graph object containing the subgraph induced by the given vertex
        set.

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    ...                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> sources = cudf.Series(M['0'])
    >>> destinations = cudf.Series(M['1'])
    >>> G = cugraph.Graph()
    >>> G.add_edge_list(sources, destinations, None)
    >>> verts = numpy.zeros(3, dtype=numpy.int32)
    >>> verts[0] = 0
    >>> verts[1] = 1
    >>> verts[2] = 2
    >>> sverts = cudf.Series(verts)
    >>> Sg = cugraph.subgraph(G, sverts)
    """
    null_check(vertices)

    # The wrapper fills the freshly created graph through its graph_ptr.
    induced = Graph()
    subgraph_extraction_wrapper.subgraph(
        G.graph_ptr,
        vertices,
        induced.graph_ptr,
    )
    return induced
def from_cudf_edgelist(df, source='source', target='target', weight=None):
    """
    Return a new graph created from the edge list representation.

    Added for NetworkX compatibility (this is the RAPIDS version of
    NetworkX's from_pandas_edgelist()).

    Parameters
    ----------
    df : cudf.DataFrame
        Contains the columns storing edge source vertices, destination (or
        target, following NetworkX's terminology) vertices, and (optional)
        weights.
    source : string or integer
        Used to index the source column.
    target : string or integer
        Used to index the destination (NetworkX: target) column.
    weight : string or integer, optional
        Used to index the weight column. When ``None``, no weight column is
        passed to ``add_edge_list``.

    Returns
    -------
    G : cugraph.Graph
        The newly created graph.

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    ...                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.from_cudf_edgelist(M, source='0', target='1',
    ...                                weight='2')
    """
    graph = Graph()

    # Source and destination are always supplied; the weight column is
    # appended only when the caller named one.
    edge_columns = [df[source], df[target]]
    if weight is not None:
        edge_columns.append(df[weight])

    graph.add_edge_list(*edge_columns)
    return graph
def ktruss_subgraph(G, k, use_weights=True):
    """
    Returns the K-Truss subgraph of a graph for a specific k.

    The k-truss of a graph is a subgraph where each edge is part of at
    least (k-2) triangles. K-trusses are used for finding tightly knit
    groups of vertices in a graph. A k-truss is a relaxation of a k-clique
    in the graph and was defined in [1]. Finding cliques is computationally
    demanding and finding the maximal k-clique is known to be NP-Hard.

    In contrast, finding a k-truss is computationally tractable as its key
    building block, namely triangle counting, can be executed in polynomial
    time. Typically, it takes many iterations of triangle counting to find
    the k-truss of a graph. Yet these iterations operate on a weakly
    monotonically shrinking graph. Therefore, finding the k-truss of a
    graph can be done in a fairly reasonable amount of time. The solution
    in cuGraph is based on a GPU algorithm first shown in [2] and uses the
    triangle counting algorithm from [3].

    [1] Cohen, J.,
    "Trusses: Cohesive subgraphs for social network analysis"
    National security agency technical report, 2008

    [2] O. Green, J. Fox, E. Kim, F. Busato, et al.
    "Quickly Finding a Truss in a Haystack"
    IEEE High Performance Extreme Computing Conference (HPEC), 2017
    https://doi.org/10.1109/HPEC.2017.8091038

    [3] O. Green, P. Yalamanchili, L.M. Munguia,
    "Fast Triangle Counting on GPU"
    Irregular Applications: Architectures and Algorithms (IA3), 2014

    Parameters
    ----------
    G : cuGraph.Graph
        cuGraph graph descriptor with connectivity information. k-Trusses
        are defined for only undirected graphs as they are defined for
        undirected triangle in a graph.
    k : int
        The desired k to be used for extracting the k-truss subgraph.
    use_weights : Bool
        whether the output should contain the edge weights if G has them

    Returns
    -------
    G_truss : cuGraph.Graph
        A cugraph graph descriptor with the k-truss subgraph for the given
        k.

    Raises
    ------
    Exception
        If ``type(G)`` is not exactly ``Graph``.

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    ...                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(M, source='0', destination='1')
    >>> k_subgraph = cugraph.ktruss_subgraph(G, 3)
    """
    truss = Graph()
    # Exact type comparison: anything that is not precisely Graph is
    # rejected.
    if type(G) is not Graph:
        raise Exception("input graph must be undirected")

    edges = ktruss_subgraph_wrapper.ktruss_subgraph(G, k, use_weights)

    if G.renumbered:
        # Translate internal vertex ids back to the caller's ids.
        for endpoint in ("src", "dst"):
            edges = G.unrenumber(edges, endpoint)

    edgelist_args = {"source": "src", "destination": "dst"}
    # NOTE(review): only G.edgelist.weights decides whether the "weight"
    # column is requested here; use_weights is not consulted - confirm the
    # wrapper always returns a "weight" column for weighted inputs.
    if G.edgelist.weights:
        edgelist_args["edge_attr"] = "weight"

    truss.from_cudf_edgelist(edges, **edgelist_args)
    return truss
def from_pandas_edgelist(df, source="source", destination="destination",
                         edge_attr=None, create_using=Graph, renumber=True):
    """
    Create a new graph from an edge list held in a pandas DataFrame.

    The source argument is the source column name and the destination
    argument is the destination column name.

    By default, renumbering is enabled to map the source and destination
    vertices into an index in the range [0, V) where V is the number of
    vertices. If the input vertices are a single column of integers in the
    range [0, V), renumbering can be disabled and the original external
    vertex ids will be used.

    If weights are present, the edge_attr argument is the weights column
    name.

    Parameters
    ----------
    df : pandas.DataFrame
        A DataFrame that contains edge information
    source : str or array-like
        source column name or array of column names
    destination : str or array-like
        destination column name or array of column names
    edge_attr : str or None
        the weights column name. Default is None
    create_using : cugraph.DiGraph or cugraph.Graph
        Indicate whether to create a directed or undirected graph
    renumber : bool
        Indicate whether or not to renumber the source and destination
        vertex IDs. Default is True.

    Returns
    -------
    G : cugraph.DiGraph or cugraph.Graph
        graph containing edges from the pandas edgelist

    Raises
    ------
    Exception
        If ``create_using`` is neither ``Graph`` nor ``DiGraph``.

    Examples
    --------
    >>> df = pandas.read_csv('datasets/karate.csv', delimiter=' ',
    ...                      dtype=['int32', 'int32', 'float32'],
    ...                      header=None)
    >>> G = cugraph.from_pandas_edgelist(df, source='0', destination='1',
    ...                                  edge_attr='2', renumber=False)
    """
    # Only the two classes themselves are accepted (identity comparison).
    if create_using is not Graph and create_using is not DiGraph:
        raise Exception("create_using supports Graph and DiGraph")

    graph = create_using()
    graph.from_pandas_edgelist(
        df,
        source=source,
        destination=destination,
        edge_attr=edge_attr,
        renumber=renumber,
    )
    return graph