def from_cudf_edgelist(df, source='source', destination='destination',
                       edge_attr=None, create_using=Graph, renumber=True):
    """
    Build and return a new graph from an edge list stored in a cuDF frame.

    Provided for NetworkX compatibility: this is the RAPIDS counterpart of
    NetworkX's from_pandas_edgelist(). Multiple source or destination
    columns are not supported, but renumbering is.

    Parameters
    ----------
    df : cudf.DataFrame
        DataFrame whose columns hold the edge source vertices, the
        destination (or target, in NetworkX terminology) vertices and,
        optionally, the edge weights.
    source : string or integer, optional (default='source')
        Key used to index the source column.
    destination : string or integer, optional (default='destination')
        Key used to index the destination (or target, in NetworkX
        terminology) column.
    edge_attr : string or integer, optional (default=None)
        May be ``None``; otherwise it is used to index the weight column.
    create_using : cuGraph.Graph, optional (default=cugraph.Graph)
        Type of graph to create. Must be the Graph or DiGraph class itself.
    renumber : bool, optional (default=True)
        Should be True when the source and destination indices are not in
        the range 0 to V, where V is the number of vertices.

    Returns
    -------
    G : cugraph.Graph or cugraph.DiGraph
        A new instance of ``create_using`` populated from ``df``.

    Raises
    ------
    Exception
        If ``create_using`` is neither Graph nor DiGraph.

    Examples
    --------
    >>> M = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ',
    ...                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.Graph()
    >>> G = cugraph.from_cudf_edgelist(M, source='0', destination='1',
    ...                                edge_attr='2')
    """
    # Only the two graph classes themselves are accepted (identity check).
    if create_using is not Graph and create_using is not DiGraph:
        raise Exception("create_using supports Graph and DiGraph")

    graph = create_using()
    graph.from_cudf_edgelist(
        df,
        source=source,
        destination=destination,
        edge_attr=edge_attr,
        renumber=renumber,
    )
    return graph
def _minimum_spanning_tree_subgraph(G):
    """
    Build a new Graph from the minimum spanning tree edges of ``G``.

    Parameters
    ----------
    G : cugraph.Graph
        Input graph; its type must be exactly Graph.

    Returns
    -------
    cugraph.Graph
        Graph created from the "src", "dst" and "weight" columns of the
        frame returned by the minimum spanning tree wrapper.

    Raises
    ------
    Exception
        If ``type(G)`` is not Graph.
    """
    tree_graph = Graph()
    if type(G) is not Graph:
        raise Exception("input graph must be undirected")

    tree_edges = minimum_spanning_tree_wrapper.minimum_spanning_tree(G)

    if G.renumbered:
        # Pass both endpoint columns through G.unrenumber, "src" first.
        for endpoint in ("src", "dst"):
            tree_edges = G.unrenumber(tree_edges, endpoint)

    tree_graph.from_cudf_edgelist(
        tree_edges, source="src", destination="dst", edge_attr="weight"
    )
    return tree_graph
def from_numpy_matrix(A, create_using=Graph):
    """
    Create a graph from a numpy matrix containing an adjacency matrix.

    Parameters
    ----------
    A : numpy matrix
        Adjacency matrix; forwarded unchanged to the new graph's
        ``from_numpy_matrix`` method.
    create_using : cugraph.DiGraph or cugraph.Graph, optional (default=Graph)
        Use cugraph.DiGraph for a directed graph and cugraph.Graph for an
        undirected graph.

    Returns
    -------
    G : cugraph.DiGraph or cugraph.Graph
        A new instance of ``create_using`` initialised from ``A``.

    Raises
    ------
    Exception
        If ``create_using`` is neither Graph nor DiGraph.
    """
    if create_using is not Graph and create_using is not DiGraph:
        raise Exception("create_using supports Graph and DiGraph")

    graph = create_using()
    graph.from_numpy_matrix(A)
    return graph
def from_pandas_adjacency(df, create_using=Graph):
    """
    Create a graph from a pandas adjacency matrix.

    Parameters
    ----------
    df : pandas adjacency matrix
        Forwarded unchanged to the new graph's ``from_pandas_adjacency``
        method.
    create_using : cugraph.DiGraph or cugraph.Graph, optional (default=Graph)
        Use cugraph.DiGraph for a directed graph and cugraph.Graph for an
        undirected graph.

    Returns
    -------
    G : cugraph.DiGraph or cugraph.Graph
        A new instance of ``create_using`` initialised from ``df``.

    Raises
    ------
    Exception
        If ``create_using`` is neither Graph nor DiGraph.
    """
    accepted = create_using is Graph or create_using is DiGraph
    if not accepted:
        raise Exception("create_using supports Graph and DiGraph")

    graph = create_using()
    graph.from_pandas_adjacency(df)
    return graph
def from_numpy_array(A, create_using=Graph):
    """
    Create a graph from a numpy array containing an adjacency matrix.

    Parameters
    ----------
    A : numpy.array
        A Numpy array that contains adjacency information.
    create_using : cugraph.DiGraph or cugraph.Graph, optional (default=Graph)
        Indicate whether to create a directed or undirected graph.

    Returns
    -------
    G : cugraph.DiGraph or cugraph.Graph
        A new instance of ``create_using`` initialised from ``A``.

    Raises
    ------
    Exception
        If ``create_using`` is neither Graph nor DiGraph.
    """
    if create_using is not Graph and create_using is not DiGraph:
        raise Exception("create_using supports Graph and DiGraph")

    graph = create_using()
    graph.from_numpy_array(A)
    return graph
def from_pandas_adjacency(df, create_using=Graph):
    """
    Create a graph from a pandas adjacency matrix.

    Parameters
    ----------
    df : pandas.DataFrame
        A DataFrame that contains edge information.
    create_using : cugraph.DiGraph or cugraph.Graph, optional (default=Graph)
        Indicate whether to create a directed or undirected graph.

    Returns
    -------
    G : cugraph.DiGraph or cugraph.Graph
        A new instance of ``create_using`` initialised from ``df``.

    Raises
    ------
    Exception
        If ``create_using`` is neither Graph nor DiGraph.
    """
    if create_using is not Graph and create_using is not DiGraph:
        raise Exception("create_using supports Graph and DiGraph")

    graph = create_using()
    graph.from_pandas_adjacency(df)
    return graph
def _maximum_spanning_tree_subgraph(G):
    """
    Build a new Graph from the maximum spanning tree edges of ``G``.

    The tree is obtained by negating the adjacency-list weights of ``G``,
    running the minimum spanning tree wrapper, and negating the resulting
    "weight" column back. The weights of ``G`` are modified in place for
    the duration of the wrapper call and are restored afterwards, including
    when the wrapper raises.

    Parameters
    ----------
    G : cugraph.Graph
        Input graph; its type must be exactly Graph.

    Returns
    -------
    cugraph.Graph
        Graph created from the "src", "dst" and "weight" columns of the
        wrapper result.

    Raises
    ------
    Exception
        If ``type(G)`` is not Graph.
    """
    mst_subgraph = Graph()
    if type(G) is not Graph:
        raise Exception("input graph must be undirected")

    # Flip the sign of the weights so the minimum spanning tree solver
    # yields the maximum spanning tree of the original weights.
    if G.adjlist.weights is not None:
        G.adjlist.weights = G.adjlist.weights.mul(-1)

    try:
        mst_df = minimum_spanning_tree_wrapper.minimum_spanning_tree(G)
    finally:
        # Revert to the original weights even if the solver raised, so the
        # caller's graph is never left with negated weights.
        weights_present = G.adjlist.weights is not None
        if weights_present:
            G.adjlist.weights = G.adjlist.weights.mul(-1)

    if weights_present:
        # The solver saw negated weights; report the original values.
        mst_df["weight"] = mst_df["weight"].mul(-1)

    if G.renumbered:
        mst_df = G.unrenumber(mst_df, "src")
        mst_df = G.unrenumber(mst_df, "dst")

    mst_subgraph.from_cudf_edgelist(
        mst_df, source="src", destination="dst", edge_attr="weight"
    )
    return mst_subgraph
def from_pandas_edgelist(df, source="source", destination="destination",
                         edge_attr=None, create_using=Graph, renumber=True):
    """
    Create a graph from a pandas edge list.

    A new graph of type ``create_using`` is instantiated and populated via
    its ``from_pandas_edgelist`` method. The source argument is the source
    column name and the destination argument is the destination column
    name.

    By default, renumbering is enabled to map the source and destination
    vertices into an index in the range [0, V) where V is the number of
    vertices. If the input vertices are a single column of integers in the
    range [0, V), renumbering can be disabled and the original external
    vertex ids will be used.

    If weights are present, the edge_attr argument is the weights column
    name.

    Parameters
    ----------
    df : pandas.DataFrame
        A DataFrame that contains edge information.
    source : str or array-like
        Source column name or array of column names.
    destination : str or array-like
        Destination column name or array of column names.
    edge_attr : str or None
        The weights column name. Default is None.
    renumber : bool
        Indicate whether or not to renumber the source and destination
        vertex IDs. Default is True.
    create_using : cugraph.DiGraph or cugraph.Graph
        Indicate whether to create a directed or undirected graph.

    Returns
    -------
    G : cugraph.DiGraph or cugraph.Graph
        Graph containing edges from the pandas edgelist.

    Raises
    ------
    Exception
        If ``create_using`` is neither Graph nor DiGraph.

    Examples
    --------
    >>> df = pandas.read_csv('datasets/karate.csv', delimiter=' ',
    ...                      header=None, names=["0", "1", "2"],
    ...                      dtype={"0": "int32", "1": "int32",
    ...                             "2": "float32"})
    >>> G = cugraph.Graph()
    >>> G.from_pandas_edgelist(df, source='0', destination='1',
    ...                        edge_attr='2', renumber=False)
    """
    if create_using is not Graph and create_using is not DiGraph:
        raise Exception("create_using supports Graph and DiGraph")

    graph = create_using()
    graph.from_pandas_edgelist(
        df,
        source=source,
        destination=destination,
        edge_attr=edge_attr,
        renumber=renumber,
    )
    return graph
def from_pandas_edgelist(df, source="source", destination="destination",
                         edge_attr=None, create_using=Graph, renumber=True):
    """
    Create a graph from a pandas edge list.

    A new graph of type ``create_using`` is instantiated and populated via
    its ``from_pandas_edgelist`` method. The source argument is the source
    column name and the destination argument is the destination column
    name.

    By default, renumbering is enabled to map the source and destination
    vertices into an index in the range [0, V) where V is the number of
    vertices. If the input vertices are a single column of integers in the
    range [0, V), renumbering can be disabled and the original external
    vertex ids will be used.

    If weights are present, the edge_attr argument is the weights column
    name.

    Parameters
    ----------
    df : pandas.DataFrame
        A DataFrame that contains edge information.
    source : str or array-like, optional (default='source')
        Source column name or array of column names.
    destination : str or array-like, optional (default='destination')
        Destination column name or array of column names.
    edge_attr : str or None, optional (default=None)
        The weights column name.
    renumber : bool, optional (default=True)
        Indicate whether or not to renumber the source and destination
        vertex IDs.
    create_using : cugraph.DiGraph or cugraph.Graph, optional (default=Graph)
        Indicate whether to create a directed or undirected graph.

    Returns
    -------
    G : cugraph.DiGraph or cugraph.Graph
        Graph containing edges from the pandas edgelist.

    Raises
    ------
    Exception
        If ``create_using`` is neither Graph nor DiGraph.

    Examples
    --------
    >>> # Download dataset from
    >>> # https://github.com/rapidsai/cugraph/datasets/...
    >>> df = pd.read_csv(datasets_path / 'karate.csv', delimiter=' ',
    ...                  header=None, names=["0", "1", "2"],
    ...                  dtype={"0": "int32", "1": "int32", "2": "float32"})
    >>> G = cugraph.Graph()
    >>> G.from_pandas_edgelist(df, source='0', destination='1',
    ...                        edge_attr='2', renumber=False)
    """
    is_supported = create_using is Graph or create_using is DiGraph
    if not is_supported:
        raise Exception("create_using supports Graph and DiGraph")

    graph = create_using()
    graph.from_pandas_edgelist(
        df,
        source=source,
        destination=destination,
        edge_attr=edge_attr,
        renumber=renumber,
    )
    return graph
def ktruss_subgraph(G, k, use_weights=True):
    """
    Returns the K-Truss subgraph of a graph for a specific k.

    The k-truss of a graph is a subgraph where each edge is part of at
    least (k-2) triangles. K-trusses are used for finding tightly knit
    groups of vertices in a graph. A k-truss is a relaxation of a k-clique
    in the graph and was defined in [1]. Finding cliques is computationally
    demanding and finding the maximal k-clique is known to be NP-Hard.

    In contrast, finding a k-truss is computationally tractable as its key
    building block, namely triangle counting, can be executed in polynomial
    time. Typically, it takes many iterations of triangle counting to find
    the k-truss of a graph. Yet these iterations operate on a weakly
    monotonically shrinking graph. Therefore, finding the k-truss of a
    graph can be done in a fairly reasonable amount of time. The solution
    in cuGraph is based on a GPU algorithm first shown in [2] and uses the
    triangle counting algorithm from [3].

    [1] Cohen, J.,
    "Trusses: Cohesive subgraphs for social network analysis"
    National security agency technical report, 2008

    [2] O. Green, J. Fox, E. Kim, F. Busato, et al.
    "Quickly Finding a Truss in a Haystack"
    IEEE High Performance Extreme Computing Conference (HPEC), 2017
    https://doi.org/10.1109/HPEC.2017.8091038

    [3] O. Green, P. Yalamanchili, L.M. Munguia,
    "Fast Triangle Counting on GPU"
    Irregular Applications: Architectures and Algorithms (IA3), 2014

    Parameters
    ----------
    G : cuGraph.Graph
        cuGraph graph descriptor with connectivity information. k-Trusses
        are defined for only undirected graphs as they are defined for
        undirected triangle in a graph.
    k : int
        The desired k to be used for extracting the k-truss subgraph.
    use_weights : bool, optional (default=True)
        Whether the output should contain the edge weights if G has them.

    Returns
    -------
    G_truss : cuGraph.Graph
        A cugraph graph descriptor with the k-truss subgraph for the given
        k.

    Raises
    ------
    Exception
        If ``type(G)`` is not Graph.

    Examples
    --------
    >>> gdf = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    ...                     dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(gdf, source='0', destination='1')
    >>> k_subgraph = cugraph.ktruss_subgraph(G, 3)
    """
    KTrussSubgraph = Graph()
    if type(G) is not Graph:
        raise Exception("input graph must be undirected")

    subgraph_df = ktruss_subgraph_wrapper.ktruss_subgraph(G, k, use_weights)

    if G.renumbered:
        subgraph_df = G.unrenumber(subgraph_df, "src")
        subgraph_df = G.unrenumber(subgraph_df, "dst")

    # Only ask for the weight column when the result actually carries one.
    # NOTE(review): presumably the wrapper omits "weight" when use_weights
    # is False; in that case build an unweighted graph instead of failing
    # on a missing column. Confirm against the wrapper.
    if G.edgelist.weights and "weight" in subgraph_df.columns:
        KTrussSubgraph.from_cudf_edgelist(
            subgraph_df, source="src", destination="dst", edge_attr="weight"
        )
    else:
        KTrussSubgraph.from_cudf_edgelist(
            subgraph_df, source="src", destination="dst"
        )

    return KTrussSubgraph