def ego_graph(G, n, radius=1, center=True, undirected=False, distance=None):
    """
    Compute the induced subgraph of neighbors centered at node n,
    within a given radius.

    Parameters
    ----------
    G : cugraph.Graph, networkx.Graph, CuPy or SciPy sparse matrix
        Graph or matrix object, which should contain the connectivity
        information. Edge weights, if present, should be single or double
        precision floating point values.

    n : integer
        A single node

    radius: integer, optional
        Include all neighbors of distance<=radius from n.

    center: bool, optional
        Defaults to True. False is not supported. The value is not
        read by this function.

    undirected: bool, optional
        Defaults to False. True is not supported. The value is not
        read by this function.

    distance: key, optional
        Distances are counted in hops from n. Other cases are not
        supported. The value is not read by this function.

    Returns
    -------
    G_ego : cuGraph.Graph or networkx.Graph
        A graph of the same type as the (converted) input graph, built
        from the edge list returned by the egonet computation for n.
        The networkx graph will not have all attributes copied over

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv',
    ...                   delimiter = ' ',
    ...                   dtype=['int32', 'int32', 'float32'],
    ...                   header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(M, source='0', destination='1')
    >>> ego_graph = cugraph.ego_graph(G, 1, radius=2)

    """
    (G, input_type) = ensure_cugraph_obj(G, nx_weight_attr="weight")

    # The result is created with the same concrete class as the input graph.
    result_graph = type(G)()

    # Both renumbering guards use the same truthiness test so that the seed
    # translation and the result translation can never disagree.
    if G.renumbered:
        n = G.lookup_internal_vertex_id(cudf.Series([n]))

    # The second return value (offsets) is not needed for a single seed.
    df, _ = egonet_wrapper.egonet(G, n, radius)

    if G.renumbered:
        df = G.unrenumber(df, "src")
        df = G.unrenumber(df, "dst")

    # Only request the weight column when the input edge list carries one.
    edgelist_kwargs = {"source": "src", "destination": "dst"}
    if G.edgelist.weights:
        edgelist_kwargs["edge_attr"] = "weight"
    result_graph.from_cudf_edgelist(df, **edgelist_kwargs)

    return _convert_graph_to_output_type(result_graph, input_type)
def batched_ego_graphs(
    G, seeds, radius=1, center=True, undirected=False, distance=None
):
    """
    Compute the induced subgraph of neighbors for each node in seeds
    within a given radius.

    Parameters
    ----------
    G : cugraph.Graph, networkx.Graph, CuPy or SciPy sparse matrix
        Graph or matrix object, which should contain the connectivity
        information. Edge weights, if present, should be single or double
        precision floating point values.

    seeds : cudf.Series or list or cudf.DataFrame
        Specifies the seeds of the induced egonet subgraphs.

    radius: integer, optional
        Include all neighbors of distance<=radius from each seed.

    center: bool, optional
        Defaults to True. False is not supported. The value is not
        read by this function.

    undirected: bool, optional
        Defaults to False. True is not supported. The value is not
        read by this function.

    distance: key, optional
        Distances are counted in hops from each seed. Other cases are not
        supported. The value is not read by this function.

    Returns
    -------
    ego_edge_lists : cudf.DataFrame or pandas.DataFrame
        GPU data frame containing all induced sources identifiers,
        destination identifiers, edge weights

    seeds_offsets: cudf.Series
        Series containing the starting offset in the returned edge list
        for each seed.

    """
    (G, input_type) = ensure_cugraph_obj(G, nx_weight_attr="weight")

    # Both renumbering guards use the same truthiness test so that the seed
    # translation and the result translation can never disagree.
    if G.renumbered:
        if isinstance(seeds, cudf.DataFrame):
            # DataFrame seeds are looked up using all of their columns.
            seeds = G.lookup_internal_vertex_id(seeds, seeds.columns)
        else:
            seeds = G.lookup_internal_vertex_id(cudf.Series(seeds))

    df, offsets = egonet_wrapper.egonet(G, seeds, radius)

    if G.renumbered:
        # Row order is preserved because `offsets` indexes into `df` by
        # position.
        df = G.unrenumber(df, "src", preserve_order=True)
        df = G.unrenumber(df, "dst", preserve_order=True)

    return _convert_df_series_to_output_type(df, offsets, input_type)
def bfs(G,
        start=None,
        return_sp_counter=None,
        i_start=None,
        directed=None,
        return_predecessors=None):
    """
    Find the distances and predecessors for a breadth first traversal of a
    graph.

    Parameters
    ----------
    G : cugraph.Graph, networkx.Graph, CuPy or SciPy sparse matrix
        Graph or matrix object, which should contain the connectivity
        information. Edge weights, if present, should be single or double
        precision floating point values.

    start : Integer
        The index of the graph vertex from which the traversal begins

    return_sp_counter : bool, optional, default=False
        Indicates if shortest path counters should be returned

    i_start : Integer, optional
        Identical to start, added for API compatibility. Only start or
        i_start can be set, not both.

    directed : bool, optional
        NOTE
            For non-Graph-type (eg. sparse matrix) values of G only. Raises
            TypeError if used with a Graph object.

        If True (default), then convert the input matrix to a
        cugraph.DiGraph, otherwise a cugraph.Graph object will be used.

    return_predecessors : optional
        Accepted in the signature but not read by this function.

    Returns
    -------
    Return value type is based on the input type. If G is a cugraph.Graph,
    returns:

       cudf.DataFrame
          df['vertex'] vertex IDs

          df['distance'] path distance for each vertex from the starting
          vertex

          df['predecessor'] for each i'th position in the column, the
          vertex ID immediately preceding the vertex at position i in the
          'vertex' column

          df['sp_counter'] for each i'th position in the column, the number
          of shortest paths leading to the vertex at position i in the
          'vertex' column (Only if return_sp_counter is True)

    If G is a networkx.Graph, returns:

       pandas.DataFrame with contents equivalent to the cudf.DataFrame
       described above.

    If G is a CuPy or SciPy matrix, returns:
       a 2-tuple of CuPy ndarrays (if CuPy matrix input) or Numpy ndarrays
       (if SciPy matrix input) representing:

       distance: cupy or numpy ndarray
          ndarray of shortest distances between source and vertex.

       predecessor: cupy or numpy ndarray
          ndarray of predecessors of a vertex on the path from source,
          which can be used to reconstruct the shortest paths.

       ...or if return_sp_counter is True, returns a 3-tuple with the above
       two arrays plus:

       sp_counter: cupy or numpy ndarray
          ndarray of number of shortest paths leading to each vertex.

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    ...                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(M, source='0', destination='1')
    >>> df = cugraph.bfs(G, 0)

    """
    (start, return_sp_counter, directed) = \
        _ensure_args(G, start, return_sp_counter, i_start, directed)

    # FIXME: allow nx_weight_attr to be specified
    (G, input_type) = ensure_cugraph_obj(
        G, nx_weight_attr="weight",
        matrix_graph_type=DiGraph if directed else Graph)

    # Exact type check: only an object whose type is precisely Graph is
    # traversed as undirected; any other graph type is treated as directed.
    is_directed = type(G) is not Graph

    # Both renumbering guards use the same truthiness test so that the start
    # vertex translation and the result translation can never disagree.
    if G.renumbered:
        start = G.lookup_internal_vertex_id(cudf.Series([start]))[0]

    df = bfs_wrapper.bfs(G, start, is_directed, return_sp_counter)

    if G.renumbered:
        df = G.unrenumber(df, "vertex")
        df = G.unrenumber(df, "predecessor")
        # Assign the filled column back instead of calling an in-place
        # fillna on a column selection, which relies on the selection
        # sharing storage with the frame.
        df["predecessor"] = df["predecessor"].fillna(-1)

    return _convert_df_to_output_type(df, input_type)
def sssp(G,
         source=None,
         method=None,
         directed=None,
         return_predecessors=None,
         unweighted=None,
         overwrite=None,
         indices=None):
    """
    Compute the distance and predecessors for shortest paths from the
    specified source to all the vertices in the graph. The distances column
    will store the distance from the source to each vertex. The predecessors
    column will store each vertex's predecessor in the shortest path.
    Vertices that are unreachable will have a distance of infinity denoted
    by the maximum value of the data type and the predecessor set as -1.
    The source vertex's predecessor is also set to -1. Graphs with negative
    weight cycles are not supported.

    Parameters
    ----------
    G : cugraph.Graph, networkx.Graph, CuPy or SciPy sparse matrix
        Graph or matrix object, which should contain the connectivity
        information. Edge weights, if present, should be single or double
        precision floating point values.

    source : int
        Index of the source vertex.

    method, unweighted, overwrite, indices : optional
        Only forwarded to the argument validation helper; not otherwise
        read by this function.

    directed : bool, optional
        After argument validation, selects the graph type used when the
        input has to be converted: cugraph.DiGraph when truthy, otherwise
        cugraph.Graph.

    return_predecessors : optional
        After argument validation, forwarded to the output conversion
        helper.

    Returns
    -------
    Return value type is based on the input type. If G is a cugraph.Graph,
    returns:

       cudf.DataFrame
          df['vertex']
              vertex id

          df['distance']
              gives the path distance from the starting vertex

          df['predecessor']
              the vertex it was reached from

    If G is a networkx.Graph, returns:

       pandas.DataFrame with contents equivalent to the cudf.DataFrame
       described above.

    If G is a CuPy or SciPy matrix, returns:
       a 2-tuple of CuPy ndarrays (if CuPy matrix input) or Numpy ndarrays
       (if SciPy matrix input) representing:

       distance: cupy or numpy ndarray
          ndarray of shortest distances between source and vertex.

       predecessor: cupy or numpy ndarray
          ndarray of predecessors of a vertex on the path from source,
          which can be used to reconstruct the shortest paths.

    Raises
    ------
    ValueError
        If the source vertex resolves to cudf.NA.

    Examples
    --------
    >>> M = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ',
    ...                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(M, source='0', destination='1')
    >>> distances = cugraph.sssp(G, 0)

    """
    source, directed, return_predecessors = _ensure_args(
        G, source, method, directed, return_predecessors, unweighted,
        overwrite, indices)

    # FIXME: allow nx_weight_attr to be specified
    graph_cls = DiGraph if directed else Graph
    G, input_type = ensure_cugraph_obj(
        G, nx_weight_attr="weight", matrix_graph_type=graph_cls)

    if G.renumbered:
        # Translate the caller's vertex id into the graph's internal id.
        if isinstance(source, cudf.DataFrame):
            internal_ids = G.lookup_internal_vertex_id(source, source.columns)
            source = internal_ids.iloc[0]
        else:
            internal_ids = G.lookup_internal_vertex_id(cudf.Series([source]))
            source = internal_ids[0]

    if source is cudf.NA:
        raise ValueError(
            "Starting vertex should be between 0 to number of vertices")

    paths = sssp_wrapper.sssp(G, source)

    if G.renumbered:
        # Map internal ids back to the caller's ids, then replace the
        # nulls left in the frame with -1.
        for column in ("vertex", "predecessor"):
            paths = G.unrenumber(paths, column)
        paths.fillna(-1, inplace=True)

    return _convert_df_to_output_type(paths, input_type, return_predecessors)
def weakly_connected_components(G,
                                directed=None,
                                connection=None,
                                return_labels=None):
    """
    Generate the Weakly Connected Components and attach a component label
    to each vertex.

    Parameters
    ----------
    G : cugraph.Graph, networkx.Graph, CuPy or SciPy sparse matrix
        Graph or matrix object, which should contain the connectivity
        information (edge weights are not used for this algorithm). If using
        a graph object, the graph can be either directed or undirected where
        an undirected edge is represented by a directed edge in both
        directions. The adjacency list will be computed if not already
        present. The number of vertices should fit into a 32b int.

    directed : bool, optional
        NOTE
            For non-Graph-type (eg. sparse matrix) values of G only.
            Raises TypeError if used with a Graph object.

        If True (default), then convert the input matrix to a
        cugraph.DiGraph; if False, a cugraph.Graph is used instead.

    connection : str, optional (default=None)
        Added for SciPy compatibility, can only be specified for
        non-Graph-type (eg. sparse matrix) values of G only (raises
        TypeError if used with a Graph object), and can only be set to
        "weak" for this API.

    return_labels : bool, optional
        NOTE
            For non-Graph-type (eg. sparse matrix) values of G only. Raises
            TypeError if used with a Graph object.

        If True (default), then return the labels for each of the connected
        components.

    Returns
    -------
    Return value type is based on the input type. If G is a cugraph.Graph,
    returns:

       cudf.DataFrame
           GPU data frame containing two cudf.Series of size V: the vertex
           identifiers and the corresponding component identifier.

           df['vertex']
               Contains the vertex identifier
           df['labels']
               The component identifier

    If G is a networkx.Graph, returns:

       python dictionary, where keys are vertices and values are the
       component identifiers.

    If G is a CuPy or SciPy matrix, returns:

       CuPy ndarray (if CuPy matrix input) or Numpy ndarray (if SciPy matrix
       input) of shape (<num vertices>, 2), where column 0 contains
       component identifiers and column 1 contains vertices.

    Examples
    --------
    >>> M = cudf.read_csv(datasets_path / 'karate.csv',
    ...                   delimiter = ' ',
    ...                   dtype=['int32', 'int32', 'float32'],
    ...                   header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr=None)
    >>> df = cugraph.weakly_connected_components(G)

    """
    directed, connection, return_labels = _ensure_args(
        "weakly_connected_components", G, directed, connection,
        return_labels)

    # FIXME: allow nx_weight_attr to be specified
    graph_cls = DiGraph if directed else Graph
    G, input_type = ensure_cugraph_obj(
        G, nx_weight_attr="weight", matrix_graph_type=graph_cls)

    labels_df = connectivity_wrapper.weakly_connected_components(G)

    # Report vertices using the caller's ids rather than internal ones.
    if G.renumbered:
        labels_df = G.unrenumber(labels_df, "vertex")

    return _convert_df_to_output_type(labels_df, input_type, return_labels)
def bfs(G, start, return_sp_counter=False):
    """
    Find the distances and predecessors for a breadth first traversal of a
    graph.

    Parameters
    ----------
    G : cuGraph.Graph, NetworkX.Graph, or CuPy sparse COO matrix
        cuGraph graph descriptor with connectivity information. Edge weights,
        if present, should be single or double precision floating point
        values.

    start : Integer
        The index of the graph vertex from which the traversal begins

    return_sp_counter : bool, optional, default=False
        Indicates if shortest path counters should be returned

    Returns
    -------
    Return value type is based on the input type. If G is a cugraph.Graph,
    returns:

       cudf.DataFrame
          df['vertex'] vertex IDs

          df['distance'] path distance for each vertex from the starting
          vertex

          df['predecessor'] for each i'th position in the column, the
          vertex ID immediately preceding the vertex at position i in the
          'vertex' column

          df['sp_counter'] for each i'th position in the column, the number
          of shortest paths leading to the vertex at position i in the
          'vertex' column (Only if return_sp_counter is True)

    If G is a networkx.Graph, returns:

       pandas.DataFrame with contents equivalent to the cudf.DataFrame
       described above.

    If G is a CuPy sparse COO matrix, returns a 2-tuple of cupy.ndarray:

       distance: cupy.ndarray
          ndarray of shortest distances between source and vertex.

       predecessor: cupy.ndarray
          ndarray of predecessors of a vertex on the path from source,
          which can be used to reconstruct the shortest paths.

       sp_counter: cupy.ndarray
          ndarray of number of shortest paths leading to each vertex
          (only if return_sp_counter is True)

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    ...                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(M, source='0', destination='1')
    >>> df = cugraph.bfs(G, 0)

    """
    # FIXME: allow nx_weight_attr to be specified
    (G, input_type) = ensure_cugraph_obj(G,
                                         nx_weight_attr="weight",
                                         matrix_graph_type=Graph)

    # Exact type check: only an object whose type is precisely Graph is
    # traversed as undirected; any other graph type is treated as directed.
    directed = type(G) is not Graph

    # Both renumbering guards use the same truthiness test so that the start
    # vertex translation and the result translation can never disagree.
    if G.renumbered:
        start = G.lookup_internal_vertex_id(cudf.Series([start]))[0]

    df = bfs_wrapper.bfs(G, start, directed, return_sp_counter)

    if G.renumbered:
        df = G.unrenumber(df, "vertex")
        df = G.unrenumber(df, "predecessor")
        # Assign the filled column back instead of calling an in-place
        # fillna on a column selection, which relies on the selection
        # sharing storage with the frame.
        df["predecessor"] = df["predecessor"].fillna(-1)

    return _convert_df_to_output_type(df, input_type)
def sssp(G, source):
    """
    Compute the distance and predecessors for shortest paths from the
    specified source to all the vertices in the graph. The distances column
    will store the distance from the source to each vertex. The predecessors
    column will store each vertex's predecessor in the shortest path.
    Vertices that are unreachable will have a distance of infinity denoted
    by the maximum value of the data type and the predecessor set as -1.
    The source vertex's predecessor is also set to -1. Graphs with negative
    weight cycles are not supported.

    Parameters
    ----------
    G : cuGraph.Graph, NetworkX.Graph, or CuPy sparse COO matrix
        cuGraph graph descriptor with connectivity information. Edge weights,
        if present, should be single or double precision floating point
        values.

    source : int
        Index of the source vertex.

    Returns
    -------
    Return value type is based on the input type. If G is a cugraph.Graph,
    returns:

       cudf.DataFrame
          df['vertex']
              vertex id

          df['distance']
              gives the path distance from the starting vertex

          df['predecessor']
              the vertex it was reached from

    If G is a networkx.Graph, returns:

       pandas.DataFrame with contents equivalent to the cudf.DataFrame
       described above.

    If G is a CuPy sparse COO matrix, returns a 2-tuple of cupy.ndarray:

       distance: cupy.ndarray
          ndarray of shortest distances between source and vertex.

       predecessor: cupy.ndarray
          ndarray of predecessors of a vertex on the path from source,
          which can be used to reconstruct the shortest paths.

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    ...                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(M, source='0', destination='1')
    >>> distances = cugraph.sssp(G, 0)

    """
    # FIXME: allow nx_weight_attr to be specified
    (G, input_type) = ensure_cugraph_obj(G,
                                         nx_weight_attr="weight",
                                         matrix_graph_type=Graph)

    # Translate the caller's vertex id into the graph's internal id.
    if G.renumbered:
        source = G.lookup_internal_vertex_id(cudf.Series([source]))[0]

    df = sssp_wrapper.sssp(G, source)

    if G.renumbered:
        df = G.unrenumber(df, "vertex")
        df = G.unrenumber(df, "predecessor")
        # Assign the filled column back instead of calling an in-place
        # fillna on a column selection, which relies on the selection
        # sharing storage with the frame.
        df["predecessor"] = df["predecessor"].fillna(-1)

    return _convert_df_to_output_type(df, input_type)