def edge_betweenness_centrality(G, k=None, normalized=True,
                                weight=None, seed=None,
                                result_dtype=np.float64):
    """
    Compute the edge betweenness centrality for all edges of the graph G
    from a sample of 'k' sources.

    CuGraph does not currently support the 'weight' parameter as seen in
    the corresponding networkX call.

    Parameters
    ----------
    G : cuGraph.Graph
        cuGraph graph descriptor with connectivity information. The graph
        can be either directed (DiGraph) or undirected (Graph).
        Weights in the graph are ignored, the current implementation uses
        BFS traversals. Use weight parameter if weights need to be
        considered (currently not supported)

    k : int or list or None, optional, default=None
        If k is not None, use k node samples to estimate betweenness.
        Higher values give better approximation.
        If k is a list, use the content of the list for estimation: the
        list should contain vertices identifiers.
        Vertices obtained through sampling or defined as a list will be
        used as sources for traversals inside the algorithm.

    normalized : bool, optional
        Default is True.
        If true, the betweenness values are normalized by
        2 / (n * (n - 1)) for Graphs (undirected), and
        1 / (n * (n - 1)) for DiGraphs (directed graphs)
        where n is the number of nodes in G.
        Normalization will ensure that values are in [0, 1],
        this normalization scales for the highest possible value where one
        edge is crossed by every single shortest path.

    weight : cudf.DataFrame, optional, default=None
        Specifies the weights to be used for each edge.
        Should contain a mapping between
        edges and weights. (Not Supported)

    seed : optional
        if k is specified and k is an integer, use seed to initialize the
        random number generator.
        Using None as seed relies on random.seed() behavior: using current
        system time
        If k is either None or list: seed parameter is ignored

    result_dtype : np.float32 or np.float64, optional, default=np.float64
        Indicate the data type of the betweenness centrality scores
        Using double automatically switches the implementation to
        "default"

    Returns
    -------
    df : cudf.DataFrame
        GPU data frame containing three cudf.Series of size |E|: the
        vertex identifiers of the sources, the vertex identifiers of the
        destinations and the corresponding betweenness centrality values.
        Please note that the resulting 'src', 'dst' columns might not be
        in ascending order.

        df['src'] : cudf.Series
            Contains the vertex identifiers of the source of each edge

        df['dst'] : cudf.Series
            Contains the vertex identifiers of the destination of each edge

        df['edge_betweenness_centrality'] : cudf.Series
            Contains the betweenness centrality of edges

        When using undirected graphs, 'src' and 'dst' only contains
        elements such that 'src' < 'dst', which might differ from networkx
        and user's input. Namely edge (1 -> 0) is transformed into
        (0 -> 1) but contains the betweenness centrality of edge (1 -> 0).

    Raises
    ------
    NotImplementedError
        If ``weight`` is not None.
    TypeError
        If ``result_dtype`` is neither np.float32 nor np.float64.

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    ...                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(M, source='0', destination='1')
    >>> ebc = cugraph.edge_betweenness_centrality(G)
    """
    # Reject unsupported arguments up front: these checks do not depend on
    # the graph, so an invalid call fails before _initialize_vertices is
    # invoked on G.
    if weight is not None:
        raise NotImplementedError("weighted implementation of betweenness "
                                  "centrality not currently supported")
    if result_dtype not in [np.float32, np.float64]:
        raise TypeError("result type can only be np.float32 or np.float64")

    # _initialize_vertices hands back both the source vertices and the
    # (possibly updated) k that the wrapper expects.
    vertices, k = _initialize_vertices(G, k, seed)

    df = edge_betweenness_centrality_wrapper.edge_betweenness_centrality(
        G, normalized, weight, k, vertices, result_dtype)

    # Map internal vertex ids back to the caller's identifiers when the
    # graph was renumbered.
    if G.renumbered:
        df = G.unrenumber(df, "src")
        df = G.unrenumber(df, "dst")

    return df
def edge_betweenness_centrality(G, k=None, normalized=True,
                                weight=None, seed=None,
                                result_dtype=np.float64):
    """
    Compute the edge betweenness centrality for all edges of the graph G.

    Betweenness centrality measures how many shortest paths pass over an
    edge: the higher the score, the more paths cross that edge and the
    more important the edge is believed to be. To improve performance,
    a sample of k starting vertices can be used instead of an all-pair
    shortest path computation.

    CuGraph does not currently support the 'weight' parameter as seen in
    the corresponding networkX call.

    Parameters
    ----------
    G : cuGraph.Graph or networkx.Graph
        The graph can be either directed (Graph(directed=True)) or
        undirected. Weights in the graph are ignored, the current
        implementation uses BFS traversals. Use weight parameter if
        weights need to be considered (currently not supported)

    k : int or list or None, optional (default=None)
        If k is not None, use k node samples to estimate betweenness.
        Higher values give better approximation.
        If k is a list, use the content of the list for estimation: the
        list should contain vertices identifiers.
        Vertices obtained through sampling or defined as a list will be
        used as sources for traversals inside the algorithm.

    normalized : bool, optional (default=True)
        If true, the betweenness values are normalized by
        2 / (n * (n - 1)) for undirected Graphs, and
        1 / (n * (n - 1)) for directed Graphs
        where n is the number of nodes in G.
        Normalization will ensure that values are in [0, 1],
        this normalization scales for the highest possible value where one
        edge is crossed by every single shortest path.

    weight : cudf.DataFrame, optional (default=None)
        Specifies the weights to be used for each edge.
        Should contain a mapping between
        edges and weights. (Not Supported)

    seed : optional (default=None)
        if k is specified and k is an integer, use seed to initialize the
        random number generator.
        Using None as seed relies on random.seed() behavior: using current
        system time
        If k is either None or list: seed parameter is ignored

    result_dtype : np.float32 or np.float64, optional (default=np.float64)
        Indicate the data type of the betweenness centrality scores
        Using double automatically switch implementation to "default"

    Returns
    -------
    df : cudf.DataFrame or Dictionary if using NetworkX
        GPU data frame containing the vertex identifiers of the sources
        ('src'), the vertex identifiers of the destinations ('dst') and
        the corresponding betweenness centrality values (the score column
        is looked up as 'betweenness_centrality' when the result is
        converted to a dictionary for NetworkX input).
        Please note that the resulting 'src', 'dst' columns might not be
        in ascending order.

        When using undirected graphs, every row with 'src' >= 'dst' has
        its endpoints swapped and rows sharing the same ('src', 'dst')
        pair are summed, which might differ from networkx and user's
        input. Namely edge (1 -> 0) is folded into (0 -> 1).

    Raises
    ------
    NotImplementedError
        If ``weight`` is not None.
    TypeError
        If ``result_dtype`` is neither np.float32 nor np.float64.

    Examples
    --------
    >>> gdf = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ',
    ...                     dtype=['int32', 'int32', 'float32'],
    ...                     header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(gdf, source='0', destination='1')
    >>> ebc = cugraph.edge_betweenness_centrality(G)
    """
    # Argument checks come first; neither of them needs the graph.
    if weight is not None:
        raise NotImplementedError("weighted implementation of betweenness "
                                  "centrality not currently supported")
    if result_dtype not in (np.float32, np.float64):
        raise TypeError("result type can only be np.float32 or np.float64")

    # NetworkX input is converted to a cuGraph object; the flag records
    # which flavour of result the caller expects back.
    G, came_from_nx = ensure_cugraph_obj_for_nx(G)

    sources = _initialize_vertices(G, k, seed)
    scores = edge_betweenness_centrality_wrapper.edge_betweenness_centrality(
        G, normalized, weight, sources, result_dtype)

    if G.renumbered:
        # Translate both endpoint columns back to the caller's vertex ids.
        scores = G.unrenumber(scores, "src")
        scores = G.unrenumber(scores, "dst")

    if G.is_directed() is False:
        # Rows whose source id is not smaller than the destination id.
        flip = scores['src'] >= scores['dst']

        # Read both masked columns before writing either one, so the
        # endpoints are exchanged rather than overwritten. After this step
        # an undirected edge stored in both directions appears as
        # duplicated (src, dst) rows.
        flipped_src = scores['dst'][flip]
        flipped_dst = scores['src'][flip]
        scores['src'][flip] = flipped_src
        scores['dst'][flip] = flipped_dst

        # Collapse the duplicated (src, dst) pairs by summing their values.
        scores = scores.groupby(by=["src", "dst"]).sum().reset_index()

    if came_from_nx is not True:
        return scores
    return df_edge_score_to_dictionary(scores, 'betweenness_centrality')