def extended_stats(G, connectivity=False, anc=False, ecc=False, bc=False, cc=False):
    """
    Compute extended topological measures for a graph.

    Several of these measures need all-pairs computations, so they can be
    very slow and memory hungry on large networks. Leave the optional flags
    off unless the corresponding metrics are actually needed.

    Parameters
    ----------
    G : networkx.MultiDiGraph
        input graph
    connectivity : bool
        if True, calculate node and edge connectivity
    anc : bool
        if True, calculate average node connectivity
    ecc : bool
        if True, calculate shortest paths, eccentricity, and the metrics
        derived from eccentricity
    bc : bool
        if True, calculate node betweenness centrality
    cc : bool
        if True, calculate node closeness centrality

    Returns
    -------
    stats : dict
        always contains avg_neighbor_degree, avg_weighted_neighbor_degree,
        degree_centrality, clustering_coefficient and
        clustering_coefficient_weighted (each with a matching ``*_avg``
        entry), plus pagerank, pagerank_max_node, pagerank_max,
        pagerank_min_node and pagerank_min. Depending on the flags it also
        contains node_connectivity and edge_connectivity (connectivity),
        node_connectivity_avg (anc), eccentricity, diameter, radius, center
        and periphery (ecc), closeness_centrality and
        closeness_centrality_avg (cc), betweenness_centrality and
        betweenness_centrality_avg (bc).
    """

    def _mean(per_node):
        # arithmetic mean of the values of a node -> value mapping
        return sum(per_node.values()) / len(per_node)

    stats = {}

    # helper graphs: a DiGraph, an undirected Graph built from it, and the
    # largest strongly connected component of the input
    D = utils_graph.get_digraph(G, weight="length")
    Gu = nx.Graph(D)
    Gs = utils_graph.get_largest_component(G, strongly=True)

    # neighborhood degree, unweighted then weighted by length
    nbr_deg = nx.average_neighbor_degree(G)
    stats["avg_neighbor_degree"] = nbr_deg
    stats["avg_neighbor_degree_avg"] = _mean(nbr_deg)

    wtd_nbr_deg = nx.average_neighbor_degree(G, weight="length")
    stats["avg_weighted_neighbor_degree"] = wtd_nbr_deg
    stats["avg_weighted_neighbor_degree_avg"] = _mean(wtd_nbr_deg)

    # fraction of nodes each node is connected to
    deg_cent = nx.degree_centrality(G)
    stats["degree_centrality"] = deg_cent
    stats["degree_centrality_avg"] = _mean(deg_cent)

    # clustering coefficients on the undirected graph
    stats["clustering_coefficient"] = nx.clustering(Gu)
    stats["clustering_coefficient_avg"] = nx.average_clustering(Gu)
    stats["clustering_coefficient_weighted"] = nx.clustering(Gu, weight="length")
    stats["clustering_coefficient_weighted_avg"] = nx.average_clustering(
        Gu, weight="length"
    )

    # pagerank on the DiGraph, with its extreme nodes and values
    ranks = nx.pagerank(D, weight="length")
    stats["pagerank"] = ranks
    top_node = max(ranks, key=ranks.get)
    stats["pagerank_max_node"] = top_node
    stats["pagerank_max"] = ranks[top_node]
    bottom_node = min(ranks, key=ranks.get)
    stats["pagerank_min_node"] = bottom_node
    stats["pagerank_min"] = ranks[bottom_node]

    if connectivity:
        # minimum number of nodes / edges whose removal disconnects the
        # strongly connected component or renders it trivial
        stats["node_connectivity"] = nx.node_connectivity(Gs)
        stats["edge_connectivity"] = nx.edge_connectivity(Gs)
        utils.log("Calculated node and edge connectivity")

    if anc:
        # mean number of internally node-disjoint paths between node pairs
        stats["node_connectivity_avg"] = nx.average_node_connectivity(G)
        utils.log("Calculated average node connectivity")

    if ecc:
        # precompute weighted shortest path lengths from every node of the
        # strongly connected component
        sp = {}
        for source in Gs.nodes:
            sp[source] = dict(
                nx.single_source_dijkstra_path_length(Gs, source, weight="length")
            )
        utils.log("Calculated shortest path lengths")

        # eccentricity and everything derived from it
        node_ecc = nx.eccentricity(Gs, sp=sp)
        stats["eccentricity"] = node_ecc
        stats["diameter"] = nx.diameter(Gs, e=node_ecc)
        stats["radius"] = nx.radius(Gs, e=node_ecc)
        stats["center"] = nx.center(Gs, e=node_ecc)
        stats["periphery"] = nx.periphery(Gs, e=node_ecc)

    if cc:
        closeness = nx.closeness_centrality(G, distance="length")
        stats["closeness_centrality"] = closeness
        stats["closeness_centrality_avg"] = _mean(closeness)
        utils.log("Calculated closeness centrality")

    if bc:
        # run on the DiGraph: the nx2.4+ implementation cannot run on
        # Multi(Di)Graphs
        betweenness = nx.betweenness_centrality(D, weight="length")
        stats["betweenness_centrality"] = betweenness
        stats["betweenness_centrality_avg"] = _mean(betweenness)
        utils.log("Calculated betweenness centrality")

    utils.log("Calculated extended stats")
    return stats
def test_average_connectivity():
    """Check average node connectivity on two small graphs and the empty graph."""
    # Graphs taken from figure 1 of:
    # Beineke, L., O. Oellermann, and R. Pippert (2002). The average
    # connectivity of a graph. Discrete mathematics 252(1-3), 31-45
    # http://www.sciencedirect.com/science/article/pii/S0012365X01001807
    cases = (
        ([(1, 3), (1, 4)], 1),
        ([(1, 3), (1, 4), (0, 3), (0, 4), (3, 4)], 2.2),
    )
    for extra_edges, expected in cases:
        graph = nx.path_graph(3)
        graph.add_edges_from(extra_edges)
        assert_equal(nx.average_node_connectivity(graph), expected)

    # the empty graph is defined to have connectivity 0
    assert_equal(nx.average_node_connectivity(nx.Graph()), 0)
def test_average_connectivity():
    """Verify average node connectivity against known values."""
    # Example graphs come from figure 1 of:
    # Beineke, L., O. Oellermann, and R. Pippert (2002). The average
    # connectivity of a graph. Discrete mathematics 252(1-3), 31-45
    # http://www.sciencedirect.com/science/article/pii/S0012365X01001807

    def _path3_with(extra_edges):
        # three-node path extended with the given edges
        graph = nx.path_graph(3)
        graph.add_edges_from(extra_edges)
        return graph

    sparse = _path3_with([(1, 3), (1, 4)])
    assert_equal(nx.average_node_connectivity(sparse), 1)

    dense = _path3_with([(1, 3), (1, 4), (0, 3), (0, 4), (3, 4)])
    assert_equal(nx.average_node_connectivity(dense), 2.2)

    empty = nx.Graph()
    assert_equal(nx.average_node_connectivity(empty), 0)
def graph_stats(G):
    """Collect a best-effort dictionary of NetworkX statistics for ``G``.

    Each metric is computed independently: when one of them raises (for
    example because it is not defined for this kind of graph), the failure is
    logged and the remaining metrics are still attempted.

    Parameters
    ----------
    G : networkx graph
        graph to analyse

    Returns
    -------
    dict
        maps metric name to its value; contains only the metrics that could
        be computed, so it may be empty
    """
    import logging

    logger = logging.getLogger(__name__)

    # TODO: revisit min_cost_flow. It needs node demands (maybe derived from
    # strongly_connected_components), e.g. calculate_demand(G) followed by
    # nx.min_cost_flow(G, capacity="inverse_weight", weight="weight").
    # https://networkx.github.io/documentation/stable/reference/algorithms/generated/networkx.algorithms.flow.min_cost_flow.html#networkx.algorithms.flow.min_cost_flow

    # thunks keep every NetworkX lookup and call inside the try block below
    metrics = (
        ("pagerank", lambda: nx.pagerank(G)),
        ("betweenness_centrality", lambda: nx.betweenness_centrality(G)),
        ("degree_centrality", lambda: nx.degree_centrality(G)),
        ("eccentricity", lambda: nx.eccentricity(G)),
        ("average_node_connectivity", lambda: nx.average_node_connectivity(G)),
        ("dominating_set", lambda: nx.dominating_set(G)),
        (
            "strongly_connected_components",
            lambda: list(nx.strongly_connected_components(G)),
        ),
    )

    result = {}
    for name, compute in metrics:
        try:
            result[name] = compute()
        except Exception:
            # deliberate best-effort: record the problem, keep going
            logger.warning("graph_stats: could not compute %s", name, exc_info=True)
    return result
def __init__(self, n_rooms=0, connectivity_threshold=1.2, secret_chance=10, random_state=None):
    """Create a dungeon graph and, if requested, populate it with rooms.

    Parameters
    ----------
    n_rooms : int
        number of rooms to create; with 0 the graph is left empty
    connectivity_threshold : float
        random connections are added until the average node connectivity of
        the graph reaches this value
    secret_chance : int
        stored on the instance as ``secret_chance``
    random_state : random.Random or None
        source of randomness; a fresh ``Random()`` is created when None
    """
    nx.Graph.__init__(self)
    self.random_state = Random() if random_state is None else random_state
    self.secret_chance = secret_chance
    self.tags = []

    if n_rooms > 0:
        # make each room
        for room_idx in range(n_rooms):
            self.add_room(room_idx)

        # randomly add connections until connectivity threshold is reached;
        # at least two rooms are needed to sample a pair
        while (self.number_of_nodes() >= 2
               and nx.average_node_connectivity(self) < connectivity_threshold):
            # random.sample needs a real sequence (node views and sets are
            # rejected on Python 3.11+)
            room1, room2 = self.random_state.sample(list(self.nodes()), 2)
            self.connect_rooms(room1, room2)

        # ensure all parts of the dungeon are reachable by chaining each
        # connected component to the next one
        components = [list(comp) for comp in nx.connected_components(self)]
        for current, following in zip(components, components[1:]):
            room = self.random_state.sample(current, 1)[0]
            connecting_room = self.random_state.sample(following, 1)[0]
            self.connect_rooms(room, connecting_room)

        # label nodes: number of nodes on a shortest path from room 0
        self.paths = {
            a: len(nx.shortest_path(self, 0, a)) for a in self.nodes()
        }
        self.tag_nodes()
def test_average_connectivity():
    """Check average node connectivity with every flow function."""
    # Example graphs come from figure 1 of:
    # Beineke, L., O. Oellermann, and R. Pippert (2002). The average
    # connectivity of a graph. Discrete mathematics 252(1-3), 31-45
    # http://www.sciencedirect.com/science/article/pii/S0012365X01001807
    sparse = nx.path_graph(3)
    sparse.add_edges_from([(1, 3), (1, 4)])
    dense = nx.path_graph(3)
    dense.add_edges_from([(1, 3), (1, 4), (0, 3), (0, 4), (3, 4)])
    empty = nx.Graph()

    expectations = ((sparse, 1), (dense, 2.2), (empty, 0))
    for flow_func in flow_funcs:
        errmsg = f"Assertion failed in function: {flow_func.__name__}"
        for graph, expected in expectations:
            observed = nx.average_node_connectivity(graph, flow_func=flow_func)
            assert observed == expected, errmsg
def run(self, ips, imgs, para=None):
    """Tabulate summary statistics of the graph stored in ``ips.data``.

    When ``para['parts']`` is true, one row is produced per connected
    component and the table starts with a ``PartID`` column; otherwise a
    single row describes the whole graph and that column is dropped.

    Parameters
    ----------
    ips : object
        provides ``data`` (the graph), ``unit`` (a ``(scale, name)`` pair)
        and ``title``
    imgs : object
        unused here
    para : dict
        must contain the boolean key ``'parts'``
    """
    titles = [
        'PartID', 'Noeds', 'Edges', 'TotalLength', 'Density', 'AveConnect'
    ]
    k, unit = ips.unit

    if para['parts']:
        # connected_component_subgraphs was removed from NetworkX; build the
        # component subgraph views directly
        gs = (ips.data.subgraph(c) for c in nx.connected_components(ips.data))
    else:
        gs = [ips.data]

    # 0 keeps the PartID column, 1 drops it
    first_col = 1 - para['parts']
    datas = []
    for comid, g in enumerate(gs):
        # count every edge exactly once; iterating g.edges() and summing
        # g[s][e].values() counted parallel multigraph edges repeatedly
        sl = sum(d['weight'] for _, _, d in g.edges(data=True))
        datas.append([
            comid,
            g.number_of_nodes(),
            g.number_of_edges(),
            round(sl * k, 2),
            round(nx.density(g), 2),
            round(nx.average_node_connectivity(g), 2)
        ][first_col:])

    print(titles, datas)
    IPy.show_table(pd.DataFrame(datas, columns=titles[first_col:]),
                   ips.title + '-graph')
def calculate(network):
    """Return the average node connectivity of ``network``, rounded to 7 places.

    Returns 0 when the connectivity cannot be computed for the given input.
    """
    try:
        connectivity = nx.average_node_connectivity(network)
    except Exception:
        # narrowed from a bare except so KeyboardInterrupt/SystemExit propagate
        return 0
    return round(connectivity, 7)
def netgen_rr(n, d):
    """Generate a random ``d``-regular graph on ``n`` nodes with summary statistics.

    Returns the flat tuple ``(graph, mean degree, max degree, average node
    connectivity, average clustering)``.
    """
    graph = nx.random_regular_graph(d=d, n=n)
    degree_seq = [graph.degree[v] for v in graph.nodes()]
    return (
        graph,
        np.mean(degree_seq),
        np.max(degree_seq),
        nx.average_node_connectivity(graph),
        nx.average_clustering(graph),
    )
def netgen_ba(n, m):
    """Generate a Barabasi-Albert graph with summary statistics.

    Returns ``(graph, (mean degree, max degree, average node connectivity,
    average clustering))``.
    """
    graph = nx.barabasi_albert_graph(n=n, m=m)
    degree_seq = [graph.degree[v] for v in graph.nodes()]
    summary = (
        np.mean(degree_seq),
        np.max(degree_seq),
        nx.average_node_connectivity(graph),
        nx.average_clustering(graph),
    )
    return graph, summary
def netgen_er(n, p):
    """Generate an Erdos-Renyi graph with summary statistics.

    Returns ``(graph, (mean degree, max degree, average node connectivity,
    average clustering))``.
    """
    graph = nx.erdos_renyi_graph(n=n, p=p)
    degree_seq = [graph.degree[v] for v in graph.nodes()]
    summary = (
        np.mean(degree_seq),
        np.max(degree_seq),
        nx.average_node_connectivity(graph),
        nx.average_clustering(graph),
    )
    return graph, summary
def test_average_connectivity():
    """Check average node connectivity with every flow function."""
    # Example graphs come from figure 1 of:
    # Beineke, L., O. Oellermann, and R. Pippert (2002). The average
    # connectivity of a graph. Discrete mathematics 252(1-3), 31-45
    # http://www.sciencedirect.com/science/article/pii/S0012365X01001807
    sparse = nx.path_graph(3)
    sparse.add_edges_from([(1, 3), (1, 4)])
    dense = nx.path_graph(3)
    dense.add_edges_from([(1, 3), (1, 4), (0, 3), (0, 4), (3, 4)])
    empty = nx.Graph()

    expectations = ((sparse, 1), (dense, 2.2), (empty, 0))
    for flow_func in flow_funcs:
        for graph, expected in expectations:
            assert_equal(
                nx.average_node_connectivity(graph, flow_func=flow_func),
                expected,
                msg=msg.format(flow_func.__name__),
            )
def graphMeasures(self):
    """Return the graph measures of ``self.graph`` as a list.

    Currently the list holds a single entry, the average node connectivity.
    """
    # average degree connectivity and average neighbor degree are not
    # computed at the moment
    measures = []
    measures.append(nx.average_node_connectivity(self.graph))
    return measures
def graph_separation(G, sizetreshold, output, septreshold, notseparate):
    """Recursively split ``G`` along minimum cuts.

    A graph with at most ``sizetreshold`` nodes is appended to ``output``.
    Larger graphs are split in two with the Stoer-Wagner minimum cut; each
    half whose average node connectivity exceeds ``septreshold`` is appended
    to ``notseparate``, the other halves are split further.

    Parameters
    ----------
    G : networkx graph
        graph to separate
    sizetreshold : int
        largest node count that is no longer split
    output : list
        collects the small graphs; mutated in place
    septreshold : float
        connectivity above which a part is kept whole
    notseparate : list
        collects the well-connected parts; mutated in place

    Returns
    -------
    list or tuple
        ``output`` alone when ``G`` itself is small enough, otherwise the
        pair ``(output, notseparate)``. The asymmetry is kept for existing
        callers; both lists are always updated in place.
    """
    if len(G.nodes()) < (sizetreshold + 1):
        output.append(G)
        return output

    cut_value, partition = nx.stoer_wagner(G)
    for part in partition:
        H = G.subgraph(part)
        if nx.average_node_connectivity(H) > septreshold:
            notseparate.append(H)
        else:
            # previously referenced an undefined name and passed too few
            # arguments, so any recursion raised
            graph_separation(H, sizetreshold, output, septreshold, notseparate)
    return output, notseparate
def computeAveConnect(G):
    """Print the nodes, edges and average node connectivity of ``G``.

    Also prints the degree of the nodes named 'Hub' and 'Tablet'. Nothing is
    returned.
    """
    node_view = G.nodes()
    print(node_view)
    edge_view = G.edges()
    print(edge_view)
    avg_connectivity = nx.average_node_connectivity(G)
    print(avg_connectivity)
    for label in ('Hub', 'Tablet'):
        print(nx.degree(G, label))
def calculatenodeconnectivity(network):
    '''
    Return the average node connectivity of ``network``, rounded to 7 places.

    This is the average of the local node connectivity over all pairs of
    nodes, not the global node connectivity (the minimum number of nodes
    whose removal disconnects the graph).

    Returns 0 when the connectivity cannot be computed for the given input.
    '''
    try:
        connectivity = nx.average_node_connectivity(network)
    except Exception:
        # narrowed from a bare except so KeyboardInterrupt/SystemExit propagate
        return 0
    return round(connectivity, 7)
def get_additional_regressors(env, t):
    """Build graph-level and node-level feature tables for the living nodes.

    The subgraph of ``env`` induced by ``env.get_living(t)`` is analysed.

    Parameters
    ----------
    env : networkx graph with a ``get_living`` method
        environment graph
    t : object
        passed through to ``env.get_living``

    Returns
    -------
    graph_properties : pandas.DataFrame
        one row per node of the subgraph, every row repeating the
        graph-level values (average node connectivity, density, node and
        edge counts)
    node_properties : pandas.DataFrame
        one row per node with centralities, core number, pagerank, in/out
        edge counts and average neighbor degree; a centrality that cannot
        be computed is filled with zeros
    """
    nodes = env.get_living(t)
    subg = nx.subgraph(env, nodes)
    n_nodes = subg.number_of_nodes()

    graph_properties = pd.DataFrame({
        "avg_node_connectivity": nx.average_node_connectivity(subg),
        "density": nx.density(subg),
        "number_of_nodes": [n_nodes] * n_nodes,
        "number_of_edges": [subg.number_of_edges()] * n_nodes
    })

    def _values_or_zeros(metric):
        # per-node values of a centrality, or zeros when it is undefined
        # for this subgraph
        try:
            return list(metric(subg).values())
        except Exception:
            return [0] * n_nodes

    # centralities that may fail on some graphs, in output column order
    fallible = (
        ("betweenness_centrality", nx.betweenness_centrality),
        ("in_degree_centrality", nx.in_degree_centrality),
        ("out_degree_centrality", nx.out_degree_centrality),
        ("harmonic_centrality", nx.harmonic_centrality),
        ("closeness_centrality", nx.closeness_centrality),
    )
    node_properties = {name: _values_or_zeros(metric) for name, metric in fallible}

    node_properties.update({
        "core_number": list(nx.core_number(subg).values()),
        "pagerank": list(nx.pagerank(subg).values()),
        "in_edges": [len(subg.in_edges(v)) for v in subg.nodes()],
        "out_edges": [len(subg.out_edges(v)) for v in subg.nodes()],
        "average_neighbor_degree": list(nx.average_neighbor_degree(subg).values())
    })
    node_properties = pd.DataFrame(node_properties)
    return graph_properties, node_properties
def getGraphVector(gGraph):
    """Return a numpy feature vector describing ``gGraph``.

    The seven entries are, in order: edge count, node count, mean degree
    centrality, number of cliques, number of connected components, average
    node connectivity and average shortest path.
    """
    print("Extracting graph feature vector...")
    edge_count = len(gGraph.edges())
    node_count = len(gGraph.nodes())
    mean_degree_centrality = getMeanDegreeCentrality(gGraph)
    clique_count = nx.graph_number_of_cliques(gGraph)
    component_count = nx.number_connected_components(gGraph)
    avg_connectivity = nx.average_node_connectivity(gGraph)
    avg_shortest_path = getAvgShortestPath(gGraph)
    features = np.asarray([
        edge_count,
        node_count,
        mean_degree_centrality,
        clique_count,
        component_count,
        avg_connectivity,
        avg_shortest_path,
    ])
    print("Extracting graph feature vector... Done.")
    return features
def mineTrees(rf_model):
    """Compute network metrics for every tree of a fitted forest.

    For each estimator a directed graph is built whose nodes are feature
    indices: an edge runs from the feature tested at a split to the feature
    tested at each of its non-leaf children. One row of metrics per tree is
    returned as a DataFrame.
    """
    columns = [
        'nodes', 'edges', 'diameter', 'weak_components', 'strong_components',
        'node_connectivity', 'mean_hub_score', 'mean_auth_score',
        'median_degree', 'mean_degree'
    ]
    n_trees = rf_model.n_estimators
    result = pd.DataFrame(index=np.arange(0, n_trees), columns=columns)

    for t in range(0, n_trees):
        fitted = rf_model.estimators_[t].tree_
        features = fitted.feature
        graph = nx.DiGraph()

        # negative feature values mark nodes that do not test a feature
        for feat, l_child, r_child in zip(features, fitted.children_left,
                                          fitted.children_right):
            if feat < 0:
                continue
            for child in (l_child, r_child):
                if child > 0 and features[child] >= 0:
                    graph.add_edge(feat, features[child])

        # Network metrics
        hubs, authorities = nx.hits_numpy(graph)
        mean_hub_score = np.mean(list(hubs.values()))
        mean_auth_score = np.mean(list(authorities.values()))
        n_nodes = nx.number_of_nodes(graph)
        diameter = nx.diameter(nx.to_undirected(graph))
        n_edges = nx.number_of_edges(graph)
        strong_comp = nx.number_strongly_connected_components(graph)
        weak_comp = nx.number_weakly_connected_components(graph)
        # NOTE(review): list() of this mapping yields its keys (the distinct
        # degree values), so the two statistics below are taken over those
        # keys rather than over per-node degrees - confirm this is intended
        degree_keys = list(nx.average_degree_connectivity(graph, target="in"))
        avg_in_degree = np.mean(degree_keys)
        median_in_degree = np.median(degree_keys)
        node_connectivity = nx.average_node_connectivity(graph)

        result.loc[t] = [
            n_nodes, n_edges, diameter, weak_comp, strong_comp,
            node_connectivity, mean_hub_score, mean_auth_score,
            median_in_degree, avg_in_degree
        ]
    return result
def concistency():
    """Report non-reciprocated edges, then print statistics of the graph.

    Reads the module-level ``edges`` and ``nedges`` and replaces the
    module-level ``graph`` with its undirected version that keeps only
    reciprocal edges.
    """
    global graph

    # every listed edge should also exist in the opposite direction
    for idx in range(0, nedges):
        tail, head = edges[idx][0], edges[idx][1]
        if graph.has_edge(head, tail):
            continue
        print('inconsistent ' + str(head) + ' ' + str(tail))

    graph = graph.to_undirected(reciprocal=True)

    clustering = nx.average_clustering(graph, graph.nodes, 1)
    print("Clustering Coeficient: ", clustering)
    print("Average Shortest Path: ", nx.average_shortest_path_length(graph))
    print("Raidus (minimum eccentricity): ", nx.radius(graph))
    print("Diameter (maximum eccentricity): ", nx.diameter(graph))
    print("Average node conectvity: ", nx.average_node_connectivity(graph))
    print("Node conectivity: ", nx.node_connectivity(graph))
    print("Isolated nodes: ", *nx.isolates(graph))
def base_dungeon(self, initial_room=0):
    """Generate the base dungeon graph and store it in ``self.graph``.

    The number of rooms, the connectivity threshold and the node class come
    from ``dungeon_styles[self.style]``. Rooms are numbered from
    ``initial_room``; random edges are added until the average node
    connectivity reaches the threshold, then the post-processing steps
    (secrets, area labels, joining, tagging, room assignment) are applied.

    Parameters
    ----------
    initial_room : int
        id given to the first room
    """
    style_cfg = dungeon_styles[self.style]
    rooms_min, rooms_max = style_cfg['rooms']
    threshold = style_cfg['connectivity']
    colour = self.colour
    class_ = style_cfg['class']

    dungeon = nx.Graph()
    n_rooms = random.randint(rooms_min, rooms_max)
    for i in range(initial_room, initial_room + n_rooms):
        dungeon.add_node(i, colour=colour, class_=class_, style=self.style,
                         purpose=self.purpose, tags=[])

    # random.sample needs a real sequence (node views are rejected on
    # Python 3.11+); the node set does not change while edges are added
    room_ids = list(dungeon.nodes())
    while nx.average_node_connectivity(dungeon) < threshold:
        rooms = random.sample(room_ids, 2)
        dungeon.add_edge(rooms[0], rooms[1], style='solid', weight=1)

    self.add_secrets(dungeon)
    self.label_secret_areas(dungeon)
    self.fix_unjoined_areas(dungeon)
    self.tag_nodes(dungeon)
    self.assign_rooms(dungeon)
    self.graph = dungeon
def test_connectivity():
    """Sample 1000 random 4-node graphs and tally their connectivity values.

    For every sample the value returned by ``get_connectivity`` is printed;
    graphs whose value lies strictly between 1.43 and 2.45 are also printed.
    The absolute values, rounded to 3 decimals, are counted in a histogram
    and the average node connectivities are collected in a list. Both are
    local and not returned.
    """
    conn_dict = {}
    conn_lst = []
    for i in range(1000):
        graph, laplacian = random_graph(4)
        con = get_connectivity(laplacian)
        conn_lst.append(nx.average_node_connectivity(graph))
        # print() with one argument behaves the same on Python 2 and 3
        print(con)
        if 1.43 < con < 2.45:
            Graph.print_graph(graph)
        con = round(abs(con), 3)
        conn_dict[con] = conn_dict.get(con, 0) + 1
    # sort once at the end instead of after every append
    conn_lst.sort()
def generate(User):
    """Build a graph from a CSV file and return its feature vector.

    The file is read without a header in chunks of 100000 rows. Columns 3
    and 4 of every row are the two endpoints of an edge; rows whose
    endpoints are equal are skipped.

    Parameters
    ----------
    User : str or file-like
        CSV source handed to ``pandas.read_csv``

    Returns
    -------
    list
        ``[f0, f1, f2, f3, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, -1]``
        as described by the comments in the body; f11-f14 are computed on
        the largest connected component only
    """
    G = nx.Graph()
    sources, targets = set(), set()
    # iterate values rather than positional labels: the index of later
    # chunks does not start at 0, and DataFrame.ix no longer exists
    for chunk in pd.read_csv(User, header=None, chunksize=100000):
        sources.update(chunk[3])
        targets.update(chunk[4])
        G.add_edges_from(
            (src, dst) for src, dst in zip(chunk[3], chunk[4]) if src != dst
        )

    f0 = len(sources)  # number of distinct values in column 3 (computers used)
    f1 = len(targets)  # number of distinct values in column 4 (authenticated computers)
    f2 = len(G.nodes())  # number of nodes in the graph
    f3 = len(G.edges())  # number of edges in the graph
    histogram = nx.degree_histogram(G)
    f5 = histogram[1]  # number of nodes with degree 1
    f6 = len(histogram) - 1  # maximum node degree
    f7 = nx.number_connected_components(G)  # number of connected components
    f8 = nx.average_clustering(G)  # average clustering coefficient
    f9 = nx.average_node_connectivity(G)  # average node connectivity
    f10 = nx.density(G)  # graph density

    # the remaining metrics need a connected graph: use the largest
    # component (connected_component_subgraphs was removed from NetworkX)
    G = G.subgraph(max(nx.connected_components(G), key=len)).copy()
    f11 = nx.average_shortest_path_length(G)  # average shortest path length
    f12 = nx.diameter(G)  # diameter (longest shortest path)
    f13 = nx.radius(G)  # radius
    f14 = nx.degree_assortativity_coefficient(G)  # degree assortativity
    return [f0, f1, f2, f3, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, -1]
def print_stats(G):
    """Print a series of NetworkX statistics for ``G``.

    Printing is best-effort: the first statistic that fails stops the
    report. The failure is logged instead of being silently discarded, and
    the function still returns normally.
    """
    import logging

    try:
        calculate_demand(G)
        print("min_cost_flow")
        # https://networkx.github.io/documentation/stable/reference/algorithms/generated/networkx.algorithms.flow.min_cost_flow.html#networkx.algorithms.flow.min_cost_flow
        # demand maybe from strongly_connected_components
        print(nx.min_cost_flow(G, capacity="inverse_weight", weight="weight"))
        print("pagerank")
        print(nx.pagerank(G))
        print("average_node_connectivity")
        print(nx.average_node_connectivity(G))
        print("dominating_set")
        print(nx.dominating_set(G))
        print("strongly_connected_components")
        print(list(nx.strongly_connected_components(G)))
    except Exception:
        logging.getLogger(__name__).warning(
            "print_stats: statistics report aborted", exc_info=True)
def calculate_metrics(G):
    """Return ``[density, size, avg_degree, avg_conn]`` for ``G``.

    ``size`` is the number of nodes, ``avg_degree`` the mean node degree and
    ``avg_conn`` the average node connectivity.
    """
    # Not computed here: average clustering (only works for undirected
    # graphs), the strongly connected ratio, the diameter (only works when
    # connected) and betweenness centrality.
    density = nx.density(G)
    size = G.number_of_nodes()
    avg_conn = nx.average_node_connectivity(G)
    # dict() accepts both the NetworkX 1.x dict and the 2.x DegreeView;
    # np.int was removed from NumPy and was only an alias of int
    degrees = np.fromiter(dict(G.degree()).values(), dtype=int)
    avg_degree = np.average(degrees)
    return [density, size, avg_degree, avg_conn]
def get_network_statistics(self):
    """
    Generate summary statistics of the whole network under analysis.

    The graph is built from ``self.arc_list_df``, using its ``source`` and
    ``target`` columns as edge endpoints and ``type`` as an edge attribute.

    :return: dict with the keys number_connected_components,
        average_node_connectivity, average_clustering, diameter, density,
        number_of_nodes, number_of_edges, total_degree and average_degree.
    :Note: NetworkX does not work well with directed graphs.
    """
    # from_pandas_dataframe was renamed from_pandas_edgelist in NetworkX 2.1
    build_graph = getattr(nx, 'from_pandas_edgelist', None) or nx.from_pandas_dataframe
    G = build_graph(df=self.arc_list_df, source='source',
                    target='target', edge_attr='type')

    # dict() accepts both the NetworkX 1.x dict and the 2.x DegreeView
    total_degree = sum(dict(nx.degree(G)).values())
    number_of_nodes = nx.number_of_nodes(G)
    # an empty graph has no meaningful average degree; report 0
    average_degree = total_degree / number_of_nodes if number_of_nodes else 0

    summary_statistics = {
        'number_connected_components': nx.number_connected_components(G),
        'average_node_connectivity': nx.average_node_connectivity(G),
        'average_clustering': nx.average_clustering(G),
        # NOTE(review): hard-coded value, not computed from G - confirm
        'diameter': 12,
        'density': nx.density(G),
        'number_of_nodes': number_of_nodes,
        'number_of_edges': nx.number_of_edges(G),
        'total_degree': total_degree,
        'average_degree': average_degree,
    }
    return summary_statistics
def computeNetConnectivity(net):
    """Return the average node connectivity of the graph built from ``net``."""
    graph = computeGraph(net)
    connectivity = nx.average_node_connectivity(graph)
    return connectivity
def test_average_connectivity_directed():
    """Check average node connectivity on a small directed star."""
    # node 1 points at three leaves: 3 of the 12 ordered node pairs are
    # connected by one path, hence 0.25
    star_edges = [(1, 3), (1, 4), (1, 5)]
    digraph = nx.DiGraph(star_edges)
    observed = nx.average_node_connectivity(digraph)
    assert_equal(observed, 0.25)
def network_metrics(s):
    """Print summary metrics of the simulation ``s`` and of ``s.network``.

    Each line is formatted into a single string before printing so that the
    output is the same on Python 2 and Python 3.
    """
    print("Concurrency %s" % (concurrency(s),))
    print("Partner Turnover Rate %s" % (partner_turnover_rate(s),))
    print("Average Clustering %s"
          % (nx.algorithms.bipartite.average_clustering(s.network),))
    print("Degree Assortivity %s"
          % (nx.degree_assortativity_coefficient(s.network),))
    print("Average node connectivity %s"
          % (nx.average_node_connectivity(s.network),))
def test_average_connectivity_directed():
    """Check average node connectivity of a directed star with every flow function."""
    # node 1 points at three leaves: 3 of the 12 ordered node pairs are
    # connected by one path, hence 0.25
    G = nx.DiGraph([(1, 3), (1, 4), (1, 5)])
    for flow_func in flow_funcs:
        # pass the flow function through; the loop previously re-ran the
        # default algorithm on every iteration
        assert_equal(nx.average_node_connectivity(G, flow_func=flow_func), 0.25,
                     msg=msg.format(flow_func.__name__))
def compute_avg_connectivity(self):
    """Store the average node connectivity of ``self.graph`` as a graph attribute.

    The value is written to ``self.graph.graph[AVERAGE_CONNECTIVITY]`` and a
    debug message is logged.
    """
    self.graph.graph[AVERAGE_CONNECTIVITY] = nx.average_node_connectivity(self.graph)
    message = self.__class__.__name__ + ": Connectivity computed."
    logging.debug(message)
def extended_stats(G, connectivity=False, anc=False, ecc=False, bc=False, cc=False):
    """
    Deprecated: compute extended topological measures for a graph.

    A warning is issued on every call; use NetworkX directly instead.

    Parameters
    ----------
    G : networkx.MultiDiGraph
        deprecated
    connectivity : bool
        deprecated; if True, add node and edge connectivity
    anc : bool
        deprecated; if True, add average node connectivity
    ecc : bool
        deprecated; if True, add eccentricity-based measures
    bc : bool
        deprecated; if True, add betweenness centrality
    cc : bool
        deprecated; if True, add closeness centrality

    Returns
    -------
    dict
    """
    msg = (
        "The extended_stats function has been deprecated and will be removed in a "
        "future release. Use NetworkX directly for extended topological measures."
    )
    warnings.warn(msg)

    def _mean(per_node):
        # arithmetic mean of the values of a node -> value mapping
        return sum(per_node.values()) / len(per_node)

    stats = {}

    # DiGraph, undirected Graph and largest strongly connected component
    D = utils_graph.get_digraph(G, weight="length")
    Gu = nx.Graph(D)
    Gs = utils_graph.get_largest_component(G, strongly=True)

    nbr_deg = nx.average_neighbor_degree(G)
    stats["avg_neighbor_degree"] = nbr_deg
    stats["avg_neighbor_degree_avg"] = _mean(nbr_deg)

    wtd_nbr_deg = nx.average_neighbor_degree(G, weight="length")
    stats["avg_weighted_neighbor_degree"] = wtd_nbr_deg
    stats["avg_weighted_neighbor_degree_avg"] = _mean(wtd_nbr_deg)

    deg_cent = nx.degree_centrality(G)
    stats["degree_centrality"] = deg_cent
    stats["degree_centrality_avg"] = _mean(deg_cent)

    stats["clustering_coefficient"] = nx.clustering(Gu)
    stats["clustering_coefficient_avg"] = nx.average_clustering(Gu)
    stats["clustering_coefficient_weighted"] = nx.clustering(Gu, weight="length")
    stats["clustering_coefficient_weighted_avg"] = nx.average_clustering(Gu, weight="length")

    ranks = nx.pagerank(D, weight="length")
    stats["pagerank"] = ranks
    top_node = max(ranks, key=ranks.get)
    stats["pagerank_max_node"] = top_node
    stats["pagerank_max"] = ranks[top_node]
    bottom_node = min(ranks, key=ranks.get)
    stats["pagerank_min_node"] = bottom_node
    stats["pagerank_min"] = ranks[bottom_node]

    if connectivity:
        stats["node_connectivity"] = nx.node_connectivity(Gs)
        stats["edge_connectivity"] = nx.edge_connectivity(Gs)
        utils.log("Calculated node and edge connectivity")

    if anc:
        stats["node_connectivity_avg"] = nx.average_node_connectivity(G)
        utils.log("Calculated average node connectivity")

    if ecc:
        # weighted shortest path lengths from every node of the component
        sp = {}
        for source in Gs.nodes:
            sp[source] = dict(
                nx.single_source_dijkstra_path_length(Gs, source, weight="length")
            )
        utils.log("Calculated shortest path lengths")

        node_ecc = nx.eccentricity(Gs, sp=sp)
        stats["eccentricity"] = node_ecc
        stats["diameter"] = nx.diameter(Gs, e=node_ecc)
        stats["radius"] = nx.radius(Gs, e=node_ecc)
        stats["center"] = nx.center(Gs, e=node_ecc)
        stats["periphery"] = nx.periphery(Gs, e=node_ecc)

    if cc:
        closeness = nx.closeness_centrality(G, distance="length")
        stats["closeness_centrality"] = closeness
        stats["closeness_centrality_avg"] = _mean(closeness)
        utils.log("Calculated closeness centrality")

    if bc:
        betweenness = nx.betweenness_centrality(D, weight="length")
        stats["betweenness_centrality"] = betweenness
        stats["betweenness_centrality_avg"] = _mean(betweenness)
        utils.log("Calculated betweenness centrality")

    utils.log("Calculated extended stats")
    return stats
def compute_average_node_connectivity(G):
    """Return the average node connectivity of the graph ``G``."""
    connectivity = nx.average_node_connectivity(G)
    return connectivity
def extended_stats(G, connectivity=False, anc=False, ecc=False, bc=False, cc=False):
    """
    Calculate extended topological stats and metrics for a graph.

    Many of these algorithms have an inherently high time complexity. Global
    topological analysis of large complex networks is extremely time
    consuming and may exhaust computer memory. Consider using function
    arguments to not run metrics that require computation of a full matrix
    of paths if they will not be needed.

    Parameters
    ----------
    G : networkx multidigraph
    connectivity : bool
        if True, calculate node and edge connectivity
    anc : bool
        if True, calculate average node connectivity
    ecc : bool
        if True, calculate shortest paths, eccentricity, and topological
        metrics that use eccentricity
    bc : bool
        if True, calculate node betweenness centrality
    cc : bool
        if True, calculate node closeness centrality

    Returns
    -------
    stats : dict
        always contains avg_neighbor_degree, avg_weighted_neighbor_degree,
        degree_centrality, clustering_coefficient and
        clustering_coefficient_weighted (each with a matching ``*_avg``
        entry), plus pagerank, pagerank_max_node, pagerank_max,
        pagerank_min_node and pagerank_min. Depending on the flags it also
        contains node_connectivity and edge_connectivity (connectivity),
        node_connectivity_avg (anc), eccentricity, diameter, radius, center
        and periphery (ecc), closeness_centrality and
        closeness_centrality_avg (cc), betweenness_centrality and
        betweenness_centrality_avg (bc).
    """

    def _mean(per_node):
        # arithmetic mean of the values of a node -> value mapping
        return sum(per_node.values()) / len(per_node)

    stats = {}
    full_start_time = time.time()

    # create a DiGraph from the MultiDiGraph, for those metrics that require it
    G_dir = nx.DiGraph(G)

    # create an undirected Graph from the MultiDiGraph, for those metrics that
    # require it
    G_undir = nx.Graph(G)

    # get the largest strongly connected component, for those metrics that
    # require strongly connected graphs
    G_strong = get_largest_component(G, strongly=True)

    # average degree of the neighborhood of each node, and average for the graph
    avg_neighbor_degree = nx.average_neighbor_degree(G)
    stats['avg_neighbor_degree'] = avg_neighbor_degree
    stats['avg_neighbor_degree_avg'] = _mean(avg_neighbor_degree)

    # average weighted degree of the neighborhood of each node, and average
    # for the graph
    avg_weighted_neighbor_degree = nx.average_neighbor_degree(G, weight='length')
    stats['avg_weighted_neighbor_degree'] = avg_weighted_neighbor_degree
    stats['avg_weighted_neighbor_degree_avg'] = _mean(avg_weighted_neighbor_degree)

    # degree centrality for a node is the fraction of nodes it is connected to
    degree_centrality = nx.degree_centrality(G)
    stats['degree_centrality'] = degree_centrality
    stats['degree_centrality_avg'] = _mean(degree_centrality)

    # clustering coefficients (per node and graph average), unweighted and
    # weighted by length
    stats['clustering_coefficient'] = nx.clustering(G_undir)
    stats['clustering_coefficient_avg'] = nx.average_clustering(G_undir)
    stats['clustering_coefficient_weighted'] = nx.clustering(G_undir, weight='length')
    stats['clustering_coefficient_weighted_avg'] = nx.average_clustering(G_undir, weight='length')

    # pagerank: a ranking of the nodes in the graph based on the structure of
    # the incoming links
    pagerank = nx.pagerank(G_dir, weight='length')
    stats['pagerank'] = pagerank

    # node with the highest page rank, and its value
    pagerank_max_node = max(pagerank, key=lambda x: pagerank[x])
    stats['pagerank_max_node'] = pagerank_max_node
    stats['pagerank_max'] = pagerank[pagerank_max_node]

    # node with the lowest page rank, and its value
    pagerank_min_node = min(pagerank, key=lambda x: pagerank[x])
    stats['pagerank_min_node'] = pagerank_min_node
    stats['pagerank_min'] = pagerank[pagerank_min_node]

    # if True, calculate node and edge connectivity
    if connectivity:
        start_time = time.time()

        # node connectivity is the minimum number of nodes that must be removed
        # to disconnect G or render it trivial
        stats['node_connectivity'] = nx.node_connectivity(G_strong)

        # edge connectivity is equal to the minimum number of edges that must be
        # removed to disconnect G or render it trivial
        stats['edge_connectivity'] = nx.edge_connectivity(G_strong)
        log('Calculated node and edge connectivity in {:,.2f} seconds'.format(time.time() - start_time))

    # if True, calculate average node connectivity
    if anc:
        # mean number of internally node-disjoint paths between each pair of
        # nodes in G, i.e., the expected number of nodes that must be removed to
        # disconnect a randomly selected pair of non-adjacent nodes
        start_time = time.time()
        stats['node_connectivity_avg'] = nx.average_node_connectivity(G)
        log('Calculated average node connectivity in {:,.2f} seconds'.format(time.time() - start_time))

    # if True, calculate shortest paths, eccentricity, and topological metrics
    # that use eccentricity
    if ecc:
        # precompute shortest paths between all nodes for eccentricity-based
        # stats
        start_time = time.time()
        sp = {source: dict(nx.single_source_dijkstra_path_length(G_strong, source, weight='length'))
              for source in G_strong.nodes()}
        log('Calculated shortest path lengths in {:,.2f} seconds'.format(time.time() - start_time))

        # eccentricity of a node v is the maximum distance from v to all other
        # nodes in G
        eccentricity = nx.eccentricity(G_strong, sp=sp)
        stats['eccentricity'] = eccentricity

        # diameter is the maximum eccentricity
        stats['diameter'] = nx.diameter(G_strong, e=eccentricity)

        # radius is the minimum eccentricity
        stats['radius'] = nx.radius(G_strong, e=eccentricity)

        # center is the set of nodes with eccentricity equal to radius
        stats['center'] = nx.center(G_strong, e=eccentricity)

        # periphery is the set of nodes with eccentricity equal to the diameter
        stats['periphery'] = nx.periphery(G_strong, e=eccentricity)

    # if True, calculate node closeness centrality
    if cc:
        # closeness centrality of a node is the reciprocal of the sum of the
        # shortest path distances from u to all other nodes
        start_time = time.time()
        closeness_centrality = nx.closeness_centrality(G, distance='length')
        stats['closeness_centrality'] = closeness_centrality
        stats['closeness_centrality_avg'] = _mean(closeness_centrality)
        log('Calculated closeness centrality in {:,.2f} seconds'.format(time.time() - start_time))

    # if True, calculate node betweenness centrality
    if bc:
        # betweenness centrality of a node is the sum of the fraction of
        # all-pairs shortest paths that pass through node. Run it on the
        # DiGraph: the weighted NetworkX implementation cannot handle
        # Multi(Di)Graphs, whose parallel edges hide the 'length' attribute
        # one level deeper
        start_time = time.time()
        betweenness_centrality = nx.betweenness_centrality(G_dir, weight='length')
        stats['betweenness_centrality'] = betweenness_centrality
        stats['betweenness_centrality_avg'] = _mean(betweenness_centrality)
        log('Calculated betweenness centrality in {:,.2f} seconds'.format(time.time() - start_time))

    log('Calculated extended stats in {:,.2f} seconds'.format(time.time() - full_start_time))
    return stats
def nc(self):
    """Return the average node connectivity of ``self.connectivity_graph``.

    The value is whatever ``nx.average_node_connectivity`` reports for the
    graph stored on this instance; no arguments besides the graph are passed.
    """
    graph = self.connectivity_graph
    return nx.average_node_connectivity(graph)
def test_average_connectivity_directed():
    """Check average node connectivity of a small directed graph.

    The graph has the three edges 1->3, 1->4 and 1->5. The expected value
    0.25 must be obtained with every flow function listed in ``flow_funcs``.
    """
    G = nx.DiGraph([(1, 3), (1, 4), (1, 5)])
    for flow_func in flow_funcs:
        errmsg = f"Assertion failed in function: {flow_func.__name__}"
        # Pass the flow function explicitly; otherwise each iteration would
        # only exercise the default algorithm and the loop would be a no-op.
        result = nx.average_node_connectivity(G, flow_func=flow_func)
        assert result == 0.25, errmsg
## Scratch benchmark: different ways of summing the edge weights of the
## sub-network induced by one cluster's rows (networkx vs. pandas vs. igraph).
## Timings in the comments are the ones recorded when these notes were written.

## using networkx
import networkx as nx
net = nx.read_weighted_edgelist('Eco/STRING_511145.tsv.gz')

## get sum of all weights of edges between given nodes!
rows = glb.clusters[0].rows
## this uses networkx and takes 1.92 ms:
## (the values view is turned into a list first: on Python 3, np.sum() of a
## dict view does not add the numbers up)
np.sum(list(nx.get_edge_attributes(net.subgraph(rows), 'weight').values()))
## this uses pandas and takes 226 ms:
np.sum(glb.string_net[glb.string_net[[0, 1]].isin(rows).all(1)].weight) / 2.0
## even this takes longer (27.7 ms + 4.33 ms) = 32.03 ms:
## NOTE(review): DataFrame.ix no longer exists in pandas >= 1.0; whether .loc
## or .iloc is the right replacement depends on how string_net is indexed --
## confirm before changing.
net2 = glb.string_net.ix[rows]
np.sum(net2[net2[[0, 1]].isin(rows).all(1)].weight) / 2.0
## Using networkx to do this on all 4000 genes is still
## (27.7+4.33*4000)/(1.92*4000) or about 2.25x faster

## how about this? no -- 114 ms -- why? and does it account for weights?
## NOTE(review): average_node_connectivity is called without any weight
## argument here (its signature has none), so edge weights are not used.
nx.average_node_connectivity(net.subgraph(rows))

## using igraph? can't read in gzipped file, so uncompress it then:
import igraph as ig
G = ig.Graph.Read_Ncol('Eco/STRING_511145.tsv', weights=True)
## throws an error if any rows are not in the network
## This is 4.05 ms
np.sum(G.induced_subgraph(rows[np.in1d(rows, G.vs['name'])]).es['weight']) / 2.0
r = rows[np.in1d(rows, G.vs['name'])]
## This is 250 us -- so if we could pre-filter all rows into only those that
## are in the network, this is fastest
np.sum(G.induced_subgraph(r).es['weight']) / 2.0
def get_graph(Mat_D, Threshold, percentageConnections=False, complet=False):
    """Build a weighted graph from a correlation matrix and export its metrics.

    The ``.mat`` file is read with ``scipy.io.loadmat`` and must contain the
    variables ``'Correlation'`` (2-D matrix) and ``'labels'``. Every pair
    ``(i, j)`` with ``j < i`` becomes an edge weighted by
    ``Correlation[i, j]`` when that weight is greater than ``Threshold`` (or
    unconditionally when ``complet`` is true).

    Two files are written to the current working directory:

    * ``graph_metrics.csv`` -- one row per per-node metric, one column per node;
    * ``graph_metrics_global.mat`` -- the six whole-graph metrics.

    Progress information (shapes, value range, threshold, edge counts) is
    printed to standard output.

    Parameters
    ----------
    Mat_D : str
        Path of the ``.mat`` file to load.
    Threshold : float
        Weight cut-off for keeping an edge. In percentage mode it is instead
        the largest fraction of matrix entries allowed above the cut-off; the
        cut-off itself is then searched from -1.00 to 0.99 in steps of 0.01
        and replaces ``Threshold``.
    percentageConnections : float or bool
        Percentage mode is enabled when this is strictly between 0 and 1. Any
        other truthy value only prints a message and leaves ``Threshold``
        unchanged.
    complet : bool
        If true, keep every pair as an edge regardless of ``Threshold``.

    Returns
    -------
    (out_data, out_mat) : tuple of str
        Paths of the CSV file and of the ``.mat`` file that were written.
    """
    import scipy.io as sio
    import numpy as np
    import networkx as nx
    import pandas as pd
    import os

    Data = sio.loadmat(Mat_D)
    matX = Data['Correlation']
    labels = Data['labels']
    print(np.shape(matX))
    print(np.shape(labels))
    print(np.min(matX), np.max(matX))

    if percentageConnections:
        if 0 < percentageConnections < 1:
            # Take the first cut-off (scanning upwards in 0.01 steps) whose
            # share of entries above it does not exceed the requested fraction.
            for i in range(-100, 100):
                per = np.sum(matX > i / 100.) / np.size(matX)
                if per <= Threshold:
                    Threshold = i / 100.
                    break
            print(Threshold)
        else:
            print('The coefficient is outside rank')

    # Edge list of the graph: strictly lower triangle of the matrix.
    row, _ = np.shape(matX)
    e = []
    for i in range(1, row):
        for j in range(i):
            if complet or matX[i, j] > Threshold:
                e.append((labels[i], labels[j], matX[i, j]))
    print(len(e), int(((row - 1) * row) / 2))

    # Build the graph.
    G = nx.Graph()
    G.add_weighted_edges_from(e)
    labelNew = list(G.nodes)

    # Whole-graph metrics (weighted).
    Dpc = nx.degree_pearson_correlation_coefficient(G, weight='weight')
    cluster = nx.average_clustering(G, weight='weight')
    # Whole-graph metrics (unweighted).
    estra = nx.estrada_index(G)
    tnsity = nx.transitivity(G)
    conNo = nx.average_node_connectivity(G)
    ac = nx.degree_assortativity_coefficient(G)

    # Per-node metrics. Each one maps node -> value.
    alpha = 0.1
    beta = 1.0
    katxCN = nx.katz_centrality_numpy(G, alpha=alpha, beta=beta,
                                      weight='weight')
    bcen = nx.betweenness_centrality(G, weight='weight')
    av_nd = nx.average_neighbor_degree(G, weight='weight')
    ctr = nx.clustering(G, weight='weight')
    ranPaN = nx.pagerank_numpy(G, weight='weight')
    Gol_N = nx.hits_numpy(G)
    Dgc = nx.degree_centrality(G)
    cl_ce = nx.closeness_centrality(G)
    cluster_Sq = nx.square_clustering(G)
    centr = nx.core_number(G)
    cami = nx.node_clique_number(G)
    camiN = nx.number_of_cliques(G)
    trian = nx.triangles(G)
    colorG = nx.greedy_color(G)

    # Row label and values are kept together so the table height and its
    # index can never disagree, whichever optional metrics are available.
    node_metrics = [
        ('katz_centrality_numpy', katxCN),
        ('betweenness_centrality', bcen),
        ('average_neighbor_degree', av_nd),
        ('clustering', ctr),
        ('pagerank_numpy', ranPaN),
        ('hits_numpy0', Gol_N[0]),
        ('hits_numpy1', Gol_N[1]),
        ('degree_centrality', Dgc),
        ('closeness_centrality', cl_ce),
        ('square_clustering', cluster_Sq),
        ('core_number', centr),
        ('node_clique_number', cami),
        ('number_of_cliques', camiN),
        ('triangles', trian),
        ('greedy_color', colorG),
    ]

    # Optional metric: skipped (with a message) when it cannot be computed.
    try:
        cenVNum = nx.eigenvector_centrality_numpy(G, weight='weight')
    except TypeError:
        print(
            "La red es muy pequeña y no se puede calcular este parametro gil")
    except Exception:
        # Best effort, as before, but without swallowing KeyboardInterrupt
        # or SystemExit.
        print('NetworkXPointlessConcept: graph null')
    else:
        node_metrics.append(('eigenvector_centrality_numpy', cenVNum))

    # Optional metric: only computed for a positive threshold
    # (positive weights).
    if Threshold > 0:
        carga_cen = nx.load_centrality(G, weight='weight')
        node_metrics.append(('load_centrality', carga_cen))

    matrix_datos = np.zeros((len(node_metrics), len(labelNew)))
    print(np.shape(matrix_datos))
    for r, (_, values) in enumerate(node_metrics):
        for c, roi in enumerate(labelNew):
            matrix_datos[r, c] = values[roi]

    frame = pd.DataFrame(matrix_datos)
    frame.columns = labelNew
    frame.index = [name for name, _ in node_metrics]

    Resul = os.getcwd()
    out_data = Resul + '/graph_metrics.csv'
    out_mat = Resul + '/graph_metrics_global.mat'
    frame.to_csv(out_data)
    sio.savemat(
        out_mat, {
            'degree_pearson_correlation_coefficient': Dpc,
            'average_clustering': cluster,
            'estrada_index': estra,
            'transitivity': tnsity,
            'average_node_connectivity': conNo,
            'degree_assortativity_coefficient': ac
        })
    return out_data, out_mat