def centrality_analysis(G, isDriected=False):
    """Compute several centrality scores for every node of ``G``.

    :param G: graph to analyse (``Graph()`` or ``Digraph()``)
    :param isDriected: when true, in- and out-degree centrality are
        reported separately instead of plain degree centrality
    :return: dict mapping each node to a list of scores, either
        ``[in_degree, out_degree, betweenness, eigenvector]`` (directed)
        or ``[degree, betweenness, eigenvector]`` (undirected)
    """
    node_view = G.nodes()

    if not isDriected:
        degree = centrality.degree_centrality(G)
        between = centrality.betweenness_centrality(G)
        eigen = centrality.eigenvector_centrality(G)
        scores = {n: [degree[n], between[n], eigen[n]] for n in node_view}
        print(
            "Three types of centrality are calculated \n" +
            "\n\tdegree_centrality\n\tbetweenness_centrality\n\teigenvector_centrality"
        )
        return scores

    in_degree = centrality.in_degree_centrality(G)
    out_degree = centrality.out_degree_centrality(G)
    between = centrality.betweenness_centrality(G)
    eigen = centrality.eigenvector_centrality(G)
    scores = {
        n: [in_degree[n], out_degree[n], between[n], eigen[n]]
        for n in node_view
    }
    print(
        "Four types of centrality are calculated \n" +
        "\n\tin_degree_centrality\n\tout_degree_centrality\n\tbetweenness_centrality\n\teigenvector_centrality"
    )
    return scores
def get_topological_features(G, nodes=None):
    """Build a flat feature vector describing the topology of ``G``.

    The vector concatenates ``get_features`` summaries of degree,
    betweenness and closeness centrality, of the clustering coefficients
    and of the per-source average shortest-path length, followed by the
    graph diameter and radius.

    :param G: graph to describe
    :param nodes: accepted for backward compatibility; the original code
        assigned it but never read it, so it has no effect
    :return: 1-D numpy array of features
    """
    node_count = len(G.nodes)

    # The single-letter prints are progress markers. print(...) with one
    # argument behaves the same on Python 2 and Python 3, unlike the
    # statement form.
    d_c = get_features(degree_centrality(G).values())
    print('a')
    b_c = get_features(betweenness_centrality(G).values())
    print('b')
    c_c = get_features(closeness_centrality(G).values())
    print('c')
    c = get_features(clustering(G).values())
    print('d')

    d = diameter(G)
    r = radius(G)

    # Each item is indexed at [1] for the target -> length mapping of one
    # source. The sum is divided by the total node count, not by the
    # number of reachable targets.
    s_p_average = []
    for entry in shortest_path_length(G):
        lengths = entry[1].values()
        s_p_average.append(sum(lengths) / float(node_count))
    s_p_average = get_features(s_p_average)

    features = np.concatenate(
        (d_c, b_c, c_c, c, s_p_average, [d], [r]), axis=0)
    return features
def get_centrality_labels(knn_graph_obj, perc_labeled, type='degree'):
    """Pick the nodes that should receive labels.

    For ``type`` in ``'degree'``, ``'closeness'``, ``'betweenness'``,
    ``'katz'`` or ``'clustering'`` the nodes are ranked by that score in
    descending order and the top ``int(perc_labeled * node_count)`` are
    returned. Any other ``type`` returns a random sample of that size.

    :param knn_graph_obj: graph whose nodes are ranked
    :param perc_labeled: fraction of nodes to select
    :param type: name of the ranking score (see above)
    :return: list of selected node identifiers
    """
    import random

    # Resolve the scoring function lazily so that only the requested
    # measure's name is ever looked up.
    if type == 'degree':
        scorer = centrality.degree_centrality
    elif type == 'closeness':
        scorer = centrality.closeness_centrality
    elif type == 'betweenness':
        scorer = centrality.betweenness_centrality
    elif type == 'katz':
        scorer = centrality.katz_centrality
    elif type == 'clustering':
        scorer = clustering
    else:
        scorer = None

    if scorer is None:
        indexes = list(knn_graph_obj.nodes)
        return random.sample(indexes, int(perc_labeled * len(indexes)))

    scores = pd.DataFrame.from_dict(
        scorer(knn_graph_obj), orient='index', columns=['value'])
    ranked = scores.sort_values(by='value', ascending=False)
    cutoff = int(perc_labeled * len(scores.index))
    return ranked.index[0:cutoff].tolist()
def get_centrality_labels(knn_graph_obj, type='degree'):
    """Return per-node scores of the requested kind for ``knn_graph_obj``.

    For ``type`` in ``'degree'``, ``'closeness'``, ``'betweenness'`` or
    ``'clustering'`` the result is a one-column (``'value'``) DataFrame
    indexed by node. Any other ``type`` returns the plain list of nodes.

    :param knn_graph_obj: graph to score
    :param type: name of the score (see above)
    :return: DataFrame of scores, or list of nodes for unknown ``type``
    """
    import random  # kept from the original; nothing below uses it

    if type == 'degree':
        scorer = centrality.degree_centrality
    elif type == 'closeness':
        scorer = centrality.closeness_centrality
    elif type == 'betweenness':
        scorer = centrality.betweenness_centrality
    elif type == 'clustering':
        scorer = clustering
    else:
        return list(knn_graph_obj.nodes)

    return pd.DataFrame.from_dict(
        scorer(knn_graph_obj), orient='index', columns=['value'])
def get_layer_info(subject, journal_volume, edge_list,
                   result_root="C:/Users/hexie/Documents/APS_result/"):
    """Compute and save centrality data, edge/node counts and a drawing for one layer.

    A weighted undirected graph is built from ``edge_list``. The process
    then changes its working directory to
    ``result_root + str(journal_volume) + "/" + str(subject)`` and writes
    these files there:

    * ``degree_centrality.npy``, ``closeness_centrality.npy``,
      ``betweeness_centrality.npy`` and, when it can be computed,
      ``eigen_vector_centrality.npy``
    * ``<subject><journal_volume>.txt`` with the edge and node counts
    * ``<subject><journal_volume>.png`` with a drawing of the graph

    :param subject: subject label, used in directory and file names
    :param journal_volume: volume label, used in directory and file names
    :param edge_list: weighted edges passed to ``add_weighted_edges_from``
    :param result_root: directory prefix for the output; defaults to the
        path that was previously hard-coded
    :raises OSError: if the output directory cannot be created or entered

    Note: the working directory stays changed after the call, as in the
    original code.
    """
    G = nx.Graph()
    G.add_weighted_edges_from(edge_list)

    out_dir = result_root + str(journal_volume) + "/" + str(subject)
    # Create missing parents too, and ignore only "already exists"; any
    # other failure is re-raised instead of being hidden by a bare except.
    try:
        os.makedirs(out_dir)
    except OSError:
        if not os.path.isdir(out_dir):
            raise
    os.chdir(out_dir)

    degree_centrality = nxc.degree_centrality(G)
    try:
        eigen_vector_centrality = nxc.eigenvector_centrality(G)
    except nx.NetworkXException:
        # Covers the convergence failure raised by power iteration without
        # also catching KeyboardInterrupt or programming errors.
        print("fail to converge within 100 iterations of power")
    else:
        np.save("eigen_vector_centrality.npy", eigen_vector_centrality)
    closeness_centrality = nxc.closeness_centrality(G)
    betweeness_centrality = nxc.betweenness_centrality(G)

    np.save("degree_centrality.npy", degree_centrality)
    np.save("closeness_centrality.npy", closeness_centrality)
    np.save("betweeness_centrality.npy", betweeness_centrality)

    with open(str(subject) + str(journal_volume) + ".txt", 'w') as f:
        f.write('Number of Edges: ' + str(nx.number_of_edges(G)) + "\n")
        f.write('Number of Nodes: ' + str(nx.number_of_nodes(G)) + "\n")

    nx.draw(G)
    plt.savefig(str(subject) + str(journal_volume) + ".png")
    plt.clf()
def bc_degree_relativity():
    """Scatter-plot node degree against betweenness centrality.

    The graph comes from ``get_shanghai_subway_graph()``. The figure uses
    the SimSun font for its Chinese axis labels and is shown with
    ``plt.show()``. Nothing is returned.
    """
    song = fm.FontProperties(fname=os.path.join(base_dir, '../simsun.ttc'),
                             size=10.5)
    sns.set(style='ticks', palette='Set2')
    plt.figure(dpi=200)
    font_settings = {
        'family': 'sans-serif',
        'sans-serif': ['Times New Roman', 'NSimSun'],
        'size': 10.5
    }
    rc('font', **font_settings)
    plt.rcParams['axes.unicode_minus'] = False

    network = get_shanghai_subway_graph()
    bc_by_node = betweenness_centrality(network)
    degree_by_node = {node: network.degree(node) for node in network.nodes}

    fig, ax = plt.subplots(num=1, figsize=(3.54, 2.26))
    plt.subplots_adjust(right=0.99, left=0.125, bottom=0.14, top=0.975)

    # (degree, betweenness) pairs, sorted so the points are drawn in
    # ascending degree order.
    points = sorted(
        (degree, bc_by_node[node]) for node, degree in degree_by_node.items())
    x1, y1 = zip(*points)
    (p2,) = ax.plot(x1, y1, 'o', ms=4)

    ax.set_xlabel('度', fontproperties=song)
    ax.set_ylabel('介数中心性', fontproperties=song)
    plt.show()
def extract_betweenness_centrality(self):
    """Write the betweenness centrality of every node of ``self.G`` to CSV.

    The file is ``output/<set_>/<set_>_betweenness_centrality.csv`` and
    holds one ``node,value`` line per node, with no header.
    """
    out_path = ('output/' + self.set_ + '/' + self.set_ +
                '_betweenness_centrality.csv')
    # The context manager closes (and flushes) the file even if the
    # centrality computation raises; the original never closed it.
    with open(out_path, 'w') as output:
        print('Calculating betweenness centrality')
        nodes = centrality.betweenness_centrality(self.G)
        for key, value in nodes.items():
            output.write(str(key) + ',' + str(value) + '\n')
def create_centrality_df(df):
    """Compute weighted betweenness centrality for the graph of each row.

    :param df: DataFrame whose rows are turned into graphs by
        ``build_graph`` and which has a ``'tijd'`` column
    :return: DataFrame with one row per ``'tijd'`` value and one column
        per node
    """
    by_time = {}
    for _, record in df.iterrows():
        graph = build_graph(record)
        by_time[record['tijd']] = betweenness_centrality(graph,
                                                         weight='weight')
    return pd.DataFrame(by_time).transpose()
def get_centrality(def_centrality, toll_centrality, model, nodes_int):
    """Add each node's closeness + betweenness to one of two running totals.

    :param def_centrality: running total for nodes not in ``nodes_int``
    :param toll_centrality: running total for nodes in ``nodes_int``
    :param model: object providing ``get_nx_graph()`` and
        ``get_node_by_id(node_id)``
    :param nodes_int: collection of node objects counted towards
        ``toll_centrality``
    :return: tuple ``(def_centrality, toll_centrality)`` after accumulation
    """
    betweenness = centrality.betweenness_centrality(model.get_nx_graph())
    closeness = centrality.closeness_centrality(model.get_nx_graph())

    for node_id, between_score in betweenness.items():
        in_toll_set = model.get_node_by_id(node_id) in nodes_int
        combined = closeness[node_id] + between_score
        if in_toll_set:
            toll_centrality += combined
        else:
            def_centrality += combined

    return def_centrality, toll_centrality
def calc_graph_measures(data_matrix, thresh=0):
    """Compute summary graph measures for an adjacency matrix.

    Entries whose absolute value is below ``thresh`` are set to zero on a
    private copy, so the caller's matrix is not modified. A graph is then
    built from the copy and summarised.

    :param data_matrix: square adjacency matrix, as a ``numpy.ndarray``
        (the real part is used) or a ``pandas.DataFrame``
    :param thresh: magnitude below which an entry is treated as absent
    :return: dict with keys ``degree``, ``eccentricity``,
        ``global_efficiency``, ``characteristic_path_length``,
        ``betweenness_centrality``, ``clustering_coefficient`` and
        ``modularity``. Per-node measures are averaged over nodes.
        The ``modularity`` entry is the ``performance`` score of the
        greedy-modularity partition.
    :raises TypeError: if ``data_matrix`` is neither an ndarray nor a
        DataFrame (this used to fail later with an unbound ``graph``)
    """
    from networkx import eccentricity
    from networkx.algorithms.efficiency import global_efficiency
    from networkx.algorithms.shortest_paths.generic import average_shortest_path_length
    from networkx.algorithms.centrality import betweenness_centrality
    from networkx.algorithms.cluster import average_clustering
    from networkx.algorithms.community.modularity_max import greedy_modularity_communities
    from networkx.algorithms.community.quality import performance

    def _avg_values(results):
        # Accepts a node -> value dict, or a list of (node, value) pairs.
        if isinstance(results, dict):
            values = list(results.values())
        elif isinstance(results, list):
            values = [pair[1] for pair in results]
        else:
            values = []
        return np.mean(values)

    if not isinstance(data_matrix, (np.ndarray, pd.DataFrame)):
        raise TypeError(
            "data_matrix must be a numpy.ndarray or pandas.DataFrame, got %s"
            % type(data_matrix).__name__)

    # Threshold a copy so the caller's data is left untouched.
    data_matrix = data_matrix.copy()
    below_thresh_indices = np.abs(data_matrix) < thresh
    data_matrix[below_thresh_indices] = 0

    if isinstance(data_matrix, np.ndarray):
        graph = networkx.convert_matrix.from_numpy_matrix(np.real(data_matrix))
    else:
        graph = networkx.convert_matrix.from_pandas_adjacency(data_matrix)

    degree = list(graph.degree)
    global_eff = global_efficiency(graph)
    b_central = betweenness_centrality(graph)
    modularity = performance(graph, greedy_modularity_communities(graph))

    # These measures raise NetworkXError on some graphs (for example
    # disconnected ones); fall back to neutral values in that case.
    try:
        ecc = eccentricity(graph)
    except networkx.exception.NetworkXError:
        ecc = [(0, 0)]

    try:
        clust = average_clustering(graph)
    except networkx.exception.NetworkXError:
        clust = 0

    try:
        char_path = average_shortest_path_length(graph)
    except networkx.exception.NetworkXError:
        char_path = 0

    graph_dict = {'degree': _avg_values(degree),
                  'eccentricity': _avg_values(ecc),
                  'global_efficiency': global_eff,
                  'characteristic_path_length': char_path,
                  'betweenness_centrality': _avg_values(b_central),
                  'clustering_coefficient': clust,
                  'modularity': modularity}
    return graph_dict
def bc_list(g):
    """
    Build the list of nodes ordered by descending betweenness centrality.

    :param g: the Graph to analyse
    :return: a list of ``(node, betweenness)`` tuples, highest value first
    """
    scores = betweenness_centrality(g)
    return sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
def analyze(filename):
    """Print all nodes ranked by degree, betweenness and eigenvector centrality.

    The graph is built by ``create_graph(filename)``. Each ranking is
    printed as a list of ``(node, score)`` pairs, highest score first.
    Nothing is returned.

    :param filename: path handed to ``read_csv`` and ``create_graph``
    """
    # The result of read_csv was never used. The call is kept so that a
    # missing or unreadable file still fails at the same point.
    read_csv(filename)
    G = create_graph(filename)

    # dict(...) accepts both a plain dict and a view that iterates
    # (node, degree) pairs.
    degree = dict(G.degree())
    betweenness = centrality.betweenness_centrality(G)
    eigen = centrality.eigenvector_centrality_numpy(G, weight='weight')

    def _ranked(scores):
        # Ascending sort followed by reverse(), as before, so that ties
        # keep exactly the order they had in the original output.
        ranked = sorted(scores.items(), key=itemgetter(1))
        ranked.reverse()
        return ranked

    # The former per-actor loop was removed: it iterated a name (`actors`)
    # not defined in this function, and its results were overwritten by
    # the full rankings below.
    actors_degree = _ranked(degree)
    actors_betweenness = _ranked(betweenness)
    actors_eigen = _ranked(eigen)

    # print(x) with a single argument behaves identically on Python 2 and
    # Python 3; print("") stands in for the bare blank-line `print`.
    print("DEGREE: ")
    print(actors_degree)
    print("")
    print("BETWEENNESS")
    print(actors_betweenness)
    print("")
    print("EIGEN")
    print(actors_eigen)
    print("")
def compute_metrics(graph):
    """Compute graph metrics for a node-link JSON graph.

    :param graph: dict in node-link format with at least the keys
        ``'id'``, ``'name'``, ``'nodes'`` and ``'links'``
    :return: new dict with ``'id'``, ``'name'``, ``'graph'`` (the original
        nodes and links) and ``'metrics'``. ``'metrics'`` contains:

        * ``'diameter'``: ``'all'`` (one value per connected component),
          ``'max'`` and ``'average'``
        * ``'maxClique'``: size of the approximate maximum clique
        * ``'degreeCentrality'``, ``'closenessCentrality'``,
          ``'betweennessCentrality'``, ``'pageRank'``: each with ``'byId'``
          (node -> score), ``'max'`` and ``'average'``
    """
    def _summary(scores):
        # 'max' is the largest score. The original code used sum() here,
        # which made 'max' equal to the total of all scores.
        values = list(scores.values())
        return {
            'byId': scores,
            'max': max(values),
            'average': float(sum(values)) / float(len(values)),
        }

    G = json_graph.node_link_graph(graph, multigraph=False)

    degree_centrality = centrality.degree_centrality(G)
    closeness_centrality = centrality.closeness_centrality(G)
    betweenness_centrality = centrality.betweenness_centrality(G)
    page_rank = link_analysis.pagerank_alg.pagerank(G)
    max_clique = approximation.clique.max_clique(G)
    # Diameter is computed per connected component.
    diameters = [distance_measures.diameter(g)
                 for g in connected_component_subgraphs(G)]

    result = dict()
    result['id'] = graph['id']
    result['name'] = graph['name']
    result['graph'] = {'nodes': graph['nodes'], 'links': graph['links']}

    metrics = dict()
    metrics['diameter'] = {
        'all': diameters,
        'max': max(diameters),
        'average': float(sum(diameters)) / float(len(diameters)),
    }
    metrics['maxClique'] = len(list(max_clique))
    metrics['degreeCentrality'] = _summary(degree_centrality)
    metrics['closenessCentrality'] = _summary(closeness_centrality)
    metrics['betweennessCentrality'] = _summary(betweenness_centrality)
    metrics['pageRank'] = _summary(page_rank)
    result['metrics'] = metrics

    return result
def compute_betweenness(G):
    """Return the betweenness centrality of the nodes of ``G``.

    ``G`` is copied edge by edge into an undirected ``nx.Graph`` using its
    ``iternodes()`` and ``neighbors()`` methods. Nodes without any
    neighbour therefore do not appear in the result.

    :param G: graph object providing ``iternodes()`` and ``neighbors(node)``
    :return: dict mapping node -> betweenness centrality
    """
    ng = nx.Graph()
    for start in G.iternodes():
        for other in G.neighbors(start):
            ng.add_edge(start, other)

    # The original re-assigned every value to its own key before
    # returning; that loop had no effect and was removed.
    return centrality.betweenness_centrality(ng)
def compute_betweenness(G):
    """Return the betweenness centrality of the nodes of ``G``.

    ``G`` is copied edge by edge into an undirected ``nx.Graph`` using its
    ``iternodes()`` and ``neighbors()`` methods. Nodes without any
    neighbour therefore do not appear in the result.

    :param G: graph object providing ``iternodes()`` and ``neighbors(node)``
    :return: dict mapping node -> betweenness centrality
    """
    ng = nx.Graph()
    for start in G.iternodes():
        for other in G.neighbors(start):
            ng.add_edge(start, other)

    # The original re-assigned every value to its own key before
    # returning; that loop had no effect and was removed.
    return centrality.betweenness_centrality(ng)
def betweenness_plot(G, title="", fig=1):
    """Plot node degree (y axis) against betweenness centrality (x axis).

    :param G: graph to plot
    :param title: figure title
    :param fig: matplotlib figure number to draw into

    The y axis is capped at 35 and the x axis at 0.4.
    """
    ranked = sorted(betweenness_centrality(G).items(),
                    key=itemgetter(1), reverse=True)
    points = [(G.degree(node), score) for node, score in ranked]
    degrees, scores = zip(*points)

    plt.figure(fig)
    plt.plot(scores, degrees, 'o')
    plt.title(title)
    plt.xlabel("Betweenness Centrality")
    plt.ylabel("Degree of node")
    plt.ylim(top=35)
    plt.xlim(right=0.4)
    plt.tight_layout()
    plt.draw()
def shanghai_average_bc():
    """Append the average betweenness centrality per date to a CSV file.

    ``shanghai_graph_by_date()`` supplies a mapping of date -> graph. For
    each graph the mean betweenness centrality over its nodes is computed,
    and one ``date,average_bc`` row is appended to
    ``<base_dir>/上海分阶段数据/average_bc.csv``.

    The header row is written only when the file is new or empty, so
    repeated runs no longer insert extra header lines between data rows.
    """
    graphs_by_date = shanghai_graph_by_date()

    rows = []
    for date, graph in graphs_by_date.items():
        scores = betweenness_centrality(graph)
        rows.append({
            'date': date.strftime("%Y-%m-%d"),
            'average_bc': sum(scores.values()) / len(scores)
        })

    out_path = os.path.join(base_dir, '上海分阶段数据/average_bc.csv')
    needs_header = (not os.path.exists(out_path)
                    or os.path.getsize(out_path) == 0)
    with open(out_path, 'a') as f:
        w = csv.DictWriter(f, ['date', 'average_bc'])
        if needs_header:
            w.writeheader()
        w.writerows(rows)
def greedy_fragile(self, graph, nedges):
    """Score nodes by how much removing them and their neighbours lowers mean centrality.

    For each candidate node the weighted betweenness of the node and of
    its neighbours is subtracted from the graph total. The remainder is
    averaged over the nodes that are left, and the score is the overall
    mean centrality minus that average.

    :param graph: graph to analyse; node data must contain ``'name'``
    :param nedges: iterable of ``(node_id, data_dict)`` pairs to score, or
        ``None`` to score every node of ``graph``
    :return: dict mapping node name -> score. Candidates that are not in
        ``graph`` get ``0``.
    :raises ZeroDivisionError: if a candidate together with its
        neighbours covers the whole graph (nothing remains to average)
    """
    nodes = centrality.betweenness_centrality(graph, weight='weight')
    nwc = float(sum(nodes.values()) / len(nodes))
    total_centrality = graph.order() * nwc
    result = {}

    if nedges is None:
        nedges = graph.nodes(data=True)

    for n in nedges:
        if graph.has_node(n[0]):
            # Materialise the neighbours once per candidate. list() copes
            # with neighbors() returning either a list or an iterator.
            neighbours = list(graph.neighbors(n[0]))
            neighbour_set = set(neighbours)
            neigh_central = sum(
                v for k, v in nodes.items() if k in neighbour_set)
            order = graph.order() - (1 + len(neighbours))
            mc = nodes[n[0]] + neigh_central
            gf = nwc - ((total_centrality - mc) / order)
            result[n[1]['name']] = gf
        else:
            result[n[1]['name']] = 0

    return result
def subproblem(self, c, clusterproblem):
    """Build the basic ``ConnectivityProblem`` induced by cluster ``c``.

    The sub-problem contains the agents of cluster ``c`` plus one static
    agent (the submaster) for every child cluster, placed on that child's
    connecting node. Its graph is a deep copy of the cluster problem's
    graph restricted to the cluster's subgraph and those connecting nodes.
    Rewards are betweenness centralities scaled so that the largest one
    equals ``clusterproblem.max_centrality_reward``.

    :param c: cluster identifier
    :param clusterproblem: parent problem providing ``graph``,
        ``static_agents``, ``eagents``, ``big_agents`` and
        ``max_centrality_reward``
    :return: the ``ConnectivityProblem`` for the cluster
    """
    agents = {}
    for r in self.agent_clusters[c]:
        agents[r] = clusterproblem.graph.agents[r]

    static_agents = []
    for r in agents.keys():
        if r in clusterproblem.static_agents:
            static_agents.append(r)

    # Each child cluster contributes its submaster as a static agent that
    # sits on the child's connecting node.
    addn_nodes = set()
    for child in self.child_clusters[c]:
        submaster = self.submasters[child[0]]
        agents[submaster] = child[1]
        static_agents.append(submaster)
        addn_nodes.add(child[1])

    # Restrict a private copy of the graph to this cluster.
    G = deepcopy(clusterproblem.graph)
    keep = self.subgraphs[c]
    G.remove_nodes_from(set(clusterproblem.graph.nodes) - keep - addn_nodes)
    G.init_agents(agents)

    # Basic rewards based on centrality, scaled by the largest value
    # (or by 1 when every value is zero).
    raw_rewards = betweenness_centrality(nx.DiGraph(G))
    norm = max(raw_rewards.values())
    if norm == 0:
        norm = 1
    scale = clusterproblem.max_centrality_reward
    reward_dict = {}
    for v, val in raw_rewards.items():
        reward_dict[v] = scale * val / norm

    eagents = [r for r in agents if r in clusterproblem.eagents]
    big_agents = [r for r in agents if r in clusterproblem.big_agents]
    return ConnectivityProblem(
        graph=G,
        static_agents=static_agents,
        eagents=eagents,
        big_agents=big_agents,
        reward_dict=reward_dict,
    )
def centralization_metrics(G, prefix=""):
    """Compute centralization scores for a directed or undirected graph.

    For each centrality measure the score is the mean, over all nodes, of
    the gap between the largest centrality and the node's own centrality.

    :param G: graph to analyse
    :param prefix: text inserted after ``cent`` in the result keys
    :return: dict with keys ``cent{prefix}_betweenness_mean``,
        ``cent{prefix}_closeness_mean``, ``cent{prefix}_eigenvec_mean`` and
        ``cent{prefix}_pagerank_mean``. The eigenvector entry is ``None``
        for graphs with at most two nodes or when its computation fails;
        the pagerank entry is ``None`` when its computation fails.
    """
    def _mean_gap_to_max(scores):
        # The builtin float replaces np.float, which was only an alias for
        # it and no longer exists in current NumPy releases.
        arr = np.fromiter(scores.values(), dtype=float)
        return np.mean(np.max(arr) - arr)

    # Betweenness is expensive; sampling with k=min(10, len(G)) would be
    # an option if it becomes a bottleneck.
    betweenness_mean = _mean_gap_to_max(
        betweenness_centrality(G, normalized=True))

    # Closeness is also expensive. wf_improved=False normalises by the
    # size of the connected component.
    closeness_mean = _mean_gap_to_max(
        closeness_centrality(G, wf_improved=False))

    # Eigenvector centrality: best effort, skipped for tiny graphs.
    eigenvec_mean = None
    if len(G) > 2:
        try:
            eigenvec_mean = _mean_gap_to_max(eigenvector_centrality_numpy(G))
        except Exception:
            eigenvec_mean = None

    # PageRank: best effort.
    try:
        pagerank_mean = _mean_gap_to_max(pagerank_numpy(G))
    except Exception:
        pagerank_mean = None

    centralization = {
        f"cent{prefix}_betweenness_mean": betweenness_mean,
        f"cent{prefix}_closeness_mean": closeness_mean,
        f"cent{prefix}_eigenvec_mean": eigenvec_mean,
        f"cent{prefix}_pagerank_mean": pagerank_mean
    }
    return centralization
def get_betweenness_centrality(G, **kwargs):
    """Return a dictionary of betweenness centrality values for all nodes.

    The centrality is computed on the graph returned by
    ``get_graph_without_glycine`` and then passed through
    ``finalize_dict`` together with the original graph ``G``.

    Required keyword arguments: ``identifiers``, ``residue_names``,
    ``normalized``, ``weight`` and ``endpoints``.
    """
    # Graph without glycine residues
    glycine_free = get_graph_without_glycine(
        G, kwargs["identifiers"], kwargs["residue_names"])

    # Options forwarded to the centrality routine
    normalized = kwargs["normalized"]
    weight = kwargs["weight"]
    endpoints = kwargs["endpoints"]

    centrality_dict = nxc.betweenness_centrality(
        G=glycine_free,
        normalized=normalized,
        weight=weight,
        endpoints=endpoints)

    return finalize_dict(G, centrality_dict)
def top_nodes(G, k=3):
    """
    Returns the top k nodes for various centrality measures: degree,
    betweenness and closeness.

    Args:
        G (nx.Graph): graph for which the top nodes must be determined.
        k (int): number of top nodes to return. If zero or negative, all
            the nodes are returned.

    Returns:
        res_dict (dict): maps ``"degree"``, ``"betweenness"`` and
            ``"closeness"`` to a tuple of nodes, highest score first.
            The tuples are empty for a graph without nodes (this used to
            raise IndexError).
    """
    def _leaders(scores):
        # Rank in decreasing order of score, then keep the first k.
        ranked = sorted(scores.items(), key=lambda item: -item[1])
        if k > 0:
            ranked = ranked[:k]
        return tuple(node for node, _ in ranked)

    res_dict = dict()
    res_dict["degree"] = _leaders(centrality.degree_centrality(G))
    res_dict["betweenness"] = _leaders(centrality.betweenness_centrality(G))
    res_dict["closeness"] = _leaders(centrality.closeness_centrality(G))
    return res_dict
def parse(name):
    """Annotate the nodes of ``<name>.json`` with centrality scores.

    Reads ``<dir of this file>/<name>.json``, which must contain
    ``'nodes'`` (each with ``'text'`` and ``'weight'``) and ``'links'``
    (each with ``'source'`` and ``'target'``). Nodes are identified by
    their position in the ``'nodes'`` list. The result is written to
    ``<dir of this file>/../data/<name>.json`` with, per node, the keys
    ``text``, ``weight``, ``degree``, ``closeness``, ``betweenness``,
    ``eigenvector`` and ``katz``. Links are copied unchanged.

    :param name: base name (without extension) of the input/output file
    """
    print(name)
    pathbase = path.abspath(path.dirname(__file__))
    G = nx.Graph()

    # Context managers make sure both files are closed, and the output is
    # flushed, even if something in between raises.
    with open('{0}/{1}.json'.format(pathbase, name)) as source:
        data = json.load(source)

    nodes = data['nodes']
    node_count = len(nodes)
    text = {i: node['text'] for i, node in enumerate(nodes)}
    weight = {i: float(node['weight']) for i, node in enumerate(nodes)}

    G.add_nodes_from(range(node_count))
    for link in data['links']:
        G.add_edge(link['source'], link['target'])

    degree = centrality.degree_centrality(G)
    closeness = centrality.closeness_centrality(G)
    betweenness = centrality.betweenness_centrality(G)
    try:
        eigenvector = centrality.eigenvector_centrality(G, max_iter=1000)
    except nx.NetworkXException:
        # Best effort: fall back to zeros when NetworkX cannot compute
        # the measure (for example when power iteration fails).
        eigenvector = {i: 0 for i in range(node_count)}
    katz = centrality.katz_centrality(G)

    obj = {
        'nodes': [
            {
                'text': text[i],
                'weight': weight[i],
                'degree': degree[i],
                'closeness': closeness[i],
                'betweenness': betweenness[i],
                'eigenvector': eigenvector[i],
                'katz': katz[i],
            }
            for i in range(node_count)
        ],
        'links': data['links'],
    }

    with open('{0}/../data/{1}.json'.format(pathbase, name), 'w') as target:
        json.dump(obj, target, sort_keys=True)
def greedy_fragile(self, graph, nedges):
    """Score nodes by how much removing them and their neighbours lowers mean centrality.

    For each candidate node the weighted betweenness of the node and of
    its neighbours is subtracted from the graph total. The remainder is
    averaged over the nodes that are left, and the score is the overall
    mean centrality minus that average.

    :param graph: graph to analyse; node data must contain ``'name'``
    :param nedges: iterable of ``(node_id, data_dict)`` pairs to score, or
        ``None`` to score every node of ``graph``
    :return: dict mapping node name -> score. Candidates that are not in
        ``graph`` get ``0``.
    :raises ZeroDivisionError: if a candidate together with its
        neighbours covers the whole graph (nothing remains to average)
    """
    nodes = centrality.betweenness_centrality(graph, weight='weight')
    nwc = float(sum(nodes.values()) / len(nodes))
    total_centrality = graph.order() * nwc
    result = {}

    if nedges is None:
        nedges = graph.nodes(data=True)

    for n in nedges:
        if graph.has_node(n[0]):
            # Materialise the neighbours once per candidate. list() copes
            # with neighbors() returning either a list or an iterator.
            neighbours = list(graph.neighbors(n[0]))
            neighbour_set = set(neighbours)
            neigh_central = sum(
                v for k, v in nodes.items() if k in neighbour_set)
            order = graph.order() - (1 + len(neighbours))
            mc = nodes[n[0]] + neigh_central
            gf = nwc - ((total_centrality - mc) / order)
            result[n[1]['name']] = gf
        else:
            result[n[1]['name']] = 0

    return result
def chengdu_bc_with_date():
    """
    Write the betweenness-centrality ranking for each period.

    ``chengdu_graph_by_date()`` supplies a mapping of date -> graph. For
    every date the nodes are ranked by descending betweenness centrality
    and appended as ``name,BC`` rows to
    ``<base_dir>/分阶段数据/<YYYY-MM-DD>.csv``.

    The header row is written only when the file is new or empty, so
    repeated runs no longer insert extra header lines between data rows.

    :return: None
    """
    graphs_by_date = chengdu_graph_by_date()
    for date, graph in graphs_by_date.items():
        scores = betweenness_centrality(graph)
        ranked = sorted(scores.items(), key=lambda t: t[1], reverse=True)
        rows = [{'name': name, 'BC': bc} for name, bc in ranked]

        out_path = os.path.join(
            base_dir, '分阶段数据/{}.csv'.format(date.strftime("%Y-%m-%d")))
        needs_header = (not os.path.exists(out_path)
                        or os.path.getsize(out_path) == 0)
        with open(out_path, 'a') as f:
            w = csv.DictWriter(f, ['name', 'BC'])
            if needs_header:
                w.writeheader()
            w.writerows(rows)
def graph_stats(G):
    """
    Compute all the graph-related statistics in the features.

    All measures are the weighted versions (edge attribute ``'weight'``).
    The original note explained this by the graph always being fully
    connected.
    NOTE(review): the original docstring also said many functions come
    from bctpy rather than NetworkX; which library backs ``clustering``,
    ``betweenness_centrality`` etc. is not visible here - confirm.

    :param G: weighted graph
    :return: 1-D array holding, in order: the clustering coefficient of
        every node (sorted by node), the betweenness centrality of every
        node (sorted by node), the eccentricity of every node (sorted by
        source node), then the diameter, radius and average shortest path
        length
    """
    # Local measures
    clustering_dict = clustering(G, weight='weight')
    betweenness_centrality_dict = betweenness_centrality(G, weight='weight')

    # Eccentricity of a node is its largest weighted distance to any
    # other node. Sorting on the source alone keeps the node order
    # without ever comparing the distance dicts.
    paths = shortest_path_length(G, weight='weight')
    eccentricities = [
        max(dists.values())
        for (source, dists) in sorted(paths, key=lambda item: item[0])
    ]

    local_measures = np.concatenate(
        [[v for (k, v) in sorted(clustering_dict.items())],
         [v for (k, v) in sorted(betweenness_centrality_dict.items())],
         eccentricities])

    # Global measures. The dense adjacency matrix the original built here
    # was never used and has been dropped.
    graph_diameter = max(eccentricities)
    graph_radius = min(eccentricities)
    aspl = average_shortest_path_length(G, weight='weight')
    global_measures = np.array([graph_diameter, graph_radius, aspl])

    return np.concatenate([local_measures, global_measures])
def get_bc_info(g, k_top= 5):
    """Compute betweenness centrality for ``g`` and summarise it.

    :param g: graph to analyse
    :param k_top: number of highest-scoring nodes to report
    :return: dict with ``"BC_dict"`` (node -> score), ``"avg_BC"`` (mean
        over all nodes), ``"max_BCs"`` (list of the ``k_top``
        ``(node, score)`` pairs with the highest scores), ``"avg_max_BCs"``
        (mean score of those pairs) and ``"time"`` (seconds spent in the
        centrality computation, rounded to two decimals)
    """
    began = time.time()
    BC_dict = betweenness_centrality(g)
    elapsed = round(time.time() - began, 2)

    # (node, bc_value) pairs, best first
    ranked = sorted(BC_dict.items(), key=operator.itemgetter(1), reverse=True)
    max_BCs = ranked[:k_top]

    avg_BC = sum(BC_dict.values()) / len(BC_dict)
    avg_max_BCs = sum(pair[1] for pair in max_BCs) / len(max_BCs)

    return {
        "BC_dict": BC_dict,
        "avg_BC": avg_BC,
        "max_BCs": max_BCs,
        "avg_max_BCs": avg_max_BCs,
        "time": elapsed,
    }
def connectivity_matrix(graph):
    '''
    Build a square matrix of pairwise connectivity weights.

    graph: Architecture graph, assumed bidirected for now. Its nodes are
    looked up by the integers 0 .. len(graph) - 1.

    Diagonal entries are 1. Entry (i, j) with i != j is
    (1 - bc[i]) * (1 - bc[j]), where bc is the betweenness centrality
    computed with k = len(graph).
    '''
    size = len(graph)
    con_m = np.zeros((size, size))

    # Betweenness centrality for every node of the graph
    betweenness = betweenness_centrality(graph, k=size)

    for i, j in product(range(size), repeat=2):
        # self-interaction on the diagonal, pair interaction elsewhere
        con_m[i, j] = (
            1 if i == j else (1 - betweenness[i]) * (1 - betweenness[j]))
    return con_m
# Extract the structural backbone: two sparse graphs (g_simple, g_mst) plus
# the ridge dimensions. The inputs T, data, s, opt, docstr and anno are
# defined outside this fragment.
g_simple, g_mst, ridge_dims = extract_structural_backbone(T, data, s, max_angle=opt.maxangle, relaxation=opt.relaxation)

# Persist both graphs and the ridge dimensions next to the other outputs.
# NOTE(review): mmwrite is presumably scipy.io.mmwrite (Matrix Market) - confirm.
mmwrite(opt.output + '.' + docstr + '.g_simple.mm', g_simple)
mmwrite(opt.output + '.' + docstr + '.g_mst.mm', g_mst)
np.savetxt(opt.output + '.' + docstr + '.ridge_dims', ridge_dims, fmt='%d')

# Point table for plotting: the first two columns of T as x/y, coloured by
# the selected annotation column converted to strings.
df = pd.DataFrame({'x': T[:, 0], 'y': T[:, 1], 'c': anno[opt.anno_column].map(str)})

# Segment table for g_simple: one row per non-zero entry, holding the start
# and end coordinates of that edge.
df_e = pd.DataFrame({'xs': T[g_simple.nonzero()[0], 0], 'xe': T[g_simple.nonzero()[1], 0], 'ys': T[g_simple.nonzero()[0], 1], 'ye': T[g_simple.nonzero()[1], 1]})
p = ggplot(df) + \
    geom_segment(mapping=aes(x='xs', xend='xe', y='ys', yend='ye'), data=df_e, size=0.5) + \
    geom_point(mapping=aes('x', 'y', color='c'), size=0.5) + theme_minimal()
p.save(opt.output + '.' + docstr + '.g_simple.pdf')

# Same plot for g_mst (df_e and p are rebound).
df_e = pd.DataFrame({'xs': T[g_mst.nonzero()[0], 0], 'xe': T[g_mst.nonzero()[1], 0], 'ys': T[g_mst.nonzero()[0], 1], 'ye': T[g_mst.nonzero()[1], 1]})
p = ggplot(df) + \
    geom_segment(mapping=aes(x='xs', xend='xe', y='ys', yend='ye'), data=df_e, size=0.5) + \
    geom_point(mapping=aes('x', 'y', color='c'), size=0.5) + theme_minimal()
p.save(opt.output + '.' + docstr + '.g_mst.pdf')

# Betweenness centrality of g_simple, computed with k = min(500, node count)
# and normalized=False, then divided by the node count before being written
# to CSV.
G_simple = nx.from_scipy_sparse_matrix(g_simple)
nodes_bc = betweenness_centrality(G_simple, k=np.minimum(500, g_simple.shape[0]), normalized=False)
pd.DataFrame(pd.Series(nodes_bc) / g_simple.shape[0]).to_csv(opt.output + '.' + docstr + '.g_simple.bc.csv')

# Same for g_mst (nodes_bc is rebound).
G_mst = nx.from_scipy_sparse_matrix(g_mst)
nodes_bc = betweenness_centrality(G_mst, k=np.minimum(500, g_mst.shape[0]), normalized=False)
pd.DataFrame(pd.Series(nodes_bc) / g_mst.shape[0]).to_csv(opt.output + '.' + docstr + '.g_mst.bc.csv')
def get_metric_from_graph(self, metric=None, nedges=None, keyword=None, graph=None, month=None):
    """Return a named metric for the nodes (or links) of ``graph``.

    Results are cached under a key built from ``metric``, ``month`` and
    ``keyword``; a cached result is returned without touching the graph.
    Note that ``nedges`` is not part of the cache key.

    :param metric: one of ``u'Degree'``, ``u'Gatekeepership'``,
        ``u'Closeness Centrality'``, ``u'Betweenness'``,
        ``u'Greedy_Fragile'``, ``u'Link Centrality'``, ``u'Predicted Links'``
    :param nedges: optional list of ``(node_id, data)`` pairs restricting
        the result to those nodes. For link metrics a link is kept when
        either endpoint is listed.
    :param keyword: optional node ``'type'`` value; when given, ``nedges``
        is replaced by all nodes of that type
    :param graph: graph to analyse
    :param month: only used as part of the cache key
    :return: dict mapping node name (or ``"<name> - <name>"`` for links)
        to the metric value
    :raises ValueError: if ``metric`` is not one of the supported names
        (this used to fail with an unbound local variable)
    """
    ck = str(metric) + str(month) + str(keyword)
    if ck in self.cache:
        return self.cache[ck]

    g = graph
    if keyword:
        # A keyword restricts the metric to nodes of that type.
        nedges = [node for node in g.nodes_iter(data=True) if node[1]['type'] == keyword]

    def _allowed_ids():
        # Set of node ids to keep, or None when no restriction applies.
        # Built only by the branches that filter themselves.
        return set(n[0] for n in nedges) if nedges else None

    def _named_scores(scores):
        # Re-key a node-id -> value dict by node name, applying the filter.
        allowed = _allowed_ids()
        if allowed is None:
            return {g.node[k]['name']: v for k, v in scores.items()}
        return {g.node[k]['name']: v for k, v in scores.items() if k in allowed}

    if metric == u'Degree':
        upshot = self.degree(g, nedges)
    elif metric == u'Gatekeepership':
        upshot = self.gatekeeper(g, nedges)
    elif metric == u'Closeness Centrality':
        upshot = _named_scores(centrality.closeness_centrality(g, normalized=True))
    elif metric == u'Betweenness':
        upshot = _named_scores(
            centrality.betweenness_centrality(g, weight='weight', normalized=True))
    elif metric == u'Greedy_Fragile':
        upshot = self.greedy_fragile(g, nedges)
    elif metric == u'Link Centrality':
        u = centrality.edge_betweenness_centrality(g, weight='weight', normalized=True)
        allowed = _allowed_ids()
        upshot = {}
        for k, v in u.items():
            a, b = k
            if allowed is None or a in allowed or b in allowed:
                # Always label links by node *names*. The unfiltered case
                # used the raw node ids, unlike the filtered case and the
                # 'Predicted Links' metric.
                c = g.node[a]['name']
                d = g.node[b]['name']
                upshot[unicode(c + ' - ' + d)] = v
    elif metric == u'Predicted Links':
        gr = self.make_unigraph_from_multigraph(mg=g)
        allowed = _allowed_ids()
        upshot = {}
        for k, v, p in link_prediction.resource_allocation_index(gr):
            # The index is reported for every non-existent edge, including
            # those scoring zero, so keep only positive values.
            if p > 0:
                a = g.node[k]['name']
                b = g.node[v]['name']
                if allowed is None or k in allowed or v in allowed:
                    upshot[unicode(a + ' - ' + b)] = p
    else:
        raise ValueError('Unknown metric: %r' % (metric,))

    self.cacheflow(ck, data=upshot)
    return upshot
def calculate_all_centralities(data):
    """
    Calculates all four centrality metrics for the input graph.

    Parameters:
        data: a json object (node-link format) which represents the graph.
              It is modified in place: every entry of ``data['nodes']``
              gains ``degreeCentrality``, ``closenessCentrality``,
              ``betweennessCentrality`` and ``eigenvectorCentrality``
              (each divided by the largest value of that metric) plus the
              matching ``...Unnormalized`` raw values.

    Returns:
        the same ``data`` object.

    If eigenvector centrality cannot be computed, both eigenvector fields
    are set to 1.0 for every node.
    """
    def _scale_by_max(scores):
        # Divide every non-zero score by the largest one, in place.
        if not scores:
            return
        peak = max(scores.values())
        for key, value in scores.items():
            if value != 0:
                scores[key] = value / peak

    G = json_graph.node_link_graph(data)  # loads the data to a NetworkX graph object

    degree = centrality.degree_centrality(G)
    closeness = centrality.closeness_centrality(G)
    betweeness = centrality.betweenness_centrality(G)

    eigenvector_fail = False
    try:
        # Eigenvector centrality can fail to converge.
        eigenvector = centrality.eigenvector_centrality(DiGraph(G), max_iter=100000)
    except NetworkXError:
        # NOTE(review): newer NetworkX releases signal non-convergence with
        # PowerIterationFailedConvergence, which is not a NetworkXError -
        # confirm against the installed version.
        eigenvector = {}
        eigenvector_fail = True
        # print(...) with one argument works on both Python 2 and 3.
        print("Max iterations exceeded")

    # Store the raw values before they are normalised in place.
    for author in data['nodes']:
        i = author['id']
        author['degreeCentralityUnnormalized'] = degree[i]
        author['closenessCentralityUnnormalized'] = closeness[i]
        author['betweennessCentralityUnnormalized'] = betweeness[i]
        author['eigenvectorCentralityUnnormalized'] = eigenvector[i] if not eigenvector_fail else 1.0

    for scores in (degree, closeness, betweeness, eigenvector):
        _scale_by_max(scores)

    # Store the normalised values.
    for author in data['nodes']:
        i = author['id']
        author['degreeCentrality'] = degree[i]
        author['closenessCentrality'] = closeness[i]
        author['betweennessCentrality'] = betweeness[i]
        author['eigenvectorCentrality'] = eigenvector[i] if not eigenvector_fail else 1.0

    return data
def calculate_all_centralities(data):
    """
    Calculates all four centrality metrics for the input graph.

    Parameters:
        data: a json object (node-link format) which represents the graph.
              It is modified in place: every entry of ``data['nodes']``
              gains ``degreeCentrality``, ``closenessCentrality``,
              ``betweennessCentrality`` and ``eigenvectorCentrality``
              (each divided by the largest value of that metric) plus the
              matching ``...Unnormalized`` raw values.

    Returns:
        the same ``data`` object.

    If eigenvector centrality cannot be computed, both eigenvector fields
    are set to 1.0 for every node.
    """
    def _scale_by_max(scores):
        # Divide every non-zero score by the largest one, in place.
        if not scores:
            return
        peak = max(scores.values())
        for key, value in scores.items():
            if value != 0:
                scores[key] = value / peak

    G = json_graph.node_link_graph(data)  # loads the data to a NetworkX graph object

    degree = centrality.degree_centrality(G)
    closeness = centrality.closeness_centrality(G)
    betweeness = centrality.betweenness_centrality(G)

    eigenvector_fail = False
    try:
        # Eigenvector centrality can fail to converge.
        eigenvector = centrality.eigenvector_centrality(DiGraph(G), max_iter=100000)
    except NetworkXError:
        # NOTE(review): newer NetworkX releases signal non-convergence with
        # PowerIterationFailedConvergence, which is not a NetworkXError -
        # confirm against the installed version.
        eigenvector = {}
        eigenvector_fail = True
        # print(...) with one argument works on both Python 2 and 3.
        print("Max iterations exceeded")

    # Store the raw values before they are normalised in place.
    for author in data['nodes']:
        i = author['id']
        author['degreeCentralityUnnormalized'] = degree[i]
        author['closenessCentralityUnnormalized'] = closeness[i]
        author['betweennessCentralityUnnormalized'] = betweeness[i]
        author['eigenvectorCentralityUnnormalized'] = eigenvector[i] if not eigenvector_fail else 1.0

    for scores in (degree, closeness, betweeness, eigenvector):
        _scale_by_max(scores)

    # Store the normalised values.
    for author in data['nodes']:
        i = author['id']
        author['degreeCentrality'] = degree[i]
        author['closenessCentrality'] = closeness[i]
        author['betweennessCentrality'] = betweeness[i]
        author['eigenvectorCentrality'] = eigenvector[i] if not eigenvector_fail else 1.0

    return data
def std_betweeness_centrality(self, graph):
    """Return the standard deviation of the betweenness centrality over all nodes of ``graph``."""
    scores = list(betweenness_centrality(graph).values())
    return np.std(scores)
def analyze(directed_df, undirected_df, auxiliary_df):
    """Compute node centralities and attach them to a copy of ``auxiliary_df``.

    Parameters:
        directed_df: edge list of the directed graph. Column names are
            lower-cased before use; ``weight`` and ``change`` columns are
            required, and the endpoint columns are whatever
            ``nx.from_pandas_edgelist`` uses by default (presumably
            ``source``/``target`` -- confirm against the caller).
        undirected_df: edge list of the undirected graph, same layout.
        auxiliary_df: frame with a ``docid`` column holding node identifiers.

    Returns:
        A deep copy of ``auxiliary_df`` with one extra column per centrality
        (out/in/undirected x degree, eigenvector, closeness, betweenness,
        katz). Rows whose ``docid`` is not a node of the relevant graph get
        NaN in that column. The resulting frame is also printed.

    None of the input frames is modified.
    """
    # rename() returns a new frame, so the caller's frames are untouched.
    directed_df = directed_df.rename(mapper=lambda name: name.lower(),
                                     axis='columns')
    undirected_df = undirected_df.rename(mapper=lambda name: name.lower(),
                                         axis='columns')

    G = nx.from_pandas_edgelist(directed_df,
                                edge_attr=['weight', 'change'],
                                create_using=nx.DiGraph)
    G_undirected = nx.from_pandas_edgelist(undirected_df,
                                           edge_attr=['weight', 'change'])
    # Build the reversed graph once; "in" centralities are "out" centralities
    # of the reversed graph.
    G_reversed = G.reverse()

    # Katz attenuation factor: a fixed fraction of 1 / (largest real part of
    # the adjacency spectrum), computed separately for each graph.
    alpha_coef = 0.9
    alpha = alpha_coef / max(nx.adjacency_spectrum(G).real)
    alpha_undirected = alpha_coef / max(
        nx.adjacency_spectrum(G_undirected).real)

    centralities = {
        'out_degree':
        weighted_degree_centrality(G),
        'in_degree':
        weighted_degree_centrality(G_reversed),
        'undirected_degree':
        weighted_degree_centrality(G_undirected),
        'out_eigenvector':
        centrality.eigenvector_centrality(G, weight='weight'),
        'in_eigenvector':
        centrality.eigenvector_centrality(G_reversed, weight='weight'),
        'undirected_eigenvector':
        centrality.eigenvector_centrality(G_undirected, weight='weight'),
        'out_closeness':
        centrality.closeness_centrality(G, distance='weight'),
        'in_closeness':
        centrality.closeness_centrality(G_reversed, distance='weight'),
        'undirected_closeness':
        centrality.closeness_centrality(G_undirected, distance='weight'),
        'out_betweenness':
        centrality.betweenness_centrality(G, weight='weight'),
        'in_betweenness':
        centrality.betweenness_centrality(G_reversed, weight='weight'),
        'undirected_betweenness':
        centrality.betweenness_centrality(G_undirected, weight='weight'),
        'out_katz':
        centrality.katz_centrality(G, alpha=alpha, weight='weight'),
        'in_katz':
        centrality.katz_centrality(G_reversed, alpha=alpha, weight='weight'),
        # The undirected graph has its own spectrum, hence its own alpha.
        'undirected_katz':
        centrality.katz_centrality(G_undirected,
                                   alpha=alpha_undirected,
                                   weight='weight'),
    }

    augmented_auxiliary_df = auxiliary_df.copy(deep=True)
    doc_ids = augmented_auxiliary_df['docid']
    for centrality_type, values in centralities.items():
        # Series.map with a dict yields NaN for ids missing from the dict, so
        # every centrality column exists even when no docid matches.
        augmented_auxiliary_df[centrality_type] = doc_ids.map(values)

    print(augmented_auxiliary_df)
    return augmented_auxiliary_df
def df_from_betweeness(inG):
    """Return the betweenness centrality of every node as a sorted DataFrame.

    Prints the graph order, computes ``centrality.betweenness_centrality(inG)``
    and returns a frame with columns ``tag`` (node) and ``betweenness``
    (score), ordered from highest to lowest score.
    """
    print(f'calculate betweenness, network of size {inG.order()}')
    scores = centrality.betweenness_centrality(inG)
    frame = pd.DataFrame(scores.items(), columns=['tag', 'betweenness'])
    return frame.sort_values('betweenness', ascending=False)
def betweennesscentrality(self, brain, outfilebase="brain", append=True):
    """
    Calculates node and hub betweenness centralities and writes them to files.

    Three files are produced, all named from ``outfilebase``:

    * ``<outfilebase>_betweenness_centralities_nodes``: one row of
      centralities per call, one column per node of ``brain.G``; a header row
      is written only when the file is created or overwritten.
    * ``<outfilebase>_betweenness_centralities_hubs``: one row of centralities
      per call, one column per entry of ``brain.hubs`` (empty for hubs that
      are not in ``brain.G``).
    * ``<outfilebase>_betweenness_centralities_hubs_ids``: one row of hub
      identities per call, matching the rows of the hubs file.

    Parameters:
        brain: object exposing ``G`` (a graph) and ``hubs`` (a sequence of
            node identifiers).
        outfilebase: prefix of the output file names.
        append: when true and ``self.fileCheck`` reports a true value for the
            target file, rows are appended; otherwise the file is overwritten.

    NOTE(review): files are opened in binary mode ("ab"/"wb"), which is the
    Python 2 convention for the csv module. Under Python 3 csv writers need
    text mode with ``newline=''`` -- confirm the target interpreter.
    """
    ## betweenness centrality
    # node centrality
    outfile = outfilebase + '_betweenness_centralities_nodes'
    boolVal = self.fileCheck(outfile, append)
    append_nodes = append and boolVal

    # Compute everything before touching the file so a failure here cannot
    # leave a truncated output file behind.
    # NOTE(review): the original comment said this is computed "for largest
    # connected component"; the code simply uses brain.G as given.
    centralities = centrality.betweenness_centrality(brain.G)

    # Start from a blank entry for every node, then fill in computed values.
    nodecentralitiestowrite = dict((n, None) for n in brain.G.nodes())
    nodecentralitiestowrite.update(centralities)

    # The context manager closes the file even if a write raises.
    with open(outfile, "ab" if append_nodes else "wb") as f:
        writeObj = DictWriter(f, fieldnames=brain.G.nodes())
        if not append_nodes:
            # New or overwritten file: write the header row first.
            writeObj.writerow(dict((n, n) for n in brain.G.nodes()))
        writeObj.writerow(nodecentralitiestowrite)

    ## ==================================================================
    ## hub centrality
    outfile = outfilebase + '_betweenness_centralities_hubs'
    hubidfile = outfilebase + '_betweenness_centralities_hubs_ids'

    OFbool = self.fileCheck(outfile, append)
    self.fileCheck(hubidfile, append)
    # Both hub files share one mode, decided by the values file alone.
    hub_mode = "ab" if (append and OFbool) else "wb"

    # Only hubs that are actually nodes of brain.G have a centrality value.
    centhubs = [hub for hub in brain.hubs if hub in brain.G]

    headers = dict((n, n) for n in brain.hubs)
    hubcentralitieistowrite = dict((n, None) for n in brain.hubs)
    for hub in centhubs:
        hubcentralitieistowrite[hub] = nodecentralitiestowrite[hub]

    with open(outfile, hub_mode) as f, open(hubidfile, hub_mode) as g:
        # Hub identities are written on every call so that rows of the two
        # files stay aligned.
        DictWriter(g, fieldnames=brain.hubs).writerow(headers)
        DictWriter(f, fieldnames=brain.hubs).writerow(hubcentralitieistowrite)
# The last comma-separated field of every row after the header is the weight.
weights = [r.split(',')[-1] for r in rows[1:]]
edge_tuples = [(e[0], e[1], int(weights[i])) for i, e in enumerate(edges)]

# Only get edges for the select nodes in the node csv.
# Materialize the ids once and use a set for O(1) membership tests instead of
# rebuilding and scanning a list for every edge endpoint.
node_id_list = list(node_ids)
selected_ids = set(node_id_list)
edges = [
    e for e in edge_tuples
    if e[0] in selected_ids and e[1] in selected_ids
]

# Initialize graph, add nodes and edges, calculate modularity and centrality.
G = nx.Graph()
G.add_nodes_from(node_id_list)
G.add_weighted_edges_from(edges)
groups = community.best_partition(G)
degree = cn.degree_centrality(G)
betweenness = cn.betweenness_centrality(G, weight='weight')
eigenvector = cn.eigenvector_centrality(G, weight='weight')

# Add node attributes for name, modularity, and three types of centrality.
# Keyword arguments are used because the positional order of `name` and
# `values` was swapped between NetworkX 1.x and 2.x.
nx.set_node_attributes(G, name='name', values=node_dict)
nx.set_node_attributes(G, name='group', values=groups)
nx.set_node_attributes(G, name='degree', values=degree)
nx.set_node_attributes(G, name='betweenness', values=betweenness)
nx.set_node_attributes(G, name='eigenvector', values=eigenvector)

# Create json representation of the graph (for d3).
data = json_graph.node_link_data(G)

# You could create the needed json without NetworkX (but you would forfeit network metrics).
#new_data = dict(nodes=[dict(id=n) for n in list(set(nodes))], links=[dict(source=node_dict[e[0]], target=node_dict[e[1]], weight=e[2]) for e in edges])