def get_layer_info(subject, journal_volume, edge_list):
    G = nx.Graph()
    G.add_weighted_edges_from(edge_list)
    PATH = "C:/Users/hexie/Documents/APS_result/" + str(journal_volume) + "/" + str(subject)
    try:
        os.mkdir(PATH)
        os.chdir(PATH)
    except OSError:
        # directory already exists
        os.chdir(PATH)
    degree_centrality = nxc.degree_centrality(G)
    try:
        eigen_vector_centrality = nxc.eigenvector_centrality(G)
        np.save("eigen_vector_centrality.npy", eigen_vector_centrality)
    except nx.PowerIterationFailedConvergence:
        print("failed to converge within 100 iterations of the power method")
    closeness_centrality = nxc.closeness_centrality(G)
    betweeness_centrality = nxc.betweenness_centrality(G)
    np.save("degree_centrality.npy", degree_centrality)
    np.save("closeness_centrality.npy", closeness_centrality)
    np.save("betweeness_centrality.npy", betweeness_centrality)
    with open(str(subject) + str(journal_volume) + ".txt", 'w') as f:
        f.write('Number of Edges: ' + str(nx.number_of_edges(G)) + "\n")
        f.write('Number of Nodes: ' + str(nx.number_of_nodes(G)) + "\n")
    nx.draw(G)
    plt.savefig(str(subject) + str(journal_volume) + ".png")
    plt.clf()
def get_centrality(x, edge_index, batch):
    num_graphs = batch[-1] + 1
    N = x.shape[0]
    num_nodes = scatter_add(batch.new_ones(x.size(0)), batch, dim=0)
    cum_num_nodes = torch.cat(
        [num_nodes.new_zeros(1), num_nodes.cumsum(dim=0)[:-1]], dim=0)
    cum_num_nodes = torch.cat((cum_num_nodes, torch.tensor([N]).cuda()))
    row, col = edge_index
    c_centrality = []
    d_centrality = []
    for i in range(num_graphs):
        # each graph
        s_id = cum_num_nodes[i]
        e_id = cum_num_nodes[i + 1]
        mask = torch.eq(row, s_id)
        for node in range(s_id + 1, e_id):
            mask = mask + torch.eq(row, node)
        g_row = torch.masked_select(row, mask) - s_id
        g_col = torch.masked_select(col, mask) - s_id
        G = to_networkx(torch.stack([g_row, g_col], dim=0))
        c_centrality = c_centrality + list(closeness_centrality(G).values())
        d_centrality = d_centrality + list(degree_centrality(G).values())
    c_centrality = torch.Tensor(c_centrality).cuda()
    d_centrality = torch.Tensor(d_centrality).cuda()
    return c_centrality, d_centrality
def get_topological_features(G, nodes=None):
    N_ = len(G.nodes)
    if nodes is None:
        nodes = G.nodes
    # Degree centrality
    d_c = get_features(degree_centrality(G).values())
    print('a')
    # Betweenness centrality
    b_c = get_features(betweenness_centrality(G).values())
    print('b')
    # Closeness centrality
    c_c = get_features(closeness_centrality(G).values())
    print('c')
    # Clustering
    c = get_features(clustering(G).values())
    print('d')
    d = diameter(G)
    r = radius(G)
    s_p_average = []
    for s in shortest_path_length(G):
        dic = s[1]
        lengths = dic.values()
        s_p_average += [sum(lengths) / float(N_)]
    s_p_average = get_features(s_p_average)
    features = np.concatenate((d_c, b_c, c_c, c, s_p_average, [d], [r]), axis=0)
    return features
def get_centrality_labels(knn_graph_obj, perc_labeled, type='degree'):
    import random
    if type == 'degree':
        degree_centrality_knn = pd.DataFrame.from_dict(
            centrality.degree_centrality(knn_graph_obj), orient='index', columns=['value'])
        node_toget_labels = degree_centrality_knn.sort_values(
            by='value', ascending=False).index[0:int(perc_labeled * len(degree_centrality_knn.index))].tolist()
    elif type == 'closeness':
        closeness_centrality_knn = pd.DataFrame.from_dict(
            centrality.closeness_centrality(knn_graph_obj), orient='index', columns=['value'])
        node_toget_labels = closeness_centrality_knn.sort_values(
            by='value', ascending=False).index[0:int(perc_labeled * len(closeness_centrality_knn.index))].tolist()
    elif type == 'betweenness':
        betweenness_centrality_knn = pd.DataFrame.from_dict(
            centrality.betweenness_centrality(knn_graph_obj), orient='index', columns=['value'])
        node_toget_labels = betweenness_centrality_knn.sort_values(
            by='value', ascending=False).index[0:int(perc_labeled * len(betweenness_centrality_knn.index))].tolist()
    elif type == 'katz':
        katz_centrality_knn = pd.DataFrame.from_dict(
            centrality.katz_centrality(knn_graph_obj), orient='index', columns=['value'])
        node_toget_labels = katz_centrality_knn.sort_values(
            by='value', ascending=False).index[0:int(perc_labeled * len(katz_centrality_knn.index))].tolist()
    elif type == 'clustering':
        clustering_knn = pd.DataFrame.from_dict(
            clustering(knn_graph_obj), orient='index', columns=['value'])
        node_toget_labels = clustering_knn.sort_values(
            by='value', ascending=False).index[0:int(perc_labeled * len(clustering_knn.index))].tolist()
    else:
        indexes = list(knn_graph_obj.nodes)
        #print(indexes)
        node_toget_labels = random.sample(indexes, int(perc_labeled * len(indexes)))
    #print(node_toget_labels)
    return node_toget_labels
def get_centrality_labels(knn_graph_obj, type='degree'):
    import random
    if type == 'degree':
        node_toget_labels = pd.DataFrame.from_dict(
            centrality.degree_centrality(knn_graph_obj), orient='index', columns=['value'])
    elif type == 'closeness':
        node_toget_labels = pd.DataFrame.from_dict(
            centrality.closeness_centrality(knn_graph_obj), orient='index', columns=['value'])
    elif type == 'betweenness':
        node_toget_labels = pd.DataFrame.from_dict(
            centrality.betweenness_centrality(knn_graph_obj), orient='index', columns=['value'])
    elif type == 'clustering':
        node_toget_labels = pd.DataFrame.from_dict(
            clustering(knn_graph_obj), orient='index', columns=['value'])
    else:
        node_toget_labels = list(knn_graph_obj.nodes)
    #print(node_toget_labels)
    return node_toget_labels
def extract_closeness_centrality(self):
    output = open('output/' + self.set_ + '/' + self.set_ + '_closeness_centrality.csv', 'w')
    print('Calculating closeness centrality')
    nodes = centrality.closeness_centrality(self.G)
    for key in nodes:
        output.write(str(key) + ',' + str(nodes[key]) + '\n')
def get_closeness_centrality(dataset):
    centrality = []
    for data in dataset:
        # each graph
        g_row, g_col = data.edge_index
        G = to_networkx(torch.stack([g_row, g_col], dim=0))
        c_centrality = list(closeness_centrality(G).values())
        centrality = centrality + c_centrality
    return centrality
def get_centrality(def_centrality, toll_centrality, model, nodes_int):
    left_b_centr = centrality.betweenness_centrality(model.get_nx_graph())
    left_c_centr = centrality.closeness_centrality(model.get_nx_graph())
    for el in left_b_centr:
        node = model.get_node_by_id(el)
        if node in nodes_int:
            toll_centrality += left_c_centr[el] + left_b_centr[el]
        else:
            def_centrality += left_c_centr[el] + left_b_centr[el]
    return def_centrality, toll_centrality
def compute_metrics(graph):
    G = json_graph.node_link_graph(graph, multigraph=False)
    degree_centrality = centrality.degree_centrality(G)
    closeness_centrality = centrality.closeness_centrality(G)
    betweenness_centrality = centrality.betweenness_centrality(G)
    page_rank = link_analysis.pagerank_alg.pagerank(G)
    max_clique = approximation.clique.max_clique(G)
    diameters = [distance_measures.diameter(g) for g in connected_component_subgraphs(G)]
    copy = dict()
    copy['id'] = graph['id']
    copy['name'] = graph['name']
    copy['graph'] = dict()
    copy['graph']['nodes'] = graph['nodes']
    copy['graph']['links'] = graph['links']
    copy['metrics'] = dict()
    # diameters
    copy['metrics']['diameter'] = dict()
    copy['metrics']['diameter']['all'] = diameters
    copy['metrics']['diameter']['max'] = max(diameters)
    copy['metrics']['diameter']['average'] = float(sum(diameters)) / float(len(diameters))
    # clique size
    copy['metrics']['maxClique'] = len(list(max_clique))
    # degree centrality
    copy['metrics']['degreeCentrality'] = dict()
    copy['metrics']['degreeCentrality']['byId'] = degree_centrality
    copy['metrics']['degreeCentrality']['max'] = max(degree_centrality.values())
    copy['metrics']['degreeCentrality']['average'] = float(sum(degree_centrality.values())) / float(len(degree_centrality.values()))
    # closeness centrality
    copy['metrics']['closenessCentrality'] = dict()
    copy['metrics']['closenessCentrality']['byId'] = closeness_centrality
    copy['metrics']['closenessCentrality']['max'] = max(closeness_centrality.values())
    copy['metrics']['closenessCentrality']['average'] = float(sum(closeness_centrality.values())) / float(len(closeness_centrality.values()))
    # betweenness centrality
    copy['metrics']['betweennessCentrality'] = dict()
    copy['metrics']['betweennessCentrality']['byId'] = betweenness_centrality
    copy['metrics']['betweennessCentrality']['max'] = max(betweenness_centrality.values())
    copy['metrics']['betweennessCentrality']['average'] = float(sum(betweenness_centrality.values())) / float(len(betweenness_centrality.values()))
    # PageRank
    copy['metrics']['pageRank'] = dict()
    copy['metrics']['pageRank']['byId'] = page_rank
    copy['metrics']['pageRank']['max'] = max(page_rank.values())
    copy['metrics']['pageRank']['average'] = float(sum(page_rank.values())) / float(len(page_rank.values()))
    return copy
def getClosenessCentrality(G, normalized):
    closeness_centrality = centrality.closeness_centrality(G)
    if not normalized:
        return closeness_centrality
    max_closeness = closeness_centrality[max(closeness_centrality, key=closeness_centrality.get)]
    normalize(closeness_centrality, max_closeness)
    print(closeness_centrality)
    color(G, closeness_centrality, 1, "olivedrab")
    return closeness_centrality
def get_closeness_centrality(G, **kwargs):
    """Returns a dictionary of closeness centrality values for all nodes."""
    # Get the graph without glycine residues
    H = get_graph_without_glycine(G, kwargs["identifiers"], kwargs["residue_names"])
    # Calculate the closeness centrality values
    centrality_dict = nxc.closeness_centrality(G=G, distance=kwargs["weight"])
    # Return the finalized dictionary of centrality values
    return finalize_dict(G, centrality_dict)
def centralization_metrics(G, prefix=""):
    # NB: G can be either a directed or an undirected network
    # Metrics: betweenness / closeness / eigenvector / pagerank

    # betweenness => expensive; could be sampled with k=min(10, len(G))
    betweenness = betweenness_centrality(G, normalized=True)
    betweenness_arr = np.fromiter(betweenness.values(), dtype=float)
    betweenness_mean = np.mean(np.max(betweenness_arr) - betweenness_arr)

    # closeness => expensive; NB: normalizes by the size of the connected component
    closeness = closeness_centrality(G, wf_improved=False)
    closeness_arr = np.fromiter(closeness.values(), dtype=float)
    closeness_mean = np.mean(np.max(closeness_arr) - closeness_arr)

    # eigenvector
    eigenvec_mean = None
    if len(G) > 2:
        try:
            eigenvec = eigenvector_centrality_numpy(G)
            eigenvec_arr = np.fromiter(eigenvec.values(), dtype=float)
            eigenvec_mean = np.mean(np.max(eigenvec_arr) - eigenvec_arr)
        except:
            eigenvec_mean = None

    # pagerank
    try:
        pagerank = pagerank_numpy(G)
        pagerank_arr = np.fromiter(pagerank.values(), dtype=float)
        pagerank_mean = np.mean(np.max(pagerank_arr) - pagerank_arr)
    except:
        pagerank_mean = None

    centralization = {
        f"cent{prefix}_betweenness_mean": betweenness_mean,
        f"cent{prefix}_closeness_mean": closeness_mean,
        f"cent{prefix}_eigenvec_mean": eigenvec_mean,
        f"cent{prefix}_pagerank_mean": pagerank_mean
    }
    return centralization
def calc_node_based_centrality(edge_index, centrality='degree'):
    adj_list = edge_index.numpy().T
    G = nx.Graph()
    G.add_edges_from(adj_list)
    if centrality == 'degree':
        nodes_centrality = degree_centrality(G)
    elif centrality == 'eigenvector':
        nodes_centrality = eigenvector_centrality(G)
    elif centrality == "closeness":
        nodes_centrality = closeness_centrality(G)
    else:
        print(centrality, "is not defined")
        exit(1)
    edges_centrality = dict()
    for u, v in adj_list:
        edges_centrality[(u, v)] = nodes_centrality[u] * nodes_centrality[v]
    return edges_centrality
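# Hypothetical usage sketch (added; not part of the original source). It assumes
# torch, networkx and the centrality functions used by the snippet above are in
# scope, and that edge_index follows the PyTorch Geometric convention of a
# LongTensor with shape [2, num_edges].
import torch

example_edge_index = torch.tensor([[0, 1, 1, 2],
                                   [1, 0, 2, 1]])
example_scores = calc_node_based_centrality(example_edge_index, centrality='closeness')
print(example_scores)  # edge (u, v) -> closeness(u) * closeness(v)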
def top_nodes(G, k=3):
    """
    Returns the top k nodes for various centrality measures: degree,
    betweenness and closeness.

    Args:
        G (nx.Graph): graph for which the top nodes must be determined.
        k (int): number of top nodes to return. If set to a negative value,
            all the nodes will be returned.

    Returns:
        res_dict (dict): dictionary of each centrality measure with the list
            of top k nodes for that measure as values.
    """
    # fraction of nodes in the graph each node is connected to
    node_deg_dict = centrality.degree_centrality(G)
    # fraction of all-pairs shortest paths that pass through each node
    node_btw_dict = centrality.betweenness_centrality(G)
    # reciprocal of the average shortest-path distance to each node
    node_clo_dict = centrality.closeness_centrality(G)
    # sort nodes by each centrality measure in decreasing order
    top_k_deg_nodes = sorted(node_deg_dict.items(), key=lambda x: -x[1])
    top_k_btw_nodes = sorted(node_btw_dict.items(), key=lambda x: -x[1])
    top_k_clo_nodes = sorted(node_clo_dict.items(), key=lambda x: -x[1])
    # pick the top k nodes
    res_dict = dict()
    if k > 0:
        res_dict["degree"] = list(zip(*top_k_deg_nodes[:k]))[0]
        res_dict["betweenness"] = list(zip(*top_k_btw_nodes[:k]))[0]
        res_dict["closeness"] = list(zip(*top_k_clo_nodes[:k]))[0]
    else:
        res_dict["degree"] = list(zip(*top_k_deg_nodes))[0]
        res_dict["betweenness"] = list(zip(*top_k_btw_nodes))[0]
        res_dict["closeness"] = list(zip(*top_k_clo_nodes))[0]
    return res_dict
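# Hypothetical usage sketch (added; not part of the original source). It assumes
# `from networkx.algorithms import centrality` is in scope, as top_nodes requires.
import networkx as nx
from networkx.algorithms import centrality

example_top = top_nodes(nx.karate_club_graph(), k=3)
print(example_top["degree"], example_top["betweenness"], example_top["closeness"])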
def parse(name): print(name) pathbase = path.abspath(path.dirname(__file__)) G = nx.Graph() data = json.load(open('{0}/{1}.json'.format(pathbase, name))) nodes = data['nodes'] text = {i: node['text'] for i, node in enumerate(nodes)} weight = {i: float(node['weight']) for i, node in enumerate(nodes)} for i in range(len(nodes)): G.add_node(i) for link in data['links']: G.add_edge(link['source'], link['target']) degree = centrality.degree_centrality(G) closeness = centrality.closeness_centrality(G) betweenness = centrality.betweenness_centrality(G) #edge_betweenness = centrality.edge_betweenness_centrality(G) #current_flow_closeness = centrality.current_flow_closeness_centrality(G) #current_flow_betweenness =\ # centrality.current_flow_betweenness_centrality(G) try: eigenvector = centrality.eigenvector_centrality(G, max_iter=1000) except: eigenvector = {i: 0 for i in range(len(nodes))} katz = centrality.katz_centrality(G) obj = {'nodes': [], 'links': data['links']} for i in range(len(nodes)): obj['nodes'].append({ 'text': text[i], 'weight': weight[i], 'degree': degree[i], 'closeness': closeness[i], 'betweenness': betweenness[i], #'edge_betweenness': edge_betweenness[i], #'current_flow_closeness': current_flow_closeness[i], #'current_flow_betweenness': current_flow_betweenness[i], 'eigenvector': eigenvector[i], 'katz': katz[i], }) json.dump(obj, open('{0}/../data/{1}.json'.format(pathbase, name), 'w'), sort_keys=True)
def calculate_metrics(network):
    '''
    Computes the most important metrics of the network and returns them as a dictionary.

    Parameters
    ----------
    network : nx.Graph
        Network for which the metrics are computed.

    Returns
    -------
    metrics : dict
        Dictionary storing the metrics of the network passed as a parameter.
    '''
    # Initialize the dictionary that stores the metrics
    metrics = {}

    # Get the names and affiliations of the network's nodes
    names = nx.get_node_attributes(network, 'name')
    affiliation = nx.get_node_attributes(network, 'affiliation')

    # Helper to obtain the name and affiliation from a (node, value) tuple
    getprops = lambda author_tuple: (names[author_tuple[0]],
                                     affiliation[author_tuple[0]],
                                     author_tuple[1])

    # Number of nodes and edges
    n = len(network.nodes.data())
    m = len(network.edges.data())
    metrics['n'] = n
    metrics['m'] = m

    # Total size of the network (sum of edge weights)
    metrics['size'] = network.size(weight='weight')

    # Average degree, density and maximum degree
    metrics['av_degree'] = round((2 * m) / n, 5)
    metrics['density'] = (2 * m) / (n * (n - 1))
    metrics['max_degree'] = getprops(
        max(dict(network.degree()).items(), key=lambda degree: degree[1]))[0]

    # Degree probability distribution
    degree_distribution = [
        (i, len([author for (author, degree) in network.degree()
                 if degree == i]) / len(network.nodes.data()))
        for i in range(
            max(dict(network.degree()).items(), key=lambda degree: degree[1])[1])
    ]
    metrics['max_degree_p'] = max(degree_distribution,
                                  key=lambda degree_p: degree_p[1])[0]

    # Average clustering coefficient
    metrics['clustering_coefficient'] = average_clustering(network)

    # Node with the highest closeness centrality
    metrics['max_closeness_centrality'] = getprops(
        max(centrality.closeness_centrality(network).items(),
            key=lambda pair: pair[1]))[0]

    return metrics
def std_closeness_centrality(self, graph):
    close_centr = closeness_centrality(graph)
    return np.std(list(close_centr.values()))
def avg_closeness_centrality(self, graph):
    close_centr = closeness_centrality(graph)
    return np.average(list(close_centr.values()))
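# Hypothetical standalone check (added; not part of the original class): the same
# statistics computed directly on a small built-in graph, assuming networkx and
# numpy are available.
import networkx as nx
import numpy as np
from networkx.algorithms.centrality import closeness_centrality

_example_values = list(closeness_centrality(nx.karate_club_graph()).values())
print(np.average(_example_values), np.std(_example_values))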
    return G


def draw_graph(G):
    nx.draw(G, node_size=30)
    plt.show()


if __name__ == "__main__":
    print("Start parsing:")
    data = parse_group()
    G = create_graph(data)
    draw_graph(G)
    degree = pd.Series(nxa.degree_centrality(G)).idxmax()
    closeness = pd.Series(nxa.closeness_centrality(G)).idxmax()
    eigenvector = pd.Series(nxa.eigenvector_centrality(G)).idxmax()
    betweennes = pd.Series(nxa.betweenness_centrality(G)).idxmax()
    degree_user = api.users.get(user_ids=degree)[0]
    closeness_user = api.users.get(user_ids=closeness)[0]
    eigenvector_user = api.users.get(user_ids=eigenvector)[0]
    betweeness_user = api.users.get(user_ids=betweennes)[0]
    print("Most important user:")
    print(
        f"Degree centrality: id{degree} - {degree_user['first_name'] + ' ' + degree_user['last_name']}"
    )
    print(
        f"Closeness centrality: id{closeness} - {closeness_user['first_name'] + ' ' + closeness_user['last_name']}"
    )
    g_dict[i] = i

c_list = []
for j in range(len(d)):
    for n in range(len(c)):
        if d[j] in c[n]:
            c_list.append(g_dict[n])

# Centrality analysis
# Degree centrality
cent_values = degree_centrality(G).values()
cent_central = degree_centrality(G)
cent_keys = degree_centrality(G).keys()

# Closeness centrality
d_values = closeness_centrality(G).values()
d_central = closeness_centrality(G)
d_keys = closeness_centrality(G).keys()

# Betweenness centrality
bet_values = betweenness_centrality(G).values()
bet_central = betweenness_centrality(G)
bet_keys = betweenness_centrality(G).keys()

#sorted(d_dict.values())
d_values_list = list(d_values)
#print(d_values)
#print(d_keys)
#print(d_values_list)

a = 0
b = 0
        G = read_dot(os.path.join("output", fname))
        nx.draw(G)
    except:
        print("cannot load graph")
        continue
    if G.number_of_nodes() == 0:
        print("Cannot read binary file")
        continue
    data = []
    data.append(fname)
    data.append(G.number_of_nodes())
    data.append(G.number_of_edges())
    data.append(density(G))
    deg_centrality = degree_centrality(G)
    data.extend(properties_of_array(deg_centrality))
    cln_centrality = closeness_centrality(G)
    data.extend(properties_of_array(cln_centrality))
    btn_centrality = betweenness_centrality(G)
    data.extend(properties_of_array(btn_centrality))
    st_path = shortest_path(G)
    deg = [len(val) for key, val in st_path.items()]
    d = np.array(deg)
    data.extend([np.min(d), np.max(d), np.median(d), np.mean(d), np.std(d)])
    try:
        data.append(diameter(G.to_undirected()))
    except:
def summarise_communities(relG): """ Creates summaries for all clusters in a given graph with the following attributes: 1. bridges - node with the highest betweeness 2. members 3. Closeness - how close the group (higher, closer) 4. relation counts in the cluster Args: relG (nx.Graph): graph for which the summary must be created. Note: the edges in relG must have 'relation' edge attribute referring to the relation type. Returns: summaries (dict): dictionary of clusters with the string summaries as values. """ # get communities communities = list(enumerate(detect_communities(relG))) node_comm_dict = dict() for i, c in communities: for u in c: node_comm_dict[u] = i # get bridges: nodes in clusters in highest betweenness centrality node_btw_dict = centrality.betweenness_centrality(relG) comm_bridge_dict = {i: (None, -1) for i in range(len(communities))} for n, b in node_btw_dict.items(): c = node_comm_dict[n] if b > comm_bridge_dict[c][1]: comm_bridge_dict[c] = (n, b) # get powers of clusters comm_graph_dict = {i: nx.Graph() for i, c in communities} for (u, v, r) in relG.edges.data('relation'): uc, vc = node_comm_dict[u], node_comm_dict[v] if uc == vc: comm_graph_dict[uc].add_edge(u, v, relation=r) # get average closeness centrality comm_avg_clo_dict = dict() for i, G in comm_graph_dict.items(): node_clo_dict = centrality.closeness_centrality(G) s, n = sum(list(node_clo_dict.values())), len(node_clo_dict) comm_avg_clo_dict[i] = s / n # get relation counts for each cluster rel_count_comm_dict = dict() for i, c in comm_graph_dict.items(): u, v, r = list(zip(*c.edges.data('relation'))) rels, counts = np.unique(r, return_counts=True) rel_count_comm_dict[i] = list(zip(rels, counts)) # create string summary for each cluster summaries = dict() for i, c in communities: summaries[i] = f"Bridge: {comm_bridge_dict[i][0]}\n" members = ", ".join([str(x) for x in c]) summaries[i] += f"Members: {members}\n" closeness = np.around(comm_avg_clo_dict[i], decimals=4) summaries[i] += f"Closeness: {closeness}\n" rel_counts = [ '\'' + r + '\'- ' + str(c) for r, c in rel_count_comm_dict[i] ] summaries[i] += "Relations:\n" + "\n".join(rel_counts) return summaries
def closenesscentrality(self, brain,outfilebase = "brain", append=True): """ Calculates node and hub closeness centralities. For hub centralities there are two files, one with the values in and another with the hub identities in corresponding rows. """ ## closeness centrality # node centrality outfile = outfilebase+'_closeness_centralities_nodes' boolVal = self.fileCheck(outfile, append) # open file and write headers if necessary if append and boolVal: f= open(outfile,"ab") headers = None else: f = open(outfile,"wb") headers = dict((n,n) for n in brain.G.nodes()) writeObj = DictWriter(f,fieldnames = brain.G.nodes()) if headers: writeObj.writerow(headers) # make calculations centralities = centrality.closeness_centrality(brain.G) # calculate centralities for largest connected component nodecentralitiestowrite = dict((n,None) for n in brain.G.nodes()) # create a blank dictionary of all nodes in the graph for node in centralities: nodecentralitiestowrite[node] = centralities[node] # populate the blank dictionary with centrality values writeObj.writerow(nodecentralitiestowrite) # write out centrality values f.close() # hub centrality outfile = outfilebase+'_closeness_centralities_hubs' hubidfile = outfilebase+'_closeness_centralities_hubs_ids' OFbool = self.fileCheck(outfile, append) self.fileCheck(hubidfile, append) if append and OFbool: f = open(outfile,"ab") g = open(hubidfile,"ab") else: f= open(outfile,"wb") g = open(hubidfile,"wb") centhubs = [hub for hub in brain.hubs if hub in brain.G] # hubs within largest connected graph component # write hub identifies to file writeObj = DictWriter(f,fieldnames = brain.hubs) hubwriter = DictWriter(g,fieldnames = brain.hubs) headers = dict((n,n) for n in brain.hubs) # dictionary of all hubs in network to write hubwriter.writerow(headers) hubcentralitieistowrite = dict((n,None) for n in brain.hubs) # empty dictionary to populate with centralities data for hub in centhubs: hubcentralitieistowrite[hub] = nodecentralitiestowrite[hub] writeObj.writerow(hubcentralitieistowrite) f.close() g.close()
class Create_network(): centrality_metrics = { "Degree Centrality": lambda n, weight='_': centrality_algorithms.degree_centrality(n), "Closeness Centrality": lambda n, weight='_': centrality_algorithms.closeness_centrality(n), "Betweenness Centrality": centrality_algorithms.betweenness_centrality } community_colors = ['#e41a1c','#377eb8','#4daf4a','#984ea3','#ff7f00','#ffff33','#a65628', \ '#b3cde3','#ccebc5','#decbe4','#fed9a6','#ffffcc','#e5d8bd','#fddaec',\ '#1b9e77','#d95f02','#7570b3','#e7298a','#66a61e','#e6ab02','#a6761d','#666666'] def __init__(self, network_file, layout_file, count_path, title, width=800, thresh_val=8): self.network_file = network_file self.layout_file = layout_file self.count_path = count_path self.network_tuple = self.load_network(network_file, layout_file) self.nodes_sources_tab1 = self.column_source(self.network_tuple[1], count_path) self.network_plots_n_circle_tab1 = self.create_network_plot( self.nodes_sources_tab1, title, width) self.network_lines_tab1 = self.add_lines( self.network_tuple, self.network_plots_n_circle_tab1[0]) self.get_centrality_n_community(self.network_tuple[0], self.nodes_sources_tab1, self.network_plots_n_circle_tab1[1]) self.drop_button_tab1 = Button(label="Remove Node", button_type="warning") self.drop_button_tab1.on_click(self.remove_node_tab1) self.remove_unattached_button = Button(label="Remove unattached nodes", button_type="success") self.remove_unattached_button.on_click(self.remove_unbound_nodes) self.update_props_button = Button(label="Update Properties", button_type="warning") self.update_props_button.on_click(self.update_properties) self.update_layout_button = Button(label="Update Layout", button_type="success") self.update_layout_button.on_click(self.update_layout) self.select_centrality = Select(title="Centrality Metric:", value="Degree Centrality", options=list( self.centrality_metrics.keys())) self.select_centrality.on_change('value', self.update_centrality) self.slider = Slider(start=0, end=10, value=0, step=1, title="Threshold %") self.slider.on_change('value', self.filter_threshold) self.slider.value = thresh_val #self.filter_threshold('',0,3) def reinit(self, network_file, layout_file, count_path, title): lines_source = self.network_lines_tab1 nodes_source = self.nodes_sources_tab1 self.network_file = network_file self.layout_file = layout_file self.count_path = count_path self.network_plots_n_circle_tab1[0].title.text = title self.network_tuple = self.load_network(network_file, layout_file) network, layout = self.network_tuple print('loaded new network') nodes, nodes_coordinates = zip(*sorted(layout.items())) count_dict = dict(pickle.load(open(self.count_path, 'rb'))) nodes_xs, nodes_ys = list(zip(*nodes_coordinates)) node_occurances = [count_dict[node] for node in nodes] nodes_source.data['x'] = nodes_xs nodes_source.data['y'] = nodes_ys nodes_source.data['name'] = nodes nodes_source.data['counts'] = node_occurances lines_source.data = self.get_edges_specs(network, layout) self.update_properties() self.slider.value = 8 self.filter_threshold('', 0, 8) def load_network(self, network_file, layout_file): network = pickle.load(open(network_file, 'rb')) layout = pickle.load(open(layout_file, 'rb')) return (network, layout) def column_source(self, layout, count_path): nodes, nodes_coordinates = zip(*sorted(layout.items())) count_dict = dict(pickle.load(open(count_path, 'rb'))) nodes_xs, nodes_ys = list(zip(*nodes_coordinates)) node_occurances = [count_dict[node] for node in nodes] nodes_source = ColumnDataSource( 
dict(x=nodes_xs, y=nodes_ys, name=nodes, counts=node_occurances)) return nodes_source def create_network_plot(self, nodes_source, title='', width=800): plot = figure(plot_width=width, plot_height=700, tools=['tap', 'box_zoom', 'reset', 'pan', 'wheel_zoom'], title=title) plot.title.text_font = "helvica" plot.title.text_font_style = "bold" plot.title.text_font_size = "20px" plot.background_fill_color = "beige" plot.background_fill_alpha = 0.2 g1 = Circle(x='x', y='y', size=2, fill_color='blue') g1_r = plot.add_glyph(source_or_glyph=nodes_source, glyph=g1) g1_hover = HoverTool(renderers=[g1_r], tooltips=[('name', '@name'), ('count', '@counts')]) glyph_text = Text(x="x", y="y", text="name", text_color="#ff4a4a", text_font_size='6pt', text_alpha=0.7) plot.add_glyph(nodes_source, glyph_text) plot.add_tools(g1_hover) plot.grid.grid_line_color = None plot.axis.visible = False return plot, g1_r, glyph_text def get_edges_specs(self, _network, _layout): d = dict(xs=[], ys=[], alphas=[]) weights = [d['weight'] for u, v, d in _network.edges(data=True)] max_weight = max(weights) calc_alpha = lambda h: 0.1 + 0.6 * (h / max_weight) for u, v, data in _network.edges(data=True): d['xs'].append([_layout[u][0], _layout[v][0]]) d['ys'].append([_layout[u][1], _layout[v][1]]) d['alphas'].append(calc_alpha(data['weight'])) return d def add_lines(self, network_tuple, plot): lines_source = ColumnDataSource(self.get_edges_specs(*network_tuple)) r_lines = plot.multi_line('xs', 'ys', line_width=2, alpha='alphas', color='navy', source=lines_source) return lines_source def get_centrality_n_community(self, network, nodes_source, g1_r): community_colors = self.community_colors centrality = networkx.algorithms.centrality.degree_centrality(network) # first element, are nodes again _, nodes_centrality = zip(*sorted(centrality.items())) nodes_source.add( [7 + 10 * t / max(nodes_centrality) for t in nodes_centrality], 'centrality') partition = community.best_partition(network) p_, nodes_community = zip(*sorted(partition.items())) nodes_source.add(nodes_community, 'community') nodes_source.add([community_colors[t % len(community_colors)]\ for t in nodes_community], 'community_color') g1_r.glyph.size = 'centrality' g1_r.glyph.fill_color = 'community_color' def remove_node_1_net(self, nodes_source, lines_source, network, layout): print('line 92') print(type(nodes_source.selected['1d']['indices'])) print(len(nodes_source.selected['1d']['indices'])) if (nodes_source.selected['1d']['indices']): idx = nodes_source.selected['1d']['indices'][0] else: return # update networkX network object node = nodes_source.data['name'][idx] network.remove_node(node) print('line 97') # update layout layout.pop(node) # update nodes ColumnDataSource new_source_data = dict() for col in nodes_source.column_names: print('line 104') new_source_data[col] = [ e for i, e in enumerate(nodes_source.data[col]) if i != idx ] nodes_source.data = new_source_data # update lines ColumnDataSource lines_source.data = self.get_edges_specs(network, layout) def remove_node_tab1(self): self.remove_node_1_net(self.nodes_sources_tab1, self.network_lines_tab1, *self.network_tuple) def remove_unbound_nodes(self): network, layout = self.network_tuple lines_source = self.network_lines_tab1 nodes_source = self.nodes_sources_tab1 unbound_nodes = [] for node in network.nodes(): if not network.edges(node): unbound_nodes.append(node) for node in unbound_nodes: network.remove_node(node) layout.pop(node) nodes, nodes_coordinates = zip(*sorted(layout.items())) count_dict = 
dict(pickle.load(open(self.count_path, 'rb'))) nodes_xs, nodes_ys = list(zip(*nodes_coordinates)) node_occurances = [count_dict[node] for node in nodes] nodes_source.data['x'] = nodes_xs nodes_source.data['y'] = nodes_ys nodes_source.data['name'] = nodes nodes_source.data['counts'] = node_occurances self.update_properties() lines_source.data = self.get_edges_specs(network, layout) def update_properties(self): community_colors = self.community_colors network, layout = self.network_tuple nodes_source = self.nodes_sources_tab1 partition = community.best_partition(network) p_, nodes_community = zip(*sorted(partition.items())) nodes_source.data['community'] = nodes_community nodes_source.data['community_color'] = [ community_colors[t % len(community_colors)] for t in nodes_community ] centrality = self.centrality_metrics[self.select_centrality.value]( network, weight='weight') _, nodes_centrality = zip(*sorted(centrality.items())) nodes_source.data['centrality'] = [ 7 + 10 * t / max(nodes_centrality) for t in nodes_centrality ] def update_centrality(self, attrname, old, new): network, _ = self.network_tuple nodes_source = self.nodes_sources_tab1 centrality = self.centrality_metrics[self.select_centrality.value]( network, weight='weight') _, nodes_centrality = zip(*sorted(centrality.items())) nodes_source.data['centrality'] = [ 7 + 10 * t / max(nodes_centrality) for t in nodes_centrality ] def update_layout(self): network, layout = self.network_tuple lines_source = self.network_lines_tab1 nodes_source = self.nodes_sources_tab1 new_layout = networkx.spring_layout(network, k=1.1 / sqrt(network.number_of_nodes()), iterations=100) layout = new_layout nodes, nodes_coordinates = zip(*sorted(layout.items())) nodes_xs, nodes_ys = list(zip(*nodes_coordinates)) nodes_source.data['x'] = nodes_xs nodes_source.data['y'] = nodes_ys lines_source.data = self.get_edges_specs(network, layout) def filter_threshold(self, attrname, old, new): network, layout = self.network_tuple if (old == new): return if (old > new): self.network_tuple = self.load_network(self.network_file, self.layout_file) network, layout = self.network_tuple weights = [d['weight'] for u, v, d in network.edges(data=True)] max_weight = max(weights) min_weight = min(weights) threshold = (new * (max_weight - min_weight) / 100.0) to_remove_list = [] sources_in = set() for (u, v, d) in network.edges(data='weight'): if (d < threshold): if (((u, v, d) in sources_in) or ((v, u, d) in sources_in)): continue to_remove_list.append((u, v)) sources_in.add((u, v, d)) network.remove_edges_from(to_remove_list) self.remove_unbound_nodes() font_size = 6 + new font_size = min(10, font_size) self.network_plots_n_circle_tab1[2].text_font_size = '{}pt'.format( font_size) self.update_layout() def return_view(self): return column(self.network_plots_n_circle_tab1[0],row(widgetbox(self.slider,self.select_centrality),\ widgetbox(self.drop_button_tab1,self.remove_unattached_button),\ widgetbox(self.update_props_button, self.update_layout_button,)))
""" GraVE Documentation ------------------- """ import networkx as nx from networkx.algorithms.centrality import closeness_centrality import matplotlib.pyplot as plt from grave import plot_network, use_attributes toy_network = nx.barbell_graph(10, 14) toy_centrality = closeness_centrality(toy_network) max_centrality = max(toy_centrality.values()) for u, v, edge_attributes in toy_network.edges.data(): c = (toy_centrality[u] + toy_centrality[v]) / 2 color_idx = (c / max_centrality) cmap = plt.get_cmap() edge_attributes['color'] = cmap(color_idx) edge_attributes['width'] = 2 for node, node_attributes in toy_network.nodes.data(): node_attributes['size'] = (1 - (toy_centrality[node] / max_centrality) + .1) * 100 def edge_style(edge_attributes): return {'linewidth': edge_attributes.get('weight', 1)}
def calculate_all_centralities(data):
    """
    Calculates all four centrality metrics for the input graph

    Parameters:
        data: a json object which represents the graph. This json is
            manipulated and the necessary metrics are added to it.
    """
    G = json_graph.node_link_graph(data)  # loads the data into a NetworkX graph object
    # Calculates three of the metrics
    degree = centrality.degree_centrality(G)
    closeness = centrality.closeness_centrality(G)
    betweeness = centrality.betweenness_centrality(G)
    eigenvector_fail = False
    try:
        # Eigenvector centrality can fail to converge.
        eigenvector = centrality.eigenvector_centrality(DiGraph(G), max_iter=100000)
    except NetworkXError:
        # Eigenvector values will be None if calculation fails.
        eigenvector = []
        eigenvector_fail = True
        print("Max iterations exceeded")
    degree_max = -1.0
    closeness_max = -1.0
    betweeness_max = -1.0
    eigenvector_max = -1.0
    # Adds the unnormalized values to the json
    for author in data['nodes']:
        i = author['id']
        author['degreeCentralityUnnormalized'] = degree[i]
        author['closenessCentralityUnnormalized'] = closeness[i]
        author['betweennessCentralityUnnormalized'] = betweeness[i]
        author['eigenvectorCentralityUnnormalized'] = eigenvector[i] if not eigenvector_fail else 1.0
    # Finds the highest value for each centrality type
    for i in degree:
        if degree[i] > degree_max:
            degree_max = degree[i]
    for i in closeness:
        if closeness[i] > closeness_max:
            closeness_max = closeness[i]
    for i in betweeness:
        if betweeness[i] > betweeness_max:
            betweeness_max = betweeness[i]
    for i in eigenvector:
        if eigenvector[i] > eigenvector_max:
            eigenvector_max = eigenvector[i]
    # Normalizes the values
    for i in degree:
        if degree[i] != 0:
            degree[i] = degree[i] / degree_max
    for i in closeness:
        if closeness[i] != 0:
            closeness[i] = closeness[i] / closeness_max
    for i in betweeness:
        if betweeness[i] != 0:
            betweeness[i] = betweeness[i] / betweeness_max
    for i in eigenvector:
        if eigenvector[i] != 0:
            eigenvector[i] = eigenvector[i] / eigenvector_max
    # Adds the normalized values to the json
    for author in data['nodes']:
        i = author['id']
        author['degreeCentrality'] = degree[i]
        author['closenessCentrality'] = closeness[i]
        author['betweennessCentrality'] = betweeness[i]
        author['eigenvectorCentrality'] = eigenvector[i] if not eigenvector_fail else 1.0
    return data
def analyze(directed_df, undirected_df, auxiliary_df): directed_df = directed_df.copy(deep=True) undirected_df = undirected_df.copy(deep=True) directed_df = directed_df.rename(mapper=lambda name: name.lower(), axis='columns') undirected_df = undirected_df.rename(mapper=lambda name: name.lower(), axis='columns') G = nx.from_pandas_edgelist(directed_df, edge_attr=['weight', 'change'], create_using=nx.DiGraph) G_undirected = nx.from_pandas_edgelist(undirected_df, edge_attr=['weight', 'change']) alpha_coef = 0.9 alpha = alpha_coef / max(nx.adjacency_spectrum(G).real) alpha_undirected = alpha_coef / max( nx.adjacency_spectrum(G_undirected).real) centralities = { 'out_degree': weighted_degree_centrality(G), 'in_degree': weighted_degree_centrality(G.reverse()), 'undirected_degree': weighted_degree_centrality(G_undirected), 'out_eigenvector': centrality.eigenvector_centrality(G, weight='weight'), 'in_eigenvector': centrality.eigenvector_centrality(G.reverse(), weight='weight'), 'undirected_eigenvector': centrality.eigenvector_centrality(G_undirected, weight='weight'), 'out_closeness': centrality.closeness_centrality(G, distance='weight'), 'in_closeness': centrality.closeness_centrality(G.reverse(), distance='weight'), 'undirected_closeness': centrality.closeness_centrality(G_undirected, distance='weight'), 'out_betweenness': centrality.betweenness_centrality(G, weight='weight'), 'in_betweenness': centrality.betweenness_centrality(G.reverse(), weight='weight'), 'undirected_betweenness': centrality.betweenness_centrality(G_undirected, weight='weight'), 'out_katz': centrality.katz_centrality(G, alpha=alpha, weight='weight'), 'in_katz': centrality.katz_centrality(G.reverse(), alpha=alpha, weight='weight'), 'undirected_katz': centrality.katz_centrality(G_undirected, alpha=alpha, weight='weight') } for centrality_type in centralities.keys(): directed_df[centrality_type] = np.NaN augmented_auxiliary_df = auxiliary_df.copy(deep=True) for key, row in augmented_auxiliary_df.iterrows(): node = row['docid'] for centrality_type, values in centralities.items(): if node in values: augmented_auxiliary_df.at[key, centrality_type] = values[node] print(augmented_auxiliary_df) return augmented_auxiliary_df
def run_GT_calcs(G, just_data, Do_kdist, Do_dia, Do_BCdist, Do_CCdist, Do_ECdist, Do_GD, Do_Eff, \ Do_clust, Do_ANC, Do_Ast, Do_WI, multigraph): # getting nodes and edges and defining variables for later use klist = [0] Tlist = [0] BCdist = [0] CCdist = [0] ECdist = [0] if multigraph: Do_BCdist = 0 Do_ECdist = 0 Do_clust = 0 data_dict = {"x": [], "y": []} nnum = int(nx.number_of_nodes(G)) enum = int(nx.number_of_edges(G)) if Do_ANC | Do_dia: connected_graph = nx.is_connected(G) # making a dictionary for the parameters and results just_data.append(nnum) data_dict["x"].append("Number of nodes") data_dict["y"].append(nnum) just_data.append(enum) data_dict["x"].append("Number of edges") data_dict["y"].append(enum) multi_image_settings.progress(35) # calculating parameters as requested # creating degree histogram if (Do_kdist == 1): klist1 = nx.degree(G) ksum = 0 klist = np.zeros(len(klist1)) for j in range(len(klist1)): ksum = ksum + klist1[j] klist[j] = klist1[j] k = ksum / len(klist1) k = round(k, 5) just_data.append(k) data_dict["x"].append("Average degree") data_dict["y"].append(k) multi_image_settings.progress(40) # calculating network diameter if (Do_dia == 1): if connected_graph: dia = int(diameter(G)) else: dia = 'NaN' just_data.append(dia) data_dict["x"].append("Network Diameter") data_dict["y"].append(dia) multi_image_settings.progress(45) # calculating graph density if (Do_GD == 1): GD = nx.density(G) GD = round(GD, 5) just_data.append(GD) data_dict["x"].append("Graph density") data_dict["y"].append(GD) multi_image_settings.progress(50) # calculating global efficiency if (Do_Eff == 1): Eff = global_efficiency(G) Eff = round(Eff, 5) just_data.append(Eff) data_dict["x"].append("Global Efficiency") data_dict["y"].append(Eff) multi_image_settings.progress(55) if (Do_WI == 1): WI = wiener_index(G) WI = round(WI, 1) just_data.append(WI) data_dict["x"].append("Wiener Index") data_dict["y"].append(WI) multi_image_settings.progress(60) # calculating clustering coefficients if (Do_clust == 1): Tlist1 = clustering(G) Tlist = np.zeros(len(Tlist1)) for j in range(len(Tlist1)): Tlist[j] = Tlist1[j] clust = average_clustering(G) clust = round(clust, 5) just_data.append(clust) data_dict["x"].append("Average clustering coefficient") data_dict["y"].append(clust) # calculating average nodal connectivity if (Do_ANC == 1): if connected_graph: ANC = average_node_connectivity(G) ANC = round(ANC, 5) else: ANC = 'NaN' just_data.append(ANC) data_dict["x"].append("Average nodal connectivity") data_dict["y"].append(ANC) multi_image_settings.progress(65) # calculating assortativity coefficient if (Do_Ast == 1): Ast = degree_assortativity_coefficient(G) Ast = round(Ast, 5) just_data.append(Ast) data_dict["x"].append("Assortativity Coefficient") data_dict["y"].append(Ast) multi_image_settings.progress(70) # calculating betweenness centrality histogram if (Do_BCdist == 1): BCdist1 = betweenness_centrality(G) Bsum = 0 BCdist = np.zeros(len(BCdist1)) for j in range(len(BCdist1)): Bsum += BCdist1[j] BCdist[j] = BCdist1[j] Bcent = Bsum / len(BCdist1) Bcent = round(Bcent, 5) just_data.append(Bcent) data_dict["x"].append("Average betweenness centrality") data_dict["y"].append(Bcent) multi_image_settings.progress(75) # calculating closeness centrality if (Do_CCdist == 1): CCdist1 = closeness_centrality(G) Csum = 0 CCdist = np.zeros(len(CCdist1)) for j in range(len(CCdist1)): Csum += CCdist1[j] CCdist[j] = CCdist1[j] Ccent = Csum / len(CCdist1) Ccent = round(Ccent, 5) just_data.append(Ccent) data_dict["x"].append("Average 
closeness centrality") data_dict["y"].append(Ccent) multi_image_settings.progress(80) # calculating eigenvector centrality if (Do_ECdist == 1): try: ECdist1 = eigenvector_centrality(G, max_iter=100) except: ECdist1 = eigenvector_centrality(G, max_iter=10000) Esum = 0 ECdist = np.zeros(len(ECdist1)) for j in range(len(ECdist1)): Esum += ECdist1[j] ECdist[j] = ECdist1[j] Ecent = Esum / len(ECdist1) Ecent = round(Ccent, 5) just_data.append(Ecent) data_dict["x"].append("Average eigenvector centrality") data_dict["y"].append(Ecent) data = pd.DataFrame(data_dict) return data, just_data, klist, Tlist, BCdist, CCdist, ECdist
def compute_features(self): # Degree centrality degree_centrality = lambda graph: list( centrality.degree_centrality(graph).values()) self.add_feature( "degree centrality", degree_centrality, "The degree centrality distribution", InterpretabilityScore(5), statistics="centrality", ) # Betweenness Centrality betweenness_centrality = lambda graph: list( centrality.betweenness_centrality(graph).values()) self.add_feature( "betweenness centrality", betweenness_centrality, "Betweenness centrality of a node v is the sum of the fraction of \ all-pairs shortest paths that pass through v", InterpretabilityScore(5), statistics="centrality", ) # Closeness centrality closeness_centrality = lambda graph: list( centrality.closeness_centrality(graph).values()) self.add_feature( "closeness centrality", closeness_centrality, "Closeness is the reciprocal of the average shortest path distance", InterpretabilityScore(5), statistics="centrality", ) # Edge betweenness centrality def edge_betweenness_centrality(graph): if graph.edges: return list( centrality.edge_betweenness_centrality(graph).values()) return [np.nan] self.add_feature( "edge betweenness centrality", edge_betweenness_centrality, "Betweenness centrality of an edge e is the sum of the fraction of \ all-pairs shortest paths that pass through e", InterpretabilityScore(4), statistics="centrality", ) # Harmonic centrality harmonic_centrality = lambda graph: list( centrality.harmonic_centrality(graph).values()) self.add_feature( "harmonic centrality", harmonic_centrality, "Harmonic centrality of a node u is the sum of the reciprocal \ of the shortest path distances from all other nodes to u", InterpretabilityScore(4), statistics="centrality", ) # Subgraph centrality subgraph_centrality = lambda graph: list( centrality.subgraph_centrality(graph).values()) self.add_feature( "subgraph centrality", subgraph_centrality, "The subgraph centrality for a node is the sum of weighted closed walks \ of all lengths starting and ending at that node.", InterpretabilityScore(3), statistics="centrality", ) # Second order centrality second_order_centrality = lambda graph: list( centrality.second_order_centrality(utils.ensure_connected(graph)). values()) self.add_feature( "second order centrality", second_order_centrality, "The second order centrality of a given node is the standard deviation \ of the return times to that node of a perpetual random walk on G", InterpretabilityScore(4), statistics="centrality", ) # Eigenvector centrality eigenvector_centrality = lambda graph: list( centrality.eigenvector_centrality_numpy( utils.ensure_connected(graph)).values()) self.add_feature( "eigenvector centrality", eigenvector_centrality, "Eigenvector centrality computes the centrality for a node based \ on the centrality of its neighbors", InterpretabilityScore(4), statistics="centrality", ) # Katz centrality katz_centrality = lambda graph: list( centrality.katz_centrality_numpy(utils.ensure_connected(graph)). values()) self.add_feature( "katz centrality", katz_centrality, "Generalisation of eigenvector centrality - Katz centrality computes the \ centrality for a node based on the centrality of its neighbors", InterpretabilityScore(4), statistics="centrality", ) # Page Rank pagerank = lambda graph: list(nx.pagerank_numpy(graph).values()) self.add_feature( "pagerank", pagerank, "The pagerank computes a ranking of the nodes in the graph based on \ the structure of the incoming links. ", InterpretabilityScore(4), statistics="centrality", )
corpo_pairs_list = open('./data/corpo_pairs_res.txt').readlines()
G = nx.Graph()
name_index_list = {}
index = 0
for _pair in corpo_pairs_list:
    _, pair_a, _, pair_b, _ = _pair.split(',')
    if pair_a not in name_index_list:
        name_index_list[pair_a] = str(index)
        index += 1
    if pair_b not in name_index_list:
        name_index_list[pair_b] = str(index)
        index += 1
    # print(pair_a + ',' + pair_b)
    G.add_edge(pair_a, pair_b)

# nx.draw(G)
# plt.savefig("path.png")

from networkx.algorithms.centrality import (degree_centrality, closeness_centrality,
                                            betweenness_centrality,
                                            communicability_betweenness_centrality)

degree_res = degree_centrality(G)
closeness_res = closeness_centrality(G)
# note: the "betweenness" column is filled with communicability betweenness centrality
betweenness_res = communicability_betweenness_centrality(G)

centrality_out = open('./data/centrality.txt', 'w')
centrality_out.write('pattern,degree,closeness,betweenness')
for key, value in degree_res.items():
    centrality_out.write('\n' + key + '\t %.2f \t %.2f \t %.2f'
                         % (value, closeness_res[key], betweenness_res[key]))
centrality_out.close()

print(degree_res)
print(closeness_res)
print(betweenness_res)
p_, nodes_community = zip(*sorted(partition.items()))
nodes_source.data['community'] = nodes_community
nodes_source.data['community_color'] = [community_colors[t % len(community_colors)]
                                        for t in nodes_community]
centrality = centrality_metrics[select_centrality.value](network, weight='weight')
_, nodes_centrality = zip(*sorted(centrality.items()))
nodes_source.data['centrality'] = [7 + 10 * t / max(nodes_centrality)
                                   for t in nodes_centrality]

update_props_button = Button(label="Update Properties")
update_props_button.on_click(update_properties)

centrality_metrics = {
    "Degree Centrality": lambda n, weight=_: centrality_algorithms.degree_centrality(n),
    "Closeness Centrality": lambda n, weight=_: centrality_algorithms.closeness_centrality(n),
    "Betweenness Centrality": centrality_algorithms.betweenness_centrality
}


def update_centrality(attrname, old, new):
    centrality = centrality_metrics[select_centrality.value](network, weight='weight')
    _, nodes_centrality = zip(*sorted(centrality.items()))
    nodes_source.data['centrality'] = [7 + 10 * t / max(nodes_centrality)
                                       for t in nodes_centrality]


select_centrality = Select(title="Centrality Metric:", value="Degree Centrality",
                           options=list(centrality_metrics.keys()))
select_centrality.on_change('value', update_centrality)
def closeness_centrality(graph):
    """closeness_centrality"""
    return list(centrality.closeness_centrality(graph).values())
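# Hypothetical usage sketch (added; not part of the original module). It assumes
# `from networkx.algorithms import centrality` is in scope, as the wrapper above
# requires, and shows that the wrapper returns the values in node order.
import networkx as nx
from networkx.algorithms import centrality

print(closeness_centrality(nx.path_graph(4)))  # [0.5, 0.75, 0.75, 0.5]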
def get_metric_from_graph(self, metric=None, nedges=None, keyword=None, graph=None, month=None): #'''this func will do most of the work. lets you get a named metric for nodes, optionally restricting this by month, by specified nodes, or by entity type ie lobby/staffer/lobbyist/commissioner. first constructs a cache key and then looks in the cache ck = str(metric) + str(month) + str(keyword) if ck in self.cache: return self.cache[ck] g = graph if keyword: nedges = [node for node in g.nodes_iter(data=True) if node[1]['type'] == keyword] #'''if a keyword search is specified, we list the nodes where that keyword is found in one of its attributes''' if metric == u'Degree': upshot = self.degree(g, nedges) if metric == u'Gatekeepership': upshot = self.gatekeeper(g, nedges) if metric == u'Closeness Centrality': u = centrality.closeness_centrality(g, normalized=True) if nedges: filter_list = [n[0] for n in nedges] upshot = {g.node[k]['name']: v for k,v in u.items() if k in filter_list} else: upshot = {g.node[k]['name']: v for k,v in u.items()} if metric == u'Betweenness': u = centrality.betweenness_centrality(g, weight='weight', normalized=True) if nedges: filter_list = [n[0] for n in nedges] upshot = {g.node[k]['name']: v for k,v in u.items() if k in filter_list} else: upshot = {g.node[k]['name']: v for k,v in u.items()} if metric == u'Greedy_Fragile': upshot = self.greedy_fragile(g, nedges) if metric == u'Link Centrality': u = centrality.edge_betweenness_centrality(g, weight='weight', normalized=True) upshot = {} for k, v in u.items(): # doing it in a similar way to the other linkwise metric below. a, b = k c = g.node[a]['name'] d = g.node[b]['name'] if nedges: filter_list = [n[0] for n in nedges] if a in filter_list or b in filter_list: upshot[unicode(c + ' - ' + d)] = v else: upshot[unicode(a + ' - ' + b)] = v if metric == u'Predicted Links': gr = self.make_unigraph_from_multigraph(mg=g) u = link_prediction.resource_allocation_index(gr) upshot = {} for k, v, p in u: if p > 0: #RAI examines all nonexistent edges in graph and will return all of them, including ones with a zero index. we therefore filter for positive index values. a = g.node[k]['name'] b = g.node[v]['name'] if nedges: filter_list = [n[0] for n in nedges] if k in filter_list or v in filter_list: upshot[unicode(a + ' - ' + b)] = p else: upshot[unicode(a + ' - ' + b)] = p self.cacheflow(ck, data=upshot) return upshot