def centrality_analysis(G, isDriected=False):
    """Collect several centrality scores for every node of ``G``.

    :param G: graph to analyse (``Graph()`` or ``DiGraph()``)
    :param isDriected: when true, in- and out-degree centrality are computed
        in place of the plain degree centrality
    :return: dict mapping each node to a list of scores, ordered as
        ``[in_degree, out_degree, betweenness, eigenvector]`` when
        ``isDriected`` is true and ``[degree, betweenness, eigenvector]``
        otherwise
    """
    node_view = G.nodes()

    # Only the degree-based measures differ between the two variants.
    if isDriected:
        score_tables = [
            centrality.in_degree_centrality(G),
            centrality.out_degree_centrality(G),
        ]
        summary = (
            "Four types of centrality are calculated \n"
            "\n\tin_degree_centrality\n\tout_degree_centrality"
            "\n\tbetweenness_centrality\n\teigenvector_centrality"
        )
    else:
        score_tables = [centrality.degree_centrality(G)]
        summary = (
            "Three types of centrality are calculated \n"
            "\n\tdegree_centrality\n\tbetweenness_centrality\n\teigenvector_centrality"
        )

    score_tables.append(centrality.betweenness_centrality(G))
    score_tables.append(centrality.eigenvector_centrality(G))

    cent = {
        node: [table[node] for table in score_tables]
        for node in node_view
    }
    print(summary)
    return cent
def get_layer_info(subject, journal_volume, edge_list,
                   base_dir="C:/Users/hexie/Documents/APS_result/"):
    """Build a weighted graph from ``edge_list`` and dump its statistics to disk.

    The output directory ``<base_dir>/<journal_volume>/<subject>`` is created
    if necessary and becomes the current working directory (callers of the
    original implementation may rely on that side effect, so it is kept).
    Inside it the function writes the degree / eigenvector / closeness /
    betweenness centrality dictionaries as ``.npy`` files, a text file with
    the edge and node counts, and a PNG drawing of the graph.

    :param subject: label of the layer; used in the directory and file names
    :param journal_volume: label of the volume; used in the directory and
        file names
    :param edge_list: iterable of ``(u, v, weight)`` triples accepted by
        ``Graph.add_weighted_edges_from``
    :param base_dir: root directory for the results; defaults to the location
        that used to be hard-coded
    """
    G = nx.Graph()
    G.add_weighted_edges_from(edge_list)

    PATH = os.path.join(base_dir, str(journal_volume), str(subject))
    # makedirs also creates a missing parent directory; the previous
    # mkdir + bare except failed later in chdir() in that situation.
    os.makedirs(PATH, exist_ok=True)
    os.chdir(PATH)

    degree_centrality = nxc.degree_centrality(G)

    # Eigenvector centrality is best-effort: skip its output file when the
    # computation fails, but do not hide unrelated errors.
    try:
        eigen_vector_centrality = nxc.eigenvector_centrality(G)
    except nx.PowerIterationFailedConvergence:
        print("fail to converge within 100 iterations of power")
    except nx.NetworkXException as exc:
        print("eigenvector centrality could not be computed: {}".format(exc))
    else:
        np.save("eigen_vector_centrality.npy", eigen_vector_centrality)

    closeness_centrality = nxc.closeness_centrality(G)
    betweeness_centrality = nxc.betweenness_centrality(G)

    np.save("degree_centrality.npy", degree_centrality)
    np.save("closeness_centrality.npy", closeness_centrality)
    np.save("betweeness_centrality.npy", betweeness_centrality)

    stem = str(subject) + str(journal_volume)
    with open(stem + ".txt", 'w') as f:
        f.write('Number of Edges: ' + str(nx.number_of_edges(G)) + "\n")
        f.write('Number of Nodes: ' + str(nx.number_of_nodes(G)) + "\n")

    nx.draw(G)
    plt.savefig(stem + ".png")
    plt.clf()  # reset the shared pyplot figure for the next layer
def set_type_centrality(G, type_str):
    """Store a per-type eigenvector centrality on every node of ``G``.

    The edges of ``G`` whose key matches ``type_str`` are collapsed into a
    simple weighted graph (parallel edges add up in ``weight``), eigenvector
    centrality is computed on that graph, and the result is written to the
    node attribute ``<type_str>_centrality`` of ``G``. Whenever the
    centrality cannot be computed the attribute is set to ``0`` on all nodes.

    :param G: multigraph whose edges carry string keys
        (iterated with ``G.edges(keys=True)``)
    :param type_str: edge-type prefix to select
    """
    attr_name = type_str + '_centrality'

    g_ss = nx.Graph()
    for u, v, k in G.edges(keys=True):
        # NOTE(review): the comparison uses a fixed 13-character key prefix,
        # so it presumably expects len(type_str) == 13 - confirm with callers.
        if k[:13] == type_str:
            if g_ss.has_edge(u, v):
                g_ss[u][v]['weight'] += 1
            else:
                g_ss.add_edge(u, v, weight=1)

    max_iterations = 300

    try:
        centrality_dict = dict(eigenvector_centrality(g_ss, weight='weight'))
        nx.set_node_attributes(G, centrality_dict, attr_name)
    except nx.NetworkXPointlessConcept:
        # Raised for an empty graph, i.e. no edge of this type exists.
        nx.set_node_attributes(G, 0, attr_name)
    except nx.PowerIterationFailedConvergence:
        logging.debug(
            "Centrality algorithm failed to converge in 100 iterations.")
        # Retry once with a larger iteration budget before giving up.
        try:
            centrality_dict = dict(
                eigenvector_centrality(g_ss,
                                       max_iter=max_iterations,
                                       weight='weight'))
            nx.set_node_attributes(G, centrality_dict, attr_name)
        except nx.PowerIterationFailedConvergence:
            logging.debug(
                "Centrality algorithm failed to converge in {} iterations.".
                format(max_iterations))
            nx.set_node_attributes(G, 0, attr_name)
    except Exception:
        # Best-effort fallback; keep the traceback in the debug log so the
        # failure can still be diagnosed.
        logging.debug("Centrality algorithm failed", exc_info=True)
        nx.set_node_attributes(G, 0, attr_name)
def calcCentrality(linkrecords):
    """Calculate the eigenvector centrality of each package or application.

    Linkrecords: list of dicts with keys:
    focal, other, type, raw_count, scaled_count

    An undirected graph is built with one edge per record, connecting
    ``focal`` and ``other``; ``raw_count`` is stored as the edge ``weight``.
    The centrality call itself does not pass a weight, so the stored weights
    do not influence the result.

    Returns the dict produced by ``eigenvector_centrality`` (node -> score).
    """
    from networkx.algorithms.centrality import eigenvector_centrality

    G = nx.Graph()
    for link in linkrecords:
        # add_edge creates missing endpoints itself, focal first.
        G.add_edge(link["focal"], link["other"], weight=link["raw_count"])

    return eigenvector_centrality(G, max_iter=1000)
def calc_node_based_centrality(edge_index, centrality='degree'):
    """Score every edge by the product of its endpoints' node centralities.

    :param edge_index: object whose ``.numpy()`` returns an array that, once
        transposed, yields one ``(u, v)`` pair per row
        (assumes a 2 x num_edges tensor - TODO confirm with callers)
    :param centrality: node measure to use: ``'degree'``, ``'eigenvector'``
        or ``'closeness'``
    :return: dict mapping ``(u, v)`` to
        ``centrality[u] * centrality[v]``
    :raises ValueError: if ``centrality`` is not one of the supported names
        (the previous implementation printed a message and called
        ``exit(1)``, terminating the whole process)
    """
    supported = ('degree', 'eigenvector', 'closeness')
    # Validate before doing any graph work so bad input fails fast.
    if centrality not in supported:
        raise ValueError(
            "{!r} is not defined; expected one of {}".format(
                centrality, ", ".join(supported)))

    adj_list = edge_index.numpy().T
    G = nx.Graph()
    G.add_edges_from(adj_list)

    if centrality == 'degree':
        nodes_centrality = degree_centrality(G)
    elif centrality == 'eigenvector':
        nodes_centrality = eigenvector_centrality(G)
    else:
        nodes_centrality = closeness_centrality(G)

    return {
        (u, v): nodes_centrality[u] * nodes_centrality[v]
        for u, v in adj_list
    }
def parse(name):
    """Annotate the nodes of ``<name>.json`` with centrality scores.

    Reads ``<dir of this file>/<name>.json`` (keys ``nodes`` and ``links``),
    builds an undirected graph with one node per entry of ``nodes`` (indexed
    by position) and one edge per link (``source`` -> ``target``), computes
    degree, closeness, betweenness, eigenvector and Katz centrality, and
    writes the enriched node list together with the original links to
    ``<dir of this file>/../data/<name>.json``.

    :param name: base name of the JSON file, without extension
    """
    print(name)
    pathbase = path.abspath(path.dirname(__file__))

    # Context managers make sure both files are closed (and the output is
    # flushed) even when an error occurs.
    with open('{0}/{1}.json'.format(pathbase, name)) as source:
        data = json.load(source)

    nodes = data['nodes']
    node_count = len(nodes)

    G = nx.Graph()
    G.add_nodes_from(range(node_count))
    for link in data['links']:
        G.add_edge(link['source'], link['target'])

    degree = centrality.degree_centrality(G)
    closeness = centrality.closeness_centrality(G)
    betweenness = centrality.betweenness_centrality(G)
    try:
        eigenvector = centrality.eigenvector_centrality(G, max_iter=1000)
    except nx.NetworkXException:
        # Covers non-convergence and the empty graph: fall back to zeros.
        eigenvector = {i: 0 for i in range(node_count)}
    katz = centrality.katz_centrality(G)

    obj = {
        'nodes': [
            {
                'text': node['text'],
                'weight': float(node['weight']),
                'degree': degree[i],
                'closeness': closeness[i],
                'betweenness': betweenness[i],
                'eigenvector': eigenvector[i],
                'katz': katz[i],
            }
            for i, node in enumerate(nodes)
        ],
        'links': data['links'],
    }

    with open('{0}/../data/{1}.json'.format(pathbase, name), 'w') as sink:
        json.dump(obj, sink, sort_keys=True)
def fit(self, X_df, y_array):
    """Load the thinker metadata and derive graph features from known links.

    Reads ``nodes_info_new.csv`` next to this file, normalises its date and
    place columns, builds ``self.thinker_dictionary`` (thinker name ->
    metadata), adds every thinker id and every positively labelled pair of
    ``X_df`` to ``self.G`` and stores per-node features (connected component
    id and size, degree centrality, eigenvector centrality, PageRank; the
    last three scaled by their maximum) in ``self.graph_features``.

    :param X_df: DataFrame with columns ``thinker_1`` and ``thinker_2``
    :param y_array: labels aligned with the rows of ``X_df``; rows whose
        label equals 1 become edges
    :return: ``self``
    """
    labels = pd.DataFrame(data={'link': np.array(y_array)})
    path = os.path.dirname(__file__)
    self.data = pd.read_csv(os.path.join(path, 'nodes_info_new.csv'),
                            low_memory=False)

    def clean_date(s):
        # Keep only the digits; empty strings and a lone '1' count as missing.
        s = re.sub('[^0-9]', '', str(s))
        if len(s) == 0 or s == '1':
            return np.nan
        if len(s) < 4:
            return int(s)
        date = int(s[:4])
        if date > 2000:
            # A four-digit prefix above 2000 is re-read as a three-digit year.
            date = int(s[:3])
        return date

    def get_country(s):
        # Keep letters and spaces, then take the last word.
        s = re.sub('[^a-zA-Z ]', '', str(s))
        if len(s) == 0:
            return np.nan
        return s.split()[-1]

    for column in ('birth_date', 'death_date'):
        self.data[column] = self.data[column].apply(clean_date)
    for column in ('birth_place', 'death_place'):
        self.data[column] = self.data[column].apply(get_country)

    # Information for each thinker, keyed by name.
    self.thinker_dictionary = {}
    for _, row in self.data.iterrows():
        self.thinker_dictionary[row['thinker']] = {
            'thinker_id': row['id'],
            'birth_date': row['birth_date'],
            'birth_place': row['birth_place'],
            'death_place': row['death_place'],
            'death_date': row['death_date'],
            'summary': row['summary'],
        }

    max_id = self.data['id'].max()
    self.nodes = np.arange(1, max_id + 1)

    linked_pairs = X_df[(labels['link'] == 1).values.flatten()]
    self.edges = np.array([
        [self.thinker_dictionary[row['thinker_1']]['thinker_id'],
         self.thinker_dictionary[row['thinker_2']]['thinker_id']]
        for _, row in linked_pairs.iterrows()
    ])

    self.G.add_nodes_from(self.nodes)
    self.G.add_edges_from(self.edges)
    self.graph_features = pd.DataFrame({'thinker_id': self.nodes})

    # Components are disjoint, so one pass over their members gives the same
    # mapping as searching every component for every node, in linear time.
    self.connected_comp = list(nx.connected_components(self.G))
    group_id = {}
    group_len = {}
    for index, group in enumerate(self.connected_comp):
        size = len(group)
        for member in group:
            group_id[member] = index
            group_len[member] = size
    self.graph_features['connected_comp'] = [group_id[n] for n in self.nodes]
    self.graph_features['connected_comp_len'] = [group_len[n] for n in self.nodes]

    # Look scores up by node id instead of relying on the iteration order of
    # dict.values() matching the order of self.nodes.
    scores_by_feature = (
        ('degree_centrality', degree_centrality(self.G)),
        ('eigenvector_centrality', eigenvector_centrality(self.G)),
        ('pagerank', nx.pagerank(self.G, alpha=0.9)),
    )
    # Closeness, betweenness and subgraph centrality were disabled in the
    # original code and are intentionally not computed.
    for feature, scores in scores_by_feature:
        self.graph_features[feature] = [scores[n] for n in self.nodes]
        self.graph_features[feature] /= self.graph_features[feature].max()

    return self
def draw(self,
         method='',
         h_i_shock=None,
         alpha=None,
         max_iter=100,
         is_savefig=False,
         font_size=5,
         node_color='b',
         seed=None,
         **kwargs):
    """Draw the financial network.

    Parameters:
    ---
    `method`: <str>. optional, the importance measure the node colors map to.
        One of {'dr', 'nldr'} (DebtRank), {'lp'} (loss percentile) or
        {'idc', 'odc', 'dc', 'bc', 'cc'/'icc', 'occ', 'ec'/'iec', 'oec', 'kc'}
        (centralities). Default = '' (plain `node_color`, no legend). Any
        other value is drawn like the default.

    `h_i_shock`: <np.ndarray or list>. the initial shock, only used by
        'dr'/'nldr'. Falls back to `self._data.h_i_shock` when omitted.

    `alpha`: <float>. optional, the parameter of Non-Linear DebtRank.
        Default = 0 (a warning is printed when it is omitted for 'nldr').

    `max_iter`: <int>. the max number of iterations passed to DebtRank
        (as `t_max`) and to eigenvector centrality. Default = 100.

    `is_savefig`: <bool>. if True the figure is saved as a PNG in the current
        working directory, otherwise `plt.show()` is called.

    `font_size`: <int>. the size of the node labels. Default = 5.

    `node_color`: <str or RGB>. the color of nodes when `method` does not
        select an importance measure.

    `seed`: forwarded to betweenness centrality ('bc').

    `**kwargs`: 'lp' requires `x_shock` and `t`. The keys `node_size`,
        `node_color`, `edge_cmap`, `labels`, `style` and `with_labels`
        override the defaults passed to `networkx.draw`.
    """
    # initial setting
    title = 'The interbank network' + '(%s)' % self._data._label_year
    method = str(method)

    # Legend colors, ordered from the lowest to the highest quartile.
    quartile_colors = ("#6495ED", "#EEEE00", "#EE9A00", "#EE0000")

    def quartile_labels(measure):
        # e.g. 'debtrank < 25%', 'debtrank > 25%', ...
        return [measure + suffix
                for suffix in (' < 25%', ' > 25%', ' > 50%', ' > 75%')]

    def quartile_handles():
        # One legend marker per quartile, labelled from self._legend_labels.
        return [
            Line2D([0], [0], marker='o', color=color, markersize=3.5,
                   label=label)
            for color, label in zip(quartile_colors, self._legend_labels)
        ]

    def katz_centrality():
        # NOTE(review): flagged as buggy in the original code; eig() may
        # return complex eigenvalues for a non-symmetric matrix - confirm.
        phi, _ = np.linalg.eig(self._Ad_ij)
        return ct.katz_centrality(self._FN,
                                  alpha=1 / np.max(phi) - 0.01,
                                  weight='weight')

    # method -> (legend name, attribute that caches the scores,
    #            method name passed to _run_centrality, scores factory)
    centrality_specs = {
        'idc': ('in-degree centrality', '_in_degree_centrality', 'idc',
                lambda: ct.in_degree_centrality(self._FN)),
        'odc': ('out-degree centrality', '_out_degree_centrality', 'odc',
                lambda: ct.out_degree_centrality(self._FN)),
        'dc': ('degree centrality', '_degree_centrality', 'dc',
               lambda: ct.degree_centrality(self._FN)),
        'bc': ('betweenness centrality', '_betweenness_centrality', 'bc',
               lambda: ct.betweenness_centrality(
                   self._FN, weight='weight', seed=seed)),
        'cc': ('in-closeness centrality', '_in_closeness_centrality', 'cc',
               lambda: ct.closeness_centrality(self._FN, distance='weight')),
        'occ': ('out-closeness centrality', '_out_closeness_centrality',
                'occ',
                lambda: ct.closeness_centrality(
                    self._FN.reverse(), distance='weight')),
        'ec': ('in-eigenvector centrality', '_in_eigenvector_centrality',
               'ec',
               lambda: ct.eigenvector_centrality(
                   self._FN, max_iter=max_iter, weight='weight')),
        'oec': ('out-eigenvector centrality', '_out_eigenvector_centrality',
                'oec',
                lambda: ct.eigenvector_centrality(
                    self._FN.reverse(), max_iter=max_iter, weight='weight')),
        'kc': ('katz centrality', '_katz_centrality', 'kc', katz_centrality),
    }
    # The original code tested for these spellings but could never reach them.
    centrality_specs['icc'] = centrality_specs['cc']
    centrality_specs['iec'] = centrality_specs['ec']

    # Stays None when no importance measure is selected, so no legend is drawn.
    legend_elements = None
    legend_columns = 4

    if method in ('dr', 'nldr'):
        if h_i_shock is None:
            try:
                self._h_i_shock = self._data.h_i_shock
            except Exception:
                raise Exception(
                    "ERROR: the parameter 'h_i_shock' cannot be empty.",
                    h_i_shock)
        else:
            self._h_i_shock = h_i_shock
        assert isinstance(
            self._h_i_shock, (list, np.ndarray)
        ), "ERROR: the 'h_i_shock' you provided should be a list or np.ndarray."
        assert len(
            self._h_i_shock
        ) == self._data._N, "ERROR: the length of 'h_i_shock' you provided is not equal to data."

        # the node labels: initially shocked nodes are marked with a star
        self._node_labels = {}
        for i, j in zip(self._nodes, self._h_i_shock):
            assert j >= 0, "ERROR: the value of h_i_shock should in [0,1]"
            if j == 0.0:
                self._node_labels[i] = i
            else:
                self._node_labels[i] = i + r"$\bigstar$"

        if method == 'dr':
            self._legend_labels = quartile_labels('debtrank')
            self._nodes_color = self._run_centrality(
                method='dr', h_i_shock=self._h_i_shock,
                t_max=max_iter)['node color']
        else:
            if alpha is None:
                alpha = 0
                print(
                    "Warning: the paramater of 'alpha' is essential! Default = %.2f"
                    % alpha)
            title = ('The interbank network, ' + r'$\alpha = %.2f$' % alpha +
                     ' (%s)' % self._data._label_year)
            self._legend_labels = quartile_labels('nonlinear debtrank')
            self._nodes_color = self._run_centrality(
                method='nldr',
                h_i_shock=self._h_i_shock,
                alpha=alpha,
                t_max=max_iter)['node color']

        legend_elements = quartile_handles()
        legend_elements.append(
            Line2D([0], [0], marker='*', markerfacecolor="#000000",
                   color='w', markersize=6.5, label='the initial shock'))
        legend_columns = 5

    elif method == 'lp':
        title = (r'$x_{shock} = %.2f$' % kwargs['x_shock'] +
                 ', t = %d' % kwargs['t'] +
                 ' (%s)' % self._data._label_year)
        self._node_labels = dict(zip(self._nodes, self._nodes))
        self._legend_labels = [
            'importantance level < 25%', 'importantance level > 25 %',
            'importantance level > 50%', 'importantance level > 75%'
        ]
        self._nodes_color = self._run_centrality(
            method='lp', t=kwargs['t'],
            x_shock=kwargs['x_shock'])['node color']
        legend_elements = quartile_handles()

    elif method in centrality_specs:
        measure, attribute, run_name, compute = centrality_specs[method]
        self._node_labels = dict(zip(self._nodes, self._nodes))
        self._legend_labels = quartile_labels(measure)
        scores = compute()
        # Keep the scores on the instance under the same attribute names the
        # dedicated branches used to assign.
        setattr(self, attribute, scores)
        self._nodes_color = self._run_centrality(
            method=run_name, centrality=scores)['node color']
        legend_elements = quartile_handles()

    else:
        self._node_labels = dict(zip(self._nodes, self._nodes))
        self._nodes_color = node_color  # "#00BFFF"
        print("Warning: the color of nodes have no special meaning.")

    # draw
    draw_kwargs = {
        'node_size': self._node_assets,
        'node_color': self._nodes_color,
        'edge_color': self._edge_color,
        'edge_cmap': plt.cm.binary,
        'labels': self._node_labels,
        'width': 0.8,
        'style': 'solid',
        'with_labels': True
    }
    # customize your nx.draw: only these options may be overridden
    for option in ('node_size', 'node_color', 'edge_cmap', 'labels', 'style',
                   'with_labels'):
        if option in kwargs:
            draw_kwargs[option] = kwargs[option]

    plt.rcParams['figure.dpi'] = 160
    plt.rcParams['savefig.dpi'] = 400
    plt.title(title, fontsize=font_size + 2)
    nx.draw(self._FN,
            pos=nx.circular_layout(self._FN),
            font_size=font_size,
            **draw_kwargs)

    # An unrecognised non-empty method used to reach plt.legend with the
    # handles unbound; the legend is now drawn only when handles exist.
    if legend_elements is not None:
        plt.legend(handles=legend_elements,
                   ncol=legend_columns,
                   fontsize=font_size - 1,
                   loc='lower center',
                   frameon=False)

    if is_savefig:
        net = "interbanknetwork"
        date = parse(self._data._label_year).strftime("%Y%m%d")
        plt.savefig(net + date + '.png', format='png', dpi=400)
        print("save to '%s'" % os.getcwd() + ' and named as %s' %
              (net + date) + '.png')
    else:
        plt.show()
def run_GT_calcs(G, just_data, Do_kdist, Do_dia, Do_BCdist, Do_CCdist, Do_ECdist, Do_GD, Do_Eff,
                 Do_clust, Do_ANC, Do_Ast, Do_WI, multigraph):
    """Compute the requested unweighted graph-theory parameters of ``G``.

    Each ``Do_*`` flag enables one calculation when it equals 1. For a
    multigraph the betweenness, eigenvector and clustering calculations are
    switched off. Every computed value is appended to ``just_data`` (mutated
    in place) and recorded, with a label, in the returned DataFrame.

    The per-node distributions are read as ``mapping[j]`` for
    ``j in range(len(mapping))``, so the nodes are expected to be labelled
    ``0 .. n-1``.

    :return: ``(data, just_data, klist, Tlist, BCdist, CCdist, ECdist)`` where
        ``data`` is a DataFrame with columns ``x`` (label) and ``y`` (value)
        and the remaining items are the per-node degree, clustering,
        betweenness, closeness and eigenvector arrays (``[0]`` when the
        corresponding calculation was not requested)
    """
    klist = [0]
    Tlist = [0]
    BCdist = [0]
    CCdist = [0]
    ECdist = [0]
    if multigraph:
        Do_BCdist = 0
        Do_ECdist = 0
        Do_clust = 0

    data_dict = {"x": [], "y": []}

    def record(label, value):
        # Keep the flat value list and the labelled table in step.
        just_data.append(value)
        data_dict["x"].append(label)
        data_dict["y"].append(value)

    nnum = int(nx.number_of_nodes(G))
    enum = int(nx.number_of_edges(G))
    if Do_ANC | Do_dia:
        connected_graph = nx.is_connected(G)

    record("Number of nodes", nnum)
    record("Number of edges", enum)
    multi_image_settings.progress(35)

    # degree histogram
    if Do_kdist == 1:
        degrees = _gt_values_by_index(nx.degree(G))
        klist = np.array(degrees, dtype=float)
        record("Average degree", round(sum(degrees) / len(degrees), 5))
    multi_image_settings.progress(40)

    # network diameter (only defined for a connected graph)
    if Do_dia == 1:
        dia = int(diameter(G)) if connected_graph else 'NaN'
        record("Network Diameter", dia)
    multi_image_settings.progress(45)

    # graph density
    if Do_GD == 1:
        record("Graph density", round(nx.density(G), 5))
    multi_image_settings.progress(50)

    # global efficiency
    if Do_Eff == 1:
        record("Global Efficiency", round(global_efficiency(G), 5))
    multi_image_settings.progress(55)

    # Wiener index
    if Do_WI == 1:
        record("Wiener Index", round(wiener_index(G), 1))
    multi_image_settings.progress(60)

    # clustering coefficients
    if Do_clust == 1:
        Tlist = np.array(_gt_values_by_index(clustering(G)), dtype=float)
        record("Average clustering coefficient",
               round(average_clustering(G), 5))

    # average nodal connectivity (only computed for a connected graph)
    if Do_ANC == 1:
        if connected_graph:
            ANC = round(average_node_connectivity(G), 5)
        else:
            ANC = 'NaN'
        record("Average nodal connectivity", ANC)
    multi_image_settings.progress(65)

    # assortativity coefficient
    if Do_Ast == 1:
        record("Assortativity Coefficient",
               round(degree_assortativity_coefficient(G), 5))
    multi_image_settings.progress(70)

    # betweenness centrality histogram
    if Do_BCdist == 1:
        scores = _gt_values_by_index(betweenness_centrality(G))
        BCdist = np.array(scores, dtype=float)
        record("Average betweenness centrality",
               round(sum(scores) / len(scores), 5))
    multi_image_settings.progress(75)

    # closeness centrality
    if Do_CCdist == 1:
        scores = _gt_values_by_index(closeness_centrality(G))
        CCdist = np.array(scores, dtype=float)
        record("Average closeness centrality",
               round(sum(scores) / len(scores), 5))
    multi_image_settings.progress(80)

    # eigenvector centrality
    if Do_ECdist == 1:
        try:
            ECdist1 = eigenvector_centrality(G, max_iter=100)
        except nx.PowerIterationFailedConvergence:
            # Retry with a much larger iteration budget.
            ECdist1 = eigenvector_centrality(G, max_iter=10000)
        scores = _gt_values_by_index(ECdist1)
        ECdist = np.array(scores, dtype=float)
        # The original rounded the closeness average (Ccent) here, which
        # reported the wrong number or raised NameError when closeness was
        # not requested.
        record("Average eigenvector centrality",
               round(sum(scores) / len(scores), 5))

    data = pd.DataFrame(data_dict)
    return data, just_data, klist, Tlist, BCdist, CCdist, ECdist


def _gt_values_by_index(mapping):
    """Return ``[mapping[0], ..., mapping[len(mapping) - 1]]`` as a list."""
    return [mapping[j] for j in range(len(mapping))]
def run_weighted_GT_calcs(G, just_data, Do_kdist, Do_BCdist, Do_CCdist, Do_ECdist, Do_ANC, Do_Ast, Do_WI,
                          multigraph):
    """Compute the requested weighted graph-theory parameters of ``G``.

    Each ``Do_*`` flag enables one calculation when it equals 1. For a
    multigraph the betweenness, eigenvector and max-flow calculations are
    switched off. Every computed value is appended to ``just_data`` (mutated
    in place) and recorded, with a label, in the returned DataFrame. The
    calculations read the edge attributes ``weight``, ``length`` and
    ``pixel width``.

    The per-node distributions are read as ``mapping[j]`` for
    ``j in range(len(mapping))``, so the nodes are expected to be labelled
    ``0 .. n-1``.

    :return: ``(wdata, just_data, klist, BCdist, CCdist, ECdist)`` where
        ``wdata`` is a DataFrame with columns ``x`` (label) and ``y`` (value)
        and the remaining items are the per-node weighted degree,
        betweenness, closeness and eigenvector arrays (``[0]`` when the
        corresponding calculation was not requested)
    """
    from itertools import combinations

    klist = [0]
    BCdist = [0]
    CCdist = [0]
    ECdist = [0]
    if multigraph:
        Do_BCdist = 0
        Do_ECdist = 0
        Do_ANC = 0
    if Do_ANC:
        connected_graph = nx.is_connected(G)

    wdata_dict = {"x": [], "y": []}

    def record(label, value):
        # Keep the flat value list and the labelled table in step.
        just_data.append(value)
        wdata_dict["x"].append(label)
        wdata_dict["y"].append(value)

    if Do_kdist == 1:
        degrees = _weighted_values_by_index(nx.degree(G, weight='weight'))
        klist = np.array(degrees, dtype=float)
        record("Weighted average degree",
               round(sum(degrees) / len(degrees), 5))

    if Do_WI == 1:
        record("Length-weighted Wiener Index",
               round(wiener_index(G, weight='length'), 1))

    if Do_ANC == 1:
        if connected_graph:
            max_flow = float(0)
            # Every unordered pair of periphery nodes. The previous index
            # loops stopped one element short, so the last periphery node was
            # never used and two periphery nodes always produced 0.
            for source, sink in combinations(periphery(G), 2):
                flow_value = maximum_flow(G, source, sink,
                                          capacity='weight')[0]
                if flow_value > max_flow:
                    max_flow = flow_value
            max_flow = round(max_flow, 5)
        else:
            max_flow = 'NaN'
        record("Max flow between periphery", max_flow)

    if Do_Ast == 1:
        record("Weighted assortativity coefficient",
               round(degree_assortativity_coefficient(G, weight='pixel width'),
                     5))

    if Do_BCdist == 1:
        scores = _weighted_values_by_index(
            betweenness_centrality(G, weight='weight'))
        BCdist = np.array(scores, dtype=float)
        record("Width-weighted average betweenness centrality",
               round(sum(scores) / len(scores), 5))

    if Do_CCdist == 1:
        scores = _weighted_values_by_index(
            closeness_centrality(G, distance='length'))
        CCdist = np.array(scores, dtype=float)
        record("Length-weighted average closeness centrality",
               round(sum(scores) / len(scores), 5))

    if Do_ECdist == 1:
        try:
            ECdist1 = eigenvector_centrality(G, max_iter=100, weight='weight')
        except nx.PowerIterationFailedConvergence:
            # Retry with a much larger iteration budget.
            ECdist1 = eigenvector_centrality(G, max_iter=10000,
                                             weight='weight')
        scores = _weighted_values_by_index(ECdist1)
        ECdist = np.array(scores, dtype=float)
        record("Width-weighted average eigenvector centrality",
               round(sum(scores) / len(scores), 5))

    wdata = pd.DataFrame(wdata_dict)
    return wdata, just_data, klist, BCdist, CCdist, ECdist


def _weighted_values_by_index(mapping):
    """Return ``[mapping[0], ..., mapping[len(mapping) - 1]]`` as a list."""
    return [mapping[j] for j in range(len(mapping))]
edge_tuples = [(e[0], e[1], int(weights[i])) for i, e in enumerate(edges)]

# Only keep edges whose two endpoints are both among the selected nodes of
# the node csv. The ids are put in a set once instead of rebuilding a list
# for every membership test.
selected_ids = set(node_ids)
edges = [
    e for e in edge_tuples
    if e[0] in selected_ids and e[1] in selected_ids
]

# Initialize graph, add nodes and edges, calculate modularity and centrality.
G = nx.Graph()
G.add_nodes_from(list(node_ids))
G.add_weighted_edges_from(edges)
groups = community.best_partition(G)
degree = cn.degree_centrality(G)
betweenness = cn.betweenness_centrality(G, weight='weight')
eigenvector = cn.eigenvector_centrality(G, weight='weight')

# Add node attributes for name, modularity, and three types of centrality.
# Keyword arguments are used because the positional order of `name` and
# `values` was swapped between networkx 1.x and 2.x; this form works on both.
nx.set_node_attributes(G, name='name', values=node_dict)
nx.set_node_attributes(G, name='group', values=groups)
nx.set_node_attributes(G, name='degree', values=degree)
nx.set_node_attributes(G, name='betweenness', values=betweenness)
nx.set_node_attributes(G, name='eigenvector', values=eigenvector)

# Create json representation of the graph (for d3).
data = json_graph.node_link_data(G)

# You could create the needed json without NetworkX (but you would forfeit network metrics).
#new_data = dict(nodes=[dict(id=n) for n in list(set(nodes))], links=[dict(source=node_dict[e[0]], target=node_dict[e[1]], weight=e[2]) for e in edges])

# Output json of the graph.
def calc_centrality(self):
    """Return the eigenvector centrality of each package or application.

    The scores are computed on ``self.G`` and returned exactly as produced by
    ``eigenvector_centrality``.
    """
    graph = self.G
    scores = eigenvector_centrality(graph)
    return scores
def centrality(self,
               h_i_shock=None,
               alpha=0.0,
               rank=False,
               seed=123,
               max_iter=100,
               **kwargs):
    """Compute all supported importance measures of the network nodes.

    The individual results are also cached on the instance
    (``self._in_degree_centrality``, ..., ``self._nonlinear_debtrank``).

    :param h_i_shock: initial shock (list or ``np.ndarray`` of length
        ``self._data._N``); defaults to ``self._data.h_i_shock``
    :param alpha: parameter of the non-linear DebtRank
    :param rank: when true, return ranks (1 = largest value, ties share the
        smallest rank) instead of raw values
    :param seed: forwarded to betweenness centrality
    :param max_iter: iteration limit for eigenvector centrality and, as
        ``t_max``, for both DebtRank variants
    :param kwargs: accepted for compatibility; not used
    :return: DataFrame indexed by node with one column per measure
    """
    column_names = [
        'in-degree centrality', 'out-degree centrality',
        'degree centrality', 'betweenness centrality',
        'in-closeness centrality', 'out-closeness centrality',
        'in-eigenvector centrality', 'out-eigenvector centrality',
        'debtrank', 'non-linear debtrank'
    ]

    reversed_network = self._FN.reverse()

    self._in_degree_centrality = ct.in_degree_centrality(self._FN)
    self._out_degree_centrality = ct.out_degree_centrality(self._FN)
    self._degree_centrality = ct.degree_centrality(self._FN)
    self._betweenness_centrality = ct.betweenness_centrality(
        self._FN, weight='weight', seed=seed)
    self._in_closeness_centrality = ct.closeness_centrality(
        self._FN, distance='weight')
    # "out" variants are the "in" measures of the reversed network
    self._out_closeness_centrality = ct.closeness_centrality(
        reversed_network, distance='weight')
    self._in_eigenvector_centrality = ct.eigenvector_centrality(
        self._FN, max_iter=max_iter, weight='weight')
    self._out_eigenvector_centrality = ct.eigenvector_centrality(
        reversed_network, max_iter=max_iter, weight='weight')
    # Katz centrality is left out; the original code marks it as buggy.

    # debtrank
    if h_i_shock is None:
        h_i_shock = self._data.h_i_shock
    assert isinstance(
        h_i_shock, (list, np.ndarray)
    ), "ERROR: the 'h_i_shock' you provided should be a list or np.ndarray."
    assert len(
        h_i_shock
    ) == self._data._N, "ERROR: the length of 'h_i_shock' you provided is not equal to data."

    debtrank_values = self._run_centrality(
        method='dr', h_i_shock=h_i_shock, t_max=max_iter)['centrality']
    self._debtrank = dict(zip(self._nodes, debtrank_values))

    nonlinear_values = self._run_centrality(
        method='nldr', h_i_shock=h_i_shock, alpha=alpha,
        t_max=max_iter)['centrality']
    self._nonlinear_debtrank = dict(zip(self._nodes, nonlinear_values))

    # Same order as column_names. The out-eigenvector slot previously
    # repeated the in-eigenvector values.
    network_centrality = [
        self._in_degree_centrality, self._out_degree_centrality,
        self._degree_centrality, self._betweenness_centrality,
        self._in_closeness_centrality, self._out_closeness_centrality,
        self._in_eigenvector_centrality, self._out_eigenvector_centrality,
        self._debtrank, self._nonlinear_debtrank
    ]
    df = pd.DataFrame(network_centrality).T
    df.columns = column_names
    if rank:
        df = df.rank(method='min', ascending=False)
    return df
def analyze(directed_df, undirected_df, auxiliary_df):
    """Attach centrality scores of two edge-list graphs to a node table.

    A directed graph is built from ``directed_df`` and an undirected graph
    from ``undirected_df`` (column names are lower-cased first; both need
    'weight' and 'change' columns in addition to the edge endpoints).
    Degree, eigenvector, closeness, betweenness and Katz centralities are
    computed on the directed graph, its reverse, and the undirected graph.

    Parameters:
        directed_df: edge list of the directed graph (not modified).
        undirected_df: edge list of the undirected graph (not modified).
        auxiliary_df: node table with a 'docid' column holding the node
            identifier of each row (not modified).

    Returns:
        A deep copy of ``auxiliary_df`` with one extra column per
        centrality. Rows whose 'docid' is not a node of the corresponding
        graph hold NaN. The result is also printed.
    """

    def _lowercase_columns(frame):
        # rename() returns a new frame, so the caller's data is untouched.
        return frame.rename(mapper=lambda name: name.lower(), axis='columns')

    directed_df = _lowercase_columns(directed_df)
    undirected_df = _lowercase_columns(undirected_df)

    G = nx.from_pandas_edgelist(directed_df,
                                edge_attr=['weight', 'change'],
                                create_using=nx.DiGraph)
    G_undirected = nx.from_pandas_edgelist(undirected_df,
                                           edge_attr=['weight', 'change'])
    G_reversed = G.reverse()

    # Katz attenuation: a fixed fraction of 1 / (largest adjacency
    # eigenvalue), computed separately for each graph.
    alpha_coef = 0.9
    alpha = alpha_coef / max(nx.adjacency_spectrum(G).real)
    alpha_undirected = alpha_coef / max(
        nx.adjacency_spectrum(G_undirected).real)

    # NOTE(review): the 'out_*' entries use G and the 'in_*' entries use the
    # reversed graph, as in the original. NetworkX documents eigenvector and
    # Katz centrality on a DiGraph as based on in-edges -- confirm that the
    # in/out labels match the intended meaning.
    centralities = {
        'out_degree':
        weighted_degree_centrality(G),
        'in_degree':
        weighted_degree_centrality(G_reversed),
        'undirected_degree':
        weighted_degree_centrality(G_undirected),
        'out_eigenvector':
        centrality.eigenvector_centrality(G, weight='weight'),
        'in_eigenvector':
        centrality.eigenvector_centrality(G_reversed, weight='weight'),
        'undirected_eigenvector':
        centrality.eigenvector_centrality(G_undirected, weight='weight'),
        'out_closeness':
        centrality.closeness_centrality(G, distance='weight'),
        'in_closeness':
        centrality.closeness_centrality(G_reversed, distance='weight'),
        'undirected_closeness':
        centrality.closeness_centrality(G_undirected, distance='weight'),
        'out_betweenness':
        centrality.betweenness_centrality(G, weight='weight'),
        'in_betweenness':
        centrality.betweenness_centrality(G_reversed, weight='weight'),
        'undirected_betweenness':
        centrality.betweenness_centrality(G_undirected, weight='weight'),
        'out_katz':
        centrality.katz_centrality(G, alpha=alpha, weight='weight'),
        'in_katz':
        centrality.katz_centrality(G_reversed, alpha=alpha, weight='weight'),
        # Uses the undirected graph's own alpha; the original passed the
        # directed alpha here and left alpha_undirected unused.
        'undirected_katz':
        centrality.katz_centrality(G_undirected,
                                   alpha=alpha_undirected,
                                   weight='weight'),
    }

    augmented_auxiliary_df = auxiliary_df.copy(deep=True)
    doc_ids = augmented_auxiliary_df['docid']
    for centrality_type, values in centralities.items():
        # map() looks every docid up in the score dict and yields NaN for
        # ids that are not nodes, so each column always exists.
        augmented_auxiliary_df[centrality_type] = doc_ids.map(values)

    print(augmented_auxiliary_df)
    return augmented_auxiliary_df
def calculate_all_centralities(data):
    """
    Calculates the degree, closeness, betweenness and eigenvector
    centrality of every node of the input graph.

    Parameters:
        data: a node-link json object which represents the graph. It is
              modified in place: every entry of data['nodes'] receives
              four '<metric>CentralityUnnormalized' values followed by
              four '<metric>Centrality' values, the latter divided by the
              largest value of that metric in the graph.

    Returns:
        The same data object that was passed in.

    If the eigenvector centrality cannot be computed, a message is printed
    and both eigenvector fields of every node are set to 1.0.
    """
    # Local import: NetworkXException is the base class of all NetworkX
    # exceptions, so it covers NetworkXError as well as the convergence
    # failure raised by newer NetworkX releases.
    from networkx import NetworkXException

    def _scaled_by_max(values):
        # Returns a copy of values with every non-zero entry divided by the
        # largest entry; an empty or all-zero mapping is copied unchanged.
        peak = max(values.values()) if values else 0
        if not peak:
            return dict(values)
        return {
            node: (value / peak if value != 0 else value)
            for node, value in values.items()
        }

    def _value(values, node_id):
        # values is None only for a failed eigenvector computation, in
        # which case every node gets the placeholder 1.0.
        return 1.0 if values is None else values[node_id]

    # Loads the data to a NetworkX graph object.
    G = json_graph.node_link_graph(data)

    degree = centrality.degree_centrality(G)
    closeness = centrality.closeness_centrality(G)
    betweenness = centrality.betweenness_centrality(G)
    try:
        # Eigenvector centrality can fail to converge.
        eigenvector = centrality.eigenvector_centrality(DiGraph(G),
                                                        max_iter=100000)
    except NetworkXException:
        eigenvector = None
        print("Max iterations exceeded")

    # (json key, raw scores, scores scaled by their maximum)
    metrics = [
        ('degreeCentrality', degree, _scaled_by_max(degree)),
        ('closenessCentrality', closeness, _scaled_by_max(closeness)),
        ('betweennessCentrality', betweenness, _scaled_by_max(betweenness)),
        ('eigenvectorCentrality', eigenvector,
         None if eigenvector is None else _scaled_by_max(eigenvector)),
    ]

    for author in data['nodes']:
        node_id = author['id']
        # Unnormalized values first, then normalized ones, so the key
        # order of each node matches the original implementation.
        for key, raw, _ in metrics:
            author[key + 'Unnormalized'] = _value(raw, node_id)
        for key, _, normalized in metrics:
            author[key] = _value(normalized, node_id)

    return data
# Draw graph G with small (size 30) nodes and display the figure.
def draw_graph(G):
    nx.draw(G, node_size=30)
    plt.show()


# Script entry point: build the graph, draw it, then report the node that
# scores highest under each centrality measure.
if __name__ == "__main__":
    print("Start parsing:")
    data = parse_group()
    G = create_graph(data)
    draw_graph(G)
    # idxmax() on the score Series yields the node with the largest score.
    degree = pd.Series(nxa.degree_centrality(G)).idxmax()
    closeness = pd.Series(nxa.closeness_centrality(G)).idxmax()
    eigenvector = pd.Series(nxa.eigenvector_centrality(G)).idxmax()
    betweennes = pd.Series(nxa.betweenness_centrality(G)).idxmax()
    # Fetch the record for each top node; [0] takes the first entry of the
    # response. Presumably node ids are user ids -- verify against
    # create_graph.
    degree_user = api.users.get(user_ids=degree)[0]
    closeness_user = api.users.get(user_ids=closeness)[0]
    eigenvector_user = api.users.get(user_ids=eigenvector)[0]
    betweeness_user = api.users.get(user_ids=betweennes)[0]
    # NOTE(review): the next statement looks garbled ("******" between two
    # string literals is not valid Python). It was probably two separate
    # print calls, the second an f-string -- restore from the original.
    print("Most important user:"******"Degree centrality: id{degree} - {degree_user['first_name'] + ' ' + degree_user['last_name']}" )
    print( f"Closeness centrality: id{closeness} - {closeness_user['first_name'] + ' ' + closeness_user['last_name']}" )
    # NOTE(review): the fragment is cut off inside the following call.
    print(
def calculate_all_centralities(data):
    """
    Calculates the degree, closeness, betweenness and eigenvector
    centrality of every node of the input graph.

    Parameters:
        data: a node-link json object which represents the graph. It is
              modified in place: every entry of data['nodes'] receives
              four '<metric>CentralityUnnormalized' values followed by
              four '<metric>Centrality' values, the latter divided by the
              largest value of that metric in the graph.

    Returns:
        The same data object that was passed in.

    If the eigenvector centrality cannot be computed, a message is printed
    and both eigenvector fields of every node are set to 1.0.
    """
    # Local import: NetworkXException is the base class of all NetworkX
    # exceptions, so it covers NetworkXError as well as the convergence
    # failure raised by newer NetworkX releases.
    from networkx import NetworkXException

    def _scaled_by_max(values):
        # Returns a copy of values with every non-zero entry divided by the
        # largest entry; an empty or all-zero mapping is copied unchanged.
        peak = max(values.values()) if values else 0
        if not peak:
            return dict(values)
        return {
            node: (value / peak if value != 0 else value)
            for node, value in values.items()
        }

    def _value(values, node_id):
        # values is None only for a failed eigenvector computation, in
        # which case every node gets the placeholder 1.0.
        return 1.0 if values is None else values[node_id]

    # Loads the data to a NetworkX graph object.
    G = json_graph.node_link_graph(data)

    degree = centrality.degree_centrality(G)
    closeness = centrality.closeness_centrality(G)
    betweenness = centrality.betweenness_centrality(G)
    try:
        # Eigenvector centrality can fail to converge.
        eigenvector = centrality.eigenvector_centrality(DiGraph(G),
                                                        max_iter=100000)
    except NetworkXException:
        eigenvector = None
        print("Max iterations exceeded")

    # (json key, raw scores, scores scaled by their maximum)
    metrics = [
        ('degreeCentrality', degree, _scaled_by_max(degree)),
        ('closenessCentrality', closeness, _scaled_by_max(closeness)),
        ('betweennessCentrality', betweenness, _scaled_by_max(betweenness)),
        ('eigenvectorCentrality', eigenvector,
         None if eigenvector is None else _scaled_by_max(eigenvector)),
    ]

    for author in data['nodes']:
        node_id = author['id']
        # Unnormalized values first, then normalized ones, so the key
        # order of each node matches the original implementation.
        for key, raw, _ in metrics:
            author[key + 'Unnormalized'] = _value(raw, node_id)
        for key, _, normalized in metrics:
            author[key] = _value(normalized, node_id)

    return data
# Attach the integer weight at the same position in `weights` to each edge.
edge_tuples = [(e[0], e[1], int(weights[i])) for i, e in enumerate(edges)]

# Only get edges for the select nodes in the node csv. The ids are put in a
# set once so that each endpoint test is a constant-time lookup instead of
# rebuilding and scanning a list for every endpoint of every edge.
selected_node_ids = set(node_ids)
edges = [
    e for e in edge_tuples if all(x in selected_node_ids for x in e[:2])
]

# Initialize graph, add nodes and edges, calculate modularity and centrality.
G = nx.Graph()
G.add_nodes_from(list(node_ids))
G.add_weighted_edges_from(edges)
groups = community.best_partition(G)
degree = cn.degree_centrality(G)
betweenness = cn.betweenness_centrality(G, weight='weight')
eigenvector = cn.eigenvector_centrality(G, weight='weight')

# Add node attributes for name, modularity, and three types of centrality.
# NOTE(review): the (G, name, values) argument order is the NetworkX 1.x
# signature; NetworkX 2.0+ expects (G, values, name). Confirm which version
# this script runs against before changing it.
nx.set_node_attributes(G, 'name', node_dict)
nx.set_node_attributes(G, 'group', groups)
nx.set_node_attributes(G, 'degree', degree)
nx.set_node_attributes(G, 'betweenness', betweenness)
nx.set_node_attributes(G, 'eigenvector', eigenvector)

# Create json representation of the graph (for d3).
data = json_graph.node_link_data(G)

# You could create the needed json without NetworkX (but you would forfeit network metrics).
#new_data = dict(nodes=[dict(id=n) for n in list(set(nodes))], links=[dict(source=node_dict[e[0]], target=node_dict[e[1]], weight=e[2]) for e in edges])

# Output json of the graph.