def get_centrality_labels(knn_graph_obj, perc_labeled, type='degree'):
    """Select the nodes of a graph that should receive labels.

    Nodes are ranked by the requested score (highest first) and the top
    ``int(perc_labeled * number_of_nodes)`` node ids are returned.

    Parameters
    ----------
    knn_graph_obj : graph
        Graph handed to the scoring function; for the random fallback only
        its ``nodes`` attribute is read.
    perc_labeled : float
        Fraction of the nodes to return.
    type : str
        ``'degree'``, ``'closeness'``, ``'betweenness'`` or ``'katz'`` rank by
        the matching ``centrality.<type>_centrality`` function;
        ``'clustering'`` ranks by ``clustering``; any other value draws a
        random sample of nodes without replacement.

    Returns
    -------
    list
        The selected node ids.
    """
    import random

    if type in ('degree', 'closeness', 'betweenness', 'katz'):
        score_fn = getattr(centrality, type + '_centrality')
    elif type == 'clustering':
        score_fn = clustering
    else:
        # Unknown selector: fall back to a random sample of the nodes.
        candidates = list(knn_graph_obj.nodes)
        return random.sample(candidates, int(perc_labeled * len(candidates)))

    # One row per node, indexed by node id, with its score in 'value'.
    ranking = pd.DataFrame.from_dict(score_fn(knn_graph_obj),
                                     orient='index',
                                     columns=['value'])
    how_many = int(perc_labeled * len(ranking.index))
    best_first = ranking.sort_values(by='value', ascending=False)
    return best_first.index[0:how_many].tolist()
def parse(name):
    """Annotate the graph stored in ``<name>.json`` with centrality scores.

    Reads ``<name>.json`` from the directory containing this module, builds
    an undirected graph whose nodes are the positions ``0..len(nodes)-1`` of
    the ``'nodes'`` list and whose edges come from the ``'source'`` /
    ``'target'`` fields of ``'links'``, computes degree, closeness,
    betweenness, eigenvector and Katz centrality, and writes the enriched
    node records (plus the untouched links) to ``../data/<name>.json``
    relative to this module.

    Parameters
    ----------
    name : str
        Base name (without ``.json``) of the input and output files.
    """
    print(name)
    pathbase = path.abspath(path.dirname(__file__))

    # Context manager so the input handle is closed deterministically.
    with open('{0}/{1}.json'.format(pathbase, name)) as source:
        data = json.load(source)

    nodes = data['nodes']
    # Extract (and validate) the per-node fields before the expensive
    # centrality computations so malformed input fails early.
    texts = [node['text'] for node in nodes]
    weights = [float(node['weight']) for node in nodes]

    G = nx.Graph()
    G.add_nodes_from(range(len(nodes)))
    G.add_edges_from((link['source'], link['target'])
                     for link in data['links'])

    degree = centrality.degree_centrality(G)
    closeness = centrality.closeness_centrality(G)
    betweenness = centrality.betweenness_centrality(G)
    # Edge betweenness and the current-flow centralities are currently
    # disabled and therefore not part of the output.
    try:
        eigenvector = centrality.eigenvector_centrality(G, max_iter=1000)
    except Exception:
        # Best effort: if eigenvector centrality cannot be computed, report 0
        # for every node. ``Exception`` (rather than a bare ``except``) keeps
        # KeyboardInterrupt / SystemExit propagating.
        eigenvector = dict.fromkeys(range(len(nodes)), 0)
    katz = centrality.katz_centrality(G)

    obj = {
        'nodes': [
            {
                'text': text,
                'weight': weight,
                'degree': degree[i],
                'closeness': closeness[i],
                'betweenness': betweenness[i],
                'eigenvector': eigenvector[i],
                'katz': katz[i],
            }
            for i, (text, weight) in enumerate(zip(texts, weights))
        ],
        'links': data['links'],
    }

    # Context manager guarantees the output is flushed and closed.
    with open('{0}/../data/{1}.json'.format(pathbase, name), 'w') as target:
        json.dump(obj, target, sort_keys=True)
def analyze(directed_df, undirected_df, auxiliary_df):
    """Compute weighted centralities and attach them to ``auxiliary_df``.

    Column names of both edge lists are lower-cased first. A directed graph
    is built from ``directed_df`` and an undirected graph from
    ``undirected_df`` (networkx default ``source`` / ``target`` columns, with
    ``weight`` and ``change`` as edge attributes). For the directed graph
    ("out"), its reverse ("in") and the undirected graph, the function
    computes degree (via ``weighted_degree_centrality``), eigenvector,
    closeness, betweenness and Katz centrality.

    Katz uses ``alpha = 0.9 / max(real part of the adjacency spectrum)``,
    computed separately for the directed and the undirected graph. The
    reversed graph shares the directed value because a matrix and its
    transpose have the same eigenvalues.

    Parameters
    ----------
    directed_df, undirected_df : pandas.DataFrame
        Edge lists; not modified.
    auxiliary_df : pandas.DataFrame
        Frame with a ``docid`` column holding node ids; not modified.

    Returns
    -------
    pandas.DataFrame
        Deep copy of ``auxiliary_df`` with one column per centrality. Rows
        whose ``docid`` is not a node of the corresponding graph hold NaN in
        newly created columns and keep their value in pre-existing ones.
    """
    # ``rename`` returns new frames, so the caller's inputs stay untouched.
    directed_df = directed_df.rename(columns=str.lower)
    undirected_df = undirected_df.rename(columns=str.lower)

    G = nx.from_pandas_edgelist(directed_df,
                                edge_attr=['weight', 'change'],
                                create_using=nx.DiGraph)
    G_undirected = nx.from_pandas_edgelist(undirected_df,
                                           edge_attr=['weight', 'change'])
    # Reverse once instead of copying the graph for every "in" measure.
    G_reversed = G.reverse()

    alpha_coef = 0.9
    alpha = alpha_coef / max(nx.adjacency_spectrum(G).real)
    alpha_undirected = alpha_coef / max(
        nx.adjacency_spectrum(G_undirected).real)

    centralities = {
        'out_degree':
        weighted_degree_centrality(G),
        'in_degree':
        weighted_degree_centrality(G_reversed),
        'undirected_degree':
        weighted_degree_centrality(G_undirected),
        'out_eigenvector':
        centrality.eigenvector_centrality(G, weight='weight'),
        'in_eigenvector':
        centrality.eigenvector_centrality(G_reversed, weight='weight'),
        'undirected_eigenvector':
        centrality.eigenvector_centrality(G_undirected, weight='weight'),
        'out_closeness':
        centrality.closeness_centrality(G, distance='weight'),
        'in_closeness':
        centrality.closeness_centrality(G_reversed, distance='weight'),
        'undirected_closeness':
        centrality.closeness_centrality(G_undirected, distance='weight'),
        'out_betweenness':
        centrality.betweenness_centrality(G, weight='weight'),
        'in_betweenness':
        centrality.betweenness_centrality(G_reversed, weight='weight'),
        'undirected_betweenness':
        centrality.betweenness_centrality(G_undirected, weight='weight'),
        'out_katz':
        centrality.katz_centrality(G, alpha=alpha, weight='weight'),
        'in_katz':
        centrality.katz_centrality(G_reversed, alpha=alpha, weight='weight'),
        # The undirected graph has its own spectrum, so it needs its own
        # attenuation factor rather than the directed one.
        'undirected_katz':
        centrality.katz_centrality(G_undirected,
                                   alpha=alpha_undirected,
                                   weight='weight'),
    }

    augmented_auxiliary_df = auxiliary_df.copy(deep=True)

    # Create every centrality column on the frame that is returned, so the
    # output schema does not depend on which docids happen to match.
    # ``np.nan`` is used because the ``np.NaN`` alias no longer exists in
    # NumPy 2.0.
    for centrality_type in centralities:
        if centrality_type not in augmented_auxiliary_df.columns:
            augmented_auxiliary_df[centrality_type] = np.nan

    for key, node in augmented_auxiliary_df['docid'].items():
        for centrality_type, values in centralities.items():
            if node in values:
                augmented_auxiliary_df.at[key, centrality_type] = values[node]

    print(augmented_auxiliary_df)
    return augmented_auxiliary_df
def katz_centrality(self):
    """Compute Katz centrality of ``self.G`` and cache it on the instance.

    The result of ``centrality.katz_centrality(self.G)`` is stored in
    ``self.katz_centrality_dict``; nothing is returned.
    """
    scores = centrality.katz_centrality(self.G)
    self.katz_centrality_dict = scores
def draw(self,
         method='',
         h_i_shock=None,
         alpha=None,
         max_iter=100,
         is_savefig=False,
         font_size=5,
         node_color='b',
         seed=None,
         **kwargs):
    """Draw the financial network.

    Parameters:
    ---
    `method`: <str>. optional, the measure whose importance level is mapped
        to the node colors. One of {'dr', 'nldr'} (DebtRank), 'lp' (loss
        percentile) or a centrality code {'idc', 'odc', 'dc', 'bc', 'cc',
        'occ', 'ec', 'oec', 'kc'}; 'icc' and 'iec' are accepted as synonyms
        of 'cc' and 'ec'. Default = '' (nodes use `node_color`, no legend).
        Any other value is drawn like '' after printing a warning.

    `h_i_shock`: <np.ndarray>. the initial shock, only used by 'dr' and
        'nldr'. see `tt.creating_initial_shock()`. If None, the value stored
        on the data object is used.

    `alpha`: <float>. optional, the parameter of Non-Linear DebtRank.
        Default = 0.

    `max_iter`: <int>. the max number of iterations, forwarded as `t_max` to
        the DebtRank methods and as `max_iter` to eigenvector centrality.
        Default = 100.

    `is_savefig`: <False>. optional, if True, the figure is saved to the
        current working directory. otherwise, plt.show().

    `font_size`: <int>. the size of the labels of nodes. Default = 5.

    `node_color`: <str or RGB>. the color of nodes when `method` is empty.
        if method is not empty, the colors reflect the importance level.

    `seed`: optional, forwarded to betweenness centrality ('bc').

    `**kwargs`: 'x_shock' and 't' are required by method 'lp'. The keys
        'node_size', 'node_color', 'edge_cmap', 'labels', 'style' and
        'with_labels' override the defaults passed to networkx.draw; other
        keys are ignored.
    """

    def _quartile_labels(measure):
        # Legend captions for the four importance buckets of one measure.
        return [
            measure + ' < 25%', measure + ' > 25%', measure + ' > 50%',
            measure + ' > 75%'
        ]

    def _quartile_handles():
        # One colored marker per bucket, captioned from self._legend_labels.
        colors = ("#6495ED", "#EEEE00", "#EE9A00", "#EE0000")
        return [
            Line2D([0], [0],
                   marker='o',
                   color=color,
                   markersize=3.5,
                   label=label)
            for color, label in zip(colors, self._legend_labels)
        ]

    def _katz_scores():
        # Eigenvalues of a non-symmetric matrix come back as a complex
        # array; taking the largest modulus (the spectral radius) keeps
        # alpha a real number.
        # NOTE(review): the fixed 0.01 margin is kept from the original and
        # yields a non-positive alpha once the spectral radius reaches
        # 100 - confirm this is acceptable for the data in use.
        phi, _ = np.linalg.eig(self._Ad_ij)
        spectral_radius = np.max(np.abs(phi))
        return ct.katz_centrality(self._FN,
                                  alpha=1 / spectral_radius - 0.01,
                                  weight='weight')

    # initial setting
    title = 'The interbank network' + '(%s)' % self._data._label_year
    method = str(method)
    # 'icc' / 'iec' are handled exactly like 'cc' / 'ec'.
    method = {'icc': 'cc', 'iec': 'ec'}.get(method, method)

    # code -> (legend caption, attribute caching the scores, lazy scorer)
    centrality_specs = {
        'idc': ('in-degree centrality', '_in_degree_centrality',
                lambda: ct.in_degree_centrality(self._FN)),
        'odc': ('out-degree centrality', '_out_degree_centrality',
                lambda: ct.out_degree_centrality(self._FN)),
        'dc': ('degree centrality', '_degree_centrality',
               lambda: ct.degree_centrality(self._FN)),
        'bc': ('betweenness centrality', '_betweenness_centrality',
               lambda: ct.betweenness_centrality(
                   self._FN, weight='weight', seed=seed)),
        'cc': ('in-closeness centrality', '_in_closeness_centrality',
               lambda: ct.closeness_centrality(self._FN, distance='weight')),
        'occ': ('out-closeness centrality', '_out_closeness_centrality',
                lambda: ct.closeness_centrality(self._FN.reverse(),
                                                distance='weight')),
        'ec': ('in-eigenvector centrality', '_in_eigenvector_centrality',
               lambda: ct.eigenvector_centrality(
                   self._FN, max_iter=max_iter, weight='weight')),
        'oec': ('out-eigenvector centrality', '_out_eigenvector_centrality',
                lambda: ct.eigenvector_centrality(
                    self._FN.reverse(), max_iter=max_iter, weight='weight')),
        'kc': ('katz centrality', '_katz_centrality', _katz_scores),
    }

    # Stay None when no legend applies, so an unrecognized method cannot hit
    # an undefined name further down.
    legend_handles = None
    legend_ncol = None

    if method in ('dr', 'nldr'):
        if h_i_shock is None:
            try:
                self._h_i_shock = self._data.h_i_shock
            except Exception as err:
                raise Exception(
                    "ERROR: the parameter 'h_i_shock' cannot be empty.",
                    h_i_shock) from err
        else:
            self._h_i_shock = h_i_shock
        # Kept as asserts so callers still see AssertionError; note that
        # these checks disappear under ``python -O``.
        assert isinstance(
            self._h_i_shock, (list, np.ndarray)
        ), "ERROR: the 'h_i_shock' you provided should be a list or np.ndarray."
        assert len(
            self._h_i_shock
        ) == self._data._N, "ERROR: the length of 'h_i_shock' you provided is not equal to data."

        # the node labels: initially shocked nodes get a star suffix
        self._node_labels = {}
        for node, shock in zip(self._nodes, self._h_i_shock):
            assert shock >= 0, "ERROR: the value of h_i_shock should in [0,1]"
            if shock == 0.0:
                self._node_labels[node] = node
            else:
                self._node_labels[node] = node + r"$\bigstar$"

        if method == 'dr':
            self._legend_labels = _quartile_labels('debtrank')
            self._nodes_color = self._run_centrality(
                method='dr', h_i_shock=self._h_i_shock,
                t_max=max_iter)['node color']
        else:  # 'nldr'
            if alpha is None:
                alpha = 0
                print(
                    "Warning: the paramater of 'alpha' is essential! Default = %.2f"
                    % alpha)
            # rename figure title
            title = 'The interbank network, ' + r'$\alpha = %.2f$' % alpha + ' (%s)' % self._data._label_year
            self._legend_labels = _quartile_labels('nonlinear debtrank')
            self._nodes_color = self._run_centrality(
                method='nldr',
                h_i_shock=self._h_i_shock,
                alpha=alpha,
                t_max=max_iter)['node color']

        legend_handles = _quartile_handles() + [
            Line2D([0], [0],
                   marker='*',
                   markerfacecolor="#000000",
                   color='w',
                   markersize=6.5,
                   label='the initial shock')
        ]
        legend_ncol = 5
    elif method == 'lp':
        # title
        title = r'$x_{shock} = %.2f$' % kwargs[
            'x_shock'] + ', t = %d' % kwargs[
                't'] + ' (%s)' % self._data._label_year
        # the node labels
        self._node_labels = dict(zip(self._nodes, self._nodes))
        self._legend_labels = [
            'importantance level < 25%', 'importantance level > 25 %',
            'importantance level > 50%', 'importantance level > 75%'
        ]
        # the color of nodes
        self._nodes_color = self._run_centrality(
            method='lp', t=kwargs['t'],
            x_shock=kwargs['x_shock'])['node color']
        legend_handles = _quartile_handles()
        legend_ncol = 4
    elif method in centrality_specs:
        caption, cache_attr, compute_scores = centrality_specs[method]
        # the node labels
        self._node_labels = dict(zip(self._nodes, self._nodes))
        # the legend labels
        self._legend_labels = _quartile_labels(caption)
        # the color of nodes; the raw scores are cached on the instance
        scores = compute_scores()
        setattr(self, cache_attr, scores)
        self._nodes_color = self._run_centrality(
            method=method, centrality=scores)['node color']
        legend_handles = _quartile_handles()
        legend_ncol = 4
    else:
        # the node labels
        self._node_labels = dict(zip(self._nodes, self._nodes))
        self._nodes_color = node_color  # "#00BFFF"
        print("Warning: the color of nodes have no special meaning.")

    # draw
    draw_kwargs = {
        'node_size': self._node_assets,
        'node_color': self._nodes_color,
        'edge_color': self._edge_color,
        'edge_cmap': plt.cm.binary,
        'labels': self._node_labels,
        'width': 0.8,
        'style': 'solid',
        'with_labels': True
    }
    # customize your nx.draw: only these keys may be overridden by the caller
    for option in ('node_size', 'node_color', 'edge_cmap', 'labels', 'style',
                   'with_labels'):
        if option in kwargs:
            draw_kwargs[option] = kwargs[option]

    plt.rcParams['figure.dpi'] = 160
    plt.rcParams['savefig.dpi'] = 400
    plt.title(title, fontsize=font_size + 2)
    nx.draw(self._FN,
            pos=nx.circular_layout(self._FN),
            font_size=font_size,
            **draw_kwargs)

    if legend_handles is not None:
        plt.legend(handles=legend_handles,
                   ncol=legend_ncol,
                   fontsize=font_size - 1,
                   loc='lower center',
                   frameon=False)

    if is_savefig:
        net = "interbanknetwork"
        date = parse(self._data._label_year).strftime("%Y%m%d")
        plt.savefig(net + date + '.png', format='png', dpi=400)
        print("save to '%s'" % os.getcwd() + ' and named as %s' %
              (net + date) + '.png')
    else:
        plt.show()