Ejemplo n.º 1
0
def get_centrality_labels(knn_graph_obj, perc_labeled, type='degree'):
    """Choose which nodes of a graph should be labeled.

    For a recognised ``type`` the nodes are ranked by the matching score,
    highest first, and the leading ``int(perc_labeled * node_count)`` node
    identifiers are returned.  Any other ``type`` value selects the same
    number of nodes uniformly at random instead.

    Parameters
    ----------
    knn_graph_obj
        Graph passed to the scoring functions; its ``nodes`` attribute is
        read for the random fallback.
    perc_labeled
        Fraction of the nodes to return.
    type
        One of ``'degree'``, ``'closeness'``, ``'betweenness'``, ``'katz'``
        or ``'clustering'``; anything else triggers random sampling.

    Returns
    -------
    list
        The selected node identifiers.
    """
    import random

    # Step 1: obtain the node -> score mapping for the requested strategy.
    if type == 'degree':
        scores = centrality.degree_centrality(knn_graph_obj)
    elif type == 'closeness':
        scores = centrality.closeness_centrality(knn_graph_obj)
    elif type == 'betweenness':
        scores = centrality.betweenness_centrality(knn_graph_obj)
    elif type == 'katz':
        scores = centrality.katz_centrality(knn_graph_obj)
    elif type == 'clustering':
        scores = clustering(knn_graph_obj)
    else:
        # Unknown strategy: no ranking, just draw a random subset.
        candidates = list(knn_graph_obj.nodes)
        return random.sample(candidates, int(perc_labeled * len(candidates)))

    # Step 2: rank the nodes by score and keep the requested share.
    ranking = pd.DataFrame.from_dict(scores, orient='index', columns=['value'])
    how_many = int(perc_labeled * len(ranking.index))
    ordered = ranking.sort_values(by='value', ascending=False)
    return ordered.index[:how_many].tolist()
Ejemplo n.º 2
0
def parse(name):
    """Annotate the nodes of a JSON graph with centrality scores.

    Reads ``<this module's directory>/<name>.json``, which must contain a
    ``nodes`` list (each entry providing ``text`` and ``weight``) and a
    ``links`` list (each entry providing ``source`` and ``target``).  Nodes
    are identified by their position in the ``nodes`` list.  The degree,
    closeness, betweenness, eigenvector and Katz centrality of every node
    are computed on an undirected graph and the enriched node list, together
    with the untouched links, is written to
    ``<this module's directory>/../data/<name>.json``.

    Parameters
    ----------
    name
        Base name (without ``.json``) of the input and output files.  It is
        also printed to stdout.
    """
    print(name)
    pathbase = path.abspath(path.dirname(__file__))

    # Context managers guarantee the handles are closed (and the output is
    # flushed) even if parsing or serialisation raises.
    with open('{0}/{1}.json'.format(pathbase, name)) as source_file:
        data = json.load(source_file)
    nodes = data['nodes']

    G = nx.Graph()
    # Register every node up front so isolated nodes still get scores.
    G.add_nodes_from(range(len(nodes)))
    for link in data['links']:
        G.add_edge(link['source'], link['target'])

    degree = centrality.degree_centrality(G)
    closeness = centrality.closeness_centrality(G)
    betweenness = centrality.betweenness_centrality(G)
    try:
        eigenvector = centrality.eigenvector_centrality(G, max_iter=1000)
    except nx.NetworkXException:
        # Best effort: when networkx cannot compute the scores (e.g. the
        # power iteration does not converge) fall back to zero everywhere.
        eigenvector = {i: 0 for i in range(len(nodes))}
    katz = centrality.katz_centrality(G)

    obj = {
        'nodes': [
            {
                'text': node['text'],
                'weight': float(node['weight']),
                'degree': degree[i],
                'closeness': closeness[i],
                'betweenness': betweenness[i],
                'eigenvector': eigenvector[i],
                'katz': katz[i],
            }
            for i, node in enumerate(nodes)
        ],
        'links': data['links'],
    }
    with open('{0}/../data/{1}.json'.format(pathbase, name), 'w') as target_file:
        json.dump(obj, target_file, sort_keys=True)
Ejemplo n.º 3
0
def analyze(directed_df, undirected_df, auxiliary_df):
    """Attach centrality scores of two edge-list graphs to an auxiliary table.

    A directed graph is built from ``directed_df`` and an undirected graph
    from ``undirected_df`` (column names are lower-cased first; both frames
    must provide ``weight`` and ``change`` columns plus the default
    ``source``/``target`` columns expected by ``nx.from_pandas_edgelist``).
    Degree, eigenvector, closeness, betweenness and Katz centralities are
    computed for the directed graph, its reverse and the undirected graph.
    A copy of ``auxiliary_df`` gets one column per centrality; each row is
    filled with the score of the node named in its ``docid`` column and stays
    NaN when that node is absent from the graph.

    The input frames are not modified.  The augmented frame is printed to
    stdout and returned.

    NOTE(review): networkx documents eigenvector, Katz and closeness
    centrality on a DiGraph as being based on in-edges, so the ``out_`` /
    ``in_`` key names below may be swapped for those measures -- confirm
    before relying on them.
    """
    def lower_case(name):
        return name.lower()

    # ``rename`` returns a new frame, so the caller's data stays untouched.
    directed_df = directed_df.rename(mapper=lower_case, axis='columns')
    undirected_df = undirected_df.rename(mapper=lower_case, axis='columns')

    G = nx.from_pandas_edgelist(directed_df,
                                edge_attr=['weight', 'change'],
                                create_using=nx.DiGraph)
    G_undirected = nx.from_pandas_edgelist(undirected_df,
                                           edge_attr=['weight', 'change'])
    # Reverse once instead of copying the graph for every measure.
    G_reversed = G.reverse()

    # Katz centrality needs an attenuation factor below 1 / lambda_max of the
    # graph it runs on, so each graph gets its own value.  A graph and its
    # reverse share the same spectrum, hence ``alpha`` serves both directions.
    alpha_coef = 0.9
    alpha = alpha_coef / max(nx.adjacency_spectrum(G).real)
    alpha_undirected = alpha_coef / max(
        nx.adjacency_spectrum(G_undirected).real)

    centralities = {
        'out_degree':
        weighted_degree_centrality(G),
        'in_degree':
        weighted_degree_centrality(G_reversed),
        'undirected_degree':
        weighted_degree_centrality(G_undirected),
        'out_eigenvector':
        centrality.eigenvector_centrality(G, weight='weight'),
        'in_eigenvector':
        centrality.eigenvector_centrality(G_reversed, weight='weight'),
        'undirected_eigenvector':
        centrality.eigenvector_centrality(G_undirected, weight='weight'),
        'out_closeness':
        centrality.closeness_centrality(G, distance='weight'),
        'in_closeness':
        centrality.closeness_centrality(G_reversed, distance='weight'),
        'undirected_closeness':
        centrality.closeness_centrality(G_undirected, distance='weight'),
        'out_betweenness':
        centrality.betweenness_centrality(G, weight='weight'),
        'in_betweenness':
        centrality.betweenness_centrality(G_reversed, weight='weight'),
        'undirected_betweenness':
        centrality.betweenness_centrality(G_undirected, weight='weight'),
        'out_katz':
        centrality.katz_centrality(G, alpha=alpha, weight='weight'),
        'in_katz':
        centrality.katz_centrality(G_reversed, alpha=alpha, weight='weight'),
        'undirected_katz':
        centrality.katz_centrality(G_undirected,
                                   alpha=alpha_undirected,
                                   weight='weight'),
    }

    augmented_auxiliary_df = auxiliary_df.copy(deep=True)

    # Create every centrality column on the frame that is actually returned,
    # so the columns exist (as NaN) even when no ``docid`` matches a node.
    for centrality_type in centralities:
        augmented_auxiliary_df[centrality_type] = np.nan

    for key, row in augmented_auxiliary_df.iterrows():
        node = row['docid']
        for centrality_type, values in centralities.items():
            if node in values:
                augmented_auxiliary_df.at[key, centrality_type] = values[node]

    print(augmented_auxiliary_df)
    return augmented_auxiliary_df
Ejemplo n.º 4
0
 def katz_centrality(self):
     """Compute Katz centrality for ``self.G`` and keep it on the instance.

     ``centrality.katz_centrality`` is called with its default arguments and
     its result is stored in ``self.katz_centrality_dict``.  Nothing is
     returned.
     """
     graph = self.G
     scores = centrality.katz_centrality(graph)
     self.katz_centrality_dict = scores
Ejemplo n.º 5
0
    def draw(self,
             method='',
             h_i_shock=None,
             alpha=None,
             max_iter=100,
             is_savefig=False,
             font_size=5,
             node_color='b',
             seed=None,
             **kwargs):
        """Draw the financial network on a circular layout.

        Parameters:
        ---
        `method`: <str>.
            optional, how node colors are derived. One of the DebtRank
            methods {'dr', 'nldr'}, the loss percentile 'lp', or a centrality
            {'idc', 'odc', 'dc', 'bc', 'cc' (alias 'icc'), 'occ',
            'ec' (alias 'iec'), 'oec', 'kc'}. Default = '': every node gets
            `node_color` and no legend is drawn. Any other value is treated
            the same way.

        `h_i_shock`: <np.ndarray>.
            the initial shock, only used by 'dr' and 'nldr'. When omitted,
            `self._data.h_i_shock` is used. see `tt.creating_initial_shock()`.

        `alpha`: <float>.
            optional, the parameter of Non-Linear DebtRank ('nldr' only).
            Default = 0 (a warning is printed when it is omitted).

        `max_iter`: <int>.
            the max number of iterations; forwarded as `t_max` to the
            DebtRank methods and as `max_iter` to the eigenvector
            centralities. Default = 100.

        `is_savefig`: <False>.
            optional, if True the figure is saved as a PNG in the current
            working directory, otherwise `plt.show()` is called.

        `font_size`: <int>.
            the size of the labels of nodes. Default = 5.

        `node_color`: <str or RGB>.
            the color of nodes when `method` does not select a coloring.

        `seed`:
            optional, forwarded to the betweenness centrality ('bc').

        `**kwargs`:
            'node_size', 'edge_cmap', 'labels', 'style' and 'with_labels'
            override the drawing defaults; method 'lp' additionally requires
            'x_shock' and 't'. Other keys are ignored.
        """
        def _quartile_labels(measure):
            # Legend texts for the four importance buckets of one measure.
            return [
                measure + ' < 25%', measure + ' > 25%', measure + ' > 50%',
                measure + ' > 75%'
            ]

        def _quartile_handles(labels):
            # One colored legend marker per importance bucket.
            colors = ("#6495ED", "#EEEE00", "#EE9A00", "#EE0000")
            return [
                Line2D([0], [0],
                       marker='o',
                       color=color,
                       markersize=3.5,
                       label=label) for color, label in zip(colors, labels)
            ]

        def _katz():
            # Katz centrality needs alpha below 1 / spectral radius. The
            # eigenvalues may be complex, so use their magnitudes to get a
            # real-valued bound.
            # NOTE(review): assumes self._Ad_ij is the weighted adjacency
            # matrix of self._FN -- confirm.
            eigenvalues = np.linalg.eigvals(self._Ad_ij)
            spectral_radius = np.max(np.abs(eigenvalues))
            return ct.katz_centrality(self._FN,
                                      alpha=1 / spectral_radius - 0.01,
                                      weight='weight')

        # initial setting
        title = 'The interbank network' + '(%s)' % self._data._label_year
        method = str(method)
        debtrank_alias = {'dr': 'debtrank', 'nldr': 'nonlinear debtrank'}
        importance_alias = {'lp': 'loss_percentile'}
        # method -> (name passed to _run_centrality, legend measure,
        #            attribute caching the scores, function computing them)
        in_closeness = ('cc', 'in-closeness centrality',
                        '_in_closeness_centrality',
                        lambda: ct.closeness_centrality(self._FN,
                                                        distance='weight'))
        in_eigenvector = ('ec', 'in-eigenvector centrality',
                          '_in_eigenvector_centrality',
                          lambda: ct.eigenvector_centrality(
                              self._FN, max_iter=max_iter, weight='weight'))
        centrality_specs = {
            'idc': ('idc', 'in-degree centrality', '_in_degree_centrality',
                    lambda: ct.in_degree_centrality(self._FN)),
            'odc': ('odc', 'out-degree centrality', '_out_degree_centrality',
                    lambda: ct.out_degree_centrality(self._FN)),
            'dc': ('dc', 'degree centrality', '_degree_centrality',
                   lambda: ct.degree_centrality(self._FN)),
            'bc': ('bc', 'betweenness centrality', '_betweenness_centrality',
                   lambda: ct.betweenness_centrality(
                       self._FN, weight='weight', seed=seed)),
            'cc': in_closeness,
            'icc': in_closeness,
            'occ': ('occ', 'out-closeness centrality',
                    '_out_closeness_centrality',
                    lambda: ct.closeness_centrality(self._FN.reverse(),
                                                    distance='weight')),
            'ec': in_eigenvector,
            'iec': in_eigenvector,
            'oec': ('oec', 'out-eigenvector centrality',
                    '_out_eigenvector_centrality',
                    lambda: ct.eigenvector_centrality(self._FN.reverse(),
                                                      max_iter=max_iter,
                                                      weight='weight')),
            'kc': ('kc', 'katz centrality', '_katz_centrality', _katz),
        }

        # Stay None when no coloring method applies, so that an unrecognised
        # method simply draws without a legend instead of failing.
        _legend_elements = None
        _ncol = None

        # method
        if method in debtrank_alias:
            if h_i_shock is None:
                try:
                    self._h_i_shock = self._data.h_i_shock
                except AttributeError as err:
                    raise Exception(
                        "ERROR: the parameter 'h_i_shock' cannot be empty.",
                        h_i_shock) from err
            else:
                self._h_i_shock = h_i_shock

            assert isinstance(
                self._h_i_shock, (list, np.ndarray)
            ), "ERROR: the 'h_i_shock' you provided should be a list or np.ndarray."
            assert len(
                self._h_i_shock
            ) == self._data._N, "ERROR: the length of 'h_i_shock' you provided is not equal to data."

            # the node labels: shocked nodes are marked with a star
            self._node_labels = {}
            for i, j in zip(self._nodes, self._h_i_shock):
                assert j >= 0, "ERROR: the value of h_i_shock should in [0,1]"
                if j == 0.0:
                    self._node_labels[i] = i
                else:
                    self._node_labels[i] = i + r"$\bigstar$"

            if method == 'dr':
                self._legend_labels = _quartile_labels('debtrank')
                self._nodes_color = self._run_centrality(
                    method='dr', h_i_shock=self._h_i_shock,
                    t_max=max_iter)['node color']
            else:  # 'nldr'
                if alpha is None:
                    alpha = 0
                    print(
                        "Warning: the paramater of 'alpha' is essential! Default = %.2f"
                        % alpha)
                # rename figure title
                title = 'The interbank network, ' + r'$\alpha = %.2f$' % alpha + ' (%s)' % self._data._label_year
                self._legend_labels = _quartile_labels('nonlinear debtrank')
                self._nodes_color = self._run_centrality(
                    method='nldr',
                    h_i_shock=self._h_i_shock,
                    alpha=alpha,
                    t_max=max_iter)['node color']

            _legend_elements = _quartile_handles(self._legend_labels)
            _legend_elements.append(
                Line2D([0], [0],
                       marker='*',
                       markerfacecolor="#000000",
                       color='w',
                       markersize=6.5,
                       label='the initial shock'))
            _ncol = 5
        elif method in importance_alias:
            # title
            title = r'$x_{shock} = %.2f$' % kwargs[
                'x_shock'] + ', t = %d' % kwargs[
                    't'] + ' (%s)' % self._data._label_year
            # the node labels
            self._node_labels = dict(zip(self._nodes, self._nodes))
            # 'lp'
            self._legend_labels = [
                'importantance level < 25%', 'importantance level > 25 %',
                'importantance level > 50%', 'importantance level > 75%'
            ]
            # the color of nodes
            self._nodes_color = self._run_centrality(
                method='lp', t=kwargs['t'],
                x_shock=kwargs['x_shock'])['node color']

            _legend_elements = _quartile_handles(self._legend_labels)
            _ncol = 4
        elif method in centrality_specs:
            run_as, measure, attr_name, compute = centrality_specs[method]
            # the node labels
            self._node_labels = dict(zip(self._nodes, self._nodes))
            self._legend_labels = _quartile_labels(measure)
            # the color of nodes; the raw scores are kept on the instance
            scores = compute()
            setattr(self, attr_name, scores)
            self._nodes_color = self._run_centrality(
                method=run_as, centrality=scores)['node color']

            _legend_elements = _quartile_handles(self._legend_labels)
            _ncol = 4
        else:
            # the node labels
            self._node_labels = dict(zip(self._nodes, self._nodes))
            self._nodes_color = node_color  # "#00BFFF"
            print("Warning: the color of nodes have no special meaning.")

        # draw
        draw_kwargs = {
            'node_size': self._node_assets,
            'node_color': self._nodes_color,
            'edge_color': self._edge_color,
            'edge_cmap': plt.cm.binary,
            'labels': self._node_labels,
            'width': 0.8,
            'style': 'solid',
            'with_labels': True
        }

        # customize your nx.draw ('node_color' is a named parameter of this
        # method and therefore can never arrive through **kwargs)
        for option in ('node_size', 'edge_cmap', 'labels', 'style',
                       'with_labels'):
            if option in kwargs:
                draw_kwargs[option] = kwargs[option]

        plt.rcParams['figure.dpi'] = 160
        plt.rcParams['savefig.dpi'] = 400
        plt.title(title, fontsize=font_size + 2)
        nx.draw(self._FN,
                pos=nx.circular_layout(self._FN),
                font_size=font_size,
                **draw_kwargs)
        if _legend_elements is not None:
            plt.legend(handles=_legend_elements,
                       ncol=_ncol,
                       fontsize=font_size - 1,
                       loc='lower center',
                       frameon=False)

        if is_savefig:
            net = "interbanknetwork"
            date = parse(self._data._label_year).strftime("%Y%m%d")
            plt.savefig(net + date + '.png', format='png', dpi=400)
            print("save to '%s'" % os.getcwd() + ' and named as %s' %
                  (net + date) + '.png')
        else:
            plt.show()