import networkx as nx
from networkx.algorithms import centrality


def centrality_analysis(G, isDirected=False):
    '''
    :param G: nx.DiGraph or nx.Graph
    :return: dict mapping each node to a list of its centrality scores
    '''
    nodes = G.nodes()
    if isDirected:
        in_dc = centrality.in_degree_centrality(G)
        out_dc = centrality.out_degree_centrality(G)
        bc = centrality.betweenness_centrality(G)
        ec = centrality.eigenvector_centrality(G)

        cent = {}
        for node in nodes:
            cent[node] = [in_dc[node], out_dc[node], bc[node], ec[node]]
        print(
            "Four types of centrality are calculated:\n"
            "\tin_degree_centrality\n\tout_degree_centrality\n"
            "\tbetweenness_centrality\n\teigenvector_centrality"
        )
        return cent
    else:
        dc = centrality.degree_centrality(G)
        bc = centrality.betweenness_centrality(G)
        ec = centrality.eigenvector_centrality(G)

        cent = {}
        for node in nodes:
            cent[node] = [dc[node], bc[node], ec[node]]
        print(
            "Three types of centrality are calculated:\n"
            "\tdegree_centrality\n\tbetweenness_centrality\n\teigenvector_centrality"
        )
        return cent
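
# A minimal usage sketch (not from the original source), using the imports
# added above; the karate-club graph is just demo input.
if __name__ == "__main__":
    G_demo = nx.karate_club_graph()
    cent = centrality_analysis(G_demo)          # undirected: [dc, bc, ec] per node
    hub = max(cent, key=lambda n: cent[n][2])   # highest eigenvector centrality
    print("most central node:", hub, cent[hub])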
def get_layer_info(subject, journal_volume, edge_list):

    G = nx.Graph()
    G.add_weighted_edges_from(edge_list)

    PATH = "C:/Users/hexie/Documents/APS_result/" + str(
        journal_volume) + "/" + str(subject)

    # create the directory if it doesn't exist, then work inside it
    os.makedirs(PATH, exist_ok=True)
    os.chdir(PATH)

    degree_centrality = nxc.degree_centrality(G)
    try:
        eigen_vector_centrality = nxc.eigenvector_centrality(G)
        np.save("eigen_vector_centrality.npy", eigen_vector_centrality)
    except nx.PowerIterationFailedConvergence:
        print("eigenvector centrality failed to converge within 100 power iterations")

    closeness_centrality = nxc.closeness_centrality(G)
    betweenness_centrality = nxc.betweenness_centrality(G)

    np.save("degree_centrality.npy", degree_centrality)
    np.save("closeness_centrality.npy", closeness_centrality)
    np.save("betweenness_centrality.npy", betweenness_centrality)

    with open(str(subject) + str(journal_volume) + ".txt", 'w') as f:
        f.write('Number of Edges: ' + str(nx.number_of_edges(G)) + "\n")
        f.write('Number of Nodes: ' + str(nx.number_of_nodes(G)) + "\n")

    nx.draw(G)
    plt.savefig(str(subject) + str(journal_volume) + ".png")
    plt.clf()
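
# A sketch (not in the original) of reading one saved dictionary back:
# np.save pickles the dict, so np.load needs allow_pickle=True and .item().
def load_centrality(fname="degree_centrality.npy"):
    return np.load(fname, allow_pickle=True).item()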
Example #3
def set_type_centrality(G, type_str):
    g_ss = nx.Graph()
    # collapse multi-edges whose key begins with type_str into weighted simple edges
    for u, v, k in G.edges(keys=True):
        if k.startswith(type_str):
            if g_ss.has_edge(u, v):
                g_ss[u][v]['weight'] += 1
            else:
                g_ss.add_edge(u, v, weight=1)

    max_iterations = 300

    # Need a lot of exception handling in case algorithm doesn't converge.
    try:
        centrality_dict = dict(eigenvector_centrality(g_ss, weight='weight'))
        nx.set_node_attributes(G, centrality_dict, type_str + '_centrality')
    except nx.NetworkXPointlessConcept:
        nx.set_node_attributes(G, 0, type_str + '_centrality')
    except nx.PowerIterationFailedConvergence:
        logging.debug(
            "Centrality algorithm failed to converge in 100 iterations.")

        try:
            centrality_dict = dict(
                eigenvector_centrality(g_ss,
                                       max_iter=max_iterations,
                                       weight='weight'))
            nx.set_node_attributes(G, centrality_dict,
                                   type_str + '_centrality')
        except nx.PowerIterationFailedConvergence:
            logging.debug(
                "Centrality algorithm failed to converge in {} iterations.".
                format(max_iterations))
            nx.set_node_attributes(G, 0, type_str + '_centrality')

    except Exception:
        logging.debug("Centrality algorithm failed")
        nx.set_node_attributes(G, 0, type_str + '_centrality')
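
# Usage sketch (invented data; assumes this module imports networkx as nx,
# logging, and eigenvector_centrality). Edge keys carry the interaction type
# as a prefix, matching the startswith() check above.
if __name__ == "__main__":
    M = nx.MultiGraph()
    M.add_edge("a", "b", key="co-authorship#1")
    M.add_edge("a", "b", key="co-authorship#2")
    M.add_edge("b", "c", key="co-authorship#3")
    set_type_centrality(M, "co-authorship")
    print(nx.get_node_attributes(M, "co-authorship_centrality"))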
Example #4
def calcCentrality(linkrecords):
    """Calculate eigenvector centrality for each package or application.

    linkrecords: list of dicts with keys: focal, other, type, raw_count, scaled_count"""

    G = nx.Graph()
    for link in linkrecords:
        G.add_node(link["focal"])
        G.add_node(link["other"])
        G.add_edge(link["focal"], link["other"], weight=link["raw_count"])

    from networkx.algorithms.centrality import eigenvector_centrality
    #centralities = betweenness_centrality(G, k=G.number_of_nodes(), weight="weight", endpoints=True)
    centralities = eigenvector_centrality(G, max_iter=1000)
    return centralities
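
# Usage sketch (invented records; assumes `import networkx as nx` at module
# level). Field layout follows the docstring above.
demo_links = [
    {"focal": "app1", "other": "libA", "type": "import", "raw_count": 3, "scaled_count": 0.3},
    {"focal": "app2", "other": "libA", "type": "import", "raw_count": 1, "scaled_count": 0.1},
    {"focal": "app1", "other": "app2", "type": "import", "raw_count": 2, "scaled_count": 0.2},
]
print(calcCentrality(demo_links))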
Example #6
def calc_node_based_centrality(edge_index, centrality='degree'):
    # edge_index: 2 x num_edges tensor; transpose into a list of (u, v) pairs
    adj_list = edge_index.numpy().T
    G = nx.Graph()
    G.add_edges_from(adj_list)
    if centrality == 'degree':
        nodes_centrality = degree_centrality(G)
    elif centrality == 'eigenvector':
        nodes_centrality = eigenvector_centrality(G)
    elif centrality == "closeness":
        nodes_centrality = closeness_centrality(G)
    else:
        raise ValueError("unknown centrality type: " + str(centrality))

    edges_centrality = dict()
    for u, v in adj_list:
        edges_centrality[(u, v)] = nodes_centrality[u] * nodes_centrality[v]
    return edges_centrality
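
# Usage sketch (assumes PyTorch and this module's networkx imports):
# edge_index is the conventional 2 x num_edges tensor.
if __name__ == "__main__":
    import torch
    edge_index = torch.tensor([[0, 1, 1, 2],
                               [1, 0, 2, 1]])
    print(calc_node_based_centrality(edge_index, centrality="degree"))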
Example #7
def parse(name):
    print(name)
    pathbase = path.abspath(path.dirname(__file__))
    G = nx.Graph()
    with open('{0}/{1}.json'.format(pathbase, name)) as f:
        data = json.load(f)
    nodes = data['nodes']
    text = {i: node['text'] for i, node in enumerate(nodes)}
    weight = {i: float(node['weight']) for i, node in enumerate(nodes)}
    for i in range(len(nodes)):
        G.add_node(i)
    for link in data['links']:
        G.add_edge(link['source'], link['target'])

    degree = centrality.degree_centrality(G)
    closeness = centrality.closeness_centrality(G)
    betweenness = centrality.betweenness_centrality(G)
    #edge_betweenness = centrality.edge_betweenness_centrality(G)
    #current_flow_closeness = centrality.current_flow_closeness_centrality(G)
    #current_flow_betweenness =\
    #    centrality.current_flow_betweenness_centrality(G)
    try:
        eigenvector = centrality.eigenvector_centrality(G, max_iter=1000)
    except nx.PowerIterationFailedConvergence:
        eigenvector = {i: 0 for i in range(len(nodes))}
    katz = centrality.katz_centrality(G)

    obj = {'nodes': [], 'links': data['links']}
    for i in range(len(nodes)):
        obj['nodes'].append({
            'text': text[i],
            'weight': weight[i],
            'degree': degree[i],
            'closeness': closeness[i],
            'betweenness': betweenness[i],
            #'edge_betweenness': edge_betweenness[i],
            #'current_flow_closeness': current_flow_closeness[i],
            #'current_flow_betweenness': current_flow_betweenness[i],
            'eigenvector': eigenvector[i],
            'katz': katz[i],
        })
    with open('{0}/../data/{1}.json'.format(pathbase, name), 'w') as f:
        json.dump(obj, f, sort_keys=True)
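
# Input sketch (invented data; assumes this module's `path` and `json`
# imports): parse("demo") reads demo.json beside the script and writes the
# augmented graph to ../data/demo.json, so that directory must already exist.
if __name__ == "__main__":
    demo = {
        "nodes": [{"text": "a", "weight": 1.0}, {"text": "b", "weight": 2.0},
                  {"text": "c", "weight": 1.5}],
        "links": [{"source": 0, "target": 1}, {"source": 1, "target": 2}],
    }
    with open(path.join(path.abspath(path.dirname(__file__)), "demo.json"), "w") as f:
        json.dump(demo, f)
    parse("demo")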
Example #8
    def fit(self, X_df, y_array):
        d = {'link': np.array(y_array)}
        y_array = pd.DataFrame(data=d)


        path = os.path.dirname(__file__)
        self.data = pd.read_csv(os.path.join(path, 'nodes_info_new.csv'),low_memory=False)

        def clean_date(s):
            s = re.sub('[^0-9]', '', str(s))
            if len(s)==0:
                return np.nan
            if s == '1':
                return np.nan
            if len(s)<4:
                date = int(s)
            else:
                date = int(s[:4])
                if date>2000:
                    date = int(s[:3])
            return date

        self.data['birth_date'] = self.data['birth_date'].apply(clean_date)
        self.data['death_date'] = self.data['death_date'].apply(clean_date)

        def get_country(s):
            s = re.sub('[^a-zA-Z ]', '', str(s))
            if len(s)==0:
                return np.nan
            return s.split()[-1]

        self.data['birth_place'] = self.data['birth_place'].apply(get_country)
        self.data['death_place'] = self.data['death_place'].apply(get_country)


        # dictionary holding each thinker's information, keyed by name
        self.thinker_dictionary = {}
        for i, row in self.data.iterrows():
            self.thinker_dictionary[row['thinker']] = {
                'thinker_id': row['id'],
                'birth_date': row['birth_date'],
                'birth_place': row['birth_place'],
                'death_place': row['death_place'],
                'death_date': row['death_date'],
                'summary': row['summary'],
            }



        max_id = self.data['id'].max()
        self.nodes = np.arange(1,max_id+1)

        linked = X_df[(y_array['link'] == 1).values.flatten()]
        self.edges = np.array([
            [self.thinker_dictionary[row['thinker_1']]['thinker_id'],
             self.thinker_dictionary[row['thinker_2']]['thinker_id']]
            for i, row in linked.iterrows()
        ])

        self.G.add_nodes_from(self.nodes)
        self.G.add_edges_from(self.edges)

        self.graph_features = pd.DataFrame({'thinker_id':self.nodes})
        self.connected_comp = list(nx.connected_components(self.G))

        group_id = {}
        group_len = {}
        for think_id in self.nodes:
            for i,group in enumerate(self.connected_comp):
                if think_id in group:
                    group_id[think_id] = i
                    group_len[think_id] = len(group)
                    break

        self.graph_features['connected_comp'] = [group_id[think_id] for think_id in self.nodes]
        self.graph_features['connected_comp_len'] = [group_len[think_id] for think_id in self.nodes]

        self.graph_features['degree_centrality'] = degree_centrality(self.G).values()
        self.graph_features['degree_centrality']/=self.graph_features['degree_centrality'].max()

        self.graph_features['eigenvector_centrality'] = eigenvector_centrality(self.G).values()
        self.graph_features['eigenvector_centrality']/=self.graph_features['eigenvector_centrality'].max()

#        self.graph_features['closeness_centrality'] = closeness_centrality(self.G).values()
#        self.graph_features['closeness_centrality']/=self.graph_features['closeness_centrality'].max()

#        self.graph_features['betweenness_centrality'] = betweenness_centrality(self.G).values()
#        self.graph_features['betweenness_centrality']/=self.graph_features['betweenness_centrality'].max()

#        self.graph_features['subgraph_centrality'] = subgraph_centrality(self.G).values()
#        self.graph_features['subgraph_centrality']/=self.graph_features['subgraph_centrality'].max()

        self.graph_features['pagerank'] = nx.pagerank(self.G, alpha=0.9).values()
        self.graph_features['pagerank']/=self.graph_features['pagerank'].max()




        return self
Example #9
    def draw(self,
             method='',
             h_i_shock=None,
             alpha=None,
             max_iter=100,
             is_savefig=False,
             font_size=5,
             node_color='b',
             seed=None,
             **kwargs):
        """draw financial network.

        Parameters:
        ---
        `method`: <str>.
            the optional, the color of nodes map to the important level of bank. i.e. {'dr','nldr','dc',...}. Default = 'dr'.
        
        `h_i_shock`: <np.ndarray>. 
            the initial shock. see `tt.creating_initial_shock()`.

        `alpha`: <float>.
            optional, the parameter of Non-Linear DebtRank. Default = 0.

        `t_max`: <int>. 
            the max number of iteration. Default = 100.

        `is_savefig`: <False>. 
            optional, if True, it will be saved to the current work environment. otherwise, plt.show().

        `font_size`: <int>. 
            the size of the labels of nodes. Default = 5.  

        `node_color`: <str or RGB>.
            the color of nodes. if method is not empty, the colors reflect the importance level.  

        `**kwargs`: 
            customize your figure, see detail in networkx.draw.
        """
        # initial setting
        title = 'The interbank network' + '(%s)' % self._data._label_year
        method = str(method)
        debtrank_alias = {'dr': 'debtrank', 'nldr': 'nonlinear debtrank'}
        importance_alias = {'lp': 'loss_percentile'}
        centrality_alias = {
            'idc': 'in-degree centrality',
            'odc': 'out-degree centrality',
            'dc': 'degree centrality',
            'bc': 'betweenness centrality',
            'cc': 'closeness(in) centrality',
            'occ': 'out-closeness centrality',
            'ec': 'eigenvector(in) centrality',
            'oec': 'out-eigenvector centrality',
            'kc': 'katz centrality',
        }
        # method
        if method in debtrank_alias:
            if h_i_shock is None:
                try:
                    self._h_i_shock = self._data.h_i_shock
                except AttributeError:
                    raise Exception(
                        "ERROR: the parameter 'h_i_shock' cannot be empty.",
                        h_i_shock)
            else:
                self._h_i_shock = h_i_shock

            assert isinstance(
                self._h_i_shock, (list, np.ndarray)
            ), "ERROR: the 'h_i_shock' you provided should be a list or np.ndarray."
            assert len(
                self._h_i_shock
            ) == self._data._N, "ERROR: the length of 'h_i_shock' you provided is not equal to data."

            # the node labels
            self._node_labels = {}
            for i, j in zip(self._nodes, self._h_i_shock):
                assert j >= 0, "ERROR: the value of h_i_shock should be in [0,1]"
                if j == 0.0:
                    self._node_labels[i] = i
                else:
                    self._node_labels[i] = i + r"$\bigstar$"
            # the DebtRank method
            if method == 'dr':
                # the legend labels
                self._legend_labels = [
                    'debtrank < 25%', 'debtrank > 25%', 'debtrank > 50%',
                    'debtrank > 75%'
                ]
                # the color of nodes
                self._nodes_color = self._run_centrality(
                    method='dr', h_i_shock=self._h_i_shock,
                    t_max=max_iter)['node color']
            elif method == 'nldr':
                if alpha is None:
                    alpha = 0
                    print(
                        "Warning: the parameter 'alpha' is required! Default = %.2f"
                        % alpha)
                # rename figure title
                title = 'The interbank network, ' + r'$\alpha = %.2f$' % alpha + ' (%s)' % self._data._label_year
                # the legend labels
                self._legend_labels = [
                    'nonlinear debtrank < 25%', 'nonlinear debtrank > 25%',
                    'nonlinear debtrank > 50%', 'nonlinear debtrank > 75%'
                ]
                # the color of nodes
                self._nodes_color = self._run_centrality(
                    method='nldr',
                    h_i_shock=self._h_i_shock,
                    alpha=alpha,
                    t_max=max_iter)['node color']
            else:
                pass  # TODO

            _legend_elements = [
                Line2D([0], [0],
                       marker='o',
                       color="#6495ED",
                       markersize=3.5,
                       label=self._legend_labels[0]),
                Line2D([0], [0],
                       marker='o',
                       color="#EEEE00",
                       markersize=3.5,
                       label=self._legend_labels[1]),
                Line2D([0], [0],
                       marker='o',
                       color="#EE9A00",
                       markersize=3.5,
                       label=self._legend_labels[2]),
                Line2D([0], [0],
                       marker='o',
                       color="#EE0000",
                       markersize=3.5,
                       label=self._legend_labels[3]),
                Line2D([0], [0],
                       marker='*',
                       markerfacecolor="#000000",
                       color='w',
                       markersize=6.5,
                       label='the initial shock')
            ]
            _ncol = 5
        elif method in importance_alias:
            # title
            title = (r'$x_{shock} = %.2f$' % kwargs['x_shock'] +
                     ', t = %d' % kwargs['t'] +
                     ' (%s)' % self._data._label_year)
            # the node labels
            self._node_labels = dict(zip(self._nodes, self._nodes))
            # 'lp'
            self._legend_labels = [
                'importance level < 25%', 'importance level > 25%',
                'importance level > 50%', 'importance level > 75%'
            ]
            # the color of nodes
            self._nodes_color = self._run_centrality(
                method='lp', t=kwargs['t'],
                x_shock=kwargs['x_shock'])['node color']

            _legend_elements = [
                Line2D([0], [0],
                       marker='o',
                       color="#6495ED",
                       markersize=3.5,
                       label=self._legend_labels[0]),
                Line2D([0], [0],
                       marker='o',
                       color="#EEEE00",
                       markersize=3.5,
                       label=self._legend_labels[1]),
                Line2D([0], [0],
                       marker='o',
                       color="#EE9A00",
                       markersize=3.5,
                       label=self._legend_labels[2]),
                Line2D([0], [0],
                       marker='o',
                       color="#EE0000",
                       markersize=3.5,
                       label=self._legend_labels[3])
            ]
            _ncol = 4

        elif method in centrality_alias:
            # the node labels
            self._node_labels = dict(zip(self._nodes, self._nodes))
            # 'dc'
            if method == 'idc':
                # dict: dictionary. see detail in centrality.
                # the legend labels
                self._legend_labels = [
                    'in-degree centrality < 25%', 'in-degree centrality > 25%',
                    'in-degree centrality > 50%', 'in-degree centrality > 75%'
                ]
                # the color of nodes
                self._in_degree_centrality = ct.in_degree_centrality(self._FN)
                self._nodes_color = self._run_centrality(
                    method='idc',
                    centrality=self._in_degree_centrality)['node color']
            elif method == 'odc':
                # dict: dictionary. see detail in centrality.
                # the legend labels
                self._legend_labels = [
                    'out-degree centrality < 25%',
                    'out-degree centrality > 25%',
                    'out-degree centrality > 50%',
                    'out-degree centrality > 75%'
                ]
                # the color of nodes
                self._out_degree_centrality = ct.out_degree_centrality(
                    self._FN)
                self._nodes_color = self._run_centrality(
                    method='odc',
                    centrality=self._out_degree_centrality)['node color']
            elif method == 'dc':
                # dict: dictionary. see detail in centrality.
                # the legend labels
                self._legend_labels = [
                    'degree centrality < 25%', 'degree centrality > 25%',
                    'degree centrality > 50%', 'degree centrality > 75%'
                ]
                # the color of nodes
                self._degree_centrality = ct.degree_centrality(self._FN)
                self._nodes_color = self._run_centrality(
                    method='dc',
                    centrality=self._degree_centrality)['node color']
            elif method == 'bc':
                # dict: dictionary. see detail in centrality.
                # the legend labels
                self._legend_labels = [
                    'betweenness centrality < 25%',
                    'betweenness centrality > 25%',
                    'betweenness centrality > 50%',
                    'betweenness centrality > 75%'
                ]
                # the color of nodes
                self._betweenness_centrality = ct.betweenness_centrality(
                    self._FN, weight='weight', seed=seed)
                self._nodes_color = self._run_centrality(
                    method='bc',
                    centrality=self._betweenness_centrality)['node color']
            elif method == 'cc' or method == 'icc':
                # dict: dictionary. see detail in centrality.
                # the legend labels
                self._legend_labels = [
                    'in-closeness centrality < 25%',
                    'in-closeness centrality > 25%',
                    'in-closeness centrality > 50%',
                    'in-closeness centrality > 75%'
                ]
                # the color of nodes
                self._in_closeness_centrality = ct.closeness_centrality(
                    self._FN, distance='weight')
                self._nodes_color = self._run_centrality(
                    method='cc',
                    centrality=self._in_closeness_centrality)['node color']
            elif method == 'occ':
                # dict: dictionary. see detail in centrality.
                # the legend labels
                self._legend_labels = [
                    'out-closeness centrality < 25%',
                    'out-closeness centrality > 25%',
                    'out-closeness centrality > 50%',
                    'out-closeness centrality > 75%'
                ]
                # the color of nodes
                self._out_closeness_centrality = ct.closeness_centrality(
                    self._FN.reverse(), distance='weight')
                self._nodes_color = self._run_centrality(
                    method='occ',
                    centrality=self._out_closeness_centrality)['node color']
            elif method == 'ec' or method == 'iec':
                # dict: dictionary. see detail in centrality.
                # the legend labels
                self._legend_labels = [
                    'in-eigenvector centrality < 25%',
                    'in-eigenvector centrality > 25%',
                    'in-eigenvector centrality > 50%',
                    'in-eigenvector centrality > 75%'
                ]
                # the color of nodes
                self._in_eigenvector_centrality = ct.eigenvector_centrality(
                    self._FN, max_iter=max_iter, weight='weight')
                self._nodes_color = self._run_centrality(
                    method='ec',
                    centrality=self._in_eigenvector_centrality)['node color']
            elif method == 'oec':
                # dict: dictionary. see detail in centrality.
                # the legend labels
                self._legend_labels = [
                    'out-eigenvector centrality < 25%',
                    'out-eigenvector centrality > 25%',
                    'out-eigenvector centrality > 50%',
                    'out-eigenvector centrality > 75%'
                ]
                # the color of nodes
                self._out_eigenvector_centrality = ct.eigenvector_centrality(
                    self._FN.reverse(), max_iter=max_iter, weight='weight')
                self._nodes_color = self._run_centrality(
                    method='oec',
                    centrality=self._out_eigenvector_centrality)['node color']
            elif method == 'kc':  # bug
                # dict: dictionary. see detail in centrality.
                # the legend labels
                self._legend_labels = [
                    'katz centrality < 25%', 'katz centrality > 25%',
                    'katz centrality > 50%', 'katz centrality > 75%'
                ]
                # the color of nodes
                phi, _ = np.linalg.eig(self._Ad_ij)
                self._katz_centrality = ct.katz_centrality(
                    self._FN, alpha=1 / np.max(phi) - 0.01, weight='weight')
                self._nodes_color = self._run_centrality(
                    method='kc',
                    centrality=self._katz_centrality)['node color']
            else:
                pass  # TODO

            _legend_elements = [
                Line2D([0], [0],
                       marker='o',
                       color="#6495ED",
                       markersize=3.5,
                       label=self._legend_labels[0]),
                Line2D([0], [0],
                       marker='o',
                       color="#EEEE00",
                       markersize=3.5,
                       label=self._legend_labels[1]),
                Line2D([0], [0],
                       marker='o',
                       color="#EE9A00",
                       markersize=3.5,
                       label=self._legend_labels[2]),
                Line2D([0], [0],
                       marker='o',
                       color="#EE0000",
                       markersize=3.5,
                       label=self._legend_labels[3])
            ]
            _ncol = 4

        else:
            # the node labels
            self._node_labels = dict(zip(self._nodes, self._nodes))
            self._nodes_color = node_color  # "#00BFFF"
            print("Warning: the color of nodes have no special meaning.")

        # draw
        draw_default = {
            'node_size': self._node_assets,
            'node_color': self._nodes_color,
            'edge_color': self._edge_color,
            'edge_cmap': plt.cm.binary,
            'labels': self._node_labels,
            'width': 0.8,
            'style': 'solid',
            'with_labels': True
        }

        # customize your nx.draw
        if 'node_size' in kwargs:
            draw_default['node_size'] = kwargs['node_size']
        if 'node_color' in kwargs:
            draw_default['node_color'] = kwargs['node_color']
        if 'edge_cmap' in kwargs:
            draw_default['edge_cmap'] = kwargs['edge_cmap']
        if 'labels' in kwargs:
            draw_default['labels'] = kwargs['labels']
        if 'style' in kwargs:
            draw_default['style'] = kwargs['style']
        if 'with_labels' in kwargs:
            draw_default['with_labels'] = kwargs['with_labels']

        draw_kwargs = draw_default

        plt.rcParams['figure.dpi'] = 160
        plt.rcParams['savefig.dpi'] = 400
        plt.title(title, fontsize=font_size + 2)
        nx.draw(self._FN,
                pos=nx.circular_layout(self._FN),
                font_size=font_size,
                **draw_kwargs)
        if method:
            plt.legend(handles=_legend_elements,
                       ncol=_ncol,
                       fontsize=font_size - 1,
                       loc='lower center',
                       frameon=False)

        if is_savefig:
            net = "interbanknetwork"
            date = parse(self._data._label_year).strftime("%Y%m%d")
            plt.savefig(net + date + '.png', format='png', dpi=400)
            print("save to '%s'" % os.getcwd() + ' and named as %s' %
                  (net + date) + '.png')
        else:
            plt.show()
Example #10
def run_GT_calcs(G, just_data, Do_kdist, Do_dia, Do_BCdist, Do_CCdist, Do_ECdist, Do_GD, Do_Eff, \
                               Do_clust, Do_ANC, Do_Ast, Do_WI, multigraph):

    # getting nodes and edges and defining variables for later use
    klist = [0]
    Tlist = [0]
    BCdist = [0]
    CCdist = [0]
    ECdist = [0]
    if multigraph:
        Do_BCdist = 0
        Do_ECdist = 0
        Do_clust = 0

    data_dict = {"x": [], "y": []}

    nnum = int(nx.number_of_nodes(G))
    enum = int(nx.number_of_edges(G))

    if Do_ANC or Do_dia:
        connected_graph = nx.is_connected(G)

    # making a dictionary for the parameters and results
    just_data.append(nnum)
    data_dict["x"].append("Number of nodes")
    data_dict["y"].append(nnum)
    just_data.append(enum)
    data_dict["x"].append("Number of edges")
    data_dict["y"].append(enum)
    multi_image_settings.progress(35)

    # calculating parameters as requested

    # creating degree histogram
    if (Do_kdist == 1):
        klist1 = nx.degree(G)
        ksum = 0
        klist = np.zeros(len(klist1))
        for j in range(len(klist1)):
            ksum = ksum + klist1[j]
            klist[j] = klist1[j]
        k = ksum / len(klist1)
        k = round(k, 5)
        just_data.append(k)
        data_dict["x"].append("Average degree")
        data_dict["y"].append(k)

    multi_image_settings.progress(40)

    # calculating network diameter
    if (Do_dia == 1):
        if connected_graph:
            dia = int(diameter(G))
        else:
            dia = 'NaN'
        just_data.append(dia)
        data_dict["x"].append("Network Diameter")
        data_dict["y"].append(dia)

    multi_image_settings.progress(45)

    # calculating graph density
    if (Do_GD == 1):
        GD = nx.density(G)
        GD = round(GD, 5)
        just_data.append(GD)
        data_dict["x"].append("Graph density")
        data_dict["y"].append(GD)

    multi_image_settings.progress(50)

    # calculating global efficiency
    if (Do_Eff == 1):
        Eff = global_efficiency(G)
        Eff = round(Eff, 5)
        just_data.append(Eff)
        data_dict["x"].append("Global Efficiency")
        data_dict["y"].append(Eff)

    multi_image_settings.progress(55)

    if (Do_WI == 1):
        WI = wiener_index(G)
        WI = round(WI, 1)
        just_data.append(WI)
        data_dict["x"].append("Wiener Index")
        data_dict["y"].append(WI)

    multi_image_settings.progress(60)

    # calculating clustering coefficients
    if (Do_clust == 1):
        Tlist1 = clustering(G)
        Tlist = np.zeros(len(Tlist1))
        for j in range(len(Tlist1)):
            Tlist[j] = Tlist1[j]
        clust = average_clustering(G)
        clust = round(clust, 5)
        just_data.append(clust)
        data_dict["x"].append("Average clustering coefficient")
        data_dict["y"].append(clust)

    # calculating average nodal connectivity
    if (Do_ANC == 1):
        if connected_graph:
            ANC = average_node_connectivity(G)
            ANC = round(ANC, 5)
        else:
            ANC = 'NaN'
        just_data.append(ANC)
        data_dict["x"].append("Average nodal connectivity")
        data_dict["y"].append(ANC)

    multi_image_settings.progress(65)

    # calculating assortativity coefficient
    if (Do_Ast == 1):
        Ast = degree_assortativity_coefficient(G)
        Ast = round(Ast, 5)
        just_data.append(Ast)
        data_dict["x"].append("Assortativity Coefficient")
        data_dict["y"].append(Ast)

    multi_image_settings.progress(70)

    # calculating betweenness centrality histogram
    if (Do_BCdist == 1):
        BCdist1 = betweenness_centrality(G)
        Bsum = 0
        BCdist = np.zeros(len(BCdist1))
        for j in range(len(BCdist1)):
            Bsum += BCdist1[j]
            BCdist[j] = BCdist1[j]
        Bcent = Bsum / len(BCdist1)
        Bcent = round(Bcent, 5)
        just_data.append(Bcent)
        data_dict["x"].append("Average betweenness centrality")
        data_dict["y"].append(Bcent)
    multi_image_settings.progress(75)

    # calculating closeness centrality
    if (Do_CCdist == 1):
        CCdist1 = closeness_centrality(G)
        Csum = 0
        CCdist = np.zeros(len(CCdist1))
        for j in range(len(CCdist1)):
            Csum += CCdist1[j]
            CCdist[j] = CCdist1[j]
        Ccent = Csum / len(CCdist1)
        Ccent = round(Ccent, 5)
        just_data.append(Ccent)
        data_dict["x"].append("Average closeness centrality")
        data_dict["y"].append(Ccent)

    multi_image_settings.progress(80)

    # calculating eigenvector centrality
    # (dedented: this should not depend on the closeness flag, matching the
    # weighted variant below)
    if (Do_ECdist == 1):
        try:
            ECdist1 = eigenvector_centrality(G, max_iter=100)
        except nx.PowerIterationFailedConvergence:
            ECdist1 = eigenvector_centrality(G, max_iter=10000)
        Esum = 0
        ECdist = np.zeros(len(ECdist1))
        for j in range(len(ECdist1)):
            Esum += ECdist1[j]
            ECdist[j] = ECdist1[j]
        Ecent = Esum / len(ECdist1)
        Ecent = round(Ecent, 5)  # was round(Ccent, 5), a copy-paste bug
        just_data.append(Ecent)
        data_dict["x"].append("Average eigenvector centrality")
        data_dict["y"].append(Ecent)

    data = pd.DataFrame(data_dict)

    return data, just_data, klist, Tlist, BCdist, CCdist, ECdist
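
# Usage sketch (invented graph and flags; assumes this module's own imports,
# including multi_image_settings and the networkx algorithm names).
if __name__ == "__main__":
    import networkx as nx
    G_demo = nx.connected_watts_strogatz_graph(30, 4, 0.1, seed=1)
    data, raw, *_ = run_GT_calcs(
        G_demo, [], Do_kdist=1, Do_dia=1, Do_BCdist=1, Do_CCdist=1,
        Do_ECdist=1, Do_GD=1, Do_Eff=1, Do_clust=1, Do_ANC=0, Do_Ast=1,
        Do_WI=0, multigraph=False)
    print(data)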
Example #11
def run_weighted_GT_calcs(G, just_data, Do_kdist, Do_BCdist, Do_CCdist,
                          Do_ECdist, Do_ANC, Do_Ast, Do_WI, multigraph):

    # includes weight in the calculations
    klist = [0]
    BCdist = [0]
    CCdist = [0]
    ECdist = [0]
    if multigraph:
        Do_BCdist = 0
        Do_ECdist = 0
        Do_ANC = 0

    if Do_ANC:
        connected_graph = nx.is_connected(G)

    wdata_dict = {"x": [], "y": []}

    if (Do_kdist == 1):
        klist1 = nx.degree(G, weight='weight')
        ksum = 0
        klist = np.zeros(len(klist1))
        for j in range(len(klist1)):
            ksum = ksum + klist1[j]
            klist[j] = klist1[j]
        k = ksum / len(klist1)
        k = round(k, 5)
        just_data.append(k)
        wdata_dict["x"].append("Weighted average degree")
        wdata_dict["y"].append(k)

    if (Do_WI == 1):
        WI = wiener_index(G, weight='length')
        WI = round(WI, 1)
        just_data.append(WI)
        wdata_dict["x"].append("Length-weighted Wiener Index")
        wdata_dict["y"].append(WI)

    if (Do_ANC == 1):
        if connected_graph:
            max_flow = float(0)
            p = periphery(G)
            q = len(p) - 1
            for s in range(0, q - 1):
                for t in range(s + 1, q):
                    flow_value = maximum_flow(G, p[s], p[t],
                                              capacity='weight')[0]
                    if (flow_value > max_flow):
                        max_flow = flow_value
            max_flow = round(max_flow, 5)
        else:
            max_flow = 'NaN'
        just_data.append(max_flow)
        wdata_dict["x"].append("Max flow between periphery")
        wdata_dict["y"].append(max_flow)

    if (Do_Ast == 1):
        Ast = degree_assortativity_coefficient(G, weight='pixel width')
        Ast = round(Ast, 5)
        just_data.append(Ast)
        wdata_dict["x"].append("Weighted assortativity coefficient")
        wdata_dict["y"].append(Ast)

    if (Do_BCdist == 1):
        BCdist1 = betweenness_centrality(G, weight='weight')
        Bsum = 0
        BCdist = np.zeros(len(BCdist1))
        for j in range(len(BCdist1)):
            Bsum += BCdist1[j]
            BCdist[j] = BCdist1[j]
        Bcent = Bsum / len(BCdist1)
        Bcent = round(Bcent, 5)
        just_data.append(Bcent)
        wdata_dict["x"].append("Width-weighted average betweenness centrality")
        wdata_dict["y"].append(Bcent)

    if (Do_CCdist == 1):
        CCdist1 = closeness_centrality(G, distance='length')
        Csum = 0
        CCdist = np.zeros(len(CCdist1))
        for j in range(len(CCdist1)):
            Csum += CCdist1[j]
            CCdist[j] = CCdist1[j]
        Ccent = Csum / len(CCdist1)
        Ccent = round(Ccent, 5)
        just_data.append(Ccent)
        wdata_dict["x"].append("Length-weighted average closeness centrality")
        wdata_dict["y"].append(Ccent)

    if (Do_ECdist == 1):
        try:
            ECdist1 = eigenvector_centrality(G, max_iter=100, weight='weight')
        except nx.PowerIterationFailedConvergence:
            ECdist1 = eigenvector_centrality(G,
                                             max_iter=10000,
                                             weight='weight')
        Esum = 0
        ECdist = np.zeros(len(ECdist1))
        for j in range(len(ECdist1)):
            Esum += ECdist1[j]
            ECdist[j] = ECdist1[j]
        Ecent = Esum / len(ECdist1)
        Ecent = round(Ecent, 5)
        just_data.append(Ecent)
        wdata_dict["x"].append("Width-weighted average eigenvector centrality")
        wdata_dict["y"].append(Ecent)

    wdata = pd.DataFrame(wdata_dict)

    return wdata, just_data, klist, BCdist, CCdist, ECdist
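
# Attribute sketch (invented values; assumes this module's imports): the
# weighted variant reads three distinct edge attributes -- 'weight' for
# degree/betweenness/eigenvector, 'length' for the Wiener index and
# closeness, and 'pixel width' for assortativity.
if __name__ == "__main__":
    import networkx as nx
    G_demo = nx.random_geometric_graph(20, 0.5, seed=2)  # integer node labels
    for u, v in G_demo.edges():
        G_demo.edges[u, v].update({"weight": 1.0, "length": 1.0, "pixel width": 1.0})
    wdata, *_ = run_weighted_GT_calcs(
        G_demo, [], Do_kdist=1, Do_BCdist=1, Do_CCdist=1, Do_ECdist=1,
        Do_ANC=0, Do_Ast=1, Do_WI=1, multigraph=False)
    print(wdata)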
Example #12
edge_tuples = [(e[0], e[1], int(weights[i])) for i, e in enumerate(edges)]

# Only get edges for the select nodes in the node csv.
edges = []
for e in edge_tuples:
    if all(x in list(node_ids) for x in e[:2]):
        edges.append(e)

# Initialize graph, add nodes and edges, calculate modularity and centrality.
G = nx.Graph()
G.add_nodes_from(list(node_ids))
G.add_weighted_edges_from(edges)
groups = community.best_partition(G)
degree = cn.degree_centrality(G)
betweenness = cn.betweenness_centrality(G, weight='weight')
eigenvector = cn.eigenvector_centrality(G, weight='weight')

# Add node attributes for name, modularity, and three types of centrality
# (networkx >= 2.0 argument order: set_node_attributes(G, values, name)).
nx.set_node_attributes(G, node_dict, 'name')
nx.set_node_attributes(G, groups, 'group')
nx.set_node_attributes(G, degree, 'degree')
nx.set_node_attributes(G, betweenness, 'betweenness')
nx.set_node_attributes(G, eigenvector, 'eigenvector')

# Create json representation of the graph (for d3).
data = json_graph.node_link_data(G)

# You could create the needed json without NetworkX (but you would forfeit network metrics).
#new_data = dict(nodes=[dict(id=n) for n in list(set(nodes))], links=[dict(source=node_dict[e[0]], target=node_dict[e[1]], weight=e[2]) for e in edges])

# Output json of the graph.
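
# A minimal completion (not in the original; the filename and `import json`
# are assumptions) for the write step described above:
import json
with open("graph.json", "w") as f:
    json.dump(data, f)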
Example #13
    def calc_centrality(self):
        """Calculate eigenvector centrality for each package or application"""
        return eigenvector_centrality(self.G)
Example #14
    def centrality(self,
                   h_i_shock=None,
                   alpha=0.0,
                   rank=False,
                   seed=123,
                   max_iter=100,
                   **kwargs):
        # includes: degree centrality, ...
        centrality_index = [
            'in-degree centrality', 'out-degree centrality',
            'degree centrality', 'betweenness centrality',
            'in-closeness centrality', 'out-closeness centrality',
            'in-eigenvector centrality', 'out-eigenvector centrality',
            'debtrank', 'non-linear debtrank'
        ]

        # the greater the value, the more important
        self._in_degree_centrality = ct.in_degree_centrality(self._FN)
        # reflect the enthusiasm of banks
        self._out_degree_centrality = ct.out_degree_centrality(self._FN)
        # the greater the value, the more important
        self._degree_centrality = ct.degree_centrality(self._FN)
        # the greater the value, the more important
        self._betweenness_centrality = ct.betweenness_centrality(
            self._FN, weight='weight', seed=seed)
        # integration
        self._in_closeness_centrality = ct.closeness_centrality(
            self._FN, distance='weight')
        # radiality
        self._out_closeness_centrality = ct.closeness_centrality(
            self._FN.reverse(), distance='weight')
        # the greater the value, the more important; similar to PageRank
        self._in_eigenvector_centrality = ct.eigenvector_centrality(
            self._FN, max_iter=max_iter, weight='weight')
        self._out_eigenvector_centrality = ct.eigenvector_centrality(
            self._FN.reverse(), max_iter=max_iter, weight='weight')
        # self._katz_centrality = ct.katz_centrality(self._FN, weight='weight') # bug

        # debtrank
        if h_i_shock is None:
            h_i_shock = self._data.h_i_shock

        assert isinstance(
            h_i_shock, (list, np.ndarray)
        ), "ERROR: the 'h_i_shock' you provided should be a list or np.ndarray."
        assert len(
            h_i_shock
        ) == self._data._N, "ERROR: the length of 'h_i_shock' you provided is not equal to data."

        self._debtrank = self._run_centrality(method='dr',
                                              h_i_shock=h_i_shock,
                                              t_max=max_iter)['centrality']
        self._debtrank = dict(zip(self._nodes, self._debtrank))
        self._nonlinear_debtrank = self._run_centrality(
            method='nldr', h_i_shock=h_i_shock, alpha=alpha,
            t_max=max_iter)['centrality']
        self._nonlinear_debtrank = dict(
            zip(self._nodes, self._nonlinear_debtrank))

        network_centrality = [
            self._in_degree_centrality, self._out_degree_centrality,
            self._degree_centrality, self._betweenness_centrality,
            self._in_closeness_centrality, self._out_closeness_centrality,
            self._in_eigenvector_centrality, self._out_eigenvector_centrality,
            self._debtrank, self._nonlinear_debtrank
        ]
        df = pd.DataFrame(network_centrality).T
        df.columns = centrality_index

        if rank:
            df = df.rank(method='min', ascending=False)

        return df
Example #15
def analyze(directed_df, undirected_df, auxiliary_df):
    directed_df = directed_df.copy(deep=True)
    undirected_df = undirected_df.copy(deep=True)

    directed_df = directed_df.rename(mapper=lambda name: name.lower(),
                                     axis='columns')
    undirected_df = undirected_df.rename(mapper=lambda name: name.lower(),
                                         axis='columns')

    G = nx.from_pandas_edgelist(directed_df,
                                edge_attr=['weight', 'change'],
                                create_using=nx.DiGraph)
    G_undirected = nx.from_pandas_edgelist(undirected_df,
                                           edge_attr=['weight', 'change'])

    alpha_coef = 0.9

    alpha = alpha_coef / max(nx.adjacency_spectrum(G).real)
    alpha_undirected = alpha_coef / max(
        nx.adjacency_spectrum(G_undirected).real)

    centralities = {
        'out_degree':
        weighted_degree_centrality(G),
        'in_degree':
        weighted_degree_centrality(G.reverse()),
        'undirected_degree':
        weighted_degree_centrality(G_undirected),
        'out_eigenvector':
        centrality.eigenvector_centrality(G, weight='weight'),
        'in_eigenvector':
        centrality.eigenvector_centrality(G.reverse(), weight='weight'),
        'undirected_eigenvector':
        centrality.eigenvector_centrality(G_undirected, weight='weight'),
        'out_closeness':
        centrality.closeness_centrality(G, distance='weight'),
        'in_closeness':
        centrality.closeness_centrality(G.reverse(), distance='weight'),
        'undirected_closeness':
        centrality.closeness_centrality(G_undirected, distance='weight'),
        'out_betweenness':
        centrality.betweenness_centrality(G, weight='weight'),
        'in_betweenness':
        centrality.betweenness_centrality(G.reverse(), weight='weight'),
        'undirected_betweenness':
        centrality.betweenness_centrality(G_undirected, weight='weight'),
        'out_katz':
        centrality.katz_centrality(G, alpha=alpha, weight='weight'),
        'in_katz':
        centrality.katz_centrality(G.reverse(), alpha=alpha, weight='weight'),
        'undirected_katz':
        centrality.katz_centrality(G_undirected,
                                   alpha=alpha_undirected,
                                   weight='weight')
    }

    for centrality_type in centralities:
        directed_df[centrality_type] = np.nan

    augmented_auxiliary_df = auxiliary_df.copy(deep=True)

    for key, row in augmented_auxiliary_df.iterrows():
        node = row['docid']
        for centrality_type, values in centralities.items():
            if node in values:
                augmented_auxiliary_df.at[key, centrality_type] = values[node]

    print(augmented_auxiliary_df)
    return augmented_auxiliary_df
Example #16
def calculate_all_centralities(data):
    """
        Calculates all four centralities metrics for the input graph
        Paramaters:
            data: a json object which represents the graph.
            This json is manipulated and the necessary metrics are added to it.
    """
    G = json_graph.node_link_graph(
        data)  #loads the data to a NetworkX graph object
    #Calculates three of the metrics
    degree = centrality.degree_centrality(G)
    closeness = centrality.closeness_centrality(G)
    betweeness = centrality.betweenness_centrality(G)
    eigenvector_fail = False
    try:  #Eigenvector centrality can fail to converge.
        eigenvector = centrality.eigenvector_centrality(DiGraph(G),
                                                        max_iter=100000)
    # networkx >= 2 raises PowerIterationFailedConvergence (importable from
    # networkx) where 1.x raised NetworkXError; values fall back to 1.0 below.
    except (NetworkXError, PowerIterationFailedConvergence):
        eigenvector = []
        eigenvector_fail = True
        print("Max iterations exceeded")
    degree_max = -1.0
    closeness_max = -1.0
    betweeness_max = -1.0
    eigenvector_max = -1.0
    for author in data['nodes']:  #Adds the unnormalized values in the json
        i = author['id']
        author['degreeCentralityUnnormalized'] = degree[i]
        author['closenessCentralityUnnormalized'] = closeness[i]
        author['betweennessCentralityUnnormalized'] = betweeness[i]
        author['eigenvectorCentralityUnnormalized'] = eigenvector[
            i] if not eigenvector_fail else 1.0

    #Finds the highest values for each centrality type
    for i in degree:
        if degree[i] > degree_max:
            degree_max = degree[i]
    for i in closeness:
        if closeness[i] > closeness_max:
            closeness_max = closeness[i]
    for i in betweeness:
        if betweeness[i] > betweeness_max:
            betweeness_max = betweeness[i]
    for i in eigenvector:
        if eigenvector[i] > eigenvector_max:
            eigenvector_max = eigenvector[i]

    #Normalizes the values
    for i in degree:
        if degree[i] != 0:
            degree[i] = degree[i] / degree_max
    for i in closeness:
        if closeness[i] != 0:
            closeness[i] = closeness[i] / closeness_max
    for i in betweeness:
        if betweeness[i] != 0:
            betweeness[i] = betweeness[i] / betweeness_max
    for i in eigenvector:
        if eigenvector[i] != 0:
            eigenvector[i] = eigenvector[i] / eigenvector_max

    #Adds the normalized values to the json
    for author in data['nodes']:
        i = author['id']
        author['degreeCentrality'] = degree[i]
        author['closenessCentrality'] = closeness[i]
        author['betweennessCentrality'] = betweeness[i]
        author['eigenvectorCentrality'] = eigenvector[
            i] if not eigenvector_fail else 1.0
    return data
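
# Usage sketch (invented toy graph; assumes this snippet's imports of
# json_graph, centrality, and DiGraph) in the node-link form consumed by
# json_graph.node_link_graph above:
if __name__ == "__main__":
    toy = {
        "directed": False, "multigraph": False, "graph": {},
        "nodes": [{"id": 0}, {"id": 1}, {"id": 2}],
        "links": [{"source": 0, "target": 1}, {"source": 1, "target": 2}],
    }
    result = calculate_all_centralities(toy)
    print(result["nodes"][0]["degreeCentrality"])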
Example #17

def draw_graph(G):
    nx.draw(G, node_size=30)
    plt.show()


if __name__ == "__main__":
    print("Start parsing:")
    data = parse_group()
    G = create_graph(data)
    draw_graph(G)

    degree = pd.Series(nxa.degree_centrality(G)).idxmax()
    closeness = pd.Series(nxa.closeness_centrality(G)).idxmax()
    eigenvector = pd.Series(nxa.eigenvector_centrality(G)).idxmax()
    betweenness = pd.Series(nxa.betweenness_centrality(G)).idxmax()

    degree_user = api.users.get(user_ids=degree)[0]
    closeness_user = api.users.get(user_ids=closeness)[0]
    eigenvector_user = api.users.get(user_ids=eigenvector)[0]
    betweenness_user = api.users.get(user_ids=betweenness)[0]

    print("Most important user:")
    print(
        f"Degree centrality: id{degree} - {degree_user['first_name'] + ' ' + degree_user['last_name']}"
    )
    print(
        f"Closeness centrality: id{closeness} - {closeness_user['first_name'] + ' ' + closeness_user['last_name']}"
    )
    print(
        f"Eigenvector centrality: id{eigenvector} - {eigenvector_user['first_name'] + ' ' + eigenvector_user['last_name']}"
    )
    print(
        f"Betweenness centrality: id{betweenness} - {betweenness_user['first_name'] + ' ' + betweenness_user['last_name']}"
    )