def centrality_analysis(G, isDirected=False):
    '''
    :param G: DiGraph() / Graph()
    :return: dict mapping each node to a list of centrality values
    '''
    nodes = G.nodes()
    if isDirected:
        in_dc = centrality.in_degree_centrality(G)
        out_dc = centrality.out_degree_centrality(G)
        bc = centrality.betweenness_centrality(G)
        ec = centrality.eigenvector_centrality(G)

        cent = {}
        for node in nodes:
            cent[node] = [in_dc[node], out_dc[node], bc[node], ec[node]]
        print(
            "Four types of centrality are calculated \n" +
            "\n\tin_degree_centrality\n\tout_degree_centrality\n\tbetweenness_centrality\n\teigenvector_centrality"
        )
        return cent
    else:
        dc = centrality.degree_centrality(G)
        bc = centrality.betweenness_centrality(G)
        ec = centrality.eigenvector_centrality(G)

        cent = {}
        for node in nodes:
            cent[node] = [dc[node], bc[node], ec[node]]
        print(
            "Three types of centrality are calculated \n" +
            "\n\tdegree_centrality\n\tbetweenness_centrality\n\teigenvector_centrality"
        )
        return cent
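# Hypothetical usage sketch (not part of the original snippet): it assumes the
# module-level import `from networkx.algorithms import centrality` that
# centrality_analysis relies on.
import networkx as nx
from networkx.algorithms import centrality

G = nx.karate_club_graph()       # small undirected example graph
cent = centrality_analysis(G)    # isDirected defaults to False
print(cent[0])                   # [degree, betweenness, eigenvector] values for node 0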
def get_topological_features(G, nodes=None):
    N_ = len(G.nodes)
    if nodes is None:
        nodes = G.nodes
    # Degree centrality
    d_c = get_features(degree_centrality(G).values())
    print('degree centrality done')
    # Betweenness centrality
    b_c = get_features(betweenness_centrality(G).values())
    print('betweenness centrality done')

    # Closeness centrality
    c_c = get_features(closeness_centrality(G).values())
    print('closeness centrality done')
    # Clustering
    c = get_features(clustering(G).values())
    print('clustering done')

    d = diameter(G)
    r = radius(G)

    s_p_average = []
    for s in shortest_path_length(G):
        dic = s[1]
        lengths = dic.values()
        s_p_average += [sum(lengths) / float(N_)]

    s_p_average = get_features(s_p_average)

    features = np.concatenate((d_c, b_c, c_c, c, s_p_average, [d], [r]),
                              axis=0)

    return features
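# get_features is not defined in this snippet. A minimal, hypothetical stand-in
# (assumed, not the original helper) that reduces a collection of per-node values
# to a fixed-length summary vector, so that the np.concatenate call above works:
import numpy as np

def get_features(values):
    values = np.fromiter(values, dtype=float)
    # summary statistics; the original helper may compute something different
    return np.array([values.mean(), values.std(), values.min(), values.max()])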
Example #3
def get_centrality_labels(knn_graph_obj, perc_labeled, type='degree'):
    import random

    measures = {
        'degree': centrality.degree_centrality,
        'closeness': centrality.closeness_centrality,
        'betweenness': centrality.betweenness_centrality,
        'katz': centrality.katz_centrality,
        'clustering': clustering,
    }

    if type in measures:
        scores = pd.DataFrame.from_dict(measures[type](knn_graph_obj),
                                        orient='index', columns=['value'])
        n_labeled = int(perc_labeled * len(scores.index))
        node_toget_labels = scores.sort_values(by='value',
                                               ascending=False).index[0:n_labeled].tolist()
    else:
        indexes = list(knn_graph_obj.nodes)
        node_toget_labels = random.sample(indexes, int(perc_labeled * len(indexes)))

    return node_toget_labels
def get_centrality_labels(knn_graph_obj, type='degree'):
    measures = {
        'degree': centrality.degree_centrality,
        'closeness': centrality.closeness_centrality,
        'betweenness': centrality.betweenness_centrality,
        'clustering': clustering,
    }

    if type in measures:
        node_toget_labels = pd.DataFrame.from_dict(measures[type](knn_graph_obj),
                                                   orient='index',
                                                   columns=['value'])
    else:
        node_toget_labels = list(knn_graph_obj.nodes)

    return node_toget_labels
def get_layer_info(subject, journal_volume, edge_list):

    G = nx.Graph()
    G.add_weighted_edges_from(edge_list)

    PATH = "C:/Users/hexie/Documents/APS_result/" + str(
        journal_volume) + "/" + str(subject)

    try:
        os.mkdir(PATH)
    except FileExistsError:
        pass
    os.chdir(PATH)

    degree_centrality = nxc.degree_centrality(G)
    try:
        eigen_vector_centrality = nxc.eigenvector_centrality(G)
        np.save("eigen_vector_centrality.npy", eigen_vector_centrality)
    except nx.PowerIterationFailedConvergence:
        print("eigenvector centrality failed to converge within 100 power iterations")

    closeness_centrality = nxc.closeness_centrality(G)
    betweeness_centrality = nxc.betweenness_centrality(G)

    np.save("degree_centrality.npy", degree_centrality)
    np.save("closeness_centrality.npy", closeness_centrality)
    np.save("betweeness_centrality.npy", betweeness_centrality)

    with open(str(subject) + str(journal_volume) + ".txt", 'w') as f:
        f.write('Number of Edges: ' + str(nx.number_of_edges(G)) + "\n")
        f.write('Number of Nodes: ' + str(nx.number_of_nodes(G)) + "\n")

    nx.draw(G)
    plt.savefig(str(subject) + str(journal_volume) + ".png")
    plt.clf()
Example #6
def bc_degree_relativity():
    song = fm.FontProperties(fname=os.path.join(base_dir, '../simsun.ttc'),
                             size=10.5)
    sns.set(style='ticks', palette='Set2')
    plt.figure(dpi=200)
    c = {
        'family': 'sans-serif',
        'sans-serif': ['Times New Roman', 'NSimSun'],
        'size': 10.5
    }
    rc('font', **c)
    plt.rcParams['axes.unicode_minus'] = False
    network = get_shanghai_subway_graph()
    bc_list = betweenness_centrality(network)
    degree_list = dict()
    for node in network.nodes:
        degree_list[node] = network.degree(node)
    fig, ax = plt.subplots(num=1, figsize=(3.54, 2.26))
    plt.subplots_adjust(right=0.99, left=0.125, bottom=0.14, top=0.975)
    x = list()  # degree
    y = list()  # bc
    for i in degree_list.keys():
        x.append(degree_list[i])
        y.append(bc_list[i])
    x1, y1 = zip(*sorted(zip(x, y)))
    p2, = ax.plot(x1, y1, 'o', ms=4)
    ax.set_xlabel('度', fontproperties=song)  # "Degree"
    ax.set_ylabel('介数中心性', fontproperties=song)  # "Betweenness centrality"
    plt.show()
Example #7
    def extract_betweenness_centrality(self):
        print('Calculating betweenness centrality')
        nodes = centrality.betweenness_centrality(self.G)
        with open('output/' + self.set_ + '/' + self.set_ +
                  '_betweenness_centrality.csv', 'w') as output:
            for key in nodes:
                output.write(str(key) + ',' + str(nodes[key]) + '\n')
Example #8
def create_centrality_df(df):
    centrs_dict = {}
    for i, row in df.iterrows():
        G = build_graph(row)
        centrs = betweenness_centrality(G, weight='weight')
        centrs_dict[row['tijd']] = centrs

    dfcentr = pd.DataFrame(centrs_dict).transpose()
    return dfcentr
def get_centrality(def_centrality, toll_centrality, model, nodes_int):
    left_b_centr = centrality.betweenness_centrality(model.get_nx_graph())
    left_c_centr = centrality.closeness_centrality(model.get_nx_graph())
    for el in left_b_centr:
        node = model.get_node_by_id(el)
        if node in nodes_int:
            toll_centrality += left_c_centr[el] + left_b_centr[el]
        else:
            def_centrality += left_c_centr[el] + left_b_centr[el]
    return def_centrality, toll_centrality
Example #10
def calc_graph_measures(data_matrix, thresh=0):
    from networkx import eccentricity
    from networkx.algorithms.efficiency import global_efficiency
    from networkx.algorithms.shortest_paths.generic import average_shortest_path_length
    from networkx.algorithms.centrality import betweenness_centrality
    from networkx.algorithms.cluster import average_clustering
    from networkx.algorithms.community.modularity_max import greedy_modularity_communities
    from networkx.algorithms.community.quality import performance

    def _avg_values(results):
        values = []
        if isinstance(results, dict):
            for k in results:
                values.append(results[k])
        elif isinstance(results, list):
            for tup in results:
                values.append(tup[1])

        return np.mean(values)

    below_thresh_indices = np.abs(data_matrix) < thresh
    data_matrix[below_thresh_indices] = 0
    if isinstance(data_matrix, np.ndarray):
        graph = networkx.convert_matrix.from_numpy_matrix(np.real(data_matrix))
    if isinstance(data_matrix, pd.DataFrame):
        graph = networkx.convert_matrix.from_pandas_adjacency(data_matrix)

    degree = list(graph.degree)
    global_eff = global_efficiency(graph)
    b_central = betweenness_centrality(graph)
    modularity = performance(graph, greedy_modularity_communities(graph))
    try:
        ecc = eccentricity(graph)
    except networkx.exception.NetworkXError:
        ecc = [(0, 0)]

    try:
        clust = average_clustering(graph)
    except networkx.exception.NetworkXError:
        clust = 0

    try:
        char_path = average_shortest_path_length(graph)
    except networkx.exception.NetworkXError:
        char_path = 0

    graph_dict = {'degree': _avg_values(degree),
                  'eccentricity': _avg_values(ecc),
                  'global_efficiency': global_eff,
                  'characteristic_path_length': char_path,
                  'betweenness_centrality': _avg_values(b_central),
                  'clustering_coefficient': clust,
                  'modularity': modularity}

    return graph_dict
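# Usage sketch (assumed, not from the original source): calc_graph_measures also
# relies on module-level `import networkx`, `import numpy as np` and
# `import pandas as pd`, and on pre-3.0 networkx APIs (from_numpy_matrix and
# community.quality.performance), so networkx < 3.0 is assumed here.
import numpy as np

rng = np.random.default_rng(0)
m = rng.random((6, 6))
m = (m + m.T) / 2          # symmetric toy "connectivity" matrix
np.fill_diagonal(m, 0.0)   # no self-loops
print(calc_graph_measures(m, thresh=0.3))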
def bc_list(g):
    """
    生成降序 BC 排列的节点 list。
    :param g: 要分析的 Graph
    :return: 一个 list
    """
    result = betweenness_centrality(g)
    l = list()
    for key in result:
        l.append((key, result[key]))
    return sorted(l, key=lambda s: s[1], reverse=True)
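# Usage sketch (assumed): bc_list expects betweenness_centrality to be imported at
# module level, e.g. `from networkx.algorithms.centrality import betweenness_centrality`.
import networkx as nx

ranking = bc_list(nx.karate_club_graph())
print(ranking[:3])   # the three nodes with the highest betweenness centrality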
Example #12
def analyze(filename):
	nodes = read_csv(filename)
	G = create_graph(filename)
	degree = G.degree()
	betweenness = centrality.betweenness_centrality(G)
	eigen = centrality.eigenvector_centrality_numpy(G,weight='weight')

	actors_degree= []
	actors_betweenness = []
	actors_eigen = [] 

	# in_deg = G.in_degree()
	# out_deg = G.out_degree()

	# a = 0
	# for i in in_deg.keys():
	# 	a+=in_deg[i]

	# print "IN DEGREE "
	# print a/len(in_deg)

	# b = 0
	# for i in out_deg.keys():
	# 	a+=out_deg[i]

	# print "OUT DEGREE "
	# print b/len(out_deg)

	for i in actors:
		if i in degree.keys():
			actors_degree.append((i,degree[i]))
		if i in betweenness.keys():
			actors_betweenness.append((i,betweenness[i]))
		if i in eigen.keys():
			actors_eigen.append((i,eigen[i]))

	actors_degree = sorted(actors_degree, key=itemgetter(1))
	actors_betweenness = sorted(actors_betweenness, key=itemgetter(1))
	actors_eigen = sorted(actors_eigen, key=itemgetter(1))

	actors_degree.reverse()
	actors_betweenness.reverse()
	actors_eigen.reverse()

	print "DEGREE: "
	print actors_degree
	print 
	print "BETWEENNESS" 
	print actors_betweenness
	print 
	print "EIGEN"
	print actors_eigen
	print
def compute_metrics(graph):

    G = json_graph.node_link_graph(graph, multigraph=False)
    degree_centrality = centrality.degree_centrality(G)
    closeness_centrality = centrality.closeness_centrality(G)
    betweenness_centrality = centrality.betweenness_centrality(G)
    page_rank = link_analysis.pagerank_alg.pagerank(G)
    max_clique = approximation.clique.max_clique(G)
    diameters = [distance_measures.diameter(g) for g in connected_component_subgraphs(G)]  # connected_component_subgraphs requires networkx < 2.4

    copy = dict()

    copy['id'] = graph['id']
    copy['name'] = graph['name']
    copy['graph'] = dict()
    copy['graph']['nodes'] = graph['nodes']
    copy['graph']['links'] = graph['links']
    copy['metrics'] = dict()

    # diameters
    copy['metrics']['diameter'] = dict()
    copy['metrics']['diameter']['all'] = diameters
    copy['metrics']['diameter']['max'] = max(diameters)
    copy['metrics']['diameter']['average'] = float(sum(diameters)) / float(len(diameters))

    # clique size
    copy['metrics']['maxClique'] = len(list(max_clique))

    # degree centrality
    copy['metrics']['degreeCentrality'] = dict()
    copy['metrics']['degreeCentrality']['byId'] = degree_centrality
    copy['metrics']['degreeCentrality']['max'] = max(degree_centrality.values())
    copy['metrics']['degreeCentrality']['average'] = float(sum(degree_centrality.values())) / float(len(degree_centrality.values()))

    # closeness centrality
    copy['metrics']['closenessCentrality'] = dict()
    copy['metrics']['closenessCentrality']['byId'] = closeness_centrality
    copy['metrics']['closenessCentrality']['max'] = max(closeness_centrality.values())
    copy['metrics']['closenessCentrality']['average'] = float(sum(closeness_centrality.values())) / float(len(closeness_centrality.values()))

    # betweenness centrality
    copy['metrics']['betweennessCentrality'] = dict()
    copy['metrics']['betweennessCentrality']['byId'] = betweenness_centrality
    copy['metrics']['betweennessCentrality']['max'] = max(betweenness_centrality.values())
    copy['metrics']['betweennessCentrality']['average'] = float(sum(betweenness_centrality.values())) / float(len(betweenness_centrality.values()))

    # page rank
    copy['metrics']['pageRank'] = dict()
    copy['metrics']['pageRank']['byId'] = page_rank
    copy['metrics']['pageRank']['max'] = max(page_rank.values())
    copy['metrics']['pageRank']['average'] = float(sum(page_rank.values())) / float(len(page_rank.values()))

    return copy
Example #14
def compute_betweenness(G):
    ng = nx.Graph()
    for start in G.iternodes():
        others = G.neighbors(start)
        for other in others:
            ng.add_edge(start, other)

    c = centrality.betweenness_centrality(ng)

    return c
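# Usage sketch (assumed): compute_betweenness expects a graph object exposing
# iternodes() and neighbors() (not the standard networkx API), plus module-level
# `import networkx as nx` and `from networkx.algorithms import centrality`.
# The Wrapper class below is purely hypothetical.
import networkx as nx
from networkx.algorithms import centrality

class Wrapper:
    def __init__(self, g):
        self._g = g

    def iternodes(self):
        return iter(self._g.nodes)

    def neighbors(self, n):
        return list(self._g.neighbors(n))

scores = compute_betweenness(Wrapper(nx.path_graph(5)))
print(max(scores, key=scores.get))   # the middle node (2) scores highest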
Example #16
def betweenness_plot(G, title="", fig=1):
    y = sorted(betweenness_centrality(G).items(), key=itemgetter(1),
               reverse=True)
    y = [(G.degree(n), bc) for n, bc in y]
    deg, bc = zip(*y)
    plt.figure(fig)
    plt.plot(bc, deg, 'o')
    plt.title(title)
    plt.xlabel("Betweenness Centrality")
    plt.ylabel("Degree of node")
    plt.ylim(top=35)
    plt.xlim(right=0.4)
    plt.tight_layout()
    plt.draw()
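# Usage sketch (assumed): betweenness_plot relies on module-level imports of
# matplotlib.pyplot as plt, itemgetter (from operator) and betweenness_centrality.
import networkx as nx
import matplotlib.pyplot as plt

betweenness_plot(nx.karate_club_graph(), title="Karate club", fig=1)
plt.show()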
def shanghai_average_bc():
    result = shanghai_graph_by_date()
    l = list()
    for k in result.keys():
        s = betweenness_centrality(result[k])
        count = 0
        for i in s:
            count += s[i]
        l.append({
            'date': k.strftime("%Y-%m-%d"),
            'average_bc': count / len(s)
        })
    with open(os.path.join(base_dir, '上海分阶段数据/average_bc.csv'), 'a') as f:
        w = csv.DictWriter(f, ['date', 'average_bc'])
        w.writeheader()
        w.writerows(l)
    def greedy_fragile(self, graph, nedges):
        nodes = centrality.betweenness_centrality(graph, weight='weight')
        nwc = float(sum(nodes.values()) / len(nodes))
        total_centrality = graph.order() * nwc
        result = {}
        if nedges is None:
            nedges = graph.nodes(data=True)
        for n in nedges:
            if n[0] in graph.nodes():
                neighbors = list(graph.neighbors(n[0]))
                neigh_central = sum([v for k, v in nodes.items() if k in neighbors])
                order = graph.order() - (1 + len(neighbors))
                mc = nodes[n[0]] + neigh_central
                gf = nwc - ((total_centrality - mc) / order)
                result[n[1]['name']] = gf
            else:
                result[n[1]['name']] = 0
        return result
Example #19
    def subproblem(self, c, clusterproblem):
        # return basic connectivityproblem
        #  (graph, agents, eagents, big_agents, reward_dict)
        # induced by cluster c

        agents = {
            r: clusterproblem.graph.agents[r]
            for r in self.agent_clusters[c]
        }

        static_agents = [
            r for r in agents.keys() if r in clusterproblem.static_agents
        ]

        # add childcluster stuff
        addn_nodes = set()
        for C in self.child_clusters[c]:
            agents[self.submasters[C[0]]] = C[1]
            static_agents.append(self.submasters[C[0]])
            addn_nodes.add(C[1])

        G = deepcopy(clusterproblem.graph)
        del_nodes = set(
            clusterproblem.graph.nodes) - self.subgraphs[c] - addn_nodes
        G.remove_nodes_from(del_nodes)
        G.init_agents(agents)

        # basic rewards based on centrality
        reward_dict = betweenness_centrality(nx.DiGraph(G))
        norm = max(reward_dict.values())
        if norm == 0:
            norm = 1
        reward_dict = {
            v: clusterproblem.max_centrality_reward * val / norm
            for v, val in reward_dict.items()
        }

        # initialize subproblem
        return ConnectivityProblem(
            graph=G,
            static_agents=static_agents,
            eagents=[r for r in agents if r in clusterproblem.eagents],
            big_agents=[r for r in agents if r in clusterproblem.big_agents],
            reward_dict=reward_dict,
        )
def centralization_metrics(G, prefix=""):
    # NB: G can be either directed or undirected network
    # Metrics:
    # (betweenness / closeness / eigenvector / pagerank)

    # betweenness
    # => expensive
    # sample: k=min(10, len(G))
    betweenness = betweenness_centrality(G, normalized=True)
    betweenness_arr = np.fromiter(betweenness.values(), dtype=float)
    betweenness_mean = np.mean(np.max(betweenness_arr) - betweenness_arr)

    # closeness
    # => expensive
    # NB: normalizes by the CC size
    closeness = closeness_centrality(G, wf_improved=False)
    closeness_arr = np.fromiter(closeness.values(), dtype=float)
    closeness_mean = np.mean(np.max(closeness_arr) - closeness_arr)

    # eigenvector
    eigenvec_mean = None
    if len(G) > 2:
        try:
            eigenvec = eigenvector_centrality_numpy(G)
            eigenvec_arr = np.fromiter(eigenvec.values(), dtype=float)
            eigenvec_mean = np.mean(np.max(eigenvec_arr) - eigenvec_arr)
        except Exception:
            eigenvec_mean = None

    # pagerank
    try:
        pagerank = pagerank_numpy(G)
        pagerank_arr = np.fromiter(pagerank.values(), dtype=float)
        pagerank_mean = np.mean(np.max(pagerank_arr) - pagerank_arr)
    except Exception:
        pagerank_mean = None

    centralization = {
        f"cent{prefix}_betweenness_mean": betweenness_mean,
        f"cent{prefix}_closeness_mean": closeness_mean,
        f"cent{prefix}_eigenvec_mean": eigenvec_mean,
        f"cent{prefix}_pagerank_mean": pagerank_mean
    }

    return centralization
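# Usage sketch (assumed): centralization_metrics relies on module-level imports of
# numpy as np, betweenness_centrality, closeness_centrality,
# eigenvector_centrality_numpy and pagerank_numpy; pagerank_numpy was removed in
# networkx 3.0, so networkx < 3.0 is assumed here.
import networkx as nx

star = nx.star_graph(10)   # one hub, ten leaves: a highly centralized graph
print(centralization_metrics(star, prefix="_star"))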
Example #21
def get_betweenness_centrality(G, **kwargs):
    """Returns a dictionary of betweenness centrality
    values for all nodes.
    """

    # Get the graph without glycine residues
    H = get_graph_without_glycine(G, kwargs["identifiers"],
                                  kwargs["residue_names"])

    # Calculate the betweenness centrality values
    centrality_dict = \
        nxc.betweenness_centrality(G = H,
                                   normalized = kwargs["normalized"],
                                   weight = kwargs["weight"],
                                   endpoints = kwargs["endpoints"])

    # Return the finalized dictionary of centrality values
    return finalize_dict(G, centrality_dict)
Example #22
def top_nodes(G, k=3):
    """
        Returns the top k nodes for various
        centrality measures: degree, 
        betweennes and closeness.
        
        Args:
            G (nx.Graph): graph for which the 
                top nodes must be determined.
            
            k (int): number of top nodes to return.
                if set to -ve, all the nodes will be
                returned.
            
        Returns:
            res_dict (dict): dictionary of each centrality
                measure with list of top k nodes in that 
                measure as values to the dictionary.
    """
    # number of nodes in the graph each node is connected to
    node_deg_dict = centrality.degree_centrality(G)
    # number of all pair shortest paths that pass through each node
    node_btw_dict = centrality.betweenness_centrality(G)
    # number of neighbours connected to each other for each node
    node_clo_dict = centrality.closeness_centrality(G)

    # sort by nodes by each centrality measure in decreasing order
    top_k_deg_nodes = sorted(node_deg_dict.items(), key=lambda x: -x[1])
    top_k_btw_nodes = sorted(node_btw_dict.items(), key=lambda x: -x[1])
    top_k_clo_nodes = sorted(node_clo_dict.items(), key=lambda x: -x[1])

    # pick the top k nodes
    res_dict = dict()
    if k > 0:
        res_dict["degree"] = list(zip(*top_k_deg_nodes[:k]))[0]
        res_dict["betweenness"] = list(zip(*top_k_btw_nodes[:k]))[0]
        res_dict["closeness"] = list(zip(*top_k_clo_nodes[:k]))[0]

    else:
        res_dict["degree"] = list(zip(*top_k_deg_nodes))[0]
        res_dict["betweenness"] = list(zip(*top_k_btw_nodes))[0]
        res_dict["closeness"] = list(zip(*top_k_clo_nodes))[0]

    return res_dict
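# Usage sketch (assumed): top_nodes relies on the module-level import
# `from networkx.algorithms import centrality`.
import networkx as nx

top = top_nodes(nx.karate_club_graph(), k=3)
print(top["betweenness"])   # ids of the three highest-betweenness nodes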
Example #23
def parse(name):
    print(name)
    pathbase = path.abspath(path.dirname(__file__))
    G = nx.Graph()
    data = json.load(open('{0}/{1}.json'.format(pathbase, name)))
    nodes = data['nodes']
    text = {i: node['text'] for i, node in enumerate(nodes)}
    weight = {i: float(node['weight']) for i, node in enumerate(nodes)}
    for i in range(len(nodes)):
        G.add_node(i)
    for link in data['links']:
        G.add_edge(link['source'], link['target'])

    degree = centrality.degree_centrality(G)
    closeness = centrality.closeness_centrality(G)
    betweenness = centrality.betweenness_centrality(G)
    #edge_betweenness = centrality.edge_betweenness_centrality(G)
    #current_flow_closeness = centrality.current_flow_closeness_centrality(G)
    #current_flow_betweenness =\
    #    centrality.current_flow_betweenness_centrality(G)
    try:
        eigenvector = centrality.eigenvector_centrality(G, max_iter=1000)
    except nx.PowerIterationFailedConvergence:
        eigenvector = {i: 0 for i in range(len(nodes))}
    katz = centrality.katz_centrality(G)

    obj = {'nodes': [], 'links': data['links']}
    for i in range(len(nodes)):
        obj['nodes'].append({
            'text': text[i],
            'weight': weight[i],
            'degree': degree[i],
            'closeness': closeness[i],
            'betweenness': betweenness[i],
            #'edge_betweenness': edge_betweenness[i],
            #'current_flow_closeness': current_flow_closeness[i],
            #'current_flow_betweenness': current_flow_betweenness[i],
            'eigenvector': eigenvector[i],
            'katz': katz[i],
        })
    json.dump(obj,
              open('{0}/../data/{1}.json'.format(pathbase, name), 'w'),
              sort_keys=True)
Example #24
    def greedy_fragile(self, graph, nedges):
        nodes = centrality.betweenness_centrality(graph, weight='weight')
        nwc = float(sum(nodes.values()) / len(nodes))
        total_centrality = graph.order() * nwc
        result = {}
        if nedges is None:
            nedges = graph.nodes(data=True)
        for n in nedges:
            if n[0] in graph.nodes():
                neighbors = list(graph.neighbors(n[0]))
                neigh_central = sum([v for k, v in nodes.items() if k in neighbors])
                order = graph.order() - (1 + len(neighbors))
                mc = nodes[n[0]] + neigh_central
                gf = nwc - ((total_centrality - mc) / order)
                result[n[1]['name']] = gf
            else:
                result[n[1]['name']] = 0
        return result
def chengdu_bc_with_date():
    """
    BC (betweenness centrality) ranking for each stage (by date).
    :return:
    """
    result = chengdu_graph_by_date()
    for k in result.keys():
        s = betweenness_centrality(result[k])
        l = list()
        for key in s:
            l.append((key, s[key]))
        l = sorted(l, key=lambda t: t[1], reverse=True)
        temp = list()
        for i in l:
            temp.append({'name': i[0], 'BC': i[1]})
        with open(
                os.path.join(base_dir,
                             '分阶段数据/{}.csv'.format(k.strftime("%Y-%m-%d"))),
                'a') as f:
            w = csv.DictWriter(f, ['name', 'BC'])
            w.writeheader()
            w.writerows(temp)
def graph_stats(G):
    """
    Compute all the graph-related statistics in the features.

    Note that since the graph is always fully connected, all of these are the
    weighted versions. For this reason, many of these functions use the
    implementations in bctpy rather than NetworkX.
    """
    # Local measures
    clustering_dict = clustering(G, weight='weight')
    adjacency = np.array(adjacency_matrix(G).todense())
    betweenness_centrality_dict = betweenness_centrality(G, weight='weight')
    paths = shortest_path_length(G, weight='weight')
    eccentricities = [max(dists.values()) for (source, dists) in sorted(paths)]
    local_measures = np.concatenate(
        [[v for (k, v) in sorted(clustering_dict.items())],
         [v for (k, v) in sorted(betweenness_centrality_dict.items())],
         eccentricities])
    graph_diameter = max(eccentricities)
    graph_radius = min(eccentricities)
    aspl = average_shortest_path_length(G, weight='weight')
    global_measures = np.array([graph_diameter, graph_radius, aspl])
    return np.concatenate([local_measures, global_measures])
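# Usage sketch (assumed): graph_stats expects a fully connected weighted graph and
# module-level imports of numpy as np plus clustering, adjacency_matrix,
# betweenness_centrality, shortest_path_length and average_shortest_path_length
# from networkx.
import networkx as nx

G = nx.complete_graph(5)
nx.set_edge_attributes(G, 1.0, 'weight')
features = graph_stats(G)
print(features.shape)   # 3 * |V| local measures + 3 global measures -> (18,)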
Example #27
def get_bc_info(g, k_top= 5):
    start_time = time.time()
    BC_dict = betweenness_centrality(g)
    total_time = round(time.time() - start_time, 2)

    # list of pairs (node, bc_value)
    max_BCs = list(sorted(BC_dict.items(), key=operator.itemgetter(1), reverse=True)[:k_top])

    total_BC = 0
    for bc in BC_dict.values():
        total_BC += bc
    avg_BC = total_BC / len(BC_dict)

    max_total = 0
    for bc in max_BCs:
        max_total += bc[1]
    avg_max_BCs = max_total / len(max_BCs)

    return {"BC_dict": BC_dict,
            "avg_BC": avg_BC,
            "max_BCs": max_BCs,
            "avg_max_BCs": avg_max_BCs,
            "time": total_time}
Example #28
def connectivity_matrix(graph):
    '''
    graph:
        Architecture graph, assumed bidirected for now.
    '''
    # Initialize square matrix
    size = len(graph)
    con_m = np.zeros((size, size))

    # Generate betweenness centrality for graph
    betweenness = betweenness_centrality(graph, k=size)

    its = [range(size)] * 2

    for i, j in product(*its):
        if i == j:  # self-interaction
            con_m[i, j] = 1
        else:  # pair interaction
            con_m[i, j] = (1 - betweenness[i]) * (1 - betweenness[j])

    return con_m
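# Usage sketch (assumed): connectivity_matrix indexes the betweenness dict with
# 0..size-1, so integer-labelled nodes are assumed, along with module-level
# `import numpy as np`, `from itertools import product` and an imported
# betweenness_centrality.
import networkx as nx

arch = nx.path_graph(4)   # a 4-node line architecture
print(connectivity_matrix(arch))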
Example #29
    g_simple, g_mst, ridge_dims = extract_structural_backbone(T, data, s, max_angle=opt.maxangle,
                                                                    relaxation=opt.relaxation)

    mmwrite(opt.output + '.' + docstr + '.g_simple.mm', g_simple)
    mmwrite(opt.output + '.' + docstr + '.g_mst.mm', g_mst)
    np.savetxt(opt.output + '.' + docstr + '.ridge_dims', ridge_dims, fmt='%d')

    df = pd.DataFrame({'x': T[:, 0], 'y': T[:, 1], 'c': anno[opt.anno_column].map(str)})
    df_e = pd.DataFrame({'xs': T[g_simple.nonzero()[0], 0], 'xe': T[g_simple.nonzero()[1], 0],
                         'ys': T[g_simple.nonzero()[0], 1], 'ye': T[g_simple.nonzero()[1], 1]})
    p = ggplot(df) + \
        geom_segment(mapping=aes(x='xs', xend='xe', y='ys', yend='ye'), data=df_e, size=0.5) + \
        geom_point(mapping=aes('x', 'y', color='c'), size=0.5) + theme_minimal()
    p.save(opt.output + '.' + docstr + '.g_simple.pdf')

    df_e = pd.DataFrame({'xs': T[g_mst.nonzero()[0], 0], 'xe': T[g_mst.nonzero()[1], 0],
                         'ys': T[g_mst.nonzero()[0], 1], 'ye': T[g_mst.nonzero()[1], 1]})
    p = ggplot(df) + \
        geom_segment(mapping=aes(x='xs', xend='xe', y='ys', yend='ye'), data=df_e, size=0.5) + \
        geom_point(mapping=aes('x', 'y', color='c'), size=0.5) + theme_minimal()
    p.save(opt.output + '.' + docstr + '.g_mst.pdf')


    G_simple = nx.from_scipy_sparse_matrix(g_simple)
    nodes_bc = betweenness_centrality(G_simple, k=np.minimum(500, g_simple.shape[0]), normalized=False)
    pd.DataFrame(pd.Series(nodes_bc) / g_simple.shape[0]).to_csv(opt.output + '.' + docstr + '.g_simple.bc.csv')

    G_mst = nx.from_scipy_sparse_matrix(g_mst)
    nodes_bc = betweenness_centrality(G_mst, k=np.minimum(500, g_mst.shape[0]), normalized=False)
    pd.DataFrame(pd.Series(nodes_bc) / g_mst.shape[0]).to_csv(opt.output + '.' + docstr + '.g_mst.bc.csv')
	def get_metric_from_graph(self, metric=None, nedges=None, keyword=None, graph=None, month=None):

	# This function does most of the work: it returns a named metric for nodes, optionally restricted by month, by specified nodes, or by entity type (lobby/staffer/lobbyist/commissioner). It first constructs a cache key and then checks the cache.

		ck = str(metric) + str(month) + str(keyword)
		if ck in self.cache:
			return self.cache[ck]

		g = graph
		if keyword:
			nedges = [node for node in g.nodes_iter(data=True) if node[1]['type'] == keyword]
		#'''if a keyword search is specified, we list the nodes where that keyword is found in one of its attributes'''
		
		if metric == u'Degree':		
			upshot = self.degree(g, nedges)
		
		if metric == u'Gatekeepership':
			upshot = self.gatekeeper(g, nedges)

		if metric == u'Closeness Centrality':
			u = centrality.closeness_centrality(g, normalized=True)
			if nedges:
				filter_list = [n[0] for n in nedges]
				upshot = {g.node[k]['name']: v for k,v in u.items() if k in filter_list}
			else:
				upshot = {g.node[k]['name']: v for k,v in u.items()}

		if metric == u'Betweenness':
			u = centrality.betweenness_centrality(g, weight='weight', normalized=True)
			if nedges:
				filter_list = [n[0] for n in nedges]
				upshot = {g.node[k]['name']: v for k,v in u.items() if k in filter_list}
			else:
				upshot = {g.node[k]['name']: v for k,v in u.items()}

		if metric == u'Greedy_Fragile':
			upshot = self.greedy_fragile(g, nedges)

		if metric == u'Link Centrality':
			u = centrality.edge_betweenness_centrality(g, weight='weight', normalized=True)
			upshot = {}
			for k, v in u.items(): # doing it in a similar way to the other linkwise metric below.
				a, b = k
				c = g.node[a]['name']
				d = g.node[b]['name'] 
				if nedges:
					filter_list = [n[0] for n in nedges]
					if a in filter_list or b in filter_list:
						upshot[unicode(c + ' - ' + d)] = v
				else:
					upshot[unicode(c + ' - ' + d)] = v

		if metric == u'Predicted Links':
			gr = self.make_unigraph_from_multigraph(mg=g)
			u = link_prediction.resource_allocation_index(gr)
			upshot = {}
			for k, v, p in u:
				if p > 0: #RAI examines all nonexistent edges in graph and will return all of them, including ones with a zero index. we therefore filter for positive index values.
					a = g.node[k]['name']
					b = g.node[v]['name']
					if nedges:
						filter_list = [n[0] for n in nedges]
						if k in filter_list or v in filter_list:
							upshot[unicode(a + ' - ' + b)] = p
					else:
						upshot[unicode(a + ' - ' + b)] = p
		self.cacheflow(ck, data=upshot)
		return upshot
def calculate_all_centralities(data):
    """
        Calculates all four centrality metrics for the input graph
        Parameters:
            data: a json object which represents the graph.
            This json is manipulated and the necessary metrics are added to it.
    """
    G = json_graph.node_link_graph(data) #loads the data to a NetworkX graph object
    #Calculates three of the metrics
    degree = centrality.degree_centrality(G)
    closeness = centrality.closeness_centrality(G)
    betweeness = centrality.betweenness_centrality(G)
    eigenvector_fail = False
    try: #Eigenvector centrality can fail to converge.
        eigenvector = centrality.eigenvector_centrality(DiGraph(G),max_iter=100000)
    except NetworkXError: #Eigenvector values will be None if calculation fails.
        eigenvector = []
        eigenvector_fail = True
        print "Max iterations exceeded"
    degree_max = -1.0
    closeness_max = -1.0
    betweeness_max = -1.0
    eigenvector_max = -1.0
    for author in data['nodes']: #Adds the unnormalized values in the json
        i = author['id']
        author['degreeCentralityUnnormalized'] = degree[i]
        author['closenessCentralityUnnormalized'] = closeness[i]
        author['betweennessCentralityUnnormalized'] = betweeness[i]
        author['eigenvectorCentralityUnnormalized'] = eigenvector[i] if not eigenvector_fail else 1.0

    #Finds the highest values for each centrality type
    for i in degree:
        if degree[i]>degree_max:
            degree_max = degree[i]
    for i in closeness:
        if closeness[i]>closeness_max:
            closeness_max = closeness[i]
    for i in betweeness:
        if betweeness[i]>betweeness_max:
            betweeness_max = betweeness[i]
    for i in eigenvector:
        if eigenvector[i]>eigenvector_max:
            eigenvector_max = eigenvector[i]

    #Normalizes the values
    for i in degree:
        if degree[i] != 0:
            degree[i] = degree[i]/degree_max
    for i in closeness:
        if closeness[i] != 0:
            closeness[i] = closeness[i]/closeness_max
    for i in betweeness:
        if betweeness[i] != 0:
            betweeness[i] = betweeness[i]/betweeness_max
    for i in eigenvector:
        if eigenvector[i] != 0:
            eigenvector[i] = eigenvector[i]/eigenvector_max

    #Adds the normalized values to the json
    for author in data['nodes']:
        i = author['id']
        author['degreeCentrality'] = degree[i]
        author['closenessCentrality'] = closeness[i]
        author['betweennessCentrality'] = betweeness[i]
        author['eigenvectorCentrality'] = eigenvector[i] if not eigenvector_fail else 1.0
    return data
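# Usage sketch (assumed): calculate_all_centralities relies on module-level imports
# of json_graph, centrality, DiGraph and NetworkXError from networkx.
import networkx as nx
from networkx.readwrite import json_graph

data = json_graph.node_link_data(nx.karate_club_graph())
enriched = calculate_all_centralities(data)
print(enriched['nodes'][0]['betweennessCentrality'])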
 def std_betweeness_centrality(self, graph):
     between_centr = betweenness_centrality(graph)
     return np.std(list(between_centr.values()))
Example #34
def analyze(directed_df, undirected_df, auxiliary_df):
    directed_df = directed_df.copy(deep=True)
    undirected_df = undirected_df.copy(deep=True)

    directed_df = directed_df.rename(mapper=lambda name: name.lower(),
                                     axis='columns')
    undirected_df = undirected_df.rename(mapper=lambda name: name.lower(),
                                         axis='columns')

    G = nx.from_pandas_edgelist(directed_df,
                                edge_attr=['weight', 'change'],
                                create_using=nx.DiGraph)
    G_undirected = nx.from_pandas_edgelist(undirected_df,
                                           edge_attr=['weight', 'change'])

    alpha_coef = 0.9

    alpha = alpha_coef / max(nx.adjacency_spectrum(G).real)
    alpha_undirected = alpha_coef / max(
        nx.adjacency_spectrum(G_undirected).real)

    centralities = {
        'out_degree':
        weighted_degree_centrality(G),
        'in_degree':
        weighted_degree_centrality(G.reverse()),
        'undirected_degree':
        weighted_degree_centrality(G_undirected),
        'out_eigenvector':
        centrality.eigenvector_centrality(G, weight='weight'),
        'in_eigenvector':
        centrality.eigenvector_centrality(G.reverse(), weight='weight'),
        'undirected_eigenvector':
        centrality.eigenvector_centrality(G_undirected, weight='weight'),
        'out_closeness':
        centrality.closeness_centrality(G, distance='weight'),
        'in_closeness':
        centrality.closeness_centrality(G.reverse(), distance='weight'),
        'undirected_closeness':
        centrality.closeness_centrality(G_undirected, distance='weight'),
        'out_betweenness':
        centrality.betweenness_centrality(G, weight='weight'),
        'in_betweenness':
        centrality.betweenness_centrality(G.reverse(), weight='weight'),
        'undirected_betweenness':
        centrality.betweenness_centrality(G_undirected, weight='weight'),
        'out_katz':
        centrality.katz_centrality(G, alpha=alpha, weight='weight'),
        'in_katz':
        centrality.katz_centrality(G.reverse(), alpha=alpha, weight='weight'),
        'undirected_katz':
        centrality.katz_centrality(G_undirected, alpha=alpha, weight='weight')
    }

    for centrality_type in centralities.keys():
        directed_df[centrality_type] = np.nan

    augmented_auxiliary_df = auxiliary_df.copy(deep=True)

    for key, row in augmented_auxiliary_df.iterrows():
        node = row['docid']
        for centrality_type, values in centralities.items():
            if node in values:
                augmented_auxiliary_df.at[key, centrality_type] = values[node]

    print(augmented_auxiliary_df)
    return augmented_auxiliary_df
Example #35
def df_from_betweeness(inG):
    print(f'calculate betweenness, network of size {inG.order()}')
    temp_betweenness = centrality.betweenness_centrality(inG)
    return pd.DataFrame(
        temp_betweenness.items(),
        columns=['tag', 'betweenness']).sort_values('betweenness', ascending=False)
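# Usage sketch (assumed): df_from_betweeness relies on module-level
# `import pandas as pd` and `from networkx.algorithms import centrality`.
import networkx as nx

df = df_from_betweeness(nx.karate_club_graph())
print(df.head())   # tags ranked by betweenness, most central first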
Example #36
 def betweennesscentrality(self, brain, outfilebase="brain", append=True):
     """ Calculates node and hub betweenness centralities. For hub centralities two files are written:
     one with the values and another with the hub identities in corresponding rows.
     """
     
     ## betweenness centrality
     # node centrality
     outfile = outfilebase+'_betweenness_centralities_nodes'
     boolVal = self.fileCheck(outfile, append)
     
     if append and boolVal:
         f= open(outfile,"ab")
         writeObj = DictWriter(f,fieldnames = brain.G.nodes())
         
     else:
         f = open(outfile,"wb")
         writeObj = DictWriter(f,fieldnames = brain.G.nodes())
         headers = dict((n,n) for n in brain.G.nodes())
         writeObj.writerow(headers)
         
     centralities = centrality.betweenness_centrality(brain.G)  # calculate centralities for largest connected component
     nodecentralitiestowrite = dict((n,None) for n in brain.G.nodes())   # create a blank dictionary of all nodes in the graph
     for node in centralities:
         nodecentralitiestowrite[node] = centralities[node]    # populate the blank dictionary with centrality values
     writeObj.writerow(nodecentralitiestowrite)                    # write out centrality values
     f.close()
     
     ## ==================================================================        
     
     ## hub centrality
     outfile = outfilebase+'_betweenness_centralities_hubs'
     hubidfile = outfilebase+'_betweenness_centralities_hubs_ids'
     
     OFbool = self.fileCheck(outfile, append)
     self.fileCheck(hubidfile, append)
     
     if append and OFbool:
         f = open(outfile,"ab")
         g = open(hubidfile,"ab")
         
     else:
         f= open(outfile,"wb")
         g = open(hubidfile,"wb")
         
     centhubs = [hub for hub in brain.hubs if hub in brain.G] # hubs within largest connected graph component
 
     # write hub identities to file
     writeObj = DictWriter(f,fieldnames = brain.hubs)
     hubwriter = DictWriter(g,fieldnames = brain.hubs)
     
     headers = dict((n,n) for n in brain.hubs)         # dictionary of all hubs in network to write
     hubwriter.writerow(headers)
     
     hubcentralitieistowrite = dict((n,None) for n in brain.hubs) # empty dictionary to populate with centralities data
 
     for hub in centhubs:
         hubcentralitieistowrite[hub] = nodecentralitiestowrite[hub]
         
     writeObj.writerow(hubcentralitieistowrite)
     f.close()
     g.close()
Example #37
    weights = [r.split(',')[-1] for r in rows[1:]]
    edge_tuples=[(e[0], e[1], int(weights[i])) for i,e in enumerate(edges)]

# Only get edges for the select nodes in the node csv.
edges = []
for e in edge_tuples:
    if all(x in list(node_ids) for x in e[:2]):
        edges.append(e)

# Initialize graph, add nodes and edges, calculate modularity and centrality.
G = nx.Graph()
G.add_nodes_from(list(node_ids))
G.add_weighted_edges_from(edges)
groups = community.best_partition(G)
degree = cn.degree_centrality(G)
betweenness = cn.betweenness_centrality(G, weight='weight')
eigenvector = cn.eigenvector_centrality(G, weight='weight')

# Add node attributes for name, modularity, and three types of centrality.
nx.set_node_attributes(G, 'name', node_dict)
nx.set_node_attributes(G, 'group', groups)
nx.set_node_attributes(G, 'degree', degree)
nx.set_node_attributes(G, 'betweenness', betweenness)
nx.set_node_attributes(G, 'eigenvector', eigenvector)

# Create json representation of the graph (for d3).
data = json_graph.node_link_data(G)

# You could create the needed json without NetworkX (but you would forfeit network metrics).
#new_data = dict(nodes=[dict(id=n) for n in list(set(nodes))], links=[dict(source=node_dict[e[0]], target=node_dict[e[1]], weight=e[2]) for e in edges])