def test_eigenvector_v_katz_random(self):
    G = nx.gnp_random_graph(10, 0.5, seed=1234)
    l = float(max(np.linalg.eigvals(nx.adjacency_matrix(G).todense())))
    e = nx.eigenvector_centrality_numpy(G)
    k = nx.katz_centrality_numpy(G, 1.0 / l)
    for n in G:
        assert e[n] == pytest.approx(k[n], abs=1e-7)
def test_eigenvector_v_katz_random(self):
    G = nx.gnp_random_graph(10, 0.5, seed=1234)
    l = float(max(eigvals(nx.adjacency_matrix(G).todense())))
    e = nx.eigenvector_centrality_numpy(G)
    k = nx.katz_centrality_numpy(G, 1.0 / l)
    for n in G:
        assert almost_equal(e[n], k[n])
def compute_centrality(g, centrality_type, n):
    """
    Obtain the centrality measures of a graph.

    @param g: a networkx graph (e.g. loaded from a GraphML file)
    @param centrality_type: one of the {degree, eigenvector, Katz} centrality measures
    @param n: number of top central nodes to return
    @return: a dictionary mapping the top n central nodes to their centrality values
    """
    if centrality_type == 'eigen':
        centrality = nx.eigenvector_centrality_numpy(g)
    elif centrality_type == 'degree':
        centrality = nx.degree_centrality(g)
    else:
        centrality = nx.katz_centrality_numpy(g)

    # Return the top n accounts by the highest centrality value
    ordered_accounts = sorted(centrality.items(), reverse=True, key=lambda x: x[1])
    result = {}
    for item in ordered_accounts[0:n]:
        result[item[0]] = item[1]
        print('{}: {} centrality of {}'.format(centrality_type, item[0], item[1]))
    return result
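A minimal usage sketch for compute_centrality above; the graph and the value of n are illustrative and assume networkx is imported as nx:

import networkx as nx

# hypothetical example graph; any graph loaded from GraphML would work the same way
g = nx.gnp_random_graph(20, 0.2, seed=42)
top3 = compute_centrality(g, 'katz', 3)  # any type other than 'eigen'/'degree' falls through to Katz
# top3 maps the three most central nodes to their Katz centrality values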
def plot_centralities(G):
    n = G.number_of_nodes()
    plt.figure('Graph %s: %s - %s' % (str(i), tmsp2str(Tmin + i * dt), tmsp2str(Tmin + (i + 1) * dt)))
    plt.suptitle('Centrality Measurements (Graph size = ' + str(n) + ')')
    in_degrees = [(n - 1) * d for d in nx.in_degree_centrality(G).values()]
    out_degrees = [(n - 1) * d for d in nx.out_degree_centrality(G).values()]
    degrees = [(n - 1) * d for d in nx.degree_centrality(G).values()]
    hist_plot('Degrees', [in_degrees, out_degrees, degrees], (3, 1, 1), ['r', 'g', 'b'])
    plt.legend(['In-Degree', 'Out-Degree', 'Degree'])  # legend order matches the data order above
    G = nx.Graph(G)  # directed -> undirected
    hist_plot('Closeness', nx.closeness_centrality(G).values(), (3, 2, 3), 'xkcd:orangered')
    hist_plot('Betweenness', nx.betweenness_centrality(G).values(), (3, 2, 4), 'xkcd:crimson')
    hist_plot('Eigenvector', nx.eigenvector_centrality_numpy(G).values(), (3, 2, 5), 'xkcd:teal')
    hist_plot('Katz', nx.katz_centrality_numpy(G).values(), (3, 2, 6), 'xkcd:brown')
    plt.tight_layout(rect=(0, 0, 1, 0.95))
    if args.PDF:
        pp.savefig()
        plt.close()
    else:
        plt.show()
def calc_network_props(primary_nodes, df_nodes, df_interactome, df_network, filter_condition):
    '''
    Use NetworkX to calculate degree centrality etc.
    '''
    start = timeit.default_timer()
    G = nx.from_pandas_edgelist(df_interactome, 'source', 'target',
                                edge_attr=['type', '#Experiments', '#Methods', '#Publications'])

    # In the filtered network the primary nodes may end up without edges and therefore
    # be missing as nodes. Add them back here.
    for p in primary_nodes:
        if p not in G.nodes:
            G.add_node(p)

    # The degree centrality for a node v is the fraction of nodes it is connected to
    d = nx.degree_centrality(G)
    df_nodes['Degree centrality'] = df_nodes['Standard name'].map(d)

    ### Drop nodes without interactions
    # These show up as NaN because they did not exist in the networkx graph
    # due to not having any interactions.
    df_nodes = df_nodes[pd.notnull(df_nodes['Degree centrality'])]
    df_nodes = df_nodes.reset_index(drop=True)

    # Calculate eigenvector and Katz centrality
    if filter_condition == 'Eigenvector centrality':
        # this takes about 40%-50% of the time; the steps below take most of the rest
        d = nx.eigenvector_centrality(G, max_iter=400)
        df_nodes['Eigenvector centrality'] = df_nodes['Standard name'].map(d)
    if filter_condition == 'Katz centrality':
        # don't calculate this unless asked for
        d = nx.katz_centrality_numpy(G)
        df_nodes['Katz centrality'] = df_nodes['Standard name'].map(d)

    # clustering coefficient
    # df_network['Average clustering coefficient'] = nx.average_clustering(G)  # this takes significant time

    return df_nodes, df_interactome, df_network, G
def max_centrality_individual(k, G, centrality_metric="degree"):
    """
    Returns the k nodes with the highest value of the chosen centrality metric.

    :param k: seed set size
    :param G: networkx graph
    :param centrality_metric: centrality metric string, to be chosen from "degree",
        "eigenvector", "katz", "closeness", "betweenness", "second_order"
    :return: list of the k most central nodes, in decreasing order of centrality
    """
    if centrality_metric == "degree":
        nodes_centrality = nx.degree_centrality(G)
    elif centrality_metric == "eigenvector":
        nodes_centrality = nx.eigenvector_centrality_numpy(G)
    elif centrality_metric == "katz":
        nodes_centrality = nx.katz_centrality_numpy(G)
    elif centrality_metric == "closeness":
        nodes_centrality = nx.closeness_centrality(G)
    elif centrality_metric == "betweenness":
        nodes_centrality = nx.betweenness_centrality(G)
    elif centrality_metric == "second_order":
        nodes_centrality = nx.second_order_centrality(G)

    sorted_nodes_centrality = dict(
        sorted(nodes_centrality.items(), key=lambda item: item[1], reverse=True))
    return list(sorted_nodes_centrality)[:k]
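A short usage sketch for max_centrality_individual, assuming networkx is imported as nx; the graph, metric, and seed-set size are illustrative:

import networkx as nx

G = nx.karate_club_graph()
seeds = max_centrality_individual(5, G, centrality_metric="katz")
print(seeds)  # the 5 nodes with the highest Katz centrality, in decreasing order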
def test_eigenvector_v_katz_random(self):
    G = networkx.gnp_random_graph(10, 0.5)
    l = float(max(eigvals(networkx.adjacency_matrix(G))))
    e = networkx.eigenvector_centrality_numpy(G)
    k = networkx.katz_centrality_numpy(G, 1.0 / l)
    for n in G:
        assert_almost_equal(e[n], k[n])
def centralitity(G, i):
    """
    :param G: the networkx graph
    :param i: which centrality measure to use
    :return: a dictionary mapping vertices to centrality values
    """
    degc = {}
    if i == 1:
        # print("Making use of eigenvector centrality")
        degc = nx.eigenvector_centrality(G)
    elif i == 2:
        # print("Making use of closeness centrality")
        degc = nx.closeness_centrality(G)
    elif i == 3:
        # print("Making use of betweenness centrality")
        degc = nx.betweenness_centrality(G)
    elif i == 4:
        # print("Making use of degree centrality")
        degc = nx.degree_centrality(G)
    else:
        # print("Making use of katz centrality")
        degc = nx.katz_centrality_numpy(G)
    return degc
def centrality_values(G):
    """
    :param G: networkx graph
    :return: plotted graph
    """
    original_centrality_val = []
    noise_centrality_val = []
    res = 0
    cnt = 0
    res_original_nodes = 0
    cnt_original_nodes = 0
    color_map = []
    degc = nx.katz_centrality_numpy(G)
    for n in G.nodes():
        if G.nodes[n]['target_degree'] == 0 and G.nodes[n]['degree'] >= 1:
            color_map.append('blue')
            res = res + degc[n]
            cnt = cnt + 1
            # Debugging
            # print G.degree(n), G.node[n]['degree'], G.node[n]
        else:
            color_map.append('red')
            res_original_nodes = res_original_nodes + degc[n]
            cnt_original_nodes = cnt_original_nodes + 1
    original_centrality_val.append(res_original_nodes / cnt_original_nodes)
    noise_centrality_val.append(res / cnt)
    plt.xlabel('Values of K')
    plt.ylabel('Social Importance')
    plt.legend()
    plt.show()
def test_eigenvector_v_katz_random(self):
    G = nx.gnp_random_graph(10, 0.5, seed=1234)
    l = float(max(eigvals(nx.adjacency_matrix(G).todense())))
    e = nx.eigenvector_centrality_numpy(G)
    k = nx.katz_centrality_numpy(G, 1.0 / l)
    for n in G:
        assert_almost_equal(e[n], k[n])
def attributes(G):
    # elen = {}
    # for e in G.edges:
    ## Centrality metrics
    G_clustering = nx.clustering(G)
    G_deg = nx.degree_centrality(G)
    G_degree = nx.degree(G)
    # G_bet = nx.betweenness_centrality(G)
    G_eig = nx.eigenvector_centrality_numpy(G)
    G_page = nx.pagerank_numpy(G)
    # G_load = nx.load_centrality(G)
    G_katz = nx.katz_centrality_numpy(G)
    G_closeness = nx.closeness_centrality(G)  # aka node strength https://arxiv.org/pdf/0803.3884.pdf
    # closeness
    # print(G_closeness)
    Centrality_metric = {"Degree_centrality": G_deg, "Eigencentrality": G_eig, "katz": G_katz,
                         "Pagerank": G_page, "Closeness": G_closeness, "Clustering": G_clustering}
    # Centrality_metric = {"Degree_centrality": G_deg, "Clustering": G_clustering}
    for cent in Centrality_metric:
        nx.set_node_attributes(G, name=cent, values=Centrality_metric[cent])
    d = {key: value for (key, value) in G_degree}
    nx.set_node_attributes(G, name="Degree", values=d)
    # CBN1.0 Centrality by node
    G = cbn2(G)
    G = cbn3(G)
    # return G, G_page, G_katz, G_closeness
    return G
def test_eigenvector_centrality_weighted(self):
    G = self.G
    alpha = self.G.alpha
    p = networkx.katz_centrality_numpy(G, alpha)
    print(p.values())
    for (a, b) in zip(list(p.values()), self.G.evc):
        assert_almost_equal(a, b)
def test_P3_unweighted(self):
    """Katz centrality: P3"""
    alpha = 0.1
    G = nx.path_graph(3)
    b_answer = {0: 0.5598852584152165, 1: 0.6107839182711449, 2: 0.5598852584152162}
    b = nx.katz_centrality_numpy(G, alpha, weight=None)
    for n in sorted(G):
        assert b[n] == pytest.approx(b_answer[n], abs=1e-4)
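The expected values in the P3 tests above and below can be reproduced from the closed form behind Katz centrality, x = (I - alpha*A)^(-1) * beta*1, followed by normalization to unit Euclidean norm; a minimal numpy sketch for the path graph on three nodes with alpha = 0.1:

import numpy as np

A = np.array([[0, 1, 0],
              [1, 0, 1],
              [0, 1, 0]], dtype=float)   # adjacency matrix of the path graph P3
alpha, beta = 0.1, 1.0
x = np.linalg.solve(np.eye(3) - alpha * A, beta * np.ones(3))
x /= np.linalg.norm(x)                    # normalize to unit Euclidean norm
print(x)                                  # ~[0.55988526, 0.61078392, 0.55988526]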
def test_beta_as_dict(self):
    alpha = 0.1
    beta = {0: 1.0, 1: 1.0, 2: 1.0}
    b_answer = {0: 0.5598852584152165, 1: 0.6107839182711449, 2: 0.5598852584152162}
    G = nx.path_graph(3)
    b = nx.katz_centrality_numpy(G, alpha, beta)
    for n in sorted(G):
        assert b[n] == pytest.approx(b_answer[n], abs=1e-4)
def write_highest_k(temp, file_katz, stop):
    k_high = sort_dictionary_by_value_desc(nx.katz_centrality_numpy(temp))
    k_high_count = Counter(k_high)
    writer = csv.writer(file_katz, delimiter=';')
    row = [stop.date()]
    for k, v in k_high_count.most_common(5):
        row.append('%s: %f' % (k.replace(',', ''), v))
    writer.writerow(row)
    return
def test_beta_as_dict(self):
    alpha = 0.1
    beta = {0: 1.0, 1: 1.0, 2: 1.0}
    b_answer = {0: 0.5598852584152165, 1: 0.6107839182711449, 2: 0.5598852584152162}
    G = networkx.path_graph(3)
    b = networkx.katz_centrality_numpy(G, alpha, beta)
    for n in sorted(G):
        assert_almost_equal(b[n], b_answer[n], places=4)
def test_P3(self):
    """Katz centrality: P3"""
    alpha = 0.1
    G = networkx.path_graph(3)
    b_answer = {0: 0.5598852584152165, 1: 0.6107839182711449, 2: 0.5598852584152162}
    b = networkx.katz_centrality_numpy(G, alpha)
    for n in sorted(G):
        assert_almost_equal(b[n], b_answer[n], places=4)
def export_katz(self, G):
    print("train")
    result = nx.katz_centrality_numpy(G)
    import csv
    with open("public/jaccard.csv", "w") as _file:
        wr = csv.writer(_file, dialect='excel')
        wr.writerows(result.items())  # write (node, centrality) pairs, one per row
def create_centralities_list(G, maxiter=2000, pphi=5, centList=[]):
    if len(centList) == 0:
        centList = ['degree_centrality', 'closeness_centrality', 'betweenness_centrality',
                    'eigenvector_centrality', 'katz_centrality', 'page_rank']
    cenLen = len(centList)
    valus = {}
    # plt.figure(figsize=figsi)
    for uu, centr in enumerate(centList):
        if centr == 'degree_centrality':
            cent = nx.degree_centrality(G)
            sstt = 'Degree Centralities'
            ssttt = 'degree centrality'
            valus[centr] = cent
        elif centr == 'closeness_centrality':
            cent = nx.closeness_centrality(G)
            sstt = 'Closeness Centralities'
            ssttt = 'closeness centrality'
            valus[centr] = cent
        elif centr == 'betweenness_centrality':
            cent = nx.betweenness_centrality(G)
            sstt = 'Betweenness Centralities'
            ssttt = 'betweenness centrality'
            valus[centr] = cent
        elif centr == 'eigenvector_centrality':
            try:
                cent = nx.eigenvector_centrality(G, max_iter=maxiter)
                sstt = 'Eigenvector Centralities'
                ssttt = 'eigenvector centrality'
                valus[centr] = cent
            except:
                valus[centr] = None
                continue
        elif centr == 'katz_centrality':
            phi = (1 + math.sqrt(pphi)) / 2.0  # golden-ratio-style constant used as a stand-in for the largest adjacency eigenvalue
            cent = nx.katz_centrality_numpy(G, 1 / phi - 0.01)
            sstt = 'Katz Centralities'
            ssttt = 'Katz centrality'
            valus[centr] = cent
        elif centr == 'page_rank':
            try:
                cent = nx.pagerank(G)
                sstt = 'PageRank'
                ssttt = 'pagerank'
                valus[centr] = cent
            except:
                valus[centr] = None
                continue
        print('%s done!!!' % sstt)
    return valus
def test_beta_as_dict(self):
    alpha = 0.1
    beta = {0: 1.0, 1: 1.0, 2: 1.0}
    b_answer = {0: 0.5598852584152165, 1: 0.6107839182711449, 2: 0.5598852584152162}
    G = nx.path_graph(3)
    b = nx.katz_centrality_numpy(G, alpha, beta)
    for n in sorted(G):
        assert_almost_equal(b[n], b_answer[n], places=4)
def test_P3_unweighted(self):
    """Katz centrality: P3"""
    alpha = 0.1
    G = nx.path_graph(3)
    b_answer = {0: 0.5598852584152165, 1: 0.6107839182711449, 2: 0.5598852584152162}
    b = nx.katz_centrality_numpy(G, alpha, weight=None)
    for n in sorted(G):
        assert_almost_equal(b[n], b_answer[n], places=4)
def draw_centralities(G, centr, pos, with_edgewidth=False, withLabels=True, pernode_dict={}, title_st='',
                      labfs=10, valpha=0.4, ealpha=0.4):
    plt.figure(figsize=(12, 12))
    if centr == 'degree_centrality':
        cent = nx.degree_centrality(G)
        sstt = 'Degree Centralities'
        ssttt = 'degree centrality'
    elif centr == 'closeness_centrality':
        cent = nx.closeness_centrality(G)
        sstt = 'Closeness Centralities'
        ssttt = 'closeness centrality'
    elif centr == 'betweenness_centrality':
        cent = nx.betweenness_centrality(G)
        sstt = 'Betweenness Centralities'
        ssttt = 'betweenness centrality'
    elif centr == 'eigenvector_centrality':
        cent = nx.eigenvector_centrality(G, max_iter=1000)
        sstt = 'Eigenvector Centralities'
        ssttt = 'eigenvector centrality'
    elif centr == 'katz_centrality':
        phi = (1 + math.sqrt(5)) / 2.0  # golden ratio used as a stand-in for the largest adjacency eigenvalue
        cent = nx.katz_centrality_numpy(G, 1 / phi - 0.01)
        sstt = 'Katz Centralities'
        ssttt = 'Katz centrality'
    elif centr == 'page_rank':
        cent = nx.pagerank(G)
        sstt = 'PageRank'
        ssttt = 'pagerank'
    cs = {}
    for k, v in cent.items():
        if v not in cs:
            cs[v] = [k]
        else:
            cs[v].append(k)
    for k in sorted(cs, reverse=True):
        for v in cs[k]:
            print('Node %s has %s = %.4f' % (v, ssttt, k))
    if withLabels:
        if len(pernode_dict) > 1:
            labels = {i: v for v, i in pernode_dict.items() if i in G.nodes()}
            labe = nx.draw_networkx_labels(G, pos=pos, labels=labels, font_size=20)
        else:
            labe = nx.draw_networkx_labels(G, pos=pos, font_size=labfs)
    nx.draw_networkx_nodes(G, pos=pos, nodelist=list(cent.keys()),
                           # with_labels=withLabels,
                           node_size=[d * 4000 for d in cent.values()],
                           node_color=list(cent.values()),
                           cmap=plt.cm.Reds, alpha=valpha)
    if with_edgewidth:
        edgewidth = []
        for (u, v, d) in G.edges(data=True):
            edgewidth.append(d['weight'])
    else:
        edgewidth = [1 for i in G.edges()]
    nx.draw_networkx_edges(G, pos=pos, edge_color='b', width=edgewidth, alpha=ealpha)
    plt.title(title_st + ' ' + sstt, fontsize=20)
    kk = plt.axis('off')
def katz_strategy(graph, num_seeds, num_rounds):
    lam_max = max(nx.adjacency_spectrum(graph)).real  # largest adjacency eigenvalue
    node_values = nx.katz_centrality_numpy(graph, 1 / lam_max)
    top_katz = sorted(node_values.items(), key=operator.itemgetter(1), reverse=True)[:num_seeds]
    highest_katz = [i[0] for i in top_katz]
    return (highest_katz * num_rounds)
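Katz centrality solves x = alpha*A*x + beta*1 and only converges when alpha is strictly below the reciprocal of the largest adjacency eigenvalue; a small sketch (graph and safety margin are illustrative) that chooses alpha safely before calling katz_centrality_numpy:

import networkx as nx
import numpy as np

G = nx.gnp_random_graph(50, 0.1, seed=0)          # illustrative graph
lam_max = max(np.abs(nx.adjacency_spectrum(G)))   # spectral radius of the adjacency matrix
alpha = 0.9 / lam_max                             # stay safely below 1 / lambda_max
katz = nx.katz_centrality_numpy(G, alpha=alpha)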
def centrailtyM(A, num=5):
    G = nx.DiGraph(A)
    ranks = np.zeros((num, 8))
    ranks[:, 0] = np.argsort(list(nx.in_degree_centrality(G).values()))[::-1][:num]
    ranks[:, 1] = np.argsort(list(nx.closeness_centrality(G).values()))[::-1][:num]
    ranks[:, 2] = np.argsort(list(nx.betweenness_centrality(G).values()))[::-1][:num]
    ranks[:, 3] = np.argsort(list(nx.eigenvector_centrality_numpy(G).values()))[::-1][:num]
    ranks[:, 4] = np.argsort(list(nx.katz_centrality_numpy(G, weight=None).values()))[::-1][:num]
    ranks[:, 5] = np.argsort(list(nx.pagerank_numpy(G, weight=None).values()))[::-1][:num]
    return ranks
def draw_centralities(G, centr, pos, with_edgewidth=False, withLabels=True, pernode_dict={}, title_st='',
                      labfs=10, valpha=0.4, ealpha=0.4):
    plt.figure(figsize=(12, 12))
    if centr == 'degree_centrality':
        cent = nx.degree_centrality(G)
        sstt = 'Degree Centralities'
        ssttt = 'degree centrality'
    elif centr == 'closeness_centrality':
        cent = nx.closeness_centrality(G)
        sstt = 'Closeness Centralities'
        ssttt = 'closeness centrality'
    elif centr == 'betweenness_centrality':
        cent = nx.betweenness_centrality(G)
        sstt = 'Betweenness Centralities'
        ssttt = 'betweenness centrality'
    elif centr == 'eigenvector_centrality':
        cent = nx.eigenvector_centrality(G, max_iter=2000)
        sstt = 'Eigenvector Centralities'
        ssttt = 'eigenvector centrality'
    elif centr == 'katz_centrality':
        phi = (1 + math.sqrt(5)) / 2.0  # golden ratio used as a stand-in for the largest adjacency eigenvalue
        cent = nx.katz_centrality_numpy(G, 1 / phi - 0.01)
        sstt = 'Katz Centralities'
        ssttt = 'Katz centrality'
    elif centr == 'page_rank':
        cent = nx.pagerank(G)
        sstt = 'PageRank'
        ssttt = 'pagerank'
    cs = {}
    nods_dici = {v: k for k, v in pernode_dict.items()}
    for k, v in cent.items():
        if v not in cs:
            cs[v] = [k]
        else:
            cs[v].append(k)
    for k in sorted(cs, reverse=True):
        for v in cs[k]:
            print('Node %s has %s = %.4f' % (nods_dici[v], ssttt, k))
    if withLabels:
        if len(pernode_dict) > 1:
            labels = {i: v for v, i in pernode_dict.items() if i in G.nodes()}
            labe = nx.draw_networkx_labels(G, pos=pos, labels=labels, font_size=20)
        else:
            labe = nx.draw_networkx_labels(G, pos=pos, font_size=labfs)
    nx.draw_networkx_nodes(G, pos=pos, nodelist=list(cent.keys()),
                           # with_labels=withLabels,
                           node_size=[d * 4000 for d in cent.values()],
                           node_color=list(cent.values()),
                           cmap=plt.cm.Reds, alpha=valpha)
    if with_edgewidth:
        edgewidth = []
        for (u, v, d) in G.edges(data=True):
            edgewidth.append(d['weight'])
    else:
        edgewidth = [1 for i in G.edges()]
    nx.draw_networkx_edges(G, pos=pos, edge_color='b', width=edgewidth, alpha=ealpha)
    plt.title(title_st + ' ' + sstt, fontsize=20)
    kk = plt.axis('off')
def network_algorithms(g, dfs):
    print("Calculating network algorithms")
    # iterate over graph components
    for i in dfs:
        metrics = []
        # find all edges of the subgraph and only keep the "offer" relationship
        id_ = i.select("id").map(lambda a: a.id)
        ids = id_.collect()
        edges_ = g.edges.rdd.filter(lambda a: a.src in ids).toDF()
        df = edges_.select("src", "dst").toPandas()
        edge_list = [tuple(x) for x in df.values]

        # generate a networkx graph
        G = nx.Graph()
        G.add_edges_from(edge_list)

        # calculate several network metrics for the graph
        metrics.append(result_to_pandas(dict(nx.degree(G)), "degree"))
        metrics.append(result_to_pandas(nx.closeness_centrality(G), "closeness_centrality"))
        metrics.append(result_to_pandas(nx.betweenness_centrality(G), "betweenness_centrality"))
        metrics.append(result_to_pandas(nx.current_flow_closeness_centrality(G), "current_flow_closeness_centrality"))
        metrics.append(result_to_pandas(nx.current_flow_betweenness_centrality(G), "current_flow_betweenness_centrality"))
        metrics.append(result_to_pandas(nx.katz_centrality_numpy(G), "katz_centrality"))
        metrics.append(result_to_pandas(nx.load_centrality(G), "load_centrality"))
        metrics.append(result_to_pandas(nx.pagerank(G), "pagerank"))
        # TypeError: Cannot use scipy.linalg.eig for sparse A with k >= N - 1.
        # Use scipy.linalg.eig(A.toarray()) or reduce k.
        # metrics.append(result_to_pandas(nx.eigenvector_centrality_numpy(G), "eigenvector_centrality"))

        # join the network metrics into one dataframe
        res = pd.concat(metrics, axis=1, sort=False)
        res = res.reset_index(drop=False)
        res.rename(columns={"index": "id"}, inplace=True)
        print(res)

        # convert the result into a spark dataframe
        spark_df = sqlContext.createDataFrame(res)

        # create or add to the big dataframe that contains all components
        try:
            out = out.unionAll(spark_df)
        except NameError:
            out = spark_df
    return out
def centrality_measurements(H):
    # calculate
    temp_deg = nx.degree_centrality(H)
    temp_in_deg = nx.in_degree_centrality(H)
    temp_out_deg = nx.out_degree_centrality(H)
    temp_close_centr = nx.closeness_centrality(H)
    temp_bet_centr = nx.betweenness_centrality(H)
    temp_eig_centr = nx.eigenvector_centrality_numpy(H)
    temp_katz_centr = nx.katz_centrality_numpy(H)
    return temp_deg, temp_in_deg, temp_out_deg, temp_close_centr, temp_bet_centr, temp_eig_centr, temp_katz_centr
def __init__(self, G):
    self._nodes = len(G.nodes())
    self._edges = len(G.edges())
    self._density = nx.density(G)
    self._diameter = nx.diameter(G)
    self._radius = nx.radius(G)
    self._avg_shortest_path_length = nx.average_shortest_path_length(G)
    self._betweenness_centrality = np.array([v for k, v in nx.betweenness_centrality(G).items()])
    self._katz_centrality = np.array([v for k, v in nx.katz_centrality_numpy(G).items()])
    self._eigenvector_centrality = np.array([v for k, v in nx.eigenvector_centrality_numpy(G).items()])
    self._closeness_centrality = np.array([v for k, v in nx.closeness_centrality(G).items()])
def get_most_central_users(N=1000):
    g = load_nx_graph()
    # g = gt.load_graph(GT_GRAPH_PATH)
    katzc = nx.katz_centrality_numpy(g)
    # katzc = gt.katz(g)
    # katzc_array = katzc.get_array()
    # katzc_sorted = sorted(enumerate(katzc_array), key=lambda v: v[1])
    # sort descending so the most central users come first
    katzc_sorted = sorted(katzc.items(), key=lambda x: x[1], reverse=True)
    # most_central = [id for (id, c) in katzc_sorted][:N]
    # most_central_twids = [get_twitter_id(g, id) for id in most_central]
    most_central_twids = [k for k, v in katzc_sorted][:N]
    return most_central_twids
def seriate(M, W=None):
    '''Attempt to obtain a unique seriation of M with on-site immutables W.

    There may be multiple index permutations which yield the same matrix. Only one
    such permutation is returned. This method has not yet been robustly checked
    against a large range of circuits.'''

    # number of problem nodes
    N = M.shape[0]

    # map M values to positive integers
    values = sorted(set(M.ravel().tolist()))
    values.remove(0)
    J = np.zeros(M.shape, dtype=int)
    for i in range(len(values)):
        J += (M == values[i]) * (i + 1)

    # construct graph of J
    G = nx.Graph(J)

    # compute maximum eigenvalue of G
    lmax = max(np.linalg.eigvalsh(J))

    # scale factor for integer mapping
    scale = max(1, int(np.max(np.abs(M)) * 1e6))

    # Katz centrality
    K = nx.katz_centrality_numpy(G, alpha=lmax, beta=1.0)
    K = [int(scale * K[i]) for i in range(N)]
    D = np.diag(M)

    if W is None:
        X = list(zip(K, D))
    else:
        X = list(zip(W, K, D))

    vals, inds = [list(x) for x in zip(*sorted(zip(X, range(N))))]

    # update J matrix
    J = J[inds, :][:, inds]

    # determine sub-problems
    sub_probs = get_sub_problems(vals)

    # solve each sub-problem
    for key in sorted(sub_probs):
        sub_seriate(J, sub_probs[key], inds)

    return inds
def Centrality(interval, G, title):
    dc = 0
    label = ""
    if title == "DegreeCentrality":
        # dc = list(nx.degree_centrality(G).items(), key=lambda kv: kv[1])
        dc = list(nx.degree_centrality(G).items())
        label = "Degree Centrality"
    elif title == "ClosenessCentrality":
        # dc = list(nx.closeness_centrality(G).items(), key=lambda kv: kv[1])
        dc = list(nx.closeness_centrality(G).items())
        label = "Closeness Centrality"
    elif title == "BetweennesCentrality":
        # dc = list(nx.betweenness_centrality(G).items(), key=lambda kv: kv[1])
        dc = list(nx.betweenness_centrality(G).items())
        label = "Betweenness Centrality"
    elif title == "EigenvectorCentrality":
        # dc = list(nx.eigenvector_centrality_numpy(G, 1000).items(), key=lambda kv: kv[1])
        dc = list(nx.eigenvector_centrality_numpy(G).items())
        label = "Eigenvector Centrality"
    elif title == "KatzCentrality":
        # dc = list(nx.katz_centrality_numpy(G).items(), key=lambda kv: kv[1])
        dc = list(nx.katz_centrality_numpy(G).items())
        label = "Katz Centrality"
    elif title == "InDegreeCentrality":
        # dc = list(nx.in_degree_centrality(G).items(), key=lambda kv: kv[1])
        dc = list(nx.in_degree_centrality(G).items())
        label = "In-Degree Centrality"
    elif title == "OutDegreeCentrality":
        # dc = list(nx.out_degree_centrality(G).items(), key=lambda kv: kv[1])
        dc = list(nx.out_degree_centrality(G).items())
        label = "Out-Degree Centrality"
    i = 0
    vals = {}
    for v in dc:
        if v[1] in vals:
            vals[v[1]] += 1
        else:
            vals[v[1]] = 1
    xlist = list(vals.keys())
    ylist = list(vals.values())
    dia.plot(xlist, ylist, alpha=0.8, color='gold', label='Key Value')
    dia.xlabel('Centrality')
    dia.ylabel('Percentage')
    dia.title(interval.getMinMax() + "\n" + label)
    global pic
    pic += 1
    dia.savefig("Centrality_Diagrams/" + label + str(pic))
def centrailtyM(A, num=5):
    G = nx.DiGraph(A)
    ranks = np.zeros((num, 8))
    ranks[:, 0] = np.argsort(list(nx.in_degree_centrality(G).values()))[::-1][:num]
    ranks[:, 1] = np.argsort(list(nx.closeness_centrality(G).values()))[::-1][:num]
    ranks[:, 2] = np.argsort(list(nx.betweenness_centrality(G).values()))[::-1][:num]
    ranks[:, 3] = np.argsort(list(nx.eigenvector_centrality_numpy(G).values()))[::-1][:num]
    ranks[:, 4] = np.argsort(list(nx.katz_centrality_numpy(G, weight=None).values()))[::-1][:num]
    ranks[:, 5] = np.argsort(list(nx.pagerank_numpy(G, weight=None).values()))[::-1][:num]
    return ranks
def dump_to_treemap_d3js(graph, file: str, cluster_threshold: int = 3) -> None:
    """
    Dumps into an output json file all clusters and the information for each institution
    about their centrality scores.

    Given that it's an undirected graph, the centrality scores included are:
        - hub
        - authorities
        - betweenness
        - closeness centrality
        - katz centrality
        - eigen centrality
    """
    partition = community.best_partition(graph)
    institution_clusters: Dict[str, List[str]] = defaultdict(list)
    for k, p in partition.items():
        institution_clusters[p].append(k)

    eigen_centrality = nx.eigenvector_centrality(graph)
    katz_centrality = nx.katz_centrality_numpy(graph)
    closeness_centrality = nx.closeness_centrality(graph)
    betweenness_centrality = nx.betweenness_centrality(graph)
    hubs, authorities = nx.hits(graph)

    output: Dict[str, Any] = {'name': 'institutions', 'children': []}
    for k, v in institution_clusters.items():
        if len(v) > cluster_threshold:
            name_k = f'cluster_{k}'
            cluster_childrens: Dict[str, Any] = {
                'name': name_k,
                'children': []
            }
            for institution in v:
                degree = graph.degree[institution]
                institution_data = {
                    'name': institution,
                    'hub': hubs[institution],
                    'authorities': authorities[institution],
                    'betweenness': betweenness_centrality[institution],
                    'closeness': closeness_centrality[institution],
                    'katz': katz_centrality[institution],
                    'eigen': eigen_centrality[institution],
                    'size': degree
                }
                cluster_childrens['children'].append(institution_data)
            output['children'].append(cluster_childrens)

    with open(file, 'w') as f:
        json.dump(output, f)
def displayCentralities():
    print("---------------------------")
    print("Degree centrality (the number of links incident upon a node) => LIKELIHOOD OF RECEIVING INFORMATION")
    print(sorted(list(nx.degree_centrality(G).items()), key=operator.itemgetter(1), reverse=True))
    print("---------------------------")
    print("---------------------------")
    print("Betweenness centrality (quantifies the number of times a node acts as a bridge along the shortest path between two other nodes) => CONTROL ON OTHERS")
    print(sorted(list(nx.betweenness_centrality(G).items()), key=operator.itemgetter(1), reverse=True))
    print("---------------------------")
    print("---------------------------")
    print("Eigenvector centrality (a measure of the influence of a node in a network)")
    print(sorted(list(nx.eigenvector_centrality(G).items()), key=operator.itemgetter(1), reverse=True))
    print("---------------------------")
    print("---------------------------")
    print("Katz centrality (relative influence of a node)")
    print(sorted(list(nx.katz_centrality_numpy(G).items()), key=operator.itemgetter(1), reverse=True))
    print("---------------------------")
def all_measures(node_dict, adj, iteration, alpha=0.9):
    # pool = mp.Pool(mp.cpu_count())
    G = nx.convert_matrix.from_numpy_matrix(adj)
    df = pd.DataFrame.from_dict(node_dict, orient='index')
    df.columns = ['Node']
    df['iteration'] = iteration
    df['status'] = pd.Series([val.status for val in node_dict.values()])
    df['degree'] = pd.Series(nx.degree_centrality(G))
    df['eigenvector'] = pd.Series(nx.eigenvector_centrality(G))
    df['katz'] = pd.Series(nx.katz_centrality_numpy(G))
    # df['closeness'] = pd.Series(nx.closeness_centrality(G))
    # df['betweenness'] = pd.Series(nx.betweenness_centrality(G))
    df['pagerank'] = pd.Series(nx.pagerank(G, alpha))
    df['local_clustering_coefficients'] = pd.Series(nx.clustering(G))
    return df
def Centrality(G, N=10):
    phi = 1.618033988749895  # golden ratio, used as a stand-in for the largest eigenvalue of the adjacency matrix
    ranking = nx.katz_centrality_numpy(G, 1 / phi)
    important_nodes = sorted(ranking.items(), key=operator.itemgetter(1))[::-1]  # [0:Nimportant]
    Mstd = 1  # 1 standard deviation CI
    data = np.array([n[1] for n in important_nodes])
    out = len(data[abs(data - np.mean(data)) > Mstd * np.std(data)])  # outliers within Mstd stdev interval
    if out > N:
        dnodes = [n[0] for n in important_nodes[:N]]
        print('Influential Users: {0}'.format(str(dnodes)))
    else:
        dnodes = [n[0] for n in important_nodes[:out]]
        print('Influential Users: {0}'.format(str(important_nodes[:out])))
    Gt = G.subgraph(dnodes)
    drawGraph(Gt, Label=True)
    return Gt
def get_node_features(G, list_of_node_features):
    graph_features_dict = {}
    if 'bet_cent' in list_of_node_features:
        # change 'weight' to hashtag's inverse support, because the edge weights
        # are currently treated as penalties/costs
        graph_features_dict['bet_cent'] = nx.betweenness_centrality(G)
    if 'deg_cent' in list_of_node_features:
        graph_features_dict['deg_cent'] = nx.degree_centrality(G)
    if 'eig_cent' in list_of_node_features:
        graph_features_dict['eig_cent'] = nx.eigenvector_centrality_numpy(G)
    if 'katz_cent' in list_of_node_features:
        graph_features_dict['katz_cent'] = nx.katz_centrality_numpy(G)
    if 'load_cent' in list_of_node_features:
        graph_features_dict['load_cent'] = nx.load_centrality(G)
    if 'pr' in list_of_node_features:
        graph_features_dict['pr'] = nx.pagerank(G)
    if 'degree' in list_of_node_features:
        graph_features_dict['degree'] = dict(nx.degree(G))
    return graph_features_dict
def test_multiple_alpha(self):
    alpha_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
    for alpha in alpha_list:
        b_answer = {0.1: {0: 0.5598852584152165, 1: 0.6107839182711449, 2: 0.5598852584152162},
                    0.2: {0: 0.5454545454545454, 1: 0.6363636363636365, 2: 0.5454545454545454},
                    0.3: {0: 0.5333964609104419, 1: 0.6564879518897746, 2: 0.5333964609104419},
                    0.4: {0: 0.5232045649263551, 1: 0.6726915834767423, 2: 0.5232045649263551},
                    0.5: {0: 0.5144957746691622, 1: 0.6859943117075809, 2: 0.5144957746691622},
                    0.6: {0: 0.5069794004195823, 1: 0.6970966755769258, 2: 0.5069794004195823}}
        G = nx.path_graph(3)
        b = nx.katz_centrality_numpy(G, alpha)
        for n in sorted(G):
            assert_almost_equal(b[n], b_answer[alpha][n], places=4)
def test_multiple_alpha(self):
    alpha_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
    for alpha in alpha_list:
        b_answer = {0.1: {0: 0.5598852584152165, 1: 0.6107839182711449, 2: 0.5598852584152162},
                    0.2: {0: 0.5454545454545454, 1: 0.6363636363636365, 2: 0.5454545454545454},
                    0.3: {0: 0.5333964609104419, 1: 0.6564879518897746, 2: 0.5333964609104419},
                    0.4: {0: 0.5232045649263551, 1: 0.6726915834767423, 2: 0.5232045649263551},
                    0.5: {0: 0.5144957746691622, 1: 0.6859943117075809, 2: 0.5144957746691622},
                    0.6: {0: 0.5069794004195823, 1: 0.6970966755769258, 2: 0.5069794004195823}}
        G = networkx.path_graph(3)
        b = networkx.katz_centrality_numpy(G, alpha)
        for n in sorted(G):
            assert_almost_equal(b[n], b_answer[alpha][n], places=4)
print('Page rank', pager)
plt.bar(range(len(pager)), list(pager.values()), align='center')
plt.xticks(range(len(pager)), list(pager.keys()))
plt.show()

centrality = nx.eigenvector_centrality(G, 100000)
print(['%s %0.2f' % (node, centrality[node]) for node in centrality])
# plt.plot(node, centrality[node])
plt.bar(range(len(centrality)), list(centrality.values()), align='center')
plt.xticks(range(len(centrality)), list(centrality.keys()))
plt.show()
# plt.savefig("./assignment3/eigenvectorcentralityRG.png")

kz = nx.katz_centrality_numpy(G, 0.62)
print('Katz centrality', kz)
plt.bar(range(len(kz)), list(kz.values()), align='center')
plt.xticks(range(len(kz)), list(kz.keys()))
plt.show()

loops = list(nx.selfloop_edges(G))

# remove parallel edges and self-loops
graph = nx.Graph(G)
graph.remove_edges_from(loops)

# get largest connected component
# unfortunately, the iterator over the components is not guaranteed to be sorted by size
components = sorted(nx.connected_components(graph), key=len, reverse=True)
lcc = graph.subgraph(components[0])

pos = nx.spring_layout(lcc)
d = nx.degree(lcc)
def draw_centralities_subplots(G, pos, withLabels=True, labfs=10, valpha=0.4, ealpha=0.4, figsi=(12, 12), vals=False):
    centList = ['degree_centrality', 'closeness_centrality', 'betweenness_centrality',
                'eigenvector_centrality', 'katz_centrality', 'page_rank']
    cenLen = len(centList)
    valus = {}
    plt.figure(figsize=figsi)
    for uu, centr in enumerate(centList):
        if centr == 'degree_centrality':
            cent = nx.degree_centrality(G)
            sstt = 'Degree Centralities'
            ssttt = 'degree centrality'
            valus[centr] = cent
        elif centr == 'closeness_centrality':
            cent = nx.closeness_centrality(G)
            sstt = 'Closeness Centralities'
            ssttt = 'closeness centrality'
            valus[centr] = cent
        elif centr == 'betweenness_centrality':
            cent = nx.betweenness_centrality(G)
            sstt = 'Betweenness Centralities'
            ssttt = 'betweenness centrality'
            valus[centr] = cent
        elif centr == 'eigenvector_centrality':
            try:
                cent = nx.eigenvector_centrality(G, max_iter=2000)
                sstt = 'Eigenvector Centralities'
                ssttt = 'eigenvector centrality'
                valus[centr] = cent
            except:
                valus[centr] = None
                continue
        elif centr == 'katz_centrality':
            phi = (1 + math.sqrt(5)) / 2.0  # golden ratio used as a stand-in for the largest adjacency eigenvalue
            cent = nx.katz_centrality_numpy(G, 1 / phi - 0.01)
            sstt = 'Katz Centralities'
            ssttt = 'Katz centrality'
            valus[centr] = cent
        elif centr == 'page_rank':
            try:
                cent = nx.pagerank(G)
                sstt = 'PageRank'
                ssttt = 'pagerank'
                valus[centr] = cent
            except:
                valus[centr] = None
                continue
        cs = {}
        for k, v in cent.items():
            if v not in cs:
                cs[v] = [k]
            else:
                cs[v].append(k)
        nodrank = []
        uui = 0
        for k in sorted(cs, reverse=True):
            for v in cs[k]:
                if uui < 5:
                    nodrank.append(v)
                    uui += 1
        nodeclo = []
        for k, v in cent.items():
            if k in nodrank:
                nodeclo.append(v)
            else:
                nodeclo.append(0.)
        plt.subplot(1 + cenLen / 2., 2, uu + 1).set_title(sstt)
        if withLabels:
            labe = nx.draw_networkx_labels(G, pos=pos, font_size=labfs)
        nx.draw_networkx_nodes(G, pos=pos, nodelist=list(cent.keys()),
                               node_color=nodeclo,
                               cmap=plt.cm.Reds, alpha=valpha)
        nx.draw_networkx_edges(G, pos=pos, edge_color='b', alpha=ealpha)
        plt.title(sstt, fontsize=20)
        kk = plt.axis('off')
    if vals:
        return valus
import numpy
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import pygraphviz
import math

edges = pd.read_csv('fulllist.csv', encoding='utf-8')
# edges[(edges.name_x == 'Zadie Smith') | (edges.name_y == 'Zadie Smith')]

H = nx.DiGraph()
# phil = edges[(edges.phil_x == 1) & (edges.phil_y == 1)]
# phil = phil.dropna(subset=['name_x', 'name_y'])
# H.add_edges_from(numpy.array(phil[['name_x', 'name_y']]))
edges = edges.dropna(subset=['name_x', 'name_y'])
H.add_edges_from(numpy.array(edges[['name_x', 'name_y']]))

d = nx.degree(H)
k = nx.katz_centrality_numpy(H.reverse(), alpha=0.075, beta=1)
# b = nx.betweenness_centrality(H)

s = pd.Series(k, name='kc_score')
s.index.name = 'name'
s = s.reset_index()
s = s.sort_values('kc_score', ascending=False)
print(s[0:60])

# nx.ancestors(H, 'Plato')
# plt.figure(figsize=(50, 50))
# try:
#     pos = nx.graphviz_layout(H, prog='dot')
# except:
#     pos = nx.spring_layout(H, iterations=20)
# pos = nx.spring_layout(H, iterations=20)
def test_katz_centrality_unweighted(self):
    G = self.H
    alpha = self.H.alpha
    p = nx.katz_centrality_numpy(G, alpha)
    for (a, b) in zip(list(p.values()), self.G.evc):
        assert_almost_equal(a, b)
def create_tex_sum_central(G, tem_dici, dici_tem, dic_of_nodes_multi, outfile_name='scent_out.tex'):
    print(outfile_name)
    # print list_ofNod, dic_of_nodes_multi
    fop = open(outfile_name, 'w')
    lat = r'''\documentclass[10pt]{article}
\usepackage{lscape}
\usepackage{adjustbox}
\begin{document}
%\global\pdfpageattr\expandafter{\the\pdfpageattr/Rotate 90}
\begin{table}[ht]
\centering
\begin{adjustbox}{width=1\textwidth,center=\textwidth}
\small
\begin{tabular}{|c||r|r|r|r|r|r|r|r|r||}
\hline
Node/Centralities & In & Out & Degree & Closeness & Betweenness & Eigenvector & Katz & PageRank & Communicability \\ \hline \hline'''.decode('utf-8')
    fop.write(lat)
    fop.write('\n')
    degce = nx.degree_centrality(G)
    cloce = cent = nx.closeness_centrality(G)
    becen = nx.betweenness_centrality(G)
    eigce = nx.eigenvector_centrality(G, max_iter=2000)
    katce = nx.katz_centrality_numpy(G)  # ,1/phi-0.01)
    pagce = nx.pagerank(G)
    # comce = nx.communicability_centrality(G)
    comce = nx.communicability_centrality_exp(G)
    from scipy import stats
    dic_of_nodes_multi_r = {ii: i for i, v in dic_of_nodes_multi.items() for ii in v}
    # print stats.pearsonr(degce.values(), dici_tem.values())
    # lats = r'|'
    cent_dics = {}
    latl = r' '
    for i in dici_tem:
        latl += '%i & %.6f & %.6f & %.6f & %.6f & %.6f & %.6f & %.6f & %.6f & %.6f ' % (
            i, dici_tem[i] / 15., tem_dici[i] / 15., degce[i], cloce[i], becen[i],
            eigce[i], katce[i], pagce[i], comce[i]) + r'''\\ \hline
'''.decode('utf-8')
        cent_dics[i] = (i, dici_tem[i] / 15., tem_dici[i] / 15., degce[i], cloce[i], becen[i],
                        eigce[i], katce[i], pagce[i], comce[i], dic_of_nodes_multi_r[i])
    fop.write(latl)
    fop.write('In & %.6f & & %.6f & %.6f & %.6f & %.6f & %.6f & %.6f & %.6f ' % (
        sum(dici_tem.values()),
        stats.pearsonr(degce.values(), dici_tem.values())[0],
        stats.pearsonr(cloce.values(), dici_tem.values())[0],
        stats.pearsonr(becen.values(), dici_tem.values())[0],
        stats.pearsonr(eigce.values(), dici_tem.values())[0],
        stats.pearsonr(katce.values(), dici_tem.values())[0],
        stats.pearsonr(pagce.values(), dici_tem.values())[0],
        stats.pearsonr(comce.values(), dici_tem.values())[0])
        + r'''\\ \hline
'''.decode('utf-8'))
    fop.write('Out & & %.6f & %.6f & %.6f & %.6f & %.6f & %.6f & %.6f & %.6f ' % (
        sum(tem_dici.values()),
        stats.pearsonr(degce.values(), tem_dici.values())[0],
        stats.pearsonr(cloce.values(), tem_dici.values())[0],
        stats.pearsonr(becen.values(), tem_dici.values())[0],
        stats.pearsonr(eigce.values(), tem_dici.values())[0],
        stats.pearsonr(katce.values(), tem_dici.values())[0],
        stats.pearsonr(pagce.values(), tem_dici.values())[0],
        stats.pearsonr(comce.values(), tem_dici.values())[0])
        + r'''\\ \hline
'''.decode('utf-8'))
    # # print steady_dict
    # # print len()
    # for i in list_ofNod:
    #     if i == 'Node/Node':  # or kk==0:
    #         continue
    #     for kk, j in enumerate(list_ofNod[1:]):
    #         # print steady_dict[i], j, kk
    #         # for j in steady_dict[i]:
    #         if j not in dici_tem:
    #             dici_tem[j] = steady_dict[i][kk]
    #         else:
    #             dici_tem[j] += steady_dict[i][kk]
    # # print dici_tem
    # # print steady_dict
    # # latll = r''
    # for kk, i in enumerate(list_ofNod):
    #     sumout = 0
    #     if i == 'Node/Node':
    #         continue
    #     else:
    #         # for
    #         for ii in steady_dict[i]:
    #             sumout += ii
    #         tem_dici[i] = sumout
    #     latl += '%s & %.6f & %.6f & %.6f & %.6f' % (i, dici_tem[i], sumout,
    #                                                 dici_tem[i] / len(list_ofNod[1:]), sumout / len(list_ofNod[1:]))
    #     # latll = latll[:-2]
    #     if i == dic_of_nodes_multi[0][-1]:
    #         latl += '\\\ \n \hline \hline' + '\n'
    #     else:
    #         latl += '\\\ \n \hline ' + '\n'
    # fop.write(latl)
    fop.write(r'''\hline
\end{tabular}
\end{adjustbox}
\end{table}
\end{document}'''.decode('utf-8'))
    fop.close()
    return cent_dics
def test_bad_beta_numbe(self):
    G = nx.Graph([(0, 1)])
    e = nx.katz_centrality_numpy(G, 0.1, beta='foo')
def test_bad_beta(self):
    G = nx.Graph([(0, 1)])
    beta = {0: 77}
    e = nx.katz_centrality_numpy(G, 0.1, beta=beta)
def calculate_katz(g):
    return nx.katz_centrality_numpy(g)
def test_multigraph_numpy(self):
    e = networkx.katz_centrality_numpy(networkx.MultiGraph(), 0.1)
def test_empty_numpy(self):
    e = networkx.katz_centrality_numpy(networkx.Graph(), 0.1)
def create_centralities_list(G, maxiter=2000, pphi=5, centList=[]):
    if len(centList) == 0:
        centList = ['degree_centrality', 'closeness_centrality', 'betweenness_centrality',
                    'eigenvector_centrality', 'katz_centrality', 'page_rank']
    cenLen = len(centList)
    valus = {}
    # plt.figure(figsize=figsi)
    for uu, centr in enumerate(centList):
        if centr == 'degree_centrality':
            cent = nx.degree_centrality(G)
            sstt = 'Degree Centralities'
            ssttt = 'degree centrality'
            valus[centr] = cent
        elif centr == 'closeness_centrality':
            cent = nx.closeness_centrality(G)
            sstt = 'Closeness Centralities'
            ssttt = 'closeness centrality'
            valus[centr] = cent
        elif centr == 'betweenness_centrality':
            cent = nx.betweenness_centrality(G)
            sstt = 'Betweenness Centralities'
            ssttt = 'betweenness centrality'
            valus[centr] = cent
        elif centr == 'eigenvector_centrality':
            try:
                cent = nx.eigenvector_centrality(G, max_iter=maxiter)
                sstt = 'Eigenvector Centralities'
                ssttt = 'eigenvector centrality'
                valus[centr] = cent
            except:
                valus[centr] = None
                continue
        elif centr == 'katz_centrality':
            phi = (1 + math.sqrt(pphi)) / 2.0  # golden-ratio-style constant used as a stand-in for the largest adjacency eigenvalue
            cent = nx.katz_centrality_numpy(G, 1 / phi - 0.01)
            sstt = 'Katz Centralities'
            ssttt = 'Katz centrality'
            valus[centr] = cent
        elif centr == 'page_rank':
            try:
                cent = nx.pagerank(G)
                sstt = 'PageRank'
                ssttt = 'pagerank'
                valus[centr] = cent
            except:
                valus[centr] = None
                continue
        print('%s done!!!' % sstt)
        # cs = {}
        # for k, v in cent.items():
        #     if v not in cs:
        #         cs[v] = [k]
        #     else:
        #         cs[v].append(k)
        # nodrank = []
        # uui = 0
        # for k in sorted(cs, reverse=True):
        #     for v in cs[k]:
        #         if uui < 5:
        #             nodrank.append(v)
        #             uui += 1
        # nodeclo = []
        # for k, v in cent.items():
        #     if k in nodrank:
        #         nodeclo.append(v)
        #     else:
        #         nodeclo.append(0.)
        # plt.subplot(1 + cenLen / 2., 2, uu + 1).set_title(sstt)
        # if withLabels:
        #     labe = nx.draw_networkx_labels(G, pos=pos, font_size=labfs)
        # nx.draw_networkx_nodes(G, pos=pos, nodelist=cent.keys(),
        #                        node_color=nodeclo,
        #                        cmap=plt.cm.Reds, alpha=valpha)
        # nx.draw_networkx_edges(G, pos=pos, edge_color='b', alpha=ealpha)
        # plt.title(sstt, fontsize=20)
        # kk = plt.axis('off')
        # if vals:
    return valus
import networkx as nx
import plot_multigraph
from matplotlib import pylab as plt

n = 80
p = 10. / n
G = nx.fast_gnp_random_graph(n, p, seed=42)


def to_list(dict_):
    return [dict_[k] for k in G.nodes()]


graph_colors = [
    ("degree", to_list(nx.degree_centrality(G))),
    ("betweenness", to_list(nx.betweenness_centrality(G))),
    ("load", to_list(nx.load_centrality(G))),
    ("eigenvector", to_list(nx.eigenvector_centrality_numpy(G))),
    ("closeness_centrality", to_list(nx.closeness_centrality(G))),
    ("current_flow_closeness", to_list(nx.current_flow_closeness_centrality(G))),
    ("current_flow_betweenness", to_list(nx.current_flow_betweenness_centrality(G))),
    ("katz", to_list(nx.katz_centrality_numpy(G))),
    ("communicability", to_list(nx.communicability_centrality(G))),
]

fig = plot_multigraph.plot_color_multigraph(G, graph_colors, 3, 3, node_size=50)
plt.savefig('graphs/centrality.png', facecolor=fig.get_facecolor())