def getInfo(graph, vaccination_list):
    """
    :param graph: the network, as a networkx graph
    :param vaccination_list: the list of vaccinated nodes
    :return: list, [average degree of the whole network,
                    average degree of the vaccinated nodes,
                    average clustering coefficient of all nodes,
                    average clustering coefficient of the vaccinated nodes,
                    average k-shell index of all nodes,
                    average k-shell index of the vaccinated nodes]
    """
    info_list = []
    count_nodes = graph.number_of_nodes()

    # Average degree of the whole network.
    count_degree = sum(d for _, d in graph.degree())
    info_list.append(count_degree / count_nodes)

    # Average degree and average clustering coefficient of the vaccinated nodes.
    count_degree = sum(graph.degree(i) for i in vaccination_list)
    count_clustering = sum(nx.clustering(graph, i) for i in vaccination_list)
    info_list.append(count_degree / len(vaccination_list))
    info_list.append(nx.average_clustering(graph))
    info_list.append(count_clustering / len(vaccination_list))

    # Average k-shell index: a node's core number equals the index of the
    # k-shell containing it, so summing core numbers gives
    # sum(shell_size * shell_index) over all shells, without stopping early
    # at an empty shell.
    core_numbers = nx.core_number(graph)
    info_list.append(sum(core_numbers.values()) / count_nodes)
    info_list.append(sum(core_numbers[v] for v in vaccination_list) /
                     len(vaccination_list))
    return info_list
def getInfo(self):
    info_list = []
    count_nodes = self.graph.number_of_nodes()

    # Average degree of the whole network.
    count_degree = sum(d for _, d in self.graph.degree())
    info_list.append(count_degree / count_nodes)

    # Average degree and average clustering coefficient of the vaccinated nodes.
    count_degree = sum(self.graph.degree(i) for i in self.vaccination_list)
    count_clustering = sum(nx.clustering(self.graph, i)
                           for i in self.vaccination_list)
    info_list.append(count_degree / len(self.vaccination_list))
    info_list.append(nx.average_clustering(self.graph))
    info_list.append(count_clustering / len(self.vaccination_list))

    # Average k-shell index of all nodes and of the vaccinated nodes; a
    # node's core number equals the index of the k-shell containing it.
    core_numbers = nx.core_number(self.graph)
    info_list.append(sum(core_numbers.values()) / count_nodes)
    info_list.append(sum(core_numbers[v] for v in self.vaccination_list) /
                     len(self.vaccination_list))
    return info_list
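# A minimal usage sketch for the standalone getInfo above, on a toy graph;
# the vaccination list here is an arbitrary, hypothetical choice of nodes.
import networkx as nx

G = nx.karate_club_graph()
vaccination_list = [0, 33, 2]  # hypothetical vaccinated nodes
stats = getInfo(G, vaccination_list)
print(stats)  # [avg degree, avg vaccinated degree, avg clustering, ...]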
def test_k_shell(self):
    # k=2
    k_shell_subgraph = nx.k_shell(self.H, k=2)
    assert_equal(sorted(k_shell_subgraph.nodes()), [2, 4, 5, 6])
    # k=1
    k_shell_subgraph = nx.k_shell(self.H, k=1)
    assert_equal(sorted(k_shell_subgraph.nodes()), [1, 3])
    # k=0
    k_shell_subgraph = nx.k_shell(self.H, k=0)
    assert_equal(sorted(k_shell_subgraph.nodes()), [0])
def test_k_shell(self):
    # k=2
    k_shell_subgraph = nx.k_shell(self.H, k=2)
    assert sorted(k_shell_subgraph.nodes()) == [2, 4, 5, 6]
    # k=1
    k_shell_subgraph = nx.k_shell(self.H, k=1)
    assert sorted(k_shell_subgraph.nodes()) == [1, 3]
    # k=0
    k_shell_subgraph = nx.k_shell(self.H, k=0)
    assert sorted(k_shell_subgraph.nodes()) == [0]
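# A minimal, self-contained sketch of what these tests check: k_shell(G, k)
# returns the subgraph of nodes whose core number is exactly k. The graph H
# below is hypothetical (not the actual test fixture) but produces the same
# shells as the assertions above.
import networkx as nx

H = nx.Graph()
H.add_node(0)                                       # isolated node -> 0-shell
H.add_edge(1, 3)                                    # pendant pair  -> 1-shell
H.add_edges_from([(2, 4), (4, 5), (5, 6), (6, 2)])  # 4-cycle       -> 2-shell

assert sorted(nx.k_shell(H, k=2)) == [2, 4, 5, 6]
assert sorted(nx.k_shell(H, k=1)) == [1, 3]
assert sorted(nx.k_shell(H, k=0)) == [0]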
def calculate_kshell(G, max_k):
    # k-shell decomposition (make a copy and remove the self-loops from it
    # before proceeding, since the core routines reject self-loops)
    G_for_kshell = nx.Graph(G.subgraph(G.nodes()))
    G_for_kshell.remove_edges_from(list(nx.selfloop_edges(G_for_kshell)))

    cont_zeros = 0
    for i in range(max_k):  # max_k is the absolute upper bound for the k-shell index
        kshell = nx.k_shell(G_for_kshell, k=i)  # the k-shell subgraph
        # print(" ", i, len(kshell), kshell.nodes())
        for node in kshell.nodes():
            G.nodes[node]["kshell"] = i
        if len(kshell) == 0:
            cont_zeros += 1
        if cont_zeros >= 7:  # stop calculating shells after a few come back empty
            break
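# A minimal usage sketch for calculate_kshell above: it annotates the nodes
# of G in place with their shell index under the "kshell" attribute.
import networkx as nx

G = nx.karate_club_graph()
max_k = max(d for _, d in G.degree()) + 1  # safe upper bound on the shell index
calculate_kshell(G, max_k)
print(nx.get_node_attributes(G, "kshell"))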
def calculate_k_shell_node_num(s_parent_dir, file):
    r_list = []
    g = get_nw(s_parent_dir + file)
    g.remove_edges_from(list(nx.selfloop_edges(g)))  # strip self-loops before the core decomposition
    max_num = max(nx.core_number(g).values())
    for k in range(max_num + 1):
        r_list.append(str(k) + "\t" + str(nx.k_shell(g, k).number_of_nodes()))
    del r_list[0]     # drop the 0-shell row
    r_list.reverse()  # highest shell first
    return r_list
def calculate_K_Shell_measure(G, i):  # coreness score
    global K_Shell_measure
    global DN
    ks = 1
    while True:
        temp = nx.k_shell(G, ks).nodes()
        if len(temp) == 0:
            break
        for node in temp:
            K_Shell_measure[i][node] = ks
        ks += 1
    # Nodes not reached by any non-empty shell get coreness 0.
    for node in G.nodes():
        if node not in K_Shell_measure[i]:
            K_Shell_measure[i][node] = 0
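# A minimal usage sketch, assuming the globals the function expects; the
# layout of K_Shell_measure (one node -> score dict per index i) is inferred
# from the function body.
import networkx as nx
from collections import defaultdict

K_Shell_measure = defaultdict(dict)
DN = {}  # declared global by the function but unused in this fragment

G = nx.karate_club_graph()
calculate_K_Shell_measure(G, 0)
print(K_Shell_measure[0])  # node -> k-shell index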
def get_kshell_dict(g, df_feat):
    '''
    The k-core is the maximal group of entities, all of which are connected
    to at least k other entities in the group.
    The k-shell is the subgraph of nodes in the k-core but not in the (k+1)-core.
    '''
    MAX_KSHELL = 44
    df_temp = pd.DataFrame(data=g.nodes(), columns=["qid"])
    # Initialize every node's k-shell index to 1.
    df_temp['kshell'] = 1
    for k in range(2, MAX_KSHELL + 1):
        sk = nx.k_shell(g, k=k).nodes()
        df_temp.loc[df_temp.qid.isin(sk), "kshell"] = k
    return df_temp.kshell.to_dict()
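# A minimal usage sketch for get_kshell_dict; df_feat is unused by the
# function body, so any placeholder can be passed. Note the returned dict is
# keyed by the DataFrame's row position, not by the node label.
import networkx as nx
import pandas as pd

g = nx.karate_club_graph()
kshell_dict = get_kshell_dict(g, df_feat=None)
print(kshell_dict)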
def calculate_K_Shell_measure(G, i):  # coreness score
    global K_Shell_measure
    global DN
    ks = 1
    for j in range(G.number_of_nodes()):  # the node count bounds the largest shell index
        temp = nx.k_shell(G, j + 1).nodes()
        for node in temp:
            K_Shell_measure[i][node] = j + 1
        ks = j + 1
    for node in G.nodes():
        if node not in K_Shell_measure[i]:
            K_Shell_measure[i][node] = 0
def getData(network):
    nodes = network.nodes()
    degree = np.array([nx.degree(network)[x] for x in nodes])
    bc = np.array([nx.betweenness_centrality(network)[x] for x in nodes])
    closeness = np.array([nx.closeness_centrality(network)[x] for x in nodes])
    strength = np.array(
        [nx.degree(network, nodes, weight="weight")[x] for x in nodes])
    cc = np.array([nx.clustering(network, nodes)[x] for x in nodes])
    # k-shell index per node: start from the degree mapping and overwrite
    # each node with the index of the shell that contains it.
    kshell = dict(nx.degree(network))
    for k in range(1, max(degree) + 1):
        curShell = nx.k_shell(network, k)
        for node in curShell.nodes():
            kshell[node] = k
    kshell = np.array([kshell[x] for x in nodes])
    return kshell, cc, degree, strength, bc, closeness
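# A minimal usage sketch for getData; on an unweighted graph the "weight"
# attribute is missing, so the strength column silently equals the degree.
import networkx as nx
import numpy as np

kshell, cc, degree, strength, bc, closeness = getData(nx.karate_club_graph())
print(kshell[:5], degree[:5])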
def getShellConnectedComponents(graph, cnumber, candidates=None):
    nodes = {}
    if candidates is None:
        cvals = list(set(cnumber.values()))
    else:
        cvals = list(set(cnumber[u] for u in candidates))
    for cval in cvals:
        sg = nx.k_shell(graph, cval, cnumber)
        # Map every node of this shell to the connected component (a set of
        # nodes) that contains it.
        for comp in nx.connected_components(sg):
            for u in comp:
                nodes[u] = comp
    return nodes
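# A minimal usage sketch: cnumber is the usual core-number mapping, and the
# result maps each node to the set of nodes forming the connected component
# of its own shell.
import networkx as nx

graph = nx.karate_club_graph()
cnumber = nx.core_number(graph)
comp_by_node = getShellConnectedComponents(graph, cnumber)
print(comp_by_node[0])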
def calculate_kshell(G, max_k):
    # k-shell decomposition
    # G_for_kshell = remove_self_loops(G)  # there shouldn't be any self-loops already
    for node in G.nodes():
        G.nodes[node]["kshell"] = 0
    cont_zeros = 0
    for i in range(max_k):  # max_k is the absolute upper bound for the k-shell index
        kshell = nx.k_shell(G, k=i)  # the k-shell subgraph
        # print(" ", i, len(kshell), kshell.nodes())
        for node in kshell.nodes():
            G.nodes[node]["kshell"] = i
        if len(kshell) == 0:
            cont_zeros += 1
        if cont_zeros >= 7:  # stop calculating shells after a few come back empty
            break
a) Number of nodes in the k-corona where k is the max k-val (main core k-val)
##################################################'''
print("\nNumber of nodes in the k-corona: " +
      str(len(nx.k_corona(G, k=maxKValue).nodes())))
coronaGraph = nx.Graph()
coronaEdges = nx.k_corona(G, k=maxKValue).edges()
for edge in coronaEdges:  # coronaEdges is an edge view, not a callable
    coronaGraph.add_edge(edge[0], edge[1])
drawGraph(coronaGraph)
'''
################################################
Step 6 output:
a) Number of nodes in the main shell
##################################################'''
mainShell = nx.k_shell(G).nodes()
mainShellEdges = nx.k_shell(G).edges()
print("\nNumber of nodes in main shell: " + str(len(mainShell)))
# Create a subgraph that contains just the main shell
mainShellGraph = nx.Graph()
for edge in mainShellEdges:
    mainShellGraph.add_edge(edge[0], edge[1])
drawGraph(mainShellGraph)
'''
################################################
Step 7 output:
a) Display graph with red main core and blue main crust, no labels
##################################################'''
crustNodes = nx.k_crust(G).nodes()
coreNodes = mainCore
def test_main_shell(self):
    main_shell_subgraph = nx.k_shell(self.H)
    assert_equal(sorted(main_shell_subgraph.nodes()), [2, 4, 5, 6])
# Imports assumed by this script (community is the python-louvain package,
# c the networkx community-algorithms module):
import community
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import networkx as nx
import networkx.algorithms.community as c


def main():
    # Read in the graph
    G = nx.read_edgelist("GameOfThrones.txt", delimiter=",",
                         data=(('strength', int), ('season', int)))

    # Display the graph
    pos = nx.spring_layout(G)
    plt.figure(figsize=(10, 10))
    nx.draw_networkx(G, pos=pos, with_labels=True)
    plt.axis('off')
    plt.show()

    # Output info
    print(nx.info(G))

    # Output the highest-degree node's name and degree
    node, deg = max(G.degree(), key=lambda nd: nd[1])
    print("\nHighest degree node is:", node, "with degree", deg)

    # Output the number of connected components
    print("\nNumber of connected components:",
          len(list(nx.connected_components(G))))

    # Output the number of maximal cliques
    print("\nNumber of maximal cliques:", len(list(nx.find_cliques(G))))

    # Output the number of nodes in the main core and its k value
    core_num = list(nx.k_core(G).nodes())
    k_val = max(nx.core_number(G).values())  # the main core's k value
    print("\nNumber of nodes in main core:", len(core_num),
          "with k val:", k_val)

    # Output the number of nodes in the main crust
    print("\nNumber of nodes in main crust:",
          len(list(nx.k_crust(G).nodes())))

    # Output the number of nodes in the k-corona
    print("\nNumber of nodes in k corona:",
          len(list(nx.k_corona(G, k=k_val).nodes())))

    # Output the number of nodes in the main shell
    print("\nNumber of nodes in main shell:",
          len(list(nx.k_shell(G).nodes())))

    # Display graph: main core in red, main crust in blue
    color_map = []
    for node in G:
        if node in list(nx.k_core(G).nodes()):
            color_map.append('red')
        elif node in list(nx.k_crust(G).nodes()):
            color_map.append('blue')
    nx.draw_networkx(G, pos=pos, node_color=color_map, with_labels=False)
    plt.axis('off')
    plt.show()

    # Louvain method: output the number of communities, size of the largest
    # community, size of the smallest community, and modularity of the partition
    partition = community.best_partition(G)
    com_num = partition[max(partition, key=partition.get)]
    print("\nLouvain Method:")
    print("Number of communities:", com_num + 1)
    count = []
    comm_list = list(partition.values())
    for x in range(com_num + 1):  # include the last community id
        count.append(comm_list.count(x))
    print("The largest community has count:", max(count))
    print("The smallest community has count:", min(count))
    print("The modularity of this partitioning:",
          community.modularity(partition, G))

    # Display the graph using Louvain, nodes colored per partition
    cmap = cm.get_cmap('viridis', max(partition.values()) + 1)
    nx.draw_networkx_nodes(G, pos, partition.keys(), node_size=40,
                           cmap=cmap, node_color=list(partition.values()))
    nx.draw_networkx_edges(G, pos, alpha=0.5)
    plt.axis('off')
    plt.show()

    # Girvan-Newman method: output the number of communities, size of the
    # largest community, size of the smallest community, and modularity
    print("\nGirvan-Newman Method:")
    components = c.girvan_newman(G)
    finalResult = next(components)  # keep only the first split
    partitions = dict()
    p = 0
    for comp in list(finalResult):
        for entry in comp:
            partitions[entry] = p
        p = p + 1
    com_num = partitions[max(partitions, key=partitions.get)]
    print("Number of communities:", com_num + 1)
    count = []
    comm_list = list(partitions.values())
    for x in range(com_num + 1):
        count.append(comm_list.count(x))
    print("The largest community has count:", max(count))
    print("The smallest community has count:", min(count))
    print("The modularity of this partitioning:",
          community.modularity(partitions, G))

    # Display the graph using Girvan-Newman, nodes colored per partition
    cmap = cm.get_cmap('viridis', max(partitions.values()) + 1)
    nx.draw_networkx_nodes(G, pos, partitions.keys(), node_size=60,
                           cmap=cmap, node_color=list(partitions.values()))
    plt.axis('off')
    plt.show()
# Imports assumed by this script (histograma_gral and histograma_bines_gral
# are project-local helper modules):
import pickle
import networkx as nx
import numpy
import histograma_gral
import histograma_bines_gral


def main():
    initial_period = 1
    final_period = 250

    filename3 = "../Results/Time_evol_network_metrics_monthly___.dat"
    file3 = open(filename3, 'wt')
    file3.close()
    # header: period N L GC avg_degree std_degree max_k avg_pos_w std_pos_w
    #         avg_neg_w std_neg_w max_pos_w min_pos_w max_neg_w min_neg_w
    #         max_shell avg_shortest_path max_clique avg_betweenness std_betweenness
    #         (columns 1-20)

    list_network_month_files = []
    period = initial_period
    while period <= final_period:
        list_network_month_files.append(
            "../Results/Supply_network_slicing_monthly_period_" + str(period) +
            "_no_network_metrics.pickle")
        period += 1
    list_network_month_files.append(
        "../Results/Supply_network_1985_2005_no_network_metrics.pickle")

    # Read each input pickled network
    for filename in list_network_month_files:
        G = pickle.load(open(filename, 'rb'))
        if len(G.nodes()) > 1:
            print("\n\nloaded pickle file for the network:", filename)
            try:
                period = filename.split("period_")[1].split(
                    ".pickle")[0].split("_no_network_metrics")[0]
            except IndexError:
                period = filename.split("Supply_network_")[1].split(
                    "_no_network_metrics.pickle")[0]
            # print(G.nodes(data=True))

            N = len(G.nodes())
            L = len(G.edges())
            # largest connected component
            GC = G.subgraph(max(nx.connected_components(G), key=len))
            print("period", period)
            print(" N:", N, "L:", L, "GC:", len(GC.nodes()))

            ####### degree
            print("degrees:")
            list_k = [G.degree(node) for node in G.nodes()]
            avg_degree = numpy.mean(list_k)
            std_degree = numpy.std(list_k)
            print(" <k>:", avg_degree, "+/-", std_degree)
            path_name_h = ("../Results/degree_distribution_period" +
                           str(period) + ".dat")
            histograma_gral.histogram(list_k, path_name_h)
            max_k = max(list_k)
            print(" max_k:", max_k)

            ######### weights
            print("weights:")
            list_pos_w = []
            list_neg_w = []
            for edge in G.edges():
                list_pos_w.append(G.edges[edge[0], edge[1]]["pos_weight"])
                list_neg_w.append(-1. * G.edges[edge[0], edge[1]]["neg_weight"])
            avg_pos_w = numpy.mean(list_pos_w)
            std_pos_w = numpy.std(list_pos_w)
            print(" pos. weight:", avg_pos_w, "+/-", std_pos_w)
            avg_neg_w = numpy.mean(list_neg_w)
            std_neg_w = numpy.std(list_neg_w)
            print(" neg. weight:", avg_neg_w, "+/-", std_neg_w)
            path_name_h = ("../Results/weight_pos_trans_distribution_period" +
                           str(period) + ".dat")
            histograma_gral.histogram(list_pos_w, path_name_h)
            path_name_h = ("../Results/weight_neg_trans_distribution_period" +
                           str(period) + ".dat")
            histograma_gral.histogram(list_neg_w, path_name_h)
            max_pos_w = max(list_pos_w)
            min_pos_w = min(list_pos_w)
            max_neg_w = max(list_neg_w)
            min_neg_w = min(list_neg_w)
            print(" max_pos_w:", max_pos_w, " min_pos_w:", min_pos_w)
            print(" max_neg_w:", -1. * max_neg_w, " min_neg_w:", -1. * min_neg_w)

            ######### k-shell decomposition
            print("k-shell structure:")
            # make a copy and remove the self-loops from it before proceeding
            G_for_kshell = nx.Graph(G.subgraph(G.nodes()))
            G_for_kshell.remove_edges_from(list(nx.selfloop_edges(G_for_kshell)))
            max_shell = 0
            cont_zeros = 0
            for i in range(max_k):
                size_shell = len(nx.k_shell(G_for_kshell, k=i))
                print(" ", i, size_shell)
                if size_shell == 0:
                    cont_zeros += 1
                else:
                    max_shell = i
                if cont_zeros >= 10:
                    break
            print("max shell:", max_shell)

            ######### connected components
            print("connected components:")
            max_con_comp = 0
            list_sizes = []
            for item in sorted(nx.connected_components(G), key=len, reverse=True):
                size = len(item)
                list_sizes.append(size)
                if size > max_con_comp:
                    max_con_comp = size
            path_name_h = ("../Results/connected_components_distribution_period" +
                           str(period) + ".dat")
            histograma_gral.histogram(list_sizes, path_name_h)

            ########## avg. path length
            avg_shortest_path = nx.average_shortest_path_length(GC)
            print("average shortest path within GC:", avg_shortest_path)

            ######## max. clique size
            absolute_max = 1
            for i in G.nodes():
                maximo = 1
                list2 = nx.cliques_containing_node(G, i)
                for elem in list2:
                    if len(elem) > maximo:
                        maximo = len(elem)
                G.nodes[i]['max_clique_size'] = maximo
                if absolute_max < maximo:
                    absolute_max = maximo
            lista = list(nx.find_cliques(G))  # list of cliques (list of lists)
            max_clique = nx.graph_clique_number(G)  # maximum clique size
            num_tot_clique = nx.graph_number_of_cliques(G)  # total number of cliques
            print("max. clique size:", max_clique)

            print("calculating betweenness centrality...")
            dict_betweenness = nx.betweenness_centrality(
                G, k=None, normalized=True, weight=None,
                endpoints=False, seed=None)
            list_betweenness = [dict_betweenness[node] for node in G.nodes()]
            avg_betweenness = numpy.mean(list_betweenness)
            std_betweenness = numpy.std(list_betweenness)
            print("avg centrality:", avg_betweenness, std_betweenness)
            path_name_h = ("../Results/betweenness_distribution_period" +
                           str(period) + ".dat")
            histograma_bines_gral.histograma_bins_norm(list_betweenness, 10,
                                                       path_name_h)

            file3 = open(filename3, 'at')
            print(period, N, L, len(GC.nodes()), avg_degree, std_degree, max_k,
                  avg_pos_w, std_pos_w, -1. * avg_neg_w, std_neg_w,
                  max_pos_w, min_pos_w, -1. * max_neg_w, -1. * min_neg_w,
                  max_shell, avg_shortest_path, max_clique,
                  avg_betweenness, std_betweenness, file=file3)
            file3.close()
            print("written:", filename3)
def main():
    ####### time window currently examined
    initial_year = 85
    final_year = 95

    list_network_year_files = []
    y = initial_year
    while y <= final_year:
        list_network_year_files.append("../Results/Supply_network_year_" +
                                       str(y) + ".pickle")
        y += 1
    list_network_year_files.append("../Results/Supply_network_85_95.pickle")

    ########## read each input pickled network
    for filename in list_network_year_files:
        G = pickle.load(open(filename, 'rb'))
        print("\n\nloaded pickle file for the network:", filename)
        try:
            y = filename.split("year_")[1].split(".pickle")[0]
        except IndexError:
            y = filename.split("network_")[1].split(".pickle")[0]

        N = len(G.nodes())
        L = len(G.edges())
        print("N:", N, "L:", L)

        ####### degree
        print("degrees:")
        list_k = [G.degree(node) for node in G.nodes()]
        print(" <k>:", numpy.mean(list_k), "+/-", numpy.std(list_k))
        path_name_h = "../Results/degree_distribution_y" + str(y) + ".dat"
        histograma_gral.histogram(list_k, path_name_h)
        max_k = max(list_k)
        print(" max_k:", max_k)

        ######### weights
        print("weights:")
        list_w = [G.edges[edge[0], edge[1]]["weight"] for edge in G.edges()]
        print(" w:", numpy.mean(list_w), "+/-", numpy.std(list_w))
        # path_name_h = "../Results/weight_distribution_" + str(y) + ".dat"
        # histograma_gral.histogram(list_w, path_name_h)
        max_w = max(list_w)
        min_w = min(list_w)
        print(" max_w:", max_w, " min_w:", min_w)

        ######### k-shell decomposition
        print("k-shell structure:")
        # make a copy and remove the self-loops from it before proceeding
        G_for_kshell = nx.Graph(G.subgraph(G.nodes()))
        G_for_kshell.remove_edges_from(list(nx.selfloop_edges(G_for_kshell)))
        cont_zeros = 0
        for i in range(max_k):
            size_shell = len(nx.k_shell(G_for_kshell, k=i))
            print(" ", i, size_shell)
            if size_shell == 0:
                cont_zeros += 1
            if cont_zeros >= 10:
                break

        ######### connected components
        print("connected components:")
        list_sizes = [len(item) for item in
                      sorted(nx.connected_components(G), key=len, reverse=True)]
        path_name_h = ("../Results/connected_components_distribution_y" +
                       str(y) + ".dat")
        histograma_gral.histogram(list_sizes, path_name_h)

        exit()  # the script stops here; everything below is unreachable

        print("calculating betweenness centrality...")
        dict_betweenness = nx.betweenness_centrality(
            G, k=None, normalized=True, weight=None, endpoints=False, seed=None)
        list_betweenness = [dict_betweenness[node] for node in G.nodes()]
        print("avg centrality:", numpy.mean(list_betweenness))
        path_name_h = ("../Results/betweenness_distribution" + file_info +
                       "_" + name + ".dat")
        histograma_bines_gral.histograma_bins_norm(list_betweenness, 10,
                                                   path_name_h)

        ##### for comparison with ER and SF graphs of the same size
        print("\n\nk-shell structure of the BA synthetic with:")
        p = 2. * L / (N * N * (N - 1))
        m = 2  # int(L/(N*(N-1)))+1
        print("p:", p, " m:", m, "\n")
        graph = nx.barabasi_albert_graph(N, m)
        # ER_graph = nx.erdos_renyi_graph(N, p)

        ######### k-shell decomposition
        cont_zeros = 0
        for i in range(max_k):
            size_shell = len(nx.k_shell(graph, k=i))
            print(i, size_shell)
            if size_shell == 0:
                cont_zeros += 1
            if cont_zeros >= 10:
                break

        ######## separate into subgraphs for drs with mostly controlled or
        ######## uncontrolled patients
        # to create a subgraph: H = G.subgraph([0, 1, 2])
        # copy the subgraphs so their attributes stay independent of the original
        G_high_ratio = nx.Graph(G.subgraph(list_high_rate_drs))
        G_low_ratio = nx.Graph(G.subgraph(list_low_rate_drs))
        print("Subgraphs:")
        gml_filename = ("../Results/Physician_referral_network_by_dr_rates_HIGH_dates_" +
                        str(initial_date).split(" ")[0] + "_to_" +
                        str(final_date).split(" ")[0] + "_" + num_lines + "lines.gml")
        nx.write_gml(G_high_ratio, gml_filename)
        print(" written:", gml_filename)
        print(" high ratio: N:", len(G_high_ratio.nodes()),
              " L:", len(G_high_ratio.edges()))
        filename_network_pickle = ("../Results/Physician_referral_network_by_dr_rates_HIGH_dates_" +
                                   str(initial_date).split(" ")[0] + "_to_" +
                                   str(final_date).split(" ")[0] + "_" +
                                   num_lines + "lines.pickle")
        pickle.dump(G_high_ratio, open(filename_network_pickle, 'wb'))
        print(" written", filename_network_pickle)

        gml_filename = ("../Results/Physician_referral_network_by_dr_rates_LOW_dates_" +
                        str(initial_date).split(" ")[0] + "_to_" +
                        str(final_date).split(" ")[0] + "_" + num_lines + "lines.gml")
        nx.write_gml(G_low_ratio, gml_filename)
        print(" written:", gml_filename)
        print(" low ratio: N:", len(G_low_ratio.nodes()),
              " L:", len(G_low_ratio.edges()))
        filename_network_pickle = ("../Results/Physician_referral_network_by_dr_rates_LOW_dates_" +
                                   str(initial_date).split(" ")[0] + "_to_" +
                                   str(final_date).split(" ")[0] + "_" +
                                   num_lines + "lines.pickle")
        pickle.dump(G_low_ratio, open(filename_network_pickle, 'wb'))
        print(" written", filename_network_pickle)
nx.draw_networkx(network1)
#%%
nx.degree_centrality(network1)
#%%
nx.closeness_centrality(network1)
#%%
nx.betweenness_centrality(network1)
#%%
nx.pagerank(network1)
#%%
list(nx.k_shell(network1, k=1))
#%%
list(nx.k_shell(network1, k=2))
#%%
list(nx.k_shell(network1, k=3))
#%%
# community detection
# python setup.py install
# cmd: pip install -U python-louvain
import networkx as nx
import numpy as np
import community

N = 16
try:
    os.stat(file_dir_figs)
except:
    os.mkdir(file_dir_figs)
fig_name = os.path.join(
    file_dir_figs,
    str(G.name) + '_[k=' + str(max(nx.core_number(G).values())) + ']_k-cores.png')
layout = g.layout('auto')
# layout = g.layout('kk')
ig.plot(g, target=fig_name, layout=layout, vertex_size=7,
        vertex_color='gray', vertex_label_size=10, vertex_label_dist=2,
        mark_groups=group_markers)  # vertex_label=None

print('DEFINITION: The k-shell is the subgraph of nodes in the k-core '
      'but not in the (k+1)-core.')
print(" ")
kshells = []
for i in set(degree_sequence):
    if len(nx.k_shell(G, k=i).nodes()) > 0:
        # print("i =", i)
        ksGi = nx.k_shell(G, k=i)
        print('The nodes of the', str(i) + '-shell:')
        print(ksGi.nodes())
        # print('The edges of the', str(i) + '-shell:')
        # print(ksGi.edges())
        # print('The degree sequence of the nodes of the', str(i) + '-shell:')
        # print(list(dict(ksGi.degree()).values()))
        # print('The order of the main k-shell of G is:')
        # print('k =', min(dict(ksGi.degree()).values()))
        print(" ")
        kshells.append(ksGi.nodes())
import networkx as nx
import matplotlib.pylab as plt

import plot_multigraph

n = 80
k = int(.2 * n)
p = 10. / n
G = nx.fast_gnp_random_graph(n, p, seed=42)


def set_to_list(set_, G):
    # Indicator vector: 1.0 for nodes in the set, 0.0 otherwise.
    return [1. * (node in set_) for node in G.nodes()]


graph_colors = [
    ("center", set_to_list(nx.center(G), G)),
    ("periphery", set_to_list(nx.periphery(G), G)),
    ("k_core", set_to_list(nx.k_core(G), G)),
    ("k_shell", set_to_list(nx.k_shell(G), G)),
    ("k_crust", set_to_list(nx.k_crust(G), G)),
    ("k_corona", set_to_list(nx.k_corona(G, k), G)),
]

fig = plot_multigraph.plot_color_multigraph(G, graph_colors, 2, 3, node_size=50)
plt.savefig('graphs/sets.png', facecolor=fig.get_facecolor())
# https://networkx.github.io/documentation/networkx-2.0/auto_examples/graph/plot_karate_club.html
import matplotlib.pyplot as plt
import networkx as nx

G = nx.karate_club_graph()
print("Node Degree")
for v in G:
    print('%s %s' % (v, G.degree(v)))
nx.draw_circular(G, with_labels=True)
plt.show()
#%%
import networkx as nx
import numpy as np
import community

G = nx.karate_club_graph()
partition = community.best_partition(G)
print(partition)
print("Louvain Modularity: ", community.modularity(partition, G))
#%%
preds = nx.jaccard_coefficient(G)
for u, v, p in preds:
    print('(%d, %d) -> %.8f' % (u, v, p))
#%%
list(nx.k_shell(G, k=2))
#%%
# Imports assumed by this excerpt:
import time

import networkx as nx
import numpy as np
import scipy.stats as ss
from networkx.algorithms import approximation


def compute_summaries(G):
    """ Compute network features, computational times and their nature.

    Evaluate 54 summary statistics of a network G, plus 4 noise variables,
    store the computational time to evaluate each summary statistic, and keep
    track of their nature (discrete or not).

        Args:
            G (networkx.classes.graph.Graph): an undirected networkx graph.

        Returns:
            resDicts (tuple): a tuple containing the elements:
                - dictSums (dict): a dictionary with the name of the summaries
                  as keys and the summary statistic values as values;
                - dictTimes (dict): a dictionary with the name of the summaries
                  as keys and the time to compute each one as values;
                - dictIsDisc (dict): a dictionary indicating if the summary is
                  discrete (True) or not (False).
    """
    dictSums = dict()    # Will store the summary statistic values
    dictTimes = dict()   # Will store the evaluation times
    dictIsDisc = dict()  # Will store the summary statistic nature

    # Extract the largest connected component
    Gcc = sorted(nx.connected_components(G), key=len, reverse=True)
    G_lcc = G.subgraph(Gcc[0])

    # Number of edges
    start = time.time()
    dictSums["num_edges"] = G.number_of_edges()
    dictTimes["num_edges"] = time.time() - start
    dictIsDisc["num_edges"] = True

    # Number of connected components
    start = time.time()
    dictSums["num_of_CC"] = nx.number_connected_components(G)
    dictTimes["num_of_CC"] = time.time() - start
    dictIsDisc["num_of_CC"] = True

    # Number of nodes in the largest connected component
    start = time.time()
    dictSums["num_nodes_LCC"] = nx.number_of_nodes(G_lcc)
    dictTimes["num_nodes_LCC"] = time.time() - start
    dictIsDisc["num_nodes_LCC"] = True

    # Number of edges in the largest connected component
    start = time.time()
    dictSums["num_edges_LCC"] = G_lcc.number_of_edges()
    dictTimes["num_edges_LCC"] = time.time() - start
    dictIsDisc["num_edges_LCC"] = True

    # Diameter of the largest connected component
    start = time.time()
    dictSums["diameter_LCC"] = nx.diameter(G_lcc)
    dictTimes["diameter_LCC"] = time.time() - start
    dictIsDisc["diameter_LCC"] = True

    # Average geodesic distance (shortest path length in the LCC)
    start = time.time()
    dictSums["avg_geodesic_dist_LCC"] = nx.average_shortest_path_length(G_lcc)
    dictTimes["avg_geodesic_dist_LCC"] = time.time() - start
    dictIsDisc["avg_geodesic_dist_LCC"] = False

    # Average degree of the neighborhood of each node
    start = time.time()
    dictSums["avg_deg_connectivity"] = np.mean(
        list(nx.average_degree_connectivity(G).values()))
    dictTimes["avg_deg_connectivity"] = time.time() - start
    dictIsDisc["avg_deg_connectivity"] = False

    # Average degree of the neighbors of each node in the LCC
    start = time.time()
    dictSums["avg_deg_connectivity_LCC"] = np.mean(
        list(nx.average_degree_connectivity(G_lcc).values()))
    dictTimes["avg_deg_connectivity_LCC"] = time.time() - start
    dictIsDisc["avg_deg_connectivity_LCC"] = False

    # Recover the degree distribution
    start_degree_extract = time.time()
    degree_vals = list(dict(G.degree()).values())
    degree_extract_time = time.time() - start_degree_extract

    # Entropy of the degree distribution
    start = time.time()
    dictSums["degree_entropy"] = ss.entropy(degree_vals)
    dictTimes["degree_entropy"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_entropy"] = False

    # Maximum degree
    start = time.time()
    dictSums["degree_max"] = max(degree_vals)
    dictTimes["degree_max"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_max"] = True

    # Average degree
    start = time.time()
    dictSums["degree_mean"] = np.mean(degree_vals)
    dictTimes["degree_mean"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_mean"] = False

    # Median degree
    start = time.time()
    dictSums["degree_median"] = np.median(degree_vals)
    dictTimes["degree_median"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_median"] = False

    # Standard deviation of the degree distribution
    start = time.time()
    dictSums["degree_std"] = np.std(degree_vals)
    dictTimes["degree_std"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_std"] = False

    # Quantile 25%
    start = time.time()
    dictSums["degree_q025"] = np.quantile(degree_vals, 0.25)
    dictTimes["degree_q025"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_q025"] = False

    # Quantile 75%
    start = time.time()
    dictSums["degree_q075"] = np.quantile(degree_vals, 0.75)
    dictTimes["degree_q075"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_q075"] = False

    # Average geodesic distance
    start = time.time()
    dictSums["avg_shortest_path_length_LCC"] = nx.average_shortest_path_length(
        G_lcc)
    dictTimes["avg_shortest_path_length_LCC"] = time.time() - start
    dictIsDisc["avg_shortest_path_length_LCC"] = False

    # Average global efficiency:
    # The efficiency of a pair of nodes in a graph is the multiplicative
    # inverse of the shortest path distance between the nodes.
    # The average global efficiency of a graph is the average efficiency of
    # all pairs of nodes.
    start = time.time()
    dictSums["avg_global_efficiency"] = nx.global_efficiency(G)
    dictTimes["avg_global_efficiency"] = time.time() - start
    dictIsDisc["avg_global_efficiency"] = False

    # Harmonic mean of the geodesic distances, defined as
    # 1 / avg_global_efficiency
    start = time.time()
    dictSums["harmonic_mean"] = 1 / nx.global_efficiency(G)
    dictTimes["harmonic_mean"] = time.time() - start
    dictIsDisc["harmonic_mean"] = False

    # Average local efficiency
    # The local efficiency of a node in the graph is the average global
    # efficiency of the subgraph induced by the neighbors of the node.
    # The average local efficiency is the average of the
    # local efficiencies of each node.
    start = time.time()
    dictSums["avg_local_efficiency_LCC"] = nx.local_efficiency(G_lcc)
    dictTimes["avg_local_efficiency_LCC"] = time.time() - start
    dictIsDisc["avg_local_efficiency_LCC"] = False

    # Node connectivity
    # The node connectivity is equal to the minimum number of nodes that
    # must be removed to disconnect G or render it trivial.
    # Only on the largest connected component here.
    start = time.time()
    dictSums["node_connectivity_LCC"] = nx.node_connectivity(G_lcc)
    dictTimes["node_connectivity_LCC"] = time.time() - start
    dictIsDisc["node_connectivity_LCC"] = True

    # Edge connectivity
    # The edge connectivity is equal to the minimum number of edges that
    # must be removed to disconnect G or render it trivial.
    # Only on the largest connected component here.
    start = time.time()
    dictSums["edge_connectivity_LCC"] = nx.edge_connectivity(G_lcc)
    dictTimes["edge_connectivity_LCC"] = time.time() - start
    dictIsDisc["edge_connectivity_LCC"] = True

    # Graph transitivity
    # 3 times the number of triangles divided by the number of triads
    start = time.time()
    dictSums["transitivity"] = nx.transitivity(G)
    dictTimes["transitivity"] = time.time() - start
    dictIsDisc["transitivity"] = False

    # Number of triangles
    start = time.time()
    dictSums["num_triangles"] = np.sum(list(nx.triangles(G).values())) / 3
    dictTimes["num_triangles"] = time.time() - start
    dictIsDisc["num_triangles"] = True

    # Estimate of the average clustering coefficient of G:
    # Average local clustering coefficient, with the local clustering
    # coefficient defined as
    # C_i = (nbr of pairs of neighbors of i that are connected)
    #       / (nbr of pairs of neighbors of i)
    start = time.time()
    dictSums["avg_clustering_coef"] = nx.average_clustering(G)
    dictTimes["avg_clustering_coef"] = time.time() - start
    dictIsDisc["avg_clustering_coef"] = False

    # Square clustering (averaged over nodes):
    # the fraction of possible squares that exist at the node.
    start = time.time()
    dictSums["square_clustering_mean"] = np.mean(
        list(nx.square_clustering(G).values()))
    dictTimes["square_clustering_mean"] = time.time() - start
    dictIsDisc["square_clustering_mean"] = False

    # Median square clustering
    start = time.time()
    dictSums["square_clustering_median"] = np.median(
        list(nx.square_clustering(G).values()))
    dictTimes["square_clustering_median"] = time.time() - start
    dictIsDisc["square_clustering_median"] = False

    # Standard deviation of the square clustering
    start = time.time()
    dictSums["square_clustering_std"] = np.std(
        list(nx.square_clustering(G).values()))
    dictTimes["square_clustering_std"] = time.time() - start
    dictIsDisc["square_clustering_std"] = False

    # Number of nodes in the 2-core
    start = time.time()
    dictSums["num_2cores"] = len(nx.k_core(G, k=2))
    dictTimes["num_2cores"] = time.time() - start
    dictIsDisc["num_2cores"] = True

    # Number of nodes in the 3-core
    start = time.time()
    dictSums["num_3cores"] = len(nx.k_core(G, k=3))
    dictTimes["num_3cores"] = time.time() - start
    dictIsDisc["num_3cores"] = True

    # Number of nodes in the 4-core
    start = time.time()
    dictSums["num_4cores"] = len(nx.k_core(G, k=4))
    dictTimes["num_4cores"] = time.time() - start
    dictIsDisc["num_4cores"] = True

    # Number of nodes in the 5-core
    start = time.time()
    dictSums["num_5cores"] = len(nx.k_core(G, k=5))
    dictTimes["num_5cores"] = time.time() - start
    dictIsDisc["num_5cores"] = True

    # Number of nodes in the 6-core
    start = time.time()
    dictSums["num_6cores"] = len(nx.k_core(G, k=6))
    dictTimes["num_6cores"] = time.time() - start
    dictIsDisc["num_6cores"] = True

    # Number of nodes in the k-shells
    # The k-shell is the subgraph induced by nodes with core number k.
    # That is, nodes in the k-core that are not in the (k+1)-core.

    # Number of nodes in the 2-shell
    start = time.time()
    dictSums["num_2shells"] = len(nx.k_shell(G, 2))
    dictTimes["num_2shells"] = time.time() - start
    dictIsDisc["num_2shells"] = True

    # Number of nodes in the 3-shell
    start = time.time()
    dictSums["num_3shells"] = len(nx.k_shell(G, 3))
    dictTimes["num_3shells"] = time.time() - start
    dictIsDisc["num_3shells"] = True

    # Number of nodes in the 4-shell
    start = time.time()
    dictSums["num_4shells"] = len(nx.k_shell(G, 4))
    dictTimes["num_4shells"] = time.time() - start
    dictIsDisc["num_4shells"] = True

    # Number of nodes in the 5-shell
    start = time.time()
    dictSums["num_5shells"] = len(nx.k_shell(G, 5))
    dictTimes["num_5shells"] = time.time() - start
    dictIsDisc["num_5shells"] = True

    # Number of nodes in the 6-shell
    start = time.time()
    dictSums["num_6shells"] = len(nx.k_shell(G, 6))
    dictTimes["num_6shells"] = time.time() - start
    dictIsDisc["num_6shells"] = True

    # Enumerate all cliques once and reuse the list below
    start = time.time()
    listOfCliques = list(nx.enumerate_all_cliques(G))
    enum_all_cliques_time = time.time() - start

    # Number of 4-cliques
    start = time.time()
    n4Clique = 0
    for li in listOfCliques:
        if len(li) == 4:
            n4Clique += 1
    dictSums["num_4cliques"] = n4Clique
    dictTimes["num_4cliques"] = time.time() - start + enum_all_cliques_time
    dictIsDisc["num_4cliques"] = True

    # Number of 5-cliques
    start = time.time()
    n5Clique = 0
    for li in listOfCliques:
        if len(li) == 5:
            n5Clique += 1
    dictSums["num_5cliques"] = n5Clique
    dictTimes["num_5cliques"] = time.time() - start + enum_all_cliques_time
    dictIsDisc["num_5cliques"] = True

    # Maximal size of a clique in the graph
    start = time.time()
    dictSums["max_clique_size"] = len(approximation.clique.max_clique(G))
    dictTimes["max_clique_size"] = time.time() - start
    dictIsDisc["max_clique_size"] = True

    # Approximated size of a large clique in the graph
    start = time.time()
    dictSums["large_clique_size"] = approximation.large_clique_size(G)
    dictTimes["large_clique_size"] = time.time() - start
    dictIsDisc["large_clique_size"] = True

    # Number of shortest paths of length k
    # (a dict keyed by node is used so arbitrary node labels work)
    start = time.time()
    dictOfPLength = dict(nx.shortest_path_length(G))
    path_length_time = time.time() - start

    # when k = 3
    start = time.time()
    n3Paths = 0
    for node in G.nodes():
        tmp = list(dictOfPLength[node].values())
        n3Paths += tmp.count(3)
    dictSums["num_shortest_3paths"] = n3Paths / 2
    dictTimes["num_shortest_3paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_3paths"] = True

    # when k = 4
    start = time.time()
    n4Paths = 0
    for node in G.nodes():
        tmp = list(dictOfPLength[node].values())
        n4Paths += tmp.count(4)
    dictSums["num_shortest_4paths"] = n4Paths / 2
    dictTimes["num_shortest_4paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_4paths"] = True

    # when k = 5
    start = time.time()
    n5Paths = 0
    for node in G.nodes():
        tmp = list(dictOfPLength[node].values())
        n5Paths += tmp.count(5)
    dictSums["num_shortest_5paths"] = n5Paths / 2
    dictTimes["num_shortest_5paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_5paths"] = True

    # when k = 6
    start = time.time()
    n6Paths = 0
    for node in G.nodes():
        tmp = list(dictOfPLength[node].values())
        n6Paths += tmp.count(6)
    dictSums["num_shortest_6paths"] = n6Paths / 2
    dictTimes["num_shortest_6paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_6paths"] = True

    # Size of the minimum (weight) node dominating set:
    # a subset of nodes such that every node not in the subset has a direct
    # neighbor inside the subset.
    start = time.time()
    T = approximation.min_weighted_dominating_set(G)
    dictSums["size_min_node_dom_set"] = len(T)
    dictTimes["size_min_node_dom_set"] = time.time() - start
    dictIsDisc["size_min_node_dom_set"] = True

    # Idem but with the edge dominating set
    start = time.time()
    T = approximation.min_edge_dominating_set(G)
    dictSums["size_min_edge_dom_set"] = 2 * len(T)  # times 2 to get a number of nodes
    dictTimes["size_min_edge_dom_set"] = time.time() - start
    dictIsDisc["size_min_edge_dom_set"] = True

    # The Wiener index of a graph is the sum of the shortest-path distances
    # between each pair of reachable nodes. For pairs of nodes in undirected
    # graphs, only one orientation of the pair is counted.
    # (On the LCC, otherwise it is infinite.)
    start = time.time()
    dictSums["wiener_index_LCC"] = nx.wiener_index(G_lcc)
    dictTimes["wiener_index_LCC"] = time.time() - start
    dictIsDisc["wiener_index_LCC"] = True

    # Betweenness node centrality (averaged over nodes):
    # at node u it is defined as B_u = sum_{i,j} sigma(i,u,j)/sigma(i,j),
    # where sigma(i,u,j) is the number of shortest paths between i and j
    # passing through u, and sigma(i,j) the total number of shortest paths
    # between i and j
    start = time.time()
    betweenness = list(nx.betweenness_centrality(G).values())
    time_betweenness = time.time() - start

    # Averaged across nodes
    start = time.time()
    dictSums["betweenness_centrality_mean"] = np.mean(betweenness)
    dictTimes["betweenness_centrality_mean"] = time.time(
    ) - start + time_betweenness
    dictIsDisc["betweenness_centrality_mean"] = False

    # Maximum across nodes
    start = time.time()
    dictSums["betweenness_centrality_max"] = max(betweenness)
    dictTimes["betweenness_centrality_max"] = time.time(
    ) - start + time_betweenness
    dictIsDisc["betweenness_centrality_max"] = False

    # Central point dominance
    # CPD = sum_u (B_max - B_u) / (N - 1)
    start = time.time()
    dictSums["central_point_dominance"] = sum(
        max(betweenness) - np.array(betweenness)) / (len(betweenness) - 1)
    dictTimes["central_point_dominance"] = time.time(
    ) - start + time_betweenness
    dictIsDisc["central_point_dominance"] = False

    # Estrada index: sum_{i=1}^{n} exp(lambda_i),
    # with n the number of nodes and lambda_i the i-th eigenvalue of the
    # adjacency matrix of G
    start = time.time()
    dictSums["Estrada_index"] = nx.estrada_index(G)
    dictTimes["Estrada_index"] = time.time() - start
    dictIsDisc["Estrada_index"] = False

    # Eigenvector centrality
    # For each node, it is the average eigenvector centrality of its
    # neighbors, where the centrality of node i is taken as the i-th
    # coordinate of x such that Ax = lambda*x (for the maximal eigenvalue)

    # Averaged
    start = time.time()
    dictSums["avg_eigenvec_centrality"] = np.mean(
        list(nx.eigenvector_centrality_numpy(G).values()))
    dictTimes["avg_eigenvec_centrality"] = time.time() - start
    dictIsDisc["avg_eigenvec_centrality"] = False

    # Maximum
    start = time.time()
    dictSums["max_eigenvec_centrality"] = max(
        list(nx.eigenvector_centrality_numpy(G).values()))
    dictTimes["max_eigenvec_centrality"] = time.time() - start
    dictIsDisc["max_eigenvec_centrality"] = False

    ### Noise generation ###

    # Noise simulated from a Normal(0, 1) distribution
    start = time.time()
    dictSums["noise_Gauss"] = ss.norm.rvs(0, 1)
    dictTimes["noise_Gauss"] = time.time() - start
    dictIsDisc["noise_Gauss"] = False

    # Noise simulated from a Uniform distribution on [0, 50]
    start = time.time()
    dictSums["noise_Unif"] = ss.uniform.rvs(0, 50)
    dictTimes["noise_Unif"] = time.time() - start
    dictIsDisc["noise_Unif"] = False

    # Noise simulated from a Bernoulli B(0.5) distribution
    start = time.time()
    dictSums["noise_Bern"] = ss.bernoulli.rvs(0.5)
    dictTimes["noise_Bern"] = time.time() - start
    dictIsDisc["noise_Bern"] = True

    # Noise simulated from a discrete uniform distribution on [0, 50)
    start = time.time()
    dictSums["noise_disc_Unif"] = ss.randint.rvs(0, 50)
    dictTimes["noise_disc_Unif"] = time.time() - start
    dictIsDisc["noise_disc_Unif"] = True

    resDicts = (dictSums, dictTimes, dictIsDisc)
    return resDicts
def cal_all_k_shells(g):
    max_num = max(nx.core_number(g).values())
    print(max_num)
    for k in range(max_num + 1):
        print(k, end="\t")
        print(nx.k_shell(g, k).number_of_nodes())
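# A minimal usage sketch: cal_all_k_shells prints the maximum core number,
# then one "k<TAB>size" line per shell.
import networkx as nx

cal_all_k_shells(nx.karate_club_graph())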
def get_link_measures(net):
    """
    Compute weights and edge betweenness centralities.
    :param net: network
    :return: w: list of edge weights
             eb: list of unweighted edge betweenness centralities
             eb_w: list of weighted edge betweenness centralities
             eb_w2, eb_pr, eb_cl, eb_ev, eb_s: weighted betweenness normalized
             by k-shell, PageRank, closeness, eigenvector centrality and
             strength, respectively
    """
    w, eb, eb_w, eb_w2, eb_pr, eb_cl, eb_ev, eb_s = [], [], [], [], [], [], [], []

    # Edge weight and unweighted betweenness centrality
    edges = net.edges(data=True)
    betweenness_centr = nx.edge_betweenness_centrality(net, normalized=True)
    for e in edges:
        w.append(e[2]['weight'])
        eb.append(betweenness_centr[(e[0], e[1])])

    # Create a copy of the graph with inverse weights; the cube root is used
    # to reduce the impact of high weights
    net1 = net.copy()
    edges1 = net1.edges(data=True)
    for e in edges1:
        w_e = e[2]['weight']
        net1[e[0]][e[1]]['weight'] = 1 / (w_e ** (1 / 3))

    # Weighted betweenness centrality on net1
    betweenness_centr_w = nx.edge_betweenness_centrality(net1,
                                                         normalized=True,
                                                         weight='weight')
    for e in edges1:
        eb_w.append(betweenness_centr_w[(e[0], e[1])])

    # Node dictionary of k-shell indices
    dict_k_shell = {}
    max_degree = max(net.degree(n) for n in net.nodes)
    for k in reversed(range(max_degree + 1)):
        for i in nx.k_shell(net1, k=k).nodes():
            if i not in dict_k_shell:
                dict_k_shell[i] = k

    # Node dictionary for PageRank
    dict_page_rank = nx.pagerank(net1, weight='weight')

    # Closeness centrality, sorted in node order
    closeness_centr = nx.closeness_centrality(net, distance='weight')
    closeness_centr = dict(
        sorted(closeness_centr.items(),
               key=lambda pair: list(net.nodes).index(pair[0])))

    # Eigenvector centrality, sorted in node order
    eigenvector_centr = nx.eigenvector_centrality(net, tol=10 ** -1,
                                                  weight='weight')
    eigenvector_centr = dict(
        sorted(eigenvector_centr.items(),
               key=lambda pair: list(net.nodes).index(pair[0])))

    # Strengths of nodes
    strengths = dict(nx.degree(net1, weight='weight'))

    # For each edge, take the lower centrality value of its two endpoints and
    # use it to normalize the previously computed weighted betweenness
    j = 0
    for e in edges:
        eb_w2.append(eb_w[j] / min(dict_k_shell[e[0]], dict_k_shell[e[1]]))
        eb_pr.append(eb_w[j] / min(dict_page_rank[e[0]], dict_page_rank[e[1]]))
        eb_cl.append(eb_w[j] / min(closeness_centr[e[0]], closeness_centr[e[1]]))
        eb_ev.append(eb_w[j] / min(eigenvector_centr[e[0]], eigenvector_centr[e[1]]))
        eb_s.append(eb_w[j] / min(strengths[e[0]], strengths[e[1]]))
        j = j + 1
    return w, eb, eb_w, eb_w2, eb_pr, eb_cl, eb_ev, eb_s
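# A minimal usage sketch for get_link_measures on a small weighted graph;
# every edge needs a positive 'weight' attribute because the function
# inverts weights through a cube root. The random weights are arbitrary.
import random
import networkx as nx

random.seed(0)
net = nx.karate_club_graph()
for u, v in net.edges():
    net[u][v]['weight'] = random.randint(1, 10)

w, eb, eb_w, eb_w2, eb_pr, eb_cl, eb_ev, eb_s = get_link_measures(net)
print(len(w), max(eb_w2))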