def community(self, resolution=1):
    """Run Louvain community detection on the graph ``G`` and plot the result.

    The graph is read from the name ``G`` in the enclosing scope; ``self`` is
    not used by this function.

    :param resolution: value forwarded to ``community_louvain.best_partition``.
    :return: the partition dict returned by ``best_partition``
        (node -> community label).
    """
    partition = community_louvain.best_partition(G, resolution=resolution)

    # Count distinct labels instead of taking max(label): labels are 0-based,
    # so the maximum is one less than the number of communities, and max()
    # raises on an empty partition.
    community_ids = set(partition.values())
    n_community = len(community_ids)
    print('Modularity:', community_louvain.modularity(partition, G))
    print('Number of communities:', n_community)

    # drawing
    pos = nx.spring_layout(G)
    plt.figure(figsize=(12, 8))
    for com in community_ids:
        list_nodes = [node for node, label in partition.items() if label == com]
        # One random RGB colour per community.  It is wrapped in an outer list
        # (a single-row 2-D sequence) so that it is never mistaken for three
        # per-node scalar values when a community happens to have 3 nodes.
        colour = [[random(), random(), random()]]
        nx.draw_networkx_nodes(G, pos, nodelist=list_nodes, node_size=20,
                               node_color=colour)
    nx.draw_networkx_edges(G, pos, alpha=0.5, width=0.5)
    plt.show()
    return partition
def clustering(G):
    """
    Conduct Louvain clustering on a graph and print summary statistics.

    Prints the number of clusters and the modularity of the partition.

    :param G: networkx graph
    :return: None
    """
    partition = community.best_partition(G)
    print()
    print("Clusters")
    # Cluster labels are 0-based, so max(label) under-reports the count by one
    # (and raises on an empty partition); count the distinct labels instead.
    num_clusts = len(set(partition.values()))
    print("Num clusters = " + str(num_clusts))
    modValue = community.modularity(partition, G)
    print("modularity: {}".format(modValue))
def create_community_graph(nodes, edges, filename, scale):
    """Build a graph, colour its nodes by Louvain community, and save a plot.

    :param nodes: iterable of node identifiers.
    :param edges: table indexable by the columns ``'Source'``, ``'Target'``,
        ``'Label'`` and ``'Weight'`` (presumably a pandas DataFrame -- verify
        against the caller).
    :param filename: path passed to ``plt.savefig``.
    :param scale: multiplier applied to each edge weight to get its line width.
    :return: the ``nx.Graph`` that was built; every edge carries a ``'label'``
        attribute taken from the ``'Label'`` column.
    """
    import community.community_louvain as community

    plt.figure(figsize=(20, 20))
    G = nx.Graph()
    for node in nodes:
        G.add_node(node)

    # zip() is a one-shot iterator in Python 3.  It must be materialised,
    # otherwise add_edges_from() exhausts it and the label loop below never
    # runs.
    edge_list = list(zip(edges['Source'], edges['Target']))
    G.add_edges_from(edge_list)
    for (a, b), val in zip(edge_list, edges['Label'].values):
        G[a][b]['label'] = val

    pos = nx.circular_layout(G, scale=50)

    # Use one of the edge properties to control line thickness.  G.edges() is
    # not guaranteed to follow the row order of `edges` (and duplicate rows
    # collapse into one edge), so look the weight up per drawn edge instead of
    # passing the raw column.
    weight_by_edge = {
        frozenset(pair): weight
        for pair, weight in zip(edge_list, edges['Weight'])
    }
    drawn_edges = list(G.edges())
    edge_widths = [weight_by_edge[frozenset(e)] * scale for e in drawn_edges]

    parts = community.best_partition(G)
    node_color = [parts.get(node) for node in G.nodes()]
    print("Louvain Modularity: ", community.modularity(parts, G))

    nx.draw_networkx_nodes(G, pos,
                           cmap=plt.get_cmap("rainbow"),
                           node_size=[float(G.degree(v)) * 700 for v in G],
                           alpha=0.7,
                           node_color=node_color,
                           linewidths=0)
    nx.draw_networkx_edges(G, pos,
                           edgelist=drawn_edges,
                           alpha=0.25,
                           edge_color='#0EA6EC',
                           width=edge_widths,
                           arrows=False)

    # edge_labels = nx.get_edge_attributes(G,'label')
    # nx.draw_networkx_edge_labels(G, pos, edge_labels = edge_labels)
    node_labels = {i: '{}'.format(i) for i in G.nodes()}
    nx.draw_networkx_labels(G, pos, labels=node_labels,
                            font_color='#f5f5f5', font_weight='bold')

    axes = plt.gca()
    # set_axis_bgcolor() was removed from matplotlib; set_facecolor() is the
    # replacement.
    axes.set_facecolor('#f5f5f5')
    # plt.axis('off')
    axes.get_xaxis().set_visible(False)
    axes.get_yaxis().set_visible(False)

    plt.savefig(filename, dpi=150)
    # The original referenced plt.show without calling it, which did nothing.
    plt.show()
    return G
def graph_partition(self, G, ret_df):
    """Partition ``G`` with Louvain and score the communities against ``ret_df``.

    Side effects: sets ``self.labels_true`` and prints the result of
    ``self.getRealDocs`` for the ``'KEY_WORDS'`` column.

    :param G: graph handed to ``community_louvain.best_partition``.  Its node
        ids are used to index a length-``m`` array, so they are presumably the
        integer row positions of ``ret_df`` -- TODO confirm.
    :param ret_df: table with a ``'KEY_WORDS'`` column and a ``'TYPE'`` column
        of ``'-'``-separated strings; ``m`` is its number of rows.
    :return: ``(comm_dict, scores)`` where ``comm_dict`` maps community id to
        the list of its node ids and ``scores`` is whatever
        ``self.Eva_Metric(labels_true, labels_pred)`` returns.
    """
    m = ret_df.shape[0]
    keywords = ret_df['KEY_WORDS']

    # Louvain community detection; returns a dict node id -> community id.
    # Original author's notes: this gets slow when the graph has many edges,
    # and `resolution` controls the size of the communities.
    partition = community_louvain.best_partition(G, resolution=2)

    # Invert the partition: {community id: [node ids]}.
    comm_dict = defaultdict(list)
    for doc, com_id in partition.items():
        comm_dict[com_id].append(doc)

    # Reference label of every row: the 3rd '-'-separated field of TYPE when
    # there are more than three fields, otherwise the last field, mapped
    # through self.labels_dict.
    true_labels = []
    for d_type in ret_df['TYPE']:
        key_list = d_type.split("-")
        key = key_list[2] if len(key_list) > 3 else key_list[-1]
        true_labels.append(self.labels_dict[key])
    self.labels_true = np.array(true_labels, dtype=float)

    # Every member of a community gets the community id as predicted label.
    # (Original author's note: the Rand index and mutual information only need
    # "same class ends up in the same cluster", so no majority-vote mapping
    # from community to reference label is applied.)
    # Rows that never appear in the partition keep -1 instead of the
    # uninitialised memory np.empty() would leave behind.
    labels_pred = np.full(m, -1.0)
    for com_id, com_members in comm_dict.items():
        labels_pred[com_members] = com_id

    scores = self.Eva_Metric(self.labels_true, labels_pred)
    print(self.getRealDocs(list(keywords)))
    return comm_dict, scores
def find_best_partition(self):
    """Split ``self.graph`` by repeatedly removing its highest-betweenness edges.

    Each round removes every edge returned by
    ``self.get_max_betweenness_edges`` and takes the connected components of
    what is left as the candidate partition.  The loop stops after 40 rounds,
    when every remaining edge is a maximum-betweenness edge, or when the
    candidate's modularity drops below the previous round's (in which case the
    last removal is undone).

    :return: ``(partition, G, removed_edges)`` -- the last accepted
        node -> component-index dict, the pruned copy of the graph, and the
        list of all edges removed in accepted rounds.
    """
    from community import community_louvain

    G = self.graph.copy()
    modularity = -float('inf')
    removed_edges = []
    partition = {}
    cou = 0
    while cou < 40:
        cou += 1
        # 1. edge betweenness of the current (pruned) graph
        betweenness = self.calculte_betweenness(G)
        # 2. the set of edges that share the maximum betweenness
        max_betweenness_edges = self.get_max_betweenness_edges(betweenness)
        if len(G.edges()) == len(max_betweenness_edges):
            # every edge has the same betweenness: nothing left to separate
            break
        G.remove_edges_from(max_betweenness_edges)

        # Candidate partition: number the connected components 0, 1, 2, ...
        tmp_partition = {}
        idx = 0
        for component in nx.connected_components(G):
            for inner in component:
                tmp_partition[inner] = idx
            idx += 1
        print('IDX=', idx)

        # Score against the ORIGINAL graph.  On the pruned graph every
        # remaining edge lies inside a component, so its modularity does not
        # measure how well the split matches the real edge structure and the
        # stopping test below would not be meaningful.
        cur_mod = community_louvain.modularity(tmp_partition, self.graph)
        print("CUR MOD=", cur_mod, 'while modularity=', modularity)
        if cur_mod < modularity:
            # Modularity dropped: this split is rejected, so put the edges
            # back and stop.
            G.add_edges_from(max_betweenness_edges)
            break
        partition = tmp_partition
        # running list of all removed maximum-betweenness edges
        removed_edges.extend(max_betweenness_edges)
        modularity = cur_mod

    self.display(partition)
    print('COUNT', cou)
    return partition, G, removed_edges
file.write("Freeman_centralization\tnan \n")

### Mean closeness centrality ###
closeness_cent = nx.closeness_centrality(G)
mean_closeness_cent = mean(list(closeness_cent.values()))
file.write("Mean_closeness_centrality\t" + str(round(mean_closeness_cent, 5)) + "\n")

### Modularity ###
p = community_louvain.best_partition(G, random_state=1, randomize=False)
Q = community_louvain.modularity(p, G)
print("Modularity of best partition of graph: ", str(Q))
file.write("Modularity\t" + str(round(Q, 5)) + "\n")

### Median comp size over total number of nodes ###
# nx.connected_component_subgraphs() was removed in networkx 2.4.  Only the
# component sizes are needed, so take them from connected_components(), which
# yields one node set per component.
med_list = [len(component) for component in nx.connected_components(G)]
med_over_nnodes = median(med_list) / nnodes
file.write("Median_comp_size_over_#_nodes\t" + str(round(med_over_nnodes, 5)) + "\n")

### Degree assortativity ###
degree_assortativity = nx.degree_assortativity_coefficient(G)
file.write("Degree_assortativity\t" + str(round(degree_assortativity, 5)) + "\n")
def community_detector(algorithm_name,network,most_valualble_edge=None):
    """Run one community-detection algorithm and summarise its best result.

    :param algorithm_name: ``'louvain'``, ``'girvin_newman'`` or
        ``'clique_percolation'``; any other value falls through and the
        function returns ``None``.
    :param network: graph to analyse.
    :param most_valualble_edge: forwarded to networkx ``girvan_newman``.
    :return: dict with the keys ``'num_partitions'``, ``'modularity'`` and
        ``'partition'`` (a list of node lists, or ``None`` when no candidate
        scored above 0 in the non-Louvain branches).
    """
    weight = 'weight' if Helpers().is_weighted(network) else None

    if algorithm_name == 'louvain':
        par_var = community_louvain.best_partition(network, weight=weight)
        num_partitions = len(set(par_var.values()))
        modularity = community_louvain.modularity(par_var, network)
        # Group nodes by community label; the buckets are pre-created for
        # labels 0..num_partitions-1.
        members_by_label = {label: [] for label in range(num_partitions)}
        for node, label in par_var.items():
            members_by_label[label].append(node)
        return {'num_partitions': num_partitions,
                'modularity': modularity,
                'partition': list(members_by_label.values())}

    if algorithm_name == 'girvin_newman':
        levels = nx.algorithms.community.centrality.girvan_newman(
            network, most_valualble_edge)
        best_mod = 0
        best_count = 0
        best_partition = None
        list_of_losers = []
        try:
            # Give up once more than 100 consecutive levels failed to beat
            # the best modularity seen so far.
            while len(list_of_losers) <= 100:
                level = next(levels)
                dic = {index: list(members)
                       for index, members in enumerate(level)}
                temp_mod = Helpers().modularity(network, dic)
                if temp_mod > best_mod:
                    best_mod = temp_mod
                    best_count = len(dic)
                    best_partition = list(dic.values())
                    list_of_losers = []
                else:
                    list_of_losers.append(temp_mod)
        except StopIteration:
            # the generator ran out of levels
            pass
        return {'num_partitions': best_count,
                'modularity': best_mod,
                'partition': best_partition}

    if algorithm_name == 'clique_percolation':
        best_mod = 0
        best_count = 0
        best_partition = None
        for k in range(2, len(network.nodes())):
            try:
                found = nx.algorithms.community.k_clique_communities(network, k)
                found = Helpers().unpack_gen(found)
                temp_mod = Helpers().modularity(network, found.copy())
                if temp_mod > best_mod:
                    best_mod = temp_mod
                    best_count = len(list(found.values()))
                    best_partition = list(found.values())
            except IndexError:
                # an IndexError ends the search over clique sizes
                break
        return {'num_partitions': best_count,
                'modularity': best_mod,
                'partition': best_partition}
# nx.draw(G, pos=nx.circular_layout(G), node_color='r', edge_color='b')
# plt.show()
print("getting the graph end")

# Compute the best partition
print("partitioning start")
partition = community_louvain.best_partition(G)
print("partitioning end")

# Store the partition dictionary to file, one "node_id,community_id" line per
# node.  The context manager closes the file even if a write fails (for
# example when a node id is not an integer and '%d' raises).
print("Store the partition dictionary to file start ")
with open('partition.txt', 'w') as outFile:
    for nid in partition:
        community = partition[nid]
        ss = '%d,%d\n' % (nid, community)
        outFile.write(ss)
print("Store the partition dictionary to file end")

# Number of communities
# 16172
print(len(set(partition.values())))

# Modularity of the partition
# 0.9377583051376279
print(community_louvain.modularity(partition, G))
def NetworkAnalysis(G, filename='highRatingResults.txt'):
    """Compute a set of network metrics for ``G`` and write them to a text file.

    :param G: networkx graph to analyse.
    :param filename: path of the report; it is opened in ``'w'`` mode, so an
        existing file is overwritten.
    :return: None

    Any exception raised by a metric propagates to the caller (``nx.diameter``
    raises when the graph is not connected); the report file is closed in
    that case too.
    """
    # standard local metrics
    nbr_nodes = nx.number_of_nodes(G)
    nbr_edges = nx.number_of_edges(G)
    nbr_components = nx.number_connected_components(G)

    # The context manager guarantees the report is flushed and closed even
    # when one of the metrics below raises.
    with open(filename, 'w') as F:
        F.write("Number of nodes:" + str(nbr_nodes) + "\n")
        F.write("Number of edges:" + str(nbr_edges) + "\n")
        F.write("Number of connected components:" + str(nbr_components) + "\n")

        # betweenness
        betweenList = nx.betweenness_centrality(G)
        F.write("The list of betweenness centrality is" + str(betweenList) + "\n")
        # Original author's note: all items have a betweenness centrality
        # below 1, read as no item lying in between the connection of two
        # other items.

        # degree
        degreeCentrality = nx.degree_centrality(G)
        F.write("The degrees of centrality is " + str(degreeCentrality) + "\n")

        # clustering coefficient for each node
        F.write("The clustering coefficients are " + str(nx.clustering(G)) + "\n")

        partition = community_louvain.best_partition(G)
        F.write("The community modularity is "
                + str(community_louvain.modularity(partition, G)) + "\n")
        # Original author's note: the value suggests there isn't a strong
        # community structure.

        # global network metrics (describe the whole network, not just a part)
        # diameter - the max of shortest distances between nodes
        F.write("The diameter is " + str(nx.diameter(G)) + "\n")

        # density
        F.write("The density is " + str(nx.density(G)) + "\n")

        # triangles
        F.write("The triangle is " + str(nx.triangles(G)) + "\n")

        # average clustering coefficient for the graph
        avgclu = nx.average_clustering(G)
        F.write("The average clustering is " + str(avgclu) + "\n")

        # average degree centrality
        avgdeg = np.average(list(degreeCentrality.values()))
        F.write("The average degree centrality is " + str(avgdeg) + "\n")

        # average betweenness centrality
        avgB = np.average(list(betweenList.values()))
        F.write("The average betweenness centrality is " + str(avgB) + "\n")
flows.add_edges_from(significant_flows)
# Graph.selfloop_edges() was removed from networkx; the module-level function
# replaces it.  The result is materialised first because edges are removed
# from the same graph it iterates over.
flows.remove_edges_from(list(nx.selfloop_edges(flows)))

#find 5 largest betweenness centralities
largest_betweeness = pd.Series(nx.betweenness_centrality(flows)).nlargest(5)

#find 5 largest closeness centralities
largest_closeness = pd.Series(nx.closeness_centrality(flows)).nlargest(5)

#find number of connected componants
# len([generator]) is always 1 regardless of the graph; ask networkx for the
# actual count.
num_of_connected_componants = nx.number_connected_components(flows)

#find communities in the network and the modularity
partition = cm.best_partition(flows)
communities = pd.Series(partition)
communities = pd.DataFrame({
    'States': communities.index,
    'Community': communities.values
})
num_of_communities = len(communities['Community'].unique())
communtiy0states = communities[communities['Community'] == 0]['States']
communtiy1states = communities[communities['Community'] == 1]['States']
communtiy2states = communities[communities['Community'] == 2]['States']
modularity = cm.modularity(partition, flows)

#save graph
# Pass the path instead of an open() handle that is never closed.
nx.write_graphml(flows, 'SignificantStatetateMigrationFlows.graphml')
# # * 위의 네트워크 지도에서는 커뮤니티를 찾기 힘듭니다. 아래에서 단어들 간의 커뮤니티를 찾아 그래프로 출력해보겠습니다. # # * 커뮤니티를 찾기 위해 Louvain 알고리즘을 사용하겠습니다. 이 알고리즘은 다른 알고리즘과는 달리 계산 시간이 빠르다는 장점으로 많이 이용되고 있습니다. # In[44]: # 커뮤니티를 찾기 위한 modularity 를 계산하겠습니다. from community import community_louvain # 노드 속성을 기초로 파티션을 나누겠습니다. partition = community_louvain.best_partition(travel_network) # partition 값을 통해 노드가 잘 구분되어 있는지를 계산하겠습니다. modularity = community_louvain.modularity(partition, travel_network) print('Modularity:', modularity) # In[45]: plt.figure(figsize=(25, 20)) colors = [partition[n] for n in travel_network.nodes()] my_colors = plt.cm.Set3_r nx.draw(travel_network, with_labels=True, labels=travel_id2word, font_family='Malgun Gothic', node_color=colors, cmap=my_colors, font_size=14,
def modularity(self):
    """Return the modularity of the Louvain best partition computed on ``self``.

    ``self`` is passed as the graph to both ``community_louvain`` calls.
    """
    return community_louvain.modularity(
        community_louvain.best_partition(self), self
    )
pos, list_nodes, node_size=50, node_color=str(count / size)) nx.draw_networkx_edges(G, pos, with_labels=True, alpha=0.5) plt.show() values = [partition.get(node) for node in G.nodes()] nx.draw_spring(G, cmap=plt.get_cmap('jet'), node_color=values, node_size=30, with_labels=False) plt.show() # Clustering evaluation print('Modularity: {0:.4f}'.format(community.modularity(partition, G))) # if display_plots: fx.display_partition(G, partition, 2) #Calculate induced graph ind = community.induced_graph(partition, G) #Display induced graph if display_plots: plt.figure(3) pos = nx.random_layout(ind) nx.draw_networkx_edges(ind, pos, alpha=0.5) nx.draw_networkx_nodes(ind, pos, node_size=40, node_color='1') plt.show()
number_of_community = len(community_list) mat_size = np.max(community_list) + 1 mod = [[0.0] * mat_size for i in range(mat_size)] for i in community_list: tmp = copy.deepcopy(partition) #print(tmp) for j in community_list: #print(tmp) if (i != j): for key, values in partition.items(): if (values == i): tmp[key] = j mod[i][j] = community_louvain.modularity(tmp, dolphins_data) mod = np.array(mod) #Convert to array max = 0 #Initial Value to find Max modularity max_index = (0, 0) #to find index of max modularity for i in range(len(mod)): for j in range(len(mod[0])): if (mod[i][j] > max): max = mod[i][j] max_index = (i, j) #Merge Step tmp = copy.deepcopy(partition) #print(max_index) for key, values in tmp.items():
partition = community_louvain.best_partition(G)

# Greedily merge the pair of communities whose union gives the highest
# modularity, until at most two communities remain.
while True:
    community_labels = set(partition.values())
    if len(community_labels) <= 2:
        break
    max_val, max_indx = -1 * float('inf'), (None, None)
    for i in community_labels:
        for j in community_labels:
            if i == j:
                continue
            # Build each candidate from the CURRENT partition.  Reusing one
            # copy across all j (as before) let earlier merges leak into later
            # candidates, so a pair was scored with other communities already
            # folded into i.
            temp = {
                node: (i if label == j else label)
                for node, label in partition.items()
            }
            val = community_louvain.modularity(temp, G)
            if val > max_val:
                max_val = val
                max_indx = (i, j)
    # Apply the winning merge: both communities take the first label.
    for key in partition:
        if partition[key] in max_indx:
            partition[key] = max_indx[0]

# NOTE: final_labels[1] raises IndexError when the graph ends up with fewer
# than two communities.
final_labels = tuple(set(partition.values()))
group_1 = [node for node in partition if partition[node] == final_labels[0]]
group_2 = [node for node in partition if partition[node] == final_labels[1]]
# pos = nx.spring_layout(G)
def print_modularity(G, partition, title):
    """Print ``title`` in angle brackets followed by the partition's modularity.

    :param G: graph passed to ``community_louvain.modularity``.
    :param partition: partition passed to ``community_louvain.modularity``.
    :param title: string heading; it is concatenated with ``'<'`` and ``'>'``.
    :return: None
    """
    score = community_louvain.modularity(partition, G)
    heading = '<' + title + '>'
    print(heading)
    print("modularity:", score)