def sub_clusterization_experiment(G1):
    p1 = partition(G1)
    print "Modularity: " + str(community.modularity(p1, G1))
    sizes = clustersSizes(p1)
    ordered = sorted(range(len(sizes)), key=lambda k: sizes[k], reverse=True)
    p1 = splitCluster(G1, p1, ordered[0])
    print "Modularity: " + str(community.modularity(p1, G1))
    sizes = clustersSizes(p1)
    ordered = sorted(range(len(sizes)), key=lambda k: sizes[k], reverse=True)
    p1 = splitCluster(G1, p1, ordered[0])
    print "Modularity: " + str(community.modularity(p1, G1))
    ordered = sorted(range(len(sizes)), key=lambda k: sizes[k])
    while True:
        word = raw_input("word: ")
        s = G1.nbunch_iter([i[0] for i in p1.items()
                            if i[1] == p1[findNode(G1, word)]])
        try:
            for i in s:
                print G1.node[i]["text"] + ", ",
            print
        except:
            pass
        plt.plot(range(1, len(clustersSizes(p1)) + 1),
                 sorted(clustersSizes(p1), reverse=True))
        plt.xlabel("Cluster no.")
        plt.ylabel("Cluster size")
        # plt.title(u"Number of nodes per cluster (sub-clustering C1)")
        plt.title(u"Number of nodes per cluster (sub-clustering C1 and C2)")
        plt.show()
def __InitClusterAnalysis(self):
    print "starting best m_partition algorithm (will take a while)...."
    if self.m_classifier == None:
        classifier = classifier_c.classifier_c(self.m_graph, "best_practice")
        self.m_partition = classifier.run_classifier(classifier_c.classifier_type_e.e_bestPractice)  # community.best_partition(self.m_graph)
        modularity = community.modularity(self.m_partition, self.m_graph)
        self.LogPrint("the modularity is %f" % modularity)
    else:
        self.m_partition = self.m_classifier.classifey()
    if self.m_partition == None:
        self.LogPrint("partition is NULL...will not create cluster, exit")
        return
        # __LogPrint(self, "the modularity is %f" % modularity)
    else:
        for node in self.m_partition.iteritems():
            if self.m_comSize.has_key(node[1]):
                self.m_comSize[node[1]] = self.m_comSize[node[1]] + 1
                self.m_comMem[node[1]].append(node[0])
            else:
                self.m_comSize[node[1]] = 1
                self.m_comMem[node[1]] = []
        for cSize in self.m_comSize.iteritems():
            if cSize[1] > 1:
                self.LogPrint("cSize[1]=%d" % cSize[1])
                self.m_comsizeClean[cSize[0]] = cSize[1]
                if len(self.m_comMem[cSize[0]]) == 1:
                    self.LogPrint("this community has only one listed member...")
                    self.m_comMemClean[cSize[0]] = self.m_comMem[cSize[0]]
        for memberIDs in self.m_comMemClean.iteritems():
            self.m_comMemNames[memberIDs[0]] = []
            for member in memberIDs[1]:
                self.m_comMemNames[memberIDs[0]].append(utils.GetNodeName(member, self.m_graph))
def test_karate(self):
    """Test modularity on Zachary's karate club."""
    graph = nx.karate_club_graph()
    part = co.best_partition(graph)
    self.assertTrue(co.modularity(part, graph) > 0.41)
    for e1, e2 in graph.edges_iter():
        graph[e1][e2]["test_weight"] = 1.
    part_weight = co.best_partition(graph, weight="test_weight")
    self.assertAlmostEqual(co.modularity(part, graph),
                           co.modularity(part_weight, graph, "test_weight"))
    part_res_low = co.best_partition(graph, resolution=0.1)
    self.assertTrue(len(set(part.values())) < len(set(part_res_low.values())))
def printStats(filename):
    '''
    Converts a JSON adjacency list into a networkx graph to calculate and
    print the graph's
     - average clustering coefficient
     - overall clustering coefficient
     - maximum diameter
     - average diameter
     - number of partitions using community.best_partition
     - modularity of community.best_partition
    '''
    g = makeGraphFromJSON(filename)

    print "Average Clustering Coefficient: %f" % nx.average_clustering(g)
    print "Overall Clustering Coefficient: %f" % nx.transitivity(g)

    connected_subgraphs = list(nx.connected_component_subgraphs(g))
    largest = max(nx.connected_component_subgraphs(g), key=len)
    print "# Connected Components: %d" % len(connected_subgraphs)
    print "    Maximal Diameter: %d" % nx.diameter(largest)
    print "    Average Diameter: %f" % nx.average_shortest_path_length(largest)

    # Find the partition that maximizes modularity using the Louvain algorithm
    part = community.best_partition(g)
    print "# Partitions: %d" % (max(part.values()) + 1)
    print "Louvain Modularity: %f" % community.modularity(part, g)
def getRandomPageRanks(filename):
    Ga = nx.read_graphml(filename)  # was hard-coded to sys.argv[1]; use the argument

    # create a copy of the graph and extract giant component
    # get component size distribution (requires NetworkX 1.x, where
    # connected_components returns a list of node lists)
    cc = nx.connected_components(Ga)
    cc_dict = {}
    for x in range(0, len(cc)):
        try:
            cc_dict[len(cc[x])].append(x)
        except KeyError:
            cc_dict[len(cc[x])] = []
            cc_dict[len(cc[x])].append(x)

    isolates = nx.isolates(Ga)

    # random graph with the same node count and density for comparison
    rg = nx.fast_gnp_random_graph(Ga.number_of_nodes(),
                                  2.0 * Ga.number_of_edges() / (Ga.number_of_nodes() * (Ga.number_of_nodes() - 1)))
    c_rg = nx.average_clustering(rg)
    rg_cc = nx.connected_component_subgraphs(rg)[0]
    rg_asp = nx.algorithms.shortest_paths.generic.average_shortest_path_length(rg_cc)
    p_rg = community.best_partition(rg_cc)
    m_rg = community.modularity(p_rg, rg_cc)

    pageranks = nx.pagerank_numpy(rg)
    return pageranks
def get_communities(graph):
    betweenness = nx.edge_betweenness_centrality(graph)
    sorted_betweeness = [x[0] for x in sorted(betweenness.items(),
                                              key=lambda x: x[1], reverse=True)]
    best_partitions = []
    max_modularity = -1.0
    graph_copy = graph.copy()
    while sorted_betweeness:
        communities = [list(x) for x in nx.connected_components(graph_copy)]
        partitions = {}
        for i in range(len(communities)):
            for node in communities[i]:
                partitions[node] = i
        modularity = community.modularity(partitions, graph_copy)
        if modularity > max_modularity:
            best_partitions = communities
            max_modularity = modularity
        elif modularity <= max_modularity:
            break
        graph_copy.remove_edge(*sorted_betweeness[0])
        del sorted_betweeness[0]
    for partition in best_partitions:
        print sorted(partition)
    val_map = {}
    for partition in best_partitions:
        value = random.random()
        while value in val_map.values():
            value = random.random()
        for node in partition:
            val_map[node] = value
    values = [val_map.get(node) for node in graph.nodes()]
    nx.draw_spring(graph, node_color=values, node_size=500, with_labels=True)
    plt.savefig(sys.argv[2])
def InitClusterAnalysis(graph):
    global comMemClean
    global comMemNames
    global comsizeClean
    global partition
    print "starting best partition algorithm (will take a while)...."
    partition = community.best_partition(graph)
    modularity = community.modularity(partition, graph)
    LogPrint("the modularity is %f" % modularity)
    if partition != None:
        for node in partition.iteritems():
            if comSize.has_key(node[1]):
                comSize[node[1]] = comSize[node[1]] + 1
                comMem[node[1]].append(node[0])
            else:
                comSize[node[1]] = 1
                comMem[node[1]] = []
        for cSize in comSize.iteritems():
            if cSize[1] > 1:
                print "cSize[1]=", cSize[1]
                comsizeClean[cSize[0]] = cSize[1]
                if len(comMem[cSize[0]]) == 1:
                    print "this community has only one listed member...", comMem[cSize[0]]
                    comMemClean[cSize[0]] = comMem[cSize[0]]
        for memberIDs in comMemClean.iteritems():
            comMemNames[memberIDs[0]] = []
            for member in memberIDs[1]:
                comMemNames[memberIDs[0]].append(utils.GetNodeName(member, graph))
def compute_best_community(original_g):
    max_modularity = -1
    total_nodes = nx.number_of_nodes(original_g)
    community_count = 1
    g = original_g
    communities = []
    # Generate all candidate communities: loop from the entire graph as one
    # community down to each node as a separate community
    while community_count < total_nodes:
        betweenness = nx.edge_betweenness(g)
        max_betweenness = max(betweenness.iteritems(), key=operator.itemgetter(1))[0]
        g.remove_edge(max_betweenness[0], max_betweenness[1])
        connected_subgraphs = nx.connected_components(g)
        connected_subgraphs_list = convert_generator_list(connected_subgraphs)
        community_dict = categorize_nodes(connected_subgraphs_list)
        modularity = community.modularity(community_dict, original_g)
        if modularity > max_modularity:
            max_modularity = modularity
            communities = list(connected_subgraphs_list)
        community_count += 1
    communities = format_list(communities)
    return communities, max_modularity
def get_community_assignment(in_df, graph, dendrogram):
    '''
    Utilize the dendrogram to find community clusterings at every level
    available. For each hierarchy level, a new column is added to the
    returned df with the community clustering. (e.g. cid0 -> 0,0,1,2,3)

    in_df:      DataFrame. Must be indexed by user_id.
    graph:      networkx Graph. Node IDs should match user_ids in the dataframe.
    dendrogram: List of dictionaries, each dictionary mapping user_id to
                community_id. Each dictionary represents a level of the
                clustering hierarchy.

    return: Tuple of the DataFrame with community-id assignment columns added
            and a dictionary mapping each level to its modularity (float).
    '''
    df = in_df.copy()
    community_modularity = {}
    for i in range(len(dendrogram)):
        partition = partition_at_level(dendrogram, i)
        # Infrequently, the community detection algorithm will exclude (?)
        # a user ID or two. Still investigating why. For now, these are
        # placed into partition 0.
        df['cid' + str(i)] = [partition[ind] if ind in partition else 0
                              for ind in df.index]
        community_modularity[i] = modularity(partition, graph)
    return df, community_modularity
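# Usage sketch for get_community_assignment (assumed setup, not from the
# original project). python-louvain's generate_dendrogram supplies the
# hierarchy, and the DataFrame index stands in for user_ids. This assumes the
# surrounding module did `from community import partition_at_level, modularity`,
# matching the bare calls inside the function above.
import networkx as nx
import pandas as pd
from community import generate_dendrogram

g = nx.karate_club_graph()
df = pd.DataFrame(index=list(g.nodes()))
dendo = generate_dendrogram(g)
df_out, level_mods = get_community_assignment(df, g, dendo)
print(df_out.head())  # one 'cid<level>' column per hierarchy level
print(level_mods)     # {level: modularity}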
def get_topics_noun_phrases(num_news, draw=False, url='http://cnn.com'):
    texts = get_news(url, num_news)
    gb = NounPhraseGraphBuilder(text_processing.clean_punctuation_and_stopwords)
    gb.load_texts(texts)
    G = gb.create_graph()
    print "Graph built"

    partition = community.best_partition(G)
    words_by_part = get_words_by_partition(partition)
    print_topics_from_partitions(G, words_by_part, 10)
    mod = community.modularity(partition, G)
    print "modularity:", mod
    # print_topics_from_partitions(G, words_by_part, 10)

    if draw:
        values = [partition.get(node) for node in G.nodes()]
        nx.draw_spring(G, cmap=plt.get_cmap('jet'), node_color=values,
                       node_size=30, with_labels=False)
        plt.show()

    topics = get_topics_from_partitions(G, words_by_part, 10)
    return G, topics
def louvain_method(G):
    partition = community.best_partition(G)
    print "Graph nodes:", len(G.nodes()), "edges:", len(G.edges())
    print "Partitions:", len(set(partition.values())), \
        "Modularity:", community.modularity(partition, G.to_undirected())
    print "\n\n"
    return partition
def find_best_partition(self):
    G = self.graph.copy()
    modularity = 0.0
    removed_edges = []
    partition = {}
    while True:
        betweenness = self.calculte_betweenness(G)
        max_betweenness_edges = self.get_max_betweenness_edges(betweenness)
        if len(G.edges()) == len(max_betweenness_edges):
            break
        G.remove_edges_from(max_betweenness_edges)
        components = nx.connected_components(G)
        idx = 0
        tmp_partition = {}
        for component in components:
            for inner in list(component):
                tmp_partition.setdefault(inner, idx)
            idx += 1
        cur_mod = community.modularity(tmp_partition, G)
        if cur_mod < modularity:
            # modularity dropped: undo the last removal and stop
            G.add_edges_from(max_betweenness_edges)
            break
        else:
            partition = tmp_partition
            removed_edges.extend(max_betweenness_edges)
            modularity = cur_mod
    return partition, G, removed_edges
def test_allin_is_zero(self):
    """Test that putting everyone in one community yields a modularity of 0."""
    for i in range(self.numtest):
        g = nx.erdos_renyi_graph(50, 0.1)
        part = dict([])
        for node in g:
            part[node] = 0
        self.assertEqual(co.modularity(part, g), 0)
def run_Louvain(fNet, fMask, fOutImg, fOutInfo):
    '''
    A wrapper function for network community detection by the Louvain method.
    Only the largest connected component is parcellated into modules.

    input parameters:
          fNet:     the adjacency list filename for the network
          fMask:    the filename for the mask image. Its header is used to
                    create a modular parcellation image
          fOutImg:  the filename for the output image with modular parcellation
          fOutInfo: the filename with information on modules and modularity.

    returns:
          NONE

    output:
          This function generates files recording modular parcellation.
              fOutImg:   Modular parcellation image
              fOutInfo:  Modular parcellation information as a numpy .npz file.
                         It includes:
                            Q:      The modularity Q
                            NMods:  The number of modules
                            ModID:  Module ID
                            NNodes: The number of nodes in a module. In the
                                    same order as ModID
    '''
    # loading the network data
    G = nx.read_adjlist(fNet, nodetype=int)
    # just the largest subgraph
    GC = max(nx.connected_component_subgraphs(G), key=len)
    # computing the best partition
    partition = community.best_partition(GC)
    # calculating the modularity
    Q = community.modularity(partition, GC)
    # converting the partition into arrays
    VoxInd = [int(i) for i in partition.keys()]
    ModInd = np.array(list(partition.values())) + 1  # the module number starts with 1
    # calculating sizes of the modules
    NMods = np.max(ModInd)
    ModID = range(1, NMods + 1)
    NNodes = []
    for iMod in ModID:
        tmpNNodes = len(np.nonzero(ModInd == iMod)[0])
        NNodes.append(tmpNNodes)
    # reading in the mask image header & data
    img_mask = nib.load(fMask)
    X_mask = img_mask.get_data()
    # organizing the output
    Xout = np.zeros_like(X_mask)
    VoxXYZ = np.unravel_index(VoxInd, X_mask.shape)
    Xout[VoxXYZ] = ModInd
    # writing out the image
    modimg = nib.Nifti1Image(Xout, img_mask.get_affine())
    nib.save(modimg, fOutImg)
    # writing out module stats
    np.savez(fOutInfo, Q=Q, NMods=NMods, ModID=ModID, NNodes=NNodes)
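# Hypothetical invocation of run_Louvain; all four file names below are
# placeholders, not files from the original project. Note the function relies
# on older APIs: nx.connected_component_subgraphs (removed in NetworkX 2.4)
# and nibabel's img.get_data()/get_affine() (deprecated in favour of
# get_fdata() and the .affine attribute), so it needs correspondingly old
# releases of those libraries.
run_Louvain(fNet='network.adjlist',
            fMask='brain_mask.nii.gz',
            fOutImg='modular_parcellation.nii.gz',
            fOutInfo='module_info.npz')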
def networkx_json(nodes, G):
    links = G.edges(data=True)
    groups = community.best_partition(G)
    part = reduce(lambda x, y: x if x > groups[y] else groups[y], groups, 0) + 1
    modular = community.modularity(groups, G)
    # nodes missing from the partition get the extra group id `part`
    groups = {nodes[x]: groups[nodes[x]] if nodes[x] in groups else part
              for x in nodes}
    lsN = map(lambda x: {"id": nodes[x], "name": x, "group": groups[nodes[x]]}, nodes)
    lsL = map(lambda x: {"source": x[0], "target": x[1], "value": x[2]['weight']}, links)  # was 'wight'
    return {"nodes": lsN,
            "links": lsL,
            "modular": round(modular, 2),
            "community": part + 1}
def test_modularity_increase(self):
    """
    Generate a dendrogram and test that modularity is always increasing
    """
    g = nx.erdos_renyi_graph(1000, 0.01)
    dendo = co.generate_dendogram(g)  # older python-louvain spelling of generate_dendrogram
    mods = [co.modularity(co.partition_at_level(dendo, level), g)
            for level in range(len(dendo))]
    self.assertListEqual(mods, sorted(mods))
def communities(net):
    parti = community.best_partition(net)
    modu = community.modularity(parti, net)
    com = parti.values()
    number_of_communities = max(com) + 1
    hist = np.histogram(com, bins=range(0, number_of_communities + 1), density=True)[0]
    values = np.square(hist)
    repartition = 1 / (sum(values))
    return ((number_of_communities, 'number_of_communities'),
            (repartition, 'equivalent_number_of_communities'),
            (modu, 'modularity_Louvain_partition'))
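# Note on 'equivalent_number_of_communities' above: with p_i the fraction of
# nodes in community i, the function returns 1 / sum(p_i^2), the inverse
# Simpson index, i.e. the effective number of communities. A quick worked
# check, assuming four equal-sized communities:
#
#   4 communities holding 25% of the nodes each
#   -> 1 / (4 * 0.25**2) = 4.0, i.e. four "effective" communities,
# whereas very unequal sizes pull the value below the raw community count.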
def q2():
    print "[2] compare modularity values"
    cuisines = ["SoutheastAsian", "NorthAmerican"]
    for cuisine in cuisines:
        G = nx.read_gexf("../../data/network-analysis/%sHW4.gexf" % (cuisine))
        H = G.to_undirected(reciprocal=False)
        part = community.best_partition(H)
        mod = community.modularity(part, H)
        print "...", cuisine, "modularity =", mod
def gen_graph_stats(graph):
    G = nx.read_graphml(graph)
    stats = {}

    edges, nodes = 0, 0
    for e in G.edges_iter():
        edges += 1
    for n in G.nodes_iter():
        nodes += 1
    stats['Edges'] = (edges, 'The number of edges within the Graph')
    stats['Nodes'] = (nodes, 'The number of nodes within the Graph')
    print "%i edges, %i nodes" % (edges, nodes)

    # Accessing the highest-degree node
    center, degree = sorted(G.degree().items(), key=itemgetter(1), reverse=True)[0]
    stats['Center Node'] = ('%s: %0.5f' % (center, degree),
                            'The most central node in the graph, i.e. the node with the highest degree')

    hairball = nx.subgraph(G, [x for x in nx.connected_components(G)][0])
    print "Average shortest path: %0.4f" % nx.average_shortest_path_length(hairball)
    stats['Average Shortest Path Length'] = (nx.average_shortest_path_length(hairball), '')

    # print "Center: %s" % G[center]
    # print "Shortest Path to Center: %s" % p

    print "Degree: %0.5f" % degree
    stats['Degree'] = (degree, 'The node degree is the number of edges adjacent to that node.')

    print "Order: %i" % G.number_of_nodes()
    stats['Order'] = (G.number_of_nodes(), 'The number of nodes in the graph.')

    print "Size: %i" % G.number_of_edges()
    stats['Size'] = (G.number_of_edges(), 'The number of edges in the graph.')

    print "Clustering: %0.5f" % nx.average_clustering(G)
    stats['Average Clustering'] = (nx.average_clustering(G), 'The average clustering coefficient for the graph.')

    print "Transitivity: %0.5f" % nx.transitivity(G)
    stats['Transitivity'] = (nx.transitivity(G), 'The fraction of all possible triangles present in the graph.')

    part = community.best_partition(G)
    # values = [part.get(node) for node in G.nodes()]
    # nx.draw_spring(G, cmap=plt.get_cmap('jet'), node_color=values, node_size=30, with_labels=False)
    # plt.show()
    mod = community.modularity(part, G)
    print "modularity: %0.5f" % mod
    stats['Modularity'] = (mod, 'The modularity of a partition of a graph.')

    knn = nx.k_nearest_neighbors(G)
    print knn
    stats['K Nearest Neighbors'] = (knn, 'The average degree connectivity of the graph.\n'
                                    'The average degree connectivity is the average nearest-neighbor degree of '
                                    'nodes with degree k. For weighted graphs, an analogous measure can be '
                                    'computed using the weighted average neighbor degree.')

    return G, stats
def do_a_series_of_propagations(self):
    index = 0
    while index < self.rounds and self.flag:
        index = index + 1
        print("Label propagation round: " + str(index))
        self.do_a_propagation()
    print("")
    print("Modularity is: " + str(round(modularity(self.labels, self.graph), 3)) + ".")
    json_dumper(self.labels, self.args.assignment_output)
def evaluate(self):
    """
    Computes the modularity.

    :return: modularity
    """
    modularity = community.modularity(self.clusters_labels, self.Graph)
    self.modularity = modularity
    # x = internalValidation(self.adj_matrix, labels)
    return self.modularity
def test_range(self):
    """test that modularity is always between -1 and 1"""
    for _ in range(self.number_of_tests):
        graph = nx.erdos_renyi_graph(50, 0.1)
        part = dict([])
        for node in graph:
            part[node] = random.randint(0, self.number_of_tests / 10)
        mod = co.modularity(part, graph)
        self.assertGreaterEqual(mod, -1)
        self.assertLessEqual(mod, 1)
def q3():
    print "[4] compare community stats before and after edge thresholding"
    G = nx.read_gexf("../../data/network-analysis/complements.gexf")
    H = G.to_undirected()

    part_before = community.best_partition(H)
    num_comm_before = len(set([x[1] for x in part_before.iteritems()]))
    mod_before = community.modularity(part_before, H)
    print("before thresholding, #-communities=%d, modularity=%f" %
          (num_comm_before, mod_before))

    edges_before = H.edges(data=True)
    for edge in edges_before:
        weight = edge[2]["weight"]
        if weight < 0.2:
            H.remove_edge(edge[0], edge[1])

    part_after = community.best_partition(H)
    num_comm_after = len(set([x[1] for x in part_after.iteritems()]))
    mod_after = community.modularity(part_after, H)
    print("after thresholding, #-communities=%d, modularity=%f" %
          (num_comm_after, mod_after))
def run_louvain(g_sym, res, skeleton_labels):
    out_dict = cm.best_partition(g_sym, resolution=res)
    modularity = cm.modularity(out_dict, g_sym)
    partition = np.array(itemgetter(*skeleton_labels)(out_dict))
    part_unique, part_count = np.unique(partition, return_counts=True)
    # mark members of very small communities (fewer than 3 nodes) as unassigned (-1)
    for uni, count in zip(part_unique, part_count):
        if count < 3:
            inds = np.where(partition == uni)[0]
            partition[inds] = -1
    return partition, modularity
def test_range(self):
    """test that modularity is always between -1 and 1"""
    for i in range(self.numtest):
        g = nx.erdos_renyi_graph(50, 0.1)
        part = dict([])
        for node in g:
            part[node] = random.randint(0, self.numtest / 10)
        mod = co.modularity(part, g)
        self.assertGreaterEqual(mod, -1)
        self.assertLessEqual(mod, 1)
def calc_modularity(G, nodal_partition):
    '''
    A function that calculates modularity from the best partition of a graph
    using the Louvain method
    '''
    import community

    modularity = community.modularity(nodal_partition, G)

    return modularity
def modularity(G, assignments=None, best_partition=False):
    if best_partition:
        part = community.best_partition(G)
    elif assignments:
        part = dict(zip(G.nodes(), assignments))
    else:
        # get assignments from the graph's node attributes
        part = dict([(n[0], int(n[1]['partition'])) for n in G.nodes(data=True)])
    mod = community.modularity(part, G)
    return mod
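# Usage sketch for the modularity() wrapper above (toy data, not from the
# original project), exercising two of its three input modes. Assumes the
# module already has `import community`, matching the calls in the wrapper.
import networkx as nx

g = nx.karate_club_graph()

# 1) let Louvain choose the partition
q_louvain = modularity(g, best_partition=True)

# 2) pass explicit assignments aligned with g.nodes() order;
#    a single community always gives Q == 0 (cf. test_allin_is_zero above)
q_single = modularity(g, assignments=[0] * g.number_of_nodes())
print(q_louvain, q_single)  # e.g. ~0.42 and 0.0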
def print_communities(partition, G):
    data = get_data(partition)
    communities = pd.DataFrame(data).groupby('community')
    for key, item in communities:
        print(communities.get_group(key), "\n")
    global nc
    nc = len(communities)
    modularity = community.modularity(partition, G)
    print("Number of Communities: ", nc, "\nModularity: ", modularity)
def cluster_graph(H, resolution, weight='weight'):
    print("------------Louvain------------------")
    results_df = pd.DataFrame()
    results_df.index.name = 'Timepoint'
    for i in range(n_year):
        start = timeit.default_timer()
        if i == 0:
            cluster_dict = {}
        num_clusters_last = len(set(cluster_dict.values()))
        Graph = H[i]
        num_nodes = len(Graph.nodes)
        # seed this year's partition with last year's clusters;
        # nodes not seen before get fresh cluster IDs
        partition_dict = {}
        num_increment = 0
        for node in Graph.nodes:
            if node in cluster_dict:
                partition_dict[node] = cluster_dict[node]
            else:
                partition_dict[node] = num_clusters_last + num_increment
                num_increment += 1
        cluster_dict = community.best_partition(Graph, resolution=resolution,
                                                partition=partition_dict, weight=weight)
        num_clusters = len(set(cluster_dict.values()))
        nx.set_node_attributes(Graph, cluster_dict, 'Louvain cluster')
        stop = timeit.default_timer()
        cal_time = stop - start
        num_edges = len(Graph.edges)
        modularity = community.modularity(cluster_dict, Graph)
        results_df.loc[(i + min_year), 'Resolution'] = resolution
        results_df.loc[(i + min_year), 'Num_Clusters'] = num_clusters
        results_df.loc[(i + min_year), 'Modularity'] = modularity
        results_df.loc[(i + min_year), 'Num_Nodes'] = num_nodes
        results_df.loc[(i + min_year), 'Num_Edges'] = num_edges
        results_df.loc[(i + min_year), 'Calculation_Time'] = cal_time
        # display(results_df.loc[(i+min_year):(i+min_year+1), :])
        print('Year: {:4d}'.format(i + min_year),
              "| {:6d} nodes ".format(num_nodes),
              "| {: 5d} clusters".format(num_clusters),
              "| Modularity: {:.6f}".format(modularity),
              " | Calculation time: {: 6.2f} sec".format(cal_time))
    # w = pd.ExcelWriter(graph_dir + 'Clustering_Results' + desc + '.xlsx')
    # sheetname = 'Clustering_Results'
    # results_df.to_excel(w, sheetname)
    # w.sheets[sheetname].set_column(0, 7, 10)
    # w.save()
    results_df.to_csv(graph_dir + 'Clustering_Results_ver10' + desc + '.csv')
    display(results_df)
    return H
def get_rf_metric_cutoff(G_origin, weight="weight", cutoff_step=0.025,
                         drop_threshold=0.01):
    """Get good clustering cutoff points for the Ricci flow metric by detecting
    changes of modularity while removing edges.

    Parameters
    ----------
    G_origin : NetworkX graph
        A graph with "weight" as the Ricci flow metric to cut.
    weight : str
        The edge weight used as the Ricci flow metric. (Default value = "weight")
    cutoff_step : float
        The step size used to find the good cutoff points.
    drop_threshold : float
        At least drop this much to be considered a drop for good_cut.

    Returns
    -------
    good_cuts : list of float
        A list of possible cutoff points; usually the first one is used as the
        best cut.
    """
    G = G_origin.copy()
    modularity, ari = [], []
    maxw = max(nx.get_edge_attributes(G, weight).values())
    cutoff_range = np.arange(maxw, 1, -cutoff_step)

    for cutoff in cutoff_range:
        G = cut_graph_by_cutoff(G, cutoff, weight=weight)
        # Get connected components after the cut as the clustering
        clustering = {c: idx for idx, comp in enumerate(nx.connected_components(G))
                      for c in comp}
        # Compute modularity
        modularity.append(community_louvain.modularity(clustering, G, weight))

    good_cuts = []
    mod_last = modularity[-1]

    # check drop from 1 -> maxw
    for i in range(len(modularity) - 1, 0, -1):
        mod_now = modularity[i]
        if mod_last > mod_now > 1e-4 and abs(mod_last - mod_now) / mod_last > drop_threshold:
            logger.trace("Cut detected: cut:%f, diff:%f, mod_now:%f, mod_last:%f" %
                         (cutoff_range[i + 1], mod_last - mod_now, mod_now, mod_last))
            good_cuts.append(cutoff_range[i + 1])
        mod_last = mod_now

    return good_cuts
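# Usage sketch for get_rf_metric_cutoff (assumed setup): the function needs a
# graph whose edge weights already hold a Ricci-flow metric, plus the
# cut_graph_by_cutoff helper and `logger` from the same module, and
# python-louvain imported as community_louvain. Random weights in (1, 3)
# stand in here for a real Ricci flow result, just to exercise the search.
import random
import networkx as nx

G = nx.karate_club_graph()
for u, v in G.edges():
    G[u][v]["weight"] = random.uniform(1.0, 3.0)  # placeholder metric values
cuts = get_rf_metric_cutoff(G, weight="weight", cutoff_step=0.025)
print(cuts)  # candidate cutoffs; the first is typically used as the best cut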
def test_karate(self):
    """Test modularity on Zachary's karate club."""
    graph = nx.karate_club_graph()
    part = co.best_partition(graph, random_state=0)
    self.assertTrue(co.modularity(part, graph) > 0.41)
    for e1, e2 in graph.edges():
        graph[e1][e2]["test_weight"] = 1.
    part_weight = co.best_partition(graph, weight="test_weight", random_state=0)
    self.assertAlmostEqual(co.modularity(part, graph),
                           co.modularity(part_weight, graph, "test_weight"),
                           places=2)
    part_res_low = co.best_partition(graph, resolution=0.1)
    self.assertTrue(len(set(part.values())) < len(set(part_res_low.values())))
def test_louvain_with_edgevals(graph_file):
    gc.collect()
    M = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)
    cu_parts, cu_mod = cugraph_call(cu_M, edgevals=True)
    nx_parts = networkx_call(M)

    # Calculating modularity scores for comparison
    Gnx = nx.from_pandas_edgelist(
        M, source="0", target="1", edge_attr="weight", create_using=nx.Graph()
    )
    cu_map = {0: 0}
    for i in range(len(cu_parts)):
        cu_map[cu_parts["vertex"][i]] = cu_parts["partition"][i]
    assert set(nx_parts.keys()) == set(cu_map.keys())

    cu_mod_nx = community.modularity(cu_map, Gnx)
    nx_mod = community.modularity(nx_parts, Gnx)
    assert len(cu_parts) == len(nx_parts)
    assert cu_mod > (0.82 * nx_mod)
    assert abs(cu_mod - cu_mod_nx) < 0.0001
def test_louvain_with_edgevals(graph_file):
    M = read_mtx_file(graph_file + '.mtx')
    cu_M = read_csv_file(graph_file + '.csv')
    cu_parts, cu_mod = cugraph_call(cu_M, edgevals=True)
    nx_parts = networkx_call(M)

    # Calculating modularity scores for comparison
    Gnx = nx.Graph(M)
    cu_map = {0: 0}
    for i in range(len(cu_parts)):
        cu_map[cu_parts['vertex'][i]] = cu_parts['partition'][i]
    assert set(nx_parts.keys()) == set(cu_map.keys())

    cu_mod_nx = community.modularity(cu_map, Gnx)
    nx_mod = community.modularity(nx_parts, Gnx)
    assert len(cu_parts) == len(nx_parts)
    assert cu_mod > (.82 * nx_mod)
    print(cu_mod)
    print(cu_mod_nx)
    print(nx_mod)
    assert abs(cu_mod - cu_mod_nx) < .0001
def test_modularity_increase(self):
    """
    Generate a dendrogram and test that modularity is always increasing
    """
    graph = nx.erdos_renyi_graph(1000, 0.01)
    dendo = co.generate_dendrogram(graph)
    mods = [co.modularity(co.partition_at_level(dendo, level), graph)
            for level in range(len(dendo))]
    self.assertListEqual(mods, sorted(mods))
def find_communities(self, G):
    tab = Table()
    start = timeit.default_timer()
    communities = community.best_partition(G)
    M = community.modularity(communities, G)
    stop = timeit.default_timer()
    tab.from_tuples([(len(set(communities.values())), M, (stop - start))],
                    columns=['Unique', 'M', 'Time (sec)'])
    tab.sort_values(by='M', ascending=False)
    tab.display()
    return communities
def add_louvain_communities():
    partition = community.best_partition(Graph, random_state=42)
    nodes_info['community'] = -1
    for node in partition:
        nodes_info.iloc[int(node),
                        nodes_info.columns.get_loc('community')] = int(partition.get(node))
    print("Community partition modularity: ",
          round(community.modularity(partition, Graph), 3))
    print("Number of communities: ", len(set(partition.values())))
    print("-----------")
def compute_modular(self):
    '''
    :return: Modularity lies in the range [-1/2, 1), so negative values are
             possible; the literature suggests that a Q value between 0.3 and
             0.7 indicates a good clustering.
    '''
    G = nx.Graph()
    allneurons = self.getNeurons()
    nids = [n.id for n in allneurons]
    G.add_nodes_from(nids)
    synapses = self.getSynapses()
    for s in synapses:
        G.add_edge(s.fromId, s.toId)
    part = community.best_partition(G)
    return (community.modularity(part, G), part)

    # Alternative implementation, kept below as unreachable commented-out
    # blocks. Build a distance matrix between points, here using Euclidean
    # distance:
    '''
    points = self.getConnectionMarix(returntype=list, valuetype='01')
    disMat = sch.distance.pdist(points, 'euclidean')
    # hierarchical clustering:
    Z = sch.linkage(disMat, method='average')
    # cluster assignments from the linkage matrix Z:
    cluster = sch.fcluster(Z, t=0, criterion='inconsistent')
    print('module partition of network ' + str(self.id) + ': ' + str(cluster))
    # compute modularity, based on "Neural Modularity Helps Organisms Evolve to
    # Learn New Skills without Forgetting Old Skills"
    '''
    # 1. build the community matrix
    '''
    m = len(set(cluster))  # number of clusters
    if m <= 1:
        return 0.
    n = len(points)
    points = np.array(points)
    lin = list(map(np.sum, points))
    col = list(map(np.sum, zip(*points)))
    sum = 0.
    for i in range(n):
        for j in range(n):
            if cluster[i] != cluster[j]:
                continue
            ki_in = col[i]
            kj_out = lin[j]
            sum += points[i][j] - ki_in * kj_out / (2 * m)
    Q = sum / (2 * m)
    return Q
    '''
def louvain(G, number_of_nodes, matlab_bs=False):
    print('\nLouvain Algorithm')
    louvain = community.best_partition(G)
    # print('Louvain Communities:\n', louvain)
    louvain_labels_predicted, num_of_communities_louvain = \
        get_predicted_label_from_louvain(louvain, number_of_nodes, matlab_bs)
    print('Number of Communities with Louvain:', num_of_communities_louvain)
    print('Modularity:', community.modularity(louvain, G))
    # print('Louvain Truth Labels:\n', louvain_labels_predicted)
    return louvain_labels_predicted
def do_a_series_of_propagations(self):
    """
    Do propagations until convergence or until the time budget is reached.
    """
    index = 0
    while index < self.rounds and self.flag:
        index = index + 1
        print("\nLabel propagation round: " + str(index) + ".\n")
        self.do_a_propagation()
    print("")
    print("Modularity is: " + str(round(modularity(self.labels, self.graph), 3)) + ".\n")
    json_dumper(self.labels, self.args.assignment_output)
def sim_community_maker2(dismat, threshold, tags=None):
    adjmat = dismat.copy()
    # Set the diagonal elements to a small value so that they won't be zeroed out
    np.fill_diagonal(adjmat, np.min(dismat))
    adjmat = adjmat.reshape((-1,))
    adjmat[adjmat > threshold] = 0
    adjmat = adjmat.reshape(dismat.shape)
    G = make_graph(adjmat, labels=tags)
    partition = partition_calculate1(G)
    F = community.modularity(partition, G)
    return F
def calcuModularity(listResult, edgeList):
    '''
    Calculate modularity through the python-louvain modularity function
    https://programminghistorian.org/en/lessons/exploring-and-analyzing-network-data-with-python
    '''
    G = nx.Graph()
    G.add_weighted_edges_from(edgeList)
    partition = {}
    for item in range(len(listResult)):
        partition[item] = listResult[item]
    global_modularity = community.modularity(partition, G)
    return global_modularity
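# Minimal usage sketch for calcuModularity (toy data, not from the original
# project). Positions in listResult are assumed to be the node IDs used in
# edgeList, so every node 0..len(listResult)-1 must appear in the edge list.
edge_list = [(0, 1, 1.0), (1, 2, 1.0), (2, 0, 1.0), (3, 4, 1.0)]
labels = [0, 0, 0, 1, 1]  # node i belongs to community labels[i]
print(calcuModularity(labels, edge_list))
# two well-separated communities -> Q = (3/4 - (6/8)^2) + (1/4 - (2/8)^2) = 0.375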
def get_comm_dict_and_partition(g):
    partition = community.best_partition(g)
    print "Louvain Modularity: ", community.modularity(partition, g)
    print "Louvain Partition: ", partition
    # invert the node -> community dict into community -> node list
    reverse_dict = {}
    for node in partition:
        if partition[node] not in reverse_dict:
            reverse_dict[partition[node]] = []
        reverse_dict[partition[node]].append(node)
    print 'Node List Dict:', reverse_dict
    return reverse_dict, partition
def Mod(G, usebest=True, l=1):
    D = G.to_undirected()
    dendo = community.generate_dendogram(D, None)  # older python-louvain spelling of generate_dendrogram
    if usebest:
        level = len(dendo) - 1
    else:
        level = l
    partition = community.partition_at_level(dendo, level)
    mod = community.modularity(partition, D)
    for n in G:
        G.node[n]['m'] = partition[n]
    return mod
def neural_modularity_calculator(graph, embedding, means):
    """
    Function to calculate the GEMSEC cluster assignments.
    """
    assignments = {}
    for node in graph.nodes():
        positions = means - embedding[node, :]
        values = np.sum(np.square(positions), axis=1)
        index = np.argmin(values)
        assignments[int(node)] = int(index)
    modularity = community.modularity(assignments, graph)
    return modularity, assignments
def classical_modularity_calculator(graph, embedding, args):
    """
    Function to calculate the DeepWalk cluster centers and assignments.
    """
    kmeans = KMeans(n_clusters=args.cluster_number, random_state=0, n_init=1).fit(embedding)
    assignments = {i: int(kmeans.labels_[i]) for i in range(0, embedding.shape[0])}
    modularity = community.modularity(assignments, graph)
    return modularity, assignments
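# Toy usage sketch for the two calculators above; the random embedding, the
# means, and the argparse.Namespace stand in for real GEMSEC/DeepWalk training
# outputs and CLI arguments. Assumes the module already imports numpy as np
# and sklearn's KMeans, matching the calls inside the functions.
import argparse
import networkx as nx
import numpy as np

graph = nx.karate_club_graph()  # nodes are 0..33, usable as embedding row indices
embedding = np.random.rand(graph.number_of_nodes(), 16)
means = np.random.rand(4, 16)   # 4 hypothetical cluster centers

q_neural, _ = neural_modularity_calculator(graph, embedding, means)
q_classical, _ = classical_modularity_calculator(
    graph, embedding, argparse.Namespace(cluster_number=4))
print(q_neural, q_classical)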
def test_louvain(graph_file):
    gc.collect()
    M = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)
    cu_parts, cu_mod = cugraph_call(cu_M)
    nx_parts = networkx_call(M)

    # Calculating modularity scores for comparison
    Gnx = nx.from_pandas_edgelist(M, source='0', target='1',
                                  edge_attr='weight', create_using=nx.Graph())
    cu_map = {0: 0}
    for i in range(len(cu_parts)):
        cu_map[cu_parts['vertex'][i]] = cu_parts['partition'][i]
    assert set(nx_parts.keys()) == set(cu_map.keys())

    cu_mod_nx = community.modularity(cu_map, Gnx)
    nx_mod = community.modularity(nx_parts, Gnx)
    assert len(cu_parts) == len(nx_parts)
    assert cu_mod > (.82 * nx_mod)
    assert abs(cu_mod - cu_mod_nx) < .0001
def modularity(self):
    part = self.get_partition()
    if not part:
        return None
    g = self.getG()
    try:
        modul = pylouvain.modularity(part, g)
    except NameError:
        self.log.error('python-louvain library is not installed\n'
                       'Modularity can\'t be computed')
        modul = None
    return modul
def calc_graph(matrix):
    thresholds = [90, 85, 80, 75]
    glob = np.zeros((1, 4))
    loc = np.zeros((1, 4))
    Q = np.zeros((1, 4))
    Ch = np.zeros((1, 4))
    Ph = np.zeros((1, 4))
    data = np.zeros((1, 5))

    # Run graph measure analysis
    for index, threshold in enumerate(thresholds):
        graph = mat2graph_threshold(matrix, threshold)

        # Calculating global and average local efficiency
        glob[0, index] = nx.global_efficiency(graph)
        loc[0, index] = nx.local_efficiency(graph)

        # Community detection and modularity (1.25)
        # NB: best_partition's `weight` expects an edge-attribute name;
        # '1.25' is kept as in the original, though the comment suggests a
        # resolution of 1.25 may have been intended.
        part = community.best_partition(graph, weight='1.25')
        Q[0, index] = community.modularity(part, graph)

        # Calculating connector and provincial hubs
        Z = module_degree_zscore(matrix, part)
        P = participation_coefficient(matrix, part)
        # connector hubs
        ch = np.zeros(matrix.shape[0])
        for i in range(len(ch)):
            if P[i] > 0.8 and Z[i] < 1.5:
                ch[i] = 1.0
        Ch[0, index] = np.sum(ch)
        # provincial hubs
        ph = np.zeros(matrix.shape[0])
        for i in range(len(ph)):
            if P[i] <= 0.3 and Z[i] >= 1.5:
                ph[i] = 1
        Ph[0, index] = np.sum(ph)

    # Averaging over each graph threshold
    meanglob = np.mean(glob)
    meanloc = np.mean(loc)
    meanQ = np.mean(Q)
    meanCh = np.mean(Ch)
    meanPh = np.mean(Ph)
    data[0, 0] = meanglob
    data[0, 1] = meanloc
    data[0, 2] = meanQ
    data[0, 3] = meanCh
    data[0, 4] = meanPh
    return data
def lp(direction, graph):
    print('lp.....')
    print('Running query for community detection.....')
    t = datetime.now()
    louvain_query = graph.run('''
        CALL algo.labelPropagation.stream("User", null,
            {direction: ''' + '"' + direction + '"' + ''', iterations: 10})
        YIELD nodeId, label
        RETURN algo.getNodeById(nodeId) as node, label
    ''').data()
    time_taken = datetime.now() - t

    print('Converting results.....')
    results = defaultdict(list)
    for item in louvain_query:
        results[str(item['label'])].append(str(item['node']['id']))

    print('Get Neo4jGraph.....')
    graph_query = graph.run('''
        MATCH (n:User)-[r]->(m:User)
        RETURN n.id,TYPE(r),m.id
    ''').to_data_frame()

    print('Convert graph to nx graph.....')
    nx_graph = nx.from_pandas_edgelist(
        df=graph_query,
        source='n.id',
        target='m.id',
        edge_attr=True,
        create_using=nx.MultiGraph(name='Travian_Graph'))

    modularity = None
    print('Computing modularity.....')
    try:
        modularity = nx.algorithms.community.modularity(nx_graph, results.values())
    except Exception as e:
        print(repr(e))
        try:
            # fallback; note python-louvain expects a node -> community dict,
            # so this call is unlikely to succeed with a list of communities
            modularity = community.modularity(results.values(), nx_graph)
        except Exception as e:
            print(repr(e))

    extract_results('neo_lp' + direction, nx_graph, results, time_taken, modularity)
    values = calc_color_values(nx_graph, results)
    visualize('Label Propagation ' + direction, nx_graph, values)
    print('lp finished.....')
def calc_community_modularity(bp, g):
    """
    calc_community_modularity(bp, g)
    calculate modularity using community
    :param bp: best partition
    :param g: source graph
    :return: ---
    """
    logging.info(cs_ref, 'modularity from Louvain')
    modularity_info = "\tModularity : for best partition is : " + str(cmty.modularity(bp, g))
    with open(dest_file, "a") as dat_file:
        dat_file.write("\n" + modularity_info)
    print(modularity_info)
def graph_metric(G, metric):
    if metric == 'modularity':
        partition = community.best_partition(G)
        mod = community.modularity(partition, G)
        return mod
    if metric == 'global_clustering':
        if G.size() == 0:
            cluster_coeff = 0
        else:
            # cluster_coeff = nx.algorithms.cluster.average_clustering(G)
            cluster_coeff = nx.transitivity(G)
        return cluster_coeff
def community_detection(self):
    """
    Detects characteristics related to the communities of the graph and writes
    them to the 'Communities.txt' file. It also compares these characteristics
    with a random graph of the same node and edge size.
    """
    partitionx = communityx.greedy_modularity_communities(self.G)

    """Modularity & Coverage"""
    modularity = community.modularity(self.best_parts, self.G)  # XXX
    coverage = communityx.coverage(self.G, partitionx)

    """in the corresponding random graph"""
    # H = nx.gnm_random_graph(self.G.number_of_nodes(), self.G.number_of_edges())
    H = nx.configuration_model([d for v, d in self.G.degree()])
    part = community.best_partition(H)  # XXX
    part2 = communityx.greedy_modularity_communities(H)
    modularity_rand = community.modularity(part, H)
    coverage_rand = communityx.coverage(H, part2)

    """Write File"""
    title = 'Communities.txt'
    com_file = open(self.path + title, 'w')
    com_file.write('Modularity:' + '\n')
    com_file.write(str(modularity) + '\n')
    com_file.write('Coverage' + '\n')
    com_file.write(str(coverage) + '\n')
    com_file.write('The corresponding random graph has modularity:' + '\n')
    com_file.write(str(modularity_rand) + '\n')
    com_file.write('The corresponding random graph has coverage:' + '\n')
    com_file.write(str(coverage_rand))
    com_file.write('\n')
    com_file.write('number of communities:' + '\n')
    com_file.write(str(max(self.best_parts.values()) + 1) + '\n')  # XXX
    # com_file.write(str(max(self.best_parts_x.values())+1)+'\n')
    com_file.write('\n')
    com_file.write('The coverage of a partition is the ratio of the number of '
                   'intra-community edges to the total number of edges in the graph.')
    com_file.close()
    return modularity, coverage, modularity_rand, coverage_rand
def get_modularity_value(self, a_grn, louvain=False):
    if isinstance(a_grn, list):
        a_grn = self.generate_directed_grn(a_grn)
    if louvain:
        modularity_partition = community.best_partition(a_grn.to_undirected())
    else:
        # fixed partition: five consecutive node IDs per module
        node_no = len(a_grn.nodes())
        modularity_partition = {}
        for i in range(node_no):
            modularity_partition[i] = int(i / 5)
    return community.modularity(modularity_partition, a_grn.to_undirected())
def graph_analysis(G):
    """Analyze a graph. Returns a dictionary with useful data.
    Cannot deal with weights below 0, so all negative weights are set to 0."""
    MG = main_graph(G)
    for a, b in MG.edges():
        w = MG[a][b]['weight']
        if w < 0:
            MG[a][b]['weight'] = 0
    partition = community.best_partition(MG)
    return {
        'num_clusters': max(partition.values()),  # NB: community IDs start at 0, so this is the max ID, not the count
        'modularity': community.modularity(partition, MG),
        'size': len(MG.nodes()),
        'partition': partition}
def calc_modularity(G):
    import numpy as np
    import networkx as nx
    import community

    # Binarize the graph
    for u, v, d in G.edges(data=True):
        d['weight'] = 1

    # Compute the best partition based on the threshold you've specified in cost
    partition = community.best_partition(G)

    modularity = community.modularity(partition, G)

    return modularity
def test_disjoint_clique(self):
    """
    A group of num_clique disjoint cliques of size size_clique should
    maximize the modularity and have a modularity of 1 - 1/num_clique
    """
    for num_test in range(self.numtest):
        size_clique = random.randint(5, 20)
        num_clique = random.randint(5, 20)
        g = nx.Graph()
        for i in range(num_clique):
            clique_i = nx.complete_graph(size_clique)
            g = nx.union(g, clique_i, rename=("", str(i) + "_"))
        part = dict([])
        for node in g:
            part[node] = node.split("_")[0].strip()
        mod = co.modularity(part, g)
        self.assertAlmostEqual(mod, 1. - 1. / float(num_clique),
                               msg="Num clique: " + str(num_clique) +
                                   " size_clique: " + str(size_clique))
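# Why 1 - 1/num_clique: with k disjoint cliques of m_c edges each, the total
# edge count is m = k * m_c, and all of each clique's degree mass stays inside
# its own community, so for every community c
#   l_c / m = 1/k   and   (d_c / (2m))^2 = (1/k)^2,
# giving Q = sum_c [ l_c/m - (d_c/(2m))^2 ] = k * (1/k - 1/k^2) = 1 - 1/k.
# Quick numeric check for k = 5 cliques: Q = 1 - 1/5 = 0.8.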
def decompose_graph(g, max_mod, decomposed=None):
    if decomposed is None:
        decomposed = []
    p = com.best_partition(g)
    if com.modularity(p, g) < max_mod:
        decomposed.append(g)
    else:
        # split communities and recurse on each induced subgraph
        part_ids = np.unique(p.values())
        for pi in part_ids:
            gi = g.copy()
            for n in g.nodes_iter():
                if p[n] != pi:
                    # remove all nodes not in partition pi
                    gi.remove_node(n)
            decompose_graph(gi, max_mod, decomposed)
    return decomposed
def print_communities(G, sstt):
    part = comm.best_partition(G)
    print 'Number of communities of %s = %i' % (sstt, max(part.values()) + 1)
    print 'Community partition of %s:' % (sstt)
    parLis = []
    partdi = {}
    for i, k in part.items():
        if k not in partdi:
            partdi[k] = [i]
        else:
            partdi[k].append(i)
    for i, k in partdi.items():
        parLis.append(k)
    print parLis
    nodper = {i: i for i in G.nodes()}
    print 'Community modularity of %s = %.4f' % (sstt, comm.modularity(part, G))
    return part, nodper