def modified_girvan_newman_algorithm(g):
    """Remove edges by betweenness and remember the best-scoring fragmentation.

    On every iteration the weighted edge-betweenness centrality of the
    current graph is computed and one edge with the highest score is chosen
    for removal.  Before removing it, the connected components of the current
    graph are scored with weighted modularity (computed on the current,
    partially dismantled graph) and the best fragmentation seen so far is
    kept.

    ``g`` is modified in place: it has no edges left when the function
    returns.  A ``seed`` name from the enclosing scope is used for the
    centrality computation and for breaking ties.

    Args:
        g: Graph to fragment; weights are read from the ``'weight'`` edge
            attribute.

    Returns:
        A tuple ``(max_modularity, saved_components, saved_graph)``.
        ``max_modularity`` starts at the modularity of the single-community
        partition; ``saved_components`` and ``saved_graph`` stay ``[]`` and an
        empty graph when no fragmentation scores higher than that.
    """
    max_modularity = nx_comm.modularity(g, [set(g.nodes)], weight='weight')
    saved_components = []
    saved_graph = nx.Graph()

    while g.number_of_edges() != 0:
        centralities = nx.edge_betweenness_centrality(g, weight='weight', seed=seed)

        # Collect every edge that shares the top score in one pass instead of
        # sorting the values repeatedly.
        top_score = max(centralities.values())
        tied_edges = [
            edge for edge, score in centralities.items() if score == top_score
        ]
        if len(tied_edges) > 1:
            # Several edges are equally central (this now also covers the
            # case where only two edges are left): pick one with the seeded
            # generator.
            u, v = random.Random(seed).choice(tied_edges)
        else:
            u, v = tied_edges[0]

        components = sorted(nx.connected_components(g), key=len, reverse=True)
        if len(components) > 1:
            fragmented_modularity = nx_comm.modularity(g, components, weight='weight')
            if fragmented_modularity > max_modularity:
                max_modularity = fragmented_modularity
                saved_components = components
                saved_graph = g.copy()

        g.remove_edge(u, v)

    return max_modularity, saved_components, saved_graph
def girvan_newman_algorithm(G, weight):
    """Log modularity while a copy of ``G`` is dismantled edge by edge.

    ``G`` is the original network and is left untouched; a working copy has
    its most central edge (by edge-betweenness) removed at every step.  The
    connected components of the working copy are scored against ``G`` before
    each removal.

    Args:
        G: The original network.
        weight: Edge attribute name passed to the modularity and betweenness
            computations.

    Returns:
        ``(log_step, log_modularity, max_step)``: the step numbers, the
        modularity at each step, and the first step at which the highest
        modularity above 0 was observed (0 if none was).
    """
    working = G.copy()

    # Scored once on the intact network; both values are recomputed inside
    # the loop before they are read.
    components = sorted(nx.connected_components(G), key=len, reverse=True)
    score = community.modularity(G, communities=components, weight=weight)

    step = 0
    max_step = 0
    best_score = 0  # highest modularity seen so far
    log_step = []
    log_modularity = []

    while len(working.edges()) > 0:
        # Extract the current communities and score them on the original graph.
        components = sorted(nx.connected_components(working), key=len, reverse=True)
        score = community.modularity(G, communities=components, weight=weight)
        if score > best_score:
            max_step, best_score = step, score

        log_step.append(step)
        log_modularity.append(score)
        print("step: ", step, " modularity: ", score)

        # Remove the edge with the largest betweenness centrality.
        step += 1
        centrality = nx.edge_betweenness_centrality(working, weight=weight)
        top_edge = max(centrality, key=centrality.get)
        working.remove_edge(top_edge[0], top_edge[1])

    return log_step, log_modularity, max_step
def girvan_newman_algorithm(G, weight):
    """Re-add the edges of ``G`` one at a time, logging modularity on the way.

    Starts from a graph holding only the nodes of ``G``.  At every step each
    not-yet-added edge is tried on a scratch copy, the scratch copy's
    connected components are scored on that scratch copy, and the first edge
    with the highest score is moved into the growing graph.

    Args:
        G: The original network.
        weight: Edge attribute name passed to the modularity computations.

    Returns:
        ``(log_step, log_modularity, max_g, max_m, max_k, max_step)``.  The
        logged modularity for a step is the score of the growing graph
        *before* that step's edge was added, measured on ``G``.  The four
        ``max_*`` values are set from the initial edgeless graph and are not
        updated inside the loop.
    """
    built = nx.Graph()
    built.add_nodes_from(nx.nodes(G))

    step = 0
    log_step = []
    log_modularity = []

    # Initial snapshot of the edgeless graph.
    max_g = built.copy()
    components = sorted(nx.connected_components(built), key=len, reverse=True)
    max_k = [list(component) for component in components]
    score = community.modularity(G, communities=components, weight=weight)
    max_m = score
    max_step = 0

    remaining = G.copy()
    while len(remaining.edges()) > 0:
        components = sorted(nx.connected_components(built), key=len, reverse=True)
        score = community.modularity(G, communities=components, weight=weight)

        # Score every candidate edge on a scratch copy of the growing graph.
        candidates = list(remaining.edges())
        trial_scores = []
        for edge in candidates:
            trial = built.copy()
            trial.add_edge(edge[0], edge[1])
            trial_components = sorted(
                nx.connected_components(trial), key=len, reverse=True)
            trial_scores.append(
                community.modularity(
                    trial, communities=trial_components, weight=weight))

        # First index holding the strictly largest score.
        best_index = 0
        for index, value in enumerate(trial_scores):
            if value > trial_scores[best_index]:
                best_index = index

        chosen = candidates[best_index]
        remaining.remove_edge(chosen[0], chosen[1])
        built.add_edge(chosen[0], chosen[1])

        step += 1
        log_step.append(step)
        log_modularity.append(score)
        print("step: ", step, " modularity: ", score)

    return log_step, log_modularity, max_g, max_m, max_k, max_step
def compute_all(graph, true_labels, marginals):
    """Collect overlap, modularity and mutual-information scores.

    Args:
        graph: Graph handed to ``modularity``.
        true_labels: 2-D array; the arg-max along axis 1 is used as the true
            class of each row.
        marginals: 2-D array; the arg-max along axis 1 is used as the
            predicted class of each row.

    Returns:
        A dict with the keys ``"Soft Overlap"``, ``"Hard Overlap"``,
        ``"Modularity"`` and ``"Mutual Information"``.  The two overlap
        entries are ``None`` when the arrays differ in their second dimension.
    """
    same_width = true_labels.shape[1] == marginals.shape[1]
    if same_width:
        normalized_overlap = community_soft_overlap(true_labels, marginals)
        normalized_hard_overlap = community_hard_overlap(true_labels, marginals)
    else:
        normalized_overlap = None
        normalized_hard_overlap = None

    hard_labels = np.argmax(marginals, axis=1)
    # One set of row indices per label value from 0 up to the largest label.
    partition = [
        set(np.argwhere(hard_labels == label).flatten())
        for label in range(hard_labels.max() + 1)
    ]
    hard_modularity = modularity(graph, partition)

    true_labels_categorical = np.argmax(true_labels, axis=1)
    nmi = normalized_mutual_info_score(
        true_labels_categorical, hard_labels, average_method='arithmetic')

    results = {}
    results["Soft Overlap"] = normalized_overlap
    results["Hard Overlap"] = normalized_hard_overlap
    results["Modularity"] = hard_modularity
    results["Mutual Information"] = nmi
    return results
def evaluate(communities, nxgraph, iggraph, groundtruth, NMIs, ARIs, Qs, method):
    """Score a partition and cross-check each metric between two libraries.

    Modularity, NMI and ARI are each computed twice (networkx/sklearn versus
    igraph).  A line is printed whenever the two values of a metric differ by
    more than a tolerance.  The first value of each metric is appended to
    ``NMIs[method]``, ``ARIs[method]`` and ``Qs[method]``.

    Returns:
        ``(NMI, ARI, Q)``.
    """
    membership = partitionListToMembership(communities)
    if DEBUG:
        print(f'membership{membership}')

    # Modularity: networkx versus igraph.
    q_networkx = nxcom.modularity(nxgraph, communities)
    q_igraph = iggraph.modularity(membership)
    if abs(q_networkx - q_igraph) > 1e-5:
        print(f'\t\t Q: networkx.alg: {q_networkx} \t igraph: {q_igraph}')

    # Normalized mutual information: sklearn versus igraph.
    nmi_sklearn = sm.normalized_mutual_info_score(
        groundtruth, membership, average_method="arithmetic")
    nmi_igraph = compare_communities(membership, groundtruth, method="nmi")
    if abs(nmi_sklearn - nmi_igraph) > 1e-3:
        print(f'\t\t NMI \t sklearn: {nmi_sklearn} \t igraph: {nmi_igraph}')

    # Adjusted Rand index: sklearn versus igraph.
    ari_sklearn = sm.adjusted_rand_score(groundtruth, membership)
    ari_igraph = compare_communities(membership, groundtruth, method="ari")
    if abs(ari_sklearn - ari_igraph) > 1e-5:
        print(f'\t\t ARI \t sklearn: {ari_sklearn} \t igraph: {ari_igraph}')

    for store, value in ((NMIs, nmi_sklearn), (ARIs, ari_sklearn), (Qs, q_networkx)):
        store[method].append(value)
    return nmi_sklearn, ari_sklearn, q_networkx
def _build_annotated_knn_graph(X, Y, knn, eQuartile, labels, colors):
    """Build one kNN graph and store its neighbours and modularity on it."""
    g, nbrs = networkBuildKnn(X, Y, knn, eQuartile, labels, colors)
    g.graph["nbrs"] = nbrs
    g.graph["mod"] = nx_comm.modularity(g, g.graph["classNodes"])
    return g


def quipusBuildKnn(
    X_net,
    Y_net,
    knn,
    eQuartile=0.5,
    labels=False,
    colors=[
        "#a8201a",
        "#46acc2",
        "#47a64e",
        "#99582a",
        "#d81159",
        "#e8e4e1",
        "#e8e4e1",
        "#e8e2e1",
        "#e8e1e1",
        "#e8e1e1",
        "#e8e1e1",
        "#e8e1e1",
        "#e8e1e1",
    ],
    inside=False,
):
    """Build one annotated kNN graph per feature column of ``X_net``.

    Each graph comes from ``networkBuildKnn`` and gets two extra graph
    attributes: ``"nbrs"`` (the second value returned by the builder) and
    ``"mod"`` (the modularity of the partition stored under
    ``"classNodes"``).

    Args:
        X_net: 2-D array; column ``i`` is used for the ``i``-th graph, with
            NaN entries dropped.
        Y_net: Labels, reshaped to one column and filtered with the same
            NaN mask as the feature column.
        knn, eQuartile, labels, colors: Forwarded unchanged to
            ``networkBuildKnn``.  The default ``colors`` list is shared
            between calls and is not modified here.
        inside: When true, a graph built from all of ``X_net`` is placed
            first in the result.

    Returns:
        List of graphs: the optional whole-data graph followed by one graph
        per column.
    """
    G = []
    if inside:
        G.append(
            _build_annotated_knn_graph(X_net, Y_net, knn, eQuartile, labels, colors))

    # The label column does not depend on the feature index.
    Y_column = np.reshape(Y_net, (-1, 1))
    for i in range(len(X_net[0])):
        column = X_net[:, [i]]
        present = ~np.isnan(column)
        # Boolean-mask indexing already yields 1-D arrays, so no extra
        # flattening is needed.
        G.append(
            _build_annotated_knn_graph(
                column[present], Y_column[present], knn, eQuartile, labels, colors))
    return G
def modularity(self) -> float:
    """Return the modularity of the top-10 communities, computed once.

    The value is cached in ``self._modularity``.  On first use, every entry
    of ``self.top10_communities`` is turned into a subgraph of ``self.graph``
    and passed to the ``modularity`` function; ``weight='weight'`` is given
    only when ``self.weighted`` is true.
    """
    if self._modularity is not None:
        return self._modularity

    members = tuple(
        self.graph.subgraph(community) for community in self.top10_communities)
    options = {'weight': 'weight'} if self.weighted else {}
    self._modularity = modularity(self.graph, members, **options)
    return self._modularity
def test_allin_is_zero(self):
    """Putting every node into one community must give a modularity of 0."""
    for _ in range(self.numtest):
        graph = nx.erdos_renyi_graph(50, 0.1)
        single_community = {node: 0 for node in graph}
        self.assertEqual(co.modularity(single_community, graph), 0)
def test_modularity(self):
    # Smoke test: score a two-triangle split of a barbell graph, then print
    # the first greedy community found on the karate club graph.  Nothing is
    # asserted; the calls only have to complete.
    barbell = barbell_graph(3, 0)
    modularity(barbell, [{0, 1, 2}, {3, 4, 5}])

    karate = karate_club_graph()
    found = list(greedy_modularity_communities(karate, gamma=1.8))
    print(sorted(found[0]))
def getModularity(genome):
    """Return the modularity of the greedy partition of the genome's graph.

    The graph is obtained from ``getGraph(genome)``.  A graph without edges
    scores 0 without running community detection.
    """
    graph = getGraph(genome)
    if G_is_edgeless := (graph.number_of_edges() == 0):
        return 0
    found = community.greedy_modularity_communities(graph)
    return community.modularity(graph, found)
def test_modularity_increase(self):
    """Modularity must not decrease from one dendrogram level to the next."""
    graph = nx.erdos_renyi_graph(1000, 0.01)
    dendrogram = co.generate_dendrogram(graph)

    scores = []
    for level in range(len(dendrogram)):
        partition = co.partition_at_level(dendrogram, level)
        scores.append(co.modularity(partition, graph))

    # The per-level scores must already be in ascending order.
    self.assertListEqual(scores, sorted(scores))
def compute_greedy_modularity_community_metrics(G):
    """Run greedy modularity community detection on ``G`` and report on it.

    Prints the number of communities found and the modularity of that
    partition.

    Returns:
        The modularity of the detected partition.
    """
    found = list(community.greedy_modularity_communities(G))
    print('There were {} communities found.'.format(len(found)))

    score = community.modularity(G, found)
    print(
        'Graph modularity based on Clauset-Newman-Moore greedy modularity maximization clustering is: {}'
        .format(score))
    return score
def test_range(self):
    """Check that modularity always lies between -1 and 1."""
    for _ in range(self.numtest):
        g = nx.erdos_renyi_graph(50, 0.1)
        # Floor division keeps the upper bound an integer; true division
        # yields a float, which random.randint does not accept on current
        # Python 3 versions.
        upper = self.numtest // 10
        part = {node: random.randint(0, upper) for node in g}
        mod = co.modularity(part, g)
        self.assertGreaterEqual(mod, -1)
        self.assertLessEqual(mod, 1)
def compute_girvan_newman_community_metrics(G):
    """Score the second partition produced by Girvan-Newman on ``G``.

    The first partition yielded by ``community.girvan_newman`` is consumed
    and discarded; the modularity of the second one is printed and returned.
    """
    hierarchy = community.girvan_newman(G)
    next(hierarchy)  # skip the first level of the hierarchy
    second_level = next(hierarchy)

    modularity = community.modularity(G, second_level)
    print('Graph modularity based on Girvan Newmann clustering is {}'.format(
        modularity))
    return modularity
def compute_GN_communities(graph):
    """Run Girvan-Newman and find the community count with the best score.

    Returns:
        ``(girvan_newman, communities_modularity, n_comm)`` where
        ``girvan_newman`` maps a number of communities to its partition,
        ``communities_modularity`` maps a modularity value to a number of
        communities (a later partition with an equal score replaces an
        earlier one), and ``n_comm`` is the number of communities stored
        under the highest modularity.
    """
    girvan_newman = {}
    for partition in nx.algorithms.community.girvan_newman(graph):
        girvan_newman[len(partition)] = partition

    communities_modularity = {}
    for size, partition in girvan_newman.items():
        communities_modularity[modularity(graph, partition)] = size

    best_score = max(communities_modularity)
    n_comm = communities_modularity[best_score]
    return girvan_newman, communities_modularity, n_comm
def Girvan_Newman_algorithm(G, weight):
    """Dismantle a copy of ``G`` by betweenness and keep the best snapshot.

    Before each edge removal the connected components of the working copy
    are scored against ``G``.  Whenever that score exceeds every earlier one
    (and 0), the working graph, the score, the components and the step are
    recorded.

    Args:
        G: The original network; it is not modified.
        weight: Edge attribute name passed to the modularity and betweenness
            computations.

    Returns:
        ``(log_step, log_modularity, max_g, max_m, max_k, max_step, k_list)``.
        ``max_k`` and ``k_list`` refer to the same list of node lists.  If no
        step scores above 0, the ``max_*`` values describe the intact graph.
    """
    working = G.copy()
    max_g = working.copy()

    components = sorted(nx.connected_components(G), key=len, reverse=True)
    k_list = [list(component) for component in components]
    max_k = k_list
    max_m = community.modularity(G, communities=components, weight=weight)

    step = 0
    max_step = 0
    best_seen = 0
    log_step = []
    log_modularity = []

    while len(working.edges()) > 0:
        components = sorted(nx.connected_components(working), key=len, reverse=True)
        score = community.modularity(G, communities=components, weight=weight)
        if score > best_seen:
            # New best: snapshot the graph and its communities.
            max_g = working.copy()
            max_m = score
            k_list = [list(component) for component in components]
            max_k = k_list
            max_step = step
            best_seen = score

        log_step.append(step)
        log_modularity.append(score)
        print("step: ", step, " modularity: ", score)

        step += 1
        centrality = nx.edge_betweenness_centrality(working, weight=weight)
        top_edge = max(centrality, key=centrality.get)
        working.remove_edge(top_edge[0], top_edge[1])

    return log_step, log_modularity, max_g, max_m, max_k, max_step, k_list
def girvan_newman_opt(G, verbose=False):
    """Return the Girvan-Newman partition of ``G`` with the highest modularity.

    At most ``len(G) - 2`` partitions are taken from ``girvan_newman(G)`` and
    scored.  A partition is kept only when its modularity is strictly above
    the best seen so far, starting from 0.

    Args:
        G: Graph to partition.
        verbose: When true, print a progress line per scored partition.

    Returns:
        The best partition found.  When no partition scores above 0, the
        single-community partition ``(set(G),)`` is returned instead of
        failing on an unassigned result.
    """
    runningMaxMod = 0
    # Fallback matching the 0 baseline: all nodes in one community.
    OptPartition = (set(G),)

    commIndSetFull = girvan_newman(G)
    # zip stops cleanly if the generator yields fewer partitions than the
    # range asks for, instead of leaking StopIteration out of next().
    for iNumComm, iPartition in zip(range(2, len(G)), commIndSetFull):
        if verbose:
            print('Commnity detection iteration : %d' % iNumComm)
        Q = modularity(G, iPartition)
        if Q > runningMaxMod:
            # Save the best partition seen so far.
            runningMaxMod = Q
            OptPartition = iPartition
    return OptPartition
def analysis(edges, node_dic, class_dic=None):
    """Build a graph from ``edges`` and compute summary statistics.

    The degree distribution is taken from the full graph.  The graph is then
    reduced to its largest connected component before radius, eccentricity,
    centre and (optionally) modularity are computed.

    Args:
        edges: Iterable of pairs; items 0 and 1 of each entry are the edge
            endpoints.
        node_dic: Maps a graph node to the name used as key in ``class_dic``.
        class_dic: Maps a node name to its class.  Modularity is computed
            only when this is non-empty and contains the first value of
            ``node_dic``.

    Returns:
        ``(radius, eccentricity, center, degreeDist, mod_score, G)``.  The
        first three are ``""`` when the graph has no edges; ``mod_score`` is
        ``-2`` when modularity was not computed; ``G`` is the reduced graph.
    """
    if class_dic is None:
        class_dic = {}

    G = nx.Graph()
    for edge in edges:
        G.add_edge(edge[0], edge[1])

    degree_sequence = sorted(d for n, d in G.degree())
    degreeDist = list(collections.Counter(degree_sequence).items())

    # Keep only the largest connected component.  key=len is required:
    # without it max() compares the node sets by the superset relation and
    # does not select the largest one.
    if not nx.is_empty(G):
        largest_cc = max(nx.connected_components(G), key=len)
        G.remove_nodes_from(set(G.nodes()) - set(largest_cc))

    radius, eccentricity, center = "", "", ""
    if not nx.is_empty(G) and nx.is_connected(G):
        radius = nx.radius(G)
        eccentricity = nx.eccentricity(G)
        # The center is the set of nodes with eccentricity equal to radius.
        center = nx.center(G)

    # -2 marks "modularity not computed".
    mod_score = -2
    # Probe with the first mapped node name to check that class information
    # is available for this graph's naming scheme.
    aTestNodeName = next(iter(node_dic.values()), "")
    if class_dic and aTestNodeName != "" and aTestNodeName in class_dic:
        inv_dic = {}
        for n in G.nodes():
            inv_dic.setdefault(class_dic[node_dic[n]], set()).add(n)
        mod_score = nx_comm.modularity(G, list(inv_dic.values()))

    return radius, eccentricity, center, degreeDist, mod_score, G
def gn_time(G):
    """Return the mean wall-clock time of ten Girvan-Newman runs on ``G``.

    Each run walks through the partitions produced by ``girvan_newman``,
    scores them with ``modularity`` and stops after five consecutive
    partitions that do not beat the best score of that run.  Weighted graphs
    use weighted betweenness to choose the edge to remove.
    """

    def most_central_edge(G):
        # Edge with the highest weighted betweenness centrality.
        centrality = betweenness(G, weight='weight')
        return max(centrality, key=centrality.get)

    algo_time = []
    for i in tqdm(range(10)):
        start_time = time.time()

        options = {}
        if nx.is_weighted(G):
            options['most_valuable_edge'] = most_central_edge
        solutions = girvan_newman(G, **options)

        # Upper bound on the number of partitions to look at.
        k = len(G.edges)
        modularity_scores = {}
        max_score = 0
        stale_rounds = 0  # consecutive partitions without improvement

        for community in itertools.islice(solutions, k):
            solution = [sorted(c) for c in community]
            score = modularity(G, solution)
            modularity_scores[len(solution)] = score

            if score > max_score:
                # Copy of the best structure; kept so the timed work is
                # the same even though the copy is not read afterwards.
                community_structure = list(solution)
                max_score = score
                stale_rounds = 0
                continue

            stale_rounds += 1
            if stale_rounds == 5:
                break

        algo_time.append(time.time() - start_time)

    return np.mean(algo_time)
def get_greedy_modularity_communities(graph):
    """Label every node with its greedy-modularity community.

    Node identifiers are used as array indices, so they are expected to be
    non-negative integers.

    Returns:
        ``(labels, mod)``: an array with one 1-based community number per
        node, in ``graph.nodes`` order, and the modularity of the partition.
    """
    found = greedy_modularity_communities(graph)
    score = modularity(graph, found)

    labels = np.zeros(max(graph.nodes) + 1)
    for number, members in enumerate(found, start=1):
        for node in members:
            labels[node] = number

    return labels[list(graph.nodes)], score
def test_modularity():
    """Check ``modularity`` against known values on several graphs."""
    # Barbell graph: a mixed split, then the natural two-triangle split.
    barbell = nx.barbell_graph(3, 0)
    mixed_split = [{0, 1, 4}, {2, 3, 5}]
    assert almost_equal(-16 / (14**2), modularity(barbell, mixed_split))
    triangle_split = [{0, 1, 2}, {3, 4, 5}]
    assert almost_equal((35 * 2) / (14**2), modularity(barbell, triangle_split))

    # Seeded directed random graph split into two halves.
    n = 1000
    random_digraph = nx.erdos_renyi_graph(n, 0.09, seed=42, directed=True)
    halves = [set(range(n // 2)), set(range(n // 2, n))]
    assert almost_equal(0.00017154251389292754, modularity(random_digraph, halves))

    # Margulis-Gabber-Galil graph split at the middle of its node list.
    expander = nx.margulis_gabber_galil_graph(10)
    nodes = list(expander.nodes)
    middle = expander.number_of_nodes() // 2
    node_halves = [set(nodes[:middle]), set(nodes[middle:])]
    assert almost_equal(0.13, modularity(expander, node_halves))

    # Small directed path-like graph.
    small_digraph = nx.DiGraph()
    small_digraph.add_edges_from([(2, 1), (2, 3), (3, 4)])
    pairs = [{1, 2}, {3, 4}]
    assert almost_equal(2 / 9, modularity(small_digraph, pairs))
def mod_over_time(run_id, ld_file, data_file):
    """Write graph statistics per threshold and generation.

    The CSV at ``ld_file`` is read once.  For each of 20 thresholds between
    0.001 and 0.02 and for each generation, a graph is built whose nodes are
    ``0 .. max(locus2) - 1`` and whose edges are the rows of that generation
    with a value (row position 4) above the threshold.  Mean degree, average
    clustering and greedy-community modularity are passed to ``write_data``.

    Args:
        run_id: Forwarded to ``write_data``.
        ld_file: Path of the input CSV; it must have ``generation`` and
            ``locus2`` columns.
        data_file: Forwarded to ``write_data``.
    """
    path = os.path.abspath(ld_file)
    df = pandas.read_csv(path)
    gens = df['generation'].unique()
    max_locus = int(df['locus2'].max())
    thres_space = np.linspace(0.001, 0.02, 20)

    for thr in thres_space:
        for gen in gens:
            this_gen = df.query('generation == ' + str(gen))

            G = nx.Graph()
            G.add_nodes_from(range(max_locus))

            edge_list = []
            for row in this_gen.itertuples():
                # Positions 2 and 3 hold the two loci, position 4 the value
                # compared against the threshold.
                L1 = int(row[2])
                L2 = int(row[3])
                D = row[4]
                if D > thr:
                    edge_list.append([L1, L2])
            G.add_edges_from(edge_list)

            clus = nx.average_clustering(G)

            s = 0
            ct = 0
            for node, deg in G.degree:
                s += deg
                ct += 1
            mean_deg = float(s) / float(ct)

            # Best effort: record 0 when community detection or scoring
            # fails, but let KeyboardInterrupt and SystemExit through.
            try:
                com = greedy_modularity_communities(G)
                mod = modularity(G, com)
            except Exception:
                mod = 0

            write_data(run_id, thr, gen, mean_deg, clus, mod, data_file)
def compute_modularity(self, G0=None, category=None):
    """Return the modularity of the communities derived from ``category``.

    Args:
        G0: Graph to score; defaults to ``self.G0``.
        category: Passed to ``self.compute_communities``; defaults to
            ``self.category``.

    Returns:
        The value of ``community.modularity`` for ``self.communities``.
    """
    if G0 is None:
        G0 = self.G0
    if category is None:
        category = self.category
    self.compute_communities(category)
    # Score the graph that was actually requested; the argument used to be
    # resolved and then ignored in favour of self.G0.
    # NOTE(review): self.communities presumably partitions self.G0, so a
    # different G0 must share its node set; confirm against callers.
    Q = community.modularity(G0, self.communities)
    return Q
def get_girvan_newman_communities(graph):
    """Label every node with its community in the first Girvan-Newman split.

    Only the first partition yielded by ``girvan_newman`` is used.  Node
    identifiers are used as array indices, so they are expected to be
    non-negative integers.

    Returns:
        ``(labels, mod)``: an array with one 1-based community number per
        node, in ``graph.nodes`` order, and the modularity of the partition.
    """
    first_split = next(girvan_newman(graph))
    groups = tuple(sorted(members) for members in first_split)
    score = modularity(graph, groups)

    labels = np.zeros(max(graph.nodes) + 1)
    for number, members in enumerate(groups, start=1):
        for node in members:
            labels[node] = number

    return labels[list(graph.nodes)], score
def get_clique_communities(graph, k=4):
    """Label every node with a k-clique community number.

    Node identifiers are used as array indices, so they are expected to be
    non-negative integers.  Nodes that belong to no community keep label 0;
    a node that appears in several communities keeps the number of the last
    one.

    Args:
        graph: Graph to analyse.
        k: Clique size passed to ``k_clique_communities``.

    Returns:
        ``(labels, mod)``: the per-node labels in ``graph.nodes`` order and
        the modularity of the communities, or 0 when it cannot be computed.
    """
    comm = list(k_clique_communities(graph, k))

    # Best effort: fall back to 0 when scoring fails, but let
    # KeyboardInterrupt and SystemExit through.
    try:
        mod = modularity(graph, comm)
    except Exception:
        mod = 0

    node_cl_comm = np.zeros(max(graph.nodes) + 1)
    for number, members in enumerate(comm, start=1):
        for node in members:
            node_cl_comm[node] = number

    return node_cl_comm[list(graph.nodes)], mod
def compute_modularity_newman_leicht(self, ):
    """Return the modularity of the roster communities on ``self.mdg_graph``.

    See
    https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.community.quality.modularity.html

    Example of a communities argument:
        communities = [['p1.C1', 'p1.C2', 'p1.C3'], ['p2.C4', 'p2.C5']]
        G = nx.barbell_graph(3, 0)

    :return: the modularity, or 0 when the graph is missing or has no edges.
    """
    graph = self.mdg_graph
    if graph is None or graph.number_of_edges() <= 0:
        return 0

    communities = self.__roster_communities()
    return nx_comm.modularity(graph, communities=communities.values())
def get_graph_stats(community: nx.Graph, max_dom, max_core, more_metrics=False):
    """Summarise a community graph as a dict of statistics.

    Args:
        community: Graph whose nodes carry a ``'dom'`` attribute.
        max_dom: Reference value; the deviation of every node's ``'dom'``
            from it feeds ``max_stddev``.
        max_core: Reported as ``max_k_core`` and used for the ratios.
        more_metrics: When true, also compute an MD5 hash of the sorted node
            ids, the average clustering, the density and the modularity.

    Returns:
        Dict with the keys ``ratio_max_k_core``, ``max_k_core``,
        ``number_of_nodes``, ``max_stddev``, ``e2``, ``e4``, ``hash``,
        ``avg_clustering``, ``density`` and ``modularity``.  Without
        ``more_metrics`` the last four are ``""``, ``0``, ``-1`` and ``-1``.
    """
    node_count = len(community.nodes)
    # Sorted order presumably keeps the hashed id string independent of the
    # graph's node iteration order.
    ordered_nodes = sorted(community.nodes) if more_metrics else community.nodes

    id_parts = []
    squared_deviation = 0
    for node in ordered_nodes:
        id_parts.append(f'{node}|')
        squared_deviation += math.pow(community._node[node]['dom'] - max_dom, 2)

    digest = ""
    if more_metrics:
        digest = hashlib.md5(str.encode("".join(id_parts))).hexdigest()

    stddev = math.sqrt(squared_deviation / node_count)
    ratio_max_core = max_core / node_count

    if more_metrics:
        avg_clustering = nx.average_clustering(community)
        density = nx.density(community)
        # NOTE(review): no partition is passed here; networkx's modularity
        # expects a communities argument, so this call looks like it cannot
        # succeed as written. Confirm which partition was intended.
        modularity = nx_comm.modularity(community)
    else:
        avg_clustering, density, modularity = 0, -1, -1

    return {
        "ratio_max_k_core": ratio_max_core,
        "max_k_core": max_core,
        "number_of_nodes": node_count,
        "max_stddev": stddev,
        "e2": (max_core * ratio_max_core) / stddev,
        "e4": (max_core + ratio_max_core) / stddev,
        "hash": digest,
        "avg_clustering": avg_clustering,
        "density": density,
        "modularity": modularity,
    }
def test_disjoint_clique(self):
    """Disjoint cliques grouped one per community score 1 - 1/num_clique.

    A graph made of ``num_clique`` disjoint cliques of ``size_clique`` nodes
    each, partitioned by clique, should have that modularity.
    """
    for _ in range(self.numtest):
        size_clique = random.randint(5, 20)
        num_clique = random.randint(5, 20)

        # Node names get the clique index as prefix, e.g. "3_0".
        g = nx.Graph()
        for i in range(num_clique):
            g = nx.union(
                g, nx.complete_graph(size_clique), rename=("", str(i) + "_"))

        # The prefix before "_" identifies the clique a node came from.
        part = {node: node.split("_")[0].strip() for node in g}

        mod = co.modularity(part, g)
        expected = 1. - 1. / float(num_clique)
        self.assertAlmostEqual(
            mod,
            expected,
            msg="Num clique: " + str(num_clique) +
            " size_clique: " + str(size_clique))
def compute_modularity_newman_leicht(self, ):
    """Return the modularity of the package grouping on ``self.mdg_graph``.

    The keys of ``self.class_package_dict`` are grouped by their value and
    each group is used as one community.

    Example of a communities argument:
        communities = [['p1.C1', 'p1.C2', 'p1.C3'], ['p2.C4', 'p2.C5']]
        G = nx.barbell_graph(3, 0)

    :return: the modularity, or 0 when the graph is missing or has no edges.
    """
    graph = self.mdg_graph
    if graph is None or graph.number_of_edges() <= 0:
        return 0

    # Invert the mapping: one list of keys per distinct value.
    grouped = {}
    for member, group in self.class_package_dict.items():
        grouped.setdefault(group, []).append(member)

    return nx_comm.modularity(graph, communities=grouped.values())
def measure_performance(graph, cm):
    """Print and return quality measures for the partition ``cm`` of ``graph``.

    Prints the number of communities, their sizes in descending order, and
    the coverage, modularity and performance of the partition.

    Returns:
        Dict with the keys ``'coverage'``, ``'modularity'`` and
        ``'performance'``.
    """
    print(format('Community numbers:', '35s'), len(cm))
    sizes = sorted((len(members) for members in cm), reverse=True)
    print(format('', '35s'), sizes)

    scores = {}
    scores['coverage'] = nx_comm.quality.coverage(graph, cm)
    scores['modularity'] = nx_comm.modularity(graph, cm)
    scores['performance'] = nx_comm.quality.performance(graph, cm)

    print(format('Coverage:', '35s'), scores['coverage'])
    print(format('Modularity:', '35s'), scores['modularity'])
    print(format('Performance:', '35s'), scores['performance'])
    return scores
def test_modularity():
    """Check ``modularity`` on two splits of a barbell graph."""
    barbell = nx.barbell_graph(3, 0)

    # A split that mixes the two triangles scores below zero.
    mixed_split = [{0, 1, 4}, {2, 3, 5}]
    assert_almost_equal(-16 / (14 ** 2), modularity(barbell, mixed_split))

    # The split along the two triangles.
    triangle_split = [{0, 1, 2}, {3, 4, 5}]
    assert_almost_equal((35 * 2) / (14 ** 2), modularity(barbell, triangle_split))