def test_directed_not_supported():
    """Run label propagation on a small directed graph.

    The original comment states that directed graphs are not supported, so
    the call below is expected to fail.
    NOTE(review): no expected-exception check is visible inside this
    function -- confirm one is applied outside it (e.g. by a decorator).
    """
    digraph = nx.DiGraph()
    digraph.add_edges_from([('a', 'b'), ('a', 'c'), ('b', 'd')])
    # not supported for directed graphs
    label_propagation_communities(digraph)
def apply_label(UG):
    """Detect label-propagation communities of ``UG`` and score them.

    Returns a tuple ``(mod, partition_map, partition_list)`` where
    ``partition_list`` is the list of detected communities,
    ``partition_map`` is whatever ``list_to_dict`` builds from that list,
    and ``mod`` is the modularity of the partition (``0`` when the
    modularity computation fails).
    """
    partition_list = list(community.label_propagation_communities(UG))
    partition_map = list_to_dict(partition_list)
    try:
        mod = nx.community.quality.modularity(UG, partition_list)
    except Exception:
        # Best effort: keep the "score 0 on failure" fallback, but no longer
        # swallow KeyboardInterrupt / SystemExit as the bare except did.
        mod = 0
    return mod, partition_map, partition_list
def run(self, G, seed_node_count, adj_list):
    """Greedily pick seed nodes from the largest label-propagation community.

    The graph is first restricted to its largest connected component, then
    split with ``label_propagation_communities``. Candidates come from the
    largest community only. Each round picks the candidate with the highest
    score, where every neighbour ``adj`` contributes
    ``1 / max(1, N_v[adj] + 1)``; ``N_v`` counts how often a node was chosen
    or was adjacent to a chosen node, so crowded neighbourhoods score lower.

    Parameters: ``G`` is the graph, ``seed_node_count`` the number of seeds
    wanted, ``adj_list`` a mapping from node to its neighbours.

    Returns the set of chosen nodes. It may hold fewer than
    ``seed_node_count`` nodes when no candidate with a positive score is
    left (previously this case inserted ``None`` and indexed
    ``adj_list[None]``).
    """
    largest_cc = max(nx.connected_components(G), key=len)
    G = nx.subgraph(G, largest_cc)
    # Run the community detection once and reuse it for the size report.
    communities = list(label_propagation_communities(G))
    print("List of community sizes:", list(map(len, communities)))
    best_community = list(max(communities, key=len))

    V = set()
    N_v = defaultdict(int)
    while len(V) < seed_node_count:
        best_score = 0
        best_node = None
        for node in best_community:
            if node in V:
                continue
            score = 0
            for adj in adj_list[node]:
                score += 1. / max(1, N_v[adj] + 1)
            if score > best_score:
                best_score = score
                best_node = node
        if best_node is None:
            # Nothing left to pick: stop instead of adding None to the seeds.
            break
        V.add(best_node)
        N_v[best_node] += 1
        for n in adj_list[best_node]:
            N_v[n] += 1
    return V
def test_directed_not_supported():
    """Label propagation must raise NetworkXNotImplemented on a DiGraph."""
    digraph = nx.DiGraph()
    digraph.add_edges_from([('a', 'b'), ('a', 'c'), ('b', 'd')])
    # not supported for directed graphs
    with pytest.raises(nx.NetworkXNotImplemented):
        label_propagation_communities(digraph)
def test_one_node():
    """A graph with a single node yields exactly one singleton community."""
    single = nx.Graph()
    single.add_node('a')
    found = {frozenset(group)
             for group in label_propagation_communities(single)}
    # The expected communities are:
    expected = {frozenset(['a'])}
    assert_equal(found, expected)
def test_one_node():
    """Check that one isolated node forms one community on its own."""
    graph = nx.Graph()
    graph.add_node("a")
    detected = set()
    for members in label_propagation_communities(graph):
        detected.add(frozenset(members))
    # The expected communities are:
    expected = {frozenset(["a"])}
    assert detected == expected
def lpa(self):
    """Cluster ``self.network`` with label propagation.

    Stores a ``node -> cluster id`` mapping in ``self.clusters`` (ids are
    the enumeration order of the detected communities) and the number of
    clusters in ``self.clusters_num``, then prints that number.
    An empty network now yields ``0`` clusters instead of failing in
    ``max()``; the leftover debug print of the generator was removed.
    """
    communities = community.label_propagation_communities(self.network)
    self.clusters = {
        node: cluster_id
        for cluster_id, node_set in enumerate(communities)
        for node in node_set
    }
    if self.clusters:
        self.clusters_num = max(self.clusters.values()) + 1
    else:
        self.clusters_num = 0
    print('{} clusters'.format(self.clusters_num))
def execute(directory):
    """Cluster every ``*.net`` file in ``directory`` and evaluate the result.

    For each Pajek file the graph is read, label-propagation communities are
    computed and written as a ``.clu`` partition under
    ``../../results-label-p/``. The external ``Compare_Partitions.exe`` and
    ``Modularity_Calculation.exe`` tools are then run from
    ``../../radatools/Communities_Tools/``.

    The function changes the process working directory several times and
    relies on relative paths, so the order of the ``os.chdir`` calls matters.
    NOTE(review): the tool command lines are built by string concatenation
    and run through the shell; file and directory names are assumed trusted.
    """
    os.chdir(directory)
    for file in glob.glob("*.net"):
        file_name = file.split(".")[0] + ".clu"
        multigraph = nx.read_pajek(file)
        modelGraph = nx.Graph(multigraph)
        file_directory = os.path.join("../../results-label-p/", file_name)

        communities = community.label_propagation_communities(modelGraph)
        # Slot 0 is the header; slot i holds the group of the node whose
        # 'id' attribute is i.
        lines = [None] * (len(modelGraph) + 1)
        group_number = 1
        for group in communities:
            group_as_string = str(group_number)
            for node_value in group:
                node_id = modelGraph.nodes.get(node_value)['id']
                lines[int(node_id)] = group_as_string
            group_number += 1
        lines[0] = "*Vertices " + str(len(modelGraph))
        # Any slot not filled above gets a fresh group of its own.
        for index, x in enumerate(lines):
            if x is None:
                lines[index] = str(group_number)
                group_number += 1

        # The context manager closes the file even if writing fails, and
        # before the external tools read it.
        with open(file_directory, "w+") as f:
            f.write('\n'.join(lines))
            f.write("\n")

        os.chdir("../../radatools/Communities_Tools/")
        st = os.stat('./Compare_Partitions.exe')
        os.chmod("./Compare_Partitions.exe", st.st_mode | stat.S_IEXEC)
        if ("model" in directory) and ("rb125" in file_name):
            for index in range(1, 4):
                os.system("./Compare_Partitions.exe ../../results-label-p/" + file_name + " ../" + directory + "rb125-" + str(index) + ".clu" + " ../../results-label-p/" + file_name + "-" + str(index) + ".exit " + " V")
        else:
            os.system("./Compare_Partitions.exe ../../results-label-p/" + file_name + " ../" + directory + file_name + " ../../results-label-p/" + file_name + ".exit " + " V")
        st = os.stat('./Modularity_Calculation.exe')
        os.chmod("./Modularity_Calculation.exe", st.st_mode | stat.S_IEXEC)
        os.system("./Modularity_Calculation.exe ../" + directory + file + " ../../results-label-p/" + file_name + " 0 0 UN TC 2 >> " + " ../../results-label-p/" + file_name + ".modularity")
        # Return to the data directory for the next file.
        os.chdir("../" + directory)
    os.chdir("../../source")
def partition_into_clusters(G, n, num_players):
    '''
    Partitions a graph into clusters, and partitions the n seeds among the
    clusters proportional to their size.

    Only the largest clusters are kept: clusters are taken in decreasing
    size until they cover at least ``threshold`` (0.3) of the nodes of G.

    @param G: graph
    @param n: number of seeds
    @param num_players: number of players (currently unused)
    @return clusters, seed_nums: clusters is a list of clusters,
    seed_nums is a list of how many seeds should be partitioned to each
    cluster.
    '''
    comp = list(community.label_propagation_communities(G))  # girvan newman too slow
    comp.sort(reverse=True, key=len)

    # we focus on only the top threshold fraction of nodes in clusters,
    # since the best strategy is probably to dominate the large clusters,
    # while ignoring the very small ones (idk?)
    threshold = 0.3  # change if needed

    # extract only the top clusters that form threshold fraction of nodes
    total_cluster_nodes = 0
    clusters = []
    for cluster in comp:
        clusters.append(cluster)
        total_cluster_nodes += len(cluster)
        if total_cluster_nodes >= threshold * len(G):
            break

    print(len(clusters))
    for cluster in clusters:
        print(len(cluster))

    # partition our n seeds among the clusters, s.t. number of seeds given is
    # proportional to cluster size
    # ensure all n seeds get partitioned
    total_nodes_counted = 0
    total_seeds_given = 0
    seed_nums = []
    for cluster in clusters:
        total_nodes_counted += len(cluster)
        num_seeds = round(total_nodes_counted / total_cluster_nodes * n) - total_seeds_given
        # don't give seeds to very small clusters, picked arbitrary threshold.
        # The seed moves to the first (largest) cluster; the `seed_nums`
        # guard keeps the first cluster from indexing an empty list.
        if seed_nums and num_seeds == 1 and len(cluster) < total_cluster_nodes / n:
            seed_nums[0] += num_seeds
            seed_nums.append(0)
        else:
            seed_nums.append(num_seeds)
        total_seeds_given += num_seeds

    assert len(clusters) == len(seed_nums)
    return clusters, seed_nums
def test_connected_communities():
    """Two 5-cliques joined by one edge, plus a pair and an isolated node.

    The two cliques may be reported either separately or merged, so both
    partitions are accepted.
    """
    graph = nx.Graph()
    # community 1
    graph.add_edges_from([
        ('a', 'b'),
        ('c', 'a'), ('c', 'b'),
        ('d', 'a'), ('d', 'b'), ('d', 'c'),
        ('e', 'a'), ('e', 'b'), ('e', 'c'), ('e', 'd'),
    ])
    # community 2
    graph.add_edges_from([
        ('1', '2'),
        ('3', '1'), ('3', '2'),
        ('4', '1'), ('4', '2'), ('4', '3'),
        ('5', '1'), ('5', '2'), ('5', '3'), ('5', '4'),
    ])
    # edge between community 1 and 2
    graph.add_edge('a', '1')
    # community 3
    graph.add_edge('x', 'y')
    # community 4 with only a single node
    graph.add_node('z')

    # The expected communities are:
    split = {
        frozenset(['a', 'b', 'c', 'd', 'e']),
        frozenset(['1', '2', '3', '4', '5']),
        frozenset(['x', 'y']),
        frozenset(['z']),
    }
    merged = {
        frozenset(['a', 'b', 'c', 'd', 'e', '1', '2', '3', '4', '5']),
        frozenset(['x', 'y']),
        frozenset(['z']),
    }
    found = {frozenset(group)
             for group in label_propagation_communities(graph)}
    assert_in(found, (split, merged))
def test_connected_communities():
    """Check detection on two bridged 5-cliques, one pair and one lone node.

    Either the split or the merged clique partition is an accepted result.
    """
    clique_letters = [
        ("a", "b"),
        ("c", "a"), ("c", "b"),
        ("d", "a"), ("d", "b"), ("d", "c"),
        ("e", "a"), ("e", "b"), ("e", "c"), ("e", "d"),
    ]
    clique_digits = [
        ("1", "2"),
        ("3", "1"), ("3", "2"),
        ("4", "1"), ("4", "2"), ("4", "3"),
        ("5", "1"), ("5", "2"), ("5", "3"), ("5", "4"),
    ]
    graph = nx.Graph()
    graph.add_edges_from(clique_letters)  # community 1
    graph.add_edges_from(clique_digits)  # community 2
    graph.add_edge("a", "1")  # edge between community 1 and 2
    graph.add_edge("x", "y")  # community 3
    graph.add_node("z")  # community 4 with only a single node

    # The expected communities are:
    pair_and_single = [frozenset(["x", "y"]), frozenset(["z"])]
    ground_truth1 = set(
        [frozenset(["a", "b", "c", "d", "e"]),
         frozenset(["1", "2", "3", "4", "5"])] + pair_and_single
    )
    ground_truth2 = set(
        [frozenset(["a", "b", "c", "d", "e", "1", "2", "3", "4", "5"])]
        + pair_and_single
    )
    detected = {frozenset(c) for c in label_propagation_communities(graph)}
    assert detected in (ground_truth1, ground_truth2)
def label_propagation(self, *args):
    """Run label propagation on the generated graph minus one node's edges.

    ``args[0]`` is passed to ``self.generate_graph``. Every edge between
    ``args[1]`` and a node of that graph is removed (including a self-loop
    on ``args[1]``), and the communities of the remaining edges are
    returned as produced by ``community.label_propagation_communities``.
    """
    graph = self.generate_graph(args[0])
    star = nx.Graph()
    star.add_edges_from([(args[1], node) for node in graph.nodes()])
    remaining_edges = graph.edges() - star.edges()
    pruned = nx.Graph()
    pruned.add_edges_from(remaining_edges)
    return community.label_propagation_communities(pruned)
def test_unconnected_communities():
    """Two disjoint triangles must come back as two separate communities."""
    graph = nx.Graph()
    # community 1
    graph.add_edges_from([("a", "c"), ("a", "d"), ("d", "c")])
    # community 2
    graph.add_edges_from([("b", "e"), ("e", "f"), ("f", "b")])
    detected = set()
    for members in label_propagation_communities(graph):
        detected.add(frozenset(members))
    # The expected communities are:
    expected = {frozenset(["a", "c", "d"]), frozenset(["b", "e", "f"])}
    assert detected == expected
def test_unconnected_communities():
    """Verify that two unconnected triangles form two communities."""
    triangles = [
        ('a', 'c'), ('a', 'd'), ('d', 'c'),  # community 1
        ('b', 'e'), ('e', 'f'), ('f', 'b'),  # community 2
    ]
    graph = nx.Graph()
    graph.add_edges_from(triangles)
    # The expected communities are:
    expected = {frozenset(['a', 'c', 'd']), frozenset(['b', 'e', 'f'])}
    found = {frozenset(group)
             for group in label_propagation_communities(graph)}
    assert found == expected
def test_connected_communities():
    """Detect communities in two linked 5-cliques, a pair and a lone node.

    The cliques may be found apart or merged; both outcomes are valid.
    """
    graph = nx.Graph()
    edges = [
        # community 1
        ('a', 'b'), ('c', 'a'), ('c', 'b'), ('d', 'a'), ('d', 'b'),
        ('d', 'c'), ('e', 'a'), ('e', 'b'), ('e', 'c'), ('e', 'd'),
        # community 2
        ('1', '2'), ('3', '1'), ('3', '2'), ('4', '1'), ('4', '2'),
        ('4', '3'), ('5', '1'), ('5', '2'), ('5', '3'), ('5', '4'),
        # edge between community 1 and 2
        ('a', '1'),
        # community 3
        ('x', 'y'),
    ]
    graph.add_edges_from(edges)
    # community 4 with only a single node
    graph.add_node('z')

    # The expected communities are:
    letters = frozenset(['a', 'b', 'c', 'd', 'e'])
    digits = frozenset(['1', '2', '3', '4', '5'])
    pair = frozenset(['x', 'y'])
    single = frozenset(['z'])
    ground_truth1 = {letters, digits, pair, single}
    ground_truth2 = {
        frozenset(['a', 'b', 'c', 'd', 'e', '1', '2', '3', '4', '5']),
        pair,
        single,
    }
    acceptable = (ground_truth1, ground_truth2)
    outcome = {frozenset(c) for c in label_propagation_communities(graph)}
    assert_in(outcome, acceptable)
def test_unconnected_communities():
    """Two separate triangles are expected as two distinct communities."""
    first_triangle = [('a', 'c'), ('a', 'd'), ('d', 'c')]  # community 1
    second_triangle = [('b', 'e'), ('e', 'f'), ('f', 'b')]  # community 2
    graph = nx.Graph()
    graph.add_edges_from(first_triangle + second_triangle)
    # The expected communities are:
    expected = {frozenset(['a', 'c', 'd']), frozenset(['b', 'e', 'f'])}
    outcome = {frozenset(c) for c in label_propagation_communities(graph)}
    assert_equal(outcome, expected)
def __init__(self, nwtk_dict, root = None, vis_name = None, netX = False, edgelist = None):
    """Convert a node->community mapping, save it, optionally score it.

    ``nwtk_dict`` is regrouped with ``self.new_pairs_``, turned into a
    community/node list with ``self.gen_comm_node`` and written in "dat"
    format through ``self.saving_new_g(root, vis_name, ...)``.

    When ``netX`` is ``True`` the graph is read from ``edgelist`` and the
    modularity of its label-propagation communities is printed.
    """
    new_pair_dict = self.new_pairs_(nwtk_dict)
    # saving the files as .dat format
    nx_mid_list = self.gen_comm_node(list(new_pair_dict.values()))
    self.saving_new_g(root, vis_name, "dat", nx_mid_list)
    print("saving success!")

    if netX is True:
        # Check modularity with networkX
        nx_G = nx.read_edgelist(edgelist)
        modul_nx = nx_comm.modularity(
            nx_G, nx_comm.label_propagation_communities(nx_G))
        # The value is now interpolated into the message; the old call
        # printed the literal "%d" followed by the number.
        print("Modularity from networkX %s" % modul_nx)
def ksc_centrality(G, alpha=0.5, beta=0.5):
    '''
    Combine k-shell (internal) and community (external) influence per node.

    Internal influence is the node's core number divided by the largest
    core number. External influence is, summed over the node's neighbours,
    the size of the community each neighbour belongs to, divided by the
    largest such sum. The result is
    ``alpha * internal + beta * external`` for every node.

    An empty graph returns ``{}`` and a zero maximum normalises to 0
    instead of raising.

    Paper: https://www.researchgate.net/publication/262391998_A_New_Approach_to_Identify_Influential_Spreaders_in_Complex_Networks
    '''
    core_numbers = core_number(G)
    if not core_numbers:
        return {}
    max_core_number = max(core_numbers.values())

    # internal influence
    f_internal = {}
    for v in G:
        f_internal[v] = core_numbers[v] / max_core_number if max_core_number else 0.0

    # external influence
    # Map each node to the size of its community once, so a node's influence
    # is a single pass over its neighbours. This equals the original
    # sum over clusters of (neighbours in cluster * cluster size).
    cluster_size = {}
    for cluster in community.label_propagation_communities(G):
        size = len(cluster)
        for node in cluster:
            cluster_size[node] = size

    f_external = {}
    for v in G:
        f_external[v] = sum(cluster_size.get(u, 0) for u in G.neighbors(v))

    # normalize by maximum influence
    max_influence = max(f_external.values())
    for v in G:
        f_external[v] = f_external[v] / max_influence if max_influence else 0.0

    # total influence
    f_total = {}
    for v in G:
        f_total[v] = f_internal[v] * alpha + f_external[v] * beta
    return f_total
def label_propagation(G, weight='weight', iterNum=6):
    '''Community detection using label propagation algorithm.

    The algorithm is run ``iterNum`` times and the partition with the
    highest modularity is returned.

    Parameters
    ----------
    G : networkx.graph
    weight : edge attribute if G is weighted or None if G is unweighted
    iterNum : number to repeat label propagation algorithm

    Returns
    -------
    list_communities : list
        A list of sets, and each set contains vertices in one community.
        Empty when ``iterNum`` is not positive.

    Notes
    -----
    This function only deals with weighted and unweighted undirected graph.
    '''
    # H is the undirected version of graph G
    H = G.to_undirected()
    # Bound up front so iterNum <= 0 returns [] instead of raising
    # UnboundLocalError.
    list_communities = []
    max_modularity = float('-inf')
    for _ in range(iterNum):
        if weight is None:
            cur_list_communities = list(
                community.label_propagation_communities(H))
        else:
            cur_list_communities = list(
                community.asyn_lpa_communities(H, weight=weight))
        # Score with the same edge attribute used for detection.
        cur_modularity = quality.modularity(
            H, cur_list_communities, weight=weight)
        if cur_modularity > max_modularity:
            list_communities = cur_list_communities
            max_modularity = cur_modularity
    return list_communities
(ex) 0 1 2 4 5 20 39 : these nodes are in same community ''' convert_comm = convertDat("/content/drive/MyDrive/Data/sourcefile/benchmark/community/community_50k_0.30-mu_1.dat") _, comm_num, pair_dict = convert_comm.decreaseBy1("comm") comm_list = convert_comm.genCommNode(list(pair_dict.values())) nx_comm_list = convert_comm.nx_list(list(pair_dict.values())) convert_comm.savingNewG("500_030_c_0202","dat",comm_list) nx_comm_list import networkx as nx import networkx.algorithms.community as nx_comm nx_G = nx.read_edgelist("/content/drive/MyDrive/Data/sourcefile/benchmark/convert_ntwk/500_030_0201.edgelist") nx_comm.modularity(nx_G, nx_comm.label_propagation_communities(nx_G)) mid_point = {0: 8, 1: 10, 2: 8, 3: 6, 4: 7, 5: 8, 6: 8, 7: 8, 8: 8, 9: 8, 10: 8, 11: 10, 12: 8, 13: 8, 14: 8, 15: 8, 16: 21, 17: 10, 18: 8, 19: 0, 20: 10, 21: 7, 22: 10, 23: 8, 24: 8, 25: 8, 26: 8, 27: 8, 28: 8, 29: 7, 30: 8, 31: 10, 32: 10, 33: 10, 34: 16, 35: 7, 36: 10, 37: 10, 38: 8, 39: 15, 40: 2, 41: 17, 42: 10, 43: 10, 44: 8, 45: 8, 46: 8, 47: 11, 48: 10, 49: 7, 50: 8, 51: 10, 52: 8, 53: 8, 54: 8, 55: 10, 56: 8, 57: 10, 58: 8, 59: 8, 60: 14, 61: 10, 62: 7, 63: 8, 64: 10, 65: 10, 66: 10, 67: 10, 68: 1, 69: 7, 70: 10, 71: 10, 72: 11, 73: 10, 74: 8, 75: 8, 76: 10, 77: 7, 78: 8, 79: 8, 80: 8, 81: 10, 82: 10, 83: 7, 84: 18, 85: 10, 86: 8, 87: 8, 88: 10, 89: 10, 90: 7, 91: 8, 92: 19, 93: 8, 94: 10, 95: 8, 96: 10, 97: 6, 98: 10, 99: 8, 100: 10, 101: 8, 102: 8, 103: 3, 104: 8, 105: 7, 106: 8, 107: 8, 108: 8, 109: 8, 110: 10, 111: 8, 112: 7, 113: 8, 114: 7, 115: 8, 116: 8, 117: 7, 118: 21, 119: 8, 120: 1, 121: 7, 122: 8, 123: 8, 124: 8, 125: 8, 126: 8, 127: 8, 128: 8, 129: 10, 130: 10, 131: 8, 132: 8, 133: 8, 134: 7, 135: 8, 136: 10, 137: 8, 138: 8, 139: 8, 140: 8, 141: 8, 142: 7, 143: 7, 144: 10, 145: 8, 146: 4, 147: 8, 148: 7, 149: 8, 150: 8, 151: 10, 152: 10, 153: 10, 154: 10, 155: 8, 156: 9, 157: 10, 158: 8, 159: 10, 160: 8, 161: 8, 162: 8, 163: 10, 164: 8, 165: 8, 166: 8, 167: 8, 168: 8, 169: 
8, 170: 7, 171: 10, 172: 8, 173: 7, 174: 10, 175: 8, 176: 7, 177: 8, 178: 7, 179: 10, 180: 18, 181: 8, 182: 8, 183: 8, 184: 10, 185: 8, 186: 8, 187: 10, 188: 10, 189: 7, 190: 7, 191: 8, 192: 8, 193: 20, 194: 8, 195: 10, 196: 8, 197: 5, 198: 8, 199: 7, 200: 8, 201: 7, 202: 8, 203: 8, 204: 8, 205: 8, 206: 8, 207: 8, 208: 8, 209: 8, 210: 8, 211: 13, 212: 8, 213: 10, 214: 7, 215: 8, 216: 7, 217: 7, 218: 8, 219: 10, 220: 10, 221: 10, 222: 8, 223: 8, 224: 7, 225: 8, 226: 8, 227: 8, 228: 10, 229: 8, 230: 10, 231: 7, 232: 10, 233: 8, 234: 8, 235: 8, 236: 8, 237: 8, 238: 10, 239: 8, 240: 8, 241: 2, 242: 8, 243: 19, 244: 10, 245: 10, 246: 10, 247: 7, 248: 10, 249: 8, 250: 10, 251: 8, 252: 7, 253: 7, 254: 7, 255: 8, 256: 8, 257: 8, 258: 8, 259: 10, 260: 8, 261: 8, 262: 8, 263: 8, 264: 10, 265: 8, 266: 8, 267: 10, 268: 10, 269: 8, 270: 8, 271: 10, 272: 8, 273: 8, 274: 8, 275: 8, 276: 8, 277: 8, 278: 8, 279: 8, 280: 7, 281: 8, 282: 8, 283: 7, 284: 8, 285: 8, 286: 10, 287: 10, 288: 7, 289: 8, 290: 10, 291: 8, 292: 10, 293: 8, 294: 8, 295: 8, 296: 8, 297: 10, 298: 7, 299: 8, 300: 7, 301: 7, 302: 8, 303: 7, 304: 10, 305: 8, 306: 8, 307: 8, 308: 12, 309: 10, 310: 8, 311: 8, 312: 7, 313: 8, 314: 10, 315: 8, 316: 10, 317: 10, 318: 8, 319: 7, 320: 8, 321: 7, 322: 10, 323: 8, 324: 7, 325: 10, 326: 7, 327: 10, 328: 10, 329: 8, 330: 10, 331: 8, 332: 10, 333: 8, 334: 8, 335: 10, 336: 8, 337: 8, 338: 8, 339: 8, 340: 10, 341: 10, 342: 10, 343: 8, 344: 10, 345: 8, 346: 7, 347: 8, 348: 7, 349: 8, 350: 7, 351: 8, 352: 8, 353: 8, 354: 8, 355: 8, 356: 8, 357: 8, 358: 8, 359: 7, 360: 8, 361: 10, 362: 8, 363: 10, 364: 10, 365: 7, 366: 10, 367: 10, 368: 10, 369: 10, 370: 10, 371: 8, 372: 8, 373: 7, 374: 10, 375: 10, 376: 8, 377: 8, 378: 7, 379: 8, 380: 10, 381: 10, 382: 10, 383: 8, 384: 8, 385: 8, 386: 7, 387: 10, 388: 8, 389: 7, 390: 8, 391: 7, 392: 7, 393: 10, 394: 7, 395: 10, 396: 7, 397: 7, 398: 8, 399: 10, 400: 10, 401: 8, 402: 10, 403: 7, 404: 7, 405: 7, 406: 8, 407: 10, 408: 10, 409: 8, 410: 
7, 411: 10, 412: 7, 413: 10, 414: 8, 415: 7, 416: 7, 417: 7, 418: 7, 419: 10, 420: 7, 421: 8, 422: 8, 423: 8, 424: 7, 425: 7, 426: 7, 427: 8, 428: 10, 429: 8, 430: 8, 431: 10, 432: 7, 433: 7, 434: 8, 435: 10, 436: 10, 437: 7, 438: 7, 439: 7, 440: 8, 441: 8, 442: 8, 443: 10, 444: 7, 445: 7, 446: 7, 447: 7, 448: 10, 449: 8, 450: 8, 451: 7, 452: 7, 453: 8, 454: 7, 455: 10, 456: 10, 457: 7, 458: 7, 459: 7, 460: 10, 461: 10, 462: 10, 463: 7, 464: 7, 465: 10, 466: 7, 467: 7, 468: 7, 469: 7, 470: 7, 471: 10, 472: 10, 473: 7, 474: 10, 475: 10, 476: 7, 477: 7, 478: 7, 479: 7, 480: 7, 481: 7, 482: 7, 483: 7, 484: 10, 485: 10, 486: 10, 487: 7, 488: 7, 489: 10, 490: 10, 491: 7, 492: 7, 493: 10, 494: 10, 495: 10, 496: 7, 497: 7, 498: 7, 499: 7} def new_pairs_(p_dict) : pair_dict = {} for idx in range(len(p_dict)) : #print(idx) if p_dict[idx] not in pair_dict.keys() : pair_dict[p_dict[idx]] = [] pair_dict[p_dict[idx]].append(idx) else : pair_dict[p_dict[idx]].append(idx)
def eval_labelprop(graph):
    """Return the label-propagation communities of ``graph``, largest first.

    NOTE(review): the original docstring mentions caching for speed; no
    caching happens inside this function -- confirm it is applied elsewhere.
    """
    return sorted(label_propagation_communities(graph), key=len, reverse=True)
def get_bias(text, index, visualise=False, filename="graph.png", folder=False):
    """Build a word co-occurrence graph for ``text`` and score it.

    The text is lower-cased and cleaned, then split into sentences on
    ". ". Within each sentence every pair of stemmed non-stopwords gets an
    edge whose ``weight`` counts how often the pair co-occurred. Connected
    components with fewer than 11 nodes are dropped.

    Parameters: ``index`` and ``folder`` are only used to build the figure
    file name when ``folder`` is truthy; ``visualise`` shows the figure.

    Returns ``(modularity_score, top_words)`` where ``modularity_score`` is
    the modularity of the label-propagation communities (0 on failure) and
    ``top_words`` holds up to five words with the highest positive
    betweenness centrality.
    """
    G = nx.Graph()

    ## convert lowercase
    first_article = text.lower()

    ## remove html tags from improper scraping
    first_article = re.sub(r"\<(.)+\>", " ", first_article)

    ## replace non alpha-numeric + . + & + \s with \s
    ## kept the apostraphe ’ chr(8217)
    special_apostraphe = chr(8217)
    first_article = re.sub(rf"[^0-9a-zA-Z\.\&\s{special_apostraphe}]+", " ",
                           first_article)

    ## remove random new lines
    first_article = re.sub("\n", " ", first_article)

    ## loop through the sentences
    for sentence in first_article.split(". "):
        ## ignore empty strings
        if sentence.strip() == "":
            continue
        cleaned_sentence = [
            stemmer.stem(word) for word in sentence.split(" ")
            if word not in spacy_stopwords and word.strip() != ""
        ]
        for i in range(len(cleaned_sentence) - 1):
            word = cleaned_sentence[i]
            for other_word in cleaned_sentence[i + 1:]:
                # First co-occurrence creates the edge with weight 1;
                # later ones increment it.
                if G.has_edge(word, other_word):
                    G[word][other_word]["weight"] += 1
                else:
                    G.add_edge(word, other_word, weight=1)

    ## remove nodes that are in isolated pairs and triplets
    for island in list(nx.connected_components(G)):
        if len(island) < 11:
            G.remove_nodes_from(island)

    ## get list of betweenness_centrality scores to find most influential words
    top_words_with_scores = {
        k: v
        for k, v in sorted(betweenness_centrality(G).items(),
                           key=lambda item: item[1],
                           reverse=True)[:5] if v > 0
    }

    ## determining modularity
    try:
        modularity_score = nx_comm.modularity(
            G, nx_comm.label_propagation_communities(G))
    except Exception:
        # Best effort: score 0 when modularity cannot be computed (e.g. the
        # graph ended up empty), without swallowing KeyboardInterrupt.
        modularity_score = 0

    if folder:
        filename = folder + str(index) + " - " + str(modularity_score) + ".png"

    ## for visualisation
    plt.figure(figsize=(100, 100))
    pos = nx.spring_layout(G)
    y_off = 10  ## offset value
    pos_higher = {k: (v[0], v[1] + y_off) for k, v in pos.items()}
    nx.draw(G, pos_higher, with_labels=True, node_size=60)
    plt.axis("off")
    if folder:
        plt.savefig(f"{SAVED_FIGURES}{filename}")
    if visualise:
        plt.show()
    plt.close()

    return modularity_score, list(top_words_with_scores.keys())
# g=nx.windmill_graph(8,4) N = len(g) W = np.zeros((N, N)) for i in g: print(i, end="-> ") for j in nx.neighbors(g, i): print(j, end=" ") W[i][j] = 1 W[j][i] = 1 print() # networkx community detection gmc = list(greedy_modularity_communities(g)) alc = list(asyn_lpa_communities(g)) lpac = list(label_propagation_communities(g)) asfl = list(asyn_fluidc(g, 3)) # inititalization anchorList = set([]) U = {} U[N] = np.zeros(N) randomFirstAnchor = random.randint(0, N - 1) anchorList.add(randomFirstAnchor) U[randomFirstAnchor] = np.zeros(N) phi = 0.25 itermax = 15 K = 1 adjacentNodes = {} for i in range(N): tmp = []
def get_communities(self, station_df):
    """Detect communities in the simple graph built from ``station_df``.

    The algorithm is selected by ``self.name``: 'ASYNC-LPA', 'LPC',
    'GREEDY', 'NAIVE-GREEDY', 'LUKES', 'ASYNC-FLUID' or 'GIRVAN-NEWMAN'.
    On success the communities are also written onto the graph through
    ``self.set_graph_cluster_labels``.

    Returns ``(g_simple_, g_communities_)``. Errors are printed, not
    raised: the communities list is then whatever was computed so far
    (``[]`` by default) and the graph is ``None`` if it could not be
    built (previously that case raised UnboundLocalError in the handler).
    """
    import networkx as nx
    import networkx.algorithms.community as nx_comm

    g_simple_ = None
    g_communities_ = []
    try:
        # sample the graph
        g_simple_ = self.get_simple_graph(station_df)
        if nx.is_empty(g_simple_):
            raise ValueError(
                'A simple graph with %d stations was not created'
                % station_df.shape[0])

        if self.name == 'ASYNC-LPA':  # asyn_lpa_communities
            g_communities_ = list(
                nx_comm.asyn_lpa_communities(g_simple_,
                                             weight=self.weight,
                                             seed=self.seed))
        elif self.name == 'LPC':  # label_propagation_communities
            g_communities_ = list(
                nx_comm.label_propagation_communities(g_simple_))
        elif self.name == 'GREEDY':  # greedy_modularity_communities
            g_communities_ = list(
                nx_comm.greedy_modularity_communities(g_simple_))
        elif self.name == 'NAIVE-GREEDY':  # _naive_greedy_modularity_communities
            g_communities_ = list(
                nx_comm._naive_greedy_modularity_communities(g_simple_))
        elif self.name == 'LUKES':  # lukes_partitioning
            # TODO: create MST of g_simple first but removing the minimum
            # weighted edge doesn't seem right
            g_communities_ = list(
                nx_comm.lukes_partitioning(
                    g_simple_,
                    edge_weight=self.weight,
                    max_size=self.maximum_node_weight))
        elif self.name == 'ASYNC-FLUID':  # asyn_fluidc
            # TODO: create complete graph for g_simple but a complete graph
            # would not work
            g_communities_ = list(
                nx_comm.asyn_fluidc(g_simple_,
                                    k=15,
                                    max_iter=300,
                                    seed=self.seed))
        elif self.name == 'GIRVAN-NEWMAN':  # girvan_newman
            # Only the first level produced by the generator is used.
            g_communities_ = list(next(nx_comm.girvan_newman(g_simple_)))
        else:
            raise AttributeError("something was not right")

        if isinstance(g_communities_, list):
            g_simple_ = self.set_graph_cluster_labels(
                g_simple_, g_communities_)

    except Exception as err:
        print("Class community_detection [get_communities] Error message:", err)

    return g_simple_, g_communities_
import networkx as nx
from networkx.algorithms.community import label_propagation_communities, greedy_modularity_communities, asyn_fluidc

graph_name = 'simpledistros.gml'
number_subreddits = 6
infomap_clu = 'simpledistros.clu'

G = nx.read_gml(graph_name)
# Relabel nodes as consecutive integers starting at 1.
G = nx.convert_node_labels_to_integers(G, first_label=1)

# Get the label propagation clustering
print('Label propagation clustering...')
lpa_gen = label_propagation_communities(G)
partition = []
for community in lpa_gen:
    part = sorted(community)
    partition.append(part)
partition = tuple(partition)

# Store each node's community index in its 'lpa' attribute. `G.nodes` is
# used because the old `G.node` alias was removed from networkx; walking
# the partition directly replaces the per-node scan of every community.
for i, part in enumerate(partition):
    for v in part:
        G.nodes[v]['lpa'] = i

# Get the modularity clustering
print('Modularity clustering...')
comp = greedy_modularity_communities(G)
partition = []
for community in comp:
    part = sorted(community)
    partition.append(part)
def labelPropagation(graph):
    """Return the label-propagation communities of ``graph`` as a list."""
    return list(community.label_propagation_communities(graph))
from graph import load_graph, generate_graph, draw_graph
from sknetwork.clustering import PropagationClustering
from sknetwork.utils import edgelist2adjacency
import numpy as np
from networkx.algorithms.community import asyn_lpa_communities, label_propagation_communities

if __name__ == '__main__':
    # Run label propagation with scikit-network and with NetworkX on the
    # same graph and print the cluster count and sizes for each.
    print("Label Propagation Algorithm of Scikit NetWork & Network X")

    # use a simple graph with 400 nodes and about 20,000 edges
    G, Edges, _ = load_graph('data/n400_p0.8_q0.1.txt')

    print("--Scikit Network--")
    adjacency = edgelist2adjacency(list(G.edges))
    propagation = PropagationClustering()
    New_Labels = propagation.fit_transform(adjacency)
    labels_unique, count = np.unique(New_Labels, return_counts=True)
    cluster_sizes = list(count)
    print("Number of clusters/labels:", len(labels_unique))
    print("Partition of nodes in different clusters/labels:", cluster_sizes)

    print("--NetWorkX--")
    nx_labels = list(label_propagation_communities(G))
    community_sizes = list(map(len, nx_labels))
    print("Number of clusters/labels:", len(nx_labels))
    print("Partition of nodes in different clusters/labels:", community_sizes)
# Colour every node by the index of the community in `c` that contains it
# (nodes found in no community keep colour 0), then draw the graph.
nlist = list(G.nodes())
ncs = [0] * len(nlist)
for i, members in enumerate(c):
    for j, node in enumerate(nlist):
        if node in members:
            ncs[j] = i

plt.figure()
plt.title("Modularity")
nx.draw(G, node_color=ncs)
plt.show()

# Same colouring, this time for the label-propagation communities.
c = list(label_propagation_communities(G))
nlist = list(G.nodes())
ncs = [0] * len(nlist)
for i, members in enumerate(c):
    for j, node in enumerate(nlist):
        if node in members:
            ncs[j] = i

plt.figure()
plt.title("Label Propogation")
nx.draw(G, node_color=ncs)
plt.show()
def detect_communities_label_propagation(G):
    """Return the label-propagation communities of ``G`` in sorted form.

    Each community becomes a sorted list of its nodes, and the list of
    communities is itself sorted.
    """
    return sorted(
        sorted(members) for members in label_propagation_communities(G)
    )
def community_lpa(G, **kwargs):
    """Return the label-propagation communities of ``G`` as a list.

    Extra keyword arguments are accepted but not used.
    """
    detected = community.label_propagation_communities(G)
    return list(detected)
def get_communities_label_propagation(G):
    """Collect the label-propagation communities of ``G`` into a list."""
    communities = community.label_propagation_communities(G)
    return [members for members in communities]
def find_communities(nnodes, edges, alg, params=None):
    """Run the community-detection algorithm named ``alg`` on a graph.

    Parameters
    ----------
    nnodes : number of nodes (used by the igraph and nested-SBM branches).
    edges : iterable of edges given as node pairs.
    alg : name of the algorithm; selects the backing library (BigClam,
        networkx / cdlib / karateclub, igraph, graph-tool).
    params : dict of algorithm-specific settings, mostly given as strings
        and converted here. It is copied, so the caller's dict is no
        longer modified.

    Returns
    -------
    A list of communities, ``[]`` when a dendrogram cannot be turned into a
    clustering, or ``None`` for an unknown ``alg``.
    """
    params = dict(params) if params else {}

    def membership2cs(membership):
        # Group positions by membership label.
        cs = {}
        for i, m in enumerate(membership):
            cs.setdefault(m, []).append(i)
        return cs.values()

    def connected_subgraphs(G: nx.Graph):
        # Yield each connected component relabelled to 0..k-1; the original
        # label is kept in the 'old' node attribute.
        for comp in nx.connected_components(G):
            sub = nx.induced_subgraph(G, comp)
            sub = nx.convert_node_labels_to_integers(sub,
                                                     label_attribute='old')
            yield sub

    def apply_subgraphs(algorithm, **params):
        # Run `algorithm` per connected component and map results back to
        # the original node labels.
        cs = []
        for sub in connected_subgraphs(G):
            if len(sub.nodes) <= 3:
                coms = [sub.nodes]  # let it be a cluster
            else:
                coms = algorithm(sub, **params)
                if hasattr(coms, 'communities'):
                    coms = coms.communities
            for com in coms:
                cs.append([sub.nodes[i]['old'] for i in set(com)])
        return cs

    def karate_apply(algorithm, graph, **params):
        # Fit a karateclub model and convert its memberships to clusters.
        model = algorithm(**params)
        model.fit(graph)
        return membership2cs(model.get_memberships().values())

    if alg == 'big_clam':
        c = -1 if params['c'] == 'auto' else int(params['c'])
        cs = BigClam('../../snap').run(edges, c=c, xc=int(params['xc']))
    elif alg in ('gmm', 'kclique', 'lprop', 'lprop_async', 'fluid',
                 'girvan_newman', 'angel', 'congo', 'danmf',
                 'egonet_splitter', 'lfm', 'multicom', 'nmnf', 'nnsed',
                 'node_perception', 'slpa', 'GEMSEC', 'EdMot', 'demon'):
        G = nx.Graph()
        G.add_edges_from(edges)

        if alg == 'gmm':
            cs = community.greedy_modularity_communities(G)
        elif alg == 'kclique':
            params = {k: float(v) for k, v in params.items()}
            cs = community.k_clique_communities(G, **params)
        elif alg == 'lprop':
            cs = community.label_propagation_communities(G)
        elif alg == 'lprop_async':
            cs = community.asyn_lpa_communities(G, seed=0)
        elif alg == 'fluid':
            params = {k: int(v) for k, v in params.items()}
            params['seed'] = 0
            cs = apply_subgraphs(community.asyn_fluidc, **params)
        elif alg == 'girvan_newman':
            comp = community.girvan_newman(G)
            # Advance to the k-th level; `cs` keeps the last one reached.
            for cs in itertools.islice(comp, int(params['k'])):
                pass
        elif alg == 'angel':
            params = {k: float(v) for k, v in params.items()}
            cs = cdlib.angel(G, **params).communities
        elif alg == 'congo':  # too slow
            ncoms = int(params['number_communities'])
            cs = []
            for sub in connected_subgraphs(G):
                if len(sub.nodes) <= max(3, ncoms):
                    # let it be a cluster -- reported with the original node
                    # labels, not the relabelled 0..k-1 ids
                    cs.append([sub.nodes[i]['old'] for i in sub.nodes])
                else:
                    coms = cdlib.congo(sub,
                                       number_communities=ncoms,
                                       height=int(params['height']))
                    for com in coms.communities:
                        cs.append([sub.nodes[i]['old'] for i in set(com)])
        elif alg == 'danmf':  # no overlapping
            cs = apply_subgraphs(cdlib.danmf)
        elif alg == 'egonet_splitter':
            params['resolution'] = float(params['resolution'])
            cs = apply_subgraphs(cdlib.egonet_splitter, **params)
        elif alg == 'lfm':
            coms = cdlib.lfm(G, float(params['alpha']))
            cs = coms.communities
        elif alg == 'multicom':
            cs = cdlib.multicom(G, seed_node=0).communities
        elif alg == 'nmnf':
            params = {k: int(v) for k, v in params.items()}
            cs = apply_subgraphs(cdlib.nmnf, **params)
        elif alg == 'nnsed':
            cs = apply_subgraphs(cdlib.nnsed)
        elif alg == 'node_perception':  # not usable
            params = {k: float(v) for k, v in params.items()}
            cs = cdlib.node_perception(G, **params).communities
        elif alg == 'slpa':
            params["t"] = int(params["t"])
            params["r"] = float(params["r"])
            cs = cdlib.slpa(G, **params).communities
        elif alg == 'demon':
            params = {k: float(v) for k, v in params.items()}
            cs = cdlib.demon(G, **params).communities
        elif alg == 'GEMSEC':
            # gamma = float(params.pop('gamma'))
            params = {k: int(v) for k, v in params.items()}
            # params['gamma'] = gamma
            params['seed'] = 0
            _wrap = partial(karate_apply, karateclub.GEMSEC)
            cs = apply_subgraphs(_wrap, **params)
        elif alg == 'EdMot':
            params = {k: int(v) for k, v in params.items()}
            _wrap = partial(karate_apply, karateclub.EdMot)
            cs = apply_subgraphs(_wrap, **params)
    elif alg in ('infomap', 'community_leading_eigenvector', 'leig',
                 'multilevel', 'optmod', 'edge_betweenness', 'spinglass',
                 'walktrap', 'leiden', 'hlc'):
        G = igraph.Graph()
        G.add_vertices(nnodes)
        G.add_edges(edges)

        if alg == 'infomap':
            vcl = G.community_infomap(trials=int(params['trials']))
            cs = membership2cs(vcl.membership)
        elif alg in ('leig', 'community_leading_eigenvector'):
            # Both accepted names run the leading-eigenvector method; the
            # long name used to fall through with `cs` undefined.
            clusters = None if params['clusters'] == 'auto' else int(
                params['clusters'])
            vcl = G.community_leading_eigenvector(clusters=clusters)
            cs = membership2cs(vcl.membership)
        elif alg == 'multilevel':
            vcl = G.community_multilevel()
            cs = membership2cs(vcl.membership)
        elif alg == 'optmod':  # too long
            # Graph.community_optimal_modularity returns a clustering
            # object; the old code unpacked it and then read an undefined
            # `vcl`.
            vcl = G.community_optimal_modularity()
            cs = membership2cs(vcl.membership)
        elif alg == 'edge_betweenness':
            clusters = None if params['clusters'] == 'auto' else int(
                params['clusters'])
            dendrogram = G.community_edge_betweenness(clusters,
                                                      directed=False)
            try:
                clusters = dendrogram.as_clustering()
            except Exception:
                return []
            cs = membership2cs(clusters.membership)
        elif alg == 'spinglass':  # only for connected graph
            vcl = G.community_spinglass(
                parupdate=True,
                update_rule=params['update_rule'],
                start_temp=float(params['start_temp']),
                stop_temp=float(params['stop_temp']))
            cs = membership2cs(vcl.membership)
        elif alg == 'walktrap':
            dendrogram = G.community_walktrap(steps=int(params['steps']))
            try:
                clusters = dendrogram.as_clustering()
            except Exception:
                return []
            cs = membership2cs(clusters.membership)
        elif alg == 'leiden':
            vcl = G.community_leiden(
                objective_function=params['objective_function'],
                resolution_parameter=float(params['resolution_parameter']),
                n_iterations=int(params['n_iterations']))
            cs = membership2cs(vcl.membership)
        elif alg == 'hlc':
            algorithm = HLC(G, min_size=int(params['min_size']))
            cs = algorithm.run(None)
    elif alg in ("sbm", "sbm_nested"):
        # Fixed seeds for both RNGs.
        np.random.seed(42)
        gt.seed_rng(42)

        G = gt.Graph(directed=False)
        G.add_edge_list(edges)

        deg_corr = bool(params['deg_corr'])
        B_min = None if params['B_min'] == 'auto' else int(params['B_min'])
        B_max = None if params['B_max'] == 'auto' else int(params['B_max'])

        if alg == "sbm":
            state = gt.minimize_blockmodel_dl(G,
                                              deg_corr=deg_corr,
                                              B_min=B_min,
                                              B_max=B_max)
            membership = state.get_blocks()
            cs = membership2cs(membership)
        if alg == "sbm_nested":
            state = gt.minimize_nested_blockmodel_dl(G,
                                                     deg_corr=deg_corr,
                                                     B_min=B_min,
                                                     B_max=B_max)
            levels = state.get_bs()
            level_max = int(params['level'])

            # Follow each node through the hierarchy and record its block at
            # the level whose countdown index equals `level_max`.
            membership = {}
            for nid in range(nnodes):
                cid = nid
                level_i = len(levels)
                for level in levels:
                    cid = level[cid]
                    if level_i == level_max:
                        membership.setdefault(cid, []).append(nid)
                        break
                    level_i -= 1
            cs = membership.values()
    else:
        return None

    return list(cs)
import networkx as nx
from networkx.algorithms import community
import matplotlib.pyplot as plt

# Load the data
G = nx.read_gml('./football.gml')

# Visualise the graph
nx.draw(G, with_labels=True)
plt.show()

# Community detection: print the communities, then how many were found
detected = community.label_propagation_communities(G)
communities = list(detected)
print(communities)
print(len(communities))