def t_selection_pipeline_undirected_village(G, ts, fraction_t_to_keep=0.25):
    """Partition ``G`` at every scale in ``ts`` and select the most promising scales.

    For each ``t`` a cost matrix is built with
    ``sgw.undirected_normalized_heat_kernel(G, t)`` and handed to
    ``process_sgwl_village`` together with ``database``, ``num_nodes`` and
    ``num_partitions``. Those three names are not parameters; they are read
    from the enclosing (module) scope. The coverage of the partition derived
    from each coupling is then used to rank the scales.

    :param G: graph passed to the heat-kernel builder and to ``coverage``.
    :param ts: sequence of scales; converted to a NumPy array so that it can
        be indexed with the sort order (lists are accepted too).
    :param fraction_t_to_keep: fraction of ``ts`` to select; the resulting
        count is rounded and clamped to ``[0, len(ts)]``.
    :return: ``(mis, coups, d_gws, good_t_max, good_t_grad, rt)`` where
        ``mis``, ``coups`` and ``d_gws`` hold the three values returned by
        ``process_sgwl_village`` per scale, ``good_t_max`` holds the scales
        with the highest coverage (ascending coverage order),
        ``good_t_grad`` holds the scales where the coverage changes least
        from one index of ``ts`` to the next, and ``rt`` holds the
        wall-clock seconds spent per scale.
    """
    # Fancy indexing with the sort order below requires an ndarray.
    ts = np.asarray(ts)

    mis = []
    coups = []
    d_gws = []
    rt = []
    for t in ts:
        start = time.time()
        cost = sgw.undirected_normalized_heat_kernel(G, t)
        mutual_info, d_gw, coup = process_sgwl_village(
            cost, database, num_nodes, num_partitions)
        mis.append(mutual_info)
        coups.append(coup)
        d_gws.append(d_gw)
        end = time.time()
        rt.append(end - start)
    print('Couplings Computed')

    coverages = [coverage(G, get_partition(coup)) for coup in coups]

    num_scales = len(ts)
    num_to_keep = int(np.round(fraction_t_to_keep * num_scales))
    # Clamp so that the slices below always select exactly ``num_to_keep``
    # items; a plain ``[-0:]`` slice would return every scale instead of none.
    num_to_keep = min(max(num_to_keep, 0), num_scales)

    # np.gradient needs at least two samples; with fewer there is no change
    # to measure, so every scale is treated as equally flat.
    if num_scales > 1:
        coverage_change = np.abs(np.gradient(coverages))
    else:
        coverage_change = np.zeros(num_scales)

    good_t_max = ts[np.argsort(coverages)][num_scales - num_to_keep:]
    good_t_grad = ts[np.argsort(coverage_change)][:num_to_keep]
    return mis, coups, d_gws, good_t_max, good_t_grad, rt
def best_split(wordPairs):
    """
    Build a graph from word pairs and return its best Girvan-Newman partition.

    Every level produced by ``girvan_newman`` is scored with the harmonic
    mean of its performance and coverage; the highest-scoring level wins.

    :param wordPairs: iterable of items whose first element is an
        indexable pair of words (``pair[0][0]``, ``pair[0][1]``); each pair
        becomes one edge. Any further elements of an item are ignored.
    :return: (level of partition that gives the best score, best score,
        best partition). ``(None, 0, None)`` is returned when there are no
        word pairs or when no level scores above zero.
    """
    edges = [(pair[0][0], pair[0][1]) for pair in wordPairs]
    if not edges:
        # Nothing to partition; the quality measures are undefined on an
        # empty graph, so report "no partition found".
        return (None, 0, None)

    # NOTE(review): ``performance`` and ``coverage`` appear to have been
    # removed from recent NetworkX releases in favour of
    # ``partition_quality`` - confirm against the pinned version.
    from networkx.algorithms import community
    from networkx.algorithms.community.quality import performance, coverage
    import networkx as nx

    Graph = nx.Graph()
    Graph.add_edges_from(edges)

    max_pc = 0
    max_index = None
    best_communities = None
    for i, communities in enumerate(community.girvan_newman(Graph)):
        p = performance(Graph, communities)
        c = coverage(Graph, communities)
        if p + c == 0:
            # Harmonic mean is undefined here (e.g. a single edge split into
            # two singletons); such a level can never beat the current best.
            continue
        score = 2 * p * c / (p + c)
        if score > max_pc:
            max_index = i
            max_pc = score
            best_communities = communities
    return (max_index, max_pc, best_communities)
def clustering_statistics(self, community_partition, feat_name, feat_desc, feat_interpret):
    """Register one feature per quality measure of ``community_partition``.

    Each feature is added through ``self.add_feature`` under the name
    ``feat_name + "_" + <measure>``, with a description ending in
    " of the partition of " + ``feat_desc`` and with ``feat_interpret``
    passed through unchanged.
    """
    description_tail = " of the partition of " + feat_desc

    def _measure(attribute):
        # Look the function up on ``quality`` only when the feature is
        # evaluated, never at registration time.
        return lambda graph: getattr(quality, attribute)(graph, community_partition)

    # (attribute of ``quality`` and feature-name suffix, description prefix),
    # listed in registration order.
    measures = (
        ("modularity", "Modularity"),
        ("coverage", "Coverage"),
        ("performance", "Performance"),
        ("inter_community_edges", "Inter community edges"),
        ("inter_community_non_edges", "Inter community non edges"),
        ("intra_community_edges", "Intra community edges"),
    )
    for attribute, label in measures:
        self.add_feature(
            feat_name + "_" + attribute,
            _measure(attribute),
            label + description_tail,
            feat_interpret,
        )
mx = max(mx, U[j][i]) probableCommunities = [] for j in anchorList: if (U[j][i] == mx): probableCommunities.append(j) selectedAnchorIndex = random.randint(0, len(probableCommunities) - 1) selectedAnchor = probableCommunities[selectedAnchorIndex] if selectedAnchor not in finalCommunities: finalCommunities[selectedAnchor] = [i] else: finalCommunities[selectedAnchor].append(i) print("gmc", gmc) print("alc", alc) print("lpac", lpac) print("async_fluidc", asfl) print("FuzAg Communties") for i in finalCommunities: for j in finalCommunities[i]: print(j, end=" ") print() listOfFinalCommunties = [] for i in finalCommunities: listOfFinalCommunties.append(finalCommunities[i]) coverageOfFuzAg = coverage(g, listOfFinalCommunties) print("coverage Of FuzAg", coverageOfFuzAg) print("coverage of greedy_modularity_communities", coverage(g, gmc)) print("coverage of async_lpa_communities", coverage(g, alc)) print("coverage of label_propagation_communities", coverage(g, lpac)) print("coverage of Async Fluid Communties", coverage(g, asfl))
def _compute_coverage(self, partition, weight=None):
    """Return the coverage of ``partition`` on ``self.graph``.

    ``partition`` is first converted with ``_get_community_sets``.
    ``weight`` is accepted but not used by this method.
    """
    graph = self.graph
    community_sets = _get_community_sets(partition)
    return coverage(graph, community_sets)
# ---- Label propagation: group nodes by label and report partition quality
G_treated = label_prop(G, max_iter=100)
# Distinct labels found on the nodes of the processed graph.
labels = list({G_treated.nodes[node]["label"] for node in G_treated.nodes})
partitions = []
for label in labels:
    # One community per label: every node carrying that label.
    partitions.append({
        node
        for node in G_treated.nodes
        if G_treated.nodes[node]["label"] == label
    })
print(
    'modularity, coverage, performance : ',
    modularity(G_treated, partitions),
    coverage(G_treated, partitions),
    performance(G_treated, partitions),
)

# ---- Louvain: turn the node -> community mapping into a list of node sets
partition = community_louvain.best_partition(G)
labels = list({partition[node] for node in G.nodes})
partitions = []
for label in labels:
    partitions.append({node for node in G.nodes if partition[node] == label})
# Sweep the candidate number of clusters at a fixed heat-kernel scale and
# record, for each candidate, the values returned by ``process_sgwl_eu`` plus
# the coverage and modularity of the resulting partition.
num_clusts = list(range(5, 45))
t = 20
cost = sgw.undirected_normalized_heat_kernel(G, t)
d_gws = []
mis = []
coverages = []
modularities = []
for j in num_clusts:
    # ``database`` and ``num_nodes`` come from the surrounding scope.
    mutual_info, d_gw, coup = process_sgwl_eu(cost, database, num_nodes, j)
    partition = get_partition(coup)
    mis.append(mutual_info)
    d_gws.append(d_gw)
    coverages.append(coverage(G, partition))
    modularities.append(modularity(G, partition))
# Estimate number of clusters: the candidate with the highest modularity.
estimated_clusters_raw_sym = num_clusts[np.argmax(modularities)]
print('Number of Clusters:', estimated_clusters_raw_sym)
# Now perform modularity/coverage maximizing pipeline
# (this rebinds ``mis`` and ``d_gws`` from the sweep above).
ts = np.linspace(3, 10, 40)
mis, coups, d_gws, good_t_max, good_t_grad, rt = t_selection_pipeline_undirected_eu( G, ts, estimated_clusters_raw_sym)
coverages = []
for j in range(len(ts)):
    coup = coups[j]
def main():
    """Run every selected algorithm on the small graphs and on LFR graphs.

    Command-line arguments are joined into one string and handed to
    ``parse``, which returns the flags ``h``, ``v``, ``w`` and the values
    ``mu`` and ``mu_val``. With ``h`` set, the usage text is printed and
    nothing else happens. ``v`` sets the module-level ``VERBOSE`` flag and
    ``w`` leaves ``gcm`` out of the algorithm list.

    Results of the small-graph runs (``RESULTS_S``) and of the LFR runs
    (``RESULTS_LFR``) are written as JSON under ``results/``.
    """
    # NOTE(review): block nesting was reconstructed from whitespace-collapsed
    # source - confirm the placement of the separator print,
    # ``parallel_display`` and the final JSON dump.
    global VERBOSE
    # ``mu`` is unpacked but not used below; only ``mu_val`` is.
    h, v, w, mu, mu_val = parse(' '.join(sys.argv[1:]))
    if h:
        print("- Use -v to activate Verbose")
        print("- Use -w to exclude the genetic algorithm from the run")
        print("- Use -mu value to set the value of mu in the graph generators; value should be in the range (0, 1)")
        return
    if v:
        VERBOSE = True
    else:
        VERBOSE = False
    if w:
        algorithms = [clauset_newman_moore, louvain, reneel]
    else:
        algorithms = [clauset_newman_moore, louvain, reneel, gcm]
    if VERBOSE:
        print("Start process")
    # small graphs
    non_lfr_runs(algorithms)
    with open('results/small_c1_test.json', 'w') as fs:
        json.dump(RESULTS_S, fs)
    # lfr benchmark graphs
    sizes = [250, 500, 1000, 1500, 2000, 2500, 3000]
    for n in sizes:
        # ``target_communities`` is unpacked but not used below.
        G, target_partition, target_communities = genrate_lfr_graph(size=n, mu=mu_val)
        nodes_no = n
        edges_no = G.number_of_edges()
        # Indexes the degree view with 0..n-1, i.e. relies on the nodes being
        # labelled with exactly those integers.
        avg_degree = sum([G.degree[i] for i in range(n)]) / nodes_no
        print("========================================================")
        print(nodes_no, edges_no, avg_degree)
        print("========================================================")
        pos = nx.spring_layout(G)
        # Each algorithm result is indexed as r[0] (passed to the display and
        # to ``convert_to_array``), r[1] (passed to the quality measures) and
        # r[2] (stored as the runtime).
        results = [alg(G) for alg in algorithms]
        partitions = [r[0] for r in results]
        metrics = [(coverage(G, r[1]), performance(G, r[1]), normalized_mutual_info_score(convert_to_array(target_partition), convert_to_array(r[0]) )) for r in results]
        runtimes = [r[2] for r in results]
        for idx in range(len(metrics)):
            # ``NAMES`` is indexed in parallel with ``algorithms``.
            RESULTS_LFR[NAMES[idx]][n] = {
                "coverage": metrics[idx][0],
                "performance": metrics[idx][1],
                "nmi": metrics[idx][2],
                "runtime": runtimes[idx],
            }
            if VERBOSE:
                print(
                    f"The coverage obtained by {algorithms[idx].__name__} was " + "%.4f" % metrics[idx][0])
                print(
                    f"The performance obtained by {algorithms[idx].__name__} was " + "%.4f" % metrics[idx][1])
                print(
                    f"The NMI score obtained by {algorithms[idx].__name__} was " + "%.4f" % metrics[idx][2])
                print("========================================================")
        parallel_display(algorithms, partitions, G, pos)
    with open('results/lfr_c1_test.json', 'w') as fb:
        json.dump(RESULTS_LFR, fb)
def _run_small_graph_benchmark(graph, result_key, display_name, algorithms):
    """Run every algorithm on ``graph``, record its metrics and display the partitions.

    Each algorithm result is indexed as ``r[0]`` (handed to
    ``parallel_display``), ``r[1]`` (handed to ``coverage``, ``performance``
    and ``modularity``) and ``r[2]`` (stored as the runtime). Metrics are
    stored in ``RESULTS_S[NAMES[idx]][result_key]``, where ``idx`` is the
    position of the algorithm in ``algorithms``. ``display_name`` is only
    used in the verbose messages.
    """
    # NOTE(review): block nesting was reconstructed from whitespace-collapsed
    # source - confirm that the separator line belongs to the verbose output
    # of each algorithm.
    pos = nx.spring_layout(graph)
    results = [alg(graph) for alg in algorithms]
    partitions = [r[0] for r in results]
    # All metrics are computed before anything is stored, so a failing
    # measure leaves RESULTS_S untouched for this graph.
    metrics = [
        (coverage(graph, r[1]), performance(graph, r[1]), modularity(graph, r[1]))
        for r in results
    ]
    runtimes = [r[2] for r in results]
    for idx, (alg, (cov, perf, mod), runtime) in enumerate(
            zip(algorithms, metrics, runtimes)):
        RESULTS_S[NAMES[idx]][result_key] = {
            "coverage": cov,
            "performance": perf,
            "modularity": mod,
            "runtime": runtime,
        }
        if VERBOSE:
            print(
                f"The coverage obtained by {alg.__name__} "
                f"on the {display_name} graph was {cov:.4f}")
            print(
                f"The performance obtained by {alg.__name__} "
                f"on the {display_name} graph was {perf:.4f}")
            print(
                f"The final modularity obtained by {alg.__name__} "
                f"on the {display_name} graph was {mod:.4f}")
            print("========================================================")
    parallel_display(algorithms, partitions, graph, pos)


def non_lfr_runs(algorithms):
    """Benchmark ``algorithms`` on the Karate Club graph and two caveman graphs.

    Results are stored in ``RESULTS_S`` under the keys ``"Karate"``,
    ``"Caveman46"`` and ``"Caveman73"``.

    :param algorithms: callables taking a graph; see
        ``_run_small_graph_benchmark`` for how their results are indexed.
    """
    # Karate Club graph
    _run_small_graph_benchmark(
        generate_karate_club_graph(), "Karate", "Karate Club", algorithms)
    # simple Caveman graph 4 6
    _run_small_graph_benchmark(
        generate_caveman_graph(cliques=4, size=6), "Caveman46", "Caveman46",
        algorithms)
    # simple Caveman graph 7 3
    _run_small_graph_benchmark(
        generate_caveman_graph(cliques=7, size=3), "Caveman73", "Caveman73",
        algorithms)
print(mod)
# TODO Conductance of Louvain
# TODO Coverage of Louvain
# TODO Adjusted Rand Index of Louvain
# TODO Normalized Mutual Information of Louvain
# TODO Normalized Mutual Information Variant of Louvain
""" Label Propagation Algorithm and Evaluation """
# The communities come back as a one-shot iterable; materialise them so that
# every metric below sees the full set instead of an already-consumed iterator.
cs = list(asyn_lpa.asyn_lpa_communities(graph))
# Modularity
# NOTE(review): arguments are passed as (communities, graph); NetworkX's own
# quality functions take (G, communities) - confirm which ``modularity`` and
# ``coverage`` are imported here and swap the arguments if needed.
mod = modularity(cs, graph)
print(mod)
# Coverage
cov = coverage(cs, graph)
print(cov)
# TODO Conductance of Label Propagation
# TODO Adjusted Rand Index of Label Propagation
# TODO Normalized Mutual Information of Label Propagation
# TODO Normalized Mutual Information Variant of Label Propagation
# TODO implement Smart local moving method
# TODO Evaluation of Smart local moving
# TODO implement Infomap method
# TODO Evaluation of Infomap
def test_good_partition(self):
    """Coverage of a good partition of ``barbell_graph(3, 0)`` should be 6/7."""
    graph = barbell_graph(3, 0)
    communities = [{0, 1, 2}, {3, 4, 5}]
    expected = 6 / 7
    assert_almost_equal(expected, coverage(graph, communities))
def test_bad_partition(self):
    """Coverage of a poor partition of ``barbell_graph(3, 0)`` should be 3/7."""
    graph = barbell_graph(3, 0)
    communities = [{0, 1, 4}, {2, 3, 5}]
    expected = 3 / 7
    assert_almost_equal(expected, coverage(graph, communities))