def export_log(G, communities, dataset, algorithm, d_threshold, w_threshold, path):
    """Export a community detection result to a log file for manual analysis.

    The file starts with a header (dataset, algorithm, both thresholds, a
    timestamp, the number of communities, and the modularity and performance
    scores rounded to 3 decimals) followed by one line per community listing
    its members separated by ", ".

    :param G: graph the communities belong to (passed to the quality metrics)
    :param communities: sized collection of node collections
    :param dataset: dataset name written to the header
    :param algorithm: algorithm name written to the header
    :param d_threshold: threshold value written to the header
    :param w_threshold: threshold value written to the header
    :param path: destination file; an existing file is overwritten
    """
    # Build the complete header BEFORE opening the file: opening with 'w'
    # truncates it, so a failure inside the quality metrics would otherwise
    # leave an empty log behind.
    header = "\n".join([
        f"dataset: {dataset}",
        f"algorithm: {algorithm}",
        f"d_threshold: {d_threshold}",
        f"w_threshold: {w_threshold}",
        f"time: {time.asctime()}",
        "-------------------------------------",
        f"communities: {len(communities)}",
        f"modularity: {round(modularity(G, communities), 3)}",
        f"performance: {round(performance(G, communities), 3)}",
        "=====================================",
    ]) + "\n"

    with open(path, 'w') as f:
        f.write(header)
        # One community per line. Nodes are converted with str() so that
        # non-string node ids (e.g. integers) do not break the join.
        for community in communities:
            f.write(", ".join(str(node) for node in community) + '\n')

    print("[Done] export log file:", path)
def test_good_partition(self):
    """Check that a good partition of the barbell graph scores 14/15."""
    graph = barbell_graph(3, 0)
    good_split = [{0, 1, 2}, {3, 4, 5}]
    expected = 14 / 15
    measured = performance(graph, good_split)
    assert_almost_equal(expected, measured)
def best_split(wordPairs):
    """Find the Girvan-Newman partition with the best quality score.

    A graph is built from the word pairs, then every level produced by
    ``girvan_newman`` is scored with the harmonic mean of its performance
    and coverage. The first level reaching the highest score wins.

    :param wordPairs: iterable of items whose first element is a pair of
        nodes, i.e. ``item[0][0]`` and ``item[0][1]`` are the edge endpoints;
        any further elements of an item are ignored
    :return: ``(level index of the best partition, best score, best
        partition)``; ``(None, 0, None)`` when no level scores above 0
    """
    from networkx.algorithms import community
    from networkx.algorithms.community.quality import performance, coverage
    import networkx as nx

    Graph = nx.Graph()
    Graph.add_edges_from([(pair[0][0], pair[0][1]) for pair in wordPairs])

    max_pc = 0
    max_index = None
    best_communities = None
    for i, communities in enumerate(community.girvan_newman(Graph)):
        p = performance(Graph, communities)
        c = coverage(Graph, communities)
        # Harmonic mean of the two measures. When both are 0 the mean is
        # defined as 0 here instead of dividing by zero.
        score = 2 * p * c / (p + c) if (p + c) else 0
        if score > max_pc:
            max_index = i
            max_pc = score
            best_communities = communities
    return (max_index, max_pc, best_communities)
def calc_graph_measures(data_matrix, thresh=0):
    """Compute summary graph measures from an adjacency matrix.

    Entries whose absolute value is below ``thresh`` are set to zero on a
    copy of the input (the caller's matrix is left untouched), a graph is
    built from the result, and several networkx measures are evaluated.

    :param data_matrix: ``numpy.ndarray`` (only its real part is used) or
        ``pandas.DataFrame`` adjacency matrix
    :param thresh: magnitude below which an entry is zeroed
    :return: dict with keys ``degree``, ``eccentricity``,
        ``global_efficiency``, ``characteristic_path_length``,
        ``betweenness_centrality``, ``clustering_coefficient`` and
        ``modularity``
    :raises TypeError: if ``data_matrix`` is neither an ndarray nor a
        DataFrame
    """
    from networkx import eccentricity
    from networkx.algorithms.efficiency import global_efficiency
    from networkx.algorithms.shortest_paths.generic import average_shortest_path_length
    from networkx.algorithms.centrality import betweenness_centrality
    from networkx.algorithms.cluster import average_clustering
    from networkx.algorithms.community.modularity_max import greedy_modularity_communities
    from networkx.algorithms.community.quality import performance

    def _avg_values(results):
        """Mean of a dict's values, or of the second item of each tuple in a list."""
        values = []
        if isinstance(results, dict):
            values = list(results.values())
        elif isinstance(results, list):
            values = [tup[1] for tup in results]
        return np.mean(values)

    if not isinstance(data_matrix, (np.ndarray, pd.DataFrame)):
        # Previously this fell through both type checks and failed later
        # with an unbound ``graph`` variable.
        raise TypeError(
            "data_matrix must be a numpy.ndarray or a pandas.DataFrame, got "
            + type(data_matrix).__name__)

    # Threshold a copy so the caller's matrix is not modified in place.
    data_matrix = data_matrix.copy()
    data_matrix[np.abs(data_matrix) < thresh] = 0

    if isinstance(data_matrix, np.ndarray):
        graph = networkx.convert_matrix.from_numpy_matrix(np.real(data_matrix))
    else:
        graph = networkx.convert_matrix.from_pandas_adjacency(data_matrix)

    degree = list(graph.degree)
    global_eff = global_efficiency(graph)
    b_central = betweenness_centrality(graph)
    # NOTE: the value stored under the 'modularity' key is the *performance*
    # score of the greedy-modularity partition, not the modularity itself.
    modularity = performance(graph, greedy_modularity_communities(graph))

    # Each of the following measures falls back to a zero value when
    # networkx raises NetworkXError for it.
    try:
        ecc = eccentricity(graph)
    except networkx.exception.NetworkXError:
        ecc = [(0, 0)]
    try:
        clust = average_clustering(graph)
    except networkx.exception.NetworkXError:
        clust = 0
    try:
        char_path = average_shortest_path_length(graph)
    except networkx.exception.NetworkXError:
        char_path = 0

    graph_dict = {'degree': _avg_values(degree),
                  'eccentricity': _avg_values(ecc),
                  'global_efficiency': global_eff,
                  'characteristic_path_length': char_path,
                  'betweenness_centrality': _avg_values(b_central),
                  'clustering_coefficient': clust,
                  'modularity': modularity}
    return graph_dict
def clustering_statistics(self, community_partition, feat_name, feat_desc, feat_interpret):
    """Register the quality features of a community partition.

    Six features are added through ``self.add_feature``, in this order:
    modularity, coverage, performance, inter-community edges,
    inter-community non-edges and intra-community edges. Each feature is a
    callable taking a graph and evaluating the matching ``quality`` function
    on that graph and ``community_partition``.
    """
    suffix_desc = " of the partition of " + feat_desc

    def _register(name_suffix, label, metric_name):
        # A dedicated call per metric gives every lambda its own binding of
        # ``metric_name``; the metric is looked up only when the feature runs.
        self.add_feature(
            feat_name + name_suffix,
            lambda graph: getattr(quality, metric_name)(graph, community_partition),
            label + suffix_desc,
            feat_interpret,
        )

    _register("_modularity", "Modularity", "modularity")
    _register("_coverage", "Coverage", "coverage")
    _register("_performance", "Performance", "performance")
    _register("_inter_community_edges", "Inter community edges",
              "inter_community_edges")
    _register("_inter_community_non_edges", "Inter community non edges",
              "inter_community_non_edges")
    _register("_intra_community_edges", "Intra community edges",
              "intra_community_edges")
def _compute_performance(self, partition, weight=None):
    """Return the performance score of ``partition`` on ``self.graph``.

    The partition is first converted with ``_get_community_sets``.
    ``weight`` is accepted but not used here.
    """
    graph = self.graph
    community_sets = _get_community_sets(partition)
    return performance(graph, community_sets)
##--------------------- print for label propagation result
G_treated = label_prop(G, max_iter=100)

# Distinct labels carried by the nodes of the treated graph.
labels = list({G_treated.nodes[node]["label"] for node in G_treated.nodes})

# One node set per distinct label.
partitions = [
    {node for node in G_treated.nodes
     if G_treated.nodes[node]["label"] == label}
    for label in labels
]

print('modularity, coverage, performance : ',
      modularity(G_treated, partitions),
      coverage(G_treated, partitions),
      performance(G_treated, partitions))

##--------------------- print for louvain result
partition = community_louvain.best_partition(G)

# Same grouping as above, this time driven by the Louvain node -> label map.
labels = list({partition[node] for node in G.nodes})
partitions = [
    {node for node in G.nodes if partition[node] == label}
    for label in labels
]
def main():
    """Command-line entry point: benchmark the community detection algorithms.

    Flags (as described by the built-in help text): ``-v`` enables verbose
    output, ``-w`` leaves the genetic algorithm (``gcm``) out of the run and
    ``-mu value`` sets the ``mu`` passed to the LFR graph generator.

    The small-graph results (``RESULTS_S``) are dumped to
    ``results/small_c1_test.json`` and the LFR results (``RESULTS_LFR``) to
    ``results/lfr_c1_test.json``.
    """
    global VERBOSE
    # Local import: only needed to make sure the output directory exists.
    import os

    h, v, w, _mu, mu_val = parse(' '.join(sys.argv[1:]))
    if h:
        print("- Use -v to activate Verbose")
        print("- Use -w to exclude the genetic algorithm from the run")
        print("- Use -mu value to set the value of mu in the graph generators; value should be in the range (0, 1)")
        return

    VERBOSE = bool(v)

    algorithms = [clauset_newman_moore, louvain, reneel]
    if not w:
        algorithms.append(gcm)

    if VERBOSE:
        print("Start process")

    # Create the output directory up front so the (expensive) runs below
    # cannot be lost to a missing 'results/' folder when dumping the JSON.
    os.makedirs('results', exist_ok=True)

    # small graphs
    non_lfr_runs(algorithms)
    with open('results/small_c1_test.json', 'w') as fs:
        json.dump(RESULTS_S, fs)

    # lfr benchmark graphs
    sizes = [250, 500, 1000, 1500, 2000, 2500, 3000]
    for n in sizes:
        G, target_partition, _target_communities = genrate_lfr_graph(size=n, mu=mu_val)
        nodes_no = n
        edges_no = G.number_of_edges()
        # Sum the degrees through the degree view instead of indexing by
        # range(n), which only works when nodes are labelled 0..n-1.
        avg_degree = sum(degree for _, degree in G.degree()) / nodes_no
        print("========================================================")
        print(nodes_no, edges_no, avg_degree)
        print("========================================================")

        pos = nx.spring_layout(G)
        # Each algorithm result is indexed as: [0] partition (used for NMI
        # and display), [1] communities (used for coverage/performance),
        # [2] runtime.
        results = [alg(G) for alg in algorithms]
        partitions = [r[0] for r in results]

        # The ground-truth labelling is the same for every algorithm.
        target_labels = convert_to_array(target_partition)

        for idx, (alg, result) in enumerate(zip(algorithms, results)):
            cov = coverage(G, result[1])
            perf = performance(G, result[1])
            nmi = normalized_mutual_info_score(
                target_labels, convert_to_array(result[0]))
            RESULTS_LFR[NAMES[idx]][n] = {
                "coverage": cov,
                "performance": perf,
                "nmi": nmi,
                "runtime": result[2],
            }
            if VERBOSE:
                print(f"The coverage obtained by {alg.__name__} was {cov:.4f}")
                print(f"The performance obtained by {alg.__name__} was {perf:.4f}")
                print(f"The NMI score obtained by {alg.__name__} was {nmi:.4f}")
                # NOTE(review): the original indentation was lost; the
                # separator is assumed to close each verbose report.
                print("========================================================")

        parallel_display(algorithms, partitions, G, pos)

    # NOTE(review): assumed to run once after all sizes were processed
    # (original indentation lost) - confirm against the upstream file.
    with open('results/lfr_c1_test.json', 'w') as fb:
        json.dump(RESULTS_LFR, fb)
def _run_small_benchmark(graph, result_key, display_name, algorithms):
    """Run every algorithm on one small graph, record and display the results.

    Coverage, performance, modularity and runtime of each algorithm are
    stored in ``RESULTS_S[NAMES[idx]][result_key]``; ``display_name`` is the
    graph name used in the verbose messages.
    """
    pos = nx.spring_layout(graph)
    # Each algorithm result is indexed as: [0] partition (used for display),
    # [1] communities (used for the quality metrics), [2] runtime.
    results = [alg(graph) for alg in algorithms]
    partitions = [r[0] for r in results]

    for idx, (alg, result) in enumerate(zip(algorithms, results)):
        cov = coverage(graph, result[1])
        perf = performance(graph, result[1])
        mod = modularity(graph, result[1])
        RESULTS_S[NAMES[idx]][result_key] = {
            "coverage": cov,
            "performance": perf,
            "modularity": mod,
            "runtime": result[2],
        }
        if VERBOSE:
            print(
                f"The coverage obtained by {alg.__name__} on the {display_name} graph was "
                + "%.4f" % cov)
            print(
                f"The performance obtained by {alg.__name__} on the {display_name} graph was "
                + "%.4f" % perf)
            print(
                f"The final modularity obtained by {alg.__name__} on the {display_name} graph was "
                + "%.4f" % mod)
            # NOTE(review): the original indentation was lost; the separator
            # is assumed to close each verbose report.
            print("========================================================")

    parallel_display(algorithms, partitions, graph, pos)


def non_lfr_runs(algorithms):
    """Benchmark the algorithms on the small, non-LFR graphs.

    Three graphs are processed in order: the Karate Club graph, a caveman
    graph with 4 cliques of size 6 and a caveman graph with 7 cliques of
    size 3. Results go to ``RESULTS_S`` under the keys ``"Karate"``,
    ``"Caveman46"`` and ``"Caveman73"``.

    :param algorithms: callables taking a graph and returning a result
        indexable as (partition, communities, runtime)
    """
    # Karate Club graph
    _run_small_benchmark(
        generate_karate_club_graph(), "Karate", "Karate Club", algorithms)

    # simple Caveman graph 4 6
    _run_small_benchmark(
        generate_caveman_graph(cliques=4, size=6), "Caveman46", "Caveman46",
        algorithms)

    # simple Caveman graph 7 3
    _run_small_benchmark(
        generate_caveman_graph(cliques=7, size=3), "Caveman73", "Caveman73",
        algorithms)
print()
# Raw community structures returned by the baseline algorithms.
for caption, found in (("gmc", gmc), ("alc", alc), ("lpac", lpac),
                       ("async_fluidc", asfl)):
    print(caption, found)

print("FuzAg Communties")
for community_id in finalCommunities:
    print(community_id, end="-> ")
    for member in finalCommunities[community_id]:
        print(member, end=" ")
    print()

# Communities as a plain list, in the iteration order of finalCommunities.
listOfFinalCommunties = [finalCommunities[key] for key in finalCommunities]

coverageOfFuzAg = coverage(Graph, listOfFinalCommunties)
print("coverage Of FuzAg", coverageOfFuzAg)
for caption, found in (
        ("coverage of greedy_modularity_communities", gmc),
        ("coverage of async_lpa_communities", alc),
        ("coverage of label_propagation_communities", lpac),
        ("coverage of Async Fluid Communties", asfl)):
    print(caption, coverage(Graph, found))

performanceOfFuzAg = performance(Graph, listOfFinalCommunties)
print("performance Of FuzAg", performanceOfFuzAg)
for caption, found in (
        ("performance of greedy_modularity_communities", gmc),
        ("performance of async_lpa_communities", alc),
        ("performance of label_propagation_communities", lpac),
        ("performance of Async Fluid Communties", asfl)):
    print(caption, performance(Graph, found))
def test_bad_partition(self):
    """Check that a poor partition of the barbell graph scores only 8/15."""
    graph = barbell_graph(3, 0)
    poor_split = [{0, 1, 4}, {2, 3, 5}]
    expected = 8 / 15
    measured = performance(graph, poor_split)
    assert_almost_equal(expected, measured)
def export_gml(G, communities, path):
    """Write a copy of ``G`` tagged with community ids to a GML file.

    Every node receives a ``community`` attribute holding the index of the
    community it was listed in (the last one wins if a node appears in
    several). ``G`` itself is not modified.
    """
    annotated = G.copy()

    # node -> {'community': index of the community containing it}
    node_group = {
        v: {'community': com_num}
        for com_num, community in enumerate(communities)
        for v in community
    }

    nx.set_node_attributes(annotated, node_group)
    nx.write_gml(annotated, path)
    print("Already export gml file!")


G = nx.readwrite.gml.read_gml("weighted_graph.gml")

communities_gn = girvan_newman(G, 6)
communities_lp = label_propagation(G)
communities_mc = markov_cluster(G, power=2, inflation=2, numIter=5, decimals=2)

# Report size, modularity and performance of each detected partition.
for _tag, _found in (("gn:", communities_gn),
                     ("lp:", communities_lp),
                     ("mc:", communities_mc)):
    print(_tag, len(_found),
          quality.modularity(G, _found),
          quality.performance(G, _found))

export_gml(G, communities_lp, "test_lp.gml")