def test_bad_partition(self):
    """Tests that a poor partition has a low coverage measure."""
    G = barbell_graph(3, 0)
    partition = [{0, 1, 4}, {2, 3, 5}]
    assert 3 / 7 == pytest.approx(coverage(G, partition), abs=1e-7)
    assert 3 / 7 == pytest.approx(partition_quality(G, partition)[0], abs=1e-7)
def test_good_partition(self):
    """Tests that a good partition has a high coverage measure."""
    G = barbell_graph(3, 0)
    partition = [{0, 1, 2}, {3, 4, 5}]
    assert 6 / 7 == pytest.approx(coverage(G, partition), abs=1e-7)
    assert 6 / 7 == pytest.approx(partition_quality(G, partition)[0], abs=1e-7)
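For context on the expected values in these two tests: barbell_graph(3, 0) is two triangles (3 edges each) joined by a single bridge edge, 7 edges in total. The good partition keeps both triangles intact and only cuts the bridge (6 intra-community edges), while the bad partition leaves only 3 edges intra-community. A standalone sketch verifying this, assuming NetworkX >= 2.6 for partition_quality:

import pytest
from networkx import barbell_graph
from networkx.algorithms.community import partition_quality

G = barbell_graph(3, 0)
assert G.number_of_edges() == 7  # 2 triangles + 1 bridge

# partition_quality returns (coverage, performance)
good_cov, _ = partition_quality(G, [{0, 1, 2}, {3, 4, 5}])  # bridge is the only cut edge
bad_cov, _ = partition_quality(G, [{0, 1, 4}, {2, 3, 5}])   # only 3 edges stay intra-community
assert good_cov == pytest.approx(6 / 7)
assert bad_cov == pytest.approx(3 / 7)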
def communitiesRandomModule(graphForCommunities):
    # function for random analysis
    # variables
    com = 0
    communitiesListCNM = []
    listCommunities = []
    communitiesDict = dict()
    community = None
    vertex = None
    networkModularity = 0
    networkCoverage = 0
    networkPerformance = 0
    counter = 0
    # get Clauset-Newman-Moore communities
    communitiesListCNM = list(greedy_modularity_communities(graphForCommunities))
    # build both community representations: a node -> community-id dict for
    # python-louvain's modularity, and a list of node sets for coverage/performance
    for community in communitiesListCNM:
        for vertex in set(community):
            communitiesDict[vertex] = com
        listCommunities.append(set(community))
        com = com + 1
    networkModularity = louv.modularity(communitiesDict, graphForCommunities)
    networkCoverage = coverage(graphForCommunities, listCommunities)
    networkPerformance = performance(graphForCommunities, listCommunities)
    # end of function
    return (networkModularity, len(listCommunities), networkCoverage, networkPerformance)
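Note that coverage() and performance() were deprecated and then removed in NetworkX 3.0; partition_quality() returns both measures in one call. A possible adaptation of the last two metric lines above for NetworkX >= 3.0:

from networkx.algorithms.community import partition_quality

# partition_quality returns (coverage, performance) as a single tuple
networkCoverage, networkPerformance = partition_quality(graphForCommunities, listCommunities)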
def increase_threshold(self):
    size_of_cluster1_list = []
    num_of_cluster_list = []
    measure_list = []
    for alpha in self.alphas:
        # threshold the gene-correlation matrix into a binary adjacency matrix
        causal_matrix = np.where(self.vs_corr_gene > alpha, 1, 0)
        G = nx.Graph(causal_matrix)
        group_class = list(greedy_modularity_communities(G))
        measure = coverage(G, group_class)
        num_of_cluster = len(group_class)
        # greedy_modularity_communities sorts by size, so index 0 is the largest
        indices_in_cluster1 = list(group_class[0])
        size_of_cluster1 = len(indices_in_cluster1)
        print(num_of_cluster)
        print(size_of_cluster1)
        size_of_cluster1_list.append(size_of_cluster1)
        num_of_cluster_list.append(num_of_cluster)
        measure_list.append(measure)

    plt.figure(figsize=(10, 10))
    plt.plot(self.alphas, size_of_cluster1_list, linestyle='-', marker='o', color='b')
    plt.xlabel(r'$\tau$')
    plt.ylabel('Size of the largest community')
    plt.savefig("/home/liz3/Desktop/size_of_leading_community")
    plt.close()

    plt.figure(figsize=(10, 10))
    plt.plot(self.alphas, num_of_cluster_list, linestyle='-', marker='o', color='b')
    plt.xlabel(r'$\tau$')
    plt.ylabel('Number of communities')
    plt.savefig("/home/liz3/Desktop/number_of_community")
    plt.close()

    plt.figure(figsize=(10, 10))
    plt.plot(self.alphas, measure_list, linestyle='-', marker='o', color='b')
    plt.xlabel(r'$\tau$')
    plt.ylabel('Coverage')
    plt.savefig("/home/liz3/Desktop/measure_of_partition")
    plt.close()
def community_detection(self):
    """
    Detects characteristics related to the communities of the graph and
    writes them to the 'Communities.txt' file. It also compares these
    characteristics with a random graph of the same node and edge count.
    """
    partitionx = communityx.greedy_modularity_communities(self.G)

    """Modularity & Coverage"""
    modularity = community.modularity(self.best_parts, self.G)  #XXX
    coverage = communityx.coverage(self.G, partitionx)

    """in the corresponding random graph"""
    # H = nx.gnm_random_graph(self.G.number_of_nodes(), self.G.number_of_edges())
    H = nx.configuration_model([d for v, d in self.G.degree()])
    part = community.best_partition(H)  #XXX
    part2 = communityx.greedy_modularity_communities(H)
    modularity_rand = community.modularity(part, H)
    coverage_rand = communityx.coverage(H, part2)

    """Write File"""
    title = 'Communities.txt'
    com_file = open(self.path + title, 'w')
    com_file.write('Modularity:' + '\n')
    com_file.write(str(modularity) + '\n')
    com_file.write('Coverage' + '\n')
    com_file.write(str(coverage) + '\n')
    com_file.write('The corresponding random graph has modularity:' + '\n')
    com_file.write(str(modularity_rand) + '\n')
    com_file.write('The corresponding random graph has coverage:' + '\n')
    com_file.write(str(coverage_rand))
    com_file.write('\n')
    com_file.write('number of communities:' + '\n')
    com_file.write(str(max(self.best_parts.values()) + 1) + '\n')  #XXX
    # com_file.write(str(max(self.best_parts_x.values())+1)+'\n')
    com_file.write('\n')
    com_file.write(
        'The coverage of a partition is the ratio of the number of intra-community '
        'edges to the total number of edges in the graph.'
    )
    com_file.close()
    return modularity, coverage, modularity_rand, coverage_rand
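One caveat worth noting here: nx.configuration_model returns a MultiGraph that can contain parallel edges and self-loops, which not every community routine handles gracefully. A defensive variant of the random-graph construction (a self-contained sketch, assuming collapsing duplicates is acceptable for this comparison; karate_club_graph stands in for self.G):

import networkx as nx

G = nx.karate_club_graph()  # stand-in for self.G
H = nx.configuration_model([d for v, d in G.degree()])
H = nx.Graph(H)                            # collapse parallel edges
H.remove_edges_from(nx.selfloop_edges(H))  # drop self-loops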
def AnalizeCommunitiesAndMakeDrawings(name, fileName, someNetwork):
    # function message
    print("\t- Obtaining communities (for undir network version)...")
    # variables
    communitiesFinalResult = ""
    communitiesResult = "Communities (Undirected):\n"
    graphForCommunities = nx.Graph()
    read = "ok"
    com = 0
    communitiesListCNM = []
    listCommunities = []
    communitiesDict = dict()
    community = None
    vertex = None
    networkModularity = 0
    networkCoverage = 0
    networkPerformance = 0
    counter = 0
    # get undir graph integer-weighted
    (graphForCommunities, read) = ParseFileToNetwork(fileName, "-u", "community")
    # get Clauset-Newman-Moore communities
    communitiesListCNM = list(greedy_modularity_communities(graphForCommunities))
    # evaluate modularity
    for community in communitiesListCNM:
        for vertex in set(community):
            communitiesDict[vertex] = com
        listCommunities.append(set(community))
        com = com + 1
    networkModularity = louv.modularity(communitiesDict, graphForCommunities)
    networkCoverage = coverage(graphForCommunities, listCommunities)
    networkPerformance = performance(graphForCommunities, listCommunities)
    communitiesResult = communitiesResult + "- Modularity: " + str(networkModularity) + "\n"
    communitiesResult = communitiesResult + "- Coverage: " + str(networkCoverage) + "\n"
    communitiesResult = communitiesResult + "- Performance: " + str(networkPerformance) + "\n"
    communitiesResult = communitiesResult + "- Number of Communities: " + str(len(listCommunities)) + "\n"
    for counter in range(len(listCommunities)):
        communitiesResult = communitiesResult + "- Number of nodes in community " + str(counter + 1) + ": " + str(len(listCommunities[counter])) + "\n"
        communitiesResult = communitiesResult + "- Nodes in C_" + str(counter + 1) + ": " + ",".join(listCommunities[counter]) + "\n"
    # plot graph with communities
    drawCommunities(someNetwork, listCommunities, name)
    # end of function
    communitiesFinalResult = "\n\n" + communitiesResult + "\n\n"
    return (communitiesFinalResult)
def evaluate_coverage(G, commu_list):
    # initiate a dictionary to store coverages
    coverages = {}
    # loop over all community structures
    for c in commu_list:
        communities = c['communities']
        # collect the node lists for the coverage calculation
        commu = []
        # loop over the communities found by one algorithm
        for i in sorted(pd.Series(list(communities.values())).unique()):
            store_list = []
            # loop over all nodes
            for node in G.nodes:
                if communities[node] == i:
                    store_list.append(node)
            commu.append(store_list)
        # calculate coverage
        coverage = nx_comm.coverage(G, commu)
        # store value in `coverages`
        coverages[c['algo']] = coverage
    return coverages
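A hypothetical usage of evaluate_coverage, inferred from the function body (the 'algo' and 'communities' keys and the node -> community-id mapping follow what the code reads, but the example data itself is made up; nx_comm.coverage requires NetworkX < 3.0):

import networkx as nx
import networkx.algorithms.community as nx_comm
import pandas as pd

G = nx.barbell_graph(3, 0)
commu_list = [
    {'algo': 'two_blocks', 'communities': {0: 0, 1: 0, 2: 0, 3: 1, 4: 1, 5: 1}},
]
print(evaluate_coverage(G, commu_list))  # {'two_blocks': 0.857...}, i.e. 6/7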
def test_good_partition(self):
    """Tests that a good partition has a high coverage measure."""
    G = barbell_graph(3, 0)
    partition = [set([0, 1, 2]), set([3, 4, 5])]
    assert_almost_equal(6 / 7, coverage(G, partition))
def test_bad_partition(self):
    """Tests that a poor partition has a low coverage measure."""
    G = barbell_graph(3, 0)
    partition = [set([0, 1, 4]), set([2, 3, 5])]
    assert_almost_equal(3 / 7, coverage(G, partition))
def test_good_partition(self):
    """Tests that a good partition has a high coverage measure."""
    G = barbell_graph(3, 0)
    partition = [{0, 1, 2}, {3, 4, 5}]
    assert almost_equal(6 / 7, coverage(G, partition))
def test_bad_partition(self):
    """Tests that a poor partition has a low coverage measure."""
    G = barbell_graph(3, 0)
    partition = [{0, 1, 4}, {2, 3, 5}]
    assert almost_equal(3 / 7, coverage(G, partition))
def coverage_wrapper(partition, G):
    community_map = extract_community_map(partition)
    return algorithms.coverage(G, community_map)
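extract_community_map is not defined in this snippet. A plausible implementation (an assumption on my part; the real helper may differ) groups nodes by community id so the result matches the list-of-sets format that coverage() expects:

from collections import defaultdict

def extract_community_map(partition):
    """Group a node -> community-id mapping into a list of node sets (hypothetical helper)."""
    groups = defaultdict(set)
    for node, com_id in partition.items():
        groups[com_id].add(node)
    return list(groups.values())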
def get_quality_metrics(self, station_df, lst_graphs):
    import dunn as di
    from sklearn import metrics
    import networkx as nx
    import networkx.algorithms.community as nx_comm
    import numpy as np
    import pandas as pd
    import traceback

    quality_metric_df = pd.DataFrame([])

    try:
        #d _n_num_clust = len(station_df['label'].unique())  # Generated Cluster Count
        _n_num_clust = len([x for x in station_df['label'].unique() if x > -1])  # Generated Cluster Count
        if _n_num_clust <= 1:
            raise ValueError('Cannot compute quality metric for %d clusters' % (_n_num_clust))

        ''' returns the simple graph of the clusters and the set dictionary of cluster nodes '''
        G_simple_, l_G_clusters_ = self.__get_graph_n_labels(station_df)

        _s_st_types = str(station_df['st_type'].unique())  # Station Types
        _n_tot_num_st = station_df.shape[0]  # Station Quantity
        _f_min_dist = self._max_distance  # Minimum Distance
        _n_min_pts = self._minimum_samples  # Minimum Points
        _s_clust = str(self._name)  # Clustering Name
        _s_algo = str(self._algorithm)  # Algorithm
        _s_metric = str(self._metric)  # Metric
        _s_method = str(self._cluster_method)  # Method
        _s_seed = str(self._seed)  # Seed

        __lst_valid_cloud_clust = [
            frozenset(clust) for clust in l_G_clusters_
            if len(clust) >= self._minimum_samples
        ]
        _n_valid_clust = len(__lst_valid_cloud_clust)  # Valid Cluster Count

        # Clustered Station Count
        _n_sts_in_clusters = 0
        for x in __lst_valid_cloud_clust:
            _n_sts_in_clusters += len(x)

        _n_noise = station_df.shape[0] - _n_sts_in_clusters  # Unclustered Noise Count
        _n_avg_deg = sum([
            d for n, d in G_simple_.degree()
            if G_simple_.nodes[n]["label"] > -1
        ]) / _n_sts_in_clusters  # Average Node Degree

        ''' Compute the accuracy of the r-regularity constraint on the individual
        clusters by considering the systematic error, i.e. the reproducible
        inaccuracy that is consistent for the same clustering strategy. For that
        we apply the weighted mean absolute error to estimate the deviation from
        the expected degree. '''
        sum_deg_abs_err = 0
        _deg_wmae = 0
        _deg_err_st_count = 0
        #p print("\nclusters:", len(lst_graphs))
        for H in lst_graphs:
            H = nx.Graph(H)
            H.remove_nodes_from(list(nx.isolates(H)))
            H.remove_nodes_from([n for n, v in H.nodes(data=True) if v["label"] == -1])
            H_deg_abs_err = 0
            _l_deg_diff = []
            if H.number_of_nodes() > 0:
                _l_deg_diff = [
                    _n_min_pts - 1 - d for n, d in H.degree()
                    if (int(d) < int(_n_min_pts - 1) and H.nodes[n]["label"] > -1)
                ]
                if len(_l_deg_diff) > 0:
                    #p print("\ndegree mean absolute error")
                    #p print("minPts:", _n_min_pts)
                    #p print("list deg diff:", _l_deg_diff)
                    #p print("graph nodes:", sorted([d for n, d in H.degree()]))
                    sum_deg_abs_err += sum(_l_deg_diff)
                    _deg_err_st_count += len(_l_deg_diff)
        if _deg_err_st_count > 0:
            _deg_wmae = sum_deg_abs_err / (_deg_err_st_count * (_n_min_pts - 1))
            #p print("_deg_wmae", _deg_wmae, _deg_err_st_count)

        ''' prepare valid stations for measuring the quality '''
        lst_st = list(nx.get_node_attributes(G_simple_, 'pos').values())
        lst_lbl = list(nx.get_node_attributes(G_simple_, 'label').values())

        _f_silhouette = metrics.silhouette_score(lst_st, lst_lbl, metric='haversine')  # Silhouette Coefficient
        _f_cal_har = metrics.calinski_harabasz_score(lst_st, lst_lbl)  # Calinski Harabaz score
        _f_dav_bould = metrics.davies_bouldin_score(lst_st, lst_lbl)  # Davies Bouldin score
        _f_dunn = di.dunn_fast(lst_st, lst_lbl)  # Dunn Index
        _f_modul = nx_comm.modularity(G_simple_, l_G_clusters_)  # Modularity
        try:
            l_conductance = list(
                nx.conductance(G_simple_, cluster_i, weight='distance')
                for cluster_i in __lst_valid_cloud_clust)
            _f_conduct = sum(l_conductance) / len(l_conductance)  # Conductance Average
        except Exception:
            _f_conduct = 0
        _f_cover = nx_comm.coverage(G_simple_, l_G_clusters_)  # Coverage Score
        _f_perform = nx_comm.performance(G_simple_, l_G_clusters_)  # Performance Score

        dict_quality_mesrs = {
            'Station Types': _s_st_types,
            'Station Quantity': _n_tot_num_st,
            'Maximum Distance': _f_min_dist,
            'Minimum Points': _n_min_pts,
            'Name': _s_clust,
            'Algorithm': _s_algo,
            'Metric': _s_metric,
            'Method': _s_method,
            'Seed': _s_seed,
            'Generated Cluster Count': _n_num_clust,
            'Valid Cluster Count': _n_valid_clust,
            'Clustered Station Count': _n_sts_in_clusters,
            'Unclustered Noise Count': _n_noise,
            'Average Station Degree': _n_avg_deg,
            'Degree Weighted Mean Absolute Error': _deg_wmae,
            'Degree Error Station Count': _deg_err_st_count,
            'Silhouette Coefficient': _f_silhouette,
            'Calinski Harabaz score': _f_cal_har,
            'Davies Bouldin score': _f_dav_bould,
            'Dunn Index': _f_dunn,
            'Modularity': _f_modul,
            'Conductance Average': _f_conduct,
            'Coverage Score': _f_cover,
            'Performance Score': _f_perform,
        }
        # print('Dict qual', dict_quality_mesrs['Seed'])
        quality_metric_df = pd.DataFrame(dict_quality_mesrs, index=[_s_clust])
        quality_metric_df.reset_index(drop=True, inplace=True)

    except Exception as err:
        print("Class cluster_quality_metric [get_quality_metrics] Error message:", err)
        # print(G_simple_.edges('distance'))
        print(traceback.format_exc())

    return quality_metric_df
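One pitfall in the function above: nx_comm.modularity and nx_comm.coverage both require l_G_clusters_ to be a complete partition of G_simple_'s nodes and raise NotAPartition otherwise, so noise stations (label == -1) must still appear in some set, e.g. a catch-all noise cluster. A cheap pre-check (a sketch, assuming no node is shared between clusters):

# every node must be covered exactly once across the cluster sets
assert sum(len(c) for c in l_G_clusters_) == G_simple_.number_of_nodes(), \
    "l_G_clusters_ must partition all nodes, including noise (label == -1)"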
def test_good_partition(self):
    """Tests that a good partition has a high coverage measure."""
    G = barbell_graph(3, 0)
    partition = [{0, 1, 2}, {3, 4, 5}]
    assert_almost_equal(6 / 7, coverage(G, partition))
def test_bad_partition(self):
    """Tests that a poor partition has a low coverage measure."""
    G = barbell_graph(3, 0)
    partition = [{0, 1, 4}, {2, 3, 5}]
    assert_almost_equal(3 / 7, coverage(G, partition))
# Plot the partitioned graph
plt.figure()
node_poses = nx.spring_layout(G_parted)
nx.draw_networkx_nodes(G_parted, node_poses, node_size=50)
nx.draw_networkx_labels(G_parted, node_poses, labels=bundlename_ref_parted_str)
#nx.draw_networkx_edges(G_partitioned, node_poses)

conn_comps_G_parted = [G_parted.subgraph(c).copy()
                       for c in nx.connected_components(G_parted)]

# time the community detection
start = time.time()
c = list(community.greedy_modularity_communities(G))
end = time.time()
total_time = end - start

G_coverage = community.coverage(G, c)

# draw each community as its own subgraph
plt.figure()
for i in range(len(c)):
    subgraph = G_nearest.subgraph(c[i])
    node_poses_nearest = nx.spring_layout(subgraph)
    nx.draw_networkx_nodes(subgraph, node_poses_nearest, node_size=50)
    nx.draw_networkx_labels(subgraph, node_poses_nearest, labels=cardname_ref)
plt.show()

plt.figure()
plt.imshow(conn_matrix)
plt.show()

plt.figure()
plt.imshow(nearest_neighbour_matrix)
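A final note on this snippet: `community` here must refer to networkx.algorithms.community (home of greedy_modularity_communities and, before NetworkX 3.0, coverage), not the unrelated python-louvain package, which also installs a top-level module named `community`. An explicit alias makes the assumed import unambiguous (a sketch):

import networkx.algorithms.community as community  # NetworkX's community algorithms, not python-louvain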