def test_good_partition(self):
    """Tests that a good partition has a high performance measure.

    The graph is ``barbell_graph(3, 0)`` and the partition groups nodes
    0-2 and 3-5; the expected performance is 14 / 15.
    """
    G = barbell_graph(3, 0)
    # Set literals rather than set([...]): same values, no throwaway lists.
    partition = [{0, 1, 2}, {3, 4, 5}]
    assert_almost_equal(14 / 15, performance(G, partition))
def test_good_partition(self):
    """Tests that a good partition has a high performance measure.

    Expects 14 / 15 for ``barbell_graph(3, 0)`` split into nodes 0-2
    and nodes 3-5.
    """
    graph = barbell_graph(3, 0)
    communities = [{0, 1, 2}, {3, 4, 5}]
    score = performance(graph, communities)
    assert almost_equal(14 / 15, score)
def test_good_partition(self):
    """Check that a good partition scores a high performance measure.

    The same expected value (14 / 15) is checked against ``performance``
    and against element 1 of the ``partition_quality`` result.
    """
    graph = barbell_graph(3, 0)
    communities = [{0, 1, 2}, {3, 4, 5}]
    expected = 14 / 15
    assert expected == pytest.approx(performance(graph, communities), abs=1e-7)
    assert expected == pytest.approx(
        partition_quality(graph, communities)[1], abs=1e-7
    )
def test_good_partition(self):
    """Check that a good partition scores a high performance measure.

    Expects 14 / 15 for ``barbell_graph(3, 0)`` split into nodes 0-2
    and nodes 3-5.
    """
    graph = barbell_graph(3, 0)
    communities = [{0, 1, 2}, {3, 4, 5}]
    score = performance(graph, communities)
    assert_almost_equal(14 / 15, score)
def test_bad_partition(self):
    """Check that a poor partition scores a low performance measure.

    The same expected value (8 / 15) is checked against ``performance``
    and against element 1 of the ``partition_quality`` result.
    """
    graph = barbell_graph(3, 0)
    communities = [{0, 1, 4}, {2, 3, 5}]
    expected = 8 / 15
    assert expected == pytest.approx(performance(graph, communities), abs=1e-7)
    assert expected == pytest.approx(
        partition_quality(graph, communities)[1], abs=1e-7
    )
def opt_async_fluid(G, kmin, kmax, verbose=False, rep=2):
    """Search ``kmin..kmax`` for the community count with the best performance.

    For every repetition and every ``k`` in the inclusive range, the graph
    is partitioned with ``async_fluid(G, k)`` and the result is scored with
    ``community.performance``. The highest-scoring partition seen so far is
    kept. Progress and the final result are printed to stdout.

    G: graph to partition.
    kmin, kmax: inclusive bounds on the number of communities to try.
    verbose: when true, every candidate partition is printed as well.
    rep: number of repetitions to try (a run may have had a bad
        initialization).

    Returns the best partition as a list of communities, or the initial
    placeholder ``0`` when no candidate scored above zero (which includes
    an empty ``k`` range or ``rep <= 0``).
    """
    bestcom = 0
    bestp = 0
    bestk = 0
    for i in range(rep):
        # Single-argument print(...) produces the same output on Python 2
        # and Python 3; the bare print statement only parses on Python 2.
        print("rep = " + str(i + 1))
        for k in range(kmin, kmax + 1):
            com = async_fluid(G, k)
            print("k = " + str(k))
            # Materialize the communities so they can be printed, scored
            # and returned.
            partition = list(com)
            if verbose:
                print(partition)
            p = community.performance(G, partition)
            if p > bestp:
                bestp = p
                bestcom = partition
                bestk = k
        # NOTE(review): the original indentation of this progress line was
        # not recoverable; it is emitted once per repetition here - confirm.
        print("Current best K = " + str(bestk))
    print("Best K: " + str(bestk))
    print("Best P: " + str(bestp))
    return bestcom
def communitiesRandomModule(graphForCommunities):
    """Score the greedy-modularity communities of ``graphForCommunities``.

    Communities come from ``greedy_modularity_communities``
    (Clauset-Newman-Moore). They are scored with ``louv.modularity`` (via a
    node -> community-index mapping), ``coverage`` and ``performance``.

    Returns a tuple ``(modularity, number of communities, coverage,
    performance)``.
    """
    # get Clauset-Newman-Moore communities
    detected = list(greedy_modularity_communities(graphForCommunities))

    # build both representations: node -> index mapping and list of sets
    memberSets = []
    nodeToCommunity = {}
    for index, members in enumerate(detected):
        memberSet = set(members)
        for node in memberSet:
            nodeToCommunity[node] = index
        memberSets.append(memberSet)

    modularityScore = louv.modularity(nodeToCommunity, graphForCommunities)
    coverageScore = coverage(graphForCommunities, memberSets)
    performanceScore = performance(graphForCommunities, memberSets)

    return (modularityScore, len(memberSets), coverageScore, performanceScore)
def AnalizeCommunitiesAndMakeDrawings(name, fileName, someNetwork):
    """Detect communities for a network file, draw them and report on them.

    The graph is loaded with ``ParseFileToNetwork(fileName, "-u",
    "community")`` and partitioned with ``greedy_modularity_communities``
    (Clauset-Newman-Moore). The partition is scored with
    ``louv.modularity``, ``coverage`` and ``performance``, and
    ``drawCommunities(someNetwork, communities, name)`` is called to plot
    it.

    Returns the textual report (scores, community count, and the size and
    comma-joined members of each community) wrapped in two blank lines on
    each side. Joining the members requires the nodes to be strings.
    """
    # function message
    print("\t- Obtaining communities (for undir network version)...")

    # get undir graph integer-weighted; the second element is not used
    (undirectedGraph, _status) = ParseFileToNetwork(fileName, "-u",
                                                    "community")

    # get Clauset-Newman-Moore communities
    detected = list(greedy_modularity_communities(undirectedGraph))

    # build both representations: node -> index mapping and list of sets
    memberSets = []
    nodeToCommunity = {}
    for index, members in enumerate(detected):
        memberSet = set(members)
        for node in memberSet:
            nodeToCommunity[node] = index
        memberSets.append(memberSet)

    modularityScore = louv.modularity(nodeToCommunity, undirectedGraph)
    coverageScore = coverage(undirectedGraph, memberSets)
    performanceScore = performance(undirectedGraph, memberSets)

    # assemble the report piece by piece and join once at the end
    reportParts = [
        "Communities (Undirected):\n",
        "- Modularity: " + str(modularityScore) + "\n",
        "- Coverage: " + str(coverageScore) + "\n",
        "- Performance: " + str(performanceScore) + "\n",
        "- Number of Communities: " + str(len(memberSets)) + "\n",
    ]
    for number, memberSet in enumerate(memberSets, start=1):
        reportParts.append("- Number of nodes in community " + str(number) +
                           ": " + str(len(memberSet)) + "\n")
        reportParts.append("- Nodes in C_" + str(number) + ": " +
                           ",".join(memberSet) + "\n")

    # plot graph with communities
    drawCommunities(someNetwork, memberSets, name)

    return "\n\n" + "".join(reportParts) + "\n\n"
def girvan_newman(G, verbose=False):
    """Pick the Girvan-Newman level with the best performance score.

    Iterates over the successive community levels produced by
    ``community.girvan_newman(G)``, scores each one with
    ``community.performance`` and keeps the highest-scoring level.

    G = Graph to look at
    verbose = whether or not to show steps (each level with its quality,
        then the best communities found)

    Returns the best-scoring level of communities, or the initial
    placeholder ``0`` when no level scored above zero.
    """
    communities_generator = community.girvan_newman(G)
    bestcom = 0
    bestq = 0
    for next_level_communities in communities_generator:
        # iterate over dendrograms
        quality = community.performance(G, next_level_communities)
        if quality > bestq:
            bestq = quality
            bestcom = next_level_communities
        # NOTE(review): nesting reconstructed from collapsed source; both
        # per-level prints are assumed to be gated by ``verbose`` - confirm.
        if verbose:
            # Single-argument print(...) behaves the same on Python 2 and
            # 3, and matches the print(...) call already used here.
            print(sorted(map(sorted, next_level_communities)))
            print("quality = {}".format(quality))
    if verbose:
        print("Best Communities:")
        print(sorted(map(sorted, bestcom)))
    return bestcom
def evaluate_performance(G, commu_list):
    """Compute the performance score of each algorithm's communities.

    Each entry of ``commu_list`` is read through its ``'communities'`` key
    (a node -> community-label mapping) and its ``'algo'`` key. The mapping
    is regrouped into one node list per label, in sorted label order and
    ``G.nodes`` order within a label, then scored with
    ``nx_comm.performance``.

    Returns a dict mapping each ``'algo'`` value to its score.
    """
    scores = {}
    for entry in commu_list:
        membership = entry['communities']
        labels = sorted(pd.Series(membership.values()).unique())
        # one list of nodes per community label
        groups = [
            [node for node in G.nodes if membership[node] == label]
            for label in labels
        ]
        scores[entry['algo']] = nx_comm.performance(G, groups)
    return scores
def test_bad_partition(self):
    """Tests that a poor partition has a low performance measure.

    The graph is ``barbell_graph(3, 0)`` and the partition groups nodes
    {0, 1, 4} and {2, 3, 5}; the expected performance is 8 / 15.
    """
    G = barbell_graph(3, 0)
    # Set literals rather than set([...]): same values, no throwaway lists.
    partition = [{0, 1, 4}, {2, 3, 5}]
    assert_almost_equal(8 / 15, performance(G, partition))
def test_bad_partition(self):
    """Tests that a poor partition has a low performance measure.

    Expects 8 / 15 for ``barbell_graph(3, 0)`` split into {0, 1, 4}
    and {2, 3, 5}.
    """
    graph = barbell_graph(3, 0)
    communities = [{0, 1, 4}, {2, 3, 5}]
    score = performance(graph, communities)
    assert almost_equal(8 / 15, score)
def performance_wrapper(partition, G):
    """Score ``partition`` on graph ``G`` with ``algorithms.performance``.

    The partition is first converted with ``extract_community_map``; note
    that the argument order is ``(partition, G)``, the reverse of the
    wrapped call.
    """
    return algorithms.performance(G, extract_community_map(partition))
def get_quality_metrics(self, station_df, lst_graphs):
    """Build a one-row DataFrame of quality metrics for a clustering.

    Parameters:
        station_df: DataFrame; its 'label' and 'st_type' columns and its
            row count are read here, and it is passed on to
            ``self.__get_graph_n_labels``.
        lst_graphs: iterable of graphs, one per cluster (presumably; verify
            against the caller), whose nodes carry a "label" attribute.
            Used only for the degree-error computation.

    Returns:
        A DataFrame with a single row (index reset to 0) holding the
        configuration values read from ``self`` plus the computed metrics.
        If anything fails, including the ValueError raised when at most
        one label greater than -1 is present, the error message and
        traceback are printed and an empty DataFrame is returned instead.
    """
    # Imports are local to the method. NOTE(review): ``np`` is imported but
    # not used in this method.
    import dunn as di
    from sklearn import metrics
    import networkx as nx
    import networkx.algorithms.community as nx_comm
    import numpy as np
    import pandas as pd
    import traceback
    # Fallback result returned when the computation below fails.
    quality_metric_df = pd.DataFrame([])
    try:
        #d _n_num_clust = len(station_df['label'].unique()) # Generated Cluster Count
        # Only labels greater than -1 count as clusters (-1 is presumably
        # the noise label - confirm).
        _n_num_clust = len([
            x for x in station_df['label'].unique() if x > -1
        ])  # Generated Cluster Count
        if _n_num_clust <= 1:
            # Caught by the outer ``except`` below, so callers see an empty
            # DataFrame rather than this exception.
            raise ValueError(
                'Cannot compute quality metric for %d clusters' %
                (_n_num_clust))
        ''' returns the simple graph of the clusters and the set dictionary of cluster nodes '''
        G_simple_, l_G_clusters_ = self.__get_graph_n_labels(station_df)
        _s_st_types = str(station_df['st_type'].unique())  # Station Types
        _n_tot_num_st = station_df.shape[0]  # Station Quantity
        # Read from self._max_distance and reported under 'Maximum Distance',
        # despite the variable name.
        _f_min_dist = self._max_distance
        _n_min_pts = self._minimum_samples  # Minimum Points
        _s_clust = str(self._name)  # Clustering Name
        _s_algo = str(self._algorithm)  # Algorithm
        _s_metric = str(self._metric)  # Metric
        _s_method = str(self._cluster_method)  # Method
        _s_seed = str(self._seed)  # Seed
        # A cluster is "valid" when it has at least self._minimum_samples
        # members.
        __lst_valid_cloud_clust = [
            frozenset(clust) for clust in l_G_clusters_
            if len(clust) >= self._minimum_samples
        ]
        _n_valid_clust = len(
            __lst_valid_cloud_clust)  # Valid Cluster Count
        # Clustered Station Count: total members over the valid clusters.
        _n_sts_in_clusters = 0
        for x in __lst_valid_cloud_clust:
            _n_sts_in_clusters += len(x)
        _n_noise = station_df.shape[
            0] - _n_sts_in_clusters  # Unclustered Noise Count
        # Average Node Degree: degree sum over nodes labelled > -1 divided
        # by the clustered-station count. Raises ZeroDivisionError (caught
        # by the outer ``except``) when there are no valid clusters.
        _n_avg_deg = sum([
            d for n, d in G_simple_.degree()
            if G_simple_.nodes[n]["label"] > -1
        ]) / _n_sts_in_clusters
        ''' Compute the accuracy of r-regularity constraint on the individual clusters by considering the systematic error that is a reproducible inaccuracy consistent for the same clustering strategy. For such we apply the weighted mean absolute error to estimate the deviation from the expected degree. 
        '''
        sum_deg_abs_err = 0
        _deg_wmae = 0
        _deg_err_st_count = 0
        #p print("\nclusters:",len(lst_graphs))
        for H in lst_graphs:
            # Work on a copy with isolated nodes and nodes labelled -1
            # removed.
            H = nx.Graph(H)
            H.remove_nodes_from(list(nx.isolates(H)))
            H.remove_nodes_from(
                [n for n, v in H.nodes(data=True) if v["label"] == -1])
            # NOTE(review): H_deg_abs_err is assigned but never read.
            H_deg_abs_err = 0
            _l_deg_diff = []
            if H.number_of_nodes() > 0:
                # Shortfall below the expected degree (_n_min_pts - 1) for
                # each node whose degree is under it.
                _l_deg_diff = [
                    _n_min_pts - 1 - d for n, d in H.degree()
                    if (int(d) < int(_n_min_pts - 1)
                        and H.nodes[n]["label"] > -1)
                ]
                if len(_l_deg_diff) > 0:
                    #p print("\ndegree mean absolute error")
                    #p print("minPts:",_n_min_pts)
                    #p print("list deg diff:",_l_deg_diff)
                    #p print("graph nodes:",sorted([d for n,d in H.degree()]))
                    sum_deg_abs_err += sum(_l_deg_diff)
                    _deg_err_st_count += len(_l_deg_diff)
        # NOTE(review): nesting reconstructed from collapsed source. Whether
        # this check sits inside or after the loop, the final value is the
        # same, because it depends only on the running totals.
        if _deg_err_st_count > 0:
            # Mean shortfall per offending node, normalised by the expected
            # degree.
            _deg_wmae = sum_deg_abs_err / (_deg_err_st_count *
                                           (_n_min_pts - 1))
        #p print("_deg_wmae", _deg_wmae,_deg_err_st_count)
        ''' prepare valid stations for measuring the quality'''
        # Node positions and labels taken from the graph's 'pos' and 'label'
        # node attributes.
        lst_st = list(nx.get_node_attributes(G_simple_, 'pos').values())
        lst_lbl = list(nx.get_node_attributes(G_simple_, 'label').values())
        _f_silhouette = metrics.silhouette_score(
            lst_st, lst_lbl, metric='haversine')  # Silhouette Coefficient
        _f_cal_har = metrics.calinski_harabasz_score(
            lst_st, lst_lbl)  # Calinski Harabasz score
        _f_dav_bould = metrics.davies_bouldin_score(
            lst_st, lst_lbl)  # Davies Bouldin score
        _f_dunn = di.dunn_fast(lst_st, lst_lbl)  # Dunn Index
        _f_modul = nx_comm.modularity(G_simple_, l_G_clusters_)  # Modularity
        try:
            # Conductance of each valid cluster, weighted by the 'distance'
            # edge attribute.
            l_conductance = list(
                nx.conductance(G_simple_, cluster_i, weight='distance')
                for cluster_i in __lst_valid_cloud_clust)
            _f_conduct = sum(l_conductance) / len(
                l_conductance)  # Conductance Average
        except Exception:
            # Best effort: any failure here (including no valid clusters)
            # reports a conductance of 0 instead of aborting the metrics.
            _f_conduct = 0
        _f_cover = nx_comm.coverage(G_simple_,
                                    l_G_clusters_)  # Coverage Score
        _f_perform = nx_comm.performance(
            G_simple_, l_G_clusters_)  # Performance Score
        dict_quality_mesrs = {
            'Station Types': _s_st_types,
            'Station Quantity': _n_tot_num_st,
            'Maximum Distance': _f_min_dist,
            'Minimum Points': _n_min_pts,
            'Name': _s_clust,
            'Algorithm': _s_algo,
            'Metric': _s_metric,
            'Method': _s_method,
            'Seed': _s_seed,
            'Generated Cluster Count': _n_num_clust,
            'Valid Cluster Count': _n_valid_clust,
            'Clustered Station Count': _n_sts_in_clusters,
            'Unclsutered Noise Count': _n_noise,
            'Average Station Degree': _n_avg_deg,
            'Degree Weighted Mean Absolute Error': _deg_wmae,
            'Degree Error Station Count': _deg_err_st_count,
            'Silhouette Coefficient': _f_silhouette,
            'Calinski Harabaz score': _f_cal_har,
            'Davies Bouldin score': _f_dav_bould,
            'Dunn Index': _f_dunn,
            'Modularity': _f_modul,
            'Conductance Average': _f_conduct,
            'Coverage Score': _f_cover,
            'Performance Score': _f_perform,
        }
        # print('Dict qual',dict_quality_mesrs('Seed'))
        # Single-row frame; the clustering-name index is dropped right away.
        quality_metric_df = pd.DataFrame(dict_quality_mesrs,
                                         index=[_s_clust])
        quality_metric_df.reset_index(drop=True, inplace=True)
    except Exception as err:
        # Errors are reported on stdout rather than propagated; the empty
        # DataFrame created at the top is returned in that case.
        print(
            "Class cluster_quality_metric [get_quality_metrics] Error message:",
            err)
        # print(G_simple_.edges('distance'))
        print(traceback.format_exc())
    return quality_metric_df
def test_bad_partition(self):
    """Check that a poor partition scores a low performance measure.

    Expects 8 / 15 for ``barbell_graph(3, 0)`` split into {0, 1, 4}
    and {2, 3, 5}.
    """
    graph = barbell_graph(3, 0)
    communities = [{0, 1, 4}, {2, 3, 5}]
    score = performance(graph, communities)
    assert_almost_equal(8 / 15, score)
c[j] = k k += 1 d[i] += 1 d[j] += 1 v[c[i]] += 1 v[c[j]] += 1 if v[c[i]] <= v_max and v[c[j]] <= v_max: if v[c[i]] <= v[c[j]]: v[c[j]] += d[i] v[c[i]] -= d[i] c[i] = c[j] else: v[c[i]] += d[j] v[c[j]] -= d[j] c[j] = c[i] com_dict = defaultdict(set) for i in c: com_dict[c[i]].add(i) communities = list() for com in com_dict.values(): if len(com) > 1: communities.append(com) plot_nx_clusters(nx_graph, communities, pos) print(nx_comm.performance(nx_graph,communities))