import itertools

import networkx as nx


def conductance(g, s=(0, 0), t=(10, 10), max_cut=5):
    """Brute-force the minimum conductance over small node sets separating s from t."""
    nodes = list(g.nodes)
    min_cond = nx.conductance(g, [nodes[0]])
    for n in range(1, max_cut):
        for c in itertools.combinations(nodes, n):
            # Only keep candidate sets that place s and t on opposite sides of the cut.
            if (s in c) != (t in c):
                cond = nx.conductance(g, c)
                min_cond = min(min_cond, cond)
    return min_cond
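A hypothetical driver for the brute-force search above, assuming a small grid graph; the corner coordinates stand in for the default s/t terminals:

import networkx as nx

G = nx.grid_2d_graph(4, 4)
print(conductance(G, s=(0, 0), t=(3, 3), max_cut=3))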
def test_graph(self):
    G = nx.barbell_graph(5, 0)
    # Consider the singleton sets containing the "bridge" nodes.
    # There is only one cut edge, and each set has volume five.
    S = {4}
    T = {5}
    conductance = nx.conductance(G, S, T)
    expected = 1 / 5
    assert expected == conductance

    # Test with no input T: the complement of S2 is used.
    G2 = nx.barbell_graph(3, 0)
    # There is only one cut edge, and each set has volume seven.
    S2 = {0, 1, 2}
    assert nx.conductance(G2, S2) == 1 / 7
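As a sanity check, the expected value in this test can be recomputed from the definition networkx uses, cut_size(S, T) / min(volume(S), volume(T)):

import networkx as nx

G = nx.barbell_graph(5, 0)
S = {4}
T = set(G) - S
# One edge crosses the cut and node 4 has degree 5, so the ratio is 1/5.
assert nx.cut_size(G, S, T) / min(nx.volume(G, S), nx.volume(G, T)) == nx.conductance(G, S)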
import math
import operator

import networkx as nx

cond_on_iteration = {}  # conductance of the labeled set at each iteration


def extend_labeled_data(input_path, output_path):
    """
    Extends the graph under input_path based on the Conductance metric
    and saves the extended graph under output_path.

    :param input_path: Location of the original graph
    :param output_path: Location of the extended output graph
    """
    total_graph = nx.read_graphml(input_path)
    A_pos = create_labeled_subgraph(total_graph)
    sub_a_pos = list(A_pos.nodes(data=True))
    B_neg = create_unlabeled_subgraph(total_graph)
    sub_b_neg = list(B_neg.nodes(data=True))
    i = 0
    cond_on_iteration[i] = nx.conductance(total_graph, A_pos)
    while not stop_criterion_reached(i):
        i += 1
        argmax = {}
        nodes_to_iter = set()
        for node in sub_a_pos:
            if node[1]['leaning'] == 'R':
                neighbors = nx.neighbors(total_graph, node[0])
                # Neighbors connected to this node by an edge of weight greater than one.
                strong_edge_neighbors = [
                    n_2 for (n_1, n_2, w) in total_graph.edges(node[0], data=True)
                    if w['weight'] > 1
                ]
                clean_neighbors = []
                for neighbor in neighbors:
                    if neighbor in [b[0] for b in sub_b_neg]:
                        if neighbor in strong_edge_neighbors:
                            clean_neighbors.append(neighbor)
                nodes_to_iter.update(clean_neighbors)
        counter = 0
        for node in nodes_to_iter:
            counter += 1
            c = math.floor(counter / len(nodes_to_iter) * 100)
            temp = [x[0] for x in sub_a_pos]
            temp.append(node)
            argmax[node] = nx.conductance(total_graph, temp)
            print(str(c) + '%')
        b = max(argmax.items(), key=operator.itemgetter(1))[0]
        cond_on_iteration[i] = argmax[b]
        b_data = [(x, y) for (x, y) in sub_b_neg if x == b][0]
        sub_a_pos.append(b_data)
        sub_b_neg.remove(b_data)
    print('Finished with ' + str(len(sub_a_pos)) + ' and ' + str(len(sub_b_neg)) + ' nodes')
    extended_graph = nx.subgraph(total_graph, [x for (x, y) in sub_a_pos])
    nx.write_graphml(extended_graph, output_path)
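A hedged usage sketch, assuming the project-specific helpers (create_labeled_subgraph, create_unlabeled_subgraph, stop_criterion_reached) are importable and the input is a weighted GraphML file with 'leaning' node attributes; the paths are placeholders:

extend_labeled_data('graphs/input.graphml', 'graphs/extended.graphml')
print(cond_on_iteration)  # conductance recorded at each extension step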
import networkx as nx


def calculate_component_wise_measures(G, comm_n_dict):
    # nx.connected_component_subgraphs was removed in networkx 2.4;
    # build the component subgraphs explicitly instead.
    comp_graphs = [G.subgraph(c) for c in nx.connected_components(G)]
    comp_wise_conductance = {}
    for comp_id, graph in enumerate(comp_graphs):
        if graph.number_of_nodes() > 1:
            for comm in comm_n_dict:
                # Only score communities that lie fully inside this component
                # and form a proper, non-trivial subset of it.
                if (set(comm_n_dict[comm]).issubset(set(graph.nodes()))
                        and graph.number_of_nodes() > len(comm_n_dict[comm])
                        and len(comm_n_dict[comm]) > 1):
                    if comp_id not in comp_wise_conductance:
                        comp_wise_conductance[comp_id] = {}
                    comp_wise_conductance[comp_id][comm] = nx.conductance(graph, comm_n_dict[comm])

    comp_min_conductance = {}
    comp_num_communities = {}
    for comp_id in comp_wise_conductance:
        if len(comp_wise_conductance[comp_id]) > 0:
            comp_num_communities[comp_id] = len(comp_wise_conductance[comp_id])
            comp_min_conductance[comp_id] = min(comp_wise_conductance[comp_id].values())
    return min(comp_min_conductance.values())
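A self-contained toy call of the function above; comm_n_dict maps a community id to its member nodes (illustrative data only):

import networkx as nx

# Two triangles joined by one edge, plus a separate pair of nodes.
G = nx.Graph([(0, 1), (1, 2), (0, 2), (2, 3), (3, 4), (4, 5), (3, 5), (6, 7)])
comm_n_dict = {'c0': [0, 1, 2], 'c1': [3, 4, 5]}
print(calculate_component_wise_measures(G, comm_n_dict))  # 1/7 for either triangle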
import networkx as nx


def getConductance(G, communities):
    """Average conductance over a list of communities."""
    sum_cond = 0
    conductances = {}
    for L in communities:
        if len(L) == len(G.nodes):
            # Conductance is undefined when a community covers the whole graph
            # (the complement has zero volume); treat that case as 1.
            cond = 1
        else:
            cond = nx.conductance(G, L)
        conductances.update({str(L): cond})
        sum_cond += cond
    avg_cond = sum_cond / len(communities)
    return avg_cond
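Example call with hand-picked communities on a barbell graph; each half has one cut edge and volume 13, so the average conductance is 1/13:

import networkx as nx

G = nx.barbell_graph(4, 0)
communities = [list(range(4)), list(range(4, 8))]
print(getConductance(G, communities))  # 1/13 ~= 0.0769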
import networkx as nx
import torch


def sweep(myp, G, data):
    """Sweep cut: scan prefixes of the support of a score vector, ranked by
    score/degree, and keep the prefix with the lowest conductance."""
    supp = torch.nonzero(myp).squeeze().tolist()
    degs = data.adj[supp, :].sum(-1)
    sortedsupp = torch.argsort(myp[supp] / degs, descending=True).squeeze().tolist()
    support = [supp[i] for i in sortedsupp]
    sweepset = []
    bestconductance = float('inf')
    bestvolume = 0
    bestset = []
    for i in support:
        sweepset += [i]
        volume = nx.volume(G, sweepset)
        conductance = nx.conductance(G, sweepset)
        if conductance < bestconductance:
            bestconductance = conductance
            bestvolume = volume
            # Copy, so that growing sweepset does not mutate the stored best set.
            bestset = sweepset[:]
    return bestset, bestconductance, bestvolume
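A hedged driver for the sweep above: data is assumed (as in the original project) to expose a dense adjacency tensor as data.adj, and myp is any non-negative node-score vector, e.g. a personalized PageRank estimate; the scores here are made up:

import networkx as nx
import torch
from types import SimpleNamespace

G = nx.karate_club_graph()
data = SimpleNamespace(adj=torch.tensor(nx.to_numpy_array(G), dtype=torch.float))
myp = torch.zeros(G.number_of_nodes())
myp[[0, 1, 2, 3]] = torch.tensor([0.4, 0.3, 0.2, 0.1])  # toy scores seeded at node 0
best_set, best_cond, best_vol = sweep(myp, G, data)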
def get_quality_metrics(self, station_df, lst_graphs):
    import dunn as di
    from sklearn import metrics
    import networkx as nx
    import networkx.algorithms.community as nx_comm
    import pandas as pd
    import traceback

    quality_metric_df = pd.DataFrame([])
    try:
        # Generated Cluster Count (noise points are labeled -1 and excluded)
        _n_num_clust = len([x for x in station_df['label'].unique() if x > -1])
        if _n_num_clust <= 1:
            raise ValueError('Cannot compute quality metric for %d clusters' % (_n_num_clust))

        ''' returns the simple graph of the clusters and the set dictionary of cluster nodes '''
        G_simple_, l_G_clusters_ = self.__get_graph_n_labels(station_df)

        _s_st_types = str(station_df['st_type'].unique())  # Station Types
        _n_tot_num_st = station_df.shape[0]                # Station Quantity
        _f_min_dist = self._max_distance                   # Maximum Distance
        _n_min_pts = self._minimum_samples                 # Minimum Points
        _s_clust = str(self._name)                         # Clustering Name
        _s_algo = str(self._algorithm)                     # Algorithm
        _s_metric = str(self._metric)                      # Metric
        _s_method = str(self._cluster_method)              # Method
        _s_seed = str(self._seed)                          # Seed

        __lst_valid_cloud_clust = [
            frozenset(clust) for clust in l_G_clusters_
            if len(clust) >= self._minimum_samples
        ]
        _n_valid_clust = len(__lst_valid_cloud_clust)      # Valid Cluster Count

        # Clustered Station Count
        _n_sts_in_clusters = 0
        for x in __lst_valid_cloud_clust:
            _n_sts_in_clusters += len(x)

        _n_noise = station_df.shape[0] - _n_sts_in_clusters  # Unclustered Noise Count
        _n_avg_deg = sum([
            d for n, d in G_simple_.degree()
            if G_simple_.nodes[n]["label"] > -1
        ]) / _n_sts_in_clusters                              # Average Node Degree

        '''
        Compute the accuracy of the r-regularity constraint on the individual
        clusters by considering the systematic error: a reproducible inaccuracy
        that is consistent for the same clustering strategy. To do so, we apply
        the weighted mean absolute error to estimate the deviation from the
        expected degree.
        '''
        sum_deg_abs_err = 0
        _deg_wmae = 0
        _deg_err_st_count = 0
        for H in lst_graphs:
            H = nx.Graph(H)
            H.remove_nodes_from(list(nx.isolates(H)))
            H.remove_nodes_from([n for n, v in H.nodes(data=True) if v["label"] == -1])
            _l_deg_diff = []
            if H.number_of_nodes() > 0:
                _l_deg_diff = [
                    _n_min_pts - 1 - d for n, d in H.degree()
                    if (int(d) < int(_n_min_pts - 1) and H.nodes[n]["label"] > -1)
                ]
            if len(_l_deg_diff) > 0:
                sum_deg_abs_err += sum(_l_deg_diff)
                _deg_err_st_count += len(_l_deg_diff)
        if _deg_err_st_count > 0:
            _deg_wmae = sum_deg_abs_err / (_deg_err_st_count * (_n_min_pts - 1))

        ''' prepare valid stations for measuring the quality '''
        lst_st = list(nx.get_node_attributes(G_simple_, 'pos').values())
        lst_lbl = list(nx.get_node_attributes(G_simple_, 'label').values())
        _f_silhouette = metrics.silhouette_score(lst_st, lst_lbl, metric='haversine')  # Silhouette Coefficient
        _f_cal_har = metrics.calinski_harabasz_score(lst_st, lst_lbl)  # Calinski Harabasz score
        _f_dav_bould = metrics.davies_bouldin_score(lst_st, lst_lbl)   # Davies Bouldin score
        _f_dunn = di.dunn_fast(lst_st, lst_lbl)                        # Dunn Index
        _f_modul = nx_comm.modularity(G_simple_, l_G_clusters_)        # Modularity
        try:
            l_conductance = list(
                nx.conductance(G_simple_, cluster_i, weight='distance')
                for cluster_i in __lst_valid_cloud_clust)
            _f_conduct = sum(l_conductance) / len(l_conductance)       # Conductance Average
        except Exception:
            _f_conduct = 0
        # coverage and performance were removed in networkx 3.0 (superseded by
        # nx_comm.partition_quality); this code targets networkx 2.x.
        _f_cover = nx_comm.coverage(G_simple_, l_G_clusters_)          # Coverage Score
        _f_perform = nx_comm.performance(G_simple_, l_G_clusters_)     # Performance Score

        dict_quality_mesrs = {
            'Station Types': _s_st_types,
            'Station Quantity': _n_tot_num_st,
            'Maximum Distance': _f_min_dist,
            'Minimum Points': _n_min_pts,
            'Name': _s_clust,
            'Algorithm': _s_algo,
            'Metric': _s_metric,
            'Method': _s_method,
            'Seed': _s_seed,
            'Generated Cluster Count': _n_num_clust,
            'Valid Cluster Count': _n_valid_clust,
            'Clustered Station Count': _n_sts_in_clusters,
            'Unclustered Noise Count': _n_noise,
            'Average Station Degree': _n_avg_deg,
            'Degree Weighted Mean Absolute Error': _deg_wmae,
            'Degree Error Station Count': _deg_err_st_count,
            'Silhouette Coefficient': _f_silhouette,
            'Calinski Harabasz score': _f_cal_har,
            'Davies Bouldin score': _f_dav_bould,
            'Dunn Index': _f_dunn,
            'Modularity': _f_modul,
            'Conductance Average': _f_conduct,
            'Coverage Score': _f_cover,
            'Performance Score': _f_perform,
        }
        quality_metric_df = pd.DataFrame(dict_quality_mesrs, index=[_s_clust])
        quality_metric_df.reset_index(drop=True, inplace=True)
    except Exception as err:
        print("Class cluster_quality_metric [get_quality_metrics] Error message:", err)
        print(traceback.format_exc())
    return quality_metric_df
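The conductance-averaging step inside this method can be exercised in isolation; a minimal sketch assuming a graph whose edges carry the same 'distance' weight attribute the method passes to nx.conductance:

import networkx as nx

G = nx.barbell_graph(3, 0)
nx.set_edge_attributes(G, 1.0, 'distance')
clusters = [frozenset({0, 1, 2}), frozenset({3, 4, 5})]
l_conductance = [nx.conductance(G, c, weight='distance') for c in clusters]
print(sum(l_conductance) / len(l_conductance))  # average conductance, 1/7 here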
def cut_conductance(self, set_A, set_B):
    return nx.conductance(self.G, set_A, set_B, weight='weight')
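A minimal sketch of how such a wrapper could be hosted (the class name and wiring are hypothetical, not from the original source):

import networkx as nx

class GraphPartition:
    def __init__(self, G):
        self.G = G

    def cut_conductance(self, set_A, set_B):
        return nx.conductance(self.G, set_A, set_B, weight='weight')


G = nx.barbell_graph(3, 0)
nx.set_edge_attributes(G, 1.0, 'weight')
print(GraphPartition(G).cut_conductance({0, 1, 2}, {3, 4, 5}))  # 1/7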
print("Precision@{}: {}".format(k, len(right_set)/k)) print("Recall@{}: {}".format(k, len(right_set)/len(labelList))) if outPath: f_r = open('degree_top{}'.format(k), 'w') for t in top_k_dict.items(): f_r.write(t[0]+'\t'+str(t[1])+'\n') f_r.close() if __name__ == "__main__": args = parse_args() g = Graph() start_time = time.time() if args.graph_format == 'adjlist': g.read_adjlist(filename=args.input) elif args.graph_format == 'edgelist': g.read_edgelist(filename=args.input, weighted=args.weighted, directed=args.directed) cluster = read_cluster(args.cluster) print("File read done, elapsed time {}s".format(time.time()-start_time)) print("Node Size: {}".format(g.G.number_of_nodes())) print("Edge Size: {}".format(g.G.number_of_edges())) start_time = time.time() print("S size {}, volume {}".format(len(cluster[0]), nx.volume(g.G, cluster[0], weight='weight'))) print("T size {}, volume {}".format(len(cluster[1]), nx.volume(g.G, cluster[1], weight='weight'))) cond = nx.conductance(g.G, cluster[0], cluster[1], weight='weight') print("Conductance {}, elapsed time {}s".format(cond, time.time()-start_time))
for a in range(len(percent20_seeds_communities_list)):  # Iterate through each community of the 20 percent seeds
    for b in percent20_seeds_communities_list[a]:  # Iterate through each node of that particular community
        fact_matrix_seed20[b][a] = 1  # Mark the corresponding node/community combination with '1'
fact_matrix_seed20  # Displays the initial factor matrix for the 20 percent seeds data

# In the step below we compute the conductance for the remaining values in "fact_matrix_seed20"
# (a minimal setup sketch for these names appears after this block).
for a in range(len(fact_matrix_seed20)):  # Iterate over the rows (nodes) of the initial factor matrix
    value_minimum = np.inf  # Initialize the running minimum
    for b in range(len(fact_matrix_seed20[0])):  # Iterate over the community columns of the factor matrix
        # Conductance of community b extended by node a and its neighbors.
        conduct = nx.conductance(G, (percent20_seeds_communities_list[b] + list(G.neighbors(a)) + [a]))
        if value_minimum > conduct:  # If the obtained conductance is smaller than the minimum, reassign
            value_minimum = conduct
            conduct_minimum = b  # Stores the community index having the minimum conductance
    for c in list(G.neighbors(a)) + [a]:  # Mark node a and its neighbors as members of the locally minimal community
        fact_matrix_seed20[c][conduct_minimum] = 1
fact_matrix_seed20  # Displays the final factor matrix for the 20 percent seed communities after the conductance computation

"""***Method-2: Factor Matrix Initialization for Neighborhood Seed Communities***

***I) Loading the Neighborhood Seed Communities Data File***

***II) Computing the Factorization Matrix associated to Neighborhood Seed Communities and associated conductance calculation***
"""
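The Method-1 snippet above assumes G, the seed-community list, and the factor matrix already exist; a minimal hedged setup with toy data (names match the snippet, values are illustrative only):

import numpy as np
import networkx as nx

G = nx.karate_club_graph()  # stand-in for the project's graph
percent20_seeds_communities_list = [[0, 1, 2], [32, 33]]  # toy seed communities
# One row per node, one column per seed community, initialized to zero.
fact_matrix_seed20 = np.zeros(
    (G.number_of_nodes(), len(percent20_seeds_communities_list)), dtype=int)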