def get_gt_matching(all_partitions, algorithm, network_dict, network_full_dict, network_info,
                    gt_type, filter_eu_members, filter_gcc, network_type, log_file_name):
    # computing F score and Rand index for (partition, ground truth group)
    partitions = all_partitions[algorithm]  # get all partitions for the specific algorithm
    # get dictionaries for calculating F score
    network, network_full = network_dict['igraph'], network_full_dict['igraph']
    if algorithm == 'Oslom':
        community_dicts_and_lists = get_all_community_dicts_oslom(partitions, network, filter_eu_members)
    else:
        community_tmp = get_all_community_dicts(partitions, network, filter_eu_members)
        # get dictionaries for community membership
        community_dicts, community_lists = community_tmp['dict'], community_tmp['list']
    gt_dict = get_gt_dict(network_info, gt_type, filter_gcc, network_full)  # network_full = network with all components
    # get ground truth list in the correct order for the Rand index
    id_names_list_in_order = list(network.vs()['name'])  # list of twitter ids in correct order
    index_map = {v: i for i, v in enumerate(id_names_list_in_order)}
    sorted_gt_dict = sorted(gt_dict.items(), key=lambda pair: index_map[pair[0]])
    gt_list = [next(iter(pair[1])) for pair in sorted_gt_dict]
    gt_int_partition = get_gt_int_partition(gt_list)  # get ground truth partition in integer form
    # calculate metrics
    metrics_table = pd.DataFrame(columns=['fs', 'rand'])
    for i in range(len(partitions)):  # for each partition
        start = time.time()
        if algorithm == 'Oslom':
            fs = f_score(community_dicts_and_lists[i]['dict'], gt_dict)
            rand = ig.compare_communities(community_dicts_and_lists[i]['list'], gt_int_partition,
                                          method='rand', remove_none=False)
        else:
            fs = f_score(community_dicts[i], gt_dict)
            rand = ig.compare_communities(community_lists[i], gt_int_partition,
                                          method='rand', remove_none=False)
        metrics_table.loc[i] = [fs, rand]
        end = time.time()
        with open(log_file_name + ".txt", "a") as f:
            f.write('GTC: ' + str(i) + ' TIME: ' + str(round((end - start) / 60, 4)) + '\n')
    return metrics_table
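# A minimal, self-contained sketch of the reordering step above: given a
# ground-truth dict keyed by vertex name, produce a membership list aligned
# with the graph's vertex order so it can be fed to ig.compare_communities.
# The graph and labels here are toy data, not the project's real inputs, and
# the integer mapping shows what get_gt_int_partition presumably does.
import igraph as ig

g = ig.Graph.Full(4)
g.vs['name'] = ['d', 'b', 'a', 'c']
gt_dict = {'a': {'left'}, 'b': {'right'}, 'c': {'left'}, 'd': {'right'}}

index_map = {v: i for i, v in enumerate(g.vs['name'])}
ordered = sorted(gt_dict.items(), key=lambda pair: index_map[pair[0]])
gt_list = [next(iter(groups)) for _, groups in ordered]   # ['right', 'right', 'left', 'left']

# map string labels to integers so compare_communities accepts the list
label_ids = {lab: k for k, lab in enumerate(dict.fromkeys(gt_list))}
gt_int_partition = [label_ids[lab] for lab in gt_list]    # [0, 0, 1, 1]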
def get_mni_matrix(self) -> np.ndarray:
    if self.dataset['ground_truth']:
        # 'memebers' is a known typo in some stored datasets; fall back to 'members'
        if 'memebers' in self.dataset:
            ground_truth = self.dataset['memebers']
        else:
            ground_truth = self.dataset['members']
        n_snp = self.dataset['snapshot_count']
        mni_matrix = np.zeros((len(self.iteration_list), n_snp))
        for i, it in enumerate(self.iteration_list):
            try:
                it_solution = it["execution_info"]['snapshot_members']
                it_mni = np.zeros(n_snp)
                for i_snp in range(n_snp):
                    it_mni[i_snp] = igraph.compare_communities(
                        it_solution[i_snp], ground_truth[i_snp], method='nmi')
                mni_matrix[i, :] = it_mni
            except Exception as e:
                raise RuntimeError(
                    "Dataset {0}, snp {1}, it {2} raised exception {3}".format(
                        self.dataset['_id'], i_snp, it['_id'], e))
        return mni_matrix
    else:
        raise RuntimeError("{0} has no ground truth".format(self.dataset['_id']))
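# A toy illustration of the matrix built above (hypothetical data, not the
# stored dataset format): two iterations, three snapshots, NMI of each
# detected snapshot partition against that snapshot's ground truth.
import numpy as np
import igraph

ground_truth = [[0, 0, 1, 1], [0, 0, 1, 1], [0, 1, 1, 1]]
iterations = [
    [[0, 0, 1, 1], [0, 1, 1, 1], [0, 1, 1, 1]],   # iteration 0
    [[0, 1, 0, 1], [0, 0, 1, 1], [0, 0, 0, 1]],   # iteration 1
]
nmi_matrix = np.zeros((len(iterations), len(ground_truth)))
for i, solution in enumerate(iterations):
    for s in range(len(ground_truth)):
        nmi_matrix[i, s] = igraph.compare_communities(
            solution[s], ground_truth[s], method='nmi')
print(nmi_matrix)  # row i, column s = NMI for iteration i at snapshot s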
def get_best_mni_matrix(self) -> np.ndarray:
    """
    Returns a matrix of shape (n_iterations, n_snapshots) holding, for each
    iteration and snapshot, the best NMI found on the Pareto front.
    """
    if 'memebers' in self.dataset:
        ground_truth = self.dataset['memebers']
    else:
        ground_truth = self.dataset['members']
    n_snapshots = self.dataset['snapshot_count']
    number_iterations = len(self.iteration_list)
    best_mni_matrix = np.zeros((number_iterations, n_snapshots))
    for n_it in range(number_iterations):
        paretos = self._extract_pareto(n_it)
        for n_snp, pareto_snp in enumerate(paretos):
            max_mni = 0
            for ind in pareto_snp:
                members = decode(ind)
                actual_mni = igraph.compare_communities(
                    members, ground_truth[n_snp], method='nmi')
                max_mni = max(max_mni, actual_mni)
            best_mni_matrix[n_it, n_snp] = max_mni
    return best_mni_matrix
def calculate_NMI(self, comm1, comm2, method="nmi"):
    """
    Compares two community structures.

    :param comm1: the first community structure as a membership list or as a
        Clustering object.
    :param comm2: the second community structure as a membership list or as a
        Clustering object.
    :param method: [string] defaults to "nmi"; the measure to use. "vi" or
        "meila" means the variation of information metric of Meila (2003),
        "nmi" or "danon" means the normalized mutual information as defined
        by Danon et al (2005), "split-join" means the split-join distance of
        van Dongen (2000), "rand" means the Rand index of Rand (1971),
        "adjusted_rand" means the adjusted Rand index of Hubert and Arabie
        (1985).
    :return: [float] the calculated measure.

    References:
    - Meila M: Comparing clusterings by the variation of information. In:
      Scholkopf B, Warmuth MK (eds). Learning Theory and Kernel Machines:
      16th Annual Conference on Computational Learning Theory and 7th Kernel
      Workshop, COLT/Kernel 2003, Washington, DC, USA. Lecture Notes in
      Computer Science, vol. 2777, Springer, 2003. ISBN: 978-3-540-40720-1.
    - Danon L, Diaz-Guilera A, Duch J, Arenas A: Comparing community
      structure identification. J Stat Mech P09008, 2005.
    - van Dongen D: Performance criteria for graph clustering and Markov
      cluster experiments. Technical Report INS-R0012, National Research
      Institute for Mathematics and Computer Science in the Netherlands,
      Amsterdam, May 2000.
    - Rand WM: Objective criteria for the evaluation of clustering methods.
      J Am Stat Assoc 66(336):846-850, 1971.
    - Hubert L and Arabie P: Comparing partitions. Journal of Classification
      2:193-218, 1985.
    """
    # pass the requested method through (the original hardcoded 'nmi' and
    # referenced an undefined name 'communities1')
    return igraph.compare_communities(comm1, comm2, method=method, remove_none=False)
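# Minimal usage sketch for igraph.compare_communities with plain membership
# lists (toy data): NMI is label-invariant, so identical partitions with
# swapped labels still score 1.0, while independent splits score 0.0.
import igraph

a = [0, 0, 1, 1]
b = [1, 1, 0, 0]   # same partition as a, labels swapped
c = [0, 1, 0, 1]   # cuts across a's groups
print(igraph.compare_communities(a, b, method='nmi'))   # 1.0
print(igraph.compare_communities(a, c, method='nmi'))   # 0.0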
def calc_result_and_print(graph, origin_cluster):
    fg_cluster = graph.community_fastgreedy().as_clustering()
    im_cluster = graph.community_infomap()
    lp_cluster = graph.community_label_propagation()
    ml_cluster = graph.community_multilevel()
    wt_cluster = graph.community_walktrap().as_clustering()

    fg_result = igraph.compare_communities(origin_cluster, fg_cluster, method='adjusted_rand')
    im_result = igraph.compare_communities(origin_cluster, im_cluster, method='adjusted_rand')
    lp_result = igraph.compare_communities(origin_cluster, lp_cluster, method='adjusted_rand')
    ml_result = igraph.compare_communities(origin_cluster, ml_cluster, method='adjusted_rand')
    wt_result = igraph.compare_communities(origin_cluster, wt_cluster, method='adjusted_rand')

    # print("FastGreedy result {0}".format(fg_result))
    # print("InfoMap result {0}".format(im_result))
    # print("LabelPropagation result {0}".format(lp_result))
    # print("MultiLevel result {0}".format(ml_result))
    # print("WalkTrap result {0}".format(wt_result))
    return [fg_result, im_result, lp_result, ml_result, wt_result]
def testRemoveNone(self):
    l1 = Clustering([1, 1, 1, None, None, 2, 2, 2, 2])
    l2 = Clustering([1, 1, 2, 2, None, 2, 3, 3, None])
    self.assertAlmostEqual(
        compare_communities(l1, l2, "nmi", remove_none=True), 0.5158, places=3)
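# Hedged sketch of what remove_none does (toy membership lists): with
# remove_none=True, positions where either list holds None are dropped
# before comparison; with the default remove_none=False, a None entry makes
# compare_communities raise an error instead.
import igraph

l1 = [0, 0, 1, 1, None]
l2 = [0, 0, 1, 1, 0]
# only the first four entries are compared, so this prints 1.0
print(igraph.compare_communities(l1, l2, method='nmi', remove_none=True))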
def compare_clustering(prefix1, prefix2):
    # compare two communities based on landmark and membership files
    comm1 = read_memfiles(prefix1 + '.graphcluster')
    comm2 = read_memfiles(prefix2 + '.graphcluster')
    ind1, ind2 = get_assoc_landmarks(prefix1, prefix2)
    # restrict both membership lists to the shared landmarks, in matching order
    comm1 = [comm1[x] for x in ind1]
    comm2 = [comm2[x] for x in ind2]
    nmi = compare_communities(comm1, comm2, method='nmi', remove_none=False)
    return comm1, comm2, nmi
def calculate_NMI(comm1, comm2):
    """Compares two community structures using normalized mutual information
    as defined by Danon et al (2005)."""
    return igraph.compare_communities(comm1, comm2, method='nmi', remove_none=False)
def nmi_null(comm1, comm2, nmi, reps=1000):
    # create a null distribution of the NMI value by shuffling community labels
    null = []
    #shuff1 = sample(comm1, len(comm1))
    for i in range(reps):
        shuff2 = sample(comm2, len(comm2))
        null.append(compare_communities(comm1, shuff2, method='nmi', remove_none=False))
    pdf = gaussian_kde(null)
    # note: this is the KDE density at the observed NMI, not a tail probability
    pval = pdf(nmi)[0]
    x = linspace(0, 1, 100)
    y = pdf(x)  # evaluate the KDE; the original passed the kde object to plot()
    plot(x, y, 'k')
    axvline(x=nmi, linewidth=4, color='r')
    text(nmi + 0.1, y.min() + 0.2, 'p-val = %.3f' % pval)
    return pval
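# An alternative, assumption-light way to get a tail probability from the
# same permutation scheme: the empirical fraction of shuffled NMIs at least
# as large as the observed one. The function name and add-one smoothing are
# illustrative choices, not the snippet's original method.
from random import sample
from igraph import compare_communities

def nmi_perm_pval(comm1, comm2, observed_nmi, reps=1000):
    null = [compare_communities(comm1, sample(comm2, len(comm2)), method='nmi')
            for _ in range(reps)]
    # add-one smoothing so the estimate is never exactly zero
    return (1 + sum(v >= observed_nmi for v in null)) / (reps + 1)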
def VI_quiver_data_zero(partitions_dict, mode='vi', verbose=False):
    import igraph as ig
    U = -1 * np.ones((len(partitions_dict), len(partitions_dict[0])))
    for i, delta in enumerate(sorted(partitions_dict.keys())):
        for j, t in enumerate(sorted(partitions_dict[delta].keys())):
            try:
                # list(...) so dict views work under Python 3
                a = ig.compare_communities(list(partitions_dict[delta][0].values()),
                                           list(partitions_dict[delta][t].values()),
                                           method=mode)
                if a >= 0:
                    U[i][j] = a
            except Exception:
                if verbose:
                    print('Error at:', (i, j), (delta, t))
    return U
def _similarity_igraph(cmtys1, cmtys2, method):
    """Calculate community similarity using igraph.

    Available methods:
      'vi' or 'meila'
      'nmi' or 'danon'
      'split-join'
      'rand'
      'adjusted_rand'

    Quirk/bug: Only the first character of these names is used!
    """
    import igraph
    from math import log
    (mlist1, mlist2), nodes = to_membership_list(cmtys1, cmtys2)
    val = igraph.compare_communities(mlist1, mlist2, method=method)
    if method == 'meila':
        val /= log(2)  # convert VI from nats to bits
    return val
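# Why the log(2) division above: igraph reports variation of information
# using the natural logarithm (nats), while Meila's measure is often quoted
# in bits; this is assumed here from the snippet's own correction. A quick
# check with toy partitions where VI should be exactly 2 bits:
import igraph
from math import log

a = [0, 0, 1, 1]
c = [0, 1, 0, 1]
vi_nats = igraph.compare_communities(a, c, method='vi')
print(vi_nats, vi_nats / log(2))  # ~1.386 nats, 2.0 bits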
def comp_partition_sim_mats(
        membership_lists, measures=settings.cluster_similarity_measures):
    """
    Computes pairwise clustering similarity measures between different
    partitions (and conditions).

    Parameters
    ----------
    membership_lists : list
        List of membership lists corresponding to the partitions
    measures : list
        List of cluster similarity measures

    Returns
    -------
    result_dict : dict
        A dict where the key corresponds to the similarity measure and the
        value is a symmetric matrix containing the values of the similarity
        measure (both triangles are filled below).
    """
    membership_lists = np.array(membership_lists, dtype=int)
    n_tot = len(membership_lists)
    result_dict = {}
    for measure in measures:
        sim_mat = np.zeros((n_tot, n_tot))
        for i in range(n_tot):
            partition1 = [int(x) for x in membership_lists[i]]
            for j in range(i, n_tot):
                partition2 = [int(x) for x in membership_lists[j]]
                sim_mat[i, j] = igraph.compare_communities(
                    partition1, partition2, method=measure)
                sim_mat[j, i] = sim_mat[i, j]
        result_dict[measure] = sim_mat
    return result_dict
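# Hypothetical usage of comp_partition_sim_mats (assuming the function above
# is in scope with igraph and numpy available): three toy partitions of the
# same six nodes, with measures passed explicitly so no settings module is
# needed.
toy_partitions = [
    [0, 0, 0, 1, 1, 1],
    [0, 0, 1, 1, 2, 2],
    [1, 1, 1, 0, 0, 0],   # same split as the first, relabeled
]
mats = comp_partition_sim_mats(toy_partitions, measures=['nmi', 'adjusted_rand'])
# partitions 0 and 2 are identical up to relabeling, so both measures give 1.0
print(mats['nmi'][0, 2], mats['adjusted_rand'][0, 2])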
utils_networks.plot_graph_with_communities(g, membership_ref, file_name="../output/temp/temp_1.png")
utils_networks.plot_graph_with_communities(g, membership_m1, file_name="../output/temp/temp_2.png")
utils_networks.plot_graph_with_communities(g, membership_m2, file_name="../output/temp/temp_3.png")
utils_networks.plot_graph_with_communities(g, membership_m3, file_name="../output/temp/temp_4.png")

# Read all temporary created images and build a sub-plotted figure
utils_networks.plot_all_temp_images(file, subdir[subdir.rfind('/')+1:])

# Save pajek clu files
utils_networks.save_graph_in_clu_format(comm=membership_m1, path="../output/clu_files/" + net_name + "_edge_bet.clu")
utils_networks.save_graph_in_clu_format(comm=membership_m2, path="../output/clu_files/" + net_name + "_fastgreedy.clu")
utils_networks.save_graph_in_clu_format(comm=membership_m3, path="../output/clu_files/" + net_name + "_louvain.clu")

# Compare communities detected using different methods
# FROM: http://igraph.org/python/doc/igraph.clustering-module.html#compare_communities
comp1_vi = igraph.compare_communities(membership_ref, membership_m1, method="vi")    # variation of information, Meila (2003)
comp1_nmi = igraph.compare_communities(membership_ref, membership_m1, method="nmi")  # normalized mutual information, Danon et al (2005)
comp1_ji = utils_networks.compare_communities(membership_ref, membership_m1, method="jaccard-index")  # Jaccard index

comp2_vi = igraph.compare_communities(membership_ref, membership_m2, method="vi")
comp2_nmi = igraph.compare_communities(membership_ref, membership_m2, method="nmi")
comp2_ji = utils_networks.compare_communities(membership_ref, membership_m2, method="jaccard-index")

comp3_vi = igraph.compare_communities(membership_ref, membership_m3, method="vi")
comp3_nmi = igraph.compare_communities(membership_ref, membership_m3, method="nmi")
comp3_ji = utils_networks.compare_communities(membership_ref, membership_m3, method="jaccard-index")

# Compute modularity value for every partition including the reference one
mod_ref = igraph.Graph.modularity(g, membership_ref)
mod_m1 = igraph.Graph.modularity(g, membership_m1)
mod_m2 = igraph.Graph.modularity(g, membership_m2)
def igraph_nmi(l1, l2):
    return ig.compare_communities(l1, l2, "nmi", remove_none=True)
fctr_res_b = fctr_b()
fctr_res_bu = fctr_bu()
W = np.asarray(fctr_res.basis())
W_b = np.asarray(fctr_res_b.basis())
W_bu = np.asarray(fctr_res_bu.basis())

actual_primary, actual_secondary = get_role_assignment(W)
estimated_primary, estimated_secondary = get_role_assignment(W_b)
estimated_primary_u, estimated_secondary_u = get_role_assignment(W_bu)

# ari_1 = metrics.adjusted_rand_score(actual_primary, estimated_primary)
# ari_1_u = metrics.adjusted_rand_score(actual_primary, estimated_primary_u)
# ari_2 = metrics.adjusted_rand_score(actual_secondary, estimated_secondary)
# ari_2_u = metrics.adjusted_rand_score(actual_secondary, estimated_secondary_u)
# note: despite the ari_* names, these use the plain Rand index; pass
# method="adjusted_rand" to match the commented-out sklearn calls above
ari_1 = ig.compare_communities(actual_primary, estimated_primary, method="rand")
ari_1_u = ig.compare_communities(actual_primary, estimated_primary_u, method="rand")
ari_2 = ig.compare_communities(actual_secondary, estimated_secondary, method="rand")
ari_2_u = ig.compare_communities(actual_secondary, estimated_secondary_u, method="rand")

p_ari.append(ari_1)
p_ari_u.append(ari_1_u)
s_ari.append(ari_2)
s_ari_u.append(ari_2_u)

print("Completed for %s bins" % bins)
primary_ari.append((bins, np.mean(p_ari)))
primary_ari_uniform.append((bins, np.mean(p_ari_u)))
secondary_ari.append((bins, np.mean(s_ari)))
secondary_ari_uniform.append((bins, np.mean(s_ari_u)))
def compute_similarity_between_cond(simil_method='nmi'):
    import pandas as pd
    from igraph import compare_communities, Clustering
    from dmgraphanalysis_nodes.utils_net import read_lol_file

    simil_WWW_values = []
    simil_What_values = []
    simil_odor_values = []
    simil_recall_values = []

    for subject_num in subject_nums:
        ### read community membership vectors
        odor_WWW_lol_file = os.path.join(nipype_analyses_path, graph_analysis_name,
                                         "_cond_Odor_Hit-WWW_subject_num_" + subject_num,
                                         "community_rada", "Z_List.lol")
        odor_WWW_community_vect = read_lol_file(odor_WWW_lol_file)
        print(odor_WWW_community_vect)

        odor_What_lol_file = os.path.join(nipype_analyses_path, graph_analysis_name,
                                          "_cond_Odor_Hit-What_subject_num_" + subject_num,
                                          "community_rada", "Z_List.lol")
        odor_What_community_vect = read_lol_file(odor_What_lol_file)
        print(odor_What_community_vect)

        recall_WWW_lol_file = os.path.join(nipype_analyses_path, graph_analysis_name,
                                           "_cond_Recall_Hit-WWW_subject_num_" + subject_num,
                                           "community_rada", "Z_List.lol")
        recall_WWW_community_vect = read_lol_file(recall_WWW_lol_file)
        print(recall_WWW_community_vect)

        recall_What_lol_file = os.path.join(nipype_analyses_path, graph_analysis_name,
                                            "_cond_Recall_Hit-What_subject_num_" + subject_num,
                                            "community_rada", "Z_List.lol")
        recall_What_community_vect = read_lol_file(recall_What_lol_file)
        print(recall_What_community_vect)

        ### compute similarity
        if odor_WWW_community_vect.shape[0] == recall_WWW_community_vect.shape[0]:
            simil_WWW = compare_communities(Clustering(odor_WWW_community_vect),
                                            Clustering(recall_WWW_community_vect),
                                            method=simil_method)
            print(simil_WWW)
            simil_WWW_values.append(simil_WWW)
        else:
            print("Warning, community vects for %s WWW have different lengths" % subject_num)
            sys.exit()

        if odor_What_community_vect.shape[0] == recall_What_community_vect.shape[0]:
            simil_What = compare_communities(Clustering(odor_What_community_vect),
                                             Clustering(recall_What_community_vect),
                                             method=simil_method)
            print(simil_What)
            simil_What_values.append(simil_What)
        else:
            print("Warning, community vects for %s What have different lengths" % subject_num)
            sys.exit()

        if odor_WWW_community_vect.shape[0] == odor_What_community_vect.shape[0]:
            simil_odor = compare_communities(Clustering(odor_WWW_community_vect),
                                             Clustering(odor_What_community_vect),
                                             method=simil_method)
            print(simil_odor)
            simil_odor_values.append(simil_odor)
        else:
            print("Warning, community vects for %s odor have different lengths" % subject_num)
            sys.exit()

        if recall_WWW_community_vect.shape[0] == recall_What_community_vect.shape[0]:
            simil_recall = compare_communities(Clustering(recall_WWW_community_vect),
                                               Clustering(recall_What_community_vect),
                                               method=simil_method)
            print(simil_recall)
            simil_recall_values.append(simil_recall)
        else:
            print("Warning, community vects for %s recall have different lengths" % subject_num)
            sys.exit()

    # print(simil_WWW_values)
    # print(simil_odor_values)
    np_simil_values = np.vstack((np.array(simil_WWW_values, dtype='f'),
                                 np.array(simil_What_values, dtype='f'),
                                 np.array(simil_odor_values, dtype='f'),
                                 np.array(simil_recall_values, dtype='f')))
    print(np_simil_values.shape)

    df = pd.DataFrame(np.transpose(np_simil_values),
                      columns=["Simil_Odor-WWW_Recall-WWW", "Simil_Odor-What_Recall-What",
                               "Simil_Odor-WWW_Odor-What", "Simil_Recall-WWW_Recall-What"],
                      index=subject_nums)
    df_filename = os.path.join(nipype_analyses_path, graph_analysis_name,
                               'simil_' + simil_method + '_values_by_cond.txt')
    df.to_csv(df_filename)
def _testMethod(self, method, expected):
    for clusters, result in zip(self.clusterings, expected):
        self.assertAlmostEqual(
            compare_communities(method=method, *clusters), result, places=3)
def general_analysis(
        exp_ID,
        probe,
        visual=False,
        height=1000,
        length=1000,
        feedbackType=None,
        base_layout=1,
        timeperiods=[("trial_start", "stimOn_times"),
                     ("stimOn_times", "response_times"),
                     ("response_times", "trial_end")],
        region_list=[],
        file_nickname="experiment",
        difficulty=[-1, 1],
        percentage=1,
):
    """
    Makes a general analysis of the three main time periods (pre-stimulus,
    during stimulus, and after the stimulus).

    Input:
        exp_ID: experiment ID
        probe: probe identifier
        visual: flag for visuals
        height: height of the resulting graph
        length: length of the resulting graph
        feedbackType: the type of response wanted
        base_layout: index in timeperiods of the base layout
        timeperiods: names of the time series used for each graph
    Output:
        None
    """
    ### Data collection
    results = []
    data = loading(exp_ID, probe, region_list=region_list)
    for time1, time2 in timeperiods:
        temp_dict = dict()
        graph, partition, regions, locations = community_detection(
            exp_ID,
            probe=probe,
            user_start=time1,
            user_end=time2,
            feedbackType=feedbackType,
            difficulty=difficulty,
            percentage=percentage,
            data=data,
            region_list=region_list)
        temp_dict["graph"] = graph
        temp_dict["partition"] = partition
        temp_dict["regions"] = regions
        temp_dict["locations"] = locations
        results.append(temp_dict)

    ### Analysis functions
    ### Summary of each partition
    """
    Here we provide different measurements.
    Summary: a breakdown of the community allegiance
    Split-join distance: compares the partitions between time periods
    Compare communities: computes the variation of information between partitions
    Overlap of communities: computes the percentage overlap with respect to
        the communities of the base layout
    """
    print("Summaries")
    for i in range(len(timeperiods)):
        time1, time2 = timeperiods[i]
        print(time1 + " to " + time2)
        print(results[i]["graph"].summary(verbosity=1))

    ### Split-join distance
    print("Split join distance")
    #print([[ split_join_distance(graphs_for_partitions[i],graphs_for_partitions[j] ) for i in range(len(graphs_for_partitions))] for j in range(len(graphs_for_partitions))])

    ### Compare communities
    print("Compare communities")
    # compare_communities expects clusterings/membership lists, not graphs,
    # so the partitions are compared here (the original passed the graphs)
    print([[ig.compare_communities(i["partition"], j["partition"]) for i in results]
           for j in results])

    print("Overlap of communities")
    results_no_base = [j for j in results]
    results_no_base = results_no_base[:base_layout] + results_no_base[base_layout + 1:]
    partitions_no_base = [i["partition"] for i in results_no_base]
    overlaps = com_overlap(results[base_layout]["partition"], partitions_no_base)
    print(overlaps)

    ### Community assignments based on percentage shared with the original community
    print("Matchings")
    matchings = []
    for i in range(len(results_no_base)):
        matchings.append(
            match_colors(overlaps[i], len(results[base_layout]["partition"]),
                         len(results_no_base[i]["partition"])))
    print(matchings)

    ### Locations by community
    locations_simplified = [parse(i) for i in locations]
    partition = results[base_layout]["partition"]
    num_clusters = max([max(partition[i]) for i in partition]) + 1
    num_to_map = dict()
    for i in range(num_clusters):
        num_to_map[i] = {num_clusters - 1 - i}
    locations_to_order = [i for i in set(locations_simplified)]
    locations_to_order.sort()
    print(locations_to_order)
    order_x = labels_to_dictionary(locations_to_order)
    for i in order_x:
        temp = 0
        for k in order_x[i]:
            temp = k
        order_x[i] = temp
    layout_x = locations_from_dictionary(partition, length=length, height=height, seperated=False)
    layout_y = locations_from_dictionary(labels_to_dictionary(locations_simplified),
                                         order=order_x, length=length, height=height)
    layout_probe_y = locations_from_dictionary(num_to_map, length=length, height=height)
    layout_probe_3 = [[int(length * (1 / 2 - 0.1 + 0.2 * i % 3)), layout_probe_y[i][1]]
                      for i in range(len(layout_probe_y))]
    layout_mixed = [[layout_x[i][0], layout_y[i][1]]
                    for i in range(min(len(layout_y), len(layout_x)))]
    layout_depth = [[layout_x[i][0], layout_probe_y[i][1]]
                    for i in range(min(len(layout_probe_y), len(layout_x)))]

    coloring = ig.ClusterColoringPalette(20)
    colorings = []
    for i in range(len(overlaps)):
        o_partition = results_no_base[i]["partition"]
        overlap = overlaps[i]
        #plt.table(cellText=[[overlap[(i, j)] for j in o_partition] for i in partition], rowLabels=[i for i in partition], colLabels=[j for j in o_partition], loc='top')
        #plt.subplots_adjust(left=0.2, top=0.8)
        #plt.show()
        colorings.append(Pallete_changed(20, coloring, matchings[i]))

    ### Final visualization
    for i in range(len(timeperiods)):
        if i < base_layout:
            pre_graph = results[i]["graph"]
            #visualize(pre_graph, layout=layout_mixed, vertex_size=30, labels=locations_simplified, length=length, height=height, coloring=colorings[i], file_name=file_nickname + "_" + str(i) + "_final" + ".svg")
            #visualize(pre_graph, layout=layout_probe_3, vertex_size=30, labels=locations_simplified, length=length, height=height, coloring=colorings[i], file_name=file_nickname + ".pdf")
            visualize(pre_graph, layout=layout_depth, vertex_size=30,
                      labels=locations_simplified, length=length, height=height,
                      coloring=colorings[i],
                      file_name=file_nickname + "_" + str(i) + "_final" + ".svg")
        elif i == base_layout:
            pre_graph = results[i]["graph"]
            #visualize(pre_graph, layout=layout_mixed, vertex_size=30, labels=locations_simplified, length=length, height=height, coloring=coloring, file_name=file_nickname + "_" + str(i) + "_final" + ".pdf")
            #visualize(pre_graph, layout=layout_probe_3, vertex_size=30, labels=locations_simplified, length=length, height=height, coloring=coloring, file_name=file_nickname + ".pdf")
            visualize(pre_graph, layout=layout_depth, vertex_size=30,
                      labels=locations_simplified, length=length, height=height,
                      coloring=coloring,
                      file_name=file_nickname + "_" + str(i) + "_final" + ".svg")
        else:
            j = i - 1
            pre_graph = results[i]["graph"]
            #visualize(pre_graph, layout=layout_mixed, vertex_size=30, labels=locations_simplified, length=length, height=height, coloring=colorings[j], file_name=file_nickname + "_" + str(i) + "_final" + ".pdf")
            #visualize(pre_graph, layout=layout_probe_3, vertex_size=30, labels=locations_simplified, length=length, height=height, coloring=colorings[j], file_name=file_nickname + ".pdf")
            visualize(pre_graph, layout=layout_depth, vertex_size=30,
                      labels=locations_simplified, length=length, height=height,
                      coloring=colorings[j],
                      file_name=file_nickname + "_" + str(i) + "_final" + ".svg")

    if visual:
        return visualized_3d(results, matchings, base_layout, exp_ID, probe)
    else:
        return []
def compare_community_structures(network, path):
    # Louvain communities
    louvain_c = network.community_multilevel(weights='weight')
    # spinglass communities
    spinglass_c = network.community_spinglass(weights='weight')
    ####################################################################################################################
    # variation of information
    comparison_vi = ig.compare_communities(louvain_c, spinglass_c, method='vi')
    # normalized mutual information
    comparison_nmi = ig.compare_communities(louvain_c, spinglass_c, method='nmi')
    # split-join distance
    comparison_split_join = ig.compare_communities(louvain_c, spinglass_c, method='split-join')
    # Rand index
    comparison_rand = ig.compare_communities(louvain_c, spinglass_c, method='rand')
    # adjusted Rand index
    comparison_adjusted_rand = ig.compare_communities(louvain_c, spinglass_c, method='adjusted_rand')
    ####################################################################################################################
    # write all comparison values to the output file
    output_file = open('{}/network/txt/wnn/louvain/comparison.txt'.format(path), mode='a')
    output_file.write('> Community structure comparison\n\n')
    output_file.write('- Variation of information: {:.3f}\n'.format(comparison_vi))
    output_file.write('- Normalized mutual information: {:.3f}\n'.format(comparison_nmi))
    output_file.write('- Split-join distance: {}\n'.format(comparison_split_join))
    output_file.write('- Rand index: {:.3f}\n'.format(comparison_rand))
    output_file.write('- Adjusted Rand index: {:.3f}\n'.format(comparison_adjusted_rand))
    output_file.close()  # the original never closed the file
IG_edgeList_ = perm_loss_decep(target_comm, IG_edgeList, deg, in_deg, e_max_list,
                               comm_max_list, communities, subedge, subgraph,
                               subvertices, beta, target_comm_index)

# communities in the updated graph
g = igraph.Graph(directed=False)
num_vertices = num_v
g.add_vertices(num_vertices)
g.add_edges(IG_edgeList_)
communities = g.community_walktrap().as_clustering()
post_neighbours = check_neighbours(neighbours, communities)

# calculating the metrics
nmi = igraph.compare_communities(comm_1, communities, method="nmi")
print("NMI - ", nmi)
nmi_neighbourhood = igraph.compare_communities(pre_neighbours, post_neighbours, method="nmi")
print("Neighbourhood NMI - ", nmi_neighbourhood)

num_splits, comm_list = num_comm(target_comm, communities)
sum_comm = sum_comm + num_splits
print("Community splits - ", num_splits)

entropy_val = get_entropy(comm_list)
sum_entropy = sum_entropy + entropy_val
print("Uniformity - ", entropy_val)

NMI_List.append(nmi)
Neighbourhood_NMI_List.append(nmi_neighbourhood)
communities = safe_copy_comm
def compute_pairwise_metrics(all_partitions, network_dict, log_file_name):
    network = network_dict['igraph']
    n = len(all_partitions['Louvain'])

    ### get community dictionaries for calculating the B-cubed F score
    community_dicts_louvain = get_all_community_dicts(all_partitions['Louvain'], network, filter_eu_members=False)['dict']
    community_dicts_louvain_dir = get_all_community_dicts(all_partitions['Directed Louvain'], network, filter_eu_members=False)['dict']
    community_dicts_leiden = get_all_community_dicts(all_partitions['Leiden'], network, filter_eu_members=False)['dict']
    community_dicts_infomap = get_all_community_dicts(all_partitions['Infomap'], network, filter_eu_members=False)['dict']
    # get oslom dicts and lists
    oslom_dicts_and_lists = get_all_community_dicts_oslom(all_partitions['Oslom'], network, filter_eu_members=False)

    ### compute pairwise metrics
    comparison_table = pd.DataFrame(columns=['nmi', 'rand', 'sj', 'fs', 'method'])
    index = 0
    for i in range(n):  # comparing partition pairs
        for j in range(i + 1, n):
            start = time.time()

            # 1) Louvain
            nmi = ig.compare_communities(all_partitions['Louvain'][i], all_partitions['Louvain'][j], method='nmi', remove_none=False)
            rand = ig.compare_communities(all_partitions['Louvain'][i], all_partitions['Louvain'][j], method='rand', remove_none=False)
            sj = ig.compare_communities(all_partitions['Louvain'][i], all_partitions['Louvain'][j], method='split-join', remove_none=False)
            fs = f_score(community_dicts_louvain[i], community_dicts_louvain[j])
            comparison_table.loc[index] = [nmi, rand, sj, fs, 'Louvain']
            index = index + 1

            # 2) Directed Louvain
            nmi = ig.compare_communities(all_partitions['Directed Louvain'][i], all_partitions['Directed Louvain'][j], method='nmi', remove_none=False)
            rand = ig.compare_communities(all_partitions['Directed Louvain'][i], all_partitions['Directed Louvain'][j], method='rand', remove_none=False)
            sj = ig.compare_communities(all_partitions['Directed Louvain'][i], all_partitions['Directed Louvain'][j], method='split-join', remove_none=False)
            fs = f_score(community_dicts_louvain_dir[i], community_dicts_louvain_dir[j])
            comparison_table.loc[index] = [nmi, rand, sj, fs, 'Directed Louvain']
            index = index + 1

            # 3) Leiden
            nmi = ig.compare_communities(all_partitions['Leiden'][i], all_partitions['Leiden'][j], method='nmi', remove_none=False)
            rand = ig.compare_communities(all_partitions['Leiden'][i], all_partitions['Leiden'][j], method='rand', remove_none=False)
            sj = ig.compare_communities(all_partitions['Leiden'][i], all_partitions['Leiden'][j], method='split-join', remove_none=False)
            fs = f_score(community_dicts_leiden[i], community_dicts_leiden[j])
            comparison_table.loc[index] = [nmi, rand, sj, fs, 'Leiden']
            index = index + 1

            # 4) Infomap
            nmi = ig.compare_communities(all_partitions['Infomap'][i], all_partitions['Infomap'][j], method='nmi', remove_none=False)
            rand = ig.compare_communities(all_partitions['Infomap'][i], all_partitions['Infomap'][j], method='rand', remove_none=False)
            sj = ig.compare_communities(all_partitions['Infomap'][i], all_partitions['Infomap'][j], method='split-join', remove_none=False)
            fs = f_score(community_dicts_infomap[i], community_dicts_infomap[j])
            comparison_table.loc[index] = [nmi, rand, sj, fs, 'Infomap']
            index = index + 1

            # 5) Oslom
            nmi = ig.compare_communities(oslom_dicts_and_lists[i]['list'], oslom_dicts_and_lists[j]['list'], method='nmi', remove_none=False)
            rand = ig.compare_communities(oslom_dicts_and_lists[i]['list'], oslom_dicts_and_lists[j]['list'], method='rand', remove_none=False)
            sj = ig.compare_communities(oslom_dicts_and_lists[i]['list'], oslom_dicts_and_lists[j]['list'], method='split-join', remove_none=False)
            fs = f_score(oslom_dicts_and_lists[i]['dict'], oslom_dicts_and_lists[j]['dict'])
            comparison_table.loc[index] = [nmi, rand, sj, fs, 'Oslom']
            index = index + 1

            end = time.time()
            with open(log_file_name + ".txt", "a") as f:
                f.write('PC: ' + str(i) + '-' + str(j) + ' TIME: ' + str(round((end - start) / 60, 4)) + '\n')
    return comparison_table
for line in f:
    split_string = line.split(" ")
    second_clustering_mem_list.append(int(split_string[1].replace("\n", "")))
f.close()

# create corresponding vertex clusterings
first_clustering = igraph.VertexClustering(input_network, first_clustering_mem_list)
second_clustering = igraph.VertexClustering(input_network, second_clustering_mem_list)
print("done creating clusterings.")

if verbosity:
    print(first_clustering)
    print(second_clustering)

############ COMPARE CLUSTERINGS ############
vi = igraph.compare_communities(first_clustering, second_clustering, method='vi', remove_none=False)
nmi = igraph.compare_communities(first_clustering, second_clustering, method='nmi', remove_none=False)
split_join = igraph.compare_communities(first_clustering, second_clustering, method='split-join', remove_none=False)
rand = igraph.compare_communities(first_clustering, second_clustering, method='rand', remove_none=False)
adj_rand = igraph.compare_communities(first_clustering, second_clustering, method='adjusted_rand', remove_none=False)

print("\nSeparated by tabs:")
print(str(vi) + "\t" + str(nmi) + "\t" + str(split_join) + "\t" + str(rand) + "\t" + str(adj_rand))
print("\nCSV format:")
print(str(vi) + "," + str(nmi) + "," + str(split_join) + "," + str(rand) + "," + str(adj_rand) + "\n")

if verbosity:
    print("Variation of information, Meila (2003):", vi)
    print("Normalized mutual information, Danon et al (2005):", nmi)
    print("Split-join distance, van Dongen (2000):", split_join)
W = np.asarray(fctr_res.basis())
W_b = np.asarray(fctr_res_b.basis())
W_bu = np.asarray(fctr_res_bu.basis())

actual_primary, actual_secondary = get_role_assignment(W)
estimated_primary, estimated_secondary = get_role_assignment(W_b)
estimated_primary_u, estimated_secondary_u = get_role_assignment(W_bu)

# ari_1 = metrics.adjusted_rand_score(actual_primary, estimated_primary)
# ari_1_u = metrics.adjusted_rand_score(actual_primary, estimated_primary_u)
# ari_2 = metrics.adjusted_rand_score(actual_secondary, estimated_secondary)
# ari_2_u = metrics.adjusted_rand_score(actual_secondary, estimated_secondary_u)
ari_1 = ig.compare_communities(actual_primary, estimated_primary, method="rand")
ari_1_u = ig.compare_communities(actual_primary, estimated_primary_u, method="rand")
ari_2 = ig.compare_communities(actual_secondary, estimated_secondary, method="rand")
ari_2_u = ig.compare_communities(actual_secondary, estimated_secondary_u, method="rand")

p_ari.append(ari_1)
p_ari_u.append(ari_1_u)
s_ari.append(ari_2)
s_ari_u.append(ari_2_u)
def evaluation(G, pred_labels, true_labels, name):
    Modularity = G.modularity(pred_labels)
    NMI = ig.compare_communities(pred_labels, true_labels, method='nmi')
    # igraph's method name is 'adjusted_rand'; 'ari' is not accepted
    ARI = ig.compare_communities(pred_labels, true_labels, method='adjusted_rand')
    return (Modularity, NMI, ARI)
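# Hedged note on method names: python-igraph's compare_communities accepts
# 'vi'/'meila', 'nmi'/'danon', 'split-join', 'rand', and 'adjusted_rand';
# a quick smoke test over all of them on toy labels:
import igraph as ig

pred = [0, 0, 1, 1, 2, 2]
true = [0, 0, 1, 1, 1, 2]
for m in ('vi', 'nmi', 'split-join', 'rand', 'adjusted_rand'):
    print(m, ig.compare_communities(pred, true, method=m))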
def get_plot_data(i, time_intervals):
    t = time_intervals[i]
    ground_truth = Clustering([n.group for n in t.nodes])
    cc = Clustering([n.cc for n in t.nodes])
    return compare_communities(ground_truth, cc, method="danon")
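# 'danon' above is an alias for 'nmi' in compare_communities (assumed
# behavior per the igraph docs, where both names select the Danon et al
# (2005) normalized mutual information); a toy check:
from igraph import compare_communities

a = [0, 0, 1, 1]
b = [0, 1, 1, 1]
assert compare_communities(a, b, method='danon') == compare_communities(a, b, method='nmi')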