def calculate_evaluation_measures_for_one_K(self):
    """Run the base-class evaluation, then write the core-periphery reports.

    After delegating to the parent implementation, four report paths are
    built under ``<eval_results_dir>/stats/phase<phase>/``.  They share one
    file-name stem and differ only in their suffix (``_stats_CP.txt``,
    ``_stats_CP_1.txt``, ``_stats_CP_2.txt``, ``_stats_CP_3.txt``).

    In phase 2 the total average measures are computed and the four
    core-periphery reports are written to those paths.
    """
    self.configdata.logger.debug("Debugging from inside calculate_evaluation_measures_for_one_K method of class EvaluationData_WI.")

    super().calculate_evaluation_measures_for_one_K()

    # The four report paths only differ in their suffix, so the common stem
    # (directory + date + algorithm + dataset + phase) is built exactly once.
    phase_str = str(self.phase)
    a_name = AlgorithmName()
    stem = (self.configdata.eval_results_dir + "/stats/phase" + phase_str + "/"
            + self.configdata.currentdate_str + "_"
            + str(a_name.get_algorithm_name(self.algorithm_name)) + "_"
            + self.configdata.dataset_name + "_K_All" + "_Phase_" + phase_str
            + "_stats_CP")
    filename = stem + ".txt"
    filename_1 = stem + "_1.txt"
    filename_2 = stem + "_2.txt"
    filename_3 = stem + "_3.txt"

    # NOTE(review): the original indentation was lost; the report calls are
    # assumed to belong to the phase-2 branch together with the total-average
    # computation they follow -- confirm against the original file.
    if self.phase == 2:
        # Calculate total average measures
        self.eval_calculate_total_avg_measures()

        # Print the core periphery relationships
        self.eval_print_summary_stats_CP(filename)

        # Print the global core periphery relationships
        self.eval_print_summary_stats_CP_1(filename_1)

        # Print the global core periphery relationships in a table format
        self.eval_print_summary_stats_CP_2(filename_2)

        # Print statistics for core periphery relationships
        self.eval_print_coreperiphery_summary_format(filename_3)
def print_cluster_stats(self):
    """Write per-cluster statistics to a tab-separated text file.

    The output path is
    ``<eval_results_dir>/docs/phase<phase>/<date>_<algorithm>_<dataset>_K_<K>_Phase_<phase>_print_cluster_stats.txt``.

    One line is written for every entry of ``self.cnodes_dict`` whose
    ``num_nodes`` is at least 3, with the columns: cluster id, ``num_nodes``,
    ``mean_edges``, ``standard_deviation_edges``, ``struct_density`` and the
    ``'f_measure'`` entry of ``self.f_measure_dict`` for that cluster.
    """
    self.configdata.logger.debug(
        "Debugging from inside printCluster_stats method of Evaluation Data class."
    )

    a_name = AlgorithmName()
    filename = (self.configdata.eval_results_dir + "/docs/phase"
                + str(self.phase) + "/"
                + self.configdata.currentdate_str + "_"
                + str(a_name.get_algorithm_name(self.algorithm_name)) + "_"
                + self.configdata.dataset_name + "_K_" + str(self.K)
                + "_Phase_" + str(self.phase) + "_print_cluster_stats.txt")

    # The context manager guarantees the file is flushed and closed, also
    # when a lookup or a write fails half-way through.
    with open(filename, 'w') as target:
        for cnode_id, cnode_data in self.cnodes_dict.items():
            if cnode_data.num_nodes >= 3:
                columns = (
                    str(cnode_id),
                    str(cnode_data.num_nodes),
                    str(cnode_data.mean_edges),
                    str(cnode_data.standard_deviation_edges),
                    str(cnode_data.struct_density),
                    str(self.f_measure_dict[cnode_id]['f_measure']),
                )
                target.write("\t".join(columns) + "\n")
def calculate_evaluation_measures_for_one_K(self):
    """Compute the evaluation measures for the current clustering and report them.

    Steps, in order:
      1. per-cluster measures via ``eval_calc_measures``;
      2. their averages via ``eval_calc_average_measures``;
      3. in phase 2 only: accuracy against the gold standard file, the Rand
         index, the F-measure and the averaged per-cluster-node measures;
      4. the summary statistics followed by the cluster summary list, both
         sent to the same ``..._K_All_Phase_<phase>_stats.txt`` file under
         ``<eval_results_dir>/stats/phase<phase>/``.
    """
    self.configdata.logger.debug(
        "Debugging from inside calculate_evaluation_measures_for_one_K method of class EvaluationData."
    )

    # Measures of individual clusters, then their averages.
    cluster_measures = self.eval_calc_measures()
    (mean_list, variance_list, struct_density_list,
     node_count_list) = cluster_measures
    self.eval_calc_average_measures(mean_list, variance_list,
                                    struct_density_list, node_count_list)

    # NOTE(review): the original indentation was lost; only the four
    # gold-standard based measures are assumed to be phase-2 specific, while
    # the stats file is assumed to be written in every phase -- confirm.
    if self.phase == 2:
        config = self.configdata
        # Sn, PPV and accuracy
        self.eval_calc_accuracy(config.gold_standard_file)
        # Rand index
        self.calculate_rand_index()
        self.eval_calc_f_measure(config.complex_codes_file)
        self.eval_calc_avg_cnode_avg_measures(config.evol_rates_file,
                                              config.essentiality_file,
                                              config.phyletic_age_file)

    # Assemble the path of the stats file from its individual pieces.
    a_name = AlgorithmName()
    path_pieces = [
        self.configdata.eval_results_dir,
        "/stats/phase",
        str(self.phase),
        "/",
        self.configdata.currentdate_str,
        "_",
        str(a_name.get_algorithm_name(self.algorithm_name)),
        "_",
        self.configdata.dataset_name,
        "_K_All",
        "_Phase_",
        str(self.phase),
        "_stats.txt",
    ]
    filename = "".join(path_pieces)

    # Average measures first, then the summary list of all clusters formed,
    # both handed the same file name.
    self.eval_print_summary_stats(filename)
    self.eval_print_clusters_summary_format(filename)
def printClusters(self):
    """Write the cluster assignment of every node to a tab-separated file.

    The output path is
    ``<eval_results_dir>/docs/phase<phase>/<date>_<algorithm>_<dataset>_K_<K>_Phase_<phase>_print.txt``.

    For each node index ``i`` in ``range(self.graphdata.num_nodes)`` one line
    is written with the columns: ``self.CL_list[i]``, ``i`` and the
    ``node_code`` of ``self.graphdata.node_dict[i]``.
    """
    self.configdata.logger.debug(
        "Debugging from inside printClusters method")

    a_name = AlgorithmName()
    filename = (self.configdata.eval_results_dir + "/docs/phase"
                + str(self.phase) + "/"
                + self.configdata.currentdate_str + "_"
                + str(a_name.get_algorithm_name(self.algorithm_name)) + "_"
                + self.configdata.dataset_name + "_K_" + str(self.K)
                + "_Phase_" + str(self.phase) + "_print.txt")

    # ``with`` closes the file even if a lookup or a write raises, which the
    # previous explicit close() at the end of the method did not.
    with open(filename, 'w') as target:
        for i in range(self.graphdata.num_nodes):
            columns = (
                str(self.CL_list[i]),
                str(i),
                str(self.graphdata.node_dict[i].node_code),
            )
            target.write("\t".join(columns) + "\n")
def visualize_clusters(self):
    """Draw the clustered graph and save the figure as a PNG file.

    The figure is written to
    ``<eval_results_dir>/figures/phase<phase>/<date>_<algorithm>_<dataset>_K_<K>_Phase_<phase>_V_NX.png``.

    Side effects: closes all open matplotlib figures before and after
    drawing, and overwrites ``self.num_clusters``, ``self.cluster_colors``
    and ``self.G``.
    """
    self.configdata.logger.debug(
        "Debugging from inside visualize_clusters")

    plt.close("all")

    a_name = AlgorithmName()
    filename = "".join([
        self.configdata.eval_results_dir,
        "/figures/phase",
        str(self.phase),
        "/",
        self.configdata.currentdate_str,
        "_",
        str(a_name.get_algorithm_name(self.algorithm_name)),
        "_",
        self.configdata.dataset_name,
        "_K_",
        str(self.K),
        "_Phase_",
        str(self.phase),
        "_V_NX.png",
    ])

    # Distinct cluster labels.  num_clusters is deliberately overridden with
    # the number of distinct labels: for clusterone the two may differ, and
    # the label list may contain -1 for nodes without a cluster label.
    distinct_labels = list(set(self.CL_list))
    self.num_clusters = len(distinct_labels)

    # One colour per distinct cluster, taken from a colour map.
    self.cluster_colors = self.generateCMap(distinct_labels,
                                            self.num_clusters)

    # Build the graph from the original similarity matrix, one edge at a
    # time.  Every ordered pair (i, j) is visited, so an undirected edge is
    # added once per direction and the later visit sets the final weight.
    self.G = nx.Graph()
    similarity = self.graphdata.SM_orig
    nodes = self.graphdata.node_dict
    for i in range(0, len(self.CL_list)):
        for j in range(0, len(self.CL_list)):
            weight = similarity[i, j]
            # Entries equal to -1 or 0 and the diagonal do not produce an edge.
            if weight == -1 or weight == 0 or i == j:
                continue
            self.G.add_edge(nodes[i].node_code,
                            nodes[j].node_code,
                            weight=weight)
            # Attach the node attributes to both end points of the new edge.
            self.assign_node_attribute(i)
            self.assign_node_attribute(j)

    # Graph layout.
    # NOTE(review): nx.graphviz_layout and Graph.node only exist in older
    # networkx releases -- confirm the pinned networkx version.
    pos = nx.graphviz_layout(self.G)

    # Snapshot of (node, attribute dict) pairs; not used further below.
    node_records = list(self.G.nodes(data=True))

    def pick_color(node):
        # Category > -1: colour of that cluster; exactly -1: grey;
        # anything below -1: yellow.
        category = self.G.node[node]['category']
        if category > -1:
            return self.cluster_colors[category]
        if category == -1:
            return "#bebcbd"
        return "#ffff00"

    # Nodes, coloured per cluster.
    node_colors = [pick_color(node) for node in self.G]
    nx.draw_networkx_nodes(self.G,
                           pos,
                           node_color=node_colors,
                           node_size=250,
                           alpha=0.7)

    # Edges are drawn one by one so that each line width can follow the
    # weight of its edge.
    for (u, v, d) in self.G.edges(data=True):
        nx.draw_networkx_edges(self.G,
                               pos,
                               [(u, v)],
                               width=(d['weight'] * 4),
                               alpha=0.2)

    # Node labels.
    nx.draw_networkx_labels(self.G,
                            pos,
                            font_size=10,
                            font_family='sans-serif',
                            font_weight='bold')

    # Save the figure as PNG without axes, then release the figure.
    plt.axis('off')
    plt.savefig(filename)
    plt.close("all")