def on_button_go_plot_clicked(self, widget):
    """
    04-19-05
    06-14-05
        use graphDotOutput() and insert it into the textbuffer
    10-11-05
        add cluster_id and go_no for each graph
    """
    if self.cluster == None:
        print "Cluster not loaded in"
        return
    #first construct the graph
    subgraph = self.cluster_info_instance.graph_from_node_edge_set(\
        self.cluster.vertex_set, self.cluster.edge_set)
    pathlist = []
    treeselection_go_association = self.treeview_go_association.get_selection()
    treeselection_go_association.selected_foreach(foreach_cb, pathlist)
    if len(pathlist) > 0:
        for i in range(len(pathlist)):
            go_no = self.liststore_go_association[pathlist[i][0]][0]
            graphSrcFname = '/tmp/GuiAnalyzer.dot'
            graphFname = '/tmp/GuiAnalyzer.ps'
            graphSrcF = open(graphSrcFname, 'w')
            graphDotOutput(graphSrcF, subgraph, \
                self.gene_no2gene_id, self.gene_no2go_no, \
                function=int(go_no), weighted=0)
            graphSrcF.close()
            #06-14-05 get the plot_type_command from the comboboxentry_plot_type
            entry = self.comboboxentry_plot_type.child
            plot_type_command = entry.get_text()
            if plot_type_command == 'neato' or plot_type_command == 'twopi':
                plot_type_command += ' -Goverlap=false'    #06-14-05 -Goverlap=false passed to neato or twopi
            commandline = '%s -Tps %s -o %s'%(plot_type_command, graphSrcFname, graphFname)
            system_call(commandline)
            #10-11-05 temporarily added: also draw the picture in png format
            outputfname = '/tmp/%s_%s.png'%(self.cluster.cluster_id, go_no)
            commandline = '%s -Tpng %s -o %s'%(plot_type_command, graphSrcFname, outputfname)
            system_call(commandline)

            textbuffer = self.textview_subgraph.get_buffer()
            im = Image.open(graphFname)
            ar = fromimage(im)
            pixbuf = gtk.gdk.pixbuf_new_from_array(ar, gtk.gdk.COLORSPACE_RGB, 8)
            startiter = textbuffer.get_start_iter()
            textbuffer.insert_pixbuf(startiter, pixbuf)
            startiter = textbuffer.get_start_iter()
            textbuffer.insert(startiter, "cluster_id:%s go_no:%s"%(self.cluster.cluster_id, go_no))
            #clean up the temporary objects
            del im
            del ar
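# For illustration only (the concrete values are made up, not taken from this codebase): with
# plot_type_command set to 'neato', cluster_id 15 and go_no 4, the two graphviz commands assembled
# above would read
#     neato -Goverlap=false -Tps /tmp/GuiAnalyzer.dot -o /tmp/GuiAnalyzer.ps
#     neato -Goverlap=false -Tpng /tmp/GuiAnalyzer.dot -o /tmp/15_4.png
# i.e. the same .dot source is rendered twice, once to PostScript for the textview and once to PNG.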
def node_fire(self, communicator, node_rank2cluster_no, netmine2nd_parameter_list):
    """
    04-08-05
        mpi form
    05-16-05
        os.system() might fail on some platforms, so use recursive system_call()
    """
    #get the output filename prefix
    ofname_prefix = netmine2nd_parameter_list[20]
    #store the finished output files and send them back to node 0 later
    ofname_list = []
    if communicator.rank in node_rank2cluster_no:    #check if it's necessary to ignite the node
        #fire repeatedly
        for cluster_no in node_rank2cluster_no[communicator.rank]:
            jobrow = netmine2nd_parameter_list + ["-f", repr(cluster_no)]
            sys.stderr.write("node %s working on cluster %s...\n" % (communicator.rank, cluster_no))
            exit_code = system_call("%s" % " ".join(jobrow))
            sys.stderr.write("node %s on cluster %s done, exit_code: %s\n" % (communicator.rank, cluster_no, exit_code))
            ofname_list.append("%s_%sh" % (ofname_prefix, cluster_no))    #04-09-05 haiyan puts 'h' after cluster_no.??
    #join the list by blanks and send it back to node 0
    communicator.send("%s" % " ".join(ofname_list), 0, communicator.rank)
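# Several docstrings above and below note that os.system() might fail on some platforms and that a
# "recursive" system_call() is used instead. The real system_call() is defined elsewhere in this
# codebase; the sketch below only illustrates that retry-on-failure idea and is an assumption, not
# the project's actual implementation (the name system_call_sketch and the retry limit are made up).
import os

def system_call_sketch(commandline, max_retries=5):
    #issue the command; on a nonzero exit code, recurse and try again, up to max_retries times
    exit_code = os.system(commandline)
    if exit_code != 0 and max_retries > 0:
        return system_call_sketch(commandline, max_retries - 1)
    return exit_code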
def run_netmine(self, node, netmine_parameter_list):
    """
    04-01-05
        input: wl_list of netmine
        output: number of clusters
        nothing to do with mpi
    05-16-05
        os.system() might fail on some platforms, so use recursive system_call()
    06-10-05
        randomly select a big-memory node from 16 to 35 to run netmine
    """
    sys.stderr.write("Running netmine...")
    #wl = ['ssh', 'node%s'%node, '%s'%' '.join(netmine_parameter_list)]
    #04-08-05 spawnvp gets dead under MPI, use system instead
    random_node = random.randint(16, 35)
    #return_code = os.spawnvp(os.P_WAIT, netmine_parameter_list[0], netmine_parameter_list)
    commandline = "ssh node%s %s" % (random_node, " ".join(netmine_parameter_list))
    if self.debug:
        sys.stderr.write("The commandline of netmine is %s\n" % commandline)
    exit_code = system_call(commandline)    #05-16-05 use the recursive one
    #exit_code = os.system('%s'%' '.join(netmine_parameter_list))
    op = netmine_parameter_list[8]    #the 8th entry is the output file
    no_of_clusters = 0
    of = open(op, "r")
    for line in of:
        no_of_clusters += 1
    sys.stderr.write("total clusters: %s. exit_code: %s.\n" % (no_of_clusters, exit_code))
    return no_of_clusters
def uniqueSort(self, inputfile, outputfile, tmpdir='/scratch'):
    """
    08-07-05
        sort the file (and unique it simultaneously)
    """
    sys.stderr.write("Starting to sort and unique %s..."%inputfile)
    commandline = 'sort -T %s -u %s -o %s'%(tmpdir, inputfile, outputfile)
    exit_code = system_call(commandline)
    #os.remove(inputfile)
    sys.stderr.write("Done.\n")
def run_netmine2nd(cluster_no, netmine2nd_parameter_list):
    """
    05-19-05
        similar to node_fire() of class netmine_wrapper
    """
    import sys
    #get the output filename prefix
    ofname_prefix = netmine2nd_parameter_list[20]
    jobrow = netmine2nd_parameter_list + ["-f", repr(cluster_no)]
    exit_code = system_call("%s" % " ".join(jobrow))
    ofname = "%s_%sh" % (ofname_prefix, cluster_no)    #04-09-05 haiyan puts 'h' after cluster_no.??
    return ofname
def collect_and_merge_output(self, of_name_list, final_ofname):
    """
    04-08-05
        only for rank 0
        1. receive the concatenated ofname_list from all nodes
        2. cat all files together into final_ofname and delete the intermediary files
    05-16-05
        os.system() might fail on some platforms, so use recursive system_call()
    05-19-05
        collecting output filenames is NOT necessary, it's done in distribute_jobs() (via mpi_schedule_jobs())
    """
    sys.stderr.write("Catting them together...")
    #05-22-05 remove final_ofname first so that repeated '>>' appends don't accumulate
    if os.path.isfile(final_ofname):
        os.remove(final_ofname)
    for ofname in of_name_list:
        #do it one file at a time because 'cat * > final_ofname' might fail with 'Argument list too long',
        #which is also the case for 'rm' when '*' expands to thousands of files
        exit_code = system_call("cat %s >> %s" % (ofname, final_ofname))    #note: '>>', not '>'
        exit_code = system_call("rm %s" % ofname)    #delete it immediately
    sys.stderr.write("Done.\n")
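# The per-file cat/rm loop above avoids 'Argument list too long' from shell expansion of '*'. A
# shell-free way to get the same merge-and-delete behavior is sketched below; it is not the method
# used in this codebase, just a minimal equivalent alternative, assuming the intermediate files are
# plain text that can be appended as-is.
import os
import shutil

def merge_and_remove_sketch(of_name_list, final_ofname):
    out_f = open(final_ofname, 'w')
    for ofname in of_name_list:
        in_f = open(ofname, 'r')
        shutil.copyfileobj(in_f, out_f)    #append this intermediate file's contents
        in_f.close()
        os.remove(ofname)    #delete it immediately, mirroring the 'rm' above
    out_f.close()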
def callTightClust(go_no, parameter_list):
    """
    06-03-05
        the node_function for mpi_schedule_jobs()
    """
    import sys
    output_dir = parameter_list[0]
    no_of_nas = parameter_list[1]
    top_percentage = parameter_list[2]
    input_file = os.path.join(output_dir, 'edge_data_%s'%go_no)
    output_file = os.path.join(output_dir, 'edge_data_%s.2'%go_no)
    preprocess_instance = PreprocessEdgeData(input_file, output_file, no_of_nas, top_percentage)
    preprocess_instance.run()
    tightClust_path = os.path.join(os.path.expanduser('~/script/annot/bin/tightClust'), 'tightClust')
    job_list = [tightClust_path, '%s/'%output_dir, 'edge_data_%s.2'%go_no] + parameter_list[3:]
    exit_code = system_call("%s"%' '.join(job_list))
    tightClust_output_file = '%s.tightClust'%output_file
    del preprocess_instance
    return tightClust_output_file
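# callTightClust() expects parameter_list to start with [output_dir, no_of_nas, top_percentage],
# followed by any extra arguments passed straight through to the tightClust binary. A direct
# (non-MPI) invocation might look like the sketch below; the concrete values and the trailing
# tightClust options are illustrative assumptions, not values taken from this codebase.
example_parameter_list = ['/tmp/tightClust_output', 7, 0.1]    #hypothetical; extra tightClust args could follow
#tightClust_output_file = callTightClust(45, example_parameter_list)    #45 is a made-up go_no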
def output_p_gene_id_list(self, curs, schema_instance1, schema_instance2, p_gene_id_list, writer, pic_output_dir, \
    pga_instance1, pga_instance2, cluster_info_instance, simple):
    """
    10-15-05
        add score1 and is_accepted1
    10-17-05
        score and is_accepted depend on whether pga_instance is None or not
    10-17-05
        add simple to allow skipping the graph-picture output
        also get the prediction from schema_instance1 and calculate the score if that prediction is available
    10-18-05
        sort p_gene_id_list first
    """
    #10-15-05 the following sentence differs slightly from PredictionFilterByClusterSize.py in the trailing edge_gradient;
    #'d_matrix' is a placeholder
    sql_sentence1 = "SELECT p.p_gene_id, p.gene_no, p.go_no, p.is_correct, p.is_correct_l1, \
        p.is_correct_lca, p.avg_p_value, p.no_of_clusters, p.cluster_array, p.p_value_cut_off, p.recurrence_cut_off, \
        p.connectivity_cut_off, p.cluster_size_cut_off, p.unknown_cut_off, p.depth_cut_off, p.mcl_id, p.lca_list, \
        p.vertex_gradient, p.edge_gradient, m.vertex_set, s.edge_set, 'd_matrix', 'r' from %s p, %s s, %s m where \
        p.mcl_id=s.splat_id and p.mcl_id=m.mcl_id"%(schema_instance1.p_gene_table, \
        schema_instance1.splat_table, schema_instance1.mcl_table)
    sql_sentence2 = "SELECT p.p_gene_id, p.gene_no, p.go_no, p.is_correct, p.is_correct_l1, \
        p.is_correct_lca, p.avg_p_value, p.no_of_clusters, p.cluster_array, p.p_value_cut_off, p.recurrence_cut_off, \
        p.connectivity_cut_off, p.cluster_size_cut_off, p.unknown_cut_off, p.depth_cut_off, p.mcl_id, p.lca_list, \
        p.vertex_gradient, p.edge_gradient, m.vertex_set, s.edge_set, 'd_matrix', 'r' from %s p, %s s, %s m where \
        p.mcl_id=s.splat_id and p.mcl_id=m.mcl_id"%(schema_instance2.p_gene_table, \
        schema_instance2.splat_table, schema_instance2.mcl_table)
    writer.writerow(['p_gene_id', 'gene_no', 'go_no', 'is_correct_lca', 'p_value', 'recurrence', 'connectivity', \
        'cluster_size', 'unknown_ratio', 'mcl_id', 'lca_list', 'edge_gradient', 'score1', 'is_accepted1', 'score2', 'is_accepted2'])
    p_gene_id_list.sort()
    for p_gene_id in p_gene_id_list:
        #sql_sentence1's prediction information is not going to be displayed
        curs.execute("%s and p.p_gene_id=%s"%(sql_sentence1, p_gene_id))
        rows = curs.fetchall()
        if rows:
            p_attr_instance1 = prediction_attributes(rows[0], type=3)
        else:
            p_attr_instance1 = None
        #sql_sentence2's prediction information is going to be displayed
        #(p_gene_id_list is assumed to come from schema_instance2, so rows is expected to be non-empty here)
        curs.execute("%s and p.p_gene_id=%s"%(sql_sentence2, p_gene_id))
        rows = curs.fetchall()
        if rows:
            p_attr_instance2 = prediction_attributes(rows[0], type=3)
        if pga_instance1 and p_attr_instance1:
            (is_accepted1, score1) = pga_instance1.prediction_accepted(p_attr_instance1.go_no, \
                [-math.log(p_attr_instance1.p_value_cut_off), p_attr_instance1.recurrence_cut_off, \
                p_attr_instance1.connectivity_cut_off, p_attr_instance1.cluster_size_cut_off, \
                p_attr_instance1.edge_gradient])
        else:
            is_accepted1, score1 = None, None
        if pga_instance2:
            (is_accepted2, score2) = pga_instance2.prediction_accepted(p_attr_instance2.go_no, \
                [-math.log(p_attr_instance2.p_value_cut_off), p_attr_instance2.recurrence_cut_off, \
                p_attr_instance2.connectivity_cut_off, p_attr_instance2.cluster_size_cut_off, \
                p_attr_instance2.edge_gradient])
        else:
            is_accepted2, score2 = None, None
        writer.writerow([p_attr_instance2.p_gene_id, p_attr_instance2.gene_no, p_attr_instance2.go_no, \
            p_attr_instance2.is_correct_lca, p_attr_instance2.avg_p_value, p_attr_instance2.recurrence_cut_off, \
            p_attr_instance2.connectivity_cut_off, p_attr_instance2.cluster_size_cut_off, p_attr_instance2.unknown_cut_off, \
            p_attr_instance2.mcl_id, p_attr_instance2.lca_list, p_attr_instance2.edge_gradient, score1, is_accepted1, \
            score2, is_accepted2])
        if not simple:
            #prepare vertex_set and edge_set to draw graphs
            vertex_set = p_attr_instance2.vertex_set[1:-1].split(',')
            vertex_set = map(int, vertex_set)
            edge_set = p_attr_instance2.edge_set[2:-2].split('},{')
            for i in range(len(edge_set)):
                edge_set[i] = edge_set[i].split(',')
                edge_set[i] = map(int, edge_set[i])
            #following copied from GuiAnalyzer.py
            subgraph = cluster_info_instance.graph_from_node_edge_set(vertex_set, edge_set)
            graphSrcFname = '/tmp/GuiAnalyzer.dot'
            graphFname = os.path.join(pic_output_dir, '%s_%s_%s_%s.png'%(p_attr_instance2.p_gene_id, \
                p_attr_instance2.gene_no, p_attr_instance2.go_no, p_attr_instance2.mcl_id))
            graphSrcF = open(graphSrcFname, 'w')
            graphDotOutput(graphSrcF, subgraph, \
                self.gene_no2gene_id, self.gene_no2go_no, \
                centralnode=p_attr_instance2.gene_no, function=p_attr_instance2.go_no, weighted=0)
            graphSrcF.close()
            plot_type_command = 'neato -Goverlap=false'
            commandline = '%s -Tpng %s -o %s'%(plot_type_command, graphSrcFname, graphFname)
            system_call(commandline)
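# The vertex_set/edge_set parsing in output_p_gene_id_list() assumes database strings shaped like
# '{1,2,3}' and '{{1,2},{2,3}}'. The standalone snippet below walks through that same slicing and
# splitting on made-up values (the numbers are hypothetical, not from any schema).
vertex_set_str = '{135,267,301}'            #hypothetical p_attr_instance2.vertex_set
edge_set_str = '{{135,267},{267,301}}'      #hypothetical p_attr_instance2.edge_set
vertex_set = map(int, vertex_set_str[1:-1].split(','))    #-> [135, 267, 301]
edge_set = edge_set_str[2:-2].split('},{')                #-> ['135,267', '267,301']
for i in range(len(edge_set)):
    edge_set[i] = map(int, edge_set[i].split(','))        #-> [[135, 267], [267, 301]]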