def run(self):
    """
    10-29-05 call p_gene_id_set_from_gene_p_table()

    Entry point: connect to the database, optionally create the good
    cluster table, compute the mcl_id -> unknown ratio mapping and hand
    it to submit_good_clusters().

    When self.commit is true, an "end" statement is executed on the
    cursor at the very end (presumably to close the transaction -- the
    transaction handling itself lives in db_connect(), not shown here).
    """
    connection, cursor = db_connect(self.hostname, self.dbname, self.schema)

    # The target table is only created on request.
    if self.new_table:
        self.create_good_cluster_table(cursor, self.good_cluster_table)

    # Ids taken from gene_p_table; passed on to the ratio computation,
    # which reads p_gene_table.
    known_p_gene_ids = p_gene_id_set_from_gene_p_table(cursor, self.gene_p_table)
    unknown_ratio_by_mcl_id = self.get_mcl_id2unknown_ratio(
        cursor, self.p_gene_table, known_p_gene_ids)

    self.submit_good_clusters(
        cursor, self.cluster_queue, self.good_cluster_table,
        unknown_ratio_by_mcl_id, self.occurrence_cutoff)

    if not self.commit:
        return
    cursor.execute("end")
def run(self):
    """
    10-17-05
        bit control whether that setting has linear model

    Compare the p_gene ids of two settings and write a tab-delimited
    report to self.ofname.

    The ids are split into four categories:
        I   -- in setting 1's gene_p_table only
        II  -- in both gene_p_tables
        III -- in setting 2's gene_p_table only
        IV  -- in setting 2's p_gene_table but in neither gene_p_table
    Each category is sampled via sample_p_gene_id_set() and written out
    by output_p_gene_id_list(); a directory 'cat<N>' is created in the
    current working directory for each category if it does not exist.

    self.bit is indexed per setting: position 0 / 1 equal to '1' means
    the linear-model results of setting 1 / 2 are loaded and printed.

    Side effects: sets self.gene_no2gene_id and self.gene_no2go_no,
    writes progress messages to stderr, creates directories and the
    output file. The output file is always closed, even on error.
    """
    schema_instance1 = form_schema_tables(self.fname1, self.acc_cutoff1, self.lm_bit1)
    schema_instance2 = form_schema_tables(self.fname2, self.acc_cutoff2, self.lm_bit2)
    (conn, curs) = db_connect(self.hostname, self.dbname, self.schema)

    p_gene_id_set1 = p_gene_id_set_from_gene_p_table(curs, schema_instance1.gene_p_table)
    p_gene_id_set2 = p_gene_id_set_from_gene_p_table(curs, schema_instance2.gene_p_table)
    p_gene_id_set_total = p_gene_id_set_from_gene_p_table(curs, schema_instance2.p_gene_table)

    # Categories I..IV, in this order (index + 1 == category number).
    category_sets = [
        p_gene_id_set1 - p_gene_id_set2,
        p_gene_id_set1 & p_gene_id_set2,
        p_gene_id_set2 - p_gene_id_set1,
        p_gene_id_set_total - (p_gene_id_set1 | p_gene_id_set2),
    ]
    sample_ls_ls = [
        self.sample_p_gene_id_set(p_gene_id_set, self.no_of_samples)
        for p_gene_id_set in category_sets
    ]

    # Keep a handle on the file so it can be closed deterministically;
    # previously it was left to garbage collection to flush and close it.
    outf = open(self.ofname, 'w')
    try:
        writer = csv.writer(outf, delimiter='\t')
        writer.writerow(['linear model coeffs of two settings'])
        writer.writerow([])
        writer.writerow(['No.', 'intercept', 'coeff1', 'coeff2', 'coeff3', 'coeff4', 'coeff5',
            'intercept_p_value', 'coeff1_p_value', 'coeff2_p_value', 'coeff3_p_value',
            'coeff4_p_value', 'coeff5_p_value', 'score_cut_off'])

        # fetch linear model coefficients
        # 10-17-05 default is nothing, none of them have linear model
        pga_instance_list = [None, None]
        for index, schema_instance in enumerate([schema_instance1, schema_instance2]):
            if self.bit[index] != '1':
                continue
            pga_instance = p_gene_analysis()
            pga_instance.go_no2lm_results, lm_results_2d_list = \
                pga_instance.get_go_no2lm_results(curs, schema_instance.lm_table)
            pga_instance.general_lm_results = pga_instance.get_general_lm_results(lm_results_2d_list)
            pga_instance_list[index] = pga_instance
            self.output_lm_model(curs, schema_instance, writer)

        # following is for drawing graph in output_p_gene_id_list()
        self.gene_no2gene_id = get_gene_no2gene_id(curs)
        self.gene_no2go_no = get_gene_no2go_no(curs)
        cluster_info_instance = cluster_info()

        for i, sample_ls in enumerate(sample_ls_ls):
            cat_no = i + 1
            sys.stderr.write("Category %s...\n"%cat_no)
            writer.writerow(['Category %s'%cat_no])
            writer.writerow([self.category_no2information[cat_no]])
            cat_dir = 'cat%s'%cat_no
            if not os.path.isdir(cat_dir):
                os.makedirs(cat_dir)
            if i == 0:
                # this is different, prediction only in schema_instance1, so swap it
                self.output_p_gene_id_list(curs, schema_instance2, schema_instance1, sample_ls,
                    writer, cat_dir, pga_instance_list[1], pga_instance_list[0],
                    cluster_info_instance, self.simple)
            else:
                self.output_p_gene_id_list(curs, schema_instance1, schema_instance2, sample_ls,
                    writer, cat_dir, pga_instance_list[0], pga_instance_list[1],
                    cluster_info_instance, self.simple)
            sys.stderr.write("End Category %s.\n"%cat_no)
    finally:
        outf.close()
def run(self):
    """
    10-31-05
    2006-09-26
        modify it to be compatible with the modified pipeline from haifeng
    2006-11-06
        add type
    2006-12-13
        use font_path and font_size

    Draw three PNG maps named after self.output_prefix:
        <prefix>.function_map.png
        <prefix>.gene_function_map.png
        <prefix>.tf_map.png

    self.type selects where the (no_of_p_funcs, gene_no, go_no) list and
    the mcl_id -> go_no set mapping come from:
        1 -- the database (schema tables derived from self.inputfname)
        2 -- the file self.inputfname
    Any other value raises ValueError (it used to surface as an
    UnboundLocalError on the first use of the missing variables).

        --form_schema_tables()
        --db_connect()
        --get_no_of_p_funcs_gene_no_go_no_list_from_db()
          or get_no_of_p_funcs_gene_no_go_no_list_from_file()
        --get_recurrence_go_no_rec_array_cluster_id_ls()
        --get_go_id2name()
        --draw_function_map()
        --draw_gene_function_map()
        --get_recurrence_rec_array_bs_no_list()
        --get_gene_id2gene_symbol()
        --draw_tf_map()
    """
    schema_instance = form_schema_tables(self.inputfname, self.acc_cutoff, self.lm_bit)
    (conn, curs) = db_connect(self.hostname, self.dbname, self.schema)

    # Character size is measured from the configured font
    # (this replaced the older get_char_dimension() helper).
    font = ImageFont.truetype(self.font_path, self.font_size)
    char_dimension = font.getsize('a')

    # Names are looked up via get_go_id2name()
    # (this replaced the older get_go_no2name() helper).
    go_no2name = get_go_id2name(curs)

    if self.type == 1:
        go_no2go_id = get_go_no2go_id(curs)
        given_p_gene_set = p_gene_id_set_from_gene_p_table(curs, schema_instance.gene_p_table)
        no_of_p_funcs_gene_no_go_no_list, mcl_id2go_no_set = \
            self.get_no_of_p_funcs_gene_no_go_no_list_from_db(
                curs, schema_instance.p_gene_table, given_p_gene_set, go_no2go_id)
    elif self.type == 2:
        no_of_p_funcs_gene_no_go_no_list, mcl_id2go_no_set = \
            self.get_no_of_p_funcs_gene_no_go_no_list_from_file(self.inputfname)
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError("Unsupported type %r: expected 1 (database) or 2 (file)." % (self.type,))

    recurrence_go_no_rec_array_cluster_id_ls, no_of_datasets, mcl_id2enc_recurrence = \
        self.get_recurrence_go_no_rec_array_cluster_id_ls(curs, self.pattern_table, mcl_id2go_no_set)
    no_of_functions = len(recurrence_go_no_rec_array_cluster_id_ls)

    function_map_output_fname = '%s.function_map.png'%self.output_prefix
    go_no2index, function_name_region = self.draw_function_map(
        recurrence_go_no_rec_array_cluster_id_ls, no_of_datasets, go_no2name,
        function_map_output_fname, self.function_name_length, char_dimension,
        no_of_functions, font)

    gene_function_map_output_fname = '%s.gene_function_map.png'%self.output_prefix
    self.draw_gene_function_map(
        no_of_p_funcs_gene_no_go_no_list, go_no2index, function_name_region,
        gene_function_map_output_fname, self.function_name_length, char_dimension,
        no_of_functions, font)

    # tf_map requires mcl_id2enc_recurrence and no_of_datasets from above
    recurrence_rec_array_bs_no_list = self.get_recurrence_rec_array_bs_no_list(
        curs, self.cluster_bs_table, mcl_id2enc_recurrence)
    # Symbols come from get_gene_id2gene_symbol() (this replaced the older
    # get_mt_no2tf_name() helper).
    # NOTE(review): tax_id is hard-coded to 9606 -- presumably human; confirm
    # this restriction is intended.
    mt_no2tf_name = get_gene_id2gene_symbol(curs, tax_id=9606)
    tf_map_output_fname = '%s.tf_map.png'%self.output_prefix
    self.draw_tf_map(
        recurrence_rec_array_bs_no_list, no_of_datasets, mt_no2tf_name,
        tf_map_output_fname, self.function_name_length, char_dimension, font)