def run(self):
    """
    Load the lookup tables from the database and hand them to parse_files().

    --db_connect()
    --get_id2chr_start_stop()
    --get_gene_id2gene_symbol()
    --get_mt_id2gene_symbol()
    --get_mt_id2color_code()
    --parse_files()
    """
    _conn, cursor = db_connect(self.hostname, self.dbname, self.schema)

    # lookups built for self.tax_id
    chr_start_stop_by_id = self.get_id2chr_start_stop(cursor, self.tax_id)
    symbol_by_gene_id = get_gene_id2gene_symbol(cursor, self.tax_id)
    symbol_by_mt_id = self.get_mt_id2gene_symbol(cursor, symbol_by_gene_id, self.tax_id)

    # the colour-code lookup is built from the input directory, not the database
    color_code_lookup = self.get_mt_id2color_code(
        self.input_dir,
        self.color_code_list,
        symbol_by_mt_id,
    )

    self.parse_files(
        self.input_dir,
        self.output_fname,
        chr_start_stop_by_id,
        color_code_lookup,
        symbol_by_mt_id,
    )
def run(self):
    """
    Build every lookup parse_files() needs, then call parse_files().

    --db_connect()
    --get_id2chr_start_stop()
    --get_gene_id2gene_symbol()
    --get_mt_id2gene_symbol()
    --get_mt_id2color_code()
    --parse_files()
    """
    connection_and_cursor = db_connect(self.hostname, self.dbname, self.schema)
    db_cursor = connection_and_cursor[1]  # the connection itself is not used below

    positions = self.get_id2chr_start_stop(db_cursor, self.tax_id)
    gene_symbols = get_gene_id2gene_symbol(db_cursor, self.tax_id)
    mt_symbols = self.get_mt_id2gene_symbol(db_cursor, gene_symbols, self.tax_id)
    mt_colors = self.get_mt_id2color_code(self.input_dir, self.color_code_list, mt_symbols)

    self.parse_files(self.input_dir, self.output_fname, positions, mt_colors, mt_symbols)
def run(self):
    """
    Draw every pattern in self.pattern_id_list, then keep drawing pattern ids
    typed at the prompt until an empty one is entered.

    --db_connect()
    --get_gene_id2gene_symbol()
    --get_go_id2name()
    --get_mcl_id2pred_go_id2gene_id_set_from_db()
    --get_prot_interaction_graph()
    --get_gene_id_set()
    --get_mt_no2gene_id_set()
    --get_go_id2gene_set_from_db()
    --get_mcl_id2mt_no_set()
    --draw_all_patterns()
    --draw_augmented_PI_graph()
    --draw_pattern()

    2006-11-21 add prot_interaction_graph
    2006-12-29 split draw_all_patterns()
        add another way to call draw_all_patterns() through pattern_id_list
    """
    _conn, cursor = db_connect(self.hostname, self.dbname, self.schema)
    symbol_by_gene_id = get_gene_id2gene_symbol(cursor, self.tax_id)
    name_by_go_id = get_go_id2name(cursor)

    if not os.path.isdir(self.output_dir):
        os.makedirs(self.output_dir)

    predictions = self.get_mcl_id2pred_go_id2gene_id_set_from_db(
        cursor, self.input_fname, self.gene_p_table)
    pi_graph = self.get_prot_interaction_graph(
        cursor, self.prot_interaction_table, self.tax_id)
    all_gene_ids = self.get_gene_id_set(cursor, self.gene_table)
    comp_genes_by_mt_no = self.get_mt_no2gene_id_set(
        cursor, self.comp_tf_mapping_table, all_gene_ids)
    expt_genes_by_mt_no = self.get_mt_no2gene_id_set(
        cursor, self.expt_tf_mapping_table, all_gene_ids)
    genes_by_go_id = self.get_go_id2gene_set_from_db(cursor, self.go_table)
    comp_mt_nos_by_mcl_id = self.get_mcl_id2mt_no_set(cursor, self.comp_cluster_bs_table)
    expt_mt_nos_by_mcl_id = self.get_mcl_id2mt_no_set(cursor, self.expt_cluster_bs_table)

    def draw(one_pattern_id):
        # single place for the long argument list shared by both loops;
        # self.pattern_table and self.output_dir are still read on every call
        self.draw_all_patterns(
            cursor, one_pattern_id, predictions, comp_genes_by_mt_no,
            expt_genes_by_mt_no, genes_by_go_id, comp_mt_nos_by_mcl_id,
            expt_mt_nos_by_mcl_id, self.pattern_table, self.output_dir,
            symbol_by_gene_id, name_by_go_id, pi_graph)

    # batch part: ids supplied up front
    for listed_id in self.pattern_id_list:
        draw(listed_id)

    # interactive part: an empty answer ends the session
    while True:
        typed_id = raw_input("Please input a pattern id:")
        if not typed_id:
            break
        draw(typed_id)
def run(self):
    """
    Start one output worker per enabled entry of self.running_bit, then
    join() all of them.

    Entry i of self.running_bit pairs with class_list[i] and
    output_fname_list[i]. Where the entry is '1' the class is instantiated
    and start()ed immediately; join() is called on every started instance
    before returning. All output files are placed in self.output_dir, which
    is created if missing.

    09-28-05
    12-19-05 use class_list and output_fname_list to ease program writing
    12-30-05 fix a bug in indexing darwin_instance_list
    2006-09-25
    2007-02-08 add context_prediction_csv_format
    """
    input_basename = os.path.basename(self.input_fname)
    tf_darwin_ofname = os.path.join(self.output_dir, '%s.tf.darwin'%self.cluster_bs_table)
    cluster_darwin_ofname = os.path.join(self.output_dir, '%s.cluster.darwin'%input_basename)
    prediction_darwin_ofname = os.path.join(self.output_dir, '%s.prediction.darwin'%input_basename)
    pattern_darwin_ofname = os.path.join(self.output_dir, '%s.pattern.darwin'%self.pattern_table)
    # Use the basename like the cluster/prediction outputs above: with the raw
    # input_fname, os.path.join() ignores output_dir for an absolute path and
    # points into a possibly non-existent sub-directory for a relative one.
    context_prediction_csv_fname = os.path.join(self.output_dir, '%s.context.csv'%input_basename)

    if not os.path.isdir(self.output_dir):
        os.makedirs(self.output_dir)

    conn, curs = db_connect(self.hostname, self.dbname, self.schema)
    tax_id = org2tax_id(self.organism)
    gene_id2symbol = get_gene_id2gene_symbol(curs, tax_id)
    gene_id2symbol = self.replace_prime_in_gene_id2symbol(gene_id2symbol)	#01-26-06
    go_no2name = get_go_no2name(curs)	#09-28-05 Jasmine wants the go_name, not go_id
    #2006-09-25 use gene_id2symbol to replace mt_no2tf_name
    mt_no2tf_name = gene_id2symbol

    class_list = [tf_darwin_format, cluster_darwin_format, prediction_darwin_format,
        pattern_darwin_format, context_prediction_csv_format]
    output_fname_list = [tf_darwin_ofname, cluster_darwin_ofname, prediction_darwin_ofname,
        pattern_darwin_ofname, context_prediction_csv_fname]

    darwin_instance_list = []
    for i, bit in enumerate(self.running_bit):
        if bit != '1':
            continue
        # NOTE(review): debug and report are bare names here, presumably
        # module-level flags - confirm they are defined in this module.
        darwin_instance = class_list[i](self.hostname, self.dbname, self.schema, self.pattern_table,
            self.cluster_bs_table, self.input_fname, self.lm_bit, self.acc_cut_off,
            output_fname_list[i], gene_id2symbol, go_no2name, mt_no2tf_name, debug, report)	#2006-09-25
        darwin_instance_list.append(darwin_instance)
        # start the instance just created (12-30-05: not the one at index i)
        darwin_instance.start()

    for darwin_instance in darwin_instance_list:
        darwin_instance.join()
def run(self):
    """
    Write the gene attribute sections selected by self.running_bit to
    self.output_fname.

    Entry i of self.running_bit enables one dict2darwin() section when it
    is '1'; entries past the end of self.running_bit count as disabled.
        0: 'go_bp'
        1: 'go_cc'
        2: 'as'
        3: 'dp'
        4: 'gene_age'
        5: 'gene_tissue'
        6: 'gene_family_size'
        7: 'gnf_gene_tissue'

    The output file is closed even when a query or dict2darwin() raises.

    12-28-05
    """
    def enabled(position):
        # same test the sections used inline: long enough and set to '1'
        return len(self.running_bit) > position and self.running_bit[position] == '1'

    conn, curs = db_connect(self.hostname, self.dbname, self.schema)
    organism = get_org_from_tax_id(curs, self.tax_id)
    #get the key_map
    gene_id2symbol = get_gene_id2gene_symbol(curs, self.tax_id)

    #open output here
    outf = open(self.output_fname, 'w')
    try:
        if enabled(0):
            gene_id2go_bp_term = get_gene_id2go_term(curs, term_type='biological_process', organism=organism)
            self.dict2darwin(gene_id2go_bp_term, 'go_bp', gene_id2symbol, outf)
        if enabled(1):
            gene_id2go_cc_term = get_gene_id2go_term(curs, term_type='cellular_component', organism=organism)
            self.dict2darwin(gene_id2go_cc_term, 'go_cc', gene_id2symbol, outf)
        if enabled(2):
            gene_id2no_of_events = get_gene_id2no_of_events(curs, self.tax_id,
                ensembl2no_of_events_table='graph.ensembl2no_of_events')
            self.dict2darwin(gene_id2no_of_events, 'as', gene_id2symbol, outf)
        if enabled(3):
            gene_id2no_of_promoters = get_gene_id2no_of_promoters(curs, self.tax_id)
            self.dict2darwin(gene_id2no_of_promoters, 'dp', gene_id2symbol, outf)
        if enabled(4):
            tg_tax_id2ca_depth_tax_id_short_org = get_tg_tax_id2ca_depth_tax_id_short_org(curs, self.tax_id)
            gene_id2ortholog_tax_id_set = get_gene_id2ortholog_tax_id_set(curs, self.tax_id,
                homologene_table='homologene.homologene')
            #convert gene_id2ortholog_tax_id_set to gene_id2ca_depth_tax_id_short_org_list
            gene_id2ca_depth_tax_id_short_org_list = {}
            for gene_id, ortholog_tax_id_set in gene_id2ortholog_tax_id_set.iteritems():
                ca_depth_tax_id_short_org_list = dict_map(tg_tax_id2ca_depth_tax_id_short_org,
                    list(ortholog_tax_id_set))
                ca_depth_tax_id_short_org_list.sort()
                gene_id2ca_depth_tax_id_short_org_list[gene_id] = ca_depth_tax_id_short_org_list
            self.dict2darwin(gene_id2ca_depth_tax_id_short_org_list, 'gene_age', gene_id2symbol, outf)
        if enabled(5):
            gene_id2tissue_list = get_gene_id2tissue_list(curs, self.tax_id)
            self.dict2darwin(gene_id2tissue_list, 'gene_tissue', gene_id2symbol, outf)
        if enabled(6):
            gene_id2family_size = get_gene_id2family_size(curs, self.tax_id)
            self.dict2darwin(gene_id2family_size, 'gene_family_size', gene_id2symbol, outf)
        if enabled(7):
            gnf_gene_id2tissue = get_gnf_gene_id2tissue_list(curs, self.tax_id)
            self.dict2darwin(gnf_gene_id2tissue, 'gnf_gene_tissue', gene_id2symbol, outf)
    finally:
        #close output, also on the error path so the handle is not leaked
        outf.close()
def run(self):
    """
    Run the job across the MPI nodes: rank 0 feeds input_node(), ranks
    1..size-2 run computing_node(), and rank size-1 runs output_node().

    Before the nodes synchronize, rank 0 sends gene_no2bs_no_block (and,
    when self.fuzzyDense_flag is set, a pickled edge2encodedOccurrence) to
    every computing rank, and each computing rank receives them in the same
    order.

    09-05-05
    2006-09-21 add fuzzyDense_flag
    2006-11-02 add tfbs_association_type
    2006-11-02 differentiate good_cluster_table as pattern_xxx or good_xxx for pattern id

    --db_connect()
    --get_gene_no2bs_no_block()
    --construct_two_dicts()
    --input_node()
    --fetch_cluster_block()
    --computing_node()
    --node_fire()
    --cluster_bs_analysis()
    --create_cluster_bs_table()
    --output_node()
    --submit_cluster_bs_table()
    """
    communicator = MPI.world.duplicate()
    node_rank = communicator.rank
    # ranks 1..size-2; rank 0 and rank size-1 get their own branches below
    free_computing_nodes = range(1,communicator.size-1)
    print self.tfbs_association_type
    if node_rank == 0:
        (conn, curs) = db_connect(self.hostname, self.dbname, self.schema)
        # NOTE(review): gene_no2bs_no_block stays unbound when
        # tfbs_association_type is neither 1 nor 2, so the send below would
        # raise - confirm the option is validated before run() is called.
        if self.tfbs_association_type==1:	#2006-11-02
            gene_no2bs_no_block = self.get_gene_no2bs_no_block(curs)
        elif self.tfbs_association_type==2:
            gene_no2bs_no_block = get_gene_no2bs_no_block_from_expt_tf_mapping(curs)
        for node in range(1, communicator.size-1):	#send it to the computing_node
            communicator.send(gene_no2bs_no_block, node, 0)
        if self.fuzzyDense_flag:	#2006-09-21 add fuzzyDense_flag
            #12-18-05 get edge2encodedOccurrence
            # NOTE(review): MpiCrackSplat_instance and no_of_datasets are not
            # referenced again in this method.
            MpiCrackSplat_instance = MpiCrackSplat()
            edge2encodedOccurrence = {}
            min_sup = 5	#need to expose them
            max_sup = 40
            total_vertex_set = self.return_total_vertex_set(curs, self.good_cluster_table)
            edge2encodedOccurrence, no_of_datasets = self.fill_edge2encodedOccurrence(\
                self.sig_vector_fname, min_sup, max_sup, total_vertex_set)
            # pickled once here, then the same string is sent to each computing rank
            edge2encodedOccurrence_pickle = cPickle.dumps(edge2encodedOccurrence, -1)
            for node in free_computing_nodes:	#send it to the computing_node
                communicator.send(edge2encodedOccurrence_pickle, node, 0)
    elif node_rank>0 and node_rank<communicator.size-1:
        # computing ranks: receive in the same order rank 0 sends
        data, source, tag, count = communicator.receive(Numeric.Int, 0, 0)
        gene_no2bs_no_set, bs_no2gene_no_set = self.construct_two_dicts(node_rank, data)
        if self.fuzzyDense_flag:	#2006-09-21
            #12-18-05
            data, source, tag = communicator.receiveString(0, 0)
            edge2encodedOccurrence = cPickle.loads(data)
    elif node_rank==communicator.size-1:
        #establish connection before pursuing
        (conn, curs) = db_connect(self.hostname, self.dbname, self.schema)
        #12-20-05 for darwin output
        # NOTE(review): gene_id2symbol and dataset_no2desc are not referenced
        # again in the visible part of this method.
        gene_id2symbol = get_gene_id2gene_symbol(curs, self.tax_id)
        dataset_no2desc = get_dataset_no2desc(curs)

    mpi_synchronize(communicator)

    if node_rank == 0:
        if self.good_cluster_table.find('pattern')!=-1:	#2006-11-02 it's pattern_xxx table, use id as pattern_id
            curs.execute("DECLARE crs CURSOR FOR select distinct id, vertex_set, recurrence_array\
                from %s "%(self.good_cluster_table))
        else:	#2006-11-02 it's good_xxx table, use mcl_id as pattern_id
            curs.execute("DECLARE crs CURSOR FOR select distinct mcl_id, vertex_set, recurrence_array\
                from %s "%(self.good_cluster_table))
        input_node(communicator, curs, free_computing_nodes, self.size, self.report)
        curs.execute("close crs")
    elif node_rank<=communicator.size-2:	#exclude the last node
        if self.fuzzyDense_flag:	#2006-09-21
            fuzzyDense_instance = fuzzyDense(edge2encodedOccurrence)
        else:
            fuzzyDense_instance = None
        parameter_list = [gene_no2bs_no_set, bs_no2gene_no_set, self.ratio_cutoff, \
            self.top_number, self.p_value_cut_off, fuzzyDense_instance, self.degree_cut_off, self.fuzzyDense_flag]
        computing_node(communicator, parameter_list, self.computing_node_handler, report=self.report)
    elif node_rank==communicator.size-1:
        #12-20-05 comment out
        if self.new_table:
            self.create_cluster_bs_table(curs, self.cluster_bs_table)
        parameter_list = [curs, self.cluster_bs_table]
        output_node(communicator, free_computing_nodes, parameter_list, self.submit_cluster_bs_table, report=self.report)
        if self.commit:
            curs.execute("end")
    # NOTE(review): the triple-quote below has no closing delimiter in this
    # view - presumably the start of a commented-out section; confirm against
    # the full file.
    """
def run(self):
    """
    Draw the function map, the gene-function map and the tf map.

    The three images are written to '<output_prefix>.function_map.png',
    '<output_prefix>.gene_function_map.png' and '<output_prefix>.tf_map.png'.

    self.type selects where the predictions come from:
        1: the database tables named by form_schema_tables()
        2: the file self.inputfname
    Any other value raises ValueError before any work is done; previously it
    only surfaced later as an UnboundLocalError, after the database
    connection and the font had already been set up.

    10-31-05
    2006-09-26 modify it to be compatible with the modified pipeline from haifeng
    2006-11-06 add type
    2006-12-13 use font_path and font_size

    --form_schema_tables()
    --db_connect()
    --get_go_id2name()
    --get_go_no2go_id()
    --p_gene_id_set_from_gene_p_table()
    --get_no_of_p_funcs_gene_no_go_no_list_from_db()
    --get_no_of_p_funcs_gene_no_go_no_list_from_file()
    --get_recurrence_go_no_rec_array_cluster_id_ls()
    --draw_function_map()
    --draw_gene_function_map()
    --get_recurrence_rec_array_bs_no_list()
    --get_gene_id2gene_symbol()
    --draw_tf_map()
    """
    if self.type not in (1, 2):
        raise ValueError("unsupported type %r: expected 1 (database) or 2 (file)" % (self.type,))

    schema_instance = form_schema_tables(self.inputfname, self.acc_cutoff, self.lm_bit)
    (conn, curs) = db_connect(self.hostname, self.dbname, self.schema)
    font = ImageFont.truetype(self.font_path, self.font_size)
    # size of one 'a' in the chosen font, passed to all three drawing calls
    char_dimension = font.getsize('a')
    go_no2name = get_go_id2name(curs)

    if self.type == 1:
        go_no2go_id = get_go_no2go_id(curs)
        given_p_gene_set = p_gene_id_set_from_gene_p_table(curs, schema_instance.gene_p_table)
        no_of_p_funcs_gene_no_go_no_list, mcl_id2go_no_set = \
            self.get_no_of_p_funcs_gene_no_go_no_list_from_db(
                curs, schema_instance.p_gene_table, given_p_gene_set, go_no2go_id)
    else:
        # self.type == 2, guaranteed by the check at the top
        no_of_p_funcs_gene_no_go_no_list, mcl_id2go_no_set = \
            self.get_no_of_p_funcs_gene_no_go_no_list_from_file(self.inputfname)

    recurrence_go_no_rec_array_cluster_id_ls, no_of_datasets, mcl_id2enc_recurrence = \
        self.get_recurrence_go_no_rec_array_cluster_id_ls(curs, self.pattern_table, mcl_id2go_no_set)
    no_of_functions = len(recurrence_go_no_rec_array_cluster_id_ls)

    function_map_output_fname = '%s.function_map.png' % self.output_prefix
    go_no2index, function_name_region = self.draw_function_map(
        recurrence_go_no_rec_array_cluster_id_ls, no_of_datasets, go_no2name,
        function_map_output_fname, self.function_name_length, char_dimension,
        no_of_functions, font)

    gene_function_map_output_fname = '%s.gene_function_map.png' % self.output_prefix
    self.draw_gene_function_map(
        no_of_p_funcs_gene_no_go_no_list, go_no2index, function_name_region,
        gene_function_map_output_fname, self.function_name_length, char_dimension,
        no_of_functions, font)

    #tf_map requires mcl_id2enc_recurrence and no_of_datasets from above
    recurrence_rec_array_bs_no_list = self.get_recurrence_rec_array_bs_no_list(
        curs, self.cluster_bs_table, mcl_id2enc_recurrence)
    # NOTE(review): tax_id is hard-coded to 9606 here rather than read from
    # an option - confirm whether other organisms need to be supported.
    mt_no2tf_name = get_gene_id2gene_symbol(curs, tax_id=9606)
    tf_map_output_fname = '%s.tf_map.png' % self.output_prefix
    self.draw_tf_map(
        recurrence_rec_array_bs_no_list, no_of_datasets, mt_no2tf_name,
        tf_map_output_fname, self.function_name_length, char_dimension, font)
vertex_list = map(int, vertex_list) recurrence_array = recurrence_array[1:-1].split(',') recurrence_array = map(float, recurrence_array) fuzzyDense_instance = fuzzyDense(edge2encodedOccurrence, debug) core_vertex_ls, recurrent_and_on_datasets_ls = fuzzyDense_instance.get_core_vertex_set(vertex_list, recurrence_array, degree_cut_off) from MpiClusterBsStat import MpiClusterBsStat MpiClusterBsStat_instance = MpiClusterBsStat() gene_no2bs_no_block = MpiClusterBsStat_instance.get_gene_no2bs_no_block(curs) gene_no2bs_no_set, bs_no2gene_no_set = MpiClusterBsStat_instance.construct_two_dicts(0, gene_no2bs_no_block) from TF_functions import cluster_bs_analysis ls_to_return = cluster_bs_analysis(core_vertex_ls, gene_no2bs_no_set, bs_no2gene_no_set, ratio_cutoff, \ top_number, p_value_cut_off) gene_id2symbol = get_gene_id2gene_symbol(curs, tax_id) dataset_no2desc = get_dataset_no2desc(curs) dataset_no_desc_ls = [] for dataset_index in recurrent_and_on_datasets_ls: dataset_no = dataset_index +1 dataset_no_desc_ls.append([dataset_no, dataset_no2desc[dataset_no]]) outf = open(output_file, 'w') outf.write("out:=[\n") for i in range(len(ls_to_return)): row = ls_to_return[i] score, score_type, bs_no_list, target_gene_no_list, global_ratio, local_ratio, expected_ratio, unknown_ratio = row core_vertex_symbol_ls = dict_map(gene_id2symbol, core_vertex_ls) bs_no_symbol_list = dict_map(gene_id2symbol, bs_no_list)