Exemple #1
0
 def run(self):
     """
     Build the lookup tables from the database and hand them to parse_files().

     --db_connect()
     --get_id2chr_start_stop()
     --get_gene_id2gene_symbol()
     --get_mt_id2gene_symbol()
     --get_mt_id2color_code()
     --parse_files()
     """
     _conn, cursor = db_connect(self.hostname, self.dbname, self.schema)

     # every lookup below is queried through the same cursor with self.tax_id
     chr_start_stop_by_id = self.get_id2chr_start_stop(cursor, self.tax_id)
     symbol_by_gene_id = get_gene_id2gene_symbol(cursor, self.tax_id)
     symbol_by_mt_id = self.get_mt_id2gene_symbol(
         cursor, symbol_by_gene_id, self.tax_id)

     # the color codes are derived from the input directory, not from the database
     color_code_by_mt_id_symbol = self.get_mt_id2color_code(
         self.input_dir, self.color_code_list, symbol_by_mt_id)

     self.parse_files(
         self.input_dir,
         self.output_fname,
         chr_start_stop_by_id,
         color_code_by_mt_id_symbol,
         symbol_by_mt_id)
 def run(self):
     """Collect the lookups through one database cursor, then run parse_files().

     Call order: db_connect() -> get_id2chr_start_stop() ->
     get_gene_id2gene_symbol() -> get_mt_id2gene_symbol() ->
     get_mt_id2color_code() -> parse_files().
     """
     (_unused_conn, db_cursor) = db_connect(self.hostname, self.dbname, self.schema)

     positions = self.get_id2chr_start_stop(db_cursor, self.tax_id)
     gene_symbols = get_gene_id2gene_symbol(db_cursor, self.tax_id)
     # gene_symbols is only needed to build the mt_id -> gene symbol mapping
     mt_symbols = self.get_mt_id2gene_symbol(db_cursor, gene_symbols, self.tax_id)
     color_codes = self.get_mt_id2color_code(self.input_dir, self.color_code_list, mt_symbols)

     self.parse_files(self.input_dir, self.output_fname, positions, color_codes, mt_symbols)
	def run(self):
		"""
		Load the lookup structures once, then draw patterns.

		Every id in self.pattern_id_list is drawn first. Afterwards pattern ids
		are read interactively with raw_input() and drawn one by one until an
		empty answer is given.

		--db_connect()
		--get_gene_id2gene_symbol()
		--get_go_id2name()
		--get_mcl_id2pred_go_id2gene_id_set_from_db()
		--get_prot_interaction_graph()

		--get_gene_id_set()
		--get_mt_no2gene_id_set()
		--get_go_id2gene_set_from_db()
		--get_mcl_id2mt_no_set()

		--draw_all_patterns()
			--draw_augmented_PI_graph()
			--draw_pattern()

		2006-11-21
			add prot_interaction_graph
		2006-12-29
			split draw_all_patterns()
			add another way to call draw_all_patterns() through pattern_id_list
		"""
		conn, curs = db_connect(self.hostname, self.dbname, self.schema)
		symbol_by_gene_id = get_gene_id2gene_symbol(curs, self.tax_id)
		name_by_go_id = get_go_id2name(curs)
		# make sure the output directory exists before anything is drawn
		if not os.path.isdir(self.output_dir):
			os.makedirs(self.output_dir)
		predicted_gene_sets = self.get_mcl_id2pred_go_id2gene_id_set_from_db(
			curs, self.input_fname, self.gene_p_table)
		interaction_graph = self.get_prot_interaction_graph(
			curs, self.prot_interaction_table, self.tax_id)

		known_gene_ids = self.get_gene_id_set(curs, self.gene_table)
		# the same two queries are run once for the comp_* and once for the expt_* tables
		comp_gene_sets = self.get_mt_no2gene_id_set(curs, self.comp_tf_mapping_table, known_gene_ids)
		expt_gene_sets = self.get_mt_no2gene_id_set(curs, self.expt_tf_mapping_table, known_gene_ids)
		gene_set_by_go_id = self.get_go_id2gene_set_from_db(curs, self.go_table)

		comp_mt_no_sets = self.get_mcl_id2mt_no_set(curs, self.comp_cluster_bs_table)
		expt_mt_no_sets = self.get_mcl_id2mt_no_set(curs, self.expt_cluster_bs_table)

		def draw(one_pattern_id):
			# single place for the long argument list shared by both drawing modes
			self.draw_all_patterns(
				curs, one_pattern_id, predicted_gene_sets, comp_gene_sets,
				expt_gene_sets, gene_set_by_go_id, comp_mt_no_sets, expt_mt_no_sets,
				self.pattern_table, self.output_dir, symbol_by_gene_id, name_by_go_id,
				interaction_graph)

		# batch mode
		for listed_pattern_id in self.pattern_id_list:
			draw(listed_pattern_id)

		# interactive mode: an empty answer ends the loop
		while True:
			typed_pattern_id = raw_input("Please input a pattern id:")
			if not typed_pattern_id:
				break
			draw(typed_pattern_id)
Exemple #4
0
	def run(self):
		"""
		Start one output-format worker per enabled bit of self.running_bit and
		wait for all of them to finish.

		Position i of self.running_bit ('1' = enabled) selects the i-th entry of
			0: tf_darwin_format			-> <cluster_bs_table>.tf.darwin
			1: cluster_darwin_format		-> <basename(input_fname)>.cluster.darwin
			2: prediction_darwin_format		-> <basename(input_fname)>.prediction.darwin
			3: pattern_darwin_format		-> <pattern_table>.pattern.darwin
			4: context_prediction_csv_format	-> <basename(input_fname)>.context.csv
		All output files are placed in self.output_dir, which is created if missing.
		Positions of self.running_bit beyond the formats listed above are ignored.

		The context csv name is built from the basename of self.input_fname, like
		the cluster/prediction outputs. Using the full path would make
		os.path.join() drop self.output_dir for an absolute input path.

		debug and report are not defined in this method; they are taken from the
		enclosing scope.

		09-28-05
		12-19-05
			use class_list and output_fname_list to ease program writing
		12-30-05
			fix a bug in indexing darwin_instance_list
		2006-09-25
		2007-02-08
			add context_prediction_csv_format
		"""
		input_basename = os.path.basename(self.input_fname)
		tf_darwin_ofname = os.path.join(self.output_dir, '%s.tf.darwin'%self.cluster_bs_table)
		cluster_darwin_ofname = os.path.join(self.output_dir, '%s.cluster.darwin'%input_basename)
		prediction_darwin_ofname = os.path.join(self.output_dir, '%s.prediction.darwin'%input_basename)
		pattern_darwin_ofname = os.path.join(self.output_dir, '%s.pattern.darwin'%self.pattern_table)
		context_prediction_csv_fname = os.path.join(self.output_dir, '%s.context.csv'%input_basename)

		if not os.path.isdir(self.output_dir):
			os.makedirs(self.output_dir)
		conn, curs = db_connect(self.hostname, self.dbname, self.schema)

		tax_id = org2tax_id(self.organism)
		gene_id2symbol = get_gene_id2gene_symbol(curs, tax_id)
		gene_id2symbol = self.replace_prime_in_gene_id2symbol(gene_id2symbol)	#01-26-06

		go_no2name = get_go_no2name(curs)	#09-28-05 Jasmine wants the go_name, not go_id

		#2006-09-25 use gene_id2symbol to replace mt_no2tf_name
		mt_no2tf_name = gene_id2symbol

		#order matters: the position in this list is the position in self.running_bit
		format_class_and_ofname_list = [
			(tf_darwin_format, tf_darwin_ofname),
			(cluster_darwin_format, cluster_darwin_ofname),
			(prediction_darwin_format, prediction_darwin_ofname),
			(pattern_darwin_format, pattern_darwin_ofname),
			(context_prediction_csv_format, context_prediction_csv_fname),
		]

		darwin_instance_list = []
		#zip() stops at the shorter sequence, so surplus bits cannot index past the list
		for bit, (format_class, output_fname) in zip(self.running_bit, format_class_and_ofname_list):
			if bit != '1':
				continue
			darwin_instance = format_class(self.hostname, self.dbname, self.schema, self.pattern_table,
				self.cluster_bs_table, self.input_fname, self.lm_bit, self.acc_cut_off,
				output_fname, gene_id2symbol, go_no2name, mt_no2tf_name, debug, report)	#2006-09-25
			darwin_instance_list.append(darwin_instance)
			darwin_instance.start()

		#wait for every started worker
		for darwin_instance in darwin_instance_list:
			darwin_instance.join()
Exemple #5
0
	def run(self):
		"""
		Write the gene annotations selected by self.running_bit to self.output_fname.

		Position i of self.running_bit set to '1' enables one annotation, which is
		fetched from the database and passed to dict2darwin() under this label:
			0: go_bp
			1: go_cc
			2: as
			3: dp
			4: gene_age
			5: gene_tissue
			6: gene_family_size
			7: gnf_gene_tissue
		Positions that are missing from self.running_bit count as disabled.

		The output file is closed even when one of the queries or dict2darwin()
		raises.

		12-28-05
		"""
		conn, curs = db_connect(self.hostname, self.dbname, self.schema)
		organism = get_org_from_tax_id(curs, self.tax_id)
		#get the key_map
		gene_id2symbol = get_gene_id2gene_symbol(curs, self.tax_id)

		running_bit = self.running_bit
		def bit_is_on(index):
			#a running_bit shorter than index+1 means the annotation is off
			return len(running_bit) > index and running_bit[index] == '1'

		#open output here
		outf = open(self.output_fname, 'w')
		#try/finally rather than 'with' to stay compatible with old Python 2 releases
		try:
			if bit_is_on(0):
				gene_id2go_bp_term = get_gene_id2go_term(curs, term_type='biological_process', organism=organism)
				self.dict2darwin(gene_id2go_bp_term, 'go_bp', gene_id2symbol, outf)
			if bit_is_on(1):
				gene_id2go_cc_term = get_gene_id2go_term(curs, term_type='cellular_component', organism=organism)
				self.dict2darwin(gene_id2go_cc_term, 'go_cc', gene_id2symbol, outf)
			if bit_is_on(2):
				gene_id2no_of_events = get_gene_id2no_of_events(curs, self.tax_id, ensembl2no_of_events_table='graph.ensembl2no_of_events')
				self.dict2darwin(gene_id2no_of_events, 'as', gene_id2symbol, outf)
			if bit_is_on(3):
				gene_id2no_of_promoters = get_gene_id2no_of_promoters(curs, self.tax_id)
				self.dict2darwin(gene_id2no_of_promoters, 'dp', gene_id2symbol, outf)
			if bit_is_on(4):
				tg_tax_id2ca_depth_tax_id_short_org = get_tg_tax_id2ca_depth_tax_id_short_org(curs, self.tax_id)
				gene_id2ortholog_tax_id_set = get_gene_id2ortholog_tax_id_set(curs, self.tax_id, homologene_table='homologene.homologene')
				#convert gene_id2ortholog_tax_id_set to gene_id2ca_depth_tax_id_short_org_list
				gene_id2ca_depth_tax_id_short_org_list = {}
				for gene_id, ortholog_tax_id_set in gene_id2ortholog_tax_id_set.iteritems():
					gene_id2ca_depth_tax_id_short_org_list[gene_id] = sorted(
						dict_map(tg_tax_id2ca_depth_tax_id_short_org, list(ortholog_tax_id_set)))
				self.dict2darwin(gene_id2ca_depth_tax_id_short_org_list, 'gene_age', gene_id2symbol, outf)
			if bit_is_on(5):
				gene_id2tissue_list = get_gene_id2tissue_list(curs, self.tax_id)
				self.dict2darwin(gene_id2tissue_list, 'gene_tissue', gene_id2symbol, outf)
			if bit_is_on(6):
				gene_id2family_size = get_gene_id2family_size(curs, self.tax_id)
				self.dict2darwin(gene_id2family_size, 'gene_family_size', gene_id2symbol, outf)
			if bit_is_on(7):
				gnf_gene_id2tissue = get_gnf_gene_id2tissue_list(curs, self.tax_id)
				self.dict2darwin(gnf_gene_id2tissue, 'gnf_gene_tissue', gene_id2symbol, outf)
		finally:
			#close output
			outf.close()
Exemple #6
0
	def run(self):
		"""
		Entry point executed by every MPI rank; the work done depends on the rank
		(size = communicator.size):
			rank 0		connects to the database, sends gene_no2bs_no_block (and,
					when fuzzyDense_flag is set, a pickled edge2encodedOccurrence)
					to ranks 1..size-2, then declares cursor 'crs' on
					good_cluster_table and calls input_node()
			ranks 1..size-2	receive that data, build the two dictionaries and
					call computing_node()
			rank size-1	connects to the database, optionally creates
					cluster_bs_table and calls output_node()
		All ranks meet at mpi_synchronize() between the setup and the processing phase.

		09-05-05
		2006-09-21 add fuzzyDense_flag
		2006-11-02 add tfbs_association_type
		2006-11-02 differentiate good_cluster_table as pattern_xxx or good_xxx for pattern id
		
			--db_connect()
			--get_gene_no2bs_no_block()
			--construct_two_dicts()
			
			--input_node()
				--fetch_cluster_block()
			--computing_node()
				--node_fire()
					--cluster_bs_analysis()
			--create_cluster_bs_table()
			--output_node()
				--submit_cluster_bs_table()
		"""
		communicator = MPI.world.duplicate()
		node_rank = communicator.rank
		#ranks 1..size-2; rank 0 and rank size-1 are handled separately below
		free_computing_nodes = range(1,communicator.size-1)
		#Python 2 print statement; placed before the rank branches, so every rank prints
		print self.tfbs_association_type
		if node_rank == 0:
			(conn, curs) =  db_connect(self.hostname, self.dbname, self.schema)
			#NOTE(review): gene_no2bs_no_block stays unbound when tfbs_association_type
			#is neither 1 nor 2, and the send loop below would then fail
			if self.tfbs_association_type==1:	#2006-11-02
				gene_no2bs_no_block = self.get_gene_no2bs_no_block(curs)
			elif self.tfbs_association_type==2:
				gene_no2bs_no_block = get_gene_no2bs_no_block_from_expt_tf_mapping(curs)
			#same range as free_computing_nodes
			for node in range(1, communicator.size-1):	#send it to the computing_node
				communicator.send(gene_no2bs_no_block, node, 0)
			if self.fuzzyDense_flag:	#2006-09-21 add fuzzyDense_flag
				#12-18-05 get edge2encodedOccurrence
				#NOTE(review): MpiCrackSplat_instance is not used in the visible part of this method
				MpiCrackSplat_instance = MpiCrackSplat()
				#this empty dict is replaced by the result of fill_edge2encodedOccurrence() below
				edge2encodedOccurrence = {}
				min_sup = 5	#need to expose them
				max_sup = 40
				total_vertex_set = self.return_total_vertex_set(curs, self.good_cluster_table)
				edge2encodedOccurrence, no_of_datasets = self.fill_edge2encodedOccurrence(\
					self.sig_vector_fname, min_sup, max_sup, total_vertex_set)
				#pickled with protocol -1 and sent as a string; the receivers use receiveString()
				edge2encodedOccurrence_pickle = cPickle.dumps(edge2encodedOccurrence, -1)
				for node in free_computing_nodes:	#send it to the computing_node
					communicator.send(edge2encodedOccurrence_pickle, node, 0)
		elif node_rank>0 and node_rank<communicator.size-1:
			#counterpart of the first send loop on rank 0
			#presumably the arguments are (data type, source rank, tag) -- TODO confirm
			data, source, tag, count = communicator.receive(Numeric.Int, 0, 0)
			gene_no2bs_no_set, bs_no2gene_no_set = self.construct_two_dicts(node_rank, data)
			if self.fuzzyDense_flag:	#2006-09-21
				#12-18-05
				#counterpart of the second send loop on rank 0
				data, source, tag = communicator.receiveString(0, 0)
				edge2encodedOccurrence = cPickle.loads(data)
			
		elif node_rank==communicator.size-1:	#establish connection before pursuing
			(conn, curs) =  db_connect(self.hostname, self.dbname, self.schema)
			
			#12-20-05 for darwin output
			#NOTE(review): neither mapping is used in the visible part of this method
			gene_id2symbol = get_gene_id2gene_symbol(curs, self.tax_id)
			dataset_no2desc = get_dataset_no2desc(curs)
			
			
		#all ranks finish their setup before the processing phase starts
		mpi_synchronize(communicator)
		
		if node_rank == 0:
			#the table name is interpolated into the SQL text with %
			if self.good_cluster_table.find('pattern')!=-1:	#2006-11-02 it's pattern_xxx table, use id as pattern_id
				curs.execute("DECLARE crs CURSOR FOR select distinct id, vertex_set, recurrence_array\
					from %s "%(self.good_cluster_table))
			else:	#2006-11-02 it's good_xxx table, use mcl_id as pattern_id
				curs.execute("DECLARE crs CURSOR FOR select distinct mcl_id, vertex_set, recurrence_array\
					from %s "%(self.good_cluster_table))
			input_node(communicator, curs, free_computing_nodes, self.size, self.report)
			curs.execute("close crs")
			
		#rank 0 was taken by the branch above, so this covers ranks 1..size-2
		elif node_rank<=communicator.size-2:	#exclude the last node
			if self.fuzzyDense_flag:	#2006-09-21
				fuzzyDense_instance = fuzzyDense(edge2encodedOccurrence)
			else:
				fuzzyDense_instance = None
			parameter_list = [gene_no2bs_no_set, bs_no2gene_no_set, self.ratio_cutoff, \
				self.top_number, self.p_value_cut_off, fuzzyDense_instance, self.degree_cut_off, self.fuzzyDense_flag]
			computing_node(communicator, parameter_list, self.computing_node_handler, report=self.report)
			
		elif node_rank==communicator.size-1:
			
			#12-20-05 comment out
			if self.new_table:
				self.create_cluster_bs_table(curs, self.cluster_bs_table)
			parameter_list = [curs, self.cluster_bs_table]
			output_node(communicator, free_computing_nodes, parameter_list, self.submit_cluster_bs_table, report=self.report)
			#presumably "end" commits the open transaction -- TODO confirm
			if self.commit:
				curs.execute("end")
			"""
Exemple #7
0
	def run(self):
		"""
		Draw the function map, the gene-function map and the tf map.

		The three draw_*() calls receive these output file names:
			<output_prefix>.function_map.png
			<output_prefix>.gene_function_map.png
			<output_prefix>.tf_map.png

		self.type selects the source of the predictions:
			1: the database, via get_no_of_p_funcs_gene_no_go_no_list_from_db()
			2: the file self.inputfname, via get_no_of_p_funcs_gene_no_go_no_list_from_file()
		Any other value raises ValueError. Without the check the method would
		only fail later, on the first use of a variable that was never assigned.

		10-31-05
		2006-09-26
			modify it to be compatible with the modified pipeline from haifeng
		2006-11-06
			add type
		2006-12-13
			use font_path and font_size

			--form_schema_tables()
			--db_connect()

			--get_no_of_p_funcs_gene_no_go_no_list_from_db() or ..._from_file()
			--get_recurrence_go_no_rec_array_cluster_id_ls()
			--get_go_id2name()
			--draw_function_map()

			--draw_gene_function_map()

			--get_recurrence_rec_array_bs_no_list()
			--get_gene_id2gene_symbol()
			--draw_tf_map()
		"""
		schema_instance = form_schema_tables(self.inputfname, self.acc_cutoff, self.lm_bit)
		(conn, curs) = db_connect(self.hostname, self.dbname, self.schema)
		font = ImageFont.truetype(self.font_path, self.font_size)
		#size of the character 'a' in the chosen font, passed to the draw_*() calls
		char_dimension = font.getsize('a')

		#the variable keeps its go_no name, but the mapping comes from get_go_id2name()
		go_no2name = get_go_id2name(curs)
		if self.type==1:
			go_no2go_id = get_go_no2go_id(curs)
			given_p_gene_set = p_gene_id_set_from_gene_p_table(curs, schema_instance.gene_p_table)
			no_of_p_funcs_gene_no_go_no_list, mcl_id2go_no_set = self.get_no_of_p_funcs_gene_no_go_no_list_from_db(curs, \
				schema_instance.p_gene_table, given_p_gene_set, go_no2go_id)
		elif self.type==2:
			no_of_p_funcs_gene_no_go_no_list, mcl_id2go_no_set = self.get_no_of_p_funcs_gene_no_go_no_list_from_file(self.inputfname)
		else:
			#fail with a clear message instead of an unbound-variable error further down
			raise ValueError("Unsupported type %r; expected 1 or 2"%(self.type,))

		recurrence_go_no_rec_array_cluster_id_ls, no_of_datasets, mcl_id2enc_recurrence = \
			self.get_recurrence_go_no_rec_array_cluster_id_ls(curs, self.pattern_table, mcl_id2go_no_set)

		no_of_functions = len(recurrence_go_no_rec_array_cluster_id_ls)
		function_map_output_fname = '%s.function_map.png'%self.output_prefix
		go_no2index, function_name_region = self.draw_function_map(recurrence_go_no_rec_array_cluster_id_ls, no_of_datasets,\
			go_no2name, function_map_output_fname, self.function_name_length, char_dimension, no_of_functions, font)

		#reuses go_no2index and function_name_region returned by draw_function_map()
		gene_function_map_output_fname = '%s.gene_function_map.png'%self.output_prefix
		self.draw_gene_function_map(no_of_p_funcs_gene_no_go_no_list, go_no2index, function_name_region,\
			gene_function_map_output_fname, self.function_name_length, char_dimension, no_of_functions, font)

		#tf_map requires mcl_id2enc_recurrence and no_of_datasets from above
		recurrence_rec_array_bs_no_list = self.get_recurrence_rec_array_bs_no_list(curs, self.cluster_bs_table, mcl_id2enc_recurrence)
		#NOTE(review): tax_id is hard-coded to 9606 (presumably human) -- confirm whether
		#it should come from an instance attribute instead
		mt_no2tf_name = get_gene_id2gene_symbol(curs, tax_id=9606)
		tf_map_output_fname = '%s.tf_map.png'%self.output_prefix
		self.draw_tf_map(recurrence_rec_array_bs_no_list, no_of_datasets, mt_no2tf_name, \
			tf_map_output_fname, self.function_name_length, char_dimension, font)
Exemple #8
0
		vertex_list = map(int, vertex_list)
		recurrence_array = recurrence_array[1:-1].split(',')
		recurrence_array = map(float, recurrence_array)
		
		fuzzyDense_instance = fuzzyDense(edge2encodedOccurrence, debug)
		core_vertex_ls, recurrent_and_on_datasets_ls = fuzzyDense_instance.get_core_vertex_set(vertex_list, recurrence_array, degree_cut_off)
		
		from MpiClusterBsStat import MpiClusterBsStat
		MpiClusterBsStat_instance = MpiClusterBsStat()
		gene_no2bs_no_block = MpiClusterBsStat_instance.get_gene_no2bs_no_block(curs)
		gene_no2bs_no_set, bs_no2gene_no_set = MpiClusterBsStat_instance.construct_two_dicts(0, gene_no2bs_no_block)
		from TF_functions import cluster_bs_analysis
		ls_to_return = cluster_bs_analysis(core_vertex_ls, gene_no2bs_no_set, bs_no2gene_no_set, ratio_cutoff, \
			top_number, p_value_cut_off)
		
		gene_id2symbol = get_gene_id2gene_symbol(curs, tax_id)
		dataset_no2desc = get_dataset_no2desc(curs)
		
		dataset_no_desc_ls = []
		for dataset_index in recurrent_and_on_datasets_ls:
			dataset_no = dataset_index +1
			dataset_no_desc_ls.append([dataset_no, dataset_no2desc[dataset_no]])
		
		
		outf = open(output_file, 'w')
		outf.write("out:=[\n")
		for i in range(len(ls_to_return)):
			row = ls_to_return[i]
			score, score_type, bs_no_list, target_gene_no_list, global_ratio, local_ratio, expected_ratio, unknown_ratio = row
			core_vertex_symbol_ls = dict_map(gene_id2symbol, core_vertex_ls)
			bs_no_symbol_list = dict_map(gene_id2symbol, bs_no_list)