コード例 #1
0
ファイル: filter_clusters.py プロジェクト: polyactis/annot
	def run(self):
		"""
		Drive the whole filtering run: connect to the database, optionally
		create the good-cluster table, build mcl_id2unknown_ratio and hand it
		to submit_good_clusters().

		10-29-05 call p_gene_id_set_from_gene_p_table()

			--db_connect()
			--create_good_cluster_table()	(only if self.new_table is set)
			--p_gene_id_set_from_gene_p_table()
			--get_mcl_id2unknown_ratio()
			--submit_good_clusters()
		"""
		# the connection object is held in a local so it stays referenced for the whole run
		connection, cursor = db_connect(self.hostname, self.dbname, self.schema)
		if self.new_table:
			self.create_good_cluster_table(cursor, self.good_cluster_table)

		predicted_gene_ids = p_gene_id_set_from_gene_p_table(cursor, self.gene_p_table)
		unknown_ratio_by_mcl_id = self.get_mcl_id2unknown_ratio(
			cursor, self.p_gene_table, predicted_gene_ids)
		self.submit_good_clusters(
			cursor,
			self.cluster_queue,
			self.good_cluster_table,
			unknown_ratio_by_mcl_id,
			self.occurrence_cutoff)

		if not self.commit:
			return
		# NOTE(review): "end" is sent as a plain SQL statement; presumably it ends
		# (commits) the open transaction on the backend -- confirm for the database in use.
		cursor.execute("end")
コード例 #2
0
ファイル: SettingCmp.py プロジェクト: polyactis/annot
	def run(self):
		"""
		Compare the predictions of two settings and write a tab-delimited report
		to self.ofname.

		10-17-05
			bit control whether that setting has linear model

		The predicted p_gene ids are split into four categories:
			I	only in setting 1
			II	in both settings
			III	only in setting 2
			IV	in the total set read from setting 2's p_gene_table, but in neither setting
		Each category is sampled through sample_p_gene_id_set() and reported
		through output_p_gene_id_list(); a 'cat<N>' directory is created in the
		current working directory for each category and passed along.

		The output file is closed in a finally clause, so it is flushed and
		released even if one of the reporting steps raises.

			--form_schema_tables()
			--db_connect()
			--p_gene_id_set_from_gene_p_table()
			--sample_p_gene_id_set()
			--output_lm_model()	(only for settings whose bit is '1')
			--get_gene_no2gene_id()
			--get_gene_no2go_no()
			--output_p_gene_id_list()
		"""
		schema_instance1 = form_schema_tables(self.fname1, self.acc_cutoff1, self.lm_bit1)
		schema_instance2 = form_schema_tables(self.fname2, self.acc_cutoff2, self.lm_bit2)
		(conn, curs) = db_connect(self.hostname, self.dbname, self.schema)
		p_gene_id_set1 = p_gene_id_set_from_gene_p_table(curs, schema_instance1.gene_p_table)
		p_gene_id_set2 = p_gene_id_set_from_gene_p_table(curs, schema_instance2.gene_p_table)
		# the "total" set is read from setting 2's p_gene_table, not from a gene_p_table
		p_gene_id_set_total = p_gene_id_set_from_gene_p_table(curs, schema_instance2.p_gene_table)

		catI_set = p_gene_id_set1 - p_gene_id_set2
		catII_set = p_gene_id_set1 & p_gene_id_set2
		catIII_set = p_gene_id_set2 - p_gene_id_set1
		catIV_set = p_gene_id_set_total - (p_gene_id_set1 | p_gene_id_set2)

		sample_ls_ls = [self.sample_p_gene_id_set(p_gene_id_set, self.no_of_samples)
			for p_gene_id_set in [catI_set, catII_set, catIII_set, catIV_set]]

		outf = open(self.ofname, 'w')
		try:
			writer = csv.writer(outf, delimiter='\t')
			writer.writerow(['linear model coeffs of two settings'])
			writer.writerow([])
			writer.writerow(['No.', 'intercept', 'coeff1', 'coeff2', 'coeff3', 'coeff4', 'coeff5', 'intercept_p_value',
				'coeff1_p_value', 'coeff2_p_value', 'coeff3_p_value', 'coeff4_p_value', 'coeff5_p_value',
				'score_cut_off'])

			#fetch linear model coefficients
			pga_instance_list = [None, None]	#10-17-05 default is nothing, none of them have linear model
			for index, schema_instance in enumerate((schema_instance1, schema_instance2)):
				if self.bit[index] != '1':
					continue
				pga_instance = p_gene_analysis()
				pga_instance.go_no2lm_results, lm_results_2d_list = pga_instance.get_go_no2lm_results(curs, schema_instance.lm_table)
				pga_instance.general_lm_results = pga_instance.get_general_lm_results(lm_results_2d_list)
				pga_instance_list[index] = pga_instance
				self.output_lm_model(curs, schema_instance, writer)

			#following is for drawing graph in output_p_gene_id_list()
			self.gene_no2gene_id = get_gene_no2gene_id(curs)
			self.gene_no2go_no = get_gene_no2go_no(curs)

			cluster_info_instance = cluster_info()

			for i, sample_ls in enumerate(sample_ls_ls):
				cat_no = i+1
				sys.stderr.write("Category %s...\n"%cat_no)
				writer.writerow(['Category %s'%cat_no])
				writer.writerow([self.category_no2information[cat_no]])
				cat_dir = 'cat%s'%cat_no
				if not os.path.isdir(cat_dir):
					os.makedirs(cat_dir)
				if i==0:	#this is different, prediction only in schema_instance1, so swap it
					self.output_p_gene_id_list(curs, schema_instance2, schema_instance1, sample_ls, writer, cat_dir,
						pga_instance_list[1], pga_instance_list[0], cluster_info_instance, self.simple)
				else:
					self.output_p_gene_id_list(curs, schema_instance1, schema_instance2, sample_ls, writer, cat_dir,
						pga_instance_list[0], pga_instance_list[1], cluster_info_instance, self.simple)
				sys.stderr.write("End Category %s.\n"%cat_no)
		finally:
			# always release the report file, including on errors in the steps above
			outf.close()
コード例 #3
0
ファイル: DrawMaps.py プロジェクト: polyactis/annot
	def run(self):
		"""
		Build the function map, the gene-function map and the TF map, passing
		'<output_prefix>.function_map.png', '<output_prefix>.gene_function_map.png'
		and '<output_prefix>.tf_map.png' as output file names to the draw methods.

		self.type selects where the gene/function list comes from:
			1	the database (p_gene_table filtered by the gene_p_table set)
			2	the input file self.inputfname
		Any other value raises ValueError.

		10-31-05
		2006-09-26
			modify it to be compatible with the modified pipeline from haifeng
		2006-11-06
			add type
		2006-12-13
			use font_path and font_size

			--form_schema_tables()
			--db_connect()
			--get_go_id2name()

			--get_go_no2go_id()	(type 1)
			--p_gene_id_set_from_gene_p_table()	(type 1)
			--get_no_of_p_funcs_gene_no_go_no_list_from_db()	(type 1)
			--get_no_of_p_funcs_gene_no_go_no_list_from_file()	(type 2)

			--get_recurrence_go_no_rec_array_cluster_id_ls()
			--draw_function_map()

			--draw_gene_function_map()

			--get_recurrence_rec_array_bs_no_list()
			--get_gene_id2gene_symbol()
			--draw_tf_map()
		"""
		schema_instance = form_schema_tables(self.inputfname, self.acc_cutoff, self.lm_bit)
		(conn, curs) = db_connect(self.hostname, self.dbname, self.schema)
		font = ImageFont.truetype(self.font_path, self.font_size)
		# size of the single character 'a' in the chosen font (replaces the older get_char_dimension() helper)
		char_dimension = font.getsize('a')

		# despite the variable name, this mapping now comes from get_go_id2name()
		# (the older get_go_no2name() call was replaced)
		go_no2name = get_go_id2name(curs)
		if self.type==1:
			go_no2go_id = get_go_no2go_id(curs)
			given_p_gene_set = p_gene_id_set_from_gene_p_table(curs, schema_instance.gene_p_table)
			no_of_p_funcs_gene_no_go_no_list, mcl_id2go_no_set = self.get_no_of_p_funcs_gene_no_go_no_list_from_db(curs,
				schema_instance.p_gene_table, given_p_gene_set, go_no2go_id)
		elif self.type==2:
			no_of_p_funcs_gene_no_go_no_list, mcl_id2go_no_set = self.get_no_of_p_funcs_gene_no_go_no_list_from_file(self.inputfname)
		else:
			# without this branch the names assigned above would be unbound and the
			# next statement would fail with an UnboundLocalError
			raise ValueError("Unsupported type %r: expected 1 (database) or 2 (input file)." % (self.type,))

		recurrence_go_no_rec_array_cluster_id_ls, no_of_datasets, mcl_id2enc_recurrence = \
			self.get_recurrence_go_no_rec_array_cluster_id_ls(curs, self.pattern_table, mcl_id2go_no_set)

		no_of_functions = len(recurrence_go_no_rec_array_cluster_id_ls)
		function_map_output_fname = '%s.function_map.png'%self.output_prefix
		go_no2index, function_name_region = self.draw_function_map(recurrence_go_no_rec_array_cluster_id_ls, no_of_datasets,
			go_no2name, function_map_output_fname, self.function_name_length, char_dimension, no_of_functions, font)

		# the gene-function map reuses go_no2index and function_name_region returned by draw_function_map()
		gene_function_map_output_fname = '%s.gene_function_map.png'%self.output_prefix
		self.draw_gene_function_map(no_of_p_funcs_gene_no_go_no_list, go_no2index, function_name_region,
			gene_function_map_output_fname, self.function_name_length, char_dimension, no_of_functions, font)

		#tf_map requires mcl_id2enc_recurrence and no_of_datasets from above
		recurrence_rec_array_bs_no_list = self.get_recurrence_rec_array_bs_no_list(curs, self.cluster_bs_table, mcl_id2enc_recurrence)
		# NOTE(review): tax_id=9606 is presumably the NCBI taxonomy id for human -- confirm.
		# This lookup replaced the older get_mt_no2tf_name() helper.
		mt_no2tf_name = get_gene_id2gene_symbol(curs, tax_id=9606)
		tf_map_output_fname = '%s.tf_map.png'%self.output_prefix
		self.draw_tf_map(recurrence_rec_array_bs_no_list, no_of_datasets, mt_no2tf_name,
			tf_map_output_fname, self.function_name_length, char_dimension, font)