Ejemplo n.º 1
0
	def dstruc_loadin(self, curs):
		"""
		03-14-05
			remove the distance loading part

		Fill the gene/GO lookup attributes of this object.

		Each get_* helper imported from codense.common is called once with
		curs, in the order listed below, and its return value is stored on
		self under the matching attribute name. Progress messages are
		written to stderr before and after loading.

		curs: handed unchanged to every helper (presumably a database
			cursor -- TODO confirm against codense.common).
		"""
		sys.stderr.write("Loading Data STructure...\n")
		from codense.common import get_known_genes_dict, \
			get_go_no2go_id, get_go_no2term_id, get_go_no2depth, \
			get_go_term_id2go_no, get_go_term_id2depth

		#(attribute name, loader) pairs; tuple order is the call order
		attr_loader_pairs = (
			('known_genes_dict', get_known_genes_dict),
			('go_no2go_id', get_go_no2go_id),
			('go_no2term_id', get_go_no2term_id),
			('go_no2depth', get_go_no2depth),
			('go_term_id2go_no', get_go_term_id2go_no),
			('go_term_id2depth', get_go_term_id2depth),
		)
		for attr_name, loader in attr_loader_pairs:
			setattr(self, attr_name, loader(curs))

		sys.stderr.write("Done\n")
Ejemplo n.º 2
0
	def output(self, curs, gene_no2go_id_set_list, go_id_set_list, support, prefix, gene_no2id, go_id2name, schema_list):
		"""
		07-06-05
		"""
		sys.stderr.write("Outputing...")
		
		#get the total set
		total_gene_no_set = Set()
		total_go_id_set = Set()
		for i in range(len(gene_no2go_id_set_list)):
			total_gene_no_set |= Set(gene_no2go_id_set_list[i].keys())
			total_go_id_set |= go_id_set_list[i]
		print "the total number of genes is ",len(total_gene_no_set)
		gene_ofname = '%s.gene'%prefix
		function_ofname = '%s.function'%prefix
		gene_writer = csv.writer(open(gene_ofname,'w'), delimiter='\t')
		function_writer = csv.writer(open(function_ofname, 'w'), delimiter='\t')
		gene_writer.writerow(['']+schema_list)
		function_writer.writerow([''] + schema_list)
		
		from gene_p_map_redundancy import gene_p_map_redundancy
		node_distance_class = gene_p_map_redundancy()
		
		go_id2term_id = get_go_id2term_id(curs)
		go_term_id2depth = get_go_term_id2depth(curs)
		
		#output the gene-oriented information
		for gene_no in total_gene_no_set:
			freq = 0
			p_go_id_set_list = []
			for i in range(len(gene_no2go_id_set_list)):
				if gene_no in gene_no2go_id_set_list[i]:
					p_go_id_set_list.append(gene_no2go_id_set_list[i][gene_no])
					freq += 1
			if freq == support:
				if self.p_go_id_set_list_distinct(curs, p_go_id_set_list, node_distance_class, go_term_id2depth, go_id2term_id):
					row = [gene_no2id[gene_no]]
					for i in range(len(gene_no2go_id_set_list)):
						if gene_no in gene_no2go_id_set_list[i]:
							go_id_set = gene_no2go_id_set_list[i][gene_no]
							go_name_list = dict_map(go_id2name, go_id_set)
							row.append(';'.join(go_name_list))
						else:
							row.append('')
					gene_writer.writerow(row)
		
		#output the function_oriented information
		for go_id in total_go_id_set:
			freq = 0
			for i in range(len(go_id_set_list)):
				if go_id in go_id_set_list[i]:
					freq += 1
			if freq == support:
				row = ['%s(%s)'%(go_id2name[go_id],go_id)]
				for i in range(len(go_id_set_list)):
					if go_id in go_id_set_list[i]:
						row.append('1')
					else:
						row.append('0')
				function_writer.writerow(row)
		
		
		sys.stderr.write("Done.\n")