def run(self):
		"""
		Attach a population id to every row of the SNP data file and write the
		result to self.output_fname.
		
		Steps:
			1. Load the SNP matrix from self.input_fname.
			2. Fetch the population id -> ecotype id list mapping for
				self.population_table and invert it into an
				ecotype id -> population id lookup.
			3. Parse each row id as an integer ecotype id and look up its
				population. Rows without a population get a None placeholder
				and their row index is collected in rows_to_be_tossed_out,
				which is handed to snpData.tofile().
			4. Write the data to self.output_fname.
		
		The database connection is closed once the population lookup has been
		fetched, even if loading the data or querying raises.
		"""
		import MySQLdb
		from OutputPopulation import OutputPopulation
		
		conn = MySQLdb.connect(db=self.dbname, host=self.hostname, user=self.user, passwd=self.passwd)
		try:
			curs = conn.cursor()
			snpData = SNPData(input_fname=self.input_fname, turn_into_array=1, ignore_2nd_column=1)
			popid2ecotypeid_ls = OutputPopulation.get_popid2ecotypeid_ls(curs, self.population_table)
		finally:
			#the database is only needed for the query above; release it on success and on error alike
			conn.close()
		
		#invert the mapping; an ecotype listed under several populations keeps the one iterated last
		ecotypeid2popid = {}
		for popid, ecotypeid_ls in popid2ecotypeid_ls.iteritems():
			for ecotypeid in ecotypeid_ls:
				ecotypeid2popid[ecotypeid] = popid
		
		pop_id_ls = []
		rows_to_be_tossed_out = Set()
		for row_index, row_id in enumerate(snpData.row_id_ls):
			ecotype_id = int(row_id)
			if ecotype_id in ecotypeid2popid:
				pop_id_ls.append(ecotypeid2popid[ecotype_id])
			else:
				rows_to_be_tossed_out.add(row_index)
				pop_id_ls.append(None)	#population unknown; placeholder keeps pop_id_ls aligned with the rows
		
		snpData.strain_acc_list = snpData.row_id_ls
		snpData.category_list = pop_id_ls
		
		snpData.tofile(self.output_fname, rows_to_be_tossed_out = rows_to_be_tossed_out)
Example #2
0
	def run(self):
		"""
		Pick, for each sufficiently large population, the strains and SNPs
		that survive cleanup, and optionally store the outcome in the database.
		
		Steps:
			1. Use dbSNP2data helpers to read the SNP index, the strain index
				and the data matrix for self.input_table.
			2. Fetch the population id -> ecotype id list mapping for
				self.population_table.
			3. For every population with at least
				self.min_no_of_strains_per_pop ecotypes, build its sub matrix
				via self.create_sub_data_matrix(). If enough strains remain,
				run self.cleanup_one_population() and keep the population
				when both the strain and the SNP selections are non-empty.
			4. If self.commit is set, create self.output_table, mark the
				selected strains, submit the selected SNP lists and commit.
		
		The database connection is closed when the method finishes, whether
		it returns normally or an exception propagates.
		"""
		import MySQLdb
		from dbSNP2data import dbSNP2data
		from OutputPopulation import OutputPopulation
		from FilterStrainSNPMatrix import FilterStrainSNPMatrix
		from RemoveBadSNPs import RemoveBadSNPs
		
		conn = MySQLdb.connect(db=self.dbname, host=self.hostname, user=self.user, passwd=self.passwd)
		try:
			curs = conn.cursor()
			
			#output_fname is a dummy value: only the query helpers of this instance are called below
			dbSNP2data_instance = dbSNP2data(user=self.user, passwd=self.passwd, output_fname='whatever')
			
			snp_id2index, snp_id_list, snp_id2info = dbSNP2data_instance.get_snp_id2index_m(curs, self.input_table, self.snp_locus_table)
			#2008-06-02 stuff returned by get_strain_id2index_m is totally changed.
			strain_id2index, strain_id_list, nativename2strain_id, strain_id2acc, strain_id2category = dbSNP2data_instance.get_strain_id2index_m(curs, self.input_table, self.strain_info_table)
			
			#the acc of each strain is used as its ecotype id when locating its row in the data matrix
			ecotype_id2row_index = {}
			for strain_id, acc in strain_id2acc.iteritems():
				ecotype_id2row_index[acc] = strain_id2index[strain_id]
			
			#NOTE(review): the result is not used in this method; the call is kept because its side effects (if any) are not visible from here
			snp_id2acc = dbSNP2data_instance.get_snp_id_info_m(curs, snp_id_list, self.snp_locus_table)
			data_matrix = dbSNP2data_instance.get_data_matrix_m(curs, strain_id2index, snp_id2index, nt2number, self.input_table, need_heterozygous_call=1)
			
			popid2ecotypeid_ls = OutputPopulation.get_popid2ecotypeid_ls(curs, self.population_table)
			FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()
			RemoveBadSNPs_instance = RemoveBadSNPs()
			
			popid2strain_id_snp_id_ls = {}
			for popid, ecotypeid_ls in popid2ecotypeid_ls.iteritems():
				if len(ecotypeid_ls) < self.min_no_of_strains_per_pop:
					continue	#population too small to begin with
				sys.stderr.write("Population %s\n"%popid)
				sub_data_matrix, new_ecotypeid_ls = self.create_sub_data_matrix(popid, data_matrix, ecotypeid_ls, ecotype_id2row_index)
				if len(new_ecotypeid_ls) < self.min_no_of_strains_per_pop:
					continue	#too few strains left once the sub matrix is built
				sys.stderr.write("\tPopulation %s has %s strains\n"%(popid, len(new_ecotypeid_ls)))
				strain_id_selected, snp_id_selected = self.cleanup_one_population(FilterStrainSNPMatrix_instance, RemoveBadSNPs_instance, sub_data_matrix, new_ecotypeid_ls, snp_id_list, self.min_no_of_strains_per_pop, self.row_cutoff, self.col_cutoff, self.min_log_prob)
				#only record populations where both selections came back non-empty
				if strain_id_selected and snp_id_selected:
					popid2strain_id_snp_id_ls[popid] = [strain_id_selected, snp_id_selected]
			
			if self.commit:
				self.create_popid2snpid_table(curs, self.output_table)
				self.mark_strain_id_selected(curs, popid2strain_id_snp_id_ls, self.population_table)
				self.submit_popid2snpid_list(curs, popid2strain_id_snp_id_ls, self.population_table, self.output_table)
				conn.commit()
		finally:
			#release the connection on success and on error alike
			conn.close()