Ejemplo n.º 1
0
	def on_button_filechooserdialog_save_ok_clicked(self, widget, data=None):
		"""
		Callback for the OK button of the save file chooser: write the selected
		entries of the data matrix to the chosen file.

		The dialog is hidden, the current tree selection is collected through
		yh_gnome.foreach_cb and the number of selected rows is pushed to the
		app bar. Nothing is written unless header, strain_acc_list,
		category_list and data_matrix are all truthy.

		With self.id_is_strain set, the first cell of every selected row is
		parsed as "(ecotypeid,duplicate)" and stored back into
		strain_acc_list/category_list, and every unselected row is tossed out.
		Otherwise every unselected column is tossed out.

		widget, data: not used by this handler.

		2008-02-12
			report the number of selected rows (a row has to be double-clicked
			to move the cursor when the selection is multiple)
		2008-02-05
		"""
		output_fname = self.filechooserdialog_save.get_filename()
		self.filechooserdialog_save.hide()
		selected_paths = []
		self.treeselection.selected_foreach(yh_gnome.foreach_cb, selected_paths)
		self.app1_appbar1.push("%s rows selected."%len(selected_paths))
		if not (self.header and self.strain_acc_list and self.category_list and self.data_matrix):
			return	#no matrix loaded, nothing to save

		kept_indices = Set()
		for tree_path in selected_paths:
			store_row = self.liststore[tree_path[0]]
			row_label = store_row[0]
			matrix_index = store_row[-1]	#last cell holds the index into data_matrix
			kept_indices.add(matrix_index)
			if not self.id_is_strain:
				continue
			#the label is a tuple rendered as text: (ecotypeid,duplicate)
			id_fields = row_label[1:-1].split(',')
			self.strain_acc_list[matrix_index] = id_fields[0].strip()	#remove extra space
			self.category_list[matrix_index] = id_fields[1].strip()

		from variation.src.FilterStrainSNPMatrix import FilterStrainSNPMatrix
		matrix_writer = FilterStrainSNPMatrix()
		if self.id_is_strain:
			unselected_rows = Set(range(len(self.strain_acc_list))) - kept_indices
			matrix_writer.write_data_matrix(
				self.data_matrix, output_fname, self.header, self.strain_acc_list, self.category_list,
				unselected_rows, cols_to_be_tossed_out=Set(), nt_alphabet=0)
		else:
			#the first two header entries are not data columns
			unselected_cols = Set(range(len(self.header)-2)) - kept_indices
			matrix_writer.write_data_matrix(
				self.data_matrix, output_fname, self.header, self.strain_acc_list, self.category_list,
				rows_to_be_tossed_out=Set(), cols_to_be_tossed_out=unselected_cols, nt_alphabet=0)
Ejemplo n.º 2
0
	def load_dstruc(self):
		"""
		Load both input matrices and derive the column/row matching structures.

		Runs QualityControl.load_dstruc() first, reads self.input_fname1 and
		self.input_fname2 with FilterStrainSNPMatrix.read_data(), then stores
		the results of get_col_matching_dstruc() and get_row_matching_dstruc()
		on the instance. Drops into pdb first when self.debug is set.
		"""
		if self.debug:
			import pdb
			pdb.set_trace()
		QualityControl.load_dstruc(self)
		from variation.src.FilterStrainSNPMatrix import FilterStrainSNPMatrix
		matrix_reader = FilterStrainSNPMatrix()

		first_matrix = matrix_reader.read_data(self.input_fname1)
		self.header1, self.strain_acc_list1, self.category_list1, self.data_matrix1 = first_matrix
		second_matrix = matrix_reader.read_data(self.input_fname2)
		self.header2, self.strain_acc_list2, self.category_list2, self.data_matrix2 = second_matrix

		col_matching = self.get_col_matching_dstruc(
			self.header1, self.header2, self.curs,
			self.snp_locus_table_250k, self.snp_locus_table_149snp)
		self.col_id2col_index1, self.col_id2col_index2, self.col_id12col_id2 = col_matching

		row_matching = self.get_row_matching_dstruc(
			self.strain_acc_list1, self.category_list1, self.strain_acc_list2,
			self.curs, self.ecotype_duplicate2tg_ecotypeid_table)
		self.row_id2row_index1, self.row_id2row_index2, self.row_id12row_id2 = row_matching
Ejemplo n.º 3
0
def read_2010_x_149SNP(input_fname):
	"""
	Read a SNP matrix file and index its rows and columns.

	input_fname is handed to FilterStrainSNPMatrix.read_data().

	Returns a 3-tuple:
		snp_acc2col_index: header entry (from the third one onwards) -> its
			0-based position among those entries
		accession_id2row_index: int(strain_acc_list entry) -> 0-based row index
		data_matrix: as returned by read_data()

	Any error raised by int() on a strain_acc_list entry propagates to the
	caller.

	2007-12-30
	"""
	from variation.src.FilterStrainSNPMatrix import FilterStrainSNPMatrix
	FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()
	header, strain_acc_list, category_list, data_matrix = FilterStrainSNPMatrix_instance.read_data(input_fname)
	snp_acc_ls = header[2:]	#the first two header entries are skipped
	snp_acc2col_index = dict(zip(snp_acc_ls, range(len(snp_acc_ls))))
	#build a real list: on Python 3 map() returns an iterator and len() on it raises TypeError
	accession_id_ls = [int(strain_acc) for strain_acc in strain_acc_list]
	accession_id2row_index = dict(zip(accession_id_ls, range(len(accession_id_ls))))
	return snp_acc2col_index, accession_id2row_index, data_matrix
    def run(self):
        """
        Read the two input matrices, wrap them as SNPData objects and let
        TwoSNPData384.figureOutABMapping() work on them inside one session.

        A raw MySQLdb cursor and a DBSNP session are both opened. A readme
        object built by formReadmeObj() is saved and flushed before the
        mapping step. When self.commit is set, both the cursor ("commit"
        statement) and the session are committed; otherwise the session is
        rolled back.

        2008-08-11
            the database interface changed in variation.src.dbsnp
        2008-05-06
        """
        import MySQLdb
        connection = MySQLdb.connect(db=self.dbname, host=self.hostname,
                                     user=self.user, passwd=self.passwd)
        cursor = connection.cursor()
        if self.debug:
            import pdb
            pdb.set_trace()

        db = DBSNP(username=self.user, password=self.passwd,
                   hostname=self.hostname, database=self.dbname)
        session = db.session
        session.begin()

        mapping_pair = self.get_snps_name2possible_mappings(db)
        snps_name2possible_mappings, snps_name2snps_id = mapping_pair

        from variation.src.FilterStrainSNPMatrix import FilterStrainSNPMatrix

        def build_snp_data(matrix_fname, **extra_kwargs):
            # read one matrix file and wrap its four parts in a SNPData
            header, strain_acc_list, category_list, data_matrix = \
                FilterStrainSNPMatrix.read_data(matrix_fname)
            return SNPData(header=header, strain_acc_list=strain_acc_list,
                           category_list=category_list, data_matrix=data_matrix,
                           **extra_kwargs)

        first_snp_data = build_snp_data(self.input_fname1,
                                        col_id2id=snps_name2snps_id,
                                        snps_table='dbsnp.snps')
        second_snp_data = build_snp_data(self.input_fname2,
                                         snps_table='stock_250k.snps')

        snp_data_pair = TwoSNPData384(SNPData1=first_snp_data,
                                      SNPData2=second_snp_data,
                                      curs=cursor, user=self.user)

        readme = formReadmeObj(sys.argv, self.ad, README)
        session.save(readme)
        session.flush()
        snp_data_pair.figureOutABMapping(session, readme,
                                         snps_name2possible_mappings)

        if not self.commit:
            session.rollback()
            return
        cursor.execute("commit")
        session.commit()
Ejemplo n.º 5
0
    def load_dstruc(self):
        """
        Load both input matrices and derive the column/row matching structures.

        Runs QualityControl.load_dstruc() first. The first input goes through
        self.readTina2010In250kSNPs(), the second through
        FilterStrainSNPMatrix.read_data(). The results of
        get_col_matching_dstruc() and get_row_matching_dstruc() are stored on
        the instance. Drops into pdb first when self.debug is set.
        """
        if self.debug:
            import pdb
            pdb.set_trace()
        QualityControl.load_dstruc(self)
        from variation.src.FilterStrainSNPMatrix import FilterStrainSNPMatrix
        matrix_reader = FilterStrainSNPMatrix()

        tina_matrix = self.readTina2010In250kSNPs(self.input_fname1)
        (self.header1, self.strain_acc_list1,
         self.category_list1, self.data_matrix1) = tina_matrix

        second_matrix = matrix_reader.read_data(self.input_fname2)
        (self.header2, self.strain_acc_list2,
         self.category_list2, self.data_matrix2) = second_matrix

        col_matching = self.get_col_matching_dstruc(self.header1, self.header2)
        (self.col_id2col_index1, self.col_id2col_index2,
         self.col_id12col_id2) = col_matching

        row_matching = self.get_row_matching_dstruc(
            self.curs, self.strain_acc_list1, self.strain_acc_list2)
        (self.row_id2row_index1, self.row_id2row_index2,
         self.row_id12row_id2) = row_matching
Ejemplo n.º 6
0
    def load_dstruc(self):
        """
        Load both input matrices and derive the column/row matching structures.

        Runs QualityControl.load_dstruc() first, reads self.input_fname1 and
        self.input_fname2 with FilterStrainSNPMatrix.read_data(), then stores
        the results of get_col_matching_dstruc() and get_row_matching_dstruc()
        on the instance. Drops into pdb first when self.debug is set.
        """
        if self.debug:
            import pdb
            pdb.set_trace()
        QualityControl.load_dstruc(self)
        from variation.src.FilterStrainSNPMatrix import FilterStrainSNPMatrix
        reader = FilterStrainSNPMatrix()

        matrix_one = reader.read_data(self.input_fname1)
        (self.header1, self.strain_acc_list1,
         self.category_list1, self.data_matrix1) = matrix_one

        matrix_two = reader.read_data(self.input_fname2)
        (self.header2, self.strain_acc_list2,
         self.category_list2, self.data_matrix2) = matrix_two

        column_dstruc = self.get_col_matching_dstruc(
            self.header1, self.header2, self.curs,
            self.snp_locus_table_250k, self.snp_locus_table_149snp)
        (self.col_id2col_index1, self.col_id2col_index2,
         self.col_id12col_id2) = column_dstruc

        row_dstruc = self.get_row_matching_dstruc(
            self.strain_acc_list1, self.category_list1, self.strain_acc_list2,
            self.curs, self.ecotype_duplicate2tg_ecotypeid_table)
        (self.row_id2row_index1, self.row_id2row_index2,
         self.row_id12row_id2) = row_dstruc
Ejemplo n.º 7
0
	def run(self):
		"""
		Read the two input matrices, wrap them as SNPData objects and let
		TwoSNPData384.figureOutABMapping() work on them inside one session.

		A raw MySQLdb cursor and a DBSNP session are both opened. A readme
		object built by formReadmeObj() is saved and flushed before the
		mapping step. When self.commit is set, both the cursor ("commit"
		statement) and the session are committed; otherwise the session is
		rolled back.

		2008-08-11
			the database interface changed in variation.src.dbsnp
		2008-05-06
		"""
		import MySQLdb
		mysql_conn = MySQLdb.connect(
			db=self.dbname, host=self.hostname, user=self.user, passwd=self.passwd)
		mysql_curs = mysql_conn.cursor()
		if self.debug:
			import pdb
			pdb.set_trace()

		db = DBSNP(
			username=self.user, password=self.passwd,
			hostname=self.hostname, database=self.dbname)
		session = db.session
		session.begin()

		name_mappings = self.get_snps_name2possible_mappings(db)
		snps_name2possible_mappings, snps_name2snps_id = name_mappings

		from variation.src.FilterStrainSNPMatrix import FilterStrainSNPMatrix
		#first matrix: column ids are translated through snps_name2snps_id
		matrix_parts = FilterStrainSNPMatrix.read_data(self.input_fname1)
		header, strain_acc_list, category_list, data_matrix = matrix_parts
		dbsnp_data = SNPData(
			header=header, strain_acc_list=strain_acc_list,
			category_list=category_list, data_matrix=data_matrix,
			col_id2id=snps_name2snps_id, snps_table='dbsnp.snps')

		#second matrix
		matrix_parts = FilterStrainSNPMatrix.read_data(self.input_fname2)
		header, strain_acc_list, category_list, data_matrix = matrix_parts
		stock_250k_data = SNPData(
			header=header, strain_acc_list=strain_acc_list,
			category_list=category_list, data_matrix=data_matrix,
			snps_table='stock_250k.snps')

		paired_data = TwoSNPData384(
			SNPData1=dbsnp_data, SNPData2=stock_250k_data,
			curs=mysql_curs, user=self.user)

		readme = formReadmeObj(sys.argv, self.ad, README)
		session.save(readme)
		session.flush()
		paired_data.figureOutABMapping(session, readme, snps_name2possible_mappings)

		if not self.commit:
			session.rollback()
			return
		mysql_curs.execute("commit")
		session.commit()
Ejemplo n.º 8
0
    def load_dstruc(self):
        """
        Load both input matrices and derive the column/row matching structures.

        Runs QualityControl.load_dstruc() first. The first input goes through
        self.readTina2010In250kSNPs(), the second through
        FilterStrainSNPMatrix.read_data(). The results of
        get_col_matching_dstruc() and get_row_matching_dstruc() are stored on
        the instance. Drops into pdb first when self.debug is set.
        """
        if self.debug:
            import pdb

            pdb.set_trace()
        QualityControl.load_dstruc(self)
        from variation.src.FilterStrainSNPMatrix import FilterStrainSNPMatrix

        snp_matrix_io = FilterStrainSNPMatrix()

        parts1 = self.readTina2010In250kSNPs(self.input_fname1)
        self.header1, self.strain_acc_list1, self.category_list1, self.data_matrix1 = parts1

        parts2 = snp_matrix_io.read_data(self.input_fname2)
        self.header2, self.strain_acc_list2, self.category_list2, self.data_matrix2 = parts2

        col_maps = self.get_col_matching_dstruc(self.header1, self.header2)
        self.col_id2col_index1, self.col_id2col_index2, self.col_id12col_id2 = col_maps

        row_maps = self.get_row_matching_dstruc(
            self.curs, self.strain_acc_list1, self.strain_acc_list2
        )
        self.row_id2row_index1, self.row_id2row_index2, self.row_id12row_id2 = row_maps
Ejemplo n.º 9
0
	def run(self):
		"""
		Build an accession-by-SNP matrix from the database and write it to
		self.output_fname.

		self.processing_bits drives the layout:
			[0]	row id (1st column): 0 = ecotype_id (accessions without an
				ecotype mapping are tossed out), anything else = accession_id
			[1]	data type, 0 or 1 (1 goes through
				map_perlegen_ecotype_name2accession_id()); also the key into
				self.data_type2data_table
			[2]	2nd column: 0 = 'duplicate' (constant 1), anything else =
				'accession_name'
			[3]	which table(s) setup_SNP_dstruc2() is given: 0, 1 or 2

		Writes a message to stderr and exits with status 3 on an unsupported
		processing_bits[3], and with status 2 on an unsupported
		processing_bits[1].
		"""
		import MySQLdb
		conn = MySQLdb.connect(db=self.dbname, host=self.hostname, user=self.user, passwd = self.passwd)
		curs = conn.cursor()
		if self.debug:
			import pdb
			pdb.set_trace()
		#2008-02-08 the old way to get 2010 data was from raw alignments
		#	(get_alignment_id2positions_to_be_checked_ls(), get_SNPpos_snpacc_ls(), setup_SNP_dstruc(),
		#	setup_accession_ecotype_dstruc(), get_accession_X_snp_matrix()). didn't realize all SNPs are put into db.
		if self.processing_bits[3]==0:
			#2009-2-12 will be a problem if snp_locus_table doesn't have field offset
			SNPpos2col_index, snp_acc_ls = self.setup_SNP_dstruc2(curs, self.snp_locus_table, offset=self.offset)
		elif self.processing_bits[3]==1:
			SNPpos2col_index, snp_acc_ls = self.setup_SNP_dstruc2(curs, self.snp_locus_table, \
																cross_linking_table=self.data_type2data_table[self.processing_bits[1]], \
																offset=self.offset)
		elif self.processing_bits[3]==2:
			SNPpos2col_index, snp_acc_ls = self.setup_SNP_dstruc2(curs, self.data_type2data_table[self.processing_bits[1]], \
																offset=self.offset)
		else:
			sys.stderr.write("Error: unsupported 3rd bit in processing_bits %s.\n"%self.processing_bits[3])
			sys.exit(3)
		from variation.src.common import get_accession_id2name
		accession_id2name = get_accession_id2name(curs)
		if self.processing_bits[1]==0:
			row_id2dstruc = self.setup_row_dstruc(curs, SNPpos2col_index, accession_id2name, offset=self.offset, version=self.version)
		elif self.processing_bits[1]==1:
			from variation.src.common import map_perlegen_ecotype_name2accession_id
			ecotype_name2accession_id = map_perlegen_ecotype_name2accession_id(curs)
			row_id2dstruc = self.setup_row_dstruc(curs, SNPpos2col_index, accession_id2name, data_type=self.processing_bits[1], \
												ecotype_name2accession_id=ecotype_name2accession_id,\
												offset=self.offset, version=self.version)
		else:
			#sys.stderr is a file object, not a callable: calling it raised TypeError before sys.exit(2) was reached
			sys.stderr.write("Unsupported data type: %s or no ecotype_name2accession_id specified.\n"%self.processing_bits[1])
			sys.exit(2)
		accession_id_ls, accession_name_ls, data_matrix = self.transform_row_id2dstruc_2_matrix(row_id2dstruc)

		from variation.src.FilterStrainSNPMatrix import FilterStrainSNPMatrix
		FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()

		#2008-02-08 which type of row id/1st column
		rows_to_be_tossed_out = Set()
		if self.processing_bits[0]==0:
			from variation.src.common import map_accession_id2ecotype_id
			accession_id2ecotype_id = map_accession_id2ecotype_id(curs, accession2ecotype_table=self.accession2ecotype_table)
			accession_id2ecotype_id[99] = 6909	#accession 99 is the reference genome, which col-0 (ecotype_id=6909)
			ecotype_id_ls = []
			for row_index, accession_id in enumerate(accession_id_ls):
				ecotype_id = accession_id2ecotype_id.get(accession_id)
				if not ecotype_id:	#mapping failed
					rows_to_be_tossed_out.add(row_index)
				#appended even when the mapping failed, so the list stays aligned with the matrix rows
				ecotype_id_ls.append(ecotype_id)
			strain_acc_list = ecotype_id_ls
			header = ['ecotype_id']	#1st column in the header
		else:
			strain_acc_list = accession_id_ls
			header = ['accession_id']
		#2008-02-08 which type of 2nd column
		if self.processing_bits[2]==0:
			category_list = [1]*len(accession_name_ls)
			header.append('duplicate')	#2nd column in the header
		else:
			#1 and every other value are handled the same way
			category_list = accession_name_ls
			header.append('accession_name')

		header += snp_acc_ls
		FilterStrainSNPMatrix_instance.write_data_matrix(data_matrix, self.output_fname, header, strain_acc_list, category_list, rows_to_be_tossed_out=rows_to_be_tossed_out)
Ejemplo n.º 10
0
    def run(self):
        """
        Fetch SNP calls from the database, assemble a strain-by-SNP matrix
        and write it to self.output_fname.

        Outline:
            1. load the index/info structures and the data matrix through the
               backend picked by self.db_connection_type
               (1: MySQLdb with the *_m() helpers, 2: db_connect());
            2. if self.toss_out_rows, mark the rows returned by
               toss_rows_to_make_distance_matrix_NA_free() for removal;
            3. if self.discard_all_NA_strain, additionally mark the rows
               reported by FilterStrainSNPMatrix.remove_rows_with_too_many_NAs()
               (row_cutoff=1);
            4. if self.output_matrix_type == 1, transpose the matrix so SNPs
               become rows; the marked rows then become columns to toss;
            5. hand everything to write_data_matrix().

        Raises:
            ValueError: self.db_connection_type is neither 1 nor 2.

        2008-05-08
            transpose everything if output_matrix_type=1 (bjarni's SNP matrix format)
        2007-02-19
            --db_connect
            --get_snp_id2index()
            --get_strain_id2index()
            --get_strain_id_info()
            --get_snp_id_info()
            --get_data_matrix()
            if self.toss_out_rows:
                --toss_rows_to_make_distance_matrix_NA_free()
                    --find_smallest_vertex_set_to_remove_all_edges()
            --write_data_matrix()
            #--sort_file()
        2007-09-22
            for mysql_connection
                add get_nativename_snpid2call_m()
                add fill_in_resolved_duplicated_calls()
        """
        if self.debug:
            import pdb
            pdb.set_trace()
        if self.db_connection_type == 1:
            import MySQLdb
            conn = MySQLdb.connect(db=self.dbname,
                                   host=self.hostname,
                                   user=self.user,
                                   passwd=self.passwd)
            curs = conn.cursor()
            snp_id2index, snp_id_list, snp_id2info = self.get_snp_id2index_m(
                curs, self.input_table, self.snp_locus_table)
            strain_id2index, strain_id_list, nativename2strain_id, strain_id2acc, strain_id2category = self.get_strain_id2index_m(
                curs, self.input_table, self.strain_info_table,
                self.only_include_strains_with_GPS,
                self.resolve_duplicated_calls,
                toss_contaminants=self.toss_contaminants)

            if self.input_table == 'dbsnp.calls':
                from variation.src.FigureOut384IlluminaABMapping import get_snps_id2mapping
                snps_id2mapping = get_snps_id2mapping(self.hostname,
                                                      dbname='dbsnp',
                                                      user=self.user,
                                                      passwd=self.passwd)
            else:
                snps_id2mapping = None
            data_matrix = self.get_data_matrix_m(curs, strain_id2index,
                                                 snp_id2index, nt2number,
                                                 self.input_table,
                                                 self.need_heterozygous_call,
                                                 snps_id2mapping)
            # Disabled step (kept for reference): filling in resolved
            # duplicated calls after the matrix is built.
            #if self.resolve_duplicated_calls:
            #    nativename_snpid2call = self.get_nativename_snpid2call_m(curs, self.strain_info_table, self.input_table)
            #    data_matrix = self.fill_in_resolved_duplicated_calls(data_matrix, strain_id2index, snp_id2index, nativename2strain_id, nativename_snpid2call)
            if self.include_other_strain_info:
                strain_id2other_info = self.get_strain_id2other_info(
                    curs, strain_id_list, self.strain_info_table,
                    self.input_table)
            else:
                strain_id2other_info = {}
        elif self.db_connection_type == 2:
            (conn, curs) = db_connect(self.hostname, self.dbname, self.schema)
            snp_id2index, snp_id_list = self.get_snp_id2index(
                curs, self.input_table, self.snp_locus_table)
            strain_id2index, strain_id_list = self.get_strain_id2index(
                curs, self.input_table)

            strain_id2acc, strain_id2category = self.get_strain_id_info(
                curs, strain_id_list, self.strain_info_table)
            snp_id2info = self.get_snp_id_info(curs, snp_id_list,
                                               self.snp_locus_table)
            data_matrix = self.get_data_matrix(curs, strain_id2index,
                                               snp_id2index, nt2number,
                                               self.input_table,
                                               self.need_heterozygous_call)
            strain_id2other_info = {}
        else:
            # Without this guard nothing above is bound and the method died
            # further down with an UnboundLocalError.
            raise ValueError("Unsupported db_connection_type: %r (expected 1 or 2)." %
                             (self.db_connection_type,))

        if self.toss_out_rows:
            rows_to_be_tossed_out = Set(
                self.toss_rows_to_make_distance_matrix_NA_free(data_matrix))
        else:
            rows_to_be_tossed_out = Set()

        #05/08/08
        if self.discard_all_NA_strain:
            from variation.src.FilterStrainSNPMatrix import FilterStrainSNPMatrix
            remove_rows_data = FilterStrainSNPMatrix.remove_rows_with_too_many_NAs(
                data_matrix, row_cutoff=1)
            rows_to_be_tossed_out.update(
                remove_rows_data.rows_with_too_many_NAs_set)

        strain_acc_list = [
            strain_id2acc[strain_id] for strain_id in strain_id_list
        ]
        category_list = [
            strain_id2category[strain_id] for strain_id in strain_id_list
        ]

        # re-key the extra strain info by accession
        strain_acc2other_info = {}
        for strain_id in strain_id2other_info:
            strain_acc2other_info[
                strain_id2acc[strain_id]] = strain_id2other_info[strain_id]

        if self.output_matrix_type == 1:
            #transpose everything
            data_matrix = num.array(data_matrix)
            data_matrix = num.transpose(data_matrix)

            header = ['Chromosomes', 'Positions'] + strain_acc_list
            chromosome_ls = []
            position_ls = []
            for snp_id in snp_id_list:
                snp_name, chromosome, position = snp_id2info[snp_id]
                chromosome_ls.append(chromosome)
                position_ls.append(position)

            # after the transpose the two leading columns describe SNPs
            strain_acc_list = chromosome_ls
            category_list = position_ls
            # what used to be rows (strains) are columns now
            cols_to_be_tossed_out = rows_to_be_tossed_out
            rows_to_be_tossed_out = None
            # NOTE(review): strain_acc2other_info is still passed on unchanged
            # in this mode; the original reset strain_id2other_info (unused
            # from here on) to None instead -- confirm which was intended.
        else:
            header = ['strain', 'category']
            for snp_id in snp_id_list:
                snp_name, chromosome, position = snp_id2info[snp_id]
                header.append(snp_name)
            cols_to_be_tossed_out = None

        write_data_matrix(data_matrix, self.output_fname, header, strain_acc_list, category_list, rows_to_be_tossed_out=rows_to_be_tossed_out, \
           cols_to_be_tossed_out=cols_to_be_tossed_out, nt_alphabet=self.nt_alphabet,\
           strain_acc2other_info=strain_acc2other_info, delimiter=self.delimiter)
Ejemplo n.º 11
0
	def run(self):
		"""
		Fetch SNP calls from the database, assemble a strain-by-SNP matrix
		and write it to self.output_fname.

		Outline:
			1. load the index/info structures and the data matrix through the
			   backend picked by self.db_connection_type
			   (1: MySQLdb with the *_m() helpers, 2: db_connect());
			2. if self.toss_out_rows, mark the rows returned by
			   toss_rows_to_make_distance_matrix_NA_free() for removal;
			3. if self.discard_all_NA_strain, additionally mark the rows
			   reported by FilterStrainSNPMatrix.remove_rows_with_too_many_NAs()
			   (row_cutoff=1);
			4. if self.output_matrix_type == 1, transpose the matrix so SNPs
			   become rows; the marked rows then become columns to toss;
			5. hand everything to write_data_matrix().

		Raises:
			ValueError: self.db_connection_type is neither 1 nor 2.

		2008-05-08
			transpose everything if output_matrix_type=1 (bjarni's SNP matrix format)
		2007-02-19
			--db_connect
			--get_snp_id2index()
			--get_strain_id2index()
			--get_strain_id_info()
			--get_snp_id_info()
			--get_data_matrix()
			if self.toss_out_rows:
				--toss_rows_to_make_distance_matrix_NA_free()
					--find_smallest_vertex_set_to_remove_all_edges()
			--write_data_matrix()
			#--sort_file()
		2007-09-22
			for mysql_connection
				add get_nativename_snpid2call_m()
				add fill_in_resolved_duplicated_calls()
		"""
		if self.debug:
			import pdb
			pdb.set_trace()
		if self.db_connection_type==1:
			import MySQLdb
			conn = MySQLdb.connect(db=self.dbname,host=self.hostname, user=self.user, passwd = self.passwd)
			curs = conn.cursor()
			snp_id2index, snp_id_list, snp_id2info = self.get_snp_id2index_m(curs, self.input_table, self.snp_locus_table)
			strain_id2index, strain_id_list, nativename2strain_id, strain_id2acc, strain_id2category = self.get_strain_id2index_m(curs, \
																self.input_table, self.strain_info_table, self.only_include_strains_with_GPS, \
																self.resolve_duplicated_calls, toss_contaminants=self.toss_contaminants)

			if self.input_table == 'dbsnp.calls':
				from variation.src.FigureOut384IlluminaABMapping import get_snps_id2mapping
				snps_id2mapping = get_snps_id2mapping(self.hostname, dbname='dbsnp', user=self.user, passwd=self.passwd)
			else:
				snps_id2mapping = None
			data_matrix = self.get_data_matrix_m(curs, strain_id2index, snp_id2index, nt2number, self.input_table, self.need_heterozygous_call, snps_id2mapping)
			#Disabled step (kept for reference): filling in resolved duplicated calls after the matrix is built.
			#if self.resolve_duplicated_calls:
			#	nativename_snpid2call = self.get_nativename_snpid2call_m(curs, self.strain_info_table, self.input_table)
			#	data_matrix = self.fill_in_resolved_duplicated_calls(data_matrix, strain_id2index, snp_id2index, nativename2strain_id, nativename_snpid2call)
			if self.include_other_strain_info:
				strain_id2other_info = self.get_strain_id2other_info(curs, strain_id_list, self.strain_info_table, self.input_table)
			else:
				strain_id2other_info = {}
		elif self.db_connection_type==2:
			(conn, curs) =  db_connect(self.hostname, self.dbname, self.schema)
			snp_id2index, snp_id_list = self.get_snp_id2index(curs, self.input_table, self.snp_locus_table)
			strain_id2index, strain_id_list = self.get_strain_id2index(curs, self.input_table)

			strain_id2acc, strain_id2category = self.get_strain_id_info(curs, strain_id_list, self.strain_info_table)
			snp_id2info = self.get_snp_id_info(curs, snp_id_list, self.snp_locus_table)
			data_matrix = self.get_data_matrix(curs, strain_id2index, snp_id2index, nt2number, self.input_table, self.need_heterozygous_call)
			strain_id2other_info = {}
		else:
			#without this guard nothing above is bound and the method died further down with an UnboundLocalError
			raise ValueError("Unsupported db_connection_type: %r (expected 1 or 2)."%(self.db_connection_type,))

		if self.toss_out_rows:
			rows_to_be_tossed_out = Set(self.toss_rows_to_make_distance_matrix_NA_free(data_matrix))
		else:
			rows_to_be_tossed_out = Set()

		#05/08/08
		if self.discard_all_NA_strain:
			from variation.src.FilterStrainSNPMatrix import FilterStrainSNPMatrix
			remove_rows_data = FilterStrainSNPMatrix.remove_rows_with_too_many_NAs(data_matrix, row_cutoff=1)
			rows_to_be_tossed_out.update(remove_rows_data.rows_with_too_many_NAs_set)

		strain_acc_list = [strain_id2acc[strain_id] for strain_id in strain_id_list]
		category_list = [strain_id2category[strain_id] for strain_id in strain_id_list]

		#re-key the extra strain info by accession
		strain_acc2other_info = {}
		for strain_id in strain_id2other_info:
			strain_acc2other_info[strain_id2acc[strain_id]] = strain_id2other_info[strain_id]

		if self.output_matrix_type==1:
			#transpose everything
			data_matrix = num.array(data_matrix)
			data_matrix = num.transpose(data_matrix)

			header = ['Chromosomes', 'Positions'] + strain_acc_list
			chromosome_ls = []
			position_ls = []
			for snp_id in snp_id_list:
				snp_name, chromosome, position = snp_id2info[snp_id]
				chromosome_ls.append(chromosome)
				position_ls.append(position)

			#after the transpose the two leading columns describe SNPs
			strain_acc_list = chromosome_ls
			category_list = position_ls
			#what used to be rows (strains) are columns now
			cols_to_be_tossed_out = rows_to_be_tossed_out
			rows_to_be_tossed_out = None
			#NOTE(review): strain_acc2other_info is still passed on unchanged in this mode; the original reset
			#	strain_id2other_info (unused from here on) to None instead -- confirm which was intended.
		else:
			header = ['strain', 'category']
			for snp_id in snp_id_list:
				snp_name, chromosome, position = snp_id2info[snp_id]
				header.append(snp_name)
			cols_to_be_tossed_out = None

		write_data_matrix(data_matrix, self.output_fname, header, strain_acc_list, category_list, rows_to_be_tossed_out=rows_to_be_tossed_out, \
					cols_to_be_tossed_out=cols_to_be_tossed_out, nt_alphabet=self.nt_alphabet,\
					strain_acc2other_info=strain_acc2other_info, delimiter=self.delimiter)