Python FilterStrainSNPMatrix.write_data_matrix 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: FilterStrainSNPMatrix

메소드/함수: write_data_matrix

hotexamples.com에서의 예제들: 8

Python FilterStrainSNPMatrix.write_data_matrix - 8개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python FilterStrainSNPMatrix.FilterStrainSNPMatrix.write_data_matrix의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

FilterStrainSNPMatrix(10)

read_data(8)

write_data_matrix(4)

예제 #1

파일 보기

	def run(self):
		"""
		2007-03-20
		2007-04-03

		Obtain the data matrix and display its heterozygous and coarse views.

			if self.draw_only:
				--FilterStrainSNPMatrix_instance.read_data(self.output_fname)
			else:
				--db_connect()
				--FilterStrainSNPMatrix_instance.read_data(self.input_fname)
				--get_id2index() (strains, then SNPs)
				--dbSNP2data_instance.get_data_matrix(need_heterozygous_call=1)
				--FilterStrainSNPMatrix_instance.write_data_matrix(self.output_fname)
			--get_heterozygous_and_coarse_data_matrix()
			--displayDataMatrix() (twice)
			--raw_input(), blocks until the user presses enter
		"""
		from FilterStrainSNPMatrix import FilterStrainSNPMatrix
		FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()
		if self.draw_only:
			# re-use the matrix written by a previous run instead of querying the database
			header, strain_acc_list, category_list, data_matrix = FilterStrainSNPMatrix_instance.read_data(self.output_fname)
			data_matrix = Numeric.array(data_matrix)
		else:
			(conn, curs) =  db_connect(self.hostname, self.dbname, self.schema)
			# only header/strain/category information is kept from the input file;
			# the matrix read here is replaced by the one fetched from the database below
			header, strain_acc_list, category_list, data_matrix = FilterStrainSNPMatrix_instance.read_data(self.input_fname)

			# the first two header fields are not SNP names
			snp_acc_ls = header[2:]
			strain_id2index = self.get_id2index(curs, self.strain_info_table, strain_acc_list)
			snp_id2index = self.get_id2index(curs, self.snp_locus_table, snp_acc_ls)

			from dbSNP2data import dbSNP2data
			dbSNP2data_instance = dbSNP2data(report=self.report)
			data_matrix = dbSNP2data_instance.get_data_matrix(curs, strain_id2index, snp_id2index, nt2number, self.data_table, need_heterozygous_call=1)

			FilterStrainSNPMatrix_instance.write_data_matrix(data_matrix, self.output_fname, header, strain_acc_list, category_list)

		heterozygous_data_matrix, coarse_data_matrix = self.get_heterozygous_and_coarse_data_matrix(data_matrix)
		self.displayDataMatrix(heterozygous_data_matrix, title='heterozygous_data_matrix, 5-10=hetero, else=0')
		# title restored: the legend read "1=h**o" (word-filtered "homo", i.e. homozygous)
		self.displayDataMatrix(coarse_data_matrix, title='coarse_data_matrix, 0=NA, 1=homo, 2=hetero')
		# keep the process alive until the user is done looking at the output
		raw_input("enter")

예제 #2

파일 보기

파일: Identify2010SNPsGivenSNPset.py 프로젝트: bopopescu/gwasmodules

def shuffleMatrixSNPColumn_in_chrom_position_order(input_fname, curs,
                                                   snps_table, output_fname):
    """
    Rewrite a data matrix file with its SNP columns in (chromosome, position) order.

    The matrix is read from input_fname through FilterStrainSNPMatrix.read_data(),
    the column order is taken from snps_table (sorted by chromosome, position)
    and the reordered matrix is written to output_fname through
    FilterStrainSNPMatrix.write_data_matrix().

    Arguments:
        input_fname: file to read the matrix from
        curs: database cursor used to query snps_table
        snps_table: name of the table holding snpid, chromosome, position.
            It is interpolated into the SQL text (table names cannot be bound
            as query parameters), so it must come from a trusted source.
        output_fname: file to write the reordered matrix to

    Rows of snps_table whose snpid is not a column of the input file, or which
    repeat an snpid already seen, are ignored, so the table may hold more SNPs
    than the file.

    Raises:
        KeyError: if a SNP column of the input file is absent from snps_table.
    """
    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    import numpy
    FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()
    header, strain_acc_list, category_list, data_matrix = FilterStrainSNPMatrix_instance.read_data(
        input_fname)
    # the first two header fields are not SNP names
    snp_acc_list = header[2:]
    snp_acc_set = set(snp_acc_list)

    curs.execute(
        "select snpid, chromosome, position from %s order by chromosome, position"
        % (snps_table))
    snp_acc2col_index = {}
    new_snp_acc_list = []
    for row in curs.fetchall():
        snpid = row[0]
        # a foreign SNP would get a column index beyond the matrix and widen the
        # header; a repeated one would shift every following index
        if snpid not in snp_acc_set or snpid in snp_acc2col_index:
            continue
        snp_acc2col_index[snpid] = len(snp_acc2col_index)
        new_snp_acc_list.append(snpid)

    missing_snp_acc_list = [
        snp_acc for snp_acc in snp_acc_list
        if snp_acc not in snp_acc2col_index
    ]
    if missing_snp_acc_list:
        # fail before touching the output instead of midway through the shuffle
        raise KeyError("SNP(s) in %s not found in table %s: %s" %
                       (input_fname, snps_table, missing_snp_acc_list))

    old_matrix = numpy.array(data_matrix)
    # numpy.int_ is the concrete type the abstract numpy.integer used to resolve to
    new_matrix = numpy.zeros(old_matrix.shape, numpy.int_)
    for j in range(old_matrix.shape[1]):
        col_index = snp_acc2col_index[snp_acc_list[j]]
        new_matrix[:, col_index] = old_matrix[:, j]
    header = header[:2] + new_snp_acc_list
    FilterStrainSNPMatrix_instance.write_data_matrix(new_matrix, output_fname,
                                                     header, strain_acc_list,
                                                     category_list)

예제 #3

파일 보기

파일: RestoreHeterozygousCalls.py 프로젝트: bopopescu/gwasmodules

    def run(self):
        """
        2007-03-20
        2007-04-03

        Obtain the data matrix and display its heterozygous and coarse views.

            if self.draw_only:
                --FilterStrainSNPMatrix_instance.read_data(self.output_fname)
            else:
                --db_connect()
                --FilterStrainSNPMatrix_instance.read_data(self.input_fname)
                --get_id2index() (strains, then SNPs)
                --dbSNP2data_instance.get_data_matrix(need_heterozygous_call=1)
                --FilterStrainSNPMatrix_instance.write_data_matrix(self.output_fname)
            --get_heterozygous_and_coarse_data_matrix()
            --displayDataMatrix() (twice)
            --raw_input(), blocks until the user presses enter
        """
        from FilterStrainSNPMatrix import FilterStrainSNPMatrix
        FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()
        if self.draw_only:
            # re-use the matrix written by a previous run instead of querying the database
            header, strain_acc_list, category_list, data_matrix = FilterStrainSNPMatrix_instance.read_data(
                self.output_fname)
            data_matrix = Numeric.array(data_matrix)
        else:
            (conn, curs) = db_connect(self.hostname, self.dbname, self.schema)
            # only header/strain/category information is kept from the input file;
            # the matrix read here is replaced by the one fetched from the database below
            header, strain_acc_list, category_list, data_matrix = FilterStrainSNPMatrix_instance.read_data(
                self.input_fname)

            # the first two header fields are not SNP names
            snp_acc_ls = header[2:]
            strain_id2index = self.get_id2index(curs, self.strain_info_table,
                                                strain_acc_list)
            snp_id2index = self.get_id2index(curs, self.snp_locus_table,
                                             snp_acc_ls)

            from dbSNP2data import dbSNP2data
            dbSNP2data_instance = dbSNP2data(report=self.report)
            data_matrix = dbSNP2data_instance.get_data_matrix(
                curs,
                strain_id2index,
                snp_id2index,
                nt2number,
                self.data_table,
                need_heterozygous_call=1)

            FilterStrainSNPMatrix_instance.write_data_matrix(
                data_matrix, self.output_fname, header, strain_acc_list,
                category_list)

        heterozygous_data_matrix, coarse_data_matrix = self.get_heterozygous_and_coarse_data_matrix(
            data_matrix)
        self.displayDataMatrix(
            heterozygous_data_matrix,
            title='heterozygous_data_matrix, 5-10=hetero, else=0')
        # title restored: the legend read "1=h**o" (word-filtered "homo", i.e. homozygous)
        self.displayDataMatrix(
            coarse_data_matrix,
            title='coarse_data_matrix, 0=NA, 1=homo, 2=hetero')
        # keep the process alive until the user is done looking at the output
        raw_input("enter")

예제 #4

파일 보기

def shuffleMatrixSNPColumn_in_chrom_position_order(input_fname, curs, snps_table, output_fname):
	"""
	Rewrite a data matrix file with its SNP columns in (chromosome, position) order.

	The matrix is read from input_fname through FilterStrainSNPMatrix.read_data(),
	the column order is taken from snps_table (sorted by chromosome, position)
	and the reordered matrix is written to output_fname through
	FilterStrainSNPMatrix.write_data_matrix().

	Arguments:
		input_fname: file to read the matrix from
		curs: database cursor used to query snps_table
		snps_table: name of the table holding snpid, chromosome, position.
			It is interpolated into the SQL text (table names cannot be bound
			as query parameters), so it must come from a trusted source.
		output_fname: file to write the reordered matrix to

	Rows of snps_table whose snpid is not a column of the input file, or which
	repeat an snpid already seen, are ignored, so the table may hold more SNPs
	than the file.

	Raises:
		KeyError: if a SNP column of the input file is absent from snps_table.
	"""
	from FilterStrainSNPMatrix import FilterStrainSNPMatrix
	import numpy
	FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()
	header, strain_acc_list, category_list, data_matrix = FilterStrainSNPMatrix_instance.read_data(input_fname)
	# the first two header fields are not SNP names
	snp_acc_list = header[2:]
	snp_acc_set = set(snp_acc_list)

	curs.execute("select snpid, chromosome, position from %s order by chromosome, position"%(snps_table))
	snp_acc2col_index = {}
	new_snp_acc_list = []
	for row in curs.fetchall():
		snpid = row[0]
		# a foreign SNP would get a column index beyond the matrix and widen the
		# header; a repeated one would shift every following index
		if snpid not in snp_acc_set or snpid in snp_acc2col_index:
			continue
		snp_acc2col_index[snpid] = len(snp_acc2col_index)
		new_snp_acc_list.append(snpid)

	missing_snp_acc_list = [snp_acc for snp_acc in snp_acc_list if snp_acc not in snp_acc2col_index]
	if missing_snp_acc_list:
		# fail before touching the output instead of midway through the shuffle
		raise KeyError("SNP(s) in %s not found in table %s: %s"%(input_fname, snps_table, missing_snp_acc_list))

	old_matrix = numpy.array(data_matrix)
	# numpy.int_ is the concrete type the abstract numpy.integer used to resolve to
	new_matrix = numpy.zeros(old_matrix.shape, numpy.int_)
	for j in range(old_matrix.shape[1]):
		col_index = snp_acc2col_index[snp_acc_list[j]]
		new_matrix[:,col_index] = old_matrix[:,j]
	header = header[:2] + new_snp_acc_list
	FilterStrainSNPMatrix_instance.write_data_matrix(new_matrix, output_fname, header, strain_acc_list, category_list)

예제 #5

파일 보기

	def run(self):
		"""
		Compare the 2010 PCR calls (accession X SNP matrix) with the sequenom
		calls (ecotype X SNP matrix) and report the differences.

			--MySQLdb.connect()
			--get_nt_number2diff_matrix_index()
			--setup_SNP_dstruc()
			--setup_accession_ecotype_dstruc()
			--get_ecotype_X_snp_matrix()
			if self.sub_justin_output_fname:
				--FilterStrainSNPMatrix_instance.write_data_matrix()
			--get_alignment_id2positions_to_be_checked_ls()
			--get_accession_X_snp_matrix()
			if self.output_fname:
				--FilterStrainSNPMatrix_instance.write_data_matrix()
			--cmp_two_matricies(), the four summary matrices are printed
			if self.latex_output_fname:
				--summary tables
				--table of all mismatches (beautify_snp_diff_details_ls())
				--strain-wise tables (cmp_two_lists(), beautify_diff_details_ls())
				--SNP-wise tables (cmp_two_matricies(), beautify_snp_diff_details_ls())

		LaTeX tables are labelled table_dm0, table_dm1, ... in output order,
		each label being used exactly once.
		"""
		from FilterStrainSNPMatrix import FilterStrainSNPMatrix
		FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()

		import MySQLdb
		# connect with db/host only; never keep a user name or password in this file
		conn = MySQLdb.connect(db=self.dbname,host=self.hostname)
		curs = conn.cursor()
		if self.debug:
			import pdb
			pdb.set_trace()
		nt_number2diff_matrix_index = self.get_nt_number2diff_matrix_index(nt2number)
		SNPpos2col_index, snpid2col_index, snp_acc_ls, snp_index2snp_info_ls = self.setup_SNP_dstruc(curs, self.snp_locus_table)
		ecotype_id2accession_id, ecotype_id2row_index, ecotype_id2info_ls, ecotype_id_ls, accession_id2row_index, accession_id_ls, accession_id2ecotype_id_ls = self.setup_accession_ecotype_dstruc(curs, self.accession2ecotype_table, self.ecotype_table, self.calls_table)

		ecotype_X_snp_matrix, ecotype_X_snp_matrix_touched = self.get_ecotype_X_snp_matrix(curs, ecotype_id2row_index, snpid2col_index, self.calls_table)
		if self.sub_justin_output_fname:
			# the ecotype id list fills both leading columns of the output
			header = ['ecotype_id', 'ecotype_id'] + snp_acc_ls
			FilterStrainSNPMatrix_instance.write_data_matrix(ecotype_X_snp_matrix, self.sub_justin_output_fname, header, ecotype_id_ls, ecotype_id_ls)

		alignment_id2positions_to_be_checked_ls, alignment_id2start = self.get_alignment_id2positions_to_be_checked_ls(curs, self.alignment_table)
		accession_X_snp_matrix, accession_X_snp_matrix_touched, snp_index2alignment_id= self.get_accession_X_snp_matrix(curs, accession_id2row_index, SNPpos2col_index, self.sequence_table, self.alignment_table, alignment_id2positions_to_be_checked_ls)

		if self.output_fname:
			# the accession id list fills both leading columns of the output
			header = ['accession_id', 'accession_id'] + snp_acc_ls
			FilterStrainSNPMatrix_instance.write_data_matrix(accession_X_snp_matrix, self.output_fname, header, accession_id_ls, accession_id_ls)
		summary_diff_matrix_ls, diff_details_ls = self.cmp_two_matricies(accession_X_snp_matrix, accession_X_snp_matrix_touched, ecotype_X_snp_matrix, ecotype_X_snp_matrix_touched, nt_number2diff_matrix_index, ecotype_id2accession_id, ecotype_id2row_index, accession_id2row_index, diff_details_ls_type=2)
		# single-argument print(...) behaves the same as the print statement
		print("diff_matrix_touched_accession_vs_touched_ecotype")
		print(summary_diff_matrix_ls[0])
		print("diff_matrix_touched_accession_vs_untouched_ecotype")
		print(summary_diff_matrix_ls[1])
		print("diff_matrix_untouched_accession_vs_touched_ecotype")
		print(summary_diff_matrix_ls[2])
		print("diff_matrix_untouched_accession_vs_untouched_ecotype")
		print(summary_diff_matrix_ls[3])

		summary_diff_matrix_caption_ls = ['PCR-tried vs sequenom-tried', 'PCR-tried vs sequenom-untried', 'PCR-untried vs sequenom-tried', 'PCR-untried vs sequenom-untried']

		if self.latex_output_fname:
			# imported before the file is opened so that an ImportError cannot leak the handle
			from pymodule.latex import outputMatrixInLatexTable
			outf = open(self.latex_output_fname, 'w')
			try:
				outf.write('\\section{2010 PCR versus sequenom. summary} \\label{section_summary}\n')
				for i, summary_diff_matrix in enumerate(summary_diff_matrix_ls):
					wrapped_diff_matrix = self.wrap_diff_matrix_with_row_col_names(summary_diff_matrix)
					table_label = 'table_dm%s'%i
					outf.write(outputMatrixInLatexTable(wrapped_diff_matrix, summary_diff_matrix_caption_ls[i], table_label))
				# first label number not taken by a summary table; re-using the last
				# loop index here would give the next table a duplicate \label
				table_no = len(summary_diff_matrix_ls)

				#output the whole diff_details_ls
				outf.write('\\section{Real Mismatches between pcr and sequenom (deletion/NA excluded)} \\label{section_real_mismatch}\n')
				diff_details_ls = self.beautify_snp_diff_details_ls(diff_details_ls, ecotype_id2info_ls, snp_index2snp_info_ls, alignment_id2start, snp_index2alignment_id)
				table_label = 'table_dm%s'%table_no
				caption = 'mismatches between pcr and sequenom data (deletion/NA excluded, sorted by accession id)'
				outf.write(outputMatrixInLatexTable(diff_details_ls, caption, table_label, header_ls=['nativename', 'stkparent', 'ecotype_id', 'duplicate', 'accession_id', 'SNP', 'chromosome', 'position', 'alignment_id', 'alignment_start', 'pcr_call', 'sequenom_call']))

				#Strain-wise comparison
				outf.write('\\section{2010 PCR versus sequenom for each strain} \\label{section_strain_wise}\n')
				accession_id_ls.sort()
				for accession_id in accession_id_ls:
					ecotype_id_ls = accession_id2ecotype_id_ls[accession_id]
					# the subsection is named after the first ecotype of the accession
					outf.write('\\subsection{strain %s(accession id=%s)}\n'%(ecotype_id2info_ls[ecotype_id_ls[0]][0], accession_id))
					for ecotype_id in ecotype_id_ls:
						# ecotype_id is indexable: [0] is printed as the ecotype id, [1] as the duplicate
						outf.write('\\subsubsection{corresponding ecotype %s(stkparent=%s, ecotype id=%s, duplicate=%s)}\n'%(ecotype_id2info_ls[ecotype_id][0], ecotype_id2info_ls[ecotype_id][1], ecotype_id[0], ecotype_id[1]))
						e_row_index = ecotype_id2row_index[ecotype_id]
						a_row_index = accession_id2row_index[accession_id]

						diff_matrix_ls, diff_details_ls= self.cmp_two_lists(accession_X_snp_matrix[a_row_index,:], accession_X_snp_matrix_touched[a_row_index,:], ecotype_X_snp_matrix[e_row_index,:], ecotype_X_snp_matrix_touched[e_row_index,:], nt_number2diff_matrix_index)
						wrapped_diff_matrix = self.wrap_diff_matrix_with_row_col_names(diff_matrix_ls[0])
						table_no += 1
						table_label = 'table_dm%s'%table_no
						caption = 'accession id=%s vs ecotype id=%s, duplicate=%s(nativename=%s, stockparent=%s)'%(accession_id, ecotype_id[0], ecotype_id[1], ecotype_id2info_ls[ecotype_id][0], ecotype_id2info_ls[ecotype_id][1])
						outf.write(outputMatrixInLatexTable(wrapped_diff_matrix, caption, table_label))

						if diff_details_ls:
							diff_details_ls = self.beautify_diff_details_ls(diff_details_ls, snp_index2snp_info_ls, alignment_id2start, snp_index2alignment_id)
							table_no += 1
							table_label = 'table_dm%s'%table_no
							caption = 'detailed difference for accession id=%s vs ecotype id=%s, duplicate=%s'%(accession_id, ecotype_id[0], ecotype_id[1])
							outf.write(outputMatrixInLatexTable(diff_details_ls, caption, table_label, header_ls=['snp', 'chromosome', 'position', 'alignment_id', 'alignment_start', 'pcr_call', 'sequenom_call']))
				#SNP-wise comparison
				outf.write('\\section{2010 PCR versus sequenom for each SNP} \\label{section_snp_wise}\n')
				for snp_column in range(accession_X_snp_matrix.shape[1]):
					snp_acc, chromosome, position = snp_index2snp_info_ls[snp_column]
					alignment_id = snp_index2alignment_id[snp_column]
					alignment_start = alignment_id2start[alignment_id]
					outf.write('\\subsection{SNP %s(chrom=%s, pos=%s, alignment id=%s, alignment start=%s)}\n'%(snp_acc, chromosome, position, alignment_id, alignment_start))

					diff_matrix_ls, diff_details_ls = self.cmp_two_matricies(accession_X_snp_matrix, accession_X_snp_matrix_touched, ecotype_X_snp_matrix, ecotype_X_snp_matrix_touched, nt_number2diff_matrix_index, ecotype_id2accession_id, ecotype_id2row_index, accession_id2row_index, snp_column=snp_column, diff_details_ls_type=1)
					wrapped_diff_matrix = self.wrap_diff_matrix_with_row_col_names(diff_matrix_ls[0])
					table_no += 1
					table_label = 'table_dm%s'%table_no
					caption = 'SNP %s(chromosome=%s, position=%s, alignment id=%s, alignment start=%s)'%(snp_acc, chromosome, position, alignment_id, alignment_start)
					outf.write(outputMatrixInLatexTable(wrapped_diff_matrix, caption, table_label))

					if diff_details_ls:
						diff_details_ls = self.beautify_snp_diff_details_ls(diff_details_ls, ecotype_id2info_ls)
						table_no += 1
						table_label = 'table_dm%s'%table_no
						caption = 'detailed difference for SNP %s'%(snp_acc)
						header_ls = ['nativename', 'stkparent', 'ecotype_id', 'duplicate', 'accession_id', 'pcr_call', 'sequenom_call']
						outf.write(outputMatrixInLatexTable(diff_details_ls, caption, table_label, header_ls))
			finally:
				# flush and release the file even when a table fails to render
				outf.close()

예제 #6

파일 보기

    def run(self):
        """
        Compare the 2010 PCR calls (accession X SNP matrix) with the sequenom
        calls (ecotype X SNP matrix) and report the differences.

            --MySQLdb.connect()
            --get_nt_number2diff_matrix_index()
            --setup_SNP_dstruc()
            --setup_accession_ecotype_dstruc()
            --get_ecotype_X_snp_matrix()
            if self.sub_justin_output_fname:
                --FilterStrainSNPMatrix_instance.write_data_matrix()
            --get_alignment_id2positions_to_be_checked_ls()
            --get_accession_X_snp_matrix()
            if self.output_fname:
                --FilterStrainSNPMatrix_instance.write_data_matrix()
            --cmp_two_matricies(), the four summary matrices are printed
            if self.latex_output_fname:
                --summary tables
                --table of all mismatches (beautify_snp_diff_details_ls())
                --strain-wise tables (cmp_two_lists(), beautify_diff_details_ls())
                --SNP-wise tables (cmp_two_matricies(), beautify_snp_diff_details_ls())

        LaTeX tables are labelled table_dm0, table_dm1, ... in output order,
        each label being used exactly once.
        """
        from FilterStrainSNPMatrix import FilterStrainSNPMatrix
        FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()

        import MySQLdb
        # connect with db/host only; never keep a user name or password in this file
        conn = MySQLdb.connect(db=self.dbname, host=self.hostname)
        curs = conn.cursor()
        if self.debug:
            import pdb
            pdb.set_trace()
        nt_number2diff_matrix_index = self.get_nt_number2diff_matrix_index(
            nt2number)
        SNPpos2col_index, snpid2col_index, snp_acc_ls, snp_index2snp_info_ls = self.setup_SNP_dstruc(
            curs, self.snp_locus_table)
        ecotype_id2accession_id, ecotype_id2row_index, ecotype_id2info_ls, ecotype_id_ls, accession_id2row_index, accession_id_ls, accession_id2ecotype_id_ls = self.setup_accession_ecotype_dstruc(
            curs, self.accession2ecotype_table, self.ecotype_table,
            self.calls_table)

        ecotype_X_snp_matrix, ecotype_X_snp_matrix_touched = self.get_ecotype_X_snp_matrix(
            curs, ecotype_id2row_index, snpid2col_index, self.calls_table)
        if self.sub_justin_output_fname:
            # the ecotype id list fills both leading columns of the output
            header = ['ecotype_id', 'ecotype_id'] + snp_acc_ls
            FilterStrainSNPMatrix_instance.write_data_matrix(
                ecotype_X_snp_matrix, self.sub_justin_output_fname, header,
                ecotype_id_ls, ecotype_id_ls)

        alignment_id2positions_to_be_checked_ls, alignment_id2start = self.get_alignment_id2positions_to_be_checked_ls(
            curs, self.alignment_table)
        accession_X_snp_matrix, accession_X_snp_matrix_touched, snp_index2alignment_id = self.get_accession_X_snp_matrix(
            curs, accession_id2row_index, SNPpos2col_index,
            self.sequence_table, self.alignment_table,
            alignment_id2positions_to_be_checked_ls)

        if self.output_fname:
            # the accession id list fills both leading columns of the output
            header = ['accession_id', 'accession_id'] + snp_acc_ls
            FilterStrainSNPMatrix_instance.write_data_matrix(
                accession_X_snp_matrix, self.output_fname, header,
                accession_id_ls, accession_id_ls)
        summary_diff_matrix_ls, diff_details_ls = self.cmp_two_matricies(
            accession_X_snp_matrix,
            accession_X_snp_matrix_touched,
            ecotype_X_snp_matrix,
            ecotype_X_snp_matrix_touched,
            nt_number2diff_matrix_index,
            ecotype_id2accession_id,
            ecotype_id2row_index,
            accession_id2row_index,
            diff_details_ls_type=2)
        # single-argument print(...) behaves the same as the print statement
        print("diff_matrix_touched_accession_vs_touched_ecotype")
        print(summary_diff_matrix_ls[0])
        print("diff_matrix_touched_accession_vs_untouched_ecotype")
        print(summary_diff_matrix_ls[1])
        print("diff_matrix_untouched_accession_vs_touched_ecotype")
        print(summary_diff_matrix_ls[2])
        print("diff_matrix_untouched_accession_vs_untouched_ecotype")
        print(summary_diff_matrix_ls[3])

        summary_diff_matrix_caption_ls = [
            'PCR-tried vs sequenom-tried', 'PCR-tried vs sequenom-untried',
            'PCR-untried vs sequenom-tried', 'PCR-untried vs sequenom-untried'
        ]

        if self.latex_output_fname:
            # imported before the file is opened so that an ImportError cannot leak the handle
            from pymodule.latex import outputMatrixInLatexTable
            outf = open(self.latex_output_fname, 'w')
            try:
                outf.write(
                    '\\section{2010 PCR versus sequenom. summary} \\label{section_summary}\n'
                )
                for i, summary_diff_matrix in enumerate(summary_diff_matrix_ls):
                    wrapped_diff_matrix = self.wrap_diff_matrix_with_row_col_names(
                        summary_diff_matrix)
                    table_label = 'table_dm%s' % i
                    outf.write(
                        outputMatrixInLatexTable(
                            wrapped_diff_matrix,
                            summary_diff_matrix_caption_ls[i], table_label))
                # first label number not taken by a summary table; re-using the last
                # loop index here would give the next table a duplicate \label
                table_no = len(summary_diff_matrix_ls)

                #output the whole diff_details_ls
                outf.write(
                    '\\section{Real Mismatches between pcr and sequenom (deletion/NA excluded)} \\label{section_real_mismatch}\n'
                )
                diff_details_ls = self.beautify_snp_diff_details_ls(
                    diff_details_ls, ecotype_id2info_ls, snp_index2snp_info_ls,
                    alignment_id2start, snp_index2alignment_id)
                table_label = 'table_dm%s' % table_no
                caption = 'mismatches between pcr and sequenom data (deletion/NA excluded, sorted by accession id)'
                outf.write(
                    outputMatrixInLatexTable(
                        diff_details_ls,
                        caption,
                        table_label,
                        header_ls=[
                            'nativename', 'stkparent', 'ecotype_id',
                            'duplicate', 'accession_id', 'SNP', 'chromosome',
                            'position', 'alignment_id', 'alignment_start',
                            'pcr_call', 'sequenom_call'
                        ]))

                #Strain-wise comparison
                outf.write(
                    '\\section{2010 PCR versus sequenom for each strain} \\label{section_strain_wise}\n'
                )
                accession_id_ls.sort()
                for accession_id in accession_id_ls:
                    ecotype_id_ls = accession_id2ecotype_id_ls[accession_id]
                    # the subsection is named after the first ecotype of the accession
                    outf.write(
                        '\\subsection{strain %s(accession id=%s)}\n' %
                        (ecotype_id2info_ls[ecotype_id_ls[0]][0], accession_id))
                    for ecotype_id in ecotype_id_ls:
                        # ecotype_id is indexable: [0] is printed as the ecotype id, [1] as the duplicate
                        outf.write(
                            '\\subsubsection{corresponding ecotype %s(stkparent=%s, ecotype id=%s, duplicate=%s)}\n'
                            % (ecotype_id2info_ls[ecotype_id][0],
                               ecotype_id2info_ls[ecotype_id][1],
                               ecotype_id[0], ecotype_id[1]))
                        e_row_index = ecotype_id2row_index[ecotype_id]
                        a_row_index = accession_id2row_index[accession_id]

                        diff_matrix_ls, diff_details_ls = self.cmp_two_lists(
                            accession_X_snp_matrix[a_row_index, :],
                            accession_X_snp_matrix_touched[a_row_index, :],
                            ecotype_X_snp_matrix[e_row_index, :],
                            ecotype_X_snp_matrix_touched[e_row_index, :],
                            nt_number2diff_matrix_index)
                        wrapped_diff_matrix = self.wrap_diff_matrix_with_row_col_names(
                            diff_matrix_ls[0])
                        table_no += 1
                        table_label = 'table_dm%s' % table_no
                        caption = 'accession id=%s vs ecotype id=%s, duplicate=%s(nativename=%s, stockparent=%s)' % (
                            accession_id, ecotype_id[0], ecotype_id[1],
                            ecotype_id2info_ls[ecotype_id][0],
                            ecotype_id2info_ls[ecotype_id][1])
                        outf.write(
                            outputMatrixInLatexTable(wrapped_diff_matrix,
                                                     caption, table_label))

                        if diff_details_ls:
                            diff_details_ls = self.beautify_diff_details_ls(
                                diff_details_ls, snp_index2snp_info_ls,
                                alignment_id2start, snp_index2alignment_id)
                            table_no += 1
                            table_label = 'table_dm%s' % table_no
                            caption = 'detailed difference for accession id=%s vs ecotype id=%s, duplicate=%s' % (
                                accession_id, ecotype_id[0], ecotype_id[1])
                            outf.write(
                                outputMatrixInLatexTable(
                                    diff_details_ls,
                                    caption,
                                    table_label,
                                    header_ls=[
                                        'snp', 'chromosome', 'position',
                                        'alignment_id', 'alignment_start',
                                        'pcr_call', 'sequenom_call'
                                    ]))
                #SNP-wise comparison
                outf.write(
                    '\\section{2010 PCR versus sequenom for each SNP} \\label{section_snp_wise}\n'
                )
                for snp_column in range(accession_X_snp_matrix.shape[1]):
                    snp_acc, chromosome, position = snp_index2snp_info_ls[
                        snp_column]
                    alignment_id = snp_index2alignment_id[snp_column]
                    alignment_start = alignment_id2start[alignment_id]
                    outf.write(
                        '\\subsection{SNP %s(chrom=%s, pos=%s, alignment id=%s, alignment start=%s)}\n'
                        % (snp_acc, chromosome, position, alignment_id,
                           alignment_start))

                    diff_matrix_ls, diff_details_ls = self.cmp_two_matricies(
                        accession_X_snp_matrix,
                        accession_X_snp_matrix_touched,
                        ecotype_X_snp_matrix,
                        ecotype_X_snp_matrix_touched,
                        nt_number2diff_matrix_index,
                        ecotype_id2accession_id,
                        ecotype_id2row_index,
                        accession_id2row_index,
                        snp_column=snp_column,
                        diff_details_ls_type=1)
                    wrapped_diff_matrix = self.wrap_diff_matrix_with_row_col_names(
                        diff_matrix_ls[0])
                    table_no += 1
                    table_label = 'table_dm%s' % table_no
                    caption = 'SNP %s(chromosome=%s, position=%s, alignment id=%s, alignment start=%s)' % (
                        snp_acc, chromosome, position, alignment_id,
                        alignment_start)
                    outf.write(
                        outputMatrixInLatexTable(wrapped_diff_matrix, caption,
                                                 table_label))

                    if diff_details_ls:
                        diff_details_ls = self.beautify_snp_diff_details_ls(
                            diff_details_ls, ecotype_id2info_ls)
                        table_no += 1
                        table_label = 'table_dm%s' % table_no
                        caption = 'detailed difference for SNP %s' % (snp_acc)
                        header_ls = [
                            'nativename', 'stkparent', 'ecotype_id',
                            'duplicate', 'accession_id', 'pcr_call',
                            'sequenom_call'
                        ]
                        outf.write(
                            outputMatrixInLatexTable(diff_details_ls, caption,
                                                     table_label, header_ls))
            finally:
                # flush and release the file even when a table fails to render
                outf.close()

예제 #7

파일 보기

파일: Identify2010SNPsGivenSNPset.py 프로젝트: bopopescu/gwasmodules

    def run(self):
        """
        2007-03-29
        2007-04-03
        2007-05-01

        Build (or reload) the 2010 SNP matrix and compare it with the
        corresponding rows of the input matrix.

            --FilterStrainSNPMatrix.read_data(self.input_fname)
            if self.comparison_only:
                --FilterStrainSNPMatrix.read_data(self.output_fname)
            else:
                --db_connect()
                --get_SNPpos2index()
                --create_SNP_matrix_2010()
                    --get_align_length_from_fname()
                        --get_positions_to_be_checked_ls()
                    --get_align_matrix_from_fname()
                        --get_positions_to_be_checked_ls()
                --get_mapping_info_regarding_strain_acc()
                --shuffle_data_matrix_according_to_strain_acc_ls()
                --FilterStrainSNPMatrix.write_data_matrix(self.output_fname)

            --extract_sub_data_matrix()
            if self.sub_justin_output_fname:
                --FilterStrainSNPMatrix.write_data_matrix()
            --compare_two_SNP_matrix()
            if self.diff_output_fname:
                --outputDiffType()
            --print the per-tag counters, show the diff matrix with pylab
            --CmpAccession2Ecotype.cmp_two_matricies(), print its result
        """
        from FilterStrainSNPMatrix import FilterStrainSNPMatrix
        matrix_io = FilterStrainSNPMatrix()
        header, src_strain_acc_list, category_list, data_matrix = matrix_io.read_data(
            self.input_fname)
        if not self.comparison_only:
            (conn, curs) = db_connect(self.hostname, self.dbname, self.schema)
            # derive the 2010 matrix from the alignment data
            # (the first two header fields are not SNP names)
            snp_acc_ls = header[2:]
            SNPpos2index = self.get_SNPpos2index(
                curs, snp_acc_ls, self.snp_locus_table)
            abbr_name_ls, SNP_matrix_2010 = self.create_SNP_matrix_2010(
                SNPpos2index, self.data_dir_2010)
            strain_acc_ls, strain_acc2abbr_name, strain_acc2index = self.get_mapping_info_regarding_strain_acc(
                curs, self.strain_info_table, self.strain_info_2010_table,
                abbr_name_ls)
            SNP_matrix_2010_sorted = self.shuffle_data_matrix_according_to_strain_acc_ls(
                SNP_matrix_2010, strain_acc_ls, strain_acc2index)
            # abbreviated names, in the same order as strain_acc_ls
            abbr_name_ls_sorted = [
                strain_acc2abbr_name[acc] for acc in strain_acc_ls
            ]
            matrix_io.write_data_matrix(SNP_matrix_2010_sorted,
                                        self.output_fname, header,
                                        strain_acc_ls, abbr_name_ls_sorted)
        else:
            # the 2010 matrix was written by an earlier run; just load it
            header, strain_acc_ls, abbr_name_ls_sorted, SNP_matrix_2010_sorted = matrix_io.read_data(
                self.output_fname)
            SNP_matrix_2010_sorted = Numeric.array(SNP_matrix_2010_sorted)

        # comparison against the rows of the input matrix selected by strain_acc_ls
        data_matrix = Numeric.array(data_matrix)
        sub_data_matrix = self.extract_sub_data_matrix(
            src_strain_acc_list, data_matrix, strain_acc_ls)
        if self.sub_justin_output_fname:
            matrix_io.write_data_matrix(sub_data_matrix,
                                        self.sub_justin_output_fname, header,
                                        strain_acc_ls, abbr_name_ls_sorted)
        diff_matrix, diff_tag_dict, diff_tag2counter = self.compare_two_SNP_matrix(
            SNP_matrix_2010_sorted, sub_data_matrix)
        if self.diff_output_fname:
            self.outputDiffType(diff_matrix, SNP_matrix_2010_sorted,
                                sub_data_matrix, diff_tag_dict,
                                self.diff_type_to_be_outputted,
                                abbr_name_ls_sorted, header[2:],
                                self.diff_output_fname)

        # one "tag(code):count" entry per difference type; also echoed to stdout
        summary_parts = []
        for tag, counter in diff_tag2counter.iteritems():
            entry = (tag, diff_tag_dict[tag], counter)
            summary_parts.append('%s(%s):%s' % entry)
            print('\t%s(%s)\t%s' % entry)
        import pylab
        pylab.clf()
        # rows are drawn in reverse order
        flipped_diff_matrix = Numeric.array(list(diff_matrix)[::-1])
        pylab.imshow(flipped_diff_matrix, interpolation='nearest')
        pylab.title(' '.join(summary_parts))
        pylab.colorbar()
        pylab.show()

        #2007-11-01 do something as CmpAccession2Ecotype.py
        from CmpAccession2Ecotype import CmpAccession2Ecotype
        cmp_ins = CmpAccession2Ecotype()
        nt_number2diff_matrix_index = cmp_ins.get_nt_number2diff_matrix_index(
            nt2number)
        # NOTE(review): the placeholder pairs row numbers with column numbers and is
        # passed for three different mapping arguments -- confirm against cmp_two_matricies()
        row_count = sub_data_matrix.shape[0]
        col_count = sub_data_matrix.shape[1]
        dc_placeholder = dict(zip(range(row_count), range(col_count)))
        diff_matrix_ls = cmp_ins.cmp_two_matricies(
            SNP_matrix_2010_sorted, sub_data_matrix,
            nt_number2diff_matrix_index, dc_placeholder, dc_placeholder,
            dc_placeholder)
        print(diff_matrix_ls)

예제 #8

파일 보기

	def run(self):
		"""
		2007-03-29
		2007-04-03
		2007-05-01

		Build (or reload) the 2010 SNP matrix and compare it with the
		corresponding rows of the input matrix.

			--FilterStrainSNPMatrix.read_data(self.input_fname)
			if self.comparison_only:
				--FilterStrainSNPMatrix.read_data(self.output_fname)
			else:
				--db_connect()
				--get_SNPpos2index()
				--create_SNP_matrix_2010()
					--get_align_length_from_fname()
						--get_positions_to_be_checked_ls()
					--get_align_matrix_from_fname()
						--get_positions_to_be_checked_ls()
				--get_mapping_info_regarding_strain_acc()
				--shuffle_data_matrix_according_to_strain_acc_ls()
				--FilterStrainSNPMatrix.write_data_matrix(self.output_fname)

			--extract_sub_data_matrix()
			if self.sub_justin_output_fname:
				--FilterStrainSNPMatrix.write_data_matrix()
			--compare_two_SNP_matrix()
			if self.diff_output_fname:
				--outputDiffType()
			--print the per-tag counters, show the diff matrix with pylab
			--CmpAccession2Ecotype.cmp_two_matricies(), print its result
		"""
		from FilterStrainSNPMatrix import FilterStrainSNPMatrix
		matrix_io = FilterStrainSNPMatrix()
		header, src_strain_acc_list, category_list, data_matrix = matrix_io.read_data(self.input_fname)
		if not self.comparison_only:
			(conn, curs) =  db_connect(self.hostname, self.dbname, self.schema)
			# derive the 2010 matrix from the alignment data
			# (the first two header fields are not SNP names)
			snp_acc_ls = header[2:]
			SNPpos2index = self.get_SNPpos2index(curs, snp_acc_ls, self.snp_locus_table)
			abbr_name_ls, SNP_matrix_2010 = self.create_SNP_matrix_2010(SNPpos2index, self.data_dir_2010)
			strain_acc_ls, strain_acc2abbr_name, strain_acc2index = self.get_mapping_info_regarding_strain_acc(curs, self.strain_info_table, self.strain_info_2010_table, abbr_name_ls)
			SNP_matrix_2010_sorted = self.shuffle_data_matrix_according_to_strain_acc_ls(SNP_matrix_2010, strain_acc_ls, strain_acc2index)
			# abbreviated names, in the same order as strain_acc_ls
			abbr_name_ls_sorted = [strain_acc2abbr_name[acc] for acc in strain_acc_ls]
			matrix_io.write_data_matrix(SNP_matrix_2010_sorted, self.output_fname, header, strain_acc_ls, abbr_name_ls_sorted)
		else:
			# the 2010 matrix was written by an earlier run; just load it
			header, strain_acc_ls, abbr_name_ls_sorted, SNP_matrix_2010_sorted = matrix_io.read_data(self.output_fname)
			SNP_matrix_2010_sorted = Numeric.array(SNP_matrix_2010_sorted)

		# comparison against the rows of the input matrix selected by strain_acc_ls
		data_matrix = Numeric.array(data_matrix)
		sub_data_matrix = self.extract_sub_data_matrix(src_strain_acc_list, data_matrix, strain_acc_ls)
		if self.sub_justin_output_fname:
			matrix_io.write_data_matrix(sub_data_matrix, self.sub_justin_output_fname, header, strain_acc_ls, abbr_name_ls_sorted)
		diff_matrix, diff_tag_dict, diff_tag2counter= self.compare_two_SNP_matrix(SNP_matrix_2010_sorted, sub_data_matrix)
		if self.diff_output_fname:
			self.outputDiffType(diff_matrix, SNP_matrix_2010_sorted, sub_data_matrix, diff_tag_dict, self.diff_type_to_be_outputted, abbr_name_ls_sorted, header[2:], self.diff_output_fname)

		# one "tag(code):count" entry per difference type; also echoed to stdout
		summary_parts = []
		for tag, counter in diff_tag2counter.iteritems():
			entry = (tag, diff_tag_dict[tag], counter)
			summary_parts.append('%s(%s):%s'%entry)
			print('\t%s(%s)\t%s'%entry)
		import pylab
		pylab.clf()
		# rows are drawn in reverse order
		flipped_diff_matrix = Numeric.array(list(diff_matrix)[::-1])
		pylab.imshow(flipped_diff_matrix, interpolation='nearest')
		pylab.title(' '.join(summary_parts))
		pylab.colorbar()
		pylab.show()

		#2007-11-01 do something as CmpAccession2Ecotype.py
		from CmpAccession2Ecotype import CmpAccession2Ecotype
		cmp_ins = CmpAccession2Ecotype()
		nt_number2diff_matrix_index = cmp_ins.get_nt_number2diff_matrix_index(nt2number)
		# NOTE(review): the placeholder pairs row numbers with column numbers and is
		# passed for three different mapping arguments -- confirm against cmp_two_matricies()
		row_count = sub_data_matrix.shape[0]
		col_count = sub_data_matrix.shape[1]
		dc_placeholder = dict(zip(range(row_count), range(col_count)))
		diff_matrix_ls = cmp_ins.cmp_two_matricies(SNP_matrix_2010_sorted, sub_data_matrix, nt_number2diff_matrix_index, dc_placeholder, dc_placeholder, dc_placeholder)
		print(diff_matrix_ls)