def run(self):
    """
    2007-03-20
    2007-04-03

    Obtain the strain X SNP call matrix and display two views derived from it.

    If self.draw_only is set, the matrix is read back from self.output_fname.
    Otherwise the header, strain accessions and categories are read from
    self.input_fname, the calls are fetched from self.data_table (heterozygous
    calls requested) and the resulting matrix is written to self.output_fname.

    In both cases the heterozygous and the coarse matrix are displayed and the
    method then waits for the user to hit enter.
    """
    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    matrix_io = FilterStrainSNPMatrix()

    if not self.draw_only:
        # conn stays bound so the connection object is referenced while curs is in use
        (conn, curs) = db_connect(self.hostname, self.dbname, self.schema)
        # the calls stored in the input file are replaced below by the ones from the db
        header, strain_acc_list, category_list, data_matrix = matrix_io.read_data(self.input_fname)
        # header fields from the third one onward are the SNP accessions
        snp_acc_ls = header[2:]
        strain_id2index = self.get_id2index(curs, self.strain_info_table, strain_acc_list)
        snp_id2index = self.get_id2index(curs, self.snp_locus_table, snp_acc_ls)

        from dbSNP2data import dbSNP2data
        call_fetcher = dbSNP2data(report=self.report)
        data_matrix = call_fetcher.get_data_matrix(
            curs, strain_id2index, snp_id2index, nt2number, self.data_table,
            need_heterozygous_call=1)
        matrix_io.write_data_matrix(
            data_matrix, self.output_fname, header, strain_acc_list, category_list)
    else:
        header, strain_acc_list, category_list, stored_matrix = matrix_io.read_data(self.output_fname)
        data_matrix = Numeric.array(stored_matrix)

    derived_matrices = self.get_heterozygous_and_coarse_data_matrix(data_matrix)
    heterozygous_data_matrix, coarse_data_matrix = derived_matrices
    self.displayDataMatrix(
        heterozygous_data_matrix,
        title='heterozygous_data_matrix, 5-10=hetero, else=0')
    self.displayDataMatrix(
        coarse_data_matrix,
        title='coarse_data_matrix, 0=NA, 1=h**o, 2=hetero')
    # block until the user has looked at the displays
    raw_input("enter")
def shuffleMatrixSNPColumn_in_chrom_position_order(input_fname, curs, snps_table, output_fname):
    """
    Rewrite a strain X SNP matrix file with its SNP columns in genome order.

    The matrix is read from input_fname, its SNP columns are rearranged to
    follow the (chromosome, position) ordering of snps_table, and the result
    is written to output_fname with the header rearranged the same way.

    Only SNPs present in the input header are placed; rows of snps_table that
    refer to other SNPs, or that repeat a snpid, are ignored. (Mapping every
    table row to a column, as was done before, overran the matrix width and
    misaligned the header whenever the table held extra or duplicate SNPs.)

    Arguments:
        input_fname: file readable by FilterStrainSNPMatrix.read_data();
            header fields from the third one onward are taken as SNP ids.
        curs: database cursor used to query snps_table.
        snps_table: name of a table with snpid, chromosome and position
            columns. It is interpolated into the SQL text, so it must come
            from a trusted source.
        output_fname: file the reordered matrix is written to.

    Raises:
        KeyError: if a SNP of the input matrix is not found in snps_table.
    """
    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    import numpy

    FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()
    header, strain_acc_list, category_list, data_matrix = FilterStrainSNPMatrix_instance.read_data(input_fname)
    snp_acc_list = header[2:]
    snp_acc2old_col_index = dict((snp_acc, j) for j, snp_acc in enumerate(snp_acc_list))

    curs.execute("select snpid, chromosome, position from %s order by chromosome, position" % (snps_table))
    rows = curs.fetchall()

    # SNP ids of the input matrix, in chromosome/position order
    new_snp_acc_list = []
    placed_snp_acc_set = set()
    for row in rows:
        snpid, chromosome, position = row
        if snpid in snp_acc2old_col_index and snpid not in placed_snp_acc_set:
            placed_snp_acc_set.add(snpid)
            new_snp_acc_list.append(snpid)

    missing_snp_acc_list = [snp_acc for snp_acc in snp_acc_list if snp_acc not in placed_snp_acc_set]
    if missing_snp_acc_list:
        raise KeyError("SNP(s) %s from %s not found in table %s" % (missing_snp_acc_list, input_fname, snps_table))

    old_matrix = numpy.array(data_matrix)
    # numpy.int_ is the concrete default integer type; the abstract
    # numpy.integer is deprecated as a dtype argument
    new_matrix = numpy.zeros(old_matrix.shape, numpy.int_)
    for new_col_index, snp_acc in enumerate(new_snp_acc_list):
        new_matrix[:, new_col_index] = old_matrix[:, snp_acc2old_col_index[snp_acc]]

    header = header[:2] + new_snp_acc_list
    FilterStrainSNPMatrix_instance.write_data_matrix(new_matrix, output_fname, header, strain_acc_list, category_list)
def run(self):
    """
    2007-03-20
    2007-04-03

    Show the heterozygous and the coarse view of a strain X SNP call matrix.

    With self.draw_only the matrix comes straight from self.output_fname.
    Without it, the file layout (header, strain accessions, categories) comes
    from self.input_fname, the calls come from self.data_table including
    heterozygous ones, and the assembled matrix is saved to self.output_fname
    before being displayed.

    Waits for the user to hit enter once both matrices have been displayed.
    """
    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    snp_matrix_file = FilterStrainSNPMatrix()

    if self.draw_only:
        header, strain_acc_list, category_list, saved_calls = snp_matrix_file.read_data(
            self.output_fname)
        data_matrix = Numeric.array(saved_calls)
    else:
        (conn, curs) = db_connect(self.hostname, self.dbname, self.schema)
        # data_matrix read here is overwritten by the calls fetched from the db
        header, strain_acc_list, category_list, data_matrix = snp_matrix_file.read_data(
            self.input_fname)
        strain_id2index = self.get_id2index(
            curs, self.strain_info_table, strain_acc_list)
        # SNP accessions start at the third header field
        snp_id2index = self.get_id2index(
            curs, self.snp_locus_table, header[2:])
        from dbSNP2data import dbSNP2data
        data_matrix = dbSNP2data(report=self.report).get_data_matrix(
            curs, strain_id2index, snp_id2index, nt2number,
            self.data_table, need_heterozygous_call=1)
        snp_matrix_file.write_data_matrix(
            data_matrix, self.output_fname, header,
            strain_acc_list, category_list)

    heterozygous_data_matrix, coarse_data_matrix = \
        self.get_heterozygous_and_coarse_data_matrix(data_matrix)
    matrix_and_title_ls = (
        (heterozygous_data_matrix,
         'heterozygous_data_matrix, 5-10=hetero, else=0'),
        (coarse_data_matrix,
         'coarse_data_matrix, 0=NA, 1=h**o, 2=hetero'),
    )
    for matrix_to_show, matrix_title in matrix_and_title_ls:
        self.displayDataMatrix(matrix_to_show, title=matrix_title)
    # keep the process alive until the user is done looking
    raw_input("enter")
def shuffleMatrixSNPColumn_in_chrom_position_order(input_fname, curs, snps_table, output_fname):
    """
    Rewrite a strain X SNP matrix file with its SNP columns in genome order.

    The matrix is read from input_fname, its SNP columns are rearranged to
    follow the (chromosome, position) ordering of snps_table, and the result
    is written to output_fname with the header rearranged the same way.

    Only SNPs present in the input header are placed; rows of snps_table that
    refer to other SNPs, or that repeat a snpid, are ignored. (Mapping every
    table row to a column, as was done before, overran the matrix width and
    misaligned the header whenever the table held extra or duplicate SNPs.)

    Arguments:
        input_fname: file readable by FilterStrainSNPMatrix.read_data();
            header fields from the third one onward are taken as SNP ids.
        curs: database cursor used to query snps_table.
        snps_table: name of a table with snpid, chromosome and position
            columns. It is interpolated into the SQL text, so it must come
            from a trusted source.
        output_fname: file the reordered matrix is written to.

    Raises:
        KeyError: if a SNP of the input matrix is not found in snps_table.
    """
    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    import numpy

    FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()
    header, strain_acc_list, category_list, data_matrix = FilterStrainSNPMatrix_instance.read_data(input_fname)
    snp_acc_list = header[2:]
    snp_acc2old_col_index = dict((snp_acc, j) for j, snp_acc in enumerate(snp_acc_list))

    curs.execute("select snpid, chromosome, position from %s order by chromosome, position" % (snps_table))
    rows = curs.fetchall()

    # SNP ids of the input matrix, in chromosome/position order
    new_snp_acc_list = []
    placed_snp_acc_set = set()
    for row in rows:
        snpid, chromosome, position = row
        if snpid in snp_acc2old_col_index and snpid not in placed_snp_acc_set:
            placed_snp_acc_set.add(snpid)
            new_snp_acc_list.append(snpid)

    missing_snp_acc_list = [snp_acc for snp_acc in snp_acc_list if snp_acc not in placed_snp_acc_set]
    if missing_snp_acc_list:
        raise KeyError("SNP(s) %s from %s not found in table %s" % (missing_snp_acc_list, input_fname, snps_table))

    old_matrix = numpy.array(data_matrix)
    # numpy.int_ is the concrete default integer type; the abstract
    # numpy.integer is deprecated as a dtype argument
    new_matrix = numpy.zeros(old_matrix.shape, numpy.int_)
    for new_col_index, snp_acc in enumerate(new_snp_acc_list):
        new_matrix[:, new_col_index] = old_matrix[:, snp_acc2old_col_index[snp_acc]]

    header = header[:2] + new_snp_acc_list
    FilterStrainSNPMatrix_instance.write_data_matrix(new_matrix, output_fname, header, strain_acc_list, category_list)
def run(self):
    """
    Compare the accession (PCR) calls against the ecotype (sequenom) calls.

        --setup_SNP_dstruc()
        --setup_accession_ecotype_dstruc()
        --get_ecotype_X_snp_matrix()
        if self.sub_justin_output_fname:
            --FilterStrainSNPMatrix_instance.write_data_matrix()
        --get_alignment_id2positions_to_be_checked_ls()
        --get_accession_X_snp_matrix()
        if self.output_fname:
            --FilterStrainSNPMatrix_instance.write_data_matrix()
        --cmp_two_matricies()    (the four summary matrices are printed)
        if self.latex_output_fname:
            write a LaTeX report with four sections: summary tables, all
            mismatches, one comparison per strain, one comparison per SNP

    Every table of the report gets its own label 'table_dm<N>', numbered
    consecutively from 0. The report file is closed even if an error occurs
    while it is being written.
    """
    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()
    import MySQLdb
    conn = MySQLdb.connect(db=self.dbname, host=self.hostname)
    curs = conn.cursor()
    if self.debug:
        import pdb
        pdb.set_trace()

    nt_number2diff_matrix_index = self.get_nt_number2diff_matrix_index(nt2number)
    SNPpos2col_index, snpid2col_index, snp_acc_ls, snp_index2snp_info_ls = self.setup_SNP_dstruc(curs, self.snp_locus_table)
    ecotype_id2accession_id, ecotype_id2row_index, ecotype_id2info_ls, ecotype_id_ls, accession_id2row_index, accession_id_ls, accession_id2ecotype_id_ls = self.setup_accession_ecotype_dstruc(
        curs, self.accession2ecotype_table, self.ecotype_table, self.calls_table)

    ecotype_X_snp_matrix, ecotype_X_snp_matrix_touched = self.get_ecotype_X_snp_matrix(curs, ecotype_id2row_index, snpid2col_index, self.calls_table)
    if self.sub_justin_output_fname:
        header = ['ecotype_id', 'ecotype_id'] + snp_acc_ls
        FilterStrainSNPMatrix_instance.write_data_matrix(ecotype_X_snp_matrix, self.sub_justin_output_fname, header, ecotype_id_ls, ecotype_id_ls)

    alignment_id2positions_to_be_checked_ls, alignment_id2start = self.get_alignment_id2positions_to_be_checked_ls(curs, self.alignment_table)
    accession_X_snp_matrix, accession_X_snp_matrix_touched, snp_index2alignment_id = self.get_accession_X_snp_matrix(
        curs, accession_id2row_index, SNPpos2col_index, self.sequence_table, self.alignment_table, alignment_id2positions_to_be_checked_ls)
    if self.output_fname:
        header = ['accession_id', 'accession_id'] + snp_acc_ls
        FilterStrainSNPMatrix_instance.write_data_matrix(accession_X_snp_matrix, self.output_fname, header, accession_id_ls, accession_id_ls)

    summary_diff_matrix_ls, diff_details_ls = self.cmp_two_matricies(
        accession_X_snp_matrix, accession_X_snp_matrix_touched,
        ecotype_X_snp_matrix, ecotype_X_snp_matrix_touched,
        nt_number2diff_matrix_index, ecotype_id2accession_id,
        ecotype_id2row_index, accession_id2row_index, diff_details_ls_type=2)

    summary_diff_matrix_name_ls = [
        "diff_matrix_touched_accession_vs_touched_ecotype",
        "diff_matrix_touched_accession_vs_untouched_ecotype",
        "diff_matrix_untouched_accession_vs_touched_ecotype",
        "diff_matrix_untouched_accession_vs_untouched_ecotype",
    ]
    # single-argument print(...) behaves the same with or without the print statement
    for i, summary_diff_matrix_name in enumerate(summary_diff_matrix_name_ls):
        print(summary_diff_matrix_name)
        print(summary_diff_matrix_ls[i])

    if not self.latex_output_fname:
        return

    # same order as summary_diff_matrix_name_ls above
    summary_diff_matrix_caption_ls = [
        'PCR-tried vs sequenom-tried',
        'PCR-tried vs sequenom-untried',
        'PCR-untried vs sequenom-tried',
        'PCR-untried vs sequenom-untried',
    ]
    # imported once up front so that every section below can rely on it
    from pymodule.latex import outputMatrixInLatexTable

    outf = open(self.latex_output_fname, 'w')
    try:
        # incremented before each table is written, so labels never repeat
        table_no = -1

        outf.write('\\section{2010 PCR versus sequenom. summary} \\label{section_summary}\n')
        for i, summary_diff_matrix in enumerate(summary_diff_matrix_ls):
            wrapped_diff_matrix = self.wrap_diff_matrix_with_row_col_names(summary_diff_matrix)
            table_no += 1
            table_label = 'table_dm%s' % table_no
            outf.write(outputMatrixInLatexTable(wrapped_diff_matrix, summary_diff_matrix_caption_ls[i], table_label))

        # output the whole diff_details_ls
        outf.write('\\section{Real Mismatches between pcr and sequenom (deletion/NA excluded)} \\label{section_real_mismatch}\n')
        diff_details_ls = self.beautify_snp_diff_details_ls(
            diff_details_ls, ecotype_id2info_ls, snp_index2snp_info_ls, alignment_id2start, snp_index2alignment_id)
        table_no += 1
        table_label = 'table_dm%s' % table_no
        caption = 'mismatches between pcr and sequenom data (deletion/NA excluded, sorted by accession id)'
        outf.write(outputMatrixInLatexTable(
            diff_details_ls, caption, table_label,
            header_ls=['nativename', 'stkparent', 'ecotype_id', 'duplicate', 'accession_id', 'SNP', 'chromosome', 'position', 'alignment_id', 'alignment_start', 'pcr_call', 'sequenom_call']))

        # Strain-wise comparison
        outf.write('\\section{2010 PCR versus sequenom for each strain} \\label{section_strain_wise}\n')
        accession_id_ls.sort()
        for accession_id in accession_id_ls:
            matched_ecotype_id_ls = accession_id2ecotype_id_ls[accession_id]
            # the subsection is titled with the first info field of the first matched ecotype
            outf.write('\\subsection{strain %s(accession id=%s)}\n' % (ecotype_id2info_ls[matched_ecotype_id_ls[0]][0], accession_id))
            a_row_index = accession_id2row_index[accession_id]
            for ecotype_id in matched_ecotype_id_ls:
                # ecotype_id is indexed as a pair: [0] is reported as the ecotype id, [1] as the duplicate
                ecotype_info = ecotype_id2info_ls[ecotype_id]
                outf.write('\\subsubsection{corresponding ecotype %s(stkparent=%s, ecotype id=%s, duplicate=%s)}\n' % (
                    ecotype_info[0], ecotype_info[1], ecotype_id[0], ecotype_id[1]))
                e_row_index = ecotype_id2row_index[ecotype_id]
                diff_matrix_ls, strain_diff_details_ls = self.cmp_two_lists(
                    accession_X_snp_matrix[a_row_index, :], accession_X_snp_matrix_touched[a_row_index, :],
                    ecotype_X_snp_matrix[e_row_index, :], ecotype_X_snp_matrix_touched[e_row_index, :],
                    nt_number2diff_matrix_index)
                wrapped_diff_matrix = self.wrap_diff_matrix_with_row_col_names(diff_matrix_ls[0])
                table_no += 1
                table_label = 'table_dm%s' % table_no
                caption = 'accession id=%s vs ecotype id=%s, duplicate=%s(nativename=%s, stockparent=%s)' % (
                    accession_id, ecotype_id[0], ecotype_id[1], ecotype_info[0], ecotype_info[1])
                outf.write(outputMatrixInLatexTable(wrapped_diff_matrix, caption, table_label))
                if strain_diff_details_ls:
                    strain_diff_details_ls = self.beautify_diff_details_ls(
                        strain_diff_details_ls, snp_index2snp_info_ls, alignment_id2start, snp_index2alignment_id)
                    table_no += 1
                    table_label = 'table_dm%s' % table_no
                    caption = 'detailed difference for accession id=%s vs ecotype id=%s, duplicate=%s' % (accession_id, ecotype_id[0], ecotype_id[1])
                    outf.write(outputMatrixInLatexTable(
                        strain_diff_details_ls, caption, table_label,
                        header_ls=['snp', 'chromosome', 'position', 'alignment_id', 'alignment_start', 'pcr_call', 'sequenom_call']))

        # SNP-wise comparison
        outf.write('\\section{2010 PCR versus sequenom for each SNP} \\label{section_snp_wise}\n')
        for snp_column in range(accession_X_snp_matrix.shape[1]):
            snp_acc, chromosome, position = snp_index2snp_info_ls[snp_column]
            alignment_id = snp_index2alignment_id[snp_column]
            alignment_start = alignment_id2start[alignment_id]
            outf.write('\\subsection{SNP %s(chrom=%s, pos=%s, alignment id=%s, alignment start=%s)}\n' % (
                snp_acc, chromosome, position, alignment_id, alignment_start))
            diff_matrix_ls, snp_diff_details_ls = self.cmp_two_matricies(
                accession_X_snp_matrix, accession_X_snp_matrix_touched,
                ecotype_X_snp_matrix, ecotype_X_snp_matrix_touched,
                nt_number2diff_matrix_index, ecotype_id2accession_id,
                ecotype_id2row_index, accession_id2row_index,
                snp_column=snp_column, diff_details_ls_type=1)
            wrapped_diff_matrix = self.wrap_diff_matrix_with_row_col_names(diff_matrix_ls[0])
            table_no += 1
            table_label = 'table_dm%s' % table_no
            caption = 'SNP %s(chromosome=%s, position=%s, alignment id=%s, alignment start=%s)' % (
                snp_acc, chromosome, position, alignment_id, alignment_start)
            outf.write(outputMatrixInLatexTable(wrapped_diff_matrix, caption, table_label))
            if snp_diff_details_ls:
                snp_diff_details_ls = self.beautify_snp_diff_details_ls(snp_diff_details_ls, ecotype_id2info_ls)
                table_no += 1
                table_label = 'table_dm%s' % table_no
                caption = 'detailed difference for SNP %s' % (snp_acc)
                header_ls = ['nativename', 'stkparent', 'ecotype_id', 'duplicate', 'accession_id', 'pcr_call', 'sequenom_call']
                outf.write(outputMatrixInLatexTable(snp_diff_details_ls, caption, table_label, header_ls))
    finally:
        # flush and release the report file no matter how the writing ended
        outf.close()
def run(self):
    """
    Compare the accession (PCR) calls against the ecotype (sequenom) calls.

        --setup_SNP_dstruc()
        --setup_accession_ecotype_dstruc()
        --get_ecotype_X_snp_matrix()
        if self.sub_justin_output_fname:
            --FilterStrainSNPMatrix_instance.write_data_matrix()
        --get_alignment_id2positions_to_be_checked_ls()
        --get_accession_X_snp_matrix()
        if self.output_fname:
            --FilterStrainSNPMatrix_instance.write_data_matrix()
        --cmp_two_matricies()    (the four summary matrices are printed)
        if self.latex_output_fname:
            write a LaTeX report with four sections: summary tables, all
            mismatches, one comparison per strain, one comparison per SNP

    Every table of the report gets its own label 'table_dm<N>', numbered
    consecutively from 0. The report file is closed even if an error occurs
    while it is being written.
    """
    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()
    import MySQLdb
    conn = MySQLdb.connect(db=self.dbname, host=self.hostname)
    curs = conn.cursor()
    if self.debug:
        import pdb
        pdb.set_trace()

    nt_number2diff_matrix_index = self.get_nt_number2diff_matrix_index(
        nt2number)
    SNPpos2col_index, snpid2col_index, snp_acc_ls, snp_index2snp_info_ls = self.setup_SNP_dstruc(
        curs, self.snp_locus_table)
    ecotype_id2accession_id, ecotype_id2row_index, ecotype_id2info_ls, ecotype_id_ls, accession_id2row_index, accession_id_ls, accession_id2ecotype_id_ls = self.setup_accession_ecotype_dstruc(
        curs, self.accession2ecotype_table, self.ecotype_table,
        self.calls_table)

    ecotype_X_snp_matrix, ecotype_X_snp_matrix_touched = self.get_ecotype_X_snp_matrix(
        curs, ecotype_id2row_index, snpid2col_index, self.calls_table)
    if self.sub_justin_output_fname:
        header = ['ecotype_id', 'ecotype_id'] + snp_acc_ls
        FilterStrainSNPMatrix_instance.write_data_matrix(
            ecotype_X_snp_matrix, self.sub_justin_output_fname, header,
            ecotype_id_ls, ecotype_id_ls)

    alignment_id2positions_to_be_checked_ls, alignment_id2start = self.get_alignment_id2positions_to_be_checked_ls(
        curs, self.alignment_table)
    accession_X_snp_matrix, accession_X_snp_matrix_touched, snp_index2alignment_id = self.get_accession_X_snp_matrix(
        curs, accession_id2row_index, SNPpos2col_index,
        self.sequence_table, self.alignment_table,
        alignment_id2positions_to_be_checked_ls)
    if self.output_fname:
        header = ['accession_id', 'accession_id'] + snp_acc_ls
        FilterStrainSNPMatrix_instance.write_data_matrix(
            accession_X_snp_matrix, self.output_fname, header,
            accession_id_ls, accession_id_ls)

    summary_diff_matrix_ls, diff_details_ls = self.cmp_two_matricies(
        accession_X_snp_matrix, accession_X_snp_matrix_touched,
        ecotype_X_snp_matrix, ecotype_X_snp_matrix_touched,
        nt_number2diff_matrix_index, ecotype_id2accession_id,
        ecotype_id2row_index, accession_id2row_index,
        diff_details_ls_type=2)

    summary_diff_matrix_name_ls = [
        "diff_matrix_touched_accession_vs_touched_ecotype",
        "diff_matrix_touched_accession_vs_untouched_ecotype",
        "diff_matrix_untouched_accession_vs_touched_ecotype",
        "diff_matrix_untouched_accession_vs_untouched_ecotype",
    ]
    # single-argument print(...) behaves the same with or without the print statement
    for i, summary_diff_matrix_name in enumerate(summary_diff_matrix_name_ls):
        print(summary_diff_matrix_name)
        print(summary_diff_matrix_ls[i])

    if not self.latex_output_fname:
        return

    # same order as summary_diff_matrix_name_ls above
    summary_diff_matrix_caption_ls = [
        'PCR-tried vs sequenom-tried',
        'PCR-tried vs sequenom-untried',
        'PCR-untried vs sequenom-tried',
        'PCR-untried vs sequenom-untried',
    ]
    # imported once up front so that every section below can rely on it
    from pymodule.latex import outputMatrixInLatexTable

    outf = open(self.latex_output_fname, 'w')
    try:
        # incremented before each table is written, so labels never repeat
        table_no = -1

        outf.write(
            '\\section{2010 PCR versus sequenom. summary} \\label{section_summary}\n'
        )
        for i, summary_diff_matrix in enumerate(summary_diff_matrix_ls):
            wrapped_diff_matrix = self.wrap_diff_matrix_with_row_col_names(
                summary_diff_matrix)
            table_no += 1
            table_label = 'table_dm%s' % table_no
            outf.write(
                outputMatrixInLatexTable(wrapped_diff_matrix,
                                         summary_diff_matrix_caption_ls[i],
                                         table_label))

        # output the whole diff_details_ls
        outf.write(
            '\\section{Real Mismatches between pcr and sequenom (deletion/NA excluded)} \\label{section_real_mismatch}\n'
        )
        diff_details_ls = self.beautify_snp_diff_details_ls(
            diff_details_ls, ecotype_id2info_ls, snp_index2snp_info_ls,
            alignment_id2start, snp_index2alignment_id)
        table_no += 1
        table_label = 'table_dm%s' % table_no
        caption = 'mismatches between pcr and sequenom data (deletion/NA excluded, sorted by accession id)'
        outf.write(
            outputMatrixInLatexTable(
                diff_details_ls, caption, table_label,
                header_ls=[
                    'nativename', 'stkparent', 'ecotype_id', 'duplicate',
                    'accession_id', 'SNP', 'chromosome', 'position',
                    'alignment_id', 'alignment_start', 'pcr_call',
                    'sequenom_call'
                ]))

        # Strain-wise comparison
        outf.write(
            '\\section{2010 PCR versus sequenom for each strain} \\label{section_strain_wise}\n'
        )
        accession_id_ls.sort()
        for accession_id in accession_id_ls:
            matched_ecotype_id_ls = accession_id2ecotype_id_ls[accession_id]
            # the subsection is titled with the first info field of the first matched ecotype
            outf.write(
                '\\subsection{strain %s(accession id=%s)}\n' %
                (ecotype_id2info_ls[matched_ecotype_id_ls[0]][0],
                 accession_id))
            a_row_index = accession_id2row_index[accession_id]
            for ecotype_id in matched_ecotype_id_ls:
                # ecotype_id is indexed as a pair: [0] is reported as the ecotype id, [1] as the duplicate
                ecotype_info = ecotype_id2info_ls[ecotype_id]
                outf.write(
                    '\\subsubsection{corresponding ecotype %s(stkparent=%s, ecotype id=%s, duplicate=%s)}\n'
                    % (ecotype_info[0], ecotype_info[1], ecotype_id[0],
                       ecotype_id[1]))
                e_row_index = ecotype_id2row_index[ecotype_id]
                diff_matrix_ls, strain_diff_details_ls = self.cmp_two_lists(
                    accession_X_snp_matrix[a_row_index, :],
                    accession_X_snp_matrix_touched[a_row_index, :],
                    ecotype_X_snp_matrix[e_row_index, :],
                    ecotype_X_snp_matrix_touched[e_row_index, :],
                    nt_number2diff_matrix_index)
                wrapped_diff_matrix = self.wrap_diff_matrix_with_row_col_names(
                    diff_matrix_ls[0])
                table_no += 1
                table_label = 'table_dm%s' % table_no
                caption = 'accession id=%s vs ecotype id=%s, duplicate=%s(nativename=%s, stockparent=%s)' % (
                    accession_id, ecotype_id[0], ecotype_id[1],
                    ecotype_info[0], ecotype_info[1])
                outf.write(
                    outputMatrixInLatexTable(wrapped_diff_matrix, caption,
                                             table_label))
                if strain_diff_details_ls:
                    strain_diff_details_ls = self.beautify_diff_details_ls(
                        strain_diff_details_ls, snp_index2snp_info_ls,
                        alignment_id2start, snp_index2alignment_id)
                    table_no += 1
                    table_label = 'table_dm%s' % table_no
                    caption = 'detailed difference for accession id=%s vs ecotype id=%s, duplicate=%s' % (
                        accession_id, ecotype_id[0], ecotype_id[1])
                    outf.write(
                        outputMatrixInLatexTable(
                            strain_diff_details_ls, caption, table_label,
                            header_ls=[
                                'snp', 'chromosome', 'position',
                                'alignment_id', 'alignment_start',
                                'pcr_call', 'sequenom_call'
                            ]))

        # SNP-wise comparison
        outf.write(
            '\\section{2010 PCR versus sequenom for each SNP} \\label{section_snp_wise}\n'
        )
        for snp_column in range(accession_X_snp_matrix.shape[1]):
            snp_acc, chromosome, position = snp_index2snp_info_ls[snp_column]
            alignment_id = snp_index2alignment_id[snp_column]
            alignment_start = alignment_id2start[alignment_id]
            outf.write(
                '\\subsection{SNP %s(chrom=%s, pos=%s, alignment id=%s, alignment start=%s)}\n'
                % (snp_acc, chromosome, position, alignment_id,
                   alignment_start))
            diff_matrix_ls, snp_diff_details_ls = self.cmp_two_matricies(
                accession_X_snp_matrix, accession_X_snp_matrix_touched,
                ecotype_X_snp_matrix, ecotype_X_snp_matrix_touched,
                nt_number2diff_matrix_index, ecotype_id2accession_id,
                ecotype_id2row_index, accession_id2row_index,
                snp_column=snp_column, diff_details_ls_type=1)
            wrapped_diff_matrix = self.wrap_diff_matrix_with_row_col_names(
                diff_matrix_ls[0])
            table_no += 1
            table_label = 'table_dm%s' % table_no
            caption = 'SNP %s(chromosome=%s, position=%s, alignment id=%s, alignment start=%s)' % (
                snp_acc, chromosome, position, alignment_id, alignment_start)
            outf.write(
                outputMatrixInLatexTable(wrapped_diff_matrix, caption,
                                         table_label))
            if snp_diff_details_ls:
                snp_diff_details_ls = self.beautify_snp_diff_details_ls(
                    snp_diff_details_ls, ecotype_id2info_ls)
                table_no += 1
                table_label = 'table_dm%s' % table_no
                caption = 'detailed difference for SNP %s' % (snp_acc)
                header_ls = [
                    'nativename', 'stkparent', 'ecotype_id', 'duplicate',
                    'accession_id', 'pcr_call', 'sequenom_call'
                ]
                outf.write(
                    outputMatrixInLatexTable(snp_diff_details_ls, caption,
                                             table_label, header_ls))
    finally:
        # flush and release the report file no matter how the writing ended
        outf.close()
def run(self):
    """
    2007-03-29
    2007-04-03
    2007-05-01

    Compare the SNP matrix of self.input_fname against the 2010 SNP matrix,
    print the count for each difference tag, plot the difference matrix and
    finally print the result of CmpAccession2Ecotype.cmp_two_matricies().

        --FilterStrainSNPMatrix_instance.read_data()
        if self.comparison_only:
            --FilterStrainSNPMatrix_instance.read_data()
        else:
            --db_connect()
            --get_SNPpos2index()
            --create_SNP_matrix_2010()
            --get_mapping_info_regarding_strain_acc()
            --shuffle_data_matrix_according_to_strain_acc_ls()
            --FilterStrainSNPMatrix_instance.write_data_matrix()
        --extract_sub_data_matrix()
        if self.sub_justin_output_fname:
            --FilterStrainSNPMatrix_instance.write_data_matrix()
        --compare_two_SNP_matrix()
        if self.diff_output_fname:
            --outputDiffType()

    The earlier outline also listed get_align_length_from_fname(),
    get_positions_to_be_checked_ls() and get_align_matrix_from_fname();
    they are not called directly here (presumably reached through
    create_SNP_matrix_2010() -- to be confirmed).
    """
    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()
    header, src_strain_acc_list, category_list, data_matrix = FilterStrainSNPMatrix_instance.read_data(
        self.input_fname)
    if self.comparison_only:
        # reuse the 2010 matrix written by an earlier run; header is replaced by that file's header
        header, strain_acc_ls, abbr_name_ls_sorted, SNP_matrix_2010_sorted = FilterStrainSNPMatrix_instance.read_data(
            self.output_fname)
        SNP_matrix_2010_sorted = Numeric.array(SNP_matrix_2010_sorted)
    else:
        (conn, curs) = db_connect(self.hostname, self.dbname, self.schema)
        # extract data from alignment
        snp_acc_ls = header[2:]
        SNPpos2index = self.get_SNPpos2index(curs, snp_acc_ls,
                                             self.snp_locus_table)
        abbr_name_ls, SNP_matrix_2010 = self.create_SNP_matrix_2010(
            SNPpos2index, self.data_dir_2010)
        strain_acc_ls, strain_acc2abbr_name, strain_acc2index = self.get_mapping_info_regarding_strain_acc(
            curs, self.strain_info_table, self.strain_info_2010_table,
            abbr_name_ls)
        SNP_matrix_2010_sorted = self.shuffle_data_matrix_according_to_strain_acc_ls(
            SNP_matrix_2010, strain_acc_ls, strain_acc2index)
        abbr_name_ls_sorted = [
            strain_acc2abbr_name[strain_acc] for strain_acc in strain_acc_ls
        ]
        FilterStrainSNPMatrix_instance.write_data_matrix(
            SNP_matrix_2010_sorted, self.output_fname, header,
            strain_acc_ls, abbr_name_ls_sorted)

    # comparison
    data_matrix = Numeric.array(data_matrix)
    sub_data_matrix = self.extract_sub_data_matrix(src_strain_acc_list,
                                                   data_matrix,
                                                   strain_acc_ls)
    if self.sub_justin_output_fname:
        FilterStrainSNPMatrix_instance.write_data_matrix(
            sub_data_matrix, self.sub_justin_output_fname, header,
            strain_acc_ls, abbr_name_ls_sorted)
    diff_matrix, diff_tag_dict, diff_tag2counter = self.compare_two_SNP_matrix(
        SNP_matrix_2010_sorted, sub_data_matrix)
    if self.diff_output_fname:
        self.outputDiffType(diff_matrix, SNP_matrix_2010_sorted,
                            sub_data_matrix, diff_tag_dict,
                            self.diff_type_to_be_outputted,
                            abbr_name_ls_sorted, header[2:],
                            self.diff_output_fname)

    summary_result_ls = []
    for tag, counter in diff_tag2counter.iteritems():
        summary_result_ls.append('%s(%s):%s' %
                                 (tag, diff_tag_dict[tag], counter))
        # single-argument print(...) behaves the same with or without the print statement
        print('\t%s(%s)\t%s' % (tag, diff_tag_dict[tag], counter))

    import pylab
    pylab.clf()
    # the rows are handed to imshow in reverse order
    diff_matrix_reverse = list(diff_matrix)
    diff_matrix_reverse.reverse()
    diff_matrix_reverse = Numeric.array(diff_matrix_reverse)
    pylab.imshow(diff_matrix_reverse, interpolation='nearest')
    pylab.title(' '.join(summary_result_ls))
    pylab.colorbar()
    pylab.show()

    # 2007-11-01 do something as CmpAccession2Ecotype.py
    from CmpAccession2Ecotype import CmpAccession2Ecotype
    CmpAccession2Ecotype_ins = CmpAccession2Ecotype()
    nt_number2diff_matrix_index = CmpAccession2Ecotype_ins.get_nt_number2diff_matrix_index(
        nt2number)
    # The two matrices are compared row against row, so the three id/row
    # lookups are replaced by one identity mapping over the row indices.
    # (Pairing row indices with column indices cut the mapping short
    # whenever the matrix had fewer columns than rows.)
    row_index_ls = range(sub_data_matrix.shape[0])
    dc_placeholder = dict(zip(row_index_ls, row_index_ls))
    # NOTE(review): elsewhere cmp_two_matricies() is called with two extra
    # "touched" matrices; confirm this 6-argument call matches its signature.
    diff_matrix_ls = CmpAccession2Ecotype_ins.cmp_two_matricies(
        SNP_matrix_2010_sorted, sub_data_matrix,
        nt_number2diff_matrix_index, dc_placeholder, dc_placeholder,
        dc_placeholder)
    print(diff_matrix_ls)
def run(self):
    """
    2007-03-29
    2007-04-03
    2007-05-01

    Compare the SNP matrix of self.input_fname against the 2010 SNP matrix,
    print the count for each difference tag, plot the difference matrix and
    finally print the result of CmpAccession2Ecotype.cmp_two_matricies().

        --FilterStrainSNPMatrix_instance.read_data()
        if self.comparison_only:
            --FilterStrainSNPMatrix_instance.read_data()
        else:
            --db_connect()
            --get_SNPpos2index()
            --create_SNP_matrix_2010()
            --get_mapping_info_regarding_strain_acc()
            --shuffle_data_matrix_according_to_strain_acc_ls()
            --FilterStrainSNPMatrix_instance.write_data_matrix()
        --extract_sub_data_matrix()
        if self.sub_justin_output_fname:
            --FilterStrainSNPMatrix_instance.write_data_matrix()
        --compare_two_SNP_matrix()
        if self.diff_output_fname:
            --outputDiffType()

    The earlier outline also listed get_align_length_from_fname(),
    get_positions_to_be_checked_ls() and get_align_matrix_from_fname();
    they are not called directly here (presumably reached through
    create_SNP_matrix_2010() -- to be confirmed).
    """
    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()
    header, src_strain_acc_list, category_list, data_matrix = FilterStrainSNPMatrix_instance.read_data(self.input_fname)
    if self.comparison_only:
        # reuse the 2010 matrix written by an earlier run; header is replaced by that file's header
        header, strain_acc_ls, abbr_name_ls_sorted, SNP_matrix_2010_sorted = FilterStrainSNPMatrix_instance.read_data(self.output_fname)
        SNP_matrix_2010_sorted = Numeric.array(SNP_matrix_2010_sorted)
    else:
        (conn, curs) = db_connect(self.hostname, self.dbname, self.schema)
        # extract data from alignment
        snp_acc_ls = header[2:]
        SNPpos2index = self.get_SNPpos2index(curs, snp_acc_ls, self.snp_locus_table)
        abbr_name_ls, SNP_matrix_2010 = self.create_SNP_matrix_2010(SNPpos2index, self.data_dir_2010)
        strain_acc_ls, strain_acc2abbr_name, strain_acc2index = self.get_mapping_info_regarding_strain_acc(
            curs, self.strain_info_table, self.strain_info_2010_table, abbr_name_ls)
        SNP_matrix_2010_sorted = self.shuffle_data_matrix_according_to_strain_acc_ls(SNP_matrix_2010, strain_acc_ls, strain_acc2index)
        abbr_name_ls_sorted = [strain_acc2abbr_name[strain_acc] for strain_acc in strain_acc_ls]
        FilterStrainSNPMatrix_instance.write_data_matrix(SNP_matrix_2010_sorted, self.output_fname, header, strain_acc_ls, abbr_name_ls_sorted)

    # comparison
    data_matrix = Numeric.array(data_matrix)
    sub_data_matrix = self.extract_sub_data_matrix(src_strain_acc_list, data_matrix, strain_acc_ls)
    if self.sub_justin_output_fname:
        FilterStrainSNPMatrix_instance.write_data_matrix(sub_data_matrix, self.sub_justin_output_fname, header, strain_acc_ls, abbr_name_ls_sorted)
    diff_matrix, diff_tag_dict, diff_tag2counter = self.compare_two_SNP_matrix(SNP_matrix_2010_sorted, sub_data_matrix)
    if self.diff_output_fname:
        self.outputDiffType(diff_matrix, SNP_matrix_2010_sorted, sub_data_matrix, diff_tag_dict, self.diff_type_to_be_outputted, abbr_name_ls_sorted, header[2:], self.diff_output_fname)

    summary_result_ls = []
    for tag, counter in diff_tag2counter.iteritems():
        summary_result_ls.append('%s(%s):%s' % (tag, diff_tag_dict[tag], counter))
        # single-argument print(...) behaves the same with or without the print statement
        print('\t%s(%s)\t%s' % (tag, diff_tag_dict[tag], counter))

    import pylab
    pylab.clf()
    # the rows are handed to imshow in reverse order
    diff_matrix_reverse = list(diff_matrix)
    diff_matrix_reverse.reverse()
    diff_matrix_reverse = Numeric.array(diff_matrix_reverse)
    pylab.imshow(diff_matrix_reverse, interpolation='nearest')
    pylab.title(' '.join(summary_result_ls))
    pylab.colorbar()
    pylab.show()

    # 2007-11-01 do something as CmpAccession2Ecotype.py
    from CmpAccession2Ecotype import CmpAccession2Ecotype
    CmpAccession2Ecotype_ins = CmpAccession2Ecotype()
    nt_number2diff_matrix_index = CmpAccession2Ecotype_ins.get_nt_number2diff_matrix_index(nt2number)
    # The two matrices are compared row against row, so the three id/row
    # lookups are replaced by one identity mapping over the row indices.
    # (Pairing row indices with column indices cut the mapping short
    # whenever the matrix had fewer columns than rows.)
    row_index_ls = range(sub_data_matrix.shape[0])
    dc_placeholder = dict(zip(row_index_ls, row_index_ls))
    # NOTE(review): elsewhere cmp_two_matricies() is called with two extra
    # "touched" matrices; confirm this 6-argument call matches its signature.
    diff_matrix_ls = CmpAccession2Ecotype_ins.cmp_two_matricies(
        SNP_matrix_2010_sorted, sub_data_matrix, nt_number2diff_matrix_index,
        dc_placeholder, dc_placeholder, dc_placeholder)
    print(diff_matrix_ls)