def run(self):
    """
    2007-03-20
    2007-04-03
        Obtain a strain X SNP data matrix and hand two derived views of it
        to self.displayDataMatrix().

        If self.draw_only is set, the matrix is re-read from self.output_fname
        and converted to a Numeric array. Otherwise header, strain and category
        lists are read from self.input_fname, the matrix is rebuilt through
        dbSNP2data.get_data_matrix() (heterozygous calls requested) and written
        to self.output_fname.

        Blocks on raw_input() at the end.
    """
    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    matrix_io = FilterStrainSNPMatrix()

    if not self.draw_only:
        conn, curs = db_connect(self.hostname, self.dbname, self.schema)
        header, strain_acc_list, category_list, data_matrix = matrix_io.read_data(self.input_fname)
        # the first two header fields are not SNP accessions
        snp_acc_ls = header[2:]
        strain_id2index = self.get_id2index(curs, self.strain_info_table, strain_acc_list)
        snp_id2index = self.get_id2index(curs, self.snp_locus_table, snp_acc_ls)
        from dbSNP2data import dbSNP2data
        db_reader = dbSNP2data(report=self.report)
        # the matrix read from input_fname is discarded in favour of the db calls
        data_matrix = db_reader.get_data_matrix(
            curs, strain_id2index, snp_id2index, nt2number, self.data_table,
            need_heterozygous_call=1)
        matrix_io.write_data_matrix(
            data_matrix, self.output_fname, header, strain_acc_list, category_list)
    else:
        header, strain_acc_list, category_list, data_matrix = matrix_io.read_data(self.output_fname)
        data_matrix = Numeric.array(data_matrix)

    hetero_matrix, coarse_matrix = self.get_heterozygous_and_coarse_data_matrix(data_matrix)
    views = (
        (hetero_matrix, 'heterozygous_data_matrix, 5-10=hetero, else=0'),
        (coarse_matrix, 'coarse_data_matrix, 0=NA, 1=h**o, 2=hetero'),
    )
    for matrix, title in views:
        self.displayDataMatrix(matrix, title=title)
    raw_input("enter")
def run(self):
    """
    2007-10-11
        Build the identity graph of the strains in self.input_fname and,
        when self.commit is set, store it in the database.

        Steps:
            1. connect to MySQL (self.dbname on self.hostname)
            2. read the strain X SNP matrix from self.input_fname
            3. construct the identity pairs, turn them into a graph and add
               the strains of strain_acc_list via
               expand_g_with_singleton_strain_id_ls()
            4. compute components and cliques
            5. if self.commit: create the three tables, submit the rows and
               commit the transaction

        self.debug drops into pdb right after the matrix is read.
    """
    import MySQLdb
    conn = MySQLdb.connect(db=self.dbname, host=self.hostname)
    curs = conn.cursor()

    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()
    header, strain_acc_list, category_list, data_matrix = FilterStrainSNPMatrix_instance.read_data(
        self.input_fname)
    if self.debug:
        import pdb
        pdb.set_trace()

    identity_pair_ls = self.construct_identity_pair_ls(strain_acc_list, header, data_matrix)
    g = self.construct_graph_out_of_identity_pair(identity_pair_ls)
    g = self.expand_g_with_singleton_strain_id_ls(g, strain_acc_list)
    cc_id2clique_id_ls, clique_id2ecotype_id_ls = self.compute_components_and_cliques(g)

    if self.commit:
        self.create_identity_table(curs, self.identity_table)
        self.create_component2clique_table(curs, self.component2clique_table)
        self.create_clique2ecotype_table(curs, self.clique2ecotype_table)
        self.submit_identity_pairs(curs, g, self.identity_table)
        self.submit_cc_id2clique_id_ls(curs, cc_id2clique_id_ls, self.component2clique_table)
        self.submit_clique_id2ecotype_id_ls(curs, clique_id2ecotype_id_ls, self.clique2ecotype_table)
        # MySQLdb connections start with autocommit off; without an explicit
        # commit the submitted rows are lost on transactional tables.
        conn.commit()
def shuffleMatrixSNPColumn_in_chrom_position_order(input_fname, curs, snps_table, output_fname):
    """
    Rewrite the strain X SNP matrix of input_fname with its SNP columns
    ordered by (chromosome, position) as listed in snps_table.

    input_fname: matrix file readable by FilterStrainSNPMatrix.read_data()
    curs: DB-API cursor used to query snps_table
    snps_table: table with the columns snpid, chromosome and position
    output_fname: file the reordered matrix is written to

    Only SNPs that are present in the matrix header take part in the new
    ordering, so extra or repeated rows in snps_table can no longer shift
    column indices or lengthen the header.

    Raises KeyError if a SNP of the matrix header is absent from snps_table.
    """
    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    import numpy

    FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()
    header, strain_acc_list, category_list, data_matrix = FilterStrainSNPMatrix_instance.read_data(
        input_fname)
    # the first two header fields are not SNP accessions
    snp_acc_list = header[2:]
    snp_acc_in_matrix = set(snp_acc_list)

    # a table name can not be bound as a query parameter, hence the interpolation
    curs.execute(
        "select snpid, chromosome, position from %s order by chromosome, position" % (snps_table))
    snp_acc2col_index = {}
    new_snp_acc_list = []
    for row in curs.fetchall():
        snpid = row[0]
        # skip SNPs the matrix does not hold and repeated table rows; both
        # would make the target column indices disagree with the matrix width
        if snpid not in snp_acc_in_matrix or snpid in snp_acc2col_index:
            continue
        snp_acc2col_index[snpid] = len(new_snp_acc_list)
        new_snp_acc_list.append(snpid)

    missing_snp_acc_ls = [snp_acc for snp_acc in snp_acc_list if snp_acc not in snp_acc2col_index]
    if missing_snp_acc_ls:
        raise KeyError("SNPs not found in %s: %s" % (
            snps_table, ', '.join([str(snp_acc) for snp_acc in missing_snp_acc_ls])))

    old_matrix = numpy.array(data_matrix)
    # plain int instead of the abstract numpy.integer, which is not a valid dtype
    new_matrix = numpy.zeros(old_matrix.shape, int)
    for j, snp_acc in enumerate(snp_acc_list):
        new_matrix[:, snp_acc2col_index[snp_acc]] = old_matrix[:, j]

    header = header[:2] + new_snp_acc_list
    FilterStrainSNPMatrix_instance.write_data_matrix(
        new_matrix, output_fname, header, strain_acc_list, category_list)
def run(self):
    """
    2007-10-11
        Build the identity graph of the strains in self.input_fname and,
        when self.commit is set, store it in the database.

        Steps:
            1. connect to MySQL (self.dbname on self.hostname)
            2. read the strain X SNP matrix from self.input_fname
            3. construct the identity pairs, turn them into a graph and add
               the strains of strain_acc_list via
               expand_g_with_singleton_strain_id_ls()
            4. compute components and cliques
            5. if self.commit: create the three tables, submit the rows and
               commit the transaction

        self.debug drops into pdb right after the matrix is read.
    """
    import MySQLdb
    conn = MySQLdb.connect(db=self.dbname, host=self.hostname)
    curs = conn.cursor()

    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()
    header, strain_acc_list, category_list, data_matrix = FilterStrainSNPMatrix_instance.read_data(
        self.input_fname)
    if self.debug:
        import pdb
        pdb.set_trace()

    identity_pair_ls = self.construct_identity_pair_ls(strain_acc_list, header, data_matrix)
    g = self.construct_graph_out_of_identity_pair(identity_pair_ls)
    g = self.expand_g_with_singleton_strain_id_ls(g, strain_acc_list)
    cc_id2clique_id_ls, clique_id2ecotype_id_ls = self.compute_components_and_cliques(g)

    if self.commit:
        self.create_identity_table(curs, self.identity_table)
        self.create_component2clique_table(curs, self.component2clique_table)
        self.create_clique2ecotype_table(curs, self.clique2ecotype_table)
        self.submit_identity_pairs(curs, g, self.identity_table)
        self.submit_cc_id2clique_id_ls(curs, cc_id2clique_id_ls, self.component2clique_table)
        self.submit_clique_id2ecotype_id_ls(curs, clique_id2ecotype_id_ls, self.clique2ecotype_table)
        # MySQLdb connections start with autocommit off; without an explicit
        # commit the submitted rows are lost on transactional tables.
        conn.commit()
def cmp192StrainsBorevitsAndNordborgData(borevitz_data_fname, nordborg_data_fname):
    """
    2007-10-09
        compare between borevitz and nordborg data of 192 strains

        Strains are matched through the category lists of the two files.
        For every pair of borevitz strains whose names both occur in the
        nordborg file, the pairwise distance is computed within each data set.

        Returns four parallel lists: the name pairs, the borevitz distances,
        the nordborg distances and the number of valid pairs reported by the
        nordborg distance call.
    """
    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    reader = FilterStrainSNPMatrix()
    borevitz_header, borevitz_strain_acc_list, borevitz_category_list, borevitz_data_matrix = \
        reader.read_data(borevitz_data_fname)
    # the nordborg data is read without integer conversion
    nordborg_header, nordborg_strain_acc_list, nordborg_category_list, nordborg_data_matrix = \
        reader.read_data(nordborg_data_fname, turn_into_integer=0)

    # for nordborg data: name -> row index (a repeated name keeps its last row)
    accession_name2index = dict(
        (accession_name, row_index)
        for row_index, accession_name in enumerate(nordborg_category_list))

    nativename_missing_in_nordborg_alignment_ls = [
        nativename for nativename in borevitz_category_list
        if nativename not in accession_name2index]
    print('nativename_missing_in_nordborg_alignment_ls: %s' % (
        nativename_missing_in_nordborg_alignment_ls,))

    sys.stderr.write("Comparing 192 strains' data from borevitz lab and nordborg 2010 ...")
    acc_name_pair_ls = []
    borevitz_dist_ls = []
    nordborg_dist_ls = []
    no_of_valid_nordborg_pairs_ls = []
    no_of_borevits_strains = len(borevitz_strain_acc_list)
    for i in range(no_of_borevits_strains):
        for j in range(i + 1, no_of_borevits_strains):
            acc_name1 = borevitz_category_list[i]
            acc_name2 = borevitz_category_list[j]
            if acc_name1 not in accession_name2index or acc_name2 not in accession_name2index:
                continue
            borevitz_dist, no_of_valid_pairs = calBinaryDistanceBetTwoNumericVectors(
                borevitz_data_matrix[i], borevitz_data_matrix[j])
            nordborg_row1 = nordborg_data_matrix[accession_name2index[acc_name1]]
            nordborg_row2 = nordborg_data_matrix[accession_name2index[acc_name2]]
            # no_of_valid_pairs is deliberately overwritten: only the nordborg count is kept
            nordborg_dist, no_of_valid_pairs = calBinaryDistanceBetTwoAlignmentVectors(
                nordborg_row1, nordborg_row2)
            borevitz_dist_ls.append(borevitz_dist)
            nordborg_dist_ls.append(nordborg_dist)
            acc_name_pair_ls.append((acc_name1, acc_name2))
            no_of_valid_nordborg_pairs_ls.append(no_of_valid_pairs)
    sys.stderr.write("Done.\n")
    return acc_name_pair_ls, borevitz_dist_ls, nordborg_dist_ls, no_of_valid_nordborg_pairs_ls
def run(self):
    """
    Read the matrix in self.input_fname, feed it through the chain of
    self.cal_* helpers and plot a 20-bin histogram of the list returned by
    cal_selfing_generation_prob() (which is also given self.output_fname).
    """
    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    header, strain_acc_list, category_list, data_matrix = FilterStrainSNPMatrix().read_data(
        self.input_fname)
    data_matrix = Numeric.array(data_matrix)

    # each step consumes the result of the previous one
    allele_prob_vector = self.cal_locus_allele_prob_vector(data_matrix)
    hetero_prob_vector = self.cal_locus_heterozygous_prob_vector(allele_prob_vector)
    hetero_prob_matrix = self.cal_locus_heterozygous_prob_matrix(
        hetero_prob_vector, self.max_selfing_generation)
    selfing_generation_ls = self.cal_selfing_generation_prob(
        data_matrix, hetero_prob_vector, strain_acc_list, category_list,
        hetero_prob_matrix, self.output_fname)

    import pylab
    pylab.clf()
    pylab.hist(selfing_generation_ls, 20)
    pylab.title("hist of selfing generations")
    pylab.show()
def run(self):
    """
    Read the matrix in self.input_fname, feed it through the chain of
    self.cal_* helpers and plot a 20-bin histogram of the list returned by
    cal_selfing_generation_prob() (which is also given self.output_fname).
    """
    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    header, strain_acc_list, category_list, data_matrix = FilterStrainSNPMatrix().read_data(
        self.input_fname)
    data_matrix = Numeric.array(data_matrix)

    # each step consumes the result of the previous one
    allele_prob_vector = self.cal_locus_allele_prob_vector(data_matrix)
    hetero_prob_vector = self.cal_locus_heterozygous_prob_vector(allele_prob_vector)
    hetero_prob_matrix = self.cal_locus_heterozygous_prob_matrix(
        hetero_prob_vector, self.max_selfing_generation)
    selfing_generation_ls = self.cal_selfing_generation_prob(
        data_matrix, hetero_prob_vector, strain_acc_list, category_list,
        hetero_prob_matrix, self.output_fname)

    import pylab
    pylab.clf()
    pylab.hist(selfing_generation_ls, 20)
    pylab.title("hist of selfing generations")
    pylab.show()
def find_2010_accession_id_for_old_2010_x_149snp_matrix(input_fname, curs, accession_table='at.accession'):
    """
    2007-11-05
        Check whether the names in the category column of input_fname can
        still be matched to entries in accession_table.

    input_fname: matrix file readable by FilterStrainSNPMatrix.read_data()
    curs: DB-API cursor; assumed to use the %s parameter style (as MySQLdb does)
    accession_table: table with the columns id and name

    Prints one "name<TAB>id" line per name (empty id when unmatched).

    Returns (strain_acc_accession_id_ls, strain_acc_match_failed_ls):
        the first holds [name, id] for matched names and [name] for
        unmatched ones, the second holds the unmatched names only.
    """
    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()
    header, strain_acc_list, category_list, data_matrix = FilterStrainSNPMatrix_instance.read_data(
        input_fname)

    # The table name can not be bound, but the strain name is handed to the
    # driver as a parameter so that names containing quotes neither break
    # nor alter the query.
    query = "select id from %s where name=%%s" % accession_table

    strain_acc_accession_id_ls = []
    strain_acc_match_failed_ls = []
    for strain_acc in category_list:
        curs.execute(query, (strain_acc,))
        rows = curs.fetchall()
        if rows:
            # only the first match is used
            accession_id = rows[0][0]
            strain_acc_accession_id_ls.append([strain_acc, accession_id])
        else:
            accession_id = ''
            strain_acc_accession_id_ls.append([strain_acc])
            strain_acc_match_failed_ls.append(strain_acc)
        print('%s\t%s' % (strain_acc, accession_id))
    return strain_acc_accession_id_ls, strain_acc_match_failed_ls
def shuffleMatrixSNPColumn_in_chrom_position_order(input_fname, curs, snps_table, output_fname):
    """
    Rewrite the strain X SNP matrix of input_fname with its SNP columns
    ordered by (chromosome, position) as listed in snps_table.

    input_fname: matrix file readable by FilterStrainSNPMatrix.read_data()
    curs: DB-API cursor used to query snps_table
    snps_table: table with the columns snpid, chromosome and position
    output_fname: file the reordered matrix is written to

    Only SNPs that are present in the matrix header take part in the new
    ordering, so extra or repeated rows in snps_table can no longer shift
    column indices or lengthen the header.

    Raises KeyError if a SNP of the matrix header is absent from snps_table.
    """
    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    import numpy

    FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()
    header, strain_acc_list, category_list, data_matrix = FilterStrainSNPMatrix_instance.read_data(
        input_fname)
    # the first two header fields are not SNP accessions
    snp_acc_list = header[2:]
    snp_acc_in_matrix = set(snp_acc_list)

    # a table name can not be bound as a query parameter, hence the interpolation
    curs.execute(
        "select snpid, chromosome, position from %s order by chromosome, position" % (snps_table))
    snp_acc2col_index = {}
    new_snp_acc_list = []
    for row in curs.fetchall():
        snpid = row[0]
        # skip SNPs the matrix does not hold and repeated table rows; both
        # would make the target column indices disagree with the matrix width
        if snpid not in snp_acc_in_matrix or snpid in snp_acc2col_index:
            continue
        snp_acc2col_index[snpid] = len(new_snp_acc_list)
        new_snp_acc_list.append(snpid)

    missing_snp_acc_ls = [snp_acc for snp_acc in snp_acc_list if snp_acc not in snp_acc2col_index]
    if missing_snp_acc_ls:
        raise KeyError("SNPs not found in %s: %s" % (
            snps_table, ', '.join([str(snp_acc) for snp_acc in missing_snp_acc_ls])))

    old_matrix = numpy.array(data_matrix)
    # plain int instead of the abstract numpy.integer, which is not a valid dtype
    new_matrix = numpy.zeros(old_matrix.shape, int)
    for j, snp_acc in enumerate(snp_acc_list):
        new_matrix[:, snp_acc2col_index[snp_acc]] = old_matrix[:, j]

    header = header[:2] + new_snp_acc_list
    FilterStrainSNPMatrix_instance.write_data_matrix(
        new_matrix, output_fname, header, strain_acc_list, category_list)
def find_2010_accession_id_for_old_2010_x_149snp_matrix(
        input_fname, curs, accession_table='at.accession'):
    """
    2007-11-05
        Check whether the names in the category column of input_fname can
        still be matched to entries in accession_table.

    input_fname: matrix file readable by FilterStrainSNPMatrix.read_data()
    curs: DB-API cursor; assumed to use the %s parameter style (as MySQLdb does)
    accession_table: table with the columns id and name

    Prints one "name<TAB>id" line per name (empty id when unmatched).

    Returns (strain_acc_accession_id_ls, strain_acc_match_failed_ls):
        the first holds [name, id] for matched names and [name] for
        unmatched ones, the second holds the unmatched names only.
    """
    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()
    header, strain_acc_list, category_list, data_matrix = FilterStrainSNPMatrix_instance.read_data(
        input_fname)

    # The table name can not be bound, but the strain name is handed to the
    # driver as a parameter so that names containing quotes neither break
    # nor alter the query.
    query = "select id from %s where name=%%s" % accession_table

    strain_acc_accession_id_ls = []
    strain_acc_match_failed_ls = []
    for strain_acc in category_list:
        curs.execute(query, (strain_acc,))
        rows = curs.fetchall()
        if rows:
            # only the first match is used
            accession_id = rows[0][0]
            strain_acc_accession_id_ls.append([strain_acc, accession_id])
        else:
            accession_id = ''
            strain_acc_accession_id_ls.append([strain_acc])
            strain_acc_match_failed_ls.append(strain_acc)
        print('%s\t%s' % (strain_acc, accession_id))
    return strain_acc_accession_id_ls, strain_acc_match_failed_ls
def run(self):
    """
    Run self.cleanup_one_population() on every sufficiently large population
    and, when self.commit is set, store the selections.

    The SNP/strain index structures and the data matrix come from
    dbSNP2data (heterozygous calls requested); populations come from
    OutputPopulation.get_popid2ecotypeid_ls(). A population is processed
    only if both its ecotype list and the list returned by
    create_sub_data_matrix() hold at least self.min_no_of_strains_per_pop
    entries, and it is recorded only if both selections are non-empty.
    """
    import MySQLdb
    conn = MySQLdb.connect(db=self.dbname, host=self.hostname, user=self.user, passwd=self.passwd)
    curs = conn.cursor()

    from dbSNP2data import dbSNP2data
    db_reader = dbSNP2data(user=self.user, passwd=self.passwd, output_fname='whatever')
    snp_id2index, snp_id_list, snp_id2info = db_reader.get_snp_id2index_m(
        curs, self.input_table, self.snp_locus_table)
    # 2008-06-02 stuff returned by get_strain_id2index_m is totally changed.
    strain_id2index, strain_id_list, nativename2strain_id, strain_id2acc, strain_id2category = \
        db_reader.get_strain_id2index_m(curs, self.input_table, self.strain_info_table)
    # translate the strain_id -> row mapping into one keyed by the strain's acc
    ecotype_id2row_index = dict(
        [(acc, strain_id2index[strain_id]) for strain_id, acc in strain_id2acc.iteritems()])
    snp_id2acc = db_reader.get_snp_id_info_m(curs, snp_id_list, self.snp_locus_table)
    data_matrix = db_reader.get_data_matrix_m(
        curs, strain_id2index, snp_id2index, nt2number, self.input_table,
        need_heterozygous_call=1)

    from OutputPopulation import OutputPopulation
    popid2ecotypeid_ls = OutputPopulation.get_popid2ecotypeid_ls(curs, self.population_table)

    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    strain_filter = FilterStrainSNPMatrix()
    from RemoveBadSNPs import RemoveBadSNPs
    snp_remover = RemoveBadSNPs()

    popid2strain_id_snp_id_ls = {}
    for popid, ecotypeid_ls in popid2ecotypeid_ls.iteritems():
        if len(ecotypeid_ls) < self.min_no_of_strains_per_pop:
            continue
        sys.stderr.write("Population %s\n" % popid)
        sub_data_matrix, new_ecotypeid_ls = self.create_sub_data_matrix(
            popid, data_matrix, ecotypeid_ls, ecotype_id2row_index)
        # the sub matrix may cover fewer ecotypes than the population lists
        if len(new_ecotypeid_ls) < self.min_no_of_strains_per_pop:
            continue
        sys.stderr.write("\tPopulation %s has %s strains\n" % (popid, len(new_ecotypeid_ls)))
        strain_id_selected, snp_id_selected = self.cleanup_one_population(
            strain_filter, snp_remover, sub_data_matrix, new_ecotypeid_ls, snp_id_list,
            self.min_no_of_strains_per_pop, self.row_cutoff, self.col_cutoff, self.min_log_prob)
        if strain_id_selected and snp_id_selected:
            popid2strain_id_snp_id_ls[popid] = [strain_id_selected, snp_id_selected]

    if self.commit:
        self.create_popid2snpid_table(curs, self.output_table)
        self.mark_strain_id_selected(curs, popid2strain_id_snp_id_ls, self.population_table)
        self.submit_popid2snpid_list(
            curs, popid2strain_id_snp_id_ls, self.population_table, self.output_table)
        conn.commit()
def cmp192StrainsBorevitsAndNordborgData(borevitz_data_fname, nordborg_data_fname):
    """
    2007-10-09
        compare between borevitz and nordborg data of 192 strains

        Strains are matched through the category lists of the two files.
        For every pair of borevitz strains whose names both occur in the
        nordborg file, the pairwise distance is computed within each data set.

        Returns four parallel lists: the name pairs, the borevitz distances,
        the nordborg distances and the number of valid pairs reported by the
        nordborg distance call.
    """
    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    reader = FilterStrainSNPMatrix()
    borevitz_header, borevitz_strain_acc_list, borevitz_category_list, borevitz_data_matrix = \
        reader.read_data(borevitz_data_fname)
    # the nordborg data is read without integer conversion
    nordborg_header, nordborg_strain_acc_list, nordborg_category_list, nordborg_data_matrix = \
        reader.read_data(nordborg_data_fname, turn_into_integer=0)

    # for nordborg data: name -> row index (a repeated name keeps its last row)
    accession_name2index = dict(
        (accession_name, row_index)
        for row_index, accession_name in enumerate(nordborg_category_list))

    nativename_missing_in_nordborg_alignment_ls = [
        nativename for nativename in borevitz_category_list
        if nativename not in accession_name2index]
    print('nativename_missing_in_nordborg_alignment_ls: %s' % (
        nativename_missing_in_nordborg_alignment_ls,))

    sys.stderr.write("Comparing 192 strains' data from borevitz lab and nordborg 2010 ...")
    acc_name_pair_ls = []
    borevitz_dist_ls = []
    nordborg_dist_ls = []
    no_of_valid_nordborg_pairs_ls = []
    no_of_borevits_strains = len(borevitz_strain_acc_list)
    for i in range(no_of_borevits_strains):
        for j in range(i + 1, no_of_borevits_strains):
            acc_name1 = borevitz_category_list[i]
            acc_name2 = borevitz_category_list[j]
            if acc_name1 not in accession_name2index or acc_name2 not in accession_name2index:
                continue
            borevitz_dist, no_of_valid_pairs = calBinaryDistanceBetTwoNumericVectors(
                borevitz_data_matrix[i], borevitz_data_matrix[j])
            nordborg_row1 = nordborg_data_matrix[accession_name2index[acc_name1]]
            nordborg_row2 = nordborg_data_matrix[accession_name2index[acc_name2]]
            # no_of_valid_pairs is deliberately overwritten: only the nordborg count is kept
            nordborg_dist, no_of_valid_pairs = calBinaryDistanceBetTwoAlignmentVectors(
                nordborg_row1, nordborg_row2)
            borevitz_dist_ls.append(borevitz_dist)
            nordborg_dist_ls.append(nordborg_dist)
            acc_name_pair_ls.append((acc_name1, acc_name2))
            no_of_valid_nordborg_pairs_ls.append(no_of_valid_pairs)
    sys.stderr.write("Done.\n")
    return acc_name_pair_ls, borevitz_dist_ls, nordborg_dist_ls, no_of_valid_nordborg_pairs_ls
def run(self):
    """
    2007-03-20
    2007-04-03
        Obtain a strain X SNP data matrix and hand two derived views of it
        to self.displayDataMatrix().

        If self.draw_only is set, the matrix is re-read from self.output_fname
        and converted to a Numeric array. Otherwise header, strain and category
        lists are read from self.input_fname, the matrix is rebuilt through
        dbSNP2data.get_data_matrix() (heterozygous calls requested) and written
        to self.output_fname.

        Blocks on raw_input() at the end.
    """
    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    matrix_io = FilterStrainSNPMatrix()

    if not self.draw_only:
        conn, curs = db_connect(self.hostname, self.dbname, self.schema)
        header, strain_acc_list, category_list, data_matrix = matrix_io.read_data(self.input_fname)
        # the first two header fields are not SNP accessions
        snp_acc_ls = header[2:]
        strain_id2index = self.get_id2index(curs, self.strain_info_table, strain_acc_list)
        snp_id2index = self.get_id2index(curs, self.snp_locus_table, snp_acc_ls)
        from dbSNP2data import dbSNP2data
        db_reader = dbSNP2data(report=self.report)
        # the matrix read from input_fname is discarded in favour of the db calls
        data_matrix = db_reader.get_data_matrix(
            curs, strain_id2index, snp_id2index, nt2number, self.data_table,
            need_heterozygous_call=1)
        matrix_io.write_data_matrix(
            data_matrix, self.output_fname, header, strain_acc_list, category_list)
    else:
        header, strain_acc_list, category_list, data_matrix = matrix_io.read_data(self.output_fname)
        data_matrix = Numeric.array(data_matrix)

    hetero_matrix, coarse_matrix = self.get_heterozygous_and_coarse_data_matrix(data_matrix)
    views = (
        (hetero_matrix, 'heterozygous_data_matrix, 5-10=hetero, else=0'),
        (coarse_matrix, 'coarse_data_matrix, 0=NA, 1=h**o, 2=hetero'),
    )
    for matrix, title in views:
        self.displayDataMatrix(matrix, title=title)
    raw_input("enter")
def run(self):
    """
    Compare the accession X SNP matrix (captioned "PCR" in the report)
    with the ecotype X SNP matrix (captioned "sequenom") and report the
    differences.

    Steps:
        1. connect to MySQL and set up the SNP / accession / ecotype structures
        2. build the ecotype matrix (written to self.sub_justin_output_fname
           if given) and the accession matrix (written to self.output_fname
           if given)
        3. compare the two matrices and print the four summary matrices
        4. if self.latex_output_fname is given, write a LaTeX report with
           a summary section, the full mismatch list, one subsection per
           strain and one subsection per SNP

    Every table in the report gets a distinct label 'table_dm<N>'; the
    summary tables use 0..len-1 and numbering continues from there.
    self.debug drops into pdb before any query is made.
    """
    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()
    import MySQLdb
    conn = MySQLdb.connect(db=self.dbname, host=self.hostname)
    curs = conn.cursor()
    if self.debug:
        import pdb
        pdb.set_trace()

    nt_number2diff_matrix_index = self.get_nt_number2diff_matrix_index(nt2number)
    SNPpos2col_index, snpid2col_index, snp_acc_ls, snp_index2snp_info_ls = self.setup_SNP_dstruc(
        curs, self.snp_locus_table)
    ecotype_id2accession_id, ecotype_id2row_index, ecotype_id2info_ls, ecotype_id_ls, \
        accession_id2row_index, accession_id_ls, accession_id2ecotype_id_ls = \
        self.setup_accession_ecotype_dstruc(
            curs, self.accession2ecotype_table, self.ecotype_table, self.calls_table)

    ecotype_X_snp_matrix, ecotype_X_snp_matrix_touched = self.get_ecotype_X_snp_matrix(
        curs, ecotype_id2row_index, snpid2col_index, self.calls_table)
    if self.sub_justin_output_fname:
        header = ['ecotype_id', 'ecotype_id'] + snp_acc_ls
        FilterStrainSNPMatrix_instance.write_data_matrix(
            ecotype_X_snp_matrix, self.sub_justin_output_fname, header,
            ecotype_id_ls, ecotype_id_ls)

    alignment_id2positions_to_be_checked_ls, alignment_id2start = \
        self.get_alignment_id2positions_to_be_checked_ls(curs, self.alignment_table)
    accession_X_snp_matrix, accession_X_snp_matrix_touched, snp_index2alignment_id = \
        self.get_accession_X_snp_matrix(
            curs, accession_id2row_index, SNPpos2col_index, self.sequence_table,
            self.alignment_table, alignment_id2positions_to_be_checked_ls)
    if self.output_fname:
        header = ['accession_id', 'accession_id'] + snp_acc_ls
        FilterStrainSNPMatrix_instance.write_data_matrix(
            accession_X_snp_matrix, self.output_fname, header,
            accession_id_ls, accession_id_ls)

    summary_diff_matrix_ls, diff_details_ls = self.cmp_two_matricies(
        accession_X_snp_matrix, accession_X_snp_matrix_touched,
        ecotype_X_snp_matrix, ecotype_X_snp_matrix_touched,
        nt_number2diff_matrix_index, ecotype_id2accession_id,
        ecotype_id2row_index, accession_id2row_index, diff_details_ls_type=2)
    summary_title_ls = [
        "diff_matrix_touched_accession_vs_touched_ecotype",
        "diff_matrix_touched_accession_vs_untouched_ecotype",
        "diff_matrix_untouched_accession_vs_touched_ecotype",
        "diff_matrix_untouched_accession_vs_untouched_ecotype",
    ]
    for summary_index, summary_title in enumerate(summary_title_ls):
        print(summary_title)
        print(summary_diff_matrix_ls[summary_index])

    if not self.latex_output_fname:
        return

    summary_diff_matrix_caption_ls = [
        'PCR-tried vs sequenom-tried',
        'PCR-tried vs sequenom-untried',
        'PCR-untried vs sequenom-tried',
        'PCR-untried vs sequenom-untried',
    ]
    # imported once up front so it is bound even if there are no summary matrices
    from pymodule.latex import outputMatrixInLatexTable
    outf = open(self.latex_output_fname, 'w')
    try:
        outf.write('\\section{2010 PCR versus sequenom. summary} \\label{section_summary}\n')
        for i in range(len(summary_diff_matrix_ls)):
            wrapped_diff_matrix = self.wrap_diff_matrix_with_row_col_names(
                summary_diff_matrix_ls[i])
            table_label = 'table_dm%s' % i
            outf.write(outputMatrixInLatexTable(
                wrapped_diff_matrix, summary_diff_matrix_caption_ls[i], table_label))
        # first number not used by a summary table, so the mismatch table
        # below does not repeat the label of the last summary table
        table_no = len(summary_diff_matrix_ls)

        # output the whole diff_details_ls
        outf.write(
            '\\section{Real Mismatches between pcr and sequenom (deletion/NA excluded)} '
            '\\label{section_real_mismatch}\n')
        diff_details_ls = self.beautify_snp_diff_details_ls(
            diff_details_ls, ecotype_id2info_ls, snp_index2snp_info_ls,
            alignment_id2start, snp_index2alignment_id)
        table_label = 'table_dm%s' % table_no
        caption = 'mismatches between pcr and sequenom data (deletion/NA excluded, sorted by accession id)'
        outf.write(outputMatrixInLatexTable(
            diff_details_ls, caption, table_label,
            header_ls=['nativename', 'stkparent', 'ecotype_id', 'duplicate', 'accession_id',
                       'SNP', 'chromosome', 'position', 'alignment_id', 'alignment_start',
                       'pcr_call', 'sequenom_call']))

        # Strain-wise comparison
        outf.write('\\section{2010 PCR versus sequenom for each strain} \\label{section_strain_wise}\n')
        accession_id_ls.sort()
        for accession_id in accession_id_ls:
            matched_ecotype_id_ls = accession_id2ecotype_id_ls[accession_id]
            outf.write('\\subsection{strain %s(accession id=%s)}\n' % (
                ecotype_id2info_ls[matched_ecotype_id_ls[0]][0], accession_id))
            for ecotype_id in matched_ecotype_id_ls:
                # ecotype_id is indexed as (ecotype id, duplicate)
                nativename = ecotype_id2info_ls[ecotype_id][0]
                stkparent = ecotype_id2info_ls[ecotype_id][1]
                outf.write(
                    '\\subsubsection{corresponding ecotype %s(stkparent=%s, ecotype id=%s, duplicate=%s)}\n' % (
                        nativename, stkparent, ecotype_id[0], ecotype_id[1]))
                e_row_index = ecotype_id2row_index[ecotype_id]
                a_row_index = accession_id2row_index[accession_id]
                diff_matrix_ls, strain_diff_details_ls = self.cmp_two_lists(
                    accession_X_snp_matrix[a_row_index, :],
                    accession_X_snp_matrix_touched[a_row_index, :],
                    ecotype_X_snp_matrix[e_row_index, :],
                    ecotype_X_snp_matrix_touched[e_row_index, :],
                    nt_number2diff_matrix_index)
                wrapped_diff_matrix = self.wrap_diff_matrix_with_row_col_names(diff_matrix_ls[0])
                table_no += 1
                table_label = 'table_dm%s' % table_no
                caption = 'accession id=%s vs ecotype id=%s, duplicate=%s(nativename=%s, stockparent=%s)' % (
                    accession_id, ecotype_id[0], ecotype_id[1], nativename, stkparent)
                outf.write(outputMatrixInLatexTable(wrapped_diff_matrix, caption, table_label))
                if strain_diff_details_ls:
                    strain_diff_details_ls = self.beautify_diff_details_ls(
                        strain_diff_details_ls, snp_index2snp_info_ls,
                        alignment_id2start, snp_index2alignment_id)
                    table_no += 1
                    table_label = 'table_dm%s' % table_no
                    caption = 'detailed difference for accession id=%s vs ecotype id=%s, duplicate=%s' % (
                        accession_id, ecotype_id[0], ecotype_id[1])
                    outf.write(outputMatrixInLatexTable(
                        strain_diff_details_ls, caption, table_label,
                        header_ls=['snp', 'chromosome', 'position', 'alignment_id',
                                   'alignment_start', 'pcr_call', 'sequenom_call']))

        # SNP-wise comparison
        outf.write('\\section{2010 PCR versus sequenom for each SNP} \\label{section_snp_wise}\n')
        for snp_column in range(accession_X_snp_matrix.shape[1]):
            snp_acc, chromosome, position = snp_index2snp_info_ls[snp_column]
            alignment_id = snp_index2alignment_id[snp_column]
            alignment_start = alignment_id2start[alignment_id]
            outf.write(
                '\\subsection{SNP %s(chrom=%s, pos=%s, alignment id=%s, alignment start=%s)}\n' % (
                    snp_acc, chromosome, position, alignment_id, alignment_start))
            diff_matrix_ls, snp_diff_details_ls = self.cmp_two_matricies(
                accession_X_snp_matrix, accession_X_snp_matrix_touched,
                ecotype_X_snp_matrix, ecotype_X_snp_matrix_touched,
                nt_number2diff_matrix_index, ecotype_id2accession_id,
                ecotype_id2row_index, accession_id2row_index,
                snp_column=snp_column, diff_details_ls_type=1)
            wrapped_diff_matrix = self.wrap_diff_matrix_with_row_col_names(diff_matrix_ls[0])
            table_no += 1
            table_label = 'table_dm%s' % table_no
            caption = 'SNP %s(chromosome=%s, position=%s, alignment id=%s, alignment start=%s)' % (
                snp_acc, chromosome, position, alignment_id, alignment_start)
            outf.write(outputMatrixInLatexTable(wrapped_diff_matrix, caption, table_label))
            if snp_diff_details_ls:
                snp_diff_details_ls = self.beautify_snp_diff_details_ls(
                    snp_diff_details_ls, ecotype_id2info_ls)
                table_no += 1
                table_label = 'table_dm%s' % table_no
                caption = 'detailed difference for SNP %s' % (snp_acc)
                header_ls = ['nativename', 'stkparent', 'ecotype_id', 'duplicate',
                             'accession_id', 'pcr_call', 'sequenom_call']
                outf.write(outputMatrixInLatexTable(
                    snp_diff_details_ls, caption, table_label, header_ls))
    finally:
        # close explicitly so the report is flushed even if a step above fails
        outf.close()
def run(self):
    """
    Compare the accession X SNP matrix (captioned "PCR" in the report)
    with the ecotype X SNP matrix (captioned "sequenom") and report the
    differences.

    Steps:
        1. connect to MySQL and set up the SNP / accession / ecotype structures
        2. build the ecotype matrix (written to self.sub_justin_output_fname
           if given) and the accession matrix (written to self.output_fname
           if given)
        3. compare the two matrices and print the four summary matrices
        4. if self.latex_output_fname is given, write a LaTeX report with
           a summary section, the full mismatch list, one subsection per
           strain and one subsection per SNP

    Every table in the report gets a distinct label 'table_dm<N>'; the
    summary tables use 0..len-1 and numbering continues from there.
    self.debug drops into pdb before any query is made.
    """
    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()
    import MySQLdb
    conn = MySQLdb.connect(db=self.dbname, host=self.hostname)
    curs = conn.cursor()
    if self.debug:
        import pdb
        pdb.set_trace()

    nt_number2diff_matrix_index = self.get_nt_number2diff_matrix_index(nt2number)
    SNPpos2col_index, snpid2col_index, snp_acc_ls, snp_index2snp_info_ls = self.setup_SNP_dstruc(
        curs, self.snp_locus_table)
    ecotype_id2accession_id, ecotype_id2row_index, ecotype_id2info_ls, ecotype_id_ls, \
        accession_id2row_index, accession_id_ls, accession_id2ecotype_id_ls = \
        self.setup_accession_ecotype_dstruc(
            curs, self.accession2ecotype_table, self.ecotype_table, self.calls_table)

    ecotype_X_snp_matrix, ecotype_X_snp_matrix_touched = self.get_ecotype_X_snp_matrix(
        curs, ecotype_id2row_index, snpid2col_index, self.calls_table)
    if self.sub_justin_output_fname:
        header = ['ecotype_id', 'ecotype_id'] + snp_acc_ls
        FilterStrainSNPMatrix_instance.write_data_matrix(
            ecotype_X_snp_matrix, self.sub_justin_output_fname, header,
            ecotype_id_ls, ecotype_id_ls)

    alignment_id2positions_to_be_checked_ls, alignment_id2start = \
        self.get_alignment_id2positions_to_be_checked_ls(curs, self.alignment_table)
    accession_X_snp_matrix, accession_X_snp_matrix_touched, snp_index2alignment_id = \
        self.get_accession_X_snp_matrix(
            curs, accession_id2row_index, SNPpos2col_index, self.sequence_table,
            self.alignment_table, alignment_id2positions_to_be_checked_ls)
    if self.output_fname:
        header = ['accession_id', 'accession_id'] + snp_acc_ls
        FilterStrainSNPMatrix_instance.write_data_matrix(
            accession_X_snp_matrix, self.output_fname, header,
            accession_id_ls, accession_id_ls)

    summary_diff_matrix_ls, diff_details_ls = self.cmp_two_matricies(
        accession_X_snp_matrix, accession_X_snp_matrix_touched,
        ecotype_X_snp_matrix, ecotype_X_snp_matrix_touched,
        nt_number2diff_matrix_index, ecotype_id2accession_id,
        ecotype_id2row_index, accession_id2row_index, diff_details_ls_type=2)
    summary_title_ls = [
        "diff_matrix_touched_accession_vs_touched_ecotype",
        "diff_matrix_touched_accession_vs_untouched_ecotype",
        "diff_matrix_untouched_accession_vs_touched_ecotype",
        "diff_matrix_untouched_accession_vs_untouched_ecotype",
    ]
    for summary_index, summary_title in enumerate(summary_title_ls):
        print(summary_title)
        print(summary_diff_matrix_ls[summary_index])

    if not self.latex_output_fname:
        return

    summary_diff_matrix_caption_ls = [
        'PCR-tried vs sequenom-tried',
        'PCR-tried vs sequenom-untried',
        'PCR-untried vs sequenom-tried',
        'PCR-untried vs sequenom-untried',
    ]
    # imported once up front so it is bound even if there are no summary matrices
    from pymodule.latex import outputMatrixInLatexTable
    outf = open(self.latex_output_fname, 'w')
    try:
        outf.write('\\section{2010 PCR versus sequenom. summary} \\label{section_summary}\n')
        for i in range(len(summary_diff_matrix_ls)):
            wrapped_diff_matrix = self.wrap_diff_matrix_with_row_col_names(
                summary_diff_matrix_ls[i])
            table_label = 'table_dm%s' % i
            outf.write(outputMatrixInLatexTable(
                wrapped_diff_matrix, summary_diff_matrix_caption_ls[i], table_label))
        # first number not used by a summary table, so the mismatch table
        # below does not repeat the label of the last summary table
        table_no = len(summary_diff_matrix_ls)

        # output the whole diff_details_ls
        outf.write(
            '\\section{Real Mismatches between pcr and sequenom (deletion/NA excluded)} '
            '\\label{section_real_mismatch}\n')
        diff_details_ls = self.beautify_snp_diff_details_ls(
            diff_details_ls, ecotype_id2info_ls, snp_index2snp_info_ls,
            alignment_id2start, snp_index2alignment_id)
        table_label = 'table_dm%s' % table_no
        caption = 'mismatches between pcr and sequenom data (deletion/NA excluded, sorted by accession id)'
        outf.write(outputMatrixInLatexTable(
            diff_details_ls, caption, table_label,
            header_ls=['nativename', 'stkparent', 'ecotype_id', 'duplicate', 'accession_id',
                       'SNP', 'chromosome', 'position', 'alignment_id', 'alignment_start',
                       'pcr_call', 'sequenom_call']))

        # Strain-wise comparison
        outf.write('\\section{2010 PCR versus sequenom for each strain} \\label{section_strain_wise}\n')
        accession_id_ls.sort()
        for accession_id in accession_id_ls:
            matched_ecotype_id_ls = accession_id2ecotype_id_ls[accession_id]
            outf.write('\\subsection{strain %s(accession id=%s)}\n' % (
                ecotype_id2info_ls[matched_ecotype_id_ls[0]][0], accession_id))
            for ecotype_id in matched_ecotype_id_ls:
                # ecotype_id is indexed as (ecotype id, duplicate)
                nativename = ecotype_id2info_ls[ecotype_id][0]
                stkparent = ecotype_id2info_ls[ecotype_id][1]
                outf.write(
                    '\\subsubsection{corresponding ecotype %s(stkparent=%s, ecotype id=%s, duplicate=%s)}\n' % (
                        nativename, stkparent, ecotype_id[0], ecotype_id[1]))
                e_row_index = ecotype_id2row_index[ecotype_id]
                a_row_index = accession_id2row_index[accession_id]
                diff_matrix_ls, strain_diff_details_ls = self.cmp_two_lists(
                    accession_X_snp_matrix[a_row_index, :],
                    accession_X_snp_matrix_touched[a_row_index, :],
                    ecotype_X_snp_matrix[e_row_index, :],
                    ecotype_X_snp_matrix_touched[e_row_index, :],
                    nt_number2diff_matrix_index)
                wrapped_diff_matrix = self.wrap_diff_matrix_with_row_col_names(diff_matrix_ls[0])
                table_no += 1
                table_label = 'table_dm%s' % table_no
                caption = 'accession id=%s vs ecotype id=%s, duplicate=%s(nativename=%s, stockparent=%s)' % (
                    accession_id, ecotype_id[0], ecotype_id[1], nativename, stkparent)
                outf.write(outputMatrixInLatexTable(wrapped_diff_matrix, caption, table_label))
                if strain_diff_details_ls:
                    strain_diff_details_ls = self.beautify_diff_details_ls(
                        strain_diff_details_ls, snp_index2snp_info_ls,
                        alignment_id2start, snp_index2alignment_id)
                    table_no += 1
                    table_label = 'table_dm%s' % table_no
                    caption = 'detailed difference for accession id=%s vs ecotype id=%s, duplicate=%s' % (
                        accession_id, ecotype_id[0], ecotype_id[1])
                    outf.write(outputMatrixInLatexTable(
                        strain_diff_details_ls, caption, table_label,
                        header_ls=['snp', 'chromosome', 'position', 'alignment_id',
                                   'alignment_start', 'pcr_call', 'sequenom_call']))

        # SNP-wise comparison
        outf.write('\\section{2010 PCR versus sequenom for each SNP} \\label{section_snp_wise}\n')
        for snp_column in range(accession_X_snp_matrix.shape[1]):
            snp_acc, chromosome, position = snp_index2snp_info_ls[snp_column]
            alignment_id = snp_index2alignment_id[snp_column]
            alignment_start = alignment_id2start[alignment_id]
            outf.write(
                '\\subsection{SNP %s(chrom=%s, pos=%s, alignment id=%s, alignment start=%s)}\n' % (
                    snp_acc, chromosome, position, alignment_id, alignment_start))
            diff_matrix_ls, snp_diff_details_ls = self.cmp_two_matricies(
                accession_X_snp_matrix, accession_X_snp_matrix_touched,
                ecotype_X_snp_matrix, ecotype_X_snp_matrix_touched,
                nt_number2diff_matrix_index, ecotype_id2accession_id,
                ecotype_id2row_index, accession_id2row_index,
                snp_column=snp_column, diff_details_ls_type=1)
            wrapped_diff_matrix = self.wrap_diff_matrix_with_row_col_names(diff_matrix_ls[0])
            table_no += 1
            table_label = 'table_dm%s' % table_no
            caption = 'SNP %s(chromosome=%s, position=%s, alignment id=%s, alignment start=%s)' % (
                snp_acc, chromosome, position, alignment_id, alignment_start)
            outf.write(outputMatrixInLatexTable(wrapped_diff_matrix, caption, table_label))
            if snp_diff_details_ls:
                snp_diff_details_ls = self.beautify_snp_diff_details_ls(
                    snp_diff_details_ls, ecotype_id2info_ls)
                table_no += 1
                table_label = 'table_dm%s' % table_no
                caption = 'detailed difference for SNP %s' % (snp_acc)
                header_ls = ['nativename', 'stkparent', 'ecotype_id', 'duplicate',
                             'accession_id', 'pcr_call', 'sequenom_call']
                outf.write(outputMatrixInLatexTable(
                    snp_diff_details_ls, caption, table_label, header_ls))
    finally:
        # close explicitly so the report is flushed even if a step above fails
        outf.close()
def run(self):
    """
    2007-03-29
    2007-04-03
    2007-05-01
    2007-11-01 extra comparison through CmpAccession2Ecotype

    Compare the SNP matrix stored in self.input_fname against the "2010"
    SNP matrix, print a per-tag count of the differences and show the
    difference matrix with pylab.

    The 2010 matrix is either re-read from self.output_fname (when
    self.comparison_only is set) or rebuilt from self.data_dir_2010 with the
    help of the database and then written to self.output_fname.

    Call order:
        --FilterStrainSNPMatrix_instance.read_data()
        if self.comparison_only:
            --FilterStrainSNPMatrix_instance.read_data()
        else:
            --db_connect()
            --get_SNPpos2index()
            --create_SNP_matrix_2010()
                (original notes list get_align_length_from_fname(),
                 get_positions_to_be_checked_ls() and
                 get_align_matrix_from_fname() underneath it)
            --get_mapping_info_regarding_strain_acc()
            --shuffle_data_matrix_according_to_strain_acc_ls()
            --FilterStrainSNPMatrix_instance.write_data_matrix()
        --extract_sub_data_matrix()
        if self.sub_justin_output_fname:
            --FilterStrainSNPMatrix_instance.write_data_matrix()
        --compare_two_SNP_matrix()
        if self.diff_output_fname:
            --outputDiffType()
        --CmpAccession2Ecotype.cmp_two_matricies()
    """
    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    matrix_file = FilterStrainSNPMatrix()
    header, src_strain_acc_list, category_list, data_matrix = \
        matrix_file.read_data(self.input_fname)

    if self.comparison_only:
        # A previous run already produced the 2010 matrix; just load it.
        # Note that 'header' is rebound to the header of that file.
        header, strain_acc_ls, abbr_name_ls_sorted, stored_rows = \
            matrix_file.read_data(self.output_fname)
        SNP_matrix_2010_sorted = Numeric.array(stored_rows)
    else:
        conn, curs = db_connect(self.hostname, self.dbname, self.schema)
        # extract data from alignment
        snp_acc_ls = header[2:]
        SNPpos2index = self.get_SNPpos2index(curs, snp_acc_ls,
                                             self.snp_locus_table)
        abbr_name_ls, SNP_matrix_2010 = self.create_SNP_matrix_2010(
            SNPpos2index, self.data_dir_2010)
        strain_acc_ls, strain_acc2abbr_name, strain_acc2index = \
            self.get_mapping_info_regarding_strain_acc(
                curs, self.strain_info_table, self.strain_info_2010_table,
                abbr_name_ls)
        SNP_matrix_2010_sorted = \
            self.shuffle_data_matrix_according_to_strain_acc_ls(
                SNP_matrix_2010, strain_acc_ls, strain_acc2index)
        # abbreviated names in the same order as strain_acc_ls
        abbr_name_ls_sorted = [strain_acc2abbr_name[strain_acc]
                               for strain_acc in strain_acc_ls]
        matrix_file.write_data_matrix(
            SNP_matrix_2010_sorted, self.output_fname, header,
            strain_acc_ls, abbr_name_ls_sorted)

    # comparison
    data_matrix = Numeric.array(data_matrix)
    sub_data_matrix = self.extract_sub_data_matrix(
        src_strain_acc_list, data_matrix, strain_acc_ls)
    if self.sub_justin_output_fname:
        matrix_file.write_data_matrix(
            sub_data_matrix, self.sub_justin_output_fname, header,
            strain_acc_ls, abbr_name_ls_sorted)
    diff_matrix, diff_tag_dict, diff_tag2counter = \
        self.compare_two_SNP_matrix(SNP_matrix_2010_sorted, sub_data_matrix)
    if self.diff_output_fname:
        self.outputDiffType(
            diff_matrix, SNP_matrix_2010_sorted, sub_data_matrix,
            diff_tag_dict, self.diff_type_to_be_outputted,
            abbr_name_ls_sorted, header[2:], self.diff_output_fname)

    # one "tag(code):count" entry per difference tag; echoed to stdout and
    # reused as the figure title
    summary_result_ls = []
    for tag, counter in diff_tag2counter.iteritems():
        tag_code = diff_tag_dict[tag]
        summary_result_ls.append('%s(%s):%s' % (tag, tag_code, counter))
        print('\t%s(%s)\t%s' % (tag, tag_code, counter))

    import pylab
    pylab.clf()
    # rows are handed to imshow in reversed order
    diff_matrix_reverse = Numeric.array(list(diff_matrix)[::-1])
    pylab.imshow(diff_matrix_reverse, interpolation='nearest')
    pylab.title(' '.join(summary_result_ls))
    pylab.colorbar()
    pylab.show()

    # 2007-11-01 do something as CmpAccession2Ecotype.py
    from CmpAccession2Ecotype import CmpAccession2Ecotype
    CmpAccession2Ecotype_ins = CmpAccession2Ecotype()
    nt_number2diff_matrix_index = \
        CmpAccession2Ecotype_ins.get_nt_number2diff_matrix_index(nt2number)
    # NOTE(review): zip() stops at the shorter range, so this maps i -> i
    # only for i < min(shape[0], shape[1]) -- confirm that is what
    # cmp_two_matricies() expects from its three mapping arguments.
    n_rows, n_cols = sub_data_matrix.shape[0], sub_data_matrix.shape[1]
    dc_placeholder = dict(zip(range(n_rows), range(n_cols)))
    diff_matrix_ls = CmpAccession2Ecotype_ins.cmp_two_matricies(
        SNP_matrix_2010_sorted, sub_data_matrix,
        nt_number2diff_matrix_index,
        dc_placeholder, dc_placeholder, dc_placeholder)
    print(diff_matrix_ls)
def run(self):
    """
    2007-03-29
    2007-04-03
    2007-05-01

    Driver for the 2010-data versus input-matrix comparison.

    1. Read the input SNP matrix (self.input_fname).
    2. Get the 2010 SNP matrix:
         - self.comparison_only set: read it back from self.output_fname;
         - otherwise: build it (db_connect(), get_SNPpos2index(),
           create_SNP_matrix_2010(),
           get_mapping_info_regarding_strain_acc(),
           shuffle_data_matrix_according_to_strain_acc_ls()) and save it
           to self.output_fname with write_data_matrix().
    3. extract_sub_data_matrix() on the input matrix for strain_acc_ls;
       optionally write the result to self.sub_justin_output_fname.
    4. compare_two_SNP_matrix(); optionally outputDiffType() into
       self.diff_output_fname.
    5. Print the per-tag counters, display the difference matrix with
       pylab and finally print the result of
       CmpAccession2Ecotype.cmp_two_matricies().
    """
    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    snp_matrix_io = FilterStrainSNPMatrix()
    header, src_strain_acc_list, category_list, data_matrix = \
        snp_matrix_io.read_data(self.input_fname)

    if not self.comparison_only:
        conn, curs = db_connect(self.hostname, self.dbname, self.schema)
        # extract data from alignment
        SNPpos2index = self.get_SNPpos2index(
            curs, header[2:], self.snp_locus_table)
        abbr_name_ls, unsorted_matrix_2010 = self.create_SNP_matrix_2010(
            SNPpos2index, self.data_dir_2010)
        mapping_info = self.get_mapping_info_regarding_strain_acc(
            curs, self.strain_info_table, self.strain_info_2010_table,
            abbr_name_ls)
        strain_acc_ls, strain_acc2abbr_name, strain_acc2index = mapping_info
        SNP_matrix_2010_sorted = \
            self.shuffle_data_matrix_according_to_strain_acc_ls(
                unsorted_matrix_2010, strain_acc_ls, strain_acc2index)
        abbr_name_ls_sorted = [strain_acc2abbr_name[acc]
                               for acc in strain_acc_ls]
        snp_matrix_io.write_data_matrix(
            SNP_matrix_2010_sorted, self.output_fname, header,
            strain_acc_ls, abbr_name_ls_sorted)
    else:
        # reuse the matrix written by an earlier run; this also replaces
        # 'header' with the one stored in self.output_fname
        header, strain_acc_ls, abbr_name_ls_sorted, SNP_matrix_2010_sorted = \
            snp_matrix_io.read_data(self.output_fname)
        SNP_matrix_2010_sorted = Numeric.array(SNP_matrix_2010_sorted)

    # comparison
    data_matrix = Numeric.array(data_matrix)
    sub_data_matrix = self.extract_sub_data_matrix(
        src_strain_acc_list, data_matrix, strain_acc_ls)
    if self.sub_justin_output_fname:
        snp_matrix_io.write_data_matrix(
            sub_data_matrix, self.sub_justin_output_fname, header,
            strain_acc_ls, abbr_name_ls_sorted)
    comparison = self.compare_two_SNP_matrix(
        SNP_matrix_2010_sorted, sub_data_matrix)
    diff_matrix, diff_tag_dict, diff_tag2counter = comparison
    if self.diff_output_fname:
        self.outputDiffType(
            diff_matrix, SNP_matrix_2010_sorted, sub_data_matrix,
            diff_tag_dict, self.diff_type_to_be_outputted,
            abbr_name_ls_sorted, header[2:], self.diff_output_fname)

    # report every difference tag on stdout and collect the same
    # information for the plot title
    summary_result_ls = []
    for tag, counter in diff_tag2counter.iteritems():
        fields = (tag, diff_tag_dict[tag], counter)
        summary_result_ls.append('%s(%s):%s' % fields)
        print('\t%s(%s)\t%s' % fields)

    import pylab
    pylab.clf()
    flipped_rows = list(diff_matrix)
    flipped_rows.reverse()  # first row of diff_matrix ends up last
    pylab.imshow(Numeric.array(flipped_rows), interpolation='nearest')
    pylab.title(' '.join(summary_result_ls))
    pylab.colorbar()
    pylab.show()

    # 2007-11-01 do something as CmpAccession2Ecotype.py
    from CmpAccession2Ecotype import CmpAccession2Ecotype
    cmp_helper = CmpAccession2Ecotype()
    nt_number2diff_matrix_index = \
        cmp_helper.get_nt_number2diff_matrix_index(nt2number)
    # the same index -> index dictionary is passed for all three mapping
    # arguments of cmp_two_matricies()
    dc_placeholder = dict(
        zip(range(sub_data_matrix.shape[0]),
            range(sub_data_matrix.shape[1])))
    diff_matrix_ls = cmp_helper.cmp_two_matricies(
        SNP_matrix_2010_sorted, sub_data_matrix,
        nt_number2diff_matrix_index,
        dc_placeholder, dc_placeholder, dc_placeholder)
    print(diff_matrix_ls)
def run(self):
    """
    2007-04-16
    2007-09-17 rank 0 also sends strain_acc_list to the output node

    Entry point run by every MPI process; what a process does depends on
    self.communicator.rank.

    Phase 1 (data distribution):
        (rank==0)
            --FilterStrainSNPMatrix_instance.read_data()
            --db_connect()
            --get_chr_start_ls()
            (send pickled strain_acc_list to the last rank, and pickled
             chr_start_ls + data_matrix to every computing node)
        elif free_computing_nodes:
            -- (receive chr_start_ls and data_matrix)
        else:
            -- (receive strain_acc_list)

    --mpi_synchronize()

    Phase 2 (work), call tree as recorded in the original notes:
        (rank==0)
            --input_node()
                --input_handler()
        elif free_computing_nodes:
            --computing_node()
                --computing_node_handler()
                    --identify_ancestry_with_min_jumps()
                        --initialize_score_trace_matrix()
                            --is_child_heterozygous_SNP_compatible_with_parents()
                        (for loop)
                            --identify_ancestry_of_one_chr_with_DP()
                                --is_child_heterozygous_SNP_compatible_with_parents()
                        --trace()
                            --recursive_trace()
        else:
            --output_node()
                --output_node_handler()

    The output node writes tab-delimited rows to self.output_fname; the
    file is closed explicitly once output_node() returns or raises.
    """
    node_rank = self.communicator.rank
    # exclude the 1st and last node
    free_computing_nodes = range(1, self.communicator.size - 1)

    if node_rank == 0:
        FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()
        header, strain_acc_list, category_list, data_matrix = \
            FilterStrainSNPMatrix_instance.read_data(self.input_fname)
        snp_acc_list = header[2:]
        data_matrix = Numeric.array(data_matrix)
        no_of_strains = data_matrix.shape[0]
        # NOTE(review): credentials are hard-coded in this call -- consider
        # taking them from configuration instead.
        (conn, curs) = db_connect(self.hostname, self.dbname, self.schema,
                                  password="******", user="******")

        # 2007-09-17 send strain_acc_list to the output_node
        strain_acc_list_pickle = cPickle.dumps(strain_acc_list, -1)
        self.communicator.send(strain_acc_list_pickle,
                               self.communicator.size - 1, 0)

        chr_start_ls = self.get_chr_start_ls(curs, snp_acc_list,
                                             self.snp_locus_table)
        # -1 means use the highest protocol
        chr_start_ls_pickle = cPickle.dumps(chr_start_ls, -1)
        data_matrix_pickle = cPickle.dumps(data_matrix, -1)
        # send it to the computing_node; serialized once, reused per node
        for node in free_computing_nodes:
            self.communicator.send(chr_start_ls_pickle, node, 0)
            self.communicator.send(data_matrix_pickle, node, 0)
    elif node_rank in free_computing_nodes:
        # Messages arrive in the order rank 0 sent them: chr_start_ls
        # first, data_matrix second.
        data, source, tag = self.communicator.receiveString(0, 0)
        chr_start_ls = cPickle.loads(data)  # take the data
        data, source, tag = self.communicator.receiveString(0, 0)
        data_matrix = cPickle.loads(data)
    else:
        data, source, tag = self.communicator.receiveString(0, 0)
        strain_acc_list = cPickle.loads(data)

    mpi_synchronize(self.communicator)

    if node_rank == 0:
        parameter_list = [no_of_strains]
        self.input_node(self.communicator, parameter_list,
                        free_computing_nodes, self.message_size, self.report)
    elif node_rank in free_computing_nodes:
        # three different ways to pick the parent-set and the child
        trio_arrangement_ls = [
            [0, 1, 2],
            [1, 2, 0],
            [2, 0, 1],
        ]
        parameter_list = [data_matrix, chr_start_ls, trio_arrangement_ls]
        computing_node(self.communicator, parameter_list,
                       self.computing_node_handler, report=self.report)
    else:
        # Keep a handle on the file so it can be closed deterministically:
        # csv.writer has no close(), and "del writer" did not release the
        # file because parameter_list still referenced the writer.
        outf = open(self.output_fname, "w")
        try:
            writer = csv.writer(outf, delimiter="\t")
            parameter_list = [writer, strain_acc_list]
            output_node(self.communicator, free_computing_nodes,
                        parameter_list, self.output_node_handler,
                        self.report)
        finally:
            # flush buffered rows even if output_node() raised
            outf.close()
def run(self):
    """
    2007-04-16
    2007-09-17 rank 0 also sends strain_acc_list to the output node

    MPI driver. Every process calls this; the role is chosen from
    self.communicator.rank:
        rank 0                  -> input node
        ranks 1 .. size-2       -> computing nodes (free_computing_nodes)
        any other rank (last)   -> output node

    Before mpi_synchronize():
        (rank==0)
            --FilterStrainSNPMatrix_instance.read_data()
            --db_connect()
            --get_chr_start_ls()
            (pickled strain_acc_list goes to the last rank; pickled
             chr_start_ls and data_matrix go to each computing node)
        elif free_computing_nodes:
            -- (receive data)
        else:
            -- (receive strain_acc_list)

    --mpi_synchronize()

    After mpi_synchronize(), call tree as recorded in the original notes:
        (rank==0)
            --input_node()
                --input_handler()
        elif free_computing_nodes:
            --computing_node()
                --computing_node_handler()
                    --identify_ancestry_with_min_jumps()
                        --initialize_score_trace_matrix()
                            --is_child_heterozygous_SNP_compatible_with_parents()
                        (for loop)
                            --identify_ancestry_of_one_chr_with_DP()
                                --is_child_heterozygous_SNP_compatible_with_parents()
                        --trace()
                            --recursive_trace()
        else:
            --output_node()
                --output_node_handler()

    The output node's file (self.output_fname, tab-delimited) is closed in
    a finally clause so buffered rows reach disk even on error.
    """
    node_rank = self.communicator.rank
    #exclude the 1st and last node
    free_computing_nodes = range(1, self.communicator.size - 1)

    if node_rank == 0:
        FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()
        header, strain_acc_list, category_list, data_matrix = \
            FilterStrainSNPMatrix_instance.read_data(self.input_fname)
        snp_acc_list = header[2:]
        data_matrix = Numeric.array(data_matrix)
        no_of_strains = data_matrix.shape[0]
        #NOTE(review): user/password are literals here -- consider moving
        #them out of the source.
        (conn, curs) = db_connect(self.hostname, self.dbname, self.schema,
                                  password='******', user='******')

        #2007-09-17 send strain_acc_list to the output_node
        strain_acc_list_pickle = cPickle.dumps(strain_acc_list, -1)
        self.communicator.send(strain_acc_list_pickle,
                               self.communicator.size - 1, 0)

        chr_start_ls = self.get_chr_start_ls(curs, snp_acc_list,
                                             self.snp_locus_table)
        #-1 means use the highest protocol
        chr_start_ls_pickle = cPickle.dumps(chr_start_ls, -1)
        data_matrix_pickle = cPickle.dumps(data_matrix, -1)
        #send it to the computing_node
        for node in free_computing_nodes:
            self.communicator.send(chr_start_ls_pickle, node, 0)
            self.communicator.send(data_matrix_pickle, node, 0)
    elif node_rank in free_computing_nodes:
        #same order as the sends above: chr_start_ls, then data_matrix
        data, source, tag = self.communicator.receiveString(0, 0)
        chr_start_ls = cPickle.loads(data)  #take the data
        data, source, tag = self.communicator.receiveString(0, 0)
        data_matrix = cPickle.loads(data)
    else:
        data, source, tag = self.communicator.receiveString(0, 0)
        strain_acc_list = cPickle.loads(data)

    mpi_synchronize(self.communicator)

    if node_rank == 0:
        parameter_list = [no_of_strains]
        self.input_node(self.communicator, parameter_list,
                        free_computing_nodes, self.message_size,
                        self.report)
    elif node_rank in free_computing_nodes:
        #three different ways to pick the parent-set and the child
        trio_arrangement_ls = [[0, 1, 2], [1, 2, 0], [2, 0, 1]]
        parameter_list = [data_matrix, chr_start_ls, trio_arrangement_ls]
        computing_node(self.communicator, parameter_list,
                       self.computing_node_handler, report=self.report)
    else:
        #The file used to be opened inside csv.writer(...) and was never
        #closed: "del writer" left it alive through parameter_list. Hold
        #the handle here and close it explicitly instead.
        outf = open(self.output_fname, 'w')
        try:
            writer = csv.writer(outf, delimiter='\t')
            parameter_list = [writer, strain_acc_list]
            output_node(self.communicator, free_computing_nodes,
                        parameter_list, self.output_node_handler,
                        self.report)
        finally:
            outf.close()