def run(self):
	"""QC the stock db call matrix against a reference comparison file.

	Flow: open a StockDB session, load the comparison matrix (snpData2)
	from cmp_data_filename, pull the call matrix (snpData1) out of the
	Calls/SNPs tables via a raw MySQLdb cursor, then compare the two with
	TwoSNPData.

	run_type==1: accession-wise QC; per-row NA/mismatch rates are
	optionally written to output_fname and submitted to call_QC.
	run_type==2: column-wise QC is not implemented (yields {} — see the
	2008-08-18 note below); any other run_type exits with status 5.
	Commits the session only when self.commit is set, else rolls back.
	"""
	if self.debug:
		import pdb
		pdb.set_trace()
	# ORM connection to the stock db; tables are assumed to exist already.
	db = StockDB.StockDB(drivername=self.drivername, username=self.db_user,
		password=self.db_passwd, hostname=self.hostname, database=self.dbname)
	db.setup(create_tables=False)
	session = db.session
	session.begin()
	# Resolve the comparison-data file from the QC method if not given explicitly.
	self.cmp_data_filename = self.findOutCmpDataFilename(self.cmp_data_filename, self.QC_method_id, StockDB.QCMethod)
	header, strain_acc_list, category_list, data_matrix = read_data(self.cmp_data_filename)
	strain_acc_list = map(int, strain_acc_list)	#it's ecotypeid, cast it to integer to be compatible to the later ecotype_id_ls from db
	snpData2 = SNPData(header=header, strain_acc_list=strain_acc_list, \
					data_matrix=data_matrix)	#category_list is not used.
	readme = formReadmeObj(sys.argv, self.ad, StockDB.README)
	session.save(readme)
	# Raw MySQLdb cursor for the helpers below that bypass the ORM.
	import MySQLdb
	conn = MySQLdb.connect(db=self.dbname, host=self.hostname, user = self.db_user, passwd = self.db_passwd)
	curs = conn.cursor()
	from dbSNP2data import dbSNP2data
	snp_id2index, snp_id_list, snp_id2info = dbSNP2data.get_snp_id2index_m(curs, StockDB.Calls.table.name, StockDB.SNPs.table.name)
	strain_info_data = self.get_strain_id_info(self.QC_method_id)
	data_matrix = self.get_data_matrix(db, strain_info_data.strain_id2index, snp_id2index, StockDB.Calls.table.name)
	strain_acc_list = [strain_info_data.strain_id2acc[strain_id] for strain_id in strain_info_data.strain_id_list]
	category_list = [strain_info_data.strain_id2category[strain_id] for strain_id in strain_info_data.strain_id_list]
	# Synthesize a header: two id columns followed by one SNP name per column.
	header = ['ecotypeid', 'strainid']
	for snp_id in snp_id_list:
		snp_name, chromosome, position = snp_id2info[snp_id]
		header.append(snp_name)
	snpData1 = SNPData(header=header, strain_acc_list=strain_acc_list, category_list=category_list, data_matrix=data_matrix, \
					snps_table='stock.snps')	#snps_table is set to the stock_250k snps_table
	# row_matching_by_which_value=0: match rows by the first id column (ecotypeid).
	twoSNPData = TwoSNPData(SNPData1=snpData1, SNPData2=snpData2, curs=curs, \
					QC_method_id=self.QC_method_id, user=self.db_user, row_matching_by_which_value=0,
					debug=self.debug)
	if self.run_type==1:
		row_id2NA_mismatch_rate = twoSNPData.cmp_row_wise()
	elif self.run_type==2:
		#twoSNPData.save_col_wise(session, readme)	#2008-08-18 need to implement a new one for 149SNP
		row_id2NA_mismatch_rate = {}
	else:
		sys.stderr.write("run_type=%s is not supported.\n"%self.run_type)
		sys.exit(5)
	if self.output_fname and self.run_type==1 and row_id2NA_mismatch_rate:
		self.output_row_id2NA_mismatch_rate(row_id2NA_mismatch_rate, self.output_fname)
	if self.run_type==1 and self.commit and not self.input_dir and row_id2NA_mismatch_rate:
		#if self.input_dir is given, no db submission. call_info_id2fname here is fake, it's actually keyed by (array_id, ecotypeid)
		#row_id2NA_mismatch_rate might be None if it's method 0.
		self.submit_to_call_QC(session, row_id2NA_mismatch_rate, self.QC_method_id, self.db_user, \
					twoSNPData.row_id12row_id2, readme)
	if self.commit:
		session.commit()
	else:
		session.rollback()
def run(self):
	"""Work out the A/B allele mapping between two SNP matrices.

	Loads the dbsnp-side matrix (input_fname1, with snps-name -> snps-id
	translation attached) and the stock_250k-side matrix (input_fname2),
	pairs them in a TwoSNPData384, records a README row, and lets
	figureOutABMapping persist the mapping.  The transaction is committed
	only when self.commit is true; otherwise it is rolled back.

	History: 2008-08-11 db interface changed in variation.src.dbsnp;
	2008-05-06 created.
	"""
	import MySQLdb
	raw_conn = MySQLdb.connect(db=self.dbname, host=self.hostname,
							user=self.user, passwd=self.passwd)
	raw_cursor = raw_conn.cursor()
	if self.debug:
		import pdb
		pdb.set_trace()
	orm_db = DBSNP(username=self.user, password=self.passwd,
				hostname=self.hostname, database=self.dbname)
	orm_session = orm_db.session
	orm_session.begin()
	snps_name2possible_mappings, snps_name2snps_id = \
		self.get_snps_name2possible_mappings(orm_db)
	from variation.src.FilterStrainSNPMatrix import FilterStrainSNPMatrix
	# Left-hand matrix: dbsnp side, carrying the name -> id translation.
	header, strain_acc_list, category_list, data_matrix = \
		FilterStrainSNPMatrix.read_data(self.input_fname1)
	left_matrix = SNPData(header=header, strain_acc_list=strain_acc_list,
						category_list=category_list, data_matrix=data_matrix,
						col_id2id=snps_name2snps_id, snps_table='dbsnp.snps')
	# Right-hand matrix: stock_250k side.
	header, strain_acc_list, category_list, data_matrix = \
		FilterStrainSNPMatrix.read_data(self.input_fname2)
	right_matrix = SNPData(header=header, strain_acc_list=strain_acc_list,
						category_list=category_list, data_matrix=data_matrix,
						snps_table='stock_250k.snps')
	paired = TwoSNPData384(SNPData1=left_matrix, SNPData2=right_matrix,
						curs=raw_cursor, user=self.user)
	readme = formReadmeObj(sys.argv, self.ad, README)
	orm_session.save(readme)
	orm_session.flush()
	paired.figureOutABMapping(orm_session, readme, snps_name2possible_mappings)
	if self.commit:
		raw_cursor.execute("commit")
		orm_session.commit()
	else:
		orm_session.rollback()
def run(self):
	"""MPI driver for the QC cross-match.

	Rank layout (established by the rank tests below): rank 0 is the input
	node, rank size-1 is the output node, ranks 1..size-2 are the free
	computing nodes.  Rank 0 prepares the TwoSNPData and ships a pickle of
	it to every computing node; the output node records a README row and
	writes results (optionally to output_fname as tab-separated CSV).
	Only the non-computing ranks open a db connection.
	"""
	self.communicator = MPI.world.duplicate()
	node_rank = self.communicator.rank
	free_computing_nodes = range(1, self.communicator.size-1)	#exclude the 1st and last node
	free_computing_node_set = Set(free_computing_nodes)
	output_node_rank = self.communicator.size-1
	if node_rank not in free_computing_node_set:	#computing nodes don't need db connection
		db = StockDB.StockDB(drivername=self.drivername, username=self.db_user,
			password=self.db_passwd, hostname=self.hostname, database=self.dbname)
		db.setup(create_tables=False)
		session = db.session
	if node_rank == 0:
		# Input node: build the paired SNP data once and broadcast it,
		# pickled (protocol -1 = highest), to every computing node.
		twoSNPData = self.prepareTwoSNPData(db)
		twoSNPData_pickle = cPickle.dumps(twoSNPData, -1)
		for node in free_computing_nodes:	#send it to the computing_node
			sys.stderr.write("passing initial data to nodes from %s to %s ... "%(node_rank, node))
			self.communicator.send(twoSNPData_pickle, node, 0)
			sys.stderr.write(".\n")
		del twoSNPData_pickle
	elif node_rank in free_computing_node_set:
		# Computing node: receive and unpickle the shared TwoSNPData.
		data, source, tag = self.communicator.receiveString(0, 0)
		twoSNPData = cPickle.loads(data)
		del data
		sys.stderr.write(".\n")
	else:
		# Output node: the only rank that records the README row.
		readme = formReadmeObj(sys.argv, self.ad, StockDB.README)
		session.save(readme)
	mw = MPIwrapper(self.communicator, debug=self.debug, report=self.report)
	mw.synchronize()
	if node_rank == 0:
		param_obj = PassingData(communicator=self.communicator, twoSNPData=twoSNPData, output_node_rank=output_node_rank, \
			QC_method_id=self.QC_method_id, output_fname=getattr(self, 'output_fname', None), report=self.report)
		self.input_node(param_obj, free_computing_nodes, self.message_size)
	elif node_rank in free_computing_node_set:
		computing_parameter_obj = PassingData(twoSNPData=twoSNPData, QC_method_id=self.QC_method_id)
		mw.computing_node(computing_parameter_obj, self.computing_node_handler)
	else:
		# writer stays None when no output_fname was supplied; the
		# output handler is expected to cope with that — TODO confirm.
		if getattr(self, 'output_fname', None):
			writer = csv.writer(open(self.output_fname, 'w'), delimiter='\t')
		else:
			writer = None
		param_obj = PassingData(writer=writer, session=session, commit=self.commit,
			QC_method_id=self.QC_method_id, readme=readme, is_header_written=False)
		mw.output_node(free_computing_nodes, param_obj, self.output_node_handler)
		del writer
	mw.synchronize()	#to avoid some node early exits
def run(self):
	"""Map A/B alleles from the dbsnp matrix onto the stock_250k matrix.

	Builds one SNPData per input file, pairs them via TwoSNPData384 and
	delegates persistence to figureOutABMapping.  README bookkeeping is
	flushed before the mapping runs; everything commits only when
	self.commit is set, else the session rolls back.

	History: 2008-08-11 db interface changed in variation.src.dbsnp;
	2008-05-06 created.
	"""
	import MySQLdb
	connection = MySQLdb.connect(db=self.dbname, host=self.hostname,
								user=self.user, passwd=self.passwd)
	cursor = connection.cursor()
	if self.debug:
		import pdb
		pdb.set_trace()
	dbsnp_db = DBSNP(username=self.user, password=self.passwd,
					hostname=self.hostname, database=self.dbname)
	db_session = dbsnp_db.session
	db_session.begin()
	mapping_pair = self.get_snps_name2possible_mappings(dbsnp_db)
	snps_name2possible_mappings = mapping_pair[0]
	snps_name2snps_id = mapping_pair[1]
	from variation.src.FilterStrainSNPMatrix import FilterStrainSNPMatrix
	# First matrix (dbsnp side) gets the snps-name -> snps-id lookup.
	fields_one = FilterStrainSNPMatrix.read_data(self.input_fname1)
	header, strain_acc_list, category_list, data_matrix = fields_one
	snp_data_one = SNPData(header=header, strain_acc_list=strain_acc_list,
						category_list=category_list, data_matrix=data_matrix,
						col_id2id=snps_name2snps_id, snps_table='dbsnp.snps')
	# Second matrix is the stock_250k side.
	fields_two = FilterStrainSNPMatrix.read_data(self.input_fname2)
	header, strain_acc_list, category_list, data_matrix = fields_two
	snp_data_two = SNPData(header=header, strain_acc_list=strain_acc_list,
						category_list=category_list, data_matrix=data_matrix,
						snps_table='stock_250k.snps')
	pair384 = TwoSNPData384(SNPData1=snp_data_one, SNPData2=snp_data_two,
						curs=cursor, user=self.user)
	readme = formReadmeObj(sys.argv, self.ad, README)
	db_session.save(readme)
	db_session.flush()
	pair384.figureOutABMapping(db_session, readme, snps_name2possible_mappings)
	if self.commit:
		cursor.execute("commit")
		db_session.commit()
	else:
		db_session.rollback()
def run(self):
	"""Cross-match accessions pairwise and store the distances.

	Prepares a TwoSNPData from the stock db, computes the pairwise
	distance matrix (cal_row_id2pairwise_dist), then submits it to the
	QC cross-match table.  Note that commit/rollback is delegated to
	submitToQCCrossMatch via the `commit` argument — no session.begin()
	is issued here (the call is commented out below).
	"""
	if self.debug:
		import pdb
		pdb.set_trace()
	db = StockDB.StockDB(drivername=self.drivername, username=self.db_user,
		password=self.db_passwd, hostname=self.hostname, database=self.dbname)
	db.setup(create_tables=False)
	session = db.session
	#session.begin()
	readme = formReadmeObj(sys.argv, self.ad, StockDB.README)
	session.save(readme)
	twoSNPData = self.prepareTwoSNPData(db, report=self.report)
	twoSNPData.cal_row_id2pairwise_dist()
	# Thresholds filter which pairs get submitted — presumably only pairs
	# below max_mismatch_rate with enough non-NA calls; verify in
	# submitToQCCrossMatch.
	self.submitToQCCrossMatch(session, twoSNPData.row_id2pairwise_dist, self.QC_method_id, readme, self.commit,\
		max_mismatch_rate=self.max_mismatch_rate, min_no_of_non_NA_pairs=self.min_no_of_non_NA_pairs)
	# NOTE(review): the triple-quote below opens a block-commented region
	# that continues beyond this view — left untouched.
	"""
def run(self):
	"""QC the 250k call data against a comparison SNP matrix.

	2008-04-25
		return None if QC_method_id==0
	2008-04-20
		for plone to call it just to get row_id2NA_mismatch_rate

	Overview (grounded in the branches below):
	- QC_method_id==0: only compute independent NA rates; no mismatch map.
	- input_dir is a file: read it as a SNP-by-strain matrix (double
	  header) and QC it directly.
	- otherwise: fetch call_info files from the db (optionally filtering
	  out entries already QCed) and QC them, either in one batch or, with
	  self.one_by_one under run_type 1, one call_info at a time.
	Results land in self.row_id2NA_mismatch_rate for the caller (plone);
	db submission only happens for run_type 1 with commit set and no
	input_dir.
	"""
	# database connection and etc
	db = Stock_250kDB.Stock_250kDB(
		drivername=self.drivername,
		username=self.user,
		password=self.passwd,
		hostname=self.hostname,
		database=self.dbname,
	)
	db.setup()
	session = db.session
	session.begin()
	# transaction = session.create_transaction()
	self.cmp_data_filename = self.findOutCmpDataFilename(
		self.cmp_data_filename, self.QC_method_id, self.QCMethod_class
	)
	qm = self.QCMethod_class.query.get(self.QC_method_id)  # 2009-5-20
	# Raw cursor kept on self for helpers that bypass the ORM.
	import MySQLdb
	conn = MySQLdb.connect(db=self.dbname, host=self.hostname, user=self.user, passwd=self.passwd)
	curs = conn.cursor()
	self.curs = curs
	if self.debug:
		import pdb
		pdb.set_trace()
	readme = formReadmeObj(sys.argv, self.ad, Stock_250kDB.README)
	session.save(readme)
	QC_method_id2snps_table = self.QC_method_id2snps_table
	if self.QC_method_id == 0:
		# Method 0: NA-rate only; no row-vs-row mismatch computation.
		self.cal_independent_NA_rate(db, self.min_probability, readme)
		row_id2NA_mismatch_rate = None
	else:
		# from variation.src.FilterStrainSNPMatrix import FilterStrainSNPMatrix
		header, strain_acc_list, category_list, data_matrix = read_data(
			self.cmp_data_filename, ignore_het=qm.ignore_het
		)
		strain_acc_list = map(
			int, strain_acc_list
		)  # it's ecotypeid, cast it to integer to be compatible to the later ecotype_id_ls from db
		snpData2 = SNPData(
			header=header,
			strain_acc_list=strain_acc_list,
			data_matrix=data_matrix,
			snps_table=QC_method_id2snps_table.get(self.QC_method_id),
			ignore_het=qm.ignore_het,
		)  # category_list is not used. 05/20/09 ignore_het is useless cuz data_matrix is provided.
		"""
		if self.input_dir and os.path.isdir(self.input_dir):
			#04/22/08 Watch: call_info_id2fname here is fake, it's actually keyed by (array_id, ecotypeid)
			#no submission to db
			call_info_id2fname = self.get_array_id2fname(curs, self.input_dir)
		"""
		if self.input_dir and os.path.isfile(self.input_dir):  # it's file
			call_info_id2fname = None
		else:
			if self.run_type == 2:
				# no filtering on call_info entries that have been QCed.
				filter_calls_QCed = 0
			elif self.run_type == 1:
				filter_calls_QCed = 1
				self.max_call_info_mismatch_rate = 1  # don't use this when doing accession-wise QC
			else:
				sys.stderr.write("run_type=%s is not supported.\n" % self.run_type)
				sys.exit(5)
			call_data = self.get_call_info_id2fname(
				db,
				self.QC_method_id,
				self.call_method_id,
				filter_calls_QCed,
				self.max_call_info_mismatch_rate,
				self.debug,
				min_no_of_non_NA_pairs=self.min_no_of_non_NA_pairs,
				input_dir=self.input_dir,
			)
			call_info_id2fname = call_data.call_info_id2fname
			call_info_ls_to_return = call_data.call_info_ls_to_return
		if self.run_type == 2:
			snps_name2snps_id = self.get_snps_name2snps_id(db)
		else:
			snps_name2snps_id = None
		if call_info_id2fname:
			if self.one_by_one and self.run_type == 1:
				# one_by_one only for QC by accession: feed each call_info
				# file through qcDataMatrixVSsnpData separately and merge.
				row_id2NA_mismatch_rate = {}
				row_id12row_id2 = {}
				counter = 0
				for call_info_id, value in call_info_id2fname.iteritems():
					counter += 1
					print "No", counter
					tmp_dict = {}
					tmp_dict[call_info_id] = value
					pdata = self.read_call_matrix(
						tmp_dict, self.min_probability
					)  # 05/20/09 no need for qm.ignore_het because 250k is all h**o
					passingdata = self.qcDataMatrixVSsnpData(
						pdata, snps_name2snps_id, snpData2, curs, session, readme
					)
					row_id2NA_mismatch_rate.update(passingdata.row_id2NA_mismatch_rate)
					row_id12row_id2.update(passingdata.row_id12row_id2)
					del pdata
					# In debug mode stop after 10 call_info entries.
					if self.debug and counter == 10:
						break
			else:
				pdata = self.read_call_matrix(
					call_info_id2fname, self.min_probability
				)  # 05/20/09 no need for qm.ignore_het because 250k is all h**o
				passingdata = self.qcDataMatrixVSsnpData(pdata,
					snps_name2snps_id, snpData2, curs, session, readme)
				row_id2NA_mismatch_rate = passingdata.row_id2NA_mismatch_rate
				row_id12row_id2 = passingdata.row_id12row_id2
				del pdata
		else:
			# input file is SNP by strain format. double header (1st two lines)
			header, snps_name_ls, category_list, data_matrix = read_data(
				self.input_dir, double_header=1, ignore_het=qm.ignore_het
			)
			pdata = PassingData()
			pdata.ecotype_id_ls = header[0][2:]
			pdata.call_info_id_ls = header[1][2:]
			# Transpose so rows become strains, matching the other branch.
			data_matrix = numpy.array(data_matrix)
			pdata.data_matrix = data_matrix.transpose()
			pdata.header = ["", ""] + snps_name_ls  # fake a header for SNPData
			passingdata = self.qcDataMatrixVSsnpData(pdata, snps_name2snps_id, snpData2, curs, session, readme)
			row_id2NA_mismatch_rate = passingdata.row_id2NA_mismatch_rate
			row_id12row_id2 = passingdata.row_id12row_id2
			del pdata
	if self.output_fname and self.run_type == 1 and row_id2NA_mismatch_rate:
		self.output_row_id2NA_mismatch_rate(row_id2NA_mismatch_rate, self.output_fname)
	if self.run_type == 1 and self.commit and not self.input_dir and row_id2NA_mismatch_rate:
		# if self.input_dir is given, no db submission. call_info_id2fname here is fake, it's actually keyed by (array_id, ecotypeid)
		# row_id2NA_mismatch_rate might be None if it's method 0.
		self.submit_to_call_QC(
			session,
			row_id2NA_mismatch_rate,
			self.QC_method_id,
			self.user,
			self.min_probability,
			row_id12row_id2,
			self.call_method_id,
			readme,
		)
	if self.commit:
		curs.execute("commit")
		session.commit()
	else:
		session.rollback()
	self.row_id2NA_mismatch_rate = row_id2NA_mismatch_rate  # for plone to get the data structure
def run(self):
	"""MPI driver for the QC cross-match (formatted variant).

	Rank roles, per the rank tests below: rank 0 is the input node, the
	last rank (size-1) is the output node, and ranks 1..size-2 are the
	computing nodes.  Rank 0 builds the TwoSNPData and broadcasts a pickle
	of it; the output node records a README row and writes results
	(tab-separated, if output_fname is set).  Only non-computing ranks
	open a db connection.
	"""
	self.communicator = MPI.world.duplicate()
	node_rank = self.communicator.rank
	free_computing_nodes = range(1, self.communicator.size - 1)  #exclude the 1st and last node
	free_computing_node_set = Set(free_computing_nodes)
	output_node_rank = self.communicator.size - 1
	if node_rank not in free_computing_node_set:  #computing nodes don't need db connection
		db = StockDB.StockDB(drivername=self.drivername,
							username=self.db_user,
							password=self.db_passwd,
							hostname=self.hostname,
							database=self.dbname)
		db.setup(create_tables=False)
		session = db.session
	if node_rank == 0:
		# Input node: prepare once, pickle at highest protocol (-1), and
		# ship to every computing node.
		twoSNPData = self.prepareTwoSNPData(db)
		twoSNPData_pickle = cPickle.dumps(twoSNPData, -1)
		for node in free_computing_nodes:  #send it to the computing_node
			sys.stderr.write(
				"passing initial data to nodes from %s to %s ... " %
				(node_rank, node))
			self.communicator.send(twoSNPData_pickle, node, 0)
			sys.stderr.write(".\n")
		del twoSNPData_pickle
	elif node_rank in free_computing_node_set:
		# Computing node: receive and unpickle the shared TwoSNPData.
		data, source, tag = self.communicator.receiveString(0, 0)
		twoSNPData = cPickle.loads(data)
		del data
		sys.stderr.write(".\n")
	else:
		# Output node: sole recorder of the README row.
		readme = formReadmeObj(sys.argv, self.ad, StockDB.README)
		session.save(readme)
	mw = MPIwrapper(self.communicator, debug=self.debug, report=self.report)
	mw.synchronize()
	if node_rank == 0:
		param_obj = PassingData(communicator=self.communicator, twoSNPData=twoSNPData, output_node_rank=output_node_rank, \
			QC_method_id=self.QC_method_id, output_fname=getattr(self, 'output_fname', None), report=self.report)
		self.input_node(param_obj, free_computing_nodes, self.message_size)
	elif node_rank in free_computing_node_set:
		computing_parameter_obj = PassingData(
			twoSNPData=twoSNPData, QC_method_id=self.QC_method_id)
		mw.computing_node(computing_parameter_obj, self.computing_node_handler)
	else:
		# writer stays None without an output_fname; presumably the
		# output handler tolerates that — TODO confirm.
		if getattr(self, 'output_fname', None):
			writer = csv.writer(open(self.output_fname, 'w'), delimiter='\t')
		else:
			writer = None
		param_obj = PassingData(writer=writer, session=session, commit=self.commit,
			QC_method_id=self.QC_method_id, readme=readme, is_header_written=False)
		mw.output_node(free_computing_nodes, param_obj, self.output_node_handler)
		del writer
	mw.synchronize()  #to avoid some node early exits
def run(self):
	"""QC the stock call matrix against the reference file (formatted variant).

	Loads the comparison matrix (snpData2) from cmp_data_filename,
	assembles the db-side matrix (snpData1) from the Calls/SNPs tables via
	a raw cursor, and compares them row-wise with TwoSNPData.

	run_type==1: accession-wise mismatch rates, optionally written to
	output_fname and submitted to call_QC; run_type==2 is a placeholder
	(yields {}); anything else exits with status 5.  Commits only when
	self.commit is set, else rolls back.
	"""
	if self.debug:
		import pdb
		pdb.set_trace()
	# ORM connection to the stock db; tables assumed to exist.
	db = StockDB.StockDB(drivername=self.drivername,
						username=self.db_user,
						password=self.db_passwd,
						hostname=self.hostname,
						database=self.dbname)
	db.setup(create_tables=False)
	session = db.session
	session.begin()
	# Resolve the comparison file from the QC method if not given explicitly.
	self.cmp_data_filename = self.findOutCmpDataFilename(
		self.cmp_data_filename, self.QC_method_id, StockDB.QCMethod)
	header, strain_acc_list, category_list, data_matrix = read_data(
		self.cmp_data_filename)
	strain_acc_list = map(
		int, strain_acc_list
	)  #it's ecotypeid, cast it to integer to be compatible to the later ecotype_id_ls from db
	snpData2 = SNPData(header=header, strain_acc_list=strain_acc_list, \
					data_matrix=data_matrix)  #category_list is not used.
	readme = formReadmeObj(sys.argv, self.ad, StockDB.README)
	session.save(readme)
	# Raw MySQLdb cursor for the non-ORM helpers below.
	import MySQLdb
	conn = MySQLdb.connect(db=self.dbname,
						host=self.hostname,
						user=self.db_user,
						passwd=self.db_passwd)
	curs = conn.cursor()
	from dbSNP2data import dbSNP2data
	snp_id2index, snp_id_list, snp_id2info = dbSNP2data.get_snp_id2index_m(
		curs, StockDB.Calls.table.name, StockDB.SNPs.table.name)
	strain_info_data = self.get_strain_id_info(self.QC_method_id)
	data_matrix = self.get_data_matrix(db, strain_info_data.strain_id2index,
						snp_id2index, StockDB.Calls.table.name)
	strain_acc_list = [
		strain_info_data.strain_id2acc[strain_id]
		for strain_id in strain_info_data.strain_id_list
	]
	category_list = [
		strain_info_data.strain_id2category[strain_id]
		for strain_id in strain_info_data.strain_id_list
	]
	# Synthesize a header: two id columns, then one SNP name per column.
	header = ['ecotypeid', 'strainid']
	for snp_id in snp_id_list:
		snp_name, chromosome, position = snp_id2info[snp_id]
		header.append(snp_name)
	snpData1 = SNPData(header=header, strain_acc_list=strain_acc_list, category_list=category_list, data_matrix=data_matrix, \
					snps_table='stock.snps')  #snps_table is set to the stock_250k snps_table
	# row_matching_by_which_value=0: rows matched on the first id column.
	twoSNPData = TwoSNPData(SNPData1=snpData1, SNPData2=snpData2, curs=curs, \
					QC_method_id=self.QC_method_id, user=self.db_user,
					row_matching_by_which_value=0, debug=self.debug)
	if self.run_type == 1:
		row_id2NA_mismatch_rate = twoSNPData.cmp_row_wise()
	elif self.run_type == 2:
		#twoSNPData.save_col_wise(session, readme)  #2008-08-18 need to implement a new one for 149SNP
		row_id2NA_mismatch_rate = {}
	else:
		sys.stderr.write("run_type=%s is not supported.\n" % self.run_type)
		sys.exit(5)
	if self.output_fname and self.run_type == 1 and row_id2NA_mismatch_rate:
		self.output_row_id2NA_mismatch_rate(row_id2NA_mismatch_rate,
						self.output_fname)
	if self.run_type == 1 and self.commit and not self.input_dir and row_id2NA_mismatch_rate:
		#if self.input_dir is given, no db submission. call_info_id2fname here is fake, it's actually keyed by (array_id, ecotypeid)
		#row_id2NA_mismatch_rate might be None if it's method 0.
		self.submit_to_call_QC(session, row_id2NA_mismatch_rate, self.QC_method_id, self.db_user, \
					twoSNPData.row_id12row_id2, readme)
	if self.commit:
		session.commit()
	else:
		session.rollback()
def run(self):
	"""QC the 250k call data against a comparison matrix (id-indexed variant).

	2008-04-25
		return None if QC_method_id==0
	2008-04-20
		for plone to call it just to get row_id2NA_mismatch_rate

	Same overall flow as the other 250k QC run(): method 0 computes only
	NA rates; otherwise the comparison matrix is QCed against either a
	single input file (SNP-by-strain, double header) or call_info files
	fetched from the db.  This variant additionally resolves db SNP ids to
	(chromosome, position) and file indices (db_id2chr_pos / db_id2index)
	before reading call matrices, and uses session.add() (newer SQLAlchemy
	API).  Results end up in self.row_id2NA_mismatch_rate for plone.
	"""
	#database connection and etc
	db = Stock_250kDB.Stock_250kDB(drivername=self.drivername,
								username=self.user,
								password=self.passwd,
								hostname=self.hostname,
								database=self.dbname)
	db.setup(create_tables=False)
	session = db.session
	session.begin()
	#transaction = session.create_transaction()
	self.cmp_data_filename = self.findOutCmpDataFilename(
		self.cmp_data_filename, self.QC_method_id, self.QCMethod_class)
	qm = self.QCMethod_class.query.get(self.QC_method_id)  #2009-5-20
	# Raw cursor kept on self for helpers that bypass the ORM.
	import MySQLdb
	conn = MySQLdb.connect(db=self.dbname,
						host=self.hostname,
						user=self.user,
						passwd=self.passwd)
	curs = conn.cursor()
	self.curs = curs
	if self.debug:
		import pdb
		pdb.set_trace()
	readme = formReadmeObj(sys.argv, self.ad, Stock_250kDB.README)
	session.add(readme)
	QC_method_id2snps_table = self.QC_method_id2snps_table
	if self.QC_method_id == 0:
		# Method 0: NA-rate only; no row-vs-row mismatch computation.
		self.cal_independent_NA_rate(db, self.min_probability, readme)
		row_id2NA_mismatch_rate = None
	else:
		#from variation.src.FilterStrainSNPMatrix import FilterStrainSNPMatrix
		header, strain_acc_list, category_list, data_matrix = read_data(
			self.cmp_data_filename, ignore_het=qm.ignore_het)
		strain_acc_list = map(
			int, strain_acc_list
		)  #it's ecotypeid, cast it to integer to be compatible to the later ecotype_id_ls from db
		snpData2 = SNPData(header=header, strain_acc_list=strain_acc_list, \
				data_matrix=data_matrix, snps_table=QC_method_id2snps_table.get(self.QC_method_id),\
				ignore_het=qm.ignore_het)  #category_list is not used. 05/20/09 ignore_het is useless cuz data_matrix is provided.
		"""
		if self.input_dir and os.path.isdir(self.input_dir):
			#04/22/08 Watch: call_info_id2fname here is fake, it's actually keyed by (array_id, ecotypeid)
			#no submission to db
			call_info_id2fname = self.get_array_id2fname(curs, self.input_dir)
		"""
		if self.input_dir and os.path.isfile(self.input_dir):  #it's file
			call_info_id2fname = None
		else:
			if self.run_type == 2:  #no filtering on call_info entries that have been QCed.
				filter_calls_QCed = 0
			elif self.run_type == 1:
				filter_calls_QCed = 1
				self.max_call_info_mismatch_rate = 1  #don't use this when doing accession-wise QC
			else:
				sys.stderr.write("run_type=%s is not supported.\n" %
								self.run_type)
				sys.exit(5)
			call_data = self.get_call_info_id2fname(db, self.QC_method_id, self.call_method_id, \
					filter_calls_QCed, self.max_call_info_mismatch_rate, self.debug,\
					min_no_of_non_NA_pairs=self.min_no_of_non_NA_pairs, input_dir=self.input_dir)
			call_info_id2fname = call_data.call_info_id2fname
			call_info_ls_to_return = call_data.call_info_ls_to_return
		if self.run_type == 2:
			snps_name2snps_id = self.get_snps_name2snps_id(db)
		else:
			snps_name2snps_id = None
		if call_info_id2fname:
			# Resolve db SNP ids to chromosome/position and to the column
			# index inside one sample call file before reading matrices.
			db_id2chr_pos = db.getSNPID2ChrPos()  #2011-22
			from DB_250k2data import DB_250k2Data
			db_id2index = DB_250k2Data.getSNPID2index(
				call_info_id2fname.values()[0][1], db_id2chr_pos)
			if self.one_by_one and self.run_type == 1:
				#one_by_one only for QC by accession: QC each call_info
				# file separately and merge the per-row results.
				row_id2NA_mismatch_rate = {}
				row_id12row_id2 = {}
				counter = 0
				for call_info_id, value in call_info_id2fname.iteritems():
					counter += 1
					print "No", counter
					tmp_dict = {}
					tmp_dict[call_info_id] = value
					pdata = self.read_call_matrix(
						tmp_dict, self.min_probability,
						db_id2chr_pos=db_id2chr_pos,
						db_id2index=db_id2index)
					#05/20/09 no need for qm.ignore_het because 250k is all h**o
					passingdata = self.qcDataMatrixVSsnpData(
						pdata, snps_name2snps_id, snpData2, curs, session,
						readme)
					row_id2NA_mismatch_rate.update(
						passingdata.row_id2NA_mismatch_rate)
					row_id12row_id2.update(passingdata.row_id12row_id2)
					del pdata
					# In debug mode stop after 10 call_info entries.
					if self.debug and counter == 10:
						break
			else:
				pdata = self.read_call_matrix(call_info_id2fname,
						self.min_probability,
						db_id2chr_pos=db_id2chr_pos,
						db_id2index=db_id2index)
				#05/20/09 no need for qm.ignore_het because 250k is all h**o
				passingdata = self.qcDataMatrixVSsnpData(
					pdata, snps_name2snps_id, snpData2, curs, session, readme)
				row_id2NA_mismatch_rate = passingdata.row_id2NA_mismatch_rate
				row_id12row_id2 = passingdata.row_id12row_id2
				del pdata
		else:  #input file is SNP by strain format. double header (1st two lines)
			header, snps_name_ls, category_list, data_matrix = read_data(
				self.input_dir, double_header=1, ignore_het=qm.ignore_het)
			pdata = PassingData()
			pdata.ecotype_id_ls = header[0][2:]
			pdata.call_info_id_ls = header[1][2:]
			# Transpose so rows become strains, matching the other branch.
			data_matrix = numpy.array(data_matrix)
			pdata.data_matrix = data_matrix.transpose()
			pdata.header = ['', ''
							] + snps_name_ls  #fake a header for SNPData
			passingdata = self.qcDataMatrixVSsnpData(
				pdata, snps_name2snps_id, snpData2, curs, session, readme)
			row_id2NA_mismatch_rate = passingdata.row_id2NA_mismatch_rate
			row_id12row_id2 = passingdata.row_id12row_id2
			del pdata
	if self.output_fname and self.run_type == 1 and row_id2NA_mismatch_rate:
		self.output_row_id2NA_mismatch_rate(row_id2NA_mismatch_rate,
						self.output_fname)
	if self.run_type == 1 and self.commit and not self.input_dir and row_id2NA_mismatch_rate:
		#if self.input_dir is given, no db submission. call_info_id2fname here is fake, it's actually keyed by (array_id, ecotypeid)
		#row_id2NA_mismatch_rate might be None if it's method 0.
		self.submit_to_call_QC(session, row_id2NA_mismatch_rate, self.QC_method_id, self.user, self.min_probability, \
				row_id12row_id2, self.call_method_id, readme)
	if self.commit:
		curs.execute("commit")
		session.commit()
	else:
		session.rollback()
	self.row_id2NA_mismatch_rate = row_id2NA_mismatch_rate  #for plone to get the data structure