def run(self):
    """
    Assign array ids to every file found in self.input_dir.

    Connects to MySQL, builds an ArrayInfo helper plus the two ecotype
    lookup structures, then calls ArrayInfo.assignNewIdToThisArray() once
    per input file, reporting progress on stderr. The work is committed
    only when self.commit is true. The connection is always closed on the
    way out, including when an exception propagates.

    Attributes read from self: debug, dbname, hostname, user, passwd,
    array_info_table, experimenter, mapping_file, input_dir, output_dir,
    commit.

    Returns None.

    2008-04-11
        new way of handling raw cel files:
            md5sum each array, check if each array is in db already.
            if yes: skip it
            else:
                assign a new id and insert an entry into array_info_table
                copy the original cel file to output_dir and put array_id in the beginning of the filename.
    2008-02-28
    """
    if self.debug:
        import pdb
        pdb.set_trace()

    import MySQLdb
    conn = MySQLdb.connect(db=self.dbname, host=self.hostname, user=self.user, passwd=self.passwd)
    try:
        curs = conn.cursor()

        # Disabled code, kept for reference only:
        # filename2array_id_in_db = self.get_filename2array_id_in_db(curs, self.array_info_table)
        # filename2array_id = self.get_filename2array_id(self.input_dir, filename2array_id_in_db)
        # if self.commit:
        #     self.submit_filename2array_id(curs, filename2array_id, self.array_info_table)
        #     xypos2probes_id = self.get_xypos2probes_id(curs, self.probes_table)
        #     self.submit_all_array_data(filename2array_id, xypos2probes_id, curs, self.array_data_table)

        arrayInfo = ArrayInfo(curs=curs, array_info_table=self.array_info_table, user=self.user,
                              experimenter=self.experimenter, mapping_file=self.mapping_file)
        ecotypeid2tg_ecotypeid = get_ecotypeid2tg_ecotypeid(curs, debug=self.debug)
        nativename2ecotypeid_ls = getNativename2EcotypeIDLs(curs)
        input_fname_ls = self.get_all_files_in_input_dir(self.input_dir)

        for filename in input_fname_ls:
            sys.stderr.write("Assigning new id to %s ... " % filename)
            return_value = arrayInfo.assignNewIdToThisArray(
                filename, self.output_dir,
                ecotypeid2tg_ecotypeid=ecotypeid2tg_ecotypeid,
                nativename2ecotypeid_ls=nativename2ecotypeid_ls)
            # -1 is the only return value treated as a failure here.
            if return_value == -1:
                sys.stderr.write("Failed.\n")
            else:
                sys.stderr.write("\n")

        if self.commit:
            curs.execute("commit")
    finally:
        # Release the server connection even if a file fails half-way;
        # the original never closed it.
        conn.close()
def run(self):
    """
    Read the phenotype matrix in self.input_fname and store it via Stock_250kDB.

    Steps:
        1. connect with Stock_250kDB (tables are not created) and fetch the
           ecotype lookup structures.
        2. read self.input_fname with read_data() as floats and wrap it in SNPData.
        3. map the row ids to ecotype ids with self.straightenEcotypeID().
        4. inside one session transaction call
           self.putPhenotypeIntoDB() (run_type 1) or
           self.putReplicatePhenotypeIntoDB() (run_type 2);
           any other run_type only prints a message to stderr.
        5. commit if self.commit is true, otherwise roll back explicitly.

    Attributes read from self: debug, drivername, db_user, db_passwd,
    hostname, dbname, schema, input_fname, run_type, commit.

    Returns None.

    2009-5-28
    """
    if self.debug:
        import pdb
        pdb.set_trace()

    db = Stock_250kDB(drivername=self.drivername, username=self.db_user,
                      password=self.db_passwd, hostname=self.hostname,
                      database=self.dbname, schema=self.schema)
    db.setup(create_tables=False)

    nativename2tg_ecotypeid_set = getNativename2TgEcotypeIDSet(db.metadata.bind, turnUpperCase=True)
    ecotype_id_set_250k_in_pipeline = get_ecotype_id_set_250k_in_pipeline(ArrayInfo)
    ecotypeid2tg_ecotypeid = get_ecotypeid2tg_ecotypeid(db.metadata.bind)

    # turn_into_integer=2 because it's not nucleotides
    header_phen, strain_acc_list_phen, category_list_phen, data_matrix_phen = read_data(
        self.input_fname, turn_into_integer=2, matrix_data_type=float)
    data_matrix_phen = numpy.array(data_matrix_phen)
    # 2009-8-19 bug here. strain_acc_list_phen is not unique for each row. causing replicates to have the same value
    # from Association import Association
    # data_matrix_phen = Association.get_phenotype_matrix_in_data_matrix_order(strain_acc_list_phen, strain_acc_list_phen, data_matrix_phen)
    phenData = SNPData(header=header_phen, strain_acc_list=strain_acc_list_phen, data_matrix=data_matrix_phen)

    ecotype_id_ls = self.straightenEcotypeID(phenData.row_id_ls, nativename2tg_ecotypeid_set,
                                             ecotypeid2tg_ecotypeid, ecotype_id_set_250k_in_pipeline)

    session = db.session
    session.begin()
    if self.run_type == 1:
        self.putPhenotypeIntoDB(db, phenData, ecotype_id_ls)
    elif self.run_type == 2:
        self.putReplicatePhenotypeIntoDB(db, phenData, ecotype_id_ls)
    else:
        sys.stderr.write("Unsupported run type: %s.\n" % (self.run_type))

    if self.commit:
        session.commit()
    else:
        # Do not leave the transaction dangling on a dry run; end it
        # explicitly instead of relying on connection teardown.
        session.rollback()
def run(self):
    """
    Walk self.input_dir and hand every file to ArrayInfo.assignNewIdToThisArray().

    A MySQL connection is opened with the credentials stored on self, the
    ecotype lookups are loaded through the same cursor, and each input file
    is processed in turn; "Failed." is written to stderr for any file whose
    call returns -1. Changes are committed only if self.commit is set, and
    the connection is closed in all cases (normal exit or exception).

    Attributes read from self: debug, dbname, hostname, user, passwd,
    array_info_table, experimenter, mapping_file, input_dir, output_dir,
    commit.

    Returns None.

    2008-04-11
        new way of handling raw cel files:
            md5sum each array, check if each array is in db already.
            if yes: skip it
            else:
                assign a new id and insert an entry into array_info_table
                copy the original cel file to output_dir and put array_id in the beginning of the filename.
    2008-02-28
    """
    if self.debug:
        import pdb
        pdb.set_trace()

    import MySQLdb
    conn = MySQLdb.connect(db=self.dbname, host=self.hostname, user=self.user, passwd=self.passwd)
    try:
        curs = conn.cursor()

        # Disabled code, retained from the original for reference:
        # filename2array_id_in_db = self.get_filename2array_id_in_db(curs, self.array_info_table)
        # filename2array_id = self.get_filename2array_id(self.input_dir, filename2array_id_in_db)
        # if self.commit:
        #     self.submit_filename2array_id(curs, filename2array_id, self.array_info_table)
        #     xypos2probes_id = self.get_xypos2probes_id(curs, self.probes_table)
        #     self.submit_all_array_data(filename2array_id, xypos2probes_id, curs, self.array_data_table)

        arrayInfo = ArrayInfo(curs=curs, array_info_table=self.array_info_table, user=self.user,
                              experimenter=self.experimenter, mapping_file=self.mapping_file)
        ecotypeid2tg_ecotypeid = get_ecotypeid2tg_ecotypeid(curs, debug=self.debug)
        nativename2ecotypeid_ls = getNativename2EcotypeIDLs(curs)
        input_fname_ls = self.get_all_files_in_input_dir(self.input_dir)

        for filename in input_fname_ls:
            sys.stderr.write("Assigning new id to %s ... "%filename)
            return_value = arrayInfo.assignNewIdToThisArray(
                filename, self.output_dir,
                ecotypeid2tg_ecotypeid=ecotypeid2tg_ecotypeid,
                nativename2ecotypeid_ls=nativename2ecotypeid_ls)
            if return_value == -1:
                sys.stderr.write("Failed.\n")
            else:
                # Just terminate the progress line started above.
                sys.stderr.write("\n")

        if self.commit:
            curs.execute("commit")
    finally:
        # The original leaked the connection; always give it back.
        conn.close()
def run(self):
    """
    Drive the haplotype-group import for self.input_fname.

    Opens a StockDB session transaction, fetches the ecotype-id mapping,
    passes the input file through self.dropRedundantEcotypes(), collects
    the SNP ids from self.getSNPIDLs() and hands everything to
    self.putHaplotypeGroupIntoDB(). The transaction is committed (and the
    session cleared) when self.commit is true; otherwise it is rolled back.

    Attributes read from self: debug, drivername, db_user, db_passwd,
    hostname, dbname, schema, input_fname, max_snp_typing_error_rate,
    commit.

    Returns None.
    """
    if self.debug:
        import pdb
        pdb.set_trace()

    stock_db = StockDB.StockDB(
        drivername=self.drivername,
        username=self.db_user,
        password=self.db_passwd,
        hostname=self.hostname,
        database=self.dbname,
        schema=self.schema)
    stock_db.setup()

    txn = stock_db.session
    txn.begin()

    id_mapping = get_ecotypeid2tg_ecotypeid(stock_db.metadata.bind, debug=self.debug)
    row_by_tg_ecotypeid = self.dropRedundantEcotypes(self.input_fname, id_mapping)
    snp_ids = self.getSNPIDLs()
    self.putHaplotypeGroupIntoDB(txn, self.input_fname, row_by_tg_ecotypeid,
                                 self.max_snp_typing_error_rate, snp_ids)

    if not self.commit:
        # default is rollback(). to demonstrate good programming
        txn.rollback()
        return

    txn.commit()
    txn.clear()
def run(self):
    """
    Import the phenotype data in self.input_fname into the database.

    The file is read with read_data() (values as floats), wrapped in an
    SNPData object, and its row ids are converted to ecotype ids by
    self.straightenEcotypeID(). Depending on self.run_type the data goes to
    self.putPhenotypeIntoDB() (1) or self.putReplicatePhenotypeIntoDB() (2);
    other values produce an "Unsupported run type" message on stderr.
    The session transaction is committed when self.commit is true and
    rolled back otherwise.

    Attributes read from self: debug, drivername, db_user, db_passwd,
    hostname, dbname, schema, input_fname, run_type, commit.

    Returns None.

    2009-5-28
    """
    if self.debug:
        import pdb
        pdb.set_trace()

    db = Stock_250kDB(drivername=self.drivername, username=self.db_user,
                      password=self.db_passwd, hostname=self.hostname,
                      database=self.dbname, schema=self.schema)
    db.setup(create_tables=False)

    nativename2tg_ecotypeid_set = getNativename2TgEcotypeIDSet(db.metadata.bind, turnUpperCase=True)
    ecotype_id_set_250k_in_pipeline = get_ecotype_id_set_250k_in_pipeline(ArrayInfo)
    ecotypeid2tg_ecotypeid = get_ecotypeid2tg_ecotypeid(db.metadata.bind)

    # turn_into_integer=2 because it's not nucleotides
    header_phen, strain_acc_list_phen, category_list_phen, data_matrix_phen = read_data(
        self.input_fname, turn_into_integer=2, matrix_data_type=float)
    data_matrix_phen = numpy.array(data_matrix_phen)
    # 2009-8-19 bug here. strain_acc_list_phen is not unique for each row. causing replicates to have the same value
    # from Association import Association
    # data_matrix_phen = Association.get_phenotype_matrix_in_data_matrix_order(strain_acc_list_phen, strain_acc_list_phen, data_matrix_phen)
    phenData = SNPData(header=header_phen, strain_acc_list=strain_acc_list_phen, data_matrix=data_matrix_phen)

    ecotype_id_ls = self.straightenEcotypeID(phenData.row_id_ls, nativename2tg_ecotypeid_set,
                                             ecotypeid2tg_ecotypeid, ecotype_id_set_250k_in_pipeline)

    session = db.session
    session.begin()
    if self.run_type==1:
        self.putPhenotypeIntoDB(db, phenData, ecotype_id_ls)
    elif self.run_type==2:
        self.putReplicatePhenotypeIntoDB(db, phenData, ecotype_id_ls)
    else:
        sys.stderr.write("Unsupported run type: %s.\n"%(self.run_type))

    if self.commit:
        session.commit()
    else:
        # Explicitly end the transaction begun above when not committing,
        # rather than leaving it open until the connection goes away.
        session.rollback()