def run(self):
    """
    Build the results-method matrix, then optionally save it as text and as a figure.

    Connects to the database described by the instance's connection
    attributes, collects the results-method info and the per-method gene
    sets, and turns them into a data matrix via getDataMatrix(). The process
    exits with status 3 when that matrix is empty. The matrix is written to
    self.output_fname and/or drawn (together with a colour legend) into
    self.fig_fname when those attributes are set.
    """
    if self.debug:
        import pdb
        pdb.set_trace()

    connection_options = dict(
        drivername=self.drivername,
        username=self.db_user,
        password=self.db_passwd,
        hostname=self.hostname,
        database=self.dbname,
        schema=self.schema,
    )
    db = Stock_250kDB(**connection_options)
    db.setup()
    session = db.session

    method_info = self.getResultsMethodIDInfo(
        db, self.call_method_id_ls, self.min_distance, self.get_closest, self.min_MAF
    )
    gene_sets_by_method = self.getResultsMethodID2GeneSet(
        db, method_info, self.results_directory, self.max_rank
    )
    rdata = self.getDataMatrix(gene_sets_by_method, method_info)

    # The results-method labels serve as column header and as row labels.
    labels = method_info.results_method_id_label_ls
    header = ['', ''] + labels
    category_list = method_info.results_method_id_ls

    if SNPData.isDataMatrixEmpty(rdata.data_matrix):
        sys.stderr.write("Nothing fetched from database.\n")
        sys.exit(3)

    if self.output_fname:
        write_data_matrix(rdata.data_matrix, self.output_fname, header, labels, category_list)

    if not self.fig_fname:
        return

    font = get_font(self.font_path, font_size=self.font_size)

    def value2color_func(x):
        # 2008-08-01: colour is derived from the value and rdata's min/max.
        return Value2Color.value2HSLcolor(x, rdata.min_value, rdata.max_value)

    legend_image = drawContinousLegend(
        rdata.min_value, rdata.max_value, self.no_of_ticks, value2color_func, font
    )
    matrix_image = drawMatrix(
        rdata.data_matrix, value2color_func, labels, labels, with_grid=1, font=font
    )
    combined_image = combineTwoImages(matrix_image, legend_image, font=font)
    combined_image.save(self.fig_fname)
def run(self):
    """
    Assemble the strain-vs-target QC matrix and emit it as text and/or an image.

    Steps, in order:
      1. connect to StockDB (setup is called with create_tables=False);
      2. build the SQL that lists the row strains, ordered by country and
         ecotype longitude/latitude and by native name;
      3. fetch the row strains, either through the sequencing-plate
         alignment (how_to_group_strains 2 or 3) or straight from the
         query, passing along the strain id set read from self.input_fname
         when one is given;
      4. fetch the column targets;
      5. fill the matrix from self.input_fname or from the database and
         run it through markDataMatrixBoundary();
      6. exit with status 3 if the matrix is empty, otherwise write
         self.output_fname and/or self.fig_fname when they are set.

    When QC_method_id is 4 the QCCrossMatch table is not joined and the row
    strains are reused as column targets (re-queried when
    how_to_group_strains is 3).
    """
    if self.debug:
        import pdb
        pdb.set_trace()

    db = StockDB.StockDB(
        drivername=self.drivername,
        username=self.db_user,
        password=self.db_passwd,
        hostname=self.hostname,
        database=self.dbname,
        schema=self.schema,
    )
    db.setup(create_tables=False)
    session = db.session

    qc_method_is_4 = self.QC_method_id == 4
    group_by_plate = self.how_to_group_strains in (2, 3)

    # how to order strains.
    order_by_sentence = " order by c.longitude, c.latitude, e.longitude, e.latitude, e.nativename "
    location_tables = (
        StockDB.Ecotype.table.name,
        StockDB.Site.table.name,
        StockDB.Address.table.name,
        StockDB.Country.table.name,
    )
    # The trailing %s is filled later with the strain- or target-specific join.
    location_join = "where e.siteid=s.id and s.addressid=a.id and a.countryid=c.id %s"

    if qc_method_is_4:
        sql_table_str = "from %s e, %s s, %s a, %s c" % location_tables
        common_where_condition = location_join + " " + order_by_sentence
        strain_id_column = "st.id as strainid"
        strain_link = " and e.id=st.ecotypeid"
    else:
        sql_table_str = "from %s q, %s e, %s s, %s a, %s c" % (
            (StockDB.QCCrossMatch.table.name,) + location_tables
        )
        qc_filter = " and q.qc_method_id=%s and q.no_of_non_NA_pairs>=%s and q.mismatch_rate<=%s " % (
            self.QC_method_id, self.min_no_of_non_NAs, self.max_mismatch_rate
        )
        common_where_condition = location_join + qc_filter + order_by_sentence
        strain_id_column = "q.strainid"
        strain_link = " and e.id=st.ecotypeid and st.id=q.strainid"

    strain_where_condition = common_where_condition % strain_link
    strain_id_info_query = (
        "select distinct %s, e.id as ecotypeid, e.nativename, s.name as sitename, c.abbr %s, %s st %s"
        % (strain_id_column, sql_table_str, StockDB.Strain.table.name, strain_where_condition)
    )

    if group_by_plate:
        plate_info = self.alignStrainsAccordingToSeqPlate(db)

    if not group_by_plate and self.input_fname:
        id_set_data = self.getStrainidTargetidFromFile(
            db, self.QC_method_id, self.input_fname, self.max_mismatch_rate, self.min_no_of_non_NAs
        )
    else:
        id_set_data = PassingData()
        id_set_data.strain_id_set = None
        id_set_data.target_id_set = None

    if group_by_plate:
        strain_id_info = self.getStrainInfoGivenPlateInfo(
            db, plate_info, strain_id_info_query, strain_id_set=None
        )
    else:
        strain_id_info = self.getStrainIDInfo(db, strain_id_info_query, id_set_data.strain_id_set)

    if not qc_method_is_4:
        target_where_condition = common_where_condition % " and e.id=q.target_id"
        target_id_info_query = (
            "select distinct e.id as strainid, e.id as ecotypeid, e.nativename, s.name as sitename, c.abbr %s %s"
            % (sql_table_str, target_where_condition)
        )
        target_id_info = self.getStrainIDInfo(db, target_id_info_query)
    elif self.how_to_group_strains == 3:
        # 2008-09-15 column strain id is in country, strain-longitude order
        target_id_info = self.getStrainIDInfo(db, strain_id_info_query, id_set_data.strain_id_set)
    else:
        target_id_info = strain_id_info

    if self.input_fname:
        rdata = self.get_data_matrixFromFile(
            db, strain_id_info, target_id_info, self.QC_method_id,
            self.input_fname, self.max_mismatch_rate, self.min_no_of_non_NAs
        )
    else:
        rdata = self.get_data_matrix(
            db, strain_id_info, target_id_info, self.QC_method_id,
            self.max_mismatch_rate, self.min_no_of_non_NAs
        )
    rdata.data_matrix = self.markDataMatrixBoundary(rdata.data_matrix, strain_id_info, target_id_info)

    header = ['strain info', ''] + target_id_info.strain_label_ls
    strain_acc_list = strain_id_info.strain_label_ls
    category_list = [1] * len(strain_acc_list)

    if SNPData.isDataMatrixEmpty(rdata.data_matrix):
        sys.stderr.write("Nothing fetched from database.\n")
        sys.exit(3)

    if self.output_fname:
        write_data_matrix(rdata.data_matrix, self.output_fname, header, strain_acc_list, category_list)

    if not self.fig_fname:
        return

    font = get_font(self.font_path, font_size=self.font_size)

    def value2color_func(x):
        # 2008-08-01: colour is derived from the value and rdata's min/max.
        return Value2Color.value2HSLcolor(x, rdata.min_value, rdata.max_value)

    legend_image = drawContinousLegend(
        rdata.min_value, rdata.max_value, self.no_of_ticks, value2color_func, font
    )
    matrix_image = drawMatrix(
        rdata.data_matrix, value2color_func,
        strain_id_info.strain_label_ls, target_id_info.strain_label_ls,
        with_grid=1, font=font
    )
    combined_image = combineTwoImages(matrix_image, legend_image, font=font)
    combined_image.save(self.fig_fname)
def run(self):
    """
    Produce the QC matrix of strains (rows) against targets (columns).

    The method connects to StockDB without creating tables, composes the
    strain and target listing queries, obtains the row and column
    descriptions, fills the data matrix (from self.input_fname when set,
    otherwise from the database) and passes it through
    markDataMatrixBoundary(). An empty matrix terminates the process with
    exit status 3. Output goes to self.output_fname (text matrix) and/or
    self.fig_fname (matrix image combined with a legend) when set.

    QC_method_id 4 skips the QCCrossMatch join and uses the row strains as
    the column targets; every other QC method filters QCCrossMatch rows by
    method id, min_no_of_non_NAs and max_mismatch_rate.
    how_to_group_strains 2 or 3 routes row selection through
    alignStrainsAccordingToSeqPlate()/getStrainInfoGivenPlateInfo().
    """
    if self.debug:
        import pdb
        pdb.set_trace()

    db_settings = {
        'drivername': self.drivername,
        'username': self.db_user,
        'password': self.db_passwd,
        'hostname': self.hostname,
        'database': self.dbname,
        'schema': self.schema,
    }
    db = StockDB.StockDB(**db_settings)
    db.setup(create_tables=False)
    session = db.session

    ecotype_table = StockDB.Ecotype.table.name
    site_table = StockDB.Site.table.name
    address_table = StockDB.Address.table.name
    country_table = StockDB.Country.table.name

    uses_qc_method_4 = self.QC_method_id == 4
    plate_grouping = self.how_to_group_strains in (2, 3)

    # how to order strains.
    order_by_sentence = " order by c.longitude, c.latitude, e.longitude, e.latitude, e.nativename "
    # %s placeholder is substituted with the strain/target join further down.
    where_prefix = "where e.siteid=s.id and s.addressid=a.id and a.countryid=c.id %s"

    if uses_qc_method_4:
        sql_table_str = "from %s e, %s s, %s a, %s c" % (
            ecotype_table, site_table, address_table, country_table
        )
        common_where_condition = where_prefix + " " + order_by_sentence
        strain_where_condition = common_where_condition % " and e.id=st.ecotypeid"
        strain_select = (
            "select distinct st.id as strainid, e.id as ecotypeid, e.nativename, "
            "s.name as sitename, c.abbr %s, %s st %s"
        )
    else:
        sql_table_str = "from %s q, %s e, %s s, %s a, %s c" % (
            StockDB.QCCrossMatch.table.name,
            ecotype_table, site_table, address_table, country_table
        )
        cross_match_filter = (
            " and q.qc_method_id=%s and q.no_of_non_NA_pairs>=%s and q.mismatch_rate<=%s "
            % (self.QC_method_id, self.min_no_of_non_NAs, self.max_mismatch_rate)
        )
        common_where_condition = where_prefix + cross_match_filter + order_by_sentence
        strain_where_condition = common_where_condition % " and e.id=st.ecotypeid and st.id=q.strainid"
        strain_select = (
            "select distinct q.strainid, e.id as ecotypeid, e.nativename, "
            "s.name as sitename, c.abbr %s, %s st %s"
        )
    strain_id_info_query = strain_select % (
        sql_table_str, StockDB.Strain.table.name, strain_where_condition
    )

    # Row strains: plate-aligned, or taken from the query (optionally with
    # the id sets read from the input file).
    if plate_grouping:
        plate_info = self.alignStrainsAccordingToSeqPlate(db)
        id_set_data = PassingData()
        id_set_data.strain_id_set = None
        id_set_data.target_id_set = None
        strain_id_info = self.getStrainInfoGivenPlateInfo(
            db, plate_info, strain_id_info_query, strain_id_set=None
        )
    else:
        if self.input_fname:
            id_set_data = self.getStrainidTargetidFromFile(
                db, self.QC_method_id, self.input_fname,
                self.max_mismatch_rate, self.min_no_of_non_NAs
            )
        else:
            id_set_data = PassingData()
            id_set_data.strain_id_set = None
            id_set_data.target_id_set = None
        strain_id_info = self.getStrainIDInfo(
            db, strain_id_info_query, id_set_data.strain_id_set
        )

    # Column targets.
    if uses_qc_method_4 and self.how_to_group_strains == 3:
        # 2008-09-15 column strain id is in country, strain-longitude order
        target_id_info = self.getStrainIDInfo(
            db, strain_id_info_query, id_set_data.strain_id_set
        )
    elif uses_qc_method_4:
        target_id_info = strain_id_info
    else:
        target_where_condition = common_where_condition % " and e.id=q.target_id"
        target_id_info_query = (
            "select distinct e.id as strainid, e.id as ecotypeid, e.nativename, "
            "s.name as sitename, c.abbr %s %s"
        ) % (sql_table_str, target_where_condition)
        target_id_info = self.getStrainIDInfo(db, target_id_info_query)

    if self.input_fname:
        rdata = self.get_data_matrixFromFile(
            db, strain_id_info, target_id_info, self.QC_method_id,
            self.input_fname, self.max_mismatch_rate, self.min_no_of_non_NAs
        )
    else:
        rdata = self.get_data_matrix(
            db, strain_id_info, target_id_info, self.QC_method_id,
            self.max_mismatch_rate, self.min_no_of_non_NAs
        )
    rdata.data_matrix = self.markDataMatrixBoundary(
        rdata.data_matrix, strain_id_info, target_id_info
    )

    header = ['strain info', ''] + target_id_info.strain_label_ls
    strain_acc_list = strain_id_info.strain_label_ls
    category_list = [1] * len(strain_acc_list)

    if SNPData.isDataMatrixEmpty(rdata.data_matrix):
        sys.stderr.write("Nothing fetched from database.\n")
        sys.exit(3)

    if self.output_fname:
        write_data_matrix(
            rdata.data_matrix, self.output_fname, header, strain_acc_list, category_list
        )

    if self.fig_fname:
        font = get_font(self.font_path, font_size=self.font_size)

        def value2color_func(x):
            # 2008-08-01: colour is derived from the value and rdata's min/max.
            return Value2Color.value2HSLcolor(x, rdata.min_value, rdata.max_value)

        im_legend = drawContinousLegend(
            rdata.min_value, rdata.max_value, self.no_of_ticks, value2color_func, font
        )
        im = drawMatrix(
            rdata.data_matrix, value2color_func,
            strain_id_info.strain_label_ls, target_id_info.strain_label_ls,
            with_grid=1, font=font
        )
        im = combineTwoImages(im, im_legend, font=font)
        im.save(self.fig_fname)
def run(self):
    """
    Write and/or draw the matrix of candidate-gene test results.

    Rows are list-type/analysis-method combinations (from
    orderListTypeAnalysisMethodID()) and columns are phenotypes (from
    getPhenotypeInfo()).

    self.test_result_type selects the result table that is queried:
      1 -> the dated snapshot table
           'candidate_gene_rank_sum_test_result_2008_09_15'
      2 -> CandidateGeneTopSNPTest, joined through ResultsByGene
      3 -> Stock_250kDB.CandidateGeneRankSumTestResultMethod
    Any other value prints an error to stderr and exits with status 2.

    The from/where fragment is narrowed by call_method_id_ls,
    analysis_method_id_ls, super_type_id and test_type when they are set,
    and by max_pvalue_per_gene (type 1) or no_of_top_snps (type 2).

    The process exits with status 3 when the resulting matrix is empty.
    The matrix is written to self.output_fname and/or rendered (with a
    legend) into self.fig_fname when those attributes are set.
    """
    if self.debug:
        import pdb
        pdb.set_trace()

    db = Stock_250kDB.Stock_250kDB(
        drivername=self.drivername,
        username=self.db_user,
        password=self.db_passwd,
        hostname=self.hostname,
        database=self.dbname,
        schema=self.schema,
    )
    db.setup()
    session = db.session

    if self.test_result_type == 1:
        # NOTE(review): the original assigned
        # CandidateGeneRankSumTestResult.table.name and immediately overwrote
        # it with this snapshot name; only the effective value is kept.
        # Confirm the snapshot is still the intended source.
        test_result_class_table = 'candidate_gene_rank_sum_test_result_2008_09_15'
    elif self.test_result_type == 2:
        test_result_class_table = CandidateGeneTopSNPTest.table.name
    elif self.test_result_type == 3:
        test_result_class_table = Stock_250kDB.CandidateGeneRankSumTestResultMethod.table.name
    else:
        sys.stderr.write(" test_result_type %s not supported.\n" % (self.test_result_type))
        sys.exit(2)

    # the condition for min_MAF is tricky because of the floating precision.
    # The SQL is assembled from adjacent string literals so that source
    # indentation never leaks into the query text.
    if self.test_result_type == 2:
        where_condition = (
            "%s r, %s rg, %s c, %s g where g.id=c.list_type_id "
            "and r.analysis_method_id is not null and r.id=rg.results_method_id "
            "and c.results_id=rg.id and c.get_closest=%s and c.min_distance=%s "
            "and abs(c.min_MAF-%s)<0.00001"
        ) % (
            ResultsMethod.table.name, ResultsByGene.table.name,
            test_result_class_table, GeneListType.table.name,
            self.get_closest, self.min_distance, self.min_MAF,
        )
    else:
        # Types 1 and 3 use the same join; unsupported types exited above.
        where_condition = (
            "%s r, %s c, %s g where g.id=c.list_type_id "
            "and r.analysis_method_id is not null "
            "and c.results_id=r.id and c.get_closest=%s and c.min_distance=%s "
            "and abs(c.min_MAF-%s)<0.00001"
        ) % (
            ResultsMethod.table.name, test_result_class_table,
            GeneListType.table.name,
            self.get_closest, self.min_distance, self.min_MAF,
        )

    if self.call_method_id_ls:
        where_condition += " and r.call_method_id in (%s)" % self.call_method_id_ls
    if self.analysis_method_id_ls:
        where_condition += " and r.analysis_method_id in (%s)" % self.analysis_method_id_ls
    if self.super_type_id:
        where_condition += " and g.super_type_id=%s" % self.super_type_id
    if self.test_type:
        where_condition += " and c.test_type=%s" % self.test_type
    if self.test_result_type == 1:
        where_condition += " and c.max_pvalue_per_gene=%s" % (self.max_pvalue_per_gene)
    elif self.test_result_type == 2:
        where_condition += " and c.no_of_top_snps=%s" % (self.no_of_top_snps)

    list_type_id_ls = self.getListTypeInfo(db, where_condition)
    analysis_method_id_ls = self.getAnalysisMethodInfo(db, where_condition)
    list_type_analysis_method_info = self.orderListTypeAnalysisMethodID(
        list_type_id_ls, analysis_method_id_ls
    )
    phenotype_info = self.getPhenotypeInfo(db, where_condition)

    rdata = self.get_data_matrix(
        db, phenotype_info, list_type_analysis_method_info, where_condition
    )
    rdata.data_matrix = self.markDataMatrixBoundary(
        rdata.data_matrix, phenotype_info, list_type_analysis_method_info
    )

    header = ['list_type_analysis_method', ''] + phenotype_info.phenotype_method_label_ls
    strain_acc_list = list_type_analysis_method_info.list_type_analysis_method_label_ls
    category_list = list_type_analysis_method_info.list_type_id_analysis_method_id_ls

    if SNPData.isDataMatrixEmpty(rdata.data_matrix):
        sys.stderr.write("Nothing fetched from database.\n")
        sys.exit(3)

    if self.output_fname:
        write_data_matrix(
            rdata.data_matrix, self.output_fname, header, strain_acc_list, category_list
        )

    if self.fig_fname:
        font = get_font(self.font_path, font_size=self.font_size)
        # 2008-08-01: colour is derived from the value and rdata's min/max.
        value2color_func = lambda x: Value2Color.value2HSLcolor(
            x, rdata.min_value, rdata.max_value
        )
        im_legend = drawContinousLegend(
            rdata.min_value, rdata.max_value, self.no_of_ticks, value2color_func, font
        )
        im = drawMatrix(
            rdata.data_matrix, value2color_func,
            list_type_analysis_method_info.list_type_analysis_method_label_ls,
            phenotype_info.phenotype_method_label_ls,
            with_grid=1, font=font
        )
        im = combineTwoImages(im, im_legend, font=font)
        im.save(self.fig_fname)
def run(self):
    """
    Write and/or draw the matrix of candidate-gene test results.

    Rows are list-type/analysis-method combinations (from
    orderListTypeAnalysisMethodID()) and columns are phenotypes (from
    getPhenotypeInfo()).

    self.test_result_type selects the result table that is queried:
      1 -> the dated snapshot table
           'candidate_gene_rank_sum_test_result_2008_09_15'
      2 -> CandidateGeneTopSNPTest, joined through ResultsByGene
      3 -> Stock_250kDB.CandidateGeneRankSumTestResultMethod
    Any other value prints an error to stderr and exits with status 2.

    The from/where fragment is narrowed by call_method_id_ls,
    analysis_method_id_ls, super_type_id and test_type when they are set,
    and by max_pvalue_per_gene (type 1) or no_of_top_snps (type 2).

    The process exits with status 3 when the resulting matrix is empty.
    The matrix is written to self.output_fname and/or rendered (with a
    legend) into self.fig_fname when those attributes are set.
    """
    if self.debug:
        import pdb
        pdb.set_trace()

    db = Stock_250kDB.Stock_250kDB(
        drivername=self.drivername,
        username=self.db_user,
        password=self.db_passwd,
        hostname=self.hostname,
        database=self.dbname,
        schema=self.schema,
    )
    db.setup()
    session = db.session

    result_type = self.test_result_type
    if result_type == 1:
        # NOTE(review): the original assigned
        # CandidateGeneRankSumTestResult.table.name and immediately overwrote
        # it with this snapshot name; only the effective value is kept.
        # Confirm the snapshot is still the intended source.
        test_result_class_table = 'candidate_gene_rank_sum_test_result_2008_09_15'
    elif result_type == 2:
        test_result_class_table = CandidateGeneTopSNPTest.table.name
    elif result_type == 3:
        test_result_class_table = Stock_250kDB.CandidateGeneRankSumTestResultMethod.table.name
    else:
        sys.stderr.write(" test_result_type %s not supported.\n" % (self.test_result_type))
        sys.exit(2)

    # the condition for min_MAF is tricky because of the floating precision.
    # Adjacent string literals are used instead of backslash continuations
    # inside the literal, which would embed source indentation in the SQL.
    if result_type == 2:
        from_where_template = (
            "%s r, %s rg, %s c, %s g where g.id=c.list_type_id "
            "and r.analysis_method_id is not null and r.id=rg.results_method_id "
            "and c.results_id=rg.id and c.get_closest=%s and c.min_distance=%s "
            "and abs(c.min_MAF-%s)<0.00001"
        )
        table_names = (
            ResultsMethod.table.name,
            ResultsByGene.table.name,
            test_result_class_table,
            GeneListType.table.name,
        )
    else:
        # Types 1 and 3 use the same join; unsupported types exited above.
        from_where_template = (
            "%s r, %s c, %s g where g.id=c.list_type_id "
            "and r.analysis_method_id is not null "
            "and c.results_id=r.id and c.get_closest=%s and c.min_distance=%s "
            "and abs(c.min_MAF-%s)<0.00001"
        )
        table_names = (
            ResultsMethod.table.name,
            test_result_class_table,
            GeneListType.table.name,
        )
    where_condition = from_where_template % (
        table_names + (self.get_closest, self.min_distance, self.min_MAF)
    )

    if self.call_method_id_ls:
        where_condition += " and r.call_method_id in (%s)" % self.call_method_id_ls
    if self.analysis_method_id_ls:
        where_condition += " and r.analysis_method_id in (%s)" % self.analysis_method_id_ls
    if self.super_type_id:
        where_condition += " and g.super_type_id=%s" % self.super_type_id
    if self.test_type:
        where_condition += " and c.test_type=%s" % self.test_type
    if result_type == 1:
        where_condition += " and c.max_pvalue_per_gene=%s" % (self.max_pvalue_per_gene)
    elif result_type == 2:
        where_condition += " and c.no_of_top_snps=%s" % (self.no_of_top_snps)

    list_type_id_ls = self.getListTypeInfo(db, where_condition)
    analysis_method_id_ls = self.getAnalysisMethodInfo(db, where_condition)
    list_type_analysis_method_info = self.orderListTypeAnalysisMethodID(
        list_type_id_ls, analysis_method_id_ls
    )
    phenotype_info = self.getPhenotypeInfo(db, where_condition)

    rdata = self.get_data_matrix(
        db, phenotype_info, list_type_analysis_method_info, where_condition
    )
    rdata.data_matrix = self.markDataMatrixBoundary(
        rdata.data_matrix, phenotype_info, list_type_analysis_method_info
    )

    header = ['list_type_analysis_method', ''] + phenotype_info.phenotype_method_label_ls
    strain_acc_list = list_type_analysis_method_info.list_type_analysis_method_label_ls
    category_list = list_type_analysis_method_info.list_type_id_analysis_method_id_ls

    if SNPData.isDataMatrixEmpty(rdata.data_matrix):
        sys.stderr.write("Nothing fetched from database.\n")
        sys.exit(3)

    if self.output_fname:
        write_data_matrix(
            rdata.data_matrix, self.output_fname, header, strain_acc_list, category_list
        )

    if self.fig_fname:
        font = get_font(self.font_path, font_size=self.font_size)
        # 2008-08-01: colour is derived from the value and rdata's min/max.
        value2color_func = lambda x: Value2Color.value2HSLcolor(
            x, rdata.min_value, rdata.max_value
        )
        im_legend = drawContinousLegend(
            rdata.min_value, rdata.max_value, self.no_of_ticks, value2color_func, font
        )
        im = drawMatrix(
            rdata.data_matrix, value2color_func,
            list_type_analysis_method_info.list_type_analysis_method_label_ls,
            phenotype_info.phenotype_method_label_ls,
            with_grid=1, font=font
        )
        im = combineTwoImages(im, im_legend, font=font)
        im.save(self.fig_fname)