def run(self):
    """
    2008-11-08
        Generate combinations of (results_id, list_type_id) and draw the
        plots one after another. A plot is saved into the database if
        self.commit is set; otherwise an output filename derived from
        self.fig_fname is handed to self.plotCurve().

    Exits the process with status 3 when self.results_type does not map to
    a results class / test-result class pair, or when no TopSNPTestType
    matches the input requirements.
    """
    if self.debug:
        import pdb
        pdb.set_trace()

    db = Stock_250kDB.Stock_250kDB(drivername=self.drivername, username=self.db_user,
                                   password=self.db_passwd, hostname=self.hostname,
                                   database=self.dbname, schema=self.schema)
    db.setup()

    param_obj = PassingData(call_method_id=self.call_method_id,
                            analysis_method_id=getattr(self, 'analysis_method_id', None),
                            analysis_method_id_ls=getattr(self, 'analysis_method_id_ls', None),
                            phenotype_method_id_ls=getattr(self, 'phenotype_method_id_ls', None),
                            list_type_id_ls=self.list_type_id_ls,
                            results_type=self.results_type)
    params_ls = MpiGeneListRankTest.generate_params(param_obj)

    ResultsClass, TestResultClass = db.getResultsAndTestResultsClass(results_type=self.results_type)
    if ResultsClass is None or TestResultClass is None:
        # the message used to format an undefined name ("pd"), which raised
        # NameError instead of reporting the offending results type
        sys.stderr.write("Invalid results type : %s.\n" % self.results_type)
        sys.exit(3)

    for results_id, list_type_id in params_ls:
        rm = ResultsClass.get(results_id)
        list_type = Stock_250kDB.GeneListType.get(list_type_id)
        title = 'result(%s) of %s on %s with %s(%s) list' % \
            (results_id, rm.analysis_method.short_name, rm.phenotype_method.short_name,
             list_type.short_name, list_type.id)

        TopSNPTestType_id_ls = self.getTopSNPTestType_id_ls(
            self.get_closest, self.min_MAF, self.allow_two_sample_overlapping,
            self.results_type, self.test_type_id, self.null_distribution_type_id)
        # must be checked before the first element is used below; otherwise an
        # empty list ends in IndexError rather than this message
        if not TopSNPTestType_id_ls:
            sys.stderr.write("No TopSNPTestType matches the input requirements. Exit.\n")
            sys.exit(3)
        plot_type_id = TopSNPTestType_id_ls[0]

        if self.commit:
            # skip the combination if a plot for it is already stored
            rows = Stock_250kDB.CandidateVsNonRatioPlot.query.filter_by(type_id=plot_type_id).\
                filter_by(results_id=results_id).filter_by(list_type_id=list_type_id)
            no_of_rows = rows.count()
            if no_of_rows > 0:
                row = rows.first()
                sys.stderr.write('%s already in db (%s of them) with first id=%s.\n' %
                                 (title, no_of_rows, row.id))
                continue

        from_where_clause = "from %s t, %s y where t.type_id=y.id and t.results_id=%s and t.list_type_id=%s and y.id in (%s)" % \
            (TestResultClass.table.name, Stock_250kDB.CandidateGeneTopSNPTestRMType.table.name,
             results_id, list_type_id, ','.join(map(str, TopSNPTestType_id_ls)))
        no_of_top_snps_info = self.get_no_of_top_snps_info(db, from_where_clause)
        min_distance_info = self.get_min_distance_info(db, from_where_clause)
        rdata = self.get_data_matrix(db, no_of_top_snps_info, min_distance_info, from_where_clause,
                                     need_other_values=True,
                                     null_distribution_type_id=self.null_distribution_type_id)

        if SNPData.isDataMatrixEmpty(rdata.data_matrix):
            sys.stderr.write("Nothing fetched from database.\n")
            #sys.exit(3)
            continue

        if self.output_fname:
            header = ['no_of_top_snps', ''] + min_distance_info.label_ls
            strain_acc_list = no_of_top_snps_info.label_ls
            category_list = no_of_top_snps_info.label_ls
            write_data_matrix(rdata.data_matrix, self.output_fname, header,
                              strain_acc_list, category_list)

        # disabled matrix-image output, kept for reference:
        #if self.fig_fname:
        #    font = get_font(self.font_path, font_size=self.font_size)  #2008-08-01
        #    value2color_func = lambda x: Value2Color.value2HSLcolor(x, rdata.min_value, rdata.max_value)
        #    im_legend = drawContinousLegend(rdata.min_value, rdata.max_value, self.no_of_ticks, value2color_func, font)
        #    #im.save('%s_legend.png'%self.fig_fname_prefix)
        #    im = drawMatrix(rdata.data_matrix, value2color_func, no_of_top_snps_info.label_ls,\
        #        min_distance_info.label_ls, with_grid=1, font=font)
        #    im = combineTwoImages(im, im_legend, font=font)
        #    im.save(self.fig_fname)

        if self.commit:
            output_fname_prefix = None
        else:
            # '/' in the title would otherwise be read as a path separator
            title_cp = title.replace('/', '_')
            output_fname_prefix = '%s_%s_type_%s.png' % (os.path.splitext(self.fig_fname)[0],
                                                         title_cp, plot_type_id)

        # the [0, 8] x-range once assigned for analysis methods 1 and 7 was
        # immediately overwritten with None, so no preset range is applied
        preset_xlim = None

        return_data = self.plotCurve(rdata, no_of_top_snps_info, min_distance_info,
                                     output_fname_prefix, title=title, commit=self.commit,
                                     preset_xlim=preset_xlim)

        if self.commit and return_data.png_data:
            # check again right before saving, as the original code does
            rows = Stock_250kDB.CandidateVsNonRatioPlot.query.filter_by(type_id=plot_type_id).\
                filter_by(results_id=results_id).filter_by(list_type_id=list_type_id)
            no_of_rows = rows.count()
            if no_of_rows > 0:
                row = rows.first()
                sys.stderr.write('%s already in db (%s of them) with first id=%s.\n' %
                                 (title, no_of_rows, row.id))
                continue
            plot = Stock_250kDB.CandidateVsNonRatioPlot(type_id=plot_type_id,
                                                        results_id=results_id,
                                                        list_type_id=list_type_id)
            plot.png_thumbnail = return_data.png_thumbnail.getvalue()
            plot.png_data = return_data.png_data.getvalue()
            plot.svg_data = return_data.svg_data.getvalue()
            db.session.save(plot)
            db.session.flush()
def run(self):
    """
    2008-11-08
        Generate combinations of (results_id, list_type_id) and draw the
        plots one after another. A plot is saved into the database if
        self.commit is set; otherwise an output filename derived from
        self.fig_fname is handed to self.plotCurve().

    Exits the process with status 3 when no TopSNPTestType matches the
    input requirements.
    """
    if self.debug:
        import pdb
        pdb.set_trace()

    db = Stock_250kDB.Stock_250kDB(drivername=self.drivername, username=self.db_user,
                                   password=self.db_passwd, hostname=self.hostname,
                                   database=self.dbname, schema=self.schema)
    db.setup()

    param_obj = PassingData(call_method_id=self.call_method_id,
                            analysis_method_id=getattr(self, 'analysis_method_id', None),
                            analysis_method_id_ls=getattr(self, 'analysis_method_id_ls', None),
                            phenotype_method_id_ls=getattr(self, 'phenotype_method_id_ls', None),
                            list_type_id_ls=self.list_type_id_ls,
                            results_type=self.results_type)
    params_ls = MpiGeneListRankTest.generate_params(param_obj)

    for results_id, list_type_id in params_ls:
        rm = Stock_250kDB.ResultsMethod.get(results_id)
        list_type = Stock_250kDB.GeneListType.get(list_type_id)
        title = 'result(%s) of %s on %s with %s(%s) list' % \
            (results_id, rm.analysis_method.short_name, rm.phenotype_method.short_name,
             list_type.short_name, list_type.id)

        TopSNPTestType_id_ls = self.getTopSNPTestType_id_ls(
            self.get_closest, self.min_MAF, self.allow_two_sample_overlapping,
            self.results_type, self.test_type_id, self.null_distribution_type_id)
        # must be checked before the first element is used below; otherwise an
        # empty list ends in IndexError rather than this message
        if not TopSNPTestType_id_ls:
            sys.stderr.write("No TopSNPTestType matches the input requirements. Exit.\n")
            sys.exit(3)
        plot_type_id = TopSNPTestType_id_ls[0]

        if self.commit:
            # skip the combination if a plot for it is already stored
            rows = Stock_250kDB.CandidateVsNonRatioPlot.query.filter_by(type_id=plot_type_id).\
                filter_by(results_id=results_id).filter_by(list_type_id=list_type_id)
            no_of_rows = rows.count()
            if no_of_rows > 0:
                row = rows.first()
                sys.stderr.write('%s already in db (%s of them) with first id=%s.\n' %
                                 (title, no_of_rows, row.id))
                continue

        from_where_clause = "from %s t, %s y where t.type_id=y.id and t.results_id=%s and t.list_type_id=%s and y.id in (%s)" % \
            (Stock_250kDB.CandidateGeneTopSNPTestRM.table.name,
             Stock_250kDB.CandidateGeneTopSNPTestRMType.table.name,
             results_id, list_type_id, ','.join(map(str, TopSNPTestType_id_ls)))
        no_of_top_snps_info = self.get_no_of_top_snps_info(db, from_where_clause)
        min_distance_info = self.get_min_distance_info(db, from_where_clause)
        rdata = self.get_data_matrix(db, no_of_top_snps_info, min_distance_info, from_where_clause,
                                     need_other_values=True,
                                     null_distribution_type_id=self.null_distribution_type_id)

        if SNPData.isDataMatrixEmpty(rdata.data_matrix):
            sys.stderr.write("Nothing fetched from database.\n")
            #sys.exit(3)
            continue

        if self.output_fname:
            header = ['no_of_top_snps', ''] + min_distance_info.label_ls
            strain_acc_list = no_of_top_snps_info.label_ls
            category_list = no_of_top_snps_info.label_ls
            write_data_matrix(rdata.data_matrix, self.output_fname, header,
                              strain_acc_list, category_list)

        # disabled matrix-image output, kept for reference:
        #if self.fig_fname:
        #    font = get_font(self.font_path, font_size=self.font_size)  #2008-08-01
        #    value2color_func = lambda x: Value2Color.value2HSLcolor(x, rdata.min_value, rdata.max_value)
        #    im_legend = drawContinousLegend(rdata.min_value, rdata.max_value, self.no_of_ticks, value2color_func, font)
        #    #im.save('%s_legend.png'%self.fig_fname_prefix)
        #    im = drawMatrix(rdata.data_matrix, value2color_func, no_of_top_snps_info.label_ls,\
        #        min_distance_info.label_ls, with_grid=1, font=font)
        #    im = combineTwoImages(im, im_legend, font=font)
        #    im.save(self.fig_fname)

        if self.commit:
            output_fname_prefix = None
        else:
            # '/' in the title would otherwise be read as a path separator
            title_cp = title.replace('/', '_')
            output_fname_prefix = '%s_%s_type_%s.png' % (os.path.splitext(self.fig_fname)[0],
                                                         title_cp, plot_type_id)

        # analysis methods 1 and 7 get a fixed x-axis range
        if rm.analysis_method_id in (1, 7):
            preset_xlim = [0, 8]
        else:
            preset_xlim = None

        return_data = self.plotCurve(rdata, no_of_top_snps_info, min_distance_info,
                                     output_fname_prefix, title=title, commit=self.commit,
                                     preset_xlim=preset_xlim)

        if self.commit and return_data.png_data:
            # check again right before saving, as the original code does
            rows = Stock_250kDB.CandidateVsNonRatioPlot.query.filter_by(type_id=plot_type_id).\
                filter_by(results_id=results_id).filter_by(list_type_id=list_type_id)
            no_of_rows = rows.count()
            if no_of_rows > 0:
                row = rows.first()
                sys.stderr.write('%s already in db (%s of them) with first id=%s.\n' %
                                 (title, no_of_rows, row.id))
                continue
            plot = Stock_250kDB.CandidateVsNonRatioPlot(type_id=plot_type_id,
                                                        results_id=results_id,
                                                        list_type_id=list_type_id)
            plot.png_thumbnail = return_data.png_thumbnail.getvalue()
            plot.png_data = return_data.png_data.getvalue()
            plot.svg_data = return_data.svg_data.getvalue()
            db.session.save(plot)
            db.session.flush()
def run(self):
    """
    2008-11-11
        For every (results_id, list_type_id) combination, fetch the test
        result matrix and add one bar series to a shared 3D plot via
        self.plot_one_bar(). The figure is saved to self.fig_fname (plus an
        .svg sibling) and the collected per-result data is passed to
        self.output_data() when self.output_fname is set.

    Exits the process with status 3 when no TopSNPTestType matches the
    input requirements.
    """
    if self.debug:
        import pdb
        pdb.set_trace()

    db = Stock_250kDB.Stock_250kDB(drivername=self.drivername, username=self.db_user,
                                   password=self.db_passwd, hostname=self.hostname,
                                   database=self.dbname, schema=self.schema)
    db.setup()

    param_obj = PassingData(call_method_id=self.call_method_id,
                            analysis_method_id=getattr(self, 'analysis_method_id', None),
                            analysis_method_id_ls=getattr(self, 'analysis_method_id_ls', None),
                            phenotype_method_id_ls=getattr(self, 'phenotype_method_id_ls', None),
                            list_type_id_ls=self.list_type_id_ls,
                            results_type=self.results_type)
    params_ls = MpiGeneListRankTest.generate_params(param_obj)

    ax = self.init_3d_plot()
    result_index = 0
    data_to_output_label_ls = []
    data_to_output_ls = []
    for results_id, list_type_id in params_ls:
        rm = Stock_250kDB.ResultsMethod.get(results_id)
        phenotype_label = '%s_%s' % (rm.phenotype_method.id, rm.phenotype_method.short_name)
        title = '%s on %s (%s)' % (rm.analysis_method.short_name, phenotype_label, results_id)
        #a short output label
        output_label = '%s (%s)' % (phenotype_label, results_id)

        TopSNPTestType_id_ls = self.getTopSNPTestType_id_ls(
            self.get_closest, self.min_MAF, self.allow_two_sample_overlapping,
            self.results_type, self.test_type_id, self.null_distribution_type_id)
        # must be checked before the first element is used below; otherwise an
        # empty list ends in IndexError rather than this message
        if not TopSNPTestType_id_ls:
            sys.stderr.write("No TopSNPTestType matches the input requirements. Exit.\n")
            sys.exit(3)

        if self.commit:
            # skip results that already have a CandidateVsNonRatioPlot stored
            rows = Stock_250kDB.CandidateVsNonRatioPlot.query.filter_by(type_id=TopSNPTestType_id_ls[0]).\
                filter_by(results_id=results_id).filter_by(list_type_id=list_type_id)
            no_of_rows = rows.count()
            if no_of_rows > 0:
                row = rows.first()
                sys.stderr.write('%s already in db (%s of them) with first id=%s.\n' %
                                 (title, no_of_rows, row.id))
                continue

        from_where_clause = "from %s t, %s y where t.type_id=y.id and t.results_id=%s and t.list_type_id=%s and y.id in (%s)" % \
            (Stock_250kDB.CandidateGeneTopSNPTestRM.table.name,
             Stock_250kDB.CandidateGeneTopSNPTestRMType.table.name,
             results_id, list_type_id, ','.join(map(str, TopSNPTestType_id_ls)))
        no_of_top_snps_info = self.get_no_of_top_snps_info(db, from_where_clause)
        min_distance_info = self.get_min_distance_info(db, from_where_clause)
        rdata = self.get_data_matrix(db, no_of_top_snps_info, min_distance_info, from_where_clause,
                                     need_other_values=True,
                                     null_distribution_type_id=self.null_distribution_type_id)

        #For random forest, take log and determine bar length according to score_cutoff_ls
        # Compared as text so that an integer id matches too; the bare
        # == '6' test can never be true for an integer id.
        # NOTE(review): sibling code compares this attribute with ints (1, 7) - confirm column type.
        score_cutoff_take_log = str(rm.analysis_method_id) == '6'

        return_code = self.plot_one_bar(ax, rdata, no_of_top_snps_info, min_distance_info,
                                        self.min_distance, result_index=result_index,
                                        data_type=self.data_type, output_fname=None,
                                        need_svg=False, title=phenotype_label, commit=0,
                                        preset_xlim=None,
                                        score_cutoff_take_log=score_cutoff_take_log)
        if return_code:
            data_to_output_label_ls.append(output_label)
            data_to_output_ls.append(return_code)
            # move on to the next bar position only after a successful draw
            result_index += 1

    if self.fig_fname:
        pylab.savefig(self.fig_fname, dpi=300)
        pylab.savefig('%s.svg' % os.path.splitext(self.fig_fname)[0], dpi=300)
        #pylab.show()
    if self.output_fname and data_to_output_ls:
        self.output_data(data_to_output_label_ls, data_to_output_ls,
                         self.min_distance, self.output_fname)
def run(self):
    """
    2008-11-11
        For every (results_id, list_type_id) combination, fetch the test
        result matrix and add one bar series to a shared 3D plot via
        self.plot_one_bar(). The axes are labelled, the figure is saved to
        self.fig_fname (plus an .svg sibling) and the collected per-result
        data is passed to self.output_data() when self.output_fname is set.

    Exits the process with status 3 when self.results_type does not map to
    a results class / test-result class pair, or when no TopSNPTestType
    matches the input requirements.
    """
    if self.debug:
        import pdb
        pdb.set_trace()

    db = Stock_250kDB.Stock_250kDB(drivername=self.drivername, username=self.db_user,
                                   password=self.db_passwd, hostname=self.hostname,
                                   database=self.dbname, schema=self.schema)
    db.setup()

    param_obj = PassingData(call_method_id=self.call_method_id,
                            analysis_method_id=getattr(self, 'analysis_method_id', None),
                            analysis_method_id_ls=getattr(self, 'analysis_method_id_ls', None),
                            phenotype_method_id_ls=getattr(self, 'phenotype_method_id_ls', None),
                            list_type_id_ls=self.list_type_id_ls,
                            results_type=self.results_type)
    params_ls = MpiGeneListRankTest.generate_params(param_obj)

    ResultsClass, TestResultClass = Stock_250kDB.Stock_250kDB.getResultsAndTestResultsClass(
        results_type=self.results_type)
    if ResultsClass is None or TestResultClass is None:
        # the message used to format an undefined name ("pd"), which raised
        # NameError instead of reporting the offending results type
        sys.stderr.write("Invalid results type : %s.\n" % self.results_type)
        sys.exit(3)

    ax = self.init_3d_plot()
    result_index = 0
    data_to_output_label_ls = []
    data_to_output_ls = []
    for results_id, list_type_id in params_ls:
        rm = ResultsClass.get(results_id)
        phenotype_label = '%s_%s' % (rm.phenotype_method.id, rm.phenotype_method.short_name)
        title = '%s on %s (%s)' % (rm.analysis_method.short_name, phenotype_label, results_id)
        #a short output label
        output_label = '%s (%s)' % (phenotype_label, results_id)

        TopSNPTestType_id_ls = self.getTopSNPTestType_id_ls(
            self.get_closest, self.min_MAF, self.allow_two_sample_overlapping,
            self.results_type, self.test_type_id, self.null_distribution_type_id)
        # must be checked before the first element is used below; otherwise an
        # empty list ends in IndexError rather than this message
        if not TopSNPTestType_id_ls:
            sys.stderr.write("No TopSNPTestType matches the input requirements. Exit.\n")
            sys.exit(3)

        if self.commit:
            # skip results that already have a CandidateVsNonRatioPlot stored
            rows = Stock_250kDB.CandidateVsNonRatioPlot.query.filter_by(type_id=TopSNPTestType_id_ls[0]).\
                filter_by(results_id=results_id).filter_by(list_type_id=list_type_id)
            no_of_rows = rows.count()
            if no_of_rows > 0:
                row = rows.first()
                sys.stderr.write('%s already in db (%s of them) with first id=%s.\n' %
                                 (title, no_of_rows, row.id))
                continue

        from_where_clause = "from %s t, %s y where t.type_id=y.id and t.results_id=%s and t.list_type_id=%s and y.id in (%s)" % \
            (TestResultClass.table.name, Stock_250kDB.CandidateGeneTopSNPTestRMType.table.name,
             results_id, list_type_id, ','.join(map(str, TopSNPTestType_id_ls)))
        no_of_top_snps_info = self.get_no_of_top_snps_info(db, from_where_clause)
        min_distance_info = self.get_min_distance_info(db, from_where_clause)
        rdata = self.get_data_matrix(db, no_of_top_snps_info, min_distance_info, from_where_clause,
                                     need_other_values=True,
                                     null_distribution_type_id=self.null_distribution_type_id)

        #For random forest, take log and determine bar length according to score_cutoff_ls
        # Compared as text so that an integer id matches too; the bare
        # == '6' test can never be true for an integer id.
        # NOTE(review): confirm the column type of analysis_method_id.
        score_cutoff_take_log = str(rm.analysis_method_id) == '6'

        return_code = self.plot_one_bar(ax, rdata, no_of_top_snps_info, min_distance_info,
                                        self.min_distance, result_index=result_index,
                                        data_type=self.data_type, output_fname=None,
                                        need_svg=False, title=phenotype_label, commit=0,
                                        preset_xlim=None,
                                        score_cutoff_take_log=score_cutoff_take_log)
        if return_code:
            data_to_output_label_ls.append(output_label)
            data_to_output_ls.append(return_code)
            # move on to the next bar position only after a successful draw
            result_index += 1

    ax.set_xlabel('cutoff')
    ax.set_ylabel('result')
    ax.set_zlabel('data')
    if self.fig_fname:
        pylab.savefig(self.fig_fname, dpi=300)
        pylab.savefig('%s.svg' % os.path.splitext(self.fig_fname)[0], dpi=300)
        #pylab.show()
    if self.output_fname and data_to_output_ls:
        self.output_data(data_to_output_label_ls, data_to_output_ls,
                         self.min_distance, self.output_fname)