def run(self):
        """
        Generate one plot per (results_id, list_type_id) combination.

        2008-11-08
            Generate combinations of results_id, list_type_id and generate
            plots one after another. Save the plots into database if commit=1;
            otherwise plotCurve() is handed an output file name derived from
            self.fig_fname, the plot title and the first TopSNPTestType id.

        Exits with status 3 when self.results_type maps to no result classes
        or when no TopSNPTestType matches the input requirements.
        """
        if self.debug:
            import pdb
            pdb.set_trace()
        db = Stock_250kDB.Stock_250kDB(drivername=self.drivername,
                                       username=self.db_user,
                                       password=self.db_passwd,
                                       hostname=self.hostname,
                                       database=self.dbname,
                                       schema=self.schema)
        db.setup()
        session = db.session

        param_obj = PassingData(
            call_method_id=self.call_method_id,
            analysis_method_id=getattr(self, 'analysis_method_id', None),
            analysis_method_id_ls=getattr(self, 'analysis_method_id_ls', None),
            phenotype_method_id_ls=getattr(self, 'phenotype_method_id_ls', None),
            list_type_id_ls=self.list_type_id_ls,
            results_type=self.results_type)
        params_ls = MpiGeneListRankTest.generate_params(param_obj)

        ResultsClass, TestResultClass = db.getResultsAndTestResultsClass(
            results_type=self.results_type)

        if ResultsClass is None or TestResultClass is None:
            # The original referenced an undefined name here, so this error
            # path raised NameError instead of reporting the bad results type.
            sys.stderr.write("Invalid results type : %s.\n" % self.results_type)
            sys.exit(3)

        def plot_already_in_db(type_id, results_id, list_type_id, title):
            """Report on stderr and return True if a matching plot row exists."""
            rows = Stock_250kDB.CandidateVsNonRatioPlot.query.filter_by(type_id=type_id).\
                filter_by(results_id=results_id).filter_by(list_type_id=list_type_id)
            no_of_rows = rows.count()
            if no_of_rows > 0:
                sys.stderr.write(
                    '%s already in db (%s of them) with first id=%s.\n' %
                    (title, no_of_rows, rows.first().id))
                return True
            return False

        for results_id, list_type_id in params_ls:
            rm = ResultsClass.get(results_id)
            list_type = Stock_250kDB.GeneListType.get(list_type_id)
            title = 'result(%s) of %s on %s with %s(%s) list' % \
                (results_id, rm.analysis_method.short_name, rm.phenotype_method.short_name,
                 list_type.short_name, list_type.id)

            TopSNPTestType_id_ls = self.getTopSNPTestType_id_ls(
                self.get_closest, self.min_MAF, self.allow_two_sample_overlapping,
                self.results_type, self.test_type_id, self.null_distribution_type_id)
            # Must be checked before TopSNPTestType_id_ls[0] is touched below;
            # otherwise an empty list raises IndexError when commit is set.
            if not TopSNPTestType_id_ls:
                sys.stderr.write(
                    "No TopSNPTestType matches the input requirements. Exit.\n"
                )
                sys.exit(3)
            first_type_id = TopSNPTestType_id_ls[0]

            # Skip the database queries and plotting if the plot is already stored.
            if self.commit and plot_already_in_db(first_type_id, results_id, list_type_id, title):
                continue

            from_where_clause = "from %s t, %s y where t.type_id=y.id and t.results_id=%s and t.list_type_id=%s and y.id in (%s)" % \
                (TestResultClass.table.name, Stock_250kDB.CandidateGeneTopSNPTestRMType.table.name,
                 results_id, list_type_id, ','.join(map(str, TopSNPTestType_id_ls)))

            no_of_top_snps_info = self.get_no_of_top_snps_info(
                db, from_where_clause)
            min_distance_info = self.get_min_distance_info(
                db, from_where_clause)
            rdata = self.get_data_matrix(
                db, no_of_top_snps_info, min_distance_info, from_where_clause,
                need_other_values=True,
                null_distribution_type_id=self.null_distribution_type_id)

            if SNPData.isDataMatrixEmpty(rdata.data_matrix):
                sys.stderr.write("Nothing fetched from database.\n")
                continue

            if self.output_fname:
                header = ['no_of_top_snps', ''] + min_distance_info.label_ls
                # The same labels serve as both the row-id and category columns.
                strain_acc_list = no_of_top_snps_info.label_ls
                category_list = no_of_top_snps_info.label_ls
                write_data_matrix(rdata.data_matrix, self.output_fname, header,
                                  strain_acc_list, category_list)

            if self.commit:
                # No file name is passed to plotCurve() in commit mode.
                output_fname_prefix = None
            else:
                # '/' in the title would be read as a directory separator.
                title_cp = title.replace('/', '_')
                output_fname_prefix = '%s_%s_type_%s.png' % (
                    os.path.splitext(self.fig_fname)[0], title_cp, first_type_id)

            # The original assigned [0, 8] for analysis methods 1 and 7 and then
            # immediately overwrote it with None, so no x-limit is preset here.
            preset_xlim = None
            return_data = self.plotCurve(rdata,
                                         no_of_top_snps_info,
                                         min_distance_info,
                                         output_fname_prefix,
                                         title=title,
                                         commit=self.commit,
                                         preset_xlim=preset_xlim)

            if self.commit and return_data.png_data:
                # Check again right before saving: the same check is repeated
                # here as in the original, after the plot has been generated.
                if plot_already_in_db(first_type_id, results_id, list_type_id, title):
                    continue
                plot = Stock_250kDB.CandidateVsNonRatioPlot(
                    type_id=first_type_id,
                    results_id=results_id,
                    list_type_id=list_type_id)
                plot.png_thumbnail = return_data.png_thumbnail.getvalue()
                plot.png_data = return_data.png_data.getvalue()
                plot.svg_data = return_data.svg_data.getvalue()
                session.save(plot)
                session.flush()
Example #2
0
	def run(self):
		"""
		Generate one plot per (results_id, list_type_id) combination.

		2008-11-08
			Generate combinations of results_id, list_type_id and generate
			plots one after another. Save the plots into database if commit=1;
			otherwise plotCurve() is handed an output file name derived from
			self.fig_fname, the plot title and the first TopSNPTestType id.

		Exits with status 3 when no TopSNPTestType matches the input requirements.
		"""
		if self.debug:
			import pdb
			pdb.set_trace()
		db = Stock_250kDB.Stock_250kDB(drivername=self.drivername, username=self.db_user,
				   password=self.db_passwd, hostname=self.hostname, database=self.dbname, schema=self.schema)
		db.setup()
		session = db.session

		param_obj = PassingData(
			call_method_id=self.call_method_id,
			analysis_method_id=getattr(self, 'analysis_method_id', None),
			analysis_method_id_ls=getattr(self, 'analysis_method_id_ls', None),
			phenotype_method_id_ls=getattr(self, 'phenotype_method_id_ls', None),
			list_type_id_ls=self.list_type_id_ls,
			results_type=self.results_type)
		params_ls = MpiGeneListRankTest.generate_params(param_obj)

		def plot_already_in_db(type_id, results_id, list_type_id, title):
			"""Report on stderr and return True if a matching plot row exists."""
			rows = Stock_250kDB.CandidateVsNonRatioPlot.query.filter_by(type_id=type_id).\
				filter_by(results_id=results_id).filter_by(list_type_id=list_type_id)
			no_of_rows = rows.count()
			if no_of_rows > 0:
				sys.stderr.write('%s already in db (%s of them) with first id=%s.\n'%(title, no_of_rows, rows.first().id))
				return True
			return False

		for results_id, list_type_id in params_ls:
			rm = Stock_250kDB.ResultsMethod.get(results_id)
			list_type = Stock_250kDB.GeneListType.get(list_type_id)
			title = 'result(%s) of %s on %s with %s(%s) list'%\
				(results_id, rm.analysis_method.short_name, rm.phenotype_method.short_name, list_type.short_name, list_type.id)

			TopSNPTestType_id_ls = self.getTopSNPTestType_id_ls(self.get_closest, self.min_MAF, self.allow_two_sample_overlapping,
				self.results_type, self.test_type_id, self.null_distribution_type_id)
			# Must be checked before TopSNPTestType_id_ls[0] is touched below;
			# otherwise an empty list raises IndexError when commit is set.
			if not TopSNPTestType_id_ls:
				sys.stderr.write("No TopSNPTestType matches the input requirements. Exit.\n")
				sys.exit(3)
			first_type_id = TopSNPTestType_id_ls[0]

			# Skip the database queries and plotting if the plot is already stored.
			if self.commit and plot_already_in_db(first_type_id, results_id, list_type_id, title):
				continue

			from_where_clause = "from %s t, %s y where t.type_id=y.id and t.results_id=%s and t.list_type_id=%s and y.id in (%s)"%\
				(Stock_250kDB.CandidateGeneTopSNPTestRM.table.name, Stock_250kDB.CandidateGeneTopSNPTestRMType.table.name,
				results_id, list_type_id, ','.join(map(str, TopSNPTestType_id_ls)))

			no_of_top_snps_info = self.get_no_of_top_snps_info(db, from_where_clause)
			min_distance_info = self.get_min_distance_info(db, from_where_clause)
			rdata = self.get_data_matrix(db, no_of_top_snps_info, min_distance_info, from_where_clause, need_other_values=True,
				null_distribution_type_id=self.null_distribution_type_id)

			if SNPData.isDataMatrixEmpty(rdata.data_matrix):
				sys.stderr.write("Nothing fetched from database.\n")
				continue

			if self.output_fname:
				header = ['no_of_top_snps', ''] + min_distance_info.label_ls
				# The same labels serve as both the row-id and category columns.
				strain_acc_list = no_of_top_snps_info.label_ls
				category_list = no_of_top_snps_info.label_ls
				write_data_matrix(rdata.data_matrix, self.output_fname, header, strain_acc_list, category_list)

			if self.commit:
				# No file name is passed to plotCurve() in commit mode.
				output_fname_prefix = None
			else:
				# '/' in the title would be read as a directory separator.
				title_cp = title.replace('/', '_')
				output_fname_prefix = '%s_%s_type_%s.png'%(os.path.splitext(self.fig_fname)[0], title_cp, first_type_id)

			# Analysis methods 1 and 7 get a fixed x-axis range; others are left to plotCurve().
			if rm.analysis_method_id == 1 or rm.analysis_method_id == 7:
				preset_xlim = [0, 8]
			else:
				preset_xlim = None
			return_data = self.plotCurve(rdata, no_of_top_snps_info, min_distance_info, output_fname_prefix,
				title=title, commit=self.commit, preset_xlim=preset_xlim)

			if self.commit and return_data.png_data:
				# Check again right before saving: the same check is repeated
				# here as in the original, after the plot has been generated.
				if plot_already_in_db(first_type_id, results_id, list_type_id, title):
					continue
				plot = Stock_250kDB.CandidateVsNonRatioPlot(type_id=first_type_id, results_id=results_id, list_type_id=list_type_id)
				plot.png_thumbnail = return_data.png_thumbnail.getvalue()
				plot.png_data = return_data.png_data.getvalue()
				plot.svg_data = return_data.svg_data.getvalue()
				session.save(plot)
				session.flush()
Example #3
0
	def run(self):
		"""
		Draw one bar series per (results_id, list_type_id) combination onto a shared 3D plot.

		2008-11-11
			Each combination from MpiGeneListRankTest.generate_params() is passed
			to plot_one_bar(); combinations for which it returns a true value are
			collected. The figure is saved to self.fig_fname (plus an .svg next to
			it) and the collected data goes to output_data() if self.output_fname
			is set.

		Exits with status 3 when no TopSNPTestType matches the input requirements.
		"""
		if self.debug:
			import pdb
			pdb.set_trace()
		db = Stock_250kDB.Stock_250kDB(drivername=self.drivername, username=self.db_user,
				   password=self.db_passwd, hostname=self.hostname, database=self.dbname, schema=self.schema)
		db.setup()
		# NOTE(review): not used below; kept in case reading db.session has a side effect -- confirm.
		session = db.session

		param_obj = PassingData(
			call_method_id=self.call_method_id,
			analysis_method_id=getattr(self, 'analysis_method_id', None),
			analysis_method_id_ls=getattr(self, 'analysis_method_id_ls', None),
			phenotype_method_id_ls=getattr(self, 'phenotype_method_id_ls', None),
			list_type_id_ls=self.list_type_id_ls,
			results_type=self.results_type)
		params_ls = MpiGeneListRankTest.generate_params(param_obj)

		ax = self.init_3d_plot()
		result_index = 0	# position of the next successfully plotted result
		data_to_output_label_ls = []
		data_to_output_ls = []
		for results_id, list_type_id in params_ls:
			rm = Stock_250kDB.ResultsMethod.get(results_id)
			phenotype_label = '%s_%s'%\
				(rm.phenotype_method.id, rm.phenotype_method.short_name)
			title = '%s on %s_%s (%s)'%\
				(rm.analysis_method.short_name, rm.phenotype_method.id, rm.phenotype_method.short_name, results_id)
			#a short output label
			output_label = '%s_%s (%s)'%\
				(rm.phenotype_method.id, rm.phenotype_method.short_name, results_id)

			TopSNPTestType_id_ls = self.getTopSNPTestType_id_ls(self.get_closest, self.min_MAF, self.allow_two_sample_overlapping,
				self.results_type, self.test_type_id, self.null_distribution_type_id)
			# Must be checked before TopSNPTestType_id_ls[0] is touched below;
			# otherwise an empty list raises IndexError when commit is set.
			if not TopSNPTestType_id_ls:
				sys.stderr.write("No TopSNPTestType matches the input requirements. Exit.\n")
				sys.exit(3)

			if self.commit:
				rows = Stock_250kDB.CandidateVsNonRatioPlot.query.filter_by(type_id=TopSNPTestType_id_ls[0]).\
					filter_by(results_id=results_id).filter_by(list_type_id=list_type_id)
				no_of_rows = rows.count()
				if no_of_rows > 0:
					sys.stderr.write('%s already in db (%s of them) with first id=%s.\n'%(title, no_of_rows, rows.first().id))
					continue

			from_where_clause = "from %s t, %s y where t.type_id=y.id and t.results_id=%s and t.list_type_id=%s and y.id in (%s)"%\
				(Stock_250kDB.CandidateGeneTopSNPTestRM.table.name, Stock_250kDB.CandidateGeneTopSNPTestRMType.table.name,
				results_id, list_type_id, ','.join(map(str, TopSNPTestType_id_ls)))

			no_of_top_snps_info = self.get_no_of_top_snps_info(db, from_where_clause)
			min_distance_info = self.get_min_distance_info(db, from_where_clause)
			rdata = self.get_data_matrix(db, no_of_top_snps_info, min_distance_info, from_where_clause, need_other_values=True,
				null_distribution_type_id=self.null_distribution_type_id)

			#For random forest, take log and determine bar length according to score_cutoff_ls
			# NOTE(review): this compares against the string '6'; if analysis_method_id
			# is an integer the branch never fires -- confirm the intended type.
			score_cutoff_take_log = (rm.analysis_method_id == '6')
			return_code = self.plot_one_bar(ax, rdata, no_of_top_snps_info, min_distance_info, self.min_distance,
				result_index=result_index, data_type=self.data_type, output_fname=None,
				need_svg=False, title=phenotype_label, commit=0, preset_xlim=None,
				score_cutoff_take_log=score_cutoff_take_log)
			if return_code:
				data_to_output_label_ls.append(output_label)
				data_to_output_ls.append(return_code)
				result_index += 1

		if self.fig_fname:
			pylab.savefig(self.fig_fname, dpi=300)
			pylab.savefig('%s.svg'%os.path.splitext(self.fig_fname)[0], dpi=300)
		if self.output_fname and data_to_output_ls:
			self.output_data(data_to_output_label_ls, data_to_output_ls, self.min_distance, self.output_fname)
    def run(self):
        """
        Draw one bar series per (results_id, list_type_id) combination onto a shared 3D plot.

        2008-11-11
            Each combination from MpiGeneListRankTest.generate_params() is passed
            to plot_one_bar(); combinations for which it returns a true value are
            collected. The axes are labelled, the figure is saved to
            self.fig_fname (plus an .svg next to it) and the collected data goes
            to output_data() if self.output_fname is set.

        Exits with status 3 when self.results_type maps to no result classes
        or when no TopSNPTestType matches the input requirements.
        """
        if self.debug:
            import pdb
            pdb.set_trace()
        db = Stock_250kDB.Stock_250kDB(drivername=self.drivername,
                                       username=self.db_user,
                                       password=self.db_passwd,
                                       hostname=self.hostname,
                                       database=self.dbname,
                                       schema=self.schema)
        db.setup()
        # NOTE(review): not used below; kept in case reading db.session has a side effect -- confirm.
        session = db.session

        param_obj = PassingData(
            call_method_id=self.call_method_id,
            analysis_method_id=getattr(self, 'analysis_method_id', None),
            analysis_method_id_ls=getattr(self, 'analysis_method_id_ls', None),
            phenotype_method_id_ls=getattr(self, 'phenotype_method_id_ls', None),
            list_type_id_ls=self.list_type_id_ls,
            results_type=self.results_type)
        params_ls = MpiGeneListRankTest.generate_params(param_obj)

        ResultsClass, TestResultClass = Stock_250kDB.Stock_250kDB.getResultsAndTestResultsClass(
            results_type=self.results_type)

        if ResultsClass is None or TestResultClass is None:
            # The original referenced an undefined name here, so this error
            # path raised NameError instead of reporting the bad results type.
            sys.stderr.write("Invalid results type : %s.\n" % self.results_type)
            sys.exit(3)

        ax = self.init_3d_plot()
        result_index = 0  # position of the next successfully plotted result
        data_to_output_label_ls = []
        data_to_output_ls = []
        for results_id, list_type_id in params_ls:
            rm = ResultsClass.get(results_id)
            phenotype_label = '%s_%s' % \
                (rm.phenotype_method.id, rm.phenotype_method.short_name)
            title = '%s on %s_%s (%s)' % \
                (rm.analysis_method.short_name, rm.phenotype_method.id, rm.phenotype_method.short_name, results_id)
            #a short output label
            output_label = '%s_%s (%s)' % \
                (rm.phenotype_method.id, rm.phenotype_method.short_name, results_id)

            TopSNPTestType_id_ls = self.getTopSNPTestType_id_ls(
                self.get_closest, self.min_MAF, self.allow_two_sample_overlapping,
                self.results_type, self.test_type_id, self.null_distribution_type_id)
            # Must be checked before TopSNPTestType_id_ls[0] is touched below;
            # otherwise an empty list raises IndexError when commit is set.
            if not TopSNPTestType_id_ls:
                sys.stderr.write(
                    "No TopSNPTestType matches the input requirements. Exit.\n"
                )
                sys.exit(3)

            if self.commit:
                rows = Stock_250kDB.CandidateVsNonRatioPlot.query.filter_by(type_id=TopSNPTestType_id_ls[0]).\
                    filter_by(results_id=results_id).filter_by(list_type_id=list_type_id)
                no_of_rows = rows.count()
                if no_of_rows > 0:
                    sys.stderr.write(
                        '%s already in db (%s of them) with first id=%s.\n' %
                        (title, no_of_rows, rows.first().id))
                    continue

            from_where_clause = "from %s t, %s y where t.type_id=y.id and t.results_id=%s and t.list_type_id=%s and y.id in (%s)" % \
                (TestResultClass.table.name, Stock_250kDB.CandidateGeneTopSNPTestRMType.table.name,
                 results_id, list_type_id, ','.join(map(str, TopSNPTestType_id_ls)))

            no_of_top_snps_info = self.get_no_of_top_snps_info(
                db, from_where_clause)
            min_distance_info = self.get_min_distance_info(
                db, from_where_clause)
            rdata = self.get_data_matrix(
                db, no_of_top_snps_info, min_distance_info, from_where_clause,
                need_other_values=True,
                null_distribution_type_id=self.null_distribution_type_id)

            #For random forest, take log and determine bar length according to score_cutoff_ls
            # NOTE(review): this compares against the string '6'; if analysis_method_id
            # is an integer the branch never fires -- confirm the intended type.
            score_cutoff_take_log = (rm.analysis_method_id == '6')
            return_code = self.plot_one_bar(
                ax, rdata, no_of_top_snps_info, min_distance_info, self.min_distance,
                result_index=result_index, data_type=self.data_type, output_fname=None,
                need_svg=False, title=phenotype_label, commit=0, preset_xlim=None,
                score_cutoff_take_log=score_cutoff_take_log)
            if return_code:
                data_to_output_label_ls.append(output_label)
                data_to_output_ls.append(return_code)
                result_index += 1

        ax.set_xlabel('cutoff')
        ax.set_ylabel('result')
        ax.set_zlabel('data')
        if self.fig_fname:
            pylab.savefig(self.fig_fname, dpi=300)
            pylab.savefig('%s.svg' % os.path.splitext(self.fig_fname)[0],
                          dpi=300)
        if self.output_fname and data_to_output_ls:
            self.output_data(data_to_output_label_ls, data_to_output_ls,
                             self.min_distance, self.output_fname)