Example #1
0
	def run(self):
		"""
		Build the results-method data matrix and output it.

		The matrix is obtained through getResultsMethodIDInfo(),
		getResultsMethodID2GeneSet() and getDataMatrix(). Afterwards:
			- if the matrix is empty, a message goes to stderr and the process exits with status 3;
			- if self.output_fname is set, the matrix is written there;
			- if self.fig_fname is set, the matrix and a colour legend are drawn and saved there.
		"""
		if self.debug:
			import pdb
			pdb.set_trace()
		connection_kwargs = dict(drivername=self.drivername, username=self.db_user, password=self.db_passwd,
								hostname=self.hostname, database=self.dbname, schema=self.schema)
		db = Stock_250kDB(**connection_kwargs)
		db.setup()
		session = db.session	#kept from the original flow; not used further in this method

		method_info = self.getResultsMethodIDInfo(db, self.call_method_id_ls, self.min_distance,
												self.get_closest, self.min_MAF)
		method_id2gene_set = self.getResultsMethodID2GeneSet(db, method_info, self.results_directory, self.max_rank)
		rdata = self.getDataMatrix(method_id2gene_set, method_info)

		#rows and columns share the same labels
		column_header = ['', ''] + method_info.results_method_id_label_ls
		row_labels = method_info.results_method_id_label_ls
		row_categories = method_info.results_method_id_ls

		if SNPData.isDataMatrixEmpty(rdata.data_matrix):
			sys.stderr.write("Nothing fetched from database.\n")
			sys.exit(3)

		if self.output_fname:
			write_data_matrix(rdata.data_matrix, self.output_fname, column_header, row_labels, row_categories)

		if not self.fig_fname:
			return

		font = get_font(self.font_path, font_size=self.font_size)	#2008-08-01

		def value2color_func(x):
			#map a matrix value to a colour, scaled by the matrix's value range
			return Value2Color.value2HSLcolor(x, rdata.min_value, rdata.max_value)

		legend_image = drawContinousLegend(rdata.min_value, rdata.max_value, self.no_of_ticks, value2color_func, font)
		matrix_image = drawMatrix(rdata.data_matrix, value2color_func, row_labels, row_labels,
								with_grid=1, font=font)
		combined_image = combineTwoImages(matrix_image, legend_image, font=font)
		combined_image.save(self.fig_fname)
Example #2
0
	def run(self):
		"""
		Build the strain/target info queries, fetch the QC data matrix and output it.

		Branches visible in this method:
			- QC_method_id == 4: strains are selected through the Strain table and the
			  target info reuses the strain info (re-queried when how_to_group_strains == 3);
			- any other QC_method_id: strains and targets are selected through QCCrossMatch
			  rows filtered by qc_method_id, min_no_of_non_NAs and max_mismatch_rate;
			- how_to_group_strains 2 or 3: strain info comes from getStrainInfoGivenPlateInfo();
			- input_fname set: id sets and the matrix come from the *FromFile helpers.

		An empty matrix ends the process with status 3. Otherwise the matrix is written to
		self.output_fname and/or drawn into self.fig_fname, whichever are set.
		"""
		if self.debug:
			import pdb
			pdb.set_trace()
		connection_kwargs = dict(drivername=self.drivername, username=self.db_user, password=self.db_passwd,
								hostname=self.hostname, database=self.dbname, schema=self.schema)
		db = StockDB.StockDB(**connection_kwargs)
		db.setup(create_tables=False)
		session = db.session	#kept from the original flow; not used further in this method

		#how to order strains.
		order_by_sentence = " order by c.longitude, c.latitude, e.longitude, e.latitude, e.nativename "
		is_qc_method_4 = (self.QC_method_id == 4)

		if is_qc_method_4:
			table_names = (StockDB.Ecotype.table.name, StockDB.Site.table.name, StockDB.Address.table.name,
							StockDB.Country.table.name)
			from_clause = "from %s e, %s s, %s a, %s c" % table_names
			where_template = "where e.siteid=s.id and s.addressid=a.id and a.countryid=c.id %s " + order_by_sentence
			strain_where = where_template % " and e.id=st.ecotypeid"
			strain_query_args = (from_clause, StockDB.Strain.table.name, strain_where)
			strain_query = "select distinct st.id as strainid, e.id as ecotypeid, e.nativename, s.name as sitename, c.abbr %s, %s st %s" % strain_query_args
		else:
			table_names = (StockDB.QCCrossMatch.table.name, StockDB.Ecotype.table.name, StockDB.Site.table.name,
							StockDB.Address.table.name, StockDB.Country.table.name)
			from_clause = "from %s q, %s e, %s s, %s a, %s c" % table_names
			#only the QC filter is formatted here; the leading %s of the template is filled in later
			qc_filter = " and q.qc_method_id=%s and q.no_of_non_NA_pairs>=%s and q.mismatch_rate<=%s " % \
				(self.QC_method_id, self.min_no_of_non_NAs, self.max_mismatch_rate)
			where_template = "where e.siteid=s.id and s.addressid=a.id and a.countryid=c.id %s" + qc_filter + order_by_sentence
			strain_where = where_template % " and e.id=st.ecotypeid and st.id=q.strainid"
			strain_query_args = (from_clause, StockDB.Strain.table.name, strain_where)
			strain_query = "select distinct q.strainid, e.id as ecotypeid, e.nativename, s.name as sitename, c.abbr %s, %s st %s" % strain_query_args

		if self.how_to_group_strains in (2, 3):
			plate_info = self.alignStrainsAccordingToSeqPlate(db)
			id_set_data = PassingData()
			id_set_data.strain_id_set = None
			id_set_data.target_id_set = None
			strain_id_info = self.getStrainInfoGivenPlateInfo(db, plate_info, strain_query, strain_id_set=None)
		else:
			if self.input_fname:
				id_set_data = self.getStrainidTargetidFromFile(db, self.QC_method_id, self.input_fname,
															self.max_mismatch_rate, self.min_no_of_non_NAs)
			else:
				id_set_data = PassingData()
				id_set_data.strain_id_set = None
				id_set_data.target_id_set = None
			strain_id_info = self.getStrainIDInfo(db, strain_query, id_set_data.strain_id_set)

		if not is_qc_method_4:
			target_where = where_template % " and e.id=q.target_id"
			target_query = "select distinct e.id as strainid, e.id as ecotypeid, e.nativename, s.name as sitename, c.abbr %s %s" % (from_clause, target_where)
			target_id_info = self.getStrainIDInfo(db, target_query)
		elif self.how_to_group_strains == 3:
			#2008-09-15 column strain id is in country, strain-longitude order
			target_id_info = self.getStrainIDInfo(db, strain_query, id_set_data.strain_id_set)
		else:
			target_id_info = strain_id_info

		if self.input_fname:
			rdata = self.get_data_matrixFromFile(db, strain_id_info, target_id_info, self.QC_method_id,
												self.input_fname, self.max_mismatch_rate, self.min_no_of_non_NAs)
		else:
			rdata = self.get_data_matrix(db, strain_id_info, target_id_info, self.QC_method_id,
										self.max_mismatch_rate, self.min_no_of_non_NAs)

		rdata.data_matrix = self.markDataMatrixBoundary(rdata.data_matrix, strain_id_info, target_id_info)

		column_header = ['strain info', ''] + target_id_info.strain_label_ls
		row_labels = strain_id_info.strain_label_ls
		row_categories = [1]*len(row_labels)

		if SNPData.isDataMatrixEmpty(rdata.data_matrix):
			sys.stderr.write("Nothing fetched from database.\n")
			sys.exit(3)

		if self.output_fname:
			write_data_matrix(rdata.data_matrix, self.output_fname, column_header, row_labels, row_categories)

		if not self.fig_fname:
			return

		font = get_font(self.font_path, font_size=self.font_size)	#2008-08-01

		def value2color_func(x):
			#map a matrix value to a colour, scaled by the matrix's value range
			return Value2Color.value2HSLcolor(x, rdata.min_value, rdata.max_value)

		legend_image = drawContinousLegend(rdata.min_value, rdata.max_value, self.no_of_ticks, value2color_func, font)
		matrix_image = drawMatrix(rdata.data_matrix, value2color_func, strain_id_info.strain_label_ls,
								target_id_info.strain_label_ls, with_grid=1, font=font)
		combined_image = combineTwoImages(matrix_image, legend_image, font=font)
		combined_image.save(self.fig_fname)
Example #3
0
    def run(self):
        """
        Build the strain/target info queries, fetch the QC data matrix and output it.

        Branches visible in this method:
            - QC_method_id == 4: strains are selected through the Strain table and the
              target info reuses the strain info (re-queried when how_to_group_strains == 3);
            - any other QC_method_id: strains and targets are selected through QCCrossMatch
              rows filtered by qc_method_id, min_no_of_non_NAs and max_mismatch_rate;
            - how_to_group_strains 2 or 3: strain info comes from getStrainInfoGivenPlateInfo();
            - input_fname set: id sets and the matrix come from the *FromFile helpers.

        An empty matrix ends the process with status 3. Otherwise the matrix is written to
        self.output_fname and/or drawn into self.fig_fname, whichever are set.
        """
        if self.debug:
            import pdb
            pdb.set_trace()
        connection_kwargs = dict(drivername=self.drivername,
                                 username=self.db_user,
                                 password=self.db_passwd,
                                 hostname=self.hostname,
                                 database=self.dbname,
                                 schema=self.schema)
        db = StockDB.StockDB(**connection_kwargs)
        db.setup(create_tables=False)
        session = db.session  #kept from the original flow; not used further in this method

        #how to order strains.
        order_by_sentence = " order by c.longitude, c.latitude, e.longitude, e.latitude, e.nativename "
        is_qc_method_4 = (self.QC_method_id == 4)

        if is_qc_method_4:
            table_names = (StockDB.Ecotype.table.name,
                           StockDB.Site.table.name,
                           StockDB.Address.table.name,
                           StockDB.Country.table.name)
            from_clause = "from %s e, %s s, %s a, %s c" % table_names
            where_template = "where e.siteid=s.id and s.addressid=a.id and a.countryid=c.id %s " + order_by_sentence
            strain_where = where_template % " and e.id=st.ecotypeid"
            strain_query_args = (from_clause, StockDB.Strain.table.name,
                                 strain_where)
            strain_query = "select distinct st.id as strainid, e.id as ecotypeid, e.nativename, s.name as sitename, c.abbr %s, %s st %s" % strain_query_args
        else:
            table_names = (StockDB.QCCrossMatch.table.name,
                           StockDB.Ecotype.table.name,
                           StockDB.Site.table.name,
                           StockDB.Address.table.name,
                           StockDB.Country.table.name)
            from_clause = "from %s q, %s e, %s s, %s a, %s c" % table_names
            #only the QC filter is formatted here; the leading %s of the template is filled in later
            qc_filter = " and q.qc_method_id=%s and q.no_of_non_NA_pairs>=%s and q.mismatch_rate<=%s " % (
                self.QC_method_id, self.min_no_of_non_NAs,
                self.max_mismatch_rate)
            where_template = "where e.siteid=s.id and s.addressid=a.id and a.countryid=c.id %s" + qc_filter + order_by_sentence
            strain_where = where_template % " and e.id=st.ecotypeid and st.id=q.strainid"
            strain_query_args = (from_clause, StockDB.Strain.table.name,
                                 strain_where)
            strain_query = "select distinct q.strainid, e.id as ecotypeid, e.nativename, s.name as sitename, c.abbr %s, %s st %s" % strain_query_args

        if self.how_to_group_strains in (2, 3):
            plate_info = self.alignStrainsAccordingToSeqPlate(db)
            id_set_data = PassingData()
            id_set_data.strain_id_set = None
            id_set_data.target_id_set = None
            strain_id_info = self.getStrainInfoGivenPlateInfo(
                db, plate_info, strain_query, strain_id_set=None)
        else:
            if self.input_fname:
                id_set_data = self.getStrainidTargetidFromFile(
                    db, self.QC_method_id, self.input_fname,
                    self.max_mismatch_rate, self.min_no_of_non_NAs)
            else:
                id_set_data = PassingData()
                id_set_data.strain_id_set = None
                id_set_data.target_id_set = None
            strain_id_info = self.getStrainIDInfo(db, strain_query,
                                                  id_set_data.strain_id_set)

        if not is_qc_method_4:
            target_where = where_template % " and e.id=q.target_id"
            target_query = "select distinct e.id as strainid, e.id as ecotypeid, e.nativename, s.name as sitename, c.abbr %s %s" % (
                from_clause, target_where)
            target_id_info = self.getStrainIDInfo(db, target_query)
        elif self.how_to_group_strains == 3:
            #2008-09-15 column strain id is in country, strain-longitude order
            target_id_info = self.getStrainIDInfo(db, strain_query,
                                                  id_set_data.strain_id_set)
        else:
            target_id_info = strain_id_info

        if self.input_fname:
            rdata = self.get_data_matrixFromFile(
                db, strain_id_info, target_id_info, self.QC_method_id,
                self.input_fname, self.max_mismatch_rate,
                self.min_no_of_non_NAs)
        else:
            rdata = self.get_data_matrix(
                db, strain_id_info, target_id_info, self.QC_method_id,
                self.max_mismatch_rate, self.min_no_of_non_NAs)

        rdata.data_matrix = self.markDataMatrixBoundary(
            rdata.data_matrix, strain_id_info, target_id_info)

        column_header = ['strain info', ''] + target_id_info.strain_label_ls
        row_labels = strain_id_info.strain_label_ls
        row_categories = [1] * len(row_labels)

        if SNPData.isDataMatrixEmpty(rdata.data_matrix):
            sys.stderr.write("Nothing fetched from database.\n")
            sys.exit(3)

        if self.output_fname:
            write_data_matrix(rdata.data_matrix, self.output_fname,
                              column_header, row_labels, row_categories)

        if not self.fig_fname:
            return

        font = get_font(self.font_path, font_size=self.font_size)  #2008-08-01

        def value2color_func(x):
            #map a matrix value to a colour, scaled by the matrix's value range
            return Value2Color.value2HSLcolor(x, rdata.min_value,
                                              rdata.max_value)

        legend_image = drawContinousLegend(rdata.min_value, rdata.max_value,
                                           self.no_of_ticks, value2color_func,
                                           font)
        matrix_image = drawMatrix(rdata.data_matrix,
                                  value2color_func,
                                  strain_id_info.strain_label_ls,
                                  target_id_info.strain_label_ls,
                                  with_grid=1,
                                  font=font)
        combined_image = combineTwoImages(matrix_image, legend_image, font=font)
        combined_image.save(self.fig_fname)
Example #4
0
	def run(self):	
		"""
		Fetch candidate-gene test results as a matrix and output it.

		Steps, in order:
			1. drop into pdb when self.debug is set;
			2. create a Stock_250kDB handle from the connection attributes and call setup();
			3. pick the test-result table from self.test_result_type (1, 2 or 3;
			   any other value writes a message to stderr and exits with status 2);
			4. assemble where_condition, a "<tables> where <conditions>" SQL fragment,
			   and append the optional filters whose attributes are truthy;
			5. pass where_condition to getListTypeInfo(), getAnalysisMethodInfo(),
			   getPhenotypeInfo() and get_data_matrix();
			6. exit with status 3 if the matrix is empty; otherwise write it to
			   self.output_fname and/or draw it (plus a colour legend) into
			   self.fig_fname, whichever are set.

		All values are interpolated into the SQL text with %-formatting, so they
		are not escaped here.
		"""
		if self.debug:
			import pdb
			pdb.set_trace()
		db = Stock_250kDB.Stock_250kDB(drivername=self.drivername, username=self.db_user,
						password=self.db_passwd, hostname=self.hostname, database=self.dbname, schema=self.schema)
		db.setup()
		session = db.session	#not used again in this method
		
		if self.test_result_type==1:
			test_result_class_table = CandidateGeneRankSumTestResult.table.name
			#NOTE(review): this hard-coded table name overrides the ORM table name assigned on the line above -- confirm it is still intended.
			test_result_class_table = 'candidate_gene_rank_sum_test_result_2008_09_15'
		elif self.test_result_type==2:
			test_result_class_table = CandidateGeneTopSNPTest.table.name
		elif self.test_result_type==3:
			test_result_class_table = Stock_250kDB.CandidateGeneRankSumTestResultMethod.table.name
		else:
			sys.stderr.write(" test_result_type %s not supported.\n"%(self.test_result_type))
			sys.exit(2)

		#the condition for min_MAF is tricky because of the floating precision.
		#hence min_MAF is matched with a tolerance (abs(difference)<0.00001) instead of "=".
		#The backslash continuations below sit inside the string literals, so the leading
		#whitespace of each continued line becomes part of the SQL text.
		if self.test_result_type==1:
			where_condition = "%s r, %s c, %s g where g.id=c.list_type_id and r.analysis_method_id is not null \
				and c.results_id=r.id and c.get_closest=%s and c.min_distance=%s and abs(c.min_MAF-%s)<0.00001"\
				%(ResultsMethod.table.name, test_result_class_table, GeneListType.table.name, self.get_closest, self.min_distance, self.min_MAF)
		elif self.test_result_type==2:
			#type 2 additionally goes through the ResultsByGene table (alias rg) to reach ResultsMethod
			where_condition = "%s r, %s rg, %s c, %s g where g.id=c.list_type_id and r.analysis_method_id is not null and r.id=rg.results_method_id \
				and c.results_id=rg.id and c.get_closest=%s and c.min_distance=%s and abs(c.min_MAF-%s)<0.00001"\
				%(ResultsMethod.table.name, ResultsByGene.table.name, test_result_class_table, GeneListType.table.name, self.get_closest, self.min_distance, self.min_MAF)
		elif self.test_result_type==3:
			#same template as test_result_type 1; only test_result_class_table differs
			where_condition = "%s r, %s c, %s g where g.id=c.list_type_id and r.analysis_method_id is not null \
				and c.results_id=r.id and c.get_closest=%s and c.min_distance=%s and abs(c.min_MAF-%s)<0.00001"\
				%(ResultsMethod.table.name, test_result_class_table, GeneListType.table.name, self.get_closest, self.min_distance, self.min_MAF)
		#optional filters: each one is appended only when its attribute is truthy
		if self.call_method_id_ls:
			where_condition += " and r.call_method_id in (%s)"%self.call_method_id_ls
		
		if self.analysis_method_id_ls:
			where_condition += " and r.analysis_method_id in (%s)"%self.analysis_method_id_ls
		if self.super_type_id:
			where_condition += " and g.super_type_id=%s"%self.super_type_id
		
		if self.test_type:
			where_condition += " and c.test_type=%s"%self.test_type
		
		#filters specific to the test-result type (type 3 gets none)
		if self.test_result_type==1:
			pass	#no-op; the filter on the next line is still applied
			where_condition += " and c.max_pvalue_per_gene=%s"%(self.max_pvalue_per_gene)
		elif self.test_result_type==2:
			where_condition += " and c.no_of_top_snps=%s"%(self.no_of_top_snps)		
		
		list_type_id_ls = self.getListTypeInfo(db, where_condition)
		analysis_method_id_ls = self.getAnalysisMethodInfo(db, where_condition)
		list_type_analysis_method_info = self.orderListTypeAnalysisMethodID(list_type_id_ls, analysis_method_id_ls)
		phenotype_info = self.getPhenotypeInfo(db, where_condition)
		rdata = self.get_data_matrix(db, phenotype_info, list_type_analysis_method_info, where_condition)
		
		rdata.data_matrix = self.markDataMatrixBoundary(rdata.data_matrix, phenotype_info, list_type_analysis_method_info)
		
		#rows are labelled by list-type/analysis-method, columns by phenotype method
		header = ['list_type_analysis_method', ''] + phenotype_info.phenotype_method_label_ls
		strain_acc_list = list_type_analysis_method_info.list_type_analysis_method_label_ls
		category_list = list_type_analysis_method_info.list_type_id_analysis_method_id_ls
		if SNPData.isDataMatrixEmpty(rdata.data_matrix):
			sys.stderr.write("Nothing fetched from database.\n")
			sys.exit(3)
		if self.output_fname:
			write_data_matrix(rdata.data_matrix, self.output_fname, header, strain_acc_list, category_list)
		
		if self.fig_fname:
			font = get_font(self.font_path, font_size=self.font_size)	#2008-08-01
			#colours are scaled by the matrix's own min_value/max_value
			value2color_func = lambda x: Value2Color.value2HSLcolor(x, rdata.min_value, rdata.max_value)
			im_legend = drawContinousLegend(rdata.min_value, rdata.max_value, self.no_of_ticks, value2color_func, font)
			#im.save('%s_legend.png'%self.fig_fname_prefix)
			im = drawMatrix(rdata.data_matrix, value2color_func, list_type_analysis_method_info.list_type_analysis_method_label_ls,\
						phenotype_info.phenotype_method_label_ls, with_grid=1, font=font)
			#the legend and the matrix are merged into one image before saving
			im = combineTwoImages(im, im_legend, font=font)
			im.save(self.fig_fname)
    def run(self):
        """
        Fetch candidate-gene test results as a matrix and output it.

        Steps, in order:
            1. drop into pdb when self.debug is set;
            2. create a Stock_250kDB handle from the connection attributes and call setup();
            3. pick the test-result table from self.test_result_type (1, 2 or 3;
               any other value writes a message to stderr and exits with status 2);
            4. assemble where_condition, a "<tables> where <conditions>" SQL fragment,
               and append the optional filters whose attributes are truthy;
            5. pass where_condition to getListTypeInfo(), getAnalysisMethodInfo(),
               getPhenotypeInfo() and get_data_matrix();
            6. exit with status 3 if the matrix is empty; otherwise write it to
               self.output_fname and/or draw it (plus a colour legend) into
               self.fig_fname, whichever are set.

        All values are interpolated into the SQL text with %-formatting, so they
        are not escaped here.
        """
        if self.debug:
            import pdb
            pdb.set_trace()
        db = Stock_250kDB.Stock_250kDB(drivername=self.drivername,
                                       username=self.db_user,
                                       password=self.db_passwd,
                                       hostname=self.hostname,
                                       database=self.dbname,
                                       schema=self.schema)
        db.setup()
        session = db.session  #not used again in this method

        if self.test_result_type == 1:
            test_result_class_table = CandidateGeneRankSumTestResult.table.name
            #NOTE(review): this hard-coded table name overrides the ORM table name assigned on the line above -- confirm it is still intended.
            test_result_class_table = 'candidate_gene_rank_sum_test_result_2008_09_15'
        elif self.test_result_type == 2:
            test_result_class_table = CandidateGeneTopSNPTest.table.name
        elif self.test_result_type == 3:
            test_result_class_table = Stock_250kDB.CandidateGeneRankSumTestResultMethod.table.name
        else:
            sys.stderr.write(" test_result_type %s not supported.\n" %
                             (self.test_result_type))
            sys.exit(2)

        #the condition for min_MAF is tricky because of the floating precision.
        #hence min_MAF is matched with a tolerance (abs(difference)<0.00001) instead of "=".
        #The backslash continuations below sit inside the string literals, so the leading
        #whitespace of each continued line becomes part of the SQL text.
        if self.test_result_type == 1:
            where_condition = "%s r, %s c, %s g where g.id=c.list_type_id and r.analysis_method_id is not null \
				and c.results_id=r.id and c.get_closest=%s and c.min_distance=%s and abs(c.min_MAF-%s)<0.00001"         \
             %(ResultsMethod.table.name, test_result_class_table, GeneListType.table.name, self.get_closest, self.min_distance, self.min_MAF)
        elif self.test_result_type == 2:
            #type 2 additionally goes through the ResultsByGene table (alias rg) to reach ResultsMethod
            where_condition = "%s r, %s rg, %s c, %s g where g.id=c.list_type_id and r.analysis_method_id is not null and r.id=rg.results_method_id \
				and c.results_id=rg.id and c.get_closest=%s and c.min_distance=%s and abs(c.min_MAF-%s)<0.00001"         \
             %(ResultsMethod.table.name, ResultsByGene.table.name, test_result_class_table, GeneListType.table.name, self.get_closest, self.min_distance, self.min_MAF)
        elif self.test_result_type == 3:
            #same template as test_result_type 1; only test_result_class_table differs
            where_condition = "%s r, %s c, %s g where g.id=c.list_type_id and r.analysis_method_id is not null \
				and c.results_id=r.id and c.get_closest=%s and c.min_distance=%s and abs(c.min_MAF-%s)<0.00001"         \
             %(ResultsMethod.table.name, test_result_class_table, GeneListType.table.name, self.get_closest, self.min_distance, self.min_MAF)
        #optional filters: each one is appended only when its attribute is truthy
        if self.call_method_id_ls:
            where_condition += " and r.call_method_id in (%s)" % self.call_method_id_ls

        if self.analysis_method_id_ls:
            where_condition += " and r.analysis_method_id in (%s)" % self.analysis_method_id_ls
        if self.super_type_id:
            where_condition += " and g.super_type_id=%s" % self.super_type_id

        if self.test_type:
            where_condition += " and c.test_type=%s" % self.test_type

        #filters specific to the test-result type (type 3 gets none)
        if self.test_result_type == 1:
            pass  #no-op; the filter on the next line is still applied
            where_condition += " and c.max_pvalue_per_gene=%s" % (
                self.max_pvalue_per_gene)
        elif self.test_result_type == 2:
            where_condition += " and c.no_of_top_snps=%s" % (
                self.no_of_top_snps)

        list_type_id_ls = self.getListTypeInfo(db, where_condition)
        analysis_method_id_ls = self.getAnalysisMethodInfo(db, where_condition)
        list_type_analysis_method_info = self.orderListTypeAnalysisMethodID(
            list_type_id_ls, analysis_method_id_ls)
        phenotype_info = self.getPhenotypeInfo(db, where_condition)
        rdata = self.get_data_matrix(db, phenotype_info,
                                     list_type_analysis_method_info,
                                     where_condition)

        rdata.data_matrix = self.markDataMatrixBoundary(
            rdata.data_matrix, phenotype_info, list_type_analysis_method_info)

        #rows are labelled by list-type/analysis-method, columns by phenotype method
        header = ['list_type_analysis_method', ''
                  ] + phenotype_info.phenotype_method_label_ls
        strain_acc_list = list_type_analysis_method_info.list_type_analysis_method_label_ls
        category_list = list_type_analysis_method_info.list_type_id_analysis_method_id_ls
        if SNPData.isDataMatrixEmpty(rdata.data_matrix):
            sys.stderr.write("Nothing fetched from database.\n")
            sys.exit(3)
        if self.output_fname:
            write_data_matrix(rdata.data_matrix, self.output_fname, header,
                              strain_acc_list, category_list)

        if self.fig_fname:
            font = get_font(self.font_path,
                            font_size=self.font_size)  #2008-08-01
            #colours are scaled by the matrix's own min_value/max_value
            value2color_func = lambda x: Value2Color.value2HSLcolor(
                x, rdata.min_value, rdata.max_value)
            im_legend = drawContinousLegend(rdata.min_value, rdata.max_value,
                                            self.no_of_ticks, value2color_func,
                                            font)
            #im.save('%s_legend.png'%self.fig_fname_prefix)
            im = drawMatrix(rdata.data_matrix, value2color_func, list_type_analysis_method_info.list_type_analysis_method_label_ls,\
               phenotype_info.phenotype_method_label_ls, with_grid=1, font=font)
            #the legend and the matrix are merged into one image before saving
            im = combineTwoImages(im, im_legend, font=font)
            im.save(self.fig_fname)