Esempio n. 1
0
    def calculateOverlappingStatForOneCombo(self, db, phenotype_method_id, call_method_id, analysis_method_id_ls, \
             no_of_top_snps=1000, association_overlapping_type=None, commit=False, \
             results_directory=None):
        """
        Count the SNPs shared among the top-ranked results of several analysis methods
        and save that count as an AssociationOverlappingStat row.

        Arguments:
            db: database object; db.session is used to save the stat, and db itself is
                handed to DrawSNPRegion.getSNPInfo() and ResultsMethod2Results.rm2result().
            phenotype_method_id, call_method_id: together with each analysis method id,
                they select one ResultsMethod row.
            analysis_method_id_ls: analysis method ids whose top SNP sets are intersected.
            no_of_top_snps: rank cutoff; Results rows with rank<=no_of_top_snps are used.
            association_overlapping_type: stored as overlapping_type on the new row.
            commit, results_directory: forwarded to ResultsMethod2Results.rm2result(),
                which is called when fewer than no_of_top_snps entries are found
                (presumably it loads the missing ranks -- confirm against rm2result).

        Returns None. An error is written to stderr and nothing is saved when
        analysis_method_id_ls is empty, when no ResultsMethod matches one of the
        combinations, or when a result does not end up with exactly no_of_top_snps entries.

        SNP id sets are cached in self.results_id2snp_id_set, keyed by ResultsMethod id.

        2012.3.23
            pass argument db_250k to ResultsMethod2Results.rm2result()
        2009-11-2
        """
        sys.stderr.write("Calculating overlapping stat for phenotype %s and combo %s ...\n"%(phenotype_method_id, \
                            repr(analysis_method_id_ls),))
        if not analysis_method_id_ls:
            # nothing to intersect; indexing the first set below would raise IndexError
            sys.stderr.write("Error: no analysis method given for phenotype %s (call_method_id %s).\n"%\
                            (phenotype_method_id, call_method_id))
            return
        session = db.session
        snp_id_set_ls = []
        for analysis_method_id in analysis_method_id_ls:
            rm = Stock_250kDB.ResultsMethod.query.filter_by(phenotype_method_id=phenotype_method_id).\
              filter_by(call_method_id=call_method_id).filter_by(analysis_method_id=analysis_method_id).first()
            if rm is None:
                # first() yields None when no row matches; report it instead of failing on rm.id
                sys.stderr.write("Error: no ResultsMethod for phenotype_method_id %s, call_method_id %s, analysis_method_id %s.\n"%\
                                (phenotype_method_id, call_method_id, analysis_method_id))
                return
            if rm.id in self.results_id2snp_id_set:
                snp_id_set = self.results_id2snp_id_set.get(rm.id)
            else:
                top_entries_query = Stock_250kDB.Results.query.filter_by(results_id=rm.id).\
                  filter(Stock_250kDB.Results.rank<=no_of_top_snps)
                association_entries = top_entries_query
                no_of_association_entries = association_entries.count()
                if no_of_association_entries < no_of_top_snps:
                    # not enough entries yet: ask rm2result for the ranks that are still missing
                    min_rank = no_of_association_entries + 1
                    max_rank = no_of_top_snps
                    if self.snp_info is None:
                        # fetched on first use and kept on the instance
                        self.snp_info = DrawSNPRegion.getSNPInfo(db)
                    ResultsMethod2Results.rm2result(session, rm, self.snp_info, min_rank=min_rank, max_rank=max_rank, \
                           commit=commit, results_directory=results_directory, db_250k=db)
                    # build a fresh query after rm2result, as the original code did
                    association_entries = Stock_250kDB.Results.query.filter_by(results_id=rm.id).\
                      filter(Stock_250kDB.Results.rank<=no_of_top_snps)
                no_of_association_entries = association_entries.count()
                if no_of_association_entries != no_of_top_snps:
                    sys.stderr.write(
                        "Error: The number of SNPs %s from Result %s (analysis_method_id %s) doesn't match the no_of_top_snps %s.\n"
                        % (no_of_association_entries, rm.id,
                           rm.analysis_method_id, no_of_top_snps))
                    return
                snp_id_set = set()
                for entry in association_entries:
                    snp_id_set.add(entry.snps_id)
                self.results_id2snp_id_set[rm.id] = snp_id_set
            snp_id_set_ls.append(snp_id_set)

        # "&" builds a new set each time, so the cached sets are never modified
        overlapping_snp_id_set = snp_id_set_ls[0]
        for snp_id_set in snp_id_set_ls[1:]:
            overlapping_snp_id_set = overlapping_snp_id_set & snp_id_set
        no_of_overlapping_snps = len(overlapping_snp_id_set)

        entry = Stock_250kDB.AssociationOverlappingStat(phenotype_method_id=phenotype_method_id, call_method_id=call_method_id, \
                no_of_top_snps=no_of_top_snps, no_of_overlapping_snps=no_of_overlapping_snps)
        entry.overlapping_type = association_overlapping_type
        session.save(entry)
        session.flush()
        sys.stderr.write("%s overlapping SNPs out of %s results. Done.\n" %
                         (no_of_overlapping_snps, len(snp_id_set_ls)))
Esempio n. 2
0
	def calculateOverlappingStatForOneCombo(self, db, phenotype_method_id, call_method_id, analysis_method_id_ls, \
										no_of_top_snps=1000, association_overlapping_type=None, commit=False, \
										results_directory=None):
		"""
		Intersect the top-ranked SNP sets of several analysis methods and save the size
		of the intersection as an AssociationOverlappingStat row.

		Arguments:
			db: database object; db.session is used to save the stat, and db itself is
				handed to DrawSNPRegion.getSNPInfo().
			phenotype_method_id, call_method_id: together with each analysis method id,
				they select one ResultsMethod row.
			analysis_method_id_ls: analysis method ids whose top SNP sets are intersected.
			no_of_top_snps: rank cutoff; Results rows with rank<=no_of_top_snps are used.
			association_overlapping_type: stored as overlapping_type on the new row.
			commit, results_directory: forwarded to ResultsMethod2Results.rm2result(),
				which is called when fewer than no_of_top_snps entries are found
				(presumably it loads the missing ranks -- confirm against rm2result).

		Returns None. An error is written to stderr and nothing is saved when
		analysis_method_id_ls is empty, when no ResultsMethod matches one of the
		combinations, or when a result does not end up with exactly no_of_top_snps entries.

		SNP id sets are cached in self.results_id2snp_id_set, keyed by ResultsMethod id.

		2009-11-2
		"""
		sys.stderr.write("Calculating overlapping stat for phenotype %s and combo %s ...\n"%(phenotype_method_id, \
																						repr(analysis_method_id_ls),))
		if not analysis_method_id_ls:
			# nothing to intersect; indexing the first set below would raise IndexError
			sys.stderr.write("Error: no analysis method given for phenotype %s (call_method_id %s).\n"%\
							(phenotype_method_id, call_method_id))
			return
		session = db.session
		snp_id_set_ls = []
		for analysis_method_id in analysis_method_id_ls:
			rm = Stock_250kDB.ResultsMethod.query.filter_by(phenotype_method_id=phenotype_method_id).\
					filter_by(call_method_id=call_method_id).filter_by(analysis_method_id=analysis_method_id).first()
			if rm is None:
				# first() yields None when no row matches; report it instead of failing on rm.id
				sys.stderr.write("Error: no ResultsMethod for phenotype_method_id %s, call_method_id %s, analysis_method_id %s.\n"%\
								(phenotype_method_id, call_method_id, analysis_method_id))
				return
			if rm.id in self.results_id2snp_id_set:
				snp_id_set = self.results_id2snp_id_set.get(rm.id)
			else:
				association_entries = Stock_250kDB.Results.query.filter_by(results_id=rm.id).\
						filter(Stock_250kDB.Results.rank<=no_of_top_snps)
				no_of_association_entries = association_entries.count()
				if no_of_association_entries<no_of_top_snps:
					# not enough entries yet: ask rm2result for the ranks that are still missing
					min_rank = no_of_association_entries+1
					max_rank = no_of_top_snps
					if self.snp_info is None:
						# fetched on first use and kept on the instance
						self.snp_info = DrawSNPRegion.getSNPInfo(db)
					ResultsMethod2Results.rm2result(session, rm, self.snp_info, min_rank=min_rank, max_rank=max_rank, \
												commit=commit, results_directory=results_directory)
					# build a fresh query after rm2result, as the original code did
					association_entries = Stock_250kDB.Results.query.filter_by(results_id=rm.id).\
							filter(Stock_250kDB.Results.rank<=no_of_top_snps)
				no_of_association_entries = association_entries.count()
				if no_of_association_entries!=no_of_top_snps:
					sys.stderr.write("Error: The number of SNPs %s from Result %s (analysis_method_id %s) doesn't match the no_of_top_snps %s.\n"%(no_of_association_entries, rm.id, rm.analysis_method_id, no_of_top_snps))
					return
				snp_id_set = set()
				for entry in association_entries:
					snp_id_set.add(entry.snps_id)
				self.results_id2snp_id_set[rm.id] = snp_id_set
			snp_id_set_ls.append(snp_id_set)

		# "&" builds a new set each time, so the cached sets are never modified
		overlapping_snp_id_set = snp_id_set_ls[0]
		for snp_id_set in snp_id_set_ls[1:]:
			overlapping_snp_id_set = overlapping_snp_id_set&snp_id_set
		no_of_overlapping_snps = len(overlapping_snp_id_set)

		entry = Stock_250kDB.AssociationOverlappingStat(phenotype_method_id=phenotype_method_id, call_method_id=call_method_id, \
										no_of_top_snps=no_of_top_snps, no_of_overlapping_snps=no_of_overlapping_snps)
		entry.overlapping_type = association_overlapping_type
		session.save(entry)
		session.flush()
		sys.stderr.write("%s overlapping SNPs out of %s results. Done.\n"%(no_of_overlapping_snps, len(snp_id_set_ls)))
Esempio n. 3
0
	def run(self):
		"""
		Entry point: open the Stock_250k database, gather the inputs needed for SNP
		annotation and hand them to self._constructSNPAnnotation().

		Drops into pdb first when self.debug is set. The session is flushed and
		committed only when self.commit is set. Returns None.
		"""
		if self.debug:
			import pdb
			pdb.set_trace()

		# connection settings all come from attributes on this instance
		connection_kwargs = dict(drivername=self.drivername, username=self.db_user,
				password=self.db_passwd, hostname=self.hostname, database=self.dbname,
				schema=self.schema)
		db_250k = Stock_250kDB.Stock_250kDB(**connection_kwargs)
		db_250k.setup(create_tables=False)
		db_session = db_250k.session
		#db_session.begin()

		# inputs for the annotation step, loaded in the same order as before
		context_wrapper = GeneListRankTest.dealWithSnpsContextWrapper(
				self.snps_context_picklef, self.min_distance, self.get_closest)
		annotation_of_genes = DrawSNPRegion.dealWithGeneAnnotation(self.gene_annotation_picklef)
		info_of_snps = DrawSNPRegion.getSNPInfo(db_250k)
		short_name2id = self.getSNPAnnotationShortName2id()

		self._constructSNPAnnotation(db_session, info_of_snps, context_wrapper,
				annotation_of_genes, short_name2id)

		if not self.commit:
			return
		db_session.flush()
		db_session.commit()
Esempio n. 4
0
	def run(self):
		"""
		Entry point: open the Stock_250k database, gather the inputs needed for SNP
		annotation and hand them to self._constructSNPAnnotation().

		The gene annotation is requested with tax_id=self.tax_id and with this instance
		passed as cls_with_db_args. Drops into pdb first when self.debug is set. The
		session is flushed and committed only when self.commit is set. Returns None.
		"""
		if self.debug:
			import pdb
			pdb.set_trace()

		# connection settings all come from attributes on this instance
		connection_kwargs = dict(drivername=self.drivername, username=self.db_user,
				password=self.db_passwd, hostname=self.hostname, database=self.dbname,
				schema=self.schema)
		db_250k = Stock_250kDB.Stock_250kDB(**connection_kwargs)
		db_250k.setup(create_tables=False)
		db_session = db_250k.session
		#db_session.begin()

		# inputs for the annotation step, loaded in the same order as before
		context_wrapper = GeneListRankTest.dealWithSnpsContextWrapper(
				self.snps_context_picklef, self.min_distance, self.get_closest)
		annotation_of_genes = DrawSNPRegion.dealWithGeneAnnotation(
				self.gene_annotation_picklef, tax_id=self.tax_id, cls_with_db_args=self)
		info_of_snps = DrawSNPRegion.getSNPInfo(db_250k)
		short_name2id = self.getSNPAnnotationShortName2id()

		self._constructSNPAnnotation(db_session, info_of_snps, context_wrapper,
				annotation_of_genes, short_name2id)

		if not self.commit:
			return
		db_session.flush()
		db_session.commit()
Esempio n. 5
0
	def run(self):
		"""
		Entry point: call self.rm2result() on every ResultsMethod row that belongs to
		self.call_method_id and whose analysis_method_id is in self.analysis_method_id_ls.

		A session transaction is begun before any work and is committed at the end only
		when self.commit is set. Drops into pdb first when self.debug is set.
		Returns None.

		2009-6-10
			set Results.beta = getattr(data_obj, 'beta1', None)
		"""
		if self.debug:
			import pdb
			pdb.set_trace()

		# connection settings all come from attributes on this instance
		connection_kwargs = dict(drivername=self.drivername, username=self.db_user,
				password=self.db_passwd, hostname=self.hostname, database=self.dbname,
				schema=self.schema)
		db_250k = Stock_250kDB.Stock_250kDB(**connection_kwargs)
		db_250k.setup(create_tables=False)
		db_session = db_250k.session
		db_session.begin()

		info_of_snps = DrawSNPRegion.getSNPInfo(db_250k)

		ResultsMethod = Stock_250kDB.ResultsMethod
		rows_for_call_method = ResultsMethod.query.filter_by(call_method_id=self.call_method_id)
		selected_rows = rows_for_call_method.filter(
				ResultsMethod.analysis_method_id.in_(self.analysis_method_id_ls))
		for results_method in selected_rows:
			self.rm2result(db_session, results_method, info_of_snps, max_rank=self.max_rank,
					commit=self.commit, results_directory=self.results_directory)

		if not self.commit:
			return
		db_session.commit()
Esempio n. 6
0
	def run(self):
		"""
		2008-12-08 if the plot under configuration is already in db, abort only if the program is gonna commit the database transaction.
		2008-10-19
			save figures in database if commit
		"""
		if self.debug:
			import pdb
			pdb.set_trace()
		db = Stock_250kDB.Stock_250kDB(drivername=self.drivername, username=self.db_user,
				   password=self.db_passwd, hostname=self.hostname, database=self.dbname, schema=self.schema)
		db.setup(create_tables=False)
		session = db.session
		#session.begin()
		
		if self.results_type==1:
			ResultsClass = Stock_250kDB.ResultsMethod
			snps_context_wrapper = self.dealWithSnpsContextWrapper(self.snps_context_picklef, self.min_distance, self.get_closest)
		elif self.results_type==2:
			ResultsClass = Stock_250kDB.ResultsByGene
		else:
			sys.stderr.write("Invalid results type : %s.\n"%self.results_type)
			return None
		
		hist_type = self.getHistType(self.call_method_id, self.min_distance, self.get_closest, self.min_MAF, \
									self.allow_two_sample_overlapping, self.results_type, self.null_distribution_type_id)
		
		candidate_gene_list = self.getGeneList(self.list_type_id)
		if len(candidate_gene_list)<self.min_sample_size:
			sys.stderr.write("Candidate gene list of %s too small: %s.\n"%(self.list_type_id, len(candidate_gene_list)))
			sys.exit(4)
		#candidate_gene_list = []		#2009-01-12 just to plot the histogram of pvalue
		
		candidate_gene_set = Set(candidate_gene_list)
		list_type = Stock_250kDB.GeneListType.get(self.list_type_id)
		if list_type is None:
			sys.exit(3)
		
		phenotype_id2results_id_ls = self.getResultsIDLs(db, ResultsClass, self.results_type, self.phenotype_id_ls, \
														self.min_distance, self.get_closest, self.min_MAF, self.call_method_id)
		
			
		param_data = PassingData(results_directory=self.results_directory, candidate_gene_list=candidate_gene_list, \
			min_MAF=self.min_MAF, allow_two_sample_overlapping=self.allow_two_sample_overlapping, need_the_value=1, \
			do_log10_transformation=False)
			#need_the_value means to get the pvalue/score
			#force no log10 transformation. otherwise, transformation based on analysis_method
		if self.null_distribution_type_id==2 or self.null_distribution_type_id==3:	#gw-looping or random gene list
			snp_info = DrawSNPRegion.getSNPInfo(db)
			candidate_gene_snp_index_ls = self.get_candidate_gene_snp_index_ls(candidate_gene_set, snp_info.chr_pos_ls, snps_context_wrapper)
			no_of_snps = len(snp_info.chr_pos_ls)
			no_of_permutations = no_of_snps/len(candidate_gene_snp_index_ls) + 1
			param_data.chr_pos2index = snp_info.chr_pos2index	#pass to getGenomeWideResultFromFile
			if self.null_distribution_type_id==2:
				non_candidate_gene_snp_index_ls = self.get_non_candidate_gene_snp_index_ls_by_permutation(candidate_gene_snp_index_ls, no_of_snps, no_of_permutations)
				
			elif self.null_distribution_type_id == 3:
				gene_id_ls = get_total_gene_ls(db.metadata.bind)
				no_of_candidate_genes = len(candidate_gene_set)
				non_candidate_gene_snp_index_ls = numpy.zeros(0, numpy.int)
				while len(non_candidate_gene_snp_index_ls)<no_of_snps:
					non_candidate_gene_set = Set(random.sample(gene_id_ls, no_of_candidate_genes))
					_non_candidate_gene_snp_index_ls = self.get_candidate_gene_snp_index_ls(non_candidate_gene_set, snp_info.chr_pos_ls, snps_context_wrapper)
					non_candidate_gene_snp_index_ls = numpy.hstack((non_candidate_gene_snp_index_ls, _non_candidate_gene_snp_index_ls))
		
		for phenotype_id, results_id_ls in phenotype_id2results_id_ls.iteritems():
			if hist_type.id:	#hist_type already in database
				rows = Stock_250kDB.ScoreRankHistogram.query.filter_by(phenotype_method_id=phenotype_id).\
					filter_by(list_type_id=self.list_type_id).filter_by(hist_type_id=hist_type.id)
				if rows.count()>0 and self.commit:	#2008-12-08 only skip if the database transaction is gonna commit.
					row = rows.first()
					sys.stderr.write("Histogram already in database. id=%s, phenotype_id=%s, list_type_id=%s, hist_type_id=%s.\n"%\
									(row.id, row.phenotype_method_id, row.list_type_id, row.hist_type_id))
					continue
			phenotype_method = Stock_250kDB.PhenotypeMethod.get(phenotype_id)
			if not phenotype_method:
				continue
			score_rank_data_ls = []
			sys.stderr.write("Checking phenotype %s (%s) on list_type %s (%s) ...\n"%\
							(phenotype_method.id, phenotype_method.short_name, list_type.id, list_type.short_name))
			
			for results_id in results_id_ls:
				try:
					rm = ResultsClass.get(results_id)
					score_rank_data = None
					if self.null_distribution_type_id==1:
						if self.results_type==1:
							permData = self.prepareDataForPermutationRankTest(rm, snps_context_wrapper, param_data)
							if not permData:
								continue
							score_rank_data = PassingData(candidate_score_ls=permData.candidate_gene_snp_value_ls, \
													candidate_rank_ls=permData.candidate_gene_snp_rank_ls,\
									non_candidate_score_ls=permData.non_candidate_gene_snp_value_ls, non_candidate_rank_ls=permData.non_candidate_gene_snp_rank_ls,\
									analysis_method=rm.analysis_method)
							del permData
						elif self.results_type==2:
							score_rank_data = self.getScoreRankFromRBG(rm, candidate_gene_set, self.results_directory)
					elif self.null_distribution_type_id==2 or self.null_distribution_type_id==3:
						genome_wide_result = self.getResultMethodContent(rm, param_data.results_directory, param_data.min_MAF, pdata=param_data)
						if not genome_wide_result:
							continue
						score_rank_data = self.getScoreRankFromPermIndexLs(genome_wide_result, candidate_gene_snp_index_ls, non_candidate_gene_snp_index_ls)
						if score_rank_data:
							score_rank_data.analysis_method = rm.analysis_method
					
					if score_rank_data:
						score_rank_data_ls.append(score_rank_data)
				except:
						sys.stderr.write("Exception happened for results_id=%s, phenotype_id=%s.\n"%(results_id, phenotype_id))
						traceback.print_exc()
						sys.stderr.write('%s.\n'%repr(sys.exc_info()))
						continue
			if score_rank_data_ls:

				score_png_data, score_svg_data = self.plotHistForOnePhenotype(phenotype_method, list_type, score_rank_data_ls, self.output_dir, data_type='score', commit=self.commit)
				rank_png_data, rank_svg_data = self.plotHistForOnePhenotype(phenotype_method, list_type, score_rank_data_ls, self.output_dir, data_type='rank', commit=self.commit)
				if self.commit:
					score_rank_hist = Stock_250kDB.ScoreRankHistogram(phenotype_method_id=phenotype_id, list_type_id=list_type.id)
					score_rank_hist.hist_type = hist_type
					score_rank_hist.score_hist = score_png_data.getvalue()
					score_rank_hist.score_hist_svg = score_svg_data.getvalue()
					score_rank_hist.rank_hist = rank_png_data.getvalue()
					score_rank_hist.rank_hist_svg = rank_svg_data.getvalue()
					session.save(score_rank_hist)
					session.flush()
					del score_png_data, score_svg_data, rank_png_data, rank_svg_data
		"""