Example #1
0
	def rm2result(cls, session, rm, snp_info, max_rank=1000, commit=False, min_rank=1, results_directory=None):
		"""
		Create one Stock_250kDB.Results entry for each SNP ranked min_rank..max_rank (both inclusive)
		in the genome-wide result of rm.

		Arguments:
			session: database session; flushed if commit is true, rolled back otherwise.
			rm: results-method entry. rm.id becomes results_id of every new entry and rm itself is
				handed to GeneListRankTest.getResultMethodContent() to load the scores.
			snp_info: object providing getSnpsIDGivenChrPos(chromosome, position).
			max_rank, min_rank: bounds of the ranks to store.
			commit: whether to flush (true) or roll back (false) the session at the end.
			results_directory: forwarded to getResultMethodContent().

		Returns None. If the db already holds exactly max_rank-min_rank+1 entries for rm.id,
		the method returns right away without touching the session.

		2009-11-2
			split out of run()
		"""
		
		# 2009-5-1 nothing to do if the whole rank range is already in db.
		existing_entries = Stock_250kDB.Results.query.filter_by(results_id=rm.id)
		if existing_entries.count()==max_rank-min_rank+1:
			sys.stderr.write("%s already in db. Ignore.\n"%rm.id)
			return
		
		# min_MAC=0 and min_MAF=0. are what this loader passes to getResultMethodContent()
		pdata = PassingData(min_MAC=0)
		gwr = GeneListRankTest.getResultMethodContent(rm, results_directory=results_directory, min_MAF=0.,
													pdata=pdata)
		if gwr:
			for rank in range(min_rank, max_rank+1):
				data_obj = gwr.get_data_obj_at_given_rank(rank)
				if data_obj is None:	# no SNP at this rank
					continue
				snps_id = snp_info.getSnpsIDGivenChrPos(data_obj.chromosome, data_obj.position)
				# extra columns, if any, are stored pickled
				pickled_extra_cols = cPickle.dumps(data_obj.extra_col_ls) if data_obj.extra_col_ls else None
				# NOTE(review): the new entry is not added to the session explicitly -- presumably the
				# ORM registers it on construction; confirm.
				Stock_250kDB.Results(snps_id=snps_id, results_id=rm.id, score=data_obj.value, rank=rank,
									beta=getattr(data_obj, 'beta1', None),
									maf=data_obj.maf, mac=data_obj.mac, genotype_var_perc=data_obj.genotype_var_perc,
									object=pickled_extra_cols)
		if not commit:
			session.rollback()
		else:
			# only a flush here; the original has session.commit() commented out
			session.flush()
Example #2
0
	def run(self):
		"""
		Connect to the database, gather the SNP context, gene annotation and SNP info, and pass
		them on to self._constructSNPAnnotation().

		Enters pdb first if self.debug is set. The session is flushed and committed only when
		self.commit is set; otherwise this method does nothing further with it.
		"""
		if self.debug:
			import pdb
			pdb.set_trace()
		db = Stock_250kDB.Stock_250kDB(
			drivername=self.drivername, username=self.db_user, password=self.db_passwd,
			hostname=self.hostname, database=self.dbname, schema=self.schema)
		db.setup(create_tables=False)	# tables are expected to exist already
		db_session = db.session
		#db_session.begin()
		context_wrapper = GeneListRankTest.dealWithSnpsContextWrapper(
			self.snps_context_picklef, self.min_distance, self.get_closest)
		annotation = DrawSNPRegion.dealWithGeneAnnotation(self.gene_annotation_picklef)
		snp_info = DrawSNPRegion.getSNPInfo(db)
		
		short_name2id = self.getSNPAnnotationShortName2id()
		self._constructSNPAnnotation(db_session, snp_info, context_wrapper, annotation, short_name2id)
		if not self.commit:
			return
		db_session.flush()
		db_session.commit()
Example #3
0
	def run(self):
		"""
		Connect to the database, gather the SNP context, gene annotation and SNP info, and pass
		them on to self._constructSNPAnnotation().

		Enters pdb first if self.debug is set. The gene annotation is requested with
		tax_id=self.tax_id and with this object as cls_with_db_args. The session is flushed and
		committed only when self.commit is set.
		"""
		if self.debug:
			import pdb
			pdb.set_trace()
		db = Stock_250kDB.Stock_250kDB(
			drivername=self.drivername, username=self.db_user, password=self.db_passwd,
			hostname=self.hostname, database=self.dbname, schema=self.schema)
		db.setup(create_tables=False)	# tables are expected to exist already
		db_session = db.session
		#db_session.begin()
		context_wrapper = GeneListRankTest.dealWithSnpsContextWrapper(
			self.snps_context_picklef, self.min_distance, self.get_closest)
		annotation = DrawSNPRegion.dealWithGeneAnnotation(
			self.gene_annotation_picklef, tax_id=self.tax_id, cls_with_db_args=self)
		snp_info = DrawSNPRegion.getSNPInfo(db)
		
		short_name2id = self.getSNPAnnotationShortName2id()
		self._constructSNPAnnotation(db_session, snp_info, context_wrapper, annotation, short_name2id)
		if not self.commit:
			return
		db_session.flush()
		db_session.commit()
Example #4
0
	def getTopNumberSNPs(self, call_method_id, phenotype_method_id, analysis_method_id=1, results_directory=None, min_MAC=7,\
						no_of_top_snps=10000):
		"""
		Return the sorted (chromosome, position) tuples of the top-ranked SNPs of one association result.

		The result is the first Stock_250kDB.ResultsMethod entry matching call_method_id,
		phenotype_method_id and analysis_method_id. Ranks 1..no_of_top_snps are requested from its
		genome-wide result; a rank for which no data object comes back is skipped, so the returned
		list can be shorter than no_of_top_snps.

		Arguments:
			call_method_id, phenotype_method_id, analysis_method_id: select the ResultsMethod entry.
			results_directory: forwarded to GeneListRankTest.getResultMethodContent().
			min_MAC: wrapped in a PassingData and forwarded as pdata.
			no_of_top_snps: number of ranks to request.

		Returns:
			list of (chromosome, position) tuples in ascending order. An empty list (plus a note on
			stderr) if no ResultsMethod entry matches or its content could not be loaded.

		2009-3-18
			if data_obj from genome_wide_result is None, ignore it
		2009-2-16
			get a certain number of top SNPs from a result (according to call_method_id, phenotype_method_id, analysis_method_id)
		"""
		import sys	# local import: only this block's error reporting needs it
		rm = Stock_250kDB.ResultsMethod.query.filter_by(call_method_id=call_method_id).\
			filter_by(phenotype_method_id=phenotype_method_id).filter_by(analysis_method_id=analysis_method_id).first()
		if rm is None:
			# first() yields None when no entry matches
			sys.stderr.write("No ResultsMethod for call_method_id=%s, phenotype_method_id=%s, analysis_method_id=%s.\n"%
							(call_method_id, phenotype_method_id, analysis_method_id))
			return []
		param_data = PassingData(min_MAC=min_MAC)
		genome_wide_result = GeneListRankTest.getResultMethodContent(rm, results_directory=results_directory, min_MAF=0.,
																	pdata=param_data)
		if genome_wide_result is None:
			sys.stderr.write("No content available for ResultsMethod %s.\n"%rm.id)
			return []
		
		chr_pos_ls = []
		for rank in range(1, no_of_top_snps+1):	# ranks are 1-based
			data_obj = genome_wide_result.get_data_obj_at_given_rank(rank)
			if data_obj is not None:
				chr_pos_ls.append((data_obj.chromosome, data_obj.position))
		chr_pos_ls.sort()
		return chr_pos_ls
Example #5
0
	def run(self):
		"""
		Produce a MAF-vs-score plot for every ResultsMethod entry with results_method_type_id=1
		(optionally restricted to self.call_method_id) that has no MAFVsScorePlot entry yet.

		For each such entry the genome-wide result is loaded, self.plot_maf_vs_score() is called,
		and whatever png/svg data it returns is stored in a new MAFVsScorePlot entry. The session
		is flushed after each stored plot only if self.commit is set.
		"""
		if self.debug:
			import pdb
			pdb.set_trace()
		db = Stock_250kDB.Stock_250kDB(
			drivername=self.drivername, username=self.db_user, password=self.db_passwd,
			hostname=self.hostname, database=self.dbname, schema=self.schema)
		db.setup(create_tables=False)
		session = db.session
		#session.begin()
		
		rm_query = Stock_250kDB.ResultsMethod.query.filter_by(results_method_type_id=1)
		if self.call_method_id:
			rm_query = rm_query.filter_by(call_method_id=self.call_method_id)
		for rm in rm_query.all():
			existing_plots = Stock_250kDB.MAFVsScorePlot.query.filter_by(results_method_id=rm.id)
			if existing_plots.count()>0:
				sys.stderr.write("Ignore. MAF vs score plot done for %s.\n"%rm.id)
				continue
			gwr = GeneListRankTest.getResultMethodContent(rm, results_directory=self.results_directory, min_MAF=0)
			if gwr is None:
				continue
			# a file name prefix is built only when an output directory was given
			fname_prefix = os.path.join(self.output_dir, '%s_maf_vs_score'%rm.id) if self.output_dir else None
			png_data, svg_data = self.plot_maf_vs_score(rm, gwr, fname_prefix, commit=self.commit)
			if not (png_data or svg_data):
				continue
			plot_entry = Stock_250kDB.MAFVsScorePlot(results_method_id=rm.id)
			# png_data/svg_data expose getvalue() -- presumably in-memory file buffers
			if png_data:
				plot_entry.png_data = png_data.getvalue()
			if svg_data:
				plot_entry.svg_data = svg_data.getvalue()
			session.save(plot_entry)
			del png_data, svg_data	# drop the buffers before the next result is processed
			if self.commit:
				session.flush()
		"""
Example #6
0
	def getTopNumberSNPs(self, call_method_id, phenotype_method_id, analysis_method_id=1, results_directory=None, min_MAC=7,\
						no_of_top_snps=10000):
		"""
		Return the sorted (chromosome, position) tuples of the top-ranked SNPs of one association result.

		The result is the first Stock_250kDB.ResultsMethod entry matching call_method_id,
		phenotype_method_id and analysis_method_id. Ranks 1..no_of_top_snps are requested from its
		genome-wide result; a rank for which no data object comes back is skipped, so the returned
		list can be shorter than no_of_top_snps.

		Arguments:
			call_method_id, phenotype_method_id, analysis_method_id: select the ResultsMethod entry.
			results_directory: forwarded to GeneListRankTest.getResultMethodContent().
			min_MAC: wrapped in a PassingData and forwarded as pdata.
			no_of_top_snps: number of ranks to request.

		Returns:
			list of (chromosome, position) tuples in ascending order. An empty list (plus a note on
			stderr) if no ResultsMethod entry matches or its content could not be loaded.

		2009-3-18
			if data_obj from genome_wide_result is None, ignore it
		2009-2-16
			get a certain number of top SNPs from a result (according to call_method_id, phenotype_method_id, analysis_method_id)
		"""
		import sys	# local import: only this block's error reporting needs it
		rm = Stock_250kDB.ResultsMethod.query.filter_by(call_method_id=call_method_id).\
			filter_by(phenotype_method_id=phenotype_method_id).filter_by(analysis_method_id=analysis_method_id).first()
		if rm is None:
			# first() yields None when no entry matches
			sys.stderr.write("No ResultsMethod for call_method_id=%s, phenotype_method_id=%s, analysis_method_id=%s.\n"%
							(call_method_id, phenotype_method_id, analysis_method_id))
			return []
		param_data = PassingData(min_MAC=min_MAC)
		genome_wide_result = GeneListRankTest.getResultMethodContent(rm, results_directory=results_directory, min_MAF=0.,
																	pdata=param_data)
		if genome_wide_result is None:
			sys.stderr.write("No content available for ResultsMethod %s.\n"%rm.id)
			return []
		
		chr_pos_ls = []
		for rank in range(1, no_of_top_snps+1):	# ranks are 1-based
			data_obj = genome_wide_result.get_data_obj_at_given_rank(rank)
			if data_obj is not None:
				chr_pos_ls.append((data_obj.chromosome, data_obj.position))
		chr_pos_ls.sort()
		return chr_pos_ls
Example #7
0
	def on_button_draw_gene_list_bars_clicked(self, widget):
		"""
		Draw vertical spans on the genome-wide plot to mark the genes of a candidate gene list.

		The gene list id is read from widget "entry_gene_list_id" and the color from
		"comboboxentry_bar_color" ('k', black, if none is chosen). A gene that already has a span
		gets recolored; any other gene gets a new span whose x-range is the gene's start/stop
		shifted by the plot offset of its chromosome. The canvas is redrawn at the end.

		Nothing is drawn, and a note goes to stderr, if no genome-wide plot exists yet or the
		entered id is not an integer. An empty id entry returns silently. Candidate genes missing
		from self.gene_annotation.gene_id2model are skipped and counted in a stderr note.

		Arguments:
			widget: the widget that emitted the signal; unused.

		2008-12-16
			draw vertical spans to denote the locations of genes from a candidate list
		"""
		if self.db is None:
			self.db_connect()
		if not self.chr_id2size:
			sys.stderr.write("No genome-wide pvalue plot has been drawn yet. Do it first!\n")
			return
		list_type_id = self.xml.get_widget("entry_gene_list_id").get_text()
		bar_color = self.xml.get_widget("comboboxentry_bar_color").get_active_text()
		if not bar_color:	#default is black
			bar_color = 'k'
		if not list_type_id:
			return
		try:
			list_type_id = int(list_type_id)
		except ValueError:
			# free-text entry: report bad input rather than raising out of the signal handler
			sys.stderr.write("Gene list id %s is not an integer. Nothing drawn.\n"%repr(list_type_id))
			return
		self.candidate_gene_set = GeneListRankTest.dealWithCandidateGeneList(list_type_id, return_set=True)
		no_of_genes_without_model = 0
		for gene_id in self.candidate_gene_set:
			if gene_id in self.gene_id2vspan_obj_id:
				# span exists already: only adjust its colors
				artist_obj_id = self.gene_id2vspan_obj_id[gene_id]
				artist = self.artist_obj_id2artist_gene_id_ls[artist_obj_id][0]
				if artist.get_edgecolor()!=bar_color:
					artist.set_edgecolor(bar_color)
				if artist.get_facecolor()!=bar_color:
					artist.set_facecolor(bar_color)
				continue
			try:
				gene_model = self.gene_annotation.gene_id2model[gene_id]
			except KeyError:
				# candidate gene unknown to the annotation: no coordinates to draw
				no_of_genes_without_model += 1
				continue
			chromosome = gene_model.chromosome
			# x offset at which this chromosome starts on the plot
			this_chr_starting_pos_on_plot = self.chr_id2cumu_size[chromosome] - self.chr_id2size[chromosome] - self.chr_gap
			xmin = this_chr_starting_pos_on_plot + gene_model.start
			xmax = this_chr_starting_pos_on_plot + gene_model.stop
			artist = self.ax.axvspan(xmin, xmax, edgecolor=bar_color, facecolor=bar_color, alpha=0.3, picker=6)
			# artists are tracked by id() so a gene's span can be found again later
			artist_obj_id = id(artist)
			self.artist_obj_id2artist_gene_id_ls[artist_obj_id] = [artist, gene_id]
			self.gene_id2vspan_obj_id[gene_id] = artist_obj_id
		if no_of_genes_without_model:
			sys.stderr.write("%s genes from list %s have no gene model and were skipped.\n"%
							(no_of_genes_without_model, list_type_id))
		self.canvas_matplotlib.draw()
Example #8
0
	def get_phenotype_ls(self, rm, no_of_top_snps, chr_pos2ancestral_allele, pheno_data, geno_data, min_MAF, results_directory=None):
		"""
		Collect phenotype values separately for accessions carrying the ancestral allele and for those
		carrying a derived allele, over the top-scoring SNPs of one association result.

		Only SNPs present both in chr_pos2ancestral_allele and in geno_data.col_id2col_index are used
		("polarized" SNPs). For each of them, every genotyped accession with a phenotype row, an
		allele code within 1..4 and a phenotype other than 'NA' contributes its phenotype (as float)
		to one of the two lists.

		Arguments:
			rm: results-method entry; rm.phenotype_method_id selects the phenotype column.
			no_of_top_snps: how many of the highest-valued SNPs to look at. If the result holds
				fewer SNPs, all of them are used.
			chr_pos2ancestral_allele: maps 'chromosome_position' to the ancestral allele code.
			pheno_data: provides phenotype_method_id2col_index, row_id2row_index and data_matrix.
			geno_data: provides row_id_ls, col_id2col_index and data_matrix.
			min_MAF, results_directory: forwarded to GeneListRankTest.getResultMethodContent().

		Returns:
			PassingData with ancestral_allele_phenotype_ls and derived_allele_phenotype_ls. Both
			are empty if the genome-wide result could not be loaded.

		Side effect: genome_wide_result.data_obj_ls is re-ordered in place.

		2008-09-26
			consider the complement of the ancestral allele as ancestral as well.
		2008-09-19
			differentiate phenotype between ancestral/derived alleles
		"""
		sys.stderr.write("Getting phenotype_ls for ancestral/derived alleles ... ")
		ancestral_allele_phenotype_ls = []
		derived_allele_phenotype_ls = []
		phenotype_ls_data = PassingData()
		phenotype_ls_data.ancestral_allele_phenotype_ls = ancestral_allele_phenotype_ls
		phenotype_ls_data.derived_allele_phenotype_ls = derived_allele_phenotype_ls
		
		genome_wide_result = GeneListRankTest.getResultMethodContent(rm, results_directory, min_MAF)
		if genome_wide_result is None:
			sys.stderr.write("no genome-wide result available. Done.\n")
			return phenotype_ls_data
		data_obj_ls = genome_wide_result.data_obj_ls
		# value descending order; each SNP object defines comparison based on its value.
		# sort()+reverse() kept on purpose: sort(reverse=True) would order ties differently.
		data_obj_ls.sort()
		data_obj_ls.reverse()
		# never look past the end of the result, never use a negative count
		no_of_snps_to_check = max(0, min(no_of_top_snps, len(data_obj_ls)))
		
		pheno_data_col_index = pheno_data.phenotype_method_id2col_index[rm.phenotype_method_id]
		no_of_polarized_snps = 0
		for data_obj in data_obj_ls[:no_of_snps_to_check]:
			chr_pos = '%s_%s'%(data_obj.chromosome, data_obj.position)
			geno_data_col_index = geno_data.col_id2col_index.get(chr_pos)
			if chr_pos not in chr_pos2ancestral_allele or geno_data_col_index is None:
				continue
			no_of_polarized_snps += 1
			ancestral_allele = chr_pos2ancestral_allele[chr_pos]
			for geno_row_index, accession_id in enumerate(geno_data.row_id_ls):
				allele = geno_data.data_matrix[geno_row_index][geno_data_col_index]
				pheno_row_index = pheno_data.row_id2row_index.get(accession_id)	#find corresponding accession row index in phenotype matrix
				if pheno_row_index is None or allele<1 or allele>4:	#no heterozygote or NA or deletion
					continue
				phenotype = pheno_data.data_matrix[pheno_row_index][pheno_data_col_index]
				if phenotype=='NA':
					continue
				phenotype = float(phenotype)
				# the complement of the ancestral allele counts as ancestral too; it is looked up
				# only when needed, as before
				if allele==ancestral_allele or allele==number2complement[ancestral_allele]:
					ancestral_allele_phenotype_ls.append(phenotype)
				else:
					derived_allele_phenotype_ls.append(phenotype)
		# avoid dividing by zero when no SNPs were requested
		if no_of_top_snps:
			polarized_ratio = no_of_polarized_snps/float(no_of_top_snps)
		else:
			polarized_ratio = 0.0
		sys.stderr.write("no_of_polarized_snps/no_of_top_snps=%s/%s=%.3f. Done.\n"%(no_of_polarized_snps, no_of_top_snps, polarized_ratio))
		return phenotype_ls_data
Example #9
0
	def __init__(self,  **keywords):
		"""
		Initialise the instance by delegating to GeneListRankTest.__init__(), forwarding all
		keyword arguments unchanged. No additional setup is done here.

		2008-08-20
		"""
		GeneListRankTest.__init__(self, **keywords)
	def __init__(self,  **keywords):
		"""
		Initialise through GeneListRankTest.__init__() with all keyword arguments, then replace
		list_type_id_ls, analysis_method_id_ls and phenotype_method_id_ls with what
		getListOutOfStr(..., data_type=int) makes of their current values.
		"""
		GeneListRankTest.__init__(self, **keywords)
		# same three conversions, in the same order, driven by attribute name
		for id_ls_name in ('list_type_id_ls', 'analysis_method_id_ls', 'phenotype_method_id_ls'):
			setattr(self, id_ls_name, getListOutOfStr(getattr(self, id_ls_name), data_type=int))
Example #11
0
	def __init__(self,  **keywords):
		"""
		Initialise through GeneListRankTest.__init__() with all keyword arguments, then replace
		list_type_id_ls, analysis_method_id_ls and phenotype_method_id_ls with what
		getListOutOfStr(..., data_type=int) makes of their current values.
		"""
		GeneListRankTest.__init__(self, **keywords)
		# same three conversions, in the same order, driven by attribute name
		for id_ls_name in ('list_type_id_ls', 'analysis_method_id_ls', 'phenotype_method_id_ls'):
			setattr(self, id_ls_name, getListOutOfStr(getattr(self, id_ls_name), data_type=int))
	def __init__(self,  **keywords):
		"""
		Initialise the instance by delegating to GeneListRankTest.__init__(), forwarding all
		keyword arguments unchanged. No additional setup is done here.

		2008-08-20
		"""
		GeneListRankTest.__init__(self, **keywords)