def rm2result(cls, session, rm, snp_info, max_rank=1000, commit=False, min_rank=1, results_directory=None):
    """
    2009-11-2
        split out of run()

    Create one Stock_250kDB.Results row for every SNP ranked from min_rank to
    max_rank (both inclusive) in the result described by rm.

    Arguments:
        session: database session; flushed if commit is true, rolled back otherwise.
        rm: results-method entry; rm.id is stored as results_id of each row.
        snp_info: object offering getSnpsIDGivenChrPos(chromosome, position).
        max_rank, min_rank: inclusive range of ranks to store.
        commit: flush the session (True) or roll it back (False).
        results_directory: forwarded to GeneListRankTest.getResultMethodContent().

    Returns None. Nothing is done when the number of rows already stored for
    rm.id equals max_rank-min_rank+1, or when no result content is available.
    """
    expected_no_of_rows = max_rank - min_rank + 1
    # 2009-5-1 check whether it's already in db.
    existing_rows = Stock_250kDB.Results.query.filter_by(results_id=rm.id)
    if existing_rows.count() == expected_no_of_rows:
        sys.stderr.write("%s already in db. Ignore.\n"%rm.id)
        return

    # min_MAC=0 and min_MAF=0.: no SNP is filtered out while reading the result
    pdata = PassingData(min_MAC=0)
    gwr = GeneListRankTest.getResultMethodContent(rm, results_directory=results_directory, min_MAF=0.,
                                                  pdata=pdata)
    if not gwr:
        return

    for rank in range(min_rank, max_rank + 1):
        snp = gwr.get_data_obj_at_given_rank(rank)
        if snp is None:
            continue
        snps_id = snp_info.getSnpsIDGivenChrPos(snp.chromosome, snp.position)
        # extra columns, if any, are stored as a pickled blob
        pickled_extra = cPickle.dumps(snp.extra_col_ls) if snp.extra_col_ls else None
        # NOTE(review): the new row is never explicitly added to the session;
        # presumably the ORM attaches it on construction -- confirm.
        Stock_250kDB.Results(snps_id=snps_id, results_id=rm.id, score=snp.value, rank=rank,
                             beta=getattr(snp, 'beta1', None),
                             maf=snp.maf, mac=snp.mac, genotype_var_perc=snp.genotype_var_perc,
                             object=pickled_extra)

    if commit:
        session.flush()
    else:
        session.rollback()
def run(self):
    """
    Entry point: connect to the database, load the SNP context, the gene
    annotation and the SNP info, then hand everything to
    self._constructSNPAnnotation(). The session is flushed and committed only
    when self.commit is set.
    """
    if self.debug:
        import pdb
        pdb.set_trace()

    connection_args = dict(drivername=self.drivername, username=self.db_user,
                           password=self.db_passwd, hostname=self.hostname,
                           database=self.dbname, schema=self.schema)
    db = Stock_250kDB.Stock_250kDB(**connection_args)
    db.setup(create_tables=False)
    session = db.session

    # inputs for the annotation step, loaded in the same order as before
    context_wrapper = GeneListRankTest.dealWithSnpsContextWrapper(self.snps_context_picklef,
                                                                  self.min_distance,
                                                                  self.get_closest)
    annotation = DrawSNPRegion.dealWithGeneAnnotation(self.gene_annotation_picklef)
    snp_info = DrawSNPRegion.getSNPInfo(db)
    short_name2id = self.getSNPAnnotationShortName2id()

    self._constructSNPAnnotation(session, snp_info, context_wrapper, annotation, short_name2id)

    if not self.commit:
        return
    session.flush()
    session.commit()
def run(self):
    """
    Entry point: connect to the database, load the SNP context, the gene
    annotation (for self.tax_id, passing this object as cls_with_db_args) and
    the SNP info, then hand everything to self._constructSNPAnnotation().
    The session is flushed and committed only when self.commit is set.
    """
    if self.debug:
        import pdb
        pdb.set_trace()

    connection_args = dict(drivername=self.drivername, username=self.db_user,
                           password=self.db_passwd, hostname=self.hostname,
                           database=self.dbname, schema=self.schema)
    db = Stock_250kDB.Stock_250kDB(**connection_args)
    db.setup(create_tables=False)
    session = db.session

    # inputs for the annotation step, loaded in the same order as before
    context_wrapper = GeneListRankTest.dealWithSnpsContextWrapper(self.snps_context_picklef,
                                                                  self.min_distance,
                                                                  self.get_closest)
    annotation = DrawSNPRegion.dealWithGeneAnnotation(self.gene_annotation_picklef,
                                                      tax_id=self.tax_id,
                                                      cls_with_db_args=self)
    snp_info = DrawSNPRegion.getSNPInfo(db)
    short_name2id = self.getSNPAnnotationShortName2id()

    self._constructSNPAnnotation(session, snp_info, context_wrapper, annotation, short_name2id)

    if not self.commit:
        return
    session.flush()
    session.commit()
def getTopNumberSNPs(self, call_method_id, phenotype_method_id, analysis_method_id=1, results_directory=None, min_MAC=7,
                     no_of_top_snps=10000):
    """
    2009-3-18
        if data_obj from genome_wide_result is None, ignore it
    2009-2-16
        get a certain number of top SNPs from a result (according to call_method_id, phenotype_method_id, analysis_method_id)

    Arguments:
        call_method_id, phenotype_method_id, analysis_method_id: identify the
            ResultsMethod entry; the first match is used.
        results_directory: forwarded to GeneListRankTest.getResultMethodContent().
        min_MAC: passed to getResultMethodContent() through PassingData(min_MAC=...).
        no_of_top_snps: ranks 1..no_of_top_snps are looked up.

    Returns a sorted list of (chromosome, position) tuples. The list is empty
    if no matching ResultsMethod entry exists or its content is unavailable.
    """
    import sys  # function-scope import: the block reports problems on stderr

    rm = Stock_250kDB.ResultsMethod.query.filter_by(call_method_id=call_method_id).\
        filter_by(phenotype_method_id=phenotype_method_id).filter_by(analysis_method_id=analysis_method_id).first()
    if rm is None:
        # first() yields None when the query matches nothing
        sys.stderr.write("No result for call_method_id=%s, phenotype_method_id=%s, analysis_method_id=%s.\n"%\
            (call_method_id, phenotype_method_id, analysis_method_id))
        return []

    param_data = PassingData(min_MAC=min_MAC)
    genome_wide_result = GeneListRankTest.getResultMethodContent(rm, results_directory=results_directory, min_MAF=0., \
        pdata=param_data)
    if genome_wide_result is None:
        sys.stderr.write("Content of result %s is not available.\n"%rm.id)
        return []

    chr_pos_ls = []
    for rank in range(1, no_of_top_snps+1):
        data_obj = genome_wide_result.get_data_obj_at_given_rank(rank)
        if data_obj is not None:
            chr_pos_ls.append((data_obj.chromosome, data_obj.position))
    chr_pos_ls.sort()
    return chr_pos_ls
def run(self):
    """
    Draw a MAF-vs-score plot for every ResultsMethod entry with
    results_method_type_id=1 (restricted to self.call_method_id when it is set)
    and store the image data in a MAFVsScorePlot row.

    Entries that already have a MAFVsScorePlot row, or whose result content
    cannot be loaded, are skipped. When self.output_dir is set, an output
    filename prefix '<output_dir>/<rm.id>_maf_vs_score' is passed to
    self.plot_maf_vs_score(). The session is flushed only if self.commit is set.
    """
    if self.debug:
        import pdb
        pdb.set_trace()
    db = Stock_250kDB.Stock_250kDB(drivername=self.drivername, username=self.db_user,
                                   password=self.db_passwd, hostname=self.hostname, database=self.dbname, schema=self.schema)
    db.setup(create_tables=False)
    session = db.session

    query = Stock_250kDB.ResultsMethod.query.filter_by(results_method_type_id=1)
    if self.call_method_id:
        query = query.filter_by(call_method_id=self.call_method_id)

    for rm in query.all():
        rows = Stock_250kDB.MAFVsScorePlot.query.filter_by(results_method_id=rm.id)
        if rows.count()>0:
            sys.stderr.write("Ignore. MAF vs score plot done for %s.\n"%rm.id)
            continue
        gwr = GeneListRankTest.getResultMethodContent(rm, results_directory=self.results_directory, min_MAF=0)
        if gwr is None:
            continue
        if self.output_dir:
            output_fname_prefix = os.path.join(self.output_dir, '%s_maf_vs_score'%rm.id)
        else:
            output_fname_prefix = None
        png_data, svg_data = self.plot_maf_vs_score(rm, gwr, output_fname_prefix, commit=self.commit)
        if png_data or svg_data:
            maf_vs_score_plot = Stock_250kDB.MAFVsScorePlot(results_method_id=rm.id)
            # png_data/svg_data expose getvalue(); only its return value is stored
            if png_data:
                maf_vs_score_plot.png_data = png_data.getvalue()
            if svg_data:
                maf_vs_score_plot.svg_data = svg_data.getvalue()
            session.save(maf_vs_score_plot)
        # drop the references to the image buffers before the next result is loaded
        del png_data, svg_data
        if self.commit:
            session.flush()
def on_button_draw_gene_list_bars_clicked(self, widget):
    """
    2008-12-16
        draw vertical spans to denote the locations of genes from a candidate list

    The gene list id is read from the "entry_gene_list_id" widget and the bar
    color from "comboboxentry_bar_color" (default 'k', black). A gene that
    already has a span only gets recolored; otherwise a new span is added to
    self.ax and registered in self.artist_obj_id2artist_gene_id_ls and
    self.gene_id2vspan_obj_id.

    Nothing is drawn if no genome-wide plot exists yet (self.chr_id2size is
    empty) or if the entered gene list id is not an integer. Candidate genes
    missing from self.gene_annotation.gene_id2model are reported and skipped.
    """
    if self.db is None:
        self.db_connect()
    if not self.chr_id2size:
        sys.stderr.write("No genome-wide pvalue plot has been drawn yet. Do it first!\n")
        return
    entry_gene_list_id = self.xml.get_widget("entry_gene_list_id")
    list_type_id = entry_gene_list_id.get_text()
    comboboxentry_bar_color = self.xml.get_widget("comboboxentry_bar_color")
    bar_color = comboboxentry_bar_color.get_active_text()
    if not bar_color:    #default is black
        bar_color = 'k'
    if list_type_id:
        try:
            list_type_id = int(list_type_id)
        except ValueError:
            # free-text entry: report bad input instead of raising inside the signal handler
            sys.stderr.write("Gene list id %r is not an integer. Nothing drawn.\n"%list_type_id)
            return
        self.candidate_gene_set = GeneListRankTest.dealWithCandidateGeneList(list_type_id, return_set=True)
        for gene_id in self.candidate_gene_set:
            try:
                gene_model = self.gene_annotation.gene_id2model[gene_id]
            except KeyError:
                # one unknown gene should not abort drawing of the rest of the list
                sys.stderr.write("Gene %s not found in gene annotation. Skipped.\n"%gene_id)
                continue
            if gene_id in self.gene_id2vspan_obj_id:
                # span exists already: only update its colors
                artist_obj_id = self.gene_id2vspan_obj_id[gene_id]
                artist = self.artist_obj_id2artist_gene_id_ls[artist_obj_id][0]
                if artist.get_edgecolor()!=bar_color:
                    artist.set_edgecolor(bar_color)
                if artist.get_facecolor()!=bar_color:
                    artist.set_facecolor(bar_color)
            else:
                # x offset of this chromosome: cumulative size minus own size minus the gap
                this_chr_starting_pos_on_plot = self.chr_id2cumu_size[gene_model.chromosome]-\
                    self.chr_id2size[gene_model.chromosome]-self.chr_gap
                xmin = this_chr_starting_pos_on_plot + gene_model.start
                xmax = this_chr_starting_pos_on_plot + gene_model.stop
                artist = self.ax.axvspan(xmin, xmax, edgecolor=bar_color, facecolor=bar_color, alpha=0.3, picker=6)
                artist_obj_id = id(artist)
                self.artist_obj_id2artist_gene_id_ls[artist_obj_id] = [artist, gene_id]
                self.gene_id2vspan_obj_id[gene_id] = artist_obj_id
    self.canvas_matplotlib.draw()
def get_phenotype_ls(self, rm, no_of_top_snps, chr_pos2ancestral_allele, pheno_data, geno_data, min_MAF, results_directory=None):
    """
    2008-09-26
        consider the complement of the ancestral allele as ancestral as well.
    2008-09-19
        differentiate phenotype between ancestral/derived alleles

    For the no_of_top_snps highest-valued SNPs of the result rm (fewer if the
    result holds fewer SNPs), collect the phenotype values of all accessions,
    split by whether the accession carries the ancestral allele (or its
    complement) or another allele. Only SNPs present both in
    chr_pos2ancestral_allele and in geno_data are used; only alleles coded 1..4
    and phenotypes different from 'NA' are counted.

    Returns a PassingData object with attributes ancestral_allele_phenotype_ls
    and derived_allele_phenotype_ls (lists of floats).

    Note: genome_wide_result.data_obj_ls is sorted in place.
    """
    sys.stderr.write("Getting phenotype_ls for ancestral/derived alleles ... ")
    genome_wide_result = GeneListRankTest.getResultMethodContent(rm, results_directory, min_MAF)
    # sort ascending and reverse afterwards => value descending order.
    # each SNP object has a defined method for comparison based on its value
    genome_wide_result.data_obj_ls.sort()
    genome_wide_result.data_obj_ls.reverse()
    ancestral_allele_phenotype_ls = []
    derived_allele_phenotype_ls = []
    no_of_genotype_accessions = len(geno_data.row_id_ls)
    pheno_data_col_index = pheno_data.phenotype_method_id2col_index[rm.phenotype_method_id]
    no_of_polarized_snps = 0
    # slicing never runs past the end of the list; max() keeps a negative
    # request from turning into "all but the last ones"
    top_data_obj_ls = genome_wide_result.data_obj_ls[:max(no_of_top_snps, 0)]
    for data_obj in top_data_obj_ls:
        chr_pos = '%s_%s'%(data_obj.chromosome, data_obj.position)
        geno_data_col_index = geno_data.col_id2col_index.get(chr_pos)
        if chr_pos not in chr_pos2ancestral_allele or geno_data_col_index is None:
            continue
        no_of_polarized_snps += 1
        for row_index in range(no_of_genotype_accessions):
            allele = geno_data.data_matrix[row_index][geno_data_col_index]
            #find corresponding accession row index in phenotype matrix
            pheno_row_index = pheno_data.row_id2row_index.get(geno_data.row_id_ls[row_index])
            if pheno_row_index is not None and allele>=1 and allele<=4:    #no heterozygote or NA or deletion
                phenotype = pheno_data.data_matrix[pheno_row_index][pheno_data_col_index]
                if phenotype!='NA':
                    phenotype = float(phenotype)
                    if allele==chr_pos2ancestral_allele[chr_pos] or allele==number2complement[chr_pos2ancestral_allele[chr_pos]]:
                        ancestral_allele_phenotype_ls.append(phenotype)
                    else:
                        derived_allele_phenotype_ls.append(phenotype)
    phenotype_ls_data = PassingData()
    phenotype_ls_data.ancestral_allele_phenotype_ls = ancestral_allele_phenotype_ls
    phenotype_ls_data.derived_allele_phenotype_ls = derived_allele_phenotype_ls
    # the ratio is relative to the requested number; avoid dividing by zero
    if no_of_top_snps:
        polarized_ratio = no_of_polarized_snps/float(no_of_top_snps)
    else:
        polarized_ratio = 0.0
    sys.stderr.write("no_of_polarized_snps/no_of_top_snps=%s/%s=%.3f. Done.\n"%(no_of_polarized_snps, no_of_top_snps, polarized_ratio))
    return phenotype_ls_data
def __init__(self, **keywords):
    """
    2008-08-20
        All keyword arguments are handed unchanged to GeneListRankTest.__init__();
        nothing else is set up here.
    """
    GeneListRankTest.__init__(self, **keywords)
def __init__(self, **keywords):
    """
    Initialize through GeneListRankTest.__init__() and then replace the three
    id-list attributes (list_type_id_ls, analysis_method_id_ls,
    phenotype_method_id_ls) with the output of getListOutOfStr(..., data_type=int).
    """
    GeneListRankTest.__init__(self, **keywords)
    # same conversion for each attribute, in the original order
    for attr_name in ('list_type_id_ls', 'analysis_method_id_ls', 'phenotype_method_id_ls'):
        setattr(self, attr_name, getListOutOfStr(getattr(self, attr_name), data_type=int))