def calc_qval(study_n, pop_n, pop, assoc, term_pop, obo_dag, T=500):
    """Generate a p-value distribution for FDR based on resampling.

    For each of ``T`` rounds, draw ``study_n`` items at random from ``pop``,
    count the terms of that random study with ``count_terms``, run a Fisher
    test for every counted term and record the smallest two-tailed p-value
    seen in the round.

    Args:
        study_n: Number of items drawn per round; also passed to the Fisher
            test together with each term's study count.
        pop_n: Value passed to the Fisher test as the last argument
            (presumably the population size -- confirm against callers).
        pop: Collection of items to sample from. Lists and tuples are used
            as-is; any other iterable (e.g. a set) is copied into a tuple.
        assoc: Passed through unchanged to ``count_terms``.
        term_pop: Mapping from term to its count; indexed with every term
            returned by ``count_terms``.
        obo_dag: Passed through unchanged to ``count_terms``.
        T: Number of resampling rounds.

    Returns:
        A list with one entry per round: the smallest two-tailed p-value of
        that round, or ``1`` when no term produced a p-value below 1.

    Raises:
        KeyError: If a term counted in a random study is missing from
            ``term_pop``.
        ValueError: If ``study_n`` is larger than the population
            (raised by ``random.sample``).
    """
    import fisher
    from goatools.ratio import count_terms

    print(("Generate p-value distribution for FDR "
           "based on resampling (this might take a while)"), file=sys.stderr)

    # random.sample() requires a sequence: passing a set is deprecated since
    # Python 3.9 and raises TypeError from 3.11 on. Convert once, outside the
    # loop, instead of relying on a per-call implicit conversion.
    population = pop if isinstance(pop, (list, tuple)) else tuple(pop)

    distribution = []
    for i in range(T):
        new_study = random.sample(population, study_n)
        new_term_study = count_terms(new_study, assoc, obo_dag)

        # Start at 1 so a round without any term still yields a valid p-value.
        smallest_p = 1
        for term, study_count in new_term_study.items():
            pop_count = term_pop[term]
            p = fisher.pvalue_population(study_count, study_n,
                                         pop_count, pop_n)
            if p.two_tail < smallest_p:
                smallest_p = p.two_tail

        distribution.append(smallest_p)
        # Progress report on every tenth round only.
        if i % 10 == 0:
            print("Sample {0} / {1}: p-value {2}".format(i, T, smallest_p),
                  file=sys.stderr)
    return distribution
def calc_qval(study_n, pop_n, pop, assoc, term_pop, obo_dag, T=500):
    """Generate a p-value distribution for FDR based on resampling.

    For each of ``T`` rounds, draw ``study_n`` items at random from ``pop``,
    count the terms of that random study with ``count_terms``, compute an
    uncorrected p-value for every counted term and record the smallest one
    seen in the round.

    Args:
        study_n: Number of items drawn per round; also passed to the p-value
            calculation together with each term's study count.
        pop_n: Value passed to the p-value calculation as the last argument
            (presumably the population size -- confirm against callers).
        pop: Collection of items to sample from. Lists and tuples are used
            as-is; any other iterable (e.g. a set) is copied into a tuple.
        assoc: Passed through unchanged to ``count_terms``.
        term_pop: Mapping from term to its count; indexed with every term
            returned by ``count_terms``.
        obo_dag: Passed through unchanged to ``count_terms``.
        T: Number of resampling rounds.

    Returns:
        A list with one entry per round: the smallest uncorrected p-value of
        that round, or ``1`` when no term produced a p-value below 1.

    Raises:
        KeyError: If a term counted in a random study is missing from
            ``term_pop``.
        ValueError: If ``study_n`` is larger than the population
            (raised by ``random.sample``).
    """
    from goatools.pvalcalc import FisherFactory
    from goatools.ratio import count_terms

    sys.stderr.write("Generate p-value distribution for FDR "
                     "based on resampling (this might take a while)\n")

    # random.sample() requires a sequence: passing a set is deprecated since
    # Python 3.9 and raises TypeError from 3.11 on. Convert once, outside the
    # loop, instead of relying on a per-call implicit conversion.
    population = pop if isinstance(pop, (list, tuple)) else tuple(pop)

    distribution = []
    # Resolve the bound p-value function once; it is invariant across rounds.
    calc_pvalue = FisherFactory().pval_obj.calc_pvalue
    for i in range(T):
        new_study = random.sample(population, study_n)
        new_term_study = count_terms(new_study, assoc, obo_dag)

        # Start at 1 so a round without any term still yields a valid p-value.
        smallest_p = 1
        for term, study_count in new_term_study.items():
            pop_count = term_pop[term]
            p_uncorrected = calc_pvalue(study_count, study_n,
                                        pop_count, pop_n)
            if p_uncorrected < smallest_p:
                smallest_p = p_uncorrected

        distribution.append(smallest_p)
        # Progress report on every tenth round only.
        if i % 10 == 0:
            sys.stderr.write("Sample {0} / {1}: "
                             "p-value {2}\n".format(i, T, smallest_p))
    return distribution
def _run_multitest_local(self, ntmt):
    """Apply a locally implemented multiple-test correction.

    The method name is read from ``ntmt.nt_method.method``. "bonferroni",
    "sidak" and "holm" are computed from ``ntmt.pvals`` and ``ntmt.alpha``;
    "fdr" first builds an empirical p-value distribution with ``calc_qval``.
    Any other name leaves the corrected p-values as ``None``. The result is
    always handed to ``self._update_pvalcorr``.
    """
    name = ntmt.nt_method.method
    adjusted = None

    if name in ("bonferroni", "sidak", "holm"):
        # The three simple correctors share one call shape; only the class
        # differs, so pick the class first and call it once.
        if name == "bonferroni":
            corrector = Bonferroni
        elif name == "sidak":
            corrector = Sidak
        else:
            corrector = HolmBonferroni
        adjusted = corrector(ntmt.pvals, ntmt.alpha).corrected_pvals
    elif name == "fdr":
        # Reuse cached population term counts when the instance has them;
        # otherwise count them now.
        pop_term_counts = getattr(self, 'term_pop', None)
        if pop_term_counts is None:
            pop_term_counts = count_terms(self.pop, self.assoc, self.obo_dag)
        # Empirical p-value distribution for FDR, obtained by resampling.
        resampled = calc_qval(len(ntmt.study), self.pop_n, self.pop,
                              self.assoc, pop_term_counts, self.obo_dag)
        adjusted = FDR(resampled, ntmt.results, ntmt.alpha).corrected_pvals

    self._update_pvalcorr(ntmt, adjusted)
def _run_multitest_local(self, ntmt):
    """Run one of the multiple-test methods that are implemented locally.

    Dispatches on ``ntmt.nt_method.method``: "fdr" resamples an empirical
    p-value distribution via ``calc_qval``, while "holm", "sidak" and
    "bonferroni" work directly on ``ntmt.pvals`` and ``ntmt.alpha``. An
    unrecognized name yields ``None``. Whatever was computed is passed on to
    ``self._update_pvalcorr``.
    """
    chosen = ntmt.nt_method.method
    pvals_after_correction = None

    if chosen == "fdr":
        # Get the empirical p-value distribution for FDR; the population
        # term counts are taken from the instance when already present.
        counts_in_pop = getattr(self, 'term_pop', None)
        if counts_in_pop is None:
            counts_in_pop = count_terms(self.pop, self.assoc, self.obo_dag)
        empirical_pvals = calc_qval(
            len(ntmt.study),
            self.pop_n,
            self.pop,
            self.assoc,
            counts_in_pop,
            self.obo_dag,
        )
        fdr = FDR(empirical_pvals, ntmt.results, ntmt.alpha)
        pvals_after_correction = fdr.corrected_pvals
    elif chosen == "holm":
        holm = HolmBonferroni(ntmt.pvals, ntmt.alpha)
        pvals_after_correction = holm.corrected_pvals
    elif chosen == "sidak":
        sidak = Sidak(ntmt.pvals, ntmt.alpha)
        pvals_after_correction = sidak.corrected_pvals
    elif chosen == "bonferroni":
        bonferroni = Bonferroni(ntmt.pvals, ntmt.alpha)
        pvals_after_correction = bonferroni.corrected_pvals

    self._update_pvalcorr(ntmt, pvals_after_correction)