Ejemplo n.º 1
0
def calc_qval(study_n, pop_n, pop, assoc, term_pop, obo_dag, T=500):
    """Generate p-value distribution for FDR based on resampling.

    Runs ``T`` resampling rounds. Each round draws ``study_n`` items from
    ``pop`` without replacement, counts their terms with ``count_terms`` and
    records the smallest two-tailed p-value found among those terms (``1``
    when no term yields a smaller value). Progress is reported on stderr.

    Args:
        study_n: number of items drawn for every resampled study.
        pop_n: last argument given to ``fisher.pvalue_population``
            (presumably the population size -- confirm against the caller).
        pop: iterable of population items to sample from. It is copied into
            a list once, so sets and other non-sequence iterables work.
        assoc: passed through unchanged to ``count_terms``.
        term_pop: mapping indexed by term that yields the population count
            for that term.
        obo_dag: passed through unchanged to ``count_terms``.
        T: number of resampling rounds.

    Returns:
        A list with one smallest p-value per round, in round order.
    """
    import fisher
    from goatools.ratio import count_terms
    print(("Generate p-value distribution for FDR "
           "based on resampling (this might take a while)"),
          file=sys.stderr)
    # random.sample() requires a sequence; since Python 3.11 it raises
    # TypeError for sets. Convert once here instead of on every round.
    population = list(pop)
    distribution = []
    for i in range(T):
        new_study = random.sample(population, study_n)
        new_term_study = count_terms(new_study, assoc, obo_dag)

        smallest_p = 1
        for term, study_count in new_term_study.items():
            pop_count = term_pop[term]
            p = fisher.pvalue_population(study_count, study_n, pop_count,
                                         pop_n)
            # min() keeps its first argument on ties, matching a strict "<".
            smallest_p = min(smallest_p, p.two_tail)

        distribution.append(smallest_p)
        # Only report every tenth round to keep stderr readable.
        if i % 10 == 0:
            print("Sample {0} / {1}: p-value {2}".format(i, T, smallest_p),
                  file=sys.stderr)
    return distribution
Ejemplo n.º 2
0
def calc_qval(study_n, pop_n,
              pop, assoc, term_pop, obo_dag, T=500):
    """Generate p-value distribution for FDR based on resampling.

    For each of ``T`` rounds a random study of ``study_n`` items is drawn
    from ``pop`` (without replacement), its terms are counted with
    ``count_terms``, and the smallest two-tailed p-value over those terms is
    stored. A round whose terms never go below ``1`` stores ``1``.

    Args:
        study_n: size of every resampled study.
        pop_n: forwarded as the final argument of
            ``fisher.pvalue_population`` (presumably the population size --
            TODO confirm).
        pop: iterable holding the population items. A list copy is made up
            front, so a set is accepted on every Python version.
        assoc: forwarded to ``count_terms``.
        term_pop: per-term population counts, indexed by term.
        obo_dag: forwarded to ``count_terms``.
        T: how many resampling rounds to run.

    Returns:
        List of the per-round smallest p-values, in the order computed.
    """
    import fisher
    from goatools.ratio import count_terms
    print(("Generate p-value distribution for FDR "
           "based on resampling (this might take a while)"), file=sys.stderr)
    # Sampling straight from a set raises TypeError on Python 3.11+ (and was
    # re-converted on every call before that), so materialise a list once.
    sample_space = list(pop)
    distribution = []
    for i in range(T):
        new_study = random.sample(sample_space, study_n)
        new_term_study = count_terms(new_study, assoc, obo_dag)

        smallest_p = 1
        for term, study_count in new_term_study.items():
            two_tail = fisher.pvalue_population(study_count,
                                                study_n,
                                                term_pop[term],
                                                pop_n).two_tail
            if two_tail < smallest_p:
                smallest_p = two_tail

        distribution.append(smallest_p)
        # Progress line for every tenth round only.
        if i % 10 == 0:
            print("Sample {0} / {1}: p-value {2}".format(i, T, smallest_p),
                  file=sys.stderr)
    return distribution
Ejemplo n.º 3
0
def calc_qval(study_n, pop_n,
              pop, assoc, term_pop, obo_dag, T=500):
    """Generate p-value distribution for FDR based on resampling.

    Performs ``T`` rounds. In each round ``study_n`` items are sampled from
    ``pop`` without replacement, their terms are counted via ``count_terms``
    and every term is scored with the ``calc_pvalue`` callable obtained from
    ``FisherFactory().pval_obj``. The smallest score of the round (or ``1``
    if none is smaller) is appended to the result. Progress is written to
    stderr.

    Args:
        study_n: number of items in each resampled study.
        pop_n: fourth argument given to ``calc_pvalue`` (presumably the
            population size -- confirm against the caller).
        pop: iterable of population items. It is copied into a list before
            sampling so that sets are supported.
        assoc: handed to ``count_terms`` as-is.
        term_pop: mapping from term to its count in the population.
        obo_dag: handed to ``count_terms`` as-is.
        T: number of resampling rounds.

    Returns:
        List containing the smallest p-value of every round, in round order.
    """
    from goatools.pvalcalc import FisherFactory
    from goatools.ratio import count_terms
    sys.stderr.write("Generate p-value distribution for FDR "
                     "based on resampling (this might take a while)\n")
    distribution = []
    calc_pvalue = FisherFactory().pval_obj.calc_pvalue
    # random.sample() only accepts sequences (a set raises TypeError since
    # Python 3.11); build the list once, outside the resampling loop.
    population = list(pop)
    for i in range(T):
        new_study = random.sample(population, study_n)
        new_term_study = count_terms(new_study, assoc, obo_dag)

        smallest_p = 1
        for term, study_count in new_term_study.items():
            p_uncorrected = calc_pvalue(study_count,
                                        study_n,
                                        term_pop[term],
                                        pop_n)
            if p_uncorrected < smallest_p:
                smallest_p = p_uncorrected

        distribution.append(smallest_p)
        # Emit a progress line on every tenth round.
        if i % 10 == 0:
            sys.stderr.write("Sample {0} / {1}: "
                             "p-value {2}\n".format(i, T, smallest_p))
    return distribution
Ejemplo n.º 4
0
    def _run_multitest_local(self, ntmt):
        """Run a locally implemented multiple-test correction for ``ntmt``.

        The method name is read from ``ntmt.nt_method.method``. The names
        "bonferroni", "sidak" and "holm" build the matching corrector from
        ``ntmt.pvals`` and ``ntmt.alpha``; "fdr" first builds a resampled
        p-value distribution with ``calc_qval``. Any other name yields
        ``None``. The outcome is handed to ``self._update_pvalcorr``.
        """
        chosen = ntmt.nt_method.method

        if chosen == "fdr":
            # Reuse the cached per-term population counts when the instance
            # has them; otherwise count them now.
            pop_term_counts = getattr(self, 'term_pop', None)
            if pop_term_counts is None:
                pop_term_counts = count_terms(
                    self.pop, self.assoc, self.obo_dag)
            # Empirical p-value distribution used by the FDR correction.
            resampled = calc_qval(
                len(ntmt.study), self.pop_n, self.pop, self.assoc,
                pop_term_counts, self.obo_dag)
            adjusted = FDR(resampled, ntmt.results, ntmt.alpha).corrected_pvals
        else:
            # Pick the corrector class; all three share one call signature.
            if chosen == "bonferroni":
                corrector = Bonferroni
            elif chosen == "sidak":
                corrector = Sidak
            elif chosen == "holm":
                corrector = HolmBonferroni
            else:
                corrector = None

            if corrector is None:
                adjusted = None
            else:
                adjusted = corrector(ntmt.pvals, ntmt.alpha).corrected_pvals

        self._update_pvalcorr(ntmt, adjusted)
Ejemplo n.º 5
0
    def _run_multitest_local(self, ntmt):
        """Correct p-values with one of the locally implemented methods.

        ``ntmt.nt_method.method`` selects the correction: "bonferroni",
        "sidak" and "holm" are computed directly from ``ntmt.pvals`` and
        ``ntmt.alpha``, while "fdr" needs a resampled p-value distribution
        from ``calc_qval``. For any other method name the corrected values
        stay ``None``. The result is always passed to
        ``self._update_pvalcorr``.
        """
        method = ntmt.nt_method.method
        # Correctors that only need the raw p-values and alpha, in the order
        # they are checked.
        direct_correctors = (
            ("bonferroni", Bonferroni),
            ("sidak", Sidak),
            ("holm", HolmBonferroni),
        )
        corrector_cls = next(
            (cls for key, cls in direct_correctors if method == key), None)

        pvals_corrected = None
        if corrector_cls is not None:
            pvals_corrected = corrector_cls(
                ntmt.pvals, ntmt.alpha).corrected_pvals
        elif method == "fdr":
            # Get the empirical p-value distribution for FDR, counting the
            # population terms only if the instance has none stored.
            counts_in_pop = getattr(self, 'term_pop', None)
            if counts_in_pop is None:
                counts_in_pop = count_terms(self.pop, self.assoc, self.obo_dag)
            distribution = calc_qval(len(ntmt.study),
                                     self.pop_n,
                                     self.pop, self.assoc,
                                     counts_in_pop, self.obo_dag)
            fdr = FDR(distribution, ntmt.results, ntmt.alpha)
            pvals_corrected = fdr.corrected_pvals

        self._update_pvalcorr(ntmt, pvals_corrected)