コード例 #1
0
    def _get_pval_uncorr(self, study, log=sys.stdout):
        """Calculate the uncorrected p-values for study items.

        Args:
            study: Sized collection of study item IDs. It is forwarded to
                ``get_terms`` and its length is used as the study size.
            log: Writable stream for the progress message, or None to skip
                the message written by this method.

        Returns:
            A list holding one GOEnrichmentRecord for every term that is a
            key of either the study mapping or ``self.go2popitems``.
        """
        # Guard the write so log=None does not raise AttributeError.
        if log is not None:
            log.write("Calculating uncorrected p-values using {PFNC}\n".format(
                PFNC=self.pval_obj.name))
        results = []
        # NOTE(review): log is forwarded as-is; get_terms is assumed to
        # tolerate log=None -- confirm against its definition.
        go2studyitems = get_terms("study", study, self.assoc, self.obo_dag, log)
        pop_n, study_n = self.pop_n, len(study)
        # Terms seen in the study and/or in the population.
        allterms = set(go2studyitems).union(set(self.go2popitems))
        calc_pvalue = self.pval_obj.calc_pvalue

        for term in allterms:
            # A term missing from one mapping contributes an empty item set.
            study_items = go2studyitems.get(term, set())
            study_count = len(study_items)
            pop_items = self.go2popitems.get(term, set())
            pop_count = len(pop_items)

            one_record = GOEnrichmentRecord(
                GO=term,
                p_uncorrected=calc_pvalue(study_count, study_n, pop_count, pop_n),
                study_items=study_items,
                pop_items=pop_items,
                ratio_in_study=(study_count, study_n),
                ratio_in_pop=(pop_count, pop_n))

            results.append(one_record)

        return results
コード例 #2
0
    def get_pval_uncorr(self, study, log=sys.stdout):
        """Build one enrichment record, with its raw p-value, per GO term.

        Args:
            study: Sized collection of study item IDs; passed to
                ``get_terms`` and measured with ``len``.
            log: Writable stream for progress output, or None to skip the
                message written by this method.

        Returns:
            A list of GOEnrichmentRecord objects, one for each term that is
            a key of the study mapping or of ``self.go2popitems``.
        """
        term2study = get_terms("study", study, self.assoc, self.obo_dag, log)
        term2pop = self.go2popitems
        num_pop = self.pop_n
        num_study = len(study)
        goids = set(term2study).union(set(term2pop))
        if log is not None:
            msg = "Calculating {N:,} uncorrected p-values using {PFNC}\n"
            log.write(msg.format(N=len(goids), PFNC=self.pval_obj.name))
        pvalue_of = self.pval_obj.calc_pvalue

        def _record(goid):
            """Assemble the record for a single term."""
            # Terms absent from a mapping are treated as having no items.
            in_study = term2study.get(goid, set())
            cnt_study = len(in_study)
            in_pop = term2pop.get(goid, set())
            cnt_pop = len(in_pop)
            return GOEnrichmentRecord(
                GO=goid,
                p_uncorrected=pvalue_of(cnt_study, num_study, cnt_pop, num_pop),
                study_items=in_study,
                pop_items=in_pop,
                ratio_in_study=(cnt_study, num_study),
                ratio_in_pop=(cnt_pop, num_pop))

        return [_record(goid) for goid in goids]
コード例 #3
0
ファイル: go_enrichment.py プロジェクト: yintz/goatools
    def __init__(self,
                 pop,
                 assoc,
                 obo_dag,
                 propagate_counts=True,
                 alpha=.05,
                 methods=None,
                 **kws):
        """Store the analysis inputs and precompute the population term map.

        Args:
            pop: Sized collection of population item IDs.
            assoc: Association data handed to ``get_terms`` and, when
                propagating, to ``obo_dag.update_association``.
            obo_dag: Ontology object; must provide ``update_association``
                when ``propagate_counts`` is true.
            propagate_counts: When true, call
                ``obo_dag.update_association(assoc)`` before building the
                population term map.
            alpha: Significance threshold, stored as ``self.alpha``.
            methods: Multiple-test method names; defaults to
                ``["bonferroni", "sidak", "holm"]`` when None.
            **kws: Extra options. ``log`` selects the log stream (default
                ``sys.stdout``); all keywords are forwarded to FisherFactory.
        """
        self.log = kws.get('log', sys.stdout)
        # Dispatch table from multitest source name to its runner.
        self._run_multitest = dict(
            local=lambda iargs: self._run_multitest_local(iargs),
            statsmodels=lambda iargs: self._run_multitest_statsmodels(iargs))
        self.pop, self.pop_n = pop, len(pop)
        self.assoc, self.obo_dag = assoc, obo_dag
        self.alpha = alpha
        self.methods = Methods(
            ["bonferroni", "sidak", "holm"] if methods is None else methods)
        self.pval_obj = FisherFactory(**kws).pval_obj

        if propagate_counts:
            # This notice always goes to stderr, independent of self.log.
            sys.stderr.write("Propagating term counts to parents ..\n")
            obo_dag.update_association(assoc)
        self.go2popitems = get_terms(
            "population", pop, assoc, obo_dag, self.log)
コード例 #4
0
    def __init__(self,
                 pop,
                 assoc,
                 obo_dag,
                 propagate_counts=True,
                 alpha=.05,
                 methods=None,
                 **kws):
        """Store the analysis inputs and precompute the population term map.

        Args:
            pop: Iterable of population item IDs; stored as a set.
            assoc: Association data handed to ``update_association`` (when
                propagating) and to ``get_terms``.
            obo_dag: Ontology object handed to the same helpers.
            propagate_counts: When true, call ``update_association`` on
                ``assoc`` before building the population term map.
            alpha: Significance threshold, stored as ``self.alpha``.
            methods: Multiple-test method names; defaults to
                ``["bonferroni", "sidak", "holm"]`` when None.
            **kws: Extra options. ``name`` labels this analysis (default
                ``'GOEA'``), ``log`` selects the log stream (default
                ``sys.stdout``), ``relationships`` is passed to
                ``update_association``; all keywords are forwarded to
                FisherFactory.
        """
        self.name = kws.get('name', 'GOEA')
        print('\nLoad {OBJNAME} Gene Ontology Analysis ...'.format(
            OBJNAME=self.name))
        self.log = kws['log'] if 'log' in kws else sys.stdout
        self._run_multitest = {
            'local': self._run_multitest_local,
            'statsmodels': self._run_multitest_statsmodels
        }
        self.pop = set(pop)
        # Count the de-duplicated population: len(pop) over-counts when the
        # caller passes a list with repeated IDs, and fails outright for a
        # one-shot iterable that set(pop) has already consumed.
        self.pop_n = len(self.pop)
        self.assoc = assoc
        self.obo_dag = obo_dag
        self.alpha = alpha
        if methods is None:
            methods = ["bonferroni", "sidak", "holm"]
        self.methods = Methods(methods)
        self.pval_obj = FisherFactory(**kws).pval_obj

        if propagate_counts:
            update_association(assoc, obo_dag, kws.get('relationships', None))
        ## BROAD broad_goids = get_goids_to_remove(kws.get('remove_goids'))
        ## BROAD if broad_goids:
        ## BROAD     assoc = self._remove_assc_goids(assoc, broad_goids)
        # Use the stored set so the term map is built from the same items
        # that self.pop_n counts.
        self.go2popitems = get_terms("population", self.pop, assoc, obo_dag,
                                     self.log)
コード例 #5
0
ファイル: go_enrichment.py プロジェクト: yangle293/goatools
    def get_pval_uncorr(self, study, log=sys.stdout):
        """Calculate the uncorrected p-values for study items.

        Only study items that are also members of ``self.pop`` are analysed.

        Args:
            study: Sized collection of study item IDs.
            log: Writable stream for progress messages, or None to skip the
                messages written by this method.

        Returns:
            A list holding one GOEnrichmentRecord for every term that is a
            key of either the study mapping or ``self.go2popitems``.
        """
        results = []
        study_in_pop = self.pop.intersection(study)
        # " 99%    378 of    382 study items found in population"
        go2studyitems = get_terms("study", study_in_pop, self.assoc, self.obo_dag, log)
        pop_n, study_n = self.pop_n, len(study_in_pop)
        allterms = set(go2studyitems).union(set(self.go2popitems))
        if log is not None:
            # Report against the original study size; some study items may
            # not have been found in the population.
            study_n_orig = len(study)
            # An empty study would otherwise raise ZeroDivisionError here.
            perc = 100.0*study_n/study_n_orig if study_n_orig != 0 else 0.0
            log.write("{R:3.0f}% {N:>6,} of {M:>6,} study items found in population({P})\n".format(
                N=study_n, M=study_n_orig, P=pop_n, R=perc))
            log.write("Calculating {N:,} uncorrected p-values using {PFNC}\n".format(
                N=len(allterms), PFNC=self.pval_obj.name))
        calc_pvalue = self.pval_obj.calc_pvalue

        for goid in allterms:
            # A term missing from one mapping contributes an empty item set.
            study_items = go2studyitems.get(goid, set())
            study_count = len(study_items)
            pop_items = self.go2popitems.get(goid, set())
            pop_count = len(pop_items)

            one_record = GOEnrichmentRecord(
                GO=goid,
                p_uncorrected=calc_pvalue(study_count, study_n, pop_count, pop_n),
                study_items=study_items,
                pop_items=pop_items,
                ratio_in_study=(study_count, study_n),
                ratio_in_pop=(pop_count, pop_n))

            results.append(one_record)

        return results
コード例 #6
0
    def __init__(self, pop, assoc, obo_dag, propagate_counts=True, alpha=.05, methods=None, **kws):
        """Record the inputs of the analysis and build the population term map.

        Args:
            pop: Sized collection of population item IDs.
            assoc: Association data passed to ``get_terms`` and, when
                propagating, to ``obo_dag.update_association``.
            obo_dag: Ontology object; ``update_association`` is called on it
                when ``propagate_counts`` is true.
            propagate_counts: Whether to call
                ``obo_dag.update_association(assoc)`` first.
            alpha: Significance threshold, kept as ``self.alpha``.
            methods: Multiple-test method names; None selects
                ``["bonferroni", "sidak", "holm"]``.
            **kws: Extra options; ``log`` picks the log stream (default
                ``sys.stdout``) and everything is forwarded to FisherFactory.
        """
        self.log = kws.get('log', sys.stdout)
        # Name of the multitest source -> callable that runs it.
        self._run_multitest = {
            'local': lambda iargs: self._run_multitest_local(iargs),
            'statsmodels': lambda iargs: self._run_multitest_statsmodels(iargs),
        }
        self.pop = pop
        self.pop_n = len(pop)
        self.assoc = assoc
        self.obo_dag = obo_dag
        self.alpha = alpha
        method_names = methods if methods is not None else ["bonferroni", "sidak", "holm"]
        self.methods = Methods(method_names)
        fisher = FisherFactory(**kws)
        self.pval_obj = fisher.pval_obj

        if propagate_counts:
            # The notice is written to stderr regardless of self.log.
            sys.stderr.write("Propagating term counts to parents ..\n")
            obo_dag.update_association(assoc)
        self.go2popitems = get_terms("population", pop, assoc, obo_dag, self.log)
コード例 #7
0
    def get_pval_uncorr(self, study, log=sys.stdout):
        """Compute a raw (uncorrected) p-value record for every GO term.

        Study items that are not members of ``self.pop`` are ignored.

        Args:
            study: Sized collection of study item IDs.
            log: Writable stream for progress messages, or None to skip the
                messages written by this method.

        Returns:
            An empty list when no study item is in the population; otherwise
            a list of GOEnrichmentRecord objects, one per term that is a key
            of the study mapping or of ``self.go2popitems``.
        """
        genes = self.pop.intersection(study)
        # " 99%    378 of    382 study items found in population"
        term2study = get_terms("study", genes, self.assoc, self.obo_dag, log)
        num_pop = self.pop_n
        num_study = len(genes)
        goids = set(term2study).union(set(self.go2popitems))
        if log is not None:
            # The percentage is relative to the size of the study as passed in.
            num_given = len(study)
            if num_given != 0:
                perc = 100.0 * num_study / num_given
            else:
                perc = 0.0
            found_fmt = "{R:3.0f}% {N:>6,} of {M:>6,} study items found in population({P})\n"
            log.write(found_fmt.format(N=num_study, M=num_given, P=num_pop, R=perc))
            if num_study:
                calc_fmt = "Calculating {N:,} uncorrected p-values using {PFNC}\n"
                log.write(calc_fmt.format(N=len(goids), PFNC=self.pval_obj.name))
        # Nothing to analyse when no study item is in the population.
        if not num_study:
            return []
        pvalue_of = self.pval_obj.calc_pvalue

        records = []
        for goid in goids:
            # Terms absent from a mapping are treated as having no items.
            in_study = term2study.get(goid, set())
            in_pop = self.go2popitems.get(goid, set())
            cnt_study, cnt_pop = len(in_study), len(in_pop)
            records.append(GOEnrichmentRecord(
                goid,
                p_uncorrected=pvalue_of(cnt_study, num_study, cnt_pop, num_pop),
                study_items=in_study,
                pop_items=in_pop,
                ratio_in_study=(cnt_study, num_study),
                ratio_in_pop=(cnt_pop, num_pop)))
        return records
コード例 #8
0
ファイル: go_enrichment.py プロジェクト: tanghaibao/goatools
    def get_pval_uncorr(self, study, log=sys.stdout):
        """Return enrichment records carrying uncorrected p-values.

        The study is first restricted to the items present in ``self.pop``.

        Args:
            study: Sized collection of study item IDs.
            log: Writable stream for progress messages, or None to skip the
                messages written by this method.

        Returns:
            ``[]`` if none of the study items is in the population;
            otherwise one GOEnrichmentRecord per term found among the keys
            of the study mapping or of ``self.go2popitems``.
        """
        study_in_pop = self.pop.intersection(study)
        # " 99%    378 of    382 study items found in population"
        go2study = get_terms("study", study_in_pop, self.assoc, self.obo_dag, log)
        go2pop = self.go2popitems
        n_pop = self.pop_n
        n_study = len(study_in_pop)
        terms = set(go2study).union(set(go2pop))
        if log is not None:
            # Report relative to the original study; some of its items may
            # be missing from the population.
            n_orig = len(study)
            percent = 0.0 if n_orig == 0 else 100.0*n_study/n_orig
            log.write("{R:3.0f}% {N:>6,} of {M:>6,} study items found in population({P})\n".format(
                R=percent, N=n_study, M=n_orig, P=n_pop))
            if n_study:
                log.write("Calculating {N:,} uncorrected p-values using {PFNC}\n".format(
                    PFNC=self.pval_obj.name, N=len(terms)))
        # No study items in the population: return empty GOEA results.
        if not n_study:
            return []
        fisher = self.pval_obj.calc_pvalue

        def _make_record(goid):
            """Create the record for one term."""
            items_study = go2study.get(goid, set())
            k_study = len(items_study)
            items_pop = go2pop.get(goid, set())
            k_pop = len(items_pop)
            return GOEnrichmentRecord(
                goid,
                p_uncorrected=fisher(k_study, n_study, k_pop, n_pop),
                study_items=items_study,
                pop_items=items_pop,
                ratio_in_study=(k_study, n_study),
                ratio_in_pop=(k_pop, n_pop))

        return [_make_record(goid) for goid in terms]
コード例 #9
0
ファイル: go_enrichment.py プロジェクト: uweschmitt/goatools
    def _get_pval_uncorr(self, study, log=sys.stdout):
        """Calculate the uncorrected p-values for study items in a process pool.

        The terms are split into one fragment per worker process and each
        fragment is passed to ``compute_pvals`` through a
        ``multiprocessing.Pool``.

        Args:
            study: Sized collection of study item IDs. It is forwarded to
                ``get_terms`` and its length is used as the study size.
            log: Writable stream for the progress message, or None to skip
                the message written by this method.

        Returns:
            A list of GOEnrichmentRecord objects, one per (term, p-value)
            pair returned by the workers.
        """
        if log is not None:
            log.write("Calculating uncorrected p-values using {PFNC}\n".format(
                PFNC=self.pval_obj.name))
        go2studyitems = get_terms("study", study, self.assoc, self.obo_dag,
                                  log)
        pop_n, study_n = self.pop_n, len(study)
        allterms = list(set(go2studyitems.keys()).union(set(
            self.go2popitems.keys())))

        # Leaving one core free avoids freezing the machine. Clamp to 1 so
        # a single-core host does not ask for a pool of zero processes,
        # which multiprocessing.Pool rejects with ValueError.
        n_procs = max(1, multiprocessing.cpu_count() - 1)
        fragments = [allterms[i::n_procs] for i in range(n_procs)]

        # self.pval_obj.log may be a file handle, which cannot be serialized,
        # so self.pval_obj.calc_pvalue could not be transferred to another
        # Python process. Therefore the attribute is patched out here and
        # restored afterwards, even if the computation fails.
        old_log = self.pval_obj.log
        self.pval_obj.log = None
        try:
            remote_func = partial(compute_pvals,
                                  calc_pvalue=self.pval_obj.calc_pvalue,
                                  go2studyitems=go2studyitems,
                                  go2popitems=self.go2popitems,
                                  study_n=study_n,
                                  pop_n=pop_n)
            pool = multiprocessing.Pool(n_procs)
            try:
                all_p_values = pool.map(remote_func, fragments)
            finally:
                # Always release the worker processes.
                pool.close()
                pool.join()
        finally:
            # Restore the patched file handle.
            self.pval_obj.log = old_log

        results = []
        for p_values in all_p_values:
            for term, p_value in p_values.items():
                # A term missing from one mapping contributes an empty set.
                study_items = go2studyitems.get(term, set())
                study_count = len(study_items)
                pop_items = self.go2popitems.get(term, set())
                pop_count = len(pop_items)

                one_record = GOEnrichmentRecord(GO=term,
                                                p_uncorrected=p_value,
                                                study_items=study_items,
                                                pop_items=pop_items,
                                                ratio_in_study=(study_count,
                                                                study_n),
                                                ratio_in_pop=(pop_count,
                                                              pop_n))

                results.append(one_record)

        return results