def _get_pval_uncorr(self, study, log=sys.stdout):
    """Calculate the uncorrected pvalues for study items.

    A record is produced for every term found either in the study mapping
    returned by ``get_terms`` or in ``self.go2popitems``.

    Args:
        study: Sized collection of study items; ``len(study)`` is the study
            total handed to the p-value function.
        log: Stream that receives the progress message. When ``None`` the
            message written here is skipped; the value is still passed to
            ``get_terms`` unchanged.
            NOTE(review): ``get_terms`` must itself accept ``log=None`` -- confirm.

    Returns:
        A list holding one ``GOEnrichmentRecord`` per term.
    """
    # Guard the write so that callers may silence output with log=None
    if log is not None:
        log.write("Calculating uncorrected p-values using {PFNC}\n".format(
            PFNC=self.pval_obj.name))
    results = []
    go2studyitems = get_terms("study", study, self.assoc, self.obo_dag, log)
    pop_n, study_n = self.pop_n, len(study)
    # Iterating a mapping yields its keys, so .keys() is not needed
    allterms = set(go2studyitems).union(set(self.go2popitems))
    calc_pvalue = self.pval_obj.calc_pvalue
    for term in allterms:
        # A term missing from one side contributes an empty item set there
        study_items = go2studyitems.get(term, set())
        study_count = len(study_items)
        pop_items = self.go2popitems.get(term, set())
        pop_count = len(pop_items)
        one_record = GOEnrichmentRecord(
            GO=term,
            p_uncorrected=calc_pvalue(study_count, study_n, pop_count, pop_n),
            study_items=study_items,
            pop_items=pop_items,
            ratio_in_study=(study_count, study_n),
            ratio_in_pop=(pop_count, pop_n))
        results.append(one_record)
    return results
def get_pval_uncorr(self, study, log=sys.stdout):
    """Build one enrichment record, with an uncorrected p-value, per term.

    The terms considered are the union of the keys of the study mapping
    (from ``get_terms``) and the keys of ``self.go2popitems``.

    Args:
        study: Sized collection of study items; ``len(study)`` is the study total.
        log: Stream for the progress message, or ``None`` to skip that
            message. It is also forwarded to ``get_terms``.

    Returns:
        A list of ``GOEnrichmentRecord`` objects, one per term.
    """
    study2items = get_terms("study", study, self.assoc, self.obo_dag, log)
    n_pop = self.pop_n
    n_study = len(study)
    terms = set(study2items).union(set(self.go2popitems))

    if log is not None:
        message = "Calculating {N:,} uncorrected p-values using {PFNC}\n".format(
            N=len(terms), PFNC=self.pval_obj.name)
        log.write(message)

    pvalue_of = self.pval_obj.calc_pvalue

    def _record_for(term):
        """Assemble the record of a single term."""
        # A fresh empty set is the stand-in when a term is absent on one side
        in_study = study2items.get(term, set())
        k_study = len(in_study)
        in_pop = self.go2popitems.get(term, set())
        k_pop = len(in_pop)
        return GOEnrichmentRecord(
            GO=term,
            p_uncorrected=pvalue_of(k_study, n_study, k_pop, n_pop),
            study_items=in_study,
            pop_items=in_pop,
            ratio_in_study=(k_study, n_study),
            ratio_in_pop=(k_pop, n_pop))

    return [_record_for(term) for term in terms]
def __init__(self, pop, assoc, obo_dag, propagate_counts=True, alpha=.05, methods=None, **kws):
    """Store the analysis inputs and precompute the population term mapping.

    Args:
        pop: Sized collection of population items; its length becomes ``pop_n``.
        assoc: Association data passed to ``obo_dag.update_association`` and
            to ``get_terms``.
        obo_dag: Object providing ``update_association``; also passed to
            ``get_terms``.
        propagate_counts: When true, ``obo_dag.update_association(assoc)``
            is called before the population terms are collected.
        alpha: Stored unchanged on ``self.alpha``.
        methods: Names wrapped in ``Methods``; defaults to
            ``["bonferroni", "sidak", "holm"]`` when ``None``.
        **kws: ``log`` selects the log stream (default ``sys.stdout``). All
            keywords are forwarded to ``FisherFactory``.
    """
    # Named closures keep the method lookup deferred until call time
    def _multitest_local(iargs):
        return self._run_multitest_local(iargs)

    def _multitest_statsmodels(iargs):
        return self._run_multitest_statsmodels(iargs)

    self.log = kws.get('log', sys.stdout)
    self._run_multitest = dict(
        local=_multitest_local,
        statsmodels=_multitest_statsmodels)

    self.pop = pop
    self.pop_n = len(pop)
    self.assoc = assoc
    self.obo_dag = obo_dag
    self.alpha = alpha

    chosen_methods = ["bonferroni", "sidak", "holm"] if methods is None else methods
    self.methods = Methods(chosen_methods)
    self.pval_obj = FisherFactory(**kws).pval_obj

    if propagate_counts:
        # This notice goes to stderr, not to self.log
        sys.stderr.write("Propagating term counts to parents ..\n")
        obo_dag.update_association(assoc)

    self.go2popitems = get_terms("population", pop, assoc, obo_dag, self.log)
def __init__(self, pop, assoc, obo_dag, propagate_counts=True, alpha=.05, methods=None, **kws):
    """Store the analysis inputs and precompute the population term mapping.

    Args:
        pop: Population items. A ``set`` copy is kept on ``self.pop``;
            ``pop_n`` is the length of the argument as given.
        assoc: Association data passed to ``update_association`` and to
            ``get_terms``.
        obo_dag: Passed to ``update_association`` and to ``get_terms``.
        propagate_counts: When true, ``update_association`` is called before
            the population terms are collected.
        alpha: Stored unchanged on ``self.alpha``.
        methods: Names wrapped in ``Methods``; defaults to
            ``["bonferroni", "sidak", "holm"]`` when ``None``.
        **kws: ``name`` labels this object (default ``'GOEA'``), ``log``
            selects the log stream (default ``sys.stdout``) and
            ``relationships`` is handed to ``update_association``. All
            keywords are forwarded to ``FisherFactory``.
    """
    self.name = kws.get('name', 'GOEA')
    banner = '\nLoad {OBJNAME} Gene Ontology Analysis ...'.format(OBJNAME=self.name)
    print(banner)

    self.log = kws.get('log', sys.stdout)
    self._run_multitest = dict(
        local=self._run_multitest_local,
        statsmodels=self._run_multitest_statsmodels)

    self.pop = set(pop)
    # NOTE(review): pop_n counts the argument as given, not the de-duplicated
    # set stored above -- confirm this is intended when pop has duplicates.
    self.pop_n = len(pop)
    self.assoc = assoc
    self.obo_dag = obo_dag
    self.alpha = alpha

    chosen_methods = ["bonferroni", "sidak", "holm"] if methods is None else methods
    self.methods = Methods(chosen_methods)
    self.pval_obj = FisherFactory(**kws).pval_obj

    if propagate_counts:
        update_association(assoc, obo_dag, kws.get('relationships'))

    # Disabled code, kept as found:
    ## BROAD broad_goids = get_goids_to_remove(kws.get('remove_goids'))
    ## BROAD if broad_goids:
    ## BROAD     assoc = self._remove_assc_goids(assoc, broad_goids)
    self.go2popitems = get_terms("population", pop, assoc, obo_dag, self.log)
def get_pval_uncorr(self, study, log=sys.stdout):
    """Calculate the uncorrected pvalues for study items.

    Only study items that are also members of ``self.pop`` are analysed. A
    record is produced for every term found in the study mapping returned by
    ``get_terms`` or in ``self.go2popitems``.

    Args:
        study: Sized iterable of study items.
        log: Stream for the two progress messages, or ``None`` to skip
            them. It is also forwarded to ``get_terms``.

    Returns:
        A list holding one ``GOEnrichmentRecord`` per term.
    """
    results = []
    study_in_pop = self.pop.intersection(study)
    go2studyitems = get_terms("study", study_in_pop, self.assoc, self.obo_dag, log)
    pop_n, study_n = self.pop_n, len(study_in_pop)
    allterms = set(go2studyitems).union(set(self.go2popitems))
    if log is not None:
        # Example message: " 99% 378 of 382 study items found in population"
        study_n_orig = len(study)
        # An empty study must not raise ZeroDivisionError; report 0% instead
        perc = 100.0 * study_n / study_n_orig if study_n_orig != 0 else 0.0
        log.write("{R:3.0f}% {N:>6,} of {M:>6,} study items found in population({P})\n".format(
            N=study_n, M=study_n_orig, P=pop_n, R=perc))
        log.write("Calculating {N:,} uncorrected p-values using {PFNC}\n".format(
            N=len(allterms), PFNC=self.pval_obj.name))
    calc_pvalue = self.pval_obj.calc_pvalue
    for goid in allterms:
        # A term missing from one side contributes an empty item set there
        study_items = go2studyitems.get(goid, set())
        study_count = len(study_items)
        pop_items = self.go2popitems.get(goid, set())
        pop_count = len(pop_items)
        one_record = GOEnrichmentRecord(
            GO=goid,
            p_uncorrected=calc_pvalue(study_count, study_n, pop_count, pop_n),
            study_items=study_items,
            pop_items=pop_items,
            ratio_in_study=(study_count, study_n),
            ratio_in_pop=(pop_count, pop_n))
        results.append(one_record)
    return results
def __init__(self, pop, assoc, obo_dag, propagate_counts=True, alpha=.05, methods=None, **kws):
    """Record the inputs of the analysis and collect the population terms.

    Args:
        pop: Sized collection of population items; ``len(pop)`` is stored
            as ``pop_n``.
        assoc: Association data given to ``obo_dag.update_association`` and
            to ``get_terms``.
        obo_dag: Object exposing ``update_association``; also given to
            ``get_terms``.
        propagate_counts: If true, ``obo_dag.update_association(assoc)`` runs
            before the population terms are collected.
        alpha: Kept unchanged on ``self.alpha``.
        methods: Names wrapped in ``Methods``; ``None`` selects
            ``["bonferroni", "sidak", "holm"]``.
        **kws: ``log`` overrides the default ``sys.stdout`` log stream. The
            complete keyword dict is passed on to ``FisherFactory``.
    """
    self.log = kws.get('log', sys.stdout)

    # Thin forwarding wrappers: the target method is resolved on each call
    def run_local(iargs):
        return self._run_multitest_local(iargs)

    def run_statsmodels(iargs):
        return self._run_multitest_statsmodels(iargs)

    self._run_multitest = {
        'local': run_local,
        'statsmodels': run_statsmodels,
    }

    self.pop = pop
    self.pop_n = len(pop)
    self.assoc = assoc
    self.obo_dag = obo_dag
    self.alpha = alpha

    if methods is not None:
        method_names = methods
    else:
        method_names = ["bonferroni", "sidak", "holm"]
    self.methods = Methods(method_names)
    self.pval_obj = FisherFactory(**kws).pval_obj

    if propagate_counts:
        # The notice is written to stderr rather than to self.log
        sys.stderr.write("Propagating term counts to parents ..\n")
        obo_dag.update_association(assoc)

    self.go2popitems = get_terms("population", pop, assoc, obo_dag, self.log)
def get_pval_uncorr(self, study, log=sys.stdout):
    """Compute an uncorrected p-value record for every term.

    Study items that are not members of ``self.pop`` are left out. If none
    remain, an empty list is returned and no p-value is calculated.

    Args:
        study: Sized iterable of study items.
        log: Stream for progress messages, or ``None`` to skip them. It is
            also forwarded to ``get_terms``.

    Returns:
        A list of ``GOEnrichmentRecord`` objects, one per term found in the
        study mapping or in ``self.go2popitems``; ``[]`` when no study item
        is in the population.
    """
    found = self.pop.intersection(study)
    # Example message: " 99% 378 of 382 study items found in population"
    study2items = get_terms("study", found, self.assoc, self.obo_dag, log)
    n_pop = self.pop_n
    n_study = len(found)
    goids = set(study2items).union(set(self.go2popitems))

    if log is not None:
        # The percentage is relative to the study as given by the caller,
        # because some of its items may be absent from the population.
        n_given = len(study)
        if n_given != 0:
            pct = 100.0 * n_study / n_given
        else:
            pct = 0.0
        log.write(
            "{R:3.0f}% {N:>6,} of {M:>6,} study items found in population({P})\n"
            .format(N=n_study, M=n_given, P=n_pop, R=pct))
        if n_study:
            log.write(
                "Calculating {N:,} uncorrected p-values using {PFNC}\n".format(
                    N=len(goids), PFNC=self.pval_obj.name))

    # Nothing to test when no study item belongs to the population
    if not n_study:
        return []

    pvalue_of = self.pval_obj.calc_pvalue
    records = []
    for goid in goids:
        in_study = study2items.get(goid, set())
        in_pop = self.go2popitems.get(goid, set())
        k_study = len(in_study)
        k_pop = len(in_pop)
        records.append(GOEnrichmentRecord(
            goid,
            p_uncorrected=pvalue_of(k_study, n_study, k_pop, n_pop),
            study_items=in_study,
            pop_items=in_pop,
            ratio_in_study=(k_study, n_study),
            ratio_in_pop=(k_pop, n_pop)))
    return records
def get_pval_uncorr(self, study, log=sys.stdout):
    """Return uncorrected p-value records for the study items.

    The study is first restricted to the members of ``self.pop``. When the
    restricted study is empty the result is ``[]``.

    Args:
        study: Sized iterable of study items.
        log: Stream for progress messages; ``None`` turns them off. It is
            also forwarded to ``get_terms``.

    Returns:
        A list with one ``GOEnrichmentRecord`` per term that occurs in the
        study mapping or in ``self.go2popitems``.
    """
    matched = self.pop.intersection(study)
    # Example message: " 99% 378 of 382 study items found in population"
    go2matched = get_terms("study", matched, self.assoc, self.obo_dag, log)
    tot_pop = self.pop_n
    tot_study = len(matched)
    every_goid = set(go2matched).union(set(self.go2popitems))
    has_study = bool(tot_study)

    if log is not None:
        # Report against the original study size: items may have been dropped
        tot_given = len(study)
        found_pct = 0.0 if tot_given == 0 else 100.0*tot_study/tot_given
        summary = "{R:3.0f}% {N:>6,} of {M:>6,} study items found in population({P})\n".format(
            N=tot_study, M=tot_given, P=tot_pop, R=found_pct)
        log.write(summary)
        if has_study:
            log.write("Calculating {N:,} uncorrected p-values using {PFNC}\n".format(
                N=len(every_goid), PFNC=self.pval_obj.name))

    # With no study item in the population there is nothing to calculate
    if not has_study:
        return []

    fisher = self.pval_obj.calc_pvalue

    def _make_record(goid):
        """Create the enrichment record of one term."""
        items_study = go2matched.get(goid, set())
        cnt_study = len(items_study)
        items_pop = self.go2popitems.get(goid, set())
        cnt_pop = len(items_pop)
        return GOEnrichmentRecord(
            goid,
            p_uncorrected=fisher(cnt_study, tot_study, cnt_pop, tot_pop),
            study_items=items_study,
            pop_items=items_pop,
            ratio_in_study=(cnt_study, tot_study),
            ratio_in_pop=(cnt_pop, tot_pop))

    return [_make_record(goid) for goid in every_goid]
def _get_pval_uncorr(self, study, log=sys.stdout):
    """Calculate the uncorrected pvalues for study items.

    The terms are split into interleaved fragments and each fragment is
    passed to ``compute_pvals`` in a ``multiprocessing.Pool`` worker. The
    per-fragment results are then merged into enrichment records.

    Args:
        study: Sized collection of study items; ``len(study)`` is the study total.
        log: Stream for the progress message, or ``None`` to skip it. It is
            also forwarded to ``get_terms``.

    Returns:
        A list holding one ``GOEnrichmentRecord`` per term returned by the
        workers.
    """
    if log is not None:
        log.write("Calculating uncorrected p-values using {PFNC}\n".format(
            PFNC=self.pval_obj.name))
    go2studyitems = get_terms("study", study, self.assoc, self.obo_dag, log)
    pop_n, study_n = self.pop_n, len(study)
    allterms = list(set(go2studyitems).union(set(self.go2popitems)))

    # Leave one core free so the machine stays responsive, but never ask for
    # fewer than one worker: Pool(0) raises ValueError on single-core hosts.
    n_procs = max(1, multiprocessing.cpu_count() - 1)
    fragments = [allterms[i::n_procs] for i in range(n_procs)]

    # self.pval_obj.log may be a file handle, which cannot be serialized, so
    # self.pval_obj.calc_pvalue could not be sent to another Python process.
    # Therefore the attribute is patched to None and restored afterwards.
    saved_log = self.pval_obj.log
    self.pval_obj.log = None
    try:
        remote_func = partial(compute_pvals,
                              calc_pvalue=self.pval_obj.calc_pvalue,
                              go2studyitems=go2studyitems,
                              go2popitems=self.go2popitems,
                              study_n=study_n,
                              pop_n=pop_n)
        pool = multiprocessing.Pool(n_procs)
        try:
            all_p_values = pool.map(remote_func, fragments)
        finally:
            # map() has returned or raised, so no work is outstanding;
            # release the worker processes instead of leaking them.
            pool.terminate()
            pool.join()
    finally:
        # Restore the patched file handle even if a worker failed
        self.pval_obj.log = saved_log

    results = []
    for p_values in all_p_values:
        for term, p_value in p_values.items():
            study_items = go2studyitems.get(term, set())
            study_count = len(study_items)
            pop_items = self.go2popitems.get(term, set())
            pop_count = len(pop_items)
            one_record = GOEnrichmentRecord(
                GO=term,
                p_uncorrected=p_value,
                study_items=study_items,
                pop_items=pop_items,
                ratio_in_study=(study_count, study_n),
                ratio_in_pop=(pop_count, pop_n))
            results.append(one_record)
    return results