def write(self, fout_xlsx=None, fout_txt=None, verbose=False):
    """Write the grouped GO IDs to an xlsx file, a text file, or stdout.

    Args:
        fout_xlsx: Name of the xlsx file to write, or None to skip it. When
            given, a companion '<base>_desc.txt' file is also written.
        fout_txt: Name of the text file to write, or None to skip it.
        verbose: Forwarded as hdrgo_prt and shade_hdrgos; also selects which
            Sorter keyword ('sortby' vs. 'section_sortby') gets the sort function.

    If neither output file is given, everything is printed to sys.stdout.
    """
    grprobj = self.objgrpd.grprobj
    fnc_sort = self._get_fncsortnt(grprobj.gosubdag.prt_attr['flds'])
    # The same sort function is handed to Sorter under a verbose-dependent keyword
    sort_kw = 'sortby' if verbose else 'section_sortby'
    sorter = Sorter(grprobj, **{sort_kw: fnc_sort})
    # KWS: hdrgo_prt=True section_prt=None top_n=None use_sections=True
    # RET: {sortobj, sections, hdrgo_prt} or {sortobj flat hdrgo_prt}
    desc2nts = sorter.get_desc2nts_fnc(
        hdrgo_prt=verbose, section_prt=True, top_n=None, use_sections=True)
    writer = WrXlsxSortedGos("init", sorter, self.objgrpd.ver_list)

    # Excel output plus a companion description file
    if fout_xlsx is not None:
        xlsx_opts = {'shade_hdrgos': verbose}
        if not verbose:
            # Non-verbose output drops the excluded fields
            xlsx_opts['prt_flds'] = [
                fld for fld in desc2nts['flds'] if fld not in self.excl_flds]
        writer.wr_xlsx_nts(fout_xlsx, desc2nts, **xlsx_opts)
        base = os.path.splitext(fout_xlsx)[0]
        self._wr_ver_n_key('{BASE}_desc.txt'.format(BASE=base), verbose)

    # Text output
    if fout_txt is not None:
        self._wr_txt_nts(fout_txt, desc2nts, writer, verbose)

    if fout_xlsx is not None or fout_txt is not None:
        return

    # No file was requested: print to the screen, with the version/key
    # information both before and after the GO IDs
    self._prt_ver_n_key(sys.stdout, verbose)
    fmt = self._get_prtfmt(writer, verbose)
    summary = writer.prt_txt_desc2nts(sys.stdout, desc2nts, fmt)
    self._prt_ver_n_key(sys.stdout, verbose)
    if summary:
        print("\n{N} GO IDs in {S} sections".format(
            N=desc2nts['num_items'], S=desc2nts['num_sections']))
def _wr_xlsx(name, grprobj, **kws):
    """Sort the grouped GO IDs and write them to '<name>.xlsx'.

    The same keyword arguments are forwarded both to Sorter.get_desc2nts
    (hdrgo_prt section_prt top_n use_sections) and to
    WrXlsxSortedGos.wr_xlsx_nts (title hdrs).
    """
    xlsx_name = "{NAME}.xlsx".format(NAME=name)
    sorter = Sorter(grprobj)
    nts_by_desc = sorter.get_desc2nts(**kws)
    writer = WrXlsxSortedGos(name, sorter)
    writer.wr_xlsx_nts(xlsx_name, nts_by_desc, **kws)
def __init__(self, name, goea_results, obj):
    """Build the GO/section/gene lookup tables for one set of GOEA results.

    Args:
        name: Name stored on the instance.
        goea_results: GOEA results handed to _Init.get_go2nt.
        obj: Shared data object (AArtGeneProductSetsAll); must provide hdrobj,
            grprdflt.gosubdag and sec2chr.
    """
    self.name = name
    self.datobj = obj  # AArtGeneProductSetsAll
    init = _Init(obj)
    go2nt = init.get_go2nt(goea_results)
    self.go2nt = go2nt
    grouper = Grouper("grp", go2nt, obj.hdrobj, obj.grprdflt.gosubdag, go2nt=go2nt)
    self.sortobj = Sorter(grouper)
    # Sections that are actually used, each mapped to its character from obj.sec2chr
    self.sec2gos = init.get_sec2gos(self.sortobj)
    self.sec2chr = cx.OrderedDict(
        (sec, obj.sec2chr[sec]) for sec in self.sec2gos.keys())
    self.go2chrs = init.get_go2chrs(self.sec2gos, self.sec2chr)
    # Gene-centric views
    self.gene2gos = init.get_gene2gos(go2nt)
    self.gene2section2gos = init.get_gene2section2gos(self.gene2gos, self.sec2gos)
    self.gene2aart = init.get_gene2aart(self.gene2section2gos, self.sec2chr)
def _wr_xlsx(name, grprobj, **kws):
    """Sort the grouped GO IDs and write them to '<name>.xlsx'.

    Only the sort keywords (sortby, hdrgo_sortby, section_sortby) are passed
    to the Sorter constructor; the full set of keyword arguments is forwarded
    to both Sorter.get_desc2nts and WrXlsxSortedGos.wr_xlsx_nts.
    """
    # Keywords understood by the Sorter constructor
    sorter_keys = ('sortby', 'hdrgo_sortby', 'section_sortby')
    xlsx_name = "{NAME}.xlsx".format(NAME=name)
    sorter_kws = {}
    for key, val in kws.items():
        if key in sorter_keys:
            sorter_kws[key] = val
    sorter = Sorter(grprobj, **sorter_kws)
    # kws Sorter: hdrgo_prt section_prt top_n use_sections
    nts_by_desc = sorter.get_desc2nts(**kws)
    # kws WrXlsxSortedGos wr_xlsx_nts: title hdrs
    writer = WrXlsxSortedGos(name, sorter)
    writer.wr_xlsx_nts(xlsx_name, nts_by_desc, **kws)
def cli(self, prt=sys.stdout):
    """Run the command line: read user GO IDs, group them, write sections and GO IDs.

    Args:
        prt: Stream that receives the version lines (and is passed to the
            reader of the user GO IDs). The final user-GO count always goes
            to sys.stdout.
    """
    args = self.objdoc.get_docargs(prt=None)
    dag = get_godag(args['obo'], prt=None, loading_bar=False,
                    optional_attrs=['relationship'])
    goids_usr = GetGOs(dag, max_gos=200).get_usrgos(args.get('GO_FILE'), prt)
    # TermCounts or None
    tcntobj = self._get_tcntobj(goids_usr, dag, **args)
    self.gosubdag = GoSubDag(
        goids_usr, dag, relationships=True, tcntobj=tcntobj, prt=None)
    dflts = GrouperDflts(self.gosubdag, args['slims'])
    versions = [dag.version, dflts.ver_goslims]
    prt.write("{VER}\n".format(VER="\n".join(versions)))

    # Group the user GO IDs under the sections read from the input file
    sections = self._read_sections(args['ifile'])
    hdrs = HdrgosSections(self.gosubdag, dflts.hdrgos_dflt, sections)
    grouper = Grouper("init", goids_usr, hdrs, self.gosubdag)

    # Write sections: the input file is only created when it does not exist yet
    wr_txt = WrSectionsTxt(grouper, versions)
    if not os.path.exists(args['ifile']):
        wr_txt.wr_txt_section_hdrgos(args['ifile'])
    wr_txt.wr_txt_section_hdrgos(args['ofile'])
    wr_py = WrSectionsPy(grouper, versions)
    if 'py' in args:
        wr_py.wr_py_sections(args['py'], sections, doc=dag.version)

    # Write user GO IDs in sections
    wr_gos = WrXlsxSortedGos("init", Sorter(grouper), versions)
    wr_gos.wr_txt_gos(args['txt'], sortby=wr_py.fncsortnt)
    self._prt_cnt_usrgos(goids_usr, sys.stdout)
def _get_sortobj():
    """Return a Sorter over USER_GOS grouped under SECTIONS, using go-basic.obo."""
    dag = get_godag(
        "go-basic.obo",
        prt=None,
        loading_bar=False,
        optional_attrs=['relationship'])
    subdag = GoSubDag(USER_GOS, dag, relationships=True, tcntobj=None)
    dflts = GrouperDflts(subdag)
    hdrs = HdrgosSections(subdag, dflts.hdrgos_dflt, SECTIONS)
    return Sorter(Grouper("wrusrgos", USER_GOS, hdrs, subdag))
def get_sortobj(self, goea_results, **kws):
    """Return a Sorter for the GO IDs in a list of GOEnrichmentRecord.

    Args:
        goea_results: GOEA results, converted to namedtuples by MgrNtGOEAs.
        **kws: Forwarded to MgrNtGOEAs.get_goea_nts_prt.

    A summary of the grouping is printed to sys.stdout. GO IDs within
    sections are sorted by the attribute named in self.pval_fld.
    """
    nts_goea = MgrNtGOEAs(goea_results).get_goea_nts_prt(**kws)
    goids = {ntgoea.GO for ntgoea in nts_goea}
    go2nt = dict((ntgoea.GO, ntgoea) for ntgoea in nts_goea)
    grouper = Grouper("GOEA", goids, self.hdrobj, self.grprdflt.gosubdag, go2nt=go2nt)
    grouper.prt_summary(sys.stdout)

    def _by_pval(ntgo):
        """Sort key: value of the P-value field (looked up when called)."""
        return getattr(ntgo, self.pval_fld)

    # hdrgo_prt", "section_prt", "top_n", "use_sections"
    return Sorter(grouper, section_sortby=_by_pval)
def run(grprobj, hdrobj, exp_hdrs, hdrgo_prt=True):
    """Print actual vs. expected header GOs, build a Sorter, check user GOs.

    Args:
        grprobj: Grouper-like object under test.
        hdrobj: Header object checked against grprobj by chk_hdrs.
        exp_hdrs: Expected header GO IDs (printed only; not asserted).
        hdrgo_prt: Passed to Sorter and to Sorter.get_nts_flat.

    Returns:
        Tuple (sortobj, nts, nts_go, act_hdrs).

    Raises:
        AssertionError: If any user GO ID is missing from the flat sorted list.
    """
    chk_hdrs(grprobj, hdrobj)
    act_hdrs = grprobj.get_hdrgos()
    for label, hdrgos in (("ACTUAL", act_hdrs), ("EXPECTED", exp_hdrs)):
        print(label)
        grprobj.gosubdag.prt_goids(sorted(hdrgos))
    # assert act_hdrs == exp_hdrs
    sortobj = Sorter(grprobj, hdrgo_prt=hdrgo_prt)
    banner = "\n{NAME} PRINT GOs hdrgo_prt({H}):\n".format(
        H=hdrgo_prt, NAME=grprobj.grpname)
    sys.stdout.write(banner)
    sortobj.prt_gos()
    nts = sortobj.get_nts_flat(hdrgo_prt)
    nts_go = {nt.GO for nt in nts}
    usrgos = grprobj.usrgos
    # Every user GO ID must appear among the grouped, sorted GO IDs
    found = nts_go.intersection(usrgos)
    assert found == usrgos, \
        "ONLY {N} of {U} user gos found in grouped sorted GOs. MISSING: {GOs}".format(
            N=len(found),
            GOs=" ".join(usrgos.difference(found)),
            U=len(usrgos))
    return sortobj, nts, nts_go, act_hdrs
class AArtGeneProductSetsOne(object):
    """Product gene lists with ASCII art sections and GO IDs for each gene product."""
    # nts need: nt.GO and nt.study_items

    def __init__(self, name, goea_results, obj):
        """Build the GO/section/gene lookup tables for one set of GOEA results.

        Args:
            name: Name stored on the instance and shown in the summary line.
            goea_results: GOEA results handed to _Init.get_go2nt.
            obj: Shared data object (AArtGeneProductSetsAll); must provide
                hdrobj, grprdflt.gosubdag, sec2chr and kws.
        """
        self.name = name
        self.datobj = obj  # AArtGeneProductSetsAll
        _ini = _Init(obj)
        self.go2nt = _ini.get_go2nt(goea_results)
        _grprobj = Grouper("grp", self.go2nt, obj.hdrobj, obj.grprdflt.gosubdag,
                           go2nt=self.go2nt)
        self.sortobj = Sorter(_grprobj)
        self.sec2gos = _ini.get_sec2gos(self.sortobj)
        # Only the sections that are in use, each with its character from obj.sec2chr
        self.sec2chr = cx.OrderedDict([(s, obj.sec2chr[s]) for s in self.sec2gos.keys()])
        self.go2chrs = _ini.get_go2chrs(self.sec2gos, self.sec2chr)
        self.gene2gos = _ini.get_gene2gos(self.go2nt)
        self.gene2section2gos = _ini.get_gene2section2gos(self.gene2gos, self.sec2gos)
        self.gene2aart = _ini.get_gene2aart(self.gene2section2gos, self.sec2chr)

    def prt_report_grp0(self, prt=sys.stdout):
        """Print full GO/gene report without grouping.

        Returns:
            Tuple (name, section marks); see get_section_marks.
        """
        summaryline = self.str_summaryline()
        kws_grp = {
            'use_sections': False,
            'hdrgo_prt': False,
            'sortby': lambda nt: [-1 * nt.dcnt, nt.depth]}
        # Print grouped GO IDs
        prt.write("{SUMMARY}\n".format(SUMMARY=summaryline))
        # Write to the caller's stream so the whole report lands in one place
        self.prt_gos_grouped(prt, **kws_grp)
        # genes
        genes = sorted(self.gene2gos.keys())
        prt.write("\n\n{SUMMARY}\n\n".format(SUMMARY=summaryline))
        self.prt_gene_aart(genes, prt)
        # Sort genes
        prt.write("\n\n{SUMMARY}\n\n".format(SUMMARY=summaryline))
        self.prt_gene_aart_details(genes, prt)
        return (self.name, self.get_section_marks())

    def prt_report_grp1(self, prt=sys.stdout, **kws_grp):
        """Print full GO/gene report with grouping.

        Args:
            prt: Output stream.
            **kws_grp: Forwarded to prt_gos_grouped.

        Returns:
            Tuple (name, section marks); see get_section_marks.
        """
        summaryline = self.str_summaryline()
        # Print grouped GO IDs
        prt.write("{SUMMARY}\n".format(SUMMARY=summaryline))
        self.prt_gos_grouped(prt, **kws_grp)
        # genes
        genes = sorted(self.gene2gos.keys())
        prt.write("\n\n{SUMMARY}\n\n".format(SUMMARY=summaryline))
        self.prt_section_key(prt)
        self.prt_gene_aart(genes, prt)
        # Sort genes
        prt.write("\n\n{SUMMARY}\n\n".format(SUMMARY=summaryline))
        self.prt_gene_aart_details(genes, prt)
        return (self.name, self.get_section_marks())

    def str_summaryline(self):
        """Return: 47 GOs, 262 genes described by 10 of 19 sections consistent_increase."""
        return "{N} GOs, {M} genes described by {X} of {Y} sections {NM}".format(
            N=len(self.go2nt),
            M=len(self.gene2gos),
            X=len(self.sec2chr),
            Y=len(self.datobj.sec2chr),
            NM=self.name)

    def prt_gos_grouped(self, prt, **kws_grp):
        """Print grouped GO list."""
        prtfmt = self.datobj.kws['fmtgo']
        wrobj = WrXlsxSortedGos(self.name, self.sortobj)
        # Keyword arguments: control content: hdrgo_prt section_prt top_n use_sections
        desc2nts = self.sortobj.get_desc2nts(**kws_grp)
        wrobj.prt_txt_desc2nts(prt, desc2nts, prtfmt)

    def prt_gos_flat(self, prt):
        """Print flat GO list, ordered by the function from _get_sortgo."""
        prtfmt = self.datobj.kws['fmtgo']
        _go2nt = self.sortobj.grprobj.go2nt
        go2nt = {go: _go2nt[go] for go in self.go2nt}
        prt.write("\n{N} GO IDs:\n".format(N=len(go2nt)))
        _sortby = self._get_sortgo()
        for ntgo in sorted(go2nt.values(), key=_sortby):
            prt.write(prtfmt.format(**ntgo._asdict()))

    def _get_sortgo(self):
        """Get function for sorting GO terms in a list of namedtuples.

        Returns the user-supplied 'sortgo' keyword if present, else the
        default 'sort' entry of the GoSubDag print attributes.
        """
        if 'sortgo' in self.datobj.kws:
            return self.datobj.kws['sortgo']
        # The value is used as a sorted() key and so is returned unchanged
        return self.datobj.grprdflt.gosubdag.prt_attr['sort']

    def prt_gene_aart(self, geneids, prt=sys.stdout):
        """For each gene, print ASCII art which represents its associated GO IDs."""
        patgene = self.datobj.kws["fmtgene"]
        itemid2name = self.datobj.kws.get("itemid2name")
        prt.write("\n{HDR}\n".format(HDR=self.str_hdr()))
        for geneid in geneids:
            symbol = "" if itemid2name is None else itemid2name.get(geneid, "")
            prt.write(patgene.format(AART=self.gene2aart[geneid], ID=geneid, NAME=symbol))

    def prt_gene_aart_details(self, geneids, prt=sys.stdout):
        """For each gene, print its ASCII art followed by each of its GO IDs.

        GO IDs are ordered by the index of their first section character,
        then by namespace, then by descending dcnt.
        """
        _go2nt = self.sortobj.grprobj.go2nt
        patgene = self.datobj.kws["fmtgene2"]
        patgo = self.datobj.kws["fmtgo2"]
        itemid2name = self.datobj.kws.get("itemid2name")
        chr2i = self.datobj.get_chr2idx()
        for geneid in geneids:
            gos_gene = self.gene2gos[geneid]
            symbol = "" if itemid2name is None else itemid2name.get(geneid, "")
            prt.write("\n")
            prt.write(patgene.format(AART=self.gene2aart[geneid], ID=geneid, NAME=symbol))
            # GO ID -> (namedtuple, string of the GO's section characters)
            go2nt = {go: (_go2nt[go], "".join(self.go2chrs[go])) for go in gos_gene}
            for ntgo, abc in sorted(
                    go2nt.values(),
                    key=lambda t: [chr2i[t[1][:1]], t[0].NS, -1 * t[0].dcnt]):
                prt.write("{ABC} ".format(ABC=abc))
                prt.write(patgo.format(**ntgo._asdict()))

    def prt_section_key(self, prt=sys.stdout):
        """Print the section name and its alias; '*' marks sections in use."""
        for section_name, letter in self.datobj.sec2chr.items():
            mrk = '*' if section_name in self.sec2chr else ""
            prt.write("{M:1} {ABC} {SECT}\n".format(M=mrk, ABC=letter, SECT=section_name))

    def str_hdr(self):
        """Return a string representing the section headers: """
        return "".join(self.sec2chr.values())

    def get_section_marks(self):
        """For each section in the shared data object, return its character or '.'.

        The character is returned for sections used by this instance;
        '.' is returned for all other sections.
        """
        return [
            abc if s in self.sec2chr else "."
            for s, abc in self.datobj.sec2chr.items()]

    def get_gene2binvec(self):
        """Return a boolean vector for each gene representing GO section membership."""
        _sec2chr = self.sec2chr
        return {
            g: [s in s2gos for s in _sec2chr]
            for g, s2gos in self.gene2section2gos.items()}
class AArtGeneProductSetsOne(object):
    """Product gene lists with ASCII art sections and GO IDs for each gene product."""
    # nts need: nt.GO and nt.study_items

    def __init__(self, name, goea_results, obj):
        """Create the per-result lookup tables (GO, section, gene).

        Args:
            name: Name kept on the instance; appears in the summary line.
            goea_results: GOEA results passed to _Init.get_go2nt.
            obj: Shared AArtGeneProductSetsAll data object supplying hdrobj,
                grprdflt.gosubdag, sec2chr and kws.
        """
        self.name = name
        self.datobj = obj  # AArtGeneProductSetsAll
        _ini = _Init(obj)
        self.go2nt = _ini.get_go2nt(goea_results)
        _grprobj = Grouper("grp", self.go2nt, obj.hdrobj, obj.grprdflt.gosubdag,
                           go2nt=self.go2nt)
        self.sortobj = Sorter(_grprobj)
        self.sec2gos = _ini.get_sec2gos(self.sortobj)
        # Restrict the shared section->character map to the sections in use
        self.sec2chr = cx.OrderedDict([(s, obj.sec2chr[s]) for s in self.sec2gos.keys()])
        self.go2chrs = _ini.get_go2chrs(self.sec2gos, self.sec2chr)
        self.gene2gos = _ini.get_gene2gos(self.go2nt)
        self.gene2section2gos = _ini.get_gene2section2gos(self.gene2gos, self.sec2gos)
        self.gene2aart = _ini.get_gene2aart(self.gene2section2gos, self.sec2chr)

    def prt_report_grp0(self, prt=sys.stdout):
        """Print full GO/gene report without grouping.

        Returns:
            Tuple of (name, list returned by get_section_marks).
        """
        summaryline = self.str_summaryline()
        kws_grp = {'use_sections': False,
                   'hdrgo_prt': False,
                   'sortby': lambda nt: [-1*nt.dcnt, nt.depth]}
        # Print grouped GO IDs
        prt.write("{SUMMARY}\n".format(SUMMARY=summaryline))
        # Use the stream given by the caller, like every other part of the report
        self.prt_gos_grouped(prt, **kws_grp)
        # genes
        genes = sorted(self.gene2gos.keys())
        prt.write("\n\n{SUMMARY}\n\n".format(SUMMARY=summaryline))
        self.prt_gene_aart(genes, prt)
        # Sort genes
        prt.write("\n\n{SUMMARY}\n\n".format(SUMMARY=summaryline))
        self.prt_gene_aart_details(genes, prt)
        return (self.name, self.get_section_marks())

    def prt_report_grp1(self, prt=sys.stdout, **kws_grp):
        """Print full GO/gene report with grouping.

        Args:
            prt: Output stream.
            **kws_grp: Passed through to prt_gos_grouped.

        Returns:
            Tuple of (name, list returned by get_section_marks).
        """
        summaryline = self.str_summaryline()
        # Print grouped GO IDs
        prt.write("{SUMMARY}\n".format(SUMMARY=summaryline))
        self.prt_gos_grouped(prt, **kws_grp)
        # genes
        genes = sorted(self.gene2gos.keys())
        prt.write("\n\n{SUMMARY}\n\n".format(SUMMARY=summaryline))
        self.prt_section_key(prt)
        self.prt_gene_aart(genes, prt)
        # Sort genes
        prt.write("\n\n{SUMMARY}\n\n".format(SUMMARY=summaryline))
        self.prt_gene_aart_details(genes, prt)
        return (self.name, self.get_section_marks())

    def str_summaryline(self):
        """Return: 47 GOs, 262 genes described by 10 of 19 sections consistent_increase."""
        return "{N} GOs, {M} genes described by {X} of {Y} sections {NM}".format(
            N=len(self.go2nt), M=len(self.gene2gos), X=len(self.sec2chr),
            Y=len(self.datobj.sec2chr), NM=self.name)

    def prt_gos_grouped(self, prt, **kws_grp):
        """Print grouped GO list."""
        prtfmt = self.datobj.kws['fmtgo']
        wrobj = WrXlsxSortedGos(self.name, self.sortobj)
        # Keyword arguments: control content: hdrgo_prt section_prt top_n use_sections
        desc2nts = self.sortobj.get_desc2nts(**kws_grp)
        wrobj.prt_txt_desc2nts(prt, desc2nts, prtfmt)

    def prt_gos_flat(self, prt):
        """Print flat GO list, sorted with the function from _get_sortgo."""
        prtfmt = self.datobj.kws['fmtgo']
        _go2nt = self.sortobj.grprobj.go2nt
        go2nt = {go: _go2nt[go] for go in self.go2nt}
        prt.write("\n{N} GO IDs:\n".format(N=len(go2nt)))
        _sortby = self._get_sortgo()
        for ntgo in sorted(go2nt.values(), key=_sortby):
            prt.write(prtfmt.format(**ntgo._asdict()))

    def _get_sortgo(self):
        """Get function for sorting GO terms in a list of namedtuples.

        The 'sortgo' keyword wins when the user supplied one; otherwise the
        'sort' entry of the GoSubDag print attributes is used.
        """
        if 'sortgo' in self.datobj.kws:
            return self.datobj.kws['sortgo']
        # Returned as-is: callers pass it to sorted() as the key function
        return self.datobj.grprdflt.gosubdag.prt_attr['sort']

    def prt_gene_aart(self, geneids, prt=sys.stdout):
        """For each gene, print ASCII art which represents its associated GO IDs."""
        patgene = self.datobj.kws["fmtgene"]
        itemid2name = self.datobj.kws.get("itemid2name")
        prt.write("\n{HDR}\n".format(HDR=self.str_hdr()))
        for geneid in geneids:
            symbol = "" if itemid2name is None else itemid2name.get(geneid, "")
            prt.write(patgene.format(AART=self.gene2aart[geneid], ID=geneid, NAME=symbol))

    def prt_gene_aart_details(self, geneids, prt=sys.stdout):
        """For each gene, print its ASCII art and then every GO ID it has.

        The GO IDs of a gene are sorted by the index of their first section
        character, then namespace, then descending dcnt.
        """
        _go2nt = self.sortobj.grprobj.go2nt
        patgene = self.datobj.kws["fmtgene2"]
        patgo = self.datobj.kws["fmtgo2"]
        itemid2name = self.datobj.kws.get("itemid2name")
        chr2i = self.datobj.get_chr2idx()
        for geneid in geneids:
            gos_gene = self.gene2gos[geneid]
            symbol = "" if itemid2name is None else itemid2name.get(geneid, "")
            prt.write("\n")
            prt.write(patgene.format(AART=self.gene2aart[geneid], ID=geneid, NAME=symbol))
            # GO ID -> (namedtuple, joined section characters of that GO)
            go2nt = {go: (_go2nt[go], "".join(self.go2chrs[go])) for go in gos_gene}
            for ntgo, abc in sorted(go2nt.values(),
                                    key=lambda t: [chr2i[t[1][:1]], t[0].NS, -1*t[0].dcnt]):
                prt.write("{ABC} ".format(ABC=abc))
                prt.write(patgo.format(**ntgo._asdict()))

    def prt_section_key(self, prt=sys.stdout):
        """Print the section name and its alias; used sections get a '*'."""
        for section_name, letter in self.datobj.sec2chr.items():
            mrk = '*' if section_name in self.sec2chr else ""
            prt.write("{M:1} {ABC} {SECT}\n".format(M=mrk, ABC=letter, SECT=section_name))

    def str_hdr(self):
        """Return a string representing the section headers: """
        return "".join(self.sec2chr.values())

    def get_section_marks(self):
        """Return one mark per section of the shared data object.

        A section used by this instance is marked with its character;
        every other section is marked with '.'.
        """
        return [abc if s in self.sec2chr else "." for s, abc in self.datobj.sec2chr.items()]

    def get_gene2binvec(self):
        """Return a boolean vector for each gene representing GO section membership."""
        _sec2chr = self.sec2chr
        return {g: [s in s2gos for s in _sec2chr]
                for g, s2gos in self.gene2section2gos.items()}