def _wr_xlsx(name, grprobj, **kws):
    """Write the grouped and sorted GO terms of grprobj to '<name>.xlsx'.

    Every keyword argument is forwarded unchanged to both
    Sorter.get_desc2nts and WrXlsxSortedGos.wr_xlsx_nts:
      Sorter.get_desc2nts ........... hdrgo_prt section_prt top_n use_sections
      WrXlsxSortedGos.wr_xlsx_nts ... title hdrs
    """
    sorter = Sorter(grprobj)
    # Sections and their GO namedtuples, as selected by the content keywords
    grouped_nts = sorter.get_desc2nts(**kws)
    writer = WrXlsxSortedGos(name, sorter)
    writer.wr_xlsx_nts("{NAME}.xlsx".format(NAME=name), grouped_nts, **kws)
def _wr_xlsx(name, grprobj, **kws):
    """Write the grouped and sorted GO terms of grprobj to '<name>.xlsx'.

    Keyword arguments are routed as follows:
      Sorter(...) ................... only sortby hdrgo_sortby section_sortby
      Sorter.get_desc2nts ........... all kws (hdrgo_prt section_prt top_n
                                      use_sections prtfmt)
      WrXlsxSortedGos.wr_xlsx_nts ... all kws (title hdrs)
    """
    # Only these keys are accepted by the Sorter constructor
    sorter_keys = {'sortby', 'hdrgo_sortby', 'section_sortby'}
    sorter_kws = {}
    for key, val in kws.items():
        if key in sorter_keys:
            sorter_kws[key] = val
    sorter = Sorter(grprobj, **sorter_kws)
    # Sections and their GO namedtuples, as selected by the content keywords
    grouped_nts = sorter.get_desc2nts(**kws)
    writer = WrXlsxSortedGos(name, sorter)
    writer.wr_xlsx_nts("{NAME}.xlsx".format(NAME=name), grouped_nts, **kws)
class AArtGeneProductSetsOne(object):
    """Product gene lists with ASCII art sections and GO IDs for each gene product."""
    # nts need: nt.GO and nt.study_items

    def __init__(self, name, goea_results, obj):
        """Derive the GO, section, and gene lookup tables for one named result set.

        Args:
            name: Label for this set; used in the summary line and report return value.
            goea_results: Results handed to _Init.get_go2nt to build self.go2nt.
            obj: Shared data object (AArtGeneProductSetsAll). This class reads
                obj.hdrobj, obj.grprdflt.gosubdag, obj.sec2chr, obj.kws and
                calls obj.get_chr2idx().
        """
        self.name = name
        self.datobj = obj  # AArtGeneProductSetsAll
        _ini = _Init(obj)
        self.go2nt = _ini.get_go2nt(goea_results)
        _grprobj = Grouper("grp", self.go2nt, obj.hdrobj, obj.grprdflt.gosubdag, go2nt=self.go2nt)
        self.sortobj = Sorter(_grprobj)
        self.sec2gos = _ini.get_sec2gos(self.sortobj)
        # Keep only the sections present in this set, in sec2gos order
        self.sec2chr = cx.OrderedDict([(s, obj.sec2chr[s]) for s in self.sec2gos.keys()])
        self.go2chrs = _ini.get_go2chrs(self.sec2gos, self.sec2chr)
        self.gene2gos = _ini.get_gene2gos(self.go2nt)
        self.gene2section2gos = _ini.get_gene2section2gos(self.gene2gos, self.sec2gos)
        self.gene2aart = _ini.get_gene2aart(self.gene2section2gos, self.sec2chr)

    def prt_report_grp0(self, prt=sys.stdout):
        """Print full GO/gene report without grouping.

        Args:
            prt: Writable stream which receives the entire report.

        Returns:
            Tuple of (self.name, list returned by get_section_marks()).
        """
        summaryline = self.str_summaryline()
        kws_grp = {
            'use_sections': False,
            'hdrgo_prt': False,
            'sortby': lambda nt: [-1 * nt.dcnt, nt.depth],
        }
        # Print grouped GO IDs
        prt.write("{SUMMARY}\n".format(SUMMARY=summaryline))
        # Write to the caller's stream (not sys.stdout) so the report stays in one place
        self.prt_gos_grouped(prt, **kws_grp)
        # genes
        genes = sorted(self.gene2gos.keys())
        prt.write("\n\n{SUMMARY}\n\n".format(SUMMARY=summaryline))
        self.prt_gene_aart(genes, prt)
        # Sort genes
        prt.write("\n\n{SUMMARY}\n\n".format(SUMMARY=summaryline))
        self.prt_gene_aart_details(genes, prt)
        return (self.name, self.get_section_marks())

    def prt_report_grp1(self, prt=sys.stdout, **kws_grp):
        """Print full GO/gene report with grouping.

        Args:
            prt: Writable stream which receives the entire report.
            **kws_grp: Forwarded to prt_gos_grouped (and from there to
                Sorter.get_desc2nts).

        Returns:
            Tuple of (self.name, list returned by get_section_marks()).
        """
        summaryline = self.str_summaryline()
        # Print grouped GO IDs
        prt.write("{SUMMARY}\n".format(SUMMARY=summaryline))
        self.prt_gos_grouped(prt, **kws_grp)
        # genes
        genes = sorted(self.gene2gos.keys())
        prt.write("\n\n{SUMMARY}\n\n".format(SUMMARY=summaryline))
        self.prt_section_key(prt)
        self.prt_gene_aart(genes, prt)
        # Sort genes
        prt.write("\n\n{SUMMARY}\n\n".format(SUMMARY=summaryline))
        self.prt_gene_aart_details(genes, prt)
        return (self.name, self.get_section_marks())

    def str_summaryline(self):
        """Return, e.g.: 47 GOs, 262 genes described by 10 of 19 sections consistent_increase."""
        return "{N} GOs, {M} genes described by {X} of {Y} sections {NM}".format(
            N=len(self.go2nt),
            M=len(self.gene2gos),
            X=len(self.sec2chr),
            Y=len(self.datobj.sec2chr),
            NM=self.name)

    def prt_gos_grouped(self, prt, **kws_grp):
        """Print grouped GO list.

        Args:
            prt: Writable stream.
            **kws_grp: Keyword arguments which control content:
                hdrgo_prt section_prt top_n use_sections
        """
        prtfmt = self.datobj.kws['fmtgo']
        wrobj = WrXlsxSortedGos(self.name, self.sortobj)
        desc2nts = self.sortobj.get_desc2nts(**kws_grp)
        wrobj.prt_txt_desc2nts(prt, desc2nts, prtfmt)

    def prt_gos_flat(self, prt):
        """Print flat GO list, ordered by the key function from _get_sortgo()."""
        prtfmt = self.datobj.kws['fmtgo']
        _go2nt = self.sortobj.grprobj.go2nt
        # Use the grouper's namedtuples for the GO IDs in this set
        go2nt = {go: _go2nt[go] for go in self.go2nt}
        prt.write("\n{N} GO IDs:\n".format(N=len(go2nt)))
        _sortby = self._get_sortgo()
        for ntgo in sorted(go2nt.values(), key=_sortby):
            prt.write(prtfmt.format(**ntgo._asdict()))

    def _get_sortgo(self):
        """Get function for sorting GO terms in a list of namedtuples.

        A user-supplied 'sortgo' in datobj.kws takes precedence over the
        default stored in gosubdag.prt_attr['sort'].
        """
        if 'sortgo' in self.datobj.kws:
            return self.datobj.kws['sortgo']
        # Return the value untouched: it is passed as key= to sorted(), so
        # appending a string to it cannot yield a usable sort key.
        return self.datobj.grprdflt.gosubdag.prt_attr['sort']

    def prt_gene_aart(self, geneids, prt=sys.stdout):
        """For each gene, print ASCII art which represents its associated GO IDs.

        Args:
            geneids: Iterable of gene IDs; each must be a key of self.gene2aart.
            prt: Writable stream.
        """
        patgene = self.datobj.kws["fmtgene"]
        itemid2name = self.datobj.kws.get("itemid2name")
        prt.write("\n{HDR}\n".format(HDR=self.str_hdr()))
        for geneid in geneids:
            # Symbol is blank when no ID-to-name mapping was given or the ID is absent
            symbol = "" if itemid2name is None else itemid2name.get(geneid, "")
            prt.write(patgene.format(AART=self.gene2aart[geneid], ID=geneid, NAME=symbol))

    def prt_gene_aart_details(self, geneids, prt=sys.stdout):
        """For each gene, print its ASCII art line followed by one line per GO ID.

        Each GO line is prefixed by the section characters of that GO ID.

        Args:
            geneids: Iterable of gene IDs; each must be a key of self.gene2gos
                and self.gene2aart.
            prt: Writable stream.
        """
        _go2nt = self.sortobj.grprobj.go2nt
        patgene = self.datobj.kws["fmtgene2"]
        patgo = self.datobj.kws["fmtgo2"]
        itemid2name = self.datobj.kws.get("itemid2name")
        chr2i = self.datobj.get_chr2idx()

        def _sortkey(nt_abc):
            """Order by index of first section character, then NS, then descending dcnt."""
            ntgo, abc = nt_abc
            return [chr2i[abc[:1]], ntgo.NS, -1 * ntgo.dcnt]

        for geneid in geneids:
            gos_gene = self.gene2gos[geneid]
            symbol = "" if itemid2name is None else itemid2name.get(geneid, "")
            prt.write("\n")
            prt.write(patgene.format(AART=self.gene2aart[geneid], ID=geneid, NAME=symbol))
            # Keyed by GO ID so that each GO ID is printed once per gene
            go2nt = {go: (_go2nt[go], "".join(self.go2chrs[go])) for go in gos_gene}
            for ntgo, abc in sorted(go2nt.values(), key=_sortkey):
                prt.write("{ABC} ".format(ABC=abc))
                prt.write(patgo.format(**ntgo._asdict()))

    def prt_section_key(self, prt=sys.stdout):
        """Print every section name and its alias; '*' marks sections in this set."""
        for section_name, letter in self.datobj.sec2chr.items():
            mrk = '*' if section_name in self.sec2chr else ""
            prt.write("{M:1} {ABC} {SECT}\n".format(M=mrk, ABC=letter, SECT=section_name))

    def str_hdr(self):
        """Return a string of the section aliases in this set, concatenated in order."""
        return "".join(self.sec2chr.values())

    def get_section_marks(self):
        """For each section in datobj.sec2chr, return its alias if in this set, else '.'."""
        return [
            abc if s in self.sec2chr else "."
            for s, abc in self.datobj.sec2chr.items()
        ]

    def get_gene2binvec(self):
        """Return a boolean vector for each gene representing GO section membership.

        Vector positions follow the order of self.sec2chr.
        """
        _sec2chr = self.sec2chr
        return {
            g: [s in s2gos for s in _sec2chr]
            for g, s2gos in self.gene2section2gos.items()
        }
class AArtGeneProductSetsOne(object):
    """Product gene lists with ASCII art sections and GO IDs for each gene product."""
    # nts need: nt.GO and nt.study_items

    def __init__(self, name, goea_results, obj):
        """Derive the GO, section, and gene lookup tables for one named result set.

        Args:
            name: Label for this set; used in the summary line and report return value.
            goea_results: Results handed to _Init.get_go2nt to build self.go2nt.
            obj: Shared data object (AArtGeneProductSetsAll). This class reads
                obj.hdrobj, obj.grprdflt.gosubdag, obj.sec2chr, obj.kws and
                calls obj.get_chr2idx().
        """
        self.name = name
        self.datobj = obj  # AArtGeneProductSetsAll
        _ini = _Init(obj)
        self.go2nt = _ini.get_go2nt(goea_results)
        _grprobj = Grouper("grp", self.go2nt, obj.hdrobj, obj.grprdflt.gosubdag, go2nt=self.go2nt)
        self.sortobj = Sorter(_grprobj)
        self.sec2gos = _ini.get_sec2gos(self.sortobj)
        # Keep only the sections present in this set, in sec2gos order
        self.sec2chr = cx.OrderedDict([(s, obj.sec2chr[s]) for s in self.sec2gos.keys()])
        self.go2chrs = _ini.get_go2chrs(self.sec2gos, self.sec2chr)
        self.gene2gos = _ini.get_gene2gos(self.go2nt)
        self.gene2section2gos = _ini.get_gene2section2gos(self.gene2gos, self.sec2gos)
        self.gene2aart = _ini.get_gene2aart(self.gene2section2gos, self.sec2chr)

    def prt_report_grp0(self, prt=sys.stdout):
        """Print full GO/gene report without grouping.

        Args:
            prt: Writable stream which receives the entire report.

        Returns:
            Tuple of (self.name, list returned by get_section_marks()).
        """
        summaryline = self.str_summaryline()
        kws_grp = {'use_sections':False,
                   'hdrgo_prt':False,
                   'sortby':lambda nt: [-1*nt.dcnt, nt.depth]}
        # Print grouped GO IDs
        prt.write("{SUMMARY}\n".format(SUMMARY=summaryline))
        # Write to the caller's stream (not sys.stdout) so the report stays in one place
        self.prt_gos_grouped(prt, **kws_grp)
        # genes
        genes = sorted(self.gene2gos.keys())
        prt.write("\n\n{SUMMARY}\n\n".format(SUMMARY=summaryline))
        self.prt_gene_aart(genes, prt)
        # Sort genes
        prt.write("\n\n{SUMMARY}\n\n".format(SUMMARY=summaryline))
        self.prt_gene_aart_details(genes, prt)
        return (self.name, self.get_section_marks())

    def prt_report_grp1(self, prt=sys.stdout, **kws_grp):
        """Print full GO/gene report with grouping.

        Args:
            prt: Writable stream which receives the entire report.
            **kws_grp: Forwarded to prt_gos_grouped (and from there to
                Sorter.get_desc2nts).

        Returns:
            Tuple of (self.name, list returned by get_section_marks()).
        """
        summaryline = self.str_summaryline()
        # Print grouped GO IDs
        prt.write("{SUMMARY}\n".format(SUMMARY=summaryline))
        self.prt_gos_grouped(prt, **kws_grp)
        # genes
        genes = sorted(self.gene2gos.keys())
        prt.write("\n\n{SUMMARY}\n\n".format(SUMMARY=summaryline))
        self.prt_section_key(prt)
        self.prt_gene_aart(genes, prt)
        # Sort genes
        prt.write("\n\n{SUMMARY}\n\n".format(SUMMARY=summaryline))
        self.prt_gene_aart_details(genes, prt)
        return (self.name, self.get_section_marks())

    def str_summaryline(self):
        """Return, e.g.: 47 GOs, 262 genes described by 10 of 19 sections consistent_increase."""
        return "{N} GOs, {M} genes described by {X} of {Y} sections {NM}".format(
            N=len(self.go2nt), M=len(self.gene2gos), X=len(self.sec2chr),
            Y=len(self.datobj.sec2chr), NM=self.name)

    def prt_gos_grouped(self, prt, **kws_grp):
        """Print grouped GO list.

        Args:
            prt: Writable stream.
            **kws_grp: Keyword arguments which control content:
                hdrgo_prt section_prt top_n use_sections
        """
        prtfmt = self.datobj.kws['fmtgo']
        wrobj = WrXlsxSortedGos(self.name, self.sortobj)
        desc2nts = self.sortobj.get_desc2nts(**kws_grp)
        wrobj.prt_txt_desc2nts(prt, desc2nts, prtfmt)

    def prt_gos_flat(self, prt):
        """Print flat GO list, ordered by the key function from _get_sortgo()."""
        prtfmt = self.datobj.kws['fmtgo']
        _go2nt = self.sortobj.grprobj.go2nt
        # Use the grouper's namedtuples for the GO IDs in this set
        go2nt = {go:_go2nt[go] for go in self.go2nt}
        prt.write("\n{N} GO IDs:\n".format(N=len(go2nt)))
        _sortby = self._get_sortgo()
        for ntgo in sorted(go2nt.values(), key=_sortby):
            prt.write(prtfmt.format(**ntgo._asdict()))

    def _get_sortgo(self):
        """Get function for sorting GO terms in a list of namedtuples.

        A user-supplied 'sortgo' in datobj.kws takes precedence over the
        default stored in gosubdag.prt_attr['sort'].
        """
        if 'sortgo' in self.datobj.kws:
            return self.datobj.kws['sortgo']
        # Return the value untouched: it is passed as key= to sorted(), so
        # appending a string to it cannot yield a usable sort key.
        return self.datobj.grprdflt.gosubdag.prt_attr['sort']

    def prt_gene_aart(self, geneids, prt=sys.stdout):
        """For each gene, print ASCII art which represents its associated GO IDs.

        Args:
            geneids: Iterable of gene IDs; each must be a key of self.gene2aart.
            prt: Writable stream.
        """
        patgene = self.datobj.kws["fmtgene"]
        itemid2name = self.datobj.kws.get("itemid2name")
        prt.write("\n{HDR}\n".format(HDR=self.str_hdr()))
        for geneid in geneids:
            # Symbol is blank when no ID-to-name mapping was given or the ID is absent
            symbol = "" if itemid2name is None else itemid2name.get(geneid, "")
            prt.write(patgene.format(AART=self.gene2aart[geneid], ID=geneid, NAME=symbol))

    def prt_gene_aart_details(self, geneids, prt=sys.stdout):
        """For each gene, print its ASCII art line followed by one line per GO ID.

        Each GO line is prefixed by the section characters of that GO ID.

        Args:
            geneids: Iterable of gene IDs; each must be a key of self.gene2gos
                and self.gene2aart.
            prt: Writable stream.
        """
        _go2nt = self.sortobj.grprobj.go2nt
        patgene = self.datobj.kws["fmtgene2"]
        patgo = self.datobj.kws["fmtgo2"]
        itemid2name = self.datobj.kws.get("itemid2name")
        chr2i = self.datobj.get_chr2idx()

        def _sortkey(nt_abc):
            """Order by index of first section character, then NS, then descending dcnt."""
            ntgo, abc = nt_abc
            return [chr2i[abc[:1]], ntgo.NS, -1*ntgo.dcnt]

        for geneid in geneids:
            gos_gene = self.gene2gos[geneid]
            symbol = "" if itemid2name is None else itemid2name.get(geneid, "")
            prt.write("\n")
            prt.write(patgene.format(AART=self.gene2aart[geneid], ID=geneid, NAME=symbol))
            # Keyed by GO ID so that each GO ID is printed once per gene
            go2nt = {go:(_go2nt[go], "".join(self.go2chrs[go])) for go in gos_gene}
            for ntgo, abc in sorted(go2nt.values(), key=_sortkey):
                prt.write("{ABC} ".format(ABC=abc))
                prt.write(patgo.format(**ntgo._asdict()))

    def prt_section_key(self, prt=sys.stdout):
        """Print every section name and its alias; '*' marks sections in this set."""
        for section_name, letter in self.datobj.sec2chr.items():
            mrk = '*' if section_name in self.sec2chr else ""
            prt.write("{M:1} {ABC} {SECT}\n".format(M=mrk, ABC=letter, SECT=section_name))

    def str_hdr(self):
        """Return a string of the section aliases in this set, concatenated in order."""
        return "".join(self.sec2chr.values())

    def get_section_marks(self):
        """For each section in datobj.sec2chr, return its alias if in this set, else '.'."""
        return [abc if s in self.sec2chr else "." for s, abc in self.datobj.sec2chr.items()]

    def get_gene2binvec(self):
        """Return a boolean vector for each gene representing GO section membership.

        Vector positions follow the order of self.sec2chr.
        """
        _sec2chr = self.sec2chr
        return {g:[s in s2gos for s in _sec2chr] for g, s2gos in self.gene2section2gos.items()}