예제 #1
0
def _wr_xlsx(name, grprobj, **kws):
    """Sort the grouped GO terms and write them to '<name>.xlsx'.

    All keyword arguments are forwarded unchanged to both
    ``Sorter.get_desc2nts`` and ``WrXlsxSortedGos.wr_xlsx_nts``.

    Args:
        name: Base name of the output file; also passed to the writer.
        grprobj: Grouper object wrapped by the Sorter.
        **kws: Options shared by the sorter and the xlsx writer.
    """
    sorter = Sorter(grprobj)
    # Sorter options (per the original notes): hdrgo_prt section_prt top_n use_sections
    nts_by_desc = sorter.get_desc2nts(**kws)
    writer = WrXlsxSortedGos(name, sorter)
    # Writer options (per the original notes): title hdrs
    writer.wr_xlsx_nts("{NAME}.xlsx".format(NAME=name), nts_by_desc, **kws)
예제 #2
0
def _wr_xlsx(name, grprobj, **kws):
    """Sort the grouped GO terms and write them to '<name>.xlsx'.

    Only the sort-related keywords ('sortby', 'hdrgo_sortby',
    'section_sortby') are handed to the ``Sorter`` constructor; the complete
    keyword set is still forwarded to ``Sorter.get_desc2nts`` and
    ``WrXlsxSortedGos.wr_xlsx_nts``.

    Args:
        name: Base name of the output file; also passed to the writer.
        grprobj: Grouper object wrapped by the Sorter.
        **kws: Options for the sorter and the xlsx writer.
    """
    # Keywords understood by the Sorter constructor
    sorter_keys = ('sortby', 'hdrgo_sortby', 'section_sortby')
    sorter_kws = {}
    for key, val in kws.items():
        if key in sorter_keys:
            sorter_kws[key] = val
    sorter = Sorter(grprobj, **sorter_kws)
    # get_desc2nts options (per the original notes):
    #   hdrgo_prt section_prt top_n use_sections prtfmt
    nts_by_desc = sorter.get_desc2nts(**kws)
    writer = WrXlsxSortedGos(name, sorter)
    # wr_xlsx_nts options (per the original notes): title hdrs
    writer.wr_xlsx_nts("{NAME}.xlsx".format(NAME=name), nts_by_desc, **kws)
예제 #3
0
class AArtGeneProductSetsOne(object):
    """Product gene lists with ASCII art sections and GO IDs for each gene product.

    One instance describes a single named set of GOEA results. Shared
    settings (section letters, print formats, default grouper) come from the
    `obj` argument, noted in the original code as an AArtGeneProductSetsAll.
    """

    # nts need: nt.GO and nt.study_items

    def __init__(self, name, goea_results, obj):
        """Derive the GO/section/gene lookup tables for one result set.

        Args:
            name: Label used in the summary line and passed to the writer.
            goea_results: Results handed to `_Init.get_go2nt`.
            obj: Shared data object; this class reads its `hdrobj`,
                `grprdflt`, `sec2chr`, `kws`, and `get_chr2idx()`.
        """
        self.name = name
        self.datobj = obj  # AArtGeneProductSetsAll
        _ini = _Init(obj)
        self.go2nt = _ini.get_go2nt(goea_results)
        _grprobj = Grouper("grp",
                           self.go2nt,
                           obj.hdrobj,
                           obj.grprdflt.gosubdag,
                           go2nt=self.go2nt)
        self.sortobj = Sorter(_grprobj)
        self.sec2gos = _ini.get_sec2gos(self.sortobj)
        # Keep only the sections present in this set, in sec2gos order
        self.sec2chr = cx.OrderedDict([(s, obj.sec2chr[s])
                                       for s in self.sec2gos.keys()])
        self.go2chrs = _ini.get_go2chrs(self.sec2gos, self.sec2chr)
        self.gene2gos = _ini.get_gene2gos(self.go2nt)
        self.gene2section2gos = _ini.get_gene2section2gos(
            self.gene2gos, self.sec2gos)
        self.gene2aart = _ini.get_gene2aart(self.gene2section2gos,
                                            self.sec2chr)

    def prt_report_grp0(self, prt=sys.stdout):
        """Print full GO/gene report without grouping.

        Every part of the report is written to `prt`.

        Returns:
            Tuple (name, section marks); see get_section_marks.
        """
        summaryline = self.str_summaryline()
        kws_grp = {
            'use_sections': False,
            'hdrgo_prt': False,
            'sortby': lambda nt: [-1 * nt.dcnt, nt.depth]
        }
        # Print grouped GO IDs
        prt.write("{SUMMARY}\n".format(SUMMARY=summaryline))
        # Use the caller's stream (not sys.stdout) so the report stays in one place
        self.prt_gos_grouped(prt, **kws_grp)
        # genes
        genes = sorted(self.gene2gos.keys())
        prt.write("\n\n{SUMMARY}\n\n".format(SUMMARY=summaryline))
        self.prt_gene_aart(genes, prt)
        # Sort genes
        prt.write("\n\n{SUMMARY}\n\n".format(SUMMARY=summaryline))
        self.prt_gene_aart_details(genes, prt)
        return (self.name, self.get_section_marks())

    def prt_report_grp1(self, prt=sys.stdout, **kws_grp):
        """Print full GO/gene report with grouping.

        Args:
            prt: Writable stream receiving the report.
            **kws_grp: Forwarded to prt_gos_grouped.

        Returns:
            Tuple (name, section marks); see get_section_marks.
        """
        summaryline = self.str_summaryline()
        # Print grouped GO IDs
        prt.write("{SUMMARY}\n".format(SUMMARY=summaryline))
        self.prt_gos_grouped(prt, **kws_grp)
        # genes
        genes = sorted(self.gene2gos.keys())
        prt.write("\n\n{SUMMARY}\n\n".format(SUMMARY=summaryline))
        self.prt_section_key(prt)
        self.prt_gene_aart(genes, prt)
        # Sort genes
        prt.write("\n\n{SUMMARY}\n\n".format(SUMMARY=summaryline))
        self.prt_gene_aart_details(genes, prt)
        return (self.name, self.get_section_marks())

    def str_summaryline(self):
        """Return, e.g.: 47 GOs, 262 genes described by 10 of 19 sections consistent_increase."""
        return "{N} GOs, {M} genes described by {X} of {Y} sections {NM}".format(
            N=len(self.go2nt),
            M=len(self.gene2gos),
            X=len(self.sec2chr),
            Y=len(self.datobj.sec2chr),
            NM=self.name)

    def prt_gos_grouped(self, prt, **kws_grp):
        """Print grouped GO list using the 'fmtgo' format from datobj.kws."""
        prtfmt = self.datobj.kws['fmtgo']
        wrobj = WrXlsxSortedGos(self.name, self.sortobj)
        # Keyword arguments: control content: hdrgo_prt section_prt top_n use_sections
        desc2nts = self.sortobj.get_desc2nts(**kws_grp)
        wrobj.prt_txt_desc2nts(prt, desc2nts, prtfmt)

    def prt_gos_flat(self, prt):
        """Print flat GO list, ordered by the key function from _get_sortgo."""
        prtfmt = self.datobj.kws['fmtgo']
        _go2nt = self.sortobj.grprobj.go2nt
        # Print the grouper's namedtuples, restricted to this set's GO IDs
        go2nt = {go: _go2nt[go] for go in self.go2nt}
        prt.write("\n{N} GO IDs:\n".format(N=len(go2nt)))
        _sortby = self._get_sortgo()
        for ntgo in sorted(go2nt.values(), key=_sortby):
            prt.write(prtfmt.format(**ntgo._asdict()))

    def _get_sortgo(self):
        """Get function for sorting GO terms in a list of namedtuples.

        A 'sortgo' entry in datobj.kws takes precedence; otherwise the 'sort'
        entry of the default grouper's gosubdag.prt_attr is returned.
        """
        kws = self.datobj.kws
        if 'sortgo' in kws:
            return kws['sortgo']
        # Returned unchanged: callers pass the result as `key=` to sorted()
        return self.datobj.grprdflt.gosubdag.prt_attr['sort']

    def prt_gene_aart(self, geneids, prt=sys.stdout):
        """For each gene, print ASCII art which represents its associated GO IDs."""
        patgene = self.datobj.kws["fmtgene"]
        itemid2name = self.datobj.kws.get("itemid2name")
        prt.write("\n{HDR}\n".format(HDR=self.str_hdr()))
        for geneid in geneids:
            # Symbol is blank when no id-to-name mapping (or no entry) exists
            symbol = "" if itemid2name is None else itemid2name.get(geneid, "")
            prt.write(
                patgene.format(AART=self.gene2aart[geneid],
                               ID=geneid,
                               NAME=symbol))

    def prt_gene_aart_details(self, geneids, prt=sys.stdout):
        """For each gene, print its ASCII art line followed by its GO terms.

        GO terms are ordered by the index of their first section letter,
        then by namespace (NS), then by descending dcnt.
        """
        _go2nt = self.sortobj.grprobj.go2nt
        patgene = self.datobj.kws["fmtgene2"]
        patgo = self.datobj.kws["fmtgo2"]
        itemid2name = self.datobj.kws.get("itemid2name")
        chr2i = self.datobj.get_chr2idx()
        for geneid in geneids:
            gos_gene = self.gene2gos[geneid]
            symbol = "" if itemid2name is None else itemid2name.get(geneid, "")
            prt.write("\n")
            prt.write(
                patgene.format(AART=self.gene2aart[geneid],
                               ID=geneid,
                               NAME=symbol))
            # GO ID -> (namedtuple, concatenated section letters)
            go2nt = {
                go: (_go2nt[go], "".join(self.go2chrs[go]))
                for go in gos_gene
            }
            for ntgo, abc in sorted(
                    go2nt.values(),
                    key=lambda t: [chr2i[t[1][:1]], t[0].NS, -1 * t[0].dcnt]):
                prt.write("{ABC} ".format(ABC=abc))
                prt.write(patgo.format(**ntgo._asdict()))

    def prt_section_key(self, prt=sys.stdout):
        """Print each section's letter and name; '*' marks sections in this set."""
        for section_name, letter in self.datobj.sec2chr.items():
            mrk = '*' if section_name in self.sec2chr else ""
            prt.write("{M:1} {ABC} {SECT}\n".format(M=mrk,
                                                    ABC=letter,
                                                    SECT=section_name))

    def str_hdr(self):
        """Return a string of the section letters in this set, in sec2chr order."""
        return "".join(self.sec2chr.values())

    def get_section_marks(self):
        """For each section in datobj.sec2chr, return its letter if in this set, else '.'."""
        return [
            abc if s in self.sec2chr else "."
            for s, abc in self.datobj.sec2chr.items()
        ]

    def get_gene2binvec(self):
        """Return a boolean vector for each gene representing GO section membership."""
        _sec2chr = self.sec2chr
        return {
            g: [s in s2gos for s in _sec2chr]
            for g, s2gos in self.gene2section2gos.items()
        }
예제 #4
0
class AArtGeneProductSetsOne(object):
    """Product gene lists with ASCII art sections and GO IDs for each gene product.

    One instance describes a single named set of GOEA results. Shared
    settings (section letters, print formats, default grouper) come from the
    `obj` argument, noted in the original code as an AArtGeneProductSetsAll.
    """
    # nts need: nt.GO and nt.study_items

    def __init__(self, name, goea_results, obj):
        """Derive the GO/section/gene lookup tables for one result set.

        Args:
            name: Label used in the summary line and passed to the writer.
            goea_results: Results handed to `_Init.get_go2nt`.
            obj: Shared data object; this class reads its `hdrobj`,
                `grprdflt`, `sec2chr`, `kws`, and `get_chr2idx()`.
        """
        self.name = name
        self.datobj = obj  # AArtGeneProductSetsAll
        _ini = _Init(obj)
        self.go2nt = _ini.get_go2nt(goea_results)
        _grprobj = Grouper("grp", self.go2nt, obj.hdrobj, obj.grprdflt.gosubdag, go2nt=self.go2nt)
        self.sortobj = Sorter(_grprobj)
        self.sec2gos = _ini.get_sec2gos(self.sortobj)
        # Keep only the sections present in this set, in sec2gos order
        self.sec2chr = cx.OrderedDict([(s, obj.sec2chr[s]) for s in self.sec2gos.keys()])
        self.go2chrs = _ini.get_go2chrs(self.sec2gos, self.sec2chr)
        self.gene2gos = _ini.get_gene2gos(self.go2nt)
        self.gene2section2gos = _ini.get_gene2section2gos(self.gene2gos, self.sec2gos)
        self.gene2aart = _ini.get_gene2aart(self.gene2section2gos, self.sec2chr)

    def prt_report_grp0(self, prt=sys.stdout):
        """Print full GO/gene report without grouping.

        Every part of the report is written to `prt`.

        Returns:
            Tuple (name, section marks); see get_section_marks.
        """
        summaryline = self.str_summaryline()
        kws_grp = {'use_sections':False,
                   'hdrgo_prt':False,
                   'sortby':lambda nt: [-1*nt.dcnt, nt.depth]}
        # Print grouped GO IDs
        prt.write("{SUMMARY}\n".format(SUMMARY=summaryline))
        # Use the caller's stream (not sys.stdout) so the report stays in one place
        self.prt_gos_grouped(prt, **kws_grp)
        # genes
        genes = sorted(self.gene2gos.keys())
        prt.write("\n\n{SUMMARY}\n\n".format(SUMMARY=summaryline))
        self.prt_gene_aart(genes, prt)
        # Sort genes
        prt.write("\n\n{SUMMARY}\n\n".format(SUMMARY=summaryline))
        self.prt_gene_aart_details(genes, prt)
        return (self.name, self.get_section_marks())

    def prt_report_grp1(self, prt=sys.stdout, **kws_grp):
        """Print full GO/gene report with grouping.

        Args:
            prt: Writable stream receiving the report.
            **kws_grp: Forwarded to prt_gos_grouped.

        Returns:
            Tuple (name, section marks); see get_section_marks.
        """
        summaryline = self.str_summaryline()
        # Print grouped GO IDs
        prt.write("{SUMMARY}\n".format(SUMMARY=summaryline))
        self.prt_gos_grouped(prt, **kws_grp)
        # genes
        genes = sorted(self.gene2gos.keys())
        prt.write("\n\n{SUMMARY}\n\n".format(SUMMARY=summaryline))
        self.prt_section_key(prt)
        self.prt_gene_aart(genes, prt)
        # Sort genes
        prt.write("\n\n{SUMMARY}\n\n".format(SUMMARY=summaryline))
        self.prt_gene_aart_details(genes, prt)
        return (self.name, self.get_section_marks())

    def str_summaryline(self):
        """Return, e.g.: 47 GOs, 262 genes described by 10 of 19 sections consistent_increase."""
        return "{N} GOs, {M} genes described by {X} of {Y} sections {NM}".format(
            N=len(self.go2nt), M=len(self.gene2gos),
            X=len(self.sec2chr), Y=len(self.datobj.sec2chr), NM=self.name)

    def prt_gos_grouped(self, prt, **kws_grp):
        """Print grouped GO list using the 'fmtgo' format from datobj.kws."""
        prtfmt = self.datobj.kws['fmtgo']
        wrobj = WrXlsxSortedGos(self.name, self.sortobj)
        # Keyword arguments: control content: hdrgo_prt section_prt top_n use_sections
        desc2nts = self.sortobj.get_desc2nts(**kws_grp)
        wrobj.prt_txt_desc2nts(prt, desc2nts, prtfmt)

    def prt_gos_flat(self, prt):
        """Print flat GO list, ordered by the key function from _get_sortgo."""
        prtfmt = self.datobj.kws['fmtgo']
        _go2nt = self.sortobj.grprobj.go2nt
        # Print the grouper's namedtuples, restricted to this set's GO IDs
        go2nt = {go:_go2nt[go] for go in self.go2nt}
        prt.write("\n{N} GO IDs:\n".format(N=len(go2nt)))
        _sortby = self._get_sortgo()
        for ntgo in sorted(go2nt.values(), key=_sortby):
            prt.write(prtfmt.format(**ntgo._asdict()))

    def _get_sortgo(self):
        """Get function for sorting GO terms in a list of namedtuples.

        A 'sortgo' entry in datobj.kws takes precedence; otherwise the 'sort'
        entry of the default grouper's gosubdag.prt_attr is returned.
        """
        kws = self.datobj.kws
        if 'sortgo' in kws:
            return kws['sortgo']
        # Returned unchanged: callers pass the result as `key=` to sorted()
        return self.datobj.grprdflt.gosubdag.prt_attr['sort']

    def prt_gene_aart(self, geneids, prt=sys.stdout):
        """For each gene, print ASCII art which represents its associated GO IDs."""
        patgene = self.datobj.kws["fmtgene"]
        itemid2name = self.datobj.kws.get("itemid2name")
        prt.write("\n{HDR}\n".format(HDR=self.str_hdr()))
        for geneid in geneids:
            # Symbol is blank when no id-to-name mapping (or no entry) exists
            symbol = "" if itemid2name is None else itemid2name.get(geneid, "")
            prt.write(patgene.format(AART=self.gene2aart[geneid], ID=geneid, NAME=symbol))

    def prt_gene_aart_details(self, geneids, prt=sys.stdout):
        """For each gene, print its ASCII art line followed by its GO terms.

        GO terms are ordered by the index of their first section letter,
        then by namespace (NS), then by descending dcnt.
        """
        _go2nt = self.sortobj.grprobj.go2nt
        patgene = self.datobj.kws["fmtgene2"]
        patgo = self.datobj.kws["fmtgo2"]
        itemid2name = self.datobj.kws.get("itemid2name")
        chr2i = self.datobj.get_chr2idx()
        for geneid in geneids:
            gos_gene = self.gene2gos[geneid]
            symbol = "" if itemid2name is None else itemid2name.get(geneid, "")
            prt.write("\n")
            prt.write(patgene.format(AART=self.gene2aart[geneid], ID=geneid, NAME=symbol))
            # GO ID -> (namedtuple, concatenated section letters)
            go2nt = {go:(_go2nt[go], "".join(self.go2chrs[go])) for go in gos_gene}
            for ntgo, abc in sorted(go2nt.values(),
                                    key=lambda t: [chr2i[t[1][:1]], t[0].NS, -1*t[0].dcnt]):
                prt.write("{ABC} ".format(ABC=abc))
                prt.write(patgo.format(**ntgo._asdict()))

    def prt_section_key(self, prt=sys.stdout):
        """Print each section's letter and name; '*' marks sections in this set."""
        for section_name, letter in self.datobj.sec2chr.items():
            mrk = '*' if section_name in self.sec2chr else ""
            prt.write("{M:1} {ABC} {SECT}\n".format(M=mrk, ABC=letter, SECT=section_name))

    def str_hdr(self):
        """Return a string of the section letters in this set, in sec2chr order."""
        return "".join(self.sec2chr.values())

    def get_section_marks(self):
        """For each section in datobj.sec2chr, return its letter if in this set, else '.'."""
        return [abc if s in self.sec2chr else "." for s, abc in self.datobj.sec2chr.items()]

    def get_gene2binvec(self):
        """Return a boolean vector for each gene representing GO section membership."""
        _sec2chr = self.sec2chr
        return {g:[s in s2gos for s in _sec2chr] for g, s2gos in self.gene2section2gos.items()}