Beispiel #1
0
 def __init__(self, **kws):
     """Load the GO DAG, read the user's GO IDs, and group them.

     Args:
         **kws: Run-time keyword arguments. When none are given, the
             arguments are obtained from DocOptParse.get_docargs using
             the module docstring. 'obo' and 'GO_FILE' are read here;
             the whole dict is also forwarded to the _Init helpers.
     """
     _objdoc = DocOptParse(__doc__, self.kws_dict, self.kws_set)
     self.kws = kws if kws else _objdoc.get_docargs(prt=None)
     self.godag = get_godag(self.kws.get('obo'), prt=sys.stdout,
                            loading_bar=False, optional_attrs=['relationship'])
     _ini = _Init(self.godag)
     self.go_ntsets = _ini.get_go_ntsets(self.kws.get('GO_FILE'))
     # Start from an empty set so that zero GO sets yields set();
     # set.union(*[]) would raise TypeError because it has no 'self' argument.
     self.go_all = set().union(*(nt.go_set for nt in self.go_ntsets))
     _tcntobj = _ini.get_tcntobj(self.go_all, **self.kws)  # Gets TermCounts or None
     self.gosubdag = GoSubDag(self.go_all, self.godag, True, tcntobj=_tcntobj, prt=sys.stdout)
     self.objgrpd = _ini.get_grouped(self.go_ntsets, self.go_all, self.gosubdag, **self.kws)
Beispiel #2
0
 def __init__(self, **kws):
     """Build the GO DAG, the user's GO ID sets, and their grouping.

     Args:
         **kws: Keyword arguments for this run. If empty, they are taken
             from DocOptParse.get_docargs (which is given the module
             docstring). This method reads 'obo' and 'GO_FILE' and passes
             the full dict on to the _Init helper methods.
     """
     _objdoc = DocOptParse(__doc__, self.kws_dict, self.kws_set)
     self.kws = kws if kws else _objdoc.get_docargs(prt=None)
     self.godag = get_godag(self.kws.get('obo'), prt=sys.stdout,
                            loading_bar=False, optional_attrs=['relationship'])
     _ini = _Init(self.godag)
     self.go_ntsets = _ini.get_go_ntsets(self.kws.get('GO_FILE'))
     # Union onto an empty set: works for zero or more GO sets, whereas
     # set.union(*[]) raises TypeError when there are no sets to unpack.
     self.go_all = set().union(*(nt.go_set for nt in self.go_ntsets))
     _tcntobj = _ini.get_tcntobj(self.go_all, **self.kws)  # Gets TermCounts or None
     self.gosubdag = GoSubDag(self.go_all, self.godag, True, tcntobj=_tcntobj, prt=sys.stdout)
     self.objgrpd = _ini.get_grouped(self.go_ntsets, self.go_all, self.gosubdag, **self.kws)
Beispiel #3
0
class NCBIgeneToPythonCli(object):
    """Read a NCBI Gene gene_result.txt file and write a Python module."""

    # Names of the command-line options handed to DocOptParse
    kws_dict = set(['i', 'o'])

    def __init__(self):
        self.objdoc = DocOptParse(__doc__, self.kws_dict, set())

    def cli(self, prt=sys.stdout):
        """Convert the NCBI Gene file given by 'i' into the Python module given by 'o'.

        Args:
            prt: Writable stream that receives the summary line after writing.

        Raises:
            RuntimeError: If the input file named by 'i' does not exist.
        """
        kws = self.objdoc.get_docargs(prt=None)
        fin_ncbi = kws['i']
        if not os.path.exists(fin_ncbi):
            raise RuntimeError("\n{DOC}\n**ERROR: NO FILE FOUND: {NCBI}".format(
                NCBI=fin_ncbi, DOC=__doc__))
        nts = NCBIgeneFileReader(fin_ncbi).get_nts()
        # Nothing is written when the reader returns no records
        if nts:
            geneid2nt = self._get_geneid2nt(nts)
            self._wrpy_ncbi_gene_nts(kws['o'], geneid2nt, prt)

    @staticmethod
    def _get_geneid2nt(nts):
        """Return a dict mapping GeneID to its namedtuple.

        The first record seen for a GeneID is kept; later records with the
        same GeneID are reported on stdout and dropped.

        Args:
            nts: Iterable of namedtuples having GeneID and Symbol fields.
        """
        geneid2nt = {}
        for ntd in nts:
            geneid = ntd.GeneID
            if geneid not in geneid2nt:
                geneid2nt[geneid] = ntd
            else:
                print("DUPLICATE GeneID FOUND {N:9} {SYM}".format(N=geneid, SYM=ntd.Symbol))
        return geneid2nt

    @staticmethod
    def _wrpy_ncbi_gene_nts(fout_py, geneid2nt, log):
        """Write namedtuples to a dict in a Python module.

        Args:
            fout_py: Path of the Python module to write.
            geneid2nt: Dict mapping GeneID to a namedtuple instance.
            log: Writable stream that receives a one-line summary.

        If geneid2nt is empty, no file is created or truncated (there is no
        record from which to derive the namedtuple definition) and a note is
        written to log instead.
        """
        num_genes = len(geneid2nt)
        # Check BEFORE opening the file: an empty dict used to truncate the
        # output file and then fail with a bare StopIteration.
        if not geneid2nt:
            log.write("  {N:9} geneids; NOT WRITING: {PY}\n".format(N=num_genes, PY=fout_py))
            return
        # Any one record supplies the namedtuple's type name and field names
        nt_sample = next(iter(geneid2nt.values()))
        with open(fout_py, 'w') as ofstrm:
            docstr = "Data downloaded from NCBI Gene converted into Python namedtuples."
            ofstrm.write('"""{PYDOC}"""\n\n'.format(PYDOC=docstr))
            ofstrm.write("from collections import namedtuple\n\n")
            ofstrm.write('WRITTEN = "{DATE}"'.format(
                DATE=str(datetime.date.today()).replace('-', '_')))
            ofstrm.write(' # {N} items\n\n'.format(N=num_genes))
            ofstrm.write("#pylint: disable=line-too-long,too-many-lines,invalid-name\n")
            ofstrm.write("{NtName} = namedtuple('{NtName}', '{FLDS}')\n\n".format(
                NtName=type(nt_sample).__name__, FLDS=' '.join(nt_sample._fields)))
            ofstrm.write("GENEID2NT = {{ # {N:,} items\n".format(N=num_genes))
            # Sort on GeneID only so the namedtuples themselves are never compared
            for geneid in sorted(geneid2nt):
                ofstrm.write("    {GeneID} : {NT},\n".format(GeneID=geneid, NT=geneid2nt[geneid]))
            ofstrm.write("}\n")
            log.write("  {N:9} geneids WROTE: {PY}\n".format(N=num_genes, PY=fout_py))
Beispiel #4
0
class PrtGOterms(object):
    """CLI which prints user-specified GO Terms taken from the DAG source."""

    # Option names handed to DocOptParse
    kws_dict = {'GO', 'GO_FILE', 'name', 'obo'}
    kws_set = set()

    def __init__(self):
        self.objdoc = DocOptParse(__doc__, self.kws_dict, self.kws_set)
        self.objsub = WrSubObo()

    def cli(self, prt=sys.stdout):
        """Read the command-line args, collect GO IDs, and print their GO Terms.

        GO IDs come from the 'GO' and 'GO_FILE' arguments. If that yields
        nothing and 'name' was given, GO IDs are looked up by 'name' in the
        'obo' file instead.

        Args:
            prt: Stream passed to prt_goterms for the GO Term output.
        """
        docargs = self.objdoc.get_docargs(prt=None)
        # NOTE(review): looks like leftover debug output -- confirm before removing
        print("KWS", docargs)
        reader = GetGOs()
        go_ids = reader.get_goids(docargs.get('GO'), docargs.get('GO_FILE'), sys.stdout)
        fin_obo = docargs['obo']
        if not go_ids:
            if 'name' in docargs:
                go_ids = self.objsub.get_goids(fin_obo, docargs['name'])
        self.objsub.prt_goterms(fin_obo, go_ids, prt, b_prt=False)
        summary = "Printing {N:6} GO IDs: {GOs}".format(N=len(go_ids), GOs=go_ids)
        print(summary)
Beispiel #5
0
class PrtGOterms(object):
    """Command-line tool that prints chosen GO Terms from the DAG source."""

    # Names of the options given to DocOptParse
    kws_dict = {'obo', 'name', 'GO_FILE', 'GO'}
    kws_set = set()

    def __init__(self):
        self.objdoc = DocOptParse(__doc__, self.kws_dict, self.kws_set)
        self.objsub = WrSubObo()

    def cli(self, prt=sys.stdout):
        """Gather GO IDs from the command-line args and print their GO Terms.

        The 'GO' and 'GO_FILE' arguments are tried first. Only when they
        produce no GO IDs, and a 'name' argument exists, are GO IDs obtained
        from the 'obo' file using 'name'.

        Args:
            prt: Stream handed to prt_goterms to receive the GO Terms.
        """
        args = self.objdoc.get_docargs(prt=None)
        # NOTE(review): appears to be a debugging print -- confirm it is wanted
        print("KWS", args)
        getter = GetGOs()
        usr_go = args.get('GO')
        usr_go_file = args.get('GO_FILE')
        found = getter.get_goids(usr_go, usr_go_file, sys.stdout)
        need_name_lookup = (not found) and ('name' in args)
        if need_name_lookup:
            found = self.objsub.get_goids(args['obo'], args['name'])
        self.objsub.prt_goterms(args['obo'], found, prt, b_prt=False)
        print("Printing {N:6} GO IDs: {GOs}".format(GOs=found, N=len(found)))
Beispiel #6
0
class PlotCli(object):
    """Class for command-line interface for creating GO term diagrams."""

    # Option names handed to DocOptParse as its second and third arguments
    kws_dict = set([
        'GO', 'outfile', 'go_file', 'sections', 'S', 'gaf', 'gene2go', 'taxid',
        'title', 'obo', 'go_aliases'
    ])
    kws_set = set([
        'relationship', 'parentcnt', 'childcnt', 'mark_alt_id', 'shorten',
        'draw-children', 'norel'
    ])
    # Output filename treated as "the user did not choose a name"
    dflt_outfile = "go_plot.png"
    kws_plt = set(['parentcnt', 'childcnt', 'mark_alt_id', 'shorten'])

    def __init__(self, gosubdag=None):
        """Create the docopt parser; optionally start with an existing GoSubDag."""
        self.objdoc = DocOptParse(__doc__, self.kws_dict, self.kws_set)
        self.gosubdag = gosubdag

    def cli(self):
        """Parse the command line, build the GoSubDag, and plot.

        Returns:
            The value returned by the plotting helper: the image filename.
        """
        kws_all = self.get_docargs(prt=None)
        optional_attrs = self._get_optional_attrs(kws_all)
        go2obj = GODag(kws_all['obo'], optional_attrs)
        # GO kws_all: GO go_file draw-children
        goids, go2color = GetGOs(go2obj).get_go_color(**kws_all)
        relationships = 'relationship' in optional_attrs
        kws_dag = self._get_kwsdag(goids, go2obj, **kws_all)
        self.gosubdag = GoSubDag(goids, go2obj, relationships, **kws_dag)

        # A sections file selects the grouped plot; otherwise plot the plain sub-DAG
        if 'sections' in kws_all:
            return self._plt_gogrouped(goids, go2color, **kws_all)
        return self._plt_gosubdag(goids, go2color, **kws_all)

    def _plt_gogrouped(self, goids, go2color_usr, **kws):
        """Plot grouped GO IDs and return the image filename.

        Args:
            goids: GO IDs to plot.
            go2color_usr: Mapping of GO ID to color chosen by the user; these
                override the colors derived from the grouping.
            **kws: All command-line keyword args; 'outfile' and 'sections'
                are read here and everything is forwarded to GoSubDagPlot.
        """
        fout_img = self.get_outfile(kws['outfile'], goids)
        sections = read_sections(kws['sections'], exclude_ungrouped=True)
        grprobj_cur = self._get_grprobj(goids, sections)
        # GO: purple=hdr-only, green=hdr&usr, yellow=usr-only
        # BORDER: Black=hdr Blu=hdr&usr
        grpcolor = GrouperColors(grprobj_cur)
        grp_go2color = grpcolor.get_go2color_users()
        grp_go2bordercolor = grpcolor.get_bordercolor()
        # User-specified colors take precedence over group colors
        for goid, color in go2color_usr.items():
            grp_go2color[goid] = color
        objcolor = Go2Color(self.gosubdag,
                            objgoea=None,
                            go2color=grp_go2color,
                            go2bordercolor=grp_go2bordercolor)
        go2txt = GrouperPlot.get_go2txt(grprobj_cur, grp_go2color,
                                        grp_go2bordercolor)
        objplt = GoSubDagPlot(self.gosubdag,
                              Go2Color=objcolor,
                              go2txt=go2txt,
                              **kws)
        objplt.prt_goids(sys.stdout)
        objplt.plt_dag(fout_img)
        sys.stdout.write("{N:>6} sections read\n".format(
            N="NO" if sections is None else len(sections)))
        return fout_img

    def _get_grprobj(self, goids, sections):
        """Get Grouper, given GO IDs and sections."""
        grprdflt = GrouperDflts(self.gosubdag, "goslim_generic.obo")
        hdrobj = HdrgosSections(self.gosubdag, grprdflt.hdrgos_dflt, sections)
        return Grouper("sections", goids, hdrobj, self.gosubdag)

    def _plt_gosubdag(self, goids, go2color, **kws):
        """Plot GO IDs and return the image filename."""
        fout_img = self.get_outfile(kws['outfile'], goids)
        objcolor = Go2Color(self.gosubdag, objgoea=None, go2color=go2color)
        objplt = GoSubDagPlot(self.gosubdag, Go2Color=objcolor, **kws)
        objplt.prt_goids(sys.stdout)
        objplt.plt_dag(fout_img)
        return fout_img

    def _get_kwsdag(self, goids, go2obj, **kws_all):
        """Get keyword args for a GoSubDag.

        Returns:
            A dict holding 'tcntobj' when annotations were given and
            'go2letter' when a readable, non-empty 'go_aliases' file was given.
        """
        kws_dag = {}
        # Term Counts for GO Term information score
        tcntobj = self._get_tcntobj(goids, go2obj, **kws_all)  # TermCounts or None
        if tcntobj is not None:
            kws_dag['tcntobj'] = tcntobj
        # GO letters specified by the user
        if 'go_aliases' in kws_all:
            fin_go_aliases = kws_all['go_aliases']
            if os.path.exists(fin_go_aliases):
                go2letter = read_d1_letter(fin_go_aliases)
                if go2letter:
                    kws_dag['go2letter'] = go2letter
        return kws_dag

    @staticmethod
    def _get_tcntobj(goids, go2obj, **kws):
        """Get a TermCounts object if the user provides an annotation file, otherwise None."""
        # kws: gaf (gene2go taxid)
        if 'gaf' in kws or 'gene2go' in kws:
            # NOTE(review): this reduced GoSubDag is built but its go2obj is not
            # used below (the full go2obj is passed on). It is kept because the
            # constructor may print or validate -- confirm whether it can be dropped.
            _gosubdag = GoSubDag(goids, go2obj, rcntobj=False)
            return get_tcntobj(go2obj, **kws)  # TermCounts
        return None

    def get_docargs(self, args=None, prt=None):
        """Get the docopt arguments from the parser, check them, and return them.

        Args:
            args: Passed through to DocOptParse.get_docargs.
            prt: Passed through to DocOptParse.get_docargs.
        """
        docargs = self.objdoc.get_docargs(args, prt)
        self._chk_docopts(docargs)
        return docargs

    def _chk_docopts(self, kws):
        """Check for common user command-line errors.

        Raises:
            RuntimeError: For a fatal problem (see _err).
            SystemExit: With status 0 when only the defaults were given.
        """
        outfile = kws['outfile']
        # Only two args, the stock obo, and the default outfile: nothing to plot
        if len(kws) == 2 and os.path.basename(kws['obo']) == "go-basic.obo" and \
            kws['outfile'] == self.dflt_outfile:
            self._err("NO GO IDS SPECFIED", err=False)
        # An outfile containing 'obo' is rejected (an obo filename was probably given)
        if 'obo' in outfile:
            self._err("BAD outfile({O})".format(O=outfile))
        if 'gaf' in kws and 'gene2go' in kws:
            self._err("SPECIFY ANNOTAIONS FROM ONE FILE")
        if 'gene2go' in kws:
            if 'taxid' not in kws:
                self._err("SPECIFIY taxid WHEN READ NCBI'S gene2go FILE")

    def _err(self, msg, err=True):
        """Print usage and error before exiting.

        Args:
            msg: Message shown after the usage text and the user's command line.
            err: True raises RuntimeError carrying the text (severity FATAL);
                False writes the text to stdout (severity NOTE) and exits with 0.
        """
        severity = "FATAL" if err else "NOTE"
        txt = "".join([
            self.objdoc.doc, "User's command-line:\n\n",
            "  % go_plot.py {ARGS}\n\n".format(ARGS=" ".join(sys.argv[1:])),
            "**{SEV}: {MSG}\n".format(SEV=severity, MSG=msg)
        ])
        if err:
            raise RuntimeError(txt)
        sys.stdout.write(txt)
        sys.exit(0)

    def get_outfile(self, outfile, goids=None):
        """Return output file for GO Term plot.

        Args:
            outfile: Filename from the command line.
            goids: Optional collection of the GO IDs being plotted.
        """
        # 1. Use the user-specified output filename for the GO Term plot
        if outfile != self.dflt_outfile:
            return outfile
        # 2. If only plotting 1 GO term, use the GO ID and GO name in the plot name
        if goids is not None and len(goids) == 1:
            goid = next(iter(goids))
            goobj = self.gosubdag.go2obj[goid]
            fout = "GO_{NN}_{NM}".format(NN=goid.replace("GO:", ""),
                                         NM=goobj.name)
            # Whitespace and the listed punctuation become underscores
            return ".".join(
                [re.sub(r"[\s#'()+,-./:<=>\[\]_}]", '_', fout), 'png'])
        # 3. Return default name
        return self.dflt_outfile

    @staticmethod
    def _get_optional_attrs(kws):
        """Given keyword args, return optional_attributes to be loaded into the GODag."""
        vals = OboOptionalAttrs.attributes.intersection(kws.keys())
        # Sections add 'relationship'; 'norel' is applied last and removes it
        if 'sections' in kws:
            vals.add('relationship')
        if 'norel' in kws:
            vals.discard('relationship')
        return vals
Beispiel #7
0
class SectionsWr(object):
    """Command-line interface which groups user GO IDs and writes sections files."""

    # Option names handed to DocOptParse
    kws_dict = {
        'GO_FILE', 'obo', 'slims', 'ifile', 'ofile', 'txt', 'py', 'xlsx',
        'gaf', 'gene2go', 'taxid',
    }
    kws_set = set()

    def __init__(self, gosubdag=None):
        self.objdoc = DocOptParse(__doc__, self.kws_dict, self.kws_set)
        self.gosubdag = gosubdag

    def cli(self, prt=sys.stdout):
        """Read user GO IDs and sections, group the GO IDs, and write the results.

        Writes the sections text file ('ofile', plus 'ifile' if it does not
        exist yet), optionally a Python sections module ('py'), and the text
        file of grouped user GO IDs ('txt').

        Args:
            prt: Stream which receives the GO ID reader's output and the
                version lines. The final GO ID count always goes to stdout.
        """
        docargs = self.objdoc.get_docargs(prt=None)
        dag = get_godag(docargs['obo'], prt=None, loading_bar=False,
                        optional_attrs=['relationship'])
        go_reader = GetGOs(dag, max_gos=200)
        gos_usr = go_reader.get_usrgos(docargs.get('GO_FILE'), prt)
        termcounts = self._get_tcntobj(gos_usr, dag, **docargs)  # TermCounts or None
        self.gosubdag = GoSubDag(gos_usr, dag, relationships=True,
                                 tcntobj=termcounts, prt=None)
        dflts = GrouperDflts(self.gosubdag, docargs['slims'])
        versions = [dag.version, dflts.ver_goslims]
        ver_txt = "\n".join(versions)
        prt.write("{VER}\n".format(VER=ver_txt))
        fin_sections = docargs['ifile']
        secs = read_sections(fin_sections, exclude_ungrouped=True, prt=None)
        hdrgos = HdrgosSections(self.gosubdag, dflts.hdrgos_dflt, secs)
        grouper = Grouper("init", gos_usr, hdrgos, self.gosubdag)
        # Sections files: seed the input file when it is missing, then write the output
        wr_sections = WrSections(grouper, versions)
        if not os.path.exists(fin_sections):
            wr_sections.wr_txt_section_hdrgos(fin_sections)
        wr_sections.wr_txt_section_hdrgos(docargs['ofile'])
        # Python module of sections, only on request
        wr_py = WrPySections(grouper, versions)
        if 'py' in docargs:
            wr_py.wr_py_sections(docargs['py'], secs, doc=dag.version)
        # User GO IDs, arranged by section
        sorter = Sorter(grouper)
        wr_gos = WrXlsxSortedGos("init", sorter, versions)
        wr_gos.wr_txt_gos(docargs['txt'], sortby=wr_py.fncsortnt)
        self._prt_cnt_usrgos(gos_usr, sys.stdout)

    def _prt_cnt_usrgos(self, usrgos_read, prt):
        """Write the number of user GO IDs in the GoSubDag, and how many were read if different.

        Args:
            usrgos_read: Collection of GO IDs as read from the user.
            prt: Stream which receives the one-line report.
        """
        cnt_used = len(self.gosubdag.go_sources)
        cnt_read = len(usrgos_read)
        pieces = ["{GOs:6} user GO IDs".format(GOs=cnt_used)]
        if cnt_read != cnt_used:
            pieces.append(" of {M} GO IDs read".format(M=cnt_read))
        pieces.append("\n")
        prt.write("".join(pieces))

    @staticmethod
    def _get_tcntobj(goids, go2obj, **kws):
        """Return TermCounts when 'gaf' or 'gene2go' is among the keyword args, else None."""
        # kws: gaf (gene2go taxid)
        if 'gaf' not in kws and 'gene2go' not in kws:
            return None
        # TermCounts is given the go2obj of a GoSubDag built from the user's GO IDs
        subdag = GoSubDag(goids, go2obj, rcntobj=False, prt=None)
        return get_tcntobj(subdag.go2obj, **kws)