class WrHierCli(object):
    """Write hierarchy cli.

    Parses the command line, loads the GO DAG, and writes the GO term
    hierarchy for the requested GO IDs to a stream or to text files.
    """

    # Keywords handled as flags by DocOptParse
    kws_set_all = set(['relationship', 'up', 'f'])
    # Keywords which carry a value
    kws_dct_all = set([
        'GO', 'dag', 'i', 'o', 'max_indent', 'no_indent', 'concise',
        'gaf', 'gene2go', 'dash_len', 'include_only'])
    kws_dct_wr = set(['max_indent', 'no_indent', 'concise', 'relationship', 'dash_len'])

    def __init__(self, args=None, prt=sys.stdout):
        """Parse the arguments, load the GO DAG and collect the GO IDs.

        args -- command-line arguments handed to DocOptParse.get_docargs
        prt  -- stream handed to get_godag and GoSubDag
        """
        self.kws = DocOptParse(__doc__, self.kws_dct_all, self.kws_set_all).get_docargs(
            args, intvals=set(['max_indent', 'dash_len']))
        # Only load the optional OBO attributes which the user asked for
        opt_attrs = OboOptionalAttrs.attributes.intersection(self.kws.keys())
        godag = get_godag(self.kws['dag'], prt, optional_attrs=opt_attrs)
        self.gosubdag = GoSubDag(godag.keys(), godag,
                                 relationships='relationship' in opt_attrs,
                                 tcntobj=get_tcntobj(godag, **self.kws),
                                 children=True,
                                 prt=prt)
        self.goids = GetGOs().get_goids(self.kws.get('GO'), self.kws.get('i'), sys.stdout)

    def get_fouts(self):
        """Return the list of output filenames requested through 'o' and 'f'."""
        fouts_txt = []
        if 'o' in self.kws:
            fouts_txt.append(self.kws['o'])
        if 'f' in self.kws:
            fouts_txt.append(self._get_fout_go())
        return fouts_txt

    def _get_fout_go(self):
        """Get the name of an output file based on the first GO ID in self.goids."""
        assert self.goids, "NO VALID GO IDs WERE PROVIDED"
        base = next(iter(self.goids)).replace(':', '')
        upstr = '_up' if 'up' in self.kws else ''
        return "hier_{BASE}{UP}.{EXT}".format(BASE=base, UP=upstr, EXT='txt')

    def wrtxt_hier(self, fout_txt):
        """Write hierarchy below specified GO IDs to an ASCII file."""
        # Text mode: prt_hier writes to text streams (its default is sys.stdout),
        # and a file opened in binary mode rejects str on Python 3.
        with open(fout_txt, 'w') as prt:
            self.prt_hier(prt)
            print(" WROTE: {TXT}".format(TXT=fout_txt))

    def prt_hier(self, prt=sys.stdout):
        """Write hierarchy below specified GO IDs, or above them if 'up' is set."""
        objwr = WrHierGO(self.gosubdag, **self.kws)
        assert self.goids, "NO VALID GO IDs WERE PROVIDED"
        if 'up' not in objwr.usrset:
            for goid in self.goids:
                objwr.prt_hier_down(goid, prt)
        else:
            objwr.prt_hier_up(self.goids, prt)
def __init__(self, **kws):
    """Load the GO DAG, read the GO sets and group them.

    kws -- keyword arguments used instead of the command line when given;
           when empty, the arguments come from DocOptParse.get_docargs.
    """
    docopt_parser = DocOptParse(__doc__, self.kws_dict, self.kws_set)
    if kws:
        self.kws = kws
    else:
        self.kws = docopt_parser.get_docargs(prt=None)
    self.godag = get_godag(
        self.kws.get('obo'),
        prt=sys.stdout,
        loading_bar=False,
        optional_attrs=['relationship'])
    initializer = _Init(self.godag)
    self.go_ntsets = initializer.get_go_ntsets(self.kws.get('GO_FILE'))
    # Union of the GO IDs found in every set which was read
    self.go_all = set.union(*(ntset.go_set for ntset in self.go_ntsets))
    # TermCounts or None
    termcounts = initializer.get_tcntobj(self.go_all, **self.kws)
    self.gosubdag = GoSubDag(
        self.go_all, self.godag, True, tcntobj=termcounts, prt=sys.stdout)
    self.objgrpd = initializer.get_grouped(
        self.go_ntsets, self.go_all, self.gosubdag, **self.kws)
def __init__(self, args=None, prt=sys.stdout):
    """Parse the arguments, load the GO DAG and collect the GO IDs.

    args -- command-line arguments handed to DocOptParse.get_docargs
    prt  -- stream handed to get_godag and GoSubDag
    """
    docopt_parser = DocOptParse(__doc__, self.kws_dct_all, self.kws_set_all)
    self.kws = docopt_parser.get_docargs(args, intvals={'max_indent', 'dash_len'})
    # Only load the optional OBO attributes which the user asked for
    opt_attrs = OboOptionalAttrs.attributes.intersection(self.kws.keys())
    godag = get_godag(self.kws['dag'], prt, optional_attrs=opt_attrs)
    kws_dag = {
        'relationships': 'relationship' in opt_attrs,
        'tcntobj': get_tcntobj(godag, **self.kws),
        'children': True,
        'prt': prt,
    }
    self.gosubdag = GoSubDag(godag.keys(), godag, **kws_dag)
    usr_gos = self.kws.get('GO')
    fin_gos = self.kws.get('i')
    self.goids = GetGOs().get_goids(usr_gos, fin_gos, sys.stdout)
def __init__(self, args=None, prt=sys.stdout):
    """Parse the arguments, load the GO DAG and annotations, set up keywords.

    args -- command-line arguments handed to DocOptParse.get_docargs
    prt  -- stream handed to get_godag and GoSubDag
    """
    docopt_parser = DocOptParse(__doc__, self.kws_dct_all, self.kws_set_all)
    self.kws = docopt_parser.get_docargs(args, intvals={'max_indent', 'dash_len'})
    # Only load the optional OBO attributes which the user asked for
    opt_attrs = OboOptionalAttrs.attributes.intersection(self.kws.keys())
    godag = get_godag(self.kws['dag'], prt, optional_attrs=opt_attrs)
    self.gene2gos = read_annotations(**self.kws)
    # Term counts are only created when annotations were read
    if self.gene2gos is not None:
        self.tcntobj = TermCounts(godag, self.gene2gos)
    else:
        self.tcntobj = None
    kws_dag = {
        'relationships': 'relationship' in opt_attrs,
        'tcntobj': self.tcntobj,
        'children': True,
        'prt': prt,
    }
    self.gosubdag = GoSubDag(godag.keys(), godag, **kws_dag)
    self.goids = self._init_goids()
    # Adjust the keyword args once the GO IDs are known
    self._adj_item_marks()
    self._adj_include_only()
    self._adj_for_assc()
class NCBIgeneToPythonCli(object):
    """Convert a NCBI Gene gene_result.txt file into a Python module."""

    kws_dict = set(['i', 'o'])

    def __init__(self):
        self.objdoc = DocOptParse(__doc__, self.kws_dict, set())

    def cli(self, prt=sys.stdout):
        """Read the file named by 'i' and write the Python module named by 'o'.

        Raises RuntimeError if the input file does not exist.
        Nothing is written if the reader returns no namedtuples.
        """
        kws = self.objdoc.get_docargs(prt=None)
        if not os.path.exists(kws['i']):
            raise RuntimeError("\n{DOC}\n**ERROR: NO FILE FOUND: {NCBI}".format(
                NCBI=kws['i'], DOC=__doc__))
        records = NCBIgeneFileReader(kws['i']).get_nts()
        if not records:
            return
        id2record = self._get_geneid2nt(records)
        self._wrpy_ncbi_gene_nts(kws['o'], id2record, prt)

    @staticmethod
    def _get_geneid2nt(nts):
        """Map each GeneID to the first namedtuple seen with it; report duplicates."""
        id2record = {}
        for record in nts:
            key = record.GeneID
            if key in id2record:
                print("DUPLICATE GeneID FOUND {N:9} {SYM}".format(N=key, SYM=record.Symbol))
                continue
            id2record[key] = record
        return id2record

    @staticmethod
    def _wrpy_ncbi_gene_nts(fout_py, geneid2nt, log):
        """Write the namedtuples as a GENEID2NT dict in a Python module.

        fout_py   -- name of the Python module to write
        geneid2nt -- dict of GeneID to namedtuple
        log       -- stream which receives the one-line summary
        """
        total = len(geneid2nt)
        with open(fout_py, 'w') as ofstrm:
            today = str(datetime.date.today()).replace('-', '_')
            ofstrm.write('"""{PYDOC}"""\n\n'.format(
                PYDOC="Data downloaded from NCBI Gene converted into Python namedtuples."))
            ofstrm.write("from collections import namedtuple\n\n")
            ofstrm.write('WRITTEN = "{DATE}"'.format(DATE=today))
            ofstrm.write(' # {N} items\n\n'.format(N=total))
            # Any one value gives the namedtuple's type name and fields
            sample = next(iter(geneid2nt.values()))
            ofstrm.write("#pylint: disable=line-too-long,too-many-lines,invalid-name\n")
            ofstrm.write("{NtName} = namedtuple('{NtName}', '{FLDS}')\n\n".format(
                NtName=type(sample).__name__, FLDS=' '.join(sample._fields)))
            ofstrm.write("GENEID2NT = {{ # {N:,} items\n".format(N=total))
            for geneid in sorted(geneid2nt):
                ofstrm.write(" {GeneID} : {NT},\n".format(GeneID=geneid, NT=geneid2nt[geneid]))
            ofstrm.write("}\n")
            log.write(" {N:9} geneids WROTE: {PY}\n".format(N=total, PY=fout_py))
class PrtGOterms(object):
    """Command-line interface which prints specified GO Terms from the DAG source."""

    kws_dict = set(['GO', 'GO_FILE', 'name', 'obo'])
    kws_set = set()

    def __init__(self):
        self.objdoc = DocOptParse(__doc__, self.kws_dict, self.kws_set)
        self.objsub = WrSubObo()

    def cli(self, prt=sys.stdout):
        """Print the GO terms chosen by GO ID, GO file, or name.

        prt -- stream handed to WrSubObo.prt_goterms
        """
        kws = self.objdoc.get_docargs(prt=None)
        print("KWS", kws)
        usr_gos = kws.get('GO')
        fin_gos = kws.get('GO_FILE')
        goids = GetGOs().get_goids(usr_gos, fin_gos, sys.stdout)
        # Use the 'name' keyword only if no GO IDs were given directly
        if not goids and 'name' in kws:
            goids = self.objsub.get_goids(kws['obo'], kws['name'])
        self.objsub.prt_goterms(kws['obo'], goids, prt, b_prt=False)
        summary = "Printing {N:6} GO IDs: {GOs}".format(N=len(goids), GOs=goids)
        print(summary)
class WrHierCli(object):
    """Write hierarchy cli.

    Parses the command line, loads the GO DAG and optional annotations,
    and writes the GO term hierarchy for the requested GO IDs.
    """

    # Keywords handled as flags by DocOptParse
    kws_set_all = set(['relationship', 'up', 'f'])
    # Keywords which carry a value
    kws_dct_all = set(['GO', 'dag', 'i', 'o', 'max_indent', 'no_indent', 'concise',
                       'gaf', 'gene2go', 'taxid', 'dash_len', 'include_only', 'item_marks'])
    kws_dct_wr = set(['max_indent', 'no_indent', 'concise', 'relationship', 'dash_len'])

    def __init__(self, args=None, prt=sys.stdout):
        """Parse the arguments, load the GO DAG and annotations, set up keywords.

        args -- command-line arguments handed to DocOptParse.get_docargs
        prt  -- stream handed to get_godag and GoSubDag
        """
        self.kws = DocOptParse(__doc__, self.kws_dct_all, self.kws_set_all).get_docargs(
            args, intvals=set(['max_indent', 'dash_len']))
        # Only load the optional OBO attributes which the user asked for
        opt_attrs = OboOptionalAttrs.attributes.intersection(self.kws.keys())
        godag = get_godag(self.kws['dag'], prt, optional_attrs=opt_attrs)
        self.gene2gos = read_annotations(**self.kws)
        # Term counts are only created when annotations were read
        self.tcntobj = TermCounts(godag, self.gene2gos) if self.gene2gos is not None else None
        self.gosubdag = GoSubDag(godag.keys(), godag,
                                 relationships='relationship' in opt_attrs,
                                 tcntobj=self.tcntobj,
                                 children=True,
                                 prt=prt)
        self.goids = self._init_goids()
        self._adj_item_marks()
        self._adj_include_only()
        self._adj_for_assc()

    def _init_goids(self):
        """Return the GO IDs at which the hierarchy report starts.

        Returns a list of the GO IDs taken from the 'GO' keyword (GO IDs, or
        keys of Consts().NS2GO) and from the file named by 'i'. If none are
        found, returns the set of all GO IDs when the DAG holds fewer than
        100 terms, otherwise None.
        """
        goids_ret = []
        godagconsts = Consts()
        if 'GO' in self.kws:
            for goid in self.kws['GO']:
                if goid[:3] == "GO:":
                    assert len(goid) == 10, "BAD GO ID({GO})".format(GO=goid)
                    goids_ret.append(goid)
                elif goid in godagconsts.NS2GO:
                    goids_ret.append(godagconsts.NS2GO[goid])
        if 'i' in self.kws:
            goids_fin = GetGOs().rdtxt_gos(self.kws['i'], sys.stdout)
            if goids_fin:
                goids_ret.extend(list(goids_fin))
        if goids_ret:
            return goids_ret
        # If GO DAG is small, print hierarchy for the entire DAG
        if len(self.gosubdag.go2nt) < 100:
            return set(self.gosubdag.go2nt.keys())
        return None

    def get_fouts(self):
        """Return the list of output filenames requested through 'o' and 'f'."""
        fouts_txt = []
        if 'o' in self.kws:
            fouts_txt.append(self.kws['o'])
        if 'f' in self.kws:
            fouts_txt.append(self._get_fout_go())
        return fouts_txt

    def _get_fout_go(self):
        """Get the name of an output file based on the first GO ID in self.goids."""
        assert self.goids, "NO VALID GO IDs WERE PROVIDED AS STARTING POINTS FOR HIERARCHY REPORT"
        base = next(iter(self.goids)).replace(':', '')
        upstr = '_up' if 'up' in self.kws else ''
        return "hier_{BASE}{UP}.{EXT}".format(BASE=base, UP=upstr, EXT='txt')

    def wrtxt_hier(self, fout_txt):
        """Write hierarchy below specified GO IDs to an ASCII file."""
        # Text mode: prt_hier writes to text streams (its default is sys.stdout),
        # and a file opened in binary mode rejects str on Python 3.
        with open(fout_txt, 'w') as prt:
            self.prt_hier(prt)
            print(" WROTE: {TXT}".format(TXT=fout_txt))

    def prt_hier(self, prt=sys.stdout):
        """Write hierarchy below specified GO IDs, or above them if 'up' is set."""
        objwr = WrHierGO(self.gosubdag, **self.kws)
        assert self.goids, "NO VALID GO IDs WERE PROVIDED"
        if 'up' not in objwr.usrset:
            for goid in self.goids:
                objwr.prt_hier_down(goid, prt)
        else:
            objwr.prt_hier_up(self.goids, prt)

    def _adj_item_marks(self):
        """Replace the 'item_marks' string with a dict of GO ID to the '>' mark.

        Raises Exception if 'item_marks' was given but yields no GO IDs.
        """
        if 'item_marks' in self.kws:
            # item_marks can take a list of GO IDs on cmdline or in a file.
            #     --item_marks=GO:0043473,GO:0009987
            #     --item_marks=item_marks.txt
            goids = self._get_goids(self.kws['item_marks'])
            if goids:
                self.kws['item_marks'] = {go:'>' for go in goids}
            else:
                raise Exception("NO GO IDs FOUND IN item_marks")

    def _adj_include_only(self):
        """Replace the 'include_only' string with the GO IDs which it names.

        Raises Exception if 'include_only' was given but yields no GO IDs.
        """
        if 'include_only' in self.kws:
            # include_only can take a list of GO IDs on cmdline or in a file.
            #     --include_only=GO:0043473,GO:0009987
            #     --include_only=include_only.txt
            goids = self._get_goids(self.kws['include_only'])
            if goids:
                self.kws['include_only'] = goids
            else:
                raise Exception("NO GO IDs FOUND IN include_only")

    def _adj_for_assc(self):
        """Print only GO IDs from associations and their ancestors."""
        if self.gene2gos:
            gos_assoc = set(get_b2aset(self.gene2gos).keys())
            # Values given by the user take priority over the association defaults
            if 'item_marks' not in self.kws:
                self.kws['item_marks'] = {go:'>' for go in gos_assoc}
            if 'include_only' not in self.kws:
                gosubdag = GoSubDag(gos_assoc, self.gosubdag.go2obj,
                                    self.gosubdag.relationships)
                self.kws['include_only'] = gosubdag.go2obj

    @staticmethod
    def _get_goids(gostr):
        """Return GO IDs from a GO str (e.g., GO:0043473,GO:0009987) or a file.

        Returns None if gostr neither contains 'GO:' nor names an existing file.
        """
        if 'GO:' in gostr:
            return gostr.split(',')
        if os.path.exists(gostr):
            return GetGOs().get_goids(None, gostr, sys.stdout)
        return None
def __init__(self, gosubdag=None):
    """Create the DocOptParse object and store the optional GoSubDag.

    gosubdag -- GoSubDag to store on the instance, or None
    """
    doc_parser = DocOptParse(__doc__, self.kws_dict, self.kws_set)
    self.objdoc = doc_parser
    self.gosubdag = gosubdag
class PlotCli(object):
    """Class for command-line interface for creating GO term diagrams"""

    # Keywords which carry a value
    kws_dict = set(['GO', 'outfile', 'go_file', 'sections', 'S',
                    'gaf', 'gene2go', 'taxid', 'title', 'obo', 'go_aliases'])
    # Keywords handled as flags by DocOptParse
    kws_set = set(['relationship', 'parentcnt', 'childcnt', 'mark_alt_id',
                   'shorten', 'draw-children', 'norel'])
    dflt_outfile = "go_plot.png"
    kws_plt = set(['parentcnt', 'childcnt', 'mark_alt_id', 'shorten'])

    def __init__(self, gosubdag=None):
        """Create the DocOptParse object and store the optional GoSubDag."""
        self.objdoc = DocOptParse(__doc__, self.kws_dict, self.kws_set)
        self.gosubdag = gosubdag

    def cli(self):
        """Command-line interface for go_draw script.

        Returns the name of the image file which was written.
        """
        kws_all = self.get_docargs(prt=None)
        optional_attrs = self._get_optional_attrs(kws_all)
        go2obj = GODag(kws_all['obo'], optional_attrs)
        # GO kws_all: GO go_file draw-children
        goids, go2color = GetGOs(go2obj).get_go_color(**kws_all)
        relationships = 'relationship' in optional_attrs
        kws_dag = self._get_kwsdag(goids, go2obj, **kws_all)
        self.gosubdag = GoSubDag(goids, go2obj, relationships, **kws_dag)
        if 'sections' in kws_all:
            return self._plt_gogrouped(goids, go2color, **kws_all)
        return self._plt_gosubdag(goids, go2color, **kws_all)

    def _plt_gogrouped(self, goids, go2color_usr, **kws):
        """Plot grouped GO IDs and return the image filename."""
        fout_img = self.get_outfile(kws['outfile'], goids)
        sections = read_sections(kws['sections'], exclude_ungrouped=True)
        grprobj_cur = self._get_grprobj(goids, sections)
        # GO: purple=hdr-only, green=hdr&usr, yellow=usr-only
        # BORDER: Black=hdr Blu=hdr&usr
        grpcolor = GrouperColors(grprobj_cur)  # get_bordercolor get_go2color_users
        grp_go2color = grpcolor.get_go2color_users()
        grp_go2bordercolor = grpcolor.get_bordercolor()
        # Colors chosen by the user override the colors from the grouping
        for goid, color in go2color_usr.items():
            grp_go2color[goid] = color
        objcolor = Go2Color(self.gosubdag, objgoea=None,
                            go2color=grp_go2color,
                            go2bordercolor=grp_go2bordercolor)
        go2txt = GrouperPlot.get_go2txt(grprobj_cur, grp_go2color, grp_go2bordercolor)
        objplt = GoSubDagPlot(self.gosubdag, Go2Color=objcolor, go2txt=go2txt, **kws)
        objplt.prt_goids(sys.stdout)
        objplt.plt_dag(fout_img)
        sys.stdout.write("{N:>6} sections read\n".format(
            N="NO" if sections is None else len(sections)))
        return fout_img

    def _get_grprobj(self, goids, sections):
        """Get Grouper, given GO IDs and sections."""
        grprdflt = GrouperDflts(self.gosubdag, "goslim_generic.obo")
        hdrobj = HdrgosSections(self.gosubdag, grprdflt.hdrgos_dflt, sections)
        return Grouper("sections", goids, hdrobj, self.gosubdag)

    def _plt_gosubdag(self, goids, go2color, **kws):
        """Plot GO IDs and return the image filename."""
        fout_img = self.get_outfile(kws['outfile'], goids)
        objcolor = Go2Color(self.gosubdag, objgoea=None, go2color=go2color)
        objplt = GoSubDagPlot(self.gosubdag, Go2Color=objcolor, **kws)
        objplt.prt_goids(sys.stdout)
        objplt.plt_dag(fout_img)
        return fout_img

    def _get_kwsdag(self, goids, go2obj, **kws_all):
        """Get keyword args for a GoSubDag."""
        kws_dag = {}
        # Term Counts for GO Term information score
        tcntobj = self._get_tcntobj(goids, go2obj, **kws_all)  # TermCounts or None
        if tcntobj is not None:
            kws_dag['tcntobj'] = tcntobj
        # GO letters specified by the user
        if 'go_aliases' in kws_all:
            fin_go_aliases = kws_all['go_aliases']
            if os.path.exists(fin_go_aliases):
                go2letter = read_d1_letter(fin_go_aliases)
                if go2letter:
                    kws_dag['go2letter'] = go2letter
        return kws_dag

    @staticmethod
    def _get_tcntobj(goids, go2obj, **kws):
        """Get a TermCounts object if the user provides an annotation file, otherwise None."""
        # kws: gaf (gene2go taxid)
        if 'gaf' in kws or 'gene2go' in kws:
            # NOTE(review): this reduced GoSubDag is built but not used by the
            # return below, which counts against the full go2obj -- confirm
            # whether the construction can be dropped.
            _gosubdag = GoSubDag(goids, go2obj, rcntobj=False)
            return get_tcntobj(go2obj, **kws)  # TermCounts
        return None

    def get_docargs(self, args=None, prt=None):
        """Pare down docopt. Return a minimal dictionary and a set containing runtime arg values."""
        docargs = self.objdoc.get_docargs(args, prt)
        self._chk_docopts(docargs)
        return docargs

    def _chk_docopts(self, kws):
        """Check for common user command-line errors.

        Raises RuntimeError (through _err) for a fatal error. Exits with
        status 0 after printing a note if no GO IDs were specified.
        """
        # outfile should contain .png, .png, etc.
        outfile = kws['outfile']
        if len(kws) == 2 and os.path.basename(kws['obo']) == "go-basic.obo" and \
                kws['outfile'] == self.dflt_outfile:
            self._err("NO GO IDS SPECFIED", err=False)
        if 'obo' in outfile:
            self._err("BAD outfile({O})".format(O=outfile))
        if 'gaf' in kws and 'gene2go' in kws:
            self._err("SPECIFY ANNOTAIONS FROM ONE FILE")
        if 'gene2go' in kws and 'taxid' not in kws:
            self._err("SPECIFIY taxid WHEN READ NCBI'S gene2go FILE")

    def _err(self, msg, err=True):
        """Print usage and error before exiting.

        msg -- message shown after the usage text
        err -- True: raise RuntimeError; False: write to stdout and exit(0)
        """
        severity = "FATAL" if err else "NOTE"
        txt = "".join([
            self.objdoc.doc,
            "User's command-line:\n\n",
            " % go_plot.py {ARGS}\n\n".format(ARGS=" ".join(sys.argv[1:])),
            "**{SEV}: {MSG}\n".format(SEV=severity, MSG=msg)])
        if err:
            raise RuntimeError(txt)
        sys.stdout.write(txt)
        sys.exit(0)

    def get_outfile(self, outfile, goids=None):
        """Return output file for GO Term plot."""
        # 1. Use the user-specified output filename for the GO Term plot
        if outfile != self.dflt_outfile:
            return outfile
        # 2. If only plotting 1 GO term, use the GO ID and name in the plot name
        if goids is not None and len(goids) == 1:
            goid = next(iter(goids))
            goobj = self.gosubdag.go2obj[goid]
            fout = "GO_{NN}_{NM}".format(NN=goid.replace("GO:", ""), NM=goobj.name)
            # Whitespace and punctuation in the GO term name become underscores
            return ".".join([re.sub(r"[\s#'()+,-./:<=>\[\]_}]", '_', fout), 'png'])
        # 3. Return default name
        return self.dflt_outfile

    @staticmethod
    def _get_optional_attrs(kws):
        """Given keyword args, return optional_attributes to be loaded into the GODag."""
        vals = OboOptionalAttrs.attributes.intersection(kws.keys())
        if 'sections' in kws:
            vals.add('relationship')
        # 'norel' is tested last, so it wins over 'sections'
        if 'norel' in kws:
            vals.discard('relationship')
        return vals
class SectionsWr(object):
    """Command-line interface which writes GO IDs grouped into sections."""

    kws_dict = set(['GO_FILE', 'obo', 'slims', 'ifile', 'ofile', 'txt', 'py', 'xlsx',
                    'gaf', 'gene2go', 'taxid'])
    kws_set = set()

    def __init__(self, gosubdag=None):
        self.objdoc = DocOptParse(__doc__, self.kws_dict, self.kws_set)
        self.gosubdag = gosubdag

    def cli(self, prt=sys.stdout):
        """Read the user GO IDs and sections, then write the section files.

        prt -- stream which receives the version lines
        """
        kws = self.objdoc.get_docargs(prt=None)
        godag = get_godag(kws['obo'], prt=None, loading_bar=False,
                          optional_attrs=['relationship'])
        usrgos = GetGOs(godag, max_gos=200).get_usrgos(kws.get('GO_FILE'), prt)
        # TermCounts or None
        termcounts = self._get_tcntobj(usrgos, godag, **kws)
        self.gosubdag = GoSubDag(usrgos, godag, relationships=True,
                                 tcntobj=termcounts, prt=None)
        grprdflt = GrouperDflts(self.gosubdag, kws['slims'])
        versions = [godag.version, grprdflt.ver_goslims]
        prt.write("\n".join(versions) + "\n")
        fin_sections = kws['ifile']
        sections = read_sections(fin_sections, exclude_ungrouped=True, prt=None)
        hdrobj = HdrgosSections(self.gosubdag, grprdflt.hdrgos_dflt, sections)
        grprobj = Grouper("init", usrgos, hdrobj, self.gosubdag)
        # Write sections
        wr_sections = WrSections(grprobj, versions)
        if not os.path.exists(fin_sections):
            wr_sections.wr_txt_section_hdrgos(fin_sections)
        wr_sections.wr_txt_section_hdrgos(kws['ofile'])
        wr_py = WrPySections(grprobj, versions)
        if 'py' in kws:
            wr_py.wr_py_sections(kws['py'], sections, doc=godag.version)
        # Write user GO IDs in sections
        wr_gos = WrXlsxSortedGos("init", Sorter(grprobj), versions)
        wr_gos.wr_txt_gos(kws['txt'], sortby=wr_py.fncsortnt)
        self._prt_cnt_usrgos(usrgos, sys.stdout)

    def _prt_cnt_usrgos(self, usrgos_read, prt):
        """Print the number of user GO IDs and, if different, the number read."""
        num_sources = len(self.gosubdag.go_sources)
        num_read = len(usrgos_read)
        pieces = ["{GOs:6} user GO IDs".format(GOs=num_sources)]
        if num_read != num_sources:
            pieces.append(" of {M} GO IDs read".format(M=num_read))
        pieces.append("\n")
        for piece in pieces:
            prt.write(piece)

    @staticmethod
    def _get_tcntobj(goids, go2obj, **kws):
        """Return TermCounts if an annotation keyword was given, otherwise None."""
        # kws: gaf (gene2go taxid)
        if 'gaf' not in kws and 'gene2go' not in kws:
            return None
        # Get a reduced go2obj set for TermCounts
        reduced = GoSubDag(goids, go2obj, rcntobj=False, prt=None)
        return get_tcntobj(reduced.go2obj, **kws)  # TermCounts
def __init__(self):
    """Create the DocOptParse object from the module docstring; no flag keywords."""
    flag_keys = set()
    self.objdoc = DocOptParse(__doc__, self.kws_dict, flag_keys)
def __init__(self, gosubdag=None, use_doc=True):
    """Create the DocOptParse object and store the optional GoSubDag.

    gosubdag -- GoSubDag to store on the instance, or None
    use_doc  -- True: hand the module docstring to DocOptParse; False: hand it None
    """
    if use_doc:
        doc_text = __doc__
    else:
        doc_text = None
    self.objdoc = DocOptParse(doc_text, self.kws_dict, self.kws_set)
    self.gosubdag = gosubdag
def __init__(self):
    """Create the DocOptParse object and the WrSubObo helper."""
    doc_parser = DocOptParse(__doc__, self.kws_dict, self.kws_set)
    self.objdoc = doc_parser
    self.objsub = WrSubObo()