def test_nb():
    """Run the notebook example comparing virion's dcnt under three relationship settings.

    Builds a GoSubDag from all leaf terms with no relationships, with all
    relationships, and with only 'part_of', printing the virion namedtuple
    and its dcnt each time. Then prints two virion descendants and plots them.
    """
    dag = get_godag("go-basic.obo", optional_attrs={'relationship'})
    # Leaf terms: GO terms having no children
    leaf_ids = {term.item_id for term in dag.values() if not term.children}
    virion = 'GO:0019012'

    # Default GoSubDag (no relationships argument)
    subdag_r0 = GoSubDag(leaf_ids, dag)
    nt_r0 = subdag_r0.go2nt[virion]
    print(nt_r0)
    print('r0 THE VALUE OF dcnt IS: {dcnt}'.format(dcnt=nt_r0.dcnt))

    # All relationships
    subdag_r1 = GoSubDag(leaf_ids, dag, relationships=True)
    nt_r1 = subdag_r1.go2nt[virion]
    print(nt_r1)
    print('r1 THE VALUE OF dcnt IS: {dcnt}'.format(dcnt=nt_r1.dcnt))

    # Only the part_of relationship
    subdag_partof = GoSubDag(leaf_ids, dag, relationships={'part_of'})
    nt_partof = subdag_partof.go2nt[virion]
    print(nt_partof)
    print('THE VALUE OF dcnt IS: {dcnt}'.format(dcnt=nt_partof.dcnt))

    descendants = subdag_partof.rcntobj.go2descendants[virion]
    print('{N} descendants of virion were found'.format(
        N=len(descendants)))

    # Keep the plot small: use just two virion descendants
    capsid_fiber_ids = {'GO:0098033', 'GO:0098032'}
    subdag_partof.prt_goids(
        capsid_fiber_ids,
        '{NS} {GO} dcnt({dcnt}) D-{depth:02} {GO_name}')

    # Sub-DAG holding only those two descendants and their ancestors
    small_dag = GoSubDag(capsid_fiber_ids, dag, relationships={'part_of'})
    GoSubDagPlot(small_dag).plt_dag('virion_capsid_fiber.png')
def plt_goea_results(fout_img, goea_results, **kws):
    """Plot the GO terms of the given GOEA results on a single page.

    Each record's ``GO`` attribute is used as a source ID and its ``goterm``
    attribute as the matching GO object. Extra keyword arguments are
    forwarded to GoSubDagPlot.
    """
    source_ids = [res.GO for res in goea_results]
    id2term = {res.GO: res.goterm for res in goea_results}
    subdag = GoSubDag(source_ids, id2term, rcntobj=True)
    plotter = GoSubDagPlot(subdag, goea_results=goea_results, **kws)
    plotter.plt_dag(fout_img)
def plt_goids(gosubdag, fout_img, goids, **kws_plt):
    """Plot the given GO IDs as a DAG (Directed Acyclic Graph).

    A new GoSubDag is built for ``goids``, reusing the ``go2obj`` and
    ``rcntobj`` of the ``gosubdag`` passed in. ``kws_plt`` is forwarded to
    both the GoSubDag and the GoSubDagPlot constructors.

    Returns the GoSubDagPlot object after ``plt_dag(fout_img)`` was called.
    """
    subdag_for_plot = GoSubDag(
        goids,
        gosubdag.go2obj,
        rcntobj=gosubdag.rcntobj,
        **kws_plt)
    plotter = GoSubDagPlot(subdag_for_plot, **kws_plt)
    plotter.plt_dag(fout_img)
    return plotter
def _plot_grouped_gos(self, fout_img, pltgosusr, kws_plt, kws_dag):
    """Plot the GO IDs in ``pltgosusr`` using data from the grouper's GoSubDag.

    The new GoSubDag reuses the relationships, rcntobj and go2nt of
    ``self.grprobj.gosubdag``; ``kws_dag`` goes to GoSubDag and ``kws_plt``
    goes to GoSubDagPlot.
    """
    parent = self.grprobj.gosubdag
    subdag = GoSubDag(
        pltgosusr,
        parent.get_go2obj(pltgosusr),
        parent.relationships,
        rcntobj=parent.rcntobj,
        go2nt=parent.go2nt,
        **kws_dag)
    GoSubDagPlot(subdag, **kws_plt).plt_dag(fout_img)
def plot_all(self, goids, name, prt=sys.stdout):
    """Create three plots: no relationships, only 'part_of', and all relationships.

    Progress text and GO ID listings are written to ``prt``. In the third
    plot, GO IDs absent from the no-relationship sub-DAG are given the
    color '#d5ffff'.
    """
    # --- 1) No relationships ---------------------------------------------
    prt.write("\nCreate GoSubDag not loading any relationship")
    subdag_r0 = GoSubDag(goids, self.go2obj, relationships=False, prt=prt)
    subdag_r0.prt_goids(subdag_r0.go2obj, prt=prt)
    prt.write("{N} GO IDS".format(N=len(subdag_r0.go2obj)))
    plot_r0 = GoSubDagPlot(subdag_r0, mark_alt_id=True)
    png_r0 = os.path.join(REPO, "a_relationship_{NAME}_r0.png".format(NAME=name))
    plot_r0.plt_dag(png_r0)

    # --- 2) Only the 'part_of' relationship --------------------------------
    prt.write("\nCreate GoSubDag while loading only the 'part_of' relationship")
    subdag_partof = GoSubDag(goids, self.go2obj, relationships=['part_of'], prt=prt)
    subdag_partof.prt_goids(subdag_partof.go2obj, prt=prt)
    prt.write("{N} GO IDS".format(N=len(subdag_partof.go2obj)))
    plot_partof = GoSubDagPlot(subdag_partof, mark_alt_id=True)
    prt.write("GO SOURCES:")
    subdag_partof.prt_goids(subdag_partof.go_sources, prt=prt)
    png_partof = os.path.join(REPO, "a_relationship_{NAME}_partof.png".format(NAME=name))
    plot_partof.plt_dag(png_partof)

    # --- 3) All relationships ----------------------------------------------
    prt.write("\nCreate GoSubDag while loading all relationships")
    subdag_r1 = GoSubDag(goids, self.go2obj, relationships=True, prt=prt)
    prt.write("ALL {N} GO IDS:".format(N=len(subdag_r1.go2obj)))
    subdag_r1.prt_goids(subdag_r1.go2obj, prt=prt)
    prt.write("2 GO SOURCES:")
    subdag_r1.prt_goids(subdag_r1.go_sources, prt=prt)
    # GO IDs that appear only once relationships are loaded
    added_ids = set(subdag_r1.go2obj) - set(subdag_r0.go2obj)
    highlight = dict.fromkeys(added_ids, '#d5ffff')
    prt.write("{N} NEW GO IDS:".format(N=len(added_ids)))
    subdag_r1.prt_goids(added_ids, prt=prt)
    prt.write("{N} GO IDS".format(N=len(subdag_r1.go2obj)))
    plot_r1 = GoSubDagPlot(subdag_r1, mark_alt_id=True, go2color=highlight)
    png_r1 = os.path.join(REPO, "a_relationship_{NAME}_r1.png".format(NAME=name))
    plot_r1.plt_dag(png_r1)
def _plt(self, goid, exp_goids, act_goids, diff_exp, diff_act):
    """Plot GO IDs, colored by the differences between expected and actual.

    The output file is named '<self.name>_<goid without colon>.png'.
    ``goid`` itself is colored light green, IDs in ``diff_exp`` light blue,
    and IDs in ``diff_act`` light pink; later assignments override earlier ones.
    """
    fout_png = '{NAME}_{GO}.png'.format(NAME=self.name, GO=goid.replace(':', ''))
    sources = set.union(exp_goids, act_goids, {goid})
    subdag = GoSubDag(sources, self.godag, relationships=True)
    colors = {goid: '#c8ffb0'}  # xkcd light light green
    colors.update(dict.fromkeys(diff_exp, '#cafffb'))  # xkcd light light blue
    colors.update(dict.fromkeys(diff_act, '#ffd1df'))  # xkcd light pink
    GoSubDagPlot(subdag, go2color=colors).plt_dag(fout_png)
def plt_goids(self, fout_img, go_sources):
    """Print and plot the GO IDs reachable from ``go_sources``.

    Builds a GoSubDag that reuses go2obj, rcntobj and go2nt from
    ``self.gosubdag_all``, writes the count and listing of plotted GO IDs to
    ``self.prt``, then plots with alternate IDs marked into ``self.cwd``.
    """
    # % src/bin/go_plot.py GOs --obo=../goatools/data/i86.obo --outfile=t00.jpg --mark_alt_id
    parent = self.gosubdag_all
    subdag = GoSubDag(
        go_sources,
        parent.go2obj,
        prt=self.prt,
        rcntobj=parent.rcntobj,
        go2nt=parent.go2nt)
    fmt = subdag.prt_attr['fmta']
    # A first plot object is created only to learn which GO IDs get plotted
    plotted_ids = GoSubDagPlot(subdag).get_goids_plt()
    self.prt.write("\n{N} GO IDs\n".format(N=len(plotted_ids)))
    subdag.prt_goids(plotted_ids, prtfmt=fmt, prt=self.prt)
    plotter = GoSubDagPlot(subdag, mark_alt_id=True)
    plotter.plt_dag(os.path.join(self.cwd, fout_img))
def plt_goids(self, fout_img, go_sources):
    """Print and plot the GO IDs reachable from ``go_sources``.

    Builds a GoSubDag that reuses go2obj, rcntobj and go2nt from
    ``self.gosubdag_all``, writes the count and listing of plotted GO IDs to
    ``self.prt``, then plots with alternate IDs marked into ``self.cwd``.
    """
    # % src/bin/go_plot.py GOs --obo=../goatools/data/i86.obo --outfile=t00.jpg --mark_alt_id
    all_dag = self.gosubdag_all
    subdag = GoSubDag(
        go_sources,
        all_dag.go2obj,
        prt=self.prt,
        rcntobj=all_dag.rcntobj,
        go2nt=all_dag.go2nt)
    listing_fmt = subdag.prt_attr['fmta']
    # A first plot object is created only to learn which GO IDs get plotted
    ids_to_plot = GoSubDagPlot(subdag).get_goids_plt()
    self.prt.write("\n{N} GO IDs\n".format(N=len(ids_to_plot)))
    subdag.prt_goids(ids_to_plot, prtfmt=listing_fmt, prt=self.prt)
    GoSubDagPlot(subdag, mark_alt_id=True).plt_dag(
        os.path.join(self.cwd, fout_img))
def _get_objpltg(self, goids, go2color_usr, **kws):
    """Return a GoSubDagPlot object for grouped GO IDs.

    Sections are read from ``kws['sections']``. Grouper colors are computed
    for ``goids``, then overridden per GO ID by ``go2color_usr``. All of
    ``kws`` is forwarded to GoSubDagPlot.
    """
    sections = read_sections(kws['sections'], exclude_ungrouped=True)
    grouper = self._get_grprobj(goids, sections)
    # GO: purple=hdr-only, green=hdr&usr, yellow=usr-only
    # BORDER: Black=hdr Blu=hdr&usr
    colors = GrouperColors(grouper)
    fill_colors = colors.get_go2color_users()
    border_colors = colors.get_bordercolor()
    # User-specified colors win over the grouper's colors
    for usr_goid, usr_color in go2color_usr.items():
        fill_colors[usr_goid] = usr_color
    color_obj = Go2Color(
        self.gosubdag,
        objgoea=None,
        go2color=fill_colors,
        go2bordercolor=border_colors)
    node_txt = GrouperPlot.get_go2txt(grouper, fill_colors, border_colors)
    return GoSubDagPlot(self.gosubdag, Go2Color=color_obj, go2txt=node_txt, **kws)
def show_go_dag_for_terms(terms, add_relationships=True):
    """Plot the GO DAG for the given GO terms and return the plot as an Image.

    Args:
        terms: GO IDs as a list-like, or a pandas Series (converted with
            ``tolist()``). A Series subclass is converted as well.
        add_relationships: When true, the 'relationship' attribute is loaded
            from the obo file in addition to 'def', and the flag is passed
            on as ``relationships`` to GoSubDag.

    Returns:
        ``Image('geneinfo_cache/plot.png')``, or None when ``terms`` is empty
        or None.
    """
    # isinstance (not an exact type check) so Series subclasses are converted
    # too; the truth value of an unconverted Series raises ValueError below.
    if isinstance(terms, pd.Series):
        terms = terms.tolist()

    if not terms:
        return None

    optional_attrs = ['relationship', 'def'] if add_relationships else ['def']

    # Everything printed by the helpers is sent to os.devnull
    with open(os.devnull, 'w') as null, redirect_stdout(null):
        # Return values are unused; the calls are kept, presumably for their
        # download side effects -- TODO confirm against the helpers.
        download_and_move_go_basic_obo(prt=null)
        download_ncbi_associations(prt=null)

        obodag = GODag("geneinfo_cache/go-basic.obo",
                       optional_attrs=optional_attrs, prt=null)
        gosubdag = GoSubDag(terms, obodag, relationships=add_relationships)
        GoSubDagPlot(gosubdag).plt_dag('geneinfo_cache/plot.png')

    return Image('geneinfo_cache/plot.png')
def test_tcntobj_relationships(do_plt=False):
    """Check that relationships such as part_of are propagated into TermCounts.

    Gene counts are computed with and without the 'part_of' relationship and
    compared against fixed expected values. With ``do_plt`` set, one plot per
    TermCounts object is written, annotated with each term's tcnt.
    """
    # Input and output files
    obo_path = os.path.join(REPO, "tests/data/yangRWC/fig2a.obo")
    anno_path = os.path.join(REPO, "tests/data/yangRWC/fig2a.anno")
    png_r0 = os.path.join(REPO, 'yang_fig2a_r0.png')
    png_r1 = os.path.join(REPO, 'yang_fig2a_r1.png')
    rels = {'part_of'}

    # Ontology, then annotations for the 'CC' namespace
    dag = GODag(obo_path, optional_attrs=['relationship'])
    id2gos = IdToGosReader(anno_path, godag=dag).get_id2gos('CC')

    # Term counts without, then with, relationships
    # relationship: G (GO:0000007) is part_of F (GO:0000006)
    counts_r0 = TermCounts(dag, id2gos)
    counts_r1 = TermCounts(dag, id2gos, rels)

    # Adding relationships does not change the total count of genes:
    assert counts_r0.gocnts['GO:0005575'] == counts_r1.gocnts['GO:0005575']
    # Counts without relationships:
    assert counts_r0.gocnts['GO:0000002'] == 40  # GO Term B
    assert counts_r0.gocnts['GO:0000006'] == 10  # GO Term F
    # Counts with relationships: F counts G's 30 genes, so does B
    assert counts_r1.gocnts['GO:0000002'] == 70  # GO Term B
    assert counts_r1.gocnts['GO:0000006'] == 40  # GO Term F

    if not do_plt:
        return

    # Optionally visualize the term counts without and with relationships
    for counts, png in ((counts_r0, png_r0), (counts_r1, png_r1)):
        node_txt = {
            nt.GO: 'tcnt={}'.format(nt.tcnt)
            for nt in counts.gosubdag.go2nt.values()
        }
        GoSubDagPlot(counts.gosubdag, go2txt=node_txt).plt_dag(png)
def get_gosubdagplot(self, goids=None, **kws_usr):
    """Return a GoSubDagPlot for ``goids``, defaulting to the grouper's user GO IDs.

    ``self._get_kws_plt`` splits the user keywords into plotting keywords and
    GoSubDag keywords. The new GoSubDag reuses relationships, rcntobj and
    go2nt from ``self.grprobj.gosubdag``.
    """
    goids = self.grprobj.usrgos if goids is None else goids
    kws_plt, kws_dag = self._get_kws_plt(goids, **kws_usr)
    parent = self.grprobj.gosubdag
    subdag = GoSubDag(
        goids,
        parent.get_go2obj(goids),
        parent.relationships,
        rcntobj=parent.rcntobj,
        go2nt=parent.go2nt,
        **kws_dag)
    return GoSubDagPlot(subdag, **kws_plt)
def _get_gosubdagplotnt(self, ntplt, title, go2color, pltargs):
    """Return a GoSubDagPlotNt holding both a GoSubDagPlot object and ``ntplt``.

    Plot keywords start from ``pltargs.get_kws_plt()`` and are extended with
    the quoted header GO as 'id', a title giving the number of user GOs, the
    fill and border colors, and 'parentcnt' when ``ntplt.parentcnt`` is truthy.
    """
    plot_kws = pltargs.get_kws_plt()
    plot_kws.update({
        'id': '"{ID}"'.format(ID=ntplt.hdrgo),
        'title': "{TITLE} of {M} user GOs".format(TITLE=title, M=ntplt.tot_usrgos),
        'go2color': go2color,
        'go2bordercolor': pltargs.go2bordercolor,
    })
    if ntplt.parentcnt:
        plot_kws["parentcnt"] = True
    plotter = GoSubDagPlot(ntplt.gosubdag, **plot_kws)
    return GoSubDagPlotNt(self.grprobj, plotter, ntplt)
def _plt_gogrouped(self, goids, go2color_usr, **kws):
    """Plot grouped GO IDs and return the name of the image file.

    Args:
        goids: GO IDs to group and plot.
        go2color_usr: Mapping of GO ID to color; these entries override the
            colors computed by GrouperColors.
        **kws: Must contain 'outfile' and 'sections'; all keywords are
            forwarded to GoSubDagPlot.

    Returns:
        The output image filename obtained from ``self.get_outfile``.

    Side effects:
        Writes the plotted GO IDs and a "sections read" summary line to
        sys.stdout.
    """
    fout_img = self.get_outfile(kws['outfile'], goids)
    sections = read_sections(kws['sections'], exclude_ungrouped=True)
    # NOTE: a leftover debug print of `kws` was removed here; it only added
    # noise to stdout.
    grprobj_cur = self._get_grprobj(goids, sections)
    # GO: purple=hdr-only, green=hdr&usr, yellow=usr-only
    # BORDER: Black=hdr Blu=hdr&usr
    grpcolor = GrouperColors(grprobj_cur)  # get_bordercolor get_go2color_users
    grp_go2color = grpcolor.get_go2color_users()
    grp_go2bordercolor = grpcolor.get_bordercolor()
    # User-specified colors take precedence over grouper colors
    for goid, color in go2color_usr.items():
        grp_go2color[goid] = color
    objcolor = Go2Color(self.gosubdag, objgoea=None,
                        go2color=grp_go2color, go2bordercolor=grp_go2bordercolor)
    go2txt = GrouperPlot.get_go2txt(grprobj_cur, grp_go2color, grp_go2bordercolor)
    objplt = GoSubDagPlot(self.gosubdag, Go2Color=objcolor, go2txt=go2txt, **kws)
    objplt.prt_goids(sys.stdout)
    objplt.plt_dag(fout_img)
    # read_sections may return None; report "NO" sections in that case
    sys.stdout.write("{N:>6} sections read\n".format(
        N="NO" if sections is None else len(sections)))
    return fout_img
def _plt_gosubdag(self, goids, go2color, **kws):
    """Plot ``self.gosubdag`` with the given colors and return the image filename.

    The filename comes from ``self.get_outfile(kws['outfile'], goids)``. The
    plotted GO IDs are printed to sys.stdout before plotting.
    """
    out_name = self.get_outfile(kws['outfile'], goids)
    color_obj = Go2Color(self.gosubdag, objgoea=None, go2color=go2color)
    plotter = GoSubDagPlot(self.gosubdag, Go2Color=color_obj, **kws)
    plotter.prt_goids(sys.stdout)
    plotter.plt_dag(out_name)
    return out_name
def _plt_gosubdag(self, goids, go2color, **kws):
    """Plot ``self.gosubdag`` with the given colors and return the image filename.

    Args:
        goids: GO IDs passed to ``self.get_outfile`` when naming the output.
        go2color: Mapping of GO ID to color, handed to Go2Color.
        **kws: Must contain 'outfile'; all keywords are forwarded to
            GoSubDagPlot. Whether 'relationship' is among them is passed as
            the third argument of ``self.get_outfile``.

    Returns:
        The output image filename obtained from ``self.get_outfile``.

    Side effects:
        Writes the plotted GO IDs to sys.stdout.
    """
    # NOTE: leftover debug prints of `kws` were removed; stdout now carries
    # only the GO ID listing below.
    fout_img = self.get_outfile(kws['outfile'], goids, 'relationship' in kws)
    objcolor = Go2Color(self.gosubdag, objgoea=None, go2color=go2color)
    objplt = GoSubDagPlot(self.gosubdag, Go2Color=objcolor, **kws)
    objplt.prt_goids(sys.stdout)
    objplt.plt_dag(fout_img)
    return fout_img
def plt_goids(self, fout_img, go_sources):
    """Plot ``go_sources`` with alternate IDs marked, writing into ``self.cwd``."""
    # % src/bin/go_plot.py GOs --obo=../goatools/data/i86.obo --outfile=t00.jpg --mark_alt_id
    subdag = GoSubDag(go_sources, self.go2obj_all)
    plotter = GoSubDagPlot(subdag, mark_alt_id=True)
    out_path = os.path.join(self.cwd, fout_img)
    plotter.plt_dag(out_path)
def _do_plt(tcntobj, godag):
    """Plot the test GO-DAG to 'i148b.png', using every GO ID in ``tcntobj.go2obj``."""
    source_ids = tcntobj.go2obj.keys()
    subdag = GoSubDag(source_ids, godag, tcntobj=tcntobj)
    plotter = GoSubDagPlot(subdag)
    plotter.plt_dag('i148b.png')
def _get_goobjplt(gosubdag):
    """STEP 3) Return a plotting object for two fixed source GO IDs.

    The GO objects are taken from ``gosubdag.go2obj``.
    """
    source_ids = {"GO:0036476", "GO:0007516"}
    return GoSubDagPlot(GoSubDag(source_ids, gosubdag.go2obj))
def plt(self, fout_png, goids, relationships, go2txt):
    """Plot the GO terms above the researcher's GO terms.

    A GoSubDag is built from ``goids`` and ``self.godag`` with the given
    ``relationships``; ``go2txt`` is handed to GoSubDagPlot.
    """
    subdag = GoSubDag(goids, self.godag, relationships)
    plotter = GoSubDagPlot(subdag, go2txt=go2txt)
    plotter.plt_dag(fout_png)
def plot_gos(fout_img, goids, go2obj, **kws):
    """Plot the paths from the given GO IDs, using the GO objects in ``go2obj``.

    Extra keyword arguments are forwarded to GoSubDagPlot.
    """
    subdag = GoSubDag(goids, go2obj, rcntobj=True)
    GoSubDagPlot(subdag, **kws).plt_dag(fout_img)
def _get_objplt(self, go2color, **kws):
    """Return a GoSubDagPlot for ``self.gosubdag`` colored by ``go2color``.

    All keyword arguments are forwarded to GoSubDagPlot.
    """
    color_obj = Go2Color(self.gosubdag, objgoea=None, go2color=go2color)
    plotter = GoSubDagPlot(self.gosubdag, Go2Color=color_obj, **kws)
    return plotter
def plot_all(self, goids, name, prt=sys.stdout):
    """Create three plots: no relationships, only 'part_of', and all relationships.

    Progress text and GO ID listings are written to ``prt``. In the third
    plot, GO IDs absent from the no-relationship sub-DAG are given the
    color '#d5ffff'.
    """
    # --- 1) No relationships ---------------------------------------------
    prt.write("\nCreate GoSubDag not loading any relationship")
    dag_none = GoSubDag(goids, self.go2obj, relationships=False, prt=prt)
    dag_none.prt_goids(dag_none.go2obj, prt=prt)
    prt.write("{N} GO IDS".format(N=len(dag_none.go2obj)))
    plt_none = GoSubDagPlot(dag_none, mark_alt_id=True)
    fout_none = os.path.join(REPO, "a_relationship_{NAME}_r0.png".format(NAME=name))
    plt_none.plt_dag(fout_none)

    # --- 2) Only the 'part_of' relationship --------------------------------
    prt.write(
        "\nCreate GoSubDag while loading only the 'part_of' relationship")
    dag_partof = GoSubDag(goids, self.go2obj, relationships=['part_of'], prt=prt)
    dag_partof.prt_goids(dag_partof.go2obj, prt=prt)
    prt.write("{N} GO IDS".format(N=len(dag_partof.go2obj)))
    plt_partof = GoSubDagPlot(dag_partof, mark_alt_id=True)
    prt.write("GO SOURCES:")
    dag_partof.prt_goids(dag_partof.go_sources, prt=prt)
    fout_partof = os.path.join(REPO, "a_relationship_{NAME}_partof.png".format(NAME=name))
    plt_partof.plt_dag(fout_partof)

    # --- 3) All relationships ----------------------------------------------
    prt.write("\nCreate GoSubDag while loading all relationships")
    dag_all = GoSubDag(goids, self.go2obj, relationships=True, prt=prt)
    prt.write("ALL {N} GO IDS:".format(N=len(dag_all.go2obj)))
    dag_all.prt_goids(dag_all.go2obj, prt=prt)
    prt.write("2 GO SOURCES:")
    dag_all.prt_goids(dag_all.go_sources, prt=prt)
    # GO IDs that appear only once relationships are loaded
    new_ids = set(dag_all.go2obj) - set(dag_none.go2obj)
    new_colors = dict.fromkeys(new_ids, '#d5ffff')
    prt.write("{N} NEW GO IDS:".format(N=len(new_ids)))
    dag_all.prt_goids(new_ids, prt=prt)
    prt.write("{N} GO IDS".format(N=len(dag_all.go2obj)))
    plt_all = GoSubDagPlot(dag_all, mark_alt_id=True, go2color=new_colors)
    fout_all = os.path.join(REPO, "a_relationship_{NAME}_r1.png".format(NAME=name))
    plt_all.plt_dag(fout_all)