def run(self, go_sources, exp_gos, **kws):
    """Build a GoSubDag from the given sources and check its GO IDs.

    Prints the sources, creates a GoSubDag over self.go2obj_all (forwarding
    any extra keyword arguments), prints the resulting GO IDs, and asserts
    that the keys of go2nt equal exp_gos.
    """
    print("\nSRCS: {GOs}".format(GOs=go_sources))
    subdag = GoSubDag(go_sources, self.go2obj_all, **kws)
    go2nt = subdag.go2nt
    subdag.prt_goids(go2nt)
    # The failure message lists actual vs. expected IDs plus the inputs used
    assert set(go2nt) == exp_gos, "ACT({}) != EXP({})\n{} {}".format(
        sorted(go2nt), sorted(exp_gos), go_sources, kws)
def test_nb():
    """Run the notebook example: report dcnt for one GO term, then plot a subset.

    A GoSubDag is built over all leaf terms three times (no relationships
    argument, relationships=True, and relationships={'part_of'}); for each,
    the go2nt entry of GO:0019012 and its dcnt are printed.  Finally a
    two-term subset is plotted to 'virion_capsid_fiber.png'.
    """
    virion = 'GO:0019012'
    godag = get_godag("go-basic.obo", optional_attrs={'relationship'})
    # Leaf terms are the GO terms having no children
    go_leafs = {term.item_id for term in godag.values() if not term.children}

    def _prt_virion(subdag, pattern):
        """Print the virion entry of subdag.go2nt, then its dcnt via pattern."""
        ntgo = subdag.go2nt[virion]
        print(ntgo)
        print(pattern.format(dcnt=ntgo.dcnt))

    gosubdag_r0 = GoSubDag(go_leafs, godag)
    _prt_virion(gosubdag_r0, 'r0 THE VALUE OF dcnt IS: {dcnt}')

    gosubdag_r1 = GoSubDag(go_leafs, godag, relationships=True)
    _prt_virion(gosubdag_r1, 'r1 THE VALUE OF dcnt IS: {dcnt}')

    gosubdag_partof = GoSubDag(go_leafs, godag, relationships={'part_of'})
    _prt_virion(gosubdag_partof, 'THE VALUE OF dcnt IS: {dcnt}')

    descendants = gosubdag_partof.rcntobj.go2descendants[virion]
    print('{N} descendants of virion were found'.format(
        N=len(descendants)))

    # Keep the plot small: use only two virion descendants as sources, so the
    # plotted subset holds just those terms and their ancestors
    virion_capsid_fiber = {'GO:0098033', 'GO:0098032'}
    prtfmt = '{NS} {GO} dcnt({dcnt}) D-{depth:02} {GO_name}'
    gosubdag_partof.prt_goids(virion_capsid_fiber, prtfmt)

    pltdag = GoSubDag(virion_capsid_fiber, godag, relationships={'part_of'})
    GoSubDagPlot(pltdag).plt_dag('virion_capsid_fiber.png')
class Run(object):
    """Holds the GO DAG, annotations and term counts used by the plotting test."""

    def __init__(self, obo, gaf, prt):
        self.prt = prt
        self.cwd = os.getcwd()
        # Gene Ontologies: the obo file is resolved relative to REPO
        fin_obo = os.path.join(REPO, "../goatools/", obo)
        self.go2obj_all = get_godag(fin_obo)
        # Annotations: download the GAF, then read gene-to-GO associations
        fin_gaf = dnld_gaf(gaf)
        print("GAF: {GAF}\n".format(GAF=fin_gaf))
        self.gene2gos = read_gaf(fin_gaf)
        self.tcntobj = TermCounts(self.go2obj_all, self.gene2gos)
        # GoSubDag over the whole DAG (sources=None), carrying the term counts
        self.gosubdag_all = GoSubDag(
            None, self.go2obj_all, tcntobj=self.tcntobj, prt=prt)
        self.prtfmt = self.gosubdag_all.prt_attr['fmta']

    def prt_goids_all(self, prt):
        """Print all GO IDs, including alternate GO IDs, in GODag."""
        self.gosubdag_all.prt_goids(prtfmt=self.prtfmt, prt=prt)

    def plt_goids(self, fout_img, go_sources):
        """Print and plot the GO IDs reachable from go_sources.

        The image is written to fout_img under the directory that was current
        when this object was created.
        """
        # Original note, command-line form:
        # % src/bin/go_plot.py GOs --obo=../goatools/data/i86.obo --outfile=t00.jpg --mark_alt_id
        dag_all = self.gosubdag_all
        # Reuse rcntobj and go2nt already computed for the full DAG
        gosubdag = GoSubDag(
            go_sources,
            dag_all.go2obj,
            prt=self.prt,
            rcntobj=dag_all.rcntobj,
            go2nt=dag_all.go2nt)
        fmt = gosubdag.prt_attr['fmta']
        plt_ids = GoSubDagPlot(gosubdag).get_goids_plt()
        self.prt.write("\n{N} GO IDs\n".format(N=len(plt_ids)))
        gosubdag.prt_goids(plt_ids, prtfmt=fmt, prt=self.prt)
        fout_full = os.path.join(self.cwd, fout_img)
        GoSubDagPlot(gosubdag, mark_alt_id=True).plt_dag(fout_full)
class Run(object):
    """Printing GO IDs and Plotting; GODag from obo using GoSubDag."""

    def __init__(self, obo):
        fin_obo = os.path.join(REPO, obo)
        self.go2obj_all = get_godag(fin_obo)
        # sources=None: GoSubDag spanning the entire loaded DAG
        self.gosubdag_all = GoSubDag(None, self.go2obj_all)
        self.prtfmt = self.gosubdag_all.prt_attr['fmta']

    def prt_goids_all(self, prt):
        """Print all GO IDs, including alternate GO IDs, in GODag."""
        self.gosubdag_all.prt_goids(prtfmt=self.prtfmt, prt=prt)

    def plt_goids(self, fout_img, go_sources):
        """Plot the sub-DAG of go_sources to fout_img (path joined onto REPO)."""
        # Original note, command-line form:
        # % src/bin/go_plot.py GOs --obo=../goatools/data/i86.obo --outfile=t00.jpg --mark_alt_id
        subdag = GoSubDag(go_sources, self.go2obj_all)
        plotter = GoSubDagPlot(subdag, mark_alt_id=True)
        plotter.plt_dag(os.path.join(REPO, fout_img))

    def run(self, go_sources, exp_gos, **kws):
        """Build a GoSubDag from go_sources and assert its GO IDs equal exp_gos."""
        print("\nSRCS: {GOs}".format(GOs=go_sources))
        subdag = GoSubDag(go_sources, self.go2obj_all, **kws)
        go2nt = subdag.go2nt
        subdag.prt_goids(go2nt)
        # The failure message lists actual vs. expected IDs plus the inputs used
        assert set(go2nt) == exp_gos, "ACT({}) != EXP({})\n{} {}".format(
            sorted(go2nt), sorted(exp_gos), go_sources, kws)
def plt_goids(self, fout_img, go_sources):
    """Print and plot the GO IDs reachable from go_sources.

    Writes the count and listing of plotted GO IDs to self.prt, then saves
    the plot (alternate IDs marked) to fout_img under self.cwd.
    """
    # Original note, command-line form:
    # % src/bin/go_plot.py GOs --obo=../goatools/data/i86.obo --outfile=t00.jpg --mark_alt_id
    dag_all = self.gosubdag_all
    # Reuse rcntobj and go2nt already held by the full-DAG GoSubDag
    gosubdag = GoSubDag(
        go_sources,
        dag_all.go2obj,
        prt=self.prt,
        rcntobj=dag_all.rcntobj,
        go2nt=dag_all.go2nt)
    fmt = gosubdag.prt_attr['fmta']
    plt_ids = GoSubDagPlot(gosubdag).get_goids_plt()
    self.prt.write("\n{N} GO IDs\n".format(N=len(plt_ids)))
    gosubdag.prt_goids(plt_ids, prtfmt=fmt, prt=self.prt)
    fout_full = os.path.join(self.cwd, fout_img)
    GoSubDagPlot(gosubdag, mark_alt_id=True).plt_dag(fout_full)
def plt_goids(self, fout_img, go_sources):
    """Report, then plot, the GO IDs selected by go_sources.

    The GO ID count and listing go to self.prt; the image (with alternate
    IDs marked) is saved as fout_img inside self.cwd.
    """
    # Original note, command-line form:
    # % src/bin/go_plot.py GOs --obo=../goatools/data/i86.obo --outfile=t00.jpg --mark_alt_id
    full = self.gosubdag_all
    # Hand over the full DAG's rcntobj and go2nt to the new GoSubDag
    kws = {
        'prt': self.prt,
        'rcntobj': full.rcntobj,
        'go2nt': full.go2nt,
    }
    gosubdag = GoSubDag(go_sources, full.go2obj, **kws)
    prtfmt = gosubdag.prt_attr['fmta']
    goids = GoSubDagPlot(gosubdag).get_goids_plt()
    self.prt.write("\n{N} GO IDs\n".format(N=len(goids)))
    gosubdag.prt_goids(goids, prtfmt=prtfmt, prt=self.prt)
    plotter = GoSubDagPlot(gosubdag, mark_alt_id=True)
    plotter.plt_dag(os.path.join(self.cwd, fout_img))
class Run(object):
    """Printing GO IDs and Plotting; GODag from obo using GoSubDag."""

    def __init__(self, obo):
        # Remember the starting directory; plots are written beneath it
        self.cwd = os.getcwd()
        fin_obo = os.path.join(REPO, "../goatools/", obo)
        self.go2obj_all = get_godag(fin_obo)
        # sources=None: GoSubDag spanning the entire loaded DAG
        self.gosubdag_all = GoSubDag(None, self.go2obj_all)
        self.prtfmt = self.gosubdag_all.prt_attr['fmta']

    def prt_goids_all(self, prt):
        """Print all GO IDs, including alternate GO IDs, in GODag."""
        self.gosubdag_all.prt_goids(prtfmt=self.prtfmt, prt=prt)

    def plt_goids(self, fout_img, go_sources):
        """Plot the sub-DAG of go_sources to fout_img (path joined onto self.cwd)."""
        # Original note, command-line form:
        # % src/bin/go_plot.py GOs --obo=../goatools/data/i86.obo --outfile=t00.jpg --mark_alt_id
        subdag = GoSubDag(go_sources, self.go2obj_all)
        plotter = GoSubDagPlot(subdag, mark_alt_id=True)
        plotter.plt_dag(os.path.join(self.cwd, fout_img))
def test_semantic_i88():
    """Computing basic semantic similarities between GO terms.

    Loads go-basic.obo and the arabidopsis (TAIR) annotations, builds term
    counts, and prints the semantic similarity, information content, Resnik
    score and Lin score for GO:0048364 and GO:0044707.
    """
    godag = obo_parser.GODag("go-basic.obo")
    # Use every key of the DAG as a source.  An earlier assignment filtered the
    # keys with `go == o.id` but was overwritten immediately, so it was dropped.
    goids = set(godag.keys())
    # Get all the annotations from arabidopsis.
    fin_gaf = os.path.join(REPO, "tair.gaf")
    # dnld_assc includes read_gaf
    associations = dnld_assc(fin_gaf, godag, prt=None)

    # First get the counts and information content for each GO term.
    termcounts = TermCounts(godag, associations)
    gosubdag = GoSubDag(goids, godag, tcntobj=termcounts)

    # Now we can calculate the semantic distance and semantic similarity.
    # Output recorded by the original author:
    # "The semantic similarity between terms GO:0048364 and GO:0044707 is 0.25.
    go_id3 = 'GO:0048364'  # BP level-03 depth-04 root development
    go_id4 = 'GO:0044707'  # BP level-02 depth-02 single-multicellular organism process
    go_root = deepest_common_ancestor([go_id3, go_id4], godag)
    sim = semantic_similarity(go_id3, go_id4, godag)
    print('\nThe semantic similarity between terms {GO1} and {GO2} is {VAL}.'.format(
        GO1=go_id3, GO2=go_id4, VAL=sim))
    gosubdag.prt_goids([go_root, go_id3, go_id4])

    # Calculate the information content
    go_id = "GO:0048364"
    infocontent = get_info_content(go_id, termcounts)
    print('\nInformation content ({GO}) = {INFO}\n'.format(GO=go_id, INFO=infocontent))

    # Resnik's similarity measure is defined as the information content of the most
    # informative common ancestor. That is, the most specific common parent-term in
    # the GO. Output recorded by the original author:
    # "Resnik similarity score (GO:0048364, GO:0044707) = 4.0540784252
    sim_r = resnik_sim(go_id3, go_id4, godag, termcounts)
    print('Resnik similarity score ({GO1}, {GO2}) = {VAL}'.format(
        GO1=go_id3, GO2=go_id4, VAL=sim_r))

    # Output recorded by the original author:
    # Lin similarity score (GO:0048364, GO:0044707) = -0.607721957763
    sim_l = lin_sim(go_id3, go_id4, godag, termcounts)
    print('Lin similarity score ({GO1}, {GO2}) = {VAL}'.format(
        GO1=go_id3, GO2=go_id4, VAL=sim_l))
def test_semantic_i88():
    """Computing basic semantic similarities between GO terms.

    Reads go-basic.obo plus the arabidopsis (TAIR) annotations, derives term
    counts, and prints four measures for GO:0048364 and GO:0044707: semantic
    similarity, information content, Resnik score and Lin score.
    """
    godag = obo_parser.GODag("go-basic.obo")
    # Every DAG key is a source.  The former `go == o.id` filter was a dead
    # store (overwritten on the very next statement), so it has been removed.
    goids = set(godag.keys())
    # Get all the annotations from arabidopsis.
    fin_gaf = os.path.join(REPO, "tair.gaf")
    # dnld_assc includes read_gaf
    associations = dnld_assc(fin_gaf, godag, prt=None)

    # First get the counts and information content for each GO term.
    termcounts = TermCounts(godag, associations)
    gosubdag = GoSubDag(goids, godag, tcntobj=termcounts)

    # Semantic distance and semantic similarity.
    # Output recorded by the original author:
    # "The semantic similarity between terms GO:0048364 and GO:0044707 is 0.25.
    go_id3 = 'GO:0048364'  # BP level-03 depth-04 root development
    go_id4 = 'GO:0044707'  # BP level-02 depth-02 single-multicellular organism process
    go_root = deepest_common_ancestor([go_id3, go_id4], godag)
    sim = semantic_similarity(go_id3, go_id4, godag)
    print('\nThe semantic similarity between terms {GO1} and {GO2} is {VAL}.'.format(
        GO1=go_id3, GO2=go_id4, VAL=sim))
    gosubdag.prt_goids([go_root, go_id3, go_id4])

    # Calculate the information content
    go_id = "GO:0048364"
    infocontent = get_info_content(go_id, termcounts)
    print('\nInformation content ({GO}) = {INFO}\n'.format(GO=go_id, INFO=infocontent))

    # Resnik's similarity measure is defined as the information content of the most
    # informative common ancestor. That is, the most specific common parent-term in
    # the GO. Output recorded by the original author:
    # "Resnik similarity score (GO:0048364, GO:0044707) = 4.0540784252
    sim_r = resnik_sim(go_id3, go_id4, godag, termcounts)
    print('Resnik similarity score ({GO1}, {GO2}) = {VAL}'.format(
        GO1=go_id3, GO2=go_id4, VAL=sim_r))

    # Output recorded by the original author:
    # Lin similarity score (GO:0048364, GO:0044707) = -0.607721957763
    sim_l = lin_sim(go_id3, go_id4, godag, termcounts)
    print('Lin similarity score ({GO1}, {GO2}) = {VAL}'.format(
        GO1=go_id3, GO2=go_id4, VAL=sim_l))
class Run(object):
    """Bundles the GO DAG, annotations and term counts for the plotting test."""

    def __init__(self, obo, gaf, prt):
        self.prt = prt
        self.cwd = os.getcwd()
        # Gene Ontologies: obo path is built relative to REPO
        self.go2obj_all = get_godag(os.path.join(REPO, "../goatools/", obo))
        # Annotations: download the GAF file and read its associations
        gaf_path = dnld_gaf(gaf)
        print("GAF: {GAF}\n".format(GAF=gaf_path))
        self.gene2gos = read_gaf(gaf_path)
        self.tcntobj = TermCounts(self.go2obj_all, self.gene2gos)
        # GoSubDag for the whole DAG (sources=None) with term counts attached
        kws_all = {'tcntobj': self.tcntobj, 'prt': prt}
        self.gosubdag_all = GoSubDag(None, self.go2obj_all, **kws_all)
        self.prtfmt = self.gosubdag_all.prt_attr['fmta']

    def prt_goids_all(self, prt):
        """Print all GO IDs, including alternate GO IDs, in GODag."""
        self.gosubdag_all.prt_goids(prtfmt=self.prtfmt, prt=prt)

    def plt_goids(self, fout_img, go_sources):
        """Report, then plot, the GO IDs selected by go_sources.

        The image is saved as fout_img inside the directory recorded at
        construction time.
        """
        # Original note, command-line form:
        # % src/bin/go_plot.py GOs --obo=../goatools/data/i86.obo --outfile=t00.jpg --mark_alt_id
        full = self.gosubdag_all
        # Hand over the full DAG's rcntobj and go2nt to the new GoSubDag
        kws = {
            'prt': self.prt,
            'rcntobj': full.rcntobj,
            'go2nt': full.go2nt,
        }
        gosubdag = GoSubDag(go_sources, full.go2obj, **kws)
        prtfmt = gosubdag.prt_attr['fmta']
        goids = GoSubDagPlot(gosubdag).get_goids_plt()
        self.prt.write("\n{N} GO IDs\n".format(N=len(goids)))
        gosubdag.prt_goids(goids, prtfmt=prtfmt, prt=self.prt)
        plotter = GoSubDagPlot(gosubdag, mark_alt_id=True)
        plotter.plt_dag(os.path.join(self.cwd, fout_img))
def _wr_sub_obo(fout_obo, goid_chosen, godag_r1, fin_obo):
    """Write and plot a small obo subset used for visualizing this test's terms.

    Starting from goid_chosen, collects the GO IDs of its GoSubDag (all
    relationships loaded), writes those terms from fin_obo into fout_obo, and
    runs scripts/go_plot.py twice to draw the subset with (-r) and without
    relationships.
    """
    # Keep only entries keyed by the term's own item_id
    godag = {goid: term for goid, term in godag_r1.items() if goid == term.item_id}
    _prt_rtel_ctr(godag)
    rels_all = {
        'part_of',
        'regulates',
        'negatively_regulates',
        'positively_regulates',
    }
    # NOTE(review): reads `.id` here while the filter above uses `.item_id` --
    # confirm the two attributes are interchangeable.
    leaf_goids = {term.id for term in godag.values() if not term.children}
    gosubdag_r1 = GoSubDag(leaf_goids, godag, relationships=True, prt=sys.stdout)
    goids_src_r1_all = _get_leafs_w_relsinhier(rels_all, gosubdag_r1)
    gosubdag_r1.prt_goids(goids_src_r1_all)

    # Pick one of the GO IDs as a source for the subset DAG
    gosubdag_viral = GoSubDag({goid_chosen}, godag, relationships=True, prt=sys.stdout)
    goids_viral = set(gosubdag_viral.go2obj)
    with open(fout_obo, 'w') as prt:
        WrSubObo.prt_goterms(fin_obo, goids_viral, prt)
        print('{N} GO IDs WROTE: {OBO}'.format(N=len(goids_viral), OBO=fout_obo))

    # Plot obo subset: first with relationships (-r), then without
    pat_r1 = '{REPO}/scripts/go_plot.py {GO} -o {PNG} -r'
    pat_r0 = '{REPO}/scripts/go_plot.py {GO} -o {PNG}'
    for pattern, suffix in ((pat_r1, '_r1.png'), (pat_r0, '_r0.png')):
        fout_png = fout_obo.replace('.obo', suffix)
        system(pattern.format(REPO=REPO, PNG=fout_png, GO=goid_chosen))
def plot_all(self, goids, name, prt=sys.stdout):
    """Create plots with various numbers of relationships.

    Builds three GoSubDag objects from the same source GO IDs: one with
    relationships=False, one loading only 'part_of', and one loading all
    relationships.  The GO IDs of each are printed to prt and one PNG per
    variant is written under REPO (a_relationship_<name>_r0.png,
    a_relationship_<name>_partof.png, a_relationship_<name>_r1.png).  GO IDs
    present only when all relationships are loaded are colored '#d5ffff' in
    the last plot.

    Args:
        goids: source GO IDs handed to GoSubDag.
        name: text inserted into each output image filename.
        prt: writable stream receiving the progress text.
    """
    prt.write("\nCreate GoSubDag not loading any relationship")
    gosubdag_orig = GoSubDag(goids, self.go2obj, relationships=False, prt=prt)
    gosubdag_orig.prt_goids(gosubdag_orig.go2obj, prt=prt)
    prt.write("{N} GO IDS".format(N=len(gosubdag_orig.go2obj)))
    gopltdag = GoSubDagPlot(gosubdag_orig, mark_alt_id=True)
    gopltdag.plt_dag(os.path.join(REPO, "a_relationship_{NAME}_r0.png".format(NAME=name)))

    # goids.update(['GO:0007507'], ['GO:0072359'])
    prt.write("\nCreate GoSubDag while loading only the 'part_of' relationship")
    gosubdag = GoSubDag(goids, self.go2obj, relationships=['part_of'], prt=prt)
    gosubdag.prt_goids(gosubdag.go2obj, prt=prt)
    prt.write("{N} GO IDS".format(N=len(gosubdag.go2obj)))
    gopltdag = GoSubDagPlot(gosubdag, mark_alt_id=True)
    prt.write("GO SOURCES:")
    gosubdag.prt_goids(gosubdag.go_sources, prt=prt)
    gopltdag.plt_dag(os.path.join(REPO, "a_relationship_{NAME}_partof.png".format(NAME=name)))

    prt.write("\nCreate GoSubDag while loading all relationships")
    gosubdag = GoSubDag(goids, self.go2obj, relationships=True, prt=prt)
    prt.write("ALL {N} GO IDS:".format(N=len(gosubdag.go2obj)))
    gosubdag.prt_goids(gosubdag.go2obj, prt=prt)
    # Report the real number of sources; the label used to hard-code "2",
    # which is wrong whenever the caller passes a different number of GO IDs.
    prt.write("{N} GO SOURCES:".format(N=len(gosubdag.go_sources)))
    gosubdag.prt_goids(gosubdag.go_sources, prt=prt)
    # GO IDs that appear only once all relationships are traversed
    goids_new = set(gosubdag.go2obj).difference(set(gosubdag_orig.go2obj))
    go2color = {go: '#d5ffff' for go in goids_new}
    prt.write("{N} NEW GO IDS:".format(N=len(goids_new)))
    gosubdag.prt_goids(goids_new, prt=prt)
    prt.write("{N} GO IDS".format(N=len(gosubdag.go2obj)))
    gopltdag = GoSubDagPlot(gosubdag, mark_alt_id=True, go2color=go2color)
    gopltdag.plt_dag(os.path.join(REPO, "a_relationship_{NAME}_r1.png".format(NAME=name)))
def plot_all(self, goids, name, prt=sys.stdout):
    """Create plots with various numbers of relationships.

    Three GoSubDag objects are built from the same source GO IDs: with
    relationships=False, with only 'part_of', and with all relationships.
    Each variant's GO IDs are printed to prt and its plot is saved under
    REPO as a_relationship_<name>_r0.png, a_relationship_<name>_partof.png
    and a_relationship_<name>_r1.png respectively.  In the last plot, GO IDs
    absent from the relationship-free variant are colored '#d5ffff'.

    Args:
        goids: source GO IDs handed to GoSubDag.
        name: text inserted into each output image filename.
        prt: writable stream receiving the progress text.
    """
    prt.write("\nCreate GoSubDag not loading any relationship")
    gosubdag_orig = GoSubDag(goids, self.go2obj, relationships=False, prt=prt)
    gosubdag_orig.prt_goids(gosubdag_orig.go2obj, prt=prt)
    prt.write("{N} GO IDS".format(N=len(gosubdag_orig.go2obj)))
    gopltdag = GoSubDagPlot(gosubdag_orig, mark_alt_id=True)
    gopltdag.plt_dag(
        os.path.join(REPO, "a_relationship_{NAME}_r0.png".format(NAME=name)))

    # goids.update(['GO:0007507'], ['GO:0072359'])
    prt.write(
        "\nCreate GoSubDag while loading only the 'part_of' relationship")
    gosubdag = GoSubDag(goids, self.go2obj, relationships=['part_of'], prt=prt)
    gosubdag.prt_goids(gosubdag.go2obj, prt=prt)
    prt.write("{N} GO IDS".format(N=len(gosubdag.go2obj)))
    gopltdag = GoSubDagPlot(gosubdag, mark_alt_id=True)
    prt.write("GO SOURCES:")
    gosubdag.prt_goids(gosubdag.go_sources, prt=prt)
    gopltdag.plt_dag(
        os.path.join(REPO, "a_relationship_{NAME}_partof.png".format(NAME=name)))

    prt.write("\nCreate GoSubDag while loading all relationships")
    gosubdag = GoSubDag(goids, self.go2obj, relationships=True, prt=prt)
    prt.write("ALL {N} GO IDS:".format(N=len(gosubdag.go2obj)))
    gosubdag.prt_goids(gosubdag.go2obj, prt=prt)
    # Print the actual source count instead of the former hard-coded "2",
    # which mislabeled the listing for any other number of source GO IDs.
    prt.write("{N} GO SOURCES:".format(N=len(gosubdag.go_sources)))
    gosubdag.prt_goids(gosubdag.go_sources, prt=prt)
    # GO IDs gained by traversing all relationships
    goids_new = set(gosubdag.go2obj).difference(set(gosubdag_orig.go2obj))
    go2color = {go: '#d5ffff' for go in goids_new}
    prt.write("{N} NEW GO IDS:".format(N=len(goids_new)))
    gosubdag.prt_goids(goids_new, prt=prt)
    prt.write("{N} GO IDS".format(N=len(gosubdag.go2obj)))
    gopltdag = GoSubDagPlot(gosubdag, mark_alt_id=True, go2color=go2color)
    gopltdag.plt_dag(
        os.path.join(REPO, "a_relationship_{NAME}_r1.png".format(NAME=name)))