class WrSubObo(object):
    """Read a large GO-DAG from an obo file. Write a subset GO-DAG into a small obo file.

    Attributes:
        fin_obo: Name of the source obo file; it is re-read line by line by ``wrobo``.
        godag: ``GODag`` built from ``fin_obo``; used to look up GO terms by ID.
    """

    def __init__(self, fin_obo):
        self.fin_obo = fin_obo
        self.godag = GODag(fin_obo)

    def wrobo(self, fout_obo, goid_sources):
        """Write a subset obo file containing GO ID sources and their parents.

        Everything before the first ``[Term]`` stanza (the obo header) is
        copied as-is, ``[Term]`` stanzas are copied only when their ID is
        selected, and ``[Typedef]`` stanzas are always copied.

        Args:
            fout_obo: Name of the obo file to create (overwritten if present).
            goid_sources: Iterable of GO IDs whose stanzas, and whose parents'
                stanzas, are written.
        """
        goids_all = self._get_all_parents(goid_sources).union(goid_sources)
        # b_trm: a "[Term]" line was seen and its "id:" line is still pending.
        b_trm = False
        # b_prt: the current input line should be copied; starts True so the
        # obo header is copied.
        b_prt = True
        with open(fout_obo, 'w') as prt:
            self._prt_info(prt, goid_sources, goids_all)
            with open(self.fin_obo) as ifstrm:
                for line in ifstrm:
                    if not b_trm:
                        if line.startswith("[Term]"):
                            # Hold back "[Term]" until the ID on the next
                            # line tells us whether this stanza is wanted.
                            b_trm = True
                            b_prt = False
                        elif line.startswith("[Typedef]"):
                            # Typedefs are kept regardless of whether the
                            # preceding term was selected.
                            b_prt = True
                    elif line.startswith('id: GO'):
                        b_trm = False
                        # Characters 4..13 hold the 10-character "GO:nnnnnnn".
                        b_prt = line[4:14] in goids_all
                        if b_prt:
                            prt.write("[Term]\n")
                    if b_prt:
                        prt.write(line)
        sys.stdout.write(" WROTE {N} GO TERMS: {OBO}\n".format(
            N=len(goids_all), OBO=fout_obo))

    def _prt_info(self, prt, goid_sources, goids_all):
        """Print information describing how this obo subset was created.

        Every line is prefixed with "!" so that it is an obo comment rather
        than an unparsable tag line at the top of the output file.
        """
        prt.write(
            "! Contains {N} GO IDs. Created using {M} GO sources:\n".format(
                N=len(goids_all), M=len(goid_sources)))
        for goid in goid_sources:
            prt.write("! {GO}\n".format(GO=str(self.godag.get(goid, ""))))
        prt.write("\n")

    def _get_all_parents(self, goid_sources):
        """Get all GO ID parents for all GO ID sources.

        GO IDs that are not present in ``self.godag`` are ignored.
        """
        parents = set()
        for goid in goid_sources:
            goterm = self.godag.get(goid, None)
            if goterm is not None:
                parents |= goterm.get_all_parents()
        return parents
class WrSubObo(object):
    """Read a large GO-DAG from an obo file. Write a subset GO-DAG into a small obo file.

    Attributes:
        fin_obo: Name of the source obo file; it is re-read line by line by ``wrobo``.
        godag: ``GODag`` built from ``fin_obo`` with the given optional attributes.
        relationships: True when 'relationship' is among ``optional_attrs``.
    """

    def __init__(self, fin_obo, optional_attrs=None, load_obsolete=None):
        self.fin_obo = fin_obo
        self.godag = GODag(fin_obo, optional_attrs, load_obsolete)
        self.relationships = optional_attrs is not None and 'relationship' in optional_attrs

    def wrobo(self, fout_obo, goid_sources):
        """Write a subset obo file containing GO ID sources and their parents.

        Everything before the first ``[Term]`` stanza (the obo header) is
        copied as-is, ``[Term]`` stanzas are copied only when their ID is
        selected, and ``[Typedef]`` stanzas are always copied.

        Args:
            fout_obo: Name of the obo file to create (overwritten if present).
            goid_sources: Iterable of GO IDs used as the starting points.
        """
        goids_all = self._get_goids_all(goid_sources)
        # b_trm: a "[Term]" line was seen and its "id:" line is still pending.
        b_trm = False
        # b_prt: the current input line should be copied; starts True so the
        # obo header is copied.
        b_prt = True
        with open(fout_obo, 'w') as prt:
            self._prt_info(prt, goid_sources, goids_all)
            with open(self.fin_obo) as ifstrm:
                for line in ifstrm:
                    if not b_trm:
                        if line.startswith("[Term]"):
                            # Hold back "[Term]" until the ID on the next
                            # line tells us whether this stanza is wanted.
                            b_trm = True
                            b_prt = False
                        elif line.startswith("[Typedef]"):
                            # A 6-character slice can never equal the
                            # 9-character "[Typedef]"; startswith makes this
                            # branch reachable so typedefs are always kept.
                            b_prt = True
                    elif line.startswith('id: GO'):
                        b_trm = False
                        # Characters 4..13 hold the 10-character "GO:nnnnnnn".
                        b_prt = line[4:14] in goids_all
                        if b_prt:
                            prt.write("[Term]\n")
                    if b_prt:
                        prt.write(line)
        sys.stdout.write(" WROTE {N} GO TERMS: {OBO}\n".format(
            N=len(goids_all), OBO=fout_obo))

    def _get_goids_all(self, go_sources):
        """Given GO ID sources and optionally the relationship attribute, return all GO IDs.

        The returned set holds the keys filled in by ``CurNHigher`` plus the
        alternate IDs of every term it returned.
        """
        go2obj_user = {}
        objrel = CurNHigher(self.relationships, self.godag)
        # Fills go2obj_user in place (presumably the sources and their
        # higher-level terms -- confirm against CurNHigher).
        objrel.get_go2obj_cur_n_high(go2obj_user, go_sources)
        goids = set(go2obj_user)
        for goterm in go2obj_user.values():
            if goterm.alt_ids:
                goids.update(goterm.alt_ids)
        return goids

    def _prt_info(self, prt, goid_sources, goids_all):
        """Print obo comment lines ("!") describing how this obo subset was created."""
        prt.write(
            "! Contains {N} GO IDs. Created using {M} GO sources:\n".format(
                N=len(goids_all), M=len(goid_sources)))
        for goid in goid_sources:
            prt.write("! {GO}\n".format(GO=str(self.godag.get(goid, ""))))
        prt.write("\n")
class WrSubObo(object):
    """Read a large GO-DAG from an obo file. Write a subset GO-DAG into a small obo file.

    Attributes:
        fin_obo: Name of the source obo file, or None.
        godag: ``GODag`` built from ``fin_obo``, or None when ``fin_obo`` is
            None (only the static methods are usable in that case).
        relationships: True when 'relationship' is among ``optional_attrs``.
    """

    def __init__(self, fin_obo=None, optional_attrs=None, load_obsolete=None):
        self.fin_obo = fin_obo
        self.godag = GODag(fin_obo, optional_attrs, load_obsolete) if fin_obo is not None else None
        self.relationships = optional_attrs is not None and 'relationship' in optional_attrs

    def wrobo(self, fout_obo, goid_sources):
        """Write a subset obo file containing GO ID sources and their parents.

        Args:
            fout_obo: Name of the obo file to create (overwritten if present).
            goid_sources: Iterable of GO IDs used as the starting points.
        """
        goids_all = self._get_goids_all(goid_sources)
        with open(fout_obo, 'w') as prt:
            self._prt_info(prt, goid_sources, goids_all)
            # Argument order must follow prt_goterms(fin_obo, goids, prt).
            self.prt_goterms(self.fin_obo, goids_all, prt)
        print(" WROTE {N} GO TERMS: {OBO}\n".format(N=len(goids_all), OBO=fout_obo))

    @staticmethod
    def prt_goterms(fin_obo, goids, prt, b_prt=True):
        """Print the specified GO terms for GO IDs in arg.

        Args:
            fin_obo: Name of the obo file to read.
            goids: Container of GO IDs whose ``[Term]`` stanzas are copied.
            prt: Writable stream receiving the copied lines.
            b_prt: Whether lines preceding the first ``[Term]`` stanza (the
                obo header) are copied.
        """
        # True after a "[Term]" line until its "id:" line has been examined.
        b_trm = False
        with open(fin_obo) as ifstrm:
            for line in ifstrm:
                if not b_trm:
                    if line.startswith("[Term]"):
                        # Hold back "[Term]" until the ID on the next line
                        # tells us whether this stanza is wanted.
                        b_trm = True
                        b_prt = False
                    elif line.startswith("[Typedef]"):
                        # A 6-character slice can never equal the 9-character
                        # "[Typedef]"; startswith makes this branch reachable
                        # so typedefs are always kept.
                        b_prt = True
                elif line.startswith('id: GO'):
                    b_trm = False
                    # Characters 4..13 hold the 10-character "GO:nnnnnnn".
                    b_prt = line[4:14] in goids
                    if b_prt:
                        prt.write("[Term]\n")
                if b_prt:
                    prt.write(line)

    @staticmethod
    def get_goids(fin_obo, name):
        """Get GO IDs whose name matches given name.

        A term matches when ``name`` is a substring of its ``name:`` value.

        Args:
            fin_obo: Name of the obo file to scan.
            name: Text to look for inside each term's name.

        Returns:
            Set of the ``id:`` values of all matching ``[Term]`` stanzas.
        """
        goids = set()
        goterm = None  # tag -> value for the [Term] stanza being read, if any

        def _collect(term):
            """Record the stanza's ID when its name contains the search text."""
            # Stanzas lacking an id or a name are skipped rather than raising.
            if 'id' in term and name in term.get('name', ''):
                goids.add(term['id'])

        with open(fin_obo) as ifstrm:
            for line in ifstrm:
                if goterm is not None:
                    tag, colon, value = line.partition(':')
                    if colon:
                        goterm[tag] = value.strip()
                    else:
                        # A line without ':' (e.g. blank) ends the stanza.
                        _collect(goterm)
                        goterm = None
                elif line.startswith("[Term]"):
                    goterm = {}
        # The last stanza may end at EOF without a terminating blank line.
        if goterm is not None:
            _collect(goterm)
        return goids

    def _get_goids_all(self, go_sources):
        """Given GO ID sources and optionally the relationship attribute, return all GO IDs.

        The returned set holds the keys filled in by ``CurNHigher`` plus the
        alternate IDs of every term it returned.
        """
        go2obj_user = {}
        objrel = CurNHigher(self.relationships, self.godag)
        # Fills go2obj_user in place (presumably the sources and their
        # higher-level terms -- confirm against CurNHigher).
        objrel.get_go2obj_cur_n_high(go2obj_user, go_sources)
        goids = set(go2obj_user)
        for goterm in go2obj_user.values():
            if goterm.alt_ids:
                goids.update(goterm.alt_ids)
        return goids

    def _prt_info(self, prt, goid_sources, goids_all):
        """Print obo comment lines ("!") describing how this obo subset was created."""
        prt.write(
            "! Contains {N} GO IDs. Created using {M} GO sources:\n".format(
                N=len(goids_all), M=len(goid_sources)))
        for goid in goid_sources:
            prt.write("! {GO}\n".format(GO=str(self.godag.get(goid, ""))))
        prt.write("\n")
from goatools.obo_parser import GODag

# Print, for every data row of new_annotations.csv, the first column followed
# by a comma-separated list of "name (GO ID)" for the GO IDs in the second
# column. Columns are tab-separated; GO IDs within a column are comma-separated.
obodag = GODag("go-basic.obo")

# `with` guarantees the annotation file is closed, even if a row fails.
with open('new_annotations.csv') as annotations:
    for i, line in enumerate(annotations):
        if i == 0:
            continue  # the first line is skipped (presumably a header row)
        la = line.rstrip("\r\n").split("\t")
        if len(la) < 2:
            continue  # blank or malformed row: no GO column to report
        print(la[0])
        outline = []
        for go in la[1].split(','):
            # Strip stray whitespace so the ID matches the DAG keys.
            go = go.strip()
            if not go.startswith("GO:"):
                continue
            goterm = obodag.get(go)
            # IDs missing from the DAG are reported instead of crashing on
            # None.name.
            name = goterm.name if goterm is not None else "UNKNOWN"
            outline.append("{} ({})".format(name, go))
        print(",".join(outline))
class WrSubObo(object):
    """Read a large GO-DAG from an obo file. Write a subset GO-DAG into a small obo file.

    Attributes:
        fin_obo: Name of the source obo file, or None.
        godag: ``GODag`` built from ``fin_obo``, or None when ``fin_obo`` is
            None (only the static methods are usable in that case).
        relationships: True when 'relationship' is among ``optional_attrs``.
    """

    def __init__(self, fin_obo=None, optional_attrs=None, load_obsolete=None):
        self.fin_obo = fin_obo
        self.godag = GODag(fin_obo, optional_attrs, load_obsolete) if fin_obo is not None else None
        self.relationships = optional_attrs is not None and 'relationship' in optional_attrs

    def wrobo(self, fout_obo, goid_sources):
        """Write a subset obo file containing GO ID sources and their parents.

        Args:
            fout_obo: Name of the obo file to create (overwritten if present).
            goid_sources: Iterable of GO IDs used as the starting points.
        """
        goids_all = self._get_goids_all(goid_sources)
        with open(fout_obo, 'w') as prt:
            self._prt_info(prt, goid_sources, goids_all)
            # Argument order must follow prt_goterms(fin_obo, goids, prt).
            self.prt_goterms(self.fin_obo, goids_all, prt)
        print(" WROTE {N} GO TERMS: {OBO}\n".format(N=len(goids_all), OBO=fout_obo))

    @staticmethod
    def prt_goterms(fin_obo, goids, prt, b_prt=True):
        """Print the specified GO terms for GO IDs in arg.

        Args:
            fin_obo: Name of the obo file to read.
            goids: Container of GO IDs whose ``[Term]`` stanzas are copied.
            prt: Writable stream receiving the copied lines.
            b_prt: Whether lines preceding the first ``[Term]`` stanza (the
                obo header) are copied.
        """
        # True after a "[Term]" line until its "id:" line has been examined.
        b_trm = False
        with open(fin_obo) as ifstrm:
            for line in ifstrm:
                if not b_trm:
                    if line.startswith("[Term]"):
                        # Hold back "[Term]" until the ID on the next line
                        # tells us whether this stanza is wanted.
                        b_trm = True
                        b_prt = False
                    elif line.startswith("[Typedef]"):
                        # A 6-character slice can never equal the 9-character
                        # "[Typedef]"; startswith makes this branch reachable
                        # so typedefs are always kept.
                        b_prt = True
                elif line.startswith('id: GO'):
                    b_trm = False
                    # Characters 4..13 hold the 10-character "GO:nnnnnnn".
                    b_prt = line[4:14] in goids
                    if b_prt:
                        prt.write("[Term]\n")
                if b_prt:
                    prt.write(line)

    @staticmethod
    def get_goids(fin_obo, name):
        """Get GO IDs whose name matches given name.

        A term matches when ``name`` is a substring of its ``name:`` value.

        Args:
            fin_obo: Name of the obo file to scan.
            name: Text to look for inside each term's name.

        Returns:
            Set of the ``id:`` values of all matching ``[Term]`` stanzas.
        """
        goids = set()
        goterm = None  # tag -> value for the [Term] stanza being read, if any

        def _collect(term):
            """Record the stanza's ID when its name contains the search text."""
            # Stanzas lacking an id or a name are skipped rather than raising.
            if 'id' in term and name in term.get('name', ''):
                goids.add(term['id'])

        with open(fin_obo) as ifstrm:
            for line in ifstrm:
                if goterm is not None:
                    tag, colon, value = line.partition(':')
                    if colon:
                        goterm[tag] = value.strip()
                    else:
                        # A line without ':' (e.g. blank) ends the stanza.
                        _collect(goterm)
                        goterm = None
                elif line.startswith("[Term]"):
                    goterm = {}
        # The last stanza may end at EOF without a terminating blank line.
        if goterm is not None:
            _collect(goterm)
        return goids

    def _get_goids_all(self, go_sources):
        """Given GO ID sources and optionally the relationship attribute, return all GO IDs.

        The returned set holds the keys filled in by ``CurNHigher`` plus the
        alternate IDs of every term it returned.
        """
        go2obj_user = {}
        objrel = CurNHigher(self.relationships, self.godag)
        # Fills go2obj_user in place (presumably the sources and their
        # higher-level terms -- confirm against CurNHigher).
        objrel.get_id2obj_cur_n_high(go2obj_user, go_sources)
        goids = set(go2obj_user)
        for goterm in go2obj_user.values():
            if goterm.alt_ids:
                goids.update(goterm.alt_ids)
        return goids

    def _prt_info(self, prt, goid_sources, goids_all):
        """Print obo comment lines ("!") describing how this obo subset was created."""
        prt.write("! Contains {N} GO IDs. Created using {M} GO sources:\n".format(
            N=len(goids_all), M=len(goid_sources)))
        for goid in goid_sources:
            prt.write("! {GO}\n".format(GO=str(self.godag.get(goid, ""))))
        prt.write("\n")