def _init_go2dag(self, goids):
    """Build one DagA per requested GO ID that exists in ``self.godag``.

    Args:
        goids: Iterable of GO IDs supplied by the caller.

    Returns:
        dict mapping each requested GO ID that is a key of ``self.godag``
        to a ``DagA`` object.

    Requested IDs that are not keys of ``self.godag`` are handed to
    ``self._go_not_found`` and do not appear in the result.
    """
    godag = self.godag
    relationships = self.rels

    requested = set(goids)
    found = requested.intersection(godag.keys())
    if found != requested:
        self._go_not_found(found, requested)

    # Ancestors and depths for the GO terms that were found
    go2ancestors = get_go2ancestors(self._get_goobjs(found), relationships)
    go2depth = self._get_go2depth(go2ancestors, relationships)
    weights = self.w_e

    go2dag = {}
    for goid, ancestors in go2ancestors.items():
        go2dag[goid] = DagA(goid, ancestors, go2depth, weights, godag)

    # Found IDs that are not keys of go2ancestors (the original comment
    # calls these alternate GO IDs) are filled in from the term's item_id.
    for goid in found.difference(go2ancestors.keys()):
        term = godag[goid]
        main_id = term.item_id
        if term.depth == 0:
            # Depth-0 term: build a DagA with an empty ancestor mapping
            go2dag[goid] = DagA(main_id, {}, go2depth, weights, godag)
        else:
            # Share the DagA already built for the term's item_id
            go2dag[goid] = go2dag[main_id]
    return go2dag
def get_go_lineage_of(self, terms):
    """Return a de-duplicated list of GO IDs in the lineage of ``terms``.

    Args:
        terms: Iterable of keys into ``self.GODAG``.

    Returns:
        list holding every key of the mapping returned by
        ``get_go2ancestors`` together with every element of its values,
        each listed once. The list order is that of a ``set`` and is
        therefore not meaningful.
    """
    goobjs = [self.GODAG[goid] for goid in terms]
    # relationships=False is forwarded as-is to get_go2ancestors
    go2ancestors = get_go2ancestors(goobjs, False)
    lineage = []
    for goid, ancestors in go2ancestors.items():
        lineage += [goid]
        lineage += ancestors
    return list(set(lineage))
def __init__(self, go2obj, relationships, dcnt, go2letter):
    """Store the inputs and precompute ancestor/descendant lookups.

    Args:
        go2obj: Mapping passed to ``get_goobjs_altgo2goobj``; per the
            original comment, a subset containing only the items needed
            by go_sources.
        relationships: Relationship selection forwarded to
            ``get_go2descendants`` and ``get_go2ancestors``.
            Ex: set(['part_of', 'regulates', 'negatively_regulates',
            'positively_regulates'])
        dcnt: Stored unchanged as ``self.dcnt``.
        go2letter: Stored unchanged as ``self.go2letter``.

    Attributes set here:
        go2descendants: result of ``get_go2descendants``.
        go2ancestors: result of ``get_go2ancestors``.
        go2dcnt: Counter of ``len(descendants)`` for each GO ID in
            ``go2descendants``.
        Each of the three is then passed to ``add_alt_goids`` along with
        the alt-GO mapping returned by ``get_goobjs_altgo2goobj``.
    """
    self.go2obj = go2obj
    self.relationships = relationships
    self.dcnt = dcnt
    self.go2letter = go2letter

    _goobjs, _altgo2goobj = get_goobjs_altgo2goobj(self.go2obj)
    self.go2descendants = get_go2descendants(_goobjs, relationships)
    self.go2ancestors = get_go2ancestors(_goobjs, relationships)
    # Descendant counts are taken before add_alt_goids touches go2descendants
    self.go2dcnt = cx.Counter(
        {go: len(descendants) for go, descendants in self.go2descendants.items()}
    )
    add_alt_goids(self.go2ancestors, _altgo2goobj)
    add_alt_goids(self.go2descendants, _altgo2goobj)
    add_alt_goids(self.go2dcnt, _altgo2goobj)
def _init_go2genes(self, relationship_set, godag):
    """Map each GO ID to the set of genes annotated to it or to a descendant.

    A gene annotated to a GO term is also counted for every ancestor of
    that term, so each gene's annotations are expanded with their
    ancestors before being recorded.

    Args:
        relationship_set: Relationship selection forwarded to
            ``get_go2ancestors``.
        godag: Mapping whose values are the GO term objects to collect
            ancestors for.

    Returns:
        dict of GO ID -> set of gene IDs taken from ``self.annots``.
    """
    go2up = get_go2ancestors(set(godag.values()), relationship_set)
    go2geneset = defaultdict(set)
    for geneid, goids_anno in self.annots.items():
        # Union of the gene's own GO IDs and their ancestors; building a
        # set first means a gene is recorded once per GO ID even when
        # several of its annotations share an ancestor.
        propagated = set()
        for goid in goids_anno:
            propagated.add(goid)
            propagated.update(go2up.get(goid, ()))
        for goid in propagated:
            go2geneset[goid].add(geneid)
    return dict(go2geneset)
def _init_go2genes(self, annots, relationships=None):
    """Map each GO ID to the set of genes annotated to it or to a descendant.

    A gene annotated to a GO term is also counted for every ancestor of
    that term. Annotation GO IDs are first translated to the ``item_id``
    of their entry in ``self.go2obj``.

    Args:
        annots: Mapping of gene ID -> iterable of annotated GO IDs.
        relationships: Relationship selection forwarded to
            ``get_go2ancestors``; ``None`` is replaced by an empty dict.

    Returns:
        A 2-tuple ``(go2geneset, go_alts)``:
        go2geneset is a dict of GO ID -> set of gene IDs;
        go_alts is the set of annotation GO IDs whose ``item_id`` differs
        from the ID used in the annotation.

    Annotation GO IDs that are not keys of ``self.go2obj`` are skipped;
    if any were seen, their count is printed.
    """
    if relationships is None:
        relationships = {}
    go2obj = self.go2obj
    go2up = get_go2ancestors(set(go2obj.values()), relationships)

    go2geneset = defaultdict(set)
    go_alts = set()
    goids_notfound = set()
    for geneid, goids_anno in annots.items():
        # Union of the gene's GO IDs and their ancestors, so that a shared
        # ancestor records the gene only once.
        propagated = set()
        for goid_anno in goids_anno:
            if goid_anno not in go2obj:
                goids_notfound.add(goid_anno)
                continue
            goid_main = go2obj[goid_anno].item_id
            if goid_anno != goid_main:
                go_alts.add(goid_anno)
            propagated.add(goid_main)
            if goid_main in go2up:
                propagated |= go2up[goid_main]
        for goid in propagated:
            go2geneset[goid].add(geneid)

    if goids_notfound:
        print("{N} Assc. GO IDs not found in the GODag\n".format(
            N=len(goids_notfound)))
    return dict(go2geneset), go_alts
def get_go2parents_go2obj(go2obj, relationships=None, prt=None):
    """Return the GO ID -> upstream GO IDs mapping for the terms in go2obj.

    Despite the "parents" in the name, the mapping is produced by
    ``get_go2ancestors``.

    Args:
        go2obj: Mapping passed to ``get_goobjs_altgo2goobj``, which splits
            it into GO term objects and an alt-GO mapping.
        relationships: Forwarded unchanged to ``get_go2ancestors``.
        prt: Forwarded unchanged to ``get_go2ancestors``.

    Returns:
        The mapping returned by ``get_go2ancestors`` after
        ``add_alt_goids`` has been applied to it with the alt-GO mapping.
    """
    main_goobjs, alt2goobj = get_goobjs_altgo2goobj(go2obj)
    go2upstream = get_go2ancestors(main_goobjs, relationships, prt)
    add_alt_goids(go2upstream, alt2goobj)
    return go2upstream