def test_parent_index():
    """Check that a subontology built from a term's ancestors has parents.

    Creates the "go" ontology, restricts it to the ancestors of
    GO:0010971, and asserts that the resulting parent index is non-empty.
    """
    go = OntologyFactory().create("go")
    ancestor_ids = go.ancestors('GO:0010971')
    restricted = go.subontology(ancestor_ids)
    index = restricted.parent_index()
    assert len(index) > 0
class CollapsedAssociationSet:
    """Groups association records into distinct "collapsed" associations.

    Each collapsed association is identified by a header dict; every
    source association that maps onto that header is attached to it as a
    ``CollapsedAssociationLine``.  Iterating over the set yields the
    collapsed associations in creation order.
    """

    def __init__(self, associations):
        """Store the raw associations; nothing is collapsed until asked."""
        self.associations = associations
        self.collapsed_associations = []
        # Initialised here but not read or written by any method of
        # this class.
        self.assoc_dict = {}
        # Loaded lazily by setup_ontologies().
        self.go_ontology = None

    def setup_ontologies(self):
        """Create the "go" ontology on first use; later calls are no-ops."""
        if self.go_ontology is not None:
            return
        self.go_ontology = OntologyFactory().create("go")

    def collapse_annotations(self):
        """Assign every association to its collapsed association(s).

        This decides the distinct assertion instances going into the
        model, which reduces/eliminates the need to SPARQL the model graph.

        Grouped by (i.e. part of the header):
          1. ID
          2. qualifiers (order-normalised)
          3. primary term
          4. With/From (if primary term is BINDING_ROOT or descendant)
          5. Extensions

        Collapsed together (i.e. kept per line):
          1. Reference
          2. Evidence code
          3. With/From (if primary term is not BINDING_ROOT or descendant)
          4. Source line
          5. Date
          6. Assigned by
          7. Properties
        """
        self.setup_ontologies()

        for annot in self.associations:
            # Header fields
            subject_id = annot["subject"]["id"]
            quals = annot["qualifiers"]
            term_id = annot["object"]["id"]
            evidence = annot["evidence"]
            line_with_from = evidence["with_support_from"]
            evidence_type = evidence["type"]
            exts = get_annot_extensions(annot)
            # Handle pipe separation according to import requirements
            split_with_froms = get_with_froms(annot)

            if (evidence_type == IPI_ECO_CODE
                    and BINDING_ROOT in self.go_ontology.ancestors(
                        term_id, reflexive=True)):
                # IPI binding annotation: with/from is part of the header,
                # so one collapsed association per with/from entry.
                targets = self.find_or_create_collapsed_associations(
                    subject_id, quals, term_id, split_with_froms, exts)
                # Don't use ontobio-parsed with_from on lines
                line_with_from = None
            else:
                targets = [
                    self.find_or_create_collapsed_association(
                        subject_id, quals, term_id, None, exts)
                ]

            # Each target gets its own line object for this association.
            for target in targets:
                target.lines.append(
                    CollapsedAssociationLine(annot, line_with_from))

    def find_or_create_collapsed_association(self, subj_id, qualifiers, term,
                                             with_from, extensions):
        """Return the collapsed association for this header, creating it if new.

        The header is built from the subject ID, the sorted qualifiers, the
        term and the extensions; a sorted with/from is added under
        ``evidence`` only when ``with_from`` is truthy.  Existing entries
        are matched by comparing their ``header`` for equality.
        """
        header = {
            'subject': {'id': subj_id},
            'qualifiers': sorted(qualifiers),
            'object': {'id': term},
            'object_extensions': extensions,
        }
        if with_from:
            header['evidence'] = {'with_support_from': sorted(with_from)}

        existing = next(
            (candidate for candidate in self.collapsed_associations
             if candidate.header == header),
            None,
        )
        if existing is not None:
            return existing

        created = CollapsedAssociation(header)
        self.collapsed_associations.append(created)
        return created

    def find_or_create_collapsed_associations(self, subj_id, qualifiers, term,
                                              with_froms, extensions):
        """Return one collapsed association per entry of ``with_froms``.

        Each entry is passed as the ``with_from`` of
        ``find_or_create_collapsed_association``; results keep input order.
        """
        return [
            self.find_or_create_collapsed_association(
                subj_id, qualifiers, term, single_with_from, extensions)
            for single_with_from in with_froms
        ]

    def __iter__(self):
        """Iterate over the collapsed associations built so far."""
        return iter(self.collapsed_associations)
class GoAspector:
    """Determines which GO aspect (F, C or P) a GO term belongs to.

    Aspect membership is decided by looking for one of the three GO root
    terms among the term's ``subClassOf`` (is_a) ancestors.
    """

    _MF_ROOT = "GO:0003674"  # molecular function
    _CC_ROOT = "GO:0005575"  # cellular component
    _BP_ROOT = "GO:0008150"  # biological process
    # (root, aspect letter) pairs in the precedence order used by go_aspect().
    _ASPECT_BY_ROOT = ((_MF_ROOT, 'F'), (_CC_ROOT, 'C'), (_BP_ROOT, 'P'))

    def __init__(self, go_ontology):
        """Use ``go_ontology`` if truthy, otherwise create the "go" ontology."""
        if go_ontology:
            self.ontology = go_ontology
        else:
            self.ontology = OntologyFactory().create("go")

    def get_ancestors_through_subont(self, go_term, relations):
        """
        Returns the ancestors from the relation filtered GO subontology of
        go_term's ancestors.

        subontology() primarily used here for speed when specifying
        relations to traverse. Point of this is to first get a smaller graph
        (all ancestors of go_term regardless of relation) and then filter
        relations on that instead of the whole GO.
        """
        all_ancestors = self.ontology.ancestors(go_term, reflexive=True)
        subont = self.ontology.subontology(all_ancestors)
        return subont.ancestors(go_term, relations)

    def get_isa_partof_closure(self, go_term):
        """Return go_term's ancestors over subClassOf and BFO:0000050."""
        return self.get_ancestors_through_subont(
            go_term, relations=["subClassOf", "BFO:0000050"])

    def get_isa_closure(self, go_term):
        """Return go_term's ancestors over subClassOf only."""
        return self.get_ancestors_through_subont(
            go_term, relations=["subClassOf"])

    def _has_isa_root(self, go_term, root):
        """Return True if go_term is ``root`` or has it as an is_a ancestor."""
        # The equality test comes first so root terms never hit the ontology.
        return go_term == root or root in self.get_isa_closure(go_term)

    def is_biological_process(self, go_term):
        """
        Returns True if go_term is, or has an is_a ancestor of,
        biological process GO:0008150
        """
        return self._has_isa_root(go_term, self._BP_ROOT)

    def is_molecular_function(self, go_term):
        """
        Returns True if go_term is, or has an is_a ancestor of,
        molecular function GO:0003674
        """
        return self._has_isa_root(go_term, self._MF_ROOT)

    def is_cellular_component(self, go_term):
        """
        Returns True if go_term is, or has an is_a ancestor of,
        cellular component GO:0005575
        """
        return self._has_isa_root(go_term, self._CC_ROOT)

    def go_aspect(self, go_term):
        """
        For GO terms, returns F, C, or P corresponding to its aspect.

        Returns None when go_term does not start with "GO:" or when none of
        the three root terms is found. Roots are checked in the order
        molecular function, cellular component, biological process.
        """
        if not go_term.startswith("GO:"):
            return None

        # Building the ancestor subontology is the expensive step, so the
        # is_a closure is computed at most once and shared by all three
        # root checks.
        closure = None
        for root, aspect in self._ASPECT_BY_ROOT:
            if go_term == root:
                return aspect
            if closure is None:
                closure = self.get_isa_closure(go_term)
            if root in closure:
                return aspect
        return None
OUTFILE = args.outfile regen_cache = None if args.use_cache: regen_cache = False # ont = OntologyFactory().create("/Users/ebertdu/Downloads/go.owl") ont = OntologyFactory().create(args.ontology_file) # aset = AssociationSetFactory().create(ont, file=GAF_FILE) common_terms = get_common_terms(ont, GAF_FILE, USAGE_COUNT_CONSTRAINT, regen_cache) print("Grabbed {} common terms".format(len(common_terms))) all_terms = [] term_to_ancestors = {} for t in common_terms: subont = ont.subontology(ont.ancestors(t), relations=RELATIONS) term_to_ancestors[t] = subont.nodes( ) # Keep ancestor list in case we want to only include common ancestors for n in subont.nodes(): if n not in all_terms: all_terms.append(n) print("Grabbed all ancestors") if ONLY_SHARED_ANCESTORS: shared_ancestors = [] for t in common_terms: for anc in term_to_ancestors[t]: ## x-y term matrix checking for shared ancestors in multiple ancestor sets for other_t in common_terms: if other_t == t: continue