Beispiel #1
0
def test_parent_index():
    """Check that the ancestor subontology of GO:0010971 has a non-empty parent index."""
    go = OntologyFactory().create("go")
    ancestor_ids = go.ancestors('GO:0010971')
    ancestor_subont = go.subontology(ancestor_ids)
    parent_index = ancestor_subont.parent_index()
    assert len(parent_index) > 0
Beispiel #2
0
class CollapsedAssociationSet:
    """Collapse raw annotation associations into groups that share one header.

    A group (``CollapsedAssociation``) is keyed by subject ID, sorted
    qualifiers, primary term, annotation extensions and, for IPI annotations
    whose term is BINDING_ROOT or one of its descendants, the with/from
    values. Each source association is attached to its group(s) as a
    ``CollapsedAssociationLine``.
    """

    def __init__(self, associations):
        """Store the associations to collapse; nothing is collapsed yet."""
        self.go_ontology = None  # loaded lazily by setup_ontologies()
        self.assoc_dict = {}  # not referenced by the methods of this class
        self.collapsed_associations = []
        self.associations = associations

    def setup_ontologies(self):
        """Load the GO ontology on first use; do nothing if one is already set."""
        if self.go_ontology is not None:
            return
        self.go_ontology = OntologyFactory().create("go")

    def collapse_annotations(self):
        """Distribute every association over its collapsed association(s).

        Here we decide the distinct assertion instances going into the model,
        which reduces/eliminates the need to SPARQL the model graph.

        Group by:
            1. ID
            2. qualifiers (normalized order; any array needs this)
            3. primary term
            4. With/From (if primary term is BINDING_ROOT or descendant)
            5. Extensions
        Collapse multiple:
            1. Reference
            2. Evidence Code
            3. With/From (if primary term is not BINDING_ROOT or descendant)
            4. Source line
            5. Date
            6. Assigned by
            7. Properties
        """
        self.setup_ontologies()

        for a in self.associations:
            # Header fields
            subject_id = a["subject"]["id"]
            quals = a["qualifiers"]
            term_id = a["object"]["id"]
            evidence = a["evidence"]
            line_with_from = evidence["with_support_from"]
            evidence_type = evidence["type"]
            exts = get_annot_extensions(a)
            # Handle pipe separation according to import requirements
            split_with_froms = get_with_froms(a)

            binds_protein = (
                evidence_type == IPI_ECO_CODE
                and BINDING_ROOT in self.go_ontology.ancestors(term_id, reflexive=True)
            )
            if not binds_protein:
                targets = [
                    self.find_or_create_collapsed_association(
                        subject_id, quals, term_id, None, exts)
                ]
            else:
                targets = self.find_or_create_collapsed_associations(
                    subject_id, quals, term_id, split_with_froms, exts)
                # Don't use ontobio-parsed with_from on lines
                line_with_from = None

            # One line object per target collapsed association
            for target in targets:
                target.lines.append(CollapsedAssociationLine(a, line_with_from))

    def find_or_create_collapsed_association(self, subj_id, qualifiers, term, with_from, extensions):
        """Return the collapsed association matching the header, creating it if absent.

        The header is built from the arguments (qualifiers and with_from are
        sorted); the 'evidence' entry is only added when with_from is truthy.
        A newly created association is appended to collapsed_associations.
        """
        header = {
            'subject': {'id': subj_id},
            'qualifiers': sorted(qualifiers),
            'object': {'id': term},
            'object_extensions': extensions,
        }
        if with_from:
            header['evidence'] = {'with_support_from': sorted(with_from)}

        match = next(
            (known for known in self.collapsed_associations if known.header == header),
            None,
        )
        if match is not None:
            return match

        created = CollapsedAssociation(header)
        self.collapsed_associations.append(created)
        return created

    def find_or_create_collapsed_associations(self, subj_id, qualifiers, term, with_froms, extensions):
        """Return one collapsed association per entry of with_froms, in order."""
        return [
            self.find_or_create_collapsed_association(subj_id, qualifiers, term, wf, extensions)
            for wf in with_froms
        ]

    def __iter__(self):
        """Iterate over the collapsed associations built so far."""
        return iter(self.collapsed_associations)
Beispiel #3
0
class GoAspector:
    """Determine which GO aspect (F, C or P) a GO term belongs to.

    Aspect membership is decided by looking for one of the three aspect root
    terms in the term's is_a (subClassOf) closure.
    """

    # Root term IDs of the three GO aspects.
    _MF_ROOT = "GO:0003674"  # molecular function
    _CC_ROOT = "GO:0005575"  # cellular component
    _BP_ROOT = "GO:0008150"  # biological process
    # (root, aspect letter) pairs in the order go_aspect() tests them.
    _ASPECT_ROOTS = ((_MF_ROOT, 'F'), (_CC_ROOT, 'C'), (_BP_ROOT, 'P'))

    def __init__(self, go_ontology):
        """Use go_ontology if it is truthy, otherwise create the "go" ontology."""
        if go_ontology:
            self.ontology = go_ontology
        else:
            self.ontology = OntologyFactory().create("go")

    def get_ancestors_through_subont(self, go_term, relations):
        """
        Returns the ancestors from the relation filtered GO subontology of go_term's ancestors.

        subontology() primarily used here for speed when specifying relations to traverse. Point of this is to first get
        a smaller graph (all ancestors of go_term regardless of relation) and then filter relations on that instead of
        the whole GO.
        """
        all_ancestors = self.ontology.ancestors(go_term, reflexive=True)
        subont = self.ontology.subontology(all_ancestors)
        return subont.ancestors(go_term, relations)

    def get_isa_partof_closure(self, go_term):
        """Returns go_term's ancestors over subClassOf and BFO:0000050 relations."""
        return self.get_ancestors_through_subont(
            go_term, relations=["subClassOf", "BFO:0000050"])

    def get_isa_closure(self, go_term):
        """Returns go_term's ancestors over subClassOf relations only."""
        return self.get_ancestors_through_subont(go_term,
                                                 relations=["subClassOf"])

    def _is_under_root(self, go_term, root):
        """Returns True if go_term is root itself or has root in its is_a closure."""
        if go_term == root:
            return True
        return root in self.get_isa_closure(go_term)

    def is_biological_process(self, go_term):
        """
        Returns True if go_term is, or has an is_a ancestor of, biological process GO:0008150
        """
        return self._is_under_root(go_term, self._BP_ROOT)

    def is_molecular_function(self, go_term):
        """
        Returns True if go_term is, or has an is_a ancestor of, molecular function GO:0003674
        """
        return self._is_under_root(go_term, self._MF_ROOT)

    def is_cellular_component(self, go_term):
        """
        Returns True if go_term is, or has an is_a ancestor of, cellular component GO:0005575
        """
        return self._is_under_root(go_term, self._CC_ROOT)

    def go_aspect(self, go_term):
        """
        For GO terms, returns F, C, or P corresponding to its aspect.

        Returns None if go_term does not start with "GO:" or if none of the
        aspect roots is found. Roots are tested in the order F, C, P.
        """
        if not go_term.startswith("GO:"):
            return None

        # The is_a closure is the same for all three root checks, so compute it
        # at most once (and not at all when go_term is the first root tested).
        closure = None
        for root, aspect in self._ASPECT_ROOTS:
            if go_term == root:
                return aspect
            if closure is None:
                closure = self.get_isa_closure(go_term)
            if root in closure:
                return aspect
        return None
Beispiel #4
0
    OUTFILE = args.outfile
    regen_cache = None
    if args.use_cache:
        regen_cache = False

    # ont = OntologyFactory().create("/Users/ebertdu/Downloads/go.owl")
    ont = OntologyFactory().create(args.ontology_file)
    # aset = AssociationSetFactory().create(ont, file=GAF_FILE)
    common_terms = get_common_terms(ont, GAF_FILE, USAGE_COUNT_CONSTRAINT,
                                    regen_cache)
    print("Grabbed {} common terms".format(len(common_terms)))

    all_terms = []
    term_to_ancestors = {}
    for t in common_terms:
        subont = ont.subontology(ont.ancestors(t), relations=RELATIONS)
        term_to_ancestors[t] = subont.nodes(
        )  # Keep ancestor list in case we want to only include common ancestors
        for n in subont.nodes():
            if n not in all_terms:
                all_terms.append(n)
    print("Grabbed all ancestors")

    if ONLY_SHARED_ANCESTORS:
        shared_ancestors = []
        for t in common_terms:
            for anc in term_to_ancestors[t]:
                ## x-y term matrix checking for shared ancestors in multiple ancestor sets
                for other_t in common_terms:
                    if other_t == t:
                        continue