Example #1
0
    def read(self):
        """
        Parses the nexo file behind ``self.handle`` and returns a tuple
        ``(annotations, graph)``.

        ``annotations`` is a list with one GeneAnnotation per entry of the
        content handler's ``annotations`` mapping, each carrying a list of
        TermAssociation objects. ``graph`` is an OntologyGraph filled with
        the parsed nodes and edges, plus one typedef entry for every edge
        type that occurred.
        """

        handler = NexoContentHandler(self.get_all_attrs, self.annotation_source)
        xml.sax.parse(self.handle, handler)

        gene_annotations = [
            GeneAnnotation(obj,
                           associations=[TermAssociation(x) for x in assocs])
            for obj, assocs in handler.annotations.items()
        ]

        ontology = OntologyGraph()
        for node in handler.nodes.values():
            ontology.add_node(node.id, node)

        seen_types = set()
        for edge in handler.edges:
            source_id = handler.nodes[edge[0]].id
            target_id = handler.nodes[edge[1]].id
            relation = edge[2]
            # in our representation the edge direction is inverted
            ontology.add_edge(target_id, source_id, relation)
            seen_types.add(relation)

        # register a minimal type definition for every relation encountered
        for relation in seen_types:
            ontology.typedefs[relation] = {"id": relation}

        return (gene_annotations, ontology)
Example #2
0
def terms_to_graph(terms):
    """
    Creates OntologyGraph from terms list obtained from OboIterator.

    ``terms`` is an iterable of ``(stanza_type, data)`` pairs, where ``data``
    maps tag names to lists of values. Only "Term" and "Typedef" stanzas are
    used; any other stanza type is ignored.

    Note that the ``data`` mapping of every "Term" stanza is modified in
    place: its "id", "name" and (when present) "namespace" entries are
    popped.

    Returns the populated OntologyGraph.

    Raises ValueError when a "relationship" value does not consist of
    exactly two whitespace separated fields, or when a relationship type is
    used without a matching "Typedef" stanza. Raises KeyError when a "Term"
    stanza has no "id" or no "name".
    """

    g = OntologyGraph()
    defined_relations = set()
    found_relations = set()

    for (term_type, data) in terms:
        if term_type == "Term":  # Add only terms and typedefs for now
            nid = data.pop("id")[0]
            name = data.pop("name")[0]
            term = OntologyTerm(nid, name, data)
            if g.node_exists(nid):
                g.update_node(nid, term)
            else:
                g.add_node(nid, term)
            if "is_a" in data:
                for parent in data["is_a"]:
                    g.add_edge(nid, parent, "is_a")
            if "synonym" in data:
                # fetch the node back from the graph, as update_node may
                # have been used instead of add_node
                node = g.get_node(nid)
                for synonym in data["synonym"]:
                    # the first shell-style token of the value is the key
                    g.synonyms[shlex.split(synonym)[0]] = node
            if "relationship" in data:
                for edge in data["relationship"]:
                    p = edge.split()
                    if len(p) != 2:
                        raise ValueError("Incorrect relationship: " + edge)
                    # value layout is "<relation type> <target id>"
                    g.add_edge(nid, p[1], p[0])
                    found_relations.add(p[0])
            if "alt_id" in data:
                for alt_id in data["alt_id"]:
                    g.alt_ids[alt_id] = nid
            if "namespace" in data:
                g.namespace[nid] = data.pop("namespace")[0]

        elif term_type == "Typedef":
            rid = data["id"][0]
            g.typedefs[rid] = data
            defined_relations.add(rid)

    # validate whether all relationships were defined
    not_defined = found_relations.difference(defined_relations)
    if not_defined:
        raise ValueError("Undefined relationships found: " + str(not_defined))

    g.typedefs["is_a"] = _IS_A_TYPE

    # Alternative ids share the namespace of their primary term. A namespace
    # is only recorded for terms that declared one, so skip the others
    # instead of failing with a KeyError.
    for alt_id in g.alt_ids:
        primary = g.alt_ids[alt_id]
        if primary in g.namespace:
            g.namespace[alt_id] = g.namespace[primary]
    return g
Example #3
0
def terms_to_graph(terms):
    """
    Creates OntologyGraph from terms list obtained from OboIterator.

    ``terms`` is an iterable of ``(stanza_type, data)`` pairs, where ``data``
    maps tag names to lists of values. Only "Term" and "Typedef" stanzas are
    used; any other stanza type is ignored.

    Note that the ``data`` mapping of every "Term" stanza is modified in
    place: its "id", "name" and (when present) "namespace" entries are
    popped.

    Returns the populated OntologyGraph.

    Raises ValueError when a "relationship" value does not consist of
    exactly two whitespace separated fields, or when a relationship type is
    used without a matching "Typedef" stanza. Raises KeyError when a "Term"
    stanza has no "id" or no "name".
    """

    g = OntologyGraph()
    defined_relations = set()
    found_relations = set()

    for (term_type, data) in terms:
        if term_type == "Term":  # Add only terms and typedefs for now
            nid = data.pop("id")[0]
            name = data.pop("name")[0]
            term = OntologyTerm(nid, name, data)
            if g.node_exists(nid):
                g.update_node(nid, term)
            else:
                g.add_node(nid, term)
            if "is_a" in data:
                for parent in data["is_a"]:
                    g.add_edge(nid, parent, "is_a")
            if "synonym" in data:
                # fetch the node back from the graph, as update_node may
                # have been used instead of add_node
                node = g.get_node(nid)
                for synonym in data["synonym"]:
                    # the first shell-style token of the value is the key
                    g.synonyms[shlex.split(synonym)[0]] = node
            if "relationship" in data:
                for edge in data["relationship"]:
                    p = edge.split()
                    if len(p) != 2:
                        raise ValueError("Incorrect relationship: " + edge)
                    # value layout is "<relation type> <target id>"
                    g.add_edge(nid, p[1], p[0])
                    found_relations.add(p[0])
            if "alt_id" in data:
                for alt_id in data["alt_id"]:
                    g.alt_ids[alt_id] = nid
            if "namespace" in data:
                g.namespace[nid] = data.pop("namespace")[0]

        elif term_type == "Typedef":
            rid = data["id"][0]
            g.typedefs[rid] = data
            defined_relations.add(rid)

    # validate whether all relationships were defined
    not_defined = found_relations.difference(defined_relations)
    if not_defined:
        raise ValueError("Undefined relationships found: " + str(not_defined))

    g.typedefs["is_a"] = _IS_A_TYPE

    # Alternative ids share the namespace of their primary term. A namespace
    # is only recorded for terms that declared one, so skip the others
    # instead of failing with a KeyError.
    for alt_id in g.alt_ids:
        primary = g.alt_ids[alt_id]
        if primary in g.namespace:
            g.namespace[alt_id] = g.namespace[primary]
    return g