コード例 #1
0
ファイル: transitive.py プロジェクト: zxenia/rdflib
"""

if __name__ == '__main__':
    # Demo: build a tiny family tree and walk the `ex:parent` relation
    # transitively in both directions.
    from rdflib import ConjunctiveGraph, URIRef

    person = URIRef('ex:person')
    dad = URIRef('ex:d')
    mom = URIRef('ex:m')
    momOfDad = URIRef('ex:gm0')
    momOfMom = URIRef('ex:gm1')
    dadOfDad = URIRef('ex:gf0')
    dadOfMom = URIRef('ex:gf1')

    parent = URIRef('ex:parent')

    g = ConjunctiveGraph()
    g.add((person, parent, dad))
    g.add((person, parent, mom))
    g.add((dad, parent, momOfDad))
    g.add((dad, parent, dadOfDad))
    g.add((mom, parent, momOfMom))
    g.add((mom, parent, dadOfMom))

    # print(...) with a single argument behaves identically on Python 2 and
    # Python 3, whereas the bare `print x` statement is a SyntaxError on 3.
    print("Parents, forward from `ex:person`:")
    for i in g.transitive_objects(person, parent):
        print(i)

    print("Parents, *backward* from `ex:gm1`:")
    for i in g.transitive_subjects(parent, momOfMom):
        print(i)
コード例 #2
0
def parse_workflow():
    """ Load the workflow graphml files and derive tag / state tables from them.

    Steps, in order:

    1. Build a ConjunctiveGraph from ``docs/workflow-rrid.graphml`` and
       ``docs/workflow-paper-id.graphml`` (``docs`` is resolved relative to
       the directory above the one containing this file) and hand it to
       ``write`` together with the path ``/tmp/workflow.ttl``.
    2. Collect tag types, tag tokens, "terminal" tags and "endpoint" states.
    3. Walk the graph backwards from terminals / endpoints to build chains,
       print them, and compute the allowed tag-set transitions.

    Side effects: calls ``write(cgraph, '/tmp/workflow.ttl')``, calls
    ``OntCuries`` twice, and prints several chain listings to stdout.

    Returns a 6-tuple
    ``(tag_types, tag_tokens, partInstances, valid_tagsets,
    terminal_tagsets, tag_transitions)``.

    Slice notation reminder (rdflib ``Graph.__getitem__``): ``g[:p:o]``
    iterates subjects, ``g[s:p]`` objects, ``g[s::o]`` predicates,
    ``g[::o]`` (subject, predicate) pairs and ``g[s::]`` (predicate, object)
    pairs.
    """
    # FIXME TODO these states should probably be compiled down to numbers???
    docs = Path(__file__).parent.absolute().resolve().parent / 'docs'
    rridpath = docs / 'workflow-rrid.graphml'
    paperpath = docs / 'workflow-paper-id.graphml'

    # presumably each mapping adds the triples derived from its graphml file
    # to cgraph -- TODO confirm against gt.WorkflowMapping / gt.PaperIdMapping
    cgraph = ConjunctiveGraph()
    gt.WorkflowMapping(rridpath.as_posix()).graph(cgraph)
    gt.PaperIdMapping(paperpath.as_posix(), False).graph(cgraph)
    write(cgraph, '/tmp/workflow.ttl')
    predicates = set(cgraph.predicates())
    # presumably registers curie prefixes globally (return values are discarded)
    OntCuries({cp:str(ip) for cp, ip in cgraph.namespaces()})
    OntCuries({'RRID': 'https://scicrunch.org/resolver/RRID:',
               'DOI': 'https://doi.org/',
               'PMID': 'https://www.ncbi.nlm.nih.gov/pubmed/'})
    hg = makeGraph('', graph=cgraph)
    short = sorted(hg.qname(_) for _ in predicates)  # not used further in this function

    # bare attribute accesses: no effect here unless attribute lookup on `wf`
    # has side effects -- NOTE(review): presumably just a reminder of the
    # predicates in play; confirm
    wf.hasTag
    wf.hasReplyTag
    wf.hasTagOrReplyTag
    wf.hasOutputTag

    #if type isa wf.tag

    # everything that is (transitively) a subclass of wf.tag
    tag_types = set(cgraph.transitive_subjects(rdfs.subClassOf, wf.tag))
    # per tag type: sorted instances, excluding the type itself
    tag_tokens = {tagType:sorted(set(t for t in cgraph.transitive_subjects(rdf.type, tagType)
                                     if t != tagType))
                  for tagType in tag_types}
    # predicates that attach a tag: subproperties of hasTagOrReplyTag plus hasOutputTag
    has_tag_types = set(cgraph.transitive_subjects(rdfs.subPropertyOf, wf.hasTagOrReplyTag))
    has_tag_types.add(wf.hasOutputTag)
    # predicates that lead to a next action: subproperties of hasOutput plus hasNextStep
    has_next_action_types = set(cgraph.transitive_subjects(rdfs.subPropertyOf, wf.hasOutput))
    has_next_action_types.add(wf.hasNextStep)

    # non-blank tag instances that have no outgoing has_tag_types edge
    terminals = sorted(tag
                       for ttype in tag_types
                       if ttype != wf.tagScibot  # scibot is not 'terminal' for this part
                       for tag in cgraph[:rdf.type:ttype]
                       if not isinstance(tag, BNode)
                       and not any(o for httype in has_tag_types
                                   for o in cgraph[tag:httype]))

    # non-blank wf.state instances that have no outgoing has_next_action_types edge
    endpoints = sorted(endpoint
                       for endpoint in cgraph[:rdf.type:wf.state]
                       if not isinstance(endpoint, BNode)
                       and not any(o for hnatype in has_next_action_types
                                   for o in cgraph[endpoint:hnatype]))

    # alternative formulation of the terminal search (no tagScibot exclusion);
    # the result is not used further in this function
    complicated = sorted(a_given_tag
                 for tt in tag_types
                 for a_given_tag in cgraph[:rdf.type:tt]
                 if not isinstance(a_given_tag, BNode)
                         and not [successor_tag
                          for htt in has_tag_types
                          for successor_tag in chain(t
                                                     for t in cgraph[a_given_tag:htt]
                                                     #if not isinstance(t, BNode)
                                        ,
                                                     # we don't actually need this for terminals
                                                     # we will need it later
                                                     #(t for b in cgraph[a_given_tag:htt]
                                                     #if isinstance(b, BNode)
                                                     #for listhead in cgraph[b:owl.oneOf]
                                                     #for t in unlist(listhead, cgraph)),
                         )])

    def topList(node, g):
        """ Yield every subject whose rdf.rest is `node` (the preceding list cell). """
        for s in g[:rdf.rest:node]:
            yield s

    def getLists(node, g):
        """ For each list cell whose rdf.first is `node`, yield the last node
            reached by following rdf.rest backwards (presumably the list head),
            or the cell itself when that walk produces nothing truthy. """
        for linker in g[:rdf.first:node]:
            top = None
            # exhaust the closure; only the final value of `top` is kept
            for top in g.transitiveClosure(topList, linker):
                pass

            if top:
                yield top
            else:
                yield linker

    def getIsTagOf(node, g):
        """ Yield subjects that point at `node` via any predicate in has_tag_types. """
        for htt in has_tag_types:
            for parent_tag in g[:htt:node]:
                yield parent_tag

    def getIsOneOfTagOf(node, g):
        """ Yield subjects that point (via any predicate) at a node whose
            owl.oneOf list contains `node`. """
        for list_top in getLists(node, g):
            for linker in g[:owl.oneOf:list_top]:
                for parent_tag, _ in g[::linker]:
                    yield parent_tag

    def getPreviousTag(node, g):  # not quite what we need
        """ Yield the results of getIsOneOfTagOf followed by those of getIsTagOf. """
        yield from getIsOneOfTagOf(node, g)
        yield from getIsTagOf(node, g)

    def getTagChains(node, g, seen=tuple()):
        """ Recursively yield tuples of predecessor tags leading to `node`.

            A predecessor is only included in the yielded tuple when its
            rdf.type (the first one found) is in tag_types; otherwise the
            chain passes through it without recording it. An empty tuple is
            yielded when the loop ends with `parent_tag` falsy, i.e. there
            were no predecessors or the last one was skipped. """
        # seen to prevent recursion cases where
        # tagging can occur in either order e.g. PMID -> DOI
        #print(tc.red(repr(OntId(node))))  # tc.red(OntId(node)) does weird stuff O_o
        parent_tag = None
        for parent_tag in chain(getIsOneOfTagOf(node, g),
                                getIsTagOf(node, g)):
            if parent_tag in seen:
                parent_tag = None
                continue
            # NOTE(review): next() raises StopIteration when parent_tag has no
            # rdf.type; inside a generator that surfaces as RuntimeError on
            # Python 3.7+ (PEP 479)
            ptt = next(g[parent_tag:rdf.type])
            #if ptt in tag_types:
            for pchain in getTagChains(parent_tag, g, seen + (node,)):
                if ptt in tag_types:
                    out = parent_tag, *pchain
                else:
                    out = pchain
                yield out

            # NOTE(review): `out` is only bound if the recursive call yielded
            # at least once; `not ptt` short-circuits first when ptt is truthy
            if not ptt and not out:
                parent_tag = None

        if not parent_tag:
            yield tuple()

    def getInitiatesAction(node, g):
        """ Yield subjects that point at `node` via wf.initiatesAction. """
        for action in g[:wf.initiatesAction:node]:
            yield action

    def getIsOneOfOutputOf(node, g):
        """ Yield subjects that point, via any predicate in
            has_next_action_types, at a node whose owl.oneOf list contains `node`. """
        for list_top in getLists(node, g):
            for linker in g[:owl.oneOf:list_top]:
                for hot in has_next_action_types:
                    for parent_thing  in g[:hot:linker]:
                        yield parent_thing

    def getActionChains(node, g):
        """ Recursively yield tuples of predecessor actions leading to `node`.

            Yields an empty tuple when the loop ends with `parent_action`
            falsy. NOTE(review): unlike getTagChains there is no `seen`
            guard, so a cycle in the graph would recurse without bound. """
        parent_action = None
        for parent_action in chain(getIsOneOfOutputOf(node, g),  # works for actions too
                                   getInitiatesAction(node, g)):
            for pchain in getActionChains(parent_action, g):  # NOTE may also be a tag...
                out = parent_action, *pchain
                #print(tuple(hg.qname(o) for o in out))
                yield out

        if not parent_action:
            yield tuple()

    def getRestSubjects(predicate, object, g):
        """ invert restriction

            For every restriction parsed by cmb.Restriction(rdfs.subClassOf)
            whose `p` / `o` equal `predicate` / `object`, yield the transitive
            rdfs.subClassOf subjects of its `s`. """
        rsco = cmb.Restriction(rdfs.subClassOf)
        for rt in rsco.parse(graph=g):
            if rt.p == predicate and rt.o == object:
                yield from g.transitive_subjects(rdfs.subClassOf, rt.s)

    annoParts = list(getRestSubjects(wf.isAttachedTo, wf.annotation, cgraph))
    # per annotation part: its non-blank instances (excluding the part itself);
    # the BNode branch of the conditional expression cannot be taken because
    # the comprehension filter already drops BNodes
    partInstances = {OntId(a):set(t if isinstance(t, BNode) else OntId(t)
                                  for t in cgraph.transitive_subjects(rdf.type, a)
                                  if not isinstance(t, BNode) and t != a)
                     for a in annoParts}

    # same computation as make_chains(endpoints, getActionChains) below;
    # this copy is not used further in this function
    _endpoint_chains = {OntId(endpoint):[[OntId(endpoint)] + [OntId(e) for e in chain]
                                            for chain in getActionChains(endpoint, cgraph)]
                        for endpoint in endpoints}

    #print([hg.qname(e) for e in endpoints])
    #print([print([hg.qname(c) for c in getActionChains(endpoint, cgraph) if c])
           #for endpoint in endpoints
           #if endpoint])

    #_ = [print(list(getActionChains(e, cgraph)) for e in endpoints)]
    #return

    # not used further in this function
    wat = cgraph.transitiveClosure(getPreviousTag, RRIDCUR.Duplicate)
    wat = list(wat)
    #def invOneOf(tag, g):

    # qname of each terminal -> qnames of everything reachable backwards from
    # it via getPreviousTag; not used further in this function
    fake_chains = {hg.qname(terminal):
                   [hg.qname(c)
                    for c in cgraph.transitiveClosure(getPreviousTag, terminal)]
                   for terminal in terminals}

    def make_chains(things, getChains):
        """ Map OntId(thing) -> list of chains, each chain being
            [OntId(thing)] followed by the OntIds of one getChains result. """
        return {OntId(thing):[[OntId(thing)] + [OntId(e) for e in chain]
                              for chain in getChains(thing, cgraph)]
                for thing in things
                #if not print(thing)
        }

    def print_chains(thing_chains):
        """ Print every chain twice, sorted: reversed and joined with ' -> '
            (using hg.qname), then in stored order joined with ' <- '
            (using each element's `curie`). """
        print('\nstart from beginning')

        print('\n'.join(sorted(' -> '.join(hg.qname(e) for e in reversed(chain))
                               for chains in thing_chains.values()
                               for chain in chains)))

        print('\nstart from end')

        print('\n'.join(sorted(' <- '.join(e.curie for e in chain)
                               for chains in thing_chains.values()
                               for chain in chains)))

    def valid_tagsets(all_chains):
        """ Build the state-transition table implied by `all_chains`.

            Each chain is walked in reverse; after each element the set of
            elements seen so far (as a frozenset) is a state. Returns a dict
            mapping each prior state (None for the start) to the frozenset of
            states that directly follow it.

            NOTE: the name `valid_tagsets` is rebound to a frozenset further
            down in parse_workflow, after this function has been called. """
        # not the most efficient way to do this ...
        transitions = defaultdict(set)
        for end, chains in all_chains.items():
            for chain in chains:
                valid = set()
                prior_state = None
                for element in reversed(chain):
                    valid.add(element)
                    state = frozenset(valid)
                    transitions[prior_state].add(state)
                    prior_state = state

        return {s:frozenset(n) for s, n in transitions.items()}

    endpoint_chains = make_chains(endpoints, getActionChains)
    #endpoint_transitions = valid_transitions(endpoint_chains)  # not the right structure
    print_chains(endpoint_chains)
    terminal_chains = make_chains(terminals, getTagChains)
    print_chains(terminal_chains)
    tag_transitions = valid_tagsets(terminal_chains)
    terminal_tags_to_endpoints =  'TODO'  # placeholder, not used further in this function

    def printq(*things):
        """ Print the curie of every argument on one line. """
        print(*(OntId(t).curie for t in things))

    from pprint import pprint
    def get_linkers(s, o, g, linkerFunc):  # FIXME not right
        """ Yield predicates linking `s` to `o` directly, then predicates
            linking `s` to each node produced by linkerFunc(o, g). """
        for p in g[s::o]:
            yield p

        for l in linkerFunc(o, g):
            #print(tc.blue(f'{OntId(s).curie} {l if isinstance(l, BNode) else OntId(l).curie}'))
            for p in g[s::l]:
                #print(tc.red(f'{s} {l} {o} {p}'))
                yield p
        return 
        # NOTE: everything below is unreachable because of the return above
        linkers = set(l for l in g.transitiveClosure(linkerFunc, o))
        for p, o in g[s::]:
            if o in linkers:
                yield p

    def edge_to_symbol(p, rev=False):
        """ Map a workflow predicate to its arrow symbol; `rev` selects the
            reversed form. Unknown predicates map to '??>' / '<??'. """
        if p == wf.initiatesAction:
            return '<<' if rev else '>>'
        elif p == wf.hasReplyTag:
            return '<' if rev else '>'
        elif p == wf.hasTagOrReplyTag:
            return '<=' if rev else '=>'
        elif p == wf.hasOutputTag:
            return '-<-' if rev else '->-'
        else:
            return '<??' if rev else '??>'

    def chain_to_typed_chain(chain, g, func):
        """ For each adjacent pair in `chain`, yield subject, edge symbol and
            object as three separate items (the `yield from` over a tuple
            flattens them), once per linking predicate. When the forward
            lookup leaves `p` falsy, the reversed direction is tried. Also
            prints each pair via printq. """
        # duh...
        #pprint(chain)
        for s, o in zip(chain, chain[1:]):
            # TODO deal with reversed case
            # presumably `.u` is the underlying URIRef of an OntId -- TODO confirm
            s, o = s.u, o.u
            p = None
            #print(s, o)
            printq(s, o)
            for p in get_linkers(s, o, g, func):
                #print(tc.yellow(p))
                #yield (s, edge_to_symbol(p), o)
                yield from (s, edge_to_symbol(p), o)

            if not p:
                for rp in get_linkers(o, s, g, func):
                    print(tc.blue(rp))
                    yield from (s, edge_to_symbol(rp, rev=True), o)

    def tchains(thing_chains, func):
        """ Return the sorted list of typed chains (one flat list per chain,
            built from the reversed chain); URIRef items are rendered as
            curies, other items (the edge symbols) are kept as they are. """
        return sorted([OntId(e).curie if isinstance(e, URIRef) else e
                       for e in chain_to_typed_chain(list(reversed(chain)), cgraph, func)]
                      for chains in thing_chains.values()
                      for chain in chains)

    def getLinkers(node, g):
        """ Yield every subject whose owl.oneOf list contains `node`. """
        for list_top in getLists(node, g):
            for linker in g[:owl.oneOf:list_top]:
                yield linker

    def allSubjects(object, graph):
        """ Yield every subject pointing at `object` (any predicate), followed
            by the results of getLinkers(object, graph). """
        yield from (s for s, p in graph[::object])
        yield from getLinkers(object, graph)

    print()
    ttc = tchains(terminal_chains, allSubjects)
    tec = tchains(endpoint_chains, allSubjects)
    pprint(ttc)
    pprint(tec)

    # rebinds the name of the helper defined above: from here on
    # `valid_tagsets` is the set of all states that occur as a successor
    valid_tagsets = frozenset((t for s in tag_transitions.values() for t in s))
    # successor states that never occur as a prior state, i.e. have no
    # outgoing transition
    tts = valid_tagsets - frozenset(tag_transitions)
    endtype = 'TODO'  # placeholder; rebound by the loop below
    tt = {}
    for endtype, chains  in endpoint_chains.items():
        # each chain is unpacked into its leading elements and its last element
        for *_chain, tag in chains:
            if _chain:
                next_thing = _chain[-1]
            # NOTE(review): when _chain is empty, next_thing keeps its value
            # from an earlier iteration, or is unbound (NameError) if no
            # earlier chain had more than one element
            for ets in tts:
                if tag in ets:
                    tt[ets] = next_thing

    terminal_tagsets = tt

    #[print(wat) for wat in terminal_chains.values()]
    #pprint(terminal_chains)
    return tag_types, tag_tokens, partInstances, valid_tagsets, terminal_tagsets, tag_transitions
コード例 #3
0
class OntologyAPI(object):
    """Class that includes methods for querying an RDFS/OWL ontology.

    The ontology is loaded into ``self.rdfGraph`` on construction. After
    loading, ``self.baseURI``, ``self.allclasses`` and ``self.topClasses``
    are computed once and cached on the instance.
    """

    def __init__(self, uri, language=""):
        """Load the ontology found at ``uri``.

        :param uri: location handed to ``rdfGraph.parse``; also used as
            ``baseURI`` when the graph declares no ``owl:Ontology``.
        :param language: accepted for backward compatibility; not used.
        :raises Exception: when the source cannot be parsed as RDF/XML, N3
            or Turtle.
        """
        super(OntologyAPI, self).__init__()

        self.rdfGraph = ConjunctiveGraph()
        self._parseOntology(uri)

        # Index the ontology only after a successful parse. Doing this in a
        # ``finally`` clause would also run it on an empty graph just before
        # the parse error propagates.
        self.baseURI = self.getOntologyURI() or uri
        self.allclasses = self.getAllNamedClasses()
        self.topClasses = self.getTopClasses()

    def _parseOntology(self, uri):
        """Parse ``uri`` into ``self.rdfGraph``, trying xml, n3, then turtle."""
        for fmt in ("xml", "n3"):
            try:
                self.rdfGraph.parse(uri, format=fmt)
                return
            except Exception as e:
                # Best effort: report the failure and fall through to the
                # next serialisation.
                print(fmt + ' failed')
                print(e.args)

        try:
            print('trying ttl')
            self.rdfGraph.parse(uri, format="turtle")
            print('successful ttl')
        except Exception as e:
            print('ttl failed')
            print(e.args)
            raise Exception(
                'Could not parse the file! Is it a valid RDF/OWL ontology?'
            )

    def getOntologyURI(self, return_as_string=True):
        """Return the first subject typed ``owl:Ontology``, or None.

        :param return_as_string: when true the URI is converted with
            ``str``; otherwise the graph node is returned as is.
        """
        pattern = (None, RDF.type, OWL.Ontology)
        for subject, _, _ in self.rdfGraph.triples(pattern):
            # Only the first match is wanted, so stop right away.
            return str(subject) if return_as_string else subject
        return None

    def getAllNamedClasses(self, classPredicate="", removeBlankNodes=True):
        """
        Extracts all the classes from a model
        We use the RDFS and OWL predicate by default; also, we extract non explicitly declared classes

        :param classPredicate: "" (default) collects OWL and RDFS classes plus
            everything mentioned in subClassOf / domain / range statements;
            "rdf" or "rdfs" restricts to ``rdfs:Class`` instances; "owl"
            restricts to ``owl:Class`` instances.
        :param removeBlankNodes: drop entities for which ``isBlankNode`` is
            true.
        :return: list of class nodes, in no particular order.
        :raises Exception: for any other ``classPredicate`` value.
        """

        rdfGraph = self.rdfGraph
        entities = set()

        def count(pattern):
            # Count lazily rather than building a throw-away list.
            return sum(1 for _ in rdfGraph.triples(pattern))

        # estimate lengths
        print('owl classes')
        print(count((None, RDF.type, OWL.Class)))

        print('rdfs classes')
        print(count((None, RDF.type, RDFS.Class)))

        print('subclass of anything')
        print(count((None, RDFS.subClassOf, None)))

        print('rdfs domain')
        print(count((None, RDFS.domain, None)))

        print('rdfs range')
        print(count((None, RDFS.range, None)))

        if not classPredicate:
            print('getting owl classes')
            for s, _, _ in rdfGraph.triples((None, RDF.type, OWL.Class)):
                entities.add(s)
            print('getting rdf classes')
            for s, _, _ in rdfGraph.triples((None, RDF.type, RDFS.Class)):
                entities.add(s)

            # this extra routine makes sure we include classes not declared explicitly
            # eg when importing another onto and subclassing one of its classes...
            print('getting unnamed axioms')
            for s, _, o in rdfGraph.triples((None, RDFS.subClassOf, None)):
                entities.add(s)
                entities.add(o)

            # this extra routine includes classes found only in rdfs:domain and rdfs:range definitions
            for _, _, o in rdfGraph.triples((None, RDFS.domain, None)):
                entities.add(o)
            for _, _, o in rdfGraph.triples((None, RDFS.range, None)):
                entities.add(o)

        elif classPredicate in ("rdfs", "rdf"):
            for s, _, _ in rdfGraph.triples((None, RDF.type, RDFS.Class)):
                entities.add(s)
        elif classPredicate == "owl":
            for s, _, _ in rdfGraph.triples((None, RDF.type, OWL.Class)):
                entities.add(s)
        else:
            raise Exception(
                'ClassPredicate must be either rdf, rdfs or owl')

        if removeBlankNodes:
            return [x for x in entities if not isBlankNode(x)]

        # return sort_uri_list_by_name(entities)
        return list(entities)

    # methods for getting ancestors and descendants of classes: by default, we do not include blank nodes

    def getDirectSuperclasses(self, uri, excludeBnodes=True):
        """Return the objects of ``uri rdfs:subClassOf ?o`` (deduplicated)."""
        found = set()
        for _, _, o in self.rdfGraph.triples((uri, RDFS.subClassOf, None)):
            if excludeBnodes and isBlankNode(o):
                continue
            found.add(o)

        return list(found)

    def getDirectSubclasses(self, uri, excludeBnodes=True):
        """Return the subjects of ``?s rdfs:subClassOf uri`` (deduplicated)."""
        found = set()
        for s, _, _ in self.rdfGraph.triples((None, RDFS.subClassOf, uri)):
            if excludeBnodes and isBlankNode(s):
                continue
            found.add(s)

        return list(found)

    def getDescendants(self, uri, excludeBnodes=True):
        """Return all transitive subclasses of ``uri`` (``uri`` excluded)."""
        return self.getTransitiveSubjects(RDFS.subClassOf, uri, excludeBnodes)

    def getAncestors(self, uri, excludeBnodes=True):
        """Return all transitive superclasses of ``uri`` (``uri`` excluded)."""
        return self.getTransitiveObjects(uri, RDFS.subClassOf, excludeBnodes)

    def getClassSiblings(self, uri, excludeBnodes=True):
        """Return the classes sharing a direct superclass with ``uri``."""
        siblings = set()
        for father in self.getDirectSuperclasses(uri, excludeBnodes):
            for child in self.getDirectSubclasses(father, excludeBnodes):
                if child != uri:
                    siblings.add(child)

        return list(siblings)

    def getValuesForGivenProperty(self, subject, property):
        """Return every object of ``subject property ?o`` in graph order."""
        pattern = (subject, property, None)
        return [o for _, _, o in self.rdfGraph.triples(pattern)]

    def getRDFSLabel(self, subject):
        '''
        Get the RDFS label for a given class
        :param subject: node whose label is requested
        :return: whatever ``rdfGraph.label(subject)`` returns
        '''
        # NOTE(review): Graph.label() may not exist in recent rdflib
        # releases -- confirm against the pinned rdflib version.
        return self.rdfGraph.label(subject)

    def getTransitiveSubjects(self, predicate, object, excludeBnodes=True):
        """Return subjects reaching ``object`` through ``predicate`` chains.

        ``object`` itself is never part of the result. Blank nodes are
        dropped unless ``excludeBnodes`` is false.
        """
        values = set()
        for s in self.rdfGraph.transitive_subjects(predicate, object):
            if excludeBnodes and isBlankNode(s):
                continue
            values.add(s)
        values.discard(object)
        return list(values)

    def getTransitiveObjects(self, subject, predicate, excludeBnodes=True):
        """Return objects reachable from ``subject`` through ``predicate``.

        ``subject`` itself is never part of the result. Blank nodes are
        dropped unless ``excludeBnodes`` is false.
        """
        values = set()
        for o in self.rdfGraph.transitive_objects(subject, predicate):
            if excludeBnodes and isBlankNode(o):
                continue
            values.add(o)
        values.discard(subject)
        return list(values)

    def getTopClasses(self, classPredicate=''):
        """ Finds the topclass in an ontology (works also when we have more than on superclass)

        A top class is any member of ``self.allclasses`` without a direct
        (non blank node) superclass. ``classPredicate`` is accepted for
        backward compatibility and not used.
        """
        tops = set()

        # gets all the classes
        for eachclass in self.allclasses:
            if not self.getDirectSuperclasses(eachclass):
                tops.add(eachclass)

        return list(tops)

    def getClassTree(self, father=None, out=None):
        """ Reconstructs the taxonomical tree of an ontology, from the 'topClasses' (= classes with no supers, see below)
            Returns a dictionary in which each class is a key, and its direct subs are the values.
            The top classes are stored under the integer key 0

            Eg.
            {0 : [class1, class2], class1: [class1-2, class1-3], class2: [class2-1, class2-2]}

            When ``father`` is given, only the subtree below it is added to
            ``out`` (a fresh dict when ``out`` is None) and that dict is
            returned. Classes already present in ``out`` are not expanded
            again, so cyclic or diamond-shaped hierarchies terminate.
        """

        if not father:
            out = {}
            topclasses = self.topClasses
            out[0] = topclasses

            for top in topclasses:
                self._fillClassTree(top, out)

            return out

        if out is None:
            out = {}
        self._fillClassTree(father, out)
        return out

    def _fillClassTree(self, root, out):
        """Record the direct subclasses of ``root`` and all its descendants in ``out``."""
        pending = [root]
        while pending:
            node = pending.pop()
            if node in out:
                # Already expanded: reached through another parent or a cycle.
                continue
            children = self.getDirectSubclasses(node)
            out[node] = children
            pending.extend(children)
コード例 #4
0
ファイル: transitive.py プロジェクト: RDFLib/rdflib
"""

if __name__ == '__main__':
    # Demo: build a tiny family tree and walk the `ex:parent` relation
    # transitively in both directions.
    from rdflib import ConjunctiveGraph, URIRef

    # The relation being traversed.
    parent = URIRef('ex:parent')

    # Three generations: a person, both parents, all four grandparents.
    person = URIRef('ex:person')
    dad, mom = URIRef('ex:d'), URIRef('ex:m')
    momOfDad, momOfMom = URIRef('ex:gm0'), URIRef('ex:gm1')
    dadOfDad, dadOfMom = URIRef('ex:gf0'), URIRef('ex:gf1')

    g = ConjunctiveGraph()
    # (child, parent-of-child) pairs, added in a fixed order.
    for child, elder in (
        (person, dad),
        (person, mom),
        (dad, momOfDad),
        (dad, dadOfDad),
        (mom, momOfMom),
        (mom, dadOfMom),
    ):
        g.add((child, parent, elder))

    print("Parents, forward from `ex:person`:")
    for ancestor in g.transitive_objects(person, parent):
        print(ancestor)

    print("Parents, *backward* from `ex:gm1`:")
    for descendant in g.transitive_subjects(parent, momOfMom):
        print(descendant)