コード例 #1
0
ファイル: rdfhelp.py プロジェクト: detrout/encoded_client
def get_turtle_header():
    """Build the ``@prefix`` preamble for a Turtle document.

    A scratch ``ConjunctiveGraph`` is handed to ``add_default_namespaces``
    and every (prefix, namespace) pair the graph then reports is rendered
    as one ``@prefix`` line.

    Returns:
        The ``@prefix`` lines joined with newlines (no trailing newline).
    """
    scratch = ConjunctiveGraph()
    add_default_namespaces(scratch)
    return "\n".join(
        "@prefix {}: <{}>.".format(prefix, uri)
        for prefix, uri in scratch.namespaces()
    )
コード例 #2
0
ファイル: rdfobject.py プロジェクト: benosteen/RDFobject
 def from_string(self, uri, text, format="xml", encoding="utf-8"):
     """Reset this object and repopulate it from serialised RDF text.

     Args:
         uri: URI assigned to this object via ``set_uri``; also passed
             as the ``system_id`` of the input source.
         text: The serialised RDF to parse.
         format: Parser format name handed to ``parse``.
         encoding: Encoding declared on the input source.

     Every namespace binding found in the parsed graph is copied onto
     this object, followed by every triple whose subject is ``self.uri``.
     """
     self.reset()
     self.set_uri(uri)

     source = TextInputSource(text, system_id=uri)
     source.setEncoding(encoding)
     parsed = ConjunctiveGraph(identifier=self.uri).parse(source, format)

     for prefix, namespace in parsed.namespaces():
         self.add_namespace(prefix, namespace)
     for _subject, predicate, obj in parsed.triples((self.uri, None, None)):
         self.add_triple(predicate, obj)
コード例 #3
0
ファイル: rdfobject.py プロジェクト: benosteen/RDFobject
 def from_url(self, url, uri=None, format="xml", encoding="utf-8"):
     """Reset this object and repopulate it from RDF fetched from *url*.

     Args:
         url: Location handed to the graph parser.
         uri: URI to assign to this object; when falsy, *url* is used.
         format: Parser format name handed to ``parse``.
         encoding: Accepted for signature parity; not used by this method.

     Every namespace binding found in the parsed graph is copied onto
     this object, followed by every triple whose subject is ``self.uri``.
     """
     self.reset()
     self.set_uri(uri if uri else url)

     parsed = ConjunctiveGraph(identifier=self.uri).parse(url, format)

     for prefix, namespace in parsed.namespaces():
         self.add_namespace(prefix, namespace)
     for _subject, predicate, obj in parsed.triples((self.uri, None, None)):
         self.add_triple(predicate, obj)
コード例 #4
0
ファイル: manifest.py プロジェクト: benosteen/RDFobject
    def from_string(self, rdf_manifest_string, format="xml"):
        """Load a serialised RDF manifest into this object.

        Args:
            rdf_manifest_string: The serialised RDF to parse.
            format: Parser format name handed to ``parse``.

        Each distinct subject is appended to ``self.items`` (first-seen
        order) and gets an ``RDFobject`` entry in ``self.items_rdfobjects``.
        ``rdf:type`` statements are recorded with ``add_type``; every other
        statement with ``add_triple``.  Finally the namespace bindings of
        the parsed graph are copied onto this object.
        """
        source = TextInputSource(rdf_manifest_string)
        parsed = ConjunctiveGraph().parse(source, format)

        for subject, predicate, obj in parsed.triples((None, None, None)):
            if subject not in self.items:
                self.items.append(subject)

            is_type_statement = predicate == NAMESPACES['rdf']['type']
            entry = self.items_rdfobjects.setdefault(subject,
                                                     RDFobject(uri=subject))
            if is_type_statement:
                entry.add_type(obj)
            else:
                entry.add_triple(predicate, obj)

        for prefix, namespace in parsed.namespaces():
            self.add_namespace(prefix, namespace)
コード例 #5
0
def list_used_namespaces():
    """
    Print Python code as strings
    that can be used to setup namespaces.

    Every file returned by ``get_file_list(sys.argv[1])`` is parsed as
    Turtle into one graph.  For each namespace bound in that graph the
    ``namespace_rq`` query template (with ``--ns--`` replaced by the
    namespace) is run; when its ASK answer is ``True`` a
    ``PREFIX = Namespace('...')`` line is printed and the prefix is bound
    in the returned manager.

    Returns:
        A ``NamespaceManager`` holding only the namespaces reported as used.
    """
    g = ConjunctiveGraph()
    for full in get_file_list(sys.argv[1]):
        g.parse(source=full, format='turtle')

    ns_mgr = NamespaceManager(Graph())
    for pre, ns in g.namespaces():
        q = namespace_rq.replace('--ns--', ns.toPython())
        used = g.query(q).askAnswer
        if used is True:
            # Single-argument call form: valid on Python 3 and prints the
            # same text under the Python 2 print statement.
            print("{} = Namespace('{}')".format(pre.upper(), ns))
            ns_mgr.bind(pre, Namespace(ns))
    return ns_mgr
コード例 #6
0
class SQLATestCase(unittest.TestCase):
    """Smoke tests for the "SQLAlchemy" rdflib store plugin.

    Each test runs against a fresh store opened on ``dburi`` and wrapped
    in a ``ConjunctiveGraph``.
    """

    identifier = URIRef("rdflib_test")
    dburi = Literal("sqlite://")

    def setUp(self):
        """Create the store, wrap it in a graph and open it."""
        self.store = plugin.get(
            "SQLAlchemy", Store)(identifier=self.identifier)
        self.graph = ConjunctiveGraph(self.store, identifier=self.identifier)
        self.graph.open(self.dburi, create=True)

    def tearDown(self):
        """Destroy the store contents, then close the graph (best effort)."""
        self.graph.destroy(self.dburi)
        try:
            self.graph.close()
        except Exception:
            # Closing is deliberately best-effort; unlike a bare ``except``
            # this still lets KeyboardInterrupt/SystemExit propagate.
            pass

    def test_registerplugins(self):
        # I doubt this is quite right for a fresh pip installation,
        # this test is mainly here to fill a coverage gap.
        registerplugins()
        self.assertIsNotNone(plugin.get("SQLAlchemy", Store))
        p = plugin._plugins
        self.assertIn(("SQLAlchemy", Store), p, p)
        # Drop the registration and check registerplugins() restores it.
        del p[("SQLAlchemy", Store)]
        plugin._plugins = p
        registerplugins()
        self.assertIn(("SQLAlchemy", Store), p, p)

    def test_namespaces(self):
        self.assertNotEqual(list(self.graph.namespaces()), [])

    def test_contexts_without_triple(self):
        self.assertEqual(list(self.graph.contexts()), [])

    def test_contexts_with_triple(self):
        statemnt = (michel, likes, pizza)
        # NOTE(review): if contexts() returns a generator, comparing it to a
        # list is always "not equal", so this assertion cannot fail.  Kept
        # as-is; confirm the intended expectation before tightening it.
        self.assertTrue(self.graph.contexts(triple=statemnt) != [])

    def test__len(self):
        self.assertEqual(self.store.__len__(), 0)

    def test__remove_context(self):
        # Only checks that the call does not raise.
        self.store._remove_context(self.identifier)
コード例 #7
0
ファイル: query.py プロジェクト: octadocs/octadocs
def query(
    query_text: str,
    instance: rdflib.ConjunctiveGraph,
    **kwargs: str,
) -> QueryResult:
    """Run a SPARQL query against *instance* and return a convenient result.

    Keyword arguments are forwarded as ``initBindings``.

    The return value depends on what the query result carries:

    * an ASK answer (``askAnswer`` is not ``None``) is returned as is;
    * otherwise, a result graph is returned after the prefixes bound on
      *instance* have been bound on it;
    * otherwise, the bindings are passed through ``_format_query_bindings``.
    """
    outcome: SPARQLResult = instance.query(query_text, initBindings=kwargs)

    ask_answer = outcome.askAnswer
    if ask_answer is not None:
        return ask_answer

    result_graph: rdflib.Graph = outcome.graph
    if result_graph is None:
        return _format_query_bindings(outcome.bindings)

    # Carry the source graph's prefixes over to the result graph.
    for prefix, namespace in instance.namespaces():
        result_graph.bind(prefix, namespace)
    return result_graph
コード例 #8
0
def parse_workflow():
    """Load the two workflow graphml files and derive tag/state structures.

    The files ``workflow-rrid.graphml`` and ``workflow-paper-id.graphml``
    are read from the ``docs`` directory one level above the directory
    containing this file and loaded into a single ``ConjunctiveGraph``.

    Side effects visible in this function: the graph is handed to
    ``write(cgraph, '/tmp/workflow.ttl')``, ``OntCuries`` is called with
    the graph's prefixes plus RRID/DOI/PMID, and chains are printed to
    stdout.

    Returns:
        A 6-tuple ``(tag_types, tag_tokens, partInstances, valid_tagsets,
        terminal_tagsets, tag_transitions)``.  Note that the returned
        ``valid_tagsets`` is the frozenset built near the end of the
        function, not the nested helper of the same name.
    """
    # FIXME TODO these states should probably be compiled down to numbers???
    docs = Path(__file__).parent.absolute().resolve().parent / 'docs'
    rridpath = docs / 'workflow-rrid.graphml'
    paperpath = docs / 'workflow-paper-id.graphml'

    cgraph = ConjunctiveGraph()
    gt.WorkflowMapping(rridpath.as_posix()).graph(cgraph)
    gt.PaperIdMapping(paperpath.as_posix(), False).graph(cgraph)
    write(cgraph, '/tmp/workflow.ttl')
    predicates = set(cgraph.predicates())
    OntCuries({cp:str(ip) for cp, ip in cgraph.namespaces()})
    OntCuries({'RRID': 'https://scicrunch.org/resolver/RRID:',
               'DOI': 'https://doi.org/',
               'PMID': 'https://www.ncbi.nlm.nih.gov/pubmed/'})
    hg = makeGraph('', graph=cgraph)
    short = sorted(hg.qname(_) for _ in predicates)  # not used later in this function

    # Bare attribute accesses; they have no effect beyond the lookup itself.
    # NOTE(review): presumably here to fail early if a name is missing -- confirm.
    wf.hasTag
    wf.hasReplyTag
    wf.hasTagOrReplyTag
    wf.hasOutputTag

    #if type isa wf.tag

    tag_types = set(cgraph.transitive_subjects(rdfs.subClassOf, wf.tag))
    tag_tokens = {tagType:sorted(set(t for t in cgraph.transitive_subjects(rdf.type, tagType)
                                     if t != tagType))
                  for tagType in tag_types}
    has_tag_types = set(cgraph.transitive_subjects(rdfs.subPropertyOf, wf.hasTagOrReplyTag))
    has_tag_types.add(wf.hasOutputTag)
    has_next_action_types = set(cgraph.transitive_subjects(rdfs.subPropertyOf, wf.hasOutput))
    has_next_action_types.add(wf.hasNextStep)

    # Non-blank-node tags (of any tag type except wf.tagScibot) that have no
    # object under any of the has_tag_types predicates.
    terminals = sorted(tag
                       for ttype in tag_types
                       if ttype != wf.tagScibot  # scibot is not 'terminal' for this part
                       for tag in cgraph[:rdf.type:ttype]
                       if not isinstance(tag, BNode)
                       and not any(o for httype in has_tag_types
                                   for o in cgraph[tag:httype]))

    # Non-blank-node wf.state instances that have no object under any of the
    # has_next_action_types predicates.
    endpoints = sorted(endpoint
                       for endpoint in cgraph[:rdf.type:wf.state]
                       if not isinstance(endpoint, BNode)
                       and not any(o for hnatype in has_next_action_types
                                   for o in cgraph[endpoint:hnatype]))

    # Not used later in this function.
    complicated = sorted(a_given_tag
                         for tt in tag_types
                         for a_given_tag in cgraph[:rdf.type:tt]
                         if not isinstance(a_given_tag, BNode)
                         and not [successor_tag
                                  for htt in has_tag_types
                                  # The generator passed to chain() must be
                                  # parenthesized: an unparenthesized generator
                                  # argument followed by a comma is a
                                  # SyntaxError on Python 3.7+.
                                  for successor_tag in chain(
                                      (t
                                       for t in cgraph[a_given_tag:htt]
                                       #if not isinstance(t, BNode)
                                      ),
                                      # we don't actually need this for terminals
                                      # we will need it later
                                      #(t for b in cgraph[a_given_tag:htt]
                                      #if isinstance(b, BNode)
                                      #for listhead in cgraph[b:owl.oneOf]
                                      #for t in unlist(listhead, cgraph)),
                                  )])

    def topList(node, g):
        # Yield every subject s with (s, rdf:rest, node).
        for s in g[:rdf.rest:node]:
            yield s

    def getLists(node, g):
        # For every list cell whose rdf:first is node, yield the last value
        # produced by the transitive closure of topList from that cell, or
        # the cell itself when the closure yields nothing truthy.
        for linker in g[:rdf.first:node]:
            top = None
            for top in g.transitiveClosure(topList, linker):
                pass

            if top:
                yield top
            else:
                yield linker

    def getIsTagOf(node, g):
        # Subjects that point at node through any has_tag_types predicate.
        for htt in has_tag_types:
            for parent_tag in g[:htt:node]:
                yield parent_tag

    def getIsOneOfTagOf(node, g):
        # Subjects that point (through any predicate) at something that has
        # an owl:oneOf list containing node.
        for list_top in getLists(node, g):
            for linker in g[:owl.oneOf:list_top]:
                for parent_tag, _ in g[::linker]:
                    yield parent_tag

    def getPreviousTag(node, g):  # not quite what we need
        yield from getIsOneOfTagOf(node, g)
        yield from getIsTagOf(node, g)

    def getTagChains(node, g, seen=tuple()):
        # seen to prevent recursion cases where
        # taggning can occur in either order e.g. PMID -> DOI
        #print(tc.red(repr(OntId(node))))  # tc.red(OntId(node)) does weird stuff O_o
        parent_tag = None
        for parent_tag in chain(getIsOneOfTagOf(node, g),
                                getIsTagOf(node, g)):
            if parent_tag in seen:
                parent_tag = None
                continue
            # NOTE(review): next() raises StopIteration when parent_tag has
            # no rdf:type; inside a generator that surfaces as RuntimeError
            # on Python 3.7+ -- confirm every parent is typed.
            ptt = next(g[parent_tag:rdf.type])
            #if ptt in tag_types:
            for pchain in getTagChains(parent_tag, g, seen + (node,)):
                if ptt in tag_types:
                    out = parent_tag, *pchain
                else:
                    out = pchain
                yield out

            # NOTE(review): `out` is only bound inside the loop above; it is
            # evaluated here only when ptt is falsy.
            if not ptt and not out:
                parent_tag = None

        # Base case: no usable parent on the last iteration (or none at all).
        if not parent_tag:
            yield tuple()

    def getInitiatesAction(node, g):
        for action in g[:wf.initiatesAction:node]:
            yield action

    def getIsOneOfOutputOf(node, g):
        for list_top in getLists(node, g):
            for linker in g[:owl.oneOf:list_top]:
                for hot in has_next_action_types:
                    for parent_thing  in g[:hot:linker]:
                        yield parent_thing

    def getActionChains(node, g):
        # Unlike getTagChains there is no `seen` guard here.
        # NOTE(review): presumably relies on the action graph being acyclic.
        parent_action = None
        for parent_action in chain(getIsOneOfOutputOf(node, g),  # works for actions too
                                   getInitiatesAction(node, g)):
            for pchain in getActionChains(parent_action, g):  # NOTE may also be a tag...
                out = parent_action, *pchain
                #print(tuple(hg.qname(o) for o in out))
                yield out

        if not parent_action:
            yield tuple()

    def getRestSubjects(predicate, object, g):
        """ invert restriction """
        rsco = cmb.Restriction(rdfs.subClassOf)
        for rt in rsco.parse(graph=g):
            if rt.p == predicate and rt.o == object:
                yield from g.transitive_subjects(rdfs.subClassOf, rt.s)

    annoParts = list(getRestSubjects(wf.isAttachedTo, wf.annotation, cgraph))
    partInstances = {OntId(a):set(t if isinstance(t, BNode) else OntId(t)
                                  for t in cgraph.transitive_subjects(rdf.type, a)
                                  if not isinstance(t, BNode) and t != a)
                     for a in annoParts}

    # Not used later in this function (make_chains below builds the same shape).
    _endpoint_chains = {OntId(endpoint):[[OntId(endpoint)] + [OntId(e) for e in chain]
                                            for chain in getActionChains(endpoint, cgraph)]
                        for endpoint in endpoints}

    #print([hg.qname(e) for e in endpoints])
    #print([print([hg.qname(c) for c in getActionChains(endpoint, cgraph) if c])
           #for endpoint in endpoints
           #if endpoint])

    #_ = [print(list(getActionChains(e, cgraph)) for e in endpoints)]
    #return

    # `wat` and `fake_chains` are not used later in this function.
    wat = cgraph.transitiveClosure(getPreviousTag, RRIDCUR.Duplicate)
    wat = list(wat)
    #def invOneOf(tag, g):

    fake_chains = {hg.qname(terminal):
                   [hg.qname(c)
                    for c in cgraph.transitiveClosure(getPreviousTag, terminal)]
                   for terminal in terminals}

    def make_chains(things, getChains):
        # Map OntId(thing) -> list of chains, each chain starting with the
        # thing itself followed by what getChains yielded for it.
        return {OntId(thing):[[OntId(thing)] + [OntId(e) for e in chain]
                              for chain in getChains(thing, cgraph)]
                for thing in things
                #if not print(thing)
        }

    def print_chains(thing_chains):
        print('\nstart from beginning')

        print('\n'.join(sorted(' -> '.join(hg.qname(e) for e in reversed(chain))
                               for chains in thing_chains.values()
                               for chain in chains)))

        print('\nstart from end')

        print('\n'.join(sorted(' <- '.join(e.curie for e in chain)
                               for chains in thing_chains.values()
                               for chain in chains)))

    def valid_tagsets(all_chains):
        # not the most efficient way to do this ...
        # Walk every chain from its far end, accumulating the elements seen
        # so far; each accumulated frozenset is a state, and
        # transitions[prior] collects the states reachable in one step
        # (the initial prior state is None).
        transitions = defaultdict(set)
        for end, chains in all_chains.items():
            for chain in chains:
                valid = set()
                prior_state = None
                for element in reversed(chain):
                    valid.add(element)
                    state = frozenset(valid)
                    transitions[prior_state].add(state)
                    prior_state = state

        return {s:frozenset(n) for s, n in transitions.items()}

    endpoint_chains = make_chains(endpoints, getActionChains)
    #endpoint_transitions = valid_transitions(endpoint_chains)  # not the right structure
    print_chains(endpoint_chains)
    terminal_chains = make_chains(terminals, getTagChains)
    print_chains(terminal_chains)
    tag_transitions = valid_tagsets(terminal_chains)
    terminal_tags_to_endpoints =  'TODO'

    def printq(*things):
        print(*(OntId(t).curie for t in things))

    from pprint import pprint
    def get_linkers(s, o, g, linkerFunc):  # FIXME not right
        # Yield predicates linking s directly to o, then predicates linking
        # s to anything linkerFunc(o, g) yields.
        for p in g[s::o]:
            yield p

        for l in linkerFunc(o, g):
            #print(tc.blue(f'{OntId(s).curie} {l if isinstance(l, BNode) else OntId(l).curie}'))
            for p in g[s::l]:
                #print(tc.red(f'{s} {l} {o} {p}'))
                yield p
        return  # NOTE(review): everything below in this function is unreachable
        linkers = set(l for l in g.transitiveClosure(linkerFunc, o))
        for p, o in g[s::]:
            if o in linkers:
                yield p

    def edge_to_symbol(p, rev=False):
        # Map a predicate to its arrow symbol; `rev` selects the mirrored form.
        if p == wf.initiatesAction:
            return '<<' if rev else '>>'
        elif p == wf.hasReplyTag:
            return '<' if rev else '>'
        elif p == wf.hasTagOrReplyTag:
            return '<=' if rev else '=>'
        elif p == wf.hasOutputTag:
            return '-<-' if rev else '->-'
        else:
            return '<??' if rev else '??>'

    def chain_to_typed_chain(chain, g, func):
        # duh...
        #pprint(chain)
        # For each adjacent pair, emit the flat sequence s, symbol, o
        # (`yield from` over the 3-tuple yields its items one by one).
        for s, o in zip(chain, chain[1:]):
            # TODO deal with reversed case
            s, o = s.u, o.u
            p = None
            #print(s, o)
            printq(s, o)
            for p in get_linkers(s, o, g, func):
                #print(tc.yellow(p))
                #yield (s, edge_to_symbol(p), o)
                yield from (s, edge_to_symbol(p), o)

            # No forward edge found: try the reverse direction.
            if not p:
                for rp in get_linkers(o, s, g, func):
                    print(tc.blue(rp))
                    yield from (s, edge_to_symbol(rp, rev=True), o)

    def tchains(thing_chains, func):
        return sorted([OntId(e).curie if isinstance(e, URIRef) else e
                       for e in chain_to_typed_chain(list(reversed(chain)), cgraph, func)]
                      for chains in thing_chains.values()
                      for chain in chains)

    def getLinkers(node, g):
        for list_top in getLists(node, g):
            for linker in g[:owl.oneOf:list_top]:
                yield linker

    def allSubjects(object, graph):
        yield from (s for s, p in graph[::object])
        yield from getLinkers(object, graph)

    print()
    ttc = tchains(terminal_chains, allSubjects)
    tec = tchains(endpoint_chains, allSubjects)
    pprint(ttc)
    pprint(tec)

    # From here on `valid_tagsets` is rebound from the helper function above
    # to the frozenset of every state that appears as a successor in
    # tag_transitions; this frozenset is what gets returned.
    valid_tagsets = frozenset((t for s in tag_transitions.values() for t in s))
    # States that are never a key of tag_transitions, i.e. have no successor.
    tts = valid_tagsets - frozenset(tag_transitions)
    endtype = 'TODO'  # placeholder; rebound by the loop below
    tt = {}
    for endtype, chains  in endpoint_chains.items():
        for *_chain, tag in chains:
            # NOTE(review): when _chain is empty, next_thing keeps its value
            # from an earlier iteration (or is unbound on the very first
            # one) -- confirm that single-element chains cannot occur.
            if _chain:
                next_thing = _chain[-1]
            for ets in tts:
                if tag in ets:
                    tt[ets] = next_thing

    terminal_tagsets = tt

    #[print(wat) for wat in terminal_chains.values()]
    #pprint(terminal_chains)
    return tag_types, tag_tokens, partInstances, valid_tagsets, terminal_tagsets, tag_transitions
コード例 #9
0
class SQLATestCase(unittest.TestCase):
    """Smoke tests for the "SQLAlchemy" rdflib store plugin and its helpers.

    Each test runs against a fresh store opened on ``dburi`` and wrapped
    in a ``ConjunctiveGraph``.
    """

    identifier = URIRef("rdflib_test")
    dburi = Literal('sqlite://')

    def setUp(self):
        """Create the store, wrap it in a graph and open it."""
        self.store = plugin.get(
            "SQLAlchemy", Store)(identifier=self.identifier)
        self.graph = ConjunctiveGraph(self.store, identifier=self.identifier)
        self.graph.open(self.dburi, create=True)

    def tearDown(self):
        """Destroy the store contents, then close the graph (best effort)."""
        self.graph.destroy(self.dburi)
        try:
            self.graph.close()
        except Exception:
            # Closing is deliberately best-effort; unlike a bare ``except``
            # this still lets KeyboardInterrupt/SystemExit propagate.
            pass

    def test_registerplugins(self):
        # I doubt this is quite right for a fresh pip installation,
        # this test is mainly here to fill a coverage gap.
        from rdflib_sqlalchemy import registerplugins
        from rdflib import plugin
        from rdflib.store import Store
        registerplugins()
        self.assertIsNotNone(plugin.get('SQLAlchemy', Store))
        p = plugin._plugins
        self.assertIn(('SQLAlchemy', Store), p, p)
        # Drop the registration and check registerplugins() restores it.
        del p[('SQLAlchemy', Store)]
        plugin._plugins = p
        registerplugins()
        self.assertIn(('SQLAlchemy', Store), p, p)

    def test_skolemisation(self):
        from rdflib_sqlalchemy.SQLAlchemy import skolemise
        testbnode = BNode()
        statemnt = (michel, likes, testbnode)
        res = skolemise(statemnt)
        self.assertIn('bnode:N', str(res[2]), res)

    def test_deskolemisation(self):
        from rdflib_sqlalchemy.SQLAlchemy import deskolemise
        testbnode = BNode()
        statemnt = (michel, likes, testbnode)
        res = deskolemise(statemnt)
        self.assertTrue(str(res[2]).startswith('N'), res)

    def test_redeskolemisation(self):
        from rdflib_sqlalchemy.SQLAlchemy import skolemise, deskolemise
        testbnode = BNode()
        statemnt = skolemise((michel, likes, testbnode))
        res = deskolemise(statemnt)
        self.assertTrue(str(res[2]).startswith('N'), res)

    def test__parse_rfc1738_args(self):
        from rdflib_sqlalchemy.SQLAlchemy import _parse_rfc1738_args
        self.assertRaises(ValueError, _parse_rfc1738_args, 'Not parseable')

    def test_namespaces(self):
        self.assertNotEqual(list(self.graph.namespaces()), [])

    def test_contexts_without_triple(self):
        self.assertEqual(list(self.graph.contexts()), [])

    def test_contexts_with_triple(self):
        statemnt = (michel, likes, pizza)
        # NOTE(review): if contexts() returns a generator, comparing it to a
        # list is always "not equal", so this assertion cannot fail.  Kept
        # as-is; confirm the intended expectation before tightening it.
        self.assertTrue(self.graph.contexts(triple=statemnt) != [])

    def test__len(self):
        self.assertEqual(self.store.__len__(), 0)

    def test__remove_context(self):
        # Only checks that the call does not raise.
        self.store._remove_context(self.identifier)
コード例 #10
0
class ElasticTestCase(unittest.TestCase):
    """Smoke tests for the "Elasticsearch" rdflib store plugin.

    Each test runs against a store configured with ``dburi`` and wrapped
    in a ``ConjunctiveGraph``.
    """

    identifier = URIRef("rdflib_test")
    dburi = Literal("http://localhost:9200/collection")

    def setUp(self):
        """Instantiate the store, wrap it in a graph and open it."""
        store_factory = plugin.get("Elasticsearch", Store)
        self.store = store_factory(identifier=self.identifier,
                                   configuration=self.dburi)
        self.graph = ConjunctiveGraph(self.store, identifier=self.identifier)
        self.graph.open(self.dburi, create=True)

    def tearDown(self):
        """Destroy the store contents and close the graph."""
        self.graph.destroy(self.dburi)
        self.graph.close()

    def test_registerplugins(self):
        # I doubt this is quite right for a fresh pip installation,
        # this test is mainly here to fill a coverage gap.
        plugin_key = ("Elasticsearch", Store)
        registerplugins()
        self.assertIsNotNone(plugin.get("Elasticsearch", Store))
        registry = plugin._plugins
        self.assertIn(plugin_key, registry)
        # Remove the entry, then make sure re-registration puts it back.
        del registry[plugin_key]
        plugin._plugins = registry
        registerplugins()
        self.assertIn(plugin_key, registry)

    def test_namespaces(self):
        bound = list(self.graph.namespaces())
        self.assertNotEqual(bound, [])

    def test_contexts_without_triple(self):
        known_contexts = list(self.graph.contexts())
        self.assertEqual(known_contexts, [])

    def test_contexts_result(self):
        context_graph = self.graph.get_context(ctx_id)
        context_graph.add((michel, likes, pizza))
        store_contexts = list(self.store.contexts())
        self.assertEqual(store_contexts[0], ctx_id)

    def test_contexts_with_triple(self):
        triple = (michel, likes, pizza)
        matching = list(self.graph.contexts(triple=triple))
        self.assertEqual(matching, [])

    def test__len(self):
        size = self.store.__len__()
        self.assertEqual(size, 0)

    def test_triples_choices(self):
        # Set this so we're not including selects for both asserted and
        # literal tables for a choice
        self.store.STRONGLY_TYPED_TERMS = True
        # Set the grouping of terms
        self.store.max_terms_per_where = 2

        expected = [((michel, likes, pizza), ctx_id)]
        pattern = (None, likes, [michel, pizza, likes])

        # force execution of the generator
        for found in self.store.triples_choices(pattern):
            print("x=" + str(found))
            print("results=" + str(expected))
            assert found in expected
コード例 #11
0
ファイル: swap_primer.py プロジェクト: mobilemadman2/rdflib-1
    # Technically, we already created a namespace
    # with the object init (and it added some namespaces as well).
    # By default, your main namespace is the URI of your
    # current working directory, so let's make that simpler:

    myNS = Namespace('http://www.w3.org/2000/10/swap/Primer#')
    primer.bind('', myNS)
    primer.bind('owl', OWL)
    primer.bind('dc', DC)
    primer.bind('swap', 'http://www.w3.org/2000/10/swap/')

    # Let's load it up!

    primer.parse(data=mySource, format='n3')

    # Now you can query, either directly straight into a list:

    [(x, y, z) for x, y, z in primer]

    # or spit it back out (mostly) the way we created it:

    print(primer.serialize(format='n3'))

    # For more insight into things already done, let's see the namespaces:

    list(primer.namespaces())

    # Let's ask something about the data:

    list(primer.objects(myNS.pat, myNS.child))
コード例 #12
0
ファイル: test2.py プロジェクト: assemblee-virtuelle/pySolid
from rdflib import Literal, XSD, URIRef
from rdflib.namespace import FOAF, RDF, RDFS
from rdflib.serializer import Serializer
from rdflib import plugin


def pprint(msg):
    """Print the non-blank lines of *msg*, one per line.

    Args:
        msg: The text to print, either UTF-8 encoded ``bytes`` or an
            already-decoded ``str``.  Serialisers may return either,
            depending on the library version, so both are accepted.
    """
    if isinstance(msg, (bytes, bytearray)):
        msg = msg.decode('utf-8')
    for line in msg.split('\n'):
        # Skip lines that are empty or whitespace-only.
        if line.strip():
            print(line)


# Open an existing on-disk store (create=False) as a conjunctive graph.
store = 'Sleepycat'
graph = ConjunctiveGraph(store=store, identifier='mygraph')
graph.open('foaf_flask/static/rdf/sleepycat', create=False)

# Collect every triple about ?uri, whether it sits in a named graph or is
# matched by the plain pattern.
#CONSTRUCT {{ ?uri  ?p ?o . }}
query = """CONSTRUCT { ?uri ?p ?o . }
WHERE {
    {GRAPH ?g { ?uri ?p ?o } }
    UNION { ?uri ?p ?o }
}"""
# query = """CONSTRUCT { ?uri  ?p ?o . }
# WHERE  { ?uri ?p ?o } """
# ?uri is pre-bound to a single resource.
bind = {'uri': URIRef('http://127.0.0.1:5000/ldp/donna')}
# prefix -> namespace mapping taken from the store, reused for the query and
# passed again to the final serialisation.
context = dict(graph.namespaces())
query_result = graph.query(query, initBindings=bind, initNs=context)
# Round-trip the CONSTRUCT result through RDF/XML into a fresh Graph.
# NOTE(review): parse() is given no explicit format here -- confirm the
# default matches the 'xml' serialisation on the rdflib version in use.
newg = Graph().parse(data=query_result.serialize(format='xml'))
data = newg.serialize(format='turtle', context=context)
pprint(data)
コード例 #13
0
def convert(teifile, namespace):
    #graph_uri = "http://contextus.net/resource/blue_velvet/"

    ns = Namespace(namespace)

    graph = ConjunctiveGraph()
    graph.load(teifile, format="rdfa")

    graph.bind("default", ns)

    to_update = ""

    for prefix, nsuri in graph.namespaces():
        #print("prefix: " + str(prefix) + " - " + str(nsuri))
        if nsuri in ns:
            to_update = nsuri

    for s, p, o in graph:
        #    		print s, p, o
        if to_update != "" and to_update in s:
            graph.remove((s, p, o))
            s = URIRef(s.replace(to_update, ns))
            graph.add((s, p, o))

    act = ""
    scene = ""
    line = ""
    char = 0
    loc = 0

    #timeline = ns['timeline/narrative']
    #graph.add((timeline, RDF.type, ome['Timeline']))

    tree = ET.parse(teifile)
    cast = dict()

    titleNode = tree.find('//title')

    castItems = tree.findall('/text/body/div1/castList//castItem')
    for castItem in castItems:
        actorNode = castItem.find('actor')
        roleNode = castItem.find('role')

        if roleNode != None:
            id = roleNode.get("{http://www.w3.org/XML/1998/namespace}id")

        #print("Found castItem!")

        actor = None
        role = None

        # Check to see if we already have an entry
        if (roleNode != None and roleNode.get("about")):

            charname = roleNode.get("about")

            if (charname.find(":") > -1):
                nmsp, nom = charname.split(":", 1)
                charcode = "character/" + str(char)
                charref = nmsp + ":" + charcode + "]"
                role = extractCURIEorURI(graph, charref, nom[0:-1])
                char += 1
                #print("1:" + charname + ": adding id " + id + " to " + role)
            else:
                role = extractCURIEorURI(graph, charname)
                #print("2:" + charname + ": adding id " + id + " to " + role)

            cast[id] = role
            graph.add((role, RDF.type, omb['Character']))
            #print(charname + ": adding id " + id + " to " + role)

        if (actorNode != None and actorNode.get("about")):
            actor = extractCURIEorURI(graph, actorNode.get("about"))
            graph.add((actor, RDF.type, omb['Being']))

        if actor != None and role != None:
            graph.add((actor, omb['portrays'], role))
            graph.add((role, omb['portrayed-by'], actor))

    eventCount = 1
    groupCount = 1
    prior_event = None

    actItems = tree.findall('/text/body/div1')
    ref = ""

    for actItem in actItems:

        if actItem.get("type") == "act":
            act = actItem.get("n")

        sceneItems = actItem.findall('div2')

        for sceneItem in sceneItems:

            #print("Found sceneItems!")

            if sceneItem.get("type") == "scene":
                scene = sceneItem.get("n")

            # Work out the location of this scene
            location = None
            stageItems = sceneItem.findall("stage")

            #internalnum = 1
            stagenum = 0
            speechnum = 1

            for stageItem in stageItems:
                if stageItem.get("type") == "location":
                    # The RDFa parser doesn't handle the type - so we can grab that here.

                    if stageItem.get("about") != None:
                        locname = stageItem.get("about")

                        # Adding location type/oml:space for location
                        if stageItem.get("typeof") and stageItem.get("about"):
                            type = extractCURIEorURI(graph,
                                                     stageItem.get("typeof"))
                            #print "1. Location: " + str(location) + " Type: " + str(type)
                        elif stageItem.get("about"):
                            #print "2. Location: " + str(locname)
                            type = extractCURIEorURI(graph, oml['Space'])

                        # Get location value and add rdfs:label is location is not using the TEI value
                        if (locname.find(":") > -1):
                            nmsp, nom = locname.split(":", 1)
                            loccode = "location/" + str(loc)
                            locref = nmsp + ":" + loccode + "]"
                            location = extractCURIEorURI(
                                graph, locref, nom[0:-1])
                            loc += 1
                            graph.add((
                                location,
                                rdflib.URIRef(
                                    'http://www.w3.org/2000/01/rdf-schema#label'
                                ), Literal(nom[0:-1])))
                        else:
                            location = extractCURIEorURI(
                                graph, stageItem.get("about"))

                        # Add location to graph
                        graph.add((location, RDF.type, type))
                    else:
                        location = ""

                    #print("Adding location type: " + type + " (" + location + ")")

            if cast:
                # Work out a list of all cast in a given section
                currentCast = list()
                speakers = list()

            # Iterate through elements within stageItem
            # Find speaker events and add to list of current cast for inclusion in social event
            # Find reference events and add to ongoing social event ?
            # Find stage events
            # If event is an entrance then
            # create social event for people talking before entrance
            # create travel event i.e. entrance
            # add new arrival to current cast list
            # If event is exit event then
            # create social event for people talking before exit
            # create travel event i.e. exit
            # if leavers are not named directly the calculate who is leaving
            # remove leavers from current cast list
            # If reach end of scene then create social event with current cast list

            #Also need to check if social event before exit has same composition as social event after exit since then they should be merged

            event = ns['event/' + str(eventCount)]
            group = ns['group/' + str(groupCount)]

            refersTo = list()
            #parent = None
            speakerNodes = list()
            speakerRef = list()

            xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(
                perseusid) + ":act=" + str(act) + ":scene=" + str(scene)
            stagecount = 0
            stage_array = list()

            for node in sceneItem.getiterator():
                #print("Node: " + node.tag)
                """
				if node.tag == "lb":
					if node.get("ed") == "F1":
						line = node.get("n")	
						if titleNode != None:
							ref = titleNode.text + " " + str(act) + "." + str(scene) + "." + str(line)	
						else:
							ref = str(act) + "." + str(scene) + "." + str(line)
							
						#xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:"  + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene) + "#xpointer(//lb[@ed='F1' and @n='" + str(line)	 + "'])"
						xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:"  + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene)
						#print("Ref: " + xpointer)
				"""

                if node.tag == "sp":
                    id = node.get("who")

                    if id and cast:
                        speakers.append(cast[id[1:]])
                        speakerNodes.append(node)

                        if perseusid == None:
                            speakerRef.append(ref)
                        else:
                            #speechRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(int(line) + 1) + "']/ancestor::sp)"
                            speechRef = xpointer + "#xpointer(//div2/sp[" + str(
                                speechnum) + "])"
                            speakerRef.append(speechRef)
                        #print("Line ref: " + ref)

                        if cast[id[1:]] not in currentCast:
                            currentCast.append(cast[id[1:]])

                    #internalnum = 1
                    speechnum += 1
                    stagecount = 0

                    previousl = 0

                    for subnode in node.getiterator():
                        if subnode.tag == "l":
                            previousl += 1

                        if subnode.tag == "stage":
                            #print ("Stagecount: " + str(stagecount) + " Previousl: " + str(previousl) + "\n")
                            stage_array.append(previousl)
                            stagecount += 1

                elif node.tag == "stage":

                    if stagecount > 0:
                        s_max = len(stage_array)
                        diff = s_max - stagecount

                        #if diff == 0:
                        #	stagenum += 1

                        entRef = xpointer + "#xpointer(//div2/sp[" + str(
                            speechnum - 1) + "]/l[" + str(
                                stage_array[diff]) + "]/stage)"
                        #internalnum += 1
                        stagecount -= 1
                    else:
                        stagenum += 1
                        entRef = xpointer + "#xpointer(//div2/stage[" + str(
                            stagenum) + "])"

                    if node.get("type") == "entrance":

                        # Add Social Events for all the people who spoke since the last break (if there were any)

                        update = list()
                        update = getSocial(graph, ns, speakers, speakerNodes,
                                           speakerRef, cast, currentCast,
                                           eventCount, event, prior_event,
                                           location)
                        eventCount = update[0]
                        prior_event = update[1]

                        event = ns['event/' + str(eventCount)]

                        speakers = list()
                        speakerNodes = list()
                        speakerRef = list()

                        # Add Travel Event

                        graph.add((event, RDF.type, omj['Travel']))

                        if perseusid == None:
                            graph.add((
                                event,
                                rdflib.URIRef(
                                    "http://www.w3.org/2000/01/rdf-schema#seeAlso"
                                ), Literal(ref)))
                        else:
                            #entRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(line) + "']/following-sibling::*[1]/self::stage)"
                            graph.add((
                                event,
                                rdflib.URIRef(
                                    "http://www.w3.org/2000/01/rdf-schema#seeAlso"
                                ), URIRef(entRef)))

                        #print("Entrance event. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))

                        #print("Found entrence event!")
                        if location:
                            graph.add((event, ome['to'], location))

                        involved = node.get("about")

                        if (len(involved) > 0 and involved[0] == "["
                                and involved[-1] == "]"):
                            involved = involved[1:-1]

                        chunks = involved.split()

                        chunk_count = len(chunks)

                        if chunk_count > 1:
                            #type = extractCURIEorURI(graph, "[omb:Group]")
                            #graph.add((group, RDF.type, type))
                            graph.add((group, RDF.type, omb['Group']))

                        event_label = ""
                        en = 1

                        for chunk in chunks:
                            striped = chunk.strip()

                            if (len(striped) > 0 and striped[0] == "["
                                    and striped[-1] == "]"):
                                striped = striped[1:-1]
                                currentCast.append(cast[striped])

                            if chunk_count > 1:
                                graph.add(
                                    (group, ome['contains'], cast[striped]))

                                if en == chunk_count:
                                    event_label = event_label[
                                        0:-2] + " and " + striped
                                    graph.add((
                                        event,
                                        rdflib.URIRef(
                                            'http://www.w3.org/2000/01/rdf-schema#label'
                                        ), Literal(event_label + " arrive")))
                                elif en < chunk_count:
                                    event_label += striped + ", "

                            else:
                                #print("Adding person as subject-entity to entry event "   + str(eventCount))
                                graph.add((
                                    event,
                                    rdflib.URIRef(
                                        'http://www.w3.org/2000/01/rdf-schema#label'
                                    ), Literal(striped + " arrives")))
                                graph.add((event, ome['has-subject-entity'],
                                           cast[striped]))

                            en += 1

                        if chunk_count > 1:
                            graph.add(
                                (event, ome['has-subject-entity'], group))
                            #print("Adding group as subject-entity to entry event "   + str(eventCount))
                            groupCount = groupCount + 1
                            group = ns['group/' + str(groupCount)]

                        if (prior_event):
                            graph.add((event, ome['follows'], prior_event))
                            graph.add((prior_event, ome['precedes'], event))

                        prior_event = event

                        eventCount = eventCount + 1
                        event = ns['event/' + str(eventCount)]

                    if node.get("type") == "exit":

                        # Add Social Events for all the people who spoke since the last break (if there were any)
                        update = list()
                        update = getSocial(graph, ns, speakers, speakerNodes,
                                           speakerRef, cast, currentCast,
                                           eventCount, event, prior_event,
                                           location)
                        eventCount = update[0]
                        prior_event = update[1]

                        event = ns['event/' + str(eventCount)]

                        speakers = list()
                        speakerNodes = list()
                        speakerRef = list()

                        # Add Travel Event

                        graph.add((event, RDF.type, omj['Travel']))

                        if perseusid == None:
                            graph.add((
                                event,
                                rdflib.URIRef(
                                    "http://www.w3.org/2000/01/rdf-schema#seeAlso"
                                ), Literal(ref)))
                        else:
                            #exitRef = xpointer
                            #graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(exitRef)))
                            graph.add((
                                event,
                                rdflib.URIRef(
                                    "http://www.w3.org/2000/01/rdf-schema#seeAlso"
                                ), URIRef(entRef)))

                        #print("Found entrence event!")
                        if location != None:
                            graph.add((event, ome['from'], location))

                        involved = node.get("about")

                        if involved.strip() == "" or "-all" in involved:
                            # Remove everyone

                            #print("Exit all. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))

                            #for peep in currentCast:
                            #	print(peep)

                            if len(currentCast) > 1:
                                #type = extractCURIEorURI(graph, "[omb:Group]")
                                #graph.add((group, RDF.type, type))
                                graph.add((group, RDF.type, omb['Group']))

                            event_label = ""
                            en = 1

                            for peep in currentCast:
                                short_ref = ""
                                for key, value in cast.iteritems():
                                    if peep == value:
                                        short_ref = key

                                if len(currentCast) > 1:
                                    graph.add((group, ome['contains'], peep))

                                    if en == len(currentCast):
                                        event_label = event_label[
                                            0:-2] + " and " + short_ref
                                        graph.add((
                                            event,
                                            rdflib.URIRef(
                                                'http://www.w3.org/2000/01/rdf-schema#label'
                                            ),
                                            Literal(event_label + " leave")))
                                    elif en < len(currentCast):
                                        event_label += short_ref + ", "

                                else:
                                    #print("Adding person as subject-entity to exuant event "   + str(eventCount))
                                    graph.add(
                                        (event, ome['has-subject-entity'],
                                         peep))
                                    graph.add((
                                        event,
                                        rdflib.URIRef(
                                            'http://www.w3.org/2000/01/rdf-schema#label'
                                        ), Literal(short_ref + " leaves")))

                                en += 1

                            if len(currentCast) > 1:
                                graph.add(
                                    (event, ome['has-subject-entity'], group))
                                #print("Adding group as subject-entity to exuant event "   + str(eventCount))
                                groupCount = groupCount + 1
                                group = ns['group/' + str(groupCount)]

                            currentCast = list()

                        elif "!" in involved:
                            #print("Exit except some. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))

                            #print("Event: " + involved);

                            if (len(involved) > 0 and involved[0] == "["
                                    and involved[-1] == "]"):
                                involved = involved[1:-1]

                            involved = involved.strip()

                            if (len(involved) > 0 and involved[0] == "!"
                                    and involved[1] == "("
                                    and involved[-1] == ")"):
                                involved = involved[2:-1]

                            #print("involved: " + involved)

                            striped = involved.strip()

                            c_ids = striped.split()

                            chunks = list()

                            for stay in c_ids:
                                #print("Staying: " + cast[stay])
                                chunks.append(cast[stay])

                            staying = list()
                            going = list()

                            for player in currentCast:
                                #print("Player: " + player)
                                if player in chunks:
                                    staying.append(player)
                                else:
                                    going.append(player)

                            going_count = len(going)

                            if going_count > 1:
                                #type = extractCURIEorURI(graph, "[omb:Group]")
                                #graph.add((group, RDF.type, type))
                                graph.add((group, RDF.type, omb['Group']))

                            event_label = ""
                            en = 1

                            for ghost in going:
                                #print("ghost: " + ghost)

                                short_ref = ""
                                for key, value in cast.iteritems():
                                    if ghost == value:
                                        short_ref = key

                                if ghost in currentCast:
                                    currentCast.remove(ghost)
                                    #print("Current cast count: "  + str(len(currentCast)))

                                if going_count > 1:
                                    graph.add((group, ome['contains'], ghost))

                                    if en == len(going):
                                        event_label = event_label[
                                            0:-2] + " and " + short_ref
                                        graph.add((
                                            event,
                                            rdflib.URIRef(
                                                'http://www.w3.org/2000/01/rdf-schema#label'
                                            ),
                                            Literal(event_label + " leave")))
                                    elif en < len(going):
                                        event_label += short_ref + ", "

                                else:
                                    #print("Adding person as subject-entity to exit event "   + str(eventCount))
                                    graph.add(
                                        (event, ome['has-subject-entity'],
                                         ghost))
                                    graph.add((
                                        event,
                                        rdflib.URIRef(
                                            'http://www.w3.org/2000/01/rdf-schema#label'
                                        ), Literal(short_ref + " leaves")))

                                en += 1

                            if going_count > 1:
                                graph.add(
                                    (event, ome['has-subject-entity'], group))
                                #print("Adding group as subject-entity to exit event "   + str(eventCount))
                                groupCount = groupCount + 1
                                group = ns['group/' + str(groupCount)]

                        else:
                            #print("Exit some. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))

                            if (len(involved) > 0 and involved[0] == "["
                                    and involved[-1] == "]"):
                                involved = involved[1:-1]

                            striped = involved.strip()
                            chunks = striped.split()

                            #print("striped: " + striped)

                            chunk_count = len(chunks)

                            if chunk_count > 1:
                                #type = extractCURIEorURI(graph, "[omb:Group]")
                                #graph.add((group, RDF.type, type))
                                graph.add((group, RDF.type, omb['Group']))

                            event_label = ""
                            en = 1

                            for chunk in chunks:
                                #print("chunk: " + chunk)

                                ghost = cast[chunk]

                                #print("ghost: " + ghost)

                                if ghost in currentCast:
                                    currentCast.remove(ghost)
                                    #print("Current cast count: "  + str(len(currentCast)))

                                if chunk_count > 1:
                                    graph.add((group, ome['contains'], ghost))

                                    if en == len(currentCast):
                                        event_label = event_label[
                                            0:-2] + " and " + chunk
                                        graph.add((
                                            event,
                                            rdflib.URIRef(
                                                'http://www.w3.org/2000/01/rdf-schema#label'
                                            ),
                                            Literal(event_label + " leave")))
                                    elif en < len(currentCast):
                                        event_label += chunk + ", "

                                else:
                                    #print("Adding person as subject-entity to exit event "   + str(eventCount))
                                    graph.add(
                                        (event, ome['has-subject-entity'],
                                         ghost))
                                    graph.add((
                                        event,
                                        rdflib.URIRef(
                                            'http://www.w3.org/2000/01/rdf-schema#label'
                                        ), Literal(chunk + " leaves")))

                                en += 1

                            if chunk_count > 1:
                                graph.add(
                                    (event, ome['has-subject-entity'], group))
                                #print("Adding group as subject-entity to exit event "   + str(eventCount))
                                groupCount = groupCount + 1
                                group = ns['group/' + str(groupCount)]

                        if (prior_event):
                            graph.add((event, ome['follows'], prior_event))
                            graph.add((prior_event, ome['precedes'], event))

                        prior_event = event

                        eventCount = eventCount + 1
                        event = ns['event/' + str(eventCount)]

                #elif node.tag == "rs":
                #	#print("Found rs node")
                #	if parent:
                #		#print("Parent type is " + parent.tag)
                #		if parent.tag == "p" or  parent.tag == "l":
                #			refersTo.append(node.get("about"))

                #parent = node

            # Add Social Events for all the people who spoke since the last break (if there were any)
            #print("Final section of scene, currentCast:" + str(len(currentCast)) + " sperkers: " + str(len(speakers)))
            update = list()
            update = getSocial(graph, ns, speakers, speakerNodes, speakerRef,
                               cast, currentCast, eventCount, event,
                               prior_event, location)
            eventCount = update[0]
            prior_event = update[1]

            event = ns['event/' + str(eventCount)]
            group = ns['group/' + str(groupCount)]

            speakers = list()
            speakerNodes = list()
            currentCast = list()
            speakerRef = list()

    print graph.serialize(format='xml')
コード例 #14
0
ファイル: namespaces.py プロジェクト: t00m/Vazaar
    u"tmo"   : TMO,
    u"dc"    : DC,
    u"dct"   : DCTERMS,
    u"foaf"  : FOAF,
    u"sioc"  : SIOC,
    u"sioct" : SIOCT,
    u"geo"   : GEO,
    u"mvcb"  : MVCB,
    u"ical"  : ICAL,
    u"xsd"   : XSD,
    u"owl"   : OWL,
    u"skos"  : SKOS,
    u"doap"  : DOAP,
}

# Store each (shortname, namespace) pair yielded by
# namespace_manager.namespaces() in NSBINDINGS, keyed by shortname.
for shortname, namespace in namespace_manager.namespaces():
    NSBINDINGS[shortname] = namespace

"""
keyword['LIBEXTRACTOR KEYWORD-TYPE'] = (
    'DC-ELEMENT',
    'DC-TERM',
    'NEPOMUK-ONTOLOGY',         # left part predicate
    'NEPOMUK-CLASS/PROPERTY'    # right part predicate
)
"""

# Lookup table from a keyword name to a 4-tuple of strings.
# NOTE(review): the fields appear to be (DC element, DC term, Nepomuk ontology,
# Nepomuk class/property) -- confirm against the code that consumes this table.
EXTRACTOR_KEYWORD = {
    'album': ('title', 'collection', 'nid3', 'albumTitle'),
    'artist': ('creator', '', 'nao', 'creator'),
    'book title': ('title', '', 'nie', 'title'),
}
コード例 #15
0
def get_vocab_base(vocabfile):
    """Work out the identifier, base URI and prefix of the vocabulary in *vocabfile*.

    The file is parsed with the parser's default format first; if that raises,
    it is parsed again as N3 into a fresh graph.

    Returns a 3-tuple ``(identifier, base, prefix)``:

    * ``identifier`` -- the last object of ``namespaces['dc']['identifier']``,
      falling back to ``namespaces['dcterms']['identifier']``; ``None`` if
      neither is present.
    * ``base`` -- the first hit among, in order: a subject typed
      ``owl:Ontology``; a subject carrying a ``dc``/``dcterms`` ``title`` or
      ``creator``; an object of ``vann:preferredNamespaceUri``; the namespace
      bound to the empty prefix.  It is stripped of surrounding whitespace and
      gets a trailing ``#`` unless it already ends in ``/`` or ``#``.
    * ``prefix`` -- the last object of ``vann:preferredNamespacePrefix``, else
      the prefix bound to ``base`` in the graph, else the last path segment of
      ``base``.

    ``(None, None, None)`` is returned when both parse attempts fail.
    """
    graph = Graph()
    try:
        graph.parse(vocabfile)
    except Exception:
        # Catch Exception rather than everything so that KeyboardInterrupt and
        # SystemExit still propagate; retry as N3 on a fresh graph.
        graph = Graph()
        try:
            graph.parse(vocabfile, format="n3")
        except Exception:
            return (None, None, None)

    # Identifier: the last matching object wins; dcterms is only consulted
    # when dc yielded nothing truthy.
    identifier = None
    for ns_key in ('dc', 'dcterms'):
        if identifier:
            break
        for v in graph.objects(None, namespaces[ns_key]['identifier']):
            identifier = v

    # Base: try each (predicate, object) pattern in priority order and keep the
    # first subject of the first pattern that matches.  Namespace terms are
    # looked up lazily so later patterns are untouched once a base is found.
    base = None
    subject_patterns = (
        (('rdf', 'type'), ('owl', 'Ontology')),
        (('dc', 'title'), None),
        (('dcterms', 'title'), None),
        (('dc', 'creator'), None),
        (('dcterms', 'creator'), None),
    )
    for (pred_ns, pred_name), obj_key in subject_patterns:
        if base:
            break
        predicate = namespaces[pred_ns][pred_name]
        obj = namespaces[obj_key[0]][obj_key[1]] if obj_key else None
        base = next(iter(graph.subjects(predicate, obj)), base)
    if not base:
        base = next(
            iter(graph.objects(None, namespaces['vann']['preferredNamespaceUri'])),
            base)
    if not base:
        for ns_prefix, ns_uri in graph.namespaces():
            if ns_prefix == '':
                base = ns_uri
                break

    # Prefix: explicit vann declaration, then the graph's own binding for the
    # base namespace, then the last path segment of the base URI.
    prefix = None
    for vp in graph.objects(None, namespaces['vann']['preferredNamespacePrefix']):
        prefix = vp
    if not prefix and base:
        for ns_prefix, ns_uri in graph.namespaces():
            if str(ns_uri) == str(base):
                prefix = ns_prefix
                break
    if not prefix and base:
        prefix = base.strip().strip('/').split('/')[-1].strip('#').strip(' ')

    if base:
        base = base.strip()
        # Guard against a whitespace-only base, which is empty after strip()
        # and would otherwise fail on the last-character check.
        if base and not base.endswith(('/', '#')):
            base += "#"
    return (identifier, base, prefix)
コード例 #16
0
def read_manifest(item, manifest_file):
    """Parse an RDF/XML manifest and re-home its triples onto *item*.

    Parameters:
        item: object exposing ``uri`` and
            ``list_rdf_objects(uri, predicate)``; the latter is queried for
            ``"ore:aggregates"`` to obtain the aggregated file URIs.
        manifest_file: source handed to ``ConjunctiveGraph.parse`` with
            ``format='xml'``.  It is also used as a string when matching
            ``file://`` subjects.

    Returns:
        A 3-tuple ``(namespaces, triples, seeAlsoFiles)``:

        * ``namespaces`` -- dict of the prefix bindings of the parsed graph
          (plus ``owl`` when an ``owl:sameAs`` triple is emitted).
        * ``triples`` -- list of ``(subject, predicate, object)`` tuples with
          subjects rewritten as described in the body comments.
        * ``seeAlsoFiles`` -- aggregated file URIs matched by ``rdfs:seeAlso``
          objects; those ``seeAlso`` triples are dropped from ``triples``.
    """
    triples = []
    seeAlsoFiles = []
    oxdsClasses = ['http://vocab.ox.ac.uk/dataset/schema#Grouping', 'http://vocab.ox.ac.uk/dataset/schema#DataSet']

    aggregates = item.list_rdf_objects(item.uri, "ore:aggregates")

    g = ConjunctiveGraph()
    gparsed = g.parse(manifest_file, format='xml')
    namespaces = dict(g.namespaces())

    # Every term that occurs in object position, collected once so the blank
    # node check below is a set lookup instead of a rescan of the whole graph.
    object_terms = set(gparsed.objects())

    # Map each original subject to the subject it should carry in the output.
    # Terms are classified by exact class name.
    subjects = {}
    for s in gparsed.subjects():
        if s in subjects:
            continue
        kind = type(s).__name__
        if kind == 'URIRef':
            if str(s).startswith('file://'):
                ss = str(s).replace('file://', '')
                if manifest_file in ss:
                    # The manifest describing itself stands for the item.
                    subjects[s] = URIRef(item.uri)
                else:
                    manifest_file_path = os.path.split(manifest_file)[0]
                    ss = ss.replace(manifest_file_path, '').strip('/')
                    for file_uri in aggregates:
                        if ss in str(file_uri):
                            subjects[s] = URIRef(file_uri)
                            break
                    else:
                        # No aggregated file matched: fall back to the item.
                        subjects[s] = URIRef(item.uri)
            else:
                subjects[s] = URIRef(s)
        elif kind == 'BNode':
            # A blank node that is never an object is replaced by the item
            # URI; one that is referenced elsewhere is kept as-is.
            if s in object_terms:
                subjects[s] = s
            else:
                subjects[s] = URIRef(item.uri)

    # Subjects typed with one of oxdsClasses are mapped to the item URI.
    # The matches are materialised first because the loop removes triples
    # from the graph it is reading.
    for s, p, o in list(gparsed.triples((None, RDF.type, None))):
        if str(o) not in oxdsClasses:
            continue
        if type(s).__name__ == 'URIRef' and len(s) > 0 and str(s) != str(item.uri) and str(subjects[s]) != str(item.uri):
            namespaces['owl'] = URIRef("http://www.w3.org/2002/07/owl#")
            triples.append((item.uri, 'owl:sameAs', s))
            triples.append((item.uri, RDF.type, o))
        elif type(s).__name__ == 'BNode' or len(s) == 0 or str(s) == str(item.uri) or str(subjects[s]) == str(item.uri):
            gparsed.remove((s, p, o))
        subjects[s] = item.uri

    # Resolve rdfs:seeAlso objects against the aggregated files, then drop the
    # seeAlso triples (again iterating over a snapshot while removing).
    see_also = URIRef('http://www.w3.org/2000/01/rdf-schema#seeAlso')
    for s, p, o in list(gparsed.triples((None, see_also, None))):
        if type(o).__name__ == 'URIRef' and len(o) > 0:
            obj = str(o)
            if obj.startswith('file://'):
                obj_path = os.path.split(obj)[0]
                obj = obj.replace(obj_path, '').strip('/')
            for file_uri in aggregates:
                if obj in str(file_uri):
                    seeAlsoFiles.append(file_uri)
        gparsed.remove((s, p, o))

    # Whatever is left is emitted with its subject rewritten.
    triples.extend(
        (subjects[s], p, o) for s, p, o in gparsed.triples((None, None, None)))
    return namespaces, triples, seeAlsoFiles
コード例 #17
0
    primer.parse(data=mySource, format="n3")

    # Now you can query, either directly straight into a list:

    print()
    print("Printing bigger example's triples:")
    for i in [(x, y, z) for x, y, z in primer]:
        print(i)

    # or spit it back out (mostly) the way we created it:

    print()
    print("Printing bigger example as N3:")
    print(primer.serialize(format="n3").decode("utf-8"))

    # for more insight into things already done, lets see the namespaces

    print()
    print("Printing bigger example's namespaces:")
    for n in list(primer.namespaces()):
        print(n)

    # lets ask something about the data, using a SPARQL query

    print()
    print("Who are pat's children?")
    q = "SELECT ?child WHERE { :pat :child ?child }"
    for r in primer.query(q):
        print(r)
コード例 #18
0
ファイル: tei2onto2.py プロジェクト: klaffenboeck/contextus
def convert(teifile, namespace):
	"""Build an RDF graph of cast, locations and events from a TEI play and print it.

	The file is read twice: once through the graph's RDFa loader and once as
	an ElementTree document. Cast items become characters/actors, each
	``div1``/``div2`` is walked as act/scene, and every ``stage`` element of
	type "entrance" or "exit" produces a Travel event (preceded by whatever
	getSocial() emits for the speeches collected so far). Events are chained
	with follows/precedes. The resulting graph is printed as RDF/XML; nothing
	is returned.

	teifile   -- source handed to both graph.load() and ET.parse().
	namespace -- base namespace string; wrapped in Namespace and used to mint
	             the event/N and group/N resources.

	Relies on names that are not defined in this function: perseusid, omb,
	ome, omj, oml, extractCURIEorURI and getSocial.
	"""
	#graph_uri = "http://contextus.net/resource/blue_velvet/"
	
	ns = Namespace(namespace)

	graph = ConjunctiveGraph()
	graph.load(teifile, format="rdfa")
	
	graph.bind("default", ns)
	
	to_update = ""

	# Remember a bound namespace URI that passes the "in ns" containment test;
	# subjects containing it are rewritten onto ns in the loop below.
	for prefix, nsuri in graph.namespaces(): 
		#print("prefix: " + str(prefix) + " - " + str(nsuri))
		if nsuri in ns:
			to_update = nsuri
			
	# NOTE(review): triples are removed from and added to the graph while it is
	# being iterated - confirm the store tolerates this.
	for s, p, o in graph:
#    		print s, p, o
    		if to_update != "" and to_update in s:
    			graph.remove((s, p, o))
			s = URIRef(s.replace(to_update, ns))			
			graph.add((s, p, o))
	
	act = ""
	scene = ""
	line = ""
	char = 0
	loc = 0
	
	
	#timeline = ns['timeline/narrative']
	#graph.add((timeline, RDF.type, ome['Timeline']))

	tree = ET.parse(teifile)
	# Maps the xml:id of each role element to the character resource built for it.
	cast = dict()
	
	titleNode = tree.find('//title')
	
	castItems = tree.findall('/text/body/div1/castList//castItem')
	for castItem in castItems:
		actorNode = castItem.find('actor')
		roleNode = castItem.find('role')

		if roleNode != None:
			id = roleNode.get("{http://www.w3.org/XML/1998/namespace}id")
		
		#print("Found castItem!")

		actor = None
		role = None

		# Check to see if we already have an entry
		if(roleNode != None and roleNode.get("about")):		

			charname = roleNode.get("about")
			
			# A value containing ":" is split into prefix and name; the
			# character gets a numbered "character/N" reference under that
			# prefix and the name (minus its last character) is passed on.
			if(charname.find(":") > -1):
				nmsp,nom = charname.split(":", 1)		
				charcode =  "character/" + str(char)
				charref = nmsp + ":" + charcode + "]"
				role = extractCURIEorURI(graph, charref,nom[0:-1])
				char += 1		
				#print("1:" + charname + ": adding id " + id + " to " + role)
			else:
				role = extractCURIEorURI(graph, charname)
				#print("2:" + charname + ": adding id " + id + " to " + role)

			cast[id] = role
			graph.add((role, RDF.type, omb['Character']))
			#print(charname + ": adding id " + id + " to " + role)
		
		if(actorNode != None and actorNode.get("about")):
			actor = extractCURIEorURI(graph, actorNode.get("about"))
			graph.add((actor, RDF.type, omb['Being']))

		if actor != None and role != None:
			graph.add((actor, omb['portrays'], role))
			graph.add((role, omb['portrayed-by'], actor))

	# Counters used to mint event/N and group/N resources; prior_event links
	# consecutive events with follows/precedes.
	eventCount = 1
	groupCount = 1
	prior_event = None
	
	actItems = tree.findall('/text/body/div1')
	ref = ""
	
	for actItem in actItems:
	
		if actItem.get("type") == "act":
			act = actItem.get("n")
		
		sceneItems = actItem.findall('div2')
		
		for sceneItem in sceneItems:
			
			#print("Found sceneItems!")
			
			if sceneItem.get("type") == "scene":
				scene = sceneItem.get("n")		
			
			# Work out the location of this scene
			location = None
			stageItems = sceneItem.findall("stage")
			
			#internalnum = 1
			stagenum = 0
			speechnum = 1
			
			for stageItem in stageItems:
				if stageItem.get("type") == "location":
					# The RDFa parser doesn't handle the type - so we can grab that here.
					
					if stageItem.get("about") != None:
						locname = stageItem.get("about")
					
						# Adding location type/oml:space for location
						# NOTE(review): if @about is an empty string neither branch
						# runs and "type" is unbound (or stale) at the graph.add below.
						if stageItem.get("typeof") and stageItem.get("about"):
							type = extractCURIEorURI(graph, stageItem.get("typeof"))
							#print "1. Location: " + str(location) + " Type: " + str(type)
						elif stageItem.get("about"):	
							#print "2. Location: " + str(locname)											
							type = extractCURIEorURI(graph, oml['Space'])						
						
						
						# Get location value and add rdfs:label is location is not using the TEI value
						if(locname.find(":") > -1):
							nmsp,nom = locname.split(":", 1)		
							loccode =  "location/" + str(loc)
							locref = nmsp + ":" + loccode + "]"
							location = extractCURIEorURI(graph, locref, nom[0:-1])
							loc += 1
							graph.add((location, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(nom[0:-1])))
						else:
							location = extractCURIEorURI(graph, stageItem.get("about"))
						
						# Add location to graph
						graph.add((location, RDF.type, type))	
					else:
						location = ""
					
						
					#print("Adding location type: " + type + " (" + location + ")")
	
	
			# NOTE(review): currentCast and speakers are only (re)initialised here
			# when cast is non-empty, yet they are passed to getSocial()
			# unconditionally further down - confirm cast can never be empty.
			if cast:
				# Work out a list of all cast in a given section
				currentCast = list()
				speakers = list()
			
	
			# Iterate through elements within stageItem
				# Find speaker events and add to list of current cast for inclusion in social event
				# Find reference events and add to ongoing social event ?
				# Find stage events
					# If event is an entrance then
						# create social event for people talking before entrance
						# create travel event i.e. entrance
						# add new arrival to current cast list
					# If event is exit event then
						# create social event for people talking before exit
						# create travel event i.e. exit
							# if leavers are not named directly the calculate who is leaving
						# remove leavers from current cast list
				# If reach end of scene then create social event with current cast list
				
				#Also need to check if social event before exit has same composition as social event after exit since then they should be merged
				
			event = ns['event/'+str(eventCount)]
			group = ns['group/'+str(groupCount)]	
			
			refersTo = list()
			#parent = None
			speakerNodes = list()
			speakerRef = list()
			
			# NOTE(review): perseusid is not assigned anywhere in this function;
			# presumably a module-level global - verify.
			xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:"  + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene)
			# stagecount: nested <stage> elements of the latest speech not yet visited.
			# stage_array: for every nested <stage> seen in this scene, the number of
			# <l> elements counted before it inside its speech.
			stagecount = 0
			stage_array = list()
						
			for node in sceneItem.getiterator():
				#print("Node: " + node.tag)	
				
				
				"""
				if node.tag == "lb":
					if node.get("ed") == "F1":
						line = node.get("n")	
						if titleNode != None:
							ref = titleNode.text + " " + str(act) + "." + str(scene) + "." + str(line)	
						else:
							ref = str(act) + "." + str(scene) + "." + str(line)
							
						#xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:"  + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene) + "#xpointer(//lb[@ed='F1' and @n='" + str(line)	 + "'])"
						xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:"  + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene)
						#print("Ref: " + xpointer)
				"""		
						
				if node.tag == "sp":
					id = node.get("who")
					
					if id and cast:
						# The first character of @who is dropped before the cast
						# lookup (presumably a leading "#" - verify against the TEI).
						speakers.append(cast[id[1:]])	
						speakerNodes.append(node)
						
						if perseusid == None:
							speakerRef.append(ref)
						else:
							#speechRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(int(line) + 1) + "']/ancestor::sp)"
							speechRef  = xpointer + "#xpointer(//div2/sp[" + str(speechnum) + "])";
							speakerRef.append(speechRef)
						#print("Line ref: " + ref)
						
						if cast[id[1:]] not in currentCast:
							currentCast.append(cast[id[1:]])
							
					#internalnum = 1
					speechnum += 1
					stagecount = 0
					
					
					previousl = 0
					
					# Pre-scan the speech: record, for each nested stage direction,
					# how many <l> elements precede it.
					for subnode in node.getiterator():
						if subnode.tag == "l":
							previousl += 1
						
						if subnode.tag == "stage":
							#print ("Stagecount: " + str(stagecount) + " Previousl: " + str(previousl) + "\n")
							stage_array.append(previousl)
							stagecount += 1
							
					
						
				elif node.tag == "stage":
					
					# stagecount > 0 means this stage was counted inside the latest
					# speech; otherwise it is numbered as a direct stage of the scene.
					if stagecount > 0:
						s_max = len(stage_array)
						diff = s_max - stagecount
						
						#if diff == 0:
						#	stagenum += 1
					
						entRef = xpointer + "#xpointer(//div2/sp[" + str(speechnum - 1) + "]/l[" + str(stage_array[diff]) +"]/stage)";
						#internalnum += 1
						stagecount -= 1
					else:
						stagenum += 1
						entRef = xpointer + "#xpointer(//div2/stage[" + str(stagenum) +"])";				
					
					if node.get("type") == "entrance":		
					
						# Add Social Events for all the people who spoke since the last break (if there were any)
						
						update = list()
						update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location)
						eventCount = update[0]
						prior_event = update[1]
						
						event = ns['event/'+str(eventCount)]
						
						speakers = list()
						speakerNodes = list()
						speakerRef = list()
					
						# Add Travel Event
						
						graph.add((event, RDF.type, omj['Travel']))
						
						if perseusid == None:
							graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), Literal(ref)))
						else:
							#entRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(line) + "']/following-sibling::*[1]/self::stage)"
							graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(entRef)))
						
						#print("Entrance event. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))	
	
						#print("Found entrence event!")
						if location:
							graph.add((event, ome['to'], location))		
							
						# NOTE(review): get() returns None when @about is absent, in
						# which case len(involved) raises - confirm every entrance has @about.
						involved = node.get("about")
						
						if(len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"):
							involved = involved[1:-1]
							
						chunks = involved.split()
						
						chunk_count = len(chunks)
						
						if chunk_count > 1:
							#type = extractCURIEorURI(graph, "[omb:Group]")
							#graph.add((group, RDF.type, type))
							graph.add((group, RDF.type, omb['Group']))
							
						event_label = ""	
						en = 1
						
						for chunk in chunks:
							striped = chunk.strip()
							
							# NOTE(review): only bracketed chunks are added to currentCast;
							# unbracketed ones are still looked up in cast below.
							if(len(striped) > 0 and striped[0] == "[" and striped[-1] == "]"):
								striped = striped[1:-1]
								currentCast.append(cast[striped])								
							
							if chunk_count > 1:
								graph.add((group, ome['contains'], cast[striped]))
								
								# Build "a, b and c arrive": the slice drops the trailing
								# ", " before the final name is appended.
								if en == chunk_count:
									event_label = event_label[0:-2] + " and " + striped
									graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " arrive")))
								elif en < chunk_count:
									event_label += striped + ", "									
									
							else:
								#print("Adding person as subject-entity to entry event "   + str(eventCount))
								graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(striped + " arrives")))
								graph.add((event, ome['has-subject-entity'], cast[striped]))
								
							en += 1
									
							
						if chunk_count > 1:
							graph.add((event, ome['has-subject-entity'], group))	
							#print("Adding group as subject-entity to entry event "   + str(eventCount))
							groupCount = groupCount + 1
							group = ns['group/'+str(groupCount)]	
		
						if(prior_event):
							graph.add((event, ome['follows'], prior_event))
							graph.add((prior_event, ome['precedes'], event))
		
						prior_event = event					
	
						eventCount = eventCount + 1
						event = ns['event/'+str(eventCount)]
									
					if node.get("type") == "exit":		
						
						# Add Social Events for all the people who spoke since the last break (if there were any)
						update = list()
						update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location)
						eventCount = update[0]
						prior_event = update[1]
						
						event = ns['event/'+str(eventCount)]
						
						speakers = list()
						speakerNodes = list()
						speakerRef = list()
						
						# Add Travel Event
					
						graph.add((event, RDF.type, omj['Travel']))		
						
						if perseusid == None:
							graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), Literal(ref)))
						else:
							#exitRef = xpointer
							#graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(exitRef)))
							graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(entRef)))
	
						#print("Found entrence event!")
						if location != None:
							graph.add((event, ome['from'], location))		
							
						# NOTE(review): involved is None when @about is absent, and
						# .strip() would then raise - confirm every exit has @about.
						involved = node.get("about")	
						
						# Three exit forms: empty or "-all" (everyone leaves), a value
						# containing "!" (everyone except those listed), or an explicit
						# list of leavers.
						if involved.strip() == "" or "-all" in involved:
							# Remove everyone
													
							#print("Exit all. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))	
							
							#for peep in currentCast:	
							#	print(peep)
							
							if len(currentCast) > 1:							
								#type = extractCURIEorURI(graph, "[omb:Group]")
								#graph.add((group, RDF.type, type))
								graph.add((group, RDF.type, omb['Group']))
															
							event_label = ""
							en = 1
							
							for peep in currentCast:	
								# Reverse lookup: find the cast key for this character
								# (the last matching key wins).
								short_ref = ""
								for key, value in cast.iteritems():
									if peep == value:	
										short_ref = key
							
								if len(currentCast) > 1:
									graph.add((group, ome['contains'], peep))
									
									if en == len(currentCast):
										event_label = event_label[0:-2] + " and " + short_ref
										graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " leave")))	
									elif en < len(currentCast):
										event_label += short_ref + ", "
																	
								else:
									#print("Adding person as subject-entity to exuant event "   + str(eventCount))
									graph.add((event, ome['has-subject-entity'], peep))
									graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(short_ref + " leaves")))
									
								en += 1
	
							if len(currentCast) > 1:
								graph.add((event, ome['has-subject-entity'], group))	
								#print("Adding group as subject-entity to exuant event "   + str(eventCount))
								groupCount = groupCount + 1
								group = ns['group/'+str(groupCount)]	
							
							currentCast = list()
						
						elif "!" in involved:
							#print("Exit except some. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))	
							
							#print("Event: " + involved);
							
							if(len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"):
								involved = involved[1:-1]	
								
							involved = involved.strip()	
							
							# Unwrap the "!( ... )" form to get the ids of those who stay.
							if(len(involved) > 0 and involved[0] == "!" and involved[1] == "(" and involved[-1] == ")"):
								involved = involved[2:-1]	
							
							#print("involved: " + involved)
							
							striped = involved.strip()	
							
							c_ids = striped.split()
							
							chunks = list()
							
							for stay in c_ids:
								#print("Staying: " + cast[stay])
								chunks.append(cast[stay])							
							
							staying = list()
							going = list()
							
							for player in currentCast:
								#print("Player: " + player)							
								if player in chunks:
									staying.append(player)
								else:
									going.append(player)
									
							going_count = len(going)	
							
							if going_count > 1:
								#type = extractCURIEorURI(graph, "[omb:Group]")
								#graph.add((group, RDF.type, type))	
								graph.add((group, RDF.type, omb['Group']))
								

							event_label = ""
							en = 1
								
							for ghost in going:							
								#print("ghost: " + ghost)
								
								short_ref = ""
								for key, value in cast.iteritems():
									if ghost == value:	
										short_ref = key
										
										
								if ghost in currentCast:
									currentCast.remove(ghost)
									#print("Current cast count: "  + str(len(currentCast)))	
								
								if going_count > 1:
									graph.add((group, ome['contains'], ghost))
									
									if en == len(going):
										event_label = event_label[0:-2] + " and " + short_ref
										graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " leave")))	
									elif en < len(going):
										event_label += short_ref + ", "	
										
								else:
									#print("Adding person as subject-entity to exit event "   + str(eventCount))
									graph.add((event, ome['has-subject-entity'], ghost))
									graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(short_ref + " leaves")))
									
								en += 1
								
								
							if going_count > 1:
								graph.add((event, ome['has-subject-entity'], group))	
								#print("Adding group as subject-entity to exit event "   + str(eventCount))
								groupCount = groupCount + 1
								group = ns['group/'+str(groupCount)]	
		
										
						else:
							#print("Exit some. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))	
							
							if(len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"):
								involved = involved[1:-1]	
								
							striped = involved.strip()							
							chunks = striped.split()
							
							#print("striped: " + striped)
					
							chunk_count = len(chunks)
						
							if chunk_count > 1:
								#type = extractCURIEorURI(graph, "[omb:Group]")
								#graph.add((group, RDF.type, type))
								graph.add((group, RDF.type, omb['Group']))
								
								
							event_label = ""
							en = 1								
							
							for chunk in chunks:							
								#print("chunk: " + chunk)			
									
								ghost = cast[chunk]
								
								#print("ghost: " + ghost)
								
								if ghost in currentCast:
									currentCast.remove(ghost)
									#print("Current cast count: "  + str(len(currentCast)))	
								
								if chunk_count > 1:
									graph.add((group, ome['contains'], ghost))
									
									# NOTE(review): the sibling branches compare en with the
									# number of leavers (len(going), chunk_count); here it is
									# compared with len(currentCast), which was just shrunk
									# above - looks unintended, confirm before relying on the label.
									if en == len(currentCast):
										event_label = event_label[0:-2] + " and " + chunk
										graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " leave")))	
									elif en < len(currentCast):
										event_label += chunk + ", "										
									
								else:
									#print("Adding person as subject-entity to exit event "   + str(eventCount))
									graph.add((event, ome['has-subject-entity'], ghost))
									graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(chunk + " leaves")))
									
								en += 1	
								
							if chunk_count > 1:
								graph.add((event, ome['has-subject-entity'], group))	
								#print("Adding group as subject-entity to exit event "   + str(eventCount))
								groupCount = groupCount + 1
								group = ns['group/'+str(groupCount)]	
	
		
							
							
						if(prior_event):
							graph.add((event, ome['follows'], prior_event))
							graph.add((prior_event, ome['precedes'], event))
		
						prior_event = event					
	
						eventCount = eventCount + 1
						event = ns['event/'+str(eventCount)]
						
				#elif node.tag == "rs":	
				#	#print("Found rs node")
				#	if parent:
				#		#print("Parent type is " + parent.tag)
				#		if parent.tag == "p" or  parent.tag == "l":
				#			refersTo.append(node.get("about"))
							
				#parent = node
					
	
			# Add Social Events for all the people who spoke since the last break (if there were any)
			#print("Final section of scene, currentCast:" + str(len(currentCast)) + " sperkers: " + str(len(speakers)))
			update = list()
			update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location)
			eventCount = update[0]
			prior_event = update[1]
			
			event = ns['event/'+str(eventCount)]
			group = ns['group/'+str(groupCount)]
				
			# Reset the per-scene state before the next scene.
			speakers = list()
			speakerNodes = list()
			currentCast = list()
			speakerRef = list()
		
		
		
	# Python 2 print statement: emit the finished graph as RDF/XML.
	print graph.serialize(format='xml')		
コード例 #19
0
def read_manifest(item, manifest_file):
    """Parse an RDF/XML manifest and re-home its triples onto ``item``.

    Every subject found in the manifest is mapped to the URI it should be
    stored under:

    * ``file://`` URIs that contain the manifest path map to ``item.uri``;
      other ``file://`` URIs map to the first aggregated file whose URI
      contains the path relative to the manifest directory, falling back
      to ``item.uri``;
    * any other URI maps to itself;
    * a blank node that is never used as an object maps to ``item.uri``;
      one that is referenced as an object is kept as is.

    Subjects typed as one of the oxds dataset classes are redirected to
    ``item.uri`` (recording ``owl:sameAs`` when the subject is a distinct
    URI). ``rdfs:seeAlso`` triples are stripped from the result and their
    targets are matched against the aggregated files.

    :param item: object exposing ``uri`` and
        ``list_rdf_objects(uri, predicate)``.
    :param manifest_file: path of the RDF/XML manifest to parse.
    :returns: ``(namespaces, triples, seeAlsoFiles)`` where ``namespaces``
        is a prefix -> namespace dict, ``triples`` a list of
        ``(subject, predicate, object)`` tuples and ``seeAlsoFiles`` the
        aggregated file URIs referenced through ``rdfs:seeAlso``.
    """
    # Type checks go by class name, as the original code did, so no rdflib
    # name beyond those the block already uses has to be in scope.
    def is_uri(node):
        return type(node).__name__ == 'URIRef'

    def is_bnode(node):
        return type(node).__name__ == 'BNode'

    triples = []
    seeAlsoFiles = []
    oxdsClasses = ['http://vocab.ox.ac.uk/dataset/schema#Grouping', 'http://vocab.ox.ac.uk/dataset/schema#DataSet']
    see_also = URIRef('http://www.w3.org/2000/01/rdf-schema#seeAlso')

    aggregates = item.list_rdf_objects(item.uri, "ore:aggregates")

    g = ConjunctiveGraph()
    gparsed = g.parse(manifest_file, format='xml')
    namespaces = dict(g.namespaces())

    # Loop invariants: the manifest directory, and the set of every object in
    # the graph (built once instead of rescanning the graph per blank node).
    manifest_file_path = os.path.split(manifest_file)[0]
    referenced = set(gparsed.objects())

    # Get the subjects
    subjects = {}
    for s in gparsed.subjects():
        if s in subjects:
            continue
        if is_uri(s):
            if not str(s).startswith('file://'):
                subjects[s] = URIRef(s)
                continue
            ss = str(s).replace('file://', '')
            if manifest_file in ss:
                subjects[s] = URIRef(item.uri)
                continue
            ss = ss.replace(manifest_file_path, '').strip('/')
            for file_uri in aggregates:
                if ss in str(file_uri):
                    subjects[s] = URIRef(file_uri)
                    break
            else:
                # No aggregated file matched: attach to the item itself.
                subjects[s] = URIRef(item.uri)
        elif is_bnode(s):
            subjects[s] = s if s in referenced else URIRef(item.uri)

    # Get the dataset type: set the subject uri to item uri if it is of a
    # type listed in oxdsClasses.
    # Iterate over a snapshot: the loop removes triples from the graph, and
    # mutating a store while consuming its live generator is not safe on
    # every rdflib store.
    for s, p, o in list(gparsed.triples((None, RDF.type, None))):
        if str(o) not in oxdsClasses:
            continue
        if is_uri(s) and len(s) > 0 and str(s) != str(item.uri) and str(subjects[s]) != str(item.uri):
            namespaces['owl'] = URIRef("http://www.w3.org/2002/07/owl#")
            triples.append((item.uri, 'owl:sameAs', s))
            triples.append((item.uri, RDF.type, o))
        elif is_bnode(s) or len(s) == 0 or str(s) == str(item.uri) or str(subjects[s]) == str(item.uri):
            gparsed.remove((s, p, o))
        subjects[s] = item.uri

    # Get the uri for the see also files (snapshot again: every seeAlso
    # triple is removed from the graph as it is processed).
    for s, p, o in list(gparsed.triples((None, see_also, None))):
        if is_uri(o) and len(o) > 0:
            obj = str(o)
            if obj.startswith('file://'):
                # Keep only the last path component of local file references.
                obj_path, obj_name = os.path.split(obj)
                obj = obj.replace(obj_path, '').strip('/')
            for file_uri in aggregates:
                if obj in str(file_uri):
                    seeAlsoFiles.append(file_uri)
        gparsed.remove((s, p, o))

    # Add remaining triples
    for s, p, o in gparsed.triples((None, None, None)):
        triples.append((subjects[s], p, o))
    return namespaces, triples, seeAlsoFiles