Esempio n. 1
def get_turtle_header():
    """Build Turtle ``@prefix`` lines from a fresh graph's namespace bindings.

    A new, empty ``ConjunctiveGraph`` is created only so that the namespace
    bindings it reports can be enumerated. Each ``(prefix, uri)`` pair is
    rendered as one ``@prefix`` line.

    Returns:
        The ``@prefix`` lines joined with newlines (no trailing newline).
    """
    bindings = ConjunctiveGraph().namespaces()
    return "\n".join(
        "@prefix {}: <{}>.".format(prefix, uri) for prefix, uri in bindings
    )
Esempio n. 2
 def from_string(self, uri, text, format="xml", encoding="utf-8"):
     """Parse RDF ``text`` and copy its namespaces and matching triples here.

     ``uri`` is handed to the input source as its ``system_id``; the graph
     itself is identified by ``self.uri``. Every namespace binding found in
     the parsed graph is passed to ``self.add_namespace`` and, for every
     triple whose subject is ``self.uri``, the predicate and object are
     passed to ``self.add_triple``. ``encoding`` is accepted but not used
     by this method.
     """
     source = TextInputSource(text, system_id=uri)
     parsed = ConjunctiveGraph(identifier=self.uri).parse(source, format)

     for prefix, namespace in parsed.namespaces():
         self.add_namespace(prefix, namespace)

     own_triples = parsed.triples((self.uri, None, None))
     for _subject, predicate, obj in own_triples:
         self.add_triple(predicate, obj)
Esempio n. 3
 def from_url(self, url, uri=None, format="xml",  encoding="utf-8"):
     # Parse the RDF document at ``url`` into a graph identified by
     # ``self.uri``, then copy its namespace bindings and every triple whose
     # subject is ``self.uri`` onto this object.
     # ``encoding`` is accepted but not referenced in the visible body.
     if not uri:
     # NOTE(review): the body of the ``if not uri:`` branch above is missing
     # from this listing, so the method does not parse as shown. Restore the
     # original statement before using this code.
     g = ConjunctiveGraph(identifier=self.uri)
     g = g.parse(url, format)
     # Mirror every prefix binding of the parsed graph.
     for prefix, ns in g.namespaces():
         self.add_namespace(prefix, ns)
     # Only triples about ``self.uri`` are kept; the subject itself is dropped.
     for s,p,o in g.triples((self.uri, None, None)):
         self.add_triple(p, o)
Esempio n. 4
    def from_string(self, rdf_manifest_string, format="xml"):
        # Parse ``rdf_manifest_string`` into a fresh graph, walk all of its
        # triples, and finally copy the graph's namespace bindings onto this
        # object.
        t = TextInputSource(rdf_manifest_string)
        g = ConjunctiveGraph()
        g = g.parse(t, format)
        for s,p,o in g.triples((None, None, None)):
            if s not in self.items:
            # NOTE(review): the body of the ``if s not in self.items:`` branch
            # above is missing from this listing, so the method does not parse
            # as shown.
            if p == NAMESPACES['rdf']['type']:
                # Get (or create) the RDFobject stored for subject ``s`` and
                # record the rdf:type triple on it.
                self.items_rdfobjects.setdefault(s,RDFobject(uri=s)).add_triple(p, o)

        for prefix, ns in g.namespaces():
            self.add_namespace(prefix ,ns)
Esempio n. 5
def list_used_namespaces():
    """Print Python code as strings that can be used to setup namespaces.

    Every file returned by ``get_file_list(sys.argv[1])`` is parsed as Turtle
    into a single graph. For each namespace bound on that graph, the
    ``namespace_rq`` query template is run with ``--ns--`` replaced by the
    namespace URI. When the query's ASK answer is ``True``, a
    ``PREFIX = Namespace('uri')`` line is printed and the prefix is bound on
    the returned manager.

    Returns:
        A ``NamespaceManager`` (backed by a new, empty ``Graph``) holding only
        the namespaces whose ASK query answered ``True``.
    """
    g = ConjunctiveGraph()
    for full in get_file_list(sys.argv[1]):
        g.parse(source=full, format='turtle')

    ns_mgr = NamespaceManager(Graph())
    for pre, ns in g.namespaces():
        q = namespace_rq.replace('--ns--', ns.toPython())
        used = g.query(q).askAnswer
        # Strict identity check: only a real ``True`` answer counts as "used".
        if used is True:
            # Single-argument call form prints identically on Python 2 and 3.
            print("{} = Namespace('{}')".format(pre.upper(), ns))
            ns_mgr.bind(pre, Namespace(ns))
    return ns_mgr
class SQLATestCase(unittest.TestCase):
    # Test case for a ConjunctiveGraph backed by the "SQLAlchemy" store plugin.
    # NOTE(review): several method bodies in this listing are truncated (see
    # the markers below), so the class does not parse as shown.
    identifier = URIRef("rdflib_test")
    dburi = Literal("sqlite://")

    # NOTE(review): the setUp header and the two statements after it are
    # garbled (an assignment target and the arguments before ", identifier="
    # are missing); they appear to build the store and open the graph.
    def setUp(self): = plugin.get(
            "SQLAlchemy", Store)(identifier=self.identifier)
        self.graph = ConjunctiveGraph(, identifier=self.identifier), create=True)

    # NOTE(review): the tearDown body is missing from this listing.
    def tearDown(self):

    def test_registerplugins(self):
        # I doubt this is quite right for a fresh pip installation,
        # this test is mainly here to fill a coverage gap.
        self.assert_(plugin.get("SQLAlchemy", Store) is not None)
        p = plugin._plugins
        self.assert_(("SQLAlchemy", Store) in p, p)
        del p[("SQLAlchemy", Store)]
        plugin._plugins = p
        # NOTE(review): nothing visible re-registers the plugin between the
        # ``del`` above and the assertion below; a re-registration call looks
        # to be missing here - confirm against the original test.
        self.assert_(("SQLAlchemy", Store) in p, p)

    def test_namespaces(self):
        # The graph is expected to report at least one namespace binding.
        self.assert_(list(self.graph.namespaces()) != [])

    def test_contexts_without_triple(self):
        self.assert_(list(self.graph.contexts()) == [])

    def test_contexts_with_triple(self):
        # ``michel``, ``likes`` and ``pizza`` are defined outside this class.
        statemnt = (michel, likes, pizza)
        self.assert_(self.graph.contexts(triple=statemnt) != [])

    def test__len(self):
        # NOTE(review): the left-hand operand of the comparison is missing.
        self.assert_( == 0)

    # NOTE(review): the body of test__remove_context is missing from this
    # listing.
    def test__remove_context(self):
Esempio n. 7
def query(
    query_text: str,
    instance: rdflib.ConjunctiveGraph,
    **kwargs: str,
) -> QueryResult:
    """Run SPARQL SELECT query and return formatted result."""
    # NOTE(review): the argument list and closing parenthesis of the
    # ``instance.query(`` call below are missing from this listing, so the
    # function does not parse as shown.
    # NOTE(review): the docstring mentions SELECT only, but the body also
    # returns ``askAnswer`` and ``graph`` results when they are not None.
    sparql_result: SPARQLResult = instance.query(

    # A non-None ``askAnswer`` is returned as-is.
    if sparql_result.askAnswer is not None:
        return sparql_result.askAnswer

    # A non-None result graph is returned after copying the namespace
    # bindings of ``instance`` onto it.
    if sparql_result.graph is not None:
        graph: rdflib.Graph = sparql_result.graph
        for prefix, namespace in instance.namespaces():
            graph.bind(prefix, namespace)

        return graph

    # Otherwise the bindings are formatted by ``_format_query_bindings``.
    return _format_query_bindings(sparql_result.bindings)
Esempio n. 8
def parse_workflow():
    # Builds a ConjunctiveGraph from the paper-id workflow graphml file under
    # ../docs, writes it to /tmp/workflow.ttl, and derives from it the values
    # returned at the bottom: tag_types, tag_tokens, partInstances,
    # valid_tagsets, terminal_tagsets and tag_transitions.
    # NOTE(review): several statements in this listing are truncated (each is
    # flagged with a NOTE(review) marker below), so the function does not
    # parse as shown.
    # FIXME TODO these states should probably be compiled down to numbers???
    docs = Path(__file__).parent.absolute().resolve().parent / 'docs'
    # NOTE(review): rridpath is assigned but not used in the visible code.
    rridpath = docs / 'workflow-rrid.graphml'
    paperpath = docs / 'workflow-paper-id.graphml'

    cgraph = ConjunctiveGraph()
    # presumably populates cgraph from the graphml file - confirm in gt
    gt.PaperIdMapping(paperpath.as_posix(), False).graph(cgraph)
    write(cgraph, '/tmp/workflow.ttl')
    predicates = set(cgraph.predicates())
    OntCuries({cp:str(ip) for cp, ip in cgraph.namespaces()})
    # NOTE(review): the empty-string values below look like stripped URLs -
    # confirm against the original source.
    OntCuries({'RRID': '',
               'DOI': '',
               'PMID': ''})
    hg = makeGraph('', graph=cgraph)
    short = sorted(hg.qname(_) for _ in predicates)


    #if type isa wf.tag

    tag_types = set(cgraph.transitive_subjects(rdfs.subClassOf, wf.tag))
    # Maps each tag type to the sorted rdf:type transitive subjects of that
    # type, excluding the type itself.
    tag_tokens = {tagType:sorted(set(t for t in cgraph.transitive_subjects(rdf.type, tagType)
                                     if t != tagType))
                  for tagType in tag_types}
    has_tag_types = set(cgraph.transitive_subjects(rdfs.subPropertyOf, wf.hasTagOrReplyTag))
    has_next_action_types = set(cgraph.transitive_subjects(rdfs.subPropertyOf, wf.hasOutput))

    # Non-BNode tags (of any tag type except wf.tagScibot) for which no
    # has_tag_types predicate yields a truthy value.
    terminals = sorted(tag
                       for ttype in tag_types
                       if ttype != wf.tagScibot  # scibot is not 'terminal' for this part
                       for tag in cgraph[:rdf.type:ttype]
                       if not isinstance(tag, BNode)
                       and not any(o for httype in has_tag_types
                                   for o in cgraph[tag:httype]))

    # Non-BNode wf.state instances for which no has_next_action_types
    # predicate yields a truthy value.
    endpoints = sorted(endpoint
                       for endpoint in cgraph[:rdf.type:wf.state]
                       if not isinstance(endpoint, BNode)
                       and not any(o for hnatype in has_next_action_types
                                   for o in cgraph[endpoint:hnatype]))

    # NOTE(review): the expression assigned to ``complicated`` is never
    # closed in this listing; its remaining lines are missing.
    complicated = sorted(a_given_tag
                 for tt in tag_types
                 for a_given_tag in cgraph[:rdf.type:tt]
                 if not isinstance(a_given_tag, BNode)
                         and not [successor_tag
                          for htt in has_tag_types
                          for successor_tag in chain(t
                                                     for t in cgraph[a_given_tag:htt]
                                                     #if not isinstance(t, BNode)
                                                     # we don't actually need this for terminals
                                                     # we will need it later
                                                     #(t for b in cgraph[a_given_tag:htt]
                                                     #if isinstance(b, BNode)
                                                     #for listhead in cgraph[b:owl.oneOf]
                                                     #for t in unlist(listhead, cgraph)),

    # NOTE(review): the slice inside ``g[]`` is missing from this listing.
    def topList(node, g):
        for s in g[]:
            yield s

    def getLists(node, g):
        for linker in g[:rdf.first:node]:
            top = None
            # NOTE(review): the body of the inner ``for`` loop is missing from
            # this listing.
            for top in g.transitiveClosure(topList, linker):

            if top:
                yield top
                yield linker

    def getIsTagOf(node, g):
        # Yields every match of g[:htt:node] for each predicate in
        # has_tag_types.
        for htt in has_tag_types:
            for parent_tag in g[:htt:node]:
                yield parent_tag

    def getIsOneOfTagOf(node, g):
        # Walks from ``node`` through getLists and owl.oneOf linkers, yielding
        # the first element of each pair returned by g[::linker].
        for list_top in getLists(node, g):
            for linker in g[:owl.oneOf:list_top]:
                for parent_tag, _ in g[::linker]:
                    yield parent_tag

    def getPreviousTag(node, g):  # not quite what we need
        yield from getIsOneOfTagOf(node, g)
        yield from getIsTagOf(node, g)

    def getTagChains(node, g, seen=tuple()):
        # seen to prevent recursion cases where
        # taggning can occur in either order e.g. PMID -> DOI
        #print(  # does weird stuff O_o
        parent_tag = None
        for parent_tag in chain(getIsOneOfTagOf(node, g),
                                getIsTagOf(node, g)):
            if parent_tag in seen:
                parent_tag = None
            # NOTE(review): as shown, execution falls through to the lookup
            # below even when parent_tag was just reset to None; a ``continue``
            # appears to be missing above - confirm against the original.
            ptt = next(g[parent_tag:rdf.type])
            #if ptt in tag_types:
            for pchain in getTagChains(parent_tag, g, seen + (node,)):
                if ptt in tag_types:
                    out = parent_tag, *pchain
                    # NOTE(review): the assignment below overwrites the value
                    # set on the previous line; an ``else:`` appears to be
                    # missing between them.
                    out = pchain
                yield out

            if not ptt and not out:
                parent_tag = None

        # Yields an empty chain when no parent tag was found (or the last one
        # was reset to None).
        if not parent_tag:
            yield tuple()

    def getInitiatesAction(node, g):
        for action in g[:wf.initiatesAction:node]:
            yield action

    def getIsOneOfOutputOf(node, g):
        # Like getIsOneOfTagOf, but follows the has_next_action_types
        # predicates from each owl.oneOf linker.
        for list_top in getLists(node, g):
            for linker in g[:owl.oneOf:list_top]:
                for hot in has_next_action_types:
                    for parent_thing  in g[:hot:linker]:
                        yield parent_thing

    def getActionChains(node, g):
        # Recursively yields tuples of parent actions leading to ``node``;
        # yields an empty tuple when the last parent seen is falsy (or there
        # was none).
        parent_action = None
        for parent_action in chain(getIsOneOfOutputOf(node, g),  # works for actions too
                                   getInitiatesAction(node, g)):
            for pchain in getActionChains(parent_action, g):  # NOTE may also be a tag...
                out = parent_action, *pchain
                #print(tuple(hg.qname(o) for o in out))
                yield out

        if not parent_action:
            yield tuple()

    def getRestSubjects(predicate, object, g):
        """ invert restriction """
        rsco = cmb.Restriction(rdfs.subClassOf)
        for rt in rsco.parse(graph=g):
            if rt.p == predicate and rt.o == object:
                yield from g.transitive_subjects(rdfs.subClassOf, rt.s)

    annoParts = list(getRestSubjects(wf.isAttachedTo, wf.annotation, cgraph))
    # NOTE(review): the filter already excludes BNodes, so the
    # ``t if isinstance(t, BNode)`` arm of the conditional cannot be taken.
    partInstances = {OntId(a):set(t if isinstance(t, BNode) else OntId(t)
                                  for t in cgraph.transitive_subjects(rdf.type, a)
                                  if not isinstance(t, BNode) and t != a)
                     for a in annoParts}

    _endpoint_chains = {OntId(endpoint):[[OntId(endpoint)] + [OntId(e) for e in chain]
                                            for chain in getActionChains(endpoint, cgraph)]
                        for endpoint in endpoints}

    #print([hg.qname(e) for e in endpoints])
    #print([print([hg.qname(c) for c in getActionChains(endpoint, cgraph) if c])
           #for endpoint in endpoints
           #if endpoint])

    #_ = [print(list(getActionChains(e, cgraph)) for e in endpoints)]

    wat = cgraph.transitiveClosure(getPreviousTag, RRIDCUR.Duplicate)
    wat = list(wat)
    #def invOneOf(tag, g):

    # NOTE(review): the value expression of this dict comprehension is
    # truncated (the opening of the inner list is missing).
    fake_chains = {hg.qname(terminal):
                    for c in cgraph.transitiveClosure(getPreviousTag, terminal)]
                   for terminal in terminals}

    # NOTE(review): the dict comprehension returned by make_chains is never
    # closed in this listing.
    def make_chains(things, getChains):
        return {OntId(thing):[[OntId(thing)] + [OntId(e) for e in chain]
                              for chain in getChains(thing, cgraph)]
                for thing in things
                #if not print(thing)

    def print_chains(thing_chains):
        print('\nstart from beginning')

        print('\n'.join(sorted(' -> '.join(hg.qname(e) for e in reversed(chain))
                               for chains in thing_chains.values()
                               for chain in chains)))

        print('\nstart from end')

        print('\n'.join(sorted(' <- '.join(e.curie for e in chain)
                               for chains in thing_chains.values()
                               for chain in chains)))

    def valid_tagsets(all_chains):
        # not the most efficient way to do this ...
        # NOTE(review): in the visible code nothing is ever added to ``valid``
        # or ``transitions``, so the returned dict is always empty; the
        # statements that populate them appear to be missing.
        transitions = defaultdict(set)
        for end, chains in all_chains.items():
            for chain in chains:
                valid = set()
                prior_state = None
                for element in reversed(chain):
                    state = frozenset(valid)
                    prior_state = state

        return {s:frozenset(n) for s, n in transitions.items()}

    endpoint_chains = make_chains(endpoints, getActionChains)
    #endpoint_transitions = valid_transitions(endpoint_chains)  # not the right structure
    terminal_chains = make_chains(terminals, getTagChains)
    tag_transitions = valid_tagsets(terminal_chains)
    terminal_tags_to_endpoints =  'TODO'

    def printq(*things):
        print(*(OntId(t).curie for t in things))

    from pprint import pprint
    def get_linkers(s, o, g, linkerFunc):  # FIXME not right
        # Yields predicates from three lookups in turn: g[s::o] directly,
        # g[s::l] for each l produced by linkerFunc, and finally the
        # predicates of (p, o) pairs in g[s::] whose o is in the transitive
        # closure of linkerFunc.
        for p in g[s::o]:
            yield p

        for l in linkerFunc(o, g):
            #print('{OntId(s).curie} {l if isinstance(l, BNode) else OntId(l).curie}'))
            for p in g[s::l]:
                #print('{s} {l} {o} {p}'))
                yield p
        linkers = set(l for l in g.transitiveClosure(linkerFunc, o))
        for p, o in g[s::]:
            if o in linkers:
                yield p

    def edge_to_symbol(p, rev=False):
        if p == wf.initiatesAction:
            return '<<' if rev else '>>'
        elif p == wf.hasReplyTag:
            return '<' if rev else '>'
        elif p == wf.hasTagOrReplyTag:
            return '<=' if rev else '=>'
        elif p == wf.hasOutputTag:
            return '-<-' if rev else '->-'
            # NOTE(review): the return below is unreachable as written (it
            # follows another return in the same branch); an ``else:`` appears
            # to be missing, so unknown predicates currently yield None.
            return '<??' if rev else '??>'

    def chain_to_typed_chain(chain, g, func):
        # duh...
        for s, o in zip(chain, chain[1:]):
            # TODO deal with reversed case
            s, o = s.u, o.u
            p = None
            #print(s, o)
            printq(s, o)
            for p in get_linkers(s, o, g, func):
                #yield (s, edge_to_symbol(p), o)
                # ``yield from`` on a tuple emits s, the symbol and o as three
                # separate items rather than one triple.
                yield from (s, edge_to_symbol(p), o)

            # No forward linker found: try the reversed direction.
            if not p:
                for rp in get_linkers(o, s, g, func):
                    yield from (s, edge_to_symbol(rp, rev=True), o)

    def tchains(thing_chains, func):
        return sorted([OntId(e).curie if isinstance(e, URIRef) else e
                       for e in chain_to_typed_chain(list(reversed(chain)), cgraph, func)]
                      for chains in thing_chains.values()
                      for chain in chains)

    def getLinkers(node, g):
        for list_top in getLists(node, g):
            for linker in g[:owl.oneOf:list_top]:
                yield linker

    def allSubjects(object, graph):
        yield from (s for s, p in graph[::object])
        yield from getLinkers(object, graph)

    ttc = tchains(terminal_chains, allSubjects)
    tec = tchains(endpoint_chains, allSubjects)

    # NOTE: this rebinds the name ``valid_tagsets`` from the nested function
    # (already called above) to a frozenset of tag sets.
    valid_tagsets = frozenset((t for s in tag_transitions.values() for t in s))
    tts = valid_tagsets - frozenset(tag_transitions)
    endtype = 'TODO'  # 
    tt = {}
    for endtype, chains  in endpoint_chains.items():
        for *_chain, tag in chains:
            # NOTE(review): next_thing is only assigned when _chain is
            # non-empty; otherwise the value from an earlier iteration is
            # reused, or the name is unbound on first use.
            if _chain:
                next_thing = _chain[-1]
            for ets in tts:
                if tag in ets:
                    tt[ets] = next_thing

    terminal_tagsets = tt

    #[print(wat) for wat in terminal_chains.values()]
    return tag_types, tag_tokens, partInstances, valid_tagsets, terminal_tagsets, tag_transitions
Esempio n. 10
class ElasticTestCase(unittest.TestCase):
    # Test case for a ConjunctiveGraph backed by the "Elasticsearch" store
    # plugin.
    # NOTE(review): several method bodies in this listing are truncated (see
    # the markers below), so the class does not parse as shown.
    identifier = URIRef("rdflib_test")
    dburi = Literal("http://localhost:9200/collection")

    # NOTE(review): the setUp header and the statement after it are garbled
    # (an assignment target, the rest of the plugin.get call and the arguments
    # before ", identifier=" are missing).
    def setUp(self): = plugin.get("Elasticsearch",
        self.graph = ConjunctiveGraph(, identifier=self.identifier), create=True)

    # NOTE(review): the tearDown body is missing from this listing.
    def tearDown(self):

    def test_registerplugins(self):
        # I doubt this is quite right for a fresh pip installation,
        # this test is mainly here to fill a coverage gap.
        self.assertIsNotNone(plugin.get("Elasticsearch", Store))
        p = plugin._plugins
        self.assertIn(("Elasticsearch", Store), p)
        del p[("Elasticsearch", Store)]
        plugin._plugins = p
        # NOTE(review): nothing visible re-registers the plugin between the
        # ``del`` above and the assertion below; a re-registration call looks
        # to be missing here - confirm against the original test.
        self.assertIn(("Elasticsearch", Store), p)

    def test_namespaces(self):
        # The graph is expected to report at least one namespace binding.
        self.assertNotEqual(list(self.graph.namespaces()), [])

    def test_contexts_without_triple(self):
        self.assertEqual(list(self.graph.contexts()), [])

    def test_contexts_result(self):
        # ``ctx_id``, ``michel``, ``likes`` and ``pizza`` are defined outside
        # this class.
        g = self.graph.get_context(ctx_id)
        g.add((michel, likes, pizza))
        # NOTE(review): the argument of ``list(`` and its closing parenthesis
        # are missing from this listing.
        actual = list(
        self.assertEqual(actual[0], ctx_id)

    def test_contexts_with_triple(self):
        statemnt = (michel, likes, pizza)
        self.assertEqual(list(self.graph.contexts(triple=statemnt)), [])

    def test__len(self):
        # NOTE(review): the first argument of assertEqual is missing.
        self.assertEqual(, 0)

    def test_triples_choices(self):
        # Set this so we're not including selects for both asserted and literal tables for
        # a choice = True
        # Set the grouping of terms = 2
        # NOTE(review): the statements that the comments above describe are
        # not present in this listing.

        results = [((michel, likes, pizza), ctx_id)]

        # force execution of the generator
        # NOTE(review): the iterable expression of the ``for`` statement is
        # truncated; only its trailing argument tuple is visible.
        for x in
            (None, likes, [michel, pizza, likes])):
            print("x=" + str(x))
            print("results=" + str(results))
            assert x in results
Esempio n. 11
    # NOTE(review): this is the interior of a function whose ``def`` line is
    # not part of this listing; ``primer``, ``mySource``, ``OWL`` and ``DC``
    # come from code that is not shown.
    # Technically, we already created a namespace
    # with the object init (and it added some namespaces as well).
    # By default, your main namespace is the URI of your
    # current working directory, so let's make that simpler:

    # NOTE(review): the empty-string namespace URIs below look like stripped
    # URLs - confirm against the original source.
    myNS = Namespace('')
    primer.bind('', myNS)
    primer.bind('owl', OWL)
    primer.bind('dc', DC)
    primer.bind('swap', '')

    # Let's load it up!

    primer.parse(data=mySource, format='n3')

    # Now you can query, either directly straight into a list:

    [(x, y, z) for x, y, z in primer]

    # or spit it back out (mostly) the way we created it:
    # NOTE(review): the statement this comment introduces is missing from the
    # listing.


    # For more insight into things already done, let's see the namespaces
    # NOTE(review): the statement this comment introduces is missing from the
    # listing.


    # Let's ask something about the data

    list(primer.objects(myNS.pat, myNS.child))
Esempio n. 12
from rdflib import Literal, XSD, URIRef
from rdflib.namespace import FOAF, RDF, RDFS
from rdflib.serializer import Serializer
from rdflib import plugin

def pprint(msg):
    # Decodes ``msg`` as UTF-8 and walks its lines, selecting the ones that
    # are not blank after stripping whitespace.
    # NOTE(review): the statement under ``if l.strip():`` is missing from this
    # listing, so the function does not parse as shown.
    msg = msg.decode('utf-8')
    for l in msg.split('\n'):
        if l.strip():

# Script: runs a CONSTRUCT query against a Sleepycat-backed graph with the
# graph's own namespace bindings, then re-serializes the result as Turtle.
# NOTE(review): ``ConjunctiveGraph`` and ``Graph`` are used below but are not
# among the imports visible in this listing.
store = 'Sleepycat'
# NOTE(review): the next line is garbled - a string literal and arguments
# follow the closing parenthesis of the constructor call; a separate call that
# took the path and ``create=False`` appears to have been fused into it.
graph = ConjunctiveGraph(store=store, identifier='mygraph')'foaf_flask/static/rdf/sleepycat', create=False)

#CONSTRUCT {{ ?uri  ?p ?o . }}
# NOTE(review): the triple-quoted query literal below is not terminated where
# expected (its WHERE keyword and closing quotes appear to be missing), so as
# written it swallows part of the commented-out alternative that follows.
query = """CONSTRUCT { ?uri ?p ?o . }
    {GRAPH ?g { ?uri ?p ?o } }
    UNION { ?uri ?p ?o }
# query = """CONSTRUCT { ?uri  ?p ?o . }
# WHERE  { ?uri ?p ?o } """
# NOTE(review): the empty URIRef below looks like a stripped URL - confirm.
bind = {'uri': URIRef('')}
# prefix -> namespace mapping taken from the graph's bindings
context = dict(graph.namespaces())
query_result = graph.query(query, initBindings=bind, initNs=context)
# Round-trip the query result through its XML serialization into a new Graph.
newg = Graph().parse(data=query_result.serialize(format='xml'))
data = newg.serialize(format='turtle', context=context)
Esempio n. 13
def convert(teifile, namespace):
    #graph_uri = ""

    ns = Namespace(namespace)

    graph = ConjunctiveGraph()
    graph.load(teifile, format="rdfa")

    graph.bind("default", ns)

    to_update = ""

    for prefix, nsuri in graph.namespaces():
        #print("prefix: " + str(prefix) + " - " + str(nsuri))
        if nsuri in ns:
            to_update = nsuri

    for s, p, o in graph:
        #    		print s, p, o
        if to_update != "" and to_update in s:
            graph.remove((s, p, o))
            s = URIRef(s.replace(to_update, ns))
            graph.add((s, p, o))

    act = ""
    scene = ""
    line = ""
    char = 0
    loc = 0

    #timeline = ns['timeline/narrative']
    #graph.add((timeline, RDF.type, ome['Timeline']))

    tree = ET.parse(teifile)
    cast = dict()

    titleNode = tree.find('//title')

    castItems = tree.findall('/text/body/div1/castList//castItem')
    for castItem in castItems:
        actorNode = castItem.find('actor')
        roleNode = castItem.find('role')

        if roleNode != None:
            id = roleNode.get("{}id")

        #print("Found castItem!")

        actor = None
        role = None

        # Check to see if we already have an entry
        if (roleNode != None and roleNode.get("about")):

            charname = roleNode.get("about")

            if (charname.find(":") > -1):
                nmsp, nom = charname.split(":", 1)
                charcode = "character/" + str(char)
                charref = nmsp + ":" + charcode + "]"
                role = extractCURIEorURI(graph, charref, nom[0:-1])
                char += 1
                #print("1:" + charname + ": adding id " + id + " to " + role)
                role = extractCURIEorURI(graph, charname)
                #print("2:" + charname + ": adding id " + id + " to " + role)

            cast[id] = role
            graph.add((role, RDF.type, omb['Character']))
            #print(charname + ": adding id " + id + " to " + role)

        if (actorNode != None and actorNode.get("about")):
            actor = extractCURIEorURI(graph, actorNode.get("about"))
            graph.add((actor, RDF.type, omb['Being']))

        if actor != None and role != None:
            graph.add((actor, omb['portrays'], role))
            graph.add((role, omb['portrayed-by'], actor))

    eventCount = 1
    groupCount = 1
    prior_event = None

    actItems = tree.findall('/text/body/div1')
    ref = ""

    for actItem in actItems:

        if actItem.get("type") == "act":
            act = actItem.get("n")

        sceneItems = actItem.findall('div2')

        for sceneItem in sceneItems:

            #print("Found sceneItems!")

            if sceneItem.get("type") == "scene":
                scene = sceneItem.get("n")

            # Work out the location of this scene
            location = None
            stageItems = sceneItem.findall("stage")

            #internalnum = 1
            stagenum = 0
            speechnum = 1

            for stageItem in stageItems:
                if stageItem.get("type") == "location":
                    # The RDFa parser doesn't handle the type - so we can grab that here.

                    if stageItem.get("about") != None:
                        locname = stageItem.get("about")

                        # Adding location type/oml:space for location
                        if stageItem.get("typeof") and stageItem.get("about"):
                            type = extractCURIEorURI(graph,
                            #print "1. Location: " + str(location) + " Type: " + str(type)
                        elif stageItem.get("about"):
                            #print "2. Location: " + str(locname)
                            type = extractCURIEorURI(graph, oml['Space'])

                        # Get location value and add rdfs:label is location is not using the TEI value
                        if (locname.find(":") > -1):
                            nmsp, nom = locname.split(":", 1)
                            loccode = "location/" + str(loc)
                            locref = nmsp + ":" + loccode + "]"
                            location = extractCURIEorURI(
                                graph, locref, nom[0:-1])
                            loc += 1
                                ), Literal(nom[0:-1])))
                            location = extractCURIEorURI(
                                graph, stageItem.get("about"))

                        # Add location to graph
                        graph.add((location, RDF.type, type))
                        location = ""

                    #print("Adding location type: " + type + " (" + location + ")")

            if cast:
                # Work out a list of all cast in a given section
                currentCast = list()
                speakers = list()

            # Iterate through elements within stageItem
            # Find speaker events and add to list of current cast for inclusion in social event
            # Find reference events and add to ongoing social event ?
            # Find stage events
            # If event is an entrance then
            # create social event for people talking before entrance
            # create travel event i.e. entrance
            # add new arrival to current cast list
            # If event is exit event then
            # create social event for people talking before exit
            # create travel event i.e. exit
            # if leavers are not named directly the calculate who is leaving
            # remove leavers from current cast list
            # If reach end of scene then create social event with current cast list

            #Also need to check if social event before exit has same composition as social event after exit since then they should be merged

            event = ns['event/' + str(eventCount)]
            group = ns['group/' + str(groupCount)]

            refersTo = list()
            #parent = None
            speakerNodes = list()
            speakerRef = list()

            xpointer = "" + str(
                perseusid) + ":act=" + str(act) + ":scene=" + str(scene)
            stagecount = 0
            stage_array = list()

            for node in sceneItem.getiterator():
                #print("Node: " + node.tag)
				if node.tag == "lb":
					if node.get("ed") == "F1":
						line = node.get("n")	
						if titleNode != None:
							ref = titleNode.text + " " + str(act) + "." + str(scene) + "." + str(line)	
							ref = str(act) + "." + str(scene) + "." + str(line)
						#xpointer = ""  + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene) + "#xpointer(//lb[@ed='F1' and @n='" + str(line)	 + "'])"
						xpointer = ""  + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene)
						#print("Ref: " + xpointer)

                if node.tag == "sp":
                    id = node.get("who")

                    if id and cast:

                        if perseusid == None:
                            #speechRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(int(line) + 1) + "']/ancestor::sp)"
                            speechRef = xpointer + "#xpointer(//div2/sp[" + str(
                                speechnum) + "])"
                        #print("Line ref: " + ref)

                        if cast[id[1:]] not in currentCast:

                    #internalnum = 1
                    speechnum += 1
                    stagecount = 0

                    previousl = 0

                    for subnode in node.getiterator():
                        if subnode.tag == "l":
                            previousl += 1

                        if subnode.tag == "stage":
                            #print ("Stagecount: " + str(stagecount) + " Previousl: " + str(previousl) + "\n")
                            stagecount += 1

                elif node.tag == "stage":

                    if stagecount > 0:
                        s_max = len(stage_array)
                        diff = s_max - stagecount

                        #if diff == 0:
                        #	stagenum += 1

                        entRef = xpointer + "#xpointer(//div2/sp[" + str(
                            speechnum - 1) + "]/l[" + str(
                                stage_array[diff]) + "]/stage)"
                        #internalnum += 1
                        stagecount -= 1
                        stagenum += 1
                        entRef = xpointer + "#xpointer(//div2/stage[" + str(
                            stagenum) + "])"

                    if node.get("type") == "entrance":

                        # Add Social Events for all the people who spoke since the last break (if there were any)

                        update = list()
                        update = getSocial(graph, ns, speakers, speakerNodes,
                                           speakerRef, cast, currentCast,
                                           eventCount, event, prior_event,
                        eventCount = update[0]
                        prior_event = update[1]

                        event = ns['event/' + str(eventCount)]

                        speakers = list()
                        speakerNodes = list()
                        speakerRef = list()

                        # Add Travel Event

                        graph.add((event, RDF.type, omj['Travel']))

                        if perseusid == None:
                                ), Literal(ref)))
                            #entRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(line) + "']/following-sibling::*[1]/self::stage)"
                                ), URIRef(entRef)))

                        #print("Entrance event. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))

                        #print("Found entrence event!")
                        if location:
                            graph.add((event, ome['to'], location))

                        involved = node.get("about")

                        if (len(involved) > 0 and involved[0] == "["
                                and involved[-1] == "]"):
                            involved = involved[1:-1]

                        chunks = involved.split()

                        chunk_count = len(chunks)

                        if chunk_count > 1:
                            #type = extractCURIEorURI(graph, "[omb:Group]")
                            #graph.add((group, RDF.type, type))
                            graph.add((group, RDF.type, omb['Group']))

                        event_label = ""
                        en = 1

                        for chunk in chunks:
                            striped = chunk.strip()

                            if (len(striped) > 0 and striped[0] == "["
                                    and striped[-1] == "]"):
                                striped = striped[1:-1]

                            if chunk_count > 1:
                                    (group, ome['contains'], cast[striped]))

                                if en == chunk_count:
                                    event_label = event_label[
                                        0:-2] + " and " + striped
                                        ), Literal(event_label + " arrive")))
                                elif en < chunk_count:
                                    event_label += striped + ", "

                                #print("Adding person as subject-entity to entry event "   + str(eventCount))
                                    ), Literal(striped + " arrives")))
                                graph.add((event, ome['has-subject-entity'],

                            en += 1

                        if chunk_count > 1:
                                (event, ome['has-subject-entity'], group))
                            #print("Adding group as subject-entity to entry event "   + str(eventCount))
                            groupCount = groupCount + 1
                            group = ns['group/' + str(groupCount)]

                        if (prior_event):
                            graph.add((event, ome['follows'], prior_event))
                            graph.add((prior_event, ome['precedes'], event))

                        prior_event = event

                        eventCount = eventCount + 1
                        event = ns['event/' + str(eventCount)]

                    if node.get("type") == "exit":

                        # Add Social Events for all the people who spoke since the last break (if there were any)
                        update = list()
                        update = getSocial(graph, ns, speakers, speakerNodes,
                                           speakerRef, cast, currentCast,
                                           eventCount, event, prior_event,
                        eventCount = update[0]
                        prior_event = update[1]

                        event = ns['event/' + str(eventCount)]

                        speakers = list()
                        speakerNodes = list()
                        speakerRef = list()

                        # Add Travel Event

                        graph.add((event, RDF.type, omj['Travel']))

                        if perseusid == None:
                                ), Literal(ref)))
                            #exitRef = xpointer
                            #graph.add((event, rdflib.URIRef(""), URIRef(exitRef)))
                                ), URIRef(entRef)))

                        #print("Found entrence event!")
                        if location != None:
                            graph.add((event, ome['from'], location))

                        involved = node.get("about")

                        if involved.strip() == "" or "-all" in involved:
                            # Remove everyone

                            #print("Exit all. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))

                            #for peep in currentCast:
                            #	print(peep)

                            if len(currentCast) > 1:
                                #type = extractCURIEorURI(graph, "[omb:Group]")
                                #graph.add((group, RDF.type, type))
                                graph.add((group, RDF.type, omb['Group']))

                            event_label = ""
                            en = 1

                            for peep in currentCast:
                                short_ref = ""
                                for key, value in cast.iteritems():
                                    if peep == value:
                                        short_ref = key

                                if len(currentCast) > 1:
                                    graph.add((group, ome['contains'], peep))

                                    if en == len(currentCast):
                                        event_label = event_label[
                                            0:-2] + " and " + short_ref
                                            Literal(event_label + " leave")))
                                    elif en < len(currentCast):
                                        event_label += short_ref + ", "

                                    #print("Adding person as subject-entity to exuant event "   + str(eventCount))
                                        (event, ome['has-subject-entity'],
                                        ), Literal(short_ref + " leaves")))

                                en += 1

                            if len(currentCast) > 1:
                                    (event, ome['has-subject-entity'], group))
                                #print("Adding group as subject-entity to exuant event "   + str(eventCount))
                                groupCount = groupCount + 1
                                group = ns['group/' + str(groupCount)]

                            currentCast = list()

                        elif "!" in involved:
                            #print("Exit except some. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))

                            #print("Event: " + involved);

                            if (len(involved) > 0 and involved[0] == "["
                                    and involved[-1] == "]"):
                                involved = involved[1:-1]

                            involved = involved.strip()

                            if (len(involved) > 0 and involved[0] == "!"
                                    and involved[1] == "("
                                    and involved[-1] == ")"):
                                involved = involved[2:-1]

                            #print("involved: " + involved)

                            striped = involved.strip()

                            c_ids = striped.split()

                            chunks = list()

                            for stay in c_ids:
                                #print("Staying: " + cast[stay])

                            staying = list()
                            going = list()

                            for player in currentCast:
                                #print("Player: " + player)
                                if player in chunks:

                            going_count = len(going)

                            if going_count > 1:
                                #type = extractCURIEorURI(graph, "[omb:Group]")
                                #graph.add((group, RDF.type, type))
                                graph.add((group, RDF.type, omb['Group']))

                            event_label = ""
                            en = 1

                            for ghost in going:
                                #print("ghost: " + ghost)

                                short_ref = ""
                                for key, value in cast.iteritems():
                                    if ghost == value:
                                        short_ref = key

                                if ghost in currentCast:
                                    #print("Current cast count: "  + str(len(currentCast)))

                                if going_count > 1:
                                    graph.add((group, ome['contains'], ghost))

                                    if en == len(going):
                                        event_label = event_label[
                                            0:-2] + " and " + short_ref
                                            Literal(event_label + " leave")))
                                    elif en < len(going):
                                        event_label += short_ref + ", "

                                    #print("Adding person as subject-entity to exit event "   + str(eventCount))
                                        (event, ome['has-subject-entity'],
                                        ), Literal(short_ref + " leaves")))

                                en += 1

                            if going_count > 1:
                                    (event, ome['has-subject-entity'], group))
                                #print("Adding group as subject-entity to exit event "   + str(eventCount))
                                groupCount = groupCount + 1
                                group = ns['group/' + str(groupCount)]

                            #print("Exit some. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))

                            if (len(involved) > 0 and involved[0] == "["
                                    and involved[-1] == "]"):
                                involved = involved[1:-1]

                            striped = involved.strip()
                            chunks = striped.split()

                            #print("striped: " + striped)

                            chunk_count = len(chunks)

                            if chunk_count > 1:
                                #type = extractCURIEorURI(graph, "[omb:Group]")
                                #graph.add((group, RDF.type, type))
                                graph.add((group, RDF.type, omb['Group']))

                            event_label = ""
                            en = 1

                            for chunk in chunks:
                                #print("chunk: " + chunk)

                                ghost = cast[chunk]

                                #print("ghost: " + ghost)

                                if ghost in currentCast:
                                    #print("Current cast count: "  + str(len(currentCast)))

                                if chunk_count > 1:
                                    graph.add((group, ome['contains'], ghost))

                                    if en == len(currentCast):
                                        event_label = event_label[
                                            0:-2] + " and " + chunk
                                            Literal(event_label + " leave")))
                                    elif en < len(currentCast):
                                        event_label += chunk + ", "

                                    #print("Adding person as subject-entity to exit event "   + str(eventCount))
                                        (event, ome['has-subject-entity'],
                                        ), Literal(chunk + " leaves")))

                                en += 1

                            if chunk_count > 1:
                                    (event, ome['has-subject-entity'], group))
                                #print("Adding group as subject-entity to exit event "   + str(eventCount))
                                groupCount = groupCount + 1
                                group = ns['group/' + str(groupCount)]

                        if (prior_event):
                            graph.add((event, ome['follows'], prior_event))
                            graph.add((prior_event, ome['precedes'], event))

                        prior_event = event

                        eventCount = eventCount + 1
                        event = ns['event/' + str(eventCount)]

                #elif node.tag == "rs":
                #	#print("Found rs node")
                #	if parent:
                #		#print("Parent type is " + parent.tag)
                #		if parent.tag == "p" or  parent.tag == "l":
                #			refersTo.append(node.get("about"))

                #parent = node

            # Add Social Events for all the people who spoke since the last break (if there were any)
            #print("Final section of scene, currentCast:" + str(len(currentCast)) + " sperkers: " + str(len(speakers)))
            update = list()
            update = getSocial(graph, ns, speakers, speakerNodes, speakerRef,
                               cast, currentCast, eventCount, event,
                               prior_event, location)
            eventCount = update[0]
            prior_event = update[1]

            event = ns['event/' + str(eventCount)]
            group = ns['group/' + str(groupCount)]

            speakers = list()
            speakerNodes = list()
            currentCast = list()
            speakerRef = list()

    print graph.serialize(format='xml')
Esempio n. 14
    # Prefix -> namespace entries. The opening of the enclosing mapping literal
    # and its closing brace are not visible in this excerpt.
    u"tmo"   : TMO,
    u"dc"    : DC,
    u"dct"   : DCTERMS,
    u"foaf"  : FOAF,
    u"sioc"  : SIOC,
    u"sioct" : SIOCT,
    u"geo"   : GEO,
    u"mvcb"  : MVCB,
    u"ical"  : ICAL,
    u"xsd"   : XSD,
    u"owl"   : OWL,
    u"skos"  : SKOS,
    u"doap"  : DOAP,

# Copy every (prefix, namespace) pair reported by namespace_manager into
# NSBINDINGS, overwriting any entry that shares the same prefix.
for shortname, namespace in namespace_manager.namespaces():
    NSBINDINGS[shortname] = namespace

    # NOTE(review): tail of a sequence literal whose opening is missing here;
    # presumably it names the last two slots of the EXTRACTOR_KEYWORD tuples
    # below -- confirm against the full file.
    'NEPOMUK-ONTOLOGY',         # left part predicate
    'NEPOMUK-CLASS/PROPERTY'    # right part predicate

# Each extractor keyword is mapped to a 4-tuple of strings.
EXTRACTOR_KEYWORD['album'] = ('title', 'collection', 'nid3', 'albumTitle')
EXTRACTOR_KEYWORD['artist'] = ('creator', '', 'nao', 'creator')
EXTRACTOR_KEYWORD['book title'] = ('title', '', 'nie', 'title')
Esempio n. 15
def get_vocab_base(vocabfile):
    """Guess the identifier, base URI and prefix of a vocabulary file.

    The file is parsed with the parser's default format first and, if that
    fails, a second time as N3 into a fresh graph.

    Args:
        vocabfile: source handed to ``Graph.parse`` (path, URL or file object).

    Returns:
        A tuple ``(identifier, base, prefix)``. Every element may be ``None``
        when nothing suitable is found; ``(None, None, None)`` is returned
        when the file cannot be parsed in either format. A non-empty ``base``
        always ends in ``/`` or ``#``.
    """
    graph = Graph()
    try:
        graph.parse(vocabfile)
    except Exception:
        # Throw away whatever the failed parse left behind and retry as N3.
        graph = Graph()
        try:
            graph.parse(vocabfile, format="n3")
        except Exception:
            return (None, None, None)

    # dc:identifier wins over dcterms:identifier; the last value seen is kept.
    identifier = None
    for v in graph.objects(None, namespaces['dc']['identifier']):
        identifier = v
    if not identifier:
        for v in graph.objects(None, namespaces['dcterms']['identifier']):
            identifier = v

    # Candidate base URIs, tried in decreasing order of reliability. Each
    # step only runs while no base has been found yet.
    base = None
    for s in graph.subjects(namespaces['rdf']['type'], namespaces['owl']['Ontology']):
        base = s
    if not base:
        for s in graph.subjects(namespaces['dc']['title'], None):
            base = s
    if not base:
        for s in graph.subjects(namespaces['dcterms']['title'], None):
            base = s
    if not base:
        for s in graph.subjects(namespaces['dc']['creator'], None):
            base = s
    if not base:
        for s in graph.subjects(namespaces['dcterms']['creator'], None):
            base = s
    if not base:
        for v in graph.objects(None, namespaces['vann']['preferredNamespaceUri']):
            base = v
    if not base:
        # Last resort: the namespace bound to the empty (default) prefix.
        for ns_prefix, ns_uri in graph.namespaces():
            if ns_prefix == '':
                base = ns_uri

    # An explicit vann:preferredNamespacePrefix wins; otherwise reuse the
    # prefix bound to the base URI, or derive one from its last path segment.
    prefix = None
    for vp in graph.objects(None, namespaces['vann']['preferredNamespacePrefix']):
        prefix = vp
    if not prefix and base:
        for ns_prefix, ns_uri in graph.namespaces():
            if str(ns_uri) == str(base):
                prefix = ns_prefix
    if not prefix and base:
        prefix = base.strip().strip('/').split('/')[-1].strip('#').strip(' ')

    if base:
        base = base.strip()
        # Guard against a whitespace-only base, which would otherwise raise
        # IndexError when inspecting its last character.
        if base and not base.endswith(("/", "#")):
            base += "#"
    return (identifier, base, prefix)
Esempio n. 16
def read_manifest(item, manifest_file):
    """Read an RDF/XML manifest and re-home its triples onto ``item``.

    Subjects that are local ``file://`` URIs or unreferenced blank nodes are
    mapped either to ``item.uri`` or to the matching entry in the item's
    ``ore:aggregates`` list. rdfs:seeAlso links are collected separately and
    removed from the graph before the remaining triples are returned.

    Args:
        item: object exposing ``uri`` and ``list_rdf_objects(uri, predicate)``.
        manifest_file: path of the RDF/XML manifest to parse.

    Returns:
        A tuple ``(namespaces, triples, seeAlsoFiles)``: the prefix ->
        namespace dict of the parsed graph, a list of ``(subject, predicate,
        object)`` tuples with rewritten subjects, and the aggregated file
        URIs referenced through rdfs:seeAlso.
    """
    triples = []
    seeAlsoFiles = []
    # NOTE(review): both dataset class URIs are blank in this copy of the
    # file; they are project specific and must be restored from the original
    # source for the dataset-type handling below to match anything useful.
    oxdsClasses = ['', '']

    aggregates = item.list_rdf_objects(item.uri, "ore:aggregates")
    g = ConjunctiveGraph()
    gparsed = g.parse(manifest_file, format='xml')
    namespaces = dict(g.namespaces())

    # Get the subjects
    subjects = {}
    for s in gparsed.subjects():
        if s in subjects:
            continue
        if type(s).__name__ == 'URIRef':
            if str(s).startswith('file://'):
                ss = str(s).replace('file://', '')
                if manifest_file in ss:
                    # The manifest describing itself stands for the item.
                    subjects[s] = URIRef(item.uri)
                else:
                    manifest_file_path = os.path.split(manifest_file)[0]
                    ss = ss.replace(manifest_file_path, '').strip('/')
                    for file_uri in aggregates:
                        if ss in str(file_uri):
                            subjects[s] = URIRef(file_uri)
                    if s not in subjects:
                        subjects[s] = URIRef(item.uri)
            else:
                subjects[s] = URIRef(s)
        elif type(s).__name__ == 'BNode':
            # A blank node that is never used as an object is a top-level
            # description and is taken to describe the item itself.
            referenced = any(o == s for o in gparsed.objects())
            subjects[s] = s if referenced else URIRef(item.uri)

    # Get the dataset type
    # set the subject uri to item uri if it is of type as defined in oxdsClasses
    # The matches are snapshotted first because triples are removed from the
    # graph inside the loop.
    for s, p, o in list(gparsed.triples((None, RDF.type, None))):
        if str(o) in oxdsClasses:
            if type(s).__name__ == 'URIRef' and len(s) > 0 and str(s) != str(item.uri) and str(subjects[s]) != str(item.uri):
                namespaces['owl'] = URIRef("http://www.w3.org/2002/07/owl#")
                triples.append((item.uri, 'owl:sameAs', s))
                triples.append((item.uri, RDF.type, o))
            elif type(s).__name__ == 'BNode' or len(s) == 0 or str(s) == str(item.uri) or str(subjects[s]) == str(item.uri):
                gparsed.remove((s, p, o))
            subjects[s] = item.uri

    # Get the uri for the see also files
    see_also = URIRef('http://www.w3.org/2000/01/rdf-schema#seeAlso')
    for s, p, o in list(gparsed.triples((None, see_also, None))):
        if type(o).__name__ == 'URIRef' and len(o) > 0:
            obj = str(o)
            if obj.startswith('file://'):
                # Reduce a local file URI to its bare file name.
                obj_path = os.path.split(obj)[0]
                obj = obj.replace(obj_path, '').strip('/')
            for file_uri in aggregates:
                if obj in str(file_uri):
                    seeAlsoFiles.append(file_uri)
        gparsed.remove((s, p, o))

    # Add remaining triples
    for s, p, o in gparsed.triples((None, None, None)):
        triples.append((subjects[s], p, o))
    return namespaces, triples, seeAlsoFiles
Esempio n. 17
    # Load the N3 text held in mySource into the primer graph.
    primer.parse(data=mySource, format="n3")

    # Now you can query, e.g. by iterating the graph straight into a list of
    # (subject, predicate, object) tuples:

    print("Printing bigger example's triples:")
    # NOTE(review): the body of this loop is missing from this excerpt.
    for i in [(x, y, z) for x, y, z in primer]:

    # Or serialize it back out (mostly) the way we created it:
    # NOTE(review): the statement that emits the N3 is missing from this excerpt.

    print("Printing bigger example as N3:")

    # For more insight into what has already been done, let's look at the
    # namespaces bound on the graph.

    print("Printing bigger example's namespaces:")
    # NOTE(review): the body of this loop is missing from this excerpt.
    for n in list(primer.namespaces()):

    # Let's ask something about the data, using a SPARQL query.

    print("Who are pat's children?")
    q = "SELECT ?child WHERE { :pat :child ?child }"
    # NOTE(review): the body of this loop is missing from this excerpt.
    for r in primer.query(q):
Esempio n. 18
def convert(teifile, namespace):
	# Build an RDF graph of events from a TEI play file and print it as RDF/XML.
	#
	# teifile   -- source that is read twice: loaded as RDFa into a
	#              ConjunctiveGraph, then parsed as XML with ET.parse.
	# namespace -- base URI; wrapped in Namespace() and used to mint the
	#              'event/N' and 'group/N' terms.
	#
	# Nothing is returned; the serialized graph is printed at the very end.
	#
	# NOTE(review): several statements appear to be missing from this copy
	# (empty `if`/`for` bodies, absent `else:` branches). The names `perseusid`,
	# `ome`, `omb`, `omj`, `oml`, `getSocial` and `extractCURIEorURI` are not
	# defined inside this function -- presumably module-level; verify.
	#graph_uri = ""
	ns = Namespace(namespace)

	graph = ConjunctiveGraph()
	graph.load(teifile, format="rdfa")
	graph.bind("default", ns)
	to_update = ""

	# Remember a bound namespace URI for which `nsuri in ns` holds; subjects
	# containing it are rewritten onto `ns` in the next loop.
	for prefix, nsuri in graph.namespaces(): 
		#print("prefix: " + str(prefix) + " - " + str(nsuri))
		if nsuri in ns:
			to_update = nsuri
	# NOTE(review): the graph is modified (remove/add) while it is being
	# iterated, and the indentation below mixes spaces and tabs.
	for s, p, o in graph:
#    		print s, p, o
    		if to_update != "" and to_update in s:
    			graph.remove((s, p, o))
			s = URIRef(s.replace(to_update, ns))			
			graph.add((s, p, o))
	act = ""
	scene = ""
	line = ""
	char = 0
	loc = 0
	#timeline = ns['timeline/narrative']
	#graph.add((timeline, RDF.type, ome['Timeline']))

	# Second pass over the same file as plain XML, to read the cast list and
	# walk the acts, scenes, speeches and stage directions.
	tree = ET.parse(teifile)
	# cast maps a role node's id attribute to the value returned by
	# extractCURIEorURI for that role.
	cast = dict()
	titleNode = tree.find('//title')
	castItems = tree.findall('/text/body/div1/castList//castItem')
	for castItem in castItems:
		actorNode = castItem.find('actor')
		roleNode = castItem.find('role')

		if roleNode != None:
			# NOTE(review): the "{}" namespace part looks blanked -- confirm.
			id = roleNode.get("{}id")
		#print("Found castItem!")

		actor = None
		role = None

		# Check to see if we already have an entry
		if(roleNode != None and roleNode.get("about")):		

			charname = roleNode.get("about")
			# NOTE(review): an `else:` seems to be missing before the second
			# `role = ...` assignment, which currently always overrides the first.
			if(charname.find(":") > -1):
				nmsp,nom = charname.split(":", 1)		
				charcode =  "character/" + str(char)
				charref = nmsp + ":" + charcode + "]"
				role = extractCURIEorURI(graph, charref,nom[0:-1])
				char += 1		
				#print("1:" + charname + ": adding id " + id + " to " + role)
				role = extractCURIEorURI(graph, charname)
				#print("2:" + charname + ": adding id " + id + " to " + role)

			cast[id] = role
			graph.add((role, RDF.type, omb['Character']))
			#print(charname + ": adding id " + id + " to " + role)
		if(actorNode != None and actorNode.get("about")):
			actor = extractCURIEorURI(graph, actorNode.get("about"))
			graph.add((actor, RDF.type, omb['Being']))

		if actor != None and role != None:
			graph.add((actor, omb['portrays'], role))
			graph.add((role, omb['portrayed-by'], actor))

	# Counters used to mint event/N and group/N terms; prior_event lets
	# consecutive events be linked with ome follows/precedes.
	eventCount = 1
	groupCount = 1
	prior_event = None
	actItems = tree.findall('/text/body/div1')
	ref = ""
	for actItem in actItems:
		if actItem.get("type") == "act":
			act = actItem.get("n")
		sceneItems = actItem.findall('div2')
		for sceneItem in sceneItems:
			#print("Found sceneItems!")
			if sceneItem.get("type") == "scene":
				scene = sceneItem.get("n")		
			# Work out the location of this scene
			location = None
			stageItems = sceneItem.findall("stage")
			#internalnum = 1
			stagenum = 0
			speechnum = 1
			for stageItem in stageItems:
				if stageItem.get("type") == "location":
					# The RDFa parser doesn't handle the type - so we can grab that here.
					if stageItem.get("about") != None:
						locname = stageItem.get("about")
						# Adding location type/oml:space for location
						if stageItem.get("typeof") and stageItem.get("about"):
							type = extractCURIEorURI(graph, stageItem.get("typeof"))
							#print "1. Location: " + str(location) + " Type: " + str(type)
						elif stageItem.get("about"):	
							#print "2. Location: " + str(locname)											
							type = extractCURIEorURI(graph, oml['Space'])						
						# Get location value and add rdfs:label is location is not using the TEI value
						if(locname.find(":") > -1):
							nmsp,nom = locname.split(":", 1)		
							loccode =  "location/" + str(loc)
							locref = nmsp + ":" + loccode + "]"
							location = extractCURIEorURI(graph, locref, nom[0:-1])
							loc += 1
							graph.add((location, rdflib.URIRef(''), Literal(nom[0:-1])))
							location = extractCURIEorURI(graph, stageItem.get("about"))
						# Add location to graph
						graph.add((location, RDF.type, type))	
						location = ""
					#print("Adding location type: " + type + " (" + location + ")")
			if cast:
				# Work out a list of all cast in a given section
				currentCast = list()
				speakers = list()
			# Iterate through elements within stageItem
				# Find speaker events and add to list of current cast for inclusion in social event
				# Find reference events and add to ongoing social event ?
				# Find stage events
					# If event is an entrance then
						# create social event for people talking before entrance
						# create travel event i.e. entrance
						# add new arrival to current cast list
					# If event is exit event then
						# create social event for people talking before exit
						# create travel event i.e. exit
							# if leavers are not named directly the calculate who is leaving
						# remove leavers from current cast list
				# If reach end of scene then create social event with current cast list
				#Also need to check if social event before exit has same composition as social event after exit since then they should be merged
			event = ns['event/'+str(eventCount)]
			group = ns['group/'+str(groupCount)]	
			refersTo = list()
			#parent = None
			speakerNodes = list()
			speakerRef = list()
			xpointer = ""  + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene)
			stagecount = 0
			stage_array = list()
			for node in sceneItem.getiterator():
				#print("Node: " + node.tag)	
				# <lb> elements with ed="F1" update the current line number and
				# the human-readable reference built from act, scene and line.
				if node.tag == "lb":
					if node.get("ed") == "F1":
						line = node.get("n")	
						if titleNode != None:
							ref = titleNode.text + " " + str(act) + "." + str(scene) + "." + str(line)	
							ref = str(act) + "." + str(scene) + "." + str(line)
						#xpointer = ""  + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene) + "#xpointer(//lb[@ed='F1' and @n='" + str(line)	 + "'])"
						xpointer = ""  + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene)
						#print("Ref: " + xpointer)
				if node.tag == "sp":
					id = node.get("who")
					if id and cast:
						if perseusid == None:
							#speechRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(int(line) + 1) + "']/ancestor::sp)"
							speechRef  = xpointer + "#xpointer(//div2/sp[" + str(speechnum) + "])";
						#print("Line ref: " + ref)
						# NOTE(review): the body of this `if` is missing here.
						if cast[id[1:]] not in currentCast:
					#internalnum = 1
					speechnum += 1
					stagecount = 0
					previousl = 0
					# Count <l> and <stage> descendants of this speech.
					for subnode in node.getiterator():
						if subnode.tag == "l":
							previousl += 1
						if subnode.tag == "stage":
							#print ("Stagecount: " + str(stagecount) + " Previousl: " + str(previousl) + "\n")
							stagecount += 1
				elif node.tag == "stage":
					# Build entRef, an xpointer-style reference to this stage
					# direction. NOTE(review): an `else:` appears to be missing
					# before `stagenum += 1`.
					if stagecount > 0:
						s_max = len(stage_array)
						diff = s_max - stagecount
						#if diff == 0:
						#	stagenum += 1
						entRef = xpointer + "#xpointer(//div2/sp[" + str(speechnum - 1) + "]/l[" + str(stage_array[diff]) +"]/stage)";
						#internalnum += 1
						stagecount -= 1
						stagenum += 1
						entRef = xpointer + "#xpointer(//div2/stage[" + str(stagenum) +"])";				
					if node.get("type") == "entrance":		
						# Add Social Events for all the people who spoke since the last break (if there were any)
						update = list()
						update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location)
						eventCount = update[0]
						prior_event = update[1]
						event = ns['event/'+str(eventCount)]
						speakers = list()
						speakerNodes = list()
						speakerRef = list()
						# Add Travel Event
						graph.add((event, RDF.type, omj['Travel']))
						if perseusid == None:
							graph.add((event, rdflib.URIRef(""), Literal(ref)))
							#entRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(line) + "']/following-sibling::*[1]/self::stage)"
							graph.add((event, rdflib.URIRef(""), URIRef(entRef)))
						#print("Entrance event. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))	
						#print("Found entrence event!")
						if location:
							graph.add((event, ome['to'], location))		
						# The "about" attribute lists who enters; surrounding
						# square brackets are stripped before splitting on whitespace.
						involved = node.get("about")
						if(len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"):
							involved = involved[1:-1]
						chunks = involved.split()
						chunk_count = len(chunks)
						if chunk_count > 1:
							#type = extractCURIEorURI(graph, "[omb:Group]")
							#graph.add((group, RDF.type, type))
							graph.add((group, RDF.type, omb['Group']))
						event_label = ""	
						en = 1
						for chunk in chunks:
							striped = chunk.strip()
							if(len(striped) > 0 and striped[0] == "[" and striped[-1] == "]"):
								striped = striped[1:-1]
							if chunk_count > 1:
								graph.add((group, ome['contains'], cast[striped]))
								if en == chunk_count:
									event_label = event_label[0:-2] + " and " + striped
									graph.add((event, rdflib.URIRef(''), Literal(event_label + " arrive")))
								elif en < chunk_count:
									event_label += striped + ", "									
								#print("Adding person as subject-entity to entry event "   + str(eventCount))
								graph.add((event, rdflib.URIRef(''), Literal(striped + " arrives")))
								graph.add((event, ome['has-subject-entity'], cast[striped]))
							en += 1
						if chunk_count > 1:
							graph.add((event, ome['has-subject-entity'], group))	
							#print("Adding group as subject-entity to entry event "   + str(eventCount))
							groupCount = groupCount + 1
							group = ns['group/'+str(groupCount)]	
							graph.add((event, ome['follows'], prior_event))
							graph.add((prior_event, ome['precedes'], event))
						prior_event = event					
						eventCount = eventCount + 1
						event = ns['event/'+str(eventCount)]
					if node.get("type") == "exit":		
						# Add Social Events for all the people who spoke since the last break (if there were any)
						update = list()
						update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location)
						eventCount = update[0]
						prior_event = update[1]
						event = ns['event/'+str(eventCount)]
						speakers = list()
						speakerNodes = list()
						speakerRef = list()
						# Add Travel Event
						graph.add((event, RDF.type, omj['Travel']))		
						if perseusid == None:
							graph.add((event, rdflib.URIRef(""), Literal(ref)))
							#exitRef = xpointer
							#graph.add((event, rdflib.URIRef(""), URIRef(exitRef)))
							graph.add((event, rdflib.URIRef(""), URIRef(entRef)))
						#print("Found entrence event!")
						if location != None:
							graph.add((event, ome['from'], location))		
						involved = node.get("about")	
						# Case 1: empty "about" or one containing "-all" -- everyone
						# in currentCast leaves and the list is reset.
						if involved.strip() == "" or "-all" in involved:
							# Remove everyone
							#print("Exit all. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))	
							#for peep in currentCast:	
							#	print(peep)
							if len(currentCast) > 1:							
								#type = extractCURIEorURI(graph, "[omb:Group]")
								#graph.add((group, RDF.type, type))
								graph.add((group, RDF.type, omb['Group']))
							event_label = ""
							en = 1
							for peep in currentCast:	
								# Reverse lookup: find the cast key whose value is this member.
								short_ref = ""
								for key, value in cast.iteritems():
									if peep == value:	
										short_ref = key
								if len(currentCast) > 1:
									graph.add((group, ome['contains'], peep))
									if en == len(currentCast):
										event_label = event_label[0:-2] + " and " + short_ref
										graph.add((event, rdflib.URIRef(''), Literal(event_label + " leave")))	
									elif en < len(currentCast):
										event_label += short_ref + ", "
									#print("Adding person as subject-entity to exuant event "   + str(eventCount))
									graph.add((event, ome['has-subject-entity'], peep))
									graph.add((event, rdflib.URIRef(''), Literal(short_ref + " leaves")))
								en += 1
							if len(currentCast) > 1:
								graph.add((event, ome['has-subject-entity'], group))	
								#print("Adding group as subject-entity to exuant event "   + str(eventCount))
								groupCount = groupCount + 1
								group = ns['group/'+str(groupCount)]	
							currentCast = list()
						# Case 2: "about" contains "!" -- the ids inside "!( ... )"
						# are unwrapped and split below. NOTE(review): the loop
						# bodies that fill `chunks`, `staying` and `going` are
						# missing from this copy.
						elif "!" in involved:
							#print("Exit except some. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))	
							#print("Event: " + involved);
							if(len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"):
								involved = involved[1:-1]	
							involved = involved.strip()	
							if(len(involved) > 0 and involved[0] == "!" and involved[1] == "(" and involved[-1] == ")"):
								involved = involved[2:-1]	
							#print("involved: " + involved)
							striped = involved.strip()	
							c_ids = striped.split()
							chunks = list()
							for stay in c_ids:
								#print("Staying: " + cast[stay])
							staying = list()
							going = list()
							for player in currentCast:
								#print("Player: " + player)							
								if player in chunks:
							going_count = len(going)	
							if going_count > 1:
								#type = extractCURIEorURI(graph, "[omb:Group]")
								#graph.add((group, RDF.type, type))	
								graph.add((group, RDF.type, omb['Group']))

							event_label = ""
							en = 1
							for ghost in going:							
								#print("ghost: " + ghost)
								short_ref = ""
								for key, value in cast.iteritems():
									if ghost == value:	
										short_ref = key
								if ghost in currentCast:
									#print("Current cast count: "  + str(len(currentCast)))	
								if going_count > 1:
									graph.add((group, ome['contains'], ghost))
									if en == len(going):
										event_label = event_label[0:-2] + " and " + short_ref
										graph.add((event, rdflib.URIRef(''), Literal(event_label + " leave")))	
									elif en < len(going):
										event_label += short_ref + ", "	
									#print("Adding person as subject-entity to exit event "   + str(eventCount))
									graph.add((event, ome['has-subject-entity'], ghost))
									graph.add((event, rdflib.URIRef(''), Literal(short_ref + " leaves")))
								en += 1
							if going_count > 1:
								graph.add((event, ome['has-subject-entity'], group))	
								#print("Adding group as subject-entity to exit event "   + str(eventCount))
								groupCount = groupCount + 1
								group = ns['group/'+str(groupCount)]	
							# NOTE(review): what follows looks like a third case
							# (explicitly named leavers); the `else:` introducing
							# it appears to be missing -- confirm.
							#print("Exit some. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))	
							if(len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"):
								involved = involved[1:-1]	
							striped = involved.strip()							
							chunks = striped.split()
							#print("striped: " + striped)
							chunk_count = len(chunks)
							if chunk_count > 1:
								#type = extractCURIEorURI(graph, "[omb:Group]")
								#graph.add((group, RDF.type, type))
								graph.add((group, RDF.type, omb['Group']))
							event_label = ""
							en = 1								
							for chunk in chunks:							
								#print("chunk: " + chunk)			
								ghost = cast[chunk]
								#print("ghost: " + ghost)
								if ghost in currentCast:
									#print("Current cast count: "  + str(len(currentCast)))	
								if chunk_count > 1:
									graph.add((group, ome['contains'], ghost))
									if en == len(currentCast):
										event_label = event_label[0:-2] + " and " + chunk
										graph.add((event, rdflib.URIRef(''), Literal(event_label + " leave")))	
									elif en < len(currentCast):
										event_label += chunk + ", "										
									#print("Adding person as subject-entity to exit event "   + str(eventCount))
									graph.add((event, ome['has-subject-entity'], ghost))
									graph.add((event, rdflib.URIRef(''), Literal(chunk + " leaves")))
								en += 1	
							if chunk_count > 1:
								graph.add((event, ome['has-subject-entity'], group))	
								#print("Adding group as subject-entity to exit event "   + str(eventCount))
								groupCount = groupCount + 1
								group = ns['group/'+str(groupCount)]	
							graph.add((event, ome['follows'], prior_event))
							graph.add((prior_event, ome['precedes'], event))
						prior_event = event					
						eventCount = eventCount + 1
						event = ns['event/'+str(eventCount)]
				#elif node.tag == "rs":	
				#	#print("Found rs node")
				#	if parent:
				#		#print("Parent type is " + parent.tag)
				#		if parent.tag == "p" or  parent.tag == "l":
				#			refersTo.append(node.get("about"))
				#parent = node
			# Add Social Events for all the people who spoke since the last break (if there were any)
			#print("Final section of scene, currentCast:" + str(len(currentCast)) + " sperkers: " + str(len(speakers)))
			update = list()
			update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location)
			eventCount = update[0]
			prior_event = update[1]
			event = ns['event/'+str(eventCount)]
			group = ns['group/'+str(groupCount)]
			# Reset the per-scene state before moving on to the next scene.
			speakers = list()
			speakerNodes = list()
			currentCast = list()
			speakerRef = list()
	# Python 2 print statement: write the whole graph to stdout as RDF/XML.
	print graph.serialize(format='xml')		
Esempio n. 19
def read_manifest(item, manifest_file):
    """Parse an RDF/XML manifest and re-anchor its triples onto ``item``.

    Every subject found in the manifest is mapped to the URI it should
    carry in the returned triples:

    * ``file://`` URIs that contain the manifest path map to ``item.uri``;
    * other ``file://`` URIs map to an aggregated file URI that contains
      their path relative to the manifest directory (the last match wins),
      falling back to ``item.uri``;
    * any other URI is kept as is;
    * blank nodes that never occur as an object map to ``item.uri``, while
      blank nodes that are referenced as an object are kept.

    Args:
        item: object exposing ``uri`` and
            ``list_rdf_objects(uri, predicate)``; the latter is called with
            ``"ore:aggregates"`` to obtain the aggregated file URIs.
        manifest_file: path of the manifest, parsed with ``format='xml'``.

    Returns:
        A ``(namespaces, triples, seeAlsoFiles)`` tuple: the prefix to
        namespace dict bound in the parsed graph, the list of
        ``(subject, predicate, object)`` triples with remapped subjects,
        and the aggregated file URIs matched by see-also objects.
    """
    triples = []
    seeAlsoFiles = []
    # NOTE(review): both class URIs are empty strings in this copy of the
    # code; presumably they should be the dataset class URIs -- TODO confirm.
    oxdsClasses = ['', '']

    aggregates = item.list_rdf_objects(item.uri, "ore:aggregates")
    g = ConjunctiveGraph()
    gparsed = g.parse(manifest_file, format='xml')
    namespaces = dict(g.namespaces())

    # Collect the object nodes once instead of rescanning the whole graph
    # for every blank-node subject.
    referenced = set(gparsed.objects())
    manifest_file_path = os.path.split(manifest_file)[0]

    # Map each distinct subject to the URI it will carry in the output.
    subjects = {}
    for s in gparsed.subjects():
        if s in subjects:
            # subjects() yields one entry per triple; handle each node once.
            continue
        node_type = type(s).__name__
        if node_type == 'URIRef':
            if str(s).startswith('file://'):
                ss = str(s).replace('file://', '')
                if manifest_file in ss:
                    subjects[s] = URIRef(item.uri)
                else:
                    ss = ss.replace(manifest_file_path, '').strip('/')
                    for file_uri in aggregates:
                        if ss in str(file_uri):
                            subjects[s] = URIRef(file_uri)
                    if s not in subjects:
                        subjects[s] = URIRef(item.uri)
            else:
                subjects[s] = URIRef(s)
        elif node_type == 'BNode':
            if s in referenced:
                subjects[s] = s
            else:
                subjects[s] = URIRef(item.uri)

    # Get the dataset type: a subject typed with one of oxdsClasses is
    # re-anchored onto the item URI.
    # The triples are snapshotted with list() because the loop body removes
    # triples from the graph being iterated.
    for s, p, o in list(gparsed.triples((None, RDF.type, None))):
        if str(o) not in oxdsClasses:
            continue
        if type(s).__name__ == 'URIRef' and len(s) > 0 and str(s) != str(item.uri) and str(subjects[s]) != str(item.uri):
            # NOTE(review): the namespace bound to 'owl' is an empty URI in
            # this copy of the code -- TODO confirm the intended value.
            namespaces['owl'] = URIRef("")
            triples.append((item.uri, 'owl:sameAs', s))
            triples.append((item.uri, RDF.type, o))
        elif type(s).__name__ == 'BNode' or len(s) == 0 or str(s) == str(item.uri) or str(subjects[s]) == str(item.uri):
            gparsed.remove((s, p, o))
        subjects[s] = item.uri

    # Get the URIs of the see-also files, then drop those triples from the
    # graph so they are not emitted again below.
    # NOTE(review): the predicate URI is an empty string in this copy of the
    # code; presumably it should be the see-also predicate -- TODO confirm.
    for s, p, o in list(gparsed.triples((None, URIRef(''), None))):
        if type(o).__name__ == 'URIRef' and len(o) > 0:
            obj = str(o)
            if obj.startswith('file://'):
                # Keep only the file name of a local file reference.
                obj_path, obj_name = os.path.split(obj)
                obj = obj.replace(obj_path, '').strip('/')
            for file_uri in aggregates:
                if obj in str(file_uri):
                    seeAlsoFiles.append(file_uri)
        gparsed.remove((s, p, o))

    # Add remaining triples, with their subjects remapped.
    triples.extend(
        (subjects[s], p, o) for s, p, o in gparsed.triples((None, None, None))
    )
    return namespaces, triples, seeAlsoFiles