def get_turtle_header():
    """Return a Turtle header with our typical namespaces."""
    empty = ConjunctiveGraph()
    add_default_namespaces(empty)
    turtle_header = []
    for term, urlterm in empty.namespaces():
        turtle_header.append("@prefix {}: <{}>.".format(term, urlterm))
    return "\n".join(turtle_header)
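# Usage sketch for get_turtle_header() (hedged: add_default_namespaces is
# assumed to bind at least the rdfs prefix; the URN below is illustrative).
# The returned header can be prepended to a raw Turtle fragment so that its
# prefixes resolve when parsed:
from rdflib import ConjunctiveGraph

def _demo_turtle_header():
    header = get_turtle_header()
    data = header + '\n<urn:example:a> rdfs:label "example" .'
    g = ConjunctiveGraph()
    g.parse(data=data, format="turtle")
    return g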
def from_string(self, uri, text, format="xml", encoding="utf-8"):
    self.reset()
    self.set_uri(uri)
    t = TextInputSource(text, system_id=uri)
    t.setEncoding(encoding)
    g = ConjunctiveGraph(identifier=self.uri)
    # NB: format must be passed by keyword; the second positional
    # argument of Graph.parse is publicID, not format.
    g = g.parse(t, format=format)
    for prefix, ns in g.namespaces():
        self.add_namespace(prefix, ns)
    for s, p, o in g.triples((self.uri, None, None)):
        self.add_triple(p, o)
def from_url(self, url, uri=None, format="xml", encoding="utf-8"):
    self.reset()
    if not uri:
        self.set_uri(url)
    else:
        self.set_uri(uri)
    g = ConjunctiveGraph(identifier=self.uri)
    g = g.parse(url, format=format)
    for prefix, ns in g.namespaces():
        self.add_namespace(prefix, ns)
    for s, p, o in g.triples((self.uri, None, None)):
        self.add_triple(p, o)
def from_string(self, rdf_manifest_string, format="xml"):
    t = TextInputSource(rdf_manifest_string)
    g = ConjunctiveGraph()
    g = g.parse(t, format=format)
    for s, p, o in g.triples((None, None, None)):
        if s not in self.items:
            self.items.append(s)
        if p == NAMESPACES['rdf']['type']:
            self.items_rdfobjects.setdefault(s, RDFobject(uri=s)).add_type(o)
        else:
            self.items_rdfobjects.setdefault(s, RDFobject(uri=s)).add_triple(p, o)
    for prefix, ns in g.namespaces():
        self.add_namespace(prefix, ns)
def list_used_namespaces():
    """Print Python code as strings that can be used to set up namespaces."""
    g = ConjunctiveGraph()
    flist = get_file_list(sys.argv[1])
    for full in flist:
        g.parse(source=full, format='turtle')
    ns_mgr = NamespaceManager(Graph())
    for pre, ns in g.namespaces():
        q = namespace_rq.replace('--ns--', ns.toPython())
        used = g.query(q).askAnswer
        if used is True:
            #print pre, ns
            print("{} = Namespace('{}')".format(pre.upper(), ns))
            ns_mgr.bind(pre, Namespace(ns))
    return ns_mgr
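# list_used_namespaces() relies on a `namespace_rq` ASK template that is not
# shown above. A plausible shape (an assumption, not the original query) uses
# a '--ns--' placeholder and asks whether any predicate or object in the
# graph starts with the namespace URI:
namespace_rq = """
ASK {
    ?s ?p ?o .
    FILTER(STRSTARTS(STR(?p), "--ns--") || STRSTARTS(STR(?o), "--ns--"))
}
"""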
class SQLATestCase(unittest.TestCase):
    identifier = URIRef("rdflib_test")
    dburi = Literal("sqlite://")

    def setUp(self):
        self.store = plugin.get(
            "SQLAlchemy", Store)(identifier=self.identifier)
        self.graph = ConjunctiveGraph(self.store, identifier=self.identifier)
        self.graph.open(self.dburi, create=True)

    def tearDown(self):
        self.graph.destroy(self.dburi)
        try:
            self.graph.close()
        except Exception:
            pass

    def test_registerplugins(self):
        # I doubt this is quite right for a fresh pip installation;
        # this test is mainly here to fill a coverage gap.
        registerplugins()
        self.assert_(plugin.get("SQLAlchemy", Store) is not None)
        p = plugin._plugins
        self.assert_(("SQLAlchemy", Store) in p, p)
        del p[("SQLAlchemy", Store)]
        plugin._plugins = p
        registerplugins()
        self.assert_(("SQLAlchemy", Store) in p, p)

    def test_namespaces(self):
        self.assert_(list(self.graph.namespaces()) != [])

    def test_contexts_without_triple(self):
        self.assert_(list(self.graph.contexts()) == [])

    def test_contexts_with_triple(self):
        statement = (michel, likes, pizza)
        self.assert_(self.graph.contexts(triple=statement) != [])

    def test__len(self):
        self.assert_(self.store.__len__() == 0)

    def test__remove_context(self):
        self.store._remove_context(self.identifier)
def query(
    query_text: str,
    instance: rdflib.ConjunctiveGraph,
    **kwargs: str,
) -> QueryResult:
    """Run a SPARQL query and return a formatted result."""
    sparql_result: SPARQLResult = instance.query(
        query_text,
        initBindings=kwargs,
    )

    # ASK queries yield a boolean answer.
    if sparql_result.askAnswer is not None:
        return sparql_result.askAnswer

    # CONSTRUCT/DESCRIBE queries yield a graph; copy over the
    # instance's prefix bindings so serialization stays readable.
    if sparql_result.graph is not None:
        graph: rdflib.Graph = sparql_result.graph
        for prefix, namespace in instance.namespaces():
            graph.bind(prefix, namespace)
        return graph

    # SELECT queries yield bindings.
    return _format_query_bindings(sparql_result.bindings)
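# Usage sketch for query() above (the graph and terms are illustrative, not
# from the original code): ASK results come back as a bool, CONSTRUCT and
# DESCRIBE results as a graph carrying the instance's prefix bindings, and
# SELECT results via _format_query_bindings():
import rdflib

def _demo_query():
    g = rdflib.ConjunctiveGraph()
    g.add((rdflib.URIRef('urn:example:s'),
           rdflib.URIRef('urn:example:p'),
           rdflib.Literal('o')))
    return query('ASK { ?s ?p ?o }', g)  # -> True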
def parse_workflow():
    # FIXME TODO these states should probably be compiled down to numbers???
    docs = Path(__file__).parent.absolute().resolve().parent / 'docs'
    rridpath = docs / 'workflow-rrid.graphml'
    paperpath = docs / 'workflow-paper-id.graphml'

    cgraph = ConjunctiveGraph()
    gt.WorkflowMapping(rridpath.as_posix()).graph(cgraph)
    gt.PaperIdMapping(paperpath.as_posix(), False).graph(cgraph)
    write(cgraph, '/tmp/workflow.ttl')
    predicates = set(cgraph.predicates())
    OntCuries({cp: str(ip) for cp, ip in cgraph.namespaces()})
    OntCuries({'RRID': 'https://scicrunch.org/resolver/RRID:',
               'DOI': 'https://doi.org/',
               'PMID': 'https://www.ncbi.nlm.nih.gov/pubmed/'})
    hg = makeGraph('', graph=cgraph)
    short = sorted(hg.qname(_) for _ in predicates)

    wf.hasTag
    wf.hasReplyTag
    wf.hasTagOrReplyTag
    wf.hasOutputTag

    #if type isa wf.tag

    tag_types = set(cgraph.transitive_subjects(rdfs.subClassOf, wf.tag))
    tag_tokens = {tagType: sorted(set(t for t in cgraph.transitive_subjects(rdf.type, tagType)
                                      if t != tagType))
                  for tagType in tag_types}
    has_tag_types = set(cgraph.transitive_subjects(rdfs.subPropertyOf, wf.hasTagOrReplyTag))
    has_tag_types.add(wf.hasOutputTag)
    has_next_action_types = set(cgraph.transitive_subjects(rdfs.subPropertyOf, wf.hasOutput))
    has_next_action_types.add(wf.hasNextStep)

    terminals = sorted(tag
                       for ttype in tag_types
                       if ttype != wf.tagScibot  # scibot is not 'terminal' for this part
                       for tag in cgraph[:rdf.type:ttype]
                       if not isinstance(tag, BNode)
                       and not any(o for httype in has_tag_types
                                   for o in cgraph[tag:httype]))

    endpoints = sorted(endpoint
                       for endpoint in cgraph[:rdf.type:wf.state]
                       if not isinstance(endpoint, BNode)
                       and not any(o for hnatype in has_next_action_types
                                   for o in cgraph[endpoint:hnatype]))

    complicated = sorted(a_given_tag
                         for tt in tag_types
                         for a_given_tag in cgraph[:rdf.type:tt]
                         if not isinstance(a_given_tag, BNode)
                         and not [successor_tag
                                  for htt in has_tag_types
                                  for successor_tag in
                                  chain((t for t in cgraph[a_given_tag:htt]
                                         #if not isinstance(t, BNode)
                                         ),
                                        # we don't actually need this for terminals
                                        # we will need it later
                                        #(t for b in cgraph[a_given_tag:htt]
                                        #if isinstance(b, BNode)
                                        #for listhead in cgraph[b:owl.oneOf]
                                        #for t in unlist(listhead, cgraph)),
                                        )])

    def topList(node, g):
        for s in g[:rdf.rest:node]:
            yield s

    def getLists(node, g):
        for linker in g[:rdf.first:node]:
            top = None
            for top in g.transitiveClosure(topList, linker):
                pass

            if top:
                yield top
            else:
                yield linker

    def getIsTagOf(node, g):
        for htt in has_tag_types:
            for parent_tag in g[:htt:node]:
                yield parent_tag

    def getIsOneOfTagOf(node, g):
        for list_top in getLists(node, g):
            for linker in g[:owl.oneOf:list_top]:
                for parent_tag, _ in g[::linker]:
                    yield parent_tag

    def getPreviousTag(node, g):  # not quite what we need
        yield from getIsOneOfTagOf(node, g)
        yield from getIsTagOf(node, g)

    def getTagChains(node, g, seen=tuple()):
        # seen to prevent recursion in cases where
        # tagging can occur in either order e.g. PMID -> DOI
        #print(tc.red(repr(OntId(node))))  # tc.red(OntId(node)) does weird stuff O_o
        parent_tag = None
        for parent_tag in chain(getIsOneOfTagOf(node, g),
                                getIsTagOf(node, g)):
            if parent_tag in seen:
                parent_tag = None
                continue

            ptt = next(g[parent_tag:rdf.type])
            #if ptt in tag_types:
            for pchain in getTagChains(parent_tag, g, seen + (node,)):
                if ptt in tag_types:
                    out = parent_tag, *pchain
                else:
                    out = pchain
                yield out

            if not ptt and not out:
                parent_tag = None

        if not parent_tag:
            yield tuple()

    def getInitiatesAction(node, g):
        for action in g[:wf.initiatesAction:node]:
            yield action

    def getIsOneOfOutputOf(node, g):
        for list_top in getLists(node, g):
            for linker in g[:owl.oneOf:list_top]:
                for hot in has_next_action_types:
                    for parent_thing in g[:hot:linker]:
                        yield parent_thing

    def getActionChains(node, g):
        parent_action = None
        for parent_action in chain(getIsOneOfOutputOf(node, g),  # works for actions too
                                   getInitiatesAction(node, g)):
            for pchain in getActionChains(parent_action, g):  # NOTE may also be a tag...
                out = parent_action, *pchain
                #print(tuple(hg.qname(o) for o in out))
                yield out

        if not parent_action:
            yield tuple()

    def getRestSubjects(predicate, object, g):
        """ invert restriction """
        rsco = cmb.Restriction(rdfs.subClassOf)
        for rt in rsco.parse(graph=g):
            if rt.p == predicate and rt.o == object:
                yield from g.transitive_subjects(rdfs.subClassOf, rt.s)

    annoParts = list(getRestSubjects(wf.isAttachedTo, wf.annotation, cgraph))
    partInstances = {OntId(a): set(t if isinstance(t, BNode) else OntId(t)
                                   for t in cgraph.transitive_subjects(rdf.type, a)
                                   if not isinstance(t, BNode) and t != a)
                     for a in annoParts}

    _endpoint_chains = {OntId(endpoint): [[OntId(endpoint)] + [OntId(e) for e in chain]
                                          for chain in getActionChains(endpoint, cgraph)]
                        for endpoint in endpoints}

    #print([hg.qname(e) for e in endpoints])
    #print([print([hg.qname(c) for c in getActionChains(endpoint, cgraph) if c])
           #for endpoint in endpoints
           #if endpoint])
    #_ = [print(list(getActionChains(e, cgraph)) for e in endpoints)]
    #return

    wat = cgraph.transitiveClosure(getPreviousTag, RRIDCUR.Duplicate)
    wat = list(wat)

    #def invOneOf(tag, g):

    fake_chains = {hg.qname(terminal):
                   [hg.qname(c)
                    for c in cgraph.transitiveClosure(getPreviousTag, terminal)]
                   for terminal in terminals}

    def make_chains(things, getChains):
        return {OntId(thing): [[OntId(thing)] + [OntId(e) for e in chain]
                               for chain in getChains(thing, cgraph)]
                for thing in things
                #if not print(thing)
                }

    def print_chains(thing_chains):
        print('\nstart from beginning')
        print('\n'.join(sorted(' -> '.join(hg.qname(e) for e in reversed(chain))
                               for chains in thing_chains.values()
                               for chain in chains)))
        print('\nstart from end')
        print('\n'.join(sorted(' <- '.join(e.curie for e in chain)
                               for chains in thing_chains.values()
                               for chain in chains)))

    def valid_tagsets(all_chains):
        # not the most efficient way to do this ...
        transitions = defaultdict(set)
        for end, chains in all_chains.items():
            for chain in chains:
                valid = set()
                prior_state = None
                for element in reversed(chain):
                    valid.add(element)
                    state = frozenset(valid)
                    transitions[prior_state].add(state)
                    prior_state = state

        return {s: frozenset(n) for s, n in transitions.items()}

    endpoint_chains = make_chains(endpoints, getActionChains)
    #endpoint_transitions = valid_transitions(endpoint_chains)  # not the right structure
    print_chains(endpoint_chains)
    terminal_chains = make_chains(terminals, getTagChains)
    print_chains(terminal_chains)
    tag_transitions = valid_tagsets(terminal_chains)
    terminal_tags_to_endpoints = 'TODO'

    def printq(*things):
        print(*(OntId(t).curie for t in things))

    from pprint import pprint

    def get_linkers(s, o, g, linkerFunc):  # FIXME not right
        for p in g[s::o]:
            yield p

        for l in linkerFunc(o, g):
            #print(tc.blue(f'{OntId(s).curie} {l if isinstance(l, BNode) else OntId(l).curie}'))
            for p in g[s::l]:
                #print(tc.red(f'{s} {l} {o} {p}'))
                yield p
        return
        # unreachable alternative kept from the original
        linkers = set(l for l in g.transitiveClosure(linkerFunc, o))
        for p, o in g[s::]:
            if o in linkers:
                yield p

    def edge_to_symbol(p, rev=False):
        if p == wf.initiatesAction:
            return '<<' if rev else '>>'
        elif p == wf.hasReplyTag:
            return '<' if rev else '>'
        elif p == wf.hasTagOrReplyTag:
            return '<=' if rev else '=>'
        elif p == wf.hasOutputTag:
            return '-<-' if rev else '->-'
        else:
            return '<??' if rev else '??>'

    def chain_to_typed_chain(chain, g, func):
        # duh...
        #pprint(chain)
        for s, o in zip(chain, chain[1:]):
            # TODO deal with reversed case
            s, o = s.u, o.u
            p = None
            #print(s, o)
            printq(s, o)
            for p in get_linkers(s, o, g, func):
                #print(tc.yellow(p))
                #yield (s, edge_to_symbol(p), o)
                yield from (s, edge_to_symbol(p), o)

            if not p:
                for rp in get_linkers(o, s, g, func):
                    print(tc.blue(rp))
                    yield from (s, edge_to_symbol(rp, rev=True), o)

    def tchains(thing_chains, func):
        return sorted([OntId(e).curie if isinstance(e, URIRef) else e
                       for e in chain_to_typed_chain(list(reversed(chain)), cgraph, func)]
                      for chains in thing_chains.values()
                      for chain in chains)

    def getLinkers(node, g):
        for list_top in getLists(node, g):
            for linker in g[:owl.oneOf:list_top]:
                yield linker

    def allSubjects(object, graph):
        yield from (s for s, p in graph[::object])
        yield from getLinkers(object, graph)

    print()
    ttc = tchains(terminal_chains, allSubjects)
    tec = tchains(endpoint_chains, allSubjects)
    pprint(ttc)
    pprint(tec)

    valid_tagsets = frozenset(t for s in tag_transitions.values() for t in s)
    tts = valid_tagsets - frozenset(tag_transitions)
    endtype = 'TODO'
    tt = {}
    for endtype, chains in endpoint_chains.items():
        for *_chain, tag in chains:
            if _chain:
                next_thing = _chain[-1]
                for ets in tts:
                    if tag in ets:
                        tt[ets] = next_thing

    terminal_tagsets = tt

    #[print(wat) for wat in terminal_chains.values()]
    #pprint(terminal_chains)
    return tag_types, tag_tokens, partInstances, valid_tagsets, terminal_tagsets, tag_transitions
class SQLATestCase(unittest.TestCase):
    identifier = URIRef("rdflib_test")
    dburi = Literal('sqlite://')

    def setUp(self):
        self.store = plugin.get(
            "SQLAlchemy", Store)(identifier=self.identifier)
        self.graph = ConjunctiveGraph(self.store, identifier=self.identifier)
        self.graph.open(self.dburi, create=True)

    def tearDown(self):
        self.graph.destroy(self.dburi)
        try:
            self.graph.close()
        except Exception:
            pass

    def test_registerplugins(self):
        # I doubt this is quite right for a fresh pip installation;
        # this test is mainly here to fill a coverage gap.
        from rdflib_sqlalchemy import registerplugins
        from rdflib import plugin
        from rdflib.store import Store
        registerplugins()
        self.assert_(plugin.get('SQLAlchemy', Store) is not None)
        p = plugin._plugins
        self.assert_(('SQLAlchemy', Store) in p, p)
        del p[('SQLAlchemy', Store)]
        plugin._plugins = p
        registerplugins()
        self.assert_(('SQLAlchemy', Store) in p, p)

    def test_skolemisation(self):
        from rdflib_sqlalchemy.SQLAlchemy import skolemise
        testbnode = BNode()
        statement = (michel, likes, testbnode)
        res = skolemise(statement)
        self.assert_('bnode:N' in str(res[2]), res)

    def test_deskolemisation(self):
        from rdflib_sqlalchemy.SQLAlchemy import deskolemise
        testbnode = BNode()
        statement = (michel, likes, testbnode)
        res = deskolemise(statement)
        self.assert_(str(res[2]).startswith('N'), res)

    def test_redeskolemisation(self):
        from rdflib_sqlalchemy.SQLAlchemy import skolemise, deskolemise
        testbnode = BNode()
        statement = skolemise((michel, likes, testbnode))
        res = deskolemise(statement)
        self.assert_(str(res[2]).startswith('N'), res)

    def test__parse_rfc1738_args(self):
        from rdflib_sqlalchemy.SQLAlchemy import _parse_rfc1738_args
        self.assertRaises(ValueError, _parse_rfc1738_args, 'Not parseable')

    def test_namespaces(self):
        self.assert_(list(self.graph.namespaces()) != [])

    def test_contexts_without_triple(self):
        self.assert_(list(self.graph.contexts()) == [])

    def test_contexts_with_triple(self):
        statement = (michel, likes, pizza)
        self.assert_(self.graph.contexts(triple=statement) != [])

    def test__len(self):
        self.assert_(self.store.__len__() == 0)

    def test__remove_context(self):
        self.store._remove_context(self.identifier)
class ElasticTestCase(unittest.TestCase):
    identifier = URIRef("rdflib_test")
    dburi = Literal("http://localhost:9200/collection")

    def setUp(self):
        self.store = plugin.get("Elasticsearch", Store)(
            identifier=self.identifier, configuration=self.dburi)
        self.graph = ConjunctiveGraph(self.store, identifier=self.identifier)
        self.graph.open(self.dburi, create=True)

    def tearDown(self):
        self.graph.destroy(self.dburi)
        self.graph.close()

    def test_registerplugins(self):
        # I doubt this is quite right for a fresh pip installation;
        # this test is mainly here to fill a coverage gap.
        registerplugins()
        self.assertIsNotNone(plugin.get("Elasticsearch", Store))
        p = plugin._plugins
        self.assertIn(("Elasticsearch", Store), p)
        del p[("Elasticsearch", Store)]
        plugin._plugins = p
        registerplugins()
        self.assertIn(("Elasticsearch", Store), p)

    def test_namespaces(self):
        self.assertNotEqual(list(self.graph.namespaces()), [])

    def test_contexts_without_triple(self):
        self.assertEqual(list(self.graph.contexts()), [])

    def test_contexts_result(self):
        g = self.graph.get_context(ctx_id)
        g.add((michel, likes, pizza))
        actual = list(self.store.contexts())
        self.assertEqual(actual[0], ctx_id)

    def test_contexts_with_triple(self):
        statement = (michel, likes, pizza)
        self.assertEqual(list(self.graph.contexts(triple=statement)), [])

    def test__len(self):
        self.assertEqual(self.store.__len__(), 0)

    def test_triples_choices(self):
        # Set this so we're not including selects for both asserted and
        # literal tables for a choice
        self.store.STRONGLY_TYPED_TERMS = True
        # Set the grouping of terms
        self.store.max_terms_per_where = 2
        results = [((michel, likes, pizza), ctx_id)]
        # force execution of the generator
        for x in self.store.triples_choices(
                (None, likes, [michel, pizza, likes])):
            print("x=" + str(x))
            print("results=" + str(results))
            assert x in results
# technically, we already created a namespace
# with the object init (and it added some namespaces as well)
# By default, your main namespace is the URI of your
# current working directory, so let's make that simpler:
myNS = Namespace('http://www.w3.org/2000/10/swap/Primer#')
primer.bind('', myNS)
primer.bind('owl', OWL)
primer.bind('dc', DC)
primer.bind('swap', 'http://www.w3.org/2000/10/swap/')

# Let's load it up!
primer.parse(data=mySource, format='n3')

# Now you can query, either directly straight into a list:
[(x, y, z) for x, y, z in primer]

# or spit it back out (mostly) the way we created it:
print(primer.serialize(format='n3'))

# for more insight into things already done, let's see the namespaces
list(primer.namespaces())

# let's ask something about the data
list(primer.objects(myNS.pat, myNS.child))
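# The primer snippet above assumes a `primer` graph and an N3 `mySource`
# string created earlier; a minimal stand-in (an assumption, not the
# original data) would be:
from rdflib import ConjunctiveGraph

primer = ConjunctiveGraph()
mySource = """
@prefix : <http://www.w3.org/2000/10/swap/Primer#> .
:pat :child :jo .
"""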
from rdflib import Literal, XSD, URIRef
from rdflib import ConjunctiveGraph, Graph
from rdflib.namespace import FOAF, RDF, RDFS
from rdflib.serializer import Serializer
from rdflib import plugin


def pprint(msg):
    msg = msg.decode('utf-8')
    for l in msg.split('\n'):
        if l.strip():
            print(l)


store = 'Sleepycat'
graph = ConjunctiveGraph(store=store, identifier='mygraph')
graph.open('foaf_flask/static/rdf/sleepycat', create=False)

#CONSTRUCT {{ ?uri ?p ?o . }}
query = """CONSTRUCT { ?uri ?p ?o . }
           WHERE {
               { GRAPH ?g { ?uri ?p ?o } }
               UNION
               { ?uri ?p ?o }
           }"""
# query = """CONSTRUCT { ?uri ?p ?o . }
#            WHERE { ?uri ?p ?o } """

bind = {'uri': URIRef('http://127.0.0.1:5000/ldp/donna')}
context = dict(graph.namespaces())
query_result = graph.query(query, initBindings=bind, initNs=context)
newg = Graph().parse(data=query_result.serialize(format='xml'))
data = newg.serialize(format='turtle', context=context)
pprint(data)
def convert(teifile, namespace):
    #graph_uri = "http://contextus.net/resource/blue_velvet/"

    ns = Namespace(namespace)
    graph = ConjunctiveGraph()
    graph.load(teifile, format="rdfa")
    graph.bind("default", ns)

    to_update = ""

    for prefix, nsuri in graph.namespaces():
        #print("prefix: " + str(prefix) + " - " + str(nsuri))
        if nsuri in ns:
            to_update = nsuri

    for s, p, o in graph:
        # print s, p, o
        if to_update != "" and to_update in s:
            graph.remove((s, p, o))
            s = URIRef(s.replace(to_update, ns))
            graph.add((s, p, o))

    act = ""
    scene = ""
    line = ""
    char = 0
    loc = 0

    #timeline = ns['timeline/narrative']
    #graph.add((timeline, RDF.type, ome['Timeline']))

    tree = ET.parse(teifile)
    cast = dict()

    titleNode = tree.find('//title')

    castItems = tree.findall('/text/body/div1/castList//castItem')
    for castItem in castItems:
        actorNode = castItem.find('actor')
        roleNode = castItem.find('role')

        if roleNode != None:
            id = roleNode.get("{http://www.w3.org/XML/1998/namespace}id")

        #print("Found castItem!")

        actor = None
        role = None

        # Check to see if we already have an entry
        if roleNode != None and roleNode.get("about"):
            charname = roleNode.get("about")

            if charname.find(":") > -1:
                nmsp, nom = charname.split(":", 1)
                charcode = "character/" + str(char)
                charref = nmsp + ":" + charcode + "]"
                role = extractCURIEorURI(graph, charref, nom[0:-1])
                char += 1
                #print("1:" + charname + ": adding id " + id + " to " + role)
            else:
                role = extractCURIEorURI(graph, charname)
                #print("2:" + charname + ": adding id " + id + " to " + role)

            cast[id] = role
            graph.add((role, RDF.type, omb['Character']))
            #print(charname + ": adding id " + id + " to " + role)

        if actorNode != None and actorNode.get("about"):
            actor = extractCURIEorURI(graph, actorNode.get("about"))
            graph.add((actor, RDF.type, omb['Being']))

        if actor != None and role != None:
            graph.add((actor, omb['portrays'], role))
            graph.add((role, omb['portrayed-by'], actor))

    eventCount = 1
    groupCount = 1
    prior_event = None

    actItems = tree.findall('/text/body/div1')
    ref = ""

    for actItem in actItems:

        if actItem.get("type") == "act":
            act = actItem.get("n")

        sceneItems = actItem.findall('div2')

        for sceneItem in sceneItems:

            #print("Found sceneItems!")

            if sceneItem.get("type") == "scene":
                scene = sceneItem.get("n")

            # Work out the location of this scene
            location = None
            stageItems = sceneItem.findall("stage")

            #internalnum = 1
            stagenum = 0
            speechnum = 1

            for stageItem in stageItems:
                if stageItem.get("type") == "location":
                    # The RDFa parser doesn't handle the type - so we can grab that here.
                    if stageItem.get("about") != None:
                        locname = stageItem.get("about")

                        # Adding location type/oml:space for location
                        if stageItem.get("typeof") and stageItem.get("about"):
                            type = extractCURIEorURI(graph, stageItem.get("typeof"))
                            #print "1. Location: " + str(location) + " Type: " + str(type)
                        elif stageItem.get("about"):
                            #print "2. Location: " + str(locname)
                            type = extractCURIEorURI(graph, oml['Space'])

                        # Get location value and add rdfs:label if location is not using the TEI value
                        if locname.find(":") > -1:
                            nmsp, nom = locname.split(":", 1)
                            loccode = "location/" + str(loc)
                            locref = nmsp + ":" + loccode + "]"
                            location = extractCURIEorURI(graph, locref, nom[0:-1])
                            loc += 1
                            graph.add((location, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(nom[0:-1])))
                        else:
                            location = extractCURIEorURI(graph, stageItem.get("about"))

                        # Add location to graph
                        graph.add((location, RDF.type, type))
                    else:
                        location = ""

                    #print("Adding location type: " + type + " (" + location + ")")

            if cast:
                # Work out a list of all cast in a given section
                currentCast = list()
                speakers = list()

            # Iterate through elements within stageItem
            # Find speaker events and add to list of current cast for inclusion in social event
            # Find reference events and add to ongoing social event ?
            # Find stage events
            # If event is an entrance then
            #   create social event for people talking before entrance
            #   create travel event i.e. entrance
            #   add new arrival to current cast list
            # If event is exit event then
            #   create social event for people talking before exit
            #   create travel event i.e. exit
            #   if leavers are not named directly then calculate who is leaving
            #   remove leavers from current cast list
            # If reach end of scene then create social event with current cast list

            #Also need to check if social event before exit has same composition as social
            #event after exit since then they should be merged

            event = ns['event/' + str(eventCount)]
            group = ns['group/' + str(groupCount)]
            refersTo = list()
            #parent = None
            speakerNodes = list()
            speakerRef = list()

            xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene)

            stagecount = 0
            stage_array = list()

            for node in sceneItem.getiterator():
                #print("Node: " + node.tag)

                """
                if node.tag == "lb":
                    if node.get("ed") == "F1":
                        line = node.get("n")

                        if titleNode != None:
                            ref = titleNode.text + " " + str(act) + "." + str(scene) + "." + str(line)
                        else:
                            ref = str(act) + "." + str(scene) + "." + str(line)

                        #xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene) + "#xpointer(//lb[@ed='F1' and @n='" + str(line) + "'])"
                        xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene)
                        #print("Ref: " + xpointer)
                """

                if node.tag == "sp":
                    id = node.get("who")

                    if id and cast:
                        speakers.append(cast[id[1:]])
                        speakerNodes.append(node)

                        if perseusid == None:
                            speakerRef.append(ref)
                        else:
                            #speechRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(int(line) + 1) + "']/ancestor::sp)"
                            speechRef = xpointer + "#xpointer(//div2/sp[" + str(speechnum) + "])"
                            speakerRef.append(speechRef)
                        #print("Line ref: " + ref)

                        if cast[id[1:]] not in currentCast:
                            currentCast.append(cast[id[1:]])

                    #internalnum = 1
                    speechnum += 1
                    stagecount = 0

                    previousl = 0

                    for subnode in node.getiterator():
                        if subnode.tag == "l":
                            previousl += 1

                        if subnode.tag == "stage":
                            #print ("Stagecount: " + str(stagecount) + " Previousl: " + str(previousl) + "\n")
                            stage_array.append(previousl)
                            stagecount += 1

                elif node.tag == "stage":

                    if stagecount > 0:
                        s_max = len(stage_array)
                        diff = s_max - stagecount

                        #if diff == 0:
                        #    stagenum += 1

                        entRef = xpointer + "#xpointer(//div2/sp[" + str(speechnum - 1) + "]/l[" + str(stage_array[diff]) + "]/stage)"
                        #internalnum += 1
                        stagecount -= 1
                    else:
                        stagenum += 1
                        entRef = xpointer + "#xpointer(//div2/stage[" + str(stagenum) + "])"

                    if node.get("type") == "entrance":

                        # Add Social Events for all the people who spoke since the last break (if there were any)
                        update = list()
                        update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location)
                        eventCount = update[0]
                        prior_event = update[1]

                        event = ns['event/' + str(eventCount)]

                        speakers = list()
                        speakerNodes = list()
                        speakerRef = list()

                        # Add Travel Event
                        graph.add((event, RDF.type, omj['Travel']))

                        if perseusid == None:
                            graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), Literal(ref)))
                        else:
                            #entRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(line) + "']/following-sibling::*[1]/self::stage)"
                            graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(entRef)))

                        #print("Entrance event. GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast)))
                        #print("Found entrance event!")

                        if location:
                            graph.add((event, ome['to'], location))

                        involved = node.get("about")

                        if len(involved) > 0 and involved[0] == "[" and involved[-1] == "]":
                            involved = involved[1:-1]

                        chunks = involved.split()
                        chunk_count = len(chunks)

                        if chunk_count > 1:
                            #type = extractCURIEorURI(graph, "[omb:Group]")
                            #graph.add((group, RDF.type, type))
                            graph.add((group, RDF.type, omb['Group']))

                        event_label = ""
                        en = 1

                        for chunk in chunks:
                            striped = chunk.strip()

                            if len(striped) > 0 and striped[0] == "[" and striped[-1] == "]":
                                striped = striped[1:-1]

                            currentCast.append(cast[striped])

                            if chunk_count > 1:
                                graph.add((group, ome['contains'], cast[striped]))

                                if en == chunk_count:
                                    event_label = event_label[0:-2] + " and " + striped
                                    graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " arrive")))
                                elif en < chunk_count:
                                    event_label += striped + ", "
                            else:
                                #print("Adding person as subject-entity to entry event " + str(eventCount))
                                graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(striped + " arrives")))
                                graph.add((event, ome['has-subject-entity'], cast[striped]))

                            en += 1

                        if chunk_count > 1:
                            graph.add((event, ome['has-subject-entity'], group))
                            #print("Adding group as subject-entity to entry event " + str(eventCount))
                            groupCount = groupCount + 1
                            group = ns['group/' + str(groupCount)]

                        if prior_event:
                            graph.add((event, ome['follows'], prior_event))
                            graph.add((prior_event, ome['precedes'], event))

                        prior_event = event

                        eventCount = eventCount + 1
                        event = ns['event/' + str(eventCount)]

                    if node.get("type") == "exit":

                        # Add Social Events for all the people who spoke since the last break (if there were any)
                        update = list()
                        update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location)
                        eventCount = update[0]
                        prior_event = update[1]

                        event = ns['event/' + str(eventCount)]

                        speakers = list()
                        speakerNodes = list()
                        speakerRef = list()

                        # Add Travel Event
                        graph.add((event, RDF.type, omj['Travel']))

                        if perseusid == None:
                            graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), Literal(ref)))
                        else:
                            #exitRef = xpointer
                            #graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(exitRef)))
                            graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(entRef)))

                        #print("Found exit event!")

                        if location != None:
                            graph.add((event, ome['from'], location))

                        involved = node.get("about")

                        if involved.strip() == "" or "-all" in involved:
                            # Remove everyone
                            #print("Exit all. GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast)))
                            #for peep in currentCast:
                            #    print(peep)

                            if len(currentCast) > 1:
                                #type = extractCURIEorURI(graph, "[omb:Group]")
                                #graph.add((group, RDF.type, type))
                                graph.add((group, RDF.type, omb['Group']))

                            event_label = ""
                            en = 1

                            for peep in currentCast:
                                short_ref = ""
                                for key, value in cast.items():
                                    if peep == value:
                                        short_ref = key

                                if len(currentCast) > 1:
                                    graph.add((group, ome['contains'], peep))

                                    if en == len(currentCast):
                                        event_label = event_label[0:-2] + " and " + short_ref
                                        graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " leave")))
                                    elif en < len(currentCast):
                                        event_label += short_ref + ", "
                                else:
                                    #print("Adding person as subject-entity to exeunt event " + str(eventCount))
                                    graph.add((event, ome['has-subject-entity'], peep))
                                    graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(short_ref + " leaves")))

                                en += 1

                            if len(currentCast) > 1:
                                graph.add((event, ome['has-subject-entity'], group))
                                #print("Adding group as subject-entity to exeunt event " + str(eventCount))
                                groupCount = groupCount + 1
                                group = ns['group/' + str(groupCount)]

                            currentCast = list()

                        elif "!" in involved:
                            #print("Exit except some. GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast)))
                            #print("Event: " + involved)

                            if len(involved) > 0 and involved[0] == "[" and involved[-1] == "]":
                                involved = involved[1:-1]

                            involved = involved.strip()

                            if len(involved) > 0 and involved[0] == "!" and involved[1] == "(" and involved[-1] == ")":
                                involved = involved[2:-1]

                            #print("involved: " + involved)

                            striped = involved.strip()
                            c_ids = striped.split()

                            chunks = list()
                            for stay in c_ids:
                                #print("Staying: " + cast[stay])
                                chunks.append(cast[stay])

                            staying = list()
                            going = list()

                            for player in currentCast:
                                #print("Player: " + player)
                                if player in chunks:
                                    staying.append(player)
                                else:
                                    going.append(player)

                            going_count = len(going)

                            if going_count > 1:
                                #type = extractCURIEorURI(graph, "[omb:Group]")
                                #graph.add((group, RDF.type, type))
                                graph.add((group, RDF.type, omb['Group']))

                            event_label = ""
                            en = 1

                            for ghost in going:
                                #print("ghost: " + ghost)
                                short_ref = ""
                                for key, value in cast.items():
                                    if ghost == value:
                                        short_ref = key

                                if ghost in currentCast:
                                    currentCast.remove(ghost)
                                    #print("Current cast count: " + str(len(currentCast)))

                                if going_count > 1:
                                    graph.add((group, ome['contains'], ghost))

                                    if en == len(going):
                                        event_label = event_label[0:-2] + " and " + short_ref
                                        graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " leave")))
                                    elif en < len(going):
                                        event_label += short_ref + ", "
                                else:
                                    #print("Adding person as subject-entity to exit event " + str(eventCount))
                                    graph.add((event, ome['has-subject-entity'], ghost))
                                    graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(short_ref + " leaves")))

                                en += 1

                            if going_count > 1:
                                graph.add((event, ome['has-subject-entity'], group))
                                #print("Adding group as subject-entity to exit event " + str(eventCount))
                                groupCount = groupCount + 1
                                group = ns['group/' + str(groupCount)]

                        else:
                            #print("Exit some. GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast)))

                            if len(involved) > 0 and involved[0] == "[" and involved[-1] == "]":
                                involved = involved[1:-1]

                            striped = involved.strip()
                            chunks = striped.split()
                            #print("striped: " + striped)

                            chunk_count = len(chunks)

                            if chunk_count > 1:
                                #type = extractCURIEorURI(graph, "[omb:Group]")
                                #graph.add((group, RDF.type, type))
                                graph.add((group, RDF.type, omb['Group']))

                            event_label = ""
                            en = 1

                            for chunk in chunks:
                                #print("chunk: " + chunk)
                                ghost = cast[chunk]
                                #print("ghost: " + ghost)

                                if ghost in currentCast:
                                    currentCast.remove(ghost)
                                    #print("Current cast count: " + str(len(currentCast)))

                                if chunk_count > 1:
                                    graph.add((group, ome['contains'], ghost))

                                    if en == len(currentCast):
                                        event_label = event_label[0:-2] + " and " + chunk
                                        graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " leave")))
                                    elif en < len(currentCast):
                                        event_label += chunk + ", "
                                else:
                                    #print("Adding person as subject-entity to exit event " + str(eventCount))
                                    graph.add((event, ome['has-subject-entity'], ghost))
                                    graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(chunk + " leaves")))

                                en += 1

                            if chunk_count > 1:
                                graph.add((event, ome['has-subject-entity'], group))
                                #print("Adding group as subject-entity to exit event " + str(eventCount))
                                groupCount = groupCount + 1
                                group = ns['group/' + str(groupCount)]

                        if prior_event:
                            graph.add((event, ome['follows'], prior_event))
                            graph.add((prior_event, ome['precedes'], event))

                        prior_event = event

                        eventCount = eventCount + 1
                        event = ns['event/' + str(eventCount)]

                #elif node.tag == "rs":
                #    #print("Found rs node")
                #    if parent:
                #        #print("Parent type is " + parent.tag)
                #        if parent.tag == "p" or parent.tag == "l":
                #            refersTo.append(node.get("about"))

                #parent = node

            # Add Social Events for all the people who spoke since the last break (if there were any)
            #print("Final section of scene, currentCast:" + str(len(currentCast)) + " speakers: " + str(len(speakers)))
            update = list()
            update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location)
            eventCount = update[0]
            prior_event = update[1]

            event = ns['event/' + str(eventCount)]
            group = ns['group/' + str(groupCount)]

            speakers = list()
            speakerNodes = list()
            currentCast = list()
            speakerRef = list()

    print(graph.serialize(format='xml'))
u"tmo" : TMO, u"dc" : DC, u"dct" : DCTERMS, u"foaf" : FOAF, u"sioc" : SIOC, u"sioct" : SIOCT, u"geo" : GEO, u"mvcb" : MVCB, u"ical" : ICAL, u"xsd" : XSD, u"owl" : OWL, u"skos" : SKOS, u"doap" : DOAP, } for shortname, namespace in namespace_manager.namespaces(): NSBINDINGS[shortname] = namespace """ keyword['LIBEXTRACTOR KEYWORD-TYPE'] = ( 'DC-ELEMENT', 'DC-TERM', 'NEPOMUK-ONTOLOGY', # left part predicate 'NEPOMUK-CLASS/PROPERTY' # right part predicate ) """ EXTRACTOR_KEYWORD = {} EXTRACTOR_KEYWORD['album'] = ('title', 'collection', 'nid3', 'albumTitle') EXTRACTOR_KEYWORD['artist'] = ('creator', '', 'nao', 'creator') EXTRACTOR_KEYWORD['book title'] = ('title', '', 'nie', 'title')
def get_vocab_base(vocabfile):
    graph = Graph()
    try:
        graph.parse(vocabfile)
    except Exception:
        # fall back to parsing as N3
        graph = None
        graph = Graph()
        try:
            graph.parse(vocabfile, format="n3")
        except Exception:
            return (None, None, None)

    identifier = None
    for v in graph.objects(None, namespaces['dc']['identifier']):
        identifier = v
    if not identifier:
        for v in graph.objects(None, namespaces['dcterms']['identifier']):
            identifier = v

    base = None
    if not base:
        for s in graph.subjects(namespaces['rdf']['type'], namespaces['owl']['Ontology']):
            base = s
            break
    if not base:
        for s in graph.subjects(namespaces['dc']['title'], None):
            base = s
            break
    if not base:
        for s in graph.subjects(namespaces['dcterms']['title'], None):
            base = s
            break
    if not base:
        for s in graph.subjects(namespaces['dc']['creator'], None):
            base = s
            break
    if not base:
        for s in graph.subjects(namespaces['dcterms']['creator'], None):
            base = s
            break
    if not base:
        for v in graph.objects(None, namespaces['vann']['preferredNamespaceUri']):
            base = v
            break
    if not base:
        for v in graph.namespaces():
            if v[0] == '':
                base = v[1]
                break

    prefix = None
    vocab_prefixes = graph.objects(None, namespaces['vann']['preferredNamespacePrefix'])
    for vp in vocab_prefixes:
        prefix = vp
    if not prefix and base:
        for v in graph.namespaces():
            if str(v[1]) == str(base):
                prefix = v[0]
                break
    if not prefix and base:
        prefix = base.strip().strip('/').split('/')[-1].strip('#').strip(' ')
    if base:
        base = base.strip()
        if base[-1] != "/" and base[-1] != "#":
            base += "#"
    return (identifier, base, prefix)
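# get_vocab_base() relies on a module-level `namespaces` lookup that is not
# shown above; a plausible shape (an assumption, not the original) is a dict
# of rdflib Namespace objects keyed by prefix:
from rdflib import Namespace

namespaces = {
    'dc': Namespace('http://purl.org/dc/elements/1.1/'),
    'dcterms': Namespace('http://purl.org/dc/terms/'),
    'rdf': Namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#'),
    'owl': Namespace('http://www.w3.org/2002/07/owl#'),
    'vann': Namespace('http://purl.org/vocab/vann/'),
}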
def read_manifest(item, manifest_file):
    triples = []
    namespaces = {}
    seeAlsoFiles = []
    oxdsClasses = ['http://vocab.ox.ac.uk/dataset/schema#Grouping',
                   'http://vocab.ox.ac.uk/dataset/schema#DataSet']

    aggregates = item.list_rdf_objects(item.uri, "ore:aggregates")

    g = ConjunctiveGraph()
    gparsed = g.parse(manifest_file, format='xml')
    namespaces = dict(g.namespaces())

    #Get the subjects
    subjects = {}
    for s in gparsed.subjects():
        if s in subjects:
            continue
        if type(s).__name__ == 'URIRef':
            if str(s).startswith('file://'):
                ss = str(s).replace('file://', '')
                if manifest_file in ss:
                    subjects[s] = URIRef(item.uri)
                else:
                    manifest_file_path, manifest_file_name = os.path.split(manifest_file)
                    ss = ss.replace(manifest_file_path, '').strip('/')
                    for file_uri in aggregates:
                        if ss in str(file_uri):
                            subjects[s] = URIRef(file_uri)
                            break
                    if s not in subjects:
                        subjects[s] = URIRef(item.uri)
            else:
                subjects[s] = URIRef(s)
        elif type(s).__name__ == 'BNode':
            replace_subject = True
            for o in gparsed.objects():
                if o == s:
                    replace_subject = False
            if replace_subject:
                subjects[s] = URIRef(item.uri)
            else:
                subjects[s] = s

    #Get the dataset type
    #set the subject uri to item uri if it is of type as defined in oxdsClasses
    datasetType = False
    for s, p, o in gparsed.triples((None, RDF.type, None)):
        if str(o) in oxdsClasses:
            if type(s).__name__ == 'URIRef' and len(s) > 0 and str(s) != str(item.uri) and str(subjects[s]) != str(item.uri):
                namespaces['owl'] = URIRef("http://www.w3.org/2002/07/owl#")
                triples.append((item.uri, 'owl:sameAs', s))
                triples.append((item.uri, RDF.type, o))
            elif type(s).__name__ == 'BNode' or len(s) == 0 or str(s) == str(item.uri) or str(subjects[s]) == str(item.uri):
                gparsed.remove((s, p, o))
                subjects[s] = item.uri

    #Get the uri for the see also files
    for s, p, o in gparsed.triples((None, URIRef('http://www.w3.org/2000/01/rdf-schema#seeAlso'), None)):
        if type(o).__name__ == 'URIRef' and len(o) > 0:
            obj = str(o)
            if obj.startswith('file://'):
                obj_path, obj_name = os.path.split(obj)
                obj = obj.replace(obj_path, '').strip('/')
                for file_uri in aggregates:
                    if obj in str(file_uri):
                        seeAlsoFiles.append(file_uri)
        gparsed.remove((s, p, o))

    #Add remaining triples
    for s, p, o in gparsed.triples((None, None, None)):
        triples.append((subjects[s], p, o))
    return namespaces, triples, seeAlsoFiles
primer.parse(data=mySource, format="n3")

# Now you can query, either directly straight into a list:
print()
print("Printing bigger example's triples:")
for i in [(x, y, z) for x, y, z in primer]:
    print(i)

# or spit it back out (mostly) the way we created it:
print()
print("Printing bigger example as N3:")
print(primer.serialize(format="n3").decode("utf-8"))

# for more insight into things already done, let's see the namespaces
print()
print("Printing bigger example's namespaces:")
for n in list(primer.namespaces()):
    print(n)

# let's ask something about the data, using a SPARQL query
print()
print("Who are pat's children?")
q = "SELECT ?child WHERE { :pat :child ?child }"
for r in primer.query(q):
    print(r)