def get_graph_from_sparql_results(sparql_json, named_graph=None):
    """Build a ConjunctiveGraph from a SPARQL SELECT JSON response.

    :param sparql_json: parsed SPARQL JSON results (``head``/``results`` dict).
    :param named_graph: optional graph URI; when absent it is taken from the
        first binding's ``g`` variable, if any.
    :return: tuple ``(graph, nr_levels)`` where ``nr_levels`` is the number of
        context-triple levels reported by ``RDFModel.get_context_triples``.
    """
    # No bindings at all: return an empty graph and zero levels.
    if len(sparql_json['results']['bindings']) == 0:
        return ConjunctiveGraph(), 0
    sparql_vars = sparql_json['head']['vars']
    if 'g' in sparql_vars:
        if not named_graph:
            named_graph = sparql_json['results']['bindings'][0]['g']['value']
        # NOTE: this mutates the 'vars' list in-place, so the
        # get_context_triples call below sees the list without 'g'.
        sparql_vars.remove('g')
    triple_levels = RDFModel.get_context_triples(sparql_json['head']['vars'])
    nr_levels = len(triple_levels)
    if named_graph:
        named_graph = URIRef(named_graph)
    graph = ConjunctiveGraph(identifier=named_graph)
    graph.namespace_manager = namespace_manager
    for binding in sparql_json['results']['bindings']:
        # Only as many (s, p, o) variable triplets as this binding provides.
        binding_levels = RDFModel.get_context_levels(len(binding.keys()))
        for s, p, o in triple_levels[:binding_levels]:
            subject = URIRef(binding[s]['value'])
            if binding[s]['type'] == 'bnode':
                subject = BNode(binding[s]['value'])
            predicate = URIRef(binding[p]['value'])
            obj = RDFModel.get_object_from_sparql_result(binding[o])
            graph.add((subject, predicate, obj))
    # materialize inferences: rewrite every ore:isAggregatedBy edge as the
    # inverse ore:aggregates edge, then drop the original triple.
    for subject, obj in graph.subject_objects(
            predicate=URIRef("http://www.openarchives.org/ore/terms/isAggregatedBy")):
        graph.add((obj, URIRef("http://www.openarchives.org/ore/terms/aggregates"), subject))
        graph.remove((subject, URIRef("http://www.openarchives.org/ore/terms/isAggregatedBy"), obj))
    return graph, nr_levels
def run(input_file, input_format_hint, output_format):
    """Parse *input_file*, re-insert its triples ordered by subject, and
    print the serialization in *output_format* line by line.

    :param input_file: path or file-like object accepted by ``Graph.parse``.
    :param input_format_hint: parser format name (e.g. ``'n3'``, ``'xml'``).
    :param output_format: serializer format name for the printed output.
    """
    graph = ConjunctiveGraph(store=OrderedAndIndexedStore())
    graph.parse(input_file, format=input_format_hint)

    # Snapshot every triple, ordered by subject (stable sort, like list.sort).
    ordered = sorted(graph.triples((None, None, None)),
                     key=lambda triple: triple[0])

    # Clear the store, then re-add in sorted order so the (order-preserving)
    # store emits subjects in a deterministic sequence.
    graph.remove((None, None, None))
    for triple in ordered:
        graph.add(triple)

    # serialize() returns bytes here, hence the per-line decode before print.
    for raw_line in graph.serialize(format=output_format).splitlines():
        print(raw_line.decode())
def update_mediator(params):
    """Write user metadata for a mediator and save the RDF file.

    :param params: dict of form fields; must contain a non-empty
        ``'username'``.  Recognised optional fields: ``firstname``,
        ``lastname``, ``email``, ``title`` (single-valued) and
        ``department`` (semicolon-separated, multi-valued).
    :return: True on success, False when no username was supplied.
    """
    if not ('username' in params and params['username']):
        return False
    det = get_mediator_details(params['username'])
    graph = Graph()
    graph.parse(os.path.join(ag.mediatorsdir, '%s.rdf' % params['username']))
    # .items() instead of the Python-2-only .iteritems()
    for prefix, url in namespaces.items():
        graph.bind(prefix, URIRef(url))
    uri = URIRef(det['uri'])
    # Single-valued fields: replace any existing value with the new one.
    single_valued = [
        ('firstname', namespaces['foaf']['firstName']),
        ('lastname', namespaces['foaf']['lastName']),
        ('email', namespaces['foaf']['mbox']),
        ('title', namespaces['foaf']['title']),
    ]
    for field, predicate in single_valued:
        if field in params and params[field]:
            graph.remove((uri, predicate, None))
            graph.add((uri, predicate, Literal(params[field])))
    # Department is multi-valued: one isPartOf triple per ';'-separated entry.
    if 'department' in params and params['department']:
        graph.remove((uri, namespaces['dcterms']['isPartOf'], None))
        for d in params['department'].split(';'):
            graph.add((uri, namespaces['dcterms']['isPartOf'], Literal(d.strip())))
    rdf_str = graph.serialize()
    f = codecs.open(os.path.join(ag.mediatorsdir, '%s.rdf' % params['username']), 'w', 'utf-8')
    f.write(rdf_str)
    f.close()
    return True
def change_status(vocabprefix, uri, predicate, message, action):
    """Add or remove a status triple in a vocabulary's status.rdf file.

    :param vocabprefix: directory name of the vocabulary under
        ``ag.vocabulariesdir``.
    :param uri: subject URI of the vocabulary.
    :param predicate: CURIE-style ``'prefix:term'`` string resolved against
        the module-level ``namespaces`` mapping.
    :param message: object value; treated as a URIRef when it looks like a
        URL, otherwise as a Literal.  May be falsy (used as a wildcard on
        removal).
    :param action: ``'add'`` or ``'remove'``.
    :return: True on success, False for an unknown action or a missing
        status file.
    """
    if action not in ('add', 'remove'):
        return False
    vocab_uri = URIRef(uri)
    vocabdir = os.path.join(ag.vocabulariesdir, vocabprefix)
    vocabstatusfile = os.path.join(vocabdir, "status.rdf")
    if not os.path.isfile(vocabstatusfile):
        return False
    graph = Graph()
    graph.parse(vocabstatusfile)
    # Resolve the 'prefix:term' CURIE (keeps the original behaviour of
    # ignoring anything after a second colon).
    parts = predicate.split(':')
    ns = parts[0]
    term = parts[1]
    # startswith accepts a tuple of prefixes — one call instead of an 'or'.
    if message and message.startswith(('http://', 'file://')):
        message = URIRef(message)
    elif message:
        message = Literal(message)
    if action == 'add':
        for prefix, url in namespaces.items():
            graph.bind(prefix, URIRef(url))
        graph.add((vocab_uri, namespaces[ns][term], message))
    elif action == 'remove':
        graph.remove((vocab_uri, namespaces[ns][term], message))
    rdf_str = graph.serialize()
    f = codecs.open(vocabstatusfile, 'w', 'utf-8')
    f.write(rdf_str)
    f.close()
    return True
def _replace_in_file(path, replacements):
    """Read *path* (UTF-8), apply (old, new) substring replacements in order,
    and write the result back in place."""
    f = codecs.open(path, 'r', 'utf-8')
    text = f.read()
    f.close()
    for old, new in replacements:
        text = text.replace(old, new)
    f = codecs.open(path, 'w', 'utf-8')
    f.write(text)
    f.close()


def update_vocab_uri_in_statusfile(userid, oldprefix, newprefix, oldvocabdir, newvocabdir):
    """Rewrite vocabulary URIs/paths after a prefix rename.

    Updates the mediator RDF file and the vocabulary status file by plain
    string substitution, then drops the "editorial note 0" triple from the
    status graph.

    :return: True on success, False when either file is missing.
    """
    olduri = "http://vocab.ox.ac.uk/%s" % oldprefix
    newuri = "http://vocab.ox.ac.uk/%s" % newprefix
    mediatorfile = os.path.join(ag.mediatorsdir, '%s.rdf' % userid)
    vocabstatusfile = os.path.join(newvocabdir, 'status.rdf')
    if not os.path.isfile(mediatorfile) or not os.path.isfile(vocabstatusfile):
        return False
    substitutions = [(olduri, newuri), (oldvocabdir, newvocabdir)]
    # update uri in mediator file
    _replace_in_file(mediatorfile, substitutions)
    # update uri in vocab status file
    _replace_in_file(vocabstatusfile, substitutions)
    # Remove editorial note 0
    graph = Graph()
    graph.parse(vocabstatusfile)
    for s, p, o in graph.triples((URIRef(newuri),
                                  namespaces['skos']['editorialNote'],
                                  Literal(vocab_editorial_descriptions[0]))):
        graph.remove((s, p, o))
    rdf_str = graph.serialize()
    f = codecs.open(vocabstatusfile, 'w', 'utf-8')
    f.write(rdf_str)
    f.close()
    return True
#Adding new asana bnode = BNode() # class relations rdfGraph.add((newAsana, RDF.type, asana)) rdfGraph.add((newAsana, RDF.type, bnode)) rdfGraph.add((newAsana, RDFS.label, Literal("Новая асана", lang="ru"))) rdfGraph.add((newAsana, description, Literal("Описание процесса выполнения", datatype=XSD.string))) rdfGraph.add((bnode, OWL.onProperty, URIRef(negativeAffect))) rdfGraph.add((bnode, OWL.someValuesOf, URIRef(backbone))) print("\nGetting with new element") printElements(rdfGraph) #Remove sukhasana rdfGraph.remove((sukhasana, None, None)) print("\nGetting with deleted element") printElements(rdfGraph) #Search for all categories which affects 'позвоночник' ds = URIRef(description).n3( rdfGraph.namespace_manager) # convert to NS:suffix format print(f"{description} -> {ds}") pq = prepareQuery(f"""SELECT ?asana ?label ?description WHERE {{ ?asana rdf:type ?o . ?asana {ds} ?description . ?asana rdfs:label ?label . ?o owl:onProperty ?affect . ?o owl:someValuesFrom ?affectTo .
class ContextTestCase(unittest.TestCase):
    """Exercise named-graph (context) behaviour of a ConjunctiveGraph
    backed by a pluggable store opened via a ``sqlite://`` URI."""

    storetest = True
    identifier = URIRef("rdflib_test")

    # Fixture terms shared by every test method.
    michel = URIRef(u"michel")
    tarek = URIRef(u"tarek")
    bob = URIRef(u"bob")
    likes = URIRef(u"likes")
    hates = URIRef(u"hates")
    pizza = URIRef(u"pizza")
    cheese = URIRef(u"cheese")
    c1 = URIRef(u"context-1")
    c2 = URIRef(u"context-2")

    def setUp(self, uri="sqlite://", storename=None):
        # Instantiate the store plugin and open a fresh graph per test.
        store = plugin.get(storename, Store)(identifier=self.identifier)
        self.graph = ConjunctiveGraph(store, identifier=self.identifier)
        self.graph.open(uri, create=True)

    def tearDown(self, uri="sqlite://"):
        self.graph.destroy(uri)
        self.graph.close()

    def get_context(self, identifier):
        """Return a Graph view over the shared store for the given context."""
        assert isinstance(identifier, URIRef) or \
            isinstance(identifier, BNode), type(identifier)
        # NOTE(review): passing self as namespace_manager looks unusual —
        # Graph normally expects a NamespaceManager here; confirm intent.
        return Graph(store=self.graph.store, identifier=identifier,
                     namespace_manager=self)

    def addStuff(self):
        """Populate context c1 with the seven fixture triples."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.add((tarek, likes, pizza))
        graph.add((tarek, likes, cheese))
        graph.add((michel, likes, pizza))
        graph.add((michel, likes, cheese))
        graph.add((bob, likes, cheese))
        graph.add((bob, hates, pizza))
        graph.add((bob, hates, michel))  # gasp!

    def removeStuff(self):
        """Remove the triples added by addStuff from context c1."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.remove((tarek, likes, pizza))
        graph.remove((tarek, likes, cheese))
        graph.remove((michel, likes, pizza))
        graph.remove((michel, likes, cheese))
        graph.remove((bob, likes, cheese))
        graph.remove((bob, hates, pizza))
        graph.remove((bob, hates, michel))  # gasp!

    def addStuffInMultipleContexts(self):
        """Add one triple to the default context and to c1 and c2."""
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        # add to default context
        self.graph.add(triple)
        # add to context 1
        graph = Graph(self.graph.store, c1)
        graph.add(triple)
        # add to context 2
        graph = Graph(self.graph.store, c2)
        graph.add(triple)

    def testConjunction(self):
        self.addStuffInMultipleContexts()
        triple = (self.pizza, self.likes, self.pizza)
        # add to context 1
        graph = Graph(self.graph.store, self.c1)
        graph.add(triple)
        # Both handles wrap the same store, so their lengths must agree.
        self.assertEqual(len(self.graph.store), len(graph.store))

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testLenInOneContext(self):
        c1 = self.c1
        # make sure context is empty
        self.graph.remove_context(self.get_context(c1))
        graph = Graph(self.graph.store, c1)
        oldLen = len(self.graph)
        for i in range(0, 10):
            graph.add((BNode(), self.hates, self.hates))
        self.assertEqual(len(graph), oldLen + 10)
        self.assertEqual(len(self.get_context(c1)), oldLen + 10)
        self.graph.remove_context(self.get_context(c1))
        self.assertEqual(len(self.graph), oldLen)
        self.assertEqual(len(graph), 0)

    def testLenInMultipleContexts(self):
        oldLen = len(self.graph.store)
        self.addStuffInMultipleContexts()

        # addStuffInMultipleContexts is adding the same triple to
        # three different contexts. So it's only + 1
        self.assertEqual(len(self.graph.store), oldLen + 1,
                         [self.graph.store, oldLen + 1])

        graph = Graph(self.graph.store, self.c1)
        self.assertEqual(len(graph.store), oldLen + 1,
                         [graph.store, oldLen + 1])

    def testRemoveInMultipleContexts(self):
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        # triple should be still in store after removing it from c1 + c2
        self.assertIn(triple, self.graph)
        graph = Graph(self.graph.store, c1)
        graph.remove(triple)
        self.assertIn(triple, self.graph)
        graph = Graph(self.graph.store, c2)
        graph.remove(triple)
        self.assertIn(triple, self.graph)
        self.graph.remove(triple)
        # now gone!
        self.assertNotIn(triple, self.graph)

        # add again and see if remove without context removes all triples!
        self.addStuffInMultipleContexts()
        self.graph.remove(triple)
        self.assertNotIn(triple, self.graph)

    def testContexts(self):
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        def cid(c):
            # Contexts may be yielded as Graphs or as plain identifiers;
            # normalise to the identifier.
            if not isinstance(c, string_types):
                return c.identifier
            return c
        self.assertIn(self.c1, list(map(cid, self.graph.contexts())))
        self.assertIn(self.c2, list(map(cid, self.graph.contexts())))

        contextList = list(map(cid, list(self.graph.contexts(triple))))
        self.assertIn(self.c1, contextList)
        self.assertIn(self.c2, contextList)

    def testRemoveContext(self):
        c1 = self.c1

        self.addStuffInMultipleContexts()
        self.assertEqual(len(Graph(self.graph.store, c1)), 1)
        self.assertEqual(len(self.get_context(c1)), 1)

        self.graph.remove_context(self.get_context(c1))
        self.assertNotIn(self.c1, self.graph.contexts())

    def testRemoveAny(self):
        Any = None
        self.addStuffInMultipleContexts()
        self.graph.remove((Any, Any, Any))
        self.assertEqual(len(self.graph), 0)

    def testTriples(self):
        """Exhaustively check triple pattern matching with and without an
        explicit context, then the convenience accessor methods."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        asserte = self.assertEqual
        triples = self.graph.triples
        graph = self.graph
        c1graph = Graph(self.graph.store, c1)
        c1triples = c1graph.triples
        Any = None

        self.addStuff()

        # unbound subjects with context
        asserte(len(list(c1triples((Any, likes, pizza)))), 2)
        asserte(len(list(c1triples((Any, hates, pizza)))), 1)
        asserte(len(list(c1triples((Any, likes, cheese)))), 3)
        asserte(len(list(c1triples((Any, hates, cheese)))), 0)

        # unbound subjects without context, same results!
        asserte(len(list(triples((Any, likes, pizza)))), 2)
        asserte(len(list(triples((Any, hates, pizza)))), 1)
        asserte(len(list(triples((Any, likes, cheese)))), 3)
        asserte(len(list(triples((Any, hates, cheese)))), 0)

        # unbound objects with context
        asserte(len(list(c1triples((michel, likes, Any)))), 2)
        asserte(len(list(c1triples((tarek, likes, Any)))), 2)
        asserte(len(list(c1triples((bob, hates, Any)))), 2)
        asserte(len(list(c1triples((bob, likes, Any)))), 1)

        # unbound objects without context, same results!
        asserte(len(list(triples((michel, likes, Any)))), 2)
        asserte(len(list(triples((tarek, likes, Any)))), 2)
        asserte(len(list(triples((bob, hates, Any)))), 2)
        asserte(len(list(triples((bob, likes, Any)))), 1)

        # unbound predicates with context
        asserte(len(list(c1triples((michel, Any, cheese)))), 1)
        asserte(len(list(c1triples((tarek, Any, cheese)))), 1)
        asserte(len(list(c1triples((bob, Any, pizza)))), 1)
        asserte(len(list(c1triples((bob, Any, michel)))), 1)

        # unbound predicates without context, same results!
        asserte(len(list(triples((michel, Any, cheese)))), 1)
        asserte(len(list(triples((tarek, Any, cheese)))), 1)
        asserte(len(list(triples((bob, Any, pizza)))), 1)
        asserte(len(list(triples((bob, Any, michel)))), 1)

        # unbound subject, objects with context
        asserte(len(list(c1triples((Any, hates, Any)))), 2)
        asserte(len(list(c1triples((Any, likes, Any)))), 5)

        # unbound subject, objects without context, same results!
        asserte(len(list(triples((Any, hates, Any)))), 2)
        asserte(len(list(triples((Any, likes, Any)))), 5)

        # unbound predicates, objects with context
        asserte(len(list(c1triples((michel, Any, Any)))), 2)
        asserte(len(list(c1triples((bob, Any, Any)))), 3)
        asserte(len(list(c1triples((tarek, Any, Any)))), 2)

        # unbound predicates, objects without context, same results!
        asserte(len(list(triples((michel, Any, Any)))), 2)
        asserte(len(list(triples((bob, Any, Any)))), 3)
        asserte(len(list(triples((tarek, Any, Any)))), 2)

        # unbound subjects, predicates with context
        asserte(len(list(c1triples((Any, Any, pizza)))), 3)
        asserte(len(list(c1triples((Any, Any, cheese)))), 3)
        asserte(len(list(c1triples((Any, Any, michel)))), 1)

        # unbound subjects, predicates without context, same results!
        asserte(len(list(triples((Any, Any, pizza)))), 3)
        asserte(len(list(triples((Any, Any, cheese)))), 3)
        asserte(len(list(triples((Any, Any, michel)))), 1)

        # all unbound with context
        asserte(len(list(c1triples((Any, Any, Any)))), 7)
        # all unbound without context, same result!
        asserte(len(list(triples((Any, Any, Any)))), 7)

        for c in [graph, self.get_context(c1)]:
            # unbound subjects
            asserte(set(c.subjects(likes, pizza)), set((michel, tarek)))
            asserte(set(c.subjects(hates, pizza)), set((bob,)))
            asserte(set(c.subjects(likes, cheese)), set([tarek, bob, michel]))
            asserte(set(c.subjects(hates, cheese)), set())

            # unbound objects
            asserte(set(c.objects(michel, likes)), set([cheese, pizza]))
            asserte(set(c.objects(tarek, likes)), set([cheese, pizza]))
            asserte(set(c.objects(bob, hates)), set([michel, pizza]))
            asserte(set(c.objects(bob, likes)), set([cheese]))

            # unbound predicates
            asserte(set(c.predicates(michel, cheese)), set([likes]))
            asserte(set(c.predicates(tarek, cheese)), set([likes]))
            asserte(set(c.predicates(bob, pizza)), set([hates]))
            asserte(set(c.predicates(bob, michel)), set([hates]))

            asserte(set(
                c.subject_objects(hates)), set([(bob, pizza), (bob, michel)]))
            asserte(set(c.subject_objects(likes)),
                    set([(tarek, cheese), (michel, cheese),
                         (michel, pizza), (bob, cheese), (tarek, pizza)]))

            asserte(set(c.predicate_objects(
                michel)), set([(likes, cheese), (likes, pizza)]))
            asserte(set(c.predicate_objects(bob)), set([(likes, cheese),
                    (hates, pizza), (hates, michel)]))
            asserte(set(c.predicate_objects(
                tarek)), set([(likes, cheese), (likes, pizza)]))

            asserte(set(c.subject_predicates(
                pizza)), set([(bob, hates), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(cheese)), set([(
                bob, likes), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(michel)), set([(bob, hates)]))

            asserte(set(c), set([
                (bob, hates, michel),
                (bob, likes, cheese),
                (tarek, likes, pizza),
                (michel, likes, pizza),
                (michel, likes, cheese),
                (bob, hates, pizza),
                (tarek, likes, cheese)]))

        # remove stuff and make sure the graph is empty again
        self.removeStuff()
        asserte(len(list(c1triples((Any, Any, Any)))), 0)
        asserte(len(list(triples((Any, Any, Any)))), 0)
class ContextTestCase(unittest.TestCase):
    """Context (named-graph) tests against a file-backed store; skips when
    the store's dependencies are unavailable, and skips known ``__len__``
    issues for the SQLite store."""

    store = 'default'
    slow = True
    tmppath = None

    def setUp(self):
        try:
            self.graph = ConjunctiveGraph(store=self.store)
        except ImportError:
            raise SkipTest(
                "Dependencies for store '%s' not available!" % self.store)
        if self.store == "SQLite":
            # SQLite wants a single file; other stores get a directory.
            _, self.tmppath = mkstemp(
                prefix='test', dir='/tmp', suffix='.sqlite')
        else:
            self.tmppath = mkdtemp()
        self.graph.open(self.tmppath, create=True)

        # Fixture terms shared by every test method.
        self.michel = URIRef(u'michel')
        self.tarek = URIRef(u'tarek')
        self.bob = URIRef(u'bob')
        self.likes = URIRef(u'likes')
        self.hates = URIRef(u'hates')
        self.pizza = URIRef(u'pizza')
        self.cheese = URIRef(u'cheese')

        self.c1 = URIRef(u'context-1')
        self.c2 = URIRef(u'context-2')

        # delete the graph for each test!
        self.graph.remove((None, None, None))

    def tearDown(self):
        self.graph.close()
        # mkdtemp made a directory, mkstemp a single file — clean up either.
        if os.path.isdir(self.tmppath):
            shutil.rmtree(self.tmppath)
        else:
            os.remove(self.tmppath)

    def addStuff(self):
        """Populate context c1 with the seven fixture triples."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.add((tarek, likes, pizza))
        graph.add((tarek, likes, cheese))
        graph.add((michel, likes, pizza))
        graph.add((michel, likes, cheese))
        graph.add((bob, likes, cheese))
        graph.add((bob, hates, pizza))
        graph.add((bob, hates, michel))  # gasp!

    def removeStuff(self):
        """Remove the triples added by addStuff from context c1."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.remove((tarek, likes, pizza))
        graph.remove((tarek, likes, cheese))
        graph.remove((michel, likes, pizza))
        graph.remove((michel, likes, cheese))
        graph.remove((bob, likes, cheese))
        graph.remove((bob, hates, pizza))
        graph.remove((bob, hates, michel))  # gasp!

    def addStuffInMultipleContexts(self):
        """Add one triple to the default context and to c1 and c2."""
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        # add to default context
        self.graph.add(triple)
        # add to context 1
        graph = Graph(self.graph.store, c1)
        graph.add(triple)
        # add to context 2
        graph = Graph(self.graph.store, c2)
        graph.add(triple)

    def testConjunction(self):
        if self.store == "SQLite":
            raise SkipTest("Skipping known issue with __len__")
        self.addStuffInMultipleContexts()
        triple = (self.pizza, self.likes, self.pizza)
        # add to context 1
        graph = Graph(self.graph.store, self.c1)
        graph.add(triple)
        self.assertEqual(len(self.graph), len(graph))

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testLenInOneContext(self):
        c1 = self.c1
        # make sure context is empty
        self.graph.remove_context(self.graph.get_context(c1))
        graph = Graph(self.graph.store, c1)
        oldLen = len(self.graph)

        for i in range(0, 10):
            graph.add((BNode(), self.hates, self.hates))
        self.assertEqual(len(graph), oldLen + 10)
        self.assertEqual(len(self.graph.get_context(c1)), oldLen + 10)
        self.graph.remove_context(self.graph.get_context(c1))
        self.assertEqual(len(self.graph), oldLen)
        self.assertEqual(len(graph), 0)

    def testLenInMultipleContexts(self):
        if self.store == "SQLite":
            raise SkipTest("Skipping known issue with __len__")
        oldLen = len(self.graph)
        self.addStuffInMultipleContexts()

        # addStuffInMultipleContexts is adding the same triple to
        # three different contexts. So it's only + 1
        self.assertEqual(len(self.graph), oldLen + 1)

        graph = Graph(self.graph.store, self.c1)
        self.assertEqual(len(graph), oldLen + 1)

    def testRemoveInMultipleContexts(self):
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        # triple should be still in store after removing it from c1 + c2
        self.assertTrue(triple in self.graph)
        graph = Graph(self.graph.store, c1)
        graph.remove(triple)
        self.assertTrue(triple in self.graph)
        graph = Graph(self.graph.store, c2)
        graph.remove(triple)
        self.assertTrue(triple in self.graph)
        self.graph.remove(triple)
        # now gone!
        self.assertTrue(triple not in self.graph)

        # add again and see if remove without context removes all triples!
        self.addStuffInMultipleContexts()
        self.graph.remove(triple)
        self.assertTrue(triple not in self.graph)

    def testContexts(self):
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        def cid(c):
            # Normalise a context Graph to its identifier.
            return c.identifier
        self.assertTrue(self.c1 in map(cid, self.graph.contexts()))
        self.assertTrue(self.c2 in map(cid, self.graph.contexts()))

        contextList = list(map(cid, list(self.graph.contexts(triple))))
        self.assertTrue(self.c1 in contextList, (self.c1, contextList))
        self.assertTrue(self.c2 in contextList, (self.c2, contextList))

    def testRemoveContext(self):
        c1 = self.c1

        self.addStuffInMultipleContexts()
        self.assertEqual(len(Graph(self.graph.store, c1)), 1)
        self.assertEqual(len(self.graph.get_context(c1)), 1)

        self.graph.remove_context(self.graph.get_context(c1))
        self.assertTrue(self.c1 not in self.graph.contexts())

    def testRemoveAny(self):
        Any = None
        self.addStuffInMultipleContexts()
        self.graph.remove((Any, Any, Any))
        self.assertEqual(len(self.graph), 0)

    def testTriples(self):
        """Exhaustively check triple pattern matching with and without an
        explicit context, then the convenience accessor methods."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        asserte = self.assertEqual
        triples = self.graph.triples
        graph = self.graph
        c1graph = Graph(self.graph.store, c1)
        c1triples = c1graph.triples
        Any = None

        self.addStuff()

        # unbound subjects with context
        asserte(len(list(c1triples((Any, likes, pizza)))), 2)
        asserte(len(list(c1triples((Any, hates, pizza)))), 1)
        asserte(len(list(c1triples((Any, likes, cheese)))), 3)
        asserte(len(list(c1triples((Any, hates, cheese)))), 0)

        # unbound subjects without context, same results!
        asserte(len(list(triples((Any, likes, pizza)))), 2)
        asserte(len(list(triples((Any, hates, pizza)))), 1)
        asserte(len(list(triples((Any, likes, cheese)))), 3)
        asserte(len(list(triples((Any, hates, cheese)))), 0)

        # unbound objects with context
        asserte(len(list(c1triples((michel, likes, Any)))), 2)
        asserte(len(list(c1triples((tarek, likes, Any)))), 2)
        asserte(len(list(c1triples((bob, hates, Any)))), 2)
        asserte(len(list(c1triples((bob, likes, Any)))), 1)

        # unbound objects without context, same results!
        asserte(len(list(triples((michel, likes, Any)))), 2)
        asserte(len(list(triples((tarek, likes, Any)))), 2)
        asserte(len(list(triples((bob, hates, Any)))), 2)
        asserte(len(list(triples((bob, likes, Any)))), 1)

        # unbound predicates with context
        asserte(len(list(c1triples((michel, Any, cheese)))), 1)
        asserte(len(list(c1triples((tarek, Any, cheese)))), 1)
        asserte(len(list(c1triples((bob, Any, pizza)))), 1)
        asserte(len(list(c1triples((bob, Any, michel)))), 1)

        # unbound predicates without context, same results!
        asserte(len(list(triples((michel, Any, cheese)))), 1)
        asserte(len(list(triples((tarek, Any, cheese)))), 1)
        asserte(len(list(triples((bob, Any, pizza)))), 1)
        asserte(len(list(triples((bob, Any, michel)))), 1)

        # unbound subject, objects with context
        asserte(len(list(c1triples((Any, hates, Any)))), 2)
        asserte(len(list(c1triples((Any, likes, Any)))), 5)

        # unbound subject, objects without context, same results!
        asserte(len(list(triples((Any, hates, Any)))), 2)
        asserte(len(list(triples((Any, likes, Any)))), 5)

        # unbound predicates, objects with context
        asserte(len(list(c1triples((michel, Any, Any)))), 2)
        asserte(len(list(c1triples((bob, Any, Any)))), 3)
        asserte(len(list(c1triples((tarek, Any, Any)))), 2)

        # unbound predicates, objects without context, same results!
        asserte(len(list(triples((michel, Any, Any)))), 2)
        asserte(len(list(triples((bob, Any, Any)))), 3)
        asserte(len(list(triples((tarek, Any, Any)))), 2)

        # unbound subjects, predicates with context
        asserte(len(list(c1triples((Any, Any, pizza)))), 3)
        asserte(len(list(c1triples((Any, Any, cheese)))), 3)
        asserte(len(list(c1triples((Any, Any, michel)))), 1)

        # unbound subjects, predicates without context, same results!
        asserte(len(list(triples((Any, Any, pizza)))), 3)
        asserte(len(list(triples((Any, Any, cheese)))), 3)
        asserte(len(list(triples((Any, Any, michel)))), 1)

        # all unbound with context
        asserte(len(list(c1triples((Any, Any, Any)))), 7)
        # all unbound without context, same result!
        asserte(len(list(triples((Any, Any, Any)))), 7)

        for c in [graph, self.graph.get_context(c1)]:
            # unbound subjects
            asserte(set(c.subjects(likes, pizza)), set((michel, tarek)))
            asserte(set(c.subjects(hates, pizza)), set((bob,)))
            asserte(set(c.subjects(likes, cheese)), set([tarek, bob, michel]))
            asserte(set(c.subjects(hates, cheese)), set())

            # unbound objects
            asserte(set(c.objects(michel, likes)), set([cheese, pizza]))
            asserte(set(c.objects(tarek, likes)), set([cheese, pizza]))
            asserte(set(c.objects(bob, hates)), set([michel, pizza]))
            asserte(set(c.objects(bob, likes)), set([cheese]))

            # unbound predicates
            asserte(set(c.predicates(michel, cheese)), set([likes]))
            asserte(set(c.predicates(tarek, cheese)), set([likes]))
            asserte(set(c.predicates(bob, pizza)), set([hates]))
            asserte(set(c.predicates(bob, michel)), set([hates]))

            asserte(set(
                c.subject_objects(hates)), set([(bob, pizza), (bob, michel)]))
            asserte(
                set(c.subject_objects(likes)),
                set([(tarek, cheese), (michel, cheese),
                     (michel, pizza), (bob, cheese), (tarek, pizza)]))

            asserte(set(c.predicate_objects(
                michel)), set([(likes, cheese), (likes, pizza)]))
            asserte(set(c.predicate_objects(bob)), set([(likes, cheese),
                    (hates, pizza), (hates, michel)]))
            asserte(set(c.predicate_objects(
                tarek)), set([(likes, cheese), (likes, pizza)]))

            asserte(set(c.subject_predicates(
                pizza)), set([(bob, hates), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(cheese)), set([(
                bob, likes), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(michel)), set([(bob, hates)]))

            asserte(set(c), set(
                [(bob, hates, michel),
                 (bob, likes, cheese),
                 (tarek, likes, pizza),
                 (michel, likes, pizza),
                 (michel, likes, cheese),
                 (bob, hates, pizza),
                 (tarek, likes, cheese)]))

        # remove stuff and make sure the graph is empty again
        self.removeStuff()
        asserte(len(list(c1triples((Any, Any, Any)))), 0)
        asserte(len(list(triples((Any, Any, Any)))), 0)
class ContextTestCase(unittest.TestCase):
    """Python-2-era variant of the context tests: uses the deprecated
    ``assertEquals``/``assert_`` aliases and a PY3/basestring compatibility
    shim, with leftover debug ``print`` calls."""

    storetest = True
    identifier = URIRef("rdflib_test")

    # Fixture terms shared by every test method.
    michel = URIRef(u'michel')
    tarek = URIRef(u'tarek')
    bob = URIRef(u'bob')
    likes = URIRef(u'likes')
    hates = URIRef(u'hates')
    pizza = URIRef(u'pizza')
    cheese = URIRef(u'cheese')
    c1 = URIRef(u'context-1')
    c2 = URIRef(u'context-2')

    def setUp(self, uri='sqlite://', storename=None):
        # Instantiate the store plugin and open a fresh graph per test.
        store = plugin.get(storename, Store)(identifier=self.identifier)
        self.graph = ConjunctiveGraph(store, identifier=self.identifier)
        self.graph.open(uri, create=True)

    def tearDown(self, uri='sqlite://'):
        self.graph.destroy(uri)
        try:
            self.graph.close()
        except:
            # best-effort close; some stores raise after destroy()
            pass

    def get_context(self, identifier):
        """Return a Graph view over the shared store for the given context."""
        assert isinstance(identifier, URIRef) or \
            isinstance(identifier, BNode), type(identifier)
        # NOTE(review): passing self as namespace_manager looks unusual —
        # Graph normally expects a NamespaceManager here; confirm intent.
        return Graph(store=self.graph.store, identifier=identifier,
                     namespace_manager=self)

    def addStuff(self):
        """Populate context c1 with the seven fixture triples."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.add((tarek, likes, pizza))
        graph.add((tarek, likes, cheese))
        graph.add((michel, likes, pizza))
        graph.add((michel, likes, cheese))
        graph.add((bob, likes, cheese))
        graph.add((bob, hates, pizza))
        graph.add((bob, hates, michel))  # gasp!

    def removeStuff(self):
        """Remove the triples added by addStuff from context c1."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.remove((tarek, likes, pizza))
        graph.remove((tarek, likes, cheese))
        graph.remove((michel, likes, pizza))
        graph.remove((michel, likes, cheese))
        graph.remove((bob, likes, cheese))
        graph.remove((bob, hates, pizza))
        graph.remove((bob, hates, michel))  # gasp!

    def addStuffInMultipleContexts(self):
        """Add one triple to the default context and to c1 and c2."""
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        # add to default context
        self.graph.add(triple)
        # add to context 1
        graph = Graph(self.graph.store, c1)
        graph.add(triple)
        # add to context 2
        graph = Graph(self.graph.store, c2)
        graph.add(triple)

    def testConjunction(self):
        self.addStuffInMultipleContexts()
        triple = (self.pizza, self.likes, self.pizza)
        # add to context 1
        graph = Graph(self.graph.store, self.c1)
        graph.add(triple)
        # print("Graph", graph.identifier, graph.serialize(format="nt"))
        # print("Selfgraph", self.graph.identifier,
        #       self.graph.serialize(format="nt"))
        self.assertEquals(len(self.graph.store), len(graph.store))

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testLenInOneContext(self):
        c1 = self.c1
        # make sure context is empty
        self.graph.remove_context(self.get_context(c1))
        graph = Graph(self.graph.store, c1)
        oldLen = len(self.graph)

        for i in range(0, 10):
            graph.add((BNode(), self.hates, self.hates))
        self.assertEquals(len(graph), oldLen + 10)
        self.assertEquals(len(self.get_context(c1)), oldLen + 10)
        self.graph.remove_context(self.get_context(c1))
        self.assertEquals(len(self.graph), oldLen)
        self.assertEquals(len(graph), 0)

    def testLenInMultipleContexts(self):
        oldLen = len(self.graph.store)
        # NOTE(review): leftover debug prints — candidates for removal.
        print("Original", oldLen, self.graph.store)
        self.addStuffInMultipleContexts()
        newLen = len(self.graph.store)
        print("MultipleContexts", newLen, self.graph.store)

        # addStuffInMultipleContexts is adding the same triple to
        # three different contexts. So it's only + 1
        print("No context",
              len(list(self.graph.triples((None, None, None)))))
        print("Context context-1", len(
            list(self.graph.triples((None, None, None), context=self.c1))))
        print("Context context-2", len(
            list(self.graph.triples((None, None, None), context=self.c2))))
        self.assertEquals(len(self.graph.store), oldLen + 1,
                          [self.graph.store, oldLen + 1])

        graph = Graph(self.graph.store, self.c1)
        self.assertEquals(len(graph.store), oldLen + 1,
                          [graph.store, oldLen + 1])

    def testRemoveInMultipleContexts(self):
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        # triple should be still in store after removing it from c1 + c2
        self.assert_(triple in self.graph)
        graph = Graph(self.graph.store, c1)
        graph.remove(triple)
        self.assert_(triple in self.graph)
        graph = Graph(self.graph.store, c2)
        graph.remove(triple)
        self.assert_(triple in self.graph)
        self.graph.remove(triple)
        # now gone!
        self.assert_(triple not in self.graph)

        # add again and see if remove without context removes all triples!
        self.addStuffInMultipleContexts()
        self.graph.remove(triple)
        self.assert_(triple not in self.graph)

    def testContexts(self):
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        def cid(c):
            # PY2/PY3 shim: return the identifier unless c is already a
            # string.  NOTE(review): on Python 3 the `basestring` branch
            # would raise NameError if ever evaluated — relies on the
            # short-circuit of the first condition; confirm.
            if (PY3 and not isinstance(c, (str, bytes))) \
                    or not isinstance(c, basestring):
                return c.identifier
            return c
        self.assert_(self.c1 in list(map(cid, self.graph.contexts())))
        self.assert_(self.c2 in list(map(cid, self.graph.contexts())))

        contextList = list(map(cid, list(self.graph.contexts(triple))))
        self.assert_(self.c1 in contextList)
        self.assert_(self.c2 in contextList)

    def testRemoveContext(self):
        c1 = self.c1

        self.addStuffInMultipleContexts()
        self.assertEquals(len(Graph(self.graph.store, c1)), 1)
        self.assertEquals(len(self.get_context(c1)), 1)

        self.graph.remove_context(self.get_context(c1))
        self.assert_(self.c1 not in self.graph.contexts())

    def testRemoveAny(self):
        Any = None
        self.addStuffInMultipleContexts()
        self.graph.remove((Any, Any, Any))
        self.assertEquals(len(self.graph), 0)

    def testTriples(self):
        """Exhaustively check triple pattern matching with and without an
        explicit context, then the convenience accessor methods."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        asserte = self.assertEquals
        triples = self.graph.triples
        graph = self.graph
        c1graph = Graph(self.graph.store, c1)
        c1triples = c1graph.triples
        Any = None

        self.addStuff()

        # unbound subjects with context
        asserte(len(list(c1triples((Any, likes, pizza)))), 2)
        asserte(len(list(c1triples((Any, hates, pizza)))), 1)
        asserte(len(list(c1triples((Any, likes, cheese)))), 3)
        asserte(len(list(c1triples((Any, hates, cheese)))), 0)

        # unbound subjects without context, same results!
        asserte(len(list(triples((Any, likes, pizza)))), 2)
        asserte(len(list(triples((Any, hates, pizza)))), 1)
        asserte(len(list(triples((Any, likes, cheese)))), 3)
        asserte(len(list(triples((Any, hates, cheese)))), 0)

        # unbound objects with context
        asserte(len(list(c1triples((michel, likes, Any)))), 2)
        asserte(len(list(c1triples((tarek, likes, Any)))), 2)
        asserte(len(list(c1triples((bob, hates, Any)))), 2)
        asserte(len(list(c1triples((bob, likes, Any)))), 1)

        # unbound objects without context, same results!
        asserte(len(list(triples((michel, likes, Any)))), 2)
        asserte(len(list(triples((tarek, likes, Any)))), 2)
        asserte(len(list(triples((bob, hates, Any)))), 2)
        asserte(len(list(triples((bob, likes, Any)))), 1)

        # unbound predicates with context
        asserte(len(list(c1triples((michel, Any, cheese)))), 1)
        asserte(len(list(c1triples((tarek, Any, cheese)))), 1)
        asserte(len(list(c1triples((bob, Any, pizza)))), 1)
        asserte(len(list(c1triples((bob, Any, michel)))), 1)

        # unbound predicates without context, same results!
        asserte(len(list(triples((michel, Any, cheese)))), 1)
        asserte(len(list(triples((tarek, Any, cheese)))), 1)
        asserte(len(list(triples((bob, Any, pizza)))), 1)
        asserte(len(list(triples((bob, Any, michel)))), 1)

        # unbound subject, objects with context
        asserte(len(list(c1triples((Any, hates, Any)))), 2)
        asserte(len(list(c1triples((Any, likes, Any)))), 5)

        # unbound subject, objects without context, same results!
        asserte(len(list(triples((Any, hates, Any)))), 2)
        asserte(len(list(triples((Any, likes, Any)))), 5)

        # unbound predicates, objects with context
        asserte(len(list(c1triples((michel, Any, Any)))), 2)
        asserte(len(list(c1triples((bob, Any, Any)))), 3)
        asserte(len(list(c1triples((tarek, Any, Any)))), 2)

        # unbound predicates, objects without context, same results!
        asserte(len(list(triples((michel, Any, Any)))), 2)
        asserte(len(list(triples((bob, Any, Any)))), 3)
        asserte(len(list(triples((tarek, Any, Any)))), 2)

        # unbound subjects, predicates with context
        asserte(len(list(c1triples((Any, Any, pizza)))), 3)
        asserte(len(list(c1triples((Any, Any, cheese)))), 3)
        asserte(len(list(c1triples((Any, Any, michel)))), 1)

        # unbound subjects, predicates without context, same results!
        asserte(len(list(triples((Any, Any, pizza)))), 3)
        asserte(len(list(triples((Any, Any, cheese)))), 3)
        asserte(len(list(triples((Any, Any, michel)))), 1)

        # all unbound with context
        asserte(len(list(c1triples((Any, Any, Any)))), 7)
        # all unbound without context, same result!
        asserte(len(list(triples((Any, Any, Any)))), 7)

        for c in [graph, self.get_context(c1)]:
            # unbound subjects
            asserte(set(c.subjects(likes, pizza)), set((michel, tarek)))
            asserte(set(c.subjects(hates, pizza)), set((bob,)))
            asserte(set(c.subjects(likes, cheese)), set([tarek, bob, michel]))
            asserte(set(c.subjects(hates, cheese)), set())

            # unbound objects
            asserte(set(c.objects(michel, likes)), set([cheese, pizza]))
            asserte(set(c.objects(tarek, likes)), set([cheese, pizza]))
            asserte(set(c.objects(bob, hates)), set([michel, pizza]))
            asserte(set(c.objects(bob, likes)), set([cheese]))

            # unbound predicates
            asserte(set(c.predicates(michel, cheese)), set([likes]))
            asserte(set(c.predicates(tarek, cheese)), set([likes]))
            asserte(set(c.predicates(bob, pizza)), set([hates]))
            asserte(set(c.predicates(bob, michel)), set([hates]))

            asserte(set(
                c.subject_objects(hates)), set([(bob, pizza), (bob, michel)]))
            asserte(set(c.subject_objects(likes)),
                    set([(tarek, cheese), (michel, cheese),
                         (michel, pizza), (bob, cheese), (tarek, pizza)]))

            asserte(set(c.predicate_objects(
                michel)), set([(likes, cheese), (likes, pizza)]))
            asserte(set(c.predicate_objects(bob)), set([(likes, cheese),
                    (hates, pizza), (hates, michel)]))
            asserte(set(c.predicate_objects(
                tarek)), set([(likes, cheese), (likes, pizza)]))

            asserte(set(c.subject_predicates(
                pizza)), set([(bob, hates), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(cheese)), set([(
                bob, likes), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(michel)), set([(bob, hates)]))

            asserte(set(c), set([
                (bob, hates, michel),
                (bob, likes, cheese),
                (tarek, likes, pizza),
                (michel, likes, pizza),
                (michel, likes, cheese),
                (bob, hates, pizza),
                (tarek, likes, cheese)]))

        # remove stuff and make sure the graph is empty again
        self.removeStuff()
        asserte(len(list(c1triples((Any, Any, Any)))), 0)
        asserte(len(list(triples((Any, Any, Any)))), 0)
class ContextTestCase(unittest.TestCase):
    """Store-agnostic tests for rdflib context (named-graph) behaviour.

    Subclasses override ``store_name``/``path``/``create`` to run the same
    suite against a concrete store plugin; the defaults exercise the
    in-memory store.
    """
    store_name = 'default'
    path = None
    storetest = True
    create = True
    # Fixed test terms shared by all test methods.
    michel = URIRef(u'michel')
    tarek = URIRef(u'tarek')
    bob = URIRef(u'bob')
    likes = URIRef(u'likes')
    hates = URIRef(u'hates')
    pizza = URIRef(u'pizza')
    cheese = URIRef(u'cheese')
    c1 = URIRef(u'context-1')
    c2 = URIRef(u'context-2')

    def setUp(self):
        """Create a fresh store; allocate a temp path for disk-backed stores."""
        self.graph = ConjunctiveGraph(store=self.store_name)
        self.graph.destroy(self.path)
        if self.path is None:
            import os
            if self.store_name == "SQLite":
                # BUG FIX: mkstemp() returns a (fd, path) tuple; the original
                # assigned the whole tuple to self.path, which then broke
                # graph.open() and the path cleanup in tearDown.  Keep only
                # the path and close the fd so it does not leak.
                fd, self.path = mkstemp(prefix='test', dir='/tmp')
                os.close(fd)
            else:
                self.path = mkdtemp(prefix='test', dir='/tmp')
        self.graph.open(self.path, create=self.create)

    def tearDown(self):
        """Destroy the store and remove whatever was created on disk."""
        self.graph.destroy(self.path)
        try:
            self.graph.close()
        except Exception:
            # Some store plugins raise when closing an already-destroyed
            # store; that is fine during teardown.
            pass
        import os
        if hasattr(self, 'path') and self.path is not None:
            if os.path.exists(self.path):
                if os.path.isdir(self.path):
                    for f in os.listdir(self.path):
                        os.unlink(self.path + '/' + f)
                    os.rmdir(self.path)
                elif len(self.path.split(':')) == 1:
                    # plain file path (no DSN-style "host:port" string)
                    os.unlink(self.path)
                else:
                    os.remove(self.path)

    def get_context(self, identifier):
        """Return a Graph bound to `identifier` in the shared store."""
        assert isinstance(identifier, URIRef) or \
            isinstance(identifier, BNode), type(identifier)
        # NOTE(review): namespace_manager=self passes the TestCase itself;
        # presumably harmless because the tests never use namespaces here —
        # confirm before reusing this helper elsewhere.
        return Graph(store=self.graph.store, identifier=identifier,
                     namespace_manager=self)

    def addStuff(self):
        """Populate context c1 with the seven canonical test triples."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.add((tarek, likes, pizza))
        graph.add((tarek, likes, cheese))
        graph.add((michel, likes, pizza))
        graph.add((michel, likes, cheese))
        graph.add((bob, likes, cheese))
        graph.add((bob, hates, pizza))
        graph.add((bob, hates, michel))  # gasp!

    def removeStuff(self):
        """Remove every triple added by addStuff from context c1."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.remove((tarek, likes, pizza))
        graph.remove((tarek, likes, cheese))
        graph.remove((michel, likes, pizza))
        graph.remove((michel, likes, cheese))
        graph.remove((bob, likes, cheese))
        graph.remove((bob, hates, pizza))
        graph.remove((bob, hates, michel))  # gasp!

    def addStuffInMultipleContexts(self):
        """Add one triple to the default context, c1 and c2."""
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        # add to default context
        self.graph.add(triple)
        # add to context 1
        graph = Graph(self.graph.store, c1)
        graph.add(triple)
        # add to context 2
        graph = Graph(self.graph.store, c2)
        graph.add(triple)

    def testConjunction(self):
        """The conjunctive graph sees triples from every context."""
        self.addStuffInMultipleContexts()
        triple = (self.pizza, self.likes, self.pizza)
        # add to context 1
        graph = Graph(self.graph.store, self.c1)
        graph.add(triple)
        self.assertEquals(len(self.graph), len(graph))

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testLenInOneContext(self):
        c1 = self.c1
        # make sure context is empty
        self.graph.remove_context(self.get_context(c1))
        graph = Graph(self.graph.store, c1)
        oldLen = len(self.graph)
        for i in range(0, 10):
            graph.add((BNode(), self.hates, self.hates))
        self.assertEquals(len(graph), oldLen + 10)
        self.assertEquals(len(self.get_context(c1)), oldLen + 10)
        self.graph.remove_context(self.get_context(c1))
        self.assertEquals(len(self.graph), oldLen)
        self.assertEquals(len(graph), 0)

    def testLenInMultipleContexts(self):
        oldLen = len(self.graph)
        self.addStuffInMultipleContexts()

        # addStuffInMultipleContexts is adding the same triple to
        # three different contexts. So it's only + 1
        self.assertEquals(len(self.graph), oldLen + 1)

        graph = Graph(self.graph.store, self.c1)
        self.assertEquals(len(graph), oldLen + 1)

    def testRemoveInMultipleContexts(self):
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        # triple should be still in store after removing it from c1 + c2
        self.assert_(triple in self.graph)
        graph = Graph(self.graph.store, c1)
        graph.remove(triple)
        self.assert_(triple in self.graph)
        graph = Graph(self.graph.store, c2)
        graph.remove(triple)
        self.assert_(triple in self.graph)
        self.graph.remove(triple)
        # now gone!
        self.assert_(triple not in self.graph)

        # add again and see if remove without context removes all triples!
        self.addStuffInMultipleContexts()
        self.graph.remove(triple)
        self.assert_(triple not in self.graph)

    def testContexts(self):
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        def cid(c):
            # contexts() may yield Graphs or plain identifiers depending
            # on the store; normalise to the identifier
            if not isinstance(c, basestring):
                return c.identifier
            return c
        self.assert_(self.c1 in map(cid, self.graph.contexts()))
        self.assert_(self.c2 in map(cid, self.graph.contexts()))

        contextList = map(cid, list(self.graph.contexts(triple)))
        self.assert_(self.c1 in contextList)
        self.assert_(self.c2 in contextList)

    def testRemoveContext(self):
        c1 = self.c1

        self.addStuffInMultipleContexts()
        self.assertEquals(len(Graph(self.graph.store, c1)), 1)
        self.assertEquals(len(self.get_context(c1)), 1)

        self.graph.remove_context(self.get_context(c1))
        self.assert_(self.c1 not in self.graph.contexts())

    def testRemoveAny(self):
        Any = None
        self.addStuffInMultipleContexts()
        self.graph.remove((Any, Any, Any))
        self.assertEquals(len(self.graph), 0)

    def testTriples(self):
        """Exhaustively check triples()/accessor results for every
        bound/unbound pattern, with and without a context."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        asserte = self.assertEquals
        triples = self.graph.triples
        graph = self.graph
        c1graph = Graph(self.graph.store, c1)
        c1triples = c1graph.triples
        Any = None

        self.addStuff()

        # unbound subjects with context
        asserte(len(list(c1triples((Any, likes, pizza)))), 2)
        asserte(len(list(c1triples((Any, hates, pizza)))), 1)
        asserte(len(list(c1triples((Any, likes, cheese)))), 3)
        asserte(len(list(c1triples((Any, hates, cheese)))), 0)

        # unbound subjects without context, same results!
        asserte(len(list(triples((Any, likes, pizza)))), 2)
        asserte(len(list(triples((Any, hates, pizza)))), 1)
        asserte(len(list(triples((Any, likes, cheese)))), 3)
        asserte(len(list(triples((Any, hates, cheese)))), 0)

        # unbound objects with context
        asserte(len(list(c1triples((michel, likes, Any)))), 2)
        asserte(len(list(c1triples((tarek, likes, Any)))), 2)
        asserte(len(list(c1triples((bob, hates, Any)))), 2)
        asserte(len(list(c1triples((bob, likes, Any)))), 1)

        # unbound objects without context, same results!
        asserte(len(list(triples((michel, likes, Any)))), 2)
        asserte(len(list(triples((tarek, likes, Any)))), 2)
        asserte(len(list(triples((bob, hates, Any)))), 2)
        asserte(len(list(triples((bob, likes, Any)))), 1)

        # unbound predicates with context
        asserte(len(list(c1triples((michel, Any, cheese)))), 1)
        asserte(len(list(c1triples((tarek, Any, cheese)))), 1)
        asserte(len(list(c1triples((bob, Any, pizza)))), 1)
        asserte(len(list(c1triples((bob, Any, michel)))), 1)

        # unbound predicates without context, same results!
        asserte(len(list(triples((michel, Any, cheese)))), 1)
        asserte(len(list(triples((tarek, Any, cheese)))), 1)
        asserte(len(list(triples((bob, Any, pizza)))), 1)
        asserte(len(list(triples((bob, Any, michel)))), 1)

        # unbound subject, objects with context
        asserte(len(list(c1triples((Any, hates, Any)))), 2)
        asserte(len(list(c1triples((Any, likes, Any)))), 5)

        # unbound subject, objects without context, same results!
        asserte(len(list(triples((Any, hates, Any)))), 2)
        asserte(len(list(triples((Any, likes, Any)))), 5)

        # unbound predicates, objects with context
        asserte(len(list(c1triples((michel, Any, Any)))), 2)
        asserte(len(list(c1triples((bob, Any, Any)))), 3)
        asserte(len(list(c1triples((tarek, Any, Any)))), 2)

        # unbound predicates, objects without context, same results!
        asserte(len(list(triples((michel, Any, Any)))), 2)
        asserte(len(list(triples((bob, Any, Any)))), 3)
        asserte(len(list(triples((tarek, Any, Any)))), 2)

        # unbound subjects, predicates with context
        asserte(len(list(c1triples((Any, Any, pizza)))), 3)
        asserte(len(list(c1triples((Any, Any, cheese)))), 3)
        asserte(len(list(c1triples((Any, Any, michel)))), 1)

        # unbound subjects, predicates without context, same results!
        asserte(len(list(triples((Any, Any, pizza)))), 3)
        asserte(len(list(triples((Any, Any, cheese)))), 3)
        asserte(len(list(triples((Any, Any, michel)))), 1)

        # all unbound with context
        asserte(len(list(c1triples((Any, Any, Any)))), 7)
        # all unbound without context, same result!
        asserte(len(list(triples((Any, Any, Any)))), 7)

        for c in [graph, self.get_context(c1)]:
            # unbound subjects
            asserte(set(c.subjects(likes, pizza)), set((michel, tarek)))
            asserte(set(c.subjects(hates, pizza)), set((bob,)))
            asserte(set(c.subjects(likes, cheese)), set([tarek, bob, michel]))
            asserte(set(c.subjects(hates, cheese)), set())

            # unbound objects
            asserte(set(c.objects(michel, likes)), set([cheese, pizza]))
            asserte(set(c.objects(tarek, likes)), set([cheese, pizza]))
            asserte(set(c.objects(bob, hates)), set([michel, pizza]))
            asserte(set(c.objects(bob, likes)), set([cheese]))

            # unbound predicates
            asserte(set(c.predicates(michel, cheese)), set([likes]))
            asserte(set(c.predicates(tarek, cheese)), set([likes]))
            asserte(set(c.predicates(bob, pizza)), set([hates]))
            asserte(set(c.predicates(bob, michel)), set([hates]))

            asserte(set(c.subject_objects(hates)),
                    set([(bob, pizza), (bob, michel)]))
            asserte(set(c.subject_objects(likes)),
                    set([(tarek, cheese), (michel, cheese), (michel, pizza),
                         (bob, cheese), (tarek, pizza)]))

            asserte(set(c.predicate_objects(michel)),
                    set([(likes, cheese), (likes, pizza)]))
            asserte(set(c.predicate_objects(bob)),
                    set([(likes, cheese), (hates, pizza), (hates, michel)]))
            asserte(set(c.predicate_objects(tarek)),
                    set([(likes, cheese), (likes, pizza)]))

            asserte(set(c.subject_predicates(pizza)),
                    set([(bob, hates), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(cheese)),
                    set([(bob, likes), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(michel)), set([(bob, hates)]))

            asserte(set(c), set([
                (bob, hates, michel),
                (bob, likes, cheese),
                (tarek, likes, pizza),
                (michel, likes, pizza),
                (michel, likes, cheese),
                (bob, hates, pizza),
                (tarek, likes, cheese)]))

        # remove stuff and make sure the graph is empty again
        self.removeStuff()
        asserte(len(list(c1triples((Any, Any, Any)))), 0)
        asserte(len(list(triples((Any, Any, Any)))), 0)
class ManifestHelper(object):
    """Convenience wrapper around an rdflib graph for manifest files.

    Keeps a prefix -> Namespace map in sync with a URIHelper so that
    triples can be queried/added with plain strings; '*' (or None) acts
    as a wildcard in s/p/o positions.
    """

    def __init__(self, uri=None):
        self.uri = None
        if uri:
            self.uri = uri
        self.reset()

    def reset(self):
        """(Re)create the graph and re-register the default namespaces."""
        self.g = None
        if self.uri:
            self.g = ConjunctiveGraph(identifier=self.uri)
        else:
            self.g = ConjunctiveGraph()
        self.namespaces = {}
        self.urihelper = URIHelper(self.namespaces)
        # add defaults
        for prefix, ns in NAMESPACES.iteritems():
            self.add_namespace(prefix, ns)

    def from_string(self, textfile, format="xml", encoding="utf-8"):
        """Replace the current graph with the content parsed from `textfile`.

        BUG FIX: Graph.parse's second positional parameter is ``publicID``,
        not ``format`` — the original call passed the format string as the
        public ID and left rdflib to guess the serialization.  Pass the
        format by keyword.
        """
        self.reset()
        self.g.parse(textfile, format=format)
        return

    def _coerce_spo(self, s, p, o):
        """Normalise wildcard/string s, p, o values to rdflib terms."""
        if s == '*':
            s = None
        if p == '*':
            p = None
        if o == '*':
            o = None
        if not isinstance(s, URIRef) and not isinstance(s, BNode) and not s == None:
            s = self.urihelper.get_uriref(s)
        if not isinstance(p, URIRef) and not p == None:
            p = self.urihelper.parse_uri(p)
        if not isinstance(o, URIRef) and not isinstance(o, Literal) and not isinstance(o, BNode) and not o == None:
            if not isinstance(o, basestring):
                o = unicode(o)
            o = self.urihelper.parse_uri(o, return_Literal_not_Exception=True)
        return s, p, o

    def triple_exists(self, s, p, o):
        """Return True iff at least one triple matches (s, p, o)."""
        if not type(self.g).__name__ in ['ConjunctiveGraph', 'Graph']:
            return False
        s, p, o = self._coerce_spo(s, p, o)
        # stop at the first match instead of counting every one
        for _ in self.g.triples((s, p, o)):
            return True
        return False

    def list_objects(self, s, p):
        """Return all objects of triples matching subject `s`, predicate `p`."""
        if not type(self.g).__name__ in ['ConjunctiveGraph', 'Graph']:
            return []
        if s == '*':
            s = None
        if p == '*':
            p = None
        if not isinstance(s, URIRef) and not isinstance(s, BNode) and not s == None:
            s = self.urihelper.get_uriref(s)
        if not isinstance(p, URIRef) and not p == None:
            p = self.urihelper.parse_uri(p)
        return list(self.g.objects(s, p))

    def add_triple(self, s, p, o):
        """Coerce s, p, o to rdflib terms, add the triple and commit."""
        if not isinstance(s, URIRef) and not isinstance(s, BNode):
            s = self.urihelper.get_uriref(s)
        if not isinstance(p, URIRef):
            p = self.urihelper.parse_uri(p)
        if not isinstance(o, URIRef) and not isinstance(o, Literal) and not isinstance(o, BNode):
            if not isinstance(o, basestring):
                o = unicode(o)
            o = self.urihelper.parse_uri(o, return_Literal_not_Exception=True)
        self.g.add((s, p, o))
        self.g.commit()
        return

    def add_namespace(self, prefix, uri):
        """Register a namespace with the graph and the URIHelper.

        Raises:
            TypeError: if prefix/uri are not strings (or URIRef/Namespace).
        """
        if not isinstance(prefix, basestring):
            raise TypeError('Add namespace: prefix is not of type string or unicode')
        if not isinstance(uri, (URIRef, Namespace)):
            if not isinstance(uri, basestring):
                raise TypeError('Add namespace: namespace is not of type string or unicode')
        if not isinstance(prefix, unicode):
            prefix = unicode(prefix)
        if isinstance(uri, basestring) and not isinstance(uri, unicode):
            uri = unicode(uri)
        self.namespaces[prefix] = self.urihelper.get_namespace(uri)
        if prefix not in self.urihelper.namespaces:
            self.urihelper.namespaces[prefix] = self.urihelper.get_namespace(uri)
        self.g.bind(prefix, self.namespaces[prefix])
        return

    def del_namespace(self, prefix, ns):
        """Forget a locally registered namespace.

        NOTE(review): `ns` is unused and the URIHelper/graph bindings are
        left untouched — only the local map entry is removed.  Kept as-is
        for interface compatibility.
        """
        if prefix in self.namespaces:
            del self.namespaces[prefix]
        return

    def del_triple(self, s, p, o=None):
        """Remove triples matching (s, p, o); '*'/None are wildcards."""
        if not type(self.g).__name__ in ['ConjunctiveGraph', 'Graph']:
            return
        s, p, o = self._coerce_spo(s, p, o)
        self.g.remove((s, p, o))
        return

    def get_graph(self):
        return self.g

    def to_string(self, format="xml"):
        """Serialize the graph; empty graphs yield a bare XML declaration.

        NOTE(review): serialize(..., encoding=...) returns bytes on
        Python 3, so the `+ "\\n"` concatenation assumes Python 2 semantics
        — confirm before porting.
        """
        if type(self.g).__name__ in ['ConjunctiveGraph', 'Graph'] and len(self.g) > 0:
            self.g.commit()
            ans_str = self.g.serialize(format=format, encoding="utf-8") + "\n"
            return ans_str
        else:
            return u'<?xml version="1.0" encoding="UTF-8"?>\n'
        # NOTE(review): this is the interior of a line-reading loop whose
        # header is outside this chunk; indentation reconstructed, tokens
        # unchanged.  Branch: the line had no tab-separated range column,
        # so normalise the free-text label to Title Case.
        parts = re.split(r' +', label)
        label = ' '.join(l.lower().capitalize() for l in parts).strip()
        position = 0
    else:
        # tab-separated row: last column is the label, first the LCC range;
        # the remaining column count encodes the hierarchy depth
        parts = line.split("\t")
        label = parts.pop().strip()
        range = parts.pop(0).strip()
        position = len(parts) + 1

    # if there's no range then we've got a chunk of text that needs
    # to be added to the last concept we added to the graph
    if not range:
        uri = range_uri(lc_class[-1][0])
        old_label = list(g.objects(uri, SKOS.prefLabel))[0]
        new_label = "%s %s" % (old_label, label)
        # replace the previous prefLabel with the extended one
        g.remove((uri, SKOS.prefLabel, old_label))
        g.add((uri, SKOS.prefLabel, Literal(new_label, 'en')))
        continue

    # truncate the ancestor stack to this row's depth and push the new
    # (range, label) entry; the displayed label is the '--'-joined path
    lc_class = lc_class[0:position]
    lc_class.insert(position, (range, label))
    label = '--'.join([c[1] for c in lc_class])
    uri = range_uri(range)
    g.add((uri, RDF.type, SKOS.Concept))
    g.add((uri, SKOS.prefLabel, Literal(label, 'en')))
    g.add((uri, SKOS.notation, Literal(range, datatype=LCC)))
    if position == 0:
        # depth 0 entries are top concepts of the LCCO scheme
        g.add((LCCO, SKOS.hasTopConcept, uri))
class ContextTestCase(unittest.TestCase):
    """Context (named-graph) behaviour tests, simple open/destroy variant.

    Subclasses override ``store_name``/``path``/``create`` to target a
    concrete store plugin.
    """
    store_name = 'default'
    path = None
    storetest = True
    create = True
    # Fixed test terms shared by all test methods.
    michel = URIRef(u'michel')
    tarek = URIRef(u'tarek')
    bob = URIRef(u'bob')
    likes = URIRef(u'likes')
    hates = URIRef(u'hates')
    pizza = URIRef(u'pizza')
    cheese = URIRef(u'cheese')
    c1 = URIRef(u'context-1')
    c2 = URIRef(u'context-2')

    def setUp(self):
        self.graph = ConjunctiveGraph(store=self.store_name)
        self.graph.destroy(self.path)
        self.graph.open(self.path, create=self.create)

    def tearDown(self):
        self.graph.destroy(self.path)
        try:
            self.graph.close()
        except Exception:
            # BUG FIX: was a bare `except:`; some stores raise when closing
            # an already-destroyed store and that is fine in teardown, but a
            # bare except would also swallow KeyboardInterrupt/SystemExit.
            pass

    def get_context(self, identifier):
        """Return a Graph bound to `identifier` in the shared store."""
        assert isinstance(identifier, URIRef) or \
            isinstance(identifier, BNode), type(identifier)
        # NOTE(review): namespace_manager=self passes the TestCase itself;
        # presumably harmless since these tests never use namespaces.
        return Graph(store=self.graph.store, identifier=identifier,
                     namespace_manager=self)

    def addStuff(self):
        """Populate context c1 with the seven canonical test triples."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.add((tarek, likes, pizza))
        graph.add((tarek, likes, cheese))
        graph.add((michel, likes, pizza))
        graph.add((michel, likes, cheese))
        graph.add((bob, likes, cheese))
        graph.add((bob, hates, pizza))
        graph.add((bob, hates, michel))  # gasp!

    def removeStuff(self):
        """Remove every triple added by addStuff from context c1."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.remove((tarek, likes, pizza))
        graph.remove((tarek, likes, cheese))
        graph.remove((michel, likes, pizza))
        graph.remove((michel, likes, cheese))
        graph.remove((bob, likes, cheese))
        graph.remove((bob, hates, pizza))
        graph.remove((bob, hates, michel))  # gasp!

    def addStuffInMultipleContexts(self):
        """Add one triple to the default context, c1 and c2."""
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        # add to default context
        self.graph.add(triple)
        # add to context 1
        graph = Graph(self.graph.store, c1)
        graph.add(triple)
        # add to context 2
        graph = Graph(self.graph.store, c2)
        graph.add(triple)

    def testConjunction(self):
        self.addStuffInMultipleContexts()
        triple = (self.pizza, self.likes, self.pizza)
        # add to context 1
        graph = Graph(self.graph.store, self.c1)
        graph.add(triple)
        # NOTE(review): the original `assertEquals(len(self.graph),
        # len(graph))` was deliberately replaced with explicit triple
        # iteration counts — presumably to work around a store-specific
        # len() discrepancy; kept that way.
        self.assertEquals(
            len(list(self.graph.triples((None, None, None)))),
            len(list(graph.triples((None, None, None)))))

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testLenInOneContext(self):
        c1 = self.c1
        # make sure context is empty
        self.graph.remove_context(self.get_context(c1))
        graph = Graph(self.graph.store, c1)
        oldLen = len(self.graph)
        for i in range(0, 10):
            graph.add((BNode(), self.hates, self.hates))
        self.assertEquals(len(graph), oldLen + 10)
        self.assertEquals(len(self.get_context(c1)), oldLen + 10)
        self.graph.remove_context(self.get_context(c1))
        self.assertEquals(len(self.graph), oldLen)
        self.assertEquals(len(graph), 0)

    def testLenInMultipleContexts(self):
        oldLen = len(self.graph)
        self.addStuffInMultipleContexts()

        # addStuffInMultipleContexts is adding the same triple to
        # three different contexts. So it's only + 1
        self.assertEquals(len(self.graph), oldLen + 1)

        graph = Graph(self.graph.store, self.c1)
        self.assertEquals(len(graph), oldLen + 1)

    def testRemoveInMultipleContexts(self):
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        # triple should be still in store after removing it from c1 + c2
        self.assert_(triple in self.graph)
        graph = Graph(self.graph.store, c1)
        graph.remove(triple)
        self.assert_(triple in self.graph)
        graph = Graph(self.graph.store, c2)
        graph.remove(triple)
        self.assert_(triple in self.graph)
        self.graph.remove(triple)
        # now gone!
        self.assert_(triple not in self.graph)

        # add again and see if remove without context removes all triples!
        self.addStuffInMultipleContexts()
        self.graph.remove(triple)
        self.assert_(triple not in self.graph)

    def testContexts(self):
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        def cid(c):
            # contexts() may yield Graphs or plain identifiers depending
            # on the store; normalise to the identifier
            if not isinstance(c, basestring):
                return c.identifier
            return c
        self.assert_(self.c1 in map(cid, self.graph.contexts()))
        self.assert_(self.c2 in map(cid, self.graph.contexts()))

        contextList = map(cid, list(self.graph.contexts(triple)))
        self.assert_(self.c1 in contextList)
        self.assert_(self.c2 in contextList)

    def testRemoveContext(self):
        c1 = self.c1

        self.addStuffInMultipleContexts()
        self.assertEquals(len(Graph(self.graph.store, c1)), 1)
        self.assertEquals(len(self.get_context(c1)), 1)

        self.graph.remove_context(self.get_context(c1))
        self.assert_(self.c1 not in self.graph.contexts())

    def testRemoveAny(self):
        Any = None
        self.addStuffInMultipleContexts()
        self.graph.remove((Any, Any, Any))
        self.assertEquals(len(self.graph), 0)

    def testTriples(self):
        """Exhaustively check triples()/accessor results for every
        bound/unbound pattern, with and without a context."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        asserte = self.assertEquals
        triples = self.graph.triples
        graph = self.graph
        c1graph = Graph(self.graph.store, c1)
        c1triples = c1graph.triples
        Any = None

        self.addStuff()

        # unbound subjects with context
        asserte(len(list(c1triples((Any, likes, pizza)))), 2)
        asserte(len(list(c1triples((Any, hates, pizza)))), 1)
        asserte(len(list(c1triples((Any, likes, cheese)))), 3)
        asserte(len(list(c1triples((Any, hates, cheese)))), 0)

        # unbound subjects without context, same results!
        asserte(len(list(triples((Any, likes, pizza)))), 2)
        asserte(len(list(triples((Any, hates, pizza)))), 1)
        asserte(len(list(triples((Any, likes, cheese)))), 3)
        asserte(len(list(triples((Any, hates, cheese)))), 0)

        # unbound objects with context
        asserte(len(list(c1triples((michel, likes, Any)))), 2)
        asserte(len(list(c1triples((tarek, likes, Any)))), 2)
        asserte(len(list(c1triples((bob, hates, Any)))), 2)
        asserte(len(list(c1triples((bob, likes, Any)))), 1)

        # unbound objects without context, same results!
        asserte(len(list(triples((michel, likes, Any)))), 2)
        asserte(len(list(triples((tarek, likes, Any)))), 2)
        asserte(len(list(triples((bob, hates, Any)))), 2)
        asserte(len(list(triples((bob, likes, Any)))), 1)

        # unbound predicates with context
        asserte(len(list(c1triples((michel, Any, cheese)))), 1)
        asserte(len(list(c1triples((tarek, Any, cheese)))), 1)
        asserte(len(list(c1triples((bob, Any, pizza)))), 1)
        asserte(len(list(c1triples((bob, Any, michel)))), 1)

        # unbound predicates without context, same results!
        asserte(len(list(triples((michel, Any, cheese)))), 1)
        asserte(len(list(triples((tarek, Any, cheese)))), 1)
        asserte(len(list(triples((bob, Any, pizza)))), 1)
        asserte(len(list(triples((bob, Any, michel)))), 1)

        # unbound subject, objects with context
        asserte(len(list(c1triples((Any, hates, Any)))), 2)
        asserte(len(list(c1triples((Any, likes, Any)))), 5)

        # unbound subject, objects without context, same results!
        asserte(len(list(triples((Any, hates, Any)))), 2)
        asserte(len(list(triples((Any, likes, Any)))), 5)

        # unbound predicates, objects with context
        asserte(len(list(c1triples((michel, Any, Any)))), 2)
        asserte(len(list(c1triples((bob, Any, Any)))), 3)
        asserte(len(list(c1triples((tarek, Any, Any)))), 2)

        # unbound predicates, objects without context, same results!
        asserte(len(list(triples((michel, Any, Any)))), 2)
        asserte(len(list(triples((bob, Any, Any)))), 3)
        asserte(len(list(triples((tarek, Any, Any)))), 2)

        # unbound subjects, predicates with context
        asserte(len(list(c1triples((Any, Any, pizza)))), 3)
        asserte(len(list(c1triples((Any, Any, cheese)))), 3)
        asserte(len(list(c1triples((Any, Any, michel)))), 1)

        # unbound subjects, predicates without context, same results!
        asserte(len(list(triples((Any, Any, pizza)))), 3)
        asserte(len(list(triples((Any, Any, cheese)))), 3)
        asserte(len(list(triples((Any, Any, michel)))), 1)

        # all unbound with context
        asserte(len(list(c1triples((Any, Any, Any)))), 7)
        # all unbound without context, same result!
        asserte(len(list(triples((Any, Any, Any)))), 7)

        for c in [graph, self.get_context(c1)]:
            # unbound subjects
            asserte(set(c.subjects(likes, pizza)), set((michel, tarek)))
            asserte(set(c.subjects(hates, pizza)), set((bob,)))
            asserte(set(c.subjects(likes, cheese)), set([tarek, bob, michel]))
            asserte(set(c.subjects(hates, cheese)), set())

            # unbound objects
            asserte(set(c.objects(michel, likes)), set([cheese, pizza]))
            asserte(set(c.objects(tarek, likes)), set([cheese, pizza]))
            asserte(set(c.objects(bob, hates)), set([michel, pizza]))
            asserte(set(c.objects(bob, likes)), set([cheese]))

            # unbound predicates
            asserte(set(c.predicates(michel, cheese)), set([likes]))
            asserte(set(c.predicates(tarek, cheese)), set([likes]))
            asserte(set(c.predicates(bob, pizza)), set([hates]))
            asserte(set(c.predicates(bob, michel)), set([hates]))

            asserte(set(c.subject_objects(hates)),
                    set([(bob, pizza), (bob, michel)]))
            asserte(set(c.subject_objects(likes)),
                    set([(tarek, cheese), (michel, cheese), (michel, pizza),
                         (bob, cheese), (tarek, pizza)]))

            asserte(set(c.predicate_objects(michel)),
                    set([(likes, cheese), (likes, pizza)]))
            asserte(set(c.predicate_objects(bob)),
                    set([(likes, cheese), (hates, pizza), (hates, michel)]))
            asserte(set(c.predicate_objects(tarek)),
                    set([(likes, cheese), (likes, pizza)]))

            asserte(set(c.subject_predicates(pizza)),
                    set([(bob, hates), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(cheese)),
                    set([(bob, likes), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(michel)), set([(bob, hates)]))

            asserte(set(c), set([
                (bob, hates, michel),
                (bob, likes, cheese),
                (tarek, likes, pizza),
                (michel, likes, pizza),
                (michel, likes, cheese),
                (bob, hates, pizza),
                (tarek, likes, cheese)]))

        # remove stuff and make sure the graph is empty again
        self.removeStuff()
        asserte(len(list(c1triples((Any, Any, Any)))), 0)
        asserte(len(list(triples((Any, Any, Any)))), 0)
class MemoryStore:
    """A class that combines and syncronieses n-quad files and an in-memory
    quad store.

    This class contains information about all graphs, their corresponding
    URIs and pathes in the file system. For every Graph (context of
    Quad-Store) exists a FileReference object (n-quad) that enables
    versioning (with git) and persistence.
    """

    def __init__(self):
        """Initialize a new MemoryStore instance."""
        # BUG FIX: the logger was a local variable here, so addfile()'s
        # `logger.debug(...)` raised NameError when its except branch ran.
        # Keep it on the instance instead.
        self.logger = logging.getLogger('quit.core.MemoryStore')
        self.logger.debug('Create an instance of MemoryStore')
        self.store = ConjunctiveGraph(identifier='default')
        return

    def getgraphuris(self):
        """Method to get all available named graphs.

        Returns:
            A list containing all graph uris found in store.
        """
        graphs = []
        for graph in self.store.contexts():
            # skip anonymous contexts and the default graph
            if isinstance(graph, BNode) or str(graph.identifier) == 'default':
                pass
            else:
                graphs.append(graph.identifier)
        return graphs

    def getgraphcontent(self, graphuri):
        """Get the serialized content of a named graph.

        Args:
            graphuri: The URI of a named graph.
        Returns:
            content: A list of strings where each string is a quad.
        """
        data = []
        context = self.store.get_context(URIRef(graphuri))
        triplestring = context.serialize(format='nt').decode('UTF-8')

        # Since we have triples here, we transform them to quads by adding
        # the graphuri
        # TODO This might cause problems if ' .\n' will be part of a literal.
        #   Maybe a regex would be a better solution
        triplestring = triplestring.replace(' .\n', ' <' + graphuri + '> .\n')

        # BUG FIX: the original did data.remove(''), which raises ValueError
        # when the serialization contains no empty line; drop any blanks
        # defensively instead.
        data = [line for line in triplestring.splitlines() if line != '']
        return data

    def getstoreobject(self):
        """Get the conjunctive graph object.

        Returns:
            graph: The in-memory ConjunctiveGraph.
        """
        # BUG FIX: the method had no body and silently returned None
        # despite its docstring; return the store as documented.
        return self.store

    def graphexists(self, graphuri):
        """Ask if a named graph FileReference object for a named graph URI.

        Args:
            graphuri: A string containing the URI of a named graph

        Returns:
            True or False
        """
        # NOTE(review): rdflib's get_context normally returns a (possibly
        # empty) Graph rather than None, so this likely always returns
        # True — confirm against the store in use.
        if self.store.get_context(URIRef(graphuri)) is None:
            return False
        else:
            return True

    def addfile(self, filename, serialization):
        """Add a file to the store.

        Args:
            filename: A String for the path to the file.
            serialization: A String containg the RDF format
        Raises:
            ValueError if the given file can't be parsed as nquads.
        """
        try:
            self.store.parse(source=filename, format=serialization)
        except Exception as e:
            self.logger.debug(e)
            # BUG FIX: .format() previously bound only to the second string
            # of the concatenation, leaving the first placeholder unfilled.
            self.logger.debug(
                "Could not import file: {}. "
                "Make sure the file exists and contains data in {}".format(
                    filename, serialization))

    def addquads(self, quads):
        """Add quads to the MemoryStore.

        Args:
            quads: Rdflib.quads that should be added to the MemoryStore.
        """
        self.store.addN(quads)
        self.store.commit()

    def query(self, querystring):
        """Execute a SPARQL select query.

        Args:
            querystring: A string containing a SPARQL ask or select query.
        Returns:
            The SPARQL result set
        """
        return self.store.query(querystring)

    def update(self, querystring, versioning=True):
        """Execute a SPARQL update query and update the store.

        This method executes a SPARQL update query and updates and commits
        all affected files.

        Args:
            querystring: A string containing a SPARQL upate query.
        """
        # methods of rdflib ConjunciveGraph
        if versioning:
            # collect affected graphs/actions before mutating the store
            actions = evalUpdate(self.store, querystring)
            self.store.update(querystring)
            return actions
        else:
            self.store.update(querystring)
            return
        return

    def removequads(self, quads):
        """Remove quads from the MemoryStore.

        Args:
            quads: Rdflib.quads that should be removed to the MemoryStore.
        """
        self.store.remove((quads))
        self.store.commit()
        return

    def exit(self):
        """Execute actions on API shutdown."""
        return
def _ted_as_json_ld(sg):
    """Serialize a Thing Ecosystem Description graph as framed, compacted JSON-LD.

    Copies the source graph, rewrites the generic wot:interactionName predicate
    into the interaction-specific predicates, skolemizes blank nodes, then
    frames both the Thing Descriptions and the Thing Ecosystem Description into
    a single '@graph' document with a merged '@context'.

    Arguments:
    sg -- the source rdflib graph holding the TED triples

    Returns:
    A JSON string (indented, keys sorted) with the framed TED document.
    """
    g = ConjunctiveGraph()
    g.__iadd__(sg)

    # Rename wot:interactionName to the type-specific name predicate.
    # (The three originally duplicated query loops are now table-driven;
    # behavior is unchanged.)
    for interaction_type, name_predicate in (
            (WOT.Property, WOT.propertyName),
            (WOT.Action, WOT.actionName),
            (WOT.Event, WOT.eventName)):
        for res in g.query("""SELECT ?p ?name WHERE { ?p a <%s> ;
                               <%s> ?name}""" % (interaction_type, WOT.interactionName)):
            g.remove((res.p, WOT.interactionName, res.name))
            g.add((res.p, name_predicate, res.name))

    context = build_context(g)
    # The compacted terms pid/aid/eid all map back to wot:interactionName
    # so the renamed predicates still compact to the expected short names.
    if 'pid' in context:
        context['pid'] = str(WOT.interactionName)
    if 'aid' in context:
        context['aid'] = str(WOT.interactionName)
    if 'eid' in context:
        context['eid'] = str(WOT.interactionName)

    # Skolemize so blank nodes survive the round trip through n-quads.
    cg = skolemize(g)
    ted_nquads = cg.serialize(format='nquads')
    ld = jsonld.from_rdf(ted_nquads)

    td_frame = jsonld.compact(
        jsonld.frame(ld, {
            'context': context,
            '@type': str(CORE.ThingDescription)
        }), context)
    td_context = td_frame['@context']
    del td_frame['@context']

    ted_frame = jsonld.compact(
        jsonld.frame(ld, {
            'context': context,
            '@type': str(CORE.ThingEcosystemDescription)
        }), context)
    ted_context = ted_frame['@context']
    del ted_frame['@context']

    # Flatten the ecosystem's component list down to plain identifiers.
    component_ids = []
    ted_components = ted_frame.get('describes', {}).get('components', [])
    if isinstance(ted_components, dict) or isinstance(ted_components, str):
        ted_components = [ted_components]
    for component in ted_components:
        # if it does not contain 'describedBy' it is a resource
        cid = component['@id'] if isinstance(
            component, dict) and 'describedBy' in component else component
        component_ids.append(cid)
    if component_ids:
        ted_frame['describes']['components'] = component_ids

    # Normalise the TD frame into a '@graph' list so the TED frame can be
    # appended alongside the Thing Descriptions.
    if '@graph' not in td_frame:
        source_td_frame = copy.deepcopy(td_frame)
        td_frame = {'@graph': []}
        if source_td_frame:
            td_frame['@graph'].append(source_td_frame)

    td_frame['@graph'].append(ted_frame)
    td_frame['@context'] = merge_two_dicts(td_context, ted_context)

    try:
        for pdata in path_data("$..interactions", td_frame['@graph']):
            if isinstance(pdata, list):
                for int_dict in pdata:
                    replace_interaction_name(int_dict)
            else:
                replace_interaction_name(pdata)
    except TypeError:
        # path_data yields nothing useful when no interactions exist.
        pass

    return json.dumps(td_frame, indent=3, sort_keys=True)
class TabLinker(object):
    # Converts annotated Excel workbooks into RDF (Data Cube style) triples,
    # plus a separate annotation graph (Open Annotation model).
    # NOTE(review): this class uses Python 2 idioms (`print` statement,
    # `unicode`, `urllib.quote`) — it will not run under Python 3 as-is.
    defaultNamespacePrefix = "http://lod.cedar-project.nl/resource/"
    annotationsNamespacePrefix = "http://lod.cedar-project.nl/annotations/"
    namespaces = {
        "dcterms": Namespace("http://purl.org/dc/terms/"),
        "skos": Namespace("http://www.w3.org/2004/02/skos/core#"),
        "d2s": Namespace("http://lod.cedar-project.nl/core/"),
        "qb": Namespace("http://purl.org/linked-data/cube#"),
        "owl": Namespace("http://www.w3.org/2002/07/owl#"),
    }
    annotationNamespaces = {
        "np": Namespace("http://www.nanopub.org/nschema#"),
        "oa": Namespace("http://www.w3.org/ns/openannotation/core/"),
        "xsd": Namespace("http://www.w3.org/2001/XMLSchema#"),
        "dct": Namespace("http://purl.org/dc/terms/"),
    }

    def __init__(self, filename, config, level=logging.DEBUG):
        """TabLinker constructor

        Keyword arguments:
        filename -- String containing the name of the current Excel file being examined
        config -- Configuration object, loaded from .ini file
        level -- A logging level as defined in the logging module
        """
        self.config = config
        self.filename = filename
        self.log = logging.getLogger("TabLinker")
        self.log.setLevel(level)
        self.log.debug("Initializing Graphs")
        self.initGraphs()
        self.log.debug("Setting Scope")
        basename = os.path.basename(filename)
        # Scope (URI base) is derived from the workbook file name, sans .xls
        basename = re.search("(.*)\.xls", basename).group(1)
        self.setScope(basename)
        self.log.debug("Loading Excel file {0}.".format(filename))
        # formatting_info=True is required so cell styles (the TL annotations)
        # are available via xlrd.
        self.rb = open_workbook(filename, formatting_info=True)
        self.log.debug("Reading styles")
        self.styles = Styles(self.rb)
        self.log.debug("Copied Workbook to writable copy")
        self.wb = copy(self.rb)

    def initGraphs(self):
        """Initialize the graphs, set default namespaces, and add schema information"""
        self.graph = ConjunctiveGraph()
        # Create a separate graph for annotations
        self.annotationGraph = ConjunctiveGraph()
        self.log.debug("Adding namespaces to graphs")
        # Bind namespaces to graphs
        for namespace in self.namespaces:
            self.graph.namespace_manager.bind(namespace, self.namespaces[namespace])
        # Same for annotation graph
        for namespace in self.annotationNamespaces:
            self.annotationGraph.namespace_manager.bind(namespace, self.annotationNamespaces[namespace])
        self.log.debug("Adding some schema information (dimension and measure properties) ")
        self.addDataCellProperty()
        self.graph.add((self.namespaces["d2s"]["dimension"], RDF.type, self.namespaces["qb"]["DimensionProperty"]))
        self.graph.add((self.namespaces["d2s"]["label"], RDF.type, RDF["Property"]))

    def addDataCellProperty(self):
        """Add definition of data cell resource to graph"""
        # Property name for data cells is configurable; default is 'hasValue'.
        if len(self.config.get("dataCell", "propertyName")) > 0:
            self.dataCellPropertyName = self.config.get("dataCell", "propertyName")
        else:
            self.dataCellPropertyName = "hasValue"
        self.graph.add(
            (self.namespaces["d2s"][self.dataCellPropertyName], RDF.type, self.namespaces["qb"]["MeasureProperty"])
        )
        # Take labels from config; entries are ':::'-separated, each of the
        # form '<lang>--><label text>'.
        if len(self.config.get("dataCell", "labels")) > 0:
            labels = self.config.get("dataCell", "labels").split(":::")
            for label in labels:
                labelProperties = label.split("-->")
                if len(labelProperties[0]) > 0 and len(labelProperties[1]) > 0:
                    self.graph.add(
                        (
                            self.namespaces["d2s"][self.dataCellPropertyName],
                            RDFS.label,
                            Literal(labelProperties[1], labelProperties[0]),
                        )
                    )
        if len(self.config.get("dataCell", "literalType")) > 0:
            self.graph.add(
                (
                    self.namespaces["d2s"][self.dataCellPropertyName],
                    RDFS.range,
                    URIRef(self.config.get("dataCell", "literalType")),
                )
            )

    def setScope(self, fileBasename):
        """Set the default namespace and base for all URIs of the current workbook"""
        self.fileBasename = fileBasename
        scopeNamespace = self.defaultNamespacePrefix + fileBasename + "/"
        # Annotations go to a different namespace
        annotationScopeNamespace = self.annotationsNamespacePrefix + fileBasename + "/"
        self.log.debug("Adding namespace for {0}: {1}".format(fileBasename, scopeNamespace))
        self.namespaces["scope"] = Namespace(scopeNamespace)
        self.annotationNamespaces["scope"] = Namespace(annotationScopeNamespace)
        # Bind as the default ('' prefix) namespace of each graph.
        self.graph.namespace_manager.bind("", self.namespaces["scope"])
        self.annotationGraph.namespace_manager.bind("", self.annotationNamespaces["scope"])

    def doLink(self):
        """Start tablinker for all sheets in workbook"""
        self.log.info("Starting TabLinker for all sheets in workbook")
        for n in range(self.rb.nsheets):
            self.log.debug("Starting with sheet {0}".format(n))
            self.r_sheet = self.rb.sheet_by_index(n)
            self.w_sheet = self.wb.get_sheet(n)
            self.rowns, self.colns = self.getValidRowsCols()
            # QNames for this sheet are prefixed with the URL-quoted sheet name.
            self.sheet_qname = urllib.quote(re.sub("\s", "_", self.r_sheet.name))
            self.log.debug("Base for QName generator set to: {0}".format(self.sheet_qname))
            self.log.debug("Starting parser")
            self.parseSheet()

    ###
    # Utility Functions
    ###

    def insideMergeBox(self, i, j):
        """
        Check if the specified cell is inside a merge box

        Arguments:
        i -- row
        j -- column

        Returns:
        True/False -- depending on whether the cell is inside a merge box
        """
        self.merged_cells = self.r_sheet.merged_cells
        for crange in self.merged_cells:
            rlo, rhi, clo, chi = crange
            # xlrd merge ranges are half-open at the high end, hence `rhi - 1`.
            if i <= rhi - 1 and i >= rlo and j <= chi - 1 and j >= clo:
                return True
        return False

    def getMergeBoxCoord(self, i, j):
        """
        Get the top-left corner cell of the merge box containing the specified cell

        Arguments:
        i -- row
        j -- column

        Returns:
        (k, l) -- Coordinates of the top-left corner of the merge box
        """
        if not self.insideMergeBox(i, j):
            return (-1, -1)
        self.merged_cells = self.r_sheet.merged_cells
        for crange in self.merged_cells:
            rlo, rhi, clo, chi = crange
            if i <= rhi - 1 and i >= rlo and j <= chi - 1 and j >= clo:
                return (rlo, clo)

    def getType(self, style):
        """Get type for a given excel style. Style name must be prefixed by 'TL '

        Arguments:
        style -- Style (string) to check type for

        Returns:
        String -- The type of this field. In case none is found, 'unknown'
        """
        typematch = re.search("TL\s(.*)", style)
        if typematch:
            cellType = typematch.group(1)
        else:
            cellType = "Unknown"
        return cellType

    def isEmpty(self, i, j):
        """Check whether cell is empty.

        Arguments:
        i -- row
        j -- column

        Returns:
        True/False -- depending on whether the cell is empty
        """
        if (
            self.r_sheet.cell(i, j).ctype == XL_CELL_EMPTY
            or self.r_sheet.cell(i, j).ctype == XL_CELL_BLANK
        ) or self.r_sheet.cell(i, j).value == "":
            return True
        else:
            return False

    def isEmptyRow(self, i, colns):
        """
        Determine whether the row 'i' is empty by iterating over all its cells

        Arguments:
        i -- The index of the row to be checked.
        colns -- The number of columns to be checked

        Returns:
        true -- if the row is empty
        false -- if the row is not empty
        """
        for j in range(0, colns):
            if not self.isEmpty(i, j):
                return False
        return True

    def isEmptyColumn(self, j, rowns):
        """
        Determine whether the column 'j' is empty by iterating over all its cells

        Arguments:
        j -- The index of the column to be checked.
        rowns -- The number of rows to be checked

        Returns:
        true -- if the column is empty
        false -- if the column is not empty
        """
        for i in range(0, rowns):
            if not self.isEmpty(i, j):
                return False
        return True

    def getValidRowsCols(self):
        """
        Determine the number of non-empty rows and columns in the Excel sheet

        Returns:
        rowns -- number of rows
        colns -- number of columns
        """
        colns = number_of_good_cols(self.r_sheet)
        rowns = number_of_good_rows(self.r_sheet)
        # Check whether the number of good columns and rows are correct:
        # trim trailing rows/columns that are entirely empty.
        while self.isEmptyRow(rowns - 1, colns):
            rowns = rowns - 1
        while self.isEmptyColumn(colns - 1, rowns):
            colns = colns - 1
        self.log.debug("Number of rows with content: {0}".format(rowns))
        self.log.debug("Number of columns with content: {0}".format(colns))
        return rowns, colns

    def getQName(self, names):
        """
        Create a valid QName from a string or dictionary of names

        Arguments:
        names -- Either dictionary of names or string of a name.

        Returns:
        qname -- a valid QName for the dictionary or string
        """
        # Dict input (row hierarchies): join all values into a '/'-separated
        # path under the sheet QName.
        if type(names) == dict:
            qname = self.sheet_qname
            for k in names:
                qname = qname + "/" + self.processString(names[k])
        else:
            qname = self.sheet_qname + "/" + self.processString(names)
        self.log.debug("Minted new QName: {}".format(qname))
        return qname

    def processString(self, string):
        """
        Remove illegal characters (comma, brackets, etc) from string, and replace it with underscore. Useful for URIs

        Arguments:
        string -- The string representing the value of the source cell

        Returns:
        processedString -- The processed string
        """
        return urllib.quote(re.sub("\s|\(|\)|,|\.", "_", unicode(string).strip()).encode("utf-8", "ignore"))

    def addValue(self, source_cell_value, altLabel=None):
        """
        Add a "value" + optional label to the graph for a cell in the source Excel sheet. The value is typically
        the value stored in the source cell itself, but may also be a copy of another cell (e.g. in the case of
        'idem.').

        Arguments:
        source_cell_value -- The string representing the value of the source cell

        Returns:
        source_cell_value_qname -- a valid QName for the value of the source cell
        """
        source_cell_value_qname = self.getQName(source_cell_value)
        self.graph.add(
            (
                self.namespaces["scope"][source_cell_value_qname],
                self.namespaces["qb"]["dataSet"],
                self.namespaces["scope"][self.sheet_qname],
            )
        )
        # NOTE(review): relies on self.source_cell_qname having been set by
        # parseSheet() for the cell currently being processed.
        self.graph.add(
            (
                self.namespaces["scope"][self.source_cell_qname],
                self.namespaces["d2s"]["value"],
                self.namespaces["scope"][source_cell_value_qname],
            )
        )
        # If the source_cell_value is actually a dictionary (e.g. in the case of HierarchicalRowHeaders), then use
        # the last element of the row hierarchy as prefLabel
        # Otherwise just use the source_cell_value as prefLabel
        if type(source_cell_value) == dict:
            self.graph.add(
                (
                    self.namespaces["scope"][source_cell_value_qname],
                    self.namespaces["skos"].prefLabel,
                    Literal(source_cell_value.values()[-1], "nl"),
                )
            )
            if altLabel and altLabel != source_cell_value.values()[-1]:
                # If altLabel has a value (typically for HierarchicalRowHeaders) different from the last element
                # in the row hierarchy, we add it as alternative label.
                self.graph.add(
                    (
                        self.namespaces["scope"][source_cell_value_qname],
                        self.namespaces["skos"].altLabel,
                        Literal(altLabel, "nl"),
                    )
                )
        else:
            # Try to parse a date to add the appropriate datatype to the literal
            try:
                isodate.parse_datetime(source_cell_value)
                self.log.debug("Datetime on this cell: %s" % source_cell_value)
                self.graph.add(
                    (
                        self.namespaces["scope"][source_cell_value_qname],
                        self.namespaces["skos"].prefLabel,
                        Literal(source_cell_value, datatype=XSD.datetime),
                    )
                )
            except (ValueError, isodate.isoerror.ISO8601Error, AttributeError):
                self.log.debug("No datetime on this cell")
                self.graph.add(
                    (
                        self.namespaces["scope"][source_cell_value_qname],
                        self.namespaces["skos"].prefLabel,
                        Literal(source_cell_value, "nl"),
                    )
                )
                if altLabel and altLabel != source_cell_value:
                    # If altLabel has a value (typically for HierarchicalRowHeaders) different from the
                    # source_cell_value, we add it as alternative label.
                    self.graph.add(
                        (
                            self.namespaces["scope"][source_cell_value_qname],
                            self.namespaces["skos"].altLabel,
                            Literal(altLabel, "nl"),
                        )
                    )
        return source_cell_value_qname

    def parseSheet(self):
        """
        Parses the currently selected sheet in the workbook, takes no arguments. Iterates over all cells in the
        Excel sheet and produces relevant RDF Triples.
        """
        self.log.info("Parsing {0} rows and {1} columns.".format(self.rowns, self.colns))

        self.column_dimensions = {}
        self.property_dimensions = {}
        self.row_dimensions = {}
        self.rowhierarchy = {}

        # Get dictionary of annotations
        self.annotations = self.r_sheet.cell_note_map

        for i in range(0, self.rowns):
            self.rowhierarchy[i] = {}
            for j in range(0, self.colns):
                # Parse cell data
                self.source_cell = self.r_sheet.cell(i, j)
                self.source_cell_name = cellname(i, j)
                self.style = self.styles[self.source_cell].name
                self.cellType = self.getType(self.style)
                self.source_cell_qname = self.getQName(self.source_cell_name)
                self.log.debug(
                    '({},{}) {}/{}: "{}"'.format(i, j, self.cellType, self.source_cell_name, self.source_cell.value)
                )
                # Try to parse ints to avoid ugly _0 URIs
                try:
                    if int(self.source_cell.value) == self.source_cell.value:
                        self.source_cell.value = int(self.source_cell.value)
                except ValueError:
                    self.log.debug("(%s.%s) No parseable int" % (i, j))
                # Parse annotation (if any)
                if self.config.get("annotations", "enabled") == "1":
                    if (i, j) in self.annotations:
                        self.parseAnnotation(i, j)
                # Parse even if empty
                if self.cellType == "HRowHeader":
                    self.updateRowHierarchy(i, j)
                if self.cellType == "Data":
                    self.parseData(i, j)
                if self.cellType == "ColHeader":
                    self.parseColHeader(i, j)
                if self.cellType == "RowProperty":
                    self.parseRowProperty(i, j)
                if not self.isEmpty(i, j):
                    self.graph.add(
                        (
                            self.namespaces["scope"][self.source_cell_qname],
                            RDF.type,
                            self.namespaces["d2s"][self.cellType],
                        )
                    )
                    self.graph.add(
                        (
                            self.namespaces["scope"][self.source_cell_qname],
                            self.namespaces["d2s"]["cell"],
                            Literal(self.source_cell_name),
                        )
                    )
                    # self.graph.add((self.namespaces['scope'][self.source_cell_qname],self.namespaces['d2s']['col'],Literal(colname(j))))
                    # self.graph.add((self.namespaces['scope'][self.source_cell_qname],self.namespaces['d2s']['row'],Literal(i+1)))
                    # self.graph.add((self.namespaces['scope'][self.source_cell_qname] isrow row
                    if self.cellType == "Title":
                        self.parseTitle(i, j)
                    elif self.cellType == "RowHeader":
                        self.parseRowHeader(i, j)
                    elif self.cellType == "HRowHeader":
                        self.parseHierarchicalRowHeader(i, j)
                    elif self.cellType == "RowLabel":
                        self.parseRowLabel(i, j)
        self.log.info("Done parsing...")

    def updateRowHierarchy(self, i, j):
        """
        Build up lists for hierarchical row headers. Cells marked as hierarchical row header are often empty
        meaning that their intended value is stored somewhere else in the Excel sheet.

        Keyword arguments:
        int i -- row number
        int j -- col number

        Returns:
        New row hierarchy dictionary
        """
        if self.isEmpty(i, j) or str(self.source_cell.value).lower().strip() == "id.":
            # If the cell is empty, and a HierarchicalRowHeader, add the value of the row header above it.
            # If the cell above is not in the rowhierarchy, don't do anything.
            # If the cell is exactly 'id.', add the value of the row header above it.
            try:
                self.rowhierarchy[i][j] = self.rowhierarchy[i - 1][j]
                self.log.debug("({},{}) Copied from above\nRow hierarchy: {}".format(i, j, self.rowhierarchy[i]))
            except:
                # REMOVED because of double slashes in uris
                # self.rowhierarchy[i][j] = self.source_cell.value
                self.log.debug("({},{}) Top row, added nothing\nRow hierarchy: {}".format(i, j, self.rowhierarchy[i]))
        elif str(self.source_cell.value).lower().startswith("id.") or str(self.source_cell.value).lower().startswith(
            "id "
        ):
            # If the cell starts with 'id.', add the value of the row above it, and append the rest of the cell's value.
            suffix = self.source_cell.value[3:]
            try:
                self.rowhierarchy[i][j] = self.rowhierarchy[i - 1][j] + suffix
                self.log.debug("({},{}) Copied from above+suffix\nRow hierarchy {}".format(i, j, self.rowhierarchy[i]))
            except:
                self.rowhierarchy[i][j] = self.source_cell.value
                self.log.debug("({},{}) Top row, added value\nRow hierarchy {}".format(i, j, self.rowhierarchy[i]))
        elif not self.isEmpty(i, j):
            self.rowhierarchy[i][j] = self.source_cell.value
            self.log.debug("({},{}) Added value\nRow hierarchy {}".format(i, j, self.rowhierarchy[i]))
        return self.rowhierarchy

    def parseHierarchicalRowHeader(self, i, j):
        """
        Create relevant triples for the cell marked as HierarchicalRowHeader (i, j are row and column)
        """
        # Use the rowhierarchy to create a unique qname for the cell's contents, give the source_cell's original
        # value as extra argument
        self.log.debug("Parsing HierarchicalRowHeader")
        self.source_cell_value_qname = self.addValue(self.rowhierarchy[i], altLabel=self.source_cell.value)
        # Now that we know the source cell's value qname, add a d2s:isDimension link and the skos:Concept type
        self.graph.add(
            (
                self.namespaces["scope"][self.source_cell_qname],
                self.namespaces["d2s"]["isDimension"],
                self.namespaces["scope"][self.source_cell_value_qname],
            )
        )
        self.graph.add((self.namespaces["scope"][self.source_cell_qname], RDF.type, self.namespaces["skos"].Concept))
        hierarchy_items = self.rowhierarchy[i].items()
        try:
            # Everything but the last hierarchy element is the parent concept.
            parent_values = dict(hierarchy_items[:-1])
            self.log.debug(i, j, "Parent value: " + str(parent_values))
            parent_value_qname = self.getQName(parent_values)
            self.graph.add(
                (
                    self.namespaces["scope"][self.source_cell_value_qname],
                    self.namespaces["skos"]["broader"],
                    self.namespaces["scope"][parent_value_qname],
                )
            )
        except:
            self.log.debug(i, j, "Top of hierarchy")
        # Get the properties to use for the row headers
        try:
            properties = []
            for dim_qname in self.property_dimensions[j]:
                properties.append(dim_qname)
        except KeyError:
            self.log.debug("({}.{}) No row dimension for cell".format(i, j))
        self.row_dimensions.setdefault(i, []).append((self.source_cell_value_qname, properties))

    def parseRowLabel(self, i, j):
        """
        Create relevant triples for the cell marked as Label (i, j are row and column)
        """
        self.log.debug("Parsing Row Label")
        # Get the QName of the HierarchicalRowHeader cell that this label belongs to, based on the rowhierarchy
        # for this row (i)
        hierarchicalRowHeader_value_qname = self.getQName(self.rowhierarchy[i])
        prefLabels = self.graph.objects(
            self.namespaces["scope"][hierarchicalRowHeader_value_qname], self.namespaces["skos"].prefLabel
        )
        for label in prefLabels:
            # If the hierarchicalRowHeader QName already has a preferred label, turn it into a skos:altLabel
            self.graph.remove(
                (self.namespaces["scope"][hierarchicalRowHeader_value_qname], self.namespaces["skos"].prefLabel, label)
            )
            self.graph.add(
                (self.namespaces["scope"][hierarchicalRowHeader_value_qname], self.namespaces["skos"].altLabel, label)
            )
            self.log.debug(
                "Turned skos:prefLabel {} for {} into a skos:altLabel".format(label, hierarchicalRowHeader_value_qname)
            )
        # Add the value of the label cell as skos:prefLabel to the header cell
        self.graph.add(
            (
                self.namespaces["scope"][hierarchicalRowHeader_value_qname],
                self.namespaces["skos"].prefLabel,
                Literal(self.source_cell.value, "nl"),
            )
        )
        # Record that this source_cell_qname is the label for the HierarchicalRowHeader cell
        self.graph.add(
            (
                self.namespaces["scope"][self.source_cell_qname],
                self.namespaces["d2s"]["isLabel"],
                self.namespaces["scope"][hierarchicalRowHeader_value_qname],
            )
        )

    def parseRowHeader(self, i, j):
        """
        Create relevant triples for the cell marked as RowHeader (i, j are row and column)
        """
        rowHeaderValue = ""
        # Don't attach the cell value to the namespace if it's already a URI
        isURI = urlparse(str(self.source_cell.value))
        if isURI.scheme and isURI.netloc:
            rowHeaderValue = URIRef(self.source_cell.value)
        else:
            self.source_cell_value_qname = self.addValue(self.source_cell.value)
            rowHeaderValue = self.namespaces["scope"][self.source_cell_value_qname]
        self.graph.add(
            (self.namespaces["scope"][self.source_cell_qname], self.namespaces["d2s"]["isDimension"], rowHeaderValue)
        )
        self.graph.add((rowHeaderValue, RDF.type, self.namespaces["d2s"]["Dimension"]))
        self.graph.add((rowHeaderValue, RDF.type, self.namespaces["skos"].Concept))
        # Get the properties to use for the row headers
        try:
            properties = []
            for dim_qname in self.property_dimensions[j]:
                properties.append(dim_qname)
        except KeyError:
            self.log.debug("({}.{}) No properties for cell".format(i, j))
        self.row_dimensions.setdefault(i, []).append((rowHeaderValue, properties))
        # Use the column dimensions dictionary to find the objects of the d2s:dimension property
        try:
            for dim_qname in self.column_dimensions[j]:
                self.graph.add(
                    (rowHeaderValue, self.namespaces["d2s"]["dimension"], self.namespaces["scope"][dim_qname])
                )
        except KeyError:
            self.log.debug("({}.{}) No column dimension for cell".format(i, j))
        return

    def parseColHeader(self, i, j):
        """
        Create relevant triples for the cell marked as Header (i, j are row and column)
        """
        if self.isEmpty(i, j):
            if self.insideMergeBox(i, j):
                # Empty cell inside a merge box: use the merge box's top-left
                # cell value instead.
                k, l = self.getMergeBoxCoord(i, j)
                self.source_cell_value_qname = self.addValue(self.r_sheet.cell(k, l).value)
            else:
                return
        else:
            self.source_cell_value_qname = self.addValue(self.source_cell.value)
        self.graph.add(
            (
                self.namespaces["scope"][self.source_cell_qname],
                self.namespaces["d2s"]["isDimension"],
                self.namespaces["scope"][self.source_cell_value_qname],
            )
        )
        self.graph.add(
            (self.namespaces["scope"][self.source_cell_value_qname], RDF.type, self.namespaces["d2s"]["Dimension"])
        )
        self.graph.add((self.namespaces["scope"][self.source_cell_qname], RDF.type, self.namespaces["skos"].Concept))
        # Add the value qname to the column_dimensions list for that column
        self.column_dimensions.setdefault(j, []).append(self.source_cell_value_qname)
        return

    def parseRowProperty(self, i, j):
        """
        Create relevant triples for the cell marked as Property (i, j are row and column)
        """
        if self.isEmpty(i, j):
            if self.insideMergeBox(i, j):
                k, l = self.getMergeBoxCoord(i, j)
                self.source_cell_value_qname = self.addValue(self.r_sheet.cell(k, l).value)
            else:
                return
        else:
            self.source_cell_value_qname = self.addValue(self.source_cell.value)
        self.graph.add(
            (
                self.namespaces["scope"][self.source_cell_qname],
                self.namespaces["d2s"]["isDimensionProperty"],
                self.namespaces["scope"][self.source_cell_value_qname],
            )
        )
        self.graph.add(
            (
                self.namespaces["scope"][self.source_cell_value_qname],
                RDF.type,
                self.namespaces["qb"]["DimensionProperty"],
            )
        )
        self.graph.add((self.namespaces["scope"][self.source_cell_value_qname], RDF.type, RDF["Property"]))
        self.property_dimensions.setdefault(j, []).append(self.source_cell_value_qname)
        return

    def parseTitle(self, i, j):
        """
        Create relevant triples for the cell marked as Title (i, j are row and column)
        """
        self.source_cell_value_qname = self.addValue(self.source_cell.value)
        self.graph.add(
            (
                self.namespaces["scope"][self.sheet_qname],
                self.namespaces["d2s"]["title"],
                self.namespaces["scope"][self.source_cell_value_qname],
            )
        )
        self.graph.add(
            (self.namespaces["scope"][self.source_cell_value_qname], RDF.type, self.namespaces["d2s"]["Dimension"])
        )
        return

    def parseData(self, i, j):
        """
        Create relevant triples for the cell marked as Data (i, j are row and column)
        """
        if self.isEmpty(i, j) and self.config.get("dataCell", "implicitZeros") == "0":
            return
        observation = BNode()
        self.graph.add(
            (self.namespaces["scope"][self.source_cell_qname], self.namespaces["d2s"]["isObservation"], observation)
        )
        self.graph.add((observation, RDF.type, self.namespaces["qb"]["Observation"]))
        self.graph.add((observation, self.namespaces["qb"]["dataSet"], self.namespaces["scope"][self.sheet_qname]))
        # Empty data cells may be interpreted as implicit zeros (configurable).
        if self.isEmpty(i, j) and self.config.get("dataCell", "implicitZeros") == "1":
            self.graph.add((observation, self.namespaces["d2s"][self.dataCellPropertyName], Literal(0)))
        else:
            self.graph.add(
                (observation, self.namespaces["d2s"][self.dataCellPropertyName], Literal(self.source_cell.value))
            )
        # Use the row dimensions dictionary to find the properties that link data values to row headers
        try:
            for (dim_qname, properties) in self.row_dimensions[i]:
                for p in properties:
                    self.graph.add((observation, self.namespaces["d2s"][p], dim_qname))
        except KeyError:
            self.log.debug("({}.{}) No row dimension for cell".format(i, j))
        # Use the column dimensions dictionary to find the objects of the d2s:dimension property
        try:
            for dim_qname in self.column_dimensions[j]:
                self.graph.add((observation, self.namespaces["d2s"]["dimension"], self.namespaces["scope"][dim_qname]))
        except KeyError:
            self.log.debug("({}.{}) No column dimension for cell".format(i, j))

    def parseAnnotation(self, i, j):
        """
        Create relevant triples for the annotation attached to cell (i, j)
        """
        if self.config.get("annotations", "model") == "oa":
            # Create triples according to Open Annotation model
            body = BNode()
            self.annotationGraph.add(
                (
                    self.annotationNamespaces["scope"][self.source_cell_qname],
                    RDF.type,
                    self.annotationNamespaces["oa"]["Annotation"],
                )
            )
            self.annotationGraph.add(
                (
                    self.annotationNamespaces["scope"][self.source_cell_qname],
                    self.annotationNamespaces["oa"]["hasBody"],
                    body,
                )
            )
            self.annotationGraph.add(
                (
                    body,
                    RDF.value,
                    Literal(
                        self.annotations[(i, j)]
                        .text.replace("\n", " ")
                        .replace("\r", " ")
                        .replace("\r\n", " ")
                        .encode("utf-8")
                    ),
                )
            )
            self.annotationGraph.add(
                (
                    self.annotationNamespaces["scope"][self.source_cell_qname],
                    self.annotationNamespaces["oa"]["hasTarget"],
                    self.namespaces["scope"][self.source_cell_qname],
                )
            )
            self.annotationGraph.add(
                (
                    self.annotationNamespaces["scope"][self.source_cell_qname],
                    self.annotationNamespaces["oa"]["annotator"],
                    Literal(self.annotations[(i, j)].author.encode("utf-8")),
                )
            )
            self.annotationGraph.add(
                (
                    self.annotationNamespaces["scope"][self.source_cell_qname],
                    self.annotationNamespaces["oa"]["annotated"],
                    Literal(
                        datetime.datetime.fromtimestamp(os.path.getmtime(self.filename)).strftime("%Y-%m-%d"),
                        datatype=self.annotationNamespaces["xsd"]["date"],
                    ),
                )
            )
            self.annotationGraph.add(
                (
                    self.annotationNamespaces["scope"][self.source_cell_qname],
                    self.annotationNamespaces["oa"]["generator"],
                    URIRef("https://github.com/Data2Semantics/TabLinker"),
                )
            )
            self.annotationGraph.add(
                (
                    self.annotationNamespaces["scope"][self.source_cell_qname],
                    self.annotationNamespaces["oa"]["generated"],
                    Literal(
                        datetime.datetime.now().strftime("%Y-%m-%d"), datatype=self.annotationNamespaces["xsd"]["date"]
                    ),
                )
            )
            self.annotationGraph.add(
                (
                    self.annotationNamespaces["scope"][self.source_cell_qname],
                    self.annotationNamespaces["oa"]["modelVersion"],
                    URIRef("http://www.openannotation.org/spec/core/20120509.html"),
                )
            )
        else:
            # Create triples according to Nanopublications model
            print "Nanopublications not implemented yet!"
def _replace_string_datatyped_literals(graph):
    """Replace every Literal typed xsd:string in *graph* with an untyped Literal.

    HexTuples serialization types plain literals as xsd:string; stripping the
    datatype restores the original (untyped) form so isomorphism holds.
    """
    for triple in graph.triples((None, None, None)):
        if type(triple[2]) == Literal:
            if triple[2].datatype == XSD.string:
                graph.remove((triple[0], triple[1], triple[2]))
                graph.add((triple[0], triple[1], Literal(str(triple[2]))))


def test_roundtrip():
    """Round-trip every parseable NT file in tests/nt through the 'hext' format.

    A file passes when the graph parsed from its HexTuples serialization is
    isomorphic to the graph parsed directly from the NT source.
    """
    # these are some RDF files that HexT can round-trip since the have no
    # literals with no datatype declared:
    TEST_DIR = Path(__file__).parent.absolute() / "nt"
    files_to_skip = {
        "paths-04.nt": "subject literal",
        "even_more_literals.nt": "JSON decoding error",
        "literals-02.nt": "JSON decoding error",
        "more_literals.nt": "JSON decoding error",
        "test.ntriples": "JSON decoding error",
        "literals-05.nt": "JSON decoding error",
        "i18n-01.nt": "JSON decoding error",
        "literals-04.nt": "JSON decoding error",
        "rdflibtest01.nt": "JSON decoding error",
        "rdflibtest05.nt": "JSON decoding error",
    }
    tests = 0
    skipped = 0
    print()
    for f in [x for x in TEST_DIR.glob("**/*") if x.is_file()]:
        tests += 1
        print(f"Test {tests}: {f}")
        if f.name in files_to_skip:
            # NOTE: matches the original behavior — known-bad files are not
            # counted in `skipped`, only unparseable ones are.
            print(f"Skipping: {files_to_skip[f.name]}")
            continue
        try:
            cg = ConjunctiveGraph().parse(f, format="nt")
            # print(cg.serialize(format="n3"))
        except Exception:
            # Was a bare `except:`, which also swallowed KeyboardInterrupt /
            # SystemExit; narrowed to Exception.
            print("Skipping: could not NT parse")
            skipped += 1
            continue
        cg2 = ConjunctiveGraph()
        cg2.parse(
            data=cg.serialize(format="hext"),
            format="hext",
            publicID=cg2.default_context.identifier,
        )
        if cg2.context_aware:
            for context in cg2.contexts():
                _replace_string_datatyped_literals(context)
        else:
            _replace_string_datatyped_literals(cg2)
        # print(cg2.serialize(format="trig"))
        assert cg.isomorphic(cg2)
    print(f"No. tests: {tests}")
    print(f"No. tests skipped: {skipped}")
hasPart = URIRef('http://www.loa-cnr.it/ontologies/DUL.owl#hasPart') follows = URIRef('http://www.loa-cnr.it/ontologies/DUL.owl#follows') involvedEquipment = base_ns['involvedEquipment'] global_msgs_dict = dict() global_variants_dict = dict() global_modules_dict = dict() global_fes_dict = dict() entities_dict = dict() remove_original = False clone_g = ConjunctiveGraph() # TODO: remove NamedIndividual for all entities for s, p, o in original_g.triples((None, RDF.type, OWL.NamedIndividual)): original_g.remove((s, p, o)) for i, clone in enumerate(clones): if i == len(clones) - 1: remove_original = True # copy all device entities for dev in original_g.subjects(RDF.type, device): # their associated triples for s, p, o in original_g.triples((dev, None, None)): new_s = clone + '-' + unicode(s).split('#')[1] new_s = amberg_ns[new_s] if p in (RDF.type, amberg_ns['hasSkill']): if remove_original: original_g.remove((s, p, o)) clone_g.add((new_s, p, o)) elif p in (hasPart, amberg_ns['connectsTo']):
def renderermain(graphinput, user):
    """Render an intent graph for *user*: rewrite bandwidth/time/topology
    placeholders, emit Graphviz + GoJS HTML visualisations, and build an
    OpenNSA reservation script.

    :param graphinput: rdflib graph holding the parsed intent.
    :param user: user identifier passed through to the render* helpers.
    :return: 1 (always; the NSI call itself is not executed here).
    """
    # Record rdf
    time.sleep(5)
    print(" ################ Intent Renderer ###################")
    print("\n")
    rendergraph = ConjunctiveGraph()
    # NOTE(review): the fresh ConjunctiveGraph above is immediately discarded
    # by this rebinding — rendergraph aliases (and mutates) graphinput.
    rendergraph = graphinput
    hasService = URIRef('ex:hasService')
    hasArguments = URIRef('ex:hasArguments')
    hasCondition = URIRef('ex:hasCondition')
    hasBandwidth = URIRef('ex:hasBandwidth')
    hasDate = URIRef('ex:hasDate')
    hasTime = URIRef('ex:hasTime')
    hasZone = URIRef('ex:hasZone')
    # for subj, pred, obj in rendergraph:
    #     print subj, pred, obj
    # Resolve the bandwidth this user is allowed, then replace any
    # "bwnolimit" placeholder triples with the permitted value.
    posbw = renderbw(user, rendergraph)
    print("Rendering bandwidth permissions.... " + posbw)
    # NOTE(review): the graph is mutated while iterating it here and below —
    # confirm the backing rdflib store tolerates concurrent modification.
    for subj, pred, obj in rendergraph:
        if str(obj) == "bwnolimit":
            # MK Update this
            rendergraph.remove((subj, pred, obj))
            rendergraph.add((subj, hasBandwidth, Literal(posbw)))
    # Replace schedule placeholders with concrete date/time/zone triples.
    timefns = rendertime(user, rendergraph)
    for t in timefns:
        for subj, pred, obj in rendergraph:
            # print subj,pred, obj
            if t.service.upper() == str(subj).upper():
                rendergraph.remove((subj, pred, obj))
                fulltime = t.args
                # print fulltime
                tdate = fulltime.split()[0]
                ttime = fulltime.split()[1]
                ttime = ttime.replace(':', ".")
                ztime = fulltime.split()[2]
                rendergraph.add((subj, hasDate, Literal(tdate)))
                rendergraph.add((subj, hasTime, Literal(ttime)))
                rendergraph.add((subj, hasZone, Literal(ztime)))
    # Replace endpoint names with resolved topology identifiers
    # (':' swapped for '.' so Graphviz/GoJS labels stay well-formed).
    endpointdata = rendertopology(user, rendergraph)
    # for a in endpointdata:
    #     print a.service, a.args
    # update graph
    for subj, pred, obj in rendergraph:
        for a in endpointdata:
            if str(obj).upper() == a.service.upper():
                rendergraph.remove((subj, pred, obj))
                dotcheck = a.args
                dotcheck = a.args.replace(':', ".")
                rendergraph.add((subj, pred, Literal(dotcheck)))
    # if unfriendly asked
    # if isolated asked
    # renderprovision() call nsi
    print("Final rendering graph created and saved....")
    # Render the final graph as a Graphviz PNG for the web UI.
    rdot = Digraph(comment='Rendered Intent')
    for subj, pred, obj in rendergraph:
        # print "new"
        # print subj, pred, obj
        rdot.node(subj, subj)
        rdot.node(obj, obj)
        rdot.edge(subj, obj, pred, constraint='false')
    # print(dot.source)
    rdot.format = 'png'
    rdot.render('../static/renderintent.dot', view=False)
    # call an exe file....
    # extracting data from RDF graph constructed
    print("Creating rendered graph in html output....")
    try:
        fname = '../templates/renderedinput.html'
        file = open(fname, 'w')
        # Static GoJS page header; node/link data arrays are appended below.
        firstpart = """<!DOCTYPE html>
<html>
<head>
<title>Rendered Intent</title>
<meta name="description" content="A concept map diagram ." />
<meta charset="UTF-8">
<script src="go.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/gojs/1.6.7/go-debug.js"></script>
<link href="../assets/css/goSamples.css" rel="stylesheet" type="text/css" />
<script src="goSamples.js"></script>
<script id="code">
function init() {
if (window.goSamples) goSamples(); // init for these samples -- you don't need to call this
var $ = go.GraphObject.make; // for conciseness in defining templates
myDiagram = $(go.Diagram, "myDiagramDiv", // must name or refer to the DIV HTML element
{\n initialAutoScale: go.Diagram.Uniform, // an initial automatic zoom-to-fit\n contentAlignment: go.Spot.Center, // align document to the center of the viewport\n layout:\n $(go.ForceDirectedLayout, // automatically spread nodes apart\n { defaultSpringLength: 30, defaultElectricalCharge: 100 })\n });\n // define each Node's appearance\n myDiagram.nodeTemplate = $(go.Node, "Auto", // the whole node panel
{ locationSpot: go.Spot.Center },
// define the node's outer shape, which will surround the TextBlock\n $(go.Shape, "Rectangle",
{ fill: $(go.Brush, "Linear", { 0: "rgb(254, 201, 0)", 1: "rgb(254, 162, 0)" }), stroke: "black" }),
$(go.TextBlock,
{ font: "bold 10pt helvetica, bold arial, sans-serif", margin: 4 },
new go.Binding("text", "text"))
);
// replace the default Link template in the linkTemplateMap
myDiagram.linkTemplate = $(go.Link, // the whole link panel
$(go.Shape, // the link shape
{ stroke: "black" }),
$(go.Shape, // the arrowhead
{ toArrow: "standard", stroke: null }),
$(go.Panel, "Auto",
$(go.Shape, // the label background, which becomes transparent around the edges
{ fill: $(go.Brush, "Radial", { 0: "rgb(240, 240, 240)", 0.3: "rgb(240, 240, 240)", 1: "rgba(240, 240, 240, 0)" }), stroke: null }),
$(go.TextBlock, // the label text
{ textAlign: "center", font: "10pt helvetica, arial, sans-serif", stroke: "#555555", margin: 4 },
new go.Binding("text", "text"))
));
// create the model for the concept map\n"""
        file.write(firstpart)
        cnode = 0
        # create a list of ids for plotting the js graph
        uniqueidlist = []
        for subj, pred, obj in rendergraph:
            flagnodefound = 0
            for j in uniqueidlist:
                if Literal(subj) == j:
                    flagnodefound = 1
            if flagnodefound == 0:
                uniqueidlist.append(Literal(subj))
        for subj, pred, obj in rendergraph:
            flagnodefound = 0
            for j in uniqueidlist:
                if Literal(obj) == j:
                    flagnodefound = 1
                    # print "found name " + j
            if flagnodefound == 0:
                # print obj
                uniqueidlist.append(Literal(obj))
        # adding links
        nodeDAstring = ''
        linkDAstring = ''
        tempstr = ""
        # Build the GoJS nodeDataArray entries, keyed by list index.
        for j in uniqueidlist:
            # print nodeDAstring
            # print uniqueidlist.index(j)
            checkcommas = j.replace("'", "")
            tempstr = "{ key:" + str(uniqueidlist.index(j)) + \
                ", text: '" + checkcommas + "' },"
            # print tempstr
            nodeDAstring += tempstr
        tempstr = ""
        # Build the GoJS linkDataArray entries (from/to are node indexes).
        for subj, pred, obj in rendergraph:
            # print uniqueidlist.index(Literal(subj)),
            # uniqueidlist.index(Literal(obj)), pred
            tempstr = "{ from:" + str(uniqueidlist.index(
                Literal(subj))) + ", to:" + str(
                uniqueidlist.index(
                    Literal(obj))) + ", text: '" + Literal(pred) + "'},"
            linkDAstring += tempstr
        file.write(""" var nodeDataArray = [""")
        file.write(nodeDAstring)
        file.write("{} ];")
        file.write(" var linkDataArray = [")
        file.write(linkDAstring)
        file.write("{} ];")
        secondpart = """ myDiagram.model = new go.GraphLinksModel(nodeDataArray, linkDataArray);
}
</script>
</head>
<body onload="init()">
<div id="sample">
<h3>Rendered Input</h3>
<div id="myDiagramDiv" style="background-color: whitesmoke; border: solid 1px black; width: 100%; height: 700px"></div>
<p> The Rendered intent created by INDIRA to call NSI. </p>
</div>
</body>
</html>"""
        file.write(secondpart)
        file.close()
    except:
        print("file writing error occured")
        sys.exit(0)
    #############################
    print("Calling NSI......")
    # check if file exists
    try:
        os.remove('./nsibash.sh')
    except OSError:
        pass
    # cmd ='./test'
    # os.system(cmd)
    # test=subprocess.Popen(["..\..\opennsa\./onsa --help"],stdout=subprocess.PIPE)
    # output=test.communicate()[0]
    # need to extract data from graph:
    # endpoints, bandwidth, and GMT-normalized start/stop times for NSI.
    locallysave_eps = []
    localsrcname = ""
    localdestname = ""
    # NOTE(review): localbwvalue / newstarttime / newstoptime are only bound
    # when the matching triples exist in the graph; the params construction
    # below raises NameError otherwise — confirm upstream always adds them.
    for subj, pred, obj in rendergraph:
        # print subj, pred, obj
        if Literal(subj).lower() == 'connect':
            locallysave_eps.append(Literal(obj))
            # print obj
        if Literal(subj).lower() == 'disconnect':
            locallysave_eps.append(Literal(obj))
            # print obj
        if Literal(subj).lower() == 'transfer':
            locallysave_eps.append(Literal(obj))
        if pred == hasBandwidth:
            if Literal(obj).lower() == 'unlimited':
                localbwvalue = 100
            else:
                numberextracted = Literal(obj)
                # print numberextracted
                localbwvalue = int(numberextracted)
                # print localbwvalue
        if Literal(subj).upper() == 'SCHEDULESTART':
            # Defaults, overwritten by the hasDate/hasTime triples when seen.
            year = 2016
            month = 11
            day = 13
            hr = 12
            minu = 00
            secs = 00
            localzone = 'GMT'
            # convertime=''
            # print subj
            if pred == hasDate:
                datestring = Literal(obj)
                year, month, day = datestring.split("-")
            if pred == hasTime:
                timestring_local = Literal(obj)
                hr, minu, secs = timestring_local.split(".")
            if pred == hasZone:
                # Convert the local start time to GMT in NSI's
                # YYYY-MM-DDTHH:MM:SS format.
                localzone = Literal(obj).split("+")[0]
                convertzone = timezone(localzone)
                converttime = convertzone.localize(
                    datetime(int(year), int(month), int(day), int(hr),
                             int(minu), int(secs)))
                # print converttime
                converttime2 = converttime.astimezone(timezone('GMT'))
                # print converttime2
                converttime2 = str(converttime2)
                newdatensi = converttime2.split(" ")[0]
                timehalf = converttime2.split(" ")[1]
                newtimensi = timehalf.split("+")[0]
                newstarttime = newdatensi + "T" + newtimensi
                # print newstarttime
        if Literal(subj).upper() == 'SCHEDULESTOP':
            # print "stop"
            year = 2016
            month = 11
            day = 13
            hr = 17
            minu = 00
            secs = 00
            localzone = 'GMT'
            # convertime=''
            # print subj
            if pred == hasDate:
                datestring = Literal(obj)
                year, month, day = datestring.split("-")
            if pred == hasTime:
                timestring_local = Literal(obj)
                hr, minu, secs = timestring_local.split(".")
            if pred == hasZone:
                # Same GMT conversion for the stop time.
                localzone = Literal(obj).split("+")[0]
                convertzone = timezone(localzone)
                converttime = convertzone.localize(
                    datetime(int(year), int(month), int(day), int(hr),
                             int(minu), int(secs)))
                # print converttime
                converttime2 = converttime.astimezone(timezone('GMT'))
                # print converttime2
                converttime2 = str(converttime2)
                newdatensi = converttime2.split(" ")[0]
                timehalf = converttime2.split(" ")[1]
                newtimensi = timehalf.split("+")[0]
                newstoptime = newdatensi + "T" + newtimensi
                # print newstoptime
    # remove after testing
    # localsrcname=locallysave_eps[0]
    # localdestname=locallysave_eps[1]
    # print "Connection points: " + len(locallysave_eps)
    if len(locallysave_eps) >= 2:
        localsrcname = locallysave_eps[0]
        localdestname = locallysave_eps[1]
    else:
        print("NSI called")
        # only takes two site names as arguments. Please start again!"
        time.sleep(2)
    globalidnsi = "urn:uuid:6e1f288a-5a26-4ad8-a9bc-eb91785cee15"
    # print localsrcname
    # print localdestname
    # HARD CODED VALUES
    hardsource = "es.net:2013::lbl-mr2:xe-9_3_0:+#1000"
    harddestination = "es.net:2013::bnl-mr2:xe-1_2_0:+#1000"
    ##########################
    # print "Creating bash file...."
    # Write the OpenNSA reserve/provision invocation to a bash script.
    try:
        fname = './nsibash.sh'
        # print "h"
        file = open(fname, 'w')
        file.write("#!/bin/bash")
        file.write("\n")
        file.write("cd ../../opennsa")
        file.write("\n")
        # print "g"
        # print "constructing nsi commands...."
        params = "./onsa reserveprovision"
        # print params
        params = params + " -g " + globalidnsi
        # print params
        params = params + " -d " + harddestination  # localdestname
        # print params
        params = params + " -s " + hardsource  # localsrcname
        # print params
        params = params + " -b " + str(localbwvalue)
        # print params
        # print newstarttime
        # print newstoptime
        params = params + " -a " + newstarttime
        # print params
        params = params + " -e " + newstoptime
        # print params
        params = params + " -u https://nsi-aggr-west.es.net:443/nsi-v2/ConnectionServiceProvider"
        params = params + " -p es.net:2013:nsa:nsi-aggr-west"
        params = params + " -r canada.eh:2016:nsa:requester"
        params = params + " -h 198.128.151.17 -o 8443"
        params = params + " -l /etc/hostcert/muclient.crt -k /etc/hostcert/muclient.key"
        params = params + " -i /etc/ssl/certs/ -y -x -z -v -q;"
        # print params
        file.write(params)
        file.write("\n")
        file.write("exit;")
        file.close()
    except:
        pass
        #####print("file writing error occured") ###
        # sys.exit(0)
    # print "Running the bash file...."
    time.sleep(2)
    print("\n\n")
    print("OOPS! Something has gone horribly wrong!")
    return 1
class RDFCrawler:
    """Recursive RDF crawler: parses the root URI into a persistent
    ConjunctiveGraph and follows object URIs restricted to a set of
    allowed domains."""

    logger = logging.getLogger(__name__)

    def __init__(self, uri, domains=None):
        """
        :param uri: root URI to start crawling .
        :param domains: list of permits domains to crawl.
        """
        self.root = uri
        self.graph_route = 'graph_store_%s' % hash(self.root)
        self.graph = ConjunctiveGraph('Sleepycat')
        self.graph.open(self.graph_route, create=True)
        # Copy into a fresh set: the previous `domains=set()` default was a
        # mutable default argument that `.add(uri)` mutated, silently sharing
        # filter domains across every instance created without `domains`.
        self._filter_domains = set(domains) if domains is not None else set()
        self._filter_domains.add(uri)
        self.last_process_time = 0.0
        self.lock = RLock()

    def filter_uris(self, uri_list):
        """
        :param uri_list: list of URIs to be filtered.
        :return: filtered list of URIs.
        """
        # Note: a URI matching several domains appears once per match,
        # preserving the original behavior.
        return [
            uri for uri in uri_list
            for match in self._filter_domains
            if match in str(uri)
        ]

    def _has_context(self, graph, subject):
        """
        :param subject: the URIRef or URI to check if it has current context.
        :return: True if subject has a current context.
        """
        return len(graph.get_context(self._get_context_id(subject))) > 1

    @staticmethod
    def _get_context_id(subject):
        """
        :param subject: URIRef or URI from which the get context id.
        :return: context id of the resource.
            Example:
                subject    -> http://www.example.org/#fragment
                context_id -> http://www.example.org/
        """
        return str(subject).split('#')[0]

    def start(self):
        """start method for crawling."""
        self.lock.acquire(True)
        try:
            # Erase old graph in a single call: the previous
            # `for q in self.graph.quads(): self.graph.remove(q)` both
            # mutated the store while iterating it and handed 4-tuples to
            # remove(), which expects an (s, p, o) triple.
            self.graph.remove((None, None, None))
            # Crawl for data
            logging.info('Start crawling: %s' % self.root)
            start_time = time.time()
            self._crawl([self.root])
            end_time = time.time()
            self.last_process_time = end_time - start_time
            logging.info(
                'Crawling complete after: %s seconds with %s predicates.' %
                (self.last_process_time, len(self.graph)))
        finally:
            # Always release, even if parsing/crawling raises, so the
            # crawler is not left permanently locked.
            self.lock.release()

    def _crawl(self, uri_list):
        """Recursive method that crawl RDF objects

        :param uri_list: list of URIs to crawl
        """
        if len(uri_list) > 0:
            for uri in uri_list:
                try:
                    # A few considerations about parsing params.
                    # publicID = uri due to redirection issues
                    # Format = None due to default params use 'XML'
                    self.graph.parse(uri, publicID=uri, format=None)
                    logging.info('[OK]: %s' % uri)
                except Exception as e:
                    logging.info('[Error]: %s: %s' % (uri, e))
            # Check that there are context that remains without parsing
            objects = set([
                self._get_context_id(o)
                for o in set(self.graph.objects(None, None))
                if isinstance(o, URIRef)
                and not self._has_context(self.graph, o)
            ])
            self._crawl(self.filter_uris(objects))
class ContextTestCase(unittest.TestCase):
    """Exercises context (named graph) behavior of a ConjunctiveGraph
    backed by FastStore: adding/removing triples in specific contexts and
    querying across all of them."""

    def setUp(self):
        # Fresh store and graph per test; fixed URIRefs reused throughout.
        self.store = FastStore()
        self.graph = ConjunctiveGraph(self.store)
        self.michel = URIRef(u'michel')
        self.tarek = URIRef(u'tarek')
        self.bob = URIRef(u'bob')
        self.likes = URIRef(u'likes')
        self.hates = URIRef(u'hates')
        self.pizza = URIRef(u'pizza')
        self.cheese = URIRef(u'cheese')
        self.c1 = URIRef(u'context-1')
        self.c2 = URIRef(u'context-2')

    def addStuff(self):
        # Populate context c1 with the canonical 7-triple fixture.
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)
        graph.add((tarek, likes, pizza))
        graph.add((tarek, likes, cheese))
        graph.add((michel, likes, pizza))
        graph.add((michel, likes, cheese))
        graph.add((bob, likes, cheese))
        graph.add((bob, hates, pizza))
        graph.add((bob, hates, michel))  # gasp!

    def removeStuff(self):
        # Exact mirror of addStuff: removes the same 7 triples from c1.
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)
        graph.remove((tarek, likes, pizza))
        graph.remove((tarek, likes, cheese))
        graph.remove((michel, likes, pizza))
        graph.remove((michel, likes, cheese))
        graph.remove((bob, likes, cheese))
        graph.remove((bob, hates, pizza))
        graph.remove((bob, hates, michel))  # gasp!

    def addStuffInMultipleContexts(self):
        # The same triple is added to the default context, c1 and c2.
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!
        # add to default context
        self.graph.add(triple)
        # add to context 1
        graph = Graph(self.graph.store, c1)
        graph.add(triple)
        # add to context 2
        graph = Graph(self.graph.store, c2)
        graph.add(triple)

    def testConjunction(self):
        # A triple added only to c1 must still be visible via the
        # conjunctive (all-contexts) graph, keeping the lengths equal.
        self.addStuffInMultipleContexts()
        triple = (self.pizza, self.likes, self.pizza)
        # add to context 1
        contextualizedGraph = Graph(self.graph.store, self.c1)
        contextualizedGraph.add(triple)
        print(self.store.statements())
        self.assertEqual(len(self.graph), len(contextualizedGraph))

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testLenInOneContext(self):
        c1 = self.c1
        # make sure context is empty
        self.graph.remove_context(self.graph.get_context(c1))
        contextualizedGraph = Graph(self.graph.store, c1)
        initialLen = len(self.graph)
        for i in range(0, 10):
            contextualizedGraph.add((BNode(), self.hates, self.hates))
        self.assertEqual(len(contextualizedGraph), initialLen + 10)
        self.assertEqual(len(self.graph.get_context(c1)), initialLen + 10)
        self.graph.remove_context(self.graph.get_context(c1))
        self.assertEqual(len(self.graph), initialLen)
        self.assertEqual(len(contextualizedGraph), 0)

    def testLenInMultipleContexts(self):
        oldLen = len(self.graph)
        self.addStuffInMultipleContexts()
        # addStuffInMultipleContexts is adding the same triple to
        # three different contexts. So it's only + 1
        self.assertEqual(len(self.graph), oldLen + 1)
        graph = Graph(self.graph.store, self.c1)
        self.assertEqual(len(graph), oldLen + 1)

    def testRemoveInMultipleContexts(self):
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!
        self.addStuffInMultipleContexts()
        # triple should be still in store after removing it from c1 + c2
        self.assertTrue(triple in self.graph)
        graph = Graph(self.graph.store, c1)
        graph.remove(triple)
        self.assertTrue(triple in self.graph)
        graph = Graph(self.graph.store, c2)
        graph.remove(triple)
        self.assertTrue(triple in self.graph)
        # now fully remove
        self.graph.remove(triple)
        self.assertTrue(triple not in self.graph)
        # add again and see if remove without context removes all triples!
        self.addStuffInMultipleContexts()
        self.graph.remove(triple)
        self.assertTrue(triple not in self.graph)

    def testContexts(self):
        # Both c1 and c2 must be reported as contexts, overall and for
        # the specific triple.
        triple = (self.pizza, self.hates, self.tarek)  # revenge!
        self.addStuffInMultipleContexts()

        def cid(c):
            return c.identifier
        self.assertTrue(self.c1 in map(cid, self.graph.contexts()))
        self.assertTrue(self.c2 in map(cid, self.graph.contexts()))
        contextList = list(map(cid, list(self.graph.contexts(triple))))
        self.assertTrue(self.c1 in contextList, (self.c1, contextList))
        self.assertTrue(self.c2 in contextList, (self.c2, contextList))

    def testRemoveContext(self):
        c1 = self.c1
        self.addStuffInMultipleContexts()
        self.assertEqual(len(Graph(self.graph.store, c1)), 1)
        self.assertEqual(len(self.graph.get_context(c1)), 1)
        self.graph.remove_context(self.graph.get_context(c1))
        # NOTE(review): contexts() yields Graph objects while self.c1 is a
        # URIRef, so this membership test compares across types — confirm
        # the intended check is on context identifiers.
        self.assertTrue(self.c1 not in self.graph.contexts())

    def testRemoveAny(self):
        Any = None
        self.addStuffInMultipleContexts()
        self.graph.remove((Any, Any, Any))
        self.assertEqual(len(self.graph), 0)

    def testTriples(self):
        # Exhaustive triple-pattern queries, run both with (c1triples) and
        # without (triples) a context; results must match because all data
        # lives in c1.
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        asserte = self.assertEqual
        triples = self.graph.triples
        graph = self.graph
        c1graph = Graph(self.graph.store, c1)
        c1triples = c1graph.triples
        Any = None
        self.addStuff()
        # unbound subjects with context
        asserte(len(list(c1triples((Any, likes, pizza)))), 2)
        asserte(len(list(c1triples((Any, hates, pizza)))), 1)
        asserte(len(list(c1triples((Any, likes, cheese)))), 3)
        asserte(len(list(c1triples((Any, hates, cheese)))), 0)
        # unbound subjects without context, same results!
        asserte(len(list(triples((Any, likes, pizza)))), 2)
        asserte(len(list(triples((Any, hates, pizza)))), 1)
        asserte(len(list(triples((Any, likes, cheese)))), 3)
        asserte(len(list(triples((Any, hates, cheese)))), 0)
        # unbound objects with context
        asserte(len(list(c1triples((michel, likes, Any)))), 2)
        asserte(len(list(c1triples((tarek, likes, Any)))), 2)
        asserte(len(list(c1triples((bob, hates, Any)))), 2)
        asserte(len(list(c1triples((bob, likes, Any)))), 1)
        # unbound objects without context, same results!
        asserte(len(list(triples((michel, likes, Any)))), 2)
        asserte(len(list(triples((tarek, likes, Any)))), 2)
        asserte(len(list(triples((bob, hates, Any)))), 2)
        asserte(len(list(triples((bob, likes, Any)))), 1)
        # unbound predicates with context
        asserte(len(list(c1triples((michel, Any, cheese)))), 1)
        asserte(len(list(c1triples((tarek, Any, cheese)))), 1)
        asserte(len(list(c1triples((bob, Any, pizza)))), 1)
        asserte(len(list(c1triples((bob, Any, michel)))), 1)
        # unbound predicates without context, same results!
        asserte(len(list(triples((michel, Any, cheese)))), 1)
        asserte(len(list(triples((tarek, Any, cheese)))), 1)
        asserte(len(list(triples((bob, Any, pizza)))), 1)
        asserte(len(list(triples((bob, Any, michel)))), 1)
        # unbound subject, objects with context
        asserte(len(list(c1triples((Any, hates, Any)))), 2)
        asserte(len(list(c1triples((Any, likes, Any)))), 5)
        # unbound subject, objects without context, same results!
        asserte(len(list(triples((Any, hates, Any)))), 2)
        asserte(len(list(triples((Any, likes, Any)))), 5)
        # unbound predicates, objects with context
        asserte(len(list(c1triples((michel, Any, Any)))), 2)
        asserte(len(list(c1triples((bob, Any, Any)))), 3)
        asserte(len(list(c1triples((tarek, Any, Any)))), 2)
        # unbound predicates, objects without context, same results!
        asserte(len(list(triples((michel, Any, Any)))), 2)
        asserte(len(list(triples((bob, Any, Any)))), 3)
        asserte(len(list(triples((tarek, Any, Any)))), 2)
        # unbound subjects, predicates with context
        asserte(len(list(c1triples((Any, Any, pizza)))), 3)
        asserte(len(list(c1triples((Any, Any, cheese)))), 3)
        asserte(len(list(c1triples((Any, Any, michel)))), 1)
        # unbound subjects, predicates without context, same results!
        asserte(len(list(triples((Any, Any, pizza)))), 3)
        asserte(len(list(triples((Any, Any, cheese)))), 3)
        asserte(len(list(triples((Any, Any, michel)))), 1)
        # all unbound with context
        asserte(len(list(c1triples((Any, Any, Any)))), 7)
        # all unbound without context, same result!
        asserte(len(list(triples((Any, Any, Any)))), 7)
        # Helper accessors must agree whether called on the conjunctive
        # graph or on the c1 context graph.
        for c in [graph, self.graph.get_context(c1)]:
            # unbound subjects
            asserte(set(c.subjects(likes, pizza)), set((michel, tarek)))
            asserte(set(c.subjects(hates, pizza)), set((bob, )))
            asserte(set(c.subjects(likes, cheese)), set([tarek, bob, michel]))
            asserte(set(c.subjects(hates, cheese)), set())
            # unbound objects
            asserte(set(c.objects(michel, likes)), set([cheese, pizza]))
            asserte(set(c.objects(tarek, likes)), set([cheese, pizza]))
            asserte(set(c.objects(bob, hates)), set([michel, pizza]))
            asserte(set(c.objects(bob, likes)), set([cheese]))
            # unbound predicates
            asserte(set(c.predicates(michel, cheese)), set([likes]))
            asserte(set(c.predicates(tarek, cheese)), set([likes]))
            asserte(set(c.predicates(bob, pizza)), set([hates]))
            asserte(set(c.predicates(bob, michel)), set([hates]))
            asserte(set(c.subject_objects(hates)),
                    set([(bob, pizza), (bob, michel)]))
            asserte(
                set(c.subject_objects(likes)),
                set([(tarek, cheese), (michel, cheese), (michel, pizza),
                     (bob, cheese), (tarek, pizza)]))
            asserte(set(c.predicate_objects(michel)),
                    set([(likes, cheese), (likes, pizza)]))
            asserte(set(c.predicate_objects(bob)),
                    set([(likes, cheese), (hates, pizza), (hates, michel)]))
            asserte(set(c.predicate_objects(tarek)),
                    set([(likes, cheese), (likes, pizza)]))
            asserte(set(c.subject_predicates(pizza)),
                    set([(bob, hates), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(cheese)),
                    set([(bob, likes), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(michel)), set([(bob, hates)]))
            asserte(
                set(c),
                set([(bob, hates, michel), (bob, likes, cheese),
                     (tarek, likes, pizza), (michel, likes, pizza),
                     (michel, likes, cheese), (bob, hates, pizza),
                     (tarek, likes, cheese)]))
        # remove stuff and make sure the graph is empty again
        self.removeStuff()
        asserte(len(list(c1triples((Any, Any, Any)))), 0)
        asserte(len(list(triples((Any, Any, Any)))), 0)
from rdflib import ConjunctiveGraph g = ConjunctiveGraph() g.parse('../data/hm_17_1.rss') #len(g) import sys sys.path.append('../') from model.namespaces import * from model.bibo import Article from rdfalchemy import rdfSubject nsm = g._get_namespace_manager() nsm.bind('prism', 'http:prism.com') print g.serialize() #PRISM2 = Namespace('http://prismstandard.org/namespaces/basic/2.0/') for s, p, o in g.triples((None, RDF.type, RSS.item)): g.add((s, p, BIBO.Article)) g.remove((s, p, o)) rdfSubject.db = g l = list(Article.ClassInstances()) a = l[1] print a.title print a.creators print a.sPg
def graph_plan(plan, fountain, agp):
    """Build an RDF representation (ConjunctiveGraph) of a search plan.

    :param plan: dict with 'prefixes' and 'plan' (triple-pattern entries,
        each carrying paths, pattern, hints and cycles).
    :param fountain: schema service used for type sub/super hierarchies.
    :param agp: agora graph pattern; its roots bound which patterns may
        extend cycles.
    :return: the populated plan graph.
    """
    def extract_cycle_roots():
        # Map each described cycle id to the subtree of its expected types.
        c_roots = {}
        for c_id, c_node in described_cycles.items():
            c_root_types = set({})
            for crt in plan_graph.objects(c_node, AGORA.expectedType):
                crt_qname = plan_graph.qname(crt)
                c_root_types.update(_type_subtree(fountain, crt_qname))
            c_roots[c_id] = c_root_types
        return c_roots

    def inc_tree_length(tree, l):
        # Accumulate the total number of steps attributed to a search tree.
        if tree not in tree_lengths:
            tree_lengths[tree] = 0
        tree_lengths[tree] += l

    def add_variable(p_node, vid, subject=True):
        # Attach a Variable node ('?x' -> 'var_x') as pattern subject/object.
        sub_node = BNode(str(vid).replace('?', 'var_'))
        if subject:
            plan_graph.add((p_node, AGORA.subject, sub_node))
        else:
            plan_graph.add((p_node, AGORA.object, sub_node))
        plan_graph.set((sub_node, RDF.type, AGORA.Variable))
        plan_graph.set((sub_node, RDFS.label, Literal(str(vid), datatype=XSD.string)))

    def describe_cycle(cycle_id, cg):
        # Materialize a cycle as a chain of AGORA.next step nodes inside
        # its own named context; returns the cycle's root node.
        c_node = BNode('cycle{}'.format(cycle_id))
        cg = cg.get_context(c_node)
        cg.add((c_node, RDF.type, AGORA.Cycle))
        previous_node = c_node
        c_steps = cycles[cycle_id]
        cycle_type = c_steps[0].get('type')
        for et in _type_subtree(fountain, cycle_type):
            cg.add((c_node, AGORA.expectedType, __extend_uri(prefixes, et)))
        for j, step in enumerate(c_steps):
            prop = step.get('property')
            b_node = BNode(previous_node.n3() + '/' + prop)
            cg.add((b_node, AGORA.onProperty, __extend_uri(prefixes, prop)))
            c_expected_type = step.get('type')
            cg.add((b_node, AGORA.expectedType, __extend_uri(prefixes, c_expected_type)))
            cg.add((previous_node, AGORA.next, b_node))
            previous_node = b_node
        return c_node

    def is_extensible(node, node_patterns):
        # A node is extensible when every pattern near it (its own and its
        # siblings') has a subject contained in the AGP roots.
        extensible = True
        near_patterns = node_patterns.copy()
        for prev in tree_graph.subjects(AGORA.next, node):
            for sib_node in tree_graph.objects(prev, AGORA.next):
                if sib_node != res.n:
                    near_patterns.update(set(tree_graph.objects(sib_node, AGORA.byPattern)))
        subjects = set()
        for p_node in near_patterns:
            p_subject = list(plan_graph.objects(p_node, AGORA.subject)).pop()
            if not isinstance(p_subject, URIRef):
                # Variable subjects are compared by their label.
                subject_str = list(plan_graph.objects(p_subject, RDFS.label)).pop().toPython()
            else:
                subject_str = str(p_subject)
            subjects.add(subject_str)
        if subjects and set.difference(subjects, roots):
            extensible = False
        return extensible

    def enrich_type_patterns(node_patterns):
        # Expand each pattern's expected types with their full subtrees.
        for p_node in node_patterns:
            p_pred = list(plan_graph.objects(p_node, AGORA.predicate)).pop()
            if p_pred == RDF.type:
                p_type = list(plan_graph.objects(p_node, AGORA.object)).pop()
                if isinstance(p_type, URIRef):
                    for et in [et for et in expected_types if et == p_type]:
                        q_expected_types = _type_subtree(fountain, tree_graph.qname(et))
                        for et_q in q_expected_types:
                            tree_graph.add((res.n, AGORA.expectedType, __extend_uri(prefixes, et_q)))
            else:
                for et in expected_types:
                    q_expected_types = _type_subtree(fountain, tree_graph.qname(et))
                    for et_q in q_expected_types:
                        tree_graph.add((res.n, AGORA.expectedType, __extend_uri(prefixes, et_q)))

    def apply_cycle_extensions(c_roots, node_types):
        # Link nodes to the cycles they can start; drop cycles nobody uses.
        for c_id, root_types in c_roots.items():
            found_extension = False
            for n, expected in node_types.items():
                if set.intersection(set(root_types), set(expected)):
                    tree_graph.add((n, AGORA.isCycleStartOf, described_cycles[c_id]))
                    found_extension = True
            if not found_extension:
                plan_graph.remove_context(plan_graph.get_context(described_cycles[c_id]))

    def include_path(elm, p_seeds, p_steps, cycles, check):
        # Add one search path (a chain of AGORA.next steps ending in the
        # current pattern) to the tree identified by the seeds' MD5.
        m = hashlib.md5()
        for s in p_seeds:
            m.update(s)
        elm_uri = __extend_uri(prefixes, elm)
        # NOTE(review): .encode('base64') is Python 2 only.
        b_tree = BNode(m.digest().encode('base64').strip())
        s_trees.add(b_tree)
        tree_graph.set((b_tree, RDF.type, AGORA.SearchTree))
        tree_graph.add((b_tree, AGORA.fromType, elm_uri))
        for seed in p_seeds:
            tree_graph.add((b_tree, AGORA.hasSeed, URIRef(seed)))
        for cycle_id in filter(lambda x: x not in described_cycles.keys(), cycles):
            c_node = describe_cycle(cycle_id, plan_graph)
            described_cycles[cycle_id] = c_node
            plan_graph.get_context(c_node).add((b_tree, AGORA.goesThroughCycle, c_node))
        previous_node = b_tree
        inc_tree_length(b_tree, len(p_steps))
        root_index = -1
        pp = []
        for j, step in enumerate(p_steps):
            prop = step.get('property')
            pp.append(prop)
            path_root = step.get('root', None)
            if path_root and root_index < 0:
                root_index = j
            base_id = path_root or b_tree
            base_id += '/'
            if j < len(p_steps) - 1 or (pattern[1] == RDF.type and isinstance(pattern[2], URIRef)):
                # Intermediate steps describe the property being followed.
                b_node = BNode(base_id + '/'.join(pp))
                tree_graph.add((b_node, AGORA.onProperty, __extend_uri(prefixes, prop)))
            else:
                # Last step of a non-type pattern carries the expected type.
                b_node = BNode(base_id + '/'.join(pp))
                tree_graph.add((b_node, AGORA.expectedType, __extend_uri(prefixes, step.get('type'))))
            tree_graph.add((previous_node, AGORA.next, b_node))
            previous_node = b_node
        p_node = _get_pattern_node(pattern, patterns)
        if pattern[1] == RDF.type and isinstance(pattern[2], URIRef):
            # Type patterns get a dedicated terminal node named after the
            # pattern's qnames.
            b_id = '{}_{}_{}'.format(pattern[0].n3(plan_graph.namespace_manager),
                                     pattern[1].n3(plan_graph.namespace_manager),
                                     pattern[2].n3(plan_graph.namespace_manager))
            b_node = BNode(b_id)
            tree_graph.add((b_node, AGORA.expectedType, pattern[2]))
            tree_graph.add((previous_node, AGORA.next, b_node))
            tree_graph.add((b_node, AGORA.byPattern, p_node))
            if check:
                tree_graph.add((b_node, AGORA.checkType, Literal(check)))
        else:
            tree_graph.add((previous_node, AGORA.byPattern, p_node))

    plan_graph = ConjunctiveGraph()
    plan_graph.bind('agora', AGORA)
    prefixes = plan.get('prefixes')
    ef_plan = plan.get('plan')
    tree_lengths = {}
    s_trees = set([])
    patterns = {}
    described_cycles = {}
    for (prefix, u) in prefixes.items():
        plan_graph.bind(prefix, u)
    tree_graph = plan_graph.get_context('trees')
    for i, tp_plan in enumerate(ef_plan):
        paths = tp_plan.get('paths')
        pattern = tp_plan.get('pattern')
        hints = tp_plan.get('hints')
        cycles = {}
        for c in tp_plan.get('cycles'):
            cid = str(c['cycle'])
            c_steps = c['steps']
            cycles[cid] = c_steps
            if len(c_steps) > 1:
                # Multi-step cycles are also registered reversed.
                cycles[cid + 'r'] = list(reversed(c_steps))
        context = BNode('space_{}'.format(tp_plan.get('context')))
        for path in paths:
            steps = path.get('steps')
            seeds = path.get('seeds')
            check = path.get('check', None)
            ty = None
            if not len(steps) and len(seeds):
                ty = pattern[2]
            elif len(steps):
                ty = steps[0].get('type')
            if ty:
                include_path(ty, seeds, steps, cycles, check)
        for t in s_trees:
            tree_graph.set((t, AGORA.length, Literal(tree_lengths.get(t, 0), datatype=XSD.integer)))
        # Describe the triple pattern itself (subject/predicate/object).
        pattern_node = _get_pattern_node(pattern, patterns)
        plan_graph.add((context, AGORA.definedBy, pattern_node))
        plan_graph.set((context, RDF.type, AGORA.SearchSpace))
        plan_graph.add((pattern_node, RDF.type, AGORA.TriplePattern))
        plan_graph.add((pattern_node, RDFS.label, Literal(pattern_node.toPython())))
        (sub, pred, obj) = pattern
        if isinstance(sub, BNode):
            add_variable(pattern_node, str(sub))
        elif isinstance(sub, URIRef):
            plan_graph.add((pattern_node, AGORA.subject, sub))
        if isinstance(obj, BNode):
            add_variable(pattern_node, str(obj), subject=False)
        elif isinstance(obj, Literal):
            node = BNode(str(obj).replace(' ', '').replace(':', ''))
            plan_graph.add((pattern_node, AGORA.object, node))
            plan_graph.set((node, RDF.type, AGORA.Literal))
            plan_graph.set((node, AGORA.value, obj))
        else:
            plan_graph.add((pattern_node, AGORA.object, obj))
        plan_graph.add((pattern_node, AGORA.predicate, pred))
        if pred == RDF.type:
            if 'check' in hints:
                plan_graph.add((pattern_node, AGORA.checkType,
                                Literal(hints['check'], datatype=XSD.boolean)))
    # Post-process every node that declares an expected type: keep only the
    # most specific types, record whether a single hierarchy remains, and
    # collect the nodes eligible to extend cycles.
    expected_res = tree_graph.query("""SELECT DISTINCT ?n WHERE { ?n agora:expectedType ?type }""")
    node_types = {}
    roots = set(_extract_roots(agp))
    for res in expected_res:
        expected_types = list(tree_graph.objects(res.n, AGORA.expectedType))
        q_expected_types = set(map(lambda x: tree_graph.qname(x), expected_types))
        q_expected_types = filter(
            lambda x: not set.intersection(set(fountain.get_type(x)['super']), q_expected_types),
            q_expected_types)
        type_hierarchy = len(q_expected_types) == 1
        tree_graph.add((res.n, AGORA.typeHierarchy, Literal(type_hierarchy)))
        direct_patterns = set(tree_graph.objects(res.n, AGORA.byPattern))
        enrich_type_patterns(direct_patterns)
        if is_extensible(res.n, direct_patterns):
            node_types[res.n] = q_expected_types
    c_roots = extract_cycle_roots()
    apply_cycle_extensions(c_roots, node_types)
    for t in s_trees:
        tree_graph.set((t, AGORA.length, Literal(tree_lengths.get(t, 0), datatype=XSD.integer)))
        # Collapse fromType to the most general (non-subsumed) types.
        from_types = set([plan_graph.qname(x) for x in plan_graph.objects(t, AGORA.fromType)])
        def_from_types = filter(lambda x: not set.intersection(set(fountain.get_type(x)['sub']), from_types), from_types)
        for dft in def_from_types:
            tree_graph.set((t, AGORA.fromType, __extend_uri(prefixes, dft)))
    # Promote URI pattern subjects directly reachable from a tree to seeds.
    for res in plan_graph.query("""SELECT ?tree ?sub ?nxt WHERE { ?tree a agora:SearchTree ; agora:next ?nxt . ?nxt agora:byPattern [ agora:subject ?sub ] }"""):
        if isinstance(res.sub, URIRef):
            plan_graph.set((res.tree, AGORA.hasSeed, res.sub))
            plan_graph.remove((res.nxt, AGORA.isCycleStartOf, None))
    _inform_on_inverses(plan_graph, fountain, prefixes)
    return plan_graph
def graph_plan(plan, fountain):
    """Build an RDF representation (AGORA vocabulary) of a search plan.

    :param plan: dict with keys 'prefixes' (prefix -> namespace URI) and
        'plan' (list of triple-pattern plan dicts with 'paths', 'pattern',
        'hints' and 'context' entries).
    :param fountain: schema service; only ``fountain.get_type(qname)`` is
        used here, to read each type's 'super' types.
    :return: a ConjunctiveGraph describing search trees, search spaces and
        triple patterns.
    """
    plan_graph = ConjunctiveGraph()
    plan_graph.bind('agora', AGORA)
    prefixes = plan.get('prefixes')
    ef_plan = plan.get('plan')
    tree_lengths = {}   # search-tree BNode -> accumulated number of steps
    s_trees = set([])   # all search-tree BNodes created so far
    patterns = {}       # triple pattern -> its BNode (shared across spaces)

    for (prefix, u) in prefixes.items():
        plan_graph.bind(prefix, u)

    def __get_pattern_node(p):
        # Memoize one BNode per distinct triple pattern.
        if p not in patterns:
            patterns[p] = BNode('tp_{}'.format(len(patterns)))
        return patterns[p]

    def __inc_tree_length(tree, l):
        # Accumulate path lengths; a tree may be reached via several paths.
        if tree not in tree_lengths:
            tree_lengths[tree] = 0
        tree_lengths[tree] += l

    def __add_variable(p_node, vid, subject=True):
        # Emit an agora:Variable node for a BNode (variable) in the pattern.
        sub_node = BNode(str(vid).replace('?', 'var_'))
        if subject:
            plan_graph.add((p_node, AGORA.subject, sub_node))
        else:
            plan_graph.add((p_node, AGORA.object, sub_node))
        plan_graph.set((sub_node, RDF.type, AGORA.Variable))
        plan_graph.set((sub_node, RDFS.label, Literal(str(vid), datatype=XSD.string)))

    def include_path(elm, p_seeds, p_steps):
        # Materialize one path as an agora:SearchTree inside the named
        # context of the seed type URI.
        # NOTE(review): this closure reads `pattern` from the enclosing
        # for-loop (late binding). That is safe only because include_path
        # is called exclusively inside that loop, after `pattern` is set.
        elm_uri = __extend_uri(prefixes, elm)
        path_g = plan_graph.get_context(elm_uri)
        b_tree = BNode(elm_uri)
        s_trees.add(b_tree)
        path_g.set((b_tree, RDF.type, AGORA.SearchTree))
        path_g.set((b_tree, AGORA.fromType, elm_uri))
        for seed in p_seeds:
            path_g.add((b_tree, AGORA.hasSeed, URIRef(seed)))
        previous_node = b_tree
        __inc_tree_length(b_tree, len(p_steps))
        for j, step in enumerate(p_steps):
            prop = step.get('property')
            b_node = BNode(previous_node.n3() + prop)
            # The last step's property is omitted unless the pattern is an
            # rdf:type pattern.
            if j < len(p_steps) - 1 or pattern[1] == RDF.type:
                path_g.add((b_node, AGORA.onProperty, __extend_uri(prefixes, prop)))
            path_g.add((b_node, AGORA.expectedType, __extend_uri(prefixes, step.get('type'))))
            path_g.add((previous_node, AGORA.next, b_node))
            previous_node = b_node
        # Link the end of the path to the (shared) pattern node.
        p_node = __get_pattern_node(pattern)
        path_g.add((previous_node, AGORA.byPattern, p_node))

    for i, tp_plan in enumerate(ef_plan):
        paths = tp_plan.get('paths')
        pattern = tp_plan.get('pattern')
        hints = tp_plan.get('hints')
        context = BNode('space_{}'.format(tp_plan.get('context')))
        for path in paths:
            steps = path.get('steps')
            seeds = path.get('seeds')
            if not len(steps) and len(seeds):
                # No steps: seed directly on the pattern's object type.
                include_path(pattern[2], seeds, steps)
            elif len(steps):
                ty = steps[0].get('type')
                include_path(ty, seeds, steps)

        # Refresh tree lengths after each pattern's paths are included.
        for t in s_trees:
            plan_graph.set((t, AGORA.length, Literal(tree_lengths.get(t, 0), datatype=XSD.integer)))

        pattern_node = __get_pattern_node(pattern)
        plan_graph.add((context, AGORA.definedBy, pattern_node))
        plan_graph.set((context, RDF.type, AGORA.SearchSpace))
        plan_graph.add((pattern_node, RDF.type, AGORA.TriplePattern))
        (sub, pred, obj) = pattern
        # BNodes in the pattern are query variables; URIRefs are constants.
        if isinstance(sub, BNode):
            __add_variable(pattern_node, str(sub))
        elif isinstance(sub, URIRef):
            plan_graph.add((pattern_node, AGORA.subject, sub))
        if isinstance(obj, BNode):
            __add_variable(pattern_node, str(obj), subject=False)
        elif isinstance(obj, Literal):
            # Literal objects get a dedicated agora:Literal node keyed by a
            # whitespace-stripped form of the lexical value.
            node = BNode(str(obj).replace(' ', ''))
            plan_graph.add((pattern_node, AGORA.object, node))
            plan_graph.set((node, RDF.type, AGORA.Literal))
            plan_graph.set((node, AGORA.value, Literal(str(obj), datatype=XSD.string)))
        else:
            plan_graph.add((pattern_node, AGORA.object, obj))
        plan_graph.add((pattern_node, AGORA.predicate, pred))
        if pred == RDF.type:
            if 'check' in hints:
                plan_graph.add((pattern_node, AGORA.checkType, Literal(hints['check'], datatype=XSD.boolean)))

    # Drop expected types that are super-types of another expected type on
    # the same node (keep only the most specific ones).
    # NOTE(review): `subjects()` is a generator over a graph that is mutated
    # inside the loop (remove/add of agora:expectedType triples) — this
    # appears to work with rdflib's in-memory store but is fragile; confirm.
    sub_expected = plan_graph.subjects(predicate=AGORA.expectedType)
    for s in sub_expected:
        expected_types = list(plan_graph.objects(s, AGORA.expectedType))
        for et in expected_types:
            plan_graph.remove((s, AGORA.expectedType, et))
        q_expected_types = [plan_graph.qname(t) for t in expected_types]
        expected_types = [d for d in expected_types if
                          not set.intersection(set(fountain.get_type(plan_graph.qname(d)).get('super')),
                                               set(q_expected_types))]
        for et in expected_types:
            plan_graph.add((s, AGORA.expectedType, et))

    return plan_graph
class WineRDFDatabase(object):
    """In-memory RDF view of the Wine / WineProducer Django models.

    On construction, loads the set of valid style/region classes from the
    bundled ``wine.rdf`` ontology and adds one set of triples per model
    instance to a ConjunctiveGraph. Python 2 code (print statements).
    """

    def __init__(self):
        """Creates the RDF graph"""
        print 'Initialize RDF graph, set namespace mappings'
        self.classes = self.valid_classes()
        self.graph = ConjunctiveGraph()
        self.graph.bind('base', BASE)
        self.graph.bind('rdf', RDF)
        self.graph.bind('rdfs', RDFS)
        self.graph.bind('vocab', VOCAB)
        self.graph.bind('wine', WINE)
        self.graph.bind('wine_prod', WINE_PROD)
        self.graph.bind('whisky', WHISKY)
        # Populate the graph from the database at startup.
        for wine in Wine.objects.all():
            self.add_wine(wine)
        for wine_producer in WineProducer.objects.all():
            self.add_wine_producer(wine_producer)
        print 'Added %i triples ' % len(self.graph)

    def valid_classes(self):
        """Returns a list of wine styles in the wines.rdf"""
        # (Actually returns a set of every rdf:ID found in wine.rdf, which
        # includes regions as well as styles — see add_wine.)
        classes = set()
        root = etree.parse(os.path.join(PROJECT_ROOT, 'backend', 'wine.rdf'))
        for elem in root.iter():
            # rdf:ID attribute in Clark notation, e.g. '{http://...#}ID'.
            id = '{%s}ID' % RDF
            if id in elem.attrib:
                classes.add(elem.attrib[id])
        return classes

    def add_wine_producer(self, wine_producer):
        """Add a WineProducer model to the graph"""
        self.graph.add((URIRef(WINE_PROD[str(wine_producer.id)]),
                        URIRef(RDF['type']), URIRef(BASE['Winery'])))
        # NOTE(review): 'organzation' looks like a typo for 'organization',
        # but consumers may already match this URI — confirm before fixing.
        self.graph.add((URIRef(WINE_PROD[str(wine_producer.id)]),
                        URIRef(RDF['type']), URIRef(VOCAB['organzation'])))
        # NOTE(review): the RDF vocabulary has no 'label' term; this was
        # probably meant to be RDFS['label'] (as used in add_wine). Verify.
        self.graph.add((URIRef(WINE_PROD[str(wine_producer.id)]),
                        URIRef(RDF['label']), Literal(wine_producer.name)))
        self.graph.add((URIRef(WINE_PROD[str(wine_producer.id)]),
                        URIRef(VOCAB['address']), Literal(wine_producer.address)))

    def add_wine(self, wine):
        """Add a Wine model to the graph"""
        self.graph.add((URIRef(WINE[str(wine.id)]),
                        URIRef(BASE['hasMaker']),
                        URIRef(WINE_PROD[str(wine.wine_producer.id)])))
        self.graph.add((URIRef(WINE[str(wine.id)]),
                        URIRef(RDFS['label']), Literal(wine.name)))
        # Normalize region names that don't match the ontology classes.
        region = wine.region
        if region == 'California' or region.startswith('Santa Barbara'):
            region = 'CaliforniaRegion'
        # NOTE(review): RDF['locatedIn'] — 'locatedIn' is not an RDF term;
        # likely intended to come from the wine ontology (BASE). Confirm.
        if region in self.classes:
            self.graph.add((URIRef(WINE[str(wine.id)]),
                            URIRef(RDF['locatedIn']), URIRef(BASE[region])))
        else:
            self.graph.add((URIRef(WINE[str(wine.id)]),
                            URIRef(RDF['locatedIn']),
                            Literal(region)))
        # Style: strip spaces, collapse all port variants onto 'Port'.
        style = wine.style.replace(' ','')
        if style.endswith('Port'):
            style = 'Port'
        if style in self.classes:
            self.graph.add((URIRef(WINE[str(wine.id)]),
                            URIRef(RDF['type']), URIRef(BASE[style])))
        #else:
        #    self.graph.add((URIRef(WINE[str(wine.id)]), URIRef(RDF['type']),
        #                    Literal(style)))
        # Optional attributes are added only when present/truthy.
        if wine.color:
            self.graph.add((URIRef(WINE[str(wine.id)]),
                            URIRef(BASE['hasColor']),
                            URIRef(BASE[wine.color.replace(' ','')])))
        if wine.percentage_alcohol:
            self.graph.add((URIRef(WINE[str(wine.id)]),
                            URIRef(WHISKY['abv']),
                            Literal(wine.percentage_alcohol)))
        if wine.vintage:
            self.graph.add((URIRef(WINE[str(wine.id)]),
                            URIRef(BASE['hasVintageYear']),
                            Literal(wine.vintage)))

    def remove_resource(self, resource):
        """Removes from the graph all triples that have `resource` as the
        subject or object"""
        for triple in self.query_graph(subj=resource, obj=resource):
            self.graph.remove(triple)

    def query_graph(self, subj=None, pred=None, obj=None, exhaustive=False):
        """Return a graph of all triples with subect `sub`, predicate `pred`
        OR object `obj. If `exhaustive`, return all subelements of the given
        arguments (If sub is http://127.0.0.1/api/v1/wine/, return
        http://127.0.0.1/api/v1/wine/{s} for all s).

        Arguments must be of type URIRef or Literal"""
        g = ConjunctiveGraph()
        # NOTE(review): `count` is accumulated but never returned or used.
        count = 0
        if not isinstance(subj, list):
            subj = [subj]
        # The pred/obj comparisons are re-evaluated for every `sub`; any
        # duplicate matches are deduplicated by Graph.add.
        for sub in subj:
            for uri_s, uri_p, uri_o in sorted(self.graph):
                s, p, o = str(uri_s), str(uri_p), str(uri_o)
                if exhaustive:
                    # Compare on the parent path (everything before the
                    # final '/'), matching any sub-resource.
                    s = s.rpartition('/')[0]
                    p = p.rpartition('/')[0]
                    o = o.rpartition('/')[0]
                else:
                    # Normalize away a single trailing slash.
                    s = s[:-1] if s.endswith('/') else s
                    p = p[:-1] if p.endswith('/') else p
                    o = o[:-1] if o.endswith('/') else o
                if (sub and sub == s) or (pred and pred == p) or (obj and obj == o):
                    g.add((uri_s, uri_p, uri_o))
                    count += 1
        return g

    def write_graph(self, format='pretty-xml'):
        """Serialize the graph to the backend directory"""
        if not format in VALID_FORMATS:
            raise ValueError('`format` must be xml, pretty-xml, turtle or n3')
        # VALID_FORMATS maps a format name to its file extension.
        abspath = '%s%s' % \
            (os.path.join(PROJECT_ROOT, 'backend', 'wine_graph_store'),
             VALID_FORMATS[format])
        with open(abspath, 'w') as f:
            f.write(self.graph.serialize(format=format))

    def read_graph(self, filename):
        """Replace the graph with the data from this file in the backend dir"""
        if not filename.endswith('rdf'):
            raise ValueError('`filename` must be an rdf file')
        self.graph = ConjunctiveGraph()
        abspath = os.path.join(PROJECT_ROOT, 'backend', filename)
        self.graph.parse(abspath, format='xml')
class ContextTestCase(unittest.TestCase):
    """Tests context (named graph) behaviour of ConjunctiveGraph stores.

    Python 2 era test class (assertEquals/assert_/basestring).
    NOTE(review): setUp references ``self.identifier`` and ``self.tmppath``
    which are not defined in this class — presumably supplied by concrete
    store-specific subclasses; confirm against the test harness.
    """
    storetest = True
    store_name = "default"
    create = True

    # Fixed nodes shared by all tests.
    michel = URIRef(u"michel")
    tarek = URIRef(u"tarek")
    bob = URIRef(u"bob")
    likes = URIRef(u"likes")
    hates = URIRef(u"hates")
    pizza = URIRef(u"pizza")
    cheese = URIRef(u"cheese")
    c1 = URIRef(u"context-1")
    c2 = URIRef(u"context-2")

    def setUp(self):
        self.graph = ConjunctiveGraph(self.store_name, self.identifier)
        self.graph.open(self.tmppath, create=self.create)
        # self.store = plugin.get(self.store_name, store.Store)(
        #     configuration=self.tmppath, identifier=self.identifier)
        # self.graph = ConjunctiveGraph(self.store_name, identifier=self.identifier)
        # self.graph.destroy(self.tmppath)
        # self.graph.open(self.tmppath)

    def tearDown(self):
        # self.graph.destroy(self.tmppath)
        # Best-effort close; the store file is removed regardless.
        try:
            self.graph.close()
        except:
            pass
        if os.path.exists(self.tmppath):
            os.unlink(self.tmppath)

    def get_context(self, identifier):
        """Return a Graph bound to the shared store under `identifier`."""
        assert isinstance(identifier, URIRef) or isinstance(identifier, BNode), type(identifier)
        # NOTE(review): passing the test case itself as namespace_manager
        # looks suspicious — a Graph/NamespaceManager is normally expected.
        return Graph(store=self.graph.store, identifier=identifier, namespace_manager=self)

    def addStuff(self):
        """Add the canonical 7 test triples to context c1."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)
        graph.add((tarek, likes, pizza))
        graph.add((tarek, likes, cheese))
        graph.add((michel, likes, pizza))
        graph.add((michel, likes, cheese))
        graph.add((bob, likes, cheese))
        graph.add((bob, hates, pizza))
        graph.add((bob, hates, michel))  # gasp!

    def removeStuff(self):
        """Remove exactly the triples added by addStuff from context c1."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)
        graph.remove((tarek, likes, pizza))
        graph.remove((tarek, likes, cheese))
        graph.remove((michel, likes, pizza))
        graph.remove((michel, likes, cheese))
        graph.remove((bob, likes, cheese))
        graph.remove((bob, hates, pizza))
        graph.remove((bob, hates, michel))  # gasp!

    def addStuffInMultipleContexts(self):
        """Add the same triple to the default context, c1 and c2."""
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!
        # add to default context
        self.graph.add(triple)
        # add to context 1
        graph = Graph(self.graph.store, c1)
        graph.add(triple)
        # add to context 2
        graph = Graph(self.graph.store, c2)
        graph.add(triple)

    def testConjunction(self):
        # The conjunctive graph's length counts distinct triples across
        # all contexts, so it must match the single-context graph here.
        self.addStuffInMultipleContexts()
        triple = (self.pizza, self.likes, self.pizza)
        # add to context 1
        graph = Graph(self.graph.store, self.c1)
        graph.add(triple)
        self.assertEquals(len(self.graph), len(graph))

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testLenInOneContext(self):
        c1 = self.c1
        # make sure context is empty
        self.graph.remove_context(self.get_context(c1))
        graph = Graph(self.graph.store, c1)
        oldLen = len(self.graph)
        for i in range(0, 10):
            graph.add((BNode(), self.hates, self.hates))
        self.assertEquals(len(graph), oldLen + 10)
        self.assertEquals(len(self.get_context(c1)), oldLen + 10)
        # Removing the context removes its triples from the store.
        self.graph.remove_context(self.get_context(c1))
        self.assertEquals(len(self.graph), oldLen)
        self.assertEquals(len(graph), 0)

    def testLenInMultipleContexts(self):
        oldLen = len(self.graph)
        self.addStuffInMultipleContexts()
        # addStuffInMultipleContexts is adding the same triple to
        # three different contexts. So it's only + 1
        self.assertEquals(len(self.graph), oldLen + 1)
        graph = Graph(self.graph.store, self.c1)
        self.assertEquals(len(graph), oldLen + 1)

    def testRemoveInMultipleContexts(self):
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!
        self.addStuffInMultipleContexts()
        # triple should be still in store after removing it from c1 + c2
        self.assert_(triple in self.graph)
        graph = Graph(self.graph.store, c1)
        graph.remove(triple)
        self.assert_(triple in self.graph)
        graph = Graph(self.graph.store, c2)
        graph.remove(triple)
        self.assert_(triple in self.graph)
        self.graph.remove(triple)
        # now gone!
        self.assert_(triple not in self.graph)
        # add again and see if remove without context removes all triples!
        self.addStuffInMultipleContexts()
        self.graph.remove(triple)
        self.assert_(triple not in self.graph)

    def testContexts(self):
        triple = (self.pizza, self.hates, self.tarek)  # revenge!
        self.addStuffInMultipleContexts()

        def cid(c):
            # contexts() may yield Graphs or plain identifiers; normalize.
            if not isinstance(c, basestring):
                return c.identifier
            return c
        self.assert_(self.c1 in map(cid, self.graph.contexts()))
        self.assert_(self.c2 in map(cid, self.graph.contexts()))
        # Contexts filtered by triple must include both named contexts.
        contextList = map(cid, list(self.graph.contexts(triple)))
        self.assert_(self.c1 in contextList)
        self.assert_(self.c2 in contextList)

    def testRemoveContext(self):
        c1 = self.c1
        self.addStuffInMultipleContexts()
        self.assertEquals(len(Graph(self.graph.store, c1)), 1)
        self.assertEquals(len(self.get_context(c1)), 1)
        self.graph.remove_context(self.get_context(c1))
        self.assert_(self.c1 not in self.graph.contexts())

    def testRemoveAny(self):
        # Removing the all-wildcard triple empties every context.
        Any = None
        self.addStuffInMultipleContexts()
        self.graph.remove((Any, Any, Any))
        self.assertEquals(len(self.graph), 0)

    def testTriples(self):
        """Exhaustively check triples()/subjects()/objects()/predicates()
        and the pairwise accessors, with and without an explicit context."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        asserte = self.assertEquals
        triples = self.graph.triples
        graph = self.graph
        c1graph = Graph(self.graph.store, c1)
        c1triples = c1graph.triples
        Any = None

        self.addStuff()

        # unbound subjects with context
        asserte(len(list(c1triples((Any, likes, pizza)))), 2)
        asserte(len(list(c1triples((Any, hates, pizza)))), 1)
        asserte(len(list(c1triples((Any, likes, cheese)))), 3)
        asserte(len(list(c1triples((Any, hates, cheese)))), 0)

        # unbound subjects without context, same results!
        asserte(len(list(triples((Any, likes, pizza)))), 2)
        asserte(len(list(triples((Any, hates, pizza)))), 1)
        asserte(len(list(triples((Any, likes, cheese)))), 3)
        asserte(len(list(triples((Any, hates, cheese)))), 0)

        # unbound objects with context
        asserte(len(list(c1triples((michel, likes, Any)))), 2)
        asserte(len(list(c1triples((tarek, likes, Any)))), 2)
        asserte(len(list(c1triples((bob, hates, Any)))), 2)
        asserte(len(list(c1triples((bob, likes, Any)))), 1)

        # unbound objects without context, same results!
        asserte(len(list(triples((michel, likes, Any)))), 2)
        asserte(len(list(triples((tarek, likes, Any)))), 2)
        asserte(len(list(triples((bob, hates, Any)))), 2)
        asserte(len(list(triples((bob, likes, Any)))), 1)

        # unbound predicates with context
        asserte(len(list(c1triples((michel, Any, cheese)))), 1)
        asserte(len(list(c1triples((tarek, Any, cheese)))), 1)
        asserte(len(list(c1triples((bob, Any, pizza)))), 1)
        asserte(len(list(c1triples((bob, Any, michel)))), 1)

        # unbound predicates without context, same results!
        asserte(len(list(triples((michel, Any, cheese)))), 1)
        asserte(len(list(triples((tarek, Any, cheese)))), 1)
        asserte(len(list(triples((bob, Any, pizza)))), 1)
        asserte(len(list(triples((bob, Any, michel)))), 1)

        # unbound subject, objects with context
        asserte(len(list(c1triples((Any, hates, Any)))), 2)
        asserte(len(list(c1triples((Any, likes, Any)))), 5)

        # unbound subject, objects without context, same results!
        asserte(len(list(triples((Any, hates, Any)))), 2)
        asserte(len(list(triples((Any, likes, Any)))), 5)

        # unbound predicates, objects with context
        asserte(len(list(c1triples((michel, Any, Any)))), 2)
        asserte(len(list(c1triples((bob, Any, Any)))), 3)
        asserte(len(list(c1triples((tarek, Any, Any)))), 2)

        # unbound predicates, objects without context, same results!
        asserte(len(list(triples((michel, Any, Any)))), 2)
        asserte(len(list(triples((bob, Any, Any)))), 3)
        asserte(len(list(triples((tarek, Any, Any)))), 2)

        # unbound subjects, predicates with context
        asserte(len(list(c1triples((Any, Any, pizza)))), 3)
        asserte(len(list(c1triples((Any, Any, cheese)))), 3)
        asserte(len(list(c1triples((Any, Any, michel)))), 1)

        # unbound subjects, predicates without context, same results!
        asserte(len(list(triples((Any, Any, pizza)))), 3)
        asserte(len(list(triples((Any, Any, cheese)))), 3)
        asserte(len(list(triples((Any, Any, michel)))), 1)

        # all unbound with context
        asserte(len(list(c1triples((Any, Any, Any)))), 7)
        # all unbound without context, same result!
        asserte(len(list(triples((Any, Any, Any)))), 7)

        # The accessor helpers must behave identically on the conjunctive
        # graph and on the c1 context graph.
        for c in [graph, self.get_context(c1)]:
            # unbound subjects
            asserte(set(c.subjects(likes, pizza)), set((michel, tarek)))
            asserte(set(c.subjects(hates, pizza)), set((bob,)))
            asserte(set(c.subjects(likes, cheese)), set([tarek, bob, michel]))
            asserte(set(c.subjects(hates, cheese)), set())

            # unbound objects
            asserte(set(c.objects(michel, likes)), set([cheese, pizza]))
            asserte(set(c.objects(tarek, likes)), set([cheese, pizza]))
            asserte(set(c.objects(bob, hates)), set([michel, pizza]))
            asserte(set(c.objects(bob, likes)), set([cheese]))

            # unbound predicates
            asserte(set(c.predicates(michel, cheese)), set([likes]))
            asserte(set(c.predicates(tarek, cheese)), set([likes]))
            asserte(set(c.predicates(bob, pizza)), set([hates]))
            asserte(set(c.predicates(bob, michel)), set([hates]))

            asserte(set(c.subject_objects(hates)), set([(bob, pizza), (bob, michel)]))
            asserte(
                set(c.subject_objects(likes)),
                set([(tarek, cheese), (michel, cheese), (michel, pizza), (bob, cheese), (tarek, pizza)]),
            )

            asserte(set(c.predicate_objects(michel)), set([(likes, cheese), (likes, pizza)]))
            asserte(set(c.predicate_objects(bob)), set([(likes, cheese), (hates, pizza), (hates, michel)]))
            asserte(set(c.predicate_objects(tarek)), set([(likes, cheese), (likes, pizza)]))

            asserte(set(c.subject_predicates(pizza)), set([(bob, hates), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(cheese)), set([(bob, likes), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(michel)), set([(bob, hates)]))

            asserte(
                set(c),
                set(
                    [
                        (bob, hates, michel),
                        (bob, likes, cheese),
                        (tarek, likes, pizza),
                        (michel, likes, pizza),
                        (michel, likes, cheese),
                        (bob, hates, pizza),
                        (tarek, likes, cheese),
                    ]
                ),
            )

        # remove stuff and make sure the graph is empty again
        self.removeStuff()
        asserte(len(list(c1triples((Any, Any, Any)))), 0)
        asserte(len(list(triples((Any, Any, Any)))), 0)
class TestInfer(TestCase):
    """Tests for the Infer engine: rule application (_rule_class,
    _rule_inverse_of) and schema validation helpers, against a model
    pre-loaded with the default schemas plus a mini FOAF ontology."""

    def setUp(self):
        self.model = ConjunctiveGraph()
        add_default_schemas(self.model)
        self.model.parse(data=MINI_FOAF_ONTOLOGY, format='turtle')

    def test_class(self):
        # _rule_class should assert rdfs:Class for types used in the data.
        fooNS = Namespace('http://example.org/')
        self.model.parse(data=FOAF_DATA, format='turtle')
        inference = Infer(self.model)

        s = [fooNS['me.jpg'], RDF['type'], RDFS['Class']]
        found = list(self.model.triples(s))
        self.assertEqual(len(found), 0)
        inference._rule_class()
        s = [fooNS['me.jpg'], RDF['type'], RDFS['Class']]
        found = list(self.model.triples(s))
        self.assertEqual(len(found), 1)

    def test_inverse_of(self):
        # _rule_inverse_of materializes the owl:inverseOf counterpart of
        # each statement, and must be idempotent.
        fooNS = Namespace('http://example.org/')
        self.model.parse(data=FOAF_DATA, format='turtle')
        inference = Infer(self.model)

        depiction = (None, FOAF['depiction'], fooNS['me.jpg'])
        size = len(self.model)
        found_statements = list(self.model.triples(depiction))
        self.assertEqual(len(found_statements), 0)
        inference._rule_inverse_of()
        found_statements = list(self.model.triples(depiction))
        self.assertEqual(len(found_statements), 1)

        # we should've added one statement.
        self.assertEqual(len(self.model), size + 1)

        size = len(self.model)
        inference._rule_inverse_of()
        # we should already have both versions in our model
        self.assertEqual(len(self.model), size)

    def test_validate_types(self):
        fooNS = Namespace('http://example.org/')
        self.model.parse(data=FOAF_DATA, format='turtle')
        inference = Infer(self.model)

        errors = list(inference._validate_types())
        self.assertEqual(len(errors), 0)

        # A subject with no rdf:type should be reported.
        s = (fooNS['document'], DC['title'], Literal("bleem"))
        self.model.add(s)
        errors = list(inference._validate_types())
        self.assertEqual(len(errors), 1)

    def test_validate_undefined_properties_in_schemas(self):
        fooNS = Namespace('http://example.org/')
        inference = Infer(self.model)

        # The schemas alone define every property they use.
        errors = list(inference._validate_undefined_properties())
        self.assertEqual(len(errors), 0)

    def test_validate_undefined_properties_in_inference(self):
        fooNS = Namespace('http://example.org/')
        foafNS = Namespace('http://xmlns.com/foaf/0.1/')
        self.model.parse(data=FOAF_DATA, format='turtle')
        inference = Infer(self.model)

        # FOAF_DATA uses two properties the mini ontology doesn't define.
        errors = list(inference._validate_undefined_properties())
        self.assertEqual(len(errors), 2)

        inference = Infer(self.model)
        errors = list(inference._validate_property_types())
        self.assertEqual(len(errors), 0)

        # Domain violation: firstName on an image resource.
        s = (fooNS['me.jpg'], FOAF['firstName'], Literal("name"))
        self.model.add(s)
        errors = list(inference._validate_property_types())
        self.assertEqual(len(errors), 1)
        startswith = 'Domain of '
        self.assertEqual(errors[0][:len(startswith)], startswith)
        self.assertTrue('http://example.org/me.jpg' in errors[0])
        endswith = 'http://xmlns.com/foaf/0.1/Person'
        self.assertEqual(errors[0][-len(endswith):], endswith)
        self.model.remove(s)

        errors = list(inference._validate_property_types())
        self.assertEqual(len(errors), 0)

        # Range violation: depicts pointing at a non-Thing.
        s = (fooNS['foo.txt'], RDF['type'], FOAF['Document'])
        self.model.add(s)
        s = (fooNS['me.jpg'], FOAF['depicts'], FOAF['foo.txt'])
        self.model.add(s)
        errors = list(inference._validate_property_types())
        self.assertEqual(len(errors), 1)
        startswith = 'Range of '
        self.assertEqual(errors[0][:len(startswith)], startswith)
        self.assertTrue('http://example.org/me.jpg' in errors[0])
        endswith = 'http://www.w3.org/2002/07/owl#Thing'
        self.assertEqual(errors[0][-len(endswith):], endswith)
        self.model.remove(s)

    def test_property_multiple_domain_types(self):
        """Can we process a property with multiple domain types?
        """
        turtle = """
        @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
        @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
        @prefix foo: <http://example.org/> .
        @prefix bar: <http://example.com/> .

        foo:AClass a rdfs:Class .
        foo:BClass a rdfs:Class .
        bar:ABarClass a rdfs:Class .

        foo:aprop a rdf:Property ;
            rdfs:domain foo:AClass ;
            rdfs:domain bar:ABarClass ;
            rdfs:range foo:BClass .

        foo:object a foo:BClass .
        foo:subject a foo:AClass ;
            foo:aprop foo:object .
        bar:subject a bar:ABarClass ;
            foo:aprop foo:object .
        """
        self.model.parse(data=turtle, format='turtle')
        inference = Infer(self.model)

        errmsg = list(inference._validate_property_types())
        self.assertEqual(len(errmsg), 0)
def main(argv):
    """Read an RDF file from the shared work directory, coerce untyped
    literal objects to typed literals (xsd:string / xsd:date / xsd:boolean /
    xsd:float) based on their lexical form, and serialize the result.

    :param argv: command-line arguments. ``-i/--inputfile <name.ext>``
        selects the input (extension picks the parser: nq, nt or ttl);
        ``-o <name>`` overrides the output filename (default output.nq);
        ``-h`` prints usage.
    """
    workdir = "/data/"
    inputfile = ''
    outputfile = "output.nq"
    try:
        opts, args = getopt.getopt(argv, "hi:o:", ["inputfile="])
    except getopt.GetoptError:
        print('test.py --inputfile <inputfile>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            # '-h' was accepted in the optstring but previously ignored.
            print('test.py --inputfile <inputfile> [-o <outputfile>]')
            sys.exit(0)
        elif opt in ("-i", "--inputfile"):
            inputfile = arg
        elif opt == '-o':
            # '-o' was declared in the optstring but never handled before.
            outputfile = arg

    # TODO: change all this to take the absolute full path as arg (e.g.: /data/input.nq)
    input_full_path = workdir + inputfile
    inputdata = inputfile.split('.')
    data = inputdata[0]       # base name, used to build the graph identifier
    datatype = inputdata[1]   # extension, selects parser/serializer

    if datatype == "nq":
        g = ConjunctiveGraph(identifier="http://kraken/graph/data/" + data)
        g.default_context.parse(input_full_path, format='nquads')
    else:
        # BUG FIX: plain Graph has no .default_context (that is a
        # ConjunctiveGraph attribute); parse into the graph directly.
        g = Graph()
        if datatype == "nt":
            g.parse(input_full_path, format='nt')
        elif datatype == "ttl":
            g.parse(input_full_path, format='n3')

    # Raw strings so \w, \d and \. are real regex escapes, not Python ones.
    patternstring1 = re.compile(r"^([A-Z]|[a-z]+)+$")
    patternstring = re.compile(r"\w+")
    patterndatey = re.compile(r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$")
    patterndatem = re.compile(r"^(0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])[- /.](19|20)\d\d$")
    patterndated = re.compile(r"^(0[1-9]|[12][0-9]|3[01])[- /.](0[1-9]|1[012])[- /.](19|20)\d\d$")
    patternfloat = re.compile(r"^[-+]?[0-9]*\.?[0-9]+$")

    # BUG FIX: iterate over a snapshot — mutating a graph while iterating
    # it directly is undefined behaviour in rdflib.
    for s, p, o in list(g):
        if patternstring.match(o) is not None and len(o) >= 2 and "symbol" not in str(p):
            # gene symbols are detected as lang, so 'symbol' predicates are
            # excluded above; only retype values of *name* predicates here.
            if "name" in str(p):
                g.remove((s, p, o))
                g.add((s, p, Literal(o, datatype=XSD.string)))
        elif patterndatey.match(o) is not None or patterndated.match(o) is not None \
                or patterndatem.match(o) is not None:
            g.remove((s, p, o))
            g.add((s, p, Literal(o, datatype=XSD.date)))
        elif re.search('true', o, re.IGNORECASE) or re.search('false', o, re.IGNORECASE):
            g.remove((s, p, o))
            g.add((s, p, Literal(o, datatype=XSD.boolean)))
        elif patternfloat.match(o) is not None:
            g.remove((s, p, o))
            g.add((s, p, Literal(o, datatype=XSD.float)))
        elif patternstring1.match(o) is not None:
            g.remove((s, p, o))
            g.add((s, p, Literal(o, datatype=XSD.string)))

    if datatype == "nq":
        g.serialize(destination=workdir + outputfile, format='nquads')
    elif datatype == "nt":
        g.serialize(destination=workdir + outputfile, format='nt')
    elif datatype == "ttl":
        # BUG FIX: the original *parsed* the output file here instead of
        # serializing to it, so ttl inputs never produced any output.
        g.serialize(destination=workdir + outputfile, format='n3')
print("Loading the graph") g = ConjunctiveGraph() g.parse(args.input, format=args.format) print("Convert DOIs in lowercase form") doi_to_remove = [] doi_to_add = [] for s, p, o in g.triples((None, LITERAL.hasLiteralValue, None)): o_str = str(o) lower_o_str = o_str.lower() if o_str != lower_o_str: doi_to_remove.append((s, p, o)) doi_to_add.append((s, p, Literal(lower_o_str))) for s, p, o in doi_to_remove: g.remove((s, p, o)) for s, p, o in doi_to_add: g.add((s, p, o)) if not args.avoid: print("Check additional mapping in the oc/ccc triplestore") rf = ResourceFinder(ts_url=triplestore_url, default_dir=default_dir) with open(args.table, "a") as f: for s, p, o in g.triples((None, DATACITE.hasIdentifier, None)): if str(s) not in mapping_table: is_doi = False is_isbn = False id_string = None for s1, p2, o2 in g.triples((o, None, None)): if p2 == DATACITE.usesIdentifierScheme: if o2 == DATACITE.doi:
class Store:
    """n3 file-backed triple store for last.fm / MusicBrainz music data.

    Python 2 code (uses ``file()``). Loads an existing n3 file if present
    and exposes helpers to add tracks, artists and album data.
    """

    def __init__(self, tripleFile):
        # tripleFile: path of the n3 file that backs this store.
        self.graph = ConjunctiveGraph()
        self.storefn = abspath(tripleFile)
        self.storeuri = 'file://' + self.storefn
        if exists(self.storefn):
            self.graph.load(self.storeuri, format='n3')
        self.graph.bind('mo', MusicOntology)
        self.graph.bind('ourvocab', OurVocab)
        self.graph.bind('dc', DC)
        self.graph.bind('foaf', foaf)
        self.graph.bind('geo', geo)
        self.graph.bind('dbpediaowl', dbpediaowl)
        self.graph.bind('rev', 'http://purl.org/stuff/rev#')

    def save(self):
        """Serialize the graph back to the backing n3 file."""
        self.graph.serialize(self.storeuri, format='n3')

    def addTrack(self, mbid, track):
        """Add a last.fm track dict as an mo:Track keyed by MusicBrainz id."""
        trackuri = URIRef('http://musicbrainz.org/recording/%s#_' % mbid)
        self.graph.add((trackuri, RDF.type, MusicOntology.Track))
        self.graph.add((trackuri, DC.title, Literal(track['name'])))
        self.graph.add(
            (trackuri, OurVocab.has_playcount, Literal(track['playcount'])))
        self.graph.add((trackuri, OurVocab.has_listener_count,
                        Literal(track['listeners'])))
        # Only link the performer when last.fm supplied an artist mbid.
        if track['artist']['mbid'] != '':
            artisturi = URIRef('http://musicbrainz.org/artist/%s#_'
                               % track['artist']['mbid'])
            self.graph.add((artisturi, RDF.type, MusicOntology.MusicArtist))
            self.graph.add((trackuri, MusicOntology.performer, artisturi))
            self.graph.add(
                (artisturi, foaf.name, Literal(track['artist']['name'])))
        # toptags may be a dict (with 'tag') or something else entirely;
        # individual tags may also be non-dicts, hence the guards.
        if isinstance(track['toptags'], dict) and 'tag' in track['toptags'].keys():
            for tag in track['toptags']['tag']:
                if isinstance(tag, dict):
                    self.graph.add(
                        (trackuri, OurVocab.has_tag, Literal(tag['name'])))

    def addArtist(self, trackMBID, artistData, trackData):
        """Add/repair artist info for a track, plus hometown/location data."""
        trackuri = URIRef('http://musicbrainz.org/recording/%s#_' % trackMBID)
        #If there is no mbid, it means there is no earlier artist entry in triplestore
        if trackData['artist']['mbid'] == '':
            artisturi = URIRef(artistData['artist']['value'].encode('utf-8'))
            if artistData['artist']['type'] == 'artist':
                self.graph.add(
                    (artisturi, RDF.type, MusicOntology.MusicArtist))
            else:
                self.graph.add((artisturi, RDF.type, MusicOntology.MusicGroup))
            self.graph.add((trackuri, MusicOntology.performer, artisturi))
            self.graph.add(
                (artisturi, foaf.name,
                 Literal(trackData['artist']['name'].encode('utf-8'))))
        #if there is an artist entry, make sure the artist/band association is appropriate
        else:
            artisturi = URIRef('http://musicbrainz.org/artist/%s#_'
                               % trackData['artist']['mbid'])
            # addTrack always asserts MusicArtist; demote to MusicGroup when
            # the authority says it's a band.
            if artistData['artist']['type'] == "band" and\
                    (artisturi, RDF.type, MusicOntology.MusicArtist) in self.graph:
                self.graph.remove(
                    (artisturi, RDF.type, MusicOntology.MusicArtist))
                self.graph.add((artisturi, RDF.type, MusicOntology.MusicGroup))

        #now the location data!
        if 'hometown' not in artistData.keys():
            return
        # Hometown is either a dbpedia-style URI (rich Place node) or a
        # plain string (kept as a literal).
        if "http" in artistData['hometown']['value']:
            townuri = URIRef(artistData['hometown']['value'].encode('utf-8'))
            if (townuri, RDF.type, dbpediaowl.Place) not in self.graph:
                self.graph.add((townuri, RDF.type, dbpediaowl.Place))
                if "hometownName" in artistData.keys():
                    self.graph.add((townuri, foaf.name,
                                    Literal(artistData['hometownName']
                                            ['value'].encode('utf-8'))))
                if "coordinates" in artistData.keys():
                    self.graph.add((townuri, geo.geometry,
                                    Literal(artistData['coordinates']
                                            ['value'].encode('utf-8'))))
            self.graph.add((artisturi, dbpediaowl.hometown, townuri))
        else:
            self.graph.add((artisturi, dbpediaowl.hometown,
                            Literal(artistData['hometown']['value'])))

    def _matchAlbum(self, trackInfo, albumFiles):
        """
        A function to return the correct match of an album given a track.
        Deprecated for most cases where the match is done using mbids.
        Use only for cases where there is no mbid link betweeb album and track.
        """
        try:
            albumName = trackInfo['album']['name']
            artistName = trackInfo['artist']['name']
        except:
            return None
        for af in albumFiles:
            # af is a path to a JSON file with an 'album' payload (py2 file()).
            albumInfo = json.load(file(af))
            albumInfo = albumInfo['album']
            if albumName == albumInfo['name'] and artistName == albumInfo[
                    'artist']:
                return af

    def addAlbum(self, trackMBID, albumInfo):
        """
        A function to add album data into triple store.
        At the moment, only the releasedate is taken from the album data.
        More to be added soon.
        """
        try:
            albumInfo = albumInfo['album']
        except:
            return
        if 'releasedate' not in albumInfo.keys():
            return
        trackuri = URIRef('http://musicbrainz.org/recording/%s#_' % trackMBID)
        self.graph.add(
            (trackuri, OurVocab.has_releasedate,
             Literal(albumInfo['releasedate'].strip().encode('utf-8'))))
class ContextTestCase(unittest.TestCase):
    """Conformance tests for context (named-graph) handling in a store.

    Subclasses override ``store`` to exercise a specific store plugin; the
    default run uses the in-memory "default" store.
    """

    # Store plugin name under test; subclasses override.
    store = "default"
    slow = True
    # Filesystem path backing the store, created in setUp.
    tmppath = None

    def setUp(self):
        # Skip the whole case if the backend's dependencies are missing.
        try:
            self.graph = ConjunctiveGraph(store=self.store)
        except ImportError:
            raise SkipTest("Dependencies for store '%s' not available!"
                           % self.store)
        # SQLite wants a file; other stores get a scratch directory.
        if self.store == "SQLite":
            _, self.tmppath = mkstemp(prefix="test", dir="/tmp",
                                      suffix=".sqlite")
        else:
            self.tmppath = mkdtemp()
        self.graph.open(self.tmppath, create=True)
        # Fixture terms shared by all tests.
        self.michel = URIRef(u"michel")
        self.tarek = URIRef(u"tarek")
        self.bob = URIRef(u"bob")
        self.likes = URIRef(u"likes")
        self.hates = URIRef(u"hates")
        self.pizza = URIRef(u"pizza")
        self.cheese = URIRef(u"cheese")
        self.c1 = URIRef(u"context-1")
        self.c2 = URIRef(u"context-2")

        # delete the graph for each test!
        self.graph.remove((None, None, None))

    def tearDown(self):
        self.graph.close()
        # mkstemp leaves a file, mkdtemp a directory — clean up either.
        if os.path.isdir(self.tmppath):
            shutil.rmtree(self.tmppath)
        else:
            os.remove(self.tmppath)

    def addStuff(self):
        """Populate context c1 with the seven fixture triples."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.add((tarek, likes, pizza))
        graph.add((tarek, likes, cheese))
        graph.add((michel, likes, pizza))
        graph.add((michel, likes, cheese))
        graph.add((bob, likes, cheese))
        graph.add((bob, hates, pizza))
        graph.add((bob, hates, michel))  # gasp!

    def removeStuff(self):
        """Remove the seven fixture triples from context c1."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.remove((tarek, likes, pizza))
        graph.remove((tarek, likes, cheese))
        graph.remove((michel, likes, pizza))
        graph.remove((michel, likes, cheese))
        graph.remove((bob, likes, cheese))
        graph.remove((bob, hates, pizza))
        graph.remove((bob, hates, michel))  # gasp!

    def addStuffInMultipleContexts(self):
        """Add ONE triple to the default context and to contexts c1 and c2."""
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        # add to default context
        self.graph.add(triple)
        # add to context 1
        graph = Graph(self.graph.store, c1)
        graph.add(triple)
        # add to context 2
        graph = Graph(self.graph.store, c2)
        graph.add(triple)

    def testConjunction(self):
        # The conjunctive graph's length must count distinct triples across
        # contexts, not per-context occurrences.
        if self.store == "SQLite":
            raise SkipTest("Skipping known issue with __len__")
        self.addStuffInMultipleContexts()
        triple = (self.pizza, self.likes, self.pizza)
        # add to context 1
        graph = Graph(self.graph.store, self.c1)
        graph.add(triple)
        self.assertEqual(len(self.graph), len(graph))

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testLenInOneContext(self):
        c1 = self.c1
        # make sure context is empty
        self.graph.remove_context(self.graph.get_context(c1))
        graph = Graph(self.graph.store, c1)
        oldLen = len(self.graph)

        for i in range(0, 10):
            graph.add((BNode(), self.hates, self.hates))
        self.assertEqual(len(graph), oldLen + 10)
        self.assertEqual(len(self.graph.get_context(c1)), oldLen + 10)
        self.graph.remove_context(self.graph.get_context(c1))
        self.assertEqual(len(self.graph), oldLen)
        self.assertEqual(len(graph), 0)

    def testLenInMultipleContexts(self):
        if self.store == "SQLite":
            raise SkipTest("Skipping known issue with __len__")
        oldLen = len(self.graph)
        self.addStuffInMultipleContexts()

        # addStuffInMultipleContexts is adding the same triple to
        # three different contexts. So it's only + 1
        self.assertEqual(len(self.graph), oldLen + 1)

        graph = Graph(self.graph.store, self.c1)
        self.assertEqual(len(graph), oldLen + 1)

    def testRemoveInMultipleContexts(self):
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        # triple should be still in store after removing it from c1 + c2
        self.assertTrue(triple in self.graph)
        graph = Graph(self.graph.store, c1)
        graph.remove(triple)
        self.assertTrue(triple in self.graph)
        graph = Graph(self.graph.store, c2)
        graph.remove(triple)
        self.assertTrue(triple in self.graph)
        self.graph.remove(triple)
        # now gone!
        self.assertTrue(triple not in self.graph)

        # add again and see if remove without context removes all triples!
        self.addStuffInMultipleContexts()
        self.graph.remove(triple)
        self.assertTrue(triple not in self.graph)

    def testContexts(self):
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        def cid(c):
            # Contexts are Graph objects; compare by identifier URI.
            return c.identifier

        self.assertTrue(self.c1 in map(cid, self.graph.contexts()))
        self.assertTrue(self.c2 in map(cid, self.graph.contexts()))

        # contexts(triple) yields only the contexts containing that triple.
        contextList = list(map(cid, list(self.graph.contexts(triple))))
        self.assertTrue(self.c1 in contextList, (self.c1, contextList))
        self.assertTrue(self.c2 in contextList, (self.c2, contextList))

    def testRemoveContext(self):
        c1 = self.c1

        self.addStuffInMultipleContexts()
        self.assertEqual(len(Graph(self.graph.store, c1)), 1)
        self.assertEqual(len(self.graph.get_context(c1)), 1)

        self.graph.remove_context(self.graph.get_context(c1))
        self.assertTrue(self.c1 not in self.graph.contexts())

    def testRemoveAny(self):
        # Removing the all-wildcard pattern must empty every context.
        Any = None
        self.addStuffInMultipleContexts()
        self.graph.remove((Any, Any, Any))
        self.assertEqual(len(self.graph), 0)

    def testTriples(self):
        """Exhaustive pattern-matching checks, with and without a context."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        asserte = self.assertEqual
        triples = self.graph.triples
        graph = self.graph
        c1graph = Graph(self.graph.store, c1)
        c1triples = c1graph.triples
        Any = None

        self.addStuff()

        # unbound subjects with context
        asserte(len(list(c1triples((Any, likes, pizza)))), 2)
        asserte(len(list(c1triples((Any, hates, pizza)))), 1)
        asserte(len(list(c1triples((Any, likes, cheese)))), 3)
        asserte(len(list(c1triples((Any, hates, cheese)))), 0)

        # unbound subjects without context, same results!
        asserte(len(list(triples((Any, likes, pizza)))), 2)
        asserte(len(list(triples((Any, hates, pizza)))), 1)
        asserte(len(list(triples((Any, likes, cheese)))), 3)
        asserte(len(list(triples((Any, hates, cheese)))), 0)

        # unbound objects with context
        asserte(len(list(c1triples((michel, likes, Any)))), 2)
        asserte(len(list(c1triples((tarek, likes, Any)))), 2)
        asserte(len(list(c1triples((bob, hates, Any)))), 2)
        asserte(len(list(c1triples((bob, likes, Any)))), 1)

        # unbound objects without context, same results!
        asserte(len(list(triples((michel, likes, Any)))), 2)
        asserte(len(list(triples((tarek, likes, Any)))), 2)
        asserte(len(list(triples((bob, hates, Any)))), 2)
        asserte(len(list(triples((bob, likes, Any)))), 1)

        # unbound predicates with context
        asserte(len(list(c1triples((michel, Any, cheese)))), 1)
        asserte(len(list(c1triples((tarek, Any, cheese)))), 1)
        asserte(len(list(c1triples((bob, Any, pizza)))), 1)
        asserte(len(list(c1triples((bob, Any, michel)))), 1)

        # unbound predicates without context, same results!
        asserte(len(list(triples((michel, Any, cheese)))), 1)
        asserte(len(list(triples((tarek, Any, cheese)))), 1)
        asserte(len(list(triples((bob, Any, pizza)))), 1)
        asserte(len(list(triples((bob, Any, michel)))), 1)

        # unbound subject, objects with context
        asserte(len(list(c1triples((Any, hates, Any)))), 2)
        asserte(len(list(c1triples((Any, likes, Any)))), 5)

        # unbound subject, objects without context, same results!
        asserte(len(list(triples((Any, hates, Any)))), 2)
        asserte(len(list(triples((Any, likes, Any)))), 5)

        # unbound predicates, objects with context
        asserte(len(list(c1triples((michel, Any, Any)))), 2)
        asserte(len(list(c1triples((bob, Any, Any)))), 3)
        asserte(len(list(c1triples((tarek, Any, Any)))), 2)

        # unbound predicates, objects without context, same results!
        asserte(len(list(triples((michel, Any, Any)))), 2)
        asserte(len(list(triples((bob, Any, Any)))), 3)
        asserte(len(list(triples((tarek, Any, Any)))), 2)

        # unbound subjects, predicates with context
        asserte(len(list(c1triples((Any, Any, pizza)))), 3)
        asserte(len(list(c1triples((Any, Any, cheese)))), 3)
        asserte(len(list(c1triples((Any, Any, michel)))), 1)

        # unbound subjects, predicates without context, same results!
        asserte(len(list(triples((Any, Any, pizza)))), 3)
        asserte(len(list(triples((Any, Any, cheese)))), 3)
        asserte(len(list(triples((Any, Any, michel)))), 1)

        # all unbound with context
        asserte(len(list(c1triples((Any, Any, Any)))), 7)
        # all unbound without context, same result!
        asserte(len(list(triples((Any, Any, Any)))), 7)

        # The convenience accessors must agree whether queried through the
        # conjunctive graph or through the c1 context graph.
        for c in [graph, self.graph.get_context(c1)]:
            # unbound subjects
            asserte(set(c.subjects(likes, pizza)), set((michel, tarek)))
            asserte(set(c.subjects(hates, pizza)), set((bob, )))
            asserte(set(c.subjects(likes, cheese)), set([tarek, bob, michel]))
            asserte(set(c.subjects(hates, cheese)), set())

            # unbound objects
            asserte(set(c.objects(michel, likes)), set([cheese, pizza]))
            asserte(set(c.objects(tarek, likes)), set([cheese, pizza]))
            asserte(set(c.objects(bob, hates)), set([michel, pizza]))
            asserte(set(c.objects(bob, likes)), set([cheese]))

            # unbound predicates
            asserte(set(c.predicates(michel, cheese)), set([likes]))
            asserte(set(c.predicates(tarek, cheese)), set([likes]))
            asserte(set(c.predicates(bob, pizza)), set([hates]))
            asserte(set(c.predicates(bob, michel)), set([hates]))

            asserte(set(c.subject_objects(hates)),
                    set([(bob, pizza), (bob, michel)]))
            asserte(
                set(c.subject_objects(likes)),
                set([
                    (tarek, cheese),
                    (michel, cheese),
                    (michel, pizza),
                    (bob, cheese),
                    (tarek, pizza),
                ]),
            )

            asserte(set(c.predicate_objects(michel)),
                    set([(likes, cheese), (likes, pizza)]))
            asserte(
                set(c.predicate_objects(bob)),
                set([(likes, cheese), (hates, pizza), (hates, michel)]),
            )
            asserte(set(c.predicate_objects(tarek)),
                    set([(likes, cheese), (likes, pizza)]))

            asserte(
                set(c.subject_predicates(pizza)),
                set([(bob, hates), (tarek, likes), (michel, likes)]),
            )
            asserte(
                set(c.subject_predicates(cheese)),
                set([(bob, likes), (tarek, likes), (michel, likes)]),
            )
            asserte(set(c.subject_predicates(michel)), set([(bob, hates)]))

            asserte(
                set(c),
                set([
                    (bob, hates, michel),
                    (bob, likes, cheese),
                    (tarek, likes, pizza),
                    (michel, likes, pizza),
                    (michel, likes, cheese),
                    (bob, hates, pizza),
                    (tarek, likes, cheese),
                ]),
            )

        # remove stuff and make sure the graph is empty again
        self.removeStuff()
        asserte(len(list(c1triples((Any, Any, Any)))), 0)
        asserte(len(list(triples((Any, Any, Any)))), 0)
def convert(teifile, namespace):
    """Convert a TEI-encoded play into OntoMedia RDF and print it as RDF/XML.

    Parses *teifile* twice: once with rdflib's RDFa parser (to pick up
    embedded RDFa), once with ElementTree (to walk the TEI structure).
    Builds Character/Being resources from the cast list, then walks each
    act/scene emitting Travel events for entrances/exits and Social events
    (via getSocial) for the stretches of dialogue between them.

    NOTE(review): depends on module-level names defined elsewhere in this
    file: extractCURIEorURI, getSocial, perseusid, and the omb/ome/omj/oml
    namespace objects — confirm before reuse. Python 2 only (print
    statement). Indentation of some nested bodies reconstructed — verify
    against upstream history.
    """
    #graph_uri = "http://contextus.net/resource/blue_velvet/"

    ns = Namespace(namespace)
    graph = ConjunctiveGraph()
    graph.load(teifile, format="rdfa")

    graph.bind("default", ns)

    to_update = ""

    # Find the RDFa-declared namespace that matches our target namespace.
    for prefix, nsuri in graph.namespaces():
        #print("prefix: " + str(prefix) + " - " + str(nsuri))
        if nsuri in ns:
            to_update = nsuri

    # Rewrite all subjects under that namespace onto the target namespace.
    for s, p, o in graph:
        # print s, p, o
        if to_update != "" and to_update in s:
            graph.remove((s, p, o))
            s = URIRef(s.replace(to_update, ns))
            graph.add((s, p, o))

    act = ""
    scene = ""
    line = ""
    char = 0    # counter used to mint character/N URIs
    loc = 0     # counter used to mint location/N URIs

    #timeline = ns['timeline/narrative']
    #graph.add((timeline, RDF.type, ome['Timeline']))

    tree = ET.parse(teifile)
    cast = dict()  # maps TEI xml:id -> character resource URI

    titleNode = tree.find('//title')

    # --- Cast list: create Character and Being resources ------------------
    castItems = tree.findall('/text/body/div1/castList//castItem')
    for castItem in castItems:
        actorNode = castItem.find('actor')
        roleNode = castItem.find('role')

        if roleNode != None:
            id = roleNode.get("{http://www.w3.org/XML/1998/namespace}id")

        #print("Found castItem!")

        actor = None
        role = None

        # Check to see if we already have an entry
        if (roleNode != None and roleNode.get("about")):
            charname = roleNode.get("about")

            if (charname.find(":") > -1):
                # CURIE form: mint a character/N reference in its namespace.
                nmsp, nom = charname.split(":", 1)
                charcode = "character/" + str(char)
                charref = nmsp + ":" + charcode + "]"
                role = extractCURIEorURI(graph, charref, nom[0:-1])
                char += 1
                #print("1:" + charname + ": adding id " + id + " to " + role)
            else:
                role = extractCURIEorURI(graph, charname)
                #print("2:" + charname + ": adding id " + id + " to " + role)

            cast[id] = role
            graph.add((role, RDF.type, omb['Character']))
            #print(charname + ": adding id " + id + " to " + role)

        if (actorNode != None and actorNode.get("about")):
            actor = extractCURIEorURI(graph, actorNode.get("about"))
            graph.add((actor, RDF.type, omb['Being']))

        if actor != None and role != None:
            graph.add((actor, omb['portrays'], role))
            graph.add((role, omb['portrayed-by'], actor))

    eventCount = 1
    groupCount = 1
    prior_event = None

    # --- Act/scene walk ---------------------------------------------------
    actItems = tree.findall('/text/body/div1')
    ref = ""

    for actItem in actItems:

        if actItem.get("type") == "act":
            act = actItem.get("n")

            sceneItems = actItem.findall('div2')

            for sceneItem in sceneItems:

                #print("Found sceneItems!")

                if sceneItem.get("type") == "scene":
                    scene = sceneItem.get("n")

                    # Work out the location of this scene
                    location = None
                    stageItems = sceneItem.findall("stage")

                    #internalnum = 1
                    stagenum = 0
                    speechnum = 1

                    for stageItem in stageItems:
                        if stageItem.get("type") == "location":
                            # The RDFa parser doesn't handle the type - so we can grab that here.
                            if stageItem.get("about") != None:
                                locname = stageItem.get("about")

                                # Adding location type/oml:space for location
                                if stageItem.get("typeof") and stageItem.get("about"):
                                    type = extractCURIEorURI(graph, stageItem.get("typeof"))
                                    #print "1. Location: " + str(location) + " Type: " + str(type)
                                elif stageItem.get("about"):
                                    #print "2. Location: " + str(locname)
                                    type = extractCURIEorURI(graph, oml['Space'])
                                # NOTE(review): if about == "" neither branch
                                # binds `type`, so the add() below would raise
                                # NameError — confirm inputs always set @about.

                                # Get location value and add rdfs:label is location is not using the TEI value
                                if (locname.find(":") > -1):
                                    nmsp, nom = locname.split(":", 1)
                                    loccode = "location/" + str(loc)
                                    locref = nmsp + ":" + loccode + "]"
                                    location = extractCURIEorURI(
                                        graph, locref, nom[0:-1])
                                    loc += 1
                                    graph.add((
                                        location,
                                        rdflib.URIRef(
                                            'http://www.w3.org/2000/01/rdf-schema#label'
                                        ), Literal(nom[0:-1])))
                                else:
                                    location = extractCURIEorURI(
                                        graph, stageItem.get("about"))

                                # Add location to graph
                                graph.add((location, RDF.type, type))
                            else:
                                location = ""

                            #print("Adding location type: " + type + " (" + location + ")")

                    if cast:
                        # Work out a list of all cast in a given section
                        currentCast = list()
                        speakers = list()

                        # Iterate through elements within stageItem
                        # Find speaker events and add to list of current cast for inclusion in social event
                        # Find reference events and add to ongoing social event ?
                        # Find stage events
                        # If event is an entrance then
                        #   create social event for people talking before entrance
                        #   create travel event i.e. entrance
                        #   add new arrival to current cast list
                        # If event is exit event then
                        #   create social event for people talking before exit
                        #   create travel event i.e. exit
                        #   if leavers are not named directly the calculate who is leaving
                        #   remove leavers from current cast list
                        # If reach end of scene then create social event with current cast list
                        #Also need to check if social event before exit has same composition as social event after exit since then they should be merged

                        event = ns['event/' + str(eventCount)]
                        group = ns['group/' + str(groupCount)]

                        refersTo = list()
                        #parent = None
                        speakerNodes = list()
                        speakerRef = list()

                        # Perseus xpointer prefix for this act/scene.
                        xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(
                            perseusid) + ":act=" + str(act) + ":scene=" + str(scene)

                        stagecount = 0
                        stage_array = list()

                        for node in sceneItem.getiterator():
                            #print("Node: " + node.tag)
                            """
                            if node.tag == "lb":
                                if node.get("ed") == "F1":
                                    line = node.get("n")

                                    if titleNode != None:
                                        ref = titleNode.text + " " + str(act) + "." + str(scene) + "." + str(line)
                                    else:
                                        ref = str(act) + "." + str(scene) + "." + str(line)

                                    #xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene) + "#xpointer(//lb[@ed='F1' and @n='" + str(line) + "'])"
                                    xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene)
                                    #print("Ref: " + xpointer)
                            """
                            if node.tag == "sp":
                                # A speech: record the speaker and where the
                                # speech can be cited.
                                id = node.get("who")

                                if id and cast:
                                    speakers.append(cast[id[1:]])
                                    speakerNodes.append(node)

                                    if perseusid == None:
                                        speakerRef.append(ref)
                                    else:
                                        #speechRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(int(line) + 1) + "']/ancestor::sp)"
                                        speechRef = xpointer + "#xpointer(//div2/sp[" + str(
                                            speechnum) + "])"
                                        speakerRef.append(speechRef)
                                    #print("Line ref: " + ref)

                                    if cast[id[1:]] not in currentCast:
                                        currentCast.append(cast[id[1:]])

                                    #internalnum = 1
                                    speechnum += 1
                                    stagecount = 0
                                    previousl = 0

                                    # Count verse lines so in-speech stage
                                    # directions can be located by line index.
                                    for subnode in node.getiterator():
                                        if subnode.tag == "l":
                                            previousl += 1

                                        if subnode.tag == "stage":
                                            #print ("Stagecount: " + str(stagecount) + " Previousl: " + str(previousl) + "\n")
                                            stage_array.append(previousl)
                                            stagecount += 1

                            elif node.tag == "stage":
                                # Build a citable reference for this stage
                                # direction, either inside the previous
                                # speech or as a free-standing direction.
                                if stagecount > 0:
                                    s_max = len(stage_array)
                                    diff = s_max - stagecount

                                    #if diff == 0:
                                    #    stagenum += 1

                                    entRef = xpointer + "#xpointer(//div2/sp[" + str(
                                        speechnum - 1) + "]/l[" + str(
                                            stage_array[diff]) + "]/stage)"
                                    #internalnum += 1
                                    stagecount -= 1
                                else:
                                    stagenum += 1
                                    entRef = xpointer + "#xpointer(//div2/stage[" + str(
                                        stagenum) + "])"

                                if node.get("type") == "entrance":

                                    # Add Social Events for all the people who spoke since the last break (if there were any)
                                    update = list()
                                    update = getSocial(graph, ns, speakers,
                                                       speakerNodes,
                                                       speakerRef, cast,
                                                       currentCast,
                                                       eventCount, event,
                                                       prior_event, location)
                                    eventCount = update[0]
                                    prior_event = update[1]

                                    event = ns['event/' + str(eventCount)]

                                    speakers = list()
                                    speakerNodes = list()
                                    speakerRef = list()

                                    # Add Travel Event
                                    graph.add((event, RDF.type, omj['Travel']))

                                    if perseusid == None:
                                        graph.add((
                                            event,
                                            rdflib.URIRef(
                                                "http://www.w3.org/2000/01/rdf-schema#seeAlso"
                                            ), Literal(ref)))
                                    else:
                                        #entRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(line) + "']/following-sibling::*[1]/self::stage)"
                                        graph.add((
                                            event,
                                            rdflib.URIRef(
                                                "http://www.w3.org/2000/01/rdf-schema#seeAlso"
                                            ), URIRef(entRef)))

                                    #print("Entrance event. GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast)))
                                    #print("Found entrence event!")
                                    if location:
                                        graph.add((event, ome['to'], location))

                                    involved = node.get("about")

                                    if (len(involved) > 0 and involved[0] == "["
                                            and involved[-1] == "]"):
                                        involved = involved[1:-1]

                                    chunks = involved.split()
                                    chunk_count = len(chunks)

                                    if chunk_count > 1:
                                        #type = extractCURIEorURI(graph, "[omb:Group]")
                                        #graph.add((group, RDF.type, type))
                                        graph.add((group, RDF.type, omb['Group']))

                                    event_label = ""
                                    en = 1

                                    for chunk in chunks:
                                        striped = chunk.strip()

                                        if (len(striped) > 0
                                                and striped[0] == "["
                                                and striped[-1] == "]"):
                                            striped = striped[1:-1]

                                        currentCast.append(cast[striped])

                                        if chunk_count > 1:
                                            graph.add(
                                                (group, ome['contains'],
                                                 cast[striped]))

                                            if en == chunk_count:
                                                # Last name: close off the
                                                # "A, B and C" label.
                                                event_label = event_label[
                                                    0:-2] + " and " + striped
                                                graph.add((
                                                    event,
                                                    rdflib.URIRef(
                                                        'http://www.w3.org/2000/01/rdf-schema#label'
                                                    ),
                                                    Literal(event_label +
                                                            " arrive")))
                                            elif en < chunk_count:
                                                event_label += striped + ", "
                                        else:
                                            #print("Adding person as subject-entity to entry event " + str(eventCount))
                                            graph.add((
                                                event,
                                                rdflib.URIRef(
                                                    'http://www.w3.org/2000/01/rdf-schema#label'
                                                ),
                                                Literal(striped + " arrives")))
                                            graph.add(
                                                (event,
                                                 ome['has-subject-entity'],
                                                 cast[striped]))

                                        en += 1

                                    if chunk_count > 1:
                                        graph.add(
                                            (event, ome['has-subject-entity'],
                                             group))
                                        #print("Adding group as subject-entity to entry event " + str(eventCount))
                                        groupCount = groupCount + 1
                                        group = ns['group/' + str(groupCount)]

                                    # Chain the event into the timeline.
                                    if (prior_event):
                                        graph.add((event, ome['follows'],
                                                   prior_event))
                                        graph.add((prior_event,
                                                   ome['precedes'], event))

                                    prior_event = event

                                    eventCount = eventCount + 1
                                    event = ns['event/' + str(eventCount)]

                                if node.get("type") == "exit":

                                    # Add Social Events for all the people who spoke since the last break (if there were any)
                                    update = list()
                                    update = getSocial(graph, ns, speakers,
                                                       speakerNodes,
                                                       speakerRef, cast,
                                                       currentCast,
                                                       eventCount, event,
                                                       prior_event, location)
                                    eventCount = update[0]
                                    prior_event = update[1]

                                    event = ns['event/' + str(eventCount)]

                                    speakers = list()
                                    speakerNodes = list()
                                    speakerRef = list()

                                    # Add Travel Event
                                    graph.add((event, RDF.type, omj['Travel']))

                                    if perseusid == None:
                                        graph.add((
                                            event,
                                            rdflib.URIRef(
                                                "http://www.w3.org/2000/01/rdf-schema#seeAlso"
                                            ), Literal(ref)))
                                    else:
                                        #exitRef = xpointer
                                        #graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(exitRef)))
                                        graph.add((
                                            event,
                                            rdflib.URIRef(
                                                "http://www.w3.org/2000/01/rdf-schema#seeAlso"
                                            ), URIRef(entRef)))

                                    #print("Found entrence event!")
                                    if location != None:
                                        graph.add((event, ome['from'],
                                                   location))

                                    involved = node.get("about")

                                    if involved.strip() == "" or "-all" in involved:
                                        # Remove everyone
                                        #print("Exit all. GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast)))
                                        #for peep in currentCast:
                                        #    print(peep)
                                        if len(currentCast) > 1:
                                            #type = extractCURIEorURI(graph, "[omb:Group]")
                                            #graph.add((group, RDF.type, type))
                                            graph.add((group, RDF.type,
                                                       omb['Group']))

                                        event_label = ""
                                        en = 1

                                        for peep in currentCast:
                                            # Recover the short TEI id for
                                            # the label via reverse lookup.
                                            short_ref = ""
                                            for key, value in cast.iteritems():
                                                if peep == value:
                                                    short_ref = key

                                            if len(currentCast) > 1:
                                                graph.add((group,
                                                           ome['contains'],
                                                           peep))

                                                if en == len(currentCast):
                                                    event_label = event_label[
                                                        0:-2] + " and " + short_ref
                                                    graph.add((
                                                        event,
                                                        rdflib.URIRef(
                                                            'http://www.w3.org/2000/01/rdf-schema#label'
                                                        ),
                                                        Literal(event_label +
                                                                " leave")))
                                                elif en < len(currentCast):
                                                    event_label += short_ref + ", "
                                            else:
                                                #print("Adding person as subject-entity to exuant event " + str(eventCount))
                                                graph.add(
                                                    (event,
                                                     ome['has-subject-entity'],
                                                     peep))
                                                graph.add((
                                                    event,
                                                    rdflib.URIRef(
                                                        'http://www.w3.org/2000/01/rdf-schema#label'
                                                    ),
                                                    Literal(short_ref +
                                                            " leaves")))

                                            en += 1

                                        if len(currentCast) > 1:
                                            graph.add(
                                                (event,
                                                 ome['has-subject-entity'],
                                                 group))
                                            #print("Adding group as subject-entity to exuant event " + str(eventCount))
                                            groupCount = groupCount + 1
                                            group = ns['group/' + str(groupCount)]

                                        currentCast = list()

                                    elif "!" in involved:
                                        # "All but ..." exit: @about names who
                                        # STAYS, e.g. [!(#a #b)].
                                        #print("Exit except some. GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast)))
                                        #print("Event: " + involved);
                                        if (len(involved) > 0
                                                and involved[0] == "["
                                                and involved[-1] == "]"):
                                            involved = involved[1:-1]

                                        involved = involved.strip()

                                        if (len(involved) > 0
                                                and involved[0] == "!"
                                                and involved[1] == "("
                                                and involved[-1] == ")"):
                                            involved = involved[2:-1]

                                        #print("involved: " + involved)

                                        striped = involved.strip()
                                        c_ids = striped.split()

                                        chunks = list()
                                        for stay in c_ids:
                                            #print("Staying: " + cast[stay])
                                            chunks.append(cast[stay])

                                        staying = list()
                                        going = list()

                                        for player in currentCast:
                                            #print("Player: " + player)
                                            if player in chunks:
                                                staying.append(player)
                                            else:
                                                going.append(player)

                                        going_count = len(going)

                                        if going_count > 1:
                                            #type = extractCURIEorURI(graph, "[omb:Group]")
                                            #graph.add((group, RDF.type, type))
                                            graph.add((group, RDF.type,
                                                       omb['Group']))

                                        event_label = ""
                                        en = 1

                                        for ghost in going:
                                            #print("ghost: " + ghost)
                                            short_ref = ""
                                            for key, value in cast.iteritems():
                                                if ghost == value:
                                                    short_ref = key

                                            if ghost in currentCast:
                                                currentCast.remove(ghost)
                                                #print("Current cast count: " + str(len(currentCast)))

                                            if going_count > 1:
                                                graph.add((group,
                                                           ome['contains'],
                                                           ghost))

                                                if en == len(going):
                                                    event_label = event_label[
                                                        0:-2] + " and " + short_ref
                                                    graph.add((
                                                        event,
                                                        rdflib.URIRef(
                                                            'http://www.w3.org/2000/01/rdf-schema#label'
                                                        ),
                                                        Literal(event_label +
                                                                " leave")))
                                                elif en < len(going):
                                                    event_label += short_ref + ", "
                                            else:
                                                #print("Adding person as subject-entity to exit event " + str(eventCount))
                                                graph.add(
                                                    (event,
                                                     ome['has-subject-entity'],
                                                     ghost))
                                                graph.add((
                                                    event,
                                                    rdflib.URIRef(
                                                        'http://www.w3.org/2000/01/rdf-schema#label'
                                                    ),
                                                    Literal(short_ref +
                                                            " leaves")))

                                            en += 1

                                        if going_count > 1:
                                            graph.add(
                                                (event,
                                                 ome['has-subject-entity'],
                                                 group))
                                            #print("Adding group as subject-entity to exit event " + str(eventCount))
                                            groupCount = groupCount + 1
                                            group = ns['group/' + str(groupCount)]

                                    else:
                                        # Named exit: @about lists who leaves.
                                        #print("Exit some. GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast)))
                                        if (len(involved) > 0
                                                and involved[0] == "["
                                                and involved[-1] == "]"):
                                            involved = involved[1:-1]

                                        striped = involved.strip()
                                        chunks = striped.split()
                                        #print("striped: " + striped)

                                        chunk_count = len(chunks)

                                        if chunk_count > 1:
                                            #type = extractCURIEorURI(graph, "[omb:Group]")
                                            #graph.add((group, RDF.type, type))
                                            graph.add((group, RDF.type,
                                                       omb['Group']))

                                        event_label = ""
                                        en = 1

                                        for chunk in chunks:
                                            #print("chunk: " + chunk)
                                            ghost = cast[chunk]
                                            #print("ghost: " + ghost)

                                            if ghost in currentCast:
                                                currentCast.remove(ghost)
                                                #print("Current cast count: " + str(len(currentCast)))

                                            if chunk_count > 1:
                                                graph.add((group,
                                                           ome['contains'],
                                                           ghost))

                                                # NOTE(review): compares en
                                                # against len(currentCast)
                                                # although the loop iterates
                                                # chunks — possibly intended
                                                # len(chunks); confirm before
                                                # changing.
                                                if en == len(currentCast):
                                                    event_label = event_label[
                                                        0:-2] + " and " + chunk
                                                    graph.add((
                                                        event,
                                                        rdflib.URIRef(
                                                            'http://www.w3.org/2000/01/rdf-schema#label'
                                                        ),
                                                        Literal(event_label +
                                                                " leave")))
                                                elif en < len(currentCast):
                                                    event_label += chunk + ", "
                                            else:
                                                #print("Adding person as subject-entity to exit event " + str(eventCount))
                                                graph.add(
                                                    (event,
                                                     ome['has-subject-entity'],
                                                     ghost))
                                                graph.add((
                                                    event,
                                                    rdflib.URIRef(
                                                        'http://www.w3.org/2000/01/rdf-schema#label'
                                                    ),
                                                    Literal(chunk + " leaves")))

                                            en += 1

                                        if chunk_count > 1:
                                            graph.add(
                                                (event,
                                                 ome['has-subject-entity'],
                                                 group))
                                            #print("Adding group as subject-entity to exit event " + str(eventCount))
                                            groupCount = groupCount + 1
                                            group = ns['group/' + str(groupCount)]

                                    # Chain the exit event into the timeline.
                                    if (prior_event):
                                        graph.add((event, ome['follows'],
                                                   prior_event))
                                        graph.add((prior_event,
                                                   ome['precedes'], event))

                                    prior_event = event

                                    eventCount = eventCount + 1
                                    event = ns['event/' + str(eventCount)]

                            #elif node.tag == "rs":
                            #    #print("Found rs node")
                            #    if parent:
                            #        #print("Parent type is " + parent.tag)
                            #        if parent.tag == "p" or parent.tag == "l":
                            #            refersTo.append(node.get("about"))

                            #parent = node

                        # Add Social Events for all the people who spoke since the last break (if there were any)
                        #print("Final section of scene, currentCast:" + str(len(currentCast)) + " sperkers: " + str(len(speakers)))
                        update = list()
                        update = getSocial(graph, ns, speakers, speakerNodes,
                                           speakerRef, cast, currentCast,
                                           eventCount, event, prior_event,
                                           location)
                        eventCount = update[0]
                        prior_event = update[1]

                        event = ns['event/' + str(eventCount)]
                        group = ns['group/' + str(groupCount)]

                        speakers = list()
                        speakerNodes = list()
                        currentCast = list()
                        speakerRef = list()

    print graph.serialize(format='xml')
class TabLinker(object):
    """Convert annotated Excel workbooks into RDF Data Cube triples.

    Reads a styled .xls workbook (styles prefixed 'TL ' mark cell roles)
    and emits one graph of data triples plus a separate annotation graph.
    """

    defaultNamespacePrefix = 'http://example.org/resource/'
    annotationsNamespacePrefix = 'http://example.org/annotation/'

    # Namespaces bound on the data graph.
    namespaces = {
        'dcterms':Namespace('http://purl.org/dc/terms/'),
        'skos':Namespace('http://www.w3.org/2004/02/skos/core#'),
        'tablink':Namespace('http://example.org/ns#'),
        'qb':Namespace('http://purl.org/linked-data/cube#'),
        'owl':Namespace('http://www.w3.org/2002/07/owl#')
    }
    # Namespaces bound on the annotation graph.
    annotationNamespaces = {
        'np':Namespace('http://www.nanopub.org/nschema#'),
        'oa':Namespace('http://www.w3.org/ns/openannotation/core/'),
        'xsd':Namespace('http://www.w3.org/2001/XMLSchema#'),
        'dct':Namespace('http://purl.org/dc/terms/')
    }

    def __init__(self, filename, config, level = logging.DEBUG):
        """TabLinker constructor

        Keyword arguments:
        filename -- String containing the name of the current Excel file being examined
        config -- Configuration object, loaded from .ini file
        level -- A logging level as defined in the logging module
        """
        self.config = config
        self.filename = filename
        self.log = logging.getLogger("TabLinker")
        self.log.setLevel(level)

        self.log.debug('Initializing Graphs')
        self.initGraphs()

        self.log.debug('Setting Scope')
        basename = os.path.basename(filename)
        # Strip the .xls extension; the basename scopes all minted URIs.
        basename = re.search('(.*)\.xls', basename).group(1)
        self.setScope(basename)

        self.log.debug('Loading Excel file {0}.'.format(filename))
        # formatting_info=True keeps style records (needed by Styles below).
        self.rb = open_workbook(filename, formatting_info=True)

        self.log.debug('Reading styles')
        self.styles = Styles(self.rb)

        self.log.debug('Copied Workbook to writable copy')
        self.wb = copy(self.rb)

    def initGraphs(self):
        """
        Initialize the graphs, set default namespaces, and add schema information
        """
        self.graph = ConjunctiveGraph()
        # Create a separate graph for annotations
        self.annotationGraph = ConjunctiveGraph()

        self.log.debug('Adding namespaces to graphs')
        # Bind namespaces to graphs
        for namespace in self.namespaces:
            self.graph.namespace_manager.bind(
                namespace, self.namespaces[namespace])

        # Same for annotation graph
        for namespace in self.annotationNamespaces:
            self.annotationGraph.namespace_manager.bind(
                namespace, self.annotationNamespaces[namespace])

        # Add schema information
        self.log.debug('Adding some schema information (dimension and measure properties) ')
        self.addDataCellProperty()

        # Add dimensions
        self.graph.add((self.namespaces['tablink']['dimension'], RDF.type,
                        self.namespaces['qb']['DimensionProperty']))

        #self.graph.add((self.namespaces['tablink']['label'], RDF.type, RDF['Property']))

    def addDataCellProperty(self):
        """
        Add definition of data cell resource to graph
        """
        # Property name comes from config, falling back to 'hasValue'.
        if len(self.config.get('dataCell', 'propertyName')) > 0 :
            self.dataCellPropertyName = self.config.get('dataCell', 'propertyName')
        else :
            self.dataCellPropertyName = 'hasValue'

        self.graph.add((self.namespaces['tablink'][self.dataCellPropertyName],
                        RDF.type,
                        self.namespaces['qb']['MeasureProperty']))

        #Take labels from config
        # Config format: "lang-->label:::lang-->label..." per label entry.
        if len(self.config.get('dataCell', 'labels')) > 0 :
            labels = self.config.get('dataCell', 'labels').split(':::')
            for label in labels :
                labelProperties = label.split('-->')
                if len(labelProperties[0]) > 0 and len(labelProperties[1]) > 0 :
                    self.graph.add((self.namespaces['tablink'][self.dataCellPropertyName],
                                    RDFS.label,
                                    Literal(labelProperties[1],labelProperties[0])))

        if len(self.config.get('dataCell', 'literalType')) > 0 :
            self.graph.add((self.namespaces['tablink'][self.dataCellPropertyName],
                            RDFS.range,
                            URIRef(self.config.get('dataCell', 'literalType'))))

    def setScope(self, fileBasename):
        """Set the default namespace and base for all URIs of the current workbook"""
        self.fileBasename = fileBasename
        scopeNamespace = self.defaultNamespacePrefix + fileBasename + '/'

        # Annotations go to a different namespace
        annotationScopeNamespace = self.annotationsNamespacePrefix + fileBasename + '/'

        self.log.debug('Adding namespace for {0}: {1}'.format(fileBasename, scopeNamespace))

        self.namespaces['scope'] = Namespace(scopeNamespace)
        self.annotationNamespaces['scope'] = Namespace(annotationScopeNamespace)
        # Bind as the default (empty-prefix) namespace of each graph.
        self.graph.namespace_manager.bind('', self.namespaces['scope'])
        self.annotationGraph.namespace_manager.bind('', self.annotationNamespaces['scope'])

    def doLink(self):
        """Start tablinker for all sheets in workbook"""
        self.log.info('Starting TabLinker for all sheets in workbook')

        for n in range(self.rb.nsheets) :
            self.log.info('Starting with sheet {0}'.format(n))
            self.r_sheet = self.rb.sheet_by_index(n)
            self.w_sheet = self.wb.get_sheet(n)

            self.rowns, self.colns = self.getValidRowsCols()

            # Sheet name (whitespace -> '_', URL-quoted) prefixes all QNames.
            self.sheet_qname = urllib.quote(re.sub('\s','_',self.r_sheet.name))
            self.log.info('Base for QName generator set to: {0}'.format(self.sheet_qname))

            self.log.debug('Starting parser')
            self.parseSheet()

    ###
    #    Utility Functions
    ###

    def insideMergeBox(self, i, j):
        """
        Check if the specified cell is inside a merge box

        Arguments:
        i -- row
        j -- column

        Returns:
        True/False -- depending on whether the cell is inside a merge box
        """
        self.merged_cells = self.r_sheet.merged_cells
        for crange in self.merged_cells:
            # xlrd ranges are half-open: rhi/chi are one past the end.
            rlo, rhi, clo, chi = crange
            if i <= rhi - 1 and i >= rlo and j <= chi - 1 and j >= clo:
                return True
        return False

    def getMergeBoxCoord(self, i, j):
        """
        Get the top-left corner cell of the merge box containing the specified cell

        Arguments:
        i -- row
        j -- column

        Returns:
        (k, l) -- Coordinates of the top-left corner of the merge box,
                  or (-1, -1) when the cell is not inside any merge box
        """
        if not self.insideMergeBox(i,j):
            return (-1, -1)

        self.merged_cells = self.r_sheet.merged_cells
        for crange in self.merged_cells:
            rlo, rhi, clo, chi = crange
            if i <= rhi - 1 and i >= rlo and j <= chi - 1 and j >= clo:
                return (rlo, clo)

    def getType(self, style):
        """Get type for a given excel style. Style name must be prefixed by 'TL '

        Arguments:
        style -- Style (string) to check type for

        Returns:
        String -- The type of this field. In case none is found, 'unknown'
        """
        typematch = re.search('TL\s(.*)',style)
        if typematch :
            cellType = typematch.group(1)
        else :
            cellType = 'Unknown'
        return cellType

    def isEmpty(self, i,j):
        """Check whether cell is empty.

        Arguments:
        i -- row
        j -- column

        Returns:
        True/False -- depending on whether the cell is empty
        """
        if (self.r_sheet.cell(i,j).ctype == XL_CELL_EMPTY or
                self.r_sheet.cell(i,j).ctype == XL_CELL_BLANK) or self.r_sheet.cell(i,j).value == '' :
            return True
        else :
            return False

    def isEmptyRow(self, i, colns):
        """
        Determine whether the row 'i' is empty by iterating over all its cells

        Arguments:
        i     -- The index of the row to be checked.
        colns -- The number of columns to be checked

        Returns:
        true  -- if the row is empty
        false -- if the row is not empty
        """
        for j in range(0,colns) :
            if not self.isEmpty(i,j):
                return False
        return True

    def isEmptyColumn(self, j, rowns ):
        """
        Determine whether the column 'j' is empty by iterating over all its cells

        Arguments:
        j     -- The index of the column to be checked.
        rowns -- The number of rows to be checked

        Returns:
        true  -- if the column is empty
        false -- if the column is not empty
        """
        for i in range(0,rowns) :
            if not self.isEmpty(i,j):
                return False
        return True

    def getValidRowsCols(self) :
        """
        Determine the number of non-empty rows and columns in the Excel sheet

        Returns:
        rowns -- number of rows
        colns -- number of columns
        """
        colns = number_of_good_cols(self.r_sheet)
        rowns = number_of_good_rows(self.r_sheet)

        # Check whether the number of good columns and rows are correct
        # (trim trailing all-empty rows/columns).
        while self.isEmptyRow(rowns-1, colns) :
            rowns = rowns - 1
        while self.isEmptyColumn(colns-1, rowns) :
            colns = colns - 1

        self.log.debug('Number of rows with content:    {0}'.format(rowns))
        self.log.debug('Number of columns with content: {0}'.format(colns))
        return rowns, colns

    def getQName(self, names):
        """
        Create a valid QName from a string or dictionary of names

        Arguments:
        names -- Either dictionary of names or string of a name.

        Returns:
        qname -- a valid QName for the dictionary or string
        """
        if type(names) == dict :
            # Concatenate all (processed) values under the sheet QName.
            qname = self.sheet_qname
            for k in names :
                qname = qname + '_' + self.processString(names[k])
        else :
            qname = self.sheet_qname + '_' + self.processString(names)

        self.log.debug('Minted new QName: {}'.format(qname))
        return qname

    def getColHeaderLabel(self, colheaders):
        label = '_'.join(colheaders)
        return label

    def getColHeaderValueURI(self, colheaders):
        label = self.getColHeaderLabel(colheaders)
        uri = self.namespaces['scope'][self.processString(label)]
        return uri

    def getColHeaderPropertyURI(self, index):
        uri = self.namespaces['scope']['HColHeader' + str(index)]
        return uri

    def processString(self, string):
        """
        Remove illegal characters (comma, brackets, etc) from string, and replace it with underscore. Useful for URIs

        Arguments:
        string -- The string representing the value of the source cell

        Returns:
        processedString -- The processed string
        """
        # TODO accents too
        return urllib.quote(
            re.sub('\s|\(|\)|,|\.','_',
                   unicode(string).strip().replace('/', '-')).encode('utf-8', 'ignore'))

    def addValue(self, source_cell_value, altLabel=None):
        """
        Add a "value" + optional label to the graph for a cell in the source Excel sheet.
        The value is typically the value stored in the source cell itself, but may also
        be a copy of another cell (e.g. in the case of 'idem.').

        Arguments:
        source_cell_value -- The string representing the value of the source cell

        Returns:
        source_cell_value_qname -- a valid QName for the value of the source cell
        """
        source_cell_value_qname = self.getQName(source_cell_value)
        #self.graph.add((self.namespaces['scope'][source_cell_value_qname],self.namespaces['qb']['dataSet'],self.namespaces['scope'][self.sheet_qname]))

        #self.graph.add((self.namespaces['scope'][self.source_cell_qname],self.namespaces['tablink']['value'],self.namespaces['scope'][source_cell_value_qname]))

        # If the source_cell_value is actually a dictionary (e.g.
in the case of HierarchicalRowHeaders), then use the last element of the row hierarchy as prefLabel # Otherwise just use the source_cell_value as prefLabel if type(source_cell_value) == dict : self.graph.add((self.namespaces['scope'][source_cell_value_qname],self.namespaces['skos'].prefLabel,Literal(source_cell_value.values()[-1],'nl'))) if altLabel and altLabel != source_cell_value.values()[-1]: # If altLabel has a value (typically for HierarchicalRowHeaders) different from the last element in the row hierarchy, we add it as alternative label. self.graph.add((self.namespaces['scope'][source_cell_value_qname],self.namespaces['skos'].altLabel,Literal(altLabel,'nl'))) else : self.graph.add((self.namespaces['scope'][source_cell_value_qname],self.namespaces['skos'].prefLabel,Literal(source_cell_value,'nl'))) if altLabel and altLabel != source_cell_value: # If altLabel has a value (typically for HierarchicalRowHeaders) different from the source_cell_value, we add it as alternative label. self.graph.add((self.namespaces['scope'][source_cell_value_qname],self.namespaces['skos'].altLabel,Literal(altLabel,'nl'))) return source_cell_value_qname def parseSheet(self): """ Parses the currently selected sheet in the workbook, takes no arguments. Iterates over all cells in the Excel sheet and produces relevant RDF Triples. """ self.log.info("Parsing {0} rows and {1} columns.".format(self.rowns,self.colns)) self.column_dimensions = {} self.property_dimensions = {} self.row_dimensions = {} self.rowhierarchy = {} # Get dictionary of annotations self.annotations = self.r_sheet.cell_note_map for i in range(0,self.rowns): self.rowhierarchy[i] = {} for j in range(0, self.colns): # Parse cell data self.source_cell = self.r_sheet.cell(i,j) self.source_cell_name = cellname(i,j) self.style = self.styles[self.source_cell].name self.cellType = self.getType(self.style) self.source_cell_qname = self.getQName(self.source_cell_name) self.log.debug("({},{}) {}/{}: \"{}\"". 
format(i,j,self.cellType, self.source_cell_name, self.source_cell.value)) # Try to parse ints to avoid ugly _0 URIs try: if int(self.source_cell.value) == self.source_cell.value: self.source_cell.value = int(self.source_cell.value) except ValueError: self.log.debug("(%s.%s) No parseable int" % (i,j)) # Parse annotation (if any) if self.config.get('annotations', 'enabled') == "1": if (i,j) in self.annotations: self.parseAnnotation(i, j) # Parse cell even if empty if self.cellType == 'Data': self.parseData(i, j) elif (self.cellType == 'HRowHeader') : self.updateRowHierarchy(i, j) elif self.cellType == 'ColHeader' : self.parseColHeader(i, j) elif self.cellType == 'RowProperty' : self.parseRowProperty(i, j) # If cell not empty, check for more types if not self.isEmpty(i,j) : #self.graph.add((self.namespaces['scope'][self.source_cell_qname],RDF.type,self.namespaces['tablink'][self.cellType])) #self.graph.add((self.namespaces['scope'][self.source_cell_qname],self.namespaces['tablink']['cell'],Literal(self.source_cell_name))) #self.graph.add((self.namespaces['scope'][self.source_cell_qname],self.namespaces['tablink']['col'],Literal(colname(j)))) #self.graph.add((self.namespaces['scope'][self.source_cell_qname],self.namespaces['tablink']['row'],Literal(i+1))) #self.graph.add((self.namespaces['scope'][self.source_cell_qname] isrow row if self.cellType == 'Title' : self.parseTitle(i, j) elif self.cellType == 'RowHeader' : self.parseRowHeader(i, j) elif self.cellType == 'HRowHeader' : self.parseHierarchicalRowHeader(i, j) elif self.cellType == 'RowLabel' : self.parseRowLabel(i, j) # Add additional information about the hierarchy of column headers for value in self.column_dimensions.values(): for index in range(1, len(value)): uri_sub = self.getColHeaderValueURI(value[:index+1]) uri_top = self.getColHeaderValueURI(value[:index]) self.graph.add((uri_sub, self.namespaces['tablink']['subColHeaderOf'], uri_top)) self.graph.add((uri_sub, self.namespaces['tablink']['depth'], 
Literal(index))) self.graph.add((uri_top, self.namespaces['tablink']['depth'], Literal(index-1))) self.log.info("Done parsing...") def updateRowHierarchy(self, i, j) : """ Build up lists for hierarchical row headers. Cells marked as hierarchical row header are often empty meaning that their intended value is stored somewhere else in the Excel sheet. Keyword arguments: int i -- row number int j -- col number Returns: New row hierarchy dictionary """ if (self.isEmpty(i,j) or str(self.source_cell.value).lower().strip() == 'id.') : # If the cell is empty, and a HierarchicalRowHeader, add the value of the row header above it. # If the cell above is not in the rowhierarchy, don't do anything. # If the cell is exactly 'id.', add the value of the row header above it. try : self.rowhierarchy[i][j] = self.rowhierarchy[i-1][j] self.log.debug("({},{}) Copied from above\nRow hierarchy: {}".format(i,j,self.rowhierarchy[i])) except : # REMOVED because of double slashes in uris # self.rowhierarchy[i][j] = self.source_cell.value self.log.debug("({},{}) Top row, added nothing\nRow hierarchy: {}".format(i,j,self.rowhierarchy[i])) elif str(self.source_cell.value).lower().startswith('id.') or str(self.source_cell.value).lower().startswith('id '): # If the cell starts with 'id.', add the value of the row above it, and append the rest of the cell's value. 
suffix = self.source_cell.value[3:] try : self.rowhierarchy[i][j] = self.rowhierarchy[i-1][j]+suffix self.log.debug("({},{}) Copied from above+suffix\nRow hierarchy {}".format(i,j,self.rowhierarchy[i])) except : self.rowhierarchy[i][j] = self.source_cell.value self.log.debug("({},{}) Top row, added value\nRow hierarchy {}".format(i,j,self.rowhierarchy[i])) elif not self.isEmpty(i,j) : self.rowhierarchy[i][j] = self.source_cell.value self.log.debug("({},{}) Added value\nRow hierarchy {}".format(i,j,self.rowhierarchy[i])) return self.rowhierarchy def parseHierarchicalRowHeader(self, i, j) : """ Create relevant triples for the cell marked as HierarchicalRowHeader (i, j are row and column) """ # Use the rowhierarchy to create a unique qname for the cell's contents, # give the source_cell's original value as extra argument self.log.debug("Parsing HierarchicalRowHeader") # Add all the values for (index, value) in self.rowhierarchy[i].items(): prop = self.property_dimensions[index] self.row_dimensions.setdefault(i,{}) self.row_dimensions[i][self.namespaces['scope'][prop]]= Literal(value) # Relate the hierarchical headers keys = self.rowhierarchy[i].keys() for i in range(len(keys)-1): prop_top = self.namespaces['scope'][self.property_dimensions[keys[i]]] prop_sub = self.namespaces['scope'][self.property_dimensions[keys[i+1]]] self.graph.add((prop_sub, self.namespaces['tablink']['subPropertyOf'], prop_top)) def parseRowLabel(self, i, j): """ Create relevant triples for the cell marked as Label (i, j are row and column) """ self.log.debug("Parsing Row Label") # Get the QName of the HierarchicalRowHeader cell that this label belongs to, based on the rowhierarchy for this row (i) hierarchicalRowHeader_value_qname = self.getQName(self.rowhierarchy[i]) prefLabels = self.graph.objects(self.namespaces['scope'][hierarchicalRowHeader_value_qname], self.namespaces['skos'].prefLabel) for label in prefLabels : # If the hierarchicalRowHeader QName already has a preferred label, turn it 
into a skos:altLabel self.graph.remove((self.namespaces['scope'][hierarchicalRowHeader_value_qname],self.namespaces['skos'].prefLabel,label)) self.graph.add((self.namespaces['scope'][hierarchicalRowHeader_value_qname],self.namespaces['skos'].altLabel,label)) self.log.debug("Turned skos:prefLabel {} for {} into a skos:altLabel".format(label, hierarchicalRowHeader_value_qname)) # Add the value of the label cell as skos:prefLabel to the header cell # self.graph.add((self.namespaces['scope'][hierarchicalRowHeader_value_qname], self.namespaces['skos'].prefLabel, Literal(self.source_cell.value, 'nl'))) # Record that this source_cell_qname is the label for the HierarchicalRowHeader cell # self.graph.add((self.namespaces['scope'][self.source_cell_qname], self.namespaces['tablink']['isLabel'], self.namespaces['scope'][hierarchicalRowHeader_value_qname])) def parseRowHeader(self, i, j) : """ Create relevant triples for the cell marked as RowHeader (i, j are row and column) """ rowHeaderValue = "" # Don't attach the cell value to the namespace if it's already a URI isURI = urlparse(str(self.source_cell.value)) if isURI.scheme and isURI.netloc: rowHeaderValue = URIRef(self.source_cell.value) else: self.source_cell_value_qname = self.source_cell.value rowHeaderValue = Literal(self.source_cell_value_qname) # Get the properties to use for the row headers prop = self.property_dimensions[j] self.row_dimensions.setdefault(i,{}) self.row_dimensions[i][self.namespaces['scope'][prop]]= rowHeaderValue return def parseColHeader(self, i, j) : """ Create relevant triples for the cell marked as Header (i, j are row and column) """ cell_content = self.processString(self.source_cell.value) if self.isEmpty(i,j): if self.insideMergeBox(i,j): k, l = self.getMergeBoxCoord(i,j) # If we are in a vertical merge box, skip adding the dimension if l == j: return # Update cell content cell_content = self.processString(self.r_sheet.cell(k,l).value) else: return # Add the value qname to the 
column_dimensions list for that column self.column_dimensions.setdefault(j,[self.sheet_qname]).append(cell_content) # Add the data to the graph resource = self.getColHeaderValueURI(self.column_dimensions[j]) self.graph.add((resource, RDF.type, self.namespaces['tablink']['ColumnHeader'])) self.graph.add((resource, self.namespaces['skos']['prefLabel'], Literal(cell_content))) self.graph.add((resource, self.namespaces['tablink']['cell'], Literal(self.source_cell_name))) return def parseRowProperty(self, i, j) : """ Create relevant triples for the cell marked as Property (i, j are row and column) """ if self.isEmpty(i,j): if self.insideMergeBox(i,j): k, l = self.getMergeBoxCoord(i,j) self.source_cell_value_qname = self.addValue(self.r_sheet.cell(k,l).value) else: return else: self.source_cell_value_qname = self.addValue(self.source_cell.value) #self.graph.add((self.namespaces['scope'][self.source_cell_qname],self.namespaces['tablink']['isDimensionProperty'],self.namespaces['scope'][self.source_cell_value_qname])) #self.graph.add((self.namespaces['scope'][self.source_cell_value_qname],RDF.type,self.namespaces['qb']['DimensionProperty'])) #self.graph.add((self.namespaces['scope'][self.source_cell_value_qname],RDF.type,RDF['Property'])) #self.property_dimensions.setdefault(j,[]).append(self.source_cell_value_qname) self.property_dimensions[j] = self.source_cell_value_qname # Add to graph resource = self.namespaces['scope'][self.property_dimensions[j]] self.graph.add((resource, RDF.type, self.namespaces['tablink']['RowProperty'])) return def parseTitle(self, i, j) : """ Create relevant triples for the cell marked as Title (i, j are row and column) """ self.graph.add((self.namespaces['scope'][self.sheet_qname], self.namespaces['tablink']['title'], Literal(self.source_cell.value))) return def parseData(self, i,j) : """ Create relevant triples for the cell marked as Data (i, j are row and column) """ if self.isEmpty(i,j) and self.config.get('dataCell', 'implicitZeros') == 
'0': return # Use the fully qualified name of the cell for the resource name observation = self.namespaces['scope'][self.source_cell_qname] # It's an observation self.graph.add((observation, RDF.type, self.namespaces['qb']['Observation'])) # It's in the data set defined by the current sheet self.graph.add((observation, self.namespaces['qb']['dataSet'], self.namespaces['scope'][self.sheet_qname])) # Add it's value # TODO type the value if self.isEmpty(i,j) and self.config.get('dataCell', 'implicitZeros') == '1': self.graph.add((observation, self.namespaces['scope'][self.dataCellPropertyName], Literal(0))) else: self.graph.add((observation, self.namespaces['scope'][self.dataCellPropertyName], Literal(self.source_cell.value))) # Use the row dimensions dictionary to find the properties that link # data values to row headers try : for (prop, value) in self.row_dimensions[i].iteritems() : self.graph.add((observation, prop, value)) except KeyError : self.log.debug("({}.{}) No row dimension for cell".format(i,j)) # Use the column dimensions dictionary to find the objects of the # d2s:dimension property self.graph.add((observation, self.namespaces['tablink']['dimension'], self.getColHeaderValueURI(self.column_dimensions[j]))) def parseAnnotation(self, i, j) : """ Create relevant triples for the annotation attached to cell (i, j) """ if self.config.get('annotations', 'model') == 'oa': # Create triples according to Open Annotation model body = BNode() self.annotationGraph.add((self.annotationNamespaces['scope'][self.source_cell_qname], RDF.type, self.annotationNamespaces['oa']['Annotation'] )) self.annotationGraph.add((self.annotationNamespaces['scope'][self.source_cell_qname], self.annotationNamespaces['oa']['hasBody'], body )) self.annotationGraph.add((body, RDF.value, Literal(self.annotations[(i,j)].text.replace("\n", " ").replace("\r", " ").replace("\r\n", " ").encode('utf-8')) )) self.annotationGraph.add((self.annotationNamespaces['scope'][self.source_cell_qname], 
self.annotationNamespaces['oa']['hasTarget'], self.namespaces['scope'][self.source_cell_qname] )) self.annotationGraph.add((self.annotationNamespaces['scope'][self.source_cell_qname], self.annotationNamespaces['oa']['annotator'], Literal(self.annotations[(i,j)].author.encode('utf-8')) )) self.annotationGraph.add((self.annotationNamespaces['scope'][self.source_cell_qname], self.annotationNamespaces['oa']['annotated'], Literal(datetime.datetime.fromtimestamp(os.path.getmtime(self.filename)).strftime("%Y-%m-%d"),datatype=self.annotationNamespaces['xsd']['date']) )) self.annotationGraph.add((self.annotationNamespaces['scope'][self.source_cell_qname], self.annotationNamespaces['oa']['generator'], URIRef("https://github.com/Data2Semantics/TabLinker") )) self.annotationGraph.add((self.annotationNamespaces['scope'][self.source_cell_qname], self.annotationNamespaces['oa']['generated'], Literal(datetime.datetime.now().strftime("%Y-%m-%d"), datatype=self.annotationNamespaces['xsd']['date']) )) self.annotationGraph.add((self.annotationNamespaces['scope'][self.source_cell_qname], self.annotationNamespaces['oa']['modelVersion'], URIRef("http://www.openannotation.org/spec/core/20120509.html") )) else: # Create triples according to Nanopublications model print "Nanopublications not implemented yet!"
class ContextTestCase(unittest.TestCase): store = 'default' slow = True tmppath = None def setUp(self): print self.store self.graph = ConjunctiveGraph(store=self.store) if self.store == "MySQL": from mysql import configString from rdflib.store.MySQL import MySQL path=configString MySQL().destroy(path) else: self.tmppath = mkdtemp() self.graph.open(self.tmppath, create=True) self.michel = URIRef(u'michel') self.tarek = URIRef(u'tarek') self.bob = URIRef(u'bob') self.likes = URIRef(u'likes') self.hates = URIRef(u'hates') self.pizza = URIRef(u'pizza') self.cheese = URIRef(u'cheese') self.c1 = URIRef(u'context-1') self.c2 = URIRef(u'context-2') # delete the graph for each test! self.graph.remove((None, None, None)) def tearDown(self): self.graph.close() shutil.rmtree(self.tmppath) def get_context(self, identifier): assert isinstance(identifier, URIRef) or \ isinstance(identifier, BNode), type(identifier) return Graph(store=self.graph.store, identifier=identifier, namespace_manager=self) def addStuff(self): tarek = self.tarek michel = self.michel bob = self.bob likes = self.likes hates = self.hates pizza = self.pizza cheese = self.cheese c1 = self.c1 graph = Graph(self.graph.store, c1) graph.add((tarek, likes, pizza)) graph.add((tarek, likes, cheese)) graph.add((michel, likes, pizza)) graph.add((michel, likes, cheese)) graph.add((bob, likes, cheese)) graph.add((bob, hates, pizza)) graph.add((bob, hates, michel)) # gasp! def removeStuff(self): tarek = self.tarek michel = self.michel bob = self.bob likes = self.likes hates = self.hates pizza = self.pizza cheese = self.cheese c1 = self.c1 graph = Graph(self.graph.store, c1) graph.remove((tarek, likes, pizza)) graph.remove((tarek, likes, cheese)) graph.remove((michel, likes, pizza)) graph.remove((michel, likes, cheese)) graph.remove((bob, likes, cheese)) graph.remove((bob, hates, pizza)) graph.remove((bob, hates, michel)) # gasp! 
def addStuffInMultipleContexts(self): c1 = self.c1 c2 = self.c2 triple = (self.pizza, self.hates, self.tarek) # revenge! # add to default context self.graph.add(triple) # add to context 1 graph = Graph(self.graph.store, c1) graph.add(triple) # add to context 2 graph = Graph(self.graph.store, c2) graph.add(triple) def testConjunction(self): self.addStuffInMultipleContexts() triple = (self.pizza, self.likes, self.pizza) # add to context 1 graph = Graph(self.graph.store, self.c1) graph.add(triple) self.assertEquals(len(self.graph), len(graph)) def testAdd(self): self.addStuff() def testRemove(self): self.addStuff() self.removeStuff() def testLenInOneContext(self): c1 = self.c1 # make sure context is empty self.graph.remove_context(self.get_context(c1)) graph = Graph(self.graph.store, c1) oldLen = len(self.graph) for i in range(0, 10): graph.add((BNode(), self.hates, self.hates)) self.assertEquals(len(graph), oldLen + 10) self.assertEquals(len(self.get_context(c1)), oldLen + 10) self.graph.remove_context(self.get_context(c1)) self.assertEquals(len(self.graph), oldLen) self.assertEquals(len(graph), 0) def testLenInMultipleContexts(self): oldLen = len(self.graph) self.addStuffInMultipleContexts() # addStuffInMultipleContexts is adding the same triple to # three different contexts. So it's only + 1 self.assertEquals(len(self.graph), oldLen + 1) graph = Graph(self.graph.store, self.c1) self.assertEquals(len(graph), oldLen + 1) def testRemoveInMultipleContexts(self): c1 = self.c1 c2 = self.c2 triple = (self.pizza, self.hates, self.tarek) # revenge! self.addStuffInMultipleContexts() # triple should be still in store after removing it from c1 + c2 self.assert_(triple in self.graph) graph = Graph(self.graph.store, c1) graph.remove(triple) self.assert_(triple in self.graph) graph = Graph(self.graph.store, c2) graph.remove(triple) self.assert_(triple in self.graph) self.graph.remove(triple) # now gone! 
self.assert_(triple not in self.graph) # add again and see if remove without context removes all triples! self.addStuffInMultipleContexts() self.graph.remove(triple) self.assert_(triple not in self.graph) def testContexts(self): triple = (self.pizza, self.hates, self.tarek) # revenge! self.addStuffInMultipleContexts() def cid(c): return c.identifier self.assert_(self.c1 in map(cid, self.graph.contexts())) self.assert_(self.c2 in map(cid, self.graph.contexts())) contextList = map(cid, list(self.graph.contexts(triple))) self.assert_(self.c1 in contextList) self.assert_(self.c2 in contextList) def testRemoveContext(self): c1 = self.c1 self.addStuffInMultipleContexts() self.assertEquals(len(Graph(self.graph.store, c1)), 1) self.assertEquals(len(self.get_context(c1)), 1) self.graph.remove_context(self.get_context(c1)) self.assert_(self.c1 not in self.graph.contexts()) def testRemoveAny(self): Any = None self.addStuffInMultipleContexts() self.graph.remove((Any, Any, Any)) self.assertEquals(len(self.graph), 0) def testTriples(self): tarek = self.tarek michel = self.michel bob = self.bob likes = self.likes hates = self.hates pizza = self.pizza cheese = self.cheese c1 = self.c1 asserte = self.assertEquals triples = self.graph.triples graph = self.graph c1graph = Graph(self.graph.store, c1) c1triples = c1graph.triples Any = None self.addStuff() # unbound subjects with context asserte(len(list(c1triples((Any, likes, pizza)))), 2) asserte(len(list(c1triples((Any, hates, pizza)))), 1) asserte(len(list(c1triples((Any, likes, cheese)))), 3) asserte(len(list(c1triples((Any, hates, cheese)))), 0) # unbound subjects without context, same results! 
asserte(len(list(triples((Any, likes, pizza)))), 2) asserte(len(list(triples((Any, hates, pizza)))), 1) asserte(len(list(triples((Any, likes, cheese)))), 3) asserte(len(list(triples((Any, hates, cheese)))), 0) # unbound objects with context asserte(len(list(c1triples((michel, likes, Any)))), 2) asserte(len(list(c1triples((tarek, likes, Any)))), 2) asserte(len(list(c1triples((bob, hates, Any)))), 2) asserte(len(list(c1triples((bob, likes, Any)))), 1) # unbound objects without context, same results! asserte(len(list(triples((michel, likes, Any)))), 2) asserte(len(list(triples((tarek, likes, Any)))), 2) asserte(len(list(triples((bob, hates, Any)))), 2) asserte(len(list(triples((bob, likes, Any)))), 1) # unbound predicates with context asserte(len(list(c1triples((michel, Any, cheese)))), 1) asserte(len(list(c1triples((tarek, Any, cheese)))), 1) asserte(len(list(c1triples((bob, Any, pizza)))), 1) asserte(len(list(c1triples((bob, Any, michel)))), 1) # unbound predicates without context, same results! asserte(len(list(triples((michel, Any, cheese)))), 1) asserte(len(list(triples((tarek, Any, cheese)))), 1) asserte(len(list(triples((bob, Any, pizza)))), 1) asserte(len(list(triples((bob, Any, michel)))), 1) # unbound subject, objects with context asserte(len(list(c1triples((Any, hates, Any)))), 2) asserte(len(list(c1triples((Any, likes, Any)))), 5) # unbound subject, objects without context, same results! asserte(len(list(triples((Any, hates, Any)))), 2) asserte(len(list(triples((Any, likes, Any)))), 5) # unbound predicates, objects with context asserte(len(list(c1triples((michel, Any, Any)))), 2) asserte(len(list(c1triples((bob, Any, Any)))), 3) asserte(len(list(c1triples((tarek, Any, Any)))), 2) # unbound predicates, objects without context, same results! 
asserte(len(list(triples((michel, Any, Any)))), 2) asserte(len(list(triples((bob, Any, Any)))), 3) asserte(len(list(triples((tarek, Any, Any)))), 2) # unbound subjects, predicates with context asserte(len(list(c1triples((Any, Any, pizza)))), 3) asserte(len(list(c1triples((Any, Any, cheese)))), 3) asserte(len(list(c1triples((Any, Any, michel)))), 1) # unbound subjects, predicates without context, same results! asserte(len(list(triples((Any, Any, pizza)))), 3) asserte(len(list(triples((Any, Any, cheese)))), 3) asserte(len(list(triples((Any, Any, michel)))), 1) # all unbound with context asserte(len(list(c1triples((Any, Any, Any)))), 7) # all unbound without context, same result! asserte(len(list(triples((Any, Any, Any)))), 7) for c in [graph, self.get_context(c1)]: # unbound subjects asserte(set(c.subjects(likes, pizza)), set((michel, tarek))) asserte(set(c.subjects(hates, pizza)), set((bob,))) asserte(set(c.subjects(likes, cheese)), set([tarek, bob, michel])) asserte(set(c.subjects(hates, cheese)), set()) # unbound objects asserte(set(c.objects(michel, likes)), set([cheese, pizza])) asserte(set(c.objects(tarek, likes)), set([cheese, pizza])) asserte(set(c.objects(bob, hates)), set([michel, pizza])) asserte(set(c.objects(bob, likes)), set([cheese])) # unbound predicates asserte(set(c.predicates(michel, cheese)), set([likes])) asserte(set(c.predicates(tarek, cheese)), set([likes])) asserte(set(c.predicates(bob, pizza)), set([hates])) asserte(set(c.predicates(bob, michel)), set([hates])) asserte(set(c.subject_objects(hates)), set([(bob, pizza), (bob, michel)])) asserte(set(c.subject_objects(likes)), set([(tarek, cheese), (michel, cheese), (michel, pizza), (bob, cheese), (tarek, pizza)])) asserte(set(c.predicate_objects(michel)), set([(likes, cheese), (likes, pizza)])) asserte(set(c.predicate_objects(bob)), set([(likes, cheese), (hates, pizza), (hates, michel)])) asserte(set(c.predicate_objects(tarek)), set([(likes, cheese), (likes, pizza)])) 
asserte(set(c.subject_predicates(pizza)), set([(bob, hates), (tarek, likes), (michel, likes)])) asserte(set(c.subject_predicates(cheese)), set([(bob, likes), (tarek, likes), (michel, likes)])) asserte(set(c.subject_predicates(michel)), set([(bob, hates)])) asserte(set(c), set([(bob, hates, michel), (bob, likes, cheese), (tarek, likes, pizza), (michel, likes, pizza), (michel, likes, cheese), (bob, hates, pizza), (tarek, likes, cheese)])) # remove stuff and make sure the graph is empty again self.removeStuff() asserte(len(list(c1triples((Any, Any, Any)))), 0) asserte(len(list(triples((Any, Any, Any)))), 0)
def convert(teifile, namespace): #graph_uri = "http://contextus.net/resource/blue_velvet/" ns = Namespace(namespace) graph = ConjunctiveGraph() graph.load(teifile, format="rdfa") graph.bind("default", ns) to_update = "" for prefix, nsuri in graph.namespaces(): #print("prefix: " + str(prefix) + " - " + str(nsuri)) if nsuri in ns: to_update = nsuri for s, p, o in graph: # print s, p, o if to_update != "" and to_update in s: graph.remove((s, p, o)) s = URIRef(s.replace(to_update, ns)) graph.add((s, p, o)) act = "" scene = "" line = "" char = 0 loc = 0 #timeline = ns['timeline/narrative'] #graph.add((timeline, RDF.type, ome['Timeline'])) tree = ET.parse(teifile) cast = dict() titleNode = tree.find('//title') castItems = tree.findall('/text/body/div1/castList//castItem') for castItem in castItems: actorNode = castItem.find('actor') roleNode = castItem.find('role') if roleNode != None: id = roleNode.get("{http://www.w3.org/XML/1998/namespace}id") #print("Found castItem!") actor = None role = None # Check to see if we already have an entry if(roleNode != None and roleNode.get("about")): charname = roleNode.get("about") if(charname.find(":") > -1): nmsp,nom = charname.split(":", 1) charcode = "character/" + str(char) charref = nmsp + ":" + charcode + "]" role = extractCURIEorURI(graph, charref,nom[0:-1]) char += 1 #print("1:" + charname + ": adding id " + id + " to " + role) else: role = extractCURIEorURI(graph, charname) #print("2:" + charname + ": adding id " + id + " to " + role) cast[id] = role graph.add((role, RDF.type, omb['Character'])) #print(charname + ": adding id " + id + " to " + role) if(actorNode != None and actorNode.get("about")): actor = extractCURIEorURI(graph, actorNode.get("about")) graph.add((actor, RDF.type, omb['Being'])) if actor != None and role != None: graph.add((actor, omb['portrays'], role)) graph.add((role, omb['portrayed-by'], actor)) eventCount = 1 groupCount = 1 prior_event = None actItems = tree.findall('/text/body/div1') ref = "" for 
actItem in actItems: if actItem.get("type") == "act": act = actItem.get("n") sceneItems = actItem.findall('div2') for sceneItem in sceneItems: #print("Found sceneItems!") if sceneItem.get("type") == "scene": scene = sceneItem.get("n") # Work out the location of this scene location = None stageItems = sceneItem.findall("stage") #internalnum = 1 stagenum = 0 speechnum = 1 for stageItem in stageItems: if stageItem.get("type") == "location": # The RDFa parser doesn't handle the type - so we can grab that here. if stageItem.get("about") != None: locname = stageItem.get("about") # Adding location type/oml:space for location if stageItem.get("typeof") and stageItem.get("about"): type = extractCURIEorURI(graph, stageItem.get("typeof")) #print "1. Location: " + str(location) + " Type: " + str(type) elif stageItem.get("about"): #print "2. Location: " + str(locname) type = extractCURIEorURI(graph, oml['Space']) # Get location value and add rdfs:label is location is not using the TEI value if(locname.find(":") > -1): nmsp,nom = locname.split(":", 1) loccode = "location/" + str(loc) locref = nmsp + ":" + loccode + "]" location = extractCURIEorURI(graph, locref, nom[0:-1]) loc += 1 graph.add((location, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(nom[0:-1]))) else: location = extractCURIEorURI(graph, stageItem.get("about")) # Add location to graph graph.add((location, RDF.type, type)) else: location = "" #print("Adding location type: " + type + " (" + location + ")") if cast: # Work out a list of all cast in a given section currentCast = list() speakers = list() # Iterate through elements within stageItem # Find speaker events and add to list of current cast for inclusion in social event # Find reference events and add to ongoing social event ? # Find stage events # If event is an entrance then # create social event for people talking before entrance # create travel event i.e. 
entrance # add new arrival to current cast list # If event is exit event then # create social event for people talking before exit # create travel event i.e. exit # if leavers are not named directly the calculate who is leaving # remove leavers from current cast list # If reach end of scene then create social event with current cast list #Also need to check if social event before exit has same composition as social event after exit since then they should be merged event = ns['event/'+str(eventCount)] group = ns['group/'+str(groupCount)] refersTo = list() #parent = None speakerNodes = list() speakerRef = list() xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene) stagecount = 0 stage_array = list() for node in sceneItem.getiterator(): #print("Node: " + node.tag) """ if node.tag == "lb": if node.get("ed") == "F1": line = node.get("n") if titleNode != None: ref = titleNode.text + " " + str(act) + "." + str(scene) + "." + str(line) else: ref = str(act) + "." + str(scene) + "." 
+ str(line) #xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene) + "#xpointer(//lb[@ed='F1' and @n='" + str(line) + "'])" xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene) #print("Ref: " + xpointer) """ if node.tag == "sp": id = node.get("who") if id and cast: speakers.append(cast[id[1:]]) speakerNodes.append(node) if perseusid == None: speakerRef.append(ref) else: #speechRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(int(line) + 1) + "']/ancestor::sp)" speechRef = xpointer + "#xpointer(//div2/sp[" + str(speechnum) + "])"; speakerRef.append(speechRef) #print("Line ref: " + ref) if cast[id[1:]] not in currentCast: currentCast.append(cast[id[1:]]) #internalnum = 1 speechnum += 1 stagecount = 0 previousl = 0 for subnode in node.getiterator(): if subnode.tag == "l": previousl += 1 if subnode.tag == "stage": #print ("Stagecount: " + str(stagecount) + " Previousl: " + str(previousl) + "\n") stage_array.append(previousl) stagecount += 1 elif node.tag == "stage": if stagecount > 0: s_max = len(stage_array) diff = s_max - stagecount #if diff == 0: # stagenum += 1 entRef = xpointer + "#xpointer(//div2/sp[" + str(speechnum - 1) + "]/l[" + str(stage_array[diff]) +"]/stage)"; #internalnum += 1 stagecount -= 1 else: stagenum += 1 entRef = xpointer + "#xpointer(//div2/stage[" + str(stagenum) +"])"; if node.get("type") == "entrance": # Add Social Events for all the people who spoke since the last break (if there were any) update = list() update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location) eventCount = update[0] prior_event = update[1] event = ns['event/'+str(eventCount)] speakers = list() speakerNodes = list() speakerRef = list() # Add Travel Event graph.add((event, RDF.type, omj['Travel'])) if perseusid == None: 
graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), Literal(ref))) else: #entRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(line) + "']/following-sibling::*[1]/self::stage)" graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(entRef))) #print("Entrance event. GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast))) #print("Found entrence event!") if location: graph.add((event, ome['to'], location)) involved = node.get("about") if(len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"): involved = involved[1:-1] chunks = involved.split() chunk_count = len(chunks) if chunk_count > 1: #type = extractCURIEorURI(graph, "[omb:Group]") #graph.add((group, RDF.type, type)) graph.add((group, RDF.type, omb['Group'])) event_label = "" en = 1 for chunk in chunks: striped = chunk.strip() if(len(striped) > 0 and striped[0] == "[" and striped[-1] == "]"): striped = striped[1:-1] currentCast.append(cast[striped]) if chunk_count > 1: graph.add((group, ome['contains'], cast[striped])) if en == chunk_count: event_label = event_label[0:-2] + " and " + striped graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " arrive"))) elif en < chunk_count: event_label += striped + ", " else: #print("Adding person as subject-entity to entry event " + str(eventCount)) graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(striped + " arrives"))) graph.add((event, ome['has-subject-entity'], cast[striped])) en += 1 if chunk_count > 1: graph.add((event, ome['has-subject-entity'], group)) #print("Adding group as subject-entity to entry event " + str(eventCount)) groupCount = groupCount + 1 group = ns['group/'+str(groupCount)] if(prior_event): graph.add((event, ome['follows'], prior_event)) graph.add((prior_event, ome['precedes'], event)) prior_event = event eventCount = 
eventCount + 1 event = ns['event/'+str(eventCount)] if node.get("type") == "exit": # Add Social Events for all the people who spoke since the last break (if there were any) update = list() update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location) eventCount = update[0] prior_event = update[1] event = ns['event/'+str(eventCount)] speakers = list() speakerNodes = list() speakerRef = list() # Add Travel Event graph.add((event, RDF.type, omj['Travel'])) if perseusid == None: graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), Literal(ref))) else: #exitRef = xpointer #graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(exitRef))) graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(entRef))) #print("Found entrence event!") if location != None: graph.add((event, ome['from'], location)) involved = node.get("about") if involved.strip() == "" or "-all" in involved: # Remove everyone #print("Exit all. 
GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast))) #for peep in currentCast: # print(peep) if len(currentCast) > 1: #type = extractCURIEorURI(graph, "[omb:Group]") #graph.add((group, RDF.type, type)) graph.add((group, RDF.type, omb['Group'])) event_label = "" en = 1 for peep in currentCast: short_ref = "" for key, value in cast.iteritems(): if peep == value: short_ref = key if len(currentCast) > 1: graph.add((group, ome['contains'], peep)) if en == len(currentCast): event_label = event_label[0:-2] + " and " + short_ref graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " leave"))) elif en < len(currentCast): event_label += short_ref + ", " else: #print("Adding person as subject-entity to exuant event " + str(eventCount)) graph.add((event, ome['has-subject-entity'], peep)) graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(short_ref + " leaves"))) en += 1 if len(currentCast) > 1: graph.add((event, ome['has-subject-entity'], group)) #print("Adding group as subject-entity to exuant event " + str(eventCount)) groupCount = groupCount + 1 group = ns['group/'+str(groupCount)] currentCast = list() elif "!" in involved: #print("Exit except some. GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast))) #print("Event: " + involved); if(len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"): involved = involved[1:-1] involved = involved.strip() if(len(involved) > 0 and involved[0] == "!" 
and involved[1] == "(" and involved[-1] == ")"): involved = involved[2:-1] #print("involved: " + involved) striped = involved.strip() c_ids = striped.split() chunks = list() for stay in c_ids: #print("Staying: " + cast[stay]) chunks.append(cast[stay]) staying = list() going = list() for player in currentCast: #print("Player: " + player) if player in chunks: staying.append(player) else: going.append(player) going_count = len(going) if going_count > 1: #type = extractCURIEorURI(graph, "[omb:Group]") #graph.add((group, RDF.type, type)) graph.add((group, RDF.type, omb['Group'])) event_label = "" en = 1 for ghost in going: #print("ghost: " + ghost) short_ref = "" for key, value in cast.iteritems(): if ghost == value: short_ref = key if ghost in currentCast: currentCast.remove(ghost) #print("Current cast count: " + str(len(currentCast))) if going_count > 1: graph.add((group, ome['contains'], ghost)) if en == len(going): event_label = event_label[0:-2] + " and " + short_ref graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " leave"))) elif en < len(going): event_label += short_ref + ", " else: #print("Adding person as subject-entity to exit event " + str(eventCount)) graph.add((event, ome['has-subject-entity'], ghost)) graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(short_ref + " leaves"))) en += 1 if going_count > 1: graph.add((event, ome['has-subject-entity'], group)) #print("Adding group as subject-entity to exit event " + str(eventCount)) groupCount = groupCount + 1 group = ns['group/'+str(groupCount)] else: #print("Exit some. 
GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast))) if(len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"): involved = involved[1:-1] striped = involved.strip() chunks = striped.split() #print("striped: " + striped) chunk_count = len(chunks) if chunk_count > 1: #type = extractCURIEorURI(graph, "[omb:Group]") #graph.add((group, RDF.type, type)) graph.add((group, RDF.type, omb['Group'])) event_label = "" en = 1 for chunk in chunks: #print("chunk: " + chunk) ghost = cast[chunk] #print("ghost: " + ghost) if ghost in currentCast: currentCast.remove(ghost) #print("Current cast count: " + str(len(currentCast))) if chunk_count > 1: graph.add((group, ome['contains'], ghost)) if en == len(currentCast): event_label = event_label[0:-2] + " and " + chunk graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " leave"))) elif en < len(currentCast): event_label += chunk + ", " else: #print("Adding person as subject-entity to exit event " + str(eventCount)) graph.add((event, ome['has-subject-entity'], ghost)) graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(chunk + " leaves"))) en += 1 if chunk_count > 1: graph.add((event, ome['has-subject-entity'], group)) #print("Adding group as subject-entity to exit event " + str(eventCount)) groupCount = groupCount + 1 group = ns['group/'+str(groupCount)] if(prior_event): graph.add((event, ome['follows'], prior_event)) graph.add((prior_event, ome['precedes'], event)) prior_event = event eventCount = eventCount + 1 event = ns['event/'+str(eventCount)] #elif node.tag == "rs": # #print("Found rs node") # if parent: # #print("Parent type is " + parent.tag) # if parent.tag == "p" or parent.tag == "l": # refersTo.append(node.get("about")) #parent = node # Add Social Events for all the people who spoke since the last break (if there were any) #print("Final section of scene, 
currentCast:" + str(len(currentCast)) + " sperkers: " + str(len(speakers))) update = list() update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location) eventCount = update[0] prior_event = update[1] event = ns['event/'+str(eventCount)] group = ns['group/'+str(groupCount)] speakers = list() speakerNodes = list() currentCast = list() speakerRef = list() print graph.serialize(format='xml')
class RDFCrawler:
    """Recursively crawl linked RDF resources into a persistent graph store."""

    logger = logging.getLogger(__name__)

    def __init__(self, uri, domains=None):
        """
        :param uri: root URI to start crawling.
        :param domains: optional iterable of permitted domains to crawl.
        """
        self.root = uri
        self.graph_route = 'graph_store_%s' % hash(self.root)
        self.graph = ConjunctiveGraph('Sleepycat')
        self.graph.open(self.graph_route, create=True)
        # Copy the caller's collection: the original signature used a mutable
        # default argument (domains=set()) and aliased it, so every instance
        # constructed without an explicit argument shared -- and mutated --
        # the same set object.
        self._filter_domains = set(domains) if domains else set()
        self._filter_domains.add(uri)
        self.last_process_time = 0.0
        self.lock = RLock()

    def filter_uris(self, uri_list):
        """
        Keep only URIs containing one of the permitted domains.

        :param uri_list: list of URIs to be filtered.
        :return: filtered list of URIs, in input order; a URI appears once
            per matching domain (preserving the original behaviour).
        """
        return [uri for uri in uri_list
                for match in self._filter_domains
                if match in str(uri)]

    def _has_context(self, graph, subject):
        """
        :param graph: graph whose named contexts are inspected.
        :param subject: the URIRef or URI to check for a current context.
        :return: True if the subject's context holds more than one statement.
        """
        return len(graph.get_context(self._get_context_id(subject))) > 1

    @staticmethod
    def _get_context_id(subject):
        """
        :param subject: URIRef or URI from which to derive the context id.
        :return: context id of the resource (the URI without its fragment).
            Example:
                subject    -> http://www.example.org/#fragment
                context_id -> http://www.example.org/
        """
        return str(subject).split('#')[0]

    def start(self):
        """Erase any previously crawled data and (re)crawl from the root URI."""
        self.lock.acquire(True)
        try:
            # Erase old graph. A wildcard remove clears every context in one
            # call; the original iterated self.graph.quads() while removing
            # each quad, mutating the store mid-iteration.
            self.graph.remove((None, None, None))

            # Crawl for data.
            self.logger.info('Start crawling: %s' % self.root)
            start_time = time.time()
            self._crawl([self.root])
            end_time = time.time()

            self.last_process_time = end_time - start_time
            self.logger.info(
                'Crawling complete after: %s seconds with %s predicates.' %
                (self.last_process_time, len(self.graph)))
        finally:
            # Release even if parsing raised, so later starts don't deadlock.
            self.lock.release()

    def _crawl(self, uri_list):
        """
        Recursive method that crawls RDF objects.

        :param uri_list: list of URIs to crawl.
        """
        if not uri_list:
            return
        for uri in uri_list:
            # A few considerations about parsing params:
            #   publicID=uri due to redirection issues.
            #   format=None so the parser is chosen per resource instead of
            #   the default 'XML'.
            try:
                self.graph.parse(uri, publicID=uri, format=None)
                self.logger.info('[OK]: %s' % uri)
            except Exception as e:
                # Best-effort crawl: log the failing URI and keep going.
                self.logger.info('[Error]: %s: %s' % (uri, e))

        # Collect context ids of object URIs that remain without a parsed
        # context, then recurse on the ones inside permitted domains.
        objects = set([self._get_context_id(o)
                       for o in set(self.graph.objects(None, None))
                       if isinstance(o, URIRef)
                       and not self._has_context(self.graph, o)])
        self._crawl(self.filter_uris(objects))