class PostgreSQLStoreTests(unittest.TestCase):
    """Run the N3 store test against the "PostgreSQL" rdflib store plugin.

    The store is opened on ``configString`` before each test and destroyed
    and closed afterwards.
    """

    storetest = True
    store_name = "PostgreSQL"
    path = configString
    create = True

    def setUp(self):
        """Open a graph backed by the store named in ``store_name``."""
        self.graph = Graph(store=self.store_name)
        self.graph.open(self.path, create=self.create)

    def tearDown(self):
        """Destroy and close the store, then remove ``path`` if it is on disk."""
        import os
        import shutil

        try:
            self.graph.destroy(self.path)
        finally:
            # Always release the connection, even when destroy() fails.
            self.graph.close()

        path = getattr(self, "path", None)
        if path is None or not os.path.exists(path):
            # A connection string that is not a filesystem entry lands here.
            return
        if os.path.isdir(path):
            # rmtree also copes with nested directories, which a flat
            # unlink-per-entry loop followed by rmdir does not.
            shutil.rmtree(path)
        else:
            os.remove(path)

    def test_PostgreSQL_testN3_store(self):
        """Delegate to the shared ``testN3Store`` helper."""
        testN3Store("PostgreSQL", configString)
def create_graph(datafile):
    """Load an N3 file into a freshly created store and report the load time.

    The store type comes from the module-level ``STORE`` and the connection
    from ``configString``. Any existing store at that location is destroyed
    first.

    :param datafile: location passed to ``Graph.parse`` (parsed as ``n3``).
    :returns: the opened graph holding the parsed data.
    """
    store_graph = Graph(store=STORE)
    # Wipe whatever is already there so the timing covers a clean load.
    store_graph.destroy(configString)
    store_graph.open(configString, create=True)

    started = time.time()
    store_graph.parse(location=datafile, format='n3')
    finished = time.time()

    # NOTE(review): ``datasize`` is not defined in this function; presumably
    # a module-level label for the dataset -- confirm against the caller.
    print("%s loaded in %ss" % (datasize, finished - started))
    return store_graph
class MySQLStoreTests(unittest.TestCase):
    """Fixture for tests that run against the "MySQL" rdflib store plugin.

    Each test starts from a store that was destroyed and re-opened on
    ``configString``; the store is destroyed and closed again afterwards.
    """

    storetest = True
    store_name = "MySQL"
    path = configString
    create = True
    identifier = "rdflib_test"

    def setUp(self):
        """Reset the store and open it for the test."""
        self.graph = Graph(store=self.store_name)
        self.graph.destroy(self.path)
        self.graph.open(self.path, create=self.create)

    def tearDown(self):
        """Destroy the store contents and close the graph."""
        try:
            self.graph.destroy(self.path)
        finally:
            # Always release the connection, even when destroy() fails.
            self.graph.close()
class PostgreSQLStoreTests(unittest.TestCase):
    """Run the N3 store test against the "PostgreSQL" rdflib store plugin.

    The store is opened on ``configString`` before each test and destroyed
    and closed afterwards.
    """

    storetest = True
    store_name = "PostgreSQL"
    path = configString
    create = True

    def setUp(self):
        """Open a graph backed by the store named in ``store_name``."""
        self.graph = Graph(store=self.store_name)
        self.graph.open(self.path, create=self.create)

    def tearDown(self):
        """Destroy the store contents and close the graph."""
        try:
            self.graph.destroy(self.path)
        finally:
            # Always release the connection, even when destroy() fails.
            self.graph.close()

    def test_PostgreSQL_testN3_store(self):
        """Delegate to the shared ``testN3Store`` helper."""
        testN3Store('PostgreSQL', configString)
class GraphTest(TestCase):
    """
    Testing the basic graph functionality.

    Heavily based on https://github.com/RDFLib/rdflib-postgresql/blob/master/test/graph_case.py
    """  # noqa: E501

    store_name = "Django"
    storetest = True
    path = ''
    create = True

    michel = URIRef(u'michel')
    tarek = URIRef(u'tarek')
    bob = URIRef(u'bob')
    likes = URIRef(u'likes')
    hates = URIRef(u'hates')
    pizza = URIRef(u'pizza')
    cheese = URIRef(u'cheese')

    def setUp(self):
        """Reset the store and open it for the test."""
        self.graph = Graph(store=self.store_name)
        self.graph.destroy(self.path)
        self.graph.open(self.path, create=self.create)

    def tearDown(self):
        """Destroy the store contents and close the graph."""
        try:
            self.graph.destroy(self.path)
        finally:
            # Always release the store, even when destroy() fails.
            self.graph.close()

    def _stuff(self):
        """Return the seven fixture triples, in insertion order."""
        return [
            (self.tarek, self.likes, self.pizza),
            (self.tarek, self.likes, self.cheese),
            (self.michel, self.likes, self.pizza),
            (self.michel, self.likes, self.cheese),
            (self.bob, self.likes, self.cheese),
            (self.bob, self.hates, self.pizza),
            (self.bob, self.hates, self.michel),
        ]

    def addStuff(self):
        """Add every fixture triple to the graph and commit."""
        for triple in self._stuff():
            self.graph.add(triple)
        self.graph.commit()

    def removeStuff(self):
        """Remove every fixture triple from the graph (no commit)."""
        for triple in self._stuff():
            self.graph.remove(triple)

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testTriples(self):
        """Check match counts for every bound/unbound pattern combination."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        triples = self.graph.triples
        Any = None

        def count(pattern):
            # Number of triples in the graph that match ``pattern``.
            return len(list(triples(pattern)))

        self.addStuff()

        # unbound subjects
        self.assertEqual(count((Any, likes, pizza)), 2)
        self.assertEqual(count((Any, hates, pizza)), 1)
        self.assertEqual(count((Any, likes, cheese)), 3)
        self.assertEqual(count((Any, hates, cheese)), 0)

        # unbound objects
        self.assertEqual(count((michel, likes, Any)), 2)
        self.assertEqual(count((tarek, likes, Any)), 2)
        self.assertEqual(count((bob, hates, Any)), 2)
        self.assertEqual(count((bob, likes, Any)), 1)

        # unbound predicates
        self.assertEqual(count((michel, Any, cheese)), 1)
        self.assertEqual(count((tarek, Any, cheese)), 1)
        self.assertEqual(count((bob, Any, pizza)), 1)
        self.assertEqual(count((bob, Any, michel)), 1)

        # unbound subject, objects
        self.assertEqual(count((Any, hates, Any)), 2)
        self.assertEqual(count((Any, likes, Any)), 5)

        # unbound predicates, objects
        self.assertEqual(count((michel, Any, Any)), 2)
        self.assertEqual(count((bob, Any, Any)), 3)
        self.assertEqual(count((tarek, Any, Any)), 2)

        # unbound subjects, predicates
        self.assertEqual(count((Any, Any, pizza)), 3)
        self.assertEqual(count((Any, Any, cheese)), 3)
        self.assertEqual(count((Any, Any, michel)), 1)

        # all unbound
        self.assertEqual(count((Any, Any, Any)), 7)
        self.removeStuff()
        self.assertEqual(count((Any, Any, Any)), 0)

    def testConnected(self):
        """The fixture graph is connected until an isolated triple is added."""
        graph = self.graph
        self.addStuff()
        self.assertEqual(True, graph.connected())

        jeroen = URIRef("jeroen")
        unconnected = URIRef("unconnected")

        graph.add((jeroen, self.likes, unconnected))

        self.assertEqual(False, graph.connected())

    def testSub(self):
        """``-`` and ``-=`` keep only triples missing from the right operand."""
        g1 = Graph()
        g2 = Graph()

        g1.add((self.tarek, self.likes, self.pizza))
        g1.add((self.bob, self.likes, self.cheese))

        g2.add((self.bob, self.likes, self.cheese))

        g3 = g1 - g2

        self.assertEqual(len(g3), 1)
        self.assertIn((self.tarek, self.likes, self.pizza), g3)
        self.assertNotIn((self.tarek, self.likes, self.cheese), g3)
        self.assertNotIn((self.bob, self.likes, self.cheese), g3)

        g1 -= g2

        self.assertEqual(len(g1), 1)
        self.assertIn((self.tarek, self.likes, self.pizza), g1)
        self.assertNotIn((self.tarek, self.likes, self.cheese), g1)
        self.assertNotIn((self.bob, self.likes, self.cheese), g1)

    def testGraphAdd(self):
        """``+`` and ``+=`` produce the union of both operands."""
        g1 = Graph()
        g2 = Graph()

        g1.add((self.tarek, self.likes, self.pizza))

        g2.add((self.bob, self.likes, self.cheese))

        g3 = g1 + g2

        self.assertEqual(len(g3), 2)
        self.assertIn((self.tarek, self.likes, self.pizza), g3)
        self.assertNotIn((self.tarek, self.likes, self.cheese), g3)
        self.assertIn((self.bob, self.likes, self.cheese), g3)

        g1 += g2

        self.assertEqual(len(g1), 2)
        self.assertIn((self.tarek, self.likes, self.pizza), g1)
        self.assertNotIn((self.tarek, self.likes, self.cheese), g1)
        self.assertIn((self.bob, self.likes, self.cheese), g1)

    def testGraphIntersection(self):
        """``*`` and ``*=`` keep only triples present in both operands."""
        g1 = Graph()
        g2 = Graph()

        g1.add((self.tarek, self.likes, self.pizza))
        g1.add((self.michel, self.likes, self.cheese))

        g2.add((self.bob, self.likes, self.cheese))
        g2.add((self.michel, self.likes, self.cheese))

        g3 = g1 * g2

        self.assertEqual(len(g3), 1)
        self.assertNotIn((self.tarek, self.likes, self.pizza), g3)
        self.assertNotIn((self.tarek, self.likes, self.cheese), g3)
        self.assertNotIn((self.bob, self.likes, self.cheese), g3)
        self.assertIn((self.michel, self.likes, self.cheese), g3)

        g1 *= g2

        self.assertEqual(len(g1), 1)
        self.assertNotIn((self.tarek, self.likes, self.pizza), g1)
        self.assertNotIn((self.tarek, self.likes, self.cheese), g1)
        self.assertNotIn((self.bob, self.likes, self.cheese), g1)
        self.assertIn((self.michel, self.likes, self.cheese), g1)
class GraphTest(test.TestCase):
    """
    Testing the basic graph functionality.

    Heavily based on https://github.com/RDFLib/rdflib-postgresql/blob/master/test/graph_case.py
    """

    store_name = "Django"
    storetest = True
    path = ''
    create = True

    michel = URIRef(u'michel')
    tarek = URIRef(u'tarek')
    bob = URIRef(u'bob')
    likes = URIRef(u'likes')
    hates = URIRef(u'hates')
    pizza = URIRef(u'pizza')
    cheese = URIRef(u'cheese')

    def setUp(self):
        """Reset the store and open it for the test."""
        self.graph = Graph(store=self.store_name)
        self.graph.destroy(self.path)
        self.graph.open(self.path, create=self.create)

    def tearDown(self):
        """Destroy the store contents and close the graph."""
        try:
            self.graph.destroy(self.path)
        finally:
            # Always release the store, even when destroy() fails.
            self.graph.close()

    def _stuff(self):
        """Return the seven fixture triples, in insertion order."""
        return [
            (self.tarek, self.likes, self.pizza),
            (self.tarek, self.likes, self.cheese),
            (self.michel, self.likes, self.pizza),
            (self.michel, self.likes, self.cheese),
            (self.bob, self.likes, self.cheese),
            (self.bob, self.hates, self.pizza),
            (self.bob, self.hates, self.michel),
        ]

    def addStuff(self):
        """Add every fixture triple to the graph and commit."""
        for triple in self._stuff():
            self.graph.add(triple)
        self.graph.commit()

    def removeStuff(self):
        """Remove every fixture triple from the graph (no commit)."""
        for triple in self._stuff():
            self.graph.remove(triple)

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testTriples(self):
        """Check match counts for every bound/unbound pattern combination."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        triples = self.graph.triples
        Any = None

        def count(pattern):
            # Number of triples in the graph that match ``pattern``.
            return len(list(triples(pattern)))

        self.addStuff()

        # unbound subjects
        self.assertEqual(count((Any, likes, pizza)), 2)
        self.assertEqual(count((Any, hates, pizza)), 1)
        self.assertEqual(count((Any, likes, cheese)), 3)
        self.assertEqual(count((Any, hates, cheese)), 0)

        # unbound objects
        self.assertEqual(count((michel, likes, Any)), 2)
        self.assertEqual(count((tarek, likes, Any)), 2)
        self.assertEqual(count((bob, hates, Any)), 2)
        self.assertEqual(count((bob, likes, Any)), 1)

        # unbound predicates
        self.assertEqual(count((michel, Any, cheese)), 1)
        self.assertEqual(count((tarek, Any, cheese)), 1)
        self.assertEqual(count((bob, Any, pizza)), 1)
        self.assertEqual(count((bob, Any, michel)), 1)

        # unbound subject, objects
        self.assertEqual(count((Any, hates, Any)), 2)
        self.assertEqual(count((Any, likes, Any)), 5)

        # unbound predicates, objects
        self.assertEqual(count((michel, Any, Any)), 2)
        self.assertEqual(count((bob, Any, Any)), 3)
        self.assertEqual(count((tarek, Any, Any)), 2)

        # unbound subjects, predicates
        self.assertEqual(count((Any, Any, pizza)), 3)
        self.assertEqual(count((Any, Any, cheese)), 3)
        self.assertEqual(count((Any, Any, michel)), 1)

        # all unbound
        self.assertEqual(count((Any, Any, Any)), 7)
        self.removeStuff()
        self.assertEqual(count((Any, Any, Any)), 0)

    def testConnected(self):
        """The fixture graph is connected until an isolated triple is added."""
        graph = self.graph
        self.addStuff()
        self.assertEqual(True, graph.connected())

        jeroen = URIRef("jeroen")
        unconnected = URIRef("unconnected")

        graph.add((jeroen, self.likes, unconnected))

        self.assertEqual(False, graph.connected())

    def testSub(self):
        """``-`` and ``-=`` keep only triples missing from the right operand."""
        g1 = Graph()
        g2 = Graph()

        g1.add((self.tarek, self.likes, self.pizza))
        g1.add((self.bob, self.likes, self.cheese))

        g2.add((self.bob, self.likes, self.cheese))

        g3 = g1 - g2

        self.assertEqual(len(g3), 1)
        self.assertIn((self.tarek, self.likes, self.pizza), g3)
        self.assertNotIn((self.tarek, self.likes, self.cheese), g3)
        self.assertNotIn((self.bob, self.likes, self.cheese), g3)

        g1 -= g2

        self.assertEqual(len(g1), 1)
        self.assertIn((self.tarek, self.likes, self.pizza), g1)
        self.assertNotIn((self.tarek, self.likes, self.cheese), g1)
        self.assertNotIn((self.bob, self.likes, self.cheese), g1)

    def testGraphAdd(self):
        """``+`` and ``+=`` produce the union of both operands."""
        g1 = Graph()
        g2 = Graph()

        g1.add((self.tarek, self.likes, self.pizza))

        g2.add((self.bob, self.likes, self.cheese))

        g3 = g1 + g2

        self.assertEqual(len(g3), 2)
        self.assertIn((self.tarek, self.likes, self.pizza), g3)
        self.assertNotIn((self.tarek, self.likes, self.cheese), g3)
        self.assertIn((self.bob, self.likes, self.cheese), g3)

        g1 += g2

        self.assertEqual(len(g1), 2)
        self.assertIn((self.tarek, self.likes, self.pizza), g1)
        self.assertNotIn((self.tarek, self.likes, self.cheese), g1)
        self.assertIn((self.bob, self.likes, self.cheese), g1)

    def testGraphIntersection(self):
        """``*`` and ``*=`` keep only triples present in both operands."""
        g1 = Graph()
        g2 = Graph()

        g1.add((self.tarek, self.likes, self.pizza))
        g1.add((self.michel, self.likes, self.cheese))

        g2.add((self.bob, self.likes, self.cheese))
        g2.add((self.michel, self.likes, self.cheese))

        g3 = g1 * g2

        self.assertEqual(len(g3), 1)
        self.assertNotIn((self.tarek, self.likes, self.pizza), g3)
        self.assertNotIn((self.tarek, self.likes, self.cheese), g3)
        self.assertNotIn((self.bob, self.likes, self.cheese), g3)
        self.assertIn((self.michel, self.likes, self.cheese), g3)

        g1 *= g2

        self.assertEqual(len(g1), 1)
        self.assertNotIn((self.tarek, self.likes, self.pizza), g1)
        self.assertNotIn((self.tarek, self.likes, self.cheese), g1)
        self.assertNotIn((self.bob, self.likes, self.cheese), g1)
        self.assertIn((self.michel, self.likes, self.cheese), g1)
class TestLevelDBGraphCore(unittest.TestCase):
    """Core open/close/reopen tests for the "LevelDB" rdflib store plugin."""

    def setUp(self):
        """Create a graph on the LevelDB store and open it at ``configString``."""
        store = "LevelDB"
        self.graph = Graph(store=store)
        self.path = configString
        self.graph.open(self.path, create=True)

    def tearDown(self):
        """Destroy the store, close it best-effort and remove it from disk."""
        self.graph.destroy(self.path)
        try:
            self.graph.close()
        except Exception:
            # Best effort: several tests already closed the store themselves.
            pass
        self._remove_db_path()

    def _remove_db_path(self):
        """Delete ``self.path`` from the filesystem if it is set and exists."""
        path = getattr(self, 'path', None)
        if not path or not os.path.exists(path):
            return
        if os.path.isdir(path):
            shutil.rmtree(path)
        else:
            os.remove(path)

    def _add_michel_likes(self):
        """Add and commit two ``michel likes ...`` triples."""
        michel = rdflib.URIRef(u'michel')
        likes = rdflib.URIRef(u'likes')
        pizza = rdflib.URIRef(u'pizza')
        cheese = rdflib.URIRef(u'cheese')
        self.graph.add((michel, likes, pizza))
        self.graph.add((michel, likes, cheese))
        self.graph.commit()

    def _count_all_triples(self):
        """Return how many triples the graph currently yields."""
        return len(list(self.graph.triples((None, None, None))))

    def test_namespaces(self):
        self.graph.bind("dc", "http://http://purl.org/dc/elements/1.1/")
        self.graph.bind("foaf", "http://xmlns.com/foaf/0.1/")
        namespaces = list(self.graph.namespaces())
        self.assertEqual(len(namespaces), 5)
        self.assertIn(
            ('foaf', rdflib.term.URIRef(u'http://xmlns.com/foaf/0.1/')),
            namespaces)

    def test_readable_index(self):
        print(readable_index(111))

    def test_create_db(self):
        """A store re-created after its files were removed starts empty."""
        self._add_michel_likes()
        self.graph.store.close()
        self._remove_db_path()
        self.graph.store.open(self.path, create=True)
        self.assertEqual(self._count_all_triples(), 0)

    def test_missing_db_exception(self):
        """Opening with ``create=True`` succeeds when the files are gone."""
        self.graph.store.close()
        self._remove_db_path()
        self.graph.store.open(self.path, create=True)
        self.assertEqual(self._count_all_triples(), 0)

    def test_reopening_db(self):
        """Committed triples survive a close/open cycle."""
        self._add_michel_likes()
        self.graph.store.close()
        self.graph.store.open(self.path, create=False)
        self.assertEqual(self._count_all_triples(), 2)

    def test_reopening_missing_db(self):
        """Opening a non-existent database without ``create`` raises ValueError."""
        self.graph.store.close()
        self.assertRaises(ValueError, self.graph.store.open,
                          ('/tmp/NotAnExistingDB'), create=False)

    def test_isopen_db(self):
        self.assertEqual(self.graph.store.is_open(), True)
        self.graph.store.close()
        self.assertEqual(self.graph.store.is_open(), False)
class SWAnalyzer:
    """Collect statistics about an RDF dataset by running SPARQL queries.

    The dataset is either reached through ``sparql_endpoint`` (when no
    ``store`` is given) or through an already constructed ``store`` that is
    opened with ``configstring``.

    Most ``get_*`` methods return ``qres.result`` of the query unchanged;
    the ``get_*_count`` methods return the first cell of the first row as an
    ``int``. Methods that use ``self.uri_pattern`` require ``load_graph()``
    to have been called first.
    """

    # Common tail of every link query: excludes the two "type" predicates
    # and closes both the FILTER and the graph pattern.
    _LINK_QUERY_TAIL = (
        '(str(?p) != "http://www.w3.org/1999/02/22-rdf-syntax-ns#type") && '
        '(str(?p) != "http://purl.org/dc/elements/1.1/type"))}'
    )

    def __init__(self, sparql_endpoint, identifier, configstring, store=None, proxy=None, subprocess=True):
        """Build the underlying graph and optionally install an HTTP proxy.

        :param sparql_endpoint: endpoint URL used when ``store`` is None.
        :param identifier: graph identifier, wrapped in a ``URIRef``.
        :param configstring: configuration passed to ``Graph.open``/``destroy``.
        :param store: existing store; None means "use a SPARQLStore".
        :param proxy: proxy URL; only its network location is used.
        :param subprocess: whether ``map_subprocess`` uses a process pool.
        """
        self.sparql_endpoint = sparql_endpoint
        # Records the *argument*: it stays None when the SPARQLStore below is
        # created, which is what open()/close() test to skip their work.
        self.store = store
        # Set unconditionally: get_linksets() reads both attributes no
        # matter which kind of store backs the graph.
        self.configstring = configstring
        if store is None:
            print("Creating SPARQLStore for %s" % self.sparql_endpoint)
            store = SPARQLStore(self.sparql_endpoint)
            self.identifier = URIRef(identifier) if identifier is not None else None
            self.graph = Graph(store)
        else:
            self.identifier = URIRef(identifier)
            self.graph = Graph(store, identifier=self.identifier)
        self.subprocess = subprocess
        if proxy is not None:
            print("Initilizing proxy...")
            proxy = urllib2.ProxyHandler({"http": urlparse(proxy).netloc})
            opener = urllib2.build_opener(proxy)
            urllib2.install_opener(opener)

    def open(self):
        """Open the graph with ``configstring`` when a store was supplied."""
        if self.store is not None:
            self.graph.open(self.configstring, create=True)

    def close(self):
        """Destroy and close the graph when a store was supplied."""
        if self.store is not None:
            self.graph.destroy(self.configstring)
            self.graph.close()

    def load_graph(self):
        """Compute and cache ``uri_pattern`` from the graph's subjects."""
        self.uri_pattern = self.get_uri_pattern()[1]

    # -- query helpers -----------------------------------------------------

    def _select(self, query):
        """Run ``query`` and return its raw result rows."""
        return self.graph.query(query).result

    def _count(self, query):
        """Run a counting ``query`` and return its single value as an int."""
        return int(self.graph.query(query).result[0][0])

    def _scope_test(self, variable, internal):
        """Return a (possibly negated) regex test of ``variable`` on ``uri_pattern``."""
        negation = "" if internal else "!"
        return negation + 'regex(str(' + variable + '), "' + self.uri_pattern + '")'

    def _links_query(self, projection, subject_internal=None, object_internal=None):
        """Build a link query for the given projection and endpoint scopes.

        With both scopes left as None no regex test is emitted (all links).
        Otherwise each flag says whether that end must match ``uri_pattern``
        (True) or must not match it (False).
        """
        head = ("SELECT " + projection + " WHERE { ?s ?p ?o . "
                "FILTER (!isBlank(?s) && !isBlank(?o) && ")
        if subject_internal is None:
            scope = "isIRI(?s) && isIRI(?o) && "
        else:
            scope = (self._scope_test("?s", subject_internal) + " && isIRI(?s) && "
                     + self._scope_test("?o", object_internal) + " && isIRI(?o) && ")
        return head + scope + self._LINK_QUERY_TAIL

    # -- basic statistics --------------------------------------------------

    def get_triples(self):
        return self._select("SELECT DISTINCT * { ?s ?p ?o }")

    def get_triples_count(self):
        return self._count("SELECT (COUNT(*) AS ?no) { ?s ?p ?o }")

    def get_classes(self):
        return self._select("SELECT DISTINCT ?class WHERE { [] a ?class }")

    def get_classes_count(self):
        return self._count("SELECT COUNT(distinct ?o) AS ?no { ?s rdf:type ?o }")

    def get_properties(self):
        return self._select("SELECT DISTINCT ?p WHERE { ?s ?p ?o }")

    def get_properties_count(self):
        # This is the only definition: a later duplicate that counted
        # distinct ?s used to override it.
        return self._count("SELECT COUNT(distinct ?p) AS ?no WHERE { ?s ?p ?o }")

    def get_subjects(self):
        return self._select("SELECT DISTINCT ?s WHERE { ?s ?p ?o }")

    def get_subjects_count(self):
        return self._count("SELECT COUNT(distinct ?s) WHERE { ?s ?p ?o }")

    def get_objects(self):
        return self._select("SELECT DISTINCT ?o WHERE { ?s ?p ?o }")

    def get_objects_count(self):
        return self._count("SELECT COUNT(distinct ?o) AS ?no WHERE { ?s ?p ?o }")

    # NOTE(review): class_name / property_name are concatenated into the
    # query text unescaped; callers must pass trusted IRIs only.
    def get_class_instances(self, class_name):
        return self._select("SELECT DISTINCT ?s WHERE { ?s a <" + class_name + "> }")

    def get_class_instances_count(self, class_name):
        return self._count("SELECT COUNT(distinct ?s) AS ?no WHERE { ?s a <" + class_name + "> }")

    def get_all_classes_instances(self):
        """Return a dict mapping each class IRI to its instance count."""
        instances = {}
        for c in self.get_classes():
            clazz = str(c[0].encode("utf-8"))
            instances[clazz] = self.get_class_instances_count(clazz)
        return instances

    def get_all_predicate_triples(self):
        """Return a dict mapping each predicate IRI to its triple count."""
        predicates = {}
        for p in self.get_properties():
            predicate = str(p[0].encode("utf-8"))
            predicates[predicate] = self.get_property_count(predicate)
        return predicates

    def get_property(self, property_name):
        return self._select("SELECT * WHERE { ?s <" + property_name + "> ?o }")

    def get_property_count(self, property_name):
        return self._count("SELECT (COUNT(*) AS ?no) WHERE { ?s <" + property_name + "> ?o }")

    def get_entities(self):
        return self._select(
            'SELECT DISTINCT ?s WHERE { ?s a [] . FILTER ((!isBlank(?s)) && regex(str(?s), "^'
            + self.uri_pattern + '"))}'
        )

    def get_entities_count(self):
        return self._count(
            'SELECT COUNT(distinct ?s) AS ?no WHERE { ?s a [] . FILTER ((!isBlank(?s)) && regex(str(?s), "^'
            + self.uri_pattern + '"))}'
        )

    # -- links -------------------------------------------------------------

    def get_all_links(self):
        return self._select(self._links_query("*"))

    def get_all_links_count(self):
        return self._count(self._links_query("(COUNT(*) AS ?no)"))

    def get_ingoing_links(self):
        return self._select(self._links_query("*", False, True))

    def get_ingoing_links_count(self):
        return self._count(self._links_query("(COUNT(*) AS ?no)", False, True))

    def get_outgoing_links(self):
        return self._select(self._links_query("*", True, False))

    def get_outgoing_links_count(self):
        return self._count(self._links_query("(COUNT(*) AS ?no)", True, False))

    def get_inner_links(self):
        return self._select(self._links_query("*", True, True))

    def get_inner_links_count(self):
        return self._count(self._links_query("(COUNT(*) AS ?no)", True, True))

    # -- patterns ----------------------------------------------------------

    def get_vocabularies(self):
        """Return the namespace prefixes of all properties in the graph."""
        property_list = [str(p[0].encode("utf-8")) for p in self.get_properties()]
        return self.get_patterns(property_list)

    def get_uri_pattern(self):
        """Return ``get_pattern`` applied to all subjects of the graph."""
        subject_list = [str(subject[0].encode("utf-8")) for subject in self.get_subjects()]
        return self.get_pattern(subject_list)

    def get_pattern(self, collection):
        """Run ``namespace_finder.find_pattern`` on the ``http://`` entries."""
        processes = 10
        collection = [e for e in collection if e.startswith("http://")]
        return namespace_finder.find_pattern(
            collection, branches=processes, subprocesses=False, verbose=False)

    def get_patterns(self, uri_list):
        """Return the distinct prefixes (up to the last ``#`` or ``/``).

        The first remaining URI is cut at its last ``#`` (or, failing that,
        its last ``/``); the prefix is recorded and every URI starting with
        it is dropped. URIs containing neither separator are skipped.

        :param uri_list: iterable of URI strings; it is not modified.
        :returns: list of prefixes in order of first appearance.
        """
        remaining = list(uri_list)
        patterns = []
        while remaining:
            first = remaining[0]
            pos = first.rfind("#")
            if pos == -1:
                pos = first.rfind("/")
            if pos > -1:
                pattern = first[:pos]
                patterns.append(pattern)
                remaining = [e for e in remaining if not e.startswith(pattern)]
            else:
                # No separator at all: drop the entry so the loop terminates.
                remaining = remaining[1:]
        return patterns

    # -- linksets ----------------------------------------------------------

    def map_subprocess(self, data, branches=None):
        """Apply ``check_for_semantic`` to every item of ``data``.

        :param data: iterable of work items.
        :param branches: pool size when ``self.subprocess`` is true; None
            lets ``Pool`` choose its default.
        :returns: the mapped results.
        """
        if self.subprocess:
            pool = Pool(branches)
            try:
                return pool.map(check_for_semantic, data)
            finally:
                pool.close()
                pool.terminate()
        return map(check_for_semantic, data)

    def get_linksets(self, branches=5):
        """Group outgoing links by target dataset and merge the results.

        :param branches: upper bound on the number of worker processes.
        :returns: dict merged from the dictionaries produced per dataset.
        """
        val = URLValidator(verify_exists=False)
        outgoing_links = []
        # NOTE(review): the query is "SELECT *", so obj[0] is presumably the
        # first bound variable -- confirm it is the intended link end.
        for obj in self.get_outgoing_links():
            try:
                link = str(obj[0].encode("utf-8"))
                val(link)
            except Exception:
                # Best effort: values that fail validation are ignored.
                continue
            outgoing_links.append(link)

        out_datasets = []
        while True:
            out_pattern = self.get_pattern(outgoing_links)
            prefix = out_pattern[1]
            unmatched = [
                e for e in outgoing_links
                if not e.startswith(prefix) and not (e + "/").startswith(prefix)
            ]
            out_datasets.append(prefix)
            # Stop when everything is assigned, or when this round removed
            # nothing (which would otherwise repeat forever).
            if not unmatched or len(unmatched) == len(outgoing_links):
                break
            outgoing_links = unmatched

        if len(out_datasets) < branches:
            branches = len(out_datasets)

        result = self.map_subprocess(
            zip(out_datasets, repeat(self.uri_pattern), repeat(self.identifier), repeat(self.configstring)),
            branches,
        )

        linksets = {}
        for item in result:
            # SECURITY: eval() executes arbitrary code. It is kept because
            # the format produced by check_for_semantic is not visible here;
            # it must never be fed untrusted data.
            temp_dict = eval(str(item))
            for key in temp_dict.keys():
                linksets[key] = temp_dict[key]
        return linksets
class TestKyotoCabinetGraphCore(unittest.TestCase):
    """Core open/close/reopen tests for the "KyotoCabinet" rdflib store plugin."""

    def setUp(self):
        """Create a graph on the KyotoCabinet store and open it at ``configString``."""
        store = "KyotoCabinet"
        self.graph = Graph(store=store)
        self.path = configString
        self.graph.open(self.path, create=True)

    def tearDown(self):
        """Destroy the store, close it best-effort and remove it from disk."""
        self.graph.destroy(self.path)
        try:
            self.graph.close()
        except Exception:
            # Best effort: several tests already closed the store themselves.
            pass
        self._remove_db_path()

    def _remove_db_path(self):
        """Delete ``self.path`` from the filesystem if it is set and exists."""
        import shutil

        path = getattr(self, "path", None)
        if not path or not os.path.exists(path):
            return
        if os.path.isdir(path):
            # rmtree also copes with nested directories, which a flat
            # unlink-per-entry loop followed by rmdir does not.
            shutil.rmtree(path)
        else:
            os.remove(path)

    def _add_michel_likes(self):
        """Add and commit two ``michel likes ...`` triples."""
        michel = rdflib.URIRef(u"michel")
        likes = rdflib.URIRef(u"likes")
        pizza = rdflib.URIRef(u"pizza")
        cheese = rdflib.URIRef(u"cheese")
        self.graph.add((michel, likes, pizza))
        self.graph.add((michel, likes, cheese))
        self.graph.commit()

    def _count_all_triples(self):
        """Return how many triples the graph currently yields."""
        return len(list(self.graph.triples((None, None, None))))

    def test_namespaces(self):
        self.graph.bind("dc", "http://http://purl.org/dc/elements/1.1/")
        self.graph.bind("foaf", "http://xmlns.com/foaf/0.1/")
        namespaces = list(self.graph.namespaces())
        self.assertEqual(len(namespaces), 5)
        self.assertIn(
            ("foaf", rdflib.term.URIRef(u"http://xmlns.com/foaf/0.1/")),
            namespaces)

    def test_play_journal(self):
        self.assertRaises(NotImplementedError,
                          self.graph.store.play_journal, {"graph": self.graph})

    def test_readable_index(self):
        print(readable_index(111))

    def test_create_db(self):
        """A store re-created after its files were removed starts empty."""
        self._add_michel_likes()
        self.graph.store.close()
        self._remove_db_path()
        self.graph.store.open(self.path, create=True)
        self.assertEqual(self._count_all_triples(), 0)

    def test_missing_db_exception(self):
        """Opening with ``create=True`` succeeds when the files are gone."""
        self.graph.store.close()
        self._remove_db_path()
        self.graph.store.open(self.path, create=True)
        self.assertEqual(self._count_all_triples(), 0)

    def test_reopening_db(self):
        """Committed triples survive a close/open cycle."""
        self._add_michel_likes()
        self.graph.store.close()
        self.graph.store.open(self.path, create=False)
        self.assertEqual(self._count_all_triples(), 2)

    def test_reopening_missing_db(self):
        """Opening a non-existent database without ``create`` raises ValueError."""
        self.graph.store.close()
        self.assertRaises(ValueError, self.graph.store.open,
                          ("/tmp/NotAnExistingDB"), create=False)

    def test_isopen_db(self):
        self.assertEqual(self.graph.store.is_open(), True)
        self.graph.store.close()
        self.assertEqual(self.graph.store.is_open(), False)
class KnowledgeGraph:
    """ Knowledge Graph Class
    A wrapper around an imported rdflib.Graph object with convenience functions
    """
    graph = None
    _property_distribution = {}

    def __init__(self, graph=None):
        """Wrap an existing graph, read one or more RDF files, or start empty.

        :param graph: a ``Graph`` instance, a path, a list of paths, or None
            for an empty graph.
        :raises TypeError: for any other argument type.
        """
        self.logger = logging.getLogger()
        self.logger.debug("Initiating Knowledge Graph")

        if graph is None:
            self.graph = Graph()
        elif isinstance(graph, Graph):
            self.graph = graph
        elif isinstance(graph, str):
            self.graph = self._read([graph])
        elif isinstance(graph, list):
            self.graph = self._read(graph)
        else:
            raise TypeError(":: Wrong input type: {}; requires path to RDF"
                            " graph or rdflib.graph.Graph object".format(type(graph)))

        # Predicate -> number of triples using it, computed once up front.
        self._property_distribution = Counter(self.graph.predicates())
        self.logger.debug("Knowledge Graph ({} facts) succesfully imported".format(len(self.graph)))

    def _read(self, paths=None):
        """Parse every file in ``paths`` into one new graph and return it.

        Files recognised by ``is_gzip`` are decompressed on the fly; their
        format is guessed from the name without its last three characters.
        """
        graph = Graph()
        for path in paths or ():
            # NOTE(review): assert is stripped under ``python -O``; kept so
            # the raised exception type stays the same for callers.
            assert is_readable(path)
            if not is_gzip(path):
                graph.parse(path, format=guess_format(path))
            else:
                self.logger.debug("Input recognized as gzip file")
                with gzip.open(path, 'rb') as f:
                    graph.parse(f, format=guess_format(path[:-3]))

        return graph

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Destroy and close the wrapped graph; exceptions are not suppressed."""
        self.graph.destroy("store")
        self.graph.close(True)

    def __len__(self):
        return len(self.graph)

    ### Generators ###

    def atoms(self, separate_literals=True):
        """Yield each distinct subject/object once.

        With ``separate_literals`` every literal object is wrapped in a
        ``UniqueLiteral`` tied to its subject and predicate first.
        """
        self.logger.debug("Yielding atoms (separated literals: {})".format(
            separate_literals))
        seen = set()
        for s, p, o in self.graph.triples((None, None, None)):
            for atom in (s, o):
                if separate_literals and isinstance(atom, Literal):
                    atom = self.UniqueLiteral(s, p, atom)
                if atom in seen:
                    continue
                seen.add(atom)

                yield atom

    def non_terminal_atoms(self):
        """Yield every distinct subject of the graph."""
        self.logger.debug("Yielding non-terminal atoms")
        for atom in frozenset(self.graph.subjects()):
            yield(atom)

    def terminal_atoms(self):
        """Yield every object (once per triple) that never occurs as a subject."""
        self.logger.debug("Yielding terminal atoms")
        # A frozenset gives O(1) membership tests instead of scanning a list.
        non_terminal_atoms = frozenset(self.non_terminal_atoms())
        # list() snapshots the objects before the first value is yielded.
        for atom in list(self.graph.objects()):
            if atom in non_terminal_atoms:
                continue

            yield(atom)

    def attributes(self):
        """Yield every object of exact type ``Literal`` (once per triple)."""
        self.logger.debug("Yielding attributes")
        for obj in self.graph.objects():
            if type(obj) is Literal:
                yield(obj)

    def entities(self, omit_blank_nodes=False):
        """Yield every atom that is not a literal.

        ``isinstance`` is required here: ``atoms()`` wraps literals in
        ``UniqueLiteral``, which an exact ``type(...) is Literal`` check
        would let through.
        """
        self.logger.debug("Yielding entities")
        for res in self.atoms():
            if (isinstance(res, Literal) or
               (omit_blank_nodes and isinstance(res, BNode))):
                continue

            yield(res)

    def objecttype_properties(self):
        """Yield each predicate once if it has at least one non-literal object."""
        attributes = frozenset(self.attributes())
        self.logger.debug("Yielding OT predicates")
        seen = set()
        for p in self.graph.predicates():
            if p in seen:
                # predicates() yields one entry per triple.
                continue
            seen.add(p)
            if not (set(self.graph.objects(None, p)) - attributes):
                # p is only used with a literal as object
                continue

            yield(p)

    def datatype_properties(self):
        """Yield each predicate once if it is only used with literal objects."""
        objecttype_properties = set(self.objecttype_properties())
        self.logger.debug("Yielding DT predicates")
        seen = set()
        for p in self.graph.predicates():
            if p in seen or p in objecttype_properties:
                continue
            seen.add(p)

            yield(p)

    def properties(self):
        """Yield the predicate of every triple (duplicates included)."""
        self.logger.debug("Yielding properties")
        for p in self.graph.predicates():
            yield(p)

    def triples(self, triple=(None, None, None), separate_literals=True):
        """Yield the triples matching ``triple``.

        With ``separate_literals`` literal objects are wrapped in
        ``UniqueLiteral``.
        """
        self.logger.debug("Yielding triples (triple {})".format(triple))
        for s, p, o in self.graph.triples(triple):
            if separate_literals and isinstance(o, Literal):
                o = self.UniqueLiteral(s, p, o)
            yield s, p, o

    ## Statistics
    def property_frequency(self, property=None):
        """Return the predicate counter, or the count of one predicate.

        Returns None when ``property`` is given but does not occur.
        """
        if property is None:
            return self._property_distribution
        elif property in self._property_distribution:
            return self._property_distribution[property]

    def attribute_frequency(self, property, limit=None):
        """Return ``(object, count)`` pairs for ``property``, most common first.

        :param limit: maximum number of pairs; None returns all of them.
        """
        attribute_freq = Counter(self.graph.objects(None, property))
        # most_common(None) returns every element.
        return attribute_freq.most_common(limit)

    ## Operators
    def sample(self, strategy=None, **kwargs):
        """ Sample this graph using the given strategy
        returns a KnowledgeGraph instance
        """
        if strategy is None:
            raise ValueError('Strategy cannot be left undefined')

        self.logger.debug("Sampling graph")
        return strategy.sample(self, **kwargs)

    def quickSort(self, lst):
        """Needed to sort deterministically when using UniqueLiterals

        Orders by ``str(member)``; members with equal strings keep their
        relative order. Lists of at most one element are returned as-is.
        """
        if len(lst) <= 1:
            return lst
        # A stable sort on str() gives the same order as the former
        # recursive partitioning, without the recursion-depth limit.
        return sorted(lst, key=str)

    class UniqueLiteral(Literal):
        # literal with unique hash, irrespective of content
        def __new__(cls, s, p, o):
            self = super().__new__(cls, str(o), o.language, o.datatype, normalize=None)
            self.s = str(s)
            self.p = str(p)

            return self

        def __hash__(self):
            base = self.s + self.p + str(self)
            for attr in [self.language, self.datatype]:
                if attr is not None:
                    base += str(attr)

            return hash(base)

        def __eq__(self, other):
            if type(other) is not type(self):
                return False
            # Compare the reprs directly; comparing their hashes could
            # report equality for different values on a hash collision.
            # NOTE(review): repr() does not include s/p while __hash__
            # does, so equal objects may hash differently -- confirm intent.
            return repr(self) == repr(other)

        # functools.total_ordering is a class decorator; it did nothing
        # when it was applied to this method, so it is no longer used here.
        def __lt__(self, other):
            if type(other) is not type(self):
                return False
            # Lexicographic on (value, subject, predicate): the former
            # chain of independent checks could report both a < b and b < a.
            return ((str(self), self.s, self.p)
                    < (str(other), other.s, other.p))