class PostgreSQLStoreTests(unittest.TestCase):
    """Store tests for the rdflib store plugin registered as "PostgreSQL".

    Every test runs with ``self.graph`` opened on ``configString``.
    """

    storetest = True
    store_name = "PostgreSQL"
    path = configString
    create = True

    def setUp(self):
        """Create the graph for ``store_name`` and open it at ``path``."""
        self.graph = Graph(store=self.store_name)
        self.graph.open(self.path, create=self.create)

    def tearDown(self):
        """Destroy the store, close the graph and delete ``path`` from disk.

        The graph is closed even when ``destroy`` raises. If ``path`` names
        something on the local file system, it is removed afterwards: a
        directory is deleted recursively, anything else as a single file.
        """
        import os
        import shutil

        try:
            self.graph.destroy(self.path)
        finally:
            self.graph.close()

        target = getattr(self, "path", None)
        if target is None or not os.path.exists(target):
            return
        if os.path.isdir(target):
            # rmtree also copes with nested directories.
            shutil.rmtree(target)
        else:
            os.remove(target)

    def test_PostgreSQL_testN3_store(self):
        """Run the shared ``testN3Store`` scenario against this store."""
        testN3Store("PostgreSQL", configString)
Example #2
0
def create_graph(datafile):
    """Load the N3 document at ``datafile`` into a newly created store graph.

    The graph uses the ``STORE`` plugin at ``configString``; ``destroy`` is
    called on that location before it is opened with ``create=True``. The
    parse duration is printed together with ``datasize``.

    Returns the opened, populated graph.
    """
    loaded = Graph(store=STORE)
    loaded.destroy(configString)
    loaded.open(configString, create=True)

    started = time.time()
    loaded.parse(location=datafile, format='n3')
    finished = time.time()

    print("%s loaded in %ss" % (datasize, finished - started))
    return loaded
Example #3
0
class MySQLStoreTests(unittest.TestCase):
    """Fixture for tests against the rdflib store plugin registered as "MySQL".

    ``setUp`` leaves an opened graph in ``self.graph``; ``tearDown`` destroys
    the store and closes the graph.
    """

    storetest = True
    store_name = "MySQL"
    path = configString
    create = True
    identifier = "rdflib_test"

    def setUp(self):
        """Call ``destroy`` on ``path``, then open the store there."""
        self.graph = Graph(store=self.store_name)
        self.graph.destroy(self.path)
        self.graph.open(self.path, create=self.create)

    def tearDown(self):
        """Destroy the store; the graph is closed even if that raises."""
        try:
            self.graph.destroy(self.path)
        finally:
            self.graph.close()
class PostgreSQLStoreTests(unittest.TestCase):
    """Store tests for the rdflib store plugin registered as "PostgreSQL"."""

    storetest = True
    store_name = "PostgreSQL"
    path = configString
    create = True

    def setUp(self):
        """Create the graph for ``store_name`` and open it at ``path``."""
        self.graph = Graph(store=self.store_name)
        self.graph.open(self.path, create=self.create)

    def tearDown(self):
        """Destroy the store; the graph is closed even if that raises."""
        try:
            self.graph.destroy(self.path)
        finally:
            self.graph.close()

    def test_PostgreSQL_testN3_store(self):
        """Run the shared ``testN3Store`` scenario against this store."""
        testN3Store('PostgreSQL', configString)
class PostgreSQLStoreTests(unittest.TestCase):
    """Test case that exercises the "PostgreSQL" rdflib store plugin."""

    storetest = True
    store_name = "PostgreSQL"
    path = configString
    create = True

    def setUp(self):
        """Open a graph on the configured store at ``path``."""
        self.graph = Graph(store=self.store_name)
        self.graph.open(self.path, create=self.create)

    def tearDown(self):
        """Destroy the store, always closing the graph afterwards."""
        try:
            self.graph.destroy(self.path)
        finally:
            # Runs on the failure path too, so the graph is not left open.
            self.graph.close()

    def test_PostgreSQL_testN3_store(self):
        """Delegate to ``testN3Store`` with this store's name and config."""
        testN3Store('PostgreSQL', configString)
Example #6
0
class GraphTest(TestCase):
    """
    Testing the basic graph functionality.

    Heavily based on https://github.com/RDFLib/rdflib-postgresql/blob/master/test/graph_case.py
    """  # noqa: E501
    store_name = "Django"
    storetest = True
    path = ''
    create = True

    # Terms used to build the fixture statements.
    michel = URIRef(u'michel')
    tarek = URIRef(u'tarek')
    bob = URIRef(u'bob')
    likes = URIRef(u'likes')
    hates = URIRef(u'hates')
    pizza = URIRef(u'pizza')
    cheese = URIRef(u'cheese')

    def setUp(self):
        """Call ``destroy`` on ``path``, then open the store there."""
        self.graph = Graph(store=self.store_name)
        self.graph.destroy(self.path)
        self.graph.open(self.path, create=self.create)

    def tearDown(self):
        """Destroy the store; the graph is closed even if that raises."""
        try:
            self.graph.destroy(self.path)
        finally:
            self.graph.close()

    def _fixture_triples(self):
        """Return the seven statements shared by addStuff and removeStuff."""
        return (
            (self.tarek, self.likes, self.pizza),
            (self.tarek, self.likes, self.cheese),
            (self.michel, self.likes, self.pizza),
            (self.michel, self.likes, self.cheese),
            (self.bob, self.likes, self.cheese),
            (self.bob, self.hates, self.pizza),
            (self.bob, self.hates, self.michel),
        )

    def addStuff(self):
        """Add the fixture statements to ``self.graph`` and commit."""
        for statement in self._fixture_triples():
            self.graph.add(statement)
        self.graph.commit()

    def removeStuff(self):
        """Remove the fixture statements from ``self.graph`` (no commit)."""
        for statement in self._fixture_triples():
            self.graph.remove(statement)

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testTriples(self):
        """Check match counts for every bound/unbound pattern combination."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        triples = self.graph.triples
        Any = None  # None acts as the wildcard in a triple pattern

        self.addStuff()

        expected_counts = (
            # unbound subjects
            ((Any, likes, pizza), 2),
            ((Any, hates, pizza), 1),
            ((Any, likes, cheese), 3),
            ((Any, hates, cheese), 0),
            # unbound objects
            ((michel, likes, Any), 2),
            ((tarek, likes, Any), 2),
            ((bob, hates, Any), 2),
            ((bob, likes, Any), 1),
            # unbound predicates
            ((michel, Any, cheese), 1),
            ((tarek, Any, cheese), 1),
            ((bob, Any, pizza), 1),
            ((bob, Any, michel), 1),
            # unbound subject, objects
            ((Any, hates, Any), 2),
            ((Any, likes, Any), 5),
            # unbound predicates, objects
            ((michel, Any, Any), 2),
            ((bob, Any, Any), 3),
            ((tarek, Any, Any), 2),
            # unbound subjects, predicates
            ((Any, Any, pizza), 3),
            ((Any, Any, cheese), 3),
            ((Any, Any, michel), 1),
            # all unbound
            ((Any, Any, Any), 7),
        )
        for pattern, count in expected_counts:
            self.assertEqual(
                len(list(triples(pattern))), count,
                "pattern %r" % (pattern,))

        self.removeStuff()
        self.assertEqual(len(list(triples((Any, Any, Any)))), 0)

    def testConnected(self):
        """The fixture graph is connected until an isolated edge is added."""
        graph = self.graph
        self.addStuff()
        self.assertEqual(True, graph.connected())

        jeroen = URIRef("jeroen")
        unconnected = URIRef("unconnected")

        graph.add((jeroen, self.likes, unconnected))

        self.assertEqual(False, graph.connected())

    def testSub(self):
        """``-`` and ``-=`` keep only statements absent from the right side."""
        g1 = Graph()
        g2 = Graph()

        g1.add((self.tarek, self.likes, self.pizza))
        g1.add((self.bob, self.likes, self.cheese))

        g2.add((self.bob, self.likes, self.cheese))

        g3 = g1 - g2

        self.assertEqual(len(g3), 1)
        self.assertIn((self.tarek, self.likes, self.pizza), g3)
        self.assertNotIn((self.tarek, self.likes, self.cheese), g3)
        self.assertNotIn((self.bob, self.likes, self.cheese), g3)

        g1 -= g2

        self.assertEqual(len(g1), 1)
        self.assertIn((self.tarek, self.likes, self.pizza), g1)
        self.assertNotIn((self.tarek, self.likes, self.cheese), g1)
        self.assertNotIn((self.bob, self.likes, self.cheese), g1)

    def testGraphAdd(self):
        """``+`` and ``+=`` produce the union of both graphs."""
        g1 = Graph()
        g2 = Graph()

        g1.add((self.tarek, self.likes, self.pizza))

        g2.add((self.bob, self.likes, self.cheese))

        g3 = g1 + g2

        self.assertEqual(len(g3), 2)
        self.assertIn((self.tarek, self.likes, self.pizza), g3)
        self.assertNotIn((self.tarek, self.likes, self.cheese), g3)
        self.assertIn((self.bob, self.likes, self.cheese), g3)

        g1 += g2

        self.assertEqual(len(g1), 2)
        self.assertIn((self.tarek, self.likes, self.pizza), g1)
        self.assertNotIn((self.tarek, self.likes, self.cheese), g1)
        self.assertIn((self.bob, self.likes, self.cheese), g1)

    def testGraphIntersection(self):
        """``*`` and ``*=`` keep only statements present in both graphs."""
        g1 = Graph()
        g2 = Graph()

        g1.add((self.tarek, self.likes, self.pizza))
        g1.add((self.michel, self.likes, self.cheese))

        g2.add((self.bob, self.likes, self.cheese))
        g2.add((self.michel, self.likes, self.cheese))

        g3 = g1 * g2

        self.assertEqual(len(g3), 1)
        self.assertNotIn((self.tarek, self.likes, self.pizza), g3)
        self.assertNotIn((self.tarek, self.likes, self.cheese), g3)
        self.assertNotIn((self.bob, self.likes, self.cheese), g3)
        self.assertIn((self.michel, self.likes, self.cheese), g3)

        g1 *= g2

        self.assertEqual(len(g1), 1)
        self.assertNotIn((self.tarek, self.likes, self.pizza), g1)
        self.assertNotIn((self.tarek, self.likes, self.cheese), g1)
        self.assertNotIn((self.bob, self.likes, self.cheese), g1)
        self.assertIn((self.michel, self.likes, self.cheese), g1)
Example #7
0
class GraphTest(test.TestCase):
    """
    Testing the basic graph functionality.

    Heavily based on https://github.com/RDFLib/rdflib-postgresql/blob/master/test/graph_case.py
    """
    store_name = "Django"
    storetest = True
    path = ''
    create = True

    # Terms used to build the fixture statements.
    michel = URIRef(u'michel')
    tarek = URIRef(u'tarek')
    bob = URIRef(u'bob')
    likes = URIRef(u'likes')
    hates = URIRef(u'hates')
    pizza = URIRef(u'pizza')
    cheese = URIRef(u'cheese')

    def setUp(self):
        """Call ``destroy`` on ``path``, then open the store there."""
        self.graph = Graph(store=self.store_name)
        self.graph.destroy(self.path)
        self.graph.open(self.path, create=self.create)

    def tearDown(self):
        """Destroy the store; the graph is closed even if that raises."""
        try:
            self.graph.destroy(self.path)
        finally:
            self.graph.close()

    def _fixture_triples(self):
        """Return the seven statements shared by addStuff and removeStuff."""
        return (
            (self.tarek, self.likes, self.pizza),
            (self.tarek, self.likes, self.cheese),
            (self.michel, self.likes, self.pizza),
            (self.michel, self.likes, self.cheese),
            (self.bob, self.likes, self.cheese),
            (self.bob, self.hates, self.pizza),
            (self.bob, self.hates, self.michel),
        )

    def addStuff(self):
        """Add the fixture statements to ``self.graph`` and commit."""
        for statement in self._fixture_triples():
            self.graph.add(statement)
        self.graph.commit()

    def removeStuff(self):
        """Remove the fixture statements from ``self.graph`` (no commit)."""
        for statement in self._fixture_triples():
            self.graph.remove(statement)

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testTriples(self):
        """Check match counts for every bound/unbound pattern combination."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        triples = self.graph.triples
        Any = None  # None acts as the wildcard in a triple pattern

        self.addStuff()

        expected_counts = (
            # unbound subjects
            ((Any, likes, pizza), 2),
            ((Any, hates, pizza), 1),
            ((Any, likes, cheese), 3),
            ((Any, hates, cheese), 0),
            # unbound objects
            ((michel, likes, Any), 2),
            ((tarek, likes, Any), 2),
            ((bob, hates, Any), 2),
            ((bob, likes, Any), 1),
            # unbound predicates
            ((michel, Any, cheese), 1),
            ((tarek, Any, cheese), 1),
            ((bob, Any, pizza), 1),
            ((bob, Any, michel), 1),
            # unbound subject, objects
            ((Any, hates, Any), 2),
            ((Any, likes, Any), 5),
            # unbound predicates, objects
            ((michel, Any, Any), 2),
            ((bob, Any, Any), 3),
            ((tarek, Any, Any), 2),
            # unbound subjects, predicates
            ((Any, Any, pizza), 3),
            ((Any, Any, cheese), 3),
            ((Any, Any, michel), 1),
            # all unbound
            ((Any, Any, Any), 7),
        )
        for pattern, count in expected_counts:
            self.assertEqual(
                len(list(triples(pattern))), count,
                "pattern %r" % (pattern,))

        self.removeStuff()
        self.assertEqual(len(list(triples((Any, Any, Any)))), 0)

    def testConnected(self):
        """The fixture graph is connected until an isolated edge is added."""
        graph = self.graph
        self.addStuff()
        self.assertEqual(True, graph.connected())

        jeroen = URIRef("jeroen")
        unconnected = URIRef("unconnected")

        graph.add((jeroen, self.likes, unconnected))

        self.assertEqual(False, graph.connected())

    def testSub(self):
        """``-`` and ``-=`` keep only statements absent from the right side."""
        g1 = Graph()
        g2 = Graph()

        g1.add((self.tarek, self.likes, self.pizza))
        g1.add((self.bob, self.likes, self.cheese))

        g2.add((self.bob, self.likes, self.cheese))

        g3 = g1 - g2

        self.assertEqual(len(g3), 1)
        self.assertIn((self.tarek, self.likes, self.pizza), g3)
        self.assertNotIn((self.tarek, self.likes, self.cheese), g3)
        self.assertNotIn((self.bob, self.likes, self.cheese), g3)

        g1 -= g2

        self.assertEqual(len(g1), 1)
        self.assertIn((self.tarek, self.likes, self.pizza), g1)
        self.assertNotIn((self.tarek, self.likes, self.cheese), g1)
        self.assertNotIn((self.bob, self.likes, self.cheese), g1)

    def testGraphAdd(self):
        """``+`` and ``+=`` produce the union of both graphs."""
        g1 = Graph()
        g2 = Graph()

        g1.add((self.tarek, self.likes, self.pizza))

        g2.add((self.bob, self.likes, self.cheese))

        g3 = g1 + g2

        self.assertEqual(len(g3), 2)
        self.assertIn((self.tarek, self.likes, self.pizza), g3)
        self.assertNotIn((self.tarek, self.likes, self.cheese), g3)
        self.assertIn((self.bob, self.likes, self.cheese), g3)

        g1 += g2

        self.assertEqual(len(g1), 2)
        self.assertIn((self.tarek, self.likes, self.pizza), g1)
        self.assertNotIn((self.tarek, self.likes, self.cheese), g1)
        self.assertIn((self.bob, self.likes, self.cheese), g1)

    def testGraphIntersection(self):
        """``*`` and ``*=`` keep only statements present in both graphs."""
        g1 = Graph()
        g2 = Graph()

        g1.add((self.tarek, self.likes, self.pizza))
        g1.add((self.michel, self.likes, self.cheese))

        g2.add((self.bob, self.likes, self.cheese))
        g2.add((self.michel, self.likes, self.cheese))

        g3 = g1 * g2

        self.assertEqual(len(g3), 1)
        self.assertNotIn((self.tarek, self.likes, self.pizza), g3)
        self.assertNotIn((self.tarek, self.likes, self.cheese), g3)
        self.assertNotIn((self.bob, self.likes, self.cheese), g3)
        self.assertIn((self.michel, self.likes, self.cheese), g3)

        g1 *= g2

        self.assertEqual(len(g1), 1)
        self.assertNotIn((self.tarek, self.likes, self.pizza), g1)
        self.assertNotIn((self.tarek, self.likes, self.cheese), g1)
        self.assertNotIn((self.bob, self.likes, self.cheese), g1)
        self.assertIn((self.michel, self.likes, self.cheese), g1)
Example #8
0
class TestLevelDBGraphCore(unittest.TestCase):
    """Open/close/re-open behaviour of a graph on the "LevelDB" store."""

    def setUp(self):
        """Open a graph on the "LevelDB" store at ``configString``."""
        store = "LevelDB"
        self.graph = Graph(store=store)
        self.path = configString
        self.graph.open(self.path, create=True)

    def _remove_path(self):
        """Delete whatever exists on disk at ``self.path``, if anything.

        A directory is removed recursively, anything else as a single file.
        """
        target = getattr(self, 'path', None)
        if not target or not os.path.exists(target):
            return
        if os.path.isdir(target):
            shutil.rmtree(target)
        else:
            os.remove(target)

    def tearDown(self):
        """Destroy the store, close the graph and wipe the on-disk path.

        Closing and wiping also happen when ``destroy`` raises.
        """
        try:
            self.graph.destroy(self.path)
        finally:
            try:
                self.graph.close()
            except Exception:
                # Best effort: some tests close the store themselves.
                pass
            self._remove_path()

    def test_namespaces(self):
        self.graph.bind("dc", "http://http://purl.org/dc/elements/1.1/")
        self.graph.bind("foaf", "http://xmlns.com/foaf/0.1/")
        self.assertEqual(len(list(self.graph.namespaces())), 5)
        self.assertIn(
            ('foaf', rdflib.term.URIRef(u'http://xmlns.com/foaf/0.1/')),
            list(self.graph.namespaces()))

    def test_readable_index(self):
        print(readable_index(111))

    def test_create_db(self):
        """A store re-created after its files were deleted holds no triples."""
        michel = rdflib.URIRef(u'michel')
        likes = rdflib.URIRef(u'likes')
        pizza = rdflib.URIRef(u'pizza')
        cheese = rdflib.URIRef(u'cheese')
        self.graph.add((michel, likes, pizza))
        self.graph.add((michel, likes, cheese))
        self.graph.commit()
        self.graph.store.close()
        self._remove_path()
        self.graph.store.open(self.path, create=True)
        ntriples = self.graph.triples((None, None, None))
        self.assertEqual(len(list(ntriples)), 0)

    def test_missing_db_exception(self):
        """Opening with ``create=True`` after deletion gives an empty store."""
        self.graph.store.close()
        self._remove_path()
        self.graph.store.open(self.path, create=True)
        ntriples = self.graph.triples((None, None, None))
        self.assertEqual(len(list(ntriples)), 0)

    def test_reopening_db(self):
        """Committed triples are still there after close and re-open."""
        michel = rdflib.URIRef(u'michel')
        likes = rdflib.URIRef(u'likes')
        pizza = rdflib.URIRef(u'pizza')
        cheese = rdflib.URIRef(u'cheese')
        self.graph.add((michel, likes, pizza))
        self.graph.add((michel, likes, cheese))
        self.graph.commit()
        self.graph.store.close()
        self.graph.store.open(self.path, create=False)
        ntriples = self.graph.triples((None, None, None))
        self.assertEqual(len(list(ntriples)), 2)

    def test_reopening_missing_db(self):
        """Opening a non-existent path with ``create=False`` raises."""
        self.graph.store.close()
        self.assertRaises(ValueError,
                          self.graph.store.open, ('/tmp/NotAnExistingDB'),
                          create=False)

    def test_isopen_db(self):
        """``is_open`` reports True while open and False after ``close``."""
        self.assertEqual(self.graph.store.is_open(), True)
        self.graph.store.close()
        self.assertEqual(self.graph.store.is_open(), False)
Example #9
0
class SWAnalyzer:
    """Gather statistics about an RDF dataset by running SPARQL queries.

    Queries go through ``self.graph``. With no ``store`` the graph wraps a
    ``SPARQLStore`` on the endpoint; otherwise it is built on the given store
    and identifier and must be opened with :meth:`open`.

    Methods that filter on the dataset's own URIs need ``self.uri_pattern``,
    which is only set by :meth:`load_graph`.
    """

    # Shared tail of every link query: drops rdf:type and dc:type statements.
    _LINK_FILTER_TAIL = (
        ' && (str(?p) != "http://www.w3.org/1999/02/22-rdf-syntax-ns#type")'
        ' && (str(?p) != "http://purl.org/dc/elements/1.1/type"))}'
    )

    def __init__(self, sparql_endpoint, identifier, configstring, store=None, proxy=None, subprocess=True):
        """Build the graph and, optionally, install a global HTTP proxy.

        :param sparql_endpoint: endpoint used for the ``SPARQLStore`` when
            ``store`` is None.
        :param identifier: graph identifier; only used when ``store`` is given.
        :param configstring: store configuration passed to ``open`` and
            ``destroy``; only kept when ``store`` is given.
        :param store: store for the graph, or None to query the endpoint.
        :param proxy: URL whose network location is installed as the HTTP
            proxy for ``urllib2``, or None.
        :param subprocess: whether :meth:`map_subprocess` uses a process pool.
        """
        self.sparql_endpoint = sparql_endpoint
        self.store = store

        if store is None:
            print("Creating SPARQLStore for %s" % self.sparql_endpoint)
            store = SPARQLStore(self.sparql_endpoint)
            self.graph = Graph(store)
        else:
            self.identifier = URIRef(identifier)
            self.configstring = configstring
            self.graph = Graph(store, identifier=self.identifier)

        self.subprocess = subprocess

        if proxy is not None:
            print("Initilizing proxy...")
            proxy = urllib2.ProxyHandler({"http": urlparse(proxy).netloc})
            opener = urllib2.build_opener(proxy)
            urllib2.install_opener(opener)

    def open(self):
        """Open the graph at ``configstring`` when a store was supplied."""
        if self.store is not None:
            self.graph.open(self.configstring, create=True)

    def close(self):
        """Destroy the store (when one was supplied) and close the graph."""
        if self.store is not None:
            self.graph.destroy(self.configstring)

        self.graph.close()

    def load_graph(self):
        """Compute and cache ``self.uri_pattern`` from the subjects."""
        self.uri_pattern = self.get_uri_pattern()[1]

    def _select(self, query):
        """Run ``query`` on the graph and return its ``result`` rows."""
        return self.graph.query(query).result

    def _count(self, query):
        """Run a counting ``query`` and return its first cell as an int."""
        return int(self.graph.query(query).result[0][0])

    def get_triples(self):
        return self._select("SELECT DISTINCT * { ?s ?p ?o }")

    def get_triples_count(self):
        return self._count("SELECT (COUNT(*) AS ?no) { ?s ?p ?o  }")

    def get_classes(self):
        return self._select("SELECT DISTINCT ?class WHERE { [] a ?class }")

    def get_classes_count(self):
        return self._count("SELECT COUNT(distinct ?o) AS ?no { ?s rdf:type ?o }")

    def get_properties(self):
        return self._select("SELECT DISTINCT ?p WHERE { ?s ?p ?o }")

    def get_properties_count(self):
        """Return the number of distinct predicates.

        The class used to define this method twice; the later definition
        counted distinct subjects and silently replaced this one.
        """
        return self._count("SELECT COUNT(distinct ?p) AS ?no WHERE { ?s ?p ?o }")

    def get_subjects(self):
        return self._select("SELECT DISTINCT ?s WHERE { ?s ?p ?o }")

    def get_subjects_count(self):
        return self._count("SELECT COUNT(distinct ?s) WHERE { ?s ?p ?o }")

    def get_objects(self):
        return self._select("SELECT DISTINCT ?o WHERE { ?s ?p ?o }")

    def get_objects_count(self):
        return self._count("SELECT COUNT(distinct ?o) AS ?no WHERE { ?s ?p ?o }")

    def get_class_instances(self, class_name):
        """Return the distinct subjects typed with ``class_name``.

        NOTE(review): ``class_name`` is spliced into the query unescaped.
        """
        return self._select("SELECT DISTINCT ?s WHERE { ?s a <" + class_name + "> }")

    def get_class_instances_count(self, class_name):
        return self._count("SELECT COUNT(distinct ?s) AS ?no WHERE { ?s a <" + class_name + "> }")

    def get_all_classes_instances(self):
        """Map every class URI to its number of instances."""
        instances = {}
        for c in self.get_classes():
            clazz = str(c[0].encode("utf-8"))
            instances[clazz] = self.get_class_instances_count(clazz)
        return instances

    def get_all_predicate_triples(self):
        """Map every predicate URI to the number of triples that use it."""
        predicates = {}
        for p in self.get_properties():
            predicate = str(p[0].encode("utf-8"))
            predicates[predicate] = self.get_property_count(predicate)
        return predicates

    def get_property(self, property_name):
        return self._select("SELECT * WHERE { ?s <" + property_name + "> ?o }")

    def get_property_count(self, property_name):
        return self._count("SELECT (COUNT(*) AS ?no) WHERE { ?s <" + property_name + "> ?o }")

    def get_entities(self):
        """Return typed, non-blank subjects whose URI matches ``uri_pattern``."""
        query = (
            'SELECT DISTINCT ?s WHERE { ?s a [] . FILTER ((!isBlank(?s)) && regex(str(?s), "^'
            + self.uri_pattern
            + '"))}'
        )
        return self._select(query)

    def get_entities_count(self):
        query = (
            'SELECT COUNT(distinct ?s) AS ?no WHERE { ?s a [] . FILTER ((!isBlank(?s)) && regex(str(?s), "^'
            + self.uri_pattern
            + '"))}'
        )
        return self._count(query)

    def _all_links_query(self, projection):
        """Build the query for every IRI-to-IRI statement, types excluded."""
        return (
            "SELECT " + projection + " WHERE { ?s ?p ?o . \n"
            "                   FILTER (!isBlank(?s) && !isBlank(?o) && isIRI(?s) && isIRI(?o)"
            + self._LINK_FILTER_TAIL
        )

    def _link_query(self, projection, subject_internal, object_internal):
        """Build a link query restricted by ``uri_pattern`` on each end.

        An end marked internal must match ``uri_pattern``; an end marked
        external must not match it.
        """
        subject_test = "regex" if subject_internal else "!regex"
        object_test = "regex" if object_internal else "!regex"
        return (
            "SELECT " + projection + " WHERE { ?s ?p ?o . \n"
            "FILTER (!isBlank(?s) && !isBlank(?o) && "
            + subject_test + '(str(?s), "' + self.uri_pattern + '") && isIRI(?s) && '
            + object_test + '(str(?o), "' + self.uri_pattern + '") && isIRI(?o)'
            + self._LINK_FILTER_TAIL
        )

    def get_all_links(self):
        return self._select(self._all_links_query("*"))

    def get_all_links_count(self):
        return self._count(self._all_links_query("(COUNT(*) AS ?no)"))

    def get_ingoing_links(self):
        """Statements whose subject is external and object is internal."""
        return self._select(self._link_query("*", False, True))

    def get_ingoing_links_count(self):
        return self._count(self._link_query("(COUNT(*) AS ?no)", False, True))

    def get_outgoing_links(self):
        """Statements whose subject is internal and object is external."""
        return self._select(self._link_query("*", True, False))

    def get_outgoing_links_count(self):
        return self._count(self._link_query("(COUNT(*) AS ?no)", True, False))

    def get_inner_links(self):
        """Statements whose subject and object are both internal."""
        return self._select(self._link_query("*", True, True))

    def get_inner_links_count(self):
        return self._count(self._link_query("(COUNT(*) AS ?no)", True, True))

    def get_vocabularies(self):
        """Return the namespace prefixes of all predicates in the graph."""
        property_list = [str(p[0].encode("utf-8")) for p in self.get_properties()]
        return self.get_patterns(property_list)

    def get_uri_pattern(self):
        """Return ``get_pattern`` applied to all subject URIs."""
        subject_list = [str(subject[0].encode("utf-8")) for subject in self.get_subjects()]
        return self.get_pattern(subject_list)

    def get_pattern(self, collection):
        """Run ``namespace_finder.find_pattern`` on the ``http://`` URIs."""
        processes = 10
        collection = [e for e in collection if e.startswith("http://")]
        result = namespace_finder.find_pattern(collection, branches=processes, subprocesses=False, verbose=False)
        return result

    def get_patterns(self, uri_list):
        """Return the distinct prefixes of ``uri_list`` in first-seen order.

        The prefix of a URI is everything before its last ``#`` or, when it
        has none, before its last ``/``. Once a prefix is recorded, every
        remaining URI starting with it is dropped. URIs with neither
        separator contribute no prefix. ``uri_list`` is not modified.
        """
        remaining = list(uri_list)
        patterns = []
        while remaining:
            head = remaining[0]
            pos = head.rfind("#")
            if pos == -1:
                pos = head.rfind("/")
            if pos > -1:
                pattern = head[:pos]
                patterns.append(pattern)
                remaining = [e for e in remaining if not e.startswith(pattern)]
            else:
                # No separator: discard the entry, otherwise the loop would
                # never make progress.
                remaining = remaining[1:]
        return patterns

    def map_subprocess(self, data, branches=5):
        """Apply ``check_for_semantic`` to every item of ``data``.

        :param data: iterable of argument tuples.
        :param branches: pool size when ``self.subprocess`` is set.
        :return: the mapped results.
        """
        if self.subprocess:
            pool = Pool(branches)
            try:
                return pool.map(check_for_semantic, data)
            finally:
                # Release the workers on the failure path too.
                pool.close()
                pool.terminate()
        return map(check_for_semantic, data)

    def get_linksets(self, branches=5):
        """Group outgoing links by target dataset and analyse each group.

        :param branches: upper bound on the number of worker processes.
        :return: dict merged from the per-dataset results.
        """
        val = URLValidator(verify_exists=False)
        outgoing_links = []
        for obj in self.get_outgoing_links():
            try:
                link = str(obj[0].encode("utf-8"))
                val(link)
            except Exception:
                # Skip anything that is not a valid, encodable URL.
                continue
            outgoing_links.append(link)

        out_datasets = []
        while True:
            prefix = self.get_pattern(outgoing_links)[1]
            outgoing_links = [
                e for e in outgoing_links if not e.startswith(prefix) and not (e + "/").startswith(prefix)
            ]
            out_datasets.append(prefix)
            if not outgoing_links:
                break
        branches = min(branches, len(out_datasets))

        result = self.map_subprocess(
            zip(out_datasets, repeat(self.uri_pattern), repeat(self.identifier), repeat(self.configstring)),
            branches,
        )

        linksets = {}
        for item in result:
            # SECURITY(review): eval() runs on the worker's stringified
            # result. Left in place because the result format is not visible
            # here; consider ast.literal_eval once it is confirmed.
            linksets.update(eval(str(item)))
        return linksets
Example #10
0
class TestKyotoCabinetGraphCore(unittest.TestCase):
    """Open, close and re-open behaviour of a Graph backed by the KyotoCabinet store."""

    def setUp(self):
        """Open a fresh graph at ``configString`` before every test."""
        store = "KyotoCabinet"
        self.graph = Graph(store=store)
        self.path = configString
        self.graph.open(self.path, create=True)

    def tearDown(self):
        """Destroy the graph and remove whatever is left on disk."""
        self.graph.destroy(self.path)
        try:
            self.graph.close()
        except Exception:
            # Best effort: several tests close the store themselves.
            pass
        self._remove_store_files()

    def _remove_store_files(self):
        """Delete the file, or the flat directory, found at ``self.path``."""
        path = getattr(self, "path", None)
        if not path or not os.path.exists(path):
            return
        if os.path.isdir(path):
            for name in os.listdir(path):
                os.unlink(path + "/" + name)
            os.rmdir(path)
        else:
            os.remove(path)

    def test_namespaces(self):
        # NOTE(review): the "dc" URI repeats the scheme ("http://http://");
        # left untouched because only the binding count is asserted.
        self.graph.bind("dc", "http://http://purl.org/dc/elements/1.1/")
        self.graph.bind("foaf", "http://xmlns.com/foaf/0.1/")
        namespaces = list(self.graph.namespaces())
        self.assertEqual(len(namespaces), 5)
        self.assertIn(("foaf", rdflib.term.URIRef(u"http://xmlns.com/foaf/0.1/")), namespaces)

    def test_play_journal(self):
        self.assertRaises(NotImplementedError, self.graph.store.play_journal, {"graph": self.graph})

    def test_readable_index(self):
        print(readable_index(111))

    def test_create_db(self):
        """A store recreated after its files were removed starts empty."""
        michel = rdflib.URIRef(u"michel")
        likes = rdflib.URIRef(u"likes")
        pizza = rdflib.URIRef(u"pizza")
        cheese = rdflib.URIRef(u"cheese")
        self.graph.add((michel, likes, pizza))
        self.graph.add((michel, likes, cheese))
        self.graph.commit()
        self.graph.store.close()
        self._remove_store_files()
        self.graph.store.open(self.path, create=True)
        ntriples = self.graph.triples((None, None, None))
        self.assertEqual(len(list(ntriples)), 0)

    def test_missing_db_exception(self):
        """Opening with ``create=True`` works when nothing exists at the path."""
        self.graph.store.close()
        self._remove_store_files()
        self.graph.store.open(self.path, create=True)
        ntriples = self.graph.triples((None, None, None))
        self.assertEqual(len(list(ntriples)), 0)

    def test_reopening_db(self):
        """Committed triples are still there after closing and re-opening."""
        michel = rdflib.URIRef(u"michel")
        likes = rdflib.URIRef(u"likes")
        pizza = rdflib.URIRef(u"pizza")
        cheese = rdflib.URIRef(u"cheese")
        self.graph.add((michel, likes, pizza))
        self.graph.add((michel, likes, cheese))
        self.graph.commit()
        self.graph.store.close()
        self.graph.store.open(self.path, create=False)
        ntriples = self.graph.triples((None, None, None))
        self.assertEqual(len(list(ntriples)), 2)

    def test_reopening_missing_db(self):
        self.graph.store.close()
        self.assertRaises(ValueError, self.graph.store.open, ("/tmp/NotAnExistingDB"), create=False)

    def test_isopen_db(self):
        self.assertTrue(self.graph.store.is_open())
        self.graph.store.close()
        self.assertFalse(self.graph.store.is_open())
Example #11
0
class KnowledgeGraph:
    """ Knowledge Graph Class
    A wrapper around an imported rdflib.Graph object with convenience functions
    """
    graph = None
    # Class-level placeholder only; __init__ rebinds it to a per-instance Counter.
    _property_distribution = {}

    def __init__(self, graph=None):
        """Wrap an existing graph, load one from disk, or start with an empty one.

        :param graph: a ``Graph`` instance (subclasses included), a path to an
            RDF file, a list of such paths, or ``None`` for an empty graph.
        :raises TypeError: if ``graph`` has any other type.
        """
        self.logger = logging.getLogger()
        self.logger.debug("Initiating Knowledge Graph")

        if graph is None:
            self.graph = Graph()
        elif isinstance(graph, Graph):
            self.graph = graph
        elif isinstance(graph, str):
            self.graph = self._read([graph])
        elif isinstance(graph, list):
            self.graph = self._read(graph)
        else:
            raise TypeError(":: Wrong input type: {}; requires path to RDF"
                            " graph or rdflib.graph.Graph object".format(type(graph)))

        self._property_distribution = Counter(self.graph.predicates())
        self.logger.debug("Knowledge Graph ({} facts) succesfully imported".format(len(self.graph)))

    def _read(self, paths=None):
        """Parse every file in ``paths`` into one new ``Graph`` and return it.

        Files recognised by ``is_gzip`` are opened through ``gzip``; their
        format is guessed from the name without its last three characters
        (presumably the ".gz" suffix). ``None`` is treated as "no files".
        """
        graph = Graph()
        for path in paths or ():
            assert is_readable(path), "cannot read {}".format(path)
            if not is_gzip(path):
                graph.parse(path, format=guess_format(path))
            else:
                self.logger.debug("Input recognized as gzip file")
                with gzip.open(path, 'rb') as f:
                    graph.parse(f, format=guess_format(path[:-3]))

        return graph

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Destroy and close the wrapped graph; exceptions are not suppressed."""
        self.graph.destroy("store")
        self.graph.close(True)

    def __len__(self):
        return len(self.graph)

    ### Generators ###

    def atoms(self, separate_literals=True):
        """Yield every distinct subject and object once.

        With ``separate_literals`` each literal is wrapped in a
        ``UniqueLiteral`` tied to the subject and predicate it occurs with.
        """
        self.logger.debug("Yielding atoms (separated literals: {})".format(
            separate_literals))
        seen = set()
        for s, p, o in self.graph.triples((None, None, None)):
            for atom in (s, o):
                if separate_literals and isinstance(atom, Literal):
                    atom = self.UniqueLiteral(s, p, atom)
                if atom in seen:
                    continue
                seen.add(atom)

                yield atom

    def non_terminal_atoms(self):
        """Yield every distinct subject of the graph."""
        self.logger.debug("Yielding non-terminal atoms")
        yield from frozenset(self.graph.subjects())

    def terminal_atoms(self):
        """Yield the objects (one per triple) that never occur as a subject."""
        self.logger.debug("Yielding terminal atoms")
        # A set makes each membership test O(1) instead of a list scan.
        non_terminal_atoms = frozenset(self.non_terminal_atoms())
        # Snapshot kept from the original; presumably it guards against the
        # graph changing while the caller consumes this generator.
        for atom in list(self.graph.objects()):
            if atom not in non_terminal_atoms:
                yield atom

    def attributes(self):
        """Yield every object that is a literal (one per triple)."""
        self.logger.debug("Yielding attributes")
        for obj in self.graph.objects():
            if isinstance(obj, Literal):
                yield obj

    def entities(self, omit_blank_nodes=False):
        """Yield the atoms that are not literals, optionally without blank nodes."""
        self.logger.debug("Yielding entities")
        for res in self.atoms():
            # isinstance, not an exact type test: atoms() hands literals out as
            # UniqueLiteral instances, which an exact test would let through.
            if isinstance(res, Literal):
                continue
            if omit_blank_nodes and isinstance(res, BNode):
                continue

            yield res

    def objecttype_properties(self):
        """Yield each predicate once if at least one of its objects is not a literal."""
        attributes = frozenset(self.attributes())
        self.logger.debug("Yielding OT predicates")
        seen = set()
        for p in self.graph.predicates():
            # predicates() repeats a predicate for every triple using it.
            if p in seen:
                continue
            seen.add(p)
            if attributes.issuperset(self.graph.objects(None, p)):
                # p is only used with a literal as object
                continue

            yield p

    def datatype_properties(self):
        """Yield each predicate once if it is only used with literal objects."""
        objecttype_properties = set(self.objecttype_properties())
        self.logger.debug("Yielding DT predicates")
        seen = set()
        for p in self.graph.predicates():
            if p in seen or p in objecttype_properties:
                continue
            seen.add(p)

            yield p

    def properties(self):
        """Yield the predicate of every triple (repeats included)."""
        self.logger.debug("Yielding properties")
        yield from self.graph.predicates()

    def triples(self, triple=(None, None, None), separate_literals=True):
        """Yield the triples matching ``triple``, wrapping literal objects on request."""
        self.logger.debug("Yielding triples (triple {})".format(triple))
        for s, p, o in self.graph.triples(triple):
            if separate_literals and isinstance(o, Literal):
                o = self.UniqueLiteral(s, p, o)
            yield s, p, o

    ## Statistics
    def property_frequency(self, property=None):
        """Return the whole predicate Counter, or the count of one predicate.

        Returns ``None`` for a predicate that was not present when the
        distribution was computed in ``__init__``.
        """
        if property is None:
            return self._property_distribution
        if property in self._property_distribution:
            return self._property_distribution[property]
        return None

    def attribute_frequency(self, property, limit=None):
        """Return ``(object, count)`` pairs for ``property``, most common first.

        ``limit`` caps the number of pairs; ``None`` returns all of them.
        """
        return Counter(self.graph.objects(None, property)).most_common(limit)

    ## Operators
    def sample(self, strategy=None, **kwargs):
        """ Sample this graph using the given strategy
        returns a KnowledgeGraph instance
        """
        if strategy is None:
            raise ValueError('Strategy cannot be left undefined')

        self.logger.debug("Sampling graph")
        return strategy.sample(self, **kwargs)

    def quickSort(self, lst):
        """Needed to sort deterministically when using UniqueLiterals

        Orders the members by their ``str()`` value; members with equal
        strings keep their relative order. Inputs of length 0 or 1 are
        returned as they are, anything longer comes back as a new list.
        """
        if len(lst) <= 1:
            return lst

        # The built-in stable sort gives the same order as the former
        # recursive partitioning, without its recursion-depth limit.
        return sorted(lst, key=str)

    class UniqueLiteral(Literal):
        # literal with unique hash, irrespective of content
        def __new__(cls, s, p, o):
            self = super().__new__(cls, str(o), o.language, o.datatype, normalize=None)
            self.s = str(s)
            self.p = str(p)

            return self

        def __hash__(self):
            # Subject and predicate take part in the hash, so equal values
            # attached to different triples hash differently.
            parts = [self.s, self.p, str(self)]
            parts.extend(str(attr)
                         for attr in (self.language, self.datatype)
                         if attr is not None)

            return hash("".join(parts))

        def __eq__(self, other):
            if type(other) is not type(self):
                return False
            # Compare the representations themselves, not their hashes,
            # so a hash collision cannot report a false match.
            # NOTE(review): whether repr() reflects s and p depends on the
            # inherited __repr__ - confirm this agrees with __hash__.
            return repr(self) == repr(other)

        # functools.total_ordering is a class decorator; on this method it
        # returned the function unchanged, so it has been dropped.
        def __lt__(self, other):
            if type(other) is not type(self):
                return False

            # Lexicographic comparison: value first, then subject, then
            # predicate, so a < b and b < a can never both hold.
            return (str(self), self.s, self.p) < (str(other), other.s, other.p)