Esempio n. 1
0
def dump_as_rdf(g: Dataset, table_name: str) -> bool:
    """
    Dump the contents of Dataset g in RDF turtle.

    When both COMPUTE_MEMBERS and EXPLICIT_MEMBERS are truthy, the dataset is
    first enriched in place: the object of every ``skos:exactMatch`` triple is
    recorded as an ``enumeratedConceptualDomain.hasMember`` of the triple's
    subject and of every concept reachable from it via ``skos:broader``, and
    the members of each top concept are copied onto its concept scheme.
    The entries of ``namespaces`` are then bound (lower-cased prefix) and the
    dataset is serialized to ``<DATA_DIR>/<table_name>.ttl``.

    :param g: Dataset to dump (may gain triples as described above)
    :param table_name: name of the base table; used as the output file stem
    :return: success indicator (always True; failures surface as exceptions)
    """

    # Propagate the mapped concepts up the tree
    def add_to_ancestors(s: URIRef, vm: URIRef) -> None:
        has_member = ISO['enumeratedConceptualDomain.hasMember']
        # Iterative walk with a visited set: a cycle in skos:broader would
        # otherwise recurse forever, and a deep hierarchy could exhaust the
        # recursion limit.
        visited = set()
        pending = [s]
        while pending:
            concept = pending.pop()
            if concept in visited:
                continue
            visited.add(concept)
            g.add((concept, has_member, vm))
            # extend() drains the generator before the next g.add() call.
            pending.extend(g.objects(concept, SKOS.broader))

    if COMPUTE_MEMBERS and EXPLICIT_MEMBERS:
        member_predicate = ISO['enumeratedConceptualDomain.hasMember']
        # Snapshot each query result before adding triples so the dataset is
        # never mutated while one of its own iterators is still live.
        for subj, obj in list(g.subject_objects(SKOS.exactMatch)):
            add_to_ancestors(subj, obj)
        # TODO: this gives us a list of all concepts in the scheme... useful?
        for scheme, tc in list(g.subject_objects(SKOS.hasTopConcept)):
            for member in list(g.objects(tc, member_predicate)):
                g.add((scheme, member_predicate, member))

    for name, ns in namespaces.items():
        g.bind(name.lower(), ns)
    outfile = os.path.join(DATA_DIR, table_name + '.ttl')
    print(f"Saving output to {outfile}")
    g.serialize(outfile, format='turtle')
    print(f"{len(g)} triples written")
    return True
Esempio n. 2
0
class DatasetTestCase(unittest.TestCase):
    """Tests for named-graph and default-graph handling of ``Dataset``.

    ``store`` names the backend the dataset is created with.  "SQLite" is
    opened on a temporary file, "SPARQLUpdateStore" on a local HTTP endpoint,
    and every other store on a temporary directory.
    """
    store = 'default'
    slow = True
    tmppath = None

    def setUp(self):
        """Create and open the dataset, build fixture terms, and empty it."""
        try:
            self.graph = Dataset(store=self.store)
        except ImportError:
            raise SkipTest("Dependencies for store '%s' not available!" %
                           self.store)
        if self.store == "SQLite":
            # mkstemp hands back an open descriptor; only the path is used,
            # so close it immediately instead of leaking it.
            fd, self.tmppath = mkstemp(prefix='test',
                                       dir='/tmp',
                                       suffix='.sqlite')
            os.close(fd)
        elif self.store == "SPARQLUpdateStore":
            root = "http://localhost:3030/ukpp/"
            self.graph.open((root + "sparql", root + "update"))
        else:
            self.tmppath = mkdtemp()

        if self.store != "SPARQLUpdateStore":
            self.graph.open(self.tmppath, create=True)
        self.michel = URIRef(u'urn:michel')
        self.tarek = URIRef(u'urn:tarek')
        self.bob = URIRef(u'urn:bob')
        self.likes = URIRef(u'urn:likes')
        self.hates = URIRef(u'urn:hates')
        self.pizza = URIRef(u'urn:pizza')
        self.cheese = URIRef(u'urn:cheese')

        # Use regular URIs because SPARQL endpoints like Fuseki alter short names
        self.c1 = URIRef(u'urn:context-1')
        self.c2 = URIRef(u'urn:context-2')

        # delete the graph for each test!
        self.graph.remove((None, None, None))
        for c in self.graph.contexts():
            c.remove((None, None, None))
            assert len(c) == 0
            self.graph.remove_graph(c)

    def tearDown(self):
        """Close the dataset and delete its temporary file or directory."""
        self.graph.close()
        if self.store == "SPARQLUpdateStore":
            # Nothing was created on the local filesystem for this store.
            return
        if os.path.isdir(self.tmppath):
            shutil.rmtree(self.tmppath)
        else:
            os.remove(self.tmppath)

    def _context_ids(self):
        """Return the set of identifiers of the dataset's current graphs."""
        return set(x.identifier for x in self.graph.contexts())

    def testGraphAware(self):
        """A named graph is listed from creation until it is removed."""
        if not self.graph.store.graph_aware:
            return

        g = self.graph
        g1 = g.graph(self.c1)

        # Some SPARQL endpoint backends (e.g. TDB) do not consider
        # empty named graphs
        if self.store != "SPARQLUpdateStore":
            # added graph exists
            self.assertEqual(self._context_ids(),
                             set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        # added graph is empty
        self.assertEqual(len(g1), 0)

        g1.add((self.tarek, self.likes, self.pizza))

        # added graph still exists
        self.assertEqual(self._context_ids(),
                         set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        # added graph contains one triple
        self.assertEqual(len(g1), 1)

        g1.remove((self.tarek, self.likes, self.pizza))

        # added graph is empty
        self.assertEqual(len(g1), 0)

        # Some SPARQL endpoint backends (e.g. TDB) do not consider
        # empty named graphs
        if self.store != "SPARQLUpdateStore":
            # graph still exists, although empty
            self.assertEqual(self._context_ids(),
                             set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        g.remove_graph(self.c1)

        # graph is gone
        self.assertEqual(self._context_ids(),
                         set([DATASET_DEFAULT_GRAPH_ID]))

    def testDefaultGraph(self):
        """Removing the default graph drops its triples but keeps the graph."""
        # Sometimes the default graph is read-only (e.g. TDB in union mode)
        if self.store == "SPARQLUpdateStore":
            # Single parenthesised string: prints identically on Python 2 and 3.
            print("Please make sure updating the default graph "
                  "is supported by your SPARQL endpoint")

        self.graph.add((self.tarek, self.likes, self.pizza))
        self.assertEqual(len(self.graph), 1)
        # only default exists
        self.assertEqual(self._context_ids(),
                         set([DATASET_DEFAULT_GRAPH_ID]))

        # removing default graph removes triples but not actual graph
        self.graph.remove_graph(DATASET_DEFAULT_GRAPH_ID)

        self.assertEqual(len(self.graph), 0)
        # default still exists
        self.assertEqual(self._context_ids(),
                         set([DATASET_DEFAULT_GRAPH_ID]))

    def testNotUnion(self):
        """A triple added to a named graph is not visible via the dataset."""
        # Union depends on the SPARQL endpoint configuration
        if self.store == "SPARQLUpdateStore":
            # Single parenthesised string: prints identically on Python 2 and 3.
            print("Please make sure your SPARQL endpoint has not configured "
                  "its default graph as the union of the named graphs")
        g1 = self.graph.graph(self.c1)
        g1.add((self.tarek, self.likes, self.pizza))

        self.assertEqual(list(self.graph.objects(self.tarek, None)), [])
        self.assertEqual(list(g1.objects(self.tarek, None)), [self.pizza])
Esempio n. 3
0
class DatasetTestCase(unittest.TestCase):
    """Tests for named-graph and default-graph handling of ``Dataset``.

    ``store`` names the backend the dataset is created with.  "SQLite" is
    opened on a temporary file, every other store on a temporary directory.
    """
    store = 'default'
    slow = True
    tmppath = None

    def setUp(self):
        """Create and open the dataset, build fixture terms, and empty it."""
        try:
            self.graph = Dataset(store=self.store)
        except ImportError:
            raise SkipTest(
                "Dependencies for store '%s' not available!" % self.store)
        if self.store == "SQLite":
            # mkstemp hands back an open descriptor; only the path is used,
            # so close it immediately instead of leaking it.
            fd, self.tmppath = mkstemp(
                prefix='test', dir='/tmp', suffix='.sqlite')
            os.close(fd)
        else:
            self.tmppath = mkdtemp()
        self.graph.open(self.tmppath, create=True)
        self.michel = URIRef(u'michel')
        self.tarek = URIRef(u'tarek')
        self.bob = URIRef(u'bob')
        self.likes = URIRef(u'likes')
        self.hates = URIRef(u'hates')
        self.pizza = URIRef(u'pizza')
        self.cheese = URIRef(u'cheese')

        self.c1 = URIRef(u'context-1')
        self.c2 = URIRef(u'context-2')

        # delete the graph for each test!
        self.graph.remove((None, None, None))

    def tearDown(self):
        """Close the dataset and delete its temporary file or directory."""
        self.graph.close()
        if os.path.isdir(self.tmppath):
            shutil.rmtree(self.tmppath)
        else:
            os.remove(self.tmppath)

    def _context_ids(self):
        """Return the set of identifiers of the dataset's current graphs."""
        return {x.identifier for x in self.graph.contexts()}

    def testGraphAware(self):
        """A named graph is listed from creation until it is removed."""
        if not self.graph.store.graph_aware:
            return

        g = self.graph
        g1 = g.graph(self.c1)

        # added graph exists
        self.assertEqual(self._context_ids(),
                         {self.c1, DATASET_DEFAULT_GRAPH_ID})

        # added graph is empty
        self.assertEqual(len(g1), 0)

        g1.add((self.tarek, self.likes, self.pizza))

        # added graph still exists
        self.assertEqual(self._context_ids(),
                         {self.c1, DATASET_DEFAULT_GRAPH_ID})

        # added graph contains one triple
        self.assertEqual(len(g1), 1)

        g1.remove((self.tarek, self.likes, self.pizza))

        # added graph is empty
        self.assertEqual(len(g1), 0)

        # graph still exists, although empty
        self.assertEqual(self._context_ids(),
                         {self.c1, DATASET_DEFAULT_GRAPH_ID})

        g.remove_graph(self.c1)

        # graph is gone
        self.assertEqual(self._context_ids(), {DATASET_DEFAULT_GRAPH_ID})

    def testDefaultGraph(self):
        """Removing the default graph drops its triples but keeps the graph."""
        self.graph.add((self.tarek, self.likes, self.pizza))
        self.assertEqual(len(self.graph), 1)
        # only default exists
        self.assertEqual(self._context_ids(), {DATASET_DEFAULT_GRAPH_ID})

        # removing default graph removes triples but not actual graph
        self.graph.remove_graph(DATASET_DEFAULT_GRAPH_ID)

        self.assertEqual(len(self.graph), 0)
        # default still exists
        self.assertEqual(self._context_ids(), {DATASET_DEFAULT_GRAPH_ID})

    def testNotUnion(self):
        """A triple added to a named graph is not visible via the dataset."""
        g1 = self.graph.graph(self.c1)
        g1.add((self.tarek, self.likes, self.pizza))

        self.assertEqual(list(self.graph.objects(self.tarek, None)), [])
        self.assertEqual(list(g1.objects(self.tarek, None)), [self.pizza])
Esempio n. 4
0
class DatasetTestCase(unittest.TestCase):
    """Tests for named-graph and default-graph handling of ``Dataset``.

    ``store`` names the backend the dataset is created with.  "SQLite" is
    opened on a temporary file, "SPARQLUpdateStore" on the endpoint at
    ``HOST + DB``, and every other store on a temporary directory.
    """
    store = 'default'
    slow = True
    tmppath = None

    def setUp(self):
        """Create and open the dataset, build fixture terms, and empty it."""
        try:
            self.graph = Dataset(store=self.store)
        except ImportError:
            raise SkipTest(
                "Dependencies for store '%s' not available!" % self.store)
        if self.store == "SQLite":
            # mkstemp hands back an open descriptor; only the path is used,
            # so close it immediately instead of leaking it.
            fd, self.tmppath = mkstemp(
                prefix='test', dir='/tmp', suffix='.sqlite')
            os.close(fd)
        elif self.store == "SPARQLUpdateStore":
            root = HOST + DB
            self.graph.open((root + "sparql", root + "update"))
        else:
            self.tmppath = mkdtemp()

        if self.store != "SPARQLUpdateStore":
            self.graph.open(self.tmppath, create=True)
        self.michel = URIRef(u'urn:michel')
        self.tarek = URIRef(u'urn:tarek')
        self.bob = URIRef(u'urn:bob')
        self.likes = URIRef(u'urn:likes')
        self.hates = URIRef(u'urn:hates')
        self.pizza = URIRef(u'urn:pizza')
        self.cheese = URIRef(u'urn:cheese')

        # Use regular URIs because SPARQL endpoints like Fuseki alter short names
        self.c1 = URIRef(u'urn:context-1')
        self.c2 = URIRef(u'urn:context-2')

        # delete the graph for each test!
        self.graph.remove((None, None, None))
        for c in self.graph.contexts():
            c.remove((None, None, None))
            assert len(c) == 0
            self.graph.remove_graph(c)

    def tearDown(self):
        """Close the dataset and delete its temporary file or directory."""
        self.graph.close()
        if self.store == "SPARQLUpdateStore":
            # Nothing was created on the local filesystem for this store.
            return
        if os.path.isdir(self.tmppath):
            shutil.rmtree(self.tmppath)
        else:
            os.remove(self.tmppath)

    def _context_ids(self):
        """Return the set of identifiers of the dataset's current graphs."""
        return {x.identifier for x in self.graph.contexts()}

    def testGraphAware(self):
        """A named graph is listed from creation until it is removed."""
        if not self.graph.store.graph_aware:
            return

        g = self.graph
        g1 = g.graph(self.c1)

        # Some SPARQL endpoint backends (e.g. TDB) do not consider
        # empty named graphs
        if self.store != "SPARQLUpdateStore":
            # added graph exists
            self.assertEqual(self._context_ids(),
                             {self.c1, DATASET_DEFAULT_GRAPH_ID})

        # added graph is empty
        self.assertEqual(len(g1), 0)

        g1.add((self.tarek, self.likes, self.pizza))

        # added graph still exists
        self.assertEqual(self._context_ids(),
                         {self.c1, DATASET_DEFAULT_GRAPH_ID})

        # added graph contains one triple
        self.assertEqual(len(g1), 1)

        g1.remove((self.tarek, self.likes, self.pizza))

        # added graph is empty
        self.assertEqual(len(g1), 0)

        # Some SPARQL endpoint backends (e.g. TDB) do not consider
        # empty named graphs
        if self.store != "SPARQLUpdateStore":
            # graph still exists, although empty
            self.assertEqual(self._context_ids(),
                             {self.c1, DATASET_DEFAULT_GRAPH_ID})

        g.remove_graph(self.c1)

        # graph is gone
        self.assertEqual(self._context_ids(), {DATASET_DEFAULT_GRAPH_ID})

    def testDefaultGraph(self):
        """Removing the default graph drops its triples but keeps the graph."""
        # Sometimes the default graph is read-only (e.g. TDB in union mode)
        if self.store == "SPARQLUpdateStore":
            print("Please make sure updating the default graph "
                  "is supported by your SPARQL endpoint")

        self.graph.add((self.tarek, self.likes, self.pizza))
        self.assertEqual(len(self.graph), 1)
        # only default exists
        self.assertEqual(self._context_ids(), {DATASET_DEFAULT_GRAPH_ID})

        # removing default graph removes triples but not actual graph
        self.graph.remove_graph(DATASET_DEFAULT_GRAPH_ID)

        self.assertEqual(len(self.graph), 0)
        # default still exists
        self.assertEqual(self._context_ids(), {DATASET_DEFAULT_GRAPH_ID})

    def testNotUnion(self):
        """A triple added to a named graph is not visible via the dataset."""
        # Union depends on the SPARQL endpoint configuration
        if self.store == "SPARQLUpdateStore":
            print("Please make sure your SPARQL endpoint has not configured "
                  "its default graph as the union of the named graphs")
        g1 = self.graph.graph(self.c1)
        g1.add((self.tarek, self.likes, self.pizza))

        self.assertEqual(list(self.graph.objects(self.tarek, None)), [])
        self.assertEqual(list(g1.objects(self.tarek, None)), [self.pizza])
Esempio n. 5
0
class DatasetTestCase(unittest.TestCase):
    """Tests for named-graph and default-graph handling of ``Dataset``.

    ``store`` names the backend the dataset is created with.  "SQLite" is
    opened on a temporary file, every other store on a temporary directory.
    """
    store = 'default'
    slow = True
    tmppath = None

    def setUp(self):
        """Create and open the dataset, build fixture terms, and empty it."""
        try:
            self.graph = Dataset(store=self.store)
        except ImportError:
            raise SkipTest("Dependencies for store '%s' not available!" %
                           self.store)
        if self.store == "SQLite":
            # mkstemp hands back an open descriptor; only the path is used,
            # so close it immediately instead of leaking it.
            fd, self.tmppath = mkstemp(prefix='test',
                                       dir='/tmp',
                                       suffix='.sqlite')
            os.close(fd)
        else:
            self.tmppath = mkdtemp()
        self.graph.open(self.tmppath, create=True)
        self.michel = URIRef(u'michel')
        self.tarek = URIRef(u'tarek')
        self.bob = URIRef(u'bob')
        self.likes = URIRef(u'likes')
        self.hates = URIRef(u'hates')
        self.pizza = URIRef(u'pizza')
        self.cheese = URIRef(u'cheese')

        self.c1 = URIRef(u'context-1')
        self.c2 = URIRef(u'context-2')

        # delete the graph for each test!
        self.graph.remove((None, None, None))

    def tearDown(self):
        """Close the dataset and delete its temporary file or directory."""
        self.graph.close()
        if os.path.isdir(self.tmppath):
            shutil.rmtree(self.tmppath)
        else:
            os.remove(self.tmppath)

    def _context_ids(self):
        """Return the set of identifiers of the dataset's current graphs."""
        return {x.identifier for x in self.graph.contexts()}

    def testGraphAware(self):
        """A named graph is listed from creation until it is removed."""
        if not self.graph.store.graph_aware:
            return

        g = self.graph
        g1 = g.graph(self.c1)
        with_named_graph = {self.c1, DATASET_DEFAULT_GRAPH_ID}

        # added graph exists
        self.assertEqual(self._context_ids(), with_named_graph)

        # added graph is empty
        self.assertEqual(len(g1), 0)

        g1.add((self.tarek, self.likes, self.pizza))

        # added graph still exists
        self.assertEqual(self._context_ids(), with_named_graph)

        # added graph contains one triple
        self.assertEqual(len(g1), 1)

        g1.remove((self.tarek, self.likes, self.pizza))

        # added graph is empty
        self.assertEqual(len(g1), 0)

        # graph still exists, although empty
        self.assertEqual(self._context_ids(), with_named_graph)

        g.remove_graph(self.c1)

        # graph is gone
        self.assertEqual(self._context_ids(), {DATASET_DEFAULT_GRAPH_ID})

    def testDefaultGraph(self):
        """Removing the default graph drops its triples but keeps the graph."""
        self.graph.add((self.tarek, self.likes, self.pizza))
        self.assertEqual(len(self.graph), 1)
        # only default exists
        self.assertEqual(self._context_ids(), {DATASET_DEFAULT_GRAPH_ID})

        # removing default graph removes triples but not actual graph
        self.graph.remove_graph(DATASET_DEFAULT_GRAPH_ID)

        self.assertEqual(len(self.graph), 0)
        # default still exists
        self.assertEqual(self._context_ids(), {DATASET_DEFAULT_GRAPH_ID})

    def testNotUnion(self):
        """A triple added to a named graph is not visible via the dataset."""
        g1 = self.graph.graph(self.c1)
        g1.add((self.tarek, self.likes, self.pizza))

        self.assertEqual(list(self.graph.objects(self.tarek, None)), [])
        self.assertEqual(list(g1.objects(self.tarek, None)), [self.pizza])
Esempio n. 6
0
class DatasetTestCase(unittest.TestCase):
    """Tests for named-graph, default-graph and iteration behaviour of ``Dataset``.

    ``store`` names the backend the dataset is created with.  "SQLite" is
    opened on a temporary file, "SPARQLUpdateStore" on the endpoint at
    ``HOST + DB``, and every other store on a temporary directory.
    """
    store = "default"
    slow = True
    tmppath = None

    def setUp(self):
        """Create and open the dataset, build fixture terms, and empty it."""
        try:
            self.graph = Dataset(store=self.store)
        except ImportError:
            raise SkipTest("Dependencies for store '%s' not available!" %
                           self.store)
        if self.store == "SQLite":
            # mkstemp hands back an open descriptor; only the path is used,
            # so close it immediately instead of leaking it.
            fd, self.tmppath = mkstemp(prefix="test",
                                       dir="/tmp",
                                       suffix=".sqlite")
            os.close(fd)
        elif self.store == "SPARQLUpdateStore":
            root = HOST + DB
            self.graph.open((root + "sparql", root + "update"))
        else:
            self.tmppath = mkdtemp()

        if self.store != "SPARQLUpdateStore":
            self.graph.open(self.tmppath, create=True)
        self.michel = URIRef("urn:michel")
        self.tarek = URIRef("urn:tarek")
        self.bob = URIRef("urn:bob")
        self.likes = URIRef("urn:likes")
        self.hates = URIRef("urn:hates")
        self.pizza = URIRef("urn:pizza")
        self.cheese = URIRef("urn:cheese")

        # Use regular URIs because SPARQL endpoints like Fuseki alter short names
        self.c1 = URIRef("urn:context-1")
        self.c2 = URIRef("urn:context-2")

        # delete the graph for each test!
        self.graph.remove((None, None, None))
        for c in self.graph.contexts():
            c.remove((None, None, None))
            assert len(c) == 0
            self.graph.remove_graph(c)

    def tearDown(self):
        """Close the dataset and delete its temporary file or directory."""
        self.graph.close()
        if self.store == "SPARQLUpdateStore":
            # Nothing was created on the local filesystem for this store.
            return
        if os.path.isdir(self.tmppath):
            shutil.rmtree(self.tmppath)
        else:
            os.remove(self.tmppath)

    def _context_ids(self):
        """Return the set of identifiers of the dataset's current graphs."""
        return {x.identifier for x in self.graph.contexts()}

    def testGraphAware(self):
        """A named graph is listed from creation until it is removed."""
        if not self.graph.store.graph_aware:
            return

        g = self.graph
        g1 = g.graph(self.c1)

        # Some SPARQL endpoint backends (e.g. TDB) do not consider
        # empty named graphs
        if self.store != "SPARQLUpdateStore":
            # added graph exists
            self.assertEqual(
                self._context_ids(),
                {self.c1, DATASET_DEFAULT_GRAPH_ID},
            )

        # added graph is empty
        self.assertEqual(len(g1), 0)

        g1.add((self.tarek, self.likes, self.pizza))

        # added graph still exists
        self.assertEqual(
            self._context_ids(),
            {self.c1, DATASET_DEFAULT_GRAPH_ID},
        )

        # added graph contains one triple
        self.assertEqual(len(g1), 1)

        g1.remove((self.tarek, self.likes, self.pizza))

        # added graph is empty
        self.assertEqual(len(g1), 0)

        # Some SPARQL endpoint backends (e.g. TDB) do not consider
        # empty named graphs
        if self.store != "SPARQLUpdateStore":
            # graph still exists, although empty
            self.assertEqual(
                self._context_ids(),
                {self.c1, DATASET_DEFAULT_GRAPH_ID},
            )

        g.remove_graph(self.c1)

        # graph is gone
        self.assertEqual(self._context_ids(), {DATASET_DEFAULT_GRAPH_ID})

    def testDefaultGraph(self):
        """Removing the default graph drops its triples but keeps the graph."""
        # Sometimes the default graph is read-only (e.g. TDB in union mode)
        if self.store == "SPARQLUpdateStore":
            print("Please make sure updating the default graph "
                  "is supported by your SPARQL endpoint")

        self.graph.add((self.tarek, self.likes, self.pizza))
        self.assertEqual(len(self.graph), 1)
        # only default exists
        self.assertEqual(self._context_ids(), {DATASET_DEFAULT_GRAPH_ID})

        # removing default graph removes triples but not actual graph
        self.graph.remove_graph(DATASET_DEFAULT_GRAPH_ID)

        self.assertEqual(len(self.graph), 0)
        # default still exists
        self.assertEqual(self._context_ids(), {DATASET_DEFAULT_GRAPH_ID})

    def testNotUnion(self):
        """A triple added to a named graph is not visible via the dataset."""
        # Union depends on the SPARQL endpoint configuration
        if self.store == "SPARQLUpdateStore":
            print("Please make sure your SPARQL endpoint has not configured "
                  "its default graph as the union of the named graphs")
        g1 = self.graph.graph(self.c1)
        g1.add((self.tarek, self.likes, self.pizza))

        self.assertEqual(list(self.graph.objects(self.tarek, None)), [])
        self.assertEqual(list(g1.objects(self.tarek, None)), [self.pizza])

    def testIter(self):
        """PR 1382: adds __iter__ to Dataset"""
        d = Dataset()
        uri_a = URIRef("https://example.com/a")
        uri_b = URIRef("https://example.com/b")
        uri_c = URIRef("https://example.com/c")
        uri_d = URIRef("https://example.com/d")
        graph_1 = URIRef("https://example.com/g1")
        graph_2 = URIRef("https://example.com/g2")

        d.add_graph(graph_1)
        d.add((uri_a, uri_b, uri_c, graph_1))
        d.add((uri_a, uri_b, uri_c, graph_1))  # pointless addition: duplicates above

        d.add_graph(graph_2)
        d.add((uri_a, uri_b, uri_c, graph_2))
        d.add((uri_a, uri_b, uri_d, graph_1))  # new, uri_d

        # traditional iterator
        i_trad = sum(1 for _ in d.quads((None, None, None)))

        # new Dataset.__iter__ iterator
        i_new = sum(1 for _ in d)

        self.assertEqual(i_new, i_trad)  # both should be 3