def test_hext_dataset_linecount():
    d = Dataset()
    assert len(d) == 0
    d.parse(
        Path(__file__).parent / "test_parser_hext_multigraph.ndjson",
        format="hext",
        publicID=d.default_context.identifier,
    )
    total_triples = 0
    # count all the triples in the Dataset
    for context in d.contexts():
        for triple in context.triples((None, None, None)):
            total_triples += 1
    assert total_triples == 18

    # count the number of serialized Hextuples, should be 22, as per the original file
    lc = len(d.serialize(format="hext").splitlines())
    assert lc == 22
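# A minimal round-trip sketch for the test above (assuming rdflib >= 6.x,
# which ships the "hext" parser/serializer; the URIs below are made up).
# Each line of Hextuples output is one JSON array with six fields:
# subject, predicate, value, datatype, language, graph.
from rdflib import Dataset, Literal, URIRef

d = Dataset()
g = d.graph(URIRef("http://example.com/graph-1"))  # create a named graph
g.add((URIRef("http://example.com/s"),
       URIRef("http://example.com/p"),
       Literal("hello")))

# Prints roughly:
# ["http://example.com/s", "http://example.com/p", "hello",
#  "http://www.w3.org/2001/XMLSchema#string", "", "http://example.com/graph-1"]
print(d.serialize(format="hext"))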
def update_test(t):
    # the update-eval tests refer to graphs on http://example.org
    rdflib_sparql_module.SPARQL_LOAD_GRAPHS = False

    uri, name, comment, data, graphdata, query, res, syntax = t

    if uri in skiptests:
        raise SkipTest()

    try:
        g = Dataset()

        if not res:
            if syntax:
                translateUpdate(parseUpdate(open(query[7:])))
            else:
                try:
                    translateUpdate(parseUpdate(open(query[7:])))
                    raise AssertionError("Query shouldn't have parsed!")
                except AssertionError:
                    raise  # the query parsed although it should not have
                except:
                    pass  # negative syntax test: a parse error is expected
            return

        resdata, resgraphdata = res

        # read input graphs
        if data:
            g.default_context.load(data, format=_fmt(data))

        if graphdata:
            for x, l in graphdata:
                g.load(x, publicID=URIRef(l), format=_fmt(x))

        req = translateUpdate(parseUpdate(open(query[7:])))
        evalUpdate(g, req)

        # read expected results
        resg = Dataset()
        if resdata:
            resg.default_context.load(resdata, format=_fmt(resdata))

        if resgraphdata:
            for x, l in resgraphdata:
                resg.load(x, publicID=URIRef(l), format=_fmt(x))

        eq(set(x.identifier for x in g.contexts() if x != g.default_context),
           set(x.identifier for x in resg.contexts() if x != resg.default_context),
           'named graphs in datasets do not match')

        assert isomorphic(g.default_context, resg.default_context), \
            'Default graphs are not isomorphic'

        for x in g.contexts():
            if x == g.default_context:
                continue
            assert isomorphic(x, resg.get_context(x.identifier)), \
                "Graphs with ID %s are not isomorphic" % x.identifier

    except Exception, e:
        if isinstance(e, AssertionError):
            failed_tests.append(uri)
            fails[str(e)] += 1
        else:
            error_tests.append(uri)
            errors[str(e)] += 1

        if (DEBUG_ERROR and not isinstance(e, AssertionError)) or DEBUG_FAIL:
            print "======================================"
            print uri
            print name
            print comment

            if not res:
                if syntax:
                    print "Positive syntax test"
                else:
                    print "Negative syntax test"

            if data:
                print "----------------- DATA --------------------"
                print ">>>", data
                print open(data[7:]).read()
            if graphdata:
                print "----------------- GRAPHDATA --------------------"
                for x, l in graphdata:
                    print ">>>", x, l
                    print open(x[7:]).read()

            print "----------------- Request -------------------"
            print ">>>", query
            print open(query[7:]).read()

            if res:
                if resdata:
                    print "----------------- RES DATA --------------------"
                    print ">>>", resdata
                    print open(resdata[7:]).read()
                if resgraphdata:
                    print "----------------- RES GRAPHDATA -------------------"
                    for x, l in resgraphdata:
                        print ">>>", x, l
                        print open(x[7:]).read()

            print "------------- MY RESULT ----------"
            print g.serialize(format='trig')

            try:
                pq = translateUpdate(parseUpdate(open(query[7:]).read()))
                print "----------------- Parsed ------------------"
                pprintAlgebra(pq)
                # print pq
            except:
                print "(parser error)"

            print decodeStringEscape(unicode(e))

            import pdb
            pdb.post_mortem(sys.exc_info()[2])
        raise
def update_test(t):
    # the update-eval tests refer to graphs on http://example.org
    rdflib_sparql_module.SPARQL_LOAD_GRAPHS = False

    uri, name, comment, data, graphdata, query, res, syntax = t

    if uri in skiptests:
        raise SkipTest()

    try:
        g = Dataset()

        if not res:
            if syntax:
                with bopen(query[7:]) as f:
                    translateUpdate(parseUpdate(f))
            else:
                try:
                    with bopen(query[7:]) as f:
                        translateUpdate(parseUpdate(f))
                    raise AssertionError("Query shouldn't have parsed!")
                except AssertionError:
                    raise  # the query parsed although it should not have
                except Exception:
                    pass  # negative syntax test: a parse error is expected
            return

        resdata, resgraphdata = res

        # read input graphs
        if data:
            g.default_context.load(data, format=_fmt(data))

        if graphdata:
            for x, l in graphdata:
                g.load(x, publicID=URIRef(l), format=_fmt(x))

        with bopen(query[7:]) as f:
            req = translateUpdate(parseUpdate(f))
        evalUpdate(g, req)

        # read expected results
        resg = Dataset()
        if resdata:
            resg.default_context.load(resdata, format=_fmt(resdata))

        if resgraphdata:
            for x, l in resgraphdata:
                resg.load(x, publicID=URIRef(l), format=_fmt(x))

        eq(
            set(x.identifier for x in g.contexts() if x != g.default_context),
            set(x.identifier for x in resg.contexts() if x != resg.default_context),
            "named graphs in datasets do not match",
        )

        assert isomorphic(
            g.default_context, resg.default_context
        ), "Default graphs are not isomorphic"

        for x in g.contexts():
            if x == g.default_context:
                continue
            assert isomorphic(
                x, resg.get_context(x.identifier)
            ), "Graphs with ID %s are not isomorphic" % x.identifier

    except Exception as e:
        if isinstance(e, AssertionError):
            failed_tests.append(uri)
            fails[str(e)] += 1
        else:
            error_tests.append(uri)
            errors[str(e)] += 1

        if (DEBUG_ERROR and not isinstance(e, AssertionError)) or DEBUG_FAIL:
            print("======================================")
            print(uri)
            print(name)
            print(comment)

            if not res:
                if syntax:
                    print("Positive syntax test")
                else:
                    print("Negative syntax test")

            if data:
                print("----------------- DATA --------------------")
                print(">>>", data)
                print(bopen_read_close(data[7:]))
            if graphdata:
                print("----------------- GRAPHDATA --------------------")
                for x, l in graphdata:
                    print(">>>", x, l)
                    print(bopen_read_close(x[7:]))

            print("----------------- Request -------------------")
            print(">>>", query)
            print(bopen_read_close(query[7:]))

            if res:
                if resdata:
                    print("----------------- RES DATA --------------------")
                    print(">>>", resdata)
                    print(bopen_read_close(resdata[7:]))
                if resgraphdata:
                    print("----------------- RES GRAPHDATA -------------------")
                    for x, l in resgraphdata:
                        print(">>>", x, l)
                        print(bopen_read_close(x[7:]))

            print("------------- MY RESULT ----------")
            print(g.serialize(format="trig"))

            try:
                pq = translateUpdate(parseUpdate(bopen_read_close(query[7:])))
                print("----------------- Parsed ------------------")
                pprintAlgebra(pq)
                # print(pq)
            except Exception:
                print("(parser error)")

            print(decodeStringEscape(str(e)))

            import pdb
            pdb.post_mortem(sys.exc_info()[2])
        raise
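# _fmt() is used above but not defined in these snippets; a plausible helper
# (an assumption, not necessarily the original) picks the parse format from
# the file extension:
def _fmt(f):
    return "xml" if f.endswith(".rdf") else "turtle"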
def performUpdate(options):
    endpoint = options['endpoint']
    inputFile = options['inputfile']
    updateCondition = options['updatecondition']
    preprocessupdate = options['preprocessupdate']
    limit = int(options['limit'])
    offset = int(options['offset'])

    inputData = Dataset()
    print("Parsing input data...")
    inputData.parse(inputFile, format='trig')
    print("Found %d named graphs" % len([
        d for d in list(inputData.contexts())
        if d.identifier.startswith("http")
    ]))

    headers = {'Accept': 'text/turtle'}

    # Query the endpoint and determine which graphs are new, changed, or unchanged
    graphs = {'new': [], 'changed': [], 'unchanged': []}
    queryTemplate = """
    CONSTRUCT { ?s ?p ?o }
    WHERE { GRAPH <%s> { ?s ?p ?o } }
    """
    print("Comparing with named graphs at endpoint %s" % endpoint)
    for context in tqdm([
            d for d in list(inputData.contexts())
            if d.identifier.startswith("http")
    ][offset:offset + limit]):
        r = requests.get(endpoint,
                         headers=headers,
                         params={"query": queryTemplate % context.identifier})

        if r.ok:
            remoteGraph = Graph()
            remoteGraph.parse(data=r.text, format='turtle')

            if not len(remoteGraph):
                graphs['new'].append((context, False))
            elif graphsAreTheSame(context, remoteGraph, preprocessupdate):
                graphs['unchanged'].append((context, remoteGraph))
            else:
                graphs['changed'].append((context, remoteGraph))
        else:
            print(r.text)

    # Output statistics
    print("\nComparison Result:")
    print("%d graph%s %s not exist at the endpoint and will be added" %
          (len(graphs['new']), "" if len(graphs['new']) == 1 else "s",
           "does" if len(graphs['new']) == 1 else "do"))
    print("%d graph%s already exist%s but %s different in the input file" %
          (len(graphs['changed']), "" if len(graphs['changed']) == 1 else "s",
           "s" if len(graphs['changed']) == 1 else "",
           "is" if len(graphs['changed']) == 1 else "are"))
    print("%d graph%s %s identical in both the input file and endpoint" %
          (len(graphs['unchanged']), "" if len(graphs['unchanged']) == 1 else "s",
           "is" if len(graphs['unchanged']) == 1 else "are"))

    # All new graphs should be included in the update
    graphsToUpdate = [d[0] for d in graphs['new']]

    # Of the changed graphs, only those matching the update condition should
    # be updated. If no update condition is set, all changed graphs are updated.
    if updateCondition:
        count = 0
        for graphPair in graphs['changed']:
            for result in graphPair[1].query(updateCondition):
                if result:
                    graphsToUpdate.append(graphPair[0])
                    count += 1
        print(
            "\n%d out of %d graph%s will be overwritten based on the update condition"
            % (count, len(graphs['changed']),
               "" if len(graphs['changed']) == 1 else "s"))
    else:
        graphsToUpdate += [d[0] for d in graphs['changed']]

    # Perform update
    for g in tqdm(graphsToUpdate):
        putGraph(g, endpoint)
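# putGraph() is called above but not defined in this snippet. A plausible
# implementation (hypothetical; it assumes the endpoint also speaks the
# SPARQL 1.1 Graph Store Protocol with ?graph= indirect addressing) would be:
import requests


def putGraph(graph, endpoint):
    # Replace the named graph at the endpoint with the local copy,
    # addressing the remote graph by its identifier.
    r = requests.put(
        endpoint,
        params={"graph": str(graph.identifier)},
        data=graph.serialize(format="turtle"),
        headers={"Content-Type": "text/turtle"},
    )
    r.raise_for_status()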
class DatasetTestCase(unittest.TestCase):
    store = 'default'
    slow = True
    tmppath = None

    def setUp(self):
        try:
            self.graph = Dataset(store=self.store)
        except ImportError:
            raise SkipTest("Dependencies for store '%s' not available!"
                           % self.store)
        if self.store == "SQLite":
            _, self.tmppath = mkstemp(prefix='test', dir='/tmp',
                                      suffix='.sqlite')
        elif self.store == "SPARQLUpdateStore":
            root = "http://localhost:3030/ukpp/"
            self.graph.open((root + "sparql", root + "update"))
        else:
            self.tmppath = mkdtemp()

        if self.store != "SPARQLUpdateStore":
            self.graph.open(self.tmppath, create=True)

        self.michel = URIRef(u'urn:michel')
        self.tarek = URIRef(u'urn:tarek')
        self.bob = URIRef(u'urn:bob')
        self.likes = URIRef(u'urn:likes')
        self.hates = URIRef(u'urn:hates')
        self.pizza = URIRef(u'urn:pizza')
        self.cheese = URIRef(u'urn:cheese')

        # Use regular URIs because SPARQL endpoints like Fuseki alter short names
        self.c1 = URIRef(u'urn:context-1')
        self.c2 = URIRef(u'urn:context-2')

        # delete the graph for each test!
        self.graph.remove((None, None, None))
        for c in self.graph.contexts():
            c.remove((None, None, None))
            assert len(c) == 0
            self.graph.remove_graph(c)

    def tearDown(self):
        self.graph.close()
        if self.store == "SPARQLUpdateStore":
            pass
        else:
            if os.path.isdir(self.tmppath):
                shutil.rmtree(self.tmppath)
            else:
                os.remove(self.tmppath)

    def testGraphAware(self):
        if not self.graph.store.graph_aware:
            return

        g = self.graph
        g1 = g.graph(self.c1)

        # Some SPARQL endpoint backends (e.g. TDB) do not consider
        # empty named graphs
        if self.store != "SPARQLUpdateStore":
            # added graph exists
            self.assertEquals(set(x.identifier for x in self.graph.contexts()),
                              set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        # added graph is empty
        self.assertEquals(len(g1), 0)

        g1.add((self.tarek, self.likes, self.pizza))

        # added graph still exists
        self.assertEquals(set(x.identifier for x in self.graph.contexts()),
                          set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        # added graph contains one triple
        self.assertEquals(len(g1), 1)

        g1.remove((self.tarek, self.likes, self.pizza))

        # added graph is empty
        self.assertEquals(len(g1), 0)

        # Some SPARQL endpoint backends (e.g. TDB) do not consider
        # empty named graphs
        if self.store != "SPARQLUpdateStore":
            # graph still exists, although empty
            self.assertEquals(set(x.identifier for x in self.graph.contexts()),
                              set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        g.remove_graph(self.c1)

        # graph is gone
        self.assertEquals(set(x.identifier for x in self.graph.contexts()),
                          set([DATASET_DEFAULT_GRAPH_ID]))

    def testDefaultGraph(self):
        # Sometimes the default graph is read-only (e.g. TDB in union mode)
        if self.store == "SPARQLUpdateStore":
            print "Please make sure updating the default graph " \
                  "is supported by your SPARQL endpoint"

        self.graph.add((self.tarek, self.likes, self.pizza))
        self.assertEquals(len(self.graph), 1)
        # only default exists
        self.assertEquals(set(x.identifier for x in self.graph.contexts()),
                          set([DATASET_DEFAULT_GRAPH_ID]))

        # removing default graph removes triples but not actual graph
        self.graph.remove_graph(DATASET_DEFAULT_GRAPH_ID)

        self.assertEquals(len(self.graph), 0)
        # default still exists
        self.assertEquals(set(x.identifier for x in self.graph.contexts()),
                          set([DATASET_DEFAULT_GRAPH_ID]))

    def testNotUnion(self):
        # Union depends on the SPARQL endpoint configuration
        if self.store == "SPARQLUpdateStore":
            print "Please make sure your SPARQL endpoint has not configured " \
                  "its default graph as the union of the named graphs"

        g1 = self.graph.graph(self.c1)
        g1.add((self.tarek, self.likes, self.pizza))

        self.assertEqual(list(self.graph.objects(self.tarek, None)), [])
        self.assertEqual(list(g1.objects(self.tarek, None)), [self.pizza])
class DatasetTestCase(unittest.TestCase):
    store = 'default'
    slow = True
    tmppath = None

    def setUp(self):
        try:
            self.graph = Dataset(store=self.store)
        except ImportError:
            raise SkipTest(
                "Dependencies for store '%s' not available!" % self.store)
        if self.store == "SQLite":
            _, self.tmppath = mkstemp(
                prefix='test', dir='/tmp', suffix='.sqlite')
        else:
            self.tmppath = mkdtemp()
        self.graph.open(self.tmppath, create=True)

        self.michel = URIRef(u'michel')
        self.tarek = URIRef(u'tarek')
        self.bob = URIRef(u'bob')
        self.likes = URIRef(u'likes')
        self.hates = URIRef(u'hates')
        self.pizza = URIRef(u'pizza')
        self.cheese = URIRef(u'cheese')

        self.c1 = URIRef(u'context-1')
        self.c2 = URIRef(u'context-2')

        # delete the graph for each test!
        self.graph.remove((None, None, None))

    def tearDown(self):
        self.graph.close()
        if os.path.isdir(self.tmppath):
            shutil.rmtree(self.tmppath)
        else:
            os.remove(self.tmppath)

    def testGraphAware(self):
        if not self.graph.store.graph_aware:
            return

        g = self.graph
        g1 = g.graph(self.c1)

        # added graph exists
        self.assertEquals(set(x.identifier for x in self.graph.contexts()),
                          set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        # added graph is empty
        self.assertEquals(len(g1), 0)

        g1.add((self.tarek, self.likes, self.pizza))

        # added graph still exists
        self.assertEquals(set(x.identifier for x in self.graph.contexts()),
                          set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        # added graph contains one triple
        self.assertEquals(len(g1), 1)

        g1.remove((self.tarek, self.likes, self.pizza))

        # added graph is empty
        self.assertEquals(len(g1), 0)

        # graph still exists, although empty
        self.assertEquals(set(x.identifier for x in self.graph.contexts()),
                          set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        g.remove_graph(self.c1)

        # graph is gone
        self.assertEquals(set(x.identifier for x in self.graph.contexts()),
                          set([DATASET_DEFAULT_GRAPH_ID]))

    def testDefaultGraph(self):
        self.graph.add((self.tarek, self.likes, self.pizza))
        self.assertEquals(len(self.graph), 1)
        # only default exists
        self.assertEquals(set(x.identifier for x in self.graph.contexts()),
                          set([DATASET_DEFAULT_GRAPH_ID]))

        # removing default graph removes triples but not actual graph
        self.graph.remove_graph(DATASET_DEFAULT_GRAPH_ID)

        self.assertEquals(len(self.graph), 0)
        # default still exists
        self.assertEquals(set(x.identifier for x in self.graph.contexts()),
                          set([DATASET_DEFAULT_GRAPH_ID]))

    def testNotUnion(self):
        g1 = self.graph.graph(self.c1)
        g1.add((self.tarek, self.likes, self.pizza))

        self.assertEqual(list(self.graph.objects(self.tarek, None)), [])
        self.assertEqual(list(g1.objects(self.tarek, None)), [self.pizza])
class RDFtoUmlDiagram:
    """
    Transform an RDF dataset into a UML diagram
    """

    def __init__(self, namespace, showObjs=True, showClasses=False,
                 showNamespace=True):
        self.ds = Dataset()
        self.d = UmlPygraphVizDiagram()
        self.show_objs = showObjs
        self.show_classes = showClasses
        self.namespace = namespace
        self.show_namespaces = showNamespace
        self.add_namespaces(self.namespace)

    def load_rdf(self, filename, input_format=None):
        if input_format:
            rdf_format = input_format
        elif filename is not sys.stdin:
            format_list = {
                '.xml': 'xml',
                '.rdf': 'xml',
                '.owl': 'xml',
                '.n3': 'n3',
                '.ttl': 'turtle',
                '.nt': 'nt',
                '.trig': 'trig',
                '.nq': 'nquads',
                '': 'turtle'
            }
            extension = splitext(filename.name)[1]
            rdf_format = format_list[extension]
        else:
            rdf_format = 'turtle'
        print("using rdf format: " + rdf_format)
        temp = self.ds.graph("file://" + filename.name)
        temp.parse(filename.name, format=rdf_format)

    def add_namespaces(self, namespaces):
        if namespaces:
            for ns in namespaces:
                self.ds.namespace_manager.bind(ns[0], ns[1])

    def start_subgraph(self, graph_name):
        self.d.start_subgraph(graph_name.strip('[<>]:_'))

    def add_object_node(self, object_name, classes_name, attributes):
        self.d.add_object_node(self.ds.namespace_manager.qname(object_name),
                               classes_name, attributes)

    def add_class_node(self, class_name, attributes):
        self.d.add_class_node(self.ds.namespace_manager.qname(class_name),
                              attributes)

    def add_edge(self, src, dst, predicate):
        self.d.add_edge(self.ds.namespace_manager.qname(src),
                        self.ds.namespace_manager.qname(dst),
                        self.ds.namespace_manager.qname(predicate))

    def add_subclass_edge(self, src, dst):
        self.d.add_subclass_edge(self.ds.namespace_manager.qname(src),
                                 self.ds.namespace_manager.qname(dst))

    def create_namespace_box(self):
        # Create Namespace box
        label = """<
        <table align="left" cellborder="0">
        <tr><td align='center' colspan='2'><b>Namespaces</b></td></tr>"""
        for ns in sorted(self.ds.namespaces()):
            label += "<tr><td align='left'>%s:</td><td align='left'>%s</td></tr>" % (
                ns[0], ns[1])
        label += "</table> >"

        self.d.set_label(label)

    def output_dot(self, filename):
        self.d.write_to_file(filename)

    def visualize(self, filename):
        self.d.visualize(filename, self.ds.namespaces())

    def create_diagram(self):
        # Iterate over all graphs
        for graph in self.ds.contexts():
            graph_name = graph.n3()
            if graph_name == "[<urn:x-rdflib:default>]":
                break
            graph = graph.skolemize()
            if len(graph) > 0:
                self.start_subgraph(graph_name)
                if self.show_objs:
                    self.create_object_nodes(graph)
                if self.show_classes:
                    self.create_class_nodes(graph)
        self.d.add_undescribed_nodes()
        if self.show_namespaces:
            self.create_namespace_box()

    def create_object_nodes(self, graph):
        # object nodes
        query_nodes = """PREFIX owl: <http://www.w3.org/2002/07/owl#>
            SELECT DISTINCT ?node
            WHERE {
                ?node a ?class.
                FILTER (?class NOT IN (rdfs:Class, owl:Class, owl:Property,
                                       owl:ObjectProperty, owl:DatatypeProperty))
            } ORDER BY ?node"""
        result_nodes = graph.query(query_nodes)
        for row_nodes in result_nodes:
            # adding the classes to the node (can be more than one)
            query_classes = """SELECT DISTINCT ?class
                WHERE {
                    %s a ?class.
                } ORDER BY ?class""" % row_nodes['node'].n3()
            result_classes = graph.query(query_classes)
            classes = []
            for row_classes in result_classes:
                if not self.show_classes:
                    classes.append(
                        self.ds.namespace_manager.qname(row_classes['class']))
                else:
                    self.add_edge(
                        row_nodes['node'], row_classes['class'],
                        "http://www.w3.org/1999/02/22-rdf-syntax-ns#type")

            # adding the attributes to the node
            query_attributes = """SELECT DISTINCT ?p ?o
                WHERE {
                    %s ?p ?o.
                    FILTER (isLiteral(?o))
                } ORDER BY ?p ?o""" % row_nodes['node'].n3()
            result_attributes = graph.query(query_attributes)
            attributes = []
            for row_attributes in result_attributes:
                attributes.append(
                    self.ds.namespace_manager.qname(row_attributes['p']) +
                    " = " + str(row_attributes['o']))
            self.add_object_node(row_nodes['node'], ", ".join(classes),
                                 attributes)

        # object node connections
        query_connections = """SELECT DISTINCT ?c1 ?c2 ?p
            WHERE {
                ?c1 ?p ?c2.
                FILTER (!isLiteral(?c2))
                FILTER (?p NOT IN (rdf:type, rdfs:domain, rdfs:range,
                                   rdfs:subClassOf))
            } ORDER BY ?c1 ?p ?c2"""
        result_connections = graph.query(query_connections)
        for row_connections in result_connections:
            self.add_edge(row_connections['c1'], row_connections['c2'],
                          row_connections['p'])

    def create_class_nodes(self, graph):
        # RDFS stuff
        query_classes = """PREFIX owl: <http://www.w3.org/2002/07/owl#>
            SELECT DISTINCT ?class
            WHERE {
                ?class a ?c .
                FILTER (?c IN (rdfs:Class, owl:Class))
            } ORDER BY ?class"""
        result_classes = graph.query(query_classes)
        for row_classes in result_classes:
            query_datatype_property = """
                PREFIX owl: <http://www.w3.org/2002/07/owl#>
                PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
                SELECT DISTINCT ?property ?range
                WHERE {
                    ?property rdfs:domain %s;
                        a owl:DatatypeProperty.
                    OPTIONAL { ?property rdfs:range ?range. }
                } ORDER BY ?property""" % row_classes['class'].n3()
            result_datatype_property = graph.query(query_datatype_property)
            attributes = []
            for r in result_datatype_property:
                text = self.ds.namespace_manager.qname(r['property'])
                if r['range']:
                    text += " = " + self.ds.namespace_manager.qname(r['range'])
                attributes.append(text)
            self.add_class_node(row_classes['class'], attributes)

        query_object_property = """SELECT DISTINCT ?src ?dest ?property
            WHERE {
                ?property a <http://www.w3.org/2002/07/owl#ObjectProperty>;
                    rdfs:domain ?src;
                    rdfs:range ?dest.
            } ORDER BY ?src ?property ?dest"""
        result_object_property = graph.query(query_object_property)
        for row_object_property in result_object_property:
            self.add_edge(row_object_property['src'],
                          row_object_property['dest'],
                          row_object_property['property'])

        query_subclass = """SELECT DISTINCT ?src ?dest
            WHERE {
                ?src rdfs:subClassOf ?dest.
            } ORDER BY ?src ?dest"""
        result_subclass = graph.query(query_subclass)
        for row_subclass in result_subclass:
            self.add_subclass_edge(row_subclass['src'], row_subclass['dest'])
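# Hypothetical driver for the class above (the file name and prefix mapping
# are made up; UmlPygraphVizDiagram is assumed to come from the surrounding
# project). load_rdf() expects an open file object, since it reads .name:
diagram = RDFtoUmlDiagram(namespace=[("ex", "http://example.com/")])
with open("data.trig") as f:
    diagram.load_rdf(f)
diagram.create_diagram()
diagram.output_dot("data.dot")
diagram.visualize("data.png")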
class DatasetTestCase(unittest.TestCase):
    store = 'default'
    slow = True
    tmppath = None

    def setUp(self):
        try:
            self.graph = Dataset(store=self.store)
        except ImportError:
            raise SkipTest(
                "Dependencies for store '%s' not available!" % self.store)
        if self.store == "SQLite":
            _, self.tmppath = mkstemp(
                prefix='test', dir='/tmp', suffix='.sqlite')
        elif self.store == "SPARQLUpdateStore":
            root = HOST + DB
            self.graph.open((root + "sparql", root + "update"))
        else:
            self.tmppath = mkdtemp()

        if self.store != "SPARQLUpdateStore":
            self.graph.open(self.tmppath, create=True)

        self.michel = URIRef(u'urn:michel')
        self.tarek = URIRef(u'urn:tarek')
        self.bob = URIRef(u'urn:bob')
        self.likes = URIRef(u'urn:likes')
        self.hates = URIRef(u'urn:hates')
        self.pizza = URIRef(u'urn:pizza')
        self.cheese = URIRef(u'urn:cheese')

        # Use regular URIs because SPARQL endpoints like Fuseki alter short names
        self.c1 = URIRef(u'urn:context-1')
        self.c2 = URIRef(u'urn:context-2')

        # delete the graph for each test!
        self.graph.remove((None, None, None))
        for c in self.graph.contexts():
            c.remove((None, None, None))
            assert len(c) == 0
            self.graph.remove_graph(c)

    def tearDown(self):
        self.graph.close()
        if self.store == "SPARQLUpdateStore":
            pass
        else:
            if os.path.isdir(self.tmppath):
                shutil.rmtree(self.tmppath)
            else:
                os.remove(self.tmppath)

    def testGraphAware(self):
        if not self.graph.store.graph_aware:
            return

        g = self.graph
        g1 = g.graph(self.c1)

        # Some SPARQL endpoint backends (e.g. TDB) do not consider
        # empty named graphs
        if self.store != "SPARQLUpdateStore":
            # added graph exists
            self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                             set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        # added graph is empty
        self.assertEqual(len(g1), 0)

        g1.add((self.tarek, self.likes, self.pizza))

        # added graph still exists
        self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                         set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        # added graph contains one triple
        self.assertEqual(len(g1), 1)

        g1.remove((self.tarek, self.likes, self.pizza))

        # added graph is empty
        self.assertEqual(len(g1), 0)

        # Some SPARQL endpoint backends (e.g. TDB) do not consider
        # empty named graphs
        if self.store != "SPARQLUpdateStore":
            # graph still exists, although empty
            self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                             set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        g.remove_graph(self.c1)

        # graph is gone
        self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                         set([DATASET_DEFAULT_GRAPH_ID]))

    def testDefaultGraph(self):
        # Sometimes the default graph is read-only (e.g. TDB in union mode)
        if self.store == "SPARQLUpdateStore":
            print("Please make sure updating the default graph "
                  "is supported by your SPARQL endpoint")

        self.graph.add((self.tarek, self.likes, self.pizza))
        self.assertEqual(len(self.graph), 1)
        # only default exists
        self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                         set([DATASET_DEFAULT_GRAPH_ID]))

        # removing default graph removes triples but not actual graph
        self.graph.remove_graph(DATASET_DEFAULT_GRAPH_ID)

        self.assertEqual(len(self.graph), 0)
        # default still exists
        self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                         set([DATASET_DEFAULT_GRAPH_ID]))

    def testNotUnion(self):
        # Union depends on the SPARQL endpoint configuration
        if self.store == "SPARQLUpdateStore":
            print("Please make sure your SPARQL endpoint has not configured "
                  "its default graph as the union of the named graphs")

        g1 = self.graph.graph(self.c1)
        g1.add((self.tarek, self.likes, self.pizza))

        self.assertEqual(list(self.graph.objects(self.tarek, None)), [])
        self.assertEqual(list(g1.objects(self.tarek, None)), [self.pizza])
class RDFtoUmlDiagram:
    """
    Transform an RDF dataset into a UML diagram
    """

    def __init__(self, showObjs, showClasses, namespace):
        self.ds = Dataset()
        self.d = UmlPygraphVizDiagram()
        self.show_objs = showObjs
        self.show_classes = showClasses
        self.namespace = namespace
        self.add_namespaces(self.namespace)

    def load_rdf(self, filename, input_format=None):
        if input_format:
            rdf_format = input_format
        elif filename is not sys.stdin:
            format_list = {'.xml': 'xml',
                           '.rdf': 'xml',
                           '.owl': 'xml',
                           '.n3': 'n3',
                           '.ttl': 'turtle',
                           '.nt': 'nt',
                           '.trig': 'trig',
                           '.nq': 'nquads',
                           '': 'turtle'}
            extension = splitext(filename.name)[1]
            rdf_format = format_list[extension]
        else:
            rdf_format = 'turtle'
        print("using rdf format: " + rdf_format)
        temp = self.ds.graph("file://" + filename.name)
        temp.parse(filename.name, format=rdf_format)

    def add_namespaces(self, namespaces):
        if namespaces:
            for ns in namespaces:
                self.ds.namespace_manager.bind(ns[0], ns[1])

    def start_subgraph(self, graph_name):
        self.d.start_subgraph(graph_name.strip('[<>]:_'))

    def add_object_node(self, object_name, classes_name, attributes):
        self.d.add_object_node(self.ds.namespace_manager.qname(object_name),
                               classes_name, attributes)

    def add_class_node(self, class_name, attributes):
        self.d.add_class_node(self.ds.namespace_manager.qname(class_name),
                              attributes)

    def add_edge(self, src, dst, predicate):
        self.d.add_edge(self.ds.namespace_manager.qname(src),
                        self.ds.namespace_manager.qname(dst),
                        self.ds.namespace_manager.qname(predicate))

    def add_subclass_edge(self, src, dst):
        self.d.add_subclass_edge(self.ds.namespace_manager.qname(src),
                                 self.ds.namespace_manager.qname(dst))

    def create_namespace_box(self):
        # Create Namespace box
        label = """<
        <table align="left" cellborder="0">
        <tr><td align='center' colspan='2'><b>Namespaces</b></td></tr>"""
        for ns in sorted(self.ds.namespaces()):
            label += "<tr><td align='left'>%s:</td><td align='left'>%s</td></tr>" % (
                ns[0], ns[1])
        label += "</table> >"

        self.d.set_label(label)

    def output_dot(self, filename):
        self.d.write_to_file(filename)

    def visualize(self, filename):
        self.d.visualize(filename, self.ds.namespaces())

    def create_diagram(self, object_nodes=True, class_nodes=False):
        # Iterate over all graphs
        for graph in self.ds.contexts():
            graph_name = graph.n3()
            if graph_name == "[<urn:x-rdflib:default>]":
                break
            graph = graph.skolemize()
            if len(graph) > 0:
                self.start_subgraph(graph_name)
                if self.show_objs:
                    self.create_object_nodes(graph)
                if self.show_classes:
                    self.create_class_nodes(graph)
        self.d.add_undescribed_nodes()
        self.create_namespace_box()

    def create_object_nodes(self, graph):
        # object nodes
        query_nodes = """PREFIX owl: <http://www.w3.org/2002/07/owl#>
            SELECT DISTINCT ?node
            WHERE {
                ?node a ?class.
                FILTER (?class NOT IN (rdfs:Class, owl:Class, owl:Property,
                                       owl:ObjectProperty, owl:DatatypeProperty))
            } ORDER BY ?node"""
        result_nodes = graph.query(query_nodes)
        for row_nodes in result_nodes:
            # adding the classes to the node (can be more than one)
            query_classes = """SELECT DISTINCT ?class
                WHERE {
                    %s a ?class.
                } ORDER BY ?class""" % row_nodes['node'].n3()
            result_classes = graph.query(query_classes)
            classes = []
            for row_classes in result_classes:
                if not self.show_classes:
                    classes.append(
                        self.ds.namespace_manager.qname(row_classes['class']))
                else:
                    self.add_edge(
                        row_nodes['node'], row_classes['class'],
                        "http://www.w3.org/1999/02/22-rdf-syntax-ns#type")

            # adding the attributes to the node
            query_attributes = """SELECT DISTINCT ?p ?o
                WHERE {
                    %s ?p ?o.
                    FILTER (isLiteral(?o))
                } ORDER BY ?p ?o""" % row_nodes['node'].n3()
            result_attributes = graph.query(query_attributes)
            attributes = []
            for row_attributes in result_attributes:
                attributes.append(
                    self.ds.namespace_manager.qname(row_attributes['p']) +
                    " = " + str(row_attributes['o']))
            self.add_object_node(row_nodes['node'], ", ".join(classes),
                                 attributes)

        # object node connections
        query_connections = """SELECT DISTINCT ?c1 ?c2 ?p
            WHERE {
                ?c1 ?p ?c2.
                FILTER (!isLiteral(?c2))
                FILTER (?p NOT IN (rdf:type, rdfs:domain, rdfs:range,
                                   rdfs:subClassOf))
            } ORDER BY ?c1 ?p ?c2"""
        result_connections = graph.query(query_connections)
        for row_connections in result_connections:
            self.add_edge(row_connections['c1'], row_connections['c2'],
                          row_connections['p'])

    def create_class_nodes(self, graph):
        # RDFS stuff
        query_classes = """PREFIX owl: <http://www.w3.org/2002/07/owl#>
            SELECT DISTINCT ?class
            WHERE {
                ?class a ?c .
                FILTER (?c IN (rdfs:Class, owl:Class))
            } ORDER BY ?class"""
        result_classes = graph.query(query_classes)
        for row_classes in result_classes:
            query_datatype_property = """
                PREFIX owl: <http://www.w3.org/2002/07/owl#>
                PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
                SELECT DISTINCT ?property ?range
                WHERE {
                    ?property rdfs:domain %s;
                        a owl:DatatypeProperty.
                    OPTIONAL { ?property rdfs:range ?range. }
                } ORDER BY ?property""" % row_classes['class'].n3()
            result_datatype_property = graph.query(query_datatype_property)
            attributes = []
            for r in result_datatype_property:
                text = self.ds.namespace_manager.qname(r['property'])
                if r['range']:
                    text += " = " + self.ds.namespace_manager.qname(r['range'])
                attributes.append(text)
            self.add_class_node(row_classes['class'], attributes)

        query_object_property = """SELECT DISTINCT ?src ?dest ?property
            WHERE {
                ?property a <http://www.w3.org/2002/07/owl#ObjectProperty>;
                    rdfs:domain ?src;
                    rdfs:range ?dest.
            } ORDER BY ?src ?property ?dest"""
        result_object_property = graph.query(query_object_property)
        for row_object_property in result_object_property:
            self.add_edge(row_object_property['src'],
                          row_object_property['dest'],
                          row_object_property['property'])

        query_subclass = """SELECT DISTINCT ?src ?dest
            WHERE {
                ?src rdfs:subClassOf ?dest.
            } ORDER BY ?src ?dest"""
        result_subclass = graph.query(query_subclass)
        for row_subclass in result_subclass:
            self.add_subclass_edge(row_subclass['src'], row_subclass['dest'])
class DatasetTestCase(unittest.TestCase):
    store = "default"
    slow = True
    tmppath = None

    def setUp(self):
        try:
            self.graph = Dataset(store=self.store)
        except ImportError:
            raise SkipTest("Dependencies for store '%s' not available!"
                           % self.store)
        if self.store == "SQLite":
            _, self.tmppath = mkstemp(prefix="test", dir="/tmp",
                                      suffix=".sqlite")
        elif self.store == "SPARQLUpdateStore":
            root = HOST + DB
            self.graph.open((root + "sparql", root + "update"))
        else:
            self.tmppath = mkdtemp()

        if self.store != "SPARQLUpdateStore":
            self.graph.open(self.tmppath, create=True)

        self.michel = URIRef("urn:michel")
        self.tarek = URIRef("urn:tarek")
        self.bob = URIRef("urn:bob")
        self.likes = URIRef("urn:likes")
        self.hates = URIRef("urn:hates")
        self.pizza = URIRef("urn:pizza")
        self.cheese = URIRef("urn:cheese")

        # Use regular URIs because SPARQL endpoints like Fuseki alter short names
        self.c1 = URIRef("urn:context-1")
        self.c2 = URIRef("urn:context-2")

        # delete the graph for each test!
        self.graph.remove((None, None, None))
        for c in self.graph.contexts():
            c.remove((None, None, None))
            assert len(c) == 0
            self.graph.remove_graph(c)

    def tearDown(self):
        self.graph.close()
        if self.store == "SPARQLUpdateStore":
            pass
        else:
            if os.path.isdir(self.tmppath):
                shutil.rmtree(self.tmppath)
            else:
                os.remove(self.tmppath)

    def testGraphAware(self):
        if not self.graph.store.graph_aware:
            return

        g = self.graph
        g1 = g.graph(self.c1)

        # Some SPARQL endpoint backends (e.g. TDB) do not consider
        # empty named graphs
        if self.store != "SPARQLUpdateStore":
            # added graph exists
            self.assertEqual(
                set(x.identifier for x in self.graph.contexts()),
                set([self.c1, DATASET_DEFAULT_GRAPH_ID]),
            )

        # added graph is empty
        self.assertEqual(len(g1), 0)

        g1.add((self.tarek, self.likes, self.pizza))

        # added graph still exists
        self.assertEqual(
            set(x.identifier for x in self.graph.contexts()),
            set([self.c1, DATASET_DEFAULT_GRAPH_ID]),
        )

        # added graph contains one triple
        self.assertEqual(len(g1), 1)

        g1.remove((self.tarek, self.likes, self.pizza))

        # added graph is empty
        self.assertEqual(len(g1), 0)

        # Some SPARQL endpoint backends (e.g. TDB) do not consider
        # empty named graphs
        if self.store != "SPARQLUpdateStore":
            # graph still exists, although empty
            self.assertEqual(
                set(x.identifier for x in self.graph.contexts()),
                set([self.c1, DATASET_DEFAULT_GRAPH_ID]),
            )

        g.remove_graph(self.c1)

        # graph is gone
        self.assertEqual(
            set(x.identifier for x in self.graph.contexts()),
            set([DATASET_DEFAULT_GRAPH_ID]),
        )

    def testDefaultGraph(self):
        # Sometimes the default graph is read-only (e.g. TDB in union mode)
        if self.store == "SPARQLUpdateStore":
            print("Please make sure updating the default graph "
                  "is supported by your SPARQL endpoint")

        self.graph.add((self.tarek, self.likes, self.pizza))
        self.assertEqual(len(self.graph), 1)
        # only default exists
        self.assertEqual(
            set(x.identifier for x in self.graph.contexts()),
            set([DATASET_DEFAULT_GRAPH_ID]),
        )

        # removing default graph removes triples but not actual graph
        self.graph.remove_graph(DATASET_DEFAULT_GRAPH_ID)

        self.assertEqual(len(self.graph), 0)
        # default still exists
        self.assertEqual(
            set(x.identifier for x in self.graph.contexts()),
            set([DATASET_DEFAULT_GRAPH_ID]),
        )

    def testNotUnion(self):
        # Union depends on the SPARQL endpoint configuration
        if self.store == "SPARQLUpdateStore":
            print("Please make sure your SPARQL endpoint has not configured "
                  "its default graph as the union of the named graphs")

        g1 = self.graph.graph(self.c1)
        g1.add((self.tarek, self.likes, self.pizza))

        self.assertEqual(list(self.graph.objects(self.tarek, None)), [])
        self.assertEqual(list(g1.objects(self.tarek, None)), [self.pizza])

    def testIter(self):
        """PR 1382: adds __iter__ to Dataset"""
        d = Dataset()

        uri_a = URIRef("https://example.com/a")
        uri_b = URIRef("https://example.com/b")
        uri_c = URIRef("https://example.com/c")
        uri_d = URIRef("https://example.com/d")

        d.add_graph(URIRef("https://example.com/g1"))
        d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/g1")))
        # pointless addition: duplicates the quad above
        d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/g1")))

        d.add_graph(URIRef("https://example.com/g2"))
        d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/g2")))
        d.add((uri_a, uri_b, uri_d, URIRef("https://example.com/g1")))  # new, uri_d

        # traditional iterator
        i_trad = 0
        for t in d.quads((None, None, None)):
            i_trad += 1

        # new Dataset.__iter__ iterator
        i_new = 0
        for t in d:
            i_new += 1

        self.assertEqual(i_new, i_trad)  # both should be 3
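# The equivalence testIter exercises, in isolation (assuming rdflib >= 6.0,
# where PR 1382 added Dataset.__iter__): iterating a Dataset yields the same
# (s, p, o, g) tuples as Dataset.quads().
from rdflib import Dataset, URIRef

d = Dataset()
d.add((URIRef("https://example.com/a"),
       URIRef("https://example.com/b"),
       URIRef("https://example.com/c"),
       URIRef("https://example.com/g1")))
assert set(d) == set(d.quads((None, None, None)))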