def load_statements():
    a = datetime.datetime.now()
    logger.info(f"start loading ds at: {a}")
    ds = Dataset()
    ds.parse(STATEMENTS, format=TYPE)
    b = datetime.datetime.now()
    logger.info(f"finished loading ds at: {b}")
    logger.info(f"ds loaded: {ds}")
    logger.info(f"ds loaded in {b - a}")
    return ds

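# load_statements() above relies on module-level setup that is not shown.
# A minimal sketch of what that setup could look like (STATEMENTS and TYPE
# are hypothetical values, not taken from the original module):
import datetime
import logging

from rdflib import Dataset

logger = logging.getLogger(__name__)
STATEMENTS = "statements.trig"  # hypothetical path to the statements file
TYPE = "trig"                   # hypothetical parse format
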
def test_load_from_file(self):
    ds = Dataset()
    ds.parse("geoStatements.trig", format="trig")

    async def f():
        await self.aiotest.addN(
            (i for i in ds.quads((None, None, None, None))))
        print("ds loaded")

    self.loop.run_until_complete(asyncio.gather(f()))

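# Note: ds.quads((None, None, None, None)) yields every (s, p, o, graph)
# 4-tuple in the Dataset, which is the shape the (project-specific) async
# addN wrapper above consumes, so the generator can be handed over unchanged.
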
def _get_single_graph_from_trig(trig_file: Optional[str] = None,
                                data: Optional[str] = None) -> rdflib.Graph:
    if trig_file is None and data is None:
        raise RuntimeError("One of trig_file OR data *must* be specified.")
    dataset = Dataset()
    dataset.parse(format="trig", source=trig_file, data=data)
    graphs_with_triples = [g for g in dataset.graphs() if len(g) > 0]
    assert (
        len(graphs_with_triples) == 1
    ), f"Found {len(graphs_with_triples)} non-trivial graphs in {trig_file}. Expected one."
    return graphs_with_triples[0]

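# Usage sketch for _get_single_graph_from_trig ("single_graph.trig" is a
# hypothetical file containing exactly one non-empty named graph; calling the
# function with neither argument raises RuntimeError):
graph = _get_single_graph_from_trig(trig_file="single_graph.trig")
print(f"graph {graph.identifier} holds {len(graph)} triples")
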
def test_roundtrip():
    d = Dataset()
    d.parse(Path(__file__).parent / "test_parser_hext_multigraph.ndjson",
            format="hext",
            publicID=d.default_context.identifier)
    d.default_union = True
    with open(str(
            Path(__file__).parent / "test_parser_hext_multigraph.ndjson")) as i:
        ordered_input = "".join(sorted(i.readlines())).strip()

    ordered_output = "\n".join(sorted(
        d.serialize(format="hext").split("\n"))).strip()

    assert ordered_output == ordered_input

def test_hext_dataset_linecount():
    d = Dataset()
    assert len(d) == 0
    d.parse(Path(__file__).parent / "test_parser_hext_multigraph.ndjson",
            format="hext",
            publicID=d.default_context.identifier)

    # count all the triples in the Dataset
    total_triples = 0
    for context in d.contexts():
        for triple in context.triples((None, None, None)):
            total_triples += 1
    assert total_triples == 18

    # count the number of serialized Hextuples, should be 22, as per the original file
    lc = len(d.serialize(format="hext").splitlines())
    assert lc == 22

def test_hext_json_representation():
    """Tests to see if every line in the ND-JSON Hextuple result is, in fact, JSON"""
    d = Dataset()
    trig_data = """
        PREFIX ex: <http://example.com/>
        PREFIX owl: <http://www.w3.org/2002/07/owl#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

        ex:g1 {
            ex:s1
                ex:p1 ex:o1 , ex:o2 ;
                ex:p2 [
                    a owl:Thing ;
                    rdf:value "thingy" ;
                ] ;
                ex:p3 "Object 3" , "Object 4 - English"@en ;
                ex:p4 "2021-12-03"^^xsd:date ;
                ex:p5 42 ;
                ex:p6 "42" ;
            .
        }

        ex:g2 {
            ex:s1
                ex:p1 ex:o1 , ex:o2 ;
            .

            ex:s11 ex:p11 ex:o11 , ex:o12 .
        }

        # default graph triples
        ex:s1 ex:p1 ex:o1 , ex:o2 .

        ex:s21 ex:p21 ex:o21 , ex:o22 .
        """
    d.parse(data=trig_data, format="trig")
    out = d.serialize(format="hext")
    for line in out.splitlines():
        j = json.loads(line)
        assert isinstance(j, list)

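# Each serialized Hextuple line is a six-element JSON array laid out as
# [subject, predicate, value, datatype, language, graph], which is why the
# test above can simply json.loads() every line and expect a list.
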
def performUpdate(options):
    endpoint = options['endpoint']
    inputFile = options['inputfile']
    updateCondition = options['updatecondition']
    preprocessupdate = options['preprocessupdate']
    limit = int(options['limit'])
    offset = int(options['offset'])

    inputData = Dataset()
    print("Parsing input data...")
    inputData.parse(inputFile, format='trig')
    print("Found %d named graphs" % len([
        d for d in list(inputData.contexts())
        if d.identifier.startswith("http")
    ]))

    headers = {'Accept': 'text/turtle'}

    # Query the endpoint and determine which graphs are new, changed, or unchanged
    graphs = {'new': [], 'changed': [], 'unchanged': []}
    queryTemplate = """
    CONSTRUCT { ?s ?p ?o } WHERE { GRAPH <%s> { ?s ?p ?o }}
    """
    print("Comparing with named graphs at endpoint %s" % endpoint)
    for context in tqdm([
            d for d in list(inputData.contexts())
            if d.identifier.startswith("http")
    ][offset:offset + limit]):
        r = requests.get(endpoint,
                         headers=headers,
                         params={"query": queryTemplate % context.identifier})
        if r.ok:
            remoteGraph = Graph()
            remoteGraph.parse(data=r.text, format='turtle')
            if not len(remoteGraph):
                graphs['new'].append((context, False))
            elif graphsAreTheSame(context, remoteGraph, preprocessupdate):
                graphs['unchanged'].append((context, remoteGraph))
            else:
                graphs['changed'].append((context, remoteGraph))
        else:
            print(r.text)

    # Output statistics
    print("\nComparison Result:")
    print("%d graph%s %s not exist at the endpoint and will be added" %
          (len(graphs['new']), "" if len(graphs['new']) == 1 else "s",
           "does" if len(graphs['new']) == 1 else "do"))
    print("%d graph%s already exist%s but %s different in the input file" %
          (len(graphs['changed']), "" if len(graphs['changed']) == 1 else "s",
           "s" if len(graphs['changed']) == 1 else "",
           "is" if len(graphs['changed']) == 1 else "are"))
    print("%d graph%s %s identical in both the input file and endpoint" %
          (len(graphs['unchanged']), "" if len(graphs['unchanged']) == 1 else "s",
           "is" if len(graphs['unchanged']) == 1 else "are"))

    # All new graphs should be included in the update
    graphsToUpdate = [d[0] for d in graphs['new']]

    # Only changed graphs whose new version matches the update condition should
    # be updated. If no update condition is set, all changed graphs are updated.
    if updateCondition:
        count = 0
        for graphPair in graphs['changed']:
            for result in graphPair[1].query(updateCondition):
                if result:
                    graphsToUpdate.append(graphPair[0])
                    count += 1
        print(
            "\n%d out of %d graph%s will be overwritten based on the update condition"
            % (count, len(graphs['changed']),
               "" if len(graphs['changed']) == 1 else "s"))
    else:
        graphsToUpdate += [d[0] for d in graphs['changed']]

    # Perform update
    for g in tqdm(graphsToUpdate):
        putGraph(g, endpoint)

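# performUpdate() calls putGraph() and graphsAreTheSame(), which are defined
# elsewhere in the original module. A minimal sketch of what putGraph() could
# look like, assuming the endpoint supports the SPARQL 1.1 Graph Store HTTP
# Protocol (an illustration, not the original implementation):
def putGraph(graph, endpoint):
    # Replace the remote named graph with the local one via HTTP PUT
    r = requests.put(
        endpoint,
        params={"graph": str(graph.identifier)},
        data=graph.serialize(format="turtle").encode("utf-8"),
        headers={"Content-Type": "text/turtle"},
    )
    r.raise_for_status()
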
app = Flask(__name__)
api = Api(app)

# set up a custom formatter to return JSON-LD to browsers by default
custom_formatter = flask_rdf.FormatSelector()
custom_formatter.wildcard_mimetype = 'application/ld+json'
custom_formatter.add_format('application/ld+json', 'json-ld')
custom_decorator = flask_rdf.flask.Decorator(custom_formatter)

ds = Dataset(default_union=True)
with open('./dectectorfinalstate.owl', "r") as f:
    result = ds.parse(f, format="application/rdf+xml")


class HelloWorld(Resource):
    @custom_decorator
    def get(self):
        return ds


api.add_resource(HelloWorld, '/detectorfinalstate')


@app.route("/")
def main():
    # send_file is cached, so for development it is better to use make_response
    # return send_file('templates/index.html')
    return make_response(open('templates/index.html').read())

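# Assumed entry point for running the service locally with Flask's built-in
# development server (not part of the original snippet):
if __name__ == '__main__':
    app.run(debug=True)
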
def test_hext_dataset():
    """Tests context-aware (multigraph) data"""
    d = Dataset()
    trig_data = """
        PREFIX ex: <http://example.com/>
        PREFIX owl: <http://www.w3.org/2002/07/owl#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

        ex:g1 {
            ex:s1
                ex:p1 ex:o1 , ex:o2 ;
                ex:p2 [
                    a owl:Thing ;
                    rdf:value "thingy" ;
                ] ;
                ex:p3 "Object 3" , "Object 4 - English"@en ;
                ex:p4 "2021-12-03"^^xsd:date ;
                ex:p5 42 ;
                ex:p6 "42" ;
            .
        }

        ex:g2 {
            ex:s1
                ex:p1 ex:o1 , ex:o2 ;
            .

            ex:s11 ex:p11 ex:o11 , ex:o12 .
        }

        # default graph triples
        ex:s1 ex:p1 ex:o1 , ex:o2 .

        ex:s21 ex:p21 ex:o21 , ex:o22 .
        """
    d.parse(data=trig_data, format="trig", publicID=d.default_context.identifier)
    out = d.serialize(format="hext")

    # note: can't test for BNodes in the result as they will be different every time
    testing_lines = [
        [False, '["http://example.com/s21", "http://example.com/p21", "http://example.com/o21", "globalId", "", ""]'],
        [False, '["http://example.com/s21", "http://example.com/p21", "http://example.com/o22", "globalId", "", ""]'],
        [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", ""]'],
        [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", ""]'],
        [False, '["http://example.com/s11", "http://example.com/p11", "http://example.com/o12", "globalId", "", "http://example.com/g2"]'],
        [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", "http://example.com/g2"]'],
        [False, '["http://example.com/s11", "http://example.com/p11", "http://example.com/o11", "globalId", "", "http://example.com/g2"]'],
        [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", "http://example.com/g2"]'],
        [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", "http://example.com/g1"]'],
        [False, '["http://example.com/s1", "http://example.com/p2"'],
        [False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#value", "thingy", "http://www.w3.org/2001/XMLSchema#string", "", "http://example.com/g1"]'],
        [False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "http://www.w3.org/2002/07/owl#Thing", "globalId", "", "http://example.com/g1"]'],
        [False, '["http://example.com/s1", "http://example.com/p3", "Object 4 - English", "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString", "en", "http://example.com/g1"]'],
        [False, '["http://example.com/s1", "http://example.com/p6", "42", "http://www.w3.org/2001/XMLSchema#string", "", "http://example.com/g1"]'],
        [False, '["http://example.com/s1", "http://example.com/p4", "2021-12-03", "http://www.w3.org/2001/XMLSchema#date", "", "http://example.com/g1"]'],
        [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", "http://example.com/g1"]'],
        [False, '["http://example.com/s1", "http://example.com/p5", "42", "http://www.w3.org/2001/XMLSchema#integer", "", "http://example.com/g1"]'],
        [False, '["http://example.com/s1", "http://example.com/p3", "Object 3", "http://www.w3.org/2001/XMLSchema#string", "", "http://example.com/g1"]'],
    ]
    for line in out.splitlines():
        for test in testing_lines:
            if test[1] in line:
                test[0] = True

    assert all([x[0] for x in testing_lines])