def Query(self, request: SageQuery, context: grpc.ServicerContext) -> SageResponse:
    graph: Optional[Graph] = None
    try:
        query = request.query
        graph_name = request.default_graph_uri
        next_link = request.next_link if len(request.next_link) > 0 else None
        if not self._dataset.has_graph(graph_name):
            context.abort(code=grpc.StatusCode.NOT_FOUND, details=f"RDF Graph {graph_name} not found on the server.")
        graph = self._dataset.get_graph(graph_name)

        # decode next_link or build query execution plan
        cardinalities = dict()
        start = time()
        if next_link is not None:
            if self._dataset.is_stateless:
                saved_plan = next_link
            else:
                saved_plan = self._dataset.statefull_manager.get_plan(next_link)
            plan = load(decode_saved_plan(saved_plan), self._dataset)
        else:
            plan, cardinalities = parse_query(query, self._dataset, graph_name)
        loading_time = (time() - start) * 1000

        # execute query
        engine = SageEngine()
        quota = graph.quota / 1000
        max_results = graph.max_results
        bindings, saved_plan, is_done, abort_reason = run(engine.execute(plan, quota, max_results))

        # commit or abort (if necessary)
        if abort_reason is not None:
            graph.abort()
            context.abort(code=grpc.StatusCode.INTERNAL, details=f"The SPARQL query has been aborted for the following reason: '{abort_reason}'")
        else:
            graph.commit()

        # encode saved plan if query execution is not done yet and there was no abort
        start = time()
        next_page = None
        if (not is_done) and abort_reason is None:
            next_page = encode_saved_plan(saved_plan)
            if not self._dataset.is_stateless:
                # generate the plan ID if this is the first time we execute this plan
                plan_id = next_link if next_link is not None else str(uuid4())
                self._dataset.statefull_manager.save_plan(plan_id, next_page)
                next_page = plan_id
        elif is_done and (not self._dataset.is_stateless) and next_link is not None:
            # delete the saved plan, as it will not be reloaded anymore
            self._dataset.statefull_manager.delete_plan(next_link)
        export_time = (time() - start) * 1000

        # create response
        response = SageResponse(is_done=is_done, next_link=next_page)
        for binding in create_bindings(bindings):
            response.bindings.append(binding)
        return response
    except Exception as err:
        if graph is not None:
            graph.abort()
        context.abort(code=grpc.StatusCode.INTERNAL, details=f"A server-side error has occurred: {str(err)}")
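# The server round-trips the saved plan as an opaque string. Below is a
# minimal sketch of the `encode_saved_plan` / `decode_saved_plan` pair,
# assuming the saved plan is a protobuf message (consistent with the
# `saved.SerializeToString()` calls in the tests below, and with `load`
# consuming raw protobuf bytes). The actual codec used by the server may
# differ; this is an illustration, not the project's implementation.
from base64 import b64decode, b64encode


def encode_saved_plan(saved_plan) -> str:
    """Serialize a saved plan (protobuf message) into a transport-safe string."""
    return b64encode(saved_plan.SerializeToString()).decode('utf-8')


def decode_saved_plan(encoded_plan: str) -> bytes:
    """Decode a string produced by encode_saved_plan back into protobuf bytes."""
    return b64decode(encoded_plan)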
async def test_filter_iterator_interrupt():
    expression = "?p = <http://schema.org/eligibleRegion>"
    iterator, card = hdtDoc.search(triple['subject'], triple['predicate'], triple['object'])
    scan = ProjectionIterator(ScanIterator(iterator, triple, card))
    iterator = FilterIterator(scan, expression)
    (results, saved, done, _) = await engine.execute(iterator, 10e-7, 2)
    assert len(results) <= 4
    for b in results:
        assert b['?p'] == 'http://schema.org/eligibleRegion'
        assert b['?o'] in [
            'http://db.uwaterloo.ca/~galuc/wsdbm/Country0',
            'http://db.uwaterloo.ca/~galuc/wsdbm/Country1',
            'http://db.uwaterloo.ca/~galuc/wsdbm/Country4',
            'http://db.uwaterloo.ca/~galuc/wsdbm/Country9'
        ]
    tmp = len(results)
    # resume execution from the saved plan, with a quantum large enough to finish
    reloaded = load(saved.SerializeToString(), DummyDataset(hdtDoc, 'watdiv100'))
    (results, saved, done, _) = await engine.execute(reloaded, 10e7)
    assert len(results) + tmp == 4
    for b in results:
        assert b['?p'] == 'http://schema.org/eligibleRegion'
        assert b['?o'] in [
            'http://db.uwaterloo.ca/~galuc/wsdbm/Country0',
            'http://db.uwaterloo.ca/~galuc/wsdbm/Country1',
            'http://db.uwaterloo.ca/~galuc/wsdbm/Country4',
            'http://db.uwaterloo.ca/~galuc/wsdbm/Country9'
        ]
    assert done
async def test_filter_iterator_interrupt():
    context = {'quantum': 10e-7, 'max_results': 10e7}
    expression = "?p = <http://schema.org/eligibleRegion>"
    scan = ProjectionIterator(ScanIterator(hdtDoc, triple, context), context)
    iterator = FilterIterator(scan, expression, context)
    (results, saved, done, _) = await engine.execute(iterator, context)
    assert len(results) <= 4
    for b in results:
        assert b['?p'] == 'http://schema.org/eligibleRegion'
        assert b['?o'] in [
            'http://db.uwaterloo.ca/~galuc/wsdbm/Country0',
            'http://db.uwaterloo.ca/~galuc/wsdbm/Country1',
            'http://db.uwaterloo.ca/~galuc/wsdbm/Country4',
            'http://db.uwaterloo.ca/~galuc/wsdbm/Country9'
        ]
    tmp = len(results)
    # resume execution from the saved plan, with a quantum large enough to finish
    context['quantum'] = 10e7
    reloaded = load(saved.SerializeToString(), DummyDataset(hdtDoc, 'watdiv100'), context)
    (results, saved, done, _) = await engine.execute(reloaded, context)
    assert len(results) + tmp == 4
    for b in results:
        assert b['?p'] == 'http://schema.org/eligibleRegion'
        assert b['?o'] in [
            'http://db.uwaterloo.ca/~galuc/wsdbm/Country0',
            'http://db.uwaterloo.ca/~galuc/wsdbm/Country1',
            'http://db.uwaterloo.ca/~galuc/wsdbm/Country4',
            'http://db.uwaterloo.ca/~galuc/wsdbm/Country9'
        ]
    assert done
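# Both tests rely on a `DummyDataset` fixture. A minimal sketch of what it
# could look like, assuming `load` only needs to resolve a graph by name;
# the structure and method set are assumptions, not the project's actual
# fixture, which may expose more of the Dataset interface.
class DummyDataset:

    def __init__(self, graph, default_graph_name):
        # map a single graph (here, an HDT document) under its graph name
        self._graphs = {default_graph_name: graph}

    def has_graph(self, name) -> bool:
        return name in self._graphs

    def get_graph(self, name):
        return self._graphs[name]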
def build_query_plan(query, dataset, default_graph, saved_plan=None):
    """Build a pipeline of iterators used to evaluate a query."""
    cardinalities = []
    if saved_plan is not None:
        return load(saved_plan, dataset), []
    root = None
    if query['type'] == 'union':
        root, cardinalities = build_union_plan(query['union'], dataset, default_graph)
    elif query['type'] == 'bgp':
        root, cardinalities = build_join_plan(query['bgp'], dataset, default_graph)
    else:
        raise Exception('Unknown query type found during query optimization')
    # apply (possible) filter clause(s)
    if 'filters' in query and len(query['filters']) > 0:
        # exclude empty strings
        filters = list(filter(lambda x: len(x) > 0, query['filters']))
        if len(filters) > 0:
            # reduce all filters into a single conjunctive expression
            expression = reduce(lambda x, y: "({}) && ({})".format(x, y), filters)
            root = FilterIterator(root, expression)
    return root, cardinalities
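# Usage sketch: build a plan for a single-BGP query with one filter. The
# shape of the logical plan (`type`, `bgp`, `filters` keys; triple patterns
# as subject/predicate/object dicts) is inferred from the branches above and
# from the tests; `dataset` and `default_graph` are assumed to come from the
# caller.
query = {
    'type': 'bgp',
    'bgp': [
        {'subject': '?s', 'predicate': '<http://schema.org/eligibleRegion>', 'object': '?o'}
    ],
    'filters': ['?o = <http://db.uwaterloo.ca/~galuc/wsdbm/Country9>']
}
plan, cardinalities = build_query_plan(query, dataset, default_graph)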
def execute_query(query, default_graph_uri, next_link, dataset, mimetype, url):
    """Execute a query using the SageEngine and return the appropriate HTTP response.

    Any failure results in a rollback/abort of the current query execution.
    """
    graph = None
    try:
        graph_name = format_graph_uri(default_graph_uri, url)
        if not dataset.has_graph(graph_name):
            logging.error("No RDF graph matching the default URI provided was found.")
            return sage_http_error("No RDF graph matching the default URI provided was found.")
        graph = dataset.get_graph(graph_name)

        # decode next_link or build query execution plan
        cardinalities = dict()
        start = time()
        if next_link is not None:
            plan = load(decode_saved_plan(next_link), dataset)
        else:
            plan, cardinalities = parse_query(query, dataset, graph_name, url)
        loading_time = (time() - start) * 1000

        # execute query
        engine = SageEngine()
        quota = graph.quota / 1000
        max_results = graph.max_results
        bindings, saved_plan, is_done = engine.execute(plan, quota, max_results)

        # commit (if necessary)
        graph.commit()

        # compute controls for the next page
        start = time()
        next_page = None
        if not is_done:
            next_page = encode_saved_plan(saved_plan)
        export_time = (time() - start) * 1000
        stats = {"cardinalities": cardinalities, "import": loading_time, "export": export_time}

        # send the response in the requested format
        if mimetype == "application/sparql-results+json":
            return Response(responses.w3c_json_streaming(bindings, next_page, stats, url), content_type='application/json')
        if mimetype in ("application/xml", "application/sparql-results+xml"):
            return Response(responses.w3c_xml(bindings, next_page, stats), content_type="application/xml")
        if mimetype == "application/json":
            return Response(responses.raw_json_streaming(bindings, next_page, stats, url), content_type='application/json')
        # otherwise, return the HTML version
        return render_template("sage_page.html", query=query, default_graph_uri=default_graph_uri, bindings=bindings, next_page=next_page, stats=stats)
    except Exception as err:
        # abort all ongoing transactions (if required),
        # then forward the exception to the main loop
        logging.error(f"sage execute_query error: {err}")
        if graph is not None:
            graph.abort()
        raise err
async def execute_query(query: str, default_graph_uri: str, next_link: Optional[str], dataset: Dataset) -> Tuple[List[Dict[str, str]], Optional[str], Dict[str, str]]:
    """Execute a query using the SageEngine and return the results, a next-page link, and statistics.

    Any failure results in a rollback/abort of the current query execution.

    Args:
      * query: SPARQL query to execute.
      * default_graph_uri: URI of the default RDF graph to use.
      * next_link: URI of a saved plan. Can be `None` if query execution should start from the beginning.
      * dataset: RDF dataset on which the query is executed.

    Returns:
      A tuple (`bindings`, `next_page`, `stats`) where:
      * `bindings` is a list of query results.
      * `next_page` is a link to the saved query execution state. Set to `None` if query execution completed during the time quantum.
      * `stats` are statistics about query execution.

    Raises:
      Any exception that has occurred during query execution.
    """
    graph = None
    try:
        if not dataset.has_graph(default_graph_uri):
            raise HTTPException(status_code=404, detail=f"RDF Graph {default_graph_uri} not found on the server.")
        graph = dataset.get_graph(default_graph_uri)
        context = dict()
        context['quantum'] = graph.quota
        context['max_results'] = graph.max_results

        # decode next_link or build query execution plan
        cardinalities = dict()
        start = time()
        if next_link is not None:
            if dataset.is_stateless:
                saved_plan = next_link
            else:
                saved_plan = dataset.statefull_manager.get_plan(next_link)
            plan = load(decode_saved_plan(saved_plan), dataset, context)
        else:
            plan, cardinalities = parse_query(query, dataset, default_graph_uri, context)
        loading_time = (time() - start) * 1000
        logging.info(f'loading time: {loading_time}ms')

        # execute query
        engine = SageEngine()
        bindings, saved_plan, is_done, abort_reason = await engine.execute(plan, context)

        # commit or abort (if necessary)
        if abort_reason is not None:
            graph.abort()
            raise HTTPException(status_code=500, detail=f"The SPARQL query has been aborted for the following reason: '{abort_reason}'")
        else:
            graph.commit()

        # encode saved plan if query execution is not done yet and there was no abort
        start = time()
        next_page = None
        if (not is_done) and abort_reason is None:
            next_page = encode_saved_plan(saved_plan)
            if not dataset.is_stateless:
                # generate the plan ID if this is the first time we execute this plan
                plan_id = next_link if next_link is not None else str(uuid4())
                dataset.statefull_manager.save_plan(plan_id, next_page)
                next_page = plan_id
        elif is_done and (not dataset.is_stateless) and next_link is not None:
            # delete the saved plan, as it will not be reloaded anymore
            dataset.statefull_manager.delete_plan(next_link)
        export_time = (time() - start) * 1000
        logging.info(f'export time: {export_time}ms')

        stats = {"cardinalities": cardinalities, "import": loading_time, "export": export_time}
        return (bindings, next_page, stats)
    except Exception as err:
        # abort all ongoing transactions, then forward the exception to the main loop
        logging.error(traceback.format_exc())
        if graph is not None:
            graph.abort()
        raise err
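# Usage sketch: drain a query by following `next_page` links, resuming the
# saved plan one quantum at a time until `next_page` comes back as `None`.
# Grounded in the signature of execute_query above; `fetch_all_bindings` is
# a hypothetical helper, and `dataset` is assumed to come from the caller.
async def fetch_all_bindings(query: str, graph_uri: str, dataset: Dataset) -> List[Dict[str, str]]:
    bindings: List[Dict[str, str]] = []
    next_link: Optional[str] = None
    while True:
        page, next_link, _ = await execute_query(query, graph_uri, next_link, dataset)
        bindings.extend(page)
        if next_link is None:  # execution completed during the last quantum
            return bindings

# e.g. results = asyncio.run(fetch_all_bindings(query, graph_uri, dataset))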