예제 #1
0
  def Query(self, request: SageQuery, context: grpc.ServicerContext) -> SageResponse:
    """Execute a SPARQL query against a registered RDF graph and return one page of results.

    Args:
      * request: gRPC request carrying the query, the default graph URI and an
        optional link to a previously saved execution plan.
      * context: gRPC servicer context, used to abort the RPC on failure.

    Returns:
      A SageResponse with the solution bindings, a completion flag and an
      optional next link to resume execution.
    """
    graph_name = request.default_graph_uri
    # Validate the target graph BEFORE the try block: context.abort() raises to
    # terminate the RPC, and the generic handler below would otherwise re-catch
    # that raise and mask the 404 with a wrapped 500.
    if not self._dataset.has_graph(graph_name):
      context.abort(code=404, details=f"RDF Graph {graph_name} not found on the server.")
    graph: Graph = self._dataset.get_graph(graph_name)
    abort_details = None
    try:
      query = request.query
      next_link = request.next_link if len(request.next_link) > 0 else None

      # decode next_link or build query execution plan
      cardinalities = dict()
      start = time()
      if next_link is not None:
        if self._dataset.is_stateless:
          saved_plan = next_link
        else:
          saved_plan = self._dataset.statefull_manager.get_plan(next_link)
        plan = load(decode_saved_plan(saved_plan), self._dataset)
      else:
        plan, cardinalities = parse_query(query, self._dataset, graph_name)
      # NOTE(review): loading_time/exportTime are computed but not reported here;
      # kept for parity with the HTTP variants of this endpoint.
      loading_time = (time() - start) * 1000

      # execute query
      engine = SageEngine()
      quota = graph.quota / 1000
      max_results = graph.max_results
      bindings, saved_plan, is_done, abort_reason = run(engine.execute(plan, quota, max_results))

      # commit, or roll back and report the abort reason (the RPC is aborted
      # outside this try block so the message is not wrapped by the handler)
      if abort_reason is not None:
        graph.abort()
        abort_details = f"The SPARQL query has been aborted for the following reason: '{abort_reason}'"
      else:
        graph.commit()

        # encode saved plan if query execution is not done yet
        start = time()
        next_page = None
        if not is_done:
          next_page = encode_saved_plan(saved_plan)
          if not self._dataset.is_stateless:
            # generate the plan ID if this is the first time we execute this plan
            plan_id = next_link if next_link is not None else str(uuid4())
            self._dataset.statefull_manager.save_plan(plan_id, next_page)
            next_page = plan_id
        elif (not self._dataset.is_stateless) and next_link is not None:
          # delete the saved plan, as it will not be reloaded anymore
          self._dataset.statefull_manager.delete_plan(next_link)
        exportTime = (time() - start) * 1000

        # create response
        response = SageResponse(is_done=is_done, next_link=next_page)
        for binding in create_bindings(bindings):
          response.bindings.append(binding)
        return response
    except Exception as err:
      graph.abort()
      abort_details = f"A server-side error has occurred: {str(err)}"
    # Abort the RPC outside the try block so the exception raised by
    # context.abort() is not swallowed and re-wrapped by the handler above.
    context.abort(code=500, details=abort_details)
예제 #2
0
async def test_filter_iterator_interrupt():
    """Interrupt a FILTER pipeline with a tiny time quantum, then resume it from the saved plan."""
    expected_countries = [
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country0',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country1',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country4',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country9'
    ]

    def assert_bindings(bindings):
        # every solution must satisfy the filter and map to an expected country
        for binding in bindings:
            assert binding['?p'] == 'http://schema.org/eligibleRegion'
            assert binding['?o'] in expected_countries

    expression = "?p = <http://schema.org/eligibleRegion>"
    source, cardinality = hdtDoc.search(triple['subject'], triple['predicate'],
                                        triple['object'])
    projection = ProjectionIterator(ScanIterator(source, triple, cardinality))
    pipeline = FilterIterator(projection, expression)

    # first run: interrupted almost immediately by a very small quantum
    (results, saved, done, _) = await engine.execute(pipeline, 10e-7, 2)
    assert len(results) <= 4
    assert_bindings(results)
    first_batch = len(results)

    # resume from the saved plan with a generous quantum and run to completion
    resumed = load(saved.SerializeToString(),
                   DummyDataset(hdtDoc, 'watdiv100'))
    (results, saved, done, _) = await engine.execute(resumed, 10e7)
    assert len(results) + first_batch == 4
    assert_bindings(results)
    assert done
예제 #3
0
async def test_filter_iterator_interrupt():
    """Interrupt a FILTER pipeline with a tiny quantum, then resume it from the saved plan (context API)."""
    expected_countries = [
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country0',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country1',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country4',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country9'
    ]

    def assert_bindings(bindings):
        # every solution must satisfy the filter and map to an expected country
        for binding in bindings:
            assert binding['?p'] == 'http://schema.org/eligibleRegion'
            assert binding['?o'] in expected_countries

    context = { 'quantum': 10e-7, 'max_results': 10e7 }
    expression = "?p = <http://schema.org/eligibleRegion>"
    projection = ProjectionIterator(ScanIterator(hdtDoc, triple, context), context)
    pipeline = FilterIterator(projection, expression, context)

    # first run: interrupted almost immediately by a very small quantum
    (results, saved, done, _) = await engine.execute(pipeline, context)
    assert len(results) <= 4
    assert_bindings(results)
    first_batch = len(results)

    # resume from the saved plan with a generous quantum and run to completion
    context['quantum'] = 10e7
    resumed = load(saved.SerializeToString(), DummyDataset(hdtDoc, 'watdiv100'), context)
    (results, saved, done, _) = await engine.execute(resumed, context)
    assert len(results) + first_batch == 4
    assert_bindings(results)
    assert done
예제 #4
0
def build_query_plan(query, dataset, default_graph, saved_plan=None):
    """Build a pipeline of iterators used to evaluate a query.

    Args:
      * query: parsed query, a dict whose 'type' is 'union' or 'bgp', with an
        optional 'filters' list of filter expressions.
      * dataset: RDF dataset used to resolve triple patterns.
      * default_graph: URI of the default RDF graph.
      * saved_plan: optional saved execution plan; when provided, the plan is
        reloaded from it instead of being rebuilt.

    Returns:
      A tuple (root iterator, list of cardinality estimates). The cardinality
      list is empty when resuming from a saved plan.

    Raises:
      Exception: if the query type is not supported.
    """
    # resume from a saved plan: no optimization needed
    if saved_plan is not None:
        return load(saved_plan, dataset), []

    if query['type'] == 'union':
        root, cardinalities = build_union_plan(query['union'], dataset,
                                               default_graph)
    elif query['type'] == 'bgp':
        root, cardinalities = build_join_plan(query['bgp'], dataset,
                                              default_graph)
    else:
        # fixed typo in the error message ("Unkown" -> "Unknown")
        raise Exception('Unknown query type found during query optimization')

    # apply (possible) filter clause(s), excluding empty strings
    if 'filters' in query:
        filters = [expr for expr in query['filters'] if len(expr) > 0]
        if len(filters) > 0:
            # reduce all filters in a conjunctive expression
            expression = reduce(lambda x, y: "({}) && ({})".format(x, y),
                                filters)
            root = FilterIterator(root, expression)
    return root, cardinalities
def execute_query(query, default_graph_uri, next_link, dataset, mimetype, url):
    """Run a SPARQL query with the SageEngine and build the matching HTTP response.

    Any failure triggers a rollback/abort of the current query execution before
    the exception is forwarded to the caller.
    """
    graph = None
    try:
        graph_name = format_graph_uri(default_graph_uri, url)
        if not dataset.has_graph(graph_name):
            logging.error("No RDF graph matching the default URI provided was found.")
            return sage_http_error("No RDF graph matching the default URI provided was found.")
        graph = dataset.get_graph(graph_name)

        # reload a saved plan when a next link is given, otherwise parse the query
        cardinalities = dict()
        decode_start = time()
        if next_link is None:
            plan, cardinalities = parse_query(query, dataset, graph_name, url)
        else:
            plan = load(decode_saved_plan(next_link), dataset)
        loading_time = (time() - decode_start) * 1000

        # evaluate the plan under the graph's time quota and result limit
        engine = SageEngine()
        bindings, saved_plan, is_done = engine.execute(plan, graph.quota / 1000, graph.max_results)

        # commit (if necessary)
        graph.commit()

        # save the execution state for the next page, if any
        encode_start = time()
        next_page = encode_saved_plan(saved_plan) if not is_done else None
        exportTime = (time() - encode_start) * 1000
        stats = {"cardinalities": cardinalities, "import": loading_time, "export": exportTime}

        # pick the serialization matching the requested mimetype
        if mimetype == "application/sparql-results+json":
            body = responses.w3c_json_streaming(bindings, next_page, stats, url)
            return Response(body, content_type='application/json')
        if mimetype in ("application/xml", "application/sparql-results+xml"):
            return Response(responses.w3c_xml(bindings, next_page, stats), content_type="application/xml")
        if mimetype == "application/json":
            body = responses.raw_json_streaming(bindings, next_page, stats, url)
            return Response(body, content_type='application/json')
        # otherwise, fall back to the HTML page
        return render_template("sage_page.html", query=query, default_graph_uri=default_graph_uri, bindings=bindings,
                               next_page=next_page, stats=stats)
    except Exception as err:
        # abort any ongoing transaction (if required)
        # then forward the exception to the main loop
        logging.error(f"sage execute_query error: {err}")
        if graph is not None:
            graph.abort()
        raise err
예제 #6
0
async def execute_query(query: str, default_graph_uri: str, next_link: Optional[str], dataset: Dataset) -> Tuple[List[Dict[str, str]], Optional[str], Dict[str, str]]:
    """Execute a query using the SageEngine and returns the appropriate HTTP response.

    Any failure results in a single rollback/abort on the current query execution.

    Args:
      * query: SPARQL query to execute.
      * default_graph_uri: URI of the default RDF graph to use.
      * next_link: URI to a saved plan. Can be `None` if query execution should starts from the beginning.
      * dataset: RDF dataset on which the query is executed.

    Returns:
      A tuple (`bindings`, `next_page`, `stats`) where:
      * `bindings` is a list of query results.
      * `next_page` is a link to saved query execution state. Sets to `None` if query execution completed during the time quantum.
      * `stats` are statistics about query execution.

    Throws: Any exception that have occured during query execution.
    """
    # Validate the graph OUTSIDE the try block: a missing graph is an expected
    # client error (404), not a server failure to be logged with a traceback.
    if not dataset.has_graph(default_graph_uri):
        raise HTTPException(status_code=404, detail=f"RDF Graph {default_graph_uri} not found on the server.")
    graph = dataset.get_graph(default_graph_uri)
    try:
        context = dict()
        context['quantum'] = graph.quota
        context['max_results'] = graph.max_results

        # decode next_link or build query execution plan
        cardinalities = dict()
        start = time()
        if next_link is not None:
            if dataset.is_stateless:
                saved_plan = next_link
            else:
                saved_plan = dataset.statefull_manager.get_plan(next_link)
            plan = load(decode_saved_plan(saved_plan), dataset, context)
        else:
            plan, cardinalities = parse_query(query, dataset, default_graph_uri, context)
        # measure once so the logged and reported durations agree
        loading_time = (time() - start) * 1000
        logging.info(f'loading time: {loading_time}ms')

        # execute query
        engine = SageEngine()
        bindings, saved_plan, is_done, abort_reason = await engine.execute(plan, context)

        # On abort, raise and let the handler below roll back exactly once
        # (previously the graph was aborted here AND again in the handler).
        if abort_reason is not None:
            raise HTTPException(status_code=500, detail=f"The SPARQL query has been aborted for the following reason: '{abort_reason}'")
        graph.commit()

        start = time()
        # encode saved plan if query execution is not done yet
        next_page = None
        if not is_done:
            next_page = encode_saved_plan(saved_plan)
            if not dataset.is_stateless:
                # generate the plan ID if this is the first time we execute this plan
                plan_id = next_link if next_link is not None else str(uuid4())
                dataset.statefull_manager.save_plan(plan_id, next_page)
                next_page = plan_id
        elif (not dataset.is_stateless) and next_link is not None:
            # delete the saved plan, as it will not be reloaded anymore
            dataset.statefull_manager.delete_plan(next_link)

        export_time = (time() - start) * 1000
        logging.info(f'export time: {export_time}ms')
        stats = {"cardinalities": cardinalities, "import": loading_time, "export": export_time}

        return (bindings, next_page, stats)
    except Exception as err:
        # abort the ongoing transaction once, then forward the exception to the main loop
        logging.error(traceback.format_exc())
        graph.abort()
        raise err