Example #1
0
  def Query(self, request: SageQuery, context: grpc.ServicerContext) -> SageResponse:
    """Evaluate a SPARQL query (or resume a saved one via next_link) and return a SageResponse.

    Args:
      * request: gRPC request carrying the query string, the default graph URI
        and an optional next_link to resume a previously saved plan.
      * context: gRPC servicer context, used to abort the call on errors.

    Returns:
      A SageResponse with the produced bindings, the is_done flag and, when the
      query did not complete within its quantum, a next_link to resume it.
    """
    graph: Graph = None
    # context.abort() reports the error by *raising*; track when we have already
    # aborted so the generic handler below does not roll back a second time and
    # mask the original status code (404/500) with a generic 500.
    already_aborted = False
    try:
      query = request.query
      graph_name = request.default_graph_uri
      next_link = request.next_link if len(request.next_link) > 0 else None
      if not self._dataset.has_graph(graph_name):
        already_aborted = True
        context.abort(code=404, details=f"RDF Graph {graph_name} not found on the server.")
      graph = self._dataset.get_graph(graph_name)

      # decode next_link (resume a saved plan) or build a fresh query execution plan
      cardinalities = dict()
      start = time()
      if next_link is not None:
        if self._dataset.is_stateless:
          # stateless mode: the saved plan is encoded in the link itself
          saved_plan = next_link
        else:
          # stateful mode: the link is an ID into the plan store
          saved_plan = self._dataset.statefull_manager.get_plan(next_link)
        plan = load(decode_saved_plan(saved_plan), self._dataset)
      else:
        plan, cardinalities = parse_query(query, self._dataset, graph_name)
      loading_time = (time() - start) * 1000

      # execute query under the graph's time quantum (quota is in ms, engine expects seconds)
      engine = SageEngine()
      quota = graph.quota / 1000
      max_results = graph.max_results
      bindings, saved_plan, is_done, abort_reason = run(engine.execute(plan, quota, max_results))

      # commit or abort (if necessary)
      if abort_reason is not None:
        graph.abort()
        already_aborted = True
        context.abort(code=500, details=f"The SPARQL query has been aborted for the following reason: '{abort_reason}'")
      else:
        graph.commit()

      # encode saved plan if query execution is not done yet and there was no abort
      start = time()
      next_page = None
      if (not is_done) and abort_reason is None:
        next_page = encode_saved_plan(saved_plan)
        if not self._dataset.is_stateless:
          # generate the plan ID if this is the first time we execute this plan
          plan_id = next_link if next_link is not None else str(uuid4())
          self._dataset.statefull_manager.save_plan(plan_id, next_page)
          next_page = plan_id
      elif is_done and (not self._dataset.is_stateless) and next_link is not None:
        # delete the saved plan, as it will not be reloaded anymore
        self._dataset.statefull_manager.delete_plan(next_link)
      exportTime = (time() - start) * 1000

      # create response
      response = SageResponse(is_done = is_done, next_link = next_page)
      for binding in create_bindings(bindings):
        response.bindings.append(binding)
      return response
    except Exception as err:
      if already_aborted:
        # the call was already aborted with a meaningful status code above:
        # forward that exception untouched instead of masking it with a 500.
        raise
      if graph is not None:
        graph.abort()
      context.abort(code=500, details=f"A server-side error has occurred: {str(err)}")
def execute_query(query, default_graph_uri, next_link, dataset, mimetype, url):
    """
        Execute a query using the SageEngine and returns the appropriate HTTP response.
        Any failure will results in a rollback/abort on the current query execution.

        Args:
          * query: SPARQL query to execute.
          * default_graph_uri: URI of the default RDF graph to query.
          * next_link: saved plan to resume, or None to start from scratch.
          * dataset: RDF dataset on which the query is executed.
          * mimetype: requested response content type.
          * url: public URL of the server, used to format URIs and links.

        Returns:
          An HTTP response in the requested format (W3C JSON/XML, raw JSON or HTML).
    """
    graph = None
    try:
        graph_name = format_graph_uri(default_graph_uri, url)
        if not dataset.has_graph(graph_name):
            logging.error("No RDF graph matching the default URI provided was found.")
            return sage_http_error("No RDF graph matching the default URI provided was found.")
        graph = dataset.get_graph(graph_name)
        # decode next_link or build query execution plan
        cardinalities = dict()
        start = time()

        if next_link is not None:
            plan = load(decode_saved_plan(next_link), dataset)
        else:
            plan, cardinalities = parse_query(query, dataset, graph_name, url)
        loading_time = (time() - start) * 1000
        # execute query under the graph's time quantum (quota is in ms, engine expects seconds)
        engine = SageEngine()
        quota = graph.quota / 1000
        max_results = graph.max_results
        bindings, saved_plan, is_done = engine.execute(plan, quota, max_results)

        # commit (if necessary)
        graph.commit()

        # compute controls for the next page
        start = time()
        next_page = None
        if not is_done:
            next_page = encode_saved_plan(saved_plan)
        exportTime = (time() - start) * 1000
        stats = {"cardinalities": cardinalities, "import": loading_time, "export": exportTime}

        # send response in the negotiated format
        if mimetype == "application/sparql-results+json":
            return Response(responses.w3c_json_streaming(bindings, next_page, stats, url),
                            content_type='application/json')
        if mimetype in ("application/xml", "application/sparql-results+xml"):
            return Response(responses.w3c_xml(bindings, next_page, stats), content_type="application/xml")
        if mimetype == "application/json":
            return Response(responses.raw_json_streaming(bindings, next_page, stats, url),
                            content_type='application/json')
        # otherwise, return the HTML version
        return render_template("sage_page.html", query=query, default_graph_uri=default_graph_uri, bindings=bindings,
                               next_page=next_page, stats=stats)
    except Exception as err:
        # abort all ongoing transactions (if required)
        # then forward the exception to the main loop
        logging.error(f"sage execute_query error: {err}")
        if graph is not None:
            graph.abort()
        # bare raise preserves the original traceback (raise err would rewrite it)
        raise
Example #3
0
def sage_query_debug(config_file, default_graph_uri, query, file, limit):
    """
        debug a SPARQL query on an embedded Sage Server.

        Example usage: sage-query config.yaml http://example.org/swdf-postgres -f queries/spo.sparql

        Args:
          * config_file: path to the server YAML configuration file.
          * default_graph_uri: URI of the RDF graph to query.
          * query: SPARQL query string (mutually exclusive with `file`).
          * file: path to a file containing the SPARQL query.
          * limit: maximum number of solutions to produce (None = unlimited).
    """
    # assert that we have a query to evaluate
    if query is None and file is None:
        print(
            "Error: you must specificy a query to execute, either with --query or --file. See sage-query --help for more informations."
        )
        exit(1)

    ## setting the log level of the asyncio logger to logging.DEBUG, for example the following snippet of code can be run at startup of the application:
    #logging.basicConfig(level=logging.WARNING)
    logging.basicConfig(level=logging.DEBUG)

    # no explicit limit means "produce every solution"
    if limit is None:
        limit = inf

    # load query from file if required
    if file is not None:
        with open(file) as query_file:
            query = query_file.read()

    dataset = load_config(config_file)
    if dataset is None:
        # fixed: f-prefix was missing, so the literal "{config_file}" was printed
        print(f"config file {config_file} not found")
        exit(1)
    graph = dataset.get_graph(default_graph_uri)
    if graph is None:
        print("RDF Graph  not found:" + default_graph_uri)
        exit(1)
    engine = SageEngine()
    # very large quantum/max_results: we want the full query to run in one shot
    context = dict()
    context['quantum'] = 1000000
    context['max_results'] = 1000000
    from time import time
    context['start_timestamp'] = time()
    iterator, cards = parse_query(query, dataset, default_graph_uri, context)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(execute(engine, iterator, limit))
    loop.close()
Example #4
0
async def execute_query(query: str, default_graph_uri: str, next_link: Optional[str], dataset: Dataset) -> Tuple[List[Dict[str, str]], Optional[str], Dict[str, str]]:
    """Execute a query using the SageEngine and returns the appropriate HTTP response.

    Any failure will results in a rollback/abort on the current query execution.

    Args:
      * query: SPARQL query to execute.
      * default_graph_uri: URI of the default RDF graph to use.
      * next_link: URI to a saved plan. Can be `None` if query execution should starts from the beginning.
      * dataset: RDF dataset on which the query is executed.

    Returns:
      A tuple (`bindings`, `next_page`, `stats`) where:
      * `bindings` is a list of query results.
      * `next_page` is a link to saved query execution state. Sets to `None` if query execution completed during the time quantum.
      * `stats` are statistics about query execution.

    Throws: Any exception that have occured during query execution.
    """
    graph = None
    try:
        if not dataset.has_graph(default_graph_uri):
            raise HTTPException(status_code=404, detail=f"RDF Graph {default_graph_uri} not found on the server.")
        graph = dataset.get_graph(default_graph_uri)

        context = dict()
        context['quantum'] = graph.quota
        context['max_results'] = graph.max_results

        # decode next_link or build query execution plan
        cardinalities = dict()
        start = time()
        if next_link is not None:
            if dataset.is_stateless:
                # stateless mode: the saved plan is encoded in the link itself
                saved_plan = next_link
            else:
                # stateful mode: the link is an ID into the plan store
                saved_plan = dataset.statefull_manager.get_plan(next_link)
            plan = load(decode_saved_plan(saved_plan), dataset, context)
        else:
            plan, cardinalities = parse_query(query, dataset, default_graph_uri, context)
        logging.info(f'loading time: {(time() - start) * 1000}ms')
        loading_time = (time() - start) * 1000

        # execute query
        engine = SageEngine()
        bindings, saved_plan, is_done, abort_reason = await engine.execute(plan, context)

        # commit or abort (if necessary)
        if abort_reason is not None:
            graph.abort()
            raise HTTPException(status_code=500, detail=f"The SPARQL query has been aborted for the following reason: '{abort_reason}'")
        else:
            graph.commit()

        start = time()
        # encode saved plan if query execution is not done yet and there was no abort
        next_page = None
        if (not is_done) and abort_reason is None:
            next_page = encode_saved_plan(saved_plan)
            if not dataset.is_stateless:
                # generate the plan ID if this is the first time we execute this plan
                plan_id = next_link if next_link is not None else str(uuid4())
                dataset.statefull_manager.save_plan(plan_id, next_page)
                next_page = plan_id
        elif is_done and (not dataset.is_stateless) and next_link is not None:
            # delete the saved plan, as it will not be reloaded anymore
            dataset.statefull_manager.delete_plan(next_link)

        logging.info(f'export time: {(time() - start) * 1000}ms')
        exportTime = (time() - start) * 1000
        stats = {"cardinalities": cardinalities, "import": loading_time, "export": exportTime}

        return (bindings, next_page, stats)
    except HTTPException:
        # HTTPExceptions raised above (404 not found, 500 query aborted) already
        # carry the right status code and have already rolled back when needed:
        # forward them untouched instead of aborting the graph a second time.
        raise
    except Exception:
        # abort all ongoing transactions, then forward the exception to the main loop
        logging.error(traceback.format_exc())
        if graph is not None:
            graph.abort()
        # bare raise preserves the original traceback
        raise
Example #5
0
async def test_parse_rowid(query, cardinality):
    """Check that a rowid query parses into a valid iterator pipeline."""
    pipeline, cardinalities = parse_query(query, dataset, 'context')
    print("pipeline:")
    print(pipeline)
    assert pipeline is not None
    assert len(cardinalities) >= 0
Example #6
0
 async def test_query_parser(self, query, cardinality):
     """Check that a query parses into a pipeline with at least one cardinality."""
     pipeline, cardinalities = parse_query(query, dataset, 'watdiv100')
     assert pipeline is not None
     assert len(cardinalities) > 0
 async def test_query_parser(self, query, cardinality):
     """Check that parsing with an explicit execution context yields a pipeline."""
     execution_context = {
         'quantum': 10e7,
         'max_results': 10e7,
         'start_timestamp': 0,
     }
     pipeline, cardinalities = parse_query(query, dataset, 'watdiv100', execution_context)
     assert pipeline is not None
     assert len(cardinalities) > 0
Example #8
0
def explain(query, file, config_file, graph_uri, indentnb, update, parse):
    """Explain a SPARQL query: print the query, its parse tree (optional),
    its algebra, the iterator pipeline built by Sage and the cardinalities.

    Args:
      * query: SPARQL query string (mutually exclusive with `file`).
      * file: path to a file containing the SPARQL query.
      * config_file: path to the server YAML configuration file.
      * graph_uri: URI of the RDF graph to plan against.
      * indentnb: indentation used by the pretty-printer.
      * update: True if the query is a SPARQL UPDATE.
      * parse: if True, also print the raw parsed query.
    """
    coloredlogs.install(level='INFO',
                        fmt='%(asctime)s - %(levelname)s %(message)s')

    if query is None and file is None:
        print(
            "Error: you must specificy a query to execute, either with --query or --file. See sage-query --help for more informations."
        )
        exit(1)

    # load query from file if required
    if file is not None:
        with open(file) as query_file:
            query = query_file.read()

    dataset = load_config(config_file)
    if dataset is None:
        # fixed: f-prefix was missing, so the literal "{config_file}" was printed
        print(f"config file {config_file} not found")
        exit(1)

    graph = dataset.get_graph(graph_uri)
    if graph is None:
        print("RDF Graph  not found:" + graph_uri)
        exit(1)

    pp = pprint.PrettyPrinter(indent=indentnb)

    if query is None:
        exit(1)

    print("------------")
    print("Query")
    print("------------")
    print(query)

    # parse with the appropriate rdflib entry point (UPDATE vs SELECT/ASK/...)
    if update:
        pq = parseUpdate(query)
    else:
        pq = parseQuery(query)

    if pq is None:
        exit(1)

    if parse:
        print("------------")
        print("Parsed Query")
        print("------------")
        pp.pprint(pq)
        print(prettify_parsetree(pq))

    if update:
        tq = translateUpdate(pq)
    else:
        tq = translateQuery(pq)
    print("------------")
    print("Algebra")
    print("------------")
    print(pprintAlgebra(tq))

    iterator, cards = parse_query(query, dataset, graph_uri)

    print("-----------------")
    print("Iterator pipeline")
    print("-----------------")
    print(iterator)
    print("-----------------")
    print("Cardinalities")
    print("-----------------")
    pp.pprint(cards)