Example #1
0
def sage_query_debug(config_file, default_graph_uri, query, file, limit):
    """
        debug a SPARQL query on an embedded Sage Server.

        Example usage: sage-query config.yaml http://example.org/swdf-postgres -f queries/spo.sparql

        Arguments:
          * config_file: path of the configuration file given to `load_config`.
          * default_graph_uri: URI of the RDF graph to query.
          * query: text of the SPARQL query, or None if it is read from `file`.
          * file: path of a file holding the query, or None. When set, its
            content replaces `query`.
          * limit: result limit handed to `execute`; None means no limit.

        Exits with status 1 when no query is supplied, when `load_config`
        returns None, or when the graph is not found in the dataset.
    """
    from time import time

    # assert that we have a query to evaluate
    if query is None and file is None:
        print(
            "Error: you must specificy a query to execute, either with --query or --file. See sage-query --help for more informations."
        )
        raise SystemExit(1)

    # run with DEBUG-level logging for the whole evaluation
    logging.basicConfig(level=logging.DEBUG)

    if limit is None:
        limit = inf

    # load query from file if required
    if file is not None:
        with open(file) as query_file:
            query = query_file.read()

    dataset = load_config(config_file)
    if dataset is None:
        # f-string so that the offending path is actually shown
        print(f"config file {config_file} not found")
        raise SystemExit(1)
    if dataset.get_graph(default_graph_uri) is None:
        print("RDF Graph  not found:" + default_graph_uri)
        raise SystemExit(1)

    engine = SageEngine()
    # execution context given to the query parser
    context = {
        'quantum': 1000000,
        'max_results': 1000000,
        'start_timestamp': time(),
    }
    # the cardinalities returned next to the iterator are not used here
    iterator, _ = parse_query(query, dataset, default_graph_uri, context)

    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(execute(engine, iterator, limit))
    finally:
        # release the loop even when the execution fails
        loop.close()
Example #2
0
def get_server(config_file: str, port=8000, workers=10) -> grpc.Server:
  """Create a SaGe SPARQL query server powered by gRPC.
  
  Args:
    * config_file: Path to the SaGe configuration file, in YAML format.
    * port: Host port to run the gRPC server.
    * workers: Number of thread workers used by the gRPC server.
  
  Returns:
    A SaGe gRPC server built from the input configuration file. The server
    is returned with its insecure port registered on all interfaces.
  """
  logging.basicConfig()

  dataset = load_config(config_file)
  service = SageQueryService(dataset)

  # size the thread pool from the `workers` argument (it used to be ignored)
  server = grpc.server(ThreadPoolExecutor(max_workers=workers))
  service_pb2_grpc.add_SageSPARQLServicer_to_server(service, server)
  
  server.add_insecure_port(f'[::]:{port}')
  return server
Example #3
0
def run_app(config_file: str) -> FastAPI:
    """Create the HTTP server, compatible with uvicorn/gunicorn.

    The application exposes a health-check route (/), the SPARQL endpoint
    (GET and POST /sparql) and the VoID descriptions (/void/,
    /.well-known/void/ and /void/{graph_name}).

    Argument: SaGe configuration file, in YAML format.

    Returns: The FastAPI HTTP application.
    """
    # enable uvloop for SPARQL query processing
    set_event_loop_policy(uvloop.EventLoopPolicy())
    # set recursion depth (due to pyparsing issues)
    setrecursionlimit(3000)

    # create the HTTP server & activate CORS
    app = FastAPI()
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # Build the RDF dataset from the configuration file
    dataset = load_config(config_file)

    def accepted_mimetypes(request):
        """List the media types of the Accept header of a request.

        A request without an Accept header is treated as accepting anything
        ("*/*") instead of failing, and the optional whitespace that follows
        each comma is removed.
        NOTE(review): assumes the response helpers fall back to a default
        format when no known media type is listed - confirm.
        """
        header = request.headers.get('accept', '*/*')
        return [mimetype.strip() for mimetype in header.split(",")]

    def base_url(request, strip_trailing_slash=False):
        """Rebuild the request URL from its scheme, host and path only."""
        url = urlunparse(request.url.components[0:3] + (None, None, None))
        if strip_trailing_slash and url.endswith('/'):
            url = url[:-1]
        return url

    @app.get("/")
    async def root():
        return "The SaGe SPARQL query server is running!"

    @app.get("/sparql")
    async def sparql_get(
        request: Request,
        query: str = Query(..., description="The SPARQL query to execute."),
        default_graph_uri: str = Query(..., alias="default-graph-uri", description="The URI of the default RDF graph queried."),
        next_link: str = Query(None, alias="next", description="(Optional) A next link used to resume query execution from a saved state.")
    ):
        """Execute a SPARQL query using the Web Preemption model"""
        try:
            mimetypes = accepted_mimetypes(request)
            server_url = base_url(request)
            bindings, next_page, stats = await execute_query(query, default_graph_uri, next_link, dataset)
            return create_response(mimetypes, bindings, next_page, stats, server_url)
        except HTTPException as err:
            # HTTP errors raised during execution are forwarded untouched
            raise err
        except Exception as err:
            logging.error(err)
            raise HTTPException(status_code=500, detail=str(err))

    @app.post("/sparql")
    async def sparql_post(request: Request, item: SagePostQuery):
        """Execute a SPARQL query using the Web Preemption model"""
        try:
            start = time()
            mimetypes = accepted_mimetypes(request)
            server_url = base_url(request)
            exec_start = time()
            bindings, next_page, stats = await execute_query(item.query, item.defaultGraph, item.next, dataset)
            logging.info(f'query execution time: {(time() - exec_start) * 1000}ms')
            serialization_start = time()
            response = create_response(mimetypes, bindings, next_page, stats, server_url)
            logging.info(f'serialization time: {(time() - serialization_start) * 1000}ms')
            logging.info(f'execution time: {(time() - start) * 1000}ms')
            return response
        except HTTPException as err:
            # HTTP errors raised during execution are forwarded untouched
            raise err
        except Exception as err:
            logging.error(err)
            raise HTTPException(status_code=500, detail=str(err))

    @app.get("/void/", description="Get the VoID description of the SaGe server")
    async def server_void(request: Request):
        """Describe all RDF datasets hosted by the Sage endpoint"""
        try:
            mimetypes = accepted_mimetypes(request)
            url = base_url(request, strip_trailing_slash=True)
            void_format, res_mimetype = choose_void_format(mimetypes)
            description = many_void(url, dataset, void_format)
            return Response(description, media_type=res_mimetype)
        except Exception as err:
            logging.error(err)
            raise HTTPException(status_code=500, detail=str(err))

    @app.get("/.well-known/void/")
    async def well_known():
        """Alias for /void/"""
        return RedirectResponse(url="/void/")

    @app.get("/void/{graph_name}", description="Get the VoID description of a RDF Graph hosted by the SaGe server")
    async def graph_void(request: Request, graph_name: str = Field(..., description="Name of the RDF Graph")):
        """Get the VoID description of a RDF Graph hosted by the SaGe server"""
        graph = dataset.get_graph(graph_name)
        if graph is None:
            raise HTTPException(status_code=404, detail=f"RDF Graph {graph_name} not found on the server.")
        try:
            mimetypes = accepted_mimetypes(request)
            url = base_url(request, strip_trailing_slash=True)
            descriptor = VoidDescriptor(url, graph)
            void_format, res_mimetype = choose_void_format(mimetypes)
            return Response(descriptor.describe(void_format), media_type=res_mimetype)
        except Exception as err:
            logging.error(err)
            raise HTTPException(status_code=500, detail=str(err))

    return app
Example #4
0
# void_test.py
# Author: Thomas MINIER - MIT License 2017-2018
from pytest import mark
from sage.database.core.yaml_config import load_config
from sage.database.descriptors import VoidDescriptor, many_void
from rdflib import Graph

# Dataset shared by the tests of this module, loaded once when the module is imported.
dataset = load_config("tests/data/test_config.yaml")


@mark.skip(reason="The general VoID format has changed")
def test_describe_dataset_void():
    """The generated VoID description of watdiv100 is isomorphic to the stored reference."""
    graph_url = "http://testserver/sparql/watdiv100"

    # expected description, read from the Turtle fixture
    expected = Graph()
    expected.parse("tests/descriptors/data/watdiv100_description.ttl",
                   format="ttl")

    # actual description, serialized as Turtle then parsed back into a graph
    turtle = VoidDescriptor(graph_url, dataset.get_graph(graph_url)).describe("turtle")
    actual = Graph()
    actual.parse(data=turtle, format="ttl")

    assert expected.isomorphic(actual)


@mark.skip(reason="The general VoID format has changed")
def test_describe_many_dataset_void():
    # NOTE(review): the visible body only prepares the reference graph, a URL
    # and an empty description graph; it makes no assertion - the test looks
    # truncated and should be completed or removed.
    # load the reference description from the Turtle fixture
    ref_graph = Graph()
    ref_graph.parse("tests/descriptors/data/describe_all.ttl", format="ttl")
    # generate description
    url = "http://localhost:8000"
    desc_graph = Graph()
Example #5
0
def explain(query, file, config_file, graph_uri, indentnb, update, parse):
    """Print the successive representations of a SPARQL query.

    The query text, optionally its parse tree, its algebra, the iterator
    pipeline and the cardinalities returned by `parse_query` are printed on
    the standard output.

    Arguments:
      * query: text of the SPARQL query, or None if it is read from `file`.
      * file: path of a file holding the query, or None. When set, its
        content replaces `query`.
      * config_file: path of the configuration file given to `load_config`.
      * graph_uri: URI of the RDF graph to query.
      * indentnb: indentation width used by the pretty printer.
      * update: if truthy, the query is parsed and translated as an update.
      * parse: if truthy, the parse tree is printed too.

    Exits with status 1 when no query is supplied, when `load_config`
    returns None, when the graph is not found in the dataset, or when the
    parser returns None.
    """
    # validate the arguments before touching the logging configuration
    if query is None and file is None:
        print(
            "Error: you must specificy a query to execute, either with --query or --file. See sage-query --help for more informations."
        )
        raise SystemExit(1)

    coloredlogs.install(level='INFO',
                        fmt='%(asctime)s - %(levelname)s %(message)s')

    # load query from file if required
    if file is not None:
        with open(file) as query_file:
            query = query_file.read()

    dataset = load_config(config_file)
    if dataset is None:
        # f-string so that the offending path is actually shown
        print(f"config file {config_file} not found")
        raise SystemExit(1)

    if dataset.get_graph(graph_uri) is None:
        print("RDF Graph  not found:" + graph_uri)
        raise SystemExit(1)

    pp = pprint.PrettyPrinter(indent=indentnb)

    print("------------")
    print("Query")
    print("------------")
    print(query)

    pq = parseUpdate(query) if update else parseQuery(query)
    if pq is None:
        raise SystemExit(1)

    if parse:
        print("------------")
        print("Parsed Query")
        print("------------")
        pp.pprint(pq)
        print(prettify_parsetree(pq))

    tq = translateUpdate(pq) if update else translateQuery(pq)
    print("------------")
    print("Algebra")
    print("------------")
    print(pprintAlgebra(tq))

    iterator, cards = parse_query(query, dataset, graph_uri)

    print("-----------------")
    print("Iterator pipeline")
    print("-----------------")
    print(iterator)
    print("-----------------")
    print("Cardinalities")
    print("-----------------")
    pp.pprint(cards)
def test_custom_backend_invalid_declaration_config():
    """Loading tests/database/invalid_declaration.yaml must raise a SyntaxError."""
    config_path = 'tests/database/invalid_declaration.yaml'
    with pytest.raises(SyntaxError):
        load_config(config_path)
def test_custom_backend_missing_params_config():
    """Loading tests/database/missing_params.yaml must raise a SyntaxError."""
    config_path = 'tests/database/missing_params.yaml'
    with pytest.raises(SyntaxError):
        load_config(config_path)
def test_custom_backend_config():
    """The graph declared in tests/database/config.yaml is registered and searchable."""
    graph_uri = 'http://localhost:8000/sparql/foo-graph'
    dataset = load_config('tests/database/config.yaml')

    # the graph must be known by the dataset
    assert dataset.has_graph(graph_uri)

    # searching the graph with an unbound pattern yields the expected value
    graph = dataset.get_graph(graph_uri)
    assert graph.search(None, None, None) == 'moo'