def sage_query_debug(config_file, default_graph_uri, query, file, limit):
    """Debug a SPARQL query on an embedded Sage Server.

    Example usage:
        sage-query config.yaml http://example.org/swdf-postgres -f queries/spo.sparql

    Args:
      * config_file: Path to the SaGe configuration file, in YAML format.
      * default_graph_uri: URI of the default RDF graph to query.
      * query: SPARQL query string (mutually optional with `file`).
      * file: Path to a file containing the SPARQL query.
      * limit: Maximum number of solutions to fetch (None = unlimited).
    """
    # a query must be supplied, either inline or via a file
    if query is None and file is None:
        print("Error: you must specify a query to execute, either with --query or --file. See sage-query --help for more information.")
        exit(1)
    # DEBUG level so the asyncio logger emits its internal diagnostics too
    logging.basicConfig(level=logging.DEBUG)
    if limit is None:
        limit = inf
    # load query from file if required
    if file is not None:
        with open(file) as query_file:
            query = query_file.read()
    dataset = load_config(config_file)
    if dataset is None:
        # fixed: this message was missing the f-string prefix and printed literal braces
        print(f"config file {config_file} not found")
        exit(1)
    graph = dataset.get_graph(default_graph_uri)
    if graph is None:
        print(f"RDF Graph not found: {default_graph_uri}")
        exit(1)
    engine = SageEngine()
    # very large quantum/max_results so the engine runs the query to completion
    context = dict()
    context['quantum'] = 1000000
    context['max_results'] = 1000000
    from time import time
    context['start_timestamp'] = time()
    iterator, cards = parse_query(query, dataset, default_graph_uri, context)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(execute(engine, iterator, limit))
    loop.close()
def get_server(config_file: str, port=8000, workers=10) -> grpc.Server:
    """Create a SaGe SPARQL query server powered by gRPC.

    Args:
      * config_file: Path to the SaGe configuration file, in YAML format.
      * port: Host port to run the gRPC server.
      * workers: Number of thread workers used by the gRPC server.

    Returns:
        A SaGe gRPC server built from the input configuration file.
    """
    logging.basicConfig()
    dataset = load_config(config_file)
    service = SageQueryService(dataset)
    # fixed: the `workers` parameter was ignored (max_workers was hard-coded to 10)
    server = grpc.server(ThreadPoolExecutor(max_workers=workers))
    service_pb2_grpc.add_SageSPARQLServicer_to_server(service, server)
    server.add_insecure_port(f'[::]:{port}')
    return server
def run_app(config_file: str) -> FastAPI:
    """Create the HTTP server, compatible with uvicorn/gunicorn.

    Argument: SaGe configuration file, in YAML format.

    Returns: The FastAPI HTTP application.
    """
    # enable uvloop for SPARQL query processing
    set_event_loop_policy(uvloop.EventLoopPolicy())
    # set recursion depth (due to pyparsing issues)
    setrecursionlimit(3000)
    # create the HTTP server & activate CORS (wide open: all origins/methods/headers)
    app = FastAPI()
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )
    # Build the RDF dataset from the configuration file
    # (closed over by every route handler below)
    dataset = load_config(config_file)

    @app.get("/")
    async def root():
        # health-check / landing endpoint
        return "The SaGe SPARQL query server is running!"

    @app.get("/sparql")
    async def sparql_get(
        request: Request,
        query: str = Query(..., description="The SPARQL query to execute."),
        default_graph_uri: str = Query(..., alias="default-graph-uri", description="The URI of the default RDF graph queried."),
        next_link: str = Query(None, alias="next", description="(Optional) A next link used to resume query execution from a saved state.")
    ):
        """Execute a SPARQL query using the Web Preemption model"""
        try:
            # content negotiation: the Accept header drives the response format
            mimetypes = request.headers['accept'].split(",")
            # scheme://netloc/path of the request, without query/params/fragment
            server_url = urlunparse(request.url.components[0:3] + (None, None, None))
            bindings, next_page, stats = await execute_query(query, default_graph_uri, next_link, dataset)
            return create_response(mimetypes, bindings, next_page, stats, server_url)
        except HTTPException as err:
            # re-raise HTTP errors untouched so their status codes survive
            raise err
        except Exception as err:
            logging.error(err)
            raise HTTPException(status_code=500, detail=str(err))

    @app.post("/sparql")
    async def sparql_post(request: Request, item: SagePostQuery):
        """Execute a SPARQL query using the Web Preemption model"""
        try:
            start = time()
            mimetypes = request.headers['accept'].split(",")
            server_url = urlunparse(request.url.components[0:3] + (None, None, None))
            exec_start = time()
            bindings, next_page, stats = await execute_query(item.query, item.defaultGraph, item.next, dataset)
            # timing logs: query evaluation vs. serialization vs. total
            logging.info(f'query execution time: {(time() - exec_start) * 1000}ms')
            serialization_start = time()
            response = create_response(mimetypes, bindings, next_page, stats, server_url)
            logging.info(f'serialization time: {(time() - serialization_start) * 1000}ms')
            logging.info(f'execution time: {(time() - start) * 1000}ms')
            return response
        except HTTPException as err:
            raise err
        except Exception as err:
            logging.error(err)
            raise HTTPException(status_code=500, detail=str(err))

    @app.get("/void/", description="Get the VoID description of the SaGe server")
    async def server_void(request: Request):
        """Describe all RDF datasets hosted by the Sage endpoint"""
        try:
            mimetypes = request.headers['accept'].split(",")
            url = urlunparse(request.url.components[0:3] + (None, None, None))
            # strip a trailing slash so generated URIs are consistent
            if url.endswith('/'):
                url = url[0:len(url) - 1]
            void_format, res_mimetype = choose_void_format(mimetypes)
            description = many_void(url, dataset, void_format)
            return Response(description, media_type=res_mimetype)
        except Exception as err:
            logging.error(err)
            raise HTTPException(status_code=500, detail=str(err))

    @app.get("/.well-known/void/")
    async def well_known():
        """Alias for /void/"""
        return RedirectResponse(url="/void/")

    @app.get("/void/{graph_name}", description="Get the VoID description of a RDF Graph hosted by the SaGe server")
    async def graph_void(request: Request, graph_name: str = Field(..., description="Name of the RDF Graph")):
        """Get the VoID description of a RDF Graph hosted by the SaGe server"""
        # 404 is raised outside the try so it is not converted into a 500
        graph = dataset.get_graph(graph_name)
        if graph is None:
            raise HTTPException(status_code=404, detail=f"RDF Graph {graph_name} not found on the server.")
        try:
            mimetypes = request.headers['accept'].split(",")
            url = urlunparse(request.url.components[0:3] + (None, None, None))
            if url.endswith('/'):
                url = url[0:len(url) - 1]
            descriptor = VoidDescriptor(url, graph)
            void_format, res_mimetype = choose_void_format(mimetypes)
            return Response(descriptor.describe(void_format), media_type=res_mimetype)
        except Exception as err:
            logging.error(err)
            raise HTTPException(status_code=500, detail=str(err))

    return app
# void_test.py # Author: Thomas MINIER - MIT License 2017-2018 from pytest import mark from sage.database.core.yaml_config import load_config from sage.database.descriptors import VoidDescriptor, many_void from rdflib import Graph dataset = load_config("tests/data/test_config.yaml") @mark.skip(reason="The general VoID format has changed") def test_describe_dataset_void(): ref_graph = Graph() ref_graph.parse("tests/descriptors/data/watdiv100_description.ttl", format="ttl") # generate description url = "http://testserver/sparql/watdiv100" graph = dataset.get_graph(url) descriptor = VoidDescriptor(url, graph) desc_graph = Graph() desc_graph.parse(data=descriptor.describe("turtle"), format="ttl") assert ref_graph.isomorphic(desc_graph) @mark.skip(reason="The general VoID format has changed") def test_describe_many_dataset_void(): ref_graph = Graph() ref_graph.parse("tests/descriptors/data/describe_all.ttl", format="ttl") # generate description url = "http://localhost:8000" desc_graph = Graph()
def explain(query, file, config_file, graph_uri, indentnb, update, parse):
    """Explain a SPARQL query: print its parse tree, algebra, iterator
    pipeline and cardinality estimates without executing it.

    Args:
      * query: SPARQL query string (mutually optional with `file`).
      * file: Path to a file containing the SPARQL query.
      * config_file: Path to the SaGe configuration file, in YAML format.
      * graph_uri: URI of the RDF graph to plan against.
      * indentnb: Indentation width used by the pretty-printer.
      * update: True to parse the input as a SPARQL UPDATE query.
      * parse: True to also print the raw parse tree.
    """
    coloredlogs.install(level='INFO', fmt='%(asctime)s - %(levelname)s %(message)s')
    # a query must be supplied, either inline or via a file
    if query is None and file is None:
        print("Error: you must specify a query to execute, either with --query or --file. See sage-query --help for more information.")
        exit(1)
    # load query from file if required
    if file is not None:
        with open(file) as query_file:
            query = query_file.read()
    dataset = load_config(config_file)
    if dataset is None:
        # fixed: this message was missing the f-string prefix and printed literal braces
        print(f"config file {config_file} not found")
        exit(1)
    graph = dataset.get_graph(graph_uri)
    if graph is None:
        print(f"RDF Graph not found: {graph_uri}")
        exit(1)
    engine = SageEngine()
    pp = pprint.PrettyPrinter(indent=indentnb)
    if query is None:
        exit(1)
    print("------------")
    print("Query")
    print("------------")
    print(query)
    # UPDATE queries go through rdflib's dedicated update parser/translator
    pq = parseUpdate(query) if update else parseQuery(query)
    if pq is None:
        exit(1)
    if parse:
        print("------------")
        print("Parsed Query")
        print("------------")
        pp.pprint(pq)
        print(prettify_parsetree(pq))
    tq = translateUpdate(pq) if update else translateQuery(pq)
    print("------------")
    print("Algebra")
    print("------------")
    print(pprintAlgebra(tq))
    iterator, cards = parse_query(query, dataset, graph_uri)
    print("-----------------")
    print("Iterator pipeline")
    print("-----------------")
    print(iterator)
    print("-----------------")
    print("Cardinalities")
    print("-----------------")
    pp.pprint(cards)
def test_custom_backend_invalid_declaration_config():
    """Loading a config whose backend declaration is invalid must raise SyntaxError."""
    invalid_config = 'tests/database/invalid_declaration.yaml'
    with pytest.raises(SyntaxError):
        load_config(invalid_config)
def test_custom_backend_missing_params_config():
    """Loading a config whose backend lacks required parameters must raise SyntaxError."""
    incomplete_config = 'tests/database/missing_params.yaml'
    with pytest.raises(SyntaxError):
        load_config(incomplete_config)
def test_custom_backend_config():
    """A custom backend declared in the config is registered and answers searches."""
    graph_uri = 'http://localhost:8000/sparql/foo-graph'
    dataset = load_config('tests/database/config.yaml')
    assert dataset.has_graph(graph_uri)
    foo_graph = dataset.get_graph(graph_uri)
    assert foo_graph.search(None, None, None) == 'moo'