Ejemplo n.º 1
0
def execute_query(query, default_graph_uri, next_link, dataset, mimetype, url):
    """Run a SPARQL query with the SageEngine and build the matching HTTP response.

    Resumes a previously saved plan when ``next_link`` is provided, otherwise
    parses ``query`` into a fresh plan. The plan is evaluated under the target
    graph's time quota and result limit, then serialized according to
    ``mimetype`` (W3C JSON, W3C XML, raw JSON, or the HTML page as fallback).
    """
    graph_name = format_graph_uri(default_graph_uri, url)
    if not dataset.has_graph(graph_name):
        return sage_http_error(
            "No RDF graph matching the default URI provided was found.")
    graph = dataset.get_graph(graph_name)

    # Resume a saved plan from the "next" link, or build a new execution plan
    cards = dict()
    import_start = time()
    if next_link is None:
        physical_plan, cards = parse_query(query, dataset, graph_name, url)
    else:
        physical_plan = load(decode_saved_plan(next_link), dataset)
    import_ms = (time() - import_start) * 1000

    # Evaluate the plan under the graph's quota and max-results limit
    engine = SageEngine()
    solutions, saved_plan, complete = engine.execute(
        physical_plan, graph.quota / 1000, graph.max_results)

    # When interrupted by the quota, save the plan to produce a "next" link
    export_start = time()
    next_page = encode_saved_plan(saved_plan) if not complete else None
    export_ms = (time() - export_start) * 1000
    stats = {
        "cardinalities": cards,
        "import": import_ms,
        "export": export_ms
    }

    # Serialize the page of results in the negotiated format
    if mimetype == "application/sparql-results+json":
        return Response(
            responses.w3c_json_streaming(solutions, next_page, stats, url),
            content_type='application/json')
    if mimetype in ("application/xml", "application/sparql-results+xml"):
        return Response(responses.w3c_xml(solutions, next_page, stats),
                        content_type="application/xml")
    if mimetype == "application/json":
        return Response(
            responses.raw_json_streaming(solutions, next_page, stats, url),
            content_type='application/json')
    # otherwise, return the HTML version
    return render_template("sage_page.html",
                           query=query,
                           default_graph_uri=default_graph_uri,
                           bindings=solutions,
                           next_page=next_page,
                           stats=stats)
Ejemplo n.º 2
0
# scan_test.py
# Author: Thomas MINIER - MIT License 2017-2018
from query_engine.sage_engine import SageEngine
from query_engine.iterators.scan import ScanIterator
from database.hdt_file_connector import HDTFileConnector

# Connector over the HDT test file used as the RDF data source for these tests
hdtDoc = HDTFileConnector('tests/data/test.hdt')
engine = SageEngine()
# Fully unbound triple pattern (?s ?p ?o): matches every triple in the graph.
# NOTE(review): the 'graph' value looks like a dataset label — confirm it is
# only informational for ScanIterator and not used for filtering.
triple = {
    'subject': '?s',
    'predicate': '?p',
    'object': '?o',
    'graph': 'watdiv100'
}


def test_scan_read():
    """A full scan with an unlimited quota yields exactly `card` results and completes."""
    it, cardinality = hdtDoc.search_triples(triple['subject'],
                                            triple['predicate'],
                                            triple['object'])
    scan_op = ScanIterator(it, triple, cardinality)
    results, saved, done = engine.execute(scan_op, 10e7)
    assert len(results) == cardinality
    assert done


def test_scan_save_nointerrupt():
    # Same fully-unbound scan setup as test_scan_read.
    iterator, card = hdtDoc.search_triples(triple['subject'],
                                           triple['predicate'],
                                           triple['object'])
    scan = ScanIterator(iterator, triple, card)
    # NOTE(review): the body appears truncated here — the execution and
    # save/resume assertions that the test name implies are missing.
Ejemplo n.º 3
0
    def lookup_entity(graph_name, entity):
        """Evaluates a DESCRIBE query over a RDF dataset.

        Builds a DESCRIBE query for the requested entity URI, evaluates it
        under the graph's time quota, and streams the resulting triples as
        N-Triples. When the quota interrupts evaluation, pagination controls
        are exposed through the X-Sage-Next and Link response headers.
        """
        logger.debug('[IP: {}] [/lookup/] Querying {}'.format(
            request.environ['REMOTE_ADDR'], graph_name))
        graph = dataset.get_graph(graph_name)
        if graph is None:
            abort(404)
        url = secure_url(request.url)
        try:
            engine = SageEngine()

            # Get entity and possible next link
            entity_uri = secure_url(request.base_url)
            next_link = request.args.get("next", default=None)
            post_query = build_describe_query(entity_uri)

            logger.debug('[IP: {}] [/lookup/] Entity={}'.format(
                request.environ['REMOTE_ADDR'], entity_uri))
            # NOTE(review): quota appears to be stored in milliseconds and
            # converted to seconds for the engine — confirm the unit.
            quota = graph.quota / 1000
            max_results = graph.max_results

            # Load next link
            if next_link is not None:
                logger.debug(
                    '[/lookup/{}] Saved plan found, decoding "next" link'.
                    format(graph_name))
                next_link = decode_saved_plan(next_link)
            else:
                logger.debug('[/lookup/{}] Query to evaluate: {}'.format(
                    graph_name, post_query))

            # build physical query plan, then execute it with the given quota
            logger.debug(
                '[/lookup/{}] Starting query evaluation...'.format(graph_name))
            # start = time()
            plan, cardinalities = build_query_plan(post_query, dataset,
                                                   graph_name, next_link)
            # loading_time = (time() - start) * 1000
            bindings, saved_plan, is_done = engine.execute(
                plan, quota, max_results)
            logger.debug(
                '[/lookup/{}] Query evaluation completed'.format(graph_name))

            # compute controls for the next page
            # start = time()
            next_page = None
            if is_done:
                logger.debug(
                    '[/lookup/{}] Query completed under the time quota'.format(
                        graph_name))
            else:
                logger.debug(
                    '[/lookup/{}] The query was not completed under the time quota...'
                    .format(graph_name))
                logger.debug(
                    '[/lookup/{}] Saving the execution to plan to generate a "next" link'
                    .format(graph_name))
                next_page = encode_saved_plan(saved_plan)
                logger.debug(
                    '[/lookup/{}] "next" link successfully generated'.format(
                        graph_name))
            # exportTime = (time() - start) * 1000
            # stats = {"import": loading_time, "export": exportTime}
            # Reformat the bindings as RDF triples describing the entity
            triples = bindings_to_triple(entity_uri, bindings, url)

            # Advertise the next page (if any) through response headers
            headers = dict()
            if next_page is not None:
                headers["X-Sage-Next"] = "{}?next={}".format(
                    entity_uri, next_page)
                headers[
                    "Link"] = "<{}?next={}>; rel=\"next\"; title=\"Next page\"".format(
                        entity_uri, next_page)

            return Response(responses.ntriples_streaming(triples),
                            content_type="application/ntriples",
                            headers=headers)
        except Exception as e:
            # Boundary handler: log the failure, then reply with HTTP 500
            logger.error(e)
            abort(500)
Ejemplo n.º 4
0
    def sparql_query(graph_name):
        """Evaluate a SPARQL query over a graph (WARNING: old API, deprecated).

        GET requests (and non-JSON POSTs) return the dataset homepage. JSON
        POSTs evaluate the query under the graph's time quota and return one
        page of results in the negotiated format, plus pagination controls
        when the query was interrupted. A deprecation Warning header is set
        on every query response.
        """
        graph = dataset.get_graph(graph_name)
        if graph is None:
            abort(404)

        logger.debug('[/sparql/] Corresponding dataset found')
        # Negotiate the response serialization from the Accept header
        mimetype = request.accept_mimetypes.best_match([
            "application/json", "application/xml",
            "application/sparql-results+json", "application/sparql-results+xml"
        ])
        url = secure_url(request.url)
        try:
            # A GET request always returns the homepage of the dataset
            if request.method == "GET" or (not request.is_json):
                dinfo = graph.describe(url)
                dinfo['@id'] = url
                void_desc = {
                    "nt": VoidDescriptor(url, graph).describe("ntriples"),
                    "ttl": VoidDescriptor(url, graph).describe("turtle"),
                    "xml": VoidDescriptor(url, graph).describe("xml")
                }
                return render_template("website/sage_dataset.html",
                                       dataset_info=dinfo,
                                       void_desc=void_desc)

            engine = SageEngine()
            post_query, err = QueryRequest().load(request.get_json())
            if err is not None and len(err) > 0:
                return Response(format_marshmallow_errors(err), status=400)
            quota = graph.quota / 1000
            max_results = graph.max_results

            # Load next link
            next_link = None
            if 'next' in post_query:
                next_link = decode_saved_plan(post_query["next"])

            # build physical query plan, then execute it with the given quota
            start = time()
            plan, cardinalities = build_query_plan(post_query["query"],
                                                   dataset, graph_name,
                                                   next_link)
            loading_time = (time() - start) * 1000  # convert in milliseconds
            bindings, saved_plan, is_done = engine.execute(
                plan, quota, max_results)

            # compute controls for the next page
            start = time()
            next_page = None
            if not is_done:
                next_page = encode_saved_plan(saved_plan)
            exportTime = (time() - start) * 1000  # convert in milliseconds
            stats = {
                "cardinalities": cardinalities,
                "import": loading_time,
                "export": exportTime
            }

            # BUG FIX: these branches must be mutually exclusive (if/elif/else).
            # The old code used two independent "if"s, so a response built for
            # application/sparql-results+json was immediately overwritten by
            # the raw-JSON fallback in the final "else".
            if mimetype == "application/sparql-results+json":
                res = Response(responses.w3c_json_streaming(
                    bindings, next_page, stats, url),
                               content_type='application/json')
            elif mimetype == "application/xml" or mimetype == "application/sparql-results+xml":
                res = Response(responses.w3c_xml(bindings, next_page, stats),
                               content_type="application/xml")
            else:
                res = Response(responses.raw_json_streaming(
                    bindings, next_page, stats, url),
                               content_type='application/json')
            # set deprecation warning in headers (typos in the message fixed:
            # "uppgrading" -> "upgrading", "fore more" -> "for more")
            res.headers.add(
                "Warning",
                "199 SaGe/2.0 \"You are using a deprecated API. Consider upgrading to the SaGe SPARQL query API. See http://sage.univ-nantes.fr/documentation for more details.\""
            )
            return res
        except Exception as e:
            # Boundary handler: log the failure, then reply with HTTP 500
            logger.error(e)
            abort(500)
Ejemplo n.º 5
0
    def sparql_query(dataset_name):
        """Evaluate a SPARQL query over a dataset and return one page of results.

        GET requests (and non-JSON POSTs) return the dataset homepage. JSON
        POSTs evaluate the query under the dataset's time quota; when the
        quota interrupts evaluation, a "next" link is generated so the client
        can resume on the following page.
        """
        logger.info('[IP: {}] [/sparql/] Querying {}'.format(
            request.environ['REMOTE_ADDR'], dataset_name))
        dataset = datasets.get_dataset(dataset_name)
        if dataset is None:
            abort(404)

        logger.debug('[/sparql/] Corresponding dataset found')
        # Negotiate the response serialization from the Accept header
        mimetype = request.accept_mimetypes.best_match([
            "application/json", "application/xml",
            "application/sparql-results+json", "application/sparql-results+xml"
        ])
        url = secure_url(request.url)
        try:
            # A GET request always returns the homepage of the dataset
            if request.method == "GET" or (not request.is_json):
                dinfo = dataset.describe(url)
                dinfo['@id'] = url
                void_desc = {
                    "nt": VoidDescriptor(url, dataset).describe("ntriples"),
                    "ttl": VoidDescriptor(url, dataset).describe("turtle"),
                    "xml": VoidDescriptor(url, dataset).describe("xml")
                }
                return render_template("sage.html",
                                       dataset_info=dinfo,
                                       void_desc=void_desc)

            engine = SageEngine()
            post_query, err = QueryRequest().load(request.get_json())
            if err is not None and len(err) > 0:
                return Response(format_marshmallow_errors(err), status=400)
            logger.info('[IP: {}] [/sparql/] Query={}'.format(
                request.environ['REMOTE_ADDR'], post_query))
            quota = dataset.quota / 1000
            max_results = dataset.max_results

            # Load next link
            next_link = None
            if 'next' in post_query:
                logger.debug(
                    '[/sparql/{}] Saved plan found, decoding "next" link'.
                    format(dataset_name))
                next_link = decode_saved_plan(post_query["next"])
            else:
                logger.debug('[/sparql/{}] Query to evaluate: {}'.format(
                    dataset_name, post_query))

            # build physical query plan, then execute it with the given quota
            logger.debug('[/sparql/{}] Starting query evaluation...'.format(
                dataset_name))
            start = time()
            plan, cardinalities = build_query_plan(post_query["query"],
                                                   dataset, next_link)
            loading_time = (time() - start) * 1000
            bindings, saved_plan, is_done = engine.execute(
                plan, quota, max_results)
            logger.debug(
                '[/sparql/{}] Query evaluation completed'.format(dataset_name))

            # compute controls for the next page
            start = time()
            next_page = None
            if is_done:
                logger.debug(
                    '[/sparql/{}] Query completed under the time quota'.format(
                        dataset_name))
            else:
                logger.debug(
                    '[/sparql/{}] The query was not completed under the time quota...'
                    .format(dataset_name))
                logger.debug(
                    '[/sparql/{}] Saving the execution to plan to generate a "next" link'
                    .format(dataset_name))
                next_page = encode_saved_plan(saved_plan)
                logger.debug(
                    '[/sparql/{}] "next" link successfully generated'.format(
                        dataset_name))
            exportTime = (time() - start) * 1000
            stats = {
                "cardinalities": cardinalities,
                "import": loading_time,
                "export": exportTime
            }

            # Serialize the page of results in the negotiated format
            if mimetype == "application/sparql-results+json":
                return json.jsonify(responses.json(bindings, next_page, stats))
            if mimetype == "application/xml" or mimetype == "application/sparql-results+xml":
                return Response(responses.xml(bindings, next_page, stats),
                                content_type="application/xml")
            return json.jsonify(responses.raw_json(bindings, next_page, stats))
        except Exception as e:
            # BUG FIX: the exception was previously discarded silently, making
            # server errors impossible to diagnose. Log it before aborting,
            # consistent with the other endpoints of this server.
            logger.error(e)
            abort(500)