Esempio n. 1
0
    def void_dataset(graph_name):
        """Serve the VoID description of a single RDF dataset.

        Resolves *graph_name* against the server's dataset registry,
        negotiates an RDF serialization from the request's Accept header,
        and returns the VoID description in that format. Responds 404 when
        the dataset is unknown and 500 on any unexpected error.
        """
        try:
            logger.debug(
                '[/void/] Loading VoID descriptions for dataset {}'.format(
                    graph_name))
            rdf_graph = dataset.get_graph(graph_name)
            if rdf_graph is None:
                abort(404)

            logger.debug('[/void/] Corresponding dataset found')
            # Content negotiation: pick the best RDF mimetype the client accepts
            best_mime = request.accept_mimetypes.best_match([
                "application/n-triples", "text/turtle", "application/xml",
                "application/n-quads", "application/trig", "application/json",
                "application/json+ld"
            ])
            root_url = secure_url(request.url_root)
            # Drop a single trailing slash so generated URIs stay canonical
            if root_url.endswith('/'):
                root_url = root_url[:-1]
            descriptor = VoidDescriptor(root_url, rdf_graph)
            out_format, best_mime = choose_format(best_mime)
            return Response(descriptor.describe(out_format),
                            content_type=best_mime)
        except Exception as e:
            logger.error(e)
            abort(500)
Esempio n. 2
0
def test_describe_dataset_void():
    """Check the generated VoID description is isomorphic to the reference."""
    # Load the expected (hand-written) description
    expected = Graph()
    expected.parse("tests/descriptors/data/watdiv100_description.ttl",
                   format="ttl")

    # Generate a description for the same dataset
    endpoint_url = "http://localhost:8000/sparql/watdiv100"
    watdiv_graph = dataset.get_graph("watdiv100")
    generated = Graph()
    generated.parse(
        data=VoidDescriptor(endpoint_url, watdiv_graph).describe("turtle"),
        format="ttl")

    # RDF graph equality is checked up to blank-node renaming
    assert expected.isomorphic(generated)
Esempio n. 3
0
    def sparql_query(graph_name):
        """Evaluate a SPARQL query against a dataset (deprecated API).

        GET (or non-JSON POST) renders the dataset homepage with its VoID
        descriptions; a JSON POST evaluates the query under the dataset's
        time quota and returns one page of results plus a "next" link when
        the query is not complete. Responds 404 when the dataset is unknown,
        400 on a malformed query payload, and 500 on unexpected errors.

        WARNING: old API, deprecated (a Warning header is set on responses).
        """
        graph = dataset.get_graph(graph_name)
        if graph is None:
            abort(404)

        logger.debug('[/sparql/] Corresponding dataset found')
        mimetype = request.accept_mimetypes.best_match([
            "application/json", "application/xml",
            "application/sparql-results+json", "application/sparql-results+xml"
        ])
        url = secure_url(request.url)
        try:
            # A GET request always returns the homepage of the dataset
            if request.method == "GET" or (not request.is_json):
                dinfo = graph.describe(url)
                dinfo['@id'] = url
                void_desc = {
                    "nt": VoidDescriptor(url, graph).describe("ntriples"),
                    "ttl": VoidDescriptor(url, graph).describe("turtle"),
                    "xml": VoidDescriptor(url, graph).describe("xml")
                }
                return render_template("website/sage_dataset.html",
                                       dataset_info=dinfo,
                                       void_desc=void_desc)

            engine = SageEngine()
            post_query, err = QueryRequest().load(request.get_json())
            if err is not None and len(err) > 0:
                return Response(format_marshmallow_errors(err), status=400)
            quota = graph.quota / 1000  # quota is stored in ms, engine wants s
            max_results = graph.max_results

            # Load the saved plan from the "next" link, if any
            next_link = None
            if 'next' in post_query:
                next_link = decode_saved_plan(post_query["next"])

            # Build physical query plan, then execute it with the given quota
            start = time()
            plan, cardinalities = build_query_plan(post_query["query"],
                                                   dataset, graph_name,
                                                   next_link)
            loading_time = (time() - start) * 1000  # convert in milliseconds
            bindings, saved_plan, is_done = engine.execute(
                plan, quota, max_results)

            # Compute controls for the next page
            start = time()
            next_page = None
            if not is_done:
                next_page = encode_saved_plan(saved_plan)
            exportTime = (time() - start) * 1000  # convert in milliseconds
            stats = {
                "cardinalities": cardinalities,
                "import": loading_time,
                "export": exportTime
            }

            # Serialize results per the negotiated mimetype.
            # NOTE: these branches must be mutually exclusive (elif), otherwise
            # the final else would overwrite the sparql-results+json response.
            if mimetype == "application/sparql-results+json":
                res = Response(responses.w3c_json_streaming(
                    bindings, next_page, stats, url),
                               content_type='application/json')
            elif mimetype == "application/xml" or mimetype == "application/sparql-results+xml":
                res = Response(responses.w3c_xml(bindings, next_page, stats),
                               content_type="application/xml")
            else:
                res = Response(responses.raw_json_streaming(
                    bindings, next_page, stats, url),
                               content_type='application/json')
            # Set deprecation warning in headers
            res.headers.add(
                "Warning",
                "199 SaGe/2.0 \"You are using a deprecated API. Consider upgrading to the SaGe SPARQL query API. See http://sage.univ-nantes.fr/documentation for more details.\""
            )
            return res
        except Exception as e:
            logger.error(e)
            abort(500)
Esempio n. 4
0
    def sparql_query(dataset_name):
        """Evaluate a SPARQL query against a dataset.

        GET (or non-JSON POST) renders the dataset homepage with its VoID
        descriptions; a JSON POST evaluates the query under the dataset's
        time quota and returns one page of results plus a "next" link when
        the query is not complete. Responds 404 when the dataset is unknown,
        400 on a malformed query payload, and 500 on unexpected errors.
        """
        logger.info('[IP: {}] [/sparql/] Querying {}'.format(
            request.environ['REMOTE_ADDR'], dataset_name))
        dataset = datasets.get_dataset(dataset_name)
        if dataset is None:
            abort(404)

        logger.debug('[/sparql/] Corresponding dataset found')
        mimetype = request.accept_mimetypes.best_match([
            "application/json", "application/xml",
            "application/sparql-results+json", "application/sparql-results+xml"
        ])
        url = secure_url(request.url)
        try:
            # A GET request always returns the homepage of the dataset
            if request.method == "GET" or (not request.is_json):
                dinfo = dataset.describe(url)
                dinfo['@id'] = url
                void_desc = {
                    "nt": VoidDescriptor(url, dataset).describe("ntriples"),
                    "ttl": VoidDescriptor(url, dataset).describe("turtle"),
                    "xml": VoidDescriptor(url, dataset).describe("xml")
                }
                return render_template("sage.html",
                                       dataset_info=dinfo,
                                       void_desc=void_desc)

            engine = SageEngine()
            post_query, err = QueryRequest().load(request.get_json())
            if err is not None and len(err) > 0:
                return Response(format_marshmallow_errors(err), status=400)
            logger.info('[IP: {}] [/sparql/] Query={}'.format(
                request.environ['REMOTE_ADDR'], post_query))
            quota = dataset.quota / 1000  # quota is stored in ms, engine wants s
            max_results = dataset.max_results

            # Load the saved plan from the "next" link, if any
            next_link = None
            if 'next' in post_query:
                logger.debug(
                    '[/sparql/{}] Saved plan found, decoding "next" link'.
                    format(dataset_name))
                next_link = decode_saved_plan(post_query["next"])
            else:
                logger.debug('[/sparql/{}] Query to evaluate: {}'.format(
                    dataset_name, post_query))

            # Build physical query plan, then execute it with the given quota
            logger.debug('[/sparql/{}] Starting query evaluation...'.format(
                dataset_name))
            start = time()
            plan, cardinalities = build_query_plan(post_query["query"],
                                                   dataset, next_link)
            loading_time = (time() - start) * 1000
            bindings, saved_plan, is_done = engine.execute(
                plan, quota, max_results)
            logger.debug(
                '[/sparql/{}] Query evaluation completed'.format(dataset_name))

            # Compute controls for the next page
            start = time()
            next_page = None
            if is_done:
                logger.debug(
                    '[/sparql/{}] Query completed under the time quota'.format(
                        dataset_name))
            else:
                logger.debug(
                    '[/sparql/{}] The query was not completed under the time quota...'
                    .format(dataset_name))
                logger.debug(
                    '[/sparql/{}] Saving the execution to plan to generate a "next" link'
                    .format(dataset_name))
                next_page = encode_saved_plan(saved_plan)
                logger.debug(
                    '[/sparql/{}] "next" link successfully generated'.format(
                        dataset_name))
            exportTime = (time() - start) * 1000
            stats = {
                "cardinalities": cardinalities,
                "import": loading_time,
                "export": exportTime
            }

            # Serialize results per the negotiated mimetype
            if mimetype == "application/sparql-results+json":
                return json.jsonify(responses.json(bindings, next_page, stats))
            if mimetype == "application/xml" or mimetype == "application/sparql-results+xml":
                return Response(responses.xml(bindings, next_page, stats),
                                content_type="application/xml")
            return json.jsonify(responses.raw_json(bindings, next_page, stats))
        except Exception as e:
            # Log before aborting: a bare abort(500) hides the root cause
            # (the sibling handlers log via logger.error as well)
            logger.error(e)
            abort(500)