def _iter(self, sparql_results_type, fields, bindings, boolean, triples): queue = Queue.Queue() graph = Graph() for prefix, namespace_uri in NS.iteritems(): graph.namespace_manager.bind(prefix, namespace_uri) triples = list(triples) with statsd.timer('humfrey.streaming.rdflib-serializer.add-triples.' + self.plugin_name): graph += triples serializer_thread = threading.Thread(target=self._serialize_to_queue, args=(graph, queue)) with statsd.timer('humfrey.streaming.rdflib-serializer.serialize.' + self.plugin_name): serializer_thread.start() while True: type, value = queue.get() if type == 'data': yield value elif type == 'sentinel': break elif type == 'exception': raise value[0], value[1], value[2] serializer_thread.join()
def get(self):
    """
    Return an in-memory object representing the stream.

    The object is built on first use and cached on ``self._cached_get``.
    Depending on ``get_sparql_results_type()`` it is a SparqlResultList
    ('resultset'), the value of ``get_boolean()`` ('boolean'), or a
    ConjunctiveGraph with the NS prefixes bound ('graph'). Any other
    results type raises AssertionError.

    If this stream has ``query`` or ``duration`` attributes they are copied
    onto the result.
    """
    if self._cached_get is not None:
        return self._cached_get

    results_type = self.get_sparql_results_type()
    if results_type == 'resultset':
        result = SparqlResultList(self.get_fields(), self.get_bindings())
    elif results_type == 'boolean':
        result = self.get_boolean()
    elif results_type == 'graph':
        result = rdflib.ConjunctiveGraph()
        for prefix, namespace_uri in NS.iteritems():
            result.namespace_manager.bind(prefix, namespace_uri)
        result += self.get_triples()
    else:
        raise AssertionError(
            "Unexpected results type: {0}".format(results_type))
    self._cached_get = result

    # NOTE(review): setattr would fail on a plain bool; presumably
    # get_boolean() returns an object that accepts attributes -- confirm.
    for name in ('query', 'duration'):
        if hasattr(self, name):
            setattr(self._cached_get, name, getattr(self, name))

    return self._cached_get
def label2(self):
    """
    Return a ``prefix:localpart`` label for this resource's identifier.

    Each namespace in NS that the identifier starts with is tried in turn;
    the first whose remaining local part matches LOCALPART_RE wins. If none
    applies, the identifier itself is returned unchanged.
    """
    identifier = self._identifier
    for prefix, uri in NS.iteritems():
        if not identifier.startswith(uri):
            continue
        localpart = identifier[len(uri):]
        if LOCALPART_RE.match(localpart):
            return '%s:%s' % (prefix, localpart)
    return identifier
def __init__(self, url, update_url=None, namespaces=None, preferred_media_types=()):
    """
    Set up the endpoint wrapper.

    :param url: stored as ``self._url``.
    :param update_url: stored as ``self._update_url``.
    :param namespaces: optional extra prefix mappings, applied on top of a
        copy of NS (anything ``dict.update`` accepts). ``None`` or an empty
        value leaves the copy of NS untouched.
    :param preferred_media_types: passed to ``self._get_accept_header``;
        when empty, ``self._supported_media_types`` is used instead.
    """
    self._url, self._update_url = url, update_url

    # Work on a copy so per-instance additions never touch the shared NS.
    self._namespaces = NS.copy()
    # The default is None rather than a shared ``{}`` literal, which would be
    # a single dict object reused across every call.
    if namespaces:
        self._namespaces.update(namespaces)

    self._cache = defaultdict(dict)
    self._accept_header = self._get_accept_header(
        preferred_media_types or self._supported_media_types)
def get(self, request):
    """
    Handle a GET request for the SPARQL query view.

    The ``query`` parameter is read from ``request.REQUEST``; the form is
    only bound to the request data when a query was supplied. The view's
    context is updated with the sorted namespaces, the form and the store.

    When the form validates the query is run with ``perform_query``:

    * on QueryError the error message and status code are placed in the
      context;
    * otherwise the duration header, query, duration and results are added
      to the context and the response is produced by
      ``_sparql_results_view`` or ``_graph_view`` according to
      ``results.format_type``. Any other format type raises AssertionError.

    NOTE(review): as visible here, the unbound/invalid-form and QueryError
    paths fall off the end of the method and return None -- confirm whether
    a final render call belongs after the ``if``.
    """
    query = request.REQUEST.get('query')
    form = SparqlQueryForm(request.REQUEST if query else None,
                           formats=self.get_format_choices())
    context = self.context
    context.update({
        'namespaces': sorted(NS.items()),
        'form': form,
        'store': self.store
    })

    if form.is_valid():
        try:
            results = self.perform_query(
                request, query, form.cleaned_data['common_prefixes'])
        except QueryError as e:
            context['error'] = e.message
            context['status_code'] = e.status_code
        else:
            context['additional_headers'][
                'X-Humfrey-SPARQL-Duration'] = results.duration
            context['queries'] = [results.query]
            context['duration'] = results.duration
            context['results'] = results

            if results.format_type == 'sparql-results':
                return self._sparql_results_view(request, context)
            elif results.format_type == 'graph':
                return self._graph_view(request, context)
            raise AssertionError("Unexpected format type: {0}".format(
                results.format_type))
def get(self, request):
    """
    Handle a GET request for the throttled SPARQL query view.

    Looks up the requesting user's privileges, builds the query form (bound
    to ``request.REQUEST`` only when a ``query`` parameter is present) and
    assembles the template context. When the user is throttled, the
    throttle/deny thresholds and intensity decay are exposed as additional
    response headers.

    When the form validates the query is run with ``perform_query``, which
    returns ``(results, intensity)``; a non-None intensity is reported in a
    further header. An HTTPError from the store, or an attempt to run
    concurrent queries, is recorded in the context as ``error`` and
    ``status_code``.

    NOTE(review): the visible body ends after the error handling without
    returning a response -- confirm whether the remainder of the method is
    missing from this view of the file.
    """
    privileges = self.get_user_privileges(request)

    query = request.REQUEST.get('query')
    form = SparqlQueryForm(request.REQUEST if query else None,
                           formats=self.get_format_choices())

    # Always create the header dict: it is written to below whenever an
    # intensity is reported, which previously raised NameError for users
    # who are not throttled.
    additional_headers = {}
    context = {
        'namespaces': sorted(NS.items()),
        'form': form,
        'store': self.store,
        'additional_headers': additional_headers,
    }

    if privileges['throttle']:
        additional_headers.update({
            'X-Humfrey-SPARQL-Throttle-Threshold': privileges['throttle_threshold'],
            'X-Humfrey-SPARQL-Deny-Threshold': privileges['deny_threshold'],
            'X-Humfrey-SPARQL-Intensity-Decay': privileges['intensity_decay'],
        })

    if form.is_valid():
        try:
            results, intensity = self.perform_query(
                request, query, form.cleaned_data['common_prefixes'], privileges)
            if intensity is not None:
                additional_headers['X-Humfrey-SPARQL-Intensity'] = intensity
        except urllib2.HTTPError as e:
            # The raw response body is used as the error text; an earlier
            # variant extracted it with parse(e).find('.//pre').text
            context['error'] = e.read()
            context['status_code'] = e.code
        except self.ConcurrentQueryException:
            context['error'] = "You cannot perform more than one query at a time.\nPlease wait for your previous query to complete or time out first."
            context['status_code'] = 403
def execute(self, transform_manager):
    """
    Load the vocabulary for every prefix/URI pair in NS.

    Loading is best-effort: a failure for one vocabulary is logged (with
    traceback, via ``logger.exception``) and the remaining vocabularies are
    still attempted.

    :param transform_manager: passed through to ``load_vocabulary``.
    """
    for prefix, uri in NS.iteritems():
        try:
            self.load_vocabulary(transform_manager, prefix, uri)
        except Exception:
            # Deliberately broad: one broken vocabulary must not stop the
            # others from loading.
            logger.exception("Failed to load vocabulary: %r from %r", prefix, uri)
def get(self, request):
    """
    Handle a GET request for the SPARQL query view.

    The ``query`` parameter is read from ``request.REQUEST``; the form is
    only bound to the request data when a query was supplied. The view's
    context is updated with the sorted namespaces, the form and the store.

    When the form validates the query is run with ``perform_query``:

    * on QueryError the error message and status code are placed in the
      context;
    * otherwise the duration header, query, duration and results are added
      to the context and the response is produced by
      ``_sparql_results_view`` or ``_graph_view`` according to
      ``results.format_type``. Any other format type raises AssertionError.

    NOTE(review): as visible here, the unbound/invalid-form and QueryError
    paths fall off the end of the method and return None -- confirm whether
    a final render call belongs after the ``if``.
    """
    query = request.REQUEST.get('query')
    form = SparqlQueryForm(request.REQUEST if query else None,
                           formats=self.get_format_choices())
    context = self.context
    context.update({
        'namespaces': sorted(NS.items()),
        'form': form,
        'store': self.store
    })

    if form.is_valid():
        try:
            results = self.perform_query(
                request, query, form.cleaned_data['common_prefixes'])
        except QueryError as e:
            context['error'] = e.message
            context['status_code'] = e.status_code
        else:
            context['additional_headers']['X-Humfrey-SPARQL-Duration'] = results.duration
            context['queries'] = [results.query]
            context['duration'] = results.duration
            context['results'] = results

            if results.format_type == 'sparql-results':
                return self._sparql_results_view(request, context)
            elif results.format_type == 'graph':
                return self._graph_view(request, context)
            raise AssertionError(
                "Unexpected format type: {0}".format(results.format_type))
def __init__(self, url, update_url=None, namespaces=None, preferred_media_types=()):
    """
    Set up the endpoint wrapper.

    :param url: stored as ``self._url``.
    :param update_url: stored as ``self._update_url``.
    :param namespaces: optional extra prefix mappings, applied on top of a
        copy of NS (anything ``dict.update`` accepts). ``None`` or an empty
        value leaves the copy of NS untouched.
    :param preferred_media_types: passed to ``self._get_accept_header``;
        when empty, ``self._supported_media_types`` is used instead.
    """
    self._url, self._update_url = url, update_url

    # Work on a copy so per-instance additions never touch the shared NS.
    self._namespaces = NS.copy()
    # The default is None rather than a shared ``{}`` literal, which would be
    # a single dict object reused across every call.
    if namespaces:
        self._namespaces.update(namespaces)

    self._cache = defaultdict(dict)
    self._accept_header = self._get_accept_header(
        preferred_media_types or self._supported_media_types)
def get(self):
    """
    Return an in-memory object representing the stream.

    The object is built on first use and cached on ``self._cached_get``.
    Depending on ``get_sparql_results_type()`` it is a SparqlResultList
    ('resultset'), the value of ``get_boolean()`` ('boolean'), or a
    ConjunctiveGraph with the NS prefixes bound ('graph'). Any other
    results type raises AssertionError.

    If this stream has ``query`` or ``duration`` attributes they are copied
    onto the result.
    """
    if self._cached_get is not None:
        return self._cached_get

    results_type = self.get_sparql_results_type()
    if results_type == 'resultset':
        result = SparqlResultList(self.get_fields(), self.get_bindings())
    elif results_type == 'boolean':
        result = self.get_boolean()
    elif results_type == 'graph':
        result = rdflib.ConjunctiveGraph()
        for prefix, namespace_uri in NS.iteritems():
            result.namespace_manager.bind(prefix, namespace_uri)
        result += self.get_triples()
    else:
        raise AssertionError("Unexpected results type: {0}".format(results_type))
    self._cached_get = result

    # NOTE(review): setattr would fail on a plain bool; presumably
    # get_boolean() returns an object that accepts attributes -- confirm.
    for name in ('query', 'duration'):
        if hasattr(self, name):
            setattr(self._cached_get, name, getattr(self, name))

    return self._cached_get
def get(self, request):
    """
    Serve the description document for the resource behind the request URL.

    Steps, in order:

    1. Resolve the request URL to a subject URI with ``doc_backward``,
       first letting a trailing extension be treated as a format and then
       retrying with no formats allowed.
    2. Redirect permanently to the canonical document URL when
       ``self.check_canonical`` is set and the URLs differ.
    3. Build a ConjunctiveGraph seeded with the subject's types and the
       results of every query from ``subject.get_queries()``.
    4. Record source, license and dataset information for the document.
    5. Choose a template, fill ``self.context``, run the doc RDF processors
       and render.

    :returns: an HttpResponsePermanentRedirect, or whatever
        ``render_to_format`` / ``render`` return.
    :raises Http404: if the URL cannot be resolved, the URI is not a valid
        IRI, the resource has no types, the graph is empty, or
        ``render_to_format`` raises KeyError.
    """
    additional_headers = {}

    # Apache helpfully(!?) unescapes encoded hash characters. If we get one
    # we know that the browser sent a '%23' (or else it would have stripped
    # it as a fragment identifier). We replace it with a '%23' so that our
    # URI canonicalisation doesn't get stuck in an endless redirect loop.
    doc_url = request.build_absolute_uri().replace('#', '%23')

    # Given a URL 'http://example.org/doc/foo.bar' we check whether 'foo'
    # has a type (ergo 'bar' is a format), and if not we assume that
    # 'foo.bar' is part of the URI.
    for formats in (None, ()):
        uri, format, is_local = doc_backward(doc_url, formats)
        if uri and not IRI.match(uri):
            raise Http404("Invalid IRI")
        if not uri:
            logger.debug("Could not resolve URL to a URI: %r", doc_url)
            raise Http404("Could not resolve URL to a URI")
        types = self.get_types(uri)
        if types:
            break
        doc_url = doc_url.rsplit('.', 1)[0]
    else:
        logger.debug(
            "Resource has no type, so is probably not known in these parts: %r",
            uri)
        raise Http404(
            "Resource has no type, so is probably not known in these parts"
        )

    expected_doc_url = urlparse.urljoin(
        doc_url, doc_forward(uri, request, format=format, described=True))
    if self.check_canonical and expected_doc_url != doc_url:
        logger.debug(
            "Request for a non-canonical doc URL (%r) for %r, redirecting to %r",
            doc_url, uri, expected_doc_url)
        return HttpResponsePermanentRedirect(expected_doc_url)

    doc_uri = rdflib.URIRef(
        doc_forward(uri, request, format=None, described=True))

    self.context.update({
        'subject_uri': uri,
        'doc_uri': doc_uri,
        'format': format,
        'types': types,
        'show_follow_link': not is_local,
        'no_index': not is_local,
        'additional_headers': additional_headers,
    })

    subject_uri, doc_uri = self.context['subject_uri'], self.context[
        'doc_uri']
    types = self.context['types']
    queries, graph = [], rdflib.ConjunctiveGraph()
    for prefix, namespace_uri in NS.iteritems():
        graph.namespace_manager.bind(prefix, namespace_uri)

    graph += ((subject_uri, NS.rdf.type, t) for t in types)
    subject = Resource(subject_uri, graph, self.endpoint)

    for query in subject.get_queries():
        graph += self.endpoint.query(query)
        queries.append(query)

    licenses, datasets = set(), set()
    for graph_name in graph.subjects(NS['ov'].describes):
        graph.add((doc_uri, NS['dcterms'].source, graph_name))
        licenses.update(graph.objects(graph_name, NS['dcterms'].license))
        datasets.update(graph.objects(graph_name, NS['void'].inDataset))

    # Only attribute a license to the document when it is unambiguous.
    if len(licenses) == 1:
        for license_uri in licenses:
            graph.add((doc_uri, NS['dcterms'].license, license_uri))

    if not graph:
        logger.debug("Graph for %r was empty; 404ing", uri)
        raise Http404("Graph was empty")

    self.template_name = subject.template_name or self.template_name
    for template_override in self.template_overrides:
        # Each override is (template name, type, type, ...).
        tn, types = template_override[0], template_override[1:]
        if set(subject._graph.objects(
                subject._identifier, NS.rdf.type)) & set(map(
                    expand, types)):
            self.template_name = tn
            break

    # The comprehension variables are deliberately not called ``uri``: on
    # Python 2 a list comprehension's loop variable leaks into the enclosing
    # scope, and ``uri`` is still needed below to compute the
    # Content-Location URL for the subject.
    self.context.update({
        'graph': graph,
        'subject': subject,
        'licenses': [Resource(licence, graph, self.endpoint)
                     for licence in licenses],
        'datasets': [Resource(dataset, graph, self.endpoint)
                     for dataset in datasets],
        'queries': map(self.endpoint.normalize_query, queries),
        'template_name': self.template_name,
    })

    self.set_renderers()

    for doc_rdf_processor in self._doc_rdf_processors:
        additional_context = doc_rdf_processor(self.request, self.context)
        if additional_context:
            self.context.update(additional_context)

    # If no format was given explicitly (i.e. format parameter or
    # extension) we inspect the Content-Type header.
    if not format:
        if request.renderers:
            format = request.renderers[0].format
            expected_doc_url = doc_forward(uri, request, format=format,
                                           described=True)
    if expected_doc_url != doc_url:
        additional_headers['Content-Location'] = expected_doc_url

    # NOTE: This getattrs every attr on subject, so would force
    # memoization on any cached attributes. We call it as late as
    # possible to make sure the graph won't change afterwards, making
    # those cached results incorrect.
    self.conneg += subject

    if self.context['format']:
        try:
            return self.render_to_format(format=format)
        except KeyError:
            raise Http404
    else:
        return self.render()
def get(self, request):
    """
    Serve the description document for the resource behind the request URL.

    Steps, in order:

    1. Resolve the request URL to a subject URI with ``doc_backward``,
       using the formats known to ``self._renderers_by_format``.
    2. Redirect permanently to the canonical document URL when
       ``self.check_canonical`` is set and the URLs differ.
    3. When no format was given explicitly, take the format of the first
       renderer from ``get_renderers`` and advertise the resulting URL in a
       Content-Location header.
    4. Build a ConjunctiveGraph seeded with the subject's types and the
       results of every query from ``subject.get_queries()``.
    5. Record source, license and dataset information, run the doc RDF
       processors, choose a template and render.

    :returns: an HttpResponsePermanentRedirect, or whatever
        ``render_to_format`` / ``render`` return.
    :raises Http404: if the URL cannot be resolved, the resource has no
        types, the graph is empty, or ``render_to_format`` raises KeyError.
    """
    additional_headers = {}
    doc_url = request.build_absolute_uri()

    uri, format, is_local = doc_backward(doc_url, set(self._renderers_by_format))
    if not uri:
        logger.debug("Could not resolve URL to a URI: %r", doc_url)
        raise Http404

    expected_doc_url = doc_forward(uri, request, format=format, described=True)

    types = self.get_types(uri)
    if not types:
        logger.debug("Resource has no type, so is probably not known in these parts: %r", uri)
        raise Http404

    if self.check_canonical and expected_doc_url != doc_url:
        logger.debug("Request for a non-canonical doc URL (%r) for %r, redirecting to %r",
                     doc_url, uri, expected_doc_url)
        return HttpResponsePermanentRedirect(expected_doc_url)

    # If no format was given explicitly (i.e. format parameter or
    # extension) we inspect the Content-Type header.
    if not format:
        renderers = self.get_renderers(request)
        if renderers:
            format = renderers[0].format
            expected_doc_url = doc_forward(uri, request, format=format, described=True)
    if expected_doc_url != doc_url:
        additional_headers['Content-Location'] = expected_doc_url

    doc_uri = rdflib.URIRef(doc_forward(uri, request, format=None, described=True))

    context = {
        'subject_uri': uri,
        'doc_uri': doc_uri,
        'format': format,
        'types': types,
        'show_follow_link': not is_local,
        'no_index': not is_local,
        'additional_headers': additional_headers,
    }

    subject_uri, doc_uri = context['subject_uri'], context['doc_uri']
    types = context['types']
    queries, graph = [], rdflib.ConjunctiveGraph()
    for prefix, namespace_uri in NS.iteritems():
        graph.namespace_manager.bind(prefix, namespace_uri)

    graph += ((subject_uri, NS.rdf.type, t) for t in types)
    subject = Resource(subject_uri, graph, self.endpoint)

    for query in subject.get_queries():
        graph += self.endpoint.query(query)
        queries.append(query)

    licenses, datasets = set(), set()
    for graph_name in graph.subjects(NS['ov'].describes):
        graph.add((doc_uri, NS['dcterms'].source, graph_name))
        licenses.update(graph.objects(graph_name, NS['dcterms'].license))
        datasets.update(graph.objects(graph_name, NS['void'].inDataset))

    # Only attribute a license to the document when it is unambiguous.
    if len(licenses) == 1:
        for license_uri in licenses:
            graph.add((doc_uri, NS['dcterms'].license, license_uri))

    if not graph:
        logger.debug("Graph for %r was empty; 404ing", uri)
        raise Http404

    for doc_rdf_processor in self._doc_rdf_processors:
        additional_context = doc_rdf_processor(request=request,
                                               graph=graph,
                                               doc_uri=doc_uri,
                                               subject_uri=subject_uri,
                                               subject=subject,
                                               endpoint=self.endpoint,
                                               renderers=self._renderers)
        if additional_context:
            context.update(additional_context)

    # The comprehension variables are deliberately not called ``uri``: on
    # Python 2 a list comprehension's loop variable leaks into the enclosing
    # scope and would silently rebind the subject URI.
    context.update({
        'graph': graph,
        'subject': subject,
        'licenses': [Resource(licence, graph, self.endpoint) for licence in licenses],
        'datasets': [Resource(dataset, graph, self.endpoint) for dataset in datasets],
        'queries': queries,
        'template_name': subject.template_name,
    })

    template_name = subject.template_name or self.template_name
    for template_override in self.template_overrides:
        # Each override is (template name, type, type, ...).
        tn, types = template_override[0], template_override[1:]
        # Diagnostics go to the logger rather than being printed to stdout
        # on every request.
        logger.debug("Considering template override %r for types %r; subject types: %r",
                     tn, types, subject.get_all('rdf:type'))
        if set(subject._graph.objects(subject._identifier, NS.rdf.type)) & set(map(expand, types)):
            template_name = tn
            break

    if context['format']:
        try:
            return self.render_to_format(request, context, template_name, format)
        except KeyError:
            raise Http404
    else:
        return self.render(request, context, template_name)
def __init__(self, url, update_url=None, namespaces=None):
    """
    Set up the endpoint wrapper.

    :param url: stored as ``self._url``.
    :param update_url: stored as ``self._update_url``.
    :param namespaces: optional extra prefix mappings, applied on top of a
        copy of NS (anything ``dict.update`` accepts). ``None`` or an empty
        value leaves the copy of NS untouched.
    """
    self._url, self._update_url = url, update_url

    # Work on a copy so per-instance additions never touch the shared NS.
    self._namespaces = NS.copy()
    # The default is None rather than a shared ``{}`` literal, which would be
    # a single dict object reused across every call.
    if namespaces:
        self._namespaces.update(namespaces)

    self._cache = defaultdict(dict)
def get(self, request):
    """
    Serve the description document for the resource behind the request URL.

    Steps, in order:

    1. Resolve the request URL to a subject URI with ``doc_backward``,
       first letting a trailing extension be treated as a format and then
       retrying with no formats allowed.
    2. Redirect permanently to the canonical document URL when
       ``self.check_canonical`` is set and the URLs differ.
    3. Build a ConjunctiveGraph seeded with the subject's types and the
       results of every query from ``subject.get_queries()``.
    4. Record source, license and dataset information for the document.
    5. Choose a template, fill ``self.context``, run the doc RDF processors
       and render.

    :returns: an HttpResponsePermanentRedirect, or whatever
        ``render_to_format`` / ``render`` return.
    :raises Http404: if the URL cannot be resolved, the URI is not a valid
        IRI, the resource has no types, the graph is empty, or
        ``render_to_format`` raises KeyError.
    """
    additional_headers = {}

    # Apache helpfully(!?) unescapes encoded hash characters. If we get one
    # we know that the browser sent a '%23' (or else it would have stripped
    # it as a fragment identifier). We replace it with a '%23' so that our
    # URI canonicalisation doesn't get stuck in an endless redirect loop.
    doc_url = request.build_absolute_uri().replace('#', '%23')

    # Given a URL 'http://example.org/doc/foo.bar' we check whether 'foo'
    # has a type (ergo 'bar' is a format), and if not we assume that
    # 'foo.bar' is part of the URI.
    for formats in (None, ()):
        uri, format, is_local = doc_backward(doc_url, formats)
        if uri and not IRI.match(uri):
            raise Http404("Invalid IRI")
        if not uri:
            logger.debug("Could not resolve URL to a URI: %r", doc_url)
            raise Http404("Could not resolve URL to a URI")
        types = self.get_types(uri)
        if types:
            break
        doc_url = doc_url.rsplit('.', 1)[0]
    else:
        logger.debug("Resource has no type, so is probably not known in these parts: %r", uri)
        raise Http404("Resource has no type, so is probably not known in these parts")

    expected_doc_url = urlparse.urljoin(
        doc_url, doc_forward(uri, request, format=format, described=True))
    if self.check_canonical and expected_doc_url != doc_url:
        logger.debug("Request for a non-canonical doc URL (%r) for %r, redirecting to %r",
                     doc_url, uri, expected_doc_url)
        return HttpResponsePermanentRedirect(expected_doc_url)

    doc_uri = rdflib.URIRef(doc_forward(uri, request, format=None, described=True))

    self.context.update({
        'subject_uri': uri,
        'doc_uri': doc_uri,
        'format': format,
        'types': types,
        'show_follow_link': not is_local,
        'no_index': not is_local,
        'additional_headers': additional_headers,
    })

    subject_uri, doc_uri = self.context['subject_uri'], self.context['doc_uri']
    types = self.context['types']
    queries, graph = [], rdflib.ConjunctiveGraph()
    for prefix, namespace_uri in NS.iteritems():
        graph.namespace_manager.bind(prefix, namespace_uri)

    graph += ((subject_uri, NS.rdf.type, t) for t in types)
    subject = Resource(subject_uri, graph, self.endpoint)

    for query in subject.get_queries():
        graph += self.endpoint.query(query)
        queries.append(query)

    licenses, datasets = set(), set()
    for graph_name in graph.subjects(NS['ov'].describes):
        graph.add((doc_uri, NS['dcterms'].source, graph_name))
        licenses.update(graph.objects(graph_name, NS['dcterms'].license))
        datasets.update(graph.objects(graph_name, NS['void'].inDataset))

    # Only attribute a license to the document when it is unambiguous.
    if len(licenses) == 1:
        for license_uri in licenses:
            graph.add((doc_uri, NS['dcterms'].license, license_uri))

    if not graph:
        logger.debug("Graph for %r was empty; 404ing", uri)
        raise Http404("Graph was empty")

    self.template_name = subject.template_name or self.template_name
    for template_override in self.template_overrides:
        # Each override is (template name, type, type, ...).
        tn, types = template_override[0], template_override[1:]
        if set(subject._graph.objects(subject._identifier, NS.rdf.type)) & set(map(expand, types)):
            self.template_name = tn
            break

    # The comprehension variables are deliberately not called ``uri``: on
    # Python 2 a list comprehension's loop variable leaks into the enclosing
    # scope, and ``uri`` is still needed below to compute the
    # Content-Location URL for the subject.
    self.context.update({
        'graph': graph,
        'subject': subject,
        'licenses': [Resource(licence, graph, self.endpoint) for licence in licenses],
        'datasets': [Resource(dataset, graph, self.endpoint) for dataset in datasets],
        'queries': map(self.endpoint.normalize_query, queries),
        'template_name': self.template_name,
    })

    self.set_renderers()

    for doc_rdf_processor in self._doc_rdf_processors:
        additional_context = doc_rdf_processor(self.request, self.context)
        if additional_context:
            self.context.update(additional_context)

    # If no format was given explicitly (i.e. format parameter or
    # extension) we inspect the Content-Type header.
    if not format:
        if request.renderers:
            format = request.renderers[0].format
            expected_doc_url = doc_forward(uri, request, format=format, described=True)
    if expected_doc_url != doc_url:
        additional_headers['Content-Location'] = expected_doc_url

    # NOTE: This getattrs every attr on subject, so would force
    # memoization on any cached attributes. We call it as late as
    # possible to make sure the graph won't change afterwards, making
    # those cached results incorrect.
    self.conneg += subject

    if self.context['format']:
        try:
            return self.render_to_format(format=format)
        except KeyError:
            raise Http404
    else:
        return self.render()