def get(self, request):
    """Resolve the 'uri' query parameter and redirect appropriately.

    Known URIs are permanently redirected to their description
    document; PURL-sourced requests get a 303; anything else that looks
    like a plausible http(s) URL is temporarily redirected as-is.
    """
    uri = rdflib.URIRef(request.GET.get('uri', ''))
    try:
        parsed = urlparse.urlparse(uri)
    except Exception:
        raise Http404
    # Something that isn't an IRI at all is bounced straight back out.
    if not IRI.match(uri):
        return HttpResponseTemporaryRedirect(unicode(uri))
    if request.GET.get('source') == 'purl':
        return HttpResponseSeeOther(doc_forward(uri, described=True))
    if self.get_types(uri):
        return HttpResponsePermanentRedirect(doc_forward(uri, described=True))
    # NOTE(review): redirecting to a caller-supplied URL with no further
    # check makes this usable as an open redirector; a token-guarded
    # variant of this view exists elsewhere in this file.
    if parsed.scheme in ('http', 'https') and parsed.netloc and parsed.path.startswith('/'):
        return HttpResponseTemporaryRedirect(unicode(uri))
    raise Http404
def get(self, request):
    """303-redirect the requested identifier URI to its description URL.

    Raises Http404 for invalid IRIs and for URIs with no known types.
    """
    uri = rdflib.URIRef(request.build_absolute_uri())
    if not IRI.match(uri):
        raise Http404("Invalid IRI")
    if not self.get_types(uri):
        raise Http404("URI has no types; not known around here")
    redirect_to = doc_forward(uri, described=True)
    # The first mapping whose pattern matches AND whose mimetype check
    # permits an override wins; otherwise the default description URL
    # falls through unchanged.
    for pattern, target, mimetypes in self.id_mapping_redirects:
        m = pattern.match(str(uri))
        if not m:
            continue
        if not self.override_redirect(request, redirect_to, mimetypes):
            continue
        redirect_to = target % m.groupdict()
        break
    return HttpResponseSeeOther(redirect_to)
def get(self, request):
    """Redirect a caller-supplied 'uri' once its access token checks out.

    The mandatory per-URI token guards the redirects below against
    being used as an open redirector. Redirect rules after the token
    check: non-IRIs and plausible external http(s) URLs are temporarily
    redirected as-is; PURL-sourced requests get a 303 to the
    description document; known (typed) URIs get a 301 to it.
    """
    uri = rdflib.URIRef(request.GET.get('uri', ''))
    try:
        parsed = urlparse.urlparse(uri)
    except Exception:
        raise Http404
    # Missing token is a 404 before we do any further work.
    if 'token' not in request.GET:
        raise Http404
    if request.GET['token'] != self.get_uri_token(uri):
        raise Http404
    if not IRI.match(uri):
        return HttpResponseTemporaryRedirect(unicode(uri))
    if request.GET.get('source') == 'purl':
        return HttpResponseSeeOther(doc_forward(uri, described=True))
    if self.get_types(uri):
        return HttpResponsePermanentRedirect(doc_forward(uri, described=True))
    if parsed.scheme in ('http', 'https') and parsed.netloc and parsed.path.startswith('/'):
        return HttpResponseTemporaryRedirect(unicode(uri))
    raise Http404
def get(self, request):
    """Render the description document for the resource a doc URL names.

    Resolves the requested URL back to a subject URI, pulls the
    subject's RDF from the endpoint into a local graph, attaches
    provenance (sources, licenses, datasets), picks a template, and
    hands off to content negotiation for rendering.

    Raises Http404 when the URL cannot be mapped to a URI, the URI has
    no types at the endpoint, the resulting graph is empty, or an
    explicitly requested format is unknown.
    """
    additional_headers = {}
    # Apache helpfully(!?) unescapes encoded hash characters. If we get one
    # we know that the browser sent a '%23' (or else it would have been
    # stripped as a fragment identifier). We put the '%23' back so that our
    # URI canonicalisation doesn't get stuck in an endless redirect loop.
    doc_url = request.build_absolute_uri().replace('#', '%23')
    # Given a URL 'http://example.org/doc/foo.bar' we first check whether
    # 'foo' has a type (ergo 'bar' is a format); if not, the second pass
    # (formats=(), extension already stripped off doc_url below) assumes
    # 'foo.bar' is all part of the URI.
    for formats in (None, ()):
        uri, format, is_local = doc_backward(doc_url, formats)
        if uri and not IRI.match(uri):
            raise Http404("Invalid IRI")
        if not uri:
            logger.debug("Could not resolve URL to a URI: %r", doc_url)
            raise Http404("Could not resolve URL to a URI")
        types = self.get_types(uri)
        if types:
            break
        # No types found: treat the extension as part of the URI and retry.
        doc_url = doc_url.rsplit('.', 1)[0]
    else:
        logger.debug("Resource has no type, so is probably not known in these parts: %r", uri)
        raise Http404("Resource has no type, so is probably not known in these parts")
    # Optionally 301 to the canonical doc URL for this URI and format.
    expected_doc_url = urlparse.urljoin(doc_url, doc_forward(uri, request, format=format, described=True))
    if self.check_canonical and expected_doc_url != doc_url:
        logger.debug("Request for a non-canonical doc URL (%r) for %r, redirecting to %r", doc_url, uri, expected_doc_url)
        return HttpResponsePermanentRedirect(expected_doc_url)
    # The format-less doc URI identifies the description document itself.
    doc_uri = rdflib.URIRef(doc_forward(uri, request, format=None, described=True))
    self.context.update({
        'subject_uri': uri,
        'doc_uri': doc_uri,
        'format': format,
        'types': types,
        # Non-local resources get a "follow" link and are kept out of
        # search-engine indexes.
        'show_follow_link': not is_local,
        'no_index': not is_local,
        'additional_headers': additional_headers,
    })
    subject_uri, doc_uri = self.context['subject_uri'], self.context['doc_uri']
    types = self.context['types']
    # Build a local graph seeded with the subject's rdf:type statements,
    # with the standard namespace prefixes bound for serialisation.
    queries, graph = [], rdflib.ConjunctiveGraph()
    for prefix, namespace_uri in NS.iteritems():
        graph.namespace_manager.bind(prefix, namespace_uri)
    graph += ((subject_uri, NS.rdf.type, t) for t in types)
    subject = Resource(subject_uri, graph, self.endpoint)
    # Pull the subject's data from the endpoint, recording each query run
    # so it can be shown alongside the rendered document.
    for query in subject.get_queries():
        graph += self.endpoint.query(query)
        queries.append(query)
    # Attach provenance: link the document to each source graph, and
    # collect any licenses and void datasets declared on those graphs.
    licenses, datasets = set(), set()
    for graph_name in graph.subjects(NS['ov'].describes):
        graph.add((doc_uri, NS['dcterms'].source, graph_name))
        licenses.update(graph.objects(graph_name, NS['dcterms'].license))
        datasets.update(graph.objects(graph_name, NS['void'].inDataset))
    # With exactly one license across all sources the document inherits it.
    if len(licenses) == 1:
        for license_uri in licenses:
            graph.add((doc_uri, NS['dcterms'].license, license_uri))
    if not graph:
        logger.debug("Graph for %r was empty; 404ing", uri)
        raise Http404("Graph was empty")
    # Template selection: the subject's own preference, unless one of the
    # configured overrides matches an rdf:type of the subject.
    self.template_name = subject.template_name or self.template_name
    for template_override in self.template_overrides:
        tn, types = template_override[0], template_override[1:]
        if set(subject._graph.objects(subject._identifier, NS.rdf.type)) & set(map(expand, types)):
            self.template_name = tn
            break
    self.context.update({
        'graph': graph,
        'subject': subject,
        'licenses': [Resource(uri, graph, self.endpoint) for uri in licenses],
        'datasets': [Resource(uri, graph, self.endpoint) for uri in datasets],
        'queries': map(self.endpoint.normalize_query, queries),
        'template_name': self.template_name,
    })
    self.set_renderers()
    # Give registered RDF processors a chance to extend the context.
    for doc_rdf_processor in self._doc_rdf_processors:
        additional_context = doc_rdf_processor(self.request, self.context)
        if additional_context:
            self.context.update(additional_context)
    # If no format was given explicitly (i.e. format parameter or
    # extension) we inspect the Content-Type header.
    if not format:
        if request.renderers:
            format = request.renderers[0].format
    # Advertise the format-specific URL for the negotiated representation.
    expected_doc_url = doc_forward(uri, request, format=format, described=True)
    if expected_doc_url != doc_url:
        additional_headers['Content-Location'] = expected_doc_url
    # NOTE: This getattrs every attr on subject, so would force
    # memoization on any cached attributes. We call it as late as
    # possible to make sure the graph won't change afterwards, making
    # those cached results incorrect.
    self.conneg += subject
    # An explicitly requested format must be honoured exactly; an
    # unrecognised one is a 404 rather than a negotiation fallback.
    if self.context['format']:
        try:
            return self.render_to_format(format=format)
        except KeyError:
            raise Http404
    else:
        return self.render()
def get(self, request):
    """Render the description document for the resource a doc URL names.

    Resolves the requested URL back to a subject URI, pulls the
    subject's RDF from the endpoint into a local graph, attaches
    provenance (sources, licenses, datasets), picks a template, and
    hands off to content negotiation for rendering.

    Raises Http404 when the URL cannot be mapped to a URI, the URI has
    no types at the endpoint, the resulting graph is empty, or an
    explicitly requested format is unknown.
    """
    additional_headers = {}
    # Apache helpfully(!?) unescapes encoded hash characters. If we get one
    # we know that the browser sent a '%23' (or else it would have been
    # stripped as a fragment identifier). We put the '%23' back so that our
    # URI canonicalisation doesn't get stuck in an endless redirect loop.
    doc_url = request.build_absolute_uri().replace('#', '%23')
    # Given a URL 'http://example.org/doc/foo.bar' we first check whether
    # 'foo' has a type (ergo 'bar' is a format); if not, the second pass
    # (formats=(), extension already stripped off doc_url below) assumes
    # 'foo.bar' is all part of the URI.
    for formats in (None, ()):
        uri, format, is_local = doc_backward(doc_url, formats)
        if uri and not IRI.match(uri):
            raise Http404("Invalid IRI")
        if not uri:
            logger.debug("Could not resolve URL to a URI: %r", doc_url)
            raise Http404("Could not resolve URL to a URI")
        types = self.get_types(uri)
        if types:
            break
        # No types found: treat the extension as part of the URI and retry.
        doc_url = doc_url.rsplit('.', 1)[0]
    else:
        logger.debug(
            "Resource has no type, so is probably not known in these parts: %r", uri)
        raise Http404(
            "Resource has no type, so is probably not known in these parts"
        )
    # Optionally 301 to the canonical doc URL for this URI and format.
    expected_doc_url = urlparse.urljoin(
        doc_url, doc_forward(uri, request, format=format, described=True))
    if self.check_canonical and expected_doc_url != doc_url:
        logger.debug(
            "Request for a non-canonical doc URL (%r) for %r, redirecting to %r",
            doc_url, uri, expected_doc_url)
        return HttpResponsePermanentRedirect(expected_doc_url)
    # The format-less doc URI identifies the description document itself.
    doc_uri = rdflib.URIRef(
        doc_forward(uri, request, format=None, described=True))
    self.context.update({
        'subject_uri': uri,
        'doc_uri': doc_uri,
        'format': format,
        'types': types,
        # Non-local resources get a "follow" link and are kept out of
        # search-engine indexes.
        'show_follow_link': not is_local,
        'no_index': not is_local,
        'additional_headers': additional_headers,
    })
    subject_uri, doc_uri = self.context['subject_uri'], self.context[
        'doc_uri']
    types = self.context['types']
    # Build a local graph seeded with the subject's rdf:type statements,
    # with the standard namespace prefixes bound for serialisation.
    queries, graph = [], rdflib.ConjunctiveGraph()
    for prefix, namespace_uri in NS.iteritems():
        graph.namespace_manager.bind(prefix, namespace_uri)
    graph += ((subject_uri, NS.rdf.type, t) for t in types)
    subject = Resource(subject_uri, graph, self.endpoint)
    # Pull the subject's data from the endpoint, recording each query run
    # so it can be shown alongside the rendered document.
    for query in subject.get_queries():
        graph += self.endpoint.query(query)
        queries.append(query)
    # Attach provenance: link the document to each source graph, and
    # collect any licenses and void datasets declared on those graphs.
    licenses, datasets = set(), set()
    for graph_name in graph.subjects(NS['ov'].describes):
        graph.add((doc_uri, NS['dcterms'].source, graph_name))
        licenses.update(graph.objects(graph_name, NS['dcterms'].license))
        datasets.update(graph.objects(graph_name, NS['void'].inDataset))
    # With exactly one license across all sources the document inherits it.
    if len(licenses) == 1:
        for license_uri in licenses:
            graph.add((doc_uri, NS['dcterms'].license, license_uri))
    if not graph:
        logger.debug("Graph for %r was empty; 404ing", uri)
        raise Http404("Graph was empty")
    # Template selection: the subject's own preference, unless one of the
    # configured overrides matches an rdf:type of the subject.
    self.template_name = subject.template_name or self.template_name
    for template_override in self.template_overrides:
        tn, types = template_override[0], template_override[1:]
        if set(subject._graph.objects(
                subject._identifier, NS.rdf.type)) & set(map(
                    expand, types)):
            self.template_name = tn
            break
    self.context.update({
        'graph': graph,
        'subject': subject,
        'licenses': [Resource(uri, graph, self.endpoint) for uri in licenses],
        'datasets': [Resource(uri, graph, self.endpoint) for uri in datasets],
        'queries': map(self.endpoint.normalize_query, queries),
        'template_name': self.template_name,
    })
    self.set_renderers()
    # Give registered RDF processors a chance to extend the context.
    for doc_rdf_processor in self._doc_rdf_processors:
        additional_context = doc_rdf_processor(self.request, self.context)
        if additional_context:
            self.context.update(additional_context)
    # If no format was given explicitly (i.e. format parameter or
    # extension) we inspect the Content-Type header.
    if not format:
        if request.renderers:
            format = request.renderers[0].format
    # Advertise the format-specific URL for the negotiated representation.
    expected_doc_url = doc_forward(uri, request, format=format, described=True)
    if expected_doc_url != doc_url:
        additional_headers['Content-Location'] = expected_doc_url
    # NOTE: This getattrs every attr on subject, so would force
    # memoization on any cached attributes. We call it as late as
    # possible to make sure the graph won't change afterwards, making
    # those cached results incorrect.
    self.conneg += subject
    # An explicitly requested format must be honoured exactly; an
    # unrecognised one is a 404 rather than a negotiation fallback.
    if self.context['format']:
        try:
            return self.render_to_format(format=format)
        except KeyError:
            raise Http404
    else:
        return self.render()