Example #1
0
 def get(self, request):
     uri = rdflib.URIRef(request.GET.get('uri', ''))
     try:
         url = urlparse.urlparse(uri)
     except Exception:
         raise Http404
     if not IRI.match(uri):
         return HttpResponseTemporaryRedirect(unicode(uri))
     elif request.GET.get('source') == 'purl':
         return HttpResponseSeeOther(doc_forward(uri, described=True))
     elif self.get_types(uri):
         return HttpResponsePermanentRedirect(doc_forward(uri, described=True))
     elif url.scheme in ('http', 'https') and url.netloc and url.path.startswith('/'):
         return HttpResponseTemporaryRedirect(unicode(uri))
     else:
         raise Http404
Example #2
0
    def get(self, request):
        uri = rdflib.URIRef(request.build_absolute_uri())
        if not IRI.match(uri):
            raise Http404("Invalid IRI")
        if not self.get_types(uri):
            raise Http404("URI has no types; not known around here")

        description_url = doc_forward(uri, described=True)

        for pattern, target, mimetypes in self.id_mapping_redirects:
            match = pattern.match(str(uri))
            if match and self.override_redirect(request, description_url, mimetypes):
                description_url = target % match.groupdict()
                break

        return HttpResponseSeeOther(description_url)
Example #3
0
    def get(self, request):
        uri = rdflib.URIRef(request.build_absolute_uri())
        if not IRI.match(uri):
            raise Http404("Invalid IRI")
        if not self.get_types(uri):
            raise Http404("URI has no types; not known around here")

        description_url = doc_forward(uri, described=True)

        for pattern, target, mimetypes in self.id_mapping_redirects:
            match = pattern.match(str(uri))
            if match and self.override_redirect(request, description_url,
                                                mimetypes):
                description_url = target % match.groupdict()
                break

        return HttpResponseSeeOther(description_url)
Example #4
0
 def get(self, request):
     uri = rdflib.URIRef(request.GET.get('uri', ''))
     try:
         url = urlparse.urlparse(uri)
     except Exception:
         raise Http404
     try:
         token = request.GET['token']
     except KeyError:
         raise Http404
     if token != self.get_uri_token(uri):
         raise Http404
     if not IRI.match(uri):
         return HttpResponseTemporaryRedirect(unicode(uri))
     elif request.GET.get('source') == 'purl':
         return HttpResponseSeeOther(doc_forward(uri, described=True))
     elif self.get_types(uri):
         return HttpResponsePermanentRedirect(
             doc_forward(uri, described=True))
     elif url.scheme in (
             'http', 'https') and url.netloc and url.path.startswith('/'):
         return HttpResponseTemporaryRedirect(unicode(uri))
     else:
         raise Http404
Example #5
0
    def get(self, request):
        additional_headers = {}

        # Apache helpfully(!?) unescapes encoded hash characters. If we get one
        # we know that the browser sent a '%23' (or else would have stripped it
        # as a fragment identifier. We replace it with a '%23' so that our URI
        # canonicalisation doesn't get stuck in an endless redirect loop.
        doc_url = request.build_absolute_uri().replace('#', '%23')

        # Given a URL 'http://example.org/doc/foo.bar' we check whether 'foo',
        # has a type (ergo 'bar' is a format), and if not we assume that
        # 'foo.bar' is part of the URI
        for formats in (None, ()):
            uri, format, is_local = doc_backward(doc_url, formats)
            if uri and not IRI.match(uri):
                raise Http404("Invalid IRI")
            if not uri:
                logger.debug("Could not resolve URL to a URI: %r", doc_url)
                raise Http404("Could not resolve URL to a URI")
            types = self.get_types(uri)
            if types:
                break
            doc_url = doc_url.rsplit('.', 1)[0]
        else:
            logger.debug("Resource has no type, so is probably not known in these parts: %r", uri)
            raise Http404("Resource has no type, so is probably not known in these parts")

        expected_doc_url = urlparse.urljoin(doc_url, doc_forward(uri, request, format=format, described=True))
        if self.check_canonical and expected_doc_url != doc_url:
            logger.debug("Request for a non-canonical doc URL (%r) for %r, redirecting to %r", doc_url, uri, expected_doc_url)
            return HttpResponsePermanentRedirect(expected_doc_url)

        doc_uri = rdflib.URIRef(doc_forward(uri, request, format=None, described=True))

        self.context.update({
            'subject_uri': uri,
            'doc_uri': doc_uri,
            'format': format,
            'types': types,
            'show_follow_link': not is_local,
            'no_index': not is_local,
            'additional_headers': additional_headers,
        })

        subject_uri, doc_uri = self.context['subject_uri'], self.context['doc_uri']
        types = self.context['types']

        queries, graph = [], rdflib.ConjunctiveGraph()
        for prefix, namespace_uri in NS.iteritems():
            graph.namespace_manager.bind(prefix, namespace_uri)

        graph += ((subject_uri, NS.rdf.type, t) for t in types)
        subject = Resource(subject_uri, graph, self.endpoint)

        for query in subject.get_queries():
            graph += self.endpoint.query(query)
            queries.append(query)

        licenses, datasets = set(), set()
        for graph_name in graph.subjects(NS['ov'].describes):
            graph.add((doc_uri, NS['dcterms'].source, graph_name))
            licenses.update(graph.objects(graph_name, NS['dcterms'].license))
            datasets.update(graph.objects(graph_name, NS['void'].inDataset))

        if len(licenses) == 1:
            for license_uri in licenses:
                graph.add((doc_uri, NS['dcterms'].license, license_uri))

        if not graph:
            logger.debug("Graph for %r was empty; 404ing", uri)
            raise Http404("Graph was empty")

        self.template_name = subject.template_name or self.template_name
        for template_override in self.template_overrides:
            tn, types = template_override[0], template_override[1:]
            if set(subject._graph.objects(subject._identifier, NS.rdf.type)) & set(map(expand, types)):
                self.template_name = tn
                break

        self.context.update({
            'graph': graph,
            'subject': subject,
            'licenses': [Resource(uri, graph, self.endpoint) for uri in licenses],
            'datasets': [Resource(uri, graph, self.endpoint) for uri in datasets],
            'queries': map(self.endpoint.normalize_query, queries),
            'template_name': self.template_name,
        })

        self.set_renderers()

        for doc_rdf_processor in self._doc_rdf_processors:
            additional_context = doc_rdf_processor(self.request, self.context)
            if additional_context:
                self.context.update(additional_context)

        # If no format was given explicitly (i.e. format parameter or
        # extension) we inspect the Content-Type header.
        if not format:
            if request.renderers:
                format = request.renderers[0].format
                expected_doc_url = doc_forward(uri, request, format=format, described=True)
        if expected_doc_url != doc_url:
            additional_headers['Content-Location'] = expected_doc_url

        # NOTE: This getattrs every atttr on subject, so would force
        # memoization on any cached attributes. We call it as late as
        # possible to make sure the graph won't change afterwards, making
        # those cached results incorrect.
        self.conneg += subject

        if self.context['format']:
            try:
                return self.render_to_format(format=format)
            except KeyError:
                raise Http404
        else:
            return self.render()
Example #6
0
    def get(self, request):
        additional_headers = {}

        # Apache helpfully(!?) unescapes encoded hash characters. If we get one
        # we know that the browser sent a '%23' (or else would have stripped it
        # as a fragment identifier. We replace it with a '%23' so that our URI
        # canonicalisation doesn't get stuck in an endless redirect loop.
        doc_url = request.build_absolute_uri().replace('#', '%23')

        # Given a URL 'http://example.org/doc/foo.bar' we check whether 'foo',
        # has a type (ergo 'bar' is a format), and if not we assume that
        # 'foo.bar' is part of the URI
        for formats in (None, ()):
            uri, format, is_local = doc_backward(doc_url, formats)
            if uri and not IRI.match(uri):
                raise Http404("Invalid IRI")
            if not uri:
                logger.debug("Could not resolve URL to a URI: %r", doc_url)
                raise Http404("Could not resolve URL to a URI")
            types = self.get_types(uri)
            if types:
                break
            doc_url = doc_url.rsplit('.', 1)[0]
        else:
            logger.debug(
                "Resource has no type, so is probably not known in these parts: %r",
                uri)
            raise Http404(
                "Resource has no type, so is probably not known in these parts"
            )

        expected_doc_url = urlparse.urljoin(
            doc_url, doc_forward(uri, request, format=format, described=True))
        if self.check_canonical and expected_doc_url != doc_url:
            logger.debug(
                "Request for a non-canonical doc URL (%r) for %r, redirecting to %r",
                doc_url, uri, expected_doc_url)
            return HttpResponsePermanentRedirect(expected_doc_url)

        doc_uri = rdflib.URIRef(
            doc_forward(uri, request, format=None, described=True))

        self.context.update({
            'subject_uri': uri,
            'doc_uri': doc_uri,
            'format': format,
            'types': types,
            'show_follow_link': not is_local,
            'no_index': not is_local,
            'additional_headers': additional_headers,
        })

        subject_uri, doc_uri = self.context['subject_uri'], self.context[
            'doc_uri']
        types = self.context['types']

        queries, graph = [], rdflib.ConjunctiveGraph()
        for prefix, namespace_uri in NS.iteritems():
            graph.namespace_manager.bind(prefix, namespace_uri)

        graph += ((subject_uri, NS.rdf.type, t) for t in types)
        subject = Resource(subject_uri, graph, self.endpoint)

        for query in subject.get_queries():
            graph += self.endpoint.query(query)
            queries.append(query)

        licenses, datasets = set(), set()
        for graph_name in graph.subjects(NS['ov'].describes):
            graph.add((doc_uri, NS['dcterms'].source, graph_name))
            licenses.update(graph.objects(graph_name, NS['dcterms'].license))
            datasets.update(graph.objects(graph_name, NS['void'].inDataset))

        if len(licenses) == 1:
            for license_uri in licenses:
                graph.add((doc_uri, NS['dcterms'].license, license_uri))

        if not graph:
            logger.debug("Graph for %r was empty; 404ing", uri)
            raise Http404("Graph was empty")

        self.template_name = subject.template_name or self.template_name
        for template_override in self.template_overrides:
            tn, types = template_override[0], template_override[1:]
            if set(subject._graph.objects(
                    subject._identifier, NS.rdf.type)) & set(map(
                        expand, types)):
                self.template_name = tn
                break

        self.context.update({
            'graph':
            graph,
            'subject':
            subject,
            'licenses':
            [Resource(uri, graph, self.endpoint) for uri in licenses],
            'datasets':
            [Resource(uri, graph, self.endpoint) for uri in datasets],
            'queries':
            map(self.endpoint.normalize_query, queries),
            'template_name':
            self.template_name,
        })

        self.set_renderers()

        for doc_rdf_processor in self._doc_rdf_processors:
            additional_context = doc_rdf_processor(self.request, self.context)
            if additional_context:
                self.context.update(additional_context)

        # If no format was given explicitly (i.e. format parameter or
        # extension) we inspect the Content-Type header.
        if not format:
            if request.renderers:
                format = request.renderers[0].format
                expected_doc_url = doc_forward(uri,
                                               request,
                                               format=format,
                                               described=True)
        if expected_doc_url != doc_url:
            additional_headers['Content-Location'] = expected_doc_url

        # NOTE: This getattrs every atttr on subject, so would force
        # memoization on any cached attributes. We call it as late as
        # possible to make sure the graph won't change afterwards, making
        # those cached results incorrect.
        self.conneg += subject

        if self.context['format']:
            try:
                return self.render_to_format(format=format)
            except KeyError:
                raise Http404
        else:
            return self.render()