Exemple #1
0
    def get(self, request, project):
        """
        Serve the ``/robots.txt`` of a project.

        When a ``robots.txt`` file exists in storage for the project's
        default version, that file is served through ``_serve_docs``.
        Otherwise the ``robots.txt`` template is rendered with the sitemap
        URL and the hidden paths of the project.

        A 404 is raised when the default version is private, not active or
        not built.
        """
        default_slug = project.get_default_version()
        default_version = project.versions.get(slug=default_slug)

        # Any one of these conditions is enough to refuse serving the file.
        is_private = default_version.privacy_level == constants.PRIVATE
        is_inactive = not default_version.active
        is_unbuilt = not default_version.built
        if is_private or is_inactive or is_unbuilt:
            raise Http404()

        html_root = project.get_storage_path(
            type_='html',
            version_slug=default_slug,
            include_file=False,
            version_type=self.version_type,
        )
        robots_path = build_media_storage.join(html_root, 'robots.txt')

        if not build_media_storage.exists(robots_path):
            # No user-provided file: fall back to the rendered template.
            sitemap_url = '{scheme}://{domain}/sitemap.xml'.format(
                scheme='https',
                domain=project.subdomain(),
            )
            hidden_paths = self._get_hidden_paths(project)
            return render(
                request,
                'robots.txt',
                {'sitemap_url': sitemap_url, 'hidden_paths': hidden_paths},
                content_type='text/plain',
            )

        # Serve the user's file; only the path part of the storage URL is
        # kept (scheme and domain are stripped).
        storage_url = build_media_storage.url(robots_path)
        relative_url = urlparse(storage_url)._replace(scheme='', netloc='').geturl()
        return self._serve_docs(
            request,
            final_project=project,
            path=relative_url,
        )
Exemple #2
0
    def get(
        self,
        request,
        project_slug=None,
        subproject_slug=None,
        subproject_slash=None,
        lang_slug=None,
        version_slug=None,
        filename='',
    ):  # noqa
        """
        Work out which file the parsed URL parts point to and serve it.

        ``subproject_slash`` tells whether the subproject URL came with a
        slash, which decides between serving docs and redirecting to add
        the ``/``.

        Depending on the request this returns a canonical redirect, a system
        redirect, an unauthed response or the response of ``_serve_docs``.
        ``Http404`` is raised for a versioned, non-external project when the
        language or the version is missing from the URL.
        """
        version_slug = self.get_version_from_host(request, version_slug)
        (
            final_project,
            lang_slug,
            version_slug,
            filename,
        ) = _get_project_data_from_request(  # noqa
            request,
            project_slug=project_slug,
            subproject_slug=subproject_slug,
            lang_slug=lang_slug,
            version_slug=version_slug,
            filename=filename,
        )

        log.debug(
            'Serving docs: project=%s, subproject=%s, lang_slug=%s, version_slug=%s, filename=%s',
            final_project.slug,
            subproject_slug,
            lang_slug,
            version_slug,
            filename,
        )

        # Requests that need canonicalizing (eg. HTTP -> HTTPS, redirect to
        # the canonical domain) are handled first.
        if hasattr(request, 'canonicalize'):
            try:
                return self.canonical_redirect(
                    request,
                    final_project,
                    version_slug,
                    filename,
                )
            except InfiniteRedirectException:
                # Redirecting here would break things, so keep serving.
                pass

        is_single_version = final_project.single_version
        has_no_filename = filename == ''

        # ``/`` redirect for projects that are not single version.
        # External versions/builds always have a version, because it is
        # taken from the host name.
        version_missing_or_external = (
            version_slug is None or hasattr(request, 'external_domain')
        )
        if (
            lang_slug is None
            and version_missing_or_external
            and has_no_filename
            and not is_single_version
        ):
            return self.system_redirect(
                request,
                final_project,
                lang_slug,
                version_slug,
                filename,
            )

        # ``/projects/subproject`` redirect: there _is_ a subproject_slug
        # but no subproject_slash.
        if (
            is_single_version
            and has_no_filename
            and subproject_slug
            and not subproject_slash
        ):
            return self.system_redirect(
                request,
                final_project,
                lang_slug,
                version_slug,
                filename,
            )

        url_is_incomplete = lang_slug is None or version_slug is None
        if (
            url_is_incomplete
            and not is_single_version
            and self.version_type != EXTERNAL
        ):
            log.warning(
                'Invalid URL for project with versions. url=%s, project=%s',
                filename,
                final_project.slug,
            )
            raise Http404('Invalid URL for project with versions')

        # TODO: un-comment when ready to perform redirect here
        # redirect_path, http_status = self.get_redirect(
        #     final_project,
        #     lang_slug,
        #     version_slug,
        #     filename,
        #     request.path,
        # )
        # if redirect_path and http_status:
        #     return self.get_redirect_response(request, redirect_path, http_status)

        # Users without permission get the unauthed response instead.
        if not self.allowed_user(request, final_project, version_slug):
            return self.get_unauthed_response(request, final_project)

        html_root = final_project.get_storage_path(
            type_='html',
            version_slug=version_slug,
            include_file=False,
            version_type=self.version_type,
        )

        # An empty ``filename`` means serving from ``/``.
        path = build_media_storage.join(html_root, filename.lstrip('/'))
        if path[-1] == '/':
            # The backend storage does not support directory indexes, and
            # ``index.html`` must be appended before calling ``storage.url``
            # because the Signature and Expire time is calculated per file.
            path = path + 'index.html'

        # NOTE: calling ``.url`` will remove the trailing slash
        storage_url = build_media_storage.url(path, http_method=request.method)

        # Drop scheme and domain to perform an NGINX internal redirect.
        internal_url = urlparse(storage_url)._replace(scheme='', netloc='').geturl()

        return self._serve_docs(
            request,
            final_project=final_project,
            version_slug=version_slug,
            path=internal_url,
        )
Exemple #3
0
    def get(
        self,
        request,
        project_slug=None,
        type_=None,
        version_slug=None,
        lang_slug=None,
        subproject_slug=None,
    ):
        """
        Download a specific piece of media.

        An auth check is performed if serving in private mode.

        Two URL styles reach this view: old-style URLs (download from the
        dashboard) and new-style URLs (download from the same domain as the
        docs). They differ in the parameters the view receives
        (``project_slug`` is absent from new-style URLs, for example, and the
        project is taken from the request instead). Once the final
        ``version`` is known, both styles follow the same path.

        .. warning:: This is linked directly from the HTML pages.
                     It should only care about the Version permissions,
                     not the actual Project permissions.
        """
        if not self.same_domain_url:
            # Old-style URL: every argument comes from the URL.
            public_versions = Version.objects.public(user=request.user)
            version = get_object_or_404(
                public_versions,
                project__slug=project_slug,
                slug=version_slug,
            )
        else:
            # New-style URL: the ``project`` comes from the request, the
            # remaining arguments come from the URL.
            (
                final_project,
                _lang_slug,
                version_slug,
                _filename,
            ) = _get_project_data_from_request(  # noqa
                request,
                project_slug=None,
                subproject_slug=subproject_slug,
                lang_slug=lang_slug,
                version_slug=version_slug,
            )

            if not self.allowed_user(request, final_project, version_slug):
                return self.get_unauthed_response(request, final_project)

            # ``.public`` is not used in this lookup because access was
            # already granted by ``.allowed_user``.
            version = get_object_or_404(
                final_project.versions,
                slug=version_slug,
            )

        # Report the media download to analytics - sensitive data is anonymized
        analytics_event.delay(
            event_category='Build Media',
            event_action=f'Download {type_}',
            event_label=str(version),
            ua=request.META.get('HTTP_USER_AGENT'),
            uip=get_client_ip(request),
        )

        media_path = version.project.get_storage_path(
            type_=type_,
            version_slug=version_slug,
            version_type=version.type,
        )

        # Drop scheme and domain to perform an NGINX internal redirect.
        storage_url = build_media_storage.url(media_path)
        internal_url = urlparse(storage_url)._replace(scheme='', netloc='').geturl()

        return self._serve_docs(
            request,
            final_project=version.project,
            version_slug=version.slug,
            path=internal_url,
            download=True,
        )
Exemple #4
0
def _create_intersphinx_data(version, commit, build):
    """
    Create intersphinx data for this version.

    The ``objects.inv`` inventory stored with the version's HTML output is
    fetched and one ``SphinxDomain`` object is created per inventory entry
    whose document has a matching ``HTMLFile`` for this build. Entries
    without a matching ``HTMLFile``, and entries that cannot be parsed, are
    logged and skipped.

    Nothing is done when the version is not a Sphinx version or when no
    ``objects.inv`` exists in storage.

    :param version: Version instance
    :param commit: Commit that updated path
    :param build: Build id
    """
    if not version.is_sphinx_type:
        return

    html_storage_path = version.project.get_storage_path(
        type_='html', version_slug=version.slug, include_file=False
    )
    json_storage_path = version.project.get_storage_path(
        type_='json', version_slug=version.slug, include_file=False
    )

    object_file = build_media_storage.join(html_storage_path, 'objects.inv')
    if not build_media_storage.exists(object_file):
        log.debug('No objects.inv, skipping intersphinx indexing.')
        return

    # Optional display names for domain types and document titles. Parsing
    # is best-effort: on any failure the values gathered so far are kept.
    type_file = build_media_storage.join(json_storage_path, 'readthedocs-sphinx-domain-names.json')
    types = {}
    titles = {}
    if build_media_storage.exists(type_file):
        try:
            # Use a context manager so the storage file handle is closed
            # once it has been parsed, instead of being leaked.
            with build_media_storage.open(type_file) as type_fd:
                data = json.load(type_fd)
            types = data['types']
            titles = data['titles']
        except Exception:
            log.exception('Exception parsing readthedocs-sphinx-domain-names.json')

    # These classes are copied from Sphinx
    # https://github.com/sphinx-doc/sphinx/blob/d79d041f4f90818e0b495523fdcc28db12783caf/sphinx/ext/intersphinx.py#L400-L403  # noqa
    class MockConfig:
        intersphinx_timeout = None
        tls_verify = False
        user_agent = None

    class MockApp:
        srcdir = ''
        config = MockConfig()

        def warn(self, msg):
            log.warning('Sphinx MockApp.', msg=msg)

    # Re-create all objects from the new build of the version
    object_file_url = build_media_storage.url(object_file)
    if object_file_url.startswith('/'):
        # Filesystem backed storage simply prepends MEDIA_URL to the path to get the URL
        # This can cause an issue if MEDIA_URL is not fully qualified
        object_file_url = settings.RTD_INTERSPHINX_URL + object_file_url

    invdata = intersphinx.fetch_inventory(MockApp(), '', object_file_url)
    # Guard the inventory itself: a falsy result (presumably ``None`` when
    # the fetch fails in some Sphinx versions -- TODO confirm) must be
    # replaced *before* calling ``.items()`` on it.
    for key, value in sorted((invdata or {}).items()):
        domain, _type = key.split(':', 1)
        for name, einfo in sorted(value.items()):
            # project, version, url, display_name
            # ('Sphinx', '1.7.9', 'faq.html#epub-faq', 'Epub info')
            try:
                # Split on the first ``#`` only, since the anchor can
                # contain ``#`` characters. Without a ``#`` the anchor is
                # empty.
                doc_name, _, anchor = einfo[2].partition('#')
                display_name = einfo[3]
            except Exception:
                log.exception(
                    'Error while getting sphinx domain information. Skipping...',
                    project_slug=version.project.slug,
                    version_slug=version.slug,
                    sphinx_domain=f'{domain}->{name}',
                )
                continue

            # HACK: This is done because the difference between
            # ``sphinx.builders.html.StandaloneHTMLBuilder``
            # and ``sphinx.builders.dirhtml.DirectoryHTMLBuilder``.
            # They both have different ways of generating HTML Files,
            # and therefore the doc_name generated is different.
            # More info on: http://www.sphinx-doc.org/en/master/usage/builders/index.html#builders
            # Also see issue: https://github.com/readthedocs/readthedocs.org/issues/5821
            if doc_name.endswith('/'):
                doc_name += 'index.html'

            html_file = HTMLFile.objects.filter(
                project=version.project, version=version,
                path=doc_name, build=build,
            ).first()

            if not html_file:
                log.debug(
                    'HTMLFile object not found.',
                    project_slug=version.project.slug,
                    version_slug=version.slug,
                    build_id=build,
                    doc_name=doc_name
                )

                # Don't create Sphinx Domain objects
                # if the HTMLFile object is not found.
                continue

            SphinxDomain.objects.create(
                project=version.project,
                version=version,
                html_file=html_file,
                domain=domain,
                name=name,
                display_name=display_name,
                type=_type,
                type_display=types.get(f'{domain}:{_type}', ''),
                doc_name=doc_name,
                doc_display=titles.get(doc_name, ''),
                anchor=anchor,
                commit=commit,
                build=build,
            )