Example #1
    def get_formatted_locations(self):
        # type: () -> str
        lines = []
        redacted_index_urls = []
        if self.index_urls and self.index_urls != [PyPI.simple_url]:
            for url in self.index_urls:
                redacted_index_url = redact_auth_from_url(url)

                # Parse the URL
                purl = urllib_parse.urlsplit(redacted_index_url)

                # A URL is generally invalid if both scheme and netloc are
                # missing. There are issues with Python's URL parsing, so
                # this test is a bit crude: Python does not reliably raise
                # errors for malformed URLs (see bpo-20271, bpo-23505).
                if not purl.scheme and not purl.netloc:
                    logger.warning(
                        'The index url "{}" seems invalid, '
                        'please provide a scheme.'.format(redacted_index_url))

                redacted_index_urls.append(redacted_index_url)

            lines.append('Looking in indexes: {}'.format(
                ', '.join(redacted_index_urls)))

        if self.find_links:
            lines.append('Looking in links: {}'.format(', '.join(
                redact_auth_from_url(url) for url in self.find_links)))
        return '\n'.join(lines)
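
A standalone sketch of the crude scheme check above, using the standard-library urllib.parse (pip's vendored urllib_parse behaves the same way here); the helper name is illustrative:

from urllib.parse import urlsplit

def looks_invalid(index_url):
    # Mirrors the crude test above: a URL missing both a scheme and a
    # netloc is almost certainly malformed.
    purl = urlsplit(index_url)
    return not purl.scheme and not purl.netloc

print(looks_invalid("https://pypi.org/simple"))  # False
print(looks_invalid("pypi.org/simple"))          # True - parsed as a bare path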
Example #2
    def get_url_rev_and_auth(cls, url):
        # type: (str) -> Tuple[str, Optional[str], AuthInfo]
        """
        Parse the repository URL to use, and return the URL, revision,
        and auth info to use.

        Returns: (url, rev, (username, password)).
        """
        scheme, netloc, path, query, frag = urllib_parse.urlsplit(url)
        if '+' not in scheme:
            raise ValueError(
                "Sorry, {!r} is a malformed VCS url. "
                "The format is <vcs>+<protocol>://<url>, "
                "e.g. svn+http://myrepo/svn/MyApp#egg=MyApp".format(url))
        # Remove the vcs prefix.
        scheme = scheme.split('+', 1)[1]
        netloc, user_pass = cls.get_netloc_and_auth(netloc, scheme)
        rev = None
        if '@' in path:
            path, rev = path.rsplit('@', 1)
            if not rev:
                raise InstallationError(
                    "The URL {!r} has an empty revision (after @) "
                    "which is not supported. Include a revision after @ "
                    "or remove @ from the URL.".format(url))
        url = urllib_parse.urlunsplit((scheme, netloc, path, query, ''))
        return url, rev, user_pass
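
A simplified, self-contained sketch of the same parsing for a typical VCS URL; auth handling (get_netloc_and_auth) and the error types are omitted, so treat this as an illustration rather than pip's implementation:

from urllib.parse import urlsplit, urlunsplit

def split_vcs_url(url):
    # Strip the "<vcs>+" prefix from the scheme and pull the revision
    # (the part after the last "@") off the path.
    scheme, netloc, path, query, frag = urlsplit(url)
    assert '+' in scheme, "expected <vcs>+<protocol>://..."
    scheme = scheme.split('+', 1)[1]
    rev = None
    if '@' in path:
        path, rev = path.rsplit('@', 1)
    return urlunsplit((scheme, netloc, path, query, '')), rev

print(split_vcs_url("git+https://github.com/pypa/pip.git@20.1#egg=pip"))
# ('https://github.com/pypa/pip.git', '20.1')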
Example #3
    def redacted_url(self):
        # type: () -> str
        """url with user:password part removed unless it is formed with
        environment variables as specified in PEP 610, or it is ``git``
        in the case of a git URL.
        """
        purl = urllib_parse.urlsplit(self.url)
        netloc = self._remove_auth_from_netloc(purl.netloc)
        surl = urllib_parse.urlunsplit(
            (purl.scheme, netloc, purl.path, purl.query, purl.fragment))
        return surl
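
_remove_auth_from_netloc is not shown above (and pip's version also preserves PEP 610 environment-variable forms and the ``git`` user); a minimal sketch that simply drops any user:password@ prefix from the netloc:

from urllib.parse import urlsplit, urlunsplit

def redact_url(url):
    # Rebuild the URL with the userinfo part of the netloc dropped.
    purl = urlsplit(url)
    netloc = purl.netloc.rsplit('@', 1)[-1]  # keep only host[:port]
    return urlunsplit((purl.scheme, netloc, purl.path, purl.query, purl.fragment))

print(redact_url("https://user:secret@example.com/simple"))
# https://example.com/simple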
Example #4
    def __init__(self, url, file_storage_domain):
        # type: (str, str) -> None
        super(PackageIndex, self).__init__()
        self.url = url
        self.netloc = urllib_parse.urlsplit(url).netloc
        self.simple_url = self._url_for_path('simple')
        self.pypi_url = self._url_for_path('pypi')

        # This is part of a temporary hack used to block installs of PyPI
        # packages which depend on external URLs; it is only necessary until
        # PyPI can block such packages itself.
        self.file_storage_domain = file_storage_domain
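
_url_for_path is not shown in the excerpt; assuming it is essentially a urljoin against the index root URL, the derived attributes come out like this:

from urllib.parse import urljoin, urlsplit

base = 'https://pypi.org/'
print(urlsplit(base).netloc)    # pypi.org                  -> self.netloc
print(urljoin(base, 'simple'))  # https://pypi.org/simple   -> self.simple_url
print(urljoin(base, 'pypi'))    # https://pypi.org/pypi     -> self.pypi_url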
Example #5
def _ensure_html_response(url, session):
    # type: (str, PipSession) -> None
    """Send a HEAD request to the URL, and ensure the response contains HTML.

    Raises `_NotHTTP` if the URL is not available for a HEAD request, or
    `_NotHTML` if the content type is not text/html.
    """
    scheme, netloc, path, query, fragment = urllib_parse.urlsplit(url)
    if scheme not in {'http', 'https'}:
        raise _NotHTTP()

    resp = session.head(url, allow_redirects=True)
    resp.raise_for_status()

    _ensure_html_header(resp)
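
_ensure_html_header, _NotHTTP, _NotHTML and PipSession are not shown; a rough, self-contained sketch of the same two checks using requests directly (the exception choices here are stand-ins):

import requests
from urllib.parse import urlsplit

def ensure_html_response(url):
    # Gate 1: only http(s) URLs are eligible for a HEAD probe.
    if urlsplit(url).scheme not in {'http', 'https'}:
        raise ValueError('not an HTTP(S) URL: {!r}'.format(url))
    # Gate 2: the HEAD response must advertise an HTML content type.
    resp = requests.head(url, allow_redirects=True)
    resp.raise_for_status()
    content_type = resp.headers.get('Content-Type', '')
    if not content_type.lower().startswith('text/html'):
        raise ValueError('not text/html: {!r} ({})'.format(url, content_type))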
Example #6
def _transform_url(url, transform_netloc):
    """Transform and replace netloc in a url.

    transform_netloc is a function taking the netloc and returning a
    tuple. The first element of this tuple is the new netloc. The
    entire tuple is returned.

    Returns a tuple containing the transformed url as item 0 and the
    original tuple returned by transform_netloc as item 1.
    """
    purl = urllib_parse.urlsplit(url)
    netloc_tuple = transform_netloc(purl.netloc)
    # stripped url
    url_pieces = (purl.scheme, netloc_tuple[0], purl.path, purl.query,
                  purl.fragment)
    surl = urllib_parse.urlunsplit(url_pieces)
    return surl, netloc_tuple
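
A usage sketch pairing _transform_url with a netloc callback; the split_auth helper below is illustrative, not pip's:

from urllib.parse import urlsplit, urlunsplit

def _transform_url(url, transform_netloc):
    purl = urlsplit(url)
    netloc_tuple = transform_netloc(purl.netloc)
    url_pieces = (purl.scheme, netloc_tuple[0], purl.path, purl.query,
                  purl.fragment)
    return urlunsplit(url_pieces), netloc_tuple

def split_auth(netloc):
    # Illustrative callback: return (netloc without auth, (user, password)).
    if '@' not in netloc:
        return netloc, (None, None)
    auth, host = netloc.rsplit('@', 1)
    user, _, password = auth.partition(':')
    return host, (user, password or None)

print(_transform_url('https://user:pw@example.com/simple', split_auth))
# ('https://example.com/simple', ('example.com', ('user', 'pw')))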
Example #7
    def __init__(
        self,
        url,                   # type: str
        comes_from=None,       # type: Optional[Union[str, HTMLPage]]
        requires_python=None,  # type: Optional[str]
        yanked_reason=None,    # type: Optional[Text]
        cache_link_parsing=True,  # type: bool
    ):
        # type: (...) -> None
        """
        :param url: url of the resource pointed to (href of the link)
        :param comes_from: instance of HTMLPage where the link was found,
            or string.
        :param requires_python: String containing the `Requires-Python`
            metadata field, specified in PEP 345. This may be specified by
            a data-requires-python attribute in the HTML link tag, as
            described in PEP 503.
        :param yanked_reason: the reason the file has been yanked, if the
            file has been yanked, or None if the file hasn't been yanked.
            This is the value of the "data-yanked" attribute, if present, in
            a simple repository HTML link. If the file has been yanked but
            no reason was provided, this should be the empty string. See
            PEP 592 for more information and the specification.
        :param cache_link_parsing: A flag that is used elsewhere to determine
                                   whether resources retrieved from this link
                                   should be cached. PyPI index urls should
                                   generally have this set to False, for
                                   example.
        """

        # url can be a UNC windows share
        if url.startswith('\\\\'):
            url = path_to_url(url)

        self._parsed_url = urllib_parse.urlsplit(url)
        # Store the url as a private attribute to prevent accidentally
        # trying to set a new value.
        self._url = url

        self.comes_from = comes_from
        self.requires_python = requires_python if requires_python else None
        self.yanked_reason = yanked_reason

        super(Link, self).__init__(key=url, defining_class=Link)

        self.cache_link_parsing = cache_link_parsing
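
The cached _parsed_url is what Link's derived properties (scheme, netloc, path, the URL fragment, and so on) are built from elsewhere in the class; a quick look at what urlsplit yields for a typical index link (the URL below is made up):

from urllib.parse import urlsplit

parsed = urlsplit(
    'https://files.pythonhosted.org/packages/source/p/pip/pip-20.1.tar.gz'
    '#sha256=abc123')
print(parsed.scheme)    # https
print(parsed.netloc)    # files.pythonhosted.org
print(parsed.path)      # /packages/source/p/pip/pip-20.1.tar.gz
print(parsed.fragment)  # sha256=abc123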
Example #8
def url_to_path(url):
    # type: (str) -> str
    """
    Convert a file: URL to a path.
    """
    assert url.startswith('file:'), (
        "You can only turn file: urls into filenames (not {url!r})".format(
            **locals()))

    _, netloc, path, _, _ = urllib_parse.urlsplit(url)

    if not netloc or netloc == 'localhost':
        # According to RFC 8089, same as empty authority.
        netloc = ''
    elif sys.platform == 'win32':
        # If we have a UNC path, prepend UNC share notation.
        netloc = '\\\\' + netloc
    else:
        raise ValueError(
            'non-local file URIs are not supported on this platform: {url!r}'.
            format(**locals()))

    path = urllib_request.url2pathname(netloc + path)
    return path
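
A self-contained sketch of the same conversion using the standard library (url2pathname is the helper behind the urllib_request call above); the UNC branch is omitted, and the exact output is platform-dependent:

from urllib.parse import urlsplit
from urllib.request import url2pathname

def file_url_to_path(url):
    scheme, netloc, path, _, _ = urlsplit(url)
    assert scheme == 'file', url
    # Local URLs only; a UNC share would need netloc re-attached as \\server.
    assert netloc in ('', 'localhost'), 'non-local file URL: {!r}'.format(url)
    return url2pathname(path)

print(file_url_to_path('file:///tmp/example.whl'))
# /tmp/example.whl on POSIX, \tmp\example.whl on Windows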