def get_formatted_locations(self): # type: () -> str lines = [] redacted_index_urls = [] if self.index_urls and self.index_urls != [PyPI.simple_url]: for url in self.index_urls: redacted_index_url = redact_auth_from_url(url) # Parse the URL purl = urllib_parse.urlsplit(redacted_index_url) # URL is generally invalid if scheme and netloc is missing # there are issues with Python and URL parsing, so this test # is a bit crude. See bpo-20271, bpo-23505. Python doesn't # always parse invalid URLs correctly - it should raise # exceptions for malformed URLs if not purl.scheme and not purl.netloc: logger.warning( 'The index url "%s" seems invalid, ' 'please provide a scheme.', redacted_index_url) redacted_index_urls.append(redacted_index_url) lines.append('Looking in indexes: {}'.format( ', '.join(redacted_index_urls))) if self.find_links: lines.append('Looking in links: {}'.format(', '.join( redact_auth_from_url(url) for url in self.find_links))) return '\n'.join(lines)
def get_url_rev_and_auth(cls, url): # type: (str) -> Tuple[str, Optional[str], AuthInfo] """ Parse the repository URL to use, and return the URL, revision, and auth info to use. Returns: (url, rev, (username, password)). """ scheme, netloc, path, query, frag = urllib_parse.urlsplit(url) if '+' not in scheme: raise ValueError( "Sorry, {!r} is a malformed VCS url. " "The format is <vcs>+<protocol>://<url>, " "e.g. svn+http://myrepo/svn/MyApp#egg=MyApp".format(url)) # Remove the vcs prefix. scheme = scheme.split('+', 1)[1] netloc, user_pass = cls.get_netloc_and_auth(netloc, scheme) rev = None if '@' in path: path, rev = path.rsplit('@', 1) if not rev: raise InstallationError( "The URL {!r} has an empty revision (after @) " "which is not supported. Include a revision after @ " "or remove @ from the URL.".format(url)) url = urllib_parse.urlunsplit((scheme, netloc, path, query, '')) return url, rev, user_pass
def redacted_url(self): # type: () -> str """url with user:password part removed unless it is formed with environment variables as specified in PEP 610, or it is ``git`` in the case of a git URL. """ purl = urllib_parse.urlsplit(self.url) netloc = self._remove_auth_from_netloc(purl.netloc) surl = urllib_parse.urlunsplit( (purl.scheme, netloc, purl.path, purl.query, purl.fragment)) return surl
def __init__(self, url, file_storage_domain): # type: (str, str) -> None super(PackageIndex, self).__init__() self.url = url self.netloc = urllib_parse.urlsplit(url).netloc self.simple_url = self._url_for_path('simple') self.pypi_url = self._url_for_path('pypi') # This is part of a temporary hack used to block installs of PyPI # packages which depend on external urls only necessary until PyPI can # block such packages themselves self.file_storage_domain = file_storage_domain
def _ensure_html_response(url, session): # type: (str, PipSession) -> None """Send a HEAD request to the URL, and ensure the response contains HTML. Raises `_NotHTTP` if the URL is not available for a HEAD request, or `_NotHTML` if the content type is not text/html. """ scheme, netloc, path, query, fragment = urllib_parse.urlsplit(url) if scheme not in {'http', 'https'}: raise _NotHTTP() resp = session.head(url, allow_redirects=True) raise_for_status(resp) _ensure_html_header(resp)
def __init__( self, url, # type: str comes_from=None, # type: Optional[Union[str, HTMLPage]] requires_python=None, # type: Optional[str] yanked_reason=None, # type: Optional[Text] cache_link_parsing=True, # type: bool ): # type: (...) -> None """ :param url: url of the resource pointed to (href of the link) :param comes_from: instance of HTMLPage where the link was found, or string. :param requires_python: String containing the `Requires-Python` metadata field, specified in PEP 345. This may be specified by a data-requires-python attribute in the HTML link tag, as described in PEP 503. :param yanked_reason: the reason the file has been yanked, if the file has been yanked, or None if the file hasn't been yanked. This is the value of the "data-yanked" attribute, if present, in a simple repository HTML link. If the file has been yanked but no reason was provided, this should be the empty string. See PEP 592 for more information and the specification. :param cache_link_parsing: A flag that is used elsewhere to determine whether resources retrieved from this link should be cached. PyPI index urls should generally have this set to False, for example. """ # url can be a UNC windows share if url.startswith('\\\\'): url = path_to_url(url) self._parsed_url = urllib_parse.urlsplit(url) # Store the url as a private attribute to prevent accidentally # trying to set a new value. self._url = url self.comes_from = comes_from self.requires_python = requires_python if requires_python else None self.yanked_reason = yanked_reason super(Link, self).__init__(key=url, defining_class=Link) self.cache_link_parsing = cache_link_parsing
def _transform_url(url, transform_netloc): """Transform and replace netloc in a url. transform_netloc is a function taking the netloc and returning a tuple. The first element of this tuple is the new netloc. The entire tuple is returned. Returns a tuple containing the transformed url as item 0 and the original tuple returned by transform_netloc as item 1. """ purl = urllib_parse.urlsplit(url) netloc_tuple = transform_netloc(purl.netloc) # stripped url url_pieces = ( purl.scheme, netloc_tuple[0], purl.path, purl.query, purl.fragment ) surl = urllib_parse.urlunsplit(url_pieces) return surl, netloc_tuple
def url_to_path(url): # type: (str) -> str """ Convert a file: URL to a path. """ assert url.startswith('file:'), ( "You can only turn file: urls into filenames (not {url!r})".format( **locals())) _, netloc, path, _, _ = urllib_parse.urlsplit(url) if not netloc or netloc == 'localhost': # According to RFC 8089, same as empty authority. netloc = '' elif sys.platform == 'win32': # If we have a UNC path, prepend UNC share notation. netloc = '\\\\' + netloc else: raise ValueError( 'non-local file URIs are not supported on this platform: {url!r}'. format(**locals())) path = urllib_request.url2pathname(netloc + path) return path