def parse_netloc(netloc): # type: (str) -> Tuple[str, Optional[int]] """ Return the host-port pair from a netloc. """ url = build_url_from_netloc(netloc) parsed = urllib_parse.urlparse(url) return parsed.hostname, parsed.port
def _get_html_page(link, session=None): # type: (Link, Optional[PipSession]) -> Optional[HTMLPage] if session is None: raise TypeError( "_get_html_page() missing 1 required keyword argument: 'session'") url = link.url.split('#', 1)[0] # Check for VCS schemes that do not support lookup as web pages. vcs_scheme = _match_vcs_scheme(url) if vcs_scheme: logger.debug('Cannot look at %s URL %s', vcs_scheme, link) return None # Tack index.html onto file:// URLs that point to directories scheme, _, path, _, _, _ = urllib_parse.urlparse(url) if (scheme == 'file' and os.path.isdir(urllib_request.url2pathname(path))): # add trailing slash if not present so urljoin doesn't trim # final segment if not url.endswith('/'): url += '/' url = urllib_parse.urljoin(url, 'index.html') logger.debug(' file: URL is directory, getting %s', url) try: resp = _get_html_response(url, session=session) except _NotHTTP: logger.debug( 'Skipping page %s because it looks like an archive, and cannot ' 'be checked by HEAD.', link, ) except _NotHTML as exc: logger.debug( 'Skipping page %s because the %s request got Content-Type: %s', link, exc.request_desc, exc.content_type, ) except HTTPError as exc: _handle_get_page_fail(link, exc) except RetryError as exc: _handle_get_page_fail(link, exc) except SSLError as exc: reason = "There was a problem confirming the ssl certificate: " reason += str(exc) _handle_get_page_fail(link, reason, meth=logger.info) except requests.ConnectionError as exc: _handle_get_page_fail(link, "connection error: {}".format(exc)) except requests.Timeout: _handle_get_page_fail(link, "timed out") else: return _make_html_page(resp, cache_link_parsing=link.cache_link_parsing) return None
def _clean_link(url): # type: (str) -> str """ Make sure a link is fully quoted. For example, if ' ' occurs in the URL, it will be replaced with "%20", and without double-quoting other characters. """ # Split the URL into parts according to the general structure # `scheme://netloc/path;parameters?query#fragment`. result = urllib_parse.urlparse(url) # If the netloc is empty, then the URL refers to a local filesystem path. is_local_path = not result.netloc path = _clean_url_path(result.path, is_local_path=is_local_path) return urllib_parse.urlunparse(result._replace(path=path))
def handle_401(self, resp, **kwargs): # We only care about 401 responses, anything else we want to just # pass through the actual response if resp.status_code != 401: return resp # We are not able to prompt the user so simply return the response if not self.prompting: return resp parsed = urllib_parse.urlparse(resp.url) # Prompt the user for a new username and password username, password, save = self._prompt_for_password(parsed.netloc) # Store the new username and password to use for future requests self._credentials_to_save = None if username is not None and password is not None: self.passwords[parsed.netloc] = (username, password) # Prompt to save the password to keyring if save and self._should_save_password_to_keyring(): self._credentials_to_save = (parsed.netloc, username, password) # Consume content and release the original connection to allow our new # request to reuse the same one. resp.content resp.raw.release_conn() # Add our new username and password to the request req = HTTPBasicAuth(username or "", password or "")(resp.request) req.register_hook("response", self.warn_on_401) # On successful request, save the credentials that were used to # keyring. (Note that if the user responded "no" above, this member # is not set and nothing will be saved.) if self._credentials_to_save: req.register_hook("response", self.save_credentials) # Send our new request new_resp = resp.connection.send(req, **kwargs) new_resp.history.append(resp) return new_resp
def create( cls, find_links, # type: List[str] index_urls, # type: List[str] ): # type: (...) -> SearchScope """ Create a SearchScope object after normalizing the `find_links`. """ # Build find_links. If an argument starts with ~, it may be # a local file relative to a home directory. So try normalizing # it and if it exists, use the normalized version. # This is deliberately conservative - it might be fine just to # blindly normalize anything starting with a ~... built_find_links = [] # type: List[str] for link in find_links: if link.startswith('~'): new_link = normalize_path(link) if os.path.exists(new_link): link = new_link built_find_links.append(link) # If we don't have TLS enabled, then WARN if anyplace we're looking # relies on TLS. if not has_tls(): for link in itertools.chain(index_urls, built_find_links): parsed = urllib_parse.urlparse(link) if parsed.scheme == 'https': logger.warning( 'pip is configured with locations that require ' 'TLS/SSL, however the ssl module in Python is not ' 'available.') break return cls( find_links=built_find_links, index_urls=index_urls, )
def __init__(self, requirement_string): # type: (str) -> None try: req = REQUIREMENT.parseString(requirement_string) except ParseException as e: raise InvalidRequirement('Parse error at "{0!r}": {1}'.format( requirement_string[e.loc:e.loc + 8], e.msg)) self.name = req.name if req.url: parsed_url = urlparse.urlparse(req.url) if parsed_url.scheme == "file": if urlparse.urlunparse(parsed_url) != req.url: raise InvalidRequirement("Invalid URL given") elif not (parsed_url.scheme and parsed_url.netloc) or ( not parsed_url.scheme and not parsed_url.netloc): raise InvalidRequirement("Invalid URL: {0}".format(req.url)) self.url = req.url else: self.url = None self.extras = set(req.extras.asList() if req.extras else []) self.specifier = SpecifierSet(req.specifier) self.marker = req.marker if req.marker else None
def __init__(self, index_url, session, use_datetime=False): xmlrpc_client.Transport.__init__(self, use_datetime) index_parts = urllib_parse.urlparse(index_url) self._scheme = index_parts.scheme self._session = session
def is_secure_origin(self, location): # type: (Link) -> bool # Determine if this url used a secure transport mechanism parsed = urllib_parse.urlparse(str(location)) origin_protocol, origin_host, origin_port = ( parsed.scheme, parsed.hostname, parsed.port, ) # The protocol to use to see if the protocol matches. # Don't count the repository type as part of the protocol: in # cases such as "git+ssh", only use "ssh". (I.e., Only verify against # the last scheme.) origin_protocol = origin_protocol.rsplit('+', 1)[-1] # Determine if our origin is a secure origin by looking through our # hardcoded list of secure origins, as well as any additional ones # configured on this PackageFinder instance. for secure_origin in self.iter_secure_origins(): secure_protocol, secure_host, secure_port = secure_origin if origin_protocol != secure_protocol and secure_protocol != "*": continue try: addr = ipaddress.ip_address(None if origin_host is None else six.ensure_text(origin_host)) network = ipaddress.ip_network(six.ensure_text(secure_host)) except ValueError: # We don't have both a valid address or a valid network, so # we'll check this origin against hostnames. if (origin_host and origin_host.lower() != secure_host.lower() and secure_host != "*"): continue else: # We have a valid address and network, so see if the address # is contained within the network. if addr not in network: continue # Check to see if the port matches. if (origin_port != secure_port and secure_port != "*" and secure_port is not None): continue # If we've gotten here, then this origin matches the current # secure origin and we should return True return True # If we've gotten to this point, then the origin isn't secure and we # will not accept it as a valid location to search. We will however # log a warning that we are ignoring it. logger.warning( "The repository located at %s is not a trusted or secure host and " "is being ignored. If this repository is available via HTTPS we " "recommend you use HTTPS instead, otherwise you may silence " "this warning and allow it anyway with '--trusted-host %s'.", origin_host, origin_host, ) return False