def handle_401(self, resp, **kwargs): # We only care about 401 responses, anything else we want to just # pass through the actual response if resp.status_code != 401: return resp # We are not able to prompt the user so simple return the response if not self.prompting: return resp parsed = urlparse.urlparse(resp.url) # Prompt the user for a new username and password username = raw_input("User for %s: " % parsed.netloc) password = getpass.getpass("Password: "******"", password or "")(resp.request) # Send our new request new_resp = resp.connection.send(req, **kwargs) new_resp.history.append(resp) return new_resp
def __call__(self, req): parsed = urlparse.urlparse(req.url) # Get the netloc without any embedded credentials netloc = parsed.netloc.split("@", 1)[-1] # Set the url of the request to the url without any credentials req.url = urlparse.urlunparse(parsed[:1] + (netloc,) + parsed[2:]) # Use any stored credentials that we have for this netloc username, password = self.passwords.get(netloc, (None, None)) # Extract credentials embedded in the url if we have none stored if username is None: username, password = self.parse_credentials(parsed.netloc) if username or password: # Store the username and password self.passwords[netloc] = (username, password) # Send the basic auth with this request req = HTTPBasicAuth(username or "", password or "")(req) # Attach a hook to handle 401 responses req.register_hook("response", self.handle_401) return req
def send(self, request, stream=None, timeout=None, verify=None, cert=None, proxies=None): parsed_url = urlparse.urlparse(request.url) # We only work for requests with a host of localhost if parsed_url.netloc.lower() != "localhost": raise InvalidURL("Invalid URL %r: Only localhost is allowed" % request.url) real_url = urlparse.urlunparse(parsed_url[:1] + ("",) + parsed_url[2:]) pathname = url_to_path(real_url) resp = Response() resp.status_code = 200 resp.url = real_url stats = os.stat(pathname) modified = email.utils.formatdate(stats.st_mtime, usegmt=True) resp.headers = CaseInsensitiveDict({ "Content-Type": mimetypes.guess_type(pathname)[0] or "text/plain", "Content-Length": stats.st_size, "Last-Modified": modified, }) resp.raw = LocalFSResponse(open(pathname, "rb")) resp.close = resp.raw.close return resp
def request(self, method, url, *args, **kwargs): # Make file:// urls not fail due to lack of a hostname parsed = urlparse.urlparse(url) if parsed.scheme == "file": url = urlparse.urlunparse(parsed[:1] + ("localhost",) + parsed[2:]) # Allow setting a default timeout on a session kwargs.setdefault("timeout", self.timeout) # Dispatch the actual request return super(PipSession, self).request(method, url, *args, **kwargs)
def get_page(cls, link, req, cache=None, skip_archives=True, session=None): if session is None: session = PipSession() url = link.url url = url.split('#', 1)[0] if cache.too_many_failures(url): return None # Check for VCS schemes that do not support lookup as web pages. from pip.vcs import VcsSupport for scheme in VcsSupport.schemes: if url.lower().startswith(scheme) and url[len(scheme)] in '+:': logger.debug( 'Cannot look at %(scheme)s URL %(link)s' % locals() ) return None if cache is not None: inst = cache.get_page(url) if inst is not None: return inst try: if skip_archives: if cache is not None: if cache.is_archive(url): return None filename = link.filename for bad_ext in ['.tar', '.tar.gz', '.tar.bz2', '.tgz', '.zip']: if filename.endswith(bad_ext): content_type = cls._get_content_type( url, session=session, ) if content_type.lower().startswith('text/html'): break else: logger.debug( 'Skipping page %s because of Content-Type: ' '%s' % (link, content_type) ) if cache is not None: cache.set_is_archive(url) return None logger.debug('Getting page %s' % url) # Tack index.html onto file:// URLs that point to directories (scheme, netloc, path, params, query, fragment) = \ urlparse.urlparse(url) if scheme == 'file' and os.path.isdir(url2pathname(path)): # add trailing slash if not present so urljoin doesn't trim # final segment if not url.endswith('/'): url += '/' url = urlparse.urljoin(url, 'index.html') logger.debug(' file: URL is directory, getting %s' % url) resp = session.get(url, headers={"Accept": "text/html"}) resp.raise_for_status() # The check for archives above only works if the url ends with # something that looks like an archive. However that is not a # requirement of an url. Unless we issue a HEAD request on every # url we cannot know ahead of time for sure if something is HTML # or not. However we can check after we've downloaded it. content_type = resp.headers.get('Content-Type', 'unknown') if not content_type.lower().startswith("text/html"): logger.debug( 'Skipping page %s because of Content-Type: %s' % (link, content_type) ) if cache is not None: cache.set_is_archive(url) return None inst = cls(resp.text, resp.url, resp.headers, trusted=link.trusted) except requests.HTTPError as exc: level = 2 if exc.response.status_code == 404 else 1 cls._handle_fail(req, link, exc, url, cache=cache, level=level) except requests.ConnectionError as exc: cls._handle_fail( req, link, "connection error: %s" % exc, url, cache=cache, ) except requests.Timeout: cls._handle_fail(req, link, "timed out", url, cache=cache) except SSLError as exc: reason = ("There was a problem confirming the ssl certificate: " "%s" % exc) cls._handle_fail( req, link, reason, url, cache=cache, level=2, meth=logger.notify, ) else: if cache is not None: cache.add_page([url, resp.url], inst) return inst
def _link_package_versions(self, link, search_name): """ Return an iterable of triples (pkg_resources_version_key, link, python_version) that can be extracted from the given link. Meant to be overridden by subclasses, not called by clients. """ platform = get_platform() version = None if link.egg_fragment: egg_info = link.egg_fragment else: egg_info, ext = link.splitext() if not ext: if link not in self.logged_links: logger.debug('Skipping link %s; not a file' % link) self.logged_links.add(link) return [] if egg_info.endswith('.tar'): # Special double-extension case: egg_info = egg_info[:-4] ext = '.tar' + ext if ext not in self._known_extensions(): if link not in self.logged_links: logger.debug( 'Skipping link %s; unknown archive format: %s' % (link, ext) ) self.logged_links.add(link) return [] if "macosx10" in link.path and ext == '.zip': if link not in self.logged_links: logger.debug('Skipping link %s; macosx10 one' % (link)) self.logged_links.add(link) return [] if ext == wheel_ext: try: wheel = Wheel(link.filename) except InvalidWheelFilename: logger.debug( 'Skipping %s because the wheel filename is invalid' % link ) return [] if wheel.name.lower() != search_name.lower(): logger.debug( 'Skipping link %s; wrong project name (not %s)' % (link, search_name) ) return [] if not wheel.supported(): logger.debug( 'Skipping %s because it is not compatible with this ' 'Python' % link ) return [] # This is a dirty hack to prevent installing Binary Wheels from # PyPI unless it is a Windows or Mac Binary Wheel. This is # paired with a change to PyPI disabling uploads for the # same. Once we have a mechanism for enabling support for # binary wheels on linux that deals with the inherent problems # of binary distribution this can be removed. comes_from = getattr(link, "comes_from", None) if ( ( not platform.startswith('win') and not platform.startswith('macosx') ) and comes_from is not None and urlparse.urlparse( comes_from.url ).netloc.endswith("pypi.python.org")): if not wheel.supported(tags=supported_tags_noarch): logger.debug( "Skipping %s because it is a pypi-hosted binary " "Wheel on an unsupported platform" % link ) return [] version = wheel.version if not version: version = self._egg_info_matches(egg_info, search_name, link) if version is None: logger.debug( 'Skipping link %s; wrong project name (not %s)' % (link, search_name) ) return [] if (link.internal is not None and not link.internal and not normalize_name(search_name).lower() in self.allow_external and not self.allow_all_external): # We have a link that we are sure is external, so we should skip # it unless we are allowing externals logger.debug("Skipping %s because it is externally hosted." % link) self.need_warn_external = True return [] if (link.verifiable is not None and not link.verifiable and not (normalize_name(search_name).lower() in self.allow_unverified)): # We have a link that we are sure we cannot verify its integrity, # so we should skip it unless we are allowing unsafe installs # for this requirement. logger.debug("Skipping %s because it is an insecure and " "unverifiable file." % link) self.need_warn_unverified = True return [] match = self._py_version_re.search(version) if match: version = version[:match.start()] py_version = match.group(1) if py_version != sys.version[:3]: logger.debug( 'Skipping %s because Python version is incorrect' % link ) return [] logger.debug('Found link %s, version: %s' % (link, version)) return [( pkg_resources.parse_version(version), link, version, )]
def find_requirement(self, req, upgrade): def mkurl_pypi_url(url): loc = posixpath.join(url, url_name) # For maximum compatibility with easy_install, ensure the path # ends in a trailing slash. Although this isn't in the spec # (and PyPI can handle it without the slash) some other index # implementations might break if they relied on easy_install's # behavior. if not loc.endswith('/'): loc = loc + '/' return loc url_name = req.url_name # Only check main index if index URL is given: main_index_url = None if self.index_urls: # Check that we have the url_name correctly spelled: main_index_url = Link( mkurl_pypi_url(self.index_urls[0]), trusted=True, ) # This will also cache the page, so it's okay that we get it again # later: page = self._get_page(main_index_url, req) if page is None: url_name = self._find_url_name( Link(self.index_urls[0], trusted=True), url_name, req ) or req.url_name if url_name is not None: locations = [ mkurl_pypi_url(url) for url in self.index_urls] + self.find_links else: locations = list(self.find_links) for version in req.absolute_versions: if url_name is not None and main_index_url is not None: locations = [ posixpath.join(main_index_url.url, version)] + locations file_locations, url_locations = self._sort_locations(locations) # We trust every url that the user has given us whether it was given # via --index-url or --find-links locations = [Link(url, trusted=True) for url in url_locations] logger.debug('URLs to search for versions for %s:' % req) for location in locations: logger.debug('* %s' % location) # Determine if this url used a secure transport mechanism parsed = urlparse.urlparse(str(location)) if parsed.scheme in INSECURE_SCHEMES: secure_schemes = INSECURE_SCHEMES[parsed.scheme] if len(secure_schemes) == 1: ctx = (location, parsed.scheme, secure_schemes[0], parsed.netloc) logger.warn("%s uses an insecure transport scheme (%s). " "Consider using %s if %s has it available" % ctx) elif len(secure_schemes) > 1: ctx = ( location, parsed.scheme, ", ".join(secure_schemes), parsed.netloc, ) logger.warn("%s uses an insecure transport scheme (%s). " "Consider using one of %s if %s has any of " "them available" % ctx) else: ctx = (location, parsed.scheme) logger.warn("%s uses an insecure transport scheme (%s)." % ctx) found_versions = [] found_versions.extend( self._package_versions( # We trust every directly linked archive in find_links [Link(url, '-f', trusted=True) for url in self.find_links], req.name.lower() ) ) page_versions = [] for page in self._get_pages(locations, req): logger.debug('Analyzing links from page %s' % page.url) logger.indent += 2 try: page_versions.extend( self._package_versions(page.links, req.name.lower()) ) finally: logger.indent -= 2 file_versions = list( self._package_versions( [Link(url) for url in file_locations], req.name.lower() ) ) if (not found_versions and not page_versions and not file_versions): logger.fatal( 'Could not find any downloads that satisfy the requirement' ' %s' % req ) if self.need_warn_external: logger.warn("Some externally hosted files were ignored (use " "--allow-external %s to allow)." % req.name) if self.need_warn_unverified: logger.warn("Some insecure and unverifiable files were ignored" " (use --allow-unverified %s to allow)." % req.name) raise DistributionNotFound( 'No distributions at all found for %s' % req ) installed_version = [] if req.satisfied_by is not None: installed_version = [( req.satisfied_by.parsed_version, INSTALLED_VERSION, req.satisfied_by.version, )] if file_versions: file_versions.sort(reverse=True) logger.info( 'Local files found: %s' % ', '.join([ url_to_path(link.url) for parsed, link, version in file_versions ]) ) # this is an intentional priority ordering all_versions = installed_version + file_versions + found_versions \ + page_versions applicable_versions = [] for (parsed_version, link, version) in all_versions: if version not in req.req: logger.info( "Ignoring link %s, version %s doesn't match %s" % ( link, version, ','.join([''.join(s) for s in req.req.specs]) ) ) continue elif (is_prerelease(version) and not (self.allow_all_prereleases or req.prereleases)): # If this version isn't the already installed one, then # ignore it if it's a pre-release. if link is not INSTALLED_VERSION: logger.info( "Ignoring link %s, version %s is a pre-release (use " "--pre to allow)." % (link, version) ) continue applicable_versions.append((parsed_version, link, version)) applicable_versions = self._sort_versions(applicable_versions) existing_applicable = bool([ link for parsed_version, link, version in applicable_versions if link is INSTALLED_VERSION ]) if not upgrade and existing_applicable: if applicable_versions[0][1] is INSTALLED_VERSION: logger.info( 'Existing installed version (%s) is most up-to-date and ' 'satisfies requirement' % req.satisfied_by.version ) else: logger.info( 'Existing installed version (%s) satisfies requirement ' '(most up-to-date version is %s)' % (req.satisfied_by.version, applicable_versions[0][2]) ) return None if not applicable_versions: logger.fatal( 'Could not find a version that satisfies the requirement %s ' '(from versions: %s)' % ( req, ', '.join([ version for parsed_version, link, version in all_versions ]) ) ) if self.need_warn_external: logger.warn("Some externally hosted files were ignored (use " "--allow-external to allow).") if self.need_warn_unverified: logger.warn("Some insecure and unverifiable files were ignored" " (use --allow-unverified %s to allow)." % req.name) raise DistributionNotFound( 'No distributions matching the version for %s' % req ) if applicable_versions[0][1] is INSTALLED_VERSION: # We have an existing version, and its the best version logger.info( 'Installed version (%s) is most up-to-date (past versions: ' '%s)' % ( req.satisfied_by.version, ', '.join([ version for parsed_version, link, version in applicable_versions[1:] ]) or 'none')) raise BestVersionAlreadyInstalled if len(applicable_versions) > 1: logger.info( 'Using version %s (newest of versions: %s)' % ( applicable_versions[0][2], ', '.join([ version for parsed_version, link, version in applicable_versions ]) ) ) selected_version = applicable_versions[0][1] if (selected_version.internal is not None and not selected_version.internal): logger.warn("%s an externally hosted file and may be " "unreliable" % req.name) if (selected_version.verifiable is not None and not selected_version.verifiable): logger.warn("%s is potentially insecure and " "unverifiable." % req.name) if selected_version._deprecated_regex: logger.deprecated( "1.7", "%s discovered using a deprecated method of parsing, " "in the future it will no longer be discovered" % req.name ) return selected_version
def get_page(cls, link, req, cache=None, skip_archives=True): url = link.url url = url.split('#', 1)[0] if cache.too_many_failures(url): return None # Check for VCS schemes that do not support lookup as web pages. from pip.vcs import VcsSupport for scheme in VcsSupport.schemes: if url.lower().startswith(scheme) and url[len(scheme)] in '+:': logger.debug('Cannot look at %(scheme)s URL %(link)s' % locals()) return None if cache is not None: inst = cache.get_page(url) if inst is not None: return inst try: if skip_archives: if cache is not None: if cache.is_archive(url): return None filename = link.filename for bad_ext in ['.tar', '.tar.gz', '.tar.bz2', '.tgz', '.zip']: if filename.endswith(bad_ext): content_type = cls._get_content_type(url) if content_type.lower().startswith('text/html'): break else: logger.debug('Skipping page %s because of Content-Type: %s' % (link, content_type)) if cache is not None: cache.set_is_archive(url) return None logger.debug('Getting page %s' % url) # Tack index.html onto file:// URLs that point to directories (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url) if scheme == 'file' and os.path.isdir(url2pathname(path)): # add trailing slash if not present so urljoin doesn't trim final segment if not url.endswith('/'): url += '/' url = urlparse.urljoin(url, 'index.html') logger.debug(' file: URL is directory, getting %s' % url) resp = urlopen(url) real_url = geturl(resp) headers = resp.info() contents = resp.read() encoding = headers.get('Content-Encoding', None) #XXX need to handle exceptions and add testing for this if encoding is not None: if encoding == 'gzip': contents = gzip.GzipFile(fileobj=BytesIO(contents)).read() if encoding == 'deflate': contents = zlib.decompress(contents) inst = cls(u(contents), real_url, headers) except (HTTPError, URLError, socket.timeout, socket.error, OSError, WindowsError): e = sys.exc_info()[1] desc = str(e) if isinstance(e, socket.timeout): log_meth = logger.info level =1 desc = 'timed out' elif isinstance(e, URLError): log_meth = logger.info if hasattr(e, 'reason') and isinstance(e.reason, socket.timeout): desc = 'timed out' level = 1 else: level = 2 elif isinstance(e, HTTPError) and e.code == 404: ## FIXME: notify? log_meth = logger.info level = 2 else: log_meth = logger.info level = 1 log_meth('Could not fetch URL %s: %s' % (link, desc)) log_meth('Will skip URL %s when looking for download links for %s' % (link.url, req)) if cache is not None: cache.add_page_failure(url, level) return None if cache is not None: cache.add_page([url, real_url], inst) return inst
def get_page(cls, link, req, cache=None, skip_archives=True, session=None): if session is None: session = PipSession() url = link.url url = url.split("#", 1)[0] if cache.too_many_failures(url): return None # Check for VCS schemes that do not support lookup as web pages. from pip.vcs import VcsSupport for scheme in VcsSupport.schemes: if url.lower().startswith(scheme) and url[len(scheme)] in "+:": logger.debug("Cannot look at %(scheme)s URL %(link)s" % locals()) return None if cache is not None: inst = cache.get_page(url) if inst is not None: return inst try: if skip_archives: if cache is not None: if cache.is_archive(url): return None filename = link.filename for bad_ext in [".tar", ".tar.gz", ".tar.bz2", ".tgz", ".zip"]: if filename.endswith(bad_ext): content_type = cls._get_content_type(url, session=session) if content_type.lower().startswith("text/html"): break else: logger.debug("Skipping page %s because of Content-Type: %s" % (link, content_type)) if cache is not None: cache.set_is_archive(url) return None logger.debug("Getting page %s" % url) # Tack index.html onto file:// URLs that point to directories (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url) if scheme == "file" and os.path.isdir(url2pathname(path)): # add trailing slash if not present so urljoin doesn't trim final segment if not url.endswith("/"): url += "/" url = urlparse.urljoin(url, "index.html") logger.debug(" file: URL is directory, getting %s" % url) resp = session.get(url) resp.raise_for_status() # The check for archives above only works if the url ends with # something that looks like an archive. However that is not a # requirement. For instance http://sourceforge.net/projects/docutils/files/docutils/0.8.1/docutils-0.8.1.tar.gz/download # redirects to http://superb-dca3.dl.sourceforge.net/project/docutils/docutils/0.8.1/docutils-0.8.1.tar.gz # Unless we issue a HEAD request on every url we cannot know # ahead of time for sure if something is HTML or not. However we # can check after we've downloaded it. content_type = resp.headers.get("Content-Type", "unknown") if not content_type.lower().startswith("text/html"): logger.debug("Skipping page %s because of Content-Type: %s" % (link, content_type)) if cache is not None: cache.set_is_archive(url) return None inst = cls(resp.text, resp.url, resp.headers, trusted=link.trusted) except requests.HTTPError as exc: level = 2 if exc.response.status_code == 404 else 1 cls._handle_fail(req, link, exc, url, cache=cache, level=level) except requests.Timeout: cls._handle_fail(req, link, "timed out", url, cache=cache) except SSLError as exc: reason = "There was a problem confirming the ssl certificate: " "%s" % exc cls._handle_fail(req, link, reason, url, cache=cache, level=2, meth=logger.notify) else: if cache is not None: cache.add_page([url, resp.url], inst) return inst
def get_page(cls, link, req, cache=None, skip_archives=True): url = link.url url = url.split('#', 1)[0] if cache.too_many_failures(url): return None # Check for VCS schemes that do not support lookup as web pages. from pip.vcs import VcsSupport for scheme in VcsSupport.schemes: if url.lower().startswith(scheme) and url[len(scheme)] in '+:': logger.debug('Cannot look at %(scheme)s URL %(link)s' % locals()) return None if cache is not None: inst = cache.get_page(url) if inst is not None: return inst try: if skip_archives: if cache is not None: if cache.is_archive(url): return None filename = link.filename for bad_ext in ['.tar', '.tar.gz', '.tar.bz2', '.tgz', '.zip']: if filename.endswith(bad_ext): content_type = cls._get_content_type(url) if content_type.lower().startswith('text/html'): break else: logger.debug('Skipping page %s because of Content-Type: %s' % (link, content_type)) if cache is not None: cache.set_is_archive(url) return None logger.debug('Getting page %s' % url) # Tack index.html onto file:// URLs that point to directories (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url) if scheme == 'file' and os.path.isdir(url2pathname(path)): # add trailing slash if not present so urljoin doesn't trim final segment if not url.endswith('/'): url += '/' url = urlparse.urljoin(url, 'index.html') logger.debug(' file: URL is directory, getting %s' % url) resp = urlopen(url) real_url = geturl(resp) headers = resp.info() contents = resp.read() encoding = headers.get('Content-Encoding', None) #XXX need to handle exceptions and add testing for this if encoding is not None: if encoding == 'gzip': contents = gzip.GzipFile(fileobj=BytesIO(contents)).read() if encoding == 'deflate': contents = zlib.decompress(contents) # The check for archives above only works if the url ends with # something that looks like an archive. However that is not a # requirement. For instance http://sourceforge.net/projects/docutils/files/docutils/0.8.1/docutils-0.8.1.tar.gz/download # redirects to http://superb-dca3.dl.sourceforge.net/project/docutils/docutils/0.8.1/docutils-0.8.1.tar.gz # Unless we issue a HEAD request on every url we cannot know # ahead of time for sure if something is HTML or not. However we # can check after we've downloaded it. content_type = headers.get('Content-Type', 'unknown') if not content_type.lower().startswith("text/html"): logger.debug('Skipping page %s because of Content-Type: %s' % (link, content_type)) if cache is not None: cache.set_is_archive(url) return None inst = cls(u(contents), real_url, headers, trusted=link.trusted) except (HTTPError, URLError, socket.timeout, socket.error, OSError, WindowsError): e = sys.exc_info()[1] desc = str(e) if isinstance(e, socket.timeout): log_meth = logger.info level =1 desc = 'timed out' elif isinstance(e, URLError): #ssl/certificate error if hasattr(e, 'reason') and (isinstance(e.reason, ssl.SSLError) or isinstance(e.reason, CertificateError)): desc = 'There was a problem confirming the ssl certificate: %s' % e log_meth = logger.notify else: log_meth = logger.info if hasattr(e, 'reason') and isinstance(e.reason, socket.timeout): desc = 'timed out' level = 1 else: level = 2 elif isinstance(e, HTTPError) and e.code == 404: ## FIXME: notify? log_meth = logger.info level = 2 else: log_meth = logger.info level = 1 log_meth('Could not fetch URL %s: %s' % (link, desc)) log_meth('Will skip URL %s when looking for download links for %s' % (link.url, req)) if cache is not None: cache.add_page_failure(url, level) return None if cache is not None: cache.add_page([url, real_url], inst) return inst
def get_page(cls, link, req, cache=None, skip_archives=True): url = link.url url = url.split('#', 1)[0] if cache.too_many_failures(url): return None # Check for VCS schemes that do not support lookup as web pages. from pip.vcs import VcsSupport for scheme in VcsSupport.schemes: if url.lower().startswith(scheme) and url[len(scheme)] in '+:': logger.debug('Cannot look at %(scheme)s URL %(link)s' % locals()) return None if cache is not None: inst = cache.get_page(url) if inst is not None: return inst try: if skip_archives: if cache is not None: if cache.is_archive(url): return None filename = link.filename for bad_ext in ['.tar', '.tar.gz', '.tar.bz2', '.tgz', '.zip']: if filename.endswith(bad_ext): content_type = cls._get_content_type(url) if content_type.lower().startswith('text/html'): break else: logger.debug( 'Skipping page %s because of Content-Type: %s' % (link, content_type)) if cache is not None: cache.set_is_archive(url) return None logger.debug('Getting page %s' % url) # Tack index.html onto file:// URLs that point to directories (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url) if scheme == 'file' and os.path.isdir(url2pathname(path)): # add trailing slash if not present so urljoin doesn't trim final segment if not url.endswith('/'): url += '/' url = urlparse.urljoin(url, 'index.html') logger.debug(' file: URL is directory, getting %s' % url) resp = urlopen(url) real_url = geturl(resp) headers = resp.info() contents = resp.read() encoding = headers.get('Content-Encoding', None) #XXX need to handle exceptions and add testing for this if encoding is not None: if encoding == 'gzip': contents = gzip.GzipFile(fileobj=BytesIO(contents)).read() if encoding == 'deflate': contents = zlib.decompress(contents) # The check for archives above only works if the url ends with # something that looks like an archive. However that is not a # requirement. For instance http://sourceforge.net/projects/docutils/files/docutils/0.8.1/docutils-0.8.1.tar.gz/download # redirects to http://superb-dca3.dl.sourceforge.net/project/docutils/docutils/0.8.1/docutils-0.8.1.tar.gz # Unless we issue a HEAD request on every url we cannot know # ahead of time for sure if something is HTML or not. However we # can check after we've downloaded it. if not headers["Content-Type"].lower().startswith("text/html"): logger.debug('Skipping page %s because of Content-Type: %s' % (link, headers["Content-Type"])) if cache is not None: cache.set_is_archive(url) return None inst = cls(u(contents), real_url, headers, trusted=link.trusted) except (HTTPError, URLError, socket.timeout, socket.error, OSError, WindowsError): e = sys.exc_info()[1] desc = str(e) if isinstance(e, socket.timeout): log_meth = logger.info level = 1 desc = 'timed out' elif isinstance(e, URLError): #ssl/certificate error if hasattr(e, 'reason') and ( isinstance(e.reason, ssl.SSLError) or isinstance(e.reason, CertificateError)): desc = 'There was a problem confirming the ssl certificate: %s' % e log_meth = logger.notify else: log_meth = logger.info if hasattr(e, 'reason') and isinstance(e.reason, socket.timeout): desc = 'timed out' level = 1 else: level = 2 elif isinstance(e, HTTPError) and e.code == 404: ## FIXME: notify? log_meth = logger.info level = 2 else: log_meth = logger.info level = 1 log_meth('Could not fetch URL %s: %s' % (link, desc)) log_meth( 'Will skip URL %s when looking for download links for %s' % (link.url, req)) if cache is not None: cache.add_page_failure(url, level) return None if cache is not None: cache.add_page([url, real_url], inst) return inst
def _link_package_versions(self, link, search_name): """ Return an iterable of triples (pkg_resources_version_key, link, python_version) that can be extracted from the given link. Meant to be overridden by subclasses, not called by clients. """ platform = get_platform() version = None if link.egg_fragment: egg_info = link.egg_fragment else: egg_info, ext = link.splitext() if not ext: if link not in self.logged_links: logger.debug('Skipping link %s; not a file' % link) self.logged_links.add(link) return [] if egg_info.endswith('.tar'): # Special double-extension case: egg_info = egg_info[:-4] ext = '.tar' + ext if ext not in self._known_extensions(): if link not in self.logged_links: logger.debug( 'Skipping link %s; unknown archive format: %s' % (link, ext)) self.logged_links.add(link) return [] if "macosx10" in link.path and ext == '.zip': if link not in self.logged_links: logger.debug('Skipping link %s; macosx10 one' % (link)) self.logged_links.add(link) return [] if link.wheel and link.wheel.name.lower() == search_name.lower(): version = link.wheel.version if not link.wheel.supported(): logger.debug( 'Skipping %s because it is not compatible with this Python' % link) return [] # This is a dirty hack to prevent installing Binary Wheels from # PyPI or one of its mirrors unless it is a Windows Binary # Wheel. This is paired with a change to PyPI disabling # uploads for the same. Once we have a mechanism for enabling # support for binary wheels on linux that deals with the # inherent problems of binary distribution this can be # removed. comes_from = getattr(link, "comes_from", None) if (not platform.startswith('win') and comes_from is not None and urlparse.urlparse(comes_from.url).netloc.endswith( "pypi.python.org")): if not link.wheel.supported(tags=supported_tags_noarch): logger.debug( "Skipping %s because it is a pypi-hosted binary " "Wheel on an unsupported platform" % link) return [] if not version: version = self._egg_info_matches(egg_info, search_name, link) if version is None: logger.debug('Skipping link %s; wrong project name (not %s)' % (link, search_name)) return [] if (link.internal is not None and not link.internal and not normalize_name(search_name).lower() in self.allow_external and not self.allow_all_external): # We have a link that we are sure is external, so we should skip # it unless we are allowing externals logger.debug("Skipping %s because it is externally hosted." % link) self.need_warn_external = True return [] if (link.verifiable is not None and not link.verifiable and not normalize_name(search_name).lower() in self.allow_insecure and not self.allow_all_insecure): # TODO: Remove after release # We have a link that we are sure we cannot verify it's integrity, # so we should skip it unless we are allowing unsafe installs # for this requirement. logger.debug("Skipping %s because it is an insecure and " "unverifiable file." % link) self.need_warn_insecure = True return [] match = self._py_version_re.search(version) if match: version = version[:match.start()] py_version = match.group(1) if py_version != sys.version[:3]: logger.debug( 'Skipping %s because Python version is incorrect' % link) return [] logger.debug('Found link %s, version: %s' % (link, version)) return [(pkg_resources.parse_version(version), link, version)]
def get_page(cls, link, req, cache=None, skip_archives=True): url = link.url url = url.split('#', 1)[0] if cache.too_many_failures(url): return None # Check for VCS schemes that do not support lookup as web pages. from pip.vcs import VcsSupport for scheme in VcsSupport.schemes: if url.lower().startswith(scheme) and url[len(scheme)] in '+:': logger.debug('Cannot look at %(scheme)s URL %(link)s' % locals()) return None if cache is not None: inst = cache.get_page(url) if inst is not None: return inst try: if skip_archives: if cache is not None: if cache.is_archive(url): return None filename = link.filename for bad_ext in ['.tar', '.tar.gz', '.tar.bz2', '.tgz', '.zip']: if filename.endswith(bad_ext): content_type = cls._get_content_type(url) if content_type.lower().startswith('text/html'): break else: logger.debug('Skipping page %s because of Content-Type: %s' % (link, content_type)) if cache is not None: cache.set_is_archive(url) return None logger.debug('Getting page %s' % url) # Tack index.html onto file:// URLs that point to directories (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url) if scheme == 'file' and os.path.isdir(url2pathname(path)): # add trailing slash if not present so urljoin doesn't trim final segment if not url.endswith('/'): url += '/' url = urlparse.urljoin(url, 'index.html') logger.debug(' file: URL is directory, getting %s' % url) resp = urlopen(url) real_url = geturl(resp) headers = resp.info() inst = cls(u(resp.read()), real_url, headers) except (HTTPError, URLError, socket.timeout, socket.error, OSError, WindowsError): e = sys.exc_info()[1] desc = str(e) if isinstance(e, socket.timeout): log_meth = logger.info level =1 desc = 'timed out' elif isinstance(e, URLError): log_meth = logger.info if hasattr(e, 'reason') and isinstance(e.reason, socket.timeout): desc = 'timed out' level = 1 else: level = 2 elif isinstance(e, HTTPError) and e.code == 404: ## FIXME: notify? log_meth = logger.info level = 2 else: log_meth = logger.info level = 1 log_meth('Could not fetch URL %s: %s' % (link, desc)) log_meth('Will skip URL %s when looking for download links for %s' % (link.url, req)) if cache is not None: cache.add_page_failure(url, level) return None if cache is not None: cache.add_page([url, real_url], inst) return inst
def get_page(cls, link, req, cache=None, skip_archives=True, session=None): if session is None: session = PipSession() url = link.url url = url.split('#', 1)[0] if cache.too_many_failures(url): return None # Check for VCS schemes that do not support lookup as web pages. from pip.vcs import VcsSupport for scheme in VcsSupport.schemes: if url.lower().startswith(scheme) and url[len(scheme)] in '+:': logger.debug('Cannot look at %(scheme)s URL %(link)s' % locals()) return None if cache is not None: inst = cache.get_page(url) if inst is not None: return inst try: if skip_archives: if cache is not None: if cache.is_archive(url): return None filename = link.filename for bad_ext in ['.tar', '.tar.gz', '.tar.bz2', '.tgz', '.zip']: if filename.endswith(bad_ext): content_type = cls._get_content_type( url, session=session, ) if content_type.lower().startswith('text/html'): break else: logger.debug( 'Skipping page %s because of Content-Type: ' '%s' % (link, content_type)) if cache is not None: cache.set_is_archive(url) return None logger.debug('Getting page %s' % url) # Tack index.html onto file:// URLs that point to directories (scheme, netloc, path, params, query, fragment) = \ urlparse.urlparse(url) if scheme == 'file' and os.path.isdir(url2pathname(path)): # add trailing slash if not present so urljoin doesn't trim # final segment if not url.endswith('/'): url += '/' url = urlparse.urljoin(url, 'index.html') logger.debug(' file: URL is directory, getting %s' % url) resp = session.get(url, headers={"Accept": "text/html"}) resp.raise_for_status() # The check for archives above only works if the url ends with # something that looks like an archive. However that is not a # requirement of an url. Unless we issue a HEAD request on every # url we cannot know ahead of time for sure if something is HTML # or not. However we can check after we've downloaded it. content_type = resp.headers.get('Content-Type', 'unknown') if not content_type.lower().startswith("text/html"): logger.debug('Skipping page %s because of Content-Type: %s' % (link, content_type)) if cache is not None: cache.set_is_archive(url) return None inst = cls(resp.text, resp.url, resp.headers, trusted=link.trusted) except requests.HTTPError as exc: level = 2 if exc.response.status_code == 404 else 1 cls._handle_fail(req, link, exc, url, cache=cache, level=level) except requests.ConnectionError as exc: cls._handle_fail( req, link, "connection error: %s" % exc, url, cache=cache, ) except requests.Timeout: cls._handle_fail(req, link, "timed out", url, cache=cache) except SSLError as exc: reason = ("There was a problem confirming the ssl certificate: " "%s" % exc) cls._handle_fail( req, link, reason, url, cache=cache, level=2, meth=logger.notify, ) else: if cache is not None: cache.add_page([url, resp.url], inst) return inst
def find_requirement(self, req, upgrade): def mkurl_pypi_url(url): loc = posixpath.join(url, url_name) # For maximum compatibility with easy_install, ensure the path # ends in a trailing slash. Although this isn't in the spec # (and PyPI can handle it without the slash) some other index # implementations might break if they relied on easy_install's # behavior. if not loc.endswith('/'): loc = loc + '/' return loc url_name = req.url_name # Only check main index if index URL is given: main_index_url = None if self.index_urls: # Check that we have the url_name correctly spelled: main_index_url = Link( mkurl_pypi_url(self.index_urls[0]), trusted=True, ) # This will also cache the page, so it's okay that we get it again # later: page = self._get_page(main_index_url, req) if page is None: url_name = self._find_url_name( Link(self.index_urls[0], trusted=True), url_name, req) or req.url_name if url_name is not None: locations = [mkurl_pypi_url(url) for url in self.index_urls] + self.find_links else: locations = list(self.find_links) for version in req.absolute_versions: if url_name is not None and main_index_url is not None: locations = [posixpath.join(main_index_url.url, version) ] + locations file_locations, url_locations = self._sort_locations(locations) # We trust every url that the user has given us whether it was given # via --index-url or --find-links locations = [Link(url, trusted=True) for url in url_locations] logger.debug('URLs to search for versions for %s:' % req) for location in locations: logger.debug('* %s' % location) # Determine if this url used a secure transport mechanism parsed = urlparse.urlparse(str(location)) if parsed.scheme in INSECURE_SCHEMES: secure_schemes = INSECURE_SCHEMES[parsed.scheme] if len(secure_schemes) == 1: ctx = (location, parsed.scheme, secure_schemes[0], parsed.netloc) logger.warn("%s uses an insecure transport scheme (%s). " "Consider using %s if %s has it available" % ctx) elif len(secure_schemes) > 1: ctx = ( location, parsed.scheme, ", ".join(secure_schemes), parsed.netloc, ) logger.warn("%s uses an insecure transport scheme (%s). " "Consider using one of %s if %s has any of " "them available" % ctx) else: ctx = (location, parsed.scheme) logger.warn("%s uses an insecure transport scheme (%s)." % ctx) found_versions = [] found_versions.extend( self._package_versions( # We trust every directly linked archive in find_links [Link(url, '-f', trusted=True) for url in self.find_links], req.name.lower())) page_versions = [] for page in self._get_pages(locations, req): logger.debug('Analyzing links from page %s' % page.url) logger.indent += 2 try: page_versions.extend( self._package_versions(page.links, req.name.lower())) finally: logger.indent -= 2 file_versions = list( self._package_versions([Link(url) for url in file_locations], req.name.lower())) if (not found_versions and not page_versions and not file_versions): logger.fatal( 'Could not find any downloads that satisfy the requirement' ' %s' % req) if self.need_warn_external: logger.warn("Some externally hosted files were ignored (use " "--allow-external %s to allow)." % req.name) if self.need_warn_unverified: logger.warn("Some insecure and unverifiable files were ignored" " (use --allow-unverified %s to allow)." % req.name) raise DistributionNotFound('No distributions at all found for %s' % req) installed_version = [] if req.satisfied_by is not None: installed_version = [( req.satisfied_by.parsed_version, INSTALLED_VERSION, req.satisfied_by.version, )] if file_versions: file_versions.sort(reverse=True) logger.info('Local files found: %s' % ', '.join([ url_to_path(link.url) for parsed, link, version in file_versions ])) # this is an intentional priority ordering all_versions = installed_version + file_versions + found_versions \ + page_versions applicable_versions = [] for (parsed_version, link, version) in all_versions: if version not in req.req: logger.info("Ignoring link %s, version %s doesn't match %s" % (link, version, ','.join( [''.join(s) for s in req.req.specs]))) continue elif (is_prerelease(version) and not (self.allow_all_prereleases or req.prereleases)): # If this version isn't the already installed one, then # ignore it if it's a pre-release. if link is not INSTALLED_VERSION: logger.info( "Ignoring link %s, version %s is a pre-release (use " "--pre to allow)." % (link, version)) continue applicable_versions.append((parsed_version, link, version)) applicable_versions = self._sort_versions(applicable_versions) existing_applicable = bool([ link for parsed_version, link, version in applicable_versions if link is INSTALLED_VERSION ]) if not upgrade and existing_applicable: if applicable_versions[0][1] is INSTALLED_VERSION: logger.info( 'Existing installed version (%s) is most up-to-date and ' 'satisfies requirement' % req.satisfied_by.version) else: logger.info( 'Existing installed version (%s) satisfies requirement ' '(most up-to-date version is %s)' % (req.satisfied_by.version, applicable_versions[0][2])) return None if not applicable_versions: logger.fatal( 'Could not find a version that satisfies the requirement %s ' '(from versions: %s)' % (req, ', '.join([ version for parsed_version, link, version in all_versions ]))) if self.need_warn_external: logger.warn("Some externally hosted files were ignored (use " "--allow-external to allow).") if self.need_warn_unverified: logger.warn("Some insecure and unverifiable files were ignored" " (use --allow-unverified %s to allow)." % req.name) raise DistributionNotFound( 'No distributions matching the version for %s' % req) if applicable_versions[0][1] is INSTALLED_VERSION: # We have an existing version, and its the best version logger.info( 'Installed version (%s) is most up-to-date (past versions: ' '%s)' % (req.satisfied_by.version, ', '.join([ version for parsed_version, link, version in applicable_versions[1:] ]) or 'none')) raise BestVersionAlreadyInstalled if len(applicable_versions) > 1: logger.info( 'Using version %s (newest of versions: %s)' % (applicable_versions[0][2], ', '.join([ version for parsed_version, link, version in applicable_versions ]))) selected_version = applicable_versions[0][1] if (selected_version.internal is not None and not selected_version.internal): logger.warn("%s an externally hosted file and may be " "unreliable" % req.name) if (selected_version.verifiable is not None and not selected_version.verifiable): logger.warn("%s is potentially insecure and " "unverifiable." % req.name) if selected_version._deprecated_regex: logger.deprecated( "1.7", "%s discovered using a deprecated method of parsing, " "in the future it will no longer be discovered" % req.name) return selected_version