def handle_401(self, resp, **kwargs):
        # We only care about 401 responses, anything else we want to just
        #   pass through the actual response
        if resp.status_code != 401:
            return resp

        # We are not able to prompt the user, so simply return the response
        if not self.prompting:
            return resp

        parsed = urlparse.urlparse(resp.url)

        # Prompt the user for a new username and password
        username = raw_input("User for %s: " % parsed.netloc)
        password = getpass.getpass("Password: ")

        # Add our new username and password to the request
        req = HTTPBasicAuth(username or "", password or "")(resp.request)

        # Send our new request
        new_resp = resp.connection.send(req, **kwargs)
        new_resp.history.append(resp)

        return new_resp
    def __call__(self, req):
        parsed = urlparse.urlparse(req.url)

        # Get the netloc without any embedded credentials
        netloc = parsed.netloc.split("@", 1)[-1]

        # Set the url of the request to the url without any credentials
        req.url = urlparse.urlunparse(parsed[:1] + (netloc,) + parsed[2:])

        # Use any stored credentials that we have for this netloc
        username, password = self.passwords.get(netloc, (None, None))

        # Extract credentials embedded in the url if we have none stored
        if username is None:
            username, password = self.parse_credentials(parsed.netloc)

        if username or password:
            # Store the username and password
            self.passwords[netloc] = (username, password)

            # Send the basic auth with this request
            req = HTTPBasicAuth(username or "", password or "")(req)

        # Attach a hook to handle 401 responses
        req.register_hook("response", self.handle_401)

        return req
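The credential stripping in __call__ is just tuple surgery on the parse result; a minimal standalone sketch of that step, assuming the Python 2 urlparse module these snippets import (urllib.parse on Python 3) and an illustrative URL:

# Sketch of stripping embedded credentials from a URL, as done in __call__
# above. The URL and credentials here are illustrative only.
import urlparse  # urllib.parse on Python 3

url = "https://user:secret@index.example.com/simple/"
parsed = urlparse.urlparse(url)

# Keep only the host[:port] part after any "user:pass@" prefix
netloc = parsed.netloc.split("@", 1)[-1]

# Rebuild the URL without the embedded credentials
clean_url = urlparse.urlunparse(parsed[:1] + (netloc,) + parsed[2:])
print(clean_url)  # https://index.example.com/simple/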
    def send(self, request, stream=None, timeout=None, verify=None, cert=None,
             proxies=None):
        parsed_url = urlparse.urlparse(request.url)

        # We only work for requests with a host of localhost
        if parsed_url.netloc.lower() != "localhost":
            raise InvalidURL("Invalid URL %r: Only localhost is allowed" %
                request.url)

        real_url = urlparse.urlunparse(parsed_url[:1] + ("",) + parsed_url[2:])
        pathname = url_to_path(real_url)

        resp = Response()
        resp.status_code = 200
        resp.url = real_url

        stats = os.stat(pathname)
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        resp.headers = CaseInsensitiveDict({
            "Content-Type": mimetypes.guess_type(pathname)[0] or "text/plain",
            "Content-Length": stats.st_size,
            "Last-Modified": modified,
        })

        resp.raw = LocalFSResponse(open(pathname, "rb"))
        resp.close = resp.raw.close

        return resp
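An adapter like the one containing this send() method would normally be registered on a session through requests' mount() API; a hedged usage sketch, assuming a LocalFSAdapter class wrapping the method above (the file path is illustrative):

# Usage sketch: mounting a file:// transport adapter on a requests session.
import requests

session = requests.Session()
session.mount("file://", LocalFSAdapter())

resp = session.get("file://localhost/tmp/simple/index.html")
print(resp.status_code, resp.headers.get("Content-Type"))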
    def request(self, method, url, *args, **kwargs):
        # Make file:// urls not fail due to lack of a hostname
        parsed = urlparse.urlparse(url)
        if parsed.scheme == "file":
            url = urlparse.urlunparse(parsed[:1] + ("localhost",) + parsed[2:])

        # Allow setting a default timeout on a session
        kwargs.setdefault("timeout", self.timeout)

        # Dispatch the actual request
        return super(PipSession, self).request(method, url, *args, **kwargs)
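Because request() uses kwargs.setdefault, the session-wide timeout only applies when a call does not pass its own; a small sketch of the same pattern on a plain requests.Session subclass (the class name and values are illustrative):

# Sketch of the default-timeout pattern used by the request() override above.
import requests

class TimeoutSession(requests.Session):
    def __init__(self, timeout=None):
        super(TimeoutSession, self).__init__()
        self.timeout = timeout

    def request(self, method, url, *args, **kwargs):
        # Only fills in the timeout when the caller did not supply one.
        kwargs.setdefault("timeout", self.timeout)
        return super(TimeoutSession, self).request(
            method, url, *args, **kwargs)

session = TimeoutSession(timeout=15)
session.get("https://pypi.python.org/simple/")               # 15s default
session.get("https://pypi.python.org/simple/", timeout=60)   # explicit wins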
Example #5
    def get_page(cls, link, req, cache=None, skip_archives=True, session=None):
        if session is None:
            session = PipSession()

        url = link.url
        url = url.split('#', 1)[0]
        if cache is not None and cache.too_many_failures(url):
            return None

        # Check for VCS schemes that do not support lookup as web pages.
        from pip.vcs import VcsSupport
        for scheme in VcsSupport.schemes:
            if url.lower().startswith(scheme) and url[len(scheme)] in '+:':
                logger.debug(
                    'Cannot look at %(scheme)s URL %(link)s' % locals()
                )
                return None

        if cache is not None:
            inst = cache.get_page(url)
            if inst is not None:
                return inst
        try:
            if skip_archives:
                if cache is not None:
                    if cache.is_archive(url):
                        return None
                filename = link.filename
                for bad_ext in ['.tar', '.tar.gz', '.tar.bz2', '.tgz', '.zip']:
                    if filename.endswith(bad_ext):
                        content_type = cls._get_content_type(
                            url, session=session,
                        )
                        if content_type.lower().startswith('text/html'):
                            break
                        else:
                            logger.debug(
                                'Skipping page %s because of Content-Type: '
                                '%s' % (link, content_type)
                            )
                            if cache is not None:
                                cache.set_is_archive(url)
                            return None
            logger.debug('Getting page %s' % url)

            # Tack index.html onto file:// URLs that point to directories
            (scheme, netloc, path, params, query, fragment) = \
                urlparse.urlparse(url)
            if scheme == 'file' and os.path.isdir(url2pathname(path)):
                # add trailing slash if not present so urljoin doesn't trim
                # final segment
                if not url.endswith('/'):
                    url += '/'
                url = urlparse.urljoin(url, 'index.html')
                logger.debug(' file: URL is directory, getting %s' % url)

            resp = session.get(url, headers={"Accept": "text/html"})
            resp.raise_for_status()

            # The check for archives above only works if the url ends with
            #   something that looks like an archive. However that is not a
            #   requirement of an url. Unless we issue a HEAD request on every
            #   url we cannot know ahead of time for sure if something is HTML
            #   or not. However we can check after we've downloaded it.
            content_type = resp.headers.get('Content-Type', 'unknown')
            if not content_type.lower().startswith("text/html"):
                logger.debug(
                    'Skipping page %s because of Content-Type: %s' %
                    (link, content_type)
                )
                if cache is not None:
                    cache.set_is_archive(url)
                return None

            inst = cls(resp.text, resp.url, resp.headers, trusted=link.trusted)
        except requests.HTTPError as exc:
            level = 2 if exc.response.status_code == 404 else 1
            cls._handle_fail(req, link, exc, url, cache=cache, level=level)
        except requests.ConnectionError as exc:
            cls._handle_fail(
                req, link, "connection error: %s" % exc, url,
                cache=cache,
            )
        except requests.Timeout:
            cls._handle_fail(req, link, "timed out", url, cache=cache)
        except SSLError as exc:
            reason = ("There was a problem confirming the ssl certificate: "
                      "%s" % exc)
            cls._handle_fail(
                req, link, reason, url,
                cache=cache,
                level=2,
                meth=logger.notify,
            )
        else:
            if cache is not None:
                cache.add_page([url, resp.url], inst)
            return inst
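The archive-skipping branch above depends on a _get_content_type helper that is not shown in this snippet; presumably it probes the URL with a HEAD request. A hedged sketch of what such a probe could look like (an assumption, not pip's actual helper):

# Hypothetical sketch of a Content-Type probe in the spirit of
# _get_content_type; the real pip helper may behave differently
# (for example, skipping non-HTTP(S) schemes).
def probe_content_type(url, session):
    resp = session.head(url, allow_redirects=True)
    resp.raise_for_status()
    return resp.headers.get("Content-Type", "")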
Example #6
    def _link_package_versions(self, link, search_name):
        """
        Return an iterable of triples (pkg_resources_version_key,
        link, python_version) that can be extracted from the given
        link.

        Meant to be overridden by subclasses, not called by clients.
        """
        platform = get_platform()

        version = None
        if link.egg_fragment:
            egg_info = link.egg_fragment
        else:
            egg_info, ext = link.splitext()
            if not ext:
                if link not in self.logged_links:
                    logger.debug('Skipping link %s; not a file' % link)
                    self.logged_links.add(link)
                return []
            if egg_info.endswith('.tar'):
                # Special double-extension case:
                egg_info = egg_info[:-4]
                ext = '.tar' + ext
            if ext not in self._known_extensions():
                if link not in self.logged_links:
                    logger.debug(
                        'Skipping link %s; unknown archive format: %s' %
                        (link, ext)
                    )
                    self.logged_links.add(link)
                return []
            if "macosx10" in link.path and ext == '.zip':
                if link not in self.logged_links:
                    logger.debug('Skipping link %s; macosx10 one' % (link))
                    self.logged_links.add(link)
                return []
            if ext == wheel_ext:
                try:
                    wheel = Wheel(link.filename)
                except InvalidWheelFilename:
                    logger.debug(
                        'Skipping %s because the wheel filename is invalid' %
                        link
                    )
                    return []
                if wheel.name.lower() != search_name.lower():
                    logger.debug(
                        'Skipping link %s; wrong project name (not %s)' %
                        (link, search_name)
                    )
                    return []
                if not wheel.supported():
                    logger.debug(
                        'Skipping %s because it is not compatible with this '
                        'Python' % link
                    )
                    return []
                # This is a dirty hack to prevent installing Binary Wheels from
                # PyPI unless it is a Windows or Mac Binary Wheel. This is
                # paired with a change to PyPI disabling uploads for the
                # same. Once we have a mechanism for enabling support for
                # binary wheels on linux that deals with the inherent problems
                # of binary distribution this can be removed.
                comes_from = getattr(link, "comes_from", None)
                if (
                        (
                            not platform.startswith('win')
                            and not platform.startswith('macosx')
                        )
                        and comes_from is not None
                        and urlparse.urlparse(
                            comes_from.url
                        ).netloc.endswith("pypi.python.org")):
                    if not wheel.supported(tags=supported_tags_noarch):
                        logger.debug(
                            "Skipping %s because it is a pypi-hosted binary "
                            "Wheel on an unsupported platform" % link
                        )
                        return []
                version = wheel.version

        if not version:
            version = self._egg_info_matches(egg_info, search_name, link)
        if version is None:
            logger.debug(
                'Skipping link %s; wrong project name (not %s)' %
                (link, search_name)
            )
            return []

        if (link.internal is not None
                and not link.internal
                and not normalize_name(search_name).lower()
                in self.allow_external
                and not self.allow_all_external):
            # We have a link that we are sure is external, so we should skip
            #   it unless we are allowing externals
            logger.debug("Skipping %s because it is externally hosted." % link)
            self.need_warn_external = True
            return []

        if (link.verifiable is not None
                and not link.verifiable
                and not (normalize_name(search_name).lower()
                         in self.allow_unverified)):
            # We have a link that we are sure we cannot verify its integrity,
            #   so we should skip it unless we are allowing unsafe installs
            #   for this requirement.
            logger.debug("Skipping %s because it is an insecure and "
                         "unverifiable file." % link)
            self.need_warn_unverified = True
            return []

        match = self._py_version_re.search(version)
        if match:
            version = version[:match.start()]
            py_version = match.group(1)
            if py_version != sys.version[:3]:
                logger.debug(
                    'Skipping %s because Python version is incorrect' % link
                )
                return []
        logger.debug('Found link %s, version: %s' % (link, version))
        return [(
            pkg_resources.parse_version(version),
            link,
            version,
        )]
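The .tar special case exists because a plain splitext() only strips the last suffix, so ".tar.gz" would otherwise be treated as a ".gz" file; a small standalone illustration (the filenames are made up):

# Illustration of the double-extension handling above; posixpath.splitext()
# alone would report ".gz" for a ".tar.gz" archive.
import posixpath

def split_archive_ext(filename):
    base, ext = posixpath.splitext(filename)
    if base.endswith('.tar'):
        # Special double-extension case: fold ".tar" back into the extension
        base, ext = base[:-4], '.tar' + ext
    return base, ext

print(split_archive_ext('pip-1.5.tar.gz'))  # ('pip-1.5', '.tar.gz')
print(split_archive_ext('pip-1.5.zip'))     # ('pip-1.5', '.zip')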
Example #7
    def find_requirement(self, req, upgrade):

        def mkurl_pypi_url(url):
            loc = posixpath.join(url, url_name)
            # For maximum compatibility with easy_install, ensure the path
            # ends in a trailing slash.  Although this isn't in the spec
            # (and PyPI can handle it without the slash) some other index
            # implementations might break if they relied on easy_install's
            # behavior.
            if not loc.endswith('/'):
                loc = loc + '/'
            return loc

        url_name = req.url_name
        # Only check main index if index URL is given:
        main_index_url = None
        if self.index_urls:
            # Check that we have the url_name correctly spelled:
            main_index_url = Link(
                mkurl_pypi_url(self.index_urls[0]),
                trusted=True,
            )
            # This will also cache the page, so it's okay that we get it again
            # later:
            page = self._get_page(main_index_url, req)
            if page is None:
                url_name = self._find_url_name(
                    Link(self.index_urls[0], trusted=True),
                    url_name, req
                ) or req.url_name

        if url_name is not None:
            locations = [
                mkurl_pypi_url(url)
                for url in self.index_urls] + self.find_links
        else:
            locations = list(self.find_links)
        for version in req.absolute_versions:
            if url_name is not None and main_index_url is not None:
                locations = [
                    posixpath.join(main_index_url.url, version)] + locations

        file_locations, url_locations = self._sort_locations(locations)

        # We trust every url that the user has given us whether it was given
        #   via --index-url or --find-links
        locations = [Link(url, trusted=True) for url in url_locations]

        logger.debug('URLs to search for versions for %s:' % req)
        for location in locations:
            logger.debug('* %s' % location)

            # Determine if this url used a secure transport mechanism
            parsed = urlparse.urlparse(str(location))
            if parsed.scheme in INSECURE_SCHEMES:
                secure_schemes = INSECURE_SCHEMES[parsed.scheme]

                if len(secure_schemes) == 1:
                    ctx = (location, parsed.scheme, secure_schemes[0],
                           parsed.netloc)
                    logger.warn("%s uses an insecure transport scheme (%s). "
                                "Consider using %s if %s has it available" %
                                ctx)
                elif len(secure_schemes) > 1:
                    ctx = (
                        location,
                        parsed.scheme,
                        ", ".join(secure_schemes),
                        parsed.netloc,
                    )
                    logger.warn("%s uses an insecure transport scheme (%s). "
                                "Consider using one of %s if %s has any of "
                                "them available" % ctx)
                else:
                    ctx = (location, parsed.scheme)
                    logger.warn("%s uses an insecure transport scheme (%s)." %
                                ctx)

        found_versions = []
        found_versions.extend(
            self._package_versions(
                # We trust every directly linked archive in find_links
                [Link(url, '-f', trusted=True) for url in self.find_links],
                req.name.lower()
            )
        )
        page_versions = []
        for page in self._get_pages(locations, req):
            logger.debug('Analyzing links from page %s' % page.url)
            logger.indent += 2
            try:
                page_versions.extend(
                    self._package_versions(page.links, req.name.lower())
                )
            finally:
                logger.indent -= 2
        file_versions = list(
            self._package_versions(
                [Link(url) for url in file_locations],
                req.name.lower()
            )
        )
        if (not found_versions
                and not page_versions
                and not file_versions):
            logger.fatal(
                'Could not find any downloads that satisfy the requirement'
                ' %s' % req
            )

            if self.need_warn_external:
                logger.warn("Some externally hosted files were ignored (use "
                            "--allow-external %s to allow)." % req.name)

            if self.need_warn_unverified:
                logger.warn("Some insecure and unverifiable files were ignored"
                            " (use --allow-unverified %s to allow)." %
                            req.name)

            raise DistributionNotFound(
                'No distributions at all found for %s' % req
            )
        installed_version = []
        if req.satisfied_by is not None:
            installed_version = [(
                req.satisfied_by.parsed_version,
                INSTALLED_VERSION,
                req.satisfied_by.version,
            )]
        if file_versions:
            file_versions.sort(reverse=True)
            logger.info(
                'Local files found: %s' %
                ', '.join([
                    url_to_path(link.url)
                    for parsed, link, version in file_versions
                ])
            )
        # this is an intentional priority ordering
        all_versions = installed_version + file_versions + found_versions \
            + page_versions
        applicable_versions = []
        for (parsed_version, link, version) in all_versions:
            if version not in req.req:
                logger.info(
                    "Ignoring link %s, version %s doesn't match %s" %
                    (
                        link,
                        version,
                        ','.join([''.join(s) for s in req.req.specs])
                    )
                )
                continue
            elif (is_prerelease(version)
                    and not (self.allow_all_prereleases or req.prereleases)):
                # If this version isn't the already installed one, then
                #   ignore it if it's a pre-release.
                if link is not INSTALLED_VERSION:
                    logger.info(
                        "Ignoring link %s, version %s is a pre-release (use "
                        "--pre to allow)." % (link, version)
                    )
                    continue
            applicable_versions.append((parsed_version, link, version))
        applicable_versions = self._sort_versions(applicable_versions)
        existing_applicable = bool([
            link
            for parsed_version, link, version in applicable_versions
            if link is INSTALLED_VERSION
        ])
        if not upgrade and existing_applicable:
            if applicable_versions[0][1] is INSTALLED_VERSION:
                logger.info(
                    'Existing installed version (%s) is most up-to-date and '
                    'satisfies requirement' % req.satisfied_by.version
                )
            else:
                logger.info(
                    'Existing installed version (%s) satisfies requirement '
                    '(most up-to-date version is %s)' %
                    (req.satisfied_by.version, applicable_versions[0][2])
                )
            return None
        if not applicable_versions:
            logger.fatal(
                'Could not find a version that satisfies the requirement %s '
                '(from versions: %s)' %
                (
                    req,
                    ', '.join([
                        version
                        for parsed_version, link, version in all_versions
                    ])
                )
            )

            if self.need_warn_external:
                logger.warn("Some externally hosted files were ignored (use "
                            "--allow-external to allow).")

            if self.need_warn_unverified:
                logger.warn("Some insecure and unverifiable files were ignored"
                            " (use --allow-unverified %s to allow)." %
                            req.name)

            raise DistributionNotFound(
                'No distributions matching the version for %s' % req
            )
        if applicable_versions[0][1] is INSTALLED_VERSION:
            # We have an existing version, and it's the best version
            logger.info(
                'Installed version (%s) is most up-to-date (past versions: '
                '%s)' % (
                    req.satisfied_by.version,
                    ', '.join([
                        version for parsed_version, link, version
                        in applicable_versions[1:]
                    ]) or 'none'))
            raise BestVersionAlreadyInstalled
        if len(applicable_versions) > 1:
            logger.info(
                'Using version %s (newest of versions: %s)' %
                (
                    applicable_versions[0][2],
                    ', '.join([
                        version for parsed_version, link, version
                        in applicable_versions
                    ])
                )
            )

        selected_version = applicable_versions[0][1]

        if (selected_version.internal is not None
                and not selected_version.internal):
            logger.warn("%s an externally hosted file and may be "
                        "unreliable" % req.name)

        if (selected_version.verifiable is not None
                and not selected_version.verifiable):
            logger.warn("%s is potentially insecure and "
                        "unverifiable." % req.name)

        if selected_version._deprecated_regex:
            logger.deprecated(
                "1.7",
                "%s discovered using a deprecated method of parsing, "
                "in the future it will no longer be discovered" % req.name
            )

        return selected_version
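Selecting the "newest of versions" relies on comparing parsed versions rather than raw strings, which is what a helper like _sort_versions presumably does; a minimal sketch of that ordering with pkg_resources.parse_version (the candidate list is illustrative):

# Sketch of parsed-version ordering; plain string sorting would rank "1.9"
# above "1.10", and pre-releases would not sort below their final release.
import pkg_resources

candidates = ['1.9', '1.10', '1.10rc1', '0.8.1']
ordered = sorted(candidates, key=pkg_resources.parse_version, reverse=True)
print(ordered)  # ['1.10', '1.10rc1', '1.9', '0.8.1']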
Example #8
    def get_page(cls, link, req, cache=None, skip_archives=True):
        url = link.url
        url = url.split('#', 1)[0]
        if cache is not None and cache.too_many_failures(url):
            return None

        # Check for VCS schemes that do not support lookup as web pages.
        from pip.vcs import VcsSupport
        for scheme in VcsSupport.schemes:
            if url.lower().startswith(scheme) and url[len(scheme)] in '+:':
                logger.debug('Cannot look at %(scheme)s URL %(link)s' % locals())
                return None

        if cache is not None:
            inst = cache.get_page(url)
            if inst is not None:
                return inst
        try:
            if skip_archives:
                if cache is not None:
                    if cache.is_archive(url):
                        return None
                filename = link.filename
                for bad_ext in ['.tar', '.tar.gz', '.tar.bz2', '.tgz', '.zip']:
                    if filename.endswith(bad_ext):
                        content_type = cls._get_content_type(url)
                        if content_type.lower().startswith('text/html'):
                            break
                        else:
                            logger.debug('Skipping page %s because of Content-Type: %s' % (link, content_type))
                            if cache is not None:
                                cache.set_is_archive(url)
                            return None
            logger.debug('Getting page %s' % url)

            # Tack index.html onto file:// URLs that point to directories
            (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)
            if scheme == 'file' and os.path.isdir(url2pathname(path)):
                # add trailing slash if not present so urljoin doesn't trim final segment
                if not url.endswith('/'):
                    url += '/'
                url = urlparse.urljoin(url, 'index.html')
                logger.debug(' file: URL is directory, getting %s' % url)

            resp = urlopen(url)

            real_url = geturl(resp)
            headers = resp.info()
            contents = resp.read()
            encoding = headers.get('Content-Encoding', None)
            #XXX need to handle exceptions and add testing for this
            if encoding is not None:
                if encoding == 'gzip':
                    contents = gzip.GzipFile(fileobj=BytesIO(contents)).read()
                if encoding == 'deflate':
                    contents = zlib.decompress(contents)
            inst = cls(u(contents), real_url, headers)
        except (HTTPError, URLError, socket.timeout, socket.error, OSError, WindowsError):
            e = sys.exc_info()[1]
            desc = str(e)
            if isinstance(e, socket.timeout):
                log_meth = logger.info
                level = 1
                desc = 'timed out'
            elif isinstance(e, URLError):
                log_meth = logger.info
                if hasattr(e, 'reason') and isinstance(e.reason, socket.timeout):
                    desc = 'timed out'
                    level = 1
                else:
                    level = 2
            elif isinstance(e, HTTPError) and e.code == 404:
                ## FIXME: notify?
                log_meth = logger.info
                level = 2
            else:
                log_meth = logger.info
                level = 1
            log_meth('Could not fetch URL %s: %s' % (link, desc))
            log_meth('Will skip URL %s when looking for download links for %s' % (link.url, req))
            if cache is not None:
                cache.add_page_failure(url, level)
            return None
        if cache is not None:
            cache.add_page([url, real_url], inst)
        return inst
Example #9
    def get_page(cls, link, req, cache=None, skip_archives=True, session=None):
        if session is None:
            session = PipSession()

        url = link.url
        url = url.split("#", 1)[0]
        if cache is not None and cache.too_many_failures(url):
            return None

        # Check for VCS schemes that do not support lookup as web pages.
        from pip.vcs import VcsSupport

        for scheme in VcsSupport.schemes:
            if url.lower().startswith(scheme) and url[len(scheme)] in "+:":
                logger.debug("Cannot look at %(scheme)s URL %(link)s" % locals())
                return None

        if cache is not None:
            inst = cache.get_page(url)
            if inst is not None:
                return inst
        try:
            if skip_archives:
                if cache is not None:
                    if cache.is_archive(url):
                        return None
                filename = link.filename
                for bad_ext in [".tar", ".tar.gz", ".tar.bz2", ".tgz", ".zip"]:
                    if filename.endswith(bad_ext):
                        content_type = cls._get_content_type(url, session=session)
                        if content_type.lower().startswith("text/html"):
                            break
                        else:
                            logger.debug("Skipping page %s because of Content-Type: %s" % (link, content_type))
                            if cache is not None:
                                cache.set_is_archive(url)
                            return None
            logger.debug("Getting page %s" % url)

            # Tack index.html onto file:// URLs that point to directories
            (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)
            if scheme == "file" and os.path.isdir(url2pathname(path)):
                # add trailing slash if not present so urljoin doesn't trim final segment
                if not url.endswith("/"):
                    url += "/"
                url = urlparse.urljoin(url, "index.html")
                logger.debug(" file: URL is directory, getting %s" % url)

            resp = session.get(url)
            resp.raise_for_status()

            # The check for archives above only works if the url ends with
            #   something that looks like an archive. However that is not a
            #   requirement. For instance http://sourceforge.net/projects/docutils/files/docutils/0.8.1/docutils-0.8.1.tar.gz/download
            #   redirects to http://superb-dca3.dl.sourceforge.net/project/docutils/docutils/0.8.1/docutils-0.8.1.tar.gz
            #   Unless we issue a HEAD request on every url we cannot know
            #   ahead of time for sure if something is HTML or not. However we
            #   can check after we've downloaded it.
            content_type = resp.headers.get("Content-Type", "unknown")
            if not content_type.lower().startswith("text/html"):
                logger.debug("Skipping page %s because of Content-Type: %s" % (link, content_type))
                if cache is not None:
                    cache.set_is_archive(url)
                return None

            inst = cls(resp.text, resp.url, resp.headers, trusted=link.trusted)
        except requests.HTTPError as exc:
            level = 2 if exc.response.status_code == 404 else 1
            cls._handle_fail(req, link, exc, url, cache=cache, level=level)
        except requests.Timeout:
            cls._handle_fail(req, link, "timed out", url, cache=cache)
        except SSLError as exc:
            reason = "There was a problem confirming the ssl certificate: " "%s" % exc
            cls._handle_fail(req, link, reason, url, cache=cache, level=2, meth=logger.notify)
        else:
            if cache is not None:
                cache.add_page([url, resp.url], inst)
            return inst
Example #10
    def get_page(cls, link, req, cache=None, skip_archives=True):
        url = link.url
        url = url.split('#', 1)[0]
        if cache is not None and cache.too_many_failures(url):
            return None

        # Check for VCS schemes that do not support lookup as web pages.
        from pip.vcs import VcsSupport
        for scheme in VcsSupport.schemes:
            if url.lower().startswith(scheme) and url[len(scheme)] in '+:':
                logger.debug('Cannot look at %(scheme)s URL %(link)s' % locals())
                return None

        if cache is not None:
            inst = cache.get_page(url)
            if inst is not None:
                return inst
        try:
            if skip_archives:
                if cache is not None:
                    if cache.is_archive(url):
                        return None
                filename = link.filename
                for bad_ext in ['.tar', '.tar.gz', '.tar.bz2', '.tgz', '.zip']:
                    if filename.endswith(bad_ext):
                        content_type = cls._get_content_type(url)
                        if content_type.lower().startswith('text/html'):
                            break
                        else:
                            logger.debug('Skipping page %s because of Content-Type: %s' % (link, content_type))
                            if cache is not None:
                                cache.set_is_archive(url)
                            return None
            logger.debug('Getting page %s' % url)

            # Tack index.html onto file:// URLs that point to directories
            (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)
            if scheme == 'file' and os.path.isdir(url2pathname(path)):
                # add trailing slash if not present so urljoin doesn't trim final segment
                if not url.endswith('/'):
                    url += '/'
                url = urlparse.urljoin(url, 'index.html')
                logger.debug(' file: URL is directory, getting %s' % url)

            resp = urlopen(url)

            real_url = geturl(resp)
            headers = resp.info()
            contents = resp.read()
            encoding = headers.get('Content-Encoding', None)
            #XXX need to handle exceptions and add testing for this
            if encoding is not None:
                if encoding == 'gzip':
                    contents = gzip.GzipFile(fileobj=BytesIO(contents)).read()
                if encoding == 'deflate':
                    contents = zlib.decompress(contents)

            # The check for archives above only works if the url ends with
            #   something that looks like an archive. However that is not a
            #   requirement. For instance http://sourceforge.net/projects/docutils/files/docutils/0.8.1/docutils-0.8.1.tar.gz/download
            #   redirects to http://superb-dca3.dl.sourceforge.net/project/docutils/docutils/0.8.1/docutils-0.8.1.tar.gz
            #   Unless we issue a HEAD request on every url we cannot know
            #   ahead of time for sure if something is HTML or not. However we
            #   can check after we've downloaded it.
            content_type = headers.get('Content-Type', 'unknown')
            if not content_type.lower().startswith("text/html"):
                logger.debug('Skipping page %s because of Content-Type: %s' %
                                            (link, content_type))
                if cache is not None:
                    cache.set_is_archive(url)
                return None

            inst = cls(u(contents), real_url, headers, trusted=link.trusted)
        except (HTTPError, URLError, socket.timeout, socket.error, OSError, WindowsError):
            e = sys.exc_info()[1]
            desc = str(e)
            if isinstance(e, socket.timeout):
                log_meth = logger.info
                level = 1
                desc = 'timed out'
            elif isinstance(e, URLError):
                #ssl/certificate error
                if hasattr(e, 'reason') and (isinstance(e.reason, ssl.SSLError) or isinstance(e.reason, CertificateError)):
                    desc = 'There was a problem confirming the ssl certificate: %s' % e
                    log_meth = logger.notify
                else:
                    log_meth = logger.info
                if hasattr(e, 'reason') and isinstance(e.reason, socket.timeout):
                    desc = 'timed out'
                    level = 1
                else:
                    level = 2
            elif isinstance(e, HTTPError) and e.code == 404:
                ## FIXME: notify?
                log_meth = logger.info
                level = 2
            else:
                log_meth = logger.info
                level = 1
            log_meth('Could not fetch URL %s: %s' % (link, desc))
            log_meth('Will skip URL %s when looking for download links for %s' % (link.url, req))
            if cache is not None:
                cache.add_page_failure(url, level)
            return None
        if cache is not None:
            cache.add_page([url, real_url], inst)
        return inst
Example #11
File: index.py Project: markmc/pip
    def get_page(cls, link, req, cache=None, skip_archives=True):
        url = link.url
        url = url.split('#', 1)[0]
        if cache is not None and cache.too_many_failures(url):
            return None

        # Check for VCS schemes that do not support lookup as web pages.
        from pip.vcs import VcsSupport
        for scheme in VcsSupport.schemes:
            if url.lower().startswith(scheme) and url[len(scheme)] in '+:':
                logger.debug('Cannot look at %(scheme)s URL %(link)s' %
                             locals())
                return None

        if cache is not None:
            inst = cache.get_page(url)
            if inst is not None:
                return inst
        try:
            if skip_archives:
                if cache is not None:
                    if cache.is_archive(url):
                        return None
                filename = link.filename
                for bad_ext in ['.tar', '.tar.gz', '.tar.bz2', '.tgz', '.zip']:
                    if filename.endswith(bad_ext):
                        content_type = cls._get_content_type(url)
                        if content_type.lower().startswith('text/html'):
                            break
                        else:
                            logger.debug(
                                'Skipping page %s because of Content-Type: %s'
                                % (link, content_type))
                            if cache is not None:
                                cache.set_is_archive(url)
                            return None
            logger.debug('Getting page %s' % url)

            # Tack index.html onto file:// URLs that point to directories
            (scheme, netloc, path, params, query,
             fragment) = urlparse.urlparse(url)
            if scheme == 'file' and os.path.isdir(url2pathname(path)):
                # add trailing slash if not present so urljoin doesn't trim final segment
                if not url.endswith('/'):
                    url += '/'
                url = urlparse.urljoin(url, 'index.html')
                logger.debug(' file: URL is directory, getting %s' % url)

            resp = urlopen(url)

            real_url = geturl(resp)
            headers = resp.info()
            contents = resp.read()
            encoding = headers.get('Content-Encoding', None)
            #XXX need to handle exceptions and add testing for this
            if encoding is not None:
                if encoding == 'gzip':
                    contents = gzip.GzipFile(fileobj=BytesIO(contents)).read()
                if encoding == 'deflate':
                    contents = zlib.decompress(contents)

            # The check for archives above only works if the url ends with
            #   something that looks like an archive. However that is not a
            #   requirement. For instance http://sourceforge.net/projects/docutils/files/docutils/0.8.1/docutils-0.8.1.tar.gz/download
            #   redirects to http://superb-dca3.dl.sourceforge.net/project/docutils/docutils/0.8.1/docutils-0.8.1.tar.gz
            #   Unless we issue a HEAD request on every url we cannot know
            #   ahead of time for sure if something is HTML or not. However we
            #   can check after we've downloaded it.
            if not headers["Content-Type"].lower().startswith("text/html"):
                logger.debug('Skipping page %s because of Content-Type: %s' %
                             (link, headers["Content-Type"]))
                if cache is not None:
                    cache.set_is_archive(url)
                return None

            inst = cls(u(contents), real_url, headers, trusted=link.trusted)
        except (HTTPError, URLError, socket.timeout, socket.error, OSError,
                WindowsError):
            e = sys.exc_info()[1]
            desc = str(e)
            if isinstance(e, socket.timeout):
                log_meth = logger.info
                level = 1
                desc = 'timed out'
            elif isinstance(e, URLError):
                #ssl/certificate error
                if hasattr(e, 'reason') and (
                        isinstance(e.reason, ssl.SSLError)
                        or isinstance(e.reason, CertificateError)):
                    desc = 'There was a problem confirming the ssl certificate: %s' % e
                    log_meth = logger.notify
                else:
                    log_meth = logger.info
                if hasattr(e, 'reason') and isinstance(e.reason,
                                                       socket.timeout):
                    desc = 'timed out'
                    level = 1
                else:
                    level = 2
            elif isinstance(e, HTTPError) and e.code == 404:
                ## FIXME: notify?
                log_meth = logger.info
                level = 2
            else:
                log_meth = logger.info
                level = 1
            log_meth('Could not fetch URL %s: %s' % (link, desc))
            log_meth(
                'Will skip URL %s when looking for download links for %s' %
                (link.url, req))
            if cache is not None:
                cache.add_page_failure(url, level)
            return None
        if cache is not None:
            cache.add_page([url, real_url], inst)
        return inst
Example #12
File: index.py Project: markmc/pip
    def _link_package_versions(self, link, search_name):
        """
        Return an iterable of triples (pkg_resources_version_key,
        link, python_version) that can be extracted from the given
        link.

        Meant to be overridden by subclasses, not called by clients.
        """
        platform = get_platform()

        version = None
        if link.egg_fragment:
            egg_info = link.egg_fragment
        else:
            egg_info, ext = link.splitext()
            if not ext:
                if link not in self.logged_links:
                    logger.debug('Skipping link %s; not a file' % link)
                    self.logged_links.add(link)
                return []
            if egg_info.endswith('.tar'):
                # Special double-extension case:
                egg_info = egg_info[:-4]
                ext = '.tar' + ext
            if ext not in self._known_extensions():
                if link not in self.logged_links:
                    logger.debug(
                        'Skipping link %s; unknown archive format: %s' %
                        (link, ext))
                    self.logged_links.add(link)
                return []
            if "macosx10" in link.path and ext == '.zip':
                if link not in self.logged_links:
                    logger.debug('Skipping link %s; macosx10 one' % (link))
                    self.logged_links.add(link)
                return []
            if link.wheel and link.wheel.name.lower() == search_name.lower():
                version = link.wheel.version
                if not link.wheel.supported():
                    logger.debug(
                        'Skipping %s because it is not compatible with this Python'
                        % link)
                    return []

                # This is a dirty hack to prevent installing Binary Wheels from
                #   PyPI or one of its mirrors unless it is a Windows Binary
                #   Wheel. This is paired with a change to PyPI disabling
                #   uploads for the same. Once we have a mechanism for enabling
                #   support for binary wheels on linux that deals with the
                #   inherent problems of binary distribution this can be
                #   removed.
                comes_from = getattr(link, "comes_from", None)
                if (not platform.startswith('win') and comes_from is not None
                        and urlparse.urlparse(comes_from.url).netloc.endswith(
                            "pypi.python.org")):
                    if not link.wheel.supported(tags=supported_tags_noarch):
                        logger.debug(
                            "Skipping %s because it is a pypi-hosted binary "
                            "Wheel on an unsupported platform" % link)
                        return []

        if not version:
            version = self._egg_info_matches(egg_info, search_name, link)
        if version is None:
            logger.debug('Skipping link %s; wrong project name (not %s)' %
                         (link, search_name))
            return []

        if (link.internal is not None and not link.internal and
                not normalize_name(search_name).lower() in self.allow_external
                and not self.allow_all_external):
            # We have a link that we are sure is external, so we should skip
            #   it unless we are allowing externals
            logger.debug("Skipping %s because it is externally hosted." % link)
            self.need_warn_external = True
            return []

        if (link.verifiable is not None and not link.verifiable and
                not normalize_name(search_name).lower() in self.allow_insecure
                and not self.allow_all_insecure):  # TODO: Remove after release
            # We have a link that we are sure we cannot verify its integrity,
            #   so we should skip it unless we are allowing unsafe installs
            #   for this requirement.
            logger.debug("Skipping %s because it is an insecure and "
                         "unverifiable file." % link)
            self.need_warn_insecure = True
            return []

        match = self._py_version_re.search(version)
        if match:
            version = version[:match.start()]
            py_version = match.group(1)
            if py_version != sys.version[:3]:
                logger.debug(
                    'Skipping %s because Python version is incorrect' % link)
                return []
        logger.debug('Found link %s, version: %s' % (link, version))
        return [(pkg_resources.parse_version(version), link, version)]
Example #13
    def get_page(cls, link, req, cache=None, skip_archives=True):
        url = link.url
        url = url.split('#', 1)[0]
        if cache is not None and cache.too_many_failures(url):
            return None

        # Check for VCS schemes that do not support lookup as web pages.
        from pip.vcs import VcsSupport
        for scheme in VcsSupport.schemes:
            if url.lower().startswith(scheme) and url[len(scheme)] in '+:':
                logger.debug('Cannot look at %(scheme)s URL %(link)s' % locals())
                return None

        if cache is not None:
            inst = cache.get_page(url)
            if inst is not None:
                return inst
        try:
            if skip_archives:
                if cache is not None:
                    if cache.is_archive(url):
                        return None
                filename = link.filename
                for bad_ext in ['.tar', '.tar.gz', '.tar.bz2', '.tgz', '.zip']:
                    if filename.endswith(bad_ext):
                        content_type = cls._get_content_type(url)
                        if content_type.lower().startswith('text/html'):
                            break
                        else:
                            logger.debug('Skipping page %s because of Content-Type: %s' % (link, content_type))
                            if cache is not None:
                                cache.set_is_archive(url)
                            return None
            logger.debug('Getting page %s' % url)

            # Tack index.html onto file:// URLs that point to directories
            (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)
            if scheme == 'file' and os.path.isdir(url2pathname(path)):
                # add trailing slash if not present so urljoin doesn't trim final segment
                if not url.endswith('/'):
                    url += '/'
                url = urlparse.urljoin(url, 'index.html')
                logger.debug(' file: URL is directory, getting %s' % url)

            resp = urlopen(url)

            real_url = geturl(resp)
            headers = resp.info()
            inst = cls(u(resp.read()), real_url, headers)
        except (HTTPError, URLError, socket.timeout, socket.error, OSError, WindowsError):
            e = sys.exc_info()[1]
            desc = str(e)
            if isinstance(e, socket.timeout):
                log_meth = logger.info
                level = 1
                desc = 'timed out'
            elif isinstance(e, URLError):
                log_meth = logger.info
                if hasattr(e, 'reason') and isinstance(e.reason, socket.timeout):
                    desc = 'timed out'
                    level = 1
                else:
                    level = 2
            elif isinstance(e, HTTPError) and e.code == 404:
                ## FIXME: notify?
                log_meth = logger.info
                level = 2
            else:
                log_meth = logger.info
                level = 1
            log_meth('Could not fetch URL %s: %s' % (link, desc))
            log_meth('Will skip URL %s when looking for download links for %s' % (link.url, req))
            if cache is not None:
                cache.add_page_failure(url, level)
            return None
        if cache is not None:
            cache.add_page([url, real_url], inst)
        return inst
Example #14
    def get_page(cls, link, req, cache=None, skip_archives=True, session=None):
        if session is None:
            session = PipSession()

        url = link.url
        url = url.split('#', 1)[0]
        if cache is not None and cache.too_many_failures(url):
            return None

        # Check for VCS schemes that do not support lookup as web pages.
        from pip.vcs import VcsSupport
        for scheme in VcsSupport.schemes:
            if url.lower().startswith(scheme) and url[len(scheme)] in '+:':
                logger.debug('Cannot look at %(scheme)s URL %(link)s' %
                             locals())
                return None

        if cache is not None:
            inst = cache.get_page(url)
            if inst is not None:
                return inst
        try:
            if skip_archives:
                if cache is not None:
                    if cache.is_archive(url):
                        return None
                filename = link.filename
                for bad_ext in ['.tar', '.tar.gz', '.tar.bz2', '.tgz', '.zip']:
                    if filename.endswith(bad_ext):
                        content_type = cls._get_content_type(
                            url,
                            session=session,
                        )
                        if content_type.lower().startswith('text/html'):
                            break
                        else:
                            logger.debug(
                                'Skipping page %s because of Content-Type: '
                                '%s' % (link, content_type))
                            if cache is not None:
                                cache.set_is_archive(url)
                            return None
            logger.debug('Getting page %s' % url)

            # Tack index.html onto file:// URLs that point to directories
            (scheme, netloc, path, params, query, fragment) = \
                urlparse.urlparse(url)
            if scheme == 'file' and os.path.isdir(url2pathname(path)):
                # add trailing slash if not present so urljoin doesn't trim
                # final segment
                if not url.endswith('/'):
                    url += '/'
                url = urlparse.urljoin(url, 'index.html')
                logger.debug(' file: URL is directory, getting %s' % url)

            resp = session.get(url, headers={"Accept": "text/html"})
            resp.raise_for_status()

            # The check for archives above only works if the url ends with
            #   something that looks like an archive. However that is not a
            #   requirement of an url. Unless we issue a HEAD request on every
            #   url we cannot know ahead of time for sure if something is HTML
            #   or not. However we can check after we've downloaded it.
            content_type = resp.headers.get('Content-Type', 'unknown')
            if not content_type.lower().startswith("text/html"):
                logger.debug('Skipping page %s because of Content-Type: %s' %
                             (link, content_type))
                if cache is not None:
                    cache.set_is_archive(url)
                return None

            inst = cls(resp.text, resp.url, resp.headers, trusted=link.trusted)
        except requests.HTTPError as exc:
            level = 2 if exc.response.status_code == 404 else 1
            cls._handle_fail(req, link, exc, url, cache=cache, level=level)
        except requests.ConnectionError as exc:
            cls._handle_fail(
                req,
                link,
                "connection error: %s" % exc,
                url,
                cache=cache,
            )
        except requests.Timeout:
            cls._handle_fail(req, link, "timed out", url, cache=cache)
        except SSLError as exc:
            reason = ("There was a problem confirming the ssl certificate: "
                      "%s" % exc)
            cls._handle_fail(
                req,
                link,
                reason,
                url,
                cache=cache,
                level=2,
                meth=logger.notify,
            )
        else:
            if cache is not None:
                cache.add_page([url, resp.url], inst)
            return inst
Example #15
    def find_requirement(self, req, upgrade):
        def mkurl_pypi_url(url):
            loc = posixpath.join(url, url_name)
            # For maximum compatibility with easy_install, ensure the path
            # ends in a trailing slash.  Although this isn't in the spec
            # (and PyPI can handle it without the slash) some other index
            # implementations might break if they relied on easy_install's
            # behavior.
            if not loc.endswith('/'):
                loc = loc + '/'
            return loc

        url_name = req.url_name
        # Only check main index if index URL is given:
        main_index_url = None
        if self.index_urls:
            # Check that we have the url_name correctly spelled:
            main_index_url = Link(
                mkurl_pypi_url(self.index_urls[0]),
                trusted=True,
            )
            # This will also cache the page, so it's okay that we get it again
            # later:
            page = self._get_page(main_index_url, req)
            if page is None:
                url_name = self._find_url_name(
                    Link(self.index_urls[0], trusted=True), url_name,
                    req) or req.url_name

        if url_name is not None:
            locations = [mkurl_pypi_url(url)
                         for url in self.index_urls] + self.find_links
        else:
            locations = list(self.find_links)
        for version in req.absolute_versions:
            if url_name is not None and main_index_url is not None:
                locations = [posixpath.join(main_index_url.url, version)
                             ] + locations

        file_locations, url_locations = self._sort_locations(locations)

        # We trust every url that the user has given us whether it was given
        #   via --index-url or --find-links
        locations = [Link(url, trusted=True) for url in url_locations]

        logger.debug('URLs to search for versions for %s:' % req)
        for location in locations:
            logger.debug('* %s' % location)

            # Determine if this url used a secure transport mechanism
            parsed = urlparse.urlparse(str(location))
            if parsed.scheme in INSECURE_SCHEMES:
                secure_schemes = INSECURE_SCHEMES[parsed.scheme]

                if len(secure_schemes) == 1:
                    ctx = (location, parsed.scheme, secure_schemes[0],
                           parsed.netloc)
                    logger.warn("%s uses an insecure transport scheme (%s). "
                                "Consider using %s if %s has it available" %
                                ctx)
                elif len(secure_schemes) > 1:
                    ctx = (
                        location,
                        parsed.scheme,
                        ", ".join(secure_schemes),
                        parsed.netloc,
                    )
                    logger.warn("%s uses an insecure transport scheme (%s). "
                                "Consider using one of %s if %s has any of "
                                "them available" % ctx)
                else:
                    ctx = (location, parsed.scheme)
                    logger.warn("%s uses an insecure transport scheme (%s)." %
                                ctx)

        found_versions = []
        found_versions.extend(
            self._package_versions(
                # We trust every directly linked archive in find_links
                [Link(url, '-f', trusted=True) for url in self.find_links],
                req.name.lower()))
        page_versions = []
        for page in self._get_pages(locations, req):
            logger.debug('Analyzing links from page %s' % page.url)
            logger.indent += 2
            try:
                page_versions.extend(
                    self._package_versions(page.links, req.name.lower()))
            finally:
                logger.indent -= 2
        file_versions = list(
            self._package_versions([Link(url) for url in file_locations],
                                   req.name.lower()))
        if (not found_versions and not page_versions and not file_versions):
            logger.fatal(
                'Could not find any downloads that satisfy the requirement'
                ' %s' % req)

            if self.need_warn_external:
                logger.warn("Some externally hosted files were ignored (use "
                            "--allow-external %s to allow)." % req.name)

            if self.need_warn_unverified:
                logger.warn("Some insecure and unverifiable files were ignored"
                            " (use --allow-unverified %s to allow)." %
                            req.name)

            raise DistributionNotFound('No distributions at all found for %s' %
                                       req)
        installed_version = []
        if req.satisfied_by is not None:
            installed_version = [(
                req.satisfied_by.parsed_version,
                INSTALLED_VERSION,
                req.satisfied_by.version,
            )]
        if file_versions:
            file_versions.sort(reverse=True)
            logger.info('Local files found: %s' % ', '.join([
                url_to_path(link.url)
                for parsed, link, version in file_versions
            ]))
        # this is an intentional priority ordering
        all_versions = installed_version + file_versions + found_versions \
            + page_versions
        applicable_versions = []
        for (parsed_version, link, version) in all_versions:
            if version not in req.req:
                logger.info("Ignoring link %s, version %s doesn't match %s" %
                            (link, version, ','.join(
                                [''.join(s) for s in req.req.specs])))
                continue
            elif (is_prerelease(version)
                  and not (self.allow_all_prereleases or req.prereleases)):
                # If this version isn't the already installed one, then
                #   ignore it if it's a pre-release.
                if link is not INSTALLED_VERSION:
                    logger.info(
                        "Ignoring link %s, version %s is a pre-release (use "
                        "--pre to allow)." % (link, version))
                    continue
            applicable_versions.append((parsed_version, link, version))
        applicable_versions = self._sort_versions(applicable_versions)
        existing_applicable = bool([
            link for parsed_version, link, version in applicable_versions
            if link is INSTALLED_VERSION
        ])
        if not upgrade and existing_applicable:
            if applicable_versions[0][1] is INSTALLED_VERSION:
                logger.info(
                    'Existing installed version (%s) is most up-to-date and '
                    'satisfies requirement' % req.satisfied_by.version)
            else:
                logger.info(
                    'Existing installed version (%s) satisfies requirement '
                    '(most up-to-date version is %s)' %
                    (req.satisfied_by.version, applicable_versions[0][2]))
            return None
        if not applicable_versions:
            logger.fatal(
                'Could not find a version that satisfies the requirement %s '
                '(from versions: %s)' % (req, ', '.join([
                    version for parsed_version, link, version in all_versions
                ])))

            if self.need_warn_external:
                logger.warn("Some externally hosted files were ignored (use "
                            "--allow-external to allow).")

            if self.need_warn_unverified:
                logger.warn("Some insecure and unverifiable files were ignored"
                            " (use --allow-unverified %s to allow)." %
                            req.name)

            raise DistributionNotFound(
                'No distributions matching the version for %s' % req)
        if applicable_versions[0][1] is INSTALLED_VERSION:
            # We have an existing version, and it's the best version
            logger.info(
                'Installed version (%s) is most up-to-date (past versions: '
                '%s)' % (req.satisfied_by.version, ', '.join([
                    version for parsed_version, link, version in
                    applicable_versions[1:]
                ]) or 'none'))
            raise BestVersionAlreadyInstalled
        if len(applicable_versions) > 1:
            logger.info(
                'Using version %s (newest of versions: %s)' %
                (applicable_versions[0][2], ', '.join([
                    version
                    for parsed_version, link, version in applicable_versions
                ])))

        selected_version = applicable_versions[0][1]

        if (selected_version.internal is not None
                and not selected_version.internal):
            logger.warn("%s an externally hosted file and may be "
                        "unreliable" % req.name)

        if (selected_version.verifiable is not None
                and not selected_version.verifiable):
            logger.warn("%s is potentially insecure and "
                        "unverifiable." % req.name)

        if selected_version._deprecated_regex:
            logger.deprecated(
                "1.7", "%s discovered using a deprecated method of parsing, "
                "in the future it will no longer be discovered" % req.name)

        return selected_version