Exemple #1
0
    def _build_session(self, options, retries=None, timeout=None):
        # type: (Values, Optional[int], Optional[int]) -> PipSession
        session = PipSession(
            cache=(normalize_path(os.path.join(options.cache_dir, "http"))
                   if options.cache_dir else None),
            retries=retries if retries is not None else options.retries,
            trusted_hosts=options.trusted_hosts,
            index_urls=self._get_index_urls(options),
        )

        # Handle custom ca-bundles from the user
        if options.cert:
            session.verify = options.cert

        # Handle SSL client certificate
        if options.client_cert:
            session.cert = options.client_cert

        # Handle timeouts
        if options.timeout or timeout:
            session.timeout = (timeout
                               if timeout is not None else options.timeout)

        # Handle configured proxies
        if options.proxy:
            session.proxies = {
                "http": options.proxy,
                "https": options.proxy,
            }

        # Determine if we can prompt the user for authentication or not
        session.auth.prompting = not options.no_input

        return session
Exemple #2
0
def _ensure_html_response(url: str, session: PipSession) -> None:
    """Send a HEAD request to the URL, and ensure the response contains HTML.

    Raises `_NotHTTP` if the URL is not available for a HEAD request, or
    `_NotHTML` if the content type is not text/html.
    """
    scheme, netloc, path, query, fragment = urllib.parse.urlsplit(url)
    if scheme not in {"http", "https"}:
        raise _NotHTTP()

    resp = session.head(url, allow_redirects=True)
    raise_for_status(resp)

    _ensure_html_header(resp)
Exemple #3
0
 def __init__(
     self, url: str, session: PipSession, chunk_size: int = CONTENT_CHUNK_SIZE
 ) -> None:
     head = session.head(url, headers=HEADERS)
     raise_for_status(head)
     assert head.status_code == 200
     self._session, self._url, self._chunk_size = session, url, chunk_size
     self._length = int(head.headers["Content-Length"])
     self._file = NamedTemporaryFile()
     self.truncate(self._length)
     self._left: List[int] = []
     self._right: List[int] = []
     if "bytes" not in head.headers.get("Accept-Ranges", "none"):
         raise HTTPRangeRequestUnsupported("range request is not supported")
     self._check_zip()
Exemple #4
0
def _get_html_response(url: str, session: PipSession) -> Response:
    """Access an HTML page with GET, and return the response.

    This consists of three parts:

    1. If the URL looks suspiciously like an archive, send a HEAD first to
       check the Content-Type is HTML, to avoid downloading a large file.
       Raise `_NotHTTP` if the content type cannot be determined, or
       `_NotHTML` if it is not HTML.
    2. Actually perform the request. Raise HTTP exceptions on network failures.
    3. Check the Content-Type header to make sure we got HTML, and raise
       `_NotHTML` otherwise.
    """
    if is_archive_file(Link(url).filename):
        _ensure_html_response(url, session=session)

    logger.debug("Getting page %s", redact_auth_from_url(url))

    resp = session.get(
        url,
        headers={
            "Accept": "text/html",
            # We don't want to blindly returned cached data for
            # /simple/, because authors generally expecting that
            # twine upload && pip install will function, but if
            # they've done a pip install in the last ~10 minutes
            # it won't. Thus by setting this to zero we will not
            # blindly use any cached data, however the benefit of
            # using max-age=0 instead of no-cache, is that we will
            # still support conditional requests, so we will still
            # minimize traffic sent in cases where the page hasn't
            # changed at all, we will just always incur the round
            # trip for the conditional GET now instead of only
            # once per 10 minutes.
            # For more information, please see pypa/pip#5670.
            "Cache-Control": "max-age=0",
        },
    )
    raise_for_status(resp)

    # The check for archives above only works if the url ends with
    # something that looks like an archive. However that is not a
    # requirement of an url. Unless we issue a HEAD request on every
    # url we cannot know ahead of time for sure if something is HTML
    # or not. However we can check after we've downloaded it.
    _ensure_html_header(resp)

    return resp
Exemple #5
0
def unpack_url(
        link,  # type: Link
        location,  # type: str
        download_dir=None,  # type: Optional[str]
        session=None,  # type: Optional[PipSession]
        hashes=None,  # type: Optional[Hashes]
        progress_bar="on"  # type: str
):
    # type: (...) -> None
    """Unpack link.
       If link is a VCS link:
         if only_download, export into download_dir and ignore location
          else unpack into location
       for other types of link:
         - unpack into location
         - if download_dir, copy the file into download_dir
         - if only_download, mark location for deletion

    :param hashes: A Hashes object, one of whose embedded hashes must match,
        or HashMismatch will be raised. If the Hashes is empty, no matches are
        required, and unhashable types of requirements (like VCS ones, which
        would ordinarily raise HashUnsupported) are allowed.
    """
    # non-editable vcs urls
    if link.is_vcs:
        unpack_vcs_link(link, location)

    # file urls
    elif link.is_file:
        unpack_file_url(link, location, download_dir, hashes=hashes)

    # http urls
    else:
        if session is None:
            session = PipSession()

        unpack_http_url(link,
                        location,
                        download_dir,
                        session,
                        hashes=hashes,
                        progress_bar=progress_bar)
Exemple #6
0
def get_file_content(url: str, session: PipSession) -> Tuple[str, str]:
    """Gets the content of a file; it may be a filename, file: URL, or
    http: URL.  Returns (location, content).  Content is unicode.
    Respects # -*- coding: declarations on the retrieved files.

    :param url:         File path or url.
    :param session:     PipSession instance.
    """
    scheme = get_url_scheme(url)

    # Pip has special support for file:// URLs (LocalFSAdapter).
    if scheme in ['http', 'https', 'file']:
        resp = session.get(url)
        raise_for_status(resp)
        return resp.url, resp.text

    # Assume this is a bare path.
    try:
        with open(url, 'rb') as f:
            content = auto_decode(f.read())
    except OSError as exc:
        raise InstallationError(f'Could not open requirements file: {exc}')
    return url, content
Exemple #7
0
def _http_get_download(session: PipSession, link: Link) -> Response:
    target_url = link.url.split("#", 1)[0]
    resp = session.get(target_url, headers=HEADERS, stream=True)
    raise_for_status(resp)
    return resp