Esempio n. 1
0
 def request(
     self,
     host: "_HostType",
     handler: str,
     request_body: bytes,
     verbose: bool = False,
 ) -> Tuple["_Marshallable", ...]:
     """POST ``request_body`` to ``host``/``handler`` and parse the reply.

     The target URL is assembled from ``self._scheme``, ``host`` and
     ``handler``. The request is sent through ``self._session`` with a
     ``text/xml`` content type and a streamed response, whose raw body is
     handed to ``self.parse_response``.

     :param host:         Network location of the server; must be a ``str``.
     :param handler:      Path component of the request URL.
     :param request_body: Payload sent as the POST body.
     :param verbose:      Stored on ``self.verbose`` before parsing.
     :returns: Whatever ``self.parse_response`` returns for the raw response.
     :raises NetworkConnectionError: re-raised after being logged at
         critical level.
     """
     assert isinstance(host, str)
     parts = (self._scheme, host, handler, None, None, None)
     url = urllib.parse.urlunparse(parts)
     try:
         headers = {"Content-Type": "text/xml"}
         response = self._session.post(
             url,
             data=request_body,
             headers=headers,
             stream=True,
         )
         raise_for_status(response)
         self.verbose = verbose
         return self.parse_response(response.raw)
     except NetworkConnectionError as exc:
         # Compare against None instead of relying on truthiness:
         # requests-style Response objects evaluate as falsy for 4xx/5xx
         # statuses, which is precisely the situation handled here. A bare
         # ``assert exc.response`` would raise AssertionError and hide the
         # original network error.
         assert exc.response is not None
         logger.critical(
             "HTTP error %s while getting %s",
             exc.response.status_code,
             url,
         )
         raise
Esempio n. 2
0
def _ensure_html_response(url: str, session: PipSession) -> None:
    """Probe ``url`` with a HEAD request and verify that it serves HTML.

    Raises `_NotHTTP` when the URL scheme is neither ``http`` nor ``https``
    (so no HEAD request can be made). Otherwise the HEAD response is passed
    through `raise_for_status` and then `_ensure_html_header`, which is
    expected to raise `_NotHTML` if the content type is not text/html.
    """
    if urllib.parse.urlsplit(url).scheme not in {"http", "https"}:
        raise _NotHTTP()

    head_response = session.head(url, allow_redirects=True)
    raise_for_status(head_response)

    _ensure_html_header(head_response)
Esempio n. 3
0
 def __init__(
     self, url: str, session: PipSession, chunk_size: int = CONTENT_CHUNK_SIZE
 ) -> None:
     """Probe ``url`` with a HEAD request and set up a local backing file.

     :param url:        Location of the remote file.
     :param session:    Session used for the HEAD request; kept on the
                        instance as ``self._session``.
     :param chunk_size: Stored as ``self._chunk_size``.

     :raises HTTPRangeRequestUnsupported: if the ``Accept-Ranges`` header of
         the HEAD response does not contain ``bytes``.
     """
     head = session.head(url, headers=HEADERS)
     raise_for_status(head)
     assert head.status_code == 200
     self._session, self._url, self._chunk_size = session, url, chunk_size
     self._length = int(head.headers["Content-Length"])
     # Reject servers without byte-range support *before* allocating the
     # temporary file, so the failure path does not leave an open file
     # (and its on-disk entry) behind until garbage collection.
     if "bytes" not in head.headers.get("Accept-Ranges", "none"):
         raise HTTPRangeRequestUnsupported("range request is not supported")
     self._file = NamedTemporaryFile()
     # NOTE(review): truncate() is defined elsewhere on the class; presumably
     # it resizes self._file to the remote length -- confirm.
     self.truncate(self._length)
     self._left: List[int] = []
     self._right: List[int] = []
     self._check_zip()
Esempio n. 4
0
def _get_html_response(url: str, session: PipSession) -> Response:
    """Fetch an HTML page with GET and return the response.

    The work happens in three stages:

    1. When the URL's filename looks like an archive, a HEAD request is made
       first (via `_ensure_html_response`) so that a large non-HTML file is
       not downloaded by accident. That step raises `_NotHTTP` if the
       content type cannot be determined, or `_NotHTML` if it is not HTML.
    2. The GET request itself is issued; HTTP errors surface through
       `raise_for_status`.
    3. The Content-Type header of the GET response is checked, raising
       `_NotHTML` if the page turns out not to be HTML.
    """
    looks_like_archive = is_archive_file(Link(url).filename)
    if looks_like_archive:
        _ensure_html_response(url, session=session)

    logger.debug("Getting page %s", redact_auth_from_url(url))

    request_headers = {
        "Accept": "text/html",
        # Cached data for /simple/ must not be returned blindly: authors
        # generally expect `twine upload && pip install` to work, and it
        # would not if they had run pip install within the last ~10
        # minutes. With max-age=0 no cached data is used without
        # revalidation. The advantage over no-cache is that conditional
        # requests are still supported, so traffic stays minimal when the
        # page has not changed; the only cost is that the conditional GET
        # round trip now happens every time instead of once per 10 minutes.
        # For more information, please see pypa/pip#5670.
        "Cache-Control": "max-age=0",
    }
    page = session.get(url, headers=request_headers)
    raise_for_status(page)

    # The archive check at the top only helps when the URL happens to end
    # in something archive-like, which URLs are not required to do. Short
    # of sending a HEAD request for every URL, the only reliable moment to
    # verify that the content is HTML is after it has been downloaded.
    _ensure_html_header(page)

    return page
Esempio n. 5
0
def get_file_content(url: str, session: PipSession) -> Tuple[str, str]:
    """Return ``(location, content)`` for a requirements-style file.

    ``url`` may be a plain filename, a file: URL, or an http(s): URL.
    Remote and file: URLs are fetched through ``session``; anything else is
    treated as a local path, read as bytes and decoded with ``auto_decode``
    (which honours ``# -*- coding:`` declarations). Content is unicode.

    :param url:         File path or url.
    :param session:     PipSession instance.
    :raises InstallationError: if a local path cannot be opened or read.
    """
    # file:// is included because pip has special support for such URLs
    # (LocalFSAdapter), so the session can serve them as well.
    if get_url_scheme(url) in ('http', 'https', 'file'):
        response = session.get(url)
        raise_for_status(response)
        return response.url, response.text

    # No recognised scheme: fall back to treating the value as a bare path.
    try:
        with open(url, 'rb') as stream:
            raw_bytes = stream.read()
            content = auto_decode(raw_bytes)
    except OSError as exc:
        raise InstallationError(f'Could not open requirements file: {exc}')
    return url, content
Esempio n. 6
0
def _http_get_download(session: PipSession, link: Link) -> Response:
    """Start a streaming GET for ``link`` and return the checked response.

    The request is made with the module-level ``HEADERS`` and the response
    is passed through ``raise_for_status`` before being returned.
    """
    # Everything from the first "#" onwards (the URL fragment) is dropped
    # before the request is sent.
    url_without_fragment, _, _ = link.url.partition("#")
    response = session.get(url_without_fragment, headers=HEADERS, stream=True)
    raise_for_status(response)
    return response