Example no. 1
0
def getUrl(url: str, timeout) -> str:
    """
    Fetch the text content at `url` via HTTP GET, following up to 10 redirects.

    Connection errors are retried by the mounted HTTPAdapter according to
    `Settings.max_retries_on_connection_error` / `Settings.backoff_factor`.
    Redirect targets are resolved through `altlink` before being followed.

    :param url: the URL to fetch.
    :param timeout: timeout value passed straight to `session.get`
                    (seconds, or a (connect, read) tuple — requests semantics).
    :return: the response body decoded as text.
    :raises ArchiveConnectionError: on connection reset/failure or timeout.
    :raises ArchiveDownloadError: when the final response status is not 200.
    """
    logger = getLogger("aqt.helper")
    with requests.Session() as session:
        retries = requests.adapters.Retry(
            total=Settings.max_retries_on_connection_error,
            backoff_factor=Settings.backoff_factor)
        adapter = requests.adapters.HTTPAdapter(max_retries=retries)
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        try:
            # Use the session (not the module-level requests.get) so the
            # retry adapter configured above actually applies to this request.
            r = session.get(url, allow_redirects=False, timeout=timeout)
            num_redirects = 0
            while 300 < r.status_code < 309 and num_redirects < 10:
                num_redirects += 1
                logger.debug("Asked to redirect({}) to: {}".format(
                    r.status_code, r.headers["Location"]))
                newurl = altlink(r.url, r.headers["Location"])
                logger.info("Redirected: {}".format(urlparse(newurl).hostname))
                r = session.get(newurl, stream=True, timeout=timeout)
        except (
                ConnectionResetError,
                requests.exceptions.ConnectionError,
                requests.exceptions.Timeout,
        ) as e:
            raise ArchiveConnectionError(
                f"Failure to connect to {url}: {type(e).__name__}") from e
        else:
            if r.status_code != 200:
                msg = f"Failed to retrieve file at {url}\nServer response code: {r.status_code}, reason: {r.reason}"
                raise ArchiveDownloadError(msg)
        result = r.text
    return result
Example no. 2
0
def getUrl(url: str, timeout, expected_hash: Optional[bytes] = None) -> str:
    """
    Gets a file from `url` via HTTP GET.

    No caller should call this function without providing an expected_hash, unless
    the caller is `get_hash`, which cannot know what the expected hash should be.

    :param url: the URL to fetch.
    :param timeout: timeout passed to `session.get` (requests semantics).
    :param expected_hash: raw SHA-256 digest the body must match, or None to
                          skip verification.
    :return: the response body decoded as text.
    :raises ArchiveConnectionError: on connection reset/failure or timeout.
    :raises ArchiveDownloadError: when the final response status is not 200.
    :raises ArchiveChecksumError: when the body's SHA-256 differs from
                                  `expected_hash`.
    """
    logger = getLogger("aqt.helper")
    with requests.sessions.Session() as session:
        retries = requests.adapters.Retry(
            total=Settings.max_retries_on_connection_error,
            backoff_factor=Settings.backoff_factor)
        adapter = requests.adapters.HTTPAdapter(max_retries=retries)
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        try:
            r = session.get(url, allow_redirects=False, timeout=timeout)
            num_redirects = 0
            while 300 < r.status_code < 309 and num_redirects < 10:
                num_redirects += 1
                logger.debug("Asked to redirect({}) to: {}".format(
                    r.status_code, r.headers["Location"]))
                newurl = altlink(r.url, r.headers["Location"])
                logger.info("Redirected: {}".format(urlparse(newurl).hostname))
                r = session.get(newurl, stream=True, timeout=timeout)
        except (
                ConnectionResetError,
                requests.exceptions.ConnectionError,
                requests.exceptions.Timeout,
        ) as e:
            raise ArchiveConnectionError(
                f"Failure to connect to {url}: {type(e).__name__}") from e
        else:
            if r.status_code != 200:
                msg = f"Failed to retrieve file at {url}\nServer response code: {r.status_code}, reason: {r.reason}"
                raise ArchiveDownloadError(msg)
        result = r.text
        filename = url.split("/")[-1]
        actual_hash = hashlib.sha256(bytes(result, "utf-8")).digest()
        if expected_hash is not None and expected_hash != actual_hash:
            # Report the actual filename (previously the message printed the
            # placeholder "(unknown)" while `filename` sat unused above).
            raise ArchiveChecksumError(
                f"Downloaded file {filename} is corrupted! Detect checksum error.\n"
                f"Expect {expected_hash.hex()}: {url}\n"
                f"Actual {actual_hash.hex()}: {filename}")
    return result
Example no. 3
0
def downloadBinaryFile(url: str, out: str, hash_algo: str, exp: bytes,
                       timeout):
    """
    Stream the file at `url` to the local path `out`, verifying its checksum.

    A single redirect (status 301-308) is resolved through `altlink` and
    followed; connection errors are retried by the mounted HTTPAdapter.

    :param url: the URL to download.
    :param out: destination file path (opened in binary write mode).
    :param hash_algo: hashlib algorithm name used for verification.
    :param exp: expected raw digest, or None to skip verification.
    :param timeout: timeout passed to `session.get` (requests semantics).
    :raises ArchiveConnectionError: on connection failure or timeout.
    :raises ArchiveDownloadError: when writing the file fails.
    :raises ArchiveChecksumError: when the computed digest differs from `exp`.
    """
    logger = getLogger("aqt.helper")
    filename = Path(url).name
    with requests.Session() as session:
        retries = requests.adapters.Retry(
            total=Settings.max_retries_on_connection_error,
            backoff_factor=Settings.backoff_factor)
        adapter = requests.adapters.HTTPAdapter(max_retries=retries)
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        try:
            r = session.get(url,
                            allow_redirects=False,
                            stream=True,
                            timeout=timeout)
            if 300 < r.status_code < 309:
                logger.debug("Asked to redirect({}) to: {}".format(
                    r.status_code, r.headers["Location"]))
                newurl = altlink(r.url, r.headers["Location"])
                logger.info("Redirected: {}".format(urlparse(newurl).hostname))
                r = session.get(newurl, stream=True, timeout=timeout)
        except requests.exceptions.ConnectionError as e:
            raise ArchiveConnectionError(f"Connection error: {e.args}") from e
        except requests.exceptions.Timeout as e:
            raise ArchiveConnectionError(
                f"Connection timeout: {e.args}") from e
        else:
            # `file_hash` avoids shadowing the builtin `hash`.
            file_hash = hashlib.new(hash_algo)
            try:
                with open(out, "wb") as fd:
                    for chunk in r.iter_content(chunk_size=8196):
                        fd.write(chunk)
                        file_hash.update(chunk)
                    fd.flush()
            except Exception as e:
                # Report the actual filename (previously the message printed
                # the placeholder "(unknown)" while `filename` sat unused).
                raise ArchiveDownloadError(
                    f"Download of {filename} has error: {e}") from e
            if exp is not None and file_hash.digest() != exp:
                raise ArchiveChecksumError(
                    f"Downloaded file {filename} is corrupted! Detect checksum error.\n"
                    f"Expect {exp.hex()}: {url}\n"
                    f"Actual {file_hash.digest().hex()}: {out}")
Example no. 4
0
 def mock_get_url(url, *args, **kwargs):
     """Serve the fixture file's text when one is configured, else simulate a 404."""
     if xml_file:
         return (Path(__file__).parent / "data" / xml_file).read_text("utf-8")
     raise ArchiveDownloadError(
         f"Failed to retrieve file at {url}\nServer response code: 404, reason: Not Found"
     )
Example no. 5
0
 def _mock(url, **kwargs):
     """Fail while only one distinct URL has been requested; succeed afterwards."""
     urls_requested.add(url)
     if len(urls_requested) > 1:
         return "some_html_content"
     raise ArchiveDownloadError()
Example no. 6
0
 def _mock(url, **kwargs):
     """Record the requested URL, then always fail with a download error."""
     urls_requested.add(url)
     raise ArchiveDownloadError()
Example no. 7
0
 def mock_get_url(url, *args, **kwargs):
     """Return the canned XML payload when a fixture name is set, else simulate a 404."""
     if xml_file:
         return xml
     raise ArchiveDownloadError(
         f"Failed to retrieve file at {url}\nServer response code: 404, reason: Not Found"
     )