def download_pdf_file(url: str,
                      filename: str,
                      filepath: str = '.',
                      progress: bool = False) -> None:
    """
    Download the PDF located at ``url`` and store it as ``filepath/filename``.

    The response body is streamed to disk in 1024-byte blocks. A response
    whose status code is not 200 is logged as a warning and no file is
    written. When a ``ConnectionError`` is caught, the function waits
    10 seconds and retries with the same arguments (there is no upper bound
    on the number of retries). Any other exception is logged and re-raised.

    Parameters
    ----------
    url : str
        Download url for the article.
    filename : str
        The filename for the PDF.
    filepath : str
        The directory to store the downloaded PDF in. It is created when
        it does not exist yet.
    progress : bool
        Whether the download progress should be printed out or not.
    """
    try:
        headers = {"User-Agent": "python"}
        # ``stream`` is a boolean flag; the original passed the URL string,
        # which only worked because a non-empty string is truthy.
        r = requests.get(url, stream=True, headers=headers)
        try:
            if r.status_code == 200:
                # 0 means the server did not announce a length; progress
                # reporting is skipped in that case to avoid dividing by 0.
                file_size = int(r.headers.get('content-length', 0))
                block_size = 1024

                # exist_ok avoids the check-then-create race; an empty
                # ``filepath`` means "current directory", nothing to create.
                if filepath:
                    os.makedirs(filepath, exist_ok=True)
                path = os.path.join(filepath, filename)

                with open(path, 'wb') as file:
                    downloaded = 0
                    for chunk in r.iter_content(block_size):
                        if progress and file_size > 0:
                            downloaded += len(chunk)
                            Unpywall._progress(downloaded / file_size)
                        file.write(chunk)
            else:
                logger.warning(
                    "Not able to download file, Http Response: {}".format(
                        r.status_code))
        finally:
            # A streamed response keeps its connection checked out until
            # the body is consumed or the response is closed.
            r.close()

    except ConnectionError:
        # NOTE(review): if ``ConnectionError`` resolves to the builtin and
        # not ``requests.exceptions.ConnectionError``, failures raised by
        # requests will not match this clause - confirm the file's imports.
        logger.warning('Connection error received, will retry after 10 secs')
        sleep(10)
        # Forward ``progress`` too; the original retry silently dropped it.
        Unpywall.download_pdf_file(url, filename, filepath, progress)
    except Exception:
        logger.warning('Rethrowing error')
        raise
def test_progress(self, Unpywall, capfd):
    """Calling ``Unpywall._progress`` must produce non-empty captured output."""
    Unpywall._progress(0.5)

    # Only the ``out`` stream of the capture is inspected.
    printed = capfd.readouterr().out
    assert len(printed) >= 1