def download_file(
    self,
    file_key: str,
    file_vars: Optional[Dict[str, str]] = None,
    file_path: Optional[pathlib.Path] = None,
    create_dirs: bool = True,
    **path_args: Any,
) -> Optional[pathlib.Path]:
    """Download a file from the web and save it to disk.

    Use pycurl (libcurl) to do the actual downloading. Requests might be nicer for this, but turned out to be much
    slower (and in practice unusable for bigger files) and also not really supporting ftp-downloads.

    If the transfer fails (pycurl raises an error, or the reported response code is outside 200-299), the problem
    is logged, the first lines of whatever was written to disk are logged as a debugging aid, the partial file is
    removed, and None is returned.

    Args:
        file_key:     File key that should be downloaded.
        file_vars:    File variables used to find path from file_key.
        file_path:    Path where file will be saved, default is to read from configuration.
        create_dirs:  Create directories as necessary before downloading file.
        path_args:    Arguments passed on to .path() to find file_path.

    Returns:
        Path to downloaded file, None if no file was downloaded.
    """
    # Do not download anything if download_missing class variable is False
    if not self.download_missing:
        return None

    # Do not download anything if url is not given in configuration
    if "url" not in self[file_key] or not self[file_key].url.str:
        return None

    # Get file_path from configuration if it's not given explicitly
    file_url = self.url(file_key, file_vars=file_vars, **path_args)
    is_zipped = self.is_path_zipped(file_url)
    path_args.update(is_zipped=is_zipped)
    if file_path is None:
        file_path = self.path(file_key, file_vars=file_vars, download_missing=False, **path_args)
    # The local file always takes its name from the url
    file_path = file_path.with_name(file_url.name)

    if create_dirs:
        file_path.parent.mkdir(parents=True, exist_ok=True)

    log.info(f"Download {file_key} from '{file_url}' to '{file_path}'")
    downloaded = False
    with builtins.open(file_path, mode="wb") as fid:
        c = pycurl.Curl()
        c.setopt(c.URL, file_url)
        c.setopt(c.WRITEDATA, fid)
        try:
            c.perform()
            if not (200 <= c.getinfo(c.HTTP_CODE) <= 299):
                raise pycurl.error()
        except pycurl.error:
            # Log while the curl handle is still open so the url and response code can be queried
            log.error(f"Problem downloading file: {c.getinfo(c.EFFECTIVE_URL)} ({c.getinfo(c.HTTP_CODE)})")
        else:
            downloaded = True
        finally:
            c.close()

    if downloaded:
        log.info(f"Done downloading {file_key}")
        return file_path

    # Clean up only after the output file is closed: everything written has then been flushed to disk, and the
    # file can be removed also on platforms that refuse to delete open files
    if file_path.exists():
        # Print first 10 lines to console. The payload may be binary or cut short, so never fail on decoding
        head_of_file = f"Contents of '{file_path}':\n" + "\n".join(
            file_path.read_text(errors="replace").split("\n")[:10]
        )
        log.info(console.indent(head_of_file, num_spaces=8))
        file_path.unlink()
    log.warn(f"Try to download '{file_url}' manually and save it at '{file_path}'")
    return None
def no_traceback_hook(_not_used_1, value, _not_used_2):
    """Log an error as a plain message, leaving out any traceback.

    Only the middle argument is used. The three-argument shape presumably matches an exception hook
    (type, value, traceback) -- confirm where this function is installed.

    Args:
        _not_used_1:  Ignored.
        value:        Object whose string representation is written to the error log.
        _not_used_2:  Ignored.
    """
    message = str(value)
    log.error(message)