def download_url(
        url: str,
        destination: str,
        *,
        force_download: bool = False,
        verbose: bool = False,
) -> str:
    r"""Download URL to destination.

    Args:
        url: URL of file to download
        destination: file or folder to store file locally
        force_download: if ``True`` forces the artifact to be downloaded
            even if it exists locally already
        verbose: if ``True`` a progress bar is shown

    Returns:
        path of locally stored file

    Example:
        >>> dst = download_url('https://audeering.github.io/audeer/_static/favicon.png', '.')
        >>> os.path.basename(dst)
        'favicon.png'

    """  # noqa: E501
    destination = safe_path(destination)
    if os.path.isdir(destination):
        destination = os.path.join(destination, os.path.basename(url))
    if os.path.exists(destination) and not force_download:
        return destination

    with progress_bar(
            disable=not verbose,
            desc=format_display_message(f'Downloading {url}', pbar=True),
    ) as pbar:
        def bar_update(block_num, block_size, total_size):
            if pbar.total is None and total_size:
                pbar.total = total_size
            pbar.update(block_size)

        urllib.request.urlretrieve(url, destination, reporthook=bar_update)

    return destination

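# Usage sketch (illustrative only, not part of the module's API): shows how
# ``download_url`` resolves a folder destination and how ``force_download``
# interacts with an existing local copy. The cache folder name is an
# assumption for illustration.
def _example_download_url():
    cache = mkdir('cache')
    # First call stores the file as ``cache/favicon.png``
    path = download_url(
        'https://audeering.github.io/audeer/_static/favicon.png',
        cache,
        verbose=True,
    )
    # A repeated call returns immediately because the file exists locally,
    # unless the download is forced
    path = download_url(
        'https://audeering.github.io/audeer/_static/favicon.png',
        cache,
        force_download=True,
    )
    return path
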
def extract_archives(
        archives: typing.Sequence[str],
        destination: str,
        *,
        keep_archive: bool = True,
        verbose: bool = False,
) -> typing.List[str]:
    r"""Extract ZIP or TAR.GZ archives.

    Args:
        archives: paths of ZIP or TAR.GZ files
        destination: folder where the content should be stored.
            Will be created if it doesn't exist
        keep_archive: if ``False`` delete archive files after extraction
        verbose: if ``True`` a progress bar is shown

    Returns:
        combined member filenames of archives

    """
    with progress_bar(
            total=len(archives),
            disable=not verbose,
    ) as pbar:
        member_names = []
        for archive in archives:
            desc = format_display_message(
                f'Extract {os.path.basename(archive)}',
                pbar=True,
            )
            pbar.set_description_str(desc)
            pbar.refresh()
            member_names += extract_archive(
                archive,
                destination,
                keep_archive=keep_archive,
                verbose=False,
            )
            pbar.update()
    return member_names

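# Usage sketch (illustrative only, not part of the module's API): extracts
# several archives into one folder behind a single progress bar. The archive
# and folder names are assumptions for illustration.
def _example_extract_archives():
    member_names = extract_archives(
        ['features.zip', 'labels.tar.gz'],
        'data',
        keep_archive=False,  # delete both archives after extraction
        verbose=True,
    )
    return member_names
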
def __init__(self, num_tasks, task_description=None, maxsize=0):
    super().__init__(maxsize)
    # Progress bar tracking the number of finished tasks
    self.pbar = tqdm.progress_bar(
        total=num_tasks,
        desc=task_description,
    )

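# Context sketch: the ``__init__`` above only makes sense inside a class that
# couples a queue with a progress bar. A hypothetical completion could look
# as follows; the class name and the ``task_done`` hook are assumptions and
# not part of this module:
#
#     import queue
#
#     class _ProgressQueue(queue.Queue):
#
#         def __init__(self, num_tasks, task_description=None, maxsize=0):
#             super().__init__(maxsize)
#             self.pbar = tqdm.progress_bar(
#                 total=num_tasks,
#                 desc=task_description,
#             )
#
#         def task_done(self):
#             # Advance the progress bar whenever a task finishes
#             super().task_done()
#             self.pbar.update()
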
def run_tasks(
        task_func: typing.Callable,
        params: typing.Sequence[
            typing.Tuple[
                typing.Sequence[typing.Any],
                typing.Dict[str, typing.Any],
            ]
        ],
        *,
        num_workers: int = 1,
        multiprocessing: bool = False,
        progress_bar: bool = False,
        task_description: str = None,
) -> typing.Sequence[typing.Any]:
    r"""Run parallel tasks using multiprocessing.

    .. note:: Result values are returned in order of ``params``.

    Args:
        task_func: task function with one or more parameters,
            e.g. ``x, y, z``, and optionally returning a value
        params: sequence of tuples holding parameters for each task.
            Each tuple contains a sequence of positional arguments and a
            dictionary with keyword arguments, e.g.:
            ``[((x1, y1), {'z': z1}), ((x2, y2), {'z': z2}), ...]``
        num_workers: number of parallel jobs or 1 for sequential
            processing. If ``None`` will be set to the number of
            processors on the machine multiplied by 5 in case of
            multithreading and number of processors in case of
            multiprocessing
        multiprocessing: use multiprocessing instead of multithreading
        progress_bar: show a progress bar
        task_description: task description
            that will be displayed next to progress bar

    Example:
        >>> power = lambda x, n: x ** n
        >>> params = [([2, n], {}) for n in range(10)]
        >>> run_tasks(power, params, num_workers=3)
        [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]

    """
    num_tasks = max(1, len(params))
    results = [None] * num_tasks

    if num_workers == 1:  # sequential

        with tqdm.progress_bar(
                params,
                total=len(params),
                desc=task_description,
                disable=not progress_bar,
        ) as pbar:
            for index, param in enumerate(pbar):
                results[index] = task_func(*param[0], **param[1])

    else:  # parallel

        if multiprocessing:
            executor = concurrent.futures.ProcessPoolExecutor
        else:
            executor = concurrent.futures.ThreadPoolExecutor
        with executor(max_workers=num_workers) as pool:
            with tqdm.progress_bar(
                    total=len(params),
                    desc=task_description,
                    disable=not progress_bar,
            ) as pbar:
                futures = []
                for param in params:
                    future = pool.submit(task_func, *param[0], **param[1])
                    future.add_done_callback(lambda p: pbar.update())
                    futures.append(future)
                for idx, future in enumerate(futures):
                    result = future.result()
                    results[idx] = result

    return results

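# Usage sketch (illustrative only, not part of the module's API): I/O-bound
# work in a thread pool versus CPU-bound work in a process pool.
# ``_slow_square`` and the parameters are assumptions for illustration; for
# ``multiprocessing=True`` the task function must be picklable, i.e. defined
# at module level.
def _slow_square(x):
    import time
    time.sleep(0.1)
    return x * x


def _example_run_tasks():
    params = [([n], {}) for n in range(20)]
    # Threads are usually sufficient for I/O-bound tasks
    results = run_tasks(
        _slow_square,
        params,
        num_workers=4,
        progress_bar=True,
        task_description='squaring',
    )
    # Processes can help for CPU-bound tasks
    results = run_tasks(
        _slow_square,
        params,
        num_workers=4,
        multiprocessing=True,
    )
    return results
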
def extract_archive(
        archive: str,
        destination: str,
        *,
        keep_archive: bool = True,
        verbose: bool = False,
) -> typing.List[str]:
    r"""Extract a ZIP or TAR.GZ file.

    Args:
        archive: path to ZIP or TAR.GZ file
        destination: folder where the content should be stored.
            Will be created if it doesn't exist
        keep_archive: if ``False`` delete archive file after extraction
        verbose: if ``True`` a progress bar is shown

    Returns:
        member filenames of archive

    Raises:
        RuntimeError: if the provided archive is not a ZIP or TAR.GZ file
        RuntimeError: if the archive file is malformed

    """
    destination = safe_path(destination)
    if os.path.exists(destination):
        destination_created = False
    else:
        mkdir(destination)
        destination_created = True

    # Progress bar arguments
    desc = format_display_message(
        f'Extract {os.path.basename(archive)}',
        pbar=True,
    )
    disable = not verbose

    try:
        if archive.endswith('zip'):
            with zipfile.ZipFile(archive, 'r') as zf:
                members = zf.infolist()
                with progress_bar(
                        total=len(members),
                        desc=desc,
                        disable=disable,
                ) as pbar:
                    for member in members:
                        zf.extract(member, destination)
                        pbar.update()
                member_names = [m.filename for m in members]
        elif archive.endswith('tar.gz'):
            with tarfile.open(archive, 'r') as tf:
                members = tf.getmembers()
                with progress_bar(
                        total=len(members),
                        desc=desc,
                        disable=disable,
                ) as pbar:
                    for member in members:
                        tf.extract(member, destination, numeric_owner=True)
                        pbar.update()
                member_names = [m.name for m in members]
        else:
            raise RuntimeError(
                f'You can only extract ZIP and TAR.GZ files, '
                f'not {archive}'
            )
    except (EOFError, zipfile.BadZipFile, tarfile.ReadError):
        raise RuntimeError(f'Broken archive: {archive}')
    except (KeyboardInterrupt, Exception):  # pragma: nocover
        # Clean up broken extraction files
        if destination_created:
            if os.path.exists(destination):
                shutil.rmtree(destination)
        raise

    if not keep_archive:
        os.remove(archive)

    return member_names

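# Usage sketch (illustrative only, not part of the module's API): extracts a
# single archive and removes it afterwards. The archive and folder names are
# assumptions for illustration.
def _example_extract_archive():
    member_names = extract_archive(
        'model.zip',
        'model',
        keep_archive=False,  # remove ``model.zip`` after extraction
        verbose=True,
    )
    # ``member_names`` lists the extracted files relative to ``model``
    return member_names
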