Ejemplo n.º 1
0
def download_url(
        url: str,
        destination: str,
        *,
        force_download: bool = False,
        verbose: bool = False,
) -> str:
    r"""Download URL to destination.

    The data is first written to a temporary ``<destination>.part`` file
    and only moved to ``destination`` after the transfer succeeded.
    A failed or interrupted download therefore never leaves
    a truncated file at ``destination``
    that a later call would mistake for a finished download.

    Args:
        url: URL of file to download
        destination: file or folder to store file locally.
            If it is an existing folder,
            the basename of ``url`` is used as file name
        force_download: if ``True`` forces the artifact to be downloaded
            even if it exists locally already
        verbose: if ``True`` a progress bar is shown

    Returns:
        path of locally stored file

    Example:
        >>> dst = download_url('https://audeering.github.io/audeer/_static/favicon.png', '.')
        >>> os.path.basename(dst)
        'favicon.png'

    """  # noqa: E501
    destination = safe_path(destination)
    if os.path.isdir(destination):
        destination = os.path.join(destination, os.path.basename(url))
    if os.path.exists(destination) and not force_download:
        return destination

    # Temporary sibling file, so the final move stays on one file system
    partial = f'{destination}.part'

    with progress_bar(
            disable=not verbose,
            desc=format_display_message(f'Downloading {url}', pbar=True),
    ) as pbar:

        downloaded = 0  # bytes already reported to the progress bar

        def bar_update(block_num, block_size, total_size):
            nonlocal downloaded
            # total_size is -1 if the server does not report a size
            if pbar.total is None and total_size > 0:
                pbar.total = total_size
            # The hook is called once with block_num == 0
            # before any data was read,
            # and the last block is usually shorter than block_size.
            # Derive the increment from the running total
            # instead of always adding a full block.
            received = block_num * block_size
            if total_size > 0:
                received = min(received, total_size)
            pbar.update(received - downloaded)
            downloaded = received

        try:
            urllib.request.urlretrieve(url, partial, reporthook=bar_update)
            os.replace(partial, destination)
        except BaseException:
            # Remove incomplete data, also on KeyboardInterrupt
            if os.path.exists(partial):
                os.remove(partial)
            raise

    return destination
Ejemplo n.º 2
0
def extract_archives(
        archives: typing.Sequence[str],
        destination: str,
        *,
        keep_archive: bool = True,
        verbose: bool = False,
) -> typing.List[str]:
    r"""Extract several ZIP or TAR.GZ archives into one folder.

    The archives are processed one after another
    in the order given by ``archives``.

    Args:
        archives: paths of ZIP or TAR.GZ files
        destination: folder where the content should be stored.
            Will be created if it doesn't exist
        keep_archive: if ``False`` delete archive files after extraction
        verbose: if ``True`` a progress bar
            counting the processed archives is shown

    Returns:
        combined member filenames of archives

    """
    extracted = []
    with progress_bar(total=len(archives), disable=not verbose) as pbar:
        for path in archives:
            # Show the name of the archive that is currently processed
            label = format_display_message(
                f'Extract {os.path.basename(path)}',
                pbar=True,
            )
            pbar.set_description_str(label)
            pbar.refresh()
            # The per-archive bar stays hidden,
            # progress is tracked per archive by the outer bar
            names = extract_archive(
                path,
                destination,
                keep_archive=keep_archive,
                verbose=False,
            )
            extracted.extend(names)
            pbar.update()
    return extracted
Ejemplo n.º 3
0
 def __init__(self, num_tasks, maxsize=0):
     r"""Initialize the parent class and attach a progress bar.

     Args:
         num_tasks: value passed as ``total`` to the progress bar
         maxsize: forwarded unchanged to the parent initializer

     """
     super().__init__(maxsize)
     # ``task_description`` is not a parameter of this method,
     # it is looked up in the enclosing scope
     bar_options = dict(total=num_tasks, desc=task_description)
     self.pbar = tqdm.progress_bar(**bar_options)
Ejemplo n.º 4
0
def run_tasks(
        task_func: typing.Callable,
        params: typing.Sequence[
            typing.Tuple[
                typing.Sequence[typing.Any],
                typing.Dict[str, typing.Any],
            ]
        ],
        *,
        num_workers: typing.Optional[int] = 1,
        multiprocessing: bool = False,
        progress_bar: bool = False,
        task_description: typing.Optional[str] = None,
) -> typing.Sequence[typing.Any]:
    r"""Run parallel tasks using multithreading or multiprocessing.

    .. note:: Result values are returned in order of ``params``.

    Args:
        task_func: task function with one or more
            parameters, e.g. ``x, y, z``, and optionally returning a value
        params: sequence of tuples holding parameters for each task.
            Each tuple contains a sequence of positional arguments and a
            dictionary with keyword arguments, e.g.:
            ``[((x1, y1), {'z': z1}), ((x2, y2), {'z': z2}), ...]``
        num_workers: number of parallel jobs or 1 for sequential
            processing. If ``None`` the default number of workers
            of the selected executor is used
        multiprocessing: use multiprocessing instead of multithreading
        progress_bar: show a progress bar
        task_description: task description
            that will be displayed next to progress bar

    Returns:
        list with one result per entry in ``params``,
        which is empty if ``params`` is empty

    Example:
        >>> power = lambda x, n: x ** n
        >>> params = [([2, n], {}) for n in range(10)]
        >>> run_tasks(power, params, num_workers=3)
        [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]

    """
    # One slot per task.
    # Do not enforce a minimum length of 1,
    # otherwise an empty ``params`` would return ``[None]``
    results = [None] * len(params)

    if num_workers == 1:  # sequential

        with tqdm.progress_bar(
            params,
            total=len(params),
            desc=task_description,
            disable=not progress_bar,
        ) as pbar:
            for index, param in enumerate(pbar):
                results[index] = task_func(*param[0], **param[1])

    else:  # parallel

        if multiprocessing:
            executor = concurrent.futures.ProcessPoolExecutor
        else:
            executor = concurrent.futures.ThreadPoolExecutor
        with executor(max_workers=num_workers) as pool:
            with tqdm.progress_bar(
                    total=len(params),
                    desc=task_description,
                    disable=not progress_bar,
            ) as pbar:
                futures = []
                for param in params:
                    future = pool.submit(task_func, *param[0], **param[1])
                    # Advance the bar as soon as a task finishes,
                    # independent of the order results are collected in
                    future.add_done_callback(lambda p: pbar.update())
                    futures.append(future)
                # Collect in submission order to match ``params``;
                # ``result()`` re-raises an exception raised by the task
                for idx, future in enumerate(futures):
                    results[idx] = future.result()

    return results
Ejemplo n.º 5
0
def extract_archive(
        archive: str,
        destination: str,
        *,
        keep_archive: bool = True,
        verbose: bool = False,
) -> typing.List[str]:
    r"""Extract a ZIP or TAR.GZ file.

    If ``destination`` did not exist before the call
    and the extraction fails for any reason,
    the folder is removed again
    so that no partially extracted content is left behind.

    Args:
        archive: path to ZIP or TAR.GZ file
        destination: folder where the content should be stored.
            Will be created if it doesn't exist
        keep_archive: if ``False`` delete archive file after extraction
        verbose: if ``True`` a progress bar is shown

    Returns:
        member filenames of archive

    Raises:
        RuntimeError: if the provided archive is not a ZIP or TAR.GZ file
        RuntimeError: if the archive file is malformed

    """
    destination = safe_path(destination)
    # Remember if the folder is ours, only then it is safe to remove it
    destination_created = not os.path.exists(destination)
    if destination_created:
        mkdir(destination)

    # Progress bar arguments
    desc = format_display_message(
        f'Extract {os.path.basename(archive)}',
        pbar=True,
    )
    disable = not verbose

    def remove_created_destination():
        # Clean up broken extraction files
        if destination_created and os.path.exists(destination):
            shutil.rmtree(destination)

    # NOTE(review): members are extracted without validating their paths.
    # Archives from untrusted sources may contain absolute paths or ``..``
    # components; consider tarfile extraction filters where available.
    try:
        if archive.endswith('zip'):
            with zipfile.ZipFile(archive, 'r') as zf:
                members = zf.infolist()
                with progress_bar(
                    total=len(members),
                    desc=desc,
                    disable=disable,
                ) as pbar:
                    for member in members:
                        zf.extract(member, destination)
                        pbar.update()
                    member_names = [m.filename for m in members]
        elif archive.endswith('tar.gz'):
            with tarfile.open(archive, 'r') as tf:
                members = tf.getmembers()
                with progress_bar(
                    total=len(members),
                    desc=desc,
                    disable=disable,
                ) as pbar:
                    for member in members:
                        tf.extract(member, destination, numeric_owner=True)
                        pbar.update()
                    member_names = [m.name for m in members]
        else:
            raise RuntimeError(
                f'You can only extract ZIP and TAR.GZ files, '
                f'not {archive}'
            )
    except (EOFError, zipfile.BadZipFile, tarfile.ReadError) as ex:
        # An exception raised inside this handler is not seen
        # by the handler below, so the clean up has to happen here as well
        remove_created_destination()
        raise RuntimeError(f'Broken archive: {archive}') from ex
    except (KeyboardInterrupt, Exception):  # pragma: nocover
        remove_created_destination()
        raise

    if not keep_archive:
        os.remove(archive)

    return member_names