예제 #1
0
 def job(cache_root: str, file: str):
     r"""Copy a file and its pickled counterpart.

     The name of the pickled counterpart is obtained
     by replacing the extension of ``file``
     with the pickle table storage format.
     Both names are handed to ``_copy_file``
     together with ``cache_root``
     and the ``db_root_tmp`` / ``db_root`` names
     resolved from the surrounding scope.

     Args:
         cache_root: forwarded unchanged to ``_copy_file``
         file: name of the file to copy

     """
     pickle_file = audeer.replace_file_extension(
         file,
         audformat.define.TableStorageFormat.PICKLE,
     )
     # Original file first, pickled variant second
     for name in (file, pickle_file):
         _copy_file(name, cache_root, db_root_tmp, db_root)
예제 #2
0
def _cached_files(
    files: typing.Sequence[str],
    deps: Dependencies,
    cached_versions: typing.Sequence[
        typing.Tuple[LooseVersion, str, Dependencies],
    ],
    flavor: typing.Optional[Flavor],
    verbose: bool,
) -> typing.Tuple[
    typing.Sequence[typing.Tuple[str, str]],
    typing.Sequence[str],
]:
    r"""Find cached files.

    For every requested file
    the entries of ``cached_versions`` are visited in the given order.
    A cache entry provides the file if

    * its version is not older than the version of the file in ``deps``,
    * the file is listed in the dependencies of the cache entry,
    * the checksums in ``deps`` and the cache entry agree, and
    * the file
      (with the extension replaced by ``flavor.format`` if that is set)
      exists below the cache root of the entry.

    The first matching cache entry wins.

    Args:
        files: files to look for
        deps: dependencies providing version and checksum
            of the requested files
        cached_versions: tuples of
            ``(version, cache_root, dependencies)``
            describing the caches to search
        flavor: if given and its ``format`` is not ``None``,
            file extensions are replaced by that format
        verbose: show progress bar

    Returns:
        two sequences:
        ``(cache_root, file)`` tuples for all files found in a cache,
        where ``file`` carries the flavor extension if one applies,
        and the names of all files not found in any cache

    """
    # Loop invariant: does the flavor request a different file extension?
    convert_format = bool(flavor and flavor.format is not None)

    cached_files = []
    missing_files = []

    for file in audeer.progress_bar(
            files,
            desc='Cached files',
            disable=not verbose,
    ):
        file_version = LooseVersion(deps.version(file))

        # Root of the first cache that holds a matching copy of the file
        found_root = None
        for cache_version, cache_root, cache_deps in cached_versions:
            if not (
                    cache_version >= file_version
                    and file in cache_deps
                    and deps.checksum(file) == cache_deps.checksum(file)
            ):
                continue
            path = os.path.join(cache_root, file)
            if convert_format:
                path = audeer.replace_file_extension(path, flavor.format)
            if os.path.exists(path):
                found_root = cache_root
                break

        if found_root is None:
            missing_files.append(file)
            continue

        if convert_format:
            file = audeer.replace_file_extension(file, flavor.format)
        cached_files.append((found_root, file))

    return cached_files, missing_files
예제 #3
0
def _missing_media(
    db_root: str,
    media: typing.Sequence[str],
    flavor: Flavor,
    verbose: bool,
) -> typing.Sequence[str]:
    r"""Return media files that do not exist below ``db_root``.

    Each file is looked up at ``db_root/file``.
    If ``flavor.format`` is not ``None``
    the extension of that path is replaced by the flavor format
    before checking for existence.

    Args:
        db_root: folder in which the files are expected
        media: media files to check
        flavor: flavor providing the target file format
        verbose: show progress bar

    Returns:
        entries of ``media`` (with their original extension)
        whose expected path does not exist

    """
    def is_missing(file: str) -> bool:
        target = os.path.join(db_root, file)
        if flavor.format is not None:
            target = audeer.replace_file_extension(target, flavor.format)
        return not os.path.exists(target)

    progress = audeer.progress_bar(
        media,
        desc='Missing media',
        disable=not verbose,
    )
    return [file for file in progress if is_missing(file)]
예제 #4
0
def empty_file(tmpdir, request):
    """Fixture to generate empty audio files.

    An empty WAV file is written to ``tmpdir``
    and then renamed so that it carries
    the extension given by ``request.param``.
    The path of the renamed file is yielded,
    and the file is removed afterwards if it still exists.

    """
    # Always write as WAV first
    wav_path = os.path.join(tmpdir, 'empty-file.wav')
    af.write(wav_path, np.array([[]]), 16000)

    # Give the file the requested extension
    target_path = audeer.replace_file_extension(wav_path, request.param)
    if os.path.exists(wav_path):
        os.rename(wav_path, target_path)

    yield target_path

    # Clean up
    if os.path.exists(target_path):
        os.remove(target_path)
예제 #5
0
def test_load_media(version, media, format):
    r"""Load media files twice and compare the returned paths.

    The second load happens
    after the cache folder of the requested flavor was removed.

    """
    def load(requested_version):
        return audb.load_media(
            DB_NAME,
            media,
            version=requested_version,
            format=format,
            verbose=False,
        )

    paths = load(version)

    # NOTE(review): if load_media returns absolute paths,
    # os.path.join discards pytest.CACHE_ROOT
    # and this expectation is trivially fulfilled - confirm
    expected_paths = []
    for path in paths:
        expected = os.path.join(pytest.CACHE_ROOT, path)
        if format is not None:
            expected = audeer.replace_file_extension(expected, format)
        expected_paths.append(expected)
    assert paths == expected_paths

    # Clear cache to force loading from other cache
    if version is None:
        version = audb.latest_version(DB_NAME)
    flavor = audb.Flavor(format=format)
    cache_root = audb.core.load.database_cache_folder(
        DB_NAME,
        version,
        pytest.CACHE_ROOT,
        flavor,
    )
    shutil.rmtree(cache_root)

    reloaded_paths = load(version)
    assert reloaded_paths == paths
예제 #6
0
def test_load(format, version):
    r"""Load a database and compare it against its original.

    Checks the file index, the media and table files on disk,
    the table content, the cache listing and the dependencies,
    and finally loads the database again with full paths.

    """
    def table_path(table_id):
        # CSV file of a table inside the folder of the first load
        return os.path.join(db_root, f'db.{table_id}.csv')

    assert not audb.exists(DB_NAME, version=version, format=format)

    db = audb.load(
        DB_NAME,
        version=version,
        format=format,
        full_path=False,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )
    db_root = db.meta['audb']['root']

    # NOTE(review): format is not forwarded here,
    # unlike in the check before loading - confirm this is intended
    assert audb.exists(DB_NAME, version=version)

    resolved_version = (
        audb.latest_version(DB_NAME) if version is None else version
    )
    db_original = audformat.Database.load(DB_ROOT_VERSION[resolved_version])

    if format is not None:
        def to_requested_format(path):
            return audeer.replace_file_extension(path, format)

        db_original.map_files(to_requested_format)

    pd.testing.assert_index_equal(db.files, db_original.files)
    for media_file in db.files:
        assert os.path.exists(os.path.join(db_root, media_file))
    for table_id in db.tables:
        assert os.path.exists(table_path(table_id))
        pd.testing.assert_frame_equal(
            db_original[table_id].df,
            db[table_id].df,
        )

    cached = audb.cached()
    assert cached.loc[db_root]['version'] == resolved_version

    deps = audb.dependencies(DB_NAME, version=version)
    assert str(deps().to_string()) == str(deps)
    assert len(deps) == len(db.files) + len(db.tables)

    # from cache with full path
    # NOTE(review): format is not forwarded to this load either,
    # while tables are still checked in the folder of the first load
    # - confirm this is intended

    db = audb.load(
        DB_NAME,
        version=version,
        full_path=True,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )
    for media_file in db.files:
        assert os.path.exists(media_file)
    for table_id in db.tables:
        assert os.path.exists(table_path(table_id))
예제 #7
0
def test_replace_file_extension(path, new_extension, expected_path):
    """Check that replacing the extension yields the expected path."""
    replaced = audeer.replace_file_extension(path, new_extension)
    assert replaced == expected_path
예제 #8
0
def load_media(
    name: str,
    media: typing.Union[str, typing.Sequence[str]],
    *,
    version: typing.Optional[str] = None,
    bit_depth: typing.Optional[int] = None,
    channels: typing.Optional[
        typing.Union[int, typing.Sequence[int]]
    ] = None,
    format: typing.Optional[str] = None,
    mixdown: bool = False,
    sampling_rate: typing.Optional[int] = None,
    cache_root: typing.Optional[str] = None,
    num_workers: typing.Optional[int] = 1,
    verbose: bool = True,
) -> typing.List[str]:
    r"""Load media file(s).

    If you are interested in media files
    and not the corresponding tables,
    you can use :func:`audb.load_media`
    to load them.
    This will not download any table files
    to your disk,
    but share the cache with :func:`audb.load`.

    Args:
        name: name of database
        media: load media files provided in the list
        version: version of database.
            If ``None`` the latest version is used
        bit_depth: bit depth, one of ``16``, ``24``, ``32``
        channels: channel selection, see :func:`audresample.remix`.
            Note that media files with too few channels
            will be first upsampled by repeating the existing channels.
            E.g. ``channels=[0, 1]`` upsamples all mono files to stereo,
            and ``channels=[1]`` returns the second channel
            of all multi-channel files
            and all mono files.
        format: file format, one of ``'flac'``, ``'wav'``
        mixdown: apply mono mix-down
        sampling_rate: sampling rate in Hz, one of
            ``8000``, ``16000``, ``22500``, ``44100``, ``48000``
        cache_root: cache folder where databases are stored.
            If not set :meth:`audb.default_cache_root` is used
        num_workers: number of parallel jobs or 1 for sequential
            processing. If ``None`` will be set to the number of
            processors on the machine multiplied by 5
        verbose: show debug messages

    Returns:
        paths to media files.
        An empty list is returned if no media file is requested

    Raises:
        ValueError: if a media file is requested
            that is not part of the database

    Example:
        >>> paths = load_media(
        ...     'emodb',
        ...     ['wav/03a01Fa.wav'],
        ...     version='1.1.1',
        ...     format='flac',
        ...     verbose=False,
        ... )
        >>> cache_root = audb.default_cache_root()
        >>> [p[len(cache_root):] for p in paths]
        ['/emodb/1.1.1/40bb2241/wav/03a01Fa.flac']

    """
    media = audeer.to_list(media)
    if not media:
        return []

    if version is None:
        version = latest_version(name)
    deps = dependencies(name, version=version, cache_root=cache_root)

    # Build the lookup once,
    # instead of scanning all media entries per requested file
    available_files = set(deps.media)
    for media_file in media:
        if media_file not in available_files:
            raise ValueError(
                f"Could not find '{media_file}' in {name} {version}")

    # NOTE(review): no list of cached versions is collected here,
    # None is handed on to _load_media
    # - presumably it is resolved there, confirm
    cached_versions = None

    flavor = Flavor(
        channels=channels,
        format=format,
        mixdown=mixdown,
        bit_depth=bit_depth,
        sampling_rate=sampling_rate,
    )
    db_root = database_cache_folder(name, version, cache_root, flavor)
    db_root_tmp = database_tmp_folder(db_root)

    if verbose:  # pragma: no cover
        print(f'Get:   {name} v{version}')
        print(f'Cache: {db_root}')

    # Start with database header without tables
    db, backend = load_header(
        db_root,
        name,
        version,
        flavor=flavor,
        add_audb_meta=True,
    )

    # Load missing media,
    # skipped if the database is reported as complete
    if not _database_is_complete(db):
        _load_media(
            media,
            backend,
            db_root,
            db_root_tmp,
            name,
            version,
            cached_versions,
            deps,
            flavor,
            cache_root,
            num_workers,
            verbose,
        )

    # Returned paths carry the extension of the requested format
    if format is not None:
        media = [audeer.replace_file_extension(m, format) for m in media]

    return [os.path.join(db_root, m) for m in media]