def job(cache_root: str, file: str):
    """Pass a file and its pickle-extension twin to ``_copy_file``.

    ``db_root_tmp`` and ``db_root`` are not parameters;
    they are read from the surrounding scope.

    Args:
        cache_root: cache folder forwarded to ``_copy_file``
        file: file name; a second name is derived from it
            by swapping the extension for the pickle table format

    """
    pickled = audeer.replace_file_extension(
        file,
        audformat.define.TableStorageFormat.PICKLE,
    )
    # Original file first, then its pickled counterpart
    for name in (file, pickled):
        _copy_file(name, cache_root, db_root_tmp, db_root)
def _cached_files(
        files: typing.Sequence[str],
        deps: Dependencies,
        cached_versions: typing.Sequence[
            typing.Tuple[LooseVersion, str, Dependencies],
        ],
        flavor: typing.Optional[Flavor],
        verbose: bool,
) -> typing.Tuple[
    typing.Sequence[typing.Tuple[str, str]],
    typing.Sequence[str],
]:
    r"""Find cached files.

    A file counts as cached
    if one of the entries in ``cached_versions``
    has a version that is not older than the version of the file,
    lists the file in its dependencies with an identical checksum,
    and the file
    (with the flavor's file extension, if one is set)
    exists below its cache root.
    The first matching entry wins.

    Args:
        files: files to look for
        deps: dependencies providing version and checksum of each file
        cached_versions: tuples of
            ``(version, cache root, dependencies)``
            describing the caches to search
        flavor: if set and its ``format`` is not ``None``,
            file extensions are replaced by that format
        verbose: show progress bar

    Returns:
        two sequences:
        ``(cache root, file)`` tuples for files found in a cache,
        where ``file`` carries the flavor's extension if applicable,
        and the names of files not found in any cache

    """
    # Loop-invariant: extension requested by the flavor, if any
    target_format = flavor.format if flavor else None

    cached_files = []
    missing_files = []

    for file in audeer.progress_bar(
            files,
            desc='Cached files',
            disable=not verbose,
    ):
        file_version = LooseVersion(deps.version(file))
        for cache_version, cache_root, cache_deps in cached_versions:
            is_candidate = (
                cache_version >= file_version
                and file in cache_deps
                and deps.checksum(file) == cache_deps.checksum(file)
            )
            if not is_candidate:
                continue
            path = os.path.join(cache_root, file)
            if target_format is not None:
                path = audeer.replace_file_extension(path, target_format)
            if os.path.exists(path):
                cached_name = file
                if target_format is not None:
                    cached_name = audeer.replace_file_extension(
                        file,
                        target_format,
                    )
                cached_files.append((cache_root, cached_name))
                break
        else:
            # No cache entry provided the file
            missing_files.append(file)

    return cached_files, missing_files
def _missing_media(
        db_root: str,
        media: typing.Sequence[str],
        flavor: Flavor,
        verbose: bool,
) -> typing.Sequence[str]:
    r"""List media files that do not exist below ``db_root``.

    Args:
        db_root: folder the media paths are joined to
        media: relative media file names
        flavor: if its ``format`` is not ``None``,
            the existence check uses that file extension
        verbose: show progress bar

    Returns:
        the entries of ``media`` (with their original extension)
        whose expected path does not exist

    """
    def is_absent(file: str) -> bool:
        # Path the file would have on disk for this flavor
        expected = os.path.join(db_root, file)
        if flavor.format is not None:
            expected = audeer.replace_file_extension(expected, flavor.format)
        return not os.path.exists(expected)

    return [
        file
        for file in audeer.progress_bar(
            media,
            desc='Missing media',
            disable=not verbose,
        )
        if is_absent(file)
    ]
def empty_file(tmpdir, request):
    """Fixture providing an empty audio file.

    The file is written as WAV with a sampling rate of 16000
    and then renamed,
    so that its extension matches ``request.param``.
    The path to the renamed file is yielded
    and the file is removed afterwards if it still exists.

    """
    # Write an audio file without samples
    wav_path = os.path.join(tmpdir, 'empty-file.wav')
    af.write(wav_path, np.array([[]]), 16000)

    # Give it the requested extension
    target_path = audeer.replace_file_extension(wav_path, request.param)
    if os.path.exists(wav_path):
        os.rename(wav_path, target_path)

    yield target_path

    # Clean up
    if os.path.exists(target_path):
        os.remove(target_path)
def test_load_media(version, media, format):
    """Load media files twice, clearing the flavor cache in between."""

    def load(requested_version):
        return audb.load_media(
            DB_NAME,
            media,
            version=requested_version,
            format=format,
            verbose=False,
        )

    paths = load(version)

    # NOTE(review): the expectation is derived from the returned paths.
    # If those are absolute, os.path.join() drops CACHE_ROOT
    # and only the file extension is effectively checked -- confirm
    expected_paths = []
    for returned in paths:
        expected = os.path.join(pytest.CACHE_ROOT, returned)
        if format is not None:
            expected = audeer.replace_file_extension(expected, format)
        expected_paths.append(expected)
    assert paths == expected_paths

    # Clear cache to force loading from other cache
    if version is None:
        version = audb.latest_version(DB_NAME)
    flavor_cache = audb.core.load.database_cache_folder(
        DB_NAME,
        version,
        pytest.CACHE_ROOT,
        audb.Flavor(format=format),
    )
    shutil.rmtree(flavor_cache)

    assert load(version) == paths
def test_load(format, version):
    """Load a database flavor and compare it with the original database.

    The requested flavor must not exist before loading
    and must exist afterwards.
    Files, tables, the cache listing and the dependencies
    are checked against the original database,
    then the same flavor is loaded again with full paths.

    """
    assert not audb.exists(
        DB_NAME,
        version=version,
        format=format,
    )

    db = audb.load(
        DB_NAME,
        version=version,
        format=format,
        full_path=False,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )
    db_root = db.meta['audb']['root']

    # Check for the same flavor that was asserted to be absent above
    assert audb.exists(
        DB_NAME,
        version=version,
        format=format,
    )

    if version is None:
        resolved_version = audb.latest_version(DB_NAME)
    else:
        resolved_version = version
    db_original = audformat.Database.load(DB_ROOT_VERSION[resolved_version])

    if format is not None:
        # Loaded files carry the extension of the requested format
        db_original.map_files(
            lambda x: audeer.replace_file_extension(x, format)
        )

    pd.testing.assert_index_equal(db.files, db_original.files)
    for file in db.files:
        assert os.path.exists(os.path.join(db_root, file))
    for table in db.tables:
        assert os.path.exists(os.path.join(db_root, f'db.{table}.csv'))
        pd.testing.assert_frame_equal(
            db_original[table].df,
            db[table].df,
        )

    df = audb.cached()
    assert df.loc[db_root]['version'] == resolved_version

    deps = audb.dependencies(DB_NAME, version=version)
    assert str(deps().to_string()) == str(deps)
    assert len(deps) == len(db.files) + len(db.tables)

    # from cache with full path
    # (same format as above, otherwise a different flavor is requested)
    db = audb.load(
        DB_NAME,
        version=version,
        format=format,
        full_path=True,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )
    for file in db.files:
        assert os.path.exists(file)
    for table in db.tables:
        assert os.path.exists(os.path.join(db_root, f'db.{table}.csv'))
def test_replace_file_extension(path, new_extension, expected_path):
    """Replacing the extension of ``path`` yields ``expected_path``."""
    assert audeer.replace_file_extension(path, new_extension) == expected_path
def load_media(
        name: str,
        media: typing.Union[str, typing.Sequence[str]],
        *,
        version: typing.Optional[str] = None,
        bit_depth: typing.Optional[int] = None,
        channels: typing.Optional[
            typing.Union[int, typing.Sequence[int]]
        ] = None,
        format: typing.Optional[str] = None,
        mixdown: bool = False,
        sampling_rate: typing.Optional[int] = None,
        cache_root: typing.Optional[str] = None,
        num_workers: typing.Optional[int] = 1,
        verbose: bool = True,
) -> typing.List[str]:
    r"""Load media file(s).

    If you are interested in media files
    and not the corresponding tables,
    you can use :func:`audb.load_media`
    to load them.
    This will not download any table files
    to your disk,
    but share the cache with :func:`audb.load`.

    Args:
        name: name of database
        media: load media files provided in the list
        version: version of database
        bit_depth: bit depth, one of ``16``, ``24``, ``32``
        channels: channel selection, see :func:`audresample.remix`.
            Note that media files with too few channels
            will be first upsampled by repeating the existing channels.
            E.g. ``channels=[0, 1]`` upsamples all mono files to stereo,
            and ``channels=[1]`` returns the second channel
            of all multi-channel files
            and all mono files.
        format: file format, one of ``'flac'``, ``'wav'``
        mixdown: apply mono mix-down
        sampling_rate: sampling rate in Hz, one of
            ``8000``, ``16000``, ``22500``, ``44100``, ``48000``
        cache_root: cache folder where databases are stored.
            If not set :meth:`audb.default_cache_root` is used
        num_workers: number of parallel jobs or 1 for sequential
            processing. If ``None`` will be set to the number of
            processors on the machine multiplied by 5
        verbose: show debug messages

    Returns:
        paths to media files.
        An empty list is returned if no media files are requested

    Raises:
        ValueError: if a media file is requested
            that is not part of the database

    Example:
        >>> paths = load_media(
        ...     'emodb',
        ...     ['wav/03a01Fa.wav'],
        ...     version='1.1.1',
        ...     format='flac',
        ...     verbose=False,
        ... )
        >>> cache_root = audb.default_cache_root()
        >>> [p[len(cache_root):] for p in paths]
        ['/emodb/1.1.1/40bb2241/wav/03a01Fa.flac']

    """
    media = audeer.to_list(media)
    if not media:
        return []

    if version is None:
        version = latest_version(name)
    deps = dependencies(name, version=version, cache_root=cache_root)

    # Build the lookup once,
    # so validating many requested files stays cheap
    available_files = set(deps.media)
    for media_file in media:
        if media_file not in available_files:
            raise ValueError(
                f"Could not find '{media_file}' in {name} {version}"
            )

    cached_versions = None

    flavor = Flavor(
        channels=channels,
        format=format,
        mixdown=mixdown,
        bit_depth=bit_depth,
        sampling_rate=sampling_rate,
    )
    db_root = database_cache_folder(name, version, cache_root, flavor)
    db_root_tmp = database_tmp_folder(db_root)

    if verbose:  # pragma: no cover
        print(f'Get:   {name} v{version}')
        print(f'Cache: {db_root}')

    # Start with database header without tables
    db, backend = load_header(
        db_root,
        name,
        version,
        flavor=flavor,
        add_audb_meta=True,
    )

    # load missing media
    if not _database_is_complete(db):
        _load_media(
            media,
            backend,
            db_root,
            db_root_tmp,
            name,
            version,
            cached_versions,
            deps,
            flavor,
            cache_root,
            num_workers,
            verbose,
        )

    # Returned paths carry the extension of the requested format
    if format is not None:
        media = [audeer.replace_file_extension(m, format) for m in media]

    return [os.path.join(db_root, m) for m in media]