def test_mixed_cache():
    """Load one database into the shared and then into the private cache.

    Regression test for https://github.com/audeering/audb/issues/101:
    searching for other versions must not fail
    if a database is stored across the private and the shared cache
    and the private cache is empty.

    """
    load_kwargs = dict(
        sampling_rate=8000,
        full_path=False,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
        only_metadata=True,
    )

    # Fill the shared cache first
    audb.load(
        DB_NAME,
        tables='files',
        cache_root=pytest.SHARED_CACHE_ROOT,
        **load_kwargs,
    )

    # Request the same flavor from a freshly emptied private cache,
    # which forces audb.cached() to return an empty dataframe
    clear_root(pytest.CACHE_ROOT)
    audeer.mkdir(pytest.CACHE_ROOT)
    audb.load(DB_NAME, tables='segments', **load_kwargs)
def test_channels(channels):
    """Check the channel count of loaded media and the cached channel entry.

    Args:
        channels: value forwarded as ``channels`` to ``audb.load()``;
            the branches below handle ``None``, an integer,
            and anything supporting ``len()``

    """
    db = audb.load(
        DB_NAME,
        channels=channels,
        full_path=False,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )
    originals = db['files']['original'].get()
    cached = audb.cached()

    # Lazily build (converted, original) path pairs;
    # the paths are resolved one pair per loop iteration
    path_pairs = (
        (
            os.path.join(db.meta['audb']['root'], converted),
            os.path.join(DB_ROOT, original),
        )
        for converted, original in zip(db.files, originals)
    )
    for converted_path, original_path in path_pairs:
        found = audiofile.channels(converted_path)
        if channels is None:
            # No selection requested: channel count stays untouched
            assert found == audiofile.channels(original_path)
            assert cached['channels'].values[0] == channels
        elif isinstance(channels, int):
            assert found == 1
            # NOTE(review): hard-coded [1] only matches channels == 1,
            # confirm against the parametrization of this test
            assert cached['channels'].values[0] == [1]
        else:
            assert found == len(channels)
def test_tables(tables, format, expected_tables, expected_files):
    """Check which tables and files are present after a table selection.

    Args:
        tables: value forwarded as ``tables`` to ``audb.load()``
        format: value forwarded as ``format`` to ``audb.load()``
        expected_tables: table IDs the loaded database must contain,
            in order
        expected_files: files the loaded database must list, in order

    """
    load_kwargs = {
        'tables': tables,
        'format': format,
        'full_path': False,
        'num_workers': pytest.NUM_WORKERS,
        'verbose': False,
    }
    db = audb.load(DB_NAME, **load_kwargs)

    loaded_tables = list(db.tables)
    assert loaded_tables == expected_tables
    loaded_files = list(db.files)
    assert loaded_files == expected_files
def test_media(media, format, expected_files):
    """Check which files are present after a media selection.

    All three tables ('dev', 'test', 'train') have to be present
    independent of the selected media.

    Args:
        media: value forwarded as ``media`` to ``audb.load()``
        format: value forwarded as ``format`` to ``audb.load()``
        expected_files: files the loaded database must list, in order

    """
    load_kwargs = {
        'media': media,
        'format': format,
        'full_path': False,
        'num_workers': pytest.NUM_WORKERS,
        'verbose': False,
    }
    db = audb.load(DB_NAME, **load_kwargs)

    loaded_files = list(db.files)
    assert loaded_files == expected_files
    loaded_tables = list(db.tables)
    assert loaded_tables == ['dev', 'test', 'train']
def test_include_and_exclude(include, exclude, expected_files):
    """Check the ``include`` and ``exclude`` arguments of ``audb.load()``.

    The arguments are tested for backward compatibility,
    the call is required to emit a ``UserWarning``.

    Args:
        include: value forwarded as ``include`` to ``audb.load()``
        exclude: value forwarded as ``exclude`` to ``audb.load()``
        expected_files: files the loaded database must list, in order

    """
    load_kwargs = {
        'include': include,
        'exclude': exclude,
        'full_path': False,
        'num_workers': pytest.NUM_WORKERS,
        'verbose': False,
    }
    with pytest.warns(UserWarning):
        db = audb.load(DB_NAME, **load_kwargs)

    loaded_files = list(db.files)
    assert loaded_files == expected_files
def test_remove(publish_db, format):
    """Remove single media files and check every version containing them.

    Args:
        publish_db: not used inside the body,
            presumably a fixture publishing the database (TODO confirm)
        format: value forwarded as ``format`` to ``audb.load()``;
            if not ``None`` it replaces the extension of the expected file

    """
    candidates = (
        DB_FILES['1.0.0'][0],  # bundle1
        DB_FILES['1.0.0'][1],  # bundle2
        DB_FILES['1.0.0'][2],  # single
        DB_FILES['2.0.0'][0],  # new
    )
    for remove in candidates:
        # Wipe the cache, so every load fetches a fresh copy
        shutil.rmtree(pytest.CACHE_ROOT)

        audb.remove_media(DB_NAME, remove)

        for removed_media in [False, True]:
            for version in audb.versions(DB_NAME):
                if remove not in DB_FILES[version]:
                    continue

                # Name of the removed file as listed in the loaded database
                if format is None:
                    removed_file = remove
                else:
                    stem = os.path.splitext(remove)[0]
                    removed_file = f'{stem}.{format}'

                db = audb.load(
                    DB_NAME,
                    version=version,
                    format=format,
                    removed_media=removed_media,
                    full_path=False,
                    num_workers=pytest.NUM_WORKERS,
                    verbose=False,
                )

                # The file is listed only if removed media is requested
                if not removed_media:
                    assert removed_file not in db.files
                else:
                    assert removed_file in db.files
                audio_dir = os.path.join(db.meta['audb']['root'], 'audio')
                assert removed_file not in audeer.list_file_names(audio_dir)

        # Removing the same file a second time must not raise an error
        audb.remove_media(DB_NAME, remove)
def test_format(format):
    """Check the file extension of loaded media and the cached format entry.

    Args:
        format: value forwarded as ``format`` to ``audb.load()``;
            for ``None`` the original extension has to be kept

    """
    db = audb.load(
        DB_NAME,
        format=format,
        full_path=False,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )
    originals = db['files']['original'].get()
    cached = audb.cached()
    # First cache entry has to list the requested format
    assert cached['format'].values[0] == format

    for converted, original in zip(db.files, originals):
        converted_path = os.path.join(db.meta['audb']['root'], converted)
        original_path = os.path.join(DB_ROOT, original)
        if format is not None:
            assert converted_path.endswith(format)
        else:
            # Compare the last four characters, e.g. '.wav'
            assert converted_path[-4:] == original_path[-4:]
def test_mixdown(mixdown):
    """Check the channel count of loaded media and the cached mixdown entry.

    Args:
        mixdown: value forwarded as ``mixdown`` to ``audb.load()``;
            if true, every loaded file has to be mono

    """
    db = audb.load(
        DB_NAME,
        mixdown=mixdown,
        full_path=False,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )
    originals = db['files']['original'].get()
    cached = audb.cached()
    # First cache entry has to list the requested mixdown setting
    assert cached['mixdown'].values[0] == mixdown

    for converted, original in zip(db.files, originals):
        converted_path = os.path.join(db.meta['audb']['root'], converted)
        original_path = os.path.join(DB_ROOT, original)
        found = audiofile.channels(converted_path)
        # Without mixdown the channel count of the original is kept
        expected = 1 if mixdown else audiofile.channels(original_path)
        assert found == expected
def test_mix(mix):
    """Check the ``mix`` argument of ``audb.load()``.

    The argument is tested for backward compatibility,
    the call is required to emit a ``UserWarning``.

    Args:
        mix: value forwarded as ``mix`` to ``audb.load()``;
            'mono', 'left', 'right' have to result in one channel,
            'stereo' in two,
            anything else in the channel count of the original file

    """
    load_kwargs = {
        'mix': mix,
        'full_path': False,
        'num_workers': pytest.NUM_WORKERS,
        'verbose': False,
    }
    with pytest.warns(UserWarning):
        db = audb.load(DB_NAME, **load_kwargs)
    originals = db['files']['original'].get()

    for converted, original in zip(db.files, originals):
        converted_path = os.path.join(db.meta['audb']['root'], converted)
        original_path = os.path.join(DB_ROOT, original)
        found = audiofile.channels(converted_path)
        if mix in ['mono', 'left', 'right']:
            expected = 1
        elif mix == 'stereo':
            expected = 2
        else:
            expected = audiofile.channels(original_path)
        assert found == expected
def test_bit_depth(bit_depth):
    """Check the bit depth of loaded media and the cached bit depth entry.

    Args:
        bit_depth: value forwarded as ``bit_depth`` to ``audb.load()``;
            for ``None`` the bit depth of the original file has to be kept

    """
    db = audb.load(
        DB_NAME,
        bit_depth=bit_depth,
        full_path=False,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )
    originals = db['files']['original'].get()
    cached = audb.cached()
    # First cache entry has to list the requested bit depth
    assert cached['bit_depth'].values[0] == bit_depth

    for converted, original in zip(db.files, originals):
        converted_path = os.path.join(db.meta['audb']['root'], converted)
        original_path = os.path.join(DB_ROOT, original)
        found = audiofile.bit_depth(converted_path)
        if bit_depth is not None:
            expected = bit_depth
        else:
            expected = audiofile.bit_depth(original_path)
        assert found == expected
def test_sampling_rate(sampling_rate):
    """Check the sampling rate of loaded media and the cached entry.

    Args:
        sampling_rate: value forwarded as ``sampling_rate``
            to ``audb.load()``;
            for ``None`` the sampling rate of the original file
            has to be kept

    """
    db = audb.load(
        DB_NAME,
        sampling_rate=sampling_rate,
        full_path=False,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )
    originals = db['files']['original'].get()
    cached = audb.cached()
    # First cache entry has to list the requested sampling rate
    assert cached['sampling_rate'].values[0] == sampling_rate

    for converted, original in zip(db.files, originals):
        converted_path = os.path.join(db.meta['audb']['root'], converted)
        original_path = os.path.join(DB_ROOT, original)
        found = audiofile.sampling_rate(converted_path)
        if sampling_rate is not None:
            expected = sampling_rate
        else:
            expected = audiofile.sampling_rate(original_path)
        assert found == expected
def test_load(format, version):
    """Load a database and compare it with the original on disk.

    Covers the existence check before and after loading,
    files and tables inside the cache folder,
    the cache entry,
    the dependency table,
    and a second load from cache with full paths.

    Args:
        format: value forwarded as ``format`` to ``audb.load()``
        version: value forwarded as ``version`` to ``audb.load()``;
            ``None`` is resolved with ``audb.latest_version()``

    """
    def _with_requested_extension(path):
        return audeer.replace_file_extension(path, format)

    # The requested flavor must not be cached yet
    assert not audb.exists(DB_NAME, version=version, format=format)

    db = audb.load(
        DB_NAME,
        version=version,
        format=format,
        full_path=False,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )
    db_root = db.meta['audb']['root']
    assert audb.exists(DB_NAME, version=version)

    resolved_version = (
        audb.latest_version(DB_NAME) if version is None else version
    )

    # Reference database, adjusted to the requested file extension
    db_original = audformat.Database.load(DB_ROOT_VERSION[resolved_version])
    if format is not None:
        db_original.map_files(_with_requested_extension)

    pd.testing.assert_index_equal(db.files, db_original.files)
    for media_file in db.files:
        media_path = os.path.join(db_root, media_file)
        assert os.path.exists(media_path)
    for table_id in db.tables:
        table_path = os.path.join(db_root, f'db.{table_id}.csv')
        assert os.path.exists(table_path)
        pd.testing.assert_frame_equal(
            db_original[table_id].df,
            db[table_id].df,
        )

    cached = audb.cached()
    assert cached.loc[db_root]['version'] == resolved_version

    deps = audb.dependencies(DB_NAME, version=version)
    assert str(deps().to_string()) == str(deps)
    # One dependency entry per media file and per table
    assert len(deps) == len(db.files) + len(db.tables)

    # Load again, this time from cache and with full paths
    db = audb.load(
        DB_NAME,
        version=version,
        full_path=True,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )
    for media_path in db.files:
        assert os.path.exists(media_path)
    for table_id in db.tables:
        table_path = os.path.join(db_root, f'db.{table_id}.csv')
        assert os.path.exists(table_path)
def test_update_database():
    """Publish version 2.1.0 as an update of version 2.0.0.

    Three invalid publish attempts are expected to raise ``RuntimeError``:
    a missing dependency file,
    a dependency file not matching the requested previous version,
    and a dependency file without a previous version.
    The final valid publication is compared with version 3.0.0,
    which was published from scratch.

    """
    version = '2.1.0'
    start_version = '2.0.0'
    db_root = DB_ROOT_VERSION[version]

    def _load_start_version():
        # Download the start version into the build folder
        return audb.load_to(
            db_root,
            DB_NAME,
            version=start_version,
            num_workers=pytest.NUM_WORKERS,
            verbose=False,
        )

    def _publish(previous):
        return audb.publish(
            db_root,
            version,
            pytest.PUBLISH_REPOSITORY,
            previous_version=previous,
            num_workers=pytest.NUM_WORKERS,
            verbose=False,
        )

    db = _load_start_version()

    # == Fail with missing dependency file
    previous_version = start_version
    os.remove(os.path.join(db_root, audb.core.define.DEPENDENCIES_FILE))
    print(audeer.list_file_names(db_root))
    error_msg = (
        f"You want to depend on '{previous_version}' "
        f"of {DB_NAME}, "
        f"but you don't have a '{audb.core.define.DEPENDENCIES_FILE}' "
        f"file present "
        f"in {DB_ROOT_VERSION[version]}. "
        f"Did you forgot to call "
        f"'audb.load_to({DB_ROOT_VERSION[version]}, {DB_NAME}, "
        f"version={previous_version}?"
    )
    with pytest.raises(RuntimeError, match=re.escape(error_msg)):
        _publish(previous_version)

    # Start over with a fresh copy to get the dependency file back
    shutil.rmtree(db_root)
    db = _load_start_version()

    # Drop one file, as done in version 3.0.0
    remove_file = os.path.join('audio', '001.wav')
    os.remove(os.path.join(db_root, remove_file))
    db.drop_files(remove_file)
    db.save(db_root)

    # == Fail as 2.0.0 is not the latest version
    previous_version = 'latest'
    error_msg = (
        f"You want to depend on '{audb.latest_version(DB_NAME)}' "
        f"of {DB_NAME}, "
        f"but the MD5 sum of your "
        f"'{audb.core.define.DEPENDENCIES_FILE}' file "
        f"in {DB_ROOT_VERSION[version]} "
        f"does not match the MD5 sum of the corresponding file "
        f"for the requested version in the repository. "
        f"Did you forgot to call "
        f"'audb.load_to({DB_ROOT_VERSION[version]}, {DB_NAME}, "
        f"version='{audb.latest_version(DB_NAME)}') "
        f"or modified the file manually?"
    )
    with pytest.raises(RuntimeError, match=re.escape(error_msg)):
        _publish(previous_version)

    # == Fail as we require a previous version
    previous_version = None
    error_msg = (
        f"You did not set a dependency to a previous version, "
        f"but you have a '{audb.core.define.DEPENDENCIES_FILE}' file present "
        f"in {DB_ROOT_VERSION[version]}."
    )
    with pytest.raises(RuntimeError, match=re.escape(error_msg)):
        _publish(previous_version)

    # Valid publication depending on the start version
    previous_version = start_version
    deps = _publish(previous_version)

    # Dependencies have to span exactly
    # from the previous to the published version
    versions = audeer.sort_versions(
        [deps.version(file) for file in deps.files]
    )
    assert versions[-1] == version
    assert versions[0] == previous_version

    # A database published as an update
    # has to equal the one published from scratch
    load_kwargs = dict(
        full_path=False,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )
    db1 = audb.load(DB_NAME, version=version, **load_kwargs)
    db2 = audb.load(DB_NAME, version='3.0.0', **load_kwargs)
    # Neutralize the audb specific meta entry before comparing
    db1.meta['audb'] = {}
    db2.meta['audb'] = {}
    assert db1 == db2
# cache databases to avoid progress bar in code examples audb.config.REPOSITORIES = [ audb.Repository( name='data-public', host='https://audeering.jfrog.io/artifactory', backend='artifactory', ) ] database_name = 'emodb' database_version = '1.1.1' if not audb.exists(database_name, version=database_version): print(f'Pre-caching {database_name} v{database_version}') audb.load( database_name, version=database_version, num_workers=5, only_metadata=True, verbose=False, ) if not audb.exists( database_name, version=database_version, format='flac', sampling_rate=44100, ): print(f'Pre-caching {database_name} v{database_version} {{flac, 44100Hz}}') audb.load( database_name, version=database_version, format='flac', sampling_rate=44100,
def test_load_on_demand():
    """Load a database step by step and track its ``complete`` flag.

    Different combinations of ``only_metadata``, ``tables`` and ``media``
    are requested one after another.
    ``db.meta['audb']['complete']`` is expected to stay false
    until the load requesting 'table2' together with its media.

    """
    db_original = audformat.Database.load(DB_ROOT)
    common = dict(version=DB_VERSION, full_path=False, verbose=False)

    def _assert_all_tables_incomplete(loaded):
        # Both tables and the full file index, cache still incomplete
        assert loaded['table1'] == db_original['table1']
        assert loaded['table2'] == db_original['table2']
        pd.testing.assert_index_equal(loaded.files, db_original.files)
        assert not loaded.meta['audb']['complete']

    def _assert_only_table1_incomplete(loaded):
        # Only 'table1' and its files, cache still incomplete
        assert loaded['table1'] == db_original['table1']
        assert 'table2' not in loaded.tables
        pd.testing.assert_index_equal(
            loaded.files,
            db_original['table1'].files,
        )
        assert not loaded.meta['audb']['complete']

    # Metadata of all tables
    db = audb.load(DB_NAME, only_metadata=True, **common)
    _assert_all_tables_incomplete(db)

    # Metadata of a single table, selected by list
    db = audb.load(DB_NAME, only_metadata=True, tables=['table1'], **common)
    _assert_only_table1_incomplete(db)

    # Metadata of a single table, selected by pattern
    db = audb.load(DB_NAME, only_metadata=True, tables='.*1', **common)
    _assert_only_table1_incomplete(db)

    # Single table, this time without only_metadata
    db = audb.load(DB_NAME, tables=['table1'], **common)
    _assert_only_table1_incomplete(db)

    # Delete the cached table files,
    # so the table has to be fetched from the backend again
    cache_root = db.meta['audb']['root']
    os.remove(os.path.join(cache_root, 'db.table1.csv'))
    os.remove(os.path.join(cache_root, 'db.table1.pkl'))

    db = audb.load(DB_NAME, only_metadata=True, **common)
    _assert_all_tables_incomplete(db)

    # Selection of media files
    db = audb.load(
        DB_NAME,
        media=['audio/000.wav', 'audio/001.wav'],
        **common,
    )
    assert 'table1' in db.tables
    assert 'table2' in db.tables
    expected_index = audformat.filewise_index(
        ['audio/000.wav', 'audio/001.wav'],
    )
    pd.testing.assert_index_equal(db.files, expected_index)
    assert not db.meta['audb']['complete']

    # After this load the cache is expected to be flagged as complete
    db = audb.load(DB_NAME, tables=['table2'], **common)
    assert 'table1' not in db.tables
    assert db['table2'] == db_original['table2']
    pd.testing.assert_index_equal(db.files, db_original['table2'].files)
    assert db.meta['audb']['complete']

    # Full load, compared with the original after overwriting meta
    db = audb.load(DB_NAME, **common)
    db_original.meta = []
    db.meta = []
    assert db == db_original
def test_load_wrong_argument():
    """Check that an unknown keyword argument raises ``TypeError``."""
    unknown_kwargs = {'typo': '1.0.0'}
    with pytest.raises(TypeError):
        audb.load(DB_NAME, **unknown_kwargs)
def test_publish(version):
    """Publish one version of the database and check the result.

    Covers the returned dependency table,
    loading of the published version,
    the entries of ``audb.versions()``, ``audb.latest_version()``
    and ``audb.available()``,
    and the metadata stored for every file in the dependency table.

    Args:
        version: version to publish,
            also used as key into ``DB_ROOT_VERSION``

    """
    db = audformat.Database.load(DB_ROOT_VERSION[version])
    print(db.is_portable)
    print(db.files)

    # Before the first publication there is no latest version
    if not audb.versions(DB_NAME):
        with pytest.raises(RuntimeError):
            audb.latest_version(DB_NAME)

    # Map files to archives based on the 'speaker' column,
    # files without a speaker entry are not part of the mapping
    archives = db['files']['speaker'].get().dropna().to_dict()
    deps = audb.publish(
        DB_ROOT_VERSION[version],
        version,
        pytest.PUBLISH_REPOSITORY,
        archives=archives,
        previous_version=None,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )
    backend = audb.core.utils.lookup_backend(DB_NAME, version)

    number_of_files = len(set(archives.keys()))
    number_of_archives = len(set(archives.values()))
    assert len(deps.files) - len(deps.archives) == (
        number_of_files - number_of_archives
    )
    for archive in set(archives.values()):
        assert archive in deps.archives

    db = audb.load(
        DB_NAME,
        version=version,
        full_path=False,
        num_workers=pytest.NUM_WORKERS,
    )
    assert db.name == DB_NAME

    versions = audb.versions(DB_NAME)
    latest_version = audb.latest_version(DB_NAME)
    assert version in versions
    assert latest_version == versions[-1]

    df = audb.available(only_latest=False)
    assert DB_NAME in df.index
    assert set(df[df.index == DB_NAME]['version']) == set(versions)

    df = audb.available(only_latest=True)
    assert DB_NAME in df.index
    # Select the first row by position:
    # the index holds database names,
    # and an integer key on such an index relies on
    # the deprecated positional fallback of pandas
    assert df[df.index == DB_NAME]['version'].iloc[0] == latest_version

    for file in db.files:
        name = archives.get(file, file)
        file_path = backend.join(db.name, 'media', name)
        # NOTE(review): the result of this call is not asserted,
        # confirm if the file is expected to exist under this path
        backend.exists(file_path, version)
        path = os.path.join(DB_ROOT_VERSION[version], file)
        assert deps.checksum(file) == audbackend.md5(path)
        # Audio metadata is only compared for WAV and FLAC files
        if deps.format(file) in [
            audb.core.define.Format.WAV,
            audb.core.define.Format.FLAC,
        ]:
            assert deps.bit_depth(file) == audiofile.bit_depth(path)
            assert deps.channels(file) == audiofile.channels(path)
            assert deps.duration(file) == audiofile.duration(path)
            assert deps.sampling_rate(file) == audiofile.sampling_rate(path)