def process_folder(
        self,
        root: str,
        *,
        filetype: str = 'wav',
) -> pd.Index:
    r"""Segment all files of a given type found in a folder.

    .. note:: At the moment does not scan in sub-folders!

    Args:
        root: folder that holds the files
        filetype: extension of the files to collect

    Returns:
        Segmented index conform to audformat_

    Raises:
        RuntimeError: if sampling rates do not match
        RuntimeError: if channel selection is invalid

    .. _audformat: https://audeering.github.io/audformat/data-format.html

    """
    # Reduce every listed path to its file name
    # and prefix it with ``root`` exactly as passed by the caller
    names = map(
        os.path.basename,
        audeer.list_file_names(root, filetype=filetype),
    )
    return self.process_files([os.path.join(root, name) for name in names])
def test_list_file_names(tmpdir, files, path, filetype, file_list):
    """Check the list returned by ``audeer.list_file_names``.

    Creates ``files`` as empty files inside a temporary folder
    (which also contains an empty ``subfolder``)
    and compares the listing of ``path`` against ``file_list``.

    """
    folder = tmpdir.mkdir('folder')
    folder.mkdir('subfolder')
    target = os.path.join(str(folder), path)

    # Create the files
    for name in files:
        folder.join(name).write('')

    if os.path.isdir(target):
        # Expected entries are given relative to the listed folder
        expected = [
            audeer.safe_path(os.path.join(target, name))
            for name in file_list
        ]
    else:
        # Not a folder, so the path itself is the only expected entry
        expected = [target]

    listed = audeer.list_file_names(target, filetype=filetype)
    assert listed == expected
    assert type(listed) is list
def test_remove(publish_db, format):
    """Check that removed media no longer shows up when loading.

    For every removed file all versions listing that file are loaded,
    once with and once without ``removed_media``,
    and the presence of the file in ``db.files`` is verified.

    """
    candidates = (
        DB_FILES['1.0.0'][0],  # bundle1
        DB_FILES['1.0.0'][1],  # bundle2
        DB_FILES['1.0.0'][2],  # single
        DB_FILES['2.0.0'][0],  # new
    )

    for remove in candidates:

        # remove db cache to ensure we always get a fresh copy
        shutil.rmtree(pytest.CACHE_ROOT)

        audb.remove_media(DB_NAME, remove)

        # Name of the file as listed in the database
        # when it is loaded with the requested format
        if format is None:
            removed_file = remove
        else:
            stem, _ = os.path.splitext(remove)
            removed_file = f'{stem}.{format}'

        for removed_media in [False, True]:
            for version in audb.versions(DB_NAME):

                # Only versions that contained the file are of interest
                if remove not in DB_FILES[version]:
                    continue

                db = audb.load(
                    DB_NAME,
                    version=version,
                    format=format,
                    removed_media=removed_media,
                    full_path=False,
                    num_workers=pytest.NUM_WORKERS,
                    verbose=False,
                )

                if removed_media:
                    assert removed_file in db.files
                else:
                    assert removed_file not in db.files

                audio_folder = os.path.join(db.meta['audb']['root'], 'audio')
                assert removed_file not in audeer.list_file_names(
                    audio_folder,
                )

        # Make sure calling it again doesn't raise error
        audb.remove_media(DB_NAME, remove)
def process_folder(
        self,
        root: str,
        *,
        filetype: str = 'wav',
        channel: int = None,
) -> pd.DataFrame:
    r"""Extract features from all files of a given type in a folder.

    .. note:: At the moment does not scan in sub-folders!

    Args:
        root: folder that holds the files
        filetype: extension of the files to collect
        channel: channel number

    Returns:
        Result of :meth:`process_files` for the collected files

    Raises:
        RuntimeError: if number of channels do not match

    """
    collected = []
    for path in audeer.list_file_names(root, filetype=filetype):
        # Keep only the file name and prefix it with ``root`` as given
        collected.append(os.path.join(root, os.path.basename(path)))
    return self.process_files(collected, channel=channel)
def process_folder(
        self,
        root: str,
        *,
        channel: int = None,
        filetype: str = 'wav',
) -> pd.Series:
    r"""Process all files of a given type found in a folder.

    .. note:: At the moment does not scan in sub-folders!

    Args:
        root: folder that holds the files
        channel: channel number
        filetype: extension of the files to collect

    Returns:
        Series with processed files

    """
    listed = audeer.list_file_names(root, filetype=filetype)
    # Express every file as ``root`` (as given) joined with its file name
    basenames = [os.path.basename(path) for path in listed]
    in_root = [os.path.join(root, name) for name in basenames]
    return self.process_files(in_root, channel=channel)
def process_folder(
        self,
        root: str,
        *,
        filetype: str = 'wav',
) -> pd.DataFrame:
    r"""Extract features from all files of a given type in a folder.

    .. note:: At the moment does not scan in sub-folders!

    Args:
        root: folder that holds the files
        filetype: extension of the files to collect

    Returns:
        Result of :meth:`process_files` for the collected files

    Raises:
        RuntimeError: if sampling rates do not match
        RuntimeError: if channel selection is invalid
        RuntimeError: if multiple frames are returned,
            but ``win_dur`` is not set

    """

    def in_root(path: str) -> str:
        # File name of ``path`` prefixed with ``root`` as given
        return os.path.join(root, os.path.basename(path))

    listed = audeer.list_file_names(root, filetype=filetype)
    return self.process_files(list(map(in_root, listed)))
def cached(
        cache_root: str = None,
        *,
        shared: bool = False,
) -> pd.DataFrame:
    r"""List available databases in the cache.

    The cache is expected to be organised as
    ``<cache_root>/<database>/<version>/<flavor_id>``.
    One row is returned for every flavor folder
    that contains a database header file.

    Args:
        cache_root: cache folder where databases are stored.
            If not set :meth:`audb.default_cache_root` is used
        shared: list shared databases

    Returns:
        cached databases

    """
    if not cache_root:
        cache_root = default_cache_root(shared=shared)
    cache_root = audeer.safe_path(cache_root)

    entries = {}

    for database_path in audeer.list_dir_names(cache_root):
        database = os.path.basename(database_path)

        for version_path in audeer.list_dir_names(database_path):
            version = os.path.basename(version_path)

            # Skip tmp folder (e.g. 1.0.0~)
            if version.endswith('~'):  # pragma: no cover
                continue

            flavor_id_paths = audeer.list_dir_names(version_path)

            # Skip old audb cache (e.g. 1 as flavor)
            version_files = audeer.list_file_names(version_path)
            dependency_files = (
                os.path.join(version_path, define.DEPENDENCIES_FILE),
                os.path.join(version_path, define.CACHED_DEPENDENCIES_FILE),
            )
            if not any(path in version_files for path in dependency_files):
                # Skip all cache entries
                # that don't contain a db.csv or db.pkl file
                # as those stem from audb<1.0.0.
                # We only look for db.csv
                # as we switched to db.pkl with audb>=1.0.5
                continue  # pragma: no cover

            for flavor_id_path in flavor_id_paths:
                flavor_id = os.path.basename(flavor_id_path)
                names = [
                    os.path.basename(path)
                    for path in audeer.list_file_names(flavor_id_path)
                ]

                # Folders without a header do not hold a database
                if define.HEADER_FILE not in names:
                    continue

                db = audformat.Database.load(
                    flavor_id_path,
                    load_data=False,
                )
                audb_meta = db.meta['audb']
                flavor = audb_meta['flavor']
                complete = audb_meta['complete']

                # Fixed columns first, followed by the flavor settings
                entry = {
                    'name': database,
                    'flavor_id': flavor_id,
                    'version': version,
                    'complete': complete,
                }
                entry.update(flavor)
                entries[flavor_id_path] = entry

    df = pd.DataFrame.from_dict(entries, orient='index', dtype='object')
    # Replace NaN with None
    has_value = pd.notnull(df)
    return df.where(has_value, None)
def test_update_database():
    """Check publishing a version that depends on a previous version.

    Loads version 2.0.0 into the build folder of version 2.1.0,
    verifies three error cases of ``audb.publish``
    (missing dependency file,
    dependency file not matching the requested previous version,
    dependency file present without a previous version),
    publishes 2.1.0 on top of 2.0.0
    and finally compares the result with version 3.0.0.

    """
    version = '2.1.0'
    start_version = '2.0.0'

    db = audb.load_to(
        DB_ROOT_VERSION[version],
        DB_NAME,
        version=start_version,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )

    # == Fail with missing dependency file
    previous_version = start_version
    dep_file = os.path.join(
        DB_ROOT_VERSION[version],
        audb.core.define.DEPENDENCIES_FILE,
    )
    os.remove(dep_file)
    # Show the remaining files of the build folder in the test output
    print(audeer.list_file_names(DB_ROOT_VERSION[version]))
    error_msg = (
        f"You want to depend on '{previous_version}' "
        f"of {DB_NAME}, "
        f"but you don't have a '{audb.core.define.DEPENDENCIES_FILE}' "
        f"file present "
        f"in {DB_ROOT_VERSION[version]}. "
        f"Did you forgot to call "
        f"'audb.load_to({DB_ROOT_VERSION[version]}, {DB_NAME}, "
        f"version={previous_version}?")
    # The message is escaped as it is used as a regular expression
    with pytest.raises(RuntimeError, match=re.escape(error_msg)):
        audb.publish(
            DB_ROOT_VERSION[version],
            version,
            pytest.PUBLISH_REPOSITORY,
            previous_version=previous_version,
            num_workers=pytest.NUM_WORKERS,
            verbose=False,
        )

    # Reload data to restore dependency file
    shutil.rmtree(DB_ROOT_VERSION[version])
    db = audb.load_to(
        DB_ROOT_VERSION[version],
        DB_NAME,
        version=start_version,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )
    # Remove one file as in version 3.0.0
    remove_file = os.path.join('audio', '001.wav')
    remove_path = os.path.join(DB_ROOT_VERSION[version], remove_file)
    os.remove(remove_path)
    db.drop_files(remove_file)
    db.save(DB_ROOT_VERSION[version])

    # == Fail as 2.0.0 is not the latest version
    previous_version = 'latest'
    error_msg = (f"You want to depend on '{audb.latest_version(DB_NAME)}' "
                 f"of {DB_NAME}, "
                 f"but the MD5 sum of your "
                 f"'{audb.core.define.DEPENDENCIES_FILE}' file "
                 f"in {DB_ROOT_VERSION[version]} "
                 f"does not match the MD5 sum of the corresponding file "
                 f"for the requested version in the repository. "
                 f"Did you forgot to call "
                 f"'audb.load_to({DB_ROOT_VERSION[version]}, {DB_NAME}, "
                 f"version='{audb.latest_version(DB_NAME)}') "
                 f"or modified the file manually?")
    with pytest.raises(RuntimeError, match=re.escape(error_msg)):
        audb.publish(
            DB_ROOT_VERSION[version],
            version,
            pytest.PUBLISH_REPOSITORY,
            previous_version=previous_version,
            num_workers=pytest.NUM_WORKERS,
            verbose=False,
        )

    # == Fail as a dependency file is present,
    # but no previous version is requested
    previous_version = None
    error_msg = (
        f"You did not set a dependency to a previous version, "
        f"but you have a '{audb.core.define.DEPENDENCIES_FILE}' file present "
        f"in {DB_ROOT_VERSION[version]}.")
    with pytest.raises(RuntimeError, match=re.escape(error_msg)):
        audb.publish(
            DB_ROOT_VERSION[version],
            version,
            pytest.PUBLISH_REPOSITORY,
            previous_version=previous_version,
            num_workers=pytest.NUM_WORKERS,
            verbose=False,
        )

    # == Publish successfully on top of the version loaded at the start
    previous_version = start_version
    deps = audb.publish(
        DB_ROOT_VERSION[version],
        version,
        pytest.PUBLISH_REPOSITORY,
        previous_version=previous_version,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )

    # Check that dependencies include previous and actual version only
    versions = audeer.sort_versions([deps.version(f) for f in deps.files])
    assert versions[-1] == version
    assert versions[0] == previous_version

    # Check that there is no difference in the database
    # if published from scratch or from previous version
    db1 = audb.load(
        DB_NAME,
        version=version,
        full_path=False,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )
    db2 = audb.load(
        DB_NAME,
        version='3.0.0',
        full_path=False,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )
    # Clear the audb entry of the header before comparing the databases
    db1.meta['audb'] = {}
    db2.meta['audb'] = {}
    assert db1 == db2