Ejemplo n.º 1
0
    def process_folder(
        self,
        root: str,
        *,
        filetype: str = 'wav',
    ) -> pd.Index:
        r"""Segment all files of one type located in a folder.

        The files are collected with :func:`audeer.list_file_names`.
        The base name of every entry is joined with ``root``
        and the resulting list is passed on to :meth:`process_files`.

        .. note:: At the moment does not scan in sub-folders!

        Args:
            root: root folder
            filetype: file extension

        Returns:
            Segmented index conform to audformat_

        Raises:
            RuntimeError: if sampling rates do not match
            RuntimeError: if channel selection is invalid

        .. _audformat: https://audeering.github.io/audformat/data-format.html

        """
        paths = []
        for name in audeer.list_file_names(root, filetype=filetype):
            # Strip any leading folder and prepend ``root`` as it was given
            paths.append(os.path.join(root, os.path.basename(name)))
        return self.process_files(paths)
Ejemplo n.º 2
0
def test_list_file_names(tmpdir, files, path, filetype, file_list):
    """Compare ``audeer.list_file_names`` against the expected listing.

    A folder ``folder`` containing an empty ``subfolder``
    and the empty files named in ``files``
    is created below ``tmpdir``.
    ``path`` is interpreted relative to that folder.
    If it points to a directory,
    the expected result is ``file_list``
    with every entry expanded to a path below ``path``,
    otherwise the expected result is ``path`` itself.

    """
    folder = tmpdir.mkdir('folder')
    folder.mkdir('subfolder')
    path = os.path.join(str(folder), path)

    # Create the (empty) files
    for name in files:
        folder.join(name).write('')

    if not os.path.isdir(path):
        expected = [path]
    else:
        expected = [
            audeer.safe_path(os.path.join(path, name)) for name in file_list
        ]

    result = audeer.list_file_names(path, filetype=filetype)
    assert result == expected
    # The return value has to be exactly a list, not a subclass
    assert type(result) is list
Ejemplo n.º 3
0
def test_remove(publish_db, format):
    """Check the effect of ``audb.remove_media`` on loaded databases.

    For each selected media file
    the cache is wiped and the file is removed with ``audb.remove_media``.
    Afterwards every version that originally contained the file
    is loaded with ``removed_media`` set to ``False`` and ``True``.
    The file must only be listed in ``db.files``
    when ``removed_media`` is ``True``,
    and it must never show up in the listing of the ``audio`` folder
    below the root stored in ``db.meta['audb']['root']``.

    """
    media_to_remove = (
        DB_FILES['1.0.0'][0],  # bundle1
        DB_FILES['1.0.0'][1],  # bundle2
        DB_FILES['1.0.0'][2],  # single
        DB_FILES['2.0.0'][0],  # new
    )

    for remove in media_to_remove:

        # remove db cache to ensure we always get a fresh copy
        shutil.rmtree(pytest.CACHE_ROOT)

        audb.remove_media(DB_NAME, remove)

        for removed_media in [False, True]:

            for version in audb.versions(DB_NAME):

                # Only versions that contained the file are of interest
                if remove not in DB_FILES[version]:
                    continue

                # With a requested format the file extension is replaced
                if format is None:
                    removed_file = remove
                else:
                    stem, _ = os.path.splitext(remove)
                    removed_file = f'{stem}.{format}'

                db = audb.load(
                    DB_NAME,
                    version=version,
                    format=format,
                    removed_media=removed_media,
                    full_path=False,
                    num_workers=pytest.NUM_WORKERS,
                    verbose=False,
                )

                if not removed_media:
                    assert removed_file not in db.files
                else:
                    assert removed_file in db.files

                audio_root = os.path.join(db.meta['audb']['root'], 'audio')
                assert removed_file not in audeer.list_file_names(audio_root)

        # Make sure calling it again doesn't raise error
        audb.remove_media(DB_NAME, remove)
Ejemplo n.º 4
0
    def process_folder(
            self,
            root: str,
            *,
            filetype: str = 'wav',
            channel: int = None,
    ) -> pd.DataFrame:
        r"""Extract features from all files of one type in a folder.

        The files are collected with :func:`audeer.list_file_names`.
        The base name of every entry is joined with ``root``
        and the resulting list is passed on to :meth:`process_files`
        together with ``channel``.

        .. note:: At the moment does not scan in sub-folders!

        Args:
            root: root folder
            filetype: file extension
            channel: channel number

        Raises:
            RuntimeError: if number of channels do not match

        """
        paths = []
        for name in audeer.list_file_names(root, filetype=filetype):
            # Strip any leading folder and prepend ``root`` as it was given
            paths.append(os.path.join(root, os.path.basename(name)))
        return self.process_files(paths, channel=channel)
Ejemplo n.º 5
0
    def process_folder(
            self,
            root: str,
            *,
            channel: int = None,
            filetype: str = 'wav',
    ) -> pd.Series:
        r"""Process all files of one type in a folder.

        The files are collected with :func:`audeer.list_file_names`.
        The base name of every entry is joined with ``root``
        and the resulting list is passed on to :meth:`process_files`
        together with ``channel``.

        .. note:: At the moment does not scan in sub-folders!

        Args:
            root: root folder
            channel: channel number
            filetype: file extension

        Returns:
            Series with processed files

        """
        found = audeer.list_file_names(root, filetype=filetype)
        # Strip any leading folder and prepend ``root`` as it was given
        base_names = map(os.path.basename, found)
        paths = [os.path.join(root, name) for name in base_names]
        return self.process_files(paths, channel=channel)
Ejemplo n.º 6
0
    def process_folder(
        self,
        root: str,
        *,
        filetype: str = 'wav',
    ) -> pd.DataFrame:
        r"""Extract features from all files of one type in a folder.

        The files are collected with :func:`audeer.list_file_names`.
        The base name of every entry is joined with ``root``
        and the resulting list is passed on to :meth:`process_files`.

        .. note:: At the moment does not scan in sub-folders!

        Args:
            root: root folder
            filetype: file extension

        Raises:
            RuntimeError: if sampling rates do not match
            RuntimeError: if channel selection is invalid
            RuntimeError: if multiple frames are returned,
                but ``win_dur`` is not set

        """
        found = audeer.list_file_names(root, filetype=filetype)
        # Strip any leading folder and prepend ``root`` as it was given
        base_names = map(os.path.basename, found)
        paths = [os.path.join(root, name) for name in base_names]
        return self.process_files(paths)
Ejemplo n.º 7
0
def cached(
    cache_root: str = None,
    *,
    shared: bool = False,
) -> pd.DataFrame:
    r"""List available databases in the cache.

    The cache folder is traversed as
    ``<cache_root>/<name>/<version>/<flavor_id>``.
    A flavor folder is reported
    if it contains the database header file
    and its version folder contains a dependency file.

    Args:
        cache_root: cache folder where databases are stored.
            If not set :meth:`audb.default_cache_root` is used
        shared: list shared databases

    Returns:
        cached databases,
        indexed by the path of the flavor folder.
        Columns are ``'name'``, ``'flavor_id'``, ``'version'``,
        ``'complete'``,
        followed by the entries of the flavor
        stored in the database header.
        Missing values are represented by ``None``

    """
    if not cache_root:
        cache_root = default_cache_root(shared=shared)
    cache_root = audeer.safe_path(cache_root)

    entries = {}

    for db_root in audeer.list_dir_names(cache_root):
        db_name = os.path.basename(db_root)

        for version_root in audeer.list_dir_names(db_root):
            version = os.path.basename(version_root)

            # Skip tmp folder (e.g. 1.0.0~)
            if version.endswith('~'):  # pragma: no cover
                continue

            flavor_roots = audeer.list_dir_names(version_root)

            # Skip old audb cache (e.g. 1 as flavor):
            # a version folder is only considered
            # if it holds the dependency file
            # or its cached counterpart
            version_files = audeer.list_file_names(version_root)
            dependency_files = (
                os.path.join(version_root, define.DEPENDENCIES_FILE),
                os.path.join(version_root, define.CACHED_DEPENDENCIES_FILE),
            )
            if not any(path in version_files for path in dependency_files):
                continue  # pragma: no cover

            for flavor_root in flavor_roots:
                flavor_id = os.path.basename(flavor_root)
                names = [
                    os.path.basename(path)
                    for path in audeer.list_file_names(flavor_root)
                ]

                # Folders without a database header are ignored
                if define.HEADER_FILE not in names:
                    continue

                db = audformat.Database.load(
                    flavor_root,
                    load_data=False,
                )
                flavor = db.meta['audb']['flavor']
                complete = db.meta['audb']['complete']

                # Fixed columns first, flavor entries are appended
                entry = {
                    'name': db_name,
                    'flavor_id': flavor_id,
                    'version': version,
                    'complete': complete,
                }
                entry.update(flavor)
                entries[flavor_root] = entry

    table = pd.DataFrame.from_dict(entries, orient='index', dtype='object')
    # Replace NaN with None
    return table.where(pd.notnull(table), None)
Ejemplo n.º 8
0
def test_update_database():
    """Publish version 2.1.0 as an update of version 2.0.0.

    The test first checks three situations
    in which ``audb.publish`` has to raise a ``RuntimeError``:
    the dependency file is missing in the database folder,
    the requested previous version does not match
    the dependency file in the database folder,
    and no previous version is given
    although a dependency file is present.
    Afterwards the database is published successfully
    and compared against version 3.0.0.

    The steps rely on the state of the database folder
    left behind by the preceding steps,
    so their order matters.

    """

    version = '2.1.0'
    start_version = '2.0.0'

    # Load the start version into the folder of the new version
    db = audb.load_to(
        DB_ROOT_VERSION[version],
        DB_NAME,
        version=start_version,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )

    # == Fail with missing dependency file
    previous_version = start_version
    dep_file = os.path.join(
        DB_ROOT_VERSION[version],
        audb.core.define.DEPENDENCIES_FILE,
    )
    os.remove(dep_file)
    # NOTE(review): looks like leftover debug output,
    # the result is not used by any assertion -- confirm and remove
    print(audeer.list_file_names(DB_ROOT_VERSION[version]))
    # The message is matched literally (see re.escape below)
    error_msg = (
        f"You want to depend on '{previous_version}' "
        f"of {DB_NAME}, "
        f"but you don't have a '{audb.core.define.DEPENDENCIES_FILE}' "
        f"file present "
        f"in {DB_ROOT_VERSION[version]}. "
        f"Did you forgot to call "
        f"'audb.load_to({DB_ROOT_VERSION[version]}, {DB_NAME}, "
        f"version={previous_version}?")
    with pytest.raises(RuntimeError, match=re.escape(error_msg)):
        audb.publish(
            DB_ROOT_VERSION[version],
            version,
            pytest.PUBLISH_REPOSITORY,
            previous_version=previous_version,
            num_workers=pytest.NUM_WORKERS,
            verbose=False,
        )

    # Reload data to restore dependency file
    shutil.rmtree(DB_ROOT_VERSION[version])
    db = audb.load_to(
        DB_ROOT_VERSION[version],
        DB_NAME,
        version=start_version,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )
    # Remove one file as in version 3.0.0,
    # both on disk and in the database
    remove_file = os.path.join('audio', '001.wav')
    remove_path = os.path.join(DB_ROOT_VERSION[version], remove_file)
    os.remove(remove_path)
    db.drop_files(remove_file)
    db.save(DB_ROOT_VERSION[version])

    # == Fail as 2.0.0 is not the latest version
    previous_version = 'latest'
    error_msg = (f"You want to depend on '{audb.latest_version(DB_NAME)}' "
                 f"of {DB_NAME}, "
                 f"but the MD5 sum of your "
                 f"'{audb.core.define.DEPENDENCIES_FILE}' file "
                 f"in {DB_ROOT_VERSION[version]} "
                 f"does not match the MD5 sum of the corresponding file "
                 f"for the requested version in the repository. "
                 f"Did you forgot to call "
                 f"'audb.load_to({DB_ROOT_VERSION[version]}, {DB_NAME}, "
                 f"version='{audb.latest_version(DB_NAME)}') "
                 f"or modified the file manually?")
    with pytest.raises(RuntimeError, match=re.escape(error_msg)):
        audb.publish(
            DB_ROOT_VERSION[version],
            version,
            pytest.PUBLISH_REPOSITORY,
            previous_version=previous_version,
            num_workers=pytest.NUM_WORKERS,
            verbose=False,
        )

    # == Fail as we require a previous version
    previous_version = None
    error_msg = (
        f"You did not set a dependency to a previous version, "
        f"but you have a '{audb.core.define.DEPENDENCIES_FILE}' file present "
        f"in {DB_ROOT_VERSION[version]}.")
    with pytest.raises(RuntimeError, match=re.escape(error_msg)):
        audb.publish(
            DB_ROOT_VERSION[version],
            version,
            pytest.PUBLISH_REPOSITORY,
            previous_version=previous_version,
            num_workers=pytest.NUM_WORKERS,
            verbose=False,
        )

    # == Succeed when depending on the version that was loaded
    previous_version = start_version
    deps = audb.publish(
        DB_ROOT_VERSION[version],
        version,
        pytest.PUBLISH_REPOSITORY,
        previous_version=previous_version,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )

    # Check that dependencies include previous and actual version only:
    # after sorting, the oldest version has to be the previous one
    # and the newest the one just published
    versions = audeer.sort_versions([deps.version(f) for f in deps.files])
    assert versions[-1] == version
    assert versions[0] == previous_version

    # Check that there is no difference in the database
    # if published from scratch or from previous version
    db1 = audb.load(
        DB_NAME,
        version=version,
        full_path=False,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )
    db2 = audb.load(
        DB_NAME,
        version='3.0.0',
        full_path=False,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )
    # Clear the 'audb' meta entry of both databases before comparing
    db1.meta['audb'] = {}
    db2.meta['audb'] = {}
    assert db1 == db2