Esempio n. 1
0
def test_publish_error_messages():
    """Check the error messages ``audb.publish`` raises for bad versions.

    For each listed version the database folder
    ``DB_ROOT_VERSION[version]`` is published with ``previous_version=None``
    and a ``RuntimeError`` with the exact expected message is required:
    ``'1.0.0'`` is reported as already existing,
    ``'4.0.0'`` and ``'5.0.0'`` are reported as referencing missing files.

    """
    # Expected error message for every version that must fail to publish
    expected_errors = {
        '1.0.0': (
            "A version '1.0.0' already exists for database "
            f"'{DB_NAME}'."
        ),
        '4.0.0': (
            "5 files are referenced in tables that cannot be found. "
            "Missing files are: '['audio/002.wav', 'audio/003.wav', "
            "'audio/004.wav', 'audio/005.wav', 'audio/new.wav']'."
        ),
        '5.0.0': (
            "25 files are referenced in tables that cannot be found. "
            "Missing files are: '['audio/002.wav', 'audio/003.wav', "
            "'audio/004.wav', 'audio/005.wav', 'audio/new.wav', "
            "'file0.wav', 'file1.wav', 'file10.wav', 'file11.wav', "
            "'file12.wav', 'file13.wav', 'file14.wav', 'file15.wav', "
            "'file16.wav', 'file17.wav', 'file18.wav', 'file19.wav', "
            "'file2.wav', 'file3.wav', 'file4.wav'], ...'."
        ),
    }

    for version, error_msg in expected_errors.items():
        # The message is matched literally, hence the escaping
        pattern = re.escape(error_msg)
        with pytest.raises(RuntimeError, match=pattern):
            audb.publish(
                DB_ROOT_VERSION[version],
                version,
                pytest.PUBLISH_REPOSITORY,
                previous_version=None,
                num_workers=pytest.NUM_WORKERS,
                verbose=False,
            )
Esempio n. 2
0
def test_invalid_archives(name):
    """Require a ``ValueError`` when publishing with archive ``name``.

    Args:
        name: archive name assigned to the file ``audio/001.wav``
            (presumably supplied by a parametrize decorator
            outside the visible block -- TODO confirm)

    """
    media_file = os.path.join('audio', '001.wav')
    invalid_archives = {media_file: name}

    with pytest.raises(ValueError):
        audb.publish(
            DB_ROOT_VERSION['1.0.0'],
            '1.0.1',
            pytest.PUBLISH_REPOSITORY,
            archives=invalid_archives,
            num_workers=pytest.NUM_WORKERS,
            verbose=False,
        )
Esempio n. 3
0
def fixture_publish_db():
    """Create and publish version 1.0.0 of a test database, then clean up.

    Setup clears ``DB_ROOT`` and ``pytest.FILE_SYSTEM_HOST``,
    writes one zero-valued audio file per entry of ``DB_FILES``
    below ``DB_ROOT``,
    saves a database with a ``'files'`` and a ``'segments'`` table
    and publishes it.
    After the ``yield`` both roots are cleared again.

    NOTE(review): the ``yield`` suggests this is registered
    as a pytest fixture by a decorator outside the visible block.

    """
    for root in (DB_ROOT, pytest.FILE_SYSTEM_HOST):
        clear_root(root)

    # create db

    file_names = list(DB_FILES)

    db = audformat.testing.create_db(minimal=True)
    db.name = DB_NAME
    db['files'] = audformat.Table(audformat.filewise_index(file_names))
    db['files']['original'] = audformat.Column()
    db['files']['original'].set(file_names)

    for file, spec in DB_FILES.items():
        # Zero signal with shape (channels, sampling_rate)
        shape = (spec['channels'], spec['sampling_rate'])
        signal = np.zeros(shape, dtype=np.float32)
        path = os.path.join(DB_ROOT, file)
        audeer.mkdir(os.path.dirname(path))
        audiofile.write(
            path,
            signal,
            spec['sampling_rate'],
            bit_depth=spec['bit_depth'],
        )

    # Three consecutive segments, all pointing to the first file
    segments_index = audformat.segmented_index(
        [file_names[0]] * 3,
        starts=['0s', '1s', '2s'],
        ends=['1s', '2s', '3s'],
    )
    db['segments'] = audformat.Table(segments_index)
    db.save(DB_ROOT)

    # publish db

    audb.publish(
        DB_ROOT,
        '1.0.0',
        pytest.PUBLISH_REPOSITORY,
        verbose=False,
    )

    yield

    for root in (DB_ROOT, pytest.FILE_SYSTEM_HOST):
        clear_root(root)
Esempio n. 4
0
def publish_db():
    """Publish versions 1.0.0 and 2.0.0 of a test database, then clean up.

    Version 1.0.0 contains the files listed in ``DB_FILES['1.0.0']``;
    its first two files are both assigned to the archive ``'bundle'``.
    Version 2.0.0 extends the ``'files'`` table
    by the files listed in ``DB_FILES['2.0.0']``.
    Both versions are saved to and published from ``DB_ROOT``.
    ``DB_ROOT`` and ``pytest.FILE_SYSTEM_HOST`` are cleared
    before setup and again after the ``yield``.

    NOTE(review): the ``yield`` suggests this is registered
    as a pytest fixture by a decorator outside the visible block.

    """
    for root in (DB_ROOT, pytest.FILE_SYSTEM_HOST):
        clear_root(root)

    # create db

    files_v1 = DB_FILES['1.0.0']
    db = audformat.testing.create_db(minimal=True)
    db.name = DB_NAME
    db['files'] = audformat.Table(audformat.filewise_index(files_v1))

    # publish 1.0.0

    db.save(DB_ROOT)
    audformat.testing.create_audio_files(db)
    # Put the first two files into one shared archive
    archives = {db.files[position]: 'bundle' for position in (0, 1)}
    audb.publish(
        DB_ROOT,
        '1.0.0',
        pytest.PUBLISH_REPOSITORY,
        archives=archives,
        verbose=False,
    )

    # publish 2.0.0

    files_v2 = DB_FILES['2.0.0']
    db['files'].extend_index(
        audformat.filewise_index(files_v2),
        inplace=True,
    )
    db.save(DB_ROOT)
    audformat.testing.create_audio_files(db)
    audb.publish(
        DB_ROOT,
        '2.0.0',
        pytest.PUBLISH_REPOSITORY,
        verbose=False,
    )

    yield

    for root in (DB_ROOT, pytest.FILE_SYSTEM_HOST):
        clear_root(root)
Esempio n. 5
0
def fixture_publish_db():
    """Publish the module-level database ``DB``, then clean up.

    Setup clears ``DB_ROOT`` and ``pytest.FILE_SYSTEM_HOST``,
    saves ``DB`` to ``DB_ROOT``
    and publishes it as version ``DB_VERSION``.
    After the ``yield`` both roots are cleared again.

    NOTE(review): the ``yield`` suggests this is registered
    as a pytest fixture by a decorator outside the visible block.

    """
    for root in (DB_ROOT, pytest.FILE_SYSTEM_HOST):
        clear_root(root)

    # create db

    DB.save(DB_ROOT)

    # publish db

    publish_options = {'verbose': False}
    audb.publish(
        DB_ROOT,
        DB_VERSION,
        pytest.PUBLISH_REPOSITORY,
        **publish_options,
    )

    yield

    for root in (DB_ROOT, pytest.FILE_SYSTEM_HOST):
        clear_root(root)
Esempio n. 6
0
def fixture_publish_db():
    """Publish a database with two filewise tables, then clean up.

    Setup clears ``DB_ROOT`` and ``pytest.FILE_SYSTEM_HOST``,
    creates ``'table1'`` (``num_files=[0, 1, 2]``)
    and ``'table2'`` (``num_files=[1, 2, 3]``),
    saves the database to ``DB_ROOT``, creates its audio files
    and publishes it as version ``DB_VERSION``.
    After the ``yield`` both roots are cleared again.

    NOTE(review): the ``yield`` suggests this is registered
    as a pytest fixture by a decorator outside the visible block.

    """
    for root in (DB_ROOT, pytest.FILE_SYSTEM_HOST):
        clear_root(root)

    # create db

    db = audformat.testing.create_db(minimal=True)
    db.name = DB_NAME
    db.schemes['scheme'] = audformat.Scheme()

    # Table ID -> value handed to ``num_files``
    table_files = (
        ('table1', [0, 1, 2]),
        ('table2', [1, 2, 3]),
    )
    for table_id, file_ids in table_files:
        audformat.testing.add_table(
            db,
            table_id,
            'filewise',
            num_files=file_ids,
        )

    db.save(DB_ROOT)
    audformat.testing.create_audio_files(db)

    # publish 1.0.0

    audb.publish(
        DB_ROOT,
        DB_VERSION,
        pytest.PUBLISH_REPOSITORY,
        verbose=False,
    )

    yield

    for root in (DB_ROOT, pytest.FILE_SYSTEM_HOST):
        clear_root(root)
Esempio n. 7
0
def fixture_publish_db():
    """Publish versions 1.0.0 to 3.0.0 of a test database, then clean up.

    Setup clears ``DB_ROOT`` and ``pytest.FILE_SYSTEM_HOST``,
    builds a database with an ``'emotion'`` and a ``'files'`` table
    and publishes it five times
    (1.0.0, 1.1.0, 1.1.1, 2.0.0, 3.0.0),
    each time from its own folder ``DB_ROOT_VERSION[<version>]``:

    * 1.0.0: initial version, archives taken from the speaker column
    * 1.1.0: adds the table ``'train'``
    * 1.1.1: sets the first ``'label'`` entry of ``'train'`` to ``None``
    * 2.0.0: alters the first media file and removes the last one
    * 3.0.0: drops the table ``'train'``

    After the ``yield`` both roots are cleared again.

    NOTE(review): the ``yield`` suggests this is registered
    as a pytest fixture by a decorator outside the visible block.

    """
    def publish_on_top_of(previous, current):
        # Copy 'db.csv' from the previous build folder
        # into the current one before publishing
        # (presumably the dependency table
        # that lets the new version build on the previous one
        # -- TODO confirm).
        shutil.copy(
            os.path.join(DB_ROOT_VERSION[previous], 'db.csv'),
            os.path.join(DB_ROOT_VERSION[current], 'db.csv'),
        )
        audb.publish(
            DB_ROOT_VERSION[current],
            current,
            pytest.PUBLISH_REPOSITORY,
            verbose=False,
        )

    clear_root(DB_ROOT)
    clear_root(pytest.FILE_SYSTEM_HOST)

    # create db

    db = audformat.testing.create_db(minimal=True)
    db.name = DB_NAME
    db.schemes['scheme'] = audformat.Scheme(
        labels=['positive', 'neutral', 'negative']
    )
    audformat.testing.add_table(
        db,
        'emotion',
        audformat.define.IndexType.SEGMENTED,
        num_files=5,
        columns={'emotion': ('scheme', None)}
    )
    db.schemes['speaker'] = audformat.Scheme(
        labels=['adam', 'eve']
    )
    db['files'] = audformat.Table(db.files)
    db['files']['speaker'] = audformat.Column(scheme_id='speaker')
    # Only the first four files get a speaker assigned
    db['files']['speaker'].set(
        ['adam', 'adam', 'eve', 'eve'],
        index=audformat.filewise_index(db.files[:4]),
    )

    # publish 1.0.0

    db.save(DB_ROOT_VERSION['1.0.0'])
    audformat.testing.create_audio_files(db)
    # Files with a speaker are archived under the speaker name
    archives = db['files']['speaker'].get().dropna().to_dict()
    audb.publish(
        DB_ROOT_VERSION['1.0.0'],
        '1.0.0',
        pytest.PUBLISH_REPOSITORY,
        archives=archives,
        verbose=False,
    )

    # publish 1.1.0, add table

    audformat.testing.add_table(
        db, 'train', audformat.define.IndexType.SEGMENTED,
        columns={'label': ('scheme', None)}
    )

    db.save(DB_ROOT_VERSION['1.1.0'])
    audformat.testing.create_audio_files(db)
    publish_on_top_of('1.0.0', '1.1.0')

    # publish 1.1.1, change label

    # Single positional assignment instead of the chained
    # ``df['label'][0] = None``:
    # chained assignment does not write through to the data frame
    # when pandas Copy-on-Write is active,
    # and integer keys on a non-integer index
    # are deprecated as positional access.
    train_df = db['train'].df
    train_df.iloc[0, train_df.columns.get_loc('label')] = None

    db.save(DB_ROOT_VERSION['1.1.1'])
    audformat.testing.create_audio_files(db)
    publish_on_top_of('1.1.0', '1.1.1')

    # publish 2.0.0, alter and remove media

    db.save(DB_ROOT_VERSION['2.0.0'])
    audformat.testing.create_audio_files(db)
    # Alter the first file by overwriting the start of its signal
    altered_path = os.path.join(DB_ROOT_VERSION['2.0.0'], db.files[0])
    y, sr = audiofile.read(altered_path)
    y[0] = 1
    audiofile.write(altered_path, y, sr)
    # Remove the last file from the database and from disk
    removed_file = db.files[-1]
    db.pick_files(lambda x: x != removed_file)
    os.remove(
        audeer.safe_path(
            os.path.join(DB_ROOT_VERSION['2.0.0'], removed_file)
        )
    )
    db.save(DB_ROOT_VERSION['2.0.0'])
    publish_on_top_of('1.1.1', '2.0.0')

    # publish 3.0.0, remove table

    db.drop_tables('train')

    db.save(DB_ROOT_VERSION['3.0.0'])
    audformat.testing.create_audio_files(db)
    publish_on_top_of('2.0.0', '3.0.0')

    yield

    clear_root(DB_ROOT)
    clear_root(pytest.FILE_SYSTEM_HOST)
Esempio n. 8
0
def fixture_publish_db():
    """Publish a database with one archive per table, then clean up.

    Setup clears ``DB_ROOT`` and ``pytest.FILE_SYSTEM_HOST``,
    creates the segmented tables ``'test'``, ``'dev'`` and ``'train'``,
    moves the two ``'train'`` files into nested sub-folders,
    saves the database to ``DB_ROOT``, creates its audio files
    and publishes version 1.0.0
    with every file assigned to an archive named after its table.
    After the ``yield`` both roots are cleared again.

    NOTE(review): the ``yield`` suggests this is registered
    as a pytest fixture by a decorator outside the visible block.

    """
    for root in (DB_ROOT, pytest.FILE_SYSTEM_HOST):
        clear_root(root)

    # create db

    db = audformat.testing.create_db(minimal=True)
    db.name = DB_NAME
    db.schemes['scheme'] = audformat.Scheme(labels=['some', 'test', 'labels'])

    # Table ID -> value handed to ``num_files``
    table_files = (
        ('test', [0, 1]),
        ('dev', [10, 11]),
        ('train', [20, 21]),
    )
    for table_id, file_ids in table_files:
        audformat.testing.add_table(
            db,
            table_id,
            audformat.define.IndexType.SEGMENTED,
            columns={'label': ('scheme', None)},
            num_files=file_ids,
        )

    # Add nested folder structure to ensure not all files in an archive
    # are stored in the same folder
    mapping = {
        'audio/020.wav': 'audio/1/020.wav',
        'audio/021.wav': 'audio/2/021.wav',
    }
    train_index = db['train'].df.index
    nested_files = [
        mapping[f] for f in train_index.get_level_values('file')
    ]
    db['train'].df.index = audformat.segmented_index(
        files=nested_files,
        starts=train_index.get_level_values('start'),
        ends=train_index.get_level_values('end'),
    )
    db.save(DB_ROOT)
    audformat.testing.create_audio_files(db)

    # publish db

    # Every file goes into an archive named after its table
    archives = {
        file: table
        for table in db.tables
        for file in db[table].files
    }
    audb.publish(
        DB_ROOT,
        '1.0.0',
        pytest.PUBLISH_REPOSITORY,
        archives=archives,
        verbose=False,
    )

    yield

    for root in (DB_ROOT, pytest.FILE_SYSTEM_HOST):
        clear_root(root)
Esempio n. 9
0
    },
    13: {
        'gender': 'female',
        'age': 32,
        'language': 'deu'
    },
    14: {
        'gender': 'female',
        'age': 35,
        'language': 'deu'
    },
    15: {
        'gender': 'male',
        'age': 25,
        'language': 'deu'
    },
    16: {
        'gender': 'female',
        'age': 31,
        'language': 'deu'
    },
}
# Write the database to the build folder
# (``db`` and ``build_dir`` are defined outside the visible part
# of this script).
db.save(build_dir)

# Publish the content of the build folder as ``version``
# to ``repository``, passing on the requested ``previous_version``
# (all three names are defined outside the visible part of this script).
audb.publish(
    build_dir,
    version,
    repository,
    previous_version=previous_version,
)
Esempio n. 10
0
import audb

# Folder handed to ``audb.publish`` as the database root
DB_ROOT = './build'

# Target repository: 'data-public' on the given Artifactory host
repository = audb.Repository(
    name='data-public',
    host='https://audeering.jfrog.io/artifactory',
    backend='artifactory',
)
# Publish the content of DB_ROOT as version 1.1.0,
# using a single worker and verbose output.
# NOTE(review): this runs at module level,
# so executing or importing the file triggers the publication.
audb.publish(
    DB_ROOT,
    version='1.1.0',
    repository=repository,
    num_workers=1,
    verbose=True,
)
Esempio n. 11
0
def test_update_database():
    """Publish version 2.1.0 as an update of the published version 2.0.0.

    The test loads version 2.0.0 into the build folder of 2.1.0
    and checks that ``audb.publish`` raises a ``RuntimeError`` when

    * the dependency file is missing in the build folder,
    * ``previous_version='latest'`` is requested
      although the build folder was loaded from 2.0.0,
    * ``previous_version=None`` is requested
      although a dependency file is present.

    Finally it publishes 2.1.0 on top of 2.0.0
    and requires the result to be equal to the published version 3.0.0
    (ignoring the ``'audb'`` meta entry).

    """
    version = '2.1.0'
    start_version = '2.0.0'
    db_root = DB_ROOT_VERSION[version]
    deps_file_name = audb.core.define.DEPENDENCIES_FILE

    def load_start_version():
        # Load the start version into the build folder of the new version
        return audb.load_to(
            db_root,
            DB_NAME,
            version=start_version,
            num_workers=pytest.NUM_WORKERS,
            verbose=False,
        )

    def publish(previous_version):
        # Publish the build folder as ``version``
        # on top of ``previous_version``
        return audb.publish(
            db_root,
            version,
            pytest.PUBLISH_REPOSITORY,
            previous_version=previous_version,
            num_workers=pytest.NUM_WORKERS,
            verbose=False,
        )

    load_start_version()

    # == Fail with missing dependency file
    previous_version = start_version
    os.remove(os.path.join(db_root, deps_file_name))
    error_msg = (
        f"You want to depend on '{previous_version}' "
        f"of {DB_NAME}, "
        f"but you don't have a '{deps_file_name}' "
        f"file present "
        f"in {db_root}. "
        f"Did you forgot to call "
        f"'audb.load_to({db_root}, {DB_NAME}, "
        f"version={previous_version}?")
    with pytest.raises(RuntimeError, match=re.escape(error_msg)):
        publish(previous_version)

    # Reload data to restore dependency file
    shutil.rmtree(db_root)
    db = load_start_version()
    # Remove one file as in version 3.0.0
    remove_file = os.path.join('audio', '001.wav')
    os.remove(os.path.join(db_root, remove_file))
    db.drop_files(remove_file)
    db.save(db_root)

    # == Fail as 2.0.0 is not the latest version
    previous_version = 'latest'
    latest_version = audb.latest_version(DB_NAME)
    error_msg = (f"You want to depend on '{latest_version}' "
                 f"of {DB_NAME}, "
                 f"but the MD5 sum of your "
                 f"'{deps_file_name}' file "
                 f"in {db_root} "
                 f"does not match the MD5 sum of the corresponding file "
                 f"for the requested version in the repository. "
                 f"Did you forgot to call "
                 f"'audb.load_to({db_root}, {DB_NAME}, "
                 f"version='{latest_version}') "
                 f"or modified the file manually?")
    with pytest.raises(RuntimeError, match=re.escape(error_msg)):
        publish(previous_version)

    # == Fail as we require a previous version
    previous_version = None
    error_msg = (
        f"You did not set a dependency to a previous version, "
        f"but you have a '{deps_file_name}' file present "
        f"in {db_root}.")
    with pytest.raises(RuntimeError, match=re.escape(error_msg)):
        publish(previous_version)

    # == Succeed when depending on the version that was loaded
    previous_version = start_version
    deps = publish(previous_version)

    # Check that the oldest and newest version in the dependencies
    # are the previous and the actual version
    versions = audeer.sort_versions([deps.version(f) for f in deps.files])
    assert versions[-1] == version
    assert versions[0] == previous_version

    # Check that there is no difference in the database
    # if published from scratch or from previous version
    db1 = audb.load(
        DB_NAME,
        version=version,
        full_path=False,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )
    db2 = audb.load(
        DB_NAME,
        version='3.0.0',
        full_path=False,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )
    # Reset the 'audb' meta entry of both databases before comparing
    db1.meta['audb'] = {}
    db2.meta['audb'] = {}
    assert db1 == db2
Esempio n. 12
0
def test_publish(version):
    """Publish ``version`` from scratch and verify the result.

    The database stored in ``DB_ROOT_VERSION[version]`` is published
    with ``previous_version=None``
    and with archives derived from the speaker column
    of its ``'files'`` table.
    Afterwards the test checks the returned dependencies,
    the version listings of ``audb``
    and the checksum and audio metadata of every file.

    Args:
        version: version to publish
            (presumably supplied by a parametrize decorator
            outside the visible block -- TODO confirm)

    """
    db = audformat.Database.load(DB_ROOT_VERSION[version])

    # Without any published version there is no latest version
    if not audb.versions(DB_NAME):
        with pytest.raises(RuntimeError):
            audb.latest_version(DB_NAME)

    # Files with a speaker are archived under the speaker name
    archives = db['files']['speaker'].get().dropna().to_dict()
    deps = audb.publish(
        DB_ROOT_VERSION[version],
        version,
        pytest.PUBLISH_REPOSITORY,
        archives=archives,
        previous_version=None,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )
    backend = audb.core.utils.lookup_backend(DB_NAME, version)
    number_of_files = len(archives)
    number_of_archives = len(set(archives.values()))
    # Files sharing an archive reduce the number of archives accordingly
    assert len(deps.files) - len(deps.archives) == (number_of_files -
                                                    number_of_archives)
    for archive in set(archives.values()):
        assert archive in deps.archives

    db = audb.load(
        DB_NAME,
        version=version,
        full_path=False,
        num_workers=pytest.NUM_WORKERS,
    )
    assert db.name == DB_NAME

    versions = audb.versions(DB_NAME)
    latest_version = audb.latest_version(DB_NAME)

    assert version in versions
    assert latest_version == versions[-1]

    df = audb.available(only_latest=False)
    assert DB_NAME in df.index
    assert set(df[df.index == DB_NAME]['version']) == set(versions)

    df = audb.available(only_latest=True)
    assert DB_NAME in df.index
    # Explicit positional access:
    # integer keys on a non-integer index are deprecated in pandas
    assert df[df.index == DB_NAME]['version'].iloc[0] == latest_version

    for file in db.files:
        name = archives.get(file, file)
        file_path = backend.join(db.name, 'media', name)
        # NOTE(review): the result of ``exists`` is discarded,
        # so this line verifies nothing.
        # Confirm that ``file_path`` matches the backend layout
        # before turning it into an assertion.
        backend.exists(file_path, version)
        path = os.path.join(DB_ROOT_VERSION[version], file)
        assert deps.checksum(file) == audbackend.md5(path)
        if deps.format(file) in [
                audb.core.define.Format.WAV,
                audb.core.define.Format.FLAC,
        ]:
            assert deps.bit_depth(file) == audiofile.bit_depth(path)
            assert deps.channels(file) == audiofile.channels(path)
            assert deps.duration(file) == audiofile.duration(path)
            assert deps.sampling_rate(file) == audiofile.sampling_rate(path)