Ejemplo n.º 1
0
def test_on_the_fly_feature_extraction_unsupervised_dataset_with_augmentation(libri_cut_set):
    tested_dataset = DynamicUnsupervisedDataset(
        feature_extractor=Fbank(),
        cuts=libri_cut_set,
        augmenter=WavAugmenter.create_predefined('reverb', sampling_rate=16000)
    )
    # Just test that it runs
    tested_feats = tested_dataset[0]
Ejemplo n.º 2
0
def test_feature_set_builder_with_augmentation():
    recordings: RecordingSet = RecordingSet.from_json(
        'test/fixtures/audio.json')
    augment_fn = WavAugmenter.create_predefined('pitch_reverb_tdrop',
                                                sampling_rate=8000)
    extractor = Fbank()
    with TemporaryDirectory() as d, LilcomFilesWriter(d) as storage:
        builder = FeatureSetBuilder(feature_extractor=extractor,
                                    storage=storage,
                                    augment_fn=augment_fn)
        feature_set = builder.process_and_store_recordings(
            recordings=recordings)

        assert len(feature_set) == 6

        feature_infos = list(feature_set)

        # Assert the properties shared by all features
        for features in feature_infos:
            # assert that fbank is the default feature type
            assert features.type == 'fbank'
            # assert that duration is always a multiple of frame_shift
            assert features.num_frames == round(features.duration /
                                                features.frame_shift)
            # assert that num_features is preserved
            assert features.num_features == builder.feature_extractor.config.num_mel_bins
            # assert that the storage type metadata matches
            assert features.storage_type == storage.name
            # assert that the metadata is consistent with the data shapes
            arr = features.load()
            assert arr.shape[0] == features.num_frames
            assert arr.shape[1] == features.num_features

        # Assert the properties for recordings of duration 0.5 seconds
        for features in feature_infos[:2]:
            assert features.num_frames == 50
            assert features.duration == 0.5

        # Assert the properties for recordings of duration 1.0 seconds
        for features in feature_infos[2:]:
            assert features.num_frames == 100
            assert features.duration == 1.0
Ejemplo n.º 3
0
def extract(audio_manifest: Pathlike, output_dir: Pathlike,
            segmentation_manifest: Optional[Pathlike], augmentation: str,
            feature_manifest: Optional[Pathlike], compressed: bool,
            lilcom_tick_power: int, root_dir: Optional[Pathlike],
            num_jobs: int):
    """
    Extract features for recordings in a given AUDIO_MANIFEST. The features are stored in OUTPUT_DIR,
    with one file per recording (or segment).
    """
    audio_set = RecordingSet.from_json(audio_manifest)

    feature_extractor = (FeatureExtractor.from_yaml(feature_manifest)
                         if feature_manifest is not None else Fbank())

    # TODO: to be used (actually, only the segmentation info will be used, and all supervision info will be ignored)
    supervision_set = (SupervisionSet.from_json(segmentation_manifest)
                       if segmentation_manifest is not None else None)

    output_dir = Path(output_dir)
    output_dir.mkdir(exist_ok=True, parents=True)

    augmenter = None
    if augmentation is not None:
        sampling_rate = next(iter(audio_set)).sampling_rate
        assert all(rec.sampling_rate == sampling_rate for rec in audio_set), \
            "Wav augmentation effect chains expect all the recordings to have the same sampling rate at this time."
        augmenter = WavAugmenter.create_predefined(name=augmentation,
                                                   sampling_rate=sampling_rate)

    feature_set_builder = FeatureSetBuilder(
        feature_extractor=feature_extractor,
        output_dir=output_dir,
        root_dir=root_dir,
        augmenter=augmenter)
    feature_set_builder.process_and_store_recordings(
        recordings=audio_set,
        segmentation=None,  # TODO: implement and use
        compressed=compressed,
        lilcom_tick_power=lilcom_tick_power,
        num_jobs=num_jobs)
Ejemplo n.º 4
0
def extract(recording_manifest: Pathlike, output_dir: Pathlike,
            augmentation: str, feature_manifest: Optional[Pathlike],
            storage_type: str, lilcom_tick_power: int,
            root_dir: Optional[Pathlike], num_jobs: int):
    """
    Extract features for recordings in a given AUDIO_MANIFEST. The features are stored in OUTPUT_DIR,
    with one file per recording (or segment).
    """
    recordings: RecordingSet = RecordingSet.from_json(recording_manifest)
    if root_dir is not None:
        recordings = recordings.with_path_prefix(root_dir)

    feature_extractor = (FeatureExtractor.from_yaml(feature_manifest)
                         if feature_manifest is not None else Fbank())

    output_dir = Path(output_dir)
    output_dir.mkdir(exist_ok=True, parents=True)
    storage_path = output_dir / 'feats.h5' if 'hdf5' in storage_type else output_dir / 'storage'

    augmenter = None
    if augmentation is not None:
        sampling_rate = next(iter(recordings)).sampling_rate
        assert all(rec.sampling_rate == sampling_rate for rec in recordings), \
            "Wav augmentation effect chains expect all the recordings to have the same sampling rate at this time."
        augmenter = WavAugmenter.create_predefined(name=augmentation,
                                                   sampling_rate=sampling_rate)

    with get_writer(storage_type)(storage_path,
                                  tick_power=lilcom_tick_power) as storage:
        feature_set_builder = FeatureSetBuilder(
            feature_extractor=feature_extractor,
            storage=storage,
            augmenter=augmenter)
        feature_set_builder.process_and_store_recordings(
            recordings=recordings,
            output_manifest=output_dir / 'feature_manifest.json.gz',
            num_jobs=num_jobs)
Ejemplo n.º 5
0
def test_feature_set_builder(augmentation):
    audio_set = RecordingSet.from_json('test/fixtures/audio.json')
    augmenter = WavAugmenter.create_predefined(
        augmentation, sampling_rate=8000) if augmentation is not None else None
    with TemporaryDirectory() as output_dir:
        builder = FeatureSetBuilder(feature_extractor=Fbank(),
                                    output_dir=output_dir,
                                    augmenter=augmenter)
        feature_set = builder.process_and_store_recordings(
            recordings=audio_set)

    assert len(feature_set) == 6

    feature_infos = list(feature_set)

    # Assert the properties shared by all features
    for features in feature_infos:
        # assert that fbank is the default feature type
        assert features.type == 'fbank'
        # assert that duration is always a multiple of frame_shift
        assert features.num_frames == round(features.duration /
                                            features.frame_shift)
        # assert that num_features is preserved
        assert features.num_features == builder.feature_extractor.config.num_mel_bins
        # assert that lilcom is the default storate type
        assert features.storage_type == 'lilcom'

    # Assert the properties for recordings of duration 0.5 seconds
    for features in feature_infos[:2]:
        assert features.num_frames == 50
        assert features.duration == 0.5

    # Assert the properties for recordings of duration 1.0 seconds
    for features in feature_infos[2:]:
        assert features.num_frames == 100
        assert features.duration == 1.0
Ejemplo n.º 6
0
def test_predefined_augmentation_setups(audio, name):
    augmenter = WavAugmenter.create_predefined(name=name, sampling_rate=16000)
    augmented_audio = augmenter(audio, sampling_rate=16000)
    assert augmented_audio.shape == audio.shape
    assert (augmented_audio != audio).any()