Example #1
0
def test_serialization():
    audio_set = RecordingSet.from_recordings([
        Recording(
            id='x',
            sources=[
                AudioSource(
                    type='file',
                    channel_ids=[0],
                    source='text/fixtures/mono_c0.wav'
                ),
                AudioSource(
                    type='command',
                    channel_ids=[1],
                    source='cat text/fixtures/mono_c1.wav'
                )
            ],
            sampling_rate=8000,
            num_samples=4000,
            duration_seconds=0.5
        )
    ])
    with NamedTemporaryFile() as f:
        audio_set.to_yaml(f.name)
        deserialized = RecordingSet.from_yaml(f.name)
    assert deserialized == audio_set
Example #2
0
def test_serialization(format, compressed):
    recording_set = RecordingSet.from_recordings([
        Recording(
            id='x',
            sources=[
                AudioSource(
                    type='file',
                    channels=[0],
                    source='text/fixtures/mono_c0.wav'
                ),
                AudioSource(
                    type='command',
                    channels=[1],
                    source='cat text/fixtures/mono_c1.wav'
                )
            ],
            sampling_rate=8000,
            num_samples=4000,
            duration=0.5
        )
    ])
    with NamedTemporaryFile(suffix='.gz' if compressed else '') as f:
        if format == 'yaml':
            recording_set.to_yaml(f.name)
            deserialized = RecordingSet.from_yaml(f.name)
        if format == 'json':
            recording_set.to_json(f.name)
            deserialized = RecordingSet.from_json(f.name)
    assert deserialized == recording_set
Example #3
0
def test_feature_set_builder():
    audio_set = RecordingSet.from_yaml('test/fixtures/audio.yml')
    with TemporaryDirectory() as output_dir:
        builder = FeatureSetBuilder(feature_extractor=FeatureExtractor(),
                                    output_dir=output_dir)
        feature_set = builder.process_and_store_recordings(
            recordings=audio_set)

    assert len(feature_set) == 4

    feature_infos = list(feature_set)

    # Assert the properties shared by all features
    for features in feature_infos:
        # assert that fbank is the default feature type
        assert features.type == 'fbank'
        # assert that duration is always a multiple of frame_shift
        assert features.num_frames == round(features.duration /
                                            features.frame_shift)
        # assert that num_features is preserved
        assert features.num_features == builder.feature_extractor.mfcc_fbank_common_config.num_mel_bins
        # assert that lilcom is the default storate type
        assert features.storage_type == 'lilcom'

    # Assert the properties for recordings of duration 0.5 seconds
    for features in feature_infos[:2]:
        assert features.num_frames == 50
        assert features.duration == 0.5

    # Assert the properties for recordings of duration 1.0 seconds
    for features in feature_infos[2:]:
        assert features.num_frames == 100
        assert features.duration == 1.0
Example #4
0
def make_feats(
        audio_manifest: Pathlike,
        output_dir: Pathlike,
        segmentation_manifest: Optional[Pathlike],
        # TODO: augmentation manifest should specify a number of transforms and probability of their application
        # e.g.:
        # "add_noise", "prob": 0.5, "noise_recordings": ["path1.wav", "path2.wav"]
        # "reverberate", "prob": 0.2, "rirs": ["rir1.wav", "rir2.wav"] (or however the RIRs are stored like... can be params for simulation)
        augmentation_manifest: Optional[Pathlike],
        feature_manifest: Optional[Pathlike],
        compressed: bool,
        lilcom_tick_power: int,
        root_dir: Optional[Pathlike],
        num_jobs: int):
    """
    Extract features for recordings in a given AUDIO_MANIFEST. The features are stored in OUTPUT_DIR,
    with one file per recording (or segment).
    """
    audio_set = RecordingSet.from_yaml(audio_manifest)

    feature_extractor = (FeatureExtractor.from_yaml(feature_manifest) if
                         feature_manifest is not None else FeatureExtractor())

    # TODO: to be used (actually, only the segmentation info will be used, and all supervision info will be ignored)
    supervision_set = (SupervisionSet.from_yaml(segmentation_manifest)
                       if segmentation_manifest is not None else None)

    output_dir = Path(output_dir)
    output_dir.mkdir(exist_ok=True, parents=True)

    feature_set_builder = FeatureSetBuilder(
        feature_extractor=feature_extractor,
        output_dir=output_dir,
        root_dir=root_dir,
        augmentation_manifest=augmentation_manifest)
    feature_set_builder.process_and_store_recordings(
        recordings=audio_set,
        segmentation=None,  # TODO: implement and use
        compressed=compressed,
        lilcom_tick_power=lilcom_tick_power,
        num_jobs=num_jobs)
Example #5
0
def get_audio_set() -> RecordingSet:
    return RecordingSet.from_yaml('test/fixtures/audio.yml')
Example #6
0
def libri_recording_set():
    return RecordingSet.from_yaml('test/fixtures/libri/audio.yml')