コード例 #1
0
ファイル: test_feature_set.py プロジェクト: popcornell/lhotse
def test_feature_extractor(feature_type, exception_expectation):
    # For now, just test that it runs
    # TODO: test that the output is similar to Kaldi
    with exception_expectation:
        fe = FeatureExtractor(type=feature_type)
        samples, sr = torchaudio.load(
            'test/fixtures/libri/libri-1088-134315-0000.wav')
        fe.extract(samples=samples, sampling_rate=sr)
コード例 #2
0
ファイル: test_feature_set.py プロジェクト: popcornell/lhotse
def test_feature_extractor_to_dict_no_defaults():
    fe = FeatureExtractor()
    fe.mfcc_fbank_common_config.num_mel_bins = 80
    fe_dict = fe.to_dict()
    assert fe_dict == {
        'type': 'fbank',
        'mfcc_fbank_common_config': {
            'num_mel_bins': 80
        }
    }
コード例 #3
0
def extract(recording_manifest: Pathlike, output_dir: Pathlike,
            feature_manifest: Optional[Pathlike], storage_type: str,
            lilcom_tick_power: int, root_dir: Optional[Pathlike],
            num_jobs: int):
    """
    Extract features for recordings in a given AUDIO_MANIFEST. The features are stored in OUTPUT_DIR,
    with one file per recording (or segment).
    """
    recordings: RecordingSet = RecordingSet.from_json(recording_manifest)
    if root_dir is not None:
        recordings = recordings.with_path_prefix(root_dir)

    feature_extractor = (FeatureExtractor.from_yaml(feature_manifest)
                         if feature_manifest is not None else Fbank())

    output_dir = Path(output_dir)
    output_dir.mkdir(exist_ok=True, parents=True)
    storage_path = output_dir / 'feats.h5' if 'hdf5' in storage_type else output_dir / 'storage'

    with get_writer(storage_type)(storage_path,
                                  tick_power=lilcom_tick_power) as storage:
        feature_set_builder = FeatureSetBuilder(
            feature_extractor=feature_extractor,
            storage=storage,
        )
        feature_set_builder.process_and_store_recordings(
            recordings=recordings,
            output_manifest=output_dir / 'feature_manifest.json.gz',
            num_jobs=num_jobs)
コード例 #4
0
def extract_cuts(
    cutset: Pathlike,
    output_cutset: Pathlike,
    storage_path: Pathlike,
    feature_manifest: Optional[Pathlike],
    storage_type: str,
    num_jobs: int,
):
    """
    Extract features for cuts in a given CUTSET manifest.
    The features are stored in STORAGE_PATH, and the output manifest
    with features is stored in OUTPUT_CUTSET.
    """
    from lhotse import CutSet

    cuts: CutSet = CutSet.from_file(cutset)
    feature_extractor = (FeatureExtractor.from_yaml(feature_manifest)
                         if feature_manifest is not None else Fbank())
    cuts = cuts.compute_and_store_features(
        extractor=feature_extractor,
        storage_path=storage_path,
        num_jobs=num_jobs,
        storage_type=get_writer(storage_type),
    )
    Path(output_cutset).parent.mkdir(parents=True, exist_ok=True)
    cuts.to_file(output_cutset)
コード例 #5
0
ファイル: test_feature_set.py プロジェクト: popcornell/lhotse
def test_feature_set_builder():
    audio_set = RecordingSet.from_yaml('test/fixtures/audio.yml')
    with TemporaryDirectory() as output_dir:
        builder = FeatureSetBuilder(feature_extractor=FeatureExtractor(),
                                    output_dir=output_dir)
        feature_set = builder.process_and_store_recordings(
            recordings=audio_set)

    assert len(feature_set) == 4

    feature_infos = list(feature_set)

    # Assert the properties shared by all features
    for features in feature_infos:
        # assert that fbank is the default feature type
        assert features.type == 'fbank'
        # assert that duration is always a multiple of frame_shift
        assert features.num_frames == round(features.duration /
                                            features.frame_shift)
        # assert that num_features is preserved
        assert features.num_features == builder.feature_extractor.mfcc_fbank_common_config.num_mel_bins
        # assert that lilcom is the default storate type
        assert features.storage_type == 'lilcom'

    # Assert the properties for recordings of duration 0.5 seconds
    for features in feature_infos[:2]:
        assert features.num_frames == 50
        assert features.duration == 0.5

    # Assert the properties for recordings of duration 1.0 seconds
    for features in feature_infos[2:]:
        assert features.num_frames == 100
        assert features.duration == 1.0
コード例 #6
0
ファイル: test_feature_set.py プロジェクト: popcornell/lhotse
def test_overlay_fbank():
    # Treat it more like a test of "it runs" rather than "it works"
    t = np.linspace(0, 1, 8000, dtype=np.float32)
    x1 = np.sin(440.0 * t).reshape(1, -1)
    x2 = np.sin(55.0 * t).reshape(1, -1)

    feature_extractor = FeatureExtractor(type='fbank')
    f1 = feature_extractor.extract(x1, 8000).numpy()
    f2 = feature_extractor.extract(x2, 8000).numpy()
    mixer = FbankMixer(
        base_feats=f1,
        frame_shift=feature_extractor.spectrogram_config.frame_shift,
    )
    mixer.add_to_mix(f2)

    fmix_feat = mixer.mixed_feats
    fmix_time = feature_extractor.extract(x1 + x2, 8000).numpy()

    np.testing.assert_almost_equal(fmix_feat, fmix_time, decimal=0)
コード例 #7
0
ファイル: features.py プロジェクト: popcornell/lhotse
def make_feats(
        audio_manifest: Pathlike,
        output_dir: Pathlike,
        segmentation_manifest: Optional[Pathlike],
        # TODO: augmentation manifest should specify a number of transforms and probability of their application
        # e.g.:
        # "add_noise", "prob": 0.5, "noise_recordings": ["path1.wav", "path2.wav"]
        # "reverberate", "prob": 0.2, "rirs": ["rir1.wav", "rir2.wav"] (or however the RIRs are stored like... can be params for simulation)
        augmentation_manifest: Optional[Pathlike],
        feature_manifest: Optional[Pathlike],
        compressed: bool,
        lilcom_tick_power: int,
        root_dir: Optional[Pathlike],
        num_jobs: int):
    """
    Extract features for recordings in a given AUDIO_MANIFEST. The features are stored in OUTPUT_DIR,
    with one file per recording (or segment).
    """
    audio_set = RecordingSet.from_yaml(audio_manifest)

    feature_extractor = (FeatureExtractor.from_yaml(feature_manifest) if
                         feature_manifest is not None else FeatureExtractor())

    # TODO: to be used (actually, only the segmentation info will be used, and all supervision info will be ignored)
    supervision_set = (SupervisionSet.from_yaml(segmentation_manifest)
                       if segmentation_manifest is not None else None)

    output_dir = Path(output_dir)
    output_dir.mkdir(exist_ok=True, parents=True)

    feature_set_builder = FeatureSetBuilder(
        feature_extractor=feature_extractor,
        output_dir=output_dir,
        root_dir=root_dir,
        augmentation_manifest=augmentation_manifest)
    feature_set_builder.process_and_store_recordings(
        recordings=audio_set,
        segmentation=None,  # TODO: implement and use
        compressed=compressed,
        lilcom_tick_power=lilcom_tick_power,
        num_jobs=num_jobs)
コード例 #8
0
ファイル: test_feature_set.py プロジェクト: popcornell/lhotse
def test_feature_set_serialization():
    feature_set = FeatureSet(feature_extractor=FeatureExtractor(),
                             features=[
                                 Features(recording_id='irrelevant',
                                          channel_id=0,
                                          start=0.0,
                                          duration=20.0,
                                          type='fbank',
                                          num_frames=2000,
                                          num_features=20,
                                          storage_type='lilcom',
                                          storage_path='/irrelevant/path.llc')
                             ])
    with NamedTemporaryFile() as f:
        feature_set.to_yaml(f.name)
        feature_set_deserialized = FeatureSet.from_yaml(f.name)
    assert feature_set_deserialized == feature_set
コード例 #9
0
def extract_cuts_batch(
    cutset: Pathlike,
    output_cutset: Pathlike,
    storage_path: Pathlike,
    feature_manifest: Optional[Pathlike],
    storage_type: str,
    num_jobs: int,
    batch_duration: Seconds,
):
    """
    Extract features for cuts in a given CUTSET manifest.
    The features are stored in STORAGE_PATH, and the output manifest
    with features is stored in OUTPUT_CUTSET.

    This version enables CUDA acceleration for feature extractors
    that support it (e.g., kaldifeat extractors).

    \b
    Example usage of kaldifeat fbank with CUDA:

        $ pip install kaldifeat  # note: ensure it's compiled with CUDA

        $ lhotse feat write-default-config -f kaldifeat-fbank feat.yml

        $ sed 's/device: cpu/device: cuda/' feat.yml feat-cuda.yml

        $ lhotse feat extract-cuts-batch -f feat-cuda.yml cuts.jsonl cuts_with_feats.jsonl feats.h5
    """
    from lhotse import CutSet

    cuts: CutSet = CutSet.from_file(cutset)
    feature_extractor = (FeatureExtractor.from_yaml(feature_manifest)
                         if feature_manifest is not None else Fbank())
    cuts = cuts.compute_and_store_features_batch(
        extractor=feature_extractor,
        storage_path=storage_path,
        batch_duration=batch_duration,
        num_workers=num_jobs,
        storage_type=get_writer(storage_type),
    )
    Path(output_cutset).parent.mkdir(parents=True, exist_ok=True)
    cuts.to_file(output_cutset)
コード例 #10
0
ファイル: features.py プロジェクト: twistedmove/lhotse
def extract(audio_manifest: Pathlike, output_dir: Pathlike,
            segmentation_manifest: Optional[Pathlike], augmentation: str,
            feature_manifest: Optional[Pathlike], compressed: bool,
            lilcom_tick_power: int, root_dir: Optional[Pathlike],
            num_jobs: int):
    """
    Extract features for recordings in a given AUDIO_MANIFEST. The features are stored in OUTPUT_DIR,
    with one file per recording (or segment).
    """
    audio_set = RecordingSet.from_json(audio_manifest)

    feature_extractor = (FeatureExtractor.from_yaml(feature_manifest)
                         if feature_manifest is not None else Fbank())

    # TODO: to be used (actually, only the segmentation info will be used, and all supervision info will be ignored)
    supervision_set = (SupervisionSet.from_json(segmentation_manifest)
                       if segmentation_manifest is not None else None)

    output_dir = Path(output_dir)
    output_dir.mkdir(exist_ok=True, parents=True)

    augmenter = None
    if augmentation is not None:
        sampling_rate = next(iter(audio_set)).sampling_rate
        assert all(rec.sampling_rate == sampling_rate for rec in audio_set), \
            "Wav augmentation effect chains expect all the recordings to have the same sampling rate at this time."
        augmenter = WavAugmenter.create_predefined(name=augmentation,
                                                   sampling_rate=sampling_rate)

    feature_set_builder = FeatureSetBuilder(
        feature_extractor=feature_extractor,
        output_dir=output_dir,
        root_dir=root_dir,
        augmenter=augmenter)
    feature_set_builder.process_and_store_recordings(
        recordings=audio_set,
        segmentation=None,  # TODO: implement and use
        compressed=compressed,
        lilcom_tick_power=lilcom_tick_power,
        num_jobs=num_jobs)
コード例 #11
0
def extract(recording_manifest: Pathlike, output_dir: Pathlike,
            augmentation: str, feature_manifest: Optional[Pathlike],
            storage_type: str, lilcom_tick_power: int,
            root_dir: Optional[Pathlike], num_jobs: int):
    """
    Extract features for recordings in a given AUDIO_MANIFEST. The features are stored in OUTPUT_DIR,
    with one file per recording (or segment).
    """
    recordings: RecordingSet = RecordingSet.from_json(recording_manifest)
    if root_dir is not None:
        recordings = recordings.with_path_prefix(root_dir)

    feature_extractor = (FeatureExtractor.from_yaml(feature_manifest)
                         if feature_manifest is not None else Fbank())

    output_dir = Path(output_dir)
    output_dir.mkdir(exist_ok=True, parents=True)
    storage_path = output_dir / 'feats.h5' if 'hdf5' in storage_type else output_dir / 'storage'

    augmenter = None
    if augmentation is not None:
        sampling_rate = next(iter(recordings)).sampling_rate
        assert all(rec.sampling_rate == sampling_rate for rec in recordings), \
            "Wav augmentation effect chains expect all the recordings to have the same sampling rate at this time."
        augmenter = WavAugmenter.create_predefined(name=augmentation,
                                                   sampling_rate=sampling_rate)

    with get_writer(storage_type)(storage_path,
                                  tick_power=lilcom_tick_power) as storage:
        feature_set_builder = FeatureSetBuilder(
            feature_extractor=feature_extractor,
            storage=storage,
            augmenter=augmenter)
        feature_set_builder.process_and_store_recordings(
            recordings=recordings,
            output_manifest=output_dir / 'feature_manifest.json.gz',
            num_jobs=num_jobs)
コード例 #12
0
ファイル: test_feature_set.py プロジェクト: zcth428/lhotse
def test_feature_extractor_generic_deserialization():
    fe = Fbank()
    with NamedTemporaryFile() as f:
        fe.to_yaml(f.name)
        fe_deserialized = FeatureExtractor.from_yaml(f.name)
    assert fe_deserialized.config == fe.config
コード例 #13
0
def feature_set():
    return FeatureSet(feature_extractor=FeatureExtractor(),
                      features=[
                          features('rec-1', 0.0, 600.0),
                          features('rec-2', 0.0, 357.0)
                      ])
コード例 #14
0
ファイル: features.py プロジェクト: popcornell/lhotse
def write_default_feature_config(output_config):
    """Save a default feature extraction config to OUTPUT_CONFIG."""
    FeatureExtractor().to_yaml(output_config, include_defaults=True)