other_params = {} some_augmentation = None @mark.parametrize( [ "recording_id", "channel", "start", "duration", "exception_expectation", "expected_num_frames", ], [ ("recording-1", 0, 0.0, None, does_not_raise(), 50), # whole recording ( "recording-1", 0, 0.0, 0.499, does_not_raise(), 50, ), # practically whole recording ("recording-2", 0, 0.0, 0.7, does_not_raise(), 70), ("recording-2", 0, 0.5, 0.5, does_not_raise(), 50), ("recording-2", 1, 0.25, 0.65, does_not_raise(), 65), ("recording-nonexistent", 0, 0.0, None, raises(KeyError), None), # no recording ("recording-1", 1000, 0.0, None, raises(KeyError), None), # no channel (
assert padded.num_samples == expected_num_samples mixed_feats = padded.load_features() assert mixed_feats.shape == (2000, 40) np.testing.assert_allclose(mixed_feats[1604:, :], PADDING_LOG_ENERGY, atol=0.8) # Only padding after 16.04s np.testing.assert_array_less(PADDING_LOG_ENERGY, mixed_feats[1603, :]) # Padding didn't start before 16.04s pre_mixed_feats = libri_cut.load_features() np.testing.assert_almost_equal(pre_mixed_feats, mixed_feats[:1604, :], decimal=5) @pytest.mark.parametrize( ['duration', 'num_frames', 'num_samples', 'expected_duration', 'expected_num_frames', 'expected_num_samples', 'exception_expectation'], [ (20, None, None, 20, 2000, 320000, does_not_raise()), (None, 2000, None, 20, 2000, 320000, pytest.raises(AssertionError)), (None, None, 320000, 20, 2000, 320000, does_not_raise()), ] ) def test_pad_simple_cut_audio_only( libri_cut, duration, num_frames, num_samples, expected_duration, expected_num_frames, expected_num_samples, exception_expectation ): libri_cut.features = None
import pickle
from tempfile import NamedTemporaryFile

import pytest

from lhotse import AudioSource, Cut, CutSet, FeatureSet, Features, Recording, RecordingSet, SupervisionSegment, \
    SupervisionSet, load_manifest, store_manifest
from lhotse.utils import fastcopy, is_module_available, nullcontext as does_not_raise


# Each case pairs a path with the context manager the load is run under:
# ``does_not_raise()`` (an alias of ``nullcontext``) for the four JSON manifests,
# ``pytest.raises(ValueError)`` for the YAML feature config, and
# ``pytest.raises(AssertionError)`` for a path that does not exist.
@pytest.mark.parametrize(['path', 'exception_expectation'], [
    ('test/fixtures/audio.json', does_not_raise()),
    ('test/fixtures/supervision.json', does_not_raise()),
    ('test/fixtures/dummy_feats/feature_manifest.json', does_not_raise()),
    ('test/fixtures/libri/cuts.json', does_not_raise()),
    ('test/fixtures/feature_config.yml', pytest.raises(ValueError)),
    ('no/such/path.xd', pytest.raises(AssertionError)),
])
def test_load_any_lhotse_manifest(path, exception_expectation):
    """Call ``load_manifest(path)`` and check it succeeds or raises as the case expects.

    The returned manifest is not inspected; only the presence or absence of
    the expected exception is verified.
    """
    with exception_expectation:
        load_manifest(path)


@pytest.fixture
def recording_set():
    return RecordingSet.from_recordings([
        Recording(id='x', sources=[
            AudioSource(type='file', channels=[0], source='text/fixtures/mono_c0.wav'),
def test_cut_set_batch_feature_extraction(cut_set, extractor_type):
    """Run batch feature extraction on ``cut_set`` and validate the result.

    ``extractor_type`` is called with no arguments to build the extractor.
    NOTE(review): ``extractor_type`` is presumably supplied by a parametrize
    decorator that sits above this definition -- confirm in the full file.
    """
    extractor = extractor_type()
    cut_set = cut_set.resample(16000)
    # Features are written to a temporary file; validation reads the stored data
    # back (``read_data=True``), so it runs while the file still exists.
    with NamedTemporaryFile() as tmpf:
        cut_set_with_feats = cut_set.compute_and_store_features_batch(
            extractor=extractor,
            storage_path=tmpf.name,
            num_workers=0,
        )
        validate(cut_set_with_feats, read_data=True)


@pytest.mark.parametrize(
    ["suffix", "exception_expectation"],
    [
        (".jsonl", does_not_raise()),
        (".json", pytest.raises(InvalidPathExtension)),
    ],
)
def test_cut_set_batch_feature_extraction_manifest_path(
        cut_set, suffix, exception_expectation):
    extractor = Fbank()
    cut_set = cut_set.resample(16000)
    with NamedTemporaryFile() as feat_f, NamedTemporaryFile(
            suffix=suffix) as manifest_f:
        with exception_expectation:
            cut_set_with_feats = cut_set.compute_and_store_features_batch(
                extractor=extractor,
                storage_path=feat_f.name,
                manifest_path=manifest_f.name,
                num_workers=0,
def test_get_stereo_audio_from_single_file(recording_set):
    """Load "recording-2" and compare it with ``expected_stereo_single_source()``."""
    samples = recording_set.load_audio("recording-2")
    np.testing.assert_almost_equal(samples, expected_stereo_single_source())


def test_load_audio_from_sphere_file(recording_set):
    """Load "recording-3" and compare it with ``expected_stereo_single_source()``.

    NOTE(review): per the test name, "recording-3" is presumably backed by a
    SPHERE file in the fixture -- confirm against the ``recording_set`` fixture.
    """
    samples = recording_set.load_audio("recording-3")
    np.testing.assert_almost_equal(samples, expected_stereo_single_source())


# Channel selections None, 0, 1 and [0, 1] are expected to load successfully and
# match the paired reference audio. Channel 1000 is expected to raise
# AssertionError, so its "irrelevant" expected value is never compared.
@mark.parametrize(
    ["channels", "expected_audio", "exception_expectation"],
    [
        (None, expected_stereo_two_sources(), does_not_raise()),
        (0, expected_channel_0(), does_not_raise()),
        (1, expected_channel_1(), does_not_raise()),
        ([0, 1], expected_stereo_two_sources(), does_not_raise()),
        (1000, "irrelevant", raises(AssertionError)),
    ],
)
def test_get_audio_multichannel(recording_set, channels, expected_audio, exception_expectation):
    """Load "recording-1" with the given ``channels`` and compare it with ``expected_audio``.

    Both the load and the comparison run inside ``exception_expectation``, so a
    case that expects an exception passes as soon as it is raised.
    """
    with exception_expectation:
        loaded_audio = recording_set.load_audio("recording-1", channels=channels)
        np.testing.assert_almost_equal(loaded_audio, expected_audio)


@mark.parametrize(
# Invariant 1: we receive the same amount of items in a dataloader epoch as there we in the CutSet assert len(sampled_cuts) == len(cut_set) # Invariant 2: the items are not duplicated assert len(set(c.id for c in sampled_cuts)) == len(sampled_cuts) # Invariant 3: the items are shuffled, i.e. the order is different than that in the CutSet assert [c.id for c in sampled_cuts] != [c.id for c in cut_set] @pytest.mark.parametrize( ["max_duration", "max_frames", "max_samples", "exception_expectation"], [ ( None, None, None, does_not_raise(), ), # represents no criterion (unlimited batch size) (10.0, None, None, does_not_raise()), (None, 1000, None, does_not_raise()), (None, None, 160000, does_not_raise()), (None, 1000, 160000, pytest.raises(AssertionError)), (5.0, 1000, 160000, pytest.raises(AssertionError)), ], ) def test_single_cut_sampler_time_constraints( max_duration, max_frames, max_samples, exception_expectation ): # The dummy cuts have a duration of 1 second each cut_set = DummyManifest(CutSet, begin_id=0, end_id=100) if max_frames is None: cut_set = cut_set.drop_features()
start=0.5, duration=6.0), SupervisionSegment(id='sup-2', recording_id='irrelevant', start=7.0, duration=2.0), SupervisionSegment(id='sup-3', recording_id='irrelevant', start=13.0, duration=2.5) ] @pytest.mark.parametrize( ['offset', 'expected_duration', 'exception_expectation'], [(0, 10.0, does_not_raise()), (1, 11.0, does_not_raise()), (5, 15.0, does_not_raise()), (10, 20.0, does_not_raise()), (100, 'irrelevant', pytest.raises(AssertionError))]) def test_overlay_cut_duration_and_supervisions(offset, expected_duration, exception_expectation, cut1, cut2): with exception_expectation: mixed_cut = cut1.mix(cut2, offset_other_by=offset) assert isinstance(mixed_cut, MixedCut) assert mixed_cut.duration == expected_duration assert mixed_cut.supervisions == [ SupervisionSegment(id='sup-1', recording_id='irrelevant', start=0.5, duration=6.0),
import torchaudio

from lhotse import Fbank, KaldifeatFbank, KaldifeatFbankConfig, Mfcc
from lhotse.features import create_default_feature_extractor
from lhotse.features.kaldifeat import KaldifeatMelOptions, KaldifeatMfcc
from lhotse.utils import nullcontext as does_not_raise

# Skip every test in this module when the optional ``kaldifeat`` package is missing.
kaldifeat = pytest.importorskip(
    "kaldifeat", reason="Kaldifeat tests require kaldifeat to be installed."
)


# Both kaldifeat-backed extractor names are expected to work without raising.
@pytest.mark.parametrize(
    ["feature_type", "exception_expectation"],
    [
        ("kaldifeat-fbank", does_not_raise()),
        ("kaldifeat-mfcc", does_not_raise()),
    ],
)
def test_feature_extractor(feature_type, exception_expectation):
    """Build the default extractor for ``feature_type`` and run it on a fixture WAV.

    The extracted features are not inspected; the test only checks that
    construction, audio loading and extraction complete under
    ``exception_expectation``.
    """
    # For now, just test that it runs
    with exception_expectation:
        fe = create_default_feature_extractor(feature_type)
        samples, sr = torchaudio.load("test/fixtures/libri/libri-1088-134315-0000.wav")
        fe.extract(samples=samples, sampling_rate=sr)


def test_kaldifeat_config():
    x = np.arange(8000, dtype=np.float32)
    fe = KaldifeatFbank(KaldifeatFbankConfig(mel_opts=KaldifeatMelOptions(num_bins=27)))
    feats = fe.extract(x, sampling_rate=16000)
from lhotse.audio import RecordingSet
from lhotse.augmentation import WavAugmenter, is_wav_augment_available
from lhotse.features import (Fbank, FeatureExtractor, FeatureMixer, FeatureSet, FeatureSetBuilder, Features, Mfcc,
                             Spectrogram, create_default_feature_extractor)
from lhotse.features.io import LilcomFilesWriter, LilcomHdf5Writer, NumpyFilesWriter, NumpyHdf5Writer
from lhotse.testing.dummies import DummyManifest
from lhotse.utils import Seconds, time_diff_to_num_frames
from lhotse.utils import nullcontext as does_not_raise

other_params = {}
some_augmentation = None


# 'mfcc', 'fbank' and 'spectrogram' are expected to run cleanly; 'pitch' is
# expected to raise (any ``Exception`` subclass satisfies the check).
@mark.parametrize(['feature_type', 'exception_expectation'],
                  [('mfcc', does_not_raise()), ('fbank', does_not_raise()),
                   ('spectrogram', does_not_raise()),
                   ('pitch', raises(Exception))])
def test_feature_extractor(feature_type, exception_expectation):
    """Build the default extractor for ``feature_type`` and run it on a fixture WAV.

    Construction, audio loading and extraction all happen inside
    ``exception_expectation``; the extracted features are not inspected.
    """
    # For now, just test that it runs
    with exception_expectation:
        fe = create_default_feature_extractor(feature_type)
        samples, sr = torchaudio.load(
            'test/fixtures/libri/libri-1088-134315-0000.wav')
        fe.extract(samples=samples, sampling_rate=sr)


def test_feature_extractor_serialization():
    fe = Fbank()
    with NamedTemporaryFile() as f:
        fe.to_yaml(f.name)
for batch in sampler: sampler_cut_ids.extend(batch) # Invariant 1: we receive the same amount of items in a dataloader epoch as there we in the CutSet assert len(sampler_cut_ids) == len(cut_set) # Invariant 2: the items are not duplicated assert len(set(sampler_cut_ids)) == len(sampler_cut_ids) # Invariant 3: the items are shuffled, i.e. the order is different than that in the CutSet assert sampler_cut_ids != [c.id for c in cut_set] @pytest.mark.parametrize( ['max_duration', 'max_frames', 'max_samples', 'exception_expectation'], [ (None, None, None, does_not_raise()), # represents no criterion (unlimited batch size) (10.0, None, None, does_not_raise()), (None, 1000, None, does_not_raise()), (None, None, 160000, does_not_raise()), (None, 1000, 160000, pytest.raises(AssertionError)), (5.0, 1000, 160000, pytest.raises(AssertionError)), ]) def test_single_cut_sampler_time_constraints(max_duration, max_frames, max_samples, exception_expectation): # The dummy cuts have a duration of 1 second each cut_set = DummyManifest(CutSet, begin_id=0, end_id=100) if max_frames is None: cut_set = cut_set.drop_features() with exception_expectation:
from tempfile import NamedTemporaryFile

import pytest
import torch
import torchaudio

from lhotse import Fbank, FeatureExtractor, create_default_feature_extractor
from lhotse.utils import nullcontext as does_not_raise


# "mfcc", "fbank" and "spectrogram" are expected to run cleanly; "pitch" is
# expected to raise (any ``Exception`` subclass satisfies the check).
@pytest.mark.parametrize(
    ["feature_type", "exception_expectation"],
    [
        ("mfcc", does_not_raise()),
        ("fbank", does_not_raise()),
        ("spectrogram", does_not_raise()),
        ("pitch", pytest.raises(Exception)),
    ],
)
def test_feature_extractor(feature_type, exception_expectation):
    """Build the default extractor for ``feature_type`` and run it on a fixture WAV.

    Construction, audio loading and extraction all happen inside
    ``exception_expectation``; the extracted features are not inspected.
    """
    # For now, just test that it runs
    with exception_expectation:
        fe = create_default_feature_extractor(feature_type)
        samples, sr = torchaudio.load(
            "test/fixtures/libri/libri-1088-134315-0000.wav")
        fe.extract(samples=samples, sampling_rate=sr)


@pytest.mark.parametrize(
    ["feature_type", "exception_expectation"],
    [
duration=6.0), SupervisionSegment(id="sup-2", recording_id="irrelevant", start=7.0, duration=2.0), SupervisionSegment(id="sup-3", recording_id="irrelevant", start=13.0, duration=2.5), ] @pytest.mark.parametrize( ["offset", "allow_padding", "expected_duration", "exception_expectation"], [ (0, False, 10.0, does_not_raise()), (1, False, 11.0, does_not_raise()), (5, False, 15.0, does_not_raise()), (10, False, 20.0, does_not_raise()), (100, False, "irrelevant", pytest.raises(AssertionError)), (100, True, 110.0, does_not_raise()), ], ) def test_overlay_cut_duration_and_supervisions(offset, allow_padding, expected_duration, exception_expectation, cut1, cut2): with exception_expectation: mixed_cut = cut1.mix(cut2, offset_other_by=offset, allow_padding=allow_padding)
other_params = {} some_augmentation = None @mark.parametrize( [ "recording_id", "channel", "start", "duration", "exception_expectation", "expected_num_frames", ], [ ("recording-1", 0, 0.0, None, does_not_raise(), 50), # whole recording ( "recording-1", 0, 0.0, 0.499, does_not_raise(), 50, ), # practically whole recording ("recording-2", 0, 0.0, 0.7, does_not_raise(), 70), ("recording-2", 0, 0.5, 0.5, does_not_raise(), 50), ("recording-2", 1, 0.25, 0.65, does_not_raise(), 65), ("recording-nonexistent", 0, 0.0, None, raises(KeyError), None), # no recording ("recording-1", 1000, 0.0, None, raises(KeyError), None), # no channel ( "recording-2",