def test_serialization(): audio_set = RecordingSet.from_recordings([ Recording( id='x', sources=[ AudioSource( type='file', channel_ids=[0], source='text/fixtures/mono_c0.wav' ), AudioSource( type='command', channel_ids=[1], source='cat text/fixtures/mono_c1.wav' ) ], sampling_rate=8000, num_samples=4000, duration_seconds=0.5 ) ]) with NamedTemporaryFile() as f: audio_set.to_yaml(f.name) deserialized = RecordingSet.from_yaml(f.name) assert deserialized == audio_set
def test_serialization(format, compressed):
    """
    Round-trip a RecordingSet through the given serialization ``format``
    ('yaml' or 'json'), optionally gzip-compressed, and assert equality.
    """
    recording_set = RecordingSet.from_recordings([
        Recording(
            id='x',
            sources=[
                AudioSource(
                    type='file',
                    channels=[0],
                    # was 'text/fixtures/...' — typo for 'test/fixtures/...'
                    source='test/fixtures/mono_c0.wav'
                ),
                AudioSource(
                    type='command',
                    channels=[1],
                    source='cat test/fixtures/mono_c1.wav'
                )
            ],
            sampling_rate=8000,
            num_samples=4000,
            duration=0.5
        )
    ])
    # The '.gz' suffix makes the (de)serializers transparently gzip the file.
    with NamedTemporaryFile(suffix='.gz' if compressed else '') as f:
        if format == 'yaml':
            recording_set.to_yaml(f.name)
            deserialized = RecordingSet.from_yaml(f.name)
        elif format == 'json':
            recording_set.to_json(f.name)
            deserialized = RecordingSet.from_json(f.name)
        else:
            # Previously an unknown format fell through to the assert with
            # `deserialized` undefined, raising a confusing NameError.
            raise ValueError(f'Unknown serialization format: {format}')
        assert deserialized == recording_set
def test_feature_set_builder():
    """Extract features for the fixture recordings and validate the manifest."""
    recordings = RecordingSet.from_yaml('test/fixtures/audio.yml')
    with TemporaryDirectory() as output_dir:
        builder = FeatureSetBuilder(
            feature_extractor=FeatureExtractor(),
            output_dir=output_dir
        )
        feature_set = builder.process_and_store_recordings(recordings=recordings)

        assert len(feature_set) == 4
        extracted = list(feature_set)

        # Properties shared by every extracted feature matrix.
        for feats in extracted:
            # fbank is the default feature type
            assert feats.type == 'fbank'
            # duration is always a whole multiple of frame_shift
            assert feats.num_frames == round(feats.duration / feats.frame_shift)
            # num_features matches the configured number of mel bins
            assert feats.num_features == builder.feature_extractor.mfcc_fbank_common_config.num_mel_bins
            # lilcom is the default storage type
            assert feats.storage_type == 'lilcom'

        # The first two fixtures are 0.5 s long, the last two 1.0 s long.
        expected = [(50, 0.5), (50, 0.5), (100, 1.0), (100, 1.0)]
        for feats, (num_frames, duration) in zip(extracted, expected):
            assert feats.num_frames == num_frames
            assert feats.duration == duration
def make_feats(
        audio_manifest: Pathlike,
        output_dir: Pathlike,
        segmentation_manifest: Optional[Pathlike],
        # TODO: augmentation manifest should specify a number of transforms and probability of their application
        # e.g.:
        # "add_noise", "prob": 0.5, "noise_recordings": ["path1.wav", "path2.wav"]
        # "reverberate", "prob": 0.2, "rirs": ["rir1.wav", "rir2.wav"] (or however the RIRs are stored like... can be params for simulation)
        augmentation_manifest: Optional[Pathlike],
        feature_manifest: Optional[Pathlike],
        compressed: bool,
        lilcom_tick_power: int,
        root_dir: Optional[Pathlike],
        num_jobs: int
):
    """
    Extract features for recordings in a given AUDIO_MANIFEST.
    The features are stored in OUTPUT_DIR, with one file per recording (or segment).
    """
    recordings = RecordingSet.from_yaml(audio_manifest)

    # Fall back to the default extractor configuration when no manifest is given.
    if feature_manifest is not None:
        extractor = FeatureExtractor.from_yaml(feature_manifest)
    else:
        extractor = FeatureExtractor()

    # TODO: to be used (actually, only the segmentation info will be used, and all supervision info will be ignored)
    if segmentation_manifest is not None:
        supervisions = SupervisionSet.from_yaml(segmentation_manifest)
    else:
        supervisions = None

    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    builder = FeatureSetBuilder(
        feature_extractor=extractor,
        output_dir=output_dir,
        root_dir=root_dir,
        augmentation_manifest=augmentation_manifest
    )
    builder.process_and_store_recordings(
        recordings=recordings,
        segmentation=None,  # TODO: implement and use
        compressed=compressed,
        lilcom_tick_power=lilcom_tick_power,
        num_jobs=num_jobs
    )
def get_audio_set() -> RecordingSet:
    """Load the shared audio fixture manifest as a RecordingSet."""
    manifest_path = 'test/fixtures/audio.yml'
    return RecordingSet.from_yaml(manifest_path)
def libri_recording_set():
    """Load the LibriSpeech audio fixture manifest as a RecordingSet."""
    manifest_path = 'test/fixtures/libri/audio.yml'
    return RecordingSet.from_yaml(manifest_path)