from pathlib import Path
from typing import Optional

from lhotse import (
    Fbank,
    FeatureExtractor,
    FeatureSetBuilder,
    RecordingSet,
    SupervisionSet,
)
from lhotse.augmentation import WavAugmenter
from lhotse.features.io import get_writer
from lhotse.utils import Pathlike, Seconds


def extract_cuts(
    cutset: Pathlike,
    output_cutset: Pathlike,
    storage_path: Pathlike,
    feature_manifest: Optional[Pathlike],
    storage_type: str,
    num_jobs: int,
):
    """
    Extract features for cuts in a given CUTSET manifest.
    The features are stored in STORAGE_PATH, and the output manifest
    with features is stored in OUTPUT_CUTSET.
    """
    from lhotse import CutSet

    cuts: CutSet = CutSet.from_file(cutset)
    # Fall back to the default Fbank extractor when no config file is given.
    feature_extractor = (
        FeatureExtractor.from_yaml(feature_manifest)
        if feature_manifest is not None
        else Fbank()
    )
    cuts = cuts.compute_and_store_features(
        extractor=feature_extractor,
        storage_path=storage_path,
        num_jobs=num_jobs,
        storage_type=get_writer(storage_type),
    )
    Path(output_cutset).parent.mkdir(parents=True, exist_ok=True)
    cuts.to_file(output_cutset)

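# Example (illustrative, not from the original source): the same cut-level
# pipeline driven directly from Python instead of the CLI. The manifest and
# storage paths below are hypothetical placeholders.
def example_extract_cuts_from_python():
    from lhotse import CutSet

    cuts = CutSet.from_file('data/cuts.jsonl.gz')  # hypothetical input manifest
    cuts = cuts.compute_and_store_features(
        extractor=Fbank(),
        storage_path='data/feats',
        num_jobs=4,
        storage_type=get_writer('lilcom_hdf5'),
    )
    cuts.to_file('data/cuts_with_feats.jsonl.gz')
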
def extract(
    recording_manifest: Pathlike,
    output_dir: Pathlike,
    feature_manifest: Optional[Pathlike],
    storage_type: str,
    lilcom_tick_power: int,
    root_dir: Optional[Pathlike],
    num_jobs: int,
):
    """
    Extract features for recordings in a given AUDIO_MANIFEST.
    The features are stored in OUTPUT_DIR, with one file per recording (or segment).
    """
    recordings: RecordingSet = RecordingSet.from_json(recording_manifest)
    if root_dir is not None:
        recordings = recordings.with_path_prefix(root_dir)

    feature_extractor = (
        FeatureExtractor.from_yaml(feature_manifest)
        if feature_manifest is not None
        else Fbank()
    )

    output_dir = Path(output_dir)
    output_dir.mkdir(exist_ok=True, parents=True)
    # HDF5-based writers store all arrays in a single file;
    # the other writers use one file per array inside a directory.
    storage_path = (
        output_dir / 'feats.h5' if 'hdf5' in storage_type else output_dir / 'storage'
    )

    with get_writer(storage_type)(storage_path, tick_power=lilcom_tick_power) as storage:
        feature_set_builder = FeatureSetBuilder(
            feature_extractor=feature_extractor,
            storage=storage,
        )
        feature_set_builder.process_and_store_recordings(
            recordings=recordings,
            output_manifest=output_dir / 'feature_manifest.json.gz',
            num_jobs=num_jobs,
        )

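# Example (illustrative): producing a feature-extractor YAML that can be passed
# as FEATURE_MANIFEST above. The FbankConfig field name num_mel_bins is an
# assumption here - check the config dataclass in your Lhotse version.
def example_write_feature_config():
    from lhotse import FbankConfig

    fe = Fbank(FbankConfig(num_mel_bins=80))  # 80-dim filterbanks (assumed field)
    fe.to_yaml('fbank80.yml')
    # Later: FeatureExtractor.from_yaml('fbank80.yml') restores the same config.
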
def extract_cuts_batch(
    cutset: Pathlike,
    output_cutset: Pathlike,
    storage_path: Pathlike,
    feature_manifest: Optional[Pathlike],
    storage_type: str,
    num_jobs: int,
    batch_duration: Seconds,
):
    """
    Extract features for cuts in a given CUTSET manifest.
    The features are stored in STORAGE_PATH, and the output manifest
    with features is stored in OUTPUT_CUTSET. This version enables CUDA
    acceleration for feature extractors that support it (e.g., kaldifeat extractors).

    \b
    Example usage of kaldifeat fbank with CUDA:

        $ pip install kaldifeat  # note: ensure it's compiled with CUDA
        $ lhotse feat write-default-config -f kaldifeat-fbank feat.yml
        $ sed 's/device: cpu/device: cuda/' feat.yml > feat-cuda.yml
        $ lhotse feat extract-cuts-batch -f feat-cuda.yml cuts.jsonl cuts_with_feats.jsonl feats.h5
    """
    from lhotse import CutSet

    cuts: CutSet = CutSet.from_file(cutset)
    feature_extractor = (
        FeatureExtractor.from_yaml(feature_manifest)
        if feature_manifest is not None
        else Fbank()
    )
    cuts = cuts.compute_and_store_features_batch(
        extractor=feature_extractor,
        storage_path=storage_path,
        batch_duration=batch_duration,
        num_workers=num_jobs,
        storage_type=get_writer(storage_type),
    )
    Path(output_cutset).parent.mkdir(parents=True, exist_ok=True)
    cuts.to_file(output_cutset)

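# Example (illustrative): the batch API called from Python. For GPU extraction,
# an extractor such as kaldifeat's fbank would be configured with device: cuda;
# here we use the stock Fbank so the sketch stays self-contained.
def example_extract_cuts_batch():
    from lhotse import CutSet

    cuts = CutSet.from_file('cuts.jsonl')  # hypothetical input manifest
    cuts = cuts.compute_and_store_features_batch(
        extractor=Fbank(),  # swap in a CUDA-capable extractor for real GPU runs
        storage_path='feats.h5',
        batch_duration=600.0,  # seconds of audio per batch (assumed value)
        num_workers=4,
    )
    cuts.to_file('cuts_with_feats.jsonl')
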
def make_feats(
    audio_manifest: Pathlike,
    output_dir: Pathlike,
    segmentation_manifest: Optional[Pathlike],
    # TODO: the augmentation manifest should specify a number of transforms
    #  and the probability of applying each of them, e.g.:
    #  "add_noise", "prob": 0.5, "noise_recordings": ["path1.wav", "path2.wav"]
    #  "reverberate", "prob": 0.2, "rirs": ["rir1.wav", "rir2.wav"]
    #  (or however the RIRs are stored - these could also be parameters for simulation)
    augmentation_manifest: Optional[Pathlike],
    feature_manifest: Optional[Pathlike],
    compressed: bool,
    lilcom_tick_power: int,
    root_dir: Optional[Pathlike],
    num_jobs: int,
):
    """
    Extract features for recordings in a given AUDIO_MANIFEST.
    The features are stored in OUTPUT_DIR, with one file per recording (or segment).
    """
    audio_set = RecordingSet.from_yaml(audio_manifest)

    # Fall back to the default Fbank extractor when no config file is given
    # (FeatureExtractor itself is abstract and cannot be instantiated directly).
    feature_extractor = (
        FeatureExtractor.from_yaml(feature_manifest)
        if feature_manifest is not None
        else Fbank()
    )

    # TODO: to be used (actually, only the segmentation info will be used,
    #  and all supervision info will be ignored)
    supervision_set = (
        SupervisionSet.from_yaml(segmentation_manifest)
        if segmentation_manifest is not None
        else None
    )

    output_dir = Path(output_dir)
    output_dir.mkdir(exist_ok=True, parents=True)

    feature_set_builder = FeatureSetBuilder(
        feature_extractor=feature_extractor,
        output_dir=output_dir,
        root_dir=root_dir,
        augmentation_manifest=augmentation_manifest,
    )
    feature_set_builder.process_and_store_recordings(
        recordings=audio_set,
        segmentation=None,  # TODO: implement and use
        compressed=compressed,
        lilcom_tick_power=lilcom_tick_power,
        num_jobs=num_jobs,
    )

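# Example (purely hypothetical): the augmentation manifest shape sketched in the
# TODO above, written out as Python data. These field names come from the TODO
# comment and do not correspond to an implemented Lhotse format.
EXAMPLE_AUGMENTATION_MANIFEST = [
    {'name': 'add_noise', 'prob': 0.5, 'noise_recordings': ['path1.wav', 'path2.wav']},
    {'name': 'reverberate', 'prob': 0.2, 'rirs': ['rir1.wav', 'rir2.wav']},
]
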
def extract(
    audio_manifest: Pathlike,
    output_dir: Pathlike,
    segmentation_manifest: Optional[Pathlike],
    augmentation: Optional[str],
    feature_manifest: Optional[Pathlike],
    compressed: bool,
    lilcom_tick_power: int,
    root_dir: Optional[Pathlike],
    num_jobs: int,
):
    """
    Extract features for recordings in a given AUDIO_MANIFEST.
    The features are stored in OUTPUT_DIR, with one file per recording (or segment).
    """
    audio_set = RecordingSet.from_json(audio_manifest)

    feature_extractor = (
        FeatureExtractor.from_yaml(feature_manifest)
        if feature_manifest is not None
        else Fbank()
    )

    # TODO: to be used (actually, only the segmentation info will be used,
    #  and all supervision info will be ignored)
    supervision_set = (
        SupervisionSet.from_json(segmentation_manifest)
        if segmentation_manifest is not None
        else None
    )

    output_dir = Path(output_dir)
    output_dir.mkdir(exist_ok=True, parents=True)

    augmenter = None
    if augmentation is not None:
        sampling_rate = next(iter(audio_set)).sampling_rate
        assert all(rec.sampling_rate == sampling_rate for rec in audio_set), (
            'Wav augmentation effect chains expect all the recordings '
            'to have the same sampling rate at this time.'
        )
        augmenter = WavAugmenter.create_predefined(
            name=augmentation, sampling_rate=sampling_rate
        )

    feature_set_builder = FeatureSetBuilder(
        feature_extractor=feature_extractor,
        output_dir=output_dir,
        root_dir=root_dir,
        augmenter=augmenter,
    )
    feature_set_builder.process_and_store_recordings(
        recordings=audio_set,
        segmentation=None,  # TODO: implement and use
        compressed=compressed,
        lilcom_tick_power=lilcom_tick_power,
        num_jobs=num_jobs,
    )

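# Example (illustrative): constructing the augmenter outside the CLI. Rather
# than hard-coding an effect-chain name, we pick one from the registry; the
# available_wav_augmentations helper is assumed to exist in the same (older)
# Lhotse versions that ship WavAugmenter.
def example_make_augmenter(sampling_rate: int = 16000) -> WavAugmenter:
    from lhotse.augmentation import available_wav_augmentations

    name = next(iter(available_wav_augmentations()))  # any registered chain
    return WavAugmenter.create_predefined(name=name, sampling_rate=sampling_rate)
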
def extract(
    recording_manifest: Pathlike,
    output_dir: Pathlike,
    augmentation: Optional[str],
    feature_manifest: Optional[Pathlike],
    storage_type: str,
    lilcom_tick_power: int,
    root_dir: Optional[Pathlike],
    num_jobs: int,
):
    """
    Extract features for recordings in a given AUDIO_MANIFEST.
    The features are stored in OUTPUT_DIR, with one file per recording (or segment).
    """
    recordings: RecordingSet = RecordingSet.from_json(recording_manifest)
    if root_dir is not None:
        recordings = recordings.with_path_prefix(root_dir)

    feature_extractor = (
        FeatureExtractor.from_yaml(feature_manifest)
        if feature_manifest is not None
        else Fbank()
    )

    output_dir = Path(output_dir)
    output_dir.mkdir(exist_ok=True, parents=True)
    storage_path = (
        output_dir / 'feats.h5' if 'hdf5' in storage_type else output_dir / 'storage'
    )

    augmenter = None
    if augmentation is not None:
        sampling_rate = next(iter(recordings)).sampling_rate
        assert all(rec.sampling_rate == sampling_rate for rec in recordings), (
            'Wav augmentation effect chains expect all the recordings '
            'to have the same sampling rate at this time.'
        )
        augmenter = WavAugmenter.create_predefined(
            name=augmentation, sampling_rate=sampling_rate
        )

    with get_writer(storage_type)(storage_path, tick_power=lilcom_tick_power) as storage:
        feature_set_builder = FeatureSetBuilder(
            feature_extractor=feature_extractor,
            storage=storage,
            augmenter=augmenter,
        )
        feature_set_builder.process_and_store_recordings(
            recordings=recordings,
            output_manifest=output_dir / 'feature_manifest.json.gz',
            num_jobs=num_jobs,
        )

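# Example (illustrative): reading back features written by the builder above.
# Assumes the output manifest path used in extract(); from_file is assumed to
# be available (older versions used from_json). Features.load() returns a
# (num_frames, num_features) numpy array.
def example_read_back_features(output_dir: Pathlike):
    from lhotse import FeatureSet

    feature_set = FeatureSet.from_file(Path(output_dir) / 'feature_manifest.json.gz')
    feats = next(iter(feature_set)).load()
    print(feats.shape)
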
from tempfile import NamedTemporaryFile


def test_feature_extractor_generic_deserialization():
    fe = Fbank()
    with NamedTemporaryFile() as f:
        fe.to_yaml(f.name)
        fe_deserialized = FeatureExtractor.from_yaml(f.name)
        assert fe_deserialized.config == fe.config

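# Example (illustrative): the same round-trip with a non-default config, showing
# that customized settings survive serialization. The num_mel_bins field name is
# an assumption about FbankConfig.
def test_feature_extractor_roundtrip_custom_config():
    from lhotse import FbankConfig

    fe = Fbank(FbankConfig(num_mel_bins=40))
    with NamedTemporaryFile() as f:
        fe.to_yaml(f.name)
        fe_deserialized = FeatureExtractor.from_yaml(f.name)
        assert fe_deserialized.config == fe.config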