def extract_from_recording_and_store(
    self,
    recording: Recording,
    storage: FeaturesWriter,
    offset: Seconds = 0,
    duration: Optional[Seconds] = None,
    channels: Optional[Union[int, List[int]]] = None,
    augment_fn: Optional[AugmentFn] = None,
) -> "Features":
    """
    Extract the features from a ``Recording`` in a full pipeline:

    * load audio via ``recording.load_audio``;
    * optionally, perform audio augmentation;
    * extract the features;
    * hand the feature matrix to ``storage``;
    * return a ``Features`` object with a description of the extracted features and the source data used.

    :param recording: a ``Recording`` that specifies what's the input audio.
    :param storage: a ``FeaturesWriter`` object that will handle storing the feature matrices.
    :param offset: an optional offset in seconds for where to start reading the recording.
    :param duration: an optional duration specifying how much audio to load from the recording.
        When given, it is also the duration recorded in the returned manifest;
        when ``None``, the manifest uses ``recording.duration``.
    :param channels: an optional int or list of ints, specifying the channels;
        by default, all channels will be used (the manifest then lists ``recording.channel_ids``).
    :param augment_fn: an optional callable, invoked as ``augment_fn(samples, sampling_rate)``,
        that modifies the waveform before feature extraction.
    :return: a ``Features`` manifest item for the extracted feature matrix.
    """
    # Imported locally in the original code as well; kept function-scoped.
    from lhotse.qa import validate_features

    samples = recording.load_audio(
        offset=offset,
        duration=duration,
        channels=channels,
    )
    if augment_fn is not None:
        samples = augment_fn(samples, recording.sampling_rate)
    feats = self.extract(samples=samples, sampling_rate=recording.sampling_rate)
    storage_key = store_feature_array(feats, storage=storage)
    manifest = Features(
        recording_id=recording.id,
        channels=channels if channels is not None else recording.channel_ids,
        # The start is relative to the beginning of the recording.
        start=offset,
        # Report the duration that was actually requested. Previously both
        # branches yielded ``recording.duration``, so a partial extraction was
        # described as covering the whole recording.
        # NOTE(review): with ``duration=None`` and ``offset > 0`` the manifest
        # still reports the full recording duration although loading starts at
        # ``offset`` - confirm whether ``recording.duration - offset`` is intended.
        duration=duration if duration is not None else recording.duration,
        type=self.name,
        num_frames=feats.shape[0],
        num_features=feats.shape[1],
        frame_shift=self.frame_shift,
        sampling_rate=recording.sampling_rate,
        storage_type=storage.name,
        storage_path=str(storage.storage_path),
        storage_key=storage_key,
    )
    # Sanity-check the manifest against the matrix that was just extracted.
    validate_features(manifest, feats_data=feats)
    return manifest
def extract_from_samples_and_store(
    self,
    samples: np.ndarray,
    storage: FeaturesWriter,
    sampling_rate: int,
    offset: Seconds = 0,
    channel: Optional[int] = None,
    augment_fn: Optional[AugmentFn] = None,
) -> "Features":
    """
    Run the extraction pipeline on an in-memory array of audio samples:

    * optionally augment the audio;
    * extract the features;
    * pass the feature matrix to ``storage``;
    * build and return a ``Features`` manifest describing the result.

    Note, unlike in ``extract_from_recording_and_store``, the returned ``Features`` object
    might not be suitable to store in a ``FeatureSet``, as it does not reference any particular
    ``Recording``. Instead, this method is useful when extracting features from cuts - especially
    ``MixedCut`` instances, which may be created from multiple recordings and channels.

    :param samples: a numpy ndarray with the audio samples; its second axis
        (``shape[1]``) is used as the number of samples when computing the duration.
    :param sampling_rate: integer sampling rate of ``samples``.
    :param storage: a ``FeaturesWriter`` object that will handle storing the feature matrices.
    :param offset: an offset in seconds for where to start reading the recording - when used for
        ``Cut`` feature extraction, must be equal to ``Cut.start``.
    :param channel: an optional channel number to insert into ``Features`` manifest.
    :param augment_fn: an optional callable, invoked as ``augment_fn(samples, sampling_rate)``,
        that modifies the waveform before feature extraction.
    :return: a ``Features`` manifest item for the extracted feature matrix
        (the manifest itself is only returned, not written anywhere by this method).
    """
    from lhotse.qa import validate_features

    # Augmentation (if any) happens first, so the duration below reflects
    # the audio that is actually fed to the extractor.
    audio = samples if augment_fn is None else augment_fn(samples, sampling_rate)

    num_samples = audio.shape[1]
    audio_duration = round(num_samples / sampling_rate, ndigits=8)

    feature_matrix = self.extract(samples=audio, sampling_rate=sampling_rate)
    key = store_feature_array(feature_matrix, storage=storage)

    frame_count = feature_matrix.shape[0]
    feature_dim = feature_matrix.shape[1]
    features_manifest = Features(
        start=offset,
        duration=audio_duration,
        type=self.name,
        num_frames=frame_count,
        num_features=feature_dim,
        frame_shift=self.frame_shift,
        sampling_rate=sampling_rate,
        channels=channel,
        storage_type=storage.name,
        storage_path=str(storage.storage_path),
        storage_key=key,
    )
    validate_features(features_manifest, feats_data=feature_matrix)
    return features_manifest
def test_validate_features_consistent_num_frames_does_not_raise():
    """A manifest with 100 frames, 0.01 s frame shift and 1.0 s duration must pass validation."""
    # Only the numeric fields matter here; the string fields are placeholders.
    fields = dict(
        type='irrelevant',
        num_frames=100,
        num_features=40,
        frame_shift=0.01,
        sampling_rate=16000,
        start=0.0,
        duration=1.0,
        storage_type='irrelevant',
        storage_path='irrelevant',
        storage_key='irrelevant',
    )
    features = Features(**fields)
    validate_features(features)
def test_validate_features_inconsistent_num_frames_raises():
    """A manifest claiming 101 frames for 1.0 s at a 0.01 s frame shift must fail validation."""
    # Only the numeric fields matter here; the string fields are placeholders.
    fields = dict(
        type='irrelevant',
        num_frames=101,
        num_features=40,
        frame_shift=0.01,
        sampling_rate=16000,
        start=0.0,
        duration=1.0,
        storage_type='irrelevant',
        storage_path='irrelevant',
        storage_key='irrelevant',
    )
    features = Features(**fields)
    with pytest.raises(AssertionError):
        validate_features(features)