Beispiel #1
0
    def extract_from_recording_and_store(
        self,
        recording: Recording,
        storage: FeaturesWriter,
        offset: Seconds = 0,
        duration: Optional[Seconds] = None,
        channels: Optional[Union[int, List[int]]] = None,
        augment_fn: Optional[AugmentFn] = None,
    ) -> "Features":
        """
        Extract the features from a ``Recording`` in a full pipeline:

        * load the audio via ``recording.load_audio``;
        * optionally, perform audio augmentation;
        * extract the features;
        * store the feature matrix using ``storage``;
        * return a ``Features`` object with a description of the extracted features and the source data used.

        :param recording: a ``Recording`` that specifies what's the input audio.
        :param storage: a ``FeaturesWriter`` object that will handle storing the feature matrices.
        :param offset: an optional offset in seconds for where to start reading the recording.
        :param duration: an optional duration specifying how much audio to load from the recording;
            when given, it is also the duration reported in the returned manifest.
        :param channels: an optional int or list of ints, specifying the channels;
            by default, all channels will be used.
        :param augment_fn: an optional callable (e.g. a ``WavAugmenter`` instance), invoked as
            ``augment_fn(samples, sampling_rate)``, to modify the waveform before feature extraction.
        :return: a ``Features`` manifest item for the extracted feature matrix.
        """
        # Imported locally, as in the original code (presumably to avoid a circular import).
        from lhotse.qa import validate_features

        samples = recording.load_audio(
            offset=offset,
            duration=duration,
            channels=channels,
        )
        if augment_fn is not None:
            samples = augment_fn(samples, recording.sampling_rate)
        feats = self.extract(samples=samples, sampling_rate=recording.sampling_rate)
        storage_key = store_feature_array(feats, storage=storage)
        manifest = Features(
            recording_id=recording.id,
            channels=channels if channels is not None else recording.channel_ids,
            # The start is relative to the beginning of the recording.
            start=offset,
            # Report the requested duration when one was given; previously both
            # branches of this conditional returned ``recording.duration``.
            # NOTE(review): with ``duration=None`` and ``offset > 0`` the fallback
            # still reports the full recording duration - confirm whether it should
            # be ``recording.duration - offset``.
            duration=duration if duration is not None else recording.duration,
            type=self.name,
            num_frames=feats.shape[0],
            num_features=feats.shape[1],
            frame_shift=self.frame_shift,
            sampling_rate=recording.sampling_rate,
            storage_type=storage.name,
            storage_path=str(storage.storage_path),
            storage_key=storage_key,
        )
        # Sanity-check the manifest against the actual feature matrix before returning it.
        validate_features(manifest, feats_data=feats)
        return manifest
Beispiel #2
0
    def extract_from_samples_and_store(
        self,
        samples: np.ndarray,
        storage: FeaturesWriter,
        sampling_rate: int,
        offset: Seconds = 0,
        channel: Optional[int] = None,
        augment_fn: Optional[AugmentFn] = None,
    ) -> "Features":
        """
        Run the feature extraction pipeline on an in-memory array of audio samples:

        * optionally augment the audio;
        * extract the features;
        * store the feature matrix using ``storage``;
        * return a ``Features`` object describing the extracted features.

        Unlike ``extract_from_recording_and_store``, the returned ``Features`` object does not
        reference any particular ``Recording``, so it might not be suitable to store in
        a ``FeatureSet``. This method is intended for extracting features from cuts - especially
        ``MixedCut`` instances, which may be created from multiple recordings and channels.

        :param samples: a numpy ndarray with the audio samples; its second axis is treated as
            the sample axis when computing the duration.
        :param storage: a ``FeaturesWriter`` object that will handle storing the feature matrices.
        :param sampling_rate: integer sampling rate of ``samples``.
        :param offset: an offset in seconds for where to start reading the recording - when used for
            ``Cut`` feature extraction, must be equal to ``Cut.start``.
        :param channel: an optional channel number to insert into ``Features`` manifest.
        :param augment_fn: an optional ``WavAugmenter`` instance to modify the waveform before
            feature extraction.
        :return: a ``Features`` manifest item for the extracted feature matrix (the manifest
            itself is not written to disk).
        """
        from lhotse.qa import validate_features

        audio = samples if augment_fn is None else augment_fn(samples, sampling_rate)

        # Duration is derived from the (possibly augmented) audio, rounded to 8 decimal places.
        num_samples = audio.shape[1]
        audio_duration = round(num_samples / sampling_rate, ndigits=8)

        feature_matrix = self.extract(samples=audio, sampling_rate=sampling_rate)
        key = store_feature_array(feature_matrix, storage=storage)

        manifest_fields = dict(
            start=offset,
            duration=audio_duration,
            type=self.name,
            num_frames=feature_matrix.shape[0],
            num_features=feature_matrix.shape[1],
            frame_shift=self.frame_shift,
            sampling_rate=sampling_rate,
            channels=channel,
            storage_type=storage.name,
            storage_path=str(storage.storage_path),
            storage_key=key,
        )
        features_manifest = Features(**manifest_fields)

        # Check the manifest against the actual feature matrix before handing it back.
        validate_features(features_manifest, feats_data=feature_matrix)
        return features_manifest
Beispiel #3
0
def test_validate_features_consistent_num_frames_does_not_raise():
    """100 frames at a 0.01 frame shift match a 1.0 duration, so validation should not raise."""
    placeholder = 'irrelevant'
    fields = dict(
        start=0.0,
        duration=1.0,
        frame_shift=0.01,
        num_frames=100,
        num_features=40,
        sampling_rate=16000,
        type=placeholder,
        storage_type=placeholder,
        storage_path=placeholder,
        storage_key=placeholder,
    )
    consistent = Features(**fields)
    # Passing means simply returning without an exception.
    validate_features(consistent)
Beispiel #4
0
def test_validate_features_inconsistent_num_frames_raises():
    """101 frames at a 0.01 frame shift do not match a 1.0 duration; expect an AssertionError."""
    placeholder = 'irrelevant'
    fields = dict(
        start=0.0,
        duration=1.0,
        frame_shift=0.01,
        num_frames=101,
        num_features=40,
        sampling_rate=16000,
        type=placeholder,
        storage_type=placeholder,
        storage_path=placeholder,
        storage_key=placeholder,
    )
    mismatched = Features(**fields)
    with pytest.raises(AssertionError):
        validate_features(mismatched)