Beispiel #1
0
    def __call__(self, original, sample_rate):
        """Augment original waveform

        Parameters
        ----------
        original : `np.ndarray`
            (n_samples, n_channels) waveform.
        sample_rate : `int`
            Sample rate.

        Returns
        -------
        augmented : `np.ndarray`
            (n_samples, n_channels) noise-augmented waveform.
        """

        raw_audio = RawAudio(sample_rate=sample_rate, mono=True)

        # accumulate enough noise to cover duration of original waveform
        noises = []
        len_left = len(original)
        while len_left > 0:

            # select noise file at random
            file = random.choice(self.files_)

            # select noise segment at random
            segment = next(random_segment(file['gaps'], weighted=False))
            duration = segment.duration
            segment_len = duration * sample_rate

            # if noise segment is longer than what is needed, crop it at random
            if segment_len > len_left:
                duration = len_left / sample_rate
                segment = next(random_subsegment(segment, duration))

            noise = raw_audio.crop(file,
                                   segment,
                                   mode='center',
                                   fixed=duration)

            # decrease the `len_left` value by the size of the returned noise
            len_left -= len(noise)

            noise = normalize(noise)
            noises.append(noise)

        # concatenate
        # FIXME: use fade-in between concatenated noises
        noise = np.vstack(noises)

        # select SNR at random
        snr = (self.snr_max -
               self.snr_min) * np.random.random_sample() + self.snr_min
        alpha = np.exp(-np.log(10) * snr / 20)

        return normalize(original) + alpha * noise
Beispiel #2
0
    def __call__(self, original, sample_rate):
        """Augment original waveform

        Parameters
        ----------
        original : `np.ndarray`
            (n_samples, n_channels) waveform.
        sample_rate : `int`
            Sample rate.

        Returns
        -------
        augmented : `np.ndarray`
            (n_samples, n_channels) noise-augmented waveform.
        """

        raw_audio = RawAudio(sample_rate=sample_rate, mono=True)

        original_duration = len(original) / sample_rate

        # accumulate enough noise to cover duration of original waveform
        noises = []
        left = original_duration
        while left > 0:

            # select noise file at random
            file = random.choice(self.files_)
            duration = file['duration']

            # if noise file is longer than what is needed, crop it
            if duration > left:
                segment = next(random_subsegment(Segment(0, duration), left))
                noise = raw_audio.crop(file,
                                       segment,
                                       mode='center',
                                       fixed=left)
                left = 0

            # otherwise, take the whole file
            else:
                noise = raw_audio(file).data
                left -= duration

            noise = normalize(noise)
            noises.append(noise)

        # concatenate
        # FIXME: use fade-in between concatenated noises
        noise = np.vstack(noises)

        # select SNR at random
        snr = (self.snr_max -
               self.snr_min) * np.random.random_sample() + self.snr_min
        alpha = np.exp(-np.log(10) * snr / 20)

        return normalize(original) + alpha * noise
Beispiel #3
0
 def _create_config(self, segment_size_sec: float):
     return metrics.SpeakerValidationConfig(
         protocol_name='VoxCeleb.SpeakerVerification.VoxCeleb2',
         feature_extraction=RawAudio(sample_rate=self.sample_rate),
         preprocessors={'audio': FileFinder()},
         duration=segment_size_sec)