Example 1
File: perturb.py Project: vsl9/NeMo
    def perturb(self, data):
        # Draw a random impulse-response recording from the manifest
        impulse_record = self._rng.sample(self._manifest.data, 1)[0]
        impulse = AudioSegment.from_file(impulse_record['audio_filepath'],
                                         target_sr=data.sample_rate)
        logging.debug("impulse: %s", impulse_record['audio_filepath'])
        # Convolution reverb; "full" mode lengthens the output by len(impulse) - 1
        data._samples = signal.fftconvolve(data.samples, impulse.samples,
                                           "full")
Example 2
    def perturb(self, data):
        # Normalize to 80% of full scale before transcoding, to avoid clipping
        att_factor = 0.8
        max_level = np.max(np.abs(data._samples))
        norm_factor = att_factor / max_level
        norm_samples = norm_factor * data._samples
        orig_f = NamedTemporaryFile(suffix=".wav")
        sf.write(orig_f.name, norm_samples.transpose(), 16000)

        # Pick a codec at random; this assumes self._codecs only holds
        # "amr-nb" and "g711", otherwise transcoded_f below is left unbound
        codec_ind = random.randint(0, len(self._codecs) - 1)
        if self._codecs[codec_ind] == "amr-nb":
            transcoded_f = NamedTemporaryFile(suffix="_amr.wav")
            # sox compression levels 0-7 select the eight AMR-NB bit-rate modes
            rates = list(range(0, 8))
            rate = rates[random.randint(0, len(rates) - 1)]
            # Encode to AMR-NB, then decode back to 16-bit 16 kHz WAV
            _ = subprocess.check_output(
                f"sox {orig_f.name} -V0 -C {rate} -t amr-nb - | sox -t amr-nb - -V0 -b 16 -r 16000 {transcoded_f.name}",
                shell=True,
            )
        elif self._codecs[codec_ind] == "g711":
            transcoded_f = NamedTemporaryFile(suffix="_g711.wav")
            # A-law G.711 at 8 kHz, mono
            _ = subprocess.check_output(
                f"sox {orig_f.name} -V0 -r 8000 -c 1 -e a-law {transcoded_f.name}",
                shell=True)

        # Reload the transcoded audio and trim it to the original length
        new_data = AudioSegment.from_file(transcoded_f.name, target_sr=16000)
        data._samples = new_data._samples[0:data._samples.shape[0]]
Example 3
    def perturb(self, data):
        snr_db = self._rng.uniform(self._min_snr_db, self._max_snr_db)
        noise_record = self._rng.sample(self._manifest.data, 1)[0]
        noise = AudioSegment.from_file(noise_record.audio_file,
                                       target_sr=data.sample_rate)
        # Gain that puts the noise exactly snr_db below the signal,
        # capped at _max_gain_db
        noise_gain_db = min(data.rms_db - noise.rms_db - snr_db,
                            self._max_gain_db)
        # logging.debug("noise: %s %s %s", snr_db, noise_gain_db, noise_record.audio_file)

        # calculate noise segment to use
        start_time = self._rng.uniform(0.0, noise.duration - data.duration)
        if noise.duration > (start_time + data.duration):
            noise.subsegment(start_time=start_time,
                             end_time=start_time + data.duration)

        # adjust gain for snr purposes and superimpose
        noise.gain_db(noise_gain_db)

        if noise._samples.shape[0] < data._samples.shape[0]:
            # Noise is shorter than the signal: add it at a random offset
            noise_idx = self._rng.randint(
                0, data._samples.shape[0] - noise._samples.shape[0])
            data._samples[noise_idx:noise_idx +
                          noise._samples.shape[0]] += noise._samples
        else:
            data._samples += noise._samples
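
The gain computation above follows directly from the definition of SNR in decibels: to land the noise snr_db below the signal, shift it by data.rms_db - noise.rms_db - snr_db. A quick check with made-up levels:

# Hypothetical rms levels, purely illustrative
data_rms_db = -20.0    # speech level
noise_rms_db = -30.0   # noise level
snr_db = 15.0          # desired signal-to-noise ratio

noise_gain_db = data_rms_db - noise_rms_db - snr_db   # -5.0 dB
# After the gain, the noise sits at -35.0 dB, exactly snr_db below the speech
assert data_rms_db - (noise_rms_db + noise_gain_db) == snr_db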
Example 4
def read_one_audiosegment(manifest,
                          target_sr,
                          rng,
                          tarred_audio=False,
                          audio_dataset=None):
    if tarred_audio:
        # Tarred datasets are read sequentially; look the entry up by file id
        if audio_dataset is None:
            raise TypeError("Expected augmentation dataset but got None")
        audio_file, file_id = next(audio_dataset)
        manifest_idx = manifest.mapping[file_id]
        manifest_entry = manifest[manifest_idx]

        offset = 0 if manifest_entry.offset is None else manifest_entry.offset
        duration = 0 if manifest_entry.duration is None else manifest_entry.duration
    else:
        # Plain manifests allow random access: draw one record at random
        audio_record = rng.sample(manifest.data, 1)[0]
        audio_file = audio_record.audio_file
        offset = 0 if audio_record.offset is None else audio_record.offset
        duration = 0 if audio_record.duration is None else audio_record.duration

    return AudioSegment.from_file(audio_file,
                                  target_sr=target_sr,
                                  offset=offset,
                                  duration=duration)
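
A hedged usage sketch for the non-tarred branch; the namedtuples stand in for NeMo's manifest collection types, and the file name is hypothetical:

import random
from collections import namedtuple

Entry = namedtuple("Entry", ["audio_file", "offset", "duration"])
Manifest = namedtuple("Manifest", ["data"])

manifest = Manifest(data=[Entry("sample.wav", None, None)])  # hypothetical file
rng = random.Random(42)

segment = read_one_audiosegment(manifest, target_sr=16000, rng=rng)
print(segment.sample_rate, segment.duration)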
Example 5
    def perturb(self, data):
        impulse_record = self._rng.sample(self._manifest.data, 1)[0]
        impulse = AudioSegment.from_file(impulse_record.audio_file,
                                         target_sr=data.sample_rate)
        # logging.debug("impulse: %s", impulse_record['audio_filepath'])
        # Min-max normalize the impulse response into [0, 1]
        impulse_norm = (impulse.samples - min(impulse.samples)) / (
            max(impulse.samples) - min(impulse.samples))
        # "same" mode keeps the perturbed signal at its original length
        data._samples = signal.fftconvolve(data._samples, impulse_norm, "same")
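
The min-max step rescales the impulse response into [0, 1] before convolving; a standalone illustration with toy values:

import numpy as np

impulse = np.array([-0.5, 0.0, 0.25, 1.5])
impulse_norm = (impulse - impulse.min()) / (impulse.max() - impulse.min())
# -> [0.0, 0.25, 0.375, 1.0]: the smallest sample maps to 0, the largest to 1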
Example 6
    def process(self, file_path, offset=0, duration=0, trim=False):
        audio = AudioSegment.from_file(
            file_path,
            target_sr=self.sample_rate,
            int_values=self.int_values,
            offset=offset,
            duration=duration,
            trim=trim,
        )
        return self.process_segment(audio)
Example 7
    def __getitem__(self, index):
        example = self.collection[index]
        features = AudioSegment.segment_from_file(
            example.audio_file,
            n_segments=self.n_segments,
            trim=self.trim,
        )
        features = torch.tensor(features.samples, dtype=torch.float)
        f, fl = features, torch.tensor(features.shape[0]).long()

        return f, fl
Example 8
    def exposed_get_path_samples(self, file_path, target_sr, int_values,
                                 offset, duration, trim):
        print(f"loading.. {file_path}")
        audio = AudioSegment.from_file(
            file_path,
            target_sr=target_sr,
            int_values=int_values,
            offset=offset,
            duration=duration,
            trim=trim,
        )
        # print(f"returning.. {len(audio.samples)} items of type {type(audio.samples)}")
        return pickle.dumps(audio.samples)
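
The exposed_ prefix suggests this method belongs to an RPyC service, where returning pickled bytes avoids RPyC's slow per-element netref proxying of large NumPy arrays. A hedged client-side sketch (host, port, and file name are hypothetical):

import pickle
import rpyc

conn = rpyc.connect("localhost", 18861)
raw = conn.root.get_path_samples("sample.wav", 16000, False, 0, 0, False)
samples = pickle.loads(raw)   # back to a plain NumPy array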
Example 9
    def __getitem__(self, index):
        """
        Given an index, returns the audio and audio_length of the corresponding element.
        A clip of n_segments samples is chosen at random if the audio is longer than n_segments.
        """
        example = self.collection[index]
        features = AudioSegment.segment_from_file(example.audio_file,
                                                  n_segments=self.n_segments,
                                                  trim=self.trim)
        features = torch.tensor(features.samples)
        audio, audio_length = features, torch.tensor(features.shape[0]).long()

        # Trim the clip so its length is an exact multiple of truncate_to
        truncate = audio_length % self.truncate_to
        if truncate != 0:
            audio_length -= truncate.long()
            audio = audio[:audio_length]

        return audio, audio_length
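
The modulo arithmetic trims the clip to the largest multiple of truncate_to that fits; a self-contained check:

import torch

truncate_to = 256
audio = torch.randn(1000)
audio_length = torch.tensor(audio.shape[0]).long()

truncate = audio_length % truncate_to   # 1000 % 256 = 232
if truncate != 0:
    audio_length -= truncate.long()
    audio = audio[:audio_length]

assert audio.shape[0] == 768            # largest multiple of 256 <= 1000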
Example 10
File: perturb.py Project: vsl9/NeMo
    def perturb(self, data):
        snr_db = self._rng.uniform(self._min_snr_db, self._max_snr_db)
        noise_record = self._rng.sample(self._manifest.data, 1)[0]
        noise = AudioSegment.from_file(noise_record['audio_filepath'],
                                       target_sr=data.sample_rate)
        noise_gain_db = min(data.rms_db - noise.rms_db - snr_db,
                            self._max_gain_db)
        logging.debug("noise: %s %s %s", snr_db, noise_gain_db,
                      noise_record['audio_filepath'])

        # calculate noise segment to use
        start_time = self._rng.uniform(0.0, noise.duration - data.duration)
        noise.subsegment(start_time=start_time,
                         end_time=start_time + data.duration)

        # adjust gain for snr purposes and superimpose
        noise.gain_db(noise_gain_db)
        data._samples = data._samples + noise.samples