def test_n_output_audio(self):
    """Check that each flow returns several augmented clips, all differing from the input, when ``n=3``."""
    audio, sampling_rate = AudioLoader.load_audio(self.sample_wav_file)

    def crop_then_loudness():
        # Build a brand-new pair of augmenters on every call, mirroring the
        # separate instances each pipeline receives.
        return [naa.CropAug(sampling_rate=sampling_rate), naa.LoudnessAug()]

    flows = [
        naf.Sequential(crop_then_loudness()),
        naf.Sometimes(crop_then_loudness(), pipeline_p=0.9),
        # Nested pipeline: a Sequential wrapping a Sequential and a Sometimes.
        naf.Sequential([
            naf.Sequential(crop_then_loudness()),
            naf.Sometimes(crop_then_loudness(), pipeline_p=0.9),
        ]),
    ]

    for pipeline in flows:
        outputs = pipeline.augment(audio, n=3)
        self.assertGreater(len(outputs), 1)
        for clip in outputs:
            self.assertFalse(np.array_equal(audio, clip))

    # Guard against the flow list being emptied by accident.
    self.assertLess(0, len(flows))
def test_substitute(self):
    """LoudnessAug must change the samples of ``self.audio`` while keeping its length."""
    result = naa.LoudnessAug().augment(self.audio)

    # Content differs from the input ...
    self.assertFalse(np.array_equal(self.audio, result))
    # ... but the number of samples is unchanged.
    self.assertEqual(len(self.audio), len(result))
    self.assertTrue(self.sampling_rate > 0)
def augment_audio(data, sampling_rate, loudness_factor, speed_range, pitch_range, shift_max, noise_factor):
    """Run ``data`` through a fixed chain of ``naa`` audio augmenters.

    The stages are applied through ``naf.Sequential`` in this order:
    loudness, speed, pitch, shift, noise.

    Augmenters referenced by this module's notes:
        LOUDNESS AUGMENTER: naa.LoudnessAug()
        MASK AUGMENTER:     naa.MaskAug()
        SPEED AUGMENTER:    naa.SpeedAug()
        SHIFT AUGMENTER:    naa.ShiftAug()
        NOISE AUGMENTER:    naa.NoiseAug()
        CROP AUGMENTER:     naa.CropAug()
        PITCH AUGMENTER:    naa.PitchAug()

    Args:
        data: Audio passed to ``flow.augment``.
        sampling_rate: Forwarded to ``PitchAug`` and ``ShiftAug``.
        loudness_factor: First positional argument of ``LoudnessAug``.
        speed_range: First positional argument of ``SpeedAug``.
        pitch_range: ``pitch_range`` keyword of ``PitchAug``.
        shift_max: ``shift_max`` keyword of ``ShiftAug``.
        noise_factor: First positional argument of ``NoiseAug``.

    Returns:
        Whatever the sequential flow's ``augment`` call returns.
    """
    stages = [
        naa.LoudnessAug(loudness_factor),
        naa.SpeedAug(speed_range),
        naa.PitchAug(sampling_rate=sampling_rate, pitch_range=pitch_range),
        naa.ShiftAug(sampling_rate=sampling_rate, shift_max=shift_max),
        naa.NoiseAug(noise_factor),
    ]
    return naf.Sequential(stages).augment(data)
def test_substitute(self):
    """Load the sample WAV and verify LoudnessAug changes its samples but not its length."""
    waveform, rate = librosa.load(self.sample_wav_file)

    result = naa.LoudnessAug().augment(waveform)

    # Content differs from the input ...
    self.assertFalse(np.array_equal(waveform, result))
    # ... but the number of samples is unchanged.
    self.assertEqual(len(waveform), len(result))
    self.assertTrue(rate > 0)
def gen_augmented(original, sample_rate):
    """Apply one randomly selected ``naa`` augmenter to ``original`` and return the result.

    Args:
        original: Audio handed to the chosen augmenter's ``augment`` method.
        sample_rate: Forwarded as ``sampling_rate`` to the Vtlp and Pitch augmenters.
    """
    # Pool of augmenters to draw from. CropAug is currently left out
    # (it was commented out of the pool).
    candidates = [
        naa.NoiseAug(),
        naa.SpeedAug(),
        naa.LoudnessAug(factor=(0.5, 2)),
        naa.VtlpAug(sampling_rate=sample_rate, zone=(0.0, 1.0)),
        naa.PitchAug(sampling_rate=sample_rate, factor=(-1, 3)),
    ]

    # Uniform pick of a single augmenter from the pool.
    return random.choice(candidates).augment(original)
def process_audio_files_with_aug(filename, dirpath):
    """Load ``<dirpath>/flac/<filename>`` at 16 kHz and process it with three augmented variants.

    The list handed to ``convert_audio_to_processed_list`` is, in order:
    the original audio, a cropped variant, a loudness variant and a noise variant.

    Returns:
        The value returned by ``convert_audio_to_processed_list``.
    """
    sr = 16000
    source_path = os.path.join(dirpath, 'flac', filename)
    # The rate returned by the loader is not needed; ``sr`` is used throughout.
    audio_array, _ = librosa.load(source_path, sr=sr)

    # (augmenter class, constructor kwargs), in output order.
    recipes = (
        (naa.CropAug, {'sampling_rate': sr}),
        (naa.LoudnessAug, {'loudness_factor': (2, 5)}),
        (naa.NoiseAug, {'noise_factor': 0.03}),
    )

    variants = [audio_array]
    for aug_cls, kwargs in recipes:
        # Each augmenter is built and applied before the next one is created.
        variants.append(aug_cls(**kwargs).augment(audio_array))

    return convert_audio_to_processed_list(variants, filename, dirpath)
def test_zone(self):
    """Verify LoudnessAug with full zone and coverage augments the expected span.

    With ``zone=(0, 1)`` and ``coverage=1`` the test checks that:
      * the augmented audio equals the untouched prefix, the model's
        ``aug_data`` and the untouched suffix joined together;
      * ``aug_data`` has exactly ``len(audio) * zone_width * coverage`` samples.
    """
    zone = (0, 1)
    coverage = 1

    aug = naa.LoudnessAug(zone=zone, coverage=coverage)
    # NOTE(review): presumably disabling ``stateless`` makes the model keep
    # start_pos / end_pos / aug_data from the last call -- confirm against nlpaug.
    aug.model.stateless = False
    augmented_audio = aug.augment(self.audio)

    reconstruct_augmented_audio = np.concatenate(
        (
            self.audio[:aug.model.start_pos],
            aug.model.aug_data,
            self.audio[aug.model.end_pos:],
        ),
        axis=0,
    )
    self.assertTrue(
        np.array_equal(augmented_audio, reconstruct_augmented_audio))

    # assertEqual, not assertTrue: assertTrue(a, b) uses ``b`` only as the
    # failure message, so the expected length was never actually compared.
    self.assertEqual(
        len(aug.model.aug_data),
        int(len(self.audio) * (zone[1] - zone[0]) * coverage))
def process_audio_files_with_aug(self, audio_path):
    """Load ``audio_path`` at 16 kHz and return mel spectrograms of two augmented variants.

    Returns:
        A two-element list: the mel spectrogram of the loudness-augmented
        audio followed by that of the noise-augmented audio.
    """
    sr = 16000
    # The rate returned by the loader is not needed; ``sr`` is used throughout.
    audio_array, _ = librosa.load(audio_path, sr=sr)

    # NOTE(review): the cropped variant is still computed but is not part of
    # the returned list (a raw-audio list that included it was disabled).
    # Confirm whether this call can be dropped.
    _cropped = naa.CropAug(sampling_rate=sr).augment(audio_array)
    louder = naa.LoudnessAug(loudness_factor=(2, 5)).augment(audio_array)
    noisier = naa.NoiseAug(noise_factor=0.03).augment(audio_array)

    return [
        melspectrogram(louder, hparams=hparams),
        melspectrogram(noisier, hparams=hparams),
    ]
def test_coverage_and_zone(self):
    """Run every zone-aware audio augmenter over ``self.audio`` for several zone/coverage pairs."""
    params = [
        ((0.3, 0.7), 1),
        ((0, 1), 1),
    ]

    for zone, coverage in params:
        # Keyword arguments shared by every augmenter under test.
        common = dict(zone=zone, coverage=coverage, stateless=False)
        augs = [
            naa.LoudnessAug(**common),
            naa.MaskAug(**common),
            naa.NoiseAug(**common),
            naa.PitchAug(sampling_rate=self.sampling_rate, **common),
            naa.SpeedAug(**common),
            naa.VtlpAug(sampling_rate=self.sampling_rate, **common),
            naa.NormalizeAug(**common),
            naa.PolarityInverseAug(**common),
        ]

        expected_len = int(len(self.audio) * (zone[1] - zone[0]) * coverage)
        for aug in augs:
            aug_data = aug.augment(self.audio)
            # NOTE(review): assertTrue takes its second argument as the failure
            # message, so this only checks that the slice is non-empty; it does
            # not compare against expected_len. Switching to assertEqual needs
            # confirmation that the lengths match for every augmenter.
            self.assertTrue(len(aug_data[aug.start_pos:aug.end_pos]), expected_len)
def test_empty_input(self):
    """Augmenting an empty array with LoudnessAug should give back an equal array."""
    empty = np.array([])
    result = naa.LoudnessAug().augment(empty)
    self.assertTrue(np.array_equal(empty, result))
def aug_loudness(data, loudness_factor=(0.95, 1.5)):
    """Augment ``data`` with ``naa.LoudnessAug``.

    Args:
        data: Audio passed to the augmenter's ``augment`` method.
        loudness_factor: Forwarded as the ``loudness_factor`` keyword of
            ``LoudnessAug``; defaults to ``(0.95, 1.5)``.

    Returns:
        The result of ``LoudnessAug.augment``.
    """
    return naa.LoudnessAug(loudness_factor=loudness_factor).augment(data)
def loudness(data):
    """Augment ``data`` with a default-constructed ``naa.LoudnessAug`` and return the result."""
    augmented = naa.LoudnessAug().augment(data)
    return augmented
def get_random_loudness(self):
    """Build and return a ``naa.LoudnessAug`` created with ``crop=(0, 1)`` and ``coverage=1``."""
    # NOTE(review): confirm that ``crop`` is a keyword accepted by the
    # installed naa.LoudnessAug.
    settings = {'crop': (0, 1), 'coverage': 1}
    return naa.LoudnessAug(**settings)