def get_transforms(bckgrd_aug_dir=None, secondary_bckgrd_aug_dir=None):
    """Build the ``{"train": ..., "valid": ...}`` transform mapping.

    The training entry wraps a ``Compose`` made of a noise/gain trio, the
    optional background-noise stages, and then a second noise/gain trio.

    Args:
        bckgrd_aug_dir: When not ``None``, passed to ``AddBackgroundNoise``
            (p=0.2).
        secondary_bckgrd_aug_dir: When not ``None``, passed to
            ``AddShortNoises`` (p=0.6).

    Returns:
        dict with keys ``"train"`` and ``"valid"``.
    """

    def _noise_and_gain():
        # New instances on every call: the trio appears twice in the chain,
        # once before and once after the optional background stages.
        return [
            AddGaussianNoise(p=0.2),
            AddGaussianSNR(p=0.2),
            Gain(min_gain_in_db=-15, max_gain_in_db=15, p=0.3),
        ]

    pipeline = _noise_and_gain()

    if bckgrd_aug_dir is not None:
        pipeline.append(AddBackgroundNoise(bckgrd_aug_dir, p=0.2))

    if secondary_bckgrd_aug_dir is not None:
        short_noises = AddShortNoises(
            secondary_bckgrd_aug_dir,
            min_time_between_sounds=0.0,
            max_time_between_sounds=15.0,
            burst_probability=0.5,
            p=0.6,
        )
        pipeline.append(short_noises)

    pipeline.extend(_noise_and_gain())

    return {
        "train": get_training_augmentation(Compose(pipeline)),
        "valid": get_validation_augmentation(),
    }
def make_transform():
    """Return a ``Compose`` of three augmentations, each with ``p=0.25``.

    Order: ``FrequencyMask`` -> ``TimeStretch`` -> ``AddGaussianSNR``.
    """
    apply_prob = 0.25

    frequency_mask = FrequencyMask(
        min_frequency_band=0.005,
        max_frequency_band=0.10,
        p=apply_prob,
    )
    # NOTE(review): a 0.15-0.25 rate range is unusually low for a stretch
    # factor — confirm this is intended.
    time_stretch = TimeStretch(min_rate=0.15, max_rate=0.25, p=apply_prob)
    gaussian_snr = AddGaussianSNR(min_SNR=0.001, max_SNR=0.25, p=apply_prob)

    return Compose([frequency_mask, time_stretch, gaussian_snr])
def get_transforms(bckgrd_aug_dir=None):
    """Build the ``{"train": ..., "valid": ...}`` transform mapping.

    Args:
        bckgrd_aug_dir: When not ``None``, an ``AddBackgroundNoise`` stage
            (p=0.5) reading from this location is appended after the two
            Gaussian stages.

    Returns:
        dict with keys ``"train"`` and ``"valid"``.
    """
    stages = [
        AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.3),
        AddGaussianSNR(p=0.3),
    ]
    if bckgrd_aug_dir is not None:
        stages.append(AddBackgroundNoise(bckgrd_aug_dir, p=0.5))

    train_transform = get_training_augmentation(Compose(stages))
    valid_transform = get_validation_augmentation()
    return {"train": train_transform, "valid": valid_transform}
def compose_augmentations(rir_path):
    """Build the noise/reverb augmentation chain from a RIR data directory.

    ``rir_path`` must contain two subdirectories: ``simulated_rirs`` (used
    for ``AddImpulseResponse``) and ``pointsource_noises`` (used for both
    ``AddBackgroundNoise`` and ``AddShortNoises``).

    Args:
        rir_path: Directory holding the two subdirectories named above.

    Returns:
        A ``Compose`` of AddGaussianSNR, AddImpulseResponse,
        AddBackgroundNoise and AddShortNoises, in that order.

    Raises:
        ValueError: If ``rir_path`` does not exist, or if it exists but one
            of the required subdirectories is missing.
    """
    impulse_path = os.path.join(rir_path, 'simulated_rirs')
    noise_path = os.path.join(rir_path, 'pointsource_noises')

    if not os.path.exists(rir_path):
        raise ValueError(
            'Unable to augment signal, rir_path "{}" does not exist.'.format(
                rir_path))

    # Report the actual problem: previously a missing subdirectory was
    # reported as if rir_path itself did not exist.
    missing = [
        sub_dir for sub_dir in (impulse_path, noise_path)
        if not os.path.exists(sub_dir)
    ]
    if missing:
        raise ValueError(
            'Unable to augment signal, rir_path "{}" is missing required '
            'subdirectories: {}.'.format(rir_path, ', '.join(missing)))

    return Compose([
        AddGaussianSNR(min_SNR=0.2, max_SNR=0.5, p=0.5),
        AddImpulseResponse(impulse_path, leave_length_unchanged=True, p=0.3),
        AddBackgroundNoise(noise_path, p=0.3),
        AddShortNoises(noise_path, max_snr_in_db=80, p=0.3),
    ])
def compose_without_noise(ir_path='data/impulse'):
    """Return a ``MyCompose`` chain that uses no background-noise files.

    Most stages share a base probability of 0.25; ``TimeStretch`` and
    ``PitchShift`` use 1/10 and 1/25 of that value respectively.

    Args:
        ir_path: Forwarded to ``MyAddImpulseResponse`` as ``ir_path``.

    Returns:
        ``MyCompose(stages, p=1.0, max_augs=3)``.
    """
    base_p = 0.25

    stages = []
    stages.append(AddGaussianNoise(p=base_p))
    stages.append(Shift(p=base_p, min_fraction=-0.2, max_fraction=0.2))
    stages.append(FrequencyMask(p=base_p))
    stages.append(TimeMask(p=base_p, max_band_part=0.25))
    stages.append(AddGaussianSNR(p=base_p))
    stages.append(ClippingDistortion(p=base_p, max_percentile_threshold=20))
    stages.append(MyAddImpulseResponse(p=base_p, ir_path=ir_path))
    # The last two stages are given much smaller probabilities.
    stages.append(TimeStretch(p=base_p / 10))
    stages.append(PitchShift(p=base_p / 25))

    return MyCompose(stages, p=1.0, max_augs=3)
def compose(sounds_path):
    """Return a shuffled ``Compose`` chain that includes background noise.

    Most stages share a base probability of 0.2; ``TimeStretch`` and
    ``PitchShift`` use 1/10 and 1/30 of that value respectively.

    Args:
        sounds_path: Forwarded to ``AddBackgroundNoise`` as ``sounds_path``.

    Returns:
        ``Compose(stages, p=0.4, shuffle=True)``.
    """
    base_p = 0.2

    stages = []
    stages.append(MyGain(p=base_p))
    stages.append(AddGaussianNoise(p=base_p))
    stages.append(Shift(p=base_p, min_fraction=-0.25, max_fraction=0.25))
    stages.append(FrequencyMask(p=base_p))
    stages.append(TimeMask(p=base_p, max_band_part=0.25))
    stages.append(AddGaussianSNR(p=base_p))
    stages.append(ClippingDistortion(p=base_p, max_percentile_threshold=20))
    stages.append(AddBackgroundNoise(sounds_path=sounds_path, p=base_p))
    # The last two stages are given much smaller probabilities.
    stages.append(TimeStretch(p=base_p / 10))
    stages.append(PitchShift(p=base_p / 30))

    return Compose(stages, p=0.4, shuffle=True)
{ "instance": AddBackgroundNoise(sounds_path=os.path.join( DEMO_DIR, "background_noises"), p=1.0), "num_runs": 5, }, { "instance": AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=1.0), "num_runs": 5, }, { "instance": AddGaussianSNR(p=1.0), "num_runs": 5 }, { "instance": AddImpulseResponse(p=1.0, ir_path=os.path.join(DEMO_DIR, "ir")), "num_runs": 1, }, { "instance": AddShortNoises( sounds_path=os.path.join(DEMO_DIR, "short_noises"), min_snr_in_db=0, max_snr_in_db=8, min_time_between_sounds=2.0,
def generate(self, wave_file, output_dir):
    """Write augmented copies of ``wave_file`` for every enabled transform.

    Each transform is gated by ``self.<Name>[0]``. Output files are named
    ``<stem>_<Name><NNN>.wav`` inside ``output_dir``, where ``<stem>`` is
    the input file name up to its first dot.

    Args:
        wave_file: Path of the input file, read with ``load_wav_file``.
        output_dir: Directory the augmented files are written to.
    """
    samples = load_wav_file(wave_file)
    stem = os.path.basename(wave_file).split('.')[0]

    def _render(transform, label, runs=5):
        # One single-transform pipeline, applied `runs` times; every run is
        # written to its own numbered file.
        pipeline = Compose([transform])
        for run_idx in range(runs):
            target = os.path.join(
                output_dir, "{}_{}{:03d}.wav".format(stem, label, run_idx))
            rendered = pipeline(samples=samples, sample_rate=SAMPLE_RATE)
            wavfile.write(target, rate=SAMPLE_RATE, data=rendered)

    # Transforms are constructed only when enabled, in this fixed order.
    if self.AddImpulseResponse[0]:
        _render(
            AddImpulseResponse(p=1.0, ir_path=os.path.join(DEMO_DIR, "ir")),
            "AddImpulseResponse",
            runs=1,
        )

    if self.FrequencyMask[0]:
        _render(FrequencyMask(p=1.0), "FrequencyMask")

    if self.TimeMask[0]:
        _render(TimeMask(p=1.0), "TimeMask")

    if self.AddGaussianSNR[0]:
        _render(AddGaussianSNR(p=1.0), "AddGaussianSNR")

    if self.AddGaussianNoise[0]:
        _render(
            AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=1.0),
            "AddGaussianNoise",
        )

    if self.TimeStretch[0]:
        _render(TimeStretch(min_rate=0.5, max_rate=1.5, p=1.0), "TimeStretch")

    if self.PitchShift[0]:
        _render(
            PitchShift(min_semitones=-6, max_semitones=12, p=1.0),
            "PitchShift",
        )

    if self.Shift[0]:
        _render(Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0), "Shift")

    if self.ShiftWithoutRoll[0]:
        _render(
            Shift(min_fraction=-0.2, max_fraction=0.2, rollover=False, p=1.0),
            "ShiftWithoutRollover",
        )

    if self.Normalize[0]:
        _render(Normalize(p=1.0), "Normalize", runs=1)

    if self.Resample[0]:
        _render(
            Resample(min_sample_rate=12000, max_sample_rate=44100, p=1.0),
            "Resample",
        )

    if self.ClippingDistortion[0]:
        _render(
            ClippingDistortion(max_percentile_threshold=10, p=1.0),
            "ClippingDistortion",
        )

    if self.AddBackgroundNoise[0]:
        _render(
            AddBackgroundNoise(
                sounds_path=os.path.join(DEMO_DIR, "background_noises"),
                p=1.0),
            "AddBackgroundNoise",
        )

    # White and pink noise are the only entries whose run count comes from
    # the second element of the corresponding attribute.
    if self.AddWhiteNoise[0]:
        _render(
            AddBackgroundNoise(
                sounds_path=os.path.join(DEMO_DIR, "white_noises"), p=1.0),
            "AddWhiteNoise",
            runs=self.AddWhiteNoise[1],
        )

    if self.AddPinkNoise[0]:
        _render(
            AddBackgroundNoise(
                sounds_path=os.path.join(DEMO_DIR, "pink_noises"), p=1.0),
            "AddPinkNoise",
            runs=self.AddPinkNoise[1],
        )

    if self.AddShortNoises[0]:
        _render(
            AddShortNoises(
                sounds_path=os.path.join(DEMO_DIR, "short_noises"),
                min_snr_in_db=0,
                max_snr_in_db=8,
                min_time_between_sounds=2.0,
                max_time_between_sounds=4.0,
                burst_probability=0.4,
                min_pause_factor_during_burst=0.01,
                max_pause_factor_during_burst=0.95,
                min_fade_in_time=0.005,
                max_fade_in_time=0.08,
                min_fade_out_time=0.01,
                max_fade_out_time=0.1,
                p=1.0,
            ),
            "AddShortNoises",
        )
def __init__(self,
             root_dir,
             csv_dir,
             conf,
             bird_code,
             inv_ebird_label,
             num_test_samples=10,
             bckgrd_aug_dir=None,
             background_audio_dir=None,
             file_type="mp3",
             isTraining=True,
             transform=None,
             apply_mixer=False):
    """Load the metadata CSV and set up sampling and augmentation state.

    Args:
        root_dir: Stored as ``self.root_dir``.
        csv_dir: Passed to ``pd.read_csv``; the table must provide the
            columns ``filename``, ``ebird_code`` and ``secondary_labels``.
        conf: Stored as ``self.conf``.
        bird_code: Stored as ``self.bird_code``.
        inv_ebird_label: Stored as ``self.inv_ebird_label``.
        num_test_samples: Stored as ``self.num_test_samples``.
        bckgrd_aug_dir: When not ``None``, an ``AddBackgroundNoise`` stage
            (p=0.5) using it is appended to ``self.augmenter``.
        background_audio_dir: When not ``None``, every ``**/*.wav`` match
            under it is appended to ``self.data`` as a filename-only record.
            Must expose ``.glob`` (presumably a ``pathlib.Path`` — confirm
            against callers).
        file_type: Stored as ``self.file_type``.
        isTraining: Stored as ``self.isTraining``.
        transform: Stored as ``self.transform``.
        apply_mixer: When truthy, per-``ebird_code`` row-index groups are
            stored in ``self.dict_grp``.
    """
    # Plain configuration.
    self.root_dir = root_dir
    self.conf = conf
    self.isTraining = isTraining
    self.bird_code = bird_code
    self.inv_ebird_label = inv_ebird_label
    self.transform = transform
    self.file_type = file_type
    self.apply_mixer = apply_mixer
    self.additional_loader_params = {
        "worker_init_fn": self.init_workers_fn,
        "collate_fn": self.collate_fn
    }
    self.sampler = ImbalancedDatasetSampler

    # Metadata records, one dict per CSV row.
    frame = pd.read_csv(csv_dir)
    # NOTE(review): eval() executes whatever text the CSV column holds; only
    # safe for trusted files. Consider ast.literal_eval if the column only
    # ever contains list literals.
    frame.secondary_labels = frame.secondary_labels.apply(eval)
    records = frame[["filename", "ebird_code",
                     "secondary_labels"]].to_dict('index')
    self.data = list(records.values())

    # Optional background clips: these records carry only a "filename" key.
    self.background_audio_dir = background_audio_dir
    if self.background_audio_dir is not None:
        self.data.extend(
            {"filename": wav_path}
            for wav_path in background_audio_dir.glob('**/*.wav'))

    self.num_test_samples = num_test_samples
    self.length = len(self.data)

    if self.apply_mixer:
        # Row indices of the CSV grouped by species code.
        self.dict_grp = {
            code: rows.index.values
            for code, rows in frame.groupby("ebird_code")
        }
        self.possible_mixer_keys = list(self.dict_grp)

    # The first three stages are always present; background noise is
    # appended last when a directory was supplied.
    stages = [
        AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.3),
        AddGaussianSNR(p=0.3),
        PitchShift(min_semitones=-4, max_semitones=4, p=0.3),
    ]
    if bckgrd_aug_dir is not None:
        stages.append(AddBackgroundNoise(bckgrd_aug_dir, p=0.5))
    self.augmenter = Compose(stages)

    del frame
import matplotlib.pyplot as plt # now in different location files = glob.glob('./dataset/*.wav') for audio_file in files: sample_rate, sound_np = wavfile.read(audio_file) if sound_np.dtype != np.float32: assert sound_np.dtype == np.int16 sound_np = np.divide( sound_np, 32768, dtype=np.float32 ) number = os.path.split(audio_file)[-1][:-4] transforms = [ {"instance": AddGaussianSNR(p=1.0), "num_runs": 3}, {"instance": TimeStretch(min_rate=0.4, max_rate=1.25, p=1.0), "num_runs": 5}, { "instance": PitchShift(min_semitones=-5, max_semitones=5, p=1.0), "num_runs": 6, }, {"instance": Shift(min_fraction=-0.85, max_fraction=0.85, p=1.0), "num_runs": 4}, {"instance": Resample(p=1.0), "num_runs": 5}, {"instance": ClippingDistortion(p=1.0), "num_runs": 3}, ] for transform in transforms: augmenter = Compose([transform["instance"]]) run_name = ( transform.get("name") if transform.get("name")
for i in range(5): output_file_path = os.path.join( output_dir, "FrequencyMask_{:03d}.wav".format(i) ) augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE) wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples) # TimeMask augmenter = Compose([TimeMask(p=1.0)]) for i in range(5): output_file_path = os.path.join(output_dir, "TimeMask_{:03d}.wav".format(i)) augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE) wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples) # AddGaussianSNR augmenter = Compose([AddGaussianSNR(p=1.0)]) for i in range(5): output_file_path = os.path.join( output_dir, "AddGaussianSNR_{:03d}.wav".format(i) ) augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE) wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples) # AddGaussianNoise augmenter = Compose( [AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=1.0)] ) for i in range(5): output_file_path = os.path.join( output_dir, "AddGaussianNoise_{:03d}.wav".format(i) )
def __init__(self, augment_type, p, cross_valid=False):
    """Select and build the augmentation named by ``augment_type``.

    Args:
        augment_type: One of ``'wham_weak'``, ``'wham_strong'``,
            ``'reverb_weak'``, ``'reverb_strong'``, ``'cascade'``,
            ``'distort'`` or ``'none'``.
        p: Probability handed to every stage of the ``'cascade'`` and
            ``'distort'`` chains. The other types do not use it.
        cross_valid: Selects the ``cv`` WHAM noise folder instead of ``tr``.

    Raises:
        ValueError: If ``augment_type`` is not one of the values above.
    """
    self.cross_valid = cross_valid
    self.sample_rate = 8000
    self.type = augment_type
    self.p = p

    if self.cross_valid:
        wham_path = '../../../librimix/data/wham_noise/cv'
    else:
        wham_path = '../../../librimix/data/wham_noise/tr'

    def _wham_noise(lo_db, hi_db, prob):
        # Background noise from the WHAM folder within the given SNR range.
        return AddBackgroundNoise(sounds_path=wham_path,
                                  min_snr_in_db=lo_db,
                                  max_snr_in_db=hi_db,
                                  p=prob)

    def _reverb(lo, hi):
        # The three reverb parameters are drawn once, here at construction.
        return AudioEffectsChain().reverb(
            reverberance=random.randrange(lo, hi),
            room_scale=random.randrange(lo, hi),
            stereo_depth=random.randrange(lo, hi),
        )

    kind = self.type
    if kind == 'wham_weak':
        self.augment = Compose([_wham_noise(5, 15, 1)])
    elif kind == 'wham_strong':
        self.augment = Compose([_wham_noise(2, 7, 1)])
    elif kind == 'reverb_weak':
        self.augment = _reverb(0, 50)
    elif kind == 'reverb_strong':
        self.augment = _reverb(50, 100)
    elif kind == 'cascade':
        self.augment = Compose([
            _wham_noise(0, 5, self.p),
            AddGaussianSNR(min_SNR=0.001, max_SNR=0.25, p=self.p),
            ClippingDistortion(min_percentile_threshold=0,
                               max_percentile_threshold=40,
                               p=self.p),
            FrequencyMask(min_frequency_band=0.0,
                          max_frequency_band=0.5,
                          p=self.p),
            PolarityInversion(p=self.p),
            Shift(min_fraction=-0.5,
                  max_fraction=0.5,
                  rollover=True,
                  p=self.p),
            TimeMask(min_band_part=0.0,
                     max_band_part=0.2,
                     fade=False,
                     p=self.p),
        ])
    elif kind == 'distort':
        self.augment = Compose([
            PitchShift(min_semitones=-4, max_semitones=4, p=self.p),
            TimeStretch(min_rate=0.8,
                        max_rate=1.25,
                        leave_length_unchanged=True,
                        p=self.p),
        ])
    elif kind == 'none':
        self.augment = None
    else:
        raise ValueError(
            "Did not recognize augmentation type. Received %s, expected 'wham_weak', 'wham_strong', 'reverb_weak', 'reverb_strong', 'cascade', 'distort', or 'none'."
            % self.type)
def transform(file_path, output_folder, iterations):
    """Render ``iterations`` augmented copies of a sound per transform.

    Output files are named ``<Transform>_<input stem>_<i>.wav`` and placed
    in ``output_folder``.

    Args:
        file_path: Input file, read with ``load_wav_file``.
        output_folder: Destination directory for the rendered files.
        iterations: Number of files written per transform.
    """
    samples = load_wav_file(file_path)
    stem = os.path.basename(file_path).replace('.wav', '')

    def _render(stage, label):
        # Wrap the single stage in its own pipeline and write one file per
        # iteration.
        pipeline = Compose([stage])
        for run_idx in range(iterations):
            target = '{}/{}_{}_{}.wav'.format(output_folder, label, stem,
                                              run_idx)
            rendered = pipeline(samples=samples, sample_rate=SAMPLE_RATE)
            wavfile.write(target, rate=SAMPLE_RATE, data=rendered)

    # Each stage is built right before it is rendered, in this fixed order.
    _render(TimeMask(p=1.0), 'TimeMask')
    _render(FrequencyMask(p=1.0), 'FrequencyMask')
    _render(AddGaussianSNR(p=1.0), 'AddGaussianSNR')
    _render(PitchShift(min_semitones=-4, max_semitones=4, p=1.0),
            'PitchShift')
    # NOTE(review): p=0.5 here, unlike every other stage (p=1.0) — confirm
    # this is intended.
    _render(TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5), 'TimeStretch')
    _render(AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=1.0),
            'AddGaussianNoise')
    _render(Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0), 'Shift')
    _render(Shift(min_fraction=-0.5, max_fraction=0.5, rollover=False, p=1.0),
            'Shift without rollover')
    _render(Normalize(p=1.0), 'Normalize')
    _render(AddImpulseResponse(p=1.0, ir_path=os.path.join(DEMO_DIR, "ir")),
            'AddImpulseResponse')
    _render(Resample(p=1.0), 'Resample')
    _render(ClippingDistortion(p=1.0), 'ClippingDistortion')
    _render(
        AddBackgroundNoise(
            sounds_path=os.path.join(DEMO_DIR, "background_noises"), p=1.0),
        'AddBackgroundNoise')
    _render(
        AddShortNoises(
            sounds_path=os.path.join(DEMO_DIR, "short_noises"),
            min_snr_in_db=0,
            max_snr_in_db=8,
            min_time_between_sounds=2.0,
            max_time_between_sounds=4.0,
            burst_probability=0.4,
            min_pause_factor_during_burst=0.01,
            max_pause_factor_during_burst=0.95,
            min_fade_in_time=0.005,
            max_fade_in_time=0.08,
            min_fade_out_time=0.01,
            max_fade_out_time=0.1,
            p=1.0,
        ), 'AddShortNoises')
min_snr=5.0, max_snr=20.0, **kwargs): super().__init__(always_apply, p) self.min_snr = min_snr self.max_snr = max_snr def apply(self, y: np.ndarray, **params): snr = np.random.uniform(self.min_snr, self.max_snr) a_signal = np.sqrt(y**2).max() a_noise = a_signal / (10**(snr / 20)) pink_noise = cn.powerlaw_psd_gaussian(1, len(y)) a_pink = np.sqrt(pink_noise**2).max() augmented = (y + pink_noise * 1 / a_pink * a_noise).astype(y.dtype) return augmented AUGMENT = Compose([ PitchShift(min_semitones=-4, max_semitones=4, p=0.2), Gain(min_gain_in_db=-12, max_gain_in_db=12, p=0.2), AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.2), AddGaussianSNR(min_SNR=0.1, max_SNR=1, p=0.2), PinkNoiseSNR(min_snr=5.0, max_snr=10, p=0.2) ]) def do_aug(samples, sample_rate): return AUGMENT(samples, sample_rate)
def applyTransformations(fileName, output_dir, auxiliarSoundsDir):
    """Write augmented copies of ``fileName`` for a fixed set of transforms.

    Output files are named ``<name>_<Transform>_<NNN>.wav`` inside
    ``output_dir``, where ``<name>`` is the input file name up to its first
    dot. AddImpulseResponse and Normalize produce one file each; every other
    transform produces five.

    Args:
        fileName: Path of the input file, read with ``load_wav_file``.
        output_dir: Directory the augmented files are written to.
        auxiliarSoundsDir: Directory containing ``helperSounds/ir``,
            ``helperSounds/background_noises`` and
            ``helperSounds/short_noises``.
    """
    # Take the base name first: splitting the whole path on "." returned an
    # empty name for paths such as "./data/a.wav" or "../a.wav".
    name = os.path.basename(fileName).split(".")[0]
    samples = load_wav_file(fileName)

    def _write_variants(stage, label, runs=5):
        # Apply a single-stage pipeline `runs` times, one numbered file each.
        augmenter = Compose([stage])
        for i in range(runs):
            output_file_path = os.path.join(
                output_dir, "{}_{}_{:03d}.wav".format(name, label, i))
            augmented_samples = augmenter(samples=samples,
                                          sample_rate=SAMPLE_RATE)
            wavfile.write(output_file_path,
                          rate=SAMPLE_RATE,
                          data=augmented_samples)

    # Stages are built right before use, in this fixed order.
    _write_variants(
        AddImpulseResponse(p=1.0,
                           ir_path=os.path.join(auxiliarSoundsDir,
                                                "helperSounds/ir")),
        "AddImpulseResponse",
        runs=1,
    )
    _write_variants(FrequencyMask(p=1.0), "FrequencyMask")
    _write_variants(TimeMask(p=1.0), "TimeMask")
    _write_variants(AddGaussianSNR(p=1.0), "AddGaussianSNR")
    _write_variants(
        AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=1.0),
        "AddGaussianNoise",
    )
    _write_variants(TimeStretch(min_rate=0.8, max_rate=1.25, p=1.0),
                    "TimeStretch")
    # The label was previously misspelled "itchShift".
    _write_variants(PitchShift(min_semitones=-4, max_semitones=4, p=1.0),
                    "PitchShift")
    _write_variants(Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0),
                    "Shift")
    _write_variants(
        Shift(min_fraction=-0.5, max_fraction=0.5, rollover=False, p=1.0),
        "ShiftWithoutRollover",
    )
    _write_variants(Normalize(p=1.0), "Normalize", runs=1)
    _write_variants(ClippingDistortion(p=1.0), "ClippingDistortion")
    _write_variants(
        AddBackgroundNoise(sounds_path=os.path.join(
            auxiliarSoundsDir, "helperSounds/background_noises"),
                           p=1.0),
        "AddBackgroundNoise",
    )
    _write_variants(
        AddShortNoises(
            sounds_path=os.path.join(auxiliarSoundsDir,
                                     "helperSounds/short_noises"),
            min_snr_in_db=0,
            max_snr_in_db=8,
            min_time_between_sounds=2.0,
            max_time_between_sounds=4.0,
            burst_probability=0.4,
            min_pause_factor_during_burst=0.01,
            max_pause_factor_during_burst=0.95,
            min_fade_in_time=0.005,
            max_fade_in_time=0.08,
            min_fade_out_time=0.01,
            max_fade_out_time=0.1,
            p=1.0,
        ),
        "AddShortNoises",
    )