def addFx(sound, effects, pad=3000, fade_in=100, fade_out=100):
    """Apply a chain of audio effects to a pydub AudioSegment.

    Args:
        sound: pydub AudioSegment to process.
        effects: iterable of (name, value) pairs. Supported names:
            "reverb", "distortion", "highpass", "lowpass", "bass",
            "echo", "tempo". For "bass" and "echo", value may be a
            tuple — (gain, frequency) and (amount, count) respectively.
        pad: trailing silence in milliseconds appended before processing
            (gives time-stretching effects like reverb/echo room to tail off).
        fade_in: fade-in duration in ms (clamped to the clip length).
        fade_out: fade-out duration in ms (clamped to the clip length).

    Returns:
        A new AudioSegment with the effects and fades applied.
    """
    # Add trailing padding so effect tails (reverb/echo) are not cut off.
    if pad > 0:
        sound += AudioSegment.silent(duration=pad, frame_rate=sound.frame_rate)

    # Convert the pydub sound to a 16-bit numpy array for pysndfx.
    samples = np.array(sound.get_array_of_samples()).astype(np.int16)

    chain = AudioEffectsChain()
    for effect, value in effects:
        if effect == "reverb" and value > 0:
            chain.reverb(reverberance=value)
        elif effect == "distortion" and value > 0:
            chain.overdrive(gain=value)
        elif effect == "highpass" and value > 0:
            chain.highpass(value)
        elif effect == "lowpass" and value > 0:
            chain.lowpass(value)
        elif effect == "bass":
            frequency = 100
            gain = value
            if isinstance(value, tuple):
                gain, frequency = value
            # NOTE(review): highshelf boosts content *above* `frequency`;
            # for a "bass" effect a lowshelf would be the expected filter —
            # confirm intent before changing.
            chain.highshelf(gain=gain, frequency=frequency)
        elif effect == "echo":
            echoStr = "echo 0.8 0.9"
            amount = value
            count = 1
            # An (amount, count) tuple requests multiple echo taps.
            if isinstance(value, tuple):
                amount, count = value
            for _ in range(count):
                # delay between 10 (robot) and 1000 (mountains) ms
                echoStr += " %s 0.3" % amount
            chain.custom(echoStr)
        elif effect == "tempo" and value != 1:
            # != 1 also covers 1.0 (numeric equality); skip the no-op factor.
            chain.tempo(factor=value)

    # Run the whole chain over the raw samples.
    y = chain(samples)

    # Convert back to a pydub-compatible buffer and spawn a new clip.
    newData = array.array(sound.array_type, y)
    newSound = sound._spawn(newData)

    # Clamp fades so they never exceed the clip duration.
    dur = len(newSound)
    return newSound.fade_in(min(fade_in, dur)).fade_out(min(fade_out, dur))
def worker(_):
    """Generate 8 s of white noise at 44.1 kHz and run it through a
    randomized pitch/tempo/reverb effect chain.

    The argument is ignored (pool-worker signature); returns the
    processed sample array.
    """
    noise = np.random.standard_normal(44100 * 8)
    fx = (
        AudioEffectsChain()
        .pitch(np.random.uniform(-300, 300))
        .tempo(np.random.uniform(0.8, 1.2))
        .reverb(np.random.uniform(0, 100))
    )
    return fx(noise)
def __call__(self, input):
    """Randomly augment an audio signal.

    Each of pitch shift, tempo change, and reverb is applied
    independently with probability 0.5; returns the processed signal.
    """
    effect = AudioEffectsChain()
    # Candidate augmentations; each draws its own random parameter
    # only when actually applied (preserving RNG call order).
    candidates = (
        lambda fx: fx.pitch(np.random.uniform(-300, 300)),
        lambda fx: fx.tempo(np.random.uniform(0.8, 1.2)),
        lambda fx: fx.reverb(np.random.uniform(0, 100)),
    )
    for apply_op in candidates:
        if np.random.uniform() > 0.5:
            effect = apply_op(effect)
    # Further augmentations considered but currently disabled:
    # overdrive(0..10), limiter(-10..10), lowshelf(), highshelf().
    return effect(input)
def __call__(self, wav_file):
    """Load a mono wav file and apply the configured augmentations.

    Pipeline: load + normalize to float32 in [-1, 1) → optional
    resample/tempo/pitch via a sox effect chain → power normalization →
    optional padding, sample offset, and additive noise.

    Args:
        wav_file: path to a mono wav file.

    Returns:
        torch.FloatTensor with the augmented samples.

    Raises:
        IOError: if the file does not exist.
        SystemExit: if the file has more than one channel.
    """
    if not Path(wav_file).exists():
        raise IOError(f"wav file not found: {wav_file}")
    sr, wav = scipy.io.wavfile.read(wav_file)
    if wav.ndim > 1 and wav.shape[1] > 1:
        logger.error("wav file has two or more channels")
        sys.exit(1)

    # Normalize integer PCM to float32 in [-1, 1). Check the array dtype,
    # not the type of the first element (robust for empty arrays too).
    if wav.dtype == np.int32:
        wav = wav.astype('float32', copy=False) / 2147483648.0
    elif wav.dtype == np.int16:
        wav = wav.astype('float32', copy=False) / 32768.0
    elif wav.dtype == np.uint8:
        # uint8 PCM is unsigned, centered at 128: shift first, then scale.
        # (The previous `/ 256.0 - 128.0` produced values near -128.)
        wav = (wav.astype('float32', copy=False) - 128.0) / 128.0

    fx = AudioEffectsChain()
    if self.resample:
        if self.sample_rate > sr:
            ratio = int(self.sample_rate / sr)
            fx.upsample(ratio)
        elif self.sample_rate < sr:
            ratio = int(sr / self.sample_rate)
            fx.custom(f"downsample {ratio}")
    if self.tempo:
        tempo_change = np.random.uniform(*self.tempo_range)
        fx.tempo(tempo_change, opt_flag="s")
    if self.pitch:
        pitch_change = np.random.uniform(*self.pitch_range)
        fx.pitch(pitch_change)
    # Dithering smooths quantization artifacts from the chain above.
    fx.custom("dither -s")
    wav = fx(wav, sample_in=sr, sample_out=self.sample_rate)

    # Normalize audio power to a fixed RMS gain; guard against silence
    # so we never divide by zero.
    gain = 0.1
    wav_energy = np.sqrt(np.sum(np.power(wav, 2)) / wav.size)
    if wav_energy > 0:
        wav = gain * wav / wav_energy

    # Sample-domain padding.
    if self.padding:
        wav = np.pad(wav, self.num_padding, mode='constant')

    # Sample-domain circular offset.
    if self.offset:
        offset = np.random.randint(*self.offset_range)
        wav = np.roll(wav, offset, axis=0)

    # Additive Gaussian noise at a random SNR-derived level.
    if self.noise:
        snr = 10.0**(np.random.uniform(*self.noise_range) / 10.0)
        noise = np.random.normal(0, 1, wav.shape)
        noise_energy = np.sqrt(np.sum(np.power(noise, 2)) / noise.size)
        wav = wav + snr * gain * noise / noise_energy

    return torch.FloatTensor(wav)