Example #1
0
def addFx(sound, effects, pad=3000, fade_in=100, fade_out=100):
    """Apply a chain of sox-style effects to a pydub AudioSegment.

    Args:
        sound: pydub AudioSegment to process (not modified in place).
        effects: iterable of (name, value) pairs. Supported names:
            "reverb", "distortion", "highpass", "lowpass", "bass",
            "echo", "tempo". For "bass" the value may be a bare gain or
            a (gain, frequency) tuple; for "echo" a bare amount or an
            (amount, count) tuple.
        pad: trailing silence in ms appended before processing, so
            time-based effects (reverb/echo) have room to decay.
        fade_in: fade-in duration in ms applied to the result.
        fade_out: fade-out duration in ms applied to the result.

    Returns:
        A new AudioSegment with the effects, padding and fades applied.
    """
    # Append trailing silence so effect tails are not cut off.
    if pad > 0:
        sound += AudioSegment.silent(duration=pad, frame_rate=sound.frame_rate)

    # pysndfx operates on raw sample buffers, not pydub objects.
    samples = np.array(sound.get_array_of_samples())
    samples = samples.astype(np.int16)

    chain = AudioEffectsChain()
    for effect, value in effects:
        if effect == "reverb" and value > 0:
            chain.reverb(reverberance=value)
        elif effect == "distortion" and value > 0:
            chain.overdrive(gain=value)
        elif effect == "highpass" and value > 0:
            chain.highpass(value)
        elif effect == "lowpass" and value > 0:
            chain.lowpass(value)
        elif effect == "bass":
            # Value may be a bare gain or a (gain, frequency) tuple.
            frequency = 100
            gain = value
            if isinstance(value, tuple):
                gain, frequency = value
            # NOTE(review): highshelf boosts frequencies ABOVE `frequency`;
            # for a bass boost a lowshelf would be expected — confirm intent.
            chain.highshelf(gain=gain, frequency=frequency)
        elif effect == "echo":
            # sox echo syntax: "echo gain-in gain-out [delay decay]..."
            echoStr = "echo 0.8 0.9"
            amount = value
            count = 1
            # An (amount, count) tuple requests multiple echo taps.
            if isinstance(value, tuple):
                amount, count = value
            for _ in range(count):
                # amount between 10 (robot) and 1000 (mountains)
                echoStr += " %s 0.3" % amount
            chain.custom(echoStr)
        elif effect == "tempo" and value != 1:
            # value != 1 covers 1.0 too (1 == 1.0 in Python).
            chain.tempo(factor=value)

    # Run the accumulated effect chain over the raw samples.
    y = chain(samples)

    # Convert back to an array and rebuild a pydub clip from it.
    newData = array.array(sound.array_type, y)
    newSound = sound._spawn(newData)
    # Clamp the fades so they never exceed the clip duration.
    dur = len(newSound)
    newSound = newSound.fade_in(min(fade_in, dur)).fade_out(min(fade_out, dur))
    return newSound
Example #2
0
def worker(_):
    """Generate 8 s of Gaussian noise and run it through a random fx chain.

    Pitch shift, tempo factor and reverberance are each drawn uniformly
    at random; the argument is ignored (pool-map compatibility).
    """
    noise = np.random.standard_normal(44100 * 8)

    # Each fluent call both mutates and returns the chain, so the chained
    # form is equivalent to repeated reassignment.
    chain = (
        AudioEffectsChain()
        .pitch(np.random.uniform(-300, 300))
        .tempo(np.random.uniform(0.8, 1.2))
        .reverb(np.random.uniform(0, 100))
    )

    return chain(noise)
Example #3
0
    def __call__(self, input):
        """Randomly augment `input`: pitch, tempo and reverb are each
        applied independently with probability 0.5, with parameters drawn
        uniformly at random."""
        chain = AudioEffectsChain()

        # Each candidate draws its own coin flip first; the effect
        # parameter is only sampled when the effect is actually applied,
        # so the RNG consumption order matches an explicit if-chain.
        candidates = (
            lambda c: c.pitch(np.random.uniform(-300, 300)),
            lambda c: c.tempo(np.random.uniform(0.8, 1.2)),
            lambda c: c.reverb(np.random.uniform(0, 100)),
        )
        for add_effect in candidates:
            if np.random.uniform() > 0.5:
                chain = add_effect(chain)

        return chain(input)
Example #4
0
    def __call__(self, wav_file):
        """Load a mono wav file and return it as an augmented torch.FloatTensor.

        Pipeline: read and scale integer PCM to float32 in [-1, 1), optional
        sox resampling / tempo / pitch perturbation, shaped dithering, RMS
        power normalization, then optional sample-domain padding, circular
        offset and additive Gaussian noise.

        Raises:
            IOError: if `wav_file` does not exist.
        Exits the process when the file has more than one channel.
        """
        if not Path(wav_file).exists():
            # Carry the path in the exception instead of printing it.
            raise IOError(f"wav file not found: {wav_file}")

        sr, wav = scipy.io.wavfile.read(wav_file)
        if wav.ndim > 1 and wav.shape[1] > 1:
            logger.error("wav file has two or more channels")
            sys.exit(1)

        # Check dtype on the array (not wav[0]) so empty files don't raise
        # IndexError, and scale integer PCM to float32.
        if wav.dtype == np.int32:
            wav = wav.astype('float32', copy=False) / 2147483648.0
        elif wav.dtype == np.int16:
            wav = wav.astype('float32', copy=False) / 32768.0
        elif wav.dtype == np.uint8:
            # uint8 PCM is unsigned with a 128 offset: map [0, 255] -> [-1, 1).
            # (The previous "/ 256.0 - 128.0" left values near -128.)
            wav = wav.astype('float32', copy=False) / 128.0 - 1.0

        fx = AudioEffectsChain()

        if self.resample:
            # Only integer resampling ratios are supported here.
            if self.sample_rate > sr:
                ratio = int(self.sample_rate / sr)
                fx.upsample(ratio)
            elif self.sample_rate < sr:
                ratio = int(sr / self.sample_rate)
                fx.custom(f"downsample {ratio}")

        if self.tempo:
            tempo_change = np.random.uniform(*self.tempo_range)
            # opt_flag="s" selects sox's speech-optimized tempo algorithm.
            fx.tempo(tempo_change, opt_flag="s")

        if self.pitch:
            pitch_change = np.random.uniform(*self.pitch_range)
            fx.pitch(pitch_change)

        # Shaped dithering (plain string; no f-string placeholders needed).
        fx.custom("dither -s")

        wav = fx(wav, sample_in=sr, sample_out=self.sample_rate)

        # Normalize RMS energy to a fixed gain; skip all-zero signals to
        # avoid a division by zero producing inf/nan.
        gain = 0.1
        wav_energy = np.sqrt(np.sum(np.power(wav, 2)) / wav.size)
        if wav_energy > 0:
            wav = gain * wav / wav_energy

        # Sample-domain padding.
        if self.padding:
            wav = np.pad(wav, self.num_padding, mode='constant')

        # Sample-domain offset: random circular shift.
        if self.offset:
            offset = np.random.randint(*self.offset_range)
            wav = np.roll(wav, offset, axis=0)

        # Additive white noise; noise_range is interpreted in dB.
        if self.noise:
            snr = 10.0**(np.random.uniform(*self.noise_range) / 10.0)
            noise = np.random.normal(0, 1, wav.shape)
            noise_energy = np.sqrt(np.sum(np.power(noise, 2)) / noise.size)
            wav = wav + snr * gain * noise / noise_energy

        return torch.FloatTensor(wav)