import librosa
import sounddevice as sd
from pysndfx import AudioEffectsChain


def main():
    audio_name = 'rhythm.wav'
    audio_path = '../../data/audio_files/' + audio_name
    sample_rate = 44100
    wave, sr = librosa.load(audio_path, sr=sample_rate)

    fx = (
        AudioEffectsChain()
        .highshelf()
        .delay()
        .phaser()
        .reverb()
        .lowshelf()
    )
    result = fx(wave)

    print('Playing original sound...')
    sd.play(wave, sample_rate, blocking=True)
    print('Playing effected sound...')
    sd.play(result, sample_rate, blocking=True)
from glob import glob
from random import randint

from pysndfx import AudioEffectsChain


def createSamples(numVersions):
    # Generate the base files
    for intensity in range(1, 11):
        makeHeartbeats(tempoBpm=80, numBeats=10, includeThird=intensity,
                       filename="base/HB_S3_{}".format(intensity), write=True)
        _createSingleBeat("base/TR_{}".format(intensity), intensity,
                          tempoBpm=80, write=True)

    # Create different versions of each base sample
    baseSounds = glob("../media/base/*.wav", recursive=True)
    print('{} base sounds written'.format(len(baseSounds)))
    for version in range(1, numVersions + 1):
        # Randomize effects
        fx = (AudioEffectsChain()
              .reverb(reverberance=randint(0, 30),
                      hf_damping=randint(0, 30),
                      room_scale=randint(0, 30),
                      stereo_depth=randint(0, 30))
              .pitch(shift=randint(-300, 300)))
        # Apply effects to each intensity and write new file
        for sound in baseSounds:
            new = "../media/main/{}_v{}.wav".format(
                sound[sound.rfind('/'):sound.rfind('.')], version)
            # Write new sound
            fx(sound, new)
        print('Version {} sounds written'.format(version))
import random

from pysndfx import AudioEffectsChain


def randAudioAugment():
    fx = AudioEffectsChain()
    # Randomly decide which of the six effects to enable
    effect = [random.randint(0, 1) for i in range(6)]
    if effect[0] == 1:  # lowshelf
        randGain = random.randint(0, 12) * random.choice([-1, 1])
        randFreq = random.randint(20, 300)
        randSlop = random.uniform(1, 7) / 10  # 0.1~0.7
        fx.lowshelf(gain=randGain, frequency=randFreq, slope=randSlop)
    if effect[1] == 1:  # highshelf
        randGain = random.randint(0, 12) * random.choice([-1, 1])
        randFreq = random.randint(1000, 3000)
        randSlop = random.uniform(1, 7) / 10  # 0.1~0.7
        fx.highshelf(gain=randGain, frequency=randFreq, slope=randSlop)
    if effect[2] == 1:  # equalizer
        randFreq = random.randint(100, 3000)
        randQ = random.uniform(5, 15) / 10  # 0.5~1.5
        randDB = random.randint(0, 6) * random.choice([-1, 1])
        fx.equalizer(frequency=randFreq, q=randQ, db=randDB)
    if effect[3] == 1:  # overdrive
        randGain = random.randint(3, 7)
        fx.overdrive(gain=randGain, colour=40)
    if effect[4] == 1:  # phaser
        fx.phaser(gain_in=0.9, gain_out=0.8, delay=1, decay=0.25,
                  speed=2, triangular=False)
    if effect[5] == 1:  # reverb
        randReverb = random.randint(30, 70)
        randDamp = random.randint(30, 70)
        randRoom = random.randint(30, 70)
        randWet = random.randint(1, 6)
        fx.reverb(reverberance=randReverb,
                  hf_damping=randDamp,
                  room_scale=randRoom,
                  stereo_depth=100,
                  pre_delay=20,
                  wet_gain=randWet,
                  wet_only=False)
    return fx
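# A minimal usage sketch for randAudioAugment() (added illustration, not
# part of the original snippet; the file path is a placeholder): draw a
# fresh random chain per sample and apply it to a waveform.
import librosa

y, sr = librosa.load('example.wav', sr=16000)
fx = randAudioAugment()
y_augmented = fx(y, sample_in=sr, sample_out=sr)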
tts_checkpoint = MODEL_PATH
tts_config = CONFIG_PATH
use_cuda = False
tts_speakers = None
vocoder_checkpoint = VOCODER_MODEL_PATH
vocoder_config = VOCODER_CONFIG_PATH
wavernn_lib_path = None

synthesizer = Synthesizer(Config())

texts = """
Hello world, I can speak, yahoo!
"""
data = synthesizer.tts(texts)
with open("./audio_output/hello.wav", "wb") as f:
    f.write(data.read())

from pysndfx import AudioEffectsChain

fx = (
    AudioEffectsChain()
    .highshelf()
    .speed(0.8)
    .pitch(-31)
    .reverb(30)
    # .chorus(0.4, 0.6, [[55, 0.4, 0.55, .5, 't']])
    .lowshelf())

infile = './audio_output/hello.wav'
outfile = './audio_output/hello2.wav'

# Apply the highshelf/speed/pitch/reverb/lowshelf chain directly to an audio file.
fx(infile, outfile)
import os

import acoustics
import numpy as np
from librosa import load
from librosa.output import write_wav
from pydub import AudioSegment
from pysndfx import AudioEffectsChain

fx = (
    AudioEffectsChain()
    .normalize()
)


def test_on_one_file():
    infile = 'D:\\2019af-sr-aishell2\\AISHELL-2\\iOS\\data\\data\\D1048\\ID1048W0001.wav'
    outfile = './t.wav'
    fx(infile, outfile)


def add_noise():
    infile = 'D:\\2019af-sr-aishell2\\AISHELL-2\\iOS\\data\\data\\D1048\\ID1048W0001.wav'
    outfile = './t.wav'
    noise_file = "D:\\_background_noise_\\pink_noise.wav"
    sound1 = AudioSegment.from_file(infile)
    sound2 = AudioSegment.from_file(noise_file) - 30  # attenuate noise by 30 dB
    combined = sound1.overlay(sound2)
def _highshelf(self, chain: AudioEffectsChain):
    return chain.highshelf(frequency=3000)
def _speed_down(self, chain: AudioEffectsChain):
    return chain.speed(factor=np.random.randint(7, 10) / 10)
def sample_chord(self,
                 notes=None,
                 samples=44100,
                 sr=44100,
                 res_type=None,
                 noise=True,
                 arpeggio_prob=0,
                 random_crop=False,
                 return_audio=False,
                 extract_features=False):
    run = True
    while run:
        try:
            import random
            audio = np.zeros(samples)
            delay = 0  # np.random.randint(samples)
            if notes is None:
                notes = self.chords.sample(1).midi_notes.values[0]
            for note in notes:
                # print(note, delay)
                db = random.choice(self.dbs)
                single_note_audio = db[note - 40]

                # make the start a zero crossing
                start = np.min(
                    np.where(librosa.zero_crossings(single_note_audio)))
                single_note_audio = single_note_audio[start:]

                if single_note_audio.shape[0] < samples:
                    single_note_audio = np.pad(
                        single_note_audio,
                        (0, samples - single_note_audio.shape[0]),
                        mode='constant')
                assert single_note_audio.shape[0] == samples

                # data augmentation
                fx = (AudioEffectsChain().bandpass(
                    frequency=np.random.randint(100, 1000),
                    q=np.random.randint(1, 3)))
                single_note_audio = fx(single_note_audio)
                single_note_audio *= np.random.uniform(0.5, 1)

                # optionally crop to a random length, fading out at a zero crossing
                if random_crop:
                    zero_crossing = librosa.zero_crossings(single_note_audio)
                    random_end = np.random.randint(samples / 10, samples)
                    random_fade = np.random.randint(random_end + 1000,
                                                    random_end + 10000)
                    while (not zero_crossing[random_end]
                           and not zero_crossing[random_fade]):
                        random_end = np.random.randint(samples / 10, samples)
                        random_fade = np.random.randint(random_end + 1000,
                                                        random_end + 10000)
                    single_note_audio[random_end:random_end + random_fade] *= \
                        np.linspace(1, 0, random_fade)
                    single_note_audio[random_end:] = 0

                audio[delay:] += single_note_audio[:samples - delay]

                arpeggio = np.random.rand(1) < arpeggio_prob
                if arpeggio:
                    if np.random.rand(1) > 0.5:
                        delay += np.random.randint(samples)
                    else:
                        delay += np.random.randint(100)
                    if delay > samples - 1000:
                        delay = 0

            if noise and np.random.randint(2) == 1:
                audio += (np.random.normal(size=audio.shape) *
                          np.random.uniform(0, 0.005))

            if extract_features:
                # extract features
                features = abs(
                    librosa.cqt(audio, sr=sr,
                                n_bins=84 * 10,
                                bins_per_octave=12 * 10,
                                hop_length=64,
                                res_type=res_type))
                if sr == 16000:
                    features = features[:, :-1]
            else:
                features = None
            run = False
        except Exception as ex:
            print('ex', ex, notes, 'Retry')
            # print(librosa.midi_to_note(notes))
            import time
            time.sleep(1)
    return features
from pysndfx import AudioEffectsChain


def lift_baseline_gain(signal):
    # Boost the overall level by 2 dB.
    fx = AudioEffectsChain().gain(2)
    return fx(signal)
import librosa
import soundfile as sf
import os
import numpy as np
import random
from pysndfx import AudioEffectsChain

noise_dir = '/media/pranjal/Seagate Backup Plus Drive/DataASR/Noise/noise.npy'
clean_dir = '/media/pranjal/Seagate Backup Plus Drive/DataASR/Russian/ru/clips_wav/'
out_dir_noisy = '/media/pranjal/Seagate Backup Plus Drive/DataASR/Russian/ru/clips_pickle_noise/'
out_dir_clean = '/media/pranjal/Seagate Backup Plus Drive/DataASR/Russian/ru/clips_pickle_clean/'

noise = np.load(noise_dir)

fx = (
    AudioEffectsChain().highshelf().reverb().phaser()
    # .delay()
    .lowshelf())

for file in os.listdir(clean_dir):
    audio, sr = librosa.load(clean_dir + str(file))
    x = random.randrange(0, len(noise) - len(audio), 1)
    noisy = audio + noise[x:x + len(audio)] * (np.random.uniform(0.01, 0.012))
    noisy = noisy + fx(noisy) * (np.random.uniform(0.13, 0.19))
    # spectrum = librosa.stft(audio, n_fft=512)  # For clean speech
    # print(type(spectrum))
    # spectrum_imag = np.imag(spectrum)
    # spectrum_real = np.real(spectrum)
    # spectrum_new = np.dstack((spectrum_real, spectrum_imag))
    # spectrum1 = librosa.stft(noisy, n_fft=512)  # For noisy audio
# TODO: Add bpm analysis for song 'chopping'.
import hashlib
import os
import sys
import uuid

import yt_dlp as youtube_dl
from pysndfx import AudioEffectsChain
from pydub import AudioSegment

vw_fx_chain = (AudioEffectsChain()
               .speed(0.75)
               .chorus(0.4, 0.6, [[55, 0.4, 0.55, .5, 't']])
               .reverb(reverberance=30,
                       hf_damping=50,
                       room_scale=100,
                       stereo_depth=100,
                       pre_delay=20,
                       wet_gain=0,
                       wet_only=False))

output_dir = 'output'

ydl_config = {
    'quiet': True,
    'format': 'bestaudio/best',
    'postprocessors': [{
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'wav',
        'preferredquality': '192',  # Originally 192
    }],
}
from pysndfx import AudioEffectsChain


def add_echo(audio, echo=100):
    # Echo is approximated with sox reverb; `echo` sets the reverberance amount.
    fx = AudioEffectsChain().reverb(reverberance=echo)
    return fx(audio)
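# Hedged usage example (added illustration; the sine test tone is an
# assumption, not from the original source): add_echo accepts anything
# AudioEffectsChain can process, such as a numpy waveform.
import numpy as np

sr = 44100
tone = np.sin(2 * np.pi * 440 * np.arange(sr) / sr).astype(np.float32)
echoed = add_echo(tone, echo=60)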
def extract_features(speaker_files, features, params):
    speaker_features = defaultdict()
    for speaker_id in tqdm(speaker_files.keys()):
        data_tot, labels_tot, labels_segs_tot, segs = list(), list(), list(), list()
        for wav_path, emotion in speaker_files[speaker_id]:
            # Read wave data
            x, sr = librosa.load(wav_path, sr=None)
            # Apply pre-emphasis filter
            x = librosa.effects.preemphasis(x, zi=[0.0])
            # Extract required features into (C,F,T)
            features_data = GET_FEATURES[features](x, sr, params)
            # Segment features into (N,C,F,T)
            features_segmented = segment_nd_features(features_data, emotion,
                                                     params['segment_size'])
            # Collect all the segments
            data_tot.append(features_segmented[1])
            labels_tot.append(features_segmented[3])
            labels_segs_tot.extend(features_segmented[2])
            segs.append(features_segmented[0])

        # Post process
        data_tot = np.vstack(data_tot).astype(np.float32)
        labels_tot = np.asarray(labels_tot, dtype=np.int8)
        labels_segs_tot = np.asarray(labels_segs_tot, dtype=np.int8)
        segs = np.asarray(segs, dtype=np.int8)

        # Make sure everything is extracted properly
        assert len(labels_tot) == len(segs)
        assert data_tot.shape[0] == labels_segs_tot.shape[0] == sum(segs)

        # Mix with noise
        if params['mixnoise']:
            data_tot = np.expand_dims(data_tot, axis=0)

            # + NOISE
            for noise in NOISE_FILES.keys():
                data_noise_tot = list()
                noise_path = NOISE_DIR + noise
                labels_temp, segs_temp = list(), list()  # for verification
                for wav_path, emotion in speaker_files[speaker_id]:
                    create_mixed_audio_file(wav_path, noise_path,
                                            'temp_noise.wav',
                                            NOISE_FILES[noise])
                    # Load mixed speech
                    x_noise, sr_noise = librosa.load('temp_noise.wav', sr=None)
                    assert sr_noise == sr
                    # Pre-emphasis
                    x_noise = librosa.effects.preemphasis(x_noise, zi=[0.0])
                    # assert len(x_noise) == len(x)
                    # Extract required features into (C,F,T)
                    features_data_noise = GET_FEATURES[features](x_noise, sr,
                                                                 params)
                    # Segment features into (N,C,F,T)
                    features_noise_segmented = segment_nd_features(
                        features_data_noise, emotion, params['segment_size'])
                    # Collect all the segments
                    data_noise_tot.append(features_noise_segmented[1])
                    labels_temp.append(features_noise_segmented[3])
                    segs_temp.append(features_noise_segmented[0])
                    os.remove('temp_noise.wav')

                data_noise_tot = np.vstack(data_noise_tot).astype(np.float32)
                data_noise_tot = np.expand_dims(data_noise_tot, axis=0)
                assert data_noise_tot.shape[1] == data_tot.shape[1]
                labels_temp = np.asarray(labels_temp, dtype=np.int8)
                segs_temp = np.asarray(segs_temp, dtype=np.int8)
                assert np.array_equal(labels_temp, labels_tot)
                assert np.array_equal(segs_temp, segs)
                data_tot = np.concatenate((data_tot, data_noise_tot), axis=0)

            # + REVERB
            data_reverb_tot = list()
            labels_temp, segs_temp = list(), list()  # for verification
            fx = (AudioEffectsChain().reverb())
            for wav_path, emotion in speaker_files[speaker_id]:
                # Read wave data
                x, sr = librosa.load(wav_path, sr=None)
                # Apply reverb
                x_reverb = fx(x)
                # Pre-emphasis
                x_reverb = librosa.effects.preemphasis(x_reverb, zi=[0.0])
                # Extract required features into (C,F,T)
                features_data_reverb = GET_FEATURES[features](x_reverb, sr,
                                                              params)
                # Segment features into (N,C,F,T)
                features_reverb_segmented = segment_nd_features(
                    features_data_reverb, emotion, params['segment_size'])
                # Collect all the segments
                data_reverb_tot.append(features_reverb_segmented[1])
                labels_temp.append(features_reverb_segmented[3])
                segs_temp.append(features_reverb_segmented[0])

            data_reverb_tot = np.vstack(data_reverb_tot).astype(np.float32)
            data_reverb_tot = np.expand_dims(data_reverb_tot, axis=0)
            assert data_reverb_tot.shape[1] == data_tot.shape[1]
            labels_temp = np.asarray(labels_temp, dtype=np.int8)
            segs_temp = np.asarray(segs_temp, dtype=np.int8)
            assert np.array_equal(labels_temp, labels_tot)
            assert np.array_equal(segs_temp, segs)
            data_tot = np.concatenate((data_tot, data_reverb_tot), axis=0)

            # Make sure everything is extracted properly
            assert data_tot.shape[0] == len(NOISE_FILES.keys()) + 2

        # Put into speaker features dictionary
        print(data_tot.shape)
        speaker_features[speaker_id] = (data_tot, labels_tot,
                                        labels_segs_tot, segs)
        # if params['mixnoise'] == True:
        #     speaker_features[speaker_id] = ((data_tot, data_noise_tot), labels_tot, labels_segs_tot, segs)
        # else:
        #     speaker_features[speaker_id] = (data_tot, labels_tot, labels_segs_tot, segs)

    assert len(speaker_features) == len(speaker_files)
    return speaker_features
def __call__(self, wav_file):
    if not Path(wav_file).exists():
        print(wav_file)
        raise IOError
    sr, wav = scipy.io.wavfile.read(wav_file)
    if wav.ndim > 1 and wav.shape[1] > 1:
        logger.error("wav file has two or more channels")
        sys.exit(1)
    # Convert integer PCM to float32 in [-1, 1); 8-bit WAV is unsigned
    # with a 128 offset.
    if type(wav[0]) is np.int32:
        wav = wav.astype('float32', copy=False) / 2147483648.0
    elif type(wav[0]) is np.int16:
        wav = wav.astype('float32', copy=False) / 32768.0
    elif type(wav[0]) is np.uint8:
        wav = (wav.astype('float32', copy=False) - 128.0) / 128.0

    fx = AudioEffectsChain()
    if self.resample:
        if self.sample_rate > sr:
            ratio = int(self.sample_rate / sr)
            fx.upsample(ratio)
        elif self.sample_rate < sr:
            ratio = int(sr / self.sample_rate)
            fx.custom(f"downsample {ratio}")
    if self.tempo:
        tempo_change = np.random.uniform(*self.tempo_range)
        fx.tempo(tempo_change, opt_flag="s")
    if self.pitch:
        pitch_change = np.random.uniform(*self.pitch_range)
        fx.pitch(pitch_change)
    # dithering
    fx.custom("dither -s")
    wav = fx(wav, sample_in=sr, sample_out=self.sample_rate)
    #wav = wav / max(abs(wav))

    # normalize audio power
    gain = 0.1
    wav_energy = np.sqrt(np.sum(np.power(wav, 2)) / wav.size)
    wav = gain * wav / wav_energy

    # sample-domain padding
    if self.padding:
        wav = np.pad(wav, self.num_padding, mode='constant')

    # sample-domain offset
    if self.offset:
        offset = np.random.randint(*self.offset_range)
        wav = np.roll(wav, offset, axis=0)

    if self.noise:
        snr = 10.0 ** (np.random.uniform(*self.noise_range) / 10.0)
        noise = np.random.normal(0, 1, wav.shape)
        noise_energy = np.sqrt(np.sum(np.power(noise, 2)) / noise.size)
        wav = wav + snr * gain * noise / noise_energy

    #filename = wav_file.replace(".wav", "_augmented.wav")
    #scipy.io.wavfile.write(filename, self.sample_rate, wav)
    return torch.FloatTensor(wav)
import sys

from pysndfx import AudioEffectsChain

fx = (AudioEffectsChain().reverb().delay())

infile = sys.argv[1]
outfile = sys.argv[2]

fx(infile, outfile)
def process(self, wave_data: numpy.ndarray):
    # Only the band-reject chain is applied; high_shelf is built but unused.
    low_shelf = AudioEffectsChain().bandreject(80, q=10.0)
    high_shelf = AudioEffectsChain().highpass(150)
    return low_shelf(wave_data)
def addFx(sound, effects, pad=3000, fade_in=100, fade_out=100):
    # Add padding
    if pad > 0:
        sound += AudioSegment.silent(duration=pad,
                                     frame_rate=sound.frame_rate)

    # Convert pydub sound to np array
    samples = np.array(sound.get_array_of_samples())
    samples = samples.astype(np.int16)

    chain = AudioEffectsChain()
    for effect, value in effects:
        if effect == "reverb" and value > 0:
            chain.reverb(reverberance=value)
        elif effect == "distortion" and value > 0:
            chain.overdrive(gain=value)
        elif effect == "highpass" and value > 0:
            chain.highpass(value)
        elif effect == "lowpass" and value > 0:
            chain.lowpass(value)
        elif effect == "bass":
            frequency = 100
            gain = value
            if isinstance(value, tuple):
                gain, frequency = value
            print("%s, %s" % (gain, frequency))
            chain.highshelf(gain=gain, frequency=frequency)
        elif effect == "echo":
            echoStr = "echo 0.8 0.9"
            amount = value
            count = 1
            # Check if we have an echo count indicated
            if isinstance(value, tuple):
                amount, count = value
            for i in range(count):
                # Amount between 10 (robot) and 1000 (mountains)
                echoStr += " %s 0.3" % amount
            chain.custom(echoStr)
        elif effect == "tempo" and value != 1.0 and value != 1:
            chain.tempo(factor=value)

    # Apply the assembled effects chain
    fx = chain
    y = fx(samples)

    # Convert it back to an array and create a new sound clip
    newData = array.array(sound.array_type, y)
    newSound = sound._spawn(newData)
    dur = len(newSound)
    newSound = newSound.fade_in(min(fade_in, dur)).fade_out(min(fade_out, dur))
    return newSound
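# A usage sketch for addFx (added illustration; the file names and the
# effects list are assumptions): effects is a list of (name, value) pairs,
# where "bass" and "echo" also accept (value, extra) tuples.
from pydub import AudioSegment

sound = AudioSegment.from_wav('input.wav')
effects = [("reverb", 40), ("echo", (100, 2)), ("tempo", 1.2)]
processed = addFx(sound, effects, pad=2000)
processed.export('output.wav', format='wav')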
from pysndfx import AudioEffectsChain
from os import remove


def processEffects(files):
    """Apply a light reverb and an upward pitch shift to the recorded S3 files."""
    rec = ['../media/set2/S3_1final.wav', '../media/set2/S3_2final.wav',
           '../media/set2/S3_3final.wav', '../media/set2/S3_4final.wav',
           '../media/set2/S3_5final.wav', '../media/set2/S3_6final.wav',
           '../media/set2/S3_7final.wav', '../media/set2/S3_8final.wav',
           '../media/set2/S3_9final.wav', '../media/set2/S3_10final.wav']
    fx = (
        AudioEffectsChain()
        .reverb(reverberance=5, hf_damping=15, room_scale=20, stereo_depth=30)
        .pitch(shift=300)
    )
    for x in rec:
        infile = x
        outfile = x.replace('final', 'v5')
        fx(infile, outfile)


def makeHeartbeats(
        filename="python_heartbeat",
        numBeats=10,
        tempoBpm=100,
        fs=44100,
        includeThird=0,
        write=False
):
    """
    Creates numBeats heartbeat cycles, writing the output as WAV.
    """
import scipy.signal as sg
from pysndfx import AudioEffectsChain


def filter_audio(y, sr=16_000, cutoff=15_000, low_cutoff=1, filter_order=5):
    sos = sg.butter(filter_order, [low_cutoff / sr / 2, cutoff / sr / 2],
                    btype='band', analog=False, output='sos')
    filtered = sg.sosfilt(sos, y)
    return filtered


def shelf(y, sr=16_000, gain=5, frequency=500, slope=0.5, high_frequency=7_000):
    afc = AudioEffectsChain()
    fx = afc.lowshelf(gain=gain, frequency=frequency, slope=slope) \
            .highshelf(gain=-gain, frequency=high_frequency, slope=slope)
    y = fx(y, sample_in=sr, sample_out=sr)
    return y
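# Minimal sketch chaining both helpers (added illustration; the file path
# is a placeholder): band-limit the signal, then tilt the spectrum with
# the shelving filters.
import librosa

y, sr = librosa.load('speech.wav', sr=16_000)
y = filter_audio(y, sr=sr)
y = shelf(y, sr=sr, gain=5)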
import os
import random

from pysndfx import AudioEffectsChain


def getRandomFile():
    return "songs/" + random.choice(os.listdir("songs/"))


if __name__ == "__main__":
    print("Running...")
    fx = (AudioEffectsChain().equalizer(2000, db=10.0))
    fx("sounds/doh.wav", "proc_test_pysndfx.wav")
    print("Done!")
def _speed_up(self, chain: AudioEffectsChain):
    return chain.speed(factor=np.random.randint(11, 17) / 10)
def enhance(y):
    apply_audio_effects = (AudioEffectsChain()
                           .lowshelf(gain=10.0, frequency=260, slope=0.1)
                           .reverb(reverberance=25,
                                   hf_damping=5,
                                   room_scale=5,
                                   stereo_depth=50,
                                   pre_delay=20,
                                   wet_gain=0,
                                   wet_only=False))  # .normalize()
    y_enhanced = apply_audio_effects(y)
    return y_enhanced
def _reverb(self, chain: AudioEffectsChain):
    return chain.reverb()
def _make_chain(low, high):
    return (AudioEffectsChain().lowpass(high, 3.0).highpass(low, 3.0))
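# Usage sketch (added illustration with assumed cutoff values): build a
# telephone-style band-limiting chain and run it over a waveform.
import numpy as np

chain = _make_chain(300, 3400)
wave = np.random.randn(16000).astype(np.float32)  # 1 s of noise as a stand-in
narrowband = chain(wave, sample_in=16000)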
def _lowshelf(self, chain: AudioEffectsChain):
    return chain.lowshelf(frequency=300)
parser.add_argument('--speed', '-s', type=float, default=1)
# type=bool is unreliable with argparse (bool('False') is True), so use a flag
parser.add_argument('--reverb', '-r', action='store_true')
parser.add_argument('--fps', type=int, default=30)
parser.add_argument('--bitrate', type=str, default='12000k')
parser.add_argument('--margin-left', type=int, default=0)
parser.add_argument('--margin-right', type=int, default=0)
args = parser.parse_args()

# we import everything we need here for performance
from moviepy.editor import *
from pysndfx import AudioEffectsChain
from math import trunc
import os

# apply effects to audio
fx = AudioEffectsChain().speed(args.speed)
if args.reverb:
    fx = fx.reverb()
fx(args.audio, 'tmp.mp3')

# check if the background is a gif
is_gif = args.background.endswith('.gif')

# load the background and the audio
audio = AudioFileClip('tmp.mp3')
image = VideoFileClip(args.background) if is_gif else ImageClip(args.background)
image = image.margin(left=args.margin_left, right=args.margin_right)
from array import array

import numpy as np
from pydub import AudioSegment
from pysndfx import AudioEffectsChain


def change_speed(audio, speed=1.0):
    out = audio._spawn(audio.raw_data,
                       overrides={'frame_rate': int(audio.frame_rate * speed)})
    return out.set_frame_rate(audio.frame_rate)


filename = str(input())
audio = AudioSegment.from_mp3(filename)
audio = change_speed(audio, speed=0.861)

samples = audio.get_array_of_samples()
npsamples = np.array(samples)

fx = (AudioEffectsChain().reverb(reverberance=70))
npsamples = fx(npsamples)

samples = array(audio.array_type, npsamples)
audio = audio._spawn(samples)

outputname, _, format = filename.partition('.')
outputname = outputname + "_snr" + '.' + format
audio.export(outputname, format=format, bitrate="320k")
def process(self, wave_data: np.ndarray):
    apply_audio_effects = AudioEffectsChain().pitch(200).tremolo(500).delay(
        0.6, 0.8, [33], [0.9])
    return apply_audio_effects(wave_data,
                               sample_in=BASE_SAMPLING_RATE,
                               sample_out=BASE_SAMPLING_RATE)
from pysndfx import AudioEffectsChain


def normalize(signal, target_dB):
    # Normalize the peak level to target_dB via the sox "norm" effect.
    fx = (AudioEffectsChain().custom("norm {}".format(target_dB)))
    signal = fx(signal)
    return signal
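# Usage sketch (added illustration; the noise signal is a stand-in): here
# the custom "norm" effect normalizes the waveform to -3 dB.
import numpy as np

signal = 0.1 * np.random.randn(44100).astype(np.float32)
normalized = normalize(signal, target_dB=-3)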
def process(self, wave_data: np.ndarray):
    pitch_shift = AudioEffectsChain().pitch(self.pitch_shift)
    return pitch_shift(wave_data,
                       sample_in=BASE_SAMPLING_RATE,
                       sample_out=BASE_SAMPLING_RATE)