def slicer():
    """Cut every WAV file in ``noise_train`` into consecutive 10-second clips.

    Reads ``*.wav`` files from the ``noise_train`` directory next to this
    script and writes the clips to ``noise_train_new`` (also next to this
    script) as ``<source stem>_<clip index>.wav``.  A trailing remainder
    shorter than one full clip is discarded.
    """
    audioformat = "*.wav"
    clip_seconds = 10

    base_dir = os.path.dirname(__file__)
    audio_dir = os.path.join(base_dir, 'noise_train')
    output_dir = os.path.join(base_dir, 'noise_train_new')
    # Create the directory that is actually written to (the old check looked
    # at the input directory and therefore never created the output one).
    os.makedirs(output_dir, exist_ok=True)

    for wavfile in glob.glob(os.path.join(audio_dir, audioformat)):
        samples, fs = audioread(wavfile)
        # Derive the clip length from the file's own sample rate.
        clip_length = int(clip_seconds * fs)
        # basename() works with both "/" and "\\" separators.
        stem = os.path.basename(wavfile).split(".")[0]
        for i in range(len(samples) // clip_length):
            # Step by a whole clip so consecutive clips do not overlap.
            start = i * clip_length
            clip = samples[start:start + clip_length]
            clip_path = os.path.join(output_dir, stem + "_" + str(i) + ".wav")
            audiowrite(clip, fs, clip_path, norm=False)
    return
def main(cfg):
    """Generate clean / noise / mixture audio files for every SNR level.

    For each mixture index a random speech file and a random noise file are
    read and extended with ``concat_to_size``.  Each base pair is then scaled
    to every SNR in ``np.linspace(snr_lower, snr_upper, total_snrlevels)``,
    passed through the room simulation selected by ``cfg['room_type']`` and
    written to the three output directories returned by ``make_output_dirs``.

    Args:
        cfg: mapping with at least the keys ``audio_format``, ``snr_lower``,
            ``snr_upper``, ``total_snrlevels``, ``fs``, ``total_hours``,
            ``audio_length``, ``silence_length`` and ``room_type`` (plus
            ``k_choices`` when ``room_type`` is ``'multi_k'``).
    """
    mixture_dir, clean_dir, noise_dir = make_output_dirs(cfg)
    audio_format = cfg["audio_format"]
    speech_files, noise_files = get_speech_and_noise_files(cfg)
    snr_lower = cfg["snr_lower"]
    snr_upper = cfg["snr_upper"]
    total_snrlevels = cfg["total_snrlevels"]
    fs = cfg["fs"]
    total_hours = cfg["total_hours"]
    audio_length = cfg["audio_length"]
    silence_length = cfg["silence_length"]

    total_num_mixtures = int(total_hours * 60 * 60 // audio_length)
    for cur_mix_idx in tqdm.tqdm(range(total_num_mixtures)):
        idx_s = np.random.randint(0, np.size(speech_files))
        base_clean, fs = audioread(speech_files[idx_s])
        base_clean = concat_to_size(base_clean, speech_files, idx_s,
                                    audio_length, silence_length, fs)

        idx_n = np.random.randint(0, np.size(noise_files))
        base_noise, fs = audioread(noise_files[idx_n])
        # Extend short noise with more *noise* files; the previous call passed
        # the speech file list/index here, which padded the noise with speech.
        base_noise = concat_to_size(base_noise[:len(base_clean)], noise_files,
                                    idx_n, audio_length, silence_length, fs)
        base_noise = base_noise[:len(base_clean)]

        for snr_db in np.linspace(snr_lower, snr_upper, total_snrlevels):
            clean, noise = base_clean.copy(), base_noise.copy()
            clean, noise = snr_setter(clean=clean, noise=noise, snr=snr_db)

            if cfg['room_type'] == 'single_k':
                clean, noise, mixtures = simulate_k_room(clean, noise, cfg)
            elif cfg['room_type'] == 'multi_k':
                # Pick one microphone layout per mixture without touching cfg.
                cur_cfg = cfg.copy()
                selected_k = np.random.choice(cfg['k_choices'])
                cur_cfg['n_noise_mics'] = selected_k['n_noise_mics']
                cur_cfg['n_speech_mics'] = selected_k['n_speech_mics']
                clean, noise, mixtures = simulate_k_room(clean, noise, cur_cfg)
            else:
                clean, noise, mixtures = simulate_room(
                    clean, noise, cfg, cur_mix_idx)

            n_keep = audio_length * fs
            clean, noise, mixtures = clean[:n_keep], noise[:n_keep], mixtures[:n_keep]

            # The three outputs share one file name and differ only by folder.
            fname = '{}_SNR_{}'.format(cur_mix_idx, snr_db) + audio_format
            clean_path = os.path.join(clean_dir, fname)
            noise_path = os.path.join(noise_dir, fname)
            mix_path = os.path.join(mixture_dir, fname)

            audiowrite(clean, fs, clean_path, norm=False)
            audiowrite(noise, fs, noise_path, norm=False)
            audiowrite(mixtures, fs, mix_path, norm=False)
def main_gen(params):
    '''Calls gen_audio() to generate the audio signals, verifies that they meet
    the requirements, and writes the files to storage.

    Produces one noisy/clean/noise triple per file index from
    params['fileindex_start'] to params['fileindex_end'] (inclusive).  The
    clean speech is convolved with a randomly chosen impulse response before
    mixing.  A mixture that clips is not written and the same file index is
    attempted again.

    Returns six lists: clean source / clipped / low-activity files followed by
    noise source / clipped / low-activity files.
    '''
    clean_source_files, clean_clipped_files, clean_low_activity_files = [], [], []
    noise_source_files, noise_clipped_files, noise_low_activity_files = [], [], []

    clean_index = 0
    noise_index = 0
    file_num = params['fileindex_start']

    while file_num <= params['fileindex_end']:
        # generate clean speech
        clean, clean_sf, clean_cf, clean_laf, clean_index = gen_audio(
            True, params, clean_index)

        # add reverb with a randomly selected RIR
        rir_index = random.randint(0, len(params['myrir']) - 1)
        rir_path = os.path.normpath(
            os.path.join('datasets', 'impulse_responses',
                         params['myrir'][rir_index]))
        _, rir_samples = wavfile.read(rir_path)
        rir_channel = int(params['mychannel'][rir_index])
        if rir_samples.ndim == 1:
            rir = np.array(rir_samples)
        else:
            # 'mychannel' is 1-based, hence the -1
            rir = rir_samples[:, rir_channel - 1]
        clean = add_pyreverb(clean, rir)

        # generate noise of the same length as the reverberant speech
        noise, noise_sf, noise_cf, noise_laf, noise_index = gen_audio(
            False, params, noise_index, len(clean))

        clean_clipped_files.extend(clean_cf)
        clean_low_activity_files.extend(clean_laf)
        noise_clipped_files.extend(noise_cf)
        noise_low_activity_files.extend(noise_laf)

        # fixed SNR, or an integer drawn from [snr_lower, snr_upper)
        if params['randomize_snr']:
            snr = np.random.randint(params['snr_lower'], params['snr_upper'])
        else:
            snr = params['snr']

        clean_snr, noise_snr, noisy_snr, target_level = segmental_snr_mixer(
            params=params, clean=clean, noise=noise, snr=snr)

        # unexpected clipping: skip writing and try this file index again
        if is_clipped(clean_snr) or is_clipped(noise_snr) or is_clipped(noisy_snr):
            print("Warning: File #" + str(file_num) + " has unexpected clipping, " +
                  "returning without writing audio to disk")
            continue

        clean_source_files.extend(clean_sf)
        noise_source_files.extend(noise_sf)

        # build the output names from the source file names (extension cut off)
        clean_names = [path[:-4].split(os.path.sep)[-1] for path in clean_sf]
        noise_names = [path[:-4].split(os.path.sep)[-1] for path in noise_sf]
        clean_files_joined = '-'.join(clean_names)[:MAXFILELEN]
        noise_files_joined = '-'.join(noise_names)[:MAXFILELEN]

        noisyfilename = ''.join([
            clean_files_joined, '_', noise_files_joined, '_snr', str(snr),
            '_tl', str(target_level), '_fileid_', str(file_num), '.wav'])
        cleanfilename = 'clean_fileid_' + str(file_num) + '.wav'
        noisefilename = 'noise_fileid_' + str(file_num) + '.wav'

        outputs = [
            (os.path.join(params['noisyspeech_dir'], noisyfilename), noisy_snr),
            (os.path.join(params['clean_proc_dir'], cleanfilename), clean_snr),
            (os.path.join(params['noise_proc_dir'], noisefilename), noise_snr),
        ]

        file_num += 1
        for out_path, signal in outputs:
            try:
                audiowrite(out_path, signal, params['fs'])
            except Exception as e:
                print(str(e))

    return clean_source_files, clean_clipped_files, clean_low_activity_files, \
        noise_source_files, noise_clipped_files, noise_low_activity_files
def __call__(self, noisy_speech_filename, output_dir=None):
    """Apply NSNet model to one file and produce an output file with clean speech.

    The signal is zero-padded, processed one windowed frame at a time and
    rebuilt by overlap-add.  The result is written under the input's base
    name into ``output_dir`` (or ``self.output_dir`` when that is falsy).
    """
    target_dir = output_dir or self.output_dir
    enhanced_filename = os.path.join(
        target_dir, os.path.basename(noisy_speech_filename))

    logging.info("NSNet inference: %s", noisy_speech_filename)
    sig, sample_rate = sf.read(noisy_speech_filename)

    ssize = len(sig)
    print('ssize:', ssize)
    fsize = len(self.wind)
    hsize = int(self.hop_fraction * self.framesize)
    sstart = hsize - fsize
    print('sstart:', sstart)

    # Number of hops needed to cover the signal once it is padded on the left.
    nframe = math.ceil((ssize - sstart) / hsize)
    zpleft = -sstart
    zpright = (nframe - 1) * hsize + fsize - zpleft - ssize

    if zpleft > 0 or zpright > 0:
        sigpad = np.zeros(ssize + zpleft + zpright)
        sigpad[zpleft:len(sigpad) - zpright] = sig
    else:
        sigpad = sig

    sout = np.zeros(nframe * hsize)
    overlap_tail = np.zeros(hsize)

    model_input_names = [node.name for node in self.model.get_inputs()]
    # Every input except the first starts as zeros; non-integer (symbolic)
    # dimensions are replaced by 1.
    model_inputs = {}
    for node in self.model.get_inputs()[1:]:
        shape = [dim if isinstance(dim, int) else 1 for dim in node.shape]
        model_inputs[node.name] = np.zeros(shape, dtype=np.float32)

    mu = None
    sigmasquare = None
    frame_count = 0

    for frame_no in range(nframe):
        offset = frame_no * hsize
        # consecutive frames are shifted by one hop and therefore overlap
        frame = sigpad[offset:offset + fsize] * self.wind
        spectrum = audiolib.stft(
            frame, self.sampling_rate, self.wind, self.hop_fraction,
            self.dft_size, synth=True, zphase=False)
        xmag, xphs = audiolib.magphasor(spectrum)
        feat = audiolib.logpow(xmag, floor=self.spectral_floor)

        if frame_no == 0:
            # seed the running statistics with the first frame
            mu = feat
            sigmasquare = feat**2

        norm_feat, mu, sigmasquare, frame_count = audiolib.onlineMVN_perframe(
            feat, frame_counter=frame_count, mu=mu, sigmasquare=sigmasquare,
            frameshift=0.01, tauFeat=3., tauFeatInit=0.1, t_init=0.1)

        model_inputs['input'] = np.float32(norm_feat[np.newaxis, np.newaxis, :])
        model_outputs = self.model.run(None, model_inputs)
        # The outputs of this step become the inputs of the next one;
        # 'input' is overwritten above before each run.
        model_inputs = dict(zip(model_input_names, model_outputs))

        mask = model_outputs[0].squeeze()
        x_enh = audiolib.istft(
            (xmag * mask) * xphs, sample_rate, self.wind, self.dft_size,
            zphase=False)

        # overlap-add: previous frame's tail plus this frame's head
        sout[offset:offset + hsize] = overlap_tail + x_enh[0:hsize]
        overlap_tail = x_enh[hsize:fsize]

    audiolib.audiowrite(sout, sample_rate, enhanced_filename, norm=False)
def main_gen(params, filenum):
    '''Calls gen_audio() to generate the audio signals, verifies that they meet
    the requirements, and writes the files to storage.

    Clean speech and noise are generated and mixed repeatedly until none of
    the three resulting signals clips; that mixture is then written out under
    file id `filenum`.

    Returns the clean source / clipped / low-activity file lists followed by
    the noise source / clipped / low-activity file lists.
    '''
    print("Generating file #" + str(filenum))

    clean_clipped_files = []
    clean_low_activity_files = []
    noise_clipped_files = []
    noise_low_activity_files = []

    accepted = False
    while not accepted:
        # generate clean speech
        clean, clean_source_files, clean_cf, clean_laf = gen_audio(
            True, params, filenum)
        # generate noise of matching length
        noise, noise_source_files, noise_cf, noise_laf = gen_audio(
            False, params, filenum, len(clean))

        clean_clipped_files.extend(clean_cf)
        clean_low_activity_files.extend(clean_laf)
        noise_clipped_files.extend(noise_cf)
        noise_low_activity_files.extend(noise_laf)

        # fixed SNR, or an integer drawn from [snr_lower, snr_upper)
        if params['randomize_snr']:
            snr = np.random.randint(params['snr_lower'], params['snr_upper'])
        else:
            snr = params['snr']

        clean_snr, noise_snr, noisy_snr, target_level = snr_mixer(
            params=params, clean=clean, noise=noise, snr=snr)

        # a clipped signal means the whole attempt is thrown away
        accepted = not (is_clipped(clean_snr) or is_clipped(noise_snr)
                        or is_clipped(noisy_snr))

    # write resultant audio streams to files
    clean_names = [src[:-4].split(os.path.sep)[-1] for src in clean_source_files]
    noise_names = [src[:-4].split(os.path.sep)[-1] for src in noise_source_files]
    clean_files_joined = '-'.join(clean_names)[:MAXFILELEN]
    noise_files_joined = '-'.join(noise_names)[:MAXFILELEN]

    noisyfilename = ''.join([
        clean_files_joined, '_', noise_files_joined, '_snr', str(snr),
        '_fileid_', str(filenum), '.wav'])
    cleanfilename = 'clean_fileid_' + str(filenum) + '.wav'
    noisefilename = 'noise_fileid_' + str(filenum) + '.wav'

    outputs = (
        (os.path.join(params['noisyspeech_dir'], noisyfilename), noisy_snr),
        (os.path.join(params['clean_proc_dir'], cleanfilename), clean_snr),
        (os.path.join(params['noise_proc_dir'], noisefilename), noise_snr),
    )
    for out_path, signal in outputs:
        try:
            audiowrite(out_path, signal, params['fs'])
        except Exception as e:
            print(str(e))

    return clean_source_files, clean_clipped_files, clean_low_activity_files, \
        noise_source_files, noise_clipped_files, noise_low_activity_files
def simulate_room(clean, noise, cfg, rand_seed, debug=False):
    """Simulate speech and noise sources placed along paths in a shoebox room.

    The clean and noise signals are cut into ``cfg['n_inter_locs']``
    overlapping, faded chunks; each chunk is added to the room as its own
    source at one of the interpolated locations.

    Args:
        clean: speech signal.
        noise: noise signal.
        cfg: mapping providing ``num_mics``, ``n_inter_locs``,
            ``room_size_upper``, ``room_size_lower``, ``room_geom_fixed``,
            ``fs``, ``echoic_ref_clean`` and ``echoic_ref_noise``.
        rand_seed: seed applied to the global NumPy RNG at the start.
        debug: when true, save a room plot and per-microphone plots/audio.

    Returns:
        ``(clean_final, noise_final, mixture)``; the references are the
        simulated signals at microphone 0 when the matching ``echoic_ref_*``
        flag is set, otherwise the inputs reshaped to a column.
    """
    n_mics = cfg['num_mics']
    n_inter = cfg['n_inter_locs']
    room_size_upper = cfg['room_size_upper']
    room_size_lower = cfg['room_size_lower']
    room_geom_fixed = cfg['room_geom_fixed']

    def _random_path():
        # One straight line between two random points, n_inter samples per axis.
        axes = [np.linspace(runif(0, dim), runif(0, dim), n_inter)
                for dim in room_size]
        return np.array(axes).T

    def _isolate_chunk(signal, start, stop, keep_tail):
        # Zero everywhere except [start, stop); the last chunk keeps the rest.
        clip = np.zeros(len(signal))
        clip[start:stop] = signal[start:stop].reshape(-1)
        if keep_tail:
            clip[start:] = signal[start:].reshape(-1)
        return clip

    # each mixture has the same room across db levels
    np.random.seed(rand_seed)
    if room_geom_fixed:
        room_size = np.array([room_size_upper] * 3)
        # Draw the microphone positions from a fixed seed, then restore the
        # RNG state so the source locations still depend on rand_seed.
        saved_state = np.random.get_state()
        np.random.seed(42)
        mic_locs = [np.random.uniform(0, dim, n_mics) for dim in room_size]
        np.random.set_state(saved_state)
    else:
        room_size = np.random.uniform(room_size_lower, room_size_upper, size=(3))
        mic_locs = [np.random.uniform(0, dim, n_mics) for dim in room_size]

    room = pra.ShoeBox(room_size, fs=cfg['fs'], absorption=0.35, max_order=10)
    room.add_microphone_array(pra.MicrophoneArray(np.array(mic_locs), room.fs))

    speech_locs = _random_path()
    noise_locs = _random_path()

    chunk_size = len(clean) // n_inter
    fade_overlap = chunk_size // 4
    n_locs = len(speech_locs)

    for i, (speech_loc, noise_loc) in enumerate(zip(speech_locs, noise_locs)):
        start_idx = max(0, i * chunk_size - fade_overlap)
        end_idx = min(len(clean), (i + 1) * chunk_size + fade_overlap)
        # since may not be exactly divisible, the last one grabs all the rest
        is_last = i == (n_locs - 1)

        cur_speech_clip = _isolate_chunk(clean, start_idx, end_idx, is_last)
        cur_noise_clip = _isolate_chunk(noise, start_idx, end_idx, is_last)

        cur_speech_clip = fade_signal(cur_speech_clip, start_idx, end_idx,
                                      chunk_size, fade_overlap, n_locs, i)
        cur_noise_clip = fade_signal(cur_noise_clip, start_idx, end_idx,
                                     chunk_size, fade_overlap, n_locs, i)

        room.add_source(speech_loc.reshape(-1), signal=cur_speech_clip, delay=0)
        room.add_source(noise_loc.reshape(-1), signal=cur_noise_clip, delay=0)

    premix = room.simulate(return_premix=True)

    # Sources were added alternately: even rows are speech, odd rows noise.
    speech_rows = np.arange(n_inter) * 2
    clean_ref = premix[speech_rows, 0, :].sum(0, keepdims=True)
    noise_ref = premix[speech_rows + 1, 0, :].sum(0, keepdims=True)
    mixture = premix.sum(0)

    if debug:
        import matplotlib.pyplot as plt
        room.plot(freq=[1000, 2000, 4000, 8000], img_order=0)
        plt.savefig('room.png')
        fig, ax = plt.subplots(len(mixture), 1)
        for i in range(len(ax)):
            ax[i].plot(mixture[i])
            audiolib.audiowrite(mixture[i, None].T, cfg['fs'],
                                './debug/res_{}.wav'.format(i))
        plt.savefig('./debug/res.png')

    # soundfile expects an N x C array for multichannel audio
    if cfg['echoic_ref_clean']:
        clean_final = clean_ref.T
    else:
        clean_final = clean.reshape((-1, 1))
    if cfg['echoic_ref_noise']:
        noise_final = noise_ref.T
    else:
        noise_final = noise.reshape((-1, 1))

    return clean_final, noise_final, mixture.T
def main(cfg):
    """Synthesize noisy speech at several SNR levels from clean and noise files.

    Random clean files are concatenated (separated by ``silence_length``
    seconds of silence) until they exceed ``audio_length`` seconds; noise is
    built the same way and cut to the clean length.  Each pair is mixed at
    every SNR in ``np.linspace(snr_lower, snr_upper, total_snrlevels)`` and
    the noisy, clean and noise signals are written to ``NoisySpeech_training``,
    ``CleanSpeech_training`` and ``Noise_training`` next to this script.
    Generation stops once ``total_hours`` of noisy audio has been produced.

    Args:
        cfg: mapping of string settings: ``snr_lower``, ``snr_upper``,
            ``total_snrlevels``, ``speech_dir``, ``noise_dir``,
            ``sampling_rate``, ``audioformat``, ``total_hours``,
            ``audio_length``, ``silence_length``, ``noise_types_excluded``.

    Raises:
        AssertionError: if the clean speech or noise directory does not exist.
    """
    snr_lower = float(cfg["snr_lower"])
    snr_upper = float(cfg["snr_upper"])
    # np.linspace requires an integer sample count; a float raises TypeError.
    total_snrlevels = int(float(cfg["total_snrlevels"]))

    script_dir = os.path.dirname(__file__)

    clean_dir = os.path.join(script_dir, 'clean_train')
    if cfg["speech_dir"] != 'None':
        clean_dir = cfg["speech_dir"]
    if not os.path.exists(clean_dir):
        # Raised explicitly so the check is not stripped by ``python -O``.
        raise AssertionError("Clean speech data is required")

    noise_dir = os.path.join(script_dir, 'noise_train')
    if cfg["noise_dir"] != 'None':
        noise_dir = cfg["noise_dir"]
    if not os.path.exists(noise_dir):
        raise AssertionError("Noise data is required")

    fs = float(cfg["sampling_rate"])
    audioformat = cfg["audioformat"]
    total_hours = float(cfg["total_hours"])
    audio_length = float(cfg["audio_length"])
    silence_length = float(cfg["silence_length"])

    noisyspeech_dir = os.path.join(script_dir, 'NoisySpeech_training')
    clean_proc_dir = os.path.join(script_dir, 'CleanSpeech_training')
    noise_proc_dir = os.path.join(script_dir, 'Noise_training')
    for out_dir in (noisyspeech_dir, clean_proc_dir, noise_proc_dir):
        os.makedirs(out_dir, exist_ok=True)

    total_secs = total_hours * 60 * 60
    total_samples = int(total_secs * fs)
    audio_length = int(audio_length * fs)
    SNR = np.linspace(snr_lower, snr_upper, total_snrlevels)

    cleanfilenames = glob.glob(os.path.join(clean_dir, audioformat))
    noisefilenames = glob.glob(os.path.join(noise_dir, audioformat))
    if cfg["noise_types_excluded"] != 'None':
        # Drop every noise file whose name starts with an excluded prefix.
        excluded = tuple(cfg["noise_types_excluded"].split(','))
        noisefilenames = [
            fn for fn in noisefilenames
            if not os.path.basename(fn).startswith(excluded)
        ]

    filecounter = 0
    num_samples = 0

    while num_samples < total_samples:
        idx_s = np.random.randint(0, np.size(cleanfilenames))
        clean, fs = audioread(cleanfilenames[idx_s])

        # Append further clean files until the target length is exceeded.
        while len(clean) <= audio_length:
            idx_s = idx_s + 1
            if idx_s >= np.size(cleanfilenames) - 1:
                idx_s = np.random.randint(0, np.size(cleanfilenames))
            newclean, fs = audioread(cleanfilenames[idx_s])
            cleanconcat = np.append(clean, np.zeros(int(fs * silence_length)))
            clean = np.append(cleanconcat, newclean)

        idx_n = np.random.randint(0, np.size(noisefilenames))
        noise, fs = audioread(noisefilenames[idx_n])

        if len(noise) < len(clean):
            # Append further noise files until the clean length is exceeded.
            while len(noise) <= len(clean):
                idx_n = idx_n + 1
                if idx_n >= np.size(noisefilenames) - 1:
                    idx_n = np.random.randint(0, np.size(noisefilenames))
                newnoise, fs = audioread(noisefilenames[idx_n])
                noiseconcat = np.append(noise, np.zeros(int(fs * silence_length)))
                noise = np.append(noiseconcat, newnoise)
        noise = noise[0:len(clean)]

        filecounter = filecounter + 1

        for snr in SNR:
            clean_snr, noise_snr, noisy_snr = snr_mixer(clean=clean,
                                                        noise=noise,
                                                        snr=snr)
            noisyfilename = ('noisy' + str(filecounter) + '_SNRdb_' + str(snr) +
                             '_clnsp' + str(filecounter) + '.wav')
            cleanfilename = 'clnsp' + str(filecounter) + '.wav'
            noisefilename = ('noisy' + str(filecounter) + '_SNRdb_' + str(snr) +
                             '.wav')
            noisypath = os.path.join(noisyspeech_dir, noisyfilename)
            cleanpath = os.path.join(clean_proc_dir, cleanfilename)
            noisepath = os.path.join(noise_proc_dir, noisefilename)
            audiowrite(noisy_snr, fs, noisypath, norm=False)
            audiowrite(clean_snr, fs, cleanpath, norm=False)
            audiowrite(noise_snr, fs, noisepath, norm=False)
            num_samples = num_samples + len(noisy_snr)
def main(cfg):
    """Synthesize fixed-length noisy speech at several SNR levels.

    Clean files are concatenated (separated by ``silence_length`` seconds of
    silence) and cut to exactly ``audio_length`` seconds; noise is built the
    same way and cut to the clean length.  Each pair is mixed at every SNR in
    ``np.linspace(snr_lower, snr_upper, total_snrlevels)`` and written to the
    ``mix``, ``s1`` (clean) and ``s2`` (noise) folders next to this script.

    If the noise directory is missing or empty the user is asked whether to
    run ``slicer()``; declining exits the program.

    Args:
        cfg: mapping of string settings: ``snr_lower``, ``snr_upper``,
            ``total_snrlevels``, ``speech_dir``, ``noise_dir``,
            ``sampling_rate``, ``audioformat``, ``total_hours``,
            ``audio_length``, ``silence_length``, ``noise_types_excluded``.

    Raises:
        AssertionError: if the clean speech directory does not exist.
    """
    snr_lower = int(cfg["snr_lower"])
    snr_upper = int(cfg["snr_upper"])
    total_snrlevels = int(cfg["total_snrlevels"])

    script_dir = os.path.dirname(__file__)

    clean_dir = os.path.join(script_dir, 'clean_train')
    if cfg["speech_dir"] != 'None':
        clean_dir = cfg["speech_dir"]
    if not os.path.exists(clean_dir):
        # Raised explicitly so the check is not stripped by ``python -O``.
        raise AssertionError("Clean speech data is required")

    sliced_noise_dir = os.path.join(script_dir, 'noise_train_new')
    noise_dir = sliced_noise_dir
    if cfg["noise_dir"] != 'None':
        noise_dir = cfg["noise_dir"]
    # Explicit check instead of raise/bare-except, which also swallowed
    # KeyboardInterrupt and configuration errors.
    if not os.path.isdir(noise_dir) or not os.listdir(noise_dir):
        res = input(
            "Can't find noise data. Do you want to run the audio slicer?\nEnter [Y]es [N]o\n"
        )
        if res.lower() in ["y", "yes", ""]:
            slicer()
            print("Sliced successfully.\n\n")
            # slicer() writes to 'noise_train_new' next to this script, so
            # read the noise from there rather than from the missing folder.
            noise_dir = sliced_noise_dir
        else:
            input("Noise data is required.\nPress any key to exit.\n")
            sys.exit()

    fs = float(cfg["sampling_rate"])
    audioformat = cfg["audioformat"]
    total_hours = float(cfg["total_hours"])
    audio_length = float(cfg["audio_length"])
    silence_length = float(cfg["silence_length"])

    noisyspeech_dir = os.path.join(script_dir, 'mix')
    clean_proc_dir = os.path.join(script_dir, 's1')
    noise_proc_dir = os.path.join(script_dir, 's2')
    for out_dir in (noisyspeech_dir, clean_proc_dir, noise_proc_dir):
        os.makedirs(out_dir, exist_ok=True)

    total_secs = total_hours * 60 * 60
    total_samples = int(total_secs * fs)
    audio_length = int(audio_length * fs)
    SNR = np.linspace(snr_lower, snr_upper, total_snrlevels)

    cleanfilenames = glob.glob(os.path.join(clean_dir, audioformat))
    noisefilenames = glob.glob(os.path.join(noise_dir, audioformat))
    if cfg["noise_types_excluded"] != 'None':
        # Drop every noise file whose name starts with an excluded prefix.
        excluded = tuple(cfg["noise_types_excluded"].split(','))
        noisefilenames = [
            fn for fn in noisefilenames
            if not os.path.basename(fn).startswith(excluded)
        ]

    filecounter = 0
    num_samples = 0

    while num_samples < total_samples:
        idx_s = np.random.randint(0, np.size(cleanfilenames))
        clean, fs = audioread(cleanfilenames[idx_s])

        if len(clean) != audio_length:
            # Append further clean files until the target length is exceeded.
            while len(clean) <= audio_length:
                idx_s = idx_s + 1
                if idx_s >= np.size(cleanfilenames) - 1:
                    idx_s = np.random.randint(0, np.size(cleanfilenames))
                newclean, fs = audioread(cleanfilenames[idx_s])
                cleanconcat = np.append(clean, np.zeros(int(fs * silence_length)))
                clean = np.append(cleanconcat, newclean)
        clean = clean[0:audio_length]

        idx_n = np.random.randint(0, np.size(noisefilenames))
        noise, fs = audioread(noisefilenames[idx_n])

        if len(noise) < len(clean):
            # Append further noise files until the clean length is exceeded.
            while len(noise) <= len(clean):
                idx_n = idx_n + 1
                if idx_n >= np.size(noisefilenames) - 1:
                    idx_n = np.random.randint(0, np.size(noisefilenames))
                newnoise, fs = audioread(noisefilenames[idx_n])
                noiseconcat = np.append(noise, np.zeros(int(fs * silence_length)))
                noise = np.append(noiseconcat, newnoise)
        noise = noise[0:len(clean)]

        filecounter = filecounter + 1
        # basename() handles "/" as well as "\\"; splitting on "\\" alone left
        # the directory in the name on non-Windows systems.
        noise_stem = os.path.basename(noisefilenames[idx_n]).split(".")[0]

        for snr in SNR:
            clean_snr, noise_snr, noisy_snr = snr_mixer(clean=clean,
                                                        noise=noise,
                                                        snr=snr)
            noisyfilename = ('noisy' + str(filecounter) + '_SNRdb_' + str(snr) +
                             noise_stem + '.wav')
            cleanfilename = 'clnsp' + str(filecounter) + '_.wav'
            noisefilename = ('noise' + str(filecounter) + '_SNRdb_' + str(snr) +
                             noise_stem + '.wav')
            noisypath = os.path.join(noisyspeech_dir, noisyfilename)
            cleanpath = os.path.join(clean_proc_dir, cleanfilename)
            noisepath = os.path.join(noise_proc_dir, noisefilename)
            audiowrite(noisy_snr, fs, noisypath, norm=False)
            audiowrite(clean_snr, fs, cleanpath, norm=False)
            audiowrite(noise_snr, fs, noisepath, norm=False)
            num_samples = num_samples + len(noisy_snr)
from audiolib import audioread, audiowrite, snr_mixer
from os.path import isfile, join, basename
import os
import numpy as np

"""
Given a source folder, add white noise at a range of different SNR levels to
all files in the source folder.
"""

if __name__ == "__main__":
    source_folder = "clips"
    output_folder = source_folder + "_snr"
    # SNR levels snr_min .. snr_max - 1 (in dB) are generated.
    snr_min = 30
    snr_max = 50

    source_files = [join(source_folder, f)
                    for f in os.listdir(source_folder)
                    if isfile(join(source_folder, f))]

    # Make sure the destination exists before anything is written to it.
    os.makedirs(output_folder, exist_ok=True)

    for f in source_files:
        clean, fs = audioread(f)
        # white noise, one realisation per source file
        noise = np.random.normal(0, 1, len(clean))
        stem = os.path.splitext(basename(f))[0]

        for i in range(snr_min, snr_max):
            clean_snr, noise_snr, noisy_snr = snr_mixer(clean=clean, noise=noise, snr=i)
            output_filename = join(output_folder, f'{i}S_{stem}.wav')
            audiowrite(noisy_snr, fs, output_filename, norm=False)
def __getitem__(self, idx):
    """Load the clean, noise and mixture audio for sample ``idx``.

    The file name is ``<file_idx>_SNR_<file_db_suffix>`` and is looked up in
    ``self.mono_speech_dir``, ``self.mono_noise_dir`` and ``self.mix_dir``.

    Returns:
        ``(clean, noise, mix, file_db_suffix)`` where the first three items
        are ``torch.Tensor`` objects built from the audio data.
    """
    file_idx, file_db_suffix = self.file_labels[idx]
    file_name = file_idx + '_SNR_' + file_db_suffix

    clean_file = join(self.mono_speech_dir, file_name)
    noise_file = join(self.mono_noise_dir, file_name)
    mix_file = join(self.mix_dir, file_name)

    clean_data, _ = audiolib.audioread(clean_file)
    noise_data, _ = audiolib.audioread(noise_file)
    mix_data, _ = audiolib.audioread(mix_file)

    return (torch.Tensor(clean_data), torch.Tensor(noise_data),
            torch.Tensor(mix_data), file_db_suffix)


if __name__ == "__main__":
    # Debug helper: dump one dataset item as WAV files under ./debug/.
    import data_gen_config

    data_config = data_gen_config.default()
    db_lvls = np.linspace(data_config['snr_lower'], data_config['snr_upper'],
                          data_config['total_snrlevels'])
    dset = MultiChannelDataset(root_dir=data_config['output_data_dir'],
                               db_lvls=db_lvls)

    # __getitem__ returns four values; the SNR suffix is not needed here.
    clean, noise, mix, _ = dset[1]

    audiolib.audiowrite(clean.numpy().T, data_config['fs'], './debug/clean.wav')
    audiolib.audiowrite(noise.numpy().T, data_config['fs'], './debug/noise.wav')
    # presumably the first axis of `mix` is the channel axis — TODO confirm
    for i in range(len(mix)):
        audiolib.audiowrite(mix.numpy().T[:, i], data_config['fs'],
                            './debug/mix_{}.wav'.format(i))
def main_gen(params):
    '''Calls the gen_audio helpers to generate the audio signals, verifies that
    they meet the requirements, and writes the files to storage.

    For each reverberated primary-speech chunk three mixtures are produced:

      1. primary + noise               (file prefix "primary_" / none)
      2. primary + secondary speech    (file prefix "ps_")
      3. (primary + secondary) + noise (file prefix "psn_")

    A chunk is skipped (nothing written, file index not advanced) when any of
    the checked signals clips.  The outer loop runs until the file index
    passes params['fileindex_end'].

    Returns six lists: clean source / clipped / low-activity files followed by
    noise source / clipped / low-activity files.
    NOTE(review): the clipped and low-activity lists are never filled in this
    function and are always returned empty — confirm whether that is intended.
    '''
    clean_source_files = []
    clean_clipped_files = []
    clean_low_activity_files = []
    noise_source_files = []
    noise_clipped_files = []
    noise_low_activity_files = []

    file_num = params['fileindex_start']
    rirfilenames = params['myrir']

    while file_num <= params['fileindex_end']:
        # generate the primary (clean) speech chunks for a random speaker
        spk_index = random.randint(0, len(params['cleanfilenames']) - 1)
        chosen_clean = gen_audio3(True, params, spk_index)

        # add reverb to every chunk with a randomly selected RIR
        chosen_clean_reverb = []
        for clean in chosen_clean:
            rir_path = random.sample(rirfilenames, 1)[0]
            _, samples_rir = wavfile.read(rir_path)
            if samples_rir.ndim > 1:
                # Pick among the RIR's channels (columns); the number of
                # array dimensions is not the number of channels.
                channel = random.randint(0, samples_rir.shape[1] - 1)
                samples_rir_ch = samples_rir[:, channel]
            else:
                samples_rir_ch = samples_rir
            chosen_clean_reverb.append(add_pyreverb(clean, samples_rir_ch))

        # add secondary speech and/or noise for each chunk of primary speech
        for chose_primary in chosen_clean_reverb:
            index2 = random.randint(0, len(params['cleanfilenames2']) - 1)
            clean2, clean_sf, clean_cf, clean_laf, clean_index = \
                gen_audio2(True, params, index2, chose_primary.shape[0])

            # generate noise
            noise_index = random.randint(0, len(params['noisefilenames']) - 1)
            noise, noise_sf, noise_cf, noise_laf, noise_index = \
                gen_audio(False, params, noise_index, chose_primary.shape[0])

            if not params['randomize_snr']:
                # Use the specified SNR for all three mixtures, so snr2/snr3
                # are always defined.
                # NOTE(review): confirm the fixed SNR should apply to mixes
                # 2 and 3 as well.
                snr = snr2 = snr3 = params['snr']
            else:
                # integers drawn from [snr_lower, snr_upper)
                snr = np.random.randint(params['snr_lower'], params['snr_upper'])
                snr2 = np.random.randint(params['snr_lower'], params['snr_upper'])
                snr3 = np.random.randint(params['snr_lower'], params['snr_upper'])

            # 1. Primary (clean) + Noise
            clean_snr, noise_snr, noisy_snr, target_level = segmental_snr_mixer(
                params=params, clean=chose_primary, noise=noise, snr=snr)
            # 2. Primary + Secondary
            clean_snr2, noise_snr2, noisy_snr2, target_level2 = segmental_snr_mixer(
                params=params, clean=chose_primary, noise=clean2, snr=snr2)
            # 3. Primary + Secondary (clean2) + Noise
            clean_snr3, noise_snr3, noisy_snr3, target_level3 = segmental_snr_mixer(
                params=params, clean=noisy_snr2, noise=noise, snr=snr3)

            # unexpected clipping in any mixture: skip this chunk
            checked = (clean_snr, noisy_snr, clean_snr2, noisy_snr2,
                       clean_snr3, noisy_snr3)
            if any(is_clipped(signal) for signal in checked):
                print("Warning: File #" + str(file_num) + " has unexpected clipping, " +
                      "returning without writing audio to disk")
                continue

            clean_source_files += clean_sf
            noise_source_files += noise_sf

            # build the output names from the source file names
            clean_files_joined = '-'.join(
                i[:-4].split(os.path.sep)[-1] for i in clean_sf)[:MAXFILELEN]
            noise_files_joined = '-'.join(
                i[:-4].split(os.path.sep)[-1] for i in noise_sf)[:MAXFILELEN]
            sources_tag = clean_files_joined + '_' + noise_files_joined

            # (noisy-name prefix, clean/noise-name prefix, snr, target level,
            #  noisy signal, noise signal) for each of the three mixtures
            mixes = (
                ('primary_', '', snr, target_level, noisy_snr, noise_snr),
                ('ps_', 'ps_', snr2, target_level2, noisy_snr2, noise_snr2),
                ('psn_', 'psn_', snr3, target_level3, noisy_snr3, noise_snr3),
            )
            outputs = []
            for noisy_prefix, prefix, mix_snr, mix_tl, noisy_sig, noise_sig in mixes:
                # Each noisy file is labelled with its own SNR and target level.
                noisyfilename = (noisy_prefix + 'noisy_fileid_' + str(file_num) +
                                 '_' + sources_tag + '_snr' + str(mix_snr) +
                                 '_tl' + str(mix_tl) + '.wav')
                cleanfilename = prefix + 'clean_fileid_' + str(file_num) + '.wav'
                noisefilename = prefix + 'noise_fileid_' + str(file_num) + '.wav'
                # NOTE(review): the clean reference of mix 1 (clean_snr) is
                # written for all three mixtures — confirm this is intended.
                outputs.append((os.path.join(params['noisyspeech_dir'],
                                             noisyfilename), noisy_sig))
                outputs.append((os.path.join(params['clean_proc_dir'],
                                             cleanfilename), clean_snr))
                outputs.append((os.path.join(params['noise_proc_dir'],
                                             noisefilename), noise_sig))

            file_num += 1

            # Guard each write separately so one failure does not skip the rest.
            for out_path, signal in outputs:
                try:
                    audiowrite(out_path, signal, params['fs'])
                except Exception as e:
                    print(str(e))

    return clean_source_files, clean_clipped_files, clean_low_activity_files, \
        noise_source_files, noise_clipped_files, noise_low_activity_files