def build_audio3(is_clean, params, spk_index, audio_samples_length=-1):
    '''Split one primary-speaker source file into fixed-length segments.

    The file at ``params['cleanfilenames'][spk_index]`` is read with
    ``audioread``, resampled to ``params['fs']`` when its sample rate differs,
    and cut into consecutive, non-overlapping segments of
    ``audio_samples_length`` samples. A trailing remainder shorter than one
    segment is dropped.

    Args:
        is_clean: Must be truthy. Only clean (primary speaker) sources are
            handled here; the original code had no source list for the
            non-clean case and crashed with an unbound local.
        params: Configuration dict. Reads 'fs', 'cleanfilenames' and, when
            ``audio_samples_length`` is -1, 'audio_length'.
        spk_index: Index into ``params['cleanfilenames']``.
        audio_samples_length: Segment length in samples. -1 selects
            ``int(params['audio_length'] * params['fs'])``.

    Returns:
        A tuple ``(audios_all, files_used, clipped_files)``:
        the segments for which ``is_clipped`` was false, the source path
        repeated once per kept segment, and the source path repeated once
        per rejected segment.

    Raises:
        ValueError: If ``is_clean`` is falsy or the resolved segment length
            is not positive.
    '''
    if not is_clean:
        raise ValueError("build_audio3 only supports clean (primary speaker) sources")

    fs_output = params['fs']
    if audio_samples_length == -1:
        audio_samples_length = int(params['audio_length'] * fs_output)
    if audio_samples_length <= 0:
        raise ValueError("audio_samples_length must be positive")

    source_file = params['cleanfilenames'][spk_index]
    input_audio, fs_input = audioread(source_file)
    if fs_input != fs_output:
        # Keyword arguments: recent librosa releases reject positional rates.
        input_audio = librosa.resample(input_audio,
                                       orig_sr=fs_input,
                                       target_sr=fs_output)

    # Count segments with the same length that is used for slicing, so an
    # explicitly passed audio_samples_length never yields short/empty slices.
    num_segs = input_audio.shape[0] // audio_samples_length

    audios_all = []
    files_used = []
    clipped_files = []
    for seg in range(num_segs):
        start = seg * audio_samples_length
        audio_temp = input_audio[start:start + audio_samples_length]
        if is_clipped(audio_temp):
            clipped_files.append(source_file)
        else:
            audios_all.append(audio_temp)
            files_used.append(source_file)

    return audios_all, files_used, clipped_files
def build_audio(is_clean, params, index, audio_samples_length=-1):
    '''Construct an audio signal by concatenating clips from source files.

    Clips are read one after another (starting with the file after ``index``,
    wrapping around the source list), resampled to ``params['fs']`` when
    needed, and appended to the output, separated by silence of
    ``params['silence_length']`` seconds, until ``audio_samples_length``
    samples have been collected or ``MAXTRIES`` files were rejected.

    Args:
        is_clean: True to draw from ``params['cleanfilenames']``; False to
            draw from ``params['noisefilenames']`` if present, otherwise from
            a randomly chosen directory in ``params['noisedirs']`` (globbed
            with ``params['audioformat']``).
        params: Configuration dict. Also reads 'fs', 'silence_length',
            'is_test_set' (for clean sources) and, when
            ``audio_samples_length`` is -1, 'audio_length'.
        index: Starting position in the source list (ignored when a noise
            directory is sampled, where a random start is used).
        audio_samples_length: Desired output length in samples; -1 selects
            ``int(params['audio_length'] * params['fs'])``.

    Returns:
        ``(output_audio, files_used, clipped_files, idx)`` where ``idx`` is
        the index of the last file read. When sources came from a noise
        directory and the tries were exhausted, returns
        ``([], [], clipped_files, idx)`` instead.
    '''
    fs_output = params['fs']
    silence_length = params['silence_length']
    if audio_samples_length == -1:
        audio_samples_length = int(params['audio_length']*params['fs'])

    output_audio = np.zeros(0)
    remaining_length = audio_samples_length
    files_used = []
    clipped_files = []
    # Set only when sources come from a randomly chosen noise directory, so
    # the failure branch below never touches an unbound name.
    noise_dir = None

    if is_clean:
        source_files = params['cleanfilenames']
        idx = index
    elif 'noisefilenames' in params:
        source_files = params['noisefilenames']
        idx = index
    else:
        # noise files are organized into individual subdirectories:
        # pick a noise category randomly
        noisedirs = params['noisedirs']
        idx_n_dir = np.random.randint(0, np.size(noisedirs))
        noise_dir = noisedirs[idx_n_dir]
        source_files = glob.glob(os.path.join(noise_dir,
                                              params['audioformat']))
        shuffle(source_files)
        # pick a noise source file index randomly
        idx = np.random.randint(0, np.size(source_files))

    # initialize silence
    silence = np.zeros(int(fs_output*silence_length))

    # iterate through multiple clips until we have a long enough signal
    tries_left = MAXTRIES
    while remaining_length > 0 and tries_left > 0:

        # read next audio file and resample if necessary
        idx = (idx + 1) % np.size(source_files)
        input_audio, fs_input = audioread(source_files[idx])
        if input_audio is None:
            sys.stderr.write("WARNING: Cannot read file: %s\n" % source_files[idx])
            # An unreadable file uses up a try; otherwise a source list with
            # no readable file would loop forever.
            tries_left -= 1
            continue
        if fs_input != fs_output:
            # Keyword arguments: recent librosa releases reject positional rates.
            input_audio = librosa.resample(input_audio,
                                           orig_sr=fs_input,
                                           target_sr=fs_output)

        # if current file is longer than remaining desired length, and this is
        # noise generation or this is training set, subsample it randomly
        if len(input_audio) > remaining_length and (not is_clean or not params['is_test_set']):
            idx_seg = np.random.randint(0, len(input_audio)-remaining_length)
            input_audio = input_audio[idx_seg:idx_seg+remaining_length]

        # check for clipping, and if found move onto next file
        if is_clipped(input_audio):
            clipped_files.append(source_files[idx])
            tries_left -= 1
            continue

        # concatenate current input audio to output audio stream
        files_used.append(source_files[idx])
        output_audio = np.append(output_audio, input_audio)
        remaining_length -= len(input_audio)

        # add some silence if we have not reached desired audio length
        if remaining_length > 0:
            silence_len = min(remaining_length, len(silence))
            output_audio = np.append(output_audio, silence[:silence_len])
            remaining_length -= silence_len

    if tries_left == 0 and noise_dir is not None:
        print("There are not enough non-clipped files in the " + noise_dir + \
              " directory to complete the audio build")
        return [], [], clipped_files, idx

    return output_audio, files_used, clipped_files, idx
def main_gen(params):
    '''Generate and store noisy/clean/noise signals for a range of file ids.

    For each file number from params['fileindex_start'] through
    params['fileindex_end'] this obtains a clean signal from gen_audio(),
    passes it through add_pyreverb() with a randomly selected impulse
    response, obtains a noise signal of the same length from gen_audio(),
    mixes them with segmental_snr_mixer() and writes the three resulting
    signals with audiowrite(). If any mixed signal is clipped, nothing is
    written and the same file number is attempted again.

    Returns the tuple (clean_source_files, clean_clipped_files,
    clean_low_activity_files, noise_source_files, noise_clipped_files,
    noise_low_activity_files) accumulated over all iterations.
    '''
    def _name_only(path):
        # last path component with its final four characters removed
        return path[:-4].split(os.path.sep)[-1]

    clean_source_files, clean_clipped_files, clean_low_activity_files = [], [], []
    noise_source_files, noise_clipped_files, noise_low_activity_files = [], [], []

    clean_index = 0
    noise_index = 0
    file_num = params['fileindex_start']

    while file_num <= params['fileindex_end']:
        # clean speech
        clean, clean_sf, clean_cf, clean_laf, clean_index = gen_audio(
            True, params, clean_index)

        # reverberate with a randomly selected impulse response
        rir_index = random.randint(0, len(params['myrir']) - 1)
        my_rir = os.path.normpath(
            os.path.join('datasets', 'impulse_responses',
                         params['myrir'][rir_index]))
        _, samples_rir = wavfile.read(my_rir)
        my_channel = int(params['mychannel'][rir_index])
        if samples_rir.ndim == 1:
            samples_rir_ch = np.array(samples_rir)
        else:
            # both multi-channel branches of the original selected this column
            samples_rir_ch = samples_rir[:, my_channel - 1]
        clean = add_pyreverb(clean, samples_rir_ch)

        # noise, matched to the length of the reverberated clean signal
        noise, noise_sf, noise_cf, noise_laf, noise_index = gen_audio(
            False, params, noise_index, len(clean))

        clean_clipped_files += clean_cf
        clean_low_activity_files += clean_laf
        noise_clipped_files += noise_cf
        noise_low_activity_files += noise_laf

        # SNR: random between the configured bounds, or the fixed value
        if params['randomize_snr']:
            snr = np.random.randint(params['snr_lower'], params['snr_upper'])
        else:
            snr = params['snr']

        clean_snr, noise_snr, noisy_snr, target_level = segmental_snr_mixer(
            params=params, clean=clean, noise=noise, snr=snr)

        # unexpected clipping: skip this mix and try the same file number again
        if any(is_clipped(sig) for sig in (clean_snr, noise_snr, noisy_snr)):
            print("Warning: File #" + str(file_num) + " has unexpected clipping, " + \
                  "returning without writing audio to disk")
            continue

        clean_source_files += clean_sf
        noise_source_files += noise_sf

        # build output file names and paths
        hyphen = '-'
        clean_files_joined = hyphen.join(
            [_name_only(p) for p in clean_sf])[:MAXFILELEN]
        noise_files_joined = hyphen.join(
            [_name_only(p) for p in noise_sf])[:MAXFILELEN]

        noisyfilename = ''.join([
            clean_files_joined, '_', noise_files_joined, '_snr', str(snr),
            '_tl', str(target_level), '_fileid_', str(file_num), '.wav',
        ])
        cleanfilename = 'clean_fileid_' + str(file_num) + '.wav'
        noisefilename = 'noise_fileid_' + str(file_num) + '.wav'

        outputs = [
            (os.path.join(params['noisyspeech_dir'], noisyfilename), noisy_snr),
            (os.path.join(params['clean_proc_dir'], cleanfilename), clean_snr),
            (os.path.join(params['noise_proc_dir'], noisefilename), noise_snr),
        ]

        # write resultant audio streams; a failed write is reported, not raised
        for out_path, signal in outputs:
            try:
                audiowrite(out_path, signal, params['fs'])
            except Exception as err:
                print(str(err))

        file_num += 1

    return clean_source_files, clean_clipped_files, clean_low_activity_files, \
        noise_source_files, noise_clipped_files, noise_low_activity_files
def build_audio(is_clean, params, filenum, audio_samples_length=-1):
    '''Construct an audio signal by concatenating clips from source files.

    The next file to read is selected through the module-level shared
    counters ``clean_counter`` / ``noise_counter``: the counter is
    incremented under its lock and taken modulo the number of source files.
    Clips are resampled to ``params['fs']`` when needed and appended to the
    output, separated by silence of ``params['silence_length']`` seconds,
    until ``audio_samples_length`` samples have been collected or
    ``MAXTRIES`` files were rejected.

    Args:
        is_clean: True to draw from ``params['cleanfilenames']``, False to
            draw from ``params['noisefilenames']``.
        params: Configuration dict. Also reads 'fs', 'silence_length',
            'is_test_set' (for clean sources) and, when
            ``audio_samples_length`` is -1, 'audio_length'.
        filenum: File number, used only in the failure message.
        audio_samples_length: Desired output length in samples; -1 selects
            ``int(params['audio_length'] * params['fs'])``.

    Returns:
        ``(output_audio, files_used, clipped_files)``, or
        ``([], [], clipped_files)`` when the tries were exhausted.
    '''
    fs_output = params['fs']
    silence_length = params['silence_length']
    if audio_samples_length == -1:
        audio_samples_length = int(params['audio_length']*params['fs'])

    output_audio = np.zeros(0)
    remaining_length = audio_samples_length
    files_used = []
    clipped_files = []

    global clean_counter, noise_counter
    if is_clean:
        source_files = params['cleanfilenames']
        idx_counter = clean_counter
    else:
        source_files = params['noisefilenames']
        idx_counter = noise_counter

    # initialize silence
    silence = np.zeros(int(fs_output*silence_length))

    # iterate through multiple clips until we have a long enough signal
    tries_left = MAXTRIES
    while remaining_length > 0 and tries_left > 0:

        # read next audio file and resample if necessary
        with idx_counter.get_lock():
            idx_counter.value += 1
            idx = idx_counter.value % np.size(source_files)

        input_audio, fs_input = audioread(source_files[idx])
        if input_audio is None:
            # Treat an unreadable file like a rejected one so the loop is
            # bounded and the code below never operates on None.
            print("WARNING: Cannot read file: " + str(source_files[idx]))
            tries_left -= 1
            continue
        if fs_input != fs_output:
            # Keyword arguments: recent librosa releases reject positional rates.
            input_audio = librosa.resample(input_audio,
                                           orig_sr=fs_input,
                                           target_sr=fs_output)

        # if current file is longer than remaining desired length, and this is
        # noise generation or this is training set, subsample it randomly
        if len(input_audio) > remaining_length and (not is_clean or not params['is_test_set']):
            idx_seg = np.random.randint(0, len(input_audio)-remaining_length)
            input_audio = input_audio[idx_seg:idx_seg+remaining_length]

        # check for clipping, and if found move onto next file
        if is_clipped(input_audio):
            clipped_files.append(source_files[idx])
            tries_left -= 1
            continue

        # concatenate current input audio to output audio stream
        files_used.append(source_files[idx])
        output_audio = np.append(output_audio, input_audio)
        remaining_length -= len(input_audio)

        # add some silence if we have not reached desired audio length
        if remaining_length > 0:
            silence_len = min(remaining_length, len(silence))
            output_audio = np.append(output_audio, silence[:silence_len])
            remaining_length -= silence_len

    if tries_left == 0:
        print("Audio generation failed for filenum " + str(filenum))
        return [], [], clipped_files

    return output_audio, files_used, clipped_files
def main_gen(params, filenum):
    '''Generate and store the noisy/clean/noise signals for one file number.

    Clean and noise signals are requested from gen_audio() and mixed with
    snr_mixer(); the whole generate-and-mix step is repeated until none of
    the three mixed signals is reported as clipped by is_clipped(). The
    signals are then written with audiowrite(); a failing write is printed
    and otherwise ignored.

    Returns the tuple (clean_source_files, clean_clipped_files,
    clean_low_activity_files, noise_source_files, noise_clipped_files,
    noise_low_activity_files). The two source-file lists come from the
    final (accepted) attempt; the other lists accumulate over all attempts.
    '''
    def _name_only(path):
        # last path component with its final four characters removed
        return path[:-4].split(os.path.sep)[-1]

    print("Generating file #" + str(filenum))

    clean_clipped_files, clean_low_activity_files = [], []
    noise_clipped_files, noise_low_activity_files = [], []

    while True:
        # clean speech, then noise of matching length
        clean, clean_source_files, clean_cf, clean_laf = gen_audio(
            True, params, filenum)
        noise, noise_source_files, noise_cf, noise_laf = gen_audio(
            False, params, filenum, len(clean))

        clean_clipped_files += clean_cf
        clean_low_activity_files += clean_laf
        noise_clipped_files += noise_cf
        noise_low_activity_files += noise_laf

        # SNR: random between the configured bounds, or the fixed value
        if params['randomize_snr']:
            snr = np.random.randint(params['snr_lower'], params['snr_upper'])
        else:
            snr = params['snr']

        clean_snr, noise_snr, noisy_snr, target_level = snr_mixer(
            params=params, clean=clean, noise=noise, snr=snr)

        # accept the mix only when nothing clipped; otherwise start over
        if not (is_clipped(clean_snr) or is_clipped(noise_snr)
                or is_clipped(noisy_snr)):
            break

    # build output file names and paths
    hyphen = '-'
    clean_files_joined = hyphen.join(
        [_name_only(p) for p in clean_source_files])[:MAXFILELEN]
    noise_files_joined = hyphen.join(
        [_name_only(p) for p in noise_source_files])[:MAXFILELEN]

    noisyfilename = ''.join([
        clean_files_joined, '_', noise_files_joined, '_snr', str(snr),
        '_fileid_', str(filenum), '.wav',
    ])
    cleanfilename = 'clean_fileid_' + str(filenum) + '.wav'
    noisefilename = 'noise_fileid_' + str(filenum) + '.wav'

    outputs = [
        (os.path.join(params['noisyspeech_dir'], noisyfilename), noisy_snr),
        (os.path.join(params['clean_proc_dir'], cleanfilename), clean_snr),
        (os.path.join(params['noise_proc_dir'], noisefilename), noise_snr),
    ]

    # write resultant audio streams; a failed write is reported, not raised
    for out_path, signal in outputs:
        try:
            audiowrite(out_path, signal, params['fs'])
        except Exception as err:
            print(str(err))

    return clean_source_files, clean_clipped_files, clean_low_activity_files, \
        noise_source_files, noise_clipped_files, noise_low_activity_files
def main_gen(params):
    '''Generate primary / primary+secondary / primary+secondary+noise mixes.

    Until the running file number exceeds params['fileindex_end'], a random
    index into params['cleanfilenames'] is drawn and passed to gen_audio3().
    Each item of its result is passed through add_pyreverb() with a randomly
    sampled impulse response from params['myrir']. For every reverberated
    item, a secondary signal (gen_audio2) and a noise signal (gen_audio) of
    the same length are generated and three mixes are produced with
    segmental_snr_mixer():

        1. primary + noise
        2. primary + secondary
        3. (mix 2) + noise

    If any checked signal is clipped the item is skipped; otherwise nine
    files (noisy/clean/noise for each mix) are written with audiowrite() and
    the file number is advanced by one.

    Args:
        params: Configuration dict. Reads 'fileindex_start',
            'fileindex_end', 'cleanfilenames', 'cleanfilenames2',
            'noisefilenames', 'myrir', 'randomize_snr', 'snr' or
            'snr_lower'/'snr_upper', 'noisyspeech_dir', 'clean_proc_dir',
            'noise_proc_dir' and 'fs'.

    Returns:
        The tuple (clean_source_files, clean_clipped_files,
        clean_low_activity_files, noise_source_files, noise_clipped_files,
        noise_low_activity_files). The "clean" lists are filled from the
        gen_audio2() (secondary speaker) results.
    '''
    clean_source_files = []
    clean_clipped_files = []
    clean_low_activity_files = []
    noise_source_files = []
    noise_clipped_files = []
    noise_low_activity_files = []

    file_num = params['fileindex_start']

    while file_num <= params['fileindex_end']:
        # pick a primary speaker file and get its segments
        # assumes gen_audio3 returns an iterable of audio arrays - TODO confirm
        spk_index = random.randint(0, len(params['cleanfilenames']) - 1)
        chosen_clean = gen_audio3(True, params, spk_index)

        # add reverb with a randomly selected RIR to every segment
        rirfilenames = params['myrir']
        chosen_clean_reverb = []
        for clean in chosen_clean:
            myrir = random.sample(rirfilenames, 1)
            (fs_rir, samples_rir) = wavfile.read(myrir[0])
            if samples_rir.ndim > 1:
                # Draw from the number of channels (second axis); the number
                # of dimensions would restrict the choice to columns 0 and 1.
                channel = random.randint(0, samples_rir.shape[1] - 1)
                samples_rir_ch = samples_rir[:, channel]
            else:
                samples_rir_ch = samples_rir
            chosen_clean_reverb.append(add_pyreverb(clean, samples_rir_ch))

        # add secondary speech and/or noise for each chunk of primary speech
        for chose_primary in chosen_clean_reverb:
            index2 = random.randint(0, len(params['cleanfilenames2']) - 1)
            clean2, clean_sf, clean_cf, clean_laf, _ = \
                gen_audio2(True, params, index2, chose_primary.shape[0])

            # generate noise
            noise_index = random.randint(0, len(params['noisefilenames']) - 1)
            noise, noise_sf, noise_cf, noise_laf, _ = \
                gen_audio(False, params, noise_index, chose_primary.shape[0])

            # record rejected sources so the returned lists are not always empty
            clean_clipped_files += clean_cf
            clean_low_activity_files += clean_laf
            noise_clipped_files += noise_cf
            noise_low_activity_files += noise_laf

            if not params['randomize_snr']:
                # fixed SNR: every mix needs a defined value
                snr = snr2 = snr3 = params['snr']
            else:
                # randomly sampled SNR values between the specified bounds
                snr = np.random.randint(params['snr_lower'], params['snr_upper'])
                snr2 = np.random.randint(params['snr_lower'], params['snr_upper'])
                snr3 = np.random.randint(params['snr_lower'], params['snr_upper'])

            # 1. Primary (clean) + Noise
            clean_snr, noise_snr, noisy_snr, target_level = segmental_snr_mixer(
                params=params, clean=chose_primary, noise=noise, snr=snr)
            # 2. Primary + Secondary
            clean_snr2, noise_snr2, noisy_snr2, target_level2 = segmental_snr_mixer(
                params=params, clean=chose_primary, noise=clean2, snr=snr2)
            # 3. Primary + Secondary (clean2) + Noise
            clean_snr3, noise_snr3, noisy_snr3, target_level3 = segmental_snr_mixer(
                params=params, clean=noisy_snr2, noise=noise, snr=snr3)

            # unexpected clipping in any mix: skip this segment
            checked = (clean_snr, noisy_snr,
                       clean_snr2, noisy_snr2,
                       clean_snr3, noisy_snr3)
            if any(is_clipped(sig) for sig in checked):
                print("Warning: File #" + str(file_num) + " has unexpected clipping, " + \
                      "returning without writing audio to disk")
                continue

            clean_source_files += clean_sf
            noise_source_files += noise_sf

            # build output file names
            hyphen = '-'
            clean_source_filenamesonly = [
                i[:-4].split(os.path.sep)[-1] for i in clean_sf
            ]
            clean_files_joined = hyphen.join(
                clean_source_filenamesonly)[:MAXFILELEN]
            noise_source_filenamesonly = [
                i[:-4].split(os.path.sep)[-1] for i in noise_sf
            ]
            noise_files_joined = hyphen.join(
                noise_source_filenamesonly)[:MAXFILELEN]

            # NOTE(review): all three noisy file names carry snr/target_level
            # of mix 1 (not snr2/snr3 or target_level2/3), and all three
            # "clean" outputs write clean_snr. Kept as-is because downstream
            # consumers may depend on the names - confirm intent.
            name_suffix = '_' + clean_files_joined + '_' + noise_files_joined + '_snr' + \
                str(snr) + '_tl' + str(target_level) + '.wav'

            noisyfilename = 'primary_noisy_fileid_' + str(file_num) + name_suffix
            cleanfilename = 'clean_fileid_' + str(file_num) + '.wav'
            noisefilename = 'noise_fileid_' + str(file_num) + '.wav'
            noisypath = os.path.join(params['noisyspeech_dir'], noisyfilename)
            cleanpath = os.path.join(params['clean_proc_dir'], cleanfilename)
            noisepath = os.path.join(params['noise_proc_dir'], noisefilename)

            noisyfilename2 = 'ps_noisy_fileid_' + str(file_num) + name_suffix
            cleanfilename2 = 'ps_clean_fileid_' + str(file_num) + '.wav'
            noisefilename2 = 'ps_noise_fileid_' + str(file_num) + '.wav'
            noisypath2 = os.path.join(params['noisyspeech_dir'], noisyfilename2)
            cleanpath2 = os.path.join(params['clean_proc_dir'], cleanfilename2)
            noisepath2 = os.path.join(params['noise_proc_dir'], noisefilename2)

            noisyfilename3 = 'psn_noisy_fileid_' + str(file_num) + name_suffix
            cleanfilename3 = 'psn_clean_fileid_' + str(file_num) + '.wav'
            noisefilename3 = 'psn_noise_fileid_' + str(file_num) + '.wav'
            noisypath3 = os.path.join(params['noisyspeech_dir'], noisyfilename3)
            cleanpath3 = os.path.join(params['clean_proc_dir'], cleanfilename3)
            noisepath3 = os.path.join(params['noise_proc_dir'], noisefilename3)

            # same write order as before: noisy x3, clean x3, noise x3
            outputs = [
                (noisypath, noisy_snr),
                (noisypath2, noisy_snr2),
                (noisypath3, noisy_snr3),
                (cleanpath, clean_snr),
                (cleanpath2, clean_snr),
                (cleanpath3, clean_snr),
                (noisepath, noise_snr),
                (noisepath2, noise_snr2),
                (noisepath3, noise_snr3),
            ]

            file_num += 1

            # Each write has its own handler so one failure does not
            # silently skip the remaining files of the group.
            for out_path, signal in outputs:
                try:
                    audiowrite(out_path, signal, params['fs'])
                except Exception as e:
                    print(str(e))

    return clean_source_files, clean_clipped_files, clean_low_activity_files, \
        noise_source_files, noise_clipped_files, noise_low_activity_files