def single_mix(combo_idx, split_list, datapath):
    """Average one clip from every partition and store the resulting mixture.

    For each position ``p`` the entry ``split_list[p][combo_idx[p]]`` is
    unpacked as ``(idx, path)`` and the file at ``path`` is loaded at 16 kHz.
    The loaded signals are summed with the equal weight
    ``1 / len(split_list)``.

    Files written under ``datapath`` (the sub-directories are not created
    here):
      * ``mix_wav/<name>.wav`` -- the mixed waveform, written at 16 kHz
      * ``mix/<name>.npy``     -- output of ``utils.fast_stft`` on the mixture
      * ``mix_log.txt``        -- ``<name>.npy`` is appended as a new line

    ``<name>`` is ``mix`` followed by every clip id formatted as ``-%05d``,
    in partition order (e.g. ``mix-00001-00007``).

    :param combo_idx: one index per partition, selecting which clip to use
    :param split_list: sequence of partitions, each holding (idx, path) pairs
    :param datapath: root directory receiving the outputs
    """
    assert len(combo_idx) == len(split_list)
    weight = 1.0 / float(len(split_list))

    signals = []
    id_suffix = ''
    for position, partition in enumerate(split_list):
        clip_id, clip_path = partition[combo_idx[position]]
        signal, _ = librosa.load(clip_path, sr=16000)
        signals.append(signal)
        id_suffix += '-%05d' % clip_id

    # Equal-weight sum of all selected clips (shape/dtype follow the first).
    mixture = np.zeros_like(signals[0])
    for signal in signals:
        mixture += signal * weight

    stem = 'mix' + id_suffix
    wavfile.write('%s/mix_wav/%s' % (datapath, stem + '.wav'), 16000, mixture)

    spectrum = utils.fast_stft(mixture)
    npy_name = stem + '.npy'
    np.save('%s/mix/%s' % (datapath, npy_name), spectrum)

    with open('%s/mix_log.txt' % datapath, 'a') as log_file:
        log_file.write(npy_name + '\n')
def single_mix(combo_idx, split_list, database_repo):
    """Build and store an equal-weight mixture of one clip per partition.

    ``combo_idx[p]`` picks the ``(idx, path)`` entry used from
    ``split_list[p]``. Every picked file is loaded at 16 kHz, scaled by
    ``1 / len(split_list)`` and summed.

    Side effects, all relative to ``database_repo`` (directories must
    already exist; nothing is created here):
      * ``mix_wav/<name>.wav`` -- mixed waveform at 16 kHz
      * ``mix/<name>.npy``     -- ``utils.fast_stft`` of the mixed waveform
      * ``mix_log.txt``        -- one appended line containing ``<name>.npy``

    ``<name>`` is ``mix`` plus each clip id rendered as ``-%05d``.

    :param combo_idx: per-partition index of the clip to mix
    :param split_list: sequence of partitions of (idx, path) pairs
    :param database_repo: root directory of the audio database
    """
    assert len(combo_idx) == len(split_list)
    share = 1.0 / float(len(split_list))

    loaded = []
    tags = []
    for slot, candidates in enumerate(split_list):
        clip_id, clip_path = candidates[combo_idx[slot]]
        samples, _ = librosa.load(clip_path, sr=16000)
        loaded.append(samples)
        tags.append('-%05d' % clip_id)
    base_name = "mix" + "".join(tags)

    # Accumulate the weighted clips into a buffer shaped like the first one.
    blended = np.zeros_like(loaded[0])
    for samples in loaded:
        blended += samples * share

    # Persist the time-domain mixture.
    wavfile.write('%s/mix_wav/%s' % (database_repo, base_name + '.wav'), 16000, blended)

    # Persist the transformed mixture.
    tf_mix = utils.fast_stft(blended)
    npy_file = base_name + ".npy"
    np.save('%s/mix/%s' % (database_repo, npy_file), tf_mix)

    # Record the stored file in the mix log.
    with open('%s/mix_log.txt' % database_repo, 'a') as log:
        log.write(npy_file + "\n")
def preprocess_audio(video_name=VIDEO_NAME, sr=16000):
    """Load ``<video_name>.wav`` and store its ``utils.fast_stft`` output.

    The result is written to ``preprocessed-<video_name>.npy``; both paths
    are relative to the current working directory unless ``video_name``
    itself carries a directory part.

    :param video_name: file stem of the input wav (without extension)
    :param sr: sample rate passed to ``librosa.load``
    """
    waveform, _ = librosa.load("%s.wav" % video_name, sr=sr)
    spectrum = utils.fast_stft(waveform)
    out_stem = 'preprocessed-%s' % video_name
    np.save('%s.npy' % out_stem, spectrum)
def single_audio_to_npy(audio_path_list, database_repo=DATABASE_REPO_PATH, fix_sr=16000):
    """Convert each listed audio file to a ``.npy`` file and log its name.

    For every ``(idx, path)`` pair the audio is loaded at ``fix_sr``, passed
    through ``utils.fast_stft`` and saved as
    ``<database_repo>/single/single-<idx:05d>.npy``. The file name is
    appended to ``<database_repo>/single_TF.txt`` before the array is saved.

    :param audio_path_list: iterable of (idx, path) pairs
    :param database_repo: root directory of the audio database
    :param fix_sr: sample rate passed to ``librosa.load``
    """
    for clip_id, clip_path in audio_path_list:
        samples, _ = librosa.load(clip_path, sr=fix_sr)
        spectrum = utils.fast_stft(samples)
        stem = 'single-%05d' % clip_id

        with open('%s/single_TF.txt' % database_repo, 'a') as log_file:
            log_file.write('%s.npy' % stem + '\n')

        np.save('%s/single/%s.npy' % (database_repo, stem), spectrum)
def single_audio_to_npy(audio_path_list, database_repo=DATABASE_REPO_PATH, fix_sr=16000):
    """Convert each listed audio file to a ``.npy`` file, with progress output.

    For every ``(idx, path)`` pair the audio is loaded at ``fix_sr``, passed
    through ``utils.fast_stft(..., power=True)`` and saved as
    ``<database_repo>/single/single-<idx:06d>.npy``. After a successful save
    the file name is appended to ``<database_repo>/single_TF.txt``.

    A carriage-return progress line is printed to stdout for each item and
    terminated with a newline at the end.

    :param audio_path_list: sized sequence of (idx, path) pairs
    :param database_repo: root directory of the audio database
    :param fix_sr: sample rate passed to ``librosa.load``
    """
    total = len(audio_path_list)
    for position, (idx, path) in enumerate(audio_path_list):
        # Progress is based on the position in the list, not on the clip id:
        # ids need not start at 0 or be contiguous, which made the old
        # ``idx / len(...)`` percentage wrong (it could even exceed 100).
        print('\rsingle npy generating... %d' % ((position / total) * 100), end='')

        data, _ = librosa.load(path, sr=fix_sr)
        data = utils.fast_stft(data, power=True)
        name = 'single-%06d' % idx

        # Save first, log second, so the log only lists files that exist.
        np.save('%s/single/%s.npy' % (database_repo, name), data)
        with open('%s/single_TF.txt' % database_repo, 'a') as f:
            f.write('%s.npy\n' % name)
    print()
def audio_to_numpy(audio_path_list, data_path=database_path, fix_sr=16000):
    """Convert each listed audio file to a ``.npy`` file, with progress output.

    For every ``(idx, path)`` pair the audio is loaded at ``fix_sr``, passed
    through ``utils.fast_stft`` and saved as
    ``<data_path>/single/single-<idx:05d>.npy``. After a successful save the
    file name is appended to ``<data_path>/single_TF.txt``.

    A carriage-return progress line is printed to stdout for each item and
    terminated with a newline at the end.

    :param audio_path_list: sized sequence of (idx, path) pairs
    :param data_path: root directory of the audio database
    :param fix_sr: sample rate passed to ``librosa.load``
    """
    total = len(audio_path_list)
    for position, (idx, path) in enumerate(audio_path_list):
        # Progress is based on the position in the list, not on the clip id:
        # ids need not start at 0 or be contiguous, which made the old
        # ``idx / len(...)`` percentage wrong (it could even exceed 100).
        # The message text is kept unchanged so console output stays stable.
        print('\r aduio numpy generating... %d' % ((position / total) * 100), end='')

        data, _ = librosa.load(path, sr=fix_sr)
        data = utils.fast_stft(data)
        name = 'single-%05d' % idx

        # Save first, log second, so the log only lists files that exist.
        np.save('%s/single/%s.npy' % (data_path, name), data)
        with open('%s/single_TF.txt' % data_path, 'a') as f:
            f.write('%s.npy\n' % name)
    print()
# Switches selecting which sanity checks run (0 = skip, 1 = run).
SHAPE_CHECK = 0
MODEL_CHECK1 = 0
MODEL_CHECK2 = 1

# Two training clips loaded at 16 kHz, plus their equal-weight mixture.
data1, sr1 = librosa.load('../../data/audio/audio_train/trim_audio_train0.wav', sr=16000)
data2, sr2 = librosa.load('../../data/audio/audio_train/trim_audio_train1.wav', sr=16000)
mix = data1 * 0.5 + data2 * 0.5

if SHAPE_CHECK:
    # Print the shape of the waveform, of its fast_stft output, and of the
    # fast_istft reconstruction.
    print(data1.shape)
    D1 = avp.fast_stft(data1)
    print(D1.shape)
    T1 = avp.fast_istft(D1)
    print(T1.shape)

if MODEL_CHECK1:
    # Transform both sources and the mixture, each with a leading axis of
    # size 1 added (evaluated in the order data1, data2, mix).
    F1, F2, FM = (
        np.expand_dims(avp.fast_stft(signal), axis=0)
        for signal in (data1, data2, mix)
    )

    # Magnitude ratio of source 1 to the mixture; entries that are not
    # finite (e.g. from a zero denominator) are replaced by 0.
    cRM1 = np.abs(F1) / np.abs(FM)
    cRM1[np.logical_not(np.isfinite(cRM1))] = 0
def generate_mix_sample(audio_path_list, num_speaker, fix_sr=16000, verbose=0,
                        database_repo='audio_database'):
    '''
    Generate one mixed sample from the first ``num_speaker`` audios in the list.

    The selected clips are loaded at ``fix_sr`` and summed with the equal
    weight ``1 / num_speaker``. Each clip and the mixture are passed through
    ``utils.fast_stft(..., power=False)``, and ``utils.fast_cRM`` is computed
    for every clip against the mixture.

    Files written under ``database_repo`` (sub-directories are not created
    here):
      * ``mix/mix<ids>.npy``        -- transformed mixture
      * ``crm/crm<ids>-<id>.npy``   -- one cRM per speaker
      * ``dataset.txt``             -- one appended line: the mix file name
        followed by the cRM file names, space separated

    ``<ids>`` is every selected clip id formatted as ``-%05d``.

    NOTE(review): the original comments state that F_mix and each cRM have
    shape (298, 257, 2); that depends on ``utils`` and the clip length and
    is not checked here.

    :param audio_path_list: sequence of (idx, path) pairs of wav audio files
    :param num_speaker: number of speakers to mix (must be >= 1 and not
                        larger than ``len(audio_path_list)``)
    :param fix_sr: fixed sample rate passed to ``librosa.load``
    :param verbose: when 1, print the shapes of the mixture and the cRMs
    :param database_repo: root directory receiving the outputs; the default
                          matches the previously hard-coded location
    '''
    # Index (rather than slice) so a too-short list still raises IndexError.
    selected = [audio_path_list[i] for i in range(num_speaker)]
    post_name = ''.join('-%05d' % idx for idx, _ in selected)

    # Import data.
    data_list = [librosa.load(path, sr=fix_sr)[0] for _, path in selected]

    # Create the mixed audio according to the mix rate.
    mix_rate = 1.0 / float(num_speaker)
    mix = np.zeros(shape=data_list[0].shape)
    for data in data_list:
        mix += data * mix_rate

    # Transform every source and the mixture, then derive one cRM per source.
    F_list = [utils.fast_stft(data, power=False) for data in data_list]
    F_mix = utils.fast_stft(mix, power=False)
    cRM_list = [utils.fast_cRM(F, F_mix) for F in F_list]

    if verbose == 1:
        print('shape of X: ', F_mix.shape)
        for i, cRM in enumerate(cRM_list):
            print('shape of cRM%s :' % i, cRM.shape)

    mix_name = 'mix' + post_name
    crm_names = ['crm' + post_name + '-%05d' % idx for idx, _ in selected]

    # Save the arrays first so the index below never references a file
    # that failed to be written.
    np.save('%s/mix/%s.npy' % (database_repo, mix_name), F_mix)
    for crm_name, cRM in zip(crm_names, cRM_list):
        np.save('%s/crm/%s.npy' % (database_repo, crm_name), cRM)

    # Append the record: "<mix>.npy <crm1>.npy <crm2>.npy ...".
    record = [mix_name + '.npy'] + [crm_name + '.npy' for crm_name in crm_names]
    with open('%s/dataset.txt' % database_repo, 'a') as f:
        f.write(' '.join(record) + '\n')
def single_audio_to_npy(audio_path_list, fix_sr=16000, database_repo='audio_database'):
    """Convert each listed audio file to a ``.npy`` file.

    For every ``(idx, path)`` pair the audio is loaded at ``fix_sr``, passed
    through ``utils.fast_stft`` and saved as
    ``<database_repo>/single/single-<idx:05d>.npy``. The target directory is
    not created here.

    :param audio_path_list: iterable of (idx, path) pairs
    :param fix_sr: sample rate passed to ``librosa.load``
    :param database_repo: root directory receiving the outputs; the default
                          matches the previously hard-coded location
    """
    for idx, path in audio_path_list:
        data, _ = librosa.load(path, sr=fix_sr)
        data = utils.fast_stft(data)
        name = 'single-%05d' % idx
        np.save('%s/single/%s.npy' % (database_repo, name), data)