def main(config_path):
    src, fs = wav_tools.read_wav('data/src.wav')

    new_brir_path = 'data/test.wav'
    # configs = parse_config_file(config_path)
    # new_config_path = 'config.cfg'
    # new_fig_path = 'test.png'
    # syn_brir(configs, new_config_path, new_brir_path, new_fig_path,
    #          parallel_type=2, n_worker=12)
    new_brir, fs = wav_tools.read_wav(new_brir_path)
    new_record = wav_tools.brir_filter(src, new_brir)
    wav_tools.write_wav(new_record, fs, 'data/new/reverb/15_0387_0.wav')

    brir, fs = wav_tools.read_wav(config_path.replace('cfg', 'wav'))
    record = wav_tools.brir_filter(src, brir)
    wav_tools.write_wav(record, fs, 'data/pre/reverb/15_0387_0.wav')

    # row 0: BRIRs, row 1: records, row 2: spectrograms
    fig, ax = plt.subplots(3, 3, tight_layout=True, figsize=[10, 8])
    ax[0, 0].plot(brir[:, 0])
    ax[0, 0].set_ylabel('brir')
    ax[0, 0].set_title('pre')
    ax[0, 1].plot(new_brir[:, 0])
    ax[0, 1].set_title('new')
    ax[0, 2].plot(brir[:, 0] - new_brir[:, 0])
    ax[0, 2].yaxis.set_major_formatter(ticker.LogFormatter())
    ax[0, 2].set_title('difference')

    ax[1, 0].plot(record[:, 0])
    ax[1, 0].set_ylabel('record')
    ax[1, 1].plot(new_record[:, 0])
    ax[1, 2].plot(record[:, 0] - new_record[:, 0])
    ax[1, 2].yaxis.set_major_formatter(ticker.LogFormatter())

    specgram, freqs, bins, im = ax[2, 0].specgram(record[:, 0], Fs=fs,
                                                  NFFT=512, noverlap=256,
                                                  cmap='jet')
    new_specgram, freqs, bins, im = ax[2, 1].specgram(new_record[:, 0], Fs=fs,
                                                      NFFT=512, noverlap=256,
                                                      cmap='jet')
    ax[2, 2].imshow(specgram - new_specgram, aspect='auto', cmap='jet',
                    extent=[bins[0], bins[-1], freqs[0], freqs[-1]])
    fig.savefig('images/validate.png')
def file_reader(record_set_dir, batch_size=-1, is_shuffle=True):
    """read wav files in given directories, one file at a time
    Args:
        record_set_dir: directory or list of directories where recordings exist
        batch_size: number of samples per batch; <=0 yields one file per batch
        is_shuffle: whether to shuffle the file list
    Returns:
        samples generator, [samples, label_all]
    """
    if isinstance(record_set_dir, list):
        dirs = record_set_dir
    else:
        dirs = [record_set_dir]

    fpath_all = []
    for sub_set_dir in dirs:
        fpath_all_sub = get_fpath(sub_set_dir, '.wav', is_absolute=True)
        fpath_all.extend(fpath_all_sub)
    if is_shuffle:
        np.random.shuffle(fpath_all)

    if len(fpath_all) < 1:
        raise Exception('empty folder:{}'.format(record_set_dir))

    frame_len = 320
    shift_len = 160
    n_azi = 37

    if batch_size > 0:
        x_all = np.zeros((0, frame_len, 2, 1))
        y_all = np.zeros((0, n_azi))

    for fpath in fpath_all:
        record, fs = wav_tools.read_wav(fpath)
        x_file_all = wav_tools.frame_data(record, frame_len, shift_len)
        x_file_all = np.expand_dims(x_file_all, axis=-1)

        # one-hot azimuth label, parsed from the file name ({azi}_{i}.wav)
        n_sample_file = x_file_all.shape[0]
        fname = os.path.basename(fpath)
        azi = np.int16(fname.split('_')[0])
        y_file_all = np.zeros((n_sample_file, n_azi))
        y_file_all[:, azi] = 1

        if batch_size > 0:
            x_all = np.concatenate((x_all, x_file_all), axis=0)
            y_all = np.concatenate((y_all, y_file_all), axis=0)
            while x_all.shape[0] >= batch_size:
                x_batch = copy.deepcopy(x_all[:batch_size])
                y_batch = copy.deepcopy(y_all[:batch_size])
                x_all = x_all[batch_size:]
                y_all = y_all[batch_size:]
                yield [x_batch, y_batch]
        else:
            yield [x_file_all, y_file_all]
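# A minimal usage sketch of file_reader, assuming a Keras-style model object;
# the directory path and the model.train_on_batch call are hypothetical, not
# part of this repository.
for x_batch, y_batch in file_reader('data/train/reverb', batch_size=128):
    # x_batch: (128, 320, 2, 1) binaural frames
    # y_batch: (128, 37) one-hot azimuth labels
    model.train_on_batch(x_batch, y_batch)  # hypothetical training step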
def main():
    args = parse_args()
    x, fs = wav_tools.read_wav(args.wav_path)
    frame_len = int(fs * args.frame_len)
    frame_shift = int(fs * args.frame_shift)
    cal_spectrogram(x, frame_len, frame_shift, fs, args.freq_low,
                    args.freq_high, args.n_band, args.fig_path, args.dpi)
def cal_fea(record_dir, fea_dir):
    """calculate GCC-PHAT features
    Args:
        record_dir: wave dataset directory
        fea_dir: directory where features are saved
    """
    if not os.path.exists(fea_dir):
        os.makedirs(fea_dir)

    wav_fpath_all = get_fpath(dir_path=record_dir, suffix='.wav',
                              pattern='reverb')
    pb = ProcessBar(max_value=len(wav_fpath_all),
                    title=f'GCC_PHAT {record_dir}')
    pool = Pool(24)
    for wav_fpath in wav_fpath_all:
        fea_fpath = os.path.join(fea_dir, '{}.npy'.format(wav_fpath[:-4]))
        if os.path.exists(fea_fpath):
            continue
        data, fs = wav_tools.read_wav(os.path.join(record_dir, wav_fpath))
        frame_all = wav_tools.frame_data(data, frame_len=320, shift_len=160)
        n_frame = frame_all.shape[0]
        fea_frame_all = pool.map(gcc_phat_parallel_f,
                                 [frame_all[i] for i in range(n_frame)])
        fea_frame_all = np.asarray(fea_frame_all)

        dir_tmp = os.path.dirname(fea_fpath)
        if not os.path.exists(dir_tmp):
            os.makedirs(dir_tmp)
        np.save(fea_fpath, fea_frame_all)
        pb.update()
    pool.close()
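# A minimal sketch of the per-frame GCC-PHAT computation that
# gcc_phat_parallel_f presumably performs on a binaural frame of shape
# (frame_len, 2); the function body and the max_delay default are assumptions,
# not the project's actual implementation.
import numpy as np

def gcc_phat(frame, max_delay=44):
    """GCC-PHAT for lags -max_delay..max_delay (44 samples is ~1 ms at 44.1 kHz)."""
    n_fft = 2 * frame.shape[0]
    # cross-power spectrum of the two channels
    spec = (np.fft.rfft(frame[:, 0], n_fft)
            * np.conj(np.fft.rfft(frame[:, 1], n_fft)))
    # PHAT weighting: discard magnitude, keep phase only
    spec = spec / np.maximum(np.abs(spec), 1e-20)
    ccf = np.fft.irfft(spec)
    # reorder so that lag 0 sits in the middle
    return np.concatenate((ccf[-max_delay:], ccf[:max_delay + 1]))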
def syn_record(src_fpath_all, set_dir, n_wav_per_azi, task_i, pb):
    """synthesize spatial recordings as well as the corresponding direct
    sound for each set
    """
    filter_gpu = Filter_GPU(gpu_index=1)
    brirs_direct = load_brirs('Anechoic')
    wav_count = 0
    for room in room_all:
        direct_dir = os.path.join(set_dir, 'direct', room)
        os.makedirs(direct_dir, exist_ok=True)
        rever_dir = os.path.join(set_dir, 'reverb', room)
        os.makedirs(rever_dir, exist_ok=True)

        brirs_room = load_brirs(room)
        for azi_i in range(n_azi):
            for i in range(n_wav_per_azi):
                pb.update(task_i)
                src_fpath = src_fpath_all[wav_count]
                wav_count = wav_count + 1
                src, fs = wav_tools.read_wav(src_fpath)
                src = truncate_silence(src)

                # direct sound: anechoic BRIR
                direct = filter_gpu.brir_filter(src, brirs_direct[azi_i])
                # direct = wav_tools.brir_filter(src, brirs_direct[azi_i])
                direct_fpath = os.path.join(direct_dir, f'{azi_i}_{i}.wav')
                wav_tools.write_wav(direct, fs, direct_fpath)

                # reverberant recording: room BRIR
                reverb = filter_gpu.brir_filter(src, brirs_room[azi_i])
                # reverb = wav_tools.brir_filter(src, brirs_room[azi_i])
                reverb_fpath = os.path.join(rever_dir, f'{azi_i}_{i}.wav')
                wav_tools.write_wav(reverb, fs, reverb_fpath)
def gen_test_sample(room, mic_pos, azi_tar, n_inter, test_i, filter_gpu,
                    front_end):
    src_azi_all = np.zeros(n_inter + 1)
    src_azi_all[0] = azi_tar

    src_fpath_all = get_wav_fpath(n_inter + 1)
    src_tar, _ = wav_tools.read_wav(src_fpath_all[0])
    record_tar = syn_record(src_tar, room, mic_pos, azi_tar, filter_gpu)
    mix = record_tar
    mix_len = mix.shape[0]
    for i in range(n_inter):
        # draw interferer azimuth; minimum separation of 10 degrees from target
        inter_azi = azi_tar
        while np.abs(azi_tar - inter_azi) < azi_sep_theta:
            inter_azi = np.random.choice(azi_tar_all, size=1)[0]
        src_azi_all[i + 1] = inter_azi

        src_inter, _ = wav_tools.read_wav(src_fpath_all[1 + i])
        src_inter_norm = wav_tools.set_snr(src_inter, src_tar, 0)  # 0 dB SNR
        record_inter = syn_record(src_inter_norm, room, mic_pos, inter_azi,
                                  filter_gpu)
        mix_len = min((mix_len, record_inter.shape[0]))
        mix = mix[:mix_len] + record_inter[:mix_len]

    mix_fpath = os.path.join(
        record_set_dir, room, f'{mic_pos}',
        '_'.join((f'{azi_tar}', f'{n_inter}', f'{test_i}.npy')))
    os.makedirs(os.path.dirname(mix_fpath), exist_ok=True)
    np.save(mix_fpath, [mix, src_azi_all])

    fea_fpath = os.path.join(
        fea_set_dir, room, f'{mic_pos}',
        '_'.join((f'{azi_tar}', f'{n_inter}', f'{test_i}.npy')))
    os.makedirs(os.path.dirname(fea_fpath), exist_ok=True)
    [cue_frame_all, ccf_frame_all] = front_end.cal_cues(tar=mix,
                                                        frame_len=frame_len,
                                                        shift_len=shift_len,
                                                        max_delay=max_delay,
                                                        n_worker=1)
    np.save(fea_fpath, [cue_frame_all, ccf_frame_all, src_azi_all])
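# A minimal sketch of what wav_tools.set_snr is assumed to do: scale x so that
# the ratio of ref power to scaled-x power equals snr dB. This is an
# assumption about the library's behavior, not its actual code.
import numpy as np

def set_snr(x, ref, snr):
    # after scaling, 10 * log10(P_ref / P_x) == snr
    p_x = np.mean(x ** 2)
    p_ref = np.mean(ref ** 2)
    gain = np.sqrt(p_ref / p_x * 10 ** (-snr / 10.0))
    return x * gain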
def main():
    wav, fs = wav_tools.read_wav('record.wav')
    gt_filter = GTF(fs, freq_low=80, freq_high=5e3, n_band=32)

    wav_band_all_py = gt_filter.filter_py(wav)
    np.save('wav_band_all_py.npy', wav_band_all_py)
    # wav_band_all_py = np.load('wav_band_all_py.npy')
    print(np.max(wav_band_all_py))

    wav_band_all = gt_filter.filter(wav)
    print(np.max(wav_band_all))

    # compare the two filterbank implementations band by band
    for band_i in range(32):
        fig, ax = plt.subplots(2, 1)
        ax[0].plot(wav_band_all[band_i, :, 0].T)
        ax[0].plot(wav_band_all_py[band_i, :, 0].T)
        ax[0].set_xlim([5000, 5050])
        ax[1].plot(wav_band_all_py[band_i, :, 0].T)
        ax[1].plot(wav_band_all[band_i, :, 0].T)
        ax[1].set_xlim([5000, 5050])
        fig.savefig(f'../images/eg_{band_i}.png')
        plt.close(fig)
def wav2npy(reverb_set_dir, npy_dir, is_anechoic):
    """convert wav recordings to npy batch files
    Args:
        reverb_set_dir: directory where reverberant recordings exist
        npy_dir: directory where npy batch files are saved
        is_anechoic: flag stored alongside each batch
    """
    frame_len = 320
    shift_len = 160
    n_azi = 37
    batch_size = 128

    os.makedirs(npy_dir, exist_ok=True)

    fpath_reverb_all = get_fpath(reverb_set_dir, '.wav', is_absolute=True)
    if len(fpath_reverb_all) < 1:
        raise Exception('empty folder:{}'.format(reverb_set_dir))

    pb = ProcessBar(len(fpath_reverb_all))
    batch_count = 0
    x_r = np.zeros((0, frame_len, 2, 1))
    x_d = np.zeros((0, frame_len, 2, 1))
    y_loc = np.zeros((0, n_azi))
    for fpath_reverb in fpath_reverb_all:
        pb.update()
        # reverberant signal
        record, fs = wav_tools.read_wav(fpath_reverb)
        x_r_file = np.expand_dims(
            wav_tools.frame_data(record, frame_len, shift_len), axis=-1)
        # direct signal
        fpath_direct = fpath_reverb.replace('reverb', 'direct')
        direct, fs = wav_tools.read_wav(fpath_direct)
        x_d_file = np.expand_dims(
            wav_tools.frame_data(direct, frame_len, shift_len), axis=-1)

        # one-hot azimuth label
        n_sample_file = x_d_file.shape[0]
        if x_r_file.shape[0] != n_sample_file:
            raise Exception('sample numbers do not match')
        fname = os.path.basename(fpath_reverb)
        azi = np.int16(fname.split('_')[0])
        y_loc_file = np.zeros((n_sample_file, n_azi))
        y_loc_file[:, azi] = 1

        x_r = np.concatenate((x_r, x_r_file), axis=0)
        x_d = np.concatenate((x_d, x_d_file), axis=0)
        y_loc = np.concatenate((y_loc, y_loc_file), axis=0)
        while x_d.shape[0] >= batch_size:
            x_r_batch = x_r[:batch_size]
            x_d_batch = x_d[:batch_size]
            y_loc_batch = y_loc[:batch_size]
            npy_fpath = os.path.join(npy_dir, '{}.npy'.format(batch_count))
            np.save(npy_fpath,
                    [x_d_batch, x_r_batch, y_loc_batch, is_anechoic])
            batch_count = batch_count + 1
            x_r = x_r[batch_size:]
            x_d = x_d[batch_size:]
            y_loc = y_loc[batch_size:]
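# Reading one saved batch back: np.save was given a Python list holding arrays
# and a flag, which numpy stores as an object array, so loading needs
# allow_pickle=True. The file name here is an example.
import numpy as np

x_d_batch, x_r_batch, y_loc_batch, is_anechoic = np.load('0.npy',
                                                         allow_pickle=True)
print(x_r_batch.shape)    # (128, 320, 2, 1): batch, frame, channels, 1
print(y_loc_batch.shape)  # (128, 37): one-hot azimuth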
tar_fpath = '../Data/Records/train/RT_0.5/5/19_11_20_tar.wav'
inter_fpath = '../Data/Records/train/RT_0.5/5/19_11_20_inter.wav'
band_i = 20

# fea_fpath is assumed to be defined earlier in this script
*_, mic_pos, fname = fea_fpath.split('/')
tar_azi, inter_azi, snr = [int(item) for item in fname[:-4].split('_')]
fea_file = np.load(fea_fpath)
cue_frame_all = fea_file['cue_frame_all']
ccf_frame_all = fea_file['ccf_frame_all']
snr_frame_all = fea_file['snr_frame_all']
n_frame = cue_frame_all.shape[1]

# 1. vad, on one channel (left)
tar_record, fs = wav_tools.read_wav(tar_fpath)
inter_record, fs = wav_tools.read_wav(inter_fpath)
theta_vad = 40
vad_flag_frame_all = wav_tools.vad(x=tar_record[:, 0],
                                   frame_len=frame_len,
                                   shift_len=shift_len,
                                   theta=theta_vad,
                                   is_plot=False)
vad_flag_frame_all = vad_flag_frame_all[:n_frame]

# 2. SNR in each frequency band
snr_flag_frame_all = snr_frame_all[band_i] > 0.0

# 3. correlation coefficients
def file_reader(fea_dir, band_tar=None, azi_tar=None, is_screen=False,
                record_dir=None, is_plot=False, fig_name=None, is_pb=False):
    # thresholds for frame screening
    theta_vad = 40
    theta_corr_coef = 0.3
    theta_itd = 44.0 / 44.1  # about 1 ms, in ms

    if is_screen:
        src_fpath_all = load_src_fpath(record_dir)

    fea_fpath_all = get_fpath(fea_dir, suffix='.npz', is_absolute=True)
    pb = ProcessBar(len(fea_fpath_all))
    for fea_fpath in fea_fpath_all:
        if is_pb:
            pb.update()
        *_, room, mic_pos, fname = fea_fpath[:-4].split('/')
        azi, wav_i, snr = [np.int16(item) for item in fname.split('_')]
        if (azi_tar is not None) and (azi != azi_tar):
            continue

        fea_file = np.load(fea_fpath)
        cue_frame_all = fea_file['cue_frame_all']
        ccf_frame_all = fea_file['ccf_frame_all']
        snr_frame_all = fea_file['snr_frame_all']

        if not is_screen:
            if band_tar is None:
                yield np.transpose(cue_frame_all, axes=(1, 0, 2))
            else:
                yield cue_frame_all[band_tar]
        else:
            n_frame = cue_frame_all.shape[1]
            flag_frame_all_band_all = []

            # frame screening
            # 1. vad, on one channel (left)
            src_fpath_tar = \
                src_fpath_all[room][mic_pos][f'{azi}_{wav_i}_{snr}'][0]
            src_fpath_tar = src_fpath_tar.replace('Data/TIMIT',
                                                  'Data/TIMIT_wav')
            src_tar, fs = wav_tools.read_wav(src_fpath_tar)
            tar_fpath = ''.join((f'{record_dir}/{room}/{mic_pos}/',
                                 f'{azi}_{wav_i}_{snr}_tar.wav'))
            tar, fs = wav_tools.read_wav(tar_fpath)
            if tar.shape[0] != src_tar.shape[0]:
                raise Exception()
            # compensate the time delay between source and recording
            delay = 190
            src_tar = np.concatenate((src_tar[delay:], np.zeros(delay)))
            vad_flag_frame_all = wav_tools.vad(x=src_tar,
                                               frame_len=frame_len,
                                               shift_len=shift_len,
                                               theta=theta_vad,
                                               is_plot=False)
            vad_flag_frame_all = np.squeeze(vad_flag_frame_all[:n_frame])

            for band_i in range(n_band):
                if (band_tar is not None) and (band_i != band_tar):
                    continue
                # 2. SNR in each frequency band
                snr_flag_frame_all = snr_frame_all[band_i] > 0.0
                # 3. correlation coefficients
                ccf_flag_frame_all = np.greater(
                    np.max(ccf_frame_all[band_i], axis=1), theta_corr_coef)
                # 4. ITD range, in ms
                itd_flag_frame_all = np.less(
                    np.abs(cue_frame_all[band_i, :, 0]), theta_itd)
                # combine all criteria
                flag_frame_all = np.logical_and.reduce(
                    (vad_flag_frame_all, snr_flag_frame_all,
                     ccf_flag_frame_all, itd_flag_frame_all))
                flag_frame_all_band_all.append(flag_frame_all)

                # plot waveform and the corresponding criteria results
                if is_plot:
                    tar_fpath = '{}_{}.wav'.format(azi, wav_i)
                    tar, fs = wav_tools.read_wav(tar_fpath)
                    inter_fpath = '{}_{}_{}.wav'.format(azi, wav_i, snr)
                    inter, fs = wav_tools.read_wav(inter_fpath)
                    front_end = Auditory_model(fs=fs,
                                               cf_low=freq_low,
                                               cf_high=freq_high,
                                               n_band=n_band,
                                               is_middle_ear=True,
                                               ihc_type='Roman')
                    t_frame = np.arange(n_frame) * shift_len \
                        + int(frame_len / 2)
                    fig = plt.figure(figsize=(8, 4), tight_layout=True)
                    ax1 = plt.subplot(221)
                    ax1.plot(np.sum(front_end.filter(inter)[band_i], axis=1))
                    ax1.plot(np.sum(front_end.filter(tar)[band_i], axis=1))
                    ax_twin = ax1.twinx()
                    ax_twin.plot(t_frame, flag_frame_all, color='red')

                    ax2 = plt.subplot(223)
                    ax2.plot(t_frame, vad_flag_frame_all + 0.09, label='vad')
                    ax2.plot(t_frame, snr_flag_frame_all + 0.06, label='snr')
                    ax2.plot(t_frame, ccf_flag_frame_all + 0.03, label='ccf')
                    ax2.plot(t_frame, itd_flag_frame_all, label='itd')
                    ax2.legend()

                    ax3 = plt.subplot(122)
                    plot_cue_sample(cue_frame_all[band_i], ax3)
                    plot_cue_sample(cue_frame_all[band_i, flag_frame_all, :],
                                    ax3)
                    plot_tools.savefig(fig, fig_name=fig_name, fig_dir='./')
                    return

            if band_tar is None:
                flag_frame_all = np.logical_and.reduce(
                    flag_frame_all_band_all)
                yield np.transpose(cue_frame_all[:, flag_frame_all, :],
                                   axes=(1, 0, 2))
            else:
                flag_frame_all = flag_frame_all_band_all[0]
                yield cue_frame_all[band_tar, flag_frame_all, :]
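# A minimal sketch of the frame-energy VAD that wav_tools.vad is assumed to
# implement: frames whose energy is within theta dB of the loudest frame are
# marked as active. This is an assumption about the library, not its code.
import numpy as np

def vad(x, frame_len, shift_len, theta=40):
    n_frame = (x.shape[0] - frame_len) // shift_len + 1
    energy = np.asarray(
        [np.sum(x[i * shift_len: i * shift_len + frame_len] ** 2)
         for i in range(n_frame)])
    energy_db = 10 * np.log10(energy + 1e-20)
    return energy_db > (np.max(energy_db) - theta)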
def main():
    args = parse_args()
    x, fs = wav_tools.read_wav(args.wav_path)
    filter(x, fs, args.freq_low, args.freq_high, args.n_band,
           args.result_dir, args.fig_path)