def file_reader(record_set_dir, batch_size=-1, is_shuffle=True):
    """Read wav files in given directories, one file at a time
    Args:
        record_set_dir: directory or list of directories where recordings exist
        batch_size: if not specified, yield the data of one file per iteration
        is_shuffle: whether to shuffle the file order
    Returns:
        sample generator, [samples, labels]
    """
    if isinstance(record_set_dir, list):
        dirs = record_set_dir
    else:
        dirs = [record_set_dir]

    fpath_all = []
    for sub_set_dir in dirs:
        fpath_all_sub = get_fpath(sub_set_dir, '.wav', is_absolute=True)
        fpath_all.extend(fpath_all_sub)
    if is_shuffle:
        np.random.shuffle(fpath_all)

    if len(fpath_all) < 1:
        raise Exception('empty folder:{}'.format(record_set_dir))

    frame_len = 320
    shift_len = 160
    n_azi = 37

    if batch_size > 0:
        x_all = np.zeros((0, frame_len, 2, 1))
        y_all = np.zeros((0, n_azi))

    for fpath in fpath_all:
        record, fs = wav_tools.read_wav(fpath)
        x_file_all = wav_tools.frame_data(record, frame_len, shift_len)
        x_file_all = np.expand_dims(x_file_all, axis=-1)

        # onehot azi label, parsed from the file name
        n_sample_file = x_file_all.shape[0]
        fname = os.path.basename(fpath)
        azi = np.int16(fname.split('_')[0])
        y_file_all = np.zeros((n_sample_file, n_azi))
        y_file_all[:, azi] = 1

        if batch_size > 0:
            x_all = np.concatenate((x_all, x_file_all), axis=0)
            y_all = np.concatenate((y_all, y_file_all), axis=0)
            # yield complete batches as soon as enough frames are buffered
            while x_all.shape[0] >= batch_size:
                x_batch = copy.deepcopy(x_all[:batch_size])
                y_batch = copy.deepcopy(y_all[:batch_size])
                x_all = x_all[batch_size:]
                y_all = y_all[batch_size:]
                yield [x_batch, y_batch]
        else:
            yield [x_file_all, y_file_all]
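# Usage sketch for the wav-file reader above (not part of the original
# pipeline): iterate fixed-size batches and check their shapes. The record
# directory is the test-set path used elsewhere in this section; any folder
# of wav files named '{azi}_... .wav' would work the same way.
for x_batch, y_batch in file_reader('../Data/Records/test',
                                    batch_size=128, is_shuffle=True):
    assert x_batch.shape[1:] == (320, 2, 1)  # frame_len x 2 channels x 1
    assert y_batch.shape[1] == 37            # one-hot azimuth label
    break  # inspect only the first batch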
def cal_fea(record_dir, fea_dir):
    """calculate GCC-PHAT features
    Args:
        record_dir: wave dataset directory
        fea_dir: output directory for the feature files
    """
    if not os.path.exists(fea_dir):
        os.makedirs(fea_dir)

    wav_fpath_all = get_fpath(dir_path=record_dir, suffix='.wav',
                              pattern='reverb')
    pb = ProcessBar(max_value=len(wav_fpath_all),
                    title=f'GCC_PHAT {record_dir}')
    pool = Pool(24)
    for wav_fpath in wav_fpath_all:
        fea_fpath = os.path.join(fea_dir, '{}.npy'.format(wav_fpath[:-4]))
        if os.path.exists(fea_fpath):
            # skip features that were already computed
            continue

        data, fs = wav_tools.read_wav(os.path.join(record_dir, wav_fpath))
        frame_all = wav_tools.frame_data(data, frame_len=320, shift_len=160)
        n_frame = frame_all.shape[0]
        fea_frame_all = pool.map(gcc_phat_parallel_f,
                                 [frame_all[i] for i in range(n_frame)])
        fea_frame_all = np.asarray(fea_frame_all)

        dir_tmp = os.path.dirname(fea_fpath)
        if not os.path.exists(dir_tmp):
            os.makedirs(dir_tmp)
        np.save(fea_fpath, fea_frame_all)
        pb.update()
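# Hedged example invocation (the directory pairing is an assumption based on
# the paths used elsewhere in this section): compute GCC-PHAT features for
# the test records, mirroring the folder layout under the feature directory.
cal_fea(record_dir='../Data/Records/test',
        fea_dir='../Data/Features/test')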
def file_reader(reverb_set_dir, batch_size=128, is_shuffle=True,
                frame_len=320, shift_len=160, n_azi=37):
    """Read pre-batched .npy files in given directories, one file at a time
    Args:
        reverb_set_dir: directory or list of directories where the batch
            files exist
        batch_size: number of samples stored in each batch file
        is_shuffle: whether to shuffle the file order
    Returns:
        sample generator, [x_r_batch, y_loc_batch]
    """
    if isinstance(reverb_set_dir, list):
        dir_all = reverb_set_dir
    else:
        dir_all = [reverb_set_dir]

    fpath_reverb_all = []
    for dir_fpath in dir_all:
        fpath_all_tmp = get_fpath(dir_fpath, '.npy', is_absolute=True)
        fpath_reverb_all.extend(fpath_all_tmp)
    if is_shuffle:
        np.random.shuffle(fpath_reverb_all)

    for fpath_reverb in fpath_reverb_all:
        x_d_batch, x_r_batch, y_loc_batch, is_anechoic = np.load(
            fpath_reverb, allow_pickle=True)
        yield x_r_batch, y_loc_batch
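# Usage sketch (the npy directory is a hypothetical path): pull one
# pre-built batch, as written by wav2npy further below, and inspect it.
reader = file_reader('../Data/npy/train', batch_size=128)
x_r_batch, y_loc_batch = next(reader)
print(x_r_batch.shape)    # expected (128, 320, 2, 1)
print(y_loc_batch.shape)  # expected (128, 37)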
def file_reader(dataset_dir, norm_coef_fpath, batch_size=-1, is_shuffle=True):
    """Read spectrum files under given directory
    Args:
        dataset_dir: string or list of strings
        norm_coef_fpath: file path of normalization coefficients
        batch_size: if not specified, return the data of one file per iteration
        is_shuffle: whether to shuffle the file order
    Returns:
        sample generator, [sample, label_onehot]
    """
    n_azi = 37  # number of sound positions
    fea_len = 37  # size of feature vector

    if isinstance(dataset_dir, list):
        dir_all = dataset_dir
    else:
        dir_all = [dataset_dir]

    mean, std = np.load(norm_coef_fpath)

    fpath_all = []
    for dir_tmp in dir_all:
        fpath_all_tmp = get_fpath(dir_tmp, suffix='.npy', is_absolute=True)
        fpath_all.extend(fpath_all_tmp)
    if is_shuffle:
        np.random.shuffle(fpath_all)  # randomize file order

    if len(fpath_all) < 1:
        raise Exception('folder is empty: {}'.format(dataset_dir))

    if batch_size > 0:
        x_all = np.zeros((0, fea_len))
        y_all = np.zeros((0, n_azi))

    for fpath in fpath_all:
        fea_file_all = np.load(fpath)
        x_file_all = np.divide(fea_file_all - mean, std)
        n_sample_tmp = x_file_all.shape[0]

        # onehot azimuth label parsed from the file name
        fname, _ = os.path.basename(fpath).split('.')
        azi, _ = map(int, fname.split('_'))
        y_file_all = np.zeros((n_sample_tmp, n_azi))
        y_file_all[:, azi] = 1

        if batch_size > 0:
            x_all = np.concatenate((x_all, x_file_all), axis=0)
            y_all = np.concatenate((y_all, y_file_all), axis=0)
            while x_all.shape[0] >= batch_size:
                x_batch = copy.deepcopy(x_all[:batch_size])
                y_batch = copy.deepcopy(y_all[:batch_size])
                x_all = x_all[batch_size:]
                y_all = y_all[batch_size:]
                yield [x_batch, y_batch]
        else:
            yield [x_file_all, y_file_all]
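# Hedged sketch of how the normalization coefficients loaded above could be
# produced: per-dimension mean/std over all training feature files. The
# training-set path and output file name are assumptions, not the original
# script; for large datasets a running mean/variance would be preferable.
fea_train_all = np.concatenate(
    [np.load(f) for f in get_fpath('../Data/Features/train',
                                   suffix='.npy', is_absolute=True)],
    axis=0)
np.save('../Data/Features/norm_coef.npy',
        [np.mean(fea_train_all, axis=0), np.std(fea_train_all, axis=0)])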
def file_reader(data_dir):
    """Read spectrum files under given directory
    Args:
        data_dir: directory or list of directories of feature files
    Returns:
        sample generator, feature array of one file per iteration
    """
    if isinstance(data_dir, list):
        dir_list = data_dir
    else:
        dir_list = [data_dir]

    for sub_set_dir in dir_list:
        fea_fpath_list = get_fpath(sub_set_dir, '.npy')
        for fea_fpath in fea_fpath_list:
            fea_fpath_abs = os.path.join(sub_set_dir, fea_fpath)
            fea = np.load(fea_fpath_abs)
            yield fea
def load_src_fpath(record_dir):
    """Collect the source-file paths listed in the src_fpath.txt files under record_dir
    Returns:
        nested dict, fpath_all[room][mic_pos][condition] = [fpath_tar, fpath_inter]
    """
    src_fpath_fpath_all = get_fpath(record_dir, suffix='.txt',
                                    is_absolute=True)
    fpath_all = {}
    for src_fpath_fpath in src_fpath_fpath_all:
        *_, room, mic_pos, fname = src_fpath_fpath.split('/')
        if fname != 'src_fpath.txt':
            raise Exception('unexpected txt file: {}'.format(src_fpath_fpath))
        if room not in fpath_all.keys():
            fpath_all[room] = {}
        if mic_pos not in fpath_all[room].keys():
            fpath_all[room][mic_pos] = {}
        with open(src_fpath_fpath, 'r') as src_fpath_file:
            line_all = src_fpath_file.readlines()
            for line in line_all[1:]:  # skip the header line
                condition, fpath_tar, fpath_inter = line.split()
                fpath_all[room][mic_pos][condition] = [fpath_tar, fpath_inter]
    return fpath_all
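# Usage sketch: walk the nested dict returned by load_src_fpath,
# fpath_all[room][mic_pos][condition] -> [fpath_tar, fpath_inter].
src_fpath_all = load_src_fpath('../Data/Records/test')
for room, mic_pos_dict in src_fpath_all.items():
    for mic_pos, condition_dict in mic_pos_dict.items():
        for condition, (fpath_tar, fpath_inter) in condition_dict.items():
            print(room, mic_pos, condition, fpath_tar, fpath_inter)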
azi_tar_all = np.arange(8, 29)
n_inter_all = [0, 1, 2, 3]
n_test = 3

n_room = len(room_all)
n_mic_pos = len(mic_pos_test_all)
n_azi_tar = azi_tar_all.shape[0]
n_n_inter = len(n_inter_all)

record_set_dir = '../Data/Records/test'
fea_set_dir = '../Data/Features/test'

# wave file path of TIMIT test set
TIMIT_dir = '/home/st/Work_Space/Data/TIMIT/44100Hz/TIMIT/TEST'
src_fpath_all = [
    os.path.join(TIMIT_dir, item) for item in get_fpath(TIMIT_dir, '.wav')
]


def get_wav_fpath(n):
    return np.random.choice(src_fpath_all, size=n, replace=False)


def syn_record(src, room, mic_pos, azi, filter_gpu):
    brir_fpath = '../Data/BRIRs/test/{}/{}/{}.mat'.format(room, mic_pos, azi)
    brir = sio.loadmat(brir_fpath)['data']
    return filter_gpu.brir_filter(src, brir)


def gen_test_sample(room, mic_pos, azi_tar, n_inter, test_i, filter_gpu,
                    front_end):
def wav2npy(reverb_set_dir, npy_dir, is_anechoic):
    """Frame reverberant/direct wav pairs under reverb_set_dir and save them
    as batched .npy files
    Args:
        reverb_set_dir: directory where the reverberant recordings exist
        npy_dir: output directory for the batch files
        is_anechoic: flag stored with each batch
    """
    frame_len = 320
    shift_len = 160
    n_azi = 37
    batch_size = 128

    os.makedirs(npy_dir, exist_ok=True)

    fpath_reverb_all = get_fpath(reverb_set_dir, '.wav', is_absolute=True)
    if len(fpath_reverb_all) < 1:
        raise Exception('empty folder:{}'.format(reverb_set_dir))
    pb = ProcessBar(len(fpath_reverb_all))

    batch_count = 0
    x_r = np.zeros((0, frame_len, 2, 1))
    x_d = np.zeros((0, frame_len, 2, 1))
    y_loc = np.zeros((0, n_azi))
    for fpath_reverb in fpath_reverb_all:
        pb.update()

        # reverberant signal
        record, fs = wav_tools.read_wav(fpath_reverb)
        x_r_file = np.expand_dims(
            wav_tools.frame_data(record, frame_len, shift_len), axis=-1)

        # direct-path signal
        fpath_direct = fpath_reverb.replace('reverb', 'direct')
        direct, fs = wav_tools.read_wav(fpath_direct)
        x_d_file = np.expand_dims(
            wav_tools.frame_data(direct, frame_len, shift_len), axis=-1)

        # onehot azi label
        n_sample_file = x_d_file.shape[0]
        if x_r_file.shape[0] != n_sample_file:
            raise Exception('sample numbers do not match')
        fname = os.path.basename(fpath_reverb)
        azi = np.int16(fname.split('_')[0])
        y_loc_file = np.zeros((n_sample_file, n_azi))
        y_loc_file[:, azi] = 1

        x_r = np.concatenate((x_r, x_r_file), axis=0)
        x_d = np.concatenate((x_d, x_d_file), axis=0)
        y_loc = np.concatenate((y_loc, y_loc_file), axis=0)

        # write out complete batches as soon as enough frames are buffered
        while x_d.shape[0] >= batch_size:
            x_r_batch = x_r[:batch_size]
            x_d_batch = x_d[:batch_size]
            y_loc_batch = y_loc[:batch_size]
            npy_fpath = os.path.join(npy_dir, '{}.npy'.format(batch_count))
            np.save(npy_fpath,
                    [x_d_batch, x_r_batch, y_loc_batch, is_anechoic])
            batch_count = batch_count + 1

            x_r = x_r[batch_size:]
            x_d = x_d[batch_size:]
            y_loc = y_loc[batch_size:]
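# Hypothetical invocation (the directory layout is an assumption): pack the
# reverberant/direct wav pairs of a training set into 128-frame batch files
# that the npy-based file_reader above can consume.
wav2npy(reverb_set_dir='../Data/Records/train/reverb',
        npy_dir='../Data/npy/train',
        is_anechoic=False)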
def file_reader(fea_dir, band_tar=None, azi_tar=None, is_screen=False,
                record_dir=None, is_plot=False, fig_name=None, is_pb=False):
    """Read binaural cue files (.npz) under fea_dir, optionally screening
    frames by VAD, band SNR, cross-correlation coefficient and ITD range
    """
    theta_vad = 40
    theta_corr_coef = 0.3
    theta_itd = 44.0 / 44.1  # ms

    if is_screen:
        src_fpath_all = load_src_fpath(record_dir)

    fea_fpath_all = get_fpath(fea_dir, suffix='.npz', is_absolute=True)
    pb = ProcessBar(len(fea_fpath_all))
    for fea_fpath in fea_fpath_all:
        if is_pb:
            pb.update()

        *_, room, mic_pos, fname = fea_fpath[:-4].split('/')
        azi, wav_i, snr = [np.int16(item) for item in fname.split('_')]
        if (azi_tar is not None) and (azi != azi_tar):
            continue

        fea_file = np.load(fea_fpath)
        cue_frame_all = fea_file['cue_frame_all']
        ccf_frame_all = fea_file['ccf_frame_all']
        snr_frame_all = fea_file['snr_frame_all']

        if not is_screen:
            if band_tar is None:
                yield np.transpose(cue_frame_all, axes=(1, 0, 2))
            else:
                yield cue_frame_all[band_tar]
        else:
            n_frame = cue_frame_all.shape[1]
            flag_frame_all_band_all = []

            # feature selection
            # 1. vad, on one channel (L)
            src_fpath_tar = \
                src_fpath_all[room][mic_pos][f'{azi}_{wav_i}_{snr}'][0]
            src_fpath_tar = src_fpath_tar.replace('Data/TIMIT',
                                                  'Data/TIMIT_wav')
            src_tar, fs = wav_tools.read_wav(src_fpath_tar)

            tar_fpath = ''.join((f'{record_dir}/{room}/{mic_pos}/',
                                 f'{azi}_{wav_i}_{snr}_tar.wav'))
            tar, fs = wav_tools.read_wav(tar_fpath)
            if tar.shape[0] != src_tar.shape[0]:
                raise Exception()

            # compensate the time delay between source and recording
            delay = 190
            src_tar = np.concatenate((src_tar[delay:], np.zeros(delay)))
            vad_flag_frame_all = wav_tools.vad(x=src_tar,
                                               frame_len=frame_len,
                                               shift_len=shift_len,
                                               theta=theta_vad,
                                               is_plot=False)
            vad_flag_frame_all = np.squeeze(vad_flag_frame_all[:n_frame])

            for band_i in range(n_band):
                if band_tar is not None:
                    if band_i != band_tar:
                        continue
                # 2. SNR in each frequency band
                snr_flag_frame_all = snr_frame_all[band_i] > 0.0
                # 3. correlation coefficients
                ccf_flag_frame_all = np.greater(
                    np.max(ccf_frame_all[band_i], axis=1), theta_corr_coef)
                # 4. ITD range
                itd_flag_frame_all = np.less(
                    np.abs(cue_frame_all[band_i, :, 0]), theta_itd)  # itd in ms

                # combine all criteria
                flag_frame_all = np.logical_and.reduce(
                    (vad_flag_frame_all, snr_flag_frame_all,
                     ccf_flag_frame_all, itd_flag_frame_all))
                flag_frame_all_band_all.append(flag_frame_all)

                # plot waveform and corresponding criteria result
                if is_plot:
                    tar_fpath = os.path.join('{}_{}.wav'.format(azi, wav_i))
                    tar, fs = wav_tools.read_wav(tar_fpath)
                    inter_fpath = '{}_{}_{}.wav'.format(azi, wav_i, snr)
                    inter, fs = wav_tools.read_wav(inter_fpath)
                    front_end = Auditory_model(fs=fs, cf_low=freq_low,
                                               cf_high=freq_high,
                                               n_band=n_band,
                                               is_middle_ear=True,
                                               ihc_type='Roman')
                    t_frame = (np.arange(n_frame) * shift_len
                               + int(frame_len / 2))

                    fig = plt.figure(figsize=(8, 4), tight_layout=True)
                    ax1 = plt.subplot(221)
                    ax1.plot(np.sum(front_end.filter(inter)[band_i], axis=1))
                    ax1.plot(np.sum(front_end.filter(tar)[band_i], axis=1))
                    ax_twin = ax1.twinx()
                    ax_twin.plot(t_frame, flag_frame_all, color='red')

                    ax2 = plt.subplot(223)
                    ax2.plot(t_frame, vad_flag_frame_all + 0.09, label='vad')
                    ax2.plot(t_frame, snr_flag_frame_all + 0.06, label='snr')
                    ax2.plot(t_frame, ccf_flag_frame_all + 0.03, label='ccf')
                    ax2.plot(t_frame, itd_flag_frame_all, label='itd')
                    ax2.legend()

                    ax3 = plt.subplot(122)
                    plot_cue_sample(cue_frame_all[band_i], ax3)
                    plot_cue_sample(cue_frame_all[band_i, flag_frame_all, :],
                                    ax3)
                    plot_tools.savefig(fig, fig_name=fig_name, fig_dir='./')
                    return

            if band_tar is None:
                flag_frame_all = np.logical_and.reduce(
                    flag_frame_all_band_all)
                yield np.transpose(cue_frame_all[:, flag_frame_all, :],
                                   axes=(1, 0, 2))
            else:
                flag_frame_all = flag_frame_all_band_all[0]
                yield cue_frame_all[band_tar, flag_frame_all, :]
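# Usage sketch (paths and azimuth index are assumptions): iterate screened
# binaural cues for one target azimuth and stack them for later processing.
cue_all = []
for cue_frame in file_reader(fea_dir='../Data/Features/test',
                             azi_tar=18, is_screen=True, is_pb=True,
                             record_dir='../Data/Records/test'):
    cue_all.append(cue_frame)  # each item: (n_frame_kept, n_band, n_cue)
cue_all = np.concatenate(cue_all, axis=0)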