コード例 #1
0
ファイル: file_reader.py プロジェクト: makabakas/WavLoc
def file_reader(record_set_dir, batch_size=-1, is_shuffle=True):
    """Read wav files in the given directories and yield framed samples.

    Every recording is cut into frames (frame length 320, shift 160) by
    `wav_tools.frame_data`, a trailing axis of size 1 is appended, and each
    frame gets a one-hot label of length 37. The hot index is the integer in
    front of the first '_' of the file name.

    Args:
        record_set_dir: directory or list of directories where recordings
            exist
        batch_size: if positive, frames are buffered across files and yielded
            in batches of exactly `batch_size` frames; otherwise all frames
            of one file are yielded per time
        is_shuffle: whether to shuffle the order of the files
    Returns:
        samples generator, [samples, label_all]
    Raises:
        Exception: if no wav file is found under `record_set_dir`

    Note:
        In batch mode a batch is only emitted while the buffer holds more
        than `batch_size` frames, so the frames left in the buffer after the
        last file are not yielded.
    """
    if isinstance(record_set_dir, list):
        dirs = record_set_dir
    else:
        dirs = [record_set_dir]

    fpath_all = []
    for sub_set_dir in dirs:
        fpath_all_sub = get_fpath(sub_set_dir, '.wav', is_absolute=True)
        fpath_all.extend(fpath_all_sub)

    if is_shuffle:
        np.random.shuffle(fpath_all)

    if len(fpath_all) < 1:
        raise Exception('empty folder:{}'.format(record_set_dir))

    frame_len = 320
    shift_len = 160
    n_azi = 37

    # The buffers must exist for every batch_size that takes the batching
    # branch below (batch_size > 0); the previous `> 1` test left them
    # undefined for batch_size == 1.
    if batch_size > 0:
        x_all = np.zeros((0, frame_len, 2, 1))
        y_all = np.zeros((0, n_azi))

    for fpath in fpath_all:
        record, fs = wav_tools.read_wav(fpath)
        x_file_all = wav_tools.frame_data(record, frame_len, shift_len)
        x_file_all = np.expand_dims(x_file_all, axis=-1)

        # onehot azi label
        n_sample_file = x_file_all.shape[0]
        fname = os.path.basename(fpath)
        azi = np.int16(fname.split('_')[0])
        y_file_all = np.zeros((n_sample_file, n_azi))
        y_file_all[:, azi] = 1

        if batch_size > 0:
            x_all = np.concatenate((x_all, x_file_all), axis=0)
            y_all = np.concatenate((y_all, y_file_all), axis=0)

            while x_all.shape[0] > batch_size:
                # copy so the yielded batch does not share memory with the
                # buffer that keeps being sliced
                x_batch = x_all[:batch_size].copy()
                y_batch = y_all[:batch_size].copy()

                x_all = x_all[batch_size:]
                y_all = y_all[batch_size:]

                yield [x_batch, y_batch]
        else:
            yield [x_file_all, y_file_all]
コード例 #2
0
def cal_fea(record_dir, fea_dir):
    """Calculate GCC-PHAT features of wav files and save them as npy files.

    For every wav file returned by `get_fpath` (suffix '.wav', pattern
    'reverb'), the recording is framed (frame length 320, shift 160),
    `gcc_phat_parallel_f` is applied to each frame in a worker pool, and the
    stacked result is saved under `fea_dir` with the same relative path and
    the extension replaced by '.npy'. Files whose feature file already
    exists are skipped.

    Args:
        record_dir: wave dataset directory
        fea_dir: directory where feature files are written
    """
    # NOTE(review): this creates the *input* directory when it is missing,
    # which then yields no wav files — kept as in the original; confirm
    # whether `fea_dir` was intended.
    os.makedirs(record_dir, exist_ok=True)

    wav_fpath_all = get_fpath(dir_path=record_dir,
                              suffix='.wav',
                              pattern='reverb')

    pb = ProcessBar(max_value=len(wav_fpath_all),
                    title=f'GCC_PHAT {record_dir}')
    pool = Pool(24)
    try:
        for wav_fpath in wav_fpath_all:
            # wav_fpath[:-4] strips the '.wav' extension
            fea_fpath = os.path.join(fea_dir,
                                     '{}.npy'.format(wav_fpath[:-4]))
            if os.path.exists(fea_fpath):
                continue

            data, fs = wav_tools.read_wav(os.path.join(record_dir, wav_fpath))
            frame_all = wav_tools.frame_data(data,
                                             frame_len=320,
                                             shift_len=160)

            # one task per frame (iteration over the first axis)
            fea_frame_all = pool.map(gcc_phat_parallel_f, list(frame_all))
            fea_frame_all = np.asarray(fea_frame_all)

            os.makedirs(os.path.dirname(fea_fpath), exist_ok=True)
            np.save(fea_fpath, fea_frame_all)

            pb.update()
    finally:
        # release the worker processes even if a file fails
        pool.close()
        pool.join()
コード例 #3
0
ファイル: file_reader_v2.py プロジェクト: makabakas/WavLoc
def file_reader(reverb_set_dir,
                batch_size=128,
                is_shuffle=True,
                frame_len=320,
                shift_len=160,
                n_azi=37):
    """Yield pre-batched samples stored in npy files, one file per time.

    Each npy file is expected to unpack into four items
    (x_d_batch, x_r_batch, y_loc_batch, is_anechoic); only the second and
    third are yielded.

    Args:
        reverb_set_dir: directory or list of directories holding npy files
        batch_size: not used by this function
        is_shuffle: whether to shuffle the order of the files
        frame_len: not used by this function
        shift_len: not used by this function
        n_azi: not used by this function
    Returns:
        samples generator, (x_r_batch, y_loc_batch)
    """
    dir_all = (reverb_set_dir
               if isinstance(reverb_set_dir, list) else [reverb_set_dir])

    # collect the npy files of all directories
    npy_fpath_all = []
    for set_dir in dir_all:
        npy_fpath_all += get_fpath(set_dir, '.npy', is_absolute=True)

    if is_shuffle:
        np.random.shuffle(npy_fpath_all)

    for npy_fpath in npy_fpath_all:
        content = np.load(npy_fpath, allow_pickle=True)
        _, x_r_batch, y_loc_batch, _ = content
        yield x_r_batch, y_loc_batch
コード例 #4
0
def file_reader(dataset_dir, norm_coef_fpath, batch_size=-1, is_shuffle=True):
    """Yield normalized features and one-hot labels from npy files.

    Features of each file are normalized as (fea - mean) / std, where mean
    and std are loaded from `norm_coef_fpath`. The label index is the first
    of the two '_'-separated integers of the file name.

    Args:
        dataset_dir: string or list of strings
        norm_coef_fpath: file path of normalization coefficients
        batch_size: if positive, samples are buffered across files and
            yielded `batch_size` per time (only while the buffer holds more
            than `batch_size` samples); otherwise the data of one file is
            yielded per time
        is_shuffle: whether to randomize the order of files
    Returns:
        generator of [sample, label_onehot]
    Raises:
        Exception: if no npy file is found
    """
    n_azi = 37  # length of the one-hot label
    fea_len = 37  # size of feature

    dir_all = dataset_dir if isinstance(dataset_dir, list) else [dataset_dir]

    mean, std = np.load(norm_coef_fpath)

    fpath_all = []
    for set_dir in dir_all:
        fpath_all += get_fpath(set_dir, suffix='.npy', is_absolute=True)

    if is_shuffle:
        np.random.shuffle(fpath_all)

    if not fpath_all:
        raise Exception('folder is empty: {}'.format(dataset_dir))

    if batch_size > 0:
        # empty buffers that grow as files are read
        x_buf = np.zeros((0, fea_len))
        y_buf = np.zeros((0, n_azi))

    for fpath in fpath_all:
        x_file = (np.load(fpath) - mean) / std

        # file name is expected to look like '<azi>_<int>.npy'
        stem, _ = os.path.basename(fpath).split('.')
        azi, _ = map(int, stem.split('_'))
        y_file = np.zeros((x_file.shape[0], n_azi))
        y_file[:, azi] = 1

        if not batch_size > 0:
            yield [x_file, y_file]
            continue

        x_buf = np.concatenate((x_buf, x_file), axis=0)
        y_buf = np.concatenate((y_buf, y_file), axis=0)
        while x_buf.shape[0] > batch_size:
            x_batch = copy.deepcopy(x_buf[:batch_size])
            y_batch = copy.deepcopy(y_buf[:batch_size])
            x_buf, y_buf = x_buf[batch_size:], y_buf[batch_size:]
            yield [x_batch, y_batch]
コード例 #5
0
def file_reader(data_dir):
    """Yield the content of every npy file under the given directories.

    Args:
        data_dir: directory or list of directories
    Returns:
        generator yielding the array loaded from each npy file
    """
    dir_list = data_dir if isinstance(data_dir, list) else [data_dir]

    for set_dir in dir_list:
        # paths returned by get_fpath are joined with the directory itself
        for fea_fpath_rel in get_fpath(set_dir, '.npy'):
            yield np.load(os.path.join(set_dir, fea_fpath_rel))
コード例 #6
0
def load_src_fpath(record_dir):
    """Collect the source file paths listed in the txt files of a directory.

    Every txt file found must be named 'src_fpath.txt'; its last two parent
    directories are taken as room and mic_pos. The first line of each file
    is skipped and every other line must hold three whitespace-separated
    fields: condition, fpath_tar, fpath_inter.

    Args:
        record_dir: directory searched for txt files
    Returns:
        nested dict, fpath_all[room][mic_pos][condition] =
        [fpath_tar, fpath_inter]
    Raises:
        Exception: if a txt file is not named 'src_fpath.txt'
    """
    fpath_all = {}
    for txt_fpath in get_fpath(record_dir, suffix='.txt', is_absolute=True):
        *_, room, mic_pos, fname = txt_fpath.split('/')
        if fname != 'src_fpath.txt':
            raise Exception

        # create the nested entries on first use
        condition_dict = fpath_all.setdefault(room, {}).setdefault(mic_pos, {})

        with open(txt_fpath, 'r') as txt_file:
            for line in txt_file.readlines()[1:]:
                condition, fpath_tar, fpath_inter = line.split()
                condition_dict[condition] = [fpath_tar, fpath_inter]
    return fpath_all
コード例 #7
0
# Module-level configuration of the test-set generation.
# NOTE(review): room_all and mic_pos_test_all are used below but are not
# defined in this fragment; they must be provided earlier in the module.

# target azimuth values 8..28 (np.arange excludes the stop value)
azi_tar_all = np.arange(8, 29)
# NOTE(review): presumably the candidate numbers of interfering sources —
# confirm against gen_test_sample
n_inter_all = [0, 1, 2, 3]

# NOTE(review): presumably the number of test samples per condition — confirm
n_test = 3
# size of each configuration list
n_room = len(room_all)
n_mic_pos = len(mic_pos_test_all)
n_azi_tar = azi_tar_all.shape[0]
n_n_inter = len(n_inter_all)

# directories of the recordings and the features of the test set
record_set_dir = '../Data/Records/test'
fea_set_dir = '../Data/Features/test'

# wave file path of TIMIT test set
# (hard-coded, machine-specific absolute path)
TIMIT_dir = '/home/st/Work_Space/Data/TIMIT/44100Hz/TIMIT/TEST'
# paths returned by get_fpath are joined with TIMIT_dir
src_fpath_all = [
    os.path.join(TIMIT_dir, item) for item in get_fpath(TIMIT_dir, '.wav')
]


def get_wav_fpath(n):
    """Randomly draw `n` entries of `src_fpath_all` without replacement."""
    picked = np.random.choice(src_fpath_all, n, replace=False)
    return picked


def syn_record(src, room, mic_pos, azi, filter_gpu):
    """Filter `src` with the BRIR stored for the given room/mic_pos/azi.

    Args:
        src: source signal passed to `filter_gpu.brir_filter`
        room, mic_pos, azi: used to build the path of the BRIR mat file
        filter_gpu: object providing `brir_filter(src, brir)`
    Returns:
        result of `filter_gpu.brir_filter`
    """
    mat_fpath = '../Data/BRIRs/test/{}/{}/{}.mat'.format(room, mic_pos, azi)
    mat_content = sio.loadmat(mat_fpath)
    return filter_gpu.brir_filter(src, mat_content['data'])


def gen_test_sample(room, mic_pos, azi_tar, n_inter, test_i, filter_gpu,
                    front_end):
コード例 #8
0
ファイル: wav2npy.py プロジェクト: makabakas/WavLoc
def wav2npy(reverb_set_dir, npy_dir, is_anechoic):
    """Convert reverberant/direct wav pairs into npy files of fixed-size batches.

    Every wav file under `reverb_set_dir` is framed (frame length 320,
    shift 160) together with its direct counterpart, whose path is obtained
    by replacing 'reverb' with 'direct' in the file path. Frames are
    buffered across files and written to `npy_dir` as '<batch_count>.npy',
    128 frames per file. Each file stores a 4-element object array
    [x_d_batch, x_r_batch, y_loc_batch, is_anechoic], so it must be loaded
    with `np.load(..., allow_pickle=True)`.

    Args:
        reverb_set_dir: directory where reverberant recordings exist
        npy_dir: directory where the npy files are written (created if
            missing)
        is_anechoic: flag stored unchanged in every npy file
    Raises:
        Exception: if no wav file is found, or if a reverberant recording
            and its direct counterpart give different numbers of frames

    Note:
        A batch is only written while the buffer holds more than 128 frames,
        so the frames left in the buffer after the last file are not saved.
    """

    frame_len = 320
    shift_len = 160
    n_azi = 37
    batch_size = 128

    os.makedirs(npy_dir, exist_ok=True)

    fpath_reverb_all = get_fpath(reverb_set_dir, '.wav', is_absolute=True)
    if len(fpath_reverb_all) < 1:
        raise Exception('empty folder:{}'.format(reverb_set_dir))

    pb = ProcessBar(len(fpath_reverb_all))

    batch_count = 0
    x_r = np.zeros((0, frame_len, 2, 1))
    x_d = np.zeros((0, frame_len, 2, 1))
    y_loc = np.zeros((0, n_azi))

    for fpath_reverb in fpath_reverb_all:
        pb.update()
        # reverb signal
        record, fs = wav_tools.read_wav(fpath_reverb)
        x_r_file = np.expand_dims(wav_tools.frame_data(record, frame_len,
                                                       shift_len),
                                  axis=-1)
        # direct signal
        fpath_direct = fpath_reverb.replace('reverb', 'direct')
        direct, fs = wav_tools.read_wav(fpath_direct)
        x_d_file = np.expand_dims(wav_tools.frame_data(direct, frame_len,
                                                       shift_len),
                                  axis=-1)

        # onehot azi label
        n_sample_file = x_d_file.shape[0]
        if x_r_file.shape[0] != n_sample_file:
            raise Exception('sample number do not consist')

        # the label index is the integer before the first '_' of the name
        fname = os.path.basename(fpath_reverb)
        azi = np.int16(fname.split('_')[0])
        y_loc_file = np.zeros((n_sample_file, n_azi))
        y_loc_file[:, azi] = 1

        x_r = np.concatenate((x_r, x_r_file), axis=0)
        x_d = np.concatenate((x_d, x_d_file), axis=0)
        y_loc = np.concatenate((y_loc, y_loc_file), axis=0)

        while x_d.shape[0] > batch_size:
            # np.save converts its argument to an array; a plain list of
            # differently-shaped arrays plus a scalar is a ragged sequence,
            # which recent NumPy versions refuse to convert implicitly.
            # Fill an object array element by element instead; the stored
            # layout (4 items) is unchanged.
            batch = np.empty(4, dtype=object)
            batch[0] = x_d[:batch_size]
            batch[1] = x_r[:batch_size]
            batch[2] = y_loc[:batch_size]
            batch[3] = is_anechoic

            npy_fpath = os.path.join(npy_dir, '{}.npy'.format(batch_count))
            np.save(npy_fpath, batch)
            batch_count = batch_count + 1

            x_r = x_r[batch_size:]
            x_d = x_d[batch_size:]
            y_loc = y_loc[batch_size:]
コード例 #9
0
def file_reader(fea_dir,
                band_tar=None,
                azi_tar=None,
                is_screen=False,
                record_dir=None,
                is_plot=False,
                fig_name=None,
                is_pb=False):
    """Yield binaural cues stored in npz files, optionally screening frames.

    Feature files are expected at '<...>/<room>/<mic_pos>/<azi>_<wav_i>_<snr>.npz'
    and to contain 'cue_frame_all', 'ccf_frame_all' and 'snr_frame_all'.
    `cue_frame_all` is indexed as [band, frame, cue].

    This function relies on the module-level names `frame_len`, `shift_len`,
    `n_band`, `freq_low` and `freq_high`, which are not defined here.

    Args:
        fea_dir: directory searched for npz feature files
        band_tar: index of the band to return; if None, all bands are
            returned
        azi_tar: if given, only files whose azi equals `azi_tar` are read
        is_screen: whether to keep only frames that pass all four criteria
            (vad, per-band snr, correlation coefficient, itd range)
        record_dir: directory of recordings, required when `is_screen`
        is_plot: only used when `is_screen`; plot the criteria of the first
            file read, save the figure and stop the generator
        fig_name: name of the saved figure
        is_pb: whether to update the progress bar
    Returns:
        generator; per file it yields `cue_frame_all[band_tar]` (screened if
        requested) when `band_tar` is given, otherwise the cues of all bands
        transposed to [frame, band, cue]. With `is_screen` and no `band_tar`
        only frames passing the criteria in every band are kept.
    Raises:
        Exception: if the target recording and its source differ in length
    """
    # thresholds of the screening criteria
    theta_vad = 40
    theta_corr_coef = 0.3
    # NOTE(review): presumably 44 samples at 44.1 kHz expressed in ms — confirm
    theta_itd = 44.0 / 44.1

    if is_screen:
        src_fpath_all = load_src_fpath(record_dir)

    fea_fpath_all = get_fpath(fea_dir, suffix='.npz', is_absolute=True)
    pb = ProcessBar(len(fea_fpath_all))
    for fea_fpath in fea_fpath_all:
        if is_pb:
            pb.update()
        # fea_fpath[:-4] strips the '.npz' extension
        *_, room, mic_pos, fname = fea_fpath[:-4].split('/')
        azi, wav_i, snr = [np.int16(item) for item in fname.split('_')]
        if (azi_tar is not None) and (azi != azi_tar):
            continue

        fea_file = np.load(fea_fpath)
        cue_frame_all = fea_file['cue_frame_all']
        ccf_frame_all = fea_file['ccf_frame_all']
        snr_frame_all = fea_file['snr_frame_all']

        if not is_screen:
            if band_tar is None:
                yield np.transpose(cue_frame_all, axes=(1, 0, 2))
            else:
                yield cue_frame_all[band_tar]
        else:
            n_frame = cue_frame_all.shape[1]
            flag_frame_all_band_all = []

            # feature selection
            # 1. vad, on one channel(L)
            src_fpath_tar = \
                src_fpath_all[room][mic_pos][f'{azi}_{wav_i}_{snr}'][0]
            src_fpath_tar = src_fpath_tar.replace('Data/TIMIT',
                                                  'Data/TIMIT_wav')
            src_tar, fs = wav_tools.read_wav(src_fpath_tar)

            tar_fpath = ''.join((f'{record_dir}/{room}/{mic_pos}/',
                                 f'{azi}_{wav_i}_{snr}_tar.wav'))
            tar, fs = wav_tools.read_wav(tar_fpath)
            if tar.shape[0] != src_tar.shape[0]:
                raise Exception()

            # time delay between source and recording: the source is
            # advanced by `delay` samples and zero-padded at the end.
            # NOTE(review): the original comment said "about 70 samples"
            # while the value is 190 — confirm which one is right.
            delay = 190
            src_tar = np.concatenate((src_tar[delay:], np.zeros(delay)))
            vad_flag_frame_all = wav_tools.vad(x=src_tar,
                                               frame_len=frame_len,
                                               shift_len=shift_len,
                                               theta=theta_vad,
                                               is_plot=False)
            vad_flag_frame_all = np.squeeze(vad_flag_frame_all[:n_frame])

            for band_i in range(n_band):
                if band_tar is not None:
                    if band_i != band_tar:
                        continue

                # 2. SNR in each frequency band
                snr_flag_frame_all = snr_frame_all[band_i] > 0.0

                # 3. correlation coefficients
                ccf_flag_frame_all = np.greater(
                    np.max(ccf_frame_all[band_i], axis=1), theta_corr_coef)

                # 4. ITDs range
                itd_flag_frame_all = np.less(
                    np.abs(cue_frame_all[band_i, :, 0]), theta_itd)  # itd ms

                # combine all criteria
                flag_frame_all = np.logical_and.reduce(
                    (vad_flag_frame_all, snr_flag_frame_all,
                     ccf_flag_frame_all, itd_flag_frame_all))
                flag_frame_all_band_all.append(flag_frame_all)

            # plot waveform and corresponding criteria result
            if is_plot:
                # The per-band flags left over from the loop belong to
                # band_tar when it is given (other bands are skipped) and to
                # the last band otherwise; plot that same band rather than
                # the leftover loop index.
                band_plot = band_tar if band_tar is not None else n_band - 1

                tar_fpath = os.path.join('{}_{}.wav'.format(azi, wav_i))
                tar, fs = wav_tools.read_wav(tar_fpath)

                inter_fpath = '{}_{}_{}.wav'.format(azi, wav_i, snr)
                inter, fs = wav_tools.read_wav(inter_fpath)

                front_end = Auditory_model(fs=fs,
                                           cf_low=freq_low,
                                           cf_high=freq_high,
                                           n_band=n_band,
                                           is_middle_ear=True,
                                           ihc_type='Roman')
                # sample position of the centre of each frame
                t_frame = np.arange(n_frame) * shift_len + int(frame_len / 2)
                fig = plt.figure(figsize=(8, 4), tight_layout=True)
                ax1 = plt.subplot(221)
                ax1.plot(np.sum(front_end.filter(inter)[band_plot], axis=1))
                ax1.plot(np.sum(front_end.filter(tar)[band_plot], axis=1))
                ax_twin = ax1.twinx()
                ax_twin.plot(t_frame, flag_frame_all, color='red')

                # small vertical offsets keep the four flag curves apart
                ax2 = plt.subplot(223)
                ax2.plot(t_frame, vad_flag_frame_all + 0.09, label='vad')
                ax2.plot(t_frame, snr_flag_frame_all + 0.06, label='snr')
                ax2.plot(t_frame, ccf_flag_frame_all + 0.03, label='ccf')
                ax2.plot(t_frame, itd_flag_frame_all, label='itd')
                ax2.legend()

                ax3 = plt.subplot(122)
                plot_cue_sample(cue_frame_all[band_plot], ax3)
                plot_cue_sample(cue_frame_all[band_plot, flag_frame_all, :],
                                ax3)

                plot_tools.savefig(fig, fig_name=fig_name, fig_dir='./')
                return

            # The decision must depend on the requested band, not on the
            # loop variable (which is never None after the loop has run).
            if band_tar is None:
                # keep frames that pass the criteria in every band
                flag_frame_all = np.logical_and.reduce(flag_frame_all_band_all)
                yield np.transpose(cue_frame_all[:, flag_frame_all, :],
                                   axes=(1, 0, 2))
            else:
                flag_frame_all = flag_frame_all_band_all[0]
                yield cue_frame_all[band_tar, flag_frame_all, :]