Beispiel #1
0
def main(config_path):
    """Validate a newly synthesized BRIR against the existing one.

    The source signal 'data/src.wav' is filtered with both the existing BRIR
    (the "pre" version, stored next to the config file) and the BRIR in
    'data/test.wav' (the "new" version). Both recordings are written to disk
    and a 3x3 figure is saved to 'images/validate.png':
        row 0: BRIR            (pre / new / difference)
        row 1: recording       (pre / new / difference)
        row 2: spectrogram     (pre / new / difference)
    Only the first channel (index 0) is plotted.

    Args:
        config_path: path of the BRIR config file; the path of the existing
            BRIR is obtained by replacing 'cfg' with 'wav' in it
    """
    src, fs = wav_tools.read_wav('data/src.wav')

    # The new BRIR is expected to exist already; it was generated with
    # configs = parse_config_file(config_path)
    # new_config_path = 'config.cfg'
    # new_fig_path = 'test.png'
    # syn_brir(configs, new_config_path, new_brir_path, new_fig_path,
    #      parallel_type=2, n_worker=12)
    new_brir_path = 'data/test.wav'
    new_brir, fs = wav_tools.read_wav(new_brir_path)
    new_record = wav_tools.brir_filter(src, new_brir)
    wav_tools.write_wav(new_record, fs, 'data/new/reverb/15_0387_0.wav')

    brir, fs = wav_tools.read_wav(config_path.replace('cfg', 'wav'))
    record = wav_tools.brir_filter(src, brir)
    # write the recording made with the existing BRIR (not new_record),
    # otherwise both files on disk are identical
    wav_tools.write_wav(record, fs, 'data/pre/reverb/15_0387_0.wav')

    fig, ax = plt.subplots(3, 3, tight_layout=True, figsize=[10, 8])

    # row 0: brir
    ax[0, 0].plot(brir[:, 0])
    ax[0, 0].set_ylabel('brir')
    ax[0, 0].set_title('pre')
    ax[0, 1].plot(new_brir[:, 0])
    ax[0, 1].set_title('new')
    ax[0, 2].plot(brir[:, 0] - new_brir[:, 0])
    ax[0, 2].yaxis.set_major_formatter(ticker.LogFormatter())
    ax[0, 2].set_title('difference')

    # row 1: recordings
    ax[1, 0].plot(record[:, 0])
    ax[1, 0].set_ylabel('record')
    ax[1, 1].plot(new_record[:, 0])
    ax[1, 2].plot(record[:, 0] - new_record[:, 0])
    ax[1, 2].yaxis.set_major_formatter(ticker.LogFormatter())

    # row 2: spectrograms, same analysis settings for both recordings
    specgram_kwargs = dict(Fs=fs, NFFT=512, noverlap=256, cmap='jet')
    specgram, freqs, bins, _ = ax[2, 0].specgram(record[:, 0],
                                                 **specgram_kwargs)
    new_specgram, freqs, bins, _ = ax[2, 1].specgram(new_record[:, 0],
                                                     **specgram_kwargs)
    ax[2, 2].imshow(specgram - new_specgram,
                    aspect='auto',
                    cmap='jet',
                    extent=[bins[0], bins[-1], freqs[0], freqs[-1]])
    fig.savefig('images/validate.png')
Beispiel #2
0
def file_reader(record_set_dir, batch_size=-1, is_shuffle=True):
    """ read wav files in given directories, one file per time
    Args:
        record_set_dir: directory or list of directories where recordings exist
        batch_size: if positive, frames of consecutive files are pooled and
            yielded in batches of exactly batch_size samples (frames left
            over after the last file are discarded); otherwise all frames
            of one file are yielded per time
        is_shuffle: whether to shuffle the file order with np.random.shuffle
    Yields:
        [samples, label_all]
            samples: frames with a trailing singleton axis, buffered as
                [n_sample, frame_len, 2, 1]
            label_all: one-hot azimuth labels, [n_sample, n_azi]; the azimuth
                index is the integer before the first '_' of the file name
    Raises:
        Exception: if no wav file is found
    """
    if isinstance(record_set_dir, list):
        dirs = record_set_dir
    else:
        dirs = [record_set_dir]

    fpath_all = []
    for sub_set_dir in dirs:
        fpath_all.extend(get_fpath(sub_set_dir, '.wav', is_absolute=True))

    if is_shuffle:
        np.random.shuffle(fpath_all)

    if len(fpath_all) < 1:
        raise Exception('empty folder:{}'.format(record_set_dir))

    frame_len = 320
    shift_len = 160
    n_azi = 37

    # use the same condition for creating and for using the buffers, so that
    # batch_size == 1 no longer hits undefined buffers
    is_batched = batch_size > 0
    if is_batched:
        x_all = np.zeros((0, frame_len, 2, 1))
        y_all = np.zeros((0, n_azi))

    for fpath in fpath_all:
        record, fs = wav_tools.read_wav(fpath)
        x_file_all = wav_tools.frame_data(record, frame_len, shift_len)
        x_file_all = np.expand_dims(x_file_all, axis=-1)

        # onehot azi label
        n_sample_file = x_file_all.shape[0]
        fname = os.path.basename(fpath)
        azi = int(fname.split('_')[0])
        y_file_all = np.zeros((n_sample_file, n_azi))
        y_file_all[:, azi] = 1

        if not is_batched:
            yield [x_file_all, y_file_all]
            continue

        x_all = np.concatenate((x_all, x_file_all), axis=0)
        y_all = np.concatenate((y_all, y_file_all), axis=0)

        # '>=' so that a buffer holding exactly one batch is emitted too
        while x_all.shape[0] >= batch_size:
            # copy, so the yielded batch does not share memory with the
            # remaining buffer
            x_batch = x_all[:batch_size].copy()
            y_batch = y_all[:batch_size].copy()

            x_all = x_all[batch_size:]
            y_all = y_all[batch_size:]

            yield [x_batch, y_batch]
Beispiel #3
0
def main():
    """Compute the spectrogram of the wav file given on the command line.

    Frame length and frame shift from the arguments are multiplied by the
    sample rate and truncated to integers (presumably seconds -> samples)
    before being handed to cal_spectrogram.
    """
    args = parse_args()
    signal, sample_rate = wav_tools.read_wav(args.wav_path)

    n_sample_frame, n_sample_shift = [
        int(sample_rate * duration)
        for duration in (args.frame_len, args.frame_shift)
    ]

    cal_spectrogram(signal, n_sample_frame, n_sample_shift, sample_rate,
                    args.freq_low, args.freq_high, args.n_band,
                    args.fig_path, args.dpi)
Beispiel #4
0
def cal_fea(record_dir, fea_dir):
    """calculate GCC-PHAT features

    Every wav file returned by get_fpath(record_dir, suffix='.wav',
    pattern='reverb') is split into frames (frame_len=320, shift_len=160),
    gcc_phat_parallel_f is applied to each frame in a process pool and the
    stacked result is saved as a .npy file under fea_dir, mirroring the
    relative path of the wav file. Files whose feature file already exists
    are skipped.

    Args:
        record_dir: wave dataset directory
        fea_dir: root directory where feature files are saved
    """
    # NOTE(review): creating a missing *record* directory only yields an
    # empty dataset; kept for backward compatibility - confirm whether
    # fea_dir was intended
    if not os.path.exists(record_dir):
        os.makedirs(record_dir)

    wav_fpath_all = get_fpath(dir_path=record_dir,
                              suffix='.wav',
                              pattern='reverb')

    pb = ProcessBar(max_value=len(wav_fpath_all),
                    title=f'GCC_PHAT {record_dir}')

    # context manager makes sure the worker processes are released, even if
    # an exception is raised while processing
    with Pool(24) as pool:
        for wav_fpath in wav_fpath_all:
            # count every file, skipped ones included, so the bar can reach
            # its maximum value
            pb.update()

            fea_fpath = os.path.join(fea_dir,
                                     '{}.npy'.format(wav_fpath[:-4]))
            if os.path.exists(fea_fpath):
                # warnings.warn(f'{fea_fpath} exists!')
                continue

            data, fs = wav_tools.read_wav(os.path.join(record_dir, wav_fpath))
            frame_all = wav_tools.frame_data(data,
                                             frame_len=320,
                                             shift_len=160)

            # one task per frame
            fea_frame_all = np.asarray(
                pool.map(gcc_phat_parallel_f, list(frame_all)))

            os.makedirs(os.path.dirname(fea_fpath), exist_ok=True)
            np.save(fea_fpath, fea_frame_all)
Beispiel #5
0
def syn_record(src_fpath_all, set_dir, n_wav_per_azi, task_i, pb):
    """Synthesize spatial recordings and the corresponding direct sound.

    For every room in room_all and every azimuth index below n_azi,
    n_wav_per_azi source files are taken in order from src_fpath_all,
    silence-truncated and filtered on the GPU with
        - the 'Anechoic' BRIR -> <set_dir>/direct/<room>/<azi>_<i>.wav
        - the room BRIR       -> <set_dir>/reverb/<room>/<azi>_<i>.wav

    Args:
        src_fpath_all: source file paths, consumed one per synthesized pair
        set_dir: root directory of the dataset being synthesized
        n_wav_per_azi: number of source files used per azimuth and room
        task_i: value passed to pb.update() for every synthesized pair
        pb: progress bar object
    """
    gpu_filter = Filter_GPU(gpu_index=1)
    anechoic_brirs = load_brirs('Anechoic')

    n_src_used = 0
    for room in room_all:
        direct_dir = os.path.join(set_dir, 'direct', room)
        os.makedirs(direct_dir, exist_ok=True)
        reverb_dir = os.path.join(set_dir, 'reverb', room)
        os.makedirs(reverb_dir, exist_ok=True)

        room_brirs = load_brirs(room)
        # direct sound first, reverberant recording second
        targets = ((anechoic_brirs, direct_dir), (room_brirs, reverb_dir))

        for azi_i in range(n_azi):
            for wav_i in range(n_wav_per_azi):
                pb.update(task_i)

                src, fs = wav_tools.read_wav(src_fpath_all[n_src_used])
                n_src_used += 1
                src = truncate_silence(src)

                fname = f'{azi_i}_{wav_i}.wav'
                for brirs, out_dir in targets:
                    filtered = gpu_filter.brir_filter(src, brirs[azi_i])
                    wav_tools.write_wav(filtered, fs,
                                        os.path.join(out_dir, fname))
Beispiel #6
0
def gen_test_sample(room, mic_pos, azi_tar, n_inter, test_i, filter_gpu,
                    front_end):
    """Synthesize one test mixture and calculate its features.

    One target source and n_inter interfering sources are spatialized with
    syn_record and summed, truncated to the shortest recording. Each
    interferer is scaled against the target with wav_tools.set_snr(..., 0)
    and placed at an azimuth at least azi_sep_theta away from the target.

    Two files are written, both named '<azi_tar>_<n_inter>_<test_i>.npy'
    under '<room>/<mic_pos>/':
        - in record_set_dir: [mix, src_azi_all]
        - in fea_set_dir: [cue_frame_all, ccf_frame_all, src_azi_all]
    Both are saved as 1-D object arrays, so they have to be loaded with
    np.load(..., allow_pickle=True).

    Args:
        room: room name
        mic_pos: microphone position
        azi_tar: azimuth of the target source
        n_inter: number of interfering sources
        test_i: index of the test sample
        filter_gpu: filter object handed to syn_record
        front_end: object providing cal_cues()
    """

    def to_object_array(*items):
        # items have different shapes; build the object array explicitly
        # instead of relying on implicit ragged-array creation, which newer
        # NumPy versions reject
        packed = np.empty(len(items), dtype=object)
        for item_i, item in enumerate(items):
            packed[item_i] = item
        return packed

    # azimuth of every source, target first
    src_azi_all = np.zeros(n_inter + 1)
    src_azi_all[0] = azi_tar

    src_fpath_all = get_wav_fpath(n_inter + 1)
    src_tar, _ = wav_tools.read_wav(src_fpath_all[0])
    record_tar = syn_record(src_tar, room, mic_pos, azi_tar, filter_gpu)

    mix = record_tar
    mix_len = mix.shape[0]
    for i in range(n_inter):
        # redraw until the interferer is at least azi_sep_theta away from
        # the target
        inter_azi = azi_tar
        while np.abs(azi_tar - inter_azi) < azi_sep_theta:
            inter_azi = np.random.choice(azi_tar_all, size=1)[0]
        src_azi_all[i + 1] = inter_azi

        src_inter, _ = wav_tools.read_wav(src_fpath_all[1 + i])
        src_inter_norm = wav_tools.set_snr(src_inter, src_tar, 0)
        record_inter = syn_record(src_inter_norm, room, mic_pos, inter_azi,
                                  filter_gpu)
        # keep only the part covered by all recordings
        mix_len = min(mix_len, record_inter.shape[0])
        mix = mix[:mix_len] + record_inter[:mix_len]

    fname = '_'.join((f'{azi_tar}', f'{n_inter}', f'{test_i}.npy'))

    mix_fpath = os.path.join(record_set_dir, room, f'{mic_pos}', fname)
    os.makedirs(os.path.dirname(mix_fpath), exist_ok=True)
    np.save(mix_fpath, to_object_array(mix, src_azi_all))

    fea_fpath = os.path.join(fea_set_dir, room, f'{mic_pos}', fname)
    os.makedirs(os.path.dirname(fea_fpath), exist_ok=True)

    [cue_frame_all, ccf_frame_all] = front_end.cal_cues(tar=mix,
                                                        frame_len=frame_len,
                                                        shift_len=shift_len,
                                                        max_delay=max_delay,
                                                        n_worker=1)
    np.save(fea_fpath,
            to_object_array(cue_frame_all, ccf_frame_all, src_azi_all))
Beispiel #7
0
def main():
    """Compare the two implementations of the GTF filter bank.

    'record.wav' is filtered with both gt_filter.filter_py and
    gt_filter.filter; the filter_py result is stored in
    'wav_band_all_py.npy' and the maximum of each result is printed. For
    every band, samples 5000-5050 of channel 0 of both outputs are drawn on
    top of each other (in both drawing orders) and saved to
    '../images/eg_<band_i>.png'.
    """
    n_band = 32
    view_range = [5000, 5050]

    wav, fs = wav_tools.read_wav('record.wav')
    gt_filter = GTF(fs, freq_low=80, freq_high=5e3, n_band=n_band)

    wav_band_all_py = gt_filter.filter_py(wav)
    np.save('wav_band_all_py.npy', wav_band_all_py)
    # wav_band_all_py = np.load('wav_band_all_py.npy')
    print(np.max(wav_band_all_py))

    wav_band_all = gt_filter.filter(wav)
    print(np.max(wav_band_all))

    for band_i in range(n_band):
        fig, (ax_top, ax_bottom) = plt.subplots(2, 1)

        # upper panel: filter() first, filter_py() on top
        ax_top.plot(wav_band_all[band_i, :, 0].T)
        ax_top.plot(wav_band_all_py[band_i, :, 0].T)
        ax_top.set_xlim(view_range)

        # lower panel: reversed drawing order
        ax_bottom.plot(wav_band_all_py[band_i, :, 0].T)
        ax_bottom.plot(wav_band_all[band_i, :, 0].T)
        ax_bottom.set_xlim(view_range)

        fig.savefig(f'../images/eg_{band_i}.png')
        plt.close(fig)
Beispiel #8
0
def wav2npy(reverb_set_dir, npy_dir, is_anechoic):
    """ convert pairs of reverberant/direct wav files into batched npy files

    Every wav file under reverb_set_dir and its direct-sound counterpart
    (same path with 'reverb' replaced by 'direct') are framed
    (frame_len=320, shift_len=160). Frames are pooled across files and
    written in batches of 128 samples to '<npy_dir>/<batch_count>.npy'.
    Samples left over after the last file (fewer than one batch) are
    discarded.

    Each npy file holds a 1-D object array
        [x_d_batch, x_r_batch, y_loc_batch, is_anechoic]
    and has to be loaded with np.load(..., allow_pickle=True).

    Args:
        reverb_set_dir: directory where reverberant recordings exist
        npy_dir: directory where npy files are saved, created if missing
        is_anechoic: flag stored unchanged in every npy file
    Raises:
        Exception: if no wav file is found, or if a reverberant file and its
            direct counterpart give different numbers of frames
    """

    frame_len = 320
    shift_len = 160
    n_azi = 37
    batch_size = 128

    os.makedirs(npy_dir, exist_ok=True)

    fpath_reverb_all = get_fpath(reverb_set_dir, '.wav', is_absolute=True)
    if len(fpath_reverb_all) < 1:
        raise Exception('empty folder:{}'.format(reverb_set_dir))

    pb = ProcessBar(len(fpath_reverb_all))

    batch_count = 0
    # buffers of samples not written yet
    x_r = np.zeros((0, frame_len, 2, 1))
    x_d = np.zeros((0, frame_len, 2, 1))
    y_loc = np.zeros((0, n_azi))

    for fpath_reverb in fpath_reverb_all:
        pb.update()
        # reverb signal
        record, _ = wav_tools.read_wav(fpath_reverb)
        x_r_file = np.expand_dims(wav_tools.frame_data(record, frame_len,
                                                       shift_len),
                                  axis=-1)
        # direct signal
        fpath_direct = fpath_reverb.replace('reverb', 'direct')
        direct, _ = wav_tools.read_wav(fpath_direct)
        x_d_file = np.expand_dims(wav_tools.frame_data(direct, frame_len,
                                                       shift_len),
                                  axis=-1)

        n_sample_file = x_d_file.shape[0]
        if x_r_file.shape[0] != n_sample_file:
            raise Exception('sample number do not consist')

        # onehot azi label, azimuth index is the integer before the first
        # '_' of the file name
        fname = os.path.basename(fpath_reverb)
        azi = int(fname.split('_')[0])
        y_loc_file = np.zeros((n_sample_file, n_azi))
        y_loc_file[:, azi] = 1

        x_r = np.concatenate((x_r, x_r_file), axis=0)
        x_d = np.concatenate((x_d, x_d_file), axis=0)
        y_loc = np.concatenate((y_loc, y_loc_file), axis=0)

        # '>=' so that a buffer holding exactly one batch is written too
        while x_d.shape[0] >= batch_size:
            # items have different shapes/types; build the object array
            # explicitly instead of relying on implicit ragged-array
            # creation, which newer NumPy versions reject
            batch = np.empty(4, dtype=object)
            batch[0] = x_d[:batch_size]
            batch[1] = x_r[:batch_size]
            batch[2] = y_loc[:batch_size]
            batch[3] = is_anechoic

            npy_fpath = os.path.join(npy_dir, '{}.npy'.format(batch_count))
            np.save(npy_fpath, batch)
            batch_count = batch_count + 1

            x_r = x_r[batch_size:]
            x_d = x_d[batch_size:]
            y_loc = y_loc[batch_size:]
Beispiel #9
0
    tar_fpath = '../Data/Records/train/RT_0.5/5/19_11_20_tar.wav'
    inter_fpath = '../Data/Records/train/RT_0.5/5/19_11_20_inter.wav'
    band_i = 20

    *_, mic_pos, fname = fea_fpath.split('/')
    tar_azi, inter_azi, snr = [int(item) for item in fname[:-4].split('_')]

    fea_file = np.load(fea_fpath)
    cue_frame_all = fea_file['cue_frame_all']
    ccf_frame_all = fea_file['ccf_frame_all']
    snr_frame_all = fea_file['snr_frame_all']

    n_frame = cue_frame_all.shape[1]

    # 1. vad, on one channel(L)
    tar_record, fs = wav_tools.read_wav(tar_fpath)
    inter_record, fs = wav_tools.read_wav(inter_fpath)

    theta_vad = 40
    vad_flag_frame_all = wav_tools.vad(x=tar_record[:, 0],
                                       frame_len=frame_len,
                                       shift_len=shift_len,
                                       theta=theta_vad,
                                       is_plot=False)
    vad_flag_frame_all = vad_flag_frame_all[:n_frame]

    # 2. SNR in each frequency band

    snr_flag_frame_all = snr_frame_all[band_i] > 0.0

    # 3. correlation coefficients
Beispiel #10
0
def file_reader(fea_dir,
                band_tar=None,
                azi_tar=None,
                is_screen=False,
                record_dir=None,
                is_plot=False,
                fig_name=None,
                is_pb=False):
    """Read cue features from npz files, optionally screening frames.

    Feature files are expected at '<...>/<room>/<mic_pos>/<azi>_<wav_i>_<snr>.npz'
    and to contain 'cue_frame_all', 'ccf_frame_all' and 'snr_frame_all'.

    With is_screen=True a frame is kept only if it passes all criteria
        1. vad on the (delay-compensated) target source signal
        2. SNR of the frequency band > 0
        3. maximum of the cross-correlation function > 0.3
        4. |ITD| < 44.0/44.1
    When band_tar is None, a frame has to pass the criteria in every band.

    Args:
        fea_dir: directory of feature files (.npz)
        band_tar: index of the frequency band to read, None for all bands
        azi_tar: if given, only files of this azimuth are read
        is_screen: whether to screen frames with the criteria above
        record_dir: directory of recordings, needed if is_screen is True
        is_plot: if True (and is_screen), plot waveforms and criteria
            results of the first file, save the figure and stop
        fig_name: name of the figure saved when is_plot is True
        is_pb: whether to update the progress bar for every file
    Yields:
        per feature file
            band_tar is None: cue_frame_all with the first two axes swapped
            otherwise: cue_frame_all[band_tar]
        restricted to the frames passing the screening if is_screen is True
    Raises:
        Exception: if target recording and target source differ in length
    """
    theta_vad = 40
    theta_corr_coef = 0.3
    theta_itd = 44.0 / 44.1

    if is_screen:
        src_fpath_all = load_src_fpath(record_dir)

    fea_fpath_all = get_fpath(fea_dir, suffix='.npz', is_absolute=True)
    pb = ProcessBar(len(fea_fpath_all))
    for fea_fpath in fea_fpath_all:
        if is_pb:
            pb.update()
        *_, room, mic_pos, fname = fea_fpath[:-4].split('/')
        azi, wav_i, snr = [np.int16(item) for item in fname.split('_')]
        if (azi_tar is not None) and (azi != azi_tar):
            continue

        # close the npz archive as soon as the arrays are read
        with np.load(fea_fpath) as fea_file:
            cue_frame_all = fea_file['cue_frame_all']
            ccf_frame_all = fea_file['ccf_frame_all']
            snr_frame_all = fea_file['snr_frame_all']

        if not is_screen:
            if band_tar is None:
                yield np.transpose(cue_frame_all, axes=(1, 0, 2))
            else:
                yield cue_frame_all[band_tar]
            continue

        n_frame = cue_frame_all.shape[1]
        flag_frame_all_band_all = []

        # feature selection
        # 1. vad, on the source signal of target
        src_fpath_tar = \
            src_fpath_all[room][mic_pos][f'{azi}_{wav_i}_{snr}'][0]
        src_fpath_tar = src_fpath_tar.replace('Data/TIMIT',
                                              'Data/TIMIT_wav')
        src_tar, fs = wav_tools.read_wav(src_fpath_tar)

        tar_fpath = ''.join((f'{record_dir}/{room}/{mic_pos}/',
                             f'{azi}_{wav_i}_{snr}_tar.wav'))
        tar, fs = wav_tools.read_wav(tar_fpath)
        if tar.shape[0] != src_tar.shape[0]:
            raise Exception(
                'length mismatch between {} and {}'.format(
                    tar_fpath, src_fpath_tar))

        # compensate the time delay between source and recording by
        # advancing the source by a fixed number of samples, padded with
        # zeros at the end (assumes src_tar is 1-D - TODO confirm)
        delay = 190
        src_tar = np.concatenate((src_tar[delay:], np.zeros(delay)))
        vad_flag_frame_all = wav_tools.vad(x=src_tar,
                                           frame_len=frame_len,
                                           shift_len=shift_len,
                                           theta=theta_vad,
                                           is_plot=False)
        vad_flag_frame_all = np.squeeze(vad_flag_frame_all[:n_frame])

        for band_i in range(n_band):
            if (band_tar is not None) and (band_i != band_tar):
                continue

            # 2. SNR in each frequency band
            snr_flag_frame_all = snr_frame_all[band_i] > 0.0

            # 3. correlation coefficients
            ccf_flag_frame_all = np.greater(
                np.max(ccf_frame_all[band_i], axis=1), theta_corr_coef)

            # 4. ITDs range
            itd_flag_frame_all = np.less(
                np.abs(cue_frame_all[band_i, :, 0]), theta_itd)  # itd ms

            # combine all criteria
            flag_frame_all = np.logical_and.reduce(
                (vad_flag_frame_all, snr_flag_frame_all,
                 ccf_flag_frame_all, itd_flag_frame_all))
            flag_frame_all_band_all.append(flag_frame_all)

        # plot waveform and corresponding criteria result
        if is_plot:
            # band whose flags are still held by the *_flag_frame_all
            # variables: band_tar if given, otherwise the last band of the
            # loop (band_i itself always ends at the last band)
            band_plot = band_i if band_tar is None else band_tar

            tar_fpath = os.path.join('{}_{}.wav'.format(azi, wav_i))
            tar, fs = wav_tools.read_wav(tar_fpath)

            inter_fpath = '{}_{}_{}.wav'.format(azi, wav_i, snr)
            inter, fs = wav_tools.read_wav(inter_fpath)

            front_end = Auditory_model(fs=fs,
                                       cf_low=freq_low,
                                       cf_high=freq_high,
                                       n_band=n_band,
                                       is_middle_ear=True,
                                       ihc_type='Roman')
            # sample index of the centre of each frame
            t_frame = np.arange(n_frame) * shift_len + int(frame_len / 2)
            fig = plt.figure(figsize=(8, 4), tight_layout=True)
            ax1 = plt.subplot(221)
            ax1.plot(np.sum(front_end.filter(inter)[band_plot], axis=1))
            ax1.plot(np.sum(front_end.filter(tar)[band_plot], axis=1))
            ax_twin = ax1.twinx()
            ax_twin.plot(t_frame, flag_frame_all, color='red')

            # small offsets keep the overlapping flag curves apart
            ax2 = plt.subplot(223)
            ax2.plot(t_frame, vad_flag_frame_all + 0.09, label='vad')
            ax2.plot(t_frame, snr_flag_frame_all + 0.06, label='snr')
            ax2.plot(t_frame, ccf_flag_frame_all + 0.03, label='ccf')
            ax2.plot(t_frame, itd_flag_frame_all, label='itd')
            ax2.legend()

            ax3 = plt.subplot(122)
            plot_cue_sample(cue_frame_all[band_plot], ax3)
            plot_cue_sample(cue_frame_all[band_plot, flag_frame_all, :], ax3)

            plot_tools.savefig(fig, fig_name=fig_name, fig_dir='./')
            return

        # decide on band_tar (the argument), not on the loop variable
        # band_i, which is never None after the loop
        if band_tar is None:
            # keep frames passing the criteria in all bands
            flag_frame_all = np.logical_and.reduce(flag_frame_all_band_all)
            yield np.transpose(cue_frame_all[:, flag_frame_all, :],
                               axes=(1, 0, 2))
        else:
            flag_frame_all = flag_frame_all_band_all[0]
            yield cue_frame_all[band_tar, flag_frame_all, :]
Beispiel #11
0
def main():
    """Read the wav file given on the command line and hand it to filter().

    The frequency range, number of bands, result directory and figure path
    are taken from the parsed command-line arguments.
    """
    args = parse_args()
    signal, sample_rate = wav_tools.read_wav(args.wav_path)

    band_settings = (args.freq_low, args.freq_high, args.n_band)
    output_paths = (args.result_dir, args.fig_path)
    filter(signal, sample_rate, *band_settings, *output_paths)