Esempio n. 1
0
def main():
    from nara_wpe import project_root
    import soundfile as sf
    from nara_wpe.utils import stft
    from nara_wpe.utils import istft as istft
    from nara_wpe.utils import get_stft_center_frequencies
    from tqdm import tqdm
    from librosa.core.audio import resample

    channels = 8

    parameter_set = 'Katka'

    if parameter_set == 'Katka':
        sampling_rate = 16000
        stft_size, stft_shift = 512, 128
        delay = 3
        iterations = 5

        def get_K(f):
            return 10

    elif parameter_set == 'Yoshioka2012GeneralWPE':
        sampling_rate = 8000
        stft_size, stft_shift = 128, 64
        delay = 2
        iterations = 2

        def get_K(f):
            if center_frequencies[f] < 800:
                K = 18
            elif center_frequencies[f] < 1500:
                K = 15
            else:
                K = 12
            return K

    else:
        raise ValueError

    file_template = 'AMI_WSJ20-Array1-{}_T10c0201.wav'
    signal_list = [
        sf.read(str(project_root / 'data' / file_template.format(d + 1)))[0]
        for d in range(channels)
    ]
    signal_list = [resample(x_, 16000, sampling_rate) for x_ in signal_list]
    y = np.stack(signal_list, axis=0)

    center_frequencies = get_stft_center_frequencies(stft_size, sampling_rate)

    Y = stft(y, size=stft_size, shift=stft_shift)

    X = np.copy(Y)
    D, T, F = Y.shape
    for f in tqdm(range(F), total=F):
        K = get_K(f)
        X[:, :, f] = wpe_v5(Y[:, :, f],
                            K=K,
                            delay=delay,
                            iterations=iterations)

    x = istft(X, size=stft_size, shift=stft_shift)

    sf.write(str(project_root / 'data' / 'wpe_out.wav'),
             x[0],
             samplerate=sampling_rate)
Esempio n. 2
0
def main(channels, sampling_rate, file_template, taps_frequency_dependent,
         delay, iterations):
    """
    User interface for WPE. The defaults of the command line interface are
    suited for example audio files of nara_wpe.

     'Yoshioka2012GeneralWPE'
        sampling_rate = 8000
        delay = 2
        iterations = 2

    """
    from nara_wpe import project_root
    import soundfile as sf
    from nara_wpe.utils import stft
    from nara_wpe.utils import istft
    from nara_wpe.utils import get_stft_center_frequencies
    from tqdm import tqdm
    from librosa.core.audio import resample

    stft_options = dict(size=512,
                        shift=128,
                        window_length=None,
                        fading=True,
                        pad=True,
                        symmetric_window=False)

    def get_taps(f, mode=taps_frequency_dependent):
        if mode:
            if center_frequencies[f] < 800:
                taps = 18
            elif center_frequencies[f] < 1500:
                taps = 15
            else:
                taps = 12
        else:
            taps = 10
        return taps

    if file_template == 'AMI_WSJ20-Array1-{}_T10c0201.wav':
        signal_list = [
            sf.read(str(project_root / 'data' /
                        file_template.format(d + 1)))[0]
            for d in range(channels)
        ]
    else:
        signal = sf.read(file_template)[0].transpose(1, 0)
        signal_list = list(signal)
    signal_list = [resample(x_, 16000, sampling_rate) for x_ in signal_list]
    y = np.stack(signal_list, axis=0)

    center_frequencies = get_stft_center_frequencies(stft_options['size'],
                                                     sampling_rate)

    Y = stft(y, **stft_options)

    X = np.copy(Y)
    D, T, F = Y.shape
    for f in tqdm(range(F), total=F):
        taps = get_taps(f)
        X[:, :, f] = wpe_v7(Y[:, :, f],
                            taps=taps,
                            delay=delay,
                            iterations=iterations)

    x = istft(X, size=stft_options['size'], shift=stft_options['shift'])

    sf.write(str(project_root / 'data' / 'wpe_out.wav'),
             x[0],
             samplerate=sampling_rate)
    print('Output in {}'.format(str(project_root / 'data' / 'wpe_out.wav')))