def main():
    """Run WPE on the bundled 8-channel example recording and write the result.

    Reads the files ``AMI_WSJ20-Array1-<n>_T10c0201.wav`` (n = 1..8) from
    ``project_root / 'data'``, resamples every channel to the sampling rate of
    the selected parameter set, applies ``wpe_v5`` independently to every
    frequency bin of the STFT, and writes the first output channel to
    ``project_root / 'data' / 'wpe_out.wav'``.

    ``np`` and ``wpe_v5`` are not imported here; they are expected to be
    available at module level.

    Raises:
        ValueError: If ``parameter_set`` names an unknown configuration.
    """
    from nara_wpe import project_root
    import soundfile as sf
    from nara_wpe.utils import stft
    from nara_wpe.utils import istft
    from nara_wpe.utils import get_stft_center_frequencies
    from tqdm import tqdm
    from librosa.core.audio import resample

    channels = 8
    parameter_set = 'Katka'

    if parameter_set == 'Katka':
        sampling_rate = 16000
        stft_size, stft_shift = 512, 128
        delay = 3
        iterations = 5

        def get_K(f):
            # Same filter order for every frequency bin.
            return 10

    elif parameter_set == 'Yoshioka2012GeneralWPE':
        sampling_rate = 8000
        stft_size, stft_shift = 128, 64
        delay = 2
        iterations = 2

        def get_K(f):
            # ``center_frequencies`` is assigned further down in main(),
            # before this helper is called for the first time.
            if center_frequencies[f] < 800:
                K = 18
            elif center_frequencies[f] < 1500:
                K = 15
            else:
                K = 12
            return K

    else:
        raise ValueError(
            'Unknown parameter_set: {!r}'.format(parameter_set)
        )

    data_dir = project_root / 'data'
    file_template = 'AMI_WSJ20-Array1-{}_T10c0201.wav'

    signal_list = []
    for d in range(channels):
        # Keep the rate stored in the file instead of assuming 16 kHz.
        audio, native_rate = sf.read(str(data_dir / file_template.format(d + 1)))
        # Keyword arguments work with old and new librosa releases; recent
        # releases no longer accept the rates positionally.
        signal_list.append(
            resample(audio, orig_sr=native_rate, target_sr=sampling_rate)
        )
    y = np.stack(signal_list, axis=0)

    center_frequencies = get_stft_center_frequencies(stft_size, sampling_rate)

    Y = stft(y, size=stft_size, shift=stft_shift)
    X = np.copy(Y)
    D, T, F = Y.shape
    # Process each slice along the last STFT axis independently.
    for f in tqdm(range(F), total=F):
        K = get_K(f)
        X[:, :, f] = wpe_v5(Y[:, :, f], K=K, delay=delay, iterations=iterations)

    x = istft(X, size=stft_size, shift=stft_shift)

    # Only the first entry along the leading axis is written out.
    sf.write(str(data_dir / 'wpe_out.wav'), x[0], samplerate=sampling_rate)
def main(channels, sampling_rate, file_template, taps_frequency_dependent,
         delay, iterations):
    """
    User interface for WPE. The defaults of the command line interface are
    suited for example audio files of nara_wpe.

    Settings corresponding to 'Yoshioka2012GeneralWPE':
        sampling_rate = 8000
        delay = 2
        iterations = 2

    Args:
        channels: Number of per-channel files to load. Only used when
            ``file_template`` is the bundled example template; otherwise all
            channels of the given file are used.
        sampling_rate: Rate the input is resampled to and the rate the output
            file is written with.
        file_template: Either the bundled template
            ``'AMI_WSJ20-Array1-{}_T10c0201.wav'`` (resolved inside
            ``project_root / 'data'``) or the path of a single audio file.
        taps_frequency_dependent: If truthy, use 18/15/12 taps depending on
            the bin's center frequency (< 800, < 1500, otherwise); if falsy,
            use 10 taps for every bin.
        delay: Forwarded to ``wpe_v7`` as ``delay``.
        iterations: Forwarded to ``wpe_v7`` as ``iterations``.

    The first output channel is written to
    ``project_root / 'data' / 'wpe_out.wav'`` and that path is printed.

    ``np`` and ``wpe_v7`` are not imported here; they are expected to be
    available at module level.
    """
    from nara_wpe import project_root
    import soundfile as sf
    from nara_wpe.utils import stft
    from nara_wpe.utils import istft
    from nara_wpe.utils import get_stft_center_frequencies
    from tqdm import tqdm
    from librosa.core.audio import resample

    stft_options = dict(
        size=512,
        shift=128,
        window_length=None,
        fading=True,
        pad=True,
        symmetric_window=False,
    )

    def get_taps(f, mode=taps_frequency_dependent):
        # ``center_frequencies`` is assigned further down in main(), before
        # this helper is called for the first time.
        if mode:
            if center_frequencies[f] < 800:
                taps = 18
            elif center_frequencies[f] < 1500:
                taps = 15
            else:
                taps = 12
        else:
            taps = 10
        return taps

    data_dir = project_root / 'data'

    # Collect (samples, native_rate) per channel so the real rate of the
    # input is used for resampling instead of assuming 16 kHz.
    if file_template == 'AMI_WSJ20-Array1-{}_T10c0201.wav':
        channel_data = [
            sf.read(str(data_dir / file_template.format(d + 1)))
            for d in range(channels)
        ]
    else:
        # always_2d guarantees a (frames, channels) array, so mono files no
        # longer fail on the transpose.
        signal, native_rate = sf.read(file_template, always_2d=True)
        channel_data = [(channel, native_rate) for channel in signal.T]

    # Keyword arguments work with old and new librosa releases; recent
    # releases no longer accept the rates positionally.
    signal_list = [
        resample(x_, orig_sr=rate, target_sr=sampling_rate)
        for x_, rate in channel_data
    ]
    y = np.stack(signal_list, axis=0)

    center_frequencies = get_stft_center_frequencies(
        stft_options['size'], sampling_rate
    )

    Y = stft(y, **stft_options)
    X = np.copy(Y)
    D, T, F = Y.shape
    # Process each slice along the last STFT axis independently.
    for f in tqdm(range(F), total=F):
        taps = get_taps(f)
        X[:, :, f] = wpe_v7(
            Y[:, :, f], taps=taps, delay=delay, iterations=iterations
        )

    x = istft(X, size=stft_options['size'], shift=stft_options['shift'])

    output_path = str(data_dir / 'wpe_out.wav')
    # Only the first entry along the leading axis is written out.
    sf.write(output_path, x[0], samplerate=sampling_rate)
    print('Output in {}'.format(output_path))