def istft(self, X):
    """Apply the inverse STFT from ``nara_wpe.utils`` to ``X``.

    The synthesis parameters are read from this instance
    (``stft_size``, ``stft_shift`` and ``stft_fading``) and forwarded as
    ``size``, ``shift`` and ``fading``.

    Args:
        X: STFT-domain signal, passed through unchanged as the first
            positional argument of ``nara_wpe.utils.istft``.

    Returns:
        Whatever ``nara_wpe.utils.istft`` returns for these arguments.
    """
    # Imported lazily and under an alias so it does not read like a
    # recursive call to this method.
    from nara_wpe.utils import istft as _inverse_stft

    synthesis_options = dict(
        size=self.stft_size,
        shift=self.stft_shift,
        fading=self.stft_fading,
    )
    return _inverse_stft(X, **synthesis_options)
def main():
    """Dereverberate the 8-channel example recording with ``wpe_v5``.

    Reads ``AMI_WSJ20-Array1-{1..8}_T10c0201.wav`` from
    ``project_root / 'data'``, resamples every channel from 16 kHz to the
    sampling rate of the selected parameter set, runs ``wpe_v5``
    independently for every frequency bin and writes the first output
    channel to ``project_root / 'data' / 'wpe_out.wav'``.

    The parameter set is selected by the local ``parameter_set`` variable
    ('Katka' or 'Yoshioka2012GeneralWPE').

    Raises:
        ValueError: If ``parameter_set`` names an unknown configuration.
    """
    from nara_wpe import project_root
    import soundfile as sf
    from nara_wpe.utils import stft
    from nara_wpe.utils import istft as istft
    from nara_wpe.utils import get_stft_center_frequencies
    from tqdm import tqdm
    from librosa.core.audio import resample

    channels = 8
    parameter_set = 'Katka'

    if parameter_set == 'Katka':
        sampling_rate = 16000
        stft_size, stft_shift = 512, 128
        delay = 3
        iterations = 5

        def get_K(f):
            # Same filter order for every frequency bin.
            return 10

    elif parameter_set == 'Yoshioka2012GeneralWPE':
        sampling_rate = 8000
        stft_size, stft_shift = 128, 64
        delay = 2
        iterations = 2

        def get_K(f):
            # ``center_frequencies`` is assigned further down in ``main``;
            # the closure resolves it when ``get_K`` is called in the loop.
            if center_frequencies[f] < 800:
                K = 18
            elif center_frequencies[f] < 1500:
                K = 15
            else:
                K = 12
            return K

    else:
        raise ValueError(
            'Unknown parameter_set: {!r}'.format(parameter_set)
        )

    file_template = 'AMI_WSJ20-Array1-{}_T10c0201.wav'
    signal_list = [
        sf.read(str(project_root / 'data' / file_template.format(d + 1)))[0]
        for d in range(channels)
    ]
    # The sample rates are passed by keyword: newer librosa releases accept
    # ``orig_sr``/``target_sr`` as keyword-only arguments, and older
    # releases use the same parameter names.
    signal_list = [
        resample(x_, orig_sr=16000, target_sr=sampling_rate)
        for x_ in signal_list
    ]
    y = np.stack(signal_list, axis=0)

    center_frequencies = get_stft_center_frequencies(stft_size, sampling_rate)

    Y = stft(y, size=stft_size, shift=stft_shift)

    X = np.copy(Y)
    D, T, F = Y.shape
    for f in tqdm(range(F), total=F):
        K = get_K(f)
        X[:, :, f] = wpe_v5(Y[:, :, f], K=K, delay=delay, iterations=iterations)

    x = istft(X, size=stft_size, shift=stft_shift)

    # Only the first channel of the result is stored.
    sf.write(
        str(project_root / 'data' / 'wpe_out.wav'),
        x[0],
        samplerate=sampling_rate,
    )
# ``args.files`` holds the input files followed by the same number of
# output files: first half is read, second half is written.
half = len(args.files) // 2
input_files = args.files[:half]
output_files = args.files[half:]

# ``os.path.dirname`` returns '' for a bare file name, and
# ``os.makedirs('')`` fails, so the directory is only created when the
# output path actually has a directory part.
out_dir = os.path.dirname(output_files[0])
if out_dir:
    os.makedirs(out_dir, exist_ok=True)

stft_options = dict(
    size=512,
    shift=128,
    window_length=None,
    fading=True,
    pad=True,
    symmetric_window=False,
)

sampling_rate = 16000
# NOTE(review): ``delay`` and ``taps`` are assigned but not passed to
# ``wpe`` below, so ``wpe`` runs with its own defaults for them.
delay = 3
iterations = 5
taps = 10

# One signal per input file, stacked along a new leading axis.
signal_list = [sf.read(f)[0] for f in input_files]
y = np.stack(signal_list, axis=0)

# The STFT axes are permuted for ``wpe`` and permuted back afterwards.
Y = stft(y, **stft_options).transpose(2, 0, 1)
Z = wpe(Y, iterations=iterations, statistics_mode='full').transpose(1, 2, 0)
z = istft(Z, size=stft_options['size'], shift=stft_options['shift'])

# Row ``d`` of the result is written to the d-th output file.
for d in range(len(signal_list)):
    sf.write(output_files[d], z[d, :], sampling_rate)
def main(channels, sampling_rate, file_template, taps_frequency_dependent,
         delay, iterations):
    """
    User interface for WPE. The defaults of the command line interface are
    suited for example audio files of nara_wpe.

    The signal is resampled from 16 kHz to ``sampling_rate``, ``wpe_v7`` is
    run independently for every frequency bin, and the first channel of the
    result is written to ``project_root / 'data' / 'wpe_out.wav'``.

    Settings of 'Yoshioka2012GeneralWPE':
        sampling_rate = 8000
        delay = 2
        iterations = 2

    Args:
        channels: Number of channel files to load. Only used when
            ``file_template`` is the bundled example template.
        sampling_rate: Target sampling rate for resampling and for the
            written output file.
        file_template: Either the bundled example template
            'AMI_WSJ20-Array1-{}_T10c0201.wav' (one file per channel below
            ``project_root / 'data'``) or the path of a single audio file
            containing all channels.
        taps_frequency_dependent: If true, use 18/15/12 taps depending on
            the bin's center frequency (< 800, < 1500, otherwise);
            else use 10 taps for every bin.
        delay: Forwarded to ``wpe_v7`` as ``delay``.
        iterations: Forwarded to ``wpe_v7`` as ``iterations``.
    """
    from nara_wpe import project_root
    import soundfile as sf
    from nara_wpe.utils import stft
    from nara_wpe.utils import istft
    from nara_wpe.utils import get_stft_center_frequencies
    from tqdm import tqdm
    from librosa.core.audio import resample

    stft_options = dict(
        size=512,
        shift=128,
        window_length=None,
        fading=True,
        pad=True,
        symmetric_window=False,
    )

    def get_taps(f, mode=taps_frequency_dependent):
        # ``center_frequencies`` is assigned later in ``main``; the closure
        # resolves it when ``get_taps`` is called inside the loop.
        if mode:
            if center_frequencies[f] < 800:
                taps = 18
            elif center_frequencies[f] < 1500:
                taps = 15
            else:
                taps = 12
        else:
            taps = 10
        return taps

    if file_template == 'AMI_WSJ20-Array1-{}_T10c0201.wav':
        signal_list = [
            sf.read(str(project_root / 'data' / file_template.format(d + 1)))[0]
            for d in range(channels)
        ]
    else:
        # ``always_2d=True`` keeps the frames-by-channels layout for mono
        # files as well; without it a mono file yields a 1-D array and the
        # transpose below fails.
        signal = sf.read(file_template, always_2d=True)[0].transpose(1, 0)
        signal_list = list(signal)

    # The sample rates are passed by keyword: newer librosa releases accept
    # ``orig_sr``/``target_sr`` as keyword-only arguments, and older
    # releases use the same parameter names.
    signal_list = [
        resample(x_, orig_sr=16000, target_sr=sampling_rate)
        for x_ in signal_list
    ]
    y = np.stack(signal_list, axis=0)

    center_frequencies = get_stft_center_frequencies(
        stft_options['size'], sampling_rate
    )

    Y = stft(y, **stft_options)

    X = np.copy(Y)
    D, T, F = Y.shape
    for f in tqdm(range(F), total=F):
        taps = get_taps(f)
        X[:, :, f] = wpe_v7(
            Y[:, :, f], taps=taps, delay=delay, iterations=iterations
        )

    x = istft(X, size=stft_options['size'], shift=stft_options['shift'])

    # Only the first channel of the result is stored.
    out_path = str(project_root / 'data' / 'wpe_out.wav')
    sf.write(out_path, x[0], samplerate=sampling_rate)
    print('Output in {}'.format(out_path))
#!/usr/bin/python3
# Single-channel WPE over the numbered channel files of one recording.
#
# Usage: <script> BASENAME NUM_CHANNELS
#
# For every channel index k in 1..NUM_CHANNELS the file
# 'single_channel/BASENAME.CH<k>.wav' is read, processed on its own with
# ``wpe`` (default arguments) and written to
# 'single_dereverb/wpe/BASENAME.CH<k>.wav' with the sample rate of the input.
import numpy as np
import soundfile as sf
from tqdm import tqdm
from nara_wpe.wpe import wpe
from nara_wpe.wpe import get_power
from nara_wpe.utils import stft, istft, get_stft_center_frequencies
import sys

basename = sys.argv[1]
x = int(sys.argv[2])

print("Starting single-channel WPE")

# Shared by the forward and the inverse transform.
stft_options = dict(size=512, shift=128)

for channel in range(1, x + 1):
    # File name shared by the input and the output directory.
    wav_name = basename + '.CH' + str(channel) + '.wav'

    samples, rate = sf.read('single_channel/' + wav_name)

    # A leading axis of length one is added before the STFT, and the STFT
    # axes are permuted for ``wpe`` and permuted back before the inverse.
    spectrum = stft(np.expand_dims(samples, axis=0), **stft_options)
    dereverbed = wpe(spectrum.transpose(2, 0, 1))
    restored = istft(dereverbed.transpose(1, 2, 0), **stft_options)

    sf.write('single_dereverb/wpe/' + wav_name, restored.T, rate)
os.makedirs(out_dir) except OSError as e: if e.errno != errno.EEXIST: raise stft_options = dict( size=512, shift=128, window_length=None, fading=True, pad=True, symmetric_window=False ) sampling_rate = 16000 delay = 3 iterations = 5 taps = 10 signal_list = [ sf.read(f)[0] for f in input_files ] y = np.stack(signal_list, axis=0) Y = stft(y, **stft_options).transpose(2, 0, 1) Z = wpe(Y, iterations=iterations, statistics_mode='full').transpose(1, 2, 0) z = istft(Z, size=stft_options['size'], shift=stft_options['shift']) for d in range(len(signal_list)): sf.write(output_files[d], z[d,:], sampling_rate)
# The second half of ``args.files`` holds the output files.
output_files = args.files[len(args.files) // 2 :]

# ``os.path.dirname`` returns "" for a bare file name, and
# ``os.makedirs("")`` fails, so the directory is only created when the
# output path actually has a directory part.
out_dir = os.path.dirname(output_files[0])
if out_dir:
    os.makedirs(out_dir, exist_ok=True)

stft_options = dict(
    size=512,
    shift=128,
    window_length=None,
    fading=True,
    pad=True,
    symmetric_window=False,
)

sampling_rate = 16000
# NOTE(review): ``delay`` and ``taps`` are assigned but not passed to
# ``wpe`` below, so ``wpe`` runs with its own defaults for them.
delay = 3
iterations = 5
taps = 10

# One signal per input file, stacked along a new leading axis.
signal_list = [sf.read(f)[0] for f in input_files]
y = np.stack(signal_list, axis=0)

# The STFT axes are permuted for ``wpe`` and permuted back afterwards.
Y = stft(y, **stft_options).transpose(2, 0, 1)
Z = wpe(Y, iterations=iterations, statistics_mode="full").transpose(1, 2, 0)
z = istft(Z, size=stft_options["size"], shift=stft_options["shift"])

# Row ``d`` of the result is written to the d-th output file.
for d in range(len(signal_list)):
    sf.write(output_files[d], z[d, :], sampling_rate)
def wpe_worker(
    wav_scp,
    processing_id=None,
    processing_num=None,
    data_root="MISP_121h",
    output_root="MISP_121h_WPE_",
):
    """Run offline WPE on every entry of ``wav_scp`` assigned to this worker.

    Each line of ``wav_scp`` is split on single spaces: the first field is
    a name and the remaining fields are wav paths that are processed
    jointly as the channels of one signal. Channels are truncated to the
    shortest one before stacking. Each result channel is written as int16
    to its input path with ``data_root`` replaced by ``output_root``.

    Args:
        wav_scp: Path of the list file described above.
        processing_id: Index of this worker, or None to process every line.
        processing_num: Total number of workers; line ``i`` is handled by
            the worker with ``i % processing_num == processing_id``.
            Must be set when ``processing_id`` is not None.
        data_root: Substring of each input path to replace.
        output_root: Replacement for ``data_root`` in the output path.

    Returns:
        None.
    """
    # NOTE(review): the first value returned by ``wf.read`` (presumably the
    # file's sample rate) is discarded; output is always written at 16 kHz.
    sampling_rate = 16000
    iterations = 5
    stft_options = dict(
        size=512,
        shift=128,
        window_length=None,
        fading=True,
        pad=True,
        symmetric_window=False,
    )

    with codecs.open(wav_scp, "r") as handle:
        wav_lines = [
            line[:-1] if line.endswith("\n") else line
            for line in handle.readlines()
        ]

    progress = tqdm(
        range(len(wav_lines)),
        leave=True,
        desc="0" if processing_id is None else str(processing_id),
    )
    for wav_idx in progress:
        # Round-robin sharding: skip lines that belong to another worker.
        if processing_id is not None and wav_idx % processing_num != processing_id:
            continue

        wav_list = wav_lines[wav_idx].split(" ")[1:]
        if not wav_list:
            # Blank or name-only line: there is no audio to process.
            continue

        signal_list = []
        for f in wav_list:
            _, data = wf.read(f)
            if data.dtype == np.int16:
                # Scale int16 PCM to float32 in [-1, 1).
                data = np.float32(data) / 32768
            signal_list.append(data)
        print("wait to process {} : {}".format(wav_idx, wav_list[0]))

        # ``np.stack`` needs equal lengths, so cut every channel down to
        # the shortest one (a no-op when the lengths already match).
        min_len = min(len(signal) for signal in signal_list)
        signal_list = [signal[:min_len] for signal in signal_list]

        y = np.stack(signal_list, axis=0)
        # The STFT axes are permuted for ``wpe`` and permuted back afterwards.
        Y = stft(y, **stft_options).transpose(2, 0, 1)
        Z = wpe(Y, iterations=iterations, statistics_mode="full").transpose(1, 2, 0)
        z = istft(Z, size=stft_options["size"], shift=stft_options["shift"])

        for d, wav_path in enumerate(wav_list):
            # NOTE(review): if ``data_root`` does not occur in the path,
            # ``store_path`` equals the input path and the input file is
            # overwritten.
            store_path = wav_path.replace(data_root, output_root)
            store_dir = os.path.split(store_path)[0]
            if store_dir:
                os.makedirs(store_dir, exist_ok=True)
            # Saturate before the int16 conversion: samples outside
            # [-1, 1) would otherwise overflow instead of clipping.
            tmpwav = np.clip(z[d, :] * 32768, -32768, 32767).astype(np.int16)
            wf.write(store_path, sampling_rate, tmpwav)
    return None
] signal_list_len = len(signal_list) y = np.stack(signal_list, axis=0) del signal_list Y = stft(y, **stft_options).transpose(1, 2, 0) del y T, _, _ = Y.shape Z_list = [] online_wpe = OnlineWPE(taps=taps, delay=delay, alpha=alpha, frequency_bins=Y.shape[1], channel=channels) for Y_step in tqdm(aquire_framebuffer()): if np.sum(Y_step.flatten()) != 0: Z_list.append(online_wpe.step_frame(Y_step)) else: Z_list.append(Y_step[0, :, :].reshape( (Y_step.shape[1], Y_step.shape[2]))) del Y Z = np.asarray(np.stack(Z_list)).transpose(2, 0, 1) z = istft(Z, size=stft_options['size'], shift=stft_options['shift']).astype('int16') del Z for d in range(signal_list_len): sf.write(out_session + '.CH' + str(d + 1) + '.wav', z[d, :], sampling_rate)