def test_delay_zero_cancels_all(self):
    """Check that ``delay=0`` yields a numerically zero output tail.

    The output of ``wpe.wpe`` is compared against zeros from column
    ``delay + self.K - 1`` onward (absolute tolerance ``1e-10``).
    """
    delay = 0
    X_hat = wpe.wpe(self.Y, self.K, delay=delay)

    # Beginning is never zero. It is a copy of input signal.
    first_checked = delay + self.K - 1
    tail = X_hat[:, first_checked:]
    tc.assert_allclose(tail, np.zeros_like(tail), atol=1e-10)
def __call__(self, xs):
    """Return the WPE-enhanced version of ``xs``.

    :param np.ndarray xs: (Time, Channel, Frequency)
    :return: enhanced_xs, with the axes put back in the input order
    :rtype: np.ndarray
    """
    # nara_wpe.wpe: (F, C, T)
    reversed_axes = xs.transpose((2, 1, 0))
    enhanced = wpe(
        reversed_axes,
        taps=self.taps,
        delay=self.delay,
        iterations=self.iterations,
        psd_context=self.psd_context,
        statistics_mode=self.statistics_mode,
    )
    # Reversing the axes a second time undoes the first transpose.
    return enhanced.transpose(2, 1, 0)
# The positional file list is split in the middle: the first half are the
# input channel files, the second half the matching output files.
half = len(args.files) // 2
input_files = args.files[:half]
output_files = args.files[half:]

# Make sure the directory of the first output file exists.  An output path
# without a directory component yields '' here, and os.makedirs('') fails
# with ENOENT (not EEXIST), so that case must be skipped explicitly.
out_dir = os.path.dirname(output_files[0])
if out_dir:
    try:
        os.makedirs(out_dir)
    except OSError as e:
        # Only "already exists" is tolerated; anything else propagates.
        if e.errno != errno.EEXIST:
            raise

stft_options = dict(
    size=512,
    shift=128,
    window_length=None,
    fading=True,
    pad=True,
    symmetric_window=False,
)

# NOTE(review): the rate reported by sf.read() is discarded and every output
# is written at this fixed rate -- confirm all inputs really are 16 kHz.
sampling_rate = 16000
delay = 3
iterations = 5
taps = 10

# One array per input file, stacked along a new leading axis.
signal_list = [sf.read(f)[0] for f in input_files]
y = np.stack(signal_list, axis=0)

# Move the last STFT axis to the front for wpe() and back again afterwards.
Y = stft(y, **stft_options).transpose(2, 0, 1)
# taps and delay are passed explicitly so that editing the settings above
# actually takes effect (they were previously assigned but never used).
Z = wpe(
    Y,
    taps=taps,
    delay=delay,
    iterations=iterations,
    statistics_mode='full',
).transpose(1, 2, 0)
z = istft(Z, size=stft_options['size'], shift=stft_options['shift'])

# Row d of z belongs to output_files[d].
for output_file, channel in zip(output_files, z):
    sf.write(output_file, channel, sampling_rate)
#!/usr/bin/python3
"""Run WPE separately on each single-channel file of one recording.

Command line: ``<script> BASENAME NUM_CHANNELS``.  For every channel number
``c`` in ``1..NUM_CHANNELS`` the file ``single_channel/BASENAME.CH<c>.wav``
is read and the result is written to ``single_dereverb/wpe/BASENAME.CH<c>.wav``.
"""
import numpy as np
import soundfile as sf
from tqdm import tqdm
from nara_wpe.wpe import wpe
from nara_wpe.wpe import get_power
from nara_wpe.utils import stft, istft, get_stft_center_frequencies
import sys

basename = sys.argv[1]
x = int(sys.argv[2])  # number of channel files to process

print("Starting single-channel WPE")

stft_options = dict(size=512, shift=128)

for channel in range(1, x + 1):
    # Input and output share the same file name, only the folder differs.
    file_name = basename + '.CH' + str(channel) + '.wav'

    samples, fs = sf.read('single_channel/' + file_name)

    # Add a leading axis of length one before the STFT, then move the last
    # STFT axis to the front for wpe().
    spectrum = stft(np.expand_dims(samples, axis=0), **stft_options)
    dereverbed = wpe(spectrum.transpose(2, 0, 1))

    # Undo the axis permutation before going back to the time domain.
    z_np = istft(
        dereverbed.transpose(1, 2, 0),
        size=stft_options['size'],
        shift=stft_options['shift'],
    )

    sf.write('single_dereverb/wpe/' + file_name, z_np.T, fs)
#WPEの繰り返し回数 n_wpe_iterations=20 #残響除去のパラメータ D=2 Lh=5 #過去のマイクロホン入力信号 x_bar=make_x_bar(stft_data,D,Lh) #WPEで残響除去 x_dereverb_wpe,cost_buff_wpe=dereverberation_wpe(stft_data,x_bar,n_wpe_iterations) #nara WPEで残響除去 x_dereverb_nara_wpe=wpe.wpe(np.transpose(stft_data,(1,0,2)),taps=Lh,delay=D,iterations=n_wpe_iterations) x_dereverb_nara_wpe=np.transpose(x_dereverb_nara_wpe,(1,0,2))[0,...] #x:入力信号( M, Nk, Lt) t,x_dereverb_wpe=sp.istft(x_dereverb_wpe,fs=sample_rate,window="hann",nperseg=N,noverlap=N-Nshift) t,x_dereverb_nara_wpe=sp.istft(x_dereverb_nara_wpe,fs=sample_rate,window="hann",nperseg=N,noverlap=N-Nshift) snr_pre=calculate_snr(multi_conv_data_no_reverb[0,...],multi_conv_data[0,...]) snr_wpe_post=calculate_snr(multi_conv_data_no_reverb[0,...],x_dereverb_wpe) snr_nara_wpe_post=calculate_snr(multi_conv_data_no_reverb[0,...],x_dereverb_nara_wpe) write_file_from_time_signal(x_dereverb_wpe[:wave_len]*np.iinfo(np.int16).max/20.,"./dereverb_wpe_{}_{}.wav".format(Lh,D),sample_rate) write_file_from_time_signal(x_dereverb_nara_wpe[:wave_len]*np.iinfo(np.int16).max/20.,"./dereverb_nara_wpe.wav",sample_rate)
# NOTE(review): this fragment begins inside a `try:` whose header lies above
# the visible lines; the first statement below is the body of that `try`.
    os.makedirs(out_dir)
except OSError as e:
    # Only an "already exists" error is tolerated; anything else propagates.
    if e.errno != errno.EEXIST:
        raise

stft_options = dict(
    size=512,
    shift=128,
    window_length=None,
    fading=True,
    pad=True,
    symmetric_window=False
)

# Every output file is written at this fixed rate; the rate returned by
# sf.read() is not used.
sampling_rate = 16000
# NOTE(review): delay and taps are assigned but not passed to wpe() below.
delay = 3
iterations = 5
taps = 10

# Keep only the first element of each sf.read() result.
signal_list = [
    sf.read(f)[0]
    for f in input_files
]
# Stack the per-file arrays along a new leading axis.
y = np.stack(signal_list, axis=0)
# Move the last STFT axis to the front for wpe(), and back again afterwards.
Y = stft(y, **stft_options).transpose(2, 0, 1)
Z = wpe(Y, iterations=iterations, statistics_mode='full').transpose(1, 2, 0)
z = istft(Z, size=stft_options['size'], shift=stft_options['shift'])

# Row d of z is written to output_files[d].
for d in range(len(signal_list)):
    sf.write(output_files[d], z[d,:], sampling_rate)
def wpe_worker(
    wav_scp,
    processing_id=None,
    processing_num=None,
    data_root="MISP_121h",
    output_root="MISP_121h_WPE_",
):
    """Apply WPE to the recordings listed in ``wav_scp`` and store the results.

    Each non-blank line of ``wav_scp`` has the form
    ``<name> <wav> [<wav> ...]``.  The wav files of one line are stacked and
    processed together; each result is written to the input path with
    ``data_root`` replaced by ``output_root``.

    :param wav_scp: path of the list file described above.
    :param processing_id: index of this worker, or ``None`` to process every
        line.  When given, only lines whose index satisfies
        ``index % processing_num == processing_id`` are processed.
    :param processing_num: total number of workers; must be given whenever
        ``processing_id`` is.
    :param data_root: substring of each input path to be replaced.
    :param output_root: replacement for ``data_root`` in the output path.
    :return: ``None``.
    """
    # NOTE(review): the first value returned by wf.read() (presumably the
    # file's own sample rate) is discarded and every output is written at
    # this fixed rate -- confirm that all inputs are 16 kHz.
    sampling_rate = 16000
    iterations = 5
    stft_options = dict(
        size=512,
        shift=128,
        window_length=None,
        fading=True,
        pad=True,
        symmetric_window=False,
    )

    with codecs.open(wav_scp, "r") as handle:
        # Strip the line terminator (also a stray "\r" from CRLF files).
        wav_lines = [line.rstrip("\r\n") for line in handle]

    progress = tqdm(
        enumerate(wav_lines),
        total=len(wav_lines),
        leave=True,
        desc="0" if processing_id is None else str(processing_id),
    )
    for wav_idx, line in progress:
        # Lines are distributed round-robin over the workers.
        if processing_id is not None and wav_idx % processing_num != processing_id:
            continue

        # split() without an argument tolerates tabs and repeated spaces,
        # which split(" ") would turn into empty "paths".
        wav_list = line.split()[1:]
        if not wav_list:
            # Blank line or an entry without audio files: nothing to do
            # (previously this crashed with IndexError).
            continue

        signal_list = []
        for f in wav_list:
            _, data = wf.read(f)
            if data.dtype == np.int16:
                # Bring 16-bit PCM into floating point, scaled by 1/32768.
                data = np.float32(data) / 32768
            signal_list.append(data)
        print("wait to process {} : {}".format(wav_idx, wav_list[0]))

        # Trim every channel to the shortest one so they can be stacked.
        min_len = min(len(signal) for signal in signal_list)
        y = np.stack([signal[:min_len] for signal in signal_list], axis=0)

        # Move the last STFT axis to the front for wpe(), and back afterwards.
        Y = stft(y, **stft_options).transpose(2, 0, 1)
        Z = wpe(Y, iterations=iterations, statistics_mode="full").transpose(1, 2, 0)
        z = istft(Z, size=stft_options["size"], shift=stft_options["shift"])

        for wav_path, channel in zip(wav_list, z):
            # NOTE(review): if data_root does not occur in wav_path, the
            # output path equals the input path and the input is overwritten.
            store_path = wav_path.replace(data_root, output_root)
            store_dir = os.path.dirname(store_path)
            if store_dir:
                # os.makedirs('') raises, so bare file names are skipped.
                os.makedirs(store_dir, exist_ok=True)
            # Clip before the cast: values outside the int16 range would
            # otherwise wrap around and produce loud artefacts.
            pcm = np.clip(channel * 32768, -32768, 32767).astype(np.int16)
            wf.write(store_path, sampling_rate, pcm)
    return None
import numpy as np
import soundfile as sf
from nara_wpe.utils import istft, stft
from nara_wpe.wpe import wpe

# Command line: <script> InWaveFile OutWaveFile
parser = argparse.ArgumentParser(description='de-reverb audio wave.')
parser.add_argument('inwave', metavar='InWaveFile', type=str)
parser.add_argument('outwave', metavar='OutWaveFile', type=str)
args = parser.parse_args()

stft_options = dict(size=512, shift=128)
delay = 3
iterations = 5
taps = 10

data, sampling_rate = sf.read(args.inwave)

# soundfile returns a 1-D array for mono files and a (frames, channels) array
# otherwise.  Arrange both as (channels, samples); stacking a 2-D array as a
# single "channel" would make the STFT run along the channel axis.
is_mono = data.ndim == 1
y = data[np.newaxis, :] if is_mono else data.T

# Move the last STFT axis to the front for wpe(), and back again afterwards.
Y = stft(y, **stft_options).transpose(2, 0, 1)
Z = wpe(
    Y,
    taps=taps,
    delay=delay,
    iterations=iterations,
    statistics_mode='full',
).transpose(1, 2, 0)
z = istft(Z, size=stft_options['size'], shift=stft_options['shift'])

# Write the result in the same layout the input was read in.
sf.write(args.outwave, z[0] if is_mono else z.T, sampling_rate)