def incorrect_input_size(D, num_frames):
    if D == 1:
        x_local = x[:, 0]
    else:
        x_local = x[:, :D]

    # parameters
    block_size = 512
    hop = block_size

    # create STFT object sized for a fixed number of frames
    stft = STFT(
        block_size,
        hop=hop,
        channels=D,
        transform=transform,
        num_frames=num_frames,
    )

    try:
        # the input carries more frames than the object was created for,
        # so `analysis` is expected to raise
        stft.analysis(x_local)
        computed = True
    except Exception:
        computed = False

    return computed
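
# --- Usage sketch (not part of the test suite) ---
# A hedged example of how this helper might be exercised from a unittest case;
# the class and test names are illustrative, not taken from the package. Since
# the input holds more frames than the STFT object was sized for, `analysis`
# should raise and the helper should report that nothing was computed.
import unittest


class TestIncorrectInputSize(unittest.TestCase):
    def test_oversized_input_is_rejected(self):
        # mono signal, STFT object sized for a single frame
        self.assertFalse(incorrect_input_size(D=1, num_frames=1))
        # multichannel variant, still too many frames in the input
        self.assertFalse(incorrect_input_size(D=2, num_frames=10))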
def no_overlap_no_filter(D, num_frames=1, fixed_memory=False, streaming=True):
    """
    D            - number of channels
    num_frames   - how many frames to process, None will process one frame at
                   a time
    fixed_memory - whether to enforce checks for size (real-time consideration)
    streaming    - whether or not to stitch between frames
    """
    if D == 1:
        x_local = x[:, 0]
    else:
        x_local = x[:, :D]

    # parameters
    block_size = 512  # make sure the FFT size is a power of 2
    hop = block_size  # no overlap

    if not streaming:
        num_samples = (num_frames - 1) * hop + block_size
        x_local = x_local[:num_samples, ]

    # Create the STFT object
    if fixed_memory:
        stft = STFT(
            block_size,
            hop=hop,
            channels=D,
            transform=transform,
            num_frames=num_frames,
            streaming=streaming,
        )
    else:
        stft = STFT(
            block_size,
            hop=hop,
            channels=D,
            transform=transform,
            streaming=streaming,
        )

    # collect the processed blocks
    processed_x = np.zeros(x_local.shape)

    if streaming:
        n = 0
        hop_frames = hop * num_frames
        # process the signal while full blocks are available
        while x_local.shape[0] - n > hop_frames:
            stft.analysis(x_local[n:n + hop_frames, ])
            processed_x[n:n + hop_frames, ] = stft.synthesis()
            n += hop_frames
    else:
        stft.analysis(x_local)
        processed_x = stft.synthesis()
        n = processed_x.shape[0]

    error = np.max(np.abs(x_local[:n, ] - processed_x[:n, ]))

    return error
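
# --- Usage sketch (not part of the test suite) ---
# A hedged example of driving this analysis/synthesis round-trip helper from a
# unittest case. The tolerance and channel counts are assumptions for
# illustration; they depend on the test signal loaded at module level.
import unittest


class TestNoOverlapNoFilter(unittest.TestCase):
    def test_mono_streaming(self):
        # one frame at a time, stitched across successive calls
        self.assertTrue(no_overlap_no_filter(D=1, num_frames=1) < 1e-6)

    def test_multichannel_one_shot(self):
        # whole signal in a single call, pre-allocated (fixed-memory) buffers
        error = no_overlap_no_filter(
            D=2, num_frames=50, fixed_memory=True, streaming=False
        )
        self.assertTrue(error < 1e-6)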
def apply_spectral_sub(noisy_signal, nfft=512, db_reduc=25, lookback=12, beta=30, alpha=1):
    """
    One-shot function to apply the spectral subtraction approach.

    Parameters
    ----------
    noisy_signal : numpy array
        Real signal in time domain.
    nfft : int
        FFT size. The length of the gain filter, i.e. the number of frequency
        bins, is given by ``nfft//2+1``.
    db_reduc : float
        Maximum reduction in dB for each bin.
    lookback : int
        How many frames to look back for the noise estimate.
    beta : float
        Overestimation factor to "push" the gain filter value (at each
        frequency) closer to the dB reduction specified by ``db_reduc``.
    alpha : float, optional
        Exponent factor to modify the transition behavior towards the dB
        reduction specified by ``db_reduc``. Default is 1.

    Returns
    -------
    numpy array
        Enhanced/denoised signal.
    """
    from pyroomacoustics import hann
    from pyroomacoustics.transform import STFT

    hop = nfft // 2
    window = hann(nfft, flag="asymmetric", length="full")
    stft = STFT(nfft, hop=hop, analysis_window=window, streaming=True)
    scnr = SpectralSub(nfft, db_reduc, lookback, beta, alpha)

    processed_audio = np.zeros(noisy_signal.shape)
    n = 0
    while noisy_signal.shape[0] - n >= hop:
        # SCNR in frequency domain
        stft.analysis(noisy_signal[n:(n + hop), ])
        gain_filt = scnr.compute_gain_filter(stft.X)

        # back to time domain
        processed_audio[n:n + hop, ] = stft.synthesis(gain_filt * stft.X)

        # update step
        n += hop

    return processed_audio
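
# --- Usage sketch (file names are placeholders, not from the package) ---
# A hedged example of calling the one-shot function above: read a noisy
# recording, denoise it, and write the result back to disk. It assumes a
# 16-bit PCM WAV input and that scipy is available.
import numpy as np
from scipy.io import wavfile

fs, noisy = wavfile.read("noisy_input.wav")     # placeholder file name
noisy = noisy.astype(np.float32) / 32768.0      # scale 16-bit PCM to [-1, 1)

denoised = apply_spectral_sub(
    noisy, nfft=512, db_reduc=25, lookback=12, beta=30, alpha=1
)
wavfile.write("denoised_output.wav", fs, (denoised * 32768).astype(np.int16))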
def with_half_overlap_with_filter(D, num_frames=1, fixed_memory=False, streaming=True):
    """
    D            - number of channels
    num_frames   - how many frames to process, None will process one frame at
                   a time
    fixed_memory - whether to enforce checks for size (real-time consideration)
    streaming    - whether or not to stitch between frames
    """
    if D == 1:
        x_local = x[:, 0]
        y_local = y[:, 0]
        h_local = h[:, 0]
    else:
        x_local = x[:, :D]
        y_local = y[:, :D]
        h_local = h[:, :D]

    # parameters
    block_size = 512 - h_len + 1  # make sure the FFT size is a power of 2
    hop = block_size // 2  # half overlap
    window = pra.hann(block_size)  # the analysis window

    if not streaming:
        num_samples = (num_frames - 1) * hop + block_size
        x_local = x_local[:num_samples, ]

    # Create the STFT object
    if fixed_memory:
        stft = STFT(
            block_size,
            hop=hop,
            channels=D,
            transform=transform,
            num_frames=num_frames,
            analysis_window=window,
            streaming=streaming,
        )
    else:
        stft = STFT(
            block_size,
            hop=hop,
            channels=D,
            transform=transform,
            analysis_window=window,
            streaming=streaming,
        )

    # setup the filter
    stft.set_filter(h_local, zb=h_len - 1)

    # collect the processed blocks
    processed_x = np.zeros(x_local.shape)

    if not streaming:
        stft.analysis(x_local)
        stft.process()
        processed_x = stft.synthesis()
        n = processed_x.shape[0]
        error = np.max(
            np.abs(
                y_local[block_size:n - block_size, ]
                - processed_x[block_size:n - block_size, ]
            )
        )
    else:
        n = 0
        hop_frames = hop * num_frames
        # process the signal while full blocks are available
        while x_local.shape[0] - n > hop_frames:
            stft.analysis(x_local[n:n + hop_frames, ])
            stft.process()  # apply the filter
            processed_x[n:n + hop_frames, ] = stft.synthesis()
            n += hop_frames

        error = np.max(np.abs(y_local[:n - hop, ] - processed_x[hop:n, ]))

        # if D == 1:
        #     import matplotlib.pyplot as plt
        #     plt.figure()
        #     plt.plot(y_local)
        #     plt.plot(processed_x)
        #     plt.show()

    return error
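
# --- Block size reasoning (sketch with an assumed filter length) ---
# `h_len` is defined elsewhere in the test module; the value below is only for
# illustration. The frame is shrunk by the filter length and then zero-padded
# at the back by `zb = h_len - 1` samples when the filter is set, so the DFT
# length grows back to 512: long enough that the frequency-domain product
# equals a linear convolution, and still a power of two.
h_len_example = 99                           # assumed filter length
block_size_example = 512 - h_len_example + 1  # signal samples per frame (414)
zb_example = h_len_example - 1                # back zero-padding for set_filter()
fft_size_example = block_size_example + zb_example
assert fft_size_example == 512                # no circular-convolution aliasing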
def with_arbitrary_overlap_synthesis_window(
    D, num_frames=1, fixed_memory=False, streaming=True, overlap=0.5
):
    """
    D            - number of channels
    num_frames   - how many frames to process, None will process one frame at
                   a time
    fixed_memory - whether to enforce checks for size (real-time consideration)
    streaming    - whether or not to stitch between frames
    overlap      - fraction of each block that overlaps the previous one
    """
    if D == 1:
        x_local = x[:, 0]
    else:
        x_local = x[:, :D]

    # parameters
    block_size = 512  # make sure the FFT size is a power of 2
    hop = int((1 - overlap) * block_size)  # hop set by the overlap fraction

    if not streaming:
        num_samples = (num_frames - 1) * hop + block_size
        x_local = x_local[:num_samples, ]

    analysis_window = pra.hann(block_size)
    synthesis_window = pra.transform.compute_synthesis_window(analysis_window, hop)

    # Create the STFT object
    if fixed_memory:
        stft = STFT(
            block_size,
            hop=hop,
            channels=D,
            transform=transform,
            num_frames=num_frames,
            analysis_window=analysis_window,
            synthesis_window=synthesis_window,
            streaming=streaming,
        )
    else:
        stft = STFT(
            block_size,
            hop=hop,
            channels=D,
            analysis_window=analysis_window,
            synthesis_window=synthesis_window,
            transform=transform,
            streaming=streaming,
        )

    # collect the processed blocks
    processed_x = np.zeros(x_local.shape)

    if streaming:
        n = 0
        hop_frames = hop * num_frames
        # process the signal while full blocks are available
        while x_local.shape[0] - n > hop_frames:
            stft.analysis(x_local[n:n + hop_frames, ])
            processed_x[n:n + hop_frames, ] = stft.synthesis()
            n += hop_frames

        error = np.max(
            np.abs(
                x_local[:n - block_size + hop, ]
                - processed_x[block_size - hop:n, ]
            )
        )

        # plot the signals if the reconstruction error is suspiciously large
        if 20 * np.log10(error) > -10:
            import matplotlib.pyplot as plt

            if x_local.ndim == 1:
                plt.plot(x_local[:n - block_size + hop])
                plt.plot(processed_x[block_size - hop:n])
            else:
                plt.plot(x_local[:n - block_size + hop, 0])
                plt.plot(processed_x[block_size - hop:n, 0])
            plt.show()

    else:
        stft.analysis(x_local)
        processed_x = stft.synthesis()
        n = processed_x.shape[0]

        L = block_size - hop
        error = np.max(np.abs(x_local[L:-L, ] - processed_x[L:, ]))

        # plot the signals if the reconstruction error is suspiciously large
        if 20 * np.log10(error) > -10:
            import matplotlib.pyplot as plt

            if x_local.ndim == 1:
                plt.plot(x_local[L:-L])
                plt.plot(processed_x[L:])
            else:
                plt.plot(x_local[L:-L, 0])
                plt.plot(processed_x[L:, 0])
            plt.show()

    return error
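
# --- Side-check (not part of the test) of the overlap-add property ---
# A hedged sketch of the property behind `compute_synthesis_window`: summed
# over all hop-shifted frames, the product of analysis and synthesis windows
# should be constant, which is what makes reconstruction exact for an
# arbitrary overlap. The hop value below is chosen for illustration and
# assumes the block size is an integer multiple of the hop.
import numpy as np
import pyroomacoustics as pra

_block_size = 512
_hop = 128  # 75% overlap
_analysis = pra.hann(_block_size)
_synthesis = pra.transform.compute_synthesis_window(_analysis, _hop)

# circular overlap-add of the window product over one block
_prod = _analysis * _synthesis
_ola = sum(np.roll(_prod, k * _hop) for k in range(_block_size // _hop))
print("max deviation from constant overlap-add:", np.max(np.abs(_ola - 1.0)))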
def apply_iterative_wiener(
    noisy_signal, frame_len=512, lpc_order=20, iterations=2, alpha=0.8, thresh=0.01
):
    """
    One-shot function to apply iterative Wiener filtering for denoising.

    Parameters
    ----------
    noisy_signal : numpy array
        Real signal in time domain.
    frame_len : int
        Frame length in samples. 50% overlap is used with a Hann window.
    lpc_order : int
        Number of LPC coefficients to compute.
    iterations : int
        How many iterations to perform in updating the Wiener filter for each
        signal frame.
    alpha : float
        Smoothing factor within [0, 1] for updating the noise level. Closer to
        ``1`` gives more weight to the previous noise level, while closer to
        ``0`` gives more weight to the current frame's level. Closer to ``0``
        can track more rapid changes in the noise level. However, if a speech
        frame is incorrectly identified as noise, desired speech may be
        removed.
    thresh : float
        Threshold to distinguish between (signal+noise) and (noise) frames. A
        high value will classify more frames as noise but might remove desired
        signal!

    Returns
    -------
    numpy array
        Enhanced/denoised signal.
    """
    from pyroomacoustics import hann
    from pyroomacoustics.transform import STFT

    hop = frame_len // 2
    window = hann(frame_len, flag="asymmetric", length="full")
    stft = STFT(frame_len, hop=hop, analysis_window=window, streaming=True)
    scnr = IterativeWiener(frame_len, lpc_order, iterations, alpha, thresh)

    processed_audio = np.zeros(noisy_signal.shape)
    n = 0
    while noisy_signal.shape[0] - n >= hop:
        # SCNR in frequency domain
        stft.analysis(noisy_signal[n:(n + hop), ])
        X = scnr.compute_filtered_output(
            current_frame=stft.fft_in_buffer, frame_dft=stft.X
        )

        # back to time domain
        processed_audio[n:n + hop, ] = stft.synthesis(X)

        # update step
        n += hop

    return processed_audio
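
# --- Usage sketch (file names are placeholders, not from the package) ---
# A hedged example mirroring the spectral subtraction sketch above: load a
# noisy speech recording as a float array, enhance it with the one-shot
# function, and write the result to disk. Assumes 16-bit PCM input and scipy.
import numpy as np
from scipy.io import wavfile

fs, noisy = wavfile.read("noisy_speech.wav")   # placeholder file name
noisy = noisy.astype(np.float32) / 32768.0     # scale 16-bit PCM to [-1, 1)

enhanced = apply_iterative_wiener(
    noisy, frame_len=512, lpc_order=20, iterations=2, alpha=0.8, thresh=0.01
)
wavfile.write("enhanced_speech.wav", fs, (enhanced * 32768).astype(np.int16))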
""" One frame at a time """
print(
    "Averaging computation time over %d cases of %d channels of %d samples "
    "(%0.1f s at %0.1f kHz)."
    % (num_times, num_mic, len(signals), (len(signals) / fs), fs / 1000)
)
print()
print("----- SINGLE FRAME AT A TIME -----")

print("With STFT object (not fixed) : ", end="")
stft = STFT(
    block_size, hop=hop, channels=num_mic, streaming=True, analysis_window=win
)
start = time.time()
for k in range(num_times):
    x_r = np.zeros(signals.shape)
    n = 0
    while signals.shape[0] - n > hop:
        stft.analysis(signals[n:n + hop, ])
        x_r[n:n + hop, ] = stft.synthesis()
        n += hop
avg_time = (time.time() - start) / num_times
print("%0.3f sec" % avg_time)
err_dB = 20 * np.log10(np.max(np.abs(signals[:n - hop, ] - x_r[hop:n, ])))
print("Error [dB] : %0.3f" % err_dB)

print("With STFT object (fixed) : ", end="")
stft = STFT(
    block_size,
    hop=hop,
    channels=num_mic,
    num_frames=1,
    streaming=True,
    analysis_window=win,
)
start = time.time()
for k in range(num_times):
    x_r = np.zeros(signals.shape)