def istft(
        stft_signal,
        size=1024,
        shift=256,
        window=signal.windows.blackman,
        fading=True,
        window_length=None,
        symmetric_window=False,
):
    """
    Calculate the inverse short time Fourier transform of an STFT signal.

    Each frame is transformed back to the time domain, multiplied with a
    synthesis window and overlap-added into the output signal.

    Notes:
        Be careful if you make modifications in the frequency domain (e.g.
        beamforming) because the synthesis window is calculated according to
        the unmodified analysis window.

    Args:
        stft_signal: Single channel complex STFT signal
            with dimensions (..., frames, size // 2 + 1).
        size: Scalar FFT-size.
        shift: Scalar FFT-shift. Typically shift is a fraction of size.
        window: Window function handle, called with the number of samples.
        fading: If True, removes ``window_length - shift`` samples from both
            ends of the result. This undoes the padding that ``stft`` adds
            when it is called with ``fading=True``.
        window_length: Sometimes one desires to use a shorter window than
            the fft size. In that case, the window is padded with zeros.
            The default is to use the fft-size as a window size.
        symmetric_window: If True, use ``window(window_length)`` (symmetric
            window). Otherwise use ``window(window_length + 1)[:-1]``
            (periodic window), which is the behaviour of MATLAB and avoids
            the curious behaviour of the scipy.signal windows for odd
            window_length.

    Returns:
        Single channel real time signal with dimensions (..., samples).
        Before the optional fading removal, samples is
        ``frames * shift + window_length - shift``.

    Raises:
        AssertionError: If the last axis of ``stft_signal`` does not have
            ``size // 2 + 1`` entries.
    """
    # Note: frame_axis and frequency_axis would make this function much more
    # complicated
    stft_signal = np.array(stft_signal)

    assert stft_signal.shape[-1] == size // 2 + 1, str(stft_signal.shape)

    if window_length is None:
        window_length = size

    if symmetric_window:
        window = window(window_length)
    else:
        # Periodic variant of the window.
        window = window(window_length + 1)[:-1]

    # Replace the analysis window by the matching synthesis window.
    window = _biorthogonal_window_fastest(window, shift)

    time_signal = np.zeros(
        list(stft_signal.shape[:-2])
        + [stft_signal.shape[-2] * shift + window_length - shift]
    )

    # Get the correct view to time_signal
    # NOTE(review): assumes segment_axis_v2 returns a writable view on
    # time_signal, so the in-place add below fills time_signal -- confirm.
    time_signal_seg = segment_axis_v2(
        time_signal, window_length, shift, end=None
    )

    # Pass n=size explicitly: the default output length of irfft is
    # 2 * (bins - 1), which equals size - 1 (not size) for an odd size.
    frames = np.real(irfft(stft_signal, n=size))

    # Unbuffered inplace add (overlapping segments must accumulate).
    # The [..., :window_length] is the inverse of the window padding in rfft.
    np.add.at(
        time_signal_seg,
        Ellipsis,
        window * frames[..., :window_length],
    )

    # Compensate fade-in and fade-out
    if fading:
        fade = window_length - shift
        time_signal = time_signal[..., fade:time_signal.shape[-1] - fade]

    return time_signal
def stft(
        time_signal,
        size,
        shift,
        axis=-1,
        window=signal.windows.blackman,
        window_length=None,
        fading=True,
        pad=True,
        symmetric_window=False,
):
    """
    Calculate the short time Fourier transformation of a time signal.

    ToDo: Open points:
     - sym_window need literature
     - fading why it is better?
     - should pad have more degrees of freedom?

    The signal may have multiple channels / speakers. It is able to add
    additional zeros for fade-in and fade-out and should yield an STFT
    signal which allows perfect reconstruction.

    Args:
        time_signal: Multi channel time signal with dimensions
            AA x ... x AZ x T x BA x ... x BZ.
        size: Scalar FFT-size.
        shift: Scalar FFT-shift, the step between successive frames in
            samples. Typically shift is a fraction of size.
        axis: Scalar axis of time. Negative values count from the end.
            Default: -1 (the last axis).
        window: Window function handle, called with the number of samples.
            Default is blackman window.
        window_length: Sometimes one desires to use a shorter window than
            the fft size. In that case, the window is padded with zeros.
            The default is to use the fft-size as a window size.
        fading: If True, pads ``window_length - shift`` zeros to both ends
            of the time axis for better reconstruction.
        pad: If True zero pad the signal to match the shape, else cut
            (forwarded to segment_axis_v2 as ``end='pad'`` / ``end='cut'``).
        symmetric_window: If True, use ``window(window_length)`` (symmetric
            window). Otherwise use ``window(window_length + 1)[:-1]``
            (periodic window), which is the behaviour of MATLAB and avoids
            the curious behaviour of the scipy.signal windows for odd
            window_length.

    Returns:
        Complex STFT signal with dimensions
        AA x ... x AZ x T' x size // 2 + 1 x BA x ... x BZ.

    Raises:
        ValueError: If the windowing or the FFT fails because the shapes do
            not match. The original error is attached as the cause.
    """
    time_signal = np.array(time_signal)

    # Normalize negative axes, because axis + 1 is used below.
    axis = axis % time_signal.ndim

    if window_length is None:
        window_length = size

    # Pad with zeros to have enough samples for the window function to fade.
    if fading:
        # Builtin int: the np.int alias was removed in NumPy 1.24.
        pad_width = np.zeros((time_signal.ndim, 2), dtype=int)
        pad_width[axis, :] = window_length - shift
        time_signal = np.pad(time_signal, pad_width, mode='constant')

    if symmetric_window:
        window = window(window_length)
    else:
        # https://github.com/scipy/scipy/issues/4551
        window = window(window_length + 1)[:-1]

    time_signal_seg = segment_axis_v2(
        time_signal,
        window_length,
        shift=shift,
        axis=axis,
        end='pad' if pad else 'cut',
    )

    # Multiply every segment with the window along axis + 1, e.g.
    # 'abc,c->abc' for a 3 dimensional segmented signal and axis == 1.
    letters = string.ascii_lowercase[:time_signal_seg.ndim]
    mapping = letters + ',' + letters[axis + 1] + '->' + letters

    try:
        # ToDo: Implement this more memory efficient
        return rfft(
            np.einsum(mapping, time_signal_seg, window),
            n=size,
            axis=axis + 1,
        )
    except ValueError as e:
        raise ValueError(
            'Could not calculate the stft, something does not match.\n'
            + 'mapping: {}, '.format(mapping)
            + 'time_signal_seg.shape: {}, '.format(time_signal_seg.shape)
            + 'window.shape: {}, '.format(window.shape)
            + 'size: {}, '.format(size)
            + 'axis+1: {}'.format(axis + 1)
        ) from e