コード例 #1
0
ファイル: utils.py プロジェクト: xdcesc/nara_wpe
def istft(
    stft_signal,
    size=1024,
    shift=256,
    window=signal.windows.blackman,
    fading=True,
    window_length=None,
    symmetric_window=False,
):
    """
    Calculates the inverse short time Fourier transform to reconstruct
    the time signal by windowed overlap-add.

    Notes:
        Be careful if you make modifications in the frequency domain (e.g.
        beamforming) because the synthesis window is calculated according to
        the unmodified analysis window.

    Args:
        stft_signal: Single channel complex STFT signal
            with dimensions (..., frames, size // 2 + 1).
        size: Scalar FFT-size.
        shift: Scalar FFT-shift. Typically shift is a fraction of size.
        window: Window function handle. It is called with the window
            length and must return the analysis window.
        fading: If True, removes ``window_length - shift`` samples from the
            beginning and from the end of the result, i.e. the additional
            padding done during the STFT.
        window_length: Sometimes one desires to use a shorter window than
            the fft size. In that case, the window is padded with zeros.
            The default is to use the fft-size as a window size.
        symmetric_window: If True, ``window(window_length)`` is used as is
            (symmetric window). Otherwise (default) a periodic window is
            built as ``window(window_length + 1)[:-1]``.

    Returns:
        Single channel time signal with dimensions (..., samples), where
        samples is ``frames * shift + window_length - shift`` before the
        optional fading compensation.

    Raises:
        AssertionError: If the last dimension of ``stft_signal`` is not
            ``size // 2 + 1``.
    """
    # Note: frame_axis and frequency_axis would make this function much more
    #       complicated
    stft_signal = np.array(stft_signal)

    assert stft_signal.shape[-1] == size // 2 + 1, str(stft_signal.shape)

    if window_length is None:
        window_length = size

    if symmetric_window:
        window = window(window_length)
    else:
        # Periodic window: evaluate one extra sample and drop the last one.
        window = window(window_length + 1)[:-1]

    # Turn the analysis window into the matching synthesis window.
    window = _biorthogonal_window_fastest(window, shift)

    time_signal = np.zeros(
        list(stft_signal.shape[:-2]) +
        [stft_signal.shape[-2] * shift + window_length - shift])

    # Get the correct view to time_signal
    time_signal_seg = segment_axis_v2(time_signal,
                                      window_length,
                                      shift,
                                      end=None)

    # Pass n=size explicitly: the default output length of irfft is
    # 2 * (bins - 1), which only equals size when size is even.
    frames = np.real(irfft(stft_signal, n=size))

    # Unbuffered inplace add, because the segments overlap in memory.
    # The [..., :window_length] is the inverse of the window padding in rfft.
    np.add.at(time_signal_seg, Ellipsis,
              window * frames[..., :window_length])

    # Compensate fade-in and fade-out
    if fading:
        fade = window_length - shift
        time_signal = time_signal[..., fade:time_signal.shape[-1] - fade]

    return time_signal
コード例 #2
0
ファイル: utils.py プロジェクト: xdcesc/nara_wpe
def stft(
    time_signal,
    size,
    shift,
    axis=-1,
    window=signal.windows.blackman,
    window_length=None,
    fading=True,
    pad=True,
    symmetric_window=False,
):
    """
    Calculates the short time Fourier transformation of a multi channel multi
    speaker time signal. It is able to add additional zeros for fade-in and
    fade out and should yield an STFT signal which allows perfect
    reconstruction.

    ToDo: Open points:
     - sym_window need literature
     - fading why it is better?
     - should pad have more degrees of freedom?

    Args:
        time_signal: Multi channel time signal with dimensions
            AA x ... x AZ x T x BA x ... x BZ.
        size: Scalar FFT-size.
        shift: Scalar FFT-shift, the step between successive frames in
            samples. Typically shift is a fraction of size.
        axis: Scalar axis of time. Negative values count from the end.
            Default: -1 (the last axis).
        window: Window function handle. Default is blackman window.
        window_length: Sometimes one desires to use a shorter window than
            the fft size. In that case, the window is padded with zeros.
            The default is to use the fft-size as a window size.
        fading: Pads the signal with ``window_length - shift`` zeros on both
            sides of the time axis for better reconstruction.
        pad: If true zero pad the signal to match the shape, else cut
        symmetric_window: If True, ``window(window_length)`` is used as is
            (symmetric window). Otherwise (default) a periodic window is
            built as ``window(window_length + 1)[:-1]``.

    Returns:
        Complex STFT signal with dimensions
            AA x ... x AZ x T' x size // 2 + 1 x BA x ... x BZ.

    Raises:
        ValueError: If the windowing or the FFT fails because the shapes do
            not match. The message lists the shapes that were involved.
    """
    time_signal = np.array(time_signal)

    # Normalize negative axes, because axis + 1 is used further below.
    axis = axis % time_signal.ndim

    if window_length is None:
        window_length = size

    # Pad with zeros to have enough samples for the window function to fade.
    if fading:
        # The builtin int replaces the np.int alias, which newer NumPy
        # releases no longer provide.
        pad_width = np.zeros((time_signal.ndim, 2), dtype=int)
        pad_width[axis, :] = window_length - shift
        time_signal = np.pad(time_signal, pad_width, mode='constant')

    if symmetric_window:
        window = window(window_length)
    else:
        # https://github.com/scipy/scipy/issues/4551
        window = window(window_length + 1)[:-1]

    time_signal_seg = segment_axis_v2(time_signal,
                                      window_length,
                                      shift=shift,
                                      axis=axis,
                                      end='pad' if pad else 'cut')

    # Build an einsum rule like 'abc,c->abc' that multiplies the window along
    # the axis right after the time axis (the samples within each frame).
    letters = string.ascii_lowercase[:time_signal_seg.ndim]
    mapping = letters + ',' + letters[axis + 1] + '->' + letters

    try:
        # ToDo: Implement this more memory efficient
        return rfft(np.einsum(mapping, time_signal_seg, window),
                    n=size,
                    axis=axis + 1)
    except ValueError as e:
        raise ValueError(
            'Could not calculate the stft, something does not match.\n'
            'mapping: {}, '
            'time_signal_seg.shape: {}, '
            'window.shape: {}, '
            'size: {}, '
            'axis+1: {}'.format(
                mapping,
                time_signal_seg.shape,
                window.shape,
                size,
                axis + 1,
            )
        ) from e