Exemple #1
0
def inverse_stft(stfts,
                 frame_length,
                 frame_step,
                 fft_length=None,
                 window_fn=window_ops.hann_window,
                 name=None):
    """Computes the inverse [Short-time Fourier Transform][stft] of `stfts`.

    To reconstruct an original waveform, a complementary window function should
    be used in inverse_stft. Such a window function can be constructed with
    tf.signal.inverse_stft_window_fn.

    Example:

    ```python
    frame_length = 400
    frame_step = 160
    waveform = tf.placeholder(dtype=tf.float32, shape=[1000])
    stft = tf.signal.stft(waveform, frame_length, frame_step)
    inverse_stft = tf.signal.inverse_stft(
        stft, frame_length, frame_step,
        window_fn=tf.signal.inverse_stft_window_fn(frame_step))
    ```

    If a custom window_fn is used in stft, it must be passed to
    inverse_stft_window_fn:

    ```python
    frame_length = 400
    frame_step = 160
    window_fn = functools.partial(tf.signal.hamming_window, periodic=True)
    waveform = tf.placeholder(dtype=tf.float32, shape=[1000])
    stft = tf.signal.stft(
        waveform, frame_length, frame_step, window_fn=window_fn)
    inverse_stft = tf.signal.inverse_stft(
        stft, frame_length, frame_step,
        window_fn=tf.signal.inverse_stft_window_fn(
            frame_step, forward_window_fn=window_fn))
    ```

    Implemented with GPU-compatible ops and supports gradients.

    Args:
      stfts: A `complex64` `[..., frames, fft_unique_bins]` `Tensor` of STFT
        bins representing a batch of `fft_length`-point STFTs where
        `fft_unique_bins` is `fft_length // 2 + 1`.
      frame_length: An integer scalar `Tensor`. The window length in samples.
      frame_step: An integer scalar `Tensor`. The number of samples to step.
      fft_length: An integer scalar `Tensor`. The size of the FFT that produced
        `stfts`. If not provided, uses the smallest power of 2 enclosing
        `frame_length`.
      window_fn: A callable that takes a window length and a `dtype` keyword
        argument and returns a `[window_length]` `Tensor` of samples in the
        provided datatype. If set to `None`, no windowing is used.
      name: An optional name for the operation.

    Returns:
      A `[..., samples]` `Tensor` of `float32` signals representing the inverse
      STFT for each input STFT in `stfts`.

    Raises:
      ValueError: If `stfts` is not at least rank 2, `frame_length` is not
        scalar, `frame_step` is not scalar, or `fft_length` is not scalar.

    [stft]: https://en.wikipedia.org/wiki/Short-time_Fourier_transform
    """
    with ops.name_scope(name, 'inverse_stft', [stfts]):
        stfts = ops.convert_to_tensor(stfts, name='stfts')
        stfts.shape.with_rank_at_least(2)
        frame_length = ops.convert_to_tensor(frame_length, name='frame_length')
        frame_length.shape.assert_has_rank(0)
        frame_step = ops.convert_to_tensor(frame_step, name='frame_step')
        frame_step.shape.assert_has_rank(0)
        if fft_length is None:
            fft_length = _enclosing_power_of_two(frame_length)
        else:
            fft_length = ops.convert_to_tensor(fft_length, name='fft_length')
            fft_length.shape.assert_has_rank(0)

        real_frames = fft_ops.irfft(stfts, [fft_length])

        # frame_length may be larger or smaller than fft_length, so we pad or
        # truncate real_frames to frame_length.
        frame_length_static = tensor_util.constant_value(frame_length)

        # Statically known size of the innermost dimension (int) or None.
        # `as_list()` is used instead of `shape[-1].value` so this works
        # whether indexing a TensorShape yields `Dimension` objects or plain
        # ints / None.
        if real_frames.shape.ndims is None:
            inner_dim_static = None
        else:
            inner_dim_static = real_frames.shape.as_list()[-1]

        # If we don't know the shape of real_frames's inner dimension, pad and
        # truncate to frame_length dynamically.
        if frame_length_static is None or inner_dim_static is None:
            real_frames = real_frames[..., :frame_length]
            real_frames_rank = array_ops.rank(real_frames)
            real_frames_shape = array_ops.shape(real_frames)
            paddings = array_ops.concat([
                array_ops.zeros([real_frames_rank - 1, 2],
                                dtype=frame_length.dtype),
                [[
                    0,
                    math_ops.maximum(0, frame_length - real_frames_shape[-1])
                ]]
            ], 0)
            real_frames = array_ops.pad(real_frames, paddings)
        # We know real_frames's last dimension and frame_length statically. If
        # they are different, then pad or truncate real_frames to frame_length.
        elif inner_dim_static > frame_length_static:
            real_frames = real_frames[..., :frame_length_static]
        elif inner_dim_static < frame_length_static:
            pad_amount = frame_length_static - inner_dim_static
            real_frames = array_ops.pad(
                real_frames,
                [[0, 0]] * (real_frames.shape.ndims - 1) + [[0, pad_amount]])

        # The above code pads the inner dimension of real_frames to
        # frame_length, but it does so in a way that may not be shape-inference
        # friendly. Restore shape information if we are able to.
        if (frame_length_static is not None
                and real_frames.shape.ndims is not None):
            real_frames.set_shape([None] * (real_frames.shape.ndims - 1) +
                                  [frame_length_static])

        # Optionally window and overlap-add the inner 2 dimensions of
        # real_frames into a single [samples] dimension.
        if window_fn is not None:
            window = window_fn(frame_length, dtype=stfts.dtype.real_dtype)
            real_frames *= window
        return reconstruction_ops.overlap_and_add(real_frames, frame_step)
Exemple #2
0
def dct(input, type=2, n=None, axis=-1, norm=None, name=None):  # pylint: disable=redefined-builtin
    """Computes the 1D [Discrete Cosine Transform (DCT)][dct] of `input`.

    Types I, II, III and IV are supported:

    * Type I takes the real part of a `tf.signal.rfft` of the signal
      concatenated with its mirrored interior samples.
    * Type II uses a length `2N` padded `tf.signal.rfft`, as described in
      [Type 2 DCT using 2N FFT padded (Makhoul)]
      (https://dsp.stackexchange.com/a/10606).
    * Type III is the corresponding inverse of Type II, using a length `2N`
      `tf.signal.irfft`.
    * Type IV is obtained from a length `2N` Type II DCT of the zero-padded
      signal by keeping only the odd-indexed outputs.

    @compatibility(scipy)
    Equivalent to [scipy.fftpack.dct]
    (https://docs.scipy.org/doc/scipy-1.4.0/reference/generated/scipy.fftpack.dct.html)
    for Type-I, Type-II, Type-III and Type-IV DCT.
    @end_compatibility

    Args:
      input: A `[..., samples]` `float32`/`float64` `Tensor` containing the
        signals to take the DCT of.
      type: The DCT type to perform. Must be 1, 2, 3 or 4.
      n: The length of the transform. If `n` is not larger than the sequence
        length, only the first `n` elements of the sequence are used. If `n`
        is larger, the sequence is zero-padded at the end up to length `n`
        before the DCT is computed.
      axis: For future expansion. The axis to compute the DCT along. Must be
        `-1`.
      norm: The normalization to apply. `None` for no normalization or
        `'ortho'` for orthonormal normalization.
      name: An optional name for the operation.

    Returns:
      A `[..., samples]` `float32`/`float64` `Tensor` containing the DCT of
      `input`.

    Raises:
      ValueError: If `type` is not `1`, `2`, `3` or `4`, `axis` is
        not `-1`, `n` is not `None` or greater than 0,
        or `norm` is not `None` or `'ortho'`.
      ValueError: If `type` is `1` and `norm` is `ortho`.

    [dct]: https://en.wikipedia.org/wiki/Discrete_cosine_transform
    """
    _validate_dct_arguments(input, type, n, axis, norm)
    with _ops.name_scope(name, "dct", [input]):
        input = _ops.convert_to_tensor(input)
        zero = _ops.convert_to_tensor(0.0, dtype=input.dtype)

        def _innermost_size(tensor):
            # Prefer the static size; fall back to the dynamic shape op.
            static_size = tensor_shape.dimension_value(tensor.shape[-1])
            return static_size or _array_ops.shape(tensor)[-1]

        seq_len = _innermost_size(input)
        if n is not None:
            if n <= seq_len:
                # Keep only the leading n samples.
                input = input[..., 0:n]
            else:
                # Zero-pad the end of the innermost axis up to length n.
                rank = len(input.shape)
                pad_spec = [[0, 0]] * (rank - 1) + [[0, n - seq_len]]
                pad_spec = _ops.convert_to_tensor(pad_spec,
                                                  dtype=_dtypes.int32)
                input = _array_ops.pad(input, paddings=pad_spec)

        axis_dim = _innermost_size(input)
        axis_dim_float = _math_ops.cast(axis_dim, input.dtype)

        if type == 1:
            # Append the reversed interior samples, then take the real part
            # of the RFFT.
            mirrored = _array_ops.concat([input, input[..., -2:0:-1]],
                                         axis=-1)
            return _math_ops.real(fft_ops.rfft(mirrored))

        if type == 2:
            angles = (-_math_ops.range(axis_dim_float) * _math.pi * 0.5 /
                      axis_dim_float)
            scale = 2.0 * _math_ops.exp(_math_ops.complex(zero, angles))

            # TODO(rjryan): Benchmark performance and memory usage of the
            # various approaches to computing a DCT via the RFFT.
            spectrum = fft_ops.rfft(input, fft_length=[2 * axis_dim])
            result = _math_ops.real(spectrum[..., :axis_dim] * scale)

            if norm == "ortho":
                first_weight = 0.5 * _math_ops.rsqrt(axis_dim_float)
                other_weight = first_weight * _math.sqrt(2.0)
                # tf.pad builds the vector [first, other, other, ...].
                ortho_weights = _array_ops.pad(
                    _array_ops.expand_dims(first_weight, 0),
                    [[0, axis_dim - 1]],
                    constant_values=other_weight)
                result = result * ortho_weights

            return result

        if type == 3:
            if norm == "ortho":
                first_weight = _math_ops.sqrt(axis_dim_float)
                other_weight = first_weight * _math.sqrt(0.5)
                # tf.pad builds the vector [first, other, other, ...].
                ortho_weights = _array_ops.pad(
                    _array_ops.expand_dims(first_weight, 0),
                    [[0, axis_dim - 1]],
                    constant_values=other_weight)
                scaled_input = input * ortho_weights
            else:
                scaled_input = input * axis_dim_float

            angles = (_math_ops.range(axis_dim_float) * _math.pi * 0.5 /
                      axis_dim_float)
            scale = 2.0 * _math_ops.exp(_math_ops.complex(zero, angles))
            time_domain = fft_ops.irfft(
                scale * _math_ops.complex(scaled_input, zero),
                fft_length=[2 * axis_dim])
            return _math_ops.real(time_domain)[..., :axis_dim]

        if type == 4:
            # Unnormalized DCT-2 of the signal zero-padded to length 2N.
            padded_dct2 = dct(input, type=2, n=2 * axis_dim, axis=axis,
                              norm=None)
            # The odd-indexed outputs of that DCT-2 form the DCT-4 of the
            # original length-N signal.
            result = padded_dct2[..., 1::2]
            if norm == "ortho":
                result = result * (_math.sqrt(0.5) *
                                   _math_ops.rsqrt(axis_dim_float))

            return result
Exemple #3
0
def irfft(input_tensor, fft_length=None, name=None):
    """Forwards to `fft_ops.irfft` with the arguments unchanged.

    Args:
      input_tensor: The value handed to `fft_ops.irfft` as its first argument.
      fft_length: Optional FFT length, passed through as the second argument.
      name: Optional operation name, passed through as the third argument.

    Returns:
      Whatever `fft_ops.irfft` returns for these arguments.
    """
    result = fft_ops.irfft(input_tensor, fft_length, name)
    return result
Exemple #4
0
def dct(input, type=2, n=None, axis=-1, norm=None, name=None):  # pylint: disable=redefined-builtin
  """Computes the 1D [Discrete Cosine Transform (DCT)][dct] of `input`.

  Currently only Types I, II and III are supported.
  Type I is implemented using a length `2N` padded `tf.spectral.rfft`.
  Type II is implemented using a length `2N` padded `tf.spectral.rfft`, as
  described here:
  https://dsp.stackexchange.com/a/10606.
  Type III is a fairly straightforward inverse of Type II
  (i.e. using a length `2N` padded `tf.spectral.irfft`).

  @compatibility(scipy)
  Equivalent to scipy.fftpack.dct for Type-I, Type-II and Type-III DCT.
  https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.fftpack.dct.html
  @end_compatibility

  Args:
    input: A `[..., samples]` `float32` `Tensor` containing the signals to
      take the DCT of.
    type: The DCT type to perform. Must be 1, 2 or 3.
    n: For future expansion. The length of the transform. Must be `None`.
    axis: For future expansion. The axis to compute the DCT along. Must be `-1`.
    norm: The normalization to apply. `None` for no normalization or `'ortho'`
      for orthonormal normalization.
    name: An optional name for the operation.

  Returns:
    A `[..., samples]` `float32` `Tensor` containing the DCT of `input`.

  Raises:
    ValueError: If `type` is not `1`, `2` or `3`, `n` is not `None`, `axis` is
      not `-1`, or `norm` is not `None` or `'ortho'`.
    ValueError: If `type` is `1` and `norm` is `ortho`.

  [dct]: https://en.wikipedia.org/wiki/Discrete_cosine_transform
  """
  _validate_dct_arguments(input, type, n, axis, norm)
  with _ops.name_scope(name, "dct", [input]):
    # We use the RFFT to compute the DCT and TensorFlow only supports float32
    # for FFTs at the moment.
    input = _ops.convert_to_tensor(input, dtype=_dtypes.float32)

    # Prefer the static size of the last axis; fall back to the dynamic shape.
    axis_dim = (tensor_shape.dimension_value(input.shape[-1])
                or _array_ops.shape(input)[-1])
    # Explicit cast instead of the deprecated `to_float` helper.
    axis_dim_float = _math_ops.cast(axis_dim, _dtypes.float32)

    if type == 1:
      # Append the reversed interior samples, then take the real part of the
      # RFFT.
      dct1_input = _array_ops.concat([input, input[..., -2:0:-1]], axis=-1)
      dct1 = _math_ops.real(fft_ops.rfft(dct1_input))
      return dct1

    if type == 2:
      scale = 2.0 * _math_ops.exp(
          _math_ops.complex(
              0.0, -_math_ops.range(axis_dim_float) * _math.pi * 0.5 /
              axis_dim_float))

      # TODO(rjryan): Benchmark performance and memory usage of the various
      # approaches to computing a DCT via the RFFT.
      dct2 = _math_ops.real(
          fft_ops.rfft(
              input, fft_length=[2 * axis_dim])[..., :axis_dim] * scale)

      if norm == "ortho":
        n1 = 0.5 * _math_ops.rsqrt(axis_dim_float)
        # sqrt(2) is a compile-time constant; compute it in Python rather
        # than adding a graph op for it.
        n2 = n1 * _math.sqrt(2.0)
        # Use tf.pad to make a vector of [n1, n2, n2, n2, ...].
        weights = _array_ops.pad(
            _array_ops.expand_dims(n1, 0), [[0, axis_dim - 1]],
            constant_values=n2)
        dct2 *= weights

      return dct2

    elif type == 3:
      if norm == "ortho":
        n1 = _math_ops.sqrt(axis_dim_float)
        # sqrt(0.5) is a compile-time constant; compute it in Python rather
        # than adding a graph op for it.
        n2 = n1 * _math.sqrt(0.5)
        # Use tf.pad to make a vector of [n1, n2, n2, n2, ...].
        weights = _array_ops.pad(
            _array_ops.expand_dims(n1, 0), [[0, axis_dim - 1]],
            constant_values=n2)
        input *= weights
      else:
        input *= axis_dim_float
      scale = 2.0 * _math_ops.exp(
          _math_ops.complex(
              0.0,
              _math_ops.range(axis_dim_float) * _math.pi * 0.5 /
              axis_dim_float))
      dct3 = _math_ops.real(
          fft_ops.irfft(
              scale * _math_ops.complex(input, 0.0),
              fft_length=[2 * axis_dim]))[..., :axis_dim]

      return dct3
Exemple #5
0
def dct(input, type=2, n=None, axis=-1, norm=None, name=None):  # pylint: disable=redefined-builtin
    """Computes the 1D [Discrete Cosine Transform (DCT)][dct] of `input`.

    Currently only Types I, II and III are supported.
    Type I is implemented using a length `2N` padded `tf.spectral.rfft`.
    Type II is implemented using a length `2N` padded `tf.spectral.rfft`, as
    described here:
    https://dsp.stackexchange.com/a/10606.
    Type III is a fairly straightforward inverse of Type II
    (i.e. using a length `2N` padded `tf.spectral.irfft`).

    @compatibility(scipy)
    Equivalent to scipy.fftpack.dct for Type-I, Type-II and Type-III DCT.
    https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.fftpack.dct.html
    @end_compatibility

    Args:
      input: A `[..., samples]` `float32` `Tensor` containing the signals to
        take the DCT of.
      type: The DCT type to perform. Must be 1, 2 or 3.
      n: For future expansion. The length of the transform. Must be `None`.
      axis: For future expansion. The axis to compute the DCT along. Must be
        `-1`.
      norm: The normalization to apply. `None` for no normalization or
        `'ortho'` for orthonormal normalization.
      name: An optional name for the operation.

    Returns:
      A `[..., samples]` `float32` `Tensor` containing the DCT of `input`.

    Raises:
      ValueError: If `type` is not `1`, `2` or `3`, `n` is not `None`, `axis`
        is not `-1`, or `norm` is not `None` or `'ortho'`.
      ValueError: If `type` is `1` and `norm` is `ortho`.

    [dct]: https://en.wikipedia.org/wiki/Discrete_cosine_transform
    """
    _validate_dct_arguments(input, type, n, axis, norm)
    with _ops.name_scope(name, "dct", [input]):
        # We use the RFFT to compute the DCT and TensorFlow only supports
        # float32 for FFTs at the moment.
        input = _ops.convert_to_tensor(input, dtype=_dtypes.float32)

        # Prefer the static size of the last axis; fall back to the dynamic
        # shape.
        axis_dim = (tensor_shape.dimension_value(input.shape[-1])
                    or _array_ops.shape(input)[-1])
        # Explicit cast instead of the deprecated `to_float` helper.
        axis_dim_float = _math_ops.cast(axis_dim, _dtypes.float32)

        if type == 1:
            # Append the reversed interior samples, then take the real part
            # of the RFFT.
            dct1_input = _array_ops.concat([input, input[..., -2:0:-1]],
                                           axis=-1)
            dct1 = _math_ops.real(fft_ops.rfft(dct1_input))
            return dct1

        if type == 2:
            scale = 2.0 * _math_ops.exp(
                _math_ops.complex(
                    0.0, -_math_ops.range(axis_dim_float) * _math.pi * 0.5 /
                    axis_dim_float))

            # TODO(rjryan): Benchmark performance and memory usage of the
            # various approaches to computing a DCT via the RFFT.
            dct2 = _math_ops.real(
                fft_ops.rfft(input, fft_length=[2 * axis_dim])[..., :axis_dim]
                * scale)

            if norm == "ortho":
                n1 = 0.5 * _math_ops.rsqrt(axis_dim_float)
                # sqrt(2) is a compile-time constant; compute it in Python
                # rather than adding a graph op for it.
                n2 = n1 * _math.sqrt(2.0)
                # Use tf.pad to make a vector of [n1, n2, n2, n2, ...].
                weights = _array_ops.pad(_array_ops.expand_dims(n1, 0),
                                         [[0, axis_dim - 1]],
                                         constant_values=n2)
                dct2 *= weights

            return dct2

        elif type == 3:
            if norm == "ortho":
                n1 = _math_ops.sqrt(axis_dim_float)
                # sqrt(0.5) is a compile-time constant; compute it in Python
                # rather than adding a graph op for it.
                n2 = n1 * _math.sqrt(0.5)
                # Use tf.pad to make a vector of [n1, n2, n2, n2, ...].
                weights = _array_ops.pad(_array_ops.expand_dims(n1, 0),
                                         [[0, axis_dim - 1]],
                                         constant_values=n2)
                input *= weights
            else:
                input *= axis_dim_float
            scale = 2.0 * _math_ops.exp(
                _math_ops.complex(
                    0.0,
                    _math_ops.range(axis_dim_float) * _math.pi * 0.5 /
                    axis_dim_float))
            dct3 = _math_ops.real(
                fft_ops.irfft(scale * _math_ops.complex(input, 0.0),
                              fft_length=[2 * axis_dim]))[..., :axis_dim]

            return dct3
def inverse_stft(stfts,
                 frame_length,
                 frame_step,
                 fft_length=None,
                 window_fn=window_ops.hann_window,
                 name=None):
  """Computes the inverse [Short-time Fourier Transform][stft] of `stfts`.

  To reconstruct an original waveform, a complementary window function should
  be used in inverse_stft. Such a window function can be constructed with
  tf.signal.inverse_stft_window_fn.

  Example:

  ```python
  frame_length = 400
  frame_step = 160
  waveform = tf.placeholder(dtype=tf.float32, shape=[1000])
  stft = tf.signal.stft(waveform, frame_length, frame_step)
  inverse_stft = tf.signal.inverse_stft(
      stft, frame_length, frame_step,
      window_fn=tf.signal.inverse_stft_window_fn(frame_step))
  ```

  If a custom window_fn is used in stft, it must be passed to
  inverse_stft_window_fn:

  ```python
  frame_length = 400
  frame_step = 160
  window_fn = functools.partial(tf.signal.hamming_window, periodic=True)
  waveform = tf.placeholder(dtype=tf.float32, shape=[1000])
  stft = tf.signal.stft(
      waveform, frame_length, frame_step, window_fn=window_fn)
  inverse_stft = tf.signal.inverse_stft(
      stft, frame_length, frame_step,
      window_fn=tf.signal.inverse_stft_window_fn(
          frame_step, forward_window_fn=window_fn))
  ```

  Implemented with GPU-compatible ops and supports gradients.

  Args:
    stfts: A `complex64` `[..., frames, fft_unique_bins]` `Tensor` of STFT bins
      representing a batch of `fft_length`-point STFTs where `fft_unique_bins`
      is `fft_length // 2 + 1`.
    frame_length: An integer scalar `Tensor`. The window length in samples.
    frame_step: An integer scalar `Tensor`. The number of samples to step.
    fft_length: An integer scalar `Tensor`. The size of the FFT that produced
      `stfts`. If not provided, uses the smallest power of 2 enclosing
      `frame_length`.
    window_fn: A callable that takes a window length and a `dtype` keyword
      argument and returns a `[window_length]` `Tensor` of samples in the
      provided datatype. If set to `None`, no windowing is used.
    name: An optional name for the operation.

  Returns:
    A `[..., samples]` `Tensor` of `float32` signals representing the inverse
    STFT for each input STFT in `stfts`.

  Raises:
    ValueError: If `stfts` is not at least rank 2, `frame_length` is not scalar,
      `frame_step` is not scalar, or `fft_length` is not scalar.

  [stft]: https://en.wikipedia.org/wiki/Short-time_Fourier_transform
  """
  with ops.name_scope(name, 'inverse_stft', [stfts]):
    stfts = ops.convert_to_tensor(stfts, name='stfts')
    stfts.shape.with_rank_at_least(2)
    frame_length = ops.convert_to_tensor(frame_length, name='frame_length')
    frame_length.shape.assert_has_rank(0)
    frame_step = ops.convert_to_tensor(frame_step, name='frame_step')
    frame_step.shape.assert_has_rank(0)
    if fft_length is None:
      fft_length = _enclosing_power_of_two(frame_length)
    else:
      fft_length = ops.convert_to_tensor(fft_length, name='fft_length')
      fft_length.shape.assert_has_rank(0)

    real_frames = fft_ops.irfft(stfts, [fft_length])

    # frame_length may be larger or smaller than fft_length, so we pad or
    # truncate real_frames to frame_length.
    frame_length_static = tensor_util.constant_value(frame_length)

    # Statically known size of the innermost dimension (int) or None.
    # `as_list()` is used instead of `shape[-1].value` so this works whether
    # indexing a TensorShape yields `Dimension` objects or plain ints / None.
    if real_frames.shape.ndims is None:
      inner_dim_static = None
    else:
      inner_dim_static = real_frames.shape.as_list()[-1]

    # If we don't know the shape of real_frames's inner dimension, pad and
    # truncate to frame_length dynamically.
    if frame_length_static is None or inner_dim_static is None:
      real_frames = real_frames[..., :frame_length]
      real_frames_rank = array_ops.rank(real_frames)
      real_frames_shape = array_ops.shape(real_frames)
      paddings = array_ops.concat(
          [array_ops.zeros([real_frames_rank - 1, 2],
                           dtype=frame_length.dtype),
           [[0, math_ops.maximum(0, frame_length - real_frames_shape[-1])]]], 0)
      real_frames = array_ops.pad(real_frames, paddings)
    # We know real_frames's last dimension and frame_length statically. If they
    # are different, then pad or truncate real_frames to frame_length.
    elif inner_dim_static > frame_length_static:
      real_frames = real_frames[..., :frame_length_static]
    elif inner_dim_static < frame_length_static:
      pad_amount = frame_length_static - inner_dim_static
      real_frames = array_ops.pad(real_frames,
                                  [[0, 0]] * (real_frames.shape.ndims - 1) +
                                  [[0, pad_amount]])

    # The above code pads the inner dimension of real_frames to frame_length,
    # but it does so in a way that may not be shape-inference friendly.
    # Restore shape information if we are able to.
    if frame_length_static is not None and real_frames.shape.ndims is not None:
      real_frames.set_shape([None] * (real_frames.shape.ndims - 1) +
                            [frame_length_static])

    # Optionally window and overlap-add the inner 2 dimensions of real_frames
    # into a single [samples] dimension.
    if window_fn is not None:
      window = window_fn(frame_length, dtype=stfts.dtype.real_dtype)
      real_frames *= window
    return reconstruction_ops.overlap_and_add(real_frames, frame_step)