def inverse_stft(stfts,
                 frame_length,
                 frame_step,
                 fft_length=None,
                 window_fn=window_ops.hann_window,
                 name=None):
  """Computes the inverse [Short-time Fourier Transform][stft] of `stfts`.

  To reconstruct an original waveform, a complementary window function should
  be used in inverse_stft. Such a window function can be constructed with
  tf.signal.inverse_stft_window_fn.

  Example:

  ```python
  frame_length = 400
  frame_step = 160
  waveform = tf.placeholder(dtype=tf.float32, shape=[1000])
  stft = tf.signal.stft(waveform, frame_length, frame_step)
  inverse_stft = tf.signal.inverse_stft(
      stft, frame_length, frame_step,
      window_fn=tf.signal.inverse_stft_window_fn(frame_step))
  ```

  If a custom window_fn is used in stft, it must be passed to
  inverse_stft_window_fn:

  ```python
  frame_length = 400
  frame_step = 160
  window_fn = functools.partial(tf.signal.hamming_window, periodic=True)
  waveform = tf.placeholder(dtype=tf.float32, shape=[1000])
  stft = tf.signal.stft(
      waveform, frame_length, frame_step, window_fn=window_fn)
  inverse_stft = tf.signal.inverse_stft(
      stft, frame_length, frame_step,
      window_fn=tf.signal.inverse_stft_window_fn(
          frame_step, forward_window_fn=window_fn))
  ```

  Implemented with GPU-compatible ops and supports gradients.

  Args:
    stfts: A `complex64` `[..., frames, fft_unique_bins]` `Tensor` of STFT bins
      representing a batch of `fft_length`-point STFTs where `fft_unique_bins`
      is `fft_length // 2 + 1`
    frame_length: An integer scalar `Tensor`. The window length in samples.
    frame_step: An integer scalar `Tensor`. The number of samples to step.
    fft_length: An integer scalar `Tensor`. The size of the FFT that produced
      `stfts`. If not provided, uses the smallest power of 2 enclosing
      `frame_length`.
    window_fn: A callable that takes a window length and a `dtype` keyword
      argument and returns a `[window_length]` `Tensor` of samples in the
      provided datatype. If set to `None`, no windowing is used.
    name: An optional name for the operation.

  Returns:
    A `[..., samples]` `Tensor` of `float32` signals representing the inverse
    STFT for each input STFT in `stfts`.

  Raises:
    ValueError: If `stfts` is not at least rank 2, `frame_length` is not scalar,
      `frame_step` is not scalar, or `fft_length` is not scalar.

  [stft]: https://en.wikipedia.org/wiki/Short-time_Fourier_transform
  """
  with ops.name_scope(name, 'inverse_stft', [stfts]):
    stfts = ops.convert_to_tensor(stfts, name='stfts')
    stfts.shape.with_rank_at_least(2)
    frame_length = ops.convert_to_tensor(frame_length, name='frame_length')
    frame_length.shape.assert_has_rank(0)
    frame_step = ops.convert_to_tensor(frame_step, name='frame_step')
    frame_step.shape.assert_has_rank(0)
    if fft_length is None:
      fft_length = _enclosing_power_of_two(frame_length)
    else:
      fft_length = ops.convert_to_tensor(fft_length, name='fft_length')
      fft_length.shape.assert_has_rank(0)

    real_frames = fft_ops.irfft(stfts, [fft_length])

    # frame_length may be larger or smaller than fft_length, so we pad or
    # truncate real_frames to frame_length.
    frame_length_static = tensor_util.constant_value(frame_length)

    # Read the inner dimension through as_list() rather than
    # `shape[-1].value`: with TensorShape V2 behavior `shape[-1]` is already an
    # int (or None) and has no `.value` attribute. as_list() requires a known
    # rank, hence the ndims guard.
    if real_frames.shape.ndims is None:
      inner_dim_static = None
    else:
      inner_dim_static = real_frames.shape.as_list()[-1]

    # If we don't know the shape of real_frames's inner dimension, pad and
    # truncate to frame_length.
    if frame_length_static is None or inner_dim_static is None:
      real_frames = real_frames[..., :frame_length]
      real_frames_rank = array_ops.rank(real_frames)
      real_frames_shape = array_ops.shape(real_frames)
      # Zero padding for every outer dimension; the inner dimension is padded
      # on the right by however many samples are still missing (never
      # negative, because of the truncation above).
      paddings = array_ops.concat(
          [array_ops.zeros([real_frames_rank - 1, 2],
                           dtype=frame_length.dtype),
           [[0, math_ops.maximum(0, frame_length - real_frames_shape[-1])]]],
          0)
      real_frames = array_ops.pad(real_frames, paddings)
    # We know real_frames's last dimension and frame_length statically. If they
    # are different, then pad or truncate real_frames to frame_length.
    elif inner_dim_static > frame_length_static:
      real_frames = real_frames[..., :frame_length_static]
    elif inner_dim_static < frame_length_static:
      pad_amount = frame_length_static - inner_dim_static
      real_frames = array_ops.pad(
          real_frames,
          [[0, 0]] * (real_frames.shape.ndims - 1) + [[0, pad_amount]])

    # The above code pads the inner dimension of real_frames to frame_length,
    # but it does so in a way that may not be shape-inference friendly.
    # Restore shape information if we are able to.
    if frame_length_static is not None and real_frames.shape.ndims is not None:
      real_frames.set_shape(
          [None] * (real_frames.shape.ndims - 1) + [frame_length_static])

    # Optionally window and overlap-add the inner 2 dimensions of real_frames
    # into a single [samples] dimension.
    if window_fn is not None:
      window = window_fn(frame_length, dtype=stfts.dtype.real_dtype)
      real_frames *= window
    return reconstruction_ops.overlap_and_add(real_frames, frame_step)
def dct(input, type=2, n=None, axis=-1, norm=None, name=None):  # pylint: disable=redefined-builtin
  """Computes the 1D [Discrete Cosine Transform (DCT)][dct] of `input`.

  Types I, II, III and IV are supported.
  Type I is implemented using a length `2N` padded `tf.signal.rfft`.
  Type II is implemented using a length `2N` padded `tf.signal.rfft`, as
  described here:
  [Type 2 DCT using 2N FFT padded (Makhoul)](https://dsp.stackexchange.com/a/10606).
  Type III is a fairly straightforward inverse of Type II
  (i.e. using a length `2N` padded `tf.signal.irfft`).
  Type IV is calculated through 2N length DCT2 of padded signal and
  picking the odd indices.

  @compatibility(scipy)
  Equivalent to
  [scipy.fftpack.dct](https://docs.scipy.org/doc/scipy-1.4.0/reference/generated/scipy.fftpack.dct.html)
  for Type-I, Type-II, Type-III and Type-IV DCT.
  @end_compatibility

  Args:
    input: A `[..., samples]` `float32`/`float64` `Tensor` containing the
      signals to take the DCT of.
    type: The DCT type to perform. Must be 1, 2, 3 or 4.
    n: The length of the transform. If length is less than sequence length,
      only the first n elements of the sequence are considered for the DCT.
      If n is greater than the sequence length, zeros are padded and then
      the DCT is computed as usual.
    axis: For future expansion. The axis to compute the DCT along. Must be `-1`.
    norm: The normalization to apply. `None` for no normalization or `'ortho'`
      for orthonormal normalization.
    name: An optional name for the operation.

  Returns:
    A `[..., samples]` `float32`/`float64` `Tensor` containing the DCT of
    `input`.

  Raises:
    ValueError: If `type` is not `1`, `2`, `3` or `4`, `axis` is
      not `-1`, `n` is not `None` or greater than 0,
      or `norm` is not `None` or `'ortho'`.
    ValueError: If `type` is `1` and `norm` is `ortho`.

  [dct]: https://en.wikipedia.org/wiki/Discrete_cosine_transform
  """
  _validate_dct_arguments(input, type, n, axis, norm)
  with _ops.name_scope(name, "dct", [input]):
    input = _ops.convert_to_tensor(input)
    # Typed zero so that complex numbers built below match the input's dtype.
    zero = _ops.convert_to_tensor(0.0, dtype=input.dtype)

    static_seq_len = tensor_shape.dimension_value(input.shape[-1])
    seq_len = static_seq_len or _array_ops.shape(input)[-1]
    if n is not None:
      if static_seq_len:
        # The sequence length is a Python integer, so the truncate-or-pad
        # decision can be taken while building the graph.
        if n <= seq_len:
          input = input[..., 0:n]
        else:
          rank = len(input.shape)
          padding = [[0, 0] for _ in range(rank)]
          padding[rank - 1][1] = n - seq_len
          padding = _ops.convert_to_tensor(padding, dtype=_dtypes.int32)
          input = _array_ops.pad(input, paddings=padding)
      else:
        # The sequence length is only known at run time, so `n <= seq_len`
        # would be a Tensor and cannot drive a Python `if`. Do both steps
        # unconditionally instead: the slice is a no-op when the signal is
        # already shorter than n, and the pad amount is clamped at zero when
        # it is longer.
        rank = len(input.shape)
        leading_shape = input.shape[:-1]
        input = input[..., 0:n]
        padding = [[0, 0] for _ in range(rank)]
        padding[rank - 1][1] = _math_ops.maximum(0, n - seq_len)
        padding = _ops.convert_to_tensor(padding, dtype=_dtypes.int32)
        input = _array_ops.pad(input, paddings=padding)
        # Either way the inner dimension is now exactly n; tell shape
        # inference so the rest of the function can use the static length.
        input.set_shape(leading_shape.concatenate([n]))

    axis_dim = (
        tensor_shape.dimension_value(input.shape[-1]) or
        _array_ops.shape(input)[-1])
    axis_dim_float = _math_ops.cast(axis_dim, input.dtype)

    if type == 1:
      # Mirror the interior samples to build the even extension whose RFFT
      # real part is the DCT-I.
      dct1_input = _array_ops.concat([input, input[..., -2:0:-1]], axis=-1)
      dct1 = _math_ops.real(fft_ops.rfft(dct1_input))
      return dct1

    if type == 2:
      scale = 2.0 * _math_ops.exp(
          _math_ops.complex(
              zero,
              -_math_ops.range(axis_dim_float) * _math.pi * 0.5 /
              axis_dim_float))

      # TODO(rjryan): Benchmark performance and memory usage of the various
      # approaches to computing a DCT via the RFFT.
      dct2 = _math_ops.real(
          fft_ops.rfft(input, fft_length=[2 * axis_dim])[..., :axis_dim] *
          scale)

      if norm == "ortho":
        n1 = 0.5 * _math_ops.rsqrt(axis_dim_float)
        n2 = n1 * _math.sqrt(2.0)
        # Use tf.pad to make a vector of [n1, n2, n2, n2, ...].
        weights = _array_ops.pad(
            _array_ops.expand_dims(n1, 0), [[0, axis_dim - 1]],
            constant_values=n2)
        dct2 *= weights

      return dct2

    elif type == 3:
      if norm == "ortho":
        n1 = _math_ops.sqrt(axis_dim_float)
        n2 = n1 * _math.sqrt(0.5)
        # Use tf.pad to make a vector of [n1, n2, n2, n2, ...].
        weights = _array_ops.pad(
            _array_ops.expand_dims(n1, 0), [[0, axis_dim - 1]],
            constant_values=n2)
        input *= weights
      else:
        input *= axis_dim_float
      scale = 2.0 * _math_ops.exp(
          _math_ops.complex(
              zero,
              _math_ops.range(axis_dim_float) * _math.pi * 0.5 /
              axis_dim_float))
      dct3 = _math_ops.real(
          fft_ops.irfft(
              scale * _math_ops.complex(input, zero),
              fft_length=[2 * axis_dim]))[..., :axis_dim]

      return dct3

    elif type == 4:
      # DCT-2 of 2N length zero-padded signal, unnormalized.
      dct2 = dct(input, type=2, n=2 * axis_dim, axis=axis, norm=None)
      # Get odd indices of DCT-2 of zero padded 2N signal to obtain
      # DCT-4 of the original N length signal.
      dct4 = dct2[..., 1::2]
      if norm == "ortho":
        dct4 *= _math.sqrt(0.5) * _math_ops.rsqrt(axis_dim_float)

      return dct4
def irfft(input_tensor, fft_length=None, name=None):
  """Forwards its arguments, unchanged and positionally, to `fft_ops.irfft`.

  NOTE(review): presumably this is the inverse real-valued FFT; the exact
  semantics of each argument are defined by `fft_ops.irfft`, which is not
  visible from here -- confirm against that module.

  Args:
    input_tensor: Passed through as the first positional argument.
    fft_length: Passed through as the second positional argument. Defaults to
      `None`.
    name: Passed through as the third positional argument. Defaults to `None`.

  Returns:
    Whatever `fft_ops.irfft` returns for the given arguments.
  """
  return fft_ops.irfft(input_tensor, fft_length, name)
def dct(input, type=2, n=None, axis=-1, norm=None, name=None):  # pylint: disable=redefined-builtin
  """Computes the 1D [Discrete Cosine Transform (DCT)][dct] of `input`.

  Currently only Types I, II and III are supported.
  Type I is implemented using a length `2N` padded `tf.spectral.rfft`.
  Type II is implemented using a length `2N` padded `tf.spectral.rfft`, as
  described here: https://dsp.stackexchange.com/a/10606.
  Type III is a fairly straightforward inverse of Type II
  (i.e. using a length `2N` padded `tf.spectral.irfft`).

  @compatibility(scipy)
  Equivalent to scipy.fftpack.dct for Type-I, Type-II and Type-III DCT.
  https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.fftpack.dct.html
  @end_compatibility

  Args:
    input: A `[..., samples]` `float32` `Tensor` containing the signals to
      take the DCT of.
    type: The DCT type to perform. Must be 1, 2 or 3.
    n: For future expansion. The length of the transform. Must be `None`.
    axis: For future expansion. The axis to compute the DCT along. Must be `-1`.
    norm: The normalization to apply. `None` for no normalization or `'ortho'`
      for orthonormal normalization.
    name: An optional name for the operation.

  Returns:
    A `[..., samples]` `float32` `Tensor` containing the DCT of `input`.

  Raises:
    ValueError: If `type` is not `1`, `2` or `3`, `n` is not `None`, `axis` is
      not `-1`, or `norm` is not `None` or `'ortho'`.
    ValueError: If `type` is `1` and `norm` is `ortho`.

  [dct]: https://en.wikipedia.org/wiki/Discrete_cosine_transform
  """
  _validate_dct_arguments(input, type, n, axis, norm)
  with _ops.name_scope(name, "dct", [input]):
    # We use the RFFT to compute the DCT and TensorFlow only supports float32
    # for FFTs at the moment.
    input = _ops.convert_to_tensor(input, dtype=_dtypes.float32)

    # Prefer the static inner dimension; fall back to the run-time shape.
    axis_dim = (
        tensor_shape.dimension_value(input.shape[-1]) or
        _array_ops.shape(input)[-1])
    # Explicit cast instead of the deprecated `to_float` helper.
    axis_dim_float = _math_ops.cast(axis_dim, _dtypes.float32)

    if type == 1:
      # Mirror the interior samples to build the even extension whose RFFT
      # real part is the DCT-I.
      dct1_input = _array_ops.concat([input, input[..., -2:0:-1]], axis=-1)
      dct1 = _math_ops.real(fft_ops.rfft(dct1_input))
      return dct1

    if type == 2:
      scale = 2.0 * _math_ops.exp(
          _math_ops.complex(
              0.0,
              -_math_ops.range(axis_dim_float) * _math.pi * 0.5 /
              axis_dim_float))

      # TODO(rjryan): Benchmark performance and memory usage of the various
      # approaches to computing a DCT via the RFFT.
      dct2 = _math_ops.real(
          fft_ops.rfft(input, fft_length=[2 * axis_dim])[..., :axis_dim] *
          scale)

      if norm == "ortho":
        n1 = 0.5 * _math_ops.rsqrt(axis_dim_float)
        # sqrt(2) is a compile-time constant; compute it in Python rather
        # than adding a Sqrt op to the graph.
        n2 = n1 * _math.sqrt(2.0)
        # Use tf.pad to make a vector of [n1, n2, n2, n2, ...].
        weights = _array_ops.pad(
            _array_ops.expand_dims(n1, 0), [[0, axis_dim - 1]],
            constant_values=n2)
        dct2 *= weights

      return dct2

    elif type == 3:
      if norm == "ortho":
        n1 = _math_ops.sqrt(axis_dim_float)
        # sqrt(0.5) is a compile-time constant; compute it in Python rather
        # than adding a Sqrt op to the graph.
        n2 = n1 * _math.sqrt(0.5)
        # Use tf.pad to make a vector of [n1, n2, n2, n2, ...].
        weights = _array_ops.pad(
            _array_ops.expand_dims(n1, 0), [[0, axis_dim - 1]],
            constant_values=n2)
        input *= weights
      else:
        input *= axis_dim_float
      scale = 2.0 * _math_ops.exp(
          _math_ops.complex(
              0.0,
              _math_ops.range(axis_dim_float) * _math.pi * 0.5 /
              axis_dim_float))
      dct3 = _math_ops.real(
          fft_ops.irfft(
              scale * _math_ops.complex(input, 0.0),
              fft_length=[2 * axis_dim]))[..., :axis_dim]

      return dct3
def dct(input, type=2, n=None, axis=-1, norm=None, name=None):  # pylint: disable=redefined-builtin
  """Computes the 1D [Discrete Cosine Transform (DCT)][dct] of `input`.

  Currently only Types I, II and III are supported.
  Type I is implemented using a length `2N` padded `tf.spectral.rfft`.
  Type II is implemented using a length `2N` padded `tf.spectral.rfft`, as
  described here: https://dsp.stackexchange.com/a/10606.
  Type III is a fairly straightforward inverse of Type II
  (i.e. using a length `2N` padded `tf.spectral.irfft`).

  @compatibility(scipy)
  Equivalent to scipy.fftpack.dct for Type-I, Type-II and Type-III DCT.
  https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.fftpack.dct.html
  @end_compatibility

  Args:
    input: A `[..., samples]` `float32` `Tensor` containing the signals to
      take the DCT of.
    type: The DCT type to perform. Must be 1, 2 or 3.
    n: For future expansion. The length of the transform. Must be `None`.
    axis: For future expansion. The axis to compute the DCT along. Must be `-1`.
    norm: The normalization to apply. `None` for no normalization or `'ortho'`
      for orthonormal normalization.
    name: An optional name for the operation.

  Returns:
    A `[..., samples]` `float32` `Tensor` containing the DCT of `input`.

  Raises:
    ValueError: If `type` is not `1`, `2` or `3`, `n` is not `None`, `axis` is
      not `-1`, or `norm` is not `None` or `'ortho'`.
    ValueError: If `type` is `1` and `norm` is `ortho`.

  [dct]: https://en.wikipedia.org/wiki/Discrete_cosine_transform
  """
  _validate_dct_arguments(input, type, n, axis, norm)
  with _ops.name_scope(name, "dct", [input]):
    # We use the RFFT to compute the DCT and TensorFlow only supports float32
    # for FFTs at the moment.
    input = _ops.convert_to_tensor(input, dtype=_dtypes.float32)

    # Prefer the static inner dimension; fall back to the run-time shape.
    axis_dim = (
        tensor_shape.dimension_value(input.shape[-1]) or
        _array_ops.shape(input)[-1])
    # Explicit cast instead of the deprecated `to_float` helper.
    axis_dim_float = _math_ops.cast(axis_dim, _dtypes.float32)

    if type == 1:
      # Mirror the interior samples to build the even extension whose RFFT
      # real part is the DCT-I.
      dct1_input = _array_ops.concat([input, input[..., -2:0:-1]], axis=-1)
      dct1 = _math_ops.real(fft_ops.rfft(dct1_input))
      return dct1

    if type == 2:
      scale = 2.0 * _math_ops.exp(
          _math_ops.complex(
              0.0,
              -_math_ops.range(axis_dim_float) * _math.pi * 0.5 /
              axis_dim_float))

      # TODO(rjryan): Benchmark performance and memory usage of the various
      # approaches to computing a DCT via the RFFT.
      dct2 = _math_ops.real(
          fft_ops.rfft(input, fft_length=[2 * axis_dim])[..., :axis_dim] *
          scale)

      if norm == "ortho":
        n1 = 0.5 * _math_ops.rsqrt(axis_dim_float)
        # sqrt(2) is a compile-time constant; compute it in Python rather
        # than adding a Sqrt op to the graph.
        n2 = n1 * _math.sqrt(2.0)
        # Use tf.pad to make a vector of [n1, n2, n2, n2, ...].
        weights = _array_ops.pad(
            _array_ops.expand_dims(n1, 0), [[0, axis_dim - 1]],
            constant_values=n2)
        dct2 *= weights

      return dct2

    elif type == 3:
      if norm == "ortho":
        n1 = _math_ops.sqrt(axis_dim_float)
        # sqrt(0.5) is a compile-time constant; compute it in Python rather
        # than adding a Sqrt op to the graph.
        n2 = n1 * _math.sqrt(0.5)
        # Use tf.pad to make a vector of [n1, n2, n2, n2, ...].
        weights = _array_ops.pad(
            _array_ops.expand_dims(n1, 0), [[0, axis_dim - 1]],
            constant_values=n2)
        input *= weights
      else:
        input *= axis_dim_float
      scale = 2.0 * _math_ops.exp(
          _math_ops.complex(
              0.0,
              _math_ops.range(axis_dim_float) * _math.pi * 0.5 /
              axis_dim_float))
      dct3 = _math_ops.real(
          fft_ops.irfft(
              scale * _math_ops.complex(input, 0.0),
              fft_length=[2 * axis_dim]))[..., :axis_dim]

      return dct3
def inverse_stft(stfts,
                 frame_length,
                 frame_step,
                 fft_length=None,
                 window_fn=window_ops.hann_window,
                 name=None):
  """Computes the inverse [Short-time Fourier Transform][stft] of `stfts`.

  To reconstruct an original waveform, a complementary window function should
  be used in inverse_stft. Such a window function can be constructed with
  tf.signal.inverse_stft_window_fn.

  Example:

  ```python
  frame_length = 400
  frame_step = 160
  waveform = tf.placeholder(dtype=tf.float32, shape=[1000])
  stft = tf.signal.stft(waveform, frame_length, frame_step)
  inverse_stft = tf.signal.inverse_stft(
      stft, frame_length, frame_step,
      window_fn=tf.signal.inverse_stft_window_fn(frame_step))
  ```

  If a custom window_fn is used in stft, it must be passed to
  inverse_stft_window_fn:

  ```python
  frame_length = 400
  frame_step = 160
  window_fn = functools.partial(tf.signal.hamming_window, periodic=True)
  waveform = tf.placeholder(dtype=tf.float32, shape=[1000])
  stft = tf.signal.stft(
      waveform, frame_length, frame_step, window_fn=window_fn)
  inverse_stft = tf.signal.inverse_stft(
      stft, frame_length, frame_step,
      window_fn=tf.signal.inverse_stft_window_fn(
          frame_step, forward_window_fn=window_fn))
  ```

  Implemented with GPU-compatible ops and supports gradients.

  Args:
    stfts: A `complex64` `[..., frames, fft_unique_bins]` `Tensor` of STFT bins
      representing a batch of `fft_length`-point STFTs where `fft_unique_bins`
      is `fft_length // 2 + 1`
    frame_length: An integer scalar `Tensor`. The window length in samples.
    frame_step: An integer scalar `Tensor`. The number of samples to step.
    fft_length: An integer scalar `Tensor`. The size of the FFT that produced
      `stfts`. If not provided, uses the smallest power of 2 enclosing
      `frame_length`.
    window_fn: A callable that takes a window length and a `dtype` keyword
      argument and returns a `[window_length]` `Tensor` of samples in the
      provided datatype. If set to `None`, no windowing is used.
    name: An optional name for the operation.

  Returns:
    A `[..., samples]` `Tensor` of `float32` signals representing the inverse
    STFT for each input STFT in `stfts`.

  Raises:
    ValueError: If `stfts` is not at least rank 2, `frame_length` is not scalar,
      `frame_step` is not scalar, or `fft_length` is not scalar.

  [stft]: https://en.wikipedia.org/wiki/Short-time_Fourier_transform
  """
  with ops.name_scope(name, 'inverse_stft', [stfts]):
    stfts = ops.convert_to_tensor(stfts, name='stfts')
    stfts.shape.with_rank_at_least(2)
    frame_length = ops.convert_to_tensor(frame_length, name='frame_length')
    frame_length.shape.assert_has_rank(0)
    frame_step = ops.convert_to_tensor(frame_step, name='frame_step')
    frame_step.shape.assert_has_rank(0)
    if fft_length is None:
      fft_length = _enclosing_power_of_two(frame_length)
    else:
      fft_length = ops.convert_to_tensor(fft_length, name='fft_length')
      fft_length.shape.assert_has_rank(0)

    real_frames = fft_ops.irfft(stfts, [fft_length])

    # frame_length may be larger or smaller than fft_length, so we pad or
    # truncate real_frames to frame_length.
    frame_length_static = tensor_util.constant_value(frame_length)

    # Read the inner dimension through as_list() rather than
    # `shape[-1].value`: with TensorShape V2 behavior `shape[-1]` is already an
    # int (or None) and has no `.value` attribute. as_list() requires a known
    # rank, hence the ndims guard.
    if real_frames.shape.ndims is None:
      inner_dim_static = None
    else:
      inner_dim_static = real_frames.shape.as_list()[-1]

    # If we don't know the shape of real_frames's inner dimension, pad and
    # truncate to frame_length.
    if frame_length_static is None or inner_dim_static is None:
      real_frames = real_frames[..., :frame_length]
      real_frames_rank = array_ops.rank(real_frames)
      real_frames_shape = array_ops.shape(real_frames)
      # Zero padding for every outer dimension; the inner dimension is padded
      # on the right by however many samples are still missing (never
      # negative, because of the truncation above).
      paddings = array_ops.concat(
          [array_ops.zeros([real_frames_rank - 1, 2],
                           dtype=frame_length.dtype),
           [[0, math_ops.maximum(0, frame_length - real_frames_shape[-1])]]],
          0)
      real_frames = array_ops.pad(real_frames, paddings)
    # We know real_frames's last dimension and frame_length statically. If they
    # are different, then pad or truncate real_frames to frame_length.
    elif inner_dim_static > frame_length_static:
      real_frames = real_frames[..., :frame_length_static]
    elif inner_dim_static < frame_length_static:
      pad_amount = frame_length_static - inner_dim_static
      real_frames = array_ops.pad(
          real_frames,
          [[0, 0]] * (real_frames.shape.ndims - 1) + [[0, pad_amount]])

    # The above code pads the inner dimension of real_frames to frame_length,
    # but it does so in a way that may not be shape-inference friendly.
    # Restore shape information if we are able to.
    if frame_length_static is not None and real_frames.shape.ndims is not None:
      real_frames.set_shape(
          [None] * (real_frames.shape.ndims - 1) + [frame_length_static])

    # Optionally window and overlap-add the inner 2 dimensions of real_frames
    # into a single [samples] dimension.
    if window_fn is not None:
      window = window_fn(frame_length, dtype=stfts.dtype.real_dtype)
      real_frames *= window
    return reconstruction_ops.overlap_and_add(real_frames, frame_step)