def test_gradient_batch(self):
  """Verifies that gradient only flows into the batch item that earned it.

  A zero signal of shape (2, 10, 10) is overlap-added with a hop of 10 and
  each batch item of the result is scaled by its own weight vector before
  being summed into a scalar loss.
  """
  with self.session(use_gpu=True) as sess:
    signal = array_ops.zeros((2, 10, 10))
    frame_hop = 10
    reconstruction = reconstruction_ops.overlap_and_add(signal, frame_hop)

    # Multiply the first batch-item's reconstruction by zeros. This will block
    # gradient from flowing into the first batch item from the loss. Multiply
    # the second batch item by the integers from 0 to 99. Since there is zero
    # overlap, the gradient for this batch item will be 0-99 shaped as (10,
    # 10).
    # math_ops.cast is used in place of the deprecated math_ops.to_float.
    weights = array_ops.stack([
        array_ops.zeros((100,)),
        math_ops.cast(math_ops.range(100), np.float32),
    ])
    reconstruction *= weights
    loss = math_ops.reduce_sum(reconstruction)

    # Verify that only the second batch item receives gradient.
    gradient = sess.run(gradients_impl.gradients([loss], [signal])[0])
    expected_gradient = np.stack([
        np.zeros((10, 10)),
        np.reshape(np.arange(100).astype(np.float32), (10, 10)),
    ])
    self.assertAllEqual(expected_gradient, gradient)
def test_simple(self):
  """Compares overlap_and_add with hand-written results for small inputs."""

  def make_input(frame_length, num_frames=3):
    """Returns num_frames rows of frame_length values counting up from 1."""
    last_value = num_frames * frame_length
    return np.arange(1, last_value + 1).reshape((-1, frame_length))

  # Each case is (frame_length, frame_hop, expected_result); every hop from 1
  # up to the frame length is covered for frame lengths 2, 3 and 4.
  cases = [
      (2, 1, [1, 5, 9, 6]),
      (2, 2, [1, 2, 3, 4, 5, 6]),
      (3, 1, [1, 6, 15, 14, 9]),
      (3, 2, [1, 2, 7, 5, 13, 8, 9]),
      (3, 3, [1, 2, 3, 4, 5, 6, 7, 8, 9]),
      (4, 1, [1, 7, 18, 21, 19, 12]),
      (4, 2, [1, 2, 8, 10, 16, 18, 11, 12]),
      (4, 3, [1, 2, 3, 9, 6, 7, 17, 10, 11, 12]),
      (4, 4, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
  ]

  with self.session(use_gpu=True):
    for frame_length, frame_hop, expected in cases:
      frames = np.array(make_input(frame_length))
      actual = reconstruction_ops.overlap_and_add(frames, frame_hop).eval()
      self.assertAllClose(actual, np.array(expected))
def test_gradient_batch(self):
  """Verifies that only the second batch item receives gradient.

  Returns immediately, without checking anything, when executing eagerly.
  """
  # TODO(rjryan): Eager gradient tests.
  if context.executing_eagerly():
    return

  frame_hop = 10
  frames = array_ops.zeros((2, 10, 10))
  overlapped = reconstruction_ops.overlap_and_add(frames, frame_hop)

  # The first batch item is scaled by zeros, which blocks gradient from the
  # loss. The second is scaled by the integers 0..99; with no overlap between
  # frames, its gradient is therefore 0..99 reshaped to (10, 10).
  blocking_weights = array_ops.zeros((100,))
  ramp_weights = math_ops.cast(math_ops.range(100), dtypes.float32)
  weighted = overlapped * array_ops.stack([blocking_weights, ramp_weights])
  loss = math_ops.reduce_sum(weighted)

  actual_gradient = self.evaluate(
      gradients_impl.gradients([loss], [frames])[0])
  ramp = np.arange(100).astype(np.float32)
  expected_gradient = np.stack(
      [np.zeros((10, 10)), np.reshape(ramp, (10, 10))])
  self.assertAllEqual(expected_gradient, actual_gradient)
def test_simple(self):
  """Compares overlap_and_add with hand-written results for small inputs."""

  def make_input(frame_length, num_frames=3):
    """Returns num_frames rows of frame_length values counting up from 1."""
    last_value = num_frames * frame_length
    return np.arange(1, last_value + 1).reshape((-1, frame_length))

  # Each case is (frame_length, frame_hop, expected_result); every hop from 1
  # up to the frame length is covered for frame lengths 2, 3 and 4.
  cases = [
      (2, 1, [1, 5, 9, 6]),
      (2, 2, [1, 2, 3, 4, 5, 6]),
      (3, 1, [1, 6, 15, 14, 9]),
      (3, 2, [1, 2, 7, 5, 13, 8, 9]),
      (3, 3, [1, 2, 3, 4, 5, 6, 7, 8, 9]),
      (4, 1, [1, 7, 18, 21, 19, 12]),
      (4, 2, [1, 2, 8, 10, 16, 18, 11, 12]),
      (4, 3, [1, 2, 3, 9, 6, 7, 17, 10, 11, 12]),
      (4, 4, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
  ]

  with self.session(use_gpu=True):
    for frame_length, frame_hop, expected in cases:
      frames = np.array(make_input(frame_length))
      actual = reconstruction_ops.overlap_and_add(frames, frame_hop).eval()
      self.assertAllClose(actual, np.array(expected))
def test_powers(self):
  """Overlap-adds the first item of self.powers and checks its digit strings.

  Each reconstructed value is rendered with np.base_repr in base
  self.bases[0] and the resulting list is compared to self.expected_string.
  """
  first_item = np.squeeze(self.powers[0, :, :])
  frames = constant_op.constant(first_item, dtype=dtypes.int64)
  overlapped = reconstruction_ops.overlap_and_add(frames, self.frame_hop)

  values = self.evaluate(overlapped)
  digits = [np.base_repr(value, self.bases[0]) for value in values]
  self.assertEqual(digits, self.expected_string)
def test_powers(self):
  """Overlap-adds the first item of self.powers and checks its digit strings.

  Each reconstructed value is rendered with np.base_repr in base
  self.bases[0] and the resulting list is compared to self.expected_string.
  """
  first_item = np.squeeze(self.powers[0, :, :])
  frames = constant_op.constant(first_item, dtype=dtypes.int64)
  overlapped = reconstruction_ops.overlap_and_add(frames, self.frame_hop)

  with self.session(use_gpu=True):
    values = self.evaluate(overlapped)
    digits = [np.base_repr(value, self.bases[0]) for value in values]
    self.assertEqual(digits, self.expected_string)
def test_all_ones(self):
  """Overlap-adds three all-ones frames of length 5 with a hop of 2."""
  frames = constant_op.constant(np.ones((3, 5)), dtype=dtypes.int64)
  overlapped = reconstruction_ops.overlap_and_add(frames, 2)

  with self.session(use_gpu=True) as sess:
    actual = sess.run(overlapped)
    # Each output sample equals the number of frames covering that position.
    expected = np.array([1, 1, 2, 2, 3, 2, 2, 1, 1])
    self.assertAllClose(actual, expected)
def test_all_ones(self):
  """Overlap-adds three all-ones frames of length 5 with a hop of 2."""
  frames = constant_op.constant(np.ones((3, 5)), dtype=dtypes.int64)
  overlapped = reconstruction_ops.overlap_and_add(frames, 2)

  with self.session(use_gpu=True) as sess:
    actual = sess.run(overlapped)
    # Each output sample equals the number of frames covering that position.
    expected = np.array([1, 1, 2, 2, 3, 2, 2, 1, 1])
    self.assertAllClose(actual, expected)
def test_gradient_numerical(self):
  """Bounds the error reported by test.compute_gradient_error below 2e-5.

  The op under test is overlap_and_add applied to a zero (2, 10, 10) input
  with a hop of 10, giving an output of shape [2, 100].
  """
  frame_hop = 10
  input_shape = (2, 10, 10)
  output_shape = [2, 100]

  with self.session(use_gpu=True):
    frames = array_ops.zeros(input_shape)
    overlapped = reconstruction_ops.overlap_and_add(frames, frame_hop)
    gradient_error = test.compute_gradient_error(
        frames, input_shape, overlapped, output_shape)
    self.assertLess(gradient_error, 2e-5)
def test_fast_path(self):
  """Checks the result tensor's name and value when hop equals frame length.

  Returns immediately, without checking anything, when executing eagerly.
  """
  # This test uses tensor names and does not work in eager mode.
  if context.executing_eagerly():
    return

  frame_step = 5
  frames = array_ops.ones([3, 5])
  overlapped = reconstruction_ops.overlap_and_add(frames, frame_step)

  self.assertEqual(overlapped.name, "overlap_and_add/fast_path:0")
  self.assertAllClose(overlapped, np.ones([15]))
def test_gradient_numerical(self):
  """Bounds the error reported by test.compute_gradient_error below 2e-5.

  The op under test is overlap_and_add applied to a zero (2, 10, 10) input
  with a hop of 10, giving an output of shape [2, 100].
  """
  frame_hop = 10
  input_shape = (2, 10, 10)
  output_shape = [2, 100]

  with self.session(use_gpu=True):
    frames = array_ops.zeros(input_shape)
    overlapped = reconstruction_ops.overlap_and_add(frames, frame_hop)
    gradient_error = test.compute_gradient_error(
        frames, input_shape, overlapped, output_shape)
    self.assertLess(gradient_error, 2e-5)
def test_simple(self, frame_length, expected, frame_hop):
  """Compares overlap_and_add on a counting input with `expected`.

  Args:
    frame_length: Length of each of the three generated frames.
    expected: Sequence holding the expected reconstruction.
    frame_hop: Hop passed to overlap_and_add.
  """
  num_frames = 3
  # Frames hold the integers 1..num_frames * frame_length in row-major order.
  counting = np.arange(1, num_frames * frame_length + 1)
  frames = np.reshape(counting, (-1, frame_length))

  actual = reconstruction_ops.overlap_and_add(np.array(frames), frame_hop)
  self.assertAllClose(actual, np.array(expected))
def test_unknown_shapes(self):
  """Runs overlap_and_add on inputs whose static shape is fully unknown.

  Returns immediately, without checking anything, when executing eagerly.
  """
  # This test uses placeholders and does not work in Eager mode.
  if context.executing_eagerly():
    return

  default_frames = np.ones((4, 3, 5)).astype(np.int32)
  frames = array_ops.placeholder_with_default(
      default_frames, shape=[None, None, None])
  frame_step = array_ops.placeholder_with_default(2, shape=[])
  overlapped = reconstruction_ops.overlap_and_add(frames, frame_step)

  # Rank is known statically even though neither dimension is.
  self.assertEqual(overlapped.shape.as_list(), [None, None])

  expected_row = [1, 1, 2, 2, 3, 2, 2, 1, 1]
  self.assertAllClose(overlapped, np.array([expected_row] * 4))
def test_all_ones(self):
  """Checks static shape and values for all-ones frames with a hop of 2."""
  frames = array_ops.ones([3, 5])
  overlapped = reconstruction_ops.overlap_and_add(frames, 2)
  self.assertEqual(overlapped.shape.as_list(), [9])

  with self.session(use_gpu=True):
    actual = self.evaluate(overlapped)
    # Each output sample equals the number of frames covering that position.
    expected = np.array([1, 1, 2, 2, 3, 2, 2, 1, 1])
    self.assertAllClose(actual, expected)
def test_gradient(self, shape, frame_hop):
  """Checks that d(sum(overlap_and_add(x))) / dx is one everywhere.

  Returns immediately, without checking anything, when executing eagerly.

  Args:
    shape: Shape of the all-zeros input frames.
    frame_hop: Hop passed to overlap_and_add.
  """
  # TODO(rjryan): Eager gradient tests.
  if context.executing_eagerly():
    return

  frames = array_ops.zeros(shape)
  overlapped = reconstruction_ops.overlap_and_add(frames, frame_hop)
  loss = math_ops.reduce_sum(overlapped)

  # Raising any input sample by one raises the summed reconstruction by one,
  # so every gradient entry should be exactly 1 whatever the shape or hop.
  gradient_op = gradients_impl.gradients([loss], [frames])[0]
  gradient = self.evaluate(gradient_op)
  self.assertTrue((gradient == 1.0).all())
def test_gradient_numerical(self):
  """Bounds the error reported by test.compute_gradient_error below 2e-5.

  The op under test is overlap_and_add applied to a zero (2, 10, 10) input
  with a hop of 10, giving an output of shape [2, 100]. Returns immediately,
  without checking anything, when executing eagerly.
  """
  # TODO(rjryan): Eager gradient tests.
  if context.executing_eagerly():
    return

  frame_hop = 10
  input_shape = (2, 10, 10)
  output_shape = [2, 100]

  with self.session(use_gpu=True):
    frames = array_ops.zeros(input_shape)
    overlapped = reconstruction_ops.overlap_and_add(frames, frame_hop)
    gradient_error = test.compute_gradient_error(
        frames, input_shape, overlapped, output_shape)
    self.assertLess(gradient_error, 2e-5)
def test_batch(self):
  """Overlap-adds all of self.powers and checks every batch item.

  Row i of the output is rendered with np.base_repr in base self.bases[i];
  the test passes only if every row matches self.expected_string.
  """
  frames = constant_op.constant(self.powers, dtype=dtypes.int64)
  overlapped = reconstruction_ops.overlap_and_add(frames, self.frame_hop)

  with self.session(use_gpu=True):
    output = self.evaluate(overlapped)

    def row_digits(index):
      """Returns the digit strings for batch item `index`."""
      base = self.bases[index]
      return [np.base_repr(value, base) for value in output[index, :]]

    every_row_matches = all(
        row_digits(index) == self.expected_string
        for index in range(self.batch_size))
    self.assertTrue(every_row_matches)
def test_one_element_batch(self):
  """Runs overlap_and_add on a float32 batch holding a single item.

  The item is the first entry of self.powers with a leading axis re-added.
  """
  first_item = np.squeeze(self.powers[0, :, :])
  batched = first_item[np.newaxis, :, :].astype(float)
  frames = constant_op.constant(batched, dtype=dtypes.float32)
  overlapped = reconstruction_ops.overlap_and_add(frames, self.frame_hop)

  with self.session(use_gpu=True):
    output = self.evaluate(overlapped)
    # Values are floats here, so convert to int before rendering the digits.
    digits = [
        np.base_repr(int(value), self.bases[0])
        for value in np.squeeze(output)
    ]
    self.assertEqual(output.shape, (1, 9))
    self.assertEqual(digits, self.expected_string)
def test_fast_path(self):
  """Checks the result tensor's name and value when hop equals frame length."""
  frame_step = 5
  frames = array_ops.placeholder(dtype=dtypes.int32, shape=[3, 5])
  overlapped = reconstruction_ops.overlap_and_add(frames, frame_step)
  self.assertEqual(overlapped.name, "overlap_and_add/fast_path:0")

  with self.session(use_gpu=True) as sess:
    feed = {frames: np.ones([3, 5])}
    actual = sess.run(overlapped, feed_dict=feed)
    self.assertAllClose(actual, np.ones([15]))
def test_unknown_rank(self):
  """Runs overlap_and_add on a placeholder whose rank is unknown."""
  frames = array_ops.placeholder(dtype=dtypes.int32, shape=None)
  frame_step = array_ops.placeholder(dtype=dtypes.int32, shape=[])
  overlapped = reconstruction_ops.overlap_and_add(frames, frame_step)

  # NOTE(review): kept as an equality check against None; presumably this
  # relies on the shape object's own __eq__, so confirm before switching to
  # an identity assertion.
  self.assertEqual(overlapped.shape, None)

  with self.session(use_gpu=True) as sess:
    feed = {frames: np.ones([4, 3, 5]), frame_step: 2}
    actual = sess.run(overlapped, feed_dict=feed)
    expected_row = [1, 1, 2, 2, 3, 2, 2, 1, 1]
    self.assertAllClose(actual, np.array([expected_row] * 4))
def test_fast_path(self):
  """Checks the result tensor's name and value when hop equals frame length.

  Returns immediately, without checking anything, when executing eagerly.
  """
  # This test uses tensor names and does not work in eager mode.
  if context.executing_eagerly():
    return

  frame_step = 5
  frames = array_ops.ones([3, 5])
  overlapped = reconstruction_ops.overlap_and_add(frames, frame_step)
  self.assertEqual(overlapped.name, "overlap_and_add/fast_path:0")

  with self.session(use_gpu=True):
    actual = self.evaluate(overlapped)
    self.assertAllClose(actual, np.ones([15]))
def test_unknown_shapes(self):
  """Runs overlap_and_add on a rank-3 placeholder with unknown dimensions.

  Returns immediately, without checking anything, when executing eagerly.
  """
  # This test uses placeholders and does not work in eager mode.
  if context.executing_eagerly():
    return

  frames = array_ops.placeholder(
      dtype=dtypes.int32, shape=[None, None, None])
  frame_step = array_ops.placeholder(dtype=dtypes.int32, shape=[])
  overlapped = reconstruction_ops.overlap_and_add(frames, frame_step)

  # Rank is known statically even though neither dimension is.
  self.assertEqual(overlapped.shape.as_list(), [None, None])

  with self.session(use_gpu=True) as sess:
    feed = {frames: np.ones([4, 3, 5]), frame_step: 2}
    actual = sess.run(overlapped, feed_dict=feed)
    expected_row = [1, 1, 2, 2, 3, 2, 2, 1, 1]
    self.assertAllClose(actual, np.array([expected_row] * 4))
def test_unknown_rank(self):
  """Runs overlap_and_add on a placeholder whose rank is unknown.

  Returns immediately, without checking anything, when executing eagerly.
  """
  # This test uses placeholders and does not work in eager mode.
  if context.executing_eagerly():
    return

  frames = array_ops.placeholder(dtype=dtypes.int32, shape=None)
  frame_step = array_ops.placeholder(dtype=dtypes.int32, shape=[])
  overlapped = reconstruction_ops.overlap_and_add(frames, frame_step)

  # NOTE(review): kept as an equality check against None; presumably this
  # relies on the shape object's own __eq__, so confirm before switching to
  # an identity assertion.
  self.assertEqual(overlapped.shape, None)

  with self.session(use_gpu=True) as sess:
    feed = {frames: np.ones([4, 3, 5]), frame_step: 2}
    actual = sess.run(overlapped, feed_dict=feed)
    expected_row = [1, 1, 2, 2, 3, 2, 2, 1, 1]
    self.assertAllClose(actual, np.array([expected_row] * 4))
def test_gradient(self):
  """Checks that d(sum(overlap_and_add(x))) / dx is one everywhere."""
  # Each entry is (input shape, frame_hop).
  shapes_and_hops = [
      ((1, 128), 1),
      ((5, 35), 17),
      ((10, 128), 128),
      ((2, 10, 128), 127),
      ((2, 2, 10, 128), 126),
      ((2, 2, 2, 10, 128), 125),
  ]

  with self.session(use_gpu=True) as sess:
    for input_shape, hop in shapes_and_hops:
      frames = array_ops.zeros(input_shape)
      overlapped = reconstruction_ops.overlap_and_add(frames, hop)
      loss = math_ops.reduce_sum(overlapped)

      # Raising any input sample by one raises the summed reconstruction by
      # one, so every gradient entry should be exactly 1 whatever the shape
      # or hop.
      gradient_op = gradients_impl.gradients([loss], [frames])[0]
      gradient = sess.run(gradient_op)
      self.assertTrue((gradient == 1.0).all())
def test_gradient_batch(self):
  """Verifies that gradient only flows into the batch item that earned it.

  A zero signal of shape (2, 10, 10) is overlap-added with a hop of 10 and
  each batch item of the result is scaled by its own weight vector before
  being summed into a scalar loss.
  """
  with self.session(use_gpu=True) as sess:
    signal = array_ops.zeros((2, 10, 10))
    frame_hop = 10
    reconstruction = reconstruction_ops.overlap_and_add(signal, frame_hop)

    # Multiply the first batch-item's reconstruction by zeros. This will block
    # gradient from flowing into the first batch item from the loss. Multiply
    # the second batch item by the integers from 0 to 99. Since there is zero
    # overlap, the gradient for this batch item will be 0-99 shaped as (10,
    # 10).
    # math_ops.cast is used in place of the deprecated math_ops.to_float.
    weights = array_ops.stack([
        array_ops.zeros((100,)),
        math_ops.cast(math_ops.range(100), np.float32),
    ])
    reconstruction *= weights
    loss = math_ops.reduce_sum(reconstruction)

    # Verify that only the second batch item receives gradient.
    gradient = sess.run(gradients_impl.gradients([loss], [signal])[0])
    expected_gradient = np.stack([
        np.zeros((10, 10)),
        np.reshape(np.arange(100).astype(np.float32), (10, 10)),
    ])
    self.assertAllEqual(expected_gradient, gradient)
def inverse_stft(stfts,
                 frame_length,
                 frame_step,
                 fft_length=None,
                 window_fn=window_ops.hann_window,
                 name=None):
  """Computes the inverse [Short-time Fourier Transform][stft] of `stfts`.

  To reconstruct an original waveform, a complementary window function should
  be used in inverse_stft. Such a window function can be constructed with
  tf.signal.inverse_stft_window_fn.

  Example:

  ```python
  frame_length = 400
  frame_step = 160
  waveform = tf.placeholder(dtype=tf.float32, shape=[1000])
  stft = tf.signal.stft(waveform, frame_length, frame_step)
  inverse_stft = tf.signal.inverse_stft(
      stft, frame_length, frame_step,
      window_fn=tf.signal.inverse_stft_window_fn(frame_step))
  ```

  If a custom window_fn is used in stft, it must be passed to
  inverse_stft_window_fn:

  ```python
  frame_length = 400
  frame_step = 160
  window_fn = functools.partial(window_ops.hamming_window, periodic=True)
  waveform = tf.placeholder(dtype=tf.float32, shape=[1000])
  stft = tf.signal.stft(
      waveform, frame_length, frame_step, window_fn=window_fn)
  inverse_stft = tf.signal.inverse_stft(
      stft, frame_length, frame_step,
      window_fn=tf.signal.inverse_stft_window_fn(
         frame_step, forward_window_fn=window_fn))
  ```

  Implemented with GPU-compatible ops and supports gradients.

  Args:
    stfts: A `complex64` `[..., frames, fft_unique_bins]` `Tensor` of STFT bins
      representing a batch of `fft_length`-point STFTs where `fft_unique_bins`
      is `fft_length // 2 + 1`
    frame_length: An integer scalar `Tensor`. The window length in samples.
    frame_step: An integer scalar `Tensor`. The number of samples to step.
    fft_length: An integer scalar `Tensor`. The size of the FFT that produced
      `stfts`. If not provided, uses the smallest power of 2 enclosing
      `frame_length`.
    window_fn: A callable that takes a window length and a `dtype` keyword
      argument and returns a `[window_length]` `Tensor` of samples in the
      provided datatype. If set to `None`, no windowing is used.
    name: An optional name for the operation.

  Returns:
    A `[..., samples]` `Tensor` of `float32` signals representing the inverse
    STFT for each input STFT in `stfts`.

  Raises:
    ValueError: If `stfts` is not at least rank 2, `frame_length` is not scalar,
      `frame_step` is not scalar, or `fft_length` is not scalar.

  [stft]: https://en.wikipedia.org/wiki/Short-time_Fourier_transform
  """
  with ops.name_scope(name, 'inverse_stft', [stfts]):
    stfts = ops.convert_to_tensor(stfts, name='stfts')
    stfts.shape.with_rank_at_least(2)
    frame_length = ops.convert_to_tensor(frame_length, name='frame_length')
    frame_length.shape.assert_has_rank(0)
    frame_step = ops.convert_to_tensor(frame_step, name='frame_step')
    frame_step.shape.assert_has_rank(0)
    if fft_length is None:
      fft_length = _enclosing_power_of_two(frame_length)
    else:
      fft_length = ops.convert_to_tensor(fft_length, name='fft_length')
      fft_length.shape.assert_has_rank(0)

    real_frames = fft_ops.irfft(stfts, [fft_length])

    # frame_length may be larger or smaller than fft_length, so we pad or
    # truncate real_frames to frame_length.
    frame_length_static = tensor_util.constant_value(frame_length)

    # Read the static inner dimension through as_list() so it is a plain int
    # (or None) whether or not indexing the shape yields an object with a
    # `.value` attribute. as_list() is only called once the rank is known.
    if real_frames.shape.ndims is None:
      inner_dim_static = None
    else:
      inner_dim_static = real_frames.shape.as_list()[-1]

    # If we don't know the shape of real_frames's inner dimension, pad and
    # truncate to frame_length.
    if frame_length_static is None or inner_dim_static is None:
      real_frames = real_frames[..., :frame_length]
      real_frames_rank = array_ops.rank(real_frames)
      real_frames_shape = array_ops.shape(real_frames)
      paddings = array_ops.concat(
          [array_ops.zeros([real_frames_rank - 1, 2],
                           dtype=frame_length.dtype),
           [[0, math_ops.maximum(0, frame_length - real_frames_shape[-1])]]],
          0)
      real_frames = array_ops.pad(real_frames, paddings)
    # We know real_frames's last dimension and frame_length statically. If they
    # are different, then pad or truncate real_frames to frame_length.
    elif inner_dim_static > frame_length_static:
      real_frames = real_frames[..., :frame_length_static]
    elif inner_dim_static < frame_length_static:
      pad_amount = frame_length_static - inner_dim_static
      real_frames = array_ops.pad(
          real_frames,
          [[0, 0]] * (real_frames.shape.ndims - 1) + [[0, pad_amount]])

    # The above code pads the inner dimension of real_frames to frame_length,
    # but it does so in a way that may not be shape-inference friendly.
    # Restore shape information if we are able to.
    if frame_length_static is not None and real_frames.shape.ndims is not None:
      real_frames.set_shape([None] * (real_frames.shape.ndims - 1) +
                            [frame_length_static])

    # Optionally window and overlap-add the inner 2 dimensions of real_frames
    # into a single [samples] dimension.
    if window_fn is not None:
      window = window_fn(frame_length, dtype=stfts.dtype.real_dtype)
      real_frames *= window
    return reconstruction_ops.overlap_and_add(real_frames, frame_step)
def inverse_mdct(mdcts,
                 window_fn=window_ops.vorbis_window,
                 norm=None,
                 name=None):
  """Computes the inverse modified DCT of `mdcts`.

  To reconstruct an original waveform, the same window function should
  be used with `mdct` and `inverse_mdct`.

  Example usage:

  >>> @tf.function
  ... def compare_round_trip():
  ...   samples = 1000
  ...   frame_length = 400
  ...   halflen = frame_length // 2
  ...   waveform = tf.random.normal(dtype=tf.float32, shape=[samples])
  ...   waveform_pad = tf.pad(waveform, [[halflen, 0],])
  ...   mdct = tf.signal.mdct(waveform_pad, frame_length, pad_end=True,
  ...                         window_fn=tf.signal.vorbis_window)
  ...   inverse_mdct = tf.signal.inverse_mdct(mdct,
  ...                                         window_fn=tf.signal.vorbis_window)
  ...   inverse_mdct = inverse_mdct[halflen: halflen + samples]
  ...   return waveform, inverse_mdct
  >>> waveform, inverse_mdct = compare_round_trip()
  >>> np.allclose(waveform.numpy(), inverse_mdct.numpy(), rtol=1e-3, atol=1e-4)
  True

  Implemented with TPU/GPU-compatible ops and supports gradients.

  Args:
    mdcts: A `float32`/`float64` `[..., frames, frame_length // 2]`
      `Tensor` of MDCT bins representing a batch of `frame_length // 2`-point
      MDCTs.
    window_fn: A callable that takes a window length and a `dtype` keyword
      argument and returns a `[window_length]` `Tensor` of samples in the
      provided datatype. If set to `None`, no windowing is used.
    norm: If "ortho", orthonormal inverse DCT4 is performed, if it is None,
      a regular dct4 followed by scaling of `1/frame_length` is performed.
    name: An optional name for the operation.

  Returns:
    A `[..., samples]` `Tensor` of `float32`/`float64` signals representing
    the inverse MDCT for each input MDCT in `mdcts` where `samples` is
    `(frames - 1) * (frame_length // 2) + frame_length`.

  Raises:
    ValueError: If `mdcts` is not at least rank 2, or if `norm` is neither
      `None` nor `'ortho'`.

  [mdct]: https://en.wikipedia.org/wiki/Modified_discrete_cosine_transform
  """
  with ops.name_scope(name, 'inverse_mdct', [mdcts]):
    mdcts = ops.convert_to_tensor(mdcts, name='mdcts')
    mdcts.shape.with_rank_at_least(2)
    half_len = math_ops.cast(mdcts.shape[-1], dtype=dtypes.int32)

    if norm is None:
      # 0.5 / half_len equals 1 / frame_length since frame_length is
      # 2 * half_len.
      half_len_float = math_ops.cast(half_len, dtype=mdcts.dtype)
      result_idct4 = (0.5 / half_len_float) * dct_ops.dct(mdcts, type=4)
    elif norm == 'ortho':
      result_idct4 = dct_ops.dct(mdcts, type=4, norm='ortho')
    else:
      # Without this branch result_idct4 would be unbound below and the caller
      # would see an unrelated UnboundLocalError.
      raise ValueError(
          "Unsupported norm %r; expected None or 'ortho'." % (norm,))

    # Assemble each full-length frame from the two halves of the DCT-IV
    # output, using reversed and negated copies of each half.
    split_result = array_ops.split(result_idct4, 2, axis=-1)
    real_frames = array_ops.concat((split_result[1],
                                    -array_ops.reverse(split_result[1], [-1]),
                                    -array_ops.reverse(split_result[0], [-1]),
                                    -split_result[0]), axis=-1)

    # Optionally window and overlap-add the inner 2 dimensions of real_frames
    # into a single [samples] dimension.
    if window_fn is not None:
      window = window_fn(2 * half_len, dtype=mdcts.dtype)
      real_frames *= window
    else:
      real_frames *= 1.0 / np.sqrt(2)
    return reconstruction_ops.overlap_and_add(real_frames, half_len)
def inverse_stdct(stdcts,
                  frame_length,
                  frame_step,
                  fft_length=None,
                  window_fn=window_ops.hann_window,
                  name=None):
  """Inverse short-time discrete cosine transform.

  Applies `dct_ops.idct` to `stdcts`, pads or truncates the innermost
  dimension of the result to `frame_length`, optionally multiplies by a
  window, and overlap-adds the frames with a hop of `frame_step`.

  Args:
    stdcts: Value convertible to a `Tensor` of rank at least 2.
    frame_length: Scalar; the length each frame is padded or truncated to and
      the length requested from `window_fn`.
    frame_step: Scalar; the hop passed to `overlap_and_add`.
    fft_length: Optional scalar passed to `dct_ops.idct` as `n`. When `None`,
      `_enclosing_power_of_two(frame_length)` is used.
    window_fn: Optional callable invoked as `window_fn(frame_length,
      dtype=...)`; its result multiplies the frames. `None` disables
      windowing.
    name: An optional name for the operation.

  Returns:
    The result of `reconstruction_ops.overlap_and_add` on the processed frames.
  """
  with ops.name_scope(name, 'inverse_stdct', [stdcts]):
    stdcts = ops.convert_to_tensor(stdcts, name='stdcts')
    stdcts.shape.with_rank_at_least(2)
    frame_length = ops.convert_to_tensor(frame_length, name='frame_length')
    frame_length.shape.assert_has_rank(0)
    frame_step = ops.convert_to_tensor(frame_step, name='frame_step')
    frame_step.shape.assert_has_rank(0)
    if fft_length is not None:
      fft_length = ops.convert_to_tensor(fft_length, name='fft_length')
      fft_length.shape.assert_has_rank(0)
    else:
      fft_length = _enclosing_power_of_two(frame_length)

    frames = dct_ops.idct(stdcts, n=fft_length)

    # frame_length and fft_length may differ, so the inner dimension of frames
    # is brought to frame_length by padding or truncation.
    static_frame_length = tensor_util.constant_value(frame_length)
    # Static size of the inner dimension, or None when it (or the rank) is
    # not known at graph-construction time.
    static_inner_dim = None
    if frames.shape.ndims is not None:
      static_inner_dim = frames.shape.as_list()[-1]

    if static_frame_length is None or static_inner_dim is None:
      # Something is only known at run time: truncate, then pad by a
      # dynamically computed, non-negative amount.
      frames = frames[..., :frame_length]
      dynamic_rank = array_ops.rank(frames)
      dynamic_shape = array_ops.shape(frames)
      outer_paddings = array_ops.zeros(
          [dynamic_rank - 1, 2], dtype=frame_length.dtype)
      inner_paddings = [
          [0, math_ops.maximum(0, frame_length - dynamic_shape[-1])]
      ]
      frames = array_ops.pad(
          frames, array_ops.concat([outer_paddings, inner_paddings], 0))
    elif static_inner_dim > static_frame_length:
      # Both sizes are static and the frames are too long: truncate.
      frames = frames[..., :static_frame_length]
    elif static_inner_dim < static_frame_length:
      # Both sizes are static and the frames are too short: pad at the end.
      missing = static_frame_length - static_inner_dim
      no_padding = [[0, 0]] * (frames.shape.ndims - 1)
      frames = array_ops.pad(frames, no_padding + [[0, missing]])

    # Padding above may lose static shape information; restore the inner
    # dimension when it is known.
    if static_frame_length is not None and frames.shape.ndims is not None:
      outer_dims = [None] * (frames.shape.ndims - 1)
      frames.set_shape(outer_dims + [static_frame_length])

    # Optionally window, then overlap-add the inner two dimensions.
    if window_fn is not None:
      frames *= window_fn(frame_length, dtype=stdcts.dtype.real_dtype)
    return reconstruction_ops.overlap_and_add(frames, frame_step)
def inverse_stft(stfts,
                 frame_length,
                 frame_step,
                 fft_length=None,
                 window_fn=window_ops.hann_window,
                 name=None):
  """Computes the inverse [Short-time Fourier Transform][stft] of `stfts`.

  To reconstruct an original waveform, a complementary window function should
  be used in inverse_stft. Such a window function can be constructed with
  tf.signal.inverse_stft_window_fn.

  Example:

  ```python
  frame_length = 400
  frame_step = 160
  waveform = tf.placeholder(dtype=tf.float32, shape=[1000])
  stft = tf.signal.stft(waveform, frame_length, frame_step)
  inverse_stft = tf.signal.inverse_stft(
      stft, frame_length, frame_step,
      window_fn=tf.signal.inverse_stft_window_fn(frame_step))
  ```

  If a custom window_fn is used in stft, it must be passed to
  inverse_stft_window_fn:

  ```python
  frame_length = 400
  frame_step = 160
  window_fn = functools.partial(window_ops.hamming_window, periodic=True)
  waveform = tf.placeholder(dtype=tf.float32, shape=[1000])
  stft = tf.signal.stft(
      waveform, frame_length, frame_step, window_fn=window_fn)
  inverse_stft = tf.signal.inverse_stft(
      stft, frame_length, frame_step,
      window_fn=tf.signal.inverse_stft_window_fn(
         frame_step, forward_window_fn=window_fn))
  ```

  Implemented with GPU-compatible ops and supports gradients.

  Args:
    stfts: A `complex64` `[..., frames, fft_unique_bins]` `Tensor` of STFT bins
      representing a batch of `fft_length`-point STFTs where `fft_unique_bins`
      is `fft_length // 2 + 1`
    frame_length: An integer scalar `Tensor`. The window length in samples.
    frame_step: An integer scalar `Tensor`. The number of samples to step.
    fft_length: An integer scalar `Tensor`. The size of the FFT that produced
      `stfts`. If not provided, uses the smallest power of 2 enclosing
      `frame_length`.
    window_fn: A callable that takes a window length and a `dtype` keyword
      argument and returns a `[window_length]` `Tensor` of samples in the
      provided datatype. If set to `None`, no windowing is used.
    name: An optional name for the operation.

  Returns:
    A `[..., samples]` `Tensor` of `float32` signals representing the inverse
    STFT for each input STFT in `stfts`.

  Raises:
    ValueError: If `stfts` is not at least rank 2, `frame_length` is not scalar,
      `frame_step` is not scalar, or `fft_length` is not scalar.

  [stft]: https://en.wikipedia.org/wiki/Short-time_Fourier_transform
  """
  with ops.name_scope(name, 'inverse_stft', [stfts]):
    stfts = ops.convert_to_tensor(stfts, name='stfts')
    stfts.shape.with_rank_at_least(2)
    frame_length = ops.convert_to_tensor(frame_length, name='frame_length')
    frame_length.shape.assert_has_rank(0)
    frame_step = ops.convert_to_tensor(frame_step, name='frame_step')
    frame_step.shape.assert_has_rank(0)
    if fft_length is None:
      fft_length = _enclosing_power_of_two(frame_length)
    else:
      fft_length = ops.convert_to_tensor(fft_length, name='fft_length')
      fft_length.shape.assert_has_rank(0)

    real_frames = fft_ops.irfft(stfts, [fft_length])

    # frame_length may be larger or smaller than fft_length, so we pad or
    # truncate real_frames to frame_length.
    frame_length_static = tensor_util.constant_value(frame_length)

    # Read the static inner dimension through as_list() so it is a plain int
    # (or None) whether or not indexing the shape yields an object with a
    # `.value` attribute. as_list() is only called once the rank is known.
    if real_frames.shape.ndims is None:
      inner_dim_static = None
    else:
      inner_dim_static = real_frames.shape.as_list()[-1]

    # If we don't know the shape of real_frames's inner dimension, pad and
    # truncate to frame_length.
    if frame_length_static is None or inner_dim_static is None:
      real_frames = real_frames[..., :frame_length]
      real_frames_rank = array_ops.rank(real_frames)
      real_frames_shape = array_ops.shape(real_frames)
      paddings = array_ops.concat(
          [array_ops.zeros([real_frames_rank - 1, 2],
                           dtype=frame_length.dtype),
           [[0, math_ops.maximum(0, frame_length - real_frames_shape[-1])]]],
          0)
      real_frames = array_ops.pad(real_frames, paddings)
    # We know real_frames's last dimension and frame_length statically. If they
    # are different, then pad or truncate real_frames to frame_length.
    elif inner_dim_static > frame_length_static:
      real_frames = real_frames[..., :frame_length_static]
    elif inner_dim_static < frame_length_static:
      pad_amount = frame_length_static - inner_dim_static
      real_frames = array_ops.pad(
          real_frames,
          [[0, 0]] * (real_frames.shape.ndims - 1) + [[0, pad_amount]])

    # The above code pads the inner dimension of real_frames to frame_length,
    # but it does so in a way that may not be shape-inference friendly.
    # Restore shape information if we are able to.
    if frame_length_static is not None and real_frames.shape.ndims is not None:
      real_frames.set_shape([None] * (real_frames.shape.ndims - 1) +
                            [frame_length_static])

    # Optionally window and overlap-add the inner 2 dimensions of real_frames
    # into a single [samples] dimension.
    if window_fn is not None:
      window = window_fn(frame_length, dtype=stfts.dtype.real_dtype)
      real_frames *= window
    return reconstruction_ops.overlap_and_add(real_frames, frame_step)
def test_all_ones(self):
  """Checks static shape and values for all-ones frames with a hop of 2."""
  frames = array_ops.ones([3, 5])
  overlapped = reconstruction_ops.overlap_and_add(frames, 2)
  self.assertEqual(overlapped.shape.as_list(), [9])

  # Each output sample equals the number of frames covering that position.
  expected = np.array([1, 1, 2, 2, 3, 2, 2, 1, 1])
  self.assertAllClose(overlapped, expected)
def f(signal):
  """Overlap-adds `signal`; `frame_hop` is a free variable from outer scope."""
  overlapped = reconstruction_ops.overlap_and_add(signal, frame_hop)
  return overlapped