예제 #1
0
    def test_gradient_batch(self):
        """Verifies overlap_and_add gradients stay within each batch item.

        A (2, 10, 10) zero signal is overlap-added with a hop equal to the
        frame length, so frames do not overlap. The first batch item's
        reconstruction is weighted by zeros and the second by 0..99, so the
        expected gradient is zeros for item 0 and 0..99 (reshaped to
        (10, 10)) for item 1.
        """
        with self.session(use_gpu=True) as sess:
            signal = array_ops.zeros((2, 10, 10))
            frame_hop = 10
            reconstruction = reconstruction_ops.overlap_and_add(
                signal, frame_hop)

            # Multiply the first batch-item's reconstruction by zeros. This will
            # block gradient from flowing into the first batch item from the
            # loss. Multiply the second batch item by the integers from 0 to
            # 99. Since there is zero overlap, the gradient for this batch item
            # will be 0-99 shaped as (10, 10).
            reconstruction *= array_ops.stack([
                array_ops.zeros((100, )),
                # Explicit cast instead of the deprecated math_ops.to_float.
                math_ops.cast(math_ops.range(100), dtypes.float32)
            ])
            loss = math_ops.reduce_sum(reconstruction)

            # Verify that only the second batch item receives gradient.
            gradient = sess.run(gradients_impl.gradients([loss], [signal])[0])
            expected_gradient = np.stack([
                np.zeros((10, 10)),
                np.reshape(np.arange(100).astype(np.float32), (10, 10))
            ])
            self.assertAllEqual(expected_gradient, gradient)
예제 #2
0
  def test_simple(self):
    """Compares overlap_and_add with hand-computed sums on tiny inputs."""

    def make_input(frame_length, num_frames=3):
      """Returns 1..num_frames*frame_length arranged one frame per row."""
      last_value = num_frames * frame_length
      return np.arange(1, last_value + 1).reshape((-1, frame_length))

    # Each case is (frame_length, frame_hop, expected_result). Every hop from
    # 1 up to the frame length is covered for frame lengths 2, 3 and 4.
    cases = [
        (2, 1, [1, 5, 9, 6]),
        (2, 2, [1, 2, 3, 4, 5, 6]),
        (3, 1, [1, 6, 15, 14, 9]),
        (3, 2, [1, 2, 7, 5, 13, 8, 9]),
        (3, 3, [1, 2, 3, 4, 5, 6, 7, 8, 9]),
        (4, 1, [1, 7, 18, 21, 19, 12]),
        (4, 2, [1, 2, 8, 10, 16, 18, 11, 12]),
        (4, 3, [1, 2, 3, 9, 6, 7, 17, 10, 11, 12]),
        (4, 4, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
    ]

    with self.session(use_gpu=True):
      for frame_length, hop, wanted in cases:
        frames = np.array(make_input(frame_length))
        got = reconstruction_ops.overlap_and_add(frames, hop).eval()
        self.assertAllClose(got, np.array(wanted))
    def test_gradient_batch(self):
        """Verifies overlap_and_add gradients stay within each batch item.

        Skipped when executing eagerly. The first batch item is weighted by
        zeros and the second by 0..99, so only the second item should receive
        a (non-zero) gradient.
        """
        # TODO(rjryan): Eager gradient tests.
        if context.executing_eagerly():
            return

        frames = array_ops.zeros((2, 10, 10))
        overlapped = reconstruction_ops.overlap_and_add(frames, 10)

        # Weight batch item 0 by zeros so no gradient reaches it from the
        # loss, and batch item 1 by 0..99. The hop equals the frame length, so
        # nothing overlaps and item 1's gradient is exactly 0..99 laid out as
        # (10, 10).
        blocked = array_ops.zeros((100, ))
        ramp = math_ops.cast(math_ops.range(100), dtypes.float32)
        weighted = overlapped * array_ops.stack([blocked, ramp])
        loss = math_ops.reduce_sum(weighted)

        # Only the second batch item should receive gradient.
        grads = gradients_impl.gradients([loss], [frames])
        actual = self.evaluate(grads[0])
        wanted = np.stack([
            np.zeros((10, 10)),
            np.arange(100).astype(np.float32).reshape((10, 10)),
        ])
        self.assertAllEqual(wanted, actual)
예제 #4
0
    def test_simple(self):
        """Checks overlap_and_add on three small frames for every hop size."""

        def make_input(frame_length, num_frames=3):
            """Builds a (num_frames, frame_length) array counting up from 1."""
            values = np.arange(1, num_frames * frame_length + 1)
            return values.reshape((-1, frame_length))

        # Expected reconstructions keyed by frame length; within each list the
        # entries are (frame_hop, expected_result) for hops 1..frame_length.
        expectations = [
            (2, [(1, [1, 5, 9, 6]),
                 (2, [1, 2, 3, 4, 5, 6])]),
            (3, [(1, [1, 6, 15, 14, 9]),
                 (2, [1, 2, 7, 5, 13, 8, 9]),
                 (3, [1, 2, 3, 4, 5, 6, 7, 8, 9])]),
            (4, [(1, [1, 7, 18, 21, 19, 12]),
                 (2, [1, 2, 8, 10, 16, 18, 11, 12]),
                 (3, [1, 2, 3, 9, 6, 7, 17, 10, 11, 12]),
                 (4, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])]),
        ]

        with self.session(use_gpu=True):
            for frame_length, per_hop in expectations:
                for hop, wanted in per_hop:
                    frames = np.array(make_input(frame_length))
                    overlapped = reconstruction_ops.overlap_and_add(
                        frames, hop)
                    self.assertAllClose(overlapped.eval(), np.array(wanted))
    def test_powers(self):
        """Overlap-adds the first item of `self.powers` and checks its digits.

        Each reconstructed value is rendered in base `self.bases[0]` and the
        resulting list of strings must equal `self.expected_string`.
        """
        first_item = np.squeeze(self.powers[0, :, :])
        frames = constant_op.constant(first_item, dtype=dtypes.int64)
        overlapped = reconstruction_ops.overlap_and_add(frames, self.frame_hop)

        values = self.evaluate(overlapped)
        digits = []
        for value in values:
            digits.append(np.base_repr(value, self.bases[0]))
        self.assertEqual(digits, self.expected_string)
예제 #6
0
  def test_powers(self):
    """Overlap-adds the first item of `self.powers` and checks its digits.

    The reconstruction is evaluated inside a session, rendered in base
    `self.bases[0]`, and compared with `self.expected_string`.
    """
    first_item = np.squeeze(self.powers[0, :, :])
    frames = constant_op.constant(first_item, dtype=dtypes.int64)
    overlapped = reconstruction_ops.overlap_and_add(frames, self.frame_hop)

    with self.session(use_gpu=True):
      values = self.evaluate(overlapped)
      digits = []
      for value in values:
        digits.append(np.base_repr(value, self.bases[0]))

      self.assertEqual(digits, self.expected_string)
  def test_all_ones(self):
    """Overlap-adds three all-ones int64 frames of length 5 with hop 2."""
    frames = constant_op.constant(np.ones((3, 5)), dtype=dtypes.int64)
    overlapped = reconstruction_ops.overlap_and_add(frames, 2)
    # With all-ones frames, each output sample equals the number of frames
    # that cover that position.
    wanted = np.array([1, 1, 2, 2, 3, 2, 2, 1, 1])

    with self.session(use_gpu=True) as sess:
      actual = sess.run(overlapped)
      self.assertAllClose(actual, wanted)
예제 #8
0
    def test_all_ones(self):
        """Overlap-adds three all-ones int64 frames of length 5 with hop 2."""
        ones_frames = np.ones((3, 5))
        overlapped = reconstruction_ops.overlap_and_add(
            constant_op.constant(ones_frames, dtype=dtypes.int64), 2)
        # Every output sample is the count of frames overlapping at that
        # position, because all frame values are 1.
        wanted = np.array([1, 1, 2, 2, 3, 2, 2, 1, 1])

        with self.session(use_gpu=True) as sess:
            self.assertAllClose(sess.run(overlapped), wanted)
예제 #9
0
 def test_gradient_numerical(self):
     """Bounds the gradient error reported for overlap_and_add.

     Uses `test.compute_gradient_error` on a (2, 10, 10) zero input whose
     reconstruction has shape [2, 100]; the error must be below 2e-5.
     """
     input_shape = (2, 10, 10)
     output_shape = [2, 100]
     with self.session(use_gpu=True):
         frames = array_ops.zeros(input_shape)
         overlapped = reconstruction_ops.overlap_and_add(frames, 10)
         max_error = test.compute_gradient_error(
             frames, input_shape, overlapped, output_shape)
         self.assertLess(max_error, 2e-5)
 def test_fast_path(self):
     """Checks the op name and output when the step equals the frame length.

     With three frames of length 5 and a step of 5, the result tensor is
     expected to be named "overlap_and_add/fast_path:0" and to hold 15 ones.
     """
     # Tensor names are only meaningful in graph mode, so skip under eager.
     if context.executing_eagerly():
         return
     frames = array_ops.ones([3, 5])
     overlapped = reconstruction_ops.overlap_and_add(frames, 5)
     self.assertEqual(overlapped.name, "overlap_and_add/fast_path:0")
     self.assertAllClose(overlapped, np.ones([15]))
예제 #11
0
 def test_gradient_numerical(self):
   """Bounds the gradient error reported for overlap_and_add.

   Uses `test.compute_gradient_error` on a (2, 10, 10) zero input whose
   reconstruction has shape [2, 100]; the error must be below 2e-5.
   """
   input_shape = (2, 10, 10)
   output_shape = [2, 100]
   with self.session(use_gpu=True):
     frames = array_ops.zeros(input_shape)
     overlapped = reconstruction_ops.overlap_and_add(frames, 10)
     max_error = test.compute_gradient_error(
         frames, input_shape, overlapped, output_shape)
     self.assertLess(max_error, 2e-5)
    def test_simple(self, frame_length, expected, frame_hop):
        """Checks overlap_and_add on three frames counting up from 1.

        Args:
          frame_length: Length of each of the three input frames.
          expected: Expected overlap-added samples.
          frame_hop: Hop between consecutive frames.
        """
        num_frames = 3
        # Frames hold 1..num_frames*frame_length, one frame per row.
        counting = np.arange(1, num_frames * frame_length + 1)
        frames = counting.reshape((-1, frame_length))

        overlapped = reconstruction_ops.overlap_and_add(
            np.array(frames), frame_hop)
        self.assertAllClose(overlapped, np.array(expected))
 def test_unknown_shapes(self):
     """Checks overlap_and_add when no input dimension is statically known.

     The signal is a rank-3 placeholder with all dimensions unknown
     (defaulting to int32 ones of shape (4, 3, 5)) and the step is a scalar
     placeholder defaulting to 2. The static output shape must be
     [None, None].
     """
     # Placeholders are a graph-mode feature, so skip under eager execution.
     if context.executing_eagerly():
         return
     default_signal = np.ones((4, 3, 5)).astype(np.int32)
     frames = array_ops.placeholder_with_default(
         default_signal, shape=[None, None, None])
     hop = array_ops.placeholder_with_default(2, shape=[])
     overlapped = reconstruction_ops.overlap_and_add(frames, hop)
     self.assertEqual(overlapped.shape.as_list(), [None, None])
     wanted = np.array([[1, 1, 2, 2, 3, 2, 2, 1, 1]] * 4)
     self.assertAllClose(overlapped, wanted)
  def test_all_ones(self):
    """Overlap-adds three all-ones frames of length 5 with hop 2.

    Also checks that the static output shape is inferred as [9].
    """
    frames = array_ops.ones([3, 5])
    overlapped = reconstruction_ops.overlap_and_add(frames, 2)
    self.assertEqual(overlapped.shape.as_list(), [9])

    # Each output sample equals the number of frames covering that position.
    wanted = np.array([1, 1, 2, 2, 3, 2, 2, 1, 1])

    with self.session(use_gpu=True):
      actual = self.evaluate(overlapped)
      self.assertAllClose(actual, wanted)
 def test_gradient(self, shape, frame_hop):
     """Checks that d(sum(overlap_and_add(x)))/dx is all ones.

     Skipped when executing eagerly.

     Args:
       shape: Shape of the all-zeros input signal.
       frame_hop: Hop passed to overlap_and_add.
     """
     # TODO(rjryan): Eager gradient tests.
     if context.executing_eagerly():
         return
     frames = array_ops.zeros(shape)
     total = math_ops.reduce_sum(
         reconstruction_ops.overlap_and_add(frames, frame_hop))
     # Raising any input sample by one raises the sum of the reconstruction
     # by one, so every gradient entry should be exactly 1 regardless of the
     # shape or hop.
     grads = gradients_impl.gradients([total], [frames])
     gradient = self.evaluate(grads[0])
     all_ones = (gradient == 1.0).all()
     self.assertTrue(all_ones)
예제 #16
0
 def test_gradient_numerical(self):
     """Bounds the gradient error reported for overlap_and_add.

     Skipped when executing eagerly. Uses `test.compute_gradient_error` on a
     (2, 10, 10) zero input whose reconstruction has shape [2, 100]; the
     error must be below 2e-5.
     """
     # TODO(rjryan): Eager gradient tests.
     if context.executing_eagerly():
         return
     input_shape = (2, 10, 10)
     output_shape = [2, 100]
     with self.session(use_gpu=True):
         frames = array_ops.zeros(input_shape)
         overlapped = reconstruction_ops.overlap_and_add(frames, 10)
         max_error = test.compute_gradient_error(
             frames, input_shape, overlapped, output_shape)
         self.assertLess(max_error, 2e-5)
  def test_batch(self):
    """Overlap-adds all of `self.powers` and checks every batch item.

    For each batch index i, the reconstruction rendered in base
    `self.bases[i]` must equal `self.expected_string`.
    """
    frames = constant_op.constant(self.powers, dtype=dtypes.int64)
    overlapped = reconstruction_ops.overlap_and_add(frames, self.frame_hop)

    with self.session(use_gpu=True):
      output = self.evaluate(overlapped)

      # One boolean per batch item; the list is built in full before the
      # single assertion below.
      matches = []
      for index in range(self.batch_size):
        digits = [
            np.base_repr(value, self.bases[index])
            for value in output[index, :]
        ]
        matches.append(digits == self.expected_string)

      self.assertTrue(all(matches))
  def test_one_element_batch(self):
    """Checks overlap_and_add on a float32 batch containing a single item.

    The first item of `self.powers` is given a leading batch axis of size 1.
    The output must have shape (1, 9) and, rendered in base `self.bases[0]`,
    equal `self.expected_string`.
    """
    first_item = np.squeeze(self.powers[0, :, :])
    batched = first_item[np.newaxis, :, :].astype(float)
    frames = constant_op.constant(batched, dtype=dtypes.float32)
    overlapped = reconstruction_ops.overlap_and_add(frames, self.frame_hop)

    with self.session(use_gpu=True):
      output = self.evaluate(overlapped)

      # Values are floats here, so convert to int before rendering digits.
      digits = []
      for value in np.squeeze(output):
        digits.append(np.base_repr(int(value), self.bases[0]))

      self.assertEqual(output.shape, (1, 9))
      self.assertEqual(digits, self.expected_string)
예제 #19
0
    def test_fast_path(self):
        """Checks the op name and output when the step equals the frame length.

        A [3, 5] int32 placeholder is overlap-added with a step of 5; the
        result tensor is expected to be named "overlap_and_add/fast_path:0"
        and, when fed ones, to produce 15 ones.
        """
        frames = array_ops.placeholder(dtype=dtypes.int32, shape=[3, 5])
        overlapped = reconstruction_ops.overlap_and_add(frames, 5)

        self.assertEqual(overlapped.name, "overlap_and_add/fast_path:0")

        feeds = {frames: np.ones([3, 5])}
        with self.session(use_gpu=True) as sess:
            actual = sess.run(overlapped, feed_dict=feeds)
            self.assertAllClose(actual, np.ones([15]))
예제 #20
0
    def test_unknown_rank(self):
        """Checks overlap_and_add when the input rank is not statically known.

        Both the signal (shape=None) and the step are placeholders. The
        static output shape must compare equal to None, and feeding
        (4, 3, 5) ones with a step of 2 must give the expected sums.
        """
        frames = array_ops.placeholder(dtype=dtypes.int32, shape=None)
        hop = array_ops.placeholder(dtype=dtypes.int32, shape=[])
        overlapped = reconstruction_ops.overlap_and_add(frames, hop)

        self.assertEqual(overlapped.shape, None)

        wanted = np.array([[1, 1, 2, 2, 3, 2, 2, 1, 1]] * 4)
        with self.session(use_gpu=True) as sess:
            feeds = {frames: np.ones([4, 3, 5]), hop: 2}
            actual = sess.run(overlapped, feed_dict=feeds)
            self.assertAllClose(actual, wanted)
예제 #21
0
  def test_fast_path(self):
    """Checks the op name and output when the step equals the frame length.

    With three all-ones frames of length 5 and a step of 5, the result tensor
    is expected to be named "overlap_and_add/fast_path:0" and to hold 15 ones.
    """
    # Tensor names are only meaningful in graph mode, so skip under eager.
    if context.executing_eagerly():
      return

    frames = array_ops.ones([3, 5])
    overlapped = reconstruction_ops.overlap_and_add(frames, 5)
    self.assertEqual(overlapped.name, "overlap_and_add/fast_path:0")

    wanted = np.ones([15])
    with self.session(use_gpu=True):
      actual = self.evaluate(overlapped)
      self.assertAllClose(actual, wanted)
  def test_unknown_shapes(self):
    """Checks overlap_and_add when no input dimension is statically known.

    The signal is a rank-3 int32 placeholder with all dimensions unknown and
    the step is a scalar placeholder. The static output shape must be
    [None, None]; feeding (4, 3, 5) ones with a step of 2 must give the
    expected sums.
    """
    # Placeholders are a graph-mode feature, so skip under eager execution.
    if context.executing_eagerly():
      return

    frames = array_ops.placeholder(
        dtype=dtypes.int32, shape=[None, None, None])
    hop = array_ops.placeholder(dtype=dtypes.int32, shape=[])
    overlapped = reconstruction_ops.overlap_and_add(frames, hop)
    self.assertEqual(overlapped.shape.as_list(), [None, None])

    wanted = np.array([[1, 1, 2, 2, 3, 2, 2, 1, 1]] * 4)
    with self.session(use_gpu=True) as sess:
      feeds = {frames: np.ones([4, 3, 5]), hop: 2}
      actual = sess.run(overlapped, feed_dict=feeds)
      self.assertAllClose(actual, wanted)
예제 #23
0
  def test_unknown_rank(self):
    """Checks overlap_and_add when the input rank is not statically known.

    Both the signal (shape=None) and the step are placeholders. The static
    output shape must compare equal to None, and feeding (4, 3, 5) ones with
    a step of 2 must give the expected sums.
    """
    # Placeholders are a graph-mode feature, so skip under eager execution.
    if context.executing_eagerly():
      return

    frames = array_ops.placeholder(dtype=dtypes.int32, shape=None)
    hop = array_ops.placeholder(dtype=dtypes.int32, shape=[])
    overlapped = reconstruction_ops.overlap_and_add(frames, hop)
    self.assertEqual(overlapped.shape, None)

    wanted = np.array([[1, 1, 2, 2, 3, 2, 2, 1, 1]] * 4)
    with self.session(use_gpu=True) as sess:
      feeds = {frames: np.ones([4, 3, 5]), hop: 2}
      actual = sess.run(overlapped, feed_dict=feeds)
      self.assertAllClose(actual, wanted)
  def test_gradient(self):
    """Checks that d(sum(overlap_and_add(x)))/dx is all ones.

    Runs over several input shapes (rank 2 through 5) and hop sizes.
    """
    # (input shape, frame hop) pairs.
    cases = (
        ((1, 128), 1),
        ((5, 35), 17),
        ((10, 128), 128),
        ((2, 10, 128), 127),
        ((2, 2, 10, 128), 126),
        ((2, 2, 2, 10, 128), 125),
    )

    with self.session(use_gpu=True) as sess:
      for input_shape, hop in cases:
        frames = array_ops.zeros(input_shape)
        total = math_ops.reduce_sum(
            reconstruction_ops.overlap_and_add(frames, hop))
        # Raising any input sample by one raises the sum of the
        # reconstruction by one, so every gradient entry should be exactly 1
        # regardless of the shape or hop.
        grads = gradients_impl.gradients([total], [frames])
        gradient = sess.run(grads[0])
        self.assertTrue((gradient == 1.0).all())
예제 #25
0
  def test_gradient_batch(self):
    """Verifies overlap_and_add gradients stay within each batch item.

    A (2, 10, 10) zero signal is overlap-added with a hop equal to the frame
    length, so frames do not overlap. The first batch item's reconstruction is
    weighted by zeros and the second by 0..99, so the expected gradient is
    zeros for item 0 and 0..99 (reshaped to (10, 10)) for item 1.
    """
    with self.session(use_gpu=True) as sess:
      signal = array_ops.zeros((2, 10, 10))
      frame_hop = 10
      reconstruction = reconstruction_ops.overlap_and_add(signal, frame_hop)

      # Multiply the first batch-item's reconstruction by zeros. This will block
      # gradient from flowing into the first batch item from the loss. Multiply
      # the second batch item by the integers from 0 to 99. Since there is zero
      # overlap, the gradient for this batch item will be 0-99 shaped as (10,
      # 10).
      reconstruction *= array_ops.stack([
          array_ops.zeros((100,)),
          # Explicit cast instead of the deprecated math_ops.to_float.
          math_ops.cast(math_ops.range(100), dtypes.float32)])
      loss = math_ops.reduce_sum(reconstruction)

      # Verify that only the second batch item receives gradient.
      gradient = sess.run(gradients_impl.gradients([loss], [signal])[0])
      expected_gradient = np.stack([
          np.zeros((10, 10)),
          np.reshape(np.arange(100).astype(np.float32), (10, 10))])
      self.assertAllEqual(expected_gradient, gradient)
예제 #26
0
def inverse_stft(stfts,
                 frame_length,
                 frame_step,
                 fft_length=None,
                 window_fn=window_ops.hann_window,
                 name=None):
    """Computes the inverse [Short-time Fourier Transform][stft] of `stfts`.

    To reconstruct an original waveform, a complementary window function
    should be used in inverse_stft. Such a window function can be constructed
    with tf.signal.inverse_stft_window_fn.

    Example:

    ```python
    frame_length = 400
    frame_step = 160
    waveform = tf.placeholder(dtype=tf.float32, shape=[1000])
    stft = tf.signal.stft(waveform, frame_length, frame_step)
    inverse_stft = tf.signal.inverse_stft(
        stft, frame_length, frame_step,
        window_fn=tf.signal.inverse_stft_window_fn(frame_step))
    ```

    If a custom window_fn is used in stft, it must be passed to
    inverse_stft_window_fn:

    ```python
    frame_length = 400
    frame_step = 160
    window_fn = functools.partial(window_ops.hamming_window, periodic=True)
    waveform = tf.placeholder(dtype=tf.float32, shape=[1000])
    stft = tf.signal.stft(
        waveform, frame_length, frame_step, window_fn=window_fn)
    inverse_stft = tf.signal.inverse_stft(
        stft, frame_length, frame_step,
        window_fn=tf.signal.inverse_stft_window_fn(
           frame_step, forward_window_fn=window_fn))
    ```

    Implemented with GPU-compatible ops and supports gradients.

    Args:
      stfts: A `complex64` `[..., frames, fft_unique_bins]` `Tensor` of STFT
        bins representing a batch of `fft_length`-point STFTs where
        `fft_unique_bins` is `fft_length // 2 + 1`
      frame_length: An integer scalar `Tensor`. The window length in samples.
      frame_step: An integer scalar `Tensor`. The number of samples to step.
      fft_length: An integer scalar `Tensor`. The size of the FFT that produced
        `stfts`. If not provided, uses the smallest power of 2 enclosing
        `frame_length`.
      window_fn: A callable that takes a window length and a `dtype` keyword
        argument and returns a `[window_length]` `Tensor` of samples in the
        provided datatype. If set to `None`, no windowing is used.
      name: An optional name for the operation.

    Returns:
      A `[..., samples]` `Tensor` of `float32` signals representing the inverse
      STFT for each input STFT in `stfts`.

    Raises:
      ValueError: If `stfts` is not at least rank 2, `frame_length` is not
        scalar, `frame_step` is not scalar, or `fft_length` is not scalar.

    [stft]: https://en.wikipedia.org/wiki/Short-time_Fourier_transform
    """
    with ops.name_scope(name, 'inverse_stft', [stfts]):
        stfts = ops.convert_to_tensor(stfts, name='stfts')
        stfts.shape.with_rank_at_least(2)
        frame_length = ops.convert_to_tensor(frame_length, name='frame_length')
        frame_length.shape.assert_has_rank(0)
        frame_step = ops.convert_to_tensor(frame_step, name='frame_step')
        frame_step.shape.assert_has_rank(0)
        if fft_length is None:
            fft_length = _enclosing_power_of_two(frame_length)
        else:
            fft_length = ops.convert_to_tensor(fft_length, name='fft_length')
            fft_length.shape.assert_has_rank(0)

        real_frames = fft_ops.irfft(stfts, [fft_length])

        # frame_length may be larger or smaller than fft_length, so we pad or
        # truncate real_frames to frame_length.
        frame_length_static = tensor_util.constant_value(frame_length)

        # Static size of the inner dimension, or None when the rank or that
        # dimension is unknown. `as_list()` yields plain ints/None, so this
        # does not rely on `shape[-1]` being a `Dimension` with a `.value`
        # attribute. The rank check comes first because `as_list()` raises on
        # shapes of unknown rank.
        if real_frames.shape.ndims is None:
            inner_dim_static = None
        else:
            inner_dim_static = real_frames.shape.as_list()[-1]

        # If we don't know the shape of real_frames's inner dimension, pad and
        # truncate to frame_length.
        if frame_length_static is None or inner_dim_static is None:
            real_frames = real_frames[..., :frame_length]
            real_frames_rank = array_ops.rank(real_frames)
            real_frames_shape = array_ops.shape(real_frames)
            paddings = array_ops.concat([
                array_ops.zeros([real_frames_rank - 1, 2],
                                dtype=frame_length.dtype),
                [[
                    0,
                    math_ops.maximum(0, frame_length - real_frames_shape[-1])
                ]]
            ], 0)
            real_frames = array_ops.pad(real_frames, paddings)
        # We know real_frames's last dimension and frame_length statically. If
        # they are different, then pad or truncate real_frames to frame_length.
        elif inner_dim_static > frame_length_static:
            real_frames = real_frames[..., :frame_length_static]
        elif inner_dim_static < frame_length_static:
            pad_amount = frame_length_static - inner_dim_static
            real_frames = array_ops.pad(
                real_frames,
                [[0, 0]] * (real_frames.shape.ndims - 1) + [[0, pad_amount]])

        # The above code pads the inner dimension of real_frames to
        # frame_length, but it does so in a way that may not be shape-inference
        # friendly. Restore shape information if we are able to.
        if (frame_length_static is not None
                and real_frames.shape.ndims is not None):
            real_frames.set_shape([None] * (real_frames.shape.ndims - 1) +
                                  [frame_length_static])

        # Optionally window and overlap-add the inner 2 dimensions of
        # real_frames into a single [samples] dimension.
        if window_fn is not None:
            window = window_fn(frame_length, dtype=stfts.dtype.real_dtype)
            real_frames *= window
        return reconstruction_ops.overlap_and_add(real_frames, frame_step)
예제 #27
0
def inverse_mdct(mdcts,
                 window_fn=window_ops.vorbis_window,
                 norm=None,
                 name=None):
    """Computes the inverse [modified DCT][mdct] of `mdcts`.

    To reconstruct an original waveform, the same window function should
    be used with `mdct` and `inverse_mdct`.

    Example usage:

    >>> @tf.function
    ... def compare_round_trip():
    ...   samples = 1000
    ...   frame_length = 400
    ...   halflen = frame_length // 2
    ...   waveform = tf.random.normal(dtype=tf.float32, shape=[samples])
    ...   waveform_pad = tf.pad(waveform, [[halflen, 0],])
    ...   mdct = tf.signal.mdct(waveform_pad, frame_length, pad_end=True,
    ...                         window_fn=tf.signal.vorbis_window)
    ...   inverse_mdct = tf.signal.inverse_mdct(mdct,
    ...                                         window_fn=tf.signal.vorbis_window)
    ...   inverse_mdct = inverse_mdct[halflen: halflen + samples]
    ...   return waveform, inverse_mdct
    >>> waveform, inverse_mdct = compare_round_trip()
    >>> np.allclose(waveform.numpy(), inverse_mdct.numpy(), rtol=1e-3, atol=1e-4)
    True

    Implemented with TPU/GPU-compatible ops and supports gradients.

    Args:
      mdcts: A `float32`/`float64` `[..., frames, frame_length // 2]`
        `Tensor` of MDCT bins representing a batch of `frame_length // 2`-point
        MDCTs.
      window_fn: A callable that takes a window length and a `dtype` keyword
        argument and returns a `[window_length]` `Tensor` of samples in the
        provided datatype. If set to `None`, no windowing is used.
      norm: If "ortho", orthonormal inverse DCT4 is performed, if it is None,
        a regular dct4 followed by scaling of `1/frame_length` is performed.
      name: An optional name for the operation.

    Returns:
      A `[..., samples]` `Tensor` of `float32`/`float64` signals representing
      the inverse MDCT for each input MDCT in `mdcts` where `samples` is
      `(frames - 1) * (frame_length // 2) + frame_length`.

    Raises:
      ValueError: If `mdcts` is not at least rank 2, or if `norm` is neither
        `None` nor `'ortho'`.

    [mdct]: https://en.wikipedia.org/wiki/Modified_discrete_cosine_transform
    """
    # Reject unsupported normalizations up front. Without this check an
    # unknown `norm` would skip both branches below and fail later with an
    # UnboundLocalError on `result_idct4`.
    if norm is not None and norm != 'ortho':
        raise ValueError(
            "norm must be None or 'ortho', got: %r" % (norm,))

    with ops.name_scope(name, 'inverse_mdct', [mdcts]):
        mdcts = ops.convert_to_tensor(mdcts, name='mdcts')
        mdcts.shape.with_rank_at_least(2)
        half_len = math_ops.cast(mdcts.shape[-1], dtype=dtypes.int32)

        if norm is None:
            # 0.5 / half_len == 1 / frame_length.
            half_len_float = math_ops.cast(half_len, dtype=mdcts.dtype)
            result_idct4 = (0.5 / half_len_float) * dct_ops.dct(mdcts, type=4)
        else:  # norm == 'ortho'
            result_idct4 = dct_ops.dct(mdcts, type=4, norm='ortho')

        # Unfold each half-length DCT-IV result into a full-length frame from
        # its two halves (reversed and/or negated copies).
        split_result = array_ops.split(result_idct4, 2, axis=-1)
        real_frames = array_ops.concat(
            (split_result[1], -array_ops.reverse(split_result[1], [-1]),
             -array_ops.reverse(split_result[0], [-1]), -split_result[0]),
            axis=-1)

        # Optionally window and overlap-add the inner 2 dimensions of
        # real_frames into a single [samples] dimension.
        if window_fn is not None:
            window = window_fn(2 * half_len, dtype=mdcts.dtype)
            real_frames *= window
        else:
            real_frames *= 1.0 / np.sqrt(2)
        return reconstruction_ops.overlap_and_add(real_frames, half_len)
예제 #28
0
def inverse_stdct(stdcts,
                 frame_length,
                 frame_step,
                 fft_length=None,
                 window_fn=window_ops.hann_window,
                 name=None):
  """Computes the inverse short-time discrete cosine transform of `stdcts`.

  `stdcts` is passed through `dct_ops.idct` with `n=fft_length`, the resulting
  frames are padded or truncated along the last axis to `frame_length`,
  optionally multiplied by a window, and finally combined with
  `reconstruction_ops.overlap_and_add` using `frame_step`.

  Args:
    stdcts: A value convertible to a `Tensor` of rank at least 2.
      NOTE(review): presumably shaped `[..., frames, coefficients]` like the
      input of `inverse_stft` -- confirm against callers.
    frame_length: A scalar `Tensor`-convertible value. The length each frame
      is padded or truncated to, and the length passed to `window_fn`.
    frame_step: A scalar `Tensor`-convertible value. The step passed to
      `overlap_and_add`.
    fft_length: Optional scalar `Tensor`-convertible value passed to `idct` as
      `n`. If `None`, `_enclosing_power_of_two(frame_length)` is used.
    window_fn: A callable invoked as `window_fn(frame_length, dtype=...)` whose
      result multiplies the frames. If `None`, no windowing is applied.
    name: An optional name for the operation (defaults to 'inverse_stdct').

  Returns:
    The `Tensor` produced by overlap-adding the (optionally windowed) frames
    with step `frame_step`.

  Raises:
    ValueError: Raised by the static shape checks if `stdcts` is not at least
      rank 2, or `frame_length`, `frame_step` or `fft_length` is not scalar.
  """
  with ops.name_scope(name, 'inverse_stdct', [stdcts]):
    stdcts = ops.convert_to_tensor(stdcts, name='stdcts')
    stdcts.shape.with_rank_at_least(2)
    frame_length = ops.convert_to_tensor(frame_length, name='frame_length')
    frame_length.shape.assert_has_rank(0)
    frame_step = ops.convert_to_tensor(frame_step, name='frame_step')
    frame_step.shape.assert_has_rank(0)
    if fft_length is None:
      fft_length = _enclosing_power_of_two(frame_length)
    else:
      fft_length = ops.convert_to_tensor(fft_length, name='fft_length')
      fft_length.shape.assert_has_rank(0)

    # NOTE(review): fft_length is a Tensor at this point (in the else branch
    # above it is converted explicitly) -- confirm that dct_ops.idct accepts a
    # tensor-valued `n`.
    frames = dct_ops.idct(stdcts, n=fft_length)

    # frame_length may be larger or smaller than fft_length, so we pad or
    # truncate frames to frame_length.
    frame_length_static = tensor_util.constant_value(frame_length)
    # If we don't know the shape of frames's inner dimension, pad and
    # truncate to frame_length.
    if (frame_length_static is None or frames.shape.ndims is None or
        frames.shape.as_list()[-1] is None):
      frames = frames[..., :frame_length]
      frames_rank = array_ops.rank(frames)
      frames_shape = array_ops.shape(frames)
      # Zero padding on every leading axis; on the last axis pad the end by
      # however many samples are still missing (never a negative amount).
      paddings = array_ops.concat(
          [array_ops.zeros([frames_rank - 1, 2],
                           dtype=frame_length.dtype),
           [[0, math_ops.maximum(0, frame_length - frames_shape[-1])]]], 0)
      frames = array_ops.pad(frames, paddings)
    # We know frames's last dimension and frame_length statically. If they
    # are different, then pad or truncate frames to frame_length.
    elif frames.shape.as_list()[-1] > frame_length_static:
      frames = frames[..., :frame_length_static]
    elif frames.shape.as_list()[-1] < frame_length_static:
      pad_amount = frame_length_static - frames.shape.as_list()[-1]
      frames = array_ops.pad(frames,
                                  [[0, 0]] * (frames.shape.ndims - 1) +
                                  [[0, pad_amount]])

    # The above code pads the inner dimension of frames to frame_length,
    # but it does so in a way that may not be shape-inference friendly.
    # Restore shape information if we are able to.
    if frame_length_static is not None and frames.shape.ndims is not None:
      frames.set_shape([None] * (frames.shape.ndims - 1) +
                            [frame_length_static])

    # Optionally window and overlap-add the inner 2 dimensions of frames
    # into a single [samples] dimension.
    if window_fn is not None:
      window = window_fn(frame_length, dtype=stdcts.dtype.real_dtype)
      frames *= window
    return reconstruction_ops.overlap_and_add(frames, frame_step)
예제 #29
0
def inverse_stft(stfts,
                 frame_length,
                 frame_step,
                 fft_length=None,
                 window_fn=window_ops.hann_window,
                 name=None):
  """Computes the inverse [Short-time Fourier Transform][stft] of `stfts`.

  To reconstruct an original waveform, a complementary window function should
  be used in inverse_stft. Such a window function can be constructed with
  tf.signal.inverse_stft_window_fn.

  Example:

  ```python
  frame_length = 400
  frame_step = 160
  waveform = tf.placeholder(dtype=tf.float32, shape=[1000])
  stft = tf.signal.stft(waveform, frame_length, frame_step)
  inverse_stft = tf.signal.inverse_stft(
      stft, frame_length, frame_step,
      window_fn=tf.signal.inverse_stft_window_fn(frame_step))
  ```

  If a custom window_fn is used in stft, it must be passed to
  inverse_stft_window_fn:

  ```python
  frame_length = 400
  frame_step = 160
  window_fn = functools.partial(window_ops.hamming_window, periodic=True)
  waveform = tf.placeholder(dtype=tf.float32, shape=[1000])
  stft = tf.signal.stft(
      waveform, frame_length, frame_step, window_fn=window_fn)
  inverse_stft = tf.signal.inverse_stft(
      stft, frame_length, frame_step,
      window_fn=tf.signal.inverse_stft_window_fn(
         frame_step, forward_window_fn=window_fn))
  ```

  Implemented with GPU-compatible ops and supports gradients.

  Args:
    stfts: A `complex64` `[..., frames, fft_unique_bins]` `Tensor` of STFT bins
      representing a batch of `fft_length`-point STFTs where `fft_unique_bins`
      is `fft_length // 2 + 1`
    frame_length: An integer scalar `Tensor`. The window length in samples.
    frame_step: An integer scalar `Tensor`. The number of samples to step.
    fft_length: An integer scalar `Tensor`. The size of the FFT that produced
      `stfts`. If not provided, uses the smallest power of 2 enclosing
      `frame_length`.
    window_fn: A callable that takes a window length and a `dtype` keyword
      argument and returns a `[window_length]` `Tensor` of samples in the
      provided datatype. If set to `None`, no windowing is used.
    name: An optional name for the operation.

  Returns:
    A `[..., samples]` `Tensor` of `float32` signals representing the inverse
    STFT for each input STFT in `stfts`.

  Raises:
    ValueError: If `stfts` is not at least rank 2, `frame_length` is not scalar,
      `frame_step` is not scalar, or `fft_length` is not scalar.

  [stft]: https://en.wikipedia.org/wiki/Short-time_Fourier_transform
  """
  with ops.name_scope(name, 'inverse_stft', [stfts]):
    stfts = ops.convert_to_tensor(stfts, name='stfts')
    stfts.shape.with_rank_at_least(2)
    frame_length = ops.convert_to_tensor(frame_length, name='frame_length')
    frame_length.shape.assert_has_rank(0)
    frame_step = ops.convert_to_tensor(frame_step, name='frame_step')
    frame_step.shape.assert_has_rank(0)
    if fft_length is None:
      fft_length = _enclosing_power_of_two(frame_length)
    else:
      fft_length = ops.convert_to_tensor(fft_length, name='fft_length')
      fft_length.shape.assert_has_rank(0)

    real_frames = fft_ops.irfft(stfts, [fft_length])

    # frame_length may be larger or smaller than fft_length, so we pad or
    # truncate real_frames to frame_length.
    frame_length_static = tensor_util.constant_value(frame_length)

    # Static size of the inner dimension, or None when the rank or that
    # dimension is unknown. `as_list()` yields plain ints/None, so this does
    # not rely on `shape[-1]` being a `Dimension` with a `.value` attribute.
    # The rank check comes first because `as_list()` raises on shapes of
    # unknown rank.
    if real_frames.shape.ndims is None:
      inner_dim_static = None
    else:
      inner_dim_static = real_frames.shape.as_list()[-1]

    # If we don't know the shape of real_frames's inner dimension, pad and
    # truncate to frame_length.
    if frame_length_static is None or inner_dim_static is None:
      real_frames = real_frames[..., :frame_length]
      real_frames_rank = array_ops.rank(real_frames)
      real_frames_shape = array_ops.shape(real_frames)
      paddings = array_ops.concat(
          [array_ops.zeros([real_frames_rank - 1, 2],
                           dtype=frame_length.dtype),
           [[0, math_ops.maximum(0, frame_length - real_frames_shape[-1])]]], 0)
      real_frames = array_ops.pad(real_frames, paddings)
    # We know real_frames's last dimension and frame_length statically. If they
    # are different, then pad or truncate real_frames to frame_length.
    elif inner_dim_static > frame_length_static:
      real_frames = real_frames[..., :frame_length_static]
    elif inner_dim_static < frame_length_static:
      pad_amount = frame_length_static - inner_dim_static
      real_frames = array_ops.pad(real_frames,
                                  [[0, 0]] * (real_frames.shape.ndims - 1) +
                                  [[0, pad_amount]])

    # The above code pads the inner dimension of real_frames to frame_length,
    # but it does so in a way that may not be shape-inference friendly.
    # Restore shape information if we are able to.
    if frame_length_static is not None and real_frames.shape.ndims is not None:
      real_frames.set_shape([None] * (real_frames.shape.ndims - 1) +
                            [frame_length_static])

    # Optionally window and overlap-add the inner 2 dimensions of real_frames
    # into a single [samples] dimension.
    if window_fn is not None:
      window = window_fn(frame_length, dtype=stfts.dtype.real_dtype)
      real_frames *= window
    return reconstruction_ops.overlap_and_add(real_frames, frame_step)
 def test_all_ones(self):
     """Overlap-adds three all-ones frames of length 5 with hop 2.

     Checks both the statically inferred shape ([9]) and the values.
     """
     frames = array_ops.ones([3, 5])
     overlapped = reconstruction_ops.overlap_and_add(frames, 2)
     static_shape = overlapped.shape.as_list()
     self.assertEqual(static_shape, [9])
     # Each output sample equals the number of frames covering that position.
     wanted = np.array([1, 1, 2, 2, 3, 2, 2, 1, 1])
     self.assertAllClose(overlapped, wanted)
 def f(signal):
     """Overlap-adds `signal` using `frame_hop` from the surrounding scope."""
     overlapped = reconstruction_ops.overlap_and_add(signal, frame_hop)
     return overlapped