def _compare(self, signal, frame_length, frame_step, fft_length, tol):
    """Checks stft/inverse_stft against the NumPy reference helpers.

    The forward and inverse transforms are each built twice: once from the
    concrete input and once from a `placeholder_with_default` wrapping that
    input. Both variants must agree with each other, and the directly built
    variants must match `SpectralOpsTest._np_stft` and
    `SpectralOpsTest._np_inverse_stft`.

    Args:
      signal: Input signal; its `.shape` is used for the placeholder.
      frame_length: Forwarded to `stft`, `inverse_stft` and the references.
      frame_step: Forwarded to `stft`, `inverse_stft` and the references.
      fft_length: Forwarded to `stft`, `inverse_stft` and the references.
      tol: Used as both `rtol` and `atol` in the reference comparisons.
    """
    transform_args = (frame_length, frame_step, fft_length)

    # Build the forward transform from the raw signal and from a placeholder,
    # plus the inverse of the directly built forward transform.
    stft_op = spectral_ops.stft(signal, *transform_args, pad_end=False)
    signal_ph = array_ops.placeholder_with_default(signal, shape=signal.shape)
    stft_from_ph_op = spectral_ops.stft(
        signal_ph, *transform_args, pad_end=False)
    inverse_op = spectral_ops.inverse_stft(stft_op, *transform_args)

    stft_value, stft_from_ph_value, inverse_value = self.evaluate(
        [stft_op, stft_from_ph_op, inverse_op])

    # Repeat the inverse transform on a placeholder holding the computed STFT.
    stft_ph = array_ops.placeholder_with_default(
        stft_value, shape=stft_value.shape)
    inverse_from_ph_op = spectral_ops.inverse_stft(stft_ph, *transform_args)
    inverse_from_ph_value = self.evaluate(inverse_from_ph_op)

    # Confirm that there is no difference in output when shape/rank is fully
    # unknown or known.
    self.assertAllClose(stft_value, stft_from_ph_value)
    self.assertAllClose(inverse_value, inverse_from_ph_value)

    # Compare against the NumPy reference implementations.
    reference_stft = SpectralOpsTest._np_stft(
        signal, fft_length, frame_step, frame_length)
    self.assertAllClose(reference_stft, stft_value, rtol=tol, atol=tol)

    reference_inverse = SpectralOpsTest._np_inverse_stft(
        reference_stft, fft_length, frame_step, frame_length)
    self.assertAllClose(reference_inverse, inverse_value, rtol=tol, atol=tol)
def _compare(self, signal, frame_length, frame_step, fft_length, tol=1e-4):
    """Checks stft/inverse_stft against the NumPy reference helpers.

    The forward and inverse transforms are each built twice: once from the
    concrete input and once from a placeholder created with only a dtype (no
    shape argument) and fed at run time. Both variants must agree with each
    other, and the directly built variants must match
    `SpectralOpsTest._np_stft` and `SpectralOpsTest._np_inverse_stft`.

    Args:
      signal: Input signal; its `.dtype` determines the placeholder dtype.
      frame_length: Forwarded to `stft`, `inverse_stft` and the references.
      frame_step: Forwarded to `stft`, `inverse_stft` and the references.
      fft_length: Forwarded to `stft`, `inverse_stft` and the references.
      tol: Used as both `rtol` and `atol` in the reference comparisons.
        Defaults to 1e-4, the value that was previously hard-coded.
    """
    with spectral_ops_test_util.fft_kernel_label_map(), (
            self.cached_session(use_gpu=True)) as sess:
        actual_stft = spectral_ops.stft(
            signal, frame_length, frame_step, fft_length, pad_end=False)
        signal_ph = array_ops.placeholder(
            dtype=dtypes.as_dtype(signal.dtype))
        actual_stft_from_ph = spectral_ops.stft(
            signal_ph, frame_length, frame_step, fft_length, pad_end=False)

        actual_inverse_stft = spectral_ops.inverse_stft(
            actual_stft, frame_length, frame_step, fft_length)

        actual_stft, actual_stft_from_ph, actual_inverse_stft = sess.run(
            [actual_stft, actual_stft_from_ph, actual_inverse_stft],
            feed_dict={signal_ph: signal})

        # Repeat the inverse transform with the computed STFT fed through a
        # dtype-only placeholder.
        actual_stft_ph = array_ops.placeholder(dtype=actual_stft.dtype)
        actual_inverse_stft_from_ph = sess.run(
            spectral_ops.inverse_stft(
                actual_stft_ph, frame_length, frame_step, fft_length),
            feed_dict={actual_stft_ph: actual_stft})

        # Confirm that there is no difference in output when shape/rank is
        # fully unknown or known.
        self.assertAllClose(actual_stft, actual_stft_from_ph)
        self.assertAllClose(actual_inverse_stft, actual_inverse_stft_from_ph)

        expected_stft = SpectralOpsTest._np_stft(
            signal, fft_length, frame_step, frame_length)
        self.assertAllClose(expected_stft, actual_stft, rtol=tol, atol=tol)

        expected_inverse_stft = SpectralOpsTest._np_inverse_stft(
            expected_stft, fft_length, frame_step, frame_length)
        self.assertAllClose(
            expected_inverse_stft, actual_inverse_stft, rtol=tol, atol=tol)
def test_stft_round_trip(self, signal_length, frame_length, frame_step,
                         fft_length, np_rtype, threshold,
                         corrected_threshold):
    """Round-trips white Gaussian noise through stft and inverse_stft.

    The reconstruction built without an explicit `window_fn` must match the
    input up to a scale factor, and the reconstruction built with
    `inverse_stft_window_fn(frame_step)` must match the input directly.
    Closeness is the standard deviation of the difference, computed after
    dropping `frame_length` samples at both edges.

    Args:
      signal_length: Number of samples in the generated signal.
      frame_length: Forwarded to `stft` / `inverse_stft`; also the number of
        edge samples ignored on each side.
      frame_step: Forwarded to `stft` / `inverse_stft` and to
        `inverse_stft_window_fn`.
      fft_length: Forwarded to `stft` / `inverse_stft`.
      np_rtype: NumPy dtype the generated signal is cast to.
      threshold: Upper bound for the scale-normalized comparison.
      corrected_threshold: Upper bound for the window-corrected comparison.
    """
    # Generate a random white Gaussian signal.
    original = np.random.normal(size=signal_length).astype(np_rtype)

    spectrum = spectral_ops.stft(
        original, frame_length, frame_step, fft_length, pad_end=False)
    plain_op = spectral_ops.inverse_stft(
        spectrum, frame_length, frame_step, fft_length)
    corrected_window = spectral_ops.inverse_stft_window_fn(frame_step)
    corrected_op = spectral_ops.inverse_stft(
        spectrum, frame_length, frame_step, fft_length,
        window_fn=corrected_window)
    recovered, recovered_corrected = self.evaluate([plain_op, corrected_op])

    # Truncate the signal to the size of the inverse STFT, then ignore the
    # frame_length samples at either edge of every array.
    interior = slice(frame_length, -frame_length)
    original = original[:recovered.shape[0]][interior]
    recovered = recovered[interior]
    recovered_corrected = recovered_corrected[interior]

    # Check that the inverse and original signal are close up to a scale
    # factor.
    recovered_unit = recovered / np.mean(np.abs(recovered))
    original_unit = original / np.mean(np.abs(original))
    scaled_error = np.std(recovered_unit - original_unit)
    self.assertLess(scaled_error, threshold)

    # Check that the inverse with correction and original signal are close.
    corrected_error = np.std(recovered_corrected - original)
    self.assertLess(corrected_error, corrected_threshold)
def _compare(self, signal, frame_length, frame_step, fft_length, tol=1e-4):
    """Checks stft/inverse_stft against the NumPy reference helpers.

    The forward and inverse transforms are each built twice: once from the
    concrete input and once from a placeholder created with only a dtype (no
    shape argument) and fed at run time. Both variants must agree with each
    other, and the directly built variants must match
    `SpectralOpsTest._np_stft` and `SpectralOpsTest._np_inverse_stft`.

    Args:
      signal: Input signal; its `.dtype` determines the placeholder dtype.
      frame_length: Forwarded to `stft`, `inverse_stft` and the references.
      frame_step: Forwarded to `stft`, `inverse_stft` and the references.
      fft_length: Forwarded to `stft`, `inverse_stft` and the references.
      tol: Used as both `rtol` and `atol` in the reference comparisons.
        Defaults to 1e-4, the value that was previously hard-coded.
    """
    with spectral_ops_test_util.fft_kernel_label_map(), (
            self.cached_session(use_gpu=True)) as sess:
        actual_stft = spectral_ops.stft(
            signal, frame_length, frame_step, fft_length, pad_end=False)
        signal_ph = array_ops.placeholder(
            dtype=dtypes.as_dtype(signal.dtype))
        actual_stft_from_ph = spectral_ops.stft(
            signal_ph, frame_length, frame_step, fft_length, pad_end=False)

        actual_inverse_stft = spectral_ops.inverse_stft(
            actual_stft, frame_length, frame_step, fft_length)

        actual_stft, actual_stft_from_ph, actual_inverse_stft = sess.run(
            [actual_stft, actual_stft_from_ph, actual_inverse_stft],
            feed_dict={signal_ph: signal})

        # Repeat the inverse transform with the computed STFT fed through a
        # dtype-only placeholder.
        actual_stft_ph = array_ops.placeholder(dtype=actual_stft.dtype)
        actual_inverse_stft_from_ph = sess.run(
            spectral_ops.inverse_stft(
                actual_stft_ph, frame_length, frame_step, fft_length),
            feed_dict={actual_stft_ph: actual_stft})

        # Confirm that there is no difference in output when shape/rank is
        # fully unknown or known.
        self.assertAllClose(actual_stft, actual_stft_from_ph)
        self.assertAllClose(actual_inverse_stft, actual_inverse_stft_from_ph)

        expected_stft = SpectralOpsTest._np_stft(
            signal, fft_length, frame_step, frame_length)
        self.assertAllClose(expected_stft, actual_stft, rtol=tol, atol=tol)

        expected_inverse_stft = SpectralOpsTest._np_inverse_stft(
            expected_stft, fft_length, frame_step, frame_length)
        self.assertAllClose(
            expected_inverse_stft, actual_inverse_stft, rtol=tol, atol=tol)
def test_stft_round_trip(self):
    """Round-trips white Gaussian noise through stft and inverse_stft.

    For every configuration, the reconstruction built without an explicit
    `window_fn` must match the input up to a scale factor, and the
    reconstruction built with `inverse_stft_window_fn(frame_step)` must match
    the input directly. Closeness is the standard deviation of the
    difference, computed after dropping `frame_length` samples at both edges.
    """
    # Each row is (signal_length, frame_length, frame_step, fft_length,
    # threshold, corrected_threshold).
    test_configs = [
        (4096, 256, 32, 256, 1e-5, 1e-6),  # 87.5% overlap.
        (4096, 256, 64, 256, 1e-5, 1e-6),  # 75% overlap.
        (4096, 128, 25, 128, 1e-3, 1e-6),  # Odd frame hop.
        (4096, 127, 32, 128, 1e-3, 1e-6),  # Odd frame length.
        (4096, 128, 64, 128, 0.40, 1e-6),  # 50% overlap.
    ]

    for config in test_configs:
        (signal_length, frame_length, frame_step, fft_length, threshold,
         corrected_threshold) = config

        # Generate a random white Gaussian signal.
        noise_op = random_ops.random_normal([signal_length])

        label_map = spectral_ops_test_util.fft_kernel_label_map()
        with label_map, self.cached_session(use_gpu=True) as sess:
            spectrum = spectral_ops.stft(
                noise_op, frame_length, frame_step, fft_length,
                pad_end=False)
            plain_op = spectral_ops.inverse_stft(
                spectrum, frame_length, frame_step, fft_length)
            corrected_window = spectral_ops.inverse_stft_window_fn(frame_step)
            corrected_op = spectral_ops.inverse_stft(
                spectrum, frame_length, frame_step, fft_length,
                window_fn=corrected_window)

            # Fetch the noise together with both reconstructions in a single
            # run call.
            original, recovered, recovered_corrected = sess.run(
                [noise_op, plain_op, corrected_op])

            # Truncate the signal to the size of the inverse STFT, then
            # ignore the frame_length samples at either edge of every array.
            interior = slice(frame_length, -frame_length)
            original = original[:recovered.shape[0]][interior]
            recovered = recovered[interior]
            recovered_corrected = recovered_corrected[interior]

            # Check that the inverse and original signal are close up to a
            # scale factor.
            recovered_unit = recovered / np.mean(np.abs(recovered))
            original_unit = original / np.mean(np.abs(original))
            self.assertLess(
                np.std(recovered_unit - original_unit), threshold)

            # Check that the inverse with correction and original signal are
            # close.
            self.assertLess(
                np.std(recovered_corrected - original), corrected_threshold)
def test_stft_round_trip(self):
    """Round-trips white Gaussian noise through stft and inverse_stft.

    For every configuration, the reconstruction built without an explicit
    `window_fn` must match the input up to a scale factor, and the
    reconstruction built with `inverse_stft_window_fn(frame_step)` must match
    the input directly. Closeness is the standard deviation of the
    difference, computed after dropping `frame_length` samples at both edges.
    """
    # Each row is (signal_length, frame_length, frame_step, fft_length,
    # threshold, corrected_threshold).
    test_configs = [
        (4096, 256, 32, 256, 1e-5, 1e-6),  # 87.5% overlap.
        (4096, 256, 64, 256, 1e-5, 1e-6),  # 75% overlap.
        (4096, 128, 25, 128, 1e-3, 1e-6),  # Odd frame hop.
        (4096, 127, 32, 128, 1e-3, 1e-6),  # Odd frame length.
        (4096, 128, 64, 128, 0.40, 1e-6),  # 50% overlap.
    ]

    for config in test_configs:
        (signal_length, frame_length, frame_step, fft_length, threshold,
         corrected_threshold) = config

        # Generate a random white Gaussian signal.
        noise_op = random_ops.random_normal([signal_length])

        label_map = spectral_ops_test_util.fft_kernel_label_map()
        with label_map, self.cached_session(use_gpu=True) as sess:
            spectrum = spectral_ops.stft(
                noise_op, frame_length, frame_step, fft_length,
                pad_end=False)
            plain_op = spectral_ops.inverse_stft(
                spectrum, frame_length, frame_step, fft_length)
            corrected_window = spectral_ops.inverse_stft_window_fn(frame_step)
            corrected_op = spectral_ops.inverse_stft(
                spectrum, frame_length, frame_step, fft_length,
                window_fn=corrected_window)

            # Fetch the noise together with both reconstructions in a single
            # run call.
            original, recovered, recovered_corrected = sess.run(
                [noise_op, plain_op, corrected_op])

            # Truncate the signal to the size of the inverse STFT, then
            # ignore the frame_length samples at either edge of every array.
            interior = slice(frame_length, -frame_length)
            original = original[:recovered.shape[0]][interior]
            recovered = recovered[interior]
            recovered_corrected = recovered_corrected[interior]

            # Check that the inverse and original signal are close up to a
            # scale factor.
            recovered_unit = recovered / np.mean(np.abs(recovered))
            original_unit = original / np.mean(np.abs(original))
            self.assertLess(
                np.std(recovered_unit - original_unit), threshold)

            # Check that the inverse with correction and original signal are
            # close.
            self.assertLess(
                np.std(recovered_corrected - original), corrected_threshold)
def test_shapes(self):
    """Checks static and evaluated output shapes of stft and inverse_stft."""
    zeros_signal = np.zeros((512,)).astype(np.float32)

    # (stft keyword arguments, expected output shape) pairs.
    # If fft_length is not provided, the smallest enclosing power of 2 of
    # frame_length (8) is used.
    stft_cases = [
        ({"frame_length": 7, "frame_step": 8}, [64, 5]),
        ({"frame_length": 8, "frame_step": 8}, [64, 5]),
        ({"frame_length": 8, "frame_step": 8, "fft_length": 16}, [64, 9]),
        ({"frame_length": 16, "frame_step": 8, "fft_length": 8}, [64, 5]),
    ]
    for stft_kwargs, expected_shape in stft_cases:
        spectrum = spectral_ops.stft(
            zeros_signal, pad_end=True, **stft_kwargs)
        self.assertAllEqual(expected_shape, spectrum.shape.as_list())
        self.assertAllEqual(expected_shape, self.evaluate(spectrum).shape)

    # The inverse transform of 32 frames with frame_length == frame_step == 8.
    zeros_spectrum = np.zeros((32, 9)).astype(np.complex64)
    recovered = spectral_ops.inverse_stft(
        zeros_spectrum, frame_length=8, fft_length=16, frame_step=8)
    num_frames = zeros_spectrum.shape[0]
    expected_length = (num_frames - 1) * 8 + 8
    self.assertAllEqual([256], recovered.shape.as_list())
    self.assertAllEqual([expected_length], self.evaluate(recovered).shape)
def test_gradients_numerical(self):
    """Bounds the gradient error of stft and inverse_stft.

    For each configuration, `test.compute_gradient_error` is evaluated for
    stft with respect to the signal and for inverse_stft with respect to the
    STFT. The errors must stay below 2e-3 and 5e-4 respectively.
    """
    # Each row is (signal_length, frame_length, frame_step, fft_length).
    # TODO(rjryan): Investigate why STFT gradient error is so high.
    test_configs = (
        (64, 16, 8, 16),
        (64, 16, 16, 16),
        (64, 16, 7, 16),
        (64, 7, 4, 9),
        (29, 5, 1, 10),
    )

    label_map = spectral_ops_test_util.fft_kernel_label_map()
    with label_map, self.session(use_gpu=True):
        for config in test_configs:
            signal_length, frame_length, frame_step, fft_length = config

            signal_shape = [signal_length]
            signal = random_ops.random_uniform(signal_shape)

            # Shapes handed to the gradient checker.
            num_frames = max(
                0, 1 + (signal_length - frame_length) // frame_step)
            stft_shape = [num_frames, fft_length // 2 + 1]
            stft = spectral_ops.stft(
                signal, frame_length, frame_step, fft_length, pad_end=False)

            inverse_stft_shape = [
                (num_frames - 1) * frame_step + frame_length]
            inverse_stft = spectral_ops.inverse_stft(
                stft, frame_length, frame_step, fft_length)

            stft_error = test.compute_gradient_error(
                signal, [signal_length], stft, stft_shape)
            inverse_stft_error = test.compute_gradient_error(
                stft, stft_shape, inverse_stft, inverse_stft_shape)

            self.assertLess(stft_error, 2e-3)
            self.assertLess(inverse_stft_error, 5e-4)
def test_shapes(self):
    """Checks static and evaluated output shapes of stft and inverse_stft.

    Everything runs inside `fft_kernel_label_map()` and a GPU-enabled test
    session.
    """
    label_map = spectral_ops_test_util.fft_kernel_label_map()
    with label_map, self.session(use_gpu=True):
        zeros_signal = np.zeros((512,)).astype(np.float32)

        # (stft keyword arguments, expected output shape) pairs.
        # If fft_length is not provided, the smallest enclosing power of 2 of
        # frame_length (8) is used.
        stft_cases = [
            ({"frame_length": 7, "frame_step": 8}, [64, 5]),
            ({"frame_length": 8, "frame_step": 8}, [64, 5]),
            ({"frame_length": 8, "frame_step": 8, "fft_length": 16},
             [64, 9]),
            ({"frame_length": 16, "frame_step": 8, "fft_length": 8},
             [64, 5]),
        ]
        for stft_kwargs, expected_shape in stft_cases:
            spectrum = spectral_ops.stft(
                zeros_signal, pad_end=True, **stft_kwargs)
            self.assertAllEqual(expected_shape, spectrum.shape.as_list())
            self.assertAllEqual(
                expected_shape, self.evaluate(spectrum).shape)

        # The inverse transform of 32 frames with
        # frame_length == frame_step == 8.
        zeros_spectrum = np.zeros((32, 9)).astype(np.complex64)
        recovered = spectral_ops.inverse_stft(
            zeros_spectrum, frame_length=8, fft_length=16, frame_step=8)
        num_frames = zeros_spectrum.shape[0]
        expected_length = (num_frames - 1) * 8 + 8
        self.assertAllEqual([256], recovered.shape.as_list())
        self.assertAllEqual(
            [expected_length], self.evaluate(recovered).shape)
def test_gradients_numerical(self):
    """Bounds the gradient error of stft and inverse_stft.

    For each configuration, `test.compute_gradient_error` is evaluated for
    stft with respect to the signal and for inverse_stft with respect to the
    STFT. The errors must stay below 2e-3 and 5e-4 respectively.
    """
    # Each row is (signal_length, frame_length, frame_step, fft_length).
    # TODO(rjryan): Investigate why STFT gradient error is so high.
    test_configs = (
        (64, 16, 8, 16),
        (64, 16, 16, 16),
        (64, 16, 7, 16),
        (64, 7, 4, 9),
        (29, 5, 1, 10),
    )

    label_map = spectral_ops_test_util.fft_kernel_label_map()
    with label_map, self.session(use_gpu=True):
        for config in test_configs:
            signal_length, frame_length, frame_step, fft_length = config

            signal_shape = [signal_length]
            signal = random_ops.random_uniform(signal_shape)

            # Shapes handed to the gradient checker.
            num_frames = max(
                0, 1 + (signal_length - frame_length) // frame_step)
            stft_shape = [num_frames, fft_length // 2 + 1]
            stft = spectral_ops.stft(
                signal, frame_length, frame_step, fft_length, pad_end=False)

            inverse_stft_shape = [
                (num_frames - 1) * frame_step + frame_length]
            inverse_stft = spectral_ops.inverse_stft(
                stft, frame_length, frame_step, fft_length)

            stft_error = test.compute_gradient_error(
                signal, [signal_length], stft, stft_shape)
            inverse_stft_error = test.compute_gradient_error(
                stft, stft_shape, inverse_stft, inverse_stft_shape)

            self.assertLess(stft_error, 2e-3)
            self.assertLess(inverse_stft_error, 5e-4)
def backward(stft):
    """Applies `spectral_ops.inverse_stft` to `stft`.

    `frame_length`, `frame_step` and `fft_length` are not parameters; they
    are free variables resolved from the surrounding scope.
    """
    reconstructed = spectral_ops.inverse_stft(
        stft, frame_length, frame_step, fft_length)
    return reconstructed