def _compare(self, signal, frame_length, frame_step, fft_length):
  """Checks stft/inverse_stft against the NumPy reference helpers.

  Each transform is evaluated twice: once on the concrete input and once
  through a placeholder created without a shape. The two results must agree,
  and must also match `SpectralOpsTest._np_stft` /
  `SpectralOpsTest._np_inverse_stft`.

  Args:
    signal: Input signal; its `dtype` attribute picks the placeholder dtype
      and the value itself is fed to that placeholder.
    frame_length: Frame length forwarded to `stft` and `inverse_stft`.
    frame_step: Frame step forwarded to `stft` and `inverse_stft`.
    fft_length: FFT length forwarded to `stft` and `inverse_stft`.
  """
  tol = 1e-4
  with spectral_ops_test_util.fft_kernel_label_map():
    with self.test_session(use_gpu=True) as sess:
      # Forward transform built directly on the concrete signal.
      stft_op = spectral_ops.stft(
          signal, frame_length, frame_step, fft_length, pad_end=False)
      # Same transform, fed through a placeholder with no shape given.
      signal_ph = array_ops.placeholder(dtype=dtypes.as_dtype(signal.dtype))
      stft_from_ph_op = spectral_ops.stft(
          signal_ph, frame_length, frame_step, fft_length, pad_end=False)
      inverse_stft_op = spectral_ops.inverse_stft(
          stft_op, frame_length, frame_step, fft_length)

      fetches = [stft_op, stft_from_ph_op, inverse_stft_op]
      actual_stft, actual_stft_from_ph, actual_inverse_stft = sess.run(
          fetches, feed_dict={signal_ph: signal})

      # Inverse transform fed through a placeholder, using the evaluated STFT.
      stft_ph = array_ops.placeholder(dtype=actual_stft.dtype)
      inverse_from_ph_op = spectral_ops.inverse_stft(
          stft_ph, frame_length, frame_step, fft_length)
      actual_inverse_stft_from_ph = sess.run(
          inverse_from_ph_op, feed_dict={stft_ph: actual_stft})

      # Confirm that there is no difference in output when shape/rank is
      # fully unknown or known.
      self.assertAllClose(actual_stft, actual_stft_from_ph)
      self.assertAllClose(actual_inverse_stft, actual_inverse_stft_from_ph)

      # Compare against the reference implementations.
      expected_stft = SpectralOpsTest._np_stft(
          signal, fft_length, frame_step, frame_length)
      self.assertAllClose(expected_stft, actual_stft, tol, tol)

      expected_inverse_stft = SpectralOpsTest._np_inverse_stft(
          expected_stft, fft_length, frame_step, frame_length)
      self.assertAllClose(
          expected_inverse_stft, actual_inverse_stft, tol, tol)
def test_shapes(self):
  # Verifies the static and evaluated output shapes of stft and inverse_stft.
  with spectral_ops_test_util.fft_kernel_label_map():
    with self.test_session(use_gpu=True):
      signal = np.zeros((512,)).astype(np.float32)

      # (stft keyword arguments, expected output shape) pairs. If fft_length
      # is not provided, the smallest enclosing power of 2 of frame_length
      # (8) is used.
      stft_cases = [
          ({"frame_length": 7, "frame_step": 8}, [64, 5]),
          ({"frame_length": 8, "frame_step": 8}, [64, 5]),
          ({"frame_length": 8, "frame_step": 8, "fft_length": 16}, [64, 9]),
      ]
      for stft_kwargs, expected_shape in stft_cases:
        stft = spectral_ops.stft(signal, pad_end=True, **stft_kwargs)
        self.assertAllEqual(expected_shape, stft.shape.as_list())
        self.assertAllEqual(expected_shape, stft.eval().shape)

      frame_length = 8
      frame_step = 8
      stft_values = np.zeros((32, 9)).astype(np.complex64)
      inverse_stft = spectral_ops.inverse_stft(
          stft_values, frame_length=frame_length, fft_length=16,
          frame_step=frame_step)
      num_frames = stft_values.shape[0]
      expected_length = (num_frames - 1) * frame_step + frame_length
      # The static shape is asserted to be unknown; the concrete length is
      # checked on the evaluated result.
      self.assertAllEqual([None], inverse_stft.shape.as_list())
      self.assertAllEqual([expected_length], inverse_stft.eval().shape)
def test_shapes(self):
  # Verifies the static and evaluated output shapes of stft and inverse_stft,
  # including the case where fft_length is smaller than frame_length.
  with spectral_ops_test_util.fft_kernel_label_map():
    with self.test_session(use_gpu=True):
      signal = np.zeros((512,)).astype(np.float32)

      # (stft keyword arguments, expected output shape) pairs. If fft_length
      # is not provided, the smallest enclosing power of 2 of frame_length
      # (8) is used.
      stft_cases = [
          ({"frame_length": 7, "frame_step": 8}, [64, 5]),
          ({"frame_length": 8, "frame_step": 8}, [64, 5]),
          ({"frame_length": 8, "frame_step": 8, "fft_length": 16}, [64, 9]),
          ({"frame_length": 16, "frame_step": 8, "fft_length": 8}, [64, 5]),
      ]
      for stft_kwargs, expected_shape in stft_cases:
        stft = spectral_ops.stft(signal, pad_end=True, **stft_kwargs)
        self.assertAllEqual(expected_shape, stft.shape.as_list())
        self.assertAllEqual(expected_shape, stft.eval().shape)

      frame_length = 8
      frame_step = 8
      stft_values = np.zeros((32, 9)).astype(np.complex64)
      inverse_stft = spectral_ops.inverse_stft(
          stft_values, frame_length=frame_length, fft_length=16,
          frame_step=frame_step)
      num_frames = stft_values.shape[0]
      expected_length = (num_frames - 1) * frame_step + frame_length
      # The static shape is asserted to be unknown; the concrete length is
      # checked on the evaluated result.
      self.assertAllEqual([None], inverse_stft.shape.as_list())
      self.assertAllEqual([expected_length], inverse_stft.eval().shape)
def testContribSignalSTFT(self):
  # Compares signal.stft against scipy.signal.stft on shuffled ramp data and
  # checks that the gradient graph can be built and executed.
  window_size = 512
  hop_size = 128
  dims = (window_size * 20,)
  shape = BATCH_DIMS + dims

  # Deterministically shuffled input values.
  data = np.arange(np.prod(shape)) / np.prod(dims)
  np.random.seed(123)
  np.random.shuffle(data)
  data = np.reshape(data.astype(np.float32), shape)

  # Reference result from SciPy, rearranged so the last two axes are swapped.
  window = sps.get_window("hann", window_size)
  scipy_result = sps.stft(
      data,
      nperseg=window_size,
      noverlap=window_size - hop_size,
      boundary=None,
      window=window)
  expected = np.swapaxes(scipy_result[2], -1, -2)
  expected *= window.sum()  # scipy divides by window sum

  with self.test_session() as sess:
    with self.test_scope():
      ph = array_ops.placeholder(
          dtypes.as_dtype(data.dtype), shape=data.shape)
      out = signal.stft(ph, window_size, hop_size)
      upstream = array_ops.ones_like(out)
      grad = gradients_impl.gradients(out, ph, grad_ys=upstream)

    # For gradients, we simply verify that they compile & execute.
    value, _ = sess.run([out, grad], {ph: data})
    self.assertAllClose(expected, value, rtol=RTOL, atol=ATOL)
def testContribSignalSTFT(self):
  # Compares signal.stft against scipy.signal.stft on shuffled ramp data and
  # checks that the gradient graph can be built and executed.
  window_size = 512
  hop_size = 128
  dims = (window_size * 20,)
  shape = BATCH_DIMS + dims

  # Deterministically shuffled input values.
  data = np.arange(np.prod(shape)) / np.prod(dims)
  np.random.seed(123)
  np.random.shuffle(data)
  data = np.reshape(data.astype(np.float32), shape)

  # Reference result from SciPy, rearranged so the last two axes are swapped.
  window = sps.get_window("hann", window_size)
  scipy_result = sps.stft(
      data,
      nperseg=window_size,
      noverlap=window_size - hop_size,
      boundary=None,
      window=window)
  expected = np.swapaxes(scipy_result[2], -1, -2)
  expected *= window.sum()  # scipy divides by window sum

  with self.test_session() as sess:
    with self.test_scope():
      ph = array_ops.placeholder(
          dtypes.as_dtype(data.dtype), shape=data.shape)
      out = signal.stft(ph, window_size, hop_size)
      upstream = array_ops.ones_like(out)
      grad = gradients_impl.gradients(out, ph, grad_ys=upstream)

    # For gradients, we simply verify that they compile & execute.
    value, _ = sess.run([out, grad], {ph: data})
    self.assertAllClose(expected, value, rtol=RTOL, atol=ATOL)
def test_gradients_numerical(self):
  # Bounds the error reported by test.compute_gradient_error for stft and
  # inverse_stft over several framing configurations.
  with spectral_ops_test_util.fft_kernel_label_map():
    with self.test_session(use_gpu=True):
      # Each entry is (signal_length, frame_length, frame_step, fft_length).
      # TODO(rjryan): Investigate why STFT gradient error is so high.
      test_configs = [
          (64, 16, 8, 16),
          (64, 16, 16, 16),
          (64, 16, 7, 16),
          (64, 7, 4, 9),
          (29, 5, 1, 10),
      ]
      for config in test_configs:
        signal_length, frame_length, frame_step, fft_length = config

        signal_shape = [signal_length]
        signal = random_ops.random_uniform(signal_shape)

        # Shapes handed to the gradient checker for each op's output.
        num_frames = max(0, 1 + (signal_length - frame_length) // frame_step)
        stft_shape = [num_frames, fft_length // 2 + 1]
        stft = spectral_ops.stft(
            signal, frame_length, frame_step, fft_length, pad_end=False)

        inverse_stft_shape = [(num_frames - 1) * frame_step + frame_length]
        inverse_stft = spectral_ops.inverse_stft(
            stft, frame_length, frame_step, fft_length)

        stft_error = test.compute_gradient_error(
            signal, [signal_length], stft, stft_shape)
        inverse_stft_error = test.compute_gradient_error(
            stft, stft_shape, inverse_stft, inverse_stft_shape)

        self.assertLess(stft_error, 2e-3)
        self.assertLess(inverse_stft_error, 5e-4)
def _compute_stft_gradient(signal, frame_length=32, frame_step=16,
                           fft_length=32):
  """Returns the gradient of the summed STFT magnitude w.r.t. `signal`.

  Args:
    signal: Input passed to `spectral_ops.stft` and differentiated against.
    frame_length: Frame length forwarded to `spectral_ops.stft`.
    frame_step: Frame step forwarded to `spectral_ops.stft`.
    fft_length: FFT length forwarded to `spectral_ops.stft`.

  Returns:
    The first (only) entry of `gradients_impl.gradients([loss], [signal])`.
  """
  stft = spectral_ops.stft(signal, frame_length, frame_step, fft_length)
  # Scalar loss: sum of |STFT| over every element.
  loss = math_ops.reduce_sum(math_ops.abs(stft))
  grads = gradients_impl.gradients([loss], [signal])
  return grads[0]
def _compare_round_trip(self, signal, frame_length, frame_step, fft_length):
  """Asserts stft followed by inverse_stft recovers `signal` up to scale.

  Args:
    signal: Input to `stft`; it is evaluated via `sess.run` alongside the
      reconstruction.
    frame_length: Frame length forwarded to `stft` and `inverse_stft`.
    frame_step: Frame step forwarded to `stft` and `inverse_stft`.
    fft_length: FFT length forwarded to `stft` and `inverse_stft`.
  """
  with spectral_ops_test_util.fft_kernel_label_map():
    with self.test_session(use_gpu=True) as sess:
      stft = spectral_ops.stft(
          signal, frame_length, frame_step, fft_length, pad_end=False)
      inverse_stft_op = spectral_ops.inverse_stft(
          stft, frame_length, frame_step, fft_length)
      original, reconstructed = sess.run([signal, inverse_stft_op])

      # Since the shapes can differ due to padding, pad both signals to the
      # max of their lengths.
      max_length = max(original.shape[0], reconstructed.shape[0])
      original_pad = max_length - original.shape[0]
      original = np.pad(original, (0, original_pad), "constant")
      reconstructed_pad = max_length - reconstructed.shape[0]
      reconstructed = np.pad(reconstructed, (0, reconstructed_pad), "constant")

      # Ignore the frame_length samples at either edge.
      interior = slice(frame_length, original.shape[0] - frame_length)
      ratio = original[interior] / reconstructed[interior]

      # Check that the inverse and original signal are equal up to a constant
      # factor.
      self.assertLess(np.var(ratio), 2e-5)
def test_stft_round_trip(self):
  # Round-trips random normal noise through stft -> inverse_stft and bounds
  # the reconstruction error, both with the default inverse window and with
  # the window from inverse_stft_window_fn.
  #
  # Each config is (signal_length, frame_length, frame_step, fft_length,
  # threshold, corrected_threshold).
  test_configs = [
      (4096, 256, 32, 256, 1e-5, 1e-6),  # 87.5% overlap.
      (4096, 256, 64, 256, 1e-5, 1e-6),  # 75% overlap.
      (4096, 128, 25, 128, 1e-3, 1e-6),  # Odd frame hop.
      (4096, 127, 32, 128, 1e-3, 1e-6),  # Odd frame length.
      (4096, 128, 64, 128, 0.40, 1e-6),  # 50% overlap.
  ]

  for config in test_configs:
    (signal_length, frame_length, frame_step, fft_length, threshold,
     corrected_threshold) = config

    # Generate a random white Gaussian signal.
    noise = random_ops.random_normal([signal_length])

    with spectral_ops_test_util.fft_kernel_label_map():
      with self.test_session(use_gpu=True) as sess:
        stft = spectral_ops.stft(
            noise, frame_length, frame_step, fft_length, pad_end=False)
        plain_inverse_op = spectral_ops.inverse_stft(
            stft, frame_length, frame_step, fft_length)
        corrected_window_fn = spectral_ops.inverse_stft_window_fn(frame_step)
        corrected_inverse_op = spectral_ops.inverse_stft(
            stft, frame_length, frame_step, fft_length,
            window_fn=corrected_window_fn)
        signal, inverse_stft, inverse_stft_corrected = sess.run(
            [noise, plain_inverse_op, corrected_inverse_op])

        # Truncate signal to the size of inverse stft.
        signal = signal[:inverse_stft.shape[0]]

        # Ignore the frame_length samples at either edge.
        interior = slice(frame_length, -frame_length)
        signal = signal[interior]
        inverse_stft = inverse_stft[interior]
        inverse_stft_corrected = inverse_stft_corrected[interior]

        # Check that the inverse and original signal are close up to a scale
        # factor.
        inverse_stft_scaled = inverse_stft / np.mean(np.abs(inverse_stft))
        signal_scaled = signal / np.mean(np.abs(signal))
        scaled_error = np.std(inverse_stft_scaled - signal_scaled)
        self.assertLess(scaled_error, threshold)

        # Check that the inverse with correction and original signal are
        # close.
        corrected_error = np.std(inverse_stft_corrected - signal)
        self.assertLess(corrected_error, corrected_threshold)
def test_stft_round_trip(self):
  # Round-trips random normal noise through stft -> inverse_stft and bounds
  # the reconstruction error, both with the default inverse window and with
  # the window from inverse_stft_window_fn.
  #
  # Each config is (signal_length, frame_length, frame_step, fft_length,
  # threshold, corrected_threshold).
  test_configs = [
      (4096, 256, 32, 256, 1e-5, 1e-6),  # 87.5% overlap.
      (4096, 256, 64, 256, 1e-5, 1e-6),  # 75% overlap.
      (4096, 128, 25, 128, 1e-3, 1e-6),  # Odd frame hop.
      (4096, 127, 32, 128, 1e-3, 1e-6),  # Odd frame length.
      (4096, 128, 64, 128, 0.40, 1e-6),  # 50% overlap.
  ]

  for config in test_configs:
    (signal_length, frame_length, frame_step, fft_length, threshold,
     corrected_threshold) = config

    # Generate a random white Gaussian signal.
    noise = random_ops.random_normal([signal_length])

    with spectral_ops_test_util.fft_kernel_label_map():
      with self.test_session(use_gpu=True) as sess:
        stft = spectral_ops.stft(
            noise, frame_length, frame_step, fft_length, pad_end=False)
        plain_inverse_op = spectral_ops.inverse_stft(
            stft, frame_length, frame_step, fft_length)
        corrected_window_fn = spectral_ops.inverse_stft_window_fn(frame_step)
        corrected_inverse_op = spectral_ops.inverse_stft(
            stft, frame_length, frame_step, fft_length,
            window_fn=corrected_window_fn)
        signal, inverse_stft, inverse_stft_corrected = sess.run(
            [noise, plain_inverse_op, corrected_inverse_op])

        # Truncate signal to the size of inverse stft.
        signal = signal[:inverse_stft.shape[0]]

        # Ignore the frame_length samples at either edge.
        interior = slice(frame_length, -frame_length)
        signal = signal[interior]
        inverse_stft = inverse_stft[interior]
        inverse_stft_corrected = inverse_stft_corrected[interior]

        # Check that the inverse and original signal are close up to a scale
        # factor.
        inverse_stft_scaled = inverse_stft / np.mean(np.abs(inverse_stft))
        signal_scaled = signal / np.mean(np.abs(signal))
        scaled_error = np.std(inverse_stft_scaled - signal_scaled)
        self.assertLess(scaled_error, threshold)

        # Check that the inverse with correction and original signal are
        # close.
        corrected_error = np.std(inverse_stft_corrected - signal)
        self.assertLess(corrected_error, corrected_threshold)
def _compare(self, signal, frame_length, frame_step, fft_length):
  """Checks stft/inverse_stft against the NumPy reference helpers.

  Args:
    signal: Input passed both to `spectral_ops.stft` and to
      `SpectralOpsTest._np_stft`.
    frame_length: Frame length forwarded to the ops and reference helpers.
    frame_step: Frame step forwarded to the ops and reference helpers.
    fft_length: FFT length forwarded to the ops and reference helpers.
  """
  tol = 1e-4
  with spectral_ops_test_util.fft_kernel_label_map():
    with self.test_session(use_gpu=True) as sess:
      stft_op = spectral_ops.stft(
          signal, frame_length, frame_step, fft_length, pad_end=False)
      inverse_stft_op = spectral_ops.inverse_stft(
          stft_op, frame_length, frame_step, fft_length)
      actual_stft, actual_inverse_stft = sess.run(
          [stft_op, inverse_stft_op])

      # Forward transform vs. reference.
      expected_stft = SpectralOpsTest._np_stft(
          signal, fft_length, frame_step, frame_length)
      self.assertAllClose(expected_stft, actual_stft, tol, tol)

      # Inverse transform vs. reference, starting from the reference STFT.
      expected_inverse_stft = SpectralOpsTest._np_inverse_stft(
          expected_stft, fft_length, frame_step, frame_length)
      self.assertAllClose(
          expected_inverse_stft, actual_inverse_stft, tol, tol)
def _compare(self, signal, frame_length, frame_step, fft_length):
  """Checks stft/inverse_stft against the NumPy reference helpers.

  Args:
    signal: Input passed both to `spectral_ops.stft` and to
      `SpectralOpsTest._np_stft`.
    frame_length: Frame length forwarded to the ops and reference helpers.
    frame_step: Frame step forwarded to the ops and reference helpers.
    fft_length: FFT length forwarded to the ops and reference helpers.
  """
  tol = 1e-4
  with spectral_ops_test_util.fft_kernel_label_map():
    with self.test_session(use_gpu=True) as sess:
      stft_op = spectral_ops.stft(
          signal, frame_length, frame_step, fft_length, pad_end=False)
      inverse_stft_op = spectral_ops.inverse_stft(
          stft_op, frame_length, frame_step, fft_length)
      actual_stft, actual_inverse_stft = sess.run(
          [stft_op, inverse_stft_op])

      # Forward transform vs. reference.
      expected_stft = SpectralOpsTest._np_stft(
          signal, fft_length, frame_step, frame_length)
      self.assertAllClose(expected_stft, actual_stft, tol, tol)

      # Inverse transform vs. reference, starting from the reference STFT.
      expected_inverse_stft = SpectralOpsTest._np_inverse_stft(
          expected_stft, fft_length, frame_step, frame_length)
      self.assertAllClose(
          expected_inverse_stft, actual_inverse_stft, tol, tol)
def _compare_round_trip(self, signal, frame_length, frame_step, fft_length):
  """Asserts stft followed by inverse_stft recovers `signal` up to scale.

  Args:
    signal: Input to `stft`; it is evaluated via `sess.run` alongside the
      reconstruction.
    frame_length: Frame length forwarded to `stft` and `inverse_stft`.
    frame_step: Frame step forwarded to `stft` and `inverse_stft`.
    fft_length: FFT length forwarded to `stft` and `inverse_stft`.
  """
  with spectral_ops_test_util.fft_kernel_label_map():
    with self.test_session(use_gpu=True) as sess:
      stft = spectral_ops.stft(
          signal, frame_length, frame_step, fft_length, pad_end=False)
      inverse_stft_op = spectral_ops.inverse_stft(
          stft, frame_length, frame_step, fft_length)
      original, reconstructed = sess.run([signal, inverse_stft_op])

      # Since the shapes can differ due to padding, pad both signals to the
      # max of their lengths.
      max_length = max(original.shape[0], reconstructed.shape[0])
      original_pad = max_length - original.shape[0]
      original = np.pad(original, (0, original_pad), "constant")
      reconstructed_pad = max_length - reconstructed.shape[0]
      reconstructed = np.pad(reconstructed, (0, reconstructed_pad), "constant")

      # Ignore the frame_length samples at either edge.
      interior = slice(frame_length, original.shape[0] - frame_length)
      ratio = original[interior] / reconstructed[interior]

      # Check that the inverse and original signal are equal up to a constant
      # factor.
      self.assertLess(np.var(ratio), 2e-5)