def get_signal(self, audio, gain, phase):
    """Apply a gain-scaled, time-varying delay to the audio.

    The incoming `phase` is rescaled to
    `phase * depth_ms / max_delay_ms + center_ms / max_delay_ms`, where
    `max_delay_ms = center_ms + depth_ms`, and passed to
    `core.variable_length_delay` together with the maximum delay expressed
    in samples. For `phase` running from 0 to 1.0 the rescaled value runs
    from `center_ms / max_delay_ms` to 1.0.
    NOTE(review): this presumes `core.variable_length_delay` reads `phase`
    as a fraction of `max_length` -- confirm against its definition.

    Args:
      audio: Dry audio. 2-D Tensor of shape [batch, n_samples].
      gain: Amplitude of modulated signal. Shape [batch_size, n_samples, 1].
        If it is 3-D, the trailing channel axis is dropped before scaling.
      phase: The normalized instantaneous length of the delay, from 0 to
        1.0. Shape [batch_size, n_samples, 1].

    Returns:
      signal: Modulated audio of shape [batch, n_samples]. The dry input is
        added to the delayed signal when `self.add_dry` is truthy.
    """
    longest_delay_ms = self.center_ms + self.depth_ms
    # Milliseconds -> whole samples (truncated) for the delay-line length.
    longest_delay_samples = int(self.sample_rate / 1000.0 * longest_delay_ms)

    # Fractions of the longest delay covered by the sweep and by its offset.
    depth_fraction = self.depth_ms / longest_delay_ms
    center_fraction = self.center_ms / longest_delay_ms
    delay_phase = phase * depth_fraction + center_fraction

    delayed = core.variable_length_delay(
        audio=audio,
        phase=delay_phase,
        max_length=longest_delay_samples,
    )

    # Drop the channel axis so the gain lines up with the 2-D audio.
    flat_gain = gain[..., 0] if len(gain.shape) == 3 else gain
    delayed *= flat_gain

    if self.add_dry:
        return delayed + audio
    return delayed
def test_variable_length_delay_is_accurate(
        self, batch_size, n_samples, max_length):
    """Tests accuracy of variable length delay.

    Generates a sine wave and delays it by several amounts. If max_length is
    equal to the period of oscillation, a half wave delay is equal to
    negation and a full wave delay is equal to identity.

    Args:
      batch_size: Number of batch examples in the test phase signal.
      n_samples: Number of samples in the test signal.
      max_length: Maximum delay in samples.
    """
    tolerance = 1e-2

    # Sine wave whose period is meant to match max_length, tiled per batch.
    cycles = float(n_samples) / max_length
    sine = np.sin(np.linspace(0, 2.0 * np.pi * cycles, n_samples))
    wav_np = np.tile(sine[np.newaxis, :], [batch_size, 1]).astype(np.float32)

    # Three different delay amounts (none, half-wave, full-wave), each
    # paired with the waveform the delayed output should match.
    unit_phase = tf.ones_like(wav_np)[..., tf.newaxis]
    delay_fractions = (0.0, 0.5, 1.0)
    expected_waves = (wav_np, -wav_np, wav_np)
    phases = [fraction * unit_phase for fraction in delay_fractions]

    with self.cached_session() as sess:
        delayed_waves = [
            sess.run(core.variable_length_delay(phase, wav_np, max_length))
            for phase in phases
        ]

    for expected, delayed in zip(expected_waves, delayed_waves):
        # Ignore front of sample because of zero padding.
        residual = expected[:, max_length:] - delayed[:, max_length:]
        mean_abs_error = np.abs(residual).mean()
        self.assertLessEqual(mean_abs_error, tolerance)