def __init__(
        self,
        size='large',
        f0_bins=128,
        spectral_fn=lambda x: spectral_ops.compute_mag(x, size=1024),
        name='resnet_f0_encoder'):
    '''Set up the encoder's configuration and its two layers.

    Args:
      size: Forwarded to `nn.resnet` as its `size` argument.
      f0_bins: Stored on the instance and passed to `nn.dense` for the
        output layer.
      spectral_fn: Callable stored on the instance. The default calls
        `spectral_ops.compute_mag` on its input with `size=1024`.
      name: Forwarded to the parent class constructor.
    '''
    super(ResnetF0Encoder, self).__init__(name=name)

    # Configuration kept on the instance.
    self.f0_bins = f0_bins
    self.spectral_fn = spectral_fn

    # Layers.
    self.resnet = nn.resnet(size=size)
    self.dense_out = nn.dense(f0_bins)
def compute_mel(audio,
                sample_rate=16000,
                lo_hz=0.0,
                hi_hz=8000.0,
                bins=64,
                fft_size=2048,
                overlap=0.75,
                pad_end=True):
    '''Compute a Mel spectrogram with Tensorflow.

    The magnitude spectrogram from `spectral_ops.compute_mag` is projected
    onto a Mel filterbank built by
    `tf.signal.linear_to_mel_weight_matrix`.

    Args:
      audio: Input passed straight to `spectral_ops.compute_mag`.
      sample_rate: Sample rate used to build the Mel filterbank.
      lo_hz: Lower frequency edge of the Mel filterbank.
      hi_hz: Upper frequency edge of the Mel filterbank.
      bins: Number of Mel bins in the output.
      fft_size: Forwarded to `spectral_ops.compute_mag` (second positional
        argument).
      overlap: Forwarded to `spectral_ops.compute_mag` (third positional
        argument).
      pad_end: Forwarded to `spectral_ops.compute_mag` (fourth positional
        argument).

    Returns:
      The Mel-projected tensor. Its static shape is the magnitude tensor's
      shape with the last dimension replaced by the filterbank's last
      dimension.
    '''
    magnitudes = spectral_ops.compute_mag(audio, fft_size, overlap, pad_end)

    # The filterbank maps the linear-frequency bins of `magnitudes` to Mel bins.
    n_linear_bins = int(magnitudes.shape[-1])
    mel_weights = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins=bins,
        num_spectrogram_bins=n_linear_bins,
        sample_rate=sample_rate,
        lower_edge_hertz=lo_hz,
        upper_edge_hertz=hi_hz)

    # Contract a single axis pair between the spectrogram and the filterbank.
    mel = tf.tensordot(magnitudes, mel_weights, 1)

    # Re-attach the static shape explicitly after the tensordot.
    leading_dims = magnitudes.shape[:-1]
    mel_dim = mel_weights.shape[-1:]
    mel.set_shape(leading_dims.concatenate(mel_dim))
    return mel
def test_diff(self):
    '''Check that `spectral_ops.diff` shortens the chosen axis by one per call.

    First- and second-order differences of a magnitude spectrogram are
    taken along axis 1 and then axis 2, and the length of the differenced
    axis is compared with the original length minus one or two.
    '''
    amp = 1e-2
    noise = np.random.rand(64000).astype(np.float32) * 2.0 - 1.0
    # audio is now in [B,T] format to match the axis of the diff operations
    # in losses.SpectralLoss()
    audio = np.expand_dims(amp * noise, 0)

    frame_size = 2048
    hop_size = 128
    mag = spectral_ops.compute_mag(
        audio,
        size=frame_size,
        overlap=1.0 - float(hop_size) / frame_size,
        pad_end=True)

    # Axis 1 first, then axis 2; within each axis, first order then second.
    for axis in (1, 2):
        first_order = spectral_ops.diff(mag, axis=axis)
        assert (first_order.shape[axis] == mag.shape[axis] - 1)

        second_order = spectral_ops.diff(
            spectral_ops.diff(mag, axis=axis), axis=axis)
        assert (second_order.shape[axis] == mag.shape[axis] - 2)
def plot_mag_spectrum(audio, audio_rate, n_fft=2048, f_band=None):
    '''Show the magnitude spectrum from DDSP's compute_mag as an image.

    Args:
      audio: Input passed to `spectral_ops.compute_mag`.
      audio_rate: Accepted but not used by this function's body.
      n_fft: Forwarded to `spectral_ops.compute_mag` as its second
        positional argument.
      f_band: Accepted but not used by this function's body.
    '''
    spectrum = spectral_ops.compute_mag(audio, n_fft)
    # Convert to NumPy and transpose before handing the array to imshow.
    image = spectrum.numpy().T

    plt.figure()
    plt.imshow(image, origin="lower")
    plt.show()