Esempio n. 1
0
def spectrogram_summary(name, audio, audio_gen, step):
    """Emit one original-vs-synthesized spectrogram figure per batch element.

    Both inputs are converted with `tf_float32` and passed through
    `spectral_ops.compute_logmag` (size=768) once for the whole batch. For
    every index along the first axis of `audio`, a figure with two stacked
    panels ('original' on top, 'synthesized' below) is built and handed to
    `fig_summary` under the tag 'spectrogram/<name>_<index>'.

    Args:
        name: Label inserted into the summary tag.
        audio: Batch of reference audio; its first axis is the batch axis.
        audio_gen: Batch of generated audio, indexed in parallel with `audio`.
        step: Step value forwarded unchanged to `fig_summary`.
    """
    def to_logmag(signal):
        return spectral_ops.compute_logmag(tf_float32(signal), size=768)

    # Spectrograms are computed for the full batch up front; each panel
    # title is paired with the batch it is drawn from.
    panels = (
        ('original', to_logmag(audio)),
        ('synthesized', to_logmag(audio_gen)),
    )

    n_samples = int(audio.shape[0])
    for sample_idx in range(n_samples):
        # The figure size is fixed explicitly (intended for tensorboard).
        fig, axs = plt.subplots(2, 1, figsize=(8, 8))

        for ax, (title, batch_specs) in zip(axs, panels):
            ax.matshow(np.rot90(batch_specs[sample_idx]),
                       vmin=-5,
                       vmax=1,
                       aspect='auto',
                       cmap=plt.cm.magma)
            ax.set_title(title)
            ax.set_xticks([])
            ax.set_yticks([])

        # fig_summary is defined outside this block; presumably it renders
        # the figure to an image summary -- confirm against its definition.
        fig_summary('spectrogram/{}_{}'.format(name, sample_idx), fig, step)
Esempio n. 2
0
def specplot(audio,
             vmin=-5,
             vmax=1,
             rotate=True,
             size=512 + 256,
             **matshow_kwargs):
    """Draw the log-magnitude spectrogram of `audio` with `plt.matshow`.

    Args:
        audio: Audio to plot. When its shape has exactly two dimensions it
            is treated as a batch and only the first element is plotted.
        vmin: Lower bound of the color scale.
        vmax: Upper bound of the color scale.
        rotate: If True, the spectrogram is passed through `np.rot90`
            before plotting.
        size: Forwarded as `size` to `spectral_ops.compute_logmag`.
        **matshow_kwargs: Extra keyword arguments forwarded to `plt.matshow`.
    """
    # A rank-2 input is treated as batched: keep only the first element.
    signal = audio[0] if len(audio.shape) == 2 else audio

    spec = spectral_ops.compute_logmag(core.tf_float32(signal), size=size)
    spec = np.rot90(spec) if rotate else spec

    # Render with a fixed colormap and aspect; hide ticks, label the axes.
    plt.matshow(
        spec,
        vmin=vmin,
        vmax=vmax,
        cmap=plt.cm.magma,
        aspect='auto',
        **matshow_kwargs)
    plt.xticks([])
    plt.yticks([])
    plt.xlabel('Time')
    plt.ylabel('Frequency')
Esempio n. 3
0
 def call(self, audio, *conditioning):
   """Encode audio (plus optional conditioning) into a pooled latent.

   The spectral front end is selected by `self.spectral_op`:
   'compute_mfcc' or 'compute_logmag'. The features are normalized with
   `self.z_norm`, concatenated with the conditioning signals on the last
   axis, run through `self.rnn`, and then reduced over axis 1 as a
   weighted average whose weights are `sigmoid(self.confidence(...))`.

   Args:
     audio: Input audio passed to the selected spectral op.
     *conditioning: Extra signals; each one is passed through `resample`
       with the feature length (`z.shape[1]`) before concatenation.
       NOTE(review): `resample` is defined outside this block --
       presumably it matches the time axis of the features; confirm.

   Returns:
     The weighted average of `self.dense_out(...)` over axis 1, with that
     axis kept at size 1.

   Raises:
     ValueError: If `self.spectral_op` is not one of the supported names.
   """
   if self.spectral_op == 'compute_mfcc':
     z = spectral_ops.compute_mfcc(
         audio,
         lo_hz=20.0,
         hi_hz=8000.0,
         fft_size=self.fft_size,
         mel_bins=128,
         mfcc_bins=30,
         overlap=self.overlap,
         pad_end=True)
   elif self.spectral_op == 'compute_logmag':
     z = spectral_ops.compute_logmag(
         core.tf_float32(audio), size=self.fft_size)
   else:
     # Without this branch `z` would be unbound and the failure would
     # surface later as an opaque UnboundLocalError.
     raise ValueError(
         "Unsupported spectral_op {!r}; expected 'compute_mfcc' or "
         "'compute_logmag'.".format(self.spectral_op))

   # Normalize. z is indexed as rank 3 here; a dummy axis is inserted
   # before the last axis for z_norm and removed again afterwards.
   z = self.z_norm(z[:, :, tf.newaxis, :])[:, :, 0, :]
   n_timesteps = z.shape[1]
   conditioning = [resample(c, n_timesteps) for c in conditioning]

   z = tf.concat([z] + conditioning, axis=-1)
   # Run an RNN over the latents.
   z = self.rnn(z)
   # Bounce down to compressed z dimensions, pooling over axis 1 with
   # sigmoid confidence weights (weighted mean).
   w = tf.math.sigmoid(self.confidence(z))
   z = self.dense_out(z)
   z = (tf.reduce_sum(z * w, axis=1, keepdims=True) /
        tf.reduce_sum(w, axis=1, keepdims=True))
   return z
Esempio n. 4
0
def spectrogram(audio, sess=None, rotate=False, size=2048):
  """Compute the log-magnitude spectrogram of `audio` by running a session.

  Args:
    audio: Audio data; converted to a float32 tensor with
      `tf.convert_to_tensor`.
    sess: Session used to evaluate the op. If None, a temporary
      `tf.Session` is created for this call and closed before returning.
      A session supplied by the caller is never closed here.
    rotate: If True, the result is passed through `np.rot90`.
    size: Forwarded as `size` to `spectral_ops.compute_logmag`.

  Returns:
    The value returned by `sess.run` for the log-magnitude op, rotated
    when `rotate` is True.
  """
  # Only close sessions this function created; the caller owns its own.
  owns_session = sess is None
  if owns_session:
    sess = tf.Session()
  try:
    mag = sess.run(
        spectral_ops.compute_logmag(
            tf.convert_to_tensor(audio, tf.float32), size=size))
  finally:
    if owns_session:
      # Release the temporary session's resources even if run() raised.
      sess.close()
  if rotate:
    mag = np.rot90(mag)
  return mag
Esempio n. 5
0
def get_spectrogram(audio, rotate=False, size=1024):
    """Return the log-magnitude spectrogram of `audio`.

    Args:
        audio: Audio data; converted with `tf_float32` before analysis.
        rotate: If True, the spectrogram is passed through `np.rot90`.
        size: Forwarded as `size` to `spectral_ops.compute_logmag`.

    Returns:
        The output of `spectral_ops.compute_logmag`, rotated when
        `rotate` is True.
    """
    logmag = spectral_ops.compute_logmag(tf_float32(audio), size=size)
    return np.rot90(logmag) if rotate else logmag