Ejemplo n.º 1
0
def log_mel_spec_to_audio(log_spec):
    """
    Reconstruct a time-domain waveform from a log-mel spectrogram.

    Undoes the log compression by exponentiation, then runs 32
    Griffin-Lim iterations through ``mel_to_audio`` with the shared
    ``LIBROSA_SPEC_KWARGS`` spectrogram settings.
    """
    linear_spec = np.exp(log_spec)
    return mel_to_audio(linear_spec, n_iter=32, **LIBROSA_SPEC_KWARGS)
Ejemplo n.º 2
0
def log_melspectrogram_to_audio(mels, sr, hop_length, n_iter):
    """
    Invert a log-compressed mel spectrogram to a waveform via Griffin-Lim.

    Parameters
    ----------
    mels : numpy.ndarray
        Log-scaled mel spectrogram (n_mels, n_frames).
    sr : int
        Target sample rate of the reconstructed audio.
    hop_length : int
        STFT hop length; n_fft is derived as 4 * hop_length.
    n_iter : int
        Number of Griffin-Lim iterations.

    Returns
    -------
    numpy.ndarray
        Reconstructed waveform, peak-normalized to roughly +/-0.5.
    """
    n_fft = hop_length * 4

    # Use floating point and scale to the 0..1 range before undoing the log.
    mels = mels.astype(float) / numpy.max(mels)

    # Inverse of the log compression.
    # FIXME: totally fails to converge, leading to a lot of white noise.
    # NOTE(review): if the forward transform was log(x + eps), the inverse
    # should be exp(y) - eps, not exp(y + 1e-6) - 1.0 — confirm against the
    # encoder that produced these spectrograms.
    mels = numpy.exp(mels + 1e-6) - 1.0
    print(numpy.min(mels), numpy.max(mels), numpy.mean(mels))

    # Invert the mel spectrogram to a waveform (Griffin-Lim phase recovery).
    y = inverse.mel_to_audio(mels,
                             sr=sr,
                             hop_length=hop_length,
                             n_fft=n_fft,
                             n_iter=n_iter)

    print(numpy.min(y), numpy.max(y), numpy.mean(y))

    # Normalize by the peak *absolute* amplitude. The original used
    # max(numpy.min(y), numpy.max(y)), which simply returns the positive
    # peak, ignores a dominant negative excursion, and could divide by a
    # negative (or zero) value for an all-negative/silent signal.
    peak = numpy.max(numpy.abs(y))
    norm_factor = 0.5 / peak if peak > 0 else 1.0
    print('norm', norm_factor)
    y = y * norm_factor

    return y
Ejemplo n.º 3
0
    def save_sample(self, n=5):
        """Save the first *n* items of one batch as spectrograms and audio.

        Writes ``<name>.npy`` / ``<name>_recon.npy`` (input and model
        reconstruction) into ``self.config.sample_dir``, and — best
        effort — ``<name>.wav`` / ``<name>_recon.wav`` via Griffin-Lim
        inversion of the dB-scaled mel spectrograms.

        Args:
            n: maximum number of items taken from the first batch.
        """
        # Pull exactly one batch from the loader and truncate it to n items.
        for idxs, labels, data in self.data_loader:
            labels = labels[:n]
            data = data[:n]
            break
        x = data.type('torch.FloatTensor').to(self.device)
        # Remember the pre-reshape shape so it can be restored below.
        o_size = x.size()
        x = self._reshape(x)
        # Only the first element of the model output (the reconstruction)
        # is used; any extra outputs are discarded.
        x_recon, *_ = self.model(x)
        # Revert the shape
        x = x.view(*o_size).detach().cpu().numpy()
        x_recon = x_recon.view(*o_size).detach().cpu().numpy()
        # Join chunks along time.
        # NOTE(review): the transpose/reshape assumes a 4-D
        # (batch, chunk, freq, time) layout — confirm against _reshape.
        x = x.transpose(0, 2, 1, 3).reshape(o_size[0], o_size[2], -1)
        x_recon = x_recon.transpose(0, 2, 1,
                                    3).reshape(o_size[0], o_size[2], -1)
        for i in range(x.shape[0]):
            idx = idxs[i]
            # Name output files after the source file of this sample.
            fpath = self.data_loader.dataset.path_to_data[idx]
            fname = path.splitext(path.basename(fpath))[0]
            # save spectrogram (input and reconstruction) as .npy
            np.save(path.join(self.config.sample_dir, fname + ".npy"), x[i])
            np.save(path.join(self.config.sample_dir, fname + "_recon.npy"),
                    x_recon[i])

            try:
                # save waveform: undo the dB scaling, then Griffin-Lim invert
                au = inverse.mel_to_audio(core.db_to_power(x[i]),
                                          sr=22050,
                                          n_fft=2048,
                                          hop_length=735)
                recon_au = inverse.mel_to_audio(core.db_to_power(x_recon[i]),
                                                sr=22050,
                                                n_fft=2048,
                                                hop_length=735)
                # Write with a sample rate that makes each clip play ~5 s.
                rate = len(au) // 5
                wavfile.write(
                    path.join(self.config.sample_dir, fname + ".wav"), rate,
                    au)
                wavfile.write(
                    path.join(self.config.sample_dir, fname + "_recon.wav"),
                    rate, recon_au)
            except Exception as e:
                # Best-effort: Griffin-Lim inversion may fail; log and
                # continue with the remaining samples.
                self.logger.debug(str(e))
                self.logger.info("Cannot save from mel to audio")
Ejemplo n.º 4
0
def save_wav(spectrogram: t.Any, path: t.Union[str, Path]) -> None:
    """Synthesize audio from *spectrogram* and write it to *path* as a WAV file."""
    waveform = mel_to_audio(spectrogram)
    write_wav(path, waveform, sr=22050)
Ejemplo n.º 5
0
 def __call__(self, x: t.Any) -> t.Any:
     """Invert mel spectrogram *x* to audio at this transform's sample rate."""
     audio = mel_to_audio(x, sr=self.sr)
     return audio
Ejemplo n.º 6
0
from librosa.feature import inverse
import numpy as np
import torch
from scipy.io import wavfile

if __name__ == '__main__':
    # Load a precomputed mel spectrogram, invert it with Griffin-Lim
    # (10 iterations), and write the reconstructed waveform for listening.
    mel_spec = np.load('data/mel/p225/p225_013.npy').astype(np.float32)
    waveform = inverse.mel_to_audio(mel_spec, sr=48000, n_iter=10)
    wavfile.write('test.wav', 48000, waveform)
Ejemplo n.º 7
0
def pointwise_mel_to_audio(x):
    """Invert one mel spectrogram to audio using the shared transform settings."""
    params = melspectrogram_transform_parameters
    return mel_to_audio(
        x,
        sr=params["sample_rate"],
        n_fft=params["n_fft"],
        hop_length=params["hop_length"],
        n_iter=4,
    )