def log_mel_spec_to_audio(log_spec):
    """
    Reconstruct an audio signal from a log-mel spectrogram (Griffin-Lim).

    The natural-log scaling is undone with ``np.exp`` and the result is
    handed to ``mel_to_audio`` with 32 iterations plus the shared
    ``LIBROSA_SPEC_KWARGS`` settings.
    """
    # Undo the log so mel_to_audio receives a linear-scale spectrogram.
    linear_spec = np.exp(log_spec)
    audio = mel_to_audio(linear_spec, n_iter=32, **LIBROSA_SPEC_KWARGS)
    return audio
def log_melspectrogram_to_audio(mels, sr, hop_length, n_iter):
    """Invert a log-scaled mel spectrogram to a waveform with peak amplitude 0.5.

    Args:
        mels: Array holding the log-scaled mel spectrogram.
        sr: Sample rate forwarded to ``inverse.mel_to_audio``.
        hop_length: Hop length forwarded to ``inverse.mel_to_audio``; the FFT
            size is derived from it as ``hop_length * 4``.
        n_iter: Iteration count forwarded to ``inverse.mel_to_audio``.

    Returns:
        The reconstructed waveform, scaled so that its largest absolute
        sample equals 0.5. A silent (all-zero) or NaN-peaked reconstruction
        is returned unscaled.
    """
    n_fft = hop_length * 4

    # Use floating point
    # NOTE(review): this divides by the maximum, so an input whose maximum is
    # zero or negative yields NaN/inf or a sign flip -- confirm expected range.
    mels = mels.astype(float) / numpy.max(mels)

    # inverse of log
    # FIXME: totally fails to converge, leading to a lot of white noise
    mels = numpy.exp(mels + 1e-6) - 1.0
    print(numpy.min(mels), numpy.max(mels), numpy.mean(mels))

    # invert to get waveform
    y = inverse.mel_to_audio(mels, sr=sr, hop_length=hop_length,
                             n_fft=n_fft, n_iter=n_iter)

    # Normalize audio amplitude against the absolute peak. The previous
    # max(min(y), max(y)) always reduced to max(y): it ignored a dominant
    # negative peak, flipped the sign when every sample was negative, and
    # divided by zero for silence.
    print(numpy.min(y), numpy.max(y), numpy.mean(y))
    peak = numpy.max(numpy.abs(y))
    norm_factor = 0.5 / peak if peak > 0 else 1.0
    print('norm', norm_factor)

    return y * norm_factor
def save_sample(self, n=5):
    """Save spectrograms and reconstructed audio for samples of the first batch.

    Takes the first batch from ``self.data_loader``, keeps at most ``n``
    items, runs them through ``self.model`` and, for each item, writes to
    ``self.config.sample_dir``:

    * ``<name>.npy`` / ``<name>_recon.npy`` -- input and reconstructed
      spectrograms,
    * ``<name>.wav`` / ``<name>_recon.wav`` -- waveforms obtained with
      ``inverse.mel_to_audio`` (best effort; failures are logged, not raised).

    ``<name>`` is the base name (without extension) of the source file listed
    in ``self.data_loader.dataset.path_to_data``.

    Args:
        n: Maximum number of samples taken from the first batch.

    Returns:
        None. If the data loader yields no batch, a message is logged and
        nothing is written.
    """
    # Only the first batch is needed. An empty loader used to leave
    # ``data``/``idxs`` unbound and crash with a NameError further down.
    first_batch = next(iter(self.data_loader), None)
    if first_batch is None:
        self.logger.info("Cannot save samples: data loader is empty")
        return
    idxs, _labels, data = first_batch
    data = data[:n]

    x = data.type('torch.FloatTensor').to(self.device)
    o_size = x.size()
    x = self._reshape(x)
    x_recon, *_ = self.model(x)

    def _join_chunks(tensor):
        # Revert to the original batch shape, then join chunks by merging
        # axes 1 and 3 (after swapping axes 1 and 2) into the last axis.
        arr = tensor.view(*o_size).detach().cpu().numpy()
        return arr.transpose(0, 2, 1, 3).reshape(o_size[0], o_size[2], -1)

    x = _join_chunks(x)
    x_recon = _join_chunks(x_recon)

    sample_dir = self.config.sample_dir
    audio_kwargs = {"sr": 22050, "n_fft": 2048, "hop_length": 735}

    for idx, spec, spec_recon in zip(idxs, x, x_recon):
        fpath = self.data_loader.dataset.path_to_data[idx]
        fname = path.splitext(path.basename(fpath))[0]

        # save spectrogram
        np.save(path.join(sample_dir, fname + ".npy"), spec)
        np.save(path.join(sample_dir, fname + "_recon.npy"), spec_recon)

        # Audio export is best effort: a failed inversion must not abort
        # the remaining samples.
        try:
            # save waveform
            au = inverse.mel_to_audio(core.db_to_power(spec), **audio_kwargs)
            recon_au = inverse.mel_to_audio(core.db_to_power(spec_recon),
                                            **audio_kwargs)
            # NOTE(review): the written rate is derived from the signal
            # length (presumably clips are 5 seconds long) rather than the
            # 22050 used for inversion -- confirm this is intended.
            rate = len(au) // 5
            wavfile.write(path.join(sample_dir, fname + ".wav"), rate, au)
            wavfile.write(path.join(sample_dir, fname + "_recon.wav"),
                          rate, recon_au)
        except Exception as e:
            self.logger.debug(str(e))
            self.logger.info("Cannot save from mel to audio")
def save_wav(spectrogram: t.Any, path: t.Union[str, Path]) -> None:
    """Convert ``spectrogram`` to audio and write it to ``path``.

    The signal is produced by ``mel_to_audio`` with its default settings and
    written through ``write_wav`` at a sample rate of 22050.
    """
    write_wav(path, mel_to_audio(spectrogram), sr=22050)
def __call__(self, x: t.Any) -> t.Any:
    """Return ``mel_to_audio`` applied to ``x`` at this instance's sample rate ``self.sr``."""
    sample_rate = self.sr
    return mel_to_audio(x, sr=sample_rate)
from librosa.feature import inverse
import numpy as np
import torch
from scipy.io import wavfile

if __name__ == '__main__':
    # Script entry point: load one stored mel spectrogram, invert it to a
    # waveform and write the result next to the script as a WAV file.
    sample_rate = 48000

    mel_spec = np.load('data/mel/p225/p225_013.npy')
    mel_spec = mel_spec.astype(np.float32)

    # A small iteration count keeps the inversion quick.
    waveform = inverse.mel_to_audio(mel_spec, sr=sample_rate, n_iter=10)

    wavfile.write('test.wav', sample_rate, waveform)
def pointwise_mel_to_audio(x):
    """Convert ``x`` to audio with ``mel_to_audio`` using the shared transform settings.

    The sample rate, FFT size and hop length are read from
    ``melspectrogram_transform_parameters``; the iteration count is fixed
    at 4.
    """
    params = melspectrogram_transform_parameters
    inversion_kwargs = {
        "sr": params["sample_rate"],
        "n_fft": params["n_fft"],
        "hop_length": params["hop_length"],
        "n_iter": 4,
    }
    return mel_to_audio(x, **inversion_kwargs)