Example #1
def __init__(self, n_fft=1024, n_mels=80, sample_rate=22050):
    super().__init__()
    # mel-scale -> linear-frequency spectrogram; sample_rate was a module-level
    # global in the source project and is made an explicit argument here
    self.mel_to_lin = transforms.InverseMelScale(n_stft=n_fft // 2 + 1,
                                                 n_mels=n_mels,
                                                 sample_rate=sample_rate,
                                                 max_iter=2048)
    # linear-frequency spectrogram -> waveform via Griffin-Lim phase recovery
    self.griffin_lim = transforms.GriffinLim(n_fft=n_fft, hop_length=256)
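A minimal usage sketch for the snippet above, assuming the __init__ belongs to
an nn.Module subclass (the class name MelToWave, the forward method, and the
default sample_rate are illustrative assumptions, not from the source; the
iterative max_iter argument is omitted because newer torchaudio releases solve
InverseMelScale by least squares instead):

import torch
import torch.nn as nn
import torchaudio.transforms as transforms

class MelToWave(nn.Module):
    def __init__(self, n_fft=1024, n_mels=80, sample_rate=22050):
        super().__init__()
        self.mel_to_lin = transforms.InverseMelScale(n_stft=n_fft // 2 + 1,
                                                     n_mels=n_mels,
                                                     sample_rate=sample_rate)
        self.griffin_lim = transforms.GriffinLim(n_fft=n_fft, hop_length=256)

    def forward(self, mel):
        # mel: (..., n_mels, time) -> linear spectrogram -> waveform
        return self.griffin_lim(self.mel_to_lin(mel))

vocoder = MelToWave()
mel = torch.rand(1, 80, 100)  # dummy mel spectrogram
wave = vocoder(mel)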
Example #2
def test_griffinlim(self, momentum, rand_init):
    # gradcheck GriffinLim end to end on the spectrogram of a short white-noise clip
    n_fft = 400
    power = 1
    n_iter = 3
    spec = get_spectrogram(
        get_whitenoise(sample_rate=8000, duration=0.05, n_channels=2),
        n_fft=n_fft, power=power)
    transform = _DeterministicWrapper(
        T.GriffinLim(n_fft=n_fft, n_iter=n_iter, momentum=momentum,
                     rand_init=rand_init, power=power))
    self.assert_grad(transform, [spec])
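GriffinLim with rand_init=True draws a random initial phase, so the gradient
check needs to see a deterministic function. In torchaudio's test suite the
helper used above looks roughly like the following (a sketch, not a verbatim
copy):

import torch

class _DeterministicWrapper(torch.nn.Module):
    """Re-seed the RNG before each forward pass so the wrapped transform
    behaves deterministically under gradcheck."""

    def __init__(self, transform, seed=0):
        super().__init__()
        self.seed = seed
        self.transform = transform

    def forward(self, x: torch.Tensor):
        torch.random.manual_seed(self.seed)
        return self.transform(x)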
Example #3
def _spec_to_wav(spec, sr=sample_rate, engine='librosa'):
    """Invert a magnitude spectrogram to a waveform with the Griffin-Lim
    algorithm. ``sr`` is currently unused and kept for API symmetry."""

    if engine == 'librosa':
        return librosa.griffinlim(spec,
                                  hop_length=stft_params['hop_length'],
                                  win_length=stft_params['win_length'])
    elif engine == 'torch':
        return tf.GriffinLim(**stft_params, power=power)(spec)

    raise ValueError(f"unknown engine: {engine!r}")
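_spec_to_wav reads sample_rate, power, stft_params, and the tf alias from
module scope. A sketch of plausible definitions (the names follow the snippet
above; the concrete values and the test signal are assumptions):

import librosa
import numpy as np
import torchaudio.transforms as tf

sample_rate = 22050
power = 1  # magnitude spectrogram
stft_params = {'n_fft': 1024, 'hop_length': 256, 'win_length': 1024}

tone = librosa.tone(440, sr=sample_rate, duration=1.0)
spec = np.abs(librosa.stft(tone, **stft_params))  # numpy input for the librosa engine
wav = _spec_to_wav(spec, engine='librosa')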
Example #4
plot_waveform(waveform, sample_rate, title="Original")
play_audio(waveform, sample_rate)

n_fft = 1024
win_length = None
hop_length = 512

spec = T.Spectrogram(
    n_fft=n_fft,
    win_length=win_length,
    hop_length=hop_length,
)(waveform)

griffin_lim = T.GriffinLim(
    n_fft=n_fft,
    win_length=win_length,
    hop_length=hop_length,
)
reconstructed_waveform = griffin_lim(spec)

plot_waveform(reconstructed_waveform, sample_rate, title="Reconstructed")
play_audio(reconstructed_waveform, sample_rate)
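
######################################################################
# Note: ``T.Spectrogram`` and ``T.GriffinLim`` both default to
# ``power=2.0``, so the pair above is consistent. If you work with a
# magnitude (``power=1``) spectrogram instead, pass the same ``power``
# to both transforms (a minimal sketch reusing the variables above):

spec_mag = T.Spectrogram(n_fft=n_fft, hop_length=hop_length, power=1)(waveform)
waveform_mag = T.GriffinLim(n_fft=n_fft, hop_length=hop_length, power=1)(spec_mag)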

######################################################################
# Mel Filter Bank
# ---------------
#
# ``torchaudio.functional.create_fb_matrix`` generates the filter bank
# for converting frequency bins to mel-scale bins.
#
# Since this function does not require input audio/features, there is no
# need to use ``torchaudio.transforms``.
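#
# A minimal sketch (assuming an older torchaudio release where
# ``torchaudio.functional.create_fb_matrix`` is still available; newer
# releases renamed it to ``melscale_fbanks``):

import torch
import torchaudio.functional as F

n_fft = 1024
sample_rate = 16000

# rows are linear-frequency (FFT) bins, columns are mel bins
fb = F.create_fb_matrix(n_freqs=n_fft // 2 + 1,
                        f_min=0.0,
                        f_max=sample_rate / 2.0,
                        n_mels=64,
                        sample_rate=sample_rate)
print(fb.shape)  # torch.Size([513, 64])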
Example #5
def test_GriffinLim(self):
    # check that the TorchScripted transform matches eager mode on a dummy spectrogram
    tensor = torch.rand((1, 201, 6))
    self._assert_consistency(T.GriffinLim(length=1000, rand_init=False),
                             tensor)
Example #6
def audio2mel2audio(dataset_path,
                    audiopaths_and_text,
                    melpaths_and_text,
                    args,
                    use_intermed=None):
    """Round-trip audio -> spectrogram -> audio with Griffin-Lim and save the
    result.

    Relies on the project utilities load_filepaths_and_text and
    load_wav_to_torch, and on scipy.io.wavfile.write (imported as ``write``).
    ``use_intermed`` is currently unused.
    """
    melpaths_and_text_list = \
        load_filepaths_and_text(dataset_path, melpaths_and_text)

    audiopaths_and_text_list = \
        load_filepaths_and_text(dataset_path, audiopaths_and_text)

    # torchaudio implementation (note: GriffinLim's ``normalized`` argument
    # exists only in older torchaudio releases)
    spec = T.Spectrogram(
        n_fft=args.filter_length,
        win_length=args.win_length,
        hop_length=args.hop_length,
        power=1,
        normalized=True,
    )

    griffin_lim = T.GriffinLim(
        n_fft=args.filter_length,
        win_length=args.win_length,
        hop_length=args.hop_length,
        n_iter=args.n_iters,
        power=1,
        normalized=True,
    )

    data_path = "/data/logotypografia_simple/cleaned_wavs/"

    # (an earlier tacotron-based STFT/Griffin-Lim path was removed as buggy)
    for i in range(len(melpaths_and_text_list)):
        # spectrogram calculation based on torchaudio
        wav_name = data_path + audiopaths_and_text_list[i][0].split("/")[-1]

        audio, sampling_rate = load_wav_to_torch(wav_name)

        _spectrogram = spec(audio)
        inv_waveform = griffin_lim(_spectrogram)

        inv_wav_name = "griffin_lim_inv_audio_custom7/" \
                       + audiopaths_and_text_list[i][0].split("/")[-1]
        print(f"Saving reconstructed wav with name {inv_wav_name}")
        # save at the file's own rate rather than a hard-coded 16000
        write(inv_wav_name, sampling_rate, inv_waveform.detach().cpu().numpy())
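
A sketch of how the function might be invoked (the attribute names follow the
code above; the paths and values are placeholder assumptions):

from types import SimpleNamespace

args = SimpleNamespace(filter_length=1024, win_length=1024,
                       hop_length=256, n_iters=60)
audio2mel2audio(dataset_path="data/",
                audiopaths_and_text="filelists/audio_text.txt",
                melpaths_and_text="filelists/mel_text.txt",
                args=args)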