Esempio n. 1
0
 def griffin_lim(mel, hparams=None):
     """
     Turn a mel spectrogram back into audio by delegating to audio.inv_melspectrogram
     (described by the original author as a Griffin-Lim inversion).

     The mel spectrogram is expected to have been built with the same parameters as the
     hyperparameters used for the inversion.

     :param mel: the mel spectrogram to invert
     :param hparams: hyperparameters forwarded to audio.inv_melspectrogram. Any falsy value,
     including the default None, selects default_hparams instead.
     :return: the result of audio.inv_melspectrogram for the given spectrogram
     """
     # Deliberately a truthiness test (not "is None"): every falsy value falls back to defaults.
     if not hparams:
         hparams = default_hparams
     return audio.inv_melspectrogram(mel, hparams)
Esempio n. 2
0
    ## Run a test
    print("Testing your configuration with small inputs.")
    print("\tTesting the encoder...")
    encoder.embed_utterance(np.zeros(encoder.sampling_rate))
    embed = np.random.rand(speaker_embedding_size)
    embed /= np.linalg.norm(embed)
    embeds = [embed, np.zeros(speaker_embedding_size)]
    texts = ["你好", "欢迎使用语音克隆工具"]
    print("\tTesting the synthesizer... (loading the model will output a lot of text)")
    mels = synthesizer.synthesize_spectrograms(texts, embeds)

    mel = np.concatenate(mels, axis=1)
    no_action = lambda *args: None

    generated_wav = audio.inv_melspectrogram(mel, hparams=audio.melgan_hparams)
    print("All test passed! You can now synthesize speech.\n\n")

    print("Interactive generation loop")
    num_generated = 0
    args.out_dir.mkdir(exist_ok=True, parents=True)
    while True:
        try:
            # Get the reference audio filepath
            message = "Reference voice: enter an audio filepath of a voice to be cloned (mp3, " \
                      "wav, m4a, flac, ...):\n"
            ref = input(message)
            in_fpath = Path(ref.replace("\"", "").replace("\'", ""))
            if not in_fpath.is_file():
                in_fpath = np.random.choice(reference_paths)
            print('Reference audio: {}'.format(in_fpath))