def griffin_lim(mel, hparams=None):
    """
    Turn a mel spectrogram back into a waveform using Griffin-Lim.

    The spectrogram is expected to have been computed with the same
    parameters as the ones found in hparams.py.

    :param mel: the mel spectrogram to invert.
    :param hparams: hyperparameters forwarded to the inversion routine. Any
        falsy value (including the default None) selects default_hparams.
    :return: the result of audio.inv_melspectrogram for the given inputs.
    """
    # Truthiness (not an "is None" test) decides the fallback, so every falsy
    # value is replaced by the module-level defaults.
    effective_hparams = hparams
    if not effective_hparams:
        effective_hparams = default_hparams
    return audio.inv_melspectrogram(mel, effective_hparams)
## Run a test print("Testing your configuration with small inputs.") print("\tTesting the encoder...") encoder.embed_utterance(np.zeros(encoder.sampling_rate)) embed = np.random.rand(speaker_embedding_size) embed /= np.linalg.norm(embed) embeds = [embed, np.zeros(speaker_embedding_size)] texts = ["你好", "欢迎使用语音克隆工具"] print("\tTesting the synthesizer... (loading the model will output a lot of text)") mels = synthesizer.synthesize_spectrograms(texts, embeds) mel = np.concatenate(mels, axis=1) no_action = lambda *args: None generated_wav = audio.inv_melspectrogram(mel, hparams=audio.melgan_hparams) print("All test passed! You can now synthesize speech.\n\n") print("Interactive generation loop") num_generated = 0 args.out_dir.mkdir(exist_ok=True, parents=True) while True: try: # Get the reference audio filepath message = "Reference voice: enter an audio filepath of a voice to be cloned (mp3, " \ "wav, m4a, flac, ...):\n" ref = input(message) in_fpath = Path(ref.replace("\"", "").replace("\'", "")) if not in_fpath.is_file(): in_fpath = np.random.choice(reference_paths) print('Reference audio: {}'.format(in_fpath))