def test_no_espeak_phonemes():
    """Build a VITS config with espeak phonemes disabled and run find_unique_phonemes on it."""
    # prepare the config
    config = VitsConfig(
        batch_size=2,
        eval_batch_size=2,
        num_loader_workers=0,
        num_eval_loader_workers=0,
        text_cleaner="english_cleaners",
        use_phonemes=True,
        use_espeak_phonemes=False,
        phoneme_language="en-us",
        phoneme_cache_path="tests/data/ljspeech/phoneme_cache/",
        run_eval=True,
        test_delay_epochs=-1,
        epochs=1,
        print_step=1,
        print_eval=True,
        datasets=[dataset_config_en, dataset_config_pt],
    )
    config.save_json(config_path)
    # run the CLI tool against the saved config on CPU only
    run_cli(
        f'CUDA_VISIBLE_DEVICES="" python TTS/bin/find_unique_phonemes.py --config_path "{config_path}"'
    )
def test_synthesize():
    """Test synthesize.py with different arguments."""
    # NOTE(review): a longer function with this same name exists later in the
    # file; the later definition shadows this one at import time — confirm
    # which copy is intended to run.
    out_wav = os.path.join(get_tests_output_path(), "output.wav")
    run_cli("tts --list_models")
    # single speaker model
    run_cli(f'tts --text "This is an example." --out_path "{out_wav}"')
def run_test_train():
    """Launch one speaker-encoder training run through the CLI on the test LJSpeech data."""
    command = (
        f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_encoder.py --config_path {config_path} "
        f"--coqpit.output_path {output_path} "
        "--coqpit.datasets.0.name ljspeech "
        "--coqpit.datasets.0.meta_file_train metadata.csv "
        "--coqpit.datasets.0.meta_file_val metadata.csv "
        "--coqpit.datasets.0.path tests/data/ljspeech "
    )
    run_cli(command)
def test_run_all_models():
    """Check if all the models are downloadable and tts models run correctly."""
    print(" > Run synthesizer with all the models.")
    download_dir = get_user_data_dir("tts")
    out_wav = os.path.join(get_tests_output_path(), "output.wav")
    manager = ModelManager(output_prefix=get_tests_output_path())
    model_names = manager.list_models()
    for model_name in model_names:
        print(f"\n > Run - {model_name}")
        model_path, _, _ = manager.download_model(model_name)
        if "tts_models" in model_name:
            # download and run the model
            local_download_dir = os.path.dirname(model_path)
            speaker_files = glob.glob(local_download_dir + "/speaker*")
            language_files = glob.glob(local_download_dir + "/language*")
            language_id = ""
            if len(speaker_files) > 0:
                # multi-speaker model
                # NOTE(review): if the first speaker file matches neither
                # "speaker_ids" nor "speakers", speaker_manager stays unbound
                # and the later access raises NameError — confirm intended.
                if "speaker_ids" in speaker_files[0]:
                    speaker_manager = SpeakerManager(speaker_id_file_path=speaker_files[0])
                elif "speakers" in speaker_files[0]:
                    speaker_manager = SpeakerManager(d_vectors_file_path=speaker_files[0])
                # multi-lingual model - Assuming multi-lingual models are also multi-speaker
                if len(language_files) > 0 and "language_ids" in language_files[0]:
                    language_manager = LanguageManager(language_ids_file_path=language_files[0])
                    language_id = language_manager.language_names[0]
                speaker_id = list(speaker_manager.ids.keys())[0]
                run_cli(
                    f"tts --model_name {model_name} "
                    f'--text "This is an example." --out_path "{out_wav}" --speaker_idx "{speaker_id}" --language_idx "{language_id}" '
                )
            else:
                # single-speaker model
                run_cli(
                    f"tts --model_name {model_name} "
                    f'--text "This is an example." --out_path "{out_wav}"'
                )
            # remove downloaded models
            shutil.rmtree(download_dir)
        else:
            # only download the model
            manager.download_model(model_name)
        print(f" | > OK: {model_name}")
    folders = glob.glob(os.path.join(manager.output_prefix, "*"))
    assert len(folders) == len(model_names)
    shutil.rmtree(manager.output_prefix)
def test_voice_conversion():
    """Run voice-conversion inference through the tts CLI with the YourTTS model."""
    print(" > Run voice conversion inference using YourTTS model.")
    model_name = "tts_models/multilingual/multi-dataset/your_tts"
    language_id = "en"
    # speaker_wav provides the target voice; reference_wav the content to convert
    speaker_wav = os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0001.wav")
    reference_wav = os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0032.wav")
    out_wav = os.path.join(get_tests_output_path(), "output.wav")
    run_cli(
        f"tts --model_name {model_name}"
        f" --out_path {out_wav} --speaker_wav {speaker_wav} --reference_wav {reference_wav} --language_idx {language_id} "
    )
def test_Tacotron():
    """Save a freshly built Tacotron checkpoint and run extract_tts_spectrograms on it."""
    # NOTE(review): another function with this same name appears later in the
    # file; the later definition shadows this one — confirm which is intended.
    # set paths
    config_path = os.path.join(get_tests_input_path(), "test_tacotron_config.json")
    checkpoint_path = os.path.join(get_tests_output_path(), "checkpoint_test.pth.tar")
    output_path = os.path.join(get_tests_output_path(), "output_extract_tts_spectrograms/")
    # load config
    c = load_config(config_path)
    # create model
    num_chars = len(phonemes if c.use_phonemes else symbols)
    model = setup_model(num_chars, 1, c, speaker_embedding_dim=None)
    # save model
    torch.save({"model": model.state_dict()}, checkpoint_path)
    # run test
    run_cli(
        f'CUDA_VISIBLE_DEVICES="" python TTS/bin/extract_tts_spectrograms.py --config_path "{config_path}" --checkpoint_path "{checkpoint_path}" --output_path "{output_path}"'
    )
    run_cli(f'rm -rf "{output_path}" "{checkpoint_path}"')
def test_synthesize():
    """Test synthesize.py with different arguments."""
    out_wav = os.path.join(get_tests_output_path(), "output.wav")
    run_cli("tts --list_models")
    # single speaker model
    run_cli(f'tts --text "This is an example." --out_path "{out_wav}"')
    # named model, default vocoder
    run_cli(
        "tts --model_name tts_models/en/ljspeech/glow-tts "
        f'--text "This is an example." --out_path "{out_wav}"'
    )
    # named model with explicit vocoder
    run_cli(
        "tts --model_name tts_models/en/ljspeech/glow-tts "
        "--vocoder_name vocoder_models/en/ljspeech/multiband-melgan "
        f'--text "This is an example." --out_path "{out_wav}"'
    )
def test_Tacotron():
    """Save a Tacotron checkpoint built from config alone and run extract_tts_spectrograms on it."""
    # set paths
    config_path = os.path.join(get_tests_input_path(), "test_tacotron_config.json")
    checkpoint_path = os.path.join(get_tests_output_path(), "checkpoint_test.pth")
    output_path = os.path.join(get_tests_output_path(), "output_extract_tts_spectrograms/")
    # load config
    c = load_config(config_path)
    # create model
    model = setup_model(c)
    # save model
    torch.save({"model": model.state_dict()}, checkpoint_path)
    # run test
    run_cli(
        f'CUDA_VISIBLE_DEVICES="" python TTS/bin/extract_tts_spectrograms.py --config_path "{config_path}" --checkpoint_path "{checkpoint_path}" --output_path "{output_path}"'
    )
    run_cli(f'rm -rf "{output_path}" "{checkpoint_path}"')
def test():
    """Resample the test wavs to 16 kHz, then run VAD-based silence removal on them."""
    # set paths
    wav_path = os.path.join(get_tests_input_path(), "../data/ljspeech/wavs")
    output_path = os.path.join(get_tests_output_path(), "output_wavs_removed_silence/")
    output_resample_path = os.path.join(get_tests_output_path(), "output_ljspeech_16khz/")
    # resample audios
    run_cli(
        f'CUDA_VISIBLE_DEVICES="" python TTS/bin/resample.py --input_dir "{wav_path}" --output_dir "{output_resample_path}" --output_sr 16000'
    )
    # run test
    run_cli(
        f'CUDA_VISIBLE_DEVICES="" python TTS/bin/remove_silence_using_vad.py --input_dir "{output_resample_path}" --output_dir "{output_path}"'
    )
    # clean up generated directories
    run_cli(f'rm -rf "{output_resample_path}"')
    run_cli(f'rm -rf "{output_path}"')
num_loader_workers=0, num_val_loader_workers=0, run_eval=True, test_delay_epochs=-1, epochs=1, seq_len=8192, eval_split_size=1, print_step=1, print_eval=True, data_path="tests/data/ljspeech", output_path=output_path, test_noise_schedule={"min_val": 1e-6, "max_val": 1e-2, "num_steps": 2}, ) config.audio.do_trim_silence = True config.audio.trim_db = 60 config.save_json(config_path) # train the model for one epoch command_train = ( f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavegrad.py --config_path {config_path} " ) run_cli(command_train) # Find latest folder continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime) # restore the model and continue training for one more epoch command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavegrad.py --continue_path {continue_path} " run_cli(command_train) shutil.rmtree(continue_path)
config.use_speaker_embedding = True config.model_args.use_speaker_embedding = True config.audio.trim_db = 60 config.save_json(config_path) # train the model for one epoch command_train = ( f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_tts.py --config_path {config_path} " f"--coqpit.output_path {output_path} " "--coqpit.datasets.0.name ljspeech_test " "--coqpit.datasets.0.meta_file_train metadata.csv " "--coqpit.datasets.0.meta_file_val metadata.csv " "--coqpit.datasets.0.path tests/data/ljspeech " "--coqpit.datasets.0.meta_file_attn_mask tests/data/ljspeech/metadata_attn_mask.txt " "--coqpit.test_delay_epochs 0") run_cli(command_train) # Find latest folder continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime) # Inference using TTS API continue_config_path = os.path.join(continue_path, "config.json") continue_restore_path, _ = get_last_checkpoint(continue_path) out_wav_path = os.path.join(get_tests_output_path(), "output.wav") speaker_id = "ljspeech-1" continue_speakers_path = os.path.join(continue_path, "speakers.json") # Check integrity of the config with open(continue_config_path, "r", encoding="utf-8") as f: config_loaded = json.load(f)