Exemple #1
0
 def _create_random_model(self):
     # pylint: disable=global-statement
     config = load_config(
         os.path.join(get_tests_output_path(), "dummy_model_config.json"))
     model = setup_model(config)
     output_path = os.path.join(get_tests_output_path())
     save_checkpoint(config, model, None, None, 10, 1, output_path)
    def _create_random_model(self):
        # pylint: disable=global-statement
        global symbols, phonemes
        config = load_config(os.path.join(get_tests_output_path(), 'dummy_model_config.json'))
        if 'characters' in config.keys():
            symbols, phonemes = make_symbols(**config.characters)

        num_chars = len(phonemes) if config.use_phonemes else len(symbols)
        model = setup_model(num_chars, 0, config)
        output_path = os.path.join(get_tests_output_path())
        save_checkpoint(model, None, 10, 10, 1, output_path)
Exemple #3
0
def test_run_all_models():
    """Check if all the models are downloadable and tts models run correctly."""
    print(" > Run synthesizer with all the models.")
    download_dir = get_user_data_dir("tts")
    output_path = os.path.join(get_tests_output_path(), "output.wav")
    manager = ModelManager(output_prefix=get_tests_output_path())
    model_names = manager.list_models()
    for model_name in model_names:
        print(f"\n > Run - {model_name}")
        model_path, _, _ = manager.download_model(model_name)
        if "tts_models" in model_name:
            local_download_dir = os.path.dirname(model_path)
            # download and run the model
            speaker_files = glob.glob(local_download_dir + "/speaker*")
            language_files = glob.glob(local_download_dir + "/language*")
            language_id = ""
            if len(speaker_files) > 0:
                # multi-speaker model
                if "speaker_ids" in speaker_files[0]:
                    speaker_manager = SpeakerManager(
                        speaker_id_file_path=speaker_files[0])
                elif "speakers" in speaker_files[0]:
                    speaker_manager = SpeakerManager(
                        d_vectors_file_path=speaker_files[0])

                # multi-lingual model - Assuming multi-lingual models are also multi-speaker
                if len(language_files
                       ) > 0 and "language_ids" in language_files[0]:
                    language_manager = LanguageManager(
                        language_ids_file_path=language_files[0])
                    language_id = language_manager.language_names[0]

                speaker_id = list(speaker_manager.ids.keys())[0]
                run_cli(
                    f"tts --model_name  {model_name} "
                    f'--text "This is an example." --out_path "{output_path}" --speaker_idx "{speaker_id}" --language_idx "{language_id}" '
                )
            else:
                # single-speaker model
                run_cli(
                    f"tts --model_name  {model_name} "
                    f'--text "This is an example." --out_path "{output_path}"')
            # remove downloaded models
            shutil.rmtree(download_dir)
        else:
            # only download the model
            manager.download_model(model_name)
        print(f" | > OK: {model_name}")

    folders = glob.glob(os.path.join(manager.output_prefix, "*"))
    assert len(folders) == len(model_names)
    shutil.rmtree(manager.output_prefix)
Exemple #4
0
 def test_in_out(self):
     self._create_random_model()
     tts_root_path = get_tests_output_path()
     tts_checkpoint = os.path.join(tts_root_path, "checkpoint_10.pth.tar")
     tts_config = os.path.join(tts_root_path, "dummy_model_config.json")
     synthesizer = Synthesizer(tts_checkpoint, tts_config, None, None)
     synthesizer.tts("Better this test works!!")
Exemple #5
0
def test_synthesize():
    """Test synthesize.py with diffent arguments."""
    output_path = os.path.join(get_tests_output_path(), "output.wav")
    run_cli("tts --list_models")

    # single speaker model
    run_cli(f'tts --text "This is an example." --out_path "{output_path}"')
 def test_Tacotron():
     # set paths
     config_path = os.path.join(get_tests_input_path(), "test_tacotron_config.json")
     checkpoint_path = os.path.join(get_tests_output_path(), "checkpoint_test.pth.tar")
     output_path = os.path.join(get_tests_output_path(), "output_extract_tts_spectrograms/")
     # load config
     c = load_config(config_path)
     # create model
     num_chars = len(phonemes if c.use_phonemes else symbols)
     model = setup_model(num_chars, 1, c, speaker_embedding_dim=None)
     # save model
     torch.save({"model": model.state_dict()}, checkpoint_path)
     # run test
     run_cli(
         f'CUDA_VISIBLE_DEVICES="" python TTS/bin/extract_tts_spectrograms.py --config_path "{config_path}" --checkpoint_path "{checkpoint_path}" --output_path "{output_path}"'
     )
     run_cli(f'rm -rf "{output_path}" "{checkpoint_path}"')
 def test_in_out(self):
     self._create_random_model()
     config = load_config(os.path.join(get_tests_input_path(), 'server_config.json'))
     tts_root_path = get_tests_output_path()
     config['tts_checkpoint'] = os.path.join(tts_root_path, config['tts_checkpoint'])
     config['tts_config'] = os.path.join(tts_root_path, config['tts_config'])
     synthesizer = Synthesizer(config['tts_checkpoint'], config['tts_config'], None, None)
     synthesizer.tts("Better this test works!!")
Exemple #8
0
 def test_Tacotron():
     # set paths
     config_path = os.path.join(get_tests_input_path(),
                                "test_tacotron_config.json")
     checkpoint_path = os.path.join(get_tests_output_path(),
                                    "checkpoint_test.pth")
     output_path = os.path.join(get_tests_output_path(),
                                "output_extract_tts_spectrograms/")
     # load config
     c = load_config(config_path)
     # create model
     model = setup_model(c)
     # save model
     torch.save({"model": model.state_dict()}, checkpoint_path)
     # run test
     run_cli(
         f'CUDA_VISIBLE_DEVICES="" python TTS/bin/extract_tts_spectrograms.py --config_path "{config_path}" --checkpoint_path "{checkpoint_path}" --output_path "{output_path}"'
     )
     run_cli(f'rm -rf "{output_path}" "{checkpoint_path}"')
Exemple #9
0
    def test():
        # set paths
        wav_path = os.path.join(get_tests_input_path(),
                                "../data/ljspeech/wavs")
        output_path = os.path.join(get_tests_output_path(),
                                   "output_wavs_removed_silence/")
        output_resample_path = os.path.join(get_tests_output_path(),
                                            "output_ljspeech_16khz/")

        # resample audios
        run_cli(
            f'CUDA_VISIBLE_DEVICES="" python TTS/bin/resample.py --input_dir "{wav_path}" --output_dir "{output_resample_path}" --output_sr 16000'
        )

        # run test
        run_cli(
            f'CUDA_VISIBLE_DEVICES="" python TTS/bin/remove_silence_using_vad.py --input_dir "{output_resample_path}" --output_dir "{output_path}"'
        )
        run_cli(f'rm -rf "{output_resample_path}"')
        run_cli(f'rm -rf "{output_path}"')
Exemple #10
0
 def test_load_checkpoint(self):
     chkp_path = os.path.join(get_tests_output_path(),
                              "dummy_glow_tts_checkpoint.pth")
     config = VitsConfig(VitsArgs(num_chars=32))
     model = Vits.init_from_config(config, verbose=False).to(device)
     chkp = {}
     chkp["model"] = model.state_dict()
     torch.save(chkp, chkp_path)
     model.load_checkpoint(config, chkp_path)
     self.assertTrue(model.training)
     model.load_checkpoint(config, chkp_path, eval=True)
     self.assertFalse(model.training)
Exemple #11
0
def test_synthesize():
    """Test synthesize.py with diffent arguments."""
    output_path = os.path.join(get_tests_output_path(), "output.wav")
    run_cli("tts --list_models")

    # single speaker model
    run_cli(f'tts --text "This is an example." --out_path "{output_path}"')
    run_cli("tts --model_name tts_models/en/ljspeech/glow-tts "
            f'--text "This is an example." --out_path "{output_path}"')
    run_cli("tts --model_name tts_models/en/ljspeech/glow-tts  "
            "--vocoder_name vocoder_models/en/ljspeech/multiband-melgan "
            f'--text "This is an example." --out_path "{output_path}"')
Exemple #12
0
def test_voice_conversion():
    print(" > Run voice conversion inference using YourTTS model.")
    model_name = "tts_models/multilingual/multi-dataset/your_tts"
    language_id = "en"
    speaker_wav = os.path.join(get_tests_data_path(), "ljspeech", "wavs",
                               "LJ001-0001.wav")
    reference_wav = os.path.join(get_tests_data_path(), "ljspeech", "wavs",
                                 "LJ001-0032.wav")
    output_path = os.path.join(get_tests_output_path(), "output.wav")
    run_cli(
        f"tts --model_name  {model_name}"
        f" --out_path {output_path} --speaker_wav {speaker_wav} --reference_wav {reference_wav} --language_idx {language_id} "
    )
def test_pqmf():
    w, sr = load(WAV_FILE)

    layer = PQMF(N=4, taps=62, cutoff=0.15, beta=9.0)
    w, sr = load(WAV_FILE)
    w2 = torch.from_numpy(w[None, None, :])
    b2 = layer.analysis(w2)
    w2_ = layer.synthesis(b2)

    print(w2_.max())
    print(w2_.min())
    print(w2_.mean())
    sf.write(os.path.join(get_tests_output_path(), "pqmf_output.wav"),
             w2_.flatten().detach(), sr)
def test_if_all_models_available():
    """Check if all the models are downloadable."""
    print(
        " > Checking the availability of all the models under the ModelManager."
    )
    manager = ModelManager(output_prefix=get_tests_output_path())
    model_names = manager.list_models()
    for model_name in model_names:
        manager.download_model(model_name)
        print(f" | > OK: {model_name}")

    folders = glob.glob(os.path.join(manager.output_prefix, "*"))
    assert len(folders) == len(model_names)
    shutil.rmtree(manager.output_prefix)
def test_pqmf():
    w, sr = load(WAV_FILE)

    layer = PQMF(N=4, taps=62, cutoff=0.15, beta=9.0)
    w, sr = load(WAV_FILE)
    w2 = tf.convert_to_tensor(w[None, None, :])
    b2 = layer.analysis(w2)
    w2_ = layer.synthesis(b2)
    w2_ = w2.numpy()

    print(w2_.max())
    print(w2_.min())
    print(w2_.mean())
    sf.write(os.path.join(get_tests_output_path(), 'tf_pqmf_output.wav'),
             w2_.flatten(), sr)
Exemple #16
0
    def test_train_eval_log(self):
        batch_size = 2
        config = VitsConfig(
            model_args=VitsArgs(num_chars=32, spec_segment_size=10))
        model = Vits.init_from_config(config, verbose=False).to(device)
        model.run_data_dep_init = False
        model.train()
        batch = self._create_batch(config, batch_size)
        logger = TensorboardLogger(log_dir=os.path.join(
            get_tests_output_path(), "dummy_vits_logs"),
                                   model_name="vits_test_train_log")
        criterion = model.get_criterion()
        criterion = [criterion[0].to(device), criterion[1].to(device)]
        outputs = [None] * 2
        outputs[0], _ = model.train_step(batch, criterion, 0)
        outputs[1], _ = model.train_step(batch, criterion, 1)
        model.train_log(batch, outputs, logger, None, 1)

        model.eval_log(batch, outputs, logger, None, 1)
        logger.finish()
Exemple #17
0
 def test_train_eval_log(self):
     batch_size = BATCH_SIZE
     input_dummy, input_lengths, mel_spec, mel_lengths, _ = self._create_inputs(
         batch_size)
     batch = {}
     batch["text_input"] = input_dummy
     batch["text_lengths"] = input_lengths
     batch["mel_lengths"] = mel_lengths
     batch["mel_input"] = mel_spec
     batch["d_vectors"] = None
     batch["speaker_ids"] = None
     config = GlowTTSConfig(num_chars=32)
     model = GlowTTS.init_from_config(config, verbose=False).to(device)
     model.run_data_dep_init = False
     model.train()
     logger = TensorboardLogger(log_dir=os.path.join(
         get_tests_output_path(), "dummy_glow_tts_logs"),
                                model_name="glow_tts_test_train_log")
     criterion = model.get_criterion()
     outputs, _ = model.train_step(batch, criterion)
     model.train_log(batch, outputs, logger, None, 1)
     model.eval_log(batch, outputs, logger, None, 1)
     logger.finish()
Exemple #18
0
import glob
import os
import shutil

from tests import get_device_id, get_tests_output_path, run_cli
from TTS.vocoder.configs import WavegradConfig

config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
output_path = os.path.join(get_tests_output_path(), "train_outputs")

config = WavegradConfig(
    batch_size=8,
    eval_batch_size=8,
    num_loader_workers=0,
    num_val_loader_workers=0,
    run_eval=True,
    test_delay_epochs=-1,
    epochs=1,
    seq_len=8192,
    eval_split_size=1,
    print_step=1,
    print_eval=True,
    data_path="tests/data/ljspeech",
    output_path=output_path,
    test_noise_schedule={"min_val": 1e-6, "max_val": 1e-2, "num_steps": 2},
)
config.audio.do_trim_silence = True
config.audio.trim_db = 60
config.save_json(config_path)

# train the model for one epoch
Exemple #19
0
 def _create_random_model(self):
     config = load_config(os.path.join(get_tests_output_path(), 'dummy_model_config.json'))
     num_chars = len(phonemes) if config.use_phonemes else len(symbols)
     model = setup_model(num_chars, 0, config)
     output_path = os.path.join(get_tests_output_path())
     save_checkpoint(model, None, None, None, output_path, 10, 10)
Exemple #20
0
import glob
import os
import shutil

from tests import get_device_id, get_tests_output_path, run_cli
from TTS.tts.configs import SpeedySpeechConfig

config_path = os.path.join(get_tests_output_path(), "test_speedy_speech_config.json")
output_path = os.path.join(get_tests_output_path(), "train_outputs")


config = SpeedySpeechConfig(
    batch_size=8,
    eval_batch_size=8,
    num_loader_workers=0,
    num_val_loader_workers=0,
    text_cleaner="english_cleaners",
    use_phonemes=True,
    phoneme_language="zh-CN",
    phoneme_cache_path="tests/data/ljspeech/phoneme_cache/",
    run_eval=True,
    test_delay_epochs=-1,
    epochs=1,
    print_step=1,
    print_eval=True,
)
config.audio.do_trim_silence = True
config.audio.trim_db = 60
config.save_json(config_path)

# train the model for one epoch
from TTS.config.shared_configs import BaseAudioConfig
from TTS.speaker_encoder.speaker_encoder_config import SpeakerEncoderConfig


def run_test_train():
    command = (
        f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_encoder.py --config_path {config_path} "
        f"--coqpit.output_path {output_path} "
        "--coqpit.datasets.0.name ljspeech "
        "--coqpit.datasets.0.meta_file_train metadata.csv "
        "--coqpit.datasets.0.meta_file_val metadata.csv "
        "--coqpit.datasets.0.path tests/data/ljspeech ")
    run_cli(command)


config_path = os.path.join(get_tests_output_path(),
                           "test_speaker_encoder_config.json")
output_path = os.path.join(get_tests_output_path(), "train_outputs")

config = SpeakerEncoderConfig(
    batch_size=4,
    num_speakers_in_batch=1,
    num_utters_per_speaker=10,
    num_loader_workers=0,
    max_train_step=2,
    print_step=1,
    save_step=1,
    print_eval=True,
    audio=BaseAudioConfig(num_mels=80),
)
config.audio.do_trim_silence = True
import glob
import json
import os
import shutil

from trainer import get_last_checkpoint

from tests import get_device_id, get_tests_output_path, run_cli
from TTS.config.shared_configs import BaseAudioConfig
from TTS.tts.configs.fast_pitch_config import FastPitchConfig

config_path = os.path.join(get_tests_output_path(),
                           "fast_pitch_speaker_emb_config.json")
output_path = os.path.join(get_tests_output_path(), "train_outputs")

audio_config = BaseAudioConfig(
    sample_rate=22050,
    do_trim_silence=True,
    trim_db=60.0,
    signal_norm=False,
    mel_fmin=0.0,
    mel_fmax=8000,
    spec_gain=1.0,
    log_func="np.log",
    ref_level_db=20,
    preemphasis=0.0,
)

config = FastPitchConfig(
    audio=audio_config,
    batch_size=8,
Exemple #23
0
import glob
import json
import os
import shutil

from trainer import get_last_checkpoint

from tests import get_device_id, get_tests_output_path, run_cli
from TTS.tts.configs.tacotron2_config import Tacotron2Config

config_path = os.path.join(get_tests_output_path(), "test_model_config.json")
output_path = os.path.join(get_tests_output_path(), "train_outputs")

config = Tacotron2Config(
    r=5,
    batch_size=8,
    eval_batch_size=8,
    num_loader_workers=0,
    num_eval_loader_workers=0,
    text_cleaner="english_cleaners",
    use_phonemes=False,
    phoneme_language="en-us",
    phoneme_cache_path=os.path.join(get_tests_output_path(), "train_outputs/phoneme_cache/"),
    run_eval=True,
    test_delay_epochs=-1,
    epochs=1,
    print_step=1,
    print_eval=True,
    test_sentences=[
        "Be a voice, not an echo.",
    ],
Exemple #24
0
import shutil
import unittest

import numpy as np
import torch
from torch.utils.data import DataLoader

from tests import get_tests_output_path
from TTS.tts.configs import BaseTTSConfig
from TTS.tts.datasets import TTSDataset
from TTS.tts.datasets.preprocess import ljspeech
from TTS.utils.audio import AudioProcessor

# pylint: disable=unused-variable

OUTPATH = os.path.join(get_tests_output_path(), "loader_tests/")
os.makedirs(OUTPATH, exist_ok=True)

# create a dummy config for testing data loaders.
c = BaseTTSConfig(text_cleaner="english_cleaners",
                  num_loader_workers=0,
                  batch_size=2)
c.r = 5
c.data_path = "tests/data/ljspeech/"
ok_ljspeech = os.path.exists(c.data_path)

DATA_EXIST = True
if not os.path.exists(c.data_path):
    DATA_EXIST = False

print(" > Dynamic data loader test: {}".format(DATA_EXIST))
Exemple #25
0
import os
import unittest

import torch

from tests import get_tests_output_path, run_cli
from TTS.config.shared_configs import BaseDatasetConfig
from TTS.tts.configs.vits_config import VitsConfig

torch.manual_seed(1)

config_path = os.path.join(get_tests_output_path(), "test_model_config.json")

dataset_config_en = BaseDatasetConfig(
    name="ljspeech",
    meta_file_train="metadata.csv",
    meta_file_val="metadata.csv",
    path="tests/data/ljspeech",
    language="en",
)

dataset_config_pt = BaseDatasetConfig(
    name="ljspeech",
    meta_file_train="metadata.csv",
    meta_file_val="metadata.csv",
    path="tests/data/ljspeech",
    language="pt-br",
)


# pylint: disable=protected-access
import os
import unittest

from tests import get_tests_input_path, get_tests_output_path, get_tests_path
from TTS.config import BaseAudioConfig
from TTS.utils.audio import AudioProcessor

TESTS_PATH = get_tests_path()
OUT_PATH = os.path.join(get_tests_output_path(), "audio_tests")
WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav")

os.makedirs(OUT_PATH, exist_ok=True)
conf = BaseAudioConfig(mel_fmax=8000)


# pylint: disable=protected-access
class TestAudio(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.ap = AudioProcessor(**conf)

    def test_audio_synthesis(self):
        """1. load wav
        2. set normalization parameters
        3. extract mel-spec
        4. invert to wav and save the output
        """
        print(" > Sanity check for the process wav -> mel -> wav")

        def _test(max_norm, signal_norm, symmetric_norm, clip_norm):
            self.ap.max_norm = max_norm
Exemple #27
0
import glob
import json
import os
import shutil

from trainer import get_last_checkpoint

from tests import get_device_id, get_tests_output_path, run_cli
from TTS.tts.configs.glow_tts_config import GlowTTSConfig

config_path = os.path.join(get_tests_output_path(), "test_model_config.json")
output_path = os.path.join(get_tests_output_path(), "train_outputs")

config = GlowTTSConfig(
    batch_size=2,
    eval_batch_size=8,
    num_loader_workers=0,
    num_eval_loader_workers=0,
    text_cleaner="english_cleaners",
    use_phonemes=True,
    phoneme_language="en-us",
    phoneme_cache_path="tests/data/ljspeech/phoneme_cache/",
    run_eval=True,
    test_delay_epochs=-1,
    epochs=1,
    print_step=1,
    print_eval=True,
    test_sentences=[
        "Be a voice, not an echo.",
    ],
    data_dep_init_steps=1.0,