import torch from tests import get_tests_input_path from torch import nn, optim from TTS.tts.layers.losses import L1LossMasked from TTS.tts.models.tacotron import Tacotron from TTS.utils.io import load_config from TTS.utils.audio import AudioProcessor #pylint: disable=unused-variable torch.manual_seed(1) use_cuda = torch.cuda.is_available() device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") c = load_config(os.path.join(get_tests_input_path(), 'test_config.json')) ap = AudioProcessor(**c.audio) WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav") def count_parameters(model): r"""Count number of trainable parameters in a network""" return sum(p.numel() for p in model.parameters() if p.requires_grad) class TacotronTrainTest(unittest.TestCase): @staticmethod def test_train_step(): input_dummy = torch.randint(0, 24, (8, 128)).long().to(device) input_lengths = torch.randint(100, 129, (8, )).long().to(device)
import os

import tensorflow as tf
import soundfile as sf
from librosa.core import load

from tests import get_tests_path, get_tests_input_path
from TTS.vocoder.tf.layers.pqmf import PQMF

TESTS_PATH = get_tests_path()
WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav")


def test_pqmf():
    """Run the example waveform through the TF PQMF layer and dump the result.

    The waveform is loaded from ``WAV_FILE``, passed through
    ``layer.analysis`` and then ``layer.synthesis``, and the synthesized
    signal is summarised on stdout (max / min / mean) and written to
    ``tf_pqmf_output.wav`` relative to the current working directory.

    The test contains no assertions; it only checks that the round trip
    runs without raising.
    """
    w, sr = load(WAV_FILE)
    layer = PQMF(N=4, taps=62, cutoff=0.15, beta=9.0)

    # Prepend two singleton axes to the 1-D waveform before handing it to the layer.
    w2 = tf.convert_to_tensor(w[None, None, :])
    b2 = layer.analysis(w2)
    w2_ = layer.synthesis(b2)
    # Convert the *synthesized* signal. The previous code converted the input
    # tensor `w2` here, which silently discarded the synthesis result.
    # NOTE(review): assumes `synthesis` returns an eager tensor exposing
    # `.numpy()`, like `w2` does -- confirm against the PQMF layer.
    w2_ = w2_.numpy()

    print(w2_.max())
    print(w2_.min())
    print(w2_.mean())
    sf.write('tf_pqmf_output.wav', w2_.flatten(), sr)
import os import unittest import torch as T from tests import get_tests_input_path from TTS.speaker_encoder.losses import GE2ELoss, AngleProtoLoss from TTS.speaker_encoder.model import SpeakerEncoder from TTS.utils.io import load_config file_path = get_tests_input_path() c = load_config(os.path.join(file_path, "test_config.json")) class SpeakerEncoderTests(unittest.TestCase): # pylint: disable=R0201 def test_in_out(self): dummy_input = T.rand(4, 20, 80) # B x T x D dummy_hidden = [T.rand(2, 4, 128), T.rand(2, 4, 128)] model = SpeakerEncoder(input_dim=80, proj_dim=256, lstm_dim=768, num_lstm_layers=3) # computing d vectors output = model.forward(dummy_input) assert output.shape[0] == 4 assert output.shape[1] == 256 output = model.inference(dummy_input) assert output.shape[0] == 4 assert output.shape[1] == 256 # compute d vectors by passing LSTM hidden
import os import numpy as np from tests import get_tests_path, get_tests_input_path, get_tests_output_path from torch.utils.data import DataLoader from mozilla_voice_tts.utils.audio import AudioProcessor from mozilla_voice_tts.utils.io import load_config from mozilla_voice_tts.vocoder.datasets.gan_dataset import GANDataset from mozilla_voice_tts.vocoder.datasets.preprocess import load_wav_data file_path = os.path.dirname(os.path.realpath(__file__)) OUTPATH = os.path.join(get_tests_output_path(), "loader_tests/") os.makedirs(OUTPATH, exist_ok=True) C = load_config(os.path.join(get_tests_input_path(), 'test_config.json')) test_data_path = os.path.join(get_tests_path(), "data/ljspeech/") ok_ljspeech = os.path.exists(test_data_path) def gan_dataset_case(batch_size, seq_len, hop_len, conv_pad, return_segments, use_noise_augment, use_cache, num_workers): ''' run dataloader with given parameters and check conditions ''' ap = AudioProcessor(**C.audio) _, train_items = load_wav_data(test_data_path, 10) dataset = GANDataset(ap, train_items, seq_len=seq_len, hop_len=hop_len, pad_short=2000,
import os import unittest import torch from tests import assertHasAttr, assertHasNotAttr, get_tests_input_path from TTS.config import load_config from TTS.speaker_encoder.utils.generic_utils import setup_speaker_encoder_model from TTS.tts.configs.vits_config import VitsConfig from TTS.tts.models.vits import Vits, VitsArgs from TTS.tts.utils.speakers import SpeakerManager LANG_FILE = os.path.join(get_tests_input_path(), "language_ids.json") SPEAKER_ENCODER_CONFIG = os.path.join(get_tests_input_path(), "test_speaker_encoder_config.json") torch.manual_seed(1) use_cuda = torch.cuda.is_available() device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # pylint: disable=no-self-use class TestVits(unittest.TestCase): def test_init_multispeaker(self): num_speakers = 10 args = VitsArgs(num_speakers=num_speakers, use_speaker_embedding=True) model = Vits(args) assertHasAttr(self, model, "emb_g") args = VitsArgs(num_speakers=0, use_speaker_embedding=True) model = Vits(args)
def test_in_out(self):
    """Smoke-test the synthesizer: build it from the server config and run one sentence.

    No output is asserted; the test passes as long as nothing raises.
    """
    # Helper is defined outside this view; presumably it prepares the model
    # that the server config refers to -- TODO confirm.
    self._create_random_model()

    server_config_path = os.path.join(get_tests_input_path(), 'server_config.json')
    server_config = load_config(server_config_path)

    tts_engine = Synthesizer(server_config)
    tts_engine.tts("Better this test works!!")
import os import unittest import numpy as np import torch from tests import get_tests_input_path from TTS.config import load_config from TTS.speaker_encoder.utils.generic_utils import setup_speaker_encoder_model from TTS.speaker_encoder.utils.io import save_checkpoint from TTS.tts.utils.speakers import SpeakerManager from TTS.utils.audio import AudioProcessor encoder_config_path = os.path.join(get_tests_input_path(), "test_speaker_encoder_config.json") encoder_model_path = os.path.join(get_tests_input_path(), "checkpoint_0.pth.tar") sample_wav_path = os.path.join(get_tests_input_path(), "../data/ljspeech/wavs/LJ001-0001.wav") sample_wav_path2 = os.path.join(get_tests_input_path(), "../data/ljspeech/wavs/LJ001-0002.wav") d_vectors_file_path = os.path.join(get_tests_input_path(), "../data/dummy_speakers.json") class SpeakerManagerTest(unittest.TestCase): """Test SpeakerManager for loading embedding files and computing d_vectors from waveforms""" @staticmethod def test_speaker_embedding(): # load config config = load_config(encoder_config_path) config.audio.resample = True # create a dummy speaker encoder model = setup_speaker_encoder_model(config)
import torch from tests import get_tests_input_path from TTS.tts.tf.models.tacotron2 import Tacotron2 from TTS.tts.tf.utils.tflite import convert_tacotron2_to_tflite, load_tflite_model from TTS.utils.io import load_config tf.get_logger().setLevel("INFO") # pylint: disable=unused-variable torch.manual_seed(1) use_cuda = torch.cuda.is_available() device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") c = load_config(os.path.join(get_tests_input_path(), "test_config.json")) class TacotronTFTrainTest(unittest.TestCase): @staticmethod def generate_dummy_inputs(): chars_seq = torch.randint(0, 24, (8, 128)).long().to(device) chars_seq_lengths = torch.randint(100, 128, (8, )).long().to(device) chars_seq_lengths = torch.sort(chars_seq_lengths, descending=True)[0] mel_spec = torch.rand(8, 30, c.audio["num_mels"]).to(device) mel_postnet_spec = torch.rand(8, 30, c.audio["num_mels"]).to(device) mel_lengths = torch.randint(20, 30, (8, )).long().to(device) stop_targets = torch.zeros(8, 30, 1).float().to(device) speaker_ids = torch.randint(0, 5, (8, )).long().to(device) chars_seq = tf.convert_to_tensor(chars_seq.cpu().numpy())
import shutil import numpy as np from tests import get_tests_path, get_tests_input_path, get_tests_output_path from torch.utils.data import DataLoader from TTS.utils.audio import AudioProcessor from TTS.utils.io import load_config from TTS.vocoder.datasets.wavernn_dataset import WaveRNNDataset from TTS.vocoder.datasets.preprocess import load_wav_feat_data, preprocess_wav_files file_path = os.path.dirname(os.path.realpath(__file__)) OUTPATH = os.path.join(get_tests_output_path(), "loader_tests/") os.makedirs(OUTPATH, exist_ok=True) C = load_config(os.path.join(get_tests_input_path(), "test_vocoder_wavernn_config.json")) test_data_path = os.path.join(get_tests_path(), "data/ljspeech/") test_mel_feat_path = os.path.join(test_data_path, "mel") test_quant_feat_path = os.path.join(test_data_path, "quant") ok_ljspeech = os.path.exists(test_data_path) def wavernn_dataset_case(batch_size, seq_len, hop_len, pad, mode, mulaw, num_workers): """ run dataloader with given parameters and check conditions """ ap = AudioProcessor(**C.audio) C.batch_size = batch_size C.mode = mode C.seq_len = seq_len