def test_wavernn():
    """Smoke-test Wavernn: forward pass in every output mode, then inference.

    For each mode the model is rebuilt and the forward output shape is
    checked; inference runs on the last (quantized) model.
    """
    config = WavernnConfig()
    config.model_args = WavernnArgs(
        rnn_dims=512,
        fc_dims=512,
        mode="mold",
        mulaw=False,
        pad=2,
        use_aux_net=True,
        use_upsample_net=True,
        upsample_factors=[4, 8, 8],
        feat_dims=80,
        compute_dims=128,
        res_out_dims=128,
        num_res_blocks=10,
    )
    config.audio.hop_length = 256
    config.audio.sample_rate = 2048

    dummy_x = torch.rand((2, 1280))
    dummy_m = torch.rand((2, 80, 9))
    y_size = random.randrange(20, 60)
    dummy_y = torch.rand((80, y_size))

    # (mode, expected output channels): mold -> 30, gauss -> 2, 4-bit -> 2**4.
    for mode, n_channels in (("mold", 30), ("gauss", 2), (4, 2**4)):
        config.model_args.mode = mode
        model = Wavernn(config)
        output = model(dummy_x, dummy_m)
        assert np.all(output.shape == (2, 1280, n_channels)), output.shape

    # Inference on the quantized model: hop_length samples per mel frame,
    # minus one frame of padding.
    output = model.inference(dummy_y, True, 5500, 550)
    assert np.all(output.shape == (256 * (y_size - 1),))
from tests import get_device_id, get_tests_output_path, run_cli
from TTS.vocoder.configs import WavernnConfig
from TTS.vocoder.models.wavernn import WavernnArgs

# Where the serialized test config and the training artifacts land.
output_path = os.path.join(get_tests_output_path(), "train_outputs")
config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")

# Minimal single-epoch config so the CLI smoke test stays fast.
config = WavernnConfig(
    model_args=WavernnArgs(),
    batch_size=8,
    eval_batch_size=8,
    num_loader_workers=0,
    num_eval_loader_workers=0,
    run_eval=True,
    test_delay_epochs=-1,
    epochs=1,
    seq_len=256,  # for shorter test time
    eval_split_size=1,
    print_step=1,
    print_eval=True,
    data_path="tests/data/ljspeech",
    output_path=output_path,
)
config.audio.do_trim_silence = True
config.audio.trim_db = 60
config.save_json(config_path)

# Train for one epoch through the CLI entry point.
command_train = (
    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' "
    f"python TTS/bin/train_vocoder.py --config_path {config_path} "
)
run_cli(command_train)
from TTS.utils.audio import AudioProcessor
from TTS.vocoder.configs import WavernnConfig
from TTS.vocoder.datasets.preprocess import load_wav_data
from TTS.vocoder.models.wavernn import Wavernn

# Training recipe for WaveRNN on LJSpeech; outputs go next to this script.
output_path = os.path.dirname(os.path.abspath(__file__))

config = WavernnConfig(
    batch_size=64,
    eval_batch_size=16,
    num_loader_workers=4,
    num_eval_loader_workers=4,
    run_eval=True,
    test_delay_epochs=-1,
    epochs=10000,
    seq_len=1280,
    pad_short=2000,
    use_noise_augment=False,
    eval_split_size=10,
    print_step=25,
    print_eval=True,
    mixed_precision=False,
    lr=1e-4,
    grad_clip=4,
    data_path=os.path.join(output_path, "../LJSpeech-1.1/wavs/"),
    output_path=output_path,
)

# init audio processor
ap = AudioProcessor(**config.audio.to_dict())

# load training samples
# NOTE(review): the source was truncated mid-call after `config.data_path,`;
# the second argument is reconstructed as `config.eval_split_size` (the only
# split-size value defined above) — confirm against the original recipe.
eval_samples, train_samples = load_wav_data(config.data_path, config.eval_split_size)
import shutil

import numpy as np
from torch.utils.data import DataLoader

from tests import get_tests_output_path, get_tests_path
from TTS.utils.audio import AudioProcessor
from TTS.vocoder.configs import WavernnConfig
from TTS.vocoder.datasets.preprocess import load_wav_feat_data, preprocess_wav_files
from TTS.vocoder.datasets.wavernn_dataset import WaveRNNDataset

# NOTE(review): `os` is used below but not imported in this chunk —
# presumably imported earlier in the file; verify.

# Fixture locations: loader output directory and the LJSpeech test data.
file_path = os.path.dirname(os.path.realpath(__file__))
OUTPATH = os.path.join(get_tests_output_path(), "loader_tests/")
os.makedirs(OUTPATH, exist_ok=True)

# Default vocoder config, mutated per-case by wavernn_dataset_case below.
C = WavernnConfig()

test_data_path = os.path.join(get_tests_path(), "data/ljspeech/")
test_mel_feat_path = os.path.join(test_data_path, "mel")
test_quant_feat_path = os.path.join(test_data_path, "quant")
# Cases only run when the LJSpeech fixture data is actually present.
ok_ljspeech = os.path.exists(test_data_path)


def wavernn_dataset_case(batch_size, seq_len, hop_len, pad, mode, mulaw, num_workers):
    """run dataloader with given parameters and check conditions

    NOTE(review): this definition appears truncated in this chunk — only the
    config setup is visible; the DataLoader construction and assertions
    presumably follow outside this view.
    """
    ap = AudioProcessor(**C.audio)
    C.batch_size = batch_size
    C.mode = mode
    C.seq_len = seq_len