Beispiel #1
0
def test_wavernn():
    """Check Wavernn forward-pass output shapes per mode, then inference length.

    A fresh ``Wavernn`` is built from the same config for the "mold", "gauss"
    and integer (4) modes, and the forward output shape is asserted each time.
    The model built for the last mode is additionally run through
    ``inference`` and the length of its 1-D output is asserted.
    """
    config = WavernnConfig()
    config.model_args = WavernnArgs(
        mode="mold",
        mulaw=False,
        pad=2,
        rnn_dims=512,
        fc_dims=512,
        feat_dims=80,
        compute_dims=128,
        res_out_dims=128,
        num_res_blocks=10,
        use_aux_net=True,
        use_upsample_net=True,
        upsample_factors=[4, 8, 8],
    )
    config.audio.hop_length = 256
    config.audio.sample_rate = 2048

    batch, steps = 2, 1280
    forward_x = torch.rand((batch, steps))
    forward_m = torch.rand((batch, 80, 9))
    y_size = random.randrange(20, 60)
    inference_input = torch.rand((80, y_size))

    def build_and_check(last_dim):
        """Build a model from the current config and assert its output shape."""
        net = Wavernn(config)
        result = net(forward_x, forward_m)
        assert np.all(result.shape == (batch, steps, last_dim)), result.shape
        return net

    # mode: mold (set through WavernnArgs above)
    build_and_check(30)

    # mode: gauss
    config.model_args.mode = "gauss"
    build_and_check(2)

    # mode: quantized (mode=4 -> last dimension 2**4)
    config.model_args.mode = 4
    model = build_and_check(2**4)

    # The expected length uses 256, the same value as config.audio.hop_length.
    output = model.inference(inference_input, True, 5500, 550)
    assert np.all(output.shape == (256 * (y_size - 1),))
Beispiel #2
0
from tests import get_device_id, get_tests_output_path, run_cli
from TTS.vocoder.configs import WavernnConfig
from TTS.vocoder.models.wavernn import WavernnArgs

# The serialized config and the training outputs both live under the tests
# output directory.
output_path = os.path.join(get_tests_output_path(), "train_outputs")
config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")

config = WavernnConfig(
    model_args=WavernnArgs(),
    # data and output locations
    data_path="tests/data/ljspeech",
    output_path=output_path,
    # batching / loading
    batch_size=8,
    eval_batch_size=8,
    num_loader_workers=0,
    num_eval_loader_workers=0,
    seq_len=256,  # for shorter test time
    # schedule: a single epoch with evaluation enabled
    epochs=1,
    run_eval=True,
    test_delay_epochs=-1,
    eval_split_size=1,
    # logging
    print_step=1,
    print_eval=True,
)
config.audio.trim_db = 60
config.audio.do_trim_silence = True

# Write the config to disk so the training script can load it via --config_path.
config.save_json(config_path)

# train the model for one epoch
command_train = (
    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' "
    f"python TTS/bin/train_vocoder.py --config_path {config_path} "
)
run_cli(command_train)
Beispiel #3
0
from TTS.utils.audio import AudioProcessor
from TTS.vocoder.configs import WavernnConfig
from TTS.vocoder.datasets.preprocess import load_wav_data
from TTS.vocoder.models.wavernn import Wavernn

# Outputs are written next to this script.
output_path = os.path.dirname(os.path.abspath(__file__))

config = WavernnConfig(
    # data and output locations; the wavs are looked up relative to this
    # script's directory
    data_path=os.path.join(output_path, "../LJSpeech-1.1/wavs/"),
    output_path=output_path,
    # batching / loading
    batch_size=64,
    eval_batch_size=16,
    num_loader_workers=4,
    num_eval_loader_workers=4,
    seq_len=1280,
    pad_short=2000,
    use_noise_augment=False,
    # schedule and evaluation
    epochs=10000,
    run_eval=True,
    test_delay_epochs=-1,
    eval_split_size=10,
    # optimization
    lr=1e-4,
    grad_clip=4,
    mixed_precision=False,
    # logging
    print_step=25,
    print_eval=True,
)

# Build the audio processor from the audio section of the config.
ap = AudioProcessor(**config.audio.to_dict())

# load training samples
eval_samples, train_samples = load_wav_data(config.data_path,
Beispiel #4
0
import shutil

import numpy as np
from torch.utils.data import DataLoader

from tests import get_tests_output_path, get_tests_path
from TTS.utils.audio import AudioProcessor
from TTS.vocoder.configs import WavernnConfig
from TTS.vocoder.datasets.preprocess import load_wav_feat_data, preprocess_wav_files
from TTS.vocoder.datasets.wavernn_dataset import WaveRNNDataset

# Directory containing this test module (symlinks resolved).
file_path = os.path.dirname(os.path.realpath(__file__))

# Scratch directory for the loader tests; created on import if missing.
OUTPATH = os.path.join(get_tests_output_path(), "loader_tests/")
os.makedirs(OUTPATH, exist_ok=True)

# Default WaveRNN config used by the dataset test case in this module.
C = WavernnConfig()

# LJSpeech test data location and its "mel" / "quant" feature sub-directories.
test_data_path = os.path.join(get_tests_path(), "data/ljspeech/")
test_mel_feat_path, test_quant_feat_path = (
    os.path.join(test_data_path, subdir) for subdir in ("mel", "quant")
)
# True when the LJSpeech test data directory exists.
ok_ljspeech = os.path.exists(test_data_path)


def wavernn_dataset_case(batch_size, seq_len, hop_len, pad, mode, mulaw,
                         num_workers):
    """run dataloader with given parameters and check conditions"""
    ap = AudioProcessor(**C.audio)

    C.batch_size = batch_size
    C.mode = mode
    C.seq_len = seq_len