Example #1
def main():
    batch_size = 64
    tacotron = Tacotron(batch_size=batch_size)
    num_lines = sum(1 for _ in open("training/train.txt"))
    max_index = num_lines // batch_size

    # Iterate over the training data indefinitely, wrapping around at the end
    index = 0
    while True:
        inputs, linear_targets, mel_targets = get_data(index, batch_size)
        tacotron.train(inputs, linear_targets, mel_targets)
        index += 1
        if index >= max_index:
            index = 0
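
Example #1 calls a get_data helper that is not shown. The sketch below is a hypothetical reconstruction of what the loop expects, assuming training/train.txt lists one pipe-separated metadata entry per line that points at precomputed feature files; the field names and file format are assumptions, not part of the original.

import numpy as np

def get_data(index, batch_size):
    # Read the metadata lines for this batch
    # (assumed line format: text|linear_path|mel_path)
    with open("training/train.txt") as f:
        lines = f.readlines()
    batch = lines[index * batch_size:(index + 1) * batch_size]

    inputs, linear_targets, mel_targets = [], [], []
    for line in batch:
        text, linear_path, mel_path = line.strip().split("|")
        inputs.append(text)
        linear_targets.append(np.load(linear_path))  # precomputed linear spectrogram
        mel_targets.append(np.load(mel_path))        # precomputed mel spectrogram
    return inputs, linear_targets, mel_targets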
Example #2
def main():
    g = Tacotron()
    print("Training Graph loaded")

    with g.graph.as_default():

        # Training
        sv = tf.train.Supervisor(logdir=hp.logdir, save_model_secs=0)
        with sv.managed_session() as sess:
            for epoch in range(1, hp.num_epochs + 1):
                if sv.should_stop(): break
                for step in tqdm(range(g.num_batch),
                                 total=g.num_batch,
                                 ncols=70,
                                 leave=False,
                                 unit='b'):
                    sess.run(g.train_op)

                # Write checkpoint files at every epoch
                gs = sess.run(g.global_step)
                sv.saver.save(
                    sess, hp.logdir + '/model_epoch_%02d_gs_%d' % (epoch, gs))
Example #3
def main():
    config = ConfigXT()
    load = FileXT(config.audio_path)

    print(
        colored('Preprocessing audio for ', 'blue', attrs=['bold']) +
        load.basename)
    data = preprocess.preprocess(load.filename,
                                 config.speaker,
                                 config,
                                 verbose=False)
    dataloader = dataprocess.load_infer(data)

    model = Tacotron(config)
    model.load_state_dict(
        torch.load(config.model_path, map_location='cpu')['state_dict'])
    model = set_device(model, config.device)
    model.eval()

    print(
        colored('Generating mel-spectrogram with ', 'blue', attrs=['bold']) +
        config.model_path)
    mel = []
    y_prev = set_device(torch.zeros(1, config.mel_size, 1), config.device)
    for batch in tqdm(dataloader, leave=False, ascii=True):
        x, y_prev, _ = set_device(batch, config.device)

        y_gen, _ = model(x, y_prev)
        mel.append(y_gen.data)
        y_prev = y_gen[..., -1].unsqueeze(-1)

    mel = torch.cat(mel, dim=-1)
    if config.vocoder == 'wavernn':
        wave = wavernn_infer(mel, config)
    elif config.vocoder == 'waveglow':
        wave = waveglow_infer(mel, config)
    else:
        raise ValueError('Unsupported vocoder: %s' % config.vocoder)

    savename = config.model_path.replace('.pt', '_') + FileXT(
        config.vocoder_path).basestem + '_speaker' + str(
            config.speaker) + '_' + load.basename
    torchaudio.save(savename, wave, config.sample_rate)

    print(colored('Audio generated to ', 'blue', attrs=['bold']) + savename)
Example #4
# From https://pypi.org/project/tacotron/

import torch
import soundfile as sf
from univoc import Vocoder
from tacotron import load_cmudict, text_to_id, Tacotron

# download pretrained weights for the vocoder (and optionally move to GPU)
vocoder = Vocoder.from_pretrained(
    "https://github.com/bshall/UniversalVocoding/releases/download/v0.2/univoc-ljspeech-7mtpaq.pt"
).cuda()

# download pretrained weights for tacotron (and optionally move to GPU)
tacotron = Tacotron.from_pretrained(
    "https://github.com/bshall/Tacotron/releases/download/v0.1/tacotron-ljspeech-yspjx3.pt"
).cuda()

# load the CMU pronouncing dictionary
cmudict = load_cmudict()

text = "Your glasses are in Bangladesh."

# convert text to phone ids
x = torch.LongTensor(text_to_id(text, cmudict)).unsqueeze(0).cuda()

# synthesize audio
with torch.no_grad():
    mel, _ = tacotron.generate(x)
    wav, sr = vocoder.generate(mel.transpose(1, 2))

# save the synthesized waveform to disk (output filename chosen here for illustration)
sf.write("generated.wav", wav, sr)
Example #5
def main():
    tacotron = Tacotron(1, is_training=False)
    text_input = input()
Example #6
def train_model(args):
    with open("tacotron/config.toml") as file:
        cfg = toml.load(file)

    tensorboard_path = Path("tensorboard") / args.checkpoint_dir
    checkpoint_dir = Path(args.checkpoint_dir)
    writer = SummaryWriter(tensorboard_path)

    tacotron = Tacotron(**cfg["model"]).cuda()
    optimizer = optim.Adam(tacotron.parameters(),
                           lr=cfg["train"]["optimizer"]["lr"])
    scaler = amp.GradScaler()
    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer=optimizer,
        milestones=cfg["train"]["scheduler"]["milestones"],
        gamma=cfg["train"]["scheduler"]["gamma"],
    )

    if args.resume is not None:
        global_step = load_checkpoint(
            tacotron=tacotron,
            optimizer=optimizer,
            scaler=scaler,
            scheduler=scheduler,
            load_path=args.resume,
        )
    else:
        global_step = 0

    root_path = Path(args.dataset_dir)
    text_path = Path(args.text_path)

    dataset = TTSDataset(root_path, text_path)
    sampler = samplers.RandomSampler(dataset)
    batch_sampler = BucketBatchSampler(
        sampler=sampler,
        batch_size=cfg["train"]["batch_size"],
        drop_last=True,
        sort_key=dataset.sort_key,
        bucket_size_multiplier=cfg["train"]["bucket_size_multiplier"],
    )
    collate_fn = partial(
        pad_collate,
        reduction_factor=cfg["model"]["decoder"]["reduction_factor"])
    loader = DataLoader(
        dataset,
        batch_sampler=batch_sampler,
        collate_fn=collate_fn,
        num_workers=cfg["train"]["n_workers"],
        pin_memory=True,
    )

    n_epochs = cfg["train"]["n_steps"] // len(loader) + 1
    start_epoch = global_step // len(loader) + 1

    for epoch in range(start_epoch, n_epochs + 1):
        average_loss = 0

        for i, (mels, texts, mel_lengths, text_lengths,
                attn_flag) in enumerate(tqdm(loader), 1):
            mels, texts = mels.cuda(), texts.cuda()

            optimizer.zero_grad()

            with amp.autocast():
                ys, alphas = tacotron(texts, mels)
                loss = F.l1_loss(ys, mels)

            scaler.scale(loss).backward()
            scaler.unscale_(optimizer)
            clip_grad_norm_(tacotron.parameters(),
                            cfg["train"]["clip_grad_norm"])
            scaler.step(optimizer)
            scaler.update()
            scheduler.step()

            global_step += 1

            average_loss += (loss.item() - average_loss) / i

            if global_step % cfg["train"]["checkpoint_interval"] == 0:
                save_checkpoint(
                    tacotron=tacotron,
                    optimizer=optimizer,
                    scaler=scaler,
                    scheduler=scheduler,
                    step=global_step,
                    checkpoint_dir=checkpoint_dir,
                )

            if attn_flag:
                index = attn_flag[0]
                alpha = alphas[
                    index, :text_lengths[index], :mel_lengths[index] // 2]
                alpha = alpha.detach().cpu().numpy()

                y = ys[index, :, :].detach().cpu().numpy()
                log_alignment(alpha, y, cfg["preprocess"], writer, global_step)

        writer.add_scalar("loss", average_loss, global_step)
        print(
            f"epoch {epoch} : loss {average_loss:.4f} : {scheduler.get_last_lr()}"
        )
Example #7
def main():
    config = ConfigXT()
    config_basename = FileXT(config.file).basename
    print("Configuration file: %s" % (config_basename))

    checkpoint_path = config.checkpoint_path
    if not config.test_run:
        checkpoint_path = FileXT(config.checkpoint_path, '').create_path()
        config.save(os.path.join(checkpoint_path, config_basename))
        writer = SummaryWriter(checkpoint_path)

    dataloader = dataprocess.load_train(config)
    model = Tacotron(config)
    model = set_device(model, config.device)
    criterion = torch.nn.L1Loss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config.learn_rate,
                                 weight_decay=config.weight_decay)
    scheduler = StepLR(optimizer,
                       step_size=len(dataloader.train) * config.step_size,
                       gamma=config.factor)

    losses = []
    loss_train = LossLog()
    loss_valid = LossLog()
    for epoch in range(config.stop_epoch):
        # Train Loop
        model.train()
        for batch in tqdm(dataloader.train, leave=False, ascii=True):
            x, y_prev, y = set_device(batch, config.device)

            optimizer.zero_grad()
            y_gen, y_decoder_gen = model(x, y_prev)
            loss = criterion(y_gen, y) + criterion(y_decoder_gen, y)
            loss.backward()
            if config.clip_grad_norm is not None:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), config.clip_grad_norm)
            optimizer.step()
            scheduler.step()

            loss_train.add(loss.item(), y[0].size(0))
            if not config.test_run:
                writer.add_scalar('train/l1_loss', loss.item(),
                                  loss_train.iteration)

        # Validation Loop (gradients are not needed during evaluation)
        model.eval()
        with torch.no_grad():
            for batch in tqdm(dataloader.valid, leave=False, ascii=True):
                x, y_prev, y = set_device(batch, config.device)

                y_gen, y_decoder_gen = model(x, y_prev)
                loss = criterion(y_gen, y) + criterion(y_decoder_gen, y)

                loss_valid.add(loss.item(), y[0].size(0))
                if not config.test_run:
                    writer.add_scalar('valid/l1_loss', loss.item(),
                                      loss_valid.iteration)

        learn_rate = scheduler.get_last_lr()[0]
        print(
            "[Epoch %d/%d] [loss train: %.5f] [loss valid: %.5f] [lr: %.5f]" %
            (epoch, config.stop_epoch, loss_train.avg(), loss_valid.avg(),
             learn_rate))

        losses.append([loss_train.avg(), loss_valid.avg()])
        loss_train.reset()
        loss_valid.reset()

        if not config.test_run:
            loss_savename = os.path.join(checkpoint_path, 'loss.pt')
            torch.save(losses, loss_savename)

            savename = os.path.join(checkpoint_path, 'latest_checkpoint.pt')
            save_checkpoint(savename, model, optimizer, learn_rate,
                            loss_train.iteration)

            if epoch % config.save_epoch == 0:
                savename = os.path.join(checkpoint_path,
                                        'epoch' + str(epoch) + '.pt')
                save_checkpoint(savename, model, optimizer, learn_rate,
                                loss_train.iteration)
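
Example #7 relies on a small LossLog accumulator whose implementation is not shown. The following is a minimal sketch consistent with how it is called above (add, avg, reset, and the iteration counter); it is an assumption about that helper, not the original implementation.

class LossLog:
    # Hypothetical sketch of the LossLog helper used in Example #7.
    # Tracks a per-epoch weighted average loss and a monotonically
    # increasing iteration counter used as the TensorBoard x-axis.
    def __init__(self):
        self.iteration = 0
        self.reset()

    def add(self, loss, count):
        # Accumulate a batch loss weighted by the number of items in the batch
        self.total += loss * count
        self.count += count
        self.iteration += 1

    def avg(self):
        return self.total / max(self.count, 1)

    def reset(self):
        # Start a fresh epoch average; the iteration counter keeps growing
        self.total = 0.0
        self.count = 0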
Example #8
def train_model(cfg):
    tensorboard_path = Path(utils.to_absolute_path("tensorboard")) / cfg.checkpoint_dir
    checkpoint_dir = Path(utils.to_absolute_path(cfg.checkpoint_dir))
    writer = SummaryWriter(tensorboard_path)

    tacotron = Tacotron(**cfg.model).cuda()
    optimizer = optim.Adam(tacotron.parameters(), lr=cfg.train.optimizer.lr)
    scaler = amp.GradScaler()
    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer=optimizer,
        milestones=cfg.train.scheduler.milestones,
        gamma=cfg.train.scheduler.gamma,
    )

    if cfg.resume:
        resume_path = utils.to_absolute_path(cfg.resume)
        global_step = load_checkpoint(
            tacotron=tacotron,
            optimizer=optimizer,
            scaler=scaler,
            scheduler=scheduler,
            load_path=resume_path,
        )
    else:
        global_step = 0

    root_path = Path(utils.to_absolute_path(cfg.dataset_dir))
    text_path = Path(utils.to_absolute_path(cfg.text_path))

    dataset = TTSDataset(root_path, text_path)
    sampler = samplers.RandomSampler(dataset)
    batch_sampler = BucketBatchSampler(
        sampler=sampler,
        batch_size=cfg.train.batch_size,
        drop_last=True,
        sort_key=dataset.sort_key,
        bucket_size_multiplier=cfg.train.bucket_size_multiplier,
    )
    loader = DataLoader(
        dataset,
        batch_sampler=batch_sampler,
        collate_fn=pad_collate,
        num_workers=cfg.train.n_workers,
        pin_memory=True,
    )

    n_epochs = cfg.train.n_steps // len(loader) + 1
    start_epoch = global_step // len(loader) + 1

    for epoch in range(start_epoch, n_epochs + 1):
        average_loss = 0

        for i, (mels, texts, mel_lengths, text_lengths, attn_flag) in enumerate(
            tqdm(loader), 1
        ):
            mels, texts = mels.cuda(), texts.cuda()

            optimizer.zero_grad()

            with amp.autocast():
                ys, alphas = tacotron(texts, mels)
                loss = F.l1_loss(ys, mels)

            scaler.scale(loss).backward()
            scaler.unscale_(optimizer)
            grad_norm = clip_grad_norm_(tacotron.parameters(), cfg.train.clip_grad_norm)
            scaler.step(optimizer)
            scaler.update()
            scheduler.step()

            global_step += 1

            average_loss += (loss.item() - average_loss) / i

            if global_step % cfg.train.checkpoint_interval == 0:
                save_checkpoint(
                    tacotron=tacotron,
                    optimizer=optimizer,
                    scaler=scaler,
                    scheduler=scheduler,
                    step=global_step,
                    checkpoint_dir=checkpoint_dir,
                )

            if attn_flag:
                index = attn_flag[0]
                alpha = alphas[index, : text_lengths[index], : mel_lengths[index] // 2]
                alpha = alpha.detach().cpu().numpy()

                y = ys[index, :, :].detach().cpu().numpy()
                log_alignment(alpha, y, cfg.preprocess, writer, global_step)

        writer.add_scalar("loss", average_loss, global_step)
        print(
            f"epoch {epoch} : average loss {average_loss:.4f} : {scheduler.get_last_lr()}"
        )