コード例 #1
0
ファイル: train.py プロジェクト: twerkmeister/oldtts
def main(args):
    """Build the Tacotron model, optionally restore a checkpoint, and train.

    Args:
        args: Parsed command-line arguments. This function reads
            ``args.restore_path``, ``args.rank`` and ``args.group_id`` and
            sets ``args.restore_step``.

    The configuration ``c``, audio processor ``ap``, ``num_gpus``,
    ``use_cuda``, ``OUT_PATH`` and the ``train`` / ``evaluate`` helpers are
    module-level names defined outside this function.
    """
    # DISTRIBUTED
    if num_gpus > 1:
        init_distributed(args.rank, num_gpus, args.group_id,
                         c.distributed["backend"], c.distributed["url"])
    num_chars = len(phonemes) if c.use_phonemes else len(symbols)
    model = Tacotron(num_chars=num_chars,
                     embedding_dim=c.embedding_size,
                     linear_dim=ap.num_freq,
                     mel_dim=ap.num_mels,
                     r=c.r,
                     memory_size=c.memory_size)

    optimizer = optim.Adam(model.parameters(), lr=c.lr, weight_decay=0)
    optimizer_st = optim.Adam(model.decoder.stopnet.parameters(),
                              lr=c.lr,
                              weight_decay=0)

    criterion = L1LossMasked()
    criterion_st = nn.BCELoss()

    # Move everything to the GPU *before* restoring: the optimizer casts its
    # restored state to the device its parameters live on at load time, so
    # loading first and moving the model afterwards leaves the optimizer
    # state on the CPU while the parameters are on CUDA.
    if use_cuda:
        model = model.cuda()
        criterion.cuda()
        criterion_st.cuda()

    best_loss = float('inf')
    if args.restore_path:
        checkpoint = torch.load(args.restore_path)
        try:
            model.load_state_dict(checkpoint['model'])
            optimizer.load_state_dict(checkpoint['optimizer'])
        except Exception:  # not a bare except: let Ctrl+C / SystemExit through
            print(" > Partial model initialization.")
            model_dict = model.state_dict()
            # Partial initialization: layers that do not match between the
            # checkpoint and the current model are skipped.
            # Keep only checkpoint entries that exist in the current model
            # with an identical shape (equal element counts are not enough
            # for load_state_dict to accept a tensor).
            pretrained_dict = {
                k: v
                for k, v in checkpoint['model'].items()
                if k in model_dict and v.shape == model_dict[k].shape
            }
            # Overwrite the matching entries and load the merged state dict.
            model_dict.update(pretrained_dict)
            model.load_state_dict(model_dict)
            print(" | > {} / {} layers are initialized".format(
                len(pretrained_dict), len(model_dict)))
        # Always train with the configured learning rate, not the stored one.
        for group in optimizer.param_groups:
            group['lr'] = c.lr
        print(" > Model restored from step %d" % checkpoint['step'],
              flush=True)
        best_loss = checkpoint['linear_loss']
        args.restore_step = checkpoint['step']
    else:
        args.restore_step = 0

    # DISTRIBUTED
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)

    if c.lr_decay:
        scheduler = NoamLR(optimizer,
                           warmup_steps=c.warmup_steps,
                           last_epoch=args.restore_step - 1)
    else:
        scheduler = None

    num_params = count_parameters(model)
    print("\n > Model has {} parameters".format(num_params), flush=True)

    for epoch in range(0, c.epochs):
        train_loss, current_step = train(model, criterion, criterion_st,
                                         optimizer, optimizer_st, scheduler,
                                         ap, epoch)
        val_loss = evaluate(model, criterion, criterion_st, ap, current_step,
                            epoch)
        print(" | > Training Loss: {:.5f}   Validation Loss: {:.5f}".format(
            train_loss, val_loss),
              flush=True)
        # Track the validation loss when evaluation is enabled, otherwise
        # fall back to the training loss.
        target_loss = val_loss if c.run_eval else train_loss
        best_loss = save_best_model(model, optimizer, target_loss, best_loss,
                                    OUT_PATH, current_step, epoch)
コード例 #2
0
ファイル: train.py プロジェクト: geneing/WaveRNN
def train(model, optimizer, criterion, scheduler, epochs, batch_size, step, lr,
          args):
    """Run the training loop for ``epochs`` epochs, validating after each one.

    Args:
        model: Network to train; called as ``model(x, m)`` and queried for
            ``model.mode``.
        optimizer: Optimizer whose param groups are reset to ``lr``.
        criterion: Loss callable invoked as ``criterion(y_hat, y)``.
        scheduler: Accepted for interface compatibility; it is not stepped
            by this function.
        epochs: Number of epochs to run.
        batch_size: Forwarded to ``evaluate``.
        step: Global step counter to continue from.
        lr: Learning rate written into every optimizer param group.
        args: Parsed command-line arguments; ``args.rank`` gates checkpoint
            and best-model saving.

    ``CONFIG``, ``use_cuda``, ``num_gpus``, ``MODEL_PATH`` and the helper
    functions are module-level names defined outside this function.
    """
    # create train loader
    train_loader = setup_loader(False)

    for p in optimizer.param_groups:
        p["initial_lr"] = lr
        p["lr"] = lr

    best_loss = float('inf')
    # Defined up front so that reporting/checkpointing cannot hit an unbound
    # name when the very first update is skipped.
    avg_loss = float('nan')
    for e in range(epochs):
        running_loss = 0.0
        # Skipped steps are counted per epoch: the running loss and the batch
        # index both restart every epoch, so the divisor below must too.
        skipped_steps = 0
        start = time.time()
        iters = len(train_loader)
        # train loop
        print(" > Training", flush=True)
        model.train()
        for i, (x, m, y) in enumerate(train_loader):
            if use_cuda:
                x, m, y = x.cuda(), m.cuda(), y.cuda()
            optimizer.zero_grad()
            y_hat = model(x, m)
            if isinstance(model.mode, int):
                y_hat = y_hat.transpose(1, 2).unsqueeze(-1)
            else:
                y = y.float()
            y = y.unsqueeze(-1)
            loss = criterion(y_hat, y)
            # NOTE(review): ``loss.item()`` returns a Python number, so this
            # guard looks like it can never fire -- confirm the intent.
            if loss.item() is None:
                raise RuntimeError(" [!] None loss. Exiting ...")
            loss.backward()
            grad_norm, skip_flag = check_update(model, CONFIG.grad_clip)
            if not skip_flag:
                optimizer.step()
                # Compute avg loss over the updates actually applied
                if num_gpus > 1:
                    loss = reduce_tensor(loss.data, num_gpus)
                running_loss += loss.item()
                avg_loss = running_loss / (i + 1 - skipped_steps)
            else:
                print(" [!] Skipping the step...")
                skipped_steps += 1
            speed = (i + 1) / (time.time() - start)
            step += 1
            cur_lr = optimizer.param_groups[0]["lr"]

            if step % CONFIG.print_step == 0:
                print(
                    " | > Epoch: {}/{} -- Batch: {}/{} -- Loss: {:.3f}"
                    " -- Speed: {:.2f} steps/sec -- Step: {} -- lr: {} -- GradNorm: {}"
                    .format(e + 1, epochs, i + 1, iters, avg_loss, speed, step,
                            cur_lr, grad_norm),
                    flush=True)
            if step % CONFIG.checkpoint_step == 0 and args.rank == 0:
                save_checkpoint(model, optimizer, avg_loss, MODEL_PATH, step,
                                e)
                print(" > checkpoint saved", flush=True)
        # validation loop
        avg_val_loss = evaluate(model, criterion, batch_size)
        if args.rank == 0:
            best_loss = save_best_model(model, optimizer, avg_val_loss,
                                        best_loss, MODEL_PATH, step, e)
コード例 #3
0
def main(args):
    """Set up data loaders and a Tacotron model, then run the training loop.

    Args:
        args: Parsed command-line arguments. Reads ``args.restore_path`` and
            sets ``args.restore_step``.

    The dataset class and the audio processor are resolved dynamically from
    ``c.dataset`` and ``c.audio_processor``. ``c``, ``use_cuda``,
    ``CHECKPOINT_PATH``, ``OUT_PATH`` and the ``train`` / ``evaluate``
    helpers are module-level names defined outside this function.
    """
    dataset = importlib.import_module('datasets.' + c.dataset)
    Dataset = getattr(dataset, 'MyDataset')
    audio = importlib.import_module('utils.' + c.audio_processor)
    AudioProcessor = getattr(audio, 'AudioProcessor')

    ap = AudioProcessor(sample_rate=c.sample_rate,
                        num_mels=c.num_mels,
                        min_level_db=c.min_level_db,
                        frame_shift_ms=c.frame_shift_ms,
                        frame_length_ms=c.frame_length_ms,
                        ref_level_db=c.ref_level_db,
                        num_freq=c.num_freq,
                        power=c.power,
                        preemphasis=c.preemphasis)

    # Setup the dataset
    train_dataset = Dataset(c.data_path,
                            c.meta_file_train,
                            c.r,
                            c.text_cleaner,
                            ap=ap,
                            min_seq_len=c.min_seq_len)

    train_loader = DataLoader(train_dataset,
                              batch_size=c.batch_size,
                              shuffle=False,
                              collate_fn=train_dataset.collate_fn,
                              drop_last=False,
                              num_workers=c.num_loader_workers,
                              pin_memory=True)

    if c.run_eval:
        val_dataset = Dataset(c.data_path,
                              c.meta_file_val,
                              c.r,
                              c.text_cleaner,
                              ap=ap)

        val_loader = DataLoader(val_dataset,
                                batch_size=c.eval_batch_size,
                                shuffle=False,
                                collate_fn=val_dataset.collate_fn,
                                drop_last=False,
                                num_workers=4,
                                pin_memory=True)
    else:
        val_loader = None

    model = Tacotron(c.embedding_size, ap.num_freq, c.num_mels, c.r)
    print(" | > Num output units : {}".format(ap.num_freq), flush=True)

    optimizer = optim.Adam(model.parameters(), lr=c.lr)
    optimizer_st = optim.Adam(model.decoder.stopnet.parameters(), lr=c.lr)

    criterion = L1LossMasked()
    criterion_st = nn.BCELoss()

    best_loss = float('inf')
    if args.restore_path:
        checkpoint = torch.load(args.restore_path)
        model.load_state_dict(checkpoint['model'])
        if use_cuda:
            model = model.cuda()
            criterion.cuda()
            criterion_st.cuda()
        optimizer.load_state_dict(checkpoint['optimizer'])
        # optimizer_st.load_state_dict(checkpoint['optimizer_st'])
        if use_cuda:
            # Only push the optimizer state to the GPU when CUDA is in use;
            # doing it unconditionally breaks restoring on a CPU-only run.
            for state in optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda()
        print(" > Model restored from step %d" % checkpoint['step'],
              flush=True)
        best_loss = checkpoint['linear_loss']
        args.restore_step = checkpoint['step']
    else:
        args.restore_step = 0
        print("\n > Starting a new training", flush=True)
        if use_cuda:
            model = model.cuda()
            criterion.cuda()
            criterion_st.cuda()

    scheduler = AnnealLR(optimizer, warmup_steps=c.warmup_steps)
    num_params = count_parameters(model)
    print(" | > Model has {} parameters".format(num_params), flush=True)

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(CHECKPOINT_PATH, exist_ok=True)

    for epoch in range(0, c.epochs):
        train_loss, current_step = train(model, criterion, criterion_st,
                                         train_loader, optimizer, optimizer_st,
                                         scheduler, ap, epoch)
        val_loss = evaluate(model, criterion, criterion_st, val_loader, ap,
                            current_step)
        print(" | > Train Loss: {:.5f}   Validation Loss: {:.5f}".format(
            train_loss, val_loss),
              flush=True)
        best_loss = save_best_model(model, optimizer, train_loss, best_loss,
                                    OUT_PATH, current_step, epoch)
コード例 #4
0
ファイル: train.py プロジェクト: yweweler/TTS
def main(args):
    """Create a Tacotron model, optionally restore a checkpoint, and train.

    Args:
        args: Parsed command-line arguments. Reads ``args.restore_path`` and
            sets ``args.restore_step``.

    ``c``, ``ap``, ``use_cuda``, ``CHECKPOINT_PATH``, ``OUT_PATH`` and the
    ``train`` / ``evaluate`` helpers are module-level names defined outside
    this function.
    """
    model = Tacotron(c.embedding_size, ap.num_freq, ap.num_mels, c.r)
    print(" | > Num output units : {}".format(ap.num_freq), flush=True)

    optimizer = optim.Adam(model.parameters(), lr=c.lr, weight_decay=0)
    optimizer_st = optim.Adam(model.decoder.stopnet.parameters(),
                              lr=c.lr,
                              weight_decay=0)

    criterion = L1LossMasked()
    criterion_st = nn.BCELoss()

    # Move to the GPU before any restore so that the optimizer state is
    # loaded onto the same device as the parameters it belongs to.
    if use_cuda:
        model = model.cuda()
        criterion.cuda()
        criterion_st.cuda()

    best_loss = float('inf')
    if args.restore_path:
        checkpoint = torch.load(args.restore_path)
        # Partial initialization: checkpoint entries unknown to the current
        # model are dropped instead of aborting the restore.
        # The current state dict must be fetched first; it is both the
        # filter and the base that the checkpoint values are merged into.
        model_dict = model.state_dict()
        # 1. filter out unnecessary keys
        pretrained_dict = {
            k: v
            for k, v in checkpoint['model'].items() if k in model_dict
        }
        # 2. overwrite entries in the existing state dict
        model_dict.update(pretrained_dict)
        # 3. load the new state dict
        model.load_state_dict(model_dict)
        optimizer.load_state_dict(checkpoint['optimizer'])
        print(" > Model restored from step %d" % checkpoint['step'],
              flush=True)
        best_loss = checkpoint['linear_loss']
        args.restore_step = checkpoint['step']
    else:
        args.restore_step = 0
        print("\n > Starting a new training", flush=True)

    if c.lr_decay:
        scheduler = NoamLR(optimizer,
                           warmup_steps=c.warmup_steps,
                           last_epoch=args.restore_step - 1)
    else:
        scheduler = None

    num_params = count_parameters(model)
    print(" | > Model has {} parameters".format(num_params), flush=True)

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(CHECKPOINT_PATH, exist_ok=True)

    for epoch in range(0, c.epochs):
        train_loss, current_step = train(model, criterion, criterion_st,
                                         optimizer, optimizer_st, scheduler,
                                         ap, epoch)
        val_loss = evaluate(model, criterion, criterion_st, ap, current_step)
        print(" | > Train Loss: {:.5f}   Validation Loss: {:.5f}".format(
            train_loss, val_loss),
              flush=True)
        best_loss = save_best_model(model, optimizer, train_loss, best_loss,
                                    OUT_PATH, current_step, epoch)
コード例 #5
0
ファイル: train.py プロジェクト: fzc20070415/mozilla-TTS
def main(args):  #pylint: disable=redefined-outer-name
    """Configure speakers, model, losses and optimizers, then run training.

    Args:
        args: Parsed command-line arguments. Reads ``args.restore_path``,
            ``args.rank`` and ``args.group_id``; sets ``args.restore_step``.

    Raises:
        AssertionError: If a checkpoint is being restored and the training
            data contains a speaker missing from the saved speaker mapping.

    ``c``, ``num_gpus``, ``use_cuda``, ``OUT_PATH`` and the helper functions
    are module-level names defined outside this function.
    """
    # Audio processor
    ap = AudioProcessor(**c.audio)

    # DISTRIBUTED
    if num_gpus > 1:
        init_distributed(args.rank, num_gpus, args.group_id,
                         c.distributed["backend"], c.distributed["url"])
    num_chars = len(phonemes) if c.use_phonemes else len(symbols)

    if c.use_speaker_embedding:
        speakers = get_speakers(c.data_path, c.meta_file_train, c.dataset)
        if args.restore_path:
            # Reuse the mapping stored next to the checkpoint being restored.
            prev_out_path = os.path.dirname(args.restore_path)
            speaker_mapping = load_speaker_mapping(prev_out_path)
            assert all(speaker in speaker_mapping
                       for speaker in speakers), "As of now you, you cannot " \
                                                 "introduce new speakers to " \
                                                 "a previously trained model."
        else:
            speaker_mapping = {name: i for i, name in enumerate(speakers)}
        save_speaker_mapping(OUT_PATH, speaker_mapping)
        num_speakers = len(speaker_mapping)
        print("Training with {} speakers: {}".format(num_speakers,
                                                     ", ".join(speakers)))
    else:
        num_speakers = 0

    model = setup_model(num_chars, num_speakers, c)

    print(" | > Num output units : {}".format(ap.num_freq), flush=True)

    optimizer = optim.Adam(model.parameters(), lr=c.lr, weight_decay=0)
    if c.stopnet and c.separate_stopnet:
        optimizer_st = optim.Adam(model.decoder.stopnet.parameters(),
                                  lr=c.lr,
                                  weight_decay=0)
    else:
        optimizer_st = None

    # Tacotron variants use an L1 objective, every other model uses MSE.
    uses_l1 = c.model in ["Tacotron", "TacotronGST"]
    if c.loss_masking:
        criterion = L1LossMasked() if uses_l1 else MSELossMasked()
    else:
        criterion = nn.L1Loss() if uses_l1 else nn.MSELoss()
    criterion_st = nn.BCEWithLogitsLoss() if c.stopnet else None

    if args.restore_path:
        checkpoint = torch.load(args.restore_path)
        # TODO: fix optimizer init, model.cuda() needs to be called before
        # optimizer restore
        # optimizer.load_state_dict(checkpoint['optimizer'])
        # Requested layer re-initialisation forces the partial path; a
        # failed full load falls back to it.
        needs_partial_init = bool(c.reinit_layers)
        if not needs_partial_init:
            try:
                model.load_state_dict(checkpoint['model'])
            except Exception:  # not bare: let Ctrl+C / SystemExit through
                needs_partial_init = True
        if needs_partial_init:
            print(" > Partial model initialization.")
            model_dict = model.state_dict()
            model_dict = set_init_dict(model_dict, checkpoint, c)
            model.load_state_dict(model_dict)
            del model_dict
        for group in optimizer.param_groups:
            group['lr'] = c.lr
        print(" > Model restored from step %d" % checkpoint['step'],
              flush=True)
        args.restore_step = checkpoint['step']
    else:
        args.restore_step = 0

    if use_cuda:
        model = model.cuda()
        criterion.cuda()
        if criterion_st:
            criterion_st.cuda()

    # DISTRIBUTED
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)

    if c.lr_decay:
        scheduler = NoamLR(optimizer,
                           warmup_steps=c.warmup_steps,
                           last_epoch=args.restore_step - 1)
    else:
        scheduler = None

    num_params = count_parameters(model)
    print("\n > Model has {} parameters".format(num_params), flush=True)

    # No best loss is read from the checkpoint here, so start from infinity.
    best_loss = float('inf')

    for epoch in range(0, c.epochs):
        train_loss, current_step = train(model, criterion, criterion_st,
                                         optimizer, optimizer_st, scheduler,
                                         ap, epoch)
        val_loss = evaluate(model, criterion, criterion_st, ap, current_step,
                            epoch)
        print(" | > Training Loss: {:.5f}   Validation Loss: {:.5f}".format(
            train_loss, val_loss),
              flush=True)
        # Track the validation loss when evaluation is enabled, otherwise
        # fall back to the training loss.
        target_loss = val_loss if c.run_eval else train_loss
        best_loss = save_best_model(model, optimizer, target_loss, best_loss,
                                    OUT_PATH, current_step, epoch)
コード例 #6
0
ファイル: train.py プロジェクト: 201528014227051/TTS
def main(args):
    """Build LJSpeech loaders and a Tacotron model, then run training.

    Args:
        args: Parsed command-line arguments. Reads ``args.restore_path`` and
            sets ``args.restore_step``.

    ``c``, ``use_cuda``, ``CHECKPOINT_PATH``, ``OUT_PATH`` and the
    ``train`` / ``evaluate`` helpers are module-level names defined outside
    this function.
    """
    wav_dir = os.path.join(c.data_path, 'wavs')

    # Setup the dataset
    train_dataset = LJSpeechDataset(os.path.join(c.data_path,
                                                 'metadata_train.csv'),
                                    wav_dir,
                                    c.r,
                                    c.sample_rate,
                                    c.text_cleaner,
                                    c.num_mels,
                                    c.min_level_db,
                                    c.frame_shift_ms,
                                    c.frame_length_ms,
                                    c.preemphasis,
                                    c.ref_level_db,
                                    c.num_freq,
                                    c.power,
                                    min_seq_len=c.min_seq_len)

    train_loader = DataLoader(train_dataset,
                              batch_size=c.batch_size,
                              shuffle=False,
                              collate_fn=train_dataset.collate_fn,
                              drop_last=False,
                              num_workers=c.num_loader_workers,
                              pin_memory=True)

    val_dataset = LJSpeechDataset(
        os.path.join(c.data_path, 'metadata_val.csv'),
        wav_dir, c.r, c.sample_rate, c.text_cleaner,
        c.num_mels, c.min_level_db, c.frame_shift_ms, c.frame_length_ms,
        c.preemphasis, c.ref_level_db, c.num_freq, c.power)

    val_loader = DataLoader(val_dataset,
                            batch_size=c.eval_batch_size,
                            shuffle=False,
                            collate_fn=val_dataset.collate_fn,
                            drop_last=False,
                            num_workers=4,
                            pin_memory=True)

    model = Tacotron(c.embedding_size, c.num_freq, c.num_mels, c.r)

    optimizer = optim.Adam(model.parameters(), lr=c.lr)
    optimizer_st = optim.Adam(model.decoder.stopnet.parameters(), lr=c.lr)

    criterion = L1LossMasked()
    criterion_st = nn.BCELoss()

    best_loss = float('inf')
    if args.restore_path:
        checkpoint = torch.load(args.restore_path)
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        if use_cuda:
            # Only push the optimizer state to the GPU when CUDA is in use;
            # doing it unconditionally breaks restoring on a CPU-only run.
            for state in optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda()
        print(" > Model restored from step %d" % checkpoint['step'])
        best_loss = checkpoint['linear_loss']
        args.restore_step = checkpoint['step']
    else:
        args.restore_step = 0
        print("\n > Starting a new training")

    if use_cuda:
        model = nn.DataParallel(model.cuda())
        criterion.cuda()
        criterion_st.cuda()

    num_params = count_parameters(model)
    print(" | > Model has {} parameters".format(num_params))

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(CHECKPOINT_PATH, exist_ok=True)

    for epoch in range(0, c.epochs):
        train_loss, current_step = train(model, criterion, criterion_st,
                                         train_loader, optimizer, optimizer_st,
                                         epoch)
        val_loss = evaluate(model, criterion, criterion_st, val_loader,
                            current_step)
        print(" >>> Train Loss: {:.5f}\t Validation Loss: {:.5f}".format(
            train_loss, val_loss))
        best_loss = save_best_model(model, optimizer, val_loss, best_loss,
                                    OUT_PATH, current_step, epoch)
コード例 #7
0
ファイル: train.py プロジェクト: stevemurr/TTS
def main(args):
    """Load the configured dataset, build a Tacotron model, and train it.

    Args:
        args: Parsed command-line arguments. Reads ``args.restore_path`` and
            sets ``args.restore_step``.

    The dataset class is looked up dynamically as
    ``datasets.<c.dataset>.<c.dataset>Dataset``. ``c``, ``use_cuda``,
    ``CHECKPOINT_PATH``, ``OUT_PATH`` and the ``train`` / ``evaluate``
    helpers are module-level names defined outside this function.
    """
    print(" > Using dataset: {}".format(c.dataset))
    mod = importlib.import_module('datasets.{}'.format(c.dataset))
    Dataset = getattr(mod, c.dataset + "Dataset")

    wav_dir = os.path.join(c.data_path, 'wavs')

    # Setup the dataset
    train_dataset = Dataset(os.path.join(c.data_path, c.meta_file_train),
                            wav_dir,
                            c.r,
                            c.sample_rate,
                            c.text_cleaner,
                            c.num_mels,
                            c.min_level_db,
                            c.frame_shift_ms,
                            c.frame_length_ms,
                            c.preemphasis,
                            c.ref_level_db,
                            c.num_freq,
                            c.power,
                            min_seq_len=c.min_seq_len)

    train_loader = DataLoader(train_dataset,
                              batch_size=c.batch_size,
                              shuffle=False,
                              collate_fn=train_dataset.collate_fn,
                              drop_last=True,
                              num_workers=c.num_loader_workers,
                              pin_memory=True)

    val_dataset = Dataset(os.path.join(c.data_path, c.meta_file_val),
                          wav_dir, c.r,
                          c.sample_rate, c.text_cleaner, c.num_mels,
                          c.min_level_db, c.frame_shift_ms, c.frame_length_ms,
                          c.preemphasis, c.ref_level_db, c.num_freq, c.power)

    val_loader = DataLoader(val_dataset,
                            batch_size=c.eval_batch_size,
                            shuffle=False,
                            collate_fn=val_dataset.collate_fn,
                            drop_last=False,
                            num_workers=4,
                            pin_memory=True)

    model = Tacotron(c.embedding_size, c.num_freq, c.num_mels, c.r)

    criterion = L1LossMasked()
    if use_cuda:
        criterion = criterion.cuda()

    # The optimizer is the same in both branches below, so build it once.
    optimizer = optim.Adam(model.parameters(), lr=c.lr)

    best_loss = float('inf')
    if args.restore_path:
        checkpoint = torch.load(args.restore_path)
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        if use_cuda:
            # Only push the optimizer state to the GPU when CUDA is in use;
            # doing it unconditionally breaks restoring on a CPU-only run.
            for state in optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda()
        print(" > Model restored from step %d" % checkpoint['step'])
        best_loss = checkpoint['linear_loss']
        args.restore_step = checkpoint['step']
    else:
        args.restore_step = 0
        print(" > Starting a new training")

    if use_cuda:
        print(" > Using CUDA.")
        model = nn.DataParallel(model).cuda()

    num_params = count_parameters(model)
    print(" | > Model has {} parameters".format(num_params))

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(CHECKPOINT_PATH, exist_ok=True)

    for epoch in range(0, c.epochs):
        train_loss, current_step = train(model, criterion, train_loader,
                                         optimizer, epoch)
        val_loss = evaluate(model, criterion, val_loader, current_step)
        best_loss = save_best_model(model, optimizer, val_loss, best_loss,
                                    OUT_PATH, current_step, epoch)
コード例 #8
0
ファイル: train.py プロジェクト: zbloss/TTS
def main(args):
    """Set up the model, losses and optimizers, then run the training loop.

    Args:
        args: Parsed command-line arguments. Reads ``args.restore_path``,
            ``args.rank`` and ``args.group_id``; sets ``args.restore_step``.

    ``c``, ``ap``, ``num_gpus``, ``use_cuda``, ``OUT_PATH`` and the helper
    functions are module-level names defined outside this function.
    """
    # DISTRIBUTED
    if num_gpus > 1:
        init_distributed(args.rank, num_gpus, args.group_id,
                         c.distributed["backend"], c.distributed["url"])
    num_chars = len(phonemes) if c.use_phonemes else len(symbols)
    model = setup_model(num_chars, c)

    print(" | > Num output units : {}".format(ap.num_freq), flush=True)

    optimizer = optim.Adam(model.parameters(), lr=c.lr, weight_decay=0)
    if c.stopnet and c.separate_stopnet:
        optimizer_st = optim.Adam(model.decoder.stopnet.parameters(),
                                  lr=c.lr,
                                  weight_decay=0)
    else:
        optimizer_st = None

    # Tacotron uses an L1 objective, every other model uses MSE.
    uses_l1 = c.model == "Tacotron"
    if c.loss_masking:
        criterion = L1LossMasked() if uses_l1 else MSELossMasked()
    else:
        criterion = nn.L1Loss() if uses_l1 else nn.MSELoss()
    criterion_st = nn.BCEWithLogitsLoss() if c.stopnet else None

    if args.restore_path:
        checkpoint = torch.load(args.restore_path)
        try:
            # TODO: fix optimizer init, model.cuda() needs to be called before
            # optimizer restore
            # optimizer.load_state_dict(checkpoint['optimizer'])
            if len(c.reinit_layers) > 0:
                # Requested layer re-initialisation: force the partial path.
                raise RuntimeError
            model.load_state_dict(checkpoint['model'])
        except Exception:  # not a bare except: let Ctrl+C / SystemExit through
            print(" > Partial model initialization.")
            model_dict = model.state_dict()
            model_dict = set_init_dict(model_dict, checkpoint, c)
            model.load_state_dict(model_dict)
            del model_dict
        for group in optimizer.param_groups:
            group['lr'] = c.lr
        print(" > Model restored from step %d" % checkpoint['step'],
              flush=True)
        args.restore_step = checkpoint['step']
    else:
        args.restore_step = 0

    if use_cuda:
        model = model.cuda()
        criterion.cuda()
        if criterion_st:
            criterion_st.cuda()

    # DISTRIBUTED
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)

    if c.lr_decay:
        scheduler = NoamLR(optimizer,
                           warmup_steps=c.warmup_steps,
                           last_epoch=args.restore_step - 1)
    else:
        scheduler = None

    num_params = count_parameters(model)
    print("\n > Model has {} parameters".format(num_params), flush=True)

    # No best loss is read from the checkpoint here, so start from infinity.
    best_loss = float('inf')

    for epoch in range(0, c.epochs):
        train_loss, current_step = train(model, criterion, criterion_st,
                                         optimizer, optimizer_st, scheduler,
                                         ap, epoch)
        val_loss = evaluate(model, criterion, criterion_st, ap, current_step,
                            epoch)
        print(" | > Training Loss: {:.5f}   Validation Loss: {:.5f}".format(
            train_loss, val_loss),
              flush=True)
        # Track the validation loss when evaluation is enabled, otherwise
        # fall back to the training loss.
        target_loss = val_loss if c.run_eval else train_loss
        best_loss = save_best_model(model, optimizer, target_loss, best_loss,
                                    OUT_PATH, current_step, epoch)
コード例 #9
0
ファイル: train.py プロジェクト: xzm2004260/TTS
def main(args):
    """Build LJSpeech loaders and a Tacotron model, then run training.

    Args:
        args: Parsed command-line arguments. Reads ``args.restore_path`` and
            sets ``args.restore_step``.

    ``c``, ``use_cuda``, ``CHECKPOINT_PATH``, ``OUT_PATH`` and the
    ``train`` / ``evaluate`` helpers are module-level names defined outside
    this function.
    """
    wav_dir = os.path.join(c.data_path, 'wavs')

    # Setup the dataset
    train_dataset = LJSpeechDataset(
        os.path.join(c.data_path, 'metadata_train.csv'),
        wav_dir,
        c.r,
        c.sample_rate,
        c.text_cleaner,
        c.num_mels,
        c.min_level_db,
        c.frame_shift_ms,
        c.frame_length_ms,
        c.preemphasis,
        c.ref_level_db,
        c.num_freq,
        c.power)

    train_loader = DataLoader(train_dataset,
                              batch_size=c.batch_size,
                              shuffle=False,
                              collate_fn=train_dataset.collate_fn,
                              drop_last=False,
                              num_workers=c.num_loader_workers,
                              pin_memory=True)

    val_dataset = LJSpeechDataset(
        os.path.join(c.data_path, 'metadata_val.csv'),
        wav_dir,
        c.r,
        c.sample_rate,
        c.text_cleaner,
        c.num_mels,
        c.min_level_db,
        c.frame_shift_ms,
        c.frame_length_ms,
        c.preemphasis,
        c.ref_level_db,
        c.num_freq,
        c.power)

    val_loader = DataLoader(val_dataset,
                            batch_size=c.batch_size,
                            shuffle=False,
                            collate_fn=val_dataset.collate_fn,
                            drop_last=False,
                            num_workers=4,
                            pin_memory=True)

    model = Tacotron(c.embedding_size,
                     c.hidden_size,
                     c.num_mels,
                     c.num_freq,
                     c.r,
                     use_atten_mask=True)

    criterion = nn.L1Loss()
    if use_cuda:
        # Move the model to the GPU before the optimizer state is restored:
        # the optimizer places restored state on the device of its
        # parameters, so restoring first and moving later would leave that
        # state on the CPU.
        model = model.cuda()
        criterion = criterion.cuda()

    optimizer = optim.Adam(model.parameters(), lr=c.lr)

    best_loss = float('inf')
    if args.restore_path:
        checkpoint = torch.load(args.restore_path)
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("\n > Model restored from step %d\n" % checkpoint['step'])
        best_loss = checkpoint['linear_loss']
        args.restore_step = checkpoint['step']
    else:
        args.restore_step = 0
        print("\n > Starting a new training")

    if use_cuda:
        # The model is already on the GPU; only the wrapper is added here.
        model = nn.DataParallel(model)

    num_params = count_parameters(model)
    print(" | > Model has {} parameters".format(num_params))

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(CHECKPOINT_PATH, exist_ok=True)

    for epoch in range(0, c.epochs):
        train_loss, current_step = train(model, criterion, train_loader,
                                         optimizer, epoch)
        val_loss = evaluate(model, criterion, val_loader, current_step)
        best_loss = save_best_model(model, optimizer, val_loss,
                                    best_loss, OUT_PATH,
                                    current_step, epoch)
コード例 #10
0
ファイル: train.py プロジェクト: codeaudit/TTS
def main(args):
    """Train a Tacotron model and log progress to TensorBoard.

    Loads the config from ``args.config_path``, creates an experiment
    folder, builds the dataset, model and optimizer, optionally restores a
    checkpoint, and then runs ``c.epochs`` epochs of training. Checkpoints
    are written every ``c.save_step`` steps (when ``c.checkpoint`` is set)
    and the best model is saved after each epoch based on the average
    linear-spectrogram loss.

    Args:
        args: parsed command-line namespace. This function reads
            ``config_path``, ``restore_step`` and ``restore_path``.
    """

    # setup output paths and read configs
    c = load_config(args.config_path)
    _ = os.path.dirname(os.path.realpath(__file__))
    OUT_PATH = os.path.join(_, c.output_path)
    OUT_PATH = create_experiment_folder(OUT_PATH)
    CHECKPOINT_PATH = os.path.join(OUT_PATH, 'checkpoints')
    shutil.copyfile(args.config_path, os.path.join(OUT_PATH, 'config.json'))

    # save config to tmp place to be loaded by subsequent modules.
    file_name = str(os.getpid())
    tmp_path = os.path.join("/tmp/", file_name + '_tts')
    # Use a context manager so the file handle is flushed and closed
    # deterministically instead of being leaked.
    with open(tmp_path, "wb") as config_file:
        pickle.dump(c, config_file)

    # setup tensorboard
    LOG_DIR = OUT_PATH
    tb = SummaryWriter(LOG_DIR)

    # Ctrl+C handler to remove empty experiment folder.
    # The first parameter is named `signum` so it does not shadow the
    # `signal` module inside the handler.
    def signal_handler(signum, frame):
        print(" !! Pressed Ctrl+C !!")
        remove_experiment_folder(OUT_PATH)
        sys.exit(1)

    signal.signal(signal.SIGINT, signal_handler)

    # Setup the dataset
    dataset = LJSpeechDataset(os.path.join(c.data_path, 'metadata.csv'),
                              os.path.join(c.data_path, 'wavs'), c.r,
                              c.sample_rate, c.text_cleaner, c.num_mels,
                              c.min_level_db, c.frame_shift_ms,
                              c.frame_length_ms, c.preemphasis, c.ref_level_db,
                              c.num_freq, c.power)

    dataloader = DataLoader(dataset,
                            batch_size=c.batch_size,
                            shuffle=True,
                            collate_fn=dataset.collate_fn,
                            drop_last=True,
                            num_workers=c.num_loader_workers)

    # setup the model
    model = Tacotron(c.embedding_size, c.hidden_size, c.num_mels, c.num_freq,
                     c.r)

    # plot model on tensorboard
    dummy_input = dataset.get_dummy_data()

    ## TODO: onnx does not support RNN fully yet
    # model_proto_path = os.path.join(OUT_PATH, "model.proto")
    # onnx.export(model, dummy_input, model_proto_path, verbose=True)
    # tb.add_graph_onnx(model_proto_path)

    if use_cuda:
        model = nn.DataParallel(model.cuda())

    optimizer = optim.Adam(model.parameters(), lr=c.lr)

    # Default for a fresh run; a restored checkpoint overrides it below.
    # (Previously this was reset to inf right before the training loop,
    # which discarded the restored value.)
    best_loss = float('inf')

    if args.restore_step:
        checkpoint = torch.load(
            os.path.join(args.restore_path,
                         'checkpoint_%d.pth.tar' % args.restore_step))
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("\n > Model restored from step %d\n" % args.restore_step)
        best_loss = checkpoint['linear_loss']
    else:
        print("\n > Starting a new training")

    model = model.train()

    if not os.path.exists(CHECKPOINT_PATH):
        os.mkdir(CHECKPOINT_PATH)

    if use_cuda:
        criterion = nn.L1Loss().cuda()
    else:
        criterion = nn.L1Loss()

    # Number of linear-spectrogram bins covering 0-3000 Hz; these bins get
    # an extra loss term below.
    n_priority_freq = int(3000 / (c.sample_rate * 0.5) * c.num_freq)

    #lr_scheduler = ReduceLROnPlateau(optimizer, factor=c.lr_decay,
    #                               patience=c.lr_patience, verbose=True)
    epoch_time = 0
    # Epoch numbering always restarts at 0; `args.restore_step` offsets the
    # global step so logging and LR decay continue from the checkpoint.
    for epoch in range(0, c.epochs):

        print("\n | > Epoch {}/{}".format(epoch, c.epochs))
        progbar = Progbar(len(dataset) / c.batch_size)

        for num_iter, data in enumerate(dataloader):
            start_time = time.time()

            text_input = data[0]
            text_lengths = data[1]
            linear_input = data[2]
            mel_input = data[3]

            current_step = num_iter + args.restore_step + epoch * len(
                dataloader) + 1

            # setup lr
            current_lr = lr_decay(c.lr, current_step)
            for params_group in optimizer.param_groups:
                params_group['lr'] = current_lr

            optimizer.zero_grad()

            # Add a single frame of zeros to Mel Specs for better end detection
            #try:
            #    mel_input = np.concatenate((np.zeros(
            #        [c.batch_size, 1, c.num_mels], dtype=np.float32),
            #        mel_input[:, 1:, :]), axis=1)
            #except:
            #    raise TypeError("not same dimension")

            # convert inputs to variables
            text_input_var = Variable(text_input)
            mel_spec_var = Variable(mel_input)
            linear_spec_var = Variable(linear_input, volatile=True)

            # sort sequence by length.
            # TODO: might be unnecessary
            sorted_lengths, indices = torch.sort(text_lengths.view(-1),
                                                 dim=0,
                                                 descending=True)
            sorted_lengths = sorted_lengths.long().numpy()

            text_input_var = text_input_var[indices]
            mel_spec_var = mel_spec_var[indices]
            linear_spec_var = linear_spec_var[indices]

            # Build the lengths on the CPU first and move them to the GPU
            # only when CUDA is in use, so CPU-only training also works.
            input_lengths_var = Variable(torch.LongTensor(sorted_lengths))

            if use_cuda:
                text_input_var = text_input_var.cuda()
                mel_spec_var = mel_spec_var.cuda()
                linear_spec_var = linear_spec_var.cuda()
                input_lengths_var = input_lengths_var.cuda()

            mel_output, linear_output, alignments =\
                model.forward(text_input_var, mel_spec_var,
                              input_lengths=input_lengths_var)

            mel_loss = criterion(mel_output, mel_spec_var)
            #linear_loss = torch.abs(linear_output - linear_spec_var)
            #linear_loss = 0.5 * \
            #torch.mean(linear_loss) + 0.5 * \
            #torch.mean(linear_loss[:, :n_priority_freq, :])
            linear_loss = 0.5 * criterion(linear_output, linear_spec_var) \
                    + 0.5 * criterion(linear_output[:, :, :n_priority_freq],
                                      linear_spec_var[:, :, :n_priority_freq])
            loss = mel_loss + linear_loss
            # loss = loss.cuda()

            loss.backward()
            grad_norm = nn.utils.clip_grad_norm(model.parameters(),
                                                1.)  ## TODO: maybe no need
            optimizer.step()

            step_time = time.time() - start_time
            epoch_time += step_time

            progbar.update(num_iter + 1,
                           values=[('total_loss', loss.data[0]),
                                   ('linear_loss', linear_loss.data[0]),
                                   ('mel_loss', mel_loss.data[0]),
                                   ('grad_norm', grad_norm)])

            # Plot Learning Stats
            tb.add_scalar('Loss/TotalLoss', loss.data[0], current_step)
            tb.add_scalar('Loss/LinearLoss', linear_loss.data[0], current_step)
            tb.add_scalar('Loss/MelLoss', mel_loss.data[0], current_step)
            tb.add_scalar('Params/LearningRate',
                          optimizer.param_groups[0]['lr'], current_step)
            tb.add_scalar('Params/GradNorm', grad_norm, current_step)
            tb.add_scalar('Time/StepTime', step_time, current_step)

            # Alignment of the first sample in the batch, logged every step.
            align_img = alignments[0].data.cpu().numpy()
            align_img = plot_alignment(align_img)
            tb.add_image('Attn/Alignment', align_img, current_step)

            if current_step % c.save_step == 0:

                if c.checkpoint:
                    # save model
                    save_checkpoint(model, optimizer, linear_loss.data[0],
                                    best_loss, OUT_PATH, current_step, epoch)

                # Diagnostic visualizations
                const_spec = linear_output[0].data.cpu().numpy()
                gt_spec = linear_spec_var[0].data.cpu().numpy()

                const_spec = plot_spectrogram(const_spec, dataset.ap)
                gt_spec = plot_spectrogram(gt_spec, dataset.ap)
                tb.add_image('Spec/Reconstruction', const_spec, current_step)
                tb.add_image('Spec/GroundTruth', gt_spec, current_step)

                # The alignment image for this step was already logged
                # above, so it is not plotted a second time here.

                # Sample audio
                audio_signal = linear_output[0].data.cpu().numpy()
                dataset.ap.griffin_lim_iters = 60
                audio_signal = dataset.ap.inv_spectrogram(audio_signal.T)
                try:
                    tb.add_audio('SampleAudio',
                                 audio_signal,
                                 current_step,
                                 sample_rate=c.sample_rate)
                except Exception:
                    # Best-effort logging: a failed audio summary must not
                    # stop training. `Exception` (not a bare except) lets
                    # the SystemExit raised by the Ctrl+C handler through.
                    print("\n > Error at audio signal on TB!!")
                    print(audio_signal.max())
                    print(audio_signal.min())

        # average loss after the epoch
        avg_epoch_loss = np.mean(progbar.sum_values['linear_loss'][0] /
                                 max(1, progbar.sum_values['linear_loss'][1]))
        best_loss = save_best_model(model, optimizer, avg_epoch_loss,
                                    best_loss, OUT_PATH, current_step, epoch)

        #lr_scheduler.step(loss.data[0])
        tb.add_scalar('Time/EpochTime', epoch_time, epoch)
        epoch_time = 0