Esempio n. 1
0
def tts_train_loop(paths: Paths, model: Tacotron, optimizer, train_set, lr,
                   train_steps, attn_example):
    """Run the Tacotron training loop.

    The learning rate of every optimizer parameter group is overwritten
    with ``lr``, then ``train_steps // len(train_set) + 1`` epochs are run
    over ``train_set``. The loss is the sum of the L1 losses of both model
    outputs against the target ``m``.

    Args:
        paths: Object exposing ``tts_attention``, ``tts_mel_plot`` and
            ``tts_log``; also forwarded to ``save_checkpoint``.
        model: Model to train. Must provide ``parameters()``,
            ``get_step()`` and ``log()``, and return
            ``(m1_hat, m2_hat, attention)`` when called with ``(x, m)``.
        optimizer: Optimizer whose ``param_groups`` are updated in place.
        train_set: Sized iterable yielding ``(x, m, ids, _)`` batches.
            ``ids`` must support ``in`` and ``.index()``.
        lr: Learning rate applied to all parameter groups.
        train_steps: Number of steps used to derive the epoch count.
        attn_example: Item id; whenever it appears in a batch its attention
            and second-output spectrogram are plotted.

    Raises:
        ZeroDivisionError: If ``train_set`` is empty.
    """
    device = next(
        model.parameters()).device  # use same device as model parameters

    for g in optimizer.param_groups:
        g['lr'] = lr

    total_iters = len(train_set)
    epochs = train_steps // total_iters + 1

    for e in range(1, epochs + 1):

        start = time.time()
        running_loss = 0

        # Perform 1 epoch
        for i, (x, m, ids, _) in enumerate(train_set, 1):

            x, m = x.to(device), m.to(device)

            # Parallelize model onto GPUS using workaround due to python bug
            if device.type == 'cuda' and torch.cuda.device_count() > 1:
                m1_hat, m2_hat, attention = data_parallel_workaround(
                    model, x, m)
            else:
                m1_hat, m2_hat, attention = model(x, m)

            m1_loss = F.l1_loss(m1_hat, m)
            m2_loss = F.l1_loss(m2_hat, m)

            loss = m1_loss + m2_loss

            optimizer.zero_grad()
            loss.backward()
            if hp.tts_clip_grad_norm is not None:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), hp.tts_clip_grad_norm)
                # float() accepts both a plain number and a 0-dim tensor on
                # any device; np.isnan() on a CUDA tensor raises TypeError.
                if np.isnan(float(grad_norm)):
                    print('grad_norm was NaN!')

            optimizer.step()

            running_loss += loss.item()
            avg_loss = running_loss / i

            speed = i / (time.time() - start)

            step = model.get_step()
            k = step // 1000

            if step % hp.tts_checkpoint_every == 0:
                ckpt_name = 'taco_step%sK' % (repr1(k))
                save_checkpoint('tts',
                                paths,
                                model,
                                optimizer,
                                name=ckpt_name,
                                is_silent=True)

            if attn_example in ids:
                idx = ids.index(attn_example)
                # Format the file name before joining: `/` and `%` share the
                # same precedence and associate left to right, so
                # `path / '%s' % name` would apply `%` to the joined path.
                plot_name = '%s' % (repr1(step))
                save_attention(np_now(attention[idx][:, :160]),
                               paths.tts_attention / plot_name)
                save_spectrogram(np_now(m2_hat[idx]),
                                 paths.tts_mel_plot / plot_name,
                                 600)

            msg = '| Epoch: %s/%s (%s/%s) | Loss: %.4f | %.2f steps/s | Step: %sk | ' % (
                repr1(e), repr1(epochs), repr1(i), repr1(total_iters),
                avg_loss, speed, repr1(k))
            stream(msg)

        # Must save latest optimizer state to ensure that resuming training
        # doesn't produce artifacts
        save_checkpoint('tts', paths, model, optimizer, is_silent=True)
        model.log(paths.tts_log, msg)
        print(' ')
Esempio n. 2
0
def voc_train_loop(paths: Paths, model: WaveRNN, loss_func, optimizer,
                   train_set, test_set, lr, total_steps):
    """Run the vocoder training loop.

    The learning rate of every optimizer parameter group is overwritten
    with ``lr``. The epoch count is derived from the steps still missing:
    ``(total_steps - model.get_step()) // len(train_set) + 1``.

    Args:
        paths: Object exposing ``voc_output`` and ``voc_log``; also
            forwarded to ``save_checkpoint``.
        model: Model to train. Must provide ``parameters()``, ``mode``,
            ``get_step()`` and ``log()``, and be callable as
            ``model(x, m)``.
        loss_func: Callable ``loss_func(y_hat, y)`` returning the loss.
        optimizer: Optimizer whose ``param_groups`` are updated in place.
        train_set: Sized iterable yielding ``(x, y, m)`` batches.
        test_set: Passed to ``gen_testset`` at every step checkpoint.
        lr: Learning rate applied to all parameter groups.
        total_steps: Target step count used to derive the epoch count.

    Raises:
        ZeroDivisionError: If ``train_set`` is empty.
    """
    # Use same device as model parameters
    device = next(model.parameters()).device

    for g in optimizer.param_groups:
        g['lr'] = lr

    total_iters = len(train_set)
    epochs = (total_steps - model.get_step()) // total_iters + 1

    for e in range(1, epochs + 1):

        start = time.time()
        running_loss = 0.

        for i, (x, y, m) in enumerate(train_set, 1):
            x, m, y = x.to(device), m.to(device), y.to(device)

            # Parallelize model onto GPUS using workaround due to python bug
            if device.type == 'cuda' and torch.cuda.device_count() > 1:
                y_hat = data_parallel_workaround(model, x, m)
            else:
                y_hat = model(x, m)

            # Bring prediction / target into the layout loss_func is given
            # for the active mode.
            if model.mode == 'RAW':
                y_hat = y_hat.transpose(1, 2).unsqueeze(-1)

            elif model.mode == 'MOL':
                y = y.float()

            y = y.unsqueeze(-1)

            loss = loss_func(y_hat, y)

            optimizer.zero_grad()
            loss.backward()
            if hp.voc_clip_grad_norm is not None:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), hp.voc_clip_grad_norm)
                # float() accepts both a plain number and a 0-dim tensor on
                # any device; np.isnan() on a CUDA tensor raises TypeError.
                if np.isnan(float(grad_norm)):
                    print('grad_norm was NaN!')
            optimizer.step()

            running_loss += loss.item()
            avg_loss = running_loss / i

            speed = i / (time.time() - start)

            step = model.get_step()
            k = step // 1000

            if step % hp.voc_checkpoint_every == 0:
                gen_testset(model, test_set, hp.voc_gen_at_checkpoint,
                            hp.voc_gen_batched, hp.voc_target, hp.voc_overlap,
                            paths.voc_output)
                ckpt_name = f'wave_step{k}K'
                save_checkpoint('voc',
                                paths,
                                model,
                                optimizer,
                                name=ckpt_name,
                                is_silent=True)

            msg = f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Loss: {avg_loss:.4f} | {speed:.1f} steps/s | Step: {k}k | '
            stream(msg)

        # Must save latest optimizer state to ensure that resuming training
        # doesn't produce artifacts
        save_checkpoint('voc', paths, model, optimizer, is_silent=True)
        model.log(paths.voc_log, msg)
        print(' ')
Esempio n. 3
0
def voc_train_loop(paths: Paths, model: WaveRNN, loss_func, optimizer,
                   train_set, test_set, lr, total_steps):
    """Run the vocoder training loop with TensorBoard logging and LR decay.

    The learning rate of every optimizer parameter group is set to ``lr``
    and then multiplied by 0.983 after each epoch through a ``StepLR``
    scheduler. Per-batch and per-epoch losses are written to a
    ``SummaryWriter`` under ``runs/<model_name_prefix>-<timestamp>``, and a
    checkpoint named after the epoch and its average loss is saved at the
    end of every epoch.

    ``model_name_prefix`` and ``EPOCH`` (the first epoch number) are read
    from the enclosing scope; they are not defined in this function.

    Args:
        paths: Object exposing ``voc_log``; also forwarded to
            ``save_checkpoint``.
        model: Model to train. Must provide ``parameters()``, ``mode``,
            ``get_step()`` and ``log()``, and be callable as
            ``model(x, m)``.
        loss_func: Callable ``loss_func(y_hat, y)`` returning the loss.
        optimizer: Optimizer whose ``param_groups`` are updated in place.
        train_set: Sized iterable yielding ``(x, y, m)`` batches.
        test_set: Only referenced by the disabled evaluation code below.
        lr: Initial learning rate applied to all parameter groups.
        total_steps: Target step count used to derive the epoch count.

    Raises:
        ZeroDivisionError: If ``train_set`` is empty.
    """
    # Use same device as model parameters
    device = next(model.parameters()).device

    # set learning rate
    for g in optimizer.param_groups:
        g['lr'] = lr

    total_iters = len(train_set)
    epochs = (total_steps - model.get_step()) // total_iters + 1

    writer = SummaryWriter("runs/{0}-{1}".format(
        model_name_prefix,
        datetime.now().strftime("%Y%m%d-%H%M%S")))
    scheduler = StepLR(optimizer, step_size=1, gamma=0.983)

    # Close the writer even when training is interrupted so that pending
    # events are flushed and the event file is released.
    try:
        for e in range(EPOCH, epochs + 1):

            start = time.time()
            running_loss = 0.
            avg_loss = 0

            for i, (x, y, m) in enumerate(train_set, 1):
                x, m, y = x.to(device), m.to(device), y.to(device)

                # Parallelize model onto GPUS using workaround due to python bug
                if device.type == 'cuda' and torch.cuda.device_count() > 1:
                    y_hat = data_parallel_workaround(model, x, m)
                else:
                    y_hat = model(x, m)

                if model.mode == 'RAW':
                    y_hat = y_hat.transpose(1, 2).unsqueeze(-1)

                elif model.mode == 'MOL':
                    y = y.float()

                y = y.unsqueeze(-1)

                loss = loss_func(y_hat, y)

                optimizer.zero_grad()
                loss.backward()
                if hp.voc_clip_grad_norm is not None:
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   hp.voc_clip_grad_norm)
                optimizer.step()

                running_loss += loss.item()
                avg_loss = running_loss / i

                speed = i / (time.time() - start)

                step = model.get_step()
                k = step // 1000

                # Write to tensorboard per batch
                writer.add_scalar('Epoch loss', loss.item(),
                                  e * total_iters + i)

                msg = f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Loss: {avg_loss:.4f} | {speed:.1f} steps/s | Step: {k}k | '
                stream(msg)

            # The string literal below is disabled evaluation code kept for
            # reference; as a bare expression it has no runtime effect.
            """
            ####################### Testing ############################
            torch.cuda.empty_cache()
            loss_test = 0
            for _, (x_test, y_test, m_test) in enumerate(test_set, 1):
                x_test, m_test, y_test = x_test.to(device), m_test.to(device), y_test.to(device)
                if device.type == 'cuda' and torch.cuda.device_count() > 1:
                    raise RuntimeError("Unsupported")
                else:
                    y_test_hat = model(x_test, m_test)

                if model.mode == 'RAW':
                    y_test_hat = y_test_hat.transpose(1, 2).unsqueeze(-1)
                elif model.mode == 'MOL':
                    y_test = y_test.float()

                y_test = y_test.unsqueeze(-1)

                loss_test += loss_func(y_test_hat, y_test).item()
            avg_loss_test = loss_test / len(test_set)
            msg = f'| Epoch: {e}/{epochs} | Test-Loss: {loss_test:.4f} | Test-AvgLoss: {avg_loss_test:.4f} | '
            stream("\n")
            stream(msg)

            writer.add_scalar('Test loss', loss_test, e)
            writer.add_scalar('Average test loss', avg_loss_test, e)
            ############################################################
            """

            # Write to tensorboard per epoch
            writer.add_scalar('Running loss', running_loss, e)
            writer.add_scalar('Average loss', avg_loss, e)

            # Must save latest optimizer state to ensure that resuming training
            # doesn't produce artifacts
            save_checkpoint('voc',
                            paths,
                            model,
                            optimizer,
                            name="{0}-epoch-{1}-loss-{2}".format(
                                model_name_prefix, e, avg_loss),
                            is_silent=True)
            model.log(paths.voc_log, msg)
            print(' ')
            scheduler.step()
            # Report the rate the optimizer will actually use next. Calling
            # scheduler.get_lr() outside of step() is deprecated and, for
            # StepLR with step_size=1, yields a value one decay ahead.
            print('Epoch:', e, 'LR:',
                  [g['lr'] for g in optimizer.param_groups])
    finally:
        writer.close()
def train_loop(paths: Paths, model, optimizer, train_set, lr, train_steps,
               mel_example):
    """Run the training loop of the duration-conditioned ("forward") model.

    The learning rate of every optimizer parameter group is overwritten
    with ``lr``, then ``train_steps // len(train_set) + 1`` epochs are run.
    The optimized loss is the sum of three L1 terms: both spectrogram
    outputs against ``m`` and the predicted durations against ``dur``.
    Only the second spectrogram loss and the duration loss are reported in
    the progress message.

    Args:
        paths: Object exposing ``forward_mel_plot`` and ``forward_log``;
            also forwarded to ``save_checkpoint``.
        model: Model to train. Must provide ``parameters()``,
            ``get_step()``, ``generate()`` and ``log()``, and return
            ``(m_hat, m_post_hat, dur_hat)`` when called with
            ``(x, m, dur)``.
        optimizer: Optimizer whose ``param_groups`` are updated in place.
        train_set: Sized iterable yielding ``(x, m, ids, mel_len, dur)``
            batches. ``ids`` must support ``in`` and ``.index()``.
        lr: Learning rate applied to all parameter groups.
        train_steps: Number of steps used to derive the epoch count.
        mel_example: Item id; whenever it appears in a batch, generated,
            predicted and target spectrograms are plotted for it.

    Raises:
        ZeroDivisionError: If ``train_set`` is empty.
    """
    device = next(
        model.parameters()).device  # use same device as model parameters

    for g in optimizer.param_groups:
        g['lr'] = lr

    total_iters = len(train_set)
    epochs = train_steps // total_iters + 1

    for e in range(1, epochs + 1):

        start = time.time()
        running_loss = 0
        dur_running_loss = 0

        # Perform 1 epoch
        for i, (x, m, ids, mel_len, dur) in enumerate(train_set, 1):

            x, m, dur = x.to(device), m.to(device), dur.to(device)
            # Parallelize model onto GPUS using workaround due to python bug
            if device.type == 'cuda' and torch.cuda.device_count() > 1:
                m_hat, m_post_hat, dur_hat = data_parallel_workaround(
                    model, x, m, dur)
            else:
                m_hat, m_post_hat, dur_hat = model(x, m, dur)

            lin_loss = F.l1_loss(m_hat, m)
            post_loss = F.l1_loss(m_post_hat, m)
            dur_loss = F.l1_loss(dur_hat, dur)
            loss = lin_loss + post_loss + dur_loss
            optimizer.zero_grad()

            loss.backward()

            if hp.forward_clip_grad_norm is not None:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), hp.forward_clip_grad_norm)
                # float() accepts both a plain number and a 0-dim tensor on
                # any device; np.isnan() on a CUDA tensor raises TypeError.
                if np.isnan(float(grad_norm)):
                    print('grad_norm was NaN!')

            optimizer.step()

            running_loss += post_loss.item()
            avg_loss = running_loss / i
            dur_running_loss += dur_loss.item()
            dur_avg_loss = dur_running_loss / i

            speed = i / (time.time() - start)

            step = model.get_step()
            k = step // 1000

            if step % hp.forward_checkpoint_every == 0:
                ckpt_name = f'fast_speech_step{k}K'
                save_checkpoint('forward',
                                paths,
                                model,
                                optimizer,
                                name=ckpt_name,
                                is_silent=True)

            if mel_example in ids:
                idx = ids.index(mel_example)
                # Free-running generation is best effort: a failure here is
                # reported but must not abort training.
                try:
                    seq = x[idx].tolist()
                    m_gen = model.generate(seq)
                    save_spectrogram(m_gen,
                                     paths.forward_mel_plot / f'{step}_gen',
                                     600)
                except Exception:
                    traceback.print_exc()
                save_spectrogram(np_now(m_post_hat[idx]),
                                 paths.forward_mel_plot / f'{step}_gta', 600)
                save_spectrogram(np_now(m[idx]),
                                 paths.forward_mel_plot / f'{step}_target',
                                 600)

            msg = f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Mel Loss: {avg_loss:#.4} ' \
                  f'| Duration Loss: {dur_avg_loss:#.4} | {speed:#.2} steps/s | Step: {k}k | '
            stream(msg)
        model.log(paths.forward_log, msg)

        save_checkpoint('forward', paths, model, optimizer, is_silent=True)
def tts_train_loop(paths: Paths, model: Tacotron, optimizer, train_set, lr,
                   train_steps, attn_example):
    """Run the Tacotron training loop with an attention-guide loss term.

    The learning rate of every optimizer parameter group is overwritten
    with ``lr``, then ``train_steps // len(train_set) + 1`` epochs are run.
    The loss is the sum of the L1 losses of both model outputs against
    ``m`` plus a masked attention term: the absolute product of the padded
    attention and the subsampled guides, summed over unmasked cells and
    divided by the number of unmasked cells.

    Args:
        paths: Object exposing ``tts_attention``, ``tts_mel_plot`` and
            ``tts_log``; also forwarded to ``save_checkpoint``.
        model: Model to train. Must provide ``parameters()``,
            ``get_step()`` and ``log()``, and return
            ``(m1_hat, m2_hat, attention, r)`` when called with ``(x, m)``.
        optimizer: Optimizer whose ``param_groups`` are updated in place.
        train_set: Sized iterable yielding
            ``(x, m, ids, _, att_guides)`` batches. ``ids`` must support
            ``in`` and ``.index()``.
        lr: Learning rate applied to all parameter groups.
        train_steps: Number of steps used to derive the epoch count.
        attn_example: Item id; whenever it appears in a batch its attention
            and second-output spectrogram are plotted.

    Raises:
        ZeroDivisionError: If ``train_set`` is empty.
        AssertionError: If the subsampled guide length does not match the
            length of the model's attention for the first batch item.
    """
    device = next(
        model.parameters()).device  # use same device as model parameters

    for g in optimizer.param_groups:
        g['lr'] = lr

    total_iters = len(train_set)
    epochs = train_steps // total_iters + 1

    for e in range(1, epochs + 1):

        start = time.time()
        running_loss = 0

        # Perform 1 epoch
        for i, (x, m, ids, _, att_guides) in enumerate(train_set, 1):

            x, m = x.to(device), m.to(device)

            # Parallelize model onto GPUS using workaround due to python bug
            if device.type == 'cuda' and torch.cuda.device_count() > 1:
                m1_hat, m2_hat, attention, r = data_parallel_workaround(
                    model, x, m)
            else:
                m1_hat, m2_hat, attention, r = model(x, m)

            # Keep the raw model attention for plotting further down.
            orig_attention = attention
            n = int(len(att_guides[0]) / r)
            # Reduce the guides by the factor r: keep every r-th row of each
            # guide, flattened over the batch, then regroup n rows per item.
            # NOTE(review): the regrouping assumes every guide yields exactly
            # n rows, i.e. guide lengths divisible by r - confirm.
            ga = [a[t] for a in att_guides for t in range(0, len(a), r)]

            assert n == len(attention[0])
            guided_attention = [ga[k:k + n] for k in range(0, len(ga), n)]

            # NOTE(review): the attention goes through np_now() and a fresh
            # torch.tensor() here, so attention_loss is presumably detached
            # from the model graph and contributes no gradient - confirm
            # that this is intended.
            attention = np_now(attention)
            attention = [
                pad2d_nonzero(att, n, len(att_guides[0][0]))
                for att in attention
            ]

            guided_attention = torch.tensor(guided_attention)
            guided_attention = guided_attention.to(device)

            attention = torch.tensor(attention)
            attention = attention.to(device)

            # Create attention mask: 1.0 wherever the padded attention is
            # not -1 (presumably the padding value of pad2d_nonzero), else
            # 0.0. The mask itself is moved to the device; the previous code
            # replaced it with the attention tensor at this point.
            attention_masks = torch.ne(attention, -1).float().to(device)

            multiply = torch.abs(
                attention * guided_attention) * attention_masks

            attention_loss = torch.sum(multiply)

            mask_sum = torch.sum(attention_masks)

            attention_loss /= mask_sum

            m1_loss = F.l1_loss(m1_hat, m)
            m2_loss = F.l1_loss(m2_hat, m)

            loss = m1_loss + m2_loss + attention_loss

            optimizer.zero_grad()
            loss.backward()
            if hp.tts_clip_grad_norm is not None:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), hp.tts_clip_grad_norm)
                # float() accepts both a plain number and a 0-dim tensor on
                # any device; np.isnan() on a CUDA tensor raises TypeError.
                if np.isnan(float(grad_norm)):
                    print('grad_norm was NaN!')

            optimizer.step()

            running_loss += loss.item()
            avg_loss = running_loss / i

            speed = i / (time.time() - start)

            step = model.get_step()
            k = step // 1000

            if step % hp.tts_checkpoint_every == 0:
                ckpt_name = f'taco_step{k}K'
                save_checkpoint('tts',
                                paths,
                                model,
                                optimizer,
                                name=ckpt_name,
                                is_silent=True)

            if attn_example in ids:
                idx = ids.index(attn_example)
                save_attention(np_now(orig_attention[idx][:, :160]),
                               paths.tts_attention / f'{step}')
                save_spectrogram(np_now(m2_hat[idx]),
                                 paths.tts_mel_plot / f'{step}', 600)

            msg = f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Loss: {avg_loss:#.4} | {speed:#.2} steps/s | Step: {k}k | '
            stream(msg)

        # Must save latest optimizer state to ensure that resuming training
        # doesn't produce artifacts
        save_checkpoint('tts', paths, model, optimizer, is_silent=True)
        model.log(paths.tts_log, msg)
        print(' ')
Esempio n. 6
0
def tts_train_loop_af_offline(paths: Paths,
                              model: Tacotron,
                              optimizer,
                              train_set,
                              lr,
                              train_steps,
                              attn_example,
                              hp=None):
    """Run the Tacotron training loop with a reference-attention loss term.

    The model is called with ``generate_gta=False`` and the batch's
    reference attention ``attn_ref``. The loss is the sum of the L1 losses
    of both outputs against ``m`` plus ``hp.attn_loss_coeff`` times the L1
    distance between the smoothed predicted and reference attentions.

    Args:
        paths: Object exposing ``tts_attention``, ``tts_mel_plot`` and
            ``tts_log``; also forwarded to ``save_checkpoint``.
        model: Model to train. Must provide ``parameters()``,
            ``get_step()`` and ``log()``, and return
            ``(m1_hat, m2_hat, attention)``.
        optimizer: Optimizer whose ``param_groups`` are updated in place.
        train_set: Sized iterable yielding
            ``(x, m, ids, _, attn_ref)`` batches. ``ids`` must support
            ``in`` and ``.index()``.
        lr: Learning rate applied to all parameter groups.
        train_steps: Number of steps used to derive the epoch count.
        attn_example: Item id; whenever it appears in a batch, reference
            and predicted attention plus the second-output spectrogram are
            plotted.
        hp: Hyper-parameter object providing ``attn_loss_coeff``,
            ``tts_clip_grad_norm`` and ``tts_checkpoint_every``. Required
            despite the ``None`` default, which is kept for signature
            compatibility.

    Raises:
        ValueError: If ``hp`` is ``None``.
        ZeroDivisionError: If ``train_set`` is empty.
    """
    # This parameter shadows any module-level `hp`, so there is nothing to
    # fall back on. Fail before any work is done instead of with an
    # AttributeError after the first forward pass.
    if hp is None:
        raise ValueError(
            'tts_train_loop_af_offline requires an explicit `hp` object')

    def smooth(d, eps=float(1e-10)):
        # Blend d with a constant 1 / d.size(2), weighted by eps.
        u = 1.0 / float(d.size()[2])
        return eps * u + (1 - eps) * d

    device = next(
        model.parameters()).device  # use same device as model parameters

    for g in optimizer.param_groups:
        g['lr'] = lr

    total_iters = len(train_set)
    epochs = train_steps // total_iters + 1

    for e in range(1, epochs + 1):

        start = time.time()
        running_loss_out, running_loss_attn = 0, 0

        # Perform 1 epoch
        for i, (x, m, ids, _, attn_ref) in enumerate(train_set, 1):

            x, m, attn_ref = x.to(device), m.to(device), attn_ref.to(device)

            # Parallelize model onto GPUS using workaround due to python bug
            if device.type == 'cuda' and torch.cuda.device_count() > 1:
                m1_hat, m2_hat, attention = data_parallel_workaround(
                    model, x, m, False, attn_ref)
            else:
                m1_hat, m2_hat, attention = model(x,
                                                  m,
                                                  generate_gta=False,
                                                  attn_ref=attn_ref)

            m1_loss = F.l1_loss(m1_hat, m)
            m2_loss = F.l1_loss(m2_hat, m)
            # attn_loss = F.kl_div(torch.log(smooth(attention)), smooth(attn_ref), reduction='mean') # 'batchmean'
            attn_loss = F.l1_loss(smooth(attention), smooth(attn_ref))

            loss_out = m1_loss + m2_loss
            loss_attn = attn_loss * hp.attn_loss_coeff
            loss = loss_out + loss_attn

            optimizer.zero_grad()
            loss.backward()
            if hp.tts_clip_grad_norm is not None:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), hp.tts_clip_grad_norm)
                # float() accepts both a plain number and a 0-dim tensor on
                # any device; np.isnan() on a CUDA tensor raises TypeError.
                if np.isnan(float(grad_norm)):
                    print('grad_norm was NaN!')

            optimizer.step()

            running_loss_out += loss_out.item()
            avg_loss_out = running_loss_out / i
            running_loss_attn += loss_attn.item()
            avg_loss_attn = running_loss_attn / i

            speed = i / (time.time() - start)

            step = model.get_step()
            k = step // 1000

            if step % hp.tts_checkpoint_every == 0:
                ckpt_name = f'taco_step{k}K'
                save_checkpoint('tts',
                                paths,
                                model,
                                optimizer,
                                name=ckpt_name,
                                is_silent=True)

            if attn_example in ids:
                idx = ids.index(attn_example)
                save_attention(np_now(attn_ref[idx][:, :160]),
                               paths.tts_attention / f'{step}_tf')
                save_attention(np_now(attention[idx][:, :160]),
                               paths.tts_attention / f'{step}_af')
                save_spectrogram(np_now(m2_hat[idx]),
                                 paths.tts_mel_plot / f'{step}', 600)

            msg = f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Loss_out: {avg_loss_out:#.4}; Output_attn: {avg_loss_attn:#.4} | {speed:#.2} steps/s | Step: {k}k | '
            stream(msg)

        # Must save latest optimizer state to ensure that resuming training
        # doesn't produce artifacts
        save_checkpoint('tts', paths, model, optimizer, is_silent=True)
        model.log(paths.tts_log, msg)
        print(' ')
Esempio n. 7
0
def tts_train_loop(paths: Paths, model: Tacotron, optimizer, train_set, lr,
                   train_steps, attn_example, max_y, max_x):
    """Run the Tacotron training loop with attention guides read from disk.

    For every item of a batch the guide ``<hp.attention_path>/<id>.npy`` is
    loaded, subsampled to every ``r``-th row and padded to the shape of the
    model attention. The loss is the sum of the L1 losses of both model
    outputs against ``m`` plus the squared difference between the masked
    attention and the guides, summed and divided by the number of unmasked
    cells.

    Args:
        paths: Object exposing ``tts_attention``, ``tts_mel_plot`` and
            ``tts_log``; also forwarded to ``save_checkpoint``.
        model: Model to train. Must provide ``parameters()``,
            ``get_step()`` and ``log()``, and return
            ``(m1_hat, m2_hat, attention, r)`` when called with ``(x, m)``.
        optimizer: Optimizer whose ``param_groups`` are updated in place.
        train_set: Sized iterable yielding 5-tuples
            ``(x, m, ids, _, guides)``; the last two entries are ignored.
            ``ids`` must support ``in`` and ``.index()``.
        lr: Learning rate applied to all parameter groups.
        train_steps: Number of steps used to derive the epoch count.
        attn_example: Item id; whenever it appears in a batch its (masked)
            attention and second-output spectrogram are plotted.
        max_y: Unused; kept for signature compatibility.
        max_x: Unused; kept for signature compatibility.

    Raises:
        ZeroDivisionError: If ``train_set`` is empty.
    """
    device = next(
        model.parameters()).device  # use same device as model parameters

    for g in optimizer.param_groups:
        g['lr'] = lr

    total_iters = len(train_set)
    epochs = train_steps // total_iters + 1

    for e in range(1, epochs + 1):

        start = time.time()
        running_loss = 0

        # Perform 1 epoch
        for i, (x, m, ids, _, _guides) in enumerate(train_set, 1):

            x, m = x.to(device), m.to(device)

            # Parallelize model onto GPUS using workaround due to python bug
            if device.type == 'cuda' and torch.cuda.device_count() > 1:
                m1_hat, m2_hat, attention, r = data_parallel_workaround(
                    model, x, m)
            else:
                m1_hat, m2_hat, attention, r = model(x, m)

            # Every item of the batch shares the attention shape, so the
            # padding target is read once from the batched tensor instead of
            # from variables left over by the per-item loop.
            n_frames = attention.shape[1]
            n_phones = attention.shape[-1]

            att_guide_path = hp.attention_path
            reduced_guides = []
            for item_id in ids:
                att = np.load(f'{att_guide_path}/{item_id}.npy')
                # Keep every r-th row of the guide, then pad it to the
                # attention shape.
                reduced_guides.append(
                    pad2d_nonzero(att[0::r], n_frames, n_phones))

            reduced_guides = torch.tensor(reduced_guides)
            # 1.0 wherever the padded guide is not -1 (presumably the padding
            # value of pad2d_nonzero), else 0.0.
            mask = torch.ne(reduced_guides, -1).type(torch.FloatTensor)

            padded_guides = [
                pad2d_zero(guide, n_frames, n_phones)
                for guide in reduced_guides
            ]
            padded_guides = torch.tensor(padded_guides)
            padded_guides = padded_guides.to(device)
            attention = attention.to(device)
            mask = mask.to(device)
            attention = attention * mask

            # Squared error between masked attention and guides, normalised
            # by the number of unmasked cells.
            multiply = torch.pow((attention - padded_guides), 2)
            attention_loss = torch.sum(multiply) / torch.sum(mask)

            m1_loss = F.l1_loss(m1_hat, m)
            m2_loss = F.l1_loss(m2_hat, m)

            loss = m1_loss + m2_loss + attention_loss

            optimizer.zero_grad()
            loss.backward()
            if hp.tts_clip_grad_norm is not None:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), hp.tts_clip_grad_norm)
                # float() accepts both a plain number and a 0-dim tensor on
                # any device; np.isnan() on a CUDA tensor raises TypeError.
                if np.isnan(float(grad_norm)):
                    print('grad_norm was NaN!')

            optimizer.step()

            running_loss += loss.item()
            avg_loss = running_loss / i

            speed = i / (time.time() - start)

            step = model.get_step()
            k = step // 1000

            if step % hp.tts_checkpoint_every == 0:
                ckpt_name = f'taco_step{k}K'
                save_checkpoint('tts',
                                paths,
                                model,
                                optimizer,
                                name=ckpt_name,
                                is_silent=True)

            if attn_example in ids:
                idx = ids.index(attn_example)
                save_attention(np_now(attention[idx][:, :160]),
                               paths.tts_attention / f'{step}')
                save_spectrogram(np_now(m2_hat[idx]),
                                 paths.tts_mel_plot / f'{step}', 600)

            msg = f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Loss: {avg_loss:#.4} | {speed:#.2} steps/s | Step: {k}k | '
            stream(msg)

        # Must save latest optimizer state to ensure that resuming training
        # doesn't produce artifacts
        save_checkpoint('tts', paths, model, optimizer, is_silent=True)
        model.log(paths.tts_log, msg)
        print(' ')