Example #1
def evaluate_master(model, num_gpus, output_directory, epochs, learning_rate, lr_decay_step, lr_decay_gamma,
          sigma, iters_per_checkpoint, batch_size, seed, fp16_run,
          checkpoint_path, with_tensorboard):

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        if args.average_checkpoint == 0:
            model, _, _, iteration = load_checkpoint(checkpoint_path, model, None, None)
        else:
            print("INFO: --average_checkpoint > 0. loading an averaged weight of last {} checkpoints...".format(args.average_checkpoint))
            model, iteration = load_averaged_checkpoint(checkpoint_path, model, args.average_checkpoint)

    if fp16_run:
        raise NotImplementedError("do not run evaluation loop with fp16 mode!")

    testset = Mel2Samp("test", True, True, **data_config)
    test_sampler = None
    test_loader = DataLoader(testset, num_workers=4, shuffle=False,
                             sampler=test_sampler,
                             batch_size=1,
                             pin_memory=False,
                             drop_last=False)

    # Get shared output_directory ready
    if not os.path.isdir(os.path.join(output_directory, waveflow_config["model_name"])):
        os.makedirs(os.path.join(output_directory, waveflow_config["model_name"]), exist_ok=True)
        os.chmod(os.path.join(output_directory, waveflow_config["model_name"]), 0o775)
    print("output directory", os.path.join(output_directory, waveflow_config["model_name"]))
    if not os.path.isdir(os.path.join(output_directory, "samples")):
        os.makedirs(os.path.join(output_directory, "samples"), exist_ok=True)
        os.chmod(os.path.join(output_directory, "samples"), 0o775)
    os.makedirs(os.path.join(output_directory, "samples", waveflow_config["model_name"]), exist_ok=True)
    os.chmod(os.path.join(output_directory, "samples", waveflow_config["model_name"]), 0o775)

    criterion = WaveFlowLossDataParallel(sigma)

    model.eval()
    epoch_eval_loss = 0
    for i, batch in enumerate(test_loader):
        with torch.no_grad():
            mel, audio, filename = batch
            mel, audio = mel.cuda(), audio.cuda()

            outputs = model(audio, mel)

            loss = criterion(outputs)
            reduced_loss = loss.item()
            epoch_eval_loss += reduced_loss

        print("eval data {}: {:.9f}".format(i, reduced_loss))

    epoch_eval_loss = epoch_eval_loss / len(test_loader)
    print("EVAL_FULL {}:\t{:.9f}".format(iteration, epoch_eval_loss))
    model.train()
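Most of the examples on this page read `data_config` and the model config (`waveflow_config` / `waveglow_config`) from module-level globals that are set up elsewhere. A minimal sketch of how such globals are commonly populated from a JSON config file; the file name and key names below are assumptions, not taken from the example above.

import json

# Hypothetical config bootstrap; "config.json" and the key names are assumptions.
with open("config.json") as f:
    config = json.load(f)
data_config = config["data_config"]
waveflow_config = config["waveflow_config"]
dist_config = config.get("dist_config", {})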
Example #2
def load_LJSpeech(trainset_config, batch_size=4, num_gpus=1):
    LJSpeech_dataset = Mel2Samp(**trainset_config)

    # distributed sampler
    train_sampler = DistributedSampler(
        LJSpeech_dataset) if num_gpus > 1 else None

    trainloader = torch.utils.data.DataLoader(LJSpeech_dataset,
                                              batch_size=batch_size,
                                              sampler=train_sampler,
                                              num_workers=4,
                                              pin_memory=False,
                                              drop_last=True)
    return trainloader
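A hedged usage sketch for `load_LJSpeech`; the `trainset_config` keys below are placeholder values that would normally come from the project's JSON config, and the exact keys depend on `Mel2Samp`'s signature.

# Illustrative only: keys and values are assumptions.
trainset_config = {
    "training_files": "train_files.txt",
    "segment_length": 16000,
    "sampling_rate": 22050,
    "filter_length": 1024,
    "hop_length": 256,
    "win_length": 1024,
    "mel_fmin": 0.0,
    "mel_fmax": 8000.0,
}
trainloader = load_LJSpeech(trainset_config, batch_size=4, num_gpus=1)
for mel, audio in trainloader:
    print(mel.shape, audio.shape)
    break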
Example #3
def test(sigma, batch_size, seed, checkpoint_path):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    criterion = WaveGlowLoss(sigma)
    model = WaveGlow(**waveglow_config).cuda().eval()

    # Load checkpoint if one exists
    model, iteration = load_checkpoint(checkpoint_path, model)

    model.eval()

    testset = Mel2Samp(data_config['testing_files'],
                       data_config['segment_length'],
                       data_config['filter_length'], data_config['hop_length'],
                       data_config['win_length'], data_config['sampling_rate'],
                       data_config['mel_fmin'], data_config['mel_fmax'])
    test_loader = DataLoader(testset,
                             num_workers=1,
                             shuffle=False,
                             sampler=None,
                             batch_size=batch_size,
                             pin_memory=False,
                             drop_last=True)

    with torch.no_grad():
        val_loss = 0.0
        for j, batch in enumerate(test_loader):
            mel, audio = batch
            mel = torch.autograd.Variable(mel.cuda())
            audio = torch.autograd.Variable(audio.cuda())
            outputs = model((mel, audio))

            loss = criterion(outputs)
            val_loss += loss.item()
        val_loss = val_loss / (j + 1)
        model.train()

        print("test loss: {}:\t{:.9f}".format(iteration, val_loss))
Example #4
def train(num_gpus, rank, group_name, output_directory, epochs, learning_rate,
          sigma, iters_per_checkpoint, batch_size, seed, checkpoint_path):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======

    criterion = WaveGlowLoss(sigma)
    model = WaveGlow(**waveglow_config).cuda()

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Load checkpoint if one exists
    iteration = 0
    print("checkpoint path", checkpoint_path)
    #model = warm_load_checkpoint(checkpoint_path, model)
    model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                  optimizer)
    iteration += 1
    trainset = Mel2Samp(**data_config)
    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    train_loader = DataLoader(trainset,
                              num_workers=1,
                              shuffle=True,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)

    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    model.train()
    epoch_offset = max(0, int(iteration / len(train_loader)))
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        print("Epoch: {}".format(epoch))
        for i, batch in enumerate(train_loader):
            model.zero_grad()

            mel, audio = batch
            mel = torch.autograd.Variable(mel.cuda())
            audio = torch.autograd.Variable(audio.cuda())
            outputs = model((mel, audio))

            loss = criterion(outputs)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()
            loss.backward()
            optimizer.step()
            if (iteration % iters_per_checkpoint == 0):
                print("{}:\t{:.9f}".format(iteration, reduced_loss))
                checkpoint_path = "{}/waveglow".format(output_directory)
                save_checkpoint(model, optimizer, learning_rate, iteration,
                                checkpoint_path)
            iteration += 1
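The matching `save_checkpoint` helper is also defined outside these snippets. One plausible implementation, kept consistent with the loading sketch above; the saved dict layout is an assumption.

import torch

def save_checkpoint(model, optimizer, learning_rate, iteration, filepath):
    # Save everything the training loop needs to resume; layout is assumed.
    print("Saving model and optimizer state at iteration {} to {}".format(
        iteration, filepath))
    torch.save({'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'learning_rate': learning_rate,
                'iteration': iteration}, filepath)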
Example #5
def main(waveglow_path, sigma, output_dir, is_fp16, denoiser_strength):
    # mel_files = files_to_list(mel_files)
    waveglow = torch.load(waveglow_path)['model']
    waveglow = waveglow.remove_weightnorm(waveglow)
    waveglow.cuda().eval()
    if is_fp16:
        from apex import amp
        waveglow, _ = amp.initialize(waveglow, [], opt_level="O3")

    if denoiser_strength > 0:
        denoiser = Denoiser(waveglow).cuda()

    testset = Mel2Samp(**data_config)
    # =====START: ADDED FOR DISTRIBUTED======
    # train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    test_loader = DataLoader(
        testset,
        num_workers=32,
        shuffle=False,
        # sampler=train_sampler,
        batch_size=12,
        pin_memory=False,
        drop_last=True)

    speakers_to_sids = deepcopy(testset.speakers)
    sids_to_speakers = create_reverse_dict(speakers_to_sids)
    ut_to_uids = deepcopy(testset.utterances)
    uids_to_ut = create_reverse_dict(ut_to_uids)

    sid_target = np.random.randint(len(speakers_to_sids))
    speaker_target = sids_to_speakers[sid_target]
    sid_target = torch.LongTensor([[sid_target] * test_loader.batch_size
                                   ]).view(test_loader.batch_size,
                                           1).to(device)

    audios = []
    n_audios = 0
    for i, batch in enumerate(test_loader):
        mel_source, _, sid_source, uid_source, is_last = batch
        mel_source = mel_source.to(device)

        with torch.no_grad():
            predicted = waveglow.infer(mel_source, sid_target, sigma=sigma)
            if denoiser_strength > 0:
                predicted = denoiser(predicted, denoiser_strength)
            predicted = predicted * MAX_WAV_VALUE

        for j in range(len(predicted)):
            # p = predicted[j].squeeze().cpu().numpy().astype('int16')
            p = predicted[j].cpu()
            audios.append(p)
            speaker_source = sids_to_speakers[sid_source[j].data.item()]
            ut_source = uids_to_ut[uid_source[j].data.item()]
            last = is_last[j].data.item()
            if last:
                audio_path = os.path.join(
                    output_dir,
                    "{}_{}_to_{}_synthesis.wav".format(speaker_source,
                                                       ut_source,
                                                       speaker_target))
                print("Synthesizing file No.{} at {}".format(
                    n_audios, audio_path))
                save_audio_chunks(audios, audio_path, data_config['stride'],
                                  data_config['sampling_rate'])

                audios = []
                n_audios += 1
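`create_reverse_dict` is only referenced here. Assuming the speaker and utterance maps are one-to-one, it is most likely a simple inversion helper along these lines.

def create_reverse_dict(mapping):
    # Invert {name: id} into {id: name}; assumes the mapping is one-to-one.
    return {v: k for k, v in mapping.items()}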
Example #6
def train(num_gpus,
          rank,
          group_name,
          output_directory,
          epochs,
          learning_rate,
          sigma,
          iters_per_checkpoint,
          batch_size,
          seed,
          fp16_run,
          checkpoint_path,
          with_tensorboard,
          num_workers=2):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======

    criterion = WaveGlowLoss(sigma)
    model = WaveGlow(**waveglow_config).cuda()

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    if fp16_run:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer)
        iteration += 1  # next iteration is iteration + 1

    # HACK: setup separate training and eval sets
    training_files = data_config['training_files']
    eval_files = data_config['eval_files']
    del data_config['training_files']
    del data_config['eval_files']
    data_config['audio_files'] = training_files
    trainset = Mel2Samp(**data_config)
    data_config['audio_files'] = eval_files
    evalset = Mel2Samp(**data_config)

    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    eval_sampler = DistributedSampler(evalset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======

    print("Creating dataloaders with " + str(num_workers) + " workers")
    train_loader = DataLoader(trainset,
                              num_workers=num_workers,
                              shuffle=True,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)
    eval_loader = DataLoader(evalset,
                             num_workers=num_workers,
                             shuffle=True,
                             sampler=eval_sampler,
                             batch_size=batch_size,
                             pin_memory=False,
                             drop_last=True)

    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    if with_tensorboard and rank == 0:
        from tensorboardX import SummaryWriter
        logger_train = SummaryWriter(
            os.path.join(output_directory, 'logs', 'train'))
        logger_eval = SummaryWriter(
            os.path.join(output_directory, 'logs', 'eval'))

    epoch_offset = max(0, int(iteration / len(train_loader)))
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        model.train()
        with tqdm(total=len(train_loader)) as train_pbar:
            for i, batch in enumerate(train_loader):
                model.zero_grad()

                mel, audio = batch
                mel = torch.autograd.Variable(mel.cuda())
                audio = torch.autograd.Variable(audio.cuda())
                outputs = model((mel, audio))

                loss = criterion(outputs)
                if num_gpus > 1:
                    reduced_loss = reduce_tensor(loss.data, num_gpus).item()
                else:
                    reduced_loss = loss.item()

                if fp16_run:
                    with amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    loss.backward()

                optimizer.step()

                train_pbar.set_description(
                    "Epoch {} Iter {} Loss {:.3f}".format(
                        epoch, iteration, reduced_loss))
                if with_tensorboard and rank == 0 and iteration % 10 == 0:
                    logger_train.add_scalar('loss', reduced_loss,
                                            i + len(train_loader) * epoch)
                    # adding logging for GPU utilization and memory usage
                    gpu_memory_used, gpu_utilization = get_gpu_stats()
                    k = 'gpu' + str(0)
                    logger_train.add_scalar(k + '/memory', gpu_memory_used,
                                            iteration)
                    logger_train.add_scalar(k + '/load', gpu_utilization,
                                            iteration)
                    logger_train.flush()

                if (iteration % iters_per_checkpoint == 0):
                    if rank == 0:
                        checkpoint_path = "{}/waveglow_{}".format(
                            output_directory, iteration)
                        save_checkpoint(model, optimizer, learning_rate,
                                        iteration, checkpoint_path)

                iteration += 1
                train_pbar.update(1)

        # Eval
        model.eval()
        torch.cuda.empty_cache()

        with torch.no_grad():
            tensorboard_mel, tensorboard_audio = None, None
            loss_accum = []
            with tqdm(total=len(eval_loader)) as eval_pbar:
                for i, batch in enumerate(eval_loader):
                    model.zero_grad()
                    mel, audio = batch
                    mel = torch.autograd.Variable(mel.cuda())
                    audio = torch.autograd.Variable(audio.cuda())
                    outputs = model((mel, audio))
                    loss = criterion(outputs).item()
                    loss_accum.append(loss)
                    eval_pbar.set_description("Epoch {} Eval {:.3f}".format(
                        epoch, loss))
                    outputs = None

                    # use the first batch for tensorboard audio samples
                    if i == 0:
                        tensorboard_mel = mel
                        tensorboard_audio = audio
                    eval_pbar.update(1)

            if with_tensorboard and rank == 0:
                loss_avg = statistics.mean(loss_accum)
                tqdm.write("Epoch {} Eval AVG {}".format(epoch, loss_avg))
                logger_eval.add_scalar('loss', loss_avg, iteration)

                # log audio samples to tensorboard (only when the eval logger exists)
                tensorboard_audio_generated = model.infer(tensorboard_mel)
                for i in range(0, 5):
                    ta = tensorboard_audio[i].cpu().numpy()
                    tag = tensorboard_audio_generated[i].cpu().numpy()
                    logger_eval.add_audio("sample " + str(i) + "/orig",
                                          ta,
                                          epoch,
                                          sample_rate=data_config['sampling_rate'])
                    logger_eval.add_audio("sample " + str(i) + "/gen",
                                          tag,
                                          epoch,
                                          sample_rate=data_config['sampling_rate'])
                logger_eval.flush()
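`get_gpu_stats` is not shown on this page. One way to provide it is via the GPUtil package; this sketch returns memory used (MB) and utilization for GPU 0 and is an assumption about what the original helper reports.

import GPUtil

def get_gpu_stats():
    # (memory used in MB, utilization in [0, 1]) for the first visible GPU.
    gpu = GPUtil.getGPUs()[0]
    return gpu.memoryUsed, gpu.load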
Example #7
def train(num_gpus, rank, group_name, prj_name, run_name, output_directory,
          epochs, learning_rate, sigma, iters_per_checkpoint, batch_size, seed,
          fp16_run, grad_clip_thresh, checkpoint_path, pretrained_path,
          with_tensorboard, with_wandb):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======

    criterion = WaveGlowLoss(sigma)
    model = WaveGlow(**waveglow_config).cuda()

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    if fp16_run:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer)
        iteration += 1  # next iteration is iteration + 1

    if pretrained_path != "":
        model = load_pretrained(pretrained_path, model)

    trainset = Mel2Samp(**data_config)
    # =====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        train_sampler = DistributedSampler(trainset)
        shuffle_at_dataloader = False
    else:
        train_sampler = None
        shuffle_at_dataloader = True
    # =====END:   ADDED FOR DISTRIBUTED======
    train_loader = DataLoader(trainset,
                              num_workers=1,
                              shuffle=shuffle_at_dataloader,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)

    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    if with_tensorboard and rank == 0:
        from tensorboardX import SummaryWriter
        logger = SummaryWriter(os.path.join(output_directory, 'logs'))

    model.train()
    epoch_offset = max(0, int(iteration / len(train_loader)))
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        print("Epoch: {}".format(epoch))
        for i, batch in enumerate(train_loader):
            iter_start = time.perf_counter()

            float_epoch = float(iteration) / len(train_loader)

            model.zero_grad()

            mel, audio = batch
            mel = torch.autograd.Variable(mel.cuda())
            audio = torch.autograd.Variable(audio.cuda())
            outputs = model((mel, audio))

            loss, etc = criterion(outputs)
            (z_L2_normalized, neg_log_s_total, neg_log_det_W_total) = etc
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()

            if fp16_run:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            is_overflow = False
            if fp16_run:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    amp.master_params(optimizer), grad_clip_thresh)
                is_overflow = math.isnan(grad_norm)
                if not is_overflow:
                    clipped_grad_norm = get_clip_grad_norm(
                        grad_norm, grad_clip_thresh)
            else:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), grad_clip_thresh)
                clipped_grad_norm = get_clip_grad_norm(grad_norm,
                                                       grad_clip_thresh)

            optimizer.step()
            iter_duration = time.perf_counter() - iter_start

            print("{}:\t{:.9f}".format(iteration, reduced_loss))
            if with_tensorboard and rank == 0:
                logger.add_scalar('training_loss', reduced_loss,
                                  i + len(train_loader) * epoch)

            if with_wandb and rank == 0:
                wandb.log(
                    {
                        'iteration': iteration,
                        'epoch': float_epoch,
                        'iter_duration': iter_duration,
                        'training_loss': reduced_loss,
                        'training_loss/z_L2_normalized': z_L2_normalized,
                        'training_loss/neg_log_s_total': neg_log_s_total,
                        'training_loss/neg_log_det_W_total':
                        neg_log_det_W_total,
                    },
                    step=iteration)
                if not is_overflow:
                    wandb.log(
                        {
                            'grad_norm': grad_norm,
                            'clipped_grad_norm': clipped_grad_norm,
                        },
                        step=iteration)

            if (iteration % iters_per_checkpoint == 0):
                if rank == 0:
                    checkpoint_path = "{}/{}/{}/waveglow_{}".format(
                        output_directory, prj_name, run_name, iteration)
                    save_checkpoint(model, optimizer, learning_rate, iteration,
                                    checkpoint_path)

            iteration += 1
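`get_clip_grad_norm` is another external helper. Given that it is logged alongside `grad_norm`, it presumably reports the norm actually applied after clipping; the sketch below is a guess under that assumption.

def get_clip_grad_norm(grad_norm, grad_clip_thresh):
    # Assumption: the effective norm is the unclipped norm capped at the threshold.
    return min(float(grad_norm), float(grad_clip_thresh))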
Example #8
def train(
    num_gpus,
    rank,
    group_name,
    output_directory,
    epochs,
    learning_rate,
    sigma,
    iters_per_checkpoint,
    batch_size,
    seed,
    fp16_run,
    checkpoint_path,
    with_tensorboard,
):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # =====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    # =====END:   ADDED FOR DISTRIBUTED======

    criterion = WaveGlowLoss(sigma)
    model = WaveGlow(**waveglow_config).cuda()

    # =====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    # =====END:   ADDED FOR DISTRIBUTED======

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    if fp16_run:
        from apex import amp

        model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer)
        iteration += 1  # next iteration is iteration + 1

    trainset = Mel2Samp(**data_config)
    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    train_loader = DataLoader(
        trainset,
        num_workers=1,
        shuffle=False,
        sampler=train_sampler,
        batch_size=batch_size,
        pin_memory=False,
        drop_last=True,
    )

    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    if with_tensorboard and rank == 0:
        from tensorboardX import SummaryWriter

        logger = SummaryWriter(os.path.join(output_directory, "logs"))

    # fixed for visualization
    real_mels, real_audios = zip(*[trainset[i] for i in range(8)])
    real_mel = torch.cat(real_mels, dim=-1)
    real_audio = torch.cat(real_audios, dim=0)

    model.train()
    epoch_offset = max(0, int(iteration / len(train_loader)))
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        print("Epoch: {}".format(epoch))
        for i, batch in enumerate(train_loader):
            model.zero_grad()

            mel, audio = batch
            mel = torch.autograd.Variable(mel.cuda())
            audio = torch.autograd.Variable(audio.cuda())
            outputs = model((mel, audio))

            loss = criterion(outputs)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()

            if fp16_run:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            optimizer.step()

            print("{}:\t{:.9f}".format(iteration, reduced_loss))
            if with_tensorboard and rank == 0:
                step = i + len(train_loader) * epoch
                logger.add_scalar("training_loss", reduced_loss, step)
                if step % 500 == 0:
                    # synthesize from the first eight (fixed) data samples

                    model.eval()
                    with torch.no_grad():
                        device = mel.device
                        fake_audio = (model.infer(
                            torch.stack(real_mels).to(device)).flatten(
                                0, 1).cpu())
                    model.train()
                    fake_mel = trainset.get_mel(fake_audio)

                    logger.add_image(
                        "training_mel_real",
                        plot_spectrogram_to_numpy(real_mel),
                        step,
                        dataformats="HWC",
                    )
                    logger.add_audio(
                        "training_audio_real",
                        real_audio,
                        step,
                        22050,
                    )
                    logger.add_image(
                        "training_mel_fake",
                        plot_spectrogram_to_numpy(fake_mel),
                        step,
                        dataformats="HWC",
                    )
                    logger.add_audio(
                        "training_audio_fake",
                        fake_audio,
                        step,
                        22050,
                    )
                    logger.flush()

            if iteration % iters_per_checkpoint == 0:
                if rank == 0:
                    checkpoint_path = "{}/waveglow_{}".format(
                        output_directory, iteration)
                    save_checkpoint(model, optimizer, learning_rate, iteration,
                                    checkpoint_path)

            iteration += 1
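`plot_spectrogram_to_numpy` is not defined here either. A common pattern in similar repositories renders the mel with matplotlib and converts the canvas to an HWC uint8 array, which matches the `dataformats="HWC"` call above; the sketch below assumes that behaviour.

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np

def plot_spectrogram_to_numpy(spectrogram):
    # Render a mel spectrogram to an HWC uint8 image for tensorboardX.
    fig, ax = plt.subplots(figsize=(12, 3))
    im = ax.imshow(np.asarray(spectrogram), aspect="auto", origin="lower",
                   interpolation="none")
    fig.colorbar(im, ax=ax)
    fig.canvas.draw()
    data = np.asarray(fig.canvas.buffer_rgba())[..., :3].copy()
    plt.close(fig)
    return data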
Example #9
def train(num_gpus, rank, group_name, output_directory, epochs, learning_rate,
          sigma, loss_empthasis, iters_per_checkpoint, batch_size, seed, fp16_run,
          checkpoint_path, with_tensorboard, logdirname, datedlogdir, warm_start=False, optimizer='ADAM', start_zero=False):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======
    
    global WaveGlow
    global WaveGlowLoss
    
    ax = True # this is **really** bad coding practice :D
    if ax:
        from efficient_model_ax import WaveGlow
        from efficient_loss import WaveGlowLoss
    else:
        if waveglow_config["yoyo"]: # efficient_mode # TODO: Add to Config File
            from efficient_model import WaveGlow
            from efficient_loss import WaveGlowLoss
        else:
            from glow import WaveGlow, WaveGlowLoss
    
    criterion = WaveGlowLoss(sigma, loss_empthasis)
    model = WaveGlow(**waveglow_config).cuda()
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======
    STFTs = [STFT.TacotronSTFT(filter_length=window,
                                 hop_length=data_config['hop_length'],
                                 win_length=window,
                                 sampling_rate=data_config['sampling_rate'],
                                 n_mel_channels=160,
                                 mel_fmin=0, mel_fmax=16000) for window in data_config['validation_windows']]
    
    loader_STFT = STFT.TacotronSTFT(filter_length=data_config['filter_length'],
                                 hop_length=data_config['hop_length'],
                                 win_length=data_config['win_length'],
                                 sampling_rate=data_config['sampling_rate'],
                                 n_mel_channels=data_config['n_mel_channels'] if 'n_mel_channels' in data_config.keys() else 160,
                                 mel_fmin=data_config['mel_fmin'], mel_fmax=data_config['mel_fmax'])
    
    #optimizer = "Adam"
    optimizer = optimizer.lower()
    optimizer_fused = bool( 0 ) # use Apex fused optimizer, should be identical to normal but slightly faster and only works on RTX cards
    if optimizer_fused:
        from apex import optimizers as apexopt
        if optimizer == "adam":
            optimizer = apexopt.FusedAdam(model.parameters(), lr=learning_rate)
        elif optimizer == "lamb":
            optimizer = apexopt.FusedLAMB(model.parameters(), lr=learning_rate, max_grad_norm=200)
    else:
        if optimizer == "adam":
            optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        elif optimizer == "lamb":
            from lamb import Lamb as optLAMB
            optimizer = optLAMB(model.parameters(), lr=learning_rate)
            #import torch_optimizer as optim
            #optimizer = optim.Lamb(model.parameters(), lr=learning_rate)
            #raise# PyTorch doesn't currently include LAMB optimizer.
    
    if fp16_run:
        global amp
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
    else:
        amp = None
    
    ## LEARNING RATE SCHEDULER
    if True:
        from torch.optim.lr_scheduler import ReduceLROnPlateau
        min_lr = 1e-8
        factor = 0.1**(1/5) # amount to scale the LR by on Validation Loss plateau
        scheduler = ReduceLROnPlateau(optimizer, 'min', factor=factor, patience=20, cooldown=2, min_lr=min_lr, verbose=True, threshold=0.0001, threshold_mode='abs')
        print("ReduceLROnPlateau used as Learning Rate Scheduler.")
    else: scheduler=False
    
    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, optimizer, iteration, scheduler = load_checkpoint(checkpoint_path, model,
                                                      optimizer, scheduler, fp16_run, warm_start=warm_start)
        iteration += 1  # next iteration is iteration + 1
    if start_zero:
        iteration = 0
    
    trainset = Mel2Samp(**data_config, check_files=True)
    speaker_lookup = trainset.speaker_ids
    # =====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        train_sampler = DistributedSampler(trainset, shuffle=True)
        shuffle = False
    else:
        train_sampler = None
        shuffle = True
    # =====END:   ADDED FOR DISTRIBUTED======
    train_loader = DataLoader(trainset, num_workers=3, shuffle=shuffle,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)
    
    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)
    
    if with_tensorboard and rank == 0:
        from tensorboardX import SummaryWriter
        if datedlogdir:
            timestr = time.strftime("%Y_%m_%d-%H_%M_%S")
            log_directory = os.path.join(output_directory, logdirname, timestr)
        else:
            log_directory = os.path.join(output_directory, logdirname)
        logger = SummaryWriter(log_directory)
    
    moving_average = int(min(len(train_loader), 100)) # average loss over entire Epoch
    rolling_sum = StreamingMovingAverage(moving_average)
    start_time = time.time()
    start_time_iter = time.time()
    start_time_dekaiter = time.time()
    model.train()
    
    # best (averaged) training loss
    if os.path.exists(os.path.join(output_directory, "best_model")+".txt"):
        best_model_loss = float(str(open(os.path.join(output_directory, "best_model")+".txt", "r", encoding="utf-8").read()).split("\n")[0])
    else:
        best_model_loss = -6.20
    
    # best (validation) MSE on inferred spectrogram.
    if os.path.exists(os.path.join(output_directory, "best_val_model")+".txt"):
        best_MSE = float(str(open(os.path.join(output_directory, "best_val_model")+".txt", "r", encoding="utf-8").read()).split("\n")[0])
    else:
        best_MSE = 9e9
    
    epoch_offset = max(0, int(iteration / len(train_loader)))
    
    pytorch_total_params = sum(p.numel() for p in model.parameters())
    print("{:,} total parameters in model".format(pytorch_total_params))
    pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("{:,} trainable parameters.".format(pytorch_total_params))
    
    print(f"Segment Length: {data_config['segment_length']:,}\nBatch Size: {batch_size:,}\nNumber of GPUs: {num_gpus:,}\nSamples/Iter: {data_config['segment_length']*batch_size*num_gpus:,}")
    
    training = True
    while training:
        try:
            if rank == 0:
                epochs_iterator = tqdm(range(epoch_offset, epochs), initial=epoch_offset, total=epochs, smoothing=0.01, desc="Epoch", position=1, unit="epoch")
            else:
                epochs_iterator = range(epoch_offset, epochs)
            # ================ MAIN TRAINING LOOP! ===================
            for epoch in epochs_iterator:
                print(f"Epoch: {epoch}")
                if num_gpus > 1:
                    train_sampler.set_epoch(epoch)
                
                if rank == 0:
                    iters_iterator = tqdm(enumerate(train_loader), desc=" Iter", smoothing=0, total=len(train_loader), position=0, unit="iter", leave=True)
                else:
                    iters_iterator = enumerate(train_loader)
                for i, batch in iters_iterator:
                    # run external code every iter, allows the run to be adjusted without restarts
                    if (i==0 or iteration % param_interval == 0):
                        try:
                            with open("run_every_epoch.py") as f:
                                internal_text = str(f.read())
                                if len(internal_text) > 0:
                                    #code = compile(internal_text, "run_every_epoch.py", 'exec')
                                    ldict = {'iteration': iteration, 'seconds_elapsed': time.time()-start_time}
                                    exec(internal_text, globals(), ldict)
                                else:
                                    print("No Custom code found, continuing without changes.")
                        except Exception as ex:
                            print(f"Custom code FAILED to run!\n{ex}")
                        globals().update(ldict)
                        locals().update(ldict)
                        if show_live_params:
                            print(internal_text)
                    if not iteration % 50: # check actual learning rate every 50 iters (because I sometimes see learning_rate variable go out-of-sync with real LR)
                        learning_rate = optimizer.param_groups[0]['lr']
                    # Learning Rate Schedule
                    if custom_lr:
                        old_lr = learning_rate
                        if iteration < warmup_start:
                            learning_rate = warmup_start_lr
                        elif iteration < warmup_end:
                            learning_rate = (iteration-warmup_start)*((A_+C_)-warmup_start_lr)/(warmup_end-warmup_start) + warmup_start_lr # learning rate increases from warmup_start_lr to A_ linearly over (warmup_end-warmup_start) iterations.
                        else:
                            if iteration < decay_start:
                                learning_rate = A_ + C_
                            else:
                                iteration_adjusted = iteration - decay_start
                                learning_rate = (A_*(e**(-iteration_adjusted/B_))) + C_
                        assert learning_rate > -1e-8, "Negative Learning Rate."
                        if old_lr != learning_rate:
                            for param_group in optimizer.param_groups:
                                param_group['lr'] = learning_rate
                    else:
                        scheduler.patience = scheduler_patience
                        scheduler.cooldown = scheduler_cooldown
                        if override_scheduler_last_lr:
                            scheduler._last_lr = override_scheduler_last_lr
                        if override_scheduler_best:
                            scheduler.best = override_scheduler_best
                        if override_scheduler_last_lr or override_scheduler_best:
                            print("scheduler._last_lr =", scheduler._last_lr, "scheduler.best =", scheduler.best, "  |", end='')
                    model.zero_grad()
                    mel, audio, speaker_ids = batch
                    mel = torch.autograd.Variable(mel.cuda(non_blocking=True))
                    audio = torch.autograd.Variable(audio.cuda(non_blocking=True))
                    speaker_ids = speaker_ids.cuda(non_blocking=True).long().squeeze(1)
                    outputs = model(mel, audio, speaker_ids)
                    
                    loss = criterion(outputs)
                    if num_gpus > 1:
                        reduced_loss = reduce_tensor(loss.data, num_gpus).item()
                    else:
                        reduced_loss = loss.item()
                    
                    if fp16_run:
                        with amp.scale_loss(loss, optimizer) as scaled_loss:
                            scaled_loss.backward()
                    else:
                        loss.backward()
                    
                    if (reduced_loss > LossExplosionThreshold) or (math.isnan(reduced_loss)):
                        model.zero_grad()
                        raise LossExplosion(f"\nLOSS EXPLOSION EXCEPTION ON RANK {rank}: Loss reached {reduced_loss} during iteration {iteration}.\n\n\n")
                    
                    if use_grad_clip:
                        if fp16_run:
                            grad_norm = torch.nn.utils.clip_grad_norm_(
                                amp.master_params(optimizer), grad_clip_thresh)
                        else:
                            grad_norm = torch.nn.utils.clip_grad_norm_(
                                model.parameters(), grad_clip_thresh)
                        if type(grad_norm) == torch.Tensor:
                            grad_norm = grad_norm.item()
                        is_overflow = math.isinf(grad_norm) or math.isnan(grad_norm)
                    else: is_overflow = False; grad_norm=0.00001
                    
                    optimizer.step()
                    if not is_overflow and rank == 0:
                        # get current Loss Scale of first optimizer
                        loss_scale = amp._amp_state.loss_scalers[0]._loss_scale if fp16_run else 32768
                        
                        if with_tensorboard:
                            if (iteration % 100000 == 0):
                                # plot distribution of parameters
                                for tag, value in model.named_parameters():
                                    tag = tag.replace('.', '/')
                                    logger.add_histogram(tag, value.data.cpu().numpy(), iteration)
                            logger.add_scalar('training_loss', reduced_loss, iteration)
                            logger.add_scalar('training_loss_samples', reduced_loss, iteration*batch_size)
                            if (iteration % 20 == 0):
                                logger.add_scalar('learning.rate', learning_rate, iteration)
                            if (iteration % 10 == 0):
                                logger.add_scalar('duration', ((time.time() - start_time_dekaiter)/10), iteration)
                        
                        average_loss = rolling_sum.process(reduced_loss)
                        if (iteration % 10 == 0):
                            tqdm.write("{} {}:  {:.3f} {:.3f}  {:.3f} {:08.3F} {:.8f}LR ({:.8f} Effective)  {:.2f}s/iter {:.4f}s/item".format(time.strftime("%H:%M:%S"), iteration, reduced_loss, average_loss, best_MSE, round(grad_norm,3), learning_rate, min((grad_clip_thresh/grad_norm)*learning_rate,learning_rate), (time.time() - start_time_dekaiter)/10, ((time.time() - start_time_dekaiter)/10)/(batch_size*num_gpus)))
                            start_time_dekaiter = time.time()
                        else:
                            tqdm.write("{} {}:  {:.3f} {:.3f}  {:.3f} {:08.3F} {:.8f}LR ({:.8f} Effective) {}LS".format(time.strftime("%H:%M:%S"), iteration, reduced_loss, average_loss, best_MSE, round(grad_norm,3), learning_rate, min((grad_clip_thresh/grad_norm)*learning_rate,learning_rate), loss_scale))
                        start_time_iter = time.time()
                    
                    if rank == 0 and (len(rolling_sum.values) > moving_average-2):
                        if (average_loss+best_model_margin) < best_model_loss:
                            checkpoint_path = os.path.join(output_directory, "best_model")
                            try:
                                save_checkpoint(model, optimizer, learning_rate, iteration, amp, scheduler, speaker_lookup,
                                            checkpoint_path)
                            except KeyboardInterrupt: # Avoid corrupting the model.
                                save_checkpoint(model, optimizer, learning_rate, iteration, amp, scheduler, speaker_lookup,
                                            checkpoint_path)
                            text_file = open((f"{checkpoint_path}.txt"), "w", encoding="utf-8")
                            text_file.write(str(average_loss)+"\n"+str(iteration))
                            text_file.close()
                            best_model_loss = average_loss #Only save the model if X better than the current loss.
                    if rank == 0 and iteration > 0 and ((iteration % iters_per_checkpoint == 0) or (os.path.exists(save_file_check_path))):
                        checkpoint_path = f"{output_directory}/waveglow_{iteration}"
                        save_checkpoint(model, optimizer, learning_rate, iteration, amp, scheduler, speaker_lookup,
                                        checkpoint_path)
                        if (os.path.exists(save_file_check_path)):
                            os.remove(save_file_check_path)
                    
                    if (iteration % validation_interval == 0):
                        if rank == 0:
                            MSE, MAE = validate(model, loader_STFT, STFTs, logger, iteration, data_config['validation_files'], speaker_lookup, sigma, output_directory, data_config)
                            if scheduler:
                                MSE = torch.tensor(MSE, device='cuda')
                                if num_gpus > 1:
                                    broadcast(MSE, 0)
                                scheduler.step(MSE.item())
                                if MSE < best_MSE:
                                    checkpoint_path = os.path.join(output_directory, "best_val_model")
                                    try:
                                        save_checkpoint(model, optimizer, learning_rate, iteration, amp, scheduler, speaker_lookup,
                                                    checkpoint_path)
                                    except KeyboardInterrupt: # Avoid corrupting the model.
                                        save_checkpoint(model, optimizer, learning_rate, iteration, amp, scheduler, speaker_lookup,
                                                    checkpoint_path)
                                    text_file = open((f"{checkpoint_path}.txt"), "w", encoding="utf-8")
                                    text_file.write(str(MSE.item())+"\n"+str(iteration))
                                    text_file.close()
                                    best_MSE = MSE.item() #Only save the model if X better than the current loss.
                        else:
                            if scheduler:
                                MSE = torch.zeros(1, device='cuda')
                                broadcast(MSE, 0)
                                scheduler.step(MSE.item())
                        learning_rate = optimizer.param_groups[0]['lr'] #check actual learning rate (because I sometimes see learning_rate variable go out-of-sync with real LR)
                    iteration += 1
            training = False # exit the While loop
        
        except LossExplosion as ex: # print the exception and continue from the checkpoint (restarting like this takes under 4 seconds)
            print(ex) # print Loss
            checkpoint_path = os.path.join(output_directory, "best_model")
            assert os.path.exists(checkpoint_path), "best_model must exist for automatic restarts"
            
            # clearing VRAM for load checkpoint
            audio = mel = speaker_ids = loss = None
            torch.cuda.empty_cache()
            
            model.eval()
            model, optimizer, iteration, scheduler = load_checkpoint(checkpoint_path, model, optimizer, scheduler, fp16_run)
            learning_rate = optimizer.param_groups[0]['lr']
            epoch_offset = max(0, int(iteration / len(train_loader)))
            model.train()
            iteration += 1
            pass # and continue training.
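`StreamingMovingAverage` is referenced but not defined on this page. Its usage (`process()` returning a running mean, plus a `.values` attribute) suggests a fixed-window average like the sketch below; treat the details as assumptions.

class StreamingMovingAverage:
    # Fixed-window running mean; .values is inspected by the loop above.
    def __init__(self, window_size):
        self.window_size = window_size
        self.values = []
        self.sum = 0.0

    def process(self, value):
        self.values.append(value)
        self.sum += value
        if len(self.values) > self.window_size:
            self.sum -= self.values.pop(0)
        return self.sum / len(self.values)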
Example #10
def train(num_gpus, rank, group_name, output_directory, epochs, learning_rate,
          sigma, iters_per_checkpoint, batch_size, seed, fp16_run,
          checkpoint_path, with_tensorboard, weight_sharing, optimizer_type,
          dataloader_type):

    ws = weight_sharing
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======

    criterion = WaveGlowLoss(sigma)
    model = WaveGlow(**waveglow_config).cuda()

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======

    optimizer_type = optimizer_type.lower()
    if optimizer_type == "sgd":
        optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    elif optimizer_type == "adam":
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    else:
        print("Unsupported optimizer: %s. Aborting." % optimizer_type)
        return None

    if fp16_run:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer)
        iteration += 1  # next iteration is iteration + 1

    dataloader_type = dataloader_type.lower()
    if dataloader_type == "vanilla":
        trainset = Mel2Samp(**data_config)
    elif dataloader_type == "split":
        trainset = Mel2SampSplit(**data_config)
    else:
        print("Unsupported dataloader type: %s. Aborting." % dataloader_type)
        return None

    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    train_loader = DataLoader(trainset,
                              num_workers=1,
                              shuffle=(num_gpus == 1),
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)

    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    name = "waveglow_ws%d_%s_%s_batch%d" % (ws, optimizer_type,
                                            dataloader_type, batch_size)

    if learning_rate != 1e-4:
        name = name + "_lr{:.0e}".format(learning_rate)

    if num_gpus > 1:
        name = name + "_x%d" % num_gpus

    if with_tensorboard and rank == 0:
        from tensorboardX import SummaryWriter
        logger = SummaryWriter(os.path.join("./logs", name))

    model.train()
    epoch_offset = max(0, int(iteration / len(train_loader)))
    # ================ MAIN TRAINING LOOP! ===================
    stime2 = None
    for epoch in range(epoch_offset, epochs):
        print("Epoch: {}".format(epoch))
        stime = time()
        for i, batch in enumerate(train_loader):
            model.zero_grad()

            mel, audio = batch
            mel = torch.autograd.Variable(mel.cuda())
            audio = torch.autograd.Variable(audio.cuda())
            outputs = model((mel, audio))

            loss = criterion(outputs)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()

            if fp16_run:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            optimizer.step()

            if (iteration % 100 == 0):
                if stime2 is not None:
                    tot_time2 = time() - stime2
                    print("{}:\t{:.9f}, time: {}".format(
                        iteration, reduced_loss, int(tot_time2)))
                stime2 = time()
            if with_tensorboard and rank == 0:
                logger.add_scalar('training_loss', reduced_loss,
                                  i + len(train_loader) * epoch)

            if (iteration % iters_per_checkpoint == 0):
                if rank == 0:
                    checkpoint_path = "{}/waveglow_{}_{}".format(
                        output_directory, name, iteration)
                    save_checkpoint(model, optimizer, learning_rate, iteration,
                                    checkpoint_path)

            iteration += 1
        tot_time = time() - stime
        print("Epoch %d completed. Time: %d seconds" % (epoch, int(tot_time)))
Example #11
def train(num_gpus, rank, group_name, stage, output_directory, epochs, learning_rate, sigma, iters_per_checkpoint, batch_size, seed, fp16_run, checkpoint_path, with_tensorboard, logdirname, datedlogdir, warm_start=False, optimizer='ADAM', start_zero=False):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======
    
    from model import HiFiGAN, HiFiGANLoss
    criterion = HiFiGANLoss(**hifigan_config).cuda()
    model = HiFiGAN(**hifigan_config).cuda()
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
        if stage >= 2:
            criterion = apply_gradient_allreduce(criterion)
    #=====END:   ADDED FOR DISTRIBUTED======
    
    criterion, optimizer_d = get_optimizer(criterion, optimizer, fp16_run, optimizer_fused=True) if stage >= 2 else (criterion, None)
    model, optimizer = get_optimizer(model, optimizer, fp16_run, optimizer_fused=True)
    
    ## LEARNING RATE SCHEDULER
    if True:
        from torch.optim.lr_scheduler import ReduceLROnPlateau
        min_lr = 1e-8
        factor = 0.1**(1/5) # amount to scale the LR by on Validation Loss plateau
        scheduler = ReduceLROnPlateau(optimizer, 'min', factor=factor, patience=20, cooldown=2, min_lr=min_lr, verbose=True, threshold=0.0001, threshold_mode='abs')
        print("ReduceLROnPlateau used as Learning Rate Scheduler.")
    else: scheduler=False
    
    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, optimizer, criterion, optimizer_d, iteration, scheduler = load_checkpoint(checkpoint_path, model,
                                                      optimizer, criterion, optimizer_d, scheduler, fp16_run, stage, warm_start=warm_start)
        iteration += 1  # next iteration is iteration + 1
    if start_zero:
        iteration = 0
    
    trainset = Mel2Samp(**data_config, check_files=True)
    speaker_lookup = trainset.speaker_ids
    # =====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        train_sampler = DistributedSampler(trainset, shuffle=True)
        shuffle = False
    else:
        train_sampler = None
        shuffle = True
    # =====END:   ADDED FOR DISTRIBUTED======
    train_loader = DataLoader(trainset, num_workers=3, shuffle=shuffle,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)
    
    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)
    
    if with_tensorboard and rank == 0:
        from tensorboardX import SummaryWriter
        if datedlogdir:
            timestr = time.strftime("%Y_%m_%d-%H_%M_%S")
            log_directory = os.path.join(output_directory, logdirname, timestr)
        else:
            log_directory = os.path.join(output_directory, logdirname)
        logger = SummaryWriter(log_directory)
    
    moving_average = int(min(len(train_loader), 200)) # average the loss over up to 200 iterations (at most one epoch)
    rolling_sum = StreamingMovingAverage(moving_average)
    start_time = time.time()
    start_time_iter = time.time()
    start_time_dekaiter = time.time()
    model.train()
    
    # best (averaged) training loss
    if os.path.exists(os.path.join(output_directory, "best_model")+".txt"):
        best_model_loss = float(str(open(os.path.join(output_directory, "best_model")+".txt", "r", encoding="utf-8").read()).split("\n")[0])
    else:
        best_model_loss = 9e9
    
    # best (validation) MSE on inferred spectrogram.
    if os.path.exists(os.path.join(output_directory, "best_val_model")+".txt"):
        best_MSE = float(str(open(os.path.join(output_directory, "best_val_model")+".txt", "r", encoding="utf-8").read()).split("\n")[0])
    else:
        best_MSE = 9e9
    
    epoch_offset = max(0, int(iteration / len(train_loader)))
    
    print_params(model, name='generator')
    
    print(f"Segment Length: {data_config['segment_length']:,}\nBatch Size: {batch_size:,}\nNumber of GPUs: {num_gpus:,}\nSamples/Iter: {data_config['segment_length']*batch_size*num_gpus:,}")
    
    training = True
    while training:
        try:
            if rank == 0:
                epochs_iterator = tqdm(range(epoch_offset, epochs), initial=epoch_offset, total=epochs, smoothing=0.01, desc="Epoch", position=1, unit="epoch")
            else:
                epochs_iterator = range(epoch_offset, epochs)
            # ================ MAIN TRAINING LOOP! ===================
            for epoch in epochs_iterator:
                print(f"Epoch: {epoch}")
                if num_gpus > 1:
                    train_sampler.set_epoch(epoch)
                
                if rank == 0:
                    iters_iterator = tqdm(enumerate(train_loader), desc=" Iter", smoothing=0, total=len(train_loader), position=0, unit="iter", leave=True)
                else:
                    iters_iterator = enumerate(train_loader)
                for i, batch in iters_iterator:
                    # periodically run external code (run_every_epoch.py), allowing the run to be adjusted without restarts
                    if (i==0 or iteration % param_interval == 0):
                        try:
                            with open("run_every_epoch.py") as f:
                                internal_text = str(f.read())
                                if len(internal_text) > 0:
                                    #code = compile(internal_text, "run_every_epoch.py", 'exec')
                                    ldict = {'iteration': iteration, 'seconds_elapsed': time.time()-start_time}
                                    exec(internal_text, globals(), ldict)
                                else:
                                    print("No Custom code found, continuing without changes.")
                        except Exception as ex:
                            print(f"Custom code FAILED to run!\n{ex}")
                        globals().update(ldict)
                        locals().update(ldict)
                        if show_live_params:
                            print(internal_text)
                    # Learning Rate Schedule
                    if custom_lr:
                        old_lr = learning_rate
                        if iteration < warmup_start:
                            learning_rate = warmup_start_lr
                        elif iteration < warmup_end:
                            learning_rate = (iteration-warmup_start)*((A_+C_)-warmup_start_lr)/(warmup_end-warmup_start) + warmup_start_lr # learning rate increases from warmup_start_lr to A_ linearly over (warmup_end-warmup_start) iterations.
                        else:
                            if iteration < decay_start:
                                learning_rate = A_ + C_
                            else:
                                iteration_adjusted = iteration - decay_start
                                learning_rate = (A_*(e**(-iteration_adjusted/B_))) + C_
                        assert learning_rate > -1e-8, "Negative Learning Rate."
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = learning_rate
                        if optimizer_d is not None:
                            for param_group in optimizer_d.param_groups:
                                param_group['lr'] = learning_rate*d_lr_scale
                    else:
                        scheduler.patience = scheduler_patience
                        scheduler.cooldown = scheduler_cooldown
                        if override_scheduler_last_lr:
                            scheduler._last_lr = override_scheduler_last_lr
                        if override_scheduler_best:
                            scheduler.best = override_scheduler_best
                        if override_scheduler_last_lr or override_scheduler_best:
                            print(f"scheduler._last_lr = {scheduler._last_lr} scheduler.best = {scheduler.best}  |", end='')
                    model.zero_grad()
                    noisy_audio, gt_audio, speaker_ids = batch
                    noisy_audio = torch.autograd.Variable(noisy_audio.cuda(non_blocking=True))
                    gt_audio = torch.autograd.Variable(gt_audio.cuda(non_blocking=True))
                    speaker_ids = speaker_ids.cuda(non_blocking=True).long().squeeze(1)
                    pred_audio = model(noisy_audio)#, speaker_ids)
                    
                    metrics = criterion(pred_audio, gt_audio, amp, model, optimizer, optimizer_d, num_gpus, use_grad_clip, grad_clip_thresh)
                    
                    if not metrics['is_overflow'] and rank == 0:
                        # get current Loss Scale of first optimizer
                        loss_scale = amp._amp_state.loss_scalers[0]._loss_scale if fp16_run else 32768
                        
                        if with_tensorboard:
                            if (iteration % 100000 == 0):
                                # plot distribution of parameters
                                for tag, value in model.named_parameters():
                                    tag = tag.replace('.', '/')
                                    logger.add_histogram(tag, value.data.cpu().numpy(), iteration)
                            for key, value in metrics.items():
                                if key not in ['is_overflow',]:
                                    logger.add_scalar(key, value, iteration)
                            if (iteration % 20 == 0):
                                logger.add_scalar('learning.rate', learning_rate, iteration)
                            if (iteration % 10 == 0):
                                logger.add_scalar('duration', ((time.time() - start_time_dekaiter)/10), iteration)
                        
                        logged_loss = metrics['g_train_loss'] if stage >= 2 else metrics['train_loss']
                        grad_norm = metrics['grad_norm']
                        average_loss = rolling_sum.process(logged_loss)
                        if (iteration % 10 == 0):
                            tqdm.write("{} {}:  {:.3f} {:.3f}  {:.3f} {:08.3F} {:.8f}LR ({:.8f} Effective)  {:.2f}s/iter {:.4f}s/item".format(time.strftime("%H:%M:%S"), iteration, logged_loss, average_loss, best_MSE, round(grad_norm,3), learning_rate, min((grad_clip_thresh/grad_norm)*learning_rate,learning_rate), (time.time() - start_time_dekaiter)/10, ((time.time() - start_time_dekaiter)/10)/(batch_size*num_gpus)))
                            start_time_dekaiter = time.time()
                        else:
                            tqdm.write("{} {}:  {:.3f} {:.3f}  {:.3f} {:08.3F} {:.8f}LR ({:.8f} Effective) {}LS".format(time.strftime("%H:%M:%S"), iteration, logged_loss, average_loss, best_MSE, round(grad_norm,3), learning_rate, min((grad_clip_thresh/grad_norm)*learning_rate,learning_rate), loss_scale))
                        start_time_iter = time.time()
                    
                    if rank == 0 and (len(rolling_sum.values) > moving_average-2):
                        if (average_loss+best_model_margin) < best_model_loss:
                            checkpoint_path = os.path.join(output_directory, "best_model")
                            try:
                                save_checkpoint(model, optimizer, criterion, optimizer_d, learning_rate, iteration, amp, scheduler, speaker_lookup, checkpoint_path)
                            except KeyboardInterrupt: # Avoid corrupting the model.
                                save_checkpoint(model, optimizer, criterion, optimizer_d, learning_rate, iteration, amp, scheduler, speaker_lookup, checkpoint_path)
                            text_file = open((f"{checkpoint_path}.txt"), "w", encoding="utf-8")
                            text_file.write(str(average_loss)+"\n"+str(iteration))
                            text_file.close()
                            best_model_loss = average_loss # only update/save when the average loss beats the current best by best_model_margin
                    if rank == 0 and iteration > 0 and ((iteration % iters_per_checkpoint == 0) or (os.path.exists(save_file_check_path))):
                        checkpoint_path = f"{output_directory}/waveglow_{iteration}"
                        save_checkpoint(model, optimizer, criterion, optimizer_d, learning_rate, iteration, amp, scheduler, speaker_lookup, checkpoint_path)
                        if (os.path.exists(save_file_check_path)):
                            os.remove(save_file_check_path)
                    
                    if iteration%validation_interval == 0:
                        if rank == 0:
                            MSE, MAE = validate(model, trainset, logger, iteration, data_config['validation_files'], speaker_lookup, output_directory, data_config)
                            if scheduler:
                                MSE = torch.tensor(MSE, device='cuda')
                                if num_gpus > 1:
                                    broadcast(MSE, 0)
                                scheduler.step(MSE.item())
                                if MSE < best_MSE:
                                    checkpoint_path = os.path.join(output_directory, "best_val_model")
                                    try:
                                        save_checkpoint(model, optimizer, criterion, optimizer_d, learning_rate, iteration, amp, scheduler, speaker_lookup, checkpoint_path)
                                    except KeyboardInterrupt: # Avoid corrupting the model.
                                        save_checkpoint(model, optimizer, criterion, optimizer_d, learning_rate, iteration, amp, scheduler, speaker_lookup, checkpoint_path)
                                    text_file = open((f"{checkpoint_path}.txt"), "w", encoding="utf-8")
                                    text_file.write(str(MSE.item())+"\n"+str(iteration))
                                    text_file.close()
                                    best_MSE = MSE.item()
                        else:
                            if scheduler:
                                MSE = torch.zeros(1, device='cuda')
                                broadcast(MSE, 0)
                                scheduler.step(MSE.item())
                    iteration += 1
            training = False # exit the training While loop
        
        except LossExplosion as ex: # print the exception and continue from the best checkpoint (restarting this way takes under 4 seconds)
            print(ex) # print Loss
            checkpoint_path = os.path.join(output_directory, "best_model")
            assert os.path.exists(checkpoint_path), "best_model must exist for automatic restarts"
            
            # clearing VRAM for load checkpoint
            noisy_audio = gt_audio = speaker_ids = pred_audio = metrics = None  # drop references to GPU tensors before reloading
            torch.cuda.empty_cache()
            
            model.eval()
            model, optimizer, criterion, optimizer_d, iteration, scheduler = load_checkpoint(checkpoint_path, model, optimizer, criterion, optimizer_d, scheduler, fp16_run, stage)
            learning_rate = optimizer.param_groups[0]['lr']
            epoch_offset = max(0, int(iteration / len(train_loader)))
            model.train()
            iteration += 1
            pass # and continue training.
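
The loop in Exemple #11 tracks its smoothed loss with a StreamingMovingAverage helper that is not defined in the snippet. A minimal sketch of such a rolling-window average, with the class name and the .process()/.values interface inferred from the call sites above, might look like this:

class StreamingMovingAverage:
    """Rolling mean over the last `window_size` values (sketch; interface inferred from usage)."""
    def __init__(self, window_size):
        self.window_size = window_size
        self.values = []
        self.sum = 0.0

    def process(self, value):
        # Append the newest value, drop the oldest one once the window is full,
        # then return the current mean.
        self.values.append(value)
        self.sum += value
        if len(self.values) > self.window_size:
            self.sum -= self.values.pop(0)
        return self.sum / len(self.values)
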
Exemple #12
0
def train(output_directory, epochs, learning_rate, sigma, iters_per_checkpoint,
          batch_size, seed, fp16_run, checkpoint_path, with_tensorboard):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    rank = 0  # this variant runs single-process, so rank is fixed to 0 for the rank checks below

    criterion = WaveGlowLoss(sigma)
    model = WaveGlow(**waveglow_config,
                     filter_length=data_config["filter_length"],
                     hop_length=data_config["hop_length"]).cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    if fp16_run:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer)

    trainset = Mel2Samp(**data_config)
    train_loader = DataLoader(trainset,
                              num_workers=6,
                              sampler=RandomSampler(0, 14),
                              batch_size=batch_size,
                              pin_memory=True,
                              drop_last=False)

    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    if with_tensorboard and rank == 0:
        from tensorboardX import SummaryWriter
        logger = SummaryWriter(os.path.join(output_directory, 'logs'))

    model.train()
    model = model.cuda()

    s = time()
    reduced_loss = 0
    for i, batch in enumerate(train_loader):
        model.zero_grad()

        mel, audio = batch
        mel = torch.autograd.Variable(mel.cuda())
        audio = torch.autograd.Variable(audio.cuda())
        outputs = model((mel, audio))

        loss = criterion(outputs)
        reduced_loss += loss.item()

        if fp16_run:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()

        optimizer.step()
        denominator = i % iters_per_checkpoint + 1
        print("iteration:{}, loss:{:.4f}, time:{:.2f}            "
              "".format(iteration + 1, reduced_loss / denominator,
                        (time() - s) / denominator),
              end="\r")

        if with_tensorboard and rank == 0:
            logger.add_scalar('training_loss', reduced_loss / denominator,
                              iteration + 1)

        if (iteration + 1) % iters_per_checkpoint == 0:
            s = time()
            reduced_loss = 0
            if rank == 0:
                checkpoint_path = "{}/waveglow_it{}.pt".format(
                    output_directory, iteration + 1)
                save_checkpoint(model, optimizer, learning_rate, iteration + 1,
                                checkpoint_path)
        iteration += 1
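
Exemple #12 calls save_checkpoint() and load_checkpoint() without showing them. A plausible minimal pair, saving exactly the fields the loop needs to resume (the dictionary key names are an assumption), could be:

import torch

def save_checkpoint(model, optimizer, learning_rate, iteration, filepath):
    # Sketch: persist model and optimizer state plus the bookkeeping needed to resume.
    print("Saving model and optimizer state at iteration {} to {}".format(iteration, filepath))
    torch.save({'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'learning_rate': learning_rate,
                'iteration': iteration}, filepath)

def load_checkpoint(checkpoint_path, model, optimizer):
    # Sketch: restore the state written by save_checkpoint() above.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    iteration = checkpoint['iteration']
    print("Loaded checkpoint '{}' (iteration {})".format(checkpoint_path, iteration))
    return model, optimizer, iteration
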
Exemple #13
0
def main(squeezewave_path, sigma, output_dir, is_fp16, denoiser_strength):
    # mel_files = files_to_list(mel_files)
    squeezewave = torch.load(squeezewave_path)['model']
    squeezewave = squeezewave.remove_weightnorm(squeezewave)
    squeezewave.cuda().eval()
    if is_fp16:
        from apex import amp
        squeezewave, _ = amp.initialize(squeezewave, [], opt_level="O3")

    if denoiser_strength > 0:
        denoiser = Denoiser(squeezewave).cuda()

    n_audio_channel = squeezewave_config["n_audio_channel"]
    testset = Mel2Samp(n_audio_channel, frame_energy_thres=0.02, **data_config)
    # =====START: ADDED FOR DISTRIBUTED======
    # train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    test_loader = DataLoader(
        testset,
        num_workers=0,
        shuffle=False,
        # sampler=train_sampler,
        batch_size=1 if data_config['split'] == 'test' else 12,
        pin_memory=False,
        drop_last=True)

    speakers_to_sids = deepcopy(testset.speakers)
    sids_to_speakers = create_reverse_dict(speakers_to_sids)
    ut_to_uids = deepcopy(testset.utterances)
    uids_to_ut = create_reverse_dict(ut_to_uids)

    # sid_target = np.random.randint(len(speakers_to_sids))
    # speaker_target = sids_to_speakers[sid_target]
    # sid_target = torch.LongTensor([[sid_target] *
    #                                test_loader.batch_size]).view(
    #     test_loader.batch_size, 1).to('cuda')

    audios = []
    mels = []
    n_audios = 0
    for i, batch in enumerate(test_loader):
        audio_source, sid_source, uid_source, is_last = batch
        mel_source = get_mel(audio_source)
        mel_source = mel_source.to('cuda')

        with torch.no_grad():
            predicted = squeezewave.infer(mel_source, sigma=sigma)
            if denoiser_strength > 0:
                predicted = denoiser(predicted, denoiser_strength)
                predicted = predicted.squeeze(1)
            # predicted = predicted * MAX_WAV_VALUE

        for j in range(len(predicted)):
            p = predicted[j].cpu()
            audios.append(p)
            mels.append(mel_source[j].cpu())
            speaker_source = sids_to_speakers[sid_source[j].data.item()]
            ut_source = uids_to_ut[uid_source[j].data.item()]
            last = is_last[j].data.item()
            if last:
                ## Hacking to print mel_source here
                fname = os.path.join(
                    output_dir, "{}_{}_mel.pt".format(speaker_source,
                                                      ut_source))
                torch.save(mels, fname)
                print("Saved mel to {}".format(fname))
                ##

                # audio_path = os.path.join(
                #     output_dir,
                #     "{}_{}_to_{}_synthesis.wav".format(speaker_source,
                #                                        ut_source,
                #                                        speaker_target))
                audio_path = os.path.join(
                    output_dir,
                    "{}_{}_synthesis.wav".format(speaker_source, ut_source))
                print("Synthesizing file No.{} at {}".format(
                    n_audios, audio_path))
                save_audio_chunks(audios, audio_path, data_config['stride'],
                                  data_config['sampling_rate'])

                audios = []
                mels = []
                n_audios += 1
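
Exemple #13 relies on a create_reverse_dict() helper to map speaker and utterance ids back to their names. A minimal sketch (the helper's source is not included, so this is an assumption based on how it is used) is simply a dictionary inversion:

def create_reverse_dict(mapping):
    # Sketch: invert a dict, e.g. {"speaker_a": 0} becomes {0: "speaker_a"}.
    reverse = {value: key for key, value in mapping.items()}
    # Inversion only makes sense when the original values are unique.
    assert len(reverse) == len(mapping), "values must be unique to invert the mapping"
    return reverse
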
Exemple #14
0
            if args.average_checkpoint == 0:
                model, optimizer, scheduler, iteration = load_checkpoint_warm_start(checkpoint_path, model, optimizer, scheduler,fp16_run)
            else:
                print("INFO: --average_checkpoint > 0. loading an averaged weight of last {} checkpoints...".format(args.average_checkpoint))
                model, optimizer, scheduler, iteration = load_averaged_checkpoint_warm_start(checkpoint_path, model, optimizer, scheduler,fp16_run)
        else:
            model, optimizer, scheduler, iteration = load_checkpoint(checkpoint_path, model, optimizer, scheduler,fp16_run)
        iteration += 1  # next iteration is iteration + 1

    if distributed_run:
        model = DistributedDataParallel(
            model, device_ids=[args.local_rank], output_device=args.local_rank,
            find_unused_parameters=True)

    trainset = Mel2Samp("train", False, False, **data_config)
    testset = Mel2Samp("test", False, False, **data_config)
    
    
    if distributed_run:
        train_sampler, shuffle = DistributedSampler(trainset), False
        test_sampler,  shuffle = DistributedSampler(testset), False           
    else:
        train_sampler, shuffle = None, True   
        test_sampler,  shuffle = None, True          
             
    train_loader = DataLoader(trainset, num_workers=16, shuffle=shuffle,
                              sampler=train_sampler, batch_size=batch_size,
                              pin_memory=False,  drop_last=True)
    
Exemple #15
0
def train(num_gpus, rank, group_name, output_directory, epochs, learning_rate,
          sigma, iters_per_checkpoint, batch_size, seed, fp16_run,
          checkpoint_path, with_tensorboard, warm_start):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # =====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    # =====END:   ADDED FOR DISTRIBUTED======

    criterion = WaveGlowLoss(sigma)
    model = WaveGlow(**waveglow_config).cuda()

    # =====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    # =====END:   ADDED FOR DISTRIBUTED======
    optimizer = Over9000(model.parameters(), lr=learning_rate)

    if fp16_run:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
    else:
        amp = None

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer, warm_start)
        if fp16_run and not warm_start:
            amp.load_state_dict(torch.load(checkpoint_path)['amp'])
        iteration += 1

    trainset = Mel2Samp(**data_config)
    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    train_loader = DataLoader(trainset,
                              num_workers=16,
                              shuffle=True,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)

    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    if with_tensorboard and rank == 0:
        from tensorboardX import SummaryWriter
        logger = SummaryWriter(os.path.join(output_directory, 'logs'))

    model.train()
    epoch_offset = max(0, int(iteration / len(train_loader)))

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           factor=0.999,
                                                           patience=250,
                                                           cooldown=250,
                                                           verbose=True,
                                                           min_lr=1e-5)
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        print("Epoch: {}".format(epoch))
        for i, batch in enumerate(train_loader):
            model.zero_grad()

            mel, audio = batch
            mel = mel.cuda()
            audio = audio.cuda()
            outputs = model((mel, audio))

            loss = criterion(outputs)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()

            if fp16_run:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            if fp16_run:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    amp.master_params(optimizer), 1.0)
            else:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), 1.0)

            optimizer.step()

            if epoch > 1:
                scheduler.step(loss)

            print("{}:\t{:.9f}\t{:.9f}".format(iteration, reduced_loss,
                                               grad_norm))
            if with_tensorboard and rank == 0:
                logger.add_scalar('training_loss', reduced_loss,
                                  i + len(train_loader) * epoch)

            if (iteration % iters_per_checkpoint == 0):
                if rank == 0:
                    checkpoint_path = "{}/waveglow_{}".format(
                        output_directory, iteration)
                    save_checkpoint(model, optimizer, amp, iteration,
                                    checkpoint_path)

            iteration += 1
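
Exemple #15 averages the loss across GPUs with reduce_tensor(), which is not shown here. A common implementation (a sketch; the helper normally lives in the distributed utilities of these repositories) sums the tensor over all ranks and divides by the number of GPUs:

import torch.distributed as dist

def reduce_tensor(tensor, num_gpus):
    # Sketch: all-reduce (sum) across ranks, then average so every rank logs the same loss.
    rt = tensor.clone()
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    rt /= num_gpus
    return rt
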
Exemple #16
0
def test_mel2samp():
    """Test mel2samp modules on example data."""
    from mel2samp import Mel2Samp
    
    hparams = hparams_class()
    
    passed = 0
    
    
    # test filelist loader
    try:
        from mel2samp import load_filepaths_and_text
        audio_files = load_filepaths_and_text("code_tests/test_materials/filelists/validation_utf8.txt")
        passed+=1
        print("--PASSED--\n")
    except Exception as ex:
        print("--EXCEPTION-- @ Load Filepaths and Text (UTF-8)")
        traceback.print_exc(file=sys.stdout)
        print("\n")
    
    
    # test filelist checker
    try:
        assert audio_files
        from mel2samp import check_files
        audio_files = check_files(audio_files, hparams)
        assert len(audio_files) == 1
        passed+=1
        print("--PASSED--\n")
        del audio_files
    except Exception as ex:
        print("--EXCEPTION-- @ Load Filepaths and Text (UTF-8)")
        traceback.print_exc(file=sys.stdout)
        print("\n")
    
    
    # test initialization
    try:
        trainset = Mel2Samp(hparams)
        passed+=1
        print("--PASSED--\n")
    except Exception as ex:
        print("--EXCEPTION-- @ Mel2Samp Initialization")
        traceback.print_exc(file=sys.stdout)
        print("\n")
    
    
    # test 16-BIT .wav to torch
    try:
        from mel2samp import load_wav_to_torch
        x, sr = load_wav_to_torch("code_tests/test_materials/audio_0/example_16bits.wav")
        assert len(x)
        assert x.max() <= 2**15
        assert x.min() >= -(2**15)
        assert sr == 48000
        passed+=1
        print("--PASSED--\n")
    except Exception as ex:
        print("--EXCEPTION-- @ Load 16-BIT .wav to Pytorch")
        traceback.print_exc(file=sys.stdout)
        print("\n")
    
    
    # test 24-BIT .wav to torch
    try:
        from mel2samp import load_wav_to_torch
        x, sr = load_wav_to_torch("code_tests/test_materials/audio_0/example_24bits.wav")
        assert len(x)
        assert x.max() <= 2**23
        assert x.min() >= -(2**23)
        assert sr == 48000
        passed+=1
        print("--PASSED--\n")
    except Exception as ex:
        print("--EXCEPTION-- @ Load 24-BIT .wav to Pytorch")
        traceback.print_exc(file=sys.stdout)
        print("\n")
    
    
    # test 32-BIT .wav to torch
    try:
        from mel2samp import load_wav_to_torch
        x, sr = load_wav_to_torch("code_tests/test_materials/audio_0/example_32bits.wav")
        assert len(x)
        assert x.max() <= 2**31
        assert x.min() >= -(2**31)
        assert sr == 48000
        passed+=1
        print("--PASSED--\n")
    except Exception as ex:
        print("--EXCEPTION-- @ Load 32-BIT .wav to Pytorch")
        traceback.print_exc(file=sys.stdout)
        print("\n")
    
    
    # test 32-BIT .mp3 to torch
    try:
        from mel2samp import load_wav_to_torch
        x, sr = load_wav_to_torch("code_tests/test_materials/audio_0/example_32bits.mp3")
        assert len(x)
        assert x.max() <= 2**31
        assert x.min() >= -(2**31)
        assert sr == 48000
        passed+=1
        print("--PASSED--\n")
    except Exception as ex:
        print("--EXCEPTION-- @ Load 32-BIT .mp3 to Pytorch")
        traceback.print_exc(file=sys.stdout)
        print("\n")
    
    
    # test 16-BIT .wav to mel
    try:
        x, sr = load_wav_to_torch("code_tests/test_materials/audio_0/example_16bits.wav")
        x = trainset.get_mel(x)
        passed+=1
        print("--PASSED--\n")
    except Exception as ex:
        print("--EXCEPTION-- @ 16-BIT .wav to Mel-spec")
        traceback.print_exc(file=sys.stdout)
        print("\n")
    
    
    # test 24-BIT .wav to mel
    try:
        x, sr = load_wav_to_torch("code_tests/test_materials/audio_0/example_24bits.wav")
        x = trainset.get_mel(x)
        passed+=1
        print("--PASSED--\n")
    except Exception as ex:
        print("--EXCEPTION-- @ 24-BIT .wav to Mel-spec")
        traceback.print_exc(file=sys.stdout)
        print("\n")
    
    
    # test 32-BIT .wav to mel
    try:
        x, sr = load_wav_to_torch("code_tests/test_materials/audio_0/example_32bits.wav")
        x = trainset.get_mel(x)
        passed+=1
        print("--PASSED--\n")
    except Exception as ex:
        print("--EXCEPTION-- @ 32-BIT .wav to Mel-spec")
        traceback.print_exc(file=sys.stdout)
        print("\n")
    
    
    # test 32-BIT .mp3 to mel
    try:
        x, sr = load_wav_to_torch("code_tests/test_materials/audio_0/example_32bits.mp3")
        x = trainset.get_mel(x)
        passed+=1
        print("--PASSED--\n")
    except Exception as ex:
        print("--EXCEPTION-- @ 32-BIT .mp3 to Mel-spec")
        traceback.print_exc(file=sys.stdout)
        print("\n")
    
    
    # test __getitem__ with load_mel_from_disk = False
    try:
        assert trainset # This test will fail if Mel2Samp cannot initialize
        trainset.load_mel_from_disk = False
        trainset.__getitem__(0)
        passed+=1
        print("--PASSED--\n")
    except Exception as ex:
        print("--EXCEPTION-- @  __getitem__ with load_mel_from_disk = False")
        traceback.print_exc(file=sys.stdout)
        print("\n")
    
    
    # test __getitem__ with load_mel_from_disk = True
    try:
        assert trainset # This test will fail if Mel2Samp cannot initialize
        trainset.load_mel_from_disk = True
        trainset.__getitem__(0)
        passed+=1
        print("--PASSED--\n")
    except Exception as ex:
        print("--EXCEPTION-- @  __getitem__ with load_mel_from_disk = True")
        traceback.print_exc(file=sys.stdout)
        print("\n")
    
    
    # test initialization with Pre-emphasis
    try:
        trainset = None
        hparams.preempthasis = 0.98
        trainset = Mel2Samp(hparams)
        passed+=1
        print("--PASSED--\n")
    except Exception as ex:
        print("--EXCEPTION-- @ Mel2Samp with Pre-empthasis Initialization")
        traceback.print_exc(file=sys.stdout)
        print("\n")
    
    
    # test __getitem__ with Pre-emphasis
    try:
        assert trainset # This test will fail if Mel2Samp cannot initialize
        trainset.load_mel_from_disk = False
        trainset.__getitem__(0)
        passed+=1
        print("--PASSED--\n")
    except Exception as ex:
        print("--EXCEPTION-- @  __getitem__ with Pre-empthasis")
        traceback.print_exc(file=sys.stdout)
        print("\n")
Exemple #17
0
def train(num_gpus, rank, group_name, output_directory, epochs, learning_rate,
          sigma, iters_per_checkpoint, batch_size, seed, fp16_run,
          checkpoint_path, with_tensorboard):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======

    criterion = WaveGlowLoss(sigma)
    model = WaveGlow(**waveglow_config).cuda()

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    if fp16_run:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer)
        iteration += 1  # next iteration is iteration + 1

    trainset = Mel2Samp(data_config['training_files'],
                        data_config['segment_length'],
                        data_config['filter_length'],
                        data_config['hop_length'],
                        data_config['win_length'],
                        data_config['sampling_rate'],
                        data_config['mel_fmin'],
                        data_config['mel_fmax'],
                        debug=False)

    if 'testing_files' in data_config:
        testset = Mel2Samp(data_config['testing_files'],
                           data_config['segment_length'],
                           data_config['filter_length'],
                           data_config['hop_length'],
                           data_config['win_length'],
                           data_config['sampling_rate'],
                           data_config['mel_fmin'],
                           data_config['mel_fmax'],
                           debug=True)
    else:
        testset = None

    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    train_loader = DataLoader(trainset,
                              num_workers=1,
                              shuffle=False,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)

    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    if with_tensorboard and rank == 0:
        from tensorboardX import SummaryWriter
        logger = SummaryWriter(os.path.join(output_directory, 'logs'))
    else:
        logger = None

    model.train()
    epoch_offset = max(0, int(iteration / len(train_loader)))
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        print("Epoch: {}".format(epoch))
        for i, batch in enumerate(train_loader):
            start = time.perf_counter()

            model.zero_grad()

            print("train batch loaded, {} ({} of {})".format(
                iteration, i, len(train_loader)))
            mel, audio = batch
            mel = torch.autograd.Variable(mel.cuda())
            audio = torch.autograd.Variable(audio.cuda())
            outputs = model((mel, audio))

            loss = criterion(outputs)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()

            if fp16_run:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            is_overflow = False
            if fp16_run:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    amp.master_params(optimizer), 1.0)
                is_overflow = math.isnan(grad_norm)

            optimizer.step()

            duration = time.perf_counter() - start

            print(
                "train batch done, {} ({} of {}): {:.9f} (took {:.2f})".format(
                    iteration, i, len(train_loader), reduced_loss, duration))

            if logger:
                logger.add_scalar('training_loss', reduced_loss,
                                  i + len(train_loader) * epoch)
                logger.add_scalar('duration', duration,
                                  i + len(train_loader) * epoch)

            if testset and not is_overflow and (iteration %
                                                iters_per_checkpoint == 0):
                validate(model, criterion, testset, iteration, batch_size,
                         num_gpus, logger)

                if rank == 0:
                    rotate_checkpoints(output_directory)
                    checkpoint_path = "{}/waveglow_{}".format(
                        output_directory, iteration)
                    save_checkpoint(model, optimizer, learning_rate, iteration,
                                    checkpoint_path)

            iteration += 1
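
rotate_checkpoints() in Exemple #17 is called but not defined in the snippet. A plausible sketch (its name suggests pruning old checkpoints; the keep_last count and the filename pattern below are assumptions) would be:

import glob
import os

def rotate_checkpoints(output_directory, keep_last=5):
    # Sketch: delete all but the `keep_last` most recently written "waveglow_*" checkpoints.
    checkpoints = sorted(glob.glob(os.path.join(output_directory, "waveglow_*")),
                         key=os.path.getmtime)
    for path in checkpoints[:-keep_last]:
        os.remove(path)
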
Exemple #18
0
def train(model, num_gpus, output_directory, epochs, learning_rate, lr_decay_step, lr_decay_gamma,
          sigma, iters_per_checkpoint, batch_size, seed, fp16_run,
          checkpoint_path, with_tensorboard):
    # local eval and synth functions
    def evaluate():
        # eval loop
        model.eval()
        epoch_eval_loss = 0
        for i, batch in enumerate(test_loader):
            with torch.no_grad():
                mel, audio = batch
                mel = torch.autograd.Variable(mel.cuda())
                audio = torch.autograd.Variable(audio.cuda())
                outputs = model(audio, mel)

                loss = criterion(outputs)
                if num_gpus > 1:
                    reduced_loss = loss.mean().item()
                else:
                    reduced_loss = loss.item()
                epoch_eval_loss += reduced_loss

        epoch_eval_loss = epoch_eval_loss / len(test_loader)
        print("EVAL {}:\t{:.9f}".format(iteration, epoch_eval_loss))
        if with_tensorboard:
            logger.add_scalar('eval_loss', epoch_eval_loss, iteration)
            logger.flush()
        model.train()

    def synthesize(sigma):
        model.eval()
        # synthesize loop
        for i, batch in enumerate(synth_loader):
            if i == 0:
                with torch.no_grad():
                    mel, _, filename = batch
                    mel = torch.autograd.Variable(mel.cuda())
                    try:
                        audio = model.reverse(mel, sigma)
                    except AttributeError:
                        audio = model.module.reverse(mel, sigma)
                    except NotImplementedError:
                        print("reverse not implemented for this model. skipping synthesize!")
                        model.train()
                        return

                    audio = audio * MAX_WAV_VALUE
                audio = audio.squeeze()
                audio = audio.cpu().numpy()
                audio = audio.astype('int16')
                audio_path = os.path.join(
                    os.path.join(output_directory, "samples", waveflow_config["model_name"]),
                    "generate_{}.wav".format(iteration))
                write(audio_path, data_config["sampling_rate"], audio)

        model.train()

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    criterion = WaveFlowLossDataParallel(sigma)

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer, step_size=lr_decay_step, gamma=lr_decay_gamma)

    if fp16_run:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    # Load checkpoint if one exists
    iteration = 0
    if args.resume:
        model_directory = os.path.join(
            output_directory, waveflow_config["model_name"]
        )
        logging.info("--resume. Resuming the training from the last "
            "checkpoint found in {}.".format(model_directory))
        last_checkpoint = last_n_checkpoints(model_directory, 1)[0]
        model, optimizer, scheduler, iteration = \
            load_checkpoint(last_checkpoint, model, optimizer, scheduler)

    elif checkpoint_path != "":
        # Warm-start
        if args.warm_start and args.average_checkpoint == 0:
            print("INFO: --warm_start. optimizer and scheduler are initialized and strict=False for load_state_dict().")
            model, optimizer, scheduler, iteration = load_checkpoint_warm_start(
                    checkpoint_path, model, optimizer, scheduler)
        elif args.warm_start and args.average_checkpoint != 0:
            print("INFO: --average_checkpoint > 0. loading an averaged "
                  "weight of last {} checkpoints...".format(args.average_checkpoint))
            model, optimizer, scheduler, iteration = load_averaged_checkpoint_warm_start(
                checkpoint_path, model, optimizer, scheduler
            )
        else:
            model, optimizer, scheduler, iteration = \
                load_checkpoint(checkpoint_path, model, optimizer, scheduler)
        iteration += 1  # next iteration is iteration + 1

    if num_gpus > 1:
        print("num_gpus > 1. converting the model to DataParallel...")
        model = torch.nn.DataParallel(model)

    trainset = Mel2Samp("train", False, False, **data_config)
    train_loader = DataLoader(trainset, num_workers=4, shuffle=True,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)

    testset = Mel2Samp("test", False, False, **data_config)
    test_sampler = None
    test_loader = DataLoader(testset, num_workers=4, shuffle=False,
                             sampler=test_sampler,
                             batch_size=batch_size,
                             pin_memory=False,
                             drop_last=False)

    synthset = Mel2Samp("test", True, True, **data_config)
    synth_sampler = None
    synth_loader = DataLoader(synthset, num_workers=4, shuffle=False,
                              sampler=synth_sampler,
                              batch_size=1,
                              pin_memory=False,
                              drop_last=False)

    # Get shared output_directory ready
    if not os.path.isdir(os.path.join(output_directory, waveflow_config["model_name"])):
        os.makedirs(os.path.join(output_directory, waveflow_config["model_name"]), exist_ok=True)
        os.chmod(os.path.join(output_directory, waveflow_config["model_name"]), 0o775)
    print("output directory", os.path.join(output_directory, waveflow_config["model_name"]))
    if not os.path.isdir(os.path.join(output_directory, "samples")):
        os.makedirs(os.path.join(output_directory, "samples"), exist_ok=True)
        os.chmod(os.path.join(output_directory, "samples"), 0o775)
    os.makedirs(os.path.join(output_directory, "samples", waveflow_config["model_name"]), exist_ok=True)
    os.chmod(os.path.join(output_directory, "samples", waveflow_config["model_name"]), 0o775)

    if with_tensorboard:
        from tensorboardX import SummaryWriter
        logger = SummaryWriter(os.path.join(output_directory, waveflow_config["model_name"], 'logs'))

    model.train()
    epoch_offset = max(0, int(iteration / len(train_loader)))
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        print("Epoch: {}".format(epoch))
        for i, batch in tqdm.tqdm(enumerate(train_loader), total=len(train_loader)):
            tic = time.time()

            model.zero_grad()

            mel, audio = batch
            mel = torch.autograd.Variable(mel.cuda())
            audio = torch.autograd.Variable(audio.cuda())
            outputs = model(audio, mel)

            loss = criterion(outputs)
            if num_gpus > 1:
                reduced_loss = loss.mean().item()
            else:
                reduced_loss = loss.item()

            if fp16_run:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.mean().backward()

            if fp16_run:
                grad_norm = torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), 5.)
            else:
                grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), 5.)
            optimizer.step()

            toc = time.time() - tic

            #print("{}:\t{:.9f}, {:.4f} seconds".format(iteration, reduced_loss, toc))
            if with_tensorboard:
                logger.add_scalar('training_loss', reduced_loss, i + len(train_loader) * epoch)
                logger.add_scalar('lr', get_lr(optimizer), i + len(train_loader) * epoch)
                logger.add_scalar('grad_norm', grad_norm, i + len(train_loader) * epoch)
                logger.flush()

            if (iteration % iters_per_checkpoint == 0):
                checkpoint_path = "{}/waveflow_{}".format(
                    os.path.join(output_directory, waveflow_config["model_name"]), iteration)
                save_checkpoint(model, optimizer, scheduler, learning_rate, iteration,
                                checkpoint_path)

                if iteration != 0:
                    evaluate()
                    del mel, audio, outputs, loss
                    gc.collect()
                    synthesize(sigma)

            iteration += 1
            scheduler.step()

        evaluate()
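
Exemple #18 logs the learning rate through get_lr() and resumes from last_n_checkpoints(); neither helper is shown. Minimal sketches consistent with the call sites (the checkpoint filename pattern is an assumption) could be:

import glob
import os

def get_lr(optimizer):
    # Sketch: current learning rate of the first parameter group, for logging.
    return optimizer.param_groups[0]['lr']

def last_n_checkpoints(model_directory, n):
    # Sketch: return the n most recently written checkpoint files, newest first.
    paths = sorted(glob.glob(os.path.join(model_directory, "waveflow_*")),
                   key=os.path.getmtime, reverse=True)
    return paths[:n]
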
Exemple #19
0
def train(num_gpus,
          rank,
          group_name,
          output_directory,
          epochs,
          learning_rate,
          sigma,
          iters_per_checkpoint,
          batch_size,
          seed,
          fp16_run,
          checkpoint_path,
          with_tensorboard,
          num_workers=4):
    print("num_workers", num_workers)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # =====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    # =====END:   ADDED FOR DISTRIBUTED======

    criterion = WaveGlowLoss(sigma)
    model = WaveGlow(**waveglow_config).cuda()

    # =====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    # =====END:   ADDED FOR DISTRIBUTED======

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = StepLR(optimizer, step_size=1, gamma=0.96)

    if fp16_run:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer)
        iteration += 1  # next iteration is iteration + 1

    trainset = Mel2Samp(**data_config)
    evalset = Mel2Samp(**eval_data_config)
    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    eval_sampler = DistributedSampler(evalset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    train_loader = DataLoader(trainset,
                              num_workers=num_workers,
                              shuffle=False,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)
    eval_loader = DataLoader(evalset,
                             num_workers=num_workers,
                             shuffle=False,
                             sampler=eval_sampler,
                             batch_size=batch_size,
                             pin_memory=False,
                             drop_last=True)

    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    if with_tensorboard and rank == 0:
        from tensorboardX import SummaryWriter
        logger = SummaryWriter(os.path.join(output_directory, 'logs'))

    epoch_offset = max(1, int(iteration / len(train_loader)))
    start_time = datetime.datetime.now()
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        print('Epoch:', epoch, 'LR:', scheduler.get_lr())
        elapsed = datetime.datetime.now() - start_time
        print("Epoch: [{}][els: {}] {}".format(
            datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S"), elapsed,
            epoch))
        model.train()
        total_loss = 0.
        for i, batch in enumerate(train_loader):
            model.zero_grad()

            if waveglow_config["multi_speaker_config"]["use_multi_speaker"]:
                mel, audio, spk_embed_or_id = batch
                spk_embed_or_id = torch.autograd.Variable(
                    spk_embed_or_id.cuda())
            else:
                mel, audio = batch
            mel = torch.autograd.Variable(mel.cuda())
            audio = torch.autograd.Variable(audio.cuda())

            if waveglow_config["multi_speaker_config"]["use_multi_speaker"]:
                outputs = model((mel, audio, spk_embed_or_id))
            else:
                outputs = model((mel, audio))

            loss = criterion(outputs)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()

            if fp16_run:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            optimizer.step()
            total_loss += reduced_loss
            if i > 0 and i % 10 == 0:
                elapsed = datetime.datetime.now() - start_time
                print(
                    "[{}][els: {}] epoch {},total steps{}, {}/{} steps:\t{:.9f}"
                    .format(
                        datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S"),
                        elapsed, epoch, iteration, i, len(train_loader),
                        reduced_loss))
            if with_tensorboard and rank == 0:
                logger.add_scalar('training_loss', reduced_loss,
                                  i + len(train_loader) * epoch)

            if (iteration % iters_per_checkpoint == 0):
                if rank == 0:
                    checkpoint_path = "{}/waveglow_{}".format(
                        output_directory, iteration)
                    save_checkpoint(model, optimizer, learning_rate, iteration,
                                    checkpoint_path)

            iteration += 1
        elapsed = datetime.datetime.now() - start_time
        print("[{}][els: {}] {} epoch :\tavg loss {:.9f}".format(
            datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S"), elapsed,
            epoch, total_loss / len(train_loader)))
        scheduler.step()
        eval.eval(eval_loader, model, criterion, num_gpus, start_time, epoch,
                  waveglow_config["multi_speaker_config"]["use_multi_speaker"])
Exemple #20
0
def synthesize_master(model, num_gpus, temp, output_directory, epochs, learning_rate, lr_decay_step, lr_decay_gamma,
                      sigma, iters_per_checkpoint, batch_size, seed, fp16_run,
                      checkpoint_path, with_tensorboard):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, _, _, iteration = load_checkpoint(checkpoint_path, model, None, None)
    # remove all weight_norm from the model
    model.remove_weight_norm()
    # fuse mel-spec conditioning layer weights to maximize speed
    model.fuse_conditioning_layers()

    if fp16_run:
        from apex import amp
        model, _ = amp.initialize(model, [], opt_level="O3")

    synthset = Mel2Samp("test", True, True, **data_config)
    synth_sampler = None
    synth_loader = DataLoader(synthset, num_workers=4, shuffle=False,
                              sampler=synth_sampler,
                              batch_size=1,
                              pin_memory=False,
                              drop_last=False)

    # Get shared output_directory ready
    if not os.path.isdir(os.path.join(output_directory, waveflow_config["model_name"])):
        os.makedirs(os.path.join(output_directory, waveflow_config["model_name"]), exist_ok=True)
        os.chmod(os.path.join(output_directory, waveflow_config["model_name"]), 0o775)
    print("output directory", os.path.join(output_directory, waveflow_config["model_name"]))
    if not os.path.isdir(os.path.join(output_directory, "samples")):
        os.makedirs(os.path.join(output_directory, "samples"), exist_ok=True)
        os.chmod(os.path.join(output_directory, "samples"), 0o775)
    os.makedirs(os.path.join(output_directory, "samples", waveflow_config["model_name"]), exist_ok=True)
    os.chmod(os.path.join(output_directory, "samples", waveflow_config["model_name"]), 0o775)

    # synthesize loop
    model.eval()
    for i, batch in enumerate(synth_loader):
        with torch.no_grad():
            mel, _, filename = batch
            mel = torch.autograd.Variable(mel.cuda())
            if fp16_run:
                mel = mel.half()

            torch.cuda.synchronize()
            tic = time.time()
            audio = model.reverse_fast(mel, temp)
            torch.cuda.synchronize()
            toc = time.time() - tic

            print('{}: {:.4f} seconds, {:.4f}kHz'.format(i, toc, audio.shape[1] / toc / 1000))

        audio = audio * MAX_WAV_VALUE
        audio = audio.squeeze()
        audio = audio.cpu().numpy()
        audio = audio.astype('int16')
        audio_path = os.path.join(
            os.path.join(output_directory, "samples", waveflow_config["model_name"]),
            "generate_{}_{}_t{}.wav".format(iteration, i, temp))
        write(audio_path, data_config["sampling_rate"], audio)

    model.train()
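
Exemple #20 scales the synthesized waveform by MAX_WAV_VALUE before casting to int16. In the WaveGlow-family code this constant is conventionally the 16-bit full-scale value; the conversion amounts to the following sketch (the explicit clipping is an added safeguard, not in the original):

import numpy as np

MAX_WAV_VALUE = 32768.0  # assumed value: 16-bit full scale, as in the WaveGlow reference code

def float_to_int16(audio):
    # Sketch: map a [-1, 1] float waveform onto the int16 range for scipy.io.wavfile.write.
    audio = np.clip(audio * MAX_WAV_VALUE, -MAX_WAV_VALUE, MAX_WAV_VALUE - 1)
    return audio.astype('int16')
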
Exemple #21
0
def train(num_gpus, rank, group_name, output_directory, epochs, learning_rate,
          sigma, iters_per_checkpoint, batch_size, seed, fp16_run,
          checkpoint_path, with_tensorboard):
    torch.manual_seed(seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======

    criterion = WaveGlowLoss(sigma)
    model = WaveGlow(**waveglow_config).cpu()

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    if fp16_run:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer)
        iteration += 1  # next iteration is iteration + 1

    trainset = Mel2Samp(**data_config)
    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    train_loader = DataLoader(trainset, num_workers=1, shuffle=False,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)

    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    if with_tensorboard and rank == 0:
        from tensorboardX import SummaryWriter
        logger = SummaryWriter(os.path.join(output_directory, 'logs'))

    model.train()
    epoch_offset = max(0, int(iteration / len(train_loader)))
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        print("Epoch: {}".format(epoch))
        for i, batch in enumerate(train_loader):
            model.zero_grad()

            mel, audio = batch
            mel = torch.autograd.Variable(mel.cpu())
            audio = torch.autograd.Variable(audio.cpu())
            outputs = model((mel, audio))

            loss = criterion(outputs)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()

            if fp16_run:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            optimizer.step()

            print("{}:\t{:.9f}".format(iteration, reduced_loss))
            if with_tensorboard and rank == 0:
                logger.add_scalar('training_loss', reduced_loss, i + len(train_loader) * epoch)

            if (iteration % iters_per_checkpoint == 0):
                if rank == 0:
                    checkpoint_path = "{}/waveglow_{}".format(
                        output_directory, iteration)
                    save_checkpoint(model, optimizer, learning_rate, iteration,
                                    checkpoint_path)

            iteration += 1
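
Exemple #21 calls a reduce_tensor helper to average the loss across GPUs before logging it. That helper is not shown on this page; a minimal sketch of what it typically looks like (assuming torch.distributed was initialized by init_distributed) is:

import torch
import torch.distributed as dist

def reduce_tensor(tensor, n_gpus):
    # sum the loss over all processes, then divide by the number of GPUs so
    # every rank logs the same averaged value
    rt = tensor.clone()
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    rt /= n_gpus
    return rt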
Exemple #22
0
def train(output_directory, log_directory, checkpoint_path, warm_start,
          warm_start_force, n_gpus, rank, group_name, hparams):
    torch.manual_seed(hparams.seed)
    torch.cuda.manual_seed(hparams.seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if n_gpus > 1:
        init_distributed(rank, n_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======

    model, criterion = getCore(hparams)

    #=====START: ADDED FOR DISTRIBUTED======
    if n_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======

    STFT = [
        TacotronSTFT(filter_length=window,
                     hop_length=hparams.hop_length,
                     win_length=window,
                     sampling_rate=hparams.sampling_rate,
                     n_mel_channels=160,
                     mel_fmin=hparams.mel_fmin,
                     mel_fmax=hparams.mel_fmax)
        for window in hparams.validation_windows
    ]

    optimizer = getOptimizer(model, hparams)

    if hparams.fp16_run:
        global amp
        from apex import amp
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=hparams.fp16_opt_level,
                                          min_loss_scale=2.0)
    else:
        amp = None

    # LEARNING RATE SCHEDULER
    if hparams.LRScheduler.lower() == "ReduceLROnPlateau".lower():
        from torch.optim.lr_scheduler import ReduceLROnPlateau
        min_lr = 1e-5
        factor = 0.1**(
            1 / 5)  # amount to scale the LR by on Validation Loss plateau
        scheduler = ReduceLROnPlateau(optimizer,
                                      'min',
                                      factor=factor,
                                      patience=20,
                                      cooldown=2,
                                      min_lr=min_lr,
                                      verbose=True)
        print("ReduceLROnPlateau used as Learning Rate Scheduler.")
    else:
        scheduler = None

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path:
        model, optimizer, iteration, scheduler = load_checkpoint(
            warm_start, warm_start_force, checkpoint_path, model, optimizer,
            scheduler, hparams.fp16_run)
    iteration += 1  # next iteration is iteration + 1

    trainset = Mel2Samp(hparams)
    speaker_lookup = trainset.speaker_ids
    # =====START: ADDED FOR DISTRIBUTED======
    if n_gpus > 1:
        train_sampler = DistributedSampler(trainset, shuffle=True)
        shuffle = False
    else:
        train_sampler = None
        shuffle = True
    # =====END:   ADDED FOR DISTRIBUTED======
    train_loader = DataLoader(trainset,
                              num_workers=hparams.n_dataloader_workers,
                              shuffle=shuffle,
                              sampler=train_sampler,
                              batch_size=hparams.batch_size,
                              pin_memory=False,
                              drop_last=True)

    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    if rank == 0:
        from tensorboardX import SummaryWriter
        if False:  # dated and separated log dirs for each run
            timestr = time.strftime("%Y_%m_%d-%H_%M_%S")
            log_directory = os.path.join(output_directory, log_directory,
                                         timestr)
        else:
            log_directory = os.path.join(output_directory, log_directory)
        logger = SummaryWriter(log_directory)

    moving_average = int(min(len(train_loader), 100))  # average the loss over up to 100 iters
    rolling_sum = StreamingMovingAverage(moving_average)
    start_time = time.time()
    start_time_single_batch = time.time()

    model.train()

    if os.path.exists(os.path.join(output_directory, "best_train_model")):
        best_model_loss = float(
            str(
                open(os.path.join(output_directory, "best_train_model") +
                     ".txt",
                     "r",
                     encoding="utf-8").read()).split("\n")[0])
    else:
        best_model_loss = -4.20
    if os.path.exists(os.path.join(output_directory, "best_val_model")):
        best_MSE = float(
            str(
                open(os.path.join(output_directory, "best_val_model") + ".txt",
                     "r",
                     encoding="utf-8").read()).split("\n")[0])
    else:
        best_MSE = 9e9
    epoch_offset = max(0, int(iteration / len(train_loader)))

    pytorch_total_params = sum(p.numel() for p in model.parameters())
    print("{:,} total parameters.".format(pytorch_total_params))
    pytorch_total_params = sum(p.numel() for p in model.parameters()
                               if p.requires_grad)
    print("{:,} trainable parameters.".format(pytorch_total_params))

    learning_rate = hparams.learning_rate
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in get_progress_bar(range(epoch_offset, hparams.epochs),
                                  dict(initial=epoch_offset,
                                       total=hparams.epochs,
                                       smoothing=0.01,
                                       desc="Epoch",
                                       position=1,
                                       unit="epoch"),
                                  hparams,
                                  rank=rank):
        cprint(f"Epoch: {epoch}", b_tqdm=hparams.tqdm)
        if n_gpus > 1: train_sampler.set_epoch(epoch)

        for i, batch in get_progress_bar(enumerate(train_loader),
                                         dict(desc=" Iter",
                                              smoothing=0,
                                              total=len(train_loader),
                                              position=0,
                                              unit="iter",
                                              leave=True),
                                         hparams,
                                         rank=rank):
            # run external code every iter, allows the run to be adjusted without restarts
            if (i == 0 or iteration % param_interval == 0):
                ldict = {'iteration': iteration}  # defined before the try so the update below cannot fail
                try:
                    with open("hparams_realtime.py") as f:
                        internal_text = str(f.read())
                        exec(internal_text, globals(), ldict)
                except Exception as ex:
                    cprint(f"Custom code FAILED to run!\n{ex}",
                           b_tqdm=hparams.tqdm)
                globals().update(ldict)
                locals().update(ldict)  # note: updating locals() has no effect inside a function in CPython
                if show_live_params:
                    cprint(internal_text, b_tqdm=hparams.tqdm)
            assert warmup_start <= iteration, "Current iteration less than warmup_start."
            # Learning Rate Schedule
            if custom_lr:
                old_lr = learning_rate
                if iteration < warmup_end:
                    learning_rate = (iteration - warmup_start) * (
                        (A_ + C_) - warmup_start_lr
                    ) / (
                        warmup_end - warmup_start
                    ) + warmup_start_lr  # learning rate increases from warmup_start_lr to A_ linearly over (warmup_end-warmup_start) iterations.
                else:
                    if iteration < decay_start:
                        learning_rate = A_ + C_
                    else:
                        iteration_adjusted = iteration - decay_start
                        learning_rate = (A_ *
                                         (e**(-iteration_adjusted / B_))) + C_
                assert learning_rate > -1e-8, "Negative Learning Rate."
                if old_lr != learning_rate:
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = learning_rate
            else:
                scheduler.patience = scheduler_patience
                scheduler.cooldown = scheduler_cooldown
                if override_scheduler_last_lr:
                    scheduler._last_lr = override_scheduler_last_lr
                    cprint("Scheduler last_lr overriden. scheduler._last_lr =",
                           scheduler._last_lr,
                           b_tqdm=hparams.tqdm)
                if not iteration % 20:  # check actual learning rate every 20 iters (because I sometimes see learning_rate variable go out-of-sync with real LR)
                    learning_rate = optimizer.param_groups[0]['lr']
                if override_scheduler_best:
                    scheduler.best = override_scheduler_best
                    cprint("Scheduler best metric overriden. scheduler.best =",
                           override_scheduler_best,
                           b_tqdm=hparams.tqdm)

            model.zero_grad()
            mel, audio, speaker_ids = batch
            mel = torch.autograd.Variable(mel.cuda(non_blocking=True))
            audio = torch.autograd.Variable(audio.cuda(non_blocking=True))
            if model.multispeaker:
                speaker_ids = torch.autograd.Variable(
                    speaker_ids.cuda(non_blocking=True)).long().squeeze(1)
                outputs = model(mel, audio, speaker_ids)
            else:
                outputs = model(mel, audio)

            loss = criterion(outputs)
            if n_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, n_gpus).item()
            else:
                reduced_loss = loss.item()

            assert reduced_loss < 1e5, "Model Diverged. Loss > 1e5"
            if hparams.fp16_run:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            if hparams.b_grad_clip:
                if hparams.fp16_run:
                    grad_norm = torch.nn.utils.clip_grad_norm_(
                        amp.master_params(optimizer), hparams.grad_clip_thresh)
                else:
                    grad_norm = torch.nn.utils.clip_grad_norm_(
                        model.parameters(), hparams.grad_clip_thresh)
                is_overflow = math.isinf(grad_norm) or math.isnan(grad_norm)
            else:
                is_overflow = False
                grad_norm = 0.00001

            optimizer.step()
            if not is_overflow and rank == 0:
                if (iteration % 100000 == 0):
                    # plot distribution of parameters
                    for tag, value in model.named_parameters():
                        tag = tag.replace('.', '/')
                        logger.add_histogram(tag,
                                             value.data.cpu().numpy(),
                                             iteration)
                logger.add_scalar('training_loss', reduced_loss, iteration)
                if (iteration % 20 == 0):
                    logger.add_scalar('learning.rate', learning_rate,
                                      iteration)
                if (iteration % 10 == 0):
                    logger.add_scalar('duration',
                                      ((time.time() - start_time) / 10),
                                      iteration)
                start_time_single_batch = time.time()

            average_loss = rolling_sum.process(reduced_loss)
            if rank == 0:
                if (iteration % 10 == 0):
                    cprint(
                        "{} {}:  {:.3f}  {:.3f} {:08.3F} {:.8f}LR ({:.8f} Effective)  {:.2f}s/iter {:.4f}s/item"
                        .format(
                            time.strftime("%H:%M:%S"), iteration, reduced_loss,
                            average_loss, round(grad_norm, 3), learning_rate,
                            min((hparams.grad_clip_thresh / grad_norm) *
                                learning_rate, learning_rate),
                            (time.time() - start_time) / 10,
                            ((time.time() - start_time) / 10) /
                            (hparams.batch_size * n_gpus)),
                        b_tqdm=hparams.tqdm)
                    start_time = time.time()
                else:
                    cprint(
                        "{} {}:  {:.3f}  {:.3f} {:08.3F} {:.8f}LR ({:.8f} Effective)"
                        .format(
                            time.strftime("%H:%M:%S"), iteration, reduced_loss,
                            average_loss, round(grad_norm, 3), learning_rate,
                            min((hparams.grad_clip_thresh / grad_norm) *
                                learning_rate, learning_rate)),
                        b_tqdm=hparams.tqdm)

            if rank == 0 and (len(rolling_sum.values) > moving_average - 2):
                if (average_loss + best_model_margin) < best_model_loss:
                    checkpoint_path = os.path.join(output_directory,
                                                   "best_train_model")
                    try:
                        save_checkpoint(model, optimizer, hparams,
                                        learning_rate, iteration, amp,
                                        scheduler, speaker_lookup,
                                        checkpoint_path)
                    except KeyboardInterrupt:  # Avoid corrupting the model.
                        save_checkpoint(model, optimizer, hparams,
                                        learning_rate, iteration, amp,
                                        scheduler, speaker_lookup,
                                        checkpoint_path)
                    text_file = open((f"{checkpoint_path}.txt"),
                                     "w",
                                     encoding="utf-8")
                    text_file.write(str(average_loss) + "\n" + str(iteration))
                    text_file.close()
                    best_model_loss = average_loss  #Only save the model if X better than the current loss.
            if rank == 0 and ((iteration % hparams.iters_per_checkpoint == 0)
                              or (os.path.exists(save_file_check_path))):
                checkpoint_path = f"{output_directory}/waveglow_{iteration}"
                save_checkpoint(model, optimizer, hparams, learning_rate,
                                iteration, amp, scheduler, speaker_lookup,
                                checkpoint_path)
                start_time_single_batch = time.time()
                if (os.path.exists(save_file_check_path)):
                    os.remove(save_file_check_path)

            if (iteration % validation_interval == 0):
                if rank == 0:
                    MSE, MAE = validate(model, STFT, logger, iteration,
                                        speaker_lookup, hparams,
                                        output_directory)
                    if scheduler and n_gpus > 1:
                        MSE = torch.tensor(MSE, device='cuda')
                        broadcast(MSE, 0)
                        scheduler.step(MSE.item())
                        if MSE < best_MSE:
                            checkpoint_path = os.path.join(
                                output_directory, "best_val_model")
                            try:
                                save_checkpoint(model, optimizer, hparams,
                                                learning_rate, iteration, amp,
                                                scheduler, speaker_lookup,
                                                checkpoint_path)
                            except KeyboardInterrupt:  # Avoid corrupting the model.
                                save_checkpoint(model, optimizer, hparams,
                                                learning_rate, iteration, amp,
                                                scheduler, speaker_lookup,
                                                checkpoint_path)
                            text_file = open((f"{checkpoint_path}.txt"),
                                             "w",
                                             encoding="utf-8")
                            text_file.write(
                                str(MSE.item()) + "\n" + str(iteration))
                            text_file.close()
                            best_MSE = MSE.item(
                            )  #Only save the model if X better than the current loss.
                else:
                    if scheduler:
                        MSE = torch.zeros(1, device='cuda')
                        broadcast(MSE, 0)
                        scheduler.step(MSE.item())
            iteration += 1
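
The custom learning-rate branch in Exemple #22 (linear warm-up, a flat plateau, then exponential decay toward a floor) is easier to read as a standalone function. The sketch below only restates the arithmetic above; warmup_start, warmup_start_lr, warmup_end, decay_start, A_, B_ and C_ mirror the variables injected at runtime by hparams_realtime.py.

from math import e

def custom_lr(iteration, warmup_start, warmup_start_lr, warmup_end,
              decay_start, A_, B_, C_):
    if iteration < warmup_end:
        # linear warm-up from warmup_start_lr to the peak value (A_ + C_)
        slope = ((A_ + C_) - warmup_start_lr) / (warmup_end - warmup_start)
        return warmup_start_lr + slope * (iteration - warmup_start)
    if iteration < decay_start:
        # hold at the peak until the decay phase starts
        return A_ + C_
    # exponential decay from the peak toward the floor C_ with time constant B_
    return A_ * e ** (-(iteration - decay_start) / B_) + C_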