Example #1
def load_model(hparams, dev='gpu'):
    if dev == 'gpu':
        model = Tacotron2(hparams).cuda()
    else:  # any other value falls back to CPU so model is always defined
        model = Tacotron2(hparams)
    if hparams.fp16_run:
        model.decoder.attention_layer.score_mask_value = finfo('float16').min

    if hparams.distributed_run:
        model = apply_gradient_allreduce(model)

    return model
Example #2
def load_model(hparams):
    if torch.cuda.is_available():
        model = Tacotron2(hparams).cuda()
    else:
        model = Tacotron2(hparams).to("cpu")
        
    if hparams.fp16_run:
        model.decoder.attention_layer.score_mask_value = finfo('float16').min

    if hparams.distributed_run:
        model = apply_gradient_allreduce(model)

    return model
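A minimal usage sketch (not part of the example above; it assumes the NVIDIA Tacotron2 recipe, where create_hparams() builds the hyperparameters and checkpoints store weights under a 'state_dict' key, as Examples 3 and 21 below do):

hparams = create_hparams()
model = load_model(hparams)
# the checkpoint path is a placeholder; any Tacotron2 checkpoint with a 'state_dict' entry works
checkpoint = torch.load('tacotron2_statedict.pt', map_location='cpu')
model.load_state_dict(checkpoint['state_dict'])
model.eval()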
Example #3
  def load(self, t_checkpoint_path, v_checkpoint_path,
           t_config_path=None, v_config_path=None, model_name='tacotron'):
    if t_checkpoint_path.endswith('.pt'):
        self.model_name = 'nvidia'
        print('Constructing model: %s' % self.model_name)

        # set-up params
        hparams = create_hparams()

        # load model from checkpoint
        self.model = Tacotron2(hparams)
        self.model.load_state_dict(torch.load(t_checkpoint_path,
                                              map_location='cpu')['state_dict'])
        _ = self.model.eval()
    else: # elif t_checkpoint_path.endswith('.pth.tar'):
        self.model_name = 'coqui'
        print('Constructing model: %s' % self.model_name)

        # load tts config and audio processor
        self.tts_config = load_config(t_config_path)
        self.tts_model = setup_tts_model(config=self.tts_config)
        self.tts_model.load_checkpoint(self.tts_config,
                                       t_checkpoint_path, eval=True)
        self.ap = AudioProcessor(verbose=False, **self.tts_config.audio)

        # load vocoder config and audio processor
        vocoder_config = load_config(v_config_path)
        self.vocoder_ap = AudioProcessor(verbose=False, **vocoder_config.audio)

    # Load neurips MelGAN for mel2audio synthesis
    self.vocoder = torch.hub.load('descriptinc/melgan-neurips', 'load_melgan')
    melgan_ckpt = torch.load(v_checkpoint_path, map_location='cpu')
    self.vocoder.mel2wav.load_state_dict(melgan_ckpt)
Example #4
    def T2LoadModel(self, T2Model, Cleaners):
        if os.path.isfile(T2Model):

            ExperimentDataParameters = [
                500, 500, True, False, False, "", "", Cleaners
            ]
            EncoderDecoderParameters = [
                5, 3, 512, 1024, 256, 1000, 0.5, 0.1, 0.1
            ]
            AttentionLocationLayerParameters = [1024, 128, 32, 31]
            MelProcessingNetworkParameters = [512, 5, 5]
            OptimizationHyperparameters = [
                False, True, 1e-3, 1e-6, 8
            ]

            hparams = create_hparams(
                ExperimentDataParameters,
                self.AudioParameters[self.currentAudioParameters],
                EncoderDecoderParameters, AttentionLocationLayerParameters,
                MelProcessingNetworkParameters, OptimizationHyperparameters)

            self.Cleaners = Cleaners

            self.model = Tacotron2(hparams).cuda()
            self.model.load_state_dict(torch.load(T2Model)['state_dict'])
            self._ = self.model.cuda().eval().half()
Example #5
def load_model(hparams):
    model = Tacotron2(hparams).cuda()
    if hparams.fp16_run:
        model = batchnorm_to_float(model.half())
        model.decoder.attention_layer.score_mask_value = float(
            finfo('float16').min)
    return model
Example #6
def load_model(hparams):
    if hparams.reverse:
        model = TacotronAsr(hparams).cuda()
    else:
        model = Tacotron2(hparams).cuda()
    if hparams.distributed_run:
        model = apply_gradient_allreduce(model)
    return model
Example #7
def initiate_model(hparams):
    model = Tacotron2(hparams).cuda()
    if hparams.fp16_run:
        model.decoder.attention_layer.score_mask_value = finfo('float16').min

    if hparams.distributed_run:
        model = apply_gradient_allreduce(model)

    return model
Example #8
def load_model(hparams, use_cuda=True):
    device = torch.device('cuda' if use_cuda else 'cpu')
    model = Tacotron2(hparams).to(device)
    if hparams.fp16_run:
        model.decoder.attention_layer.score_mask_value = finfo('float16').min

    if hparams.distributed_run:
        model = apply_gradient_allreduce(model)

    return model
Example #9
def load_model(hparams):
    model = Tacotron2(hparams).cuda()
    model.decoder.residual_encoder.after_optim_step()
    if hparams.fp16_run:
        model.decoder.attention_layer.score_mask_value = finfo('float16').min

    if hparams.distributed_run:
        model = apply_gradient_allreduce(model)

    return model
Example #10
    def load_model(self):
        model = Tacotron2(self.hparams)
        if self.hparams.fp16_run:
            model.decoder.attention_layer.score_mask_value = finfo(
                'float16').min

        if self.hparams.distributed_run:
            model = apply_gradient_allreduce(model)

        return model
Example #11
def load_model(hparams, symbols):
    print(len(symbols))
    model = Tacotron2(hparams, len(symbols)).cuda()
    if hparams.fp16_run:
        model.decoder.attention_layer.score_mask_value = finfo('float16').min

    if hparams.distributed_run:
        model = apply_gradient_allreduce(model)

    return model
Example #12
def load_model(hparams):
    model = Tacotron2(hparams).cuda()
    model = batchnorm_to_float(model.half()) if hparams.fp16_run else model

    if hparams.distributed_run:
        model = DistributedDataParallel(model)
    elif torch.cuda.device_count() > 1:
        model = DataParallel(model)

    return model
Example #13
def init_model(tacotron_cp, speaker_encode_cp):
    hparams = create_hparams(
        'tacotron', f'speaker_encoder={speaker_encode_cp}'
        if speaker_encode_cp is not None else None)

    model = Tacotron2(hparams)
    model.load_state_dict(torch.load(tacotron_cp)['state_dict'])
    model.eval()

    return model
Example #14
    def __init__(self, hparams, tacotron2_path, waveglow_path):
        self.Tacotron2Model = Tacotron2(hparams).cpu()
        self.WaveglowModel = None
        self.Denoiser = None

        self.WaveglowSigma = 0.800
        self.UseDenoiser = False
        self.DenoiserStrength = 0.01

        self.load_models(tacotron2_path, waveglow_path)
Example #15
def load_model(hparams):
    model = Tacotron2(hparams).cuda()
    if hparams.fp16_run:
        model = batchnorm_to_float(model.half())
        model.decoder.attention_layer.score_mask_value = float(finfo('float16').min)

    if hparams.distributed_run:
        model = apply_gradient_allreduce(model)

    return model
Example #16
def load_model(hparams):
    model = Tacotron2(hparams).cuda()
    if hparams.fp16_run:
        model = batchnorm_to_float(model.half())
        model.decoder.attention_layer.score_mask_value = float(
            finfo('float16').min)
    if hparams.distributed_run:
        model = DistributedDataParallel(model)
    elif torch.cuda.device_count() > 1:
        model = DataParallel(model)
    return model
Example #17
def log_audio(model: Tacotron2, iteration: int, logger: Tacotron2Logger, waveglow):
    text = "Does it work yet?"
    sequence = array(text_to_sequence(text, ['english_cleaners']))[None, :]
    sequence = torch.from_numpy(sequence).cuda().long()

    mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence)
    with torch.no_grad():
        audio = waveglow.infer(mel_outputs_postnet, sigma=0.666)

    logger.add_audio(text, audio[0].data.cpu(), global_step=iteration, sample_rate=hparams.sampling_rate)
Example #18
def log_audio(model: Tacotron2, iteration: int, logger: Tacotron2Logger,
              waveglow, inference_batch, text_encoded, mel):
    # load source data to obtain rhythm using tacotron 2 as a forced aligner
    x, y = model.parse_batch(inference_batch)

    with torch.no_grad():
        # get rhythm (alignment map) using tacotron 2
        mel_outputs, mel_outputs_postnet, gate_outputs, rhythm = model.forward(
            x)
        rhythm = rhythm.permute(1, 0, 2)

    for emotion in range(4):
        emotion_id = torch.LongTensor([emotion]).cuda()

        with torch.no_grad():
            mel_outputs, mel_outputs_postnet, gate_outputs, _ = model.inference_noattention(
                (text_encoded, mel, emotion_id, rhythm))
            audio = waveglow.infer(mel_outputs_postnet, sigma=0.8)

        logger.add_audio(f"Emotion {str(emotion)}",
                         audio[0].data.cpu(),
                         global_step=iteration,
                         sample_rate=hparams.sampling_rate)
Example #19
def load_inference_model(hparams, name, ckpt_step, cuda=True):
    ckpt_path = './output-{}/checkpoint_{}'.format(name, ckpt_step)
    ckpt = torch.load(ckpt_path)['state_dict']
    ckpt_ = dict()
    for k in ckpt.keys():
        if k.startswith('module'):
            ckpt_['.'.join(k.split('.')[1:])] = ckpt[k]
        else:
            ckpt_[k] = ckpt[k]
    model = Tacotron2(
        hparams) if hparams.model == 'tacotron2' else Transformer(hparams)
    model.load_state_dict(ckpt_)
    model.eval()
    if cuda:
        model = model.cuda()
        model.eval()
    return model
Example #20
def load_tacotron():
    # uses the module-level hparams
    if hparams.is_cuda:
        device = "cuda"
    else:
        device = "cpu"
    # load tacotron model
    tacotron = Tacotron2(hparams)
    tacotron.load_state_dict(
        torch.load(hparams.tacotron_path,
                   map_location=torch.device(device))['state_dict'])

    if hparams.is_cuda:
        tacotron.cuda().eval().half()
    else:
        tacotron.eval()
    return hparams, tacotron
Example #21
    def load(self,
             t_checkpoint_path,
             v_checkpoint_path,
             model_name='tacotron'):
        print('Constructing model: %s' % model_name)

        # set-up params
        hparams = create_hparams()

        # load model from checkpoint
        self.model = Tacotron2(hparams)
        self.model.load_state_dict(
            torch.load(t_checkpoint_path, map_location='cpu')['state_dict'])
        _ = self.model.eval()

        # Load neurips MelGAN for mel2audio synthesis
        self.vocoder = torch.hub.load('descriptinc/melgan-neurips',
                                      'load_melgan')
        melgan_ckpt = torch.load(v_checkpoint_path, map_location='cpu')
        self.vocoder.mel2wav.load_state_dict(melgan_ckpt)
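A hedged end-to-end sketch (an addition, not from the snippet above): after load(), synthesis typically encodes the text and runs Tacotron2 inference to produce the mel spectrogram that the vocoder consumes; the helper names and the synth instance below are assumptions borrowed from Example 17:

# 'synth' is a hypothetical instance of the class above, after synth.load(...) has run
sequence = np.array(text_to_sequence("Hello world.", ['english_cleaners']))[None, :]
sequence = torch.from_numpy(sequence).long()
with torch.no_grad():
    mel_outputs, mel_outputs_postnet, _, alignments = synth.model.inference(sequence)
# mel_outputs_postnet is then handed to the MelGAN vocoder loaded in load()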
Example #22
def load_model(hparams, device="cuda"):
    if hparams.model_type == "tacotron2":
        model = Tacotron2(hparams).to(device)
        model.requires_durations = False
    elif hparams.model_type == "forwardtacotron":
        model = ForwardTacotron(hparams,
                                num_chars=hparams.n_symbols,
                                n_mels=hparams.n_mel_channels).to(device)
        model.requires_durations = True
    elif hparams.model_type == "durationtacotron2":
        model = DurationTacotron2().to(device)
        model.requires_durations = True
    else:
        raise ValueError("Unknown model_type: %s" % hparams.model_type)

    if hparams.fp16_run:
        model.decoder.attention_layer.score_mask_value = finfo('float16').min

    if hparams.distributed_run:
        model = apply_gradient_allreduce(model)

    return model
Example #23
    def load(self, tacotron_model, waveglow_model):
        # setting
        self.project_name = 'tacotron2'
        sys.path.append(self.project_name)
        sys.path.append(join(self.project_name, 'waveglow/'))

        # initialize Tacotron2
        self.hparams = create_hparams()
        self.hparams.sampling_rate = 22050
        self.hparams.max_decoder_steps = 1000
        self.hparams.fp16_run = True

        self.tacotron = Tacotron2(self.hparams)
        self.tacotron.load_state_dict(torch.load(tacotron_model)['state_dict'])
        _ = self.tacotron.cuda().eval()

        self.waveglow = torch.load(waveglow_model)['model']
        self.waveglow = self.waveglow.remove_weightnorm(self.waveglow)
        _ = self.waveglow.cuda().eval()
        for k in self.waveglow.convinv:
            k.float()
Example #24
    from model import Tacotron2
    from loss_function import Tacotron2Loss
    hparams = create_hparams()
    text_loader = TextMelLoader(hparams.training_lst, hparams)
    collate_fn = TextMelCollate(hparams.n_frames_per_step)

    text, mel = text_loader[0]  # mel.shape (80 * frame_num)
    plt.matshow(mel, origin='lower')
    plt.colorbar()
    plt.savefig('mel_demo.png')

    train_loader = torch.utils.data.DataLoader(text_loader,
                                               num_workers=1,
                                               shuffle=False,
                                               batch_size=3,
                                               pin_memory=False,
                                               drop_last=True,
                                               collate_fn=collate_fn)
    print(len(train_loader))
    tacotron = Tacotron2(hparams)
    criterion = Tacotron2Loss()
    for batch in train_loader:
        text_padded, text_alignment_padded, input_lengths, mel_padded, alignments, alignments_weights_padded,\
            output_lengths = batch
        max_len = torch.max(input_lengths.data).item()
        x = (text_padded, input_lengths, mel_padded, max_len, output_lengths)
        y = (mel_padded, alignments, alignments_weights_padded,
             text_alignment_padded)
        y_pred = tacotron(x)
        print(criterion(y_pred, y))
        break
Example #25
def main(args):
    # Get device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Define model
    model = nn.DataParallel(Tacotron2(hp)).to(device)
    print("Model Has Been Defined")
    num_param = utils.get_param_num(model)
    print('Number of TTS Parameters:', num_param)

    # Get dataset
    dataset = DNNDataset()

    # Optimizer and loss
    optimizer = torch.optim.Adam(model.parameters(),
                                 betas=(0.9, 0.98),
                                 eps=1e-9)
    scheduled_optim = ScheduledOptim(optimizer,
                                     hp.encoder_embedding_dim,
                                     hp.n_warm_up_step,
                                     args.restore_step)
    criterion = DNNLoss().to(device)
    print("Defined Optimizer and Loss Function.")

    # Load checkpoint if exists
    try:
        checkpoint = torch.load(os.path.join(
            hp.checkpoint_path, 'checkpoint_%d.pth.tar' % args.restore_step))
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("\n---Model Restored at Step %d---\n" % args.restore_step)
    except Exception:
        print("\n---Start New Training---\n")
        if not os.path.exists(hp.checkpoint_path):
            os.mkdir(hp.checkpoint_path)

    # Init logger
    if not os.path.exists(hp.logger_path):
        os.mkdir(hp.logger_path)

    # Define Some Information
    Time = np.array([])
    Start = time.perf_counter()  # time.clock() was removed in Python 3.8

    # Training
    model = model.train()

    for epoch in range(hp.epochs):
        # Get Training Loader
        training_loader = DataLoader(dataset,
                                     batch_size=hp.batch_size**2,
                                     shuffle=True,
                                     collate_fn=collate_fn,
                                     drop_last=True,
                                     num_workers=0)
        total_step = hp.epochs * len(training_loader) * hp.batch_size

        for i, batchs in enumerate(training_loader):
            for j, db in enumerate(batchs):
                start_time = time.perf_counter()

                current_step = i * hp.batch_size + j + args.restore_step + \
                    epoch * len(training_loader) * hp.batch_size + 1

                # Init
                scheduled_optim.zero_grad()

                # Get Data
                character = torch.from_numpy(db["text"]).long().to(device)
                mel_target = torch.from_numpy(db["mel_target"]).transpose(1, 2)
                mel_target = mel_target.float().to(device)
                duration = torch.from_numpy(db["D"]).int().to(device)
                src_pos = torch.from_numpy(db["src_pos"]).long().to(device)
                mel_pos = torch.from_numpy(db["mel_pos"]).long().to(device)
                max_mel_len = db["mel_max_len"]

                input_lengths = torch.max(src_pos, -1)[0]
                output_lengths = torch.max(mel_pos, -1)[0]
                stop_token = torch.from_numpy(db["stop_token"])
                stop_token = stop_token.float().to(device)

                # Forward
                batch = character, input_lengths, mel_target, stop_token, output_lengths
                x, y = model.module.parse_batch(batch)
                y_ = model(x)

                # Cal Loss
                mel_loss, mel_postnet_loss, stop_pred_loss = criterion(y_, y)
                total_loss = mel_loss + mel_postnet_loss + stop_pred_loss

                # Logger
                t_l = total_loss.item()
                m_l = mel_loss.item()
                m_p_l = mel_postnet_loss.item()
                s_l = stop_pred_loss.item()

                with open(os.path.join("logger", "total_loss.txt"), "a") as f_total_loss:
                    f_total_loss.write(str(t_l)+"\n")

                with open(os.path.join("logger", "mel_loss.txt"), "a") as f_mel_loss:
                    f_mel_loss.write(str(m_l)+"\n")

                with open(os.path.join("logger", "mel_postnet_loss.txt"), "a") as f_mel_postnet_loss:
                    f_mel_postnet_loss.write(str(m_p_l)+"\n")

                with open(os.path.join("logger", "duration_loss.txt"), "a") as f_d_loss:
                    f_d_loss.write(str(s_l)+"\n")

                # Backward
                total_loss.backward()

                # Clipping gradients to avoid gradient explosion
                nn.utils.clip_grad_norm_(
                    model.parameters(), hp.grad_clip_thresh)

                # Update weights
                if args.frozen_learning_rate:
                    scheduled_optim.step_and_update_lr_frozen(
                        args.learning_rate_frozen)
                else:
                    scheduled_optim.step_and_update_lr()

                # Print
                if current_step % hp.log_step == 0:
                    Now = time.perf_counter()

                    str1 = "Epoch [{}/{}], Step [{}/{}]:".format(
                        epoch+1, hp.epochs, current_step, total_step)
                    str2 = "Mel Loss: {:.4f}, Mel PostNet Loss: {:.4f}, ".format(m_l, m_p_l) + \
                        "Stop Prediction Loss: {:.4f};".format(s_l)
                    str3 = "Current Learning Rate is {:.6f}.".format(
                        scheduled_optim.get_learning_rate())
                    str4 = "Time Used: {:.3f}s, Estimated Time Remaining: {:.3f}s.".format(
                        (Now-Start), (total_step-current_step)*np.mean(Time))

                    print("\n" + str1)
                    print(str2)
                    print(str3)
                    print(str4)

                    with open(os.path.join("logger", "logger.txt"), "a") as f_logger:
                        f_logger.write(str1 + "\n")
                        f_logger.write(str2 + "\n")
                        f_logger.write(str3 + "\n")
                        f_logger.write(str4 + "\n")
                        f_logger.write("\n")

                if current_step % hp.save_step == 0:
                    torch.save({'model': model.state_dict(), 'optimizer': optimizer.state_dict(
                    )}, os.path.join(hp.checkpoint_path, 'checkpoint_%d.pth.tar' % current_step))
                    print("save model at step %d ..." % current_step)

                end_time = time.perf_counter()
                Time = np.append(Time, end_time - start_time)
                if len(Time) == hp.clear_Time:
                    temp_value = np.mean(Time)
                    Time = np.delete(
                        Time, [i for i in range(len(Time))], axis=None)
                    Time = np.append(Time, temp_value)
Example #26
def main(args):
    # Get device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Define model
    print("Use Tacotron2")
    model = nn.DataParallel(Tacotron2(hp)).to(device)
    print("Model Has Been Defined")
    num_param = utils.get_param_num(model)
    print('Number of TTS Parameters:', num_param)
    # Get buffer
    print("Load data to buffer")
    buffer = get_data_to_buffer()

    # Optimizer and loss
    optimizer = torch.optim.Adam(model.parameters(),
                                 betas=(0.9, 0.98),
                                 eps=1e-9)
    scheduled_optim = ScheduledOptim(optimizer, hp.decoder_rnn_dim,
                                     hp.n_warm_up_step, args.restore_step)
    tts_loss = DNNLoss().to(device)
    print("Defined Optimizer and Loss Function.")

    # Load checkpoint if exists
    try:
        checkpoint = torch.load(
            os.path.join(hp.checkpoint_path,
                         'checkpoint_%d.pth.tar' % args.restore_step))
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("\n---Model Restored at Step %d---\n" % args.restore_step)
    except Exception:
        print("\n---Start New Training---\n")
        if not os.path.exists(hp.checkpoint_path):
            os.mkdir(hp.checkpoint_path)

    # Init logger
    if not os.path.exists(hp.logger_path):
        os.mkdir(hp.logger_path)

    # Get dataset
    dataset = BufferDataset(buffer)

    # Get Training Loader
    training_loader = DataLoader(dataset,
                                 batch_size=hp.batch_expand_size *
                                 hp.batch_size,
                                 shuffle=True,
                                 collate_fn=collate_fn_tensor,
                                 drop_last=True,
                                 num_workers=0)
    total_step = hp.epochs * len(training_loader) * hp.batch_expand_size

    # Define Some Information
    Time = np.array([])
    Start = time.perf_counter()

    # Training
    model = model.train()

    for epoch in range(hp.epochs):
        for i, batchs in enumerate(training_loader):
            # real batch start here
            for j, db in enumerate(batchs):
                start_time = time.perf_counter()

                current_step = i * hp.batch_expand_size + j + args.restore_step + \
                    epoch * len(training_loader) * hp.batch_expand_size + 1

                # Init
                scheduled_optim.zero_grad()

                # Get Data
                character = db["text"].long().to(device)
                mel_target = db["mel_target"].float().to(device)
                mel_pos = db["mel_pos"].long().to(device)
                src_pos = db["src_pos"].long().to(device)
                max_mel_len = db["mel_max_len"]

                mel_target = mel_target.contiguous().transpose(1, 2)
                src_length = torch.max(src_pos, -1)[0]
                mel_length = torch.max(mel_pos, -1)[0]

                gate_target = mel_pos.eq(0).float()
                gate_target = gate_target[:, 1:]
                gate_target = F.pad(gate_target, (0, 1, 0, 0), value=1.)

                # Forward
                inputs = character, src_length, mel_target, max_mel_len, mel_length
                mel_output, mel_output_postnet, gate_output = model(inputs)

                # Cal Loss
                mel_loss, mel_postnet_loss, gate_loss \
                    = tts_loss(mel_output, mel_output_postnet, gate_output,
                               mel_target, gate_target)
                total_loss = mel_loss + mel_postnet_loss + gate_loss

                # Logger
                t_l = total_loss.item()
                m_l = mel_loss.item()
                m_p_l = mel_postnet_loss.item()
                g_l = gate_loss.item()

                with open(os.path.join("logger", "total_loss.txt"),
                          "a") as f_total_loss:
                    f_total_loss.write(str(t_l) + "\n")

                with open(os.path.join("logger", "mel_loss.txt"),
                          "a") as f_mel_loss:
                    f_mel_loss.write(str(m_l) + "\n")

                with open(os.path.join("logger", "mel_postnet_loss.txt"),
                          "a") as f_mel_postnet_loss:
                    f_mel_postnet_loss.write(str(m_p_l) + "\n")

                with open(os.path.join("logger", "gate_loss.txt"),
                          "a") as f_g_loss:
                    f_g_loss.write(str(g_l) + "\n")

                # Backward
                total_loss.backward()

                # Clipping gradients to avoid gradient explosion
                nn.utils.clip_grad_norm_(model.parameters(),
                                         hp.grad_clip_thresh)

                # Update weights
                if args.frozen_learning_rate:
                    scheduled_optim.step_and_update_lr_frozen(
                        args.learning_rate_frozen)
                else:
                    scheduled_optim.step_and_update_lr()

                # Print
                if current_step % hp.log_step == 0:
                    Now = time.perf_counter()

                    str1 = "Epoch [{}/{}], Step [{}/{}]:"\
                        .format(epoch + 1, hp.epochs, current_step, total_step)
                    str2 = "Mel Loss: {:.4f}, Mel PostNet Loss: {:.4f}, Gate Loss: {:.4f};".format(
                        m_l, m_p_l, g_l)
                    str3 = "Current Learning Rate is {:.6f}."\
                        .format(scheduled_optim.get_learning_rate())
                    str4 = "Time Used: {:.3f}s, Estimated Time Remaining: {:.3f}s."\
                        .format((Now-Start), (total_step-current_step) * np.mean(Time, dtype=np.float32))

                    print("\n" + str1)
                    print(str2)
                    print(str3)
                    print(str4)

                    with open(os.path.join("logger", "logger.txt"),
                              "a") as f_logger:
                        f_logger.write(str1 + "\n")
                        f_logger.write(str2 + "\n")
                        f_logger.write(str3 + "\n")
                        f_logger.write(str4 + "\n")
                        f_logger.write("\n")

                if current_step % hp.save_step == 0:
                    torch.save(
                        {
                            'model': model.state_dict(),
                            'optimizer': optimizer.state_dict()
                        },
                        os.path.join(hp.checkpoint_path,
                                     'checkpoint_%d.pth.tar' % current_step))
                    print("save model at step %d ..." % current_step)

                end_time = time.perf_counter()
                Time = np.append(Time, end_time - start_time)
                if len(Time) == hp.clear_Time:
                    temp_value = np.mean(Time)
                    Time = np.delete(Time, [i for i in range(len(Time))],
                                     axis=None)
                    Time = np.append(Time, temp_value)
Example #27
BATCH_SIZE = 8

torch.backends.cudnn.deterministic = True
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)


from src.dataset import load_dataset
dataloader_train, dataloader_val = load_dataset(featurizer, BATCH_SIZE)

from model import Tacotron2

generator = Tacotron2(n_mels=80, n_frames=1).to(device)

from math import exp, log
optimizer = optim.Adam(generator.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-6, weight_decay=1e-6)
lambda1 = lambda step: exp(log(0.01)*min(15000, step)/15000)
scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[lambda1])

import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

import wandb
wandb.init(
  project="DLA_HW4",
  config={
    "lstm_dropout": 0.1,
    "n_mels": 80,
Example #28
def train(num_gpus, rank, group_name, output_directory, epochs, learning_rate,
          sigma, batch_size, seed, checkpoint_path, hparams):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # =====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    # =====END:   ADDED FOR DISTRIBUTED======

    criterion = WaveGlowLoss(sigma)
    if num_gpus >= 1:
        model = WaveGlow(**waveglow_config, hparams=hparams).cuda()
    else:
        model = WaveGlow(**waveglow_config, hparams=hparams)

    # =====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    # =====END:   ADDED FOR DISTRIBUTED======

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Load checkpoint if one exists
    iteration, eval_iteration = 0, 0

    if checkpoint_path != "":
        model, optimizer, iteration, eval_iteration = load_checkpoint(checkpoint_path, model, optimizer)
        iteration += 1  # next iteration is iteration + 1
        eval_iteration += 1
    # trainset = Mel2Samp(**data_config)

    trainset = TextMelLoader(
        audiopaths_and_text='./filelists/ljs_audio_text_train_filelist.txt', hparams=hparams)
    testset = TextMelLoader(
        audiopaths_and_text='./filelists/ljs_audio_text_test_filelist.txt', hparams=hparams)


    collate_fn = TextMelCollate(hparams, fixed_length=True)
    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    train_loader = DataLoader(trainset, num_workers=1,
                              collate_fn=collate_fn,
                              shuffle=False,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)
    test_loader = DataLoader(testset, num_workers=1,
                             collate_fn=collate_fn,
                             shuffle=False,
                             sampler=train_sampler,
                             batch_size=batch_size,
                             pin_memory=False,
                             drop_last=True)
    log_path = os.path.join(output_directory, 'log-event')
    os.makedirs(log_path, exist_ok=True)
    logger = WaveGlowLogger(log_path)
    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    model.train()
    tacotron2 = Tacotron2(hparams)
    batch_parser = tacotron2.parse_batch
    # we use tacotron-2's pipeline
    epoch_offset = max(0, int(iteration / len(train_loader)))
    # ================ MAIN TRAINNIG LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        print("Epoch: {}".format(epoch))
        model.train()
        for i, batch in enumerate(train_loader):
            model.zero_grad()
            x, y = batch_parser(batch)
            text_padded, input_lengths, mel_padded, max_len, output_lengths = x
            # print(text_padded.size(), mel_padded.size())
            mel_padded, gate_padded = y
            outputs = model((text_padded, mel_padded))

            loss = criterion(outputs)
            logger.log_loss('train/loss', loss, iteration)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()
            loss.backward()
            optimizer.step()

            print("{}:\t{:.9f}".format(iteration, reduced_loss))
            iteration += 1

        # model.eval()
        # for i, batch in enumerate(test_loader):
        #     x, y = batch_parser(batch)
        #     text_padded, input_lengths, mel_padded, max_len, output_lengths = x
        #     mel_padded, gate_padded = y
        #     outputs = model((text_padded, mel_padded))
        #     loss = criterion(outputs)
        #     logger.log_loss('eval/loss', loss, iteration)
        #     eval_iteration += 1

        if rank == 0:
            checkpoint_path = "{}/waveglow_epoch_{}".format(output_directory, epoch)
            save_checkpoint(model, optimizer, learning_rate, iteration, eval_iteration, checkpoint_path,
                            hparams=hparams)
Example #29
def load_model(hparams):
    model = Tacotron2(hparams).cuda()
    return model
Example #30
                    help='pre-trained model')
parser.add_argument('--warm_start',
                    action='store_true',
                    help='load model weights only, ignore specified layers')
parser.add_argument('--hparams',
                    type=str,
                    required=False,
                    help='comma separated name=value pairs')
parser.add_argument('--freeze_encoder',
                    type=str,
                    default=False,
                    help='freeze encoder for transfer learning')
args = parser.parse_args()
hps = create_hparams(args.hparams)

model = Tacotron2(hps).to(device)

# If warm-start: need to specify a valid path for pre-trained model
if path.exists(args.pre_trained) and args.warm_start:
    model = warm_start_model(args.pre_trained, model, hps.ignore_layers)

# Case Freeze Encoder
if args.freeze_encoder:
    model.freeze_encoder()

# Load training csv
(data_train, nms_data) = load_csv(hps.nm_csv_train, hps)

# Manage types of sampling for batch selection: (BatchSampler or OrderedSampler)
sampler_train = OrderedSampler(model,
                               data_train,