Esempio n. 1
0
    def forward(self, audio, mel, audio_start, clip_kl=True):
        """Compute loss of Clarinet model.

        Args:
            audio (Variable): shape(B, T_audio), dtype flaot32, ground truth waveform.
            mel (Variable): shape(B, F, T_mel), dtype flaot32, condition(mel spectrogram here).
            audio_start (Variable): shape(B, ), dtype int64, audio starts positions.
            clip_kl (bool, optional): whether to clip kl_loss by maximum=100. Defaults to True.

        Returns:
            Dict(str, Variable)
            loss (Variable): shape(1, ), dtype flaot32, total loss.
            kl (Variable): shape(1, ), dtype flaot32, kl divergence between the teacher's output distribution and student's output distribution.
            regularization (Variable): shape(1, ), dtype flaot32, a regularization term of the KL divergence.
            spectrogram_frame_loss (Variable): shape(1, ), dytpe: float, stft loss, the L1-distance of the magnitudes of the spectrograms of the ground truth waveform and synthesized waveform.
        """
        batch_size, audio_length = audio.shape  # audio clip's length

        z = F.gaussian_random(audio.shape)
        condition = self.encoder(mel)  # (B, C, T)
        condition_slice = crop(condition, audio_start, audio_length)

        x, s_means, s_scales = self.student(z, condition_slice)  # all [0: T]
        s_means = s_means[:, 1:]  # (B, T-1), time steps [1: T]
        s_scales = s_scales[:, 1:]  # (B, T-1), time steps [1: T]
        s_clipped_scales = F.clip(s_scales, self.min_log_scale, 100.)

        # teacher outputs single gaussian
        y = self.teacher(x[:, :-1], condition_slice[:, :, 1:])
        _, t_means, t_scales = F.split(y, 3, -1)  # time steps [1: T]
        t_means = F.squeeze(t_means, [-1])  # (B, T-1), time steps [1: T]
        t_scales = F.squeeze(t_scales, [-1])  # (B, T-1), time steps [1: T]
        t_clipped_scales = F.clip(t_scales, self.min_log_scale, 100.)

        s_distribution = D.Normal(s_means, F.exp(s_clipped_scales))
        t_distribution = D.Normal(t_means, F.exp(t_clipped_scales))

        # kl divergence loss, so we only need to sample once? no MC
        kl = s_distribution.kl_divergence(t_distribution)
        if clip_kl:
            kl = F.clip(kl, -100., 10.)
        # context size dropped
        kl = F.reduce_mean(kl[:, self.teacher.context_size:])
        # major diff here
        regularization = F.mse_loss(t_scales[:, self.teacher.context_size:],
                                    s_scales[:, self.teacher.context_size:])

        # introduce information from real target
        spectrogram_frame_loss = F.mse_loss(self.stft.magnitude(audio),
                                            self.stft.magnitude(x))
        loss = kl + self.lmd * regularization + spectrogram_frame_loss
        loss_dict = {
            "loss": loss,
            "kl_divergence": kl,
            "regularization": regularization,
            "stft_loss": spectrogram_frame_loss
        }
        return loss_dict
Esempio n. 2
0
    def test_mse_loss(self):
        input_val = np.random.uniform(0.1, 0.5, (2, 3)).astype("float32")
        label_val = np.random.uniform(0.1, 0.5, (2, 3)).astype("float32")

        sub = input_val - label_val
        np_result = np.mean(sub * sub)

        input_var = layers.create_tensor(dtype="float32", name="input")
        label_var = layers.create_tensor(dtype="float32", name="label")

        layers.assign(input=input_val, output=input_var)
        layers.assign(input=label_val, output=label_var)
        output = layers.mse_loss(input=input_var, label=label_var)
        for use_cuda in ([False, True]
                         if core.is_compiled_with_cuda() else [False]):
            place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
            exe = Executor(place)
            result = exe.run(fluid.default_main_program(),
                             feed={
                                 "input": input_var,
                                 "label": label_var
                             },
                             fetch_list=[output])

            self.assertTrue(np.isclose(np_result, result).all())
Esempio n. 3
0
    def forward(self):
        """forward"""
        dist_feat_order = self.edges_dist
        dist_feat = self.e2n_gw.edge_feat['dist']
        dist_feat, dist_feat_order = layers.spatial_embedding(
            dist_feat, dist_feat_order, self.hidden_size)

        node_edge_feat = self.e2n_gw.node_feat["attr"]
        feat_size = node_edge_feat.shape[-1]

        for i in range(self.num_layers):
            out_size = self.hidden_size if i == self.num_layers + 1 else feat_size
            feat_h = layers.SpatialConv(self.e2n_gw,
                                        self.e2e_gw,
                                        self.srcs,
                                        self.dsts,
                                        node_edge_feat,
                                        dist_feat_order,
                                        dist_feat,
                                        self.nids,
                                        self.eids,
                                        self.node_lod,
                                        self.edge_lod,
                                        out_size,
                                        name="layer_%s" % (i))
            node_edge_feat = feat_h

        node_feat = fl.gather(node_edge_feat, self.nids)
        pooled_h = layers.graph_pooling(node_feat, self.node_lod,
                                        self.pool_type)

        output = fl.fc(pooled_h, size=self.hidden_size * 4, act='relu')
        output = fl.dropout(output,
                            self.dropout_prob,
                            dropout_implementation="upscale_in_train")
        output = fl.fc(output, size=self.hidden_size * 2, act='relu')
        output = fl.dropout(output,
                            self.dropout_prob,
                            dropout_implementation="upscale_in_train")
        output = fl.fc(output, size=self.hidden_size * 1, act='relu')
        output = fl.dropout(output,
                            self.dropout_prob,
                            dropout_implementation="upscale_in_train")
        self.output = fl.fc(output, size=self.n_output, act=None)

        # calculate loss
        self.loss = fl.mse_loss(self.output, self.pk)
        self.loss = fl.reduce_mean(self.loss)
Esempio n. 4
0
                                kept_layers_index.append(
                                    math.floor(i / depth_mult) - 1)

                            if mode == 'classification':
                                logit_loss = soft_cross_entropy(
                                    student_logit, teacher_logit.detach())
                            else:
                                logit_loss = 0.0

                            ### hidden_states distillation loss
                            rep_loss = 0.0
                            for stu_rep, tea_rep in zip(
                                    student_reps,
                                    list(teacher_reps[i]
                                         for i in kept_layers_index)):
                                tmp_loss = L.mse_loss(stu_rep,
                                                      tea_rep.detach())
                                rep_loss += tmp_loss

                            loss = args.width_lambda1 * logit_loss + args.width_lambda2 * rep_loss

                        else:
                            ### logit distillation loss
                            if mode == 'classification':
                                logit_loss = soft_cross_entropy(
                                    student_logit, teacher_logit.detach())
                            else:
                                logit_loss = 0.0

                            ### hidden_states distillation loss
                            rep_loss = 0.0
                            for stu_rep, tea_rep in zip(
Esempio n. 5
0
def main(args):
    local_rank = dg.parallel.Env().local_rank
    nranks = dg.parallel.Env().nranks
    parallel = nranks > 1

    with open(args.config) as f:
        cfg = yaml.load(f, Loader=yaml.Loader)

    global_step = 0
    place = fluid.CUDAPlace(dg.parallel.Env()
                            .dev_id) if args.use_gpu else fluid.CPUPlace()
    fluid.enable_dygraph(place)

    if not os.path.exists(args.output):
        os.mkdir(args.output)

    writer = SummaryWriter(os.path.join(args.output,
                                        'log')) if local_rank == 0 else None

    model = FastSpeech(cfg['network'], num_mels=cfg['audio']['num_mels'])
    model.train()
    optimizer = fluid.optimizer.AdamOptimizer(
        learning_rate=dg.NoamDecay(1 / (cfg['train']['warm_up_step'] *
                                        (cfg['train']['learning_rate']**2)),
                                   cfg['train']['warm_up_step']),
        parameter_list=model.parameters(),
        grad_clip=fluid.clip.GradientClipByGlobalNorm(cfg['train'][
            'grad_clip_thresh']))
    reader = LJSpeechLoader(
        cfg['audio'],
        place,
        args.data,
        args.alignments_path,
        cfg['train']['batch_size'],
        nranks,
        local_rank,
        shuffle=True).reader
    iterator = iter(tqdm(reader))

    # Load parameters.
    global_step = io.load_parameters(
        model=model,
        optimizer=optimizer,
        checkpoint_dir=os.path.join(args.output, 'checkpoints'),
        iteration=args.iteration,
        checkpoint_path=args.checkpoint)
    print("Rank {}: checkpoint loaded.".format(local_rank))

    if parallel:
        strategy = dg.parallel.prepare_context()
        model = fluid.dygraph.parallel.DataParallel(model, strategy)

    while global_step <= cfg['train']['max_iteration']:
        try:
            batch = next(iterator)
        except StopIteration as e:
            iterator = iter(tqdm(reader))
            batch = next(iterator)

        (character, mel, pos_text, pos_mel, alignment) = batch

        global_step += 1

        #Forward
        result = model(
            character, pos_text, mel_pos=pos_mel, length_target=alignment)
        mel_output, mel_output_postnet, duration_predictor_output, _, _ = result
        mel_loss = layers.mse_loss(mel_output, mel)
        mel_postnet_loss = layers.mse_loss(mel_output_postnet, mel)
        duration_loss = layers.mean(
            layers.abs(
                layers.elementwise_sub(duration_predictor_output, alignment)))
        total_loss = mel_loss + mel_postnet_loss + duration_loss

        if local_rank == 0:
            writer.add_scalar('mel_loss', mel_loss.numpy(), global_step)
            writer.add_scalar('post_mel_loss',
                              mel_postnet_loss.numpy(), global_step)
            writer.add_scalar('duration_loss',
                              duration_loss.numpy(), global_step)
            writer.add_scalar('learning_rate',
                              optimizer._learning_rate.step().numpy(),
                              global_step)

        if parallel:
            total_loss = model.scale_loss(total_loss)
            total_loss.backward()
            model.apply_collective_grads()
        else:
            total_loss.backward()
        optimizer.minimize(total_loss)
        model.clear_gradients()

        # save checkpoint
        if local_rank == 0 and global_step % cfg['train'][
                'checkpoint_interval'] == 0:
            io.save_parameters(
                os.path.join(args.output, 'checkpoints'), global_step, model,
                optimizer)

    if local_rank == 0:
        writer.close()
Esempio n. 6
0
def main(args):
    local_rank = dg.parallel.Env().local_rank if args.use_data_parallel else 0
    nranks = dg.parallel.Env().nranks if args.use_data_parallel else 1

    with open(args.config_path) as f:
        cfg = yaml.load(f, Loader=yaml.Loader)

    global_step = 0
    place = (fluid.CUDAPlace(dg.parallel.Env().dev_id)
             if args.use_data_parallel else fluid.CUDAPlace(0)
             if args.use_gpu else fluid.CPUPlace())

    if not os.path.exists(args.log_dir):
        os.mkdir(args.log_dir)
    path = os.path.join(args.log_dir, 'fastspeech')

    writer = SummaryWriter(path) if local_rank == 0 else None

    with dg.guard(place):
        with fluid.unique_name.guard():
            transformer_tts = TransformerTTS(cfg)
            model_dict, _ = load_checkpoint(
                str(args.transformer_step),
                os.path.join(args.transtts_path, "transformer"))
            transformer_tts.set_dict(model_dict)
            transformer_tts.eval()

        model = FastSpeech(cfg)
        model.train()
        optimizer = fluid.optimizer.AdamOptimizer(
            learning_rate=dg.NoamDecay(1 / (
                cfg['warm_up_step'] * (args.lr**2)), cfg['warm_up_step']),
            parameter_list=model.parameters())
        reader = LJSpeechLoader(
            cfg, args, nranks, local_rank, shuffle=True).reader()

        if args.checkpoint_path is not None:
            model_dict, opti_dict = load_checkpoint(
                str(args.fastspeech_step),
                os.path.join(args.checkpoint_path, "fastspeech"))
            model.set_dict(model_dict)
            optimizer.set_dict(opti_dict)
            global_step = args.fastspeech_step
            print("load checkpoint!!!")

        if args.use_data_parallel:
            strategy = dg.parallel.prepare_context()
            model = fluid.dygraph.parallel.DataParallel(model, strategy)

        for epoch in range(args.epochs):
            pbar = tqdm(reader)

            for i, data in enumerate(pbar):
                pbar.set_description('Processing at epoch %d' % epoch)
                (character, mel, mel_input, pos_text, pos_mel, text_length,
                 mel_lens, enc_slf_mask, enc_query_mask, dec_slf_mask,
                 enc_dec_mask, dec_query_slf_mask, dec_query_mask) = data

                _, _, attn_probs, _, _, _ = transformer_tts(
                    character,
                    mel_input,
                    pos_text,
                    pos_mel,
                    dec_slf_mask=dec_slf_mask,
                    enc_slf_mask=enc_slf_mask,
                    enc_query_mask=enc_query_mask,
                    enc_dec_mask=enc_dec_mask,
                    dec_query_slf_mask=dec_query_slf_mask,
                    dec_query_mask=dec_query_mask)
                alignment, max_attn = get_alignment(attn_probs, mel_lens,
                                                    cfg['transformer_head'])
                alignment = dg.to_variable(alignment).astype(np.float32)

                if local_rank == 0 and global_step % 5 == 1:
                    x = np.uint8(
                        cm.viridis(max_attn[8, :mel_lens.numpy()[8]]) * 255)
                    writer.add_image(
                        'Attention_%d_0' % global_step,
                        x,
                        0,
                        dataformats="HWC")

                global_step += 1

                #Forward
                result = model(
                    character,
                    pos_text,
                    mel_pos=pos_mel,
                    length_target=alignment,
                    enc_non_pad_mask=enc_query_mask,
                    enc_slf_attn_mask=enc_slf_mask,
                    dec_non_pad_mask=dec_query_slf_mask,
                    dec_slf_attn_mask=dec_slf_mask)
                mel_output, mel_output_postnet, duration_predictor_output, _, _ = result
                mel_loss = layers.mse_loss(mel_output, mel)
                mel_postnet_loss = layers.mse_loss(mel_output_postnet, mel)
                duration_loss = layers.mean(
                    layers.abs(
                        layers.elementwise_sub(duration_predictor_output,
                                               alignment)))
                total_loss = mel_loss + mel_postnet_loss + duration_loss

                if local_rank == 0:
                    writer.add_scalar('mel_loss',
                                      mel_loss.numpy(), global_step)
                    writer.add_scalar('post_mel_loss',
                                      mel_postnet_loss.numpy(), global_step)
                    writer.add_scalar('duration_loss',
                                      duration_loss.numpy(), global_step)
                    writer.add_scalar('learning_rate',
                                      optimizer._learning_rate.step().numpy(),
                                      global_step)

                if args.use_data_parallel:
                    total_loss = model.scale_loss(total_loss)
                    total_loss.backward()
                    model.apply_collective_grads()
                else:
                    total_loss.backward()
                optimizer.minimize(
                    total_loss,
                    grad_clip=fluid.dygraph_grad_clip.GradClipByGlobalNorm(cfg[
                        'grad_clip_thresh']))
                model.clear_gradients()

                # save checkpoint
                if local_rank == 0 and global_step % args.save_step == 0:
                    if not os.path.exists(args.save_path):
                        os.mkdir(args.save_path)
                    save_path = os.path.join(args.save_path,
                                             'fastspeech/%d' % global_step)
                    dg.save_dygraph(model.state_dict(), save_path)
                    dg.save_dygraph(optimizer.state_dict(), save_path)
        if local_rank == 0:
            writer.close()
Esempio n. 7
0
import paddle.fluid.layers as layers
import paddleslim as slim

places = fluid.cpu_places()
place = places[0]
exe = fluid.Executor(place)

train_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(train_program, startup_program):
    with fluid.unique_name.guard():
        x = layers.data("x", shape=[-1, 10], dtype="float32")
        label = layers.data("y", shape=[-1, 1], dtype="float32")
        loader = fluid.io.DataLoader.from_generator([x, label], 1)
        y = layers.fc(x, size=1, param_attr="fc.w_0", bias_attr="fc.b_0")
        loss = layers.mse_loss(y, label)
        avg_loss = layers.mean(loss)
        opt = fluid.optimizer.Adam()
        opt.minimize(avg_loss)
exe.run(startup_program)


def data_generator():
    x_np = np.random.rand(10, 10).astype("float32")
    y_np = np.random.rand(10, 1).astype("float32")

    def __generator__():
        print("haha")
        for i in range(0, 10, 2):
            print(i)
            yield x_np[i:i + 2], y_np[i:i + 2]
Esempio n. 8
0
 def forward(self, x, y):
     return L.mse_loss(x, y)