Example no. 1
0
    def forward(self, batch_wave, lengths, phone, len_phone, label_smooth=0., threshold=0.95):
        """Run one training forward pass and return the three training losses.

        Args:
            batch_wave: batch of input waveforms (device is taken from this tensor).
            lengths: per-utterance input lengths for the front-end.
            phone: target phone id sequences; id 0 is treated as padding.
            len_phone: per-utterance target lengths.
            label_smooth: label-smoothing factor for the cross-entropy loss.
            threshold: firing threshold handed to the CIF module.

        Returns:
            Tuple of (ctc_loss, qua_loss, ce_phone_loss).
        """
        device = batch_wave.device
        # Padding mask for the phone targets (1.0 where phone id == 0).
        phone_paddings = phone.eq(0).float()

        # Acoustic front-end followed by the encoder stack.
        enc_out, enc_len = self.splayer(batch_wave, lengths)
        enc_out, enc_len = self.encoder(enc_out, enc_len)

        ctc_logits = self.ctc_fc(enc_out)
        len_logits_ctc = enc_len

        # Per-frame firing weights for CIF.
        alphas = self.assigner(enc_out, enc_len)

        # Predicted token count per utterance (kept unscaled for the quantity loss).
        pred_count = alphas.sum(-1)

        # Rescale alphas so their sum matches the target length, jittered by
        # uniform noise in [-0.45, 0.45) — presumably a training regularizer.
        target_count = len_phone.float()
        noisy_count = target_count + 0.9 * torch.rand(alphas.size(0)).to(device) - 0.45
        scale = (noisy_count / pred_count)[:, None].repeat(1, alphas.size(1))
        alphas *= scale

        cif_outputs = self.cif(enc_out, alphas, threshold=threshold)
        logits_IPA = self.phone_fc(cif_outputs)

        ctc_loss = cal_ctc_loss(ctc_logits, len_logits_ctc, phone, len_phone)
        qua_loss = cal_qua_loss(pred_count, target_count)
        ce_phone_loss = cal_ce_loss(logits_IPA, phone, phone_paddings, label_smooth)

        return ctc_loss, qua_loss, ce_phone_loss
Example no. 2
0
    def forward(self, batch_wave, lengths, target_ids, target_labels=None, target_paddings=None, label_smooth=0., threshold=0.95):
        """Run one training forward pass and return the three training losses.

        Args:
            batch_wave: batch of input waveforms (device is taken from this tensor).
            lengths: per-utterance input lengths for the front-end.
            target_ids: decoder input token ids.
            target_labels: decoder target token ids.
            target_paddings: 0/1 padding mask over targets (1 = padding).
            label_smooth: label-smoothing factor for the cross-entropy loss.
            threshold: firing threshold handed to the CIF module.

        Returns:
            Tuple of (ctc_loss, qua_loss, ce_loss).

        NOTE(review): the None defaults for target_labels/target_paddings are
        unusable — target_paddings is dereferenced immediately below, so
        callers must always supply both.
        """
        device = batch_wave.device
        # Number of real (non-padding) target tokens per sequence.
        target_lengths = (1 - target_paddings).sum(dim=-1).long()

        # Acoustic front-end followed by the encoder stack.
        enc_out, enc_len = self.splayer(batch_wave, lengths)
        enc_out, enc_len = self.encoder(enc_out, enc_len)

        ctc_logits = self.ctc_fc(enc_out)
        len_logits_ctc = enc_len

        # Per-frame firing weights for CIF.
        alphas = self.assigner(enc_out, enc_len)

        # Predicted token count per utterance (kept unscaled for the quantity loss).
        pred_count = alphas.sum(-1)

        # Rescale alphas so their sum matches the target length, jittered by
        # uniform noise in [-0.45, 0.45) — presumably a training regularizer.
        target_count = target_lengths.float()
        noisy_count = target_count + 0.9 * torch.rand(alphas.size(0)).to(device) - 0.45
        scale = (noisy_count / pred_count)[:, None].repeat(1, alphas.size(1))
        alphas *= scale

        cif_outputs = self.cif(enc_out, alphas, threshold=threshold)
        logits = self.decoder(cif_outputs, target_ids, target_lengths)

        ctc_loss = cal_ctc_loss(ctc_logits, len_logits_ctc, target_labels, target_lengths)
        qua_loss = cal_qua_loss(pred_count, target_count)
        ce_loss = cal_ce_loss(logits, target_labels, target_paddings, label_smooth)

        return ctc_loss, qua_loss, ce_loss
Example no. 3
0
    def forward(self, batch_wave, lengths, target_ids, target_labels=None, target_paddings=None, label_smooth=0.):
        """Return the label-smoothed cross-entropy loss for one batch.

        NOTE(review): the None defaults are unusable — target_paddings is
        dereferenced immediately below, so callers must always supply both
        target_labels and target_paddings.
        """
        # Number of real (non-padding) target tokens per sequence.
        target_lengths = (1 - target_paddings).sum(dim=-1).long()
        logits = self.get_logits(batch_wave, lengths, target_ids, target_lengths)
        return cal_ce_loss(logits, target_labels, target_paddings, label_smooth)
Example no. 4
0
    def forward(self, batch_wave, lengths, target_ids, target_labels=None, target_paddings=None, label_smooth=0.):
        """Return the (ctc_loss, ce_loss) pair for one training batch.

        NOTE(review): the None defaults are unusable — target_paddings is
        dereferenced immediately below, so callers must always supply both
        target_labels and target_paddings.
        """
        # Number of real (non-padding) target tokens per sequence.
        target_lengths = (1 - target_paddings).sum(dim=-1).long()
        ctc_logits, len_logits_ctc, ce_logits = self.get_logits(
            batch_wave, lengths, target_ids, target_lengths)

        # CTC targets exclude the blank symbol, hence length - 1.
        ctc_loss = cal_ctc_loss(ctc_logits, len_logits_ctc, target_labels, target_lengths - 1)
        ce_loss = cal_ce_loss(ce_logits, target_labels, target_paddings, label_smooth)

        return ctc_loss, ce_loss
Example no. 5
0
    def forward(self,
                tokens_input,
                len_input,
                target_input,
                target_output,
                target_paddings,
                label_smooth=0.):
        """Return the label-smoothed cross-entropy loss for one batch.

        Args:
            tokens_input: input token ids.
            len_input: per-sequence input lengths.
            target_input: decoder input token ids.
            target_output: expected output token ids.
            target_paddings: 0/1 padding mask over targets (1 = padding).
            label_smooth: label-smoothing factor for the cross-entropy loss.
        """
        # Number of real (non-padding) target tokens per sequence.
        len_targets = (1 - target_paddings).sum(dim=-1).long()
        logits = self.get_logits(tokens_input, len_input, target_input, len_targets)
        return cal_ce_loss(logits, target_output, target_paddings, label_smooth)