def forward(self, batch_wave, lengths, phone, len_phone, label_smooth=0., threshold=0.95):
    """Run one training step and return the CTC, quantity, and phone CE losses.

    Pipeline: subsampling layer -> encoder -> (a) CTC head, (b) CIF weight
    assigner -> CIF integrate-and-fire -> phone classifier.

    Args:
        batch_wave: padded waveform batch; its ``.device`` decides where the
            scaling noise is allocated.
        lengths: valid length of each waveform in the batch.
        phone: padded phone-id targets; id 0 is treated as padding.
        len_phone: number of valid phone tokens per utterance.
        label_smooth: label-smoothing factor for the cross-entropy loss.
        threshold: firing threshold forwarded to the CIF module.

    Returns:
        Tuple ``(ctc_loss, qua_loss, ce_phone_loss)``.
    """
    device = batch_wave.device
    phone_paddings = phone.eq(0).float()
    encoder_outputs, encoder_output_lengths = self.splayer(batch_wave, lengths)
    encoder_outputs, encoder_output_lengths = self.encoder(
        encoder_outputs, encoder_output_lengths)
    ctc_logits = self.ctc_fc(encoder_outputs)
    len_logits_ctc = encoder_output_lengths
    alphas = self.assigner(encoder_outputs, encoder_output_lengths)
    # Sum of per-frame CIF weights = predicted number of output tokens;
    # kept unscaled for the quantity loss below.
    _num = alphas.sum(-1)
    # Scaling: rescale alphas so they sum to the true target length plus
    # uniform noise in (-0.45, 0.45), which regularizes the CIF firing points.
    num = len_phone.float()
    # Allocate the noise directly on the target device instead of
    # torch.rand(...).to(device), avoiding a CPU allocation + transfer.
    num_noise = num + 0.9 * torch.rand(alphas.size(0), device=device) - 0.45
    # Broadcasting over the time dimension replaces the original
    # [:, None].repeat(1, T) copy; out-of-place multiply also avoids an
    # in-place op on the assigner's output (autograd-safer, same values).
    alphas = alphas * (num_noise / _num)[:, None]
    cif_outputs = self.cif(encoder_outputs, alphas, threshold=threshold)
    logits_IPA = self.phone_fc(cif_outputs)
    ctc_loss = cal_ctc_loss(ctc_logits, len_logits_ctc, phone, len_phone)
    qua_loss = cal_qua_loss(_num, num)
    ce_phone_loss = cal_ce_loss(logits_IPA, phone, phone_paddings, label_smooth)
    return ctc_loss, qua_loss, ce_phone_loss
def forward(self, batch_wave, lengths, target_ids, target_labels=None, target_paddings=None, label_smooth=0., threshold=0.95):
    """Run one training step and return the CTC, quantity, and CE losses.

    Pipeline: subsampling layer -> encoder -> (a) CTC head, (b) CIF weight
    assigner -> CIF integrate-and-fire -> autoregressive decoder.

    Args:
        batch_wave: padded waveform batch; its ``.device`` decides where the
            scaling noise is allocated.
        lengths: valid length of each waveform in the batch.
        target_ids: decoder input token ids (teacher forcing).
        target_labels: target token ids scored by CTC and CE losses.
        target_paddings: 1 at padded target positions, 0 at valid ones.
        label_smooth: label-smoothing factor for the cross-entropy loss.
        threshold: firing threshold forwarded to the CIF module.

    Returns:
        Tuple ``(ctc_loss, qua_loss, ce_loss)``.

    NOTE(review): ``target_labels`` and ``target_paddings`` default to None
    but are used unconditionally, so both are effectively required during
    training — verify no caller relies on the defaults.
    """
    device = batch_wave.device
    target_lengths = torch.sum(1 - target_paddings, dim=-1).long()
    encoder_outputs, encoder_output_lengths = self.splayer(batch_wave, lengths)
    encoder_outputs, encoder_output_lengths = self.encoder(
        encoder_outputs, encoder_output_lengths)
    ctc_logits = self.ctc_fc(encoder_outputs)
    len_logits_ctc = encoder_output_lengths
    alphas = self.assigner(encoder_outputs, encoder_output_lengths)
    # Sum of per-frame CIF weights = predicted number of output tokens;
    # kept unscaled for the quantity loss below.
    _num = alphas.sum(-1)
    # Scaling: rescale alphas so they sum to the true target length plus
    # uniform noise in (-0.45, 0.45), which regularizes the CIF firing points.
    num = target_lengths.float()
    # Allocate the noise directly on the target device instead of
    # torch.rand(...).to(device), avoiding a CPU allocation + transfer.
    num_noise = num + 0.9 * torch.rand(alphas.size(0), device=device) - 0.45
    # Broadcasting over the time dimension replaces the original
    # [:, None].repeat(1, T) copy; out-of-place multiply also avoids an
    # in-place op on the assigner's output (autograd-safer, same values).
    alphas = alphas * (num_noise / _num)[:, None]
    cif_outputs = self.cif(encoder_outputs, alphas, threshold=threshold)
    logits = self.decoder(cif_outputs, target_ids, target_lengths)
    ctc_loss = cal_ctc_loss(ctc_logits, len_logits_ctc, target_labels, target_lengths)
    qua_loss = cal_qua_loss(_num, num)
    ce_loss = cal_ce_loss(logits, target_labels, target_paddings, label_smooth)
    return ctc_loss, qua_loss, ce_loss
def forward(self, batch_wave, lengths, target_ids, target_labels=None, target_paddings=None, label_smooth=0.):
    """Score a batch with label-smoothed cross-entropy.

    Derives per-utterance target lengths from the padding mask (1 marks a
    padded position), obtains logits from ``get_logits``, and returns the
    cross-entropy loss against ``target_labels``.
    """
    # Count valid (non-padded) target positions per utterance.
    len_targets = (1 - target_paddings).sum(dim=-1).long()
    logits = self.get_logits(batch_wave, lengths, target_ids, len_targets)
    return cal_ce_loss(logits, target_labels, target_paddings, label_smooth)
def forward(self, batch_wave, lengths, target_ids, target_labels=None, target_paddings=None, label_smooth=0.):
    """Score a batch with both CTC and label-smoothed cross-entropy losses.

    ``get_logits`` yields the CTC head outputs (with their lengths) and the
    decoder logits; both are scored against ``target_labels``.

    Returns:
        Tuple ``(ctc_loss, ce_loss)``.
    """
    # Count valid (non-padded) target positions per utterance.
    num_tokens = (1 - target_paddings).sum(dim=-1).long()
    ctc_logits, len_logits_ctc, ce_logits = self.get_logits(
        batch_wave, lengths, target_ids, num_tokens)
    # The CTC target count excludes the blk token, hence length - 1.
    loss_ctc = cal_ctc_loss(ctc_logits, len_logits_ctc, target_labels, num_tokens - 1)
    loss_ce = cal_ce_loss(ce_logits, target_labels, target_paddings, label_smooth)
    return loss_ctc, loss_ce
def forward(self, tokens_input, len_input, target_input, target_output, target_paddings, label_smooth=0.):
    """Score token inputs with label-smoothed cross-entropy.

    Derives target lengths from the padding mask (1 marks a padded
    position), obtains logits from ``get_logits``, and returns the
    cross-entropy loss against ``target_output``.
    """
    # Count valid (non-padded) target positions per utterance.
    num_targets = (1 - target_paddings).sum(dim=-1).long()
    logits = self.get_logits(tokens_input, len_input, target_input, num_targets)
    return cal_ce_loss(logits, target_output, target_paddings, label_smooth)