예제 #1
0
파일: model.py 프로젝트: HERECJ/two_pass
    def loss_function(self,
                      neg_rat,
                      neg_prob,
                      pos_rat,
                      reduction=False,
                      weighted=False,
                      pos_rank=None,
                      **kwargs):
        """
        Bpr loss
        """
        pred = torch.subtract(pos_rat.unsqueeze(1), neg_rat)

        if weighted:
            importance = F.softmax(torch.negative(pred) - neg_prob, dim=1)
        else:
            importance = F.softmax(torch.ones_like(pred), dim=1)

        if pos_rank is not None:
            importance = importance * pos_rank

        weight_loss = torch.multiply(importance.detach(),
                                     torch.negative(F.logsigmoid(pred)))
        if reduction:
            return torch.sum(weight_loss, dim=-1).mean(-1)
        else:
            return torch.sum(weight_loss, dim=-1).sum(-1)
예제 #2
0
def calculateInvRTTensor(R, T):

    # Extract the rotation matrix from 4x4 matrix
    #R = RT[:,:3,:3]

    # Extract the translation matrix from 4x4 matrix
    #T = RT[:,:3,3]

    T = torch.unsqueeze(T, dim=2)

    # Calculate the inverse of the matrix
    invR = torch.inverse(R)
    invT = torch.negative(torch.matmul(invR, T))
    invRT = torch.cat((invR, invT), dim=2)

    zeros = torch.zeros((R.shape[0], 1, 4))
    ones = torch.ones((R.shape[0], 1))
    zeros[:, :, 3] = ones[:]

    invRT = torch.cat((invRT, zeros.to('cuda:' + str(invRT.get_device()))),
                      dim=1)

    #print(invRT)

    return (invRT)
예제 #3
0
    def forward(
        self,
        postnet_output,
        decoder_output,
        mel_input,
        linear_input,
        stopnet_output,
        stopnet_target,
        stop_target_length,
        capacitron_vae_outputs,
        output_lens,
        decoder_b_output,
        alignments,
        alignment_lens,
        alignments_backwards,
        input_lens,
    ):

        # decoder outputs linear or mel spectrograms for Tacotron and Tacotron2
        # the target should be set acccordingly
        postnet_target = linear_input if self.config.model.lower() in ["tacotron"] else mel_input

        return_dict = {}
        # remove lengths if no masking is applied
        if not self.config.loss_masking:
            output_lens = None
        # decoder and postnet losses
        if self.config.loss_masking:
            if self.decoder_alpha > 0:
                decoder_loss = self.criterion(decoder_output, mel_input, output_lens)
            if self.postnet_alpha > 0:
                postnet_loss = self.criterion(postnet_output, postnet_target, output_lens)
        else:
            if self.decoder_alpha > 0:
                decoder_loss = self.criterion(decoder_output, mel_input)
            if self.postnet_alpha > 0:
                postnet_loss = self.criterion(postnet_output, postnet_target)
        loss = self.decoder_alpha * decoder_loss + self.postnet_alpha * postnet_loss
        return_dict["decoder_loss"] = decoder_loss
        return_dict["postnet_loss"] = postnet_loss

        if self.use_capacitron_vae:
            # extract capacitron vae infos
            posterior_distribution, prior_distribution, beta = capacitron_vae_outputs

            # KL divergence term between the posterior and the prior
            kl_term = torch.mean(torch.distributions.kl_divergence(posterior_distribution, prior_distribution))

            # Limit the mutual information between the data and latent space by the variational capacity limit
            kl_capacity = kl_term - self.capacitron_capacity

            # pass beta through softplus to keep it positive
            beta = torch.nn.functional.softplus(beta)[0]

            # This is the term going to the main ADAM optimiser, we detach beta because
            # beta is optimised by a separate, SGD optimiser below
            capacitron_vae_loss = beta.detach() * kl_capacity

            # normalize the capacitron_vae_loss as in L1Loss or MSELoss.
            # After this, both the standard loss and capacitron_vae_loss will be in the same scale.
            # For this reason we don't need use L1Loss and MSELoss in "sum" reduction mode.
            # Note: the batch is not considered because the L1Loss was calculated in "sum" mode
            # divided by the batch size, So not dividing the capacitron_vae_loss by B is legitimate.

            # get B T D dimension from input
            B, T, D = mel_input.size()
            # normalize
            if self.config.loss_masking:
                # if mask loss get T using the mask
                T = output_lens.sum() / B

            # Only for dev purposes to be able to compare the reconstruction loss with the values in the
            # original Capacitron paper
            return_dict["capaciton_reconstruction_loss"] = (
                self.criterion_capacitron_reconstruction_loss(decoder_output, mel_input) / decoder_output.size(0)
            ) + kl_capacity

            capacitron_vae_loss = capacitron_vae_loss / (T * D)
            capacitron_vae_loss = capacitron_vae_loss * self.capacitron_vae_loss_alpha

            # This is the term to purely optimise beta and to pass into the SGD optimizer
            beta_loss = torch.negative(beta) * kl_capacity.detach()

            loss += capacitron_vae_loss

            return_dict["capacitron_vae_loss"] = capacitron_vae_loss
            return_dict["capacitron_vae_beta_loss"] = beta_loss
            return_dict["capacitron_vae_kl_term"] = kl_term
            return_dict["capacitron_beta"] = beta

        stop_loss = (
            self.criterion_st(stopnet_output, stopnet_target, stop_target_length)
            if self.config.stopnet
            else torch.zeros(1)
        )
        loss += stop_loss
        return_dict["stopnet_loss"] = stop_loss

        # backward decoder loss (if enabled)
        if self.config.bidirectional_decoder:
            if self.config.loss_masking:
                decoder_b_loss = self.criterion(torch.flip(decoder_b_output, dims=(1,)), mel_input, output_lens)
            else:
                decoder_b_loss = self.criterion(torch.flip(decoder_b_output, dims=(1,)), mel_input)
            decoder_c_loss = torch.nn.functional.l1_loss(torch.flip(decoder_b_output, dims=(1,)), decoder_output)
            loss += self.decoder_alpha * (decoder_b_loss + decoder_c_loss)
            return_dict["decoder_b_loss"] = decoder_b_loss
            return_dict["decoder_c_loss"] = decoder_c_loss

        # double decoder consistency loss (if enabled)
        if self.config.double_decoder_consistency:
            if self.config.loss_masking:
                decoder_b_loss = self.criterion(decoder_b_output, mel_input, output_lens)
            else:
                decoder_b_loss = self.criterion(decoder_b_output, mel_input)
            # decoder_c_loss = torch.nn.functional.l1_loss(decoder_b_output, decoder_output)
            attention_c_loss = torch.nn.functional.l1_loss(alignments, alignments_backwards)
            loss += self.decoder_alpha * (decoder_b_loss + attention_c_loss)
            return_dict["decoder_coarse_loss"] = decoder_b_loss
            return_dict["decoder_ddc_loss"] = attention_c_loss

        # guided attention loss (if enabled)
        if self.config.ga_alpha > 0:
            ga_loss = self.criterion_ga(alignments, input_lens, alignment_lens)
            loss += ga_loss * self.ga_alpha
            return_dict["ga_loss"] = ga_loss

        # decoder differential spectral loss
        if self.config.decoder_diff_spec_alpha > 0:
            decoder_diff_spec_loss = self.criterion_diff_spec(decoder_output, mel_input, output_lens)
            loss += decoder_diff_spec_loss * self.decoder_diff_spec_alpha
            return_dict["decoder_diff_spec_loss"] = decoder_diff_spec_loss

        # postnet differential spectral loss
        if self.config.postnet_diff_spec_alpha > 0:
            postnet_diff_spec_loss = self.criterion_diff_spec(postnet_output, postnet_target, output_lens)
            loss += postnet_diff_spec_loss * self.postnet_diff_spec_alpha
            return_dict["postnet_diff_spec_loss"] = postnet_diff_spec_loss

        # decoder ssim loss
        if self.config.decoder_ssim_alpha > 0:
            decoder_ssim_loss = self.criterion_ssim(decoder_output, mel_input, output_lens)
            loss += decoder_ssim_loss * self.postnet_ssim_alpha
            return_dict["decoder_ssim_loss"] = decoder_ssim_loss

        # postnet ssim loss
        if self.config.postnet_ssim_alpha > 0:
            postnet_ssim_loss = self.criterion_ssim(postnet_output, postnet_target, output_lens)
            loss += postnet_ssim_loss * self.postnet_ssim_alpha
            return_dict["postnet_ssim_loss"] = postnet_ssim_loss

        return_dict["loss"] = loss
        return return_dict
예제 #4
0
def min_max_scaler(input):
    min = torch.amin(input, dim=2, keepdim=True, out=None)  # mix max scaler
    input = torch.add(input, torch.negative(min), out=None)  # add min
    max = torch.amax(input, dim=2, keepdim=True, out=None)  # calculate max
    return torch.div(input, max, out=None)  # devide by max