import torch
import torch.nn.functional as F


def loss_function(self, neg_rat, neg_prob, pos_rat, reduction=False, weighted=False, pos_rank=None, **kwargs):
    """BPR loss."""
    # score margin between the positive item and each sampled negative
    pred = torch.subtract(pos_rat.unsqueeze(1), neg_rat)
    if weighted:
        # weight negatives by their negated margin, corrected by the sampling log-probability
        importance = F.softmax(torch.negative(pred) - neg_prob, dim=1)
    else:
        # uniform weights over the sampled negatives
        importance = F.softmax(torch.ones_like(pred), dim=1)
    if pos_rank is not None:
        importance = importance * pos_rank
    # the weights are detached so that only the ranking term receives gradients
    weight_loss = torch.multiply(importance.detach(), torch.negative(F.logsigmoid(pred)))
    if reduction:
        return torch.sum(weight_loss, dim=-1).mean(-1)
    return torch.sum(weight_loss, dim=-1).sum(-1)
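# --- Hedged usage sketch (not from the original source) --------------------------------
# The shapes below are illustrative assumptions: pos_rat holds positive-item scores (B,),
# while neg_rat / neg_prob hold scores and sampling log-probabilities of K negatives (B, K).
# `self` is unused inside the loss, so it is passed as None purely for this sketch.
B, K = 4, 10
pos_rat = torch.randn(B)
neg_rat = torch.randn(B, K)
neg_prob = torch.randn(B, K)
bpr = loss_function(None, neg_rat, neg_prob, pos_rat, weighted=True, reduction=True)
print(bpr.item())  # scalar weighted BPR loss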
def calculateInvRTTensor(R, T):
    """Invert a batch of rigid transforms given rotations R (B, 3, 3) and translations T (B, 3)."""
    # Extract the rotation matrix from the 4x4 matrix
    # R = RT[:, :3, :3]
    # Extract the translation vector from the 4x4 matrix
    # T = RT[:, :3, 3]
    T = torch.unsqueeze(T, dim=2)

    # Inverse transform: R^-1 and -R^-1 @ T
    invR = torch.inverse(R)
    invT = torch.negative(torch.matmul(invR, T))
    invRT = torch.cat((invR, invT), dim=2)

    # Append the homogeneous row [0, 0, 0, 1] to every matrix in the batch,
    # on the same device as the rest of the result
    zeros = torch.zeros((R.shape[0], 1, 4))
    ones = torch.ones((R.shape[0], 1))
    zeros[:, :, 3] = ones[:]
    invRT = torch.cat((invRT, zeros.to(invRT.device)), dim=1)

    return invRT
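# --- Hedged usage sketch (not from the original source) --------------------------------
# Builds a small batch of random rigid transforms and checks that calculateInvRTTensor
# returns their 4x4 inverses: invRT @ RT should be (approximately) the identity.
B = 2
# Take the orthonormal Q factor of random matrices as rotation-like matrices (assumption:
# any orthonormal 3x3 matrix, reflections included, is acceptable for this check).
R, _ = torch.linalg.qr(torch.randn(B, 3, 3))
T = torch.randn(B, 3)

RT = torch.cat((R, T.unsqueeze(2)), dim=2)                   # (B, 3, 4)
bottom = torch.tensor([0.0, 0.0, 0.0, 1.0]).expand(B, 1, 4)  # homogeneous row
RT = torch.cat((RT, bottom), dim=1)                          # (B, 4, 4)

invRT = calculateInvRTTensor(R, T)
print(torch.allclose(torch.matmul(invRT, RT), torch.eye(4).expand(B, 4, 4), atol=1e-4))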
def forward(
    self,
    postnet_output,
    decoder_output,
    mel_input,
    linear_input,
    stopnet_output,
    stopnet_target,
    stop_target_length,
    capacitron_vae_outputs,
    output_lens,
    decoder_b_output,
    alignments,
    alignment_lens,
    alignments_backwards,
    input_lens,
):
    # the decoder outputs linear or mel spectrograms for Tacotron and Tacotron2;
    # the target should be set accordingly
    postnet_target = linear_input if self.config.model.lower() in ["tacotron"] else mel_input

    return_dict = {}

    # remove lengths if no masking is applied
    if not self.config.loss_masking:
        output_lens = None

    # decoder and postnet losses
    if self.config.loss_masking:
        if self.decoder_alpha > 0:
            decoder_loss = self.criterion(decoder_output, mel_input, output_lens)
        if self.postnet_alpha > 0:
            postnet_loss = self.criterion(postnet_output, postnet_target, output_lens)
    else:
        if self.decoder_alpha > 0:
            decoder_loss = self.criterion(decoder_output, mel_input)
        if self.postnet_alpha > 0:
            postnet_loss = self.criterion(postnet_output, postnet_target)
    loss = self.decoder_alpha * decoder_loss + self.postnet_alpha * postnet_loss
    return_dict["decoder_loss"] = decoder_loss
    return_dict["postnet_loss"] = postnet_loss

    if self.use_capacitron_vae:
        # extract the capacitron vae outputs
        posterior_distribution, prior_distribution, beta = capacitron_vae_outputs

        # KL divergence term between the posterior and the prior
        kl_term = torch.mean(torch.distributions.kl_divergence(posterior_distribution, prior_distribution))

        # limit the mutual information between the data and the latent space by the variational capacity limit
        kl_capacity = kl_term - self.capacitron_capacity

        # pass beta through softplus to keep it positive
        beta = torch.nn.functional.softplus(beta)[0]

        # This is the term going to the main ADAM optimiser; beta is detached because
        # it is optimised by a separate SGD optimiser below.
        capacitron_vae_loss = beta.detach() * kl_capacity

        # Normalize capacitron_vae_loss as in L1Loss or MSELoss.
        # After this, both the standard loss and capacitron_vae_loss are on the same scale,
        # so there is no need to run L1Loss and MSELoss in "sum" reduction mode.
        # Note: the batch dimension is not considered here because the L1Loss was already
        # calculated in "sum" mode and divided by the batch size, so not dividing
        # capacitron_vae_loss by B is legitimate.
        # get the B, T, D dimensions from the input
        B, T, D = mel_input.size()
        # normalize
        if self.config.loss_masking:
            # if the loss is masked, get T from the mask
            T = output_lens.sum() / B

        # Only for dev purposes, to be able to compare the reconstruction loss with the
        # values in the original Capacitron paper.
        return_dict["capaciton_reconstruction_loss"] = (
            self.criterion_capacitron_reconstruction_loss(decoder_output, mel_input) / decoder_output.size(0)
        ) + kl_capacity

        capacitron_vae_loss = capacitron_vae_loss / (T * D)
        capacitron_vae_loss = capacitron_vae_loss * self.capacitron_vae_loss_alpha

        # This is the term that purely optimises beta; it is passed to the SGD optimizer.
        beta_loss = torch.negative(beta) * kl_capacity.detach()

        loss += capacitron_vae_loss

        return_dict["capacitron_vae_loss"] = capacitron_vae_loss
        return_dict["capacitron_vae_beta_loss"] = beta_loss
        return_dict["capacitron_vae_kl_term"] = kl_term
        return_dict["capacitron_beta"] = beta

    # stopnet loss
    stop_loss = (
        self.criterion_st(stopnet_output, stopnet_target, stop_target_length)
        if self.config.stopnet
        else torch.zeros(1)
    )
    loss += stop_loss
    return_dict["stopnet_loss"] = stop_loss

    # backward decoder loss (if enabled)
    if self.config.bidirectional_decoder:
        if self.config.loss_masking:
            decoder_b_loss = self.criterion(torch.flip(decoder_b_output, dims=(1,)), mel_input, output_lens)
        else:
            decoder_b_loss = self.criterion(torch.flip(decoder_b_output, dims=(1,)), mel_input)
        decoder_c_loss = torch.nn.functional.l1_loss(torch.flip(decoder_b_output, dims=(1,)), decoder_output)
        loss += self.decoder_alpha * (decoder_b_loss + decoder_c_loss)
        return_dict["decoder_b_loss"] = decoder_b_loss
        return_dict["decoder_c_loss"] = decoder_c_loss

    # double decoder consistency loss (if enabled)
    if self.config.double_decoder_consistency:
        if self.config.loss_masking:
            decoder_b_loss = self.criterion(decoder_b_output, mel_input, output_lens)
        else:
            decoder_b_loss = self.criterion(decoder_b_output, mel_input)
        # decoder_c_loss = torch.nn.functional.l1_loss(decoder_b_output, decoder_output)
        attention_c_loss = torch.nn.functional.l1_loss(alignments, alignments_backwards)
        loss += self.decoder_alpha * (decoder_b_loss + attention_c_loss)
        return_dict["decoder_coarse_loss"] = decoder_b_loss
        return_dict["decoder_ddc_loss"] = attention_c_loss

    # guided attention loss (if enabled)
    if self.config.ga_alpha > 0:
        ga_loss = self.criterion_ga(alignments, input_lens, alignment_lens)
        loss += ga_loss * self.ga_alpha
        return_dict["ga_loss"] = ga_loss

    # decoder differential spectral loss
    if self.config.decoder_diff_spec_alpha > 0:
        decoder_diff_spec_loss = self.criterion_diff_spec(decoder_output, mel_input, output_lens)
        loss += decoder_diff_spec_loss * self.decoder_diff_spec_alpha
        return_dict["decoder_diff_spec_loss"] = decoder_diff_spec_loss

    # postnet differential spectral loss
    if self.config.postnet_diff_spec_alpha > 0:
        postnet_diff_spec_loss = self.criterion_diff_spec(postnet_output, postnet_target, output_lens)
        loss += postnet_diff_spec_loss * self.postnet_diff_spec_alpha
        return_dict["postnet_diff_spec_loss"] = postnet_diff_spec_loss

    # decoder ssim loss
    if self.config.decoder_ssim_alpha > 0:
        decoder_ssim_loss = self.criterion_ssim(decoder_output, mel_input, output_lens)
        loss += decoder_ssim_loss * self.decoder_ssim_alpha
        return_dict["decoder_ssim_loss"] = decoder_ssim_loss

    # postnet ssim loss
    if self.config.postnet_ssim_alpha > 0:
        postnet_ssim_loss = self.criterion_ssim(postnet_output, postnet_target, output_lens)
        loss += postnet_ssim_loss * self.postnet_ssim_alpha
        return_dict["postnet_ssim_loss"] = postnet_ssim_loss
return_dict["loss"] = loss return return_dict
def min_max_scaler(input):
    """Min-max scale `input` to [0, 1] along dim=2."""
    min = torch.amin(input, dim=2, keepdim=True, out=None)    # per-slice minimum
    input = torch.add(input, torch.negative(min), out=None)   # shift so the minimum becomes 0
    max = torch.amax(input, dim=2, keepdim=True, out=None)    # maximum of the shifted values
    return torch.div(input, max, out=None)                    # divide by the new maximum
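# --- Hedged usage sketch (not from the original source) --------------------------------
# min_max_scaler rescales each slice along dim=2 so that its minimum maps to 0 and its
# maximum maps to 1. Note the implicit assumption that every slice is non-constant,
# otherwise the division is 0/0 and produces NaNs.
x = torch.randn(2, 3, 5)  # e.g. (batch, channels, time)
y = min_max_scaler(x)
print(y.amin(dim=2))      # all zeros
print(y.amax(dim=2))      # all ones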