def loss(top_out, targets, model_hparams, vocab_size, weights_fn):
  """Compute loss numerator and denominator for one shard of output.

  Args:
    top_out: logits Tensor produced by the modality's top transformation.
    targets: target ids Tensor to score the logits against.
    model_hparams: HParams; `label_smoothing` is read, and the full set is
      forwarded to `common_attention.maybe_upcast`.
    vocab_size: unused; present only to match the shared loss signature.
    weights_fn: callable mapping targets to per-position loss weights.

  Returns:
    Whatever `common_layers.padded_cross_entropy` returns — per its name,
    a (numerator, denominator) loss pair for this shard.
  """
  del vocab_size  # Unused; kept so all modality loss fns share one signature.
  # Optionally upcast logits (e.g. to float32) before the cross-entropy,
  # as decided by `maybe_upcast` from the hparams.
  upcast_logits = common_attention.maybe_upcast(top_out, hparams=model_hparams)
  return common_layers.padded_cross_entropy(
      upcast_logits,
      targets,
      model_hparams.label_smoothing,
      weights_fn=weights_fn)
def loss(self, top_out, targets, weights_fn=None):
  """Compute loss numerator and denominator for one shard of output.

  Args:
    top_out: logits Tensor produced by this modality's top transformation.
    targets: target ids Tensor to score the logits against.
    weights_fn: optional callable mapping targets to per-position loss
      weights; defaults to `self.targets_weights_fn` when None.

  Returns:
    Whatever `common_layers.padded_cross_entropy` returns — per its name,
    a (numerator, denominator) loss pair for this shard.
  """
  # Fall back to the modality's default weighting when none is supplied.
  if weights_fn is None:
    weights_fn = self.targets_weights_fn
  # Optionally upcast logits (e.g. to float32) before the cross-entropy,
  # as decided by `maybe_upcast` from the model hparams.
  upcast_logits = common_attention.maybe_upcast(
      top_out, hparams=self._model_hparams)
  return common_layers.padded_cross_entropy(
      upcast_logits,
      targets,
      self._model_hparams.label_smoothing,
      weights_fn=weights_fn)
def generic_loss(top_out, targets, model_hparams, vocab_size, weights_fn):
  """Compute loss numerator and denominator for one shard of output.

  Unlike `loss`, this variant applies an optional per-position loss cutoff
  (read from `model_hparams.video_modality_loss_cutoff`, default 0.0) and
  does not reduce-sum the result.

  Args:
    top_out: logits Tensor produced by the modality's top transformation.
    targets: target ids Tensor to score the logits against.
    model_hparams: HParams; `label_smoothing` is read, the optional
      `video_modality_loss_cutoff` attribute is read via getattr, and the
      full set is forwarded to `common_attention.maybe_upcast`.
    vocab_size: unused; present only to match the shared loss signature.
    weights_fn: callable mapping targets to per-position loss weights.

  Returns:
    Whatever `common_layers.padded_cross_entropy` returns with
    `reduce_sum=False` — presumably an unreduced loss pair for this shard.
  """
  del vocab_size  # Unused; kept so all modality loss fns share one signature.
  # Optionally upcast logits (e.g. to float32) before the cross-entropy,
  # as decided by `maybe_upcast` from the hparams.
  upcast_logits = common_attention.maybe_upcast(top_out, hparams=model_hparams)
  # Hparams may not define the cutoff attribute; treat missing as 0.0.
  loss_cutoff = getattr(model_hparams, "video_modality_loss_cutoff", 0.0)
  return common_layers.padded_cross_entropy(
      upcast_logits,
      targets,
      model_hparams.label_smoothing,
      cutoff=loss_cutoff,
      weights_fn=weights_fn,
      reduce_sum=False)