def smoothing_crossentropy_avgall(logits, targets, sequence_length):
    """Compute label-smoothed cross entropy, averaged per sequence.

    Each sequence's summed token loss is divided by that sequence's own
    length; the batch size is returned as the weight so callers can then
    average over the batch.

    Args:
      logits: Logits Tensor, [timesteps, batch_size, vocab_size].
      targets: Gold label ids Tensor, [timesteps, batch_size].
      sequence_length: Valid lengths of `targets`, [batch_size].

    Returns:
      A (loss_sum, weight_sum) pair, where weight_sum is the batch size
      as a float32 scalar.
    """
    vocab_size = logits.get_shape().as_list()[-1]
    soft_targets, normalizing = label_smoothing(targets, vocab_size)
    # Subtract the smoothing normalizer so the minimum attainable loss is 0.
    token_losses = tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=soft_targets) - normalizing
    # Validity mask built as [batch, timesteps], transposed to match the
    # time-major [timesteps, batch] loss tensor.
    max_time = tf.to_int32(tf.shape(targets)[0])
    mask = tf.transpose(
        tf.sequence_mask(lengths=tf.to_int32(sequence_length),
                         maxlen=max_time,
                         dtype=tf.float32),
        [1, 0])
    masked_losses = token_losses * mask
    # Per-sequence average over time, then summed across the batch.
    per_seq = tf.reduce_sum(masked_losses, axis=0) / tf.to_float(sequence_length)
    batch_size = tf.to_float(tf.shape(sequence_length)[0])
    return tf.reduce_sum(per_seq), batch_size
def smoothing_crossentropy_avgall(logits, targets, sequence_length):
    """Label-smoothed cross entropy averaged by each sequence's length.

    NOTE(review): this redefines the identical function declared earlier
    in the file; at import time the later definition wins — confirm the
    duplication is intentional and remove one copy.

    Args:
      logits: [timesteps, batch_size, vocab_size] logits Tensor.
      targets: [timesteps, batch_size] gold label ids.
      sequence_length: [batch_size] valid lengths of `targets`.

    Returns:
      Tuple of (summed per-sequence-averaged loss, batch size as float32).
    """
    num_classes = logits.get_shape().as_list()[-1]
    smoothed, norm_const = label_smoothing(targets, num_classes)
    raw_ce = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                     labels=smoothed)
    ce = raw_ce - norm_const  # shift so a perfect prediction scores zero
    # Build a [batch, timesteps] validity mask from the lengths, then flip
    # it to time-major to line up with `ce`.
    valid = tf.sequence_mask(lengths=tf.to_int32(sequence_length),
                             maxlen=tf.to_int32(tf.shape(targets)[0]),
                             dtype=tf.float32)
    ce = ce * tf.transpose(valid, [1, 0])
    # Normalize every sequence by its own length before the batch sum.
    seq_avg = tf.reduce_sum(ce, axis=0) / tf.to_float(sequence_length)
    weight = tf.to_float(tf.shape(sequence_length)[0])
    return tf.reduce_sum(seq_avg), weight
def smoothing_crossentropy_t(logits, targets, sequence_length):
    """Label-smoothed cross entropy averaged over all tokens in the batch.

    Args:
      logits: [timesteps, batch_size, vocab_size] logits Tensor.
      targets: [timesteps, batch_size] gold label ids.
      sequence_length: [batch_size] valid lengths of `targets`.

    Returns:
      A float32 scalar: total masked loss divided by total token count.
    """
    soft_targets, normalizing = label_smoothing(
        targets, logits.get_shape().as_list()[-1])
    # Per-token loss, shifted by the smoothing normalizer.
    per_token = tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=soft_targets) - normalizing
    # Mask is produced batch-major and transposed to the time-major layout
    # of `per_token` before applying.
    time_mask = tf.sequence_mask(lengths=tf.to_int32(sequence_length),
                                 maxlen=tf.to_int32(tf.shape(targets)[0]),
                                 dtype=tf.float32)
    masked = per_token * tf.transpose(time_mask, [1, 0])
    total_tokens = tf.to_float(tf.reduce_sum(sequence_length))
    return tf.reduce_sum(masked) / total_tokens
def _compute_loss(self, logits, targets, targets_length, return_as_scorer=False):
    """Compute (optionally label-smoothed) cross-entropy loss.

    Uses soft targets when `self.params["label_smoothing"] > 0`, otherwise
    plain sparse cross entropy on the integer labels.

    Args:
      logits: [timesteps, batch_size, target_vocab_size] logits Tensor.
      targets: [batch_size, timesteps] gold label ids.
      targets_length: [batch_size] valid lengths of `targets`.
      return_as_scorer: If True, return each sequence's loss averaged by
        its own length instead of a (loss_sum, weight_sum) pair.

    Returns:
      A [batch_size] Tensor of per-sequence average losses when
      `return_as_scorer` is True; otherwise a (loss_sum, weight_sum)
      tuple where weight_sum is the batch size as float32.
    """
    # Switch to time-major layout [timesteps, batch] to match `logits`.
    time_major_targets = tf.transpose(targets, [1, 0])
    smoothing = float(self.params["label_smoothing"])
    if smoothing > 0.:
        soft, norm_const = label_smoothing(
            time_major_targets, logits.get_shape().as_list()[-1], smoothing)
        token_ces = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=soft) - norm_const
    else:
        token_ces = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=time_major_targets)
    # Zero the loss at padded timesteps.
    pad_mask = tf.sequence_mask(
        lengths=tf.to_int32(targets_length),
        maxlen=tf.to_int32(tf.shape(time_major_targets)[0]),
        dtype=tf.float32)
    token_ces = token_ces * tf.transpose(pad_mask, [1, 0])
    if return_as_scorer:
        # Length-normalized per-sequence score, [batch_size].
        per_seq = tf.reduce_sum(token_ces, axis=0)
        return per_seq / tf.to_float(targets_length)
    loss_sum = tf.reduce_sum(token_ces)
    weight_sum = tf.to_float(tf.shape(targets_length)[0])
    return loss_sum, weight_sum