Example #1
def smoothing_crossentropy_avgall(logits, targets, sequence_length):
    """ Computes cross entropy loss of a batch of data with label smoothing.

    The loss of each sequence is averaged by its own length; dividing the
    returned loss sum by the returned weight sum (the batch size) then
    averages over the batch.

    Args:
        logits: The logits Tensor with shape [timesteps, batch_size, vocab_size].
        targets: The gold labels Tensor with shape [timesteps, batch_size].
        sequence_length: The lengths of the target sequences, with shape [batch_size].

    Returns: Loss sum and weight sum.
    """
    soft_targets, normalizing = label_smoothing(
        targets,
        logits.get_shape().as_list()[-1])
    losses = tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=soft_targets) - normalizing
    # [timesteps, batch_size]
    loss_mask = tf.transpose(
        tf.sequence_mask(lengths=tf.to_int32(sequence_length),
                         maxlen=tf.to_int32(tf.shape(targets)[0]),
                         dtype=tf.float32), [1, 0])
    losses = losses * loss_mask
    # Average each sequence's loss by its own length.
    seq_length_float = tf.to_float(sequence_length)
    loss_by_time = tf.reduce_sum(losses, axis=0) / seq_length_float
    loss_sum = tf.reduce_sum(loss_by_time)
    return loss_sum, tf.to_float(tf.shape(sequence_length)[0])
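All of these examples call a `label_smoothing` helper that is not shown. Below is a minimal sketch of what it presumably does, following the usual label-smoothing scheme: spread `epsilon` probability mass over the non-gold classes and return the entropy of the smoothed distribution as a normalizing constant. The function body and the `epsilon` default are assumptions, not the original implementation.

import math

import tensorflow as tf

def label_smoothing(targets, vocab_size, epsilon=0.1):
    """Hypothetical sketch of the unshown helper; `epsilon` is assumed."""
    confidence = 1.0 - epsilon
    low_confidence = epsilon / float(vocab_size - 1)
    # Smoothed one-hot targets: `confidence` on the gold label and
    # `low_confidence` spread uniformly over the remaining classes.
    soft_targets = tf.one_hot(
        tf.cast(targets, tf.int32), depth=vocab_size,
        on_value=confidence, off_value=low_confidence)
    # Entropy of the smoothed distribution, i.e. the minimum achievable
    # cross entropy; the callers subtract it so that a perfect prediction
    # yields a loss of zero.
    normalizing = -(confidence * math.log(confidence) +
                    float(vocab_size - 1) * low_confidence *
                    math.log(low_confidence + 1e-20))
    return soft_targets, normalizing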
Example #2
def smoothing_crossentropy_t(logits, targets, sequence_length):
    """ Computes cross entropy loss of a batch of data with label smoothing.

    The final loss is averaged by the number of tokens in the batch.

    Args:
        logits: The logits Tensor with shape [timesteps, batch_size, vocab_size].
        targets: The gold labels Tensor with shape [timesteps, batch_size].
        sequence_length: The lengths of the target sequences, with shape [batch_size].

    Returns: A float32 Scalar.
    """
    soft_targets, normalizing = label_smoothing(
        targets,
        logits.get_shape().as_list()[-1])
    losses = tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=soft_targets) - normalizing
    # [timesteps, batch_size]
    loss_mask = tf.transpose(
        tf.sequence_mask(lengths=tf.to_int32(sequence_length),
                         maxlen=tf.to_int32(tf.shape(targets)[0]),
                         dtype=tf.float32), [1, 0])
    losses = losses * loss_mask
    loss = tf.reduce_sum(losses) / tf.to_float(tf.reduce_sum(sequence_length))
    return loss
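Note the normalization difference between the two variants: `smoothing_crossentropy_avgall` averages each sequence by its own length, so every sequence contributes equally regardless of length, while `smoothing_crossentropy_t` averages over all tokens in the batch, so longer sequences contribute more. A quick smoke test for the token-level variant (the shapes and values are made up; this assumes TF 1.x graph mode and the `label_smoothing` sketch above):

import numpy as np
import tensorflow as tf

timesteps, batch_size, vocab_size = 5, 2, 8
logits = tf.placeholder(tf.float32, [timesteps, batch_size, vocab_size])
targets = tf.placeholder(tf.int32, [timesteps, batch_size])
lengths = tf.placeholder(tf.int32, [batch_size])
loss = smoothing_crossentropy_t(logits, targets, lengths)

with tf.Session() as sess:
    print(sess.run(loss, feed_dict={
        logits: np.random.randn(timesteps, batch_size, vocab_size),
        targets: np.random.randint(vocab_size, size=(timesteps, batch_size)),
        lengths: [5, 3]}))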
Example #3
    def _compute_loss(self,
                      logits,
                      targets,
                      targets_length,
                      return_as_scorer=False):
        """ Computes loss.

        Args:
            logits: The logits Tensor with shape [timesteps, batch_size, target_vocab_size].
            targets: The labels Tensor with shape [batch_size, timesteps].
            targets_length: The lengths of the label sequences, with shape [batch_size].
            return_as_scorer: Whether to average by sequence length and return batch result.

        Returns: A [batch_size] Tensor of per-sentence, length-normalized
            losses if `return_as_scorer` is True; otherwise the loss sum
            and the weight sum (the batch size).
        """
        targets = tf.transpose(targets, [1, 0])  # [timesteps, batch_size]
        if float(self.params["label_smoothing"]) > 0.:
            soft_targets, normalizing = label_smoothing(
                targets,
                logits.get_shape().as_list()[-1],
                self.params["label_smoothing"])
            ces = tf.nn.softmax_cross_entropy_with_logits(
                logits=logits, labels=soft_targets) - normalizing
        else:
            ces = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=targets)
        # [timesteps, batch]
        ces_mask = tf.transpose(
            tf.sequence_mask(lengths=tf.to_int32(targets_length),
                             maxlen=tf.to_int32(tf.shape(targets)[0]),
                             dtype=tf.float32), [1, 0])
        masked_ces = ces * ces_mask
        if return_as_scorer:
            loss_sum = tf.reduce_sum(masked_ces, axis=0)
            return loss_sum / tf.to_float(targets_length)
        loss_sum = tf.reduce_sum(masked_ces)
        weight_sum = tf.to_float(tf.shape(targets_length)[0])
        return loss_sum, weight_sum
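A hedged usage sketch of the two return modes (`model` and the call sites are hypothetical; only `_compute_loss` comes from the example): with `return_as_scorer=True` the method returns per-sentence, length-normalized scores, e.g. for reranking n-best lists, while the default returns a (loss sum, weight sum) pair that the training loop divides to form the batch loss.

# Per-sentence scores: [batch_size], each sequence's masked cross
# entropy averaged by its own length.
sentence_scores = model._compute_loss(
    logits, targets, targets_length, return_as_scorer=True)

# Training loss: total masked cross entropy divided by the batch size.
loss_sum, weight_sum = model._compute_loss(logits, targets, targets_length)
train_loss = loss_sum / weight_sum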