Example #1
# Assumed imports: this function appears to come from the tensor2tensor
# library's metrics module.
import tensorflow as tf
from tensor2tensor.layers import common_layers
def word_error_rate(raw_predictions,
                    labels,
                    lookup=None,
                    weights_fn=common_layers.weights_nonzero):
    """Calculate word error rate.

  Args:
    raw_predictions: The raw predictions.
    labels: The actual labels.
    lookup: A tf.constant mapping indices to output tokens.
    weights_fn: Weighting function.

  Returns:
    The word error rate.
  """
    def from_tokens(raw, lookup_):
        """Convert token ids to space-separated tokens, stripping after <EOS>."""
        gathered = tf.gather(lookup_, tf.cast(raw, tf.int32))
        joined = tf.regex_replace(tf.reduce_join(gathered, axis=1), b"<EOS>.*",
                                  b"")
        cleaned = tf.regex_replace(joined, b"_", b" ")
        tokens = tf.string_split(cleaned, " ")
        return tokens

    def from_characters(raw, lookup_):
        """Convert ascii+2 encoded codes to string-tokens."""
        corrected = tf.bitcast(tf.clip_by_value(tf.subtract(raw, 2), 0, 255),
                               tf.uint8)

        gathered = tf.gather(lookup_, tf.cast(corrected, tf.int32))[:, :, 0]
        joined = tf.reduce_join(gathered, axis=1)
        cleaned = tf.regex_replace(joined, b"\0", b"")
        tokens = tf.string_split(cleaned, " ")
        return tokens

    if lookup is None:
        lookup = tf.constant([chr(i) for i in range(256)])
        convert_fn = from_characters
    else:
        convert_fn = from_tokens

    if weights_fn is not common_layers.weights_nonzero:
        raise ValueError("Only weights_nonzero can be used for this metric.")

    with tf.variable_scope("word_error_rate", values=[raw_predictions,
                                                      labels]):

        raw_predictions = tf.squeeze(tf.argmax(raw_predictions, axis=-1),
                                     axis=(2, 3))
        labels = tf.squeeze(labels, axis=(2, 3))

        reference = convert_fn(labels, lookup)
        predictions = convert_fn(raw_predictions, lookup)

        distance = tf.reduce_sum(
            tf.edit_distance(predictions, reference, normalize=False))
        reference_length = tf.cast(tf.size(reference.values,
                                           out_type=tf.int32),
                                   dtype=tf.float32)

        return distance / reference_length, reference_length
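
A minimal usage sketch (not from the original source): the placeholder shapes
below are assumptions inferred from the argmax/squeeze calls above, and
lookup=None selects the character path.

import numpy as np

batch, length, vocab = 2, 5, 256
logits_ph = tf.placeholder(tf.float32, [batch, length, 1, 1, vocab])
labels_ph = tf.placeholder(tf.int32, [batch, length, 1, 1])
wer, ref_len = word_error_rate(logits_ph, labels_ph)

with tf.Session() as sess:
    print(sess.run([wer, ref_len], feed_dict={
        logits_ph: np.random.randn(batch, length, 1, 1, vocab),
        labels_ph: np.random.randint(2, 128, size=(batch, length, 1, 1)),
    }))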
Example #2
# Same assumed imports as Example #1:
import tensorflow as tf
from tensor2tensor.layers import common_layers
def sequence_edit_distance(predictions,
                           labels,
                           weights_fn=common_layers.weights_nonzero):
    """Average edit distance, ignoring padding 0s.

  The score returned is the edit distance divided by the total length of
  reference truth and the weight returned is the total length of the truth.

  Args:
    predictions: Tensor of shape [`batch_size`, `length`, 1, `num_classes`] and
        type tf.float32 representing the logits, 0-padded.
    labels: Tensor of shape [`batch_size`, `length`, 1, 1] and type tf.int32
        representing the labels of same length as logits and 0-padded.
    weights_fn: ignored. The weights returned are the total length of the ground
        truth labels, excluding 0-paddings.

  Returns:
    (edit distance / reference length, reference length)

  Raises:
    ValueError: if weights_fn is not common_layers.weights_nonzero.
  """
    if weights_fn is not common_layers.weights_nonzero:
        raise ValueError("Only weights_nonzero can be used for this metric.")

    with tf.variable_scope("edit_distance", values=[predictions, labels]):
        # Transform logits into sequence classes by taking max at every step.
        predictions = tf.to_int32(
            tf.squeeze(tf.argmax(predictions, axis=-1), axis=(2, 3)))
        nonzero_idx = tf.where(tf.not_equal(predictions, 0))
        sparse_outputs = tf.SparseTensor(
            nonzero_idx, tf.gather_nd(predictions, nonzero_idx),
            tf.shape(predictions, out_type=tf.int64))
        labels = tf.squeeze(labels, axis=(2, 3))
        nonzero_idx = tf.where(tf.not_equal(labels, 0))
        label_sparse_outputs = tf.SparseTensor(
            nonzero_idx, tf.gather_nd(labels, nonzero_idx),
            tf.shape(labels, out_type=tf.int64))
        distance = tf.reduce_sum(
            tf.edit_distance(sparse_outputs,
                             label_sparse_outputs,
                             normalize=False))
        reference_length = tf.to_float(
            common_layers.shape_list(nonzero_idx)[0])
        return distance / reference_length, reference_length
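
A minimal usage sketch (assumed shapes, consistent with the squeeze above:
5-D logits and 4-D 0-padded labels):

import numpy as np

logits_ph = tf.placeholder(tf.float32, [2, 7, 1, 1, 10])
labels_ph = tf.placeholder(tf.int32, [2, 7, 1, 1])
score, ref_len = sequence_edit_distance(logits_ph, labels_ph)

with tf.Session() as sess:
    print(sess.run([score, ref_len], feed_dict={
        logits_ph: np.random.randn(2, 7, 1, 1, 10),
        labels_ph: np.random.randint(0, 10, size=(2, 7, 1, 1)),
    }))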
Example #3
import tensorflow as tf

# `indices`, `values`, `shape`, `logits`, `SeqLens`, and the `cfg` config
# object are assumed to be defined earlier in the original script.
targets = tf.SparseTensor(indices, values, shape)

# Compute Loss
losses = tf.nn.ctc_loss(targets, logits, SeqLens)

loss = tf.reduce_mean(losses)

TrainLoss_s = tf.summary.scalar('TrainLoss', loss)

# CTC beam search decoder: decode the predicted string from the probability map
decoded, log_prob = tf.nn.ctc_beam_search_decoder(logits, SeqLens)

predicted = tf.to_int32(decoded[0])

error_rate = tf.reduce_sum(
    tf.edit_distance(predicted, targets, normalize=False)) / tf.to_float(
        tf.size(targets.values))

TrainError_s = tf.summary.scalar('TrainError', error_rate)

tvars = tf.trainable_variables()

grad, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars),
                                 cfg.MaxGradientNorm)

optimizer = tf.train.AdamOptimizer(learning_rate=cfg.LearningRate)

train_step = optimizer.apply_gradients(zip(grad, tvars))

# These values are used to draw performance graphs. Updated after each epoch.
# (The snippet is truncated here; trainable=False is an assumed completion.)
OverallTrainingLoss = tf.Variable(0.0, trainable=False)
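
The (indices, values, shape) triple fed to tf.SparseTensor at the top of this
example is not shown. A hedged sketch of how it might be built from a dense,
0-padded label batch, mirroring the pattern in Example #2 (`dense_labels` is
hypothetical):

def dense_to_sparse(dense_labels):
    """Convert 0-padded int32 labels of shape [batch, max_len] to a
    tf.SparseTensor, dropping the padding entries."""
    idx = tf.where(tf.not_equal(dense_labels, 0))
    return tf.SparseTensor(idx,
                           tf.gather_nd(dense_labels, idx),
                           tf.shape(dense_labels, out_type=tf.int64))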