예제 #1
0
def batch_wer(y_true, y_pred, input_length, label_length):
    """Return the batch-averaged, normalized edit distance.

    Both dense label tensors are converted to sparse tensors and compared
    with `tf.edit_distance`; the per-sample distances are then averaged.

    # Arguments
        y_true: tensor `(samples, max_string_length)`
            containing the truth labels.
        y_pred: dense tensor of predicted labels. It is handed to
            `K.ctc_label_dense_to_sparse` together with `input_length`
            (presumably already-decoded labels of shape
            `(samples, max_string_length)` rather than raw softmax
            output -- TODO confirm against the caller).
        input_length: tensor `(samples, 1)` containing the sequence length for
            each batch item in `y_pred`.
        label_length: tensor `(samples, 1)` containing the sequence length for
            each batch item in `y_true`.

    # Returns
        Scalar tensor: the mean over the batch of the edit distance between
        prediction and truth (computed with `normalize=True`).
    """
    # Drop the trailing singleton axis so each length tensor is 1-D.
    truth_lengths = tf.to_int32(tf.squeeze(label_length, axis=-1))
    pred_lengths = tf.to_int32(tf.squeeze(input_length, axis=-1))

    truth_sparse = K.ctc_label_dense_to_sparse(y_true, truth_lengths)
    truth_sparse = tf.to_int32(truth_sparse)

    hypothesis_sparse = K.ctc_label_dense_to_sparse(y_pred, pred_lengths)
    hypothesis_sparse = tf.to_int32(hypothesis_sparse)

    per_sample_distance = tf.edit_distance(
        hypothesis_sparse, truth_sparse, normalize=True)
    return tf.reduce_mean(per_sample_distance)
예제 #2
0
def ctc_loss(y_true, y_pred):
    """Compute the CTC loss for labels and inputs of fixed length 64.

    # Arguments
        y_true: dense tensor of truth labels; assumed to be
            `(samples, 64)` -- TODO confirm.
        y_pred: tensor of logits. Because `time_major=True` is kept from
            the original call it is assumed to be laid out as
            `(time_steps, samples, num_categories)` -- TODO confirm.

    # Returns
        1-D tensor with the CTC loss of each batch element.
    """
    # NOTE(review): the original call could not run. It passed the sparse
    # prediction as `labels`, the literal 64 as `inputs`, and omitted
    # `sequence_length`. The wiring below is the most plausible intent
    # (64 is both the label length and the input length) -- please confirm.
    batch_size = tf.shape(y_true)[0]
    # Length arguments must be 1-D tensors with one entry per batch item,
    # not a bare Python scalar.
    lengths = tf.fill(tf.stack([batch_size]), 64)

    # tf.nn.ctc_loss requires an int32 SparseTensor for the labels.
    sparse_labels = tf.cast(
        K.ctc_label_dense_to_sparse(y_true, lengths), tf.int32)

    return tf.nn.ctc_loss(labels=sparse_labels,
                          inputs=y_pred,
                          sequence_length=lengths,
                          preprocess_collapse_repeated=False,
                          ctc_merge_repeated=False,
                          time_major=True)
예제 #3
0
    def ctc_batch_cost(self, y_true, y_pred, input_length, label_length):
        """Compute the CTC loss of every element in the batch.

        # Arguments
            y_true: tensor `(samples, max_string_length)`
                containing the truth labels.
            y_pred: tensor `(samples, time_steps, num_categories)`
                containing the prediction, or output of the softmax.
            input_length: tensor `(samples, 1)` containing the sequence length for
                each batch item in `y_pred`.
            label_length: tensor `(samples, 1)` containing the sequence length for
                each batch item in `y_true`.

        # Returns
            Tensor with shape (samples,1) containing the
                CTC loss of each element.
        """
        # Drop the trailing singleton axis so each length tensor is 1-D.
        target_lengths = tf.to_int32(tf.squeeze(label_length, axis=-1))
        frame_lengths = tf.to_int32(tf.squeeze(input_length, axis=-1))

        targets = K.ctc_label_dense_to_sparse(y_true, target_lengths)
        targets = tf.to_int32(targets)

        # Move the time axis first and take the log; the small constant
        # keeps log() finite when a probability is exactly zero.
        time_major_probs = tf.transpose(y_pred, perm=[1, 0, 2])
        log_probs = tf.log(time_major_probs + 1e-7)

        # Note: ignore_longer_outputs_than_inputs=True is there to ignore
        # samples where decoding fails; otherwise the loss would become nan
        # until the next batch.
        per_sample_loss = ctc.ctc_loss(
            inputs=log_probs,
            labels=targets,
            sequence_length=frame_lengths,
            ignore_longer_outputs_than_inputs=True)
        return tf.expand_dims(per_sample_loss, 1)
def ctc_batch_cost(y_true, y_pred, input_length, label_length):
    """
    Compute the CTC loss of every element in the batch.

    Adapted from Keras and modified for a batch size of one: the length
    tensors are squeezed on axis 1 only.
    # Arguments
        y_true: tensor `(samples, max_string_length)`
            containing the truth labels.
        y_pred: tensor `(samples, time_steps, num_categories)`
            containing the prediction, or output of the softmax.
        input_length: tensor `(samples, 1)` containing the sequence length for
            each batch item in `y_pred`.
        label_length: tensor `(samples, 1)` containing the sequence length for
            each batch item in `y_true`.
    # Returns
        Tensor with shape (samples,1) containing the
            CTC loss of each element.
    """
    # Remove only axis 1 so the batch axis is kept.
    target_lengths = tf.to_int32(tf.squeeze(label_length, axis=1))
    frame_lengths = tf.to_int32(tf.squeeze(input_length, axis=1))

    targets = K.ctc_label_dense_to_sparse(y_true, target_lengths)
    targets = tf.to_int32(targets)

    # Move the time axis first and take the log; epsilon keeps log()
    # finite when a probability is exactly zero.
    time_major_probs = tf.transpose(y_pred, perm=[1, 0, 2])
    log_probs = tf.log(time_major_probs + K.epsilon())

    per_sample_loss = ctc.ctc_loss(
        inputs=log_probs,
        labels=targets,
        sequence_length=frame_lengths)
    return tf.expand_dims(per_sample_loss, 1)
예제 #5
0
        def ctc_batch_cost(y_true,
                           y_pred,
                           input_length,
                           label_length,
                           ctc_merge_repeated=False):
            """Runs CTC loss algorithm on each batch element.

            # Arguments
                y_true: tensor (samples, max_string_length) containing the truth labels
                y_pred: tensor (samples, time_steps, num_categories) containing the prediction,
                        or output of the softmax
                input_length: tensor (samples,1) containing the sequence length for
                        each batch item in y_pred
                label_length: tensor (samples,1) containing the sequence length for
                        each batch item in y_true
                ctc_merge_repeated: forwarded unchanged to `ctc_loss` as its
                        `ctc_merge_repeated` argument

            # Returns
                Tensor with shape (samples,1) containing the
                    CTC loss of each element
            """
            # Squeeze only the trailing axis. A bare tf.squeeze() would also
            # remove the batch axis when samples == 1, turning the lengths
            # into scalars and breaking the calls below.
            label_length = tf.to_int32(tf.squeeze(label_length, axis=-1))
            input_length = tf.to_int32(tf.squeeze(input_length, axis=-1))
            sparse_labels = tf.to_int32(
                K.ctc_label_dense_to_sparse(y_true, label_length))

            # Move the time axis first and take the log; the small constant
            # keeps log() finite when a probability is exactly zero.
            y_pred = tf.log(tf.transpose(y_pred, perm=[1, 0, 2]) + 1e-8)

            return tf.expand_dims(
                K.ctc.ctc_loss(inputs=y_pred,
                               labels=sparse_labels,
                               sequence_length=input_length,
                               ctc_merge_repeated=ctc_merge_repeated), 1)
예제 #6
0
def decode(args):
    """Beam-search decode predictions and return the result as a sparse tensor.

    # Arguments
        args: pair `(y_pred, label_len)`. `y_pred` is passed to
            `K.ctc_decode` as the prediction tensor and `label_len` as the
            per-sample sequence length (assumed to hold one length per
            batch item, e.g. `(samples, 1)` or `(samples,)` -- TODO confirm).

    # Returns
        Sparse tensor built from the first decoded path by
        `K.ctc_label_dense_to_sparse`, using `label_len` as the lengths.
    """
    import tensorflow as tf
    y_pred, label_len = args
    # Flatten to 1-D instead of a bare tf.squeeze(): squeeze would also
    # drop the batch axis when the batch size is 1 and yield a scalar.
    label_len = K.cast(tf.reshape(label_len, [-1]), 'int32')
    # ctc_decode returns (decoded_paths, log_probs); take the first path.
    ctc_labels = K.ctc_decode(y_pred, label_len, greedy=False)[0][0]
    return K.ctc_label_dense_to_sparse(ctc_labels, label_len)
예제 #7
0
def ctc_eval_lambda_func(args):
    """Greedy-decode the logits and return the edit distance to the truth.

    # Arguments
        args: tuple `(y_pred_logits, y_true, input_length, label_length)`.
            `y_pred_logits` is transposed with perm `(1, 0, 2)` before
            decoding (assumed batch-major `(samples, time_steps,
            num_categories)` -- TODO confirm). `input_length` and
            `label_length` are expected to have a trailing singleton axis,
            i.e. `(samples, 1)`.

    # Returns
        Tensor of per-sample edit distances between the decoded sequence
        and `y_true`, as returned by `tf.edit_distance`.
    """
    y_pred_logits, y_true, input_length, label_length = args
    # Squeeze only the trailing axis for BOTH length tensors. A bare
    # tf.squeeze() would collapse a batch of size 1 into a scalar, which
    # the decoder rejects.
    label_length = tf.to_int32(tf.squeeze(label_length, axis=-1))
    input_length = tf.squeeze(tf.cast(input_length, tf.int32), axis=-1)

    # The decoder also returns log-probabilities; they are not needed here.
    decoded, _ = tf.nn.ctc_greedy_decoder(
        tf.transpose(y_pred_logits, (1, 0, 2)),
        input_length)

    return tf.edit_distance(
        tf.to_int32(decoded[0]),
        tf.to_int32(K.ctc_label_dense_to_sparse(y_true, label_length)))
예제 #8
0
def CTC_loss(y_true, y_pred, input_length, label_length):
    """Compute the CTC loss of every element in the batch.

    # Arguments
        y_true: dense tensor containing the truth labels.
        y_pred: prediction tensor; it is transposed with perm `[1, 0, 2]`
            and log-transformed before the loss (assumed batch-major
            softmax output `(samples, time_steps, num_categories)`
            -- TODO confirm).
        input_length: tensor holding one sequence length per batch item
            in `y_pred`.
        label_length: tensor holding one sequence length per batch item
            in `y_true`.

    # Returns
        Tensor with a trailing axis of size 1 containing the CTC loss of
        each element.
    """
    # Flatten to 1-D instead of a bare squeeze(): squeeze would also drop
    # the batch axis when the batch size is 1.
    label_length = math_ops.to_int32(array_ops.reshape(label_length, [-1]))
    input_length = math_ops.to_int32(array_ops.reshape(input_length, [-1]))
    sparse_labels = math_ops.to_int32(
        ctc_label_dense_to_sparse(y_true, label_length))

    # Fixes from the original: `perm` is a keyword argument (not a call),
    # the permutation swaps the batch and time axes, and the epsilon is
    # added to the transposed tensor rather than to the argument list.
    y_pred = math_ops.log(
        array_ops.transpose(y_pred, perm=[1, 0, 2]) + 1e-8)

    return array_ops.expand_dims(
        tf.nn.ctc_loss(inputs=y_pred,
                       labels=sparse_labels,
                       sequence_length=input_length), 1)
예제 #9
0
def ctc_lambda_func(args):
    """Compute the CTC loss; intended for use inside a Keras Lambda layer.

    # Arguments
        args: tuple `(y_pred, labels, input_length, label_length)`.
            `y_pred` is passed to `tf.nn.ctc_loss` with `time_major=False`,
            i.e. batch-major. `labels` is a dense label tensor.
            `input_length` and `label_length` hold one length per batch
            item.

    # Returns
        Tensor with the CTC loss of each batch element, as returned by
        `tf.nn.ctc_loss`.
    """
    import tensorflow as tf
    y_pred, labels, input_length, label_length = args
    # Flatten to 1-D instead of a bare tf.squeeze(): squeeze would also
    # drop the batch axis when the batch size is 1 and yield a scalar.
    label_length = K.cast(tf.reshape(label_length, [-1]), 'int32')
    input_length = K.cast(tf.reshape(input_length, [-1]), 'int32')
    # tf.nn.ctc_loss requires an int32 SparseTensor for the labels; the
    # sparse conversion keeps whatever dtype the dense labels had.
    labels = tf.cast(
        K.ctc_label_dense_to_sparse(labels, label_length), tf.int32)
    # NOTE(review): y_pred is passed as-is. tf.nn.ctc_loss expects
    # unnormalized logits -- confirm the model does not apply softmax first.
    return tf.nn.ctc_loss(labels, y_pred, input_length,
                          preprocess_collapse_repeated=True,
                          ctc_merge_repeated=False,
                          time_major=False,
                          ignore_longer_outputs_than_inputs=True)
예제 #10
0
def ctc_batch_cost(y_true, y_pred, input_length, label_length):
    """Compute the CTC loss of every element in the batch.

    # Arguments
        y_true: dense tensor containing the truth labels.
        y_pred: prediction tensor; transposed with perm `[1, 0, 2]` and
            log-transformed before the loss (presumably softmax output of
            shape `(samples, time_steps, num_categories)` -- TODO confirm).
        input_length: tensor with a trailing singleton axis holding the
            sequence length of each batch item in `y_pred`.
        label_length: tensor with a trailing singleton axis holding the
            sequence length of each batch item in `y_true`.

    # Returns
        Tensor with a trailing axis of size 1 containing the CTC loss of
        each element.
    """
    # Drop the trailing singleton axis so each length tensor is 1-D.
    target_lengths = tf.to_int32(tf.squeeze(label_length, axis=-1))
    frame_lengths = tf.to_int32(tf.squeeze(input_length, axis=-1))

    targets = K.ctc_label_dense_to_sparse(y_true, target_lengths)
    targets = tf.to_int32(targets)

    # Move the time axis first and take the log; epsilon keeps log()
    # finite when a probability is exactly zero.
    time_major_probs = tf.transpose(y_pred, perm=[1, 0, 2])
    log_probs = tf.log(time_major_probs + K.epsilon())

    per_sample_loss = tf.nn.ctc_loss(
        inputs=log_probs,
        labels=targets,
        sequence_length=frame_lengths,
        ignore_longer_outputs_than_inputs=True)
    return tf.expand_dims(per_sample_loss, 1)
예제 #11
0
파일: textReg.py 프로젝트: gogovim/textReg
 def ctc_lambda_func(self, args):
     """Compute the CTC loss of every element in the batch.

     # Arguments
         args: tuple `(y_pred, y_true, input_length, label_length)`.
             `y_pred` is transposed with perm `[1, 0, 2]` and
             log-transformed before the loss (assumed batch-major softmax
             output -- TODO confirm). `y_true` is a dense label tensor.
             `input_length` and `label_length` hold one length per batch
             item.

     # Returns
         Tensor with a trailing axis of size 1 containing the CTC loss of
         each element.
     """
     y_pred, y_true, input_length, label_length = args
     # Flatten to 1-D instead of a bare squeeze(): squeeze would also drop
     # the batch axis when the batch size is 1 and yield a scalar.
     label_length = math_ops.to_int32(array_ops.reshape(label_length, [-1]))
     input_length = math_ops.to_int32(array_ops.reshape(input_length, [-1]))
     sparse_labels = math_ops.to_int32(
         ctc_label_dense_to_sparse(y_true, label_length))
     # Move the time axis first and take the log; the small constant keeps
     # log() finite when a probability is exactly zero.
     y_pred = math_ops.log(
         array_ops.transpose(y_pred, perm=[1, 0, 2]) + 1e-7)
     return array_ops.expand_dims(
         ctc.ctc_loss(inputs=y_pred,
                      labels=sparse_labels,
                      sequence_length=input_length,
                      ignore_longer_outputs_than_inputs=True), 1)
    def ctc_complete_analysis_lambda_func(args, **arguments):
        """
        Complete CTC analysis using Keras and tensorflow
        WARNING : tf is required
        :param args:
            y_pred, labels, input_length, label_len
        :param arguments:
            greedy, beam_width, top_paths (all three keys are required;
            beam_width and top_paths are only used when greedy is false)
        :return:
            ler = label error rate (one float32 value per sequence)
        :raises AssertionError:
            if the Keras backend is not tensorflow
        :raises KeyError:
            if one of the required keyword arguments is missing
        """

        y_pred, labels, input_length, label_len = args

        assert (K.backend() == 'tensorflow')

        # Move the time axis first and take the log; the small constant
        # keeps log() finite when a probability is exactly zero.
        batch = tf.log(tf.transpose(y_pred, perm=[1, 0, 2]) + 1e-8)
        # Flatten to 1-D instead of a bare tf.squeeze(): squeeze would also
        # drop the batch axis when the batch size is 1 and yield a scalar.
        input_length = tf.to_int32(tf.reshape(input_length, [-1]))

        greedy = arguments['greedy']
        beam_width = arguments['beam_width']
        top_paths = arguments['top_paths']

        # The decoders also return log-probabilities; they are not needed.
        if greedy:
            decoded, _ = ctc.ctc_greedy_decoder(
                inputs=batch,
                sequence_length=input_length)
        else:
            decoded, _ = ctc.ctc_beam_search_decoder(
                inputs=batch,
                sequence_length=input_length,
                beam_width=beam_width,
                top_paths=top_paths)

        # Only the first decoded path is scored.
        cast_decoded = tf.cast(decoded[0], tf.float32)

        sparse_y = K.ctc_label_dense_to_sparse(
            labels, tf.cast(tf.reshape(label_len, [-1]), tf.int32))
        ed_tensor = tf_edit_distance(cast_decoded, sparse_y, norm=True)
        ler_per_seq = Kreshape_To1D(ed_tensor)

        return K.cast(ler_per_seq, dtype='float32')