def batch_wer(y_true, y_pred, input_length, label_length):
    """Computes the mean normalized edit distance between predictions and labels.

    Both dense label matrices are converted to sparse tensors and compared
    with `tf.edit_distance(..., normalize=True)`; the per-sample distances
    are then averaged into one value.

    # Arguments
        y_true: tensor `(samples, max_string_length)`
            containing the truth labels.
        y_pred: tensor containing the predicted labels. It is fed to
            `K.ctc_label_dense_to_sparse`, so presumably it is a dense
            label matrix `(samples, max_string_length)` and not the raw
            softmax output -- TODO confirm against the caller.
        input_length: tensor `(samples, 1)` containing the sequence length
            for each batch item in `y_pred`.
        label_length: tensor `(samples, 1)` containing the sequence length
            for each batch item in `y_true`.

    # Returns
        Tensor holding the mean of the normalized edit distances.
    """
    true_lengths = tf.to_int32(tf.squeeze(label_length, axis=-1))
    pred_lengths = tf.to_int32(tf.squeeze(input_length, axis=-1))

    reference = tf.to_int32(
        K.ctc_label_dense_to_sparse(y_true, true_lengths))
    hypothesis = tf.to_int32(
        K.ctc_label_dense_to_sparse(y_pred, pred_lengths))

    # The prediction is the hypothesis and the label is the truth.
    distances = tf.edit_distance(hypothesis, reference, normalize=True)
    return tf.reduce_mean(distances)
def ctc_loss(y_true, y_pred):
    """Computes the CTC loss for sequences of a fixed length of 64.

    # Arguments
        y_true: dense label tensor; assumed to be
            `(samples, max_string_length)` -- TODO confirm.
        y_pred: network output handed to `tf.nn.ctc_loss` as `inputs`.
            Because `time_major=True` is kept from the original call, it is
            assumed to be `(time_steps, samples, num_categories)` logits
            -- TODO confirm that the caller provides this layout.

    # Returns
        The value of `tf.nn.ctc_loss` for the batch.
    """
    # NOTE(review): the original call passed the sparsified predictions as
    # `labels`, the literal 64 as `inputs`, and no `sequence_length`, so it
    # could not run. The 64 is kept as the length of every label sequence
    # and every input sequence -- confirm this matches the model.
    fixed_length = 64

    # One length entry per batch item, as both TensorFlow and the Keras
    # dense-to-sparse helper expect a vector of lengths.
    batch_size = tf.shape(y_true)[0]
    lengths = tf.fill([batch_size], fixed_length)

    sparse_labels = tf.cast(
        K.ctc_label_dense_to_sparse(y_true, lengths), tf.int32)

    return tf.nn.ctc_loss(labels=sparse_labels,
                          inputs=y_pred,
                          sequence_length=lengths,
                          preprocess_collapse_repeated=False,
                          ctc_merge_repeated=False,
                          time_major=True)
def ctc_batch_cost(self, y_true, y_pred, input_length, label_length):
    """Computes the CTC loss of every element in the batch.

    # Arguments
        y_true: tensor `(samples, max_string_length)`
            containing the truth labels.
        y_pred: tensor `(samples, time_steps, num_categories)`
            containing the prediction, or output of the softmax.
        input_length: tensor `(samples, 1)` containing the sequence length
            for each batch item in `y_pred`.
        label_length: tensor `(samples, 1)` containing the sequence length
            for each batch item in `y_true`.

    # Returns
        Tensor with shape (samples,1) containing the
            CTC loss of each element.
    """
    true_lengths = tf.to_int32(tf.squeeze(label_length, axis=-1))
    pred_lengths = tf.to_int32(tf.squeeze(input_length, axis=-1))
    targets = tf.to_int32(
        K.ctc_label_dense_to_sparse(y_true, true_lengths))

    # Swap the first two axes, then take the log; the 1e-7 offset keeps
    # the log finite when a probability is exactly zero.
    time_major = tf.transpose(y_pred, perm=[1, 0, 2])
    log_probs = tf.log(time_major + 1e-7)

    # Note: the True here is to ignore the case where decoding fails, in
    # which the loss would become nan until the next batch.
    per_sample = ctc.ctc_loss(inputs=log_probs,
                              labels=targets,
                              sequence_length=pred_lengths,
                              ignore_longer_outputs_than_inputs=True)
    return tf.expand_dims(per_sample, 1)
def ctc_batch_cost(y_true, y_pred, input_length, label_length):
    """Computes the CTC loss of every element in the batch.

    FROM KERAS - MODIFIED FOR BATCH SIZE OF ONE: the length tensors are
    squeezed along axis 1 only, so the batch axis is kept even when it
    has a single entry.

    # Arguments
        y_true: tensor `(samples, max_string_length)`
            containing the truth labels.
        y_pred: tensor `(samples, time_steps, num_categories)`
            containing the prediction, or output of the softmax.
        input_length: tensor `(samples, 1)` containing the sequence length
            for each batch item in `y_pred`.
        label_length: tensor `(samples, 1)` containing the sequence length
            for each batch item in `y_true`.

    # Returns
        Tensor with shape (samples,1) containing the
            CTC loss of each element.
    """
    true_lengths = tf.to_int32(tf.squeeze(label_length, axis=1))
    pred_lengths = tf.to_int32(tf.squeeze(input_length, axis=1))
    targets = tf.to_int32(
        K.ctc_label_dense_to_sparse(y_true, true_lengths))

    # Swap the first two axes, then take the log; the epsilon keeps the
    # log finite when a probability is exactly zero.
    time_major = tf.transpose(y_pred, perm=[1, 0, 2])
    log_probs = tf.log(time_major + K.epsilon())

    per_sample = ctc.ctc_loss(inputs=log_probs,
                              labels=targets,
                              sequence_length=pred_lengths)
    return tf.expand_dims(per_sample, 1)
def ctc_batch_cost(y_true, y_pred, input_length, label_length,
                   ctc_merge_repeated=False):
    """Runs CTC loss algorithm on each batch element.

    # Arguments
        y_true: tensor (samples, max_string_length) containing the truth
            labels
        y_pred: tensor (samples, time_steps, num_categories) containing the
            prediction, or output of the softmax
        input_length: tensor (samples,1) containing the sequence length for
            each batch item in y_pred
        label_length: tensor (samples,1) containing the sequence length for
            each batch item in y_true
        ctc_merge_repeated: forwarded unchanged to the underlying
            `ctc_loss` call

    # Returns
        Tensor with shape (samples,1) containing the CTC loss of each
        element
    """
    # Flatten to a rank-1 vector instead of using a bare squeeze: a bare
    # squeeze also removes the batch axis when the batch holds one sample,
    # leaving a scalar where a vector of lengths is required.
    label_length = tf.to_int32(tf.reshape(label_length, [-1]))
    input_length = tf.to_int32(tf.reshape(input_length, [-1]))

    sparse_labels = tf.to_int32(
        K.ctc_label_dense_to_sparse(y_true, label_length))

    # Swap the first two axes, then take the log; the 1e-8 offset keeps
    # the log finite when a probability is exactly zero.
    y_pred = tf.log(tf.transpose(y_pred, perm=[1, 0, 2]) + 1e-8)

    return tf.expand_dims(
        K.ctc.ctc_loss(inputs=y_pred,
                       labels=sparse_labels,
                       sequence_length=input_length,
                       ctc_merge_repeated=ctc_merge_repeated),
        1)
def decode(args):
    """Decodes a batch with CTC beam search and returns it as a sparse tensor.

    # Arguments
        args: pair `(y_pred, label_len)`. `y_pred` is passed to
            `K.ctc_decode`; `label_len` holds one length per batch item and
            is used both as the decoder's sequence length and as the
            lengths for the dense-to-sparse conversion.

    # Returns
        The result of `K.ctc_label_dense_to_sparse` applied to the first
        decoded path.
    """
    import tensorflow as tf

    y_pred, label_len = args

    # Flatten to a rank-1 vector instead of using a bare squeeze: a bare
    # squeeze also removes the batch axis when the batch holds one sample.
    label_len = K.cast(tf.reshape(label_len, [-1]), 'int32')

    # greedy=False selects beam search; [0][0] picks the first decoded path.
    ctc_labels = K.ctc_decode(y_pred, label_len, greedy=False)[0][0]
    return K.ctc_label_dense_to_sparse(ctc_labels, label_len)
def ctc_eval_lambda_func(args):
    """Greedy-decodes the predictions and returns their edit distance to the labels.

    # Arguments
        args: tuple `(y_pred_logits, y_true, input_length, label_length)`.
            `y_pred_logits` is a rank-3 tensor whose first two axes are
            swapped before decoding (presumably batch-major
            `(samples, time_steps, num_categories)` -- TODO confirm);
            `y_true` is the dense label tensor; `input_length` and
            `label_length` hold one length per batch item.

    # Returns
        The tensor produced by `tf.edit_distance` between the first decoded
        path and the sparse labels (normalization left at the TensorFlow
        default).
    """
    y_pred_logits, y_true, input_length, label_length = args

    label_length = tf.to_int32(tf.squeeze(label_length, axis=-1))
    # Flatten to a rank-1 vector instead of using a bare squeeze: a bare
    # squeeze also removes the batch axis when the batch holds one sample.
    input_length = tf.reshape(tf.cast(input_length, tf.int32), [-1])

    decoded, _ = tf.nn.ctc_greedy_decoder(
        tf.transpose(y_pred_logits, (1, 0, 2)), input_length)

    sparse_truth = tf.to_int32(
        K.ctc_label_dense_to_sparse(y_true, label_length))
    return tf.edit_distance(tf.to_int32(decoded[0]), sparse_truth)
def CTC_loss(y_true, y_pred, input_length, label_length):
    """Runs CTC loss algorithm on each batch element.

    # Arguments
        y_true: dense tensor containing the truth labels.
        y_pred: rank-3 tensor containing the prediction; its first two
            axes are swapped before the loss is computed (presumably
            `(samples, time_steps, num_categories)` softmax output
            -- TODO confirm).
        input_length: tensor containing the sequence length for each batch
            item in `y_pred`.
        label_length: tensor containing the sequence length for each batch
            item in `y_true`.

    # Returns
        Tensor with a trailing axis of size 1 containing the CTC loss of
        each element.
    """
    # Flatten to a rank-1 vector instead of using a bare squeeze: a bare
    # squeeze also removes the batch axis when the batch holds one sample.
    label_length = math_ops.to_int32(array_ops.reshape(label_length, [-1]))
    input_length = math_ops.to_int32(array_ops.reshape(input_length, [-1]))

    sparse_labels = math_ops.to_int32(
        ctc_label_dense_to_sparse(y_true, label_length))

    # Swap the first two axes, then take the log; the 1e-8 offset keeps
    # the log finite when a probability is exactly zero.
    y_pred = math_ops.log(
        array_ops.transpose(y_pred, perm=[1, 0, 2]) + 1e-8)

    return array_ops.expand_dims(
        tf.nn.ctc_loss(inputs=y_pred,
                       labels=sparse_labels,
                       sequence_length=input_length),
        1)
def ctc_lambda_func(args):
    """Computes the CTC loss for a batch with `tf.nn.ctc_loss`.

    # Arguments
        args: tuple `(y_pred, labels, input_length, label_length)`.
            `y_pred` is passed to `tf.nn.ctc_loss` as batch-major inputs
            (`time_major=False`); `labels` is the dense label tensor;
            `input_length` and `label_length` hold one length per batch
            item.

    # Returns
        The value of `tf.nn.ctc_loss` for the batch.
    """
    import tensorflow as tf

    y_pred, labels, input_length, label_length = args

    # Flatten to rank-1 vectors instead of using a bare squeeze: a bare
    # squeeze also removes the batch axis when the batch holds one sample.
    label_length = K.cast(tf.reshape(label_length, [-1]), 'int32')
    input_length = K.cast(tf.reshape(input_length, [-1]), 'int32')

    # tf.nn.ctc_loss requires an int32 SparseTensor of labels; the cast is
    # a no-op when the labels are already int32.
    sparse_labels = tf.cast(
        K.ctc_label_dense_to_sparse(labels, label_length), tf.int32)

    return tf.nn.ctc_loss(sparse_labels,
                          y_pred,
                          input_length,
                          preprocess_collapse_repeated=True,
                          ctc_merge_repeated=False,
                          time_major=False,
                          ignore_longer_outputs_than_inputs=True)
def ctc_batch_cost(y_true, y_pred, input_length, label_length):
    """Computes the CTC loss of every element in the batch.

    # Arguments
        y_true: dense tensor containing the truth labels.
        y_pred: rank-3 tensor containing the prediction; its first two
            axes are swapped before the loss is computed (presumably
            `(samples, time_steps, num_categories)` softmax output
            -- TODO confirm).
        input_length: tensor with a trailing axis of size 1 containing the
            sequence length for each batch item in `y_pred`.
        label_length: tensor with a trailing axis of size 1 containing the
            sequence length for each batch item in `y_true`.

    # Returns
        Tensor with a trailing axis of size 1 containing the CTC loss of
        each element.
    """
    true_lengths = tf.to_int32(tf.squeeze(label_length, axis=-1))
    pred_lengths = tf.to_int32(tf.squeeze(input_length, axis=-1))
    targets = tf.to_int32(
        K.ctc_label_dense_to_sparse(y_true, true_lengths))

    # Swap the first two axes, then take the log; the epsilon keeps the
    # log finite when a probability is exactly zero.
    time_major = tf.transpose(y_pred, perm=[1, 0, 2])
    log_probs = tf.log(time_major + K.epsilon())

    per_sample = tf.nn.ctc_loss(inputs=log_probs,
                                labels=targets,
                                sequence_length=pred_lengths,
                                ignore_longer_outputs_than_inputs=True)
    return tf.expand_dims(per_sample, 1)
def ctc_lambda_func(self, args):
    """Runs CTC loss algorithm on each batch element.

    # Arguments
        args: tuple `(y_pred, y_true, input_length, label_length)`.
            `y_pred` is a rank-3 tensor whose first two axes are swapped
            before the loss is computed (presumably
            `(samples, time_steps, num_categories)` softmax output
            -- TODO confirm); `y_true` is the dense label tensor;
            `input_length` and `label_length` hold one length per batch
            item.

    # Returns
        Tensor with a trailing axis of size 1 containing the CTC loss of
        each element.
    """
    y_pred, y_true, input_length, label_length = args

    # Flatten to rank-1 vectors instead of using a bare squeeze: a bare
    # squeeze also removes the batch axis when the batch holds one sample.
    label_length = math_ops.to_int32(array_ops.reshape(label_length, [-1]))
    input_length = math_ops.to_int32(array_ops.reshape(input_length, [-1]))

    sparse_labels = math_ops.to_int32(
        ctc_label_dense_to_sparse(y_true, label_length))

    # Swap the first two axes, then take the log; the 1e-7 offset keeps
    # the log finite when a probability is exactly zero.
    y_pred = math_ops.log(
        array_ops.transpose(y_pred, perm=[1, 0, 2]) + 1e-7)

    return array_ops.expand_dims(
        ctc.ctc_loss(inputs=y_pred,
                     labels=sparse_labels,
                     sequence_length=input_length,
                     ignore_longer_outputs_than_inputs=True),
        1)
def ctc_complete_analysis_lambda_func(args, **arguments):
    """
    Complete CTC analysis using Keras and tensorflow

    Decodes the predictions (greedy or beam search) and compares the first
    decoded path with the labels.

    WARNING : tf is required

    :param args: y_pred, labels, input_length, label_len
    :param arguments: greedy, beam_width, top_paths (all three keys are
        read, so all three must be supplied)
    :return: ler = label error rate, cast to float32
    """
    y_pred, labels, input_length, label_len = args

    assert (K.backend() == 'tensorflow')

    # Swap the first two axes, then take the log; the 1e-8 offset keeps
    # the log finite when a probability is exactly zero.
    batch = tf.log(tf.transpose(y_pred, perm=[1, 0, 2]) + 1e-8)

    # Flatten to a rank-1 vector instead of using a bare squeeze: a bare
    # squeeze also removes the batch axis when the batch holds one sample.
    input_length = tf.to_int32(tf.reshape(input_length, [-1]))

    greedy = arguments['greedy']
    beam_width = arguments['beam_width']
    top_paths = arguments['top_paths']

    if greedy:
        (decoded, log_prob) = ctc.ctc_greedy_decoder(
            inputs=batch, sequence_length=input_length)
    else:
        (decoded, log_prob) = ctc.ctc_beam_search_decoder(
            inputs=batch,
            sequence_length=input_length,
            beam_width=beam_width,
            top_paths=top_paths)

    # NOTE(review): the decoded path is cast to float32, presumably to
    # match the dtype of `labels` -- confirm against the caller.
    cast_decoded = tf.cast(decoded[0], tf.float32)

    sparse_y = K.ctc_label_dense_to_sparse(
        labels, tf.cast(tf.reshape(label_len, [-1]), tf.int32))

    # `tf_edit_distance` and `Kreshape_To1D` are helpers defined outside
    # this block.
    ed_tensor = tf_edit_distance(cast_decoded, sparse_y, norm=True)
    ler_per_seq = Kreshape_To1D(ed_tensor)

    return K.cast(ler_per_seq, dtype='float32')