def evaluate(self, outputs, references, reference_seq_length):
    '''evaluate the output of the decoder

    Args:
        outputs: the outputs of the decoder as a dictionary
        references: the references as a dictionary
        reference_seq_length: the sequence lengths of the references

    Returns:
        the error of the outputs
    '''

    sequences = list(outputs.values())[0][0][:, 0, :]
    lengths = list(outputs.values())[0][1][:, 0]

    #convert the references to sparse representations
    sparse_targets = dense_sequence_to_sparse(
        list(references.values())[0],
        list(reference_seq_length.values())[0])

    #convert the best sequences to sparse representations, dropping the
    #last element (presumably a sequence-end token)
    sparse_sequences = dense_sequence_to_sparse(
        sequences, lengths - 1)

    #compute the edit distance
    loss = tf.reduce_mean(
        tf.edit_distance(sparse_sequences, sparse_targets))

    return loss
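#the evaluation above relies on a dense_sequence_to_sparse helper that is
#not shown here; the following is a minimal sketch of what such a helper
#could look like, assuming it should keep only the entries within each
#sequence length and drop the padding (the actual implementation in the
#codebase may differ)
import tensorflow as tf

def dense_sequence_to_sparse(sequences, sequence_lengths):
    '''convert a dense [batch_size x time] tensor of padded sequences
    into a SparseTensor containing only the valid entries

    Args:
        sequences: a [batch_size x time] dense tensor
        sequence_lengths: a [batch_size] vector of sequence lengths

    Returns:
        a SparseTensor with the padding removed
    '''

    with tf.name_scope('dense_sequence_to_sparse'):
        #mask that is True for entries within each sequence length
        mask = tf.sequence_mask(sequence_lengths, tf.shape(sequences)[1])

        #the [row, column] indices of the valid entries
        indices = tf.where(mask)

        #the values of the valid entries, in the same row-major order
        values = tf.boolean_mask(sequences, mask)

        shape = tf.cast(tf.shape(sequences), tf.int64)

        return tf.SparseTensor(indices, values, shape)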
def update_evaluation_loss(self, loss, outputs, references,
                           reference_seq_length):
    '''update the evaluation loss

    Args:
        loss: the current evaluation loss
        outputs: the outputs of the decoder as a dictionary
        references: the references as a dictionary
        reference_seq_length: the sequence lengths of the references

    Returns:
        an op to update the evaluation loss
    '''

    #create a variable to hold the total number of reference targets
    num_targets = tf.get_variable(
        name='num_targets',
        shape=[],
        dtype=tf.float32,
        initializer=tf.zeros_initializer(),
        trainable=False)

    if ('visualize_alignments' in self.conf
            and self.conf['visualize_alignments'] == 'True'):

        alignments = list(outputs.values())[0][3][:, 0]

        tf.summary.image(
            'alignments',
            tf.expand_dims(alignments, 3),
            collections=['eval_summaries'])

    sequences = list(outputs.values())[0][0][:, 0]
    lengths = list(outputs.values())[0][1][:, 0]

    #convert the references to sparse representations, dropping the last
    #label (presumably a sequence-end token)
    sparse_targets = dense_sequence_to_sparse(
        list(references.values())[0],
        list(reference_seq_length.values())[0] - 1)

    #convert the best sequences to sparse representations
    sparse_sequences = dense_sequence_to_sparse(sequences, lengths)

    #compute the edit distance
    errors = tf.edit_distance(
        sparse_sequences, sparse_targets, normalize=False)
    errors = tf.reduce_sum(errors)

    #compute the number of targets in this batch
    batch_targets = tf.reduce_sum(
        list(reference_seq_length.values())[0])
    new_num_targets = num_targets + tf.cast(batch_targets, tf.float32)

    #an operation to update the loss
    update_loss = loss.assign(
        (loss * num_targets + errors) / new_num_targets).op

    #add an operation to update the number of targets
    with tf.control_dependencies([update_loss]):
        update_loss = num_targets.assign(new_num_targets).op

    return update_loss
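#the update above keeps a running average:
#new_loss = (loss * num_targets + errors) / (num_targets + batch_targets)
#below is a self-contained numeric check of that rule with made-up numbers:
#a running error rate of 0.2 over 100 targets, plus a batch contributing
#30 errors over 50 targets, should give (0.2 * 100 + 30) / 150 = 1 / 3
import tensorflow as tf

demo_loss = tf.get_variable(
    'demo_loss', shape=[], dtype=tf.float32,
    initializer=tf.constant_initializer(0.2), trainable=False)
demo_num_targets = tf.get_variable(
    'demo_num_targets', shape=[], dtype=tf.float32,
    initializer=tf.constant_initializer(100.), trainable=False)
demo_errors = tf.constant(30.)
demo_batch_targets = tf.constant(50.)

demo_new_num_targets = demo_num_targets + demo_batch_targets
demo_update_loss = demo_loss.assign(
    (demo_loss * demo_num_targets + demo_errors)
    / demo_new_num_targets).op

#the control dependency makes sure the loss is updated with the old
#number of targets before the number of targets itself is updated
with tf.control_dependencies([demo_update_loss]):
    demo_update = demo_num_targets.assign(demo_new_num_targets).op

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(demo_update)
    print(sess.run([demo_loss, demo_num_targets]))  #[0.33333334, 150.0]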
def evaluate(self, outputs, references, reference_seq_length):
    '''evaluate the output of the decoder

    Args:
        outputs: the outputs of the decoder as a dictionary
        references: the references as a dictionary
        reference_seq_length: the sequence lengths of the references

    Returns:
        the error of the outputs
    '''

    #compute the edit distance for the decoded sequences

    #convert the representations to sparse Tensors
    sparse_targets = {
        o: dense_sequence_to_sparse(references[o], reference_seq_length[o])
        for o in references}

    #compute the edit distance
    losses = [
        tf.reduce_mean(tf.edit_distance(outputs[o], sparse_targets[o]))
        for o in outputs]

    loss = tf.reduce_mean(losses)

    return loss
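#a self-contained check of the normalized edit distance used above, with
#two hand-built sparse sequences: the first hypothesis matches its
#reference exactly, the second needs one substitution and one insertion
import tensorflow as tf

demo_hypothesis = tf.SparseTensor(
    indices=[[0, 0], [0, 1], [1, 0]],
    values=tf.constant([1, 2, 3]),
    dense_shape=[2, 2])
demo_truth = tf.SparseTensor(
    indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
    values=tf.constant([1, 2, 4, 5]),
    dense_shape=[2, 2])

with tf.Session() as sess:
    #per-sequence distances, normalized by the reference lengths: [0., 1.]
    print(sess.run(tf.edit_distance(demo_hypothesis, demo_truth)))

    #the mean over the batch, as computed in evaluate: 0.5
    print(sess.run(tf.reduce_mean(
        tf.edit_distance(demo_hypothesis, demo_truth))))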
def CTC(targets, logits, logit_seq_length, target_seq_length):
    '''CTC loss

    Args:
        targets: a dictionary of [batch_size x time x ...] tensors
            containing the targets
        logits: a dictionary of [batch_size x time x ...] tensors
            containing the logits
        logit_seq_length: a dictionary of [batch_size] vectors containing
            the logit sequence lengths
        target_seq_length: a dictionary of [batch_size] vectors containing
            the target sequence lengths

    Returns:
        a scalar value containing the loss
    '''

    with tf.name_scope('CTC_loss'):

        losses = []

        for t in targets:
            #convert the targets into a sparse tensor representation
            sparse_targets = ops.dense_sequence_to_sparse(
                targets[t], target_seq_length[t])

            losses.append(tf.reduce_mean(tf.nn.ctc_loss(
                sparse_targets,
                logits[t],
                logit_seq_length[t],
                time_major=False)))

        loss = tf.reduce_sum(losses)

    return loss
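#a minimal usage sketch of the CTC loss above with random logits and
#made-up targets; the 'char' key and the shapes are purely illustrative,
#and the ops module with dense_sequence_to_sparse (sketched earlier) is
#assumed to be importable
import numpy as np
import tensorflow as tf

batch_size, num_frames, num_classes = 2, 5, 4  #last class is the blank

demo_logits = tf.constant(
    np.random.randn(batch_size, num_frames, num_classes),
    dtype=tf.float32)
demo_logit_seq_length = tf.constant([5, 4], dtype=tf.int32)

demo_targets = tf.constant([[1, 2, 0], [2, 0, 0]], dtype=tf.int32)
demo_target_seq_length = tf.constant([2, 1], dtype=tf.int32)

demo_ctc = CTC(
    {'char': demo_targets}, {'char': demo_logits},
    {'char': demo_logit_seq_length}, {'char': demo_target_seq_length})

with tf.Session() as sess:
    print(sess.run(demo_ctc))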
def update_evaluation_loss(self, loss, outputs, references,
                           reference_seq_length):
    '''update the evaluation loss

    Args:
        loss: the current evaluation loss
        outputs: the outputs of the decoder as a dictionary
        references: the references as a dictionary
        reference_seq_length: the sequence lengths of the references

    Returns:
        an op to update the evaluation loss
    '''

    #create a variable to hold the total number of reference targets
    num_targets = tf.get_variable(
        name='num_targets',
        shape=[],
        dtype=tf.float32,
        initializer=tf.zeros_initializer(),
        trainable=False)

    #compute the edit distance for the decoded sequences

    #convert the representations to sparse Tensors
    sparse_targets = {
        o: dense_sequence_to_sparse(references[o], reference_seq_length[o])
        for o in references}

    #compute the number of errors made in this batch
    errors = [
        tf.reduce_sum(
            tf.edit_distance(outputs[o], sparse_targets[o],
                             normalize=False))
        for o in outputs]

    errors = tf.reduce_sum(errors)

    #compute the number of targets in this batch
    batch_targets = tf.reduce_sum([
        tf.reduce_sum(lengths)
        for lengths in reference_seq_length.values()])
    new_num_targets = num_targets + tf.cast(batch_targets, tf.float32)

    #an operation to update the loss
    update_loss = loss.assign(
        (loss * num_targets + errors) / new_num_targets).op

    #add an operation to update the number of targets
    with tf.control_dependencies([update_loss]):
        update_loss = num_targets.assign(new_num_targets).op

    return update_loss
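#a self-contained check of the per-output aggregation above: two output
#streams (the 'char' and 'phone' names are made up) contribute their
#error counts and target counts to a single error rate
import tensorflow as tf

demo_reference_seq_length = {
    'char': tf.constant([4, 3], dtype=tf.int32),
    'phone': tf.constant([2, 2], dtype=tf.int32)}

#hypothetical unnormalized edit distances, one per output stream
demo_errors = [tf.constant(3.), tf.constant(1.)]
demo_errors = tf.reduce_sum(demo_errors)

#the total number of targets over all outputs: 4 + 3 + 2 + 2 = 11
demo_batch_targets = tf.reduce_sum([
    tf.reduce_sum(lengths)
    for lengths in demo_reference_seq_length.values()])

with tf.Session() as sess:
    #the error rate contributed by this batch: 4 / 11
    print(sess.run(
        demo_errors / tf.cast(demo_batch_targets, tf.float32)))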