Example #1
    def evaluate(self, outputs, references, reference_seq_length):
        '''evaluate the output of the decoder

        args:
            outputs: the outputs of the decoder as a dictionary
            references: the references as a dictionary
            reference_seq_length: the sequence lengths of the references

        Returns:
            the error of the outputs
        '''

        #take the first entry of each dictionary with next(iter(...)) so this
        #works on Python 3 as well (dict.values() is not indexable there);
        #on Python 2 this is equivalent to values()[0]
        output = next(iter(outputs.values()))
        reference = next(iter(references.values()))
        reference_length = next(iter(reference_seq_length.values()))

        #select the highest scoring beam element (beam index 0)
        sequences = output[0][:, 0, :]
        lengths = output[1][:, 0]

        #convert the references to sparse representations
        sparse_targets = dense_sequence_to_sparse(reference, reference_length)

        #convert the best sequences to sparse representations
        #NOTE(review): lengths - 1 presumably drops a trailing
        #end-of-sequence token from the decoded sequence — confirm against
        #the decoder that produced the outputs
        sparse_sequences = dense_sequence_to_sparse(sequences, lengths - 1)

        #compute the mean normalized edit distance over the batch
        loss = tf.reduce_mean(
            tf.edit_distance(sparse_sequences, sparse_targets))

        return loss
    def update_evaluation_loss(self, loss, outputs, references,
                               reference_seq_length):
        '''update the evaluation loss

        args:
            loss: the current evaluation loss
            outputs: the outputs of the decoder as a dictionary
            references: the references as a dictionary
            reference_seq_length: the sequence lengths of the references

        Returns:
            an op to update the evaluation loss
        '''

        #create a variable to hold the total number of reference targets
        num_targets = tf.get_variable(name='num_targets',
                                      shape=[],
                                      dtype=tf.float32,
                                      initializer=tf.zeros_initializer(),
                                      trainable=False)

        #take the first entry of each dictionary with next(iter(...)) so this
        #works on Python 3 as well (dict.values() is not indexable there);
        #on Python 2 this is equivalent to values()[0]
        output = next(iter(outputs.values()))
        reference = next(iter(references.values()))
        reference_length = next(iter(reference_seq_length.values()))

        if ('visualize_alignments' in self.conf
                and self.conf['visualize_alignments'] == 'True'):
            #alignments of the best beam element, plotted as an image summary
            alignments = output[3][:, 0]
            tf.summary.image('alignments',
                             tf.expand_dims(alignments, 3),
                             collections=['eval_summaries'])

        #select the highest scoring beam element (beam index 0)
        sequences = output[0][:, 0]
        lengths = output[1][:, 0]

        #convert the references to sparse representations
        #NOTE(review): the - 1 presumably drops a trailing end-of-sequence
        #token from the references — confirm against the data pipeline
        sparse_targets = dense_sequence_to_sparse(
            reference, reference_length - 1)

        #convert the best sequences to sparse representations
        sparse_sequences = dense_sequence_to_sparse(sequences, lengths)

        #compute the total (unnormalized) edit distance in this batch
        errors = tf.reduce_sum(
            tf.edit_distance(sparse_sequences,
                             sparse_targets,
                             normalize=False))

        #compute the number of targets in this batch
        batch_targets = tf.reduce_sum(reference_length)

        new_num_targets = num_targets + tf.cast(batch_targets, tf.float32)

        #an operation to update the running average of the loss
        update_loss = loss.assign(
            (loss * num_targets + errors) / new_num_targets).op

        #make sure the loss is updated before the target count so the
        #running average stays consistent
        with tf.control_dependencies([update_loss]):
            update_loss = num_targets.assign(new_num_targets).op

        return update_loss
Example #3
    def evaluate(self, outputs, references, reference_seq_length):
        '''evaluate the output of the decoder

        args:
            outputs: the outputs of the decoder as a dictionary
            references: the references as a dictionary
            reference_seq_length: the sequence lengths of the references

        Returns:
            the error of the outputs
        '''

        #turn every dense reference into a sparse tensor so it can be
        #compared against the (already sparse) decoded outputs
        sparse_targets = {}
        for name in references:
            sparse_targets[name] = dense_sequence_to_sparse(
                references[name], reference_seq_length[name])

        #mean edit distance for each output
        per_output_losses = []
        for name in outputs:
            distance = tf.edit_distance(outputs[name], sparse_targets[name])
            per_output_losses.append(tf.reduce_mean(distance))

        #average the per-output losses into a single scalar error
        loss = tf.reduce_mean(per_output_losses)

        return loss
Example #4
def CTC(targets, logits, logit_seq_length, target_seq_length):
    '''
    CTC loss

    Args:
        targets: a dictionary of [batch_size x time x ...] tensor containing
            the targets
        logits: a dictionary of [batch_size x time x ...] tensor containing
            the logits
        logit_seq_length: a dictionary of [batch_size] vectors containing
            the logit sequence lengths
        target_seq_length: a dictionary of [batch_size] vectors containing
            the target sequence lengths

    Returns:
        a scalar value containing the loss
    '''
    with tf.name_scope('CTC_loss'):

        #one mean CTC loss per target stream; the dense targets are first
        #converted to the sparse representation ctc_loss expects
        losses = [
            tf.reduce_mean(tf.nn.ctc_loss(
                ops.dense_sequence_to_sparse(
                    targets[name], target_seq_length[name]),
                logits[name],
                logit_seq_length[name],
                time_major=False))
            for name in targets
        ]

        #total loss is the sum over all target streams
        loss = tf.reduce_sum(losses)

    return loss
Example #5
File: ctc_decoder.py — Project: Rpersie/nabu
    def update_evaluation_loss(self, loss, outputs, references,
                               reference_seq_length):
        '''update the evaluation loss

        args:
            loss: the current evaluation loss
            outputs: the outputs of the decoder as a dictionary
            references: the references as a dictionary
            reference_seq_length: the sequence lengths of the references

        Returns:
            an op to update the evalution loss
        '''

        #running count of all reference targets seen so far
        num_targets = tf.get_variable(
            name='num_targets',
            shape=[],
            dtype=tf.float32,
            initializer=tf.zeros_initializer(),
            trainable=False)

        #sparse versions of the dense references, keyed like the outputs
        sparse_targets = {
            name: dense_sequence_to_sparse(
                references[name], reference_seq_length[name])
            for name in references}

        #total (unnormalized) edit distance over all outputs in this batch
        batch_errors = tf.reduce_sum([
            tf.reduce_sum(tf.edit_distance(
                outputs[name], sparse_targets[name], normalize=False))
            for name in outputs])

        #total number of reference targets in this batch
        batch_targets = tf.reduce_sum(
            [tf.reduce_sum(l) for l in reference_seq_length.values()])

        updated_num_targets = num_targets + tf.cast(batch_targets, tf.float32)

        #update the running average of the loss first ...
        update_op = loss.assign(
            (loss * num_targets + batch_errors) / updated_num_targets).op

        #... and only then the target count, so the average stays consistent
        with tf.control_dependencies([update_op]):
            update_op = num_targets.assign(updated_num_targets).op

        return update_op