def evaluate(self, outputs, references, reference_seq_length):
    '''evaluate the output of the decoder

    args:
        outputs: the outputs of the decoder as a dictionary
        references: the references as a dictionary
        reference_seq_length: the sequence lengths of the references

    Returns:
        the error of the outputs
    '''

    #stack the output label sequences of all utterances
    stacked_outputs = {
        t:ops.seq2nonseq(outputs[t][0], outputs[t][1])
        for t in outputs}

    #create the stacked targets
    stacked_targets = {
        t:tf.cast(ops.seq2nonseq(references[t], reference_seq_length[t]),
                  tf.int32)
        for t in references}

    #compute the fraction of labels that differ from the references
    losses = [
        tf.reduce_mean(tf.cast(tf.not_equal(
            stacked_outputs[o], stacked_targets[o]), tf.float32))
        for o in outputs]

    loss = tf.reduce_mean(losses)

    return loss
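
#The sketch below is a pure-numpy reference for the error computed above,
#assuming ops.seq2nonseq concatenates the valid (unpadded) timesteps of
#every sequence in the batch; the helper is illustrative only and is not
#used by this module.
def _label_error_rate_sketch(outputs, references, lengths):
    '''compute the fraction of mismatched labels for one target stream

    args:
        outputs: [batch_size x time] numpy array of output labels
        references: [batch_size x time] numpy array of reference labels
        lengths: [batch_size] numpy array of sequence lengths

    Returns:
        the label error rate as a float
    '''
    import numpy as np
    errors, total = 0, 0
    for out, ref, length in zip(outputs, references, lengths):
        errors += int(np.sum(out[:length] != ref[:length]))
        total += int(length)
    return float(errors)/total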
def update_evaluation_loss(self, loss, outputs, references,
                           reference_seq_length):
    '''update the evaluation loss

    args:
        loss: the current evaluation loss
        outputs: the outputs of the decoder as a dictionary
        references: the references as a dictionary
        reference_seq_length: the sequence lengths of the references

    Returns:
        an op to update the evaluation loss
    '''

    #create a variable to hold the total number of reference targets
    num_targets = tf.get_variable(
        name='num_targets',
        shape=[],
        dtype=tf.float32,
        initializer=tf.zeros_initializer(),
        trainable=False
    )

    #stack the output label sequences of all utterances
    stacked_outputs = {
        t:ops.seq2nonseq(outputs[t][0], outputs[t][1])
        for t in outputs}

    #create the stacked targets
    stacked_targets = {
        t:tf.cast(ops.seq2nonseq(references[t], reference_seq_length[t]),
                  tf.int32)
        for t in references}

    #compute the number of errors
    errors = [
        tf.reduce_sum(tf.cast(tf.not_equal(
            stacked_outputs[o], stacked_targets[o]), tf.float32))
        for o in outputs]

    errors = tf.reduce_sum(errors)

    #compute the number of targets in this batch
    batch_targets = tf.reduce_sum([
        tf.reduce_sum(lengths)
        for lengths in reference_seq_length.values()])

    new_num_targets = num_targets + tf.cast(batch_targets, tf.float32)

    #an operation to update the running average of the loss
    update_loss = loss.assign(
        (loss*num_targets + errors)/new_num_targets).op

    #add an operation to update the number of targets
    with tf.control_dependencies([update_loss]):
        update_loss = num_targets.assign(new_num_targets).op

    return update_loss
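
#A numeric sketch of the running average that the op above maintains,
#assuming it is run once per evaluation batch: with 5 errors out of 20
#targets in the first batch and 1 error out of 30 targets in the second,
#the variables evolve as
#
#   batch 1: loss = (0.00*0 + 5)/(0 + 20) = 0.25, num_targets = 20
#   batch 2: loss = (0.25*20 + 1)/(20 + 30) = 0.12, num_targets = 50
#
#which equals the total error rate over both batches, 6/50 = 0.12.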
def cross_entropy_eos(targets, logits, logit_seq_length,
                      target_seq_length):
    '''
    cross entropy loss with an end of sequence label added

    Args:
        targets: a dictionary of [batch_size x time x ...] tensors
            containing the targets
        logits: a dictionary of [batch_size x time x ...] tensors
            containing the logits
        logit_seq_length: a dictionary of [batch_size] vectors containing
            the logit sequence lengths
        target_seq_length: a dictionary of [batch_size] vectors containing
            the target sequence lengths

    Returns:
        a scalar value containing the loss
    '''

    with tf.name_scope('cross_entropy_loss'):

        losses = []
        batch_size = tf.shape(list(targets.values())[0])[0]

        for t in targets:
            with tf.name_scope('cross_entropy_loss'):

                output_dim = tf.shape(logits[t])[2]

                #get the logits for the final timestep
                indices = tf.stack([tf.range(batch_size),
                                    logit_seq_length[t] - 1],
                                   axis=1)
                final_logits = tf.gather_nd(logits[t], indices)

                #stack all the logits except the final logits
                stacked_logits = ops.seq2nonseq(logits[t],
                                                logit_seq_length[t] - 1)

                #create the stacked targets
                stacked_targets = tf.cast(
                    ops.seq2nonseq(targets[t], target_seq_length[t]),
                    tf.int32)

                #create the targets for the end of sequence labels, the
                #last label of the output distribution is used
                final_targets = tf.tile([output_dim-1], [batch_size])

                #add the final logits and targets
                stacked_logits = tf.concat([stacked_logits, final_logits],
                                           0)
                stacked_targets = tf.concat([stacked_targets, final_targets],
                                            0)

                #compute the cross-entropy loss
                losses.append(tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=stacked_logits,
                        labels=stacked_targets)))

        loss = tf.reduce_sum(losses)

    return loss
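
#A toy illustration of the end of sequence handling above, assuming the
#decoder produces one more logit than there are targets (logit_seq_length
#= target_seq_length + 1) and that the last entry of the output
#distribution is reserved as the end of sequence label: for targets
#[3, 1, 4] and logit_seq_length 4, the logits at timesteps 0-2 are paired
#with the labels 3, 1 and 4, and the logit at timestep 3 is paired with
#the label output_dim - 1, so the model learns to emit the end of
#sequence label after the final target.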
def marigin_loss(targets, logits, logit_seq_length, target_seq_length):
    '''
    margin loss

    Args:
        targets: a dictionary of [batch_size x time x ...] tensors
            containing the targets
        logits: a dictionary of [batch_size x time x ...] tensors
            containing the logits
        logit_seq_length: a dictionary of [batch_size] vectors containing
            the logit sequence lengths
        target_seq_length: a dictionary of [batch_size] vectors containing
            the target sequence lengths

    Returns:
        a scalar value containing the loss
    '''

    with tf.name_scope('margin_loss'):
        losses = []

        for t in targets:
            #stack the logits and drop the singleton output dimension
            stacked_logits = tf.squeeze(
                ops.seq2nonseq(logits[t], logit_seq_length[t]), [1])
            stacked_probs = tf.nn.sigmoid(stacked_logits)

            #create the stacked targets
            stacked_targets = tf.to_float(
                ops.seq2nonseq(targets[t], target_seq_length[t]))

            #compute the lower and upper margins
            lower = tf.square(tf.maximum(0.0, stacked_probs - 0.1))
            upper = tf.square(tf.maximum(0.0, 0.9 - stacked_probs))

            #compute the loss
            losses.append(
                tf.reduce_mean(stacked_targets*upper
                               + (1 - stacked_targets)*lower))

        loss = tf.reduce_sum(losses)

    return loss
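
#A pure-numpy sketch of the margin loss above for a single target stream,
#using the same fixed margins 0.9 and 0.1 (the same form as the margin
#loss used for capsule networks, without a down-weighting factor on the
#negative term); illustrative only, not used by this module.
def _margin_loss_sketch(targets, probs):
    '''targets: flat numpy array of labels in {0, 1}
    probs: flat numpy array of sigmoid probabilities'''
    import numpy as np
    upper = np.maximum(0.0, 0.9 - probs)**2
    lower = np.maximum(0.0, probs - 0.1)**2
    return float(np.mean(targets*upper + (1 - targets)*lower))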
def cross_entropy(targets, logits, logit_seq_length, target_seq_length):
    '''
    cross entropy loss

    Args:
        targets: a dictionary of [batch_size x time x ...] tensors
            containing the targets
        logits: a dictionary of [batch_size x time x ...] tensors
            containing the logits
        logit_seq_length: a dictionary of [batch_size] vectors containing
            the logit sequence lengths
        target_seq_length: a dictionary of [batch_size] vectors containing
            the target sequence lengths

    Returns:
        a scalar value containing the loss
    '''

    with tf.name_scope('cross_entropy_loss'):
        losses = []

        for t in targets:
            #stack the logits
            stacked_logits = ops.seq2nonseq(logits[t], logit_seq_length[t])

            #create the stacked targets
            stacked_targets = ops.seq2nonseq(targets[t],
                                             target_seq_length[t])
            stacked_targets = tf.cast(stacked_targets, tf.int32)

            losses.append(tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=stacked_logits,
                    labels=stacked_targets)))

        loss = tf.reduce_sum(losses)

    return loss
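
#A minimal usage sketch of the loss above; the target name, tensors and
#shapes are illustrative assumptions, not part of this module:
#
#   #one entry per target stream, e.g. phoneme labels
#   targets = {'phones': label_tensor}     #[batch_size x time]
#   logits = {'phones': logit_tensor}      #[batch_size x time x num_labels]
#   loss = cross_entropy(
#       targets, logits,
#       logit_seq_length={'phones': logit_lengths},
#       target_seq_length={'phones': label_lengths})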
def sigmoid_cross_entropy(targets, logits, logit_seq_length,
                          target_seq_length):
    '''
    Sigmoid cross entropy

    Args:
        targets: a dictionary of [batch_size x time x ...] tensors
            containing the targets
        logits: a dictionary of [batch_size x time x ...] tensors
            containing the logits
        logit_seq_length: a dictionary of [batch_size] vectors containing
            the logit sequence lengths
        target_seq_length: a dictionary of [batch_size] vectors containing
            the target sequence lengths

    Returns:
        a scalar value containing the loss
    '''

    with tf.name_scope('sigmoid_cross_entropy_loss'):
        losses = []

        for t in targets:
            #stack the logits
            stacked_logits = ops.seq2nonseq(logits[t], logit_seq_length[t])

            #create the stacked targets
            stacked_targets = ops.seq2nonseq(
                tf.cast(targets[t], tf.float32),
                target_seq_length[t])

            losses.append(tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=stacked_logits,
                    labels=stacked_targets)))

        loss = tf.reduce_sum(losses)

    return loss
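
#A pure-numpy reference of the numerically stable elementwise formula that
#tf.nn.sigmoid_cross_entropy_with_logits computes, useful for checking the
#loss above on flat arrays; illustrative only, not used by this module.
def _sigmoid_cross_entropy_sketch(labels, logits):
    '''labels and logits: flat numpy float arrays of the same shape'''
    import numpy as np
    losses = (np.maximum(logits, 0) - logits*labels
              + np.log1p(np.exp(-np.abs(logits))))
    return float(np.mean(losses))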