import re

import tensorflow as tf

import deepSpeech
import helper_routines


def tower_loss(scope, feats, labels, seq_lens):
    """Calculate the total loss on a single tower running the deepSpeech model.

    This function builds the graph for computing the loss per tower (GPU).

    Args:
        scope: unique prefix string identifying the deepSpeech tower,
            e.g. 'tower_0'.
        feats: Tensor of shape BxFxT representing the audio features
            (MFCCs or spectrogram).
        labels: sparse tensor holding labels of each utterance.
        seq_lens: tensor of shape [batch_size] holding the sequence length
            per input utterance.

    Returns:
        Scalar Tensor containing the total loss for a batch of data.
    """
    # Build inference Graph. ARGS holds the parsed command-line flags,
    # defined at module level elsewhere in this script.
    logits = deepSpeech.inference(feats, seq_lens, ARGS)

    # Build the portion of the Graph calculating the losses. Note that we
    # will assemble the total_loss using a custom function below.
    _ = deepSpeech.loss(logits, labels, seq_lens)

    # Assemble all of the losses for the current tower only.
    losses = tf.get_collection('losses', scope)

    # Calculate the total loss for the current tower.
    total_loss = tf.add_n(losses, name='total_loss')

    # Compute the moving average of all individual losses and the total loss.
    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    loss_averages_op = loss_averages.apply(losses + [total_loss])

    # Attach a scalar summary to all individual losses and the total loss;
    # do the same for the averaged version of the losses.
    for loss in losses + [total_loss]:
        # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU
        # training session. This helps the clarity of presentation on
        # TensorBoard.
        loss_name = re.sub('%s_[0-9]*/' % helper_routines.TOWER_NAME,
                           '', loss.op.name)
        # Tag the raw loss as '(raw)' and give the moving-average version
        # of the loss the original loss name.
        tf.summary.scalar(loss_name + '(raw)', loss)
        tf.summary.scalar(loss_name, loss_averages.average(loss))

    # Without this dependency, loss_averages_op would never run.
    with tf.control_dependencies([loss_averages_op]):
        total_loss = tf.identity(total_loss)

    return total_loss
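# The function above is typically invoked once per GPU inside a device loop
# that shares variables across towers. A minimal sketch of that calling
# pattern follows, based on the standard TensorFlow multi-tower recipe; the
# helper name and the pre-split input lists are assumptions for illustration,
# not code from this file.
def _build_towers_sketch(feats_splits, labels_splits, seq_lens_splits):
    """feats_splits, labels_splits, seq_lens_splits: per-tower input shards."""
    tower_losses = []
    with tf.variable_scope(tf.get_variable_scope()):
        for i in range(len(feats_splits)):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' % (helper_routines.TOWER_NAME,
                                              i)) as scope:
                    # Build this tower's loss graph on its own input shard.
                    loss = tower_loss(scope, feats_splits[i],
                                      labels_splits[i], seq_lens_splits[i])
                    tower_losses.append(loss)
                    # Reuse model variables for every subsequent tower.
                    tf.get_variable_scope().reuse_variables()
    return tower_losses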
def tower_loss(sess, scope, feats, labels, seq_lens):
    """Calculate the total loss on a single tower running the deepSpeech model.

    This function builds the graph for computing the loss per tower (GPU).

    Args:
        sess: TensorFlow session passed through to the inference graph builder.
        scope: unique prefix string identifying the deepSpeech tower,
            e.g. 'tower_0'.
        feats: Tensor of shape BxFxT representing the audio features
            (MFCCs or spectrogram).
        labels: sparse tensor holding labels of each utterance.
        seq_lens: tensor of shape [batch_size] holding the sequence length
            per input utterance.

    Returns:
        Scalar Tensor containing the total loss for a batch of data.
    """
    # Build inference Graph.
    logits = deepSpeech.inference(sess, feats, seq_lens, ARGS)

    # Build the portion of the Graph calculating the losses. Here the loss
    # op returned by deepSpeech.loss is used directly as the total loss.
    total_loss = deepSpeech.loss(logits, labels, seq_lens)

    # Compute the moving average of the total loss.
    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    loss_averages_op = loss_averages.apply([total_loss])

    # Attach a scalar summary to the raw total loss and to its
    # moving-average version.
    loss_name = total_loss.op.name
    # Tag the raw loss as '(raw)' and give the moving-average version
    # of the loss the original loss name.
    tf.summary.scalar(loss_name + '(raw)', total_loss)
    tf.summary.scalar(loss_name, loss_averages.average(total_loss))

    # Without this dependency, loss_averages_op would never run.
    with tf.control_dependencies([loss_averages_op]):
        total_loss = tf.identity(total_loss)

    return total_loss
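# In the multi-tower training loop, each tower's loss is differentiated on
# its own GPU and the resulting gradients are averaged before a single
# optimizer step. A minimal sketch of that averaging step, following the
# standard TensorFlow multi-GPU pattern (an illustration, not code from this
# file):
def _average_gradients_sketch(tower_grads):
    """tower_grads: list (one entry per tower) of (gradient, variable) lists."""
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Each grad_and_vars looks like ((grad0, var), (grad1, var), ...):
        # the same variable paired with one gradient per tower.
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, 0), 0)
        # Variables are shared across towers, so take the one from the
        # first tower.
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads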