def _train_op_fn(loss):
  """Returns the op to optimize the loss."""
  train_ops = []
  # Scale loss by number of replicas.
  if loss_reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE:
    loss = losses_utils.scale_loss_for_distribution(loss)
  if dnn_logits is not None:
    train_ops.extend(
        dnn_optimizer.get_updates(loss, dnn_trainable_variables))
    if dnn_update_ops is not None:
      train_ops.extend(dnn_update_ops)
  if linear_logits is not None:
    train_ops.extend(
        linear_optimizer.get_updates(loss, linear_trainable_variables))
  train_op = control_flow_ops.group(*train_ops)
  return train_op
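# The closure above comes from the wide-and-deep (`DNNLinearCombined`)
# setup: two optimizers update disjoint variable sets against one shared
# loss, and their update ops are grouped into a single train op. Below is a
# minimal sketch of the same pattern using only public APIs; the variables,
# optimizers, and loss are illustrative stand-ins, not the estimator's own,
# and graph mode is assumed because `get_updates` needs a symbolic loss.
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

w_linear = tf.Variable(0.0)               # stand-in for the linear tower
w_dnn = tf.Variable(0.0)                  # stand-in for the DNN tower
loss = tf.square(w_linear + w_dnn - 1.0)  # one loss shared by both towers

train_ops = []
train_ops.extend(
    tf.keras.optimizers.Ftrl(learning_rate=0.1).get_updates(loss, [w_linear]))
train_ops.extend(
    tf.keras.optimizers.Adagrad(learning_rate=0.1).get_updates(loss, [w_dnn]))
train_op = tf.group(*train_ops)           # one op that runs every update

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  sess.run(train_op)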
def _model_loss(model,
                inputs,
                targets,
                output_loss_metrics=None,
                sample_weights=None,
                training=False):
  """Calculates the loss for a given model.

  Args:
    model: The model on which metrics are being calculated.
    inputs: Either a dictionary of inputs to the model or a list of input
      arrays.
    targets: List of target arrays.
    output_loss_metrics: List of metrics that are used to aggregate output
      loss values.
    sample_weights: Optional list of sample weight arrays.
    training: Whether the model should be run in inference or training mode.

  Returns:
    Returns the model output, total loss, loss value calculated using the
    specified loss function and masks for each output. The total loss
    includes regularization losses and applies masking and sample weighting
    to the loss value.
  """
  # TODO(psv): Dedup code here with graph mode prepare_total_loss() fn.
  # Used to keep track of the total loss value (stateless).
  # eg., total_loss = loss_weight_1 * output_1_loss_fn(...) +
  #                   loss_weight_2 * output_2_loss_fn(...) +
  #                   layer losses.
  total_loss = 0
  kwargs = {}
  if model._expects_training_arg:
    kwargs['training'] = training
  if len(inputs) == 1 and not isinstance(inputs, dict):
    inputs = inputs[0]

  # Allow mixed `NumPy` and `EagerTensor` input here.
  if any(
      isinstance(input_t, (np.ndarray, float, int))
      for input_t in nest.flatten(inputs)):
    inputs = nest.map_structure(ops.convert_to_tensor_v2_with_dispatch,
                                inputs)

  outs = model(inputs, **kwargs)
  outs = nest.flatten(outs)

  if targets:
    targets = training_utils_v1.cast_if_floating_dtype_and_mismatch(
        targets, outs)
  # TODO(sallymatson/psv): check if we should do same mismatch fix for weights
  if sample_weights:
    sample_weights = [
        training_utils_v1.cast_if_floating_dtype(
            ops.convert_to_tensor_v2_with_dispatch(val))
        if val is not None else None for val in sample_weights
    ]

  masks = [getattr(t, '_keras_mask', None) for t in outs]
  targets = nest.flatten(targets)

  # Used to keep track of individual output losses.
  output_losses = []

  with backend.name_scope('loss'):
    loss_fns = [
        loss_fn for loss_fn in model.loss_functions if loss_fn is not None
    ]
    custom_losses = model.losses  # Regularization losses

    if not loss_fns and not custom_losses:
      if training:
        raise ValueError('The model cannot be trained '
                         'because it has no loss to optimize.')
      else:
        raise ValueError('The model cannot be evaluated '
                         'because it has no loss to compute.')

    for i, loss_fn in enumerate(loss_fns):
      weights = sample_weights[i] if sample_weights else None
      mask = masks[i]
      with backend.name_scope(model.output_names[i] + '_loss'):
        if mask is not None:
          mask = math_ops.cast(mask, outs[i].dtype)
          # Update weights with mask.
          if weights is None:
            weights = mask
          else:
            # Update dimensions of weights to match with mask if possible.
            weights = math_ops.cast(weights, outs[i].dtype)
            mask, _, weights = (
                losses_utils.squeeze_or_expand_dimensions(
                    mask, sample_weight=weights))
            weights *= mask

        if hasattr(loss_fn, 'reduction'):
          per_sample_losses = loss_fn.call(targets[i], outs[i])
          weighted_losses = losses_utils.compute_weighted_loss(
              per_sample_losses,
              sample_weight=weights,
              reduction=losses_utils.ReductionV2.NONE)
          loss_reduction = loss_fn.reduction

          # `AUTO` loss reduction defaults to `SUM_OVER_BATCH_SIZE` for all
          # compile use cases.
          if loss_reduction == losses_utils.ReductionV2.AUTO:
            loss_reduction = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE

          # Compute the stateless loss value.
          output_loss = losses_utils.reduce_weighted_loss(
              weighted_losses, reduction=loss_reduction)
        else:
          # Compute the stateless loss value for a custom loss class.
          # Here we assume that the class takes care of loss reduction
          # because if this class returns a vector value we cannot
          # differentiate between use case where a custom optimizer
          # expects a vector loss value vs unreduced per-sample loss value.
          output_loss = loss_fn(targets[i], outs[i], sample_weight=weights)
          loss_reduction = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE

      # If the number of outputs is 1 then we don't append the loss metric
      # associated with each model output. When there are multiple outputs
      # associated with a model, each output's loss is calculated and
      # returned as part of the loss_metrics.
      if len(model.outputs) > 1:
        # Keep track of the stateful output loss result.
        output_losses.append(output_loss_metrics[i](output_loss))

      # Scale output loss for distribution. For custom losses we assume
      # reduction was mean.
      if loss_reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE:
        output_loss = losses_utils.scale_loss_for_distribution(output_loss)
      total_loss += model._loss_weights_list[i] * output_loss

    # Add regularization losses
    if custom_losses:
      total_loss += losses_utils.scale_loss_for_distribution(
          math_ops.add_n(custom_losses))

  return outs, total_loss, output_losses, masks
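# The `hasattr(loss_fn, 'reduction')` branch above separates built-in
# `Loss` objects from bare callables; for built-ins, the reduction enum
# decides how per-sample values become a scalar, and `AUTO` resolves to
# `SUM_OVER_BATCH_SIZE` under compile(). A small sketch of those semantics
# with the public API (inputs are illustrative; the commented values follow
# from the arithmetic, per-sample squared errors of 1, 1, 9, 9):
import tensorflow as tf

y_true = tf.constant([[0.0], [0.0], [0.0], [0.0]])
y_pred = tf.constant([[1.0], [1.0], [3.0], [3.0]])

# NONE keeps the per-sample vector, which is what the code above requests
# before applying sample weights itself.
mse_none = tf.keras.losses.MeanSquaredError(
    reduction=tf.keras.losses.Reduction.NONE)
per_sample = mse_none(y_true, y_pred)          # [1., 1., 9., 9.]

# AUTO (the default) resolves to SUM_OVER_BATCH_SIZE, i.e. the mean.
mse_auto = tf.keras.losses.MeanSquaredError()
batch_mean = mse_auto(y_true, y_pred)          # 5.0

# SUM adds the per-sample values instead of averaging them.
mse_sum = tf.keras.losses.MeanSquaredError(
    reduction=tf.keras.losses.Reduction.SUM)
batch_sum = mse_sum(y_true, y_pred)            # 20.0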
def create_estimator_spec_train_op(
    head_name,
    optimizer=None,
    trainable_variables=None,
    train_op_fn=None,
    update_ops=None,
    regularized_training_loss=None,
    loss_reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE):
  """Create train_op for estimator_spec.

  Args:
    head_name: The name of the head.
    optimizer: A `tf.keras.optimizers.Optimizer` instance to optimize the
      loss in TRAIN mode. Namely, sets
      `train_op = optimizer.get_updates(loss, trainable_variables)`, which
      updates variables to minimize `loss`.
    trainable_variables: A list or tuple of `Variable` objects to update to
      minimize `loss`. In Tensorflow 1.x, by default these are the list of
      variables collected in the graph under the key
      `GraphKeys.TRAINABLE_VARIABLES`. As Tensorflow 2.x doesn't have
      collections and GraphKeys, trainable_variables need to be passed
      explicitly here.
    train_op_fn: Function that takes a scalar loss `Tensor` and returns
      `train_op`. Used if `optimizer` is `None`.
    update_ops: A list or tuple of update ops to be run at training time.
      For example, layers such as BatchNormalization create mean and
      variance update ops that need to be run at training time. In
      Tensorflow 1.x, these are thrown into an UPDATE_OPS collection. As
      Tensorflow 2.x doesn't have collections, update_ops need to be passed
      explicitly here.
    regularized_training_loss: A scalar for total training loss that
      includes all regularization losses. If you're not using optimizer to
      generate train op, make sure to scale the loss correctly before
      passing it in. The loss typically needs to be scaled down by the
      number of workers.
    loss_reduction: One of `tf.keras.losses.Reduction` except `NONE`.
      Describes how to reduce training loss over batch. Defaults to
      `SUM_OVER_BATCH_SIZE`.

  Returns:
    A train op for EstimatorSpec.
  """
  del head_name
  validate_update_ops(update_ops)
  with ops.name_scope(''):  # Reset all previous name_scope.
    # Add training as the name_scope to be compatible with Keras.
    with ops.name_scope('training'):
      if optimizer is not None:
        if train_op_fn is not None:
          raise ValueError('train_op_fn and optimizer cannot both be set.')
        validate_v2_optimizer(optimizer)
        validate_trainable_variables(trainable_variables)
        # Scale loss by number of replicas.
        if loss_reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE:
          regularized_training_loss = (
              losses_utils.scale_loss_for_distribution(
                  regularized_training_loss))
        train_op = optimizer.get_updates(regularized_training_loss,
                                         trainable_variables)[0]
      elif train_op_fn is not None:
        train_op = train_op_fn(regularized_training_loss)
      else:
        raise ValueError('train_op_fn and optimizer cannot both be None.')
      if update_ops is not None:
        train_op = tf.group(train_op, *update_ops)
      return train_op
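# Both branches above divide a SUM_OVER_BATCH_SIZE loss by the replica
# count before computing gradients, because a distribution strategy SUMS
# gradients across replicas: without the division, the effective gradient
# would be num_replicas times too large. A conceptual stand-in for the
# internal helper (the function name here is hypothetical):
import tensorflow as tf

def scale_loss_for_distribution_sketch(loss_value):
  """Divides a per-replica mean loss by the number of replicas in sync."""
  num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
  if num_replicas > 1:
    loss_value *= (1. / num_replicas)
  return loss_value  # outside a strategy scope this is a no-op

scaled = scale_loss_for_distribution_sketch(tf.constant(4.0))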
def __call__(self,
             y_true,
             y_pred,
             sample_weight=None,
             regularization_losses=None):
  """Computes the overall loss.

  Args:
    y_true: An arbitrary structure of Tensors representing the ground truth.
    y_pred: An arbitrary structure of Tensors representing a Model's
      outputs.
    sample_weight: An arbitrary structure of Tensors representing the
      per-sample loss weights. If one Tensor is passed, it is used for all
      losses. If multiple Tensors are passed, the structure should match
      `y_pred`.
    regularization_losses: Additional losses to be added to the total loss.

  Returns:
    The total loss as a scalar `Tensor`, or a zero scalar if the model has
    no compiled loss.
  """
  y_true = self._conform_to_outputs(y_pred, y_true)
  sample_weight = self._conform_to_outputs(y_pred, sample_weight)

  if not self._built:
    self.build(y_pred)

  y_pred = nest.flatten(y_pred)
  y_true = nest.flatten(y_true)
  sample_weight = nest.flatten(sample_weight)

  loss_values = []  # Used for gradient calculation.
  loss_metric_values = []  # Used for loss metric calculation.
  batch_dim = None
  zip_args = (y_true, y_pred, sample_weight, self._losses,
              self._loss_weights, self._per_output_metrics)
  for y_t, y_p, sw, loss_obj, loss_weight, metric_obj in zip(*zip_args):
    if y_t is None or loss_obj is None:  # Ok to have no loss for an output.
      continue

    y_t, y_p, sw = match_dtype_and_rank(y_t, y_p, sw)
    sw = apply_mask(y_p, sw, get_mask(y_p))
    loss_value = loss_obj(y_t, y_p, sample_weight=sw)

    loss_metric_value = loss_value
    # Correct for the `Mean` loss metrics counting each replica as a batch.
    if loss_obj.reduction == losses_utils.ReductionV2.SUM:
      loss_metric_value *= ds_context.get_strategy().num_replicas_in_sync

    if batch_dim is None:
      if tf_utils.is_ragged(y_t):
        batch_dim = y_t.nrows()
      else:
        batch_dim = array_ops.shape(y_t)[0]

    if metric_obj is not None:
      metric_obj.update_state(loss_metric_value, sample_weight=batch_dim)

    if loss_weight is not None:
      loss_value *= loss_weight
      loss_metric_value *= loss_weight

    if (loss_obj.reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE or
        loss_obj.reduction == losses_utils.ReductionV2.AUTO):
      loss_value = losses_utils.scale_loss_for_distribution(loss_value)

    loss_values.append(loss_value)
    loss_metric_values.append(loss_metric_value)

  if regularization_losses:
    regularization_losses = losses_utils.cast_losses_to_common_dtype(
        regularization_losses)
    reg_loss = math_ops.add_n(regularization_losses)
    loss_metric_values.append(reg_loss)
    loss_values.append(losses_utils.scale_loss_for_distribution(reg_loss))

  if loss_values:
    loss_metric_values = losses_utils.cast_losses_to_common_dtype(
        loss_metric_values)
    total_loss_metric_value = math_ops.add_n(loss_metric_values)
    self._loss_metric.update_state(
        total_loss_metric_value, sample_weight=batch_dim)

    loss_values = losses_utils.cast_losses_to_common_dtype(loss_values)
    total_loss = math_ops.add_n(loss_values)
    return total_loss
  else:
    # Ok for a model to have no compiled loss.
    return array_ops.zeros(shape=())
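# Stripped of masking and distribution details, the loop above computes
# total_loss = sum_i(loss_weight_i * loss_i(y_true_i, y_pred_i)) plus the
# sum of regularization losses. A hedged reproduction with public APIs
# only; the tensors, losses, and weights below are illustrative:
import tensorflow as tf

y_true = [tf.ones((8, 1)), tf.zeros((8, 1))]        # two model outputs
y_pred = [tf.zeros((8, 1)), 0.5 * tf.ones((8, 1))]
sample_weight = [None, tf.ones((8,))]
losses = [tf.keras.losses.MeanSquaredError(),
          tf.keras.losses.MeanAbsoluteError()]
loss_weights = [1.0, 0.5]
regularization_losses = [tf.constant(0.01)]

loss_values = []
for y_t, y_p, sw, loss_obj, loss_weight in zip(
    y_true, y_pred, sample_weight, losses, loss_weights):
  loss_values.append(loss_weight * loss_obj(y_t, y_p, sample_weight=sw))
loss_values.append(tf.add_n(regularization_losses))
total_loss = tf.add_n(loss_values)  # the scalar used for gradients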
def _model_loss(model,
                inputs,
                targets,
                output_loss_metrics=None,
                sample_weights=None,
                training=False):
  """Calculates the loss for a given model.

  Arguments:
    model: The model on which metrics are being calculated.
    inputs: Either a dictionary of inputs to the model or a list of input
      arrays.
    targets: List of target arrays.
    output_loss_metrics: List of metrics that are used to aggregate output
      loss values.
    sample_weights: Optional list of sample weight arrays.
    training: Whether the model should be run in inference or training mode.

  Returns:
    Returns the model output, total loss, loss value calculated using the
    specified loss function and masks for each output. The total loss
    includes regularization losses and applies masking and sample weighting
    to the loss value.
  """
  # Used to keep track of the total loss value (stateless).
  # eg., total_loss = loss_weight_1 * output_1_loss_fn(...) +
  #                   loss_weight_2 * output_2_loss_fn(...) +
  #                   layer losses.
  total_loss = 0
  kwargs = {}
  if model._expects_training_arg:
    kwargs['training'] = training
  if len(inputs) == 1 and not isinstance(inputs, dict):
    inputs = inputs[0]

  # Allow mixed `NumPy` and `EagerTensor` input here.
  if any(
      isinstance(input_t, (np.ndarray, float, int))
      for input_t in nest.flatten(inputs)):
    inputs = nest.map_structure(ops.convert_to_tensor, inputs)

  outs = model(inputs, **kwargs)
  outs = nest.flatten(outs)
  # `None` by default for `EagerTensors`.
  masks = [t._keras_mask for t in outs]
  targets = nest.flatten(targets)

  # Used to keep track of individual output losses (stateless).
  output_losses = []
  # Used to keep track of individual output losses (stateful).
  aggregated_output_losses = []

  with backend.name_scope('loss'):
    for i, loss_fn in enumerate(model.loss_functions):
      weights = sample_weights[i] if sample_weights else None
      mask = masks[i]
      with backend.name_scope(model.output_names[i] + '_loss'):
        if mask is not None:
          mask = math_ops.cast(mask, outs[i].dtype)
          # Update weights with mask.
          if weights is None:
            weights = mask
          else:
            # Update dimensions of weights to match with mask if possible.
            mask, _, weights = (
                losses_utils.squeeze_or_expand_dimensions(
                    mask, None, weights))
            weights *= mask

        # Reset reduction on the loss so that we can get the per sample loss
        # value. We use this to get both the stateless and stateful loss
        # values without having to compute the underlying loss function
        # twice.
        weighted_losses = None
        if hasattr(loss_fn, 'reduction'):
          current_loss_reduction = loss_fn.reduction
          loss_fn.reduction = losses_utils.ReductionV2.NONE
          weighted_losses = loss_fn(targets[i], outs[i],
                                    sample_weight=weights)
          loss_fn.reduction = current_loss_reduction

          # Compute the stateless loss value.
          output_loss = losses_utils.reduce_weighted_loss(weighted_losses)
        else:
          # Compute the stateless loss value for a custom loss class.
          # Here we assume that the class takes care of loss reduction
          # because if this class returns a vector value we cannot
          # differentiate between use case where a custom optimizer
          # expects a vector loss value vs unreduced per-sample loss value.
          output_loss = loss_fn(targets[i], outs[i], sample_weight=weights)

      # If the number of outputs is 1 then we don't append the loss metric
      # associated with each model output. When there are multiple outputs
      # associated with a model, each output's loss is calculated and
      # returned as part of the loss_metrics.
      if len(model.outputs) > 1:
        output_losses.append(backend.mean(output_loss))
        if output_loss_metrics is not None:
          # Compute the stateful loss value.
          if weighted_losses is not None:
            aggregated_output_loss = output_loss_metrics[i](weighted_losses)
          else:
            # Custom loss class.
            aggregated_output_loss = training_utils.call_metric_function(
                output_loss_metrics[i], targets[i], outs[i], weights=weights)
          # Keep track of the stateful output loss result.
          aggregated_output_losses.append(aggregated_output_loss)

      loss_weight = model.loss_weights_list[i]
      if total_loss is None:
        total_loss = loss_weight * output_loss
      else:
        total_loss += loss_weight * output_loss

    total_loss = backend.mean(total_loss)
    # Add regularization losses
    custom_losses = model.losses
    if custom_losses:
      total_loss += losses_utils.scale_loss_for_distribution(
          math_ops.add_n(custom_losses))
    model._clear_losses()

  return outs, total_loss, output_losses, aggregated_output_losses, masks
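# The "stateless vs stateful" split above maps onto Keras metrics: the
# stateless value is recomputed per batch for gradients, while a `Mean`
# metric accumulates weighted per-sample losses across batches for
# reporting. A hedged sketch with public APIs (the loop and tensors are
# illustrative stand-ins for a training loop):
import tensorflow as tf

loss_tracker = tf.keras.metrics.Mean(name='output_loss')

for _ in range(2):                                  # stand-in training steps
  weighted_losses = tf.random.uniform((8,))         # per-sample losses
  stateless_loss = tf.reduce_mean(weighted_losses)  # this batch only
  loss_tracker.update_state(weighted_losses)        # running aggregate

stateful_loss = loss_tracker.result()  # mean over all 16 samples seen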
def create_estimator_spec_train_op(
    head_name,
    optimizer=None,
    trainable_variables=None,
    train_op_fn=None,
    update_ops=None,
    regularized_training_loss=None,
    loss_reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE):
  """Create train_op for estimator_spec.

  Args:
    head_name: The name of the head.
    optimizer: A `tf.keras.optimizers.Optimizer` instance to optimize the
      loss in TRAIN mode. Namely, sets
      `train_op = optimizer.get_updates(loss, trainable_variables)`, which
      updates variables to minimize `loss`.
    trainable_variables: A list or tuple of `Variable` objects to update to
      minimize `loss`. In Tensorflow 1.x, by default these are the list of
      variables collected in the graph under the key
      `GraphKeys.TRAINABLE_VARIABLES`. As Tensorflow 2.x doesn't have
      collections and GraphKeys, trainable_variables need to be passed
      explicitly here.
    train_op_fn: Function that takes a scalar loss `Tensor` and returns
      `train_op`. Used if `optimizer` is `None`.
    update_ops: A list or tuple of update ops to be run at training time.
      For example, layers such as BatchNormalization create mean and
      variance update ops that need to be run at training time. In
      Tensorflow 1.x, these are thrown into an UPDATE_OPS collection. As
      Tensorflow 2.x doesn't have collections, update_ops need to be passed
      explicitly here.
    regularized_training_loss: A scalar for the total training loss that
      includes all regularization losses. This loss is usually expressed as
      a batch average, so for best results users need to set
      `loss_reduction=SUM_OVER_BATCH_SIZE` when creating the head to avoid
      scaling errors.
    loss_reduction: One of `tf.keras.losses.Reduction` except `NONE`.
      Describes how to reduce training loss over batch. Defaults to
      `SUM_OVER_BATCH_SIZE`.

  Returns:
    A train op for EstimatorSpec.
  """
  validate_update_ops(update_ops)
  with ops.name_scope(head_name, 'head'):
    if optimizer is not None:
      if train_op_fn is not None:
        raise ValueError('train_op_fn and optimizer cannot both be set.')
      validate_v2_optimizer(optimizer)
      validate_trainable_variables(trainable_variables)
      with ops.name_scope(''):  # Reset name_scope.
        # Scale loss by number of replicas.
        if loss_reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE:
          regularized_training_loss = (
              losses_utils.scale_loss_for_distribution(
                  regularized_training_loss))
        train_op = optimizer.get_updates(regularized_training_loss,
                                         trainable_variables)[0]
    elif train_op_fn is not None:
      train_op = train_op_fn(regularized_training_loss)
    else:
      raise ValueError('train_op_fn and optimizer cannot both be None.')
    if update_ops is not None:
      train_op = control_flow_ops.group(train_op, *update_ops)
    return train_op
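# The `update_ops` argument exists for ops the optimizer does not produce,
# most commonly BatchNormalization's moving mean/variance updates. A hedged
# graph-mode sketch of collecting and grouping them; it assumes a TF 2.x
# install where `layer.updates` still exposes the assign ops in v1 graphs,
# and all names here are illustrative:
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

x = tf.ones((4, 3))
bn = tf.keras.layers.BatchNormalization()
y = bn(x, training=True)          # builds the moving-average update ops
loss = tf.reduce_mean(tf.square(y))

opt = tf.keras.optimizers.SGD(learning_rate=0.1)
train_op = opt.get_updates(loss, bn.trainable_variables)[0]
train_op = tf.group(train_op, *bn.updates)  # run BN updates with the step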
def __call__(self, y_true, y_pred, sample_weight=None):
  """Computes the overall loss.

  Arguments:
    y_true: An arbitrary structure of Tensors representing the ground truth.
    y_pred: An arbitrary structure of Tensors representing a Model's
      outputs.
    sample_weight: An arbitrary structure of Tensors representing the
      per-sample loss weights. If one Tensor is passed, it is used for all
      losses. If multiple Tensors are passed, the structure should match
      `y_pred`.

  Returns:
    The total loss as a scalar `Tensor`, or a zero scalar if the model has
    no compiled loss.
  """
  if not self._built:
    self._build(y_pred)

  y_true = nest.flatten(y_true)
  y_pred = nest.flatten(y_pred)

  # TODO(omalleyt): Remove ambiguity here.
  # This is currently needed to support passing only 1 loss and 1 target
  # to a Functional Model with multiple outputs. However, this is
  # ambiguous, especially with subclass, and we should reconsider how we
  # support this.
  if len(y_true) == 1 and len(y_pred) > 1:
    y_true = y_true * len(y_pred)

  sample_weight = nest.flatten(sample_weight)
  # Allows passing one sample-weight array for all outputs.
  if len(sample_weight) == 1 and len(y_pred) > 1:
    sample_weight = sample_weight * len(y_pred)

  loss_values = []
  zip_args = (y_true, y_pred, sample_weight, self._losses,
              self._loss_weights, self._per_output_metrics)
  for y_t, y_p, sw, loss_obj, loss_weight, metric_obj in zip(*zip_args):
    if loss_obj is None:  # Ok to have no loss for an output.
      continue

    y_t = math_ops.cast(y_t, y_p.dtype)
    if sw is not None:
      sw = math_ops.cast(sw, y_p.dtype)

    # Handle Keras mask on outputs.
    mask = getattr(y_p, '_keras_mask', None)
    if mask is not None:
      mask = math_ops.cast(mask, y_p.dtype)
      if sw is not None:
        mask, _, sw = (
            tf_losses_utils.squeeze_or_expand_dimensions(
                mask, sample_weight=sw))
        sw *= mask
      else:
        sw = mask

    loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    if metric_obj is not None:
      metric_obj.update_state(loss_value)

    if loss_weight is not None:
      loss_value *= loss_weight

    if (loss_obj.reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE or
        loss_obj.reduction == losses_utils.ReductionV2.AUTO):
      loss_value = losses_utils.scale_loss_for_distribution(loss_value)
    loss_values.append(loss_value)

  if loss_values:
    total_loss = math_ops.add_n(loss_values)
    self._loss_metric.update_state(total_loss)
    return total_loss
  else:
    # Ok for a model to have no compiled loss.
    return array_ops.zeros(shape=())
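# Hedged sketch of the mask handling above: a `_keras_mask` attached to an
# output is cast to the prediction dtype and multiplied into the sample
# weights, so masked positions contribute zero loss. Plain broadcasting
# stands in for `squeeze_or_expand_dimensions`; shapes are illustrative.
import tensorflow as tf

y_pred = tf.random.uniform((2, 3))                 # (batch, timesteps)
mask = tf.constant([[1., 1., 0.], [1., 0., 0.]])   # 0 marks padded steps
sample_weight = tf.constant([[0.5], [2.0]])        # per-sample weights

# (2, 1) * (2, 3) broadcasts to (2, 3): per-step effective weights.
sample_weight = sample_weight * mask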
def _model_loss(model,
                inputs,
                targets,
                output_loss_metrics=None,
                sample_weights=None,
                training=False):
  """Calculates the loss for a given model.

  Arguments:
    model: The model on which metrics are being calculated.
    inputs: Either a dictionary of inputs to the model or a list of input
      arrays.
    targets: List of target arrays.
    output_loss_metrics: List of metrics that are used to aggregate output
      loss values.
    sample_weights: Optional list of sample weight arrays.
    training: Whether the model should be run in inference or training mode.

  Returns:
    Returns the model output, total loss, loss value calculated using the
    specified loss function and masks for each output. The total loss
    includes regularization losses and applies masking and sample weighting
    to the loss value.
  """
  # TODO(psv): Dedup code here with graph mode prepare_total_loss() fn.
  # Used to keep track of the total loss value (stateless).
  # eg., total_loss = loss_weight_1 * output_1_loss_fn(...) +
  #                   loss_weight_2 * output_2_loss_fn(...) +
  #                   layer losses.
  total_loss = 0
  kwargs = {}
  if model._expects_training_arg:
    kwargs['training'] = training
  if len(inputs) == 1 and not isinstance(inputs, dict):
    inputs = inputs[0]

  # Allow mixed `NumPy` and `EagerTensor` input here.
  if any(
      isinstance(input_t, (np.ndarray, float, int))
      for input_t in nest.flatten(inputs)):
    inputs = nest.map_structure(ops.convert_to_tensor, inputs)

  outs = model(inputs, **kwargs)
  outs = nest.flatten(outs)

  masks = [getattr(t, '_keras_mask', None) for t in outs]
  targets = nest.flatten(targets)

  # Used to keep track of individual output losses.
  output_losses = []

  with backend.name_scope('loss'):
    loss_fns = [
        loss_fn for loss_fn in model.loss_functions if loss_fn is not None
    ]
    for i, loss_fn in enumerate(loss_fns):
      weights = sample_weights[i] if sample_weights else None
      mask = masks[i]
      with backend.name_scope(model.output_names[i] + '_loss'):
        if mask is not None:
          mask = math_ops.cast(mask, outs[i].dtype)
          # Update weights with mask.
          if weights is None:
            weights = mask
          else:
            # Update dimensions of weights to match with mask if possible.
            mask, _, weights = (
                losses_utils.squeeze_or_expand_dimensions(
                    mask, None, weights))
            weights *= mask

        weighted_losses = None
        if hasattr(loss_fn, 'reduction'):
          per_sample_losses = loss_fn.call(targets[i], outs[i])
          weighted_losses = losses_utils.compute_weighted_loss(
              per_sample_losses,
              sample_weight=weights,
              reduction=losses_utils.ReductionV2.NONE)
          loss_reduction = loss_fn.reduction

          # `AUTO` loss reduction defaults to `SUM_OVER_BATCH_SIZE` for all
          # compile use cases.
          if loss_reduction == losses_utils.ReductionV2.AUTO:
            loss_reduction = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE

          # Compute the stateless loss value.
          output_loss = losses_utils.reduce_weighted_loss(
              weighted_losses, reduction=loss_reduction)
          if loss_reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE:
            output_loss = losses_utils.scale_loss_for_distribution(
                output_loss)
        else:
          # Compute the stateless loss value for a custom loss class.
          # Here we assume that the class takes care of loss reduction
          # because if this class returns a vector value we cannot
          # differentiate between use case where a custom optimizer
          # expects a vector loss value vs unreduced per-sample loss value.
          output_loss = loss_fn(targets[i], outs[i], sample_weight=weights)
          # For custom losses we assume reduction was mean.
          output_loss = losses_utils.scale_loss_for_distribution(output_loss)

      # If the number of outputs is 1 then we don't append the loss metric
      # associated with each model output. When there are multiple outputs
      # associated with a model, each output's loss is calculated and
      # returned as part of the loss_metrics.
      if len(model.outputs) > 1:
        # Keep track of the stateful output loss result.
        output_losses.append(output_loss_metrics[i](output_loss))

      total_loss += model._loss_weights_list[i] * output_loss

    # Add regularization losses
    custom_losses = model.losses
    if custom_losses:
      total_loss += losses_utils.scale_loss_for_distribution(
          math_ops.add_n(custom_losses))

  return outs, total_loss, output_losses, masks
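# The input-conversion step shared by all `_model_loss` variants above has
# a public counterpart in `tf.nest`: flatten the (possibly nested)
# structure, check for NumPy arrays or Python scalars, and convert
# everything so the model sees tensors only. Illustrative sketch:
import numpy as np
import tensorflow as tf

inputs = {'dense': np.ones((2, 3), dtype=np.float32), 'scale': 1.0}
if any(isinstance(t, (np.ndarray, float, int))
       for t in tf.nest.flatten(inputs)):
  inputs = tf.nest.map_structure(tf.convert_to_tensor, inputs)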
def train_op_fn(loss):
  # Scale loss by number of replicas.
  if loss_reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE:
    loss = losses_utils.scale_loss_for_distribution(loss)
  return opt.get_updates(loss, trainable_variables)[0]
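# `OptimizerV2.get_updates` returns a list of update ops whose first
# element applies the gradients, which is why the closures above index
# `[0]`. A minimal graph-mode sketch (variable, loss, and optimizer are
# illustrative; each `sess.run(train_op)` would take one gradient step):
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

w = tf.Variable(1.0)
loss = tf.square(w - 3.0)
opt = tf.keras.optimizers.SGD(learning_rate=0.1)
train_op = opt.get_updates(loss, [w])[0]  # the grouped apply-gradients op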
def __call__(self,
             y_true,
             y_pred,
             sample_weight=None,
             regularization_losses=None):
  """Computes the overall loss.

  Arguments:
    y_true: An arbitrary structure of Tensors representing the ground truth.
    y_pred: An arbitrary structure of Tensors representing a Model's
      outputs.
    sample_weight: An arbitrary structure of Tensors representing the
      per-sample loss weights. If one Tensor is passed, it is used for all
      losses. If multiple Tensors are passed, the structure should match
      `y_pred`.
    regularization_losses: Additional losses to be added to the total loss.

  Returns:
    The total loss as a scalar `Tensor`, or a zero scalar if the model has
    no compiled loss.
  """
  y_true = map_to_output_names(y_pred, self._output_names, y_true)
  sample_weight = map_to_output_names(y_pred, self._output_names,
                                      sample_weight)

  if not self._built:
    self._build(y_pred)

  y_true = nest.flatten(y_true) if y_true is not None else []
  y_pred = nest.flatten(y_pred)

  # TODO(omalleyt): Remove ambiguity here.
  # This is currently needed to support passing only 1 loss and 1 target
  # to a Functional Model with multiple outputs. However, this is
  # ambiguous, especially with subclass, and we should reconsider how we
  # support this.
  if len(y_true) == 1 and len(y_pred) > 1:
    y_true = y_true * len(y_pred)

  sample_weight = nest.flatten(sample_weight)
  # Allows passing one sample-weight array for all outputs.
  if len(sample_weight) == 1 and len(y_pred) > 1:
    sample_weight = sample_weight * len(y_pred)

  loss_values = []  # Used for gradient calculation.
  loss_metric_values = []  # Used for loss metric calculation.
  zip_args = (y_true, y_pred, sample_weight, self._losses,
              self._loss_weights, self._per_output_metrics)
  for y_t, y_p, sw, loss_obj, loss_weight, metric_obj in zip(*zip_args):
    if loss_obj is None:  # Ok to have no loss for an output.
      continue

    y_t, y_p, sw = match_dtype_and_rank(y_t, y_p, sw)
    sw = apply_mask(y_p, sw)
    loss_value = loss_obj(y_t, y_p, sample_weight=sw)

    loss_metric_value = loss_value
    # Correct for the `Mean` loss metrics counting each replica as a batch.
    if loss_obj.reduction == losses_utils.ReductionV2.SUM:
      loss_metric_value *= ds_context.get_strategy().num_replicas_in_sync
    if metric_obj is not None:
      metric_obj.update_state(loss_metric_value)

    if loss_weight is not None:
      loss_value *= loss_weight
      loss_metric_value *= loss_weight

    if (loss_obj.reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE or
        loss_obj.reduction == losses_utils.ReductionV2.AUTO):
      loss_value = losses_utils.scale_loss_for_distribution(loss_value)

    loss_values.append(loss_value)
    loss_metric_values.append(loss_metric_value)

  if regularization_losses:
    regularization_losses = losses_utils.cast_losses_to_common_dtype(
        regularization_losses)
    reg_loss = math_ops.add_n(regularization_losses)
    loss_metric_values.append(reg_loss)
    loss_values.append(losses_utils.scale_loss_for_distribution(reg_loss))

  if loss_values:
    loss_metric_values = losses_utils.cast_losses_to_common_dtype(
        loss_metric_values)
    total_loss_metric_value = math_ops.add_n(loss_metric_values)
    self._loss_metric.update_state(total_loss_metric_value)

    loss_values = losses_utils.cast_losses_to_common_dtype(loss_values)
    total_loss = math_ops.add_n(loss_values)
    return total_loss
  else:
    # Ok for a model to have no compiled loss.
    return array_ops.zeros(shape=())
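# Hedged arithmetic behind the `num_replicas_in_sync` correction in both
# `__call__` variants above, assuming 2 replicas each reporting a
# SUM-reduced loss of 10.0:
#   - the true global SUM is 10.0 + 10.0 = 20.0;
#   - a `Mean` metric updated once per replica would report (10+10)/2 = 10.0;
#   - pre-multiplying each replica's value by 2 makes it report 20.0.
import tensorflow as tf

num_replicas = 2                       # illustrative replica count
per_replica_sum_loss = 10.0
metric = tf.keras.metrics.Mean()
for _ in range(num_replicas):          # stand-in for per-replica updates
  metric.update_state(per_replica_sum_loss * num_replicas)
corrected = metric.result()            # 20.0, the global SUM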