Example #1
    def _train_op_fn(loss):
        """Returns the op to optimize the loss."""
        train_ops = []
        # Scale loss by number of replicas.
        if loss_reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE:
            loss = losses_utils.scale_loss_for_distribution(loss)

        if dnn_logits is not None:
            train_ops.extend(
                dnn_optimizer.get_updates(loss, dnn_trainable_variables))
            if dnn_update_ops is not None:
                train_ops.extend(dnn_update_ops)
        if linear_logits is not None:
            train_ops.extend(
                linear_optimizer.get_updates(loss, linear_trainable_variables))
        train_op = control_flow_ops.group(*train_ops)
        return train_op
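The scaling step above relies on the private `losses_utils.scale_loss_for_distribution` helper. As a rough public-API sketch of the same idea (assuming a `tf.distribute` strategy may be in scope), a `SUM_OVER_BATCH_SIZE` loss is divided by the replica count so that the cross-replica sum of gradients still averages over the global batch:

import tensorflow as tf

def scale_for_replicas(loss):
    # Illustrative stand-in for scale_loss_for_distribution: divide the
    # per-replica mean loss by the number of replicas in sync so that the
    # implicit cross-replica SUM of gradients behaves like a global mean.
    num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
    if num_replicas > 1:
        loss = loss * (1.0 / num_replicas)
    return loss

# With the default (single-replica) strategy this is a no-op.
print(scale_for_replicas(tf.constant(4.0)).numpy())  # 4.0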
Example #2
def _model_loss(model,
                inputs,
                targets,
                output_loss_metrics=None,
                sample_weights=None,
                training=False):
    """Calculates the loss for a given model.

  Args:
      model: The model on which metrics are being calculated.
      inputs: Either a dictionary of inputs to the model or a list of input
        arrays.
      targets: List of target arrays.
      output_loss_metrics: List of metrics that are used to aggregate output
        loss values.
      sample_weights: Optional list of sample weight arrays.
      training: Whether the model should be run in inference or training mode.

  Returns:
     Returns the model output, total loss, loss value calculated using the
     specified loss function and masks for each output. The total loss includes
     regularization losses and applies masking and sample weighting
     to the loss value.
  """
    # TODO(psv): Dedup code here with graph mode prepare_total_loss() fn.
    # Used to keep track of the total loss value (stateless).
    # e.g., total_loss = loss_weight_1 * output_1_loss_fn(...) +
    #                   loss_weight_2 * output_2_loss_fn(...) +
    #                   layer losses.
    total_loss = 0
    kwargs = {}
    if model._expects_training_arg:
        kwargs['training'] = training
    if len(inputs) == 1 and not isinstance(inputs, dict):
        inputs = inputs[0]

    # Allow mixed `NumPy` and `EagerTensor` input here.
    if any(
            isinstance(input_t, (np.ndarray, float, int))
            for input_t in nest.flatten(inputs)):
        inputs = nest.map_structure(ops.convert_to_tensor_v2_with_dispatch,
                                    inputs)

    outs = model(inputs, **kwargs)
    outs = nest.flatten(outs)

    if targets:
        targets = training_utils_v1.cast_if_floating_dtype_and_mismatch(
            targets, outs)
    # TODO(sallymatson/psv): check if we should do same mismatch fix for weights
    if sample_weights:
        sample_weights = [
            training_utils_v1.cast_if_floating_dtype(
                ops.convert_to_tensor_v2_with_dispatch(val))
            if val is not None else None for val in sample_weights
        ]

    masks = [getattr(t, '_keras_mask', None) for t in outs]
    targets = nest.flatten(targets)

    # Used to keep track of individual output losses.
    output_losses = []

    with backend.name_scope('loss'):
        loss_fns = [
            loss_fn for loss_fn in model.loss_functions if loss_fn is not None
        ]
        custom_losses = model.losses  # Regularization losses

        if not loss_fns and not custom_losses:
            if training:
                raise ValueError('The model cannot be trained '
                                 'because it has no loss to optimize.')
            else:
                raise ValueError('The model cannot be evaluated '
                                 'because it has no loss to compute.')

        for i, loss_fn in enumerate(loss_fns):
            weights = sample_weights[i] if sample_weights else None
            mask = masks[i]
            with backend.name_scope(model.output_names[i] + '_loss'):
                if mask is not None:
                    mask = math_ops.cast(mask, outs[i].dtype)
                    # Update weights with mask.
                    if weights is None:
                        weights = mask
                    else:
                        # Update dimensions of weights to match with mask if possible.
                        weights = math_ops.cast(weights, outs[i].dtype)
                        mask, _, weights = (
                            losses_utils.squeeze_or_expand_dimensions(
                                mask, sample_weight=weights))
                        weights *= mask

                if hasattr(loss_fn, 'reduction'):
                    per_sample_losses = loss_fn.call(targets[i], outs[i])
                    weighted_losses = losses_utils.compute_weighted_loss(
                        per_sample_losses,
                        sample_weight=weights,
                        reduction=losses_utils.ReductionV2.NONE)
                    loss_reduction = loss_fn.reduction

                    # `AUTO` loss reduction defaults to `SUM_OVER_BATCH_SIZE` for all
                    # compile use cases.
                    if loss_reduction == losses_utils.ReductionV2.AUTO:
                        loss_reduction = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE

                    # Compute the stateless loss value.
                    output_loss = losses_utils.reduce_weighted_loss(
                        weighted_losses, reduction=loss_reduction)
                else:
                    # Compute the stateless loss value for a custom loss class.
                    # Here we assume that the class takes care of loss reduction
                    # because if this class returns a vector value we cannot
                    # differentiate between use case where a custom optimizer
                    # expects a vector loss value vs unreduced per-sample loss value.
                    output_loss = loss_fn(targets[i],
                                          outs[i],
                                          sample_weight=weights)
                    loss_reduction = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE

            # If the number of outputs is 1 then we don't append the loss metric
            # associated with each model output. When there are multiple outputs
            # associated with a model, each output's loss is calculated and returned
            # as part of the loss_metrics.
            if len(model.outputs) > 1:
                # Keep track of the stateful output loss result.
                output_losses.append(output_loss_metrics[i](output_loss))

            # Scale output loss for distribution. For custom losses we assume
            # reduction was mean.
            if loss_reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE:
                output_loss = losses_utils.scale_loss_for_distribution(
                    output_loss)
            total_loss += model._loss_weights_list[i] * output_loss

        # Add regularization losses
        if custom_losses:
            total_loss += losses_utils.scale_loss_for_distribution(
                math_ops.add_n(custom_losses))
    return outs, total_loss, output_losses, masks
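The masking branch above folds the Keras mask into the sample weights before reducing the per-sample losses. A minimal sketch of the same flow using only public APIs (toy tensors, made-up values):

import tensorflow as tf

y_true = tf.constant([[1.0], [0.0]])
y_pred = tf.constant([[0.8], [0.3]])
mask = tf.constant([1.0, 0.0])           # second sample masked out
sample_weight = tf.constant([2.0, 1.0])

# Per-sample losses, no reduction (mirrors loss_fn.call + ReductionV2.NONE).
loss_fn = tf.keras.losses.BinaryCrossentropy(
    reduction=tf.keras.losses.Reduction.NONE)
per_sample = loss_fn(y_true, y_pred)      # shape [2]

# Fold the mask into the weights, then reduce SUM_OVER_BATCH_SIZE-style:
# sum of weighted losses divided by the number of elements.
weights = sample_weight * mask
output_loss = tf.reduce_sum(per_sample * weights) / tf.cast(
    tf.size(per_sample), per_sample.dtype)
print(output_loss.numpy())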
Example #3
def create_estimator_spec_train_op(
        head_name,
        optimizer=None,
        trainable_variables=None,
        train_op_fn=None,
        update_ops=None,
        regularized_training_loss=None,
        loss_reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE):
    """Create train_op for estimator_spec.

  Args:
    head_name: The name of the head.
    optimizer: A `tf.keras.optimizers.Optimizer` instance to optimize the
      loss in TRAIN mode. Namely, sets
      `train_op = optimizer.get_updates(loss, trainable_variables)`,
      which updates variables to minimize `loss`.
    trainable_variables: A list or tuple of `Variable` objects to update to
      minimize `loss`. In Tensorflow 1.x, by default these are the list of
      variables collected in the graph under the key
      `GraphKeys.TRAINABLE_VARIABLES`. As Tensorflow 2.x doesn't have
      collections and GraphKeys, trainable_variables need to be passed
      explicitly here.
    train_op_fn: Function that takes a scalar loss `Tensor` and returns
      `train_op`. Used if `optimizer` is `None`.
    update_ops: A list or tuple of update ops to be run at training time. For
      example, layers such as BatchNormalization create mean and variance
      update ops that need to be run at training time. In Tensorflow 1.x,
      these are thrown into an UPDATE_OPS collection. As Tensorflow 2.x
      doesn't have collections, update_ops need to be passed explicitly here.
    regularized_training_loss: A scalar for total training loss that includes
      all regularization losses. If you're not using optimizer to generate
      train op, make sure to scale the loss correctly before passing it in. The
      loss typically needs to be scaled down by the number of workers.
    loss_reduction: One of `tf.keras.losses.Reduction` except `NONE`. Describes
      how to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`.

  Returns:
    A train op for EstimatorSpec.
  """
    del head_name
    validate_update_ops(update_ops)
    with ops.name_scope(''):  # Reset all previous name_scope.
        # Add training as the name_scope to be compatible with Keras.
        with ops.name_scope('training'):
            if optimizer is not None:
                if train_op_fn is not None:
                    raise ValueError(
                        'train_op_fn and optimizer cannot both be set.')
                validate_v2_optimizer(optimizer)
                validate_trainable_variables(trainable_variables)
                # Scale loss by number of replicas.
                if loss_reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE:
                    regularized_training_loss = losses_utils.scale_loss_for_distribution(
                        regularized_training_loss)
                train_op = optimizer.get_updates(regularized_training_loss,
                                                 trainable_variables)[0]
            elif train_op_fn is not None:
                train_op = train_op_fn(regularized_training_loss)
            else:
                raise ValueError(
                    'train_op_fn and optimizer cannot both be None.')
            if update_ops is not None:
                train_op = tf.group(train_op, *update_ops)
            return train_op
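Estimator heads build the train op in graph mode via `optimizer.get_updates(...)`. As an eager-mode analogue of that branch (a sketch, not the literal estimator call), the same effect can be shown with a `GradientTape` on a toy variable and loss:

import tensorflow as tf

v = tf.Variable(1.0)                      # stand-in trainable variable
optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)

def train_step():
    # Roughly what optimizer.get_updates(regularized_training_loss, [v])[0]
    # does: compute gradients of the (already scaled) loss and apply them.
    with tf.GradientTape() as tape:
        regularized_training_loss = tf.square(v - 3.0)
    grads = tape.gradient(regularized_training_loss, [v])
    optimizer.apply_gradients(zip(grads, [v]))
    return regularized_training_loss

print(train_step().numpy(), v.numpy())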
Example #4
    def __call__(self,
                 y_true,
                 y_pred,
                 sample_weight=None,
                 regularization_losses=None):
        """Computes the overall loss.

    Args:
      y_true: An arbitrary structure of Tensors representing the ground truth.
      y_pred: An arbitrary structure of Tensors representing a Model's outputs.
      sample_weight: An arbitrary structure of Tensors representing the
        per-sample loss weights. If one Tensor is passed, it is used for all
        losses. If multiple Tensors are passed, the structure should match
        `y_pred`.
      regularization_losses: Additional losses to be added to the total loss.

    Returns:
      The total loss as a scalar `Tensor`, or a zero scalar `Tensor` if the
      model has no compiled loss.
    """
        y_true = self._conform_to_outputs(y_pred, y_true)
        sample_weight = self._conform_to_outputs(y_pred, sample_weight)

        if not self._built:
            self.build(y_pred)

        y_pred = nest.flatten(y_pred)
        y_true = nest.flatten(y_true)
        sample_weight = nest.flatten(sample_weight)

        loss_values = []  # Used for gradient calculation.
        loss_metric_values = []  # Used for loss metric calculation.
        batch_dim = None
        zip_args = (y_true, y_pred, sample_weight, self._losses,
                    self._loss_weights, self._per_output_metrics)
        for y_t, y_p, sw, loss_obj, loss_weight, metric_obj in zip(*zip_args):
            if y_t is None or loss_obj is None:  # Ok to have no loss for an output.
                continue

            y_t, y_p, sw = match_dtype_and_rank(y_t, y_p, sw)
            sw = apply_mask(y_p, sw, get_mask(y_p))
            loss_value = loss_obj(y_t, y_p, sample_weight=sw)

            loss_metric_value = loss_value
            # Correct for the `Mean` loss metrics counting each replica as a batch.
            if loss_obj.reduction == losses_utils.ReductionV2.SUM:
                loss_metric_value *= ds_context.get_strategy(
                ).num_replicas_in_sync

            if batch_dim is None:
                if tf_utils.is_ragged(y_t):
                    batch_dim = y_t.nrows()
                else:
                    batch_dim = array_ops.shape(y_t)[0]

            if metric_obj is not None:
                metric_obj.update_state(loss_metric_value,
                                        sample_weight=batch_dim)

            if loss_weight is not None:
                loss_value *= loss_weight
                loss_metric_value *= loss_weight

            if (loss_obj.reduction
                    == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE
                    or loss_obj.reduction == losses_utils.ReductionV2.AUTO):
                loss_value = losses_utils.scale_loss_for_distribution(
                    loss_value)

            loss_values.append(loss_value)
            loss_metric_values.append(loss_metric_value)

        if regularization_losses:
            regularization_losses = losses_utils.cast_losses_to_common_dtype(
                regularization_losses)
            reg_loss = math_ops.add_n(regularization_losses)
            loss_metric_values.append(reg_loss)
            loss_values.append(
                losses_utils.scale_loss_for_distribution(reg_loss))

        if loss_values:
            loss_metric_values = losses_utils.cast_losses_to_common_dtype(
                loss_metric_values)
            total_loss_metric_value = math_ops.add_n(loss_metric_values)
            self._loss_metric.update_state(total_loss_metric_value,
                                           sample_weight=batch_dim)

            loss_values = losses_utils.cast_losses_to_common_dtype(loss_values)
            total_loss = math_ops.add_n(loss_values)
            return total_loss
        else:
            # Ok for a model to have no compiled loss.
            return array_ops.zeros(shape=())
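For the multi-output case above, the total loss is just the weighted sum of the per-output losses plus the summed regularization losses, tracked by a stateful `Mean` metric. A small sketch with made-up scalar values:

import tensorflow as tf

out_a_loss = tf.constant(0.6)             # illustrative per-output losses
out_b_loss = tf.constant(1.2)
loss_weights = [1.0, 0.5]
regularization_losses = [tf.constant(0.01), tf.constant(0.02)]

loss_values = [out_a_loss * loss_weights[0], out_b_loss * loss_weights[1]]
loss_values.append(tf.add_n(regularization_losses))
total_loss = tf.add_n(loss_values)

# Stateful tracking, analogous to self._loss_metric.update_state(...).
loss_metric = tf.keras.metrics.Mean(name='loss')
loss_metric.update_state(total_loss)
print(total_loss.numpy(), loss_metric.result().numpy())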
Example #5
def _model_loss(model,
                inputs,
                targets,
                output_loss_metrics=None,
                sample_weights=None,
                training=False):
    """Calculates the loss for a given model.

  Arguments:
      model: The model on which metrics are being calculated.
      inputs: Either a dictionary of inputs to the model or a list of input
        arrays.
      targets: List of target arrays.
      output_loss_metrics: List of metrics that are used to aggregate output
        loss values.
      sample_weights: Optional list of sample weight arrays.
      training: Whether the model should be run in inference or training mode.

  Returns:
     Returns the model output, total loss, loss value calculated using the
     specified loss function and masks for each output. The total loss includes
     regularization losses and applies masking and sample weighting
     to the loss value.
  """
    # Used to keep track of the total loss value (stateless).
    # e.g., total_loss = loss_weight_1 * output_1_loss_fn(...) +
    #                   loss_weight_2 * output_2_loss_fn(...) +
    #                   layer losses.
    total_loss = 0
    kwargs = {}
    if model._expects_training_arg:
        kwargs['training'] = training
    if len(inputs) == 1 and not isinstance(inputs, dict):
        inputs = inputs[0]

    # Allow mixed `NumPy` and `EagerTensor` input here.
    if any(
            isinstance(input_t, (np.ndarray, float, int))
            for input_t in nest.flatten(inputs)):
        inputs = nest.map_structure(ops.convert_to_tensor, inputs)

    outs = model(inputs, **kwargs)

    outs = nest.flatten(outs)
    # `None` by default for `EagerTensors`.
    masks = [t._keras_mask for t in outs]
    targets = nest.flatten(targets)

    # Used to keep track of individual output losses (stateless).
    output_losses = []
    # Used to keep track of individual output losses (stateful).
    aggregated_output_losses = []

    with backend.name_scope('loss'):
        for i, loss_fn in enumerate(model.loss_functions):
            weights = sample_weights[i] if sample_weights else None
            mask = masks[i]
            with backend.name_scope(model.output_names[i] + '_loss'):
                if mask is not None:
                    mask = math_ops.cast(mask, outs[i].dtype)
                    # Update weights with mask.
                    if weights is None:
                        weights = mask
                    else:
                        # Update dimensions of weights to match with mask if possible.
                        mask, _, weights = (
                            losses_utils.squeeze_or_expand_dimensions(
                                mask, None, weights))
                        weights *= mask

                # Reset reduction on the loss so that we can get the per sample loss
                # value. We use this to get both the stateless and stateful loss
                # values without having to compute the underlying loss function
                # twice.
                weighted_losses = None
                if hasattr(loss_fn, 'reduction'):
                    current_loss_reduction = loss_fn.reduction
                    loss_fn.reduction = losses_utils.ReductionV2.NONE
                    weighted_losses = loss_fn(targets[i],
                                              outs[i],
                                              sample_weight=weights)
                    loss_fn.reduction = current_loss_reduction

                    # Compute the stateless loss value.
                    output_loss = losses_utils.reduce_weighted_loss(
                        weighted_losses)
                else:
                    # Compute the stateless loss value for a custom loss class.
                    # Here we assume that the class takes care of loss reduction
                    # because if this class returns a vector value we cannot
                    # differentiate between use case where a custom optimizer
                    # expects a vector loss value vs unreduced per-sample loss value.
                    output_loss = loss_fn(targets[i],
                                          outs[i],
                                          sample_weight=weights)

            # If the number of outputs is 1 then we don't append the loss metric
            # associated with each model output. When there are multiple outputs
            # associated with a model, each output's loss is calculated and returned
            # as part of the loss_metrics.
            if len(model.outputs) > 1:
                output_losses.append(backend.mean(output_loss))
                if output_loss_metrics is not None:
                    # Compute the stateful loss value.
                    if weighted_losses is not None:
                        aggregated_output_loss = output_loss_metrics[i](
                            weighted_losses)
                    else:
                        # Custom loss class.
                        aggregated_output_loss = training_utils.call_metric_function(
                            output_loss_metrics[i],
                            targets[i],
                            outs[i],
                            weights=weights)
                    # Keep track of the stateful output loss result.
                    aggregated_output_losses.append(aggregated_output_loss)

            loss_weight = model.loss_weights_list[i]
            if total_loss is None:
                total_loss = loss_weight * output_loss
            else:
                total_loss += loss_weight * output_loss

        total_loss = backend.mean(total_loss)
        # Add regularization losses
        custom_losses = model.losses
        if custom_losses:
            total_loss += losses_utils.scale_loss_for_distribution(
                math_ops.add_n(custom_losses))
        model._clear_losses()

    return outs, total_loss, output_losses, aggregated_output_losses, masks
Example #6
def create_estimator_spec_train_op(
    head_name,
    optimizer=None,
    trainable_variables=None,
    train_op_fn=None,
    update_ops=None,
    regularized_training_loss=None,
    loss_reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE):
  """Create train_op for estimator_spec.

  Args:
    head_name: The name of the head.
    optimizer: A `tf.keras.optimizers.Optimizer` instance to optimize the
      loss in TRAIN mode. Namely, sets
      `train_op = optimizer.get_updates(loss, trainable_variables)`,
      which updates variables to minimize `loss`.
    trainable_variables: A list or tuple of `Variable` objects to update to
      minimize `loss`. In Tensorflow 1.x, by default these are the list of
      variables collected in the graph under the key
      `GraphKeys.TRAINABLE_VARIABLES`. As Tensorflow 2.x doesn't have
      collections and GraphKeys, trainable_variables need to be passed
      explicitly here.
    train_op_fn: Function that takes a scalar loss `Tensor` and returns
      `train_op`. Used if `optimizer` is `None`.
    update_ops: A list or tuple of update ops to be run at training time. For
      example, layers such as BatchNormalization create mean and variance
      update ops that need to be run at training time. In Tensorflow 1.x,
      these are thrown into an UPDATE_OPS collection. As Tensorflow 2.x
      doesn't have collections, update_ops need to be passed explicitly here.
    regularized_training_loss: A scalar for total training loss that includes
      all regularization losses. If you're not using optimizer to generate
      train op, make sure to scale the loss correctly before passing it in.
      The loss typically needs to be scaled down by the number of workers.
    loss_reduction: One of `tf.keras.losses.Reduction` except `NONE`. Describes
      how to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`.

  Returns:
    A train op for EstimatorSpec.
  """
  validate_update_ops(update_ops)
  with ops.name_scope(head_name, 'head'):
    if optimizer is not None:
      if train_op_fn is not None:
        raise ValueError('train_op_fn and optimizer cannot both be set.')
      validate_v2_optimizer(optimizer)
      validate_trainable_variables(trainable_variables)
      with ops.name_scope(''):  # Reset name_scope.
        # Scale loss by number of replicas.
        if loss_reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE:
          regularized_training_loss = losses_utils.scale_loss_for_distribution(
              regularized_training_loss)

        train_op = optimizer.get_updates(regularized_training_loss,
                                         trainable_variables)[0]
    elif train_op_fn is not None:
      train_op = train_op_fn(regularized_training_loss)
    else:
      raise ValueError('train_op_fn and optimizer cannot both be None.')
    if update_ops is not None:
      train_op = control_flow_ops.group(train_op, *update_ops)
    return train_op
Example #7
    def __call__(self, y_true, y_pred, sample_weight=None):
        """Computes the overall loss.

    Arguments:
      y_true: An arbitrary structure of Tensors representing the ground truth.
      y_pred: An arbitrary structure of Tensors representing a Model's outputs.
      sample_weight: An arbitrary structure of Tensors representing the
        per-sample loss weights. If one Tensor is passed, it is used for all
        losses. If multiple Tensors are passed, the structure should match
        `y_pred`.

    Returns:
      The total loss as a scalar `Tensor`, or a zero scalar `Tensor` if the
      model has no compiled loss.
    """
        if not self._built:
            self._build(y_pred)

        y_true = nest.flatten(y_true)
        y_pred = nest.flatten(y_pred)

        # TODO(omalleyt): Remove ambiguity here.
        # This is currently needed to support passing only 1 loss and 1 target
        # to a Functional Model with multiple outputs. However, this is
        # ambiguous, especially with subclass, and we should reconsider how we
        # support this.
        if len(y_true) == 1 and len(y_pred) > 1:
            y_true = y_true * len(y_pred)

        sample_weight = nest.flatten(sample_weight)
        # Allows passing one sample-weight array for all outputs.
        if len(sample_weight) == 1 and len(y_pred) > 1:
            sample_weight = sample_weight * len(y_pred)

        loss_values = []
        zip_args = (y_true, y_pred, sample_weight, self._losses,
                    self._loss_weights, self._per_output_metrics)
        for y_t, y_p, sw, loss_obj, loss_weight, metric_obj in zip(*zip_args):
            if loss_obj is None:  # Ok to have no loss for an output.
                continue

            y_t = math_ops.cast(y_t, y_p.dtype)
            if sw is not None:
                sw = math_ops.cast(sw, y_p.dtype)

            # Handle Keras mask on outputs.
            mask = getattr(y_p, '_keras_mask', None)
            if mask is not None:
                mask = math_ops.cast(mask, y_p.dtype)
                if sw is not None:
                    mask, _, sw = (
                        tf_losses_utils.squeeze_or_expand_dimensions(
                            mask, sample_weight=sw))
                    sw *= mask
                else:
                    sw = mask

            loss_value = loss_obj(y_t, y_p, sample_weight=sw)

            if metric_obj is not None:
                metric_obj.update_state(loss_value)

            if loss_weight is not None:
                loss_value *= loss_weight

            if (loss_obj.reduction
                    == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE
                    or loss_obj.reduction == losses_utils.ReductionV2.AUTO):
                loss_value = losses_utils.scale_loss_for_distribution(
                    loss_value)
            loss_values.append(loss_value)

        if loss_values:
            total_loss = math_ops.add_n(loss_values)
            self._loss_metric.update_state(total_loss)
            return total_loss
        else:
            # Ok for a model to have no compiled loss.
            return array_ops.zeros(shape=())
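The distribution scaling above only applies to `SUM_OVER_BATCH_SIZE` (and `AUTO`, which maps to it) because `SUM` losses are already additive across replicas. A quick numeric comparison of the two reductions on the same toy batch:

import tensorflow as tf

y_true = tf.constant([[0.0], [1.0], [1.0], [0.0]])
y_pred = tf.constant([[0.5], [0.5], [0.5], [0.5]])

mse_sum = tf.keras.losses.MeanSquaredError(
    reduction=tf.keras.losses.Reduction.SUM)
mse_avg = tf.keras.losses.MeanSquaredError(
    reduction=tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE)

# SUM adds the four per-sample losses (0.25 each); SUM_OVER_BATCH_SIZE
# divides that sum by the batch size, so it needs the extra 1/num_replicas
# factor before per-replica gradients are summed.
print(mse_sum(y_true, y_pred).numpy())    # 1.0
print(mse_avg(y_true, y_pred).numpy())    # 0.25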
Example #8
def _model_loss(model,
                inputs,
                targets,
                output_loss_metrics=None,
                sample_weights=None,
                training=False):
  """Calculates the loss for a given model.

  Arguments:
      model: The model on which metrics are being calculated.
      inputs: Either a dictionary of inputs to the model or a list of input
        arrays.
      targets: List of target arrays.
      output_loss_metrics: List of metrics that are used to aggregate output
        loss values.
      sample_weights: Optional list of sample weight arrays.
      training: Whether the model should be run in inference or training mode.

  Returns:
     Returns the model output, total loss, loss value calculated using the
     specified loss function and masks for each output. The total loss includes
     regularization losses and applies masking and sample weighting
     to the loss value.
  """
  # TODO(psv): Dedup code here with graph mode prepare_total_loss() fn.
  # Used to keep track of the total loss value (stateless).
  # e.g., total_loss = loss_weight_1 * output_1_loss_fn(...) +
  #                   loss_weight_2 * output_2_loss_fn(...) +
  #                   layer losses.
  total_loss = 0
  kwargs = {}
  if model._expects_training_arg:
    kwargs['training'] = training
  if len(inputs) == 1 and not isinstance(inputs, dict):
    inputs = inputs[0]

  # Allow mixed `NumPy` and `EagerTensor` input here.
  if any(
      isinstance(input_t, (np.ndarray, float, int))
      for input_t in nest.flatten(inputs)):
    inputs = nest.map_structure(ops.convert_to_tensor, inputs)

  outs = model(inputs, **kwargs)

  outs = nest.flatten(outs)
  masks = [getattr(t, '_keras_mask', None) for t in outs]
  targets = nest.flatten(targets)

  # Used to keep track of individual output losses.
  output_losses = []

  with backend.name_scope('loss'):
    loss_fns = [
        loss_fn for loss_fn in model.loss_functions if loss_fn is not None
    ]
    for i, loss_fn in enumerate(loss_fns):
      weights = sample_weights[i] if sample_weights else None
      mask = masks[i]
      with backend.name_scope(model.output_names[i] + '_loss'):
        if mask is not None:
          mask = math_ops.cast(mask, outs[i].dtype)
          # Update weights with mask.
          if weights is None:
            weights = mask
          else:
            # Update dimensions of weights to match with mask if possible.
            mask, _, weights = (
                losses_utils.squeeze_or_expand_dimensions(mask, None, weights))
            weights *= mask

        weighted_losses = None
        if hasattr(loss_fn, 'reduction'):
          per_sample_losses = loss_fn.call(targets[i], outs[i])
          weighted_losses = losses_utils.compute_weighted_loss(
              per_sample_losses,
              sample_weight=weights,
              reduction=losses_utils.ReductionV2.NONE)
          loss_reduction = loss_fn.reduction

          # `AUTO` loss reduction defaults to `SUM_OVER_BATCH_SIZE` for all
          # compile use cases.
          if loss_reduction == losses_utils.ReductionV2.AUTO:
            loss_reduction = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE

          # Compute the stateless loss value.
          output_loss = losses_utils.reduce_weighted_loss(
              weighted_losses, reduction=loss_reduction)
          if loss_reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE:
            output_loss = losses_utils.scale_loss_for_distribution(output_loss)
        else:
          # Compute the stateless loss value for a custom loss class.
          # Here we assume that the class takes care of loss reduction
          # because if this class returns a vector value we cannot
          # differentiate between use case where a custom optimizer
          # expects a vector loss value vs unreduced per-sample loss value.
          output_loss = loss_fn(targets[i], outs[i], sample_weight=weights)
          # For custom losses we assume reduction was mean.
          output_loss = losses_utils.scale_loss_for_distribution(output_loss)

      # If the number of outputs is 1 then we don't append the loss metric
      # associated with each model output. When there are multiple outputs
      # associated with a model, each output's loss is calculated and returned
      # as part of the loss_metrics.
      if len(model.outputs) > 1:
        # Keep track of the stateful output loss result.
        output_losses.append(output_loss_metrics[i](output_loss))

      total_loss += model._loss_weights_list[i] * output_loss

    # Add regularization losses
    custom_losses = model.losses
    if custom_losses:
      total_loss += losses_utils.scale_loss_for_distribution(
          math_ops.add_n(custom_losses))

  return outs, total_loss, output_losses, masks
Example #9
 def train_op_fn(loss):
     # Scale loss by number of replicas.
     if loss_reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE:
         loss = losses_utils.scale_loss_for_distribution(loss)
     return opt.get_updates(loss, trainable_variables)[0]
Example #10
  def __call__(self,
               y_true,
               y_pred,
               sample_weight=None,
               regularization_losses=None):
    """Computes the overall loss.

    Arguments:
      y_true: An arbitrary structure of Tensors representing the ground truth.
      y_pred: An arbitrary structure of Tensors representing a Model's outputs.
      sample_weight: An arbitrary structure of Tensors representing the
        per-sample loss weights. If one Tensor is passed, it is used for all
        losses. If multiple Tensors are passed, the structure should match
        `y_pred`.
      regularization_losses: Additional losses to be added to the total loss.

    Returns:
      The total loss as a scalar `Tensor`, or a zero scalar `Tensor` if the
      model has no compiled loss.
    """
    y_true = map_to_output_names(y_pred, self._output_names, y_true)
    sample_weight = map_to_output_names(y_pred, self._output_names,
                                        sample_weight)

    if not self._built:
      self._build(y_pred)

    y_true = nest.flatten(y_true) if y_true is not None else []
    y_pred = nest.flatten(y_pred)

    # TODO(omalleyt): Remove ambiguity here.
    # This is currently needed to support passing only 1 loss and 1 target
    # to a Functional Model with multiple outputs. However, this is
    # ambiguous, especially with subclass, and we should reconsider how we
    # support this.
    if len(y_true) == 1 and len(y_pred) > 1:
      y_true = y_true * len(y_pred)

    sample_weight = nest.flatten(sample_weight)
    # Allows passing one sample-weight array for all outputs.
    if len(sample_weight) == 1 and len(y_pred) > 1:
      sample_weight = sample_weight * len(y_pred)

    loss_values = []  # Used for gradient calculation.
    loss_metric_values = []  # Used for loss metric calculation.
    zip_args = (y_true, y_pred, sample_weight, self._losses, self._loss_weights,
                self._per_output_metrics)
    for y_t, y_p, sw, loss_obj, loss_weight, metric_obj in zip(*zip_args):
      if loss_obj is None:  # Ok to have no loss for an output.
        continue

      y_t, y_p, sw = match_dtype_and_rank(y_t, y_p, sw)
      sw = apply_mask(y_p, sw)

      loss_value = loss_obj(y_t, y_p, sample_weight=sw)

      loss_metric_value = loss_value
      # Correct for the `Mean` loss metrics counting each replica as a batch.
      if loss_obj.reduction == losses_utils.ReductionV2.SUM:
        loss_metric_value *= ds_context.get_strategy().num_replicas_in_sync
      if metric_obj is not None:
        metric_obj.update_state(loss_metric_value)

      if loss_weight is not None:
        loss_value *= loss_weight
        loss_metric_value *= loss_weight

      if (loss_obj.reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE or
          loss_obj.reduction == losses_utils.ReductionV2.AUTO):
        loss_value = losses_utils.scale_loss_for_distribution(loss_value)

      loss_values.append(loss_value)
      loss_metric_values.append(loss_metric_value)

    if regularization_losses:
      regularization_losses = losses_utils.cast_losses_to_common_dtype(
          regularization_losses)
      reg_loss = math_ops.add_n(regularization_losses)
      loss_metric_values.append(reg_loss)
      loss_values.append(losses_utils.scale_loss_for_distribution(reg_loss))

    if loss_values:
      loss_metric_values = losses_utils.cast_losses_to_common_dtype(
          loss_metric_values)
      total_loss_metric_value = math_ops.add_n(loss_metric_values)
      self._loss_metric.update_state(total_loss_metric_value)

      loss_values = losses_utils.cast_losses_to_common_dtype(loss_values)
      total_loss = math_ops.add_n(loss_values)
      return total_loss
    else:
      # Ok for a model to have no compiled loss.
      return array_ops.zeros(shape=())
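The `num_replicas_in_sync` correction above exists because a `Mean` metric updated once per replica would average the per-replica `SUM` losses instead of adding them. A toy sketch of that effect, using a plain Python list in place of a real `tf.distribute` strategy (values are made up):

import tensorflow as tf

per_replica_sums = [tf.constant(3.0), tf.constant(5.0)]  # SUM-reduced losses
num_replicas = len(per_replica_sums)

naive = tf.keras.metrics.Mean()
for s in per_replica_sums:
    naive.update_state(s)                     # averages the sums: 4.0

corrected = tf.keras.metrics.Mean()
for s in per_replica_sums:
    corrected.update_state(s * num_replicas)  # recovers the global sum: 8.0

print(naive.result().numpy(), corrected.result().numpy())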
Example #11
def _model_loss(model,
                inputs,
                targets,
                output_loss_metrics=None,
                sample_weights=None,
                training=False):
  """Calculates the loss for a given model.

  Arguments:
      model: The model on which metrics are being calculated.
      inputs: Either a dictionary of inputs to the model or a list of input
        arrays.
      targets: List of target arrays.
      output_loss_metrics: List of metrics that are used to aggregate output
        loss values.
      sample_weights: Optional list of sample weight arrays.
      training: Whether the model should be run in inference or training mode.

  Returns:
     Returns the model output, total loss, loss value calculated using the
     specified loss function and masks for each output. The total loss includes
     regularization losses and applies masking and sample weighting
     to the loss value.
  """
  # Used to keep track of the total loss value (stateless).
  # e.g., total_loss = loss_weight_1 * output_1_loss_fn(...) +
  #                   loss_weight_2 * output_2_loss_fn(...) +
  #                   layer losses.
  total_loss = 0
  kwargs = {}
  if model._expects_training_arg:
    kwargs['training'] = training
  if len(inputs) == 1 and not isinstance(inputs, dict):
    inputs = inputs[0]

  # Allow mixed `NumPy` and `EagerTensor` input here.
  if any(
      isinstance(input_t, (np.ndarray, float, int))
      for input_t in nest.flatten(inputs)):
    inputs = nest.map_structure(ops.convert_to_tensor, inputs)

  outs = model(inputs, **kwargs)

  outs = nest.flatten(outs)
  # `None` by default for `EagerTensors`.
  masks = [t._keras_mask for t in outs]
  targets = nest.flatten(targets)

  # Used to keep track of individual output losses (stateless).
  output_losses = []
  # Used to keep track of individual output losses (stateful).
  aggregated_output_losses = []

  with backend.name_scope('loss'):
    for i, loss_fn in enumerate(model.loss_functions):
      weights = sample_weights[i] if sample_weights else None
      mask = masks[i]
      with backend.name_scope(model.output_names[i] + '_loss'):
        if mask is not None:
          mask = math_ops.cast(mask, outs[i].dtype)
          # Update weights with mask.
          if weights is None:
            weights = mask
          else:
            # Update dimensions of weights to match with mask if possible.
            mask, _, weights = (
                losses_utils.squeeze_or_expand_dimensions(mask, None, weights))
            weights *= mask

        # Reset reduction on the loss so that we can get the per sample loss
        # value. We use this to get both the stateless and stateful loss
        # values without having to compute the underlying loss function
        # twice.
        weighted_losses = None
        if hasattr(loss_fn, 'reduction'):
          current_loss_reduction = loss_fn.reduction
          loss_fn.reduction = losses_utils.ReductionV2.NONE
          weighted_losses = loss_fn(targets[i], outs[i], sample_weight=weights)
          loss_fn.reduction = current_loss_reduction

          # Compute the stateless loss value.
          output_loss = losses_utils.reduce_weighted_loss(weighted_losses)
        else:
          # Compute the stateless loss value for a custom loss class.
          # Here we assume that the class takes care of loss reduction
          # because if this class returns a vector value we cannot
          # differentiate between use case where a custom optimizer
          # expects a vector loss value vs unreduced per-sample loss value.
          output_loss = loss_fn(targets[i], outs[i], sample_weight=weights)

      # If the number of outputs is 1 then we don't append the loss metric
      # associated with each model output. When there are multiple outputs
      # associated with a model, each output's loss is calculated and returned
      # as part of the loss_metrics.
      if len(model.outputs) > 1:
        output_losses.append(backend.mean(output_loss))
        if output_loss_metrics is not None:
          # Compute the stateful loss value.
          if weighted_losses is not None:
            aggregated_output_loss = output_loss_metrics[i](weighted_losses)
          else:
            # Custom loss class.
            aggregated_output_loss = training_utils.call_metric_function(
                output_loss_metrics[i], targets[i], outs[i], weights=weights)
          # Keep track of the stateful output loss result.
          aggregated_output_losses.append(aggregated_output_loss)

      loss_weight = model.loss_weights_list[i]
      if total_loss is None:
        total_loss = loss_weight * output_loss
      else:
        total_loss += loss_weight * output_loss

    total_loss = backend.mean(total_loss)
    # Add regularization losses
    custom_losses = model.losses
    if custom_losses:
      total_loss += losses_utils.scale_loss_for_distribution(
          math_ops.add_n(custom_losses))

  return outs, total_loss, output_losses, aggregated_output_losses, masks