Beispiel #1
0
def true_positive_rate_at_false_positive_rate_loss(
        labels,
        logits,
        target_rate,
        weights=1.0,
        dual_rate_factor=0.1,
        label_priors=None,
        surrogate_type='xent',
        lambdas_initializer=tf.constant_initializer(1.0),
        reuse=None,
        variables_collections=None,
        trainable=True,
        scope=None):
    """Builds the loss for true positive rate at a given false positive rate.

    The optimized surrogate has the form
        wt * loss(+) + lambdas * (wt * loss(-) - r * (1 - pi))
    in which
    - loss(+) is the loss on the positive examples,
    - loss(-) is the loss on the negative examples,
    - wt is a scalar or tensor of per-example weights,
    - r is the target rate,
    - pi is the label_priors.

    Per-example weights alter both the coefficient of each training example
    and the way examples are counted toward the constraint. A caller-supplied
    `label_priors` MUST therefore already account for `weights`:
        label_priors = P / (P + N)
    with
        P = sum_i (wt_i on positives)
        N = sum_i (wt_i on negatives).

    Args:
      labels: A `Tensor` of shape [batch_size] or [batch_size, num_labels].
      logits: A `Tensor` with the same shape as `labels`.
      target_rate: The false positive rate at which the loss is computed.
        Either a float in [0, 1] shared by all labels, or a `Tensor` of shape
        [num_labels] with one rate per label.
      weights: Loss coefficients; a scalar or a `Tensor` of shape
        [batch_size] or [batch_size, num_labels].
      dual_rate_factor: Float controlling the step size of the Lagrange
        multipliers.
      label_priors: None, or a float `Tensor` of shape [num_labels] with the
        prior probability of each label (the fraction of training data that
        is positive). When None, the priors are computed from `labels` with a
        moving average. Mind the interaction with `weights` described above
        and only set this if you have a good reason to.
      surrogate_type: 'xent' or 'hinge'; selects the upper bound used for
        indicator functions (cross-entropy or hinge loss respectively).
      lambdas_initializer: Initializer for the Lagrange multipliers.
      reuse: Whether the layer and its variables should be reused. Reuse
        requires `scope` to be given.
      variables_collections: Optional list of collections for the variables.
      trainable: If `True`, variables are also added to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
      scope: Optional scope for `variable_scope`.

    Returns:
      loss: A `Tensor` shaped like `logits` holding the component-wise loss.
      other_outputs: A dictionary of internal quantities useful for
        debugging (see http://arxiv.org/pdf/1608.04802.pdf):
        lambdas: A Tensor of shape [num_labels] with the Lagrange
          multipliers.
        label_priors: A Tensor of shape [num_labels] with the prior
          probability of each label learned by the loss, if not provided.
        true_positives_lower_bound: Lower bound on the number of true
          positives given `labels` and `logits`; the same bound used in the
          optimized loss expression.
        false_positives_upper_bound: Upper bound on the number of false
          positives given `labels` and `logits`; the same bound used in the
          optimized loss expression.

    Raises:
      ValueError: If `surrogate_type` is not `xent` or `hinge`. No check is
        made in this function itself; the error presumably originates in
        `util.weighted_surrogate_loss` (confirm against that helper).
    """
    scope_values = [labels, logits, label_priors]
    with tf.variable_scope(scope, 'tpr_at_fpr', scope_values, reuse=reuse):
        prepared = _prepare_labels_logits_weights(labels, logits, weights)
        labels, logits, weights, original_shape = prepared
        num_labels = util.get_num_labels(logits)
        dtype = logits.dtype

        # Bring the remaining numeric inputs to tensors of the logits' dtype.
        target_rate = util.convert_and_cast(target_rate, 'target_rate', dtype)
        dual_rate_factor = util.convert_and_cast(
            dual_rate_factor, 'dual_rate_factor', dtype)

        # One Lagrange multiplier per label.
        lambdas, lambdas_variable = _create_dual_variable(
            'lambdas',
            shape=[num_labels],
            dtype=dtype,
            initializer=lambdas_initializer,
            collections=variables_collections,
            trainable=trainable,
            dual_rate_factor=dual_rate_factor)

        # Caller-provided priors are passed through the helper as well.
        label_priors = maybe_create_label_priors(
            label_priors, labels, weights, variables_collections)

        # Positives carry weight 1, negatives carry their multiplier.
        surrogate = util.weighted_surrogate_loss(
            labels,
            logits,
            surrogate_type=surrogate_type,
            positive_weights=1.0,
            negative_weights=lambdas)
        weighted_loss = weights * surrogate

        # The log(2.0) factor compensates for logloss not being an upper
        # bound on the indicator function.
        if surrogate_type == 'xent':
            bound_correction = tf.log(2.0)
        else:
            bound_correction = 1.0
        bound_correction = tf.cast(bound_correction, dtype.base_dtype)

        lambda_term = (
            lambdas * target_rate * (1.0 - label_priors) * bound_correction)
        loss = tf.reshape(weighted_loss - lambda_term, original_shape)

        tp_lower_bound = true_positives_lower_bound(
            labels, logits, weights, surrogate_type)
        fp_upper_bound = false_positives_upper_bound(
            labels, logits, weights, surrogate_type)
        other_outputs = {
            'lambdas': lambdas_variable,
            'label_priors': label_priors,
            'true_positives_lower_bound': tp_lower_bound,
            'false_positives_upper_bound': fp_upper_bound,
        }

    return loss, other_outputs
Beispiel #2
0
def precision_recall_auc_loss(labels,
                              logits,
                              precision_range=(0.0, 1.0),
                              num_anchors=20,
                              weights=1.0,
                              dual_rate_factor=0.1,
                              label_priors=None,
                              surrogate_type='xent',
                              lambdas_initializer=tf.constant_initializer(1.0),
                              reuse=None,
                              variables_collections=None,
                              trainable=True,
                              scope=None):
    """Builds the precision-recall AUC loss.

    Losses for recall at a range of precision values (the anchor points) are
    summed. That sum is a Riemann sum approximating the area under the
    precision-recall curve.

    The per-example `weights` alter both the coefficient of each training
    example and the way examples are counted toward the constraint. A
    caller-supplied `label_priors` MUST therefore already account for
    `weights`:
        label_priors = P / (P + N)
    with
        P = sum_i (wt_i on positives)
        N = sum_i (wt_i on negatives).

    Args:
      labels: A `Tensor` of shape [batch_size] or [batch_size, num_labels].
      logits: A `Tensor` with the same shape as `labels`.
      precision_range: Length-two tuple giving the range of precision values
        over which AUC is computed. Entries must be nonnegative, increasing,
        and at most 1.0.
      num_anchors: Number of grid points used for the Riemann sum.
      weights: Loss coefficients; a scalar or a `Tensor` of shape
        [batch_size] or [batch_size, num_labels].
      dual_rate_factor: Float controlling the step size of the Lagrange
        multipliers.
      label_priors: None, or a float `Tensor` of shape [num_labels] with the
        prior probability of each label (the fraction of training data that
        is positive). When None, the priors are computed from `labels` with a
        moving average. Mind the interaction with `weights` described above
        and only set this if you have a good reason to.
      surrogate_type: 'xent' or 'hinge'; selects the upper bound used for
        indicator functions.
      lambdas_initializer: Initializer for the Lagrange multipliers.
      reuse: Whether the layer and its variables should be reused. Reuse
        requires `scope` to be given.
      variables_collections: Optional list of collections for the variables.
      trainable: If `True`, variables are also added to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
      scope: Optional scope for `variable_scope`.

    Returns:
      loss: A `Tensor` shaped like `logits` holding the component-wise loss.
      other_outputs: A dictionary of internal quantities useful for
        debugging (see http://arxiv.org/pdf/1608.04802.pdf):
        lambdas: A Tensor of shape [1, num_labels, num_anchors] with the
          Lagrange multipliers.
        biases: A Tensor of shape [1, num_labels, num_anchors] with the
          learned bias term for each.
        label_priors: A Tensor of shape [1, num_labels, 1] with the prior
          probability of each label learned by the loss, if not provided.
        true_positives_lower_bound: Lower bound on the number of true
          positives given `labels` and `logits`; the same bound used in the
          optimized loss expression.
        false_positives_upper_bound: Upper bound on the number of false
          positives given `labels` and `logits`; the same bound used in the
          optimized loss expression.

    Raises:
      ValueError: If `surrogate_type` is not `xent` or `hinge`. No check is
        made in this function itself; the error presumably originates in
        `util.weighted_surrogate_loss` (confirm against that helper).
    """
    scope_values = [labels, logits, label_priors]
    with tf.variable_scope(scope,
                           'precision_recall_auc',
                           scope_values,
                           reuse=reuse):
        prepared = _prepare_labels_logits_weights(labels, logits, weights)
        labels, logits, weights, original_shape = prepared
        num_labels = util.get_num_labels(logits)
        dtype = logits.dtype

        # Bring the remaining numeric input to a tensor of the logits' dtype.
        dual_rate_factor = util.convert_and_cast(
            dual_rate_factor, 'dual_rate_factor', dtype)

        # Anchor points inside precision_range and the spacing between them.
        precision_values, delta = _range_to_anchors_and_delta(
            precision_range, num_anchors, dtype)

        # Multipliers and biases both have shape [1, num_labels, num_anchors].
        lambdas, lambdas_variable = _create_dual_variable(
            'lambdas',
            shape=[1, num_labels, num_anchors],
            dtype=dtype,
            initializer=lambdas_initializer,
            collections=variables_collections,
            trainable=trainable,
            dual_rate_factor=dual_rate_factor)
        biases = tf.contrib.framework.model_variable(
            name='biases',
            shape=[1, num_labels, num_anchors],
            dtype=dtype,
            initializer=tf.zeros_initializer(),
            collections=variables_collections,
            trainable=trainable)

        # Priors are reshaped so they broadcast against the anchor axis.
        label_priors = maybe_create_label_priors(
            label_priors, labels, weights, variables_collections)
        label_priors = tf.reshape(label_priors, [1, num_labels, 1])

        # Give logits, labels and weights a trailing axis:
        # [batch_size, num_labels, 1].
        logits = tf.expand_dims(logits, 2)
        labels = tf.expand_dims(labels, 2)
        weights = tf.expand_dims(weights, 2)

        # Weighted surrogate loss evaluated at every anchor.
        shifted_logits = logits + biases
        positive_weights = 1.0 + lambdas * (1.0 - precision_values)
        negative_weights = lambdas * precision_values
        surrogate = util.weighted_surrogate_loss(
            labels,
            shifted_logits,
            surrogate_type=surrogate_type,
            positive_weights=positive_weights,
            negative_weights=negative_weights)
        loss = weights * surrogate

        # The log(2.0) factor compensates for logloss not being an upper
        # bound on the indicator function.
        if surrogate_type == 'xent':
            bound_correction = tf.log(2.0)
        else:
            bound_correction = 1.0
        bound_correction = tf.cast(bound_correction, dtype.base_dtype)

        lambda_term = (lambdas * (1.0 - precision_values) * label_priors *
                       bound_correction)
        per_anchor_loss = loss - lambda_term
        per_label_loss = delta * tf.reduce_sum(per_anchor_loss, 2)

        # Normalize so that a perfect score function yields AUC 1.0.
        # precision_range is cut into num_anchors + 1 intervals while the
        # Riemann sum has only num_anchors terms, so the effective
        # integration interval is `delta` shorter than precision_range.
        interval_length = precision_range[1] - precision_range[0] - delta
        scaled_loss = tf.div(per_label_loss,
                             interval_length,
                             name='AUC_Normalize')
        scaled_loss = tf.reshape(scaled_loss, original_shape)

        # The bounds receive the expanded tensors and the logits without the
        # learned biases added.
        tp_lower_bound = true_positives_lower_bound(
            labels, logits, weights, surrogate_type)
        fp_upper_bound = false_positives_upper_bound(
            labels, logits, weights, surrogate_type)
        other_outputs = {
            'lambdas': lambdas_variable,
            'biases': biases,
            'label_priors': label_priors,
            'true_positives_lower_bound': tp_lower_bound,
            'false_positives_upper_bound': fp_upper_bound,
        }

    return scaled_loss, other_outputs
Beispiel #3
0
def true_positive_rate_at_false_positive_rate_loss(
    labels,
    logits,
    target_rate,
    weights=1.0,
    dual_rate_factor=0.1,
    label_priors=None,
    surrogate_type='xent',
    lambdas_initializer=tf.constant_initializer(1.0),
    reuse=None,
    variables_collections=None,
    trainable=True,
    scope=None):
  """Returns the true-positive-rate-at-false-positive-rate loss.

  The surrogate that gets optimized is
      wt * loss(+) + lambdas * (wt * loss(-) - r * (1 - pi))
  where:
  - loss(+) is the loss on the positive examples
  - loss(-) is the loss on the negative examples
  - wt is a scalar or tensor of per-example weights
  - r is the target rate
  - pi is the label_priors.

  Besides scaling individual training examples, the per-example weights
  change how examples count toward the constraint. When `label_priors` is
  passed in, it MUST reflect `weights`, i.e.
      label_priors = P / (P + N)
  where
      P = sum_i (wt_i on positives)
      N = sum_i (wt_i on negatives).

  Args:
    labels: A `Tensor` of shape [batch_size] or [batch_size, num_labels].
    logits: A `Tensor` with the same shape as `labels`.
    target_rate: False positive rate at which to compute the loss: a float
      between 0 and 1 for a single rate, or a `Tensor` of shape [num_labels]
      giving a rate per label.
    weights: Coefficients for the loss; a scalar or a `Tensor` of shape
      [batch_size] or [batch_size, num_labels].
    dual_rate_factor: Float that controls the step size for the Lagrange
      multipliers.
    label_priors: None, or a float `Tensor` of shape [num_labels] holding the
      prior probability of each label (the fraction of the training data
      consisting of positive examples). If None, the priors are computed from
      `labels` with a moving average. Note the interaction with `weights`
      above; leave unset unless there is a good reason.
    surrogate_type: 'xent' (cross-entropy surrogate) or 'hinge' (hinge loss);
      the upper bound used for indicator functions.
    lambdas_initializer: Initializer op for the Lagrange multipliers.
    reuse: Whether the layer and its variables should be reused; `scope` must
      be given for reuse to work.
    variables_collections: Optional list of collections for the variables.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    scope: Optional scope for `variable_scope`.

  Returns:
    loss: A `Tensor` of the same shape as `logits` with the component-wise
      loss.
    other_outputs: Dictionary of internal quantities for debugging; details
      in http://arxiv.org/pdf/1608.04802.pdf.
      lambdas: Tensor of shape [num_labels] with the Lagrange multipliers.
      label_priors: Tensor of shape [num_labels] with the prior probability
        of each label learned by the loss, if not provided.
      true_positives_lower_bound: Lower bound on the number of true positives
        given `labels` and `logits`, identical to the bound used in the
        optimized loss expression.
      false_positives_upper_bound: Upper bound on the number of false
        positives given `labels` and `logits`, identical to the bound used in
        the optimized loss expression.

  Raises:
    ValueError: If `surrogate_type` is not `xent` or `hinge`. This function
      does not check it directly; the error presumably comes from
      `util.weighted_surrogate_loss` (confirm against that helper).
  """
  with tf.variable_scope(
      scope, 'tpr_at_fpr', [labels, logits, label_priors], reuse=reuse):
    (labels, logits, weights,
     original_shape) = _prepare_labels_logits_weights(labels, logits, weights)
    num_labels = util.get_num_labels(logits)
    logits_dtype = logits.dtype

    # Standardize the remaining inputs as tensors of the logits' dtype.
    target_rate = util.convert_and_cast(
        target_rate, 'target_rate', logits_dtype)
    dual_rate_factor = util.convert_and_cast(
        dual_rate_factor, 'dual_rate_factor', logits_dtype)

    # Dual variable: one multiplier per label.
    lambdas, lambdas_variable = _create_dual_variable(
        'lambdas',
        shape=[num_labels],
        dtype=logits_dtype,
        initializer=lambdas_initializer,
        collections=variables_collections,
        trainable=trainable,
        dual_rate_factor=dual_rate_factor)

    label_priors = maybe_create_label_priors(
        label_priors, labels, weights, variables_collections)

    # Surrogate loss with unit weight on positives and `lambdas` on negatives.
    unweighted_loss = util.weighted_surrogate_loss(
        labels,
        logits,
        surrogate_type=surrogate_type,
        positive_weights=1.0,
        negative_weights=lambdas)
    weighted_loss = weights * unweighted_loss

    # log(2.0) corrects for logloss not being an upper bound on the indicator
    # function; other surrogates need no correction.
    if surrogate_type == 'xent':
      log2_factor = tf.log(2.0)
    else:
      log2_factor = 1.0
    log2_factor = tf.cast(log2_factor, logits_dtype.base_dtype)

    lambda_term = lambdas * target_rate * (1.0 - label_priors) * log2_factor
    loss = tf.reshape(weighted_loss - lambda_term, original_shape)

    true_positives = true_positives_lower_bound(
        labels, logits, weights, surrogate_type)
    false_positives = false_positives_upper_bound(
        labels, logits, weights, surrogate_type)
    other_outputs = {
        'lambdas': lambdas_variable,
        'label_priors': label_priors,
        'true_positives_lower_bound': true_positives,
        'false_positives_upper_bound': false_positives,
    }
    return loss, other_outputs
Beispiel #4
0
def precision_recall_auc_loss(
    labels,
    logits,
    precision_range=(0.0, 1.0),
    num_anchors=20,
    weights=1.0,
    dual_rate_factor=0.1,
    label_priors=None,
    surrogate_type='xent',
    lambdas_initializer=tf.constant_initializer(1.0),
    reuse=None,
    variables_collections=None,
    trainable=True,
    scope=None):
  """Returns the precision-recall AUC loss.

  The loss sums recall-at-precision losses over a range of precision values
  (anchor points); the sum is a Riemann sum approximating the area under the
  precision-recall curve.

  Besides scaling individual training examples, the per-example `weights`
  change how examples count toward the constraint. When `label_priors` is
  passed in, it MUST reflect `weights`, i.e.
      label_priors = P / (P + N)
  where
      P = sum_i (wt_i on positives)
      N = sum_i (wt_i on negatives).

  Args:
    labels: A `Tensor` of shape [batch_size] or [batch_size, num_labels].
    logits: A `Tensor` with the same shape as `labels`.
    precision_range: Length-two tuple with the range of precision values over
      which to compute AUC; entries must be nonnegative, increasing, and no
      greater than 1.0.
    num_anchors: Number of grid points used to approximate the Riemann sum.
    weights: Coefficients for the loss; a scalar or a `Tensor` of shape
      [batch_size] or [batch_size, num_labels].
    dual_rate_factor: Float that controls the step size for the Lagrange
      multipliers.
    label_priors: None, or a float `Tensor` of shape [num_labels] holding the
      prior probability of each label (the fraction of the training data
      consisting of positive examples). If None, the priors are computed from
      `labels` with a moving average. Note the interaction with `weights`
      above; leave unset unless there is a good reason.
    surrogate_type: 'xent' or 'hinge'; the upper bound used for indicator
      functions.
    lambdas_initializer: Initializer for the Lagrange multipliers.
    reuse: Whether the layer and its variables should be reused; `scope` must
      be given for reuse to work.
    variables_collections: Optional list of collections for the variables.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    scope: Optional scope for `variable_scope`.

  Returns:
    loss: A `Tensor` of the same shape as `logits` with the component-wise
      loss.
    other_outputs: Dictionary of internal quantities for debugging; details
      in http://arxiv.org/pdf/1608.04802.pdf.
      lambdas: Tensor of shape [1, num_labels, num_anchors] with the Lagrange
        multipliers.
      biases: Tensor of shape [1, num_labels, num_anchors] with the learned
        bias term for each.
      label_priors: Tensor of shape [1, num_labels, 1] with the prior
        probability of each label learned by the loss, if not provided.
      true_positives_lower_bound: Lower bound on the number of true positives
        given `labels` and `logits`, identical to the bound used in the
        optimized loss expression.
      false_positives_upper_bound: Upper bound on the number of false
        positives given `labels` and `logits`, identical to the bound used in
        the optimized loss expression.

  Raises:
    ValueError: If `surrogate_type` is not `xent` or `hinge`. This function
      does not check it directly; the error presumably comes from
      `util.weighted_surrogate_loss` (confirm against that helper).
  """
  with tf.variable_scope(
      scope,
      'precision_recall_auc',
      [labels, logits, label_priors],
      reuse=reuse):
    (labels, logits, weights,
     original_shape) = _prepare_labels_logits_weights(labels, logits, weights)
    num_labels = util.get_num_labels(logits)
    logits_dtype = logits.dtype

    # Standardize the remaining input as a tensor of the logits' dtype.
    dual_rate_factor = util.convert_and_cast(
        dual_rate_factor, 'dual_rate_factor', logits_dtype)

    # Anchor points and the distance between neighboring anchors.
    precision_values, delta = _range_to_anchors_and_delta(
        precision_range, num_anchors, logits_dtype)

    # Dual variable of shape [1, num_labels, num_anchors].
    lambdas, lambdas_variable = _create_dual_variable(
        'lambdas',
        shape=[1, num_labels, num_anchors],
        dtype=logits_dtype,
        initializer=lambdas_initializer,
        collections=variables_collections,
        trainable=trainable,
        dual_rate_factor=dual_rate_factor)

    # Zero-initialized biases of shape [1, num_labels, num_anchors].
    biases = tf.contrib.framework.model_variable(
        name='biases',
        shape=[1, num_labels, num_anchors],
        dtype=logits_dtype,
        initializer=tf.zeros_initializer(),
        collections=variables_collections,
        trainable=trainable)

    label_priors = maybe_create_label_priors(
        label_priors, labels, weights, variables_collections)
    label_priors = tf.reshape(label_priors, [1, num_labels, 1])

    # Append a trailing axis so the tensors have shape
    # [batch_size, num_labels, 1] and broadcast over the anchors.
    logits_3d = tf.expand_dims(logits, 2)
    labels_3d = tf.expand_dims(labels, 2)
    weights_3d = tf.expand_dims(weights, 2)

    # Weighted surrogate loss at each anchor point.
    biased_logits = logits_3d + biases
    positive_weights = 1.0 + lambdas * (1.0 - precision_values)
    negative_weights = lambdas * precision_values
    anchor_surrogate = util.weighted_surrogate_loss(
        labels_3d,
        biased_logits,
        surrogate_type=surrogate_type,
        positive_weights=positive_weights,
        negative_weights=negative_weights)
    loss = weights_3d * anchor_surrogate

    # log(2.0) corrects for logloss not being an upper bound on the indicator
    # function; other surrogates need no correction.
    if surrogate_type == 'xent':
      log2_factor = tf.log(2.0)
    else:
      log2_factor = 1.0
    log2_factor = tf.cast(log2_factor, logits_dtype.base_dtype)

    lambda_term = (
        lambdas * (1.0 - precision_values) * label_priors * log2_factor)
    per_anchor_loss = loss - lambda_term
    per_label_loss = delta * tf.reduce_sum(per_anchor_loss, 2)

    # Rescale so a perfect score function has AUC 1.0. precision_range is
    # discretized into num_anchors + 1 intervals, yet the Riemann sum holds
    # only num_anchors terms, so the effective integration interval is
    # `delta` shorter than precision_range.
    effective_length = precision_range[1] - precision_range[0] - delta
    scaled_loss = tf.div(
        per_label_loss, effective_length, name='AUC_Normalize')
    scaled_loss = tf.reshape(scaled_loss, original_shape)

    # The bounds are evaluated on the expanded tensors, using the logits
    # without the learned biases.
    true_positives = true_positives_lower_bound(
        labels_3d, logits_3d, weights_3d, surrogate_type)
    false_positives = false_positives_upper_bound(
        labels_3d, logits_3d, weights_3d, surrogate_type)
    other_outputs = {
        'lambdas': lambdas_variable,
        'biases': biases,
        'label_priors': label_priors,
        'true_positives_lower_bound': true_positives,
        'false_positives_upper_bound': false_positives,
    }

    return scaled_loss, other_outputs
Beispiel #5
0
def precision_recall_auc_loss(labels,
                              logits,
                              precision_range=(0.0, 1.0),
                              num_anchors=20,
                              weights=1.0,
                              dual_rate_factor=0.1,
                              label_priors=None,
                              surrogate_type='xent',
                              lambdas_initializer=tf.constant_initializer(1.0),
                              reuse=None,
                              variables_collections=None,
                              trainable=True,
                              scope=None):
    """Builds a precision-recall AUC loss from per-anchor surrogate losses.

    A weighted surrogate loss is evaluated at `num_anchors` precision anchor
    points derived from `precision_range`. The per-anchor losses are summed
    over the anchor axis, multiplied by the anchor spacing `delta`, and
    divided by `precision_range[1] - precision_range[0] - delta`.

    Args:
      labels: Label `Tensor`; normalized by `_prepare_labels_logits_weights`
        (assumed shape [batch_size] or [batch_size, num_labels] - confirm).
      logits: Logit `Tensor`, normalized together with `labels`.
      precision_range: Indexable pair (low, high) of precision values that
        bounds the anchors.
      num_anchors: Number of anchor points; also the last dimension of the
        `lambdas` and `biases` variables.
      weights: Per-example loss coefficients, normalized together with
        `labels` and `logits`.
      dual_rate_factor: Passed to `_create_dual_variable` after conversion
        to the logits' dtype.
      label_priors: Optional priors handed to `maybe_create_label_priors`;
        the result is reshaped to [1, num_labels, 1].
      surrogate_type: Forwarded to `util.weighted_surrogate_loss` and the
        bound helpers. The value 'xent' additionally scales the multiplier
        term by log(2.0).
      lambdas_initializer: Initializer for the `lambdas` dual variable.
      reuse: Forwarded to `tf.variable_scope`.
      variables_collections: Collections passed to every variable created.
      trainable: Trainable flag passed to the `lambdas` and `biases`
        variables.
      scope: Optional scope for `tf.variable_scope`; the default name is
        'precision_recall_auc'.

    Returns:
      A pair `(scaled_loss, other_outputs)`. `scaled_loss` is reshaped to the
      `original_shape` reported by `_prepare_labels_logits_weights`.
      `other_outputs` is a dict with the keys 'lambdas', 'biases',
      'label_priors', 'true_positives_lower_bound' and
      'false_positives_upper_bound'.
    """
    scope_values = [labels, logits, label_priors]
    with tf.variable_scope(scope,
                           'precision_recall_auc',
                           scope_values,
                           reuse=reuse):
        prepared = _prepare_labels_logits_weights(labels, logits, weights)
        labels, logits, weights, original_shape = prepared
        num_labels = util.get_num_labels(logits)
        dtype = logits.dtype

        # Bring the remaining numeric input to a tensor of the logits' dtype.
        dual_rate_factor = util.convert_and_cast(
            dual_rate_factor, 'dual_rate_factor', dtype)

        # Anchor points inside precision_range and the spacing between them.
        precision_values, delta = _range_to_anchors_and_delta(
            precision_range, num_anchors, dtype)

        # Multipliers and biases both have shape [1, num_labels, num_anchors].
        lambdas, lambdas_variable = _create_dual_variable(
            'lambdas',
            shape=[1, num_labels, num_anchors],
            dtype=dtype,
            initializer=lambdas_initializer,
            collections=variables_collections,
            trainable=trainable,
            dual_rate_factor=dual_rate_factor)
        biases = tf.contrib.framework.model_variable(
            name='biases',
            shape=[1, num_labels, num_anchors],
            dtype=dtype,
            initializer=tf.zeros_initializer(),
            collections=variables_collections,
            trainable=trainable)

        # Priors are reshaped so they broadcast against the anchor axis.
        label_priors = maybe_create_label_priors(
            label_priors, labels, weights, variables_collections)
        label_priors = tf.reshape(label_priors, [1, num_labels, 1])

        # Give logits, labels and weights a trailing axis:
        # [batch_size, num_labels, 1].
        logits = tf.expand_dims(logits, 2)
        labels = tf.expand_dims(labels, 2)
        weights = tf.expand_dims(weights, 2)

        # Weighted surrogate loss evaluated at every anchor.
        shifted_logits = logits + biases
        positive_weights = 1.0 + lambdas * (1.0 - precision_values)
        negative_weights = lambdas * precision_values
        surrogate = util.weighted_surrogate_loss(
            labels,
            shifted_logits,
            surrogate_type=surrogate_type,
            positive_weights=positive_weights,
            negative_weights=negative_weights)
        loss = weights * surrogate

        # The log(2.0) factor compensates for logloss not being an upper
        # bound on the indicator function.
        if surrogate_type == 'xent':
            bound_correction = tf.log(2.0)
        else:
            bound_correction = 1.0
        bound_correction = tf.cast(bound_correction, dtype.base_dtype)

        lambda_term = (lambdas * (1.0 - precision_values) * label_priors *
                       bound_correction)
        per_anchor_loss = loss - lambda_term
        per_label_loss = delta * tf.reduce_sum(per_anchor_loss, 2)

        # Normalize so that a perfect score function yields AUC 1.0.
        # precision_range is cut into num_anchors + 1 intervals while the
        # Riemann sum has only num_anchors terms, so the effective
        # integration interval is `delta` shorter than precision_range.
        interval_length = precision_range[1] - precision_range[0] - delta
        scaled_loss = tf.div(per_label_loss,
                             interval_length,
                             name='AUC_Normalize')
        scaled_loss = tf.reshape(scaled_loss, original_shape)

        # The bounds receive the expanded tensors and the logits without the
        # learned biases added.
        tp_lower_bound = true_positives_lower_bound(
            labels, logits, weights, surrogate_type)
        fp_upper_bound = false_positives_upper_bound(
            labels, logits, weights, surrogate_type)
        other_outputs = {
            'lambdas': lambdas_variable,
            'biases': biases,
            'label_priors': label_priors,
            'true_positives_lower_bound': tp_lower_bound,
            'false_positives_upper_bound': fp_upper_bound,
        }

    return scaled_loss, other_outputs
Beispiel #6
0
def true_positive_rate_at_false_positive_rate_loss(
        labels,
        logits,
        target_rate,
        weights=1.0,
        dual_rate_factor=0.1,
        label_priors=None,
        surrogate_type='xent',
        lambdas_initializer=tf.constant_initializer(1.0),
        reuse=None,
        variables_collections=None,
        trainable=True,
        scope=None):
    """Builds a loss for true positive rate at a target false positive rate.

    The returned loss is
        weights * surrogate_loss - lambdas * target_rate * (1 - label_priors)
    where the surrogate loss puts weight 1 on positives and `lambdas` on
    negatives. With the 'xent' surrogate the subtracted term is additionally
    multiplied by log(2.0).

    Args:
      labels: Label `Tensor`; normalized by `_prepare_labels_logits_weights`
        (assumed shape [batch_size] or [batch_size, num_labels] - confirm).
      logits: Logit `Tensor`, normalized together with `labels`.
      target_rate: Target false positive rate; converted to a tensor of the
        logits' dtype.
      weights: Per-example loss coefficients, normalized together with
        `labels` and `logits`.
      dual_rate_factor: Passed to `_create_dual_variable` after conversion
        to the logits' dtype.
      label_priors: Optional priors handed to `maybe_create_label_priors`.
      surrogate_type: Forwarded to `util.weighted_surrogate_loss` and the
        bound helpers. The value 'xent' enables the log(2.0) correction.
      lambdas_initializer: Initializer for the `lambdas` dual variable.
      reuse: Forwarded to `tf.variable_scope`.
      variables_collections: Collections passed to the variables created.
      trainable: Trainable flag passed to the `lambdas` variable.
      scope: Optional scope for `tf.variable_scope`; the default name is
        'tpr_at_fpr'.

    Returns:
      A pair `(loss, other_outputs)`. `loss` is reshaped to the
      `original_shape` reported by `_prepare_labels_logits_weights`.
      `other_outputs` is a dict with the keys 'lambdas', 'label_priors',
      'true_positives_lower_bound' and 'false_positives_upper_bound'.
    """
    scope_values = [labels, logits, label_priors]
    with tf.variable_scope(scope, 'tpr_at_fpr', scope_values, reuse=reuse):
        prepared = _prepare_labels_logits_weights(labels, logits, weights)
        labels, logits, weights, original_shape = prepared
        num_labels = util.get_num_labels(logits)
        dtype = logits.dtype

        # Bring the remaining numeric inputs to tensors of the logits' dtype.
        target_rate = util.convert_and_cast(target_rate, 'target_rate', dtype)
        dual_rate_factor = util.convert_and_cast(
            dual_rate_factor, 'dual_rate_factor', dtype)

        # One Lagrange multiplier per label.
        lambdas, lambdas_variable = _create_dual_variable(
            'lambdas',
            shape=[num_labels],
            dtype=dtype,
            initializer=lambdas_initializer,
            collections=variables_collections,
            trainable=trainable,
            dual_rate_factor=dual_rate_factor)

        label_priors = maybe_create_label_priors(
            label_priors, labels, weights, variables_collections)

        # Positives carry weight 1, negatives carry their multiplier.
        surrogate = util.weighted_surrogate_loss(
            labels,
            logits,
            surrogate_type=surrogate_type,
            positive_weights=1.0,
            negative_weights=lambdas)
        weighted_loss = weights * surrogate

        # The log(2.0) factor compensates for logloss not being an upper
        # bound on the indicator function.
        if surrogate_type == 'xent':
            bound_correction = tf.log(2.0)
        else:
            bound_correction = 1.0
        bound_correction = tf.cast(bound_correction, dtype.base_dtype)

        lambda_term = (
            lambdas * target_rate * (1.0 - label_priors) * bound_correction)
        loss = tf.reshape(weighted_loss - lambda_term, original_shape)

        tp_lower_bound = true_positives_lower_bound(
            labels, logits, weights, surrogate_type)
        fp_upper_bound = false_positives_upper_bound(
            labels, logits, weights, surrogate_type)
        other_outputs = {
            'lambdas': lambdas_variable,
            'label_priors': label_priors,
            'true_positives_lower_bound': tp_lower_bound,
            'false_positives_upper_bound': fp_upper_bound,
        }

    return loss, other_outputs