import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops


def bce_of_true_positive(y_true, y_pred, from_logits=False, _sentinel=None,
                         name=None):
  """Binary cross-entropy restricted to the positive-label terms.

  Returns the mean of z * log(1 + exp(-x)), i.e. the contribution of the
  positive (true) labels to the usual sigmoid cross-entropy.
  """
  if not from_logits:
    # Invert the sigmoid so the rest of the function can operate on logits.
    _epsilon = tf.convert_to_tensor(K.epsilon(), y_pred.dtype.base_dtype)
    output = tf.clip_by_value(y_pred, _epsilon, 1 - _epsilon)
    y_pred = tf.log(output / (1 - output))
  # Alteration of sigmoid_cross_entropy_with_logits.
  nn_ops._ensure_xent_args("sigmoid_cross_entropy_with_logits", _sentinel,
                           y_true, y_pred)
  with ops.name_scope(name, "logistic_loss_over_true_positives",
                      [y_pred, y_true]) as name:
    logits = ops.convert_to_tensor(y_pred, name="logits")
    labels = ops.convert_to_tensor(y_true, name="labels")
    try:
      labels.get_shape().merge_with(logits.get_shape())
    except ValueError:
      raise ValueError("Logits and labels must have the same shape (%s vs %s)" %
                       (logits.get_shape(), labels.get_shape()))
    zeros = array_ops.zeros_like(logits, dtype=logits.dtype)
    cond = (logits >= zeros)
    # Numerically stable positive-label term:
    #   z * log(1 + exp(-x)) = z * (max(-x, 0) + log(1 + exp(-abs(x))))
    neg_relu_logits = array_ops.where(cond, zeros, -logits)  # max(-x, 0)
    neg_abs_logits = array_ops.where(cond, -logits, logits)  # -abs(x)
    # The mean is taken to be in line with Keras' binary_crossentropy.
    return K.mean(
        math_ops.multiply(
            labels,
            neg_relu_logits + math_ops.log1p(math_ops.exp(neg_abs_logits)),
            name=name))
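
# A minimal usage sketch (an illustration added here, not part of the original
# code): `_example_true_positive_bce` is a hypothetical helper name and the
# tensor values are made up. Assumes a TF 1.x graph-mode session.
def _example_true_positive_bce():
  y_true = tf.constant([[1.0, 0.0], [1.0, 1.0]])
  probs = tf.constant([[0.9, 0.2], [0.4, 0.7]])
  # Probabilities in, so the function converts them back to logits itself.
  loss = bce_of_true_positive(y_true, probs, from_logits=False)
  with tf.Session() as sess:
    print(sess.run(loss))  # scalar mean over the positive-label terms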

def sigmoid_balanced_cross_entropy_with_logits(_sentinel=None, labels=None,
                                               logits=None, beta=None,
                                               name=None):
  nn_ops._ensure_xent_args("sigmoid_cross_entropy_with_logits", _sentinel,
                           labels, logits)
  with ops.name_scope(name, "logistic_loss", [logits, labels]) as name:
    logits = ops.convert_to_tensor(logits, name="logits")
    labels = ops.convert_to_tensor(labels, name="labels")
    try:
      labels.get_shape().merge_with(logits.get_shape())
    except ValueError:
      raise ValueError("logits and labels must have the same shape (%s vs %s)" %
                       (logits.get_shape(), labels.get_shape()))
    zeros = array_ops.zeros_like(logits, dtype=logits.dtype)
    cond = (logits >= zeros)
    relu_logits = array_ops.where(cond, logits, zeros)
    neg_abs_logits = array_ops.where(cond, -logits, logits)
    # Per-element weight: beta for positive labels, (1 - beta) for negative
    # labels; beta = 0.5 recovers the ordinary loss scaled by 0.5.
    weights = beta * labels + (1. - beta) * (1. - labels)
    # Numerically stable form of
    #   beta * z * softplus(-x) + (1 - beta) * (1 - z) * softplus(x),
    # analogous to max(x, 0) - x * z + log(1 + exp(-abs(x))).
    balanced_cross_entropy = (
        relu_logits * weights - logits * labels * beta +
        math_ops.log1p(math_ops.exp(neg_abs_logits)) * weights)
    return tf.reduce_mean(balanced_cross_entropy)
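
# Usage sketch for the balanced loss above (names and values are assumptions
# for illustration; assumes TF 1.x). `beta` is the positive-class weight, e.g.
# the fraction of negative pixels in edge detection.
def _example_balanced_bce():
  labels = tf.constant([[1.0, 0.0, 0.0, 0.0]])
  logits = tf.constant([[2.0, -1.0, -3.0, 0.5]])
  # With beta = 0.75, mistakes on the (rare) positive class cost three times
  # as much as mistakes on the negative class.
  loss = sigmoid_balanced_cross_entropy_with_logits(
      labels=labels, logits=logits, beta=0.75)
  with tf.Session() as sess:
    print(sess.run(loss))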

def cost_sensitive_sigmoid_cross_entropy_with_logits(
    _sentinel=None, labels=None, logits=None, name=None, fp_rate=None,
    fn_rate=None):
  """Computes cost-sensitive sigmoid cross entropy given `logits`.

  Measures the probability error in discrete classification tasks in which
  each class is independent and not mutually exclusive.  For instance, one
  could perform multilabel classification where a picture can contain both an
  elephant and a dog at the same time.

  For brevity, let `x = logits`, `z = labels`.  The logistic loss is

        z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
      = z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
      = z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))
      = z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x)))
      = (1 - z) * x + log(1 + exp(-x))
      = x - x * z + log(1 + exp(-x))

  For x < 0, to avoid overflow in exp(-x), we reformulate the above

        x - x * z + log(1 + exp(-x))
      = log(exp(x)) - x * z + log(1 + exp(-x))
      = - x * z + log(1 + exp(x))

  Hence, to ensure stability and avoid overflow, the implementation uses this
  equivalent formulation

      max(x, 0) - x * z + log(1 + exp(-abs(x)))

  On top of this, an extra penalty of `fn_rate` is added inside the log term
  wherever the prediction is a false negative (z > 0 but x < 0), and of
  `fp_rate` wherever it is a false positive (z <= 0 but x >= 0).

  `logits` and `labels` must have the same type and shape.

  Args:
    _sentinel: Used to prevent positional parameters. Internal, do not use.
    labels: A `Tensor` of the same type and shape as `logits`.
    logits: A `Tensor` of type `float32` or `float64`.
    name: A name for the operation (optional).
    fp_rate: Scalar penalty applied to false-positive elements.
    fn_rate: Scalar penalty applied to false-negative elements.

  Returns:
    A `Tensor` of the same shape as `logits` with the componentwise logistic
    losses, together with the componentwise `fn_cost` and `fp_cost` tensors.

  Raises:
    ValueError: If `logits` and `labels` do not have the same shape.
  """
  # pylint: disable=protected-access
  nn_ops._ensure_xent_args("sigmoid_cross_entropy_with_logits", _sentinel,
                           labels, logits)
  # pylint: enable=protected-access
  with ops.name_scope(name, "logistic_loss", [logits, labels]) as name:
    logits = ops.convert_to_tensor(logits, name="logits")
    labels = ops.convert_to_tensor(labels, name="labels")
    try:
      labels.get_shape().merge_with(logits.get_shape())
    except ValueError:
      raise ValueError("logits and labels must have the same shape (%s vs %s)" %
                       (logits.get_shape(), labels.get_shape()))
    zeros = array_ops.zeros_like(logits, dtype=logits.dtype)
    ones = array_ops.ones_like(logits, dtype=logits.dtype)
    cond = (logits >= zeros)
    # Masks for the two kinds of mistakes, cast to the logits dtype so the
    # penalties can be mixed into the loss.
    fn_cond = math_ops.logical_and(labels > zeros, logits < zeros)
    fp_cond = math_ops.logical_and(labels <= zeros, logits >= zeros)
    fn_cost = fn_rate * math_ops.cast(fn_cond, logits.dtype)
    fp_cost = fp_rate * math_ops.cast(fp_cond, logits.dtype)
    # x >= 0 branch of the stable formulation, with the penalties folded in.
    pos_loss = logits - logits * labels + labels * math_ops.log1p(
        math_ops.exp(-logits + fn_cost)) + (
            (ones - labels) * math_ops.log1p(math_ops.exp(-logits + fp_cost)))
    # x < 0 branch.
    neg_loss = -logits * labels + labels * math_ops.log(
        math_ops.exp(logits) + math_ops.exp(fn_cost)) + (
            (ones - labels) * math_ops.log(
                math_ops.exp(logits) + math_ops.exp(fp_cost)))
    return array_ops.where(cond, pos_loss, neg_loss, name=name), fn_cost, fp_cost
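
# Hypothetical smoke test for the cost-sensitive variant above (the helper
# name and values are assumptions for illustration; assumes TF 1.x). Note
# that the function returns the loss together with the penalty tensors.
def _example_cost_sensitive_bce():
  labels = tf.constant([[1.0, 0.0], [0.0, 1.0]])
  logits = tf.constant([[-2.0, 3.0], [-1.0, 2.0]])  # one FN and one FP
  loss, fn_cost, fp_cost = cost_sensitive_sigmoid_cross_entropy_with_logits(
      labels=labels, logits=logits, fn_rate=2.0, fp_rate=1.0)
  with tf.Session() as sess:
    print(sess.run([loss, fn_cost, fp_cost]))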

def weighted_cel(_sentinel=None, labels=None, logits=None, bound=2.0,
                 name=None):
  """Weighted cross-entropy loss (W-CEL).

  Inspired strongly by TensorFlow's sigmoid_cross_entropy_with_logits:
  https://github.com/tensorflow/tensorflow/blob/v2.3.1/tensorflow/python/ops/nn_impl.py#L196-L244
  Weighted CEL (cross-entropy loss) as in https://arxiv.org/pdf/1705.02315

  Starting from the CEL in TF: for brevity, let `x = logits`, `z = labels`.
  The logistic loss is

        z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
      = z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
      = z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))
      = z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x)))          (4)
      = (1 - z) * x + log(1 + exp(-x))
      = x - x * z + log(1 + exp(-x))

  For x < 0, to avoid overflow in exp(-x), we reformulate the above

        x - x * z + log(1 + exp(-x))
      = log(exp(x)) - x * z + log(1 + exp(-x))
      = - x * z + log(1 + exp(x))

  Hence, to ensure stability and avoid overflow, the implementation uses this
  equivalent formulation

      max(x, 0) - x * z + log(1 + exp(-abs(x)))

  Weighted CEL:

  For x > 0 (from (4)):

      B_p * [z * log(1 + exp(-x))] + B_n * [(1 - z) * (x + log(1 + exp(-x)))]

  For x < 0 (from (4)):

        B_p * [z * -log(exp(x) / (1 + exp(x)))]
          + B_n * [(1 - z) * -log(1 / (1 + exp(x)))]
      = B_p * [z * (log(1 + exp(x)) - x)] + B_n * [(1 - z) * log(1 + exp(x))]

  Hence, to ensure stability and avoid overflow, the implementation uses this
  equivalent formulation

      B_p * [z * (log(1 + exp(-abs(x))) - min(x, 0))]
        + B_n * [(1 - z) * (max(x, 0) + log(1 + exp(-abs(x))))]

  Args:
    _sentinel: Used to prevent positional parameters. Internal, do not use.
    labels: A `Tensor` of the same type and shape as `logits`.
    logits: A `Tensor` of type `float32` or `float64`.
    bound: Upper bound on the class weights B_p and B_n.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of the same shape as `logits` with the componentwise logistic
    losses.

  Raises:
    ValueError: If `logits` and `labels` do not have the same shape.
  """
  nn_ops._ensure_xent_args("sigmoid_cross_entropy_with_logits", _sentinel,
                           labels, logits)
  with ops.name_scope(name, "weighted_logistic_loss",
                      [logits, labels]) as name:
    logits = ops.convert_to_tensor(logits, name="logits")
    labels = ops.convert_to_tensor(labels, name="labels")
    try:
      labels.get_shape().merge_with(logits.get_shape())
    except ValueError:
      raise ValueError("logits and labels must have the same shape (%s vs %s)" %
                       (logits.get_shape(), labels.get_shape()))
    # B_p = (|P| + |N|) / |P| and B_n = (|P| + |N|) / |N|, clipped at `bound`
    # so that a nearly empty class cannot blow up the loss.
    cnt_one = tf.cast(tf.reduce_sum(labels), tf.float32)
    cnt_zero = tf.cast(tf.size(logits), tf.float32) - cnt_one
    beta_p = tf.cast((cnt_one + cnt_zero) / cnt_one, tf.float32)
    beta_n = tf.cast((cnt_one + cnt_zero) / cnt_zero, tf.float32)
    beta_n = math_ops.minimum(bound, beta_n)
    beta_p = math_ops.minimum(bound, beta_p)
    zeros = array_ops.zeros_like(logits, dtype=logits.dtype)
    cond = (logits >= zeros)
    relu_logits = array_ops.where(cond, logits, zeros)      # max(x, 0)
    not_relu_logits = array_ops.where(cond, zeros, logits)  # min(x, 0)
    abs_logits = math_ops.abs(logits)
    A = beta_p * (labels * (math_ops.log1p(math_ops.exp(-abs_logits)) -
                            not_relu_logits))
    B = beta_n * ((1.0 - labels) * (relu_logits +
                                    math_ops.log1p(math_ops.exp(-abs_logits))))
    return math_ops.add(A, B, name=name)
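
# Usage sketch (an assumption for illustration, not from the original source;
# TF 1.x graph mode). With 2 positives out of 8 labels, B_p = 8/2 = 4 is
# clipped to `bound` = 2, while B_n = 8/6 is used as-is.
def _example_weighted_cel():
  labels = tf.constant([[1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0]])
  logits = tf.constant([[1.5, -0.5, -2.0, 0.3], [-1.0, -0.2, -3.0, 0.1]])
  loss = weighted_cel(labels=labels, logits=logits, bound=2.0)
  with tf.Session() as sess:
    print(sess.run(tf.reduce_mean(loss)))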

def sigmoid_cross_entropy_with_logits(_sentinel=None,  # pylint: disable=invalid-name
                                      labels=None,
                                      logits=None,
                                      name=None):
  """Computes sigmoid cross entropy given `logits`.

  Measures the probability error in discrete classification tasks in which
  each class is independent and not mutually exclusive.  For instance, one
  could perform multilabel classification where a picture can contain both an
  elephant and a dog at the same time.

  For brevity, let `x = logits`, `z = labels`.  The logistic loss is

        z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
      = z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
      = z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))
      = z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x)))
      = (1 - z) * x + log(1 + exp(-x))
      = x - x * z + log(1 + exp(-x))

  For x < 0, to avoid overflow in exp(-x), we reformulate the above

        x - x * z + log(1 + exp(-x))
      = log(exp(x)) - x * z + log(1 + exp(-x))
      = - x * z + log(1 + exp(x))

  Hence, to ensure stability and avoid overflow, the implementation uses this
  equivalent formulation

      max(x, 0) - x * z + log(1 + exp(-abs(x)))

  `logits` and `labels` must have the same type and shape.

  Args:
    _sentinel: Used to prevent positional parameters. Internal, do not use.
    labels: A `Tensor` of the same type and shape as `logits`.
    logits: A `Tensor` of type `float32` or `float64`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of the same shape as `logits` with the componentwise logistic
    losses.

  Raises:
    ValueError: If `logits` and `labels` do not have the same shape.
  """
  # pylint: disable=protected-access
  nn_ops._ensure_xent_args("sigmoid_cross_entropy_with_logits", _sentinel,
                           labels, logits)
  # pylint: enable=protected-access
  with ops.name_scope(name, "logistic_loss", [logits, labels]) as name:
    logits = ops.convert_to_tensor(logits, name="logits")
    labels = ops.convert_to_tensor(labels, name="labels")
    try:
      labels.get_shape().merge_with(logits.get_shape())
    except ValueError:
      raise ValueError("logits and labels must have the same shape (%s vs %s)" %
                       (logits.get_shape(), labels.get_shape()))
    # The logistic loss formula from above is
    #   x - x * z + log(1 + exp(-x))
    # For x < 0, a more numerically stable formula is
    #   -x * z + log(1 + exp(x))
    # Note that these two expressions can be combined into the following:
    #   max(x, 0) - x * z + log(1 + exp(-abs(x)))
    # To allow computing gradients at zero, we define custom versions of max
    # and abs functions.
    zeros = array_ops.zeros_like(logits, dtype=logits.dtype)
    cond = (logits >= zeros)
    relu_logits = array_ops.where(cond, logits, zeros)
    neg_abs_logits = array_ops.where(cond, -logits, logits)
    return math_ops.add(relu_logits - logits * labels,
                        math_ops.log1p(math_ops.exp(neg_abs_logits)),
                        name=name)
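
# Sanity check (an illustration added here, not part of the original code):
# the reimplementation above should agree elementwise with TensorFlow's own
# tf.nn.sigmoid_cross_entropy_with_logits. Assumes TF 1.x; values are made up.
def _example_reference_check():
  labels = tf.constant([[1.0, 0.0], [0.0, 1.0]])
  logits = tf.constant([[2.0, -1.0], [-0.5, 3.0]])
  ours = sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)
  ref = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)
  with tf.Session() as sess:
    print(sess.run(tf.reduce_max(tf.abs(ours - ref))))  # ~0.0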

def focal_sigmoid_cross_entropy_with_logits(  # pylint: disable=invalid-name
    _sentinel=None, labels=None, logits=None, alpha=0.5, gamma=0.0, name=None):
  """Computes focal sigmoid cross entropy given `logits`.

  Measures the probability error in discrete classification tasks in which
  each class is independent and not mutually exclusive.  For instance, one
  could perform multilabel classification where a picture can contain both an
  elephant and a dog at the same time.

  For brevity, let `x = logits`, `z = labels`.  For every row, the logistic
  loss is:

  If the background label (z[batch_i][0]) is 0:

      row_result = - labels * alpha * tf.pow(1.0 - sigmoid_x, gamma_array)
                       * tf.log(sigmoid_x)
                   - (1.0 - labels) * alpha * tf.pow(sigmoid_x, gamma_array)
                       * tf.log(1.0 - sigmoid_x)

  If the background label (z[batch_i][0]) is 1:

      row_result = - labels * (1.0 - alpha) * tf.pow(1.0 - sigmoid_x, gamma_array)
                       * tf.log(sigmoid_x)
                   - (1.0 - labels) * (1.0 - alpha) * tf.pow(sigmoid_x, gamma_array)
                       * tf.log(1.0 - sigmoid_x)

  `logits` and `labels` must have the same type and shape.

  Args:
    _sentinel: Used to prevent positional parameters. Internal, do not use.
    labels: A `Tensor` of the same type and shape as `logits`.
    logits: A `Tensor` of type `float32` or `float64`. It must be a tensor of
      shape [num_batch, num_classes], and the first class is background.
    alpha: Weighting factor for the positive class.
    gamma: Focusing parameter of the focal loss.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of the same shape as `logits` with the componentwise logistic
    losses.

  Raises:
    ValueError: If `logits` and `labels` do not have the same shape.
  """
  # pylint: disable=protected-access
  nn_ops._ensure_xent_args("focal_sigmoid_cross_entropy_with_logits",
                           _sentinel, labels, logits)
  # pylint: enable=protected-access
  with ops.name_scope(name, "logistic_loss", [logits, labels]) as name:
    logits = ops.convert_to_tensor(logits, name="logits")
    labels = ops.convert_to_tensor(labels, name="labels")
    try:
      labels.get_shape().merge_with(logits.get_shape())
    except ValueError:
      raise ValueError("logits and labels must have the same shape (%s vs %s)" %
                       (logits.get_shape(), labels.get_shape()))
    if len(labels.get_shape().as_list()) != 2:
      raise ValueError(
          "logits and labels must have two dims: [num_batch, num_classes], "
          "but the shape is %s" % (logits.get_shape()))
    sigmoid_x = tf.sigmoid(logits)
    log_sigmoid_x = tf.log_sigmoid(logits)
    # Broadcast the background column across all classes so every element of a
    # row gets the alpha weight that matches that row's background label.
    background_col = tf.slice(labels, [0, 0], [-1, 1])
    num_classes = labels.get_shape().as_list()[1]
    background_col = tf.matmul(background_col,
                               tf.ones([1, num_classes], dtype=labels.dtype))
    is_pos_label_row = background_col < 0.5
    alpha_tmp = alpha * tf.ones_like(logits, dtype=logits.dtype)
    alpha_array = tf.where(is_pos_label_row, alpha_tmp, 1.0 - alpha_tmp)
    gamma_array = gamma * tf.ones_like(logits, dtype=logits.dtype)
    pos_part = (-labels * alpha_array *
                tf.pow(1.0 - sigmoid_x, gamma_array) * log_sigmoid_x)
    # tf.log(1.0 - sigmoid_x) = tf.log_sigmoid(-logits); the latter avoids
    # overflow in tf.exp(logits) for large positive logits.
    neg_part = (-(1.0 - labels) * alpha_array *
                tf.pow(sigmoid_x, gamma_array) * tf.log_sigmoid(-logits))
    return tf.add(pos_part, neg_part, name=name)
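
# Usage sketch for the focal variant (a made-up illustration; assumes TF 1.x).
# The first column is the background class: row 0 is a foreground row
# (background label 0, weighted by alpha), row 1 is a background row
# (weighted by 1 - alpha).
def _example_focal_bce():
  labels = tf.constant([[0.0, 1.0, 0.0], [1.0, 0.0, 0.0]])
  logits = tf.constant([[-1.0, 2.0, -2.0], [3.0, -1.5, -0.5]])
  loss = focal_sigmoid_cross_entropy_with_logits(
      labels=labels, logits=logits, alpha=0.25, gamma=2.0)
  with tf.Session() as sess:
    print(sess.run(loss))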