def _inverse_log_det_jacobian(self, y):
  y = self._maybe_assert_valid_y(y)
  event_dims = self._event_dims_tensor(y)
  return math_ops.reduce_sum(
      -math_ops.log1p(-y) +
      (1 / self.concentration - 1) * math_ops.log(-math_ops.log1p(-y)) +
      math_ops.log(self.scale / self.concentration),
      axis=event_dims)
def weighted_ce(targets, logits, beta, name=None):
  """Computes a weighted cross entropy as in
  http://www.vision.ee.ethz.ch/~cvlsegmentation/driu/data/paper/DRIU_MICCAI2016.pdf

  The cross entropy is computed as follows:

        z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
      = z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
      = z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))
      = z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x)))
      = (1 - z) * x + log(1 + exp(-x))
      = x - x * z + log(1 + exp(-x))
  """
  with ops.name_scope(name, "logistic_loss", [logits, targets]) as name:
    logits = ops.convert_to_tensor(logits, name="logits")
    targets = ops.convert_to_tensor(targets, name="targets")
    targets = tf.cast(targets, tf.float32)
    try:
      targets.get_shape().merge_with(logits.get_shape())
    except ValueError:
      raise ValueError(
          "logits and targets must have the same shape (%s vs %s)" %
          (logits.get_shape(), targets.get_shape()))
    targets = tf.math.add(targets, tf.keras.backend.epsilon())
    zeros = array_ops.zeros_like(logits, dtype=logits.dtype)
    cond = (logits >= zeros)
    relu_logits = array_ops.where(cond, logits, zeros)
    neg_abs_logits = array_ops.where(cond, -logits, logits)
    return tf.reduce_mean(tf.math.abs(math_ops.add(
        beta * (relu_logits - logits * targets),  # false negatives
        (1 - beta) * math_ops.log1p(math_ops.exp(neg_abs_logits)),  # false positives
        name=name)))
def pixelwise_weighted_binary_crossentropy(y_true, y_pred):
  '''
  This function calculates the pixel-wise weighted, binary cross-entropy value
  between the prediction (y_pred) and the pixel-wise weight map, which is
  unstacked from y_true.
  On the Gauss ssh server, tf.log must be written as tf.math.log
  '''
  try:
    # The weights are passed as part of the y_true tensor:
    [seg, weight] = tf.unstack(y_true, 2, axis=-1)
    seg = tf.expand_dims(seg, -1)
    weight = tf.expand_dims(weight, -1)
  except:
    pass

  epsilon = tf.convert_to_tensor(K.epsilon(), y_pred.dtype.base_dtype)
  y_pred = tf.clip_by_value(y_pred, epsilon, 1. - epsilon)
  y_pred = tf.math.log(y_pred / (1 - y_pred))

  zeros = array_ops.zeros_like(y_pred, dtype=y_pred.dtype)
  cond = (y_pred >= zeros)
  # `math_ops.select` no longer exists in current TensorFlow;
  # `array_ops.where` is the equivalent element-wise selection op.
  relu_logits = array_ops.where(cond, y_pred, zeros)
  neg_abs_logits = array_ops.where(cond, -y_pred, y_pred)
  entropy = math_ops.add(relu_logits - y_pred * seg,
                         math_ops.log1p(math_ops.exp(neg_abs_logits)),
                         name=None)

  # This is essentially the only part that is different from the Keras code:
  return K.mean(math_ops.multiply(weight, entropy), axis=-1)
def sigmoid_balanced_cross_entropy_with_logits(_sentinel=None, labels=None,
                                               logits=None, beta=None,
                                               name=None):
  nn_ops._ensure_xent_args("sigmoid_cross_entropy_with_logits", _sentinel,
                           labels, logits)
  with ops.name_scope(name, "logistic_loss", [logits, labels]) as name:
    logits = ops.convert_to_tensor(logits, name="logits")
    labels = ops.convert_to_tensor(labels, name="labels")
    try:
      labels.get_shape().merge_with(logits.get_shape())
    except ValueError:
      raise ValueError(
          "logits and labels must have the same shape (%s vs %s)" %
          (logits.get_shape(), labels.get_shape()))
    zeros = array_ops.zeros_like(logits, dtype=logits.dtype)
    cond = (logits >= zeros)
    relu_logits = array_ops.where(cond, logits, zeros)
    neg_abs_logits = array_ops.where(cond, -logits, logits)
    # beta = 0.5
    balanced_cross_entropy = (
        relu_logits * (1. - beta) - logits * labels * (1. - beta) +
        math_ops.log1p(math_ops.exp(neg_abs_logits)) *
        ((1. - beta) * (1. - labels) + beta * labels))
    return tf.reduce_mean(balanced_cross_entropy)
def attention_loss(logits, label, beta=4, gamma=0.5, name='attention_loss'):
  """
  Implements Attention Loss in
  DOOBNet: Deep Object Occlusion Boundary Detection from an Image
  """
  y = tf.cast(label, tf.float32)

  count_neg = tf.reduce_sum(1. - y)
  count_pos = tf.reduce_sum(y)
  alpha = count_neg / (count_neg + count_pos)
  pos_weight = alpha / (1 - alpha)

  beta = tf.cast(beta, tf.float32)
  sigma = math_ops.log1p(math_ops.exp(-math_ops.abs(logits))) + nn_ops.relu(-logits)
  p = tf.nn.sigmoid(logits)
  eps = 1e-14
  p_clip = tf.clip_by_value(p, eps, 1.0 - eps)

  cost = (pos_weight * y * sigma * tf.pow(beta, tf.pow(1 - p_clip, gamma)) +
          (1 - y) * (logits + sigma) * tf.pow(beta, tf.pow(p_clip, gamma)))
  cost = tf.reduce_mean(cost)
  return cost, tf.where(tf.equal(count_pos, 0.0), 0.0, cost, name=name)
def fancy_loss(self, logits, labels):
  zeros = array_ops.zeros_like(logits, dtype=logits.dtype)
  cond = (logits >= zeros)
  relu_logits = array_ops.where(cond, logits, zeros)
  neg_abs_logits = array_ops.where(cond, -logits, logits)
  losses = math_ops.add(relu_logits - logits * labels,
                        math_ops.log1p(math_ops.exp(neg_abs_logits)))

  positive_cond = (labels > zeros)
  negative_losses = array_ops.where(tf.logical_not(positive_cond), losses, zeros)
  positive_losses = array_ops.where(positive_cond, losses,
                                    tf.ones_like(logits) * 3000)
  positive_losses_2 = array_ops.where(positive_cond, losses, zeros)

  # positive_losses = tf.Print(positive_losses, data=[labels], summarize=100, message="labels")
  # positive_losses = tf.Print(positive_losses, data=[logits], summarize=100, message="logits")
  # positive_losses = tf.Print(positive_losses, data=[tf.nn.sigmoid(logits)], summarize=100, message="preds")
  # positive_losses = tf.Print(positive_losses, data=[positive_losses], summarize=100, message="pos")
  # positive_losses = tf.Print(positive_losses, data=[negative_losses], summarize=100, message="neg")

  total_negative_loss = tf.reduce_max(negative_losses) + tf.reduce_mean(negative_losses)
  total_positive_loss = tf.reduce_min(positive_losses) + tf.reduce_mean(positive_losses_2)
  total_loss = total_negative_loss + total_positive_loss
  return total_loss
def call(self, logits, label, reduce_op=None):
  """Computes the binary cross entropy loss from logits with label broadcasting.

  Args:
    logits: A `Tensor` of type `float32` or `float64`.
    label: A `Tensor` of the same type as `logits`.
    reduce_op: A function that reduces the output tensor along all dimensions.
      Defaults to None. If None, then no reduction along all dimensions is
      applied. For instance, the function could be `tf.reduce_mean` or a
      user-defined function.

  Returns:
    A `Tensor` of the same type as `logits` with the binary cross entropy loss.
  """
  if self.label_smoothing > 0:
    # Smooth the label towards 0.5 while keeping its original value.
    label = label * (1 - self.label_smoothing) + 0.5 * self.label_smoothing
  relu_logits = -tf.minimum(-logits, 0.0)
  neg_abs_logits = tf.minimum(-logits, logits)
  result = math_ops.add(relu_logits,
                        math_ops.log1p(math_ops.exp(neg_abs_logits)))
  if label == 1:
    result = math_ops.add(result, -logits)
  elif label != 0.0:
    result = math_ops.add(result, -logits * label)
  return reduce_op(result) if reduce_op is not None else result
def discounted_cumulative_gain(labels,
                               predictions,
                               weights=None,
                               topn=None,
                               name=None):
  """Computes discounted cumulative gain (DCG).

  Args:
    labels: A `Tensor` of the same shape as `predictions`.
    predictions: A `Tensor` with shape [batch_size, list_size]. Each value is
      the ranking score of the corresponding example.
    weights: A `Tensor` of the same shape of predictions or [batch_size, 1].
      The former case is per-example and the latter case is per-list.
    topn: A cutoff for how many examples to consider for this metric.
    name: A string used as the name for this metric.

  Returns:
    A metric for the weighted discounted cumulative gain of the batch.
  """
  with ops.name_scope(name, 'discounted_cumulative_gain',
                      (labels, predictions, weights)):
    labels, predictions, weights, topn = _prepare_and_validate_params(
        labels, predictions, weights, topn)
    sorted_labels, sorted_weights = utils.sort_by_scores(
        predictions, [labels, weights], topn=topn)
    dcg = _discounted_cumulative_gain(sorted_labels,
                                      sorted_weights) * math_ops.log1p(1.0)
    per_list_weights = _per_example_weights_to_per_list_weights(
        weights=weights,
        relevance=math_ops.pow(2.0, math_ops.to_float(labels)) - 1.0)
    return math_ops.reduce_mean(
        _safe_div(dcg, per_list_weights) * per_list_weights)
def _forward_log_det_jacobian(self, x):
  x = self._maybe_assert_valid_x(x)
  event_dims = self._event_dims_tensor(x)
  if self.power == 0.:
    return math_ops.reduce_sum(x, axis=event_dims)
  return (1. / self.power - 1.) * math_ops.reduce_sum(
      math_ops.log1p(x * self.power), axis=event_dims)
def testSigmoidNumericalStability(self):
  for dtype in self.float_types:
    if dtype != np.float16:
      self._assertOpOutputMatchesExpected(
          lambda x: math_ops.sigmoid(x) / math_ops.log1p(math_ops.exp(x)),
          np.array([-40, 40], dtype=dtype),
          expected=np.array([1.0, 0.025], dtype=dtype))
def bce_of_true_positive(y_true, y_pred, from_logits=False, _sentinel=None,
                         name=None):
  if not from_logits:
    # Transform probabilities back to logits so the log1p formulation below
    # can be applied.
    _epsilon = tf.convert_to_tensor(epsilon(), y_pred.dtype.base_dtype)
    output = tf.clip_by_value(y_pred, _epsilon, 1 - _epsilon)
    y_pred = tf.log(output / (1 - output))

  # Alteration of sigmoid_cross_entropy_with_logits.
  nn_ops._ensure_xent_args("sigmoid_cross_entropy_with_logits", _sentinel,
                           y_true, y_pred)
  with ops.name_scope(name, "logistic_loss_over_true_positives",
                      [y_pred, y_true]) as name:
    logits = ops.convert_to_tensor(y_pred, name="logits")
    labels = ops.convert_to_tensor(y_true, name="labels")
    try:
      labels.get_shape().merge_with(logits.get_shape())
    except ValueError:
      raise ValueError(
          "Logits and labels must have the same shape (%s vs %s)" %
          (logits.get_shape(), labels.get_shape()))
    zeros = array_ops.zeros_like(logits, dtype=logits.dtype)
    cond = (logits >= zeros)
    relu_logits = array_ops.where(cond, logits, zeros)
    neg_abs_logits = array_ops.where(cond, -logits, logits)
    # Here we calculate the mean to be in line with Keras' binary crossentropy.
    return K.mean(
        math_ops.multiply(-labels,
                          math_ops.log1p(math_ops.exp(neg_abs_logits)),
                          name=name))
def _call_log_survival_function(self, value, name, **kwargs):
  with self._name_scope(name, values=[value]):
    value = ops.convert_to_tensor(value, name="value")
    try:
      return self._log_survival_function(value, **kwargs)
    except NotImplementedError:
      return math_ops.log1p(-self.cdf(value, **kwargs))
def _forward(self, x):
  x = self._maybe_assert_valid_x(x)
  if self.power == 0.:
    return math_ops.exp(x)
  # If large x accuracy is an issue, consider using:
  # (1. + x * self.power)**(1. / self.power) when x >> 1.
  return math_ops.exp(math_ops.log1p(x * self.power) / self.power)
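# A minimal NumPy sketch (not from the original source) checking that the
# log1p-based form used in _forward above matches the naive
# (1 + c * x) ** (1 / c) formula on well-behaved inputs; `c` is an assumed
# example value standing in for `power`.
import numpy as np

def power_transform_forward(x, c):
  # Same identity as above: exp(log1p(x * c) / c) == (1 + c * x) ** (1 / c).
  if c == 0.:
    return np.exp(x)
  return np.exp(np.log1p(x * c) / c)

x = np.array([0.0, 0.5, 1.0, 2.0])
c = 0.3
np.testing.assert_allclose(power_transform_forward(x, c), (1. + c * x) ** (1. / c))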
def _inverse_log_det_jacobian(self, y):
  y = self._maybe_assert_valid(y)
  return (math_ops.log(self.concentration1) +
          math_ops.log(self.concentration0) +
          (self.concentration1 - 1) * math_ops.log(y) +
          (self.concentration0 - 1) * math_ops.log1p(-y**self.concentration1))
def test_softmax_loss(self):
  scores = [[1., 3., 2.], [1., 2., 3.], [1., 2., 3.]]
  labels = [[0., 0., 1.], [0., 0., 2.], [0., 0., 0.]]
  weights = [[2.], [1.], [1.]]
  with self.cached_session():
    self.assertAlmostEqual(
        ranking_losses._softmax_loss(labels, scores).eval(),
        -(math.log(_softmax(scores[0])[2]) +
          math.log(_softmax(scores[1])[2]) * 2.) / 2.,
        places=5)
    self.assertAlmostEqual(
        ranking_losses._softmax_loss(labels, scores, weights).eval(),
        -(math.log(_softmax(scores[0])[2]) * 2. +
          math.log(_softmax(scores[1])[2]) * 2. * 1.) / 2.,
        places=5)
    # Test LambdaWeight.
    lambda_weight = ranking_losses.DCGLambdaWeight(
        rank_discount_fn=lambda r: 1. / math_ops.log1p(r))
    self.assertAlmostEqual(
        ranking_losses._softmax_loss(
            labels, scores, lambda_weight=lambda_weight).eval(),
        -(math.log(_softmax(scores[0])[2]) / math.log(1. + 2.) +
          math.log(_softmax(scores[1])[2]) * 2. / math.log(1. + 1.)) / 2.,
        places=5)
def testFloatOpsDisabledOnMlirBridge(self):
  for dtype in self.float_types:
    if dtype != np.float16:
      self._assertOpOutputMatchesExpected(
          lambda x: math_ops.sigmoid(x) / math_ops.log1p(math_ops.exp(x)),
          np.array([-40, 40], dtype=dtype),
          expected=np.array([1.0, 0.025], dtype=dtype))
def custom_weighted_binary_crossentropy(targets, logits,
                                        pos_weight=weight_array, name=None):
  # Transform back to logits.
  _epsilon = tfb._to_tensor(tfb.epsilon(), logits.dtype.base_dtype)
  logits = tf.clip_by_value(logits, _epsilon, 1 - _epsilon)
  logits = tf.log(logits / (1 - logits))

  # Compute weighted loss.
  with ops.name_scope(name, "logistic_loss", [logits, targets]) as name:
    logits = ops.convert_to_tensor(logits, name="logits")
    targets = ops.convert_to_tensor(targets, name="targets")
    try:
      targets.get_shape().merge_with(logits.get_shape())
    except ValueError:
      raise ValueError(
          "logits and targets must have the same shape (%s vs %s)" %
          (logits.get_shape(), targets.get_shape()))
    loss = []
    for i in range(0, label_num - 1):
      log_weight = 1 + (pos_weight[i] - 1) * targets[i]
      loss_i = math_ops.add(
          (1 - targets[i]) * logits[i],
          log_weight * (math_ops.log1p(math_ops.exp(-math_ops.abs(logits[i]))) +
                        nn_ops.relu(-logits[i])),
          name=name)
      loss.append(loss_i)
    return tf.reduce_mean(loss)
def __init__(self,
             temperature,
             logits=None,
             probs=None,
             validate_args=False,
             allow_nan_stats=True,
             name="RelaxedBernoulli"):
  """Construct RelaxedBernoulli distributions.

  Args:
    temperature: A 0-D `Tensor`, representing the temperature of a set of
      RelaxedBernoulli distributions. The temperature should be positive.
    logits: An N-D `Tensor` representing the log-odds of a positive event.
      Each entry in the `Tensor` parametrizes an independent RelaxedBernoulli
      distribution where the probability of an event is sigmoid(logits). Only
      one of `logits` or `probs` should be passed in.
    probs: An N-D `Tensor` representing the probability of a positive event.
      Each entry in the `Tensor` parameterizes an independent Bernoulli
      distribution. Only one of `logits` or `probs` should be passed in.
    validate_args: Python `Boolean`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `Boolean`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
      result is undefined. When `False`, an exception is raised if one or more
      of the statistic's batch members are undefined.
    name: `String` name prefixed to Ops created by this class.

  Raises:
    ValueError: If both `probs` and `logits` are passed, or if neither.
  """
  parameters = locals()
  with ops.name_scope(name, values=[logits, probs, temperature]) as ns:
    with ops.control_dependencies([check_ops.assert_positive(temperature)]
                                  if validate_args else []):
      self._temperature = array_ops.identity(temperature, name="temperature")
    self._logits, self._probs = distribution_util.get_logits_and_probs(
        logits=logits, probs=probs, validate_args=validate_args)
    dist = logistic._Logistic(self._logits / self._temperature,
                              1. / self._temperature,
                              validate_args=validate_args,
                              allow_nan_stats=allow_nan_stats,
                              name=ns)
  self._parameters = parameters

  def inverse_log_det_jacobian_fn(y):
    return -math_ops.reduce_sum(math_ops.log(y) + math_ops.log1p(-y), -1)

  sigmoid_bijector = bijector.Inline(
      forward_fn=math_ops.sigmoid,
      inverse_fn=(lambda y: math_ops.log(y) - math_ops.log1p(-y)),
      inverse_log_det_jacobian_fn=inverse_log_det_jacobian_fn,
      name="sigmoid")
  super(_RelaxedBernoulli, self).__init__(dist, sigmoid_bijector, name=name)
def call(self, y_true, y_pred):
  y_pred = ops.convert_to_tensor(y_pred)
  y_true = math_ops.cast(y_true, y_pred.dtype)
  rel_error = (y_pred - y_true) / y_true
  # rel_abs_error = math_ops.abs(rel_error)
  rel_sq_error = math_ops.square(rel_error)
  log_rel_error = math_ops.log1p(rel_sq_error)
  return K.mean(log_rel_error, axis=-1)
def _inverse_log_det_jacobian(self, y):
  y = self._maybe_assert_valid(y)
  event_dims = self._event_dims_tensor(y)
  return math_ops.reduce_sum(
      math_ops.log(self.concentration1) +
      math_ops.log(self.concentration0) +
      (self.concentration1 - 1) * math_ops.log(y) +
      (self.concentration0 - 1) * math_ops.log1p(-y**self.concentration1),
      axis=event_dims)
def create_ndcg_lambda_weight(topn=None, smooth_fraction=0.):
  """Creates _LambdaWeight for NDCG metric."""
  return DCGLambdaWeight(
      topn,
      gain_fn=lambda labels: math_ops.pow(2.0, labels) - 1.,
      rank_discount_fn=lambda rank: 1. / math_ops.log1p(rank),
      normalized=True,
      smooth_fraction=smooth_fraction)
def __init__(self,
             temperature,
             logits=None,
             probs=None,
             validate_args=False,
             allow_nan_stats=True,
             name="RelaxedBernoulli"):
  """Construct RelaxedBernoulli distributions.

  Args:
    temperature: A 0-D `Tensor`, representing the temperature of a set of
      RelaxedBernoulli distributions. The temperature should be positive.
    logits: An N-D `Tensor` representing the log-odds of a positive event.
      Each entry in the `Tensor` parametrizes an independent RelaxedBernoulli
      distribution where the probability of an event is sigmoid(logits). Only
      one of `logits` or `probs` should be passed in.
    probs: An N-D `Tensor` representing the probability of a positive event.
      Each entry in the `Tensor` parameterizes an independent Bernoulli
      distribution. Only one of `logits` or `probs` should be passed in.
    validate_args: Python `Boolean`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `Boolean`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
      result is undefined. When `False`, an exception is raised if one or more
      of the statistic's batch members are undefined.
    name: `String` name prefixed to Ops created by this class.

  Raises:
    ValueError: If both `probs` and `logits` are passed, or if neither.
  """
  parameters = locals()
  with ops.name_scope(name, values=[logits, probs, temperature]) as ns:
    with ops.control_dependencies([check_ops.assert_positive(temperature)]
                                  if validate_args else []):
      self._temperature = array_ops.identity(temperature, name="temperature")
    self._logits, self._probs = distribution_util.get_logits_and_probs(
        logits=logits, probs=probs, validate_args=validate_args)
    dist = logistic.Logistic(self._logits / self._temperature,
                             1. / self._temperature,
                             validate_args=validate_args,
                             allow_nan_stats=allow_nan_stats,
                             name=ns)
  self._parameters = parameters

  def inverse_log_det_jacobian_fn(y):
    return -math_ops.log(y) - math_ops.log1p(-y)

  sigmoid_bijector = bijector.Inline(
      forward_fn=math_ops.sigmoid,
      inverse_fn=(lambda y: math_ops.log(y) - math_ops.log1p(-y)),
      inverse_log_det_jacobian_fn=inverse_log_det_jacobian_fn,
      name="sigmoid")
  super(RelaxedBernoulli, self).__init__(dist, sigmoid_bijector, name=name)
def _approx_ndcg_loss(labels,
                      logits,
                      weights=None,
                      reduction=core_losses.Reduction.SUM,
                      name=None,
                      alpha=10.):
  """Computes ApproxNDCG loss.

  ApproxNDCG ["A general approximation framework for direct optimization of
  information retrieval measures" by Qin et al.] is a smooth approximation
  to NDCG.

  Args:
    labels: A `Tensor` of the same shape as `logits` representing graded
      relevance.
    logits: A `Tensor` with shape [batch_size, list_size]. Each value is the
      ranking score of the corresponding item.
    weights: A scalar, a `Tensor` with shape [batch_size, 1] for list-wise
      weights, or a `Tensor` with shape [batch_size, list_size] for item-wise
      weights. If None, the weight of a list in the mini-batch is set to the
      sum of the labels of the items in that list.
    reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
      reduce training loss over batch.
    name: A string used as the name for this loss.
    alpha: The exponent in the generalized sigmoid function.

  Returns:
    An op for the ApproxNDCG loss.
  """
  with ops.name_scope(name, 'approx_ndcg_loss', (labels, logits, weights)):
    is_label_valid = utils.is_label_valid(labels)
    labels = array_ops.where(is_label_valid, labels,
                             array_ops.zeros_like(labels))
    logits = array_ops.where(
        is_label_valid, logits,
        -1e3 * array_ops.ones_like(logits) +
        math_ops.reduce_min(logits, axis=-1, keepdims=True))

    label_sum = math_ops.reduce_sum(labels, 1, keepdims=True)
    if weights is None:
      weights = array_ops.ones_like(label_sum)
    weights = array_ops.squeeze(weights)

    nonzero_mask = math_ops.greater(array_ops.reshape(label_sum, [-1]), 0.0)
    labels, logits, weights = [
        array_ops.boolean_mask(x, nonzero_mask)
        for x in [labels, logits, weights]
    ]

    gains = math_ops.pow(2., math_ops.to_float(labels)) - 1.
    ranks = utils.approx_ranks(logits, alpha=alpha)
    discounts = 1. / math_ops.log1p(ranks)
    dcg = math_ops.reduce_sum(gains * discounts, -1)
    cost = -dcg * array_ops.squeeze(utils.inverse_max_dcg(labels))
    return core_losses.compute_weighted_loss(
        cost, weights=weights, reduction=reduction)
def testXlog1pyNoNeg1(self):
  for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]:
    x = constant_op.constant([[0.1, 0.2, 3.5], [-2., -5., 30.]], dtype=dtype)
    y = constant_op.constant([[-0.1, -0.2, 3.5], [3.1, -0.9, 2.]], dtype=dtype)
    with test_util.use_gpu():
      xlog1py = self.evaluate(math_ops.xlog1py(x, y))
      xtimeslog1py = self.evaluate(x * math_ops.log1p(y))
      self.assertAllClose(xlog1py, xtimeslog1py)
def testNonZeroValuesGrad(self):
  for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]:
    x = constant_op.constant(0.1, dtype=dtype)
    y = constant_op.constant(3.1, dtype=dtype)
    xlog1py_xgrad, xlog1py_ygrad = self._xlog1py_gradients(x, y)
    xlog1py_expected_xgrad = self.evaluate(math_ops.log1p(y))
    xlog1py_expected_ygrad = self.evaluate(x / (1. + y))
    self.assertAllClose(xlog1py_expected_xgrad, xlog1py_xgrad)
    self.assertAllClose(xlog1py_expected_ygrad, xlog1py_ygrad)
def _log_prob(self, counts):
  if self.validate_args:
    counts = distribution_util.embed_check_nonnegative_discrete(
        counts, check_integer=True)
  counts *= array_ops.ones_like(self.probs)
  probs = self.probs * array_ops.ones_like(counts)
  safe_domain = array_ops.where(math_ops.equal(counts, 0.),
                                array_ops.zeros_like(probs),
                                probs)
  return counts * math_ops.log1p(-safe_domain) + math_ops.log(probs)
def test_gain_and_discount(self):
  sorted_labels = [[2.0, 1.0]]
  lambda_weight = ranking_losses.DCGLambdaWeight(
      gain_fn=lambda x: math_ops.pow(2., x) - 1.,
      rank_discount_fn=lambda r: 1. / math_ops.log1p(r))
  with self.cached_session():
    self.assertAllClose(
        lambda_weight.pair_weights(sorted_labels).eval(),
        [[[0., 2. * (1. / math.log(2.) - 1. / math.log(3.))],
          [2. * (1. / math.log(2.) - 1. / math.log(3.)), 0.]]])
def testXlog1pyWithZeroBroadcast(self):
  for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]:
    x = constant_op.constant([[0.], [1.]], dtype=dtype)
    y = constant_op.constant([[-0.1, -0.2, -1.], [0., 1., 2.]], dtype=dtype)
    with test_util.use_gpu():
      xlog1py_tf_np = self.evaluate(math_ops.xlog1py(x, y))
      zeros_np = self.evaluate(array_ops.zeros_like(y[0]))
      xtimes_log1py = self.evaluate(math_ops.log1p(y[1]))
      self.assertAllClose(zeros_np, xlog1py_tf_np[0])
      self.assertAllClose(xtimes_log1py, xlog1py_tf_np[1])
def _forward_log_det_jacobian(self, x):
  # y = sinh((arcsinh(x) + skewness) * tailweight)
  # Using sinh' = cosh, arcsinh'(x) = 1 / sqrt(x**2 + 1),
  # dy/dx
  # = cosh((arcsinh(x) + skewness) * tailweight) * tailweight / sqrt(x**2 + 1)
  event_dims = self._event_dims_tensor(x)
  return math_ops.reduce_sum(
      log_cosh((arcsinh(x) + self.skewness) * self.tailweight) +
      math_ops.log(self.tailweight) -
      0.5 * math_ops.log1p(x**2),
      axis=event_dims)
def _inverse_log_det_jacobian(self, y):
  # x = sinh(arcsinh(y) / tailweight - skewness)
  # Using sinh' = cosh, arcsinh'(y) = 1 / sqrt(y**2 + 1),
  # dx/dy
  # = cosh(arcsinh(y) / tailweight - skewness)
  #   / (tailweight * sqrt(y**2 + 1))
  event_dims = self._event_dims_tensor(y)
  return math_ops.reduce_sum(
      log_cosh(arcsinh(y) / self.tailweight - self.skewness) -
      math_ops.log(self.tailweight) -
      0.5 * math_ops.log1p(y**2),
      axis=event_dims)
def _log_prob(self, x):
  if self.validate_args:
    x = distribution_util.embed_check_nonnegative_integer_form(x)
  else:
    # For consistency with cdf, we take the floor.
    x = math_ops.floor(x)
  x *= array_ops.ones_like(self.probs)
  probs = self.probs * array_ops.ones_like(x)
  safe_domain = array_ops.where(math_ops.equal(x, 0.),
                                array_ops.zeros_like(probs),
                                probs)
  return x * math_ops.log1p(-safe_domain) + math_ops.log(probs)
def _call_log_survival_function(self, value, name, **kwargs):
  with self._name_scope(name, values=[value]):
    value = _convert_to_tensor(value, name="value", preferred_dtype=self.dtype)
    try:
      return self._log_survival_function(value, **kwargs)
    except NotImplementedError as original_exception:
      try:
        return math_ops.log1p(-self.cdf(value, **kwargs))
      except NotImplementedError:
        raise original_exception
def _log_prob(self, counts):
  if self.validate_args:
    counts = distribution_util.embed_check_nonnegative_discrete(
        counts, check_integer=True)
  counts *= array_ops.ones_like(self.probs)
  probs = self.probs * array_ops.ones_like(counts)
  safe_domain = array_ops.where(
      math_ops.equal(counts, 0.),
      array_ops.zeros_like(probs),
      probs)
  return counts * math_ops.log1p(-safe_domain) + math_ops.log(probs)
def _cdf(self, x):
  if self.validate_args:
    x = distribution_util.embed_check_nonnegative_integer_form(x)
  else:
    # Whether or not x is integer-form, the following is well-defined.
    # However, scipy takes the floor, so we do too.
    x = math_ops.floor(x)
  x *= array_ops.ones_like(self.probs)
  return array_ops.where(
      x < 0.,
      array_ops.zeros_like(x),
      -math_ops.expm1((1. + x) * math_ops.log1p(-self.probs)))
def _log_prob(self, x):
  if self.validate_args:
    x = distribution_util.embed_check_nonnegative_integer_form(x)
  else:
    # For consistency with cdf, we take the floor.
    x = math_ops.floor(x)
  x *= array_ops.ones_like(self.probs)
  probs = self.probs * array_ops.ones_like(x)
  safe_domain = array_ops.where(
      math_ops.equal(x, 0.),
      array_ops.zeros_like(probs),
      probs)
  return x * math_ops.log1p(-safe_domain) + math_ops.log(probs)
def _cdf(self, counts):
  if self.validate_args:
    # We set `check_integer=False` since the CDF is defined on whole real
    # line.
    counts = math_ops.floor(
        distribution_util.embed_check_nonnegative_discrete(
            counts, check_integer=False))
  counts *= array_ops.ones_like(self.probs)
  return array_ops.where(
      counts < 0.,
      array_ops.zeros_like(counts),
      -math_ops.expm1((counts + 1) * math_ops.log1p(-self.probs)))
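# Illustrative NumPy check (plain NumPy, outside the class above) of the
# geometric-CDF identity used in _cdf: for k >= 0,
# -expm1((k + 1) * log1p(-p)) == 1 - (1 - p) ** (k + 1).
import numpy as np

p = 0.25
k = np.arange(0, 6, dtype=np.float64)
stable = -np.expm1((k + 1.) * np.log1p(-p))
naive = 1. - (1. - p) ** (k + 1.)
np.testing.assert_allclose(stable, naive)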
def _sample_n(self, n, seed=None):
  # Uniform variates must be sampled from the open-interval `(0, 1)` rather
  # than `[0, 1)`. To do so, we use `np.finfo(self.dtype.as_numpy_dtype).tiny`
  # because it is the smallest, positive, "normal" number. A "normal" number
  # is such that the mantissa has an implicit leading 1. Normal, positive
  # numbers x, y have the reasonable property that, `x + y >= max(x, y)`. In
  # this case, a subnormal number (i.e., np.nextafter) can cause us to sample
  # 0.
  uniform = random_ops.random_uniform(
      shape=array_ops.concat([[n], self.batch_shape_tensor()], 0),
      minval=np.finfo(self.dtype.as_numpy_dtype).tiny,
      maxval=1.,
      dtype=self.dtype,
      seed=seed)
  sampled = math_ops.log(uniform) - math_ops.log1p(-1. * uniform)
  return sampled * self.scale + self.loc
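# A hedged NumPy sketch of the same inverse-CDF trick used in _sample_n above:
# for u drawn from the open interval (0, 1), log(u) - log1p(-u) is a standard
# logistic variate, so scale * z + loc gives Logistic(loc, scale) samples.
# The loc/scale values below are arbitrary example choices.
import numpy as np

rng = np.random.default_rng(0)
tiny = np.finfo(np.float64).tiny        # keeps u strictly above 0
u = rng.uniform(low=tiny, high=1., size=100_000)
z = np.log(u) - np.log1p(-u)            # standard logistic variate
samples = 2.0 * z + 1.0                 # loc=1.0, scale=2.0
assert abs(samples.mean() - 1.0) < 0.1  # loose sanity check: mean should be near loc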
def _sample_n(self, n, seed=None):
  shape = array_ops.concat([[n], self.batch_shape_tensor()], 0)
  # Uniform variates must be sampled from the open-interval `(-1, 1)` rather
  # than `[-1, 1)`. In the case of `(0, 1)` we'd use
  # `np.finfo(self.dtype.as_numpy_dtype).tiny` because it is the smallest,
  # positive, "normal" number. However, the concept of subnormality exists
  # only at zero; here we need the smallest usable number larger than -1,
  # i.e., `-1 + eps/2`.
  uniform_samples = random_ops.random_uniform(
      shape=shape,
      minval=np.nextafter(self.dtype.as_numpy_dtype(-1.),
                          self.dtype.as_numpy_dtype(0.)),
      maxval=1.,
      dtype=self.dtype,
      seed=seed)
  return (self.loc - self.scale * math_ops.sign(uniform_samples) *
          math_ops.log1p(-math_ops.abs(uniform_samples)))
def log_cdf_laplace(x, name="log_cdf_laplace"):
  """Log Laplace distribution function.

  This function calculates `Log[L(x)]`, where `L(x)` is the cumulative
  distribution function of the Laplace distribution, i.e.

  ```L(x) := 0.5 * int_{-infty}^x e^{-|t|} dt```

  For numerical accuracy, `L(x)` is computed in different ways depending on
  `x`,

  ```
  x <= 0:
    Log[L(x)] = Log[0.5] + x, which is exact

  0 < x:
    Log[L(x)] = Log[1 - 0.5 * e^{-x}], which is exact
  ```

  Args:
    x: `Tensor` of type `float32`, `float64`.
    name: Python string. A name for the operation (default="log_cdf_laplace").

  Returns:
    `Tensor` with `dtype=x.dtype`.

  Raises:
    TypeError: if `x.dtype` is not handled.
  """
  with ops.name_scope(name, values=[x]):
    x = ops.convert_to_tensor(x, name="x")

    # For x < 0, L(x) = 0.5 * exp{x} exactly, so Log[L(x)] = log(0.5) + x.
    lower_solution = -np.log(2.) + x

    # safe_exp_neg_x = exp{-x} for x > 0, but is
    # bounded above by 1, which avoids
    #   log[1 - 1] = -inf for x = log(1/2), AND
    #   exp{-x} --> inf, for x << -1
    safe_exp_neg_x = math_ops.exp(-math_ops.abs(x))

    # log1p(z) = log(1 + z) approx z for |z| << 1. This approximation is used
    # internally by log1p, rather than being done explicitly here.
    upper_solution = math_ops.log1p(-0.5 * safe_exp_neg_x)

    return array_ops.where(x < 0., lower_solution, upper_solution)
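# A NumPy-only sketch (not part of the library code above) contrasting the
# naive log(CDF) with the split formulation used in log_cdf_laplace: the naive
# form underflows to -inf for very negative x, while log(0.5) + x stays exact.
import numpy as np

def laplace_cdf(x):
  x = np.asarray(x, dtype=np.float64)
  out = np.empty_like(x)
  neg = x < 0.
  out[neg] = 0.5 * np.exp(x[neg])
  out[~neg] = 1. - 0.5 * np.exp(-x[~neg])
  return out

x = np.array([-800., -5., 0., 5.])
with np.errstate(divide='ignore'):
  naive = np.log(laplace_cdf(x))               # log(0) = -inf at x = -800
lower = -np.log(2.) + x                        # exact for x <= 0
upper = np.log1p(-0.5 * np.exp(-np.abs(x)))    # stable for x > 0
stable = np.where(x < 0., lower, upper)
print(naive)    # first entry is -inf
print(stable)   # first entry is about -800.693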
def _forward_log_det_jacobian(self, x):
  x = self._maybe_assert_valid_x(x)
  if self.power == 0.:
    return x
  return (1. / self.power - 1.) * math_ops.log1p(x * self.power)
def _inverse(self, y):
  y = self._maybe_assert_valid_y(y)
  return self.scale * (-math_ops.log1p(-y)) ** (1 / self.concentration)
def _forward(self, x):
  x = self._maybe_assert_valid(x)
  return math_ops.exp(
      math_ops.log1p(-math_ops.exp(math_ops.log1p(-x) / self.concentration0)) /
      self.concentration1)
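# NumPy sketch (illustrative only) of the identity behind the nested
# log1p/exp form in _forward above:
# exp(log1p(-exp(log1p(-x) / b)) / a) == (1 - (1 - x) ** (1 / b)) ** (1 / a),
# where a and b stand in for concentration1 and concentration0.
import numpy as np

a, b = 2.0, 3.0
x = np.array([0.1, 0.3, 0.7, 0.95])
stable = np.exp(np.log1p(-np.exp(np.log1p(-x) / b)) / a)
naive = (1. - (1. - x) ** (1. / b)) ** (1. / a)
np.testing.assert_allclose(stable, naive)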
def sigmoid_cross_entropy_with_logits(logits, targets, name=None):
  """Computes sigmoid cross entropy given `logits`.

  Measures the probability error in discrete classification tasks in which
  each class is independent and not mutually exclusive. For instance, one
  could perform multilabel classification where a picture can contain both an
  elephant and a dog at the same time.

  For brevity, let `x = logits`, `z = targets`. The logistic loss is

        z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
      = z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
      = z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))
      = z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x)))
      = (1 - z) * x + log(1 + exp(-x))
      = x - x * z + log(1 + exp(-x))

  For x < 0, to avoid overflow in exp(-x), we reformulate the above

        x - x * z + log(1 + exp(-x))
      = log(exp(x)) - x * z + log(1 + exp(-x))
      = - x * z + log(1 + exp(x))

  Hence, to ensure stability and avoid overflow, the implementation uses this
  equivalent formulation

      max(x, 0) - x * z + log(1 + exp(-abs(x)))

  `logits` and `targets` must have the same type and shape.

  Args:
    logits: A `Tensor` of type `float32` or `float64`.
    targets: A `Tensor` of the same type and shape as `logits`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of the same shape as `logits` with the componentwise
    logistic losses.

  Raises:
    ValueError: If `logits` and `targets` do not have the same shape.
  """
  with ops.name_scope(name, "logistic_loss", [logits, targets]) as name:
    logits = ops.convert_to_tensor(logits, name="logits")
    targets = ops.convert_to_tensor(targets, name="targets")
    try:
      targets.get_shape().merge_with(logits.get_shape())
    except ValueError:
      raise ValueError("logits and targets must have the same shape (%s vs %s)"
                       % (logits.get_shape(), targets.get_shape()))

    # The logistic loss formula from above is
    #   x - x * z + log(1 + exp(-x))
    # For x < 0, a more numerically stable formula is
    #   -x * z + log(1 + exp(x))
    # Note that these two expressions can be combined into the following:
    #   max(x, 0) - x * z + log(1 + exp(-abs(x)))
    # To allow computing gradients at zero, we define custom versions of max
    # and abs functions.
    zeros = array_ops.zeros_like(logits, dtype=logits.dtype)
    cond = (logits >= zeros)
    relu_logits = array_ops.where(cond, logits, zeros)
    neg_abs_logits = array_ops.where(cond, -logits, logits)
    return math_ops.add(relu_logits - logits * targets,
                        math_ops.log1p(math_ops.exp(neg_abs_logits)),
                        name=name)
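# Hedged NumPy check (not from the TF source) of the stable formulation
# derived in the docstring above: max(x, 0) - x * z + log(1 + exp(-|x|))
# matches the naive loss for moderate logits and stays finite where exp(-x)
# would overflow.
import numpy as np

def naive_loss(x, z):
  return x - x * z + np.log1p(np.exp(-x))

def stable_loss(x, z):
  return np.maximum(x, 0.) - x * z + np.log1p(np.exp(-np.abs(x)))

x = np.array([-3., -0.5, 0., 2.])
z = np.array([1., 0., 1., 0.])
np.testing.assert_allclose(naive_loss(x, z), stable_loss(x, z))
print(stable_loss(np.array([-1000.]), np.array([0.])))  # finite (~0); the naive form would overflow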
def weighted_cross_entropy_with_logits(targets, logits, pos_weight, name=None):
  """Computes a weighted cross entropy.

  This is like `sigmoid_cross_entropy_with_logits()` except that `pos_weight`,
  allows one to trade off recall and precision by up- or down-weighting the
  cost of a positive error relative to a negative error.

  The usual cross-entropy cost is defined as:

      targets * -log(sigmoid(logits)) +
          (1 - targets) * -log(1 - sigmoid(logits))

  The argument `pos_weight` is used as a multiplier for the positive targets:

      targets * -log(sigmoid(logits)) * pos_weight +
          (1 - targets) * -log(1 - sigmoid(logits))

  For brevity, let `x = logits`, `z = targets`, `q = pos_weight`. The loss is:

        qz * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
      = qz * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
      = qz * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))
      = qz * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x)))
      = (1 - z) * x + (qz + 1 - z) * log(1 + exp(-x))
      = (1 - z) * x + (1 + (q - 1) * z) * log(1 + exp(-x))

  Setting `l = (1 + (q - 1) * z)`, to ensure stability and avoid overflow,
  the implementation uses

      (1 - z) * x + l * (log(1 + exp(-abs(x))) + max(-x, 0))

  `logits` and `targets` must have the same type and shape.

  Args:
    targets: A `Tensor` of the same type and shape as `logits`.
    logits: A `Tensor` of type `float32` or `float64`.
    pos_weight: A coefficient to use on the positive examples.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of the same shape as `logits` with the componentwise
    weighted logistic losses.

  Raises:
    ValueError: If `logits` and `targets` do not have the same shape.
  """
  with ops.name_scope(name, "logistic_loss", [logits, targets]) as name:
    logits = ops.convert_to_tensor(logits, name="logits")
    targets = ops.convert_to_tensor(targets, name="targets")
    try:
      targets.get_shape().merge_with(logits.get_shape())
    except ValueError:
      raise ValueError("logits and targets must have the same shape (%s vs %s)"
                       % (logits.get_shape(), targets.get_shape()))

    # The logistic loss formula from above is
    #   (1 - z) * x + (1 + (q - 1) * z) * log(1 + exp(-x))
    # For x < 0, a more numerically stable formula is
    #   (1 - z) * x + (1 + (q - 1) * z) * log(1 + exp(x)) - l * x
    # To avoid branching, we use the combined version
    #   (1 - z) * x + l * (log(1 + exp(-abs(x))) + max(-x, 0))
    log_weight = 1 + (pos_weight - 1) * targets
    return math_ops.add(
        (1 - targets) * logits,
        log_weight * (math_ops.log1p(math_ops.exp(-math_ops.abs(logits))) +
                      nn_ops.relu(-logits)),
        name=name)
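# Illustrative NumPy check (assumption: NumPy only): with pos_weight q = 1 the
# weighted form (1 - z) * x + l * (log(1 + exp(-|x|)) + max(-x, 0)) collapses
# to the ordinary stable sigmoid cross entropy max(x, 0) - x * z + log(1 + exp(-|x|)).
import numpy as np

def weighted(x, z, q):
  l = 1. + (q - 1.) * z
  return (1. - z) * x + l * (np.log1p(np.exp(-np.abs(x))) + np.maximum(-x, 0.))

def unweighted(x, z):
  return np.maximum(x, 0.) - x * z + np.log1p(np.exp(-np.abs(x)))

x = np.array([-2., -0.1, 0.3, 4.])
z = np.array([1., 0., 1., 0.])
np.testing.assert_allclose(weighted(x, z, q=1.), unweighted(x, z))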
def _log_unnormalized_prob(self, x):
  x = self._maybe_assert_valid_sample(x)
  a = self.concentration1
  b = self.concentration0
  return (a - 1) * math_ops.log(x) + (b - 1) * math_ops.log1p(-x**a)
def _log_unnormalized_prob(self, positive_counts):
  if self.validate_args:
    positive_counts = distribution_util.embed_check_nonnegative_discrete(
        positive_counts, check_integer=True)
  return (self.total_count * math_ops.log1p(-self.probs) +
          positive_counts * math_ops.log(self.probs))
def _log_unnormalized_prob(self, x):
  y = (x - self.loc) / self.scale  # Abs(scale) superfluous.
  return -0.5 * (self.df + 1.) * math_ops.log1p(y**2. / self.df)
def _inverse_log_det_jacobian(self, y):
  return -math_ops.log(y) - math_ops.log1p(-y)
def _inverse(self, y):
  return math_ops.log(y) - math_ops.log1p(-y)
def _log_unnormalized_prob(self, counts):
  counts = self._maybe_assert_valid_sample(counts)
  return (counts * math_ops.log(self.probs) +
          (self.total_count - counts) * math_ops.log1p(-self.probs))
def _log_unnormalized_prob(self, x):
  x = self._maybe_assert_valid_sample(x)
  return ((self.concentration1 - 1.) * math_ops.log(x) +
          (self.concentration0 - 1.) * math_ops.log1p(-x))
def get_logits_and_probs(logits=None,
                         probs=None,
                         multidimensional=False,
                         validate_args=False,
                         name="get_logits_and_probs"):
  """Converts logits to probabilities (or vice-versa), and returns both.

  Args:
    logits: Numeric `Tensor` representing log-odds.
    probs: Numeric `Tensor` representing probabilities.
    multidimensional: `Boolean`, default `False`. If `True`, the last
      dimension of `logits` or `probs`, a `[N1, N2, ..., k]` dimensional
      tensor, indexes the logit or probability of each of the `shape[-1]`
      classes.
    validate_args: `Boolean`, default `False`. When `True`, either assert
      `0 <= probs <= 1` (if not `multidimensional`) or that the last dimension
      of `probs` sums to one.
    name: A name for this operation (optional).

  Returns:
    logits, probs: Tuple of `Tensor`s. If `probs` has an entry that is `0` or
      `1`, then the corresponding entry in the returned logit will be `-Inf`
      and `Inf` respectively.

  Raises:
    ValueError: if neither `probs` nor `logits` were passed in, or both were.
  """
  with ops.name_scope(name, values=[probs, logits]):
    if (probs is None) == (logits is None):
      raise ValueError("Must pass probs or logits, but not both.")

    if probs is None:
      logits = ops.convert_to_tensor(logits, name="logits")
      if multidimensional:
        return logits, nn.softmax(logits, name="probs")
      return logits, math_ops.sigmoid(logits, name="probs")

    probs = ops.convert_to_tensor(probs, name="probs")
    if validate_args:
      with ops.name_scope("validate_probs"):
        one = constant_op.constant(1., probs.dtype)
        dependencies = [check_ops.assert_non_negative(probs)]
        if multidimensional:
          dependencies += [assert_close(math_ops.reduce_sum(probs, -1), one,
                                        message="probs does not sum to 1.")]
        else:
          dependencies += [check_ops.assert_less_equal(
              probs, one, message="probs has components greater than 1.")]
        probs = control_flow_ops.with_dependencies(dependencies, probs)

    with ops.name_scope("logits"):
      if multidimensional:
        # Here we don't compute the multidimensional case, in a manner
        # consistent with respect to the unidimensional case. We do so
        # following the TF convention. Typically, you might expect to see
        # logits = log(probs) - log(gather(probs, pivot)). A side-effect of
        # being consistent with the TF approach is that the unidimensional
        # case implicitly handles the second dimension but the
        # multidimensional case explicitly keeps the pivot dimension.
        return math_ops.log(probs), probs
      return math_ops.log(probs) - math_ops.log1p(-1. * probs), probs
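# A small NumPy sketch (illustrative, not library code) of the probs -> logits
# conversion used above, log(p) - log1p(-p), round-tripped through the sigmoid.
import numpy as np

p = np.array([1e-8, 0.25, 0.5, 0.9, 1. - 1e-8])
logits = np.log(p) - np.log1p(-p)
recovered = 1. / (1. + np.exp(-logits))
np.testing.assert_allclose(recovered, p, rtol=1e-6)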
def _inverse(self, y):
  y = self._maybe_assert_valid(y)
  return math_ops.exp(math_ops.log1p(
      -(1 - y**self.concentration1)**self.concentration0))