def test_inverse_max_dcg(self):
  labels = [[1., 4., 1., 0.], [4., 2., 0., 3.], [0., 0., 0., 0.]]
  target = [[0.04297], [0.033139], [0.]]
  target_1 = [[0.04621], [0.04621], [0.]]
  inverse_max_dcg = utils.inverse_max_dcg(labels)
  inverse_max_dcg_1 = utils.inverse_max_dcg(labels, topn=1)
  with tf.compat.v1.Session() as sess:
    inverse_max_dcg = sess.run(inverse_max_dcg)
    self.assertAllClose(inverse_max_dcg, target)
    inverse_max_dcg_1 = sess.run(inverse_max_dcg_1)
    self.assertAllClose(inverse_max_dcg_1, target_1)
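# A minimal pure-Python check of the targets above, assuming the defaults of
# utils.inverse_max_dcg: gain 2^label - 1 and rank discount 1 / log(1 + rank).
# `inverse_max_dcg_ref` is an illustrative helper, not part of the library.
import math


def inverse_max_dcg_ref(labels, topn=None):
  """Reference inverse max DCG for a single list of graded labels."""
  gains = sorted((2.0**label - 1.0 for label in labels), reverse=True)
  if topn is not None:
    gains = gains[:topn]
  max_dcg = sum(g / math.log1p(r) for r, g in enumerate(gains, start=1))
  return 1.0 / max_dcg if max_dcg > 0.0 else 0.0


print(inverse_max_dcg_ref([1., 4., 1., 0.]))          # ~0.04297
print(inverse_max_dcg_ref([4., 2., 0., 3.], topn=1))  # ~0.04621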
def pair_weights(self, labels, ranks):
  """See `_LambdaWeight`."""
  with tf.compat.v1.name_scope(name='dcg_lambda_weight'):
    _check_tensor_shapes([labels, ranks])
    valid_pair, labels = _get_valid_pairs_and_clean_labels(labels)
    gain = self._gain_fn(labels)
    if self._normalized:
      gain *= utils.inverse_max_dcg(
          labels,
          gain_fn=self._gain_fn,
          rank_discount_fn=self._rank_discount_fn,
          topn=self._topn)
    pair_gain = _apply_pairwise_op(tf.subtract, gain)
    pair_gain *= tf.cast(valid_pair, dtype=tf.float32)

    list_size = tf.shape(input=labels)[1]
    topn = self._topn or list_size

    def _discount_for_relative_rank_diff():
      """Rank-based discount in the LambdaLoss paper."""
      # The LambdaLoss is not well defined when topn is active and
      # topn < list_size. We cap the rank of examples to topn + 1 so that
      # the rank difference is capped to topn. This is just a convenient
      # upper bound when topn is active. We need to revisit this.
      capped_rank = tf.compat.v1.where(
          tf.greater(ranks, topn), tf.ones_like(ranks) * (topn + 1), ranks)
      rank_diff = tf.cast(
          tf.abs(_apply_pairwise_op(tf.subtract, capped_rank)),
          dtype=tf.float32)
      pair_discount = tf.compat.v1.where(
          tf.greater(rank_diff, 0),
          tf.abs(
              self._rank_discount_fn(rank_diff) -
              self._rank_discount_fn(rank_diff + 1)),
          tf.zeros_like(rank_diff))
      return pair_discount

    def _discount_for_absolute_rank():
      """Standard discount in the LambdaMART paper."""
      # When the rank discount is (1 / rank) for example, the discount is
      # |1 / r_i - 1 / r_j|. When i or j > topn, the discount becomes 0.
      rank_discount = tf.compat.v1.where(
          tf.greater(ranks, topn),
          tf.zeros_like(tf.cast(ranks, dtype=tf.float32)),
          self._rank_discount_fn(tf.cast(ranks, dtype=tf.float32)))
      pair_discount = tf.abs(_apply_pairwise_op(tf.subtract, rank_discount))
      return pair_discount

    u = _discount_for_relative_rank_diff()
    v = _discount_for_absolute_rank()
    pair_discount = (
        1. - self._smooth_fraction) * u + self._smooth_fraction * v
    pair_weight = tf.abs(pair_gain) * pair_discount
    if self._topn is None:
      return pair_weight
    pair_mask = _apply_pairwise_op(tf.logical_or,
                                   tf.less_equal(ranks, self._topn))
    return pair_weight * tf.cast(pair_mask, dtype=tf.float32)
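# A small numpy sketch of the pair weight above for the pure LambdaLoss case
# (smooth_fraction = 0, no topn), assuming gain 2^label - 1 and discount
# D(r) = 1 / log(1 + r). `lambda_pair_weights_sketch` is illustrative only.
import numpy as np


def lambda_pair_weights_sketch(labels, ranks):
  """|G_i - G_j| * |D(|r_i - r_j|) - D(|r_i - r_j| + 1)| per pair."""
  gain = np.power(2.0, labels) - 1.0
  pair_gain = np.abs(gain[:, None] - gain[None, :])
  rank_diff = np.abs(ranks[:, None] - ranks[None, :]).astype(float)
  safe_diff = np.maximum(rank_diff, 1.0)  # avoid dividing by log(1) = 0
  discount = lambda r: 1.0 / np.log1p(r)
  pair_discount = np.where(
      rank_diff > 0.0,
      np.abs(discount(safe_diff) - discount(safe_diff + 1.0)), 0.0)
  return pair_gain * pair_discount


print(lambda_pair_weights_sketch(np.array([2., 0., 1.]), np.array([1, 2, 3])))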
def compute_unreduced_loss(self, labels, logits, weights):
  """See `_RankingLoss`."""
  alpha = self._params.get('alpha')
  is_label_valid = utils.is_label_valid(labels)
  labels = tf.where(is_label_valid, labels, tf.zeros_like(labels))
  logits = tf.where(
      is_label_valid, logits,
      -1e3 * tf.ones_like(logits) +
      tf.reduce_min(input_tensor=logits, axis=-1, keepdims=True))

  label_sum = tf.reduce_sum(input_tensor=labels, axis=1, keepdims=True)
  if weights is None:
    weights = tf.ones_like(label_sum)
  weights = tf.squeeze(weights)

  nonzero_mask = tf.greater(tf.reshape(label_sum, [-1]), 0.0)
  labels = tf.where(nonzero_mask, labels, _EPSILON * tf.ones_like(labels))
  weights = tf.where(nonzero_mask, weights, tf.zeros_like(weights))

  gains = tf.pow(2., tf.cast(labels, dtype=tf.float32)) - 1.
  ranks = utils.approx_ranks(logits, alpha=alpha)
  discounts = 1. / tf.math.log1p(ranks)
  dcg = tf.reduce_sum(input_tensor=gains * discounts, axis=-1)
  cost = -dcg * tf.squeeze(utils.inverse_max_dcg(labels))
  return cost, weights
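# A rough numpy sketch of the smooth rank approximation behind
# utils.approx_ranks (ApproxNDCG, Qin et al.): the rank of item i is
# approximated by 1 + sum over j != i of sigmoid(alpha * (s_j - s_i)).
# `approx_ranks_sketch` is an illustrative name, not the library function.
import numpy as np


def approx_ranks_sketch(logits, alpha=10.0):
  s = np.asarray(logits, dtype=float)
  pairs = 1.0 / (1.0 + np.exp(-alpha * (s[None, :] - s[:, None])))
  np.fill_diagonal(pairs, 0.0)  # exclude the j == i term
  return 1.0 + pairs.sum(axis=1)


print(approx_ranks_sketch([2.0, 1.0, 0.0]))  # close to [1., 2., 3.]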
def _approx_ndcg_loss(labels,
                      logits,
                      weights=None,
                      reduction=core_losses.Reduction.SUM,
                      name=None,
                      alpha=10.):
  """Computes ApproxNDCG loss.

  ApproxNDCG ["A general approximation framework for direct optimization of
  information retrieval measures" by Qin et al.] is a smooth approximation
  to NDCG.

  Args:
    labels: A `Tensor` of the same shape as `logits` representing graded
      relevance.
    logits: A `Tensor` with shape [batch_size, list_size]. Each value is the
      ranking score of the corresponding item.
    weights: A scalar, a `Tensor` with shape [batch_size, 1] for list-wise
      weights, or a `Tensor` with shape [batch_size, list_size] for item-wise
      weights. If None, the weight of a list in the mini-batch is set to the
      sum of the labels of the items in that list.
    reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
      reduce training loss over batch.
    name: A string used as the name for this loss.
    alpha: The exponent in the generalized sigmoid function.

  Returns:
    An op for the ApproxNDCG loss.
  """
  with ops.name_scope(name, 'approx_ndcg_loss', (labels, logits, weights)):
    is_label_valid = utils.is_label_valid(labels)
    labels = array_ops.where(is_label_valid, labels,
                             array_ops.zeros_like(labels))
    logits = array_ops.where(
        is_label_valid, logits,
        -1e3 * array_ops.ones_like(logits) +
        math_ops.reduce_min(logits, axis=-1, keepdims=True))

    label_sum = math_ops.reduce_sum(labels, 1, keepdims=True)
    if weights is None:
      weights = array_ops.ones_like(label_sum)
    weights = array_ops.squeeze(weights)

    nonzero_mask = math_ops.greater(array_ops.reshape(label_sum, [-1]), 0.0)
    labels, logits, weights = [
        array_ops.boolean_mask(x, nonzero_mask)
        for x in [labels, logits, weights]
    ]

    gains = math_ops.pow(2., math_ops.to_float(labels)) - 1.
    ranks = utils.approx_ranks(logits, alpha=alpha)
    discounts = 1. / math_ops.log1p(ranks)
    dcg = math_ops.reduce_sum(gains * discounts, -1)
    cost = -dcg * array_ops.squeeze(utils.inverse_max_dcg(labels))
    return core_losses.compute_weighted_loss(
        cost, weights=weights, reduction=reduction)
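# End-to-end numpy sketch of the cost computed above: DCG with gains
# 2^label - 1 and discounts 1 / log(1 + approx_rank), scaled by the inverse
# max DCG. Reuses the illustrative helpers `approx_ranks_sketch` and
# `inverse_max_dcg_ref` defined earlier; the inputs are made-up examples,
# not library output.
import numpy as np

labels = np.array([1., 4., 1., 0.])
logits = np.array([0.5, 2.0, 0.1, -1.0])  # orders the items as the labels do
gains = np.power(2.0, labels) - 1.0
ranks = approx_ranks_sketch(logits, alpha=10.0)
dcg = np.sum(gains / np.log1p(ranks))
cost = -dcg * inverse_max_dcg_ref(labels.tolist())
print(cost)  # close to -1 since the ranking by logits matches the labels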
def individual_weights(self, labels, ranks):
  """See `_LambdaWeight`."""
  with tf.compat.v1.name_scope(name='dcg_lambda_weight'):
    _check_tensor_shapes([labels, ranks])
    labels = tf.convert_to_tensor(value=labels)
    labels = tf.compat.v1.where(
        utils.is_label_valid(labels), labels, tf.zeros_like(labels))
    gain = self._gain_fn(labels)
    if self._normalized:
      gain *= utils.inverse_max_dcg(
          labels,
          gain_fn=self._gain_fn,
          rank_discount_fn=self._rank_discount_fn,
          topn=self._topn)
    rank_discount = self._rank_discount_fn(tf.cast(ranks, dtype=tf.float32))
    return gain * rank_discount
def individual_weights(self, sorted_labels):
  """See `_LambdaWeight`."""
  with ops.name_scope(None, 'dcg_lambda_weight', (sorted_labels,)):
    sorted_labels = ops.convert_to_tensor(sorted_labels)
    sorted_labels = array_ops.where(
        utils.is_label_valid(sorted_labels), sorted_labels,
        array_ops.zeros_like(sorted_labels))
    gain = self._gain_fn(sorted_labels)
    if self._normalized:
      gain *= utils.inverse_max_dcg(
          sorted_labels,
          gain_fn=self._gain_fn,
          rank_discount_fn=self._rank_discount_fn,
          topn=self._topn)
    rank_discount = self._rank_discount_fn(
        math_ops.to_float(
            math_ops.range(array_ops.shape(sorted_labels)[1]) + 1))
    return gain * rank_discount
def individual_weights(self, sorted_labels):
  """See `_LambdaWeight`."""
  with tf.name_scope(name='dcg_lambda_weight'):
    sorted_labels = tf.convert_to_tensor(value=sorted_labels)
    sorted_labels = tf.where(
        utils.is_label_valid(sorted_labels), sorted_labels,
        tf.zeros_like(sorted_labels))
    gain = self._gain_fn(sorted_labels)
    if self._normalized:
      gain *= utils.inverse_max_dcg(
          sorted_labels,
          gain_fn=self._gain_fn,
          rank_discount_fn=self._rank_discount_fn,
          topn=self._topn)
    rank_discount = self._rank_discount_fn(
        tf.cast(tf.range(tf.shape(input=sorted_labels)[1]) + 1,
                dtype=tf.float32))
    return gain * rank_discount
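# The three variants above differ only in where the ranks come from: the
# first takes them as an argument, the other two use positions 1..list_size
# of the already-sorted labels. A compact numpy sketch of the weight itself,
# reusing the illustrative `inverse_max_dcg_ref` helper from earlier:
import numpy as np


def individual_weights_sketch(sorted_labels, normalized=True):
  """Per-item gain times rank discount, optionally NDCG-normalized."""
  gain = np.power(2.0, sorted_labels) - 1.0
  if normalized:
    gain = gain * inverse_max_dcg_ref(list(sorted_labels))
  ranks = np.arange(1, len(sorted_labels) + 1, dtype=float)
  return gain * (1.0 / np.log1p(ranks))


print(individual_weights_sketch(np.array([4., 1., 1., 0.])))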
def compute_unreduced_loss(labels, logits):
  """See `_RankingLoss`."""
  alpha = 10.0
  is_valid = utils.is_label_valid(labels)
  labels = tf.compat.v1.where(is_valid, labels, tf.zeros_like(labels))
  logits = tf.compat.v1.where(
      is_valid, logits,
      -1e3 * tf.ones_like(logits) +
      tf.reduce_min(input_tensor=logits, axis=-1, keepdims=True))

  label_sum = tf.reduce_sum(input_tensor=labels, axis=1, keepdims=True)
  nonzero_mask = tf.greater(tf.reshape(label_sum, [-1]), 0.0)
  labels = tf.compat.v1.where(nonzero_mask, labels,
                              _EPSILON * tf.ones_like(labels))
  gains = tf.pow(2., tf.cast(labels, dtype=tf.float32)) - 1.
  ranks = utils.approx_ranks(logits, alpha=alpha)
  discounts = 1. / tf.math.log1p(ranks)
  dcg = tf.reduce_sum(input_tensor=gains * discounts, axis=-1, keepdims=True)
  cost = -dcg * utils.inverse_max_dcg(labels)
  return cost, tf.reshape(tf.cast(nonzero_mask, dtype=tf.float32), [-1, 1])
def pair_weights(self, sorted_labels):
  """See `_LambdaWeight`."""
  with ops.name_scope(None, 'dcg_lambda_weight', (sorted_labels,)):
    valid_pair, sorted_labels = self._get_valid_pairs_and_clean_labels(
        sorted_labels)
    gain = self._gain_fn(sorted_labels)
    if self._normalized:
      gain *= utils.inverse_max_dcg(
          sorted_labels,
          gain_fn=self._gain_fn,
          rank_discount_fn=self._rank_discount_fn,
          topn=self._topn)
    pair_gain = array_ops.expand_dims(gain, 2) - array_ops.expand_dims(
        gain, 1)
    pair_gain *= math_ops.to_float(valid_pair)

    list_size = array_ops.shape(sorted_labels)[1]
    topn = self._topn or list_size
    rank = math_ops.range(list_size) + 1

    def _discount_for_relative_rank_diff():
      """Rank-based discount in the LambdaLoss paper."""
      # The LambdaLoss is not well defined when topn is active and
      # topn < list_size. We cap the rank of examples to topn + 1 so that
      # the rank difference is capped to topn. This is just a convenient
      # upper bound when topn is active. We need to revisit this.
      capped_rank = array_ops.where(
          math_ops.greater(rank, topn),
          array_ops.ones_like(rank) * (topn + 1), rank)
      rank_diff = math_ops.to_float(
          math_ops.abs(
              array_ops.expand_dims(capped_rank, 1) -
              array_ops.expand_dims(capped_rank, 0)))
      pair_discount = array_ops.where(
          math_ops.greater(rank_diff, 0),
          math_ops.abs(
              self._rank_discount_fn(rank_diff) -
              self._rank_discount_fn(rank_diff + 1)),
          array_ops.zeros_like(rank_diff))
      return pair_discount

    def _discount_for_absolute_rank():
      """Standard discount in the LambdaMART paper."""
      # When the rank discount is (1 / rank) for example, the discount is
      # |1 / r_i - 1 / r_j|. When i or j > topn, the discount becomes 0.
      rank_discount = array_ops.where(
          math_ops.greater(rank, topn),
          array_ops.zeros_like(math_ops.to_float(rank)),
          self._rank_discount_fn(math_ops.to_float(rank)))
      pair_discount = math_ops.abs(
          array_ops.expand_dims(rank_discount, 1) -
          array_ops.expand_dims(rank_discount, 0))
      return pair_discount

    u = _discount_for_relative_rank_diff()
    v = _discount_for_absolute_rank()
    pair_discount = (
        1. - self._smooth_fraction) * u + self._smooth_fraction * v
    pair_weight = math_ops.abs(pair_gain) * pair_discount
    if self._topn is None:
      return pair_weight
    pair_mask = math_ops.logical_or(
        array_ops.expand_dims(math_ops.less_equal(rank, self._topn), 1),
        array_ops.expand_dims(math_ops.less_equal(rank, self._topn), 0))
    return pair_weight * math_ops.to_float(pair_mask)
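# A tiny numpy illustration of the final topn pair mask above: a pair (i, j)
# survives when at least one of the two ranks is within topn. Values are
# made up for illustration.
import numpy as np

rank = np.arange(1, 5)  # ranks 1..4
topn = 2
in_top = rank <= topn
pair_mask = np.logical_or(in_top[:, None], in_top[None, :])
print(pair_mask.astype(int))
# [[1 1 1 1]
#  [1 1 1 1]
#  [1 1 0 0]
#  [1 1 0 0]]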