def test_compute_logits(self, mode):
  """Checks groupwise logits with and without explicit shuffle params.

  Args:
    mode: Mode value forwarded unchanged to `compute_logits`.
  """
  group_size = 2
  shuffle_params = {
      'num_shuffles_train': 2,
      'num_shuffles_eval': 2,
      'num_shuffles_predict': 2,
  }

  def _dummy_score_fn(context_features, group_features, mode, params, config):
    """Returns context + example feature + row count of the reshaped sum."""
    del mode, params, config  # Unused by this dummy scorer.
    # 'context': [batch_size * num_groups, 1]
    # 'example_f1': [batch_size * num_groups, group_size, 1]
    expanded_context = tf.expand_dims(context_features['context'], axis=1)
    summed = expanded_context + group_features['example_f1']
    flat_scores = tf.reshape(summed, [-1, group_size])
    # Add the shape of the logits to differentiate number of shuffles.
    num_rows = tf.cast(tf.shape(flat_scores)[0], tf.float32)
    return flat_scores + num_rows

  with tf.Graph().as_default():
    tf.compat.v1.set_random_seed(1)

    with tf.compat.v1.Session() as sess:
      ranking_model = model._GroupwiseRankingModel(
          _dummy_score_fn,
          group_size=group_size,
          transform_fn=feature.make_identity_transform_fn(['context']),
      )

      def _check(features, labels, params, indices_shape, expected_logits):
        """Runs compute_logits, then checks gather indices and logits."""
        actual = sess.run(
            ranking_model.compute_logits(features, labels, mode, params, None))
        self.assertEqual(
            ranking_model._feature_gather_indices.get_shape().as_list(),
            indices_shape)
        self.assertAllEqual(actual, expected_logits)

      # batch_size = 1, list_size = 3, is_valid = [True, True, False]
      features = {
          'context': [[1.]],
          'example_f1': [[[1.], [2.], [3.]]],
      }
      labels = [[1., 0, -1]]
      # No params.
      _check(features, labels, None, [1, 3, 2, 2], [[5., 6., 0.]])
      # Trigger params.
      _check(features, labels, shuffle_params, [1, 6, 2, 2], [[8., 9., 0.]])

      # batch_size = 1, list_size = 3, is_valid = [True, True, True]
      features = {
          'context': [[1.]],
          'example_f1': [[[1.], [2.], [0.]]],
      }
      labels = [[1., 0, 1]]
      _check(features, labels, shuffle_params, [1, 6, 2, 2], [[8., 9., 7.]])
def setUp(self):
  """Resets the default graph and builds the Estimator under test.

  The Estimator wraps a groupwise ranking model_fn (group size 2) with a
  weighted pairwise hinge loss and an Adagrad optimizer, and writes to a
  freshly created temp directory.
  """
  super(GroupwiseRankingEstimatorTest, self).setUp()
  ops.reset_default_graph()

  self._model_dir = test.get_temp_dir()
  gfile.MakeDirs(self._model_dir)

  identity_transform = feature.make_identity_transform_fn(
      ['context', 'weight'])
  hinge_loss_fn = losses.make_loss_fn(
      losses.RankingLossKey.PAIRWISE_HINGE_LOSS,
      weights_feature_name='weight')
  adagrad = training.AdagradOptimizer(learning_rate=0.1)
  ranking_head = head.create_ranking_head(
      loss_fn=hinge_loss_fn, optimizer=adagrad)

  groupwise_model_fn = model.make_groupwise_ranking_fn(
      _group_score_fn,
      group_size=2,
      transform_fn=identity_transform,
      ranking_head=ranking_head)
  self._estimator = estimator.Estimator(groupwise_model_fn, self._model_dir)
def test_make_identity_transform_fn(self):
  """Identity transform returns separate context and per-example dicts."""
  context_values = [[1.0, 1.0], [1.0, 1.0]]
  per_example_values = [[[10.0]], [[10.0]]]
  features = {
      # Input size: (batch_size=2, num_features=2).
      "context": ops.convert_to_tensor(context_values),
      "per_example": ops.convert_to_tensor(per_example_values),
  }
  with session.Session() as sess:
    transform_fn = feature_lib.make_identity_transform_fn(["context"])
    transformed = transform_fn(features, 1)
    context_features, per_example_features = sess.run(transformed)

    # Only the requested name ends up among the context features.
    self.assertEqual(["context"], sorted(context_features))
    self.assertAllEqual(context_values, context_features["context"])

    # Everything else is returned as a per-example feature.
    self.assertEqual(["per_example"], sorted(per_example_features))
    self.assertAllEqual(per_example_values,
                        per_example_features["per_example"])
def test_make_identity_transform_fn(self):
  """Identity transform returns separate context and per-example dicts."""
  context_values = [[1.0, 1.0], [1.0, 1.0]]
  per_example_values = [[[10.0]], [[10.0]]]
  with tf.Graph().as_default():
    features = {
        # Input size: (batch_size=2, num_features=2).
        "context": tf.convert_to_tensor(value=context_values),
        "per_example": tf.convert_to_tensor(value=per_example_values),
    }
    with tf.compat.v1.Session() as sess:
      transform_fn = feature_lib.make_identity_transform_fn(["context"])
      transformed = transform_fn(features, 1)
      context_features, per_example_features = sess.run(transformed)

      # Only the requested name ends up among the context features.
      self.assertCountEqual(["context"], context_features)
      self.assertAllEqual(context_values, context_features["context"])

      # Everything else is returned as a per-example feature.
      self.assertCountEqual(["per_example"], per_example_features)
      self.assertAllEqual(per_example_values,
                          per_example_features["per_example"])
def __init__(self, transform_fn=None):
  """Stores the feature transform shared by all ranking models.

  Args:
    transform_fn: (function) Optional user-provided function that turns raw
      features into dense Tensors. When None, the identity transform built
      by `feature.make_identity_transform_fn({})` is used. Expected
      signature:
      * Args:
        `features`: A dict of Tensors or SparseTensors that contains the
          raw features from an input_fn.
        `mode`: Optional. See estimator `ModeKeys`.
      * Returns:
        `context_features`: A dict of `Tensor`s with shape
          [batch_size, ...]
        `example_features`: A dict of `Tensor`s with shape
          [batch_size, list_size, ...]
  """
  self._transform_fn = (
      transform_fn if transform_fn is not None
      else feature.make_identity_transform_fn({}))
def make_groupwise_ranking_fn(group_score_fn,
                              group_size,
                              ranking_head,
                              transform_fn=None):
  """Builds an `Estimator` model_fn for groupwise comparison ranking models.

  Args:
    group_score_fn: Scoring function for a group of examples with `group_size`
      that returns a score per example. It has to follow signature:
      * Args:
        `context_features`: A dict of `Tensor`s with shape [batch_size, ...].
        `per_example_features`: A dict of `Tensor`s with shape [batch_size,
          group_size, ...]
        `mode`: Optional. Specifies if this is training, evaluation or
          inference. See `ModeKeys`.
        `params`: Optional dict of hyperparameters, same value passed in the
          `Estimator` constructor.
        `config`: Optional configuration object, same value passed in the
          `Estimator` constructor.
      * Returns: Tensor of shape [batch_size, group_size] containing
        per-example scores.
    group_size: An integer denoting the number of examples in
      `group_score_fn`. Must be positive.
    ranking_head: A `head._RankingHead` object.
    transform_fn: Function transforming the raw features into dense tensors.
      Defaults to `feature.make_identity_transform_fn({})` when None. It has
      the following signature:
      * Args:
        `features`: A dict of `Tensor`s contains the raw input.
        `mode`: Optional. See estimator `ModeKeys`.
      * Returns:
        `context_features`: A dict of `Tensor`s with shape [batch_size, ...]
        `per_example_features`: A dict of `Tensor`s with shape [batch_size,
          list_size, ...]

  Returns:
    An `Estimator` `model_fn` (see estimator.py) with the following signature:
    * Args:
      * `features`: dict of Tensors of shape [batch_size, list_size, ...] for
        per-example features and shape [batch_size, ...] for non-example
        context features. Outside PREDICT mode this dict is updated in place
        with the transformed features before it is handed to `ranking_head`.
      * `labels`: Tensor with shape [batch_size, list_size] denoting
        relevance.
      * `mode`: No difference.
      * `params`: No difference.
      * `config`: No difference.
    * Returns:
      `EstimatorSpec`

  Raises:
    ValueError: when group_size is invalid.
  """
  if group_size <= 0:
    raise ValueError('Invalid group_size %d' % group_size)
  if transform_fn is None:
    transform_fn = feature.make_identity_transform_fn({})

  def _call_transform_fn(features, mode):
    """Calls `transform_fn`, passing `mode` only if it accepts it."""
    transform_fn_args = function_utils.fn_args(transform_fn)
    if 'mode' in transform_fn_args:
      return transform_fn(features, mode=mode)
    return transform_fn(features)

  def _groupwise_dnn_v2(features, labels, mode, params, config):
    """Defines the dnn for groupwise scoring functions."""
    with ops.name_scope('transform'):
      context_features, per_example_features = _call_transform_fn(
          features, mode)

    def _score_fn(context_features, group_features, reuse):
      with variable_scope.variable_scope('group_score', reuse=reuse):
        return group_score_fn(context_features, group_features, mode, params,
                              config)

    # Scatter/Gather per-example scores through groupwise comparison. Each
    # instance in a mini-batch will form a number of groups. Each group of
    # examples is scored by 'score_fn' and scores for individual examples are
    # accumulated over groups.
    with ops.name_scope('groupwise_dnn_v2'):
      with ops.name_scope('infer_sizes'):
        if labels is not None:
          batch_size, list_size = array_ops.unstack(array_ops.shape(labels))
          is_valid = utils.is_label_valid(labels)
        else:
          # Infer batch_size and list_size from a feature.
          example_tensor_shape = array_ops.shape(
              next(six.itervalues(per_example_features)))
          batch_size = example_tensor_shape[0]
          list_size = example_tensor_shape[1]
          is_valid = utils.is_label_valid(
              array_ops.ones([batch_size, list_size]))
      # NOTE(review): both sizes are derived from `array_ops.shape` above, so
      # this guard looks unreachable -- confirm before removing it.
      if batch_size is None or list_size is None:
        raise ValueError('Invalid batch_size=%s or list_size=%s' %
                         (batch_size, list_size))

      # For each example feature, assume the shape is [batch_size, list_size,
      # feature_size], the groups are formed along the 2nd dim. Each group has
      # a 'group_size' number of indices in [0, list_size). Based on these
      # indices, we can gather the example feature into a sub-tensor for each
      # group. The total number of groups we have for a mini-batch is
      # batch_size * num_groups. Inside each group, we have a 'group_size'
      # number of examples.
      indices, mask = _form_group_indices_nd(
          is_valid, group_size, shuffle=(mode != model_fn.ModeKeys.PREDICT))
      num_groups = array_ops.shape(mask)[1]

      with ops.name_scope('group_features'):
        # For context features, We have shape [batch_size * num_groups, ...].
        large_batch_context_features = {}
        for name, value in six.iteritems(context_features):
          # [batch_size, 1, ...].
          value = array_ops.expand_dims(value, axis=1)
          # [batch_size, num_groups, ...].
          value = array_ops.gather(
              value, array_ops.zeros([num_groups], dtypes.int32), axis=1)
          # [batch_size * num_groups, ...]
          large_batch_context_features[name] = utils.reshape_first_ndims(
              value, 2, [batch_size * num_groups])

        # For example feature, we have shape [batch_size * num_groups,
        # group_size, ...].
        large_batch_group_features = {}
        for name, value in six.iteritems(per_example_features):
          # [batch_size, num_groups, group_size, ...].
          value = array_ops.gather_nd(value, indices)
          # [batch_size * num_groups, group_size, ...].
          large_batch_group_features[name] = utils.reshape_first_ndims(
              value, 3, [batch_size * num_groups, group_size])

      # Do the inference and get scores for the large batch.
      # [batch_size * num_groups, group_size].
      scores = _score_fn(
          large_batch_context_features, large_batch_group_features,
          reuse=False)

      with ops.name_scope('accumulate_scores'):
        scores = array_ops.reshape(scores,
                                   [batch_size, num_groups, group_size])
        # Reset invalid scores to 0 based on mask.
        scores = array_ops.where(
            array_ops.gather(
                array_ops.expand_dims(mask, 2),
                array_ops.zeros([group_size], dtypes.int32),
                axis=2), scores, array_ops.zeros_like(scores))
        # [batch_size, num_groups, group_size].
        list_scores = array_ops.scatter_nd(indices, scores,
                                           [batch_size, list_size])
        # Use average. `math_ops.cast` replaces the deprecated
        # `math_ops.to_float`.
        list_scores /= math_ops.cast(group_size, dtypes.float32)

    if mode != model_fn.ModeKeys.PREDICT:
      # Deliberately mutate the caller's dict: `_model_fn` passes the same
      # `features` object on to `ranking_head`, which then sees the
      # transformed features.
      features.update(context_features)
      features.update(per_example_features)
    return list_scores

  def _model_fn(features, labels, mode, params, config):
    """Defines an `Estimator` model_fn."""
    params = params or {}

    tf_logging.info('Use groupwise dnn v2.')
    logits = _groupwise_dnn_v2(features, labels, mode, params, config)

    return ranking_head.create_estimator_spec(
        features=features, mode=mode, logits=logits, labels=labels)

  return _model_fn