def test_train_one_head_with_optimizer(self):
        head1 = multi_label_head.MultiLabelHead(n_classes=2, name='head1')
        multi_head = multi_head_lib.MultiHead([head1])

        logits = {
            'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
        }
        labels = {'head1': np.array([[1, 0], [1, 1]], dtype=np.int64)}
        features = {'x': np.array(((42, ), ), dtype=np.int32)}
        # For large logits, sigmoid cross entropy loss is approximated as:
        # loss = labels * (logits < 0) * (-logits) +
        #        (1 - labels) * (logits > 0) * logits =>
        # expected_unweighted_loss = [[10., 10.], [15., 0.]]
        # loss = ((10 + 10) / 2 + (15 + 0) / 2) / 2 = 8.75
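        # Illustrative sanity check (not part of the original test): the
        # large-logit approximation in the comment above can be reproduced
        # directly with NumPy from the logits and labels defined here.
        approx = (labels['head1'] * np.maximum(-logits['head1'], 0.) +
                  (1 - labels['head1']) * np.maximum(logits['head1'], 0.))
        # approx == [[10., 10.], [15., 0.]]; average over classes, then batch.
        np.testing.assert_allclose(approx.mean(axis=1).mean(), 8.75)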
        expected_loss = 8.75
        tol = 1e-3
        loss = multi_head.loss(logits=logits,
                               labels=labels,
                               features=features,
                               mode=ModeKeys.TRAIN)
        self.assertAllClose(expected_loss,
                            self.evaluate(loss),
                            rtol=tol,
                            atol=tol)
        if tf.executing_eagerly():
            return

        expected_train_result = 'my_train_op'

        class _Optimizer(optimizer_v2.OptimizerV2):
            def get_updates(self, loss, params):
                del params
                return [
                    tf.strings.join([
                        tf.constant(expected_train_result),
                        tf.strings.as_string(loss, precision=3)
                    ])
                ]

            def get_config(self):
                config = super(_Optimizer, self).get_config()
                return config

        spec = multi_head.create_estimator_spec(
            features=features,
            mode=ModeKeys.TRAIN,
            logits=logits,
            labels=labels,
            optimizer=_Optimizer('my_optimizer'),
            trainable_variables=[
                tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)
            ])

        with self.cached_session() as sess:
            test_lib._initialize_variables(self, spec.scaffold)
            loss, train_result = sess.run((spec.loss, spec.train_op))
            self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
            self.assertEqual(
                six.b('{0:s}{1:.3f}'.format(expected_train_result,
                                            expected_loss)), train_result)
Example #2
    def test_predict_two_heads_logits_tensor_multi_dim(self):
        """Tests predict with multi-dimensional logits of shape [2, 2, 5]."""
        head1 = regression_head.RegressionHead(label_dimension=2, name='head1')
        head2 = regression_head.RegressionHead(label_dimension=3, name='head2')
        multi_head = multi_head_lib.MultiHead([head1, head2])

        logits = np.array(
            [[[-1., 1., 2., -2., 2.], [-1., 1., 2., -2., 2.]],
             [[-1.5, 1., -3., 2., -2.], [-1.5, 1., -3., 2., -2.]]],
            dtype=np.float32)
        expected_logits1 = np.array(
            [[[-1., 1.], [-1., 1.]], [[-1.5, 1.], [-1.5, 1.]]],
            dtype=np.float32)
        expected_logits2 = np.array(
            [[[2., -2., 2.], [2., -2., 2.]], [[-3., 2., -2.], [-3., 2., -2.]]],
            dtype=np.float32)
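        # Illustrative note (not part of the original test): MultiHead splits
        # the last logits dimension across heads by their label_dimension, so
        # the expected tensors above are simply a NumPy split at index 2.
        split1, split2 = np.split(logits, [2], axis=-1)
        np.testing.assert_allclose(split1, expected_logits1)
        np.testing.assert_allclose(split2, expected_logits2)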
        pred_keys = prediction_keys.PredictionKeys

        predictions = multi_head.predictions(logits)
        self.assertAllClose(
            expected_logits1,
            self.evaluate(predictions[('head1', pred_keys.PREDICTIONS)]))
        self.assertAllClose(
            expected_logits2,
            self.evaluate(predictions[('head2', pred_keys.PREDICTIONS)]))
        if context.executing_eagerly():
            return

        spec = multi_head.create_estimator_spec(
            features={'x': np.array(((42, ), ), dtype=np.int32)},
            mode=model_fn.ModeKeys.PREDICT,
            logits=logits)
        self.assertItemsEqual((test_lib._DEFAULT_SERVING_KEY, 'predict',
                               'head1', 'head1/regression', 'head1/predict',
                               'head2', 'head2/regression', 'head2/predict'),
                              spec.export_outputs.keys())
        # Assert predictions and export_outputs.
        with self.cached_session() as sess:
            test_lib._initialize_variables(self, spec.scaffold)
            self.assertIsNone(spec.scaffold.summary_op)
            predictions = sess.run(spec.predictions)
            self.assertAllClose(expected_logits1,
                                predictions[('head1', pred_keys.PREDICTIONS)])
            self.assertAllClose(expected_logits2,
                                predictions[('head2', pred_keys.PREDICTIONS)])

            self.assertAllClose(
                expected_logits1,
                sess.run(
                    spec.export_outputs[test_lib._DEFAULT_SERVING_KEY].value))
            self.assertAllClose(expected_logits1,
                                sess.run(spec.export_outputs['head1'].value))
            self.assertAllClose(expected_logits2,
                                sess.run(spec.export_outputs['head2'].value))
    def test_optimizer_v2_variable_name(self):
        head = head_lib.BinaryClassHead()

        logits = np.array((
            (45, ),
            (-41, ),
        ), dtype=np.float32)
        labels = np.array((
            (1, ),
            (1, ),
        ), dtype=np.float64)
        features = {'x': np.array(((42, ), ), dtype=np.float32)}

        class _Optimizer(optimizer_v2.OptimizerV2):
            def __init__(self, name, **kwargs):
                super(_Optimizer, self).__init__(name, **kwargs)

            def get_updates(self, loss, params):
                del params
                variable = tf.Variable(name='my_variable',
                                       dtype=tf.dtypes.float32,
                                       initial_value=0.)
                self._weights.append(variable)
                return [variable]

            def get_config(self):
                config = super(_Optimizer, self).get_config()
                return config

        # Create estimator spec.
        optimizer = _Optimizer('my_optimizer')
        old_opt_variable_name_prefix = 'training/' + optimizer.__class__.__name__
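        # Illustrative check (not part of the original test): for this
        # optimizer class the legacy prefix is 'training/_Optimizer'; the
        # assertions below verify OptimizerV2 variables are not named under it.
        assert old_opt_variable_name_prefix == 'training/_Optimizer'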
        spec = head.create_estimator_spec(features=features,
                                          mode=ModeKeys.TRAIN,
                                          logits=logits,
                                          labels=labels,
                                          optimizer=optimizer,
                                          trainable_variables=[
                                              tf.Variable(
                                                  [1.0, 2.0],
                                                  dtype=tf.dtypes.float32)
                                          ])

        with self.cached_session() as sess:
            test_lib._initialize_variables(self, spec.scaffold)
            optimizer_variables = optimizer.variables()
            var_values = sess.run(optimizer_variables)
            self.assertEqual(0., var_values[0])
            for var in optimizer_variables:
                self.assertNotIn(old_opt_variable_name_prefix, var.name)
Example #4
    def test_train_one_head_with_optimizer(self):
        head1 = multi_label_head.MultiLabelHead(n_classes=2, name='head1')
        multi_head = multi_head_lib.MultiHead([head1])

        logits = {
            'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
        }
        labels = {'head1': np.array([[1, 0], [1, 1]], dtype=np.int64)}
        features = {'x': np.array(((42, ), ), dtype=np.int32)}
        # For large logits, sigmoid cross entropy loss is approximated as:
        # loss = labels * (logits < 0) * (-logits) +
        #        (1 - labels) * (logits > 0) * logits =>
        # expected_unweighted_loss = [[10., 10.], [15., 0.]]
        # loss = ((10 + 10) / 2 + (15 + 0) / 2) / 2 = 8.75
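        # Illustrative check (not part of the original test): the exact
        # elementwise sigmoid cross entropy, max(x, 0) - x*z + log(1 + e^-|x|),
        # differs from the approximation above by at most ~e^-10 here, well
        # below the tolerance used below.
        x, z = logits['head1'], labels['head1']
        exact = np.maximum(x, 0.) - x * z + np.log1p(np.exp(-np.abs(x)))
        np.testing.assert_allclose(exact.mean(axis=1).mean(), 8.75, atol=1e-3)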
        expected_loss = 8.75
        tol = 1e-3
        loss = multi_head.loss(logits=logits,
                               labels=labels,
                               features=features,
                               mode=model_fn.ModeKeys.TRAIN)
        self.assertAllClose(expected_loss,
                            self.evaluate(loss),
                            rtol=tol,
                            atol=tol)
        if context.executing_eagerly():
            return

        expected_train_result = 'my_train_op'

        class _Optimizer(object):
            def minimize(self, loss, global_step):
                del global_step
                return string_ops.string_join([
                    constant_op.constant(expected_train_result),
                    string_ops.as_string(loss, precision=3)
                ])

        spec = multi_head.create_estimator_spec(features=features,
                                                mode=model_fn.ModeKeys.TRAIN,
                                                logits=logits,
                                                labels=labels,
                                                optimizer=_Optimizer())
        with self.cached_session() as sess:
            test_lib._initialize_variables(self, spec.scaffold)
            loss, train_result = sess.run((spec.loss, spec.train_op))
            self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
            self.assertEqual(
                six.b('{0:s}{1:.3f}'.format(expected_train_result,
                                            expected_loss)), train_result)
    def test_metrics_computation(self):
        """Runs metrics computation tests.

    Uses the `update_metrics` method in eager execution; otherwise uses
    `create_estimator_spec` in EVAL mode.

    logits = [[-101, 102, -103], [104, _, _]]
    predicted_labels = [[0, 1, 0], [1, _, _]]
    labels = [[1, 1, 1], [1, _, _]]
    weights = [[2, 5, 1], [2, _, _]]

    loss = (101*2 + 103*1) / 10 = 30.5
    accuracy = (0 + 5 + 0 + 2) / (2 + 5 + 1 + 2) = 0.7
    prediction_mean = (0 + 5 + 0 + 2) / (2 + 5 + 1 + 2) = 0.7
    precision = (5 + 2) / (5 + 2) = 1.0
    recall = (5 + 2) / (2 + 5 + 1 + 2) = 0.7
    """
        static_head = binary_head_lib.BinaryClassHead(weight_column='weights')
        head = seq_head_lib.SequentialHeadWrapper(static_head, 'sequence_mask',
                                                  'weights')

        features = {
            'sequence_mask': np.array([[1, 1, 1], [1, 0, 0]]),
            'weights': np.array([[2, 5, 1], [2, 100, 100]])
        }
        regularization_losses = [100.]
        logits = _convert_to_tensor([[-101, 102, -103], [104, 100, 100]])
        labels = sparse_tensor.SparseTensor(values=[1, 1, 1, 1],
                                            indices=((0, 0), (0, 1), (0, 2),
                                                     (1, 0)),
                                            dense_shape=(2, 3))
        features = _convert_to_tensor(features)
        expected_loss = 30.5
        keys = metric_keys.MetricKeys
        expected_metrics = {
            keys.LOSS_MEAN: expected_loss,
            keys.ACCURACY: 0.7,
            keys.PREDICTION_MEAN: 0.7,
            keys.LABEL_MEAN: 1.0,
            keys.LOSS_REGULARIZATION: 100,
            keys.PRECISION: 1.0,
            keys.RECALL: 0.7,
            keys.ACCURACY_BASELINE: 1.0,
            keys.AUC: 0.,
            keys.AUC_PR: 1.0
        }

        if context.executing_eagerly():
            eval_metrics = head.metrics(
                regularization_losses=regularization_losses)
            updated_metrics = head.update_metrics(eval_metrics, features,
                                                  logits, labels,
                                                  regularization_losses)
            self.assertItemsEqual(expected_metrics.keys(),
                                  updated_metrics.keys())
            self.assertAllClose(
                expected_metrics,
                {k: updated_metrics[k].result()
                 for k in updated_metrics})
            return

        spec = head.create_estimator_spec(
            features=features,
            mode=ModeKeys.EVAL,
            logits=logits,
            labels=labels,
            regularization_losses=regularization_losses)

        with self.cached_session() as sess:
            head_utils._initialize_variables(self, spec.scaffold)
            self.assertIsNone(spec.scaffold.summary_op)
            value_ops = {
                k: spec.eval_metric_ops[k][0]
                for k in spec.eval_metric_ops
            }
            update_ops = {
                k: spec.eval_metric_ops[k][1]
                for k in spec.eval_metric_ops
            }
            _ = sess.run(update_ops)
            self.assertAllClose(expected_metrics,
                                {k: value_ops[k].eval()
                                 for k in value_ops})
Example #6
    def test_train_with_regularization_losses(self):
        head1 = multi_label_head.MultiLabelHead(n_classes=2, name='head1')
        head2 = multi_label_head.MultiLabelHead(n_classes=3, name='head2')
        multi_head = multi_head_lib.MultiHead([head1, head2],
                                              head_weights=[1., 2.])

        logits = {
            'head1':
            np.array([[-10., 10.], [-15., 10.]], dtype=np.float32),
            'head2':
            np.array([[20., -20., 20.], [-30., 20., -20.]], dtype=np.float32),
        }
        expected_probabilities = {
            'head1': nn.sigmoid(logits['head1']),
            'head2': nn.sigmoid(logits['head2']),
        }
        labels = {
            'head1': np.array([[1, 0], [1, 1]], dtype=np.int64),
            'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64),
        }
        features = {'x': np.array(((42, ), ), dtype=np.int32)}
        regularization_losses = [1.5, 0.5]

        # For large logits, sigmoid cross entropy loss is approximated as:
        # loss = labels * (logits < 0) * (-logits) +
        #        (1 - labels) * (logits > 0) * logits =>
        # head1: expected_unweighted_loss = [[10., 10.], [15., 0.]]
        # loss1 = ((10 + 10) / 2 + (15 + 0) / 2) / 2 = 8.75
        # head2: expected_unweighted_loss = [[20., 20., 20.], [30., 0., 0]]
        # loss2 = ((20 + 20 + 20) / 3 + (30 + 0 + 0) / 3) / 2 = 15
        # Average over classes and examples; weighted sum over heads.
        # weights = [1., 2.]
        # merged_training_loss = 1. * loss1 + 2. * loss2
        # training_loss = merged_training_loss + regularization_loss
        #               = 1. * loss1 + 2. * loss2 + sum([1.5, 0.5])
        expected_loss_head1 = 8.75
        expected_loss_head2 = 15.0
        expected_regularization_loss = 2.
        # training loss.
        expected_loss = (1. * expected_loss_head1 + 2. * expected_loss_head2 +
                         expected_regularization_loss)
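        # Illustrative sanity check (not part of the original test): head2's
        # approximated loss, ((20 + 20 + 20) / 3 + (30 + 0 + 0) / 3) / 2 = 15,
        # recomputed with NumPy from the logits and labels above.
        approx2 = (labels['head2'] * np.maximum(-logits['head2'], 0.) +
                   (1 - labels['head2']) * np.maximum(logits['head2'], 0.))
        np.testing.assert_allclose(approx2.mean(axis=1).mean(),
                                   expected_loss_head2)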
        tol = 1e-3
        loss = multi_head.loss(logits=logits,
                               labels=labels,
                               features=features,
                               mode=model_fn.ModeKeys.TRAIN,
                               regularization_losses=regularization_losses)
        self.assertAllClose(expected_loss,
                            self.evaluate(loss),
                            rtol=tol,
                            atol=tol)
        if context.executing_eagerly():
            return

        keys = metric_keys.MetricKeys
        expected_train_result = 'my_train_op'

        def _train_op_fn(loss):
            return string_ops.string_join([
                constant_op.constant(expected_train_result),
                string_ops.as_string(loss, precision=3)
            ])

        spec = multi_head.create_estimator_spec(
            features=features,
            mode=model_fn.ModeKeys.TRAIN,
            logits=logits,
            labels=labels,
            train_op_fn=_train_op_fn,
            regularization_losses=regularization_losses)
        self.assertIsNotNone(spec.loss)
        self.assertEqual({}, spec.eval_metric_ops)
        self.assertIsNotNone(spec.train_op)
        self.assertIsNone(spec.export_outputs)
        test_lib._assert_no_hooks(self, spec)
        # Assert predictions, loss, train_op, and summaries.
        with self.cached_session() as sess:
            test_lib._initialize_variables(self, spec.scaffold)
            self.assertIsNotNone(spec.scaffold.summary_op)
            loss, train_result, summary_str, predictions = sess.run(
                (spec.loss, spec.train_op, spec.scaffold.summary_op,
                 spec.predictions))
            self.assertAllClose(
                logits['head1'],
                predictions[('head1', prediction_keys.PredictionKeys.LOGITS)])
            self.assertAllClose(
                expected_probabilities['head1'],
                predictions[('head1',
                             prediction_keys.PredictionKeys.PROBABILITIES)])
            self.assertAllClose(
                logits['head2'],
                predictions[('head2', prediction_keys.PredictionKeys.LOGITS)])
            self.assertAllClose(
                expected_probabilities['head2'],
                predictions[('head2',
                             prediction_keys.PredictionKeys.PROBABILITIES)])
            self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
            self.assertEqual(
                six.b('{0:s}{1:.3f}'.format(expected_train_result,
                                            expected_loss)), train_result)
            test_lib._assert_simple_summaries(
                self, {
                    keys.LOSS_REGULARIZATION: expected_regularization_loss,
                    keys.LOSS: expected_loss,
                    keys.LOSS + '/head1': expected_loss_head1,
                    keys.LOSS + '/head2': expected_loss_head2,
                }, summary_str, tol)
Example #7
    def test_predict_two_heads_logits_dict(self):
        """Tests predict with logits as dict."""
        head1 = multi_label_head.MultiLabelHead(n_classes=2, name='head1')
        head2 = multi_label_head.MultiLabelHead(n_classes=3, name='head2')
        multi_head = multi_head_lib.MultiHead([head1, head2])

        logits = {
            'head1': np.array([[-1., 1.], [-1.5, 1.]], dtype=np.float32),
            'head2': np.array([[2., -2., 2.], [-3., 2., -2.]],
                              dtype=np.float32)
        }
        expected_probabilities = {
            'head1': nn.sigmoid(logits['head1']),
            'head2': nn.sigmoid(logits['head2']),
        }
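        # Illustrative note (not part of the original test): MultiLabelHead's
        # PROBABILITIES are an elementwise sigmoid of the logits, so the
        # expected values above can be reproduced with plain NumPy.
        np.testing.assert_allclose(
            1. / (1. + np.exp(-logits['head1'])),
            self.evaluate(expected_probabilities['head1']),
            rtol=1e-6)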
        pred_keys = prediction_keys.PredictionKeys

        predictions = multi_head.predictions(logits)
        self.assertAllClose(
            logits['head1'],
            self.evaluate(predictions[('head1', pred_keys.LOGITS)]))
        self.assertAllClose(
            logits['head2'],
            self.evaluate(predictions[('head2', pred_keys.LOGITS)]))
        self.assertAllClose(
            expected_probabilities['head1'],
            self.evaluate(predictions[('head1', pred_keys.PROBABILITIES)]))
        self.assertAllClose(
            expected_probabilities['head2'],
            self.evaluate(predictions[('head2', pred_keys.PROBABILITIES)]))
        if context.executing_eagerly():
            return

        spec = multi_head.create_estimator_spec(
            features={'x': np.array(((42, ), ), dtype=np.int32)},
            mode=model_fn.ModeKeys.PREDICT,
            logits=logits)
        self.assertItemsEqual(
            (test_lib._DEFAULT_SERVING_KEY, 'predict', 'head1',
             'head1/classification', 'head1/predict', 'head2',
             'head2/classification', 'head2/predict'),
            spec.export_outputs.keys())
        # Assert predictions and export_outputs.
        with self.cached_session() as sess:
            test_lib._initialize_variables(self, spec.scaffold)
            self.assertIsNone(spec.scaffold.summary_op)
            predictions = sess.run(spec.predictions)
            self.assertAllClose(logits['head1'],
                                predictions[('head1', pred_keys.LOGITS)])
            self.assertAllClose(logits['head2'],
                                predictions[('head2', pred_keys.LOGITS)])
            self.assertAllClose(
                expected_probabilities['head1'],
                predictions[('head1', pred_keys.PROBABILITIES)])
            self.assertAllClose(
                expected_probabilities['head2'],
                predictions[('head2', pred_keys.PROBABILITIES)])

            self.assertAllClose(
                expected_probabilities['head1'],
                sess.run(
                    spec.export_outputs[test_lib._DEFAULT_SERVING_KEY].scores))
            self.assertAllClose(expected_probabilities['head1'],
                                sess.run(spec.export_outputs['head1'].scores))
            self.assertAllClose(expected_probabilities['head2'],
                                sess.run(spec.export_outputs['head2'].scores))
            self.assertAllClose(
                expected_probabilities['head1'],
                sess.run(spec.export_outputs['predict'].
                         outputs['head1/probabilities']))
            self.assertAllClose(
                expected_probabilities['head2'],
                sess.run(spec.export_outputs['predict'].
                         outputs['head2/probabilities']))
            self.assertAllClose(
                expected_probabilities['head1'],
                sess.run(spec.export_outputs['head1/predict'].
                         outputs['probabilities']))
            self.assertAllClose(
                expected_probabilities['head2'],
                sess.run(spec.export_outputs['head2/predict'].
                         outputs['probabilities']))
Example #8
    def test_train_one_head(self):
        head1 = multi_label_head.MultiLabelHead(n_classes=2, name='head1')
        multi_head = multi_head_lib.MultiHead([head1])

        logits = {
            'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
        }
        expected_probabilities = {
            'head1': nn.sigmoid(logits['head1']),
        }
        labels = {'head1': np.array([[1, 0], [1, 1]], dtype=np.int64)}
        features = {'x': np.array(((42, ), ), dtype=np.int32)}
        # For large logits, sigmoid cross entropy loss is approximated as:
        # loss = labels * (logits < 0) * (-logits) +
        #        (1 - labels) * (logits > 0) * logits =>
        # expected_unweighted_loss = [[10., 10.], [15., 0.]]
        # loss = ((10 + 10) / 2 + (15 + 0) / 2) / 2 = 8.75
        expected_loss = 8.75
        tol = 1e-3
        loss = multi_head.loss(logits=logits,
                               labels=labels,
                               features=features,
                               mode=model_fn.ModeKeys.TRAIN)
        self.assertAllClose(expected_loss,
                            self.evaluate(loss),
                            rtol=tol,
                            atol=tol)
        if context.executing_eagerly():
            return

        expected_train_result = 'my_train_op'

        def _train_op_fn(loss):
            return string_ops.string_join([
                constant_op.constant(expected_train_result),
                string_ops.as_string(loss, precision=3)
            ])
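        # Illustrative note (not part of the original test): the fake train op
        # joins the marker string with the loss formatted to three decimals,
        # so the expected train result below is b'my_train_op8.750'.
        assert six.b('{0:s}{1:.3f}'.format(expected_train_result,
                                           8.75)) == b'my_train_op8.750'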

        spec = multi_head.create_estimator_spec(features=features,
                                                mode=model_fn.ModeKeys.TRAIN,
                                                logits=logits,
                                                labels=labels,
                                                train_op_fn=_train_op_fn)
        self.assertIsNotNone(spec.loss)
        self.assertEqual({}, spec.eval_metric_ops)
        self.assertIsNotNone(spec.train_op)
        self.assertIsNone(spec.export_outputs)
        test_lib._assert_no_hooks(self, spec)
        # Assert predictions, loss, train_op, and summaries.
        with self.cached_session() as sess:
            test_lib._initialize_variables(self, spec.scaffold)
            self.assertIsNotNone(spec.scaffold.summary_op)
            loss, train_result, summary_str, predictions = sess.run(
                (spec.loss, spec.train_op, spec.scaffold.summary_op,
                 spec.predictions))
            self.assertAllClose(
                logits['head1'],
                predictions[('head1', prediction_keys.PredictionKeys.LOGITS)])
            self.assertAllClose(
                expected_probabilities['head1'],
                predictions[('head1',
                             prediction_keys.PredictionKeys.PROBABILITIES)])
            self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
            self.assertEqual(
                six.b('{0:s}{1:.3f}'.format(expected_train_result,
                                            expected_loss)), train_result)
            test_lib._assert_simple_summaries(
                self, {
                    metric_keys.MetricKeys.LOSS: expected_loss,
                    metric_keys.MetricKeys.LOSS + '/head1': expected_loss,
                }, summary_str, tol)
Example #9
    def test_eval_two_heads_with_weights(self):
        head1 = multi_label_head.MultiLabelHead(n_classes=2, name='head1')
        head2 = multi_label_head.MultiLabelHead(n_classes=3, name='head2')
        multi_head = multi_head_lib.MultiHead([head1, head2],
                                              head_weights=[1., 2.])

        logits = {
            'head1':
            np.array([[-10., 10.], [-15., 10.]], dtype=np.float32),
            'head2':
            np.array([[20., -20., 20.], [-30., 20., -20.]], dtype=np.float32),
        }
        labels = {
            'head1': np.array([[1, 0], [1, 1]], dtype=np.int64),
            'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64),
        }
        features = {'x': np.array(((42, ), ), dtype=np.int32)}
        # For large logits, sigmoid cross entropy loss is approximated as:
        # loss = labels * (logits < 0) * (-logits) +
        #        (1 - labels) * (logits > 0) * logits =>
        # head1: expected_unweighted_loss = [[10., 10.], [15., 0.]]
        # loss = ((10 + 10) / 2 + (15 + 0) / 2) / 2 = 8.75
        # head2: expected_unweighted_loss = [[20., 20., 20.], [30., 0., 0]]
        # loss = ((20 + 20 + 20) / 3 + (30 + 0 + 0) / 3) / 2 = 15
        expected_loss_head1 = 8.75
        expected_loss_head2 = 15.
        expected_loss = 1. * expected_loss_head1 + 2. * expected_loss_head2
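        # Illustrative arithmetic (not part of the original test): with
        # head_weights=[1., 2.] the combined loss is 1. * 8.75 + 2. * 15. = 38.75.
        assert abs(expected_loss - 38.75) < 1e-6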
        tol = 1e-3
        keys = metric_keys.MetricKeys
        expected_metrics = {
            keys.LOSS + '/head1': expected_loss_head1,
            keys.LOSS + '/head2': expected_loss_head2,
            # Average loss over examples.
            keys.LOSS_MEAN + '/head1': expected_loss_head1,
            keys.LOSS_MEAN + '/head2': expected_loss_head2,
            # auc and auc_pr cannot be reliably calculated for only 4-6 samples, but
            # this assert tests that the algorithm remains consistent.
            # TODO(yhliang): update metrics
            # keys.AUC + '/head1': 0.1667,
            # keys.AUC + '/head2': 0.3333,
            # keys.AUC_PR + '/head1': 0.6667,
            # keys.AUC_PR + '/head2': 0.5000,
        }

        if context.executing_eagerly():
            loss = multi_head.loss(logits,
                                   labels,
                                   features=features,
                                   mode=model_fn.ModeKeys.EVAL)
            self.assertIsNotNone(loss)
            self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)

            eval_metrics = multi_head.metrics()
            updated_metrics = multi_head.update_metrics(
                eval_metrics, features, logits, labels)
            self.assertItemsEqual(expected_metrics.keys(),
                                  updated_metrics.keys())
            self.assertAllClose(
                expected_metrics,
                {k: updated_metrics[k].result()
                 for k in updated_metrics},
                rtol=tol,
                atol=tol)
            return

        spec = multi_head.create_estimator_spec(features=features,
                                                mode=model_fn.ModeKeys.EVAL,
                                                logits=logits,
                                                labels=labels)
        # Assert spec contains expected tensors.
        self.assertIsNotNone(spec.loss)
        self.assertItemsEqual(expected_metrics.keys(),
                              spec.eval_metric_ops.keys())
        self.assertIsNone(spec.train_op)
        self.assertIsNone(spec.export_outputs)
        test_lib._assert_no_hooks(self, spec)
        # Assert predictions, loss, and metrics.
        with self.cached_session() as sess:
            test_lib._initialize_variables(self, spec.scaffold)
            self.assertIsNone(spec.scaffold.summary_op)
            value_ops = {
                k: spec.eval_metric_ops[k][0]
                for k in spec.eval_metric_ops
            }
            update_ops = {
                k: spec.eval_metric_ops[k][1]
                for k in spec.eval_metric_ops
            }
            loss, _ = sess.run((spec.loss, update_ops))
            self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
            # Check results of value ops (in `metrics`).
            self.assertAllClose(expected_metrics,
                                {k: value_ops[k].eval()
                                 for k in value_ops},
                                rtol=tol,
                                atol=tol)