  def test_named_args_with_weights(self):
    features = {"f1": "f1_value", "f2": "f2_value"}
    labels_ = {"l1": "l1_value", "l2": "l2_value"}
    predictions_ = {"p1": "p1_value", "p2": "p2_value"}

    def _fn0(predictions, labels, weights=None):
      self.assertEqual("p1_value", predictions)
      self.assertEqual("l1_value", labels)
      self.assertEqual("f2_value", weights)
      return "metric_fn_result"

    def _fn1(predictions, targets, weights=None):
      self.assertEqual("p1_value", predictions)
      self.assertEqual("l1_value", targets)
      self.assertEqual("f2_value", weights)
      return "metric_fn_result"

    def _fn2(prediction, label, weight=None):
      self.assertEqual("p1_value", prediction)
      self.assertEqual("l1_value", label)
      self.assertEqual("f2_value", weight)
      return "metric_fn_result"

    def _fn3(prediction, target, weight=None):
      self.assertEqual("p1_value", prediction)
      self.assertEqual("l1_value", target)
      self.assertEqual("f2_value", weight)
      return "metric_fn_result"

    for fn in (_fn0, _fn1, _fn2, _fn3):
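      # Each signature uses a different accepted alias set; MetricSpec should
      # pass the values selected by prediction_key, label_key, and weight_key.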
      spec = MetricSpec(
          metric_fn=fn, prediction_key="p1", label_key="l1", weight_key="f2")
      self.assertEqual(
          "metric_fn_result",
          spec.create_metric_ops(features, labels_, predictions_))
# Example #2
    def testCustomMetrics(self):
        """Tests custom evaluation metrics."""
        def _input_fn(num_epochs=None):
            # Create 4 rows: one where y = x, three where y = Not(x).
            labels = constant_op.constant([[1], [0], [0], [0]])
            features = {
                'x':
                input_lib.limit_epochs(array_ops.ones(shape=[4, 1],
                                                      dtype=dtypes.float32),
                                       num_epochs=num_epochs),
            }
            return features, labels

        def _my_metric_op(predictions, labels):
            # For binary classification, the 2nd column of "predictions" holds
            # the probability of the positive class.
            predictions = array_ops.strided_slice(predictions, [0, 1], [-1, 2],
                                                  end_mask=1)
            labels = math_ops.cast(labels, predictions.dtype)
            return math_ops.reduce_sum(math_ops.multiply(predictions, labels))

        classifier = debug.DebugClassifier(config=run_config.RunConfig(
            tf_random_seed=1))

        classifier.fit(input_fn=_input_fn, steps=5)
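        # Evaluate with metrics keyed to different prediction tensors: hard
        # "classes" for accuracy and precision, "probabilities" for the custom op.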
        scores = classifier.evaluate(
            input_fn=_input_fn,
            steps=5,
            metrics={
                'my_accuracy':
                MetricSpec(metric_fn=metric_ops.streaming_accuracy,
                           prediction_key='classes'),
                'my_precision':
                MetricSpec(metric_fn=metric_ops.streaming_precision,
                           prediction_key='classes'),
                'my_metric':
                MetricSpec(metric_fn=_my_metric_op,
                           prediction_key='probabilities')
            })
        self.assertTrue(
            set(['loss', 'my_accuracy', 'my_precision',
                 'my_metric']).issubset(set(scores.keys())))
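        # The streaming accuracy reported by evaluate() should match sklearn's
        # accuracy on the predicted classes.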
        predict_input_fn = functools.partial(_input_fn, num_epochs=1)
        predictions = np.array(
            list(classifier.predict_classes(input_fn=predict_input_fn)))
        self.assertEqual(_sklearn.accuracy_score([1, 0, 0, 0], predictions),
                         scores['my_accuracy'])

        # Test the case where prediction_key is neither "classes" nor
        # "probabilities".
        with self.assertRaisesRegexp(KeyError, 'bad_type'):
            classifier.evaluate(input_fn=_input_fn,
                                steps=5,
                                metrics={
                                    'bad_name':
                                    MetricSpec(
                                        metric_fn=metric_ops.streaming_auc,
                                        prediction_key='bad_type')
                                })
  def test_no_args(self):
    def _fn():
      self.fail("Expected failure before metric_fn.")

    spec = MetricSpec(metric_fn=_fn)
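    # Construction succeeds, but a metric_fn that accepts no arguments cannot
    # be called with predictions, so create_metric_ops raises TypeError.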
    with self.assertRaises(TypeError):
      spec.create_metric_ops(
          {"f1": "f1_value"}, "labels_value", "predictions_value")
  def test_no_named_labels_or_predictions_2args(self):
    features = {"f1": "f1_value"}
    labels_ = "labels_value"
    predictions_ = "predictions_value"

    def _fn(a, b):
      del a, b
      self.fail("Expected failure before metric_fn.")

    spec = MetricSpec(metric_fn=_fn)
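    # With two arguments but no recognized labels/predictions names,
    # create_metric_ops raises TypeError.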
    with self.assertRaises(TypeError):
      spec.create_metric_ops(features, labels_, predictions_)
  def test_named_labels_no_predictions(self):
    features = {"f1": "f1_value"}
    labels_ = "labels_value"
    predictions_ = "predictions_value"

    def _fn(labels):
      self.assertEqual(labels_, labels)
      return "metric_fn_result"

    spec = MetricSpec(metric_fn=_fn)
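    # A metric_fn that names labels but has no predictions argument is rejected
    # when the ops are created.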
    with self.assertRaises(TypeError):
      spec.create_metric_ops(features, labels_, predictions_)
  def test_no_named_labels_or_predictions_1arg(self):
    features = {"f1": "f1_value"}
    labels_ = "labels_value"
    predictions_ = "predictions_value"

    def _fn(a):
      self.assertEqual(predictions_, a)
      return "metric_fn_result"

    spec = MetricSpec(metric_fn=_fn)
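    # A single unnamed argument receives the predictions.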
    self.assertEqual(
        "metric_fn_result",
        spec.create_metric_ops(features, labels_, predictions_))
  def test_no_named_predictions_named_labels_second_arg(self):
    features = {"f1": "f1_value"}
    labels_ = "labels_value"
    predictions_ = "predictions_value"

    def _fn(predictions_by_another_name, labels):
      self.assertEqual(predictions_, predictions_by_another_name)
      self.assertEqual(labels_, labels)
      return "metric_fn_result"

    spec = MetricSpec(metric_fn=_fn)
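    # The first argument receives predictions even under a different name; the
    # argument named "labels" receives the labels.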
    self.assertEqual(
        "metric_fn_result",
        spec.create_metric_ops(features, labels_, predictions_))
# Example #8
    def testCustomMetrics(self):
        """Tests custom evaluation metrics."""
        def _input_fn(num_epochs=None):
            # Create 4 rows: one where y = x, three where y = Not(x).
            labels = constant_op.constant([[1.], [0.], [0.], [0.]])
            features = {
                'x':
                input_lib.limit_epochs(array_ops.ones(shape=[4, 1],
                                                      dtype=dtypes.float32),
                                       num_epochs=num_epochs),
            }
            return features, labels

        def _my_metric_op(predictions, labels):
            return math_ops.reduce_sum(math_ops.multiply(predictions, labels))

        regressor = debug.DebugRegressor(config=run_config.RunConfig(
            tf_random_seed=1))

        regressor.fit(input_fn=_input_fn, steps=5)
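        # Evaluate with a built-in streaming MSE metric and a custom op, both
        # reading the regressor's "scores" predictions.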
        scores = regressor.evaluate(
            input_fn=_input_fn,
            steps=1,
            metrics={
                'my_error':
                MetricSpec(metric_fn=metric_ops.streaming_mean_squared_error,
                           prediction_key='scores'),
                'my_metric':
                MetricSpec(metric_fn=_my_metric_op, prediction_key='scores')
            })
        self.assertIn('loss', set(scores.keys()))
        self.assertIn('my_error', set(scores.keys()))
        self.assertIn('my_metric', set(scores.keys()))
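        # The reported MSE should match sklearn's value computed on the
        # predicted scores.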
        predict_input_fn = functools.partial(_input_fn, num_epochs=1)
        predictions = np.array(
            list(regressor.predict_scores(input_fn=predict_input_fn)))
        self.assertAlmostEqual(
            _sklearn.mean_squared_error(np.array([1, 0, 0, 0]), predictions),
            scores['my_error'])

        # Tests the case where the prediction_key is not "scores".
        with self.assertRaisesRegexp(KeyError, 'bad_type'):
            regressor.evaluate(input_fn=_input_fn,
                               steps=1,
                               metrics={
                                   'bad_name':
                                   MetricSpec(
                                       metric_fn=metric_ops.streaming_auc,
                                       prediction_key='bad_type')
                               })
  def test_single_labels_with_key(self):
    features = {"f1": "f1_value", "f2": "f2_value"}
    labels = "l1_value"
    predictions = {"p1": "p1_value", "p2": "p2_value"}

    def _fn(predictions, labels, weights=None):
      del labels, predictions, weights
      self.fail("Expected failure before metric_fn.")

    spec = MetricSpec(
        metric_fn=_fn, prediction_key="p1", label_key="l1", weight_key="f2")
    with self.assertRaisesRegexp(
        ValueError, "MetricSpec with label_key specified requires labels dict"):
      spec.create_metric_ops(features, labels, predictions)
  def test_single_label(self):
    features = {"f1": "f1_value", "f2": "f2_value"}
    labels_ = "l1_value"
    predictions_ = {"p1": "p1_value", "p2": "p2_value"}

    def _fn(predictions, labels, weights=None):
      self.assertEqual("p1_value", predictions)
      self.assertEqual(labels_, labels)
      self.assertEqual("f2_value", weights)
      return "metric_fn_result"

    spec = MetricSpec(metric_fn=_fn, prediction_key="p1", weight_key="f2")
    self.assertEqual(
        "metric_fn_result",
        spec.create_metric_ops(features, labels_, predictions_))
  def test_labels_dict_no_key(self):
    features = {"f1": "f1_value", "f2": "f2_value"}
    labels = {"l1": "l1_value", "l2": "l2_value"}
    predictions = {"p1": "p1_value", "p2": "p2_value"}

    def _fn(labels, predictions, weights=None):
      del labels, predictions, weights
      self.fail("Expected failure before metric_fn.")

    spec = MetricSpec(metric_fn=_fn, prediction_key="p1", weight_key="f2")
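    # Without label_key, a labels dict with more than one entry is ambiguous
    # and is rejected.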
    with self.assertRaisesRegexp(
        ValueError,
        "MetricSpec without specified label_key requires labels tensor or"
        " single element dict"):
      spec.create_metric_ops(features, labels, predictions)
  def test_multiple_weight_args(self):
    def _fn(predictions, labels, weights=None, weight=None):
      del predictions, labels, weights, weight
      self.fail("Expected failure before metric_fn.")

    with self.assertRaisesRegexp(ValueError, "provide only one of.*weight"):
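      # Declaring both "weights" and "weight" is ambiguous, so MetricSpec
      # rejects the function at construction time.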
      MetricSpec(metric_fn=_fn)
  def test_multiple_prediction_args(self):
    def _fn(predictions, prediction, labels):
      del predictions, prediction, labels
      self.fail("Expected failure before metric_fn.")

    with self.assertRaisesRegexp(ValueError, "provide only one of.*prediction"):
      MetricSpec(metric_fn=_fn)
  def test_partial(self):
    features = {"f1": "f1_value", "f2": "f2_value"}
    labels = {"l1": "l1_value"}
    predictions = {"p1": "p1_value", "p2": "p2_value"}

    def custom_metric(predictions, labels, stuff, weights=None):
      self.assertEqual("p1_value", predictions)
      self.assertEqual("l1_value", labels)
      self.assertEqual("f2_value", weights)
      if stuff:
        return "metric_fn_result"
      raise ValueError("No stuff.")

    spec = MetricSpec(
        metric_fn=functools.partial(custom_metric, stuff=5),
        label_key="l1",
        prediction_key="p1",
        weight_key="f2")
    self.assertEqual(
        "metric_fn_result",
        spec.create_metric_ops(features, labels, predictions))

    spec = MetricSpec(
        metric_fn=functools.partial(custom_metric, stuff=None),
        prediction_key="p1", label_key="l1", weight_key="f2")
    with self.assertRaisesRegexp(ValueError, "No stuff."):
      spec.create_metric_ops(features, labels, predictions)
  def test_multiple_label_args(self):
    def _fn0(predictions, labels, targets):
      del predictions, labels, targets
      self.fail("Expected failure before metric_fn.")

    def _fn1(prediction, label, target):
      del prediction, label, target
      self.fail("Expected failure before metric_fn.")

    for fn in (_fn0, _fn1):
      with self.assertRaisesRegexp(ValueError, "provide only one of.*label"):
        MetricSpec(metric_fn=fn)
  def test_label_key_without_label_arg(self):
    def _fn0(predictions, weights=None):
      del predictions, weights
      self.fail("Expected failure before metric_fn.")

    def _fn1(prediction, weight=None):
      del prediction, weight
      self.fail("Expected failure before metric_fn.")

    for fn in (_fn0, _fn1):
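      # A label_key cannot be honored if the metric_fn has no labels argument.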
      with self.assertRaisesRegexp(ValueError, "label.*missing"):
        MetricSpec(metric_fn=fn, label_key="l1")
  def test_str(self):
    def _metric_fn(labels, predictions, weights=None):
      return predictions, labels, weights

    string = str(MetricSpec(
        metric_fn=_metric_fn,
        label_key="my_label",
        prediction_key="my_prediction",
        weight_key="my_weight"))
    self.assertIn("_metric_fn", string)
    self.assertIn("my_label", string)
    self.assertIn("my_prediction", string)
    self.assertIn("my_weight", string)
  def test_partial_str(self):

    def custom_metric(predictions, labels, stuff, weights=None):
      return predictions, labels, weights, stuff

    string = str(MetricSpec(
        metric_fn=functools.partial(custom_metric, stuff=5),
        label_key="my_label",
        prediction_key="my_prediction",
        weight_key="my_weight"))
    self.assertIn("custom_metric", string)
    self.assertIn("my_label", string)
    self.assertIn("my_prediction", string)
    self.assertIn("my_weight", string)
  def test_weight_key_without_weight_arg(self):
    def _fn0(predictions, labels):
      del predictions, labels
      self.fail("Expected failure before metric_fn.")

    def _fn1(prediction, label):
      del prediction, label
      self.fail("Expected failure before metric_fn.")

    def _fn2(predictions, targets):
      del predictions, targets
      self.fail("Expected failure before metric_fn.")

    def _fn3(prediction, target):
      del prediction, target
      self.fail("Expected failure before metric_fn.")

    for fn in (_fn0, _fn1, _fn2, _fn3):
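      # A weight_key cannot be honored if the metric_fn has no weights argument.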
      with self.assertRaisesRegexp(ValueError, "weight.*missing"):
        MetricSpec(metric_fn=fn, weight_key="f2")