def test_named_args_with_weights(self):
  features = {"f1": "f1_value", "f2": "f2_value"}
  labels_ = {"l1": "l1_value", "l2": "l2_value"}
  predictions_ = {"p1": "p1_value", "p2": "p2_value"}

  def _fn0(predictions, labels, weights=None):
    self.assertEqual("p1_value", predictions)
    self.assertEqual("l1_value", labels)
    self.assertEqual("f2_value", weights)
    return "metric_fn_result"

  def _fn1(predictions, targets, weights=None):
    self.assertEqual("p1_value", predictions)
    self.assertEqual("l1_value", targets)
    self.assertEqual("f2_value", weights)
    return "metric_fn_result"

  def _fn2(prediction, label, weight=None):
    self.assertEqual("p1_value", prediction)
    self.assertEqual("l1_value", label)
    self.assertEqual("f2_value", weight)
    return "metric_fn_result"

  def _fn3(prediction, target, weight=None):
    self.assertEqual("p1_value", prediction)
    self.assertEqual("l1_value", target)
    self.assertEqual("f2_value", weight)
    return "metric_fn_result"

  for fn in (_fn0, _fn1, _fn2, _fn3):
    spec = MetricSpec(
        metric_fn=fn, prediction_key="p1", label_key="l1", weight_key="f2")
    self.assertEqual(
        "metric_fn_result",
        spec.create_metric_ops(features, labels_, predictions_))
def testCustomMetrics(self):
  """Tests custom evaluation metrics."""

  def _input_fn(num_epochs=None):
    # Create 4 rows, one of them (y = x), three of them (y=Not(x)).
    labels = constant_op.constant([[1], [0], [0], [0]])
    features = {
        'x':
            input_lib.limit_epochs(
                array_ops.ones(shape=[4, 1], dtype=dtypes.float32),
                num_epochs=num_epochs),
    }
    return features, labels

  def _my_metric_op(predictions, labels):
    # For the case of binary classification, the 2nd column of "predictions"
    # denotes the model predictions.
    labels = math_ops.cast(labels, dtypes.float32)
    predictions = array_ops.strided_slice(
        predictions, [0, 1], [-1, 2], end_mask=1)
    labels = math_ops.cast(labels, predictions.dtype)
    return math_ops.reduce_sum(math_ops.multiply(predictions, labels))

  classifier = debug.DebugClassifier(
      config=run_config.RunConfig(tf_random_seed=1))

  classifier.fit(input_fn=_input_fn, steps=5)
  scores = classifier.evaluate(
      input_fn=_input_fn,
      steps=5,
      metrics={
          'my_accuracy':
              MetricSpec(
                  metric_fn=metric_ops.streaming_accuracy,
                  prediction_key='classes'),
          'my_precision':
              MetricSpec(
                  metric_fn=metric_ops.streaming_precision,
                  prediction_key='classes'),
          'my_metric':
              MetricSpec(
                  metric_fn=_my_metric_op, prediction_key='probabilities')
      })
  self.assertTrue(
      set(['loss', 'my_accuracy', 'my_precision', 'my_metric']).issubset(
          set(scores.keys())))
  predict_input_fn = functools.partial(_input_fn, num_epochs=1)
  predictions = np.array(
      list(classifier.predict_classes(input_fn=predict_input_fn)))
  self.assertEqual(
      _sklearn.accuracy_score([1, 0, 0, 0], predictions),
      scores['my_accuracy'])

  # Test the case where the prediction_key is neither "classes" nor
  # "probabilities".
  with self.assertRaisesRegexp(KeyError, 'bad_type'):
    classifier.evaluate(
        input_fn=_input_fn,
        steps=5,
        metrics={
            'bad_name':
                MetricSpec(
                    metric_fn=metric_ops.streaming_auc,
                    prediction_key='bad_type')
        })
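# Illustrative sketch, not part of the original test suite: a custom metric_fn
# handed to MetricSpec (like _my_metric_op above) follows the
# (predictions, labels, weights=None) convention and returns a metric tensor
# (or a (value_op, update_op) pair, as the streaming_* metrics do). The
# weighting logic below is an assumption for illustration only; it reuses the
# math_ops import already present in this test module.
def _example_weighted_sum_metric(predictions, labels, weights=None):
  # Match the label dtype to the predictions before multiplying.
  labels = math_ops.cast(labels, predictions.dtype)
  product = math_ops.multiply(predictions, labels)
  if weights is not None:
    # Scale element-wise by the weight tensor when one is provided.
    product = math_ops.multiply(product, math_ops.cast(weights, product.dtype))
  return math_ops.reduce_sum(product)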
def test_no_args(self):

  def _fn():
    self.fail("Expected failure before metric_fn.")

  spec = MetricSpec(metric_fn=_fn)
  with self.assertRaises(TypeError):
    spec.create_metric_ops(
        {"f1": "f1_value"}, "labels_value", "predictions_value")
def test_no_named_labels_or_predictions_2args(self):
  features = {"f1": "f1_value"}
  labels_ = "labels_value"
  predictions_ = "predictions_value"

  def _fn(a, b):
    del a, b
    self.fail("Expected failure before metric_fn.")

  spec = MetricSpec(metric_fn=_fn)
  with self.assertRaises(TypeError):
    spec.create_metric_ops(features, labels_, predictions_)
def test_named_labels_no_predictions(self):
  features = {"f1": "f1_value"}
  labels_ = "labels_value"
  predictions_ = "predictions_value"

  def _fn(labels):
    self.assertEqual(labels_, labels)
    return "metric_fn_result"

  spec = MetricSpec(metric_fn=_fn)
  with self.assertRaises(TypeError):
    spec.create_metric_ops(features, labels_, predictions_)
def test_no_named_labels_or_predictions_1arg(self):
  features = {"f1": "f1_value"}
  labels_ = "labels_value"
  predictions_ = "predictions_value"

  def _fn(a):
    self.assertEqual(predictions_, a)
    return "metric_fn_result"

  spec = MetricSpec(metric_fn=_fn)
  self.assertEqual(
      "metric_fn_result",
      spec.create_metric_ops(features, labels_, predictions_))
def test_no_named_predictions_named_labels_second_arg(self):
  features = {"f1": "f1_value"}
  labels_ = "labels_value"
  predictions_ = "predictions_value"

  def _fn(predictions_by_another_name, labels):
    self.assertEqual(predictions_, predictions_by_another_name)
    self.assertEqual(labels_, labels)
    return "metric_fn_result"

  spec = MetricSpec(metric_fn=_fn)
  self.assertEqual(
      "metric_fn_result",
      spec.create_metric_ops(features, labels_, predictions_))
def testCustomMetrics(self):
  """Tests custom evaluation metrics."""

  def _input_fn(num_epochs=None):
    # Create 4 rows, one of them (y = x), three of them (y=Not(x)).
    labels = constant_op.constant([[1.], [0.], [0.], [0.]])
    features = {
        'x':
            input_lib.limit_epochs(
                array_ops.ones(shape=[4, 1], dtype=dtypes.float32),
                num_epochs=num_epochs),
    }
    return features, labels

  def _my_metric_op(predictions, labels):
    return math_ops.reduce_sum(math_ops.multiply(predictions, labels))

  regressor = debug.DebugRegressor(
      config=run_config.RunConfig(tf_random_seed=1))

  regressor.fit(input_fn=_input_fn, steps=5)
  scores = regressor.evaluate(
      input_fn=_input_fn,
      steps=1,
      metrics={
          'my_error':
              MetricSpec(
                  metric_fn=metric_ops.streaming_mean_squared_error,
                  prediction_key='scores'),
          'my_metric':
              MetricSpec(metric_fn=_my_metric_op, prediction_key='scores')
      })
  self.assertIn('loss', set(scores.keys()))
  self.assertIn('my_error', set(scores.keys()))
  self.assertIn('my_metric', set(scores.keys()))
  predict_input_fn = functools.partial(_input_fn, num_epochs=1)
  predictions = np.array(
      list(regressor.predict_scores(input_fn=predict_input_fn)))
  self.assertAlmostEqual(
      _sklearn.mean_squared_error(np.array([1, 0, 0, 0]), predictions),
      scores['my_error'])

  # Tests the case where the prediction_key is not "scores".
  with self.assertRaisesRegexp(KeyError, 'bad_type'):
    regressor.evaluate(
        input_fn=_input_fn,
        steps=1,
        metrics={
            'bad_name':
                MetricSpec(
                    metric_fn=metric_ops.streaming_auc,
                    prediction_key='bad_type')
        })
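# Illustrative sketch, not part of the original test suite: a MetricSpec built
# around the hypothetical _example_weighted_sum_metric above can be passed to
# evaluate() exactly like the streaming metrics in the regressor test. The
# metric name 'my_weighted_sum' is an assumption for illustration only.
def _example_metrics_dict():
  # Returns a metrics dict of the shape expected by evaluate(metrics=...).
  return {
      'my_weighted_sum':
          MetricSpec(
              metric_fn=_example_weighted_sum_metric,
              prediction_key='scores'),
  }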
def test_single_labels_with_key(self):
  features = {"f1": "f1_value", "f2": "f2_value"}
  labels = "l1_value"
  predictions = {"p1": "p1_value", "p2": "p2_value"}

  def _fn(predictions, labels, weights=None):
    del labels, predictions, weights
    self.fail("Expected failure before metric_fn.")

  spec = MetricSpec(
      metric_fn=_fn, prediction_key="p1", label_key="l1", weight_key="f2")
  with self.assertRaisesRegexp(
      ValueError,
      "MetricSpec with label_key specified requires labels dict"):
    spec.create_metric_ops(features, labels, predictions)
def test_single_label(self):
  features = {"f1": "f1_value", "f2": "f2_value"}
  labels_ = "l1_value"
  predictions_ = {"p1": "p1_value", "p2": "p2_value"}

  def _fn(predictions, labels, weights=None):
    self.assertEqual("p1_value", predictions)
    self.assertEqual(labels_, labels)
    self.assertEqual("f2_value", weights)
    return "metric_fn_result"

  spec = MetricSpec(metric_fn=_fn, prediction_key="p1", weight_key="f2")
  self.assertEqual(
      "metric_fn_result",
      spec.create_metric_ops(features, labels_, predictions_))
def test_labels_dict_no_key(self):
  features = {"f1": "f1_value", "f2": "f2_value"}
  labels = {"l1": "l1_value", "l2": "l2_value"}
  predictions = {"p1": "p1_value", "p2": "p2_value"}

  def _fn(labels, predictions, weights=None):
    del labels, predictions, weights
    self.fail("Expected failure before metric_fn.")

  spec = MetricSpec(metric_fn=_fn, prediction_key="p1", weight_key="f2")
  with self.assertRaisesRegexp(
      ValueError,
      "MetricSpec without specified label_key requires labels tensor or"
      " single element dict"):
    spec.create_metric_ops(features, labels, predictions)
def test_multiple_weight_args(self):

  def _fn(predictions, labels, weights=None, weight=None):
    del predictions, labels, weights, weight
    self.fail("Expected failure before metric_fn.")

  with self.assertRaisesRegexp(ValueError, "provide only one of.*weight"):
    MetricSpec(metric_fn=_fn)
def test_multiple_prediction_args(self):

  def _fn(predictions, prediction, labels):
    del predictions, prediction, labels
    self.fail("Expected failure before metric_fn.")

  with self.assertRaisesRegexp(ValueError, "provide only one of.*prediction"):
    MetricSpec(metric_fn=_fn)
def test_partial(self):
  features = {"f1": "f1_value", "f2": "f2_value"}
  labels = {"l1": "l1_value"}
  predictions = {"p1": "p1_value", "p2": "p2_value"}

  def custom_metric(predictions, labels, stuff, weights=None):
    self.assertEqual("p1_value", predictions)
    self.assertEqual("l1_value", labels)
    self.assertEqual("f2_value", weights)
    if stuff:
      return "metric_fn_result"
    raise ValueError("No stuff.")

  spec = MetricSpec(
      metric_fn=functools.partial(custom_metric, stuff=5),
      label_key="l1",
      prediction_key="p1",
      weight_key="f2")
  self.assertEqual(
      "metric_fn_result",
      spec.create_metric_ops(features, labels, predictions))

  spec = MetricSpec(
      metric_fn=functools.partial(custom_metric, stuff=None),
      prediction_key="p1",
      label_key="l1",
      weight_key="f2")
  with self.assertRaisesRegexp(ValueError, "No stuff."):
    spec.create_metric_ops(features, labels, predictions)
def test_multiple_label_args(self):

  def _fn0(predictions, labels, targets):
    del predictions, labels, targets
    self.fail("Expected failure before metric_fn.")

  def _fn1(prediction, label, target):
    del prediction, label, target
    self.fail("Expected failure before metric_fn.")

  for fn in (_fn0, _fn1):
    with self.assertRaisesRegexp(ValueError, "provide only one of.*label"):
      MetricSpec(metric_fn=fn)
def test_label_key_without_label_arg(self):

  def _fn0(predictions, weights=None):
    del predictions, weights
    self.fail("Expected failure before metric_fn.")

  def _fn1(prediction, weight=None):
    del prediction, weight
    self.fail("Expected failure before metric_fn.")

  for fn in (_fn0, _fn1):
    with self.assertRaisesRegexp(ValueError, "label.*missing"):
      MetricSpec(metric_fn=fn, label_key="l1")
def test_str(self):

  def _metric_fn(labels, predictions, weights=None):
    return predictions, labels, weights

  string = str(MetricSpec(
      metric_fn=_metric_fn,
      label_key="my_label",
      prediction_key="my_prediction",
      weight_key="my_weight"))
  self.assertIn("_metric_fn", string)
  self.assertIn("my_label", string)
  self.assertIn("my_prediction", string)
  self.assertIn("my_weight", string)
def test_partial_str(self):

  def custom_metric(predictions, labels, stuff, weights=None):
    return predictions, labels, weights, stuff

  string = str(MetricSpec(
      metric_fn=functools.partial(custom_metric, stuff=5),
      label_key="my_label",
      prediction_key="my_prediction",
      weight_key="my_weight"))
  self.assertIn("custom_metric", string)
  self.assertIn("my_label", string)
  self.assertIn("my_prediction", string)
  self.assertIn("my_weight", string)
def test_weight_key_without_weight_arg(self):

  def _fn0(predictions, labels):
    del predictions, labels
    self.fail("Expected failure before metric_fn.")

  def _fn1(prediction, label):
    del prediction, label
    self.fail("Expected failure before metric_fn.")

  def _fn2(predictions, targets):
    del predictions, targets
    self.fail("Expected failure before metric_fn.")

  def _fn3(prediction, target):
    del prediction, target
    self.fail("Expected failure before metric_fn.")

  for fn in (_fn0, _fn1, _fn2, _fn3):
    with self.assertRaisesRegexp(ValueError, "weight.*missing"):
      MetricSpec(metric_fn=fn, weight_key="f2")