def testEvaluateWithAdditionalMetricsBasic(self):
  temp_eval_export_dir = self._getEvalExportDir()
  _, eval_export_dir = multi_head.simple_multi_head(None,
                                                    temp_eval_export_dir)

  eval_saved_model = load.EvalSavedModel(eval_export_dir)
  _, prediction_dict, label_dict = (
      eval_saved_model.get_features_predictions_labels_dicts())
  with eval_saved_model.graph_as_default():
    metric_ops = {}
    value_op, update_op = tf.compat.v1.metrics.mean_absolute_error(
        label_dict['english_head'][0][0],
        prediction_dict['english_head/probabilities'][0][1])
    metric_ops['mean_absolute_error/english_head'] = (value_op, update_op)

    value_op, update_op = metrics.total(
        tf.shape(input=prediction_dict['english_head/logits'])[0])
    metric_ops['example_count/english_head'] = (value_op, update_op)

    eval_saved_model.register_additional_metric_ops(metric_ops)

  example1 = self._makeMultiHeadExample('english')
  features_predictions_labels = self.predict_injective_single_example(
      eval_saved_model, example1.SerializeToString())
  eval_saved_model.perform_metrics_update(features_predictions_labels)

  example2 = self._makeMultiHeadExample('chinese')
  features_predictions_labels = self.predict_injective_single_example(
      eval_saved_model, example2.SerializeToString())
  eval_saved_model.perform_metrics_update(features_predictions_labels)

  metric_values = eval_saved_model.get_metric_values()

  # Check that the original metrics are still there.
  self.assertDictElementsAlmostEqual(
      metric_values, {
          'accuracy/english_head': 1.0,
          'accuracy/chinese_head': 1.0,
          'accuracy/other_head': 1.0,
          'auc/english_head': 1.0,
          'auc/chinese_head': 1.0,
          'auc/other_head': 1.0,
          'label/mean/english_head': 0.5,
          'label/mean/chinese_head': 0.5,
          'label/mean/other_head': 0.0
      })

  # Check the added metrics.
  # We don't control the trained model's weights fully, but it should
  # predict probabilities > 0.7.
  self.assertIn('mean_absolute_error/english_head', metric_values)
  self.assertLess(metric_values['mean_absolute_error/english_head'], 0.3)
  self.assertHasKeyWithValueAlmostEqual(metric_values,
                                        'example_count/english_head', 2.0)
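# A minimal sketch of the (value_op, update_op) contract that
# register_additional_metric_ops expects, in the style of metrics.total.
# This is an assumption-labeled illustration, not the actual metrics.total
# implementation: it keeps a running sum in a metric variable.
import tensorflow as tf

def _total_like(values):  # hypothetical helper, TF1-compat style
  values = tf.cast(values, tf.float64)
  total_var = tf.compat.v1.get_variable(
      'total_like',
      shape=[],
      dtype=tf.float64,
      initializer=tf.compat.v1.zeros_initializer(),
      trainable=False,
      collections=[
          tf.compat.v1.GraphKeys.LOCAL_VARIABLES,
          tf.compat.v1.GraphKeys.METRIC_VARIABLES,
      ])
  # value_op reads the accumulated state; update_op folds in a new batch.
  update_op = tf.compat.v1.assign_add(total_var, tf.reduce_sum(values))
  value_op = tf.identity(total_var)
  return value_op, update_op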
def _addExampleCountMetricCallback(  # pylint: disable=invalid-name
    features_dict, predictions_dict, labels_dict):
  del features_dict
  del labels_dict
  metric_ops = {}
  value_op, update_op = metric_fns.total(
      tf.shape(input=predictions_dict['logits'])[0])
  metric_ops['added_example_count'] = (value_op, update_op)
  return metric_ops
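# Hedged usage sketch: a callback with this (features, predictions, labels)
# signature can be handed to TFMA through add_metrics_callbacks. The
# default_eval_shared_model / run_model_analysis wiring below follows the
# public tfma API; the paths are placeholders, not values from this code.
import tensorflow_model_analysis as tfma

eval_shared_model = tfma.default_eval_shared_model(
    eval_saved_model_path='/path/to/eval_saved_model',
    add_metrics_callbacks=[_addExampleCountMetricCallback])
eval_result = tfma.run_model_analysis(
    eval_shared_model=eval_shared_model,
    data_location='/path/to/examples.tfrecord')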
def testGetAndSetMetricVariables(self):
  temp_eval_export_dir = self._getEvalExportDir()
  _, eval_export_dir = multi_head.simple_multi_head(None,
                                                    temp_eval_export_dir)

  eval_saved_model = load.EvalSavedModel(eval_export_dir)
  _, prediction_dict, _ = (
      eval_saved_model.get_features_predictions_labels_dicts())
  with eval_saved_model.graph_as_default():
    metric_ops = {}
    value_op, update_op = metrics.total(
        tf.shape(input=prediction_dict['english_head/logits'])[0])
    metric_ops['example_count/english_head'] = (value_op, update_op)
    eval_saved_model.register_additional_metric_ops(metric_ops)

  example1 = self._makeMultiHeadExample('english')
  features_predictions_labels = self.predict_injective_single_example(
      eval_saved_model, example1.SerializeToString())
  eval_saved_model.perform_metrics_update(features_predictions_labels)
  metric_values = eval_saved_model.get_metric_values()
  self.assertDictElementsAlmostEqual(
      metric_values, {
          'label/mean/english_head': 1.0,
          'label/mean/chinese_head': 0.0,
          'label/mean/other_head': 0.0,
          'example_count/english_head': 1.0
      })
  metric_variables = eval_saved_model.get_metric_variables()

  example2 = self._makeMultiHeadExample('chinese')
  features_predictions_labels = self.predict_injective_single_example(
      eval_saved_model, example2.SerializeToString())
  eval_saved_model.perform_metrics_update(features_predictions_labels)
  metric_values = eval_saved_model.get_metric_values()
  self.assertDictElementsAlmostEqual(
      metric_values, {
          'label/mean/english_head': 0.5,
          'label/mean/chinese_head': 0.5,
          'label/mean/other_head': 0.0,
          'example_count/english_head': 2.0
      })

  # Now set metric variables to what they were after the first example.
  eval_saved_model.set_metric_variables(metric_variables)
  metric_values = eval_saved_model.get_metric_values()
  self.assertDictElementsAlmostEqual(
      metric_values, {
          'label/mean/english_head': 1.0,
          'label/mean/chinese_head': 0.0,
          'label/mean/other_head': 0.0,
          'example_count/english_head': 1.0
      })
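# Hedged sketch of one way get/set_metric_variables can be composed: snapshot
# the accumulated state, attempt a batch of updates, and roll back on
# failure. update_with_rollback is hypothetical; it only combines methods
# already exercised by the test above.
def update_with_rollback(eval_saved_model, fpl_list):
  snapshot = eval_saved_model.get_metric_variables()
  try:
    for fpl in fpl_list:
      eval_saved_model.perform_metrics_update(fpl)
  except Exception:
    # Restore the state captured before this batch, then re-raise.
    eval_saved_model.set_metric_variables(snapshot)
    raise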
def get_metric_ops(self, features_dict, predictions_dict, labels_dict):
  ref_tensor = _get_prediction_tensor(predictions_dict)
  if ref_tensor is None:
    # Note that if predictions_dict is a Tensor and not a dict,
    # get_predictions_tensor will return predictions_dict, so if we get
    # here, it means that predictions_dict is a dict without any of the
    # standard keys.
    #
    # If we can't find any of the standard keys, then pick the first key
    # in alphabetical order if the predictions dict is non-empty.
    # If the predictions dict is empty, try the labels dict.
    # If that is empty too, default to the empty Tensor.
    tf.logging.info(
        'ExampleCount post export metric: could not find any of '
        'the standard keys in predictions_dict (keys were: %s)',
        predictions_dict.keys())
    if predictions_dict is not None and predictions_dict.keys():
      first_key = sorted(predictions_dict.keys())[0]
      ref_tensor = predictions_dict[first_key]
      tf.logging.info('Using the first key from predictions_dict: %s',
                      first_key)
    elif labels_dict is not None:
      if types.is_tensor(labels_dict):
        ref_tensor = labels_dict
        tf.logging.info('Using the labels Tensor')
      elif labels_dict.keys():
        first_key = sorted(labels_dict.keys())[0]
        ref_tensor = labels_dict[first_key]
        tf.logging.info('Using the first key from labels_dict: %s', first_key)

    if ref_tensor is None:
      tf.logging.info('Could not find a reference Tensor for example count. '
                      'Defaulting to the empty Tensor.')
      ref_tensor = tf.constant([])

  return {
      metric_keys.EXAMPLE_COUNT: metrics.total(tf.shape(ref_tensor)[0])
  }
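# Framework-free sketch of the fallback order implemented above; pick_ref is
# hypothetical and stands in for the Tensor-selection logic (plain values
# replace Tensors so the ordering can be checked directly).
def pick_ref(predictions_dict, labels_dict):
  if predictions_dict:
    return predictions_dict[sorted(predictions_dict)[0]]
  if labels_dict is not None:
    if not isinstance(labels_dict, dict):  # labels passed as a bare Tensor
      return labels_dict
    if labels_dict:
      return labels_dict[sorted(labels_dict)[0]]
  return None  # caller falls back to the empty Tensor

assert pick_ref({'b': 2, 'a': 1}, None) == 1  # first key alphabetically
assert pick_ref({}, {'y': 9}) == 9            # empty predictions: try labels
assert pick_ref({}, {}) is None               # both empty: default applies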
def testResetMetricVariables(self):
  temp_eval_export_dir = self._getEvalExportDir()
  _, eval_export_dir = multi_head.simple_multi_head(None,
                                                    temp_eval_export_dir)

  eval_saved_model = load.EvalSavedModel(eval_export_dir)
  _, prediction_dict, _ = (
      eval_saved_model.get_features_predictions_labels_dicts())
  with eval_saved_model.graph_as_default():
    metric_ops = {}
    value_op, update_op = metrics.total(
        tf.shape(input=prediction_dict['english_head/logits'])[0])
    metric_ops['example_count/english_head'] = (value_op, update_op)
    eval_saved_model.register_additional_metric_ops(metric_ops)

  example1 = self._makeMultiHeadExample('english').SerializeToString()
  eval_saved_model.metrics_reset_update_get(example1)
  metric_values = eval_saved_model.get_metric_values()
  self.assertDictElementsAlmostEqual(
      metric_values, {
          'label/mean/english_head': 1.0,
          'label/mean/chinese_head': 0.0,
          'label/mean/other_head': 0.0,
          'example_count/english_head': 1.0
      })

  eval_saved_model.reset_metric_variables()

  example2 = self._makeMultiHeadExample('chinese').SerializeToString()
  eval_saved_model.metrics_reset_update_get(example2)
  metric_values = eval_saved_model.get_metric_values()
  self.assertDictElementsAlmostEqual(
      metric_values, {
          'label/mean/english_head': 0.0,
          'label/mean/chinese_head': 1.0,
          'label/mean/other_head': 0.0,
          'example_count/english_head': 1.0
      })
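# Hedged equivalence: for inputs that have already been run through predict,
# metrics_reset_update_get behaves like this composition of the other methods
# used in these tests (the helper name is hypothetical; the return value is
# inferred from how a later test reuses the result with set_metric_variables).
def _reset_update_get(eval_saved_model, features_predictions_labels):
  eval_saved_model.reset_metric_variables()
  eval_saved_model.perform_metrics_update(features_predictions_labels)
  return eval_saved_model.get_metric_variables()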
def testVariablePredictionLengths(self):
  # Check that we can handle cases where the model produces predictions of
  # different lengths for different examples.
  temp_eval_export_dir = self._getEvalExportDir()
  _, eval_export_dir = (
      fixed_prediction_classifier.simple_fixed_prediction_classifier(
          None, temp_eval_export_dir))

  eval_saved_model = load.EvalSavedModel(eval_export_dir)
  _, prediction_dict, _ = (
      eval_saved_model.get_features_predictions_labels_dicts())
  with eval_saved_model.graph_as_default():
    eval_saved_model.register_additional_metric_ops({
        'total_non_trivial_classes':
            metrics.total(
                tf.reduce_sum(
                    tf.cast(
                        tf.logical_and(
                            tf.not_equal(prediction_dict['classes'], '?'),
                            tf.not_equal(prediction_dict['classes'], '')),
                        tf.int32))),
        'example_count':
            metrics.total(tf.shape(prediction_dict['classes'])[0]),
        'total_score':
            metrics.total(prediction_dict['probabilities']),
    })

  example1 = self._makeExample(classes=['apple'], scores=[100.0])
  example2 = self._makeExample()
  example3 = self._makeExample(
      classes=['durian', 'elderberry', 'fig', 'grape'],
      scores=[300.0, 301.0, 302.0, 303.0])
  example4 = self._makeExample(
      classes=['banana', 'cherry'], scores=[400.0, 401.0])

  fpl_list1 = self.predict_injective_example_list(eval_saved_model, [
      example1.SerializeToString(),
      example2.SerializeToString(),
  ])
  fpl_list2 = self.predict_injective_example_list(eval_saved_model, [
      example3.SerializeToString(),
      example4.SerializeToString(),
  ])

  # Note that the '?' and 0 default values come from the model.
  self.assertAllEqual(
      np.array([[b'apple']]),
      fpl_list1[0].predictions['classes'][encoding.NODE_SUFFIX])
  self.assertAllEqual(
      np.array([[100]]),
      fpl_list1[0].predictions['probabilities'][encoding.NODE_SUFFIX])
  self.assertAllEqual(
      np.array([[b'?']]),
      fpl_list1[1].predictions['classes'][encoding.NODE_SUFFIX])
  self.assertAllEqual(
      np.array([[0]]),
      fpl_list1[1].predictions['probabilities'][encoding.NODE_SUFFIX])
  self.assertAllEqual(
      np.array([[b'durian', b'elderberry', b'fig', b'grape']]),
      fpl_list2[0].predictions['classes'][encoding.NODE_SUFFIX])
  self.assertAllEqual(
      np.array([[300, 301, 302, 303]]),
      fpl_list2[0].predictions['probabilities'][encoding.NODE_SUFFIX])
  self.assertAllEqual(
      np.array([[b'banana', b'cherry', b'?', b'?']]),
      fpl_list2[1].predictions['classes'][encoding.NODE_SUFFIX])
  self.assertAllEqual(
      np.array([[400, 401, 0, 0]]),
      fpl_list2[1].predictions['probabilities'][encoding.NODE_SUFFIX])

  eval_saved_model.metrics_reset_update_get_list(fpl_list1 + fpl_list2)
  metric_values = eval_saved_model.get_metric_values()
  self.assertDictElementsAlmostEqual(
      metric_values, {
          'total_non_trivial_classes': 7.0,
          'example_count': 4.0,
          'total_score': 2107.0,
      })
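# Hedged arithmetic check of the expected totals: the model's padding
# contributes '?' classes and 0 scores, which add nothing to either metric.
non_trivial = 1 + 0 + 4 + 2  # apple | (padded '?') | d/e/f/g | banana, cherry
total_score = (100.0) + (0.0) + (300.0 + 301.0 + 302.0 + 303.0) + (
    400.0 + 401.0 + 0.0 + 0.0)
assert non_trivial == 7 and total_score == 2107.0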
def get_metric_ops(self, features_dict, predictions_dict, labels_dict):
  value = features_dict[self._example_weight_key]
  return {metric_keys.EXAMPLE_WEIGHT: metrics.total(value)}
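# Hedged note on the metric above: EXAMPLE_WEIGHT is a running sum of the
# configured weight feature, so with weights 2.0 and 3.0 across two examples
# it reads 5.0, and with all-ones weights it reduces to an example count.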
def testMetricsResetUpdateGetList(self):
  temp_eval_export_dir = self._getEvalExportDir()
  _, eval_export_dir = multi_head.simple_multi_head(None,
                                                    temp_eval_export_dir)

  eval_saved_model = load.EvalSavedModel(eval_export_dir)
  _, prediction_dict, _ = (
      eval_saved_model.get_features_predictions_labels_dicts())
  with eval_saved_model.graph_as_default():
    metric_ops = {}
    value_op, update_op = metrics.total(
        tf.shape(input=prediction_dict['english_head/logits'])[0])
    metric_ops['example_count/english_head'] = (value_op, update_op)
    eval_saved_model.register_additional_metric_ops(metric_ops)

  example1 = self._makeMultiHeadExample('english')
  features_predictions_labels1 = self.predict_injective_single_example(
      eval_saved_model, example1.SerializeToString())
  metric_variables1 = eval_saved_model.metrics_reset_update_get(
      features_predictions_labels1)

  example2 = self._makeMultiHeadExample('chinese')
  features_predictions_labels2 = self.predict_injective_single_example(
      eval_saved_model, example2.SerializeToString())
  metric_variables2 = eval_saved_model.metrics_reset_update_get(
      features_predictions_labels2)

  example3 = self._makeMultiHeadExample('other')
  features_predictions_labels3 = self.predict_injective_single_example(
      eval_saved_model, example3.SerializeToString())
  metric_variables3 = eval_saved_model.metrics_reset_update_get(
      features_predictions_labels3)

  eval_saved_model.set_metric_variables(metric_variables1)
  metric_values1 = eval_saved_model.get_metric_values()
  self.assertDictElementsAlmostEqual(
      metric_values1, {
          'label/mean/english_head': 1.0,
          'label/mean/chinese_head': 0.0,
          'label/mean/other_head': 0.0,
          'example_count/english_head': 1.0
      })

  eval_saved_model.set_metric_variables(metric_variables2)
  metric_values2 = eval_saved_model.get_metric_values()
  self.assertDictElementsAlmostEqual(
      metric_values2, {
          'label/mean/english_head': 0.0,
          'label/mean/chinese_head': 1.0,
          'label/mean/other_head': 0.0,
          'example_count/english_head': 1.0
      })

  eval_saved_model.set_metric_variables(metric_variables3)
  metric_values3 = eval_saved_model.get_metric_values()
  self.assertDictElementsAlmostEqual(
      metric_values3, {
          'label/mean/english_head': 0.0,
          'label/mean/chinese_head': 0.0,
          'label/mean/other_head': 1.0,
          'example_count/english_head': 1.0
      })

  eval_saved_model.metrics_reset_update_get_list([
      features_predictions_labels1, features_predictions_labels2,
      features_predictions_labels3
  ])
  metric_values_combined = eval_saved_model.get_metric_values()
  self.assertDictElementsAlmostEqual(
      metric_values_combined, {
          'label/mean/english_head': 1.0 / 3.0,
          'label/mean/chinese_head': 1.0 / 3.0,
          'label/mean/other_head': 1.0 / 3.0,
          'example_count/english_head': 3.0
      })
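# Hedged sketch: because total-style and mean-style metrics keep sum-like
# state (running totals and counts), the per-example states returned by
# metrics_reset_update_get can plausibly be merged by elementwise addition.
# This assumes get_metric_variables returns a list of NumPy arrays and is not
# a general rule for arbitrary metrics; merge_metric_variables is
# hypothetical.
import numpy as np

def merge_metric_variables(accumulators):
  merged = [np.array(v, copy=True) for v in accumulators[0]]
  for acc in accumulators[1:]:
    for i, v in enumerate(acc):
      merged[i] = merged[i] + v
  return merged

# Under that assumption, restoring the merged state would reproduce the
# combined values computed by metrics_reset_update_get_list above:
#   eval_saved_model.set_metric_variables(merge_metric_variables(
#       [metric_variables1, metric_variables2, metric_variables3]))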