Example no. 1
    def testEvaluateWithAdditionalMetricsBasic(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = multi_head.simple_multi_head(
            None, temp_eval_export_dir)

        eval_saved_model = load.EvalSavedModel(eval_export_dir)
        _, prediction_dict, label_dict = (
            eval_saved_model.get_features_predictions_labels_dicts())
        with eval_saved_model.graph_as_default():
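            # Additional metric ops are constructed and registered while the
            # EvalSavedModel's graph is the default graph.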
            metric_ops = {}
            value_op, update_op = tf.compat.v1.metrics.mean_absolute_error(
                label_dict['english_head'][0][0],
                prediction_dict['english_head/probabilities'][0][1])
            metric_ops['mean_absolute_error/english_head'] = (value_op,
                                                              update_op)

            value_op, update_op = metrics.total(
                tf.shape(input=prediction_dict['english_head/logits'])[0])
            metric_ops['example_count/english_head'] = (value_op, update_op)

            eval_saved_model.register_additional_metric_ops(metric_ops)

        example1 = self._makeMultiHeadExample('english')
        features_predictions_labels = self.predict_injective_single_example(
            eval_saved_model, example1.SerializeToString())
        eval_saved_model.perform_metrics_update(features_predictions_labels)

        example2 = self._makeMultiHeadExample('chinese')
        features_predictions_labels = self.predict_injective_single_example(
            eval_saved_model, example2.SerializeToString())
        eval_saved_model.perform_metrics_update(features_predictions_labels)

        metric_values = eval_saved_model.get_metric_values()

        # Check that the original metrics are still there.
        self.assertDictElementsAlmostEqual(
            metric_values, {
                'accuracy/english_head': 1.0,
                'accuracy/chinese_head': 1.0,
                'accuracy/other_head': 1.0,
                'auc/english_head': 1.0,
                'auc/chinese_head': 1.0,
                'auc/other_head': 1.0,
                'label/mean/english_head': 0.5,
                'label/mean/chinese_head': 0.5,
                'label/mean/other_head': 0.0
            })

        # Check the added metrics.
        # We don't control the trained model's weights fully, but it should
        # predict probabilities > 0.7.
        self.assertIn('mean_absolute_error/english_head', metric_values)
        self.assertLess(metric_values['mean_absolute_error/english_head'], 0.3)

        self.assertHasKeyWithValueAlmostEqual(metric_values,
                                              'example_count/english_head',
                                              2.0)
Example no. 2
def _addExampleCountMetricCallback(  # pylint: disable=invalid-name
        features_dict, predictions_dict, labels_dict):
    del features_dict
    del labels_dict
    metric_ops = {}
    value_op, update_op = metric_fns.total(
        tf.shape(input=predictions_dict['logits'])[0])
    metric_ops['added_example_count'] = (value_op, update_op)
    return metric_ops
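A callback like this follows the (features_dict, predictions_dict, labels_dict) -> metric_ops contract expected by TFMA's add_metrics_callbacks. As a rough, hypothetical sketch (the path is a placeholder and the exact API surface depends on the TFMA version in use), it could be attached to an EvalSavedModel like this:

import tensorflow_model_analysis as tfma

# Hypothetical wiring for the legacy EvalSavedModel-based flow.
eval_shared_model = tfma.default_eval_shared_model(
    eval_saved_model_path='/path/to/eval_saved_model',  # placeholder path
    add_metrics_callbacks=[_addExampleCountMetricCallback])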
Example no. 3
  def testGetAndSetMetricVariables(self):
    temp_eval_export_dir = self._getEvalExportDir()
    _, eval_export_dir = multi_head.simple_multi_head(None,
                                                      temp_eval_export_dir)

    eval_saved_model = load.EvalSavedModel(eval_export_dir)
    _, prediction_dict, _ = (
        eval_saved_model.get_features_predictions_labels_dicts())
    with eval_saved_model.graph_as_default():
      metric_ops = {}
      value_op, update_op = metrics.total(
          tf.shape(input=prediction_dict['english_head/logits'])[0])
      metric_ops['example_count/english_head'] = (value_op, update_op)

      eval_saved_model.register_additional_metric_ops(metric_ops)

    example1 = self._makeMultiHeadExample('english')
    features_predictions_labels = self.predict_injective_single_example(
        eval_saved_model, example1.SerializeToString())
    eval_saved_model.perform_metrics_update(features_predictions_labels)
    metric_values = eval_saved_model.get_metric_values()
    self.assertDictElementsAlmostEqual(
        metric_values, {
            'label/mean/english_head': 1.0,
            'label/mean/chinese_head': 0.0,
            'label/mean/other_head': 0.0,
            'example_count/english_head': 1.0
        })
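    # Snapshot the metric accumulator state after the first example so it can
    # be restored below.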
    metric_variables = eval_saved_model.get_metric_variables()

    example2 = self._makeMultiHeadExample('chinese')
    features_predictions_labels = self.predict_injective_single_example(
        eval_saved_model, example2.SerializeToString())
    eval_saved_model.perform_metrics_update(features_predictions_labels)
    metric_values = eval_saved_model.get_metric_values()
    self.assertDictElementsAlmostEqual(
        metric_values, {
            'label/mean/english_head': 0.5,
            'label/mean/chinese_head': 0.5,
            'label/mean/other_head': 0.0,
            'example_count/english_head': 2.0
        })

    # Now set metric variables to what they were after the first example.
    eval_saved_model.set_metric_variables(metric_variables)
    metric_values = eval_saved_model.get_metric_values()
    self.assertDictElementsAlmostEqual(
        metric_values, {
            'label/mean/english_head': 1.0,
            'label/mean/chinese_head': 0.0,
            'label/mean/other_head': 0.0,
            'example_count/english_head': 1.0
        })
Example no. 4
    def get_metric_ops(self, features_dict, predictions_dict, labels_dict):
        ref_tensor = _get_prediction_tensor(predictions_dict)
        if ref_tensor is None:
            # Note that if predictions_dict is a Tensor and not a dict,
            # _get_prediction_tensor will return predictions_dict, so if we get
            # here, it means that predictions_dict is a dict without any of the
            # standard keys.
            #
            # If we can't get any of the standard keys, then pick the first key
            # in alphabetical order if the predictions dict is non-empty.
            # If the predictions dict is empty, try the labels dict.
            # If that is empty too, default to the empty Tensor.
            tf.logging.info(
                'ExampleCount post export metric: could not find any of '
                'the standard keys in predictions_dict (keys were: %s)',
                predictions_dict.keys())
            if predictions_dict is not None and predictions_dict.keys():
                first_key = sorted(predictions_dict.keys())[0]
                ref_tensor = predictions_dict[first_key]
                tf.logging.info(
                    'Using the first key from predictions_dict: %s', first_key)
            elif labels_dict is not None:
                if types.is_tensor(labels_dict):
                    ref_tensor = labels_dict
                    tf.logging.info('Using the labels Tensor')
                elif labels_dict.keys():
                    first_key = sorted(labels_dict.keys())[0]
                    ref_tensor = labels_dict[first_key]
                    tf.logging.info('Using the first key from labels_dict: %s',
                                    first_key)

            if ref_tensor is None:
                tf.logging.info(
                    'Could not find a reference Tensor for example count. '
                    'Defaulting to the empty Tensor.')
                ref_tensor = tf.constant([])

        return {
            metric_keys.EXAMPLE_COUNT: metrics.total(tf.shape(ref_tensor)[0])
        }
Example no. 5
    def testResetMetricVariables(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = multi_head.simple_multi_head(
            None, temp_eval_export_dir)

        eval_saved_model = load.EvalSavedModel(eval_export_dir)
        _, prediction_dict, _ = (
            eval_saved_model.get_features_predictions_labels_dicts())
        with eval_saved_model.graph_as_default():
            metric_ops = {}
            value_op, update_op = metrics.total(
                tf.shape(input=prediction_dict['english_head/logits'])[0])
            metric_ops['example_count/english_head'] = (value_op, update_op)

            eval_saved_model.register_additional_metric_ops(metric_ops)

        example1 = self._makeMultiHeadExample('english').SerializeToString()
        eval_saved_model.metrics_reset_update_get(example1)
        metric_values = eval_saved_model.get_metric_values()
        self.assertDictElementsAlmostEqual(
            metric_values, {
                'label/mean/english_head': 1.0,
                'label/mean/chinese_head': 0.0,
                'label/mean/other_head': 0.0,
                'example_count/english_head': 1.0
            })
        eval_saved_model.reset_metric_variables()

        example2 = self._makeMultiHeadExample('chinese').SerializeToString()
        eval_saved_model.metrics_reset_update_get(example2)
        metric_values = eval_saved_model.get_metric_values()
        self.assertDictElementsAlmostEqual(
            metric_values, {
                'label/mean/english_head': 0.0,
                'label/mean/chinese_head': 1.0,
                'label/mean/other_head': 0.0,
                'example_count/english_head': 1.0
            })
Example no. 6
    def testVariablePredictionLengths(self):
        # Check that we can handle cases where the model produces predictions of
        # different lengths for different examples.
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = (
            fixed_prediction_classifier.simple_fixed_prediction_classifier(
                None, temp_eval_export_dir))

        eval_saved_model = load.EvalSavedModel(eval_export_dir)
        _, prediction_dict, _ = (
            eval_saved_model.get_features_predictions_labels_dicts())
        with eval_saved_model.graph_as_default():
            eval_saved_model.register_additional_metric_ops({
                'total_non_trivial_classes':
                metrics.total(
                    tf.reduce_sum(
                        tf.cast(
                            tf.logical_and(
                                tf.not_equal(prediction_dict['classes'], '?'),
                                tf.not_equal(prediction_dict['classes'], '')),
                            tf.int32))),
                'example_count':
                metrics.total(tf.shape(prediction_dict['classes'])[0]),
                'total_score':
                metrics.total(prediction_dict['probabilities']),
            })

        example1 = self._makeExample(classes=['apple'], scores=[100.0])
        example2 = self._makeExample()
        example3 = self._makeExample(
            classes=['durian', 'elderberry', 'fig', 'grape'],
            scores=[300.0, 301.0, 302.0, 303.0])
        example4 = self._makeExample(classes=['banana', 'cherry'],
                                     scores=[400.0, 401.0])

        fpl_list1 = self.predict_injective_example_list(
            eval_saved_model, [
                example1.SerializeToString(),
                example2.SerializeToString(),
            ])
        fpl_list2 = self.predict_injective_example_list(
            eval_saved_model, [
                example3.SerializeToString(),
                example4.SerializeToString(),
            ])

        # Note that the '?' and 0 default values come from the model.
        self.assertAllEqual(
            np.array([[b'apple']]),
            fpl_list1[0].predictions['classes'][encoding.NODE_SUFFIX])
        self.assertAllEqual(
            np.array([[100]]),
            fpl_list1[0].predictions['probabilities'][encoding.NODE_SUFFIX])
        self.assertAllEqual(
            np.array([[b'?']]),
            fpl_list1[1].predictions['classes'][encoding.NODE_SUFFIX])
        self.assertAllEqual(
            np.array([[0]]),
            fpl_list1[1].predictions['probabilities'][encoding.NODE_SUFFIX])

        self.assertAllEqual(
            np.array([[b'durian', b'elderberry', b'fig', b'grape']]),
            fpl_list2[0].predictions['classes'][encoding.NODE_SUFFIX])
        self.assertAllEqual(
            np.array([[300, 301, 302, 303]]),
            fpl_list2[0].predictions['probabilities'][encoding.NODE_SUFFIX])
        self.assertAllEqual(
            np.array([[b'banana', b'cherry', b'?', b'?']]),
            fpl_list2[1].predictions['classes'][encoding.NODE_SUFFIX])
        self.assertAllEqual(
            np.array([[400, 401, 0, 0]]),
            fpl_list2[1].predictions['probabilities'][encoding.NODE_SUFFIX])

        eval_saved_model.metrics_reset_update_get_list(fpl_list1 + fpl_list2)
        metric_values = eval_saved_model.get_metric_values()
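        # Expected values below: total_non_trivial_classes = 1 + 0 + 4 + 2 = 7,
        # example_count = 4, and
        # total_score = 100 + (300 + 301 + 302 + 303) + (400 + 401) = 2107.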

        self.assertDictElementsAlmostEqual(
            metric_values, {
                'total_non_trivial_classes': 7.0,
                'example_count': 4.0,
                'total_score': 2107.0,
            })
Example no. 7
  def get_metric_ops(self, features_dict, predictions_dict, labels_dict):
    value = features_dict[self._example_weight_key]
    return {metric_keys.EXAMPLE_WEIGHT: metrics.total(value)}
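The metrics.total helper used throughout these examples returns a (value_op, update_op) pair in the usual TF1 streaming-metric style. A rough sketch of such a metric, assuming TF1-style graph execution and not claiming to be the library's actual implementation, might look like this:

import tensorflow as tf

def total_sketch(values):
  # Accumulate the running sum of `values` in a metric variable and return
  # the usual (value_op, update_op) pair.
  values = tf.cast(values, tf.float64)
  total_var = tf.compat.v1.Variable(
      initial_value=0.0,
      dtype=tf.float64,
      trainable=False,
      collections=[
          tf.compat.v1.GraphKeys.METRIC_VARIABLES,
          tf.compat.v1.GraphKeys.LOCAL_VARIABLES,
      ],
      name='total')
  update_op = tf.compat.v1.assign_add(total_var, tf.reduce_sum(values))
  value_op = tf.identity(total_var)
  return value_op, update_op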
Example no. 8
  def testMetricsResetUpdateGetList(self):
    temp_eval_export_dir = self._getEvalExportDir()
    _, eval_export_dir = multi_head.simple_multi_head(None,
                                                      temp_eval_export_dir)

    eval_saved_model = load.EvalSavedModel(eval_export_dir)
    _, prediction_dict, _ = (
        eval_saved_model.get_features_predictions_labels_dicts())
    with eval_saved_model.graph_as_default():
      metric_ops = {}
      value_op, update_op = metrics.total(
          tf.shape(input=prediction_dict['english_head/logits'])[0])
      metric_ops['example_count/english_head'] = (value_op, update_op)

      eval_saved_model.register_additional_metric_ops(metric_ops)

    example1 = self._makeMultiHeadExample('english')
    features_predictions_labels1 = self.predict_injective_single_example(
        eval_saved_model, example1.SerializeToString())
    metric_variables1 = eval_saved_model.metrics_reset_update_get(
        features_predictions_labels1)

    example2 = self._makeMultiHeadExample('chinese')
    features_predictions_labels2 = self.predict_injective_single_example(
        eval_saved_model, example2.SerializeToString())
    metric_variables2 = eval_saved_model.metrics_reset_update_get(
        features_predictions_labels2)

    example3 = self._makeMultiHeadExample('other')
    features_predictions_labels3 = self.predict_injective_single_example(
        eval_saved_model, example3.SerializeToString())
    metric_variables3 = eval_saved_model.metrics_reset_update_get(
        features_predictions_labels3)

    eval_saved_model.set_metric_variables(metric_variables1)
    metric_values1 = eval_saved_model.get_metric_values()
    self.assertDictElementsAlmostEqual(
        metric_values1, {
            'label/mean/english_head': 1.0,
            'label/mean/chinese_head': 0.0,
            'label/mean/other_head': 0.0,
            'example_count/english_head': 1.0
        })

    eval_saved_model.set_metric_variables(metric_variables2)
    metric_values2 = eval_saved_model.get_metric_values()
    self.assertDictElementsAlmostEqual(
        metric_values2, {
            'label/mean/english_head': 0.0,
            'label/mean/chinese_head': 1.0,
            'label/mean/other_head': 0.0,
            'example_count/english_head': 1.0
        })

    eval_saved_model.set_metric_variables(metric_variables3)
    metric_values3 = eval_saved_model.get_metric_values()
    self.assertDictElementsAlmostEqual(
        metric_values3, {
            'label/mean/english_head': 0.0,
            'label/mean/chinese_head': 0.0,
            'label/mean/other_head': 1.0,
            'example_count/english_head': 1.0
        })

    eval_saved_model.metrics_reset_update_get_list([
        features_predictions_labels1, features_predictions_labels2,
        features_predictions_labels3
    ])
    metric_values_combined = eval_saved_model.get_metric_values()
    self.assertDictElementsAlmostEqual(
        metric_values_combined, {
            'label/mean/english_head': 1.0 / 3.0,
            'label/mean/chinese_head': 1.0 / 3.0,
            'label/mean/other_head': 1.0 / 3.0,
            'example_count/english_head': 3.0
        })