def construct_fn():  # pylint: disable=invalid-name
  """Function for constructing shared models."""
  # If we are evaluating on TPU, initialize the TPU.
  # TODO(b/143484017): Add model warmup for TPU.
  if tf.saved_model.TPU in tags:
    tf.tpu.experimental.initialize_tpu_system()
  if (model_type == constants.TF_ESTIMATOR and
      eval_constants.EVAL_TAG in tags):
    model = load.EvalSavedModel(
        eval_saved_model_path,
        include_default_metrics,
        additional_fetches=additional_fetches,
        blacklist_feature_fetches=blacklist_feature_fetches,
        tags=tags)
    if add_metrics_callbacks:
      model.register_add_metric_callbacks(add_metrics_callbacks)
    model.graph_finalize()
  elif model_type == constants.TF_KERAS:
    # TODO(b/141524386, b/141566408): TPU Inference is not supported
    # for Keras saved_model yet.
    model = tf.keras.models.load_model(eval_saved_model_path)
  elif model_type == constants.TF_LITE:
    # The tf.lite.Interpreter is not thread-safe so we only load the model
    # file's contents and leave construction of the Interpreter up to the
    # PTransform using it.
    model_filename = os.path.join(eval_saved_model_path, _TFLITE_FILE_NAME)
    with tf.io.gfile.GFile(model_filename, 'rb') as model_file:
      model = ModelContents(model_file.read())
  elif model_type == constants.TF_JS:
    # We invoke TFJS models via a subprocess call, so this call is a no-op.
    return None
  else:
    model = tf.compat.v1.saved_model.load_v2(
        eval_saved_model_path, tags=tags)
  return model
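In the TF_LITE branch above, the constructor deliberately returns only the model file's bytes, because tf.lite.Interpreter instances are not thread-safe and must be built by whichever PTransform consumes them. A minimal sketch of that consumer-side step (the helper name and batch handling are illustrative assumptions, not TFMA code):

import numpy as np
import tensorflow as tf


def run_tflite_model(model_bytes, batch):  # hypothetical helper
  """Builds a fresh interpreter from the shared model bytes and runs one batch."""
  interpreter = tf.lite.Interpreter(model_content=model_bytes)
  input_details = interpreter.get_input_details()
  output_details = interpreter.get_output_details()
  # Resize the input tensor to the batch shape, then allocate buffers.
  batch = np.asarray(batch, dtype=input_details[0]['dtype'])
  interpreter.resize_tensor_input(input_details[0]['index'], batch.shape)
  interpreter.allocate_tensors()
  interpreter.set_tensor(input_details[0]['index'], batch)
  interpreter.invoke()
  return interpreter.get_tensor(output_details[0]['index'])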
Example #2
    def testEvaluateExistingMetricsBasic(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = multi_head.simple_multi_head(
            None, temp_eval_export_dir)

        eval_saved_model = load.EvalSavedModel(eval_export_dir)
        example1 = self._makeMultiHeadExample('english').SerializeToString()
        example2 = self._makeMultiHeadExample('chinese').SerializeToString()
        example3 = self._makeMultiHeadExample('other').SerializeToString()

        eval_saved_model.metrics_reset_update_get_list(
            [example1, example2, example3])

        metric_values = eval_saved_model.get_metric_values()
        self.assertDictElementsAlmostEqual(
            metric_values, {
                'accuracy/english_head': 1.0,
                'accuracy/chinese_head': 1.0,
                'accuracy/other_head': 1.0,
                'auc/english_head': 1.0,
                'auc/chinese_head': 1.0,
                'auc/other_head': 1.0,
                'label/mean/english_head': 1.0 / 3.0,
                'label/mean/chinese_head': 1.0 / 3.0,
                'label/mean/other_head': 1.0 / 3.0
            })
Example #3
    def testEvaluateExistingMetricsWithExportedCustomMetricsDNN(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = dnn_classifier.simple_dnn_classifier(
            None, temp_eval_export_dir)

        eval_saved_model = load.EvalSavedModel(eval_export_dir)
        example1 = self._makeExample(age=3.0, language='english', label=1.0)
        features_predictions_labels = self.predict_injective_single_example(
            eval_saved_model, example1.SerializeToString())
        eval_saved_model.perform_metrics_update(features_predictions_labels)

        example2 = self._makeExample(age=2.0, language='chinese', label=0.0)
        features_predictions_labels = self.predict_injective_single_example(
            eval_saved_model, example2.SerializeToString())
        eval_saved_model.perform_metrics_update(features_predictions_labels)

        metric_values = eval_saved_model.get_metric_values()
        self.assertDictElementsAlmostEqual(
            metric_values,
            {
                # We don't check accuracy and AUC here because they vary from
                # run to run due to DNN initialization.
                'my_mean_age': 2.5,
                'my_mean_label': 0.5,
                'my_mean_age_times_label': 1.5
            })

        self.assertIn('my_mean_prediction', metric_values)
        self.assertIn('prediction/mean', metric_values)
        self.assertAlmostEqual(metric_values['prediction/mean'],
                               metric_values['my_mean_prediction'],
                               places=5)
Example #4
 def construct():  # pylint: disable=invalid-name
   """Function for constructing a EvalSavedModel."""
   start_time = datetime.datetime.now()
   result = load.EvalSavedModel(eval_saved_model_path)
   if add_metrics_callbacks:
     features_dict, predictions_dict, labels_dict = (
         result.get_features_predictions_labels_dicts())
     features_dict = util.wrap_tensor_or_dict_of_tensors_in_identity(
         features_dict)
     predictions_dict = util.wrap_tensor_or_dict_of_tensors_in_identity(
         predictions_dict)
     labels_dict = util.wrap_tensor_or_dict_of_tensors_in_identity(labels_dict)
     with result.graph_as_default():
       metric_ops = {}
       for add_metrics_callback in add_metrics_callbacks:
         new_metric_ops = add_metrics_callback(features_dict, predictions_dict,
                                               labels_dict)
         overlap = set(new_metric_ops.keys()) & set(metric_ops.keys())
         if overlap:
           raise ValueError('metric keys should not conflict, but an '
                            'earlier callback already added the metrics '
                            'named %s' % overlap)
         metric_ops.update(new_metric_ops)
       result.register_additional_metric_ops(metric_ops)
   end_time = datetime.datetime.now()
   model_load_seconds_distribution.update(
       int((end_time - start_time).total_seconds()))
   return result
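Each add_metrics_callback above is called with the features, predictions, and labels dicts and must return a dict mapping metric names to (value_op, update_op) pairs, which register_additional_metric_ops then installs in the eval graph. A minimal sketch of such a callback (the 'label' key is an assumption for illustration; real callbacks index whatever keys their model's dicts expose):

import tensorflow as tf


def my_label_mean_callback(features_dict, predictions_dict, labels_dict):
  """Returns {metric_name: (value_op, update_op)}, as the loop above expects."""
  del features_dict, predictions_dict  # unused in this sketch
  # 'label' is an assumed key; the labels dict may also be a bare tensor
  # depending on the exported model.
  value_op, update_op = tf.compat.v1.metrics.mean(labels_dict['label'])
  return {'my_label_mean': (value_op, update_op)}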
Example #5
    def testServingGraphAlsoExportedIfSpecified(self):
        # Most of the example trainers also pass serving_input_receiver_fn to
        # export_eval_savedmodel, so the serving graph should be included.
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = (
            fixed_prediction_estimator.simple_fixed_prediction_estimator(
                None, temp_eval_export_dir))

        # Check the eval graph.
        eval_saved_model = load.EvalSavedModel(eval_export_dir)
        example1 = self._makeExample(prediction=0.9, label=0.0)
        features_predictions_labels = self.predict_injective_single_example(
            eval_saved_model, example1.SerializeToString())
        eval_saved_model.perform_metrics_update(features_predictions_labels)

        metric_values = eval_saved_model.get_metric_values()
        self.assertDictElementsAlmostEqual(metric_values,
                                           {'average_loss': 0.81})

        # Check the serving graph.
        estimator = tf.contrib.estimator.SavedModelEstimator(eval_export_dir)

        def predict_input_fn():
            return {'inputs': tf.constant([example1.SerializeToString()])}

        predictions = next(estimator.predict(predict_input_fn))
        self.assertAllClose(predictions['outputs'], np.array([0.9]))
Example #6
  def _computeMetricsWithoutBeamNoBatching(
      self, eval_saved_model_path: Text,
      serialized_examples: List[bytes]) -> Dict[Text, Any]:
    """Computes metrics in-memory using the low-level APIs without Beam.

    This is the non-batched version of computeMetricsWithoutBeam. It can be
    useful for debugging batching issues with TFMA or with your model
    (e.g. your model or metrics only work with a fixed batch size, whereas
    TFMA requires that your model accept batches of any size).

    Args:
      eval_saved_model_path: Path to the directory containing the
        EvalSavedModel.
      serialized_examples: List of serialized example bytes.

    Returns:
      Metrics computed by TFMA using your model on the given examples.
    """
    eval_saved_model = load.EvalSavedModel(eval_saved_model_path)

    for example in serialized_examples:
      for fpl in eval_saved_model.as_features_predictions_labels(
          eval_saved_model.predict(example)):
        eval_saved_model.perform_metrics_update(fpl)
    return eval_saved_model.get_metric_values()
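A hedged usage sketch of the helper above. The feature names (age, language, label) and the 'accuracy' metric are assumed from the linear-classifier fixture used elsewhere in this listing; the helper below is illustrative, not part of the test class:

import tensorflow as tf


def _make_serialized_example(age, language, label):
  """Builds a serialized tf.train.Example with the assumed feature names."""
  example = tf.train.Example()
  example.features.feature['age'].float_list.value[:] = [age]
  example.features.feature['language'].bytes_list.value[:] = [
      language.encode('utf-8')]
  example.features.feature['label'].float_list.value[:] = [label]
  return example.SerializeToString()


# Inside a test method of the same class, one might then write:
#   metrics = self._computeMetricsWithoutBeamNoBatching(
#       eval_export_dir, [_make_serialized_example(3.0, 'english', 1.0),
#                         _make_serialized_example(2.0, 'chinese', 0.0)])
#   self.assertIn('accuracy', metrics)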
Example #7
 def testLoadSavedModelDisallowsAdditionalFetchesWithLabels(self):
     temp_eval_export_dir = self._getEvalExportDir()
     _, eval_export_dir = multi_head.simple_multi_head(
         None, temp_eval_export_dir)
     with self.assertRaisesRegexp(
             ValueError, 'additional_fetches should not contain "labels"'):
         load.EvalSavedModel(eval_export_dir, additional_fetches=['labels'])
Example #8
    def testEvaluateExistingMetricsWithExportedCustomMetrics(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = linear_classifier.simple_linear_classifier(
            None, temp_eval_export_dir)

        eval_saved_model = load.EvalSavedModel(eval_export_dir)
        example1 = self._makeExample(age=3.0, language='english', label=1.0)
        example2 = self._makeExample(age=2.0, language='chinese', label=0.0)
        eval_saved_model.metrics_reset_update_get_list(
            [example1.SerializeToString(),
             example2.SerializeToString()])

        metric_values = eval_saved_model.get_metric_values()
        self.assertDictElementsAlmostEqual(
            metric_values, {
                'accuracy': 1.0,
                'auc': 1.0,
                'my_mean_age': 2.5,
                'my_mean_label': 0.5,
                'my_mean_age_times_label': 1.5
            })

        self.assertIn('my_mean_prediction', metric_values)
        self.assertIn('prediction/mean', metric_values)
        self.assertAlmostEqual(metric_values['prediction/mean'],
                               metric_values['my_mean_prediction'],
                               places=5)
Example #9
 def construct():  # pylint: disable=invalid-name
     """Function for constructing shared ModelTypes."""
     start_time = datetime.datetime.now()
     saved_model = None
     keras_model = None
     eval_saved_model = None
     if model_path:
         if tf.version.VERSION.split('.')[0] == '1':
             saved_model = tf.compat.v1.saved_model.load_v2(model_path,
                                                            tags=[tag])
         else:
             saved_model = tf.saved_model.load(model_path, tags=[tag])
         try:
             keras_model = tf.keras.experimental.load_from_saved_model(
                 model_path)
         except tf.errors.NotFoundError:
             pass
     if eval_saved_model_path:
         eval_saved_model = load.EvalSavedModel(
             eval_saved_model_path,
             include_default_metrics,
             additional_fetches=additional_fetches,
             blacklist_feature_fetches=blacklist_feature_fetches)
         if add_metrics_callbacks:
             eval_saved_model.register_add_metric_callbacks(
                 add_metrics_callbacks)
         eval_saved_model.graph_finalize()
     end_time = datetime.datetime.now()
     model_load_seconds_callback(
         int((end_time - start_time).total_seconds()))
     return types.ModelTypes(saved_model=saved_model,
                             keras_model=keras_model,
                             eval_saved_model=eval_saved_model)
Example #10
    def testServingGraphAlsoExportedIfSpecified(self):
        # Most of the example trainers also pass serving_input_receiver_fn to
        # export_eval_savedmodel, so the serving graph should be included.
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = (
            fixed_prediction_estimator.simple_fixed_prediction_estimator(
                None, temp_eval_export_dir))

        # Check the eval graph.
        eval_saved_model = load.EvalSavedModel(eval_export_dir)
        example1 = self._makeExample(prediction=0.9,
                                     label=0.0).SerializeToString()
        eval_saved_model.metrics_reset_update_get(example1)

        metric_values = eval_saved_model.get_metric_values()
        self.assertDictElementsAlmostEqual(metric_values,
                                           {'average_loss': 0.81})

        # Check the serving graph.
        # TODO(b/124466113): Remove tf.compat.v2 once TF 2.0 is the default.
        if hasattr(tf, 'compat') and hasattr(tf.compat, 'v2'):
            imported = tf.compat.v2.saved_model.load(
                eval_export_dir, tags=tf.saved_model.SERVING)
            predictions = imported.signatures[
                tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY](
                    inputs=tf.constant([example1]))
            self.assertAllClose(predictions['outputs'], np.array([[0.9]]))
Example #11
 def construct():  # pylint: disable=invalid-name
     """Function for constructing shared models."""
     start_time = datetime.datetime.now()
     # If we are evaluating on TPU, initialize the TPU.
     # TODO(b/143484017): Add model warmup for TPU.
     if tf.saved_model.TPU in tags:
         tf.tpu.experimental.initialize_tpu_system()
     if (model_type == constants.TF_ESTIMATOR
             and eval_constants.EVAL_TAG in tags):
         model = load.EvalSavedModel(
             eval_saved_model_path,
             include_default_metrics,
             additional_fetches=additional_fetches,
             blacklist_feature_fetches=blacklist_feature_fetches,
             tags=tags)
         if add_metrics_callbacks:
             model.register_add_metric_callbacks(add_metrics_callbacks)
         model.graph_finalize()
     elif model_type == constants.TF_KERAS:
         # TODO(b/141524386, b/141566408): TPU Inference is not supported
         # for Keras saved_model yet.
         model = tf.keras.models.load_model(eval_saved_model_path)
     else:
         model = tf.compat.v1.saved_model.load_v2(eval_saved_model_path,
                                                  tags=tags)
     end_time = datetime.datetime.now()
     model_load_seconds_callback(
         int((end_time - start_time).total_seconds()))
     return model
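The model_load_seconds_callback used above (and the model_load_seconds_distribution in the earlier constructor) is just a hook for recording load latency. One plausible implementation, offered as an assumption rather than TFMA's own wiring, backs it with a Beam distribution metric:

from apache_beam.metrics import Metrics

# Hypothetical metric; the namespace and name are illustrative.
_model_load_seconds = Metrics.distribution('model_eval', 'model_load_seconds')


def model_load_seconds_callback(seconds):
  _model_load_seconds.update(seconds)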
Example #12
 def construct():  # pylint: disable=invalid-name
   """Function for constructing shared ModelTypes."""
   start_time = datetime.datetime.now()
   saved_model = None
   keras_model = None
   eval_saved_model = None
   if tags == [eval_constants.EVAL_TAG]:
     eval_saved_model = load.EvalSavedModel(
         eval_saved_model_path,
         include_default_metrics,
         additional_fetches=additional_fetches,
         blacklist_feature_fetches=blacklist_feature_fetches)
     if add_metrics_callbacks:
       eval_saved_model.register_add_metric_callbacks(add_metrics_callbacks)
     eval_saved_model.graph_finalize()
   else:
     try:
       keras_model = tf.keras.models.load_model(eval_saved_model_path)
     except Exception:  # pylint: disable=broad-except
       saved_model = tf.compat.v1.saved_model.load_v2(
           eval_saved_model_path, tags=tags)
   end_time = datetime.datetime.now()
   model_load_seconds_callback(int((end_time - start_time).total_seconds()))
   return types.ModelTypes(
       saved_model=saved_model,
       keras_model=keras_model,
       eval_saved_model=eval_saved_model)
Example #13
  def testEvaluateExistingMetricsBasicForUnsupervisedModel(self):
    # Test that we can export and load unsupervised models (models which
    # don't take a labels parameter in their model_fn).
    temp_eval_export_dir = self._getEvalExportDir()
    _, eval_export_dir = (
        fixed_prediction_estimator_no_labels
        .simple_fixed_prediction_estimator_no_labels(None,
                                                     temp_eval_export_dir))

    eval_saved_model = load.EvalSavedModel(eval_export_dir)
    example1 = self._makeExample(prediction=1.0)

    features_predictions_labels = self.predict_injective_single_example(
        eval_saved_model, example1.SerializeToString())
    eval_saved_model.perform_metrics_update(features_predictions_labels)

    example2 = self._makeExample(prediction=0.0)
    features_predictions_labels = self.predict_injective_single_example(
        eval_saved_model, example2.SerializeToString())
    eval_saved_model.perform_metrics_update(features_predictions_labels)

    metric_values = eval_saved_model.get_metric_values()
    self.assertDictElementsAlmostEqual(metric_values, {
        'average_loss': 0.5,
    })
Example #14
    def testEvaluateExistingMetricsCustomEstimatorBasic(self):
        # Custom estimator aims to predict age * 3 + 1
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = custom_estimator.simple_custom_estimator(
            None, temp_eval_export_dir)

        example1 = example_pb2.Example()
        example1.features.feature['age'].float_list.value[:] = [1.0]
        example1.features.feature['label'].float_list.value[:] = [3.0]
        eval_saved_model = load.EvalSavedModel(eval_export_dir)
        features_predictions_labels = self.predict_injective_single_example(
            eval_saved_model, example1.SerializeToString())
        eval_saved_model.perform_metrics_update(features_predictions_labels)

        example2 = example_pb2.Example()
        example2.features.feature['age'].float_list.value[:] = [2.0]
        example2.features.feature['label'].float_list.value[:] = [7.0]
        features_predictions_labels = self.predict_injective_single_example(
            eval_saved_model, example2.SerializeToString())
        eval_saved_model.perform_metrics_update(features_predictions_labels)

        metric_values = eval_saved_model.get_metric_values()

        # We don't control the trained model's weights fully, but it should
        # predict close to what it aims to. The "target" mean prediction is 5.5.
        self.assertIn('mean_prediction', metric_values)
        self.assertGreater(metric_values['mean_prediction'], 5.4)
        self.assertLess(metric_values['mean_prediction'], 5.6)

        # The "target" mean absolute error is 0.5
        self.assertIn('mean_absolute_error', metric_values)
        self.assertGreater(metric_values['mean_absolute_error'], 0.4)
        self.assertLess(metric_values['mean_absolute_error'], 0.6)

        self.assertHasKeyWithValueAlmostEqual(metric_values, 'mean_label', 5.0)
Example #15
    def testEvaluateExistingMetricsBasicForControlDependencyEstimator(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = (
            control_dependency_estimator.simple_control_dependency_estimator(
                None, temp_eval_export_dir))

        eval_saved_model = load.EvalSavedModel(eval_export_dir)
        example1 = self._makeExample(prediction=0.9,
                                     label=0.0,
                                     fixed_float=1.0,
                                     fixed_string='apple',
                                     fixed_int=2,
                                     var_float=10.0,
                                     var_string='banana',
                                     var_int=20).SerializeToString()
        example2 = self._makeExample(prediction=0.1,
                                     label=0.0,
                                     fixed_float=5.0,
                                     fixed_string='avocado',
                                     fixed_int=6,
                                     var_float=50.0,
                                     var_string='berry',
                                     var_int=60).SerializeToString()

        eval_saved_model.metrics_reset_update_get_list([example1, example2])
        metric_values = eval_saved_model.get_metric_values()

        self.assertDictElementsAlmostEqual(
            metric_values, {
                'control_dependency_on_fixed_float': 1.0,
                'control_dependency_on_var_float': 10.0,
                'control_dependency_on_actual_label': 100.0,
                'control_dependency_on_var_int_label': 1000.0,
                'control_dependency_on_prediction': 10000.0,
            })
Example #16
    def benchmarkEvalSavedModelPredict(self):
        """Benchmark using the EvalSavedModel to make predictions.

        Runs EvalSavedModel.predict_list and records the wall time taken.
        """
        batch_size = 1000

        eval_saved_model = load.EvalSavedModel(
            path=self._dataset.tfma_saved_model_path(),
            include_default_metrics=True)

        records = self._dataset.read_raw_dataset(deserialize=False,
                                                 limit=MAX_NUM_EXAMPLES)

        start = time.time()
        for batch in benchmark_utils.batched_iterator(records, batch_size):
            eval_saved_model.predict_list(batch)
        end = time.time()
        delta = end - start
        self.report_benchmark(
            iters=1,
            wall_time=delta,
            extras={
                "batch_size": batch_size,
                "num_examples":
                self._dataset.num_examples(limit=MAX_NUM_EXAMPLES)
            })
Example #17
    def testEvaluateExistingMetricsBasic(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = multi_head.simple_multi_head(
            None, temp_eval_export_dir)

        eval_saved_model = load.EvalSavedModel(eval_export_dir)
        example1 = self._makeMultiHeadExample('english')
        features_predictions_labels = self.predict_injective_single_example(
            eval_saved_model, example1.SerializeToString())
        eval_saved_model.perform_metrics_update(features_predictions_labels)

        example2 = self._makeMultiHeadExample('chinese')
        features_predictions_labels = self.predict_injective_single_example(
            eval_saved_model, example2.SerializeToString())
        eval_saved_model.perform_metrics_update(features_predictions_labels)

        metric_values = eval_saved_model.get_metric_values()
        self.assertDictElementsAlmostEqual(
            metric_values, {
                'accuracy/english_head': 1.0,
                'accuracy/chinese_head': 1.0,
                'accuracy/other_head': 1.0,
                'auc/english_head': 1.0,
                'auc/chinese_head': 1.0,
                'auc/other_head': 1.0,
                'label/mean/english_head': 0.5,
                'label/mean/chinese_head': 0.5,
                'label/mean/other_head': 0.0
            })
Example #18
    def testPredictListMultipleExamplesPerInputModelNoExampleInInput(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = (fake_multi_examples_per_input_estimator.
                              fake_multi_examples_per_input_estimator(
                                  None, temp_eval_export_dir))

        eval_saved_model = load.EvalSavedModel(eval_export_dir)
        fetched_list = eval_saved_model.predict_list(['0', '0'])
        self.assertFalse(fetched_list)
Example #19
    def testEvaluateWithAdditionalMetricsBasic(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = multi_head.simple_multi_head(
            None, temp_eval_export_dir)

        eval_saved_model = load.EvalSavedModel(eval_export_dir)
        _, prediction_dict, label_dict = (
            eval_saved_model.get_features_predictions_labels_dicts())
        with eval_saved_model.graph_as_default():
            metric_ops = {}
            value_op, update_op = tf.metrics.mean_absolute_error(
                label_dict['english_head'][0][0],
                prediction_dict['english_head/probabilities'][0][1])
            metric_ops['mean_absolute_error/english_head'] = (value_op,
                                                              update_op)

            value_op, update_op = tf.contrib.metrics.count(
                prediction_dict['english_head/logits'])
            metric_ops['example_count/english_head'] = (value_op, update_op)

            eval_saved_model.register_additional_metric_ops(metric_ops)

        example1 = self._makeMultiHeadExample('english')
        features_predictions_labels = self.predict_injective_single_example(
            eval_saved_model, example1.SerializeToString())
        eval_saved_model.perform_metrics_update(features_predictions_labels)

        example2 = self._makeMultiHeadExample('chinese')
        features_predictions_labels = self.predict_injective_single_example(
            eval_saved_model, example2.SerializeToString())
        eval_saved_model.perform_metrics_update(features_predictions_labels)

        metric_values = eval_saved_model.get_metric_values()

        # Check that the original metrics are still there.
        self.assertDictElementsAlmostEqual(
            metric_values, {
                'accuracy/english_head': 1.0,
                'accuracy/chinese_head': 1.0,
                'accuracy/other_head': 1.0,
                'auc/english_head': 1.0,
                'auc/chinese_head': 1.0,
                'auc/other_head': 1.0,
                'label/mean/english_head': 0.5,
                'label/mean/chinese_head': 0.5,
                'label/mean/other_head': 0.0
            })

        # Check the added metrics.
        # We don't control the trained model's weights fully, but it should
        # predict probabilities > 0.7.
        self.assertIn('mean_absolute_error/english_head', metric_values)
        self.assertLess(metric_values['mean_absolute_error/english_head'], 0.3)

        self.assertHasKeyWithValueAlmostEqual(metric_values,
                                              'example_count/english_head',
                                              2.0)
Example #20
    def _sharedTestForPredictListMultipleExamplesPerInputModel(
            self, use_legacy, use_iterator):
        temp_eval_export_dir = self._getEvalExportDir()
        if use_legacy:
            _, eval_export_dir = (
                fake_multi_examples_per_input_estimator.
                legacy_fake_multi_examples_per_input_estimator(
                    None, temp_eval_export_dir))
        else:
            _, eval_export_dir = (fake_multi_examples_per_input_estimator.
                                  fake_multi_examples_per_input_estimator(
                                      None, temp_eval_export_dir,
                                      use_iterator))

        eval_saved_model = load.EvalSavedModel(eval_export_dir)
        fetched_list = eval_saved_model.predict_list(
            [b'0', b'1', b'3', b'0', b'2'])
        self.assertEqual(6, len(fetched_list))

        input_index = []
        example_count = []
        labels = []
        predictions = []
        intra_input_index = []
        annotation = []

        def _check_and_append_feature(feature_name, one_fetch, feature_values):
            self.assertEqual((1, ),
                             one_fetch.values['features'][feature_name].shape)
            feature_values.append(
                one_fetch.values['features'][feature_name][0])

        for fetched in fetched_list:
            _check_and_append_feature('input_index', fetched, input_index)
            _check_and_append_feature('example_count', fetched, example_count)
            _check_and_append_feature('intra_input_index', fetched,
                                      intra_input_index)
            _check_and_append_feature('annotation', fetched, annotation)

            self.assertAllEqual((1, ), fetched.values['labels'].shape)
            labels.append(fetched.values['labels'])

            self.assertAllEqual((1, ), fetched.values['predictions'].shape)
            predictions.append(fetched.values['predictions'])

        self.assertSequenceEqual([1, 3, 3, 3, 2, 2], example_count)
        self.assertSequenceEqual([1, 2, 2, 2, 4, 4], input_index)
        self.assertSequenceEqual([0, 0, 1, 2, 0, 1], intra_input_index)
        self.assertAllEqual([
            b'raw_input: 1; index: 0', b'raw_input: 3; index: 0',
            b'raw_input: 3; index: 1', b'raw_input: 3; index: 2',
            b'raw_input: 2; index: 0', b'raw_input: 2; index: 1'
        ], annotation)

        self.assertSequenceEqual([1, 2, 2, 2, 4, 4], labels)
        self.assertSequenceEqual([1, 2, 2, 2, 4, 4], predictions)
Example #21
    def testPredictListOutOfRangeInputRefs(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = (
            fake_multi_examples_per_input_estimator.
            bad_multi_examples_per_input_estimator_out_of_range_input_refs(
                None, temp_eval_export_dir))

        eval_saved_model = load.EvalSavedModel(eval_export_dir)
        with self.assertRaisesRegexp(ValueError,
                                     'An index in input_refs is out of range'):
            eval_saved_model.predict_list(['1'])
Example #22
    def testPredictListMisalignedInputRef(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = (
            fake_multi_examples_per_input_estimator.
            bad_multi_examples_per_input_estimator_misaligned_input_refs(
                None, temp_eval_export_dir))

        eval_saved_model = load.EvalSavedModel(eval_export_dir)
        with self.assertRaisesRegexp(ValueError,
                                     'input_refs should be batch-aligned'):
            eval_saved_model.predict_list(['1'])
Example #23
    def testPredictListMultipleExamplesPerInputModel(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = (fake_multi_examples_per_input_estimator.
                              fake_multi_examples_per_input_estimator(
                                  None, temp_eval_export_dir))

        eval_saved_model = load.EvalSavedModel(eval_export_dir)
        fpls = eval_saved_model.predict_list([b'0', b'1', b'3', b'0', b'2'])
        self.assertEqual(6, len(fpls))

        input_index = []
        example_count = []
        labels = []
        predictions = []
        intra_input_index = []
        annotation = []

        def _check_and_append_feature(feature_name, one_fpl, feature_values):
            self.assertEqual(
                (1, ),
                one_fpl.features[feature_name][encoding.NODE_SUFFIX].shape)
            feature_values.append(
                one_fpl.features[feature_name][encoding.NODE_SUFFIX][0])

        for fpl in fpls:
            _check_and_append_feature('input_index', fpl, input_index)
            _check_and_append_feature('example_count', fpl, example_count)
            _check_and_append_feature('intra_input_index', fpl,
                                      intra_input_index)
            _check_and_append_feature('annotation', fpl, annotation)

            self.assertAllEqual((1, ), fpl.labels[
                encoding.DEFAULT_LABELS_DICT_KEY][encoding.NODE_SUFFIX].shape)
            labels.append(fpl.labels[encoding.DEFAULT_LABELS_DICT_KEY][
                encoding.NODE_SUFFIX][0])

            self.assertAllEqual(
                (1, ),
                fpl.predictions['predictions'][encoding.NODE_SUFFIX].shape)
            predictions.append(
                fpl.predictions['predictions'][encoding.NODE_SUFFIX][0])

        self.assertSequenceEqual([1, 3, 3, 3, 2, 2], example_count)
        self.assertSequenceEqual([1, 2, 2, 2, 4, 4], input_index)
        self.assertSequenceEqual([0, 0, 1, 2, 0, 1], intra_input_index)
        self.assertAllEqual([
            b'raw_input: 1; index: 0', b'raw_input: 3; index: 0',
            b'raw_input: 3; index: 1', b'raw_input: 3; index: 2',
            b'raw_input: 2; index: 0', b'raw_input: 2; index: 1'
        ], annotation)

        self.assertSequenceEqual([1, 2, 2, 2, 4, 4], labels)
        self.assertSequenceEqual([1, 2, 2, 2, 4, 4], predictions)
Example #24
 def construct():  # pylint: disable=invalid-name
     """Function for constructing a EvalSavedModel."""
     start_time = datetime.datetime.now()
     result = load.EvalSavedModel(eval_saved_model_path,
                                  include_default_metrics)
     if add_metrics_callbacks:
         result.register_add_metric_callbacks(add_metrics_callbacks)
     result.graph_finalize()
     end_time = datetime.datetime.now()
     model_load_seconds.update(
         int((end_time - start_time).total_seconds()))
     return result
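These zero-argument construct() closures exist so that a shared handle can build the model lazily, once per worker, and hand the same instance to every bundle that needs it. A minimal sketch of that pattern using Beam's shared-handle utility (a hypothetical DoFn for illustration, assuming apache_beam.utils.shared is available; the shared.Shared() handle would be created once at pipeline construction time and passed to every DoFn instance):

import apache_beam as beam
from apache_beam.utils import shared


class _MetricsDoFn(beam.DoFn):
  """Hypothetical DoFn that builds the EvalSavedModel once per worker."""

  def __init__(self, shared_handle, construct_fn):
    self._shared_handle = shared_handle
    self._construct_fn = construct_fn
    self._eval_saved_model = None

  def setup(self):
    # acquire() invokes construct_fn only if no instance is cached on this
    # worker yet; otherwise it returns the already-loaded model.
    self._eval_saved_model = self._shared_handle.acquire(self._construct_fn)

  def process(self, serialized_example):
    yield self._eval_saved_model.metrics_reset_update_get(serialized_example)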
Example #25
    def testGetAndSetMetricVariables(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = multi_head.simple_multi_head(
            None, temp_eval_export_dir)

        eval_saved_model = load.EvalSavedModel(eval_export_dir)
        _, prediction_dict, _ = (
            eval_saved_model.get_features_predictions_labels_dicts())
        with eval_saved_model.graph_as_default():
            metric_ops = {}
            value_op, update_op = tf.contrib.metrics.count(
                prediction_dict['english_head/logits'])
            metric_ops['example_count/english_head'] = (value_op, update_op)

            eval_saved_model.register_additional_metric_ops(metric_ops)

        example1 = self._makeMultiHeadExample('english')
        features_predictions_labels = self.predict_injective_single_example(
            eval_saved_model, example1.SerializeToString())
        eval_saved_model.perform_metrics_update(features_predictions_labels)
        metric_values = eval_saved_model.get_metric_values()
        self.assertDictElementsAlmostEqual(
            metric_values, {
                'label/mean/english_head': 1.0,
                'label/mean/chinese_head': 0.0,
                'label/mean/other_head': 0.0,
                'example_count/english_head': 1.0
            })
        metric_variables = eval_saved_model.get_metric_variables()

        example2 = self._makeMultiHeadExample('chinese')
        features_predictions_labels = self.predict_injective_single_example(
            eval_saved_model, example2.SerializeToString())
        eval_saved_model.perform_metrics_update(features_predictions_labels)
        metric_values = eval_saved_model.get_metric_values()
        self.assertDictElementsAlmostEqual(
            metric_values, {
                'label/mean/english_head': 0.5,
                'label/mean/chinese_head': 0.5,
                'label/mean/other_head': 0.0,
                'example_count/english_head': 2.0
            })

        # Now set metric variables to what they were after the first example.
        eval_saved_model.set_metric_variables(metric_variables)
        metric_values = eval_saved_model.get_metric_values()
        self.assertDictElementsAlmostEqual(
            metric_values, {
                'label/mean/english_head': 1.0,
                'label/mean/chinese_head': 0.0,
                'label/mean/other_head': 0.0,
                'example_count/english_head': 1.0
            })
Example #26
    def testAggregateOverallSlice(self):

        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = linear_classifier.simple_linear_classifier(
            None, temp_eval_export_dir)

        eval_saved_model = load.EvalSavedModel(eval_export_dir)
        eval_shared_model = self.createTestEvalSharedModel(
            eval_saved_model_path=eval_export_dir)

        with beam.Pipeline() as pipeline:
            example1 = self._makeExample(age=3.0,
                                         language='english',
                                         label=1.0)
            example2 = self._makeExample(age=3.0,
                                         language='chinese',
                                         label=0.0)
            example3 = self._makeExample(age=4.0,
                                         language='english',
                                         label=1.0)
            example4 = self._makeExample(age=5.0,
                                         language='chinese',
                                         label=0.0)

            predict_result = eval_saved_model.as_features_predictions_labels(
                eval_saved_model.predict_list([
                    example1.SerializeToString(),
                    example2.SerializeToString(),
                    example3.SerializeToString(),
                    example4.SerializeToString()
                ]))

            metrics, _ = (
                pipeline
                | 'CreateTestInput' >> beam.Create(
                    create_test_input(predict_result, [()]))
                | 'ComputePerSliceMetrics' >> aggregate.ComputePerSliceMetrics(
                    eval_shared_model=eval_shared_model, desired_batch_size=3))

            def check_result(got):
                self.assertEqual(1, len(got), 'got: %s' % got)
                slice_key, metrics = got[0]
                self.assertEqual(slice_key, ())
                self.assertDictElementsAlmostEqual(
                    metrics, {
                        'accuracy': 1.0,
                        'label/mean': 0.5,
                        'my_mean_age': 3.75,
                        'my_mean_age_times_label': 1.75,
                    })

            util.assert_that(metrics, check_result)
Example #27
    def construct_fn():  # pylint: disable=invalid-name
        """Function for constructing shared models."""
        # If we are evaluating on TPU, initialize the TPU.
        # TODO(b/143484017): Add model warmup for TPU.
        if tf.saved_model.TPU in tags:
            tf.tpu.experimental.initialize_tpu_system()
        if (model_type == constants.TF_ESTIMATOR
                and eval_constants.EVAL_TAG in tags):
            model = load.EvalSavedModel(
                eval_saved_model_path,
                include_default_metrics,
                additional_fetches=additional_fetches,
                blacklist_feature_fetches=blacklist_feature_fetches,
                tags=tags)
            if add_metrics_callbacks:
                model.register_add_metric_callbacks(add_metrics_callbacks)
            model.graph_finalize()
        elif model_type == constants.TF_KERAS:
            model = tf.keras.models.load_model(eval_saved_model_path)
        elif model_type == constants.TF_LITE:
            # The tf.lite.Interpreter is not thread-safe so we only load the model
            # file's contents and leave construction of the Interpreter up to the
            # PTransform using it.
            model_filename = os.path.join(eval_saved_model_path,
                                          _TFLITE_FILE_NAME)
            with tf.io.gfile.GFile(model_filename, 'rb') as model_file:
                model_bytes = model_file.read()

            # If a SavedModel is present in the same directory, load it as well.
            # This allows the SavedModel to be used for computing the
            # Transformed Features and Labels.
            if (tf.io.gfile.exists(
                    os.path.join(eval_saved_model_path,
                                 tf.saved_model.SAVED_MODEL_FILENAME_PB))
                    or tf.io.gfile.exists(
                        os.path.join(
                            eval_saved_model_path,
                            tf.saved_model.SAVED_MODEL_FILENAME_PBTXT))):
                model = tf.compat.v1.saved_model.load_v2(eval_saved_model_path,
                                                         tags=tags)
                model.contents = model_bytes
            else:
                model = ModelContents(model_bytes)

        elif model_type == constants.TF_JS:
            # We invoke TFJS models via a subprocess call, so this call is a no-op.
            return None
        else:
            model = tf.compat.v1.saved_model.load_v2(eval_saved_model_path,
                                                     tags=tags)
        return model
Example #28
    def benchmarkEvalSavedModelMetricsResetUpdateGetList(self):
        """Benchmark using the EvalSavedModel to compute metrics.

        Runs EvalSavedModel.metrics_reset_update_get_list and records the wall
        time taken.
        """
        batch_size = 1000

        eval_saved_model = load.EvalSavedModel(
            path=self._dataset.tfma_saved_model_path(),
            include_default_metrics=True)

        records = self._dataset.read_raw_dataset(
            deserialize=False, limit=self._max_num_examples())

        start = time.time()
        accumulators = []
        for batch in benchmark_utils.batched_iterator(records, batch_size):
            accumulators.append(
                eval_saved_model.metrics_reset_update_get_list(batch))
        end = time.time()
        delta = end - start

        # Sanity check
        metric_variables_sum = accumulators[0]
        for acc in accumulators[1:]:
            if len(metric_variables_sum) != len(acc):
                raise ValueError(
                    "all metric variable value lists should have the same length, but "
                    "got lists with different lengths: %d and %d" %
                    (len(metric_variables_sum), len(acc)))
            metric_variables_sum = [
                a + b for a, b in zip(metric_variables_sum, acc)
            ]

        metrics = eval_saved_model.metrics_set_variables_and_get_values(
            metric_variables_sum)
        if "average_loss" not in metrics:
            raise ValueError(
                "metrics should contain average_loss metric, but it did not. "
                "metrics were: %s" % metrics)

        self.report_benchmark(
            iters=1,
            wall_time=delta,
            extras={
                "batch_size":
                batch_size,
                "num_examples":
                self._dataset.num_examples(limit=self._max_num_examples())
            })
Example #29
    def testEvaluateWithOnlyAdditionalMetricsBasic(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = multi_head.simple_multi_head(
            None, temp_eval_export_dir)

        eval_saved_model = load.EvalSavedModel(eval_export_dir,
                                               include_default_metrics=False)
        _, prediction_dict, label_dict = (
            eval_saved_model.get_features_predictions_labels_dicts())
        with eval_saved_model.graph_as_default():
            metric_ops = {}
            value_op, update_op = tf.compat.v1.metrics.mean_absolute_error(
                label_dict['english_head'][0][0],
                prediction_dict['english_head/probabilities'][0][1])
            metric_ops['mean_absolute_error/english_head'] = (value_op,
                                                              update_op)

            value_op, update_op = metrics.total(
                tf.shape(input=prediction_dict['english_head/logits'])[0])
            metric_ops['example_count/english_head'] = (value_op, update_op)

            eval_saved_model.register_additional_metric_ops(metric_ops)

        example1 = self._makeMultiHeadExample('english').SerializeToString()
        example2 = self._makeMultiHeadExample('chinese').SerializeToString()
        eval_saved_model.metrics_reset_update_get_list([example1, example2])

        metric_values = eval_saved_model.get_metric_values()

        # Check that the original metrics are not there.
        self.assertNotIn('accuracy/english_head', metric_values)
        self.assertNotIn('accuracy/chinese_head', metric_values)
        self.assertNotIn('accuracy/other_head', metric_values)
        self.assertNotIn('auc/english_head', metric_values)
        self.assertNotIn('auc/chinese_head', metric_values)
        self.assertNotIn('auc/other_head', metric_values)
        self.assertNotIn('label/mean/english_head', metric_values)
        self.assertNotIn('label/mean/chinese_head', metric_values)
        self.assertNotIn('label/mean/other_head', metric_values)

        # Check the added metrics.
        # We don't control the trained model's weights fully, but it should
        # predict probabilities > 0.7.
        self.assertIn('mean_absolute_error/english_head', metric_values)
        self.assertLess(metric_values['mean_absolute_error/english_head'], 0.3)

        self.assertHasKeyWithValueAlmostEqual(metric_values,
                                              'example_count/english_head',
                                              2.0)
Example #30
    def testEvaluateExistingMetricsCSVInputBasic(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = (
            csv_linear_classifier.simple_csv_linear_classifier(
                None, temp_eval_export_dir))

        eval_saved_model = load.EvalSavedModel(eval_export_dir)
        eval_saved_model.metrics_reset_update_get_list(
            ['3.0,english,1.0', '3.0,chinese,0.0'])

        metric_values = eval_saved_model.get_metric_values()
        self.assertDictElementsAlmostEqual(metric_values, {
            'accuracy': 1.0,
            'auc': 1.0
        })