Example 1
    def testValidation(self):
        # Test no feature spec.
        with self.assertRaisesRegexp(
                ValueError, 'ModelAgnosticConfig must have feature_spec set.'):
            model_agnostic_predict.ModelAgnosticConfig(
                label_keys=['label'],
                prediction_keys=['probabilities'],
                feature_spec=None)

        # Test no prediction keys.
        feature_map = {
            'age': tf.FixedLenFeature([], tf.int64),
            'language': tf.VarLenFeature(tf.string),
            'probabilities': tf.FixedLenFeature(
                [2], tf.int64, default_value=[9, 9]),
            'label': tf.FixedLenFeature([], tf.int64)
        }

        with self.assertRaisesRegexp(
                ValueError,
                'ModelAgnosticConfig must have prediction keys set.'):
            model_agnostic_predict.ModelAgnosticConfig(
                label_keys=['label'],
                prediction_keys=[],
                feature_spec=feature_map)

        # Test no label keys.
        with self.assertRaisesRegexp(
                ValueError, 'ModelAgnosticConfig must have label keys set.'):
            model_agnostic_predict.ModelAgnosticConfig(
                label_keys=[],
                prediction_keys=['predictions'],
                feature_spec=feature_map)

        # Test prediction key not in feature spec.
        with self.assertRaisesRegexp(
                ValueError,
                'Prediction key not_prob not defined in feature_spec.'):
            model_agnostic_predict.ModelAgnosticConfig(
                label_keys=['label'],
                prediction_keys=['not_prob'],
                feature_spec=feature_map)

        # Test label key not in feature spec.
        with self.assertRaisesRegexp(
                ValueError,
                'Label key not_label not defined in feature_spec.'):
            model_agnostic_predict.ModelAgnosticConfig(
                label_keys=['not_label'],
                prediction_keys=['probabilities'],
                feature_spec=feature_map)
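
For contrast with the failure cases above, here is a minimal, illustrative sketch (not part of the original test) of a configuration that passes validation: every key listed in label_keys and prediction_keys must also appear in feature_spec. It reuses the same tf and model_agnostic_predict aliases as the test above.

import tensorflow as tf

# Illustrative only: every label and prediction key below is present in the
# feature spec, so construction succeeds without raising ValueError.
valid_feature_spec = {
    'age': tf.FixedLenFeature([], tf.int64),
    'language': tf.VarLenFeature(tf.string),
    'probabilities': tf.FixedLenFeature([2], tf.int64, default_value=[9, 9]),
    'label': tf.FixedLenFeature([], tf.int64),
}
config = model_agnostic_predict.ModelAgnosticConfig(
    label_keys=['label'],
    prediction_keys=['probabilities'],
    feature_spec=valid_feature_spec)

Example 2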
  def testExtract(self):
    with beam.Pipeline() as pipeline:
      examples = [
          self._makeExample(
              age=3.0, language='english', probabilities=[1.0, 2.0], label=1.0),
          self._makeExample(
              age=3.0, language='chinese', probabilities=[2.0, 3.0], label=0.0),
          self._makeExample(
              age=4.0, language='english', probabilities=[3.0, 4.0], label=1.0),
          self._makeExample(
              age=5.0, language='chinese', probabilities=[4.0, 5.0], label=0.0),
      ]
      serialized_examples = [e.SerializeToString() for e in examples]

      # Set up a config to bucket our example keys.
      feature_map = {
          'age': tf.io.FixedLenFeature([], tf.float32),
          'language': tf.io.VarLenFeature(tf.string),
          'probabilities': tf.io.FixedLenFeature([2], tf.float32),
          'label': tf.io.FixedLenFeature([], tf.float32)
      }
      model_agnostic_config = agnostic_predict.ModelAgnosticConfig(
          label_keys=['label'],
          prediction_keys=['probabilities'],
          feature_spec=feature_map)

      fpl_extracts = (
          pipeline
          | beam.Create(serialized_examples)
          # Our diagnostic outputs pass types.Extracts throughout; however, our
          # aggregating functions do not use this interface.
          | beam.Map(lambda x: {constants.INPUT_KEY: x})
          | 'Extract' >> model_agnostic_extractor.ModelAgnosticExtract(
              model_agnostic_config=model_agnostic_config, desired_batch_size=3)
      )

      def check_result(got):
        try:
          self.assertEqual(4, len(got), 'got: %s' % got)
          for item in got:
            self.assertIn(constants.FEATURES_PREDICTIONS_LABELS_KEY, item)
            fpl = item[constants.FEATURES_PREDICTIONS_LABELS_KEY]
            # Verify fpl contains features, probabilities, and correct labels.
            self.assertIn('language', fpl.features)
            self.assertIn('age', fpl.features)
            self.assertIn('label', fpl.labels)
            self.assertIn('probabilities', fpl.predictions)

        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(fpl_extracts, check_result)
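
These snippets call a self._makeExample helper inherited from TFMA's test base class, which is not shown here. A rough, hypothetical stand-in that builds a tf.train.Example from keyword arguments, with the feature type inferred from the Python values, might look like the sketch below; the real helper may differ.

import tensorflow as tf


def make_example(**features):
  """Hypothetical stand-in for self._makeExample: kwargs -> tf.train.Example."""
  example = tf.train.Example()
  for name, value in features.items():
    values = value if isinstance(value, (list, tuple)) else [value]
    feature = example.features.feature[name]
    if all(isinstance(v, float) for v in values):
      feature.float_list.value.extend(values)
    elif all(isinstance(v, int) for v in values):
      feature.int64_list.value.extend(values)
    else:
      feature.bytes_list.value.extend(
          v.encode('utf-8') if isinstance(v, str) else v for v in values)
  return example


# Usage mirroring the tests above:
serialized = make_example(
    age=3.0, language='english', probabilities=[1.0, 2.0],
    label=1.0).SerializeToString()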
Example 3
    def testEvaluateMultiLabelsPredictions(self):
        # Test case where we have multiple labels/predictions
        # Have 6 labels of values 3, 5, 23, 12, 16, 31 and
        # 6 predictions of values 2, 2, 2, 4, 4, 4
        # This should give sum = 108 and mean = 9.

        examples = [
            self._makeExample(age=1.0,
                              prediction=2,
                              prediction_2=4,
                              label=3,
                              label_2=5),
            self._makeExample(age=1.0,
                              prediction=2,
                              prediction_2=4,
                              label=23,
                              label_2=12),
            self._makeExample(age=1.0,
                              prediction=2,
                              prediction_2=4,
                              label=16,
                              label_2=31),
        ]
        serialized_examples = [e.SerializeToString() for e in examples]

        # Set up a model agnostic config so we can get the FPLConfig.
        feature_map = {
            'age': tf.io.FixedLenFeature([], tf.float32),
            'prediction': tf.io.FixedLenFeature([], tf.int64),
            'prediction_2': tf.io.FixedLenFeature([], tf.int64),
            'label': tf.io.FixedLenFeature([], tf.int64),
            'label_2': tf.io.FixedLenFeature([], tf.int64)
        }

        model_agnostic_config = agnostic_predict.ModelAgnosticConfig(
            label_keys=['label', 'label_2'],
            prediction_keys=['prediction', 'prediction_2'],
            feature_spec=feature_map)

        # Create a Model Agnostic Evaluate graph handler and feed it the
        # serialized examples.
        evaluate_graph = model_agnostic_evaluate_graph.ModelAgnosticEvaluateGraph(
            [add_mean_callback], model_agnostic_config)
        evaluate_graph.metrics_reset_update_get_list(serialized_examples)
        outputs = evaluate_graph.get_metric_values()

        # Verify that we got the right metrics out.
        self.assertEqual(2, len(outputs))
        self.assertEqual(outputs['tf_metric_mean'], 9.0)
        self.assertEqual(outputs['py_func_total_label'], 108.0)
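
The add_mean_callback referenced in this and the following examples is defined elsewhere in the TFMA test module and is not shown. Judging from the asserted values, it reports the mean of all label and prediction values as 'tf_metric_mean' and their running total as 'py_func_total_label'. A rough sketch of such a callback, using the (features_dict, predictions_dict, labels_dict) -> metric_ops convention visible in these examples, could look like the following; the actual implementation may differ.

import numpy as np
import tensorflow as tf


def add_mean_callback(features_dict, predictions_dict, labels_dict):
  """Sketch of a post-export metrics callback producing the two metrics above."""
  del features_dict  # These metrics only look at labels and predictions.
  metric_ops = {}

  # All label and prediction values, stacked into a single float tensor.
  values = tf.cast(
      tf.stack(list(labels_dict.values()) + list(predictions_dict.values())),
      tf.float64)

  # Mean over every label and prediction value (tf.metrics-style metric).
  metric_ops['tf_metric_mean'] = tf.compat.v1.metrics.mean(values)

  # Running total of the same values, computed through a py_func. Metric state
  # lives in a METRIC_VARIABLES variable so it is reset with the other metrics.
  total = tf.compat.v1.Variable(
      initial_value=0.0,
      dtype=tf.float64,
      trainable=False,
      collections=[
          tf.compat.v1.GraphKeys.METRIC_VARIABLES,
          tf.compat.v1.GraphKeys.LOCAL_VARIABLES,
      ],
      name='total_label')
  batch_sum = tf.compat.v1.py_func(
      lambda x: np.sum(x, dtype=np.float64), [values], tf.float64)
  update_op = tf.compat.v1.assign_add(total, batch_sum)
  metric_ops['py_func_total_label'] = (tf.identity(total), update_op)
  return metric_ops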
Example 4
    def testEvaluateGraph(self):
        # Have 3 labels of values 3, 23, 16 and predictions of values 2, 2, 2.
        # This should give sum = 48 and mean = 8.
        examples = [
            self._makeExample(age=3.0,
                              language='english',
                              predictions=2.0,
                              labels=3.0),
            self._makeExample(age=3.0,
                              language='chinese',
                              predictions=2.0,
                              labels=23.0),
            self._makeExample(age=4.0,
                              language='english',
                              predictions=2.0,
                              labels=16.0),
        ]
        serialized_examples = [e.SerializeToString() for e in examples]

        # Set up a model agnostic config so we can get the FPLConfig.
        feature_map = {
            'age': tf.io.FixedLenFeature([], tf.float32),
            'language': tf.io.VarLenFeature(tf.string),
            'predictions': tf.io.FixedLenFeature([], tf.float32),
            'labels': tf.io.FixedLenFeature([], tf.float32)
        }

        model_agnostic_config = agnostic_predict.ModelAgnosticConfig(
            label_keys=['labels'],
            prediction_keys=['predictions'],
            feature_spec=feature_map)

        # Create a Model Agnostic Evaluate graph handler and feed it the
        # serialized examples.
        evaluate_graph = model_agnostic_evaluate_graph.ModelAgnosticEvaluateGraph(
            [add_mean_callback], model_agnostic_config)
        evaluate_graph.metrics_reset_update_get_list(serialized_examples)
        outputs = evaluate_graph.get_metric_values()

        # Verify that we got the right metrics out.
        self.assertEqual(2, len(outputs))
        self.assertEqual(outputs['tf_metric_mean'], 8.0)
        self.assertEqual(outputs['py_func_total_label'], 48.0)
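Example 5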
    def testEvaluateGraph(self):
        # Create some FPLs. The Features aren't terribly useful for these metrics.
        # Just make sure they can be processed correctly by the feed/feedlist
        # generation logic by having one dense tensor and one sparse tensor.
        features = {
            'age': {
                encoding.NODE_SUFFIX: np.array([1])
            },
            'language': {
                encoding.NODE_SUFFIX:
                tf.SparseTensorValue(indices=np.array([[0, 0]]),
                                     values=np.array(['english']),
                                     dense_shape=np.array([1, 1]))
            }
        }
        predictions = {'predictions': {encoding.NODE_SUFFIX: np.array([2])}}
        # Have 3 labels of values 3, 23, 16 and predictions of values 2, 2, 2.
        # This should give sum = 48 and mean = 8.
        labels = {'labels': {encoding.NODE_SUFFIX: np.array([3])}}
        labels_2 = {'labels': {encoding.NODE_SUFFIX: np.array([23])}}
        labels_3 = {'labels': {encoding.NODE_SUFFIX: np.array([16])}}

        # Compile the actual FPLs
        fpl = types.FeaturesPredictionsLabels(input_ref=0,
                                              features=features,
                                              predictions=predictions,
                                              labels=labels)
        fpl_2 = types.FeaturesPredictionsLabels(input_ref=0,
                                                features=features,
                                                predictions=predictions,
                                                labels=labels_2)
        fpl_3 = types.FeaturesPredictionsLabels(input_ref=0,
                                                features=features,
                                                predictions=predictions,
                                                labels=labels_3)

        # Set up a model agnostic config so we can get the FPLConfig.
        feature_map = {
            'age': tf.FixedLenFeature([], tf.float32),
            'language': tf.VarLenFeature(tf.string),
            'predictions': tf.FixedLenFeature([], tf.float32),
            'labels': tf.FixedLenFeature([], tf.float32)
        }

        model_agnostic_config = agnostic_predict.ModelAgnosticConfig(
            label_keys=['labels'],
            prediction_keys=['predictions'],
            feature_spec=feature_map)

        # Create a Model Agnostic Evaluate graph handler and feed in the FPL list.
        evaluate_graph = model_agnostic_evaluate_graph.ModelAgnosticEvaluateGraph(
            [add_mean_callback],
            model_agnostic_extractor.ModelAgnosticGetFPLFeedConfig(
                model_agnostic_config))
        evaluate_graph.metrics_reset_update_get_list([fpl, fpl_2, fpl_3])
        outputs = evaluate_graph.get_metric_values()

        # Verify that we got the right metrics out.
        self.assertEqual(2, len(outputs))
        self.assertEqual(outputs['tf_metric_mean'], 8.0)
        self.assertEqual(outputs['py_func_total_label'], 48.0)
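Example 6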
    def testModelAgnosticConstructFn(self):
        # End to end test for the entire flow going from tf.Examples -> metrics
        # with slicing.
        with beam.Pipeline() as pipeline:
            # Set up the inputs. All we need are tf.Examples and an example
            # parsing spec with an explicit mapping from key to
            # (Features, Predictions, Labels).
            examples = [
                self._makeExample(age=3.0,
                                  language='english',
                                  probabilities=1.0,
                                  labels=1.0),
                self._makeExample(age=3.0,
                                  language='chinese',
                                  probabilities=3.0,
                                  labels=0.0),
                self._makeExample(age=4.0,
                                  language='english',
                                  probabilities=2.0,
                                  labels=1.0),
                self._makeExample(age=5.0,
                                  language='chinese',
                                  probabilities=3.0,
                                  labels=0.0),
                # Add some examples with no language.
                self._makeExample(age=5.0, probabilities=2.0, labels=10.0),
                self._makeExample(age=6.0, probabilities=1.0, labels=0.0)
            ]
            serialized_examples = [e.SerializeToString() for e in examples]

            # Set up a config to bucket our example keys.
            feature_map = {
                'age': tf.FixedLenFeature([], tf.float32),
                'language': tf.VarLenFeature(tf.string),
                'probabilities': tf.FixedLenFeature([], tf.float32),
                'labels': tf.FixedLenFeature([], tf.float32)
            }

            model_agnostic_config = agnostic_predict.ModelAgnosticConfig(
                label_keys=['labels'],
                prediction_keys=['probabilities'],
                feature_spec=feature_map)

            # Set up the Model Agnostic Extractor
            extractors = [
                model_agnostic_extractor.ModelAgnosticExtractor(
                    model_agnostic_config=model_agnostic_config,
                    desired_batch_size=3),
                slice_key_extractor.SliceKeyExtractor([
                    slicer.SingleSliceSpec(),
                    slicer.SingleSliceSpec(columns=['language'])
                ])
            ]

            # Set up the metrics we wish to calculate via a metric callback. In
            # particular, this metric calculates the mean and sum of all labels.
            eval_shared_model = types.EvalSharedModel(
                add_metrics_callbacks=[add_mean_callback],
                construct_fn=model_agnostic_evaluate_graph.make_construct_fn(
                    add_metrics_callbacks=[add_mean_callback],
                    fpl_feed_config=model_agnostic_extractor.
                    ModelAgnosticGetFPLFeedConfig(model_agnostic_config)))

            # Run our pipeline doing Extract -> Slice -> Fanout -> Calculate Metrics.
            metrics, _ = (
                pipeline
                | 'Create Examples' >> beam.Create(serialized_examples)
                | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
                | 'Extract' >> tfma_unit.Extract(extractors=extractors)  # pylint: disable=no-value-for-parameter
                | 'ComputeMetricsAndPlots' >> metrics_and_plots_evaluator.
                ComputeMetricsAndPlots(eval_shared_model=eval_shared_model))

            # Verify our metrics are properly generated per slice.
            def check_result(got):
                self.assertEqual(3, len(got), 'got: %s' % got)
                slices = {}
                for slice_key, metrics in got:
                    slices[slice_key] = metrics
                overall_slice = ()
                english_slice = (('language', b'english'), )
                chinese_slice = (('language', b'chinese'), )

                self.assertItemsEqual(
                    list(slices.keys()),
                    [overall_slice, english_slice, chinese_slice])
                # Overall slice has label/predictions sum = 24 and 12 elements.
                self.assertDictElementsAlmostEqual(slices[overall_slice], {
                    'tf_metric_mean': 2.0,
                    'py_func_total_label': 24.0,
                })
                # English slice has label/predictions sum = 5 and 4 elements.
                self.assertDictElementsAlmostEqual(slices[english_slice], {
                    'tf_metric_mean': 1.25,
                    'py_func_total_label': 5.0,
                })
                # Chinese slice has label/predictions sum = 6 and 4 elements.
                self.assertDictElementsAlmostEqual(slices[chinese_slice], {
                    'tf_metric_mean': 1.5,
                    'py_func_total_label': 6.0,
                })

            util.assert_that(metrics, check_result)
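Example 7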
    def testEvaluateMultiLabelsPredictions(self):
        # Test case where we have multiple labels/predictions
        features = {'age': {encoding.NODE_SUFFIX: np.array([1])}}
        predictions = {
            'prediction': {
                encoding.NODE_SUFFIX: np.array([2])
            },
            'prediction_2': {
                encoding.NODE_SUFFIX: np.array([4])
            }
        }
        # Have 6 labels of values 3, 5, 23, 12, 16, 31 and
        # 6 predictions of values 2, 2, 2, 4, 4, 4
        # This should give sum = 108 and mean = 9.
        labels = {
            'label': {
                encoding.NODE_SUFFIX: np.array([3])
            },
            'label_2': {
                encoding.NODE_SUFFIX: np.array([5])
            }
        }
        labels_2 = {
            'label': {
                encoding.NODE_SUFFIX: np.array([23])
            },
            'label_2': {
                encoding.NODE_SUFFIX: np.array([12])
            }
        }
        labels_3 = {
            'label': {
                encoding.NODE_SUFFIX: np.array([16])
            },
            'label_2': {
                encoding.NODE_SUFFIX: np.array([31])
            }
        }

        # Compile the actual FPLs
        fpl = types.FeaturesPredictionsLabels(input_ref=0,
                                              features=features,
                                              predictions=predictions,
                                              labels=labels)
        fpl_2 = types.FeaturesPredictionsLabels(input_ref=0,
                                                features=features,
                                                predictions=predictions,
                                                labels=labels_2)
        fpl_3 = types.FeaturesPredictionsLabels(input_ref=0,
                                                features=features,
                                                predictions=predictions,
                                                labels=labels_3)

        # Set up a model agnostic config so we can get the FPLConfig.
        feature_map = {
            'age': tf.FixedLenFeature([], tf.float32),
            'prediction': tf.FixedLenFeature([], tf.int64),
            'prediction_2': tf.FixedLenFeature([], tf.int64),
            'label': tf.FixedLenFeature([], tf.int64),
            'label_2': tf.FixedLenFeature([], tf.int64)
        }

        model_agnostic_config = agnostic_predict.ModelAgnosticConfig(
            label_keys=['label', 'label_2'],
            prediction_keys=['prediction', 'prediction_2'],
            feature_spec=feature_map)

        # Create a Model Agnostic Evaluate graph handler and feed in the FPL list.
        evaluate_graph = model_agnostic_evaluate_graph.ModelAgnosticEvaluateGraph(
            [add_mean_callback],
            model_agnostic_extractor.ModelAgnosticGetFPLFeedConfig(
                model_agnostic_config))
        evaluate_graph.metrics_reset_update_get_list([fpl, fpl_2, fpl_3])
        outputs = evaluate_graph.get_metric_values()

        # Verify that we got the right metrics out.
        self.assertEqual(2, len(outputs))
        self.assertEqual(outputs['tf_metric_mean'], 9.0)
        self.assertEqual(outputs['py_func_total_label'], 108.0)
Example 8
    def testExtractFplExampleGraph(self):
        # Set up some examples with some sparse (missing) features.
        examples = [
            self._makeExample(age=0,
                              language='english',
                              probabilities=[0.2, 0.8],
                              label=1),
            self._makeExample(age=1, language='chinese', label=0),
            self._makeExample(age=2, probabilities=[0.1, 0.9], label=1),
            self._makeExample(language='chinese',
                              probabilities=[0.8, 0.2],
                              label=0),
        ]

        # Set up the expected results for the parsed fields. Note that
        # comparing entire FPLs directly will fail due to numpy array
        # comparison semantics.
        expected_age = [
            np.array([0]),
            np.array([1]),
            np.array([2]),
            np.array([3])
        ]
        expected_language = [
            tf.SparseTensorValue(indices=np.array([[0, 0]]),
                                 values=np.array([b'english'],
                                                 dtype=np.object),
                                 dense_shape=np.array([1, 1])),
            tf.SparseTensorValue(indices=np.array([[0, 0]]),
                                 values=np.array([b'chinese'],
                                                 dtype=np.object),
                                 dense_shape=np.array([1, 1])),
            tf.SparseTensorValue(indices=np.array([], dtype=np.int64).reshape(
                [0, 2]),
                                 values=np.array([], dtype=np.object),
                                 dense_shape=np.array([1, 0])),
            tf.SparseTensorValue(indices=np.array([[0, 0]]),
                                 values=np.array([b'chinese'],
                                                 dtype=np.object),
                                 dense_shape=np.array([1, 1]))
        ]
        expected_probabilities = [
            np.array([[0.2, 0.8]]),
            np.array([[0.5, 0.5]]),
            np.array([[0.1, 0.9]]),
            np.array([[0.8, 0.2]])
        ]
        expected_labels = [
            np.array([1]),
            np.array([0]),
            np.array([1]),
            np.array([0])
        ]

        # Serialize and feed into our graph.
        serialized_examples = [e.SerializeToString() for e in examples]

        # Set up a config to bucket our example keys.
        feature_map = {
            'age': tf.FixedLenFeature([1], tf.int64, default_value=[3]),
            'language': tf.VarLenFeature(tf.string),
            'probabilities': tf.FixedLenFeature(
                [2], tf.float32, default_value=[0.5, 0.5]),
            'label': tf.FixedLenFeature([], tf.int64)
        }
        model_agnostic_config = model_agnostic_predict.ModelAgnosticConfig(
            label_keys=['label'],
            prediction_keys=['probabilities'],
            feature_spec=feature_map)

        # Create our model and extract our FPLs.
        agnostic_predict = model_agnostic_predict.ModelAgnosticPredict(
            model_agnostic_config)
        fpls = agnostic_predict.get_fpls_from_examples(serialized_examples)

        # Verify the result is the correct size, has all the keys, and
        # our expected values match.
        self.assertEqual(4, len(fpls))
        for i, fpl in enumerate(fpls):
            self.assertIn('language', fpl.features)
            self.assertIn('label', fpl.labels)
            self.assertIn('label',
                          fpl.features)  # Labels should also be in features.
            self.assertIn('probabilities', fpl.predictions)
            self.assertIn('age', fpl.features)
            self.assertEqual(expected_age[i], fpl.features['age']['node'])
            self.assertSparseTensorValueEqual(expected_language[i],
                                              fpl.features['language']['node'])
            self.assertAllClose(expected_probabilities[i],
                                fpl.predictions['probabilities']['node'])
            self.assertEqual(expected_labels[i], fpl.labels['label']['node'])