def testMultiModelPredict(self):
    temp_eval_export_dir = self._getEvalExportDir()
    _, model1_dir = linear_classifier.simple_linear_classifier(
        None, temp_eval_export_dir)
    model1 = model_eval_lib.default_eval_shared_model(
        eval_saved_model_path=model1_dir)
    _, model2_dir = linear_classifier.simple_linear_classifier(
        None, temp_eval_export_dir)
    model2 = model_eval_lib.default_eval_shared_model(
        eval_saved_model_path=model2_dir)
    eval_shared_model = {'model1': model1, 'model2': model2}
    eval_config = config.EvalConfig(model_specs=[
        config.ModelSpec(name='model1', example_weight_key='age'),
        config.ModelSpec(name='model2', example_weight_key='age')
    ])

    tfx_io = raw_tf_record.RawBeamRecordTFXIO(
        physical_format='inmemory',
        raw_record_column_name=constants.ARROW_INPUT_COLUMN,
        telemetry_descriptors=['TFMATest'])
    extractor = predict_extractor.PredictExtractor(
        eval_shared_model, eval_config=eval_config)
    with beam.Pipeline() as pipeline:
      examples = [
          self._makeExample(age=3.0, language='english', label=1.0),
          self._makeExample(age=3.0, language='chinese', label=0.0),
          self._makeExample(age=4.0, language='english', label=1.0),
          self._makeExample(age=5.0, language='chinese', label=0.0),
      ]
      serialized_examples = [e.SerializeToString() for e in examples]

      predict_extracts = (
          pipeline
          | 'Create' >> beam.Create(serialized_examples, reshuffle=False)
          | 'BatchExamples' >> tfx_io.BeamSource(batch_size=2)
          | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
          | 'Predict' >> extractor.ptransform)

      def check_result(got):
        try:
          self.assertLen(got, 2)
          for item in got:
            self.assertIn(constants.FEATURES_KEY, item)
            for feature in ('language', 'age'):
              for features_dict in item[constants.FEATURES_KEY]:
                self.assertIn(feature, features_dict)
            self.assertIn(constants.LABELS_KEY, item)
            self.assertIn(constants.PREDICTIONS_KEY, item)
            for model in ('model1', 'model2'):
              for predictions_dict in item[constants.PREDICTIONS_KEY]:
                self.assertIn(model, predictions_dict)
            self.assertIn(constants.EXAMPLE_WEIGHTS_KEY, item)
            for i in range(len(item[constants.FEATURES_KEY])):
              self.assertAlmostEqual(item[constants.FEATURES_KEY][i]['age'],
                                     item[constants.EXAMPLE_WEIGHTS_KEY][i])

        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(predict_extracts, check_result)
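For orientation, the assertions above rely on the following batched-extract layout; this is only a rough sketch with placeholder values (the real extractor output uses numpy arrays), not actual output of the PredictExtractor:

from tensorflow_model_analysis import constants

# Hypothetical extract for a batch of two examples: one features/labels/weights
# entry per example, and per-example predictions keyed by model name.
hypothetical_extract = {
    constants.FEATURES_KEY: [
        {'age': [3.0], 'language': ['english']},
        {'age': [3.0], 'language': ['chinese']},
    ],
    constants.LABELS_KEY: [[1.0], [0.0]],
    constants.EXAMPLE_WEIGHTS_KEY: [[3.0], [3.0]],  # example_weight_key='age'
    constants.PREDICTIONS_KEY: [
        {'model1': [0.0], 'model2': [0.0]},  # placeholder prediction values
        {'model1': [0.0], 'model2': [0.0]},
    ],
}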
Example #2
    def testEvaluateQueryBasedMetrics(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = (fixed_prediction_estimator_extra_fields.
                              simple_fixed_prediction_estimator_extra_fields(
                                  None, temp_eval_export_dir))
        eval_shared_model = self.createTestEvalSharedModel(
            eval_saved_model_path=eval_export_dir)
        extractors = [
            legacy_predict_extractor.PredictExtractor(eval_shared_model),
            slice_key_extractor.SliceKeyExtractor()
        ]

        with beam.Pipeline() as pipeline:
            metrics = (
                pipeline
                | 'Create' >> beam.Create(self._get_examples())
                | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
                | 'Extract' >> tfma_unit.Extract(extractors=extractors)  # pylint: disable=no-value-for-parameter
                | 'EvaluateQueryBasedMetrics' >>
                query_based_metrics_evaluator.EvaluateQueryBasedMetrics(
                    prediction_key='',
                    query_id='fixed_string',
                    combine_fns=[
                        query_statistics.QueryStatisticsCombineFn(),
                        ndcg.NdcgMetricCombineFn(at_vals=[1, 2],
                                                 gain_key='fixed_float',
                                                 weight_key='fixed_int'),
                        min_label_position.MinLabelPositionCombineFn(
                            label_key='', weight_key='fixed_int'),
                    ]))

            def check_metrics(got):
                try:
                    self.assertEqual(1, len(got), 'got: %s' % got)
                    got_slice_key, got_metrics = got[0]
                    self.assertEqual(got_slice_key, ())
                    self.assertDictElementsAlmostEqual(
                        got_metrics, {
                            'post_export_metrics/total_queries': 3.0,
                            'post_export_metrics/total_documents': 6.0,
                            'post_export_metrics/min_documents': 1.0,
                            'post_export_metrics/max_documents': 3.0,
                            'post_export_metrics/ndcg@1': 0.9166667,
                            'post_export_metrics/ndcg@2': 0.9766198,
                            'post_export_metrics/average_min_label_position/__labels':
                                0.6666667,
                        })

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(metrics[constants.METRICS_KEY],
                             check_metrics,
                             label='metrics')
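The asserted ndcg@1 and ndcg@2 values depend on the examples returned by self._get_examples(), which are not shown in this snippet. For reference, a generic sketch of NDCG@k follows; the gain and discount conventions, and the per-query weighting via weight_key, may differ from ndcg.NdcgMetricCombineFn:

import math

def ndcg_at_k(gains_in_rank_order, k):
  # Illustrative NDCG@k using the common (2**gain - 1) / log2(rank + 1)
  # convention: DCG of the predicted order divided by DCG of the ideal order.
  def dcg(gains):
    return sum((2.0 ** g - 1.0) / math.log2(i + 2.0)
               for i, g in enumerate(gains[:k]))
  ideal = dcg(sorted(gains_in_rank_order, reverse=True))
  return dcg(gains_in_rank_order) / ideal if ideal else 0.0

# One hypothetical query with gains 1.0, 2.0, 0.0 listed in predicted-rank order.
print(ndcg_at_k([1.0, 2.0, 0.0], k=2))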
Example #3
    def testEvaluateNoSlicing(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = linear_classifier.simple_linear_classifier(
            None, temp_eval_export_dir)
        eval_shared_model = self.createTestEvalSharedModel(
            eval_saved_model_path=eval_export_dir,
            add_metrics_callbacks=[_addExampleCountMetricCallback])
        extractors = [
            legacy_predict_extractor.PredictExtractor(eval_shared_model),
            slice_key_extractor.SliceKeyExtractor()
        ]

        with beam.Pipeline() as pipeline:
            example1 = self._makeExample(age=3.0,
                                         language='english',
                                         label=1.0)
            example2 = self._makeExample(age=3.0,
                                         language='chinese',
                                         label=0.0)
            example3 = self._makeExample(age=4.0,
                                         language='english',
                                         label=1.0)
            example4 = self._makeExample(age=5.0,
                                         language='chinese',
                                         label=0.0)

            (metrics, _), _ = (
                pipeline
                | 'Create' >> beam.Create([
                    example1.SerializeToString(),
                    example2.SerializeToString(),
                    example3.SerializeToString(),
                    example4.SerializeToString()
                ])
                | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
                | 'Extract' >> tfma_unit.Extract(extractors=extractors)  # pylint: disable=no-value-for-parameter
                | 'ComputeMetricsAndPlots' >> metrics_and_plots_evaluator.
                _ComputeMetricsAndPlots(eval_shared_model=eval_shared_model))

            def check_result(got):
                try:
                    self.assertEqual(1, len(got), 'got: %s' % got)
                    (slice_key, value) = got[0]
                    self.assertEqual((), slice_key)
                    self.assertDictElementsAlmostEqual(
                        value, {
                            'accuracy': 1.0,
                            'label/mean': 0.5,
                            'my_mean_age': 3.75,
                            'my_mean_age_times_label': 1.75,
                            'added_example_count': 4.0
                        })
                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(metrics, check_result)
Example #4
    def testEvaluateNoSlicingAddPostExportAndCustomMetricsUnsupervisedModel(
            self):
        # Mainly for testing that the ExampleCount post export metric works with
        # unsupervised models.
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = (fixed_prediction_estimator_no_labels.
                              simple_fixed_prediction_estimator_no_labels(
                                  None, temp_eval_export_dir))
        eval_shared_model = self.createTestEvalSharedModel(
            eval_saved_model_path=eval_export_dir,
            add_metrics_callbacks=[
                post_export_metrics.example_count(),
                post_export_metrics.example_weight(
                    example_weight_key='prediction')
            ])
        extractors = [
            legacy_predict_extractor.PredictExtractor(eval_shared_model),
            slice_key_extractor.SliceKeyExtractor()
        ]

        with beam.Pipeline() as pipeline:
            example1 = self._makeExample(prediction=1.0)
            example2 = self._makeExample(prediction=2.0)

            (metrics, plots), _ = (
                pipeline
                | 'Create' >> beam.Create([
                    example1.SerializeToString(),
                    example2.SerializeToString(),
                ])
                | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
                | 'Extract' >> tfma_unit.Extract(extractors=extractors)  # pylint: disable=no-value-for-parameter
                | 'ComputeMetricsAndPlots' >> metrics_and_plots_evaluator.
                _ComputeMetricsAndPlots(eval_shared_model=eval_shared_model))

            def check_result(got):
                try:
                    self.assertEqual(1, len(got), 'got: %s' % got)
                    (slice_key, value) = got[0]
                    self.assertEqual((), slice_key)
                    self.assertDictElementsAlmostEqual(
                        got_values_dict=value,
                        expected_values_dict={
                            'average_loss': 2.5,
                            metric_keys.EXAMPLE_COUNT: 2.0,
                            metric_keys.EXAMPLE_WEIGHT: 3.0
                        })
                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(metrics, check_result, label='metrics')
            util.assert_that(plots, util.is_empty(), label='plots')
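A quick arithmetic check of the expected values above; the average_loss reading is an assumption, since the estimator's loss function is not shown in this snippet:

# EXAMPLE_COUNT and EXAMPLE_WEIGHT follow directly from the two examples.
predictions = [1.0, 2.0]
print(float(len(predictions)))  # EXAMPLE_COUNT = 2.0
print(sum(predictions))         # EXAMPLE_WEIGHT = 3.0 (example_weight_key='prediction')
# average_loss = 2.5 would be consistent with a mean squared prediction,
# (1.0**2 + 2.0**2) / 2, but that depends on how the fixed-prediction
# estimator defines its loss.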
Example #5
def BuildAnalysisTable(  # pylint: disable=invalid-name
    examples: beam.pvalue.PCollection,
    eval_shared_model: types.EvalSharedModel,
    slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
    desired_batch_size: Optional[int] = None,
    extractors: Optional[List[extractor.Extractor]] = None,
    evaluators: Optional[List[evaluator.Evaluator]] = None
) -> beam.pvalue.PCollection:
  """Builds an analysis table from data extracted from the input.

  Use this function to build an example-oriented PCollection of output data
  useful for debugging models.

  Args:
    examples: PCollection of input examples. Can be any format the model accepts
      (e.g. a string containing a CSV row, a serialized tf.Example, etc.).
    eval_shared_model: Shared model parameters for EvalSavedModel.
    slice_spec: Optional list of SingleSliceSpec specifying the slices to slice
      the data into. If None, defaults to the overall slice.
    desired_batch_size: Optional batch size for batching in Predict and
      Aggregate.
    extractors: Optional list of Extractors to execute prior to slicing and
      aggregating the metrics. If not provided, a default set will be run.
    evaluators: Optional list of Evaluators for evaluating Extracts. If not
      provided, a default set will be used.

  Returns:
    beam.pvalue.PCollection of Extracts. The caller is responsible for
    writing the output to file for now.
  """
  if not slice_spec:
    slice_spec = [slicer.SingleSliceSpec()]

  if not extractors:
    extractors = [
        legacy_predict_extractor.PredictExtractor(eval_shared_model,
                                                  desired_batch_size),
        legacy_feature_extractor.FeatureExtractor(),
        slice_key_extractor.SliceKeyExtractor(slice_spec)
    ]
  if not evaluators:
    evaluators = [analysis_table_evaluator.AnalysisTableEvaluator()]

  # pylint: disable=no-value-for-parameter
  return (examples
          | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
          | model_eval_lib.ExtractAndEvaluate(
              extractors=extractors, evaluators=evaluators))
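A minimal usage sketch for BuildAnalysisTable; the paths are hypothetical placeholders, and how the resulting Extracts get materialized is left to the caller, as noted in the docstring:

import apache_beam as beam
import tensorflow_model_analysis as tfma

eval_shared_model = tfma.default_eval_shared_model(
    eval_saved_model_path='/path/to/eval_saved_model')  # hypothetical path

with beam.Pipeline() as pipeline:
  examples = (
      pipeline
      | 'ReadExamples' >> beam.io.ReadFromTFRecord(
          '/path/to/examples.tfrecord'))  # hypothetical path
  extracts = BuildAnalysisTable(
      examples=examples, eval_shared_model=eval_shared_model)
  # Write or otherwise inspect `extracts` here before the pipeline exits.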
Example #6
    def testEvaluateWithPlots(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = (
            fixed_prediction_estimator.simple_fixed_prediction_estimator(
                None, temp_eval_export_dir))
        eval_shared_model = self.createTestEvalSharedModel(
            eval_saved_model_path=eval_export_dir,
            add_metrics_callbacks=[
                post_export_metrics.example_count(),
                post_export_metrics.auc_plots()
            ])
        extractors = [
            legacy_predict_extractor.PredictExtractor(eval_shared_model),
            slice_key_extractor.SliceKeyExtractor()
        ]

        with beam.Pipeline() as pipeline:
            example1 = self._makeExample(prediction=0.0, label=1.0)
            example2 = self._makeExample(prediction=0.7, label=0.0)
            example3 = self._makeExample(prediction=0.8, label=1.0)
            example4 = self._makeExample(prediction=1.0, label=1.0)

            (metrics, plots), _ = (
                pipeline
                | 'Create' >> beam.Create([
                    example1.SerializeToString(),
                    example2.SerializeToString(),
                    example3.SerializeToString(),
                    example4.SerializeToString()
                ])
                | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
                | 'Extract' >> tfma_unit.Extract(extractors=extractors)  # pylint: disable=no-value-for-parameter
                | 'ComputeMetricsAndPlots' >> metrics_and_plots_evaluator.
                _ComputeMetricsAndPlots(eval_shared_model=eval_shared_model))

            def check_metrics(got):
                try:
                    self.assertEqual(1, len(got), 'got: %s' % got)
                    (slice_key, value) = got[0]
                    self.assertEqual((), slice_key)
                    self.assertDictElementsAlmostEqual(
                        got_values_dict=value,
                        expected_values_dict={
                            metric_keys.EXAMPLE_COUNT: 4.0,
                        })
                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(metrics, check_metrics, label='metrics')

            def check_plots(got):
                try:
                    self.assertEqual(1, len(got), 'got: %s' % got)
                    (slice_key, value) = got[0]
                    self.assertEqual((), slice_key)
                    self.assertDictMatrixRowsAlmostEqual(
                        got_values_dict=value,
                        expected_values_dict={
                            metric_keys.AUC_PLOTS_MATRICES:
                            [(8001, [2, 1, 0, 1, 1.0 / 1.0, 1.0 / 3.0])],
                        })
                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(plots, check_plots, label='plots')
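For context on the asserted plot row: assuming the row layout is [fn, tn, fp, tp, precision, recall] and that matrix row 8001 corresponds to a decision threshold of 1.0, the values follow from the four (prediction, label) pairs above:

pairs = [(0.0, 1.0), (0.7, 0.0), (0.8, 1.0), (1.0, 1.0)]
threshold = 1.0  # assumed threshold for row 8001
tp = sum(p >= threshold and l == 1.0 for p, l in pairs)  # 1
fp = sum(p >= threshold and l == 0.0 for p, l in pairs)  # 0
fn = sum(p < threshold and l == 1.0 for p, l in pairs)   # 2
tn = sum(p < threshold and l == 0.0 for p, l in pairs)   # 1
print([fn, tn, fp, tp, tp / (tp + fp), tp / (tp + fn)])  # [2, 1, 0, 1, 1.0, 0.333...]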
Example #7
    def testEvaluateNoSlicingAddPostExportAndCustomMetrics(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = linear_classifier.simple_linear_classifier(
            None, temp_eval_export_dir)
        eval_shared_model = self.createTestEvalSharedModel(
            eval_saved_model_path=eval_export_dir,
            add_metrics_callbacks=[
                _addExampleCountMetricCallback,
                # Note that since everything runs in-process this doesn't
                # actually test that the py_func can be correctly recreated
                # on workers in a distributed context.
                _addPyFuncMetricCallback,
                post_export_metrics.example_count(),
                post_export_metrics.example_weight(example_weight_key='age')
            ])
        extractors = [
            legacy_predict_extractor.PredictExtractor(eval_shared_model),
            slice_key_extractor.SliceKeyExtractor()
        ]

        with beam.Pipeline() as pipeline:
            example1 = self._makeExample(age=3.0,
                                         language='english',
                                         label=1.0)
            example2 = self._makeExample(age=3.0,
                                         language='chinese',
                                         label=0.0)
            example3 = self._makeExample(age=4.0,
                                         language='english',
                                         label=1.0)
            example4 = self._makeExample(age=5.0,
                                         language='chinese',
                                         label=0.0)

            (metrics, plots), _ = (
                pipeline
                | 'Create' >> beam.Create([
                    example1.SerializeToString(),
                    example2.SerializeToString(),
                    example3.SerializeToString(),
                    example4.SerializeToString()
                ])
                | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
                | 'Extract' >> tfma_unit.Extract(extractors=extractors)  # pylint: disable=no-value-for-parameter
                | 'ComputeMetricsAndPlots' >> metrics_and_plots_evaluator.
                _ComputeMetricsAndPlots(eval_shared_model=eval_shared_model))

            def check_result(got):
                try:
                    self.assertEqual(1, len(got), 'got: %s' % got)
                    (slice_key, value) = got[0]
                    self.assertEqual((), slice_key)
                    self.assertDictElementsAlmostEqual(
                        got_values_dict=value,
                        expected_values_dict={
                            'accuracy': 1.0,
                            'label/mean': 0.5,
                            'my_mean_age': 3.75,
                            'my_mean_age_times_label': 1.75,
                            'added_example_count': 4.0,
                            'py_func_label_sum': 2.0,
                            metric_keys.EXAMPLE_COUNT: 4.0,
                            metric_keys.EXAMPLE_WEIGHT: 15.0
                        })
                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(metrics, check_result, label='metrics')
            util.assert_that(plots, util.is_empty(), label='plots')
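The expected metric values above follow from the four examples; this sketch spells out the arithmetic, assuming my_mean_age and my_mean_age_times_label are simple means over age and age*label (consistent with the asserted numbers):

ages = [3.0, 3.0, 4.0, 5.0]
labels = [1.0, 0.0, 1.0, 0.0]
print(sum(ages) / len(ages))                                 # my_mean_age = 3.75
print(sum(a * l for a, l in zip(ages, labels)) / len(ages))  # my_mean_age_times_label = 1.75
print(sum(labels) / len(labels))                             # label/mean = 0.5
print(sum(ages))                                             # EXAMPLE_WEIGHT = 15.0 (example_weight_key='age')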
Example #8
    def testEvaluateWithSlicingAndUncertainty(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = linear_classifier.simple_linear_classifier(
            None, temp_eval_export_dir)
        eval_shared_model = self.createTestEvalSharedModel(
            eval_saved_model_path=eval_export_dir,
            add_metrics_callbacks=[_addExampleCountMetricCallback])
        extractors = [
            legacy_predict_extractor.PredictExtractor(eval_shared_model),
            slice_key_extractor.SliceKeyExtractor([
                slicer.SingleSliceSpec(),
                slicer.SingleSliceSpec(columns=['slice_key'])
            ])
        ]

        for batch_size in [1, 2, 4, 8]:

            with beam.Pipeline() as pipeline:
                example1 = self._makeExample(age=3.0,
                                             language='english',
                                             label=1.0,
                                             slice_key='first_slice')
                example2 = self._makeExample(age=3.0,
                                             language='chinese',
                                             label=0.0,
                                             slice_key='first_slice')
                example3 = self._makeExample(age=4.0,
                                             language='english',
                                             label=0.0,
                                             slice_key='second_slice')
                example4 = self._makeExample(age=5.0,
                                             language='chinese',
                                             label=1.0,
                                             slice_key='second_slice')
                example5 = self._makeExample(age=5.0,
                                             language='chinese',
                                             label=1.0,
                                             slice_key='second_slice')

                (metrics, _), _ = (
                    pipeline
                    | 'Create' >> beam.Create([
                        example1.SerializeToString(),
                        example2.SerializeToString(),
                        example3.SerializeToString(),
                        example4.SerializeToString(),
                        example5.SerializeToString(),
                    ])
                    | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
                    | 'Extract' >> tfma_unit.Extract(extractors=extractors)  # pylint: disable=no-value-for-parameter
                    | 'ComputeMetricsAndPlots' >>
                    metrics_and_plots_evaluator._ComputeMetricsAndPlots(
                        eval_shared_model=eval_shared_model,
                        desired_batch_size=batch_size,
                        compute_confidence_intervals=True))

                def check_result(got):
                    try:
                        self.assertEqual(3, len(got), 'got: %s' % got)
                        slices = {}
                        for slice_key, value in got:
                            slices[slice_key] = value
                        overall_slice = ()
                        first_slice = (('slice_key', 'first_slice'), )
                        second_slice = (('slice_key', 'second_slice'), )
                        self.assertCountEqual(
                            list(slices.keys()),
                            [overall_slice, first_slice, second_slice])
                        self.assertDictElementsWithTDistributionAlmostEqual(
                            slices[overall_slice], {
                                'accuracy': 0.4,
                                'label/mean': 0.6,
                                'my_mean_age': 4.0,
                                'my_mean_age_times_label': 2.6,
                                'added_example_count': 5.0
                            })
                        self.assertDictElementsWithTDistributionAlmostEqual(
                            slices[first_slice], {
                                'accuracy': 1.0,
                                'label/mean': 0.5,
                                'my_mean_age': 3.0,
                                'my_mean_age_times_label': 1.5,
                                'added_example_count': 2.0
                            })
                        self.assertDictElementsWithTDistributionAlmostEqual(
                            slices[second_slice], {
                                'accuracy': 0.0,
                                'label/mean': 2.0 / 3.0,
                                'my_mean_age': 14.0 / 3.0,
                                'my_mean_age_times_label': 10.0 / 3.0,
                                'added_example_count': 3.0
                            })

                    except AssertionError as err:
                        # This function is redefined every iteration, so it will have the
                        # right value of batch_size.
                        raise util.BeamAssertException(
                            'batch_size = %d, error: %s' % (batch_size, err))  # pylint: disable=cell-var-from-loop

                util.assert_that(metrics, check_result, label='metrics')
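For reference, the second_slice expectations can be reproduced from example3 through example5 (ages 4, 5, 5; labels 0, 1, 1). The accuracy of 0.0 depends on the classifier's actual predictions and cannot be derived from the inputs alone:

ages = [4.0, 5.0, 5.0]
labels = [0.0, 1.0, 1.0]
print(sum(ages) / len(ages))                                 # my_mean_age = 14/3
print(sum(labels) / len(labels))                             # label/mean = 2/3
print(sum(a * l for a, l in zip(ages, labels)) / len(ages))  # my_mean_age_times_label = 10/3
print(float(len(ages)))                                      # added_example_count = 3.0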