Example 1
    def benchmarkPredictionsExtractorManualActuation(self):
        """Benchmark PredictionsExtractor "manually"."""
        self._init_model()
        extracts = self._readDatasetIntoBatchedExtracts()
        num_examples = sum(
            [e[constants.ARROW_RECORD_BATCH_KEY].num_rows for e in extracts])
        extracts = [self._extract_features_and_labels(e) for e in extracts]

        prediction_do_fn = model_util.ModelSignaturesDoFn(
            eval_config=self._eval_config,
            eval_shared_models={"": self._eval_shared_model},
            signature_names={constants.PREDICTIONS_KEY: {
                "": [None]
            }},
            prefer_dict_outputs=False)
        prediction_do_fn.setup()

        start = time.time()
        predict_result = []
        for e in extracts:
            predict_result.extend(prediction_do_fn.process(e))

        end = time.time()
        delta = end - start
        self.report_benchmark(iters=1,
                              wall_time=delta,
                              extras={"num_examples": num_examples})
Example 2
def _ExtractPredictions(  # pylint: disable=invalid-name
    extracts: beam.pvalue.PCollection,
    eval_config: config.EvalConfig,
    eval_shared_models: Optional[Dict[Text, types.EvalSharedModel]],
    tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None,
) -> beam.pvalue.PCollection:
    """A PTransform that adds predictions and possibly other tensors to extracts.

  Args:
    extracts: PCollection of extracts containing model inputs keyed by
      tfma.FEATURES_KEY (if model inputs are named) or tfma.INPUTS_KEY (if model
      takes raw tf.Examples as input).
    eval_config: Eval config.
    eval_shared_models: Shared model parameters keyed by model name or None.
    tensor_adapter_config: Tensor adapter config which specifies how to obtain
      tensors from the Arrow RecordBatch.

  Returns:
    PCollection of Extracts updated with the predictions.
  """

    if eval_shared_models:
        signature_names = {}
        for spec in eval_config.model_specs:
            model_name = '' if len(eval_config.model_specs) == 1 else spec.name
            signature_names[model_name] = [spec.signature_name]

        return (extracts
                | 'Predict' >> beam.ParDo(
                    model_util.ModelSignaturesDoFn(
                        eval_config=eval_config,
                        eval_shared_models=eval_shared_models,
                        signature_names={
                            constants.PREDICTIONS_KEY: signature_names
                        },
                        prefer_dict_outputs=False,
                        tensor_adapter_config=tensor_adapter_config)))
    else:

        def extract_predictions(  # pylint: disable=invalid-name
            batched_extracts: types.Extracts) -> types.Extracts:
            """Extract predictions from extracts containing features."""
            result = copy.copy(batched_extracts)
            predictions = model_util.get_feature_values_for_model_spec_field(
                list(eval_config.model_specs), 'prediction_key',
                'prediction_keys', result)
            if predictions is not None:
                result[constants.PREDICTIONS_KEY] = predictions
            return result

        return extracts | 'ExtractPredictions' >> beam.Map(extract_predictions)
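
When shared models are present, the loop above keys each model's signature list by model name, collapsing the key to the empty string when there is only one model spec. A standalone sketch of that keying rule follows; SimpleNamespace objects are hypothetical stand-ins for config.ModelSpec, used only so the snippet runs without TFMA.

from types import SimpleNamespace

def build_signature_names(model_specs):
    # Mirrors the loop above: a single spec is keyed by '', multiple specs by name.
    signature_names = {}
    for spec in model_specs:
        model_name = '' if len(model_specs) == 1 else spec.name
        signature_names[model_name] = [spec.signature_name]
    return signature_names

single = [SimpleNamespace(name='candidate', signature_name='serving_default')]
multi = single + [SimpleNamespace(name='baseline', signature_name='serving_default')]

print(build_signature_names(single))  # {'': ['serving_default']}
print(build_signature_names(multi))   # {'candidate': [...], 'baseline': [...]}
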
Example 3
def _ExtractBatchedPredictions(
    extracts: beam.pvalue.PCollection,
    eval_config: config.EvalConfig,
    eval_shared_models: Dict[Text, types.EvalSharedModel],
    tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None,
) -> beam.pvalue.PCollection:
    signature_names = {}
    for spec in eval_config.model_specs:
        model_name = '' if len(eval_config.model_specs) == 1 else spec.name
        signature_names[model_name] = [spec.signature_name]

    return (
        extracts
        | 'Predict' >> beam.ParDo(
            model_util.ModelSignaturesDoFn(
                eval_config=eval_config,
                eval_shared_models=eval_shared_models,
                signature_names={constants.PREDICTIONS_KEY: signature_names},
                prefer_dict_outputs=True,
                tensor_adapter_config=tensor_adapter_config)))
Example 4
def _ExtractTransformedFeatures(  # pylint: disable=invalid-name
    extracts: beam.pvalue.PCollection,
    eval_config: config.EvalConfig,
    eval_shared_models: Dict[Text, types.EvalSharedModel],
    tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None,
) -> beam.pvalue.PCollection:
    """A PTransform that updates extracts to include transformed features.

  Args:
    extracts: PCollection of extracts containing raw inputs keyed by
      tfma.FEATURES_KEY (if preprocessing function inputs are named) or
      tfma.INPUTS_KEY (if preprocessing functions take raw tf.Examples as input)
    eval_config: Eval config.
    eval_shared_models: Shared model parameters keyed by model name.
    tensor_adapter_config: Optional tensor adapter config which specifies how to
      obtain tensors from the Arrow RecordBatch.

  Returns:
    PCollection of Extracts updated with the to include transformed features
    stored under the key tfma.TRANSFORMED_FEATURES_KEY.
  """
    signature_names = {}
    for spec in eval_config.model_specs:
        model_name = '' if len(eval_config.model_specs) == 1 else spec.name
        signature_names[model_name] = list(spec.preprocessing_function_names)

    return (extracts
            | 'Predict' >> beam.ParDo(
                model_util.ModelSignaturesDoFn(
                    eval_config=eval_config,
                    eval_shared_models=eval_shared_models,
                    signature_names={
                        constants.TRANSFORMED_FEATURES_KEY: signature_names
                    },
                    default_signature_names=list(_DEFAULT_SIGNATURE_NAMES),
                    prefer_dict_outputs=True,
                    tensor_adapter_config=tensor_adapter_config)))
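
Across Examples 2-4, the signature_names argument to ModelSignaturesDoFn is a nested mapping: the outer key names the extracts entry to populate (constants.PREDICTIONS_KEY or constants.TRANSFORMED_FEATURES_KEY), the inner key is the model name, and the value lists the signatures to invoke. The sketch below shows that shape for a single unnamed model; the 'transformed_features' string is a made-up signature name, and the import assumes the usual tensorflow_model_analysis package layout.

from tensorflow_model_analysis import constants

# {extracts key: {model name: [signature names]}}.  [None] is what Examples 1
# and 6 pass so the model's default serving signature is used.
signature_names = {
    constants.PREDICTIONS_KEY: {'': [None]},
    constants.TRANSFORMED_FEATURES_KEY: {'': ['transformed_features']},
}
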
Example 5
    def testModelSignaturesDoFn(self, save_as_keras, signature_names,
                                default_signature_names, prefer_dict_outputs,
                                use_schema, expected_num_outputs):
        export_path = self.createModelWithMultipleDenseInputs(save_as_keras)
        eval_shared_models = {}
        model_specs = []
        for sigs in signature_names.values():
            for model_name in sigs:
                if model_name not in eval_shared_models:
                    eval_shared_models[
                        model_name] = self.createTestEvalSharedModel(
                            eval_saved_model_path=export_path,
                            model_name=model_name,
                            tags=[tf.saved_model.SERVING])
                    model_specs.append(config.ModelSpec(name=model_name))
        eval_config = config.EvalConfig(model_specs=model_specs)
        schema = self.createDenseInputsSchema() if use_schema else None
        tfx_io = tf_example_record.TFExampleBeamRecord(
            physical_format='text',
            schema=schema,
            raw_record_column_name=constants.ARROW_INPUT_COLUMN)
        tensor_adapter_config = None
        if use_schema:
            tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
                arrow_schema=tfx_io.ArrowSchema(),
                tensor_representations=tfx_io.TensorRepresentations())

        examples = [
            self._makeExample(input_1=1.0, input_2=2.0),
            self._makeExample(input_1=3.0, input_2=4.0),
            self._makeExample(input_1=5.0, input_2=6.0),
        ]

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            result = (pipeline
                      | 'Create' >> beam.Create(
                          [e.SerializeToString() for e in examples])
                      | 'BatchExamples' >> tfx_io.BeamSource(batch_size=3)
                      | 'ToExtracts' >> beam.Map(_record_batch_to_extracts)
                      | 'ModelSignatures' >> beam.ParDo(
                          model_util.ModelSignaturesDoFn(
                              eval_config=eval_config,
                              eval_shared_models=eval_shared_models,
                              signature_names=signature_names,
                              default_signature_names=default_signature_names,
                              prefer_dict_outputs=prefer_dict_outputs,
                              tensor_adapter_config=tensor_adapter_config)))

            # pylint: enable=no-value-for-parameter

            def check_result(got):
                try:
                    self.assertLen(got, 1)
                    for key in signature_names:
                        self.assertIn(key, got[0])
                        if prefer_dict_outputs:
                            for entry in got[0][key]:
                                self.assertIsInstance(entry, dict)
                                self.assertLen(entry, expected_num_outputs)

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(result, check_result, label='result')
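
The assertion above uses Beam's testing utilities: util.assert_that hands the matcher the full list of elements in the PCollection, and raising util.BeamAssertException inside the matcher fails the pipeline run. A minimal, self-contained sketch of that pattern on toy data (unrelated to TFMA):

import apache_beam as beam
from apache_beam.testing import util

def check_result(got):
    # `got` is the complete list of elements in the asserted PCollection.
    if sorted(got) != [1, 2, 3]:
        raise util.BeamAssertException('unexpected output: %s' % got)

with beam.Pipeline() as pipeline:
    result = pipeline | 'Create' >> beam.Create([3, 1, 2])
    util.assert_that(result, check_result, label='result')
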
Example 6
    def _runMetricsPlotsAndValidationsEvaluatorManualActuation(
            self, with_confidence_intervals, metrics_specs=None):
        """Benchmark MetricsPlotsAndValidationsEvaluator "manually"."""
        self._init_model()
        if not metrics_specs:
            metrics_specs = self._eval_config.metrics_specs

        extracts = self._readDatasetIntoBatchedExtracts()
        num_examples = sum(
            [e[constants.ARROW_RECORD_BATCH_KEY].num_rows for e in extracts])
        extracts = [self._extract_features_and_labels(e) for e in extracts]

        prediction_do_fn = model_util.ModelSignaturesDoFn(
            eval_config=self._eval_config,
            eval_shared_models={"": self._eval_shared_model},
            signature_names={constants.PREDICTIONS_KEY: {
                "": [None]
            }},
            prefer_dict_outputs=False)
        prediction_do_fn.setup()

        # Have to predict first
        predict_result = []
        for e in extracts:
            predict_result.extend(prediction_do_fn.process(e))

        # Unbatch extracts
        unbatched_extracts = []
        for e in predict_result:
            unbatched_extracts.extend(
                unbatch_extractor._ExtractUnbatchedInputs(e))  # pylint: disable=protected-access

        # Add global slice key.
        for e in unbatched_extracts:
            e[tfma.SLICE_KEY_TYPES_KEY] = ()

        # Now Evaluate
        inputs_per_accumulator = 1000
        start = time.time()

        computations, _ = (
            # pylint: disable=protected-access
            metrics_plots_and_validations_evaluator.
            _filter_and_separate_computations(
                metric_specs.to_computations(metrics_specs,
                                             eval_config=self._eval_config)))
        # pylint: enable=protected-access

        processed = []
        for elem in unbatched_extracts:
            processed.append(
                next(
                    metrics_plots_and_validations_evaluator._PreprocessorDoFn(  # pylint: disable=protected-access
                        computations).process(elem)))

        combiner = metrics_plots_and_validations_evaluator._ComputationsCombineFn(  # pylint: disable=protected-access
            computations=computations,
            compute_with_sampling=with_confidence_intervals)

        accumulators = []
        for batch in benchmark_utils.batched_iterator(processed,
                                                      inputs_per_accumulator):
            accumulator = combiner.create_accumulator()
            for elem in batch:
                accumulator = combiner.add_input(accumulator, elem)
            accumulators.append(accumulator)

        final_accumulator = combiner.merge_accumulators(accumulators)
        final_output = combiner.extract_output(final_accumulator)
        end = time.time()
        delta = end - start

        # Sanity check the example count. This is not timed.
        example_count_key = metric_types.MetricKey(name="example_count")
        example_count = None
        for x in final_output:
            if example_count_key in x:
                example_count = x[example_count_key]
                break

        if example_count is None:
            raise ValueError(
                "example_count was not in the final list of metrics. "
                "metrics were: %s" % str(final_output))

        if with_confidence_intervals:
            # If we're computing using confidence intervals, the example count will
            # not be exact.
            lower_bound = int(0.9 * num_examples)
            upper_bound = int(1.1 * num_examples)
            if example_count < lower_bound or example_count > upper_bound:
                raise ValueError("example count out of bounds: expecting "
                                 "%d < example_count < %d, but got %d" %
                                 (lower_bound, upper_bound, example_count))
        else:
            # If we're not using confidence intervals, we expect the example count to
            # be exact.
            if example_count != num_examples:
                raise ValueError(
                    "example count mismatch: expecting %d got %d" %
                    (num_examples, example_count))

        self.report_benchmark(iters=1,
                              wall_time=delta,
                              extras={
                                  "inputs_per_accumulator":
                                  inputs_per_accumulator,
                                  "num_examples": num_examples
                              })
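
The timed section above drives a Beam CombineFn by hand: one accumulator per batch of inputs_per_accumulator elements, then a single merge and a final extract. The sketch below shows the same accumulate/merge/extract pattern with a trivial counting CombineFn standing in for _ComputationsCombineFn and a hypothetical batched() helper in place of benchmark_utils.batched_iterator.

import apache_beam as beam

class CountFn(beam.CombineFn):
    """Counts elements; a toy stand-in for the metrics combiner."""

    def create_accumulator(self):
        return 0

    def add_input(self, accumulator, element):
        return accumulator + 1

    def merge_accumulators(self, accumulators):
        return sum(accumulators)

    def extract_output(self, accumulator):
        return accumulator

def batched(iterable, batch_size):
    # Hypothetical helper: yields lists of up to batch_size elements.
    batch = []
    for item in iterable:
        batch.append(item)
        if len(batch) == batch_size:
            yield batch
            batch = []
    if batch:
        yield batch

combiner = CountFn()
accumulators = []
for batch in batched(range(10), 4):
    accumulator = combiner.create_accumulator()
    for elem in batch:
        accumulator = combiner.add_input(accumulator, elem)
    accumulators.append(accumulator)

final = combiner.extract_output(combiner.merge_accumulators(accumulators))
print(final)  # 10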