    def benchmarkPredictExtractorManualActuation(self):
        """Benchmark PredictExtractorV2 "manually"."""
        self._init_model()
        records = self._readDatasetIntoExtracts()
        extracts = []
        for elem in records:
            extracts.append(
                input_extractor._ParseExample(elem, self._eval_config))  # pylint: disable=protected-access

        prediction_do_fn = predict_extractor_v2._PredictionDoFn(  # pylint: disable=protected-access
            eval_config=self._eval_config,
            eval_shared_models={"": self._eval_shared_model})
        prediction_do_fn.setup()
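        # Note: only the batched prediction loop below is timed; example parsing
        # and DoFn setup above are excluded from the reported wall time.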

        start = time.time()
        predict_result = []
        predict_batch_size = 1000
        for batch in benchmark_utils.batched_iterator(extracts,
                                                      predict_batch_size):
            predict_result.extend(prediction_do_fn.process(batch))

        end = time.time()
        delta = end - start
        self.report_benchmark(iters=1,
                              wall_time=delta,
                              extras={"num_examples": len(records)})
Example #2
    def benchmarkPredict(self):
        """Benchmark the predict and aggregate combine stages "manually".

    Runs _TFMAPredictionDoFn "manually" outside a Beam pipeline. Records the
    wall time taken.
    """
        # Run InputsToExtracts manually.
        records = []
        for x in self._dataset.read_raw_dataset(deserialize=False,
                                                limit=MAX_NUM_EXAMPLES):
            records.append({tfma.constants.INPUT_KEY: x})

        fn = tfma.extractors.predict_extractor._TFMAPredictionDoFn(  # pylint: disable=protected-access
            eval_shared_models={"": tfma.default_eval_shared_model(
                eval_saved_model_path=self._dataset.tfma_saved_model_path())},
            eval_config=None)
        fn.setup()

        # Predict
        predict_batch_size = 1000
        predict_result = []
        start = time.time()
        for batch in benchmark_utils.batched_iterator(records,
                                                      predict_batch_size):
            predict_result.extend(fn.process(batch))
        end = time.time()
        delta = end - start
        self.report_benchmark(
            iters=1,
            wall_time=delta,
            extras={
                "batch_size": predict_batch_size,
                "num_examples":
                self._dataset.num_examples(limit=MAX_NUM_EXAMPLES)
            })
Example #3
    def benchmarkEvalSavedModelPredict(self):
        """Benchmark using the EvalSavedModel to make predictions.

    Runs EvalSavedModel.predict_list and records the wall time taken.
    """
        batch_size = 1000

        eval_saved_model = load.EvalSavedModel(
            path=self._dataset.tfma_saved_model_path(),
            include_default_metrics=True)

        records = self._dataset.read_raw_dataset(deserialize=False,
                                                 limit=MAX_NUM_EXAMPLES)
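        # With deserialize=False the records remain serialized tf.Example bytes,
        # which is the input format predict_list consumes in this benchmark.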

        start = time.time()
        for batch in benchmark_utils.batched_iterator(records, batch_size):
            eval_saved_model.predict_list(batch)
        end = time.time()
        delta = end - start
        self.report_benchmark(
            iters=1,
            wall_time=delta,
            extras={
                "batch_size": batch_size,
                "num_examples":
                self._dataset.num_examples(limit=MAX_NUM_EXAMPLES)
            })
Example #4
    def benchmarkEvalSavedModelMetricsResetUpdateGetList(self):
        """Benchmark using the EvalSavedModel to compute metrics.

    Runs EvalSavedModel.metrics_reset_update_get_list and records the wall time
    taken.
    """
        batch_size = 1000

        eval_saved_model = load.EvalSavedModel(
            path=self._dataset.tfma_saved_model_path(),
            include_default_metrics=True)

        records = self._dataset.read_raw_dataset(
            deserialize=False, limit=self._max_num_examples())

        start = time.time()
        accumulators = []
        for batch in benchmark_utils.batched_iterator(records, batch_size):
            accumulators.append(
                eval_saved_model.metrics_reset_update_get_list(batch))
        end = time.time()
        delta = end - start

        # Sanity check
        metric_variables_sum = accumulators[0]
        for acc in accumulators[1:]:
            if len(metric_variables_sum) != len(acc):
                raise ValueError(
                    "all metric variable value lists should have the same length, but "
                    "got lists with different lengths: %d and %d" %
                    (len(metric_variables_sum), len(acc)))
            metric_variables_sum = [
                a + b for a, b in zip(metric_variables_sum, acc)
            ]
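        # At this point metric_variables_sum holds the elementwise sum of the
        # per-batch metric variable lists, e.g. [1.0, 2.0] + [0.5, 0.5] ->
        # [1.5, 2.5].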

        metrics = eval_saved_model.metrics_set_variables_and_get_values(
            metric_variables_sum)
        if "average_loss" not in metrics:
            raise ValueError(
                "metrics should contain average_loss metric, but it did not. "
                "metrics were: %s" % metrics)

        self.report_benchmark(
            iters=1,
            wall_time=delta,
            extras={
                "batch_size":
                batch_size,
                "num_examples":
                self._dataset.num_examples(limit=self._max_num_examples())
            })
Example #5
def _get_batched_records(dataset):
  """Returns a (batch_size, iterator for batched records) tuple for the dataset.

  Args:
    dataset: BenchmarkDataset object.

  Returns:
    Tuple of (batch_size, iterator for batched records), where records are
    decoded tf.train.Examples.
  """
  batch_size = 1000
  common_variables = _get_common_variables(dataset)
  converter = tft.coders.ExampleProtoCoder(
      common_variables.tf_metadata_schema, serialized=False)
  records = [converter.decode(x) for x in dataset.read_raw_dataset()]
  return batch_size, benchmark_utils.batched_iterator(records, batch_size)
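
# For reference: a minimal sketch of the batching helper that the
# benchmark_utils.batched_iterator calls above are assumed to provide, namely
# yielding successive lists of at most batch_size records. The real helper may
# differ in details; this sketch is illustrative only and not part of the
# benchmark code.
def _batched_iterator_sketch(records, batch_size):
  """Yields lists of up to batch_size items from records (illustrative)."""
  batch = []
  for record in records:
    batch.append(record)
    if len(batch) == batch_size:
      yield batch
      batch = []
  if batch:
    yield batch
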
Example #6
def _get_batched_records(dataset, max_num_examples=None):
    """Returns a (batch_size, iterator for batched records) tuple for the dataset.

  Args:
    dataset: BenchmarkDataset object.
    max_num_examples: Maximum number of examples to read from the dataset.

  Returns:
    Tuple of (batch_size, iterator for batched records), where records are
    decoded tf.train.Examples.
  """
    batch_size = 1000
    common_variables = _get_common_variables(dataset)
    converter = example_coder.ExamplesToRecordBatchDecoder(
        common_variables.transform_input_dataset_metadata.schema.
        SerializeToString())
    serialized_records = benchmark_utils.batched_iterator(
        dataset.read_raw_dataset(deserialize=False, limit=max_num_examples),
        batch_size)
    records = [converter.DecodeBatch(x) for x in serialized_records]
    return batch_size, records

    def _runMetricsAndPlotsEvaluatorManualActuation(self,
                                                    with_confidence_intervals,
                                                    metrics_specs=None):
        """Benchmark MetricsAndPlotsEvaluatorV2 "manually"."""
        self._init_model()
        if not metrics_specs:
            metrics_specs = self._eval_config.metrics_specs

        records = self._readDatasetIntoExtracts()
        extracts = []
        for elem in records:
            extracts.append(
                input_extractor._ParseExample(elem, self._eval_config))  # pylint: disable=protected-access

        prediction_do_fn = predict_extractor_v2._PredictionDoFn(  # pylint: disable=protected-access
            eval_config=self._eval_config,
            eval_shared_models={"": self._eval_shared_model})
        prediction_do_fn.setup()

        # Have to predict first
        predict_result = []
        predict_batch_size = 1000
        for batch in benchmark_utils.batched_iterator(extracts,
                                                      predict_batch_size):
            predict_result.extend(prediction_do_fn.process(batch))

        # Now Evaluate
        inputs_per_accumulator = 1000
        start = time.time()

        computations, _ = (
            metrics_and_plots_evaluator_v2._filter_and_separate_computations(  # pylint: disable=protected-access
                metric_specs.to_computations(metrics_specs,
                                             eval_config=self._eval_config)))

        processed = []
        for elem in predict_result:
            processed.append(
                next(
                    metrics_and_plots_evaluator_v2._PreprocessorDoFn(  # pylint: disable=protected-access
                        computations).process(elem)))

        combiner = metrics_and_plots_evaluator_v2._ComputationsCombineFn(  # pylint: disable=protected-access
            computations=computations,
            compute_with_sampling=with_confidence_intervals)

        accumulators = []
        for batch in benchmark_utils.batched_iterator(processed,
                                                      inputs_per_accumulator):
            accumulator = combiner.create_accumulator()
            for elem in batch:
                accumulator = combiner.add_input(accumulator, elem)
            accumulators.append(accumulator)

        final_accumulator = combiner.merge_accumulators(accumulators)
        final_output = combiner.extract_output(final_accumulator)
        end = time.time()
        delta = end - start

        # Sanity check the example count. This is not timed.
        example_count_key = metric_types.MetricKey(name="example_count")
        example_count = None
        for x in final_output:
            if example_count_key in x:
                example_count = x[example_count_key]
                break

        if example_count is None:
            raise ValueError(
                "example_count was not in the final list of metrics. "
                "metrics were: %s" % str(final_output))

        if with_confidence_intervals:
            # If we're computing using confidence intervals, the example count will
            # not be exact.
            lower_bound = int(0.9 * len(records))
            upper_bound = int(1.1 * len(records))
            if example_count < lower_bound or example_count > upper_bound:
                raise ValueError("example count out of bounds: expecting "
                                 "%d < example_count < %d, but got %d" %
                                 (lower_bound, upper_bound, example_count))
        else:
            # If we're not using confidence intervals, we expect the example count to
            # be exact.
            if example_count != len(records):
                raise ValueError(
                    "example count mismatch: expecting %d got %d" %
                    (len(records), example_count))

        self.report_benchmark(iters=1,
                              wall_time=delta,
                              extras={
                                  "inputs_per_accumulator":
                                  inputs_per_accumulator,
                                  "num_examples": len(records)
                              })
Example #8
    def benchmarkAggregateCombineManualActuation(self):
        """Benchmark the aggregate combine stage "manually".

    Runs _AggregateCombineFn "manually" outside a Beam pipeline. Records the
    wall time taken.
    """

        # Run InputsToExtracts manually.
        records = []
        for x in self._dataset.read_raw_dataset(deserialize=False,
                                                limit=MAX_NUM_EXAMPLES):
            records.append({tfma.constants.INPUT_KEY: x})

        fn = tfma.extractors.predict_extractor._TFMAPredictionDoFn(  # pylint: disable=protected-access
            eval_shared_models={"": tfma.default_eval_shared_model(
                eval_saved_model_path=self._dataset.tfma_saved_model_path())},
            eval_config=None)
        fn.setup()

        # Predict
        predict_batch_size = 1000
        predict_result = []
        for batch in benchmark_utils.batched_iterator(records,
                                                      predict_batch_size):
            predict_result.extend(fn.process(batch))

        # AggregateCombineFn
        #
        # We simulate accumulating records into multiple different accumulators,
        # each with inputs_per_accumulator records, and then merging the resulting
        # accumulators together at one go.

        # Number of elements to feed into a single accumulator.
        # (This means we will have len(records) / inputs_per_accumulator
        # accumulators to merge).
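        # (For example, 10,000 records with inputs_per_accumulator=1000 yields
        # 10 accumulators, which are then merged in a single
        # merge_accumulators call.)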
        inputs_per_accumulator = 1000

        combiner = tfma.evaluators.aggregate._AggregateCombineFn(  # pylint: disable=protected-access
            eval_shared_model=tfma.default_eval_shared_model(
                eval_saved_model_path=self._dataset.tfma_saved_model_path()))
        accumulators = []

        start = time.time()
        for batch in benchmark_utils.batched_iterator(predict_result,
                                                      inputs_per_accumulator):
            accumulator = combiner.create_accumulator()
            for elem in batch:
                accumulator = combiner.add_input(accumulator, elem)
            accumulators.append(accumulator)
        final_accumulator = combiner.merge_accumulators(accumulators)
        final_output = combiner.extract_output(final_accumulator)
        end = time.time()
        delta = end - start

        # Extract output to sanity check example count. This is not timed.
        extract_fn = tfma.evaluators.aggregate._ExtractOutputDoFn(  # pylint: disable=protected-access
            eval_shared_model=tfma.default_eval_shared_model(
                eval_saved_model_path=self._dataset.tfma_saved_model_path()))
        extract_fn.setup()
        interpreted_output = list(extract_fn.process(((), final_output)))
        if len(interpreted_output) != 1:
            raise ValueError("expecting exactly 1 interpreted output, got %d" %
                             (len(interpreted_output)))
        got_example_count = interpreted_output[0][1].get(
            "post_export_metrics/example_count")
        if got_example_count != self._dataset.num_examples(
                limit=MAX_NUM_EXAMPLES):
            raise ValueError(
                "example count mismatch: expecting %d got %d" %
                (self._dataset.num_examples(limit=MAX_NUM_EXAMPLES),
                 got_example_count))

        self.report_benchmark(
            iters=1,
            wall_time=delta,
            extras={
                "inputs_per_accumulator": inputs_per_accumulator,
                "num_examples":
                self._dataset.num_examples(limit=MAX_NUM_EXAMPLES)
            })
Example #9
    def _runMetricsPlotsAndValidationsEvaluatorManualActuation(
            self,
            with_confidence_intervals,
            multi_model,
            metrics_specs=None,
            validation=False):
        """Benchmark MetricsPlotsAndValidationsEvaluator "manually"."""
        self._init_model(multi_model, validation)
        if not metrics_specs:
            metrics_specs = self._eval_config.metrics_specs

        extracts = self._readDatasetIntoBatchedExtracts()
        num_examples = sum(
            [e[constants.ARROW_RECORD_BATCH_KEY].num_rows for e in extracts])
        extracts = [self._extract_features_and_labels(e) for e in extracts]

        prediction_do_fn = model_util.ModelSignaturesDoFn(
            eval_config=self._eval_config,
            eval_shared_models=self._eval_shared_models,
            signature_names={
                constants.PREDICTIONS_KEY:
                {name: [None]
                 for name in self._eval_shared_models}
            },
            prefer_dict_outputs=False)
        prediction_do_fn.setup()

        # Have to predict first
        predict_result = []
        for e in extracts:
            predict_result.extend(prediction_do_fn.process(e))

        # Unbatch extracts
        unbatched_extracts = []
        for e in predict_result:
            unbatched_extracts.extend(
                unbatch_extractor._extract_unbatched_inputs(e))  # pylint: disable=protected-access

        # Add global slice key.
        for e in unbatched_extracts:
            e[tfma.SLICE_KEY_TYPES_KEY] = ()
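            # An empty slice-key tuple corresponds to the overall (unsliced)
            # slice, so metrics below are computed over the full dataset.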

        # Now Evaluate
        inputs_per_accumulator = 1000
        start = time.time()
        for _ in range(_ITERS):
            computations, _, _, _ = (
                # pylint: disable=protected-access
                metrics_plots_and_validations_evaluator.
                _filter_and_separate_computations(
                    metric_specs_util.to_computations(
                        metrics_specs, eval_config=self._eval_config)))
            # pylint: enable=protected-access

            processed = []
            for elem in unbatched_extracts:
                processed.append(
                    next(
                        metrics_plots_and_validations_evaluator.
                        _PreprocessorDoFn(  # pylint: disable=protected-access
                            computations).process(elem)))

            combiner = metrics_plots_and_validations_evaluator._ComputationsCombineFn(  # pylint: disable=protected-access
                computations=computations)
            if with_confidence_intervals:
                combiner = poisson_bootstrap._BootstrapCombineFn(combiner)  # pylint: disable=protected-access
            combiner.setup()

            accumulators = []
            for batch in benchmark_utils.batched_iterator(
                    processed, inputs_per_accumulator):
                accumulator = combiner.create_accumulator()
                for elem in batch:
                    accumulator = combiner.add_input(accumulator, elem)
                accumulators.append(accumulator)

            final_accumulator = combiner.merge_accumulators(accumulators)
            final_output = combiner.extract_output(final_accumulator)
        end = time.time()
        delta = end - start

        # Sanity check the example count. This is not timed.
        example_count_key = metric_types.MetricKey(
            name="example_count",
            model_name="candidate" if multi_model else "")
        if example_count_key in final_output:
            example_count = final_output[example_count_key]
        else:
            raise ValueError(
                "example_count_key ({}) was not in the final list of "
                "metrics. metrics were: {}".format(example_count_key,
                                                   final_output))

        if with_confidence_intervals:
            # If we're computing using confidence intervals, the example count will
            # not be exact.
            lower_bound = int(0.9 * num_examples)
            upper_bound = int(1.1 * num_examples)
            if example_count < lower_bound or example_count > upper_bound:
                raise ValueError("example count out of bounds: expecting "
                                 "%d < example_count < %d, but got %d" %
                                 (lower_bound, upper_bound, example_count))
        else:
            # If we're not using confidence intervals, we expect the example count to
            # be exact.
            if example_count != num_examples:
                raise ValueError(
                    "example count mismatch: expecting %d got %d" %
                    (num_examples, example_count))

        self.report_benchmark(iters=_ITERS,
                              wall_time=delta,
                              extras={
                                  "inputs_per_accumulator":
                                  inputs_per_accumulator,
                                  "num_examples": num_examples
                              })
Example #10
    def _runMetricsAndPlotsEvaluatorManualActuation(self,
                                                    with_confidence_intervals,
                                                    metrics_specs=None):
        """Benchmark MetricsAndPlotsEvaluatorV2 "manually"."""
        self._init_model()
        if not metrics_specs:
            metrics_specs = self._eval_config.metrics_specs

        extracts = self._readDatasetIntoBatchedExtracts()
        num_examples = sum(
            [e[constants.ARROW_RECORD_BATCH_KEY].num_rows for e in extracts])
        extracts = [
            batched_input_extractor._ExtractInputs(e, self._eval_config)  # pylint: disable=protected-access
            for e in extracts
        ]

        prediction_do_fn = batched_predict_extractor_v2._BatchedPredictionDoFn(  # pylint: disable=protected-access
            eval_config=self._eval_config,
            eval_shared_models={"": self._eval_shared_model})
        prediction_do_fn.setup()

        # Have to predict first
        predict_result = []
        for e in extracts:
            predict_result.extend(prediction_do_fn.process(e))

        # Unbatch extracts
        unbatched_extracts = []
        for e in predict_result:
            unbatched_extracts.extend(
                unbatch_extractor._ExtractUnbatchedInputs(e))  # pylint: disable=protected-access

        # Add global slice key.
        for e in unbatched_extracts:
            e[tfma.SLICE_KEY_TYPES_KEY] = ()

        # Now Evaluate
        inputs_per_accumulator = 1000
        start = time.time()

        computations, _ = (
            # pylint: disable=protected-access
            metrics_plots_and_validations_evaluator.
            _filter_and_separate_computations(
                metric_specs.to_computations(metrics_specs,
                                             eval_config=self._eval_config)))
        # pylint: enable=protected-access

        processed = []
        for elem in unbatched_extracts:
            processed.append(
                next(
                    metrics_plots_and_validations_evaluator._PreprocessorDoFn(  # pylint: disable=protected-access
                        computations).process(elem)))

        combiner = metrics_plots_and_validations_evaluator._ComputationsCombineFn(  # pylint: disable=protected-access
            computations=computations,
            compute_with_sampling=with_confidence_intervals)

        accumulators = []
        for batch in benchmark_utils.batched_iterator(processed,
                                                      inputs_per_accumulator):
            accumulator = combiner.create_accumulator()
            for elem in batch:
                accumulator = combiner.add_input(accumulator, elem)
            accumulators.append(accumulator)

        final_accumulator = combiner.merge_accumulators(accumulators)
        final_output = combiner.extract_output(final_accumulator)
        end = time.time()
        delta = end - start

        # Sanity check the example count. This is not timed.
        example_count_key = metric_types.MetricKey(name="example_count")
        example_count = None
        for x in final_output:
            if example_count_key in x:
                example_count = x[example_count_key]
                break

        if example_count is None:
            raise ValueError(
                "example_count was not in the final list of metrics. "
                "metrics were: %s" % str(final_output))

        if with_confidence_intervals:
            # If we're computing using confidence intervals, the example count will
            # not be exact.
            lower_bound = int(0.9 * num_examples)
            upper_bound = int(1.1 * num_examples)
            if example_count < lower_bound or example_count > upper_bound:
                raise ValueError("example count out of bounds: expecting "
                                 "%d < example_count < %d, but got %d" %
                                 (lower_bound, upper_bound, example_count))
        else:
            # If we're not using confidence intervals, we expect the example count to
            # be exact.
            if example_count != num_examples:
                raise ValueError(
                    "example count mismatch: expecting %d got %d" %
                    (num_examples, example_count))

        self.report_benchmark(iters=1,
                              wall_time=delta,
                              extras={
                                  "inputs_per_accumulator":
                                  inputs_per_accumulator,
                                  "num_examples": num_examples
                              })
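

# Hypothetical entry-point sketch (not part of the snippets above): benchmark*
# methods like these follow the tf.test.Benchmark convention, so a module
# defining such a class can typically be run through TensorFlow's standard
# test/benchmark runner, with --benchmarks=<regex> selecting which benchmark
# methods to execute.
if __name__ == "__main__":
  import tensorflow as tf  # assumed available in the benchmark environment
  tf.test.main()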