Example #1
0
def BuildDiagnosticTable(  # pylint: disable=invalid-name
        examples,
        eval_saved_model_path,
        extractors=None,
        desired_batch_size=None):
    """Build diagnostics for the specified EvalSavedModel and examples.

    Every input example is wrapped in a ``types.ExampleAndExtracts`` carrying
    an empty extracts dict, and the wrapped collection is then run through
    ``Extract`` with the selected extractors.

    Args:
      examples: PCollection of input examples. Can be any format the model
        accepts (e.g. string containing CSV row, TensorFlow.Example, etc).
      eval_saved_model_path: Path to EvalSavedModel. This directory should
        contain the saved_model.pb file.
      extractors: Optional list of Extractors to execute. When falsy (``None``
        or an empty list), a default pair is used: a ``PredictExtractor``
        followed by an 'ExtractFeatures' stage.
      desired_batch_size: Optional batch size, forwarded to the default
        ``PredictExtractor``. Not used when ``extractors`` is supplied.

    Returns:
      PCollection of ExampleAndExtracts.
    """

    def _wrap_example(example):
        # A fresh extracts dict per element, for the extractors to fill in.
        return types.ExampleAndExtracts(example=example, extracts={})

    # The default list is only constructed when the caller gave no extractors.
    active_extractors = extractors or [
        PredictExtractor(
            eval_saved_model_path, None, shared.Shared(), desired_batch_size),
        types.Extractor(
            stage_name='ExtractFeatures',
            ptransform=feature_extractor.ExtractFeatures()),
    ]

    wrapped = examples | 'ToExampleAndExtracts' >> beam.Map(_wrap_example)
    return wrapped | Extract(extractors=active_extractors)
Example #2
0
def BuildDiagnosticTable(  # pylint: disable=invalid-name
        examples,
        eval_saved_model_path,
        desired_batch_size=None):
    """Build diagnostics for the specified EvalSavedModel and examples.

    The pipeline has three labelled stages: 'ToExampleAndExtracts' wraps each
    example with an empty extracts dict, 'Predict' applies
    ``predict_extractor.TFMAPredict``, and 'ExtractFeatures' applies
    ``feature_extractor.ExtractFeatures``.

    Args:
      examples: PCollection of input examples. Can be any format the model
        accepts (e.g. string containing CSV row, TensorFlow.Example, etc).
      eval_saved_model_path: Path to EvalSavedModel. This directory should
        contain the saved_model.pb file.
      desired_batch_size: Optional batch size, forwarded to the 'Predict'
        stage.

    Returns:
      PCollection of ExampleAndExtracts.
    """

    def _wrap_example(example):
        # A fresh extracts dict per element, for later stages to fill in.
        return types.ExampleAndExtracts(example=example, extracts={})

    wrapped = examples | 'ToExampleAndExtracts' >> beam.Map(_wrap_example)

    # No metrics callbacks are supplied; a new shared handle is created for
    # this call.
    predict = predict_extractor.TFMAPredict(
        eval_saved_model_path,
        add_metrics_callbacks=None,
        shared_handle=shared.Shared(),
        desired_batch_size=desired_batch_size)
    predicted = wrapped | 'Predict' >> predict

    return (predicted
            | 'ExtractFeatures' >> feature_extractor.ExtractFeatures())
def TFMAPredict(  # pylint: disable=invalid-name
        examples_and_extracts,
        eval_shared_model,
        desired_batch_size=None,
        materialize=True):
    """A PTransform that adds predictions to ExamplesAndExtracts.

    The input is batched with ``beam.BatchElements``, run through
    ``_TFMAPredictionDoFn`` and, optionally, through
    ``feature_extractor.ExtractFeatures``.

    Args:
      examples_and_extracts: PCollection of ExampleAndExtracts containing a
        serialized example to be fed to the model.
      eval_shared_model: Shared model parameters for EvalSavedModel; handed
        to ``_TFMAPredictionDoFn``.
      desired_batch_size: Optional. Desired batch size for prediction. When
        truthy it is used as both the minimum and maximum batch size;
        otherwise ``beam.BatchElements`` runs with its defaults.
      materialize: When truthy, the predicted output is additionally passed
        through ``feature_extractor.ExtractFeatures()``.

    Returns:
      PCollection of ExamplesAndExtracts, where the extracts contains the
      features, predictions, labels retrieved.
    """
    # Pinning min and max to the same value requests fixed-size batches.
    batching_kwargs = (
        {'min_batch_size': desired_batch_size,
         'max_batch_size': desired_batch_size}
        if desired_batch_size else {})

    batched = (examples_and_extracts
               | 'Batch' >> beam.BatchElements(**batching_kwargs))

    # NOTE(review): an earlier comment here explained that metrics callbacks
    # are not needed for prediction itself, but must already be attached when
    # the model is shared with later stages, or those stages would reuse a
    # model without them. Presumably they now travel inside
    # eval_shared_model -- confirm.
    predict_fn = _TFMAPredictionDoFn(eval_shared_model=eval_shared_model)
    predicted = batched | 'Predict' >> beam.ParDo(predict_fn)

    if not materialize:
        return predicted

    return (predicted
            | 'ExtractFeatures' >> feature_extractor.ExtractFeatures())