Example #1
0
    def _generate_blessing_result(self, eval_examples_uri: Text,
                                  slice_spec: List[
                                      tfma.slicer.SingleSliceSpec],
                                  current_model_dir: Text,
                                  blessed_model_dir: Text) -> bool:
        current_model_eval_result_path = os.path.join(
            self._temp_path, constants.CURRENT_MODEL_EVAL_RESULT_PATH)
        blessed_model_eval_result_path = os.path.join(
            self._temp_path, constants.BLESSED_MODEL_EVAL_RESULT_PATH)

        with self._make_beam_pipeline() as pipeline:
            eval_data = (pipeline | 'ReadData' >> beam.io.ReadFromTFRecord(
                file_pattern=io_utils.all_files_pattern(eval_examples_uri)))

            current_model = tfma.default_eval_shared_model(
                eval_saved_model_path=path_utils.eval_model_path(
                    current_model_dir))
            (eval_data
             | 'EvalCurrentModel' >> tfma.ExtractEvaluateAndWriteResults(  # pylint: disable=expression-not-assigned
                 eval_shared_model=current_model,
                 slice_spec=slice_spec,
                 output_path=current_model_eval_result_path))

            if blessed_model_dir is not None:
                blessed_model = tfma.default_eval_shared_model(
                    eval_saved_model_path=path_utils.eval_model_path(
                        blessed_model_dir))
                (eval_data
                 | 'EvalBlessedModel' >> tfma.ExtractEvaluateAndWriteResults(  # pylint: disable=expression-not-assigned
                     eval_shared_model=blessed_model,
                     slice_spec=slice_spec,
                     output_path=blessed_model_eval_result_path))

        absl.logging.info(
            'all files in current_model_eval_result_path: [%s]',
            str(tf.io.gfile.listdir(current_model_eval_result_path)))
        current_model_eval_result = tfma.load_eval_result(
            output_path=current_model_eval_result_path)

        if not self._pass_threshold(current_model_eval_result):
            absl.logging.info('Current model does not pass threshold.')
            return False
        absl.logging.info('Current model passes threshold.')

        if blessed_model_dir is None:
            absl.logging.info('No blessed model yet.')
            return True
        absl.logging.info(
            'all files in blessed_model_eval_result: [%s]',
            str(tf.io.gfile.listdir(blessed_model_eval_result_path)))
        blessed_model_eval_result = tfma.load_eval_result(
            output_path=blessed_model_eval_result_path)

        if (self._compare_eval_result(current_model_eval_result,
                                      blessed_model_eval_result)):
            absl.logging.info('Current model better than blessed model.')
            return True
        else:
            absl.logging.info('Current model worse than blessed model.')
            return False
Example #2
0
    def _generate_blessing_result(self, eval_examples_uri, slice_spec,
                                  current_model_dir, blessed_model_dir):
        current_model_eval_result_path = os.path.join(
            self._temp_path, CURRENT_MODEL_EVAL_RESULT_PATH)
        blessed_model_eval_result_path = os.path.join(
            self._temp_path, BLESSED_MODEL_EVAL_RESULT_PATH)

        with beam.Pipeline(argv=self._get_beam_pipeline_args()) as pipeline:
            eval_data = (pipeline | 'ReadData' >> beam.io.ReadFromTFRecord(
                file_pattern=io_utils.all_files_pattern(eval_examples_uri)))

            current_model = tfma.default_eval_shared_model(
                eval_saved_model_path=path_utils.eval_model_path(
                    current_model_dir))
            (eval_data
             | 'EvalCurrentModel' >> tfma.ExtractEvaluateAndWriteResults(  # pylint: disable=expression-not-assigned
                 eval_shared_model=current_model,
                 slice_spec=slice_spec,
                 output_path=current_model_eval_result_path))

            if blessed_model_dir is not None:
                blessed_model = tfma.default_eval_shared_model(
                    eval_saved_model_path=path_utils.eval_model_path(
                        blessed_model_dir))
                (eval_data
                 | 'EvalBlessedModel' >> tfma.ExtractEvaluateAndWriteResults(  # pylint: disable=expression-not-assigned
                     eval_shared_model=blessed_model,
                     slice_spec=slice_spec,
                     output_path=blessed_model_eval_result_path))

        current_model_eval_result = tfma.load_eval_result(
            output_path=current_model_eval_result_path)

        if not self._pass_threshold(current_model_eval_result):
            tf.logging.info('Current model does not pass threshold.')
            return False
        tf.logging.info('Current model passes threshold.')

        if blessed_model_dir is None:
            tf.logging.info('No blessed model yet.')
            return True

        blessed_model_eval_result = tfma.load_eval_result(
            output_path=blessed_model_eval_result_path)

        if (self._compare_eval_result(current_model_eval_result,
                                      blessed_model_eval_result)):
            tf.logging.info('Current model better than blessed model.')
            return True
        else:
            tf.logging.info('Current model worse than blessed model.')
            return False
Example #3
0
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Runs a batch job to evaluate the eval_model against the given input.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - model_exports: exported model.
        - examples: examples for eval the model.
      output_dict: Output dict from output key to a list of Artifacts.
        - output: model evaluation results.
      exec_properties: A dict of execution properties.
        - feature_slicing_spec: JSON string of evaluator_pb2.FeatureSlicingSpec
          instance, providing the way to slice the data.

    Returns:
      None
    """
        if 'model_exports' not in input_dict:
            raise ValueError('\'model_exports\' is missing in input dict.')
        if 'examples' not in input_dict:
            raise ValueError('\'examples\' is missing in input dict.')
        if 'output' not in output_dict:
            raise ValueError('\'output\' is missing in output dict.')

        self._log_startup(input_dict, output_dict, exec_properties)

        # Extract input artifacts
        model_exports_uri = artifact_utils.get_single_uri(
            input_dict['model_exports'])

        feature_slicing_spec = evaluator_pb2.FeatureSlicingSpec()
        json_format.Parse(exec_properties['feature_slicing_spec'],
                          feature_slicing_spec)
        slice_spec = self._get_slice_spec_from_feature_slicing_spec(
            feature_slicing_spec)

        output_uri = artifact_utils.get_single_uri(output_dict['output'])

        eval_model_path = path_utils.eval_model_path(model_exports_uri)

        tf.logging.info('Using {} for model eval.'.format(eval_model_path))
        eval_shared_model = tfma.default_eval_shared_model(
            eval_saved_model_path=eval_model_path)

        tf.logging.info('Evaluating model.')
        with self._make_beam_pipeline() as pipeline:
            # pylint: disable=expression-not-assigned
            (pipeline
             | 'ReadData' >>
             beam.io.ReadFromTFRecord(file_pattern=io_utils.all_files_pattern(
                 artifact_utils.get_split_uri(input_dict['examples'], 'eval')))
             | 'ExtractEvaluateAndWriteResults' >>
             tfma.ExtractEvaluateAndWriteResults(
                 eval_shared_model=eval_shared_model,
                 slice_spec=slice_spec,
                 output_path=output_uri))
        tf.logging.info(
            'Evaluation complete. Results written to {}.'.format(output_uri))
Example #4
0
def process_tfma(eval_model_dir=None,
                 eval_result_dir=None,
                 bq_table=None,
                 max_rows=None,
                 schema_file=None,
                 pipeline_args=None):
    """Runs a batch job to evaluate the eval_model against the given input.
    :param eval_model_dir:
    :param eval_result_dir:
    :param bq_table:
    :param max_rows:
    :param max_rows:
    :param pipeline_args:
    :return:
    """

    slice_spec = [tfma.slicer.SingleSliceSpec()]

    for slice in my_metadata.TFMA_SLICERS:
        slice_spec.append(
            tfma.slicer.SingleSliceSpec(columns=slice[0], features=slice[1]))

    schema = my_metadata.read_schema(schema_file)

    eval_shared_model = tfma.default_eval_shared_model(
        eval_saved_model_path=eval_model_dir,
        add_metrics_callbacks=[
            tfma.post_export_metrics.calibration_plot_and_prediction_histogram(
            ),
            tfma.post_export_metrics.auc_plots(),
            tfma.post_export_metrics.auc()
        ])

    with beam.Pipeline(argv=pipeline_args) as pipeline:

        query = sql_queries.get_tfma_sql_query(bq_table, max_rows)
        raw_feature_spec = my_metadata.get_raw_feature_spec(schema)

        raw_data = (
            pipeline
            | 'ReadBigQuery' >> beam.io.Read(
                beam.io.BigQuerySource(query=query, use_standard_sql=True))
            | 'CleanData' >>
            beam.Map(lambda x:
                     (my_metadata.clean_raw_data_dict(x, raw_feature_spec))))

        # Examples must be in clean tf-example format.
        coder = my_metadata.make_proto_coder(schema)

        _ = (raw_data
             | 'ToSerializedTFExample' >> beam.Map(coder.encode)
             | 'ExtractEvaluateAndWriteResults' >>
             tfma.ExtractEvaluateAndWriteResults(
                 eval_shared_model=eval_shared_model,
                 slice_spec=slice_spec,
                 output_path=eval_result_dir))
Example #5
0
  def Do(self, input_dict: Dict[Text, List[types.Artifact]],
         output_dict: Dict[Text, List[types.Artifact]],
         exec_properties: Dict[Text, Any]) -> None:
    """Runs a batch job to evaluate the eval_model against the given input.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - model_exports: exported model.
        - examples: examples for eval the model.
      output_dict: Output dict from output key to a list of Artifacts.
        - output: model evaluation results.
      exec_properties: A dict of execution properties.
        - eval_config: JSON string of tfma.EvalConfig.
        - feature_slicing_spec: JSON string of evaluator_pb2.FeatureSlicingSpec
          instance, providing the way to slice the data. Deprecated, use
          eval_config.slicing_specs instead.

    Returns:
      None
    """
    if constants.EXAMPLES_KEY not in input_dict:
      raise ValueError('EXAMPLES_KEY is missing from input dict.')
    if constants.MODEL_KEY not in input_dict:
      raise ValueError('MODEL_KEY is missing from input dict.')
    if constants.EVALUATION_KEY not in output_dict:
      raise ValueError('EVALUATION_KEY is missing from output dict.')
    if len(input_dict[constants.MODEL_KEY]) > 1:
      raise ValueError(
          'There can be only one candidate model, there are {}.'.format(
              len(input_dict[constants.MODEL_KEY])))
    if constants.BASELINE_MODEL_KEY in input_dict and len(
        input_dict[constants.BASELINE_MODEL_KEY]) > 1:
      raise ValueError(
          'There can be only one baseline model, there are {}.'.format(
              len(input_dict[constants.BASELINE_MODEL_KEY])))

    self._log_startup(input_dict, output_dict, exec_properties)

    # Add fairness indicator metric callback if necessary.
    fairness_indicator_thresholds = exec_properties.get(
        'fairness_indicator_thresholds', None)
    add_metrics_callbacks = None
    if fairness_indicator_thresholds:
      # Need to import the following module so that the fairness indicator
      # post-export metric is registered.
      import tensorflow_model_analysis.addons.fairness.post_export_metrics.fairness_indicators  # pylint: disable=g-import-not-at-top, unused-variable
      add_metrics_callbacks = [
          tfma.post_export_metrics.fairness_indicators(  # pytype: disable=module-attr
              thresholds=fairness_indicator_thresholds),
      ]

    def _get_eval_saved_model(artifact: List[types.Artifact],
                              tags=None) -> tfma.EvalSharedModel:
      model_uri = artifact_utils.get_single_uri(artifact)
      if tags and tf.saved_model.SERVING in tags:
        model_path = path_utils.serving_model_path(model_uri)
      else:
        model_path = path_utils.eval_model_path(model_uri)
      return tfma.default_eval_shared_model(
          eval_saved_model_path=model_path,
          tags=tags,
          add_metrics_callbacks=add_metrics_callbacks)

    output_uri = artifact_utils.get_single_uri(
        output_dict[constants.EVALUATION_KEY])

    run_validation = False
    if 'eval_config' in exec_properties and exec_properties['eval_config']:
      slice_spec = None
      eval_config = tfma.EvalConfig()
      json_format.Parse(exec_properties['eval_config'], eval_config)
      # Do not validate model when there is no thresholds configured. This is to
      # avoid accidentally blessing models when users forget to set thresholds.
      for metrics_spec in eval_config.metrics_specs:
        if (metrics_spec.thresholds or any(
            metric.HasField('threshold') for metric in metrics_spec.metrics)):
          run_validation = True
          break
      if len(eval_config.model_specs) > 2:
        raise ValueError(
            """Cannot support more than two models. There are {} models in this
             eval_config.""".format(len(eval_config.model_specs)))
      if not eval_config.model_specs:
        eval_config.model_specs.add()
      # Remove baseline model_spec and all change thresholds if there is no
      # baseline model provided.
      if not input_dict.get(constants.BASELINE_MODEL_KEY):
        tmp_model_specs = []
        for model_spec in eval_config.model_specs:
          if not model_spec.is_baseline:
            tmp_model_specs.append(model_spec)
        del eval_config.model_specs[:]
        eval_config.model_specs.extend(tmp_model_specs)
        absl.logging.info("""No baseline model provided, ignoring all
            baseline model_spec.""")
        for metrics_spec in eval_config.metrics_specs:
          for metric in metrics_spec.metrics:
            metric.threshold.ClearField('change_threshold')
          for threshold in metrics_spec.thresholds.values():
            threshold.ClearField('change_threshold')
        absl.logging.info("""No baseline model provided, ignoring all
            change thresholds.""")
      # Extract model artifacts.
      models = {}
      for model_spec in eval_config.model_specs:
        if model_spec.signature_name != 'eval':
          tags = [tf.saved_model.SERVING]
        if model_spec.is_baseline:
          models[model_spec.name] = _get_eval_saved_model(
              input_dict[constants.BASELINE_MODEL_KEY], tags)
          absl.logging.info('Using {} as baseline model.'.format(
              models[model_spec.name].model_path))
        else:
          models[model_spec.name] = _get_eval_saved_model(
              input_dict[constants.MODEL_KEY], tags)
          absl.logging.info('Using {} for model eval.'.format(
              models[model_spec.name].model_path))
    else:
      eval_config = None
      assert ('feature_slicing_spec' in exec_properties and
              exec_properties['feature_slicing_spec']
             ), 'both eval_config and feature_slicing_spec are unset.'
      feature_slicing_spec = evaluator_pb2.FeatureSlicingSpec()
      json_format.Parse(exec_properties['feature_slicing_spec'],
                        feature_slicing_spec)
      slice_spec = self._get_slice_spec_from_feature_slicing_spec(
          feature_slicing_spec)
      models = _get_eval_saved_model(input_dict[constants.MODEL_KEY])
      absl.logging.info('Using {} for model eval.'.format(models.model_path))

    absl.logging.info('Evaluating model.')
    with self._make_beam_pipeline() as pipeline:
      # pylint: disable=expression-not-assigned
      (pipeline
       | 'ReadData' >> beam.io.ReadFromTFRecord(
           file_pattern=io_utils.all_files_pattern(
               artifact_utils.get_split_uri(input_dict[constants.EXAMPLES_KEY],
                                            'eval')))
       |
       'ExtractEvaluateAndWriteResults' >> tfma.ExtractEvaluateAndWriteResults(
           eval_shared_model=models,
           eval_config=eval_config,
           output_path=output_uri,
           slice_spec=slice_spec))
    absl.logging.info(
        'Evaluation complete. Results written to {}.'.format(output_uri))

    if not run_validation:
      # TODO(jinhuang): delete the BLESSING_KEY from output_dict when supported.
      absl.logging.info('No threshold configured, will not validate model.')
      return
    # Set up blessing artifact
    blessing = artifact_utils.get_single_instance(
        output_dict[constants.BLESSING_KEY])
    blessing.set_string_custom_property(
        constants.ARTIFACT_PROPERTY_CURRENT_MODEL_URI_KEY,
        artifact_utils.get_single_uri(input_dict[constants.MODEL_KEY]))
    blessing.set_int_custom_property(
        constants.ARTIFACT_PROPERTY_CURRENT_MODEL_ID_KEY,
        input_dict[constants.MODEL_KEY][0].id)
    if input_dict.get(constants.BASELINE_MODEL_KEY):
      baseline_model = input_dict[constants.BASELINE_MODEL_KEY][0]
      blessing.set_string_custom_property(
          constants.ARTIFACT_PROPERTY_BASELINE_MODEL_URI_KEY,
          baseline_model.uri)
      blessing.set_int_custom_property(
          constants.ARTIFACT_PROPERTY_BASELINE_MODEL_ID_KEY, baseline_model.id)
    if 'current_component_id' in exec_properties:
      blessing.set_string_custom_property(
          'component_id', exec_properties['current_component_id'])
    # Check validation result and write BLESSED file accordingly.
    validation_file = os.path.join(output_uri, tfma.constants.VALIDATIONS_KEY)
    absl.logging.info('Checking validation results.')
    validation_result = tfma.load_validation_result(validation_file)
    if validation_result.validation_ok:
      io_utils.write_string_file(
          os.path.join(blessing.uri, constants.BLESSED_FILE_NAME), '')
      blessing.set_int_custom_property(constants.ARTIFACT_PROPERTY_BLESSED_KEY,
                                       constants.BLESSED_VALUE)
    else:
      io_utils.write_string_file(
          os.path.join(blessing.uri, constants.NOT_BLESSED_FILE_NAME), '')
      blessing.set_int_custom_property(constants.ARTIFACT_PROPERTY_BLESSED_KEY,
                                       constants.NOT_BLESSED_VALUE)
    absl.logging.info('Blessing result {} written to {}.'.format(
        validation_result.validation_ok, blessing.uri))
Example #6
0
def process_tfma(eval_result_dir,
                 schema_file,
                 input_csv=None,
                 big_query_table=None,
                 eval_model_dir=None,
                 max_eval_rows=None,
                 pipeline_args=None):
  """Runs a batch job to evaluate the eval_model against the given input.

  Args:
    eval_result_dir: A directory where the evaluation result should be written
      to.
    schema_file: A file containing a text-serialized Schema that describes the
      eval data.
    input_csv: A path to a csv file which should be the input for evaluation.
      This can only be set if big_query_table is None.
    big_query_table: A BigQuery table name specified as DATASET.TABLE which
      should be the input for evaluation. This can only be set if input_csv is
      None.
    eval_model_dir: A directory where the eval model is located.
    max_eval_rows: Number of rows to query from BigQuery.
    pipeline_args: additional DataflowRunner or DirectRunner args passed to the
      beam pipeline.

  Raises:
    ValueError: if input_csv and big_query_table are not specified correctly.
  """

  if input_csv == big_query_table and input_csv is None:
    raise ValueError(
        'one of --input_csv or --big_query_table should be provided.')

  slice_spec = [
      tfma.slicer.SingleSliceSpec(),
      tfma.slicer.SingleSliceSpec(columns=['trip_start_hour'])
  ]

  schema = taxi.read_schema(schema_file)

  eval_shared_model = tfma.default_eval_shared_model(
      eval_saved_model_path=eval_model_dir,
      add_metrics_callbacks=[
          tfma.post_export_metrics.calibration_plot_and_prediction_histogram(),
          tfma.post_export_metrics.auc_plots()
      ])

  with beam.Pipeline(argv=pipeline_args) as pipeline:
    if input_csv:
      csv_coder = taxi.make_csv_coder(schema)
      raw_data = (
          pipeline
          | 'ReadFromText' >> beam.io.ReadFromText(
              input_csv, skip_header_lines=1)
          | 'ParseCSV' >> beam.Map(csv_coder.decode))
    else:
      assert big_query_table
      query = taxi.make_sql(big_query_table, max_eval_rows, for_eval=True)
      raw_feature_spec = taxi.get_raw_feature_spec(schema)
      raw_data = (
          pipeline
          | 'ReadBigQuery' >> beam.io.Read(
              beam.io.BigQuerySource(query=query, use_standard_sql=True))
          | 'CleanData' >>
          beam.Map(lambda x: (taxi.clean_raw_data_dict(x, raw_feature_spec))))

    # Examples must be in clean tf-example format.
    coder = taxi.make_proto_coder(schema)

    _ = (
        raw_data
        | 'ToSerializedTFExample' >> beam.Map(coder.encode)
        |
        'ExtractEvaluateAndWriteResults' >> tfma.ExtractEvaluateAndWriteResults(
            eval_shared_model=eval_shared_model,
            slice_spec=slice_spec,
            output_path=eval_result_dir))
Example #7
0
  def Do(self, input_dict: Dict[Text, List[types.Artifact]],
         output_dict: Dict[Text, List[types.Artifact]],
         exec_properties: Dict[Text, Any]) -> None:
    """Runs a batch job to evaluate the eval_model against the given input.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - model_exports: exported model.
        - examples: examples for eval the model.
      output_dict: Output dict from output key to a list of Artifacts.
        - output: model evaluation results.
      exec_properties: A dict of execution properties.
        - feature_slicing_spec: JSON string of evaluator_pb2.FeatureSlicingSpec
          instance, providing the way to slice the data.

    Returns:
      None
    """
    if 'model_exports' not in input_dict:
      raise ValueError('\'model_exports\' is missing in input dict.')
    if 'examples' not in input_dict:
      raise ValueError('\'examples\' is missing in input dict.')
    if 'output' not in output_dict:
      raise ValueError('\'output\' is missing in output dict.')

    self._log_startup(input_dict, output_dict, exec_properties)

    # Extract input artifacts
    model_exports_uri = artifact_utils.get_single_uri(
        input_dict['model_exports'])

    feature_slicing_spec = evaluator_pb2.FeatureSlicingSpec()
    json_format.Parse(exec_properties['feature_slicing_spec'],
                      feature_slicing_spec)
    slice_spec = self._get_slice_spec_from_feature_slicing_spec(
        feature_slicing_spec)

    output_uri = artifact_utils.get_single_uri(output_dict['output'])

    eval_model_path = path_utils.eval_model_path(model_exports_uri)

    # Add fairness indicator metric callback if necessary.
    fairness_indicator_thresholds = exec_properties.get(
        'fairness_indicator_thresholds', None)
    add_metrics_callbacks = None
    if fairness_indicator_thresholds:
      # Need to import the following module so that the fairness indicator
      # post-export metric is registered.
      import tensorflow_model_analysis.addons.fairness.post_export_metrics.fairness_indicators  # pylint: disable=g-import-not-at-top, unused-variable
      add_metrics_callbacks = [
          tfma.post_export_metrics.fairness_indicators(  # pytype: disable=module-attr
              thresholds=fairness_indicator_thresholds),
      ]

    absl.logging.info('Using {} for model eval.'.format(eval_model_path))
    eval_shared_model = tfma.default_eval_shared_model(
        eval_saved_model_path=eval_model_path,
        add_metrics_callbacks=add_metrics_callbacks)

    absl.logging.info('Evaluating model.')
    with self._make_beam_pipeline() as pipeline:
      # pylint: disable=expression-not-assigned
      (pipeline
       | 'ReadData' >> beam.io.ReadFromTFRecord(
           file_pattern=io_utils.all_files_pattern(
               artifact_utils.get_split_uri(input_dict['examples'], 'eval')))
       |
       'ExtractEvaluateAndWriteResults' >> tfma.ExtractEvaluateAndWriteResults(
           eval_shared_model=eval_shared_model,
           slice_spec=slice_spec,
           output_path=output_uri))
    absl.logging.info(
        'Evaluation complete. Results written to {}.'.format(output_uri))
Example #8
0
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Runs a batch job to evaluate the eval_model against the given input.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - model_exports: exported model.
        - examples: examples for eval the model.
      output_dict: Output dict from output key to a list of Artifacts.
        - output: model evaluation results.
      exec_properties: A dict of execution properties.
        - eval_config: JSON string of tfma.EvalConfig.
        - feature_slicing_spec: JSON string of evaluator_pb2.FeatureSlicingSpec
          instance, providing the way to slice the data. Deprecated, use
          eval_config.slicing_specs instead.

    Returns:
      None
    """
        if constants.EXAMPLES_KEY not in input_dict:
            raise ValueError('EXAMPLES_KEY is missing from input dict.')
        if constants.MODEL_KEY not in input_dict:
            raise ValueError('MODEL_KEY is missing from input dict.')
        if constants.EVALUATION_KEY not in output_dict:
            raise ValueError('EVALUATION_KEY is missing from output dict.')
        if len(input_dict[constants.MODEL_KEY]) > 1:
            raise ValueError(
                'There can be only one candidate model, there are {}.'.format(
                    len(input_dict[constants.MODEL_KEY])))
        if constants.BASELINE_MODEL_KEY in input_dict and len(
                input_dict[constants.BASELINE_MODEL_KEY]) > 1:
            raise ValueError(
                'There can be only one baseline model, there are {}.'.format(
                    len(input_dict[constants.BASELINE_MODEL_KEY])))

        self._log_startup(input_dict, output_dict, exec_properties)

        # Add fairness indicator metric callback if necessary.
        fairness_indicator_thresholds = exec_properties.get(
            'fairness_indicator_thresholds', None)
        add_metrics_callbacks = None
        if fairness_indicator_thresholds:
            # Need to import the following module so that the fairness indicator
            # post-export metric is registered.
            import tensorflow_model_analysis.addons.fairness.post_export_metrics.fairness_indicators  # pylint: disable=g-import-not-at-top, unused-variable
            add_metrics_callbacks = [
                tfma.post_export_metrics.fairness_indicators(  # pytype: disable=module-attr
                    thresholds=fairness_indicator_thresholds),
            ]

        output_uri = artifact_utils.get_single_uri(
            output_dict[constants.EVALUATION_KEY])

        run_validation = False
        models = []
        if 'eval_config' in exec_properties and exec_properties['eval_config']:
            slice_spec = None
            has_baseline = bool(input_dict.get(constants.BASELINE_MODEL_KEY))
            eval_config = tfma.EvalConfig()
            json_format.Parse(exec_properties['eval_config'], eval_config)
            eval_config = tfma.update_eval_config_with_defaults(
                eval_config,
                maybe_add_baseline=has_baseline,
                maybe_remove_baseline=not has_baseline)
            tfma.verify_eval_config(eval_config)
            # Do not validate model when there is no thresholds configured. This is to
            # avoid accidentally blessing models when users forget to set thresholds.
            run_validation = bool(
                tfma.metrics.metric_thresholds_from_metrics_specs(
                    eval_config.metrics_specs))
            if len(eval_config.model_specs) > 2:
                raise ValueError(
                    """Cannot support more than two models. There are {} models in this
             eval_config.""".format(len(eval_config.model_specs)))
            # Extract model artifacts.
            for model_spec in eval_config.model_specs:
                if model_spec.is_baseline:
                    model_uri = artifact_utils.get_single_uri(
                        input_dict[constants.BASELINE_MODEL_KEY])
                else:
                    model_uri = artifact_utils.get_single_uri(
                        input_dict[constants.MODEL_KEY])
                if tfma.get_model_type(model_spec) == tfma.TF_ESTIMATOR:
                    model_path = path_utils.eval_model_path(model_uri)
                else:
                    model_path = path_utils.serving_model_path(model_uri)
                absl.logging.info('Using {} as {} model.'.format(
                    model_path, model_spec.name))
                models.append(
                    tfma.default_eval_shared_model(
                        model_name=model_spec.name,
                        eval_saved_model_path=model_path,
                        add_metrics_callbacks=add_metrics_callbacks,
                        eval_config=eval_config))
        else:
            eval_config = None
            assert ('feature_slicing_spec' in exec_properties
                    and exec_properties['feature_slicing_spec']
                    ), 'both eval_config and feature_slicing_spec are unset.'
            feature_slicing_spec = evaluator_pb2.FeatureSlicingSpec()
            json_format.Parse(exec_properties['feature_slicing_spec'],
                              feature_slicing_spec)
            slice_spec = self._get_slice_spec_from_feature_slicing_spec(
                feature_slicing_spec)
            model_uri = artifact_utils.get_single_uri(
                input_dict[constants.MODEL_KEY])
            model_path = path_utils.eval_model_path(model_uri)
            absl.logging.info('Using {} for model eval.'.format(model_path))
            models.append(
                tfma.default_eval_shared_model(
                    eval_saved_model_path=model_path,
                    add_metrics_callbacks=add_metrics_callbacks))

        file_pattern = io_utils.all_files_pattern(
            artifact_utils.get_split_uri(input_dict[constants.EXAMPLES_KEY],
                                         'eval'))
        eval_shared_model = models[0] if len(models) == 1 else models
        schema = None
        if constants.SCHEMA_KEY in input_dict:
            schema = io_utils.SchemaReader().read(
                io_utils.get_only_uri_in_dir(
                    artifact_utils.get_single_uri(
                        input_dict[constants.SCHEMA_KEY])))

        absl.logging.info('Evaluating model.')
        with self._make_beam_pipeline() as pipeline:
            # pylint: disable=expression-not-assigned
            if _USE_TFXIO:
                tensor_adapter_config = None
                if tfma.is_batched_input(eval_shared_model, eval_config):
                    tfxio = tf_example_record.TFExampleRecord(
                        file_pattern=file_pattern,
                        schema=schema,
                        raw_record_column_name=tfma.BATCHED_INPUT_KEY)
                    if schema is not None:
                        tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
                            arrow_schema=tfxio.ArrowSchema(),
                            tensor_representations=tfxio.TensorRepresentations(
                            ))
                    data = pipeline | 'ReadFromTFRecordToArrow' >> tfxio.BeamSource(
                    )
                else:
                    data = pipeline | 'ReadFromTFRecord' >> beam.io.ReadFromTFRecord(
                        file_pattern=file_pattern)
                (data
                 | 'ExtractEvaluateAndWriteResults' >>
                 tfma.ExtractEvaluateAndWriteResults(
                     eval_shared_model=models[0]
                     if len(models) == 1 else models,
                     eval_config=eval_config,
                     output_path=output_uri,
                     slice_spec=slice_spec,
                     tensor_adapter_config=tensor_adapter_config))
            else:
                data = pipeline | 'ReadFromTFRecord' >> beam.io.ReadFromTFRecord(
                    file_pattern=file_pattern)
                (data
                 | 'ExtractEvaluateAndWriteResults' >>
                 tfma.ExtractEvaluateAndWriteResults(
                     eval_shared_model=models[0]
                     if len(models) == 1 else models,
                     eval_config=eval_config,
                     output_path=output_uri,
                     slice_spec=slice_spec))
        absl.logging.info(
            'Evaluation complete. Results written to {}.'.format(output_uri))

        if not run_validation:
            # TODO(jinhuang): delete the BLESSING_KEY from output_dict when supported.
            absl.logging.info(
                'No threshold configured, will not validate model.')
            return
        # Set up blessing artifact
        blessing = artifact_utils.get_single_instance(
            output_dict[constants.BLESSING_KEY])
        blessing.set_string_custom_property(
            constants.ARTIFACT_PROPERTY_CURRENT_MODEL_URI_KEY,
            artifact_utils.get_single_uri(input_dict[constants.MODEL_KEY]))
        blessing.set_int_custom_property(
            constants.ARTIFACT_PROPERTY_CURRENT_MODEL_ID_KEY,
            input_dict[constants.MODEL_KEY][0].id)
        if input_dict.get(constants.BASELINE_MODEL_KEY):
            baseline_model = input_dict[constants.BASELINE_MODEL_KEY][0]
            blessing.set_string_custom_property(
                constants.ARTIFACT_PROPERTY_BASELINE_MODEL_URI_KEY,
                baseline_model.uri)
            blessing.set_int_custom_property(
                constants.ARTIFACT_PROPERTY_BASELINE_MODEL_ID_KEY,
                baseline_model.id)
        if 'current_component_id' in exec_properties:
            blessing.set_string_custom_property(
                'component_id', exec_properties['current_component_id'])
        # Check validation result and write BLESSED file accordingly.
        absl.logging.info('Checking validation results.')
        validation_result = tfma.load_validation_result(output_uri)
        if validation_result.validation_ok:
            io_utils.write_string_file(
                os.path.join(blessing.uri, constants.BLESSED_FILE_NAME), '')
            blessing.set_int_custom_property(
                constants.ARTIFACT_PROPERTY_BLESSED_KEY,
                constants.BLESSED_VALUE)
        else:
            io_utils.write_string_file(
                os.path.join(blessing.uri, constants.NOT_BLESSED_FILE_NAME),
                '')
            blessing.set_int_custom_property(
                constants.ARTIFACT_PROPERTY_BLESSED_KEY,
                constants.NOT_BLESSED_VALUE)
        absl.logging.info('Blessing result {} written to {}.'.format(
            validation_result.validation_ok, blessing.uri))
    def _write_tfma(self,
                    tfma_path: str,
                    output_file_format: str,
                    store: Optional[mlmd.MetadataStore] = None):
        _, eval_saved_model_path = (
            fixed_prediction_estimator.simple_fixed_prediction_estimator(
                export_path=None,
                eval_export_path=os.path.join(self.tmpdir, 'eval_export_dir')))
        eval_config = tfma.EvalConfig(model_specs=[tfma.ModelSpec()])
        eval_shared_model = self.createTestEvalSharedModel(
            eval_saved_model_path=eval_saved_model_path,
            add_metrics_callbacks=[
                tfma.post_export_metrics.example_count(),
                tfma.post_export_metrics.
                calibration_plot_and_prediction_histogram(num_buckets=2)
            ])
        extractors = [
            tfma.extractors.legacy_predict_extractor.PredictExtractor(
                eval_shared_model, eval_config=eval_config),
            tfma.extractors.unbatch_extractor.UnbatchExtractor(),
            tfma.extractors.slice_key_extractor.SliceKeyExtractor()
        ]
        evaluators = [
            tfma.evaluators.legacy_metrics_and_plots_evaluator.
            MetricsAndPlotsEvaluator(eval_shared_model)
        ]
        writers = [
            tfma.writers.MetricsPlotsAndValidationsWriter(
                output_paths={
                    'metrics': os.path.join(tfma_path, 'metrics'),
                    'plots': os.path.join(tfma_path, 'plots')
                },
                output_file_format=output_file_format,
                eval_config=eval_config,
                add_metrics_callbacks=eval_shared_model.add_metrics_callbacks)
        ]

        tfx_io = raw_tf_record.RawBeamRecordTFXIO(
            physical_format='inmemory',
            raw_record_column_name='__raw_record__',
            telemetry_descriptors=['TFMATest'])
        with beam.Pipeline() as pipeline:
            example1 = self._makeExample(prediction=0.0, label=1.0)
            example2 = self._makeExample(prediction=1.0, label=1.0)
            _ = (pipeline
                 | 'Create' >> beam.Create([
                     example1.SerializeToString(),
                     example2.SerializeToString(),
                 ])
                 | 'BatchExamples' >> tfx_io.BeamSource()
                 | 'ExtractEvaluateAndWriteResults' >>
                 tfma.ExtractEvaluateAndWriteResults(
                     eval_config=eval_config,
                     eval_shared_model=eval_shared_model,
                     extractors=extractors,
                     evaluators=evaluators,
                     writers=writers))

        if store:
            eval_type = metadata_store_pb2.ArtifactType()
            eval_type.name = standard_artifacts.ModelEvaluation.TYPE_NAME
            eval_type_id = store.put_artifact_type(eval_type)

            artifact = metadata_store_pb2.Artifact()
            artifact.uri = tfma_path
            artifact.type_id = eval_type_id
            store.put_artifacts([artifact])
Example #10
0
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Runs a batch job to evaluate the eval_model against the given input.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - model_exports: exported model.
        - examples: examples for eval the model.
      output_dict: Output dict from output key to a list of Artifacts.
        - output: model evaluation results.
      exec_properties: A dict of execution properties.
        - eval_config: JSON string of tfma.EvalConfig.
        - feature_slicing_spec: JSON string of evaluator_pb2.FeatureSlicingSpec
          instance, providing the way to slice the data. Deprecated, use
          eval_config.slicing_specs instead.
        - example_splits: JSON-serialized list of names of splits on which the
          metrics are computed. Default behavior (when example_splits is set to
          None) is using the 'eval' split.

    Returns:
      None
    """
        if constants.EXAMPLES_KEY not in input_dict:
            raise ValueError('EXAMPLES_KEY is missing from input dict.')
        if constants.MODEL_KEY not in input_dict:
            raise ValueError('MODEL_KEY is missing from input dict.')
        if constants.EVALUATION_KEY not in output_dict:
            raise ValueError('EVALUATION_KEY is missing from output dict.')
        if len(input_dict[constants.MODEL_KEY]) > 1:
            raise ValueError(
                'There can be only one candidate model, there are %d.' %
                (len(input_dict[constants.MODEL_KEY])))
        if constants.BASELINE_MODEL_KEY in input_dict and len(
                input_dict[constants.BASELINE_MODEL_KEY]) > 1:
            raise ValueError(
                'There can be only one baseline model, there are %d.' %
                (len(input_dict[constants.BASELINE_MODEL_KEY])))

        self._log_startup(input_dict, output_dict, exec_properties)

        # Add fairness indicator metric callback if necessary.
        fairness_indicator_thresholds = exec_properties.get(
            'fairness_indicator_thresholds', None)
        add_metrics_callbacks = None
        if fairness_indicator_thresholds:
            add_metrics_callbacks = [
                tfma.post_export_metrics.fairness_indicators(  # pytype: disable=module-attr
                    thresholds=fairness_indicator_thresholds),
            ]

        output_uri = artifact_utils.get_single_uri(
            output_dict[constants.EVALUATION_KEY])

        eval_shared_model_fn = udf_utils.try_get_fn(
            exec_properties=exec_properties,
            fn_name='custom_eval_shared_model'
        ) or tfma.default_eval_shared_model

        run_validation = False
        models = []
        if 'eval_config' in exec_properties and exec_properties['eval_config']:
            slice_spec = None
            has_baseline = bool(input_dict.get(constants.BASELINE_MODEL_KEY))
            eval_config = tfma.EvalConfig()
            json_format.Parse(exec_properties['eval_config'], eval_config)
            eval_config = tfma.update_eval_config_with_defaults(
                eval_config,
                maybe_add_baseline=has_baseline,
                maybe_remove_baseline=not has_baseline)
            tfma.verify_eval_config(eval_config)
            # Do not validate model when there is no thresholds configured. This is to
            # avoid accidentally blessing models when users forget to set thresholds.
            run_validation = bool(
                tfma.metrics.metric_thresholds_from_metrics_specs(
                    eval_config.metrics_specs))
            if len(eval_config.model_specs) > 2:
                raise ValueError(
                    """Cannot support more than two models. There are %d models in this
             eval_config.""" % (len(eval_config.model_specs)))
            # Extract model artifacts.
            for model_spec in eval_config.model_specs:
                if model_spec.is_baseline:
                    model_uri = artifact_utils.get_single_uri(
                        input_dict[constants.BASELINE_MODEL_KEY])
                else:
                    model_uri = artifact_utils.get_single_uri(
                        input_dict[constants.MODEL_KEY])
                if tfma.get_model_type(model_spec) == tfma.TF_ESTIMATOR:
                    model_path = path_utils.eval_model_path(model_uri)
                else:
                    model_path = path_utils.serving_model_path(model_uri)
                logging.info('Using %s as %s model.', model_path,
                             model_spec.name)
                models.append(
                    eval_shared_model_fn(
                        eval_saved_model_path=model_path,
                        model_name=model_spec.name,
                        eval_config=eval_config,
                        add_metrics_callbacks=add_metrics_callbacks))
        else:
            eval_config = None
            assert ('feature_slicing_spec' in exec_properties
                    and exec_properties['feature_slicing_spec']
                    ), 'both eval_config and feature_slicing_spec are unset.'
            feature_slicing_spec = evaluator_pb2.FeatureSlicingSpec()
            json_format.Parse(exec_properties['feature_slicing_spec'],
                              feature_slicing_spec)
            slice_spec = self._get_slice_spec_from_feature_slicing_spec(
                feature_slicing_spec)
            model_uri = artifact_utils.get_single_uri(
                input_dict[constants.MODEL_KEY])
            model_path = path_utils.eval_model_path(model_uri)
            logging.info('Using %s for model eval.', model_path)
            models.append(
                eval_shared_model_fn(
                    eval_saved_model_path=model_path,
                    model_name='',
                    eval_config=None,
                    add_metrics_callbacks=add_metrics_callbacks))

        eval_shared_model = models[0] if len(models) == 1 else models
        schema = None
        if constants.SCHEMA_KEY in input_dict:
            schema = io_utils.SchemaReader().read(
                io_utils.get_only_uri_in_dir(
                    artifact_utils.get_single_uri(
                        input_dict[constants.SCHEMA_KEY])))

        # Load and deserialize example splits from execution properties.
        example_splits = json_utils.loads(
            exec_properties.get(constants.EXAMPLE_SPLITS_KEY, 'null'))
        if not example_splits:
            example_splits = ['eval']
            logging.info(
                "The 'example_splits' parameter is not set, using 'eval' "
                'split.')

        logging.info('Evaluating model.')
        with self._make_beam_pipeline() as pipeline:
            examples_list = []
            tensor_adapter_config = None
            # pylint: disable=expression-not-assigned
            if _USE_TFXIO and tfma.is_batched_input(eval_shared_model,
                                                    eval_config):
                tfxio_factory = tfxio_utils.get_tfxio_factory_from_artifact(
                    examples=[
                        artifact_utils.get_single_instance(
                            input_dict[constants.EXAMPLES_KEY])
                    ],
                    telemetry_descriptors=_TELEMETRY_DESCRIPTORS,
                    schema=schema,
                    raw_record_column_name=tfma_constants.ARROW_INPUT_COLUMN)
                # TODO(b/161935932): refactor after TFXIO supports multiple patterns.
                for split in example_splits:
                    file_pattern = io_utils.all_files_pattern(
                        artifact_utils.get_split_uri(
                            input_dict[constants.EXAMPLES_KEY], split))
                    tfxio = tfxio_factory(file_pattern)
                    data = (pipeline
                            | 'ReadFromTFRecordToArrow[%s]' % split >>
                            tfxio.BeamSource())
                    examples_list.append(data)
                if schema is not None:
                    # Use last tfxio as TensorRepresentations and ArrowSchema are fixed.
                    tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
                        arrow_schema=tfxio.ArrowSchema(),
                        tensor_representations=tfxio.TensorRepresentations())
            else:
                for split in example_splits:
                    file_pattern = io_utils.all_files_pattern(
                        artifact_utils.get_split_uri(
                            input_dict[constants.EXAMPLES_KEY], split))
                    data = (
                        pipeline
                        | 'ReadFromTFRecord[%s]' % split >>
                        beam.io.ReadFromTFRecord(file_pattern=file_pattern))
                    examples_list.append(data)

            custom_extractors = udf_utils.try_get_fn(
                exec_properties=exec_properties, fn_name='custom_extractors')
            extractors = None
            if custom_extractors:
                extractors = custom_extractors(
                    eval_shared_model=eval_shared_model,
                    eval_config=eval_config,
                    tensor_adapter_config=tensor_adapter_config)

            (examples_list | 'FlattenExamples' >> beam.Flatten()
             | 'ExtractEvaluateAndWriteResults' >>
             tfma.ExtractEvaluateAndWriteResults(
                 eval_shared_model=models[0] if len(models) == 1 else models,
                 eval_config=eval_config,
                 extractors=extractors,
                 output_path=output_uri,
                 slice_spec=slice_spec,
                 tensor_adapter_config=tensor_adapter_config))
        logging.info('Evaluation complete. Results written to %s.', output_uri)

        if not run_validation:
            # TODO(jinhuang): delete the BLESSING_KEY from output_dict when supported.
            logging.info('No threshold configured, will not validate model.')
            return
        # Set up blessing artifact
        blessing = artifact_utils.get_single_instance(
            output_dict[constants.BLESSING_KEY])
        blessing.set_string_custom_property(
            constants.ARTIFACT_PROPERTY_CURRENT_MODEL_URI_KEY,
            artifact_utils.get_single_uri(input_dict[constants.MODEL_KEY]))
        blessing.set_int_custom_property(
            constants.ARTIFACT_PROPERTY_CURRENT_MODEL_ID_KEY,
            input_dict[constants.MODEL_KEY][0].id)
        if input_dict.get(constants.BASELINE_MODEL_KEY):
            baseline_model = input_dict[constants.BASELINE_MODEL_KEY][0]
            blessing.set_string_custom_property(
                constants.ARTIFACT_PROPERTY_BASELINE_MODEL_URI_KEY,
                baseline_model.uri)
            blessing.set_int_custom_property(
                constants.ARTIFACT_PROPERTY_BASELINE_MODEL_ID_KEY,
                baseline_model.id)
        if 'current_component_id' in exec_properties:
            blessing.set_string_custom_property(
                'component_id', exec_properties['current_component_id'])
        # Check validation result and write BLESSED file accordingly.
        logging.info('Checking validation results.')
        validation_result = tfma.load_validation_result(output_uri)
        if validation_result.validation_ok:
            io_utils.write_string_file(
                os.path.join(blessing.uri, constants.BLESSED_FILE_NAME), '')
            blessing.set_int_custom_property(
                constants.ARTIFACT_PROPERTY_BLESSED_KEY,
                constants.BLESSED_VALUE)
        else:
            io_utils.write_string_file(
                os.path.join(blessing.uri, constants.NOT_BLESSED_FILE_NAME),
                '')
            blessing.set_int_custom_property(
                constants.ARTIFACT_PROPERTY_BLESSED_KEY,
                constants.NOT_BLESSED_VALUE)
        logging.info('Blessing result %s written to %s.',
                     validation_result.validation_ok, blessing.uri)
Example #11
0
def append_tfma_pipeline(pipeline: beam.Pipeline,
                         me_eval_config: me_proto.EvaluationConfig,
                         problem_type: constants.ProblemType,
                         tfma_format: Optional[bool] = False,
                         json_mode: Optional[bool] = False,
                         schema: Optional[Any] = None):
    """Extend a beam pipeline to add TFMA evaluation given a configuration.

  Args:
    pipeline: A beam pipeline.
    me_eval_config: A ME Evaluation Configuration.
    problem_type: Defines what type of problem to expect.
    tfma_format: If true, use TFMA format, if false use Model Evaluation.
    json_mode: Output metrics in a plain text mode.
    schema: Optional tf.metadata schema. If you need to pass multi-tensor input
      to the model, you need to pass the schema.
  """
    input_files = (
        me_eval_config.data_spec.input_source_spec.jsonl_file_spec.file_names)
    output_path = me_eval_config.output_spec.gcs_sink.path
    data_spec = me_eval_config.data_spec
    weight_column_spec = ColumnSpec(
        me_eval_config.data_spec.example_weight_key_spec
    ) if me_eval_config.data_spec.HasField('example_weight_key_spec') else None
    eval_column_specs = EvaluationColumnSpecs(
        ground_truth_column_spec=ColumnSpec(
            me_eval_config.data_spec.label_key_spec),
        example_weight_column_spec=weight_column_spec,
        predicted_score_column_spec=ColumnSpec(
            data_spec.predicted_score_key_spec)
        if data_spec.HasField('predicted_score_key_spec') else None,
        predicted_label_column_spec=ColumnSpec(
            data_spec.predicted_label_key_spec)
        if data_spec.HasField('predicted_label_key_spec') else None,
        predicted_label_id_column_spec=ColumnSpec(
            data_spec.predicted_label_id_key_spec)
        if data_spec.HasField('predicted_label_id_key_spec') else None)
    class_name_list = list(me_eval_config.data_spec.labels) or None
    quantile_list = list(data_spec.quantiles) or None
    quantile_index = data_spec.quantile_index if data_spec.quantile_index >= 0 else None
    tfma_eval_config = tfma_adapter.METoTFMA(class_name_list).eval_config(
        me_eval_config)
    me_writers = [
        tfma.writers.Writer(
            stage_name='WriteMetrics',
            # pylint:disable=no-value-for-parameter
            ptransform=_write_metrics(output_file=os.path.join(
                output_path, constants.Pipeline.METRICS_KEY),
                                      problem_type=problem_type,
                                      class_labels=class_name_list,
                                      tfma_format=tfma_format,
                                      json_mode=json_mode)),
    ]

    coder = tf_example_record.TFExampleBeamRecord(
        physical_format='inmem',
        schema=schema,
        raw_record_column_name=tfma.ARROW_INPUT_COLUMN,
        telemetry_descriptors=None)
    tensor_adapter_config = None
    if schema is not None:
        tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
            arrow_schema=coder.ArrowSchema(),
            tensor_representations=coder.TensorRepresentations())
    _ = (pipeline | 'InputFileList' >> beam.Create(input_files)
         | 'ReadText' >> beam.io.textio.ReadAllFromText()
         | 'ParseData' >> beam.ParDo(
             JSONToSerializedExample(eval_column_specs=eval_column_specs,
                                     class_list=class_name_list,
                                     quantile_list=quantile_list,
                                     quantile_index=quantile_index))
         | 'ExamplesToRecordBatch' >> coder.BeamSource()
         | 'ExtractEvaluateAndWriteResults' >>
         tfma.ExtractEvaluateAndWriteResults(
             eval_config=tfma_eval_config,
             writers=me_writers,
             tensor_adapter_config=tensor_adapter_config))
Example #12
0
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:

        # Check the inputs
        if constants.EXAMPLES not in input_dict:
            raise ValueError(f'{constants.EXAMPLES} is missing from inputs')
        examples_artifact = input_dict[constants.EXAMPLES]

        input_uri = artifact_utils.get_single_uri(examples_artifact)
        if len(zenml_path_utils.list_dir(input_uri)) == 0:
            raise AssertionError(
                'ZenML can not run the evaluation as the provided input '
                'configuration does not point towards any data. Specifically, '
                'if you are using the agnostic evaluator, please make sure '
                'that you are using a proper test_fn in your trainer step to '
                'write these results.')

        else:
            # Check the outputs
            if constants.EVALUATION not in output_dict:
                raise ValueError(
                    f'{constants.EVALUATION} is missing from outputs')
            evaluation_artifact = output_dict[constants.EVALUATION]
            output_uri = artifact_utils.get_single_uri(evaluation_artifact)

            # Resolve the schema
            schema = None
            if constants.SCHEMA in input_dict:
                schema_artifact = input_dict[constants.SCHEMA]
                schema_uri = artifact_utils.get_single_uri(schema_artifact)
                reader = io_utils.SchemaReader()
                schema = reader.read(io_utils.get_only_uri_in_dir(schema_uri))

            # Create the step with the schema attached if provided
            source = exec_properties[StepKeys.SOURCE]
            args = exec_properties[StepKeys.ARGS]
            c = source_utils.load_source_path_class(source)
            evaluator_step: BaseEvaluatorStep = c(**args)

            # Check the execution parameters
            eval_config = evaluator_step.build_config()
            eval_config = tfma.update_eval_config_with_defaults(eval_config)
            tfma.verify_eval_config(eval_config)

            # Resolve the model
            if constants.MODEL in input_dict:
                model_artifact = input_dict[constants.MODEL]
                model_uri = artifact_utils.get_single_uri(model_artifact)
                model_path = path_utils.serving_model_path(model_uri)

                model_fn = try_get_fn(evaluator_step.CUSTOM_MODULE,
                                      'custom_eval_shared_model'
                                      ) or tfma.default_eval_shared_model

                eval_shared_model = model_fn(
                    model_name='',  # TODO: Fix with model names
                    eval_saved_model_path=model_path,
                    eval_config=eval_config)
            else:
                eval_shared_model = None

            self._log_startup(input_dict, output_dict, exec_properties)

            # Main pipeline
            logging.info('Evaluating model.')
            with self._make_beam_pipeline() as pipeline:
                examples_list = []
                tensor_adapter_config = None

                if tfma.is_batched_input(eval_shared_model, eval_config):
                    tfxio_factory = tfxio_utils.get_tfxio_factory_from_artifact(
                        examples=[
                            artifact_utils.get_single_instance(
                                examples_artifact)
                        ],
                        telemetry_descriptors=_TELEMETRY_DESCRIPTORS,
                        schema=schema,
                        raw_record_column_name=tfma_constants.
                        ARROW_INPUT_COLUMN)
                    for split in evaluator_step.splits:
                        file_pattern = io_utils.all_files_pattern(
                            artifact_utils.get_split_uri(
                                examples_artifact, split))
                        tfxio = tfxio_factory(file_pattern)
                        data = (pipeline
                                | 'ReadFromTFRecordToArrow[%s]' % split >>
                                tfxio.BeamSource())
                        examples_list.append(data)
                    if schema is not None:
                        tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
                            arrow_schema=tfxio.ArrowSchema(),
                            tensor_representations=tfxio.TensorRepresentations(
                            ))
                else:
                    for split in evaluator_step.splits:
                        file_pattern = io_utils.all_files_pattern(
                            artifact_utils.get_split_uri(
                                examples_artifact, split))
                        data = (pipeline
                                | 'ReadFromTFRecord[%s]' % split >> beam.io.
                                ReadFromTFRecord(file_pattern=file_pattern))
                        examples_list.append(data)

                # Resolve custom extractors
                custom_extractors = try_get_fn(evaluator_step.CUSTOM_MODULE,
                                               'custom_extractors')
                extractors = None
                if custom_extractors:
                    extractors = custom_extractors(
                        eval_shared_model=eval_shared_model,
                        eval_config=eval_config,
                        tensor_adapter_config=tensor_adapter_config)

                # Resolve custom evaluators
                custom_evaluators = try_get_fn(evaluator_step.CUSTOM_MODULE,
                                               'custom_evaluators')
                evaluators = None
                if custom_evaluators:
                    evaluators = custom_evaluators(
                        eval_shared_model=eval_shared_model,
                        eval_config=eval_config,
                        tensor_adapter_config=tensor_adapter_config)

                # Extract, evaluate and write
                (examples_list | 'FlattenExamples' >> beam.Flatten()
                 | 'ExtractEvaluateAndWriteResults' >>
                 tfma.ExtractEvaluateAndWriteResults(
                     eval_config=eval_config,
                     eval_shared_model=eval_shared_model,
                     output_path=output_uri,
                     extractors=extractors,
                     evaluators=evaluators,
                     tensor_adapter_config=tensor_adapter_config))
            logging.info('Evaluation complete. Results written to %s.',
                         output_uri)
Example #13
0
  def Do(self, input_dict: Dict[str, List[types.Artifact]],
         output_dict: Dict[str, List[types.Artifact]],
         exec_properties: Dict[str, Any]) -> None:
    """Runs a batch job to evaluate the eval_model against the given input.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - model: exported model.
        - examples: examples for eval the model.
      output_dict: Output dict from output key to a list of Artifacts.
        - evaluation: model evaluation results.
      exec_properties: A dict of execution properties.
        - eval_config: JSON string of tfma.EvalConfig.
        - feature_slicing_spec: JSON string of evaluator_pb2.FeatureSlicingSpec
          instance, providing the way to slice the data. Deprecated, use
          eval_config.slicing_specs instead.
        - example_splits: JSON-serialized list of names of splits on which the
          metrics are computed. Default behavior (when example_splits is set to
          None) is using the 'eval' split.

    Returns:
      None
    """
    if standard_component_specs.EXAMPLES_KEY not in input_dict:
      raise ValueError('EXAMPLES_KEY is missing from input dict.')
    if standard_component_specs.EVALUATION_KEY not in output_dict:
      raise ValueError('EVALUATION_KEY is missing from output dict.')
    if standard_component_specs.MODEL_KEY in input_dict and len(
        input_dict[standard_component_specs.MODEL_KEY]) > 1:
      raise ValueError('There can be only one candidate model, there are %d.' %
                       (len(input_dict[standard_component_specs.MODEL_KEY])))
    if standard_component_specs.BASELINE_MODEL_KEY in input_dict and len(
        input_dict[standard_component_specs.BASELINE_MODEL_KEY]) > 1:
      raise ValueError(
          'There can be only one baseline model, there are %d.' %
          (len(input_dict[standard_component_specs.BASELINE_MODEL_KEY])))

    self._log_startup(input_dict, output_dict, exec_properties)

    # Add fairness indicator metric callback if necessary.
    fairness_indicator_thresholds = json_utils.loads(
        exec_properties.get(
            standard_component_specs.FAIRNESS_INDICATOR_THRESHOLDS_KEY, 'null'))
    add_metrics_callbacks = None
    if fairness_indicator_thresholds:
      add_metrics_callbacks = [
          tfma.post_export_metrics.fairness_indicators(  # pytype: disable=module-attr
              thresholds=fairness_indicator_thresholds),
      ]

    output_uri = artifact_utils.get_single_uri(
        output_dict[constants.EVALUATION_KEY])

    # Make sure user packages get propagated to the remote Beam worker.
    unused_module_path, extra_pip_packages = udf_utils.decode_user_module_key(
        exec_properties.get(standard_component_specs.MODULE_PATH_KEY, None))
    for pip_package_path in extra_pip_packages:
      local_pip_package_path = io_utils.ensure_local(pip_package_path)
      self._beam_pipeline_args.append('--extra_package=%s' %
                                      local_pip_package_path)

    eval_shared_model_fn = udf_utils.try_get_fn(
        exec_properties=exec_properties,
        fn_name='custom_eval_shared_model') or tfma.default_eval_shared_model

    run_validation = False
    models = []
    if (standard_component_specs.EVAL_CONFIG_KEY in exec_properties
        and exec_properties[standard_component_specs.EVAL_CONFIG_KEY]):
      slice_spec = None
      has_baseline = bool(
          input_dict.get(standard_component_specs.BASELINE_MODEL_KEY))
      eval_config = tfma.EvalConfig()
      proto_utils.json_to_proto(
          exec_properties[standard_component_specs.EVAL_CONFIG_KEY],
          eval_config)
      # rubber_stamp is always assumed true, i.e., change threshold will always
      # be ignored when a baseline model is missing.
      if hasattr(tfma, 'utils'):
        eval_config = tfma.utils.update_eval_config_with_defaults(
            eval_config, has_baseline=has_baseline, rubber_stamp=True)
        tfma.utils.verify_eval_config(eval_config)
      else:
        # TODO(b/171992041): Replaced by tfma.utils.
        eval_config = tfma.update_eval_config_with_defaults(
            eval_config, has_baseline=has_baseline, rubber_stamp=True)
        tfma.verify_eval_config(eval_config)
      # Do not validate model when there is no thresholds configured. This is to
      # avoid accidentally blessing models when users forget to set thresholds.
      run_validation = bool(
          tfma.metrics.metric_thresholds_from_metrics_specs(
              eval_config.metrics_specs, eval_config=eval_config))
      if len(eval_config.model_specs) > 2:
        raise ValueError(
            """Cannot support more than two models. There are %d models in this
             eval_config.""" % (len(eval_config.model_specs)))
      # Extract model artifacts.
      for model_spec in eval_config.model_specs:
        if standard_component_specs.MODEL_KEY not in input_dict:
          if not model_spec.prediction_key:
            raise ValueError(
                'model_spec.prediction_key required if model not provided')
          continue
        if model_spec.is_baseline:
          model_artifact = artifact_utils.get_single_instance(
              input_dict[standard_component_specs.BASELINE_MODEL_KEY])
        else:
          model_artifact = artifact_utils.get_single_instance(
              input_dict[standard_component_specs.MODEL_KEY])
        # TODO(b/171992041): tfma.get_model_type replaced by tfma.utils.
        if ((hasattr(tfma, 'utils') and
             tfma.utils.get_model_type(model_spec) == tfma.TF_ESTIMATOR) or
            hasattr(tfma, 'get_model_type') and
            tfma.get_model_type(model_spec) == tfma.TF_ESTIMATOR):
          model_path = path_utils.eval_model_path(
              model_artifact.uri,
              path_utils.is_old_model_artifact(model_artifact))
        else:
          model_path = path_utils.serving_model_path(
              model_artifact.uri,
              path_utils.is_old_model_artifact(model_artifact))
        logging.info('Using %s as %s model.', model_path, model_spec.name)
        models.append(
            eval_shared_model_fn(
                eval_saved_model_path=model_path,
                model_name=model_spec.name,
                eval_config=eval_config,
                add_metrics_callbacks=add_metrics_callbacks))
    else:
      eval_config = None
      assert (standard_component_specs.FEATURE_SLICING_SPEC_KEY
              in exec_properties and
              exec_properties[standard_component_specs.FEATURE_SLICING_SPEC_KEY]
             ), 'both eval_config and feature_slicing_spec are unset.'
      feature_slicing_spec = evaluator_pb2.FeatureSlicingSpec()
      proto_utils.json_to_proto(
          exec_properties[standard_component_specs.FEATURE_SLICING_SPEC_KEY],
          feature_slicing_spec)
      slice_spec = self._get_slice_spec_from_feature_slicing_spec(
          feature_slicing_spec)
      model_artifact = artifact_utils.get_single_instance(
          input_dict[standard_component_specs.MODEL_KEY])
      model_path = path_utils.eval_model_path(
          model_artifact.uri, path_utils.is_old_model_artifact(model_artifact))
      logging.info('Using %s for model eval.', model_path)
      models.append(
          eval_shared_model_fn(
              eval_saved_model_path=model_path,
              model_name='',
              eval_config=None,
              add_metrics_callbacks=add_metrics_callbacks))

    eval_shared_model = models[0] if len(models) == 1 else models
    schema = None
    if standard_component_specs.SCHEMA_KEY in input_dict:
      schema = io_utils.SchemaReader().read(
          io_utils.get_only_uri_in_dir(
              artifact_utils.get_single_uri(
                  input_dict[standard_component_specs.SCHEMA_KEY])))

    # Load and deserialize example splits from execution properties.
    example_splits = json_utils.loads(
        exec_properties.get(standard_component_specs.EXAMPLE_SPLITS_KEY,
                            'null'))
    if not example_splits:
      example_splits = ['eval']
      logging.info("The 'example_splits' parameter is not set, using 'eval' "
                   'split.')

    logging.info('Evaluating model.')
    # TempPipInstallContext is needed here so that subprocesses (which
    # may be created by the Beam multi-process DirectRunner) can find the
    # needed dependencies.
    # TODO(b/187122662): Move this to the ExecutorOperator or Launcher.
    with udf_utils.TempPipInstallContext(extra_pip_packages):
      with self._make_beam_pipeline() as pipeline:
        examples_list = []
        tensor_adapter_config = None
        # pylint: disable=expression-not-assigned
        if tfma.is_batched_input(eval_shared_model, eval_config):
          tfxio_factory = tfxio_utils.get_tfxio_factory_from_artifact(
              examples=input_dict[standard_component_specs.EXAMPLES_KEY],
              telemetry_descriptors=_TELEMETRY_DESCRIPTORS,
              schema=schema,
              raw_record_column_name=tfma_constants.ARROW_INPUT_COLUMN)
          # TODO(b/161935932): refactor after TFXIO supports multiple patterns.
          for split in example_splits:
            split_uris = artifact_utils.get_split_uris(
                input_dict[standard_component_specs.EXAMPLES_KEY], split)
            for index in range(len(split_uris)):
              split_uri = split_uris[index]
              file_pattern = io_utils.all_files_pattern(split_uri)
              tfxio = tfxio_factory(file_pattern)
              data = (
                  pipeline
                  | f'ReadFromTFRecordToArrow[{split}][{index}]' >>
                  tfxio.BeamSource())
              examples_list.append(data)
          if schema is not None:
            # Use last tfxio as TensorRepresentations and ArrowSchema are fixed.
            tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
                arrow_schema=tfxio.ArrowSchema(),
                tensor_representations=tfxio.TensorRepresentations())
        else:
          for split in example_splits:
            split_uris = artifact_utils.get_split_uris(
                input_dict[standard_component_specs.EXAMPLES_KEY], split)
            for index in range(len(split_uris)):
              split_uri = split_uris[index]
              file_pattern = io_utils.all_files_pattern(split_uri)
              data = (
                  pipeline
                  | f'ReadFromTFRecord[{split}][{index}]' >>
                  beam.io.ReadFromTFRecord(file_pattern=file_pattern))
              examples_list.append(data)

        custom_extractors = udf_utils.try_get_fn(
            exec_properties=exec_properties, fn_name='custom_extractors')
        extractors = None
        if custom_extractors:
          extractors = custom_extractors(
              eval_shared_model=eval_shared_model,
              eval_config=eval_config,
              tensor_adapter_config=tensor_adapter_config)

        (examples_list | 'FlattenExamples' >> beam.Flatten()
         | 'ExtractEvaluateAndWriteResults' >>
         (tfma.ExtractEvaluateAndWriteResults(
             eval_shared_model=models[0] if len(models) == 1 else models,
             eval_config=eval_config,
             extractors=extractors,
             output_path=output_uri,
             slice_spec=slice_spec,
             tensor_adapter_config=tensor_adapter_config)))
    logging.info('Evaluation complete. Results written to %s.', output_uri)

    if not run_validation:
      # TODO(jinhuang): delete the BLESSING_KEY from output_dict when supported.
      logging.info('No threshold configured, will not validate model.')
      return
    # Set up blessing artifact
    blessing = artifact_utils.get_single_instance(
        output_dict[standard_component_specs.BLESSING_KEY])
    blessing.set_string_custom_property(
        constants.ARTIFACT_PROPERTY_CURRENT_MODEL_URI_KEY,
        artifact_utils.get_single_uri(
            input_dict[standard_component_specs.MODEL_KEY]))
    blessing.set_int_custom_property(
        constants.ARTIFACT_PROPERTY_CURRENT_MODEL_ID_KEY,
        input_dict[standard_component_specs.MODEL_KEY][0].id)
    if input_dict.get(standard_component_specs.BASELINE_MODEL_KEY):
      baseline_model = input_dict[
          standard_component_specs.BASELINE_MODEL_KEY][0]
      blessing.set_string_custom_property(
          constants.ARTIFACT_PROPERTY_BASELINE_MODEL_URI_KEY,
          baseline_model.uri)
      blessing.set_int_custom_property(
          constants.ARTIFACT_PROPERTY_BASELINE_MODEL_ID_KEY, baseline_model.id)
    if 'current_component_id' in exec_properties:
      blessing.set_string_custom_property(
          'component_id', exec_properties['current_component_id'])
    # Check validation result and write BLESSED file accordingly.
    logging.info('Checking validation results.')
    validation_result = tfma.load_validation_result(output_uri)
    if validation_result.validation_ok:
      io_utils.write_string_file(
          os.path.join(blessing.uri, constants.BLESSED_FILE_NAME), '')
      blessing.set_int_custom_property(constants.ARTIFACT_PROPERTY_BLESSED_KEY,
                                       constants.BLESSED_VALUE)
    else:
      io_utils.write_string_file(
          os.path.join(blessing.uri, constants.NOT_BLESSED_FILE_NAME), '')
      blessing.set_int_custom_property(constants.ARTIFACT_PROPERTY_BLESSED_KEY,
                                       constants.NOT_BLESSED_VALUE)
    logging.info('Blessing result %s written to %s.',
                 validation_result.validation_ok, blessing.uri)
Example #14
0
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Runs a batch job to evaluate the eval_model against the given input.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - model_exports: exported model.
        - examples: examples for eval the model.
      output_dict: Output dict from output key to a list of Artifacts.
        - output: model evaluation results.
      exec_properties: A dict of execution properties.
        - eval_config: JSON string of tfma.EvalConfig.
        - feature_slicing_spec: JSON string of evaluator_pb2.FeatureSlicingSpec
          instance, providing the way to slice the data. Deprecated, use
          eval_config.slicing_specs instead.

    Returns:
      None
    """
        if EXAMPLES_KEY not in input_dict:
            raise ValueError('EXAMPLES_KEY is missing from input dict.')
        if MODEL_KEY not in input_dict:
            raise ValueError('MODEL_KEY is missing from input dict.')
        if EVALUATION_KEY not in output_dict:
            raise ValueError('EVALUATION_KEY is missing from output dict.')
        if len(input_dict[MODEL_KEY]) > 1:
            raise ValueError(
                'There can be only one candidate model, there are {}.'.format(
                    len(input_dict[MODEL_KEY])))
        if BASELINE_MODEL_KEY in input_dict and len(
                input_dict[BASELINE_MODEL_KEY]) > 1:
            raise ValueError(
                'There can be only one baseline model, there are {}.'.format(
                    len(input_dict[BASELINE_MODEL_KEY])))

        self._log_startup(input_dict, output_dict, exec_properties)

        output_uri = artifact_utils.get_single_uri(output_dict[EVALUATION_KEY])

        # Add fairness indicator metric callback if necessary.
        fairness_indicator_thresholds = exec_properties.get(
            'fairness_indicator_thresholds', None)
        add_metrics_callbacks = None
        if fairness_indicator_thresholds:
            # Need to import the following module so that the fairness indicator
            # post-export metric is registered.
            import tensorflow_model_analysis.addons.fairness.post_export_metrics.fairness_indicators  # pylint: disable=g-import-not-at-top, unused-variable
            add_metrics_callbacks = [
                tfma.post_export_metrics.fairness_indicators(  # pytype: disable=module-attr
                    thresholds=fairness_indicator_thresholds),
            ]

        def _get_eval_saved_model(artifact: List[types.Artifact],
                                  tags=None) -> tfma.EvalSharedModel:
            model_uri = artifact_utils.get_single_uri(artifact)
            if tags and tf.saved_model.SERVING in tags:
                model_path = path_utils.serving_model_path(model_uri)
            else:
                model_path = path_utils.eval_model_path(model_uri)
            return tfma.default_eval_shared_model(
                eval_saved_model_path=model_path,
                tags=tags,
                add_metrics_callbacks=add_metrics_callbacks)

        # Extract model artifacts.
        # Baseline will be ignored if baseline is not configured in model_spec.
        if 'eval_config' in exec_properties and exec_properties['eval_config']:
            slice_spec = None
            eval_config = tfma.EvalConfig()
            json_format.Parse(exec_properties['eval_config'], eval_config)
            if len(eval_config.model_specs) > 2:
                raise ValueError(
                    """Cannot support more than two models. There are {} models in this
             eval_config.""".format(len(eval_config.model_specs)))
            models = {}
            if not eval_config.model_specs:
                eval_config.model_specs.add()
            for model_spec in eval_config.model_specs:
                if model_spec.signature_name != 'eval':
                    tags = [tf.saved_model.SERVING]
                if model_spec.is_baseline:
                    if BASELINE_MODEL_KEY not in input_dict:
                        raise ValueError(
                            """No baseline model is present in Evaluator, check whether a
                 baseline is provided to the Executor.""")
                    models[model_spec.name] = _get_eval_saved_model(
                        input_dict[BASELINE_MODEL_KEY], tags)
                    absl.logging.info('Using {} as baseline model.'.format(
                        models[model_spec.name].model_path))
                else:
                    models[model_spec.name] = _get_eval_saved_model(
                        input_dict[MODEL_KEY], tags)
                    absl.logging.info('Using {} for model eval.'.format(
                        models[model_spec.name].model_path))
        else:
            eval_config = None
            assert ('feature_slicing_spec' in exec_properties
                    and exec_properties['feature_slicing_spec']
                    ), 'both eval_config and feature_slicing_spec are unset.'
            feature_slicing_spec = evaluator_pb2.FeatureSlicingSpec()
            json_format.Parse(exec_properties['feature_slicing_spec'],
                              feature_slicing_spec)
            slice_spec = self._get_slice_spec_from_feature_slicing_spec(
                feature_slicing_spec)
            models = _get_eval_saved_model(input_dict[MODEL_KEY])
            absl.logging.info('Using {} for model eval.'.format(
                models.model_path))

        absl.logging.info('Evaluating model.')
        with self._make_beam_pipeline() as pipeline:
            # pylint: disable=expression-not-assigned
            (pipeline
             | 'ReadData' >>
             beam.io.ReadFromTFRecord(file_pattern=io_utils.all_files_pattern(
                 artifact_utils.get_split_uri(input_dict[EXAMPLES_KEY],
                                              'eval')))
             | 'ExtractEvaluateAndWriteResults' >>
             tfma.ExtractEvaluateAndWriteResults(eval_shared_model=models,
                                                 eval_config=eval_config,
                                                 output_path=output_uri,
                                                 slice_spec=slice_spec))
        absl.logging.info(
            'Evaluation complete. Results written to {}.'.format(output_uri))