def _generate_blessing_result(self, eval_examples_uri: Text,
                              slice_spec: List[tfma.slicer.SingleSliceSpec],
                              current_model_dir: Text,
                              blessed_model_dir: Text) -> bool:
    """Decides whether the current model should be blessed.

    Evaluates the current model with TFMA over the eval examples and, when a
    blessed model directory is supplied, evaluates that model in the same Beam
    pipeline. The evaluation results are written under `self._temp_path`.

    Args:
      eval_examples_uri: Location of the TFRecord examples to evaluate on.
      slice_spec: Slicing specs forwarded to TFMA for both evaluations.
      current_model_dir: Directory of the model being validated.
      blessed_model_dir: Directory of the previously blessed model, or None
        when no model has been blessed yet.

    Returns:
      False if the current model fails `self._pass_threshold`. Otherwise True
      when there is no blessed model or `self._compare_eval_result` is truthy,
      and False when the comparison is falsy.
    """
    has_blessed_model = blessed_model_dir is not None
    current_result_dir = os.path.join(
        self._temp_path, constants.CURRENT_MODEL_EVAL_RESULT_PATH)
    blessed_result_dir = os.path.join(
        self._temp_path, constants.BLESSED_MODEL_EVAL_RESULT_PATH)

    with self._make_beam_pipeline() as pipeline:
        # Both evaluations consume the same PCollection of examples.
        examples = pipeline | 'ReadData' >> beam.io.ReadFromTFRecord(
            file_pattern=io_utils.all_files_pattern(eval_examples_uri))

        candidate = tfma.default_eval_shared_model(
            eval_saved_model_path=path_utils.eval_model_path(
                current_model_dir))
        _ = examples | 'EvalCurrentModel' >> (
            tfma.ExtractEvaluateAndWriteResults(
                eval_shared_model=candidate,
                slice_spec=slice_spec,
                output_path=current_result_dir))

        if has_blessed_model:
            incumbent = tfma.default_eval_shared_model(
                eval_saved_model_path=path_utils.eval_model_path(
                    blessed_model_dir))
            _ = examples | 'EvalBlessedModel' >> (
                tfma.ExtractEvaluateAndWriteResults(
                    eval_shared_model=incumbent,
                    slice_spec=slice_spec,
                    output_path=blessed_result_dir))

    # The results are read back only after the pipeline context has exited.
    absl.logging.info(
        'all files in current_model_eval_result_path: [%s]',
        str(tf.io.gfile.listdir(current_result_dir)))
    current_result = tfma.load_eval_result(output_path=current_result_dir)

    if not self._pass_threshold(current_result):
        absl.logging.info('Current model does not pass threshold.')
        return False
    absl.logging.info('Current model passes threshold.')

    if not has_blessed_model:
        absl.logging.info('No blessed model yet.')
        return True

    absl.logging.info(
        'all files in blessed_model_eval_result: [%s]',
        str(tf.io.gfile.listdir(blessed_result_dir)))
    blessed_result = tfma.load_eval_result(output_path=blessed_result_dir)

    if not self._compare_eval_result(current_result, blessed_result):
        absl.logging.info('Current model worse than blessed model.')
        return False
    absl.logging.info('Current model better than blessed model.')
    return True
def _generate_blessing_result(self, eval_examples_uri, slice_spec,
                              current_model_dir, blessed_model_dir):
    """Decides whether the current model should be blessed.

    Runs one Beam pipeline that evaluates the current model with TFMA and,
    if `blessed_model_dir` is not None, the blessed model too. Results are
    written under `self._temp_path` and loaded back afterwards.

    Args:
      eval_examples_uri: Location of the TFRecord examples to evaluate on.
      slice_spec: Slicing specs forwarded to TFMA for both evaluations.
      current_model_dir: Directory of the model being validated.
      blessed_model_dir: Directory of the previously blessed model, or None.

    Returns:
      False if the current model fails `self._pass_threshold`. Otherwise True
      when there is no blessed model or `self._compare_eval_result` is truthy,
      and False when the comparison is falsy.
    """
    current_out = os.path.join(self._temp_path, CURRENT_MODEL_EVAL_RESULT_PATH)
    blessed_out = os.path.join(self._temp_path, BLESSED_MODEL_EVAL_RESULT_PATH)

    with beam.Pipeline(argv=self._get_beam_pipeline_args()) as pipeline:
        examples = pipeline | 'ReadData' >> beam.io.ReadFromTFRecord(
            file_pattern=io_utils.all_files_pattern(eval_examples_uri))

        def _schedule_eval(stage_name, model_dir, result_dir):
            # Attaches one TFMA evaluation of `model_dir` to the shared
            # examples.
            shared_model = tfma.default_eval_shared_model(
                eval_saved_model_path=path_utils.eval_model_path(model_dir))
            _ = examples | stage_name >> tfma.ExtractEvaluateAndWriteResults(
                eval_shared_model=shared_model,
                slice_spec=slice_spec,
                output_path=result_dir)

        _schedule_eval('EvalCurrentModel', current_model_dir, current_out)
        if blessed_model_dir is not None:
            _schedule_eval('EvalBlessedModel', blessed_model_dir, blessed_out)

    # The results are read back only after the pipeline context has exited.
    current_result = tfma.load_eval_result(output_path=current_out)
    if not self._pass_threshold(current_result):
        tf.logging.info('Current model does not pass threshold.')
        return False
    tf.logging.info('Current model passes threshold.')

    if blessed_model_dir is None:
        tf.logging.info('No blessed model yet.')
        return True

    blessed_result = tfma.load_eval_result(output_path=blessed_out)
    if not self._compare_eval_result(current_result, blessed_result):
        tf.logging.info('Current model worse than blessed model.')
        return False
    tf.logging.info('Current model better than blessed model.')
    return True
def Do(self, input_dict: Dict[Text, List[types.Artifact]],
       output_dict: Dict[Text, List[types.Artifact]],
       exec_properties: Dict[Text, Any]) -> None:
    """Evaluates the exported eval model on the 'eval' split with TFMA.

    Args:
      input_dict: Maps input keys to lists of Artifacts. Must contain
        - model_exports: the exported model to evaluate.
        - examples: the examples; only the 'eval' split is read.
      output_dict: Maps output keys to lists of Artifacts. Must contain
        - output: destination of the model evaluation results.
      exec_properties: Execution properties. Reads
        - feature_slicing_spec: JSON string of an
          evaluator_pb2.FeatureSlicingSpec describing how to slice the data.

    Returns:
      None

    Raises:
      ValueError: If a required key is absent from input_dict or output_dict.
    """
    if 'model_exports' not in input_dict:
        raise ValueError("'model_exports' is missing in input dict.")
    if 'examples' not in input_dict:
        raise ValueError("'examples' is missing in input dict.")
    if 'output' not in output_dict:
        raise ValueError("'output' is missing in output dict.")

    self._log_startup(input_dict, output_dict, exec_properties)

    # Resolve the model location and the slicing configuration.
    model_uri = artifact_utils.get_single_uri(input_dict['model_exports'])

    slicing_proto = evaluator_pb2.FeatureSlicingSpec()
    json_format.Parse(exec_properties['feature_slicing_spec'], slicing_proto)
    slices = self._get_slice_spec_from_feature_slicing_spec(slicing_proto)

    results_uri = artifact_utils.get_single_uri(output_dict['output'])

    saved_model_dir = path_utils.eval_model_path(model_uri)
    tf.logging.info('Using {} for model eval.'.format(saved_model_dir))
    shared_model = tfma.default_eval_shared_model(
        eval_saved_model_path=saved_model_dir)

    tf.logging.info('Evaluating model.')
    with self._make_beam_pipeline() as pipeline:
        eval_files = io_utils.all_files_pattern(
            artifact_utils.get_split_uri(input_dict['examples'], 'eval'))
        _ = (
            pipeline
            | 'ReadData' >> beam.io.ReadFromTFRecord(file_pattern=eval_files)
            | 'ExtractEvaluateAndWriteResults' >>
            tfma.ExtractEvaluateAndWriteResults(
                eval_shared_model=shared_model,
                slice_spec=slices,
                output_path=results_uri))

    tf.logging.info(
        'Evaluation complete. Results written to {}.'.format(results_uri))
def process_tfma(eval_model_dir=None,
                 eval_result_dir=None,
                 bq_table=None,
                 max_rows=None,
                 schema_file=None,
                 pipeline_args=None):
    """Runs a batch job to evaluate the eval_model against the given input.

    Rows are read from BigQuery, cleaned, encoded as serialized tf.Examples
    and evaluated with TFMA. The overall slice is always computed, plus one
    slice per entry of ``my_metadata.TFMA_SLICERS``.

    :param eval_model_dir: path of the eval saved model to evaluate.
    :param eval_result_dir: path the TFMA results are written to.
    :param bq_table: table passed to ``sql_queries.get_tfma_sql_query``.
    :param max_rows: row limit passed to ``sql_queries.get_tfma_sql_query``.
    :param schema_file: file passed to ``my_metadata.read_schema``.
    :param pipeline_args: argv list handed to ``beam.Pipeline``.
    :return: None
    """
    # Always include the overall (unsliced) spec. Each TFMA_SLICERS entry is
    # indexed as (columns, features). The loop variable is deliberately not
    # called `slice`, which would shadow the builtin.
    slice_spec = [tfma.slicer.SingleSliceSpec()]
    slice_spec.extend(
        tfma.slicer.SingleSliceSpec(columns=slicer[0], features=slicer[1])
        for slicer in my_metadata.TFMA_SLICERS)

    schema = my_metadata.read_schema(schema_file)

    eval_shared_model = tfma.default_eval_shared_model(
        eval_saved_model_path=eval_model_dir,
        add_metrics_callbacks=[
            tfma.post_export_metrics.calibration_plot_and_prediction_histogram(),
            tfma.post_export_metrics.auc_plots(),
            tfma.post_export_metrics.auc(),
        ])

    with beam.Pipeline(argv=pipeline_args) as pipeline:
        query = sql_queries.get_tfma_sql_query(bq_table, max_rows)
        raw_feature_spec = my_metadata.get_raw_feature_spec(schema)
        raw_data = (
            pipeline
            | 'ReadBigQuery' >> beam.io.Read(
                beam.io.BigQuerySource(query=query, use_standard_sql=True))
            | 'CleanData' >> beam.Map(
                lambda row: my_metadata.clean_raw_data_dict(
                    row, raw_feature_spec)))

        # Examples must be in clean tf-example format.
        coder = my_metadata.make_proto_coder(schema)

        _ = (
            raw_data
            | 'ToSerializedTFExample' >> beam.Map(coder.encode)
            | 'ExtractEvaluateAndWriteResults' >>
            tfma.ExtractEvaluateAndWriteResults(
                eval_shared_model=eval_shared_model,
                slice_spec=slice_spec,
                output_path=eval_result_dir))
def Do(self, input_dict: Dict[Text, List[types.Artifact]],
       output_dict: Dict[Text, List[types.Artifact]],
       exec_properties: Dict[Text, Any]) -> None:
    """Runs a batch job to evaluate the eval_model against the given input.

    When thresholds are configured in the eval config, the TFMA validation
    result is also read back and recorded on the blessing artifact.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - constants.EXAMPLES_KEY: examples; the 'eval' split is evaluated.
        - constants.MODEL_KEY: the candidate model (at most one artifact).
        - constants.BASELINE_MODEL_KEY: optional baseline model (at most one
          artifact).
      output_dict: Output dict from output key to a list of Artifacts.
        - constants.EVALUATION_KEY: model evaluation results.
        - constants.BLESSING_KEY: blessing artifact; only read when
          validation runs.
      exec_properties: A dict of execution properties.
        - eval_config: JSON string of tfma.EvalConfig.
        - feature_slicing_spec: JSON string of evaluator_pb2.FeatureSlicingSpec
          instance, providing the way to slice the data. Deprecated, use
          eval_config.slicing_specs instead.
        - fairness_indicator_thresholds: optional; when truthy, the fairness
          indicators post-export metric is added.
        - current_component_id: optional; copied onto the blessing artifact.

    Returns:
      None

    Raises:
      ValueError: If a required key is missing, more than one candidate or
        baseline model is given, or eval_config has more than two model specs.
      AssertionError: If neither eval_config nor feature_slicing_spec is set.
    """
    if constants.EXAMPLES_KEY not in input_dict:
        raise ValueError('EXAMPLES_KEY is missing from input dict.')
    if constants.MODEL_KEY not in input_dict:
        raise ValueError('MODEL_KEY is missing from input dict.')
    if constants.EVALUATION_KEY not in output_dict:
        raise ValueError('EVALUATION_KEY is missing from output dict.')
    if len(input_dict[constants.MODEL_KEY]) > 1:
        raise ValueError(
            'There can be only one candidate model, there are {}.'.format(
                len(input_dict[constants.MODEL_KEY])))
    if constants.BASELINE_MODEL_KEY in input_dict and len(
            input_dict[constants.BASELINE_MODEL_KEY]) > 1:
        raise ValueError(
            'There can be only one baseline model, there are {}.'.format(
                len(input_dict[constants.BASELINE_MODEL_KEY])))

    self._log_startup(input_dict, output_dict, exec_properties)

    # Add fairness indicator metric callback if necessary.
    fairness_indicator_thresholds = exec_properties.get(
        'fairness_indicator_thresholds', None)
    add_metrics_callbacks = None
    if fairness_indicator_thresholds:
        # Need to import the following module so that the fairness indicator
        # post-export metric is registered.
        import tensorflow_model_analysis.addons.fairness.post_export_metrics.fairness_indicators  # pylint: disable=g-import-not-at-top, unused-variable
        add_metrics_callbacks = [
            tfma.post_export_metrics.fairness_indicators(  # pytype: disable=module-attr
                thresholds=fairness_indicator_thresholds),
        ]

    def _get_eval_saved_model(artifact: List[types.Artifact],
                              tags=None) -> tfma.EvalSharedModel:
        # Loads the serving saved model when SERVING is among the tags, and
        # the eval saved model otherwise.
        model_uri = artifact_utils.get_single_uri(artifact)
        if tags and tf.saved_model.SERVING in tags:
            model_path = path_utils.serving_model_path(model_uri)
        else:
            model_path = path_utils.eval_model_path(model_uri)
        return tfma.default_eval_shared_model(
            eval_saved_model_path=model_path,
            tags=tags,
            add_metrics_callbacks=add_metrics_callbacks)

    output_uri = artifact_utils.get_single_uri(
        output_dict[constants.EVALUATION_KEY])

    run_validation = False
    if exec_properties.get('eval_config'):
        slice_spec = None
        eval_config = tfma.EvalConfig()
        json_format.Parse(exec_properties['eval_config'], eval_config)

        # Do not validate model when there is no thresholds configured. This
        # is to avoid accidentally blessing models when users forget to set
        # thresholds.
        run_validation = any(
            metrics_spec.thresholds or any(
                metric.HasField('threshold')
                for metric in metrics_spec.metrics)
            for metrics_spec in eval_config.metrics_specs)

        if len(eval_config.model_specs) > 2:
            raise ValueError(
                'Cannot support more than two models. There are {} models in '
                'this eval_config.'.format(len(eval_config.model_specs)))
        if not eval_config.model_specs:
            eval_config.model_specs.add()

        # Remove baseline model_spec and all change thresholds if there is no
        # baseline model provided.
        if not input_dict.get(constants.BASELINE_MODEL_KEY):
            tmp_model_specs = [
                model_spec for model_spec in eval_config.model_specs
                if not model_spec.is_baseline
            ]
            del eval_config.model_specs[:]
            eval_config.model_specs.extend(tmp_model_specs)
            absl.logging.info(
                'No baseline model provided, ignoring all baseline '
                'model_spec.')
            for metrics_spec in eval_config.metrics_specs:
                for metric in metrics_spec.metrics:
                    metric.threshold.ClearField('change_threshold')
                for threshold in metrics_spec.thresholds.values():
                    threshold.ClearField('change_threshold')
            absl.logging.info(
                'No baseline model provided, ignoring all change thresholds.')

        # Extract model artifacts.
        models = {}
        for model_spec in eval_config.model_specs:
            # `tags` is recomputed for every spec. Previously it was assigned
            # only for non-'eval' signatures, so an 'eval' spec either hit an
            # unbound local or reused the preceding spec's SERVING tags.
            if model_spec.signature_name != 'eval':
                tags = [tf.saved_model.SERVING]
            else:
                tags = None
            if model_spec.is_baseline:
                models[model_spec.name] = _get_eval_saved_model(
                    input_dict[constants.BASELINE_MODEL_KEY], tags)
                absl.logging.info('Using {} as baseline model.'.format(
                    models[model_spec.name].model_path))
            else:
                models[model_spec.name] = _get_eval_saved_model(
                    input_dict[constants.MODEL_KEY], tags)
                absl.logging.info('Using {} for model eval.'.format(
                    models[model_spec.name].model_path))
    else:
        eval_config = None
        assert ('feature_slicing_spec' in exec_properties and
                exec_properties['feature_slicing_spec']
               ), 'both eval_config and feature_slicing_spec are unset.'
        feature_slicing_spec = evaluator_pb2.FeatureSlicingSpec()
        json_format.Parse(exec_properties['feature_slicing_spec'],
                          feature_slicing_spec)
        slice_spec = self._get_slice_spec_from_feature_slicing_spec(
            feature_slicing_spec)
        # Legacy path: a single eval shared model rather than a dict.
        models = _get_eval_saved_model(input_dict[constants.MODEL_KEY])
        absl.logging.info('Using {} for model eval.'.format(models.model_path))

    absl.logging.info('Evaluating model.')
    with self._make_beam_pipeline() as pipeline:
        # pylint: disable=expression-not-assigned
        (pipeline
         | 'ReadData' >> beam.io.ReadFromTFRecord(
             file_pattern=io_utils.all_files_pattern(
                 artifact_utils.get_split_uri(
                     input_dict[constants.EXAMPLES_KEY], 'eval')))
         | 'ExtractEvaluateAndWriteResults' >>
         tfma.ExtractEvaluateAndWriteResults(
             eval_shared_model=models,
             eval_config=eval_config,
             output_path=output_uri,
             slice_spec=slice_spec))
    absl.logging.info(
        'Evaluation complete. Results written to {}.'.format(output_uri))

    if not run_validation:
        # TODO(jinhuang): delete the BLESSING_KEY from output_dict when
        # supported.
        absl.logging.info('No threshold configured, will not validate model.')
        return

    # Set up blessing artifact
    blessing = artifact_utils.get_single_instance(
        output_dict[constants.BLESSING_KEY])
    blessing.set_string_custom_property(
        constants.ARTIFACT_PROPERTY_CURRENT_MODEL_URI_KEY,
        artifact_utils.get_single_uri(input_dict[constants.MODEL_KEY]))
    blessing.set_int_custom_property(
        constants.ARTIFACT_PROPERTY_CURRENT_MODEL_ID_KEY,
        input_dict[constants.MODEL_KEY][0].id)
    if input_dict.get(constants.BASELINE_MODEL_KEY):
        baseline_model = input_dict[constants.BASELINE_MODEL_KEY][0]
        blessing.set_string_custom_property(
            constants.ARTIFACT_PROPERTY_BASELINE_MODEL_URI_KEY,
            baseline_model.uri)
        blessing.set_int_custom_property(
            constants.ARTIFACT_PROPERTY_BASELINE_MODEL_ID_KEY,
            baseline_model.id)
    if 'current_component_id' in exec_properties:
        blessing.set_string_custom_property(
            'component_id', exec_properties['current_component_id'])

    # Check validation result and write BLESSED file accordingly.
    validation_file = os.path.join(output_uri, tfma.constants.VALIDATIONS_KEY)
    absl.logging.info('Checking validation results.')
    validation_result = tfma.load_validation_result(validation_file)
    if validation_result.validation_ok:
        io_utils.write_string_file(
            os.path.join(blessing.uri, constants.BLESSED_FILE_NAME), '')
        blessing.set_int_custom_property(
            constants.ARTIFACT_PROPERTY_BLESSED_KEY, constants.BLESSED_VALUE)
    else:
        io_utils.write_string_file(
            os.path.join(blessing.uri, constants.NOT_BLESSED_FILE_NAME), '')
        blessing.set_int_custom_property(
            constants.ARTIFACT_PROPERTY_BLESSED_KEY,
            constants.NOT_BLESSED_VALUE)
    absl.logging.info('Blessing result {} written to {}.'.format(
        validation_result.validation_ok, blessing.uri))
def process_tfma(eval_result_dir,
                 schema_file,
                 input_csv=None,
                 big_query_table=None,
                 eval_model_dir=None,
                 max_eval_rows=None,
                 pipeline_args=None):
    """Evaluates the eval model with TFMA over CSV or BigQuery input.

    The overall slice and a slice per 'trip_start_hour' value are computed.

    Args:
      eval_result_dir: Directory the evaluation result is written to.
      schema_file: File holding the text-serialized Schema that describes the
        eval data.
      input_csv: Path to a csv file used as evaluation input. When truthy it
        is the source that is read.
      big_query_table: BigQuery table name (DATASET.TABLE) used as evaluation
        input when input_csv is falsy.
      eval_model_dir: Directory where the eval model is located.
      max_eval_rows: Number of rows to query from BigQuery.
      pipeline_args: Additional DataflowRunner or DirectRunner args passed to
        the beam pipeline.

    Raises:
      ValueError: If neither input_csv nor big_query_table is provided.
    """
    if input_csv is None and big_query_table is None:
        raise ValueError(
            'one of --input_csv or --big_query_table should be provided.')

    overall_slice = tfma.slicer.SingleSliceSpec()
    hourly_slice = tfma.slicer.SingleSliceSpec(columns=['trip_start_hour'])
    slice_spec = [overall_slice, hourly_slice]

    schema = taxi.read_schema(schema_file)

    metric_callbacks = [
        tfma.post_export_metrics.calibration_plot_and_prediction_histogram(),
        tfma.post_export_metrics.auc_plots(),
    ]
    eval_shared_model = tfma.default_eval_shared_model(
        eval_saved_model_path=eval_model_dir,
        add_metrics_callbacks=metric_callbacks)

    with beam.Pipeline(argv=pipeline_args) as pipeline:
        if input_csv:
            csv_coder = taxi.make_csv_coder(schema)
            read_csv = beam.io.ReadFromText(input_csv, skip_header_lines=1)
            raw_data = (
                pipeline
                | 'ReadFromText' >> read_csv
                | 'ParseCSV' >> beam.Map(csv_coder.decode))
        else:
            assert big_query_table
            query = taxi.make_sql(
                big_query_table, max_eval_rows, for_eval=True)
            raw_feature_spec = taxi.get_raw_feature_spec(schema)
            read_bq = beam.io.Read(
                beam.io.BigQuerySource(query=query, use_standard_sql=True))
            raw_data = (
                pipeline
                | 'ReadBigQuery' >> read_bq
                | 'CleanData' >> beam.Map(
                    lambda row: taxi.clean_raw_data_dict(
                        row, raw_feature_spec)))

        # TFMA consumes serialized tf.Examples, so encode every record first.
        proto_coder = taxi.make_proto_coder(schema)

        _ = (
            raw_data
            | 'ToSerializedTFExample' >> beam.Map(proto_coder.encode)
            | 'ExtractEvaluateAndWriteResults' >>
            tfma.ExtractEvaluateAndWriteResults(
                eval_shared_model=eval_shared_model,
                slice_spec=slice_spec,
                output_path=eval_result_dir))
def Do(self, input_dict: Dict[Text, List[types.Artifact]],
       output_dict: Dict[Text, List[types.Artifact]],
       exec_properties: Dict[Text, Any]) -> None:
    """Evaluates the exported eval model on the 'eval' split with TFMA.

    Args:
      input_dict: Maps input keys to lists of Artifacts. Must contain
        - model_exports: the exported model to evaluate.
        - examples: the examples; only the 'eval' split is read.
      output_dict: Maps output keys to lists of Artifacts. Must contain
        - output: destination of the model evaluation results.
      exec_properties: Execution properties. Reads
        - feature_slicing_spec: JSON string of an
          evaluator_pb2.FeatureSlicingSpec describing how to slice the data.
        - fairness_indicator_thresholds: optional; when truthy, the fairness
          indicators post-export metric is added with these thresholds.

    Returns:
      None

    Raises:
      ValueError: If a required key is absent from input_dict or output_dict.
    """
    if 'model_exports' not in input_dict:
        raise ValueError("'model_exports' is missing in input dict.")
    if 'examples' not in input_dict:
        raise ValueError("'examples' is missing in input dict.")
    if 'output' not in output_dict:
        raise ValueError("'output' is missing in output dict.")

    self._log_startup(input_dict, output_dict, exec_properties)

    # Resolve the model location and the slicing configuration.
    model_uri = artifact_utils.get_single_uri(input_dict['model_exports'])

    slicing_proto = evaluator_pb2.FeatureSlicingSpec()
    json_format.Parse(exec_properties['feature_slicing_spec'], slicing_proto)
    slices = self._get_slice_spec_from_feature_slicing_spec(slicing_proto)

    results_uri = artifact_utils.get_single_uri(output_dict['output'])
    saved_model_dir = path_utils.eval_model_path(model_uri)

    # Optional fairness indicator callback.
    thresholds = exec_properties.get('fairness_indicator_thresholds')
    metric_callbacks = None
    if thresholds:
        # Importing the module registers the fairness indicator post-export
        # metric with TFMA.
        import tensorflow_model_analysis.addons.fairness.post_export_metrics.fairness_indicators  # pylint: disable=g-import-not-at-top, unused-variable
        fairness_metric = tfma.post_export_metrics.fairness_indicators(  # pytype: disable=module-attr
            thresholds=thresholds)
        metric_callbacks = [fairness_metric]

    absl.logging.info('Using {} for model eval.'.format(saved_model_dir))
    shared_model = tfma.default_eval_shared_model(
        eval_saved_model_path=saved_model_dir,
        add_metrics_callbacks=metric_callbacks)

    absl.logging.info('Evaluating model.')
    with self._make_beam_pipeline() as pipeline:
        eval_files = io_utils.all_files_pattern(
            artifact_utils.get_split_uri(input_dict['examples'], 'eval'))
        _ = (
            pipeline
            | 'ReadData' >> beam.io.ReadFromTFRecord(file_pattern=eval_files)
            | 'ExtractEvaluateAndWriteResults' >>
            tfma.ExtractEvaluateAndWriteResults(
                eval_shared_model=shared_model,
                slice_spec=slices,
                output_path=results_uri))

    absl.logging.info(
        'Evaluation complete. Results written to {}.'.format(results_uri))
def Do(self, input_dict: Dict[Text, List[types.Artifact]],
       output_dict: Dict[Text, List[types.Artifact]],
       exec_properties: Dict[Text, Any]) -> None:
    """Runs a batch job to evaluate the eval_model against the given input.

    When the eval config yields metric thresholds, the TFMA validation result
    is also read back and recorded on the blessing artifact.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - constants.EXAMPLES_KEY: examples; the 'eval' split is evaluated.
        - constants.MODEL_KEY: the candidate model (at most one artifact).
        - constants.BASELINE_MODEL_KEY: optional baseline model (at most one
          artifact).
        - constants.SCHEMA_KEY: optional schema; read when present.
      output_dict: Output dict from output key to a list of Artifacts.
        - constants.EVALUATION_KEY: model evaluation results.
        - constants.BLESSING_KEY: blessing artifact; only read when
          validation runs.
      exec_properties: A dict of execution properties.
        - eval_config: JSON string of tfma.EvalConfig.
        - feature_slicing_spec: JSON string of evaluator_pb2.FeatureSlicingSpec
          instance, providing the way to slice the data. Deprecated, use
          eval_config.slicing_specs instead.
        - fairness_indicator_thresholds: optional; when truthy, the fairness
          indicators post-export metric is added.
        - current_component_id: optional; copied onto the blessing artifact.

    Returns:
      None

    Raises:
      ValueError: If a required key is missing, more than one candidate or
        baseline model is given, or eval_config has more than two model specs.
      AssertionError: If neither eval_config nor feature_slicing_spec is set.
    """
    # Validate required keys and the one-model-per-role constraint up front.
    if constants.EXAMPLES_KEY not in input_dict:
        raise ValueError('EXAMPLES_KEY is missing from input dict.')
    if constants.MODEL_KEY not in input_dict:
        raise ValueError('MODEL_KEY is missing from input dict.')
    if constants.EVALUATION_KEY not in output_dict:
        raise ValueError('EVALUATION_KEY is missing from output dict.')
    if len(input_dict[constants.MODEL_KEY]) > 1:
        raise ValueError(
            'There can be only one candidate model, there are {}.'.format(
                len(input_dict[constants.MODEL_KEY])))
    if constants.BASELINE_MODEL_KEY in input_dict and len(
            input_dict[constants.BASELINE_MODEL_KEY]) > 1:
        raise ValueError(
            'There can be only one baseline model, there are {}.'.format(
                len(input_dict[constants.BASELINE_MODEL_KEY])))

    self._log_startup(input_dict, output_dict, exec_properties)

    # Add fairness indicator metric callback if necessary.
    fairness_indicator_thresholds = exec_properties.get(
        'fairness_indicator_thresholds', None)
    add_metrics_callbacks = None
    if fairness_indicator_thresholds:
        # Need to import the following module so that the fairness indicator
        # post-export metric is registered.
        import tensorflow_model_analysis.addons.fairness.post_export_metrics.fairness_indicators  # pylint: disable=g-import-not-at-top, unused-variable
        add_metrics_callbacks = [
            tfma.post_export_metrics.fairness_indicators(  # pytype: disable=module-attr
                thresholds=fairness_indicator_thresholds),
        ]

    output_uri = artifact_utils.get_single_uri(
        output_dict[constants.EVALUATION_KEY])

    # Stays False on the legacy feature_slicing_spec path, so that path never
    # reaches the blessing logic below.
    run_validation = False
    models = []
    if 'eval_config' in exec_properties and exec_properties['eval_config']:
        # eval_config path: slicing comes from the config, so no slice_spec.
        slice_spec = None
        has_baseline = bool(input_dict.get(constants.BASELINE_MODEL_KEY))
        eval_config = tfma.EvalConfig()
        json_format.Parse(exec_properties['eval_config'], eval_config)
        # The baseline flags are driven by whether a baseline artifact was
        # actually supplied.
        eval_config = tfma.update_eval_config_with_defaults(
            eval_config,
            maybe_add_baseline=has_baseline,
            maybe_remove_baseline=not has_baseline)
        tfma.verify_eval_config(eval_config)
        # Do not validate model when there is no thresholds configured. This
        # is to avoid accidentally blessing models when users forget to set
        # thresholds.
        run_validation = bool(
            tfma.metrics.metric_thresholds_from_metrics_specs(
                eval_config.metrics_specs))
        if len(eval_config.model_specs) > 2:
            raise ValueError(
                """Cannot support more than two models. There are {} models in this eval_config.""".format(len(eval_config.model_specs)))
        # Extract model artifacts.
        for model_spec in eval_config.model_specs:
            if model_spec.is_baseline:
                model_uri = artifact_utils.get_single_uri(
                    input_dict[constants.BASELINE_MODEL_KEY])
            else:
                model_uri = artifact_utils.get_single_uri(
                    input_dict[constants.MODEL_KEY])
            # Estimator-type specs load the eval saved model; every other
            # model type loads the serving saved model.
            if tfma.get_model_type(model_spec) == tfma.TF_ESTIMATOR:
                model_path = path_utils.eval_model_path(model_uri)
            else:
                model_path = path_utils.serving_model_path(model_uri)
            absl.logging.info('Using {} as {} model.'.format(
                model_path, model_spec.name))
            models.append(
                tfma.default_eval_shared_model(
                    model_name=model_spec.name,
                    eval_saved_model_path=model_path,
                    add_metrics_callbacks=add_metrics_callbacks,
                    eval_config=eval_config))
    else:
        # Legacy path: no eval_config, slicing comes from feature_slicing_spec
        # and exactly one eval saved model is loaded.
        eval_config = None
        assert ('feature_slicing_spec' in exec_properties and
                exec_properties['feature_slicing_spec']
               ), 'both eval_config and feature_slicing_spec are unset.'
        feature_slicing_spec = evaluator_pb2.FeatureSlicingSpec()
        json_format.Parse(exec_properties['feature_slicing_spec'],
                          feature_slicing_spec)
        slice_spec = self._get_slice_spec_from_feature_slicing_spec(
            feature_slicing_spec)
        model_uri = artifact_utils.get_single_uri(
            input_dict[constants.MODEL_KEY])
        model_path = path_utils.eval_model_path(model_uri)
        absl.logging.info('Using {} for model eval.'.format(model_path))
        models.append(
            tfma.default_eval_shared_model(
                eval_saved_model_path=model_path,
                add_metrics_callbacks=add_metrics_callbacks))

    file_pattern = io_utils.all_files_pattern(
        artifact_utils.get_split_uri(input_dict[constants.EXAMPLES_KEY],
                                     'eval'))
    # A single model is passed bare; several models are passed as a list.
    eval_shared_model = models[0] if len(models) == 1 else models
    schema = None
    if constants.SCHEMA_KEY in input_dict:
        schema = io_utils.SchemaReader().read(
            io_utils.get_only_uri_in_dir(
                artifact_utils.get_single_uri(
                    input_dict[constants.SCHEMA_KEY])))

    absl.logging.info('Evaluating model.')
    with self._make_beam_pipeline() as pipeline:
        # pylint: disable=expression-not-assigned
        # NOTE(review): _USE_TFXIO is defined outside this block; presumably a
        # module-level feature flag -- confirm.
        if _USE_TFXIO:
            tensor_adapter_config = None
            if tfma.is_batched_input(eval_shared_model, eval_config):
                # Batched input is read through TFXIO.
                tfxio = tf_example_record.TFExampleRecord(
                    file_pattern=file_pattern,
                    schema=schema,
                    raw_record_column_name=tfma.BATCHED_INPUT_KEY)
                # The adapter config is only built when a schema was given;
                # otherwise it stays None.
                if schema is not None:
                    tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
                        arrow_schema=tfxio.ArrowSchema(),
                        tensor_representations=tfxio.TensorRepresentations())
                data = pipeline | 'ReadFromTFRecordToArrow' >> tfxio.BeamSource()
            else:
                data = pipeline | 'ReadFromTFRecord' >> beam.io.ReadFromTFRecord(
                    file_pattern=file_pattern)
            (data
             | 'ExtractEvaluateAndWriteResults' >>
             tfma.ExtractEvaluateAndWriteResults(
                 eval_shared_model=models[0] if len(models) == 1 else models,
                 eval_config=eval_config,
                 output_path=output_uri,
                 slice_spec=slice_spec,
                 tensor_adapter_config=tensor_adapter_config))
        else:
            # Non-TFXIO path: raw TFRecords and no tensor_adapter_config
            # argument.
            data = pipeline | 'ReadFromTFRecord' >> beam.io.ReadFromTFRecord(
                file_pattern=file_pattern)
            (data
             | 'ExtractEvaluateAndWriteResults' >>
             tfma.ExtractEvaluateAndWriteResults(
                 eval_shared_model=models[0] if len(models) == 1 else models,
                 eval_config=eval_config,
                 output_path=output_uri,
                 slice_spec=slice_spec))
    absl.logging.info(
        'Evaluation complete. Results written to {}.'.format(output_uri))

    if not run_validation:
        # TODO(jinhuang): delete the BLESSING_KEY from output_dict when supported.
        absl.logging.info(
            'No threshold configured, will not validate model.')
        return
    # Set up blessing artifact
    blessing = artifact_utils.get_single_instance(
        output_dict[constants.BLESSING_KEY])
    blessing.set_string_custom_property(
        constants.ARTIFACT_PROPERTY_CURRENT_MODEL_URI_KEY,
        artifact_utils.get_single_uri(input_dict[constants.MODEL_KEY]))
    blessing.set_int_custom_property(
        constants.ARTIFACT_PROPERTY_CURRENT_MODEL_ID_KEY,
        input_dict[constants.MODEL_KEY][0].id)
    if input_dict.get(constants.BASELINE_MODEL_KEY):
        baseline_model = input_dict[constants.BASELINE_MODEL_KEY][0]
        blessing.set_string_custom_property(
            constants.ARTIFACT_PROPERTY_BASELINE_MODEL_URI_KEY,
            baseline_model.uri)
        blessing.set_int_custom_property(
            constants.ARTIFACT_PROPERTY_BASELINE_MODEL_ID_KEY,
            baseline_model.id)
    if 'current_component_id' in exec_properties:
        blessing.set_string_custom_property(
            'component_id', exec_properties['current_component_id'])
    # Check validation result and write BLESSED file accordingly.
    absl.logging.info('Checking validation results.')
    # The validation result is loaded from the evaluation output location
    # itself.
    validation_result = tfma.load_validation_result(output_uri)
    if validation_result.validation_ok:
        io_utils.write_string_file(
            os.path.join(blessing.uri, constants.BLESSED_FILE_NAME), '')
        blessing.set_int_custom_property(
            constants.ARTIFACT_PROPERTY_BLESSED_KEY,
            constants.BLESSED_VALUE)
    else:
        io_utils.write_string_file(
            os.path.join(blessing.uri, constants.NOT_BLESSED_FILE_NAME), '')
        blessing.set_int_custom_property(
            constants.ARTIFACT_PROPERTY_BLESSED_KEY,
            constants.NOT_BLESSED_VALUE)
    absl.logging.info('Blessing result {} written to {}.'.format(
        validation_result.validation_ok, blessing.uri))
def _write_tfma(self,
                tfma_path: str,
                output_file_format: str,
                store: Optional[mlmd.MetadataStore] = None):
    """Writes TFMA metrics and plots for a fixed-prediction test model.

    Exports a fixed-prediction estimator under `self.tmpdir`, evaluates it on
    two in-memory examples and writes the results below `tfma_path`.

    Args:
      tfma_path: Directory under which the 'metrics' and 'plots' outputs are
        written; also used as the URI of the registered artifact.
      output_file_format: File format forwarded to the TFMA writer.
      store: Optional metadata store. When truthy, a ModelEvaluation artifact
        whose URI is `tfma_path` is put into it.
    """
    eval_export_dir = os.path.join(self.tmpdir, 'eval_export_dir')
    _unused_export, saved_eval_model_dir = (
        fixed_prediction_estimator.simple_fixed_prediction_estimator(
            export_path=None, eval_export_path=eval_export_dir))

    eval_config = tfma.EvalConfig(model_specs=[tfma.ModelSpec()])

    metric_callbacks = [
        tfma.post_export_metrics.example_count(),
        tfma.post_export_metrics.calibration_plot_and_prediction_histogram(
            num_buckets=2),
    ]
    shared_model = self.createTestEvalSharedModel(
        eval_saved_model_path=saved_eval_model_dir,
        add_metrics_callbacks=metric_callbacks)

    predict_extractor = (
        tfma.extractors.legacy_predict_extractor.PredictExtractor(
            shared_model, eval_config=eval_config))
    unbatch_extractor = tfma.extractors.unbatch_extractor.UnbatchExtractor()
    slice_key_extractor = (
        tfma.extractors.slice_key_extractor.SliceKeyExtractor())
    extractors = [predict_extractor, unbatch_extractor, slice_key_extractor]

    evaluators = [
        tfma.evaluators.legacy_metrics_and_plots_evaluator
        .MetricsAndPlotsEvaluator(shared_model)
    ]

    output_paths = {
        'metrics': os.path.join(tfma_path, 'metrics'),
        'plots': os.path.join(tfma_path, 'plots'),
    }
    writers = [
        tfma.writers.MetricsPlotsAndValidationsWriter(
            output_paths=output_paths,
            output_file_format=output_file_format,
            eval_config=eval_config,
            add_metrics_callbacks=shared_model.add_metrics_callbacks)
    ]

    tfx_io = raw_tf_record.RawBeamRecordTFXIO(
        physical_format='inmemory',
        raw_record_column_name='__raw_record__',
        telemetry_descriptors=['TFMATest'])

    with beam.Pipeline() as pipeline:
        # Two examples with label 1.0: one predicted 0.0, one predicted 1.0.
        first = self._makeExample(prediction=0.0, label=1.0)
        second = self._makeExample(prediction=1.0, label=1.0)
        serialized = [first.SerializeToString(), second.SerializeToString()]
        _ = (
            pipeline
            | 'Create' >> beam.Create(serialized)
            | 'BatchExamples' >> tfx_io.BeamSource()
            | 'ExtractEvaluateAndWriteResults' >>
            tfma.ExtractEvaluateAndWriteResults(
                eval_config=eval_config,
                eval_shared_model=shared_model,
                extractors=extractors,
                evaluators=evaluators,
                writers=writers))

    if not store:
        return

    # Register the written evaluation as a ModelEvaluation artifact.
    eval_type = metadata_store_pb2.ArtifactType()
    eval_type.name = standard_artifacts.ModelEvaluation.TYPE_NAME
    registered_type_id = store.put_artifact_type(eval_type)

    artifact = metadata_store_pb2.Artifact()
    artifact.uri = tfma_path
    artifact.type_id = registered_type_id
    store.put_artifacts([artifact])
def Do(self, input_dict: Dict[Text, List[types.Artifact]],
       output_dict: Dict[Text, List[types.Artifact]],
       exec_properties: Dict[Text, Any]) -> None:
    """Evaluates the candidate model with TFMA and, if configured, blesses it.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - constants.EXAMPLES_KEY: examples used for the evaluation (required).
        - constants.MODEL_KEY: the single candidate model (required).
        - constants.BASELINE_MODEL_KEY: at most one baseline model.
        - constants.SCHEMA_KEY: optional schema artifact.
      output_dict: Output dict from output key to a list of Artifacts.
        - constants.EVALUATION_KEY: receives the evaluation results
          (required).
        - constants.BLESSING_KEY: receives the blessing; only read when the
          eval config defines metric thresholds.
      exec_properties: A dict of execution properties.
        - eval_config: JSON string of tfma.EvalConfig.
        - feature_slicing_spec: JSON string of
          evaluator_pb2.FeatureSlicingSpec, used only when eval_config is
          unset. Deprecated, use eval_config.slicing_specs instead.
        - example_splits: JSON-serialized list of split names to evaluate
          on; falls back to ['eval'] when empty or unset.
        - fairness_indicator_thresholds: optional thresholds that enable the
          fairness indicator post export metric.

    Returns:
      None

    Raises:
      ValueError: If a required key is missing, more than one candidate or
        baseline model is supplied, or the eval config lists more than two
        models.
    """
    if constants.EXAMPLES_KEY not in input_dict:
        raise ValueError('EXAMPLES_KEY is missing from input dict.')
    if constants.MODEL_KEY not in input_dict:
        raise ValueError('MODEL_KEY is missing from input dict.')
    if constants.EVALUATION_KEY not in output_dict:
        raise ValueError('EVALUATION_KEY is missing from output dict.')

    candidate_count = len(input_dict[constants.MODEL_KEY])
    if candidate_count > 1:
        raise ValueError(
            'There can be only one candidate model, there are %d.' %
            candidate_count)
    if (constants.BASELINE_MODEL_KEY in input_dict and
            len(input_dict[constants.BASELINE_MODEL_KEY]) > 1):
        raise ValueError(
            'There can be only one baseline model, there are %d.' %
            len(input_dict[constants.BASELINE_MODEL_KEY]))

    self._log_startup(input_dict, output_dict, exec_properties)

    # Add fairness indicator metric callback if necessary.
    thresholds = exec_properties.get('fairness_indicator_thresholds')
    add_metrics_callbacks = None
    if thresholds:
        add_metrics_callbacks = [
            tfma.post_export_metrics.fairness_indicators(  # pytype: disable=module-attr
                thresholds=thresholds),
        ]

    output_uri = artifact_utils.get_single_uri(
        output_dict[constants.EVALUATION_KEY])

    # A user supplied factory takes precedence over the TFMA default.
    build_shared_model = (
        udf_utils.try_get_fn(
            exec_properties=exec_properties,
            fn_name='custom_eval_shared_model')
        or tfma.default_eval_shared_model)

    run_validation = False
    models = []
    serialized_eval_config = exec_properties.get('eval_config')
    if serialized_eval_config:
        slice_spec = None
        has_baseline = bool(input_dict.get(constants.BASELINE_MODEL_KEY))
        eval_config = tfma.EvalConfig()
        json_format.Parse(serialized_eval_config, eval_config)
        eval_config = tfma.update_eval_config_with_defaults(
            eval_config,
            maybe_add_baseline=has_baseline,
            maybe_remove_baseline=not has_baseline)
        tfma.verify_eval_config(eval_config)
        # Do not validate model when there is no thresholds configured. This
        # is to avoid accidentally blessing models when users forget to set
        # thresholds.
        run_validation = bool(
            tfma.metrics.metric_thresholds_from_metrics_specs(
                eval_config.metrics_specs))
        model_spec_count = len(eval_config.model_specs)
        if model_spec_count > 2:
            raise ValueError(
                """Cannot support more than two models. There are %d models in this eval_config."""
                % model_spec_count)
        # Extract model artifacts.
        for model_spec in eval_config.model_specs:
            artifact_key = (
                constants.BASELINE_MODEL_KEY
                if model_spec.is_baseline else constants.MODEL_KEY)
            model_uri = artifact_utils.get_single_uri(input_dict[artifact_key])
            # Estimator models are loaded from the eval export, all other
            # model types from the serving export.
            if tfma.get_model_type(model_spec) == tfma.TF_ESTIMATOR:
                model_path = path_utils.eval_model_path(model_uri)
            else:
                model_path = path_utils.serving_model_path(model_uri)
            logging.info('Using %s as %s model.', model_path, model_spec.name)
            shared_model = build_shared_model(
                eval_saved_model_path=model_path,
                model_name=model_spec.name,
                eval_config=eval_config,
                add_metrics_callbacks=add_metrics_callbacks)
            models.append(shared_model)
    else:
        eval_config = None
        serialized_slicing_spec = exec_properties.get('feature_slicing_spec')
        assert serialized_slicing_spec, (
            'both eval_config and feature_slicing_spec are unset.')
        feature_slicing_spec = evaluator_pb2.FeatureSlicingSpec()
        json_format.Parse(serialized_slicing_spec, feature_slicing_spec)
        slice_spec = self._get_slice_spec_from_feature_slicing_spec(
            feature_slicing_spec)
        model_uri = artifact_utils.get_single_uri(
            input_dict[constants.MODEL_KEY])
        model_path = path_utils.eval_model_path(model_uri)
        logging.info('Using %s for model eval.', model_path)
        shared_model = build_shared_model(
            eval_saved_model_path=model_path,
            model_name='',
            eval_config=None,
            add_metrics_callbacks=add_metrics_callbacks)
        models.append(shared_model)

    # TFMA takes a bare shared model when there is exactly one, else a list.
    eval_shared_model = models[0] if len(models) == 1 else models

    schema = None
    if constants.SCHEMA_KEY in input_dict:
        schema_uri = artifact_utils.get_single_uri(
            input_dict[constants.SCHEMA_KEY])
        schema = io_utils.SchemaReader().read(
            io_utils.get_only_uri_in_dir(schema_uri))

    # Load and deserialize example splits from execution properties.
    example_splits = json_utils.loads(
        exec_properties.get(constants.EXAMPLE_SPLITS_KEY, 'null'))
    if not example_splits:
        example_splits = ['eval']
        logging.info(
            "The 'example_splits' parameter is not set, using 'eval' "
            'split.')

    def _split_file_pattern(split_name):
        """Returns the all-files pattern of one split of the examples."""
        return io_utils.all_files_pattern(
            artifact_utils.get_split_uri(
                input_dict[constants.EXAMPLES_KEY], split_name))

    logging.info('Evaluating model.')
    with self._make_beam_pipeline() as pipeline:
        examples_list = []
        tensor_adapter_config = None
        # pylint: disable=expression-not-assigned
        if _USE_TFXIO and tfma.is_batched_input(eval_shared_model,
                                                eval_config):
            examples_artifact = artifact_utils.get_single_instance(
                input_dict[constants.EXAMPLES_KEY])
            tfxio_factory = tfxio_utils.get_tfxio_factory_from_artifact(
                examples=[examples_artifact],
                telemetry_descriptors=_TELEMETRY_DESCRIPTORS,
                schema=schema,
                raw_record_column_name=tfma_constants.ARROW_INPUT_COLUMN)
            # TODO(b/161935932): refactor after TFXIO supports multiple
            # patterns.
            for split in example_splits:
                tfxio = tfxio_factory(_split_file_pattern(split))
                stage_label = 'ReadFromTFRecordToArrow[%s]' % split
                examples_list.append(
                    pipeline | stage_label >> tfxio.BeamSource())
            if schema is not None:
                # Use last tfxio as TensorRepresentations and ArrowSchema are
                # fixed.
                tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
                    arrow_schema=tfxio.ArrowSchema(),
                    tensor_representations=tfxio.TensorRepresentations())
        else:
            for split in example_splits:
                file_pattern = _split_file_pattern(split)
                stage_label = 'ReadFromTFRecord[%s]' % split
                examples_list.append(
                    pipeline | stage_label >> beam.io.ReadFromTFRecord(
                        file_pattern=file_pattern))

        custom_extractors = udf_utils.try_get_fn(
            exec_properties=exec_properties, fn_name='custom_extractors')
        extractors = None
        if custom_extractors:
            extractors = custom_extractors(
                eval_shared_model=eval_shared_model,
                eval_config=eval_config,
                tensor_adapter_config=tensor_adapter_config)

        (examples_list
         | 'FlattenExamples' >> beam.Flatten()
         | 'ExtractEvaluateAndWriteResults' >>
         tfma.ExtractEvaluateAndWriteResults(
             eval_shared_model=eval_shared_model,
             eval_config=eval_config,
             extractors=extractors,
             output_path=output_uri,
             slice_spec=slice_spec,
             tensor_adapter_config=tensor_adapter_config))
    logging.info('Evaluation complete. Results written to %s.', output_uri)

    if not run_validation:
        # TODO(jinhuang): delete the BLESSING_KEY from output_dict when
        # supported.
        logging.info('No threshold configured, will not validate model.')
        return

    # Set up blessing artifact
    blessing = artifact_utils.get_single_instance(
        output_dict[constants.BLESSING_KEY])
    blessing.set_string_custom_property(
        constants.ARTIFACT_PROPERTY_CURRENT_MODEL_URI_KEY,
        artifact_utils.get_single_uri(input_dict[constants.MODEL_KEY]))
    blessing.set_int_custom_property(
        constants.ARTIFACT_PROPERTY_CURRENT_MODEL_ID_KEY,
        input_dict[constants.MODEL_KEY][0].id)
    if input_dict.get(constants.BASELINE_MODEL_KEY):
        baseline_model = input_dict[constants.BASELINE_MODEL_KEY][0]
        blessing.set_string_custom_property(
            constants.ARTIFACT_PROPERTY_BASELINE_MODEL_URI_KEY,
            baseline_model.uri)
        blessing.set_int_custom_property(
            constants.ARTIFACT_PROPERTY_BASELINE_MODEL_ID_KEY,
            baseline_model.id)
    if 'current_component_id' in exec_properties:
        blessing.set_string_custom_property(
            'component_id', exec_properties['current_component_id'])

    # Check validation result and write BLESSED file accordingly.
    logging.info('Checking validation results.')
    validation_result = tfma.load_validation_result(output_uri)
    if validation_result.validation_ok:
        marker_file_name = constants.BLESSED_FILE_NAME
        blessed_value = constants.BLESSED_VALUE
    else:
        marker_file_name = constants.NOT_BLESSED_FILE_NAME
        blessed_value = constants.NOT_BLESSED_VALUE
    io_utils.write_string_file(
        os.path.join(blessing.uri, marker_file_name), '')
    blessing.set_int_custom_property(
        constants.ARTIFACT_PROPERTY_BLESSED_KEY, blessed_value)
    logging.info('Blessing result %s written to %s.',
                 validation_result.validation_ok, blessing.uri)
def append_tfma_pipeline(pipeline: beam.Pipeline,
                         me_eval_config: me_proto.EvaluationConfig,
                         problem_type: constants.ProblemType,
                         tfma_format: Optional[bool] = False,
                         json_mode: Optional[bool] = False,
                         schema: Optional[Any] = None):
    """Extend a beam pipeline to add TFMA evaluation given a configuration.

    The JSONL input files named in the configuration are read, converted to
    serialized examples, batched and handed to TFMA; the metrics are written
    below the configured output path.

    Args:
      pipeline: A beam pipeline.
      me_eval_config: A ME Evaluation Configuration.
      problem_type: Defines what type of problem to expect.
      tfma_format: If true, use TFMA format, if false use Model Evaluation.
      json_mode: Output metrics in a plain text mode.
      schema: Optional tf.metadata schema. If you need to pass multi-tensor
        input to the model, you need to pass the schema.
    """
    data_spec = me_eval_config.data_spec
    input_files = data_spec.input_source_spec.jsonl_file_spec.file_names
    output_path = me_eval_config.output_spec.gcs_sink.path

    def _optional_column_spec(field_name):
        """Wraps `data_spec.<field_name>` in a ColumnSpec if the field is set."""
        if data_spec.HasField(field_name):
            return ColumnSpec(getattr(data_spec, field_name))
        return None

    weight_column_spec = _optional_column_spec('example_weight_key_spec')
    eval_column_specs = EvaluationColumnSpecs(
        ground_truth_column_spec=ColumnSpec(data_spec.label_key_spec),
        example_weight_column_spec=weight_column_spec,
        predicted_score_column_spec=_optional_column_spec(
            'predicted_score_key_spec'),
        predicted_label_column_spec=_optional_column_spec(
            'predicted_label_key_spec'),
        predicted_label_id_column_spec=_optional_column_spec(
            'predicted_label_id_key_spec'))

    # Empty repeated fields and a negative quantile index are passed on as
    # None.
    class_name_list = list(data_spec.labels) or None
    quantile_list = list(data_spec.quantiles) or None
    quantile_index = None
    if data_spec.quantile_index >= 0:
        quantile_index = data_spec.quantile_index

    tfma_eval_config = tfma_adapter.METoTFMA(class_name_list).eval_config(
        me_eval_config)

    metrics_output_file = os.path.join(output_path,
                                       constants.Pipeline.METRICS_KEY)
    # pylint:disable=no-value-for-parameter
    write_metrics_transform = _write_metrics(
        output_file=metrics_output_file,
        problem_type=problem_type,
        class_labels=class_name_list,
        tfma_format=tfma_format,
        json_mode=json_mode)
    me_writers = [
        tfma.writers.Writer(
            stage_name='WriteMetrics', ptransform=write_metrics_transform),
    ]

    coder = tf_example_record.TFExampleBeamRecord(
        physical_format='inmem',
        schema=schema,
        raw_record_column_name=tfma.ARROW_INPUT_COLUMN,
        telemetry_descriptors=None)

    # A tensor adapter config is only built when a schema was supplied.
    tensor_adapter_config = None
    if schema is not None:
        tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
            arrow_schema=coder.ArrowSchema(),
            tensor_representations=coder.TensorRepresentations())

    parse_fn = JSONToSerializedExample(
        eval_column_specs=eval_column_specs,
        class_list=class_name_list,
        quantile_list=quantile_list,
        quantile_index=quantile_index)
    _ = (
        pipeline
        | 'InputFileList' >> beam.Create(input_files)
        | 'ReadText' >> beam.io.textio.ReadAllFromText()
        | 'ParseData' >> beam.ParDo(parse_fn)
        | 'ExamplesToRecordBatch' >> coder.BeamSource()
        | 'ExtractEvaluateAndWriteResults' >>
        tfma.ExtractEvaluateAndWriteResults(
            eval_config=tfma_eval_config,
            writers=me_writers,
            tensor_adapter_config=tensor_adapter_config))
def Do(self, input_dict: Dict[Text, List[types.Artifact]],
       output_dict: Dict[Text, List[types.Artifact]],
       exec_properties: Dict[Text, Any]) -> None:
    """Evaluates the configured splits with TFMA using an evaluator step.

    The evaluator step class is loaded from `exec_properties`; it supplies
    the eval config, the splits to read and the module searched for custom
    hooks.

    Args:
      input_dict: Input artifacts. `constants.EXAMPLES` is required;
        `constants.SCHEMA` and `constants.MODEL` are optional.
      output_dict: Output artifacts. `constants.EVALUATION` is required and
        receives the results.
      exec_properties: Must contain `StepKeys.SOURCE` (source path of the
        step class) and `StepKeys.ARGS` (keyword arguments used to
        instantiate it).

    Raises:
      ValueError: If the examples input or the evaluation output is missing.
      AssertionError: If the examples URI lists no entries.
    """
    # Check the inputs
    if constants.EXAMPLES not in input_dict:
        raise ValueError(f'{constants.EXAMPLES} is missing from inputs')
    examples_artifact = input_dict[constants.EXAMPLES]
    input_uri = artifact_utils.get_single_uri(examples_artifact)

    if len(zenml_path_utils.list_dir(input_uri)) == 0:
        raise AssertionError(
            'ZenML can not run the evaluation as the provided input '
            'configuration does not point towards any data. Specifically, '
            'if you are using the agnostic evaluator, please make sure '
            'that you are using a proper test_fn in your trainer step to '
            'write these results.')

    # Check the outputs
    if constants.EVALUATION not in output_dict:
        raise ValueError(f'{constants.EVALUATION} is missing from outputs')
    output_uri = artifact_utils.get_single_uri(
        output_dict[constants.EVALUATION])

    # Resolve the schema
    schema = None
    if constants.SCHEMA in input_dict:
        schema_uri = artifact_utils.get_single_uri(
            input_dict[constants.SCHEMA])
        schema = io_utils.SchemaReader().read(
            io_utils.get_only_uri_in_dir(schema_uri))

    # Instantiate the evaluator step described by the execution properties
    step_class = source_utils.load_source_path_class(
        exec_properties[StepKeys.SOURCE])
    evaluator_step: BaseEvaluatorStep = step_class(
        **exec_properties[StepKeys.ARGS])

    # Build and verify the eval config
    eval_config = tfma.update_eval_config_with_defaults(
        evaluator_step.build_config())
    tfma.verify_eval_config(eval_config)

    # Resolve the model; without a model artifact no shared model is used
    eval_shared_model = None
    if constants.MODEL in input_dict:
        model_uri = artifact_utils.get_single_uri(input_dict[constants.MODEL])
        model_path = path_utils.serving_model_path(model_uri)
        model_fn = (
            try_get_fn(evaluator_step.CUSTOM_MODULE,
                       'custom_eval_shared_model')
            or tfma.default_eval_shared_model)
        eval_shared_model = model_fn(
            model_name='',  # TODO: Fix with model names
            eval_saved_model_path=model_path,
            eval_config=eval_config)

    self._log_startup(input_dict, output_dict, exec_properties)

    def _split_file_pattern(split_name):
        """Returns the all-files pattern of one split of the examples."""
        return io_utils.all_files_pattern(
            artifact_utils.get_split_uri(examples_artifact, split_name))

    # Main pipeline
    logging.info('Evaluating model.')
    with self._make_beam_pipeline() as pipeline:
        examples_list = []
        tensor_adapter_config = None

        if tfma.is_batched_input(eval_shared_model, eval_config):
            tfxio_factory = tfxio_utils.get_tfxio_factory_from_artifact(
                examples=[
                    artifact_utils.get_single_instance(examples_artifact)
                ],
                telemetry_descriptors=_TELEMETRY_DESCRIPTORS,
                schema=schema,
                raw_record_column_name=tfma_constants.ARROW_INPUT_COLUMN)
            for split in evaluator_step.splits:
                tfxio = tfxio_factory(_split_file_pattern(split))
                stage_label = 'ReadFromTFRecordToArrow[%s]' % split
                examples_list.append(
                    pipeline | stage_label >> tfxio.BeamSource())
            if schema is not None:
                # The tfxio of the last split provides the Arrow schema and
                # the tensor representations.
                tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
                    arrow_schema=tfxio.ArrowSchema(),
                    tensor_representations=tfxio.TensorRepresentations())
        else:
            for split in evaluator_step.splits:
                file_pattern = _split_file_pattern(split)
                stage_label = 'ReadFromTFRecord[%s]' % split
                examples_list.append(
                    pipeline | stage_label >> beam.io.ReadFromTFRecord(
                        file_pattern=file_pattern))

        # Keyword arguments shared by both custom hooks.
        hook_kwargs = dict(
            eval_shared_model=eval_shared_model,
            eval_config=eval_config,
            tensor_adapter_config=tensor_adapter_config)

        # Resolve custom extractors
        custom_extractors = try_get_fn(evaluator_step.CUSTOM_MODULE,
                                       'custom_extractors')
        extractors = (
            custom_extractors(**hook_kwargs) if custom_extractors else None)

        # Resolve custom evaluators
        custom_evaluators = try_get_fn(evaluator_step.CUSTOM_MODULE,
                                       'custom_evaluators')
        evaluators = (
            custom_evaluators(**hook_kwargs) if custom_evaluators else None)

        # Extract, evaluate and write
        (examples_list
         | 'FlattenExamples' >> beam.Flatten()
         | 'ExtractEvaluateAndWriteResults' >>
         tfma.ExtractEvaluateAndWriteResults(
             eval_config=eval_config,
             eval_shared_model=eval_shared_model,
             output_path=output_uri,
             extractors=extractors,
             evaluators=evaluators,
             tensor_adapter_config=tensor_adapter_config))
    logging.info('Evaluation complete. Results written to %s.', output_uri)
def Do(self, input_dict: Dict[str, List[types.Artifact]],
       output_dict: Dict[str, List[types.Artifact]],
       exec_properties: Dict[str, Any]) -> None:
    """Runs a batch job to evaluate the eval_model against the given input.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - model: exported model. Optional; when absent every model spec must
          set `prediction_key`.
        - baseline_model: at most one baseline model.
        - examples: examples for eval the model.
        - schema: optional schema artifact.
      output_dict: Output dict from output key to a list of Artifacts.
        - evaluation: model evaluation results.
        - blessing: only read when the eval config defines thresholds.
      exec_properties: A dict of execution properties.
        - eval_config: JSON string of tfma.EvalConfig.
        - feature_slicing_spec: JSON string of
          evaluator_pb2.FeatureSlicingSpec instance, providing the way to
          slice the data. Deprecated, use eval_config.slicing_specs instead.
        - example_splits: JSON-serialized list of names of splits on which
          the metrics are computed. Default behavior (when example_splits is
          set to None) is using the 'eval' split.
        - fairness_indicator_thresholds: JSON-serialized thresholds that
          enable the fairness indicator post export metric.
        - module path key: used to find extra pip packages that are passed
          to Beam via `--extra_package`.

    Returns:
      None

    Raises:
      ValueError: If a required key is missing, more than one candidate or
        baseline model is supplied, the eval config lists more than two
        models, or no model is supplied and a model spec has no
        `prediction_key`.
    """
    if standard_component_specs.EXAMPLES_KEY not in input_dict:
        raise ValueError('EXAMPLES_KEY is missing from input dict.')
    if standard_component_specs.EVALUATION_KEY not in output_dict:
        raise ValueError('EVALUATION_KEY is missing from output dict.')
    if standard_component_specs.MODEL_KEY in input_dict and len(
            input_dict[standard_component_specs.MODEL_KEY]) > 1:
        raise ValueError(
            'There can be only one candidate model, there are %d.' %
            (len(input_dict[standard_component_specs.MODEL_KEY])))
    if standard_component_specs.BASELINE_MODEL_KEY in input_dict and len(
            input_dict[standard_component_specs.BASELINE_MODEL_KEY]) > 1:
        raise ValueError(
            'There can be only one baseline model, there are %d.' %
            (len(input_dict[standard_component_specs.BASELINE_MODEL_KEY])))

    self._log_startup(input_dict, output_dict, exec_properties)

    # Add fairness indicator metric callback if necessary.
    fairness_indicator_thresholds = json_utils.loads(
        exec_properties.get(
            standard_component_specs.FAIRNESS_INDICATOR_THRESHOLDS_KEY,
            'null'))
    add_metrics_callbacks = None
    if fairness_indicator_thresholds:
        add_metrics_callbacks = [
            tfma.post_export_metrics.fairness_indicators(  # pytype: disable=module-attr
                thresholds=fairness_indicator_thresholds),
        ]

    # Read the output through the same key that was validated above.
    output_uri = artifact_utils.get_single_uri(
        output_dict[standard_component_specs.EVALUATION_KEY])

    # Make sure user packages get propagated to the remote Beam worker.
    unused_module_path, extra_pip_packages = (
        udf_utils.decode_user_module_key(
            exec_properties.get(standard_component_specs.MODULE_PATH_KEY,
                                None)))
    for pip_package_path in extra_pip_packages:
        local_pip_package_path = io_utils.ensure_local(pip_package_path)
        self._beam_pipeline_args.append('--extra_package=%s' %
                                        local_pip_package_path)

    eval_shared_model_fn = udf_utils.try_get_fn(
        exec_properties=exec_properties,
        fn_name='custom_eval_shared_model') or tfma.default_eval_shared_model

    run_validation = False
    models = []
    if (standard_component_specs.EVAL_CONFIG_KEY in exec_properties and
            exec_properties[standard_component_specs.EVAL_CONFIG_KEY]):
        slice_spec = None
        has_baseline = bool(
            input_dict.get(standard_component_specs.BASELINE_MODEL_KEY))
        eval_config = tfma.EvalConfig()
        proto_utils.json_to_proto(
            exec_properties[standard_component_specs.EVAL_CONFIG_KEY],
            eval_config)
        # rubber_stamp is always assumed true, i.e., change threshold will
        # always be ignored when a baseline model is missing.
        if hasattr(tfma, 'utils'):
            eval_config = tfma.utils.update_eval_config_with_defaults(
                eval_config, has_baseline=has_baseline, rubber_stamp=True)
            tfma.utils.verify_eval_config(eval_config)
        else:
            # TODO(b/171992041): Replaced by tfma.utils.
            eval_config = tfma.update_eval_config_with_defaults(
                eval_config, has_baseline=has_baseline, rubber_stamp=True)
            tfma.verify_eval_config(eval_config)
        # Do not validate model when there is no thresholds configured. This
        # is to avoid accidentally blessing models when users forget to set
        # thresholds.
        run_validation = bool(
            tfma.metrics.metric_thresholds_from_metrics_specs(
                eval_config.metrics_specs, eval_config=eval_config))
        if len(eval_config.model_specs) > 2:
            raise ValueError(
                """Cannot support more than two models. There are %d models in this eval_config."""
                % (len(eval_config.model_specs)))
        # Extract model artifacts.
        for model_spec in eval_config.model_specs:
            if standard_component_specs.MODEL_KEY not in input_dict:
                # Model-less evaluation: predictions must already be present
                # in the examples under `prediction_key`.
                if not model_spec.prediction_key:
                    raise ValueError(
                        'model_spec.prediction_key required if model not '
                        'provided')
                continue
            if model_spec.is_baseline:
                model_artifact = artifact_utils.get_single_instance(
                    input_dict[standard_component_specs.BASELINE_MODEL_KEY])
            else:
                model_artifact = artifact_utils.get_single_instance(
                    input_dict[standard_component_specs.MODEL_KEY])
            # TODO(b/171992041): tfma.get_model_type replaced by tfma.utils.
            is_estimator = (
                (hasattr(tfma, 'utils') and
                 tfma.utils.get_model_type(model_spec) == tfma.TF_ESTIMATOR)
                or
                (hasattr(tfma, 'get_model_type') and
                 tfma.get_model_type(model_spec) == tfma.TF_ESTIMATOR))
            if is_estimator:
                model_path = path_utils.eval_model_path(
                    model_artifact.uri,
                    path_utils.is_old_model_artifact(model_artifact))
            else:
                model_path = path_utils.serving_model_path(
                    model_artifact.uri,
                    path_utils.is_old_model_artifact(model_artifact))
            logging.info('Using %s as %s model.', model_path,
                         model_spec.name)
            models.append(
                eval_shared_model_fn(
                    eval_saved_model_path=model_path,
                    model_name=model_spec.name,
                    eval_config=eval_config,
                    add_metrics_callbacks=add_metrics_callbacks))
    else:
        eval_config = None
        assert (standard_component_specs.FEATURE_SLICING_SPEC_KEY
                in exec_properties and exec_properties[
                    standard_component_specs.FEATURE_SLICING_SPEC_KEY]
               ), 'both eval_config and feature_slicing_spec are unset.'
        feature_slicing_spec = evaluator_pb2.FeatureSlicingSpec()
        proto_utils.json_to_proto(
            exec_properties[
                standard_component_specs.FEATURE_SLICING_SPEC_KEY],
            feature_slicing_spec)
        slice_spec = self._get_slice_spec_from_feature_slicing_spec(
            feature_slicing_spec)
        model_artifact = artifact_utils.get_single_instance(
            input_dict[standard_component_specs.MODEL_KEY])
        model_path = path_utils.eval_model_path(
            model_artifact.uri,
            path_utils.is_old_model_artifact(model_artifact))
        logging.info('Using %s for model eval.', model_path)
        models.append(
            eval_shared_model_fn(
                eval_saved_model_path=model_path,
                model_name='',
                eval_config=None,
                add_metrics_callbacks=add_metrics_callbacks))

    # TFMA takes a bare shared model when there is exactly one, else a list
    # (which is empty for model-less evaluation).
    eval_shared_model = models[0] if len(models) == 1 else models

    schema = None
    if standard_component_specs.SCHEMA_KEY in input_dict:
        schema = io_utils.SchemaReader().read(
            io_utils.get_only_uri_in_dir(
                artifact_utils.get_single_uri(
                    input_dict[standard_component_specs.SCHEMA_KEY])))

    # Load and deserialize example splits from execution properties.
    example_splits = json_utils.loads(
        exec_properties.get(standard_component_specs.EXAMPLE_SPLITS_KEY,
                            'null'))
    if not example_splits:
        example_splits = ['eval']
        logging.info("The 'example_splits' parameter is not set, using 'eval' "
                     'split.')

    logging.info('Evaluating model.')
    # TempPipInstallContext is needed here so that subprocesses (which
    # may be created by the Beam multi-process DirectRunner) can find the
    # needed dependencies.
    # TODO(b/187122662): Move this to the ExecutorOperator or Launcher.
    with udf_utils.TempPipInstallContext(extra_pip_packages):
        with self._make_beam_pipeline() as pipeline:
            examples_list = []
            tensor_adapter_config = None
            # pylint: disable=expression-not-assigned
            if tfma.is_batched_input(eval_shared_model, eval_config):
                tfxio_factory = tfxio_utils.get_tfxio_factory_from_artifact(
                    examples=input_dict[
                        standard_component_specs.EXAMPLES_KEY],
                    telemetry_descriptors=_TELEMETRY_DESCRIPTORS,
                    schema=schema,
                    raw_record_column_name=tfma_constants.ARROW_INPUT_COLUMN)
                # TODO(b/161935932): refactor after TFXIO supports multiple
                # patterns.
                for split in example_splits:
                    split_uris = artifact_utils.get_split_uris(
                        input_dict[standard_component_specs.EXAMPLES_KEY],
                        split)
                    for index, split_uri in enumerate(split_uris):
                        file_pattern = io_utils.all_files_pattern(split_uri)
                        tfxio = tfxio_factory(file_pattern)
                        data = (
                            pipeline
                            | f'ReadFromTFRecordToArrow[{split}][{index}]' >>
                            tfxio.BeamSource())
                        examples_list.append(data)
                if schema is not None:
                    # Use last tfxio as TensorRepresentations and ArrowSchema
                    # are fixed.
                    tensor_adapter_config = (
                        tensor_adapter.TensorAdapterConfig(
                            arrow_schema=tfxio.ArrowSchema(),
                            tensor_representations=(
                                tfxio.TensorRepresentations())))
            else:
                for split in example_splits:
                    split_uris = artifact_utils.get_split_uris(
                        input_dict[standard_component_specs.EXAMPLES_KEY],
                        split)
                    for index, split_uri in enumerate(split_uris):
                        file_pattern = io_utils.all_files_pattern(split_uri)
                        data = (
                            pipeline
                            | f'ReadFromTFRecord[{split}][{index}]' >>
                            beam.io.ReadFromTFRecord(
                                file_pattern=file_pattern))
                        examples_list.append(data)

            custom_extractors = udf_utils.try_get_fn(
                exec_properties=exec_properties,
                fn_name='custom_extractors')
            extractors = None
            if custom_extractors:
                extractors = custom_extractors(
                    eval_shared_model=eval_shared_model,
                    eval_config=eval_config,
                    tensor_adapter_config=tensor_adapter_config)

            (examples_list
             | 'FlattenExamples' >> beam.Flatten()
             | 'ExtractEvaluateAndWriteResults' >>
             (tfma.ExtractEvaluateAndWriteResults(
                 eval_shared_model=eval_shared_model,
                 eval_config=eval_config,
                 extractors=extractors,
                 output_path=output_uri,
                 slice_spec=slice_spec,
                 tensor_adapter_config=tensor_adapter_config)))
    logging.info('Evaluation complete. Results written to %s.', output_uri)

    if not run_validation:
        # TODO(jinhuang): delete the BLESSING_KEY from output_dict when
        # supported.
        logging.info('No threshold configured, will not validate model.')
        return

    # Set up blessing artifact
    blessing = artifact_utils.get_single_instance(
        output_dict[standard_component_specs.BLESSING_KEY])
    # The candidate model is optional (model-less evaluation), so its
    # properties are recorded only when a model artifact was supplied;
    # indexing the key unconditionally would raise KeyError after the
    # evaluation results have already been written.
    candidate_models = input_dict.get(standard_component_specs.MODEL_KEY)
    if candidate_models:
        blessing.set_string_custom_property(
            constants.ARTIFACT_PROPERTY_CURRENT_MODEL_URI_KEY,
            artifact_utils.get_single_uri(candidate_models))
        blessing.set_int_custom_property(
            constants.ARTIFACT_PROPERTY_CURRENT_MODEL_ID_KEY,
            candidate_models[0].id)
    if input_dict.get(standard_component_specs.BASELINE_MODEL_KEY):
        baseline_model = input_dict[
            standard_component_specs.BASELINE_MODEL_KEY][0]
        blessing.set_string_custom_property(
            constants.ARTIFACT_PROPERTY_BASELINE_MODEL_URI_KEY,
            baseline_model.uri)
        blessing.set_int_custom_property(
            constants.ARTIFACT_PROPERTY_BASELINE_MODEL_ID_KEY,
            baseline_model.id)
    if 'current_component_id' in exec_properties:
        blessing.set_string_custom_property(
            'component_id', exec_properties['current_component_id'])

    # Check validation result and write BLESSED file accordingly.
    logging.info('Checking validation results.')
    validation_result = tfma.load_validation_result(output_uri)
    if validation_result.validation_ok:
        io_utils.write_string_file(
            os.path.join(blessing.uri, constants.BLESSED_FILE_NAME), '')
        blessing.set_int_custom_property(
            constants.ARTIFACT_PROPERTY_BLESSED_KEY, constants.BLESSED_VALUE)
    else:
        io_utils.write_string_file(
            os.path.join(blessing.uri, constants.NOT_BLESSED_FILE_NAME), '')
        blessing.set_int_custom_property(
            constants.ARTIFACT_PROPERTY_BLESSED_KEY,
            constants.NOT_BLESSED_VALUE)
    logging.info('Blessing result %s written to %s.',
                 validation_result.validation_ok, blessing.uri)
def Do(self, input_dict: Dict[Text, List[types.Artifact]],
       output_dict: Dict[Text, List[types.Artifact]],
       exec_properties: Dict[Text, Any]) -> None:
    """Runs a batch job to evaluate the eval_model against the given input.

    Only the 'eval' split of the examples is read.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - EXAMPLES_KEY: examples for eval the model (required).
        - MODEL_KEY: the single candidate model (required).
        - BASELINE_MODEL_KEY: at most one baseline model; required when a
          model spec in the eval config is marked as baseline.
      output_dict: Output dict from output key to a list of Artifacts.
        - EVALUATION_KEY: model evaluation results (required).
      exec_properties: A dict of execution properties.
        - eval_config: JSON string of tfma.EvalConfig.
        - feature_slicing_spec: JSON string of
          evaluator_pb2.FeatureSlicingSpec instance, providing the way to
          slice the data. Deprecated, use eval_config.slicing_specs instead.
        - fairness_indicator_thresholds: optional thresholds that enable the
          fairness indicator post export metric.

    Returns:
      None

    Raises:
      ValueError: If a required key is missing, more than one candidate or
        baseline model is supplied, the eval config lists more than two
        models, or a baseline model spec is configured without a baseline
        model input.
    """
    if EXAMPLES_KEY not in input_dict:
        raise ValueError('EXAMPLES_KEY is missing from input dict.')
    if MODEL_KEY not in input_dict:
        raise ValueError('MODEL_KEY is missing from input dict.')
    if EVALUATION_KEY not in output_dict:
        raise ValueError('EVALUATION_KEY is missing from output dict.')
    if len(input_dict[MODEL_KEY]) > 1:
        raise ValueError(
            'There can be only one candidate model, there are {}.'.format(
                len(input_dict[MODEL_KEY])))
    if BASELINE_MODEL_KEY in input_dict and len(
            input_dict[BASELINE_MODEL_KEY]) > 1:
        raise ValueError(
            'There can be only one baseline model, there are {}.'.format(
                len(input_dict[BASELINE_MODEL_KEY])))

    self._log_startup(input_dict, output_dict, exec_properties)
    output_uri = artifact_utils.get_single_uri(output_dict[EVALUATION_KEY])

    # Add fairness indicator metric callback if necessary.
    fairness_indicator_thresholds = exec_properties.get(
        'fairness_indicator_thresholds', None)
    add_metrics_callbacks = None
    if fairness_indicator_thresholds:
        # Need to import the following module so that the fairness indicator
        # post-export metric is registered.
        import tensorflow_model_analysis.addons.fairness.post_export_metrics.fairness_indicators  # pylint: disable=g-import-not-at-top, unused-variable
        add_metrics_callbacks = [
            tfma.post_export_metrics.fairness_indicators(  # pytype: disable=module-attr
                thresholds=fairness_indicator_thresholds),
        ]

    def _get_eval_saved_model(artifact: List[types.Artifact],
                              tags=None) -> tfma.EvalSharedModel:
        """Builds a shared model from the serving or the eval export."""
        model_uri = artifact_utils.get_single_uri(artifact)
        if tags and tf.saved_model.SERVING in tags:
            model_path = path_utils.serving_model_path(model_uri)
        else:
            model_path = path_utils.eval_model_path(model_uri)
        return tfma.default_eval_shared_model(
            eval_saved_model_path=model_path,
            tags=tags,
            add_metrics_callbacks=add_metrics_callbacks)

    # Extract model artifacts.
    # Baseline will be ignored if baseline is not configured in model_spec.
    if 'eval_config' in exec_properties and exec_properties['eval_config']:
        slice_spec = None
        eval_config = tfma.EvalConfig()
        json_format.Parse(exec_properties['eval_config'], eval_config)
        if len(eval_config.model_specs) > 2:
            raise ValueError(
                ('Cannot support more than two models. \n'
                 'There are {} models in this eval_config.').format(
                     len(eval_config.model_specs)))
        models = {}
        if not eval_config.model_specs:
            # Fall back to a single default model spec.
            eval_config.model_specs.add()
        for model_spec in eval_config.model_specs:
            # `tags` must be computed for every spec. Assigning it only for
            # non-'eval' signatures left it unbound when the first spec used
            # the 'eval' signature, and let a later 'eval' spec inherit the
            # SERVING tags of the previous one.
            if model_spec.signature_name != 'eval':
                tags = [tf.saved_model.SERVING]
            else:
                tags = None
            if model_spec.is_baseline:
                if BASELINE_MODEL_KEY not in input_dict:
                    raise ValueError(
                        'No baseline model is present in Evaluator, check '
                        'whether a baseline is provided to the Executor.')
                models[model_spec.name] = _get_eval_saved_model(
                    input_dict[BASELINE_MODEL_KEY], tags)
                absl.logging.info('Using {} as baseline model.'.format(
                    models[model_spec.name].model_path))
            else:
                models[model_spec.name] = _get_eval_saved_model(
                    input_dict[MODEL_KEY], tags)
                absl.logging.info('Using {} for model eval.'.format(
                    models[model_spec.name].model_path))
    else:
        eval_config = None
        assert ('feature_slicing_spec' in exec_properties and
                exec_properties['feature_slicing_spec']
               ), 'both eval_config and feature_slicing_spec are unset.'
        feature_slicing_spec = evaluator_pb2.FeatureSlicingSpec()
        json_format.Parse(exec_properties['feature_slicing_spec'],
                          feature_slicing_spec)
        slice_spec = self._get_slice_spec_from_feature_slicing_spec(
            feature_slicing_spec)
        # Legacy path: a single shared model loaded from the eval export.
        models = _get_eval_saved_model(input_dict[MODEL_KEY])
        absl.logging.info('Using {} for model eval.'.format(
            models.model_path))

    absl.logging.info('Evaluating model.')
    with self._make_beam_pipeline() as pipeline:
        # pylint: disable=expression-not-assigned
        (pipeline
         | 'ReadData' >> beam.io.ReadFromTFRecord(
             file_pattern=io_utils.all_files_pattern(
                 artifact_utils.get_split_uri(input_dict[EXAMPLES_KEY],
                                              'eval')))
         | 'ExtractEvaluateAndWriteResults' >>
         tfma.ExtractEvaluateAndWriteResults(
             eval_shared_model=models,
             eval_config=eval_config,
             output_path=output_uri,
             slice_spec=slice_spec))
    absl.logging.info(
        'Evaluation complete. Results written to {}.'.format(output_uri))