def testVerifyEvaluatorRaisesValueError(self):
  extractors = [
      extractor.Extractor(stage_name='ExtractorThatExists', ptransform=None)
  ]
  evaluator.verify_evaluator(
      evaluator.Evaluator(
          stage_name='EvaluatorWithoutError',
          run_after='ExtractorThatExists',
          ptransform=None), extractors)
  with self.assertRaises(ValueError):
    evaluator.verify_evaluator(
        evaluator.Evaluator(
            stage_name='EvaluatorWithError',
            run_after='ExtractorThatDoesNotExist',
            ptransform=None), extractors)
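# A minimal sketch of the check the test above exercises, assuming only that
# Evaluator carries stage_name/run_after fields and Extractor carries a
# stage_name field. This is an illustrative reconstruction, not TFMA's actual
# verify_evaluator (which may handle additional sentinel values for run_after):
# it raises ValueError when run_after names no extractor in the pipeline.
def _verify_evaluator_sketch(evaluator_to_check, extractors):
  """Raises ValueError if run_after matches no extractor stage_name."""
  stage_names = [e.stage_name for e in extractors]
  if (evaluator_to_check.run_after and
      evaluator_to_check.run_after not in stage_names):
    raise ValueError(
        'Extractor matching run_after=%s not found for evaluator %s' %
        (evaluator_to_check.run_after, evaluator_to_check.stage_name))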
def ExtractEvaluateAndWriteResults(  # pylint: disable=invalid-name
    examples,
    eval_shared_model,
    output_path,
    display_only_data_location=None,
    slice_spec=None,
    desired_batch_size=None,
    extractors=None,
    evaluators=None,
    writers=None,
    write_config=True,
    num_bootstrap_samples=1):
  """PTransform for performing extraction, evaluation, and writing results.

  Users who want to construct their own Beam pipelines instead of using the
  lightweight run_model_analysis functions should use this PTransform.

  Example usage:

    eval_shared_model = tfma.default_eval_shared_model(
        eval_saved_model_path=model_location,
        add_metrics_callbacks=[...],
        example_weight_key=example_weight_key)
    with beam.Pipeline(runner=...) as p:
      _ = (p
           | 'ReadData' >> beam.io.ReadFromTFRecord(data_location)
           | 'ExtractEvaluateAndWriteResults' >>
           tfma.ExtractEvaluateAndWriteResults(
               eval_shared_model=eval_shared_model,
               output_path=output_path,
               display_only_data_location=data_location,
               slice_spec=slice_spec,
               ...))
    result = tfma.load_eval_result(output_path=output_path)
    tfma.view.render_slicing_metrics(result)

  Note that the exact serialization format is an internal implementation
  detail and subject to change. Users should only use the TFMA functions to
  write and read the results.

  Args:
    examples: PCollection of input examples. Can be any format the model
      accepts (e.g. string containing CSV row, TensorFlow.Example, etc).
    eval_shared_model: Shared model parameters for EvalSavedModel including
      any additional metrics (see EvalSharedModel for more information on how
      to configure additional metrics).
    output_path: Path to output metrics and plots results.
    display_only_data_location: Optional path indicating where the examples
      were read from. This is used only for display purposes - data will not
      actually be read from this path.
    slice_spec: Optional list of SingleSliceSpec specifying the slices to
      slice the data into. If None, defaults to the overall slice.
    desired_batch_size: Optional batch size for batching in Predict and
      Aggregate.
    extractors: Optional list of Extractors to apply to Extracts. Typically
      these will be added by calling the default_extractors function. If no
      extractors are provided, default_extractors (non-materialized) will be
      used.
    evaluators: Optional list of Evaluators for evaluating Extracts. Typically
      these will be added by calling the default_evaluators function. If no
      evaluators are provided, default_evaluators will be used.
    writers: Optional list of Writers for writing Evaluation output. Typically
      these will be added by calling the default_writers function. If no
      writers are provided, default_writers will be used.
    write_config: True to write the config along with the results.
    num_bootstrap_samples: Optional, set to at least 20 in order to calculate
      metrics with confidence intervals.

  Raises:
    ValueError: If matching Extractor not found for an Evaluator.

  Returns:
    PDone.
""" if not extractors: extractors = default_extractors(eval_shared_model=eval_shared_model, slice_spec=slice_spec, desired_batch_size=desired_batch_size, materialize=False) if not evaluators: evaluators = default_evaluators( eval_shared_model=eval_shared_model, desired_batch_size=desired_batch_size, num_bootstrap_samples=num_bootstrap_samples) for v in evaluators: evaluator.verify_evaluator(v, extractors) if not writers: writers = default_writers(output_path=output_path) data_location = '<user provided PCollection>' if display_only_data_location is not None: data_location = display_only_data_location example_weight_metric_key = metric_keys.EXAMPLE_COUNT if eval_shared_model.example_weight_key: example_weight_metric_key = metric_keys.EXAMPLE_WEIGHT eval_config = EvalConfig( model_location=eval_shared_model.model_path, data_location=data_location, slice_spec=slice_spec, example_weight_metric_key=example_weight_metric_key, num_bootstrap_samples=num_bootstrap_samples) # pylint: disable=no-value-for-parameter _ = (examples | 'InputsToExtracts' >> InputsToExtracts() | 'ExtractAndEvaluate' >> ExtractAndEvaluate(extractors=extractors, evaluators=evaluators) | 'WriteResults' >> WriteResults(writers=writers)) if write_config: _ = examples.pipeline | WriteEvalConfig(eval_config, output_path) # pylint: enable=no-value-for-parameter return beam.pvalue.PDone(examples.pipeline)
def ExtractEvaluateAndWriteResults(  # pylint: disable=invalid-name
    examples: beam.pvalue.PCollection,
    eval_shared_model: Optional[Union[types.EvalSharedModel,
                                      Dict[Text,
                                           types.EvalSharedModel]]] = None,
    eval_config: config.EvalConfig = None,
    extractors: Optional[List[extractor.Extractor]] = None,
    evaluators: Optional[List[evaluator.Evaluator]] = None,
    writers: Optional[List[writer.Writer]] = None,
    output_path: Optional[Text] = None,
    display_only_data_location: Optional[Text] = None,
    display_only_file_format: Optional[Text] = None,
    slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
    write_config: Optional[bool] = True,
    compute_confidence_intervals: Optional[bool] = False,
    k_anonymization_count: int = 1,
    desired_batch_size: Optional[int] = None,
    random_seed_for_testing: Optional[int] = None) -> beam.pvalue.PDone:
  """PTransform for performing extraction, evaluation, and writing results.

  Users who want to construct their own Beam pipelines instead of using the
  lightweight run_model_analysis functions should use this PTransform.

  Example usage:

    eval_config = tfma.EvalConfig(slicing_specs=[...], metrics_specs=[...])
    eval_shared_model = tfma.default_eval_shared_model(
        eval_saved_model_path=model_location, eval_config=eval_config)
    with beam.Pipeline(runner=...) as p:
      _ = (p
           | 'ReadData' >> beam.io.ReadFromTFRecord(data_location)
           | 'ExtractEvaluateAndWriteResults' >>
           tfma.ExtractEvaluateAndWriteResults(
               eval_shared_model=eval_shared_model,
               eval_config=eval_config,
               ...))
    result = tfma.load_eval_result(output_path=output_path)
    tfma.view.render_slicing_metrics(result)

  Note that the exact serialization format is an internal implementation
  detail and subject to change. Users should only use the TFMA functions to
  write and read the results.

  Args:
    examples: PCollection of input examples. Can be any format the model
      accepts (e.g. string containing CSV row, TensorFlow.Example, etc).
    eval_shared_model: Optional shared model (single-model evaluation) or dict
      of shared models keyed by model name (multi-model evaluation). Only
      required if needed by default extractors, evaluators, or writers and for
      display purposes of the model path.
    eval_config: Eval config.
    extractors: Optional list of Extractors to apply to Extracts. Typically
      these will be added by calling the default_extractors function. If no
      extractors are provided, default_extractors (non-materialized) will be
      used.
    evaluators: Optional list of Evaluators for evaluating Extracts. Typically
      these will be added by calling the default_evaluators function. If no
      evaluators are provided, default_evaluators will be used.
    writers: Optional list of Writers for writing Evaluation output. Typically
      these will be added by calling the default_writers function. If no
      writers are provided, default_writers will be used.
    output_path: Path to output metrics and plots results.
    display_only_data_location: Optional path indicating where the examples
      were read from. This is used only for display purposes - data will not
      actually be read from this path.
    display_only_file_format: Optional format of the examples. This is used
      only for display purposes.
    slice_spec: Deprecated (use EvalConfig).
    write_config: Deprecated (use EvalConfig).
    compute_confidence_intervals: Deprecated (use EvalConfig).
    k_anonymization_count: Deprecated (use EvalConfig).
    desired_batch_size: Optional batch size for batching in Predict.
    random_seed_for_testing: Provide for deterministic tests only.

  Raises:
    ValueError: If EvalConfig invalid or matching Extractor not found for an
      Evaluator.
  Returns:
    PDone.
  """
  eval_shared_models = eval_shared_model
  if not isinstance(eval_shared_model, dict):
    eval_shared_models = {'': eval_shared_model}

  if eval_config is None:
    model_specs = []
    for model_name, shared_model in eval_shared_models.items():
      example_weight_key = shared_model.example_weight_key
      example_weight_keys = {}
      if example_weight_key and isinstance(example_weight_key, dict):
        example_weight_keys = example_weight_key
        example_weight_key = ''
      model_specs.append(
          config.ModelSpec(
              name=model_name,
              example_weight_key=example_weight_key,
              example_weight_keys=example_weight_keys))
    slicing_specs = None
    if slice_spec:
      slicing_specs = [s.to_proto() for s in slice_spec]
    options = config.Options()
    options.compute_confidence_intervals.value = compute_confidence_intervals
    options.k_anonymization_count.value = k_anonymization_count
    if not write_config:
      options.disabled_outputs.values.append(_EVAL_CONFIG_FILE)
    eval_config = config.EvalConfig(
        model_specs=model_specs,
        slicing_specs=slicing_specs,
        options=options)
  else:
    eval_config = config.update_eval_config_with_defaults(eval_config)

  config.verify_eval_config(eval_config)

  if not extractors:
    extractors = default_extractors(
        eval_config=eval_config,
        eval_shared_model=eval_shared_model,
        materialize=False,
        desired_batch_size=desired_batch_size)

  if not evaluators:
    evaluators = default_evaluators(
        eval_config=eval_config,
        eval_shared_model=eval_shared_model,
        random_seed_for_testing=random_seed_for_testing)

  for v in evaluators:
    evaluator.verify_evaluator(v, extractors)

  if not writers:
    writers = default_writers(
        output_path=output_path, eval_shared_model=eval_shared_model)

  # pylint: disable=no-value-for-parameter
  _ = (
      examples
      | 'InputsToExtracts' >> InputsToExtracts()
      | 'ExtractAndEvaluate' >> ExtractAndEvaluate(
          extractors=extractors, evaluators=evaluators)
      | 'WriteResults' >> WriteResults(writers=writers))

  if _EVAL_CONFIG_FILE not in eval_config.options.disabled_outputs.values:
    data_location = '<user provided PCollection>'
    if display_only_data_location is not None:
      data_location = display_only_data_location
    file_format = '<unknown>'
    if display_only_file_format is not None:
      file_format = display_only_file_format
    model_locations = {}
    for k, v in eval_shared_models.items():
      model_locations[k] = ('<unknown>'
                            if v is None or v.model_path is None
                            else v.model_path)
    _ = (
        examples.pipeline
        | WriteEvalConfig(eval_config, output_path, data_location,
                          file_format, model_locations))
  # pylint: enable=no-value-for-parameter

  return beam.pvalue.PDone(examples.pipeline)
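# A hedged sketch of the dict-keyed multi-model form this version accepts for
# eval_shared_model. The model names ('candidate', 'baseline') and all paths
# are illustrative assumptions, and the EvalConfig's ModelSpec names are
# assumed to match the dict keys.
eval_config = tfma.EvalConfig(model_specs=[
    tfma.ModelSpec(name='candidate'),
    tfma.ModelSpec(name='baseline'),
])
eval_shared_models = {
    'candidate': tfma.default_eval_shared_model(
        eval_saved_model_path='/tmp/candidate_model'),  # placeholder path
    'baseline': tfma.default_eval_shared_model(
        eval_saved_model_path='/tmp/baseline_model'),   # placeholder path
}
with beam.Pipeline() as p:
  _ = (p
       | 'ReadData' >> beam.io.ReadFromTFRecord('/tmp/examples.tfrecord')
       | 'ExtractEvaluateAndWriteResults' >>
       tfma.ExtractEvaluateAndWriteResults(
           eval_shared_model=eval_shared_models,
           eval_config=eval_config,
           output_path='/tmp/tfma_output'))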
def ExtractEvaluateAndWriteResults(  # pylint: disable=invalid-name
    examples: beam.pvalue.PCollection,
    eval_shared_model: Optional[types.EvalSharedModel] = None,
    eval_shared_models: Optional[List[types.EvalSharedModel]] = None,
    eval_config: config.EvalConfig = None,
    extractors: Optional[List[extractor.Extractor]] = None,
    evaluators: Optional[List[evaluator.Evaluator]] = None,
    writers: Optional[List[writer.Writer]] = None,
    output_path: Optional[Text] = None,
    display_only_data_location: Optional[Text] = None,
    slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
    desired_batch_size: Optional[int] = None,
    write_config: Optional[bool] = True,
    compute_confidence_intervals: Optional[bool] = False,
    k_anonymization_count: int = 1) -> beam.pvalue.PDone:
  """PTransform for performing extraction, evaluation, and writing results.

  Users who want to construct their own Beam pipelines instead of using the
  lightweight run_model_analysis functions should use this PTransform.

  Example usage:

    eval_config = tfma.EvalConfig(
        input_data_specs=[tfma.InputDataSpec(location=data_location)],
        model_specs=[tfma.ModelSpec(location=model_location)],
        output_data_specs=[tfma.OutputDataSpec(default_location=output_path)],
        slicing_specs=[...],
        metrics_specs=[...])
    eval_shared_model = tfma.default_eval_shared_model(
        eval_saved_model_path=model_location,
        add_metrics_callbacks=[...])
    with beam.Pipeline(runner=...) as p:
      _ = (p
           | 'ReadData' >> beam.io.ReadFromTFRecord(data_location)
           | 'ExtractEvaluateAndWriteResults' >>
           tfma.ExtractEvaluateAndWriteResults(
               eval_config=eval_config,
               eval_shared_models=[eval_shared_model],
               ...))
    result = tfma.load_eval_result(output_path=output_path)
    tfma.view.render_slicing_metrics(result)

  Note that the exact serialization format is an internal implementation
  detail and subject to change. Users should only use the TFMA functions to
  write and read the results.

  Args:
    examples: PCollection of input examples. Can be any format the model
      accepts (e.g. string containing CSV row, TensorFlow.Example, etc).
    eval_shared_model: Shared model (single-model evaluation).
    eval_shared_models: Shared models (multi-model evaluation).
    eval_config: Eval config.
    extractors: Optional list of Extractors to apply to Extracts. Typically
      these will be added by calling the default_extractors function. If no
      extractors are provided, default_extractors (non-materialized) will be
      used.
    evaluators: Optional list of Evaluators for evaluating Extracts. Typically
      these will be added by calling the default_evaluators function. If no
      evaluators are provided, default_evaluators will be used.
    writers: Optional list of Writers for writing Evaluation output. Typically
      these will be added by calling the default_writers function. If no
      writers are provided, default_writers will be used.
    output_path: Deprecated (use EvalConfig).
    display_only_data_location: Deprecated (use EvalConfig).
    slice_spec: Deprecated (use EvalConfig).
    desired_batch_size: Deprecated (use EvalConfig).
    write_config: Deprecated (use EvalConfig).
    compute_confidence_intervals: Deprecated (use EvalConfig).
    k_anonymization_count: Deprecated (use EvalConfig).

  Raises:
    ValueError: If matching Extractor not found for an Evaluator.

  Returns:
    PDone.
""" if eval_shared_model is not None: eval_shared_models = [eval_shared_model] if eval_config is None: data_location = '<user provided PCollection>' if display_only_data_location is not None: data_location = display_only_data_location disabled_outputs = None if not write_config: disabled_outputs = [_EVAL_CONFIG_FILE] model_specs = [] for m in eval_shared_models: example_weight_key = m.example_weight_key example_weight_keys = {} if example_weight_key and isinstance(example_weight_key, dict): example_weight_keys = example_weight_key example_weight_key = '' model_specs.append( config.ModelSpec(location=m.model_path, example_weight_key=example_weight_key, example_weight_keys=example_weight_keys)) slicing_specs = None if slice_spec: slicing_specs = [s.to_proto() for s in slice_spec] options = config.Options() options.compute_confidence_intervals.value = compute_confidence_intervals options.k_anonymization_count.value = k_anonymization_count if desired_batch_size: options.desired_batch_size.value = desired_batch_size eval_config = config.EvalConfig( input_data_specs=[config.InputDataSpec(location=data_location)], model_specs=model_specs, output_data_specs=[ config.OutputDataSpec(default_location=output_path, disabled_outputs=disabled_outputs) ], slicing_specs=slicing_specs, options=options) if not extractors: extractors = default_extractors(eval_config=eval_config, eval_shared_models=eval_shared_models, materialize=False) if not evaluators: evaluators = default_evaluators(eval_config=eval_config, eval_shared_models=eval_shared_models) for v in evaluators: evaluator.verify_evaluator(v, extractors) if not writers: writers = default_writers(eval_config=eval_config, eval_shared_models=eval_shared_models) # pylint: disable=no-value-for-parameter _ = (examples | 'InputsToExtracts' >> InputsToExtracts() | 'ExtractAndEvaluate' >> ExtractAndEvaluate(extractors=extractors, evaluators=evaluators) | 'WriteResults' >> WriteResults(writers=writers)) # TODO(b/141016373): Add support for multiple models. if _EVAL_CONFIG_FILE not in eval_config.output_data_specs[ 0].disabled_outputs: _ = examples.pipeline | WriteEvalConfig(eval_config) # pylint: enable=no-value-for-parameter return beam.pvalue.PDone(examples.pipeline)