def testSerializeDeserializeToFile(self):
  metrics_slice_key1 = (('fruit', 'pear'), ('animal', 'duck'))
  metrics1 = {
      'alpha': np.array([0.1]),
      'bravo': np.array([0.2]),
      'charlie': np.float32(0.3)
  }
  expected_metrics1 = {
      'alpha': [0.1],
      'bravo': [0.2],
      'charlie': 0.3,
  }
  plots_slice_key1 = (('fruit', 'peach'), ('animal', 'cow'))
  plots1 = {
      'alpha': np.array([0.5, 0.6, 0.7]),
      'bravo': np.array([0.6, 0.7, 0.8]),
      'charlie': np.float32(0.7)
  }
  expected_plots1 = {
      'alpha': [0.5, 0.6, 0.7],
      'bravo': [0.6, 0.7, 0.8],
      'charlie': 0.7,
  }
  eval_config = api_types.EvalConfig(
      model_location='/path/to/model',
      data_location='/path/to/data',
      slice_spec=[
          slicer.SingleSliceSpec(
              features=[('age', 5), ('gender', 'f')], columns=['country']),
          slicer.SingleSliceSpec(
              features=[('age', 6), ('gender', 'm')], columns=['interest'])
      ],
      example_weight_metric_key='key')
  output_path = self._getTempDir()

  with beam.Pipeline() as pipeline:
    metrics = (
        pipeline
        | 'CreateMetrics' >> beam.Create([(metrics_slice_key1, metrics1)]))
    plots = (
        pipeline
        | 'CreatePlots' >> beam.Create([(plots_slice_key1, plots1)]))
    _ = ((metrics, plots)
         | 'WriteMetricsPlotsAndConfig' >>
         serialization.WriteMetricsPlotsAndConfig(
             output_path=output_path, eval_config=eval_config))

  metrics, plots = serialization.load_plots_and_metrics(output_path)
  self.assertSliceMetricsListEqual(
      [(metrics_slice_key1, expected_metrics1)], metrics)
  self.assertSliceMetricsListEqual(
      [(plots_slice_key1, expected_plots1)], plots)
  got_eval_config = serialization.load_eval_config(output_path)
  self.assertEqual(eval_config, got_eval_config)
def EvaluateAndWriteResults(  # pylint: disable=invalid-name
    examples,
    eval_saved_model_path,
    output_path,
    display_only_data_location=None,
    slice_spec=None,
    example_weight_key=None,
    add_metrics_callbacks=None,
    desired_batch_size=None,
):
  """Public API version of evaluate.Evaluate that handles example weights.

  Users who want to construct their own Beam pipelines instead of using the
  lightweight run_model_analysis functions should use this PTransform.

  Example usage:

    with beam.Pipeline(runner=...) as p:
      _ = (p
           | 'ReadData' >> beam.io.ReadFromTFRecord(data_location)
           | 'EvaluateAndWriteResults' >> tfma.EvaluateAndWriteResults(
               eval_saved_model_path=model_location,
               output_path=output_path,
               display_only_data_location=data_location,
               slice_spec=slice_spec,
               example_weight_key=example_weight_key,
               ...))
    result = tfma.load_eval_result(output_path=output_path)
    tfma.view.render_slicing_metrics(result)

  Note that the exact serialization format is an internal implementation detail
  and subject to change. Users should only use the TFMA functions to write and
  read the results.

  Args:
    examples: PCollection of input examples. Can be any format the model
      accepts (e.g. string containing CSV row, TensorFlow.Example, etc).
    eval_saved_model_path: Path to EvalSavedModel. This directory should
      contain the saved_model.pb file.
    output_path: Path to output metrics and plots results.
    display_only_data_location: Optional path indicating where the examples
      were read from. This is used only for display purposes - data will not
      actually be read from this path.
    slice_spec: Optional list of SingleSliceSpec specifying the slices to slice
      the data into. If None, defaults to the overall slice.
    example_weight_key: The key of the example weight column. If None, weight
      will be 1 for each example.
    add_metrics_callbacks: Optional list of callbacks for adding additional
      metrics to the graph. The names of the metrics added by the callbacks
      should not conflict with existing metrics, or metrics added by other
      callbacks. See below for more details about what each callback should do.
    desired_batch_size: Optional batch size for batching in Predict and
      Aggregate.

  Returns:
    PDone.
  """
  if add_metrics_callbacks is None:
    add_metrics_callbacks = []

  # Always compute example weight and example count.
  # pytype: disable=module-attr
  example_count_callback = post_export_metrics.example_count()
  example_weight_metric_key = metric_keys.EXAMPLE_COUNT
  add_metrics_callbacks.append(example_count_callback)
  if example_weight_key:
    example_weight_metric_key = metric_keys.EXAMPLE_WEIGHT
    example_weight_callback = post_export_metrics.example_weight(
        example_weight_key)
    add_metrics_callbacks.append(example_weight_callback)
  # pytype: enable=module-attr

  metrics, plots = examples | 'Evaluate' >> evaluate.Evaluate(
      eval_saved_model_path=eval_saved_model_path,
      add_metrics_callbacks=add_metrics_callbacks,
      slice_spec=slice_spec,
      desired_batch_size=desired_batch_size)

  data_location = '<user provided PCollection>'
  if display_only_data_location is not None:
    data_location = display_only_data_location

  eval_config = api_types.EvalConfig(
      model_location=eval_saved_model_path,
      data_location=data_location,
      slice_spec=slice_spec,
      example_weight_metric_key=example_weight_metric_key)

  _ = ((metrics, plots)
       | 'SerializeMetricsAndPlots' >> serialization.SerializeMetricsAndPlots(
           post_export_metrics=add_metrics_callbacks)
       | 'WriteMetricsPlotsAndConfig' >>
       serialization.WriteMetricsPlotsAndConfig(
           output_path=output_path, eval_config=eval_config))

  return beam.pvalue.PDone(examples.pipeline)
def testSerializeDeserializeToFile(self):
  metrics_slice_key = _make_slice_key('fruit', 'pear', 'animal', 'duck')
  metrics_for_slice = text_format.Parse(
      """
      slice_key {
        single_slice_keys {
          column: "fruit"
          bytes_value: "pear"
        }
        single_slice_keys {
          column: "animal"
          bytes_value: "duck"
        }
      }
      metrics {
        key: "accuracy"
        value { double_value { value: 0.8 } }
      }
      metrics {
        key: "example_weight"
        value { double_value { value: 10.0 } }
      }
      metrics {
        key: "auc"
        value {
          bounded_value {
            lower_bound { value: 0.1 }
            upper_bound { value: 0.3 }
            value { value: 0.2 }
          }
        }
      }
      metrics {
        key: "auprc"
        value {
          bounded_value {
            lower_bound { value: 0.05 }
            upper_bound { value: 0.17 }
            value { value: 0.1 }
          }
        }
      }""", metrics_for_slice_pb2.MetricsForSlice())
  plots_for_slice = text_format.Parse(
      """
      slice_key {
        single_slice_keys {
          column: "fruit"
          bytes_value: "peach"
        }
        single_slice_keys {
          column: "animal"
          bytes_value: "cow"
        }
      }
      plot_data {
        calibration_histogram_buckets {
          buckets {
            lower_threshold_inclusive: -inf
            upper_threshold_exclusive: 0.0
            num_weighted_examples { value: 0.0 }
            total_weighted_label { value: 0.0 }
            total_weighted_refined_prediction { value: 0.0 }
          }
          buckets {
            lower_threshold_inclusive: 0.0
            upper_threshold_exclusive: 0.5
            num_weighted_examples { value: 1.0 }
            total_weighted_label { value: 1.0 }
            total_weighted_refined_prediction { value: 0.3 }
          }
          buckets {
            lower_threshold_inclusive: 0.5
            upper_threshold_exclusive: 1.0
            num_weighted_examples { value: 1.0 }
            total_weighted_label { value: 0.0 }
            total_weighted_refined_prediction { value: 0.7 }
          }
          buckets {
            lower_threshold_inclusive: 1.0
            upper_threshold_exclusive: inf
            num_weighted_examples { value: 0.0 }
            total_weighted_label { value: 0.0 }
            total_weighted_refined_prediction { value: 0.0 }
          }
        }
      }""", metrics_for_slice_pb2.PlotsForSlice())
  plots_slice_key = _make_slice_key('fruit', 'peach', 'animal', 'cow')
  eval_config = api_types.EvalConfig(
      model_location='/path/to/model',
      data_location='/path/to/data',
      slice_spec=[
          slicer.SingleSliceSpec(
              features=[('age', 5), ('gender', 'f')], columns=['country']),
          slicer.SingleSliceSpec(
              features=[('age', 6), ('gender', 'm')], columns=['interest'])
      ],
      example_weight_metric_key='key')
  output_path = self._getTempDir()

  with beam.Pipeline() as pipeline:
    metrics = (
        pipeline
        | 'CreateMetrics' >> beam.Create(
            [metrics_for_slice.SerializeToString()]))
    plots = (
        pipeline
        | 'CreatePlots' >> beam.Create(
            [plots_for_slice.SerializeToString()]))
    _ = ((metrics, plots)
         | 'WriteMetricsPlotsAndConfig' >>
         serialization.WriteMetricsPlotsAndConfig(
             output_path=output_path, eval_config=eval_config))

  metrics, plots = serialization.load_plots_and_metrics(output_path)
  self.assertSliceMetricsListEqual(
      [(metrics_slice_key, metrics_for_slice.metrics)], metrics)
  self.assertSlicePlotsListEqual(
      [(plots_slice_key, plots_for_slice.plot_data)], plots)
  got_eval_config = serialization.load_eval_config(output_path)
  self.assertEqual(eval_config, got_eval_config)
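
# The test above calls _make_slice_key without defining it in this excerpt.
# Below is a minimal sketch of such a helper (hypothetical; the real test
# module may construct slice keys differently). It pairs up alternating
# column/value arguments into the tuple-of-(column, value)-tuples form used
# directly as a slice key in the first test, e.g.
# ('fruit', 'pear', 'animal', 'duck') -> (('fruit', 'pear'), ('animal', 'duck')).
def _make_slice_key(*args):
  if len(args) % 2 != 0:
    raise ValueError('expected an even number of column/value arguments')
  # Zip the even-indexed columns with the odd-indexed values.
  return tuple(zip(args[::2], args[1::2]))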
def ExtractEvaluateAndWriteResults(  # pylint: disable=invalid-name
    examples,
    eval_shared_model,
    output_path,
    display_only_data_location=None,
    slice_spec=None,
    desired_batch_size=None,
    extractors=None,
    fanout=16,
):
  """Public API version of evaluate.Evaluate that handles example weights.

  Users who want to construct their own Beam pipelines instead of using the
  lightweight run_model_analysis functions should use this PTransform.

  Example usage:

    eval_shared_model = tfma.default_eval_shared_model(
        eval_saved_model_path=model_location,
        add_metrics_callbacks=[...],
        example_weight_key=example_weight_key)

    with beam.Pipeline(runner=...) as p:
      _ = (p
           | 'ReadData' >> beam.io.ReadFromTFRecord(data_location)
           | 'ExtractEvaluateAndWriteResults' >>
           tfma.ExtractEvaluateAndWriteResults(
               eval_shared_model=eval_shared_model,
               output_path=output_path,
               display_only_data_location=data_location,
               slice_spec=slice_spec,
               ...))
    result = tfma.load_eval_result(output_path=output_path)
    tfma.view.render_slicing_metrics(result)

  Note that the exact serialization format is an internal implementation detail
  and subject to change. Users should only use the TFMA functions to write and
  read the results.

  Args:
    examples: PCollection of input examples. Can be any format the model
      accepts (e.g. string containing CSV row, TensorFlow.Example, etc).
    eval_shared_model: Shared model parameters for EvalSavedModel including any
      additional metrics (see EvalSharedModel for more information on how to
      configure additional metrics).
    output_path: Path to output metrics and plots results.
    display_only_data_location: Optional path indicating where the examples
      were read from. This is used only for display purposes - data will not
      actually be read from this path.
    slice_spec: Optional list of SingleSliceSpec specifying the slices to slice
      the data into. If None, defaults to the overall slice.
    desired_batch_size: Optional batch size for batching in Predict and
      Aggregate.
    extractors: Optional list of Extractors to apply to ExampleAndExtracts. If
      provided, the extracts MUST contain a FeaturesPredictionsLabels extract
      with key 'fpl' and a list of SliceKeyType extracts with key 'slice_keys'.
      Typically these will be added by calling the default_extractors function.
      If no extractors are provided, default_extractors (non-materialized) will
      be used.
    fanout: Fanout value forwarded to evaluate.Evaluate.

  Raises:
    ValueError: If PredictExtractor or SliceKeyExtractor is not present in
      extractors.

  Returns:
    PDone.
  """
  if not extractors:
    extractors = default_extractors(
        eval_shared_model=eval_shared_model,
        slice_spec=slice_spec,
        desired_batch_size=desired_batch_size,
        materialize=False)

  metrics, plots = (
      examples
      | 'ToExampleAndExtracts' >> evaluate.ToExampleAndExtracts()
      | 'Extract' >> evaluate.Extract(extractors=extractors)
      | 'Evaluate' >> evaluate.Evaluate(
          eval_shared_model=eval_shared_model,
          desired_batch_size=desired_batch_size,
          fanout=fanout))

  data_location = '<user provided PCollection>'
  if display_only_data_location is not None:
    data_location = display_only_data_location

  example_weight_metric_key = metric_keys.EXAMPLE_COUNT
  if eval_shared_model.example_weight_key:
    example_weight_metric_key = metric_keys.EXAMPLE_WEIGHT

  eval_config = api_types.EvalConfig(
      model_location=eval_shared_model.model_path,
      data_location=data_location,
      slice_spec=slice_spec,
      example_weight_metric_key=example_weight_metric_key)

  _ = ((metrics, plots)
       | 'SerializeMetricsAndPlots' >> serialization.SerializeMetricsAndPlots(
           post_export_metrics=eval_shared_model.add_metrics_callbacks)
       | 'WriteMetricsPlotsAndConfig' >>
       serialization.WriteMetricsPlotsAndConfig(
           output_path=output_path, eval_config=eval_config))

  return beam.pvalue.PDone(examples.pipeline)