  def testSliceOneSlice(self):
    with beam.Pipeline() as pipeline:
      fpls = create_fpls()
      metrics = (
          pipeline
          | 'CreateTestInput' >> beam.Create(fpls)
          | 'WrapFpls' >> beam.Map(wrap_fpl)
          | 'ExtractSlices' >> slice_api.ExtractSliceKeys([
              slicer.SingleSliceSpec(),
              slicer.SingleSliceSpec(columns=['gender'])
          ])
          | 'FanoutSlices' >> slice_api.FanoutSlices())

      def check_result(got):
        try:
          self.assertEqual(4, len(got), 'got: %s' % got)
          expected_result = [
              ((), fpls[0]),
              ((), fpls[1]),
              ((('gender', 'f'),), fpls[0]),
              ((('gender', 'm'),), fpls[1]),
          ]
          self.assertEqual(sorted(got), sorted(expected_result))
        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(metrics, check_result)
  def testSliceDefaultSlice(self):
    with beam.Pipeline() as pipeline:
      fpls = create_fpls()
      metrics = (
          pipeline
          | 'CreateTestInput' >> beam.Create(fpls)
          | 'WrapFpls' >> beam.Map(wrap_fpl)
          | 'ExtractSlices' >> slice_key_extractor.ExtractSliceKeys(
              [slicer.SingleSliceSpec()])
          | 'FanoutSlices' >> slice_api.FanoutSlices())

      def check_result(got):
        try:
          self.assertEqual(2, len(got), 'got: %s' % got)
          expected_result = [
              ((), fpls[0]),
              ((), fpls[1]),
          ]
          self.assertEqual(len(got), len(expected_result))
          self.assertTrue(
              got[0] == expected_result[0] and got[1] == expected_result[1] or
              got[1] == expected_result[0] and got[0] == expected_result[1])
        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(metrics, check_result)
  def testSliceOnMetaFeature(self):
    # We want to make sure that slicing on the newly added feature works, so
    # pulling in slice here.
    with beam.Pipeline() as pipeline:
      fpls = create_fpls()
      metrics = (
          pipeline
          | 'CreateTestInput' >> beam.Create(fpls)
          | 'WrapFpls' >> beam.Map(wrap_fpl)
          | 'ExtractInterestsNum' >>
          meta_feature_extractor.ExtractMetaFeature(get_num_interests)
          | 'ExtractSlices' >> slice_key_extractor.ExtractSliceKeys([
              slicer.SingleSliceSpec(),
              slicer.SingleSliceSpec(columns=['num_interests'])
          ])
          | 'FanoutSlices' >> slice_api.FanoutSlices())

      def check_result(got):
        try:
          self.assertEqual(4, len(got), 'got: %s' % got)
          expected_slice_keys = [
              (),
              (),
              (('num_interests', 1),),
              (('num_interests', 2),),
          ]
          self.assertEqual(
              sorted(slice_key for slice_key, _ in got),
              sorted(expected_slice_keys))
        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(metrics, check_result)
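  # Hedged sketch (not the real helper): testSliceOnMetaFeature relies on a
  # get_num_interests callback, defined elsewhere in this test module, that
  # derives a 'num_interests' meta-feature from each FeaturesPredictionsLabels
  # and yields the values 1 and 2 seen in expected_slice_keys above. The FPL
  # layout accessed below is an assumption made purely for illustration.
  #
  #   def get_num_interests(fpl):
  #     # Assumes fpl.features maps feature name -> {'node': array of values}.
  #     interests = fpl.features.get('interests', {}).get('node', [])
  #     return {'num_interests': len(interests)}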
def Evaluate(  # pylint: disable=invalid-name
    examples_and_extracts,
    eval_shared_model,
    desired_batch_size=None,
    fanout=16,
):
  """Evaluate the given EvalSavedModel on the given examples.

  This is for TFMA use only. Users should call
  tfma.ExtractEvaluateAndWriteResults instead of this function.

  Args:
    examples_and_extracts: PCollection of ExampleAndExtracts. The extracts MUST
      contain a FeaturesPredictionsLabels extract with key 'fpl' and a list of
      SliceKeyType extracts with key 'slice_keys'. Typically these will be
      added by calling the default_extractors function.
    eval_shared_model: Shared model parameters for EvalSavedModel including any
      additional metrics (see EvalSharedModel for more information on how to
      configure additional metrics).
    desired_batch_size: Optional batch size for batching in Aggregate.
    fanout: Fanout parameter passed to ComputePerSliceMetrics.

  Returns:
    DoOutputsTuple. The tuple entries are
    PCollection of (slice key, metrics) and
    PCollection of (slice key, plot metrics).
  """
  # pylint: disable=no-value-for-parameter
  return (
      examples_and_extracts

      # Input: one example at a time, with slice keys in extracts.
      # Output: one fpl example per slice key (notice that the example turns
      #         into n, replicated once per applicable slice key)
      | 'FanoutSlices' >> slice_api.FanoutSlices()

      # Each slice key lands on one shard where metrics are computed for all
      # examples in that shard -- the "map" and "reduce" parts of the
      # computation happen within this shard.
      # Output: Multi-outputs, a dict of slice key to computed metrics, and
      # plots if applicable.
      | 'ComputePerSliceMetrics' >> aggregate.ComputePerSliceMetrics(
          eval_shared_model=eval_shared_model,
          desired_batch_size=desired_batch_size,
          fanout=fanout))
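# Hedged usage sketch (not part of the library): assumes Evaluate above is
# exposed as a Beam PTransform factory (the no-value-for-parameter pylint
# disable suggests a beam.ptransform_fn decorator), and that
# examples_and_extracts is a PCollection whose extracts already carry the
# 'fpl' and 'slice_keys' keys, e.g. produced by the default_extractors
# mentioned in the docstring. The helper name and parameter values below are
# placeholders for illustration only.
def _example_evaluate_extracts_usage(examples_and_extracts, eval_shared_model):
  """Hypothetical helper showing how Evaluate might be applied."""
  return (
      examples_and_extracts
      | 'Evaluate' >> Evaluate(
          eval_shared_model=eval_shared_model,
          desired_batch_size=100,
          fanout=16))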
def Evaluate(  # pylint: disable=invalid-name
    examples,
    eval_saved_model_path,
    extractors=None,
    add_metrics_callbacks=None,
    slice_spec=None,
    desired_batch_size=None,
):
  """Evaluate the given EvalSavedModel on the given examples.

  This is for TFMA use only. Users should call tfma.EvaluateAndWriteResults
  instead of this function.

  Args:
    examples: PCollection of input examples. Can be any format the model
      accepts (e.g. string containing CSV row, TensorFlow.Example, etc).
    eval_saved_model_path: Path to EvalSavedModel. This directory should
      contain the saved_model.pb file.
    extractors: Optional list of Extractors to execute prior to slicing and
      aggregating the metrics. If not provided, a default set will be run.
    add_metrics_callbacks: Optional list of callbacks for adding additional
      metrics to the graph. The names of the metrics added by the callbacks
      should not conflict with existing metrics, or metrics added by other
      callbacks. See below for more details about what each callback should do.
    slice_spec: Optional list of SingleSliceSpec specifying the slices to slice
      the data into. If None, defaults to the overall slice.
    desired_batch_size: Optional batch size for batching in Predict and
      Aggregate.

  More details on add_metrics_callbacks:

    Each add_metrics_callback should have the following prototype:
      def add_metrics_callback(features_dict, predictions_dict, labels_dict):

    Note that features_dict, predictions_dict and labels_dict are not
    necessarily dictionaries - they might also be Tensors, depending on what
    the model's eval_input_receiver_fn returns.

    It should create and return a metric_ops dictionary, such that
    metric_ops['metric_name'] = (value_op, update_op), just as in the Trainer.

    Short example:

      def add_metrics_callback(features_dict, predictions_dict, labels):
        metric_ops = {}
        metric_ops['mean_label'] = tf.metrics.mean(labels)
        metric_ops['mean_probability'] = tf.metrics.mean(tf.slice(
            predictions_dict['probabilities'], [0, 1], [2, 1]))
        return metric_ops

  Returns:
    DoOutputsTuple. The tuple entries are
    PCollection of (slice key, metrics) and
    PCollection of (slice key, plot metrics).
  """
  if slice_spec is None:
    slice_spec = [slicer.SingleSliceSpec()]

  shared_handle = shared.Shared()

  if not extractors:
    extractors = [
        PredictExtractor(eval_saved_model_path, add_metrics_callbacks,
                         shared_handle, desired_batch_size),
        # For each example, determine the slice keys that apply to that
        # example and append them to extracts.
        types.Extractor(
            stage_name='ExtractSliceKeys',
            ptransform=slice_api.ExtractSliceKeys(slice_spec))
    ]

  # pylint: disable=no-value-for-parameter
  return (
      examples
      # For diagnostic outputs we pass types.ExampleAndExtracts throughout,
      # although the aggregating functions do not use this interface.
      | 'ToExampleAndExtracts' >>
      beam.Map(lambda x: types.ExampleAndExtracts(example=x, extracts={}))

      | Extract(extractors=extractors)

      # Input: one example at a time, with slice keys in extracts.
      # Output: one fpl example per slice key (notice that the example turns
      #         into n, replicated once per applicable slice key)
      | 'FanoutSlices' >> slice_api.FanoutSlices()

      # Each slice key lands on one shard where metrics are computed for all
      # examples in that shard -- the "map" and "reduce" parts of the
      # computation happen within this shard.
      # Output: Tuple[slicer.SliceKeyType, MetricVariablesType]
      | 'Aggregate' >> _Aggregate(
          eval_saved_model_path=eval_saved_model_path,
          add_metrics_callbacks=add_metrics_callbacks,
          shared_handle=shared_handle,
          desired_batch_size=desired_batch_size)

      # Different metrics for a given slice key are brought together.
      | 'ExtractOutput' >> _ExtractOutput(
          eval_saved_model_path=eval_saved_model_path,
          add_metrics_callbacks=add_metrics_callbacks,
          shared_handle=shared_handle))
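# Hedged usage sketch (not part of the library): roughly how this Evaluate
# might be invoked directly on serialized input examples read from disk. The
# TFRecord path, model path, batch size, and 'gender' slice column are
# placeholders; applying Evaluate with `>>` assumes a beam.ptransform_fn
# decorator, as hinted by the pylint disable above.
def _example_evaluate_examples_usage():
  """Hypothetical helper showing how Evaluate might be invoked."""
  with beam.Pipeline() as pipeline:
    metrics_and_plots = (
        pipeline
        | 'ReadExamples' >> beam.io.ReadFromTFRecord('/path/to/examples')
        | 'Evaluate' >> Evaluate(
            eval_saved_model_path='/path/to/eval_saved_model',
            slice_spec=[
                slicer.SingleSliceSpec(),
                slicer.SingleSliceSpec(columns=['gender']),
            ],
            desired_batch_size=100))
    # Per the docstring, metrics_and_plots is a DoOutputsTuple containing the
    # (slice key, metrics) and (slice key, plot metrics) PCollections.
    return metrics_and_plots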