def testValidation(self):
  # Test no feature spec.
  with self.assertRaisesRegexp(
      ValueError, 'ModelAgnosticConfig must have feature_spec set.'):
    model_agnostic_predict.ModelAgnosticConfig(
        label_keys=['label'],
        prediction_keys=['probabilities'],
        feature_spec=None)

  # Test no prediction keys.
  feature_map = {
      'age': tf.FixedLenFeature([], tf.int64),
      'language': tf.VarLenFeature(tf.string),
      'probabilities': tf.FixedLenFeature([2], tf.int64, default_value=[9, 9]),
      'label': tf.FixedLenFeature([], tf.int64)
  }
  with self.assertRaisesRegexp(
      ValueError, 'ModelAgnosticConfig must have prediction keys set.'):
    model_agnostic_predict.ModelAgnosticConfig(
        label_keys=['label'], prediction_keys=[], feature_spec=feature_map)

  # Test no label keys.
  with self.assertRaisesRegexp(
      ValueError, 'ModelAgnosticConfig must have label keys set.'):
    model_agnostic_predict.ModelAgnosticConfig(
        label_keys=[],
        prediction_keys=['predictions'],
        feature_spec=feature_map)

  # Test prediction key not in feature spec.
  with self.assertRaisesRegexp(
      ValueError, 'Prediction key not_prob not defined in feature_spec.'):
    model_agnostic_predict.ModelAgnosticConfig(
        label_keys=['label'],
        prediction_keys=['not_prob'],
        feature_spec=feature_map)

  # Test label key not in feature spec.
  with self.assertRaisesRegexp(
      ValueError, 'Label key not_label not defined in feature_spec.'):
    model_agnostic_predict.ModelAgnosticConfig(
        label_keys=['not_label'],
        prediction_keys=['probabilities'],
        feature_spec=feature_map)

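# The tests below build their inputs with a self._makeExample helper that is
# not defined in this section (it presumably lives in a shared test base
# class). A minimal sketch of the assumed behavior, for reference only: build
# a tf.train.Example from keyword arguments, routing each value to the
# matching feature kind (float -> float_list, int -> int64_list,
# str/bytes -> bytes_list).
def _makeExample(self, **kwargs):
  example = tf.train.Example()
  for key, value in kwargs.items():
    values = value if isinstance(value, (list, tuple)) else [value]
    feature = example.features.feature[key]
    if all(isinstance(v, float) for v in values):
      feature.float_list.value.extend(values)
    elif all(isinstance(v, int) for v in values):
      feature.int64_list.value.extend(values)
    else:
      feature.bytes_list.value.extend(
          v.encode('utf8') if isinstance(v, str) else v for v in values)
  return example
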
def testExtract(self):
  with beam.Pipeline() as pipeline:
    examples = [
        self._makeExample(
            age=3.0, language='english', probabilities=[1.0, 2.0], label=1.0),
        self._makeExample(
            age=3.0, language='chinese', probabilities=[2.0, 3.0], label=0.0),
        self._makeExample(
            age=4.0, language='english', probabilities=[3.0, 4.0], label=1.0),
        self._makeExample(
            age=5.0, language='chinese', probabilities=[4.0, 5.0], label=0.0),
    ]
    serialized_examples = [e.SerializeToString() for e in examples]

    # Set up a config to bucket our example keys.
    feature_map = {
        'age': tf.io.FixedLenFeature([], tf.float32),
        'language': tf.io.VarLenFeature(tf.string),
        'probabilities': tf.io.FixedLenFeature([2], tf.float32),
        'label': tf.io.FixedLenFeature([], tf.float32)
    }
    model_agnostic_config = agnostic_predict.ModelAgnosticConfig(
        label_keys=['label'],
        prediction_keys=['probabilities'],
        feature_spec=feature_map)

    fpl_extracts = (
        pipeline
        | beam.Create(serialized_examples)
        # Our diagnostic outputs pass types.Extracts throughout; however, our
        # aggregating functions do not use this interface.
        | beam.Map(lambda x: {constants.INPUT_KEY: x})
        | 'Extract' >> model_agnostic_extractor.ModelAgnosticExtract(
            model_agnostic_config=model_agnostic_config,
            desired_batch_size=3))

    def check_result(got):
      try:
        self.assertEqual(4, len(got), 'got: %s' % got)
        for item in got:
          self.assertIn(constants.FEATURES_PREDICTIONS_LABELS_KEY, item)
          fpl = item[constants.FEATURES_PREDICTIONS_LABELS_KEY]
          # Verify the fpl contains the features, predictions, and labels.
          self.assertIn('language', fpl.features)
          self.assertIn('age', fpl.features)
          self.assertIn('label', fpl.labels)
          self.assertIn('probabilities', fpl.predictions)
      except AssertionError as err:
        raise util.BeamAssertException(err)

    util.assert_that(fpl_extracts, check_result)

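# The evaluate-graph tests below reference an add_mean_callback metrics
# callback that is not defined in this section. A plausible sketch under the
# standard add_metrics_callbacks signature (features/predictions/labels dicts
# in, dict of metric ops out), reconstructed from the assertions below: a
# streaming mean over every label and prediction value ('tf_metric_mean') and
# a py_func-backed running total of the same values ('py_func_total_label').
def add_mean_callback(features_dict, predictions_dict, labels_dict):
  """Callback adding a mean and a py_func total over labels/predictions."""
  del features_dict  # Unused; these metrics only read labels/predictions.
  metric_ops = {}

  # A tf.metrics streaming mean over all label and prediction values.
  all_values = list(labels_dict.values()) + list(predictions_dict.values())
  metric_ops['tf_metric_mean'] = tf.metrics.mean(all_values)

  # A py_func metric. Its state must still live in a TF variable so the
  # evaluator can reset/update/fetch it like any other metric.
  total_label = tf.Variable(
      initial_value=0.0,
      dtype=tf.float64,
      trainable=False,
      collections=[
          tf.GraphKeys.METRIC_VARIABLES, tf.GraphKeys.LOCAL_VARIABLES
      ],
      name='total_label')

  def _total(x):
    return np.sum(x, dtype=np.float64)

  value_op = tf.identity(total_label)
  update_op = tf.assign_add(
      total_label, tf.py_func(_total, [all_values], tf.float64))
  metric_ops['py_func_total_label'] = (value_op, update_op)
  return metric_ops
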
def testEvaluateMultiLabelsPredictions(self):
  # Test case where we have multiple labels/predictions: 6 labels with
  # values 3, 5, 23, 12, 16, 31 and 6 predictions with values 2, 2, 2,
  # 4, 4, 4. This should give sum = 108 and mean = 9.
  examples = [
      self._makeExample(
          age=1.0, prediction=2, prediction_2=4, label=3, label_2=5),
      self._makeExample(
          age=1.0, prediction=2, prediction_2=4, label=23, label_2=12),
      self._makeExample(
          age=1.0, prediction=2, prediction_2=4, label=16, label_2=31),
  ]
  serialized_examples = [e.SerializeToString() for e in examples]

  # Set up a model agnostic config so we can get the FPLConfig.
  feature_map = {
      'age': tf.io.FixedLenFeature([], tf.float32),
      'prediction': tf.io.FixedLenFeature([], tf.int64),
      'prediction_2': tf.io.FixedLenFeature([], tf.int64),
      'label': tf.io.FixedLenFeature([], tf.int64),
      'label_2': tf.io.FixedLenFeature([], tf.int64)
  }
  model_agnostic_config = agnostic_predict.ModelAgnosticConfig(
      label_keys=['label', 'label_2'],
      prediction_keys=['prediction', 'prediction_2'],
      feature_spec=feature_map)

  # Create a Model Agnostic Evaluate graph handler and feed in the
  # serialized examples.
  evaluate_graph = model_agnostic_evaluate_graph.ModelAgnosticEvaluateGraph(
      [add_mean_callback], model_agnostic_config)
  evaluate_graph.metrics_reset_update_get_list(serialized_examples)
  outputs = evaluate_graph.get_metric_values()

  # Verify that we got the right metrics out.
  self.assertEqual(2, len(outputs))
  self.assertEqual(outputs['tf_metric_mean'], 9.0)
  self.assertEqual(outputs['py_func_total_label'], 108.0)

def testEvaluateGraph(self):
  # Have 3 labels of values 3, 23, 16 and predictions of values 2, 2, 2.
  # This should give sum = 48 and mean = 8.
  examples = [
      self._makeExample(
          age=3.0, language='english', predictions=2.0, labels=3.0),
      self._makeExample(
          age=3.0, language='chinese', predictions=2.0, labels=23.0),
      self._makeExample(
          age=4.0, language='english', predictions=2.0, labels=16.0),
  ]
  serialized_examples = [e.SerializeToString() for e in examples]

  # Set up a model agnostic config so we can get the FPLConfig.
  feature_map = {
      'age': tf.io.FixedLenFeature([], tf.float32),
      'language': tf.io.VarLenFeature(tf.string),
      'predictions': tf.io.FixedLenFeature([], tf.float32),
      'labels': tf.io.FixedLenFeature([], tf.float32)
  }
  model_agnostic_config = agnostic_predict.ModelAgnosticConfig(
      label_keys=['labels'],
      prediction_keys=['predictions'],
      feature_spec=feature_map)

  # Create a Model Agnostic Evaluate graph handler and feed in the
  # serialized examples.
  evaluate_graph = model_agnostic_evaluate_graph.ModelAgnosticEvaluateGraph(
      [add_mean_callback], model_agnostic_config)
  evaluate_graph.metrics_reset_update_get_list(serialized_examples)
  outputs = evaluate_graph.get_metric_values()

  # Verify that we got the right metrics out.
  self.assertEqual(2, len(outputs))
  self.assertEqual(outputs['tf_metric_mean'], 8.0)
  self.assertEqual(outputs['py_func_total_label'], 48.0)

def testEvaluateGraph(self):
  # Create some FPLs. The features aren't terribly useful for these metrics.
  # Just make sure they can be processed correctly by the feed/feedlist
  # generation logic by having one dense tensor and one sparse tensor.
  features = {
      'age': {
          encoding.NODE_SUFFIX: np.array([1])
      },
      'language': {
          encoding.NODE_SUFFIX:
              tf.SparseTensorValue(
                  indices=np.array([[0, 0]]),
                  values=np.array(['english']),
                  dense_shape=np.array([1, 1]))
      }
  }
  predictions = {'predictions': {encoding.NODE_SUFFIX: np.array([2])}}

  # Have 3 labels of values 3, 23, 16 and predictions of values 2, 2, 2.
  # This should give sum = 48 and mean = 8.
  labels = {'labels': {encoding.NODE_SUFFIX: np.array([3])}}
  labels_2 = {'labels': {encoding.NODE_SUFFIX: np.array([23])}}
  labels_3 = {'labels': {encoding.NODE_SUFFIX: np.array([16])}}

  # Compile the actual FPLs.
  fpl = types.FeaturesPredictionsLabels(
      input_ref=0, features=features, predictions=predictions, labels=labels)
  fpl_2 = types.FeaturesPredictionsLabels(
      input_ref=0,
      features=features,
      predictions=predictions,
      labels=labels_2)
  fpl_3 = types.FeaturesPredictionsLabels(
      input_ref=0,
      features=features,
      predictions=predictions,
      labels=labels_3)

  # Set up a model agnostic config so we can get the FPLConfig.
  feature_map = {
      'age': tf.FixedLenFeature([], tf.float32),
      'language': tf.VarLenFeature(tf.string),
      'predictions': tf.FixedLenFeature([], tf.float32),
      'labels': tf.FixedLenFeature([], tf.float32)
  }
  model_agnostic_config = agnostic_predict.ModelAgnosticConfig(
      label_keys=['labels'],
      prediction_keys=['predictions'],
      feature_spec=feature_map)

  # Create a Model Agnostic Evaluate graph handler and feed in the FPL list.
  evaluate_graph = model_agnostic_evaluate_graph.ModelAgnosticEvaluateGraph(
      [add_mean_callback],
      model_agnostic_extractor.ModelAgnosticGetFPLFeedConfig(
          model_agnostic_config))
  evaluate_graph.metrics_reset_update_get_list([fpl, fpl_2, fpl_3])
  outputs = evaluate_graph.get_metric_values()

  # Verify that we got the right metrics out.
  self.assertEqual(2, len(outputs))
  self.assertEqual(outputs['tf_metric_mean'], 8.0)
  self.assertEqual(outputs['py_func_total_label'], 48.0)

def testModelAgnosticConstructFn(self):
  # End to end test for the entire flow going from tf.Examples -> metrics
  # with slicing.
  with beam.Pipeline() as pipeline:
    # Set up the inputs. All we need are tf.Examples and an example parsing
    # spec with an explicit mapping from key to (Features, Predictions,
    # Labels).
    examples = [
        self._makeExample(
            age=3.0, language='english', probabilities=1.0, labels=1.0),
        self._makeExample(
            age=3.0, language='chinese', probabilities=3.0, labels=0.0),
        self._makeExample(
            age=4.0, language='english', probabilities=2.0, labels=1.0),
        self._makeExample(
            age=5.0, language='chinese', probabilities=3.0, labels=0.0),
        # Add some examples with no language.
        self._makeExample(age=5.0, probabilities=2.0, labels=10.0),
        self._makeExample(age=6.0, probabilities=1.0, labels=0.0)
    ]
    serialized_examples = [e.SerializeToString() for e in examples]

    # Set up a config to bucket our example keys.
    feature_map = {
        'age': tf.FixedLenFeature([], tf.float32),
        'language': tf.VarLenFeature(tf.string),
        'probabilities': tf.FixedLenFeature([], tf.float32),
        'labels': tf.FixedLenFeature([], tf.float32)
    }
    model_agnostic_config = agnostic_predict.ModelAgnosticConfig(
        label_keys=['labels'],
        prediction_keys=['probabilities'],
        feature_spec=feature_map)

    # Set up the Model Agnostic Extractor.
    extractors = [
        model_agnostic_extractor.ModelAgnosticExtractor(
            model_agnostic_config=model_agnostic_config,
            desired_batch_size=3),
        slice_key_extractor.SliceKeyExtractor([
            slicer.SingleSliceSpec(),
            slicer.SingleSliceSpec(columns=['language'])
        ])
    ]

    # Set up the metrics we wish to calculate via a metric callback. In
    # particular, this metric calculates the mean and sum of all labels.
    eval_shared_model = types.EvalSharedModel(
        add_metrics_callbacks=[add_mean_callback],
        construct_fn=model_agnostic_evaluate_graph.make_construct_fn(
            add_metrics_callbacks=[add_mean_callback],
            fpl_feed_config=model_agnostic_extractor
            .ModelAgnosticGetFPLFeedConfig(model_agnostic_config)))

    # Run our pipeline doing Extract -> Slice -> Fanout -> Calculate Metrics.
    metrics, _ = (
        pipeline
        | 'Create Examples' >> beam.Create(serialized_examples)
        | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
        | 'Extract' >> tfma_unit.Extract(extractors=extractors)  # pylint: disable=no-value-for-parameter
        | 'ComputeMetricsAndPlots' >> metrics_and_plots_evaluator
        .ComputeMetricsAndPlots(eval_shared_model=eval_shared_model))

    # Verify our metrics are properly generated per slice.
    def check_result(got):
      self.assertEqual(3, len(got), 'got: %s' % got)
      slices = {}
      for slice_key, metrics in got:
        slices[slice_key] = metrics
      overall_slice = ()
      english_slice = (('language', b'english'),)
      chinese_slice = (('language', b'chinese'),)

      self.assertItemsEqual(
          list(slices.keys()), [overall_slice, english_slice, chinese_slice])
      # Overall slice has label/predictions sum = 24 and 12 elements.
      self.assertDictElementsAlmostEqual(slices[overall_slice], {
          'tf_metric_mean': 2.0,
          'py_func_total_label': 24.0,
      })
      # English slice has label/predictions sum = 5 and 4 elements.
      self.assertDictElementsAlmostEqual(slices[english_slice], {
          'tf_metric_mean': 1.25,
          'py_func_total_label': 5.0,
      })
      # Chinese slice has label/predictions sum = 6 and 4 elements.
      self.assertDictElementsAlmostEqual(slices[chinese_slice], {
          'tf_metric_mean': 1.5,
          'py_func_total_label': 6.0,
      })

    util.assert_that(metrics, check_result)

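# check_result above relies on an assertDictElementsAlmostEqual helper that is
# not shown in this section. A minimal sketch of the assumed behavior: assert
# that, for every expected key, the got dict holds an almost-equal value.
def assertDictElementsAlmostEqual(self, got, expected):
  for key, expected_value in expected.items():
    self.assertIn(key, got)
    self.assertAlmostEqual(expected_value, got[key], msg='key %s' % key)
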
def testEvaluateMultiLabelsPredictions(self):
  # Test case where we have multiple labels/predictions.
  features = {'age': {encoding.NODE_SUFFIX: np.array([1])}}
  predictions = {
      'prediction': {
          encoding.NODE_SUFFIX: np.array([2])
      },
      'prediction_2': {
          encoding.NODE_SUFFIX: np.array([4])
      }
  }
  # Have 6 labels with values 3, 5, 23, 12, 16, 31 and 6 predictions with
  # values 2, 2, 2, 4, 4, 4. This should give sum = 108 and mean = 9.
  labels = {
      'label': {
          encoding.NODE_SUFFIX: np.array([3])
      },
      'label_2': {
          encoding.NODE_SUFFIX: np.array([5])
      }
  }
  labels_2 = {
      'label': {
          encoding.NODE_SUFFIX: np.array([23])
      },
      'label_2': {
          encoding.NODE_SUFFIX: np.array([12])
      }
  }
  labels_3 = {
      'label': {
          encoding.NODE_SUFFIX: np.array([16])
      },
      'label_2': {
          encoding.NODE_SUFFIX: np.array([31])
      }
  }

  # Compile the actual FPLs.
  fpl = types.FeaturesPredictionsLabels(
      input_ref=0, features=features, predictions=predictions, labels=labels)
  fpl_2 = types.FeaturesPredictionsLabels(
      input_ref=0,
      features=features,
      predictions=predictions,
      labels=labels_2)
  fpl_3 = types.FeaturesPredictionsLabels(
      input_ref=0,
      features=features,
      predictions=predictions,
      labels=labels_3)

  # Set up a model agnostic config so we can get the FPLConfig.
  feature_map = {
      'age': tf.FixedLenFeature([], tf.float32),
      'prediction': tf.FixedLenFeature([], tf.int64),
      'prediction_2': tf.FixedLenFeature([], tf.int64),
      'label': tf.FixedLenFeature([], tf.int64),
      'label_2': tf.FixedLenFeature([], tf.int64)
  }
  model_agnostic_config = agnostic_predict.ModelAgnosticConfig(
      label_keys=['label', 'label_2'],
      prediction_keys=['prediction', 'prediction_2'],
      feature_spec=feature_map)

  # Create a Model Agnostic Evaluate graph handler and feed in the FPL list.
  evaluate_graph = model_agnostic_evaluate_graph.ModelAgnosticEvaluateGraph(
      [add_mean_callback],
      model_agnostic_extractor.ModelAgnosticGetFPLFeedConfig(
          model_agnostic_config))
  evaluate_graph.metrics_reset_update_get_list([fpl, fpl_2, fpl_3])
  outputs = evaluate_graph.get_metric_values()

  # Verify that we got the right metrics out.
  self.assertEqual(2, len(outputs))
  self.assertEqual(outputs['tf_metric_mean'], 9.0)
  self.assertEqual(outputs['py_func_total_label'], 108.0)

def testExtractFplExampleGraph(self):
  # Set up some examples with some sparseness.
  examples = [
      self._makeExample(
          age=0, language='english', probabilities=[0.2, 0.8], label=1),
      self._makeExample(age=1, language='chinese', label=0),
      self._makeExample(age=2, probabilities=[0.1, 0.9], label=1),
      self._makeExample(language='chinese', probabilities=[0.8, 0.2], label=0),
  ]

  # Set up the expected results for the fields. Note that comparing entire
  # FPLs directly will fail in numpy comparison.
  expected_age = [
      np.array([0]), np.array([1]), np.array([2]), np.array([3])
  ]
  expected_language = [
      tf.SparseTensorValue(
          indices=np.array([[0, 0]]),
          values=np.array([b'english'], dtype=np.object),
          dense_shape=np.array([1, 1])),
      tf.SparseTensorValue(
          indices=np.array([[0, 0]]),
          values=np.array([b'chinese'], dtype=np.object),
          dense_shape=np.array([1, 1])),
      tf.SparseTensorValue(
          indices=np.array([], dtype=np.int64).reshape([0, 2]),
          values=np.array([], dtype=np.object),
          dense_shape=np.array([1, 0])),
      tf.SparseTensorValue(
          indices=np.array([[0, 0]]),
          values=np.array([b'chinese'], dtype=np.object),
          dense_shape=np.array([1, 1]))
  ]
  expected_probabilities = [
      np.array([[0.2, 0.8]]),
      np.array([[0.5, 0.5]]),
      np.array([[0.1, 0.9]]),
      np.array([[0.8, 0.2]])
  ]
  expected_labels = [
      np.array([1]), np.array([0]), np.array([1]), np.array([0])
  ]

  # Serialize and feed into our graph.
  serialized_examples = [e.SerializeToString() for e in examples]

  # Set up a config to bucket our example keys.
  feature_map = {
      'age': tf.FixedLenFeature([1], tf.int64, default_value=[3]),
      'language': tf.VarLenFeature(tf.string),
      'probabilities':
          tf.FixedLenFeature([2], tf.float32, default_value=[0.5, 0.5]),
      'label': tf.FixedLenFeature([], tf.int64)
  }
  model_agnostic_config = model_agnostic_predict.ModelAgnosticConfig(
      label_keys=['label'],
      prediction_keys=['probabilities'],
      feature_spec=feature_map)

  # Create our model and extract our FPLs.
  agnostic_predict = model_agnostic_predict.ModelAgnosticPredict(
      model_agnostic_config)
  fpls = agnostic_predict.get_fpls_from_examples(serialized_examples)

  # Verify the result is the correct size, has all the keys, and our
  # expected values match.
  self.assertEqual(4, len(fpls))
  for i, fpl in enumerate(fpls):
    self.assertIn('language', fpl.features)
    self.assertIn('label', fpl.labels)
    self.assertIn('label', fpl.features)  # Labels should also be in features.
    self.assertIn('probabilities', fpl.predictions)
    self.assertIn('age', fpl.features)
    self.assertEqual(expected_age[i], fpl.features['age']['node'])
    self.assertSparseTensorValueEqual(expected_language[i],
                                      fpl.features['language']['node'])
    self.assertAllClose(expected_probabilities[i],
                        fpl.predictions['probabilities']['node'])
    self.assertEqual(expected_labels[i], fpl.labels['label']['node'])

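# testExtractFplExampleGraph compares sparse tensors with an
# assertSparseTensorValueEqual helper that is not shown in this section. A
# minimal sketch of the assumed behavior: compare the indices, values, and
# dense_shape components of the two SparseTensorValues element-wise.
def assertSparseTensorValueEqual(self, expected, got):
  self.assertAllEqual(expected.indices, got.indices)
  self.assertAllEqual(expected.values, got.values)
  self.assertAllEqual(expected.dense_shape, got.dense_shape)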