def testTFJSPredictExtractorWithKerasModel(self, multi_model, multi_output):
    """Checks the prediction keys emitted by TFJSPredictExtractor.

    A small two-input Keras model is fit for a single step, saved, converted
    with ``converter.convert`` and then run through the features extractor
    and the TFJS predict extractor inside a Beam pipeline over two examples.

    Args:
      multi_model: When true, two model specs ('model1' and 'model2') backed
        by the same converted model directory are evaluated.
      multi_output: When true, the model is built with a second output
        ('output2') in addition to 'output1'.
    """
    input1 = tf.keras.layers.Input(shape=(1, ), name='input1')
    input2 = tf.keras.layers.Input(shape=(1, ), name='input2')
    inputs = [input1, input2]
    input_layer = tf.keras.layers.concatenate(inputs)
    output_layers = {}
    output_layers['output1'] = (tf.keras.layers.Dense(
        1, activation=tf.nn.sigmoid, name='output1')(input_layer))
    if multi_output:
        output_layers['output2'] = (tf.keras.layers.Dense(
            1, activation=tf.nn.sigmoid, name='output2')(input_layer))

    model = tf.keras.models.Model(inputs, output_layers)
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    # that newer Keras releases no longer accept.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=.001),
                  loss=tf.keras.losses.binary_crossentropy,
                  metrics=['accuracy'])

    train_features = {'input1': [[0.0], [1.0]], 'input2': [[1.0], [0.0]]}
    labels = {'output1': [[1], [0]]}
    if multi_output:
        labels['output2'] = [[1], [0]]

    example_weights = {'output1': [1.0, 0.5]}
    if multi_output:
        example_weights['output2'] = [1.0, 0.5]
    dataset = tf.data.Dataset.from_tensor_slices(
        (train_features, labels, example_weights))
    dataset = dataset.shuffle(buffer_size=1).repeat().batch(2)
    model.fit(dataset, steps_per_epoch=1)

    # Save the trained model, then convert the SavedModel into the
    # destination directory that the 'tf_js' shared models load from.
    src_model_path = tempfile.mkdtemp()
    model.save(src_model_path)

    dst_model_path = tempfile.mkdtemp()
    converter.convert([
        '--input_format=tf_saved_model',
        '--saved_model_tags=serve',
        '--signature_name=serving_default',
        src_model_path,
        dst_model_path,
    ])

    model_specs = [config.ModelSpec(name='model1', model_type='tf_js')]
    if multi_model:
        model_specs.append(
            config.ModelSpec(name='model2', model_type='tf_js'))

    eval_config = config.EvalConfig(model_specs=model_specs)
    eval_shared_models = [
        self.createTestEvalSharedModel(
            model_name='model1',
            eval_saved_model_path=dst_model_path,
            model_type='tf_js')
    ]
    if multi_model:
        eval_shared_models.append(
            self.createTestEvalSharedModel(
                model_name='model2',
                eval_saved_model_path=dst_model_path,
                model_type='tf_js'))

    schema = text_format.Parse(
        """
        feature {
          name: "input1"
          type: FLOAT
        }
        feature {
          name: "input2"
          type: FLOAT
        }
        feature {
          name: "non_model_feature"
          type: INT
        }
        """, schema_pb2.Schema())
    tfx_io = test_util.InMemoryTFExampleRecord(
        schema=schema, raw_record_column_name=constants.ARROW_INPUT_COLUMN)
    feature_extractor = features_extractor.FeaturesExtractor(eval_config)
    predictor = tfjs_predict_extractor.TFJSPredictExtractor(
        eval_config=eval_config, eval_shared_model=eval_shared_models)

    examples = [
        self._makeExample(input1=0.0, input2=1.0, non_model_feature=0),
        self._makeExample(input1=1.0, input2=0.0, non_model_feature=1),
    ]

    with beam.Pipeline() as pipeline:
        # pylint: disable=no-value-for-parameter
        result = (
            pipeline
            | 'Create' >> beam.Create(
                [e.SerializeToString() for e in examples], reshuffle=False)
            | 'BatchExamples' >> tfx_io.BeamSource(batch_size=2)
            | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
            | feature_extractor.stage_name >> feature_extractor.ptransform
            | predictor.stage_name >> predictor.ptransform)

        # pylint: enable=no-value-for-parameter

        def check_result(got):
            try:
                # Both examples fit in one batch of size 2.
                self.assertLen(got, 1)
                got = got[0]
                self.assertIn(constants.PREDICTIONS_KEY, got)
                self.assertLen(got[constants.PREDICTIONS_KEY], 2)

                for item in got[constants.PREDICTIONS_KEY]:
                    if multi_model:
                        self.assertIn('model1', item)
                        self.assertIn('model2', item)
                        if multi_output:
                            self.assertIn('Identity', item['model1'])
                            self.assertIn('Identity_1', item['model1'])
                    elif multi_output:
                        self.assertIn('Identity', item)
                        self.assertIn('Identity_1', item)

            except AssertionError as err:
                raise util.BeamAssertException(err)

        util.assert_that(result, check_result, label='result')
class ModelUtilTest(testutil.TensorflowModelAnalysisTest,
                    parameterized.TestCase):
    """Tests for the helper functions and DoFns in ``model_util``."""

    def _exportModelForTest(self, model, signatures, save_as_keras):
        """Saves `model` with `signatures` to a fresh temp dir.

        Args:
          model: Keras model to export.
          signatures: Dict of signature name to concrete function.
          save_as_keras: When true the model is written with `model.save`,
            otherwise with `tf.saved_model.save`.

        Returns:
          Path of the directory the model was written to.
        """
        export_path = tempfile.mkdtemp()
        if save_as_keras:
            model.save(export_path, save_format='tf', signatures=signatures)
        else:
            tf.saved_model.save(model, export_path, signatures=signatures)
        return export_path

    def _loadExportedModel(self, export_path, save_as_keras):
        """Loads a model from `export_path` the same way it was saved."""
        if save_as_keras:
            return tf.keras.models.load_model(export_path)
        return tf.compat.v1.saved_model.load_v2(export_path)

    def createDenseInputsSchema(self):
        """Returns a schema with dense representations for input_1/input_2."""
        return text_format.Parse(
            """
            tensor_representation_group {
              key: ""
              value {
                tensor_representation {
                  key: "input_1"
                  value {
                    dense_tensor {
                      column_name: "input_1"
                      shape { dim { size: 1 } }
                    }
                  }
                }
                tensor_representation {
                  key: "input_2"
                  value {
                    dense_tensor {
                      column_name: "input_2"
                      shape { dim { size: 1 } }
                    }
                  }
                }
              }
            }
            feature {
              name: "input_1"
              type: FLOAT
            }
            feature {
              name: "input_2"
              type: FLOAT
            }
            feature {
              name: "non_model_feature"
              type: INT
            }
            """, schema_pb2.Schema())

    def createModelWithSingleInput(self, save_as_keras):
        """Exports a single-input model with two signatures.

        The same concrete function is registered as both 'serving_default'
        and 'custom_signature'.

        Args:
          save_as_keras: Whether to save via keras or `tf.saved_model.save`.

        Returns:
          Path of the exported model.
        """
        input_layer = tf.keras.layers.Input(shape=(1, ), name='input')
        output_layer = tf.keras.layers.Dense(
            1, activation=tf.nn.sigmoid)(input_layer)
        model = tf.keras.models.Model(input_layer, output_layer)

        @tf.function
        def serving_default(s):
            return model(s)

        input_spec = {
            'input':
                tf.TensorSpec(shape=(None, 1), dtype=tf.string, name='input'),
        }
        signatures = {
            'serving_default':
                serving_default.get_concrete_function(input_spec),
            'custom_signature':
                serving_default.get_concrete_function(input_spec),
        }

        return self._exportModelForTest(model, signatures, save_as_keras)

    def createModelWithMultipleDenseInputs(self, save_as_keras):
        """Exports a two-dense-input model with three signatures.

        Signatures: 'serving_default' (parses serialized tf.Examples),
        'custom_single_output' and 'custom_multi_output' (both take the
        feature dict directly).

        Args:
          save_as_keras: Whether to save via keras or `tf.saved_model.save`.

        Returns:
          Path of the exported model.
        """
        input1 = tf.keras.layers.Input(shape=(1, ), name='input_1')
        input2 = tf.keras.layers.Input(shape=(1, ), name='input_2')
        inputs = [input1, input2]
        input_layer = tf.keras.layers.concatenate(inputs)
        output_layer = tf.keras.layers.Dense(1,
                                             activation=tf.nn.sigmoid,
                                             name='output')(input_layer)
        model = tf.keras.models.Model(inputs, output_layer)

        # Add custom attribute to model to test callables stored as attributes
        model.custom_attribute = tf.keras.models.Model(inputs, output_layer)

        @tf.function
        def serving_default(serialized_tf_examples):
            parsed_features = tf.io.parse_example(
                serialized_tf_examples, {
                    'input_1': tf.io.FixedLenFeature([1], dtype=tf.float32),
                    'input_2': tf.io.FixedLenFeature([1], dtype=tf.float32)
                })
            return model(parsed_features)

        @tf.function
        def custom_single_output(features):
            return model(features)

        @tf.function
        def custom_multi_output(features):
            return {'output1': model(features), 'output2': model(features)}

        input_spec = tf.TensorSpec(shape=(None, ),
                                   dtype=tf.string,
                                   name='examples')
        custom_input_spec = {
            'input_1':
                tf.TensorSpec(shape=(None, 1),
                              dtype=tf.float32,
                              name='input_1'),
            'input_2':
                tf.TensorSpec(shape=(None, 1),
                              dtype=tf.float32,
                              name='input_2')
        }
        signatures = {
            'serving_default':
                serving_default.get_concrete_function(input_spec),
            'custom_single_output':
                custom_single_output.get_concrete_function(custom_input_spec),
            'custom_multi_output':
                custom_multi_output.get_concrete_function(custom_input_spec)
        }

        return self._exportModelForTest(model, signatures, save_as_keras)

    def createModelWithMultipleMixedInputs(self, save_as_keras):
        """Exports a model with one dense, one sparse and one ragged input.

        Args:
          save_as_keras: Whether to save via keras or `tf.saved_model.save`.

        Returns:
          Path of the exported model.
        """
        dense_input = tf.keras.layers.Input(shape=(2, ),
                                            name='input_1',
                                            dtype=tf.int64)
        dense_float_input = tf.cast(dense_input, tf.float32)
        sparse_input = tf.keras.layers.Input(shape=(1, ),
                                             name='input_2',
                                             sparse=True)
        dense_sparse_input = tf.keras.layers.Dense(
            1, name='dense_input2')(sparse_input)
        ragged_input = tf.keras.layers.Input(shape=(None, ),
                                             name='input_3',
                                             ragged=True)
        dense_ragged_input = tf.keras.layers.Lambda(lambda x: x.to_tensor())(
            ragged_input)
        dense_ragged_input.set_shape((None, 1))
        inputs = [dense_input, sparse_input, ragged_input]
        input_layer = tf.keras.layers.concatenate(
            [dense_float_input, dense_sparse_input, dense_ragged_input])
        output_layer = tf.keras.layers.Dense(
            1, activation=tf.nn.sigmoid)(input_layer)
        model = tf.keras.models.Model(inputs, output_layer)

        @tf.function
        def serving_default(features):
            return model(features)

        input_spec = {
            'input_1':
                tf.TensorSpec(shape=(None, 2), dtype=tf.int64,
                              name='input_1'),
            'input_2':
                tf.SparseTensorSpec(shape=(None, 1), dtype=tf.float32),
            'input_3':
                tf.RaggedTensorSpec(shape=(None, 1), dtype=tf.float32)
        }
        signatures = {
            'serving_default':
                serving_default.get_concrete_function(input_spec),
            'custom_signature':
                serving_default.get_concrete_function(input_spec),
        }

        return self._exportModelForTest(model, signatures, save_as_keras)

    def testFilterByInputNames(self):
        """filter_by_input_names keeps only the requested tensors."""
        tensors = {
            'f1': tf.constant([[1.1], [2.1]], dtype=tf.float32),
            'f2': tf.constant([[1], [2]], dtype=tf.int64),
            'f3': tf.constant([['hello'], ['world']], dtype=tf.string)
        }
        filtered_tensors = model_util.filter_by_input_names(
            tensors, ['f1', 'f3'])
        self.assertLen(filtered_tensors, 2)
        self.assertAllEqual(tf.constant([[1.1], [2.1]], dtype=tf.float32),
                            filtered_tensors['f1'])
        self.assertAllEqual(
            tf.constant([['hello'], ['world']], dtype=tf.string),
            filtered_tensors['f3'])

    def testFilterByInputNamesKeras(self):
        """Names carrying KERAS_INPUT_SUFFIX match the unsuffixed tensors."""
        tensors = {
            'f1': tf.constant([[1.1], [2.1]], dtype=tf.float32),
            'f2': tf.constant([[1], [2]], dtype=tf.int64),
            'f3': tf.constant([['hello'], ['world']], dtype=tf.string)
        }
        filtered_tensors = model_util.filter_by_input_names(
            tensors, [
                'f1' + model_util.KERAS_INPUT_SUFFIX,
                'f3' + model_util.KERAS_INPUT_SUFFIX
            ])
        self.assertLen(filtered_tensors, 2)
        self.assertAllEqual(
            tf.constant([[1.1], [2.1]], dtype=tf.float32),
            filtered_tensors['f1' + model_util.KERAS_INPUT_SUFFIX])
        self.assertAllEqual(
            tf.constant([['hello'], ['world']], dtype=tf.string),
            filtered_tensors['f3' + model_util.KERAS_INPUT_SUFFIX])

    @parameterized.named_parameters(
        ('output_name_and_label_key', config.ModelSpec(label_key='label'),
         'output', 'label'),
        ('output_name_and_label_keys',
         config.ModelSpec(label_keys={'output': 'label'}), 'output', 'label'),
        ('output_name_and_no_label_keys', config.ModelSpec(), 'output', None),
        ('no_output_name_and_label_key', config.ModelSpec(label_key='label'),
         '', 'label'),
        ('no_output_name_and_no_label_keys', config.ModelSpec(), '', None))
    def testGetLabelKey(self, model_spec, output_name, expected_label_key):
        self.assertEqual(expected_label_key,
                         model_util.get_label_key(model_spec, output_name))

    def testGetLabelKeyNoOutputAndLabelKeys(self):
        """label_keys without an output name is rejected with ValueError."""
        with self.assertRaises(ValueError):
            model_util.get_label_key(
                config.ModelSpec(label_keys={'output1': 'label'}), '')

    @parameterized.named_parameters(
        {
            'testcase_name': 'single_model_single_key',
            'model_specs': [config.ModelSpec(label_key='feature1')],
            'field': 'label_key',
            'multi_output_field': 'label_keys',
            'expected_values': [
                [1.0, 1.1, 1.2],
            ]
        },
        {
            'testcase_name': 'single_model_multi_key',
            'model_specs': [
                config.ModelSpec(label_keys={
                    'output1': 'feature1',
                    'output2': 'feature2'
                })
            ],
            'field': 'label_key',
            'multi_output_field': 'label_keys',
            'expected_values': [
                {
                    'output1': [1.0, 1.1, 1.2],
                    'output2': [2.0, 2.1, 2.2]
                },
            ]
        },
        {
            'testcase_name': 'multi_model_single_key',
            'model_specs': [
                config.ModelSpec(name='model1',
                                 example_weight_key='feature2'),
                config.ModelSpec(name='model2',
                                 example_weight_key='feature3')
            ],
            'field': 'example_weight_key',
            'multi_output_field': 'example_weight_keys',
            'expected_values': [
                {
                    'model1': [2.0, 2.1, 2.2],
                    'model2': [3.0, 3.1, 3.2]
                },
            ]
        },
        {
            'testcase_name': 'multi_model_multi_key',
            'model_specs': [
                config.ModelSpec(name='model1',
                                 prediction_keys={
                                     'output1': 'feature1',
                                     'output2': 'feature2'
                                 }),
                config.ModelSpec(name='model2',
                                 prediction_keys={
                                     'output1': 'feature1',
                                     'output3': 'feature3'
                                 })
            ],
            'field': 'prediction_key',
            'multi_output_field': 'prediction_keys',
            'expected_values': [
                {
                    'model1': {
                        'output1': [1.0, 1.1, 1.2],
                        'output2': [2.0, 2.1, 2.2]
                    },
                    'model2': {
                        'output1': [1.0, 1.1, 1.2],
                        'output3': [3.0, 3.1, 3.2]
                    }
                },
            ]
        },
    )
    def testGetFeatureValuesForModelSpecField(self, model_specs, field,
                                              multi_output_field,
                                              expected_values):
        extracts = {
            # Only need the num_rows from RecordBatch so use fake array of
            # same len as features.
            constants.ARROW_RECORD_BATCH_KEY:
                pa.RecordBatch.from_arrays([pa.array([1])], ['dummy']),
            constants.FEATURES_KEY: [
                {
                    'feature1': [1.0, 1.1, 1.2],
                    'feature2': [2.0, 2.1, 2.2],
                    'feature3': [3.0, 3.1, 3.2],
                },
            ]
        }
        got = model_util.get_feature_values_for_model_spec_field(
            model_specs, field, multi_output_field, extracts)
        self.assertAlmostEqual(expected_values, got)

    @parameterized.named_parameters(
        {
            'testcase_name': 'single_model_single_key',
            'model_specs': [config.ModelSpec(label_key='feature2')],
            'field': 'label_key',
            'multi_output_field': 'label_keys',
            'expected_values': [
                [4.0, 4.1, 4.2],
            ]
        },
        {
            'testcase_name': 'single_model_multi_key',
            'model_specs': [
                config.ModelSpec(label_keys={
                    'output1': 'feature1',
                    'output2': 'feature2'
                })
            ],
            'field': 'label_key',
            'multi_output_field': 'label_keys',
            'expected_values': [
                {
                    'output1': [1.0, 1.1, 1.2],
                    'output2': [4.0, 4.1, 4.2]
                },
            ]
        },
    )
    def testGetFeatureValuesForModelSpecFieldWithSingleModelTransforedFeatures(
            self, model_specs, field, multi_output_field, expected_values):
        # The expected values take 'feature2' from the transformed features
        # (4.x) rather than from the raw features (2.x).
        extracts = {
            # Only need the num_rows from RecordBatch so use fake array of
            # same len as features.
            constants.ARROW_RECORD_BATCH_KEY:
                pa.RecordBatch.from_arrays([pa.array([1])], ['dummy']),
            constants.FEATURES_KEY: [
                {
                    'feature1': [1.0, 1.1, 1.2],
                    'feature2': [2.0, 2.1, 2.2],
                },
            ],
            constants.TRANSFORMED_FEATURES_KEY: [
                {
                    'feature2': [4.0, 4.1, 4.2],
                },
            ]
        }
        got = model_util.get_feature_values_for_model_spec_field(
            model_specs, field, multi_output_field, extracts)
        self.assertAlmostEqual(expected_values, got)

    @parameterized.named_parameters(
        {
            'testcase_name': 'multi_model_single_key',
            'model_specs': [
                config.ModelSpec(name='model1',
                                 example_weight_key='feature2'),
                config.ModelSpec(name='model2',
                                 example_weight_key='feature3')
            ],
            'field': 'example_weight_key',
            'multi_output_field': 'example_weight_keys',
            'expected_values': [
                {
                    'model1': [4.0, 4.1, 4.2],
                    'model2': [7.0, 7.1, 7.2]
                },
            ]
        },
        {
            'testcase_name': 'multi_model_multi_key',
            'model_specs': [
                config.ModelSpec(name='model1',
                                 example_weight_keys={
                                     'output1': 'feature1',
                                     'output2': 'feature2'
                                 }),
                config.ModelSpec(name='model2',
                                 example_weight_keys={
                                     'output1': 'feature1',
                                     'output3': 'feature3'
                                 })
            ],
            'field': 'example_weight_key',
            'multi_output_field': 'example_weight_keys',
            'expected_values': [
                {
                    'model1': {
                        'output1': [1.0, 1.1, 1.2],
                        'output2': [4.0, 4.1, 4.2]
                    },
                    'model2': {
                        'output1': [1.0, 1.1, 1.2],
                        'output3': [7.0, 7.1, 7.2]
                    }
                },
            ]
        },
    )
    def testGetFeatureValuesForModelSpecFieldWithMultiModelTransforedFeatures(
            self, model_specs, field, multi_output_field, expected_values):
        # Transformed features are keyed by model name here; each model's
        # expected values come from its own transformed feature dict.
        extracts = {
            # Only need the num_rows from RecordBatch so use fake array of
            # same len as features.
            constants.ARROW_RECORD_BATCH_KEY:
                pa.RecordBatch.from_arrays([pa.array([1])], ['dummy']),
            constants.FEATURES_KEY: [
                {
                    'feature1': [1.0, 1.1, 1.2],
                    'feature2': [2.0, 2.1, 2.2],
                },
            ],
            constants.TRANSFORMED_FEATURES_KEY: [
                {
                    'model1': {
                        'feature2': [4.0, 4.1, 4.2],
                        'feature3': [5.0, 5.1, 5.2]
                    },
                    'model2': {
                        'feature2': [6.0, 6.1, 6.2],
                        'feature3': [7.0, 7.1, 7.2]
                    }
                },
            ]
        }
        got = model_util.get_feature_values_for_model_spec_field(
            model_specs, field, multi_output_field, extracts)
        self.assertAlmostEqual(expected_values, got)

    def testGetFeatureValuesForModelSpecFieldNoValues(self):
        """Extracts without any features yield None."""
        model_spec = config.ModelSpec(name='model1',
                                      example_weight_key='feature2')
        extracts = {
            constants.ARROW_RECORD_BATCH_KEY:
                pa.RecordBatch.from_arrays([pa.array([1])], ['dummy']),
        }
        got = model_util.get_feature_values_for_model_spec_field(
            [model_spec], 'example_weight', 'example_weights', extracts)
        self.assertIsNone(got)

    @parameterized.named_parameters(
        ('keras_serving_default', True, 'serving_default'),
        ('keras_custom_signature', True, 'custom_signature'),
        ('tf2_serving_default', False, 'serving_default'),
        ('tf2_custom_signature', False, 'custom_signature'))
    def testGetCallableWithSignatures(self, save_as_keras, signature_name):
        export_path = self.createModelWithSingleInput(save_as_keras)
        model = self._loadExportedModel(export_path, save_as_keras)
        self.assertIsNotNone(model_util.get_callable(model, signature_name))

    @parameterized.named_parameters(('keras', True), ('tf2', False))
    def testGetCallableWithMissingSignatures(self, save_as_keras):
        export_path = self.createModelWithSingleInput(save_as_keras)
        model = self._loadExportedModel(export_path, save_as_keras)
        with self.assertRaises(ValueError):
            model_util.get_callable(model, 'non_existent')

    @unittest.skipIf(_TF_MAJOR_VERSION < 2,
                     'not all input types supported for TF1')
    def testGetCallableWithKerasModel(self):
        """Without a signature name the keras model itself is the callable."""
        export_path = self.createModelWithMultipleMixedInputs(True)
        model = tf.keras.models.load_model(export_path)
        self.assertEqual(model, model_util.get_callable(model))

    @parameterized.named_parameters(
        ('keras_serving_default', True, 'serving_default'),
        ('keras_custom_signature', True, 'custom_signature'),
        ('tf2_serving_default', False, None),
        ('tf2_custom_signature', False, 'custom_signature'))
    def testGetInputSpecsWithSignatures(self, save_as_keras, signature_name):
        export_path = self.createModelWithSingleInput(save_as_keras)
        model = self._loadExportedModel(export_path, save_as_keras)
        self.assertEqual(
            {
                'input':
                    tf.TensorSpec(name='input',
                                  shape=(None, 1),
                                  dtype=tf.string),
            }, model_util.get_input_specs(model, signature_name))

    @parameterized.named_parameters(('keras', True), ('tf2', False))
    def testGetInputSpecsWithMissingSignatures(self, save_as_keras):
        export_path = self.createModelWithSingleInput(save_as_keras)
        model = self._loadExportedModel(export_path, save_as_keras)
        with self.assertRaises(ValueError):
            # Exercise get_input_specs here; get_callable is already covered
            # by testGetCallableWithMissingSignatures.
            model_util.get_input_specs(model, 'non_existent')

    @unittest.skipIf(_TF_MAJOR_VERSION < 2,
                     'not all input types supported for TF1')
    def testGetInputSpecsWithKerasModel(self):
        export_path = self.createModelWithMultipleMixedInputs(True)
        model = tf.keras.models.load_model(export_path)

        # Some versions of TF set the TensorSpec.name and others do not. Since
        # we don't care about the name, clear it from the output for testing
        # purposes
        specs = model_util.get_input_specs(model)
        for k, v in specs.items():
            if isinstance(v, tf.TensorSpec):
                specs[k] = tf.TensorSpec(shape=v.shape, dtype=v.dtype)
        self.assertEqual(
            {
                'input_1':
                    tf.TensorSpec(shape=(None, 2), dtype=tf.int64),
                'input_2':
                    tf.SparseTensorSpec(shape=(None, 1), dtype=tf.float32),
                'input_3':
                    tf.RaggedTensorSpec(shape=(None, None),
                                        dtype=tf.float32),
            }, specs)

    def testInputSpecsToTensorRepresentations(self):
        """Dense, sparse and ragged specs map to matching representations."""
        tensor_representations = (
            model_util.input_specs_to_tensor_representations({
                'input_1':
                    tf.TensorSpec(shape=(None, 2), dtype=tf.int64),
                'input_2':
                    tf.SparseTensorSpec(shape=(None, 1), dtype=tf.float32),
                'input_3':
                    tf.RaggedTensorSpec(shape=(None, None),
                                        dtype=tf.float32),
            }))
        dense_tensor_representation = text_format.Parse(
            """
            dense_tensor {
              column_name: "input_1"
              shape { dim { size: 2 } }
            }
            """, schema_pb2.TensorRepresentation())
        sparse_tensor_representation = text_format.Parse(
            """
            varlen_sparse_tensor {
              column_name: "input_2"
            }
            """, schema_pb2.TensorRepresentation())
        ragged_tensor_representation = text_format.Parse(
            """
            ragged_tensor {
              feature_path {
                step: "input_3"
              }
            }
            """, schema_pb2.TensorRepresentation())
        self.assertEqual(
            {
                'input_1': dense_tensor_representation,
                'input_2': sparse_tensor_representation,
                'input_3': ragged_tensor_representation
            }, tensor_representations)

    def testInputSpecsToTensorRepresentationsRaisesWithUnknownDims(self):
        with self.assertRaises(ValueError):
            model_util.input_specs_to_tensor_representations({
                'input_1':
                    tf.TensorSpec(shape=(None, None), dtype=tf.int64),
            })

    @parameterized.named_parameters(
        ('keras_default', True, {
            constants.PREDICTIONS_KEY: {
                '': [None]
            }
        }, None, False, True, 1),
        ('tf_default', False, {
            constants.PREDICTIONS_KEY: {
                '': [None]
            }
        }, None, False, True, 1),
        ('keras_serving_default', True, {
            constants.PREDICTIONS_KEY: {
                '': ['serving_default']
            }
        }, None, False, True, 1),
        ('tf_serving_default', False, {
            constants.PREDICTIONS_KEY: {
                '': ['serving_default']
            }
        }, None, False, True, 1),
        ('keras_custom_single_output', True, {
            constants.PREDICTIONS_KEY: {
                '': ['custom_single_output']
            }
        }, None, False, True, 1),
        ('tf_custom_single_output', False, {
            constants.PREDICTIONS_KEY: {
                '': ['custom_single_output']
            }
        }, None, False, True, 1),
        ('keras_custom_multi_output', True, {
            constants.PREDICTIONS_KEY: {
                '': ['custom_multi_output']
            }
        }, None, False, True, 2),
        ('tf_custom_multi_output', False, {
            constants.PREDICTIONS_KEY: {
                '': ['custom_multi_output']
            }
        }, None, False, True, 2),
        ('multi_model', True, {
            constants.PREDICTIONS_KEY: {
                'model1': ['custom_multi_output'],
                'model2': ['custom_multi_output']
            }
        }, None, False, True, 2),
        ('default_signatures', True, {
            constants.PREDICTIONS_KEY: {
                '': [],
            }
        }, ['unknown', 'custom_single_output'], False, True, 1),
        ('keras_prefer_dict_outputs', True, {
            constants.FEATURES_KEY: {
                '': [],
            }
        }, ['unknown', 'custom_single_output', 'custom_multi_output'
           ], True, True, 3),
        ('tf_prefer_dict_outputs', False, {
            constants.FEATURES_KEY: {
                '': [],
            }
        }, ['unknown', 'custom_single_output', 'custom_multi_output'
           ], True, True, 3),
        ('custom_attribute', True, {
            constants.FEATURES_KEY: {
                '': ['custom_attribute'],
            }
        }, None, True, True, 1),
        ('keras_no_schema', True, {
            constants.PREDICTIONS_KEY: {
                '': [None]
            }
        }, None, False, False, 1),
        ('tf_no_schema', False, {
            constants.PREDICTIONS_KEY: {
                '': [None]
            }
        }, None, False, False, 1),
    )
    @unittest.skipIf(_TF_MAJOR_VERSION < 2,
                     'not all signatures supported for TF1')
    def testModelSignaturesDoFn(self, save_as_keras, signature_names,
                                default_signature_names, prefer_dict_outputs,
                                use_schema, expected_num_outputs):
        """Runs ModelSignaturesDoFn over one batch of three examples.

        Args:
          save_as_keras: Whether the model under test is saved via keras.
          signature_names: Dict of extracts key to a dict of model name to
            signature names; passed straight to the DoFn.
          default_signature_names: Passed straight to the DoFn.
          prefer_dict_outputs: Passed to the DoFn; when true every output
            entry is also checked to be a dict of `expected_num_outputs`.
          use_schema: Whether a schema and tensor adapter config are used.
          expected_num_outputs: Expected size of each dict output entry.
        """
        export_path = self.createModelWithMultipleDenseInputs(save_as_keras)
        eval_shared_models = {}
        model_specs = []
        # One shared model and one model spec per distinct model name.
        for sigs in signature_names.values():
            for model_name in sigs:
                if model_name not in eval_shared_models:
                    eval_shared_models[
                        model_name] = self.createTestEvalSharedModel(
                            eval_saved_model_path=export_path,
                            model_name=model_name,
                            tags=[tf.saved_model.SERVING])
                    model_specs.append(config.ModelSpec(name=model_name))
        eval_config = config.EvalConfig(model_specs=model_specs)
        schema = self.createDenseInputsSchema() if use_schema else None
        tfx_io = tf_example_record.TFExampleBeamRecord(
            physical_format='text',
            schema=schema,
            raw_record_column_name=constants.ARROW_INPUT_COLUMN)
        tensor_adapter_config = None
        if use_schema:
            tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
                arrow_schema=tfx_io.ArrowSchema(),
                tensor_representations=tfx_io.TensorRepresentations())

        examples = [
            self._makeExample(input_1=1.0, input_2=2.0),
            self._makeExample(input_1=3.0, input_2=4.0),
            self._makeExample(input_1=5.0, input_2=6.0),
        ]

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            result = (pipeline
                      | 'Create' >> beam.Create(
                          [e.SerializeToString() for e in examples])
                      | 'BatchExamples' >> tfx_io.BeamSource(batch_size=3)
                      | 'ToExtracts' >> beam.Map(_record_batch_to_extracts)
                      | 'ModelSignatures' >> beam.ParDo(
                          model_util.ModelSignaturesDoFn(
                              eval_config=eval_config,
                              eval_shared_models=eval_shared_models,
                              signature_names=signature_names,
                              default_signature_names=(
                                  default_signature_names),
                              prefer_dict_outputs=prefer_dict_outputs,
                              tensor_adapter_config=tensor_adapter_config)))

            # pylint: enable=no-value-for-parameter

            def check_result(got):
                try:
                    self.assertLen(got, 1)
                    for key in signature_names:
                        self.assertIn(key, got[0])
                        if prefer_dict_outputs:
                            for entry in got[0][key]:
                                self.assertIsInstance(entry, dict)
                                self.assertLen(entry, expected_num_outputs)

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(result, check_result, label='result')

    def testHasRubberStamp(self):
        # Model agnostic.
        self.assertFalse(model_util.has_rubber_stamp(None))

        # All non baseline models has rubber stamp.
        baseline = self.createTestEvalSharedModel(
            model_name=constants.BASELINE_KEY)
        candidate = self.createTestEvalSharedModel(
            model_name=constants.CANDIDATE_KEY, rubber_stamp=True)
        self.assertTrue(model_util.has_rubber_stamp([baseline, candidate]))

        # Not all non baseline has rubber stamp.
        candidate_nr = self.createTestEvalSharedModel(
            model_name=constants.CANDIDATE_KEY)
        self.assertFalse(model_util.has_rubber_stamp([candidate_nr]))
        self.assertFalse(
            model_util.has_rubber_stamp([baseline, candidate, candidate_nr]))
def testUnbatchExtractor(self):
    """Verifies UnbatchExtractor splits one batch into per-example extracts.

    Three examples are batched together, passed through the batched input
    extractor and then unbatched; each resulting extract is checked for its
    features, label and example weight.
    """
    eval_config = config.EvalConfig(model_specs=[
        config.ModelSpec(label_key='label',
                         example_weight_key='example_weight')
    ])
    batched_inputs = batched_input_extractor.BatchedInputExtractor(
        eval_config)
    unbatcher = unbatch_extractor.UnbatchExtractor()

    schema = text_format.Parse(
        """
        feature {
          name: "label"
          type: FLOAT
        }
        feature {
          name: "example_weight"
          type: FLOAT
        }
        feature {
          name: "fixed_int"
          type: INT
        }
        feature {
          name: "fixed_float"
          type: FLOAT
        }
        feature {
          name: "fixed_string"
          type: BYTES
        }
        """, schema_pb2.Schema())
    tfx_io = test_util.InMemoryTFExampleRecord(
        schema=schema, raw_record_column_name=constants.ARROW_INPUT_COLUMN)

    examples = [
        self._makeExample(label=1.0,
                          example_weight=0.5,
                          fixed_int=1,
                          fixed_float=1.0,
                          fixed_string='fixed_string1'),
        self._makeExample(label=0.0,
                          example_weight=0.0,
                          fixed_int=1,
                          fixed_float=1.0,
                          fixed_string='fixed_string2'),
        self._makeExample(label=0.0,
                          example_weight=1.0,
                          fixed_int=2,
                          fixed_float=0.0,
                          fixed_string='fixed_string3')
    ]
    serialized_examples = [e.SerializeToString() for e in examples]

    # Expected per-example values, in input order:
    # (fixed_int, fixed_float, fixed_string, label, example_weight)
    expected_rows = [
        (1, 1.0, b'fixed_string1', 1.0, 0.5),
        (1, 1.0, b'fixed_string2', 0.0, 0.0),
        (2, 0.0, b'fixed_string3', 0.0, 1.0),
    ]

    with beam.Pipeline() as pipeline:
        # pylint: disable=no-value-for-parameter
        result = (
            pipeline
            | 'Create' >> beam.Create(serialized_examples, reshuffle=False)
            | 'BatchExamples' >> tfx_io.BeamSource(batch_size=3)
            | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
            | batched_inputs.stage_name >> batched_inputs.ptransform
            | unbatcher.stage_name >> unbatcher.ptransform)

        # pylint: enable=no-value-for-parameter

        def check_result(got):
            try:
                self.assertLen(got, 3)
                for extracts, row in zip(got, expected_rows):
                    fixed_int, fixed_float, fixed_string, label, weight = row
                    features = extracts[constants.FEATURES_KEY]
                    self.assertDictElementsAlmostEqual(
                        features, {
                            'fixed_int': np.array([fixed_int]),
                            'fixed_float': np.array([fixed_float]),
                        })
                    self.assertEqual(features['fixed_string'],
                                     np.array([fixed_string]))
                    self.assertAlmostEqual(extracts[constants.LABELS_KEY],
                                           np.array([label]))
                    self.assertAlmostEqual(
                        extracts[constants.EXAMPLE_WEIGHTS_KEY],
                        np.array([weight]))

            except AssertionError as err:
                raise util.BeamAssertException(err)

        util.assert_that(result, check_result, label='result')
def testEvaluateWithQueryBasedMetrics(self):
    """Evaluates NDCG@1 and NDCG@2 with 'fixed_string' as the query key.

    Metrics are checked for the overall slice and for one slice per query.
    """
    temp_export_dir = self._getExportDir()
    _, export_dir = (fixed_prediction_estimator_extra_fields.
                     simple_fixed_prediction_estimator_extra_fields(
                         None, temp_export_dir))

    ndcg_specs = metric_specs.specs_from_metrics(
        [ndcg.NDCG(gain_key='fixed_float', name='ndcg')],
        binarize=config.BinarizationOptions(top_k_list=[1, 2]),
        query_key='fixed_string')
    eval_config = config.EvalConfig(
        model_specs=[
            config.ModelSpec(location=export_dir,
                             label_key='label',
                             example_weight_key='fixed_int')
        ],
        slicing_specs=[
            config.SlicingSpec(),
            config.SlicingSpec(feature_keys=['fixed_string']),
        ],
        metrics_specs=ndcg_specs)
    eval_shared_model = self.createTestEvalSharedModel(
        eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])
    slice_specs = [
        slicer.SingleSliceSpec(spec=s) for s in eval_config.slicing_specs
    ]
    extractors = [
        input_extractor.InputExtractor(eval_config=eval_config),
        predict_extractor_v2.PredictExtractor(
            eval_config=eval_config, eval_shared_models=[eval_shared_model]),
        slice_key_extractor.SliceKeyExtractor(slice_spec=slice_specs)
    ]
    evaluators = [
        metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
            eval_config=eval_config, eval_shared_models=[eval_shared_model])
    ]

    # fixed_string used as query_key
    # fixed_float used as gain_key for NDCG
    # fixed_int used as example_weight_key for NDCG
    # Rows are (prediction, label, fixed_float, fixed_string, fixed_int).
    example_rows = [
        (0.2, 1.0, 1.0, 'query1', 1),
        (0.8, 0.0, 0.5, 'query1', 1),
        (0.5, 0.0, 0.5, 'query2', 2),
        (0.9, 1.0, 1.0, 'query2', 2),
        (0.1, 0.0, 0.1, 'query2', 2),
        (0.9, 1.0, 1.0, 'query3', 3),
    ]
    examples = [
        self._makeExample(prediction=prediction,
                          label=label,
                          fixed_float=gain,
                          fixed_string=query,
                          fixed_int=weight)
        for prediction, label, gain, query, weight in example_rows
    ]

    with beam.Pipeline() as pipeline:
        # pylint: disable=no-value-for-parameter
        metrics = (
            pipeline
            | 'Create' >> beam.Create(
                [e.SerializeToString() for e in examples])
            | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
            | 'ExtractAndEvaluate' >> model_eval_lib.ExtractAndEvaluate(
                extractors=extractors, evaluators=evaluators))

        # pylint: enable=no-value-for-parameter

        def check_metrics(got):
            try:
                self.assertLen(got, 4)
                slices = dict(got)
                overall_slice = ()
                query1_slice = (('fixed_string', b'query1'), )
                query2_slice = (('fixed_string', b'query2'), )
                query3_slice = (('fixed_string', b'query3'), )
                self.assertCountEqual(list(slices), [
                    overall_slice, query1_slice, query2_slice, query3_slice
                ])

                example_count_key = metric_types.MetricKey(
                    name='example_count')
                weighted_example_count_key = metric_types.MetricKey(
                    name='weighted_example_count')
                ndcg1_key = metric_types.MetricKey(
                    name='ndcg', sub_key=metric_types.SubKey(top_k=1))
                ndcg2_key = metric_types.MetricKey(
                    name='ndcg', sub_key=metric_types.SubKey(top_k=2))

                # Query1 (weight=1): (p=0.8, g=0.5) (p=0.2, g=1.0)
                # Query2 (weight=2): (p=0.9, g=1.0) (p=0.5, g=0.5)
                #                    (p=0.1, g=0.1)
                # Query3 (weight=3): (p=0.9, g=1.0)
                #
                # DCG@1:  0.5, 1.0, 1.0
                # NDCG@1: 0.5, 1.0, 1.0
                # Average NDCG@1:
                #   (1 * 0.5 + 2 * 1.0 + 3 * 1.0) / (1 + 2 + 3) ~ 0.92
                #
                # DCG@2 (log is base 2, 1/log(3) ~ 0.630930):
                #   Query1: 0.5 + 1.0/log(3) ~ 1.130930
                #   Query2: 1.0 + 0.5/log(3) ~ 1.315465
                #   Query3: 1.0
                # NDCG@2:
                #   Query1: (0.5 + 1.0/log(3)) / (1.0 + 0.5/log(3)) ~ 0.85972
                #   Query2: (1.0 + 0.5/log(3)) / (1.0 + 0.5/log(3)) = 1.0
                #   Query3: 1.0
                # Average NDCG@2:
                #   (1 * 0.860 + 2 * 1.0 + 3 * 1.0) / (1 + 2 + 3) ~ 0.97
                expected_by_slice = [
                    (overall_slice, {
                        example_count_key: 6,
                        weighted_example_count_key: 11.0,
                        ndcg1_key: 0.9166667,
                        ndcg2_key: 0.9766198
                    }),
                    (query1_slice, {
                        example_count_key: 2,
                        weighted_example_count_key: 2.0,
                        ndcg1_key: 0.5,
                        ndcg2_key: 0.85972
                    }),
                    (query2_slice, {
                        example_count_key: 3,
                        weighted_example_count_key: 6.0,
                        ndcg1_key: 1.0,
                        ndcg2_key: 1.0
                    }),
                    (query3_slice, {
                        example_count_key: 1,
                        weighted_example_count_key: 3.0,
                        ndcg1_key: 1.0,
                        ndcg2_key: 1.0
                    }),
                ]
                for slice_key, expected_metrics in expected_by_slice:
                    self.assertDictElementsAlmostEqual(
                        slices[slice_key], expected_metrics)

            except AssertionError as err:
                raise util.BeamAssertException(err)

        util.assert_that(metrics[constants.METRICS_KEY],
                         check_metrics,
                         label='metrics')
def testPredictExtractorWithSequentialKerasModel(self):
    """Checks PredictExtractor adds predictions for a Sequential Keras model.

    A one-layer Sequential model is trained for a single step, saved in the
    TF SavedModel format and run through the PredictExtractor. The extracts
    carry the feature under the key 'test' while the model's input is called
    'test_input'; the extractor is expected to handle that mismatch. Only the
    presence of the predictions key is verified, not the predicted values.
    """
    # Note that the input will be called 'test_input'
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(
            1, activation=tf.nn.sigmoid, input_shape=(1, ), name='test')
    ])
    model.compile(
        # `learning_rate` is the supported spelling; `lr` is a deprecated
        # alias that newer Keras releases no longer accept.
        optimizer=tf.keras.optimizers.Adam(learning_rate=.001),
        loss=tf.keras.losses.binary_crossentropy,
        metrics=['accuracy'])

    train_features = {'test_input': [[0.0], [1.0]]}
    labels = [[1], [0]]
    example_weights = [1.0, 0.5]
    dataset = tf.data.Dataset.from_tensor_slices(
        (train_features, labels, example_weights))
    dataset = dataset.shuffle(buffer_size=1).repeat().batch(2)
    model.fit(dataset, steps_per_epoch=1)

    export_dir = self._getExportDir()
    model.save(export_dir, save_format='tf')

    eval_config = config.EvalConfig(
        model_specs=[config.ModelSpec(location=export_dir)])
    eval_shared_model = self.createTestEvalSharedModel(
        eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])
    predict_extractor = predict_extractor_v2.PredictExtractor(
        eval_config=eval_config, eval_shared_models=[eval_shared_model])

    # Notice that the features are 'test' but the model expects 'test_input'.
    # This tests that the PredictExtractor properly handles this case.
    predict_features = [
        {
            'test': np.array([0.0], dtype=np.float32),
            'non_model_feature': np.array([0]),  # should be ignored by model
        },
        {
            'test': np.array([1.0], dtype=np.float32),
            'non_model_feature': np.array([1]),  # should be ignored by model
        },
    ]

    with beam.Pipeline() as pipeline:
        # pylint: disable=no-value-for-parameter
        result = (
            pipeline
            | 'Create' >> beam.Create(predict_features)
            | 'FeaturesToExtracts' >>
            beam.Map(lambda x: {constants.FEATURES_KEY: x})
            | predict_extractor.stage_name >> predict_extractor.ptransform)

        # pylint: enable=no-value-for-parameter

        def check_result(got):
            try:
                # One extract per input feature dict.
                self.assertLen(got, 2)
                # We can't verify the actual predictions, but we can verify
                # the keys.
                for item in got:
                    self.assertIn(constants.PREDICTIONS_KEY, item)

            except AssertionError as err:
                raise util.BeamAssertException(err)

        util.assert_that(result, check_result, label='result')
def testEvaluateWithMultiClassModel(self):
    """Evaluates a 3-class DNN classifier with per-class MeanLabel metrics.

    Labels are binarized by class id and every example is weighted by its
    'age' feature, so the expected mean label for a class is the weight of
    the examples carrying that class divided by the total weight. Only the
    overall (empty) slice is expected in the output.
    """
    n_classes = 3
    # (age, language, label); 'age' is also used as the example weight.
    rows = [
        (1.0, 'english', 0),
        (2.0, 'chinese', 1),
        (3.0, 'english', 2),
        (4.0, 'chinese', 1),
    ]

    temp_export_dir = self._getExportDir()
    _, export_dir = dnn_classifier.simple_dnn_classifier(
        None, temp_export_dir, n_classes=n_classes)

    # Add example_count and weighted_example_count
    model_spec = config.ModelSpec(
        location=export_dir, label_key='label', example_weight_key='age')
    binarize = config.BinarizationOptions(class_ids=range(n_classes))
    eval_config = config.EvalConfig(
        model_specs=[model_spec],
        slicing_specs=[config.SlicingSpec()],
        metrics_specs=metric_specs.specs_from_metrics(
            [calibration.MeanLabel('mean_label')], binarize=binarize))
    eval_shared_model = self.createTestEvalSharedModel(
        eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])

    slice_spec = [
        slicer.SingleSliceSpec(spec=spec)
        for spec in eval_config.slicing_specs
    ]
    extractors = [
        input_extractor.InputExtractor(eval_config=eval_config),
        predict_extractor_v2.PredictExtractor(
            eval_config=eval_config,
            eval_shared_models=[eval_shared_model]),
        slice_key_extractor.SliceKeyExtractor(slice_spec=slice_spec),
    ]
    evaluators = [
        metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
            eval_config=eval_config,
            eval_shared_models=[eval_shared_model]),
    ]

    examples = [
        self._makeExample(age=age, language=language, label=label)
        for age, language, label in rows
    ]
    serialized = [example.SerializeToString() for example in examples]

    with beam.Pipeline() as pipeline:
        # pylint: disable=no-value-for-parameter
        metrics = (
            pipeline
            | 'Create' >> beam.Create(serialized)
            | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
            | 'ExtractAndEvaluate' >> model_eval_lib.ExtractAndEvaluate(
                extractors=extractors, evaluators=evaluators))

        # pylint: enable=no-value-for-parameter

        def check_metrics(got):
            try:
                self.assertLen(got, 1)
                got_slice_key, got_metrics = got[0]

                total_weight = sum(age for age, _, _ in rows)
                expected = {
                    metric_types.MetricKey(name='example_count'): 4,
                    metric_types.MetricKey(name='weighted_example_count'):
                        total_weight,
                }
                for class_id in range(n_classes):
                    mean_label_key = metric_types.MetricKey(
                        name='mean_label',
                        sub_key=metric_types.SubKey(class_id=class_id))
                    # Weight of the examples whose label is this class.
                    class_weight = sum(
                        age for age, _, label in rows if label == class_id)
                    expected[mean_label_key] = class_weight / total_weight

                self.assertEqual(got_slice_key, ())
                self.assertDictElementsAlmostEqual(got_metrics, expected)

            except AssertionError as err:
                raise util.BeamAssertException(err)

        util.assert_that(
            metrics[constants.METRICS_KEY], check_metrics, label='metrics')
def testEvaluateWithMultiOutputModel(self):
    """Evaluates a multi-head model with a MeanLabel metric per output.

    Each head reads its own label feature and all heads use 'age' as the
    example weight. The expected mean label of a head is the age-weighted
    average of that head's label column; the weighted example count of every
    head is the sum of the ages.
    """
    temp_export_dir = self._getExportDir()
    _, export_dir = multi_head.simple_multi_head(None, temp_export_dir)

    # Output name -> feature that holds that output's label.
    label_keys = {
        'chinese_head': 'chinese_label',
        'english_head': 'english_label',
        'other_head': 'other_label',
    }
    model_spec = config.ModelSpec(
        location=export_dir,
        label_keys=label_keys,
        example_weight_keys={head: 'age' for head in label_keys})
    eval_config = config.EvalConfig(
        model_specs=[model_spec],
        slicing_specs=[config.SlicingSpec()],
        metrics_specs=metric_specs.specs_from_metrics({
            head: [calibration.MeanLabel('mean_label')]
            for head in label_keys
        }))
    eval_shared_model = self.createTestEvalSharedModel(
        eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])

    slice_spec = [
        slicer.SingleSliceSpec(spec=spec)
        for spec in eval_config.slicing_specs
    ]
    extractors = [
        input_extractor.InputExtractor(eval_config=eval_config),
        predict_extractor_v2.PredictExtractor(
            eval_config=eval_config,
            eval_shared_models=[eval_shared_model]),
        slice_key_extractor.SliceKeyExtractor(slice_spec=slice_spec),
    ]
    evaluators = [
        metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
            eval_config=eval_config,
            eval_shared_models=[eval_shared_model]),
    ]

    example_rows = [
        dict(age=1.0, language='english', english_label=1.0,
             chinese_label=0.0, other_label=0.0),
        dict(age=1.0, language='chinese', english_label=0.0,
             chinese_label=1.0, other_label=0.0),
        dict(age=2.0, language='english', english_label=1.0,
             chinese_label=0.0, other_label=0.0),
        dict(age=2.0, language='other', english_label=0.0,
             chinese_label=1.0, other_label=1.0),
    ]
    examples = [self._makeExample(**row) for row in example_rows]
    serialized = [example.SerializeToString() for example in examples]

    with beam.Pipeline() as pipeline:
        # pylint: disable=no-value-for-parameter
        metrics = (
            pipeline
            | 'Create' >> beam.Create(serialized)
            | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
            | 'ExtractAndEvaluate' >> model_eval_lib.ExtractAndEvaluate(
                extractors=extractors, evaluators=evaluators))

        # pylint: enable=no-value-for-parameter

        def check_metrics(got):
            try:
                self.assertLen(got, 1)
                got_slice_key, got_metrics = got[0]
                self.assertEqual(got_slice_key, ())

                total_weight = sum(row['age'] for row in example_rows)
                expected = {
                    metric_types.MetricKey(name='example_count'): 4,
                }
                for head, label_name in label_keys.items():
                    weighted_labels = sum(
                        row['age'] * row[label_name]
                        for row in example_rows)
                    expected[metric_types.MetricKey(
                        name='mean_label',
                        output_name=head)] = weighted_labels / total_weight
                    expected[metric_types.MetricKey(
                        name='weighted_example_count',
                        output_name=head)] = total_weight

                self.assertDictElementsAlmostEqual(got_metrics, expected)

            except AssertionError as err:
                raise util.BeamAssertException(err)

        util.assert_that(
            metrics[constants.METRICS_KEY], check_metrics, label='metrics')
def testBatchSizeLimit(self):
    """Runs BatchedPredictExtractor on a batch-size-limited classifier.

    Four identical examples are read in batches of one and passed through
    the input and predict extractors. The test expects four extracts, each
    carrying the batched predictions key; predicted values are not checked.
    """
    temp_export_dir = self._getExportDir()
    _, export_dir = (
        batch_size_limited_classifier.simple_batch_size_limited_classifier(
            None, temp_export_dir))
    eval_shared_model = self.createTestEvalSharedModel(
        eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])
    eval_config = config.EvalConfig(model_specs=[config.ModelSpec()])

    schema = text_format.Parse(
        """
        feature {
          name: "classes"
          type: BYTES
        }
        feature {
          name: "scores"
          type: FLOAT
        }
        feature {
          name: "labels"
          type: BYTES
        }
        """, schema_pb2.Schema())
    tfx_io = test_util.InMemoryTFExampleRecord(
        schema=schema, raw_record_column_name=constants.BATCHED_INPUT_KEY)
    tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
        arrow_schema=tfx_io.ArrowSchema(),
        tensor_representations=tfx_io.TensorRepresentations())

    input_extractor = batched_input_extractor.BatchedInputExtractor(
        eval_config)
    predict_extractor = batched_predict_extractor_v2.BatchedPredictExtractor(
        eval_config=eval_config,
        eval_shared_model=eval_shared_model,
        tensor_adapter_config=tensor_adapter_config)

    examples = [
        self._makeExample(classes='first', scores=0.0, labels='third')
        for _ in range(4)
    ]
    serialized = [example.SerializeToString() for example in examples]

    with beam.Pipeline() as pipeline:
        predict_extracts = (
            pipeline
            | 'Create' >> beam.Create(serialized, reshuffle=False)
            # One example per batch, so each example yields its own extract.
            | 'BatchExamples' >> tfx_io.BeamSource(batch_size=1)
            | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
            | input_extractor.stage_name >> input_extractor.ptransform
            | predict_extractor.stage_name >> predict_extractor.ptransform)

        def check_result(got):
            try:
                self.assertLen(got, 4)
                # We can't verify the actual predictions, but we can verify
                # the keys.
                for item in got:
                    self.assertIn(constants.BATCHED_PREDICTIONS_KEY, item)

            except AssertionError as err:
                raise util.BeamAssertException(err)

        util.assert_that(predict_extracts, check_result, label='result')
def testBatchedInputExtractor(self, label):
    """Checks the features, labels and weights BatchedInputExtractor emits.

    Three examples are read as a single batch. For every row the test
    verifies the numeric features, the bytes feature, the label and the
    example weight found in the resulting extract.

    Args:
      label: Name of the label feature, or None to run without a label. With
        None the label feature is left out of the schema and the examples,
        and the expected label of every row is None.
    """
    model_spec = config.ModelSpec(
        label_key=label, example_weight_key='example_weight')
    eval_config = config.EvalConfig(model_specs=[model_spec])
    input_extractor = batched_input_extractor.BatchedInputExtractor(eval_config)

    if label is None:
        label_feature = ''
    else:
        label_feature = """
            feature {
              name: "%s"
              type: FLOAT
            }
            """ % label
    schema = text_format.Parse(
        label_feature + """
        feature {
          name: "example_weight"
          type: FLOAT
        }
        feature {
          name: "fixed_int"
          type: INT
        }
        feature {
          name: "fixed_float"
          type: FLOAT
        }
        feature {
          name: "fixed_string"
          type: BYTES
        }
        """, schema_pb2.Schema())
    tfx_io = test_util.InMemoryTFExampleRecord(
        schema=schema, raw_record_column_name=constants.ARROW_INPUT_COLUMN)

    def with_label(values, label_value):
        # Adds the label entry in place, only when a label name was given.
        if label is not None:
            values[label] = label_value
        return values

    # (example_weight, fixed_int, fixed_float, fixed_string, label value)
    rows = [
        (0.5, 1, 1.0, 'fixed_string1', 1.0),
        (0.0, 1, 1.0, 'fixed_string2', 0.0),
        (1.0, 2, 0.0, 'fixed_string3', 0.0),
    ]
    example_kwargs = [
        with_label(
            {
                'example_weight': weight,
                'fixed_int': fixed_int,
                'fixed_float': fixed_float,
                'fixed_string': fixed_string,
            }, label_value)
        for weight, fixed_int, fixed_float, fixed_string, label_value in rows
    ]
    serialized = [
        self._makeExample(**kwargs).SerializeToString()
        for kwargs in example_kwargs
    ]

    with beam.Pipeline() as pipeline:
        # pylint: disable=no-value-for-parameter
        result = (
            pipeline
            | 'Create' >> beam.Create(serialized, reshuffle=False)
            | 'BatchExamples' >> tfx_io.BeamSource(batch_size=3)
            | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
            | input_extractor.stage_name >> input_extractor.ptransform)

        # pylint: enable=no-value-for-parameter

        def check_result(got):
            try:
                # All three examples arrive in one batch.
                self.assertLen(got, 1)
                extracts = got[0]
                for idx, row in enumerate(rows):
                    (weight, fixed_int, fixed_float, fixed_string,
                     label_value) = row
                    features = extracts[constants.FEATURES_KEY][idx]
                    self.assertDictElementsAlmostEqual(
                        features,
                        with_label(
                            {
                                'fixed_int': np.array([fixed_int]),
                                'fixed_float': np.array([fixed_float]),
                                'example_weight': np.array([weight]),
                            }, np.array([label_value])))
                    # The string feature comes back as bytes.
                    self.assertEqual(
                        features['fixed_string'],
                        np.array([fixed_string.encode('utf-8')]))
                    self.assertAlmostEqual(
                        extracts[constants.LABELS_KEY][idx],
                        np.array([label_value])
                        if label is not None else None)
                    self.assertAlmostEqual(
                        extracts[constants.EXAMPLE_WEIGHTS_KEY][idx],
                        np.array([weight]))

            except AssertionError as err:
                raise util.BeamAssertException(err)

        util.assert_that(result, check_result, label='result')
def testPredictExtractorWithSequentialKerasModel(self):
    """Checks BatchedPredictExtractor with a Sequential Keras model.

    A one-layer Sequential model with a two-element input is trained for a
    single step, saved in the TF SavedModel format and evaluated on two
    examples read as one batch. The examples name the feature 'test' while
    the model's input is called 'test_input'; the extractor is expected to
    handle that mismatch. Only the presence of the batched predictions key
    is verified, not the predicted values.
    """
    # Note that the input will be called 'test_input'
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(
            1, activation=tf.nn.sigmoid, input_shape=(2, ), name='test')
    ])
    model.compile(
        # `learning_rate` is the supported spelling; `lr` is a deprecated
        # alias that newer Keras releases no longer accept.
        optimizer=tf.keras.optimizers.Adam(learning_rate=.001),
        loss=tf.keras.losses.binary_crossentropy,
        metrics=['accuracy'])

    train_features = {'test_input': [[0.0, 0.0], [1.0, 1.0]]}
    labels = [[1], [0]]
    example_weights = [1.0, 0.5]
    dataset = tf.data.Dataset.from_tensor_slices(
        (train_features, labels, example_weights))
    dataset = dataset.shuffle(buffer_size=1).repeat().batch(2)
    model.fit(dataset, steps_per_epoch=1)

    export_dir = self._getExportDir()
    model.save(export_dir, save_format='tf')

    eval_config = config.EvalConfig(model_specs=[config.ModelSpec()])
    eval_shared_model = self.createTestEvalSharedModel(
        eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])

    # The tensor representation declares 'test' as a dense tensor of size 2,
    # matching the model's input_shape.
    schema = text_format.Parse(
        """
        tensor_representation_group {
          key: ""
          value {
            tensor_representation {
              key: "test"
              value {
                dense_tensor {
                  column_name: "test"
                  shape { dim { size: 2 } }
                }
              }
            }
          }
        }
        feature {
          name: "test"
          type: FLOAT
        }
        feature {
          name: "non_model_feature"
          type: INT
        }
        """, schema_pb2.Schema())
    tfx_io = test_util.InMemoryTFExampleRecord(
        schema=schema, raw_record_column_name=constants.BATCHED_INPUT_KEY)
    tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
        arrow_schema=tfx_io.ArrowSchema(),
        tensor_representations=tfx_io.TensorRepresentations())

    input_extractor = batched_input_extractor.BatchedInputExtractor(
        eval_config)
    predict_extractor = batched_predict_extractor_v2.BatchedPredictExtractor(
        eval_config=eval_config,
        eval_shared_model=eval_shared_model,
        tensor_adapter_config=tensor_adapter_config)

    # Notice that the features are 'test' but the model expects 'test_input'.
    # This tests that the PredictExtractor properly handles this case.
    examples = [
        self._makeExample(
            test=[0.0, 0.0],
            non_model_feature=0),  # should be ignored by model
        self._makeExample(
            test=[1.0, 1.0],
            non_model_feature=1),  # should be ignored by model
    ]

    with beam.Pipeline() as pipeline:
        # pylint: disable=no-value-for-parameter
        result = (
            pipeline
            | 'Create' >> beam.Create(
                [e.SerializeToString() for e in examples], reshuffle=False)
            | 'BatchExamples' >> tfx_io.BeamSource(batch_size=2)
            | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
            | input_extractor.stage_name >> input_extractor.ptransform
            | predict_extractor.stage_name >> predict_extractor.ptransform)

        # pylint: enable=no-value-for-parameter

        def check_result(got):
            try:
                # Both examples arrive in one batch.
                self.assertLen(got, 1)
                # We can't verify the actual predictions, but we can verify
                # the keys.
                for item in got:
                    self.assertIn(constants.BATCHED_PREDICTIONS_KEY, item)

            except AssertionError as err:
                raise util.BeamAssertException(err)

        util.assert_that(result, check_result, label='result')
def testBatchSizeLimitWithKerasModel(self):
    """Runs BatchedPredictExtractor on a Keras model with a fixed batch size.

    Both model inputs are declared with batch_size=1 and the Lambda layer
    adds a constant of shape (1, 2). Four examples are read in batches of
    one; the test expects four extracts, each carrying the batched
    predictions key. Predicted values are not checked.
    """
    input1 = tf.keras.layers.Input(shape=(1, ), batch_size=1, name='input1')
    input2 = tf.keras.layers.Input(shape=(1, ), batch_size=1, name='input2')
    inputs = [input1, input2]
    input_layer = tf.keras.layers.concatenate(inputs)

    def add_1(tensor):
        # The constant's leading dimension of 1 ties the layer to batch size 1.
        return tf.add_n([tensor, tf.constant(1.0, shape=(1, 2))])

    assert_layer = tf.keras.layers.Lambda(add_1)(input_layer)

    model = tf.keras.models.Model(inputs, assert_layer)
    model.compile(
        # `learning_rate` is the supported spelling; `lr` is a deprecated
        # alias that newer Keras releases no longer accept.
        optimizer=tf.keras.optimizers.Adam(learning_rate=.001),
        loss=tf.keras.losses.binary_crossentropy,
        metrics=['accuracy'])

    export_dir = self._getExportDir()
    model.save(export_dir, save_format='tf')

    eval_config = config.EvalConfig(model_specs=[config.ModelSpec()])
    eval_shared_model = self.createTestEvalSharedModel(
        eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])

    schema = text_format.Parse(
        """
        tensor_representation_group {
          key: ""
          value {
            tensor_representation {
              key: "input1"
              value {
                dense_tensor {
                  column_name: "input1"
                  shape { dim { size: 1 } }
                }
              }
            }
            tensor_representation {
              key: "input2"
              value {
                dense_tensor {
                  column_name: "input2"
                  shape { dim { size: 1 } }
                }
              }
            }
          }
        }
        feature {
          name: "input1"
          type: FLOAT
        }
        feature {
          name: "input2"
          type: FLOAT
        }
        """, schema_pb2.Schema())
    tfx_io = test_util.InMemoryTFExampleRecord(
        schema=schema, raw_record_column_name=constants.BATCHED_INPUT_KEY)
    tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
        arrow_schema=tfx_io.ArrowSchema(),
        tensor_representations=tfx_io.TensorRepresentations())

    input_extractor = batched_input_extractor.BatchedInputExtractor(
        eval_config)
    predict_extractor = batched_predict_extractor_v2.BatchedPredictExtractor(
        eval_config=eval_config,
        eval_shared_model=eval_shared_model,
        tensor_adapter_config=tensor_adapter_config)

    examples = [
        self._makeExample(input1=0.0, input2=1.0) for _ in range(4)
    ]

    with beam.Pipeline() as pipeline:
        # pylint: disable=no-value-for-parameter
        predict_extracts = (
            pipeline
            | 'Create' >> beam.Create(
                [e.SerializeToString() for e in examples], reshuffle=False)
            # One example per batch, matching the model's batch_size=1.
            | 'BatchExamples' >> tfx_io.BeamSource(batch_size=1)
            | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
            | input_extractor.stage_name >> input_extractor.ptransform
            | predict_extractor.stage_name >> predict_extractor.ptransform)

        # pylint: enable=no-value-for-parameter

        def check_result(got):
            try:
                self.assertLen(got, 4)
                # We can't verify the actual predictions, but we can verify
                # the keys.
                for item in got:
                    self.assertIn(constants.BATCHED_PREDICTIONS_KEY, item)

            except AssertionError as err:
                raise util.BeamAssertException(err)

        util.assert_that(predict_extracts, check_result, label='result')
def testPredictExtractorWithRegressionModel(self):
    """Checks batched predictions from the fixed-prediction estimator.

    Three examples are read as a single batch and passed through the input
    and predict extractors. The test expects one extract whose batched
    predictions match the 'prediction' feature values of the examples.
    """
    temp_export_dir = self._getExportDir()
    export_dir, _ = (
        fixed_prediction_estimator_extra_fields.
        simple_fixed_prediction_estimator_extra_fields(
            temp_export_dir, None))

    eval_config = config.EvalConfig(model_specs=[config.ModelSpec()])
    eval_shared_model = self.createTestEvalSharedModel(
        eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])

    schema = text_format.Parse(
        """
        feature {
          name: "prediction"
          type: FLOAT
        }
        feature {
          name: "label"
          type: FLOAT
        }
        feature {
          name: "fixed_int"
          type: INT
        }
        feature {
          name: "fixed_float"
          type: FLOAT
        }
        feature {
          name: "fixed_string"
          type: BYTES
        }
        """, schema_pb2.Schema())
    tfx_io = test_util.InMemoryTFExampleRecord(
        schema=schema, raw_record_column_name=constants.BATCHED_INPUT_KEY)
    tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
        arrow_schema=tfx_io.ArrowSchema(),
        tensor_representations=tfx_io.TensorRepresentations())

    input_extractor = batched_input_extractor.BatchedInputExtractor(
        eval_config)
    predict_extractor = batched_predict_extractor_v2.BatchedPredictExtractor(
        eval_config=eval_config,
        eval_shared_model=eval_shared_model,
        tensor_adapter_config=tensor_adapter_config)

    example_rows = [
        dict(prediction=0.2, label=1.0, fixed_int=1, fixed_float=1.0,
             fixed_string='fixed_string1'),
        dict(prediction=0.8, label=0.0, fixed_int=1, fixed_float=1.0,
             fixed_string='fixed_string2'),
        dict(prediction=0.5, label=0.0, fixed_int=2, fixed_float=1.0,
             fixed_string='fixed_string3'),
    ]
    serialized = [
        self._makeExample(**row).SerializeToString() for row in example_rows
    ]

    with beam.Pipeline() as pipeline:
        # pylint: disable=no-value-for-parameter
        result = (
            pipeline
            | 'Create' >> beam.Create(serialized, reshuffle=False)
            | 'BatchExamples' >> tfx_io.BeamSource(batch_size=3)
            | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
            | input_extractor.stage_name >> input_extractor.ptransform
            | predict_extractor.stage_name >> predict_extractor.ptransform)

        # pylint: enable=no-value-for-parameter

        def check_result(got):
            try:
                # All three examples arrive in one batch.
                self.assertLen(got, 1)
                batch = got[0]
                self.assertIn(constants.BATCHED_PREDICTIONS_KEY, batch)
                expected_preds = [0.2, 0.8, 0.5]
                # NOTE(review): assertAlmostEqual is handed whole sequences
                # here rather than scalars; confirm this compares the values
                # as intended for the predictions type actually returned.
                self.assertAlmostEqual(
                    batch[constants.BATCHED_PREDICTIONS_KEY],
                    expected_preds)

            except AssertionError as err:
                raise util.BeamAssertException(err)

        util.assert_that(result, check_result, label='result')
def testPredictExtractorWithMultiModels(self):
    """Checks BatchedPredictExtractor output when two models are evaluated.

    The same multi-head model is exported twice and registered as 'model1'
    and 'model2'. Four examples are read as a single batch. For every
    prediction in the batch the test expects an entry per model name, and
    under each model the 'logistic', 'probabilities' and 'all_classes' keys
    for each of the three heads. Predicted values are not checked.
    """
    temp_export_dir = self._getExportDir()
    export_dir1, _ = multi_head.simple_multi_head(temp_export_dir, None)
    export_dir2, _ = multi_head.simple_multi_head(temp_export_dir, None)

    eval_config = config.EvalConfig(model_specs=[
        config.ModelSpec(name='model1'),
        config.ModelSpec(name='model2')
    ])
    eval_shared_model1 = self.createTestEvalSharedModel(
        eval_saved_model_path=export_dir1, tags=[tf.saved_model.SERVING])
    eval_shared_model2 = self.createTestEvalSharedModel(
        eval_saved_model_path=export_dir2, tags=[tf.saved_model.SERVING])

    # Feature names must match the keyword names passed to _makeExample
    # below; the bytes feature was previously misspelled "langauge".
    schema = text_format.Parse(
        """
        feature {
          name: "age"
          type: FLOAT
        }
        feature {
          name: "language"
          type: BYTES
        }
        feature {
          name: "english_label"
          type: FLOAT
        }
        feature {
          name: "chinese_label"
          type: FLOAT
        }
        feature {
          name: "other_label"
          type: FLOAT
        }
        """, schema_pb2.Schema())
    tfx_io = test_util.InMemoryTFExampleRecord(
        schema=schema, raw_record_column_name=constants.BATCHED_INPUT_KEY)
    tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
        arrow_schema=tfx_io.ArrowSchema(),
        tensor_representations=tfx_io.TensorRepresentations())

    input_extractor = batched_input_extractor.BatchedInputExtractor(
        eval_config)
    predict_extractor = batched_predict_extractor_v2.BatchedPredictExtractor(
        eval_config=eval_config,
        eval_shared_model={
            'model1': eval_shared_model1,
            'model2': eval_shared_model2
        },
        tensor_adapter_config=tensor_adapter_config)

    examples = [
        self._makeExample(age=1.0, language='english', english_label=1.0,
                          chinese_label=0.0, other_label=0.0),
        self._makeExample(age=1.0, language='chinese', english_label=0.0,
                          chinese_label=1.0, other_label=0.0),
        self._makeExample(age=2.0, language='english', english_label=1.0,
                          chinese_label=0.0, other_label=0.0),
        self._makeExample(age=2.0, language='other', english_label=0.0,
                          chinese_label=1.0, other_label=1.0)
    ]

    with beam.Pipeline() as pipeline:
        # pylint: disable=no-value-for-parameter
        result = (
            pipeline
            | 'Create' >> beam.Create(
                [e.SerializeToString() for e in examples], reshuffle=False)
            | 'BatchExamples' >> tfx_io.BeamSource(batch_size=4)
            | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
            | input_extractor.stage_name >> input_extractor.ptransform
            | predict_extractor.stage_name >> predict_extractor.ptransform)

        # pylint: enable=no-value-for-parameter

        def check_result(got):
            try:
                # All four examples arrive in one batch.
                self.assertLen(got, 1)
                for item in got:
                    # We can't verify the actual predictions, but we can
                    # verify the keys
                    self.assertIn(constants.BATCHED_PREDICTIONS_KEY, item)
                    for pred in item[constants.BATCHED_PREDICTIONS_KEY]:
                        for model_name in ('model1', 'model2'):
                            self.assertIn(model_name, pred)
                            for output_name in ('chinese_head',
                                                'english_head',
                                                'other_head'):
                                for pred_key in ('logistic', 'probabilities',
                                                 'all_classes'):
                                    self.assertIn(
                                        output_name + '/' + pred_key,
                                        pred[model_name])

            except AssertionError as err:
                raise util.BeamAssertException(err)

        util.assert_that(result, check_result, label='result')
def testBatchedInputExtractorMultiModel(self):
    """Checks BatchedInputExtractor output for two differently keyed models.

    'model1' uses single label, example weight and prediction keys, while
    'model2' uses per-output key mappings for 'output1' and 'output2'. Two
    examples are read as a single batch; for each row the test verifies the
    remaining features and the per-model labels, example weights and
    predictions.
    """
    model_spec1 = config.ModelSpec(
        name='model1',
        label_key='label',
        example_weight_key='example_weight',
        prediction_key='fixed_float')
    model_spec2 = config.ModelSpec(
        name='model2',
        label_keys={
            'output1': 'label1',
            'output2': 'label2'
        },
        example_weight_keys={
            'output1': 'example_weight1',
            'output2': 'example_weight2'
        },
        prediction_keys={
            'output1': 'fixed_float',
            'output2': 'fixed_float'
        })
    eval_config = config.EvalConfig(model_specs=[model_spec1, model_spec2])
    input_extractor = batched_input_extractor.BatchedInputExtractor(
        eval_config)

    schema = text_format.Parse(
        """
        feature {
          name: "label"
          type: FLOAT
        }
        feature {
          name: "label1"
          type: FLOAT
        }
        feature {
          name: "label2"
          type: FLOAT
        }
        feature {
          name: "example_weight"
          type: FLOAT
        }
        feature {
          name: "example_weight1"
          type: FLOAT
        }
        feature {
          name: "example_weight2"
          type: FLOAT
        }
        feature {
          name: "fixed_int"
          type: INT
        }
        feature {
          name: "fixed_float"
          type: FLOAT
        }
        feature {
          name: "fixed_string"
          type: BYTES
        }
        """, schema_pb2.Schema())
    tfx_io = test_util.InMemoryTFExampleRecord(
        schema=schema, raw_record_column_name=constants.BATCHED_INPUT_KEY)

    examples = [
        self._makeExample(
            label=1.0, label1=1.0, label2=0.0,
            example_weight=0.5, example_weight1=0.5, example_weight2=0.5,
            fixed_int=1, fixed_float=1.0, fixed_string='fixed_string1'),
        self._makeExample(
            label=1.0, label1=1.0, label2=1.0,
            example_weight=0.0, example_weight1=0.0, example_weight2=1.0,
            fixed_int=1, fixed_float=2.0, fixed_string='fixed_string2'),
    ]
    serialized = [example.SerializeToString() for example in examples]

    def per_output(output1, output2):
        # Expected per-output values for 'model2'.
        return {
            'output1': np.array([output1]),
            'output2': np.array([output2])
        }

    # Expected extract contents, one entry per example in the batch.
    expected_rows = [
        {
            'fixed_string': b'fixed_string1',
            'label': 1.0,
            'labels': per_output(1.0, 0.0),
            'weight': 0.5,
            'weights': per_output(0.5, 0.5),
            'prediction': 1.0,
            'predictions': per_output(1.0, 1.0),
        },
        {
            'fixed_string': b'fixed_string2',
            'label': 1.0,
            'labels': per_output(1.0, 1.0),
            'weight': 0.0,
            'weights': per_output(0.0, 1.0),
            'prediction': 2.0,
            'predictions': per_output(2.0, 2.0),
        },
    ]

    with beam.Pipeline() as pipeline:
        # pylint: disable=no-value-for-parameter
        result = (
            pipeline
            | 'Create' >> beam.Create(serialized, reshuffle=False)
            | 'BatchExamples' >> tfx_io.BeamSource(batch_size=2)
            | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
            | input_extractor.stage_name >> input_extractor.ptransform)

        # pylint: enable=no-value-for-parameter

        def check_result(got):
            try:
                # Both examples arrive in one batch.
                self.assertLen(got, 1)
                batch = got[0]
                for idx, want in enumerate(expected_rows):
                    self.assertDictElementsAlmostEqual(
                        batch[constants.BATCHED_FEATURES_KEY][idx], {
                            'fixed_int': np.array([1]),
                        })
                    self.assertEqual(
                        batch[constants.BATCHED_FEATURES_KEY][idx]
                        ['fixed_string'], np.array([want['fixed_string']]))

                    for model_name in ('model1', 'model2'):
                        self.assertIn(
                            model_name,
                            batch[constants.BATCHED_LABELS_KEY][idx])
                        self.assertIn(
                            model_name,
                            batch[constants.BATCHED_EXAMPLE_WEIGHTS_KEY][idx])
                        self.assertIn(
                            model_name,
                            batch[constants.BATCHED_PREDICTIONS_KEY][idx])

                    labels = batch[constants.BATCHED_LABELS_KEY][idx]
                    self.assertAlmostEqual(
                        labels['model1'], np.array([want['label']]))
                    self.assertDictElementsAlmostEqual(
                        labels['model2'], want['labels'])

                    weights = (
                        batch[constants.BATCHED_EXAMPLE_WEIGHTS_KEY][idx])
                    self.assertAlmostEqual(
                        weights['model1'], np.array([want['weight']]))
                    self.assertDictElementsAlmostEqual(
                        weights['model2'], want['weights'])

                    predictions = (
                        batch[constants.BATCHED_PREDICTIONS_KEY][idx])
                    self.assertAlmostEqual(
                        predictions['model1'],
                        np.array([want['prediction']]))
                    self.assertDictElementsAlmostEqual(
                        predictions['model2'], want['predictions'])

            except AssertionError as err:
                raise util.BeamAssertException(err)

        util.assert_that(result, check_result, label='result')
def testTFlitePredictExtractorWithSingleOutputModel(self, multi_model,
                                                    multi_output,
                                                    batch_examples,
                                                    batch_inputs):
    """Checks TFLitePredictExtractor output keys for a converted Keras model.

    A two-input Keras model is trained for a single step, converted to
    TFLite, written to a temporary directory and run through the extractor.
    Only the presence of prediction keys is verified, not their values.

    Args:
      multi_model: If true, the same TFLite model is registered twice, as
        'model1' and 'model2', and predictions are expected per model name.
      multi_output: If true, the model gets a second output ('output2') and
        the 'Identity' / 'Identity_1' prediction keys are expected.
      batch_examples: If true, the extractor is built with
        desired_batch_size=2; otherwise with None.
      batch_inputs: If true, every input feature gets an extra leading
        dimension before being fed to the pipeline.
    """
    input1 = tf.keras.layers.Input(shape=(1,), name='input1')
    input2 = tf.keras.layers.Input(shape=(1,), name='input2')
    inputs = [input1, input2]
    input_layer = tf.keras.layers.concatenate(inputs)
    output_layers = {}
    output_layers['output1'] = (
        tf.keras.layers.Dense(1, activation=tf.nn.sigmoid,
                              name='output1')(input_layer))
    if multi_output:
        output_layers['output2'] = (
            tf.keras.layers.Dense(1, activation=tf.nn.sigmoid,
                                  name='output2')(input_layer))

    model = tf.keras.models.Model(inputs, output_layers)
    model.compile(
        # `learning_rate` is the supported spelling; `lr` is a deprecated
        # alias that newer Keras releases no longer accept.
        optimizer=tf.keras.optimizers.Adam(learning_rate=.001),
        loss=tf.keras.losses.binary_crossentropy,
        metrics=['accuracy'])

    train_features = {'input1': [[0.0], [1.0]], 'input2': [[1.0], [0.0]]}
    labels = {'output1': [[1], [0]]}
    if multi_output:
        labels['output2'] = [[1], [0]]

    example_weights = {'output1': [1.0, 0.5]}
    if multi_output:
        example_weights['output2'] = [1.0, 0.5]
    dataset = tf.data.Dataset.from_tensor_slices(
        (train_features, labels, example_weights))
    dataset = dataset.shuffle(buffer_size=1).repeat().batch(2)
    model.fit(dataset, steps_per_epoch=1)

    tflite_converter = tf.compat.v2.lite.TFLiteConverter.from_keras_model(
        model)
    tflite_model = tflite_converter.convert()

    # The converted model is stored as a file named 'tflite' inside the
    # directory handed to the eval shared model.
    tflite_model_dir = tempfile.mkdtemp()
    with tf.io.gfile.GFile(os.path.join(tflite_model_dir, 'tflite'),
                           'wb') as f:
        f.write(tflite_model)

    model_specs = [config.ModelSpec(name='model1')]
    if multi_model:
        model_specs.append(config.ModelSpec(name='model2'))

    eval_config = config.EvalConfig(model_specs=model_specs)
    eval_shared_models = [
        self.createTestEvalSharedModel(
            model_name='model1', eval_saved_model_path=tflite_model_dir)
    ]
    if multi_model:
        eval_shared_models.append(
            self.createTestEvalSharedModel(
                model_name='model2', eval_saved_model_path=tflite_model_dir))

    desired_batch_size = 2 if batch_examples else None
    predictor = tflite_predict_extractor.TFLitePredictExtractor(
        eval_config=eval_config,
        eval_shared_model=eval_shared_models,
        desired_batch_size=desired_batch_size)

    predict_features = [
        {
            'input1': np.array([0.0], dtype=np.float32),
            'input2': np.array([1.0], dtype=np.float32),
            'non_model_feature': np.array([0]),  # should be ignored by model
        },
        {
            'input1': np.array([1.0], dtype=np.float32),
            'input2': np.array([0.0], dtype=np.float32),
            'non_model_feature': np.array([1]),  # should be ignored by model
        }
    ]

    if batch_inputs:
        predict_features = [{k: np.expand_dims(v, 0)
                             for k, v in p.items()}
                            for p in predict_features]

    with beam.Pipeline() as pipeline:
        # pylint: disable=no-value-for-parameter
        result = (
            pipeline
            | 'Create' >> beam.Create(predict_features)
            | 'FeaturesToExtracts' >>
            beam.Map(lambda x: {constants.FEATURES_KEY: x})
            | predictor.stage_name >> predictor.ptransform)

        # pylint: enable=no-value-for-parameter

        def check_result(got):
            try:
                self.assertLen(got, 2)
                # We can't verify the actual predictions, but we can verify
                # the keys.
                for item in got:
                    self.assertIn(constants.PREDICTIONS_KEY, item)
                    predictions = item[constants.PREDICTIONS_KEY]
                    # TODO(dzats): TFLite seems to currently rename all
                    # outputs to Identity*. Update this test to check for
                    # output1 and output2 when this changes.
                    if multi_model:
                        self.assertIn('model1', predictions)
                        self.assertIn('model2', predictions)
                        if multi_output:
                            self.assertIn('Identity', predictions['model1'])
                            self.assertIn('Identity_1',
                                          predictions['model1'])

                    elif multi_output:
                        self.assertIn('Identity', predictions)
                        self.assertIn('Identity_1', predictions)

            except AssertionError as err:
                raise util.BeamAssertException(err)

        util.assert_that(result, check_result, label='result')
def assertMetricsComputedWithBeamAre(
        self,
        eval_saved_model_path: Text,
        serialized_examples: List[bytes],
        expected_metrics: Dict[Text, Any],
        add_metrics_callbacks: Optional[List[
            types.AddMetricsCallbackType]] = None):
    """Asserts on metrics computed by a Beam pipeline over all examples.

    The examples are evaluated as a single, unsliced population. To supply
    your own PCollection (e.g. a large number of examples read from a file),
    to check metrics for specific slices, or to add further post-export
    metrics, use the more general assertGeneralMetricsComputedWithBeamAre.

    Example usage:

      self.assertMetricsComputedWithBeamAre(
        eval_saved_model_path=path,
        serialized_examples=[self.makeExample(age=5, label=1.0),
                             self.makeExample(age=10, label=0.0)],
        expected_metrics={'average_loss': 0.1})

    Args:
      eval_saved_model_path: Path to the directory containing the
        EvalSavedModel.
      serialized_examples: List of serialized example bytes.
      expected_metrics: Dictionary of expected metric values.
      add_metrics_callbacks: Optional. Callbacks for adding additional
        metrics.
    """

    def check_metrics(got):
        """Check metrics callback."""
        try:
            failure_message = (
                'expecting metrics for exactly one slice, but got %d '
                'slices instead. metrics were: %s' % (len(got), got))
            self.assertEqual(1, len(got), failure_message)
            slice_key, value = got[0]
            # The only slice produced must be the overall (empty) slice.
            self.assertEqual((), slice_key)
            self.assertDictElementsWithinBounds(
                got_values_dict=value, expected_values_dict=expected_metrics)
        except AssertionError as err:
            raise beam_util.BeamAssertException(err)

    eval_config = config.EvalConfig(
        input_data_specs=[config.InputDataSpec()],
        model_specs=[config.ModelSpec(location=eval_saved_model_path)],
        output_data_specs=[config.OutputDataSpec()])
    eval_shared_model = model_eval_lib.default_eval_shared_model(
        eval_saved_model_path=eval_saved_model_path,
        add_metrics_callbacks=add_metrics_callbacks)
    extractors = model_eval_lib.default_extractors(
        eval_config=eval_config, eval_shared_model=eval_shared_model)

    with beam.Pipeline() as pipeline:
        # pylint: disable=no-value-for-parameter
        extracts = (
            pipeline
            | 'CreateExamples' >> beam.Create(serialized_examples)
            | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
            | 'Extract' >> Extract(extractors=extractors))
        (metrics, _), _ = (
            extracts
            | 'ComputeMetricsAndPlots' >>
            metrics_and_plots_evaluator.ComputeMetricsAndPlots(
                eval_shared_model=eval_shared_model))
        # pylint: enable=no-value-for-parameter

        beam_util.assert_that(metrics, check_metrics)
def testEvaluateWithBinaryClassificationModel(self):
    """Evaluates a two-class DNN classifier and checks metrics and plots.

    The 'age' feature doubles as the example weight, so the expected
    weighted values below are computed from the ages 1.0, 2.0 and 3.0.
    """
    n_classes = 2
    temp_export_dir = self._getExportDir()
    _, export_dir = dnn_classifier.simple_dnn_classifier(
        None, temp_export_dir, n_classes=n_classes)

    # Only mean_label and calibration_plot are requested explicitly;
    # example_count and weighted_example_count are checked in the results
    # too.
    requested_metrics = [
        calibration.MeanLabel('mean_label'),
        calibration_plot.CalibrationPlot(
            name='calibration_plot', num_buckets=10),
    ]
    eval_config = config.EvalConfig(
        model_specs=[
            config.ModelSpec(
                location=export_dir,
                label_key='label',
                example_weight_key='age')
        ],
        slicing_specs=[config.SlicingSpec()],
        metrics_specs=metric_specs.specs_from_metrics(requested_metrics))
    eval_shared_model = self.createTestEvalSharedModel(
        eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])

    slice_spec = [
        slicer.SingleSliceSpec(spec=spec)
        for spec in eval_config.slicing_specs
    ]
    extractors = [
        input_extractor.InputExtractor(eval_config=eval_config),
        predict_extractor_v2.PredictExtractor(
            eval_config=eval_config, eval_shared_models=[eval_shared_model]),
        slice_key_extractor.SliceKeyExtractor(slice_spec=slice_spec),
    ]
    evaluators = [
        metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
            eval_config=eval_config, eval_shared_models=[eval_shared_model])
    ]

    examples = [
        self._makeExample(age=1.0, language='english', label=0.0),
        self._makeExample(age=2.0, language='chinese', label=1.0),
        self._makeExample(age=3.0, language='chinese', label=0.0),
    ]

    with beam.Pipeline() as pipeline:
        # pylint: disable=no-value-for-parameter
        extracts = (
            pipeline
            | 'Create' >> beam.Create(
                [e.SerializeToString() for e in examples])
            | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts())
        metrics_and_plots = (
            extracts
            | 'ExtractAndEvaluate' >> model_eval_lib.ExtractAndEvaluate(
                extractors=extractors, evaluators=evaluators))

        # pylint: enable=no-value-for-parameter

        def check_metrics(got):
            try:
                self.assertLen(got, 1)
                got_slice_key, got_metrics = got[0]
                self.assertEqual(got_slice_key, ())
                example_count_key = metric_types.MetricKey(
                    name='example_count')
                weighted_example_count_key = metric_types.MetricKey(
                    name='weighted_example_count')
                label_key = metric_types.MetricKey(name='mean_label')
                expected = {
                    example_count_key: 3,
                    weighted_example_count_key: (1.0 + 2.0 + 3.0),
                    label_key: (0 * 1.0 + 1 * 2.0 + 0 * 3.0) /
                               (1.0 + 2.0 + 3.0),
                }
                self.assertDictElementsAlmostEqual(got_metrics, expected)
            except AssertionError as err:
                raise util.BeamAssertException(err)

        def check_plots(got):
            try:
                self.assertLen(got, 1)
                got_slice_key, got_plots = got[0]
                self.assertEqual(got_slice_key, ())
                plot_key = metric_types.PlotKey('calibration_plot')
                self.assertIn(plot_key, got_plots)
                # 10 buckets + 2 for edge cases
                self.assertLen(got_plots[plot_key].buckets, 12)
            except AssertionError as err:
                raise util.BeamAssertException(err)

        util.assert_that(
            metrics_and_plots[constants.METRICS_KEY],
            check_metrics,
            label='metrics')
        util.assert_that(
            metrics_and_plots[constants.PLOTS_KEY],
            check_plots,
            label='plots')
def assertGeneralMetricsComputedWithBeamAre(
        self,
        eval_saved_model_path: Text,
        examples_pcollection: beam.pvalue.PCollection,
        slice_spec: List[slicer.SingleSliceSpec],
        add_metrics_callbacks: List[types.AddMetricsCallbackType],
        expected_slice_metrics: Dict[Any, Dict[Text, Any]]):
    """Asserts on per-slice metrics computed by a caller-owned Beam pipeline.

    This is the more general form of assertMetricsComputedWithBeamAre. The
    transforms are attached to `examples_pcollection`; creating and running
    the pipeline is the caller's responsibility.

    Example usage:

      def add_metrics(features, predictions, labels):
        metric_ops = {
          'mse': tf.metrics.mean_squared_error(labels, predictions['logits']),
          'mae': tf.metrics.mean_absolute_error(labels,
                                                predictions['logits']),
        }
        return metric_ops

      with beam.Pipeline() as pipeline:
        expected_slice_metrics = {
            (): {
              'mae': 0.1,
              'mse': 0.2,
              tfma.post_export_metrics.metric_keys.AUC:
                tfma.test.BoundedValue(lower_bound=0.5)
            },
            (('age', 10),): {
              'mae': 0.2,
              'mse': 0.3,
              tfma.post_export_metrics.metric_keys.AUC:
                tfma.test.BoundedValue(lower_bound=0.5)
            },
        }
        examples = pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(path)
        self.assertGeneralMetricsComputedWithBeamAre(
          eval_saved_model_path=path,
          examples_pcollection=examples,
          slice_spec=[tfma.slicer.SingleSliceSpec(),
                      tfma.slicer.SingleSliceSpec(columns=['age'])],
          add_metrics_callbacks=[
            add_metrics, tfma.post_export_metrics.auc()],
          expected_slice_metrics=expected_slice_metrics)

    Args:
      eval_saved_model_path: Path to the directory containing the
        EvalSavedModel.
      examples_pcollection: A PCollection of serialized example bytes.
      slice_spec: List of slice specifications.
      add_metrics_callbacks: Callbacks for adding additional metrics.
      expected_slice_metrics: Dictionary of dictionaries describing the
        expected metrics for each slice. The outer dictionary maps slice keys
        to the expected metrics for that slice.
    """

    def check_metrics(got):
        """Check metrics callback."""
        try:
            slices = {slice_key: value for slice_key, value in got}
            # Exactly the expected slices must be present, in any order.
            self.assertItemsEqual(
                list(slices.keys()), list(expected_slice_metrics.keys()))
            for slice_key, expected_metrics in expected_slice_metrics.items():
                self.assertDictElementsWithinBounds(
                    got_values_dict=slices[slice_key],
                    expected_values_dict=expected_metrics)
        except AssertionError as err:
            raise beam_util.BeamAssertException(err)

    # An empty or missing slice_spec leaves slicing_specs unset in the config.
    slicing_specs = ([spec.to_proto() for spec in slice_spec]
                     if slice_spec else None)
    eval_config = config.EvalConfig(
        input_data_specs=[config.InputDataSpec()],
        model_specs=[config.ModelSpec(location=eval_saved_model_path)],
        output_data_specs=[config.OutputDataSpec()],
        slicing_specs=slicing_specs)
    eval_shared_model = self.createTestEvalSharedModel(
        eval_saved_model_path=eval_saved_model_path,
        add_metrics_callbacks=add_metrics_callbacks)
    extractors = model_eval_lib.default_extractors(
        eval_config=eval_config, eval_shared_model=eval_shared_model)

    # pylint: disable=no-value-for-parameter
    extracts = (
        examples_pcollection
        | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
        | 'Extract' >> Extract(extractors=extractors))
    (metrics, _), _ = (
        extracts
        | 'ComputeMetricsAndPlots' >>
        metrics_and_plots_evaluator.ComputeMetricsAndPlots(
            eval_shared_model=eval_shared_model))
    # pylint: enable=no-value-for-parameter

    beam_util.assert_that(metrics, check_metrics)
def testEvaluateWithSlicing(self):
    """Evaluates a fixed-prediction model overall and sliced by a feature.

    Three slices are expected: the overall slice and one slice for each
    distinct value of 'fixed_string'. The 'fixed_float' feature is used as
    the example weight, so the expected means below are weighted means.
    """
    temp_export_dir = self._getExportDir()
    _, export_dir = (
        fixed_prediction_estimator_extra_fields.
        simple_fixed_prediction_estimator_extra_fields(None, temp_export_dir))

    eval_config = config.EvalConfig(
        model_specs=[
            config.ModelSpec(
                location=export_dir,
                label_key='label',
                example_weight_key='fixed_float')
        ],
        slicing_specs=[
            config.SlicingSpec(),
            config.SlicingSpec(feature_keys=['fixed_string']),
        ],
        metrics_specs=metric_specs.specs_from_metrics([
            calibration.MeanLabel('mean_label'),
            calibration.MeanPrediction('mean_prediction')
        ]))
    eval_shared_model = self.createTestEvalSharedModel(
        eval_saved_model_path=export_dir)

    slice_spec = [
        slicer.SingleSliceSpec(spec=s) for s in eval_config.slicing_specs
    ]
    extractors = [
        predict_extractor.PredictExtractor(
            eval_shared_model=eval_shared_model),
        slice_key_extractor.SliceKeyExtractor(slice_spec=slice_spec)
    ]
    evaluators = [
        metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
            eval_config=eval_config, eval_shared_models=[eval_shared_model])
    ]

    # fixed_float used as example_weight key
    examples = [
        self._makeExample(
            prediction=0.2,
            label=1.0,
            fixed_int=1,
            fixed_float=1.0,
            fixed_string='fixed_string1'),
        self._makeExample(
            prediction=0.8,
            label=0.0,
            fixed_int=1,
            fixed_float=1.0,
            fixed_string='fixed_string1'),
        self._makeExample(
            prediction=0.5,
            label=0.0,
            fixed_int=2,
            fixed_float=2.0,
            fixed_string='fixed_string2')
    ]

    with beam.Pipeline() as pipeline:
        # pylint: disable=no-value-for-parameter
        metrics = (
            pipeline
            | 'Create' >> beam.Create(
                [e.SerializeToString() for e in examples])
            | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
            | 'ExtractAndEvaluate' >> model_eval_lib.ExtractAndEvaluate(
                extractors=extractors, evaluators=evaluators))

        # pylint: enable=no-value-for-parameter

        def check_metrics(got):
            try:
                self.assertLen(got, 3)
                slices = {}
                for slice_key, value in got:
                    slices[slice_key] = value
                overall_slice = ()
                fixed_string1_slice = (('fixed_string', b'fixed_string1'),)
                fixed_string2_slice = (('fixed_string', b'fixed_string2'),)
                # The method name was previously misspelled
                # ("asssertCountEqual"), which raised AttributeError instead
                # of comparing the slice keys.
                self.assertCountEqual(
                    list(slices.keys()),
                    [overall_slice, fixed_string1_slice, fixed_string2_slice])
                example_count_key = metric_types.MetricKey(
                    name='example_count')
                weighted_example_count_key = metric_types.MetricKey(
                    name='weighted_example_count')
                label_key = metric_types.MetricKey(name='mean_label')
                pred_key = metric_types.MetricKey(name='mean_prediction')
                self.assertDictElementsAlmostEqual(
                    slices[overall_slice], {
                        example_count_key: 3,
                        weighted_example_count_key: 4.0,
                        label_key: (1.0 + 0.0 + 2 * 0.0) / (1.0 + 1.0 + 2.0),
                        pred_key: (0.2 + 0.8 + 2 * 0.5) / (1.0 + 1.0 + 2.0),
                    })
                self.assertDictElementsAlmostEqual(
                    slices[fixed_string1_slice], {
                        example_count_key: 2,
                        weighted_example_count_key: 2.0,
                        label_key: (1.0 + 0.0) / (1.0 + 1.0),
                        pred_key: (0.2 + 0.8) / (1.0 + 1.0),
                    })
                self.assertDictElementsAlmostEqual(
                    slices[fixed_string2_slice], {
                        example_count_key: 1,
                        weighted_example_count_key: 2.0,
                        label_key: (2 * 0.0) / 2.0,
                        pred_key: (2 * 0.5) / 2.0,
                    })
            except AssertionError as err:
                raise util.BeamAssertException(err)

        util.assert_that(
            metrics[constants.METRICS_KEY], check_metrics, label='metrics')
def testRunModelAnalysisWithKerasModel(self):
    """Runs run_model_analysis on a 10-class Keras model with binarized AUC.

    AUC is configured with class-id binarization for classes 0, 5 and 9, and
    the test checks that an 'auc' entry exists for each of those classes.
    """
    # Minimal softmax classifier over flattened 28x28 inputs.
    data_input = tf.keras.layers.Input(shape=(28 * 28,), name='data')
    softmax_output = tf.keras.layers.Dense(
        10, activation=tf.nn.softmax)(data_input)
    model = tf.keras.models.Model(data_input, softmax_output)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(lr=.001),
        loss=tf.keras.losses.categorical_crossentropy)

    features = {'data': [[0.0] * 28 * 28]}
    labels = [[0, 0, 0, 0, 0, 0, 0, 1, 0, 0]]
    example_weights = [1.0]
    dataset = tf.data.Dataset.from_tensor_slices(
        (features, labels, example_weights))
    dataset = dataset.shuffle(buffer_size=1).repeat().batch(1)
    model.fit(dataset, steps_per_epoch=1)

    model_location = os.path.join(self._getTempDir(), 'export_dir')
    model.save(model_location, save_format='tf')

    examples = [
        self._makeExample(data=[0.0] * 28 * 28, label=1.0),
        self._makeExample(data=[1.0] * 28 * 28, label=5.0),
        self._makeExample(data=[1.0] * 28 * 28, label=9.0),
    ]
    data_location = self._writeTFExamplesToTFRecords(examples)

    # Keras metrics are passed to the config as class name + JSON config.
    metrics_spec = config.MetricsSpec()
    for keras_metric in (tf.keras.metrics.AUC(),):
        serialized = tf.keras.utils.serialize_keras_object(keras_metric)
        metrics_spec.metrics.append(
            config.MetricConfig(
                class_name=serialized['class_name'],
                config=json.dumps(serialized['config'])))
    metrics_spec.binarize.class_ids.extend((0, 5, 9))

    eval_config = config.EvalConfig(
        model_specs=[config.ModelSpec(label_key='label')],
        metrics_specs=[metrics_spec])
    eval_shared_model = model_eval_lib.default_eval_shared_model(
        eval_saved_model_path=model_location, tags=[tf.saved_model.SERVING])
    eval_result = model_eval_lib.run_model_analysis(
        eval_config=eval_config,
        eval_shared_model=eval_shared_model,
        data_location=data_location,
        output_path=self._getTempDir())

    self.assertEqual(eval_result.model_location, model_location)
    self.assertEqual(eval_result.data_location, data_location)
    self.assertLen(eval_result.slicing_metrics, 1)
    got_slice_key, got_metrics = eval_result.slicing_metrics[0]
    self.assertEqual(got_slice_key, ())
    self.assertIn('', got_metrics)  # output_name
    got_metrics = got_metrics['']

    # Only the presence of the keys is checked; the values are placeholders.
    expected_metrics = {
        'classId:0': {
            'auc': True,
        },
        'classId:5': {
            'auc': True,
        },
        'classId:9': {
            'auc': True,
        },
    }
    for class_id, metric_names in expected_metrics.items():
        self.assertIn(class_id, got_metrics)
        for metric_name in metric_names:
            self.assertIn(metric_name, got_metrics[class_id])
def testEvaluateWithKerasModel(self):
    """Evaluates a two-input Keras model and checks count and label metrics.

    Each example carries an explicit 'example_weight' feature, which the
    ModelSpec names as the example weight key.
    """
    input1 = tf.keras.layers.Input(shape=(1,), name='input1')
    input2 = tf.keras.layers.Input(shape=(1,), name='input2')
    model_inputs = [input1, input2]
    merged = tf.keras.layers.concatenate(model_inputs)
    sigmoid_output = tf.keras.layers.Dense(
        1, activation=tf.nn.sigmoid, name='output')(merged)
    model = tf.keras.models.Model(model_inputs, sigmoid_output)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(lr=.001),
        loss=tf.keras.losses.binary_crossentropy,
        metrics=['accuracy'])

    features = {'input1': [[0.0], [1.0]], 'input2': [[1.0], [0.0]]}
    labels = [[1], [0]]
    example_weights = [1.0, 0.5]
    dataset = tf.data.Dataset.from_tensor_slices(
        (features, labels, example_weights))
    dataset = dataset.shuffle(buffer_size=1).repeat().batch(2)
    model.fit(dataset, steps_per_epoch=1)

    export_dir = self._getExportDir()
    model.save(export_dir, save_format='tf')

    eval_config = config.EvalConfig(
        model_specs=[
            config.ModelSpec(
                location=export_dir,
                label_key='label',
                example_weight_key='example_weight')
        ],
        slicing_specs=[config.SlicingSpec()],
        metrics_specs=metric_specs.specs_from_metrics(
            [calibration.MeanLabel('mean_label')]))
    eval_shared_model = self.createTestEvalSharedModel(
        eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])

    slice_spec = [
        slicer.SingleSliceSpec(spec=spec)
        for spec in eval_config.slicing_specs
    ]
    extractors = [
        input_extractor.InputExtractor(eval_config=eval_config),
        predict_extractor_v2.PredictExtractor(
            eval_config=eval_config, eval_shared_models=[eval_shared_model]),
        slice_key_extractor.SliceKeyExtractor(slice_spec=slice_spec),
    ]
    evaluators = [
        metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
            eval_config=eval_config, eval_shared_models=[eval_shared_model])
    ]

    # 'extra_feature' is not one of the model's inputs.
    examples = [
        self._makeExample(
            input1=0.0,
            input2=1.0,
            label=1.0,
            example_weight=1.0,
            extra_feature='non_model_feature'),
        self._makeExample(
            input1=1.0,
            input2=0.0,
            label=0.0,
            example_weight=0.5,
            extra_feature='non_model_feature'),
    ]

    with beam.Pipeline() as pipeline:
        # pylint: disable=no-value-for-parameter
        extracts = (
            pipeline
            | 'Create' >> beam.Create(
                [e.SerializeToString() for e in examples])
            | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts())
        metrics = (
            extracts
            | 'ExtractAndEvaluate' >> model_eval_lib.ExtractAndEvaluate(
                extractors=extractors, evaluators=evaluators))

        # pylint: enable=no-value-for-parameter

        def check_metrics(got):
            try:
                self.assertLen(got, 1)
                got_slice_key, got_metrics = got[0]
                self.assertEqual(got_slice_key, ())
                example_count_key = metric_types.MetricKey(
                    name='example_count')
                weighted_example_count_key = metric_types.MetricKey(
                    name='weighted_example_count')
                label_key = metric_types.MetricKey(name='mean_label')
                expected = {
                    example_count_key: 2,
                    weighted_example_count_key: (1.0 + 0.5),
                    label_key: (1.0 * 1.0 + 0.0 * 0.5) / (1.0 + 0.5),
                }
                self.assertDictElementsAlmostEqual(got_metrics, expected)
            except AssertionError as err:
                raise util.BeamAssertException(err)

        util.assert_that(
            metrics[constants.METRICS_KEY], check_metrics, label='metrics')
def testRunModelAnalysisWithQueryBasedMetrics(self):
    """Runs run_model_analysis with an NDCG metric grouped by a query key.

    The examples are grouped by 'language' (the query key) and NDCG uses
    'age' as the gain, binarized with top_k = 1. Only the presence of the
    expected metric keys is verified.
    """
    age_input = tf.keras.layers.Input(shape=(1,), name='age')
    sigmoid_output = tf.keras.layers.Dense(
        1, activation=tf.nn.sigmoid)(age_input)
    model = tf.keras.models.Model(age_input, sigmoid_output)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(lr=.001),
        loss=tf.keras.losses.binary_crossentropy)

    features = {'age': [[20.0]]}
    labels = [[1]]
    example_weights = [1.0]
    dataset = tf.data.Dataset.from_tensor_slices(
        (features, labels, example_weights))
    dataset = dataset.shuffle(buffer_size=1).repeat().batch(1)
    model.fit(dataset, steps_per_epoch=1)

    model_location = os.path.join(self._getTempDir(), 'export_dir')
    model.save(model_location, save_format='tf')

    examples = [
        self._makeExample(age=3.0, language='english', label=1.0),
        self._makeExample(age=5.0, language='chinese', label=0.0),
        self._makeExample(age=3.0, language='english', label=0.0),
        self._makeExample(age=5.0, language='chinese', label=1.0),
    ]
    data_location = self._writeTFExamplesToTFRecords(examples)

    slicing_specs = [config.SlicingSpec()]
    query_metrics_specs = metric_specs.specs_from_metrics(
        [ndcg.NDCG(gain_key='age', name='ndcg')],
        binarize=config.BinarizationOptions(top_k_list=[1]),
        query_key='language')
    eval_config = config.EvalConfig(
        model_specs=[config.ModelSpec(label_key='label')],
        slicing_specs=slicing_specs,
        metrics_specs=query_metrics_specs)
    eval_shared_model = model_eval_lib.default_eval_shared_model(
        eval_saved_model_path=model_location, tags=[tf.saved_model.SERVING])
    evaluators = [
        metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
            eval_config=eval_config, eval_shared_model=eval_shared_model)
    ]
    eval_result = model_eval_lib.run_model_analysis(
        eval_config=eval_config,
        eval_shared_model=eval_shared_model,
        data_location=data_location,
        output_path=self._getTempDir(),
        evaluators=evaluators)

    self.assertEqual(eval_result.model_location, model_location)
    self.assertEqual(eval_result.data_location, data_location)
    self.assertLen(eval_result.slicing_metrics, 1)
    got_slice_key, got_metrics = eval_result.slicing_metrics[0]
    self.assertEqual(got_slice_key, ())
    self.assertIn('', got_metrics)  # output_name
    got_metrics = got_metrics['']

    # Only the presence of the keys is checked; the values are placeholders.
    expected_metrics = {
        '': {
            'example_count': True,
            'weighted_example_count': True,
        },
        'topK:1': {
            'ndcg': True,
        },
    }
    for group, metric_names in expected_metrics.items():
        self.assertIn(group, got_metrics)
        for metric_name in metric_names:
            self.assertIn(metric_name, got_metrics[group])
def testPredictExtractorWithMultiModels(self):
    """Runs the v2 PredictExtractor with two multi-head models.

    Verifies that every extract holds predictions for both model names and,
    per model, every '<head>/<prediction key>' combination.
    """
    temp_export_dir = self._getExportDir()
    export_dir1, _ = multi_head.simple_multi_head(temp_export_dir, None)
    export_dir2, _ = multi_head.simple_multi_head(temp_export_dir, None)

    eval_config = config.EvalConfig(model_specs=[
        config.ModelSpec(location=export_dir1, name='model1'),
        config.ModelSpec(location=export_dir2, name='model2'),
    ])
    eval_shared_model1 = self.createTestEvalSharedModel(
        eval_saved_model_path=export_dir1, tags=[tf.saved_model.SERVING])
    eval_shared_model2 = self.createTestEvalSharedModel(
        eval_saved_model_path=export_dir2, tags=[tf.saved_model.SERVING])
    predict_extractor = predict_extractor_v2.PredictExtractor(
        eval_config=eval_config,
        eval_shared_models=[eval_shared_model1, eval_shared_model2])

    examples = [
        self._makeExample(
            age=1.0,
            language='english',
            english_label=1.0,
            chinese_label=0.0,
            other_label=0.0),
        self._makeExample(
            age=1.0,
            language='chinese',
            english_label=0.0,
            chinese_label=1.0,
            other_label=0.0),
        self._makeExample(
            age=2.0,
            language='english',
            english_label=1.0,
            chinese_label=0.0,
            other_label=0.0),
        self._makeExample(
            age=2.0,
            language='other',
            english_label=0.0,
            chinese_label=1.0,
            other_label=1.0),
    ]

    with beam.Pipeline() as pipeline:
        # pylint: disable=no-value-for-parameter
        extracts = (
            pipeline
            | 'Create' >> beam.Create(
                [e.SerializeToString() for e in examples])
            | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts())
        result = (
            extracts
            | predict_extractor.stage_name >> predict_extractor.ptransform)

        # pylint: enable=no-value-for-parameter

        def check_result(got):
            # Every head crossed with every prediction key, in loop order.
            expected_prediction_keys = [
                output_name + '/' + pred_key
                for output_name in ('chinese_head', 'english_head',
                                    'other_head')
                for pred_key in ('logistic', 'probabilities', 'all_classes')
            ]
            try:
                self.assertLen(got, 4)
                for extract in got:
                    # We can't verify the actual predictions, but we can
                    # verify the keys
                    self.assertIn(constants.PREDICTIONS_KEY, extract)
                    for model_name in ('model1', 'model2'):
                        self.assertIn(model_name,
                                      extract[constants.PREDICTIONS_KEY])
                        for prediction_key in expected_prediction_keys:
                            self.assertIn(
                                prediction_key,
                                extract[constants.PREDICTIONS_KEY][model_name])
            except AssertionError as err:
                raise util.BeamAssertException(err)

        util.assert_that(result, check_result, label='result')
def testWriteMetricsAndPlots(self):
    """Writes metrics and plots via the Beam pipeline and reads them back.

    A fixed-prediction estimator is evaluated on two examples; the records
    written to the metrics and plots files are then parsed and compared with
    the expected protos.
    """
    metrics_file = os.path.join(self._getTempDir(), 'metrics')
    plots_file = os.path.join(self._getTempDir(), 'plots')
    temp_eval_export_dir = os.path.join(self._getTempDir(),
                                        'eval_export_dir')

    _, eval_export_dir = (
        fixed_prediction_estimator.simple_fixed_prediction_estimator(
            None, temp_eval_export_dir))
    eval_config = config.EvalConfig(
        model_specs=[config.ModelSpec()],
        options=config.Options(disabled_outputs=['eval_config.json']))
    eval_shared_model = self.createTestEvalSharedModel(
        eval_saved_model_path=eval_export_dir,
        add_metrics_callbacks=[
            post_export_metrics.example_count(),
            post_export_metrics.calibration_plot_and_prediction_histogram(
                num_buckets=2)
        ])
    extractors = [
        predict_extractor.PredictExtractor(eval_shared_model),
        slice_key_extractor.SliceKeyExtractor(),
    ]
    evaluators = [
        metrics_and_plots_evaluator.MetricsAndPlotsEvaluator(
            eval_shared_model)
    ]
    output_paths = {
        constants.METRICS_KEY: metrics_file,
        constants.PLOTS_KEY: plots_file,
    }
    writers = [
        metrics_and_plots_writer.MetricsAndPlotsWriter(
            output_paths, eval_shared_model.add_metrics_callbacks)
    ]

    with beam.Pipeline() as pipeline:
        example1 = self._makeExample(prediction=0.0, label=1.0)
        example2 = self._makeExample(prediction=1.0, label=1.0)
        serialized_examples = [
            example1.SerializeToString(),
            example2.SerializeToString(),
        ]

        # pylint: disable=no-value-for-parameter
        _ = (
            pipeline
            | 'Create' >> beam.Create(serialized_examples)
            | 'ExtractEvaluateAndWriteResults' >>
            model_eval_lib.ExtractEvaluateAndWriteResults(
                eval_config=eval_config,
                eval_shared_model=eval_shared_model,
                extractors=extractors,
                evaluators=evaluators,
                writers=writers))
        # pylint: enable=no-value-for-parameter

    # The output files are read only after the `with` block has exited.
    expected_metrics_for_slice = text_format.Parse(
        ' slice_key {} '
        'metrics { key: "average_loss" value { double_value { value: 0.5 } } } '
        'metrics { key: "post_export_metrics/example_count" '
        'value { double_value { value: 2.0 } } } ',
        metrics_for_slice_pb2.MetricsForSlice())

    metric_records = [
        metrics_for_slice_pb2.MetricsForSlice.FromString(record)
        for record in tf.compat.v1.python_io.tf_record_iterator(metrics_file)
    ]
    self.assertEqual(1, len(metric_records), 'metrics: %s' % metric_records)
    self.assertProtoEquals(expected_metrics_for_slice, metric_records[0])

    expected_plots_for_slice = text_format.Parse(
        ' slice_key {} '
        'plots { key: "post_export_metrics" value { '
        'calibration_histogram_buckets { '
        'buckets { lower_threshold_inclusive: -inf '
        'num_weighted_examples {} total_weighted_label {} '
        'total_weighted_refined_prediction {} } '
        'buckets { upper_threshold_exclusive: 0.5 '
        'num_weighted_examples { value: 1.0 } '
        'total_weighted_label { value: 1.0 } '
        'total_weighted_refined_prediction {} } '
        'buckets { lower_threshold_inclusive: 0.5 '
        'upper_threshold_exclusive: 1.0 '
        'num_weighted_examples { } total_weighted_label {} '
        'total_weighted_refined_prediction {} } '
        'buckets { lower_threshold_inclusive: 1.0 '
        'upper_threshold_exclusive: inf '
        'num_weighted_examples { value: 1.0 } '
        'total_weighted_label { value: 1.0 } '
        'total_weighted_refined_prediction { value: 1.0 } } '
        '} } } ',
        metrics_for_slice_pb2.PlotsForSlice())

    plot_records = [
        metrics_for_slice_pb2.PlotsForSlice.FromString(record)
        for record in tf.compat.v1.python_io.tf_record_iterator(plots_file)
    ]
    self.assertEqual(1, len(plot_records), 'plots: %s' % plot_records)
    self.assertProtoEquals(expected_plots_for_slice, plot_records[0])
def testGetLabelKeyNoOutputAndLabelKeys(self):
    """Expects ValueError for per-output label keys and an empty output name."""
    with self.assertRaises(ValueError):
        # The spec is built inside the context, as part of the checked call.
        model_spec = config.ModelSpec(label_keys={'output1': 'label'})
        model_util.get_label_key(model_spec, '')
def ExtractEvaluateAndWriteResults(  # pylint: disable=invalid-name
        examples: beam.pvalue.PCollection,
        eval_shared_model: Optional[Union[types.EvalSharedModel,
                                          Dict[Text,
                                               types.EvalSharedModel]]] = None,
        eval_config: Optional[config.EvalConfig] = None,
        extractors: Optional[List[extractor.Extractor]] = None,
        evaluators: Optional[List[evaluator.Evaluator]] = None,
        writers: Optional[List[writer.Writer]] = None,
        output_path: Optional[Text] = None,
        display_only_data_location: Optional[Text] = None,
        display_only_file_format: Optional[Text] = None,
        slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
        write_config: Optional[bool] = True,
        compute_confidence_intervals: Optional[bool] = False,
        k_anonymization_count: int = 1,
        desired_batch_size: Optional[int] = None,
        random_seed_for_testing: Optional[int] = None) -> beam.pvalue.PDone:
    """PTransform for performing extraction, evaluation, and writing results.

    Users who want to construct their own Beam pipelines instead of using the
    lightweight run_model_analysis functions should use this PTransform.

    Example usage:
      eval_config = tfma.EvalConfig(slicing_specs=[...], metrics_specs=[...])
      eval_shared_model = tfma.default_eval_shared_model(
          eval_saved_model_path=model_location)
      with beam.Pipeline(runner=...) as p:
        _ = (p
             | 'ReadData' >> beam.io.ReadFromTFRecord(data_location)
             | 'ExtractEvaluateAndWriteResults' >>
             tfma.ExtractEvaluateAndWriteResults(
                 eval_shared_model=eval_shared_model,
                 eval_config=eval_config,
                 ...))
      result = tfma.load_eval_result(output_path=output_path)
      tfma.view.render_slicing_metrics(result)

    Note that the exact serialization format is an internal implementation
    detail and subject to change. Users should only use the TFMA functions to
    write and read the results.

    Args:
      examples: PCollection of input examples. Can be any format the model
        accepts (e.g. string containing CSV row, TensorFlow.Example, etc).
      eval_shared_model: Optional shared model (single-model evaluation) or
        dict of shared models keyed by model name (multi-model evaluation).
        Only required if needed by default extractors, evaluators, or writers
        and for display purposes of the model path. A missing (None) shared
        model yields a ModelSpec without example-weight settings when
        eval_config has to be synthesized.
      eval_config: Eval config. If None, one is built from the shared models
        and the deprecated arguments below.
      extractors: Optional list of Extractors to apply to Extracts. Typically
        these will be added by calling the default_extractors function. If no
        extractors are provided, default_extractors (non-materialized) will
        be used.
      evaluators: Optional list of Evaluators for evaluating Extracts.
        Typically these will be added by calling the default_evaluators
        function. If no evaluators are provided, default_evaluators will be
        used.
      writers: Optional list of Writers for writing Evaluation output.
        Typically these will be added by calling the default_writers
        function. If no writers are provided, default_writers will be used.
      output_path: Path to output metrics and plots results.
      display_only_data_location: Optional path indicating where the examples
        were read from. This is used only for display purposes - data will
        not actually be read from this path.
      display_only_file_format: Optional format of the examples. This is used
        only for display purposes.
      slice_spec: Deprecated (use EvalConfig).
      write_config: Deprecated (use EvalConfig).
      compute_confidence_intervals: Deprecated (use EvalConfig).
      k_anonymization_count: Deprecated (use EvalConfig).
      desired_batch_size: Optional batch size for batching in Predict.
      random_seed_for_testing: Provide for deterministic tests only.

    Raises:
      ValueError: If EvalConfig invalid or matching Extractor not found for
        an Evaluator.

    Returns:
      PDone.
    """
    # Normalize to a dict keyed by model name; a single (possibly None)
    # shared model is stored under the empty name.
    eval_shared_models = eval_shared_model
    if not isinstance(eval_shared_model, dict):
        eval_shared_models = {'': eval_shared_model}

    if eval_config is None:
        # Synthesize an EvalConfig from the shared models and the deprecated
        # keyword arguments.
        model_specs = []
        for model_name, shared_model in eval_shared_models.items():
            if shared_model is None:
                # eval_shared_model is optional; without it there are no
                # example-weight settings to copy. Previously this case
                # raised AttributeError on None.example_weight_key.
                model_specs.append(config.ModelSpec(name=model_name))
                continue
            example_weight_key = shared_model.example_weight_key
            example_weight_keys = {}
            # A dict-valued key holds per-output weights and goes into the
            # plural field instead of the singular one.
            if example_weight_key and isinstance(example_weight_key, dict):
                example_weight_keys = example_weight_key
                example_weight_key = ''
            model_specs.append(
                config.ModelSpec(
                    name=model_name,
                    example_weight_key=example_weight_key,
                    example_weight_keys=example_weight_keys))
        slicing_specs = None
        if slice_spec:
            slicing_specs = [s.to_proto() for s in slice_spec]
        options = config.Options()
        options.compute_confidence_intervals.value = (
            compute_confidence_intervals)
        options.k_anonymization_count.value = k_anonymization_count
        if not write_config:
            options.disabled_outputs.append(_EVAL_CONFIG_FILE)
        eval_config = config.EvalConfig(
            model_specs=model_specs,
            slicing_specs=slicing_specs,
            options=options)

    # Add default ModelSpec if empty. A copy is modified so the caller's
    # config object is left untouched.
    if (eval_shared_models and len(eval_shared_models) == 1 and
            not eval_config.model_specs):
        tmp_config = config.EvalConfig()
        tmp_config.CopyFrom(eval_config)
        eval_config = tmp_config
        eval_config.model_specs.add()

    config.verify_eval_config(eval_config)

    if not extractors:
        extractors = default_extractors(
            eval_config=eval_config,
            eval_shared_model=eval_shared_model,
            materialize=False,
            desired_batch_size=desired_batch_size)

    if not evaluators:
        evaluators = default_evaluators(
            eval_config=eval_config,
            eval_shared_model=eval_shared_model,
            random_seed_for_testing=random_seed_for_testing)

    for v in evaluators:
        evaluator.verify_evaluator(v, extractors)

    if not writers:
        writers = default_writers(
            output_path=output_path, eval_shared_model=eval_shared_model)

    # pylint: disable=no-value-for-parameter
    _ = (
        examples
        | 'InputsToExtracts' >> InputsToExtracts()
        | 'ExtractAndEvaluate' >> ExtractAndEvaluate(
            extractors=extractors, evaluators=evaluators)
        | 'WriteResults' >> WriteResults(writers=writers))

    # Write the eval config unless that output has been disabled.
    if _EVAL_CONFIG_FILE not in eval_config.options.disabled_outputs:
        data_location = '<user provided PCollection>'
        if display_only_data_location is not None:
            data_location = display_only_data_location
        file_format = '<unknown>'
        if display_only_file_format is not None:
            file_format = display_only_file_format
        model_locations = {}
        for k, v in eval_shared_models.items():
            model_locations[k] = ('<unknown>' if v is None or
                                  v.model_path is None else v.model_path)
        _ = (
            examples.pipeline
            | WriteEvalConfig(eval_config, output_path, data_location,
                              file_format, model_locations))
    # pylint: enable=no-value-for-parameter

    return beam.pvalue.PDone(examples.pipeline)
def testModelSignaturesDoFn(self, save_as_keras, signature_names,
                            default_signature_names, prefer_dict_outputs,
                            use_schema, expected_num_outputs):
    """Runs ModelSignaturesDoFn over one batch and checks its outputs.

    Args:
      save_as_keras: Forwarded to createModelWithMultipleDenseInputs.
      signature_names: Dict whose keys are the extracts keys expected in the
        output and whose values are iterables of model names (a dict keyed by
        model name also works, since only its keys are iterated here).
      default_signature_names: Forwarded unchanged to ModelSignaturesDoFn.
      prefer_dict_outputs: Forwarded to ModelSignaturesDoFn. When true, every
        entry stored under each key must be a dict of expected_num_outputs
        items.
      use_schema: Whether to build the record source with the dense inputs
        schema and to pass a TensorAdapterConfig to the DoFn.
      expected_num_outputs: Expected size of each output dict (only checked
        when prefer_dict_outputs is true).
    """
    export_path = self.createModelWithMultipleDenseInputs(save_as_keras)

    # Register one shared model and one ModelSpec per distinct model name;
    # every model is loaded from the same export path.
    eval_shared_models = {}
    model_specs = []
    for model_names in signature_names.values():
        for name in model_names:
            if name in eval_shared_models:
                continue
            eval_shared_models[name] = self.createTestEvalSharedModel(
                eval_saved_model_path=export_path,
                model_name=name,
                tags=[tf.saved_model.SERVING])
            model_specs.append(config.ModelSpec(name=name))
    eval_config = config.EvalConfig(model_specs=model_specs)

    if use_schema:
        schema = self.createDenseInputsSchema()
    else:
        schema = None
    tfx_io = tf_example_record.TFExampleBeamRecord(
        physical_format='text',
        schema=schema,
        raw_record_column_name=constants.ARROW_INPUT_COLUMN)
    tensor_adapter_config = (
        tensor_adapter.TensorAdapterConfig(
            arrow_schema=tfx_io.ArrowSchema(),
            tensor_representations=tfx_io.TensorRepresentations())
        if use_schema else None)

    examples = [
        self._makeExample(input_1=first, input_2=second)
        for first, second in ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0))
    ]

    def check_result(got):
        try:
            # All three examples fit in a single batch, so one extracts
            # element is expected.
            self.assertLen(got, 1)
            extracts = got[0]
            for key in signature_names:
                self.assertIn(key, extracts)
                if not prefer_dict_outputs:
                    continue
                for entry in extracts[key]:
                    self.assertIsInstance(entry, dict)
                    self.assertLen(entry, expected_num_outputs)
        except AssertionError as err:
            raise util.BeamAssertException(err)

    with beam.Pipeline() as pipeline:
        # pylint: disable=no-value-for-parameter
        result = (
            pipeline
            | 'Create' >> beam.Create(
                [example.SerializeToString() for example in examples])
            | 'BatchExamples' >> tfx_io.BeamSource(batch_size=3)
            | 'ToExtracts' >> beam.Map(_record_batch_to_extracts)
            | 'ModelSignatures' >> beam.ParDo(
                model_util.ModelSignaturesDoFn(
                    eval_config=eval_config,
                    eval_shared_models=eval_shared_models,
                    signature_names=signature_names,
                    default_signature_names=default_signature_names,
                    prefer_dict_outputs=prefer_dict_outputs,
                    tensor_adapter_config=tensor_adapter_config)))
        # pylint: enable=no-value-for-parameter

        util.assert_that(result, check_result, label='result')
def run_model_analysis(
    eval_shared_model: Optional[Union[types.EvalSharedModel,
                                      Dict[Text,
                                           types.EvalSharedModel]]] = None,
    eval_config: Optional[config.EvalConfig] = None,
    data_location: Text = '',
    file_format: Text = 'tfrecords',
    output_path: Optional[Text] = None,
    extractors: Optional[List[extractor.Extractor]] = None,
    evaluators: Optional[List[evaluator.Evaluator]] = None,
    writers: Optional[List[writer.Writer]] = None,
    pipeline_options: Optional[Any] = None,
    slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
    write_config: Optional[bool] = True,
    compute_confidence_intervals: Optional[bool] = False,
    k_anonymization_count: int = 1,
    desired_batch_size: Optional[int] = None,
    random_seed_for_testing: Optional[int] = None
) -> Union[EvalResult, EvalResults]:
    """Runs TensorFlow model analysis.

    It runs a Beam pipeline to compute the slicing metrics exported in
    TensorFlow Eval SavedModel and returns the results.

    This is a simplified API for users who want to quickly get something
    running locally. Users who wish to create their own Beam pipelines can use
    the Evaluate PTransform instead.

    Args:
      eval_shared_model: Optional shared model (single-model evaluation) or
        dict of shared models keyed by model name (multi-model evaluation).
        Only required if needed by default extractors, evaluators, or writers.
      eval_config: Eval config. If None, a config is built from
        eval_shared_model and the deprecated arguments below; if provided,
        those deprecated arguments are not read.
      data_location: The location of the data files.
      file_format: The file format of the data, can be either 'text' or
        'tfrecords' for now. By default, 'tfrecords' will be used.
      output_path: The directory to output metrics and results to. If None, we
        use a temporary directory. The directory is created if it does not
        exist.
      extractors: Optional list of Extractors to apply to Extracts. Typically
        these will be added by calling the default_extractors function. If no
        extractors are provided, default_extractors (non-materialized) will be
        used.
      evaluators: Optional list of Evaluators for evaluating Extracts.
        Typically these will be added by calling the default_evaluators
        function. If no evaluators are provided, default_evaluators will be
        used.
      writers: Optional list of Writers for writing Evaluation output.
        Typically these will be added by calling the default_writers function.
        If no writers are provided, default_writers will be used.
      pipeline_options: Optional arguments to run the Pipeline, for instance
        whether to run directly.
      slice_spec: Deprecated (use EvalConfig).
      write_config: Deprecated (use EvalConfig).
      compute_confidence_intervals: Deprecated (use EvalConfig).
      k_anonymization_count: Deprecated (use EvalConfig).
      desired_batch_size: Optional batch size for batching in Predict.
      random_seed_for_testing: Provide for deterministic tests only.

    Returns:
      An EvalResult when the eval config has at most one model spec, otherwise
      an EvalResults holding one result per model spec. Both can be used with
      the TFMA visualization functions.

    Raises:
      ValueError: If the file_format is unknown to us.
    """
    _assert_tensorflow_version()

    # Reject an unsupported format before creating the output directory or
    # the pipeline, so a bad argument leaves nothing behind.
    if file_format not in ('tfrecords', 'text'):
        raise ValueError('unknown file_format: {}'.format(file_format))

    if output_path is None:
        output_path = tempfile.mkdtemp()
    if not tf.io.gfile.exists(output_path):
        tf.io.gfile.makedirs(output_path)

    if eval_config is None:
        eval_shared_models = eval_shared_model
        if not isinstance(eval_shared_model, dict):
            eval_shared_models = {'': eval_shared_model}

        model_specs = []
        for model_name, shared_model in eval_shared_models.items():
            # eval_shared_model is optional, so an entry may be None; treat a
            # missing model as having no example weight key instead of
            # failing on the attribute access.
            example_weight_key = (None if shared_model is None else
                                  shared_model.example_weight_key)
            example_weight_keys = {}
            if example_weight_key and isinstance(example_weight_key, dict):
                # A dict holds per-output weight keys and belongs in the
                # plural field; the singular field is then left empty.
                example_weight_keys = example_weight_key
                example_weight_key = ''
            model_specs.append(
                config.ModelSpec(name=model_name,
                                 example_weight_key=example_weight_key,
                                 example_weight_keys=example_weight_keys))

        slicing_specs = None
        if slice_spec:
            slicing_specs = [s.to_proto() for s in slice_spec]

        options = config.Options()
        options.compute_confidence_intervals.value = (
            compute_confidence_intervals)
        options.k_anonymization_count.value = k_anonymization_count
        if not write_config:
            options.disabled_outputs.append(_EVAL_CONFIG_FILE)

        eval_config = config.EvalConfig(model_specs=model_specs,
                                        slicing_specs=slicing_specs,
                                        options=options)

    with beam.Pipeline(options=pipeline_options) as p:
        if file_format == 'tfrecords':
            data = p | 'ReadFromTFRecord' >> beam.io.ReadFromTFRecord(
                file_pattern=data_location,
                compression_type=beam.io.filesystem.CompressionTypes.AUTO)
        else:  # 'text'; any other value was rejected above.
            data = p | 'ReadFromText' >> beam.io.textio.ReadFromText(
                data_location)

        # pylint: disable=no-value-for-parameter
        _ = (data
             | 'ExtractEvaluateAndWriteResults' >>
             ExtractEvaluateAndWriteResults(
                 eval_config=eval_config,
                 eval_shared_model=eval_shared_model,
                 display_only_data_location=data_location,
                 display_only_file_format=file_format,
                 output_path=output_path,
                 extractors=extractors,
                 evaluators=evaluators,
                 writers=writers,
                 desired_batch_size=desired_batch_size,
                 random_seed_for_testing=random_seed_for_testing))
        # pylint: enable=no-value-for-parameter

    # Results are loaded only after the pipeline context has exited.
    if len(eval_config.model_specs) <= 1:
        return load_eval_result(output_path)
    return EvalResults([
        load_eval_result(output_path, model_name=spec.name)
        for spec in eval_config.model_specs
    ], constants.MODEL_CENTRIC_MODE)
def _makeEvalConfig(self):
    """Returns an EvalConfig with one ModelSpec weighted by 'testing_key'."""
    weighted_spec = config.ModelSpec(example_weight_key='testing_key')
    return config.EvalConfig(model_specs=[weighted_spec])
def testRunModelAnalysisWithUncertainty(self):
    """Checks run_model_analysis end to end with confidence intervals on.

    The eval config slices by 'language', enables confidence intervals and
    sets a k-anonymization count of 2; the shared model uses 'age' as the
    example weight key.
    """
    model_location = self._exportEvalSavedModel(
        linear_classifier.simple_linear_classifier)

    # (age, language, label) for each example written to the input file.
    example_rows = [
        (3.0, 'english', 1.0),
        (3.0, 'chinese', 0.0),
        (4.0, 'english', 1.0),
        (5.0, 'chinese', 1.0),
        (5.0, 'hindi', 1.0),
    ]
    examples = [
        self._makeExample(age=age, language=language, label=label)
        for age, language, label in example_rows
    ]
    data_location = self._writeTFExamplesToTFRecords(examples)

    slicing_specs = [config.SlicingSpec(feature_keys=['language'])]
    options = config.Options()
    options.compute_confidence_intervals.value = True
    options.k_anonymization_count.value = 2
    eval_config = config.EvalConfig(
        input_data_specs=[config.InputDataSpec(location=data_location)],
        model_specs=[config.ModelSpec(location=model_location)],
        output_data_specs=[
            config.OutputDataSpec(default_location=self._getTempDir())
        ],
        slicing_specs=slicing_specs,
        options=options)

    # NOTE(review): this passes `eval_shared_models` (plural) as a list;
    # confirm that keyword and shape match the signature of
    # model_eval_lib.run_model_analysis in the version under test.
    shared_model = model_eval_lib.default_eval_shared_model(
        eval_saved_model_path=model_location, example_weight_key='age')
    eval_result = model_eval_lib.run_model_analysis(
        eval_config=eval_config, eval_shared_models=[shared_model])

    def poisson_bounded(value):
        # Bounded metric whose bounds coincide with the value itself.
        return {
            'boundedValue': {
                'value': value,
                'lowerBound': value,
                'upperBound': value,
                'methodology': 'POISSON_BOOTSTRAP'
            }
        }

    # We only check some of the metrics to ensure that the end-to-end
    # pipeline works.
    expected = {
        # Only one 'hindi' example exists, which is below the
        # k-anonymization count of 2.
        (('language', 'hindi'), ): {
            u'__ERROR__': {
                'debugMessage':
                    u'Example count for this slice key is lower than the '
                    u'minimum required value: 2. No data is aggregated for '
                    u'this slice.'
            },
        },
        # Example weights are the summed ages: 3.0 + 5.0.
        (('language', 'chinese'), ): {
            metric_keys.EXAMPLE_WEIGHT: {
                'doubleValue': 8.0
            },
            metric_keys.EXAMPLE_COUNT: {
                'doubleValue': 2.0
            },
        },
        # Example weights are the summed ages: 3.0 + 4.0.
        (('language', 'english'), ): {
            'accuracy': poisson_bounded(1.0),
            'my_mean_label': poisson_bounded(1.0),
            metric_keys.EXAMPLE_WEIGHT: {
                'doubleValue': 7.0
            },
            metric_keys.EXAMPLE_COUNT: {
                'doubleValue': 2.0
            },
        }
    }

    result_config = eval_result.config
    # NOTE(review): the decode() call presumes model_location is bytes while
    # the stored config location is text; confirm against
    # _exportEvalSavedModel.
    self.assertEqual(result_config.model_specs[0].location,
                     model_location.decode())
    self.assertEqual(result_config.input_data_specs[0].location,
                     data_location)
    self.assertEqual(result_config.slicing_specs[0],
                     config.SlicingSpec(feature_keys=['language']))
    self.assertMetricsAlmostEqual(eval_result.slicing_metrics, expected)
    self.assertFalse(eval_result.plots)