Example #1
    def testTFJSPredictExtractorWithKerasModel(self, multi_model,
                                               multi_output):
        input1 = tf.keras.layers.Input(shape=(1, ), name='input1')
        input2 = tf.keras.layers.Input(shape=(1, ), name='input2')
        inputs = [input1, input2]
        input_layer = tf.keras.layers.concatenate(inputs)
        output_layers = {}
        output_layers['output1'] = (tf.keras.layers.Dense(
            1, activation=tf.nn.sigmoid, name='output1')(input_layer))
        if multi_output:
            output_layers['output2'] = (tf.keras.layers.Dense(
                1, activation=tf.nn.sigmoid, name='output2')(input_layer))

        model = tf.keras.models.Model(inputs, output_layers)
        model.compile(optimizer=tf.keras.optimizers.Adam(lr=.001),
                      loss=tf.keras.losses.binary_crossentropy,
                      metrics=['accuracy'])

        train_features = {'input1': [[0.0], [1.0]], 'input2': [[1.0], [0.0]]}
        labels = {'output1': [[1], [0]]}
        if multi_output:
            labels['output2'] = [[1], [0]]

        example_weights = {'output1': [1.0, 0.5]}
        if multi_output:
            example_weights['output2'] = [1.0, 0.5]
        dataset = tf.data.Dataset.from_tensor_slices(
            (train_features, labels, example_weights))
        dataset = dataset.shuffle(buffer_size=1).repeat().batch(2)
        model.fit(dataset, steps_per_epoch=1)

        src_model_path = tempfile.mkdtemp()
        model.save(src_model_path)

        dst_model_path = tempfile.mkdtemp()
        converter.convert([
            '--input_format=tf_saved_model',
            '--saved_model_tags=serve',
            '--signature_name=serving_default',
            src_model_path,
            dst_model_path,
        ])

        model_specs = [config.ModelSpec(name='model1', model_type='tf_js')]
        if multi_model:
            model_specs.append(
                config.ModelSpec(name='model2', model_type='tf_js'))

        eval_config = config.EvalConfig(model_specs=model_specs)
        eval_shared_models = [
            self.createTestEvalSharedModel(
                model_name='model1',
                eval_saved_model_path=dst_model_path,
                model_type='tf_js')
        ]
        if multi_model:
            eval_shared_models.append(
                self.createTestEvalSharedModel(
                    model_name='model2',
                    eval_saved_model_path=dst_model_path,
                    model_type='tf_js'))

        schema = text_format.Parse(
            """
        feature {
          name: "input1"
          type: FLOAT
        }
        feature {
          name: "input2"
          type: FLOAT
        }
        feature {
          name: "non_model_feature"
          type: INT
        }
        """, schema_pb2.Schema())
        tfx_io = test_util.InMemoryTFExampleRecord(
            schema=schema, raw_record_column_name=constants.ARROW_INPUT_COLUMN)
        feature_extractor = features_extractor.FeaturesExtractor(eval_config)
        predictor = tfjs_predict_extractor.TFJSPredictExtractor(
            eval_config=eval_config, eval_shared_model=eval_shared_models)

        examples = [
            self._makeExample(input1=0.0, input2=1.0, non_model_feature=0),
            self._makeExample(input1=1.0, input2=0.0, non_model_feature=1),
        ]

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            result = (
                pipeline
                | 'Create' >> beam.Create(
                    [e.SerializeToString() for e in examples], reshuffle=False)
                | 'BatchExamples' >> tfx_io.BeamSource(batch_size=2)
                | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
                | feature_extractor.stage_name >> feature_extractor.ptransform
                | predictor.stage_name >> predictor.ptransform)

            # pylint: enable=no-value-for-parameter

            def check_result(got):
                try:
                    self.assertLen(got, 1)
                    got = got[0]
                    self.assertIn(constants.PREDICTIONS_KEY, got)
                    self.assertLen(got[constants.PREDICTIONS_KEY], 2)

                    for item in got[constants.PREDICTIONS_KEY]:
                        if multi_model:
                            self.assertIn('model1', item)
                            self.assertIn('model2', item)
                            if multi_output:
                                self.assertIn('Identity', item['model1'])
                                self.assertIn('Identity_1', item['model1'])

                        elif multi_output:
                            self.assertIn('Identity', item)
                            self.assertIn('Identity_1', item)

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(result, check_result, label='result')
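
# Sketch (not part of the test itself): the nested layout that check_result
# above asserts for each entry under constants.PREDICTIONS_KEY. The keys mirror
# the assertions; the numeric values are placeholders.
#
#   multi_model=True, multi_output=True:
#       {'model1': {'Identity': ..., 'Identity_1': ...},
#        'model2': {'Identity': ..., 'Identity_1': ...}}
#   multi_model=False, multi_output=True:
#       {'Identity': ..., 'Identity_1': ...}
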
class ModelUtilTest(testutil.TensorflowModelAnalysisTest,
                    parameterized.TestCase):
    def createDenseInputsSchema(self):
        return text_format.Parse(
            """
        tensor_representation_group {
          key: ""
          value {
            tensor_representation {
              key: "input_1"
              value {
                dense_tensor {
                  column_name: "input_1"
                  shape { dim { size: 1 } }
                }
              }
            }
            tensor_representation {
              key: "input_2"
              value {
                dense_tensor {
                  column_name: "input_2"
                  shape { dim { size: 1 } }
                }
              }
            }
          }
        }
        feature {
          name: "input_1"
          type: FLOAT
        }
        feature {
          name: "input_2"
          type: FLOAT
        }
        feature {
          name: "non_model_feature"
          type: INT
        }
        """, schema_pb2.Schema())

    def createModelWithSingleInput(self, save_as_keras):
        input_layer = tf.keras.layers.Input(shape=(1, ), name='input')
        output_layer = tf.keras.layers.Dense(
            1, activation=tf.nn.sigmoid)(input_layer)
        model = tf.keras.models.Model(input_layer, output_layer)

        @tf.function
        def serving_default(s):
            return model(s)

        input_spec = {
            'input': tf.TensorSpec(shape=(None, 1),
                                   dtype=tf.string,
                                   name='input'),
        }
        signatures = {
            'serving_default':
            serving_default.get_concrete_function(input_spec),
            'custom_signature':
            serving_default.get_concrete_function(input_spec),
        }

        export_path = tempfile.mkdtemp()
        if save_as_keras:
            model.save(export_path, save_format='tf', signatures=signatures)
        else:
            tf.saved_model.save(model, export_path, signatures=signatures)
        return export_path

    def createModelWithMultipleDenseInputs(self, save_as_keras):
        input1 = tf.keras.layers.Input(shape=(1, ), name='input_1')
        input2 = tf.keras.layers.Input(shape=(1, ), name='input_2')
        inputs = [input1, input2]
        input_layer = tf.keras.layers.concatenate(inputs)
        output_layer = tf.keras.layers.Dense(1,
                                             activation=tf.nn.sigmoid,
                                             name='output')(input_layer)
        model = tf.keras.models.Model(inputs, output_layer)

        # Add custom attribute to model to test callables stored as attributes
        model.custom_attribute = tf.keras.models.Model(inputs, output_layer)

        @tf.function
        def serving_default(serialized_tf_examples):
            parsed_features = tf.io.parse_example(
                serialized_tf_examples, {
                    'input_1': tf.io.FixedLenFeature([1], dtype=tf.float32),
                    'input_2': tf.io.FixedLenFeature([1], dtype=tf.float32)
                })
            return model(parsed_features)

        @tf.function
        def custom_single_output(features):
            return model(features)

        @tf.function
        def custom_multi_output(features):
            return {'output1': model(features), 'output2': model(features)}

        input_spec = tf.TensorSpec(shape=(None, ),
                                   dtype=tf.string,
                                   name='examples')
        custom_input_spec = {
            'input_1':
            tf.TensorSpec(shape=(None, 1), dtype=tf.float32, name='input_1'),
            'input_2':
            tf.TensorSpec(shape=(None, 1), dtype=tf.float32, name='input_2')
        }
        signatures = {
            'serving_default':
            serving_default.get_concrete_function(input_spec),
            'custom_single_output':
            custom_single_output.get_concrete_function(custom_input_spec),
            'custom_multi_output':
            custom_multi_output.get_concrete_function(custom_input_spec)
        }

        export_path = tempfile.mkdtemp()
        if save_as_keras:
            model.save(export_path, save_format='tf', signatures=signatures)
        else:
            tf.saved_model.save(model, export_path, signatures=signatures)
        return export_path

    def createModelWithMultipleMixedInputs(self, save_as_keras):
        dense_input = tf.keras.layers.Input(shape=(2, ),
                                            name='input_1',
                                            dtype=tf.int64)
        dense_float_input = tf.cast(dense_input, tf.float32)
        sparse_input = tf.keras.layers.Input(shape=(1, ),
                                             name='input_2',
                                             sparse=True)
        dense_sparse_input = tf.keras.layers.Dense(
            1, name='dense_input2')(sparse_input)
        ragged_input = tf.keras.layers.Input(shape=(None, ),
                                             name='input_3',
                                             ragged=True)
        dense_ragged_input = tf.keras.layers.Lambda(lambda x: x.to_tensor())(
            ragged_input)
        dense_ragged_input.set_shape((None, 1))
        inputs = [dense_input, sparse_input, ragged_input]
        input_layer = tf.keras.layers.concatenate(
            [dense_float_input, dense_sparse_input, dense_ragged_input])
        output_layer = tf.keras.layers.Dense(
            1, activation=tf.nn.sigmoid)(input_layer)
        model = tf.keras.models.Model(inputs, output_layer)

        @tf.function
        def serving_default(features):
            return model(features)

        input_spec = {
            'input_1':
            tf.TensorSpec(shape=(None, 2), dtype=tf.int64, name='input_1'),
            'input_2':
            tf.SparseTensorSpec(shape=(None, 1), dtype=tf.float32),
            'input_3':
            tf.RaggedTensorSpec(shape=(None, 1), dtype=tf.float32)
        }
        signatures = {
            'serving_default':
            serving_default.get_concrete_function(input_spec),
            'custom_signature':
            serving_default.get_concrete_function(input_spec),
        }

        export_path = tempfile.mkdtemp()
        if save_as_keras:
            model.save(export_path, save_format='tf', signatures=signatures)
        else:
            tf.saved_model.save(model, export_path, signatures=signatures)
        return export_path

    def testFilterByInputNames(self):
        tensors = {
            'f1': tf.constant([[1.1], [2.1]], dtype=tf.float32),
            'f2': tf.constant([[1], [2]], dtype=tf.int64),
            'f3': tf.constant([['hello'], ['world']], dtype=tf.string)
        }
        filtered_tensors = model_util.filter_by_input_names(
            tensors, ['f1', 'f3'])
        self.assertLen(filtered_tensors, 2)
        self.assertAllEqual(tf.constant([[1.1], [2.1]], dtype=tf.float32),
                            filtered_tensors['f1'])
        self.assertAllEqual(
            tf.constant([['hello'], ['world']], dtype=tf.string),
            filtered_tensors['f3'])

    def testFilterByInputNamesKeras(self):
        tensors = {
            'f1': tf.constant([[1.1], [2.1]], dtype=tf.float32),
            'f2': tf.constant([[1], [2]], dtype=tf.int64),
            'f3': tf.constant([['hello'], ['world']], dtype=tf.string)
        }
        filtered_tensors = model_util.filter_by_input_names(
            tensors, [
                'f1' + model_util.KERAS_INPUT_SUFFIX,
                'f3' + model_util.KERAS_INPUT_SUFFIX
            ])
        self.assertLen(filtered_tensors, 2)
        self.assertAllEqual(
            tf.constant([[1.1], [2.1]], dtype=tf.float32),
            filtered_tensors['f1' + model_util.KERAS_INPUT_SUFFIX])
        self.assertAllEqual(
            tf.constant([['hello'], ['world']], dtype=tf.string),
            filtered_tensors['f3' + model_util.KERAS_INPUT_SUFFIX])

    @parameterized.named_parameters(
        ('output_name_and_label_key', config.ModelSpec(label_key='label'),
         'output', 'label'),
        ('output_name_and_label_keys',
         config.ModelSpec(label_keys={'output': 'label'}), 'output', 'label'),
        ('output_name_and_no_label_keys', config.ModelSpec(), 'output', None),
        ('no_output_name_and_label_key', config.ModelSpec(label_key='label'),
         '', 'label'),
        ('no_output_name_and_no_label_keys', config.ModelSpec(), '', None))
    def testGetLabelKey(self, model_spec, output_name, expected_label_key):
        self.assertEqual(expected_label_key,
                         model_util.get_label_key(model_spec, output_name))

    def testGetLabelKeyNoOutputAndLabelKeys(self):
        with self.assertRaises(ValueError):
            model_util.get_label_key(
                config.ModelSpec(label_keys={'output1': 'label'}), '')
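
    # Quick reference (illustrative, mirroring the parameterized expectations
    # of the two tests above) for model_util.get_label_key:
    #   get_label_key(ModelSpec(label_key='label'), 'output')              -> 'label'
    #   get_label_key(ModelSpec(label_key='label'), '')                    -> 'label'
    #   get_label_key(ModelSpec(label_keys={'output': 'label'}), 'output') -> 'label'
    #   get_label_key(ModelSpec(), 'output')                               -> None
    #   get_label_key(ModelSpec(label_keys={'output1': 'label'}), '')      raises ValueError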

    @parameterized.named_parameters(
        {
            'testcase_name': 'single_model_single_key',
            'model_specs': [config.ModelSpec(label_key='feature1')],
            'field': 'label_key',
            'multi_output_field': 'label_keys',
            'expected_values': [
                [1.0, 1.1, 1.2],
            ]
        },
        {
            'testcase_name':
            'single_model_multi_key',
            'model_specs': [
                config.ModelSpec(label_keys={
                    'output1': 'feature1',
                    'output2': 'feature2'
                })
            ],
            'field':
            'label_key',
            'multi_output_field':
            'label_keys',
            'expected_values': [
                {
                    'output1': [1.0, 1.1, 1.2],
                    'output2': [2.0, 2.1, 2.2]
                },
            ]
        },
        {
            'testcase_name':
            'multi_model_single_key',
            'model_specs': [
                config.ModelSpec(name='model1', example_weight_key='feature2'),
                config.ModelSpec(name='model2', example_weight_key='feature3')
            ],
            'field':
            'example_weight_key',
            'multi_output_field':
            'example_weight_keys',
            'expected_values': [
                {
                    'model1': [2.0, 2.1, 2.2],
                    'model2': [3.0, 3.1, 3.2]
                },
            ]
        },
        {
            'testcase_name':
            'multi_model_multi_key',
            'model_specs': [
                config.ModelSpec(name='model1',
                                 prediction_keys={
                                     'output1': 'feature1',
                                     'output2': 'feature2'
                                 }),
                config.ModelSpec(name='model2',
                                 prediction_keys={
                                     'output1': 'feature1',
                                     'output3': 'feature3'
                                 })
            ],
            'field':
            'prediction_key',
            'multi_output_field':
            'prediction_keys',
            'expected_values': [
                {
                    'model1': {
                        'output1': [1.0, 1.1, 1.2],
                        'output2': [2.0, 2.1, 2.2]
                    },
                    'model2': {
                        'output1': [1.0, 1.1, 1.2],
                        'output3': [3.0, 3.1, 3.2]
                    }
                },
            ]
        },
    )
    def testGetFeatureValuesForModelSpecField(self, model_specs, field,
                                              multi_output_field,
                                              expected_values):
        extracts = {
            # Only need the num_rows from RecordBatch so use fake array of same len
            # as features.
            constants.ARROW_RECORD_BATCH_KEY:
            pa.RecordBatch.from_arrays([pa.array([1])], ['dummy']),
            constants.FEATURES_KEY: [
                {
                    'feature1': [1.0, 1.1, 1.2],
                    'feature2': [2.0, 2.1, 2.2],
                    'feature3': [3.0, 3.1, 3.2],
                },
            ]
        }
        got = model_util.get_feature_values_for_model_spec_field(
            model_specs, field, multi_output_field, extracts)
        self.assertAlmostEqual(expected_values, got)

    @parameterized.named_parameters(
        {
            'testcase_name': 'single_model_single_key',
            'model_specs': [config.ModelSpec(label_key='feature2')],
            'field': 'label_key',
            'multi_output_field': 'label_keys',
            'expected_values': [
                [4.0, 4.1, 4.2],
            ]
        },
        {
            'testcase_name':
            'single_model_multi_key',
            'model_specs': [
                config.ModelSpec(label_keys={
                    'output1': 'feature1',
                    'output2': 'feature2'
                })
            ],
            'field':
            'label_key',
            'multi_output_field':
            'label_keys',
            'expected_values': [
                {
                    'output1': [1.0, 1.1, 1.2],
                    'output2': [4.0, 4.1, 4.2]
                },
            ]
        },
    )
    def testGetFeatureValuesForModelSpecFieldWithSingleModelTransformedFeatures(
            self, model_specs, field, multi_output_field, expected_values):
        extracts = {
            # Only need the num_rows from RecordBatch so use fake array of same len
            # as features.
            constants.ARROW_RECORD_BATCH_KEY:
            pa.RecordBatch.from_arrays([pa.array([1])], ['dummy']),
            constants.FEATURES_KEY: [
                {
                    'feature1': [1.0, 1.1, 1.2],
                    'feature2': [2.0, 2.1, 2.2],
                },
            ],
            constants.TRANSFORMED_FEATURES_KEY: [
                {
                    'feature2': [4.0, 4.1, 4.2],
                },
            ]
        }
        got = model_util.get_feature_values_for_model_spec_field(
            model_specs, field, multi_output_field, extracts)
        self.assertAlmostEqual(expected_values, got)

    @parameterized.named_parameters(
        {
            'testcase_name':
            'multi_model_single_key',
            'model_specs': [
                config.ModelSpec(name='model1', example_weight_key='feature2'),
                config.ModelSpec(name='model2', example_weight_key='feature3')
            ],
            'field':
            'example_weight_key',
            'multi_output_field':
            'example_weight_keys',
            'expected_values': [
                {
                    'model1': [4.0, 4.1, 4.2],
                    'model2': [7.0, 7.1, 7.2]
                },
            ]
        },
        {
            'testcase_name':
            'multi_model_multi_key',
            'model_specs': [
                config.ModelSpec(name='model1',
                                 example_weight_keys={
                                     'output1': 'feature1',
                                     'output2': 'feature2'
                                 }),
                config.ModelSpec(name='model2',
                                 example_weight_keys={
                                     'output1': 'feature1',
                                     'output3': 'feature3'
                                 })
            ],
            'field':
            'example_weight_key',
            'multi_output_field':
            'example_weight_keys',
            'expected_values': [
                {
                    'model1': {
                        'output1': [1.0, 1.1, 1.2],
                        'output2': [4.0, 4.1, 4.2]
                    },
                    'model2': {
                        'output1': [1.0, 1.1, 1.2],
                        'output3': [7.0, 7.1, 7.2]
                    }
                },
            ]
        },
    )
    def testGetFeatureValuesForModelSpecFieldWithMultiModelTransformedFeatures(
            self, model_specs, field, multi_output_field, expected_values):
        extracts = {
            # Only need the num_rows from RecordBatch so use fake array of same len
            # as features.
            constants.ARROW_RECORD_BATCH_KEY:
            pa.RecordBatch.from_arrays([pa.array([1])], ['dummy']),
            constants.FEATURES_KEY: [
                {
                    'feature1': [1.0, 1.1, 1.2],
                    'feature2': [2.0, 2.1, 2.2],
                },
            ],
            constants.TRANSFORMED_FEATURES_KEY: [
                {
                    'model1': {
                        'feature2': [4.0, 4.1, 4.2],
                        'feature3': [5.0, 5.1, 5.2]
                    },
                    'model2': {
                        'feature2': [6.0, 6.1, 6.2],
                        'feature3': [7.0, 7.1, 7.2]
                    }
                },
            ]
        }
        got = model_util.get_feature_values_for_model_spec_field(
            model_specs, field, multi_output_field, extracts)
        self.assertAlmostEqual(expected_values, got)

    def testGetFeatureValuesForModelSpecFieldNoValues(self):
        model_spec = config.ModelSpec(name='model1',
                                      example_weight_key='feature2')
        extracts = {
            constants.ARROW_RECORD_BATCH_KEY:
            pa.RecordBatch.from_arrays([pa.array([1])], ['dummy']),
        }
        got = model_util.get_feature_values_for_model_spec_field(
            [model_spec], 'example_weight', 'example_weights', extracts)
        self.assertIsNone(got)

    @parameterized.named_parameters(
        ('keras_serving_default', True, 'serving_default'),
        ('keras_custom_signature', True, 'custom_signature'),
        ('tf2_serving_default', False, 'serving_default'),
        ('tf2_custom_signature', False, 'custom_signature'))
    def testGetCallableWithSignatures(self, save_as_keras, signature_name):
        export_path = self.createModelWithSingleInput(save_as_keras)
        if save_as_keras:
            model = tf.keras.models.load_model(export_path)
        else:
            model = tf.compat.v1.saved_model.load_v2(export_path)
        self.assertIsNotNone(model_util.get_callable(model, signature_name))

    @parameterized.named_parameters(('keras', True), ('tf2', False))
    def testGetCallableWithMissingSignatures(self, save_as_keras):
        export_path = self.createModelWithSingleInput(save_as_keras)
        if save_as_keras:
            model = tf.keras.models.load_model(export_path)
        else:
            model = tf.compat.v1.saved_model.load_v2(export_path)
        with self.assertRaises(ValueError):
            model_util.get_callable(model, 'non_existent')

    @unittest.skipIf(_TF_MAJOR_VERSION < 2,
                     'not all input types supported for TF1')
    def testGetCallableWithKerasModel(self):
        export_path = self.createModelWithMultipleMixedInputs(True)
        model = tf.keras.models.load_model(export_path)
        self.assertEqual(model, model_util.get_callable(model))

    @parameterized.named_parameters(
        ('keras_serving_default', True, 'serving_default'),
        ('keras_custom_signature', True, 'custom_signature'),
        ('tf2_serving_default', False, None),
        ('tf2_custom_signature', False, 'custom_signature'))
    def testGetInputSpecsWithSignatures(self, save_as_keras, signature_name):
        export_path = self.createModelWithSingleInput(save_as_keras)
        if save_as_keras:
            model = tf.keras.models.load_model(export_path)
        else:
            model = tf.compat.v1.saved_model.load_v2(export_path)
        self.assertEqual(
            {
                'input':
                tf.TensorSpec(name='input', shape=(None, 1), dtype=tf.string),
            }, model_util.get_input_specs(model, signature_name))

    @parameterized.named_parameters(('keras', True), ('tf2', False))
    def testGetInputSpecsWithMissingSignatures(self, save_as_keras):
        export_path = self.createModelWithSingleInput(save_as_keras)
        if save_as_keras:
            model = tf.keras.models.load_model(export_path)
        else:
            model = tf.compat.v1.saved_model.load_v2(export_path)
        with self.assertRaises(ValueError):
            model_util.get_callable(model, 'non_existent')

    @unittest.skipIf(_TF_MAJOR_VERSION < 2,
                     'not all input types supported for TF1')
    def testGetInputSpecsWithKerasModel(self):
        export_path = self.createModelWithMultipleMixedInputs(True)
        model = tf.keras.models.load_model(export_path)

        # Some versions of TF set the TensorSpec.name and others do not. Since we
        # don't care about the name, clear it from the output for testing purposes.
        specs = model_util.get_input_specs(model)
        for k, v in specs.items():
            if isinstance(v, tf.TensorSpec):
                specs[k] = tf.TensorSpec(shape=v.shape, dtype=v.dtype)
        self.assertEqual(
            {
                'input_1':
                tf.TensorSpec(shape=(None, 2), dtype=tf.int64),
                'input_2':
                tf.SparseTensorSpec(shape=(None, 1), dtype=tf.float32),
                'input_3':
                tf.RaggedTensorSpec(shape=(None, None), dtype=tf.float32),
            }, specs)

    def testInputSpecsToTensorRepresentations(self):
        tensor_representations = model_util.input_specs_to_tensor_representations(
            {
                'input_1':
                tf.TensorSpec(shape=(None, 2), dtype=tf.int64),
                'input_2':
                tf.SparseTensorSpec(shape=(None, 1), dtype=tf.float32),
                'input_3':
                tf.RaggedTensorSpec(shape=(None, None), dtype=tf.float32),
            })
        dense_tensor_representation = text_format.Parse(
            """
        dense_tensor {
          column_name: "input_1"
          shape { dim { size: 2 } }
        }
        """, schema_pb2.TensorRepresentation())
        sparse_tensor_representation = text_format.Parse(
            """
        varlen_sparse_tensor {
          column_name: "input_2"
        }
        """, schema_pb2.TensorRepresentation())
        ragged_tensor_representation = text_format.Parse(
            """
        ragged_tensor {
          feature_path {
            step: "input_3"
          }
        }
        """, schema_pb2.TensorRepresentation())
        self.assertEqual(
            {
                'input_1': dense_tensor_representation,
                'input_2': sparse_tensor_representation,
                'input_3': ragged_tensor_representation
            }, tensor_representations)

    def testInputSpecsToTensorRepresentationsRaisesWithUnknownDims(self):
        with self.assertRaises(ValueError):
            model_util.input_specs_to_tensor_representations({
                'input_1':
                tf.TensorSpec(shape=(None, None), dtype=tf.int64),
            })

    @parameterized.named_parameters(
        ('keras_default', True, {
            constants.PREDICTIONS_KEY: {
                '': [None]
            }
        }, None, False, True, 1),
        ('tf_default', False, {
            constants.PREDICTIONS_KEY: {
                '': [None]
            }
        }, None, False, True, 1),
        ('keras_serving_default', True, {
            constants.PREDICTIONS_KEY: {
                '': ['serving_default']
            }
        }, None, False, True, 1),
        ('tf_serving_default', False, {
            constants.PREDICTIONS_KEY: {
                '': ['serving_default']
            }
        }, None, False, True, 1),
        ('keras_custom_single_output', True, {
            constants.PREDICTIONS_KEY: {
                '': ['custom_single_output']
            }
        }, None, False, True, 1),
        ('tf_custom_single_output', False, {
            constants.PREDICTIONS_KEY: {
                '': ['custom_single_output']
            }
        }, None, False, True, 1),
        ('keras_custom_multi_output', True, {
            constants.PREDICTIONS_KEY: {
                '': ['custom_multi_output']
            }
        }, None, False, True, 2),
        ('tf_custom_multi_output', False, {
            constants.PREDICTIONS_KEY: {
                '': ['custom_multi_output']
            }
        }, None, False, True, 2),
        ('multi_model', True, {
            constants.PREDICTIONS_KEY: {
                'model1': ['custom_multi_output'],
                'model2': ['custom_multi_output']
            }
        }, None, False, True, 2),
        ('default_signatures', True, {
            constants.PREDICTIONS_KEY: {
                '': [],
            }
        }, ['unknown', 'custom_single_output'], False, True, 1),
        ('keras_prefer_dict_outputs', True, {
            constants.FEATURES_KEY: {
                '': [],
            }
        }, ['unknown', 'custom_single_output', 'custom_multi_output'
            ], True, True, 3),
        ('tf_prefer_dict_outputs', False, {
            constants.FEATURES_KEY: {
                '': [],
            }
        }, ['unknown', 'custom_single_output', 'custom_multi_output'
            ], True, True, 3),
        ('custom_attribute', True, {
            constants.FEATURES_KEY: {
                '': ['custom_attribute'],
            }
        }, None, True, True, 1),
        ('keras_no_schema', True, {
            constants.PREDICTIONS_KEY: {
                '': [None]
            }
        }, None, False, False, 1),
        ('tf_no_schema', False, {
            constants.PREDICTIONS_KEY: {
                '': [None]
            }
        }, None, False, False, 1),
    )
    @unittest.skipIf(_TF_MAJOR_VERSION < 2,
                     'not all signatures supported for TF1')
    def testModelSignaturesDoFn(self, save_as_keras, signature_names,
                                default_signature_names, prefer_dict_outputs,
                                use_schema, expected_num_outputs):
        export_path = self.createModelWithMultipleDenseInputs(save_as_keras)
        eval_shared_models = {}
        model_specs = []
        for sigs in signature_names.values():
            for model_name in sigs:
                if model_name not in eval_shared_models:
                    eval_shared_models[
                        model_name] = self.createTestEvalSharedModel(
                            eval_saved_model_path=export_path,
                            model_name=model_name,
                            tags=[tf.saved_model.SERVING])
                    model_specs.append(config.ModelSpec(name=model_name))
        eval_config = config.EvalConfig(model_specs=model_specs)
        schema = self.createDenseInputsSchema() if use_schema else None
        tfx_io = tf_example_record.TFExampleBeamRecord(
            physical_format='text',
            schema=schema,
            raw_record_column_name=constants.ARROW_INPUT_COLUMN)
        tensor_adapter_config = None
        if use_schema:
            tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
                arrow_schema=tfx_io.ArrowSchema(),
                tensor_representations=tfx_io.TensorRepresentations())

        examples = [
            self._makeExample(input_1=1.0, input_2=2.0),
            self._makeExample(input_1=3.0, input_2=4.0),
            self._makeExample(input_1=5.0, input_2=6.0),
        ]

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            result = (pipeline
                      | 'Create' >> beam.Create(
                          [e.SerializeToString() for e in examples])
                      | 'BatchExamples' >> tfx_io.BeamSource(batch_size=3)
                      | 'ToExtracts' >> beam.Map(_record_batch_to_extracts)
                      | 'ModelSignatures' >> beam.ParDo(
                          model_util.ModelSignaturesDoFn(
                              eval_config=eval_config,
                              eval_shared_models=eval_shared_models,
                              signature_names=signature_names,
                              default_signature_names=default_signature_names,
                              prefer_dict_outputs=prefer_dict_outputs,
                              tensor_adapter_config=tensor_adapter_config)))

            # pylint: enable=no-value-for-parameter

            def check_result(got):
                try:
                    self.assertLen(got, 1)
                    for key in signature_names:
                        self.assertIn(key, got[0])
                        if prefer_dict_outputs:
                            for entry in got[0][key]:
                                self.assertIsInstance(entry, dict)
                                self.assertLen(entry, expected_num_outputs)

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(result, check_result, label='result')
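
    # Note on the parameterization above: signature_names is keyed first by the
    # extracts key to populate and then by model name. For example, the
    # 'multi_model' case passes
    #   {constants.PREDICTIONS_KEY: {'model1': ['custom_multi_output'],
    #                                'model2': ['custom_multi_output']}},
    # while the prefer_dict_outputs cases store their outputs under
    # constants.FEATURES_KEY instead of constants.PREDICTIONS_KEY.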

    def testHasRubberStamp(self):
        # Model agnostic.
        self.assertFalse(model_util.has_rubber_stamp(None))

        # All non-baseline models have a rubber stamp.
        baseline = self.createTestEvalSharedModel(
            model_name=constants.BASELINE_KEY)
        candidate = self.createTestEvalSharedModel(
            model_name=constants.CANDIDATE_KEY, rubber_stamp=True)
        self.assertTrue(model_util.has_rubber_stamp([baseline, candidate]))

        # Not all non-baseline models have a rubber stamp.
        candidate_nr = self.createTestEvalSharedModel(
            model_name=constants.CANDIDATE_KEY)
        self.assertFalse(model_util.has_rubber_stamp([candidate_nr]))
        self.assertFalse(
            model_util.has_rubber_stamp([baseline, candidate, candidate_nr]))
Example #3
    def testUnbatchExtractor(self):
        model_spec = config.ModelSpec(label_key='label',
                                      example_weight_key='example_weight')
        eval_config = config.EvalConfig(model_specs=[model_spec])
        input_extractor = batched_input_extractor.BatchedInputExtractor(
            eval_config)
        unbatch_inputs_extractor = unbatch_extractor.UnbatchExtractor()

        schema = text_format.Parse(
            """
        feature {
          name: "label"
          type: FLOAT
        }
        feature {
          name: "example_weight"
          type: FLOAT
        }
        feature {
          name: "fixed_int"
          type: INT
        }
        feature {
          name: "fixed_float"
          type: FLOAT
        }
        feature {
          name: "fixed_string"
          type: BYTES
        }
        """, schema_pb2.Schema())
        tfx_io = test_util.InMemoryTFExampleRecord(
            schema=schema, raw_record_column_name=constants.ARROW_INPUT_COLUMN)
        examples = [
            self._makeExample(label=1.0,
                              example_weight=0.5,
                              fixed_int=1,
                              fixed_float=1.0,
                              fixed_string='fixed_string1'),
            self._makeExample(label=0.0,
                              example_weight=0.0,
                              fixed_int=1,
                              fixed_float=1.0,
                              fixed_string='fixed_string2'),
            self._makeExample(label=0.0,
                              example_weight=1.0,
                              fixed_int=2,
                              fixed_float=0.0,
                              fixed_string='fixed_string3')
        ]

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            result = (
                pipeline
                | 'Create' >> beam.Create(
                    [e.SerializeToString() for e in examples], reshuffle=False)
                | 'BatchExamples' >> tfx_io.BeamSource(batch_size=3)
                | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
                | input_extractor.stage_name >> input_extractor.ptransform
                | unbatch_inputs_extractor.stage_name >>
                unbatch_inputs_extractor.ptransform)

            # pylint: enable=no-value-for-parameter

            def check_result(got):
                try:
                    self.assertLen(got, 3)
                    self.assertDictElementsAlmostEqual(
                        got[0][constants.FEATURES_KEY], {
                            'fixed_int': np.array([1]),
                            'fixed_float': np.array([1.0]),
                        })
                    self.assertEqual(
                        got[0][constants.FEATURES_KEY]['fixed_string'],
                        np.array([b'fixed_string1']))
                    self.assertAlmostEqual(got[0][constants.LABELS_KEY],
                                           np.array([1.0]))
                    self.assertAlmostEqual(
                        got[0][constants.EXAMPLE_WEIGHTS_KEY], np.array([0.5]))
                    self.assertDictElementsAlmostEqual(
                        got[1][constants.FEATURES_KEY], {
                            'fixed_int': np.array([1]),
                            'fixed_float': np.array([1.0]),
                        })
                    self.assertEqual(
                        got[1][constants.FEATURES_KEY]['fixed_string'],
                        np.array([b'fixed_string2']))
                    self.assertAlmostEqual(got[1][constants.LABELS_KEY],
                                           np.array([0.0]))
                    self.assertAlmostEqual(
                        got[1][constants.EXAMPLE_WEIGHTS_KEY], np.array([0.0]))
                    self.assertDictElementsAlmostEqual(
                        got[2][constants.FEATURES_KEY], {
                            'fixed_int': np.array([2]),
                            'fixed_float': np.array([0.0]),
                        })
                    self.assertEqual(
                        got[2][constants.FEATURES_KEY]['fixed_string'],
                        np.array([b'fixed_string3']))
                    self.assertAlmostEqual(got[2][constants.LABELS_KEY],
                                           np.array([0.0]))
                    self.assertAlmostEqual(
                        got[2][constants.EXAMPLE_WEIGHTS_KEY], np.array([1.0]))

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(result, check_result, label='result')
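
    # Note on the pipeline above: the three serialized examples are grouped into
    # a single batched extract (batch_size=3), and UnbatchExtractor then splits
    # it back into three per-example extracts, which is why check_result
    # receives len(got) == 3 entries.
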
    def testEvaluateWithQueryBasedMetrics(self):
        temp_export_dir = self._getExportDir()
        _, export_dir = (fixed_prediction_estimator_extra_fields.
                         simple_fixed_prediction_estimator_extra_fields(
                             None, temp_export_dir))
        eval_config = config.EvalConfig(
            model_specs=[
                config.ModelSpec(location=export_dir,
                                 label_key='label',
                                 example_weight_key='fixed_int')
            ],
            slicing_specs=[
                config.SlicingSpec(),
                config.SlicingSpec(feature_keys=['fixed_string']),
            ],
            metrics_specs=metric_specs.specs_from_metrics(
                [ndcg.NDCG(gain_key='fixed_float', name='ndcg')],
                binarize=config.BinarizationOptions(top_k_list=[1, 2]),
                query_key='fixed_string'))
        eval_shared_model = self.createTestEvalSharedModel(
            eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])
        slice_spec = [
            slicer.SingleSliceSpec(spec=s) for s in eval_config.slicing_specs
        ]
        extractors = [
            input_extractor.InputExtractor(eval_config=eval_config),
            predict_extractor_v2.PredictExtractor(
                eval_config=eval_config,
                eval_shared_models=[eval_shared_model]),
            slice_key_extractor.SliceKeyExtractor(slice_spec=slice_spec)
        ]
        evaluators = [
            metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
                eval_config=eval_config,
                eval_shared_models=[eval_shared_model])
        ]

        # fixed_string used as query_key
        # fixed_float used as gain_key for NDCG
        # fixed_int used as example_weight_key for NDCG
        examples = [
            self._makeExample(prediction=0.2,
                              label=1.0,
                              fixed_float=1.0,
                              fixed_string='query1',
                              fixed_int=1),
            self._makeExample(prediction=0.8,
                              label=0.0,
                              fixed_float=0.5,
                              fixed_string='query1',
                              fixed_int=1),
            self._makeExample(prediction=0.5,
                              label=0.0,
                              fixed_float=0.5,
                              fixed_string='query2',
                              fixed_int=2),
            self._makeExample(prediction=0.9,
                              label=1.0,
                              fixed_float=1.0,
                              fixed_string='query2',
                              fixed_int=2),
            self._makeExample(prediction=0.1,
                              label=0.0,
                              fixed_float=0.1,
                              fixed_string='query2',
                              fixed_int=2),
            self._makeExample(prediction=0.9,
                              label=1.0,
                              fixed_float=1.0,
                              fixed_string='query3',
                              fixed_int=3)
        ]

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            metrics = (
                pipeline
                | 'Create' >> beam.Create(
                    [e.SerializeToString() for e in examples])
                | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
                | 'ExtractAndEvaluate' >> model_eval_lib.ExtractAndEvaluate(
                    extractors=extractors, evaluators=evaluators))

            # pylint: enable=no-value-for-parameter

            def check_metrics(got):
                try:
                    self.assertLen(got, 4)
                    slices = {}
                    for slice_key, value in got:
                        slices[slice_key] = value
                    overall_slice = ()
                    query1_slice = (('fixed_string', b'query1'), )
                    query2_slice = (('fixed_string', b'query2'), )
                    query3_slice = (('fixed_string', b'query3'), )
                    self.assertCountEqual(list(slices.keys()), [
                        overall_slice, query1_slice, query2_slice, query3_slice
                    ])
                    example_count_key = metric_types.MetricKey(
                        name='example_count')
                    weighted_example_count_key = metric_types.MetricKey(
                        name='weighted_example_count')
                    ndcg1_key = metric_types.MetricKey(
                        name='ndcg', sub_key=metric_types.SubKey(top_k=1))
                    ndcg2_key = metric_types.MetricKey(
                        name='ndcg', sub_key=metric_types.SubKey(top_k=2))
                    # Query1 (weight=1): (p=0.8, g=0.5) (p=0.2, g=1.0)
                    # Query2 (weight=2): (p=0.9, g=1.0) (p=0.5, g=0.5) (p=0.1, g=0.1)
                    # Query3 (weight=3): (p=0.9, g=1.0)
                    #
                    # DCG@1:  0.5, 1.0, 1.0
                    # NDCG@1: 0.5, 1.0, 1.0
                    # Average NDCG@1: (1 * 0.5 + 2 * 1.0 + 3 * 1.0) / (1 + 2 + 3) ~ 0.92
                    #
                    # DCG@2: (0.5 + 1.0/log(3)) ~ 1.130930
                    #        (1.0 + 0.5/log(3)) ~ 1.315465
                    #        1.0
                    # NDCG@2: (0.5 + 1.0/log(3)) / (1.0 + 0.5/log(3)) ~ 0.85972
                    #         (1.0 + 0.5/log(3)) / (1.0 + 0.5/log(3)) = 1.0
                    #         1.0
                    # Average NDCG@2: (1 * 0.860 + 2 * 1.0 + 3 * 1.0) / (1 + 2 + 3) ~ 0.97
                    self.assertDictElementsAlmostEqual(
                        slices[overall_slice], {
                            example_count_key: 6,
                            weighted_example_count_key: 11.0,
                            ndcg1_key: 0.9166667,
                            ndcg2_key: 0.9766198
                        })
                    self.assertDictElementsAlmostEqual(
                        slices[query1_slice], {
                            example_count_key: 2,
                            weighted_example_count_key: 2.0,
                            ndcg1_key: 0.5,
                            ndcg2_key: 0.85972
                        })
                    self.assertDictElementsAlmostEqual(
                        slices[query2_slice], {
                            example_count_key: 3,
                            weighted_example_count_key: 6.0,
                            ndcg1_key: 1.0,
                            ndcg2_key: 1.0
                        })
                    self.assertDictElementsAlmostEqual(
                        slices[query3_slice], {
                            example_count_key: 1,
                            weighted_example_count_key: 3.0,
                            ndcg1_key: 1.0,
                            ndcg2_key: 1.0
                        })

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(metrics[constants.METRICS_KEY],
                             check_metrics,
                             label='metrics')
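
# Standalone arithmetic check (illustrative, not part of the test): recomputing
# the NDCG values asserted above from the per-query gains listed in the inline
# comments, using log base 2 for the DCG discount.
import math

dcg_q1_at_2 = 0.5 + 1.0 / math.log2(3)        # query1 ranked by prediction: gains 0.5, 1.0
ideal_dcg_q1_at_2 = 1.0 + 0.5 / math.log2(3)  # query1 ideal ordering: gains 1.0, 0.5
ndcg_q1_at_2 = dcg_q1_at_2 / ideal_dcg_q1_at_2
print(round(ndcg_q1_at_2, 5))                                # 0.85972 (query1 ndcg2_key)
print(round((1 * 0.5 + 2 * 1.0 + 3 * 1.0) / 6, 7))           # 0.9166667 (overall ndcg1_key)
print(round((1 * ndcg_q1_at_2 + 2 * 1.0 + 3 * 1.0) / 6, 7))  # 0.9766198 (overall ndcg2_key)
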
Example #5
    def testPredictExtractorWithSequentialKerasModel(self):
        # Note that the input will be called 'test_input'
        model = tf.keras.models.Sequential([
            tf.keras.layers.Dense(1,
                                  activation=tf.nn.sigmoid,
                                  input_shape=(1, ),
                                  name='test')
        ])
        model.compile(optimizer=tf.keras.optimizers.Adam(lr=.001),
                      loss=tf.keras.losses.binary_crossentropy,
                      metrics=['accuracy'])

        train_features = {'test_input': [[0.0], [1.0]]}
        labels = [[1], [0]]
        example_weights = [1.0, 0.5]
        dataset = tf.data.Dataset.from_tensor_slices(
            (train_features, labels, example_weights))
        dataset = dataset.shuffle(buffer_size=1).repeat().batch(2)
        model.fit(dataset, steps_per_epoch=1)

        export_dir = self._getExportDir()
        model.save(export_dir, save_format='tf')

        eval_config = config.EvalConfig(
            model_specs=[config.ModelSpec(location=export_dir)])
        eval_shared_model = self.createTestEvalSharedModel(
            eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])
        predict_extractor = predict_extractor_v2.PredictExtractor(
            eval_config=eval_config, eval_shared_models=[eval_shared_model])

        # Notice that the features are 'test' but the model expects 'test_input'.
        # This tests that the PredictExtractor properly handles this case.
        predict_features = [
            {
                'test': np.array([0.0], dtype=np.float32),
                'non_model_feature':
                np.array([0]),  # should be ignored by model
            },
            {
                'test': np.array([1.0], dtype=np.float32),
                'non_model_feature':
                np.array([1]),  # should be ignored by model
            }
        ]

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            result = (
                pipeline
                | 'Create' >> beam.Create(predict_features)
                | 'FeaturesToExtracts' >>
                beam.Map(lambda x: {constants.FEATURES_KEY: x})
                | predict_extractor.stage_name >> predict_extractor.ptransform)

            # pylint: enable=no-value-for-parameter

            def check_result(got):
                try:
                    self.assertLen(got, 2)
                    # We can't verify the actual predictions, but we can verify the keys.
                    for item in got:
                        self.assertIn(constants.PREDICTIONS_KEY, item)

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(result, check_result, label='result')
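
    # Note on the name mismatch above: a Keras Sequential model names its
    # auto-created input '<first_layer_name>_input' (here 'test_input'), while
    # the feature dicts only provide 'test'; reconciling that suffix is what
    # this test exercises (compare the KERAS_INPUT_SUFFIX handling in
    # testFilterByInputNamesKeras from the model_util tests above).
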
    def testEvaluateWithMultiClassModel(self):
        n_classes = 3
        temp_export_dir = self._getExportDir()
        _, export_dir = dnn_classifier.simple_dnn_classifier(
            None, temp_export_dir, n_classes=n_classes)

        # Add example_count and weighted_example_count
        eval_config = config.EvalConfig(
            model_specs=[
                config.ModelSpec(location=export_dir,
                                 label_key='label',
                                 example_weight_key='age')
            ],
            slicing_specs=[config.SlicingSpec()],
            metrics_specs=metric_specs.specs_from_metrics(
                [calibration.MeanLabel('mean_label')],
                binarize=config.BinarizationOptions(
                    class_ids=range(n_classes))))
        eval_shared_model = self.createTestEvalSharedModel(
            eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])

        slice_spec = [
            slicer.SingleSliceSpec(spec=s) for s in eval_config.slicing_specs
        ]
        extractors = [
            input_extractor.InputExtractor(eval_config=eval_config),
            predict_extractor_v2.PredictExtractor(
                eval_config=eval_config,
                eval_shared_models=[eval_shared_model]),
            slice_key_extractor.SliceKeyExtractor(slice_spec=slice_spec)
        ]
        evaluators = [
            metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
                eval_config=eval_config,
                eval_shared_models=[eval_shared_model])
        ]

        examples = [
            self._makeExample(age=1.0, language='english', label=0),
            self._makeExample(age=2.0, language='chinese', label=1),
            self._makeExample(age=3.0, language='english', label=2),
            self._makeExample(age=4.0, language='chinese', label=1),
        ]

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            metrics = (
                pipeline
                | 'Create' >> beam.Create(
                    [e.SerializeToString() for e in examples])
                | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
                | 'ExtractAndEvaluate' >> model_eval_lib.ExtractAndEvaluate(
                    extractors=extractors, evaluators=evaluators))

            # pylint: enable=no-value-for-parameter

            def check_metrics(got):
                try:
                    self.assertLen(got, 1)
                    got_slice_key, got_metrics = got[0]
                    example_count_key = metric_types.MetricKey(
                        name='example_count')
                    weighted_example_count_key = metric_types.MetricKey(
                        name='weighted_example_count')
                    label_key_class_0 = metric_types.MetricKey(
                        name='mean_label',
                        sub_key=metric_types.SubKey(class_id=0))
                    label_key_class_1 = metric_types.MetricKey(
                        name='mean_label',
                        sub_key=metric_types.SubKey(class_id=1))
                    label_key_class_2 = metric_types.MetricKey(
                        name='mean_label',
                        sub_key=metric_types.SubKey(class_id=2))
                    self.assertEqual(got_slice_key, ())
                    self.assertDictElementsAlmostEqual(
                        got_metrics, {
                            example_count_key:
                            4,
                            weighted_example_count_key:
                            (1.0 + 2.0 + 3.0 + 4.0),
                            label_key_class_0:
                            (1 * 1.0 + 0 * 2.0 + 0 * 3.0 + 0 * 4.0) /
                            (1.0 + 2.0 + 3.0 + 4.0),
                            label_key_class_1:
                            (0 * 1.0 + 1 * 2.0 + 0 * 3.0 + 1 * 4.0) /
                            (1.0 + 2.0 + 3.0 + 4.0),
                            label_key_class_2:
                            (0 * 1.0 + 0 * 2.0 + 1 * 3.0 + 0 * 4.0) /
                            (1.0 + 2.0 + 3.0 + 4.0)
                        })

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(metrics[constants.METRICS_KEY],
                             check_metrics,
                             label='metrics')
    def testEvaluateWithMultiOutputModel(self):
        temp_export_dir = self._getExportDir()
        _, export_dir = multi_head.simple_multi_head(None, temp_export_dir)

        eval_config = config.EvalConfig(
            model_specs=[
                config.ModelSpec(location=export_dir,
                                 label_keys={
                                     'chinese_head': 'chinese_label',
                                     'english_head': 'english_label',
                                     'other_head': 'other_label'
                                 },
                                 example_weight_keys={
                                     'chinese_head': 'age',
                                     'english_head': 'age',
                                     'other_head': 'age'
                                 })
            ],
            slicing_specs=[config.SlicingSpec()],
            metrics_specs=metric_specs.specs_from_metrics({
                'chinese_head': [calibration.MeanLabel('mean_label')],
                'english_head': [calibration.MeanLabel('mean_label')],
                'other_head': [calibration.MeanLabel('mean_label')],
            }))
        eval_shared_model = self.createTestEvalSharedModel(
            eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])

        slice_spec = [
            slicer.SingleSliceSpec(spec=s) for s in eval_config.slicing_specs
        ]
        extractors = [
            input_extractor.InputExtractor(eval_config=eval_config),
            predict_extractor_v2.PredictExtractor(
                eval_config=eval_config,
                eval_shared_models=[eval_shared_model]),
            slice_key_extractor.SliceKeyExtractor(slice_spec=slice_spec)
        ]
        evaluators = [
            metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
                eval_config=eval_config,
                eval_shared_models=[eval_shared_model])
        ]

        examples = [
            self._makeExample(age=1.0,
                              language='english',
                              english_label=1.0,
                              chinese_label=0.0,
                              other_label=0.0),
            self._makeExample(age=1.0,
                              language='chinese',
                              english_label=0.0,
                              chinese_label=1.0,
                              other_label=0.0),
            self._makeExample(age=2.0,
                              language='english',
                              english_label=1.0,
                              chinese_label=0.0,
                              other_label=0.0),
            self._makeExample(age=2.0,
                              language='other',
                              english_label=0.0,
                              chinese_label=1.0,
                              other_label=1.0),
        ]
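        # Each head uses 'age' as its example weight, so every head's weighted
        # example count is 1.0 + 1.0 + 2.0 + 2.0 and mean_label is the
        # age-weighted label mean asserted in check_metrics below.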

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            metrics = (
                pipeline
                | 'Create' >> beam.Create(
                    [e.SerializeToString() for e in examples])
                | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
                | 'ExtractAndEvaluate' >> model_eval_lib.ExtractAndEvaluate(
                    extractors=extractors, evaluators=evaluators))

            # pylint: enable=no-value-for-parameter

            def check_metrics(got):
                try:
                    self.assertLen(got, 1)
                    got_slice_key, got_metrics = got[0]
                    self.assertEqual(got_slice_key, ())
                    example_count_key = metric_types.MetricKey(
                        name='example_count')
                    chinese_weighted_example_count_key = metric_types.MetricKey(
                        name='weighted_example_count',
                        output_name='chinese_head')
                    chinese_label_key = metric_types.MetricKey(
                        name='mean_label', output_name='chinese_head')
                    english_weighted_example_count_key = metric_types.MetricKey(
                        name='weighted_example_count',
                        output_name='english_head')
                    english_label_key = metric_types.MetricKey(
                        name='mean_label', output_name='english_head')
                    other_weighted_example_count_key = metric_types.MetricKey(
                        name='weighted_example_count',
                        output_name='other_head')
                    other_label_key = metric_types.MetricKey(
                        name='mean_label', output_name='other_head')
                    self.assertDictElementsAlmostEqual(
                        got_metrics, {
                            example_count_key:
                            4,
                            chinese_label_key:
                            (0.0 + 1.0 + 2 * 0.0 + 2 * 1.0) /
                            (1.0 + 1.0 + 2.0 + 2.0),
                            chinese_weighted_example_count_key:
                            (1.0 + 1.0 + 2.0 + 2.0),
                            english_label_key:
                            (1.0 + 0.0 + 2 * 1.0 + 2 * 0.0) /
                            (1.0 + 1.0 + 2.0 + 2.0),
                            english_weighted_example_count_key:
                            (1.0 + 1.0 + 2.0 + 2.0),
                            other_label_key: (0.0 + 0.0 + 2 * 0.0 + 2 * 1.0) /
                            (1.0 + 1.0 + 2.0 + 2.0),
                            other_weighted_example_count_key:
                            (1.0 + 1.0 + 2.0 + 2.0)
                        })

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(metrics[constants.METRICS_KEY],
                             check_metrics,
                             label='metrics')
Example #8
    def testBatchSizeLimit(self):
        temp_export_dir = self._getExportDir()
        _, export_dir = batch_size_limited_classifier.simple_batch_size_limited_classifier(
            None, temp_export_dir)
        eval_shared_model = self.createTestEvalSharedModel(
            eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])
        eval_config = config.EvalConfig(model_specs=[config.ModelSpec()])
        schema = text_format.Parse(
            """
        feature {
          name: "classes"
          type: BYTES
        }
        feature {
          name: "scores"
          type: FLOAT
        }
        feature {
          name: "labels"
          type: BYTES
        }
        """, schema_pb2.Schema())
        tfx_io = test_util.InMemoryTFExampleRecord(
            schema=schema, raw_record_column_name=constants.BATCHED_INPUT_KEY)
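        # The TensorAdapterConfig describes how the Arrow RecordBatches emitted
        # by tfx_io are converted into the tensors expected by the model.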
        tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
            arrow_schema=tfx_io.ArrowSchema(),
            tensor_representations=tfx_io.TensorRepresentations())
        input_extractor = batched_input_extractor.BatchedInputExtractor(
            eval_config)
        predict_extractor = batched_predict_extractor_v2.BatchedPredictExtractor(
            eval_config=eval_config,
            eval_shared_model=eval_shared_model,
            tensor_adapter_config=tensor_adapter_config)

        examples = []
        for _ in range(4):
            examples.append(
                self._makeExample(classes='first', scores=0.0, labels='third'))

        with beam.Pipeline() as pipeline:
            predict_extracts = (
                pipeline
                | 'Create' >> beam.Create(
                    [e.SerializeToString() for e in examples], reshuffle=False)
                | 'BatchExamples' >> tfx_io.BeamSource(batch_size=1)
                |
                'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
                | input_extractor.stage_name >> input_extractor.ptransform
                | predict_extractor.stage_name >> predict_extractor.ptransform)

            def check_result(got):
                try:
                    self.assertLen(got, 4)
                    # We can't verify the actual predictions, but we can verify the keys.
                    for item in got:
                        self.assertIn(constants.BATCHED_PREDICTIONS_KEY, item)

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(predict_extracts, check_result, label='result')

  def testBatchedInputExtractor(self, label):
    model_spec = config.ModelSpec(
        label_key=label, example_weight_key='example_weight')
    eval_config = config.EvalConfig(model_specs=[model_spec])
    input_extractor = batched_input_extractor.BatchedInputExtractor(eval_config)

    label_feature = ''
    if label is not None:
      label_feature = """
          feature {
            name: "%s"
            type: FLOAT
          }
          """ % label
    schema = text_format.Parse(
        label_feature + """
        feature {
          name: "example_weight"
          type: FLOAT
        }
        feature {
          name: "fixed_int"
          type: INT
        }
        feature {
          name: "fixed_float"
          type: FLOAT
        }
        feature {
          name: "fixed_string"
          type: BYTES
        }
        """, schema_pb2.Schema())
    tfx_io = test_util.InMemoryTFExampleRecord(
        schema=schema, raw_record_column_name=constants.ARROW_INPUT_COLUMN)
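    # When the test is parameterized with label=None, the label feature is
    # omitted from the schema above and from the example kwargs built below.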

    def maybe_add_key(d, key, value):
      if key is not None:
        d[key] = value
      return d

    example_kwargs = [
        maybe_add_key(
            {
                'example_weight': 0.5,
                'fixed_int': 1,
                'fixed_float': 1.0,
                'fixed_string': 'fixed_string1'
            }, label, 1.0),
        maybe_add_key(
            {
                'example_weight': 0.0,
                'fixed_int': 1,
                'fixed_float': 1.0,
                'fixed_string': 'fixed_string2'
            }, label, 0.0),
        maybe_add_key(
            {
                'example_weight': 1.0,
                'fixed_int': 2,
                'fixed_float': 0.0,
                'fixed_string': 'fixed_string3'
            }, label, 0.0),
    ]

    with beam.Pipeline() as pipeline:
      # pylint: disable=no-value-for-parameter
      result = (
          pipeline
          | 'Create' >> beam.Create([
              self._makeExample(**kwargs).SerializeToString()
              for kwargs in example_kwargs
          ],
                                    reshuffle=False)
          | 'BatchExamples' >> tfx_io.BeamSource(batch_size=3)
          | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
          | input_extractor.stage_name >> input_extractor.ptransform)

      # pylint: enable=no-value-for-parameter

      def check_result(got):
        try:
          self.assertLen(got, 1)
          self.assertDictElementsAlmostEqual(
              got[0][constants.FEATURES_KEY][0],
              maybe_add_key(
                  {
                      'fixed_int': np.array([1]),
                      'fixed_float': np.array([1.0]),
                      'example_weight': np.array([0.5]),
                  }, label, np.array([1.0])))
          self.assertEqual(got[0][constants.FEATURES_KEY][0]['fixed_string'],
                           np.array([b'fixed_string1']))
          self.assertAlmostEqual(got[0][constants.LABELS_KEY][0],
                                 np.array([1.0]) if label is not None else None)
          self.assertAlmostEqual(got[0][constants.EXAMPLE_WEIGHTS_KEY][0],
                                 np.array([0.5]))
          self.assertDictElementsAlmostEqual(
              got[0][constants.FEATURES_KEY][1],
              maybe_add_key(
                  {
                      'fixed_int': np.array([1]),
                      'fixed_float': np.array([1.0]),
                      'example_weight': np.array([0.0]),
                  }, label, np.array([0.0])))
          self.assertEqual(got[0][constants.FEATURES_KEY][1]['fixed_string'],
                           np.array([b'fixed_string2']))
          self.assertAlmostEqual(got[0][constants.LABELS_KEY][1],
                                 np.array([0.0]) if label is not None else None)
          self.assertAlmostEqual(got[0][constants.EXAMPLE_WEIGHTS_KEY][1],
                                 np.array([0.0]))
          self.assertDictElementsAlmostEqual(
              got[0][constants.FEATURES_KEY][2],
              maybe_add_key(
                  {
                      'fixed_int': np.array([2]),
                      'fixed_float': np.array([0.0]),
                      'example_weight': np.array([1.0]),
                  }, label, np.array([0.0])))
          self.assertEqual(got[0][constants.FEATURES_KEY][2]['fixed_string'],
                           np.array([b'fixed_string3']))
          self.assertAlmostEqual(got[0][constants.LABELS_KEY][2],
                                 np.array([0.0]) if label is not None else None)
          self.assertAlmostEqual(got[0][constants.EXAMPLE_WEIGHTS_KEY][2],
                                 np.array([1.0]))

        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(result, check_result, label='result')
Example #10
    def testPredictExtractorWithSequentialKerasModel(self):
        # Note that the input will be called 'test_input'
        model = tf.keras.models.Sequential([
            tf.keras.layers.Dense(1,
                                  activation=tf.nn.sigmoid,
                                  input_shape=(2, ),
                                  name='test')
        ])
        model.compile(optimizer=tf.keras.optimizers.Adam(lr=.001),
                      loss=tf.keras.losses.binary_crossentropy,
                      metrics=['accuracy'])

        train_features = {'test_input': [[0.0, 0.0], [1.0, 1.0]]}
        labels = [[1], [0]]
        example_weights = [1.0, 0.5]
        dataset = tf.data.Dataset.from_tensor_slices(
            (train_features, labels, example_weights))
        dataset = dataset.shuffle(buffer_size=1).repeat().batch(2)
        model.fit(dataset, steps_per_epoch=1)

        export_dir = self._getExportDir()
        model.save(export_dir, save_format='tf')

        eval_config = config.EvalConfig(model_specs=[config.ModelSpec()])
        eval_shared_model = self.createTestEvalSharedModel(
            eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])
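        # The tensor_representation_group below maps the serialized 'test'
        # feature to a dense tensor of shape (batch, 2) so the predict
        # extractor can feed it to the model's 'test_input' input.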
        schema = text_format.Parse(
            """
        tensor_representation_group {
          key: ""
          value {
            tensor_representation {
              key: "test"
              value {
                dense_tensor {
                  column_name: "test"
                  shape { dim { size: 2 } }
                }
              }
            }
          }
        }
        feature {
          name: "test"
          type: FLOAT
        }
        feature {
          name: "non_model_feature"
          type: INT
        }
        """, schema_pb2.Schema())
        tfx_io = test_util.InMemoryTFExampleRecord(
            schema=schema, raw_record_column_name=constants.BATCHED_INPUT_KEY)
        tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
            arrow_schema=tfx_io.ArrowSchema(),
            tensor_representations=tfx_io.TensorRepresentations())
        input_extractor = batched_input_extractor.BatchedInputExtractor(
            eval_config)
        predict_extractor = batched_predict_extractor_v2.BatchedPredictExtractor(
            eval_config=eval_config,
            eval_shared_model=eval_shared_model,
            tensor_adapter_config=tensor_adapter_config)

        # Notice that the features are 'test' but the model expects 'test_input'.
        # This tests that the PredictExtractor properly handles this case.
        examples = [
            self._makeExample(
                test=[0.0,
                      0.0], non_model_feature=0),  # should be ignored by model
            self._makeExample(
                test=[1.0,
                      1.0], non_model_feature=1),  # should be ignored by model
        ]

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            result = (
                pipeline
                | 'Create' >> beam.Create(
                    [e.SerializeToString() for e in examples], reshuffle=False)
                | 'BatchExamples' >> tfx_io.BeamSource(batch_size=2)
                |
                'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
                | input_extractor.stage_name >> input_extractor.ptransform
                | predict_extractor.stage_name >> predict_extractor.ptransform)

            # pylint: enable=no-value-for-parameter

            def check_result(got):
                try:
                    self.assertLen(got, 1)
                    # We can't verify the actual predictions, but we can verify the keys.
                    for item in got:
                        self.assertIn(constants.BATCHED_PREDICTIONS_KEY, item)

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(result, check_result, label='result')
Example #11
    def testBatchSizeLimitWithKerasModel(self):
        input1 = tf.keras.layers.Input(shape=(1, ),
                                       batch_size=1,
                                       name='input1')
        input2 = tf.keras.layers.Input(shape=(1, ),
                                       batch_size=1,
                                       name='input2')

        inputs = [input1, input2]
        input_layer = tf.keras.layers.concatenate(inputs)

        def add_1(tensor):
            return tf.add_n([tensor, tf.constant(1.0, shape=(1, 2))])

        assert_layer = tf.keras.layers.Lambda(add_1)(input_layer)
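        # tf.add_n requires its inputs to share the same shape, so adding a
        # (1, 2) constant in the Lambda above only succeeds when the batch
        # holds exactly one example; this is what limits the batch size.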

        model = tf.keras.models.Model(inputs, assert_layer)
        model.compile(optimizer=tf.keras.optimizers.Adam(lr=.001),
                      loss=tf.keras.losses.binary_crossentropy,
                      metrics=['accuracy'])

        export_dir = self._getExportDir()
        model.save(export_dir, save_format='tf')

        eval_config = config.EvalConfig(model_specs=[config.ModelSpec()])
        eval_shared_model = self.createTestEvalSharedModel(
            eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])
        schema = text_format.Parse(
            """
        tensor_representation_group {
          key: ""
          value {
            tensor_representation {
              key: "input1"
              value {
                dense_tensor {
                  column_name: "input1"
                  shape { dim { size: 1 } }
                }
              }
            }
            tensor_representation {
              key: "input2"
              value {
                dense_tensor {
                  column_name: "input2"
                  shape { dim { size: 1 } }
                }
              }
            }
          }
        }
        feature {
          name: "input1"
          type: FLOAT
        }
        feature {
          name: "input2"
          type: FLOAT
        }
        """, schema_pb2.Schema())
        tfx_io = test_util.InMemoryTFExampleRecord(
            schema=schema, raw_record_column_name=constants.BATCHED_INPUT_KEY)
        tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
            arrow_schema=tfx_io.ArrowSchema(),
            tensor_representations=tfx_io.TensorRepresentations())
        input_extractor = batched_input_extractor.BatchedInputExtractor(
            eval_config)
        predict_extractor = batched_predict_extractor_v2.BatchedPredictExtractor(
            eval_config=eval_config,
            eval_shared_model=eval_shared_model,
            tensor_adapter_config=tensor_adapter_config)

        examples = []
        for _ in range(4):
            examples.append(self._makeExample(input1=0.0, input2=1.0))

        with beam.Pipeline() as pipeline:
            predict_extracts = (
                pipeline
                | 'Create' >> beam.Create(
                    [e.SerializeToString() for e in examples], reshuffle=False)
                | 'BatchExamples' >> tfx_io.BeamSource(batch_size=1)
                |
                'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
                | input_extractor.stage_name >> input_extractor.ptransform
                | predict_extractor.stage_name >> predict_extractor.ptransform)

            # pylint: enable=no-value-for-parameter
            def check_result(got):
                try:
                    self.assertLen(got, 4)
                    # We can't verify the actual predictions, but we can verify the keys.
                    for item in got:
                        self.assertIn(constants.BATCHED_PREDICTIONS_KEY, item)

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(predict_extracts, check_result, label='result')
Example #12
    def testPredictExtractorWithRegressionModel(self):
        temp_export_dir = self._getExportDir()
        export_dir, _ = (fixed_prediction_estimator_extra_fields.
                         simple_fixed_prediction_estimator_extra_fields(
                             temp_export_dir, None))

        eval_config = config.EvalConfig(model_specs=[config.ModelSpec()])
        eval_shared_model = self.createTestEvalSharedModel(
            eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])
        schema = text_format.Parse(
            """
        feature {
          name: "prediction"
          type: FLOAT
        }
        feature {
          name: "label"
          type: FLOAT
        }
        feature {
          name: "fixed_int"
          type: INT
        }
        feature {
          name: "fixed_float"
          type: FLOAT
        }
        feature {
          name: "fixed_string"
          type: BYTES
        }
        """, schema_pb2.Schema())
        tfx_io = test_util.InMemoryTFExampleRecord(
            schema=schema, raw_record_column_name=constants.BATCHED_INPUT_KEY)
        tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
            arrow_schema=tfx_io.ArrowSchema(),
            tensor_representations=tfx_io.TensorRepresentations())
        input_extractor = batched_input_extractor.BatchedInputExtractor(
            eval_config)
        predict_extractor = batched_predict_extractor_v2.BatchedPredictExtractor(
            eval_config=eval_config,
            eval_shared_model=eval_shared_model,
            tensor_adapter_config=tensor_adapter_config)

        examples = [
            self._makeExample(prediction=0.2,
                              label=1.0,
                              fixed_int=1,
                              fixed_float=1.0,
                              fixed_string='fixed_string1'),
            self._makeExample(prediction=0.8,
                              label=0.0,
                              fixed_int=1,
                              fixed_float=1.0,
                              fixed_string='fixed_string2'),
            self._makeExample(prediction=0.5,
                              label=0.0,
                              fixed_int=2,
                              fixed_float=1.0,
                              fixed_string='fixed_string3')
        ]
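        # The fixed-prediction estimator echoes the 'prediction' feature, so
        # the extracted predictions are expected to match [0.2, 0.8, 0.5].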

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            result = (
                pipeline
                | 'Create' >> beam.Create(
                    [e.SerializeToString() for e in examples], reshuffle=False)
                | 'BatchExamples' >> tfx_io.BeamSource(batch_size=3)
                |
                'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
                | input_extractor.stage_name >> input_extractor.ptransform
                | predict_extractor.stage_name >> predict_extractor.ptransform)

            # pylint: enable=no-value-for-parameter

            def check_result(got):
                try:
                    self.assertLen(got, 1)
                    self.assertIn(constants.BATCHED_PREDICTIONS_KEY, got[0])
                    expected_preds = [0.2, 0.8, 0.5]
                    self.assertAlmostEqual(
                        got[0][constants.BATCHED_PREDICTIONS_KEY],
                        expected_preds)

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(result, check_result, label='result')
Example #13
    def testPredictExtractorWithMultiModels(self):
        temp_export_dir = self._getExportDir()
        export_dir1, _ = multi_head.simple_multi_head(temp_export_dir, None)
        export_dir2, _ = multi_head.simple_multi_head(temp_export_dir, None)

        eval_config = config.EvalConfig(model_specs=[
            config.ModelSpec(name='model1'),
            config.ModelSpec(name='model2')
        ])
        eval_shared_model1 = self.createTestEvalSharedModel(
            eval_saved_model_path=export_dir1, tags=[tf.saved_model.SERVING])
        eval_shared_model2 = self.createTestEvalSharedModel(
            eval_saved_model_path=export_dir2, tags=[tf.saved_model.SERVING])
        schema = text_format.Parse(
            """
        feature {
          name: "age"
          type: FLOAT
        }
        feature {
          name: "langauge"
          type: BYTES
        }
        feature {
          name: "english_label"
          type: FLOAT
        }
        feature {
          name: "chinese_label"
          type: FLOAT
        }
        feature {
          name: "other_label"
          type: FLOAT
        }
        """, schema_pb2.Schema())
        tfx_io = test_util.InMemoryTFExampleRecord(
            schema=schema, raw_record_column_name=constants.BATCHED_INPUT_KEY)
        tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
            arrow_schema=tfx_io.ArrowSchema(),
            tensor_representations=tfx_io.TensorRepresentations())
        input_extractor = batched_input_extractor.BatchedInputExtractor(
            eval_config)
        predict_extractor = batched_predict_extractor_v2.BatchedPredictExtractor(
            eval_config=eval_config,
            eval_shared_model={
                'model1': eval_shared_model1,
                'model2': eval_shared_model2
            },
            tensor_adapter_config=tensor_adapter_config)
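        # One EvalSharedModel is supplied per ModelSpec, keyed by the model
        # names ('model1', 'model2') declared in eval_config above.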

        examples = [
            self._makeExample(age=1.0,
                              language='english',
                              english_label=1.0,
                              chinese_label=0.0,
                              other_label=0.0),
            self._makeExample(age=1.0,
                              language='chinese',
                              english_label=0.0,
                              chinese_label=1.0,
                              other_label=0.0),
            self._makeExample(age=2.0,
                              language='english',
                              english_label=1.0,
                              chinese_label=0.0,
                              other_label=0.0),
            self._makeExample(age=2.0,
                              language='other',
                              english_label=0.0,
                              chinese_label=1.0,
                              other_label=1.0)
        ]

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            result = (
                pipeline
                | 'Create' >> beam.Create(
                    [e.SerializeToString() for e in examples], reshuffle=False)
                | 'BatchExamples' >> tfx_io.BeamSource(batch_size=4)
                |
                'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
                | input_extractor.stage_name >> input_extractor.ptransform
                | predict_extractor.stage_name >> predict_extractor.ptransform)

            # pylint: enable=no-value-for-parameter

            def check_result(got):
                try:
                    self.assertLen(got, 1)
                    for item in got:
                        # We can't verify the actual predictions, but we can verify the keys
                        self.assertIn(constants.BATCHED_PREDICTIONS_KEY, item)
                        for pred in item[constants.BATCHED_PREDICTIONS_KEY]:
                            for model_name in ('model1', 'model2'):
                                self.assertIn(model_name, pred)
                                for output_name in ('chinese_head',
                                                    'english_head',
                                                    'other_head'):
                                    for pred_key in ('logistic',
                                                     'probabilities',
                                                     'all_classes'):
                                        self.assertIn(
                                            output_name + '/' + pred_key,
                                            pred[model_name])

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(result, check_result, label='result')
Example #14
    def testBatchedInputExtractorMultiModel(self):
        model_spec1 = config.ModelSpec(name='model1',
                                       label_key='label',
                                       example_weight_key='example_weight',
                                       prediction_key='fixed_float')
        model_spec2 = config.ModelSpec(name='model2',
                                       label_keys={
                                           'output1': 'label1',
                                           'output2': 'label2'
                                       },
                                       example_weight_keys={
                                           'output1': 'example_weight1',
                                           'output2': 'example_weight2'
                                       },
                                       prediction_keys={
                                           'output1': 'fixed_float',
                                           'output2': 'fixed_float'
                                       })
        eval_config = config.EvalConfig(model_specs=[model_spec1, model_spec2])
        input_extractor = batched_input_extractor.BatchedInputExtractor(
            eval_config)
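        # model1 uses single label/weight/prediction keys while model2 maps
        # each output to its own keys, so the extracted values are keyed by
        # model name and, for model2, additionally by output name.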

        schema = text_format.Parse(
            """
        feature {
          name: "label"
          type: FLOAT
        }
        feature {
          name: "label1"
          type: FLOAT
        }
        feature {
          name: "label2"
          type: FLOAT
        }
        feature {
          name: "example_weight"
          type: FLOAT
        }
        feature {
          name: "example_weight1"
          type: FLOAT
        }
        feature {
          name: "example_weight2"
          type: FLOAT
        }
        feature {
          name: "fixed_int"
          type: INT
        }
        feature {
          name: "fixed_float"
          type: FLOAT
        }
        feature {
          name: "fixed_string"
          type: BYTES
        }
        """, schema_pb2.Schema())
        tfx_io = test_util.InMemoryTFExampleRecord(
            schema=schema, raw_record_column_name=constants.BATCHED_INPUT_KEY)

        examples = [
            self._makeExample(label=1.0,
                              label1=1.0,
                              label2=0.0,
                              example_weight=0.5,
                              example_weight1=0.5,
                              example_weight2=0.5,
                              fixed_int=1,
                              fixed_float=1.0,
                              fixed_string='fixed_string1'),
            self._makeExample(label=1.0,
                              label1=1.0,
                              label2=1.0,
                              example_weight=0.0,
                              example_weight1=0.0,
                              example_weight2=1.0,
                              fixed_int=1,
                              fixed_float=2.0,
                              fixed_string='fixed_string2'),
        ]

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            result = (
                pipeline
                | 'Create' >> beam.Create(
                    [e.SerializeToString() for e in examples], reshuffle=False)
                | 'BatchExamples' >> tfx_io.BeamSource(batch_size=2)
                |
                'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
                | input_extractor.stage_name >> input_extractor.ptransform)

            # pylint: enable=no-value-for-parameter

            def check_result(got):
                try:
                    self.assertLen(got, 1)
                    self.assertDictElementsAlmostEqual(
                        got[0][constants.BATCHED_FEATURES_KEY][0], {
                            'fixed_int': np.array([1]),
                        })
                    self.assertEqual(
                        got[0][constants.BATCHED_FEATURES_KEY][0]
                        ['fixed_string'], np.array([b'fixed_string1']))
                    for model_name in ('model1', 'model2'):
                        self.assertIn(model_name,
                                      got[0][constants.BATCHED_LABELS_KEY][0])
                        self.assertIn(
                            model_name,
                            got[0][constants.BATCHED_EXAMPLE_WEIGHTS_KEY][0])
                        self.assertIn(
                            model_name,
                            got[0][constants.BATCHED_PREDICTIONS_KEY][0])
                    self.assertAlmostEqual(
                        got[0][constants.BATCHED_LABELS_KEY][0]['model1'],
                        np.array([1.0]))
                    self.assertDictElementsAlmostEqual(
                        got[0][constants.BATCHED_LABELS_KEY][0]['model2'], {
                            'output1': np.array([1.0]),
                            'output2': np.array([0.0])
                        })
                    self.assertAlmostEqual(
                        got[0][constants.BATCHED_EXAMPLE_WEIGHTS_KEY][0]
                        ['model1'], np.array([0.5]))
                    self.assertDictElementsAlmostEqual(
                        got[0][constants.BATCHED_EXAMPLE_WEIGHTS_KEY][0]
                        ['model2'], {
                            'output1': np.array([0.5]),
                            'output2': np.array([0.5])
                        })
                    self.assertAlmostEqual(
                        got[0][constants.BATCHED_PREDICTIONS_KEY][0]['model1'],
                        np.array([1.0]))
                    self.assertDictElementsAlmostEqual(
                        got[0][constants.BATCHED_PREDICTIONS_KEY][0]['model2'],
                        {
                            'output1': np.array([1.0]),
                            'output2': np.array([1.0])
                        })

                    self.assertDictElementsAlmostEqual(
                        got[0][constants.BATCHED_FEATURES_KEY][1], {
                            'fixed_int': np.array([1]),
                        })
                    self.assertEqual(
                        got[0][constants.BATCHED_FEATURES_KEY][1]
                        ['fixed_string'], np.array([b'fixed_string2']))
                    for model_name in ('model1', 'model2'):
                        self.assertIn(model_name,
                                      got[0][constants.BATCHED_LABELS_KEY][1])
                        self.assertIn(
                            model_name,
                            got[0][constants.BATCHED_EXAMPLE_WEIGHTS_KEY][1])
                        self.assertIn(
                            model_name,
                            got[0][constants.BATCHED_PREDICTIONS_KEY][1])
                    self.assertAlmostEqual(
                        got[0][constants.BATCHED_LABELS_KEY][1]['model1'],
                        np.array([1.0]))
                    self.assertDictElementsAlmostEqual(
                        got[0][constants.BATCHED_LABELS_KEY][1]['model2'], {
                            'output1': np.array([1.0]),
                            'output2': np.array([1.0])
                        })
                    self.assertAlmostEqual(
                        got[0][constants.BATCHED_EXAMPLE_WEIGHTS_KEY][1]
                        ['model1'], np.array([0.0]))
                    self.assertDictElementsAlmostEqual(
                        got[0][constants.BATCHED_EXAMPLE_WEIGHTS_KEY][1]
                        ['model2'], {
                            'output1': np.array([0.0]),
                            'output2': np.array([1.0])
                        })
                    self.assertAlmostEqual(
                        got[0][constants.BATCHED_PREDICTIONS_KEY][1]['model1'],
                        np.array([2.0]))
                    self.assertDictElementsAlmostEqual(
                        got[0][constants.BATCHED_PREDICTIONS_KEY][1]['model2'],
                        {
                            'output1': np.array([2.0]),
                            'output2': np.array([2.0])
                        })

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(result, check_result, label='result')

  def testTFlitePredictExtractorWithSingleOutputModel(self, multi_model,
                                                      multi_output,
                                                      batch_examples,
                                                      batch_inputs):
    input1 = tf.keras.layers.Input(shape=(1,), name='input1')
    input2 = tf.keras.layers.Input(shape=(1,), name='input2')
    inputs = [input1, input2]
    input_layer = tf.keras.layers.concatenate(inputs)
    output_layers = {}
    output_layers['output1'] = (
        tf.keras.layers.Dense(1, activation=tf.nn.sigmoid,
                              name='output1')(input_layer))
    if multi_output:
      output_layers['output2'] = (
          tf.keras.layers.Dense(1, activation=tf.nn.sigmoid,
                                name='output2')(input_layer))

    model = tf.keras.models.Model(inputs, output_layers)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(lr=.001),
        loss=tf.keras.losses.binary_crossentropy,
        metrics=['accuracy'])

    train_features = {'input1': [[0.0], [1.0]], 'input2': [[1.0], [0.0]]}
    labels = {'output1': [[1], [0]]}
    if multi_output:
      labels['output2'] = [[1], [0]]

    example_weights = {'output1': [1.0, 0.5]}
    if multi_output:
      example_weights['output2'] = [1.0, 0.5]
    dataset = tf.data.Dataset.from_tensor_slices(
        (train_features, labels, example_weights))
    dataset = dataset.shuffle(buffer_size=1).repeat().batch(2)
    model.fit(dataset, steps_per_epoch=1)

    converter = tf.compat.v2.lite.TFLiteConverter.from_keras_model(model)
    tflite_model = converter.convert()

    tflite_model_dir = tempfile.mkdtemp()
    with tf.io.gfile.GFile(os.path.join(tflite_model_dir, 'tflite'), 'wb') as f:
      f.write(tflite_model)
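    # The converted flatbuffer is written to a file named 'tflite' inside a
    # temp directory, and the extractor below is pointed at that directory.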

    model_specs = [config.ModelSpec(name='model1')]
    if multi_model:
      model_specs.append(config.ModelSpec(name='model2'))

    eval_config = config.EvalConfig(model_specs=model_specs)
    eval_shared_models = [
        self.createTestEvalSharedModel(
            model_name='model1', eval_saved_model_path=tflite_model_dir)
    ]
    if multi_model:
      eval_shared_models.append(
          self.createTestEvalSharedModel(
              model_name='model2', eval_saved_model_path=tflite_model_dir))

    desired_batch_size = 2 if batch_examples else None
    predictor = tflite_predict_extractor.TFLitePredictExtractor(
        eval_config=eval_config,
        eval_shared_model=eval_shared_models,
        desired_batch_size=desired_batch_size)

    predict_features = [
        {
            'input1': np.array([0.0], dtype=np.float32),
            'input2': np.array([1.0], dtype=np.float32),
            'non_model_feature': np.array([0]),  # should be ignored by model
        },
        {
            'input1': np.array([1.0], dtype=np.float32),
            'input2': np.array([0.0], dtype=np.float32),
            'non_model_feature': np.array([1]),  # should be ignored by model
        }
    ]

    if batch_inputs:
      predict_features = [{k: np.expand_dims(v, 0)
                           for k, v in p.items()}
                          for p in predict_features]

    with beam.Pipeline() as pipeline:
      # pylint: disable=no-value-for-parameter
      result = (
          pipeline
          | 'Create' >> beam.Create(predict_features)
          | 'FeaturesToExtracts' >>
          beam.Map(lambda x: {constants.FEATURES_KEY: x})
          | predictor.stage_name >> predictor.ptransform)

      # pylint: enable=no-value-for-parameter

      def check_result(got):
        try:
          self.assertLen(got, 2)
          # We can't verify the actual predictions, but we can verify the keys.
          for item in got:
            self.assertIn(constants.PREDICTIONS_KEY, item)

            # TODO(dzats): TFLite seems to currently rename all outputs to
            # Identity*. Update this test to check for output1 and output2
            # when this changes.
            if multi_model:
              self.assertIn('model1', item[constants.PREDICTIONS_KEY])
              self.assertIn('model2', item[constants.PREDICTIONS_KEY])
              if multi_output:
                self.assertIn('Identity',
                              item[constants.PREDICTIONS_KEY]['model1'])
                self.assertIn('Identity_1',
                              item[constants.PREDICTIONS_KEY]['model1'])

            elif multi_output:
              self.assertIn('Identity', item[constants.PREDICTIONS_KEY])
              self.assertIn('Identity_1', item[constants.PREDICTIONS_KEY])

        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(result, check_result, label='result')
Example #16
    def assertMetricsComputedWithBeamAre(
        self,
        eval_saved_model_path: Text,
        serialized_examples: List[bytes],
        expected_metrics: Dict[Text, Any],
        add_metrics_callbacks: Optional[List[
            types.AddMetricsCallbackType]] = None):
        """Checks metrics computed using Beam.

    Metrics will be computed over all examples, without any slicing. If you
    want to provide your own PCollection (e.g. read a large number of examples
    from a file), if you want to check metrics over certain slices, or if you
    want to add additional post-export metrics, use the more general
    assertGeneralMetricsComputedWithBeamAre.

    Example usage:
      self.assertMetricsComputedWithBeamAre(
        eval_saved_model_path=path,
        serialized_examples=[self.makeExample(age=5, label=1.0),
                             self.makeExample(age=10, label=0.0)],
        expected_metrics={'average_loss': 0.1})

    Args:
      eval_saved_model_path: Path to the directory containing the
        EvalSavedModel.
      serialized_examples: List of serialized example bytes.
      expected_metrics: Dictionary of expected metric values.
      add_metrics_callbacks: Optional. Callbacks for adding additional metrics.
    """
        def check_metrics(got):
            """Check metrics callback."""
            try:
                self.assertEqual(
                    1, len(got),
                    'expecting metrics for exactly one slice, but got %d '
                    'slices instead. metrics were: %s' % (len(got), got))
                (slice_key, value) = got[0]
                self.assertEqual((), slice_key)
                self.assertDictElementsWithinBounds(
                    got_values_dict=value,
                    expected_values_dict=expected_metrics)
            except AssertionError as err:
                raise beam_util.BeamAssertException(err)

        eval_config = config.EvalConfig(
            input_data_specs=[config.InputDataSpec()],
            model_specs=[config.ModelSpec(location=eval_saved_model_path)],
            output_data_specs=[config.OutputDataSpec()])
        eval_shared_model = model_eval_lib.default_eval_shared_model(
            eval_saved_model_path=eval_saved_model_path,
            add_metrics_callbacks=add_metrics_callbacks)
        extractors = model_eval_lib.default_extractors(
            eval_config=eval_config, eval_shared_model=eval_shared_model)

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            (metrics, _), _ = (
                pipeline
                | 'CreateExamples' >> beam.Create(serialized_examples)
                | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
                | 'Extract' >> Extract(extractors=extractors)
                | 'ComputeMetricsAndPlots' >> metrics_and_plots_evaluator.
                ComputeMetricsAndPlots(eval_shared_model=eval_shared_model))
            # pylint: enable=no-value-for-parameter

            beam_util.assert_that(metrics, check_metrics)

    def testEvaluateWithBinaryClassificationModel(self):
        n_classes = 2
        temp_export_dir = self._getExportDir()
        _, export_dir = dnn_classifier.simple_dnn_classifier(
            None, temp_export_dir, n_classes=n_classes)

        # Add mean_label, example_count, weighted_example_count, calibration_plot
        eval_config = config.EvalConfig(
            model_specs=[
                config.ModelSpec(location=export_dir,
                                 label_key='label',
                                 example_weight_key='age')
            ],
            slicing_specs=[config.SlicingSpec()],
            metrics_specs=metric_specs.specs_from_metrics([
                calibration.MeanLabel('mean_label'),
                calibration_plot.CalibrationPlot(name='calibration_plot',
                                                 num_buckets=10)
            ]))
        eval_shared_model = self.createTestEvalSharedModel(
            eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])

        slice_spec = [
            slicer.SingleSliceSpec(spec=s) for s in eval_config.slicing_specs
        ]
        extractors = [
            input_extractor.InputExtractor(eval_config=eval_config),
            predict_extractor_v2.PredictExtractor(
                eval_config=eval_config,
                eval_shared_models=[eval_shared_model]),
            slice_key_extractor.SliceKeyExtractor(slice_spec=slice_spec)
        ]
        evaluators = [
            metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
                eval_config=eval_config,
                eval_shared_models=[eval_shared_model])
        ]

        examples = [
            self._makeExample(age=1.0, language='english', label=0.0),
            self._makeExample(age=2.0, language='chinese', label=1.0),
            self._makeExample(age=3.0, language='chinese', label=0.0),
        ]
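        # 'age' doubles as the example weight, so weighted_example_count is
        # 1.0 + 2.0 + 3.0 and mean_label is the age-weighted mean of the
        # labels asserted in check_metrics below.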

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            metrics_and_plots = (
                pipeline
                | 'Create' >> beam.Create(
                    [e.SerializeToString() for e in examples])
                | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
                | 'ExtractAndEvaluate' >> model_eval_lib.ExtractAndEvaluate(
                    extractors=extractors, evaluators=evaluators))

            # pylint: enable=no-value-for-parameter

            def check_metrics(got):
                try:
                    self.assertLen(got, 1)
                    got_slice_key, got_metrics = got[0]
                    self.assertEqual(got_slice_key, ())
                    example_count_key = metric_types.MetricKey(
                        name='example_count')
                    weighted_example_count_key = metric_types.MetricKey(
                        name='weighted_example_count')
                    label_key = metric_types.MetricKey(name='mean_label')
                    self.assertDictElementsAlmostEqual(
                        got_metrics, {
                            example_count_key:
                            3,
                            weighted_example_count_key: (1.0 + 2.0 + 3.0),
                            label_key:
                            (0 * 1.0 + 1 * 2.0 + 0 * 3.0) / (1.0 + 2.0 + 3.0),
                        })

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            def check_plots(got):
                try:
                    self.assertLen(got, 1)
                    got_slice_key, got_plots = got[0]
                    self.assertEqual(got_slice_key, ())
                    plot_key = metric_types.PlotKey('calibration_plot')
                    self.assertIn(plot_key, got_plots)
                    # 10 buckets + 2 for edge cases
                    self.assertLen(got_plots[plot_key].buckets, 12)

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(metrics_and_plots[constants.METRICS_KEY],
                             check_metrics,
                             label='metrics')
            util.assert_that(metrics_and_plots[constants.PLOTS_KEY],
                             check_plots,
                             label='plots')
Example #18
    def assertGeneralMetricsComputedWithBeamAre(
            self, eval_saved_model_path: Text,
            examples_pcollection: beam.pvalue.PCollection,
            slice_spec: List[slicer.SingleSliceSpec],
            add_metrics_callbacks: List[types.AddMetricsCallbackType],
            expected_slice_metrics: Dict[Any, Dict[Text, Any]]):
        """Checks metrics computed using Beam.

    A more general version of assertMetricsComputedWithBeamAre. Note that the
    caller is responsible for setting up and running the Beam pipeline.

    Example usage:
      def add_metrics(features, predictions, labels):
        metric_ops = {
            'mse': tf.metrics.mean_squared_error(labels, predictions['logits']),
            'mae': tf.metrics.mean_absolute_error(labels, predictions['logits']),
        }
        return metric_ops

      with beam.Pipeline() as pipeline:
        expected_slice_metrics = {
            (): {
              'mae': 0.1,
              'mse': 0.2,
              tfma.post_export_metrics.metric_keys.AUC:
                tfma.test.BoundedValue(lower_bound=0.5)
            },
            (('age', 10),): {
              'mae': 0.2,
              'mse': 0.3,
              tfma.post_export_metrics.metric_keys.AUC:
                tfma.test.BoundedValue(lower_bound=0.5)
            },
        }
        examples = pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(path)
        self.assertGeneralMetricsComputedWithBeamAre(
          eval_saved_model_path=path,
          examples_pcollection=examples,
          slice_spec=[tfma.slicer.SingleSliceSpec(),
                      tfma.slicer.SingleSliceSpec(columns=['age'])],
          add_metrics_callbacks=[
            add_metrics, tfma.post_export_metrics.auc()],
          expected_slice_metrics=expected_slice_metrics)

    Args:
      eval_saved_model_path: Path to the directory containing the
        EvalSavedModel.
      examples_pcollection: A PCollection of serialized example bytes.
      slice_spec: List of slice specifications.
      add_metrics_callbacks: Callbacks for adding additional metrics.
      expected_slice_metrics: Dictionary of dictionaries describing the expected
        metrics for each slice. The outer dictionary maps slice keys to the
        expected metrics for that slice.
    """
        def check_metrics(got):
            """Check metrics callback."""
            try:
                slices = {}
                for slice_key, value in got:
                    slices[slice_key] = value
                self.assertItemsEqual(list(slices.keys()),
                                      list(expected_slice_metrics.keys()))
                for slice_key, expected_metrics in expected_slice_metrics.items(
                ):
                    self.assertDictElementsWithinBounds(
                        got_values_dict=slices[slice_key],
                        expected_values_dict=expected_metrics)
            except AssertionError as err:
                raise beam_util.BeamAssertException(err)

        slicing_specs = None
        if slice_spec:
            slicing_specs = [s.to_proto() for s in slice_spec]
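        # SingleSliceSpec objects are converted to their proto form before
        # being attached to the EvalConfig below.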
        eval_config = config.EvalConfig(
            input_data_specs=[config.InputDataSpec()],
            model_specs=[config.ModelSpec(location=eval_saved_model_path)],
            output_data_specs=[config.OutputDataSpec()],
            slicing_specs=slicing_specs)
        eval_shared_model = self.createTestEvalSharedModel(
            eval_saved_model_path=eval_saved_model_path,
            add_metrics_callbacks=add_metrics_callbacks)
        extractors = model_eval_lib.default_extractors(
            eval_config=eval_config, eval_shared_model=eval_shared_model)

        # pylint: disable=no-value-for-parameter
        (metrics,
         _), _ = (examples_pcollection
                  | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
                  | 'Extract' >> Extract(extractors=extractors)
                  | 'ComputeMetricsAndPlots' >> metrics_and_plots_evaluator.
                  ComputeMetricsAndPlots(eval_shared_model=eval_shared_model))
        # pylint: enable=no-value-for-parameter

        beam_util.assert_that(metrics, check_metrics)

    def testEvaluateWithSlicing(self):
        temp_export_dir = self._getExportDir()
        _, export_dir = (fixed_prediction_estimator_extra_fields.
                         simple_fixed_prediction_estimator_extra_fields(
                             None, temp_export_dir))
        eval_config = config.EvalConfig(
            model_specs=[
                config.ModelSpec(location=export_dir,
                                 label_key='label',
                                 example_weight_key='fixed_float')
            ],
            slicing_specs=[
                config.SlicingSpec(),
                config.SlicingSpec(feature_keys=['fixed_string']),
            ],
            metrics_specs=metric_specs.specs_from_metrics([
                calibration.MeanLabel('mean_label'),
                calibration.MeanPrediction('mean_prediction')
            ]))
        eval_shared_model = self.createTestEvalSharedModel(
            eval_saved_model_path=export_dir)
        slice_spec = [
            slicer.SingleSliceSpec(spec=s) for s in eval_config.slicing_specs
        ]
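        # The EvalConfig above defines two slicings: the overall (empty) slice
        # plus one slice per distinct 'fixed_string' value, giving the three
        # slices asserted in check_metrics below.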
        extractors = [
            predict_extractor.PredictExtractor(
                eval_shared_model=eval_shared_model),
            slice_key_extractor.SliceKeyExtractor(slice_spec=slice_spec)
        ]
        evaluators = [
            metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
                eval_config=eval_config,
                eval_shared_models=[eval_shared_model])
        ]

        # fixed_float used as example_weight key
        examples = [
            self._makeExample(prediction=0.2,
                              label=1.0,
                              fixed_int=1,
                              fixed_float=1.0,
                              fixed_string='fixed_string1'),
            self._makeExample(prediction=0.8,
                              label=0.0,
                              fixed_int=1,
                              fixed_float=1.0,
                              fixed_string='fixed_string1'),
            self._makeExample(prediction=0.5,
                              label=0.0,
                              fixed_int=2,
                              fixed_float=2.0,
                              fixed_string='fixed_string2')
        ]

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            metrics = (
                pipeline
                | 'Create' >> beam.Create(
                    [e.SerializeToString() for e in examples])
                | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
                | 'ExtractAndEvaluate' >> model_eval_lib.ExtractAndEvaluate(
                    extractors=extractors, evaluators=evaluators))

            # pylint: enable=no-value-for-parameter

            def check_metrics(got):
                try:
                    self.assertLen(got, 3)
                    slices = {}
                    for slice_key, value in got:
                        slices[slice_key] = value
                    overall_slice = ()
                    fixed_string1_slice = (('fixed_string',
                                            b'fixed_string1'), )
                    fixed_string2_slice = (('fixed_string',
                                            b'fixed_string2'), )
                    self.assertCountEqual(list(slices.keys()), [
                        overall_slice, fixed_string1_slice, fixed_string2_slice
                    ])
                    example_count_key = metric_types.MetricKey(
                        name='example_count')
                    weighted_example_count_key = metric_types.MetricKey(
                        name='weighted_example_count')
                    label_key = metric_types.MetricKey(name='mean_label')
                    pred_key = metric_types.MetricKey(name='mean_prediction')
                    self.assertDictElementsAlmostEqual(
                        slices[overall_slice], {
                            example_count_key: 3,
                            weighted_example_count_key: 4.0,
                            label_key:
                            (1.0 + 0.0 + 2 * 0.0) / (1.0 + 1.0 + 2.0),
                            pred_key:
                            (0.2 + 0.8 + 2 * 0.5) / (1.0 + 1.0 + 2.0),
                        })
                    self.assertDictElementsAlmostEqual(
                        slices[fixed_string1_slice], {
                            example_count_key: 2,
                            weighted_example_count_key: 2.0,
                            label_key: (1.0 + 0.0) / (1.0 + 1.0),
                            pred_key: (0.2 + 0.8) / (1.0 + 1.0),
                        })
                    self.assertDictElementsAlmostEqual(
                        slices[fixed_string2_slice], {
                            example_count_key: 1,
                            weighted_example_count_key: 2.0,
                            label_key: (2 * 0.0) / 2.0,
                            pred_key: (2 * 0.5) / 2.0,
                        })

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(metrics[constants.METRICS_KEY],
                             check_metrics,
                             label='metrics')

    def testRunModelAnalysisWithKerasModel(self):
        input_layer = tf.keras.layers.Input(shape=(28 * 28, ), name='data')
        output_layer = tf.keras.layers.Dense(
            10, activation=tf.nn.softmax)(input_layer)
        model = tf.keras.models.Model(input_layer, output_layer)
        model.compile(optimizer=tf.keras.optimizers.Adam(lr=.001),
                      loss=tf.keras.losses.categorical_crossentropy)

        features = {'data': [[0.0] * 28 * 28]}
        labels = [[0, 0, 0, 0, 0, 0, 0, 1, 0, 0]]
        example_weights = [1.0]
        dataset = tf.data.Dataset.from_tensor_slices(
            (features, labels, example_weights))
        dataset = dataset.shuffle(buffer_size=1).repeat().batch(1)
        model.fit(dataset, steps_per_epoch=1)

        model_location = os.path.join(self._getTempDir(), 'export_dir')
        model.save(model_location, save_format='tf')

        examples = [
            self._makeExample(data=[0.0] * 28 * 28, label=1.0),
            self._makeExample(data=[1.0] * 28 * 28, label=5.0),
            self._makeExample(data=[1.0] * 28 * 28, label=9.0),
        ]
        data_location = self._writeTFExamplesToTFRecords(examples)
        metrics_spec = config.MetricsSpec()
        for metric in (tf.keras.metrics.AUC(), ):
            cfg = tf.keras.utils.serialize_keras_object(metric)
            metrics_spec.metrics.append(
                config.MetricConfig(class_name=cfg['class_name'],
                                    config=json.dumps(cfg['config'])))
        for class_id in (0, 5, 9):
            metrics_spec.binarize.class_ids.append(class_id)
        eval_config = config.EvalConfig(
            model_specs=[config.ModelSpec(label_key='label')],
            metrics_specs=[metrics_spec])
        eval_result = model_eval_lib.run_model_analysis(
            eval_config=eval_config,
            eval_shared_model=model_eval_lib.default_eval_shared_model(
                eval_saved_model_path=model_location,
                tags=[tf.saved_model.SERVING]),
            data_location=data_location,
            output_path=self._getTempDir())
        self.assertEqual(eval_result.model_location, model_location)
        self.assertEqual(eval_result.data_location, data_location)
        self.assertLen(eval_result.slicing_metrics, 1)
        got_slice_key, got_metrics = eval_result.slicing_metrics[0]
        self.assertEqual(got_slice_key, ())
        self.assertIn('', got_metrics)  # output_name
        got_metrics = got_metrics['']
        expected_metrics = {
            'classId:0': {
                'auc': True,
            },
            'classId:5': {
                'auc': True,
            },
            'classId:9': {
                'auc': True,
            },
        }
        for class_id in expected_metrics:
            self.assertIn(class_id, got_metrics)
            for k in expected_metrics[class_id]:
                self.assertIn(k, got_metrics[class_id])

    def testEvaluateWithKerasModel(self):
        input1 = tf.keras.layers.Input(shape=(1, ), name='input1')
        input2 = tf.keras.layers.Input(shape=(1, ), name='input2')
        inputs = [input1, input2]
        input_layer = tf.keras.layers.concatenate(inputs)
        output_layer = tf.keras.layers.Dense(1,
                                             activation=tf.nn.sigmoid,
                                             name='output')(input_layer)
        model = tf.keras.models.Model(inputs, output_layer)
        model.compile(optimizer=tf.keras.optimizers.Adam(lr=.001),
                      loss=tf.keras.losses.binary_crossentropy,
                      metrics=['accuracy'])

        features = {'input1': [[0.0], [1.0]], 'input2': [[1.0], [0.0]]}
        labels = [[1], [0]]
        example_weights = [1.0, 0.5]
        dataset = tf.data.Dataset.from_tensor_slices(
            (features, labels, example_weights))
        dataset = dataset.shuffle(buffer_size=1).repeat().batch(2)
        model.fit(dataset, steps_per_epoch=1)

        export_dir = self._getExportDir()
        model.save(export_dir, save_format='tf')

        eval_config = config.EvalConfig(
            model_specs=[
                config.ModelSpec(location=export_dir,
                                 label_key='label',
                                 example_weight_key='example_weight')
            ],
            slicing_specs=[config.SlicingSpec()],
            metrics_specs=metric_specs.specs_from_metrics(
                [calibration.MeanLabel('mean_label')]))
        eval_shared_model = self.createTestEvalSharedModel(
            eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])

        slice_spec = [
            slicer.SingleSliceSpec(spec=s) for s in eval_config.slicing_specs
        ]
        extractors = [
            input_extractor.InputExtractor(eval_config=eval_config),
            predict_extractor_v2.PredictExtractor(
                eval_config=eval_config,
                eval_shared_models=[eval_shared_model]),
            slice_key_extractor.SliceKeyExtractor(slice_spec=slice_spec)
        ]
        evaluators = [
            metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
                eval_config=eval_config,
                eval_shared_models=[eval_shared_model])
        ]

        examples = [
            self._makeExample(input1=0.0,
                              input2=1.0,
                              label=1.0,
                              example_weight=1.0,
                              extra_feature='non_model_feature'),
            self._makeExample(input1=1.0,
                              input2=0.0,
                              label=0.0,
                              example_weight=0.5,
                              extra_feature='non_model_feature'),
        ]

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            metrics = (
                pipeline
                | 'Create' >> beam.Create(
                    [e.SerializeToString() for e in examples])
                | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
                | 'ExtractAndEvaluate' >> model_eval_lib.ExtractAndEvaluate(
                    extractors=extractors, evaluators=evaluators))

            # pylint: enable=no-value-for-parameter

            def check_metrics(got):
                try:
                    self.assertLen(got, 1)
                    got_slice_key, got_metrics = got[0]
                    self.assertEqual(got_slice_key, ())
                    example_count_key = metric_types.MetricKey(
                        name='example_count')
                    weighted_example_count_key = metric_types.MetricKey(
                        name='weighted_example_count')
                    label_key = metric_types.MetricKey(name='mean_label')
                    self.assertDictElementsAlmostEqual(
                        got_metrics, {
                            example_count_key: 2,
                            weighted_example_count_key: (1.0 + 0.5),
                            label_key: (1.0 * 1.0 + 0.0 * 0.5) / (1.0 + 0.5),
                        })

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(metrics[constants.METRICS_KEY],
                             check_metrics,
                             label='metrics')

    def testRunModelAnalysisWithQueryBasedMetrics(self):
        input_layer = tf.keras.layers.Input(shape=(1, ), name='age')
        output_layer = tf.keras.layers.Dense(
            1, activation=tf.nn.sigmoid)(input_layer)
        model = tf.keras.models.Model(input_layer, output_layer)
        model.compile(optimizer=tf.keras.optimizers.Adam(lr=.001),
                      loss=tf.keras.losses.binary_crossentropy)

        features = {'age': [[20.0]]}
        labels = [[1]]
        example_weights = [1.0]
        dataset = tf.data.Dataset.from_tensor_slices(
            (features, labels, example_weights))
        dataset = dataset.shuffle(buffer_size=1).repeat().batch(1)
        model.fit(dataset, steps_per_epoch=1)

        model_location = os.path.join(self._getTempDir(), 'export_dir')
        model.save(model_location, save_format='tf')

        examples = [
            self._makeExample(age=3.0, language='english', label=1.0),
            self._makeExample(age=5.0, language='chinese', label=0.0),
            self._makeExample(age=3.0, language='english', label=0.0),
            self._makeExample(age=5.0, language='chinese', label=1.0)
        ]
        data_location = self._writeTFExamplesToTFRecords(examples)
        slicing_specs = [config.SlicingSpec()]
        eval_config = config.EvalConfig(
            model_specs=[config.ModelSpec(label_key='label')],
            slicing_specs=slicing_specs,
            metrics_specs=metric_specs.specs_from_metrics(
                [ndcg.NDCG(gain_key='age', name='ndcg')],
                binarize=config.BinarizationOptions(top_k_list=[1]),
                query_key='language'))
        eval_shared_model = model_eval_lib.default_eval_shared_model(
            eval_saved_model_path=model_location,
            tags=[tf.saved_model.SERVING])
        eval_result = model_eval_lib.run_model_analysis(
            eval_config=eval_config,
            eval_shared_model=eval_shared_model,
            data_location=data_location,
            output_path=self._getTempDir(),
            evaluators=[
                metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
                    eval_config=eval_config,
                    eval_shared_model=eval_shared_model)
            ])

        self.assertEqual(eval_result.model_location, model_location)
        self.assertEqual(eval_result.data_location, data_location)
        self.assertLen(eval_result.slicing_metrics, 1)
        got_slice_key, got_metrics = eval_result.slicing_metrics[0]
        self.assertEqual(got_slice_key, ())
        self.assertIn('', got_metrics)  # output_name
        got_metrics = got_metrics['']
        expected_metrics = {
            '': {
                'example_count': True,
                'weighted_example_count': True,
            },
            'topK:1': {
                'ndcg': True,
            },
        }
        for group in expected_metrics:
            self.assertIn(group, got_metrics)
            for k in expected_metrics[group]:
                self.assertIn(k, got_metrics[group])
Example #23
0
    def testPredictExtractorWithMultiModels(self):
        temp_export_dir = self._getExportDir()
        export_dir1, _ = multi_head.simple_multi_head(temp_export_dir, None)
        export_dir2, _ = multi_head.simple_multi_head(temp_export_dir, None)

        eval_config = config.EvalConfig(model_specs=[
            config.ModelSpec(location=export_dir1, name='model1'),
            config.ModelSpec(location=export_dir2, name='model2')
        ])
        eval_shared_model1 = self.createTestEvalSharedModel(
            eval_saved_model_path=export_dir1, tags=[tf.saved_model.SERVING])
        eval_shared_model2 = self.createTestEvalSharedModel(
            eval_saved_model_path=export_dir2, tags=[tf.saved_model.SERVING])
        predict_extractor = predict_extractor_v2.PredictExtractor(
            eval_config=eval_config,
            eval_shared_models=[eval_shared_model1, eval_shared_model2])

        examples = [
            self._makeExample(age=1.0,
                              language='english',
                              english_label=1.0,
                              chinese_label=0.0,
                              other_label=0.0),
            self._makeExample(age=1.0,
                              language='chinese',
                              english_label=0.0,
                              chinese_label=1.0,
                              other_label=0.0),
            self._makeExample(age=2.0,
                              language='english',
                              english_label=1.0,
                              chinese_label=0.0,
                              other_label=0.0),
            self._makeExample(age=2.0,
                              language='other',
                              english_label=0.0,
                              chinese_label=1.0,
                              other_label=1.0)
        ]

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            result = (
                pipeline
                | 'Create' >> beam.Create(
                    [e.SerializeToString() for e in examples])
                | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
                | predict_extractor.stage_name >> predict_extractor.ptransform)

            # pylint: enable=no-value-for-parameter

            def check_result(got):
                try:
                    self.assertLen(got, 4)
                    for item in got:
                        # We can't verify the actual predictions, but we can verify the keys
                        self.assertIn(constants.PREDICTIONS_KEY, item)
                        for model_name in ('model1', 'model2'):
                            self.assertIn(model_name,
                                          item[constants.PREDICTIONS_KEY])
                            for output_name in ('chinese_head', 'english_head',
                                                'other_head'):
                                for pred_key in ('logistic', 'probabilities',
                                                 'all_classes'):
                                    self.assertIn(
                                        output_name + '/' + pred_key,
                                        item[constants.PREDICTIONS_KEY]
                                        [model_name])

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(result, check_result, label='result')

    def testWriteMetricsAndPlots(self):
        metrics_file = os.path.join(self._getTempDir(), 'metrics')
        plots_file = os.path.join(self._getTempDir(), 'plots')
        temp_eval_export_dir = os.path.join(self._getTempDir(),
                                            'eval_export_dir')

        _, eval_export_dir = (
            fixed_prediction_estimator.simple_fixed_prediction_estimator(
                None, temp_eval_export_dir))
        eval_config = config.EvalConfig(
            model_specs=[config.ModelSpec()],
            options=config.Options(disabled_outputs=['eval_config.json']))
        eval_shared_model = self.createTestEvalSharedModel(
            eval_saved_model_path=eval_export_dir,
            add_metrics_callbacks=[
                post_export_metrics.example_count(),
                post_export_metrics.calibration_plot_and_prediction_histogram(
                    num_buckets=2)
            ])
        extractors = [
            predict_extractor.PredictExtractor(eval_shared_model),
            slice_key_extractor.SliceKeyExtractor()
        ]
        evaluators = [
            metrics_and_plots_evaluator.MetricsAndPlotsEvaluator(
                eval_shared_model)
        ]
        output_paths = {
            constants.METRICS_KEY: metrics_file,
            constants.PLOTS_KEY: plots_file
        }
        writers = [
            metrics_and_plots_writer.MetricsAndPlotsWriter(
                output_paths, eval_shared_model.add_metrics_callbacks)
        ]

        with beam.Pipeline() as pipeline:
            example1 = self._makeExample(prediction=0.0, label=1.0)
            example2 = self._makeExample(prediction=1.0, label=1.0)

            # pylint: disable=no-value-for-parameter
            _ = (pipeline
                 | 'Create' >> beam.Create([
                     example1.SerializeToString(),
                     example2.SerializeToString(),
                 ])
                 | 'ExtractEvaluateAndWriteResults' >>
                 model_eval_lib.ExtractEvaluateAndWriteResults(
                     eval_config=eval_config,
                     eval_shared_model=eval_shared_model,
                     extractors=extractors,
                     evaluators=evaluators,
                     writers=writers))
            # pylint: enable=no-value-for-parameter

        expected_metrics_for_slice = text_format.Parse(
            """
        slice_key {}
        metrics {
          key: "average_loss"
          value {
            double_value {
              value: 0.5
            }
          }
        }
        metrics {
          key: "post_export_metrics/example_count"
          value {
            double_value {
              value: 2.0
            }
          }
        }
        """, metrics_for_slice_pb2.MetricsForSlice())

        metric_records = []
        for record in tf.compat.v1.python_io.tf_record_iterator(metrics_file):
            metric_records.append(
                metrics_for_slice_pb2.MetricsForSlice.FromString(record))
        self.assertEqual(1, len(metric_records),
                         'metrics: %s' % metric_records)
        self.assertProtoEquals(expected_metrics_for_slice, metric_records[0])

        expected_plots_for_slice = text_format.Parse(
            """
      slice_key {}
      plots {
        key: "post_export_metrics"
        value {
          calibration_histogram_buckets {
            buckets {
              lower_threshold_inclusive: -inf
              num_weighted_examples {}
              total_weighted_label {}
              total_weighted_refined_prediction {}
            }
            buckets {
              upper_threshold_exclusive: 0.5
              num_weighted_examples {
                value: 1.0
              }
              total_weighted_label {
                value: 1.0
              }
              total_weighted_refined_prediction {}
            }
            buckets {
              lower_threshold_inclusive: 0.5
              upper_threshold_exclusive: 1.0
              num_weighted_examples {
              }
              total_weighted_label {}
              total_weighted_refined_prediction {}
            }
            buckets {
              lower_threshold_inclusive: 1.0
              upper_threshold_exclusive: inf
              num_weighted_examples {
                value: 1.0
              }
              total_weighted_label {
                value: 1.0
              }
              total_weighted_refined_prediction {
                value: 1.0
              }
            }
          }
        }
      }
    """, metrics_for_slice_pb2.PlotsForSlice())

        plot_records = []
        for record in tf.compat.v1.python_io.tf_record_iterator(plots_file):
            plot_records.append(
                metrics_for_slice_pb2.PlotsForSlice.FromString(record))
        self.assertEqual(1, len(plot_records), 'plots: %s' % plot_records)
        self.assertProtoEquals(expected_plots_for_slice, plot_records[0])

    def testGetLabelKeyNoOutputAndLabelKeys(self):
        with self.assertRaises(ValueError):
            model_util.get_label_key(
                config.ModelSpec(label_keys={'output1': 'label'}), '')
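The error case above exercises a ModelSpec that defines per-output label_keys but is queried without an output name. Presumably the same call succeeds once the output name is supplied; the snippet below sketches that assumption and should be checked against the installed TFMA version rather than taken as documented behaviour.

# Hedged sketch (assumed behaviour, not from the original test): with an
# output name given, get_label_key is expected to resolve the per-output key.
spec = config.ModelSpec(label_keys={'output1': 'label'})
assert model_util.get_label_key(spec, 'output1') == 'label'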
Example #26
0
def ExtractEvaluateAndWriteResults(  # pylint: disable=invalid-name
        examples: beam.pvalue.PCollection,
        eval_shared_model: Optional[Union[types.EvalSharedModel,
                                          Dict[Text,
                                               types.EvalSharedModel]]] = None,
        eval_config: config.EvalConfig = None,
        extractors: Optional[List[extractor.Extractor]] = None,
        evaluators: Optional[List[evaluator.Evaluator]] = None,
        writers: Optional[List[writer.Writer]] = None,
        output_path: Optional[Text] = None,
        display_only_data_location: Optional[Text] = None,
        display_only_file_format: Optional[Text] = None,
        slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
        write_config: Optional[bool] = True,
        compute_confidence_intervals: Optional[bool] = False,
        k_anonymization_count: int = 1,
        desired_batch_size: Optional[int] = None,
        random_seed_for_testing: Optional[int] = None) -> beam.pvalue.PDone:
    """PTransform for performing extraction, evaluation, and writing results.

  Users who want to construct their own Beam pipelines instead of using the
  lightweight run_model_analysis functions should use this PTransform.

  Example usage:
    eval_config = tfma.EvalConfig(slicing_specs=[...], metrics_specs=[...])
    eval_shared_model = tfma.default_eval_shared_model(
        eval_saved_model_path=model_location)
    with beam.Pipeline(runner=...) as p:
      _ = (p
           | 'ReadData' >> beam.io.ReadFromTFRecord(data_location)
           | 'ExtractEvaluateAndWriteResults' >>
           tfma.ExtractEvaluateAndWriteResults(
               eval_shared_model=eval_shared_model,
               eval_config=eval_config,
               ...))
    result = tfma.load_eval_result(output_path=output_path)
    tfma.view.render_slicing_metrics(result)

  Note that the exact serialization format is an internal implementation detail
  and subject to change. Users should only use the TFMA functions to write and
  read the results.

  Args:
    examples: PCollection of input examples. Can be any format the model accepts
      (e.g. string containing CSV row, TensorFlow.Example, etc).
    eval_shared_model: Optional shared model (single-model evaluation) or dict
      of shared models keyed by model name (multi-model evaluation). Only
      required if needed by default extractors, evaluators, or writers and for
      display purposes of the model path.
    eval_config: Eval config.
    extractors: Optional list of Extractors to apply to Extracts. Typically
      these will be added by calling the default_extractors function. If no
      extractors are provided, default_extractors (non-materialized) will be
      used.
    evaluators: Optional list of Evaluators for evaluating Extracts. Typically
      these will be added by calling the default_evaluators function. If no
      evaluators are provided, default_evaluators will be used.
    writers: Optional list of Writers for writing Evaluation output. Typically
      these will be added by calling the default_writers function. If no writers
      are provided, default_writers will be used.
    output_path: Path to output metrics and plots results.
    display_only_data_location: Optional path indicating where the examples were
      read from. This is used only for display purposes - data will not actually
      be read from this path.
    display_only_file_format: Optional format of the examples. This is used only
      for display purposes.
    slice_spec: Deprecated (use EvalConfig).
    write_config: Deprecated (use EvalConfig).
    compute_confidence_intervals: Deprecated (use EvalConfig).
    k_anonymization_count: Deprecated (use EvalConfig).
    desired_batch_size: Optional batch size for batching in Predict.
    random_seed_for_testing: Provide for deterministic tests only.

  Raises:
    ValueError: If EvalConfig invalid or matching Extractor not found for an
      Evaluator.

  Returns:
    PDone.
  """
    eval_shared_models = eval_shared_model
    if not isinstance(eval_shared_model, dict):
        eval_shared_models = {'': eval_shared_model}

    if eval_config is None:
        model_specs = []
        for model_name, shared_model in eval_shared_models.items():
            example_weight_key = shared_model.example_weight_key
            example_weight_keys = {}
            if example_weight_key and isinstance(example_weight_key, dict):
                example_weight_keys = example_weight_key
                example_weight_key = ''
            model_specs.append(
                config.ModelSpec(name=model_name,
                                 example_weight_key=example_weight_key,
                                 example_weight_keys=example_weight_keys))
        slicing_specs = None
        if slice_spec:
            slicing_specs = [s.to_proto() for s in slice_spec]
        options = config.Options()
        options.compute_confidence_intervals.value = compute_confidence_intervals
        options.k_anonymization_count.value = k_anonymization_count
        if not write_config:
            options.disabled_outputs.append(_EVAL_CONFIG_FILE)
        eval_config = config.EvalConfig(model_specs=model_specs,
                                        slicing_specs=slicing_specs,
                                        options=options)

    # Add default ModelSpec if empty.
    if (eval_shared_models and len(eval_shared_models) == 1
            and not eval_config.model_specs):
        tmp_config = config.EvalConfig()
        tmp_config.CopyFrom(eval_config)
        eval_config = tmp_config
        eval_config.model_specs.add()

    config.verify_eval_config(eval_config)

    if not extractors:
        extractors = default_extractors(eval_config=eval_config,
                                        eval_shared_model=eval_shared_model,
                                        materialize=False,
                                        desired_batch_size=desired_batch_size)

    if not evaluators:
        evaluators = default_evaluators(
            eval_config=eval_config,
            eval_shared_model=eval_shared_model,
            random_seed_for_testing=random_seed_for_testing)

    for v in evaluators:
        evaluator.verify_evaluator(v, extractors)

    if not writers:
        writers = default_writers(output_path=output_path,
                                  eval_shared_model=eval_shared_model)

    # pylint: disable=no-value-for-parameter
    _ = (examples
         | 'InputsToExtracts' >> InputsToExtracts()
         | 'ExtractAndEvaluate' >> ExtractAndEvaluate(extractors=extractors,
                                                      evaluators=evaluators)
         | 'WriteResults' >> WriteResults(writers=writers))

    if _EVAL_CONFIG_FILE not in eval_config.options.disabled_outputs:
        data_location = '<user provided PCollection>'
        if display_only_data_location is not None:
            data_location = display_only_data_location
        file_format = '<unknown>'
        if display_only_file_format is not None:
            file_format = display_only_file_format
        model_locations = {}
        for k, v in eval_shared_models.items():
            model_locations[k] = ('<unknown>' if v is None
                                  or v.model_path is None else v.model_path)
        _ = (examples.pipeline
             | WriteEvalConfig(eval_config, output_path, data_location,
                               file_format, model_locations))
    # pylint: enable=no-value-for-parameter

    return beam.pvalue.PDone(examples.pipeline)
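The docstring above shows single-model usage; it also states that eval_shared_model may be a dict of shared models keyed by model name for multi-model evaluation. The sketch below illustrates that variant under the assumption that tensorflow_model_analysis re-exports these symbols as tfma.*; the model and data paths are placeholders, not values from the source.

import apache_beam as beam
import tensorflow_model_analysis as tfma

# ModelSpec names must match the keys of the shared-model dict.
eval_config = tfma.EvalConfig(model_specs=[
    tfma.ModelSpec(name='candidate'),
    tfma.ModelSpec(name='baseline'),
])
eval_shared_models = {
    'candidate': tfma.default_eval_shared_model(
        eval_saved_model_path='/tmp/candidate_model'),
    'baseline': tfma.default_eval_shared_model(
        eval_saved_model_path='/tmp/baseline_model'),
}
with beam.Pipeline() as p:
    _ = (p
         | 'ReadData' >> beam.io.ReadFromTFRecord('/tmp/examples.tfrecord')
         | 'ExtractEvaluateAndWriteResults' >>
         tfma.ExtractEvaluateAndWriteResults(
             eval_shared_model=eval_shared_models,
             eval_config=eval_config,
             output_path='/tmp/tfma_output'))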

    def testModelSignaturesDoFn(self, save_as_keras, signature_names,
                                default_signature_names, prefer_dict_outputs,
                                use_schema, expected_num_outputs):
        export_path = self.createModelWithMultipleDenseInputs(save_as_keras)
        eval_shared_models = {}
        model_specs = []
        for sigs in signature_names.values():
            for model_name in sigs:
                if model_name not in eval_shared_models:
                    eval_shared_models[
                        model_name] = self.createTestEvalSharedModel(
                            eval_saved_model_path=export_path,
                            model_name=model_name,
                            tags=[tf.saved_model.SERVING])
                    model_specs.append(config.ModelSpec(name=model_name))
        eval_config = config.EvalConfig(model_specs=model_specs)
        schema = self.createDenseInputsSchema() if use_schema else None
        tfx_io = tf_example_record.TFExampleBeamRecord(
            physical_format='text',
            schema=schema,
            raw_record_column_name=constants.ARROW_INPUT_COLUMN)
        tensor_adapter_config = None
        if use_schema:
            tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
                arrow_schema=tfx_io.ArrowSchema(),
                tensor_representations=tfx_io.TensorRepresentations())

        examples = [
            self._makeExample(input_1=1.0, input_2=2.0),
            self._makeExample(input_1=3.0, input_2=4.0),
            self._makeExample(input_1=5.0, input_2=6.0),
        ]

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            result = (pipeline
                      | 'Create' >> beam.Create(
                          [e.SerializeToString() for e in examples])
                      | 'BatchExamples' >> tfx_io.BeamSource(batch_size=3)
                      | 'ToExtracts' >> beam.Map(_record_batch_to_extracts)
                      | 'ModelSignatures' >> beam.ParDo(
                          model_util.ModelSignaturesDoFn(
                              eval_config=eval_config,
                              eval_shared_models=eval_shared_models,
                              signature_names=signature_names,
                              default_signature_names=default_signature_names,
                              prefer_dict_outputs=prefer_dict_outputs,
                              tensor_adapter_config=tensor_adapter_config)))

            # pylint: enable=no-value-for-parameter

            def check_result(got):
                try:
                    self.assertLen(got, 1)
                    for key in signature_names:
                        self.assertIn(key, got[0])
                        if prefer_dict_outputs:
                            for entry in got[0][key]:
                                self.assertIsInstance(entry, dict)
                                self.assertLen(entry, expected_num_outputs)

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(result, check_result, label='result')
Example #28
0
def run_model_analysis(
    eval_shared_model: Optional[Union[types.EvalSharedModel,
                                      Dict[Text,
                                           types.EvalSharedModel]]] = None,
    eval_config: config.EvalConfig = None,
    data_location: Text = '',
    file_format: Text = 'tfrecords',
    output_path: Optional[Text] = None,
    extractors: Optional[List[extractor.Extractor]] = None,
    evaluators: Optional[List[evaluator.Evaluator]] = None,
    writers: Optional[List[writer.Writer]] = None,
    pipeline_options: Optional[Any] = None,
    slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
    write_config: Optional[bool] = True,
    compute_confidence_intervals: Optional[bool] = False,
    k_anonymization_count: int = 1,
    desired_batch_size: Optional[int] = None,
    random_seed_for_testing: Optional[int] = None
) -> Union[EvalResult, EvalResults]:
    """Runs TensorFlow model analysis.

  It runs a Beam pipeline to compute the slicing metrics exported by the
  TensorFlow Eval SavedModel and returns the results.

  This is a simplified API for users who want to quickly get something running
  locally. Users who wish to create their own Beam pipelines can use the
  Evaluate PTransform instead.

  Args:
    eval_shared_model: Optional shared model (single-model evaluation) or dict
      of shared models keyed by model name (multi-model evaluation). Only
      required if needed by default extractors, evaluators, or writers.
    eval_config: Eval config.
    data_location: The location of the data files.
    file_format: The file format of the data, can be either 'text' or
      'tfrecords' for now. By default, 'tfrecords' will be used.
    output_path: The directory to output metrics and results to. If None, we use
      a temporary directory.
    extractors: Optional list of Extractors to apply to Extracts. Typically
      these will be added by calling the default_extractors function. If no
      extractors are provided, default_extractors (non-materialized) will be
      used.
    evaluators: Optional list of Evaluators for evaluating Extracts. Typically
      these will be added by calling the default_evaluators function. If no
      evaluators are provided, default_evaluators will be used.
    writers: Optional list of Writers for writing Evaluation output. Typically
      these will be added by calling the default_writers function. If no writers
      are provided, default_writers will be used.
    pipeline_options: Optional arguments to run the Pipeline, for instance
      whether to run directly.
    slice_spec: Deprecated (use EvalConfig).
    write_config: Deprecated (use EvalConfig).
    compute_confidence_intervals: Deprecated (use EvalConfig).
    k_anonymization_count: Deprecated (use EvalConfig).
    desired_batch_size: Optional batch size for batching in Predict.
    random_seed_for_testing: Provide for deterministic tests only.

  Returns:
    An EvalResult that can be used with the TFMA visualization functions.

  Raises:
    ValueError: If the file_format is unknown to us.
  """
    _assert_tensorflow_version()

    if output_path is None:
        output_path = tempfile.mkdtemp()
    if not tf.io.gfile.exists(output_path):
        tf.io.gfile.makedirs(output_path)

    if eval_config is None:
        model_specs = []
        eval_shared_models = eval_shared_model
        if not isinstance(eval_shared_model, dict):
            eval_shared_models = {'': eval_shared_model}
        for model_name, shared_model in eval_shared_models.items():
            example_weight_key = shared_model.example_weight_key
            example_weight_keys = {}
            if example_weight_key and isinstance(example_weight_key, dict):
                example_weight_keys = example_weight_key
                example_weight_key = ''
            model_specs.append(
                config.ModelSpec(name=model_name,
                                 example_weight_key=example_weight_key,
                                 example_weight_keys=example_weight_keys))
        slicing_specs = None
        if slice_spec:
            slicing_specs = [s.to_proto() for s in slice_spec]
        options = config.Options()
        options.compute_confidence_intervals.value = compute_confidence_intervals
        options.k_anonymization_count.value = k_anonymization_count
        if not write_config:
            options.disabled_outputs.append(_EVAL_CONFIG_FILE)
        eval_config = config.EvalConfig(model_specs=model_specs,
                                        slicing_specs=slicing_specs,
                                        options=options)

    with beam.Pipeline(options=pipeline_options) as p:
        if file_format == 'tfrecords':
            data = p | 'ReadFromTFRecord' >> beam.io.ReadFromTFRecord(
                file_pattern=data_location,
                compression_type=beam.io.filesystem.CompressionTypes.AUTO)
        elif file_format == 'text':
            data = p | 'ReadFromText' >> beam.io.textio.ReadFromText(
                data_location)
        else:
            raise ValueError('unknown file_format: {}'.format(file_format))

        # pylint: disable=no-value-for-parameter
        _ = (
            data
            |
            'ExtractEvaluateAndWriteResults' >> ExtractEvaluateAndWriteResults(
                eval_config=eval_config,
                eval_shared_model=eval_shared_model,
                display_only_data_location=data_location,
                display_only_file_format=file_format,
                output_path=output_path,
                extractors=extractors,
                evaluators=evaluators,
                writers=writers,
                desired_batch_size=desired_batch_size,
                random_seed_for_testing=random_seed_for_testing))
        # pylint: enable=no-value-for-parameter

    if len(eval_config.model_specs) <= 1:
        return load_eval_result(output_path)
    else:
        results = []
        for spec in eval_config.model_specs:
            results.append(load_eval_result(output_path, model_name=spec.name))
        return EvalResults(results, constants.MODEL_CENTRIC_MODE)
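Unlike ExtractEvaluateAndWriteResults, run_model_analysis carries no usage example in its docstring, so here is a minimal hedged sketch of the simplified local flow it describes. Paths are placeholders, and tensorflow_model_analysis is assumed to re-export these symbols as tfma.*.

import tensorflow as tf
import tensorflow_model_analysis as tfma

eval_config = tfma.EvalConfig(
    model_specs=[tfma.ModelSpec(label_key='label')],
    slicing_specs=[tfma.SlicingSpec()])
eval_result = tfma.run_model_analysis(
    eval_config=eval_config,
    eval_shared_model=tfma.default_eval_shared_model(
        eval_saved_model_path='/tmp/exported_model',
        tags=[tf.saved_model.SERVING]),
    data_location='/tmp/examples.tfrecord',
    file_format='tfrecords',
    output_path='/tmp/tfma_output')
# In a notebook, the result can then be rendered with
# tfma.view.render_slicing_metrics(eval_result).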
Example #29
0
    def _makeEvalConfig(self):
        eval_config = config.EvalConfig(
            model_specs=[config.ModelSpec(example_weight_key='testing_key')])
        return eval_config
Example #30
0
    def testRunModelAnalysisWithUncertainty(self):
        model_location = self._exportEvalSavedModel(
            linear_classifier.simple_linear_classifier)
        examples = [
            self._makeExample(age=3.0, language='english', label=1.0),
            self._makeExample(age=3.0, language='chinese', label=0.0),
            self._makeExample(age=4.0, language='english', label=1.0),
            self._makeExample(age=5.0, language='chinese', label=1.0),
            self._makeExample(age=5.0, language='hindi', label=1.0)
        ]
        data_location = self._writeTFExamplesToTFRecords(examples)
        slicing_specs = [config.SlicingSpec(feature_keys=['language'])]
        options = config.Options()
        options.compute_confidence_intervals.value = True
        options.k_anonymization_count.value = 2
        eval_config = config.EvalConfig(
            input_data_specs=[config.InputDataSpec(location=data_location)],
            model_specs=[config.ModelSpec(location=model_location)],
            output_data_specs=[
                config.OutputDataSpec(default_location=self._getTempDir())
            ],
            slicing_specs=slicing_specs,
            options=options)
        eval_result = model_eval_lib.run_model_analysis(
            eval_config=eval_config,
            eval_shared_models=[
                model_eval_lib.default_eval_shared_model(
                    eval_saved_model_path=model_location,
                    example_weight_key='age')
            ])
        # We only check some of the metrics to ensure that the end-to-end
        # pipeline works.
        expected = {
            (('language', 'hindi'), ): {
                u'__ERROR__': {
                    'debugMessage':
                    u'Example count for this slice key is lower than the '
                    u'minimum required value: 2. No data is aggregated for '
                    u'this slice.'
                },
            },
            (('language', 'chinese'), ): {
                metric_keys.EXAMPLE_WEIGHT: {
                    'doubleValue': 8.0
                },
                metric_keys.EXAMPLE_COUNT: {
                    'doubleValue': 2.0
                },
            },
            (('language', 'english'), ): {
                'accuracy': {
                    'boundedValue': {
                        'value': 1.0,
                        'lowerBound': 1.0,
                        'upperBound': 1.0,
                        'methodology': 'POISSON_BOOTSTRAP'
                    }
                },
                'my_mean_label': {
                    'boundedValue': {
                        'value': 1.0,
                        'lowerBound': 1.0,
                        'upperBound': 1.0,
                        'methodology': 'POISSON_BOOTSTRAP'
                    }
                },
                metric_keys.EXAMPLE_WEIGHT: {
                    'doubleValue': 7.0
                },
                metric_keys.EXAMPLE_COUNT: {
                    'doubleValue': 2.0
                },
            }
        }
        self.assertEqual(eval_result.config.model_specs[0].location,
                         model_location.decode())
        self.assertEqual(eval_result.config.input_data_specs[0].location,
                         data_location)
        self.assertEqual(eval_result.config.slicing_specs[0],
                         config.SlicingSpec(feature_keys=['language']))
        self.assertMetricsAlmostEqual(eval_result.slicing_metrics, expected)
        self.assertFalse(eval_result.plots)
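For reference, the uncertainty-related settings exercised above, reduced to the minimum: bootstrap confidence intervals plus a per-slice example-count floor. This simply restates what the test configures; config refers to the same TFMA config module used throughout these snippets.

options = config.Options()
options.compute_confidence_intervals.value = True  # POISSON_BOOTSTRAP bounds in the result
options.k_anonymization_count.value = 2  # slices with fewer than 2 examples are not aggregated
eval_config = config.EvalConfig(
    slicing_specs=[config.SlicingSpec(feature_keys=['language'])],
    options=options)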