Example #1
    def testDoValidation(self, exec_properties, blessed, has_baseline):
        source_data_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')
        output_data_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)

        # Create input dict.
        examples = standard_artifacts.Examples()
        examples.uri = os.path.join(source_data_dir, 'csv_example_gen')
        examples.split_names = artifact_utils.encode_split_names(
            ['train', 'eval'])
        model = standard_artifacts.Model()
        baseline_model = standard_artifacts.Model()
        model.uri = os.path.join(source_data_dir, 'trainer/current')
        baseline_model.uri = os.path.join(source_data_dir, 'trainer/previous/')
        schema = standard_artifacts.Schema()
        schema.uri = os.path.join(source_data_dir, 'schema_gen')
        input_dict = {
            EXAMPLES_KEY: [examples],
            MODEL_KEY: [model],
            SCHEMA_KEY: [schema],
        }
        if has_baseline:
            input_dict[BASELINE_MODEL_KEY] = [baseline_model]

        # Create output dict.
        eval_output = standard_artifacts.ModelEvaluation()
        eval_output.uri = os.path.join(output_data_dir, 'eval_output')
        blessing_output = standard_artifacts.ModelBlessing()
        blessing_output.uri = os.path.join(output_data_dir, 'blessing_output')
        output_dict = {
            EVALUATION_KEY: [eval_output],
            BLESSING_KEY: [blessing_output],
        }

        # example_splits must be serialized to JSON before being passed to Do().
        exec_properties[EXAMPLE_SPLITS_KEY] = json_utils.dumps(None)

        # Run executor.
        evaluator = executor.Executor()
        evaluator.Do(input_dict, output_dict, exec_properties)

        # Check evaluator outputs.
        self.assertTrue(
            fileio.exists(os.path.join(eval_output.uri, 'eval_config.json')))
        self.assertTrue(fileio.exists(os.path.join(eval_output.uri,
                                                   'metrics')))
        self.assertTrue(fileio.exists(os.path.join(eval_output.uri, 'plots')))
        self.assertTrue(
            fileio.exists(os.path.join(eval_output.uri, 'validations')))
        if blessed:
            self.assertTrue(
                fileio.exists(os.path.join(blessing_output.uri, 'BLESSED')))
        else:
            self.assertTrue(
                fileio.exists(os.path.join(blessing_output.uri,
                                           'NOT_BLESSED')))
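
The signature above takes exec_properties, blessed, and has_baseline as arguments, which implies a parameterized test class. A minimal sketch of the assumed scaffolding, using absl.testing.parameterized with placeholder case names and empty exec_properties (the real fixtures supply a serialized eval config):

import tensorflow as tf
from absl.testing import parameterized


class ExecutorValidationSketch(tf.test.TestCase, parameterized.TestCase):

    @parameterized.named_parameters(
        # Placeholder cases; the real test populates exec_properties with a
        # serialized eval config and threshold settings.
        dict(testcase_name='with_baseline', exec_properties={},
             blessed=True, has_baseline=True),
        dict(testcase_name='without_baseline', exec_properties={},
             blessed=False, has_baseline=False),
    )
    def testDoValidation(self, exec_properties, blessed, has_baseline):
        # Body as in Example #1 above.
        del exec_properties, blessed, has_baseline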
Example #2
    def testDoLegacySingleEvalSavedModelWFairness(self, exec_properties):
        source_data_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')
        output_data_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)

        # Create input dict.
        examples = standard_artifacts.Examples()
        examples.uri = os.path.join(source_data_dir, 'csv_example_gen')
        examples.split_names = artifact_utils.encode_split_names(
            ['train', 'eval'])
        model = standard_artifacts.Model()
        model.uri = os.path.join(source_data_dir, 'trainer/current')
        input_dict = {
            EXAMPLES_KEY: [examples],
            MODEL_KEY: [model],
        }

        # Create output dict.
        eval_output = standard_artifacts.ModelEvaluation()
        eval_output.uri = os.path.join(output_data_dir, 'eval_output')
        blessing_output = standard_artifacts.ModelBlessing()
        blessing_output.uri = os.path.join(output_data_dir, 'blessing_output')
        output_dict = {
            EVALUATION_KEY: [eval_output],
            BLESSING_KEY: [blessing_output],
        }

        try:
            # Need to import the following module so that the fairness indicator
            # post-export metric is registered.  This may raise an ImportError if the
            # currently-installed version of TFMA does not support fairness
            # indicators.
            import tensorflow_model_analysis.addons.fairness.post_export_metrics.fairness_indicators  # pylint: disable=g-import-not-at-top, unused-variable
            exec_properties['fairness_indicator_thresholds'] = [
                0.1, 0.3, 0.5, 0.7, 0.9
            ]
        except ImportError:
            logging.warning(
                'Not testing fairness indicators because a compatible TFMA version '
                'is not installed.')

        # example_splits must be serialized to JSON before being passed to Do().
        exec_properties[EXAMPLE_SPLITS_KEY] = json_utils.dumps(None)

        # Run executor.
        evaluator = executor.Executor()
        evaluator.Do(input_dict, output_dict, exec_properties)

        # Check evaluator outputs.
        self.assertTrue(
            fileio.exists(os.path.join(eval_output.uri, 'eval_config.json')))
        self.assertTrue(fileio.exists(os.path.join(eval_output.uri,
                                                   'metrics')))
        self.assertTrue(fileio.exists(os.path.join(eval_output.uri, 'plots')))
        self.assertFalse(
            fileio.exists(os.path.join(blessing_output.uri, 'BLESSED')))
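
The try/except block above makes the fairness check conditional on the installed TFMA version. A hedged sketch of the same guard factored into a helper (the helper name is illustrative, not part of the project):

from absl import logging


def maybe_enable_fairness_indicators(exec_properties):
    """Registers TFMA fairness post-export metrics if the addon is available."""
    try:
        # Importing this module registers the fairness-indicator post-export
        # metric; it raises ImportError on TFMA builds without fairness support.
        import tensorflow_model_analysis.addons.fairness.post_export_metrics.fairness_indicators  # pylint: disable=g-import-not-at-top, unused-import
        exec_properties['fairness_indicator_thresholds'] = [
            0.1, 0.3, 0.5, 0.7, 0.9
        ]
        return True
    except ImportError:
        logging.warning(
            'Not testing fairness indicators because a compatible TFMA version '
            'is not installed.')
        return False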
Example #3
    def testEvaluation(self, exec_properties, model_agnostic=False):
        source_data_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')
        output_data_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)

        # Create input dict.
        examples = standard_artifacts.Examples()
        examples.uri = os.path.join(source_data_dir, 'csv_example_gen')
        examples.split_names = artifact_utils.encode_split_names(
            ['train', 'eval'])
        baseline_model = standard_artifacts.Model()
        baseline_model.uri = os.path.join(source_data_dir, 'trainer/previous/')
        schema = standard_artifacts.Schema()
        schema.uri = os.path.join(source_data_dir, 'schema_gen')
        input_dict = {
            EXAMPLES_KEY: [examples],
            SCHEMA_KEY: [schema],
        }
        if not model_agnostic:
            model = standard_artifacts.Model()
            model.uri = os.path.join(source_data_dir, 'trainer/current')
            input_dict[MODEL_KEY] = [model]

        # Create output dict.
        eval_output = standard_artifacts.ModelEvaluation()
        eval_output.uri = os.path.join(output_data_dir, 'eval_output')
        blessing_output = standard_artifacts.ModelBlessing()
        blessing_output.uri = os.path.join(output_data_dir, 'blessing_output')
        output_dict = {
            EVALUATION_KEY: [eval_output],
            BLESSING_KEY: [blessing_output],
        }

        # Test multiple splits.
        exec_properties[EXAMPLE_SPLITS_KEY] = json_utils.dumps(
            ['train', 'eval'])

        if MODULE_FILE_KEY in exec_properties:
            exec_properties[MODULE_FILE_KEY] = os.path.join(
                source_data_dir, 'module_file', 'evaluator_module.py')

        # Run executor.
        evaluator = executor.Executor()
        evaluator.Do(input_dict, output_dict, exec_properties)

        # Check evaluator outputs.
        self.assertTrue(
            fileio.exists(os.path.join(eval_output.uri, 'eval_config.json')))
        self.assertTrue(fileio.exists(os.path.join(eval_output.uri,
                                                   'metrics')))
        self.assertTrue(fileio.exists(os.path.join(eval_output.uri, 'plots')))
        self.assertFalse(
            fileio.exists(os.path.join(blessing_output.uri, 'BLESSED')))
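
For reference, EXAMPLE_SPLITS_KEY carries a JSON string rather than a Python list. The standard json module shows the equivalent encodings; the tests themselves use tfx's json_utils.dumps, which is assumed to produce the same strings for plain lists and None:

import json

print(json.dumps(['train', 'eval']))  # '["train", "eval"]' -- evaluate both splits
print(json.dumps(None))               # 'null' -- as passed in Examples #1 and #2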
Example #4
    def test_do(self):
        source_data_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')
        output_data_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)

        # Create input dict.
        train_examples = types.TfxArtifact(type_name='ExamplesPath',
                                           split='train')
        train_examples.uri = os.path.join(source_data_dir,
                                          'csv_example_gen/train/')
        eval_examples = types.TfxArtifact(type_name='ExamplesPath',
                                          split='eval')
        eval_examples.uri = os.path.join(source_data_dir,
                                         'csv_example_gen/eval/')
        model_exports = types.TfxArtifact(type_name='ModelExportPath')
        model_exports.uri = os.path.join(source_data_dir, 'trainer/current/')
        input_dict = {
            'examples': [train_examples, eval_examples],
            'model_exports': [model_exports],
        }

        # Create output dict.
        eval_output = types.TfxArtifact('ModelEvalPath')
        eval_output.uri = os.path.join(output_data_dir, 'eval_output')
        output_dict = {'output': [eval_output]}

        # Create exec properties.
        exec_properties = {
            'feature_slicing_spec':
            json_format.MessageToJson(
                evaluator_pb2.FeatureSlicingSpec(specs=[
                    evaluator_pb2.SingleSlicingSpec(
                        column_for_slicing=['trip_start_hour']),
                    evaluator_pb2.SingleSlicingSpec(
                        column_for_slicing=['trip_start_day', 'trip_miles']),
                ]))
        }

        # Run executor.
        evaluator = executor.Executor()
        evaluator.Do(input_dict, output_dict, exec_properties)

        # Check evaluator outputs.
        self.assertTrue(
            tf.gfile.Exists(os.path.join(eval_output.uri, 'eval_config')))
        self.assertTrue(
            tf.gfile.Exists(os.path.join(eval_output.uri, 'metrics')))
        self.assertTrue(tf.gfile.Exists(os.path.join(eval_output.uri,
                                                     'plots')))
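
The feature_slicing_spec property is a JSON rendering of a FeatureSlicingSpec proto. A small sketch of the round trip with google.protobuf.json_format (the tfx.proto import path for evaluator_pb2 is assumed here):

from google.protobuf import json_format
from tfx.proto import evaluator_pb2  # assumed import path for evaluator_pb2

spec = evaluator_pb2.FeatureSlicingSpec(specs=[
    evaluator_pb2.SingleSlicingSpec(column_for_slicing=['trip_start_hour']),
])
as_json = json_format.MessageToJson(spec)
# Parse is the inverse of MessageToJson; the executor is expected to recover
# the proto from this JSON string.
restored = json_format.Parse(as_json, evaluator_pb2.FeatureSlicingSpec())
assert restored == spec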
Example #5
    def testEvaluation(self, exec_properties):
        source_data_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')
        output_data_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)

        # Create input dict.
        examples = standard_artifacts.Examples()
        examples.uri = os.path.join(source_data_dir, 'csv_example_gen')
        examples.split_names = artifact_utils.encode_split_names(
            ['train', 'eval'])
        model = standard_artifacts.Model()
        baseline_model = standard_artifacts.Model()
        model.uri = os.path.join(source_data_dir, 'trainer/current')
        baseline_model.uri = os.path.join(source_data_dir, 'trainer/previous/')
        schema = standard_artifacts.Schema()
        schema.uri = os.path.join(source_data_dir, 'schema_gen')
        input_dict = {
            constants.EXAMPLES_KEY: [examples],
            constants.MODEL_KEY: [model],
            constants.SCHEMA_KEY: [schema],
        }

        # Create output dict.
        eval_output = standard_artifacts.ModelEvaluation()
        eval_output.uri = os.path.join(output_data_dir, 'eval_output')
        blessing_output = standard_artifacts.ModelBlessing()
        blessing_output.uri = os.path.join(output_data_dir, 'blessing_output')
        output_dict = {
            constants.EVALUATION_KEY: [eval_output],
            constants.BLESSING_KEY: [blessing_output],
        }

        # Run executor.
        evaluator = executor.Executor()
        evaluator.Do(input_dict, output_dict, exec_properties)

        # Check evaluator outputs.
        self.assertTrue(
            tf.io.gfile.exists(
                os.path.join(eval_output.uri, 'eval_config.json')))
        self.assertTrue(
            tf.io.gfile.exists(os.path.join(eval_output.uri, 'metrics')))
        self.assertTrue(
            tf.io.gfile.exists(os.path.join(eval_output.uri, 'plots')))
        self.assertFalse(
            tf.io.gfile.exists(os.path.join(blessing_output.uri, 'BLESSED')))
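
The output checks are repeated across these tests; a hedged sketch of a small helper (name and defaults are illustrative) that bundles them:

import os
import tensorflow as tf


def assert_evaluation_outputs(test_case, eval_uri,
                              expected=('eval_config.json', 'metrics', 'plots')):
    """Asserts the files/directories TFMA is expected to write under eval_uri."""
    for name in expected:
        test_case.assertTrue(
            tf.io.gfile.exists(os.path.join(eval_uri, name)),
            msg='missing evaluator output: %s' % name)

# Usage inside a test method: assert_evaluation_outputs(self, eval_output.uri)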
Example #6
  def testDo(self):
    source_data_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')
    output_data_dir = os.path.join(
        os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
        self._testMethodName)

    # Create input dict.
    train_examples = standard_artifacts.Examples(split='train')
    train_examples.uri = os.path.join(source_data_dir, 'csv_example_gen/train/')
    eval_examples = standard_artifacts.Examples(split='eval')
    eval_examples.uri = os.path.join(source_data_dir, 'csv_example_gen/eval/')
    model_exports = standard_artifacts.Model()
    model_exports.uri = os.path.join(source_data_dir, 'trainer/current/')
    input_dict = {
        'examples': [train_examples, eval_examples],
        'model_exports': [model_exports],
    }

    # Create output dict.
    eval_output = standard_artifacts.ModelEvaluation()
    eval_output.uri = os.path.join(output_data_dir, 'eval_output')
    output_dict = {'output': [eval_output]}

    # Create exec properties.
    exec_properties = {
        'feature_slicing_spec':
            json_format.MessageToJson(
                evaluator_pb2.FeatureSlicingSpec(specs=[
                    evaluator_pb2.SingleSlicingSpec(
                        column_for_slicing=['trip_start_hour']),
                    evaluator_pb2.SingleSlicingSpec(
                        column_for_slicing=['trip_start_day', 'trip_miles']),
                ]))
    }

    # Run executor.
    evaluator = executor.Executor()
    evaluator.Do(input_dict, output_dict, exec_properties)

    # Check evaluator outputs.
    self.assertTrue(
        # TODO(b/141490237): Update to only check eval_config.json after TFMA
        # released with corresponding change.
        tf.gfile.Exists(os.path.join(eval_output.uri, 'eval_config')) or
        tf.gfile.Exists(os.path.join(eval_output.uri, 'eval_config.json')))
    self.assertTrue(tf.gfile.Exists(os.path.join(eval_output.uri, 'metrics')))
    self.assertTrue(tf.gfile.Exists(os.path.join(eval_output.uri, 'plots')))
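
The TODO above notes that older TFMA releases write 'eval_config' while newer ones write 'eval_config.json'; a minimal sketch of that compatibility check as a predicate:

import os
import tensorflow as tf


def eval_config_exists(eval_uri):
    # Accept either filename until the minimum supported TFMA version writes
    # only 'eval_config.json' (see TODO(b/141490237) above).
    return any(
        tf.gfile.Exists(os.path.join(eval_uri, name))
        for name in ('eval_config', 'eval_config.json'))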
Example #7
    def testDo(self):
        source_data_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')
        output_data_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)

        # Create input dict.
        train_examples = standard_artifacts.Examples(split='train')
        train_examples.uri = os.path.join(source_data_dir,
                                          'csv_example_gen/train/')
        eval_examples = standard_artifacts.Examples(split='eval')
        eval_examples.uri = os.path.join(source_data_dir,
                                         'csv_example_gen/eval/')
        model_exports = standard_artifacts.Model()
        model_exports.uri = os.path.join(source_data_dir, 'trainer/current/')
        input_dict = {
            'examples': [train_examples, eval_examples],
            'model_exports': [model_exports],
        }

        # Create output dict.
        eval_output = standard_artifacts.ModelEvaluation()
        eval_output.uri = os.path.join(output_data_dir, 'eval_output')
        output_dict = {'output': [eval_output]}

        # Create exec properties.
        exec_properties = {
            'feature_slicing_spec':
            json_format.MessageToJson(
                evaluator_pb2.FeatureSlicingSpec(specs=[
                    evaluator_pb2.SingleSlicingSpec(
                        column_for_slicing=['trip_start_hour']),
                    evaluator_pb2.SingleSlicingSpec(
                        column_for_slicing=['trip_start_day', 'trip_miles']),
                ]))
        }

        try:
            # Need to import the following module so that the fairness indicator
            # post-export metric is registered.  This may raise an ImportError if the
            # currently-installed version of TFMA does not support fairness
            # indicators.
            import tensorflow_model_analysis.addons.fairness.post_export_metrics.fairness_indicators  # pylint: disable=g-import-not-at-top, unused-variable
            exec_properties['fairness_indicator_thresholds'] = [
                0.1, 0.3, 0.5, 0.7, 0.9
            ]
        except ImportError:
            absl.logging.warning(
                'Not testing fairness indicators because a compatible TFMA version '
                'is not installed.')

        # Run executor.
        evaluator = executor.Executor()
        evaluator.Do(input_dict, output_dict, exec_properties)

        # Check evaluator outputs.
        self.assertTrue(
            # TODO(b/141490237): Update to only check eval_config.json after TFMA
            # released with corresponding change.
            tf.io.gfile.exists(os.path.join(eval_output.uri, 'eval_config'))
            or tf.io.gfile.exists(
                os.path.join(eval_output.uri, 'eval_config.json')))
        self.assertTrue(
            tf.io.gfile.exists(os.path.join(eval_output.uri, 'metrics')))
        self.assertTrue(
            tf.io.gfile.exists(os.path.join(eval_output.uri, 'plots')))