def test_validate_instance_invalid_environment(self):
  """Checks that an environment name absent from the schema is rejected.

  The schema lists only "TRAINING" and "SERVING" as default environments, so
  calling validate_instance with environment='INVALID' is expected to raise a
  ValueError matching 'Environment.*not found in the schema.*'.
  """
  instance = {'feature': np.array(['A'])}
  # Text-format Schema proto, assembled from adjacent string literals.
  schema_text = (
      ' default_environment: "TRAINING"'
      ' default_environment: "SERVING"'
      ' feature {'
      ' name: "label"'
      ' not_in_environment: "SERVING"'
      ' value_count { min: 1 max: 1 }'
      ' presence { min_count: 1 }'
      ' type: BYTES'
      ' }'
      ' feature {'
      ' name: "feature"'
      ' value_count { min: 1 max: 1 }'
      ' presence { min_count: 1 }'
      ' type: BYTES'
      ' } '
  )
  schema = text_format.Parse(schema_text, schema_pb2.Schema())
  options = stats_options.StatsOptions(schema=schema)
  # assertRaisesRegex is the supported spelling; the assertRaisesRegexp alias
  # is deprecated and was removed from unittest in Python 3.12.
  with self.assertRaisesRegex(
      ValueError, 'Environment.*not found in the schema.*'):
    validation_api.validate_instance(instance, options, environment='INVALID')
def test_validate_instance_stats_options_without_schema(self):
  """Checks that validate_instance rejects StatsOptions that has no schema.

  Expects a ValueError matching 'options must include a schema.'.
  """
  # NOTE(review): a test with this same name that passes a pa.Table instance
  # also appears in this file; if both live in the same class, the later
  # definition replaces this one and this test never runs -- confirm.
  instance = {'feature': np.array(['A'])}
  # This instance of StatsOptions has no schema.
  options = stats_options.StatsOptions()
  # assertRaisesRegex is the supported spelling; the assertRaisesRegexp alias
  # is deprecated and was removed from unittest in Python 3.12.
  with self.assertRaisesRegex(ValueError, 'options must include a schema.'):
    validation_api.validate_instance(instance, options)
def test_validate_instance_environment(self):
  """Validates one instance against the same schema in two environments.

  The instance contains only 'feature'. The schema marks 'label' as
  not_in_environment "SERVING", so the test expects a missing-column anomaly
  for 'label' under TRAINING and no anomalies at all under SERVING.
  """
  # Text-format protos, assembled from adjacent string literals.
  schema_text = (
      ' default_environment: "TRAINING"'
      ' default_environment: "SERVING"'
      ' feature {'
      ' name: "label"'
      ' not_in_environment: "SERVING"'
      ' value_count { min: 1 max: 1 }'
      ' presence { min_count: 1 }'
      ' type: BYTES'
      ' }'
      ' feature {'
      ' name: "feature"'
      ' value_count { min: 1 max: 1 }'
      ' presence { min_count: 1 }'
      ' type: BYTES'
      ' } '
  )
  missing_label_text = (
      ' description: "Column is completely missing"'
      ' severity: ERROR'
      ' short_description: "Column dropped"'
      ' reason {'
      ' type: SCHEMA_MISSING_COLUMN'
      ' short_description: "Column dropped"'
      ' description: "Column is completely missing"'
      ' } '
  )

  example = {'feature': np.array(['A'])}
  opts = stats_options.StatsOptions(
      schema=text_format.Parse(schema_text, schema_pb2.Schema()))

  # TRAINING: 'label' applies in this environment but the instance lacks it.
  want_training = {
      'label': text_format.Parse(
          missing_label_text, anomalies_pb2.AnomalyInfo()),
  }
  got_training = validation_api.validate_instance(
      example, opts, environment='TRAINING')
  self._assert_equal_anomalies(got_training, want_training)

  # SERVING: 'label' is excluded from this environment, so nothing is flagged.
  got_serving = validation_api.validate_instance(
      example, opts, environment='SERVING')
  self._assert_equal_anomalies(got_serving, {})
def test_validate_instance_global_only_anomaly_type(self):
  """Checks that a dataset-level-only anomaly type is left out of the result.

  The schema sets presence.min_count to 5 (> 1), which would generate an
  anomaly of type FEATURE_TYPE_LOW_NUMBER_PRESENT whenever a single example is
  validated against it. That anomaly type is not meaningful in per-example
  validation, so the test expects validate_instance to report only the
  unexpected-string-value anomaly for 'annotated_enum'.
  """
  # Text-format protos, assembled from adjacent string literals.
  schema_text = (
      ' string_domain {'
      ' name: "MyAloneEnum"'
      ' value: "A"'
      ' value: "B"'
      ' value: "C"'
      ' }'
      ' feature {'
      ' name: "annotated_enum"'
      ' value_count { min:1 max:1 }'
      ' presence { min_count: 5 }'
      ' type: BYTES'
      ' domain: "MyAloneEnum"'
      ' }'
      ' feature {'
      ' name: "ignore_this"'
      ' lifecycle_stage: DEPRECATED'
      ' value_count { min:1 }'
      ' presence { min_count: 1 }'
      ' type: BYTES'
      ' } '
  )
  unexpected_value_text = (
      ' description: "Examples contain values missing from the schema: D "'
      ' "(~100%). "'
      ' severity: ERROR'
      ' short_description: "Unexpected string values"'
      ' reason {'
      ' type: ENUM_TYPE_UNEXPECTED_STRING_VALUES'
      ' short_description: "Unexpected string values"'
      ' description: "Examples contain values missing from the schema: D "'
      ' "(~100%). "'
      ' } '
  )

  # 'D' is not one of the values listed in the MyAloneEnum string domain.
  example = {'annotated_enum': np.array(['D'])}
  parsed_schema = text_format.Parse(schema_text, schema_pb2.Schema())
  want = {
      'annotated_enum': text_format.Parse(
          unexpected_value_text, anomalies_pb2.AnomalyInfo()),
  }

  opts = stats_options.StatsOptions(schema=parsed_schema)
  got = validation_api.validate_instance(example, opts)
  self._assert_equal_anomalies(got, want)
def test_validate_instance(self):
  """Checks the anomaly reported for an out-of-domain string value.

  The instance supplies 'D' for 'annotated_enum', whose domain MyAloneEnum
  lists only A, B and C. The test expects exactly one anomaly, of type
  ENUM_TYPE_UNEXPECTED_STRING_VALUES, keyed by 'annotated_enum'.
  """
  # Text-format protos, assembled from adjacent string literals.
  schema_text = (
      ' string_domain {'
      ' name: "MyAloneEnum"'
      ' value: "A"'
      ' value: "B"'
      ' value: "C"'
      ' }'
      ' feature {'
      ' name: "annotated_enum"'
      ' value_count { min:1 max:1 }'
      ' presence { min_count: 1 }'
      ' type: BYTES'
      ' domain: "MyAloneEnum"'
      ' }'
      ' feature {'
      ' name: "ignore_this"'
      ' lifecycle_stage: DEPRECATED'
      ' value_count { min:1 }'
      ' presence { min_count: 1 }'
      ' type: BYTES'
      ' } '
  )
  unexpected_value_text = (
      ' description: "Examples contain values missing from the schema: D "'
      ' "(~100%). "'
      ' severity: ERROR'
      ' short_description: "Unexpected string values"'
      ' reason {'
      ' type: ENUM_TYPE_UNEXPECTED_STRING_VALUES'
      ' short_description: "Unexpected string values"'
      ' description: "Examples contain values missing from the schema: D "'
      ' "(~100%). "'
      ' } '
  )

  example = {'annotated_enum': np.array(['D'])}
  parsed_schema = text_format.Parse(schema_text, schema_pb2.Schema())
  want = {
      'annotated_enum': text_format.Parse(
          unexpected_value_text, anomalies_pb2.AnomalyInfo()),
  }

  opts = stats_options.StatsOptions(schema=parsed_schema)
  got = validation_api.validate_instance(example, opts)
  self._assert_equal_anomalies(got, want)
def test_validate_instance_invalid_options(self):
  """Checks that validate_instance rejects a non-StatsOptions options value.

  A plain dict is passed as options; the test expects a ValueError matching
  'options must be a StatsOptions object.'.
  """
  # NOTE(review): a test with this same name that passes a pa.Table instance
  # also appears in this file; if both live in the same class, the later
  # definition replaces this one and this test never runs -- confirm.
  instance = {'feature': np.array(['A'])}
  # assertRaisesRegex is the supported spelling; the assertRaisesRegexp alias
  # is deprecated and was removed from unittest in Python 3.12.
  with self.assertRaisesRegex(
      ValueError, 'options must be a StatsOptions object.'):
    validation_api.validate_instance(instance, {})
def test_validate_instance_stats_options_without_schema(self):
  """Checks that validate_instance rejects StatsOptions that has no schema.

  The instance is a one-column pa.Table; the test expects a ValueError
  matching 'options must include a schema.'.
  """
  # NOTE(review): a test with this same name that passes a dict of numpy
  # arrays also appears in this file; if both live in the same class, only the
  # later definition runs -- confirm, and rename one of them if so.
  instance = pa.Table.from_arrays([pa.array([['A']])], ['feature'])
  # This instance of StatsOptions has no schema.
  options = stats_options.StatsOptions()
  # assertRaisesRegex is the supported spelling; the assertRaisesRegexp alias
  # is deprecated and was removed from unittest in Python 3.12.
  with self.assertRaisesRegex(ValueError, 'options must include a schema.'):
    validation_api.validate_instance(instance, options)
def test_validate_instance_invalid_options(self):
  """Checks that validate_instance rejects a non-StatsOptions options value.

  The instance is a one-column pa.Table and a plain dict is passed as options;
  the test expects a ValueError matching
  'options must be a StatsOptions object.'.
  """
  # NOTE(review): a test with this same name that passes a dict of numpy
  # arrays also appears in this file; if both live in the same class, only the
  # later definition runs -- confirm, and rename one of them if so.
  instance = pa.Table.from_arrays([pa.array([['A']])], ['feature'])
  # assertRaisesRegex is the supported spelling; the assertRaisesRegexp alias
  # is deprecated and was removed from unittest in Python 3.12.
  with self.assertRaisesRegex(
      ValueError, 'options must be a StatsOptions object.'):
    validation_api.validate_instance(instance, {})