def test_success_some_errors(self) -> None:
    """A result whose error rate equals the allowed error counts as a success.

    Builds details with 5 error rows out of 100 and max_allowed_error=0.05,
    then checks that was_successful() is true and that no failure
    description is produced.
    """
    # A single partition keyed by the empty tuple, i.e. no partition columns.
    counts_per_partition = [((), {"a": 100, "b": 100, "c": 100})]

    details = SamenessStringsValidationResultDetails(
        num_error_rows=5,
        total_num_rows=100,
        max_allowed_error=0.05,
        non_null_counts_per_column_per_partition=counts_per_partition,
    )

    self.assertTrue(details.was_successful())
    self.assertIsNone(details.failure_description())
def test_sameness_check_strings_different_values_no_allowed_error(self) -> None:
    """Runs a STRINGS sameness check over one row whose columns all differ.

    The mocked query returns a single row with "a", "b" and "c" holding
    different strings, and the check is built without an explicit
    max_allowed_error. The expected result reports 1 error row out of 1,
    a max_allowed_error of 0.0, and one non-null value per column.
    """
    self.mock_client.run_query_async.return_value = [{"a": "a", "b": "b", "c": "c"}]

    view_builder = SimpleBigQueryViewBuilder(
        dataset_id="my_dataset",
        view_id="test_view",
        description="test_view description",
        view_query_template="select * from literally_anything",
    )
    check = SamenessDataValidationCheck(
        validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
        validation_type=ValidationCheckType.SAMENESS,
        comparison_columns=["a", "b", "c"],
        partition_columns=[],
        sameness_check_type=SamenessDataValidationCheckType.STRINGS,
        view_builder=view_builder,
    )
    job = DataValidationJob(region_code="US_XX", validation=check)

    result = SamenessValidationChecker.run_check(job)

    # No partition columns were configured, so the only partition key is ().
    expected_details = SamenessStringsValidationResultDetails(
        num_error_rows=1,
        total_num_rows=1,
        max_allowed_error=0.0,
        non_null_counts_per_column_per_partition=[
            ((), {"a": 1, "b": 1, "c": 1}),
        ],
    )
    expected = DataValidationJobResult(
        validation_job=job,
        result_details=expected_details,
    )
    self.assertEqual(result, expected)
def test_failure_all_errors(self) -> None:
    """A result where the only row is an error fails with a full message.

    Builds details with 1 error row out of 1 and max_allowed_error=0.0,
    then checks that was_successful() is false and that the failure
    description reports an error rate of 1.0.
    """
    details = SamenessStringsValidationResultDetails(
        num_error_rows=1,
        total_num_rows=1,
        max_allowed_error=0.0,
        non_null_counts_per_column_per_partition=[
            ((), {"a": 1, "b": 1, "c": 1}),
        ],
    )
    expected_message = (
        "1 out of 1 row(s) did not contain matching strings. "
        "The acceptable margin of error is only 0.0, but the "
        "validation returned an error rate of 1.0."
    )

    self.assertFalse(details.was_successful())
    self.assertEqual(details.failure_description(), expected_message)
def test_multiple_partitions(self) -> None:
    """A result spanning two partitions fails and reports a rounded rate.

    Builds details with 3 error rows out of 7 across two
    (region, date) partitions with max_allowed_error=0.0, then checks that
    was_successful() is false and that the failure description shows the
    error rate as 0.4286.
    """
    counts_per_partition = [
        (("US_XX", "2021-01-31"), {"a": 3, "b": 3}),
        (("US_XX", "2020-12-31"), {"a": 2, "b": 3}),
    ]
    details = SamenessStringsValidationResultDetails(
        num_error_rows=3,
        total_num_rows=7,
        max_allowed_error=0.0,
        non_null_counts_per_column_per_partition=counts_per_partition,
    )
    expected_message = (
        "3 out of 7 row(s) did not contain matching strings. "
        "The acceptable margin of error is only 0.0, but the "
        "validation returned an error rate of 0.4286."
    )

    self.assertFalse(details.was_successful())
    self.assertEqual(details.failure_description(), expected_message)
def test_sameness_check_strings_multiple_dates(self) -> None:
    """Runs a STRINGS sameness check partitioned by region and date.

    The mocked query returns seven rows over two dates for US_XX; three of
    them have a null in either "a" or "b". With max_allowed_error=0.0 the
    expected result reports 3 error rows out of 7 and per-partition
    non-null counts for each comparison column.
    """
    query_rows = [
        # January 2021
        {"region": "US_XX", "date": "2021-01-31", "a": "00", "b": "00"},
        {"region": "US_XX", "date": "2021-01-31", "a": "01", "b": None},
        {"region": "US_XX", "date": "2021-01-31", "a": "02", "b": "02"},
        {"region": "US_XX", "date": "2021-01-31", "a": None, "b": "03"},
        # December 2020
        {"region": "US_XX", "date": "2020-12-31", "a": "00", "b": "00"},
        {"region": "US_XX", "date": "2020-12-31", "a": "02", "b": "02"},
        {"region": "US_XX", "date": "2020-12-31", "a": None, "b": "04"},
    ]
    self.mock_client.run_query_async.return_value = query_rows

    view_builder = SimpleBigQueryViewBuilder(
        dataset_id="my_dataset",
        view_id="test_view",
        description="test_view description",
        view_query_template="select * from literally_anything",
    )
    check = SamenessDataValidationCheck(
        validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
        validation_type=ValidationCheckType.SAMENESS,
        comparison_columns=["a", "b"],
        partition_columns=["region", "date"],
        sameness_check_type=SamenessDataValidationCheckType.STRINGS,
        max_allowed_error=0.0,
        view_builder=view_builder,
    )
    job = DataValidationJob(region_code="US_XX", validation=check)

    result = SamenessValidationChecker.run_check(job)

    # Partition keys are (region, date) tuples, listed in the order the
    # dates first appear in the mocked rows.
    expected_details = SamenessStringsValidationResultDetails(
        num_error_rows=3,
        total_num_rows=7,
        max_allowed_error=0.0,
        non_null_counts_per_column_per_partition=[
            (("US_XX", "2021-01-31"), {"a": 3, "b": 3}),
            (("US_XX", "2020-12-31"), {"a": 2, "b": 3}),
        ],
    )
    expected = DataValidationJobResult(
        validation_job=job,
        result_details=expected_details,
    )
    self.assertEqual(result, expected)
def test_from_successful_result_strings(self) -> None:
    """Converts a successful STRINGS sameness result into its storage form.

    Builds a DataValidationJobResult with 0 error rows out of 5, passes it
    to ValidationResultForStorage.from_validation_result, and checks both
    the resulting object and the dict returned by to_serializable(), in
    which result_details is a JSON-encoded string.
    """
    # Arrange
    result_details = SamenessStringsValidationResultDetails(
        num_error_rows=0,
        total_num_rows=5,
        max_allowed_error=0.5,
        non_null_counts_per_column_per_partition=[
            (("US_XX", "2020-12-01"), {"internal": 5, "external": 5})
        ],
    )
    job_result = DataValidationJobResult(
        validation_job=DataValidationJob(
            region_code="US_XX",
            validation=SamenessDataValidationCheck(
                validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
                validation_type=ValidationCheckType.SAMENESS,
                comparison_columns=["internal", "external"],
                partition_columns=["state_code", "date"],
                sameness_check_type=SamenessDataValidationCheckType.STRINGS,
                view_builder=SimpleBigQueryViewBuilder(
                    dataset_id="my_dataset",
                    view_id="test_view",
                    description="test_view description",
                    view_query_template="select * from literally_anything",
                ),
            ),
        ),
        result_details=result_details,
    )

    # Act
    result = ValidationResultForStorage.from_validation_result(
        run_id="abc123",
        run_datetime=datetime.datetime(2000, 1, 1, 0, 0, 0),
        result=job_result,
    )

    # Assert
    self.assertEqual(
        ValidationResultForStorage(
            run_id="abc123",
            run_date=datetime.date(2000, 1, 1),
            run_datetime=datetime.datetime(2000, 1, 1, 0, 0, 0),
            system_version="v1.0.0",
            check_type=ValidationCheckType.SAMENESS,
            validation_name="test_view",
            region_code="US_XX",
            did_run=True,
            was_successful=True,
            failure_description=None,
            result_details_type="SamenessStringsValidationResultDetails",
            result_details=result_details,
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
        ),
        result,
    )
    self.assertEqual(
        {
            "run_id": "abc123",
            "run_date": "2000-01-01",
            "run_datetime": "2000-01-01T00:00:00",
            "system_version": "v1.0.0",
            "check_type": "SAMENESS",
            "validation_name": "test_view",
            "region_code": "US_XX",
            "did_run": True,
            "was_successful": True,
            "failure_description": None,
            "result_details_type": "SamenessStringsValidationResultDetails",
            # Adjacent literals are concatenated at compile time, so this is
            # one JSON string with no line break inside it. A quoted literal
            # cannot itself span physical lines.
            "result_details": (
                '{"num_error_rows": 0, "total_num_rows": 5, "max_allowed_error": 0.5, '
                '"non_null_counts_per_column_per_partition": [[["US_XX", "2020-12-01"], {"internal": 5, "external": 5}]]}'
            ),
            "validation_category": "EXTERNAL_INDIVIDUAL",
        },
        result.to_serializable(),
    )