예제 #1
0
    def test_success_some_errors(self) -> None:
        """Checks that 5 error rows out of 100 pass with a 0.05 allowance.

        The details object is expected to report success and to return no
        failure description.
        """
        # Single, unpartitioned bucket (empty partition key) with every column
        # fully populated.
        counts_by_partition = [((), dict.fromkeys(("a", "b", "c"), 100))]
        details = SamenessStringsValidationResultDetails(
            non_null_counts_per_column_per_partition=counts_by_partition,
            max_allowed_error=0.05,
            total_num_rows=100,
            num_error_rows=5,
        )

        self.assertTrue(details.was_successful())
        self.assertIsNone(details.failure_description())
예제 #2
0
    def test_sameness_check_strings_different_values_no_allowed_error(self) -> None:
        """Runs a STRINGS sameness check over one row whose columns all differ.

        No max_allowed_error is passed to the check. The expected result
        details carry a max_allowed_error of 0.0 and count the single row as
        an error row.
        """
        # The mocked query returns one row with a distinct string per column.
        self.mock_client.run_query_async.return_value = [
            dict(a="a", b="b", c="c"),
        ]

        view_builder = SimpleBigQueryViewBuilder(
            dataset_id="my_dataset",
            view_id="test_view",
            description="test_view description",
            view_query_template="select * from literally_anything",
        )
        check = SamenessDataValidationCheck(
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
            validation_type=ValidationCheckType.SAMENESS,
            comparison_columns=["a", "b", "c"],
            partition_columns=[],
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            view_builder=view_builder,
        )
        job = DataValidationJob(region_code="US_XX", validation=check)

        actual = SamenessValidationChecker.run_check(job)

        expected_details = SamenessStringsValidationResultDetails(
            num_error_rows=1,
            total_num_rows=1,
            max_allowed_error=0.0,
            non_null_counts_per_column_per_partition=[
                ((), {"a": 1, "b": 1, "c": 1}),
            ],
        )
        expected = DataValidationJobResult(
            validation_job=job,
            result_details=expected_details,
        )
        self.assertEqual(actual, expected)
예제 #3
0
    def test_failure_all_errors(self) -> None:
        """Checks the failure report when the only row is an error row.

        With 1 error row out of 1 and a 0.0 allowance, the details object is
        expected to report failure and to describe an error rate of 1.0.
        """
        counts_by_partition = [((), dict.fromkeys(("a", "b", "c"), 1))]
        details = SamenessStringsValidationResultDetails(
            non_null_counts_per_column_per_partition=counts_by_partition,
            max_allowed_error=0.0,
            total_num_rows=1,
            num_error_rows=1,
        )
        expected_message = (
            "1 out of 1 row(s) did not contain matching strings. "
            "The acceptable margin of error is only 0.0, but the "
            "validation returned an error rate of 1.0."
        )

        self.assertFalse(details.was_successful())
        self.assertEqual(details.failure_description(), expected_message)
예제 #4
0
    def test_multiple_partitions(self) -> None:
        """Checks the failure report for counts spread over two partitions.

        With 3 error rows out of 7 and a 0.0 allowance, the details object is
        expected to report failure and to describe an error rate of 0.4286.
        """
        # One (partition key, non-null counts per column) pair per partition.
        january = (("US_XX", "2021-01-31"), {"a": 3, "b": 3})
        december = (("US_XX", "2020-12-31"), {"a": 2, "b": 3})
        details = SamenessStringsValidationResultDetails(
            non_null_counts_per_column_per_partition=[january, december],
            max_allowed_error=0.0,
            total_num_rows=7,
            num_error_rows=3,
        )
        expected_message = (
            "3 out of 7 row(s) did not contain matching strings. "
            "The acceptable margin of error is only 0.0, but the "
            "validation returned an error rate of 0.4286."
        )

        self.assertFalse(details.was_successful())
        self.assertEqual(details.failure_description(), expected_message)
예제 #5
0
    def test_sameness_check_strings_multiple_dates(self) -> None:
        """Runs a STRINGS sameness check partitioned by region and date.

        The mocked query returns seven rows across two dates. The expected
        result details count 3 error rows out of 7 and list non-null counts
        per column separately for each (region, date) partition.
        """
        # (date, [(a, b), ...]) in the order the rows are returned.
        pairs_by_date = (
            # January 2021
            ("2021-01-31", [("00", "00"), ("01", None), ("02", "02"), (None, "03")]),
            # December 2020
            ("2020-12-31", [("00", "00"), ("02", "02"), (None, "04")]),
        )
        self.mock_client.run_query_async.return_value = [
            {"region": "US_XX", "date": date, "a": a_value, "b": b_value}
            for date, pairs in pairs_by_date
            for a_value, b_value in pairs
        ]

        view_builder = SimpleBigQueryViewBuilder(
            dataset_id="my_dataset",
            view_id="test_view",
            description="test_view description",
            view_query_template="select * from literally_anything",
        )
        check = SamenessDataValidationCheck(
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
            validation_type=ValidationCheckType.SAMENESS,
            comparison_columns=["a", "b"],
            partition_columns=["region", "date"],
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            max_allowed_error=0.0,
            view_builder=view_builder,
        )
        job = DataValidationJob(region_code="US_XX", validation=check)

        actual = SamenessValidationChecker.run_check(job)

        expected_details = SamenessStringsValidationResultDetails(
            num_error_rows=3,
            total_num_rows=7,
            max_allowed_error=0.0,
            non_null_counts_per_column_per_partition=[
                (("US_XX", "2021-01-31"), {"a": 3, "b": 3}),
                (("US_XX", "2020-12-31"), {"a": 2, "b": 3}),
            ],
        )
        expected = DataValidationJobResult(
            validation_job=job,
            result_details=expected_details,
        )
        self.assertEqual(actual, expected)
    def test_from_successful_result_strings(self) -> None:
        """Converts a zero-error strings-sameness job result into its storage form.

        Checks both the ValidationResultForStorage object built by
        from_validation_result and the dict returned by to_serializable().
        """
        # Arrange
        result_details = SamenessStringsValidationResultDetails(
            num_error_rows=0,
            total_num_rows=5,
            max_allowed_error=0.5,
            non_null_counts_per_column_per_partition=[
                (("US_XX", "2020-12-01"), {"internal": 5, "external": 5})
            ],
        )
        view_builder = SimpleBigQueryViewBuilder(
            dataset_id="my_dataset",
            view_id="test_view",
            description="test_view description",
            view_query_template="select * from literally_anything",
        )
        check = SamenessDataValidationCheck(
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
            validation_type=ValidationCheckType.SAMENESS,
            comparison_columns=["internal", "external"],
            partition_columns=["state_code", "date"],
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            view_builder=view_builder,
        )
        job_result = DataValidationJobResult(
            validation_job=DataValidationJob(region_code="US_XX", validation=check),
            result_details=result_details,
        )

        # Act
        stored = ValidationResultForStorage.from_validation_result(
            run_id="abc123",
            run_datetime=datetime.datetime(2000, 1, 1, 0, 0, 0),
            result=job_result,
        )

        # Assert
        # NOTE(review): "v1.0.0" presumably comes from a fixture or patch set
        # up outside this method — confirm.
        expected_stored = ValidationResultForStorage(
            run_id="abc123",
            run_date=datetime.date(2000, 1, 1),
            run_datetime=datetime.datetime(2000, 1, 1, 0, 0, 0),
            system_version="v1.0.0",
            check_type=ValidationCheckType.SAMENESS,
            validation_name="test_view",
            region_code="US_XX",
            did_run=True,
            was_successful=True,
            failure_description=None,
            result_details_type="SamenessStringsValidationResultDetails",
            result_details=result_details,
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
        )
        self.assertEqual(expected_stored, stored)

        # The result details are expected as a JSON string in the serialized
        # form, with the partition tuple rendered as a list.
        expected_serialized = {
            "run_id": "abc123",
            "run_date": "2000-01-01",
            "run_datetime": "2000-01-01T00:00:00",
            "system_version": "v1.0.0",
            "check_type": "SAMENESS",
            "validation_name": "test_view",
            "region_code": "US_XX",
            "did_run": True,
            "was_successful": True,
            "failure_description": None,
            "result_details_type": "SamenessStringsValidationResultDetails",
            "result_details": '{"num_error_rows": 0, "total_num_rows": 5, "max_allowed_error": 0.5, "non_null_counts_per_column_per_partition": [[["US_XX", "2020-12-01"], {"internal": 5, "external": 5}]]}',
            "validation_category": "EXTERNAL_INDIVIDUAL",
        }
        self.assertEqual(expected_serialized, stored.to_serializable())