Example #1
    def _get_stats_from_results(
        self, result: ValidationOperatorResult
    ) -> Tuple[int, int]:
        # Pull the (successful, evaluated) expectation counts for the first
        # validation result in the operator run.
        result_identifier = result.list_validation_result_identifiers()[0]
        stats = result._list_validation_statistics()[result_identifier]
        n_successful = stats["successful_expectations"]
        n_expectations = stats["evaluated_expectations"]
        return n_successful, n_expectations
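A hedged usage sketch (hypothetical: `result` is the ValidationOperatorResult from a previous run, and the helper is assumed to live on the same class):

    n_successful, n_expectations = self._get_stats_from_results(result)
    pass_rate = n_successful / n_expectations if n_expectations else 0.0
    print(f"{n_successful}/{n_expectations} expectations passed ({pass_rate:.1%})")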
def render_multiple_validation_result_pages_markdown(
    validation_operator_result: ValidationOperatorResult,
    run_info_at_end: bool = True,
) -> str:
    """
    Loop through and render multiple validation results to markdown.

    Args:
        validation_operator_result: (ValidationOperatorResult) Result of a validation operator run
        run_info_at_end: move run info below expectation results

    Returns:
        String containing the formatted markdown validation results
    """
    md_str = ""
    validation_results_page_renderer = ValidationResultsPageRenderer(
        run_info_at_end=run_info_at_end
    )
    for validation_result in validation_operator_result.list_validation_results():
        rendered_document_content = validation_results_page_renderer.render(
            validation_result
        )
        md_str += DefaultMarkdownPageView().render(rendered_document_content) + " "

    return md_str
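A hedged usage sketch (`results` is assumed to come from a prior validation operator run; the output path is illustrative):

    md = render_multiple_validation_result_pages_markdown(
        validation_operator_result=results,
        run_info_at_end=True,
    )
    with open("validation_results.md", "w") as f:
        f.write(md)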
Example #3
    def render_validation_operator_result(
        self, validation_operator_result: ValidationOperatorResult
    ) -> List[RenderedDocumentContent]:
        """
        Render a ValidationOperatorResult, which can contain multiple
        ExpectationSuiteValidationResults.

        Args:
            validation_operator_result: ValidationOperatorResult

        Returns:
            List[RenderedDocumentContent]
        """
        return [
            self.render(validation_result)
            for validation_result in validation_operator_result.list_validation_results()
        ]
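A minimal consumption sketch (`renderer` and `result` are assumed to exist; `DefaultMarkdownPageView().render` returning one markdown string per document matches its use in the test below):

    docs = renderer.render_validation_operator_result(validation_operator_result=result)
    md_pages = DefaultMarkdownPageView().render(docs)  # one markdown string per document
    full_report = "\n".join(md_pages)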
    def run(
        self,
        assets_to_validate,
        run_id=None,
        base_expectation_suite_name=None,
        evaluation_parameters=None,
        run_name=None,
        run_time=None,
        result_format=None,
    ):
        assert not (run_id and run_name) and not (
            run_id and run_time
        ), "Please provide either a run_id or run_name and/or run_time."
        if isinstance(run_id, str) and not run_name:
            warnings.warn(
                "String run_ids will be deprecated in the future. Please provide a run_id of type "
                "RunIdentifier(run_name=None, run_time=None), or a dictionary containing run_name "
                "and run_time (both optional). Instead of providing a run_id, you may also provide"
                "run_name and run_time separately.",
                DeprecationWarning,
            )
            try:
                run_time = parse(run_id)
            except (ParserError, TypeError):
                pass
            run_id = RunIdentifier(run_name=run_id, run_time=run_time)
        elif isinstance(run_id, dict):
            run_id = RunIdentifier(**run_id)
        elif not isinstance(run_id, RunIdentifier):
            run_id = RunIdentifier(run_name=run_name, run_time=run_time)

        if base_expectation_suite_name is None:
            if self.base_expectation_suite_name is None:
                raise ValueError(
                    "base_expectation_suite_name must be configured in the validation operator or passed at runtime"
                )
            base_expectation_suite_name = self.base_expectation_suite_name

        run_results = {}

        for item in assets_to_validate:
            batch = self._build_batch_from_item(item)

            batch_id = batch.batch_id

            assert batch_id is not None
            assert run_id is not None

            failure_expectation_suite_identifier = ExpectationSuiteIdentifier(
                expectation_suite_name=base_expectation_suite_name
                + self.expectation_suite_name_suffixes[0]
            )

            failure_validation_result_id = ValidationResultIdentifier(
                expectation_suite_identifier=failure_expectation_suite_identifier,
                run_id=run_id,
                batch_identifier=batch_id,
            )

            failure_expectation_suite = None
            try:
                failure_expectation_suite = self.data_context.stores[
                    self.data_context.expectations_store_name
                ].get(failure_expectation_suite_identifier)

            # NOTE : Abe 2019/09/17 : I'm concerned that this may be too permissive, since
            # it will catch any error in the Store, not just KeyErrors. In the longer term, a better
            # solution will be to have the Stores catch other known errors and raise KeyErrors,
            # so that methods like this can catch and handle a single error type.
            except Exception:
                logger.debug(
                    "Failure expectation suite not found: {}".format(
                        failure_expectation_suite_identifier
                    )
                )

            if failure_expectation_suite:
                failure_run_result_obj = {"expectation_suite_severity_level": "failure"}
                failure_validation_result = batch.validate(
                    failure_expectation_suite,
                    result_format=result_format
                    if result_format
                    else self.result_format,
                    evaluation_parameters=evaluation_parameters,
                )
                failure_run_result_obj["validation_result"] = failure_validation_result
                failure_actions_results = self._run_actions(
                    batch,
                    failure_expectation_suite_identifier,
                    failure_expectation_suite,
                    failure_validation_result,
                    run_id,
                )
                failure_run_result_obj["actions_results"] = failure_actions_results
                run_results[failure_validation_result_id] = failure_run_result_obj

                if not failure_validation_result.success and self.stop_on_first_error:
                    break

            warning_expectation_suite_identifier = ExpectationSuiteIdentifier(
                expectation_suite_name=base_expectation_suite_name
                + self.expectation_suite_name_suffixes[1]
            )

            warning_validation_result_id = ValidationResultIdentifier(
                expectation_suite_identifier=warning_expectation_suite_identifier,
                run_id=run_id,
                batch_identifier=batch.batch_id,
            )

            warning_expectation_suite = None
            try:
                warning_expectation_suite = self.data_context.stores[
                    self.data_context.expectations_store_name
                ].get(warning_expectation_suite_identifier)
            except Exception:
                logger.debug(
                    "Warning expectation suite not found: {}".format(
                        warning_expectation_suite_identifier
                    )
                )

            if warning_expectation_suite:
                warning_run_result_obj = {"expectation_suite_severity_level": "warning"}
                warning_validation_result = batch.validate(
                    warning_expectation_suite,
                    result_format=result_format
                    if result_format
                    else self.result_format,
                    evaluation_parameters=evaluation_parameters,
                )
                warning_run_result_obj["validation_result"] = warning_validation_result
                warning_actions_results = self._run_actions(
                    batch,
                    warning_expectation_suite_identifier,
                    warning_expectation_suite,
                    warning_validation_result,
                    run_id,
                )
                warning_run_result_obj["actions_results"] = warning_actions_results
                run_results[warning_validation_result_id] = warning_run_result_obj

        validation_operator_result = ValidationOperatorResult(
            run_id=run_id,
            run_results=run_results,
            validation_operator_config=self.validation_operator_config,
            evaluation_parameters=evaluation_parameters,
            success=all(
                [
                    run_result_obj["validation_result"].success
                    for run_result_obj in run_results.values()
                ]
            ),
        )

        if self.slack_webhook:
            if (
                self.notify_on == "all"
                or (self.notify_on == "success" and validation_operator_result.success)
                or (self.notify_on == "failure" and not validation_operator_result.success)
            ):
                slack_query = self._build_slack_query(
                    validation_operator_result=validation_operator_result
                )
                send_slack_notification(
                    query=slack_query, slack_webhook=self.slack_webhook
                )

        return validation_operator_result
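The run_id normalization above accepts a deprecated string, a dict, or separate run_name/run_time. A hedged sketch of equivalent calls (`operator` and `batches` are illustrative, not defined on this page):

    import datetime

    # All three forms are normalized to a RunIdentifier internally:
    operator.run(assets_to_validate=batches, run_id="2020-07-27T17:19:32")    # deprecated string; parsed into run_time
    operator.run(assets_to_validate=batches, run_id={"run_name": "nightly"})  # dict unpacked via RunIdentifier(**run_id)
    operator.run(
        assets_to_validate=batches,
        run_name="nightly",
        run_time=datetime.datetime.now(datetime.timezone.utc),
    )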
    def _build_slack_query(self, validation_operator_result: ValidationOperatorResult):
        success = validation_operator_result.success
        status_text = "Success :white_check_mark:" if success else "Failed :x:"
        run_id = validation_operator_result.run_id
        run_name = run_id.run_name
        run_time = run_id.run_time.strftime("%x %X")
        batch_identifiers = sorted(validation_operator_result.list_batch_identifiers())
        failed_data_assets_msg_strings = []

        run_results = validation_operator_result.run_results
        failure_level_run_results = {
            validation_result_identifier: run_result
            for validation_result_identifier, run_result in run_results.items()
            if run_result["expectation_suite_severity_level"] == "failure"
        }

        if failure_level_run_results:
            failed_data_assets_msg_strings = [
                validation_result_identifier.expectation_suite_identifier.expectation_suite_name
                + "-"
                + validation_result_identifier.batch_identifier
                for validation_result_identifier, run_result in failure_level_run_results.items()
                if not run_result["validation_result"].success
            ]

        title_block = {
            "type": "section",
            "text": {
                "type": "mrkdwn",
                "text": "*FailureVsWarning Validation Operator Completed.*",
            },
        }
        divider_block = {"type": "divider"}

        query = {"blocks": [divider_block, title_block, divider_block]}

        status_element = {
            "type": "section",
            "text": {"type": "mrkdwn", "text": "*Status*: {}".format(status_text)},
        }
        query["blocks"].append(status_element)

        batch_identifiers_element = {
            "type": "section",
            "text": {
                "type": "mrkdwn",
                "text": "*Batch Id List:* {}".format(batch_identifiers),
            },
        }
        query["blocks"].append(batch_identifiers_element)

        if not success:
            failed_data_assets_element = {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "*Failed Batches:* {}".format(
                        failed_data_assets_msg_strings
                    ),
                },
            }
            query["blocks"].append(failed_data_assets_element)

        run_name_element = {
            "type": "section",
            "text": {"type": "mrkdwn", "text": "*Run Name:* {}".format(run_name)},
        }
        query["blocks"].append(run_name_element)

        run_time_element = {
            "type": "section",
            "text": {"type": "mrkdwn", "text": "*Run Time:* {}".format(run_time)},
        }
        query["blocks"].append(run_time_element)

        query["blocks"].append(divider_block)

        documentation_url = "https://docs.greatexpectations.io/en/latest/reference/validation_operators/warning_and_failure_expectation_suites_validation_operator.html"
        footer_section = {
            "type": "context",
            "elements": [
                {
                    "type": "mrkdwn",
                    "text": "Learn about FailureVsWarning Validation Operators at {}".format(
                        documentation_url
                    ),
                }
            ],
        }
        query["blocks"].append(footer_section)

        return query
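The returned `query` is a Slack Block Kit payload keyed on "blocks"; a minimal sketch of posting it directly (the `requests` dependency and the webhook URL are assumptions, not part of this module):

    import json

    import requests

    webhook_url = "https://hooks.slack.com/services/..."  # placeholder webhook
    response = requests.post(
        webhook_url,
        data=json.dumps(query),
        headers={"Content-Type": "application/json"},
    )
    response.raise_for_status()  # Slack responds 200 on success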
    def run(
        self,
        assets_to_validate,
        run_id=None,
        evaluation_parameters=None,
        run_name=None,
        run_time=None,
        result_format=None,
    ):
        assert not (run_id and run_name) and not (
            run_id and run_time
        ), "Please provide either a run_id or run_name and/or run_time."
        if isinstance(run_id, str) and not run_name:
            warnings.warn(
                "String run_ids will be deprecated in the future. Please provide a run_id of type "
                "RunIdentifier(run_name=None, run_time=None), or a dictionary containing run_name "
                "and run_time (both optional). Instead of providing a run_id, you may also provide"
                "run_name and run_time separately.",
                DeprecationWarning,
            )
            try:
                run_time = parse(run_id)
            except (ParserError, TypeError):
                pass
            run_id = RunIdentifier(run_name=run_id, run_time=run_time)
        elif isinstance(run_id, dict):
            run_id = RunIdentifier(**run_id)
        elif not isinstance(run_id, RunIdentifier):
            run_id = RunIdentifier(run_name=run_name, run_time=run_time)

        run_results = {}

        for item in assets_to_validate:
            run_result_obj = {}
            batch = self._build_batch_from_item(item)
            expectation_suite_identifier = ExpectationSuiteIdentifier(
                expectation_suite_name=batch._expectation_suite.expectation_suite_name
            )
            validation_result_id = ValidationResultIdentifier(
                batch_identifier=batch.batch_id,
                expectation_suite_identifier=expectation_suite_identifier,
                run_id=run_id,
            )
            batch_validation_result = batch.validate(
                run_id=run_id,
                result_format=result_format if result_format else self.result_format,
                evaluation_parameters=evaluation_parameters,
            )
            run_result_obj["validation_result"] = batch_validation_result
            batch_actions_results = self._run_actions(
                batch,
                expectation_suite_identifier,
                batch._expectation_suite,
                batch_validation_result,
                run_id,
            )
            run_result_obj["actions_results"] = batch_actions_results
            run_results[validation_result_id] = run_result_obj

        return ValidationOperatorResult(
            run_id=run_id,
            run_results=run_results,
            validation_operator_config=self.validation_operator_config,
            evaluation_parameters=evaluation_parameters,
        )
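A hedged sketch of consuming the returned ValidationOperatorResult (`operator` and `batch` are illustrative; `success` and `list_validation_results` appear in the examples above):

    result = operator.run(assets_to_validate=[batch], run_name="adhoc")
    if not result.success:
        for validation_result in result.list_validation_results():
            # Each entry is an ExpectationSuiteValidationResult with its own flag.
            print(validation_result.success)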
def test_snapshot_render_section_page_with_fixture_data(validation_operator_result):
    """
    Make sure the appropriate markdown rendering is done for the applied fixture.

    Args:
        validation_operator_result: test fixture

    Returns: None
    """
    validation_operator_result = ValidationOperatorResult(**validation_operator_result)

    validation_results_page_renderer = ValidationResultsPageRenderer(
        run_info_at_end=True
    )

    rendered_document_content_list = (
        validation_results_page_renderer.render_validation_operator_result(
            validation_operator_result=validation_operator_result
        )
    )

    md_str_list = DefaultMarkdownPageView().render(rendered_document_content_list)

    md_str = " ".join(md_str_list)

    # Strip all whitespace so the comparison is insensitive to rendering layout.
    md_str = md_str.replace(" ", "").replace("\t", "").replace("\n", "")

    print(md_str)

    assert (md_str == """
# Validation Results




## Overview
### **Expectation Suite:** **basic.warning**
**Data asset:** **None**
**Status:**  **Failed**





### Statistics






 |  |  |
 | ------------  | ------------ |
Evaluated Expectations  | 11
Successful Expectations  | 9
Unsuccessful Expectations  | 2
Success Percent  | ≈81.82%





## Table-Level Expectations








 | Status | Expectation | Observed Value |
 | ------------  | ------------  | ------------ |
❌  | Must have greater than or equal to **27000** and less than or equal to **33000** rows.  | 30
✅  | Must have exactly **3** columns.  | 3
✅  | Must have these columns in this order: **Team**, ** "Payroll (millions)"**, ** "Wins"**  | ['Team', ' "Payroll (millions)"', ' "Wins"']





##  "Payroll (millions)"








 | Status | Expectation | Observed Value |
 | ------------  | ------------  | ------------ |
✅  | values must never be null.  | 100% not null
✅  | minimum value must be greater than or equal to **54.24** and less than or equal to **56.24**.  | 55.24
✅  | maximum value must be greater than or equal to **196.96** and less than or equal to **198.96**.  | 197.96
✅  | mean must be greater than or equal to **97.01899999999998** and less than or equal to **99.01899999999998**.  | ≈98.019
❌  | median must be greater than or equal to **84000.75** and less than or equal to **86000.75**.  | 85.75
✅  | quantiles must be within the following value ranges.




 | Quantile | Min Value | Max Value |
 | ------------  | ------------  | ------------ |
0.05  | 54.37  | 56.37
Q1  | 74.48  | 76.48
Median  | 82.31  | 84.31
Q3  | 116.62  | 118.62
0.95  | 173.54  | 175.54
  |




 | Quantile | Value |
 | ------------  | ------------ |
0.05  | 55.37
Q1  | 75.48
Median  | 83.31
Q3  | 117.62
0.95  | 174.54






## Team








 | Status | Expectation | Observed Value |
 | ------------  | ------------  | ------------ |
✅  | values must never be null.  | 100% not null
✅  | values must always be greater than or equal to **1** characters long.  | 0% unexpected







### Info






 |  |  |
 | ------------  | ------------ |
Great Expectations Version  | 0.11.8+4.g4ab34df3.dirty
Run Name  | getest run
Run Time  | 2020-07-27T17:19:32.959193+00:00





### Batch Markers






 |  |  |
 | ------------  | ------------ |
**ge_load_time**  | **20200727T171932.954810Z**
**pandas_data_fingerprint**  | **8c46fdaf0bd356fd58b7bcd9b2e6012d**





### Batch Kwargs






 |  |  |
 | ------------  | ------------ |
**PandasInMemoryDF**  | **True**
**datasource**  | **getest**
**ge_batch_id**  | **56615f40-d02d-11ea-b6ea-acde48001122**




-----------------------------------------------------------
Powered by [Great Expectations](https://greatexpectations.io/)
# Validation Results




## Overview
### **Expectation Suite:** **basic.warning**
**Data asset:** **None**
**Status:**  **Failed**





### Statistics






 |  |  |
 | ------------  | ------------ |
Evaluated Expectations  | 11
Successful Expectations  | 9
Unsuccessful Expectations  | 2
Success Percent  | ≈81.82%





## Table-Level Expectations








 | Status | Expectation | Observed Value |
 | ------------  | ------------  | ------------ |
❌  | Must have greater than or equal to **27000** and less than or equal to **33000** rows.  | 30
✅  | Must have exactly **3** columns.  | 3
✅  | Must have these columns in this order: **Team**, ** "Payroll (millions)"**, ** "Wins"**  | ['Team', ' "Payroll (millions)"', ' "Wins"']





##  "Payroll (millions)"








 | Status | Expectation | Observed Value |
 | ------------  | ------------  | ------------ |
✅  | values must never be null.  | 100% not null
✅  | minimum value must be greater than or equal to **54.24** and less than or equal to **56.24**.  | 55.24
✅  | maximum value must be greater than or equal to **196.96** and less than or equal to **198.96**.  | 197.96
✅  | mean must be greater than or equal to **97.01899999999998** and less than or equal to **99.01899999999998**.  | ≈98.019
❌  | median must be greater than or equal to **84000.75** and less than or equal to **86000.75**.  | 85.75
✅  | quantiles must be within the following value ranges.




 | Quantile | Min Value | Max Value |
 | ------------  | ------------  | ------------ |
0.05  | 54.37  | 56.37
Q1  | 74.48  | 76.48
Median  | 82.31  | 84.31
Q3  | 116.62  | 118.62
0.95  | 173.54  | 175.54
  |




 | Quantile | Value |
 | ------------  | ------------ |
0.05  | 55.37
Q1  | 75.48
Median  | 83.31
Q3  | 117.62
0.95  | 174.54






## Team








 | Status | Expectation | Observed Value |
 | ------------  | ------------  | ------------ |
✅  | values must never be null.  | 100% not null
✅  | values must always be greater than or equal to **1** characters long.  | 0% unexpected







### Info






 |  |  |
 | ------------  | ------------ |
Great Expectations Version  | 0.11.8+4.g4ab34df3.dirty
Run Name  | getest run
Run Time  | 2020-07-27T17:19:32.959193+00:00





### Batch Markers






 |  |  |
 | ------------  | ------------ |
**ge_load_time**  | **20200727T171932.954810Z**
**pandas_data_fingerprint**  | **8c46fdaf0bd356fd58b7bcd9b2e6012d**





### Batch Kwargs






 |  |  |
 | ------------  | ------------ |
**PandasInMemoryDF**  | **True**
**datasource**  | **getest**
**ge_batch_id**  | **56615f40-d02d-11ea-b6ea-acde48001122**




-----------------------------------------------------------
Powered by [Great Expectations](https://greatexpectations.io/)
""".replace(" ", "").replace("\t", "").replace("\n", ""))
    def run(
        self,
        assets_to_validate,
        run_id=None,
        evaluation_parameters=None,
        run_name=None,
        run_time=None,
        catch_exceptions=None,
        result_format=None,
        checkpoint_identifier=None,
    ):
        assert not (run_id and run_name) and not (
            run_id and run_time
        ), "Please provide either a run_id or run_name and/or run_time."
        if isinstance(run_id, str) and not run_name:
            warnings.warn(
                "String run_ids will be deprecated in the future. Please provide a run_id of type "
                "RunIdentifier(run_name=None, run_time=None), or a dictionary containing run_name "
                "and run_time (both optional). Instead of providing a run_id, you may also provide"
                "run_name and run_time separately.",
                DeprecationWarning,
            )
            try:
                run_time = parse(run_id)
            except (ValueError, TypeError):
                pass
            run_id = RunIdentifier(run_name=run_id, run_time=run_time)
        elif isinstance(run_id, dict):
            run_id = RunIdentifier(**run_id)
        elif not isinstance(run_id, RunIdentifier):
            run_id = RunIdentifier(run_name=run_name, run_time=run_time)

        ###
        # NOTE: 20211010 - jdimatteo: This method is called by both Checkpoint.run and LegacyCheckpoint.run and below
        # usage of AsyncExecutor may speed up I/O bound validations by running them in parallel with multithreading
        # (if concurrency is enabled in the data context configuration).
        #
        # When this method is called by LegacyCheckpoint.run, len(assets_to_validate) may be greater than 1. If
        # concurrency is enabled in the configuration AND len(assets_to_validate) > 1, then execution is run in multiple
        # threads with AsyncExecutor -- otherwise AsyncExecutor only uses the current single thread to execute the work.
        # Please see the below arguments used to initialize AsyncExecutor and the corresponding AsyncExecutor docstring
        # for more details on when multiple threads are used.
        #
        # When this method is called by Checkpoint.run, len(assets_to_validate) may be 1 even if there are multiple
        # validations, because Checkpoint.run calls this method in a loop for each validation. AsyncExecutor is also
        # used in the Checkpoint.run loop to optionally run each validation in parallel with multithreading, so this
        # method's AsyncExecutor is nested within the Checkpoint.run AsyncExecutor. The AsyncExecutor logic to only use
        # multithreading when max_workers > 1 ensures that no nested multithreading is ever used when
        # len(assets_to_validate) is equal to 1. So no unnecessary multithreading is ever used here even though it may
        # be nested inside another AsyncExecutor (and this is a good thing because it avoids extra overhead associated
        # with each thread and minimizes the total number of threads to simplify debugging).
        with AsyncExecutor(
            self.data_context.concurrency, max_workers=len(assets_to_validate)
        ) as async_executor:
            batch_and_async_result_tuples = []
            for item in assets_to_validate:
                batch = self._build_batch_from_item(item)

                if hasattr(batch, "active_batch_id"):
                    batch_identifier = batch.active_batch_id
                else:
                    batch_identifier = batch.batch_id

                if result_format is None:
                    result_format = self.result_format

                batch_validate_arguments = {
                    "run_id": run_id,
                    "result_format": result_format,
                    "evaluation_parameters": evaluation_parameters,
                }

                if catch_exceptions is not None:
                    batch_validate_arguments["catch_exceptions"] = catch_exceptions

                batch_and_async_result_tuples.append(
                    (
                        batch,
                        async_executor.submit(batch.validate, **batch_validate_arguments),
                    )
                )

            run_results = {}
            for batch, async_batch_validation_result in batch_and_async_result_tuples:
                if self.data_context.ge_cloud_mode:
                    expectation_suite_identifier = GeCloudIdentifier(
                        resource_type="expectation_suite",
                        ge_cloud_id=batch._expectation_suite.ge_cloud_id,
                    )
                    validation_result_id = GeCloudIdentifier(
                        resource_type="suite_validation_result"
                    )
                else:
                    expectation_suite_identifier = ExpectationSuiteIdentifier(
                        expectation_suite_name=batch._expectation_suite.expectation_suite_name
                    )
                    validation_result_id = ValidationResultIdentifier(
                        batch_identifier=batch_identifier,
                        expectation_suite_identifier=expectation_suite_identifier,
                        run_id=run_id,
                    )

                batch_actions_results = self._run_actions(
                    batch=batch,
                    expectation_suite_identifier=expectation_suite_identifier,
                    expectation_suite=batch._expectation_suite,
                    batch_validation_result=async_batch_validation_result.result(),
                    run_id=run_id,
                    validation_result_id=validation_result_id,
                    checkpoint_identifier=checkpoint_identifier,
                )

                run_result_obj = {
                    "validation_result": async_batch_validation_result.result(),
                    "actions_results": batch_actions_results,
                }
                run_results[validation_result_id] = run_result_obj

        return ValidationOperatorResult(
            run_id=run_id,
            run_results=run_results,
            validation_operator_config=self.validation_operator_config,
            evaluation_parameters=evaluation_parameters,
        )
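AsyncExecutor is internal to Great Expectations; the submit-then-collect pattern the NOTE above describes is the same one `concurrent.futures` exposes. A standalone sketch under that assumption (all names here are illustrative, not GE APIs):

    from concurrent.futures import ThreadPoolExecutor

    def validate(batch):
        # Stand-in for batch.validate(**batch_validate_arguments); any
        # I/O-bound call benefits from running in a worker thread.
        return {"batch": batch, "success": True}

    assets = ["batch_a", "batch_b"]  # stand-in for assets_to_validate

    # Mirrors run() above: one worker per asset, futures collected in order,
    # and .result() blocks (re-raising any worker exception) when read.
    with ThreadPoolExecutor(max_workers=len(assets)) as pool:
        futures = [(a, pool.submit(validate, a)) for a in assets]
        run_results = {a: f.result() for a, f in futures}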