def _get_stats_from_results(
    self, result: ValidationOperatorResult
) -> Tuple[int, int]:
    result_identifier = result.list_validation_result_identifiers()[0]
    stats = result._list_validation_statistics()[result_identifier]
    n_successful = stats["successful_expectations"]
    n_expectations = stats["evaluated_expectations"]
    return n_successful, n_expectations
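# A minimal sketch of consuming the (n_successful, n_expectations) tuple
# returned above, e.g. to format a one-line status summary. `_summarize` is a
# hypothetical sibling method, not part of the source.
def _summarize(self, result: ValidationOperatorResult) -> str:
    n_successful, n_expectations = self._get_stats_from_results(result)
    return f"{n_successful}/{n_expectations} expectations met"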
def render_multiple_validation_result_pages_markdown(
    validation_operator_result: ValidationOperatorResult,
    run_info_at_end: bool = True,
):
    """
    Loop through and render multiple validation results to markdown.

    Args:
        validation_operator_result: (ValidationOperatorResult) Result of validation operator run
        run_info_at_end: move run info below expectation results

    Returns:
        string containing formatted markdown validation results
    """
    md_str = ""
    validation_results_page_renderer = ValidationResultsPageRenderer(
        run_info_at_end=run_info_at_end
    )
    for validation_result in validation_operator_result.list_validation_results():
        rendered_document_content = validation_results_page_renderer.render(
            validation_result
        )
        md_str += DefaultMarkdownPageView().render(rendered_document_content) + " "
    return md_str
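# A minimal usage sketch for the helper above; `operator_result` is a
# hypothetical ValidationOperatorResult from a prior validation operator run,
# and the output path is illustrative.
markdown = render_multiple_validation_result_pages_markdown(
    validation_operator_result=operator_result,
    run_info_at_end=True,
)
with open("validation_results.md", "w") as outfile:
    outfile.write(markdown)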
def render_validation_operator_result(
    self, validation_operator_result: ValidationOperatorResult
) -> List[RenderedDocumentContent]:
    """
    Render a ValidationOperatorResult, which can contain multiple ExpectationSuiteValidationResults.

    Args:
        validation_operator_result: ValidationOperatorResult

    Returns:
        List[RenderedDocumentContent]
    """
    return [
        self.render(validation_result)
        for validation_result in validation_operator_result.list_validation_results()
    ]
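# A minimal sketch pairing the method above with DefaultMarkdownPageView, as
# the snapshot test later in this section does; `renderer` and
# `operator_result` are hypothetical instances.
rendered_documents = renderer.render_validation_operator_result(
    validation_operator_result=operator_result
)
md_str_list = DefaultMarkdownPageView().render(rendered_documents)
md_str = " ".join(md_str_list)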
def run(
    self,
    assets_to_validate,
    run_id=None,
    base_expectation_suite_name=None,
    evaluation_parameters=None,
    run_name=None,
    run_time=None,
    result_format=None,
):
    assert not (run_id and run_name) and not (
        run_id and run_time
    ), "Please provide either a run_id or run_name and/or run_time."
    if isinstance(run_id, str) and not run_name:
        warnings.warn(
            "String run_ids will be deprecated in the future. Please provide a run_id of type "
            "RunIdentifier(run_name=None, run_time=None), or a dictionary containing run_name "
            "and run_time (both optional). Instead of providing a run_id, you may also provide "
            "run_name and run_time separately.",
            DeprecationWarning,
        )
        try:
            run_time = parse(run_id)
        except (ParserError, TypeError):
            pass
        run_id = RunIdentifier(run_name=run_id, run_time=run_time)
    elif isinstance(run_id, dict):
        run_id = RunIdentifier(**run_id)
    elif not isinstance(run_id, RunIdentifier):
        run_id = RunIdentifier(run_name=run_name, run_time=run_time)

    if base_expectation_suite_name is None:
        if self.base_expectation_suite_name is None:
            raise ValueError(
                "base_expectation_suite_name must be configured in the validation operator or passed at runtime"
            )
        base_expectation_suite_name = self.base_expectation_suite_name

    run_results = {}

    for item in assets_to_validate:
        batch = self._build_batch_from_item(item)

        batch_id = batch.batch_id
        assert batch_id is not None
        assert run_id is not None

        failure_expectation_suite_identifier = ExpectationSuiteIdentifier(
            expectation_suite_name=base_expectation_suite_name
            + self.expectation_suite_name_suffixes[0]
        )

        failure_validation_result_id = ValidationResultIdentifier(
            expectation_suite_identifier=failure_expectation_suite_identifier,
            run_id=run_id,
            batch_identifier=batch_id,
        )

        failure_expectation_suite = None
        try:
            failure_expectation_suite = self.data_context.stores[
                self.data_context.expectations_store_name
            ].get(failure_expectation_suite_identifier)

        # NOTE : Abe 2019/09/17 : I'm concerned that this may be too permissive, since
        # it will catch any error in the Store, not just KeyErrors. In the longer term, a better
        # solution will be to have the Stores catch other known errors and raise KeyErrors,
        # so that methods like this can catch and handle a single error type.
        except Exception:
            logger.debug(
                "Failure expectation suite not found: {}".format(
                    failure_expectation_suite_identifier
                )
            )

        if failure_expectation_suite:
            failure_run_result_obj = {"expectation_suite_severity_level": "failure"}
            failure_validation_result = batch.validate(
                failure_expectation_suite,
                result_format=result_format if result_format else self.result_format,
                evaluation_parameters=evaluation_parameters,
            )
            failure_run_result_obj["validation_result"] = failure_validation_result
            failure_actions_results = self._run_actions(
                batch,
                failure_expectation_suite_identifier,
                failure_expectation_suite,
                failure_validation_result,
                run_id,
            )
            failure_run_result_obj["actions_results"] = failure_actions_results
            run_results[failure_validation_result_id] = failure_run_result_obj

            if not failure_validation_result.success and self.stop_on_first_error:
                break

        warning_expectation_suite_identifier = ExpectationSuiteIdentifier(
            expectation_suite_name=base_expectation_suite_name
            + self.expectation_suite_name_suffixes[1]
        )

        warning_validation_result_id = ValidationResultIdentifier(
            expectation_suite_identifier=warning_expectation_suite_identifier,
            run_id=run_id,
            batch_identifier=batch.batch_id,
        )

        warning_expectation_suite = None
        try:
            warning_expectation_suite = self.data_context.stores[
                self.data_context.expectations_store_name
            ].get(warning_expectation_suite_identifier)
        except Exception:
            logger.debug(
                "Warning expectation suite not found: {}".format(
                    warning_expectation_suite_identifier
                )
            )

        if warning_expectation_suite:
            warning_run_result_obj = {"expectation_suite_severity_level": "warning"}
            warning_validation_result = batch.validate(
                warning_expectation_suite,
                result_format=result_format if result_format else self.result_format,
                evaluation_parameters=evaluation_parameters,
            )
            warning_run_result_obj["validation_result"] = warning_validation_result
            warning_actions_results = self._run_actions(
                batch,
                warning_expectation_suite_identifier,
                warning_expectation_suite,
                warning_validation_result,
                run_id,
            )
            warning_run_result_obj["actions_results"] = warning_actions_results
            run_results[warning_validation_result_id] = warning_run_result_obj

    validation_operator_result = ValidationOperatorResult(
        run_id=run_id,
        run_results=run_results,
        validation_operator_config=self.validation_operator_config,
        evaluation_parameters=evaluation_parameters,
        success=all(
            run_result_obj["validation_result"].success
            for run_result_obj in run_results.values()
        ),
    )

    if self.slack_webhook:
        if (
            self.notify_on == "all"
            or self.notify_on == "success"
            and validation_operator_result.success
            or self.notify_on == "failure"
            and not validation_operator_result.success
        ):
            slack_query = self._build_slack_query(
                validation_operator_result=validation_operator_result
            )
            send_slack_notification(
                query=slack_query, slack_webhook=self.slack_webhook
            )

    return validation_operator_result
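# The suite-name composition in run() above is easiest to see with a concrete
# example. A minimal sketch, assuming the operator's
# expectation_suite_name_suffixes default to [".failure", ".warning"] and a
# hypothetical base suite named "users":
base_expectation_suite_name = "users"
expectation_suite_name_suffixes = [".failure", ".warning"]
suite_names = [
    base_expectation_suite_name + suffix
    for suffix in expectation_suite_name_suffixes
]
assert suite_names == ["users.failure", "users.warning"]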
def _build_slack_query(self, validation_operator_result: ValidationOperatorResult):
    success = validation_operator_result.success
    status_text = "Success :white_check_mark:" if success else "Failed :x:"
    run_id = validation_operator_result.run_id
    run_name = run_id.run_name
    run_time = run_id.run_time.strftime("%x %X")
    batch_identifiers = sorted(validation_operator_result.list_batch_identifiers())
    failed_data_assets_msg_strings = []

    run_results = validation_operator_result.run_results
    failure_level_run_results = {
        validation_result_identifier: run_result
        for validation_result_identifier, run_result in run_results.items()
        if run_result["expectation_suite_severity_level"] == "failure"
    }

    if failure_level_run_results:
        failed_data_assets_msg_strings = [
            validation_result_identifier.expectation_suite_identifier.expectation_suite_name
            + "-"
            + validation_result_identifier.batch_identifier
            for validation_result_identifier, run_result in failure_level_run_results.items()
            if not run_result["validation_result"].success
        ]

    title_block = {
        "type": "section",
        "text": {
            "type": "mrkdwn",
            "text": "*FailureVsWarning Validation Operator Completed.*",
        },
    }
    divider_block = {"type": "divider"}

    query = {"blocks": [divider_block, title_block, divider_block]}

    status_element = {
        "type": "section",
        "text": {"type": "mrkdwn", "text": "*Status*: {}".format(status_text)},
    }
    query["blocks"].append(status_element)

    batch_identifiers_element = {
        "type": "section",
        "text": {
            "type": "mrkdwn",
            "text": "*Batch Id List:* {}".format(batch_identifiers),
        },
    }
    query["blocks"].append(batch_identifiers_element)

    if not success:
        failed_data_assets_element = {
            "type": "section",
            "text": {
                "type": "mrkdwn",
                "text": "*Failed Batches:* {}".format(failed_data_assets_msg_strings),
            },
        }
        query["blocks"].append(failed_data_assets_element)

    run_name_element = {
        "type": "section",
        "text": {"type": "mrkdwn", "text": "*Run Name:* {}".format(run_name)},
    }
    query["blocks"].append(run_name_element)

    run_time_element = {
        "type": "section",
        "text": {"type": "mrkdwn", "text": "*Run Time:* {}".format(run_time)},
    }
    query["blocks"].append(run_time_element)

    query["blocks"].append(divider_block)

    documentation_url = "https://docs.greatexpectations.io/en/latest/reference/validation_operators/warning_and_failure_expectation_suites_validation_operator.html"

    footer_section = {
        "type": "context",
        "elements": [
            {
                "type": "mrkdwn",
                "text": "Learn about FailureVsWarning Validation Operators at {}".format(
                    documentation_url
                ),
            }
        ],
    }
    query["blocks"].append(footer_section)
    return query
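# The dict returned by _build_slack_query is a Slack Block Kit payload. A
# minimal sketch of delivering it to a Slack incoming webhook; the real
# send_slack_notification helper may differ, but an incoming webhook is
# ultimately a JSON POST. The return-value check assumes Slack's documented
# "ok" response body on success.
import requests

def post_to_slack(query: dict, slack_webhook: str) -> bool:
    response = requests.post(slack_webhook, json=query)
    return response.status_code == 200 and response.text == "ok"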
def run(
    self,
    assets_to_validate,
    run_id=None,
    evaluation_parameters=None,
    run_name=None,
    run_time=None,
    result_format=None,
):
    assert not (run_id and run_name) and not (
        run_id and run_time
    ), "Please provide either a run_id or run_name and/or run_time."
    if isinstance(run_id, str) and not run_name:
        warnings.warn(
            "String run_ids will be deprecated in the future. Please provide a run_id of type "
            "RunIdentifier(run_name=None, run_time=None), or a dictionary containing run_name "
            "and run_time (both optional). Instead of providing a run_id, you may also provide "
            "run_name and run_time separately.",
            DeprecationWarning,
        )
        try:
            run_time = parse(run_id)
        except (ParserError, TypeError):
            pass
        run_id = RunIdentifier(run_name=run_id, run_time=run_time)
    elif isinstance(run_id, dict):
        run_id = RunIdentifier(**run_id)
    elif not isinstance(run_id, RunIdentifier):
        run_id = RunIdentifier(run_name=run_name, run_time=run_time)

    run_results = {}

    for item in assets_to_validate:
        run_result_obj = {}
        batch = self._build_batch_from_item(item)
        expectation_suite_identifier = ExpectationSuiteIdentifier(
            expectation_suite_name=batch._expectation_suite.expectation_suite_name
        )
        validation_result_id = ValidationResultIdentifier(
            batch_identifier=batch.batch_id,
            expectation_suite_identifier=expectation_suite_identifier,
            run_id=run_id,
        )
        batch_validation_result = batch.validate(
            run_id=run_id,
            result_format=result_format if result_format else self.result_format,
            evaluation_parameters=evaluation_parameters,
        )
        run_result_obj["validation_result"] = batch_validation_result
        batch_actions_results = self._run_actions(
            batch,
            expectation_suite_identifier,
            batch._expectation_suite,
            batch_validation_result,
            run_id,
        )
        run_result_obj["actions_results"] = batch_actions_results
        run_results[validation_result_id] = run_result_obj

    return ValidationOperatorResult(
        run_id=run_id,
        run_results=run_results,
        validation_operator_config=self.validation_operator_config,
        evaluation_parameters=evaluation_parameters,
    )
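# A minimal usage sketch for a run() like the one above, via the legacy
# DataContext API; the datasource, file path, suite name, and operator name
# are hypothetical.
import great_expectations as ge

context = ge.data_context.DataContext()
batch = context.get_batch(
    batch_kwargs={"path": "data/users.csv", "datasource": "my_datasource"},
    expectation_suite_name="users.warning",
)
result = context.run_validation_operator(
    "action_list_operator",
    assets_to_validate=[batch],
    run_name="nightly_run",  # a RunIdentifier is built from run_name/run_time
)
print(result.success)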
def test_snapshot_render_section_page_with_fixture_data(validation_operator_result):
    """
    Make sure the appropriate markdown rendering is done for the applied fixture.

    Args:
        validation_operator_result: test fixture

    Returns:
        None
    """
    validation_operator_result = ValidationOperatorResult(**validation_operator_result)

    validation_results_page_renderer = ValidationResultsPageRenderer(
        run_info_at_end=True
    )

    rendered_document_content_list = (
        validation_results_page_renderer.render_validation_operator_result(
            validation_operator_result=validation_operator_result
        )
    )
    md_str_list = DefaultMarkdownPageView().render(rendered_document_content_list)

    md_str = " ".join(md_str_list)

    md_str = md_str.replace(" ", "").replace("\t", "").replace("\n", "")
    print(md_str)

    assert (
        md_str
        == """
# Validation Results
## Overview
### **Expectation Suite:** **basic.warning**
**Data asset:** **None**
**Status:** **Failed**
### Statistics
 |  |  |
 | ------------ | ------------ |
Evaluated Expectations | 11
Successful Expectations | 9
Unsuccessful Expectations | 2
Success Percent | ≈81.82%
## Table-Level Expectations
 | Status | Expectation | Observed Value |
 | ------------ | ------------ | ------------ |
❌ | Must have greater than or equal to **27000** and less than or equal to **33000** rows. | 30
✅ | Must have exactly **3** columns. | 3
✅ | Must have these columns in this order: **Team**, ** "Payroll (millions)"**, ** "Wins"** | ['Team', ' "Payroll (millions)"', ' "Wins"']
## "Payroll (millions)"
 | Status | Expectation | Observed Value |
 | ------------ | ------------ | ------------ |
✅ | values must never be null. | 100% not null
✅ | minimum value must be greater than or equal to **54.24** and less than or equal to **56.24**. | 55.24
✅ | maximum value must be greater than or equal to **196.96** and less than or equal to **198.96**. | 197.96
✅ | mean must be greater than or equal to **97.01899999999998** and less than or equal to **99.01899999999998**. | ≈98.019
❌ | median must be greater than or equal to **84000.75** and less than or equal to **86000.75**. | 85.75
✅ | quantiles must be within the following value ranges.
 | Quantile | Min Value | Max Value |
 | ------------ | ------------ | ------------ |
0.05 | 54.37 | 56.37
Q1 | 74.48 | 76.48
Median | 82.31 | 84.31
Q3 | 116.62 | 118.62
0.95 | 173.54 | 175.54
 |
 | Quantile | Value |
 | ------------ | ------------ |
0.05 | 55.37
Q1 | 75.48
Median | 83.31
Q3 | 117.62
0.95 | 174.54
## Team
 | Status | Expectation | Observed Value |
 | ------------ | ------------ | ------------ |
✅ | values must never be null. | 100% not null
✅ | values must always be greater than or equal to **1** characters long. | 0% unexpected
### Info
 |  |  |
 | ------------ | ------------ |
Great Expectations Version | 0.11.8+4.g4ab34df3.dirty
Run Name | getest run
Run Time | 2020-07-27T17:19:32.959193+00:00
### Batch Markers
 |  |  |
 | ------------ | ------------ |
**ge_load_time** | **20200727T171932.954810Z**
**pandas_data_fingerprint** | **8c46fdaf0bd356fd58b7bcd9b2e6012d**
### Batch Kwargs
 |  |  |
 | ------------ | ------------ |
**PandasInMemoryDF** | **True**
**datasource** | **getest**
**ge_batch_id** | **56615f40-d02d-11ea-b6ea-acde48001122**
-----------------------------------------------------------
Powered by [Great Expectations](https://greatexpectations.io/)
# Validation Results
## Overview
### **Expectation Suite:** **basic.warning**
**Data asset:** **None**
**Status:** **Failed**
### Statistics
 |  |  |
 | ------------ | ------------ |
Evaluated Expectations | 11
Successful Expectations | 9
Unsuccessful Expectations | 2
Success Percent | ≈81.82%
## Table-Level Expectations
 | Status | Expectation | Observed Value |
 | ------------ | ------------ | ------------ |
❌ | Must have greater than or equal to **27000** and less than or equal to **33000** rows. | 30
✅ | Must have exactly **3** columns. | 3
✅ | Must have these columns in this order: **Team**, ** "Payroll (millions)"**, ** "Wins"** | ['Team', ' "Payroll (millions)"', ' "Wins"']
## "Payroll (millions)"
 | Status | Expectation | Observed Value |
 | ------------ | ------------ | ------------ |
✅ | values must never be null. | 100% not null
✅ | minimum value must be greater than or equal to **54.24** and less than or equal to **56.24**. | 55.24
✅ | maximum value must be greater than or equal to **196.96** and less than or equal to **198.96**. | 197.96
✅ | mean must be greater than or equal to **97.01899999999998** and less than or equal to **99.01899999999998**. | ≈98.019
❌ | median must be greater than or equal to **84000.75** and less than or equal to **86000.75**. | 85.75
✅ | quantiles must be within the following value ranges.
 | Quantile | Min Value | Max Value |
 | ------------ | ------------ | ------------ |
0.05 | 54.37 | 56.37
Q1 | 74.48 | 76.48
Median | 82.31 | 84.31
Q3 | 116.62 | 118.62
0.95 | 173.54 | 175.54
 |
 | Quantile | Value |
 | ------------ | ------------ |
0.05 | 55.37
Q1 | 75.48
Median | 83.31
Q3 | 117.62
0.95 | 174.54
## Team
 | Status | Expectation | Observed Value |
 | ------------ | ------------ | ------------ |
✅ | values must never be null. | 100% not null
✅ | values must always be greater than or equal to **1** characters long. | 0% unexpected
### Info
 |  |  |
 | ------------ | ------------ |
Great Expectations Version | 0.11.8+4.g4ab34df3.dirty
Run Name | getest run
Run Time | 2020-07-27T17:19:32.959193+00:00
### Batch Markers
 |  |  |
 | ------------ | ------------ |
**ge_load_time** | **20200727T171932.954810Z**
**pandas_data_fingerprint** | **8c46fdaf0bd356fd58b7bcd9b2e6012d**
### Batch Kwargs
 |  |  |
 | ------------ | ------------ |
**PandasInMemoryDF** | **True**
**datasource** | **getest**
**ge_batch_id** | **56615f40-d02d-11ea-b6ea-acde48001122**
-----------------------------------------------------------
Powered by [Great Expectations](https://greatexpectations.io/)
""".replace(" ", "").replace("\t", "").replace("\n", "")
    )
def run(
    self,
    assets_to_validate,
    run_id=None,
    evaluation_parameters=None,
    run_name=None,
    run_time=None,
    catch_exceptions=None,
    result_format=None,
    checkpoint_identifier=None,
):
    assert not (run_id and run_name) and not (
        run_id and run_time
    ), "Please provide either a run_id or run_name and/or run_time."
    if isinstance(run_id, str) and not run_name:
        warnings.warn(
            "String run_ids will be deprecated in the future. Please provide a run_id of type "
            "RunIdentifier(run_name=None, run_time=None), or a dictionary containing run_name "
            "and run_time (both optional). Instead of providing a run_id, you may also provide "
            "run_name and run_time separately.",
            DeprecationWarning,
        )
        try:
            run_time = parse(run_id)
        except (ValueError, TypeError):
            pass
        run_id = RunIdentifier(run_name=run_id, run_time=run_time)
    elif isinstance(run_id, dict):
        run_id = RunIdentifier(**run_id)
    elif not isinstance(run_id, RunIdentifier):
        run_id = RunIdentifier(run_name=run_name, run_time=run_time)

    ###
    # NOTE: 20211010 - jdimatteo: This method is called by both Checkpoint.run and LegacyCheckpoint.run and below
    # usage of AsyncExecutor may speed up I/O bound validations by running them in parallel with multithreading
    # (if concurrency is enabled in the data context configuration).
    #
    # When this method is called by LegacyCheckpoint.run, len(assets_to_validate) may be greater than 1. If
    # concurrency is enabled in the configuration AND len(assets_to_validate) > 1, then execution is run in multiple
    # threads with AsyncExecutor -- otherwise AsyncExecutor only uses the current single thread to execute the work.
    # Please see the below arguments used to initialize AsyncExecutor and the corresponding AsyncExecutor docstring
    # for more details on when multiple threads are used.
    #
    # When this method is called by Checkpoint.run, len(assets_to_validate) may be 1 even if there are multiple
    # validations, because Checkpoint.run calls this method in a loop for each validation. AsyncExecutor is also
    # used in the Checkpoint.run loop to optionally run each validation in parallel with multithreading, so this
    # method's AsyncExecutor is nested within the Checkpoint.run AsyncExecutor. The AsyncExecutor logic to only use
    # multithreading when max_workers > 1 ensures that no nested multithreading is ever used when
    # len(assets_to_validate) is equal to 1. So no unnecessary multithreading is ever used here even though it may
    # be nested inside another AsyncExecutor (and this is a good thing because it avoids extra overhead associated
    # with each thread and minimizes the total number of threads to simplify debugging).
    with AsyncExecutor(
        self.data_context.concurrency, max_workers=len(assets_to_validate)
    ) as async_executor:
        batch_and_async_result_tuples = []
        for item in assets_to_validate:
            batch = self._build_batch_from_item(item)

            if hasattr(batch, "active_batch_id"):
                batch_identifier = batch.active_batch_id
            else:
                batch_identifier = batch.batch_id

            if result_format is None:
                result_format = self.result_format

            batch_validate_arguments = {
                "run_id": run_id,
                "result_format": result_format,
                "evaluation_parameters": evaluation_parameters,
            }

            if catch_exceptions is not None:
                batch_validate_arguments["catch_exceptions"] = catch_exceptions

            batch_and_async_result_tuples.append(
                (
                    batch,
                    async_executor.submit(
                        batch.validate,
                        **batch_validate_arguments,
                    ),
                )
            )

        run_results = {}
        for batch, async_batch_validation_result in batch_and_async_result_tuples:
            if self.data_context.ge_cloud_mode:
                expectation_suite_identifier = GeCloudIdentifier(
                    resource_type="expectation_suite",
                    ge_cloud_id=batch._expectation_suite.ge_cloud_id,
                )
                validation_result_id = GeCloudIdentifier(
                    resource_type="suite_validation_result"
                )
            else:
                expectation_suite_identifier = ExpectationSuiteIdentifier(
                    expectation_suite_name=batch._expectation_suite.expectation_suite_name
                )
                validation_result_id = ValidationResultIdentifier(
                    batch_identifier=batch_identifier,
                    expectation_suite_identifier=expectation_suite_identifier,
                    run_id=run_id,
                )

            batch_actions_results = self._run_actions(
                batch=batch,
                expectation_suite_identifier=expectation_suite_identifier,
                expectation_suite=batch._expectation_suite,
                batch_validation_result=async_batch_validation_result.result(),
                run_id=run_id,
                validation_result_id=validation_result_id,
                checkpoint_identifier=checkpoint_identifier,
            )

            run_result_obj = {
                "validation_result": async_batch_validation_result.result(),
                "actions_results": batch_actions_results,
            }
            run_results[validation_result_id] = run_result_obj

    return ValidationOperatorResult(
        run_id=run_id,
        run_results=run_results,
        validation_operator_config=self.validation_operator_config,
        evaluation_parameters=evaluation_parameters,
    )
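# A toy illustration of the thread-use rule described in the NOTE above:
# threads are only used when concurrency is enabled AND max_workers > 1, so a
# single-asset call degrades to inline execution even when nested inside
# Checkpoint.run's own executor. This sketch is an assumption-based
# illustration, not the real AsyncExecutor.
from concurrent.futures import Future, ThreadPoolExecutor

class SketchAsyncExecutor:
    def __init__(self, concurrency_enabled: bool, max_workers: int):
        self._parallel = concurrency_enabled and max_workers > 1
        self._pool = (
            ThreadPoolExecutor(max_workers=max_workers) if self._parallel else None
        )

    def submit(self, fn, *args, **kwargs) -> Future:
        if self._parallel:
            return self._pool.submit(fn, *args, **kwargs)
        # Single-threaded fallback: run inline and wrap the result in a
        # Future so callers can uniformly call .result().
        future = Future()
        future.set_result(fn(*args, **kwargs))
        return future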