def _list_data_assets_validated_by_batch_id(self) -> dict:
    """Group the stored validation results by the id of their batch.

    The grouping is computed from ``self.list_validation_results()`` the
    first time it is needed and stored on
    ``self._data_assets_validated_by_batch_id``; while that attribute is not
    ``None`` it is returned as-is.

    Returns:
        A dict keyed by ``BatchKwargs(batch_kwargs).to_id()``. Each value is
        a dict with ``"batch_kwargs"`` (from the first result seen for that
        id), ``"validation_results"`` and ``"expectation_suite_names"``
        (two lists appended to in step, in iteration order).
    """
    if self._data_assets_validated_by_batch_id is not None:
        return self._data_assets_validated_by_batch_id

    grouped = {}
    for result in self.list_validation_results():
        kwargs = result.meta["batch_kwargs"]
        key = BatchKwargs(kwargs).to_id()
        suite_name = result.meta["expectation_suite_name"]

        # First sighting of a batch id creates its entry; every sighting
        # (including the first) then appends to the two lists.
        entry = grouped.setdefault(
            key,
            {
                "batch_kwargs": kwargs,
                "validation_results": [],
                "expectation_suite_names": [],
            },
        )
        entry["validation_results"].append(result)
        entry["expectation_suite_names"].append(suite_name)

    self._data_assets_validated_by_batch_id = grouped
    return self._data_assets_validated_by_batch_id
def render(
    self,
    validation_result=None,
    data_docs_pages=None,
    notify_with=None,
):
    """Build a plain-text summary of a validation result.

    Args:
        validation_result: Object exposing ``meta`` (dict-like),
            ``statistics`` (with ``"successful_expectations"`` and
            ``"evaluated_expectations"``) and ``success``. When falsy, a
            fixed "no validation occurred" message is returned.
        data_docs_pages: Accepted but not used by this renderer.
        notify_with: Accepted but not used by this renderer.

    Returns:
        The summary text: status, suite name, data asset name, run id,
        batch id and the met/evaluated expectation counts, one per line.
    """
    if not validation_result:
        return "No validation occurred. Please ensure you passed a validation_result."

    meta = validation_result.meta
    suite_name = meta.get(
        "expectation_suite_name", "__no_expectation_suite_name__"
    )

    # The asset name comes from batch_kwargs when present, otherwise from
    # the active batch definition, otherwise a placeholder is used.
    if "batch_kwargs" in meta:
        asset_name = meta["batch_kwargs"].get(
            "data_asset_name", "__no_data_asset_name__"
        )
    elif "active_batch_definition" in meta:
        asset_name = (
            meta["active_batch_definition"].data_asset_name
            or "__no_data_asset_name__"
        )
    else:
        asset_name = "__no_data_asset_name__"

    stats = validation_result.statistics
    met = stats["successful_expectations"]
    evaluated = stats["evaluated_expectations"]
    run_id = meta.get("run_id", "__no_run_id__")
    batch_id = BatchKwargs(meta.get("batch_kwargs", {})).to_id()
    details = "{} of {} expectations were met".format(met, evaluated)
    status = "Success 🎉" if validation_result.success else "Failed ❌"

    template = (
        "Batch Validation Status: {}\n"
        "Expectation suite name: {}\n"
        "Data asset name: {}\n"
        "Run ID: {}\n"
        "Batch ID: {}\n"
        "Summary: {}"
    )
    return template.format(
        status, suite_name, asset_name, run_id, batch_id, details
    )
def _run_suite(
    self,
    dataset_name: str,
    dataset_path: Optional[str],
    df: Any,
    target_expectation_suite_name: str,
    run_id: str,
):
    """Validate ``df`` against a named expectation suite.

    Args:
        dataset_name: Passed to ``generate_datasource_name`` to produce the
            ``"datasource"`` entry of the batch kwargs.
        dataset_path: Optional path of the dataset. When truthy, it is
            stored as ``"path"`` and its base name without the final
            extension is stored as ``"data_asset_name"``.
        df: The loaded data handed to ``Batch`` as ``data``.
        target_expectation_suite_name: Name of the suite fetched from
            ``self.expectation_context``.
        run_id: Forwarded to ``run_validation_operator``.

    Returns:
        Whatever ``self.expectation_context.run_validation_operator``
        returns for the ``"action_list_operator"`` operator.

    Raises:
        UnsupportedDataSet: If ``Validator`` rejects the batch with a
            ``ValueError``; the original error is attached as the cause.
    """
    target_suite = self.expectation_context.get_expectation_suite(
        target_expectation_suite_name
    )

    # Timestamp of when the batch was assembled, in UTC.
    batch_markers = BatchMarkers(
        {
            "ge_load_time": datetime.datetime.now(
                datetime.timezone.utc
            ).strftime("%Y%m%dT%H%M%S.%fZ")
        }
    )

    batch_kwargs = {"datasource": generate_datasource_name(dataset_name)}
    if dataset_path:
        dataasset_name, _ = os.path.splitext(os.path.basename(dataset_path))
        batch_kwargs["path"] = str(dataset_path)
        batch_kwargs["data_asset_name"] = dataasset_name

    batch = Batch(
        "kedro",
        batch_kwargs=BatchKwargs(batch_kwargs),
        data=df,
        batch_parameters=None,
        batch_markers=batch_markers,
        data_context=self.expectation_context,
    )

    try:
        v = Validator(
            batch=batch,
            expectation_suite=target_suite,
        )
    except ValueError as err:
        # Chain explicitly so the traceback shows the ValueError as the
        # direct cause rather than as an error raised while handling it.
        raise UnsupportedDataSet from err

    validator_dataset_batch = v.get_dataset()
    return self.expectation_context.run_validation_operator(
        "action_list_operator", [validator_dataset_batch], run_id=run_id
    )
def _run_suite(self, dataset, target_expectation_suite_name, run_id):
    """Load ``dataset`` and run the named expectation suite against it.

    Args:
        dataset: Object with a ``load()`` method; also passed to
            ``self._get_ge_class_name`` to pick the dataset class name.
        target_expectation_suite_name: Name of the suite fetched from
            ``self.expectation_context``.
        run_id: Forwarded to ``run_validation_operator``.

    Returns:
        None. The result of the validation operator is not returned.
    """
    class_name = self._get_ge_class_name(dataset)
    target_suite = self.expectation_context.get_expectation_suite(
        target_expectation_suite_name
    )
    df = dataset.load()

    # Both the path and the datasource are the fixed placeholder "kedro".
    batch_kwargs = BatchKwargs({"path": "kedro", "datasource": "kedro"})
    load_time = datetime.datetime.now(datetime.timezone.utc).strftime(
        "%Y%m%dT%H%M%S.%fZ"
    )
    batch_markers = BatchMarkers({"ge_load_time": load_time})
    batch = Batch(
        "kedro",
        batch_kwargs,
        df,
        None,
        batch_markers,
        self.expectation_context,
    )

    engine = {
        "module_name": "great_expectations.dataset",
        "class_name": class_name,
    }
    validator = Validator(batch, target_suite, engine)
    validated_dataset = validator.get_dataset()
    self.expectation_context.run_validation_operator(
        "action_list_operator", [validated_dataset], run_id=run_id
    )
def __init__(
    self,
    data,
    batch_request: BatchRequest = None,
    batch_definition: BatchDefinition = None,
    batch_spec: BatchSpec = None,
    batch_markers: BatchMarkers = None,
    # The remaining parameters are for backward compatibility.
    data_context=None,
    datasource_name=None,
    batch_parameters=None,
    batch_kwargs=None,
):
    """Store the batch data together with its descriptive metadata.

    Args:
        data: The batch payload, stored unchanged.
        batch_request: Stored as given; an empty dict when ``None``.
        batch_definition: Stored as given; a new ``IDDict`` when ``None``.
        batch_spec: Stored as given; a new ``BatchSpec`` when ``None``.
        batch_markers: Stored as given; when ``None``, new markers holding
            the current UTC time under ``"ge_load_time"`` are created.
        data_context: Backward-compatibility parameter, stored unchanged.
        datasource_name: Backward-compatibility parameter, stored unchanged.
        batch_parameters: Backward-compatibility parameter, stored
            unchanged.
        batch_kwargs: Backward-compatibility parameter; any falsy value is
            replaced by a new ``BatchKwargs``.
    """
    self._data = data
    self._batch_request = {} if batch_request is None else batch_request
    self._batch_definition = (
        IDDict() if batch_definition is None else batch_definition
    )
    self._batch_spec = BatchSpec() if batch_spec is None else batch_spec

    if batch_markers is None:
        now_utc = datetime.datetime.now(datetime.timezone.utc)
        batch_markers = BatchMarkers(
            {"ge_load_time": now_utc.strftime("%Y%m%dT%H%M%S.%fZ")}
        )
    self._batch_markers = batch_markers

    # Everything below exists only for backward compatibility.
    self._data_context = data_context
    self._datasource_name = datasource_name
    self._batch_parameters = batch_parameters
    # Truthiness (not an ``is None`` check) decides the fallback here.
    self._batch_kwargs = batch_kwargs if batch_kwargs else BatchKwargs()
def render(self, validation_result=None, data_docs_pages=None, notify_with=None):
    """Build the title and HTML body describing a validation result.

    Args:
        validation_result: Object exposing ``meta`` (dict-like),
            ``statistics`` and ``success``. When falsy, both the title and
            the body start from a fixed "no validation occurred" message.
        data_docs_pages: Optional mapping of data-docs site keys to links.
            Each link is turned into HTML by ``self._get_report_element``.
        notify_with: Optional iterable of site keys to link. A key missing
            from ``data_docs_pages`` yields an error paragraph that is also
            logged at critical level. When ``None``, every key except
            ``"class"`` is linked.

    Returns:
        A ``(title, html)`` tuple. The documentation footer is always
        appended to ``html``.

    NOTE(review): in the ``notify_with`` branch each appended element is
    also written to stdout via ``print``; kept as-is, confirm it is wanted.
    """
    default_text = (
        "No validation occurred. Please ensure you passed a validation_result."
    )
    title = html = default_text

    if validation_result:
        meta = validation_result.meta
        expectation_suite_name = meta.get(
            "expectation_suite_name", "__no_expectation_suite_name__"
        )

        # Prefer batch_kwargs, then the active batch definition, then a
        # placeholder.
        if "batch_kwargs" in meta:
            data_asset_name = meta["batch_kwargs"].get(
                "data_asset_name", "__no_data_asset_name__"
            )
        elif "active_batch_definition" in meta:
            data_asset_name = (
                meta["active_batch_definition"].data_asset_name
                or "__no_data_asset_name__"
            )
        else:
            data_asset_name = "__no_data_asset_name__"

        statistics = validation_result.statistics
        n_checks_succeeded = statistics["successful_expectations"]
        n_checks = statistics["evaluated_expectations"]
        run_id = meta.get("run_id", "__no_run_id__")
        batch_id = BatchKwargs(meta.get("batch_kwargs", {})).to_id()
        check_details_text = f"<strong>{n_checks_succeeded}</strong> of <strong>{n_checks}</strong> expectations were met"
        status = "Success 🎉" if validation_result.success else "Failed ❌"

        title = f"{expectation_suite_name}: {status}"
        html = textwrap.dedent(
            f"""\
            <p><strong>Batch Validation Status</strong>: {status}</p>
            <p><strong>Expectation suite name</strong>: {expectation_suite_name}</p>
            <p><strong>Data asset name</strong>: {data_asset_name}</p>
            <p><strong>Run ID</strong>: {run_id}</p>
            <p><strong>Batch ID</strong>: {batch_id}</p>
            <p><strong>Summary</strong>: {check_details_text}</p>"""
        )

        if data_docs_pages:
            if notify_with is None:
                # No explicit selection: link every configured site.
                for docs_link_key in data_docs_pages.keys():
                    if docs_link_key == "class":
                        continue
                    element = self._get_report_element(
                        data_docs_pages[docs_link_key]
                    )
                    if element:
                        html += element
            else:
                for docs_link_key in notify_with:
                    if docs_link_key in data_docs_pages.keys():
                        element = self._get_report_element(
                            data_docs_pages[docs_link_key]
                        )
                    else:
                        element = (
                            f"<strong>ERROR</strong>: The email is trying to provide a link to the following DataDocs: "
                            f"`{str(docs_link_key)}`, but it is not configured under data_docs_sites "
                            "in the great_expectations.yml</br>"
                        )
                        logger.critical(element)
                    if element:
                        print(element)
                        html += element

        if "result_reference" in meta:
            result_reference = meta["result_reference"]
            html += f"- <strong>Validation Report</strong>: {result_reference}</br>"

        if "dataset_reference" in meta:
            dataset_reference = meta["dataset_reference"]
            html += f"- <strong>Validation data asset</strong>: {dataset_reference}</br>"

    documentation_url = "https://docs.greatexpectations.io/en/latest/guides/tutorials/getting_started/set_up_data_docs.html"
    html += f'<p>Learn <a href="{documentation_url}">here</a> how to review validation results in Data Docs</p>'
    return title, html
def render(
    self,
    validation_result=None,
    data_docs_pages=None,
    notify_with=None,
):
    """Build the Slack message payload describing a validation result.

    Args:
        validation_result: Object exposing ``meta`` (dict-like),
            ``statistics`` and ``success``. When falsy, the payload carries
            a fixed "no validation occurred" message.
        data_docs_pages: Optional mapping of data-docs site keys to links.
            Each link is turned into a block by
            ``self._get_report_element``.
        notify_with: Optional iterable of site keys to link. A key missing
            from ``data_docs_pages`` is logged at critical level and
            reported in an error block. When ``None``, every key except
            ``"class"`` is linked.

    Returns:
        A dict with ``"blocks"`` (list of block dicts, ending with a
        divider and a documentation footer) and ``"text"``.
    """
    default_text = (
        "No validation occurred. Please ensure you passed a validation_result."
    )

    title_block = {
        "type": "section",
        "text": {
            "type": "mrkdwn",
            "text": default_text,
        },
    }
    blocks = [title_block]
    query = {
        "blocks": blocks,
        # The root-level "text" is what shows up in the notification, not
        # in the message itself.
        "text": default_text,
    }

    if validation_result:
        meta = validation_result.meta
        expectation_suite_name = meta.get(
            "expectation_suite_name", "__no_expectation_suite_name__"
        )

        # Prefer batch_kwargs, then the active batch definition, then a
        # placeholder.
        if "batch_kwargs" in meta:
            data_asset_name = meta["batch_kwargs"].get(
                "data_asset_name", "__no_data_asset_name__"
            )
        elif "active_batch_definition" in meta:
            data_asset_name = (
                meta["active_batch_definition"].data_asset_name
                or "__no_data_asset_name__"
            )
        else:
            data_asset_name = "__no_data_asset_name__"

        statistics = validation_result.statistics
        n_checks_succeeded = statistics["successful_expectations"]
        n_checks = statistics["evaluated_expectations"]
        run_id = meta.get("run_id", "__no_run_id__")
        batch_id = BatchKwargs(meta.get("batch_kwargs", {})).to_id()
        check_details_text = (
            f"*{n_checks_succeeded}* of *{n_checks}* expectations were met"
        )
        status = "Success :tada:" if validation_result.success else "Failed :x:"

        title_block["text"]["text"] = (
            f"*Batch Validation Status*: {status}\n"
            f"*Expectation suite name*: `{expectation_suite_name}`\n"
            f"*Data asset name*: `{data_asset_name}`\n"
            f"*Run ID*: `{run_id}`\n"
            f"*Batch ID*: `{batch_id}`\n"
            f"*Summary*: {check_details_text}"
        )
        # Abbreviated text for the notification.
        query["text"] = f"{expectation_suite_name}: {status}"

        if data_docs_pages:
            if notify_with is None:
                # No explicit selection: link every configured site.
                for docs_link_key in data_docs_pages.keys():
                    if docs_link_key == "class":
                        continue
                    element = self._get_report_element(
                        data_docs_pages[docs_link_key]
                    )
                    if element:
                        blocks.append(element)
            else:
                for docs_link_key in notify_with:
                    if docs_link_key in data_docs_pages.keys():
                        element = self._get_report_element(
                            data_docs_pages[docs_link_key]
                        )
                    else:
                        # The same text is logged and shown in the message.
                        error_text = f"*ERROR*: Slack is trying to provide a link to the following DataDocs: `{str(docs_link_key)}`, but it is not configured under `data_docs_sites` in the `great_expectations.yml`\n"
                        logger.critical(error_text)
                        element = {
                            "type": "section",
                            "text": {"type": "mrkdwn", "text": error_text},
                        }
                    if element:
                        blocks.append(element)

        if "result_reference" in meta:
            result_reference = meta["result_reference"]
            blocks.append(
                {
                    "type": "section",
                    "text": {
                        "type": "mrkdwn",
                        "text": f"- *Validation Report*: {result_reference}",
                    },
                }
            )

        if "dataset_reference" in meta:
            dataset_reference = meta["dataset_reference"]
            blocks.append(
                {
                    "type": "section",
                    "text": {
                        "type": "mrkdwn",
                        "text": f"- *Validation data asset*: {dataset_reference}",
                    },
                }
            )

    documentation_url = "https://docs.greatexpectations.io/en/latest/guides/tutorials/getting_started/set_up_data_docs.html"
    blocks.append({"type": "divider"})
    blocks.append(
        {
            "type": "context",
            "elements": [
                {
                    "type": "mrkdwn",
                    "text": f"Learn how to review validation results in Data Docs: {documentation_url}",
                }
            ],
        }
    )
    return query