def _add_validations_to_index_links(
    self,
    index_links_dict: OrderedDict,
    validation_and_profiling_result_site_keys: List[ValidationResultIdentifier],
) -> None:
    """Register an index link for every rendered validation result.

    Nothing happens when the "validations" entry of
    ``site_section_builders_config`` is falsy or one of
    ``FALSEY_YAML_STRINGS``. Otherwise the candidate keys are narrowed with
    the section's ``run_name_filter``, ordered newest-first by run time,
    optionally truncated to ``validation_results_limit`` and handed one by
    one to ``add_resource_info_to_index_links_dict``.

    Args:
        index_links_dict: mapping forwarded to
            ``add_resource_info_to_index_links_dict``.
        validation_and_profiling_result_site_keys: candidate result keys;
            only those passing the validations run-name filter are used.
    """
    section_config = self.site_section_builders_config.get("validations", "None")
    if not section_config or section_config in FALSEY_YAML_STRINGS:
        return

    name_filter = self.site_section_builders_config["validations"]["run_name_filter"]
    selected_keys = []
    for candidate in validation_and_profiling_result_site_keys:
        if resource_key_passes_run_name_filter(candidate, name_filter):
            selected_keys.append(candidate)

    # Most recent runs come first so that the limit keeps the newest results.
    selected_keys.sort(key=lambda key: key.run_id.run_time, reverse=True)
    if self.validation_results_limit:
        selected_keys = selected_keys[: self.validation_results_limit]

    for result_key in selected_keys:
        try:
            result = self.data_context.get_validation_result(
                batch_identifier=result_key.batch_identifier,
                expectation_suite_name=result_key.expectation_suite_identifier.expectation_suite_name,
                run_id=result_key.run_id,
                validations_store_name=self.source_stores.get("validations"),
            )
            succeeded = result.success
            kwargs_of_batch = result.meta.get("batch_kwargs", {})
            spec_of_batch = result.meta.get("batch_spec", {})

            # Prefer the asset name from batch_kwargs, fall back to batch_spec.
            asset_name = kwargs_of_batch.get("data_asset_name")
            if not asset_name:
                asset_name = spec_of_batch.get("data_asset_name")

            self.add_resource_info_to_index_links_dict(
                index_links_dict=index_links_dict,
                expectation_suite_name=result_key.expectation_suite_identifier.expectation_suite_name,
                section_name="validations",
                batch_identifier=result_key.batch_identifier,
                run_id=result_key.run_id,
                validation_success=succeeded,
                run_time=result_key.run_id.run_time,
                run_name=result_key.run_id.run_name,
                asset_name=asset_name,
                batch_kwargs=kwargs_of_batch,
                batch_spec=spec_of_batch,
            )
        except Exception:
            # Any failure for a single key is logged and the key is skipped.
            key_text = str(result_key.to_tuple())
            logger.warning(f"Validation result not found: {key_text:s} - skipping")
def _add_profiling_to_index_links(
    self,
    index_links_dict: OrderedDict,
    validation_and_profiling_result_site_keys: List[ValidationResultIdentifier],
) -> None:
    """Register an index link for every rendered profiling result.

    Nothing happens when the "profiling" entry of
    ``site_section_builders_config`` is falsy or one of
    ``FALSEY_YAML_STRINGS``. Otherwise every candidate key that passes the
    section's ``run_name_filter`` is looked up and handed to
    ``add_resource_info_to_index_links_dict``.

    Args:
        index_links_dict: mapping forwarded to
            ``add_resource_info_to_index_links_dict``.
        validation_and_profiling_result_site_keys: candidate result keys;
            only those passing the profiling run-name filter are used.
    """
    section_config = self.site_section_builders_config.get("profiling", "None")
    if not section_config or section_config in FALSEY_YAML_STRINGS:
        return

    name_filter = self.site_section_builders_config["profiling"]["run_name_filter"]
    selected_keys = []
    for candidate in validation_and_profiling_result_site_keys:
        if resource_key_passes_run_name_filter(candidate, name_filter):
            selected_keys.append(candidate)

    for result_key in selected_keys:
        try:
            result = self.data_context.get_validation_result(
                batch_identifier=result_key.batch_identifier,
                expectation_suite_name=result_key.expectation_suite_identifier.expectation_suite_name,
                run_id=result_key.run_id,
                validations_store_name=self.source_stores.get("profiling"),
            )
            kwargs_of_batch = result.meta.get("batch_kwargs", {})
            spec_of_batch = result.meta.get("batch_spec", {})

            # Prefer the asset name from batch_kwargs, fall back to batch_spec.
            asset_name = kwargs_of_batch.get("data_asset_name")
            if not asset_name:
                asset_name = spec_of_batch.get("data_asset_name")

            self.add_resource_info_to_index_links_dict(
                index_links_dict=index_links_dict,
                expectation_suite_name=result_key.expectation_suite_identifier.expectation_suite_name,
                section_name="profiling",
                batch_identifier=result_key.batch_identifier,
                run_id=result_key.run_id,
                run_time=result_key.run_id.run_time,
                run_name=result_key.run_id.run_name,
                asset_name=asset_name,
                batch_kwargs=kwargs_of_batch,
                batch_spec=spec_of_batch,
            )
        except Exception:
            # Any failure for a single key is logged and the key is skipped.
            key_text = str(result_key.to_tuple())
            logger.warning(f"Profiling result not found: {key_text:s} - skipping")
def build(self, resource_identifiers=None) -> None:
    """Render the selected resources of the source store into the target store.

    Args:
        resource_identifiers: optional collection of resource keys. When it
            is given and non-empty, only source-store keys contained in it
            are rendered; otherwise every key of the source store is
            rendered.
    """
    candidate_keys = self.source_store.list_keys()
    if self.name == "validations" and self.validation_results_limit:
        # Keep only the most recent validation runs.
        newest_first = sorted(
            candidate_keys, key=lambda key: key.run_id.run_time, reverse=True
        )
        candidate_keys = newest_first[: self.validation_results_limit]

    for resource_key in candidate_keys:
        # Without resource_identifiers a page is built for every key in the
        # source store; with them, only the requested resources get a page.
        if resource_identifiers and resource_key not in resource_identifiers:
            continue

        # The run-name filter is not applied to GeCloudIdentifier keys.
        if (
            self.run_name_filter
            and not isinstance(resource_key, GeCloudIdentifier)
            and not resource_key_passes_run_name_filter(
                resource_key, self.run_name_filter
            )
        ):
            continue

        is_suite_key = isinstance(resource_key, ExpectationSuiteIdentifier)
        try:
            resource = self.source_store.get(resource_key)
            if is_suite_key:
                resource = ExpectationSuite(
                    **resource, data_context=self.data_context
                )
        except exceptions.InvalidKeyError:
            logger.warning(
                f"Object with Key: {str(resource_key)} could not be retrieved. Skipping..."
            )
            continue

        if is_suite_key:
            suite_name = resource_key.expectation_suite_name
            logger.debug(f" Rendering expectation suite {suite_name}")
        elif isinstance(resource_key, ValidationResultIdentifier):
            run_id = resource_key.run_id
            run_name = run_id.run_name
            run_time = run_id.run_time
            suite_name = (
                resource_key.expectation_suite_identifier.expectation_suite_name
            )
            if self.name == "profiling":
                logger.debug(
                    f" Rendering profiling for batch {resource_key.batch_identifier}"
                )
            else:
                logger.debug(
                    f" Rendering validation: run name: {run_name}, run time: {run_time}, suite {suite_name} for batch {resource_key.batch_identifier}"
                )

        try:
            rendered_content = self.renderer_class.render(resource)
            if not self.ge_cloud_mode:
                page = self.view_class.render(
                    rendered_content,
                    data_context_id=self.data_context_id,
                    show_how_to_buttons=self.show_how_to_buttons,
                )
                # Verify type
                page_key = SiteSectionIdentifier(
                    site_section_name=self.name,
                    resource_identifier=resource_key,
                )
                self.target_store.set(page_key, page)
            else:
                cloud_key = GeCloudIdentifier(
                    resource_type=GeCloudRESTResource.RENDERED_DATA_DOC
                )
                self.target_store.set(
                    cloud_key,
                    rendered_content,
                    source_type=resource_key.resource_type,
                    source_id=resource_key.ge_cloud_id,
                )
        except Exception as e:
            # Rendering is best-effort: log the failure and move on to the
            # next resource.
            exception_message = """\
An unexpected Exception occurred during data docs rendering. Because of this error, certain parts of data docs will \
not be rendered properly and/or may not appear altogether. Please use the trace, included in this message, to \
diagnose and repair the underlying issue. Detailed information follows:
            """
            exception_traceback = traceback.format_exc()
            exception_message += (
                f'{type(e).__name__}: "{str(e)}". '
                f'Traceback: "{exception_traceback}".'
            )
            logger.error(exception_message)
def build(self, skip_and_clean_missing=True, build_index: bool = True):
    """
    Build the data docs index page from the resources found in the target store.

    :param skip_and_clean_missing: if True, target html store keys without corresponding source store keys will
        be skipped and removed from the target store
    :param build_index: a flag if False, skips building the index page
    :return: tuple(index_page_url, index_links_dict). ``(None, None)`` is returned when ``build_index`` is
        False; ``(None, index_links_dict)`` is returned when rendering the index page fails (the failure is
        logged).
    """

    def section_enabled(section_name):
        # A section is enabled when its config entry is truthy and is not one of the falsey YAML strings.
        # A missing entry defaults to the string "None".
        section_config = self.site_section_builders_config.get(section_name, "None")
        return bool(section_config) and section_config not in FALSEY_YAML_STRINGS

    # Loop over sections in the HtmlStore
    logger.debug("DefaultSiteIndexBuilder.build")
    if not build_index:
        logger.debug("Skipping index rendering")
        return None, None

    index_links_dict = OrderedDict()
    index_links_dict["site_name"] = self.site_name

    if self.show_how_to_buttons:
        index_links_dict["cta_object"] = self.get_calls_to_action()

    if section_enabled("expectations"):
        expectation_suite_source_keys = self.data_context.stores[
            self.site_section_builders_config["expectations"].get("source_store_name")
        ].list_keys()
        expectation_suite_backend = self.target_store.store_backends[
            ExpectationSuiteIdentifier
        ]
        expectation_suite_site_keys = [
            ExpectationSuiteIdentifier.from_tuple(expectation_suite_tuple)
            for expectation_suite_tuple in expectation_suite_backend.list_keys()
        ]
        if skip_and_clean_missing:
            # Drop rendered pages whose source object no longer exists.
            cleaned_keys = []
            for expectation_suite_site_key in expectation_suite_site_keys:
                if expectation_suite_site_key in expectation_suite_source_keys:
                    cleaned_keys.append(expectation_suite_site_key)
                else:
                    expectation_suite_backend.remove_key(expectation_suite_site_key)
            expectation_suite_site_keys = cleaned_keys

        for expectation_suite_key in expectation_suite_site_keys:
            self.add_resource_info_to_index_links_dict(
                index_links_dict=index_links_dict,
                expectation_suite_name=expectation_suite_key.expectation_suite_name,
                section_name="expectations",
            )

    validations_enabled = section_enabled("validations")
    profiling_enabled = section_enabled("profiling")

    validation_and_profiling_result_site_keys = []
    if validations_enabled or profiling_enabled:
        # Both sections share the rendered ValidationResultIdentifier pages; the source keys are read
        # from the validations section's store when it is enabled, else from the profiling one.
        source_store = "validations" if validations_enabled else "profiling"
        validation_and_profiling_result_source_keys = self.data_context.stores[
            self.site_section_builders_config[source_store].get("source_store_name")
        ].list_keys()
        validation_result_backend = self.target_store.store_backends[
            ValidationResultIdentifier
        ]
        validation_and_profiling_result_site_keys = [
            ValidationResultIdentifier.from_tuple(validation_result_tuple)
            for validation_result_tuple in validation_result_backend.list_keys()
        ]
        if skip_and_clean_missing:
            # Drop rendered pages whose source object no longer exists.
            cleaned_keys = []
            for validation_result_site_key in validation_and_profiling_result_site_keys:
                if validation_result_site_key in validation_and_profiling_result_source_keys:
                    cleaned_keys.append(validation_result_site_key)
                else:
                    validation_result_backend.remove_key(validation_result_site_key)
            validation_and_profiling_result_site_keys = cleaned_keys

    if profiling_enabled:
        profiling_run_name_filter = self.site_section_builders_config["profiling"][
            "run_name_filter"
        ]
        profiling_result_site_keys = [
            validation_result_key
            for validation_result_key in validation_and_profiling_result_site_keys
            if resource_key_passes_run_name_filter(
                validation_result_key, profiling_run_name_filter
            )
        ]
        for profiling_result_key in profiling_result_site_keys:
            try:
                validation = self.data_context.get_validation_result(
                    batch_identifier=profiling_result_key.batch_identifier,
                    expectation_suite_name=profiling_result_key.expectation_suite_identifier.expectation_suite_name,
                    run_id=profiling_result_key.run_id,
                    validations_store_name=self.source_stores.get("profiling"),
                )
                batch_kwargs = validation.meta.get("batch_kwargs", {})
                batch_spec = validation.meta.get("batch_spec", {})

                self.add_resource_info_to_index_links_dict(
                    index_links_dict=index_links_dict,
                    expectation_suite_name=profiling_result_key.expectation_suite_identifier.expectation_suite_name,
                    section_name="profiling",
                    batch_identifier=profiling_result_key.batch_identifier,
                    run_id=profiling_result_key.run_id,
                    run_time=profiling_result_key.run_id.run_time,
                    run_name=profiling_result_key.run_id.run_name,
                    asset_name=batch_kwargs.get("data_asset_name")
                    or batch_spec.get("data_asset_name"),
                    batch_kwargs=batch_kwargs,
                    batch_spec=batch_spec,
                )
            except Exception:
                # Best-effort: a result that cannot be loaded is logged and left out of the index.
                error_msg = f"Profiling result not found: {str(profiling_result_key.to_tuple())} - skipping"
                logger.warning(error_msg)

    if validations_enabled:
        validations_run_name_filter = self.site_section_builders_config["validations"][
            "run_name_filter"
        ]
        validation_result_site_keys = [
            validation_result_key
            for validation_result_key in validation_and_profiling_result_site_keys
            if resource_key_passes_run_name_filter(
                validation_result_key, validations_run_name_filter
            )
        ]
        # Newest runs first, so that the limit keeps the most recent results.
        validation_result_site_keys = sorted(
            validation_result_site_keys,
            key=lambda x: x.run_id.run_time,
            reverse=True,
        )
        if self.validation_results_limit:
            validation_result_site_keys = validation_result_site_keys[
                : self.validation_results_limit
            ]
        for validation_result_key in validation_result_site_keys:
            try:
                validation = self.data_context.get_validation_result(
                    batch_identifier=validation_result_key.batch_identifier,
                    expectation_suite_name=validation_result_key.expectation_suite_identifier.expectation_suite_name,
                    run_id=validation_result_key.run_id,
                    validations_store_name=self.source_stores.get("validations"),
                )
                validation_success = validation.success
                batch_kwargs = validation.meta.get("batch_kwargs", {})
                batch_spec = validation.meta.get("batch_spec", {})

                self.add_resource_info_to_index_links_dict(
                    index_links_dict=index_links_dict,
                    expectation_suite_name=validation_result_key.expectation_suite_identifier.expectation_suite_name,
                    section_name="validations",
                    batch_identifier=validation_result_key.batch_identifier,
                    run_id=validation_result_key.run_id,
                    validation_success=validation_success,
                    run_time=validation_result_key.run_id.run_time,
                    run_name=validation_result_key.run_id.run_name,
                    asset_name=batch_kwargs.get("data_asset_name")
                    or batch_spec.get("data_asset_name"),
                    batch_kwargs=batch_kwargs,
                    batch_spec=batch_spec,
                )
            except Exception:
                # Best-effort: a result that cannot be loaded is logged and left out of the index.
                error_msg = f"Validation result not found: {str(validation_result_key.to_tuple())} - skipping"
                logger.warning(error_msg)

    try:
        rendered_content = self.renderer_class.render(index_links_dict)
        viewable_content = self.view_class.render(
            rendered_content,
            data_context_id=self.data_context_id,
            show_how_to_buttons=self.show_how_to_buttons,
        )
    except Exception as e:
        exception_message = """\
An unexpected Exception occurred during data docs rendering. Because of this error, certain parts of data docs will \
not be rendered properly and/or may not appear altogether. Please use the trace, included in this message, to \
diagnose and repair the underlying issue. Detailed information follows:
        """
        exception_traceback = traceback.format_exc()
        exception_message += (
            f'{type(e).__name__}: "{str(e)}". Traceback: "{exception_traceback}".'
        )
        logger.error(exception_message)
        # No content was produced, so there is no index page to write. Returning here avoids the
        # UnboundLocalError that referencing viewable_content below would otherwise raise.
        return None, index_links_dict

    return (self.target_store.write_index_page(viewable_content), index_links_dict)
def test_resource_key_passes_run_name_filter():
    """Check every run-name filter operator against one fixed run name.

    The current operator names are checked directly; the deprecated short
    forms ("eq", "ne") are additionally expected to emit a
    DeprecationWarning.
    """
    resource_key = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier("test_suite"),
        run_id=RunIdentifier(run_name="foofooprofilingfoo"),
        batch_identifier="f14c3d2f6e8028c2db0c25edabdb0d61",
    )

    def passes(run_name_filter):
        return resource_key_passes_run_name_filter(
            resource_key, run_name_filter=run_name_filter
        )

    # (filter, expected outcome) for the currently supported operator names.
    supported_cases = [
        ({"equals": "profiling"}, False),
        ({"equals": "foofooprofilingfoo"}, True),
        ({"not_equals": "profiling"}, True),
        ({"not_equals": "foofooprofilingfoo"}, False),
        ({"includes": "profiling"}, True),
        ({"includes": "foobar"}, False),
        ({"not_includes": "foobar"}, True),
        ({"not_includes": "profiling"}, False),
        ({"matches_regex": "(foo){2}profiling(foo)+"}, True),
        ({"matches_regex": "(foo){3}profiling(foo)+"}, False),
    ]
    for run_name_filter, expected in supported_cases:
        assert passes(run_name_filter) is expected, run_name_filter

    # Deprecated operator names: each group must raise a DeprecationWarning.
    deprecated_cases = [
        ("eq", [("profiling", False), ("foofooprofilingfoo", True)]),
        ("ne", [("profiling", True), ("foofooprofilingfoo", False)]),
    ]
    for operator_name, cases in deprecated_cases:
        with pytest.warns(DeprecationWarning):
            for value, expected in cases:
                assert passes({operator_name: value}) is expected, (
                    operator_name,
                    value,
                )
def build(self):
    """Build the data docs index page from the resources found in the target store.

    Expectation suite, profiling and validation pages present in the target
    store are collected into an ``OrderedDict`` of index links, which is then
    rendered and written as the index page.

    :return: tuple(index_page_url, index_links_dict), where index_page_url is
        whatever ``target_store.write_index_page`` returns
    """
    # Loop over sections in the HtmlStore
    logger.debug("DefaultSiteIndexBuilder.build")

    expectation_suite_keys = [
        ExpectationSuiteIdentifier.from_tuple(expectation_suite_tuple)
        for expectation_suite_tuple in self.target_store.store_backends[
            ExpectationSuiteIdentifier
        ].list_keys()
    ]
    validation_and_profiling_result_keys = [
        ValidationResultIdentifier.from_tuple(validation_result_tuple)
        for validation_result_tuple in self.target_store.store_backends[
            ValidationResultIdentifier
        ].list_keys()
    ]
    # Profiling and validation results share one backend; the run-name
    # filters decide which section a result is listed under.
    profiling_result_keys = [
        validation_result_key
        for validation_result_key in validation_and_profiling_result_keys
        if resource_key_passes_run_name_filter(
            validation_result_key, self.profiling_run_name_filter
        )
    ]
    validation_result_keys = [
        validation_result_key
        for validation_result_key in validation_and_profiling_result_keys
        if resource_key_passes_run_name_filter(
            validation_result_key, self.validations_run_name_filter
        )
    ]
    # Newest runs first, so that the limit keeps the most recent results.
    validation_result_keys = sorted(
        validation_result_keys, key=lambda x: x.run_id.run_time, reverse=True
    )
    if self.validation_results_limit:
        validation_result_keys = validation_result_keys[
            : self.validation_results_limit
        ]

    index_links_dict = OrderedDict()
    index_links_dict["site_name"] = self.site_name

    if self.show_how_to_buttons:
        index_links_dict["cta_object"] = self.get_calls_to_action()

    for expectation_suite_key in expectation_suite_keys:
        self.add_resource_info_to_index_links_dict(
            index_links_dict=index_links_dict,
            expectation_suite_name=expectation_suite_key.expectation_suite_name,
            section_name="expectations",
        )

    for profiling_result_key in profiling_result_keys:
        try:
            validation = self.data_context.get_validation_result(
                batch_identifier=profiling_result_key.batch_identifier,
                expectation_suite_name=profiling_result_key.expectation_suite_identifier.expectation_suite_name,
                run_id=profiling_result_key.run_id,
                validations_store_name=self.source_stores.get("profiling"),
            )
            batch_kwargs = validation.meta.get("batch_kwargs", {})

            self.add_resource_info_to_index_links_dict(
                index_links_dict=index_links_dict,
                expectation_suite_name=profiling_result_key.expectation_suite_identifier.expectation_suite_name,
                section_name="profiling",
                batch_identifier=profiling_result_key.batch_identifier,
                run_id=profiling_result_key.run_id,
                run_time=profiling_result_key.run_id.run_time,
                run_name=profiling_result_key.run_id.run_name,
                asset_name=batch_kwargs.get("data_asset_name"),
                batch_kwargs=batch_kwargs,
            )
        except Exception:
            # Best-effort: a result that cannot be loaded is logged and left out of the index.
            error_msg = f"Profiling result not found: {str(profiling_result_key.to_tuple())} - skipping"
            logger.warning(error_msg)

    for validation_result_key in validation_result_keys:
        try:
            validation = self.data_context.get_validation_result(
                batch_identifier=validation_result_key.batch_identifier,
                expectation_suite_name=validation_result_key.expectation_suite_identifier.expectation_suite_name,
                run_id=validation_result_key.run_id,
                validations_store_name=self.source_stores.get("validations"),
            )
            validation_success = validation.success
            batch_kwargs = validation.meta.get("batch_kwargs", {})

            self.add_resource_info_to_index_links_dict(
                index_links_dict=index_links_dict,
                expectation_suite_name=validation_result_key.expectation_suite_identifier.expectation_suite_name,
                section_name="validations",
                batch_identifier=validation_result_key.batch_identifier,
                run_id=validation_result_key.run_id,
                validation_success=validation_success,
                run_time=validation_result_key.run_id.run_time,
                run_name=validation_result_key.run_id.run_name,
                asset_name=batch_kwargs.get("data_asset_name"),
                batch_kwargs=batch_kwargs,
            )
        except Exception:
            # Best-effort: a result that cannot be loaded is logged and left out of the index.
            error_msg = f"Validation result not found: {str(validation_result_key.to_tuple())} - skipping"
            logger.warning(error_msg)

    try:
        rendered_content = self.renderer_class.render(index_links_dict)
        viewable_content = self.view_class.render(
            rendered_content,
            data_context_id=self.data_context_id,
            show_how_to_buttons=self.show_how_to_buttons,
        )
    except Exception as e:
        exception_message = """\
An unexpected Exception occurred during data docs rendering. Because of this error, certain parts of data docs will \
not be rendered properly and/or may not appear altogether. Please use the trace, included in this message, to \
diagnose and repair the underlying issue. Detailed information follows:
        """
        exception_traceback = traceback.format_exc()
        exception_message += (
            f'{type(e).__name__}: "{str(e)}". Traceback: "{exception_traceback}".'
        )
        # The message is already fully formatted and embeds the traceback. It must be passed without
        # extra positional arguments: logging would try to %-format them into the message and fail.
        logger.error(exception_message)

    # NOTE(review): if rendering failed above, viewable_content is unbound and this line raises
    # UnboundLocalError - confirm the desired fallback with the callers of build().
    return (self.target_store.write_index_page(viewable_content), index_links_dict)
def build(self, resource_identifiers=None):
    """Render the selected resources of the source store into the target store.

    :param resource_identifiers: optional collection of resource keys. When given and non-empty, only
        source-store keys contained in it are rendered. Exception: in the "expectations" section every
        suite is rendered unless the collection contains at least one ExpectationSuiteIdentifier.
    :return: None
    """
    source_store_keys = self.source_store.list_keys()
    if self.name == "validations" and self.validation_results_limit:
        # Keep only the most recent validation runs.
        source_store_keys = sorted(
            source_store_keys, key=lambda x: x.run_id.run_time, reverse=True
        )[: self.validation_results_limit]

    expectation_suite_identifier_exists: bool = (
        resource_identifiers is not None
        and any(
            isinstance(ri, ExpectationSuiteIdentifier) for ri in resource_identifiers
        )
    )

    for resource_key in source_store_keys:
        # All expectation suites are always rendered unless resource_identifiers contains
        # ExpectationSuiteIdentifier(s).
        if expectation_suite_identifier_exists or (self.name != "expectations"):
            # if no resource_identifiers are passed, the section builder will build a page for every
            # key in its source store. if the caller did pass resource_identifiers, the section
            # builder will build pages only for the specified resources
            if resource_identifiers and resource_key not in resource_identifiers:
                continue

        if self.run_name_filter and not resource_key_passes_run_name_filter(
            resource_key, self.run_name_filter
        ):
            continue

        try:
            resource = self.source_store.get(resource_key)
        except exceptions.InvalidKeyError:
            logger.warning(
                f"Object with Key: {str(resource_key)} could not be retrieved. Skipping..."
            )
            continue

        if isinstance(resource_key, ExpectationSuiteIdentifier):
            expectation_suite_name = resource_key.expectation_suite_name
            logger.debug(f" Rendering expectation suite {expectation_suite_name}")
        elif isinstance(resource_key, ValidationResultIdentifier):
            run_id = resource_key.run_id
            run_name = run_id.run_name
            run_time = run_id.run_time
            expectation_suite_name = (
                resource_key.expectation_suite_identifier.expectation_suite_name
            )
            if self.name == "profiling":
                logger.debug(
                    f" Rendering profiling for batch {resource_key.batch_identifier}"
                )
            else:
                logger.debug(
                    f" Rendering validation: run name: {run_name}, run time: {run_time}, suite {expectation_suite_name} for batch {resource_key.batch_identifier}"
                )

        try:
            rendered_content = self.renderer_class.render(resource)
            viewable_content = self.view_class.render(
                rendered_content,
                data_context_id=self.data_context_id,
                show_how_to_buttons=self.show_how_to_buttons,
            )

            self.target_store.set(
                SiteSectionIdentifier(
                    site_section_name=self.name,
                    resource_identifier=resource_key,
                ),
                viewable_content,
            )
        except Exception as e:
            # Rendering is best-effort: log the failure and continue with the next resource.
            exception_message = """\
An unexpected Exception occurred during data docs rendering. Because of this error, certain parts of data docs will \
not be rendered properly and/or may not appear altogether. Please use the trace, included in this message, to \
diagnose and repair the underlying issue. Detailed information follows:
            """
            exception_traceback = traceback.format_exc()
            exception_message += (
                f'{type(e).__name__}: "{str(e)}". '
                f'Traceback: "{exception_traceback}".'
            )
            # The message is already fully formatted and embeds the traceback. It must be passed
            # without extra positional arguments: logging would try to %-format them into the
            # message and fail.
            logger.error(exception_message)