def process_batch_parameters(self, query_parameters=None, limit=None, dataset_options=None):
    """Build batch kwargs via the parent class, then attach the query parameters.

    Args:
        query_parameters: Value stored under the ``"query_parameters"`` key of
            the resulting batch kwargs (stored even when it is ``None``).
        limit: Forwarded unchanged to the parent implementation.
        dataset_options: Forwarded unchanged to the parent implementation.

    Returns:
        The object returned by the parent ``process_batch_parameters``, after
        ``nested_update`` has been applied to it with the query parameters.
    """
    processed_kwargs = super().process_batch_parameters(
        limit=limit, dataset_options=dataset_options
    )
    query_overrides = {"query_parameters": query_parameters}
    nested_update(processed_kwargs, query_overrides)
    return processed_kwargs
def get_evaluation_parameter_dependencies(self):
    """Collect the validation metrics that this object's kwargs refer to.

    A kwarg value is considered when it is a dict holding a ``"$PARAMETER"``
    entry whose string starts with ``urn:great_expectations:validations:``.
    Each such URN is parsed with ``parse_evaluation_parameter_urn``; a URN
    that raises ``ParserError`` is logged as a warning and skipped.

    Returns:
        dict: keyed by expectation suite name. For a URN without a
        ``metric_kwargs_id`` the contribution is ``[metric_name]``; otherwise
        it is ``[{"metric_kwargs_id": {<id>: [metric_name]}}]``. Contributions
        are combined with ``nested_update``. An empty dict is returned when no
        kwarg references a validations URN.
    """
    urn_prefix = "urn:great_expectations:validations:"
    dependencies = {}

    # Only the kwarg values matter here; their names are never used.
    for value in self.kwargs.values():
        if not (isinstance(value, dict) and "$PARAMETER" in value):
            continue

        parameter = value["$PARAMETER"]
        if not parameter.startswith(urn_prefix):
            continue

        try:
            evaluation_parameter_id = parse_evaluation_parameter_urn(parameter)
        except ParserError:
            logger.warning(
                "Unable to parse great_expectations urn {}".format(parameter)
            )
            continue

        suite_name = evaluation_parameter_id.expectation_suite_name
        metric_name = evaluation_parameter_id.metric_name
        metric_kwargs_id = evaluation_parameter_id.metric_kwargs_id

        if metric_kwargs_id is None:
            suite_dependencies = [metric_name]
        else:
            suite_dependencies = [
                {"metric_kwargs_id": {metric_kwargs_id: [metric_name]}}
            ]
        nested_update(dependencies, {suite_name: suite_dependencies})

    return dependencies
def test_nested_update():
    """nested_update merges a nested dict into its target, in place."""
    # Typical use: extending the ``reader_options`` dict inside batch_kwargs
    # without discarding the options that are already present.
    batch_kwargs = {
        "path": "/a/path",
        "reader_method": "read_csv",
        "reader_options": {"header": 0},
    }
    extra_options = {"reader_options": {"nrows": 1}}
    expected = {
        "path": "/a/path",
        "reader_method": "read_csv",
        "reader_options": {"header": 0, "nrows": 1},
    }

    nested_update(batch_kwargs, extra_options)

    assert batch_kwargs == expected
def reconcile_rule_variables(
    variables: ParameterContainer,
    variables_config: dict,
    reconciliation_strategy: ReconciliationStrategy = DEFAULT_RECONCILATION_DIRECTIVES.variables,
) -> dict:
    """
    Reconcile the existing "variables" of a Rule with an override configuration.

    The existing variables are first converted to dictionary form.  When an override is supplied, the result depends
    on the reconciliation strategy:
      - NESTED_UPDATE: the override is merged into the existing configuration with "nested_update";
      - REPLACE: the override dictionary itself is returned, and the existing configuration is discarded;
      - UPDATE: the existing configuration is updated with the override via "dict.update" (top-level keys only).
    With an empty or missing override, or any other strategy value, the existing configuration is returned unchanged.

    :param variables: existing variables of a Rule
    :param variables_config: variables configuration override, supplied in dictionary (configuration) form
    :param reconciliation_strategy: one of the NESTED_UPDATE, REPLACE, or UPDATE ways of reconciling overrides
    :return: reconciled variables configuration, returned in dictionary (configuration) form
    """
    current_config: dict = convert_variables_to_dict(variables=variables)

    # No override supplied: the existing variables are the result.
    if not variables_config:
        return current_config

    if reconciliation_strategy == ReconciliationStrategy.NESTED_UPDATE:
        return nested_update(current_config, variables_config)

    if reconciliation_strategy == ReconciliationStrategy.REPLACE:
        return variables_config

    if reconciliation_strategy == ReconciliationStrategy.UPDATE:
        current_config.update(variables_config)

    return current_config
def test_nested_update_lists():
    """nested_update extends list values found under a shared key and adds new keys."""
    dependencies = {
        "suite.warning": {"metric.name": ["column=foo"]},
        "suite.failure": {"metric.blarg": [""]},
    }
    new_dependencies = {
        "suite.warning": {
            "metric.other_name": ["column=foo"],
            "metric.name": ["column=bar"],
        }
    }
    # "metric.name" exists on both sides, so its lists are concatenated;
    # "suite.failure" is absent from the update and must be left untouched.
    expected = {
        "suite.warning": {
            "metric.name": ["column=foo", "column=bar"],
            "metric.other_name": ["column=foo"],
        },
        "suite.failure": {"metric.blarg": [""]},
    }

    nested_update(dependencies, new_dependencies)

    assert dependencies == expected
def get_substituted_validation_dict(
    substituted_runtime_config: dict, validation_dict: dict
) -> dict:
    """Combine one validation's settings with the already-substituted runtime config.

    For scalar settings (suite name, suite cloud id, rendered-content flag) the
    validation's own value wins when truthy, otherwise the runtime config value
    is used.  ``action_list`` is combined by ``get_updated_action_list`` and
    ``batch_request`` by ``get_substituted_batch_request``.  The dict-valued
    settings ``evaluation_parameters`` and ``runtime_configuration`` start from
    the runtime config value and are merged with the validation's value via
    ``nested_update(..., dedup=True)``.

    ``nested_update`` modifies its first argument in place, so the runtime
    config's dicts are deep-copied before merging.  Without the copy, one
    validation's values would be written into ``substituted_runtime_config``
    and leak into every validation processed afterwards.

    :param substituted_runtime_config: runtime configuration shared by all validations; not modified
    :param validation_dict: configuration of a single validation
    :return: the combined validation dictionary, after it has been passed to
        ``validate_validation_dict`` (presumably raising on an invalid result)
    """
    import copy  # local import so the block does not depend on file-level imports

    def _merged_with_runtime_config(key: str) -> dict:
        # Merge into a private copy so the shared runtime config stays untouched.
        base = copy.deepcopy(substituted_runtime_config.get(key) or {})
        return nested_update(base, validation_dict.get(key, {}), dedup=True)

    substituted_validation_dict = {
        "batch_request": get_substituted_batch_request(
            substituted_runtime_config=substituted_runtime_config,
            validation_batch_request=validation_dict.get("batch_request"),
        ),
        "expectation_suite_name": validation_dict.get("expectation_suite_name")
        or substituted_runtime_config.get("expectation_suite_name"),
        "expectation_suite_ge_cloud_id": validation_dict.get(
            "expectation_suite_ge_cloud_id"
        )
        or substituted_runtime_config.get("expectation_suite_ge_cloud_id"),
        "action_list": get_updated_action_list(
            base_action_list=substituted_runtime_config.get("action_list"),
            other_action_list=validation_dict.get("action_list", {}),
        ),
        "evaluation_parameters": _merged_with_runtime_config("evaluation_parameters"),
        "runtime_configuration": _merged_with_runtime_config("runtime_configuration"),
        "include_rendered_content": validation_dict.get("include_rendered_content")
        or substituted_runtime_config.get("include_rendered_content")
        or False,
    }

    # "name" is optional and only carried over when the validation defines it.
    if validation_dict.get("name") is not None:
        substituted_validation_dict["name"] = validation_dict["name"]

    validate_validation_dict(substituted_validation_dict)
    return substituted_validation_dict
def usage_statistics_wrapped_method(*args, **kwargs):
    """Call the wrapped ``func`` and emit a usage-statistics message about the call.

    ``func``, ``event_name``, ``args_payload_fn`` and ``result_payload_fn`` are
    not defined here; they are expected to come from the enclosing scope.

    The emitted message carries the event name, a payload built from
    ``args_payload_fn`` (before the call) and ``result_payload_fn`` (after a
    call that returned a non-``None`` result), and a ``"success"`` flag.  The
    call duration in milliseconds is exposed to the handler as a temporary
    attribute named after the event for the duration of ``emit``.

    Returns:
        Whatever ``func`` returns.  Exceptions raised by ``func`` propagate
        after the message has been emitted with ``"success": False``.
    """
    # A dry run (e.g. of `build_data_docs()`) must not emit usage statistics:
    # just delegate to the wrapped function.
    if kwargs.get("dry_run"):
        return func(*args, **kwargs)

    # Created up front so it can be filled in both before and after the call.
    event_payload = {}
    message = {"event_payload": event_payload, "event": event_name}
    result = None
    time_begin: int = int(round(time.time() * 1000))
    try:
        if args_payload_fn is not None:
            nested_update(event_payload, args_payload_fn(*args, **kwargs))
        result = func(*args, **kwargs)
        message["success"] = True
    except Exception:
        message["success"] = False
        raise
    finally:
        if result is not None and result_payload_fn is not None:
            nested_update(event_payload, result_payload_fn(result))

        time_end: int = int(round(time.time() * 1000))
        delta_t: int = time_end - time_begin

        handler = get_usage_statistics_handler(list(args))
        if handler:
            event_duration_property_name: str = f"{event_name}.duration".replace(
                ".", "_"
            )
            setattr(handler, event_duration_property_name, delta_t)
            try:
                handler.emit(message)
            finally:
                # Always remove the temporary attribute, even if emit() raises,
                # so a failed emit cannot leave stale timing data on the handler.
                delattr(handler, event_duration_property_name)

    return result
def get_evaluation_parameter_dependencies(self):
    """Collect the validation metrics that this object's kwargs refer to.

    First, every kwarg value that is a dict with a ``"$PARAMETER"`` entry is
    passed to ``find_evaluation_parameter_dependencies`` and the results are
    combined with ``nested_update``.  Then each string under the ``"urns"`` key
    of that combined result is parsed with ``ge_urn.parseString``; a URN that
    raises ``ParserError`` is logged as a warning and skipped.

    Returns:
        The result of ``_deduplicate_evaluation_parameter_dependencies`` applied
        to a dict keyed by expectation suite name.  For a URN without
        ``metric_kwargs`` the contribution is ``[metric_name]``; otherwise it is
        ``[{"metric_kwargs_id": {<metric_kwargs>: [metric_name]}}]``.
    """
    parsed_dependencies = {}
    # Only the kwarg values matter here; their names are never used.
    for value in self.kwargs.values():
        if isinstance(value, dict) and "$PARAMETER" in value:
            param_string_dependencies = find_evaluation_parameter_dependencies(
                value["$PARAMETER"]
            )
            nested_update(parsed_dependencies, param_string_dependencies)

    dependencies = {}
    for string_urn in parsed_dependencies.get("urns", []):
        try:
            urn = ge_urn.parseString(string_urn)
        except ParserError:
            # Report the URN that actually failed.  The loop variable of the
            # kwargs scan above is stale at this point and may not even be a
            # dict, so it must not be used here.
            logger.warning(
                "Unable to parse great_expectations urn {}".format(string_urn)
            )
            continue

        if not urn.get("metric_kwargs"):
            suite_dependencies = [urn["metric_name"]]
        else:
            suite_dependencies = [
                {"metric_kwargs_id": {urn["metric_kwargs"]: [urn["metric_name"]]}}
            ]
        nested_update(dependencies, {urn["expectation_suite_name"]: suite_dependencies})

    return _deduplicate_evaluation_parameter_dependencies(dependencies)
def __init__(
    self,
    data_context,
    store_backend,
    site_name=None,
    site_index_builder=None,
    show_how_to_buttons=True,
    site_section_builders=None,
    runtime_environment=None,
    ge_cloud_mode=False,
    **kwargs,
) -> None:
    """Configure the target store, the site section builders and the site index builder.

    :param data_context: context providing usage-statistics settings, the plugins directory and store names
    :param store_backend: backend configuration handed to the target store
    :param site_name: name of the site, passed on to the index builder
    :param site_index_builder: index builder configuration; defaults to ``DefaultSiteIndexBuilder``
    :param show_how_to_buttons: flag passed on to every builder
    :param site_section_builders: per-section configuration, merged over the built-in defaults
        for "expectations", "validations" and "profiling"
    :param runtime_environment: passed on to the target store
    :param ge_cloud_mode: when true a ``JsonSiteStore`` is used instead of an ``HtmlSiteStore``
    :param kwargs: accepted but not used by this method
    :raises exceptions.ClassInstantiationError: when a section builder or the index builder
        cannot be instantiated from its configuration
    """
    default_module_name = "great_expectations.render.renderer.site_builder"

    self.site_name = site_name
    self.data_context = data_context
    self.store_backend = store_backend
    self.show_how_to_buttons = show_how_to_buttons
    self.ge_cloud_mode = ge_cloud_mode

    # The data context id is only recorded when usage statistics are enabled.
    usage_statistics_config = data_context.anonymous_usage_statistics
    data_context_id = None
    if (
        usage_statistics_config
        and usage_statistics_config.enabled
        and usage_statistics_config.data_context_id
    ):
        data_context_id = usage_statistics_config.data_context_id
    self.data_context_id = data_context_id

    # Custom styles/views directories are used only if they exist on disk
    # under <plugins_directory>/custom_data_docs/.
    custom_styles_directory = None
    custom_views_directory = None
    plugins_directory = data_context.plugins_directory
    if plugins_directory:
        styles_candidate = os.path.join(
            plugins_directory, "custom_data_docs", "styles"
        )
        if os.path.isdir(styles_candidate):
            custom_styles_directory = styles_candidate
        views_candidate = os.path.join(plugins_directory, "custom_data_docs", "views")
        if os.path.isdir(views_candidate):
            custom_views_directory = views_candidate

    if site_index_builder is None:
        site_index_builder = {"class_name": "DefaultSiteIndexBuilder"}

    # The site builder is essentially a frontend store; the concrete store
    # type depends on the mode, the backend on the store_backend section.
    if ge_cloud_mode:
        self.target_store = JsonSiteStore(
            store_backend=store_backend, runtime_environment=runtime_environment
        )
    else:
        self.target_store = HtmlSiteStore(
            store_backend=store_backend, runtime_environment=runtime_environment
        )

    default_site_section_builders_config = {
        "expectations": {
            "class_name": "DefaultSiteSectionBuilder",
            "source_store_name": data_context.expectations_store_name,
            "renderer": {"class_name": "ExpectationSuitePageRenderer"},
        },
        "validations": {
            "class_name": "DefaultSiteSectionBuilder",
            "source_store_name": data_context.validations_store_name,
            "renderer": {"class_name": "ValidationResultsPageRenderer"},
            "validation_results_limit": site_index_builder.get(
                "validation_results_limit"
            ),
        },
        "profiling": {
            "class_name": "DefaultSiteSectionBuilder",
            "source_store_name": data_context.validations_store_name,
            "renderer": {"class_name": "ProfilingResultsPageRenderer"},
        },
    }

    if site_section_builders is None:
        site_section_builders = default_site_section_builders_config
    else:
        site_section_builders = nested_update(
            default_site_section_builders_config, site_section_builders
        )

    # Default run_name_filter: validations exclude profiling runs, profiling
    # includes only them.  Sections disabled with a falsey YAML string are skipped.
    if site_section_builders.get("validations", "None") not in FALSEY_YAML_STRINGS:
        if site_section_builders["validations"].get("run_name_filter") is None:
            site_section_builders["validations"]["run_name_filter"] = {
                "not_includes": "profiling"
            }
    if site_section_builders.get("profiling", "None") not in FALSEY_YAML_STRINGS:
        if site_section_builders["profiling"].get("run_name_filter") is None:
            site_section_builders["profiling"]["run_name_filter"] = {
                "includes": "profiling"
            }

    self.site_section_builders = {}
    for site_section_name, site_section_config in site_section_builders.items():
        # Empty or explicitly disabled sections get no builder.
        if not site_section_config or site_section_config in FALSEY_YAML_STRINGS:
            continue
        module_name = site_section_config.get("module_name") or default_module_name
        self.site_section_builders[site_section_name] = instantiate_class_from_config(
            config=site_section_config,
            runtime_environment={
                "data_context": data_context,
                "target_store": self.target_store,
                "custom_styles_directory": custom_styles_directory,
                "custom_views_directory": custom_views_directory,
                "data_context_id": self.data_context_id,
                "show_how_to_buttons": self.show_how_to_buttons,
                "ge_cloud_mode": self.ge_cloud_mode,
            },
            config_defaults={"name": site_section_name, "module_name": module_name},
        )
        if not self.site_section_builders[site_section_name]:
            # .get() keeps a config without "class_name" from turning this
            # error path into a KeyError.
            raise exceptions.ClassInstantiationError(
                module_name=module_name,
                package_name=None,
                class_name=site_section_config.get("class_name"),
            )

    module_name = site_index_builder.get("module_name") or default_module_name
    class_name = site_index_builder.get("class_name") or "DefaultSiteIndexBuilder"
    self.site_index_builder = instantiate_class_from_config(
        config=site_index_builder,
        runtime_environment={
            "data_context": data_context,
            "custom_styles_directory": custom_styles_directory,
            "custom_views_directory": custom_views_directory,
            "show_how_to_buttons": self.show_how_to_buttons,
            "target_store": self.target_store,
            "site_name": self.site_name,
            "data_context_id": self.data_context_id,
            "source_stores": {
                section_name: section_config.get("source_store_name")
                for (section_name, section_config) in site_section_builders.items()
                if section_config not in FALSEY_YAML_STRINGS
            },
            "site_section_builders_config": site_section_builders,
            "ge_cloud_mode": self.ge_cloud_mode,
        },
        config_defaults={
            "name": "site_index_builder",
            "module_name": module_name,
            "class_name": class_name,
        },
    )
    if not self.site_index_builder:
        # Report the effective class name (with the default applied) instead
        # of indexing the raw config, which may not define "class_name".
        raise exceptions.ClassInstantiationError(
            module_name=module_name,
            package_name=None,
            class_name=class_name,
        )
def substitute_runtime_config(source_config: dict, runtime_kwargs: dict) -> dict:
    """Overlay runtime keyword arguments on a configuration.

    When ``runtime_kwargs`` is empty or holds only falsy values, ``source_config``
    itself is returned.  Otherwise a deep copy is made and, for every runtime
    value that is not ``None``:

    - ``template_name``, ``run_name_template``, ``expectation_suite_name`` and
      ``expectation_suite_ge_cloud_id`` replace the configured value;
    - ``batch_request`` (converted with ``get_batch_request_as_dict``),
      ``evaluation_parameters`` and ``runtime_configuration`` are merged into
      the configured value with ``nested_update(..., dedup=True)``;
    - ``action_list`` is combined with ``get_updated_action_list``;
    - ``validations`` and ``profilers`` are extended with the runtime entries
      that are not already present in ``source_config``.

    :param source_config: configuration to start from
    :param runtime_kwargs: overrides supplied at run time
    :return: ``source_config`` itself, or an updated deep copy of it
    """
    if not runtime_kwargs or not any(runtime_kwargs.values()):
        return source_config

    dest_config: dict = copy.deepcopy(source_config)

    # Plain replacement.
    for replaced_key in (
        "template_name",
        "run_name_template",
        "expectation_suite_name",
        "expectation_suite_ge_cloud_id",
    ):
        if runtime_kwargs.get(replaced_key) is not None:
            dest_config[replaced_key] = runtime_kwargs[replaced_key]

    # Merge into whatever is already configured.
    if runtime_kwargs.get("batch_request") is not None:
        configured_batch_request = dest_config.get("batch_request") or {}
        runtime_batch_request = get_batch_request_as_dict(
            batch_request=runtime_kwargs["batch_request"]
        )
        dest_config["batch_request"] = nested_update(
            configured_batch_request,
            runtime_batch_request,
            dedup=True,
        )

    if runtime_kwargs.get("action_list") is not None:
        dest_config["action_list"] = get_updated_action_list(
            base_action_list=dest_config.get("action_list") or [],
            other_action_list=runtime_kwargs["action_list"],
        )

    for merged_key in ("evaluation_parameters", "runtime_configuration"):
        if runtime_kwargs.get(merged_key) is not None:
            configured_value = dest_config.get(merged_key) or {}
            dest_config[merged_key] = nested_update(
                configured_value,
                runtime_kwargs[merged_key],
                dedup=True,
            )

    # Append only the entries the source configuration does not already list.
    for list_key in ("validations", "profilers"):
        if runtime_kwargs.get(list_key) is not None:
            combined = dest_config.get(list_key) or []
            already_present = source_config.get(list_key) or []
            combined.extend(
                [
                    entry
                    for entry in runtime_kwargs[list_key]
                    if entry not in already_present
                ]
            )
            dest_config[list_key] = combined

    return dest_config
def _get_theme(theme: Optional[Dict[str, Any]]) -> Dict[str, Any]:
    """Return the default Altair theme, optionally overlaid with ``theme``.

    A deep copy of ``AltairThemes.DEFAULT_THEME.value`` is always used as the
    base, so the default theme itself is never modified.

    :param theme: overrides merged into the default with ``nested_update``;
        ``None`` or an empty dict leaves the default as-is
    :return: the resulting theme dictionary
    """
    base_theme: Dict[str, Any] = copy.deepcopy(AltairThemes.DEFAULT_THEME.value)
    if not theme:
        return base_theme
    return nested_update(base_theme, theme)
def process_batch_parameters(self, query_parameters=None, limit=None):
    """Build batch kwargs via the parent of ``SqlAlchemyDatasource`` and attach the query parameters.

    Args:
        query_parameters: Value stored under the ``"query_parameters"`` key of
            the resulting batch kwargs (stored even when it is ``None``).
        limit: Forwarded unchanged to the parent implementation.

    Returns:
        The object returned by the parent ``process_batch_parameters``, after
        ``nested_update`` has been applied to it with the query parameters.
    """
    parent = super(SqlAlchemyDatasource, self)
    processed_kwargs = parent.process_batch_parameters(limit=limit)
    query_overrides = {"query_parameters": query_parameters}
    nested_update(processed_kwargs, query_overrides)
    return processed_kwargs
def _reconcile_rule_expectation_configuration_builder_configs(
    rule: Rule, expectation_configuration_builder_configs: List[dict]
) -> List[dict]:
    """
    Reconcile the "expectation configuration builders" of a Rule with override configurations.

    Every override is first checked with "_validate_builder_override_config".  The builders currently instantiated on
    the rule are then converted to configuration dictionaries (with "class_name" and "module_name" filled in from the
    builder's class) and round-tripped through "expectationConfigurationBuilderConfigSchema", keyed by the name under
    which the rule reports them.  Finally, the overrides, keyed by their "expectation_type", are merged on top with
    "nested_update(..., dedup=True)": an override whose key is not yet present is added, while one whose key matches
    an existing builder is merged into that builder's configuration.

    :param rule: Profiler "rule", subject to expectation configuration builder overrides
    :param expectation_configuration_builder_configs: expectation configuration builder configuration overrides,
        supplied in dictionary (configuration) form
    :return: reconciled expectation configuration builder configurations, returned in dictionary (configuration)
        form; an empty list when there is nothing to return
    """
    override_config: dict
    for override_config in expectation_configuration_builder_configs:
        _validate_builder_override_config(builder_config=override_config)

    existing_builders: Dict[
        str, ExpectationConfigurationBuilder
    ] = rule._get_expectation_configuration_builders_as_dict()

    effective_configs: Dict[str, dict] = {}
    builder_name: str
    builder: ExpectationConfigurationBuilder
    for builder_name, builder in existing_builders.items():
        builder_as_dict: dict = builder.to_dict()
        builder_as_dict["class_name"] = builder.__class__.__name__
        builder_as_dict["module_name"] = builder.__class__.__module__

        # Roundtrip through schema validation to add and/or restore any missing fields.
        deserialized_config: ExpectationConfigurationBuilderConfig = (
            expectationConfigurationBuilderConfigSchema.load(builder_as_dict)
        )
        effective_configs[builder_name] = deserialized_config.to_dict()

    overrides_by_expectation_type: Dict[str, dict] = {}
    for override_config in expectation_configuration_builder_configs:
        overrides_by_expectation_type[
            override_config["expectation_type"]
        ] = override_config

    effective_configs = nested_update(
        effective_configs,
        overrides_by_expectation_type,
        dedup=True,
    )

    return list(effective_configs.values()) if effective_configs else []