def rule_with_variables_with_parameters( column_Age_domain, column_Date_domain, single_part_name_parameter_container, multi_part_name_parameter_container, ): variables_multi_part_name_parameter_node: ParameterNode = ParameterNode({ "false_positive_threshold": 1.0e-2, }) root_variables_node: ParameterNode = ParameterNode({ "variables": variables_multi_part_name_parameter_node, # $variables.false_positive_threshold }) rule: Rule = Rule( name="rule_with_variables_with_parameters", domain_builder=None, parameter_builders=None, expectation_configuration_builders=None, variables=ParameterContainer(parameter_nodes={ "variables": root_variables_node, }), ) rule._parameters = { column_Age_domain.id: single_part_name_parameter_container, column_Date_domain.id: multi_part_name_parameter_container, } return rule
def rule_without_variables_without_parameters(): rule: Rule = Rule( name="rule_with_no_variables_no_parameters", domain_builder=None, parameter_builders=None, expectation_configuration_builders=None, variables=None, ) return rule
def rule_without_parameters(empty_data_context, ): skip_if_python_below_minimum_version() rule: Rule = Rule( name="rule_with_no_variables_no_parameters", domain_builder=ColumnDomainBuilder(data_context=empty_data_context), expectation_configuration_builders=[ DefaultExpectationConfigurationBuilder( expectation_type="expect_my_validation") ], ) return rule
def rule_without_variables( empty_data_context, column_Age_domain, column_Date_domain, variables_multi_part_name_parameter_container, single_part_name_parameter_container, multi_part_name_parameter_container, ): rule: Rule = Rule( name="rule_without_variables", variables=None, domain_builder=ColumnDomainBuilder(data_context=empty_data_context), expectation_configuration_builders=[ DefaultExpectationConfigurationBuilder( expectation_type="expect_my_validation", column=f"{DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}column", ), ], ) return rule
def rule_with_parameters( empty_data_context, column_Age_domain, column_Date_domain, variables_multi_part_name_parameter_container, single_part_name_parameter_container, multi_part_name_parameter_container, ): skip_if_python_below_minimum_version() rule: Rule = Rule( name="rule_with_parameters", domain_builder=ColumnDomainBuilder(data_context=empty_data_context), expectation_configuration_builders=[ DefaultExpectationConfigurationBuilder( expectation_type="expect_my_validation") ], ) rule._parameters = { column_Age_domain.id: single_part_name_parameter_container, column_Date_domain.id: multi_part_name_parameter_container, } return rule
def _build_table_rule() -> Rule: """ This method builds "Rule" object focused on emitting "ExpectationConfiguration" objects for table "Domain" type. """ # Step-1: Instantiate "TableDomainBuilder" object. table_domain_builder: TableDomainBuilder = TableDomainBuilder( data_context=None, ) # Step-2: Declare "ParameterBuilder" for every metric of interest. table_row_count_metric_multi_batch_parameter_builder_for_metrics: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.get_table_row_count_metric_multi_batch_parameter_builder( json_serialize=True) # Step-3: Declare "ParameterBuilder" for every "validation" need in "ExpectationConfigurationBuilder" objects. table_row_count_range_parameter_builder_for_validations: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.build_numeric_metric_range_multi_batch_parameter_builder( metric_name="table.row_count", metric_value_kwargs=None, json_serialize=True, ) validation_parameter_builder_configs: Optional[ List[ParameterBuilderConfig]] # Step-4: Pass "validation" "ParameterBuilderConfig" objects to every "DefaultExpectationConfigurationBuilder", responsible for emitting "ExpectationConfiguration" (with specified "expectation_type"). validation_parameter_builder_configs = [ ParameterBuilderConfig( **table_row_count_range_parameter_builder_for_validations. to_json_dict(), ), ] expect_table_row_count_to_be_between_expectation_configuration_builder: DefaultExpectationConfigurationBuilder = DefaultExpectationConfigurationBuilder( expectation_type="expect_table_row_count_to_be_between", validation_parameter_builder_configs= validation_parameter_builder_configs, min_value= f"{table_row_count_range_parameter_builder_for_validations.fully_qualified_parameter_name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY}[0]", max_value= f"{table_row_count_range_parameter_builder_for_validations.fully_qualified_parameter_name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY}[1]", meta={ "profiler_details": f"{table_row_count_range_parameter_builder_for_validations.fully_qualified_parameter_name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY}", }, ) # Step-5: Instantiate and return "Rule" object, comprised of "variables", "domain_builder", "parameter_builders", and "expectation_configuration_builders" components. variables: dict = { "false_positive_rate": 0.05, "quantile_statistic_interpolation_method": "auto", "estimator": "bootstrap", "n_resamples": 9999, "random_seed": None, "include_estimator_samples_histogram_in_details": False, "truncate_values": { "lower_bound": 0, "upper_bound": None, }, "round_decimals": 0, } parameter_builders: List[ParameterBuilder] = [ table_row_count_metric_multi_batch_parameter_builder_for_metrics, ] expectation_configuration_builders: List[ ExpectationConfigurationBuilder] = [ expect_table_row_count_to_be_between_expectation_configuration_builder, ] rule: Rule = Rule( name="table_rule", variables=variables, domain_builder=table_domain_builder, parameter_builders=parameter_builders, expectation_configuration_builders= expectation_configuration_builders, ) return rule
def _build_categorical_columns_rule() -> Rule: """ This method builds "Rule" object focused on emitting "ExpectationConfiguration" objects for categorical columns. """ # Step-1: Instantiate "CategoricalColumnDomainBuilder" for selecting columns containing "FEW" discrete values. categorical_column_type_domain_builder: CategoricalColumnDomainBuilder = ( CategoricalColumnDomainBuilder( include_column_names=None, exclude_column_names=None, include_column_name_suffixes=None, exclude_column_name_suffixes=None, semantic_type_filter_module_name=None, semantic_type_filter_class_name=None, include_semantic_types=None, exclude_semantic_types=None, allowed_semantic_types_passthrough=None, cardinality_limit_mode=CardinalityLimitMode.REL_100, max_unique_values=None, max_proportion_unique=None, data_context=None, )) # Step-2: Declare "ParameterBuilder" for every metric of interest. column_distinct_values_count_metric_multi_batch_parameter_builder_for_metrics: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.get_column_distinct_values_count_metric_multi_batch_parameter_builder( json_serialize=True) # Step-3: Declare "ParameterBuilder" for every "validation" need in "ExpectationConfigurationBuilder" objects. column_distinct_values_count_range_parameter_builder_for_validations: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.build_numeric_metric_range_multi_batch_parameter_builder( metric_name="column.distinct_values.count", metric_value_kwargs=None, json_serialize=True, ) validation_parameter_builder_configs: Optional[ List[ParameterBuilderConfig]] # Step-4: Pass "validation" "ParameterBuilderConfig" objects to every "DefaultExpectationConfigurationBuilder", responsible for emitting "ExpectationConfiguration" (with specified "expectation_type"). validation_parameter_builder_configs = [ ParameterBuilderConfig( ** column_distinct_values_count_range_parameter_builder_for_validations .to_json_dict(), ), ] expect_column_unique_value_count_to_be_between_expectation_configuration_builder: DefaultExpectationConfigurationBuilder = DefaultExpectationConfigurationBuilder( expectation_type="expect_column_unique_value_count_to_be_between", validation_parameter_builder_configs= validation_parameter_builder_configs, column= f"{DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}column", min_value= f"{column_distinct_values_count_range_parameter_builder_for_validations.fully_qualified_parameter_name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY}[0]", max_value= f"{column_distinct_values_count_range_parameter_builder_for_validations.fully_qualified_parameter_name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY}[1]", strict_min=f"{VARIABLES_KEY}strict_min", strict_max=f"{VARIABLES_KEY}strict_max", meta={ "profiler_details": f"{column_distinct_values_count_range_parameter_builder_for_validations.fully_qualified_parameter_name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY}", }, ) # Step-5: Instantiate and return "Rule" object, comprised of "variables", "domain_builder", "parameter_builders", and "expectation_configuration_builders" components. variables: dict = { "mostly": 1.0, "strict_min": False, "strict_max": False, "false_positive_rate": 0.05, "quantile_statistic_interpolation_method": "auto", "estimator": "bootstrap", "n_resamples": 9999, "random_seed": None, "include_estimator_samples_histogram_in_details": False, "truncate_values": { "lower_bound": 0.0, "upper_bound": None, }, "round_decimals": 1, } parameter_builders: List[ParameterBuilder] = [ column_distinct_values_count_metric_multi_batch_parameter_builder_for_metrics, ] expectation_configuration_builders: List[ ExpectationConfigurationBuilder] = [ expect_column_unique_value_count_to_be_between_expectation_configuration_builder, ] rule: Rule = Rule( name="categorical_columns_rule", variables=variables, domain_builder=categorical_column_type_domain_builder, parameter_builders=parameter_builders, expectation_configuration_builders= expectation_configuration_builders, ) return rule
def build_map_metric_rule( rule_name: str, expectation_type: str, map_metric_name: str, include_column_names: Optional[Union[str, Optional[List[str]]]] = None, exclude_column_names: Optional[Union[str, Optional[List[str]]]] = None, include_column_name_suffixes: Optional[Union[str, Iterable, List[str]]] = None, exclude_column_name_suffixes: Optional[Union[str, Iterable, List[str]]] = None, semantic_type_filter_module_name: Optional[str] = None, semantic_type_filter_class_name: Optional[str] = None, include_semantic_types: Optional[Union[ str, SemanticDomainTypes, List[Union[str, SemanticDomainTypes]]]] = None, exclude_semantic_types: Optional[Union[ str, SemanticDomainTypes, List[Union[str, SemanticDomainTypes]]]] = None, max_unexpected_values: Union[str, int] = 0, max_unexpected_ratio: Optional[Union[str, float]] = None, min_max_unexpected_values_proportion: Union[str, float] = 9.75e-1, ) -> Rule: """ This method builds "Rule" object focused on emitting "ExpectationConfiguration" objects for any "map" style metric. """ # Step-1: Instantiate "MapMetricColumnDomainBuilder" for specified "map_metric_name" (subject to directives). map_metric_column_domain_builder: MapMetricColumnDomainBuilder = ( MapMetricColumnDomainBuilder( map_metric_name=map_metric_name, include_column_names=include_column_names, exclude_column_names=exclude_column_names, include_column_name_suffixes=include_column_name_suffixes, exclude_column_name_suffixes=exclude_column_name_suffixes, semantic_type_filter_module_name=semantic_type_filter_module_name, semantic_type_filter_class_name=semantic_type_filter_class_name, include_semantic_types=include_semantic_types, exclude_semantic_types=exclude_semantic_types, max_unexpected_values=max_unexpected_values, max_unexpected_ratio=max_unexpected_ratio, min_max_unexpected_values_proportion= min_max_unexpected_values_proportion, data_context=None, )) # Step-2: Declare "ParameterBuilder" for every metric of interest. column_values_unique_unexpected_count_metric_multi_batch_parameter_builder_for_metrics: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.get_column_values_unique_unexpected_count_metric_multi_batch_parameter_builder( json_serialize=True) column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder_for_metrics: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.get_column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder( json_serialize=True) column_values_null_unexpected_count_metric_multi_batch_parameter_builder_for_metrics: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.get_column_values_null_unexpected_count_metric_multi_batch_parameter_builder( json_serialize=True) # Step-3: Set up "MeanUnexpectedMapMetricMultiBatchParameterBuilder" to compute "condition" for emitting "ExpectationConfiguration" (based on "Domain" data). total_count_metric_multi_batch_parameter_builder_for_evaluations: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.get_table_row_count_metric_multi_batch_parameter_builder( json_serialize=False) column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder_for_evaluations: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.get_column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder( json_serialize=False) evaluation_parameter_builder_configs: Optional[ List[ParameterBuilderConfig]] = [ ParameterBuilderConfig( ** total_count_metric_multi_batch_parameter_builder_for_evaluations .to_json_dict()), ParameterBuilderConfig( ** column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder_for_evaluations .to_json_dict()), ] column_values_attribute_mean_unexpected_value_multi_batch_parameter_builder_for_validations: MeanUnexpectedMapMetricMultiBatchParameterBuilder = MeanUnexpectedMapMetricMultiBatchParameterBuilder( name=f"{map_metric_name}.unexpected_value", map_metric_name=map_metric_name, total_count_parameter_builder_name= total_count_metric_multi_batch_parameter_builder_for_evaluations.name, null_count_parameter_builder_name= column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder_for_evaluations .name, metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME, metric_value_kwargs=None, evaluation_parameter_builder_configs= evaluation_parameter_builder_configs, json_serialize=True, data_context=None, ) # Step-4: Pass "MeanUnexpectedMapMetricMultiBatchParameterBuilder" as "validation" "ParameterBuilder" for "DefaultExpectationConfigurationBuilder", responsible for emitting "ExpectationConfiguration" (with specified "expectation_type"). validation_parameter_builder_configs: Optional[ List[ParameterBuilderConfig]] = [ ParameterBuilderConfig( ** column_values_attribute_mean_unexpected_value_multi_batch_parameter_builder_for_validations .to_json_dict()), ] expect_column_values_to_be_attribute_expectation_configuration_builder: DefaultExpectationConfigurationBuilder = DefaultExpectationConfigurationBuilder( expectation_type=expectation_type, validation_parameter_builder_configs= validation_parameter_builder_configs, column= f"{DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}column", condition= f"{column_values_attribute_mean_unexpected_value_multi_batch_parameter_builder_for_validations.fully_qualified_parameter_name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY} <= 1.0 - {VARIABLES_KEY}success_ratio", meta={ "profiler_details": f"{column_values_attribute_mean_unexpected_value_multi_batch_parameter_builder_for_validations.fully_qualified_parameter_name}.{FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY}", }, ) # Step-5: Instantiate and return "Rule" object, comprised of "variables", "domain_builder", "parameter_builders", and "expectation_configuration_builders" components. variables: dict = { "success_ratio": 7.5e-1, } parameter_builders: List[ParameterBuilder] = [ column_values_unique_unexpected_count_metric_multi_batch_parameter_builder_for_metrics, column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder_for_metrics, column_values_null_unexpected_count_metric_multi_batch_parameter_builder_for_metrics, ] expectation_configuration_builders: List[ExpectationConfigurationBuilder] = [ expect_column_values_to_be_attribute_expectation_configuration_builder, ] rule: Rule = Rule( name=rule_name, variables=variables, domain_builder=map_metric_column_domain_builder, parameter_builders=parameter_builders, expectation_configuration_builders=expectation_configuration_builders, ) return rule
def _build_profiler(self) -> None: """ Builds "RuleBasedProfiler", corresponding to present DataAssistant use case. Starts with empty "RuleBasedProfiler" (initialized in constructor) and adds Rule objects. Subclasses can add custom "Rule" objects as appropriate for their respective particular DataAssistant use cases. """ variables: dict = {} profiler: Optional[BaseRuleBasedProfiler] rules: List[Rule] rule: Rule domain_builder: DomainBuilder parameter_builders: List[ParameterBuilder] expectation_configuration_builders: List[ExpectationConfigurationBuilder] """ For each Self-Initializing "Expectation" as specified by "DataAssistant.expectation_kwargs_by_expectation_type" interface property, retrieve its "RuleBasedProfiler" configuration, construct "Rule" object based on it, while incorporating metrics "ParameterBuilder" objects for "MetricDomainTypes", emitted by "DomainBuilder" of comprised "Rule", specified by "DataAssistant.metrics_parameter_builders_by_domain" interface property. Append this "Rule" object to overall DataAssistant "RuleBasedProfiler" object; incorporate "variables" as well. """ expectation_type: str expectation_kwargs: Dict[str, Any] for ( expectation_type, expectation_kwargs, ) in self.expectation_kwargs_by_expectation_type.items(): profiler = self._validator.build_rule_based_profiler_for_expectation( expectation_type=expectation_type )(**expectation_kwargs) variables.update(convert_variables_to_dict(variables=profiler.variables)) rules = profiler.rules for rule in rules: domain_builder = rule.domain_builder parameter_builders = rule.parameter_builders or [] parameter_builders.extend( self.metrics_parameter_builders_by_domain[ Domain( domain_builder.domain_type, ) ] ) expectation_configuration_builders = ( rule.expectation_configuration_builders or [] ) self.profiler.add_rule( rule=Rule( name=rule.name, variables=rule.variables, domain_builder=domain_builder, parameter_builders=parameter_builders, expectation_configuration_builders=expectation_configuration_builders, ) ) self.profiler.variables = self.profiler.reconcile_profiler_variables( variables=variables, reconciliation_strategy=DEFAULT_RECONCILATION_DIRECTIVES.variables, )