def test_default_expectation_configuration_builder_alice_null_condition_parameter_builder_validation_dependency_included(
    alice_columnar_table_single_batch_context,
):
    data_context: DataContext = alice_columnar_table_single_batch_context

    batch_request: dict = {
        "datasource_name": "alice_columnar_table_single_batch_datasource",
        "data_connector_name": "alice_columnar_table_single_batch_data_connector",
        "data_asset_name": "alice_columnar_table_single_batch_data_asset",
    }

    metric_domain_kwargs: dict = {"column": "user_id"}

    parameter_container: ParameterContainer = ParameterContainer(parameter_nodes=None)
    domain: Domain = Domain(
        domain_type=MetricDomainTypes.COLUMN,
        domain_kwargs=metric_domain_kwargs,
        rule_name="my_rule",
    )
    parameters: Dict[str, ParameterContainer] = {
        domain.id: parameter_container,
    }

    fully_qualified_parameter_name_for_value: str = "$parameter.my_min_user_id.value[0]"

    condition: Optional[str] = None
    max_user_id: int = 999999999999

    min_user_id_parameter_builder_config: ParameterBuilderConfig = (
        ParameterBuilderConfig(
            module_name="great_expectations.rule_based_profiler.parameter_builder",
            class_name="MetricMultiBatchParameterBuilder",
            name="my_min_user_id",
            metric_name="column.min",
            metric_domain_kwargs=metric_domain_kwargs,
        )
    )
    validation_parameter_builder_configs: Optional[List[ParameterBuilderConfig]] = [
        min_user_id_parameter_builder_config,
    ]
    default_expectation_configuration_builder = DefaultExpectationConfigurationBuilder(
        expectation_type="expect_column_values_to_be_between",
        condition=condition,
        min_value=fully_qualified_parameter_name_for_value,
        max_value=max_user_id,
        validation_parameter_builder_configs=validation_parameter_builder_configs,
        data_context=data_context,
    )

    expectation_configuration: Optional[
        ExpectationConfiguration
    ] = default_expectation_configuration_builder.build_expectation_configuration(
        domain=domain,
        parameters=parameters,
        batch_request=batch_request,
    )

    assert expectation_configuration.kwargs["min_value"] == 397433
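# The test above never calls build_parameters() itself: supplying
# validation_parameter_builder_configs lets the builder resolve
# "$parameter.my_min_user_id.value[0]" as a validation dependency when
# build_expectation_configuration() runs. Compare the
# "..._validation_dependency_separate" test below, which builds the parameter
# explicitly first and passes its resolved value directly.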
def rule_without_parameters(
    empty_data_context,
):
    skip_if_python_below_minimum_version()

    rule: Rule = Rule(
        name="rule_with_no_variables_no_parameters",
        domain_builder=ColumnDomainBuilder(data_context=empty_data_context),
        expectation_configuration_builders=[
            DefaultExpectationConfigurationBuilder(
                expectation_type="expect_my_validation"
            )
        ],
    )
    return rule
def test_profiler_parameter_builder_added(data_context_with_taxi_data):
    """
    What does this test and why?

    This test now adds a simple ParameterBuilder to our Rule. More
    specifically, we use a MetricMultiBatchParameterBuilder to pass in the
    min_value parameter to expect_column_values_to_be_greater_than.
    """
    context: DataContext = data_context_with_taxi_data
    batch_request: BatchRequest = BatchRequest(
        datasource_name="taxi_multibatch_datasource_other_possibility",
        data_connector_name="default_inferred_data_connector_name",
        data_asset_name="yellow_tripdata_sample_2018",
        data_connector_query={"index": -1},
    )
    domain_builder: DomainBuilder = ColumnDomainBuilder(
        include_column_name_suffixes=["_amount"],
        data_context=context,
    )
    # parameter_builder
    numeric_range_parameter_builder: MetricMultiBatchParameterBuilder = (
        MetricMultiBatchParameterBuilder(
            data_context=context,
            metric_name="column.min",
            metric_domain_kwargs="$domain.domain_kwargs",
            name="my_column_min",
        )
    )
    config_builder: DefaultExpectationConfigurationBuilder = (
        DefaultExpectationConfigurationBuilder(
            expectation_type="expect_column_values_to_be_greater_than",
            # "value[-1]" indexes the last entry in the per-batch list of
            # metric values computed by the ParameterBuilder above
            value="$parameter.my_column_min.value[-1]",
            column="$domain.domain_kwargs.column",
        )
    )
    simple_rule: Rule = Rule(
        name="rule_with_variables_and_parameters",
        variables=None,
        domain_builder=domain_builder,
        parameter_builders=[numeric_range_parameter_builder],
        expectation_configuration_builders=[config_builder],
    )
    my_rbp = RuleBasedProfiler(
        name="my_rbp",
        config_version=1.0,
        data_context=context,
    )
    my_rbp.add_rule(rule=simple_rule)
    result: RuleBasedProfilerResult = my_rbp.run(batch_request=batch_request)
    expectation_configurations: List[
        ExpectationConfiguration
    ] = result.expectation_configurations
    assert len(expectation_configurations) == 4
def test_add_rule_and_run_profiler(data_context_with_taxi_data):
    """
    What does this test and why?

    This is the first test where we build a Rule in memory and use the
    add_rule() method to add it to our RuleBasedProfiler before running the
    profiler. We use the DomainBuilder from the previous test (against
    "_amount" columns) and an ExpectationConfigurationBuilder that uses
    expect_column_values_to_not_be_null, because it only needs a domain value.
    The test asserts that the profiler returns 4 Expectations, one per column
    in our domain.
    """
    context: DataContext = data_context_with_taxi_data
    batch_request: BatchRequest = BatchRequest(
        datasource_name="taxi_multibatch_datasource_other_possibility",
        data_connector_name="default_inferred_data_connector_name",
        data_asset_name="yellow_tripdata_sample_2018",
        data_connector_query={"index": -1},
    )
    domain_builder: DomainBuilder = ColumnDomainBuilder(
        include_column_name_suffixes=["_amount"],
        data_context=context,
    )
    default_expectation_configuration_builder = DefaultExpectationConfigurationBuilder(
        expectation_type="expect_column_values_to_not_be_null",
        column="$domain.domain_kwargs.column",
    )
    simple_rule: Rule = Rule(
        name="rule_with_no_variables_no_parameters",
        variables=None,
        domain_builder=domain_builder,
        expectation_configuration_builders=[
            default_expectation_configuration_builder
        ],
    )
    my_rbp: RuleBasedProfiler = RuleBasedProfiler(
        name="my_simple_rbp",
        config_version=1.0,
        data_context=context,
    )
    my_rbp.add_rule(rule=simple_rule)
    result: RuleBasedProfilerResult = my_rbp.run(batch_request=batch_request)
    expectation_configurations: List[
        ExpectationConfiguration
    ] = result.expectation_configurations
    assert len(expectation_configurations) == 4
def rule_without_variables(
    empty_data_context,
    column_Age_domain,
    column_Date_domain,
    variables_multi_part_name_parameter_container,
    single_part_name_parameter_container,
    multi_part_name_parameter_container,
):
    rule: Rule = Rule(
        name="rule_without_variables",
        variables=None,
        domain_builder=ColumnDomainBuilder(data_context=empty_data_context),
        expectation_configuration_builders=[
            DefaultExpectationConfigurationBuilder(
                expectation_type="expect_my_validation",
                column=f"{DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}column",
            ),
        ],
    )
    return rule
def rule_with_parameters(
    empty_data_context,
    column_Age_domain,
    column_Date_domain,
    variables_multi_part_name_parameter_container,
    single_part_name_parameter_container,
    multi_part_name_parameter_container,
):
    skip_if_python_below_minimum_version()

    rule: Rule = Rule(
        name="rule_with_parameters",
        domain_builder=ColumnDomainBuilder(data_context=empty_data_context),
        expectation_configuration_builders=[
            DefaultExpectationConfigurationBuilder(
                expectation_type="expect_my_validation"
            )
        ],
    )
    rule._parameters = {
        column_Age_domain.id: single_part_name_parameter_container,
        column_Date_domain.id: multi_part_name_parameter_container,
    }
    return rule
def test_profiler_save_and_load(data_context_with_taxi_data):
    """
    What does this test and why?

    This tests whether context.save_profiler() can be invoked to update a
    profiler that lives in the Store, ensuring that any changes we make to the
    Profiler, like adding a rule, are persisted. It also checks that
    context.get_profiler() returns the expected RuleBasedProfiler.
    """
    context: DataContext = data_context_with_taxi_data
    domain_builder: DomainBuilder = ColumnDomainBuilder(
        include_column_name_suffixes=["_amount"],
        data_context=context,
    )
    # parameter_builder
    numeric_range_parameter_builder: MetricMultiBatchParameterBuilder = (
        MetricMultiBatchParameterBuilder(
            data_context=context,
            metric_name="column.min",
            metric_domain_kwargs="$domain.domain_kwargs",
            name="my_column_min",
        )
    )
    config_builder: DefaultExpectationConfigurationBuilder = (
        DefaultExpectationConfigurationBuilder(
            expectation_type="expect_column_values_to_be_greater_than",
            value="$parameter.my_column_min.value[-1]",
            column="$domain.domain_kwargs.column",
        )
    )
    simple_variables_rule: Rule = Rule(
        name="rule_with_no_variables_no_parameters",
        variables=None,
        domain_builder=domain_builder,
        parameter_builders=[numeric_range_parameter_builder],
        expectation_configuration_builders=[config_builder],
    )
    my_rbp = RuleBasedProfiler(
        name="my_rbp",
        config_version=1.0,
        data_context=context,
    )
    res: dict = my_rbp.config.to_json_dict()
    assert res == {
        "class_name": "RuleBasedProfiler",
        "module_name": "great_expectations.rule_based_profiler",
        "name": "my_rbp",
        "config_version": 1.0,
        "rules": None,
        "variables": {},
    }
    my_rbp.add_rule(rule=simple_variables_rule)
    context.save_profiler(name="my_rbp", profiler=my_rbp)

    # load profiler from store
    my_loaded_profiler: RuleBasedProfiler = context.get_profiler(name="my_rbp")
    res = my_loaded_profiler.config.to_json_dict()
    assert res == {
        "module_name": "great_expectations.rule_based_profiler",
        "class_name": "RuleBasedProfiler",
        "name": "my_rbp",
        "config_version": 1.0,
        "variables": {},
        "rules": {
            "rule_with_no_variables_no_parameters": {
                "domain_builder": {
                    "module_name": "great_expectations.rule_based_profiler.domain_builder.column_domain_builder",
                    "class_name": "ColumnDomainBuilder",
                    "include_column_name_suffixes": [
                        "_amount",
                    ],
                },
                "variables": {},
                "parameter_builders": [
                    {
                        "module_name": "great_expectations.rule_based_profiler.parameter_builder.metric_multi_batch_parameter_builder",
                        "class_name": "MetricMultiBatchParameterBuilder",
                        "name": "my_column_min",
                        "metric_name": "column.min",
                        "metric_domain_kwargs": "$domain.domain_kwargs",
                        "enforce_numeric_metric": False,
                        "replace_nan_with_zero": False,
                        "reduce_scalar_metric": True,
                        "evaluation_parameter_builder_configs": None,
                    },
                ],
                "expectation_configuration_builders": [
                    {
                        "module_name": "great_expectations.rule_based_profiler.expectation_configuration_builder.default_expectation_configuration_builder",
                        "class_name": "DefaultExpectationConfigurationBuilder",
                        "expectation_type": "expect_column_values_to_be_greater_than",
                        "meta": {},
                        "column": "$domain.domain_kwargs.column",
                        "validation_parameter_builder_configs": None,
                        "value": "$parameter.my_column_min.value[-1]",
                    },
                ],
            },
        },
    }
def test_default_expectation_configuration_builder_alice_parentheses_parameter_variable_condition_true(
    alice_columnar_table_single_batch_context,
):
    data_context: DataContext = alice_columnar_table_single_batch_context

    batch_request: dict = {
        "datasource_name": "alice_columnar_table_single_batch_datasource",
        "data_connector_name": "alice_columnar_table_single_batch_data_connector",
        "data_asset_name": "alice_columnar_table_single_batch_data_asset",
    }

    metric_domain_kwargs: dict = {"column": "user_id"}

    min_user_id_parameter: MetricMultiBatchParameterBuilder = (
        MetricMultiBatchParameterBuilder(
            name="my_min_user_id",
            metric_name="column.min",
            metric_domain_kwargs=metric_domain_kwargs,
            data_context=data_context,
        )
    )

    variables: ParameterContainer = build_parameter_container_for_variables(
        {"max_user_id": 999999999999, "answer": 42}
    )

    parameter_container: ParameterContainer = ParameterContainer(parameter_nodes=None)
    domain: Domain = Domain(
        domain_type=MetricDomainTypes.COLUMN,
        domain_kwargs=metric_domain_kwargs,
        rule_name="my_rule",
    )
    parameters: Dict[str, ParameterContainer] = {
        domain.id: parameter_container,
    }

    min_user_id_parameter.build_parameters(
        domain=domain,
        parameters=parameters,
        batch_request=batch_request,
    )

    fully_qualified_parameter_name_for_value: str = "$parameter.my_min_user_id.value[0]"
    parameter_value: Any = get_parameter_value_by_fully_qualified_parameter_name(
        fully_qualified_parameter_name=fully_qualified_parameter_name_for_value,
        domain=domain,
        parameters=parameters,
    )

    # Parenthesized condition mixing "$variables" and "$parameter" references;
    # it evaluates to True for the values above.
    condition: str = "($variables.max_user_id>0 & $variables.answer==42) | $parameter.my_min_user_id.value[0]<0"
    max_value: str = "$variables.max_user_id"

    default_expectation_configuration_builder: DefaultExpectationConfigurationBuilder = (
        DefaultExpectationConfigurationBuilder(
            expectation_type="expect_column_values_to_be_between",
            condition=condition,
            min_value=parameter_value,
            max_value=max_value,
        )
    )

    expectation_configuration: Optional[
        ExpectationConfiguration
    ] = default_expectation_configuration_builder.build_expectation_configuration(
        domain=domain,
        variables=variables,
        parameters=parameters,
    )

    assert expectation_configuration.kwargs["min_value"] == 397433
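# A minimal counterpart sketch, not part of the original suite: assuming the
# parsed condition gates emission, a condition that evaluates to False should
# make build_expectation_configuration() return None. The builder and argument
# names mirror the test above; only the negated condition string is new.
def _sketch_parentheses_condition_false(domain, variables, parameters, parameter_value):
    builder = DefaultExpectationConfigurationBuilder(
        expectation_type="expect_column_values_to_be_between",
        # False for max_user_id == 999999999999, answer == 42, and a positive min
        condition="($variables.max_user_id<0 & $variables.answer==42) | $parameter.my_min_user_id.value[0]<0",
        min_value=parameter_value,
        max_value="$variables.max_user_id",
    )
    assert (
        builder.build_expectation_configuration(
            domain=domain, variables=variables, parameters=parameters
        )
        is None
    )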
def test_condition_not_string_exception(
    alice_columnar_table_single_batch_context,
):
    data_context: DataContext = alice_columnar_table_single_batch_context

    batch_request: dict = {
        "datasource_name": "alice_columnar_table_single_batch_datasource",
        "data_connector_name": "alice_columnar_table_single_batch_data_connector",
        "data_asset_name": "alice_columnar_table_single_batch_data_asset",
    }

    metric_domain_kwargs: dict = {"column": "user_id"}

    min_user_id_parameter: MetricMultiBatchParameterBuilder = (
        MetricMultiBatchParameterBuilder(
            name="my_min_user_id",
            metric_name="column.min",
            metric_domain_kwargs=metric_domain_kwargs,
            data_context=data_context,
        )
    )

    parameter_container: ParameterContainer = ParameterContainer(parameter_nodes=None)
    domain: Domain = Domain(
        domain_type=MetricDomainTypes.COLUMN,
        domain_kwargs=metric_domain_kwargs,
        rule_name="my_rule",
    )
    parameters: Dict[str, ParameterContainer] = {
        domain.id: parameter_container,
    }

    min_user_id_parameter.build_parameters(
        domain=domain,
        parameters=parameters,
        batch_request=batch_request,
    )

    fully_qualified_parameter_name_for_value: str = "$parameter.my_min_user_id"
    parameter_value: Any = get_parameter_value_by_fully_qualified_parameter_name(
        fully_qualified_parameter_name=fully_qualified_parameter_name_for_value,
        domain=domain,
        parameters=parameters,
    )

    # Conditions must be strings; a dict should be rejected at construction time.
    condition: dict = {"condition": "$variables.tolerance<0.8"}
    max_user_id: int = 999999999999

    with pytest.raises(ge_exceptions.ProfilerExecutionError) as e:
        # noinspection PyTypeChecker
        DefaultExpectationConfigurationBuilder(
            expectation_type="expect_column_values_to_be_between",
            condition=condition,
            min_value=parameter_value.value[0],
            max_value=max_user_id,
        )

    assert (
        str(e.value)
        == 'Argument "{\'condition\': \'$variables.tolerance<0.8\'}" in "DefaultExpectationConfigurationBuilder" must be of type "string" (value of type "<class \'dict\'>" was encountered).\n'
    )
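# For contrast, a hedged sketch of the accepted form: the same predicate passed
# as a plain string constructs without raising. "parameter_value" mirrors the
# test above; supplying "tolerance" via variables only matters later, when the
# condition is actually evaluated at build time.
def _sketch_condition_as_string(parameter_value):
    return DefaultExpectationConfigurationBuilder(
        expectation_type="expect_column_values_to_be_between",
        condition="$variables.tolerance<0.8",
        min_value=parameter_value.value[0],
        max_value=999999999999,
    )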
def test_default_expectation_configuration_builder_alice_null_condition_parameter_builder_validation_dependency_separate(
    alice_columnar_table_single_batch_context,
):
    data_context: DataContext = alice_columnar_table_single_batch_context

    batch_request: dict = {
        "datasource_name": "alice_columnar_table_single_batch_datasource",
        "data_connector_name": "alice_columnar_table_single_batch_data_connector",
        "data_asset_name": "alice_columnar_table_single_batch_data_asset",
    }

    metric_domain_kwargs: dict = {"column": "user_id"}

    min_user_id_parameter: MetricMultiBatchParameterBuilder = (
        MetricMultiBatchParameterBuilder(
            name="my_min_user_id",
            metric_name="column.min",
            metric_domain_kwargs=metric_domain_kwargs,
            data_context=data_context,
        )
    )

    parameter_container: ParameterContainer = ParameterContainer(parameter_nodes=None)
    domain: Domain = Domain(
        domain_type=MetricDomainTypes.COLUMN,
        domain_kwargs=metric_domain_kwargs,
        rule_name="my_rule",
    )
    parameters: Dict[str, ParameterContainer] = {
        domain.id: parameter_container,
    }

    # The validation dependency is built explicitly here, before the
    # ExpectationConfigurationBuilder is invoked.
    min_user_id_parameter.build_parameters(
        domain=domain,
        parameters=parameters,
        batch_request=batch_request,
    )

    fully_qualified_parameter_name_for_value: str = "$parameter.my_min_user_id"
    parameter_value: Any = get_parameter_value_by_fully_qualified_parameter_name(
        fully_qualified_parameter_name=fully_qualified_parameter_name_for_value,
        domain=domain,
        parameters=parameters,
    )

    condition: Optional[str] = None
    max_user_id: int = 999999999999

    default_expectation_configuration_builder = DefaultExpectationConfigurationBuilder(
        expectation_type="expect_column_values_to_be_between",
        condition=condition,
        min_value=parameter_value.value[0],
        max_value=max_user_id,
    )

    expectation_configuration: Optional[
        ExpectationConfiguration
    ] = default_expectation_configuration_builder.build_expectation_configuration(
        domain=domain,
        parameters=parameters,
    )

    assert expectation_configuration.kwargs["min_value"] == 397433
def _build_table_rule() -> Rule:
    """
    This method builds a "Rule" object focused on emitting
    "ExpectationConfiguration" objects for the table "Domain" type.
    """
    # Step-1: Instantiate "TableDomainBuilder" object.
    table_domain_builder: TableDomainBuilder = TableDomainBuilder(
        data_context=None,
    )

    # Step-2: Declare "ParameterBuilder" for every metric of interest.
    table_row_count_metric_multi_batch_parameter_builder_for_metrics: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.get_table_row_count_metric_multi_batch_parameter_builder(
        json_serialize=True
    )

    # Step-3: Declare "ParameterBuilder" for every "validation" need in "ExpectationConfigurationBuilder" objects.
    table_row_count_range_parameter_builder_for_validations: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.build_numeric_metric_range_multi_batch_parameter_builder(
        metric_name="table.row_count",
        metric_value_kwargs=None,
        json_serialize=True,
    )

    validation_parameter_builder_configs: Optional[List[ParameterBuilderConfig]]

    # Step-4: Pass "validation" "ParameterBuilderConfig" objects to every "DefaultExpectationConfigurationBuilder", responsible for emitting "ExpectationConfiguration" (with specified "expectation_type").
    validation_parameter_builder_configs = [
        ParameterBuilderConfig(
            **table_row_count_range_parameter_builder_for_validations.to_json_dict(),
        ),
    ]
    expect_table_row_count_to_be_between_expectation_configuration_builder: DefaultExpectationConfigurationBuilder = DefaultExpectationConfigurationBuilder(
        expectation_type="expect_table_row_count_to_be_between",
        validation_parameter_builder_configs=validation_parameter_builder_configs,
        min_value=f"{table_row_count_range_parameter_builder_for_validations.fully_qualified_parameter_name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY}[0]",
        max_value=f"{table_row_count_range_parameter_builder_for_validations.fully_qualified_parameter_name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY}[1]",
        meta={
            "profiler_details": f"{table_row_count_range_parameter_builder_for_validations.fully_qualified_parameter_name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY}",
        },
    )

    # Step-5: Instantiate and return "Rule" object, comprised of "variables", "domain_builder", "parameter_builders", and "expectation_configuration_builders" components.
    variables: dict = {
        "false_positive_rate": 0.05,
        "quantile_statistic_interpolation_method": "auto",
        "estimator": "bootstrap",
        "n_resamples": 9999,
        "random_seed": None,
        "include_estimator_samples_histogram_in_details": False,
        "truncate_values": {
            "lower_bound": 0,
            "upper_bound": None,
        },
        "round_decimals": 0,
    }
    parameter_builders: List[ParameterBuilder] = [
        table_row_count_metric_multi_batch_parameter_builder_for_metrics,
    ]
    expectation_configuration_builders: List[ExpectationConfigurationBuilder] = [
        expect_table_row_count_to_be_between_expectation_configuration_builder,
    ]
    rule: Rule = Rule(
        name="table_rule",
        variables=variables,
        domain_builder=table_domain_builder,
        parameter_builders=parameter_builders,
        expectation_configuration_builders=expectation_configuration_builders,
    )
    return rule
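# A minimal wiring sketch, using only calls exercised elsewhere in this module:
# the Rule returned by _build_table_rule() can be attached to a RuleBasedProfiler
# and run against a batch request. "some_context" and "some_batch_request" are
# placeholders, not names from this module.
def _sketch_run_table_rule(some_context, some_batch_request):
    profiler: RuleBasedProfiler = RuleBasedProfiler(
        name="table_rule_rbp",
        config_version=1.0,
        data_context=some_context,
    )
    profiler.add_rule(rule=_build_table_rule())
    result: RuleBasedProfilerResult = profiler.run(batch_request=some_batch_request)
    return result.expectation_configurations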
def _build_categorical_columns_rule() -> Rule:
    """
    This method builds a "Rule" object focused on emitting
    "ExpectationConfiguration" objects for categorical columns.
    """
    # Step-1: Instantiate "CategoricalColumnDomainBuilder" for selecting columns containing "FEW" discrete values.
    categorical_column_type_domain_builder: CategoricalColumnDomainBuilder = (
        CategoricalColumnDomainBuilder(
            include_column_names=None,
            exclude_column_names=None,
            include_column_name_suffixes=None,
            exclude_column_name_suffixes=None,
            semantic_type_filter_module_name=None,
            semantic_type_filter_class_name=None,
            include_semantic_types=None,
            exclude_semantic_types=None,
            allowed_semantic_types_passthrough=None,
            cardinality_limit_mode=CardinalityLimitMode.REL_100,
            max_unique_values=None,
            max_proportion_unique=None,
            data_context=None,
        )
    )

    # Step-2: Declare "ParameterBuilder" for every metric of interest.
    column_distinct_values_count_metric_multi_batch_parameter_builder_for_metrics: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.get_column_distinct_values_count_metric_multi_batch_parameter_builder(
        json_serialize=True
    )

    # Step-3: Declare "ParameterBuilder" for every "validation" need in "ExpectationConfigurationBuilder" objects.
    column_distinct_values_count_range_parameter_builder_for_validations: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.build_numeric_metric_range_multi_batch_parameter_builder(
        metric_name="column.distinct_values.count",
        metric_value_kwargs=None,
        json_serialize=True,
    )

    validation_parameter_builder_configs: Optional[List[ParameterBuilderConfig]]

    # Step-4: Pass "validation" "ParameterBuilderConfig" objects to every "DefaultExpectationConfigurationBuilder", responsible for emitting "ExpectationConfiguration" (with specified "expectation_type").
    validation_parameter_builder_configs = [
        ParameterBuilderConfig(
            **column_distinct_values_count_range_parameter_builder_for_validations.to_json_dict(),
        ),
    ]
    expect_column_unique_value_count_to_be_between_expectation_configuration_builder: DefaultExpectationConfigurationBuilder = DefaultExpectationConfigurationBuilder(
        expectation_type="expect_column_unique_value_count_to_be_between",
        validation_parameter_builder_configs=validation_parameter_builder_configs,
        column=f"{DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}column",
        min_value=f"{column_distinct_values_count_range_parameter_builder_for_validations.fully_qualified_parameter_name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY}[0]",
        max_value=f"{column_distinct_values_count_range_parameter_builder_for_validations.fully_qualified_parameter_name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY}[1]",
        strict_min=f"{VARIABLES_KEY}strict_min",
        strict_max=f"{VARIABLES_KEY}strict_max",
        meta={
            "profiler_details": f"{column_distinct_values_count_range_parameter_builder_for_validations.fully_qualified_parameter_name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY}",
        },
    )

    # Step-5: Instantiate and return "Rule" object, comprised of "variables", "domain_builder", "parameter_builders", and "expectation_configuration_builders" components.
    variables: dict = {
        "mostly": 1.0,
        "strict_min": False,
        "strict_max": False,
        "false_positive_rate": 0.05,
        "quantile_statistic_interpolation_method": "auto",
        "estimator": "bootstrap",
        "n_resamples": 9999,
        "random_seed": None,
        "include_estimator_samples_histogram_in_details": False,
        "truncate_values": {
            "lower_bound": 0.0,
            "upper_bound": None,
        },
        "round_decimals": 1,
    }
    parameter_builders: List[ParameterBuilder] = [
        column_distinct_values_count_metric_multi_batch_parameter_builder_for_metrics,
    ]
    expectation_configuration_builders: List[ExpectationConfigurationBuilder] = [
        expect_column_unique_value_count_to_be_between_expectation_configuration_builder,
    ]
    rule: Rule = Rule(
        name="categorical_columns_rule",
        variables=variables,
        domain_builder=categorical_column_type_domain_builder,
        parameter_builders=parameter_builders,
        expectation_configuration_builders=expectation_configuration_builders,
    )
    return rule
def build_map_metric_rule(
    rule_name: str,
    expectation_type: str,
    map_metric_name: str,
    include_column_names: Optional[Union[str, Optional[List[str]]]] = None,
    exclude_column_names: Optional[Union[str, Optional[List[str]]]] = None,
    include_column_name_suffixes: Optional[Union[str, Iterable, List[str]]] = None,
    exclude_column_name_suffixes: Optional[Union[str, Iterable, List[str]]] = None,
    semantic_type_filter_module_name: Optional[str] = None,
    semantic_type_filter_class_name: Optional[str] = None,
    include_semantic_types: Optional[
        Union[str, SemanticDomainTypes, List[Union[str, SemanticDomainTypes]]]
    ] = None,
    exclude_semantic_types: Optional[
        Union[str, SemanticDomainTypes, List[Union[str, SemanticDomainTypes]]]
    ] = None,
    max_unexpected_values: Union[str, int] = 0,
    max_unexpected_ratio: Optional[Union[str, float]] = None,
    min_max_unexpected_values_proportion: Union[str, float] = 9.75e-1,
) -> Rule:
    """
    This method builds a "Rule" object focused on emitting
    "ExpectationConfiguration" objects for any "map" style metric.
    """
    # Step-1: Instantiate "MapMetricColumnDomainBuilder" for specified "map_metric_name" (subject to directives).
    map_metric_column_domain_builder: MapMetricColumnDomainBuilder = (
        MapMetricColumnDomainBuilder(
            map_metric_name=map_metric_name,
            include_column_names=include_column_names,
            exclude_column_names=exclude_column_names,
            include_column_name_suffixes=include_column_name_suffixes,
            exclude_column_name_suffixes=exclude_column_name_suffixes,
            semantic_type_filter_module_name=semantic_type_filter_module_name,
            semantic_type_filter_class_name=semantic_type_filter_class_name,
            include_semantic_types=include_semantic_types,
            exclude_semantic_types=exclude_semantic_types,
            max_unexpected_values=max_unexpected_values,
            max_unexpected_ratio=max_unexpected_ratio,
            min_max_unexpected_values_proportion=min_max_unexpected_values_proportion,
            data_context=None,
        )
    )

    # Step-2: Declare "ParameterBuilder" for every metric of interest.
    column_values_unique_unexpected_count_metric_multi_batch_parameter_builder_for_metrics: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.get_column_values_unique_unexpected_count_metric_multi_batch_parameter_builder(
        json_serialize=True
    )
    column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder_for_metrics: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.get_column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder(
        json_serialize=True
    )
    column_values_null_unexpected_count_metric_multi_batch_parameter_builder_for_metrics: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.get_column_values_null_unexpected_count_metric_multi_batch_parameter_builder(
        json_serialize=True
    )

    # Step-3: Set up "MeanUnexpectedMapMetricMultiBatchParameterBuilder" to compute "condition" for emitting "ExpectationConfiguration" (based on "Domain" data).
    total_count_metric_multi_batch_parameter_builder_for_evaluations: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.get_table_row_count_metric_multi_batch_parameter_builder(
        json_serialize=False
    )
    column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder_for_evaluations: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.get_column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder(
        json_serialize=False
    )
    evaluation_parameter_builder_configs: Optional[List[ParameterBuilderConfig]] = [
        ParameterBuilderConfig(
            **total_count_metric_multi_batch_parameter_builder_for_evaluations.to_json_dict()
        ),
        ParameterBuilderConfig(
            **column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder_for_evaluations.to_json_dict()
        ),
    ]
    column_values_attribute_mean_unexpected_value_multi_batch_parameter_builder_for_validations: MeanUnexpectedMapMetricMultiBatchParameterBuilder = MeanUnexpectedMapMetricMultiBatchParameterBuilder(
        name=f"{map_metric_name}.unexpected_value",
        map_metric_name=map_metric_name,
        total_count_parameter_builder_name=total_count_metric_multi_batch_parameter_builder_for_evaluations.name,
        null_count_parameter_builder_name=column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder_for_evaluations.name,
        metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
        metric_value_kwargs=None,
        evaluation_parameter_builder_configs=evaluation_parameter_builder_configs,
        json_serialize=True,
        data_context=None,
    )

    # Step-4: Pass "MeanUnexpectedMapMetricMultiBatchParameterBuilder" as "validation" "ParameterBuilder" for "DefaultExpectationConfigurationBuilder", responsible for emitting "ExpectationConfiguration" (with specified "expectation_type").
    validation_parameter_builder_configs: Optional[List[ParameterBuilderConfig]] = [
        ParameterBuilderConfig(
            **column_values_attribute_mean_unexpected_value_multi_batch_parameter_builder_for_validations.to_json_dict()
        ),
    ]
    expect_column_values_to_be_attribute_expectation_configuration_builder: DefaultExpectationConfigurationBuilder = DefaultExpectationConfigurationBuilder(
        expectation_type=expectation_type,
        validation_parameter_builder_configs=validation_parameter_builder_configs,
        column=f"{DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}column",
        condition=f"{column_values_attribute_mean_unexpected_value_multi_batch_parameter_builder_for_validations.fully_qualified_parameter_name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY} <= 1.0 - {VARIABLES_KEY}success_ratio",
        meta={
            "profiler_details": f"{column_values_attribute_mean_unexpected_value_multi_batch_parameter_builder_for_validations.fully_qualified_parameter_name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY}",
        },
    )

    # Step-5: Instantiate and return "Rule" object, comprised of "variables", "domain_builder", "parameter_builders", and "expectation_configuration_builders" components.
    variables: dict = {
        "success_ratio": 7.5e-1,
    }
    parameter_builders: List[ParameterBuilder] = [
        column_values_unique_unexpected_count_metric_multi_batch_parameter_builder_for_metrics,
        column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder_for_metrics,
        column_values_null_unexpected_count_metric_multi_batch_parameter_builder_for_metrics,
    ]
    expectation_configuration_builders: List[ExpectationConfigurationBuilder] = [
        expect_column_values_to_be_attribute_expectation_configuration_builder,
    ]
    rule: Rule = Rule(
        name=rule_name,
        variables=variables,
        domain_builder=map_metric_column_domain_builder,
        parameter_builders=parameter_builders,
        expectation_configuration_builders=expectation_configuration_builders,
    )
    return rule
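# Illustrative call, hedged: pairing the "column_values.unique" map metric with
# "expect_column_values_to_be_unique" is one plausible instantiation of this
# helper. The rule name is arbitrary, and the metric/expectation pairing is an
# assumption, not taken from this module.
def _example_uniqueness_rule() -> Rule:
    return build_map_metric_rule(
        rule_name="column_value_uniqueness_rule",
        expectation_type="expect_column_values_to_be_unique",
        map_metric_name="column_values.unique",
    )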