def reconcile_profiler_variables(
    self, variables: Optional[Dict[str, Any]] = None
) -> Optional[ParameterContainer]:
    """
    Merge run-time "variables" overrides into the variables configured on this Profiler.

    The merge has "replace" semantics: an override key that is absent from the configured variables is added,
    and an override key that is already present replaces the configured value.

    :param variables: variables overrides, supplied in dictionary (configuration) form
    :return: reconciled variables as a ParameterContainer; when no dictionary of overrides is supplied, the
    Profiler's own "variables" attribute is returned as is
    """
    # "isinstance()" is False for None, so this single test covers both "no overrides" and "not a dictionary".
    if not isinstance(variables, dict):
        return self.variables

    # Start from the dictionary form of the configured variables and let the overrides win on key collisions.
    merged_configs: dict = self.variables.to_dict()["parameter_nodes"]["variables"][
        "variables"
    ]
    merged_configs.update(variables)

    return build_parameter_container_for_variables(variables_configs=merged_configs)
def test_column_pair_domain_builder_correct_sorted_column_names(
    alice_columnar_table_single_batch_context,
    alice_columnar_table_single_batch,
):
    """
    ColumnPairDomainBuilder, given "user_id" and "event_type", must yield exactly one Domain whose
    "column_A"/"column_B" are "event_type"/"user_id" respectively.
    """
    data_context: DataContext = alice_columnar_table_single_batch_context

    # Build the "variables" ParameterContainer from the fixture's Profiler configuration (empty if absent).
    profiler_config: str = alice_columnar_table_single_batch["profiler_config"]
    full_profiler_config_dict: dict = yaml.load(profiler_config)
    configured_variables: dict = full_profiler_config_dict.get("variables")
    variables: ParameterContainer = build_parameter_container_for_variables(
        variables_configs={} if configured_variables is None else configured_variables
    )

    batch_request: dict = {
        "datasource_name": "alice_columnar_table_single_batch_datasource",
        "data_connector_name": "alice_columnar_table_single_batch_data_connector",
        "data_asset_name": "alice_columnar_table_single_batch_data_asset",
    }

    column_pair_builder: DomainBuilder = ColumnPairDomainBuilder(
        include_column_names=["user_id", "event_type"],
        data_context=data_context,
    )
    domains: List[Domain] = column_pair_builder.get_domains(
        rule_name="my_rule",
        variables=variables,
        batch_request=batch_request,
    )

    expected_domain: dict = {
        "rule_name": "my_rule",
        "domain_type": "column_pair",
        "domain_kwargs": {
            "column_A": "event_type",
            "column_B": "user_id",
        },
        "details": {
            INFERRED_SEMANTIC_TYPE_KEY: {
                "event_type": SemanticDomainTypes.NUMERIC.value,
                "user_id": SemanticDomainTypes.NUMERIC.value,
            },
        },
    }

    assert len(domains) == 1
    # Assert Domain object equivalence.
    assert domains == [expected_domain]

    # Also test that the dot notation is supported properly throughout the dictionary fields of the Domain object.
    only_domain: Domain = domains[0]
    assert only_domain.domain_type.value == "column_pair"
    assert only_domain.domain_kwargs.column_A == "event_type"
    assert only_domain.domain_kwargs.column_B == "user_id"
def test_simple_semantic_type_column_domain_builder(
    alice_columnar_table_single_batch_context,
    alice_columnar_table_single_batch,
    column_Age_domain,
    column_Description_domain,
):
    """
    SimpleSemanticTypeColumnDomainBuilder restricted to the "numeric" semantic type must return the
    "event_type" and "user_id" column Domains, in that order.
    """
    data_context: DataContext = alice_columnar_table_single_batch_context

    # Build the "variables" ParameterContainer from the fixture's Profiler configuration (empty if absent).
    profiler_config: str = alice_columnar_table_single_batch["profiler_config"]
    full_profiler_config_dict: dict = yaml.load(profiler_config)
    configured_variables: dict = full_profiler_config_dict.get("variables")
    variables: ParameterContainer = build_parameter_container_for_variables(
        variables_configs={} if configured_variables is None else configured_variables
    )

    batch_request: dict = {
        "datasource_name": "alice_columnar_table_single_batch_datasource",
        "data_connector_name": "alice_columnar_table_single_batch_data_connector",
        "data_asset_name": "alice_columnar_table_single_batch_data_asset",
    }

    semantic_type_builder: DomainBuilder = SimpleSemanticTypeColumnDomainBuilder(
        data_context=data_context,
        batch_request=batch_request,
        semantic_types=["numeric"],
    )
    domains: List[Domain] = semantic_type_builder.get_domains(variables=variables)

    expected_domains: list = [
        {
            "domain_type": "column",
            "domain_kwargs": {"column": column_name},
            "details": {"inferred_semantic_domain_type": "numeric"},
        }
        for column_name in ("event_type", "user_id")
    ]

    assert len(domains) == 2
    assert domains == expected_domains
def test_get_fully_qualified_parameter_names(
    parameters_with_different_depth_level_values,
):
    """
    get_fully_qualified_parameter_names() must report "$variables" together with every fully-qualified
    parameter name stored for the Domain (order is not significant).
    """
    domain: Domain = Domain(
        domain_type=MetricDomainTypes.COLUMN,
        domain_kwargs=None,
        details=None,
        rule_name="my_rule",
    )

    # Populate an empty ParameterContainer from the fixture and register it under the Domain's id.
    parameter_container: ParameterContainer = ParameterContainer(parameter_nodes=None)
    build_parameter_container(
        parameter_container=parameter_container,
        parameter_values=parameters_with_different_depth_level_values,
    )
    parameters: Dict[str, ParameterContainer] = {domain.id: parameter_container}

    # Convert variables argument to ParameterContainer
    variables: ParameterContainer = build_parameter_container_for_variables(
        variables_configs={
            "my_int": 9,
            "my_float": 3.38,
            "my_string": "hello",
        }
    )

    expected_names: List[str] = [
        "$variables",
        "$parameter.date_strings.yyyy_mm_dd_hh_mm_ss_tz_date_format",
        "$parameter.date_strings.yyyy_mm_dd_date_format",
        "$parameter.date_strings.mm_yyyy_dd_hh_mm_ss_tz_date_format",
        "$parameter.date_strings.mm_yyyy_dd_date_format",
        "$parameter.date_strings.tolerances.max_abs_error_time_milliseconds",
        "$parameter.date_strings.tolerances.max_num_conversion_attempts",
        "$parameter.tolerances.mostly",
        "$parameter.tolerances.financial.usd",
        "$parameter.monthly_taxi_fairs.mean_values",
        "$parameter.daily_taxi_fairs.mean_values",
        "$parameter.weekly_taxi_fairs.mean_values",
        "$mean",
    ]

    actual_names: List[str] = get_fully_qualified_parameter_names(
        domain=domain,
        variables=variables,
        parameters=parameters,
    )

    # Same number of names, and the same names once ordering is normalized.
    assert len(actual_names) == len(expected_names)
    assert sorted(actual_names) == sorted(expected_names)
def test_multi_column_domain_builder_wrong_column_list(
    alice_columnar_table_single_batch_context,
    alice_columnar_table_single_batch,
):
    """
    MultiColumnDomainBuilder configured with "include_column_names=None" must raise ProfilerExecutionError
    with the "must not be empty" message when asked for Domains.
    """
    data_context: DataContext = alice_columnar_table_single_batch_context

    # Build the "variables" ParameterContainer from the fixture's Profiler configuration (empty if absent).
    profiler_config: str = alice_columnar_table_single_batch["profiler_config"]
    full_profiler_config_dict: dict = yaml.load(profiler_config)
    configured_variables: dict = full_profiler_config_dict.get("variables")
    variables: ParameterContainer = build_parameter_container_for_variables(
        variables_configs={} if configured_variables is None else configured_variables
    )

    batch_request: dict = {
        "datasource_name": "alice_columnar_table_single_batch_datasource",
        "data_connector_name": "alice_columnar_table_single_batch_data_connector",
        "data_asset_name": "alice_columnar_table_single_batch_data_asset",
    }

    multi_column_builder: DomainBuilder = MultiColumnDomainBuilder(
        include_column_names=None,
        data_context=data_context,
    )

    expected_message: str = (
        'Error: "column_list" in MultiColumnDomainBuilder must not be empty.'
    )

    # The failing call is issued twice; each attempt must raise with the same message.
    for _ in range(2):
        with pytest.raises(ge_exceptions.ProfilerExecutionError) as excinfo:
            # noinspection PyArgumentList
            multi_column_builder.get_domains(
                rule_name="my_rule",
                variables=variables,
                batch_request=batch_request,
            )

        assert expected_message in str(excinfo.value)
def __init__(
    self,
    profiler_config: RuleBasedProfilerConfig,
    data_context: Optional["DataContext"] = None,  # noqa: F821
):
    """
    Initialize the profiler from a RuleBasedProfilerConfig object.

    Stores the name, configuration version, and the configuration object itself; records a citation
    dictionary; converts the configured variables to a ParameterContainer; and initializes the rules by
    passing the configured rules to "_init_profiler_rules()".

    Args:
        profiler_config: RuleBasedProfilerConfig -- formal typed object containing configuration
        data_context: DataContext object that defines a full runtime environment (data access, etc.)
    """
    profiler_name: str = profiler_config.name
    version: float = profiler_config.config_version
    variables_configs: Optional[Dict[str, Any]] = profiler_config.variables
    rules_configs: Optional[Dict[str, Dict[str, Any]]] = profiler_config.rules

    self._name = profiler_name
    self._config_version = version
    self._profiler_config = profiler_config

    # Absent variables are treated as an empty dictionary from here on.
    variables_configs = {} if variables_configs is None else variables_configs

    # Necessary to annotate ExpectationSuite during `run()`
    self._citation = dict(
        name=profiler_name,
        config_version=version,
        variables=variables_configs,
        rules=rules_configs,
    )

    # Convert variables argument to ParameterContainer
    self._variables = build_parameter_container_for_variables(
        variables_configs=variables_configs
    )

    self._data_context = data_context

    # Rules are initialized last, after the attributes above have been set.
    self._rules = self._init_profiler_rules(rules=rules_configs)
def test_column_pair_domain_builder_wrong_column_names(
    alice_columnar_table_single_batch_context,
    alice_columnar_table_single_batch,
):
    """
    ColumnPairDomainBuilder given three column names must raise ProfilerExecutionError explaining that the
    columns must correspond to "column_A" and "column_B".
    """
    data_context: DataContext = alice_columnar_table_single_batch_context

    # Build the "variables" ParameterContainer from the fixture's Profiler configuration (empty if absent).
    profiler_config: str = alice_columnar_table_single_batch["profiler_config"]
    full_profiler_config_dict: dict = yaml.load(profiler_config)
    configured_variables: dict = full_profiler_config_dict.get("variables")
    variables: ParameterContainer = build_parameter_container_for_variables(
        variables_configs={} if configured_variables is None else configured_variables
    )

    batch_request: dict = {
        "datasource_name": "alice_columnar_table_single_batch_datasource",
        "data_connector_name": "alice_columnar_table_single_batch_data_connector",
        "data_asset_name": "alice_columnar_table_single_batch_data_asset",
    }

    column_pair_builder: DomainBuilder = ColumnPairDomainBuilder(
        include_column_names=["user_id", "event_type", "user_agent"],
        data_context=data_context,
    )

    expected_message: str = 'Error: Columns specified for ColumnPairDomainBuilder in sorted order must correspond to "column_A" and "column_B" (in this exact order).'

    with pytest.raises(ge_exceptions.ProfilerExecutionError) as excinfo:
        # noinspection PyArgumentList
        column_pair_builder.get_domains(
            rule_name="my_rule",
            variables=variables,
            batch_request=batch_request,
        )

    assert expected_message in str(excinfo.value)
def __init__(
    self,
    name: str,
    variables: Optional[Union[ParameterContainer, Dict[str, Any]]] = None,
    domain_builder: Optional[DomainBuilder] = None,
    parameter_builders: Optional[List[ParameterBuilder]] = None,
    expectation_configuration_builders: Optional[
        List[ExpectationConfigurationBuilder]
    ] = None,
) -> None:
    """
    Sets Rule name, variables, domain builder, parameters builders, configuration builders, and other instance data.

    Args:
        name: A string representing the name of the ProfilerRule
        variables: Any variables to be substituted within the rules; either an already-built
            ParameterContainer (used as is) or a dictionary (converted to a ParameterContainer); None is
            treated as an empty dictionary
        domain_builder: A Domain Builder object used to build rule data domain
        parameter_builders: A Parameter Builder list used to configure necessary rule evaluation parameters
        expectation_configuration_builders: A list of Expectation Configuration Builders
    """
    self._name = name

    if variables is None:
        variables = {}

    # Convert variables argument to ParameterContainer (declared once; the former second annotation of the
    # same local was a redefinition that static type checkers reject).
    if not isinstance(variables, ParameterContainer):
        variables = build_parameter_container_for_variables(
            variables_configs=variables
        )

    self.variables = variables

    self._domain_builder = domain_builder
    self._parameter_builders = parameter_builders
    self._expectation_configuration_builders = expectation_configuration_builders

    # No execution time has been recorded at construction.
    self._execution_time = None
def test_get_parameter_values_for_fully_qualified_parameter_names(
    parameters_with_different_depth_level_values,
):
    """
    get_parameter_values_for_fully_qualified_parameter_names() must map "$variables" and every
    fully-qualified parameter name stored for the Domain to the expected value.
    """
    domain: Domain = Domain(
        domain_type=MetricDomainTypes.COLUMN,
        domain_kwargs=None,
        details=None,
        rule_name="my_rule",
    )

    # Populate an empty ParameterContainer from the fixture and register it under the Domain's id.
    parameter_container: ParameterContainer = ParameterContainer(parameter_nodes=None)
    build_parameter_container(
        parameter_container=parameter_container,
        parameter_values=parameters_with_different_depth_level_values,
    )
    parameters: Dict[str, ParameterContainer] = {domain.id: parameter_container}

    # Convert variables argument to ParameterContainer
    variables: ParameterContainer = build_parameter_container_for_variables(
        variables_configs={
            "my_int": 9,
            "my_float": 3.38,
            "my_string": "hello",
        }
    )

    # fmt: off
    day_names = ("sunday", "monday", "tuesday", "wednesday", "thursday", "friday", "saturday")

    # One row per expected weekly entry; columns follow "day_names".
    weekly_rows = [
        (71.43, 74.35, 42.3, 42.3, 82.2, 78.78, 91.39),
        (81.43, 84.35, 52.3, 43.3, 22.2, 98.78, 81.39),
        (61.43, 34.35, 82.3, 72.3, 22.2, 38.78, 51.39),
        (51.43, 64.35, 72.3, 82.3, 22.2, 98.78, 31.39),
        (72.43, 77.35, 46.3, 47.3, 88.2, 79.78, 93.39),
        (72.43, 73.35, 41.3, 49.3, 80.2, 78.78, 93.39),
        (74.43, 78.35, 49.3, 43.3, 88.2, 72.78, 97.39),
        (73.43, 72.35, 40.3, 40.3, 89.2, 77.78, 90.39),
        (72.43, 73.35, 45.3, 44.3, 89.2, 77.78, 96.39),
        (75.43, 74.25, 42.33, 42.23, 82.21, 78.76, 91.37),
        (71.43, 74.37, 42.3, 42.32, 82.23, 78.77, 91.49),
        (71.63, 74.37, 42.2, 42.1, 82.29, 78.79, 91.39),
        (71.42, 74.33, 42.33, 42.34, 82.25, 78.77, 91.69),
        (71.44, 72.35, 42.33, 42.31, 82.29, 78.68, 91.49),
        (71.44, 74.32, 42.32, 42.32, 82.29, 78.77, 91.49),
        (71.44, 74.33, 42.21, 42.31, 82.27, 78.74, 91.49),
        (71.33, 74.25, 42.31, 42.03, 82.02, 78.08, 91.38),
        (71.41, 74.31, 42.39, 42.93, 82.92, 78.75, 91.49),
        (72.43, 73.35, 42.3, 32.3, 52.2, 88.78, 81.39),
        (71.43, 74.35, 32.3, 92.3, 72.2, 74.78, 51.39),
        (72.43, 64.35, 52.3, 42.39, 82.28, 78.77, 91.36),
        (81.43, 94.35, 62.3, 52.3, 92.2, 88.78, 51.39),
        (21.43, 34.35, 42.34, 62.3, 52.2, 98.78, 81.39),
        (71.33, 74.25, 42.13, 42.93, 82.82, 78.78, 91.39),
        (72.43, 73.35, 44.3, 45.3, 86.2, 77.78, 98.39),
        (79.43, 78.35, 47.3, 46.3, 85.2, 74.78, 93.39),
        (71.42, 74.31, 42.0, 42.1, 82.23, 65.78, 91.26),
        (91.43, 84.35, 42.37, 42.36, 82.25, 78.74, 91.32),
        (71.33, 74.45, 42.35, 42.36, 82.27, 26.78, 71.39),
        (71.53, 73.35, 43.32, 42.23, 82.32, 78.18, 91.49),
        (71.53, 74.25, 52.3, 52.3, 81.23, 78.78, 78.39),
    ]

    expected_values: Dict[str, ParameterNode] = {
        "$variables": {
            "my_int": 9,
            "my_float": 3.38,
            "my_string": "hello",
        },
        "$parameter.weekly_taxi_fairs.mean_values": {
            "value": [dict(zip(day_names, row)) for row in weekly_rows],
            "details": {
                "confidence": "high",
            },
        },
        "$parameter.tolerances.mostly": 0.91,
        "$parameter.tolerances.financial.usd": 1.0,
        "$parameter.monthly_taxi_fairs.mean_values": {
            "value": [
                2.3, 9.8, 42.3, 8.1, 38.5, 53.7, 71.43, 16.34,
                49.43, 74.35, 51.98, 46.42, 20.01, 69.44, 65.32, 8.83,
                55.79, 82.2, 36.93, 83.78, 31.13, 76.93, 67.67, 25.12,
                58.04, 79.78, 90.91, 15.26, 61.65, 78.78, 12.99,
            ],
            "details": {
                "confidence": "low",
            },
        },
        "$parameter.date_strings.yyyy_mm_dd_hh_mm_ss_tz_date_format": {
            "value": "%Y-%m-%d %H:%M:%S %Z",
            "details": {
                "confidence": 0.78,
            },
        },
        "$parameter.date_strings.yyyy_mm_dd_date_format": {
            "value": "%Y-%m-%d",
            "details": {
                "confidence": 0.78,
            },
        },
        "$parameter.date_strings.tolerances.max_num_conversion_attempts": 5,
        "$parameter.date_strings.tolerances.max_abs_error_time_milliseconds": 100,
        "$parameter.date_strings.mm_yyyy_dd_hh_mm_ss_tz_date_format": {
            "value": "%m-%Y-%d %H:%M:%S %Z",
            "details": {
                "confidence": 0.78,
            },
        },
        "$parameter.date_strings.mm_yyyy_dd_date_format": {
            "value": "%m-%Y-%d",
            "details": {
                "confidence": 0.78,
            },
        },
        "$parameter.daily_taxi_fairs.mean_values": {
            "value": {
                "sunday": 71.43,
                "monday": 74.35,
                "tuesday": 42.3,
                "wednesday": 42.3,
                "thursday": 82.2,
                "friday": 78.78,
                "saturday": 91.39,
            },
            "details": {
                "confidence": "medium",
            },
        },
        "$mean": 0.65,
    }
    # fmt: on

    actual_values: Dict[
        str, ParameterNode
    ] = get_parameter_values_for_fully_qualified_parameter_names(
        domain=domain,
        variables=variables,
        parameters=parameters,
    )

    assert actual_values == expected_values
def test_builder_executed_with_runtime_batch_request_does_not_raise_error(
    data_context_with_datasource_pandas_engine,
    alice_columnar_table_single_batch,
):
    """
    ColumnDomainBuilder run against a runtime batch request (an in-memory DataFrame with a single column
    "a" of date strings) must return one column Domain for "a" with the TEXT semantic type.
    """
    data_context: DataContext = data_context_with_datasource_pandas_engine

    # Build the "variables" ParameterContainer from the fixture's Profiler configuration (empty if absent).
    profiler_config: str = alice_columnar_table_single_batch["profiler_config"]
    full_profiler_config_dict: dict = yaml.load(profiler_config)
    configured_variables: dict = full_profiler_config_dict.get("variables")
    variables: ParameterContainer = build_parameter_container_for_variables(
        variables_configs={} if configured_variables is None else configured_variables
    )

    date_strings: list = [
        "2021-01-01",
        "2021-01-31",
        "2021-02-28",
        "2021-03-20",
        "2021-02-21",
        "2021-05-01",
        "2021-06-18",
    ]
    df: pd.DataFrame = pd.DataFrame({"a": date_strings})

    # The DataFrame is handed over directly through "runtime_parameters".
    batch_request: dict = {
        "datasource_name": "my_datasource",
        "data_connector_name": "default_runtime_data_connector_name",
        "data_asset_name": "my_data_asset",
        "runtime_parameters": {
            "batch_data": df,
        },
        "batch_identifiers": {
            "default_identifier_name": "my_identifier",
        },
    }

    column_builder: DomainBuilder = ColumnDomainBuilder(data_context=data_context)
    domains: List[Domain] = column_builder.get_domains(
        rule_name="my_rule",
        variables=variables,
        batch_request=batch_request,
    )

    expected_domain: dict = {
        "rule_name": "my_rule",
        "domain_type": MetricDomainTypes.COLUMN.value,
        "domain_kwargs": {
            "column": "a",
        },
        "details": {
            INFERRED_SEMANTIC_TYPE_KEY: {
                "a": SemanticDomainTypes.TEXT.value,
            },
        },
    }

    assert len(domains) == 1
    assert domains == [expected_domain]
def test_column_domain_builder_with_simple_semantic_type_included(
    alice_columnar_table_single_batch_context,
    alice_columnar_table_single_batch,
):
    """
    ColumnDomainBuilder with "include_semantic_types" set to "numeric" must return the "event_type" and
    "user_id" column Domains, in that order, each tagged with the NUMERIC semantic type.
    """
    data_context: DataContext = alice_columnar_table_single_batch_context

    # Build the "variables" ParameterContainer from the fixture's Profiler configuration (empty if absent).
    profiler_config: str = alice_columnar_table_single_batch["profiler_config"]
    full_profiler_config_dict: dict = yaml.load(profiler_config)
    configured_variables: dict = full_profiler_config_dict.get("variables")
    variables: ParameterContainer = build_parameter_container_for_variables(
        variables_configs={} if configured_variables is None else configured_variables
    )

    batch_request: dict = {
        "datasource_name": "alice_columnar_table_single_batch_datasource",
        "data_connector_name": "alice_columnar_table_single_batch_data_connector",
        "data_asset_name": "alice_columnar_table_single_batch_data_asset",
    }

    column_builder: DomainBuilder = ColumnDomainBuilder(
        include_semantic_types=["numeric"],
        data_context=data_context,
    )
    domains: List[Domain] = column_builder.get_domains(
        rule_name="my_rule",
        variables=variables,
        batch_request=batch_request,
    )

    expected_domains: list = [
        {
            "rule_name": "my_rule",
            "domain_type": "column",
            "domain_kwargs": {
                "column": column_name,
            },
            "details": {
                INFERRED_SEMANTIC_TYPE_KEY: {
                    column_name: SemanticDomainTypes.NUMERIC.value,
                },
            },
        }
        for column_name in ("event_type", "user_id")
    ]

    assert len(domains) == 2
    # Assert Domain object equivalence.
    assert domains == expected_domains
def test_column_domain_builder(
    alice_columnar_table_single_batch_context,
    alice_columnar_table_single_batch,
):
    """
    ColumnDomainBuilder with no filters must return one column Domain per table column (seven in total),
    in table order, each tagged with its inferred semantic type.
    """
    data_context: DataContext = alice_columnar_table_single_batch_context

    # Build the "variables" ParameterContainer from the fixture's Profiler configuration (empty if absent).
    profiler_config: str = alice_columnar_table_single_batch["profiler_config"]
    full_profiler_config_dict: dict = yaml.load(profiler_config)
    configured_variables: dict = full_profiler_config_dict.get("variables")
    variables: ParameterContainer = build_parameter_container_for_variables(
        variables_configs={} if configured_variables is None else configured_variables
    )

    batch_request: dict = {
        "datasource_name": "alice_columnar_table_single_batch_datasource",
        "data_connector_name": "alice_columnar_table_single_batch_data_connector",
        "data_asset_name": "alice_columnar_table_single_batch_data_asset",
    }

    column_builder: DomainBuilder = ColumnDomainBuilder(data_context=data_context)
    domains: List[Domain] = column_builder.get_domains(
        rule_name="my_rule",
        variables=variables,
        batch_request=batch_request,
    )

    # (column name, expected inferred semantic type), in the order the Domains are expected.
    expected_columns: list = [
        ("id", SemanticDomainTypes.TEXT),
        ("event_type", SemanticDomainTypes.NUMERIC),
        ("user_id", SemanticDomainTypes.NUMERIC),
        ("event_ts", SemanticDomainTypes.TEXT),
        ("server_ts", SemanticDomainTypes.TEXT),
        ("device_ts", SemanticDomainTypes.TEXT),
        ("user_agent", SemanticDomainTypes.TEXT),
    ]
    expected_domains: list = [
        {
            "rule_name": "my_rule",
            "domain_type": MetricDomainTypes.COLUMN.value,
            "domain_kwargs": {
                "column": column_name,
            },
            "details": {
                INFERRED_SEMANTIC_TYPE_KEY: {
                    column_name: semantic_type.value,
                },
            },
        }
        for column_name, semantic_type in expected_columns
    ]

    assert len(domains) == 7
    assert domains == expected_domains
def test_default_expectation_configuration_builder_alice_parentheses_parameter_variable_condition_true(
    alice_columnar_table_single_batch_context,
):
    """
    DefaultExpectationConfigurationBuilder with a parenthesized condition mixing "$variables" and
    "$parameter" references must produce an ExpectationConfiguration whose "min_value" kwarg is 397433.
    """
    data_context: DataContext = alice_columnar_table_single_batch_context

    batch_request: dict = {
        "datasource_name": "alice_columnar_table_single_batch_datasource",
        "data_connector_name": "alice_columnar_table_single_batch_data_connector",
        "data_asset_name": "alice_columnar_table_single_batch_data_asset",
    }

    metric_domain_kwargs: dict = {"column": "user_id"}

    domain: Domain = Domain(
        domain_type=MetricDomainTypes.COLUMN,
        domain_kwargs=metric_domain_kwargs,
        rule_name="my_rule",
    )
    parameters: Dict[str, ParameterContainer] = {
        domain.id: ParameterContainer(parameter_nodes=None),
    }
    variables: ParameterContainer = build_parameter_container_for_variables(
        {"max_user_id": 999999999999, "answer": 42}
    )

    # Compute the "column.min" metric for "user_id" and store it as parameter "my_min_user_id".
    min_user_id_parameter: MetricMultiBatchParameterBuilder = (
        MetricMultiBatchParameterBuilder(
            name="my_min_user_id",
            metric_name="column.min",
            metric_domain_kwargs=metric_domain_kwargs,
            data_context=data_context,
        )
    )
    min_user_id_parameter.build_parameters(
        domain=domain,
        parameters=parameters,
        batch_request=batch_request,
    )

    # Read the computed parameter value back by its fully-qualified name.
    min_user_id: Any = get_parameter_value_by_fully_qualified_parameter_name(
        fully_qualified_parameter_name="$parameter.my_min_user_id.value[0]",
        domain=domain,
        parameters=parameters,
    )

    configuration_builder: DefaultExpectationConfigurationBuilder = DefaultExpectationConfigurationBuilder(
        expectation_type="expect_column_values_to_be_between",
        condition="($variables.max_user_id>0 & $variables.answer==42) | $parameter.my_min_user_id.value[0]<0",
        min_value=min_user_id,
        max_value="$variables.max_user_id",
    )

    expectation_configuration: Optional[
        ExpectationConfiguration
    ] = configuration_builder.build_expectation_configuration(
        domain=domain,
        variables=variables,
        parameters=parameters,
    )

    assert expectation_configuration.kwargs["min_value"] == 397433
def run(
    self,
    variables: Optional[ParameterContainer] = None,
    batch_list: Optional[List[Batch]] = None,
    batch_request: Optional[Union[BatchRequestBase, dict]] = None,
    recompute_existing_parameter_values: bool = False,
    reconciliation_directives: ReconciliationDirectives = DEFAULT_RECONCILATION_DIRECTIVES,
    rule_state: Optional[RuleState] = None,
) -> RuleState:
    """
    Executes this Rule: reconciles variables, obtains Domains from the DomainBuilder (if any), and, for every
    Domain, runs each ParameterBuilder and then resolves validation dependencies of each
    ExpectationConfigurationBuilder, accumulating results in a RuleState object.

    Args:
        variables: Attribute name/value pairs, commonly-used in Builder objects
        batch_list: Explicit list of Batch objects to supply data at runtime
        batch_request: Explicit batch_request used to supply data at runtime
        recompute_existing_parameter_values: If "True", recompute value if "fully_qualified_parameter_name" exists
        reconciliation_directives: directives for how each rule component should be overwritten
        rule_state: holds "Rule" execution state and responds to "execution_time_property_name" ("execution_time");
            a new RuleState is created when omitted

    Returns:
        RuleState representing effect of executing Rule
    """
    # Reconcile the supplied variables with this Rule's own variables, then rebuild the ParameterContainer.
    reconciled_variables_configs = reconcile_rule_variables(
        variables=variables,
        variables_config=convert_variables_to_dict(variables=self.variables),
        reconciliation_strategy=reconciliation_directives.variables,
    )
    effective_variables = build_parameter_container_for_variables(
        variables_configs=reconciled_variables_configs
    )

    # Without a DomainBuilder there is nothing to iterate over.
    domains: List[Domain] = []
    if self.domain_builder is not None:
        domains = self.domain_builder.get_domains(
            rule_name=self.name,
            variables=effective_variables,
            batch_list=batch_list,
            batch_request=batch_request,
        )

    if rule_state is None:
        rule_state = RuleState()

    rule_state.rule = self
    rule_state.variables = effective_variables
    rule_state.domains = domains

    rule_state.reset_parameter_containers()

    progress_bar: Callable = determine_progress_bar_method_by_environment()

    for current_domain in progress_bar(
        domains,
        desc="Profiling Dataset:",
        position=1,
        leave=False,
        bar_format="{desc:25}{percentage:3.0f}%|{bar}{r_bar}",
    ):
        rule_state.initialize_parameter_container_for_domain(domain=current_domain)

        # Parameters are built before validation dependencies are resolved for the same Domain.
        for builder in self.parameter_builders or []:
            builder.build_parameters(
                domain=current_domain,
                variables=effective_variables,
                parameters=rule_state.parameters,
                parameter_computation_impl=None,
                batch_list=batch_list,
                batch_request=batch_request,
                recompute_existing_parameter_values=recompute_existing_parameter_values,
            )

        for configuration_builder in self.expectation_configuration_builders or []:
            configuration_builder.resolve_validation_dependencies(
                domain=current_domain,
                variables=effective_variables,
                parameters=rule_state.parameters,
                batch_list=batch_list,
                batch_request=batch_request,
                recompute_existing_parameter_values=recompute_existing_parameter_values,
            )

    return rule_state
def run(
    self,
    variables: Optional[ParameterContainer] = None,
    batch_list: Optional[List[Batch]] = None,
    batch_request: Optional[Union[BatchRequestBase, dict]] = None,
    recompute_existing_parameter_values: bool = False,
    reconciliation_directives: ReconciliationDirectives = DEFAULT_RECONCILATION_DIRECTIVES,
) -> RuleState:
    """
    Executes this Rule: reconciles variables, obtains Domains from the DomainBuilder (if any), and, for every
    Domain, runs each ParameterBuilder and then resolves validation dependencies of each
    ExpectationConfigurationBuilder, accumulating results in a newly created RuleState object.

    Args:
        variables: Attribute name/value pairs, commonly-used in Builder objects
        batch_list: Explicit list of Batch objects to supply data at runtime
        batch_request: Explicit batch_request used to supply data at runtime
        recompute_existing_parameter_values: If "True", recompute value if "fully_qualified_parameter_name" exists
        reconciliation_directives: directives for how each rule component should be overwritten

    Returns:
        RuleState representing effect of executing Rule
    """
    # Reconcile the supplied variables with this Rule's own variables, then rebuild the ParameterContainer.
    reconciled_variables_configs = reconcile_rule_variables(
        variables=variables,
        variables_config=convert_variables_to_dict(variables=self.variables),
        reconciliation_strategy=reconciliation_directives.variables,
    )
    effective_variables = build_parameter_container_for_variables(
        variables_configs=reconciled_variables_configs
    )

    # Without a DomainBuilder there is nothing to iterate over.
    domains: List[Domain] = []
    if self.domain_builder is not None:
        domains = self.domain_builder.get_domains(
            rule_name=self.name,
            variables=effective_variables,
            batch_list=batch_list,
            batch_request=batch_request,
        )

    rule_state: RuleState = RuleState(
        rule=self,
        variables=effective_variables,
        domains=domains,
    )

    rule_state.reset_parameter_containers()

    for current_domain in domains:
        rule_state.initialize_parameter_container_for_domain(domain=current_domain)

        # Parameters are built before validation dependencies are resolved for the same Domain.
        for builder in self.parameter_builders or []:
            builder.build_parameters(
                domain=current_domain,
                variables=effective_variables,
                parameters=rule_state.parameters,
                parameter_computation_impl=None,
                json_serialize=None,
                batch_list=batch_list,
                batch_request=batch_request,
                recompute_existing_parameter_values=recompute_existing_parameter_values,
            )

        for configuration_builder in self.expectation_configuration_builders or []:
            configuration_builder.resolve_validation_dependencies(
                domain=current_domain,
                variables=effective_variables,
                parameters=rule_state.parameters,
                batch_list=batch_list,
                batch_request=batch_request,
                recompute_existing_parameter_values=recompute_existing_parameter_values,
            )

    return rule_state