def reconcile_profiler_variables(
    self,
    variables: Optional[Dict[str, Any]] = None
) -> Optional[ParameterContainer]:
    """
    Combine the variables already held by this object with run-time overrides.

    The merge has "replace" semantics: an override key that is absent from the existing variables is added, while
    an override key that is already present overwrites the existing value.

    :param variables: variables overrides, supplied in dictionary (configuration) form; any non-dictionary value
    (including None) means that there is nothing to merge
    :return: "self.variables" unchanged when no dictionary of overrides is given; otherwise, a ParameterContainer
    built from the merged variables configuration
    """
    # Guard clause: without a dictionary of overrides, the existing variables are returned as they are.
    if not isinstance(variables, dict):
        return self.variables

    # Drill down to the plain dictionary of variables inside the serialized container.
    serialized_variables: dict = self.variables.to_dict()
    merged_configs: dict = serialized_variables["parameter_nodes"]["variables"]["variables"]

    # Overrides win on key collisions and are appended on key misses.
    merged_configs.update(variables)

    return build_parameter_container_for_variables(variables_configs=merged_configs)
def test_column_pair_domain_builder_correct_sorted_column_names(
    alice_columnar_table_single_batch_context,
    alice_columnar_table_single_batch,
):
    """
    ColumnPairDomainBuilder, given "user_id" and "event_type", must emit exactly one column-pair Domain whose
    "column_A" is "event_type" and whose "column_B" is "user_id".
    """
    data_context: DataContext = alice_columnar_table_single_batch_context

    # Variables come from the profiler configuration of the fixture; a missing section means "no variables".
    parsed_profiler_config: dict = yaml.load(
        alice_columnar_table_single_batch["profiler_config"]
    )
    raw_variables = parsed_profiler_config.get("variables")
    variables: ParameterContainer = build_parameter_container_for_variables(
        variables_configs={} if raw_variables is None else raw_variables
    )

    batch_request: dict = {
        "datasource_name": "alice_columnar_table_single_batch_datasource",
        "data_connector_name": "alice_columnar_table_single_batch_data_connector",
        "data_asset_name": "alice_columnar_table_single_batch_data_asset",
    }

    # Column names are deliberately passed in non-sorted order.
    domain_builder: DomainBuilder = ColumnPairDomainBuilder(
        include_column_names=["user_id", "event_type"],
        data_context=data_context,
    )
    domains: List[Domain] = domain_builder.get_domains(
        rule_name="my_rule",
        variables=variables,
        batch_request=batch_request,
    )

    assert len(domains) == 1

    # Assert Domain object equivalence.
    expected_domain: dict = {
        "rule_name": "my_rule",
        "domain_type": "column_pair",
        "domain_kwargs": {
            "column_A": "event_type",
            "column_B": "user_id",
        },
        "details": {
            INFERRED_SEMANTIC_TYPE_KEY: {
                "event_type": SemanticDomainTypes.NUMERIC.value,
                "user_id": SemanticDomainTypes.NUMERIC.value,
            },
        },
    }
    assert domains == [expected_domain]

    # Dot notation must also work throughout the dictionary fields of the Domain object.
    only_domain: Domain = domains[0]
    assert only_domain.domain_type.value == "column_pair"
    assert only_domain.domain_kwargs.column_A == "event_type"
    assert only_domain.domain_kwargs.column_B == "user_id"
Ejemplo n.º 3
0
def test_simple_semantic_type_column_domain_builder(
    alice_columnar_table_single_batch_context,
    alice_columnar_table_single_batch,
    column_Age_domain,
    column_Description_domain,
):
    """
    SimpleSemanticTypeColumnDomainBuilder restricted to the "numeric" semantic type must return one column
    Domain for "event_type" and one for "user_id", in that order.
    """
    data_context: DataContext = alice_columnar_table_single_batch_context

    # Variables come from the profiler configuration of the fixture; a missing section means "no variables".
    parsed_profiler_config: dict = yaml.load(
        alice_columnar_table_single_batch["profiler_config"]
    )
    raw_variables = parsed_profiler_config.get("variables")
    variables: ParameterContainer = build_parameter_container_for_variables(
        variables_configs={} if raw_variables is None else raw_variables
    )

    batch_request: dict = {
        "datasource_name": "alice_columnar_table_single_batch_datasource",
        "data_connector_name": "alice_columnar_table_single_batch_data_connector",
        "data_asset_name": "alice_columnar_table_single_batch_data_asset",
    }
    domain_builder: DomainBuilder = SimpleSemanticTypeColumnDomainBuilder(
        data_context=data_context,
        batch_request=batch_request,
        semantic_types=["numeric"],
    )
    domains: List[Domain] = domain_builder.get_domains(variables=variables)

    assert len(domains) == 2

    # Both expected domains share the same shape and differ only in the column name.
    expected_domains: List[dict] = [
        {
            "domain_type": "column",
            "domain_kwargs": {
                "column": column_name,
            },
            "details": {
                "inferred_semantic_domain_type": "numeric"
            },
        }
        for column_name in ("event_type", "user_id")
    ]
    assert domains == expected_domains
def test_get_fully_qualified_parameter_names(
    parameters_with_different_depth_level_values,
):
    """
    get_fully_qualified_parameter_names() must report "$variables" together with every fully-qualified name
    of the parameters registered for the Domain; the order of the names is not significant.
    """
    # Populate an initially empty container from the fixture values.
    parameter_container: ParameterContainer = ParameterContainer(parameter_nodes=None)
    build_parameter_container(
        parameter_container=parameter_container,
        parameter_values=parameters_with_different_depth_level_values,
    )

    domain: Domain = Domain(
        domain_type=MetricDomainTypes.COLUMN,
        domain_kwargs=None,
        details=None,
        rule_name="my_rule",
    )

    # Convert variables argument to ParameterContainer
    variables_configs: dict = {
        "my_int": 9,
        "my_float": 3.38,
        "my_string": "hello",
    }
    variables: ParameterContainer = build_parameter_container_for_variables(
        variables_configs=variables_configs
    )

    # Parameters are looked up per Domain, keyed by the Domain identifier.
    parameters: Dict[str, ParameterContainer] = {domain.id: parameter_container}

    expected_names: List[str] = [
        "$variables",
        "$parameter.date_strings.yyyy_mm_dd_hh_mm_ss_tz_date_format",
        "$parameter.date_strings.yyyy_mm_dd_date_format",
        "$parameter.date_strings.mm_yyyy_dd_hh_mm_ss_tz_date_format",
        "$parameter.date_strings.mm_yyyy_dd_date_format",
        "$parameter.date_strings.tolerances.max_abs_error_time_milliseconds",
        "$parameter.date_strings.tolerances.max_num_conversion_attempts",
        "$parameter.tolerances.mostly",
        "$parameter.tolerances.financial.usd",
        "$parameter.monthly_taxi_fairs.mean_values",
        "$parameter.daily_taxi_fairs.mean_values",
        "$parameter.weekly_taxi_fairs.mean_values",
        "$mean",
    ]

    actual_names: List[str] = get_fully_qualified_parameter_names(
        domain=domain,
        variables=variables,
        parameters=parameters,
    )

    # Compare sizes first, then contents irrespective of ordering.
    assert len(actual_names) == len(expected_names)
    assert sorted(actual_names) == sorted(expected_names)
def test_multi_column_domain_builder_wrong_column_list(
    alice_columnar_table_single_batch_context,
    alice_columnar_table_single_batch,
):
    """
    MultiColumnDomainBuilder created with "include_column_names=None" must raise ProfilerExecutionError,
    with the "column_list ... must not be empty" message, when asked for domains.
    """
    data_context: DataContext = alice_columnar_table_single_batch_context

    # Variables come from the profiler configuration of the fixture; a missing section means "no variables".
    parsed_profiler_config: dict = yaml.load(
        alice_columnar_table_single_batch["profiler_config"]
    )
    raw_variables = parsed_profiler_config.get("variables")
    variables: ParameterContainer = build_parameter_container_for_variables(
        variables_configs={} if raw_variables is None else raw_variables
    )

    batch_request: dict = {
        "datasource_name": "alice_columnar_table_single_batch_datasource",
        "data_connector_name": "alice_columnar_table_single_batch_data_connector",
        "data_asset_name": "alice_columnar_table_single_batch_data_asset",
    }

    domain_builder: DomainBuilder = MultiColumnDomainBuilder(
        include_column_names=None,
        data_context=data_context,
    )

    # The same failing call is exercised twice on the same builder instance; both attempts must fail identically.
    for _attempt in range(2):
        with pytest.raises(ge_exceptions.ProfilerExecutionError) as excinfo:
            # noinspection PyArgumentList
            domain_builder.get_domains(
                rule_name="my_rule",
                variables=variables,
                batch_request=batch_request,
            )

        assert 'Error: "column_list" in MultiColumnDomainBuilder must not be empty.' in str(
            excinfo.value)
    def __init__(
            self,
            profiler_config: RuleBasedProfilerConfig,
            data_context: Optional["DataContext"] = None,  # noqa: F821
    ):
        """
        Create a new RuleBasedProfilerBase using configured rules (as captured in the RuleBasedProfilerConfig object).

        The name, configuration version, variables, and rules are read from the configuration object.  Variables are
        converted to a ParameterContainer, and rules are instantiated through "_init_profiler_rules()".

        Args:
            profiler_config: RuleBasedProfilerConfig -- formal typed object containing configuration
            data_context: DataContext object that defines a full runtime environment (data access, etc.)
        """
        # Read everything needed from the configuration object up front.
        profiler_name: str = profiler_config.name
        version: float = profiler_config.config_version
        configured_variables: Optional[Dict[str, Any]] = profiler_config.variables
        rules_configs: Optional[Dict[str, Dict[str, Any]]] = profiler_config.rules

        # Absent variables are treated as an empty set of variables.
        variables_configs: Dict[str, Any] = (
            {} if configured_variables is None else configured_variables
        )

        self._name = profiler_name
        self._config_version = version
        self._profiler_config = profiler_config

        # Necessary to annotate ExpectationSuite during `run()`
        self._citation = dict(
            name=profiler_name,
            config_version=version,
            variables=variables_configs,
            rules=rules_configs,
        )

        # Convert variables argument to ParameterContainer
        self._variables = build_parameter_container_for_variables(
            variables_configs=variables_configs)

        self._data_context = data_context

        # Rules are initialized last, after variables and data context are in place.
        self._rules = self._init_profiler_rules(rules=rules_configs)
def test_column_pair_domain_builder_wrong_column_names(
    alice_columnar_table_single_batch_context,
    alice_columnar_table_single_batch,
):
    """
    ColumnPairDomainBuilder given three column names must raise ProfilerExecutionError, whose message states
    that the sorted columns must correspond to "column_A" and "column_B".
    """
    data_context: DataContext = alice_columnar_table_single_batch_context

    # Variables come from the profiler configuration of the fixture; a missing section means "no variables".
    parsed_profiler_config: dict = yaml.load(
        alice_columnar_table_single_batch["profiler_config"]
    )
    raw_variables = parsed_profiler_config.get("variables")
    variables: ParameterContainer = build_parameter_container_for_variables(
        variables_configs={} if raw_variables is None else raw_variables
    )

    batch_request: dict = {
        "datasource_name": "alice_columnar_table_single_batch_datasource",
        "data_connector_name": "alice_columnar_table_single_batch_data_connector",
        "data_asset_name": "alice_columnar_table_single_batch_data_asset",
    }

    # One column name too many for a column pair.
    domain_builder: DomainBuilder = ColumnPairDomainBuilder(
        include_column_names=["user_id", "event_type", "user_agent"],
        data_context=data_context,
    )

    with pytest.raises(ge_exceptions.ProfilerExecutionError) as excinfo:
        # noinspection PyArgumentList
        domain_builder.get_domains(
            rule_name="my_rule",
            variables=variables,
            batch_request=batch_request,
        )

    expected_message: str = 'Error: Columns specified for ColumnPairDomainBuilder in sorted order must correspond to "column_A" and "column_B" (in this exact order).'
    assert expected_message in str(excinfo.value)
Ejemplo n.º 8
0
    def __init__(
        self,
        name: str,
        variables: Optional[Union[ParameterContainer, Dict[str, Any]]] = None,
        domain_builder: Optional[DomainBuilder] = None,
        parameter_builders: Optional[List[ParameterBuilder]] = None,
        expectation_configuration_builders: Optional[
            List[ExpectationConfigurationBuilder]] = None,
    ) -> None:
        """
        Sets Rule name, variables, domain builder, parameters builders, configuration builders, and other instance data.

        Args:
            name: A string representing the name of the ProfilerRule
            variables: Any variables to be substituted within the rules; either a ready-made ParameterContainer
                (used as is) or a dictionary (converted to a ParameterContainer); None is treated as an empty dictionary
            domain_builder: A Domain Builder object used to build rule data domain
            parameter_builders: A Parameter Builder list used to configure necessary rule evaluation parameters
            expectation_configuration_builders: A list of Expectation Configuration Builders
        """
        self._name = name

        # Convert variables argument to ParameterContainer
        variables_container: ParameterContainer
        if isinstance(variables, ParameterContainer):
            variables_container = variables
        else:
            variables_container = build_parameter_container_for_variables(
                variables_configs={} if variables is None else variables)

        self.variables = variables_container

        self._domain_builder = domain_builder
        self._parameter_builders = parameter_builders
        self._expectation_configuration_builders = expectation_configuration_builders

        # No execution time is recorded at construction.
        self._execution_time = None
def test_get_parameter_values_for_fully_qualified_parameter_names(
    parameters_with_different_depth_level_values,
):
    """
    Check that get_parameter_values_for_fully_qualified_parameter_names() returns, for one Domain, a dictionary
    that maps every fully-qualified parameter name (including "$variables") to the expected value.
    """
    # Populate an initially empty container from the fixture values.
    # NOTE(review): the fixture presumably holds the values spelled out in the expected dictionary below -- confirm.
    parameter_container: ParameterContainer = ParameterContainer(parameter_nodes=None)
    build_parameter_container(
        parameter_container=parameter_container,
        parameter_values=parameters_with_different_depth_level_values,
    )

    domain: Domain = Domain(
        domain_type=MetricDomainTypes.COLUMN,
        domain_kwargs=None,
        details=None,
        rule_name="my_rule",
    )
    # Convert variables argument to ParameterContainer
    variables: ParameterContainer = build_parameter_container_for_variables(
        variables_configs={
            "my_int": 9,
            "my_float": 3.38,
            "my_string": "hello",
        }
    )
    # Parameters are passed per Domain, keyed by the Domain identifier.
    parameters: Dict[str, ParameterContainer] = {
        domain.id: parameter_container,
    }

    # Expected mapping from fully-qualified parameter name to its value; "$variables" carries the variables above.
    # fmt: off
    expected_parameter_values_for_fully_qualified_parameter_names: Dict[str, ParameterNode] = {
        "$variables": {
            "my_int": 9,
            "my_float": 3.38,
            "my_string": "hello",
        },
        "$parameter.weekly_taxi_fairs.mean_values": {
            "value": [
                {
                    "sunday": 71.43,
                    "monday": 74.35,
                    "tuesday": 42.3,
                    "wednesday": 42.3,
                    "thursday": 82.2,
                    "friday": 78.78,
                    "saturday": 91.39,
                },
                {
                    "sunday": 81.43,
                    "monday": 84.35,
                    "tuesday": 52.3,
                    "wednesday": 43.3,
                    "thursday": 22.2,
                    "friday": 98.78,
                    "saturday": 81.39,
                },
                {
                    "sunday": 61.43,
                    "monday": 34.35,
                    "tuesday": 82.3,
                    "wednesday": 72.3,
                    "thursday": 22.2,
                    "friday": 38.78,
                    "saturday": 51.39,
                },
                {
                    "sunday": 51.43,
                    "monday": 64.35,
                    "tuesday": 72.3,
                    "wednesday": 82.3,
                    "thursday": 22.2,
                    "friday": 98.78,
                    "saturday": 31.39,
                },
                {
                    "sunday": 72.43,
                    "monday": 77.35,
                    "tuesday": 46.3,
                    "wednesday": 47.3,
                    "thursday": 88.2,
                    "friday": 79.78,
                    "saturday": 93.39,
                },
                {
                    "sunday": 72.43,
                    "monday": 73.35,
                    "tuesday": 41.3,
                    "wednesday": 49.3,
                    "thursday": 80.2,
                    "friday": 78.78,
                    "saturday": 93.39,
                },
                {
                    "sunday": 74.43,
                    "monday": 78.35,
                    "tuesday": 49.3,
                    "wednesday": 43.3,
                    "thursday": 88.2,
                    "friday": 72.78,
                    "saturday": 97.39,
                },
                {
                    "sunday": 73.43,
                    "monday": 72.35,
                    "tuesday": 40.3,
                    "wednesday": 40.3,
                    "thursday": 89.2,
                    "friday": 77.78,
                    "saturday": 90.39,
                },
                {
                    "sunday": 72.43,
                    "monday": 73.35,
                    "tuesday": 45.3,
                    "wednesday": 44.3,
                    "thursday": 89.2,
                    "friday": 77.78,
                    "saturday": 96.39,
                },
                {
                    "sunday": 75.43,
                    "monday": 74.25,
                    "tuesday": 42.33,
                    "wednesday": 42.23,
                    "thursday": 82.21,
                    "friday": 78.76,
                    "saturday": 91.37,
                },
                {
                    "sunday": 71.43,
                    "monday": 74.37,
                    "tuesday": 42.3,
                    "wednesday": 42.32,
                    "thursday": 82.23,
                    "friday": 78.77,
                    "saturday": 91.49,
                },
                {
                    "sunday": 71.63,
                    "monday": 74.37,
                    "tuesday": 42.2,
                    "wednesday": 42.1,
                    "thursday": 82.29,
                    "friday": 78.79,
                    "saturday": 91.39,
                },
                {
                    "sunday": 71.42,
                    "monday": 74.33,
                    "tuesday": 42.33,
                    "wednesday": 42.34,
                    "thursday": 82.25,
                    "friday": 78.77,
                    "saturday": 91.69,
                },
                {
                    "sunday": 71.44,
                    "monday": 72.35,
                    "tuesday": 42.33,
                    "wednesday": 42.31,
                    "thursday": 82.29,
                    "friday": 78.68,
                    "saturday": 91.49,
                },
                {
                    "sunday": 71.44,
                    "monday": 74.32,
                    "tuesday": 42.32,
                    "wednesday": 42.32,
                    "thursday": 82.29,
                    "friday": 78.77,
                    "saturday": 91.49,
                },
                {
                    "sunday": 71.44,
                    "monday": 74.33,
                    "tuesday": 42.21,
                    "wednesday": 42.31,
                    "thursday": 82.27,
                    "friday": 78.74,
                    "saturday": 91.49,
                },
                {
                    "sunday": 71.33,
                    "monday": 74.25,
                    "tuesday": 42.31,
                    "wednesday": 42.03,
                    "thursday": 82.02,
                    "friday": 78.08,
                    "saturday": 91.38,
                },
                {
                    "sunday": 71.41,
                    "monday": 74.31,
                    "tuesday": 42.39,
                    "wednesday": 42.93,
                    "thursday": 82.92,
                    "friday": 78.75,
                    "saturday": 91.49,
                },
                {
                    "sunday": 72.43,
                    "monday": 73.35,
                    "tuesday": 42.3,
                    "wednesday": 32.3,
                    "thursday": 52.2,
                    "friday": 88.78,
                    "saturday": 81.39,
                },
                {
                    "sunday": 71.43,
                    "monday": 74.35,
                    "tuesday": 32.3,
                    "wednesday": 92.3,
                    "thursday": 72.2,
                    "friday": 74.78,
                    "saturday": 51.39,
                },
                {
                    "sunday": 72.43,
                    "monday": 64.35,
                    "tuesday": 52.3,
                    "wednesday": 42.39,
                    "thursday": 82.28,
                    "friday": 78.77,
                    "saturday": 91.36,
                },
                {
                    "sunday": 81.43,
                    "monday": 94.35,
                    "tuesday": 62.3,
                    "wednesday": 52.3,
                    "thursday": 92.2,
                    "friday": 88.78,
                    "saturday": 51.39,
                },
                {
                    "sunday": 21.43,
                    "monday": 34.35,
                    "tuesday": 42.34,
                    "wednesday": 62.3,
                    "thursday": 52.2,
                    "friday": 98.78,
                    "saturday": 81.39,
                },
                {
                    "sunday": 71.33,
                    "monday": 74.25,
                    "tuesday": 42.13,
                    "wednesday": 42.93,
                    "thursday": 82.82,
                    "friday": 78.78,
                    "saturday": 91.39,
                },
                {
                    "sunday": 72.43,
                    "monday": 73.35,
                    "tuesday": 44.3,
                    "wednesday": 45.3,
                    "thursday": 86.2,
                    "friday": 77.78,
                    "saturday": 98.39,
                },
                {
                    "sunday": 79.43,
                    "monday": 78.35,
                    "tuesday": 47.3,
                    "wednesday": 46.3,
                    "thursday": 85.2,
                    "friday": 74.78,
                    "saturday": 93.39,
                },
                {
                    "sunday": 71.42,
                    "monday": 74.31,
                    "tuesday": 42.0,
                    "wednesday": 42.1,
                    "thursday": 82.23,
                    "friday": 65.78,
                    "saturday": 91.26,
                },
                {
                    "sunday": 91.43,
                    "monday": 84.35,
                    "tuesday": 42.37,
                    "wednesday": 42.36,
                    "thursday": 82.25,
                    "friday": 78.74,
                    "saturday": 91.32,
                },
                {
                    "sunday": 71.33,
                    "monday": 74.45,
                    "tuesday": 42.35,
                    "wednesday": 42.36,
                    "thursday": 82.27,
                    "friday": 26.78,
                    "saturday": 71.39,
                },
                {
                    "sunday": 71.53,
                    "monday": 73.35,
                    "tuesday": 43.32,
                    "wednesday": 42.23,
                    "thursday": 82.32,
                    "friday": 78.18,
                    "saturday": 91.49,
                },
                {
                    "sunday": 71.53,
                    "monday": 74.25,
                    "tuesday": 52.3,
                    "wednesday": 52.3,
                    "thursday": 81.23,
                    "friday": 78.78,
                    "saturday": 78.39,
                },
            ],
            "details": {
                "confidence": "high",
            },
        },
        "$parameter.tolerances.mostly": 0.91,
        "$parameter.tolerances.financial.usd": 1.0,
        "$parameter.monthly_taxi_fairs.mean_values": {
            "value": [
                2.3,
                9.8,
                42.3,
                8.1,
                38.5,
                53.7,
                71.43,
                16.34,
                49.43,
                74.35,
                51.98,
                46.42,
                20.01,
                69.44,
                65.32,
                8.83,
                55.79,
                82.2,
                36.93,
                83.78,
                31.13,
                76.93,
                67.67,
                25.12,
                58.04,
                79.78,
                90.91,
                15.26,
                61.65,
                78.78,
                12.99,
            ],
            "details": {
                "confidence": "low",
            },
        },
        "$parameter.date_strings.yyyy_mm_dd_hh_mm_ss_tz_date_format": {
            "value": "%Y-%m-%d %H:%M:%S %Z",
            "details": {
                "confidence": 0.78,
            },
        },
        "$parameter.date_strings.yyyy_mm_dd_date_format": {
            "value": "%Y-%m-%d",
            "details": {
                "confidence": 0.78,
            },
        },
        "$parameter.date_strings.tolerances.max_num_conversion_attempts": 5,
        "$parameter.date_strings.tolerances.max_abs_error_time_milliseconds": 100,
        "$parameter.date_strings.mm_yyyy_dd_hh_mm_ss_tz_date_format": {
            "value": "%m-%Y-%d %H:%M:%S %Z",
            "details": {
                "confidence": 0.78,
            },
        },
        "$parameter.date_strings.mm_yyyy_dd_date_format": {
            "value": "%m-%Y-%d",
            "details": {
                "confidence": 0.78,
            },
        },
        "$parameter.daily_taxi_fairs.mean_values": {
            "value": {
                "sunday": 71.43,
                "monday": 74.35,
                "tuesday": 42.3,
                "wednesday": 42.3,
                "thursday": 82.2,
                "friday": 78.78,
                "saturday": 91.39,
            },
            "details": {
                "confidence": "medium",
            },
        },
        "$mean": 0.65,
    }
    # fmt: on

    parameter_values_for_fully_qualified_parameter_names: Dict[
        str, ParameterNode
    ] = get_parameter_values_for_fully_qualified_parameter_names(
        domain=domain,
        variables=variables,
        parameters=parameters,
    )
    # The whole mapping must match exactly (dictionary equality, so key order is irrelevant).
    assert (
        parameter_values_for_fully_qualified_parameter_names
        == expected_parameter_values_for_fully_qualified_parameter_names
    )
def test_builder_executed_with_runtime_batch_request_does_not_raise_error(
    data_context_with_datasource_pandas_engine,
    alice_columnar_table_single_batch,
):
    """
    ColumnDomainBuilder must accept a batch request that carries an in-memory pandas DataFrame as runtime
    batch data, and return a single column Domain for column "a", inferred as text.
    """
    data_context: DataContext = data_context_with_datasource_pandas_engine

    # Variables come from the profiler configuration of the fixture; a missing section means "no variables".
    parsed_profiler_config: dict = yaml.load(
        alice_columnar_table_single_batch["profiler_config"]
    )
    raw_variables = parsed_profiler_config.get("variables")
    variables: ParameterContainer = build_parameter_container_for_variables(
        variables_configs={} if raw_variables is None else raw_variables
    )

    # Single-column frame of date-like strings, handed over as runtime batch data.
    date_strings: List[str] = [
        "2021-01-01",
        "2021-01-31",
        "2021-02-28",
        "2021-03-20",
        "2021-02-21",
        "2021-05-01",
        "2021-06-18",
    ]
    df: pd.DataFrame = pd.DataFrame({"a": date_strings})

    batch_request: dict = {
        "datasource_name": "my_datasource",
        "data_connector_name": "default_runtime_data_connector_name",
        "data_asset_name": "my_data_asset",
        "runtime_parameters": {"batch_data": df},
        "batch_identifiers": {"default_identifier_name": "my_identifier"},
    }

    domain_builder: DomainBuilder = ColumnDomainBuilder(data_context=data_context)
    domains: List[Domain] = domain_builder.get_domains(
        rule_name="my_rule",
        variables=variables,
        batch_request=batch_request,
    )

    assert len(domains) == 1

    expected_domain: dict = {
        "rule_name": "my_rule",
        "domain_type": MetricDomainTypes.COLUMN.value,
        "domain_kwargs": {
            "column": "a",
        },
        "details": {
            INFERRED_SEMANTIC_TYPE_KEY: {
                "a": SemanticDomainTypes.TEXT.value,
            },
        },
    }
    assert domains == [expected_domain]
def test_column_domain_builder_with_simple_semantic_type_included(
    alice_columnar_table_single_batch_context,
    alice_columnar_table_single_batch,
):
    """
    ColumnDomainBuilder restricted to the "numeric" semantic type must return one column Domain for
    "event_type" and one for "user_id", in that order, each annotated with its inferred numeric type.
    """
    data_context: DataContext = alice_columnar_table_single_batch_context

    # Variables come from the profiler configuration of the fixture; a missing section means "no variables".
    parsed_profiler_config: dict = yaml.load(
        alice_columnar_table_single_batch["profiler_config"]
    )
    raw_variables = parsed_profiler_config.get("variables")
    variables: ParameterContainer = build_parameter_container_for_variables(
        variables_configs={} if raw_variables is None else raw_variables
    )

    batch_request: dict = {
        "datasource_name": "alice_columnar_table_single_batch_datasource",
        "data_connector_name": "alice_columnar_table_single_batch_data_connector",
        "data_asset_name": "alice_columnar_table_single_batch_data_asset",
    }

    domain_builder: DomainBuilder = ColumnDomainBuilder(
        include_semantic_types=["numeric"],
        data_context=data_context,
    )
    domains: List[Domain] = domain_builder.get_domains(
        rule_name="my_rule",
        variables=variables,
        batch_request=batch_request,
    )

    assert len(domains) == 2

    # Assert Domain object equivalence; the expected domains differ only in the column name.
    expected_domains: List[dict] = [
        {
            "rule_name": "my_rule",
            "domain_type": "column",
            "domain_kwargs": {
                "column": column_name,
            },
            "details": {
                INFERRED_SEMANTIC_TYPE_KEY: {
                    column_name: SemanticDomainTypes.NUMERIC.value,
                },
            },
        }
        for column_name in ("event_type", "user_id")
    ]
    assert domains == expected_domains
def test_column_domain_builder(
    alice_columnar_table_single_batch_context,
    alice_columnar_table_single_batch,
):
    """
    A ColumnDomainBuilder created with only a data context must return seven domains for the "alice" single-batch
    table, comparing equal to the expected column domains (with their inferred semantic types) listed below.
    """
    context: DataContext = alice_columnar_table_single_batch_context

    # Parse the profiler configuration carried by the fixture; its "variables" section is optional.
    raw_profiler_config: str = alice_columnar_table_single_batch["profiler_config"]
    parsed_profiler_config: dict = yaml.load(raw_profiler_config)
    raw_variables: dict = parsed_profiler_config.get("variables")
    variables: ParameterContainer = build_parameter_container_for_variables(
        variables_configs={} if raw_variables is None else raw_variables
    )

    batch_request: dict = dict(
        datasource_name="alice_columnar_table_single_batch_datasource",
        data_connector_name="alice_columnar_table_single_batch_data_connector",
        data_asset_name="alice_columnar_table_single_batch_data_asset",
    )

    builder: DomainBuilder = ColumnDomainBuilder(data_context=context)
    domains: List[Domain] = builder.get_domains(
        rule_name="my_rule",
        variables=variables,
        batch_request=batch_request,
    )

    assert len(domains) == 7

    # Expected (column name, inferred semantic type) pairs, in the order the domains are compared.
    expected_columns = (
        ("id", SemanticDomainTypes.TEXT),
        ("event_type", SemanticDomainTypes.NUMERIC),
        ("user_id", SemanticDomainTypes.NUMERIC),
        ("event_ts", SemanticDomainTypes.TEXT),
        ("server_ts", SemanticDomainTypes.TEXT),
        ("device_ts", SemanticDomainTypes.TEXT),
        ("user_agent", SemanticDomainTypes.TEXT),
    )
    expected_domains: List[dict] = [
        {
            "rule_name": "my_rule",
            "domain_type": MetricDomainTypes.COLUMN.value,
            "domain_kwargs": {"column": column_name},
            "details": {
                INFERRED_SEMANTIC_TYPE_KEY: {
                    column_name: semantic_type.value,
                },
            },
        }
        for column_name, semantic_type in expected_columns
    ]
    assert domains == expected_domains
def test_default_expectation_configuration_builder_alice_parentheses_parameter_variable_condition_true(
    alice_columnar_table_single_batch_context,
):
    """
    Builds an "expect_column_values_to_be_between" configuration for the "user_id" column, guarded by a condition
    that mixes parentheses, "$variables" references and a "$parameter" reference, and checks that the resulting
    expectation configuration carries the computed column minimum (397433) as its "min_value" kwarg.
    """
    context: DataContext = alice_columnar_table_single_batch_context

    batch_request: dict = dict(
        datasource_name="alice_columnar_table_single_batch_datasource",
        data_connector_name="alice_columnar_table_single_batch_data_connector",
        data_asset_name="alice_columnar_table_single_batch_data_asset",
    )

    # The same kwargs object is shared by the parameter builder and the Domain.
    user_id_domain_kwargs: dict = {"column": "user_id"}

    min_builder: MetricMultiBatchParameterBuilder = MetricMultiBatchParameterBuilder(
        name="my_min_user_id",
        metric_name="column.min",
        metric_domain_kwargs=user_id_domain_kwargs,
        data_context=context,
    )

    variables: ParameterContainer = build_parameter_container_for_variables(
        {"max_user_id": 999999999999, "answer": 42}
    )
    empty_container: ParameterContainer = ParameterContainer(parameter_nodes=None)
    domain: Domain = Domain(
        domain_type=MetricDomainTypes.COLUMN,
        domain_kwargs=user_id_domain_kwargs,
        rule_name="my_rule",
    )
    parameters: Dict[str, ParameterContainer] = {domain.id: empty_container}

    # Compute "column.min" for the domain; the result is stored under the domain's entry in "parameters".
    min_builder.build_parameters(
        domain=domain,
        parameters=parameters,
        batch_request=batch_request,
    )

    # Read the computed minimum back so that it can be passed to the expectation as a literal "min_value".
    min_user_id: Any = get_parameter_value_by_fully_qualified_parameter_name(
        fully_qualified_parameter_name="$parameter.my_min_user_id.value[0]",
        domain=domain,
        parameters=parameters,
    )

    configuration_builder: DefaultExpectationConfigurationBuilder = DefaultExpectationConfigurationBuilder(
        expectation_type="expect_column_values_to_be_between",
        condition="($variables.max_user_id>0 & $variables.answer==42) | $parameter.my_min_user_id.value[0]<0",
        min_value=min_user_id,
        max_value="$variables.max_user_id",
    )

    built_configuration: Optional[ExpectationConfiguration] = (
        configuration_builder.build_expectation_configuration(
            domain=domain,
            variables=variables,
            parameters=parameters,
        )
    )

    assert built_configuration.kwargs["min_value"] == 397433
Ejemplo n.º 14
0
    def run(
        self,
        variables: Optional[ParameterContainer] = None,
        batch_list: Optional[List[Batch]] = None,
        batch_request: Optional[Union[BatchRequestBase, dict]] = None,
        recompute_existing_parameter_values: bool = False,
        reconciliation_directives: ReconciliationDirectives = DEFAULT_RECONCILATION_DIRECTIVES,
        rule_state: Optional[RuleState] = None,
    ) -> RuleState:
        """
        Executes this Rule: reconciles variables, obtains Domain objects from the DomainBuilder (if one is set), and,
        for every Domain, runs each ParameterBuilder followed by the validation dependency resolution of each
        ExpectationConfigurationBuilder. Execution results are accumulated in the returned RuleState.

        Args:
            variables: Attribute name/value pairs supplied at run time; reconciled with this Rule's own variables
            batch_list: Explicit list of Batch objects, forwarded to DomainBuilder and to every Builder invoked
            batch_request: Explicit batch_request, forwarded to DomainBuilder and to every Builder invoked
            recompute_existing_parameter_values: Flag forwarded unchanged to every Builder invoked
            reconciliation_directives: Its "variables" attribute is the strategy used for reconciling variables
            rule_state: Optional existing RuleState to populate; a new RuleState is created if None is given

        Returns:
            RuleState holding this Rule, the reconciled variables, the domains, and the parameters built per domain
        """
        # Reconcile run-time variables with the variables configured on this Rule, then rebuild the container.
        own_variables_config = convert_variables_to_dict(variables=self.variables)
        reconciled_variables_config = reconcile_rule_variables(
            variables=variables,
            variables_config=own_variables_config,
            reconciliation_strategy=reconciliation_directives.variables,
        )
        effective_variables = build_parameter_container_for_variables(
            variables_configs=reconciled_variables_config
        )

        # Without a DomainBuilder, there are no domains to process.
        domains: List[Domain]
        if self.domain_builder is None:
            domains = []
        else:
            domains = self.domain_builder.get_domains(
                rule_name=self.name,
                variables=effective_variables,
                batch_list=batch_list,
                batch_request=batch_request,
            )

        state: RuleState = RuleState() if rule_state is None else rule_state
        state.rule = self
        state.variables = effective_variables
        state.domains = domains
        state.reset_parameter_containers()

        # Iteration over domains goes through the wrapper chosen for the current environment.
        wrap_with_progress_bar: Callable = determine_progress_bar_method_by_environment()
        domains_with_progress = wrap_with_progress_bar(
            domains,
            desc="Profiling Dataset:",
            position=1,
            leave=False,
            bar_format="{desc:25}{percentage:3.0f}%|{bar}{r_bar}",
        )

        current_domain: Domain
        for current_domain in domains_with_progress:
            state.initialize_parameter_container_for_domain(domain=current_domain)

            # Parameters are built first, so that they are available to the configuration builders below.
            builder: ParameterBuilder
            for builder in self.parameter_builders or []:
                builder.build_parameters(
                    domain=current_domain,
                    variables=effective_variables,
                    parameters=state.parameters,
                    parameter_computation_impl=None,
                    batch_list=batch_list,
                    batch_request=batch_request,
                    recompute_existing_parameter_values=recompute_existing_parameter_values,
                )

            configuration_builder: ExpectationConfigurationBuilder
            for configuration_builder in self.expectation_configuration_builders or []:
                configuration_builder.resolve_validation_dependencies(
                    domain=current_domain,
                    variables=effective_variables,
                    parameters=state.parameters,
                    batch_list=batch_list,
                    batch_request=batch_request,
                    recompute_existing_parameter_values=recompute_existing_parameter_values,
                )

        return state
Ejemplo n.º 15
0
    def run(
        self,
        variables: Optional[ParameterContainer] = None,
        batch_list: Optional[List[Batch]] = None,
        batch_request: Optional[Union[BatchRequestBase, dict]] = None,
        recompute_existing_parameter_values: bool = False,
        reconciliation_directives: ReconciliationDirectives = DEFAULT_RECONCILATION_DIRECTIVES,
    ) -> RuleState:
        """
        Executes this Rule: reconciles variables, obtains Domain objects from the DomainBuilder (if one is set), and,
        for every Domain, runs each ParameterBuilder followed by the validation dependency resolution of each
        ExpectationConfigurationBuilder. Execution results are accumulated in a newly created RuleState.

        Args:
            variables: Attribute name/value pairs supplied at run time; reconciled with this Rule's own variables
            batch_list: Explicit list of Batch objects, forwarded to DomainBuilder and to every Builder invoked
            batch_request: Explicit batch_request, forwarded to DomainBuilder and to every Builder invoked
            recompute_existing_parameter_values: Flag forwarded unchanged to every Builder invoked
            reconciliation_directives: Its "variables" attribute is the strategy used for reconciling variables

        Returns:
            RuleState holding this Rule, the reconciled variables, the domains, and the parameters built per domain
        """
        # Reconcile run-time variables with the variables configured on this Rule, then rebuild the container.
        own_variables_config = convert_variables_to_dict(variables=self.variables)
        reconciled_variables_config = reconcile_rule_variables(
            variables=variables,
            variables_config=own_variables_config,
            reconciliation_strategy=reconciliation_directives.variables,
        )
        effective_variables = build_parameter_container_for_variables(
            variables_configs=reconciled_variables_config
        )

        # Without a DomainBuilder, there are no domains to process.
        domains: List[Domain]
        if self.domain_builder is None:
            domains = []
        else:
            domains = self.domain_builder.get_domains(
                rule_name=self.name,
                variables=effective_variables,
                batch_list=batch_list,
                batch_request=batch_request,
            )

        state: RuleState = RuleState(
            rule=self,
            variables=effective_variables,
            domains=domains,
        )
        state.reset_parameter_containers()

        current_domain: Domain
        for current_domain in domains:
            state.initialize_parameter_container_for_domain(domain=current_domain)

            # Parameters are built first, so that they are available to the configuration builders below.
            builder: ParameterBuilder
            for builder in self.parameter_builders or []:
                builder.build_parameters(
                    domain=current_domain,
                    variables=effective_variables,
                    parameters=state.parameters,
                    parameter_computation_impl=None,
                    json_serialize=None,
                    batch_list=batch_list,
                    batch_request=batch_request,
                    recompute_existing_parameter_values=recompute_existing_parameter_values,
                )

            configuration_builder: ExpectationConfigurationBuilder
            for configuration_builder in self.expectation_configuration_builders or []:
                configuration_builder.resolve_validation_dependencies(
                    domain=current_domain,
                    variables=effective_variables,
                    parameters=state.parameters,
                    batch_list=batch_list,
                    batch_request=batch_request,
                    recompute_existing_parameter_values=recompute_existing_parameter_values,
                )

        return state