Python RuleBasedProfilerConfig 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: great_expectations.rule_based_profiler.config

hotexamples.com에서의 예제들: 15

Python RuleBasedProfilerConfig - 15개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 great_expectations.rule_based_profiler.config.RuleBasedProfilerConfig에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

RuleBasedProfilerConfig(10)

resolve_config_using_acceptable_arguments(3)

to_json_dict(2)

from_commented_map(1)

예제 #1

파일 보기

def test_resolve_config_using_acceptable_arguments(
    profiler_with_placeholder_args: RuleBasedProfiler, ) -> None:
    old_config: RuleBasedProfilerConfig = profiler_with_placeholder_args.config

    # Roundtrip through schema validation to add/or restore any missing fields.
    old_deserialized_config: dict = ruleBasedProfilerConfigSchema.load(
        old_config.to_json_dict())
    old_deserialized_config.pop("class_name")
    old_deserialized_config.pop("module_name")

    old_config = RuleBasedProfilerConfig(**old_deserialized_config)

    # Brand new config is created but existing attributes are unchanged
    new_config: RuleBasedProfilerConfig = (
        RuleBasedProfilerConfig.resolve_config_using_acceptable_arguments(
            profiler=profiler_with_placeholder_args, ))

    # Roundtrip through schema validation to add/or restore any missing fields.
    # new_deserialized_config: dict = ruleBasedProfilerConfigSchema.load(new_config.to_json_dict())
    new_deserialized_config: dict = new_config.to_json_dict()
    new_deserialized_config.pop("class_name")
    new_deserialized_config.pop("module_name")

    new_config = RuleBasedProfilerConfig(**new_deserialized_config)

    assert id(old_config) != id(new_config)
    assert all(old_config[attr] == new_config[attr]
               for attr in ("config_version", "name"))

예제 #2

파일 보기

def test_rule_based_profiler_from_commented_map():
    data = {
        "name": "my_RBP",
        "class_name": "RuleBasedProfiler",
        "module_name": "great_expectations.rule_based_profiler",
        "config_version": 1.0,
        "variables": {
            "foo": "bar"
        },
        "rules": {
            "rule_1": {
                "domain_builder": {
                    "class_name": "DomainBuilder"
                },
                "parameter_builders": [{
                    "class_name": "ParameterBuilder",
                    "name": "my_parameter"
                }],
                "expectation_configuration_builders": [{
                    "class_name":
                    "ExpectationConfigurationBuilder",
                    "expectation_type":
                    "expect_column_pair_values_A_to_be_greater_than_B",
                }],
            },
        },
    }
    commented_map = CommentedMap(data)
    config = RuleBasedProfilerConfig.from_commented_map(commented_map)
    assert all(hasattr(config, k) for k in data)

예제 #3

파일 보기

def test_add_profiler(
    empty_data_context: DataContext,
    profiler_config_with_placeholder_args: RuleBasedProfilerConfig,
):
    args = profiler_config_with_placeholder_args.to_json_dict()
    for attr in ("class_name", "module_name"):
        args.pop(attr, None)

    profiler = empty_data_context.add_profiler(**args)
    assert isinstance(profiler, RuleBasedProfiler)

예제 #4

파일 보기

def test_resolve_config_using_acceptable_arguments_with_runtime_overrides_with_batch_requests(
    profiler_with_placeholder_args: RuleBasedProfiler, ) -> None:
    runtime_override_rule: dict = {
        "domain_builder": {
            "class_name": "TableDomainBuilder",
            "module_name":
            "great_expectations.rule_based_profiler.domain_builder",
        },
        "parameter_builders": [
            {
                "class_name": "MetricMultiBatchParameterBuilder",
                "module_name":
                "great_expectations.rule_based_profiler.parameter_builder",
                "metric_name": "my_other_metric",
                "name": "my_additional_parameter",
            },
        ],
        "expectation_configuration_builders": [
            {
                "class_name": "DefaultExpectationConfigurationBuilder",
                "module_name":
                "great_expectations.rule_based_profiler.expectation_configuration_builder",
                "expectation_type": "expect_column_values_to_be_between",
                "meta": {
                    "details": {
                        "note": "Here's another rule",
                    },
                },
            },
        ],
    }

    runtime_override_rule_name: str = "rule_with_batch_request"
    runtime_override_rules: Dict[str, dict] = {
        runtime_override_rule_name: runtime_override_rule
    }

    config: RuleBasedProfilerConfig = (
        RuleBasedProfilerConfig.resolve_config_using_acceptable_arguments(
            profiler=profiler_with_placeholder_args,
            rules=runtime_override_rules,
        ))

    domain_builder: dict = config.rules[runtime_override_rule_name][
        "domain_builder"]

    assert domain_builder == {
        "class_name":
        "TableDomainBuilder",
        "module_name":
        "great_expectations.rule_based_profiler.domain_builder.table_domain_builder",
    }

예제 #5

파일 보기

파일: profiler_store.py 프로젝트: alfredo-f/great_expectations

    def serialization_self_check(self, pretty_print: bool) -> None:
        """
        Fufills the abstract method defined by the parent class.
        See `ConfigurationStore` for more details.
        """
        test_profiler_name = f"profiler_{''.join([random.choice(list('0123456789ABCDEF')) for _ in range(20)])}"
        test_profiler_configuration = RuleBasedProfilerConfig(
            name=test_profiler_name,
            class_name="RuleBasedProfiler",
            module_name="great_expectations.rule_based_profiler",
            config_version=1.0,
            rules={},
        )

        test_key: Union[GeCloudIdentifier, ConfigurationIdentifier]
        if self.ge_cloud_mode:
            test_key = self.key_class(resource_type="contract",
                                      ge_cloud_id=str(uuid.uuid4()))
        else:
            test_key = self.key_class(configuration_key=test_profiler_name)

        if pretty_print:
            print(
                f"Attempting to add a new test key {test_key} to Profiler store..."
            )

        self.set(key=test_key, value=test_profiler_configuration)
        if pretty_print:
            print(
                f"\tTest key {test_key} successfully added to Profiler store.\n"
            )
            print(
                f"Attempting to retrieve the test value associated with key {test_key} from Profiler store..."
            )

        test_value = self.get(key=test_key)
        if pretty_print:
            print(
                f"\tTest value successfully retrieved from Profiler store: {test_value}\n"
            )
            print(
                f"Cleaning up test key {test_key} and value from Profiler store..."
            )

        test_value = self.remove_key(key=test_key)
        if pretty_print:
            print(
                f"\tTest key and value successfully removed from Profiler store: {test_value}\n"
            )

예제 #6

파일 보기

def test_add_profiler_with_invalid_config_raises_error(
    empty_data_context: DataContext,
    profiler_config_with_placeholder_args: RuleBasedProfilerConfig,
):
    args = profiler_config_with_placeholder_args.to_json_dict()
    for attr in ("class_name", "module_name"):
        args.pop(attr, None)

    # Setting invalid configuration to check that it is caught by DataContext wrapper method
    args["config_version"] = -1

    with pytest.raises(ValidationError) as e:
        empty_data_context.add_profiler(**args)

    assert "config_version" in str(e.value)

예제 #7

파일 보기

파일: test_rule_based_profiler.py 프로젝트: alfredo-f/great_expectations

def test_add_profiler_with_batch_request_containing_batch_data_raises_error(
    mock_data_context: mock.MagicMock, ):
    profiler_config = RuleBasedProfilerConfig(
        name="my_profiler_config",
        class_name="RuleBasedProfiler",
        module_name="great_expectations.rule_based_profiler",
        config_version=1.0,
        rules={
            "rule_1": {
                "domain_builder": {
                    "class_name": "TableDomainBuilder",
                    "batch_request": {
                        "runtime_parameters": {
                            "batch_data":
                            pd.DataFrame()  # Cannot be serialized in store
                        }
                    },
                },
                "parameter_builders": [
                    {
                        "class_name": "MetricMultiBatchParameterBuilder",
                        "name": "my_parameter",
                        "metric_name": "my_metric",
                    },
                ],
                "expectation_configuration_builders": [
                    {
                        "class_name":
                        "DefaultExpectationConfigurationBuilder",
                        "expectation_type":
                        "expect_column_pair_values_A_to_be_greater_than_B",
                    },
                ],
            }
        },
    )

    with pytest.raises(InvalidConfigError) as e:
        RuleBasedProfiler.add_profiler(
            profiler_config,
            data_context=mock_data_context,
            profiler_store=mock_data_context.profiler_store,
        )

    assert "batch_data found in batch_request" in str(e.value)

예제 #8

파일 보기

파일: usage_statistics.py 프로젝트: MuttData/great_expectations

def get_profiler_run_usage_statistics(
    profiler: "RuleBasedProfiler",  # noqa: F821
    variables: Optional[dict] = None,
    rules: Optional[dict] = None,
    *args,
    **kwargs,
) -> dict:
    usage_statistics_handler: Optional[
        UsageStatisticsHandler
    ] = profiler._usage_statistics_handler

    data_context_id: Optional[str] = None
    if usage_statistics_handler:
        data_context_id = usage_statistics_handler._data_context_id

    anonymizer: Optional[Anonymizer] = _anonymizers.get(data_context_id, None)
    if anonymizer is None:
        anonymizer = Anonymizer(data_context_id)
        _anonymizers[data_context_id] = anonymizer

    payload: dict = {}

    if usage_statistics_handler:
        # noinspection PyBroadException
        try:
            anonymizer = usage_statistics_handler.anonymizer

            resolved_runtime_config: "RuleBasedProfilerConfig" = (  # noqa: F821
                RuleBasedProfilerConfig.resolve_config_using_acceptable_arguments(
                    profiler=profiler,
                    variables=variables,
                    rules=rules,
                )
            )

            payload: dict = anonymizer.anonymize(obj=resolved_runtime_config)
        except Exception as e:
            logger.debug(
                f"{UsageStatsExceptionPrefix.EMIT_EXCEPTION.value}: {e} type: {type(e)}, get_profiler_run_usage_statistics: Unable to create anonymized_profiler_run payload field"
            )

    return payload

예제 #9

파일 보기

파일: expect_column_stdev_to_be_between.py 프로젝트: admariner/great_expectations

class ExpectColumnStdevToBeBetween(ColumnExpectation):
    """Expect the column standard deviation to be between a minimum value and a maximum value.
            Uses sample standard deviation (normalized by N-1).

            expect_column_stdev_to_be_between is a \
            :func:`column_aggregate_expectation
            <great_expectations.execution_engine.MetaExecutionEngine.column_aggregate_expectation>`.

            Args:
                column (str): \
                    The column name.
                min_value (float or None): \
                    The minimum value for the column standard deviation.
                max_value (float or None): \
                    The maximum value for the column standard deviation.
                strict_min (boolean):
                    If True, the column standard deviation must be strictly larger than min_value, default=False
                strict_max (boolean):
                    If True, the column standard deviation must be strictly smaller than max_value, default=False

            Other Parameters:
                result_format (str or None): \
                    Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`. \
                    For more detail, see :ref:`result_format <result_format>`.
                include_config (boolean): \
                    If True, then include the expectation config as part of the result object. \
                    For more detail, see :ref:`include_config`.
                catch_exceptions (boolean or None): \
                    If True, then catch exceptions and include them as part of the result object. \
                    For more detail, see :ref:`catch_exceptions`.
                meta (dict or None): \
                    A JSON-serializable dictionary (nesting allowed) that will be included in the output without \
                    modification. For more detail, see :ref:`meta`.

            Returns:
                An ExpectationSuiteValidationResult

                Exact fields vary depending on the values passed to :ref:`result_format <result_format>` and
                :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`.

            Notes:
                These fields in the result object are customized for this expectation:
                ::

                    {
                        "observed_value": (float) The true standard deviation for the column
                    }

                * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.
                * If min_value is None, then max_value is treated as an upper bound
                * If max_value is None, then min_value is treated as a lower bound

            See Also:
                :func:`expect_column_mean_to_be_between \
                <great_expectations.execution_engine.execution_engine.ExecutionEngine.expect_column_mean_to_be_between>`

                :func:`expect_column_median_to_be_between \
                <great_expectations.execution_engine.execution_engine.ExecutionEngine.expect_column_median_to_be_between>`

            """

    # This dictionary contains metadata for display in the public gallery
    library_metadata = {
        "maturity": "production",
        "tags": ["core expectation", "column aggregate expectation"],
        "contributors": ["@great_expectations"],
        "requirements": [],
    }

    metric_dependencies = ("column.standard_deviation", )
    success_keys = (
        "min_value",
        "strict_min",
        "max_value",
        "strict_max",
        "auto",
        "profiler_config",
    )

    stdev_range_estimator_parameter_builder_config: ParameterBuilderConfig = ParameterBuilderConfig(
        module_name="great_expectations.rule_based_profiler.parameter_builder",
        class_name="NumericMetricRangeMultiBatchParameterBuilder",
        name="stdev_range_estimator",
        metric_name="column.standard_deviation",
        metric_multi_batch_parameter_builder_name=None,
        metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
        metric_value_kwargs=None,
        enforce_numeric_metric=True,
        replace_nan_with_zero=True,
        reduce_scalar_metric=True,
        false_positive_rate=f"{VARIABLES_KEY}false_positive_rate",
        quantile_statistic_interpolation_method=
        f"{VARIABLES_KEY}quantile_statistic_interpolation_method",
        estimator=f"{VARIABLES_KEY}estimator",
        n_resamples=f"{VARIABLES_KEY}n_resamples",
        random_seed=f"{VARIABLES_KEY}random_seed",
        include_estimator_samples_histogram_in_details=
        f"{VARIABLES_KEY}include_estimator_samples_histogram_in_details",
        truncate_values=f"{VARIABLES_KEY}truncate_values",
        round_decimals=f"{VARIABLES_KEY}round_decimals",
        evaluation_parameter_builder_configs=None,
    )
    validation_parameter_builder_configs: List[ParameterBuilderConfig] = [
        stdev_range_estimator_parameter_builder_config,
    ]
    default_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
        name=
        "expect_column_stdev_to_be_between",  # Convention: use "expectation_type" as profiler name.
        config_version=1.0,
        variables={},
        rules={
            "default_expect_column_stdev_to_be_between_rule": {
                "variables": {
                    "strict_min": False,
                    "strict_max": False,
                    "false_positive_rate": 0.05,
                    "quantile_statistic_interpolation_method": "auto",
                    "estimator": "bootstrap",
                    "n_resamples": 9999,
                    "random_seed": None,
                    "include_estimator_samples_histogram_in_details": False,
                    "truncate_values": {
                        "lower_bound": 0,
                        "upper_bound": None,
                    },
                    "round_decimals": 2,
                },
                "domain_builder": {
                    "class_name":
                    "ColumnDomainBuilder",
                    "module_name":
                    "great_expectations.rule_based_profiler.domain_builder",
                },
                "expectation_configuration_builders": [
                    {
                        "expectation_type":
                        "expect_column_stdev_to_be_between",
                        "class_name": "DefaultExpectationConfigurationBuilder",
                        "module_name":
                        "great_expectations.rule_based_profiler.expectation_configuration_builder",
                        "validation_parameter_builder_configs":
                        validation_parameter_builder_configs,
                        "column":
                        f"{DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}column",
                        "min_value":
                        f"{PARAMETER_KEY}{stdev_range_estimator_parameter_builder_config.name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY}[0]",
                        "max_value":
                        f"{PARAMETER_KEY}{stdev_range_estimator_parameter_builder_config.name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY}[1]",
                        "strict_min": f"{VARIABLES_KEY}strict_min",
                        "strict_max": f"{VARIABLES_KEY}strict_max",
                        "meta": {
                            "profiler_details":
                            f"{PARAMETER_KEY}{stdev_range_estimator_parameter_builder_config.name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY}",
                        },
                    },
                ],
            },
        },
    )

    default_kwarg_values = {
        "min_value": None,
        "strict_min": False,
        "max_value": None,
        "strict_max": False,
        "result_format": "BASIC",
        "include_config": True,
        "catch_exceptions": False,
        "auto": False,
        "profiler_config": default_profiler_config,
    }
    args_keys = (
        "column",
        "min_value",
        "max_value",
        "strict_min",
        "strict_max",
    )

    def validate_configuration(
            self, configuration: Optional[ExpectationConfiguration]) -> None:
        """
        Validates that a configuration has been set, and sets a configuration if it has yet to be set. Ensures that
        necessary configuration arguments have been provided for the validation of the expectation.

        Args:
            configuration (OPTIONAL[ExpectationConfiguration]): \
                An optional Expectation Configuration entry that will be used to configure the expectation
        Returns:
            None. Raises InvalidExpectationConfigurationError if the config is not validated successfully
        """
        super().validate_configuration(configuration)
        self.validate_metric_value_between_configuration(
            configuration=configuration)

    @classmethod
    def _atomic_prescriptive_template(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "column",
                "min_value",
                "max_value",
                "row_condition",
                "condition_parser",
                "strict_min",
                "strict_max",
            ],
        )
        params_with_json_schema = {
            "column": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("column")
            },
            "min_value": {
                "schema": {
                    "type": "number"
                },
                "value": params.get("min_value"),
            },
            "max_value": {
                "schema": {
                    "type": "number"
                },
                "value": params.get("max_value"),
            },
            "row_condition": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("row_condition"),
            },
            "condition_parser": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("condition_parser"),
            },
            "strict_min": {
                "schema": {
                    "type": "boolean"
                },
                "value": params.get("strict_min"),
            },
            "strict_max": {
                "schema": {
                    "type": "boolean"
                },
                "value": params.get("strict_max"),
            },
        }

        if (params["min_value"] is None) and (params["max_value"] is None):
            template_str = "standard deviation may have any numerical value."
        else:
            at_least_str, at_most_str = handle_strict_min_max(params)

            if params["min_value"] is not None and params[
                    "max_value"] is not None:
                template_str = f"standard deviation must be {at_least_str} $min_value and {at_most_str} $max_value."
            elif params["min_value"] is None:
                template_str = f"standard deviation must be {at_most_str} $max_value."
            elif params["max_value"] is None:
                template_str = f"standard deviation must be {at_least_str} $min_value."

        if include_column_name:
            template_str = f"$column {template_str}"

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"], with_schema=True)
            template_str = f"{conditional_template_str}, then {template_str}"
            params_with_json_schema.update(conditional_params)

        return (template_str, params_with_json_schema, styling)

    @classmethod
    @renderer(renderer_type="renderer.prescriptive")
    @render_evaluation_parameter_string
    def _prescriptive_renderer(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "column",
                "min_value",
                "max_value",
                "row_condition",
                "condition_parser",
                "strict_min",
                "strict_max",
            ],
        )

        if (params["min_value"] is None) and (params["max_value"] is None):
            template_str = "standard deviation may have any numerical value."
        else:
            at_least_str, at_most_str = handle_strict_min_max(params)

            if params["min_value"] is not None and params[
                    "max_value"] is not None:
                template_str = f"standard deviation must be {at_least_str} $min_value and {at_most_str} $max_value."
            elif params["min_value"] is None:
                template_str = f"standard deviation must be {at_most_str} $max_value."
            elif params["max_value"] is None:
                template_str = f"standard deviation must be {at_least_str} $min_value."

        if include_column_name:
            template_str = f"$column {template_str}"

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"])
            template_str = f"{conditional_template_str}, then {template_str}"
            params.update(conditional_params)

        return [
            RenderedStringTemplateContent(
                **{
                    "content_block_type": "string_template",
                    "string_template": {
                        "template": template_str,
                        "params": params,
                        "styling": styling,
                    },
                })
        ]

    def _validate(
        self,
        configuration: ExpectationConfiguration,
        metrics: Dict,
        runtime_configuration: dict = None,
        execution_engine: ExecutionEngine = None,
    ):
        return self._validate_metric_value_between(
            metric_name="column.standard_deviation",
            configuration=configuration,
            metrics=metrics,
            runtime_configuration=runtime_configuration,
            execution_engine=execution_engine,
        )

예제 #10

파일 보기

파일: expect_column_values_to_be_between.py 프로젝트: admariner/great_expectations

class ExpectColumnValuesToBeBetween(ColumnMapExpectation):
    """Expect column entries to be between a minimum value and a maximum value (inclusive).

    expect_column_values_to_be_between is a \
    :func:`column_map_expectation <great_expectations.execution_engine.execution_engine.MetaExecutionEngine
    .column_map_expectation>`.

    Args:
        column (str): \
            The column name.
        min_value (comparable type or None): The minimum value for a column entry.
        max_value (comparable type or None): The maximum value for a column entry.
        strict_min (boolean):
            If True, values must be strictly larger than min_value, default=False
        strict_max (boolean):
            If True, values must be strictly smaller than max_value, default=False

    Keyword Args:
        allow_cross_type_comparisons (boolean or None) : If True, allow comparisons between types (e.g. integer and\
            string). Otherwise, attempting such comparisons will raise an exception.
        parse_strings_as_datetimes (boolean or None) : If True, parse min_value, max_value, and all non-null column\
            values to datetimes before making comparisons.
        output_strftime_format (str or None): \
            A valid strfime format for datetime output. Only used if parse_strings_as_datetimes=True.
        mostly (None or a float between 0 and 1): \
            Return `"success": True` if at least mostly fraction of values match the expectation. \
            For more detail, see :ref:`mostly`.

    Other Parameters:
        result_format (str or None): \
            Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`.
            For more detail, see :ref:`result_format <result_format>`.
        include_config (boolean): \
            If True, then include the expectation config as part of the result object. \
            For more detail, see :ref:`include_config`.
        catch_exceptions (boolean or None): \
            If True, then catch exceptions and include them as part of the result object. \
            For more detail, see :ref:`catch_exceptions`.
        meta (dict or None): \
            A JSON-serializable dictionary (nesting allowed) that will be included in the output without \
            modification. For more detail, see :ref:`meta`.

    Returns:
        An ExpectationSuiteValidationResult

        Exact fields vary depending on the values passed to :ref:`result_format <result_format>` and
        :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`.

    Notes:
        * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.
        * If min_value is None, then max_value is treated as an upper bound, and there is no minimum value checked.
        * If max_value is None, then min_value is treated as a lower bound, and there is no maximum value checked.

    See Also:
        :func:`expect_column_value_lengths_to_be_between \
        <great_expectations.execution_engine.execution_engine.ExecutionEngine
        .expect_column_value_lengths_to_be_between>`

    """

    # This dictionary contains metadata for display in the public gallery
    library_metadata = {
        "maturity": "production",
        "tags": ["core expectation", "column map expectation"],
        "contributors": ["@great_expectations"],
        "requirements": [],
        "has_full_test_suite": True,
        "manually_reviewed_code": True,
    }

    map_metric = "column_values.between"
    success_keys = (
        "min_value",
        "max_value",
        "strict_min",
        "strict_max",
        "allow_cross_type_comparisons",
        "mostly",
        "parse_strings_as_datetimes",
        "auto",
        "profiler_config",
    )

    column_min_range_estimator_parameter_builder_config: ParameterBuilderConfig = ParameterBuilderConfig(
        module_name="great_expectations.rule_based_profiler.parameter_builder",
        class_name="NumericMetricRangeMultiBatchParameterBuilder",
        name="column_min_range_estimator",
        metric_name="column.min",
        metric_multi_batch_parameter_builder_name=None,
        metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
        metric_value_kwargs=None,
        enforce_numeric_metric=True,
        replace_nan_with_zero=True,
        reduce_scalar_metric=True,
        false_positive_rate=f"{VARIABLES_KEY}false_positive_rate",
        quantile_statistic_interpolation_method=
        f"{VARIABLES_KEY}quantile_statistic_interpolation_method",
        estimator=f"{VARIABLES_KEY}estimator",
        n_resamples=f"{VARIABLES_KEY}n_resamples",
        random_seed=f"{VARIABLES_KEY}random_seed",
        include_estimator_samples_histogram_in_details=
        f"{VARIABLES_KEY}include_estimator_samples_histogram_in_details",
        truncate_values=f"{VARIABLES_KEY}truncate_values",
        round_decimals=f"{VARIABLES_KEY}round_decimals",
        evaluation_parameter_builder_configs=None,
    )
    column_max_range_estimator_parameter_builder_config: ParameterBuilderConfig = ParameterBuilderConfig(
        module_name="great_expectations.rule_based_profiler.parameter_builder",
        class_name="NumericMetricRangeMultiBatchParameterBuilder",
        name="column_max_range_estimator",
        metric_name="column.max",
        metric_multi_batch_parameter_builder_name=None,
        metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
        metric_value_kwargs=None,
        enforce_numeric_metric=True,
        replace_nan_with_zero=True,
        reduce_scalar_metric=True,
        false_positive_rate=f"{VARIABLES_KEY}false_positive_rate",
        quantile_statistic_interpolation_method=
        f"{VARIABLES_KEY}quantile_statistic_interpolation_method",
        estimator=f"{VARIABLES_KEY}estimator",
        n_resamples=f"{VARIABLES_KEY}n_resamples",
        random_seed=f"{VARIABLES_KEY}random_seed",
        include_estimator_samples_histogram_in_details=
        f"{VARIABLES_KEY}include_estimator_samples_histogram_in_details",
        truncate_values=f"{VARIABLES_KEY}truncate_values",
        round_decimals=f"{VARIABLES_KEY}round_decimals",
        evaluation_parameter_builder_configs=None,
    )
    validation_parameter_builder_configs: List[ParameterBuilderConfig] = [
        column_min_range_estimator_parameter_builder_config,
        column_max_range_estimator_parameter_builder_config,
    ]
    default_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
        name=
        "expect_column_values_to_be_between",  # Convention: use "expectation_type" as profiler name.
        config_version=1.0,
        variables={},
        rules={
            "default_expect_column_values_to_be_between_rule": {
                "variables": {
                    "mostly": 1.0,
                    "strict_min": False,
                    "strict_max": False,
                    "false_positive_rate": 0.05,
                    "quantile_statistic_interpolation_method": "auto",
                    "estimator": "bootstrap",
                    "n_resamples": 9999,
                    "random_seed": None,
                    "include_estimator_samples_histogram_in_details": False,
                    "truncate_values": {
                        "lower_bound": None,
                        "upper_bound": None,
                    },
                    "round_decimals": 1,
                },
                "domain_builder": {
                    "class_name":
                    "ColumnDomainBuilder",
                    "module_name":
                    "great_expectations.rule_based_profiler.domain_builder",
                },
                "expectation_configuration_builders": [
                    {
                        "expectation_type":
                        "expect_column_values_to_be_between",
                        "class_name": "DefaultExpectationConfigurationBuilder",
                        "module_name":
                        "great_expectations.rule_based_profiler.expectation_configuration_builder",
                        "validation_parameter_builder_configs":
                        validation_parameter_builder_configs,
                        "column":
                        f"{DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}column",
                        "min_value":
                        f"{PARAMETER_KEY}{column_min_range_estimator_parameter_builder_config.name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY}[0]",
                        "max_value":
                        f"{PARAMETER_KEY}{column_max_range_estimator_parameter_builder_config.name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY}[1]",
                        "mostly": f"{VARIABLES_KEY}mostly",
                        "strict_min": f"{VARIABLES_KEY}strict_min",
                        "strict_max": f"{VARIABLES_KEY}strict_max",
                        "meta": {
                            "profiler_details": {
                                "column_min_range_estimator":
                                f"{PARAMETER_KEY}{column_min_range_estimator_parameter_builder_config.name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY}",
                                "column_max_range_estimator":
                                f"{PARAMETER_KEY}{column_max_range_estimator_parameter_builder_config.name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY}",
                            },
                        },
                    },
                ],
            },
        },
    )

    default_kwarg_values = {
        "row_condition": None,
        "condition_parser":
        None,  # we expect this to be explicitly set whenever a row_condition is passed
        "mostly": 1,
        "min_value": None,
        "max_value": None,
        "strict_min": False,
        "strict_max": False,  # tolerance=1e-9,
        "parse_strings_as_datetimes": False,
        "allow_cross_type_comparisons": None,
        "result_format": "BASIC",
        "include_config": True,
        "catch_exceptions": False,
        "meta": None,
        "auto": False,
        "profiler_config": default_profiler_config,
    }
    args_keys = (
        "column",
        "min_value",
        "max_value",
        "strict_min",
        "strict_max",
    )

    def validate_configuration(
            self, configuration: Optional[ExpectationConfiguration]) -> None:
        """
        Validates that a configuration has been set, and sets a configuration if it has yet to be set. Ensures that
        necessary configuration arguments have been provided for the validation of the expectation.

        Args:
            configuration (OPTIONAL[ExpectationConfiguration]): \
                An optional Expectation Configuration entry that will be used to configure the expectation
        Returns:
            None. Raises InvalidExpectationConfigurationError if the config is not validated successfully
        """

        # Setting up a configuration
        super().validate_configuration(configuration)

        min_val = None
        max_val = None
        if "min_value" in configuration.kwargs:
            min_val = configuration.kwargs["min_value"]
        if "max_value" in configuration.kwargs:
            max_val = configuration.kwargs["max_value"]
        assert (min_val is not None or max_val
                is not None), "min_value and max_value cannot both be None"

        self.validate_metric_value_between_configuration(
            configuration=configuration)

    @classmethod
    def _atomic_prescriptive_template(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "column",
                "min_value",
                "max_value",
                "mostly",
                "row_condition",
                "condition_parser",
                "strict_min",
                "strict_max",
            ],
        )
        params_with_json_schema = {
            "column": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("column")
            },
            "min_value": {
                "schema": {
                    "type": "number"
                },
                "value": params.get("min_value"),
            },
            "max_value": {
                "schema": {
                    "type": "number"
                },
                "value": params.get("max_value"),
            },
            "mostly": {
                "schema": {
                    "type": "number"
                },
                "value": params.get("mostly")
            },
            "mostly_pct": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("mostly_pct"),
            },
            "row_condition": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("row_condition"),
            },
            "condition_parser": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("condition_parser"),
            },
            "strict_min": {
                "schema": {
                    "type": "boolean"
                },
                "value": params.get("strict_min"),
            },
            "strict_max": {
                "schema": {
                    "type": "boolean"
                },
                "value": params.get("strict_max"),
            },
        }

        template_str = ""
        if (params["min_value"] is None) and (params["max_value"] is None):
            template_str += "may have any numerical value."
        else:
            at_least_str, at_most_str = handle_strict_min_max(params)

            mostly_str = ""
            if params["mostly"] is not None and params["mostly"] < 1.0:
                params_with_json_schema["mostly_pct"]["value"] = num_to_str(
                    params["mostly"] * 100, precision=15, no_scientific=True)
                # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".")
                mostly_str = ", at least $mostly_pct % of the time"

            if params["min_value"] is not None and params[
                    "max_value"] is not None:
                template_str += f"values must be {at_least_str} $min_value and {at_most_str} $max_value{mostly_str}."

            elif params["min_value"] is None:
                template_str += f"values must be {at_most_str} $max_value{mostly_str}."

            elif params["max_value"] is None:
                template_str += f"values must be {at_least_str} $min_value{mostly_str}."

        if include_column_name:
            template_str = f"$column {template_str}"

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"], with_schema=True)
            template_str = f"{conditional_template_str}, then {template_str}"
            params_with_json_schema.update(conditional_params)

        return (template_str, params_with_json_schema, styling)

    # NOTE: This method is a pretty good example of good usage of `params`.
    @classmethod
    @renderer(renderer_type="renderer.prescriptive")
    @render_evaluation_parameter_string
    def _prescriptive_renderer(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "column",
                "min_value",
                "max_value",
                "mostly",
                "row_condition",
                "condition_parser",
                "strict_min",
                "strict_max",
            ],
        )

        template_str = ""
        if (params["min_value"] is None) and (params["max_value"] is None):
            template_str += "may have any numerical value."
        else:
            at_least_str, at_most_str = handle_strict_min_max(params)

            mostly_str = ""
            if params["mostly"] is not None and params["mostly"] < 1.0:
                params["mostly_pct"] = num_to_str(params["mostly"] * 100,
                                                  precision=15,
                                                  no_scientific=True)
                # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".")
                mostly_str = ", at least $mostly_pct % of the time"

            if params["min_value"] is not None and params[
                    "max_value"] is not None:
                template_str += f"values must be {at_least_str} $min_value and {at_most_str} $max_value{mostly_str}."

            elif params["min_value"] is None:
                template_str += f"values must be {at_most_str} $max_value{mostly_str}."

            elif params["max_value"] is None:
                template_str += f"values must be {at_least_str} $min_value{mostly_str}."

        if include_column_name:
            template_str = f"$column {template_str}"

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"])
            template_str = f"{conditional_template_str}, then {template_str}"
            params.update(conditional_params)

        return [
            RenderedStringTemplateContent(
                **{
                    "content_block_type": "string_template",
                    "string_template": {
                        "template": template_str,
                        "params": params,
                        "styling": styling,
                    },
                })
        ]

예제 #11

파일 보기

파일: expect_table_columns_to_match_set.py 프로젝트: admariner/great_expectations

class ExpectTableColumnsToMatchSet(TableExpectation):
    """Expect the columns to match an *unordered* set.

    expect_table_columns_to_match_set is a :func:`expectation \
    <great_expectations.validator.validator.Validator.expectation>`, not a
    ``column_map_expectation`` or ``column_aggregate_expectation``.

    Args:
        column_set (list of str): \
            The column names, in any order.
        exact_match (boolean): \
            Whether the list of columns must exactly match the observed columns.

    Other Parameters:
        result_format (str or None): \
            Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`.
            For more detail, see :ref:`result_format <result_format>`.
        include_config (boolean): \
            If True, then include the expectation config as part of the result object. \
            For more detail, see :ref:`include_config`.
        catch_exceptions (boolean or None): \
            If True, then catch exceptions and include them as part of the result object. \
            For more detail, see :ref:`catch_exceptions`.
        meta (dict or None): \
            A JSON-serializable dictionary (nesting allowed) that will be included in the output without \
            modification. For more detail, see :ref:`meta`.

    Returns:
        An ExpectationSuiteValidationResult

        Exact fields vary depending on the values passed to :ref:`result_format <result_format>` and
        :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`.

    """

    library_metadata = {
        "maturity": "production",
        "tags": ["core expectation", "table expectation"],
        "contributors": [
            "@great_expectations",
        ],
        "requirements": [],
        "has_full_test_suite": True,
        "manually_reviewed_code": True,
    }

    metric_dependencies = ("table.columns",)
    success_keys = (
        "column_set",
        "exact_match",
        "auto",
        "profiler_config",
    )

    mean_table_columns_set_match_multi_batch_parameter_builder_config: ParameterBuilderConfig = ParameterBuilderConfig(
        module_name="great_expectations.rule_based_profiler.parameter_builder",
        class_name="MeanTableColumnsSetMatchMultiBatchParameterBuilder",
        name="column_names_set_estimator",
        metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
        metric_value_kwargs=None,
        evaluation_parameter_builder_configs=None,
    )
    validation_parameter_builder_configs: List[ParameterBuilderConfig] = [
        mean_table_columns_set_match_multi_batch_parameter_builder_config,
    ]
    default_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
        name="expect_table_columns_to_match_set",  # Convention: use "expectation_type" as profiler name.
        config_version=1.0,
        variables={},
        rules={
            "expect_table_columns_to_match_set": {
                "variables": {
                    "exact_match": None,
                    "success_ratio": 1.0,
                },
                "domain_builder": {
                    "class_name": "TableDomainBuilder",
                    "module_name": "great_expectations.rule_based_profiler.domain_builder",
                },
                "expectation_configuration_builders": [
                    {
                        "expectation_type": "expect_table_columns_to_match_set",
                        "class_name": "DefaultExpectationConfigurationBuilder",
                        "module_name": "great_expectations.rule_based_profiler.expectation_configuration_builder",
                        "validation_parameter_builder_configs": validation_parameter_builder_configs,
                        "condition": f"{PARAMETER_KEY}{mean_table_columns_set_match_multi_batch_parameter_builder_config.name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}success_ratio >= {VARIABLES_KEY}success_ratio",
                        "column_set": f"{PARAMETER_KEY}{mean_table_columns_set_match_multi_batch_parameter_builder_config.name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY}",
                        "exact_match": f"{VARIABLES_KEY}exact_match",
                        "meta": {
                            "profiler_details": f"{PARAMETER_KEY}{mean_table_columns_set_match_multi_batch_parameter_builder_config.name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY}",
                        },
                    },
                ],
            },
        },
    )

    default_kwarg_values = {
        "column_set": None,
        "exact_match": True,
        "result_format": "BASIC",
        "include_config": True,
        "catch_exceptions": False,
        "auto": False,
        "profiler_config": default_profiler_config,
    }
    args_keys = (
        "column_set",
        "exact_match",
    )

    def validate_configuration(
        self, configuration: Optional[ExpectationConfiguration]
    ) -> None:
        """
        Validates that a configuration has been set, and sets a configuration if it has yet to be set. Ensures that
        necessary configuration arguments have been provided for the validation of the expectation.

        Args:
            configuration (OPTIONAL[ExpectationConfiguration]): \
                An optional Expectation Configuration entry that will be used to configure the expectation
        Returns:
            None. Raises InvalidExpectationConfigurationError if the config is not validated successfully
        """

        # Setting up a configuration
        super().validate_configuration(configuration)

        # Ensuring that a proper value has been provided
        try:
            assert "column_set" in configuration.kwargs, "column_set is required"
            assert (
                isinstance(configuration.kwargs["column_set"], (list, set, dict))
                or configuration.kwargs["column_set"] is None
            ), "column_set must be a list, set, or None"
            if isinstance(configuration.kwargs["column_set"], dict):
                assert (
                    "$PARAMETER" in configuration.kwargs["column_set"]
                ), 'Evaluation Parameter dict for column_set kwarg must have "$PARAMETER" key.'
        except AssertionError as e:
            raise InvalidExpectationConfigurationError(str(e))

    @classmethod
    def _atomic_prescriptive_template(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get("include_column_name", True)
        include_column_name = (
            include_column_name if include_column_name is not None else True
        )
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs, ["column_set", "exact_match"]
        )

        if params["column_set"] is None:
            template_str = "Must specify a set or list of columns."

        else:
            # standardize order of the set for output
            params["column_list"] = list(params["column_set"])

            column_list_template_str = ", ".join(
                [f"$column_list_{idx}" for idx in range(len(params["column_list"]))]
            )

            exact_match_str = "exactly" if params["exact_match"] is True else "at least"

            template_str = f"Must have {exact_match_str} these columns (in any order): {column_list_template_str}"

            for idx in range(len(params["column_list"])):
                params[f"column_list_{str(idx)}"] = params["column_list"][idx]

        params_with_json_schema = {
            "column_list": {
                "schema": {"type": "array"},
                "value": params.get("column_list"),
            },
            "exact_match": {
                "schema": {"type": "boolean"},
                "value": params.get("exact_match"),
            },
        }
        return (template_str, params_with_json_schema, styling)

    @classmethod
    @renderer(renderer_type="renderer.prescriptive")
    @render_evaluation_parameter_string
    def _prescriptive_renderer(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get("include_column_name", True)
        include_column_name = (
            include_column_name if include_column_name is not None else True
        )
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs, ["column_set", "exact_match"]
        )

        if params["column_set"] is None:
            template_str = "Must specify a set or list of columns."

        else:
            # standardize order of the set for output
            params["column_list"] = list(params["column_set"])

            column_list_template_str = ", ".join(
                [f"$column_list_{idx}" for idx in range(len(params["column_list"]))]
            )

            exact_match_str = "exactly" if params["exact_match"] is True else "at least"

            template_str = f"Must have {exact_match_str} these columns (in any order): {column_list_template_str}"

            for idx in range(len(params["column_list"])):
                params[f"column_list_{str(idx)}"] = params["column_list"][idx]

        return [
            RenderedStringTemplateContent(
                **{
                    "content_block_type": "string_template",
                    "string_template": {
                        "template": template_str,
                        "params": params,
                        "styling": styling,
                    },
                }
            )
        ]

    def _validate(
        self,
        configuration: ExpectationConfiguration,
        metrics: Dict,
        runtime_configuration: dict = None,
        execution_engine: ExecutionEngine = None,
    ):
        # Obtaining columns and ordered list for sake of comparison
        expected_column_set = self.get_success_kwargs(configuration).get("column_set")
        expected_column_set = (
            set(expected_column_set) if expected_column_set is not None else set()
        )
        actual_column_list = metrics.get("table.columns")
        actual_column_set = set(actual_column_list)
        exact_match = self.get_success_kwargs(configuration).get("exact_match")

        if (
            (expected_column_set is None) and (exact_match is not True)
        ) or actual_column_set == expected_column_set:
            return {"success": True, "result": {"observed_value": actual_column_list}}
        else:
            # Convert to lists and sort to lock order for testing and output rendering
            # unexpected_list contains items from the dataset columns that are not in expected_column_set
            unexpected_list = sorted(list(actual_column_set - expected_column_set))
            # missing_list contains items from expected_column_set that are not in the dataset columns
            missing_list = sorted(list(expected_column_set - actual_column_set))
            # observed_value contains items that are in the dataset columns
            observed_value = sorted(actual_column_list)

            mismatched = {}
            if len(unexpected_list) > 0:
                mismatched["unexpected"] = unexpected_list
            if len(missing_list) > 0:
                mismatched["missing"] = missing_list

            result = {
                "observed_value": observed_value,
                "details": {"mismatched": mismatched},
            }

            return_success = {
                "success": True,
                "result": result,
            }
            return_failed = {
                "success": False,
                "result": result,
            }

            if exact_match:
                return return_failed
            else:
                # Failed if there are items in the missing list (but OK to have unexpected_list)
                if len(missing_list) > 0:
                    return return_failed
                # Passed if there are no items in the missing list
                else:
                    return return_success

예제 #12

파일 보기

파일: expect_table_row_count_to_be_between.py 프로젝트: MuttData/great_expectations

class ExpectTableRowCountToBeBetween(TableExpectation):
    """Expect the number of rows to be between two values.

    expect_table_row_count_to_be_between is a :func:`expectation \
    <great_expectations.validator.validator.Validator.expectation>`, not a
    ``column_map_expectation`` or ``column_aggregate_expectation``.

    Keyword Args:
        min_value (int or None): \
            The minimum number of rows, inclusive.
        max_value (int or None): \
            The maximum number of rows, inclusive.

    Other Parameters:
        result_format (str or None): \
            Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`.
            For more detail, see :ref:`result_format <result_format>`.
        include_config (boolean): \
            If True, then include the expectation config as part of the result object. \
            For more detail, see :ref:`include_config`.
        catch_exceptions (boolean or None): \
            If True, then catch exceptions and include them as part of the result object. \
            For more detail, see :ref:`catch_exceptions`.
        meta (dict or None): \
            A JSON-serializable dictionary (nesting allowed) that will be included in the output without \
            modification. For more detail, see :ref:`meta`.

    Returns:
        An ExpectationSuiteValidationResult

        Exact fields vary depending on the values passed to :ref:`result_format <result_format>` and
        :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`.

    Notes:
        * min_value and max_value are both inclusive.
        * If min_value is None, then max_value is treated as an upper bound, and the number of acceptable rows has \
          no minimum.
        * If max_value is None, then min_value is treated as a lower bound, and the number of acceptable rows has \
          no maximum.

    See Also:
        expect_table_row_count_to_equal
    """

    library_metadata = {
        "maturity": "production",
        "tags": ["core expectation", "table expectation"],
        "contributors": [
            "@great_expectations",
        ],
        "requirements": [],
        "has_full_test_suite": True,
        "manually_reviewed_code": True,
    }

    metric_dependencies = ("table.row_count",)
    success_keys = (
        "min_value",
        "max_value",
        "auto",
        "profiler_config",
    )

    table_row_count_range_estimator_parameter_builder_config: ParameterBuilderConfig = ParameterBuilderConfig(
        module_name="great_expectations.rule_based_profiler.parameter_builder",
        class_name="NumericMetricRangeMultiBatchParameterBuilder",
        name="table_row_count_range_estimator",
        metric_name="table.row_count",
        metric_domain_kwargs=None,
        metric_value_kwargs=None,
        enforce_numeric_metric=True,
        replace_nan_with_zero=True,
        reduce_scalar_metric=True,
        false_positive_rate=f"{VARIABLES_KEY}false_positive_rate",
        quantile_statistic_interpolation_method=f"{VARIABLES_KEY}quantile_statistic_interpolation_method",
        estimator=f"{VARIABLES_KEY}estimator",
        num_bootstrap_samples=f"{VARIABLES_KEY}num_bootstrap_samples",
        bootstrap_random_seed=f"{VARIABLES_KEY}bootstrap_random_seed",
        truncate_values=f"{VARIABLES_KEY}truncate_values",
        round_decimals=f"{VARIABLES_KEY}round_decimals",
        evaluation_parameter_builder_configs=None,
        json_serialize=True,
    )
    validation_parameter_builder_configs: List[ParameterBuilderConfig] = [
        table_row_count_range_estimator_parameter_builder_config,
    ]
    default_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
        name="expect_table_row_count_to_be_between",  # Convention: use "expectation_type" as profiler name.
        config_version=1.0,
        variables={},
        rules={
            "default_expect_table_row_count_to_be_between_rule": {
                "variables": {
                    "false_positive_rate": 0.05,
                    "quantile_statistic_interpolation_method": "auto",
                    "estimator": "bootstrap",
                    "num_bootstrap_samples": 9999,
                    "bootstrap_random_seed": None,
                    "truncate_values": {
                        "lower_bound": 0,
                        "upper_bound": None,
                    },
                    "round_decimals": 0,
                },
                "domain_builder": {
                    "class_name": "TableDomainBuilder",
                    "module_name": "great_expectations.rule_based_profiler.domain_builder",
                },
                "expectation_configuration_builders": [
                    {
                        "expectation_type": "expect_table_row_count_to_be_between",
                        "class_name": "DefaultExpectationConfigurationBuilder",
                        "module_name": "great_expectations.rule_based_profiler.expectation_configuration_builder",
                        "validation_parameter_builder_configs": validation_parameter_builder_configs,
                        "min_value": f"{PARAMETER_KEY}{table_row_count_range_estimator_parameter_builder_config.name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY}[0]",
                        "max_value": f"{PARAMETER_KEY}{table_row_count_range_estimator_parameter_builder_config.name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY}[1]",
                        "meta": {
                            "profiler_details": f"{PARAMETER_KEY}{table_row_count_range_estimator_parameter_builder_config.name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY}",
                        },
                    }
                ],
            },
        },
    )

    default_kwarg_values = {
        "min_value": None,
        "max_value": None,
        "result_format": "BASIC",
        "include_config": True,
        "catch_exceptions": False,
        "meta": None,
        "auto": False,
        "profiler_config": default_profiler_config,
    }
    args_keys = (
        "min_value",
        "max_value",
    )

    def validate_configuration(
        self, configuration: Optional[ExpectationConfiguration]
    ) -> None:
        """
        Validates that a configuration has been set, and sets a configuration if it has yet to be set. Ensures that
        necessary configuration arguments have been provided for the validation of the expectation.

        Args:
            configuration (OPTIONAL[ExpectationConfiguration]): \
                An optional Expectation Configuration entry that will be used to configure the expectation
        Returns:
            None. Raises InvalidExpectationConfigurationError if the config is not validated successfully
        """

        # Setting up a configuration
        super().validate_configuration(configuration)
        self.validate_metric_value_between_configuration(configuration=configuration)

    @classmethod
    def _atomic_prescriptive_template(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get("include_column_name", True)
        include_column_name = (
            include_column_name if include_column_name is not None else True
        )
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "min_value",
                "max_value",
                "row_condition",
                "condition_parser",
                "strict_min",
                "strict_max",
            ],
        )
        # format params
        params_with_json_schema = {
            "min_value": {
                "schema": {"type": "number"},
                "value": params.get("min_value"),
            },
            "max_value": {
                "schema": {"type": "number"},
                "value": params.get("max_value"),
            },
            "condition_parser": {
                "schema": {"type": "string"},
                "value": params.get("condition_parser"),
            },
            "strict_min": {
                "schema": {"type": "boolean"},
                "value": params.get("strict_min"),
            },
            "strict_max": {
                "schema": {"type": "boolean"},
                "value": params.get("strict_max"),
            },
        }

        if params["min_value"] is None and params["max_value"] is None:
            template_str = "May have any number of rows."
        else:
            at_least_str, at_most_str = handle_strict_min_max(params)

            if params["min_value"] is not None and params["max_value"] is not None:
                template_str = f"Must have {at_least_str} $min_value and {at_most_str} $max_value rows."
            elif params["min_value"] is None:
                template_str = f"Must have {at_most_str} $max_value rows."
            elif params["max_value"] is None:
                template_str = f"Must have {at_least_str} $min_value rows."

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"], with_schema=True
            )
            template_str = (
                conditional_template_str
                + ", then "
                + template_str[0].lower()
                + template_str[1:]
            )
            params_with_json_schema.update(conditional_params)

        return (template_str, params_with_json_schema, styling)

    @classmethod
    @renderer(renderer_type="renderer.prescriptive")
    @render_evaluation_parameter_string
    def _prescriptive_renderer(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get("include_column_name", True)
        include_column_name = (
            include_column_name if include_column_name is not None else True
        )
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "min_value",
                "max_value",
                "row_condition",
                "condition_parser",
                "strict_min",
                "strict_max",
            ],
        )

        if params["min_value"] is None and params["max_value"] is None:
            template_str = "May have any number of rows."
        else:
            at_least_str, at_most_str = handle_strict_min_max(params)

            if params["min_value"] is not None and params["max_value"] is not None:
                template_str = f"Must have {at_least_str} $min_value and {at_most_str} $max_value rows."
            elif params["min_value"] is None:
                template_str = f"Must have {at_most_str} $max_value rows."
            elif params["max_value"] is None:
                template_str = f"Must have {at_least_str} $min_value rows."

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(params["row_condition"])
            template_str = (
                conditional_template_str
                + ", then "
                + template_str[0].lower()
                + template_str[1:]
            )
            params.update(conditional_params)

        return [
            RenderedStringTemplateContent(
                **{
                    "content_block_type": "string_template",
                    "string_template": {
                        "template": template_str,
                        "params": params,
                        "styling": styling,
                    },
                }
            )
        ]

    def _validate(
        self,
        configuration: ExpectationConfiguration,
        metrics: Dict,
        runtime_configuration: dict = None,
        execution_engine: ExecutionEngine = None,
    ):
        return self._validate_metric_value_between(
            metric_name="table.row_count",
            configuration=configuration,
            metrics=metrics,
            runtime_configuration=runtime_configuration,
            execution_engine=execution_engine,
        )

예제 #13

파일 보기

파일: expect_column_value_lengths_to_be_between.py 프로젝트: rpatil524/great_expectations

class ExpectColumnValueLengthsToBeBetween(ColumnMapExpectation):
    """Expect column entries to be strings with length between a minimum value and a maximum value (inclusive).

    This expectation only works for string-type values. Invoking it on ints or floats will raise a TypeError.

    expect_column_value_lengths_to_be_between is a \
    :func:`column_map_expectation <great_expectations.execution_engine.execution_engine.MetaExecutionEngine
    .column_map_expectation>`.

    Args:
        column (str): \
            The column name.
        min_value (int or None): \
            The minimum value for a column entry length.
        max_value (int or None): \
            The maximum value for a column entry length.

    Keyword Args:
        mostly (None or a float between 0 and 1): \
            Return `"success": True` if at least mostly fraction of values match the expectation. \
            For more detail, see :ref:`mostly`.

    Other Parameters:
        result_format (str or None): \
            Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`.
            For more detail, see :ref:`result_format <result_format>`.
        include_config (boolean): \
            If True, then include the expectation config as part of the result object. \
            For more detail, see :ref:`include_config`.
        catch_exceptions (boolean or None): \
            If True, then catch exceptions and include them as part of the result object. \
            For more detail, see :ref:`catch_exceptions`.
        meta (dict or None): \
            A JSON-serializable dictionary (nesting allowed) that will be included in the output without \
            modification. For more detail, see :ref:`meta`.

    Returns:
        An ExpectationSuiteValidationResult

        Exact fields vary depending on the values passed to :ref:`result_format <result_format>` and
        :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`.

    Notes:
        * min_value and max_value are both inclusive.
        * If min_value is None, then max_value is treated as an upper bound, and the number of acceptable rows has \
          no minimum.
        * If max_value is None, then min_value is treated as a lower bound, and the number of acceptable rows has \
          no maximum.

    See Also:
        :func:`expect_column_value_lengths_to_equal \
        <great_expectations.execution_engine.execution_engine.ExecutionEngine.expect_column_value_lengths_to_equal>`

    """

    # This dictionary contains metadata for display in the public gallery
    library_metadata = {
        "maturity": "production",
        "tags": ["core expectation", "column map expectation"],
        "contributors": ["@great_expectations"],
        "requirements": [],
        "has_full_test_suite": True,
        "manually_reviewed_code": True,
    }

    map_metric = "column_values.value_length.between"
    success_keys = (
        "min_value",
        "max_value",
        "strict_min",
        "strict_max",
        "mostly",
        "auto",
        "profiler_config",
    )

    column_min_length_range_estimator_parameter_builder_config: ParameterBuilderConfig = ParameterBuilderConfig(
        module_name="great_expectations.rule_based_profiler.parameter_builder",
        class_name="NumericMetricRangeMultiBatchParameterBuilder",
        name="column_min_length_range_estimator",
        metric_name="column_values.length.min",
        metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
        metric_value_kwargs=None,
        enforce_numeric_metric=True,
        replace_nan_with_zero=True,
        reduce_scalar_metric=True,
        false_positive_rate=f"{VARIABLES_KEY}false_positive_rate",
        quantile_statistic_interpolation_method=
        f"{VARIABLES_KEY}quantile_statistic_interpolation_method",
        estimator=f"{VARIABLES_KEY}estimator",
        n_resamples=f"{VARIABLES_KEY}n_resamples",
        random_seed=f"{VARIABLES_KEY}random_seed",
        include_estimator_samples_histogram_in_details=
        f"{VARIABLES_KEY}include_estimator_samples_histogram_in_details",
        truncate_values=f"{VARIABLES_KEY}truncate_values",
        round_decimals=f"{VARIABLES_KEY}round_decimals",
        evaluation_parameter_builder_configs=None,
        json_serialize=True,
    )
    column_max_length_range_estimator_parameter_builder_config: ParameterBuilderConfig = ParameterBuilderConfig(
        module_name="great_expectations.rule_based_profiler.parameter_builder",
        class_name="NumericMetricRangeMultiBatchParameterBuilder",
        name="column_max_length_range_estimator",
        metric_name="column_values.length.max",
        metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
        metric_value_kwargs=None,
        enforce_numeric_metric=True,
        replace_nan_with_zero=True,
        reduce_scalar_metric=True,
        false_positive_rate=f"{VARIABLES_KEY}false_positive_rate",
        quantile_statistic_interpolation_method=
        f"{VARIABLES_KEY}quantile_statistic_interpolation_method",
        estimator=f"{VARIABLES_KEY}estimator",
        n_resamples=f"{VARIABLES_KEY}n_resamples",
        random_seed=f"{VARIABLES_KEY}random_seed",
        include_estimator_samples_histogram_in_details=
        f"{VARIABLES_KEY}include_estimator_samples_histogram_in_details",
        truncate_values=f"{VARIABLES_KEY}truncate_values",
        round_decimals=f"{VARIABLES_KEY}round_decimals",
        evaluation_parameter_builder_configs=None,
        json_serialize=True,
    )
    validation_parameter_builder_configs: List[ParameterBuilderConfig] = [
        column_min_length_range_estimator_parameter_builder_config,
        column_max_length_range_estimator_parameter_builder_config,
    ]
    default_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
        name=
        "expect_column_value_lengths_to_be_between",  # Convention: use "expectation_type" as profiler name.
        config_version=1.0,
        variables={},
        rules={
            "default_expect_column_value_lengths_to_be_between_rule": {
                "variables": {
                    "mostly": 1.0,
                    "strict_min": False,
                    "strict_max": False,
                    "false_positive_rate": 0.05,
                    "quantile_statistic_interpolation_method": "auto",
                    "estimator": "bootstrap",
                    "n_resamples": 9999,
                    "random_seed": None,
                    "include_estimator_samples_histogram_in_details": False,
                    "truncate_values": {
                        "lower_bound": 0,
                        "upper_bound": None,
                    },
                    "round_decimals": 0,
                },
                "domain_builder": {
                    "class_name":
                    "ColumnDomainBuilder",
                    "module_name":
                    "great_expectations.rule_based_profiler.domain_builder",
                },
                "expectation_configuration_builders": [
                    {
                        "expectation_type":
                        "expect_column_value_lengths_to_be_between",
                        "class_name": "DefaultExpectationConfigurationBuilder",
                        "module_name":
                        "great_expectations.rule_based_profiler.expectation_configuration_builder",
                        "validation_parameter_builder_configs":
                        validation_parameter_builder_configs,
                        "column":
                        f"{DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}column",
                        "min_value":
                        f"{PARAMETER_KEY}{column_min_length_range_estimator_parameter_builder_config.name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY}[0]",
                        "max_value":
                        f"{PARAMETER_KEY}{column_max_length_range_estimator_parameter_builder_config.name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY}[1]",
                        "mostly": f"{VARIABLES_KEY}mostly",
                        "strict_min": f"{VARIABLES_KEY}strict_min",
                        "strict_max": f"{VARIABLES_KEY}strict_max",
                        "meta": {
                            "profiler_details": {
                                "column_min_range_estimator":
                                f"{PARAMETER_KEY}{column_min_length_range_estimator_parameter_builder_config.name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY}",
                                "column_max_range_estimator":
                                f"{PARAMETER_KEY}{column_max_length_range_estimator_parameter_builder_config.name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY}",
                            },
                        },
                    },
                ],
            },
        },
    )

    default_kwarg_values = {
        "row_condition": None,
        "condition_parser": None,
        "min_value": None,
        "max_value": None,
        "strict_min": None,
        "strict_max": None,
        "mostly": 1,
        "result_format": "BASIC",
        "include_config": True,
        "catch_exceptions": False,
        "auto": False,
        "profiler_config": default_profiler_config,
    }
    args_keys = (
        "column",
        "min_value",
        "max_value",
    )

    def validate_configuration(
            self, configuration: Optional[ExpectationConfiguration]) -> None:
        super().validate_configuration(configuration)

        if configuration is None:
            configuration = self.configuration

        try:
            assert (configuration.kwargs.get("min_value") is not None
                    or configuration.kwargs.get("max_value")
                    is not None), "min_value and max_value cannot both be None"
            if configuration.kwargs.get("min_value"):
                assert (isinstance(
                    configuration.kwargs["min_value"], dict) or float(
                        configuration.kwargs.get("min_value")).is_integer()
                        ), "min_value and max_value must be integers"
                if isinstance(configuration.kwargs.get("min_value"), dict):
                    assert "$PARAMETER" in configuration.kwargs.get(
                        "min_value"
                    ), 'Evaluation Parameter dict for min_value kwarg must have "$PARAMETER" key.'

            if configuration.kwargs.get("max_value"):
                assert (isinstance(
                    configuration.kwargs["max_value"], dict) or float(
                        configuration.kwargs.get("max_value")).is_integer()
                        ), "min_value and max_value must be integers"
                if isinstance(configuration.kwargs.get("max_value"), dict):
                    assert "$PARAMETER" in configuration.kwargs.get(
                        "max_value"
                    ), 'Evaluation Parameter dict for max_value kwarg must have "$PARAMETER" key.'
        except AssertionError as e:
            raise InvalidExpectationConfigurationError(str(e))

    @classmethod
    def _atomic_prescriptive_template(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "column",
                "min_value",
                "max_value",
                "mostly",
                "row_condition",
                "condition_parser",
                "strict_min",
                "strict_max",
            ],
        )
        params_with_json_schema = {
            "column": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("column")
            },
            "min_value": {
                "schema": {
                    "type": "number"
                },
                "value": params.get("min_value"),
            },
            "max_value": {
                "schema": {
                    "type": "number"
                },
                "value": params.get("max_value"),
            },
            "mostly": {
                "schema": {
                    "type": "number"
                },
                "value": params.get("mostly")
            },
            "mostly_pct": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("mostly_pct"),
            },
            "row_condition": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("row_condition"),
            },
            "condition_parser": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("condition_parser"),
            },
            "strict_min": {
                "schema": {
                    "type": "boolean"
                },
                "value": params.get("strict_min"),
            },
            "strict_max": {
                "schema": {
                    "type": "boolean"
                },
                "value": params.get("strict_max"),
            },
        }

        if (params["min_value"] is None) and (params["max_value"] is None):
            template_str = "values may have any length."
        else:
            at_least_str, at_most_str = handle_strict_min_max(params)

            if params["mostly"] is not None and params["mostly"] < 1.0:
                params_with_json_schema["mostly_pct"]["value"] = num_to_str(
                    params["mostly"] * 100, precision=15, no_scientific=True)
                # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".")
                if params["min_value"] is not None and params[
                        "max_value"] is not None:
                    template_str = f"values must be {at_least_str} $min_value and {at_most_str} $max_value characters long, at least $mostly_pct % of the time."

                elif params["min_value"] is None:
                    template_str = f"values must be {at_most_str} $max_value characters long, at least $mostly_pct % of the time."

                elif params["max_value"] is None:
                    template_str = f"values must be {at_least_str} $min_value characters long, at least $mostly_pct % of the time."
            else:
                if params["min_value"] is not None and params[
                        "max_value"] is not None:
                    template_str = f"values must always be {at_least_str} $min_value and {at_most_str} $max_value characters long."

                elif params["min_value"] is None:
                    template_str = f"values must always be {at_most_str} $max_value characters long."

                elif params["max_value"] is None:
                    template_str = f"values must always be {at_least_str} $min_value characters long."

        if include_column_name:
            template_str = f"$column {template_str}"

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"], with_schema=True)
            template_str = f"{conditional_template_str}, then {template_str}"
            params_with_json_schema.update(conditional_params)

        return (template_str, params_with_json_schema, styling)

    @classmethod
    @renderer(renderer_type="renderer.prescriptive")
    @render_evaluation_parameter_string
    def _prescriptive_renderer(
        cls,
        configuration: ExpectationConfiguration = None,
        result: ExpectationValidationResult = None,
        language: str = None,
        runtime_configuration: dict = None,
        **kwargs,
    ) -> List[Union[dict, str, RenderedStringTemplateContent,
                    RenderedTableContent, RenderedBulletListContent,
                    RenderedGraphContent, Any, ]]:
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "column",
                "min_value",
                "max_value",
                "mostly",
                "row_condition",
                "condition_parser",
                "strict_min",
                "strict_max",
            ],
        )

        if (params["min_value"] is None) and (params["max_value"] is None):
            template_str = "values may have any length."
        else:
            at_least_str, at_most_str = handle_strict_min_max(params)

            if params["mostly"] is not None and params["mostly"] < 1.0:
                params["mostly_pct"] = num_to_str(params["mostly"] * 100,
                                                  precision=15,
                                                  no_scientific=True)
                # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".")
                if params["min_value"] is not None and params[
                        "max_value"] is not None:
                    template_str = f"values must be {at_least_str} $min_value and {at_most_str} $max_value characters long, at least $mostly_pct % of the time."

                elif params["min_value"] is None:
                    template_str = f"values must be {at_most_str} $max_value characters long, at least $mostly_pct % of the time."

                elif params["max_value"] is None:
                    template_str = f"values must be {at_least_str} $min_value characters long, at least $mostly_pct % of the time."
            else:
                if params["min_value"] is not None and params[
                        "max_value"] is not None:
                    template_str = f"values must always be {at_least_str} $min_value and {at_most_str} $max_value characters long."

                elif params["min_value"] is None:
                    template_str = f"values must always be {at_most_str} $max_value characters long."

                elif params["max_value"] is None:
                    template_str = f"values must always be {at_least_str} $min_value characters long."

        if include_column_name:
            template_str = f"$column {template_str}"

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"])
            template_str = f"{conditional_template_str}, then {template_str}"
            params.update(conditional_params)

        return [
            RenderedStringTemplateContent(
                **{
                    "content_block_type": "string_template",
                    "string_template": {
                        "template": template_str,
                        "params": params,
                        "styling": styling,
                    },
                })
        ]

예제 #14

파일 보기

파일: expect_column_quantile_values_to_be_between.py 프로젝트: MuttData/great_expectations

class ExpectColumnQuantileValuesToBeBetween(ColumnExpectation):
    # noinspection PyUnresolvedReferences
    """Expect specific provided column quantiles to be between provided minimum and maximum values.

           ``quantile_ranges`` must be a dictionary with two keys:

               * ``quantiles``: (list of float) increasing ordered list of desired quantile values

               * ``value_ranges``: (list of lists): Each element in this list consists of a list with two values, a lower \
                 and upper bound (inclusive) for the corresponding quantile. These values must be [min, max] ordered.


           For each provided range:

               * min_value and max_value are both inclusive.
               * If min_value is None, then max_value is treated as an upper bound only
               * If max_value is None, then min_value is treated as a lower bound only

           The length of the quantiles list and quantile_values list must be equal.

           For example:
           ::

               # my_df.my_col = [1,2,2,3,3,3,4]
               >>> my_df.expect_column_quantile_values_to_be_between(
                   "my_col",
                   {
                       "quantiles": [0., 0.333, 0.6667, 1.],
                       "value_ranges": [[0,1], [2,3], [3,4], [4,5]]
                   }
               )
               {
                 "success": True,
                   "result": {
                     "observed_value": {
                       "quantiles: [0., 0.333, 0.6667, 1.],
                       "values": [1, 2, 3, 4],
                     }
                     "element_count": 7,
                     "missing_count": 0,
                     "missing_percent": 0.0,
                     "details": {
                       "success_details": [true, true, true, true]
                     }
                   }
                 }
               }

           `expect_column_quantile_values_to_be_between` can be computationally intensive for large datasets.

           expect_column_quantile_values_to_be_between is a \
           :func:`column_aggregate_expectation
           <great_expectations.execution_engine.MetaExecutionEngine.column_aggregate_expectation>`.

           Args:
               column (str): \
                   The column name.
               quantile_ranges (dictionary): \
                   Quantiles and associated value ranges for the column. See above for details.
               allow_relative_error (boolean or string): \
                   Whether to allow relative error in quantile communications on backends that support or require it.

           Other Parameters:
               result_format (str or None): \
                   Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`.
                   For more detail, see :ref:`result_format <result_format>`.
               include_config (boolean): \
                   If True, then include the expectation config as part of the result object. \
                   For more detail, see :ref:`include_config`.
               catch_exceptions (boolean or None): \
                   If True, then catch exceptions and include them as part of the result object. \
                   For more detail, see :ref:`catch_exceptions`.
               meta (dict or None): \
                   A JSON-serializable dictionary (nesting allowed) that will be included in the output without \
                   modification. For more detail, see :ref:`meta`.

           Returns:
               An ExpectationSuiteValidationResult

               Exact fields vary depending on the values passed to :ref:`result_format <result_format>` and
               :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`.

           Notes:
               These fields in the result object are customized for this expectation:
               ::
               details.success_details

           See Also:
               :func:`expect_column_min_to_be_between \
               <great_expectations.execution_engine.execution_engine.ExecutionEngine.expect_column_min_to_be_between>`

               :func:`expect_column_max_to_be_between \
               <great_expectations.execution_engine.execution_engine.ExecutionEngine.expect_column_max_to_be_between>`

               :func:`expect_column_median_to_be_between \
               <great_expectations.execution_engine.execution_engine.ExecutionEngine.expect_column_median_to_be_between>`

           """

    # This dictionary contains metadata for display in the public gallery
    library_metadata = {
        "maturity": "production",
        "tags": ["core expectation", "column aggregate expectation"],
        "contributors": ["@great_expectations"],
        "requirements": [],
    }

    metric_dependencies = ("column.quantile_values", )
    success_keys = (
        "quantile_ranges",
        "allow_relative_error",
        "auto",
        "profiler_config",
    )

    quantile_value_ranges_estimator_parameter_builder_config: ParameterBuilderConfig = ParameterBuilderConfig(
        module_name="great_expectations.rule_based_profiler.parameter_builder",
        class_name="NumericMetricRangeMultiBatchParameterBuilder",
        name="quantile_value_ranges_estimator",
        metric_name="column.quantile_values",
        metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
        metric_value_kwargs={
            "quantiles": f"{VARIABLES_KEY}quantiles",
            "allow_relative_error": f"{VARIABLES_KEY}allow_relative_error",
        },
        enforce_numeric_metric=True,
        replace_nan_with_zero=True,
        reduce_scalar_metric=True,
        false_positive_rate=f"{VARIABLES_KEY}false_positive_rate",
        quantile_statistic_interpolation_method=
        f"{VARIABLES_KEY}quantile_statistic_interpolation_method",
        estimator=f"{VARIABLES_KEY}estimator",
        num_bootstrap_samples=f"{VARIABLES_KEY}num_bootstrap_samples",
        bootstrap_random_seed=f"{VARIABLES_KEY}bootstrap_random_seed",
        truncate_values=f"{VARIABLES_KEY}truncate_values",
        round_decimals=f"{VARIABLES_KEY}round_decimals",
        evaluation_parameter_builder_configs=None,
        json_serialize=True,
    )
    validation_parameter_builder_configs: List[ParameterBuilderConfig] = [
        quantile_value_ranges_estimator_parameter_builder_config,
    ]
    default_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
        name=
        "expect_column_quantile_values_to_be_between",  # Convention: use "expectation_type" as profiler name.
        config_version=1.0,
        variables={},
        rules={
            "default_expect_column_quantile_values_to_be_between_rule": {
                "variables": {
                    "quantiles": [
                        0.25,
                        0.5,
                        0.75,
                    ],
                    "allow_relative_error": "linear",
                    "false_positive_rate": 0.05,
                    "quantile_statistic_interpolation_method": "auto",
                    "estimator": "bootstrap",
                    "num_bootstrap_samples": 9999,
                    "bootstrap_random_seed": None,
                    "truncate_values": {
                        "lower_bound": None,
                        "upper_bound": None,
                    },
                    "round_decimals": 1,
                },
                "domain_builder": {
                    "class_name":
                    "ColumnDomainBuilder",
                    "module_name":
                    "great_expectations.rule_based_profiler.domain_builder",
                },
                "expectation_configuration_builders": [{
                    "expectation_type":
                    "expect_column_quantile_values_to_be_between",
                    "class_name":
                    "DefaultExpectationConfigurationBuilder",
                    "module_name":
                    "great_expectations.rule_based_profiler.expectation_configuration_builder",
                    "validation_parameter_builder_configs":
                    validation_parameter_builder_configs,
                    "column":
                    f"{DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}column",
                    "quantile_ranges": {
                        "quantiles":
                        f"{VARIABLES_KEY}quantiles",
                        "value_ranges":
                        f"{PARAMETER_KEY}{quantile_value_ranges_estimator_parameter_builder_config.name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY}",
                    },
                    "allow_relative_error":
                    f"{VARIABLES_KEY}allow_relative_error",
                    "meta": {
                        "profiler_details":
                        f"{PARAMETER_KEY}{quantile_value_ranges_estimator_parameter_builder_config.name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY}",
                    },
                }],
            },
        },
    )

    default_kwarg_values = {
        "row_condition": None,
        "condition_parser": None,
        "quantile_ranges": None,
        "result_format": "BASIC",
        "allow_relative_error": False,
        "include_config": True,
        "catch_exceptions": False,
        "meta": None,
        "auto": False,
        "profiler_config": default_profiler_config,
    }
    args_keys = (
        "column",
        "quantile_ranges",
        "allow_relative_error",
    )

    def validate_configuration(
            self, configuration: Optional[ExpectationConfiguration]) -> None:
        super().validate_configuration(configuration)
        try:
            assert (
                "quantile_ranges"
                in configuration.kwargs), "quantile_ranges must be provided"
            assert isinstance(configuration.kwargs["quantile_ranges"],
                              dict), "quantile_ranges should be a dictionary"

            assert all([
                True if None in x or x == sorted(x) else False for x in
                configuration.kwargs["quantile_ranges"]["value_ranges"]
            ]), "quantile_ranges must consist of ordered pairs"

        except AssertionError as e:
            raise InvalidExpectationConfigurationError(str(e))

        # Ensuring actual quantiles and their value ranges match up
        quantile_ranges = configuration.kwargs["quantile_ranges"]
        quantiles = quantile_ranges["quantiles"]
        quantile_value_ranges = quantile_ranges["value_ranges"]

        if len(quantiles) != len(quantile_value_ranges):
            raise ValueError(
                "quantile_values and quantiles must have the same number of elements"
            )

    @classmethod
    def _atomic_prescriptive_template(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration["kwargs"],
            ["column", "quantile_ranges", "row_condition", "condition_parser"],
        )

        header_params_with_json_schema = {
            "column": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("column")
            },
            "mostly": {
                "schema": {
                    "type": "number"
                },
                "value": params.get("mostly")
            },
            "row_condition": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("row_condition"),
            },
            "condition_parser": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("condition_parser"),
            },
        }

        header_template_str = "quantiles must be within the following value ranges."

        if include_column_name:
            header_template_str = f"$column {header_template_str}"

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"], with_schema=True)
            header_template_str = (conditional_template_str + ", then " +
                                   header_template_str[0].lower() +
                                   header_template_str[1:])
            header_params_with_json_schema.update(conditional_params)

        quantile_ranges = (params.get("quantile_ranges")
                           if params.get("quantile_ranges") else {})
        quantiles = (quantile_ranges.get("quantiles")
                     if quantile_ranges.get("quantiles") else [])
        value_ranges = (quantile_ranges.get("value_ranges")
                        if quantile_ranges.get("value_ranges") else [])

        table_header_row = [
            {
                "schema": {
                    "type": "string"
                },
                "value": "Quantile"
            },
            {
                "schema": {
                    "type": "string"
                },
                "value": "Min Value"
            },
            {
                "schema": {
                    "type": "string"
                },
                "value": "Max Value"
            },
        ]
        table_rows = []

        quantile_strings = {0.25: "Q1", 0.75: "Q3", 0.50: "Median"}

        for quantile, value_range in zip(quantiles, value_ranges):
            quantile_string = quantile_strings.get(quantile,
                                                   f"{quantile:3.2f}")
            table_rows.append([
                {
                    "value": quantile_string,
                    "schema": {
                        "type": "string"
                    },
                },
                {
                    "value":
                    value_range[0] if value_range[0] is not None else "Any",
                    "schema": {
                        "type":
                        "number" if value_range[0] is not None else "string"
                    },
                },
                {
                    "value":
                    value_range[1] if value_range[1] is not None else "Any",
                    "schema": {
                        "type":
                        "number" if value_range[1] is not None else "string"
                    },
                },
            ])

        return (
            header_template_str,
            header_params_with_json_schema,
            styling,
            table_header_row,
            table_rows,
        )

    @classmethod
    @renderer(renderer_type="atomic.prescriptive.summary")
    @render_evaluation_parameter_string
    def _prescriptive_summary(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        """
        Rendering function that is utilized by GE Cloud Front-end
        """
        (
            header_template_str,
            header_params_with_json_schema,
            _,
            table_header_row,
            table_rows,
        ) = cls._atomic_prescriptive_template(configuration, result, language,
                                              runtime_configuration, **kwargs)
        value_obj = renderedAtomicValueSchema.load({
            "header": {
                "schema": {
                    "type": "StringValueType"
                },
                "value": {
                    "template": header_template_str,
                    "params": header_params_with_json_schema,
                },
            },
            "header_row": table_header_row,
            "table": table_rows,
            "schema": {
                "type": "TableType"
            },
        })
        rendered = RenderedAtomicContent(name="atomic.prescriptive.summary",
                                         value=value_obj,
                                         value_type="TableType")
        return rendered

    @classmethod
    @renderer(renderer_type="renderer.prescriptive")
    @render_evaluation_parameter_string
    def _prescriptive_renderer(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration["kwargs"],
            ["column", "quantile_ranges", "row_condition", "condition_parser"],
        )
        template_str = "quantiles must be within the following value ranges."

        if include_column_name:
            template_str = f"$column {template_str}"

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"])
            template_str = (conditional_template_str + ", then " +
                            template_str[0].lower() + template_str[1:])
            params.update(conditional_params)

        expectation_string_obj = {
            "content_block_type": "string_template",
            "string_template": {
                "template": template_str,
                "params": params
            },
        }

        quantiles = params["quantile_ranges"]["quantiles"]
        value_ranges = params["quantile_ranges"]["value_ranges"]

        table_header_row = ["Quantile", "Min Value", "Max Value"]
        table_rows = []

        quantile_strings = {0.25: "Q1", 0.75: "Q3", 0.50: "Median"}

        for quantile, value_range in zip(quantiles, value_ranges):
            quantile_string = quantile_strings.get(quantile,
                                                   f"{quantile:3.2f}")
            table_rows.append([
                quantile_string,
                str(value_range[0]) if value_range[0] is not None else "Any",
                str(value_range[1]) if value_range[1] is not None else "Any",
            ])

        quantile_range_table = RenderedTableContent(
            **{
                "content_block_type": "table",
                "header_row": table_header_row,
                "table": table_rows,
                "styling": {
                    "body": {
                        "classes": [
                            "table",
                            "table-sm",
                            "table-unbordered",
                            "col-4",
                            "mt-2",
                        ],
                    },
                    "parent": {
                        "styles": {
                            "list-style-type": "none"
                        }
                    },
                },
            })

        return [expectation_string_obj, quantile_range_table]

    @classmethod
    @renderer(renderer_type="renderer.diagnostic.observed_value")
    def _diagnostic_observed_value_renderer(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        if result.result is None or result.result.get(
                "observed_value") is None:
            return "--"

        quantiles = result.result.get("observed_value",
                                      {}).get("quantiles", [])
        value_ranges = result.result.get("observed_value",
                                         {}).get("values", [])

        table_header_row = ["Quantile", "Value"]
        table_rows = []

        quantile_strings = {0.25: "Q1", 0.75: "Q3", 0.50: "Median"}

        for idx, quantile in enumerate(quantiles):
            quantile_string = quantile_strings.get(quantile)
            table_rows.append([
                quantile_string if quantile_string else f"{quantile:3.2f}",
                str(value_ranges[idx]),
            ])

        return RenderedTableContent(
            **{
                "content_block_type": "table",
                "header_row": table_header_row,
                "table": table_rows,
                "styling": {
                    "body": {
                        "classes":
                        ["table", "table-sm", "table-unbordered", "col-4"],
                    }
                },
            })

    @classmethod
    def _atomic_diagnostic_observed_value_template(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        template_string = None
        params_with_json_schema = None
        table_header_row = None
        table_rows = None

        if result.result is None or result.result.get(
                "observed_value") is None:
            template_string = "--"
            params_with_json_schema = {}
            return (
                template_string,
                params_with_json_schema,
                table_header_row,
                table_rows,
            )

        quantiles = result.result.get("observed_value",
                                      {}).get("quantiles", [])
        value_ranges = result.result.get("observed_value",
                                         {}).get("values", [])

        table_header_row = [
            {
                "schema": {
                    "type": "string"
                },
                "value": "Quantile"
            },
            {
                "schema": {
                    "type": "string"
                },
                "value": "Value"
            },
        ]
        table_rows = []

        quantile_strings = {0.25: "Q1", 0.75: "Q3", 0.50: "Median"}

        for idx, quantile in enumerate(quantiles):
            quantile_string = quantile_strings.get(quantile)
            table_rows.append([
                {
                    "value":
                    quantile_string if quantile_string else f"{quantile:3.2f}",
                    "schema": {
                        "type": "string"
                    },
                },
                {
                    "value": value_ranges[idx],
                    "schema": {
                        "type": "number"
                    }
                },
            ])

        return template_string, params_with_json_schema, table_header_row, table_rows

    @classmethod
    @renderer(renderer_type="atomic.diagnostic.observed_value")
    def _atomic_diagnostic_observed_value(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        (
            template_string,
            params_with_json_schema,
            table_header_row,
            table_rows,
        ) = cls._atomic_diagnostic_observed_value_template(
            configuration, result, language, runtime_configuration, **kwargs)
        if template_string is not None:
            value_obj = renderedAtomicValueSchema.load({
                "template": template_string,
                "params": {},
                "schema": {
                    "type": "StringValueType"
                },
            })
            return RenderedAtomicContent(
                name="atomic.diagnostic.observed_value",
                value=value_obj,
                value_type="StringValueType",
            )
        else:
            value_obj = renderedAtomicValueSchema.load({
                "header_row": table_header_row,
                "table": table_rows,
                "schema": {
                    "type": "TableType"
                },
            })
            return RenderedAtomicContent(
                name="atomic.diagnostic.observed_value",
                value=value_obj,
                value_type="TableType",
            )

    @classmethod
    @renderer(renderer_type="renderer.descriptive.quantile_table")
    def _descriptive_quantile_table_renderer(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        assert result, "Must pass in result."
        table_rows = []
        quantiles = result.result["observed_value"]["quantiles"]
        quantile_ranges = result.result["observed_value"]["values"]

        quantile_strings = {0.25: "Q1", 0.75: "Q3", 0.50: "Median"}

        for idx, quantile in enumerate(quantiles):
            quantile_string = quantile_strings.get(quantile)
            table_rows.append([
                {
                    "content_block_type": "string_template",
                    "string_template": {
                        "template": quantile_string
                        if quantile_string else f"{quantile:3.2f}",
                        "tooltip": {
                            "content":
                            "expect_column_quantile_values_to_be_between \n expect_column_median_to_be_between"
                            if quantile == 0.50 else
                            "expect_column_quantile_values_to_be_between"
                        },
                    },
                },
                quantile_ranges[idx],
            ])

        return RenderedTableContent(
            **{
                "content_block_type":
                "table",
                "header":
                RenderedStringTemplateContent(
                    **{
                        "content_block_type": "string_template",
                        "string_template": {
                            "template": "Quantiles",
                            "tag": "h6"
                        },
                    }),
                "table":
                table_rows,
                "styling": {
                    "classes": ["col-3", "mt-1", "pl-1", "pr-1"],
                    "body": {
                        "classes": ["table", "table-sm", "table-unbordered"],
                    },
                },
            })

    def get_validation_dependencies(
        self,
        configuration: Optional[ExpectationConfiguration] = None,
        execution_engine: Optional[ExecutionEngine] = None,
        runtime_configuration: Optional[dict] = None,
    ):
        all_dependencies = super().get_validation_dependencies(
            configuration, execution_engine, runtime_configuration)
        # column.quantile_values expects a "quantiles" key
        all_dependencies["metrics"][
            "column.quantile_values"].metric_value_kwargs[
                "quantiles"] = configuration.kwargs["quantile_ranges"][
                    "quantiles"]
        return all_dependencies

    def _validate(
        self,
        configuration: ExpectationConfiguration,
        metrics: Dict,
        runtime_configuration: dict = None,
        execution_engine: ExecutionEngine = None,
    ):
        quantile_vals = metrics.get("column.quantile_values")
        quantile_ranges = configuration.kwargs.get("quantile_ranges")
        quantiles = quantile_ranges["quantiles"]
        quantile_value_ranges = quantile_ranges["value_ranges"]

        # We explicitly allow "None" to be interpreted as +/- infinity
        comparison_quantile_ranges = [[
            -np.inf if lower_bound is None else lower_bound,
            np.inf if upper_bound is None else upper_bound,
        ] for (lower_bound, upper_bound) in quantile_value_ranges]
        success_details = [
            range_[0] <= quantile_vals[idx] <= range_[1]
            for idx, range_ in enumerate(comparison_quantile_ranges)
        ]

        return {
            "success": np.all(success_details),
            "result": {
                "observed_value": {
                    "quantiles": quantiles,
                    "values": quantile_vals
                },
                "details": {
                    "success_details": success_details
                },
            },
        }

예제 #15

파일 보기

파일: expect_column_values_to_be_in_set.py 프로젝트: admariner/great_expectations

class ExpectColumnValuesToBeInSet(ColumnMapExpectation):
    """Expect each column value to be in a given set.

    For example:
    ::

        # my_df.my_col = [1,2,2,3,3,3]
        >>> my_df.expect_column_values_to_be_in_set(
            "my_col",
            [2,3]
        )
        {
          "success": false
          "result": {
            "unexpected_count": 1
            "unexpected_percent": 16.66666666666666666,
            "unexpected_percent_nonmissing": 16.66666666666666666,
            "partial_unexpected_list": [
              1
            ],
          },
        }

    expect_column_values_to_be_in_set is a \
    :func:`column_map_expectation <great_expectations.execution_engine.execution_engine.MetaExecutionEngine
    .column_map_expectation>`.

    Args:
        column (str): \
            The column name.
        value_set (set-like): \
            A set of objects used for comparison.

    Keyword Args:
        mostly (None or a float between 0 and 1): \
            Return `"success": True` if at least mostly fraction of values match the expectation. \
            For more detail, see :ref:`mostly`.
        parse_strings_as_datetimes (boolean or None) : If True values provided in value_set will be parsed as \
            datetimes before making comparisons.

    Other Parameters:
        result_format (str or None): \
            Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`.
            For more detail, see :ref:`result_format <result_format>`.
        include_config (boolean): \
            If True, then include the expectation config as part of the result object. \
            For more detail, see :ref:`include_config`.
        catch_exceptions (boolean or None): \
            If True, then catch exceptions and include them as part of the result object. \
            For more detail, see :ref:`catch_exceptions`.
        meta (dict or None): \
            A JSON-serializable dictionary (nesting allowed) that will be included in the output without \
            modification. For more detail, see :ref:`meta`.

    Returns:
        An ExpectationSuiteValidationResult

        Exact fields vary depending on the values passed to :ref:`result_format <result_format>` and
        :ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`.

    See Also:
        :func:`expect_column_values_to_not_be_in_set \
        <great_expectations.execution_engine.execution_engine.ExecutionEngine
        .expect_column_values_to_not_be_in_set>`

    """

    # This dictionary contains metadata for display in the public gallery
    library_metadata = {
        "maturity": "production",
        "tags": ["core expectation", "column map expectation"],
        "contributors": ["@great_expectations"],
        "requirements": [],
        "has_full_test_suite": True,
        "manually_reviewed_code": True,
    }

    map_metric = "column_values.in_set"

    args_keys = (
        "column",
        "value_set",
    )

    success_keys = (
        "value_set",
        "mostly",
        "parse_strings_as_datetimes",
        "auto",
        "profiler_config",
    )

    value_set_estimator_parameter_builder_config: ParameterBuilderConfig = (
        ParameterBuilderConfig(
            module_name=
            "great_expectations.rule_based_profiler.parameter_builder",
            class_name="ValueSetMultiBatchParameterBuilder",
            name="value_set_estimator",
            metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
            metric_value_kwargs=None,
            evaluation_parameter_builder_configs=None,
        ))
    validation_parameter_builder_configs: List[ParameterBuilderConfig] = [
        value_set_estimator_parameter_builder_config,
    ]
    default_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
        name=
        "expect_column_values_to_be_in_set",  # Convention: use "expectation_type" as profiler name.
        config_version=1.0,
        variables={},
        rules={
            "default_expect_column_values_to_be_in_set_rule": {
                "variables": {
                    "mostly": 1.0,
                },
                "domain_builder": {
                    "class_name":
                    "ColumnDomainBuilder",
                    "module_name":
                    "great_expectations.rule_based_profiler.domain_builder",
                },
                "expectation_configuration_builders": [
                    {
                        "expectation_type":
                        "expect_column_values_to_be_in_set",
                        "class_name": "DefaultExpectationConfigurationBuilder",
                        "module_name":
                        "great_expectations.rule_based_profiler.expectation_configuration_builder",
                        "validation_parameter_builder_configs":
                        validation_parameter_builder_configs,
                        "column":
                        f"{DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}column",
                        "value_set":
                        f"{PARAMETER_KEY}{value_set_estimator_parameter_builder_config.name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY}",
                        "mostly": f"{VARIABLES_KEY}mostly",
                        "meta": {
                            "profiler_details":
                            f"{PARAMETER_KEY}{value_set_estimator_parameter_builder_config.name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY}",
                        },
                    },
                ],
            },
        },
    )

    default_kwarg_values = {
        "value_set": [],
        "parse_strings_as_datetimes": False,
        "auto": False,
        "profiler_config": default_profiler_config,
    }

    @classmethod
    def _atomic_prescriptive_template(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "column",
                "value_set",
                "mostly",
                "parse_strings_as_datetimes",
                "row_condition",
                "condition_parser",
            ],
        )
        params_with_json_schema = {
            "column": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("column")
            },
            "value_set": {
                "schema": {
                    "type": "array"
                },
                "value": params.get("value_set"),
            },
            "mostly": {
                "schema": {
                    "type": "number"
                },
                "value": params.get("mostly")
            },
            "mostly_pct": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("mostly_pct"),
            },
            "parse_strings_as_datetimes": {
                "schema": {
                    "type": "boolean"
                },
                "value": params.get("parse_strings_as_datetimes"),
            },
            "row_condition": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("row_condition"),
            },
            "condition_parser": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("condition_parser"),
            },
        }

        if params["value_set"] is None or len(params["value_set"]) == 0:
            values_string = "[ ]"
        else:
            for i, v in enumerate(params["value_set"]):
                params[f"v__{str(i)}"] = v

            values_string = " ".join(
                [f"$v__{str(i)}" for i, v in enumerate(params["value_set"])])

        template_str = f"values must belong to this set: {values_string}"

        if params["mostly"] is not None and params["mostly"] < 1.0:
            params_with_json_schema["mostly_pct"]["value"] = num_to_str(
                params["mostly"] * 100, precision=15, no_scientific=True)
            # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".")
            template_str += ", at least $mostly_pct % of the time."
        else:
            template_str += "."

        if params.get("parse_strings_as_datetimes"):
            template_str += " Values should be parsed as datetimes."

        if include_column_name:
            template_str = f"$column {template_str}"

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"], with_schema=True)
            template_str = f"{conditional_template_str}, then {template_str}"
            params_with_json_schema.update(conditional_params)

        params_with_json_schema = add_values_with_json_schema_from_list_in_params(
            params=params,
            params_with_json_schema=params_with_json_schema,
            param_key_with_list="value_set",
        )

        return (template_str, params_with_json_schema, styling)

    @classmethod
    @renderer(renderer_type="renderer.prescriptive")
    @render_evaluation_parameter_string
    def _prescriptive_renderer(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "column",
                "value_set",
                "mostly",
                "parse_strings_as_datetimes",
                "row_condition",
                "condition_parser",
            ],
        )

        if params["value_set"] is None or len(params["value_set"]) == 0:
            values_string = "[ ]"
        else:
            for i, v in enumerate(params["value_set"]):
                params[f"v__{str(i)}"] = v

            values_string = " ".join(
                [f"$v__{str(i)}" for i, v in enumerate(params["value_set"])])

        template_str = f"values must belong to this set: {values_string}"

        if params["mostly"] is not None and params["mostly"] < 1.0:
            params["mostly_pct"] = num_to_str(params["mostly"] * 100,
                                              precision=15,
                                              no_scientific=True)
            # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".")
            template_str += ", at least $mostly_pct % of the time."
        else:
            template_str += "."

        if params.get("parse_strings_as_datetimes"):
            template_str += " Values should be parsed as datetimes."

        if include_column_name:
            template_str = f"$column {template_str}"

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"])
            template_str = f"{conditional_template_str}, then {template_str}"
            params.update(conditional_params)

        return [
            RenderedStringTemplateContent(
                **{
                    "content_block_type": "string_template",
                    "string_template": {
                        "template": template_str,
                        "params": params,
                        "styling": styling,
                    },
                })
        ]

    @classmethod
    @renderer(renderer_type="renderer.descriptive.example_values_block")
    def _descriptive_example_values_block_renderer(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        assert result, "Must pass in result."
        if "partial_unexpected_counts" in result.result:
            partial_unexpected_counts = result.result[
                "partial_unexpected_counts"]
            values = [str(v["value"]) for v in partial_unexpected_counts]
        elif "partial_unexpected_list" in result.result:
            values = [
                str(item) for item in result.result["partial_unexpected_list"]
            ]
        else:
            return

        classes = ["col-3", "mt-1", "pl-1", "pr-1"]

        if any(len(value) > 80 for value in values):
            content_block_type = "bullet_list"
            content_block_class = RenderedBulletListContent
        else:
            content_block_type = "value_list"
            content_block_class = ValueListContent

        new_block = content_block_class(
            **{
                "content_block_type":
                content_block_type,
                "header":
                RenderedStringTemplateContent(
                    **{
                        "content_block_type": "string_template",
                        "string_template": {
                            "template": "Example Values",
                            "tooltip": {
                                "content": "expect_column_values_to_be_in_set"
                            },
                            "tag": "h6",
                        },
                    }),
                content_block_type: [{
                    "content_block_type": "string_template",
                    "string_template": {
                        "template": "$value",
                        "params": {
                            "value": value
                        },
                        "styling": {
                            "default": {
                                "classes": ["badge", "badge-info"]
                                if content_block_type == "value_list" else [],
                                "styles": {
                                    "word-break": "break-all"
                                },
                            },
                        },
                    },
                } for value in values],
                "styling": {
                    "classes": classes,
                },
            })

        return new_block

    def validate_configuration(
            self, configuration: Optional[ExpectationConfiguration]) -> None:
        super().validate_configuration(configuration)
        # supports extensibility by allowing value_set to not be provided in config but captured via child-class default_kwarg_values, e.g. parameterized expectations
        value_set = configuration.kwargs.get(
            "value_set") or self.default_kwarg_values.get("value_set")
        try:
            assert ("value_set" in configuration.kwargs
                    or value_set), "value_set is required"
            assert isinstance(
                value_set,
                (list, set, dict)), "value_set must be a list, set, or dict"
            if isinstance(value_set, dict):
                assert (
                    "$PARAMETER" in value_set
                ), 'Evaluation Parameter dict for value_set kwarg must have "$PARAMETER" key.'
        except AssertionError as e:
            raise InvalidExpectationConfigurationError(str(e))