def test_instantiation_mean_unexpected_map_metric_multi_batch_parameter_builder_required_arguments_absent(
    bobby_columnar_table_multi_batch_deterministic_data_context, ):
    data_context: DataContext = (
        bobby_columnar_table_multi_batch_deterministic_data_context)

    with pytest.raises(TypeError) as excinfo:
        # noinspection PyUnusedLocal,PyArgumentList
        parameter_builder: ParameterBuilder = (
            MeanUnexpectedMapMetricMultiBatchParameterBuilder(
                name="my_name",
                map_metric_name="column_values.nonnull",
                data_context=data_context,
            ))

    assert (
        "__init__() missing 1 required positional argument: 'total_count_parameter_builder_name'"
        in str(excinfo.value))

    with pytest.raises(TypeError) as excinfo:
        # noinspection PyUnusedLocal,PyArgumentList
        parameter_builder: ParameterBuilder = (
            MeanUnexpectedMapMetricMultiBatchParameterBuilder(
                name="my_name",
                total_count_parameter_builder_name="my_total_count",
                data_context=data_context,
            ))

    assert (
        "__init__() missing 1 required positional argument: 'map_metric_name'"
        in str(excinfo.value))
def test_mean_unexpected_map_metric_multi_batch_parameter_builder_bobby_check_serialized_keys_no_evaluation_parameter_builder_configs(
    bobby_columnar_table_multi_batch_deterministic_data_context,
):
    data_context: DataContext = (
        bobby_columnar_table_multi_batch_deterministic_data_context
    )

    mean_unexpected_map_metric_multi_batch_parameter_builder: ParameterBuilder = (
        MeanUnexpectedMapMetricMultiBatchParameterBuilder(
            name="my_pickup_datetime_count_values_unique_mean_unexpected_map_metric",
            map_metric_name="column_values.nonnull",
            total_count_parameter_builder_name="my_total_count",
            null_count_parameter_builder_name="my_null_count",
            metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
            metric_value_kwargs=None,
            evaluation_parameter_builder_configs=None,
            data_context=data_context,
        )
    )

    # Note: "evaluation_parameter_builder_configs" is not one of "ParameterBuilder" formal property attributes.
    assert set(
        mean_unexpected_map_metric_multi_batch_parameter_builder.to_json_dict().keys()
    ) == {
        "class_name",
        "module_name",
        "name",
        "map_metric_name",
        "total_count_parameter_builder_name",
        "null_count_parameter_builder_name",
        "metric_domain_kwargs",
        "metric_value_kwargs",
        "evaluation_parameter_builder_configs",
    }
def test_instantiation_mean_unexpected_map_metric_multi_batch_parameter_builder(
    bobby_columnar_table_multi_batch_deterministic_data_context, ):
    data_context: DataContext = (
        bobby_columnar_table_multi_batch_deterministic_data_context)

    # noinspection PyUnusedLocal
    parameter_builder: ParameterBuilder = (
        MeanUnexpectedMapMetricMultiBatchParameterBuilder(
            name="my_name",
            map_metric_name="column_values.nonnull",
            total_count_parameter_builder_name="my_total_count",
            data_context=data_context,
        ))
def test_mean_unexpected_map_metric_multi_batch_parameter_builder_bobby_numeric_dependencies_evaluated_separately(
    bobby_columnar_table_multi_batch_deterministic_data_context,
):
    data_context: DataContext = (
        bobby_columnar_table_multi_batch_deterministic_data_context
    )

    batch_request: dict = {
        "datasource_name": "taxi_pandas",
        "data_connector_name": "monthly",
        "data_asset_name": "my_reports",
    }

    my_total_count_metric_multi_batch_parameter_builder: MetricMultiBatchParameterBuilder = MetricMultiBatchParameterBuilder(
        name="my_total_count",
        metric_name="table.row_count",
        metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
        metric_value_kwargs=None,
        enforce_numeric_metric=False,
        replace_nan_with_zero=False,
        reduce_scalar_metric=True,
        evaluation_parameter_builder_configs=None,
        data_context=data_context,
    )
    my_null_count_metric_multi_batch_parameter_builder: MetricMultiBatchParameterBuilder = MetricMultiBatchParameterBuilder(
        name="my_null_count",
        metric_name="column_values.nonnull.unexpected_count",
        metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
        metric_value_kwargs=None,
        enforce_numeric_metric=False,
        replace_nan_with_zero=False,
        reduce_scalar_metric=True,
        evaluation_parameter_builder_configs=None,
        data_context=data_context,
    )

    mean_unexpected_map_metric_multi_batch_parameter_builder: ParameterBuilder = (
        MeanUnexpectedMapMetricMultiBatchParameterBuilder(
            name="my_passenger_count_values_not_null_mean_unexpected_map_metric",
            map_metric_name="column_values.nonnull",
            total_count_parameter_builder_name="my_total_count",
            null_count_parameter_builder_name="my_null_count",
            metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
            metric_value_kwargs=None,
            evaluation_parameter_builder_configs=None,
            data_context=data_context,
        )
    )

    metric_domain_kwargs: dict = {"column": "passenger_count"}
    domain: Domain = Domain(
        domain_type=MetricDomainTypes.COLUMN,
        domain_kwargs=metric_domain_kwargs,
        rule_name="my_rule",
    )

    variables: Optional[ParameterContainer] = None

    parameter_container: ParameterContainer = ParameterContainer(parameter_nodes=None)
    parameters: Dict[str, ParameterContainer] = {
        domain.id: parameter_container,
    }

    my_total_count_metric_multi_batch_parameter_builder.build_parameters(
        domain=domain,
        variables=variables,
        parameters=parameters,
        batch_request=batch_request,
    )
    my_null_count_metric_multi_batch_parameter_builder.build_parameters(
        domain=domain,
        variables=variables,
        parameters=parameters,
        batch_request=batch_request,
    )

    mean_unexpected_map_metric_multi_batch_parameter_builder.build_parameters(
        domain=domain,
        variables=variables,
        parameters=parameters,
        batch_request=batch_request,
    )

    expected_parameter_value: float = 0.0

    parameter_node: ParameterNode = get_parameter_value_and_validate_return_type(
        domain=domain,
        parameter_reference=mean_unexpected_map_metric_multi_batch_parameter_builder.json_serialized_fully_qualified_parameter_name,
        expected_return_type=None,
        variables=variables,
        parameters=parameters,
    )

    rtol: float = RTOL
    atol: float = 5.0e-1 * ATOL
    np.testing.assert_allclose(
        actual=parameter_node.value,
        desired=expected_parameter_value,
        rtol=rtol,
        atol=atol,
        err_msg=f"Actual value of {parameter_node.value} differs from expected value of {expected_parameter_value} by more than {atol + rtol * abs(parameter_node.value)} tolerance.",
    )
def test_mean_unexpected_map_metric_multi_batch_parameter_builder_bobby_check_serialized_keys_with_evaluation_parameter_builder_configs(
    bobby_columnar_table_multi_batch_deterministic_data_context,
):
    data_context: DataContext = (
        bobby_columnar_table_multi_batch_deterministic_data_context
    )

    my_total_count_metric_multi_batch_parameter_builder_config: ParameterBuilderConfig = ParameterBuilderConfig(
        module_name="great_expectations.rule_based_profiler.parameter_builder",
        class_name="MetricMultiBatchParameterBuilder",
        name="my_total_count",
        metric_name="table.row_count",
        metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
        metric_value_kwargs=None,
        enforce_numeric_metric=False,
        replace_nan_with_zero=False,
        reduce_scalar_metric=True,
        evaluation_parameter_builder_configs=None,
    )
    my_null_count_metric_multi_batch_parameter_builder_config: ParameterBuilderConfig = ParameterBuilderConfig(
        module_name="great_expectations.rule_based_profiler.parameter_builder",
        class_name="MetricMultiBatchParameterBuilder",
        name="my_null_count",
        metric_name="column_values.nonnull.unexpected_count",
        metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
        metric_value_kwargs=None,
        enforce_numeric_metric=False,
        replace_nan_with_zero=False,
        reduce_scalar_metric=True,
        evaluation_parameter_builder_configs=None,
    )

    evaluation_parameter_builder_configs: Optional[List[ParameterBuilderConfig]] = [
        my_total_count_metric_multi_batch_parameter_builder_config,
        my_null_count_metric_multi_batch_parameter_builder_config,
    ]
    mean_unexpected_map_metric_multi_batch_parameter_builder: ParameterBuilder = (
        MeanUnexpectedMapMetricMultiBatchParameterBuilder(
            name="my_pickup_datetime_count_values_unique_mean_unexpected_map_metric",
            map_metric_name="column_values.nonnull",
            total_count_parameter_builder_name="my_total_count",
            null_count_parameter_builder_name="my_null_count",
            metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
            metric_value_kwargs=None,
            evaluation_parameter_builder_configs=evaluation_parameter_builder_configs,
            data_context=data_context,
        )
    )

    # Note: "evaluation_parameter_builder_configs" is not one of "ParameterBuilder" formal property attributes.
    assert set(
        mean_unexpected_map_metric_multi_batch_parameter_builder.to_json_dict().keys()
    ) == {
        "class_name",
        "module_name",
        "name",
        "map_metric_name",
        "total_count_parameter_builder_name",
        "null_count_parameter_builder_name",
        "metric_domain_kwargs",
        "metric_value_kwargs",
        "evaluation_parameter_builder_configs",
    }
Ejemplo n.º 6
0
def build_map_metric_rule(
    rule_name: str,
    expectation_type: str,
    map_metric_name: str,
    include_column_names: Optional[Union[str, Optional[List[str]]]] = None,
    exclude_column_names: Optional[Union[str, Optional[List[str]]]] = None,
    include_column_name_suffixes: Optional[Union[str, Iterable,
                                                 List[str]]] = None,
    exclude_column_name_suffixes: Optional[Union[str, Iterable,
                                                 List[str]]] = None,
    semantic_type_filter_module_name: Optional[str] = None,
    semantic_type_filter_class_name: Optional[str] = None,
    include_semantic_types: Optional[Union[
        str, SemanticDomainTypes, List[Union[str,
                                             SemanticDomainTypes]]]] = None,
    exclude_semantic_types: Optional[Union[
        str, SemanticDomainTypes, List[Union[str,
                                             SemanticDomainTypes]]]] = None,
    max_unexpected_values: Union[str, int] = 0,
    max_unexpected_ratio: Optional[Union[str, float]] = None,
    min_max_unexpected_values_proportion: Union[str, float] = 9.75e-1,
) -> Rule:
    """
    This method builds "Rule" object focused on emitting "ExpectationConfiguration" objects for any "map" style metric.
    """

    # Step-1: Instantiate "MapMetricColumnDomainBuilder" for specified "map_metric_name" (subject to directives).

    map_metric_column_domain_builder: MapMetricColumnDomainBuilder = (
        MapMetricColumnDomainBuilder(
            map_metric_name=map_metric_name,
            include_column_names=include_column_names,
            exclude_column_names=exclude_column_names,
            include_column_name_suffixes=include_column_name_suffixes,
            exclude_column_name_suffixes=exclude_column_name_suffixes,
            semantic_type_filter_module_name=semantic_type_filter_module_name,
            semantic_type_filter_class_name=semantic_type_filter_class_name,
            include_semantic_types=include_semantic_types,
            exclude_semantic_types=exclude_semantic_types,
            max_unexpected_values=max_unexpected_values,
            max_unexpected_ratio=max_unexpected_ratio,
            min_max_unexpected_values_proportion=
            min_max_unexpected_values_proportion,
            data_context=None,
        ))

    # Step-2: Declare "ParameterBuilder" for every metric of interest.

    column_values_unique_unexpected_count_metric_multi_batch_parameter_builder_for_metrics: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.get_column_values_unique_unexpected_count_metric_multi_batch_parameter_builder(
        json_serialize=True)
    column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder_for_metrics: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.get_column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder(
        json_serialize=True)
    column_values_null_unexpected_count_metric_multi_batch_parameter_builder_for_metrics: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.get_column_values_null_unexpected_count_metric_multi_batch_parameter_builder(
        json_serialize=True)

    # Step-3: Set up "MeanUnexpectedMapMetricMultiBatchParameterBuilder" to compute "condition" for emitting "ExpectationConfiguration" (based on "Domain" data).

    total_count_metric_multi_batch_parameter_builder_for_evaluations: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.get_table_row_count_metric_multi_batch_parameter_builder(
        json_serialize=False)
    column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder_for_evaluations: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.get_column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder(
        json_serialize=False)
    evaluation_parameter_builder_configs: Optional[
        List[ParameterBuilderConfig]] = [
            ParameterBuilderConfig(
                **
                total_count_metric_multi_batch_parameter_builder_for_evaluations
                .to_json_dict()),
            ParameterBuilderConfig(
                **
                column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder_for_evaluations
                .to_json_dict()),
        ]
    column_values_attribute_mean_unexpected_value_multi_batch_parameter_builder_for_validations: MeanUnexpectedMapMetricMultiBatchParameterBuilder = MeanUnexpectedMapMetricMultiBatchParameterBuilder(
        name=f"{map_metric_name}.unexpected_value",
        map_metric_name=map_metric_name,
        total_count_parameter_builder_name=
        total_count_metric_multi_batch_parameter_builder_for_evaluations.name,
        null_count_parameter_builder_name=
        column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder_for_evaluations
        .name,
        metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
        metric_value_kwargs=None,
        evaluation_parameter_builder_configs=
        evaluation_parameter_builder_configs,
        json_serialize=True,
        data_context=None,
    )

    # Step-4: Pass "MeanUnexpectedMapMetricMultiBatchParameterBuilder" as "validation" "ParameterBuilder" for "DefaultExpectationConfigurationBuilder", responsible for emitting "ExpectationConfiguration" (with specified "expectation_type").

    validation_parameter_builder_configs: Optional[
        List[ParameterBuilderConfig]] = [
            ParameterBuilderConfig(
                **
                column_values_attribute_mean_unexpected_value_multi_batch_parameter_builder_for_validations
                .to_json_dict()),
        ]
    expect_column_values_to_be_attribute_expectation_configuration_builder: DefaultExpectationConfigurationBuilder = DefaultExpectationConfigurationBuilder(
        expectation_type=expectation_type,
        validation_parameter_builder_configs=
        validation_parameter_builder_configs,
        column=
        f"{DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}column",
        condition=
        f"{column_values_attribute_mean_unexpected_value_multi_batch_parameter_builder_for_validations.fully_qualified_parameter_name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY} <= 1.0 - {VARIABLES_KEY}success_ratio",
        meta={
            "profiler_details":
            f"{column_values_attribute_mean_unexpected_value_multi_batch_parameter_builder_for_validations.fully_qualified_parameter_name}.{FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY}",
        },
    )

    # Step-5: Instantiate and return "Rule" object, comprised of "variables", "domain_builder", "parameter_builders", and "expectation_configuration_builders" components.

    variables: dict = {
        "success_ratio": 7.5e-1,
    }

    parameter_builders: List[ParameterBuilder] = [
        column_values_unique_unexpected_count_metric_multi_batch_parameter_builder_for_metrics,
        column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder_for_metrics,
        column_values_null_unexpected_count_metric_multi_batch_parameter_builder_for_metrics,
    ]
    expectation_configuration_builders: List[ExpectationConfigurationBuilder] = [
        expect_column_values_to_be_attribute_expectation_configuration_builder,
    ]
    rule: Rule = Rule(
        name=rule_name,
        variables=variables,
        domain_builder=map_metric_column_domain_builder,
        parameter_builders=parameter_builders,
        expectation_configuration_builders=expectation_configuration_builders,
    )

    return rule