Esempio n. 1
0
def test_column_values_unique_single_batch(
        alice_columnar_table_single_batch_context):
    """
    Verify "Domain" objects emitted by "MapMetricColumnDomainBuilder" for "column_values.unique" map metric.

    The builder runs against the single-batch "alice" data asset with zero tolerated unexpected values; exactly six
    columns (enumerated below) must be returned as "COLUMN" type "Domain" objects belonging to "my_rule".
    """
    rule_name: str = "my_rule"

    def column_of(candidate: Domain) -> str:
        # Sort key: name of the column that the given "Domain" refers to.
        return candidate.domain_kwargs["column"]

    context: DataContext = alice_columnar_table_single_batch_context
    request: BatchRequest = BatchRequest(
        datasource_name="alice_columnar_table_single_batch_datasource",
        data_connector_name="alice_columnar_table_single_batch_data_connector",
        data_asset_name="alice_columnar_table_single_batch_data_asset",
    )

    builder: MapMetricColumnDomainBuilder = MapMetricColumnDomainBuilder(
        map_metric_name="column_values.unique",
        max_unexpected_values=0,
        max_unexpected_ratio=None,
        min_max_unexpected_values_proportion=9.75e-1,
        data_context=context,
    )
    actual_domains: List[Domain] = builder.get_domains(
        rule_name=rule_name,
        batch_request=request,
    )

    # Blank out "details" before comparing (unit tests for "inferred_semantic_domain_type" are provided separately).
    for actual in actual_domains:
        actual.details = {}
    actual_domains = sorted(actual_domains, key=column_of)

    # Expected result: one "COLUMN" type "Domain" per compliant column, in the same (column name) order as above.
    expected_domains: List[Domain] = sorted(
        (
            Domain(
                domain_type=MetricDomainTypes.COLUMN,
                domain_kwargs={"column": compliant_name},
                rule_name=rule_name,
            )
            for compliant_name in (
                "id",
                "event_type",
                "user_id",
                "event_ts",
                "server_ts",
                "device_ts",
            )
        ),
        key=column_of,
    )

    assert len(actual_domains) == 6
    assert actual_domains == expected_domains
Esempio n. 2
0
def build_map_metric_rule(
    rule_name: str,
    expectation_type: str,
    map_metric_name: str,
    include_column_names: Optional[Union[str, Optional[List[str]]]] = None,
    exclude_column_names: Optional[Union[str, Optional[List[str]]]] = None,
    include_column_name_suffixes: Optional[Union[str, Iterable,
                                                 List[str]]] = None,
    exclude_column_name_suffixes: Optional[Union[str, Iterable,
                                                 List[str]]] = None,
    semantic_type_filter_module_name: Optional[str] = None,
    semantic_type_filter_class_name: Optional[str] = None,
    include_semantic_types: Optional[Union[
        str, SemanticDomainTypes, List[Union[str,
                                             SemanticDomainTypes]]]] = None,
    exclude_semantic_types: Optional[Union[
        str, SemanticDomainTypes, List[Union[str,
                                             SemanticDomainTypes]]]] = None,
    max_unexpected_values: Union[str, int] = 0,
    max_unexpected_ratio: Optional[Union[str, float]] = None,
    min_max_unexpected_values_proportion: Union[str, float] = 9.75e-1,
) -> Rule:
    """
    Assemble "Rule" object that emits "ExpectationConfiguration" objects (of "expectation_type") for "map" style metric.

    All column name, column name suffix, semantic type, and unexpected value tolerance arguments are handed unchanged to
    "MapMetricColumnDomainBuilder", constructed for "map_metric_name".  The returned "Rule" is named "rule_name" and holds
    single "success_ratio" variable, three metric "ParameterBuilder" objects, and one
    "DefaultExpectationConfigurationBuilder", whose "condition" compares mean unexpected value with "1.0 - success_ratio".
    """
    common_builders = DataAssistant.commonly_used_parameter_builders

    # Domain selection: columns are chosen by "MapMetricColumnDomainBuilder" according to directives received.
    domain_builder: MapMetricColumnDomainBuilder = MapMetricColumnDomainBuilder(
        map_metric_name=map_metric_name,
        include_column_names=include_column_names,
        exclude_column_names=exclude_column_names,
        include_column_name_suffixes=include_column_name_suffixes,
        exclude_column_name_suffixes=exclude_column_name_suffixes,
        semantic_type_filter_module_name=semantic_type_filter_module_name,
        semantic_type_filter_class_name=semantic_type_filter_class_name,
        include_semantic_types=include_semantic_types,
        exclude_semantic_types=exclude_semantic_types,
        max_unexpected_values=max_unexpected_values,
        max_unexpected_ratio=max_unexpected_ratio,
        min_max_unexpected_values_proportion=min_max_unexpected_values_proportion,
        data_context=None,
    )

    # Metrics of interest: these "ParameterBuilder" objects (JSON-serialized) become "parameter_builders" of "Rule".
    unique_count_metric_builder: ParameterBuilder = (
        common_builders.get_column_values_unique_unexpected_count_metric_multi_batch_parameter_builder(
            json_serialize=True))
    nonnull_count_metric_builder: ParameterBuilder = (
        common_builders.get_column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder(
            json_serialize=True))
    null_count_metric_builder: ParameterBuilder = (
        common_builders.get_column_values_null_unexpected_count_metric_multi_batch_parameter_builder(
            json_serialize=True))

    # Evaluation dependencies (not JSON-serialized) of "MeanUnexpectedMapMetricMultiBatchParameterBuilder": table row
    # count and "nonnull" unexpected count; the latter is passed by name as "null count" "ParameterBuilder".
    row_count_evaluation_builder: ParameterBuilder = (
        common_builders.get_table_row_count_metric_multi_batch_parameter_builder(
            json_serialize=False))
    nonnull_count_evaluation_builder: ParameterBuilder = (
        common_builders.get_column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder(
            json_serialize=False))
    evaluation_configs: Optional[List[ParameterBuilderConfig]] = [
        ParameterBuilderConfig(**evaluation_builder.to_json_dict())
        for evaluation_builder in (
            row_count_evaluation_builder,
            nonnull_count_evaluation_builder,
        )
    ]
    mean_unexpected_value_builder: MeanUnexpectedMapMetricMultiBatchParameterBuilder = (
        MeanUnexpectedMapMetricMultiBatchParameterBuilder(
            name=f"{map_metric_name}.unexpected_value",
            map_metric_name=map_metric_name,
            total_count_parameter_builder_name=row_count_evaluation_builder.name,
            null_count_parameter_builder_name=nonnull_count_evaluation_builder.name,
            metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
            metric_value_kwargs=None,
            evaluation_parameter_builder_configs=evaluation_configs,
            json_serialize=True,
            data_context=None,
        ))

    # "DefaultExpectationConfigurationBuilder" receives mean unexpected value "ParameterBuilder" for "validation" and
    # refers to its fully-qualified parameter name in both "condition" and "profiler_details" of "meta".
    validation_configs: Optional[List[ParameterBuilderConfig]] = [
        ParameterBuilderConfig(**mean_unexpected_value_builder.to_json_dict()),
    ]
    mean_value_parameter_name = (
        mean_unexpected_value_builder.fully_qualified_parameter_name)
    expectation_configuration_builder: DefaultExpectationConfigurationBuilder = (
        DefaultExpectationConfigurationBuilder(
            expectation_type=expectation_type,
            validation_parameter_builder_configs=validation_configs,
            column=f"{DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}column",
            condition=f"{mean_value_parameter_name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY} <= 1.0 - {VARIABLES_KEY}success_ratio",
            meta={
                "profiler_details": f"{mean_value_parameter_name}.{FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY}",
            },
        ))

    # Final assembly of "Rule" from "variables", "domain_builder", "parameter_builders", and
    # "expectation_configuration_builders" components.
    return Rule(
        name=rule_name,
        variables={
            "success_ratio": 7.5e-1,
        },
        domain_builder=domain_builder,
        parameter_builders=[
            unique_count_metric_builder,
            nonnull_count_metric_builder,
            null_count_metric_builder,
        ],
        expectation_configuration_builders=[
            expectation_configuration_builder,
        ],
    )
Esempio n. 3
0
def test_column_values_nonnull_multi_batch_one_column_not_emitted(
    bobby_columnar_table_multi_batch_deterministic_data_context, ):
    """
    Verify "Domain" objects emitted by "MapMetricColumnDomainBuilder" for "column_values.nonnull" map metric.

    The builder runs against "my_reports" data asset of "taxi_pandas" datasource with zero tolerated unexpected values;
    exactly seventeen columns (enumerated below) must be returned as "COLUMN" type "Domain" objects for "my_rule".
    NOTE(review): per test name, one column of data asset is not emitted; that column is not named in this test.
    """
    rule_name: str = "my_rule"

    def column_of(candidate: Domain) -> str:
        # Sort key: name of the column that the given "Domain" refers to.
        return candidate.domain_kwargs["column"]

    context: DataContext = (
        bobby_columnar_table_multi_batch_deterministic_data_context)
    request: BatchRequest = BatchRequest(
        datasource_name="taxi_pandas",
        data_connector_name="monthly",
        data_asset_name="my_reports",
    )

    builder: MapMetricColumnDomainBuilder = MapMetricColumnDomainBuilder(
        map_metric_name="column_values.nonnull",
        max_unexpected_values=0,
        max_unexpected_ratio=None,
        min_max_unexpected_values_proportion=9.75e-1,
        data_context=context,
    )
    actual_domains: List[Domain] = builder.get_domains(
        rule_name=rule_name,
        batch_request=request,
    )

    # Blank out "details" before comparing (unit tests for "inferred_semantic_domain_type" are provided separately).
    for actual in actual_domains:
        actual.details = {}
    actual_domains = sorted(actual_domains, key=column_of)

    # Expected result: one "COLUMN" type "Domain" per compliant column, in the same (column name) order as above.
    expected_domains: List[Domain] = sorted(
        (
            Domain(
                domain_type=MetricDomainTypes.COLUMN,
                domain_kwargs={"column": compliant_name},
                rule_name=rule_name,
            )
            for compliant_name in (
                "VendorID",
                "pickup_datetime",
                "dropoff_datetime",
                "passenger_count",
                "trip_distance",
                "RatecodeID",
                "store_and_fwd_flag",
                "PULocationID",
                "DOLocationID",
                "payment_type",
                "fare_amount",
                "extra",
                "mta_tax",
                "tip_amount",
                "tolls_amount",
                "improvement_surcharge",
                "total_amount",
            )
        ),
        key=column_of,
    )

    assert len(actual_domains) == 17
    assert actual_domains == expected_domains