Example #1
    def _build_expectations_datetime(self, dataset, column, **kwargs):
        """
        Adds `expect_column_values_to_be_between` for a given column
        Args:
            dataset: A GE Dataset
            column: The column for which to add the expectation
            **kwargs:

        Returns:
            The GE Dataset
        """

        if "expect_column_values_to_be_between" not in self.excluded_expectations:
            min_value = dataset.expect_column_min_to_be_between(
                column,
                min_value=None,
                max_value=None,
                result_format="SUMMARY",
            ).result["observed_value"]

            if min_value is not None:
                try:
                    min_value = parse(min_value)
                except TypeError:
                    pass

            dataset._expectation_suite.remove_expectation(
                ExpectationConfiguration(
                    expectation_type="expect_column_min_to_be_between",
                    kwargs={"column": column},
                ),
                match_type="domain",
            )

            max_value = dataset.expect_column_max_to_be_between(
                column,
                min_value=None,
                max_value=None,
                result_format="SUMMARY",
            ).result["observed_value"]
            if max_value is not None:
                try:
                    max_value = parse(max_value)
                except TypeError:
                    pass

            dataset._expectation_suite.remove_expectation(
                ExpectationConfiguration(
                    expectation_type="expect_column_max_to_be_between",
                    kwargs={"column": column},
                ),
                match_type="domain",
            )
            if min_value is not None or max_value is not None:
                dataset.expect_column_values_to_be_between(
                    column,
                    min_value=min_value,
                    max_value=max_value,
                )
        return dataset
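For reference, a minimal sketch of the configuration this method ends up adding, assuming the profiled bounds parse as dates; the column name, dates, and import paths are illustrative and may differ by Great Expectations version.

# Illustrative only: the observed min/max below are hypothetical profiling results.
from dateutil.parser import parse

from great_expectations.core import ExpectationConfiguration

observed_min = parse("2020-01-01")  # stand-in for the profiled column minimum
observed_max = parse("2020-12-31")  # stand-in for the profiled column maximum

datetime_range_config = ExpectationConfiguration(
    expectation_type="expect_column_values_to_be_between",
    kwargs={
        "column": "pickup_datetime",  # hypothetical column name
        "min_value": observed_min,
        "max_value": observed_max,
    },
)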
Example #2
    def _add_column_cardinality_to_column_info(self, dataset, column_name):
        """
        Adds the cardinality of a column to the column_info dictionary on self
        Args:
            dataset: A GE Dataset
            column_name: The name of the column for which to add cardinality

        Returns:
            The cardinality of the column
        """
        column_info_entry = self.column_info.get(column_name)
        if not column_info_entry:
            column_info_entry = {}
            self.column_info[column_name] = column_info_entry
        column_cardinality = column_info_entry.get("cardinality")
        if not column_cardinality:
            column_cardinality = self._get_column_cardinality(
                dataset, column_name)
            column_info_entry["cardinality"] = column_cardinality
            # Remove the helper expectations created while profiling cardinality
            dataset._expectation_suite.remove_expectation(
                ExpectationConfiguration(
                    expectation_type=
                    "expect_column_unique_value_count_to_be_between",
                    kwargs={"column": column_name},
                ))
            dataset._expectation_suite.remove_expectation(
                ExpectationConfiguration(
                    expectation_type=
                    "expect_column_proportion_of_unique_values_to_be_between",
                    kwargs={"column": column_name},
                ))

        return column_cardinality
    @classmethod
    def _get_column_cardinality_with_caching(cls, dataset, column_name, cache):
        column_cache_entry = cache.get(column_name)
        if not column_cache_entry:
            column_cache_entry = {}
            cache[column_name] = column_cache_entry
        column_cardinality = column_cache_entry.get("cardinality")
        if not column_cardinality:
            column_cardinality = cls._get_column_cardinality(
                dataset, column_name)
            column_cache_entry["cardinality"] = column_cardinality
            # Remove the helper expectations created while profiling cardinality
            dataset.remove_expectation(
                ExpectationConfiguration(
                    expectation_type=
                    "expect_column_unique_value_count_to_be_between",
                    kwargs={"column": column_name},
                ))
            dataset.remove_expectation(
                ExpectationConfiguration(
                    expectation_type=
                    "expect_column_proportion_of_unique_values_to_be_between",
                    kwargs={"column": column_name},
                ))
            dataset.set_config_value("interactive_evaluation", True)

        return column_cardinality
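The caching helper above follows a simple memoization pattern; a minimal, GE-free sketch of that pattern, with get_cardinality as a hypothetical stand-in for cls._get_column_cardinality.

# Minimal sketch of the memoization pattern used above, detached from GE.
def cached_cardinality(dataset, column_name, cache, get_cardinality):
    entry = cache.setdefault(column_name, {})
    if "cardinality" not in entry:
        # Compute once, then reuse on subsequent calls for the same column.
        entry["cardinality"] = get_cardinality(dataset, column_name)
    return entry["cardinality"]

cache = {}
cached_cardinality(None, "user_id", cache, lambda ds, col: "MANY")
assert cache == {"user_id": {"cardinality": "MANY"}}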
def test_find_expectations(exp2, exp3, exp4, exp5, domain_success_runtime_suite):
    expectation_to_find1 = ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_in_set",
        kwargs={"column": "b", "value_set": [-1, -2, -3], "result_format": "COMPLETE"},
    )

    expectation_to_find2 = ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_in_set",
        kwargs={"column": "b", "value_set": [1, 2, 3], "result_format": "COMPLETE"},
    )

    assert domain_success_runtime_suite.find_expectations(
        expectation_to_find1, "domain"
    ) == [exp2, exp3, exp4, exp5]

    assert domain_success_runtime_suite.find_expectations(
        expectation_to_find1, "success"
    ) == [exp2, exp3]

    assert domain_success_runtime_suite.find_expectations(
        expectation_to_find2, "runtime"
    ) == [exp5]

    assert (
        domain_success_runtime_suite.find_expectations(expectation_to_find1, "runtime")
        == []
    )
Example #5
def test_ge_cloud_validator_updates_self_suite_with_ge_cloud_ids_on_save(
    mock_emit,
    mock_context_get_suite,
    mock_context_save_suite,
    multi_batch_taxi_validator_ge_cloud_mode,
    empty_data_context_stats_enabled,
):
    """
    This checks that Validator in ge_cloud_mode properly updates underlying Expectation Suite on save.
    The multi_batch_taxi_validator_ge_cloud_mode fixture has a suite with a single expectation.
    :param mock_context_get_suite: Under normal circumstances, this would be ExpectationSuite object returned from GE Cloud
    :param mock_context_save_suite: Under normal circumstances, this would trigger post or patch to GE Cloud
    """
    context: DataContext = empty_data_context_stats_enabled
    mock_suite = ExpectationSuite(
        expectation_suite_name="validating_taxi_data",
        expectations=[
            ExpectationConfiguration(
                expectation_type="expect_column_values_to_be_between",
                kwargs={
                    "column": "passenger_count",
                    "min_value": 0,
                    "max_value": 99
                },
                meta={"notes": "This is an expectation."},
                ge_cloud_id=UUID("0faf94a9-f53a-41fb-8e94-32f218d4a774"),
            ),
            ExpectationConfiguration(
                expectation_type="expect_column_values_to_be_between",
                kwargs={
                    "column": "trip_distance",
                    "min_value": 11,
                    "max_value": 22
                },
                meta={"notes": "This is an expectation."},
                ge_cloud_id=UUID("3e8eee33-b425-4b36-a831-6e9dd31ad5af"),
            ),
        ],
        data_context=context,
        meta={"notes": "This is an expectation suite."},
    )
    mock_context_save_suite.return_value = True
    mock_context_get_suite.return_value = mock_suite
    multi_batch_taxi_validator_ge_cloud_mode.expect_column_values_to_be_between(
        column="trip_distance", min_value=11, max_value=22)
    multi_batch_taxi_validator_ge_cloud_mode.save_expectation_suite()
    assert (multi_batch_taxi_validator_ge_cloud_mode.get_expectation_suite().
            to_json_dict() == mock_suite.to_json_dict())

    # add_expectation() will not send usage_statistics event when called from a Validator
    assert mock_emit.call_count == 0
    assert mock_emit.call_args_list == []
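For reference, a minimal hedged sketch of an expectation configuration carrying a ge_cloud_id, as in the mock suite above; the id is copied from that mock, and the import path is an assumption that may vary by version.

from uuid import UUID

from great_expectations.core import ExpectationConfiguration

# Illustrative only: in ge_cloud_mode the id is assigned by GE Cloud, which is how the
# saved suite and the cloud-side suite are matched up.
cloud_backed_config = ExpectationConfiguration(
    expectation_type="expect_column_values_to_be_between",
    kwargs={"column": "passenger_count", "min_value": 0, "max_value": 99},
    meta={"notes": "This is an expectation."},
    ge_cloud_id=UUID("0faf94a9-f53a-41fb-8e94-32f218d4a774"),
)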
Example #6
    def _build_expectation_configuration(
        self,
        domain: Domain,
        variables: Optional[ParameterContainer] = None,
        parameters: Optional[Dict[str, ParameterContainer]] = None,
    ) -> Optional[ExpectationConfiguration]:
        """Returns either and ExpectationConfiguration object or None depending on evaluation of condition"""
        parameter_name: str
        fully_qualified_parameter_name: str
        expectation_kwargs: Dict[str, Any] = {
            parameter_name: get_parameter_value_and_validate_return_type(
                domain=domain,
                parameter_reference=fully_qualified_parameter_name,
                expected_return_type=None,
                variables=variables,
                parameters=parameters,
            )
            for parameter_name, fully_qualified_parameter_name in
            self.kwargs.items()
        }
        meta: Dict[str, Any] = get_parameter_value_and_validate_return_type(
            domain=domain,
            parameter_reference=self.meta,
            expected_return_type=dict,
            variables=variables,
            parameters=parameters,
        )

        if self._condition:
            parsed_condition: ParseResults = self._parse_condition()
            condition: bool = self._evaluate_condition(
                parsed_condition=parsed_condition,
                domain=domain,
                variables=variables,
                parameters=parameters,
            )

            if condition:
                return ExpectationConfiguration(
                    expectation_type=self._expectation_type,
                    kwargs=expectation_kwargs,
                    meta=meta,
                )
            else:
                return None
        else:
            return ExpectationConfiguration(
                expectation_type=self._expectation_type,
                kwargs=expectation_kwargs,
                meta=meta,
            )
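A toy sketch of the kwargs resolution above, with a plain dict lookup standing in for get_parameter_value_and_validate_return_type; the parameter names and reference strings are hypothetical.

# Hypothetical stand-ins: self.kwargs maps expectation kwarg names to parameter references,
# and `resolved` plays the role of the parameter lookup.
raw_kwargs = {"min_value": "$parameter.min_fare.value", "max_value": "$parameter.max_fare.value"}
resolved = {"$parameter.min_fare.value": 0, "$parameter.max_fare.value": 100}

expectation_kwargs = {
    name: resolved[reference]
    for name, reference in raw_kwargs.items()
}
assert expectation_kwargs == {"min_value": 0, "max_value": 100}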
Example #7
def test_graph_validate_with_bad_config(basic_datasource):
    df = pd.DataFrame({"a": [1, 5, 22, 3, 5, 10], "b": [1, 2, 3, 4, 5, None]})

    batch = basic_datasource.get_single_batch_from_batch_request(
        RuntimeBatchRequest(
            **{
                "datasource_name": "my_datasource",
                "data_connector_name": "test_runtime_data_connector",
                "data_asset_name": "IN_MEMORY_DATA_ASSET",
                "runtime_parameters": {
                    "batch_data": df,
                },
                "batch_identifiers": {
                    "pipeline_stage_name": 0,
                    "airflow_run_id": 0,
                    "custom_key_0": 0,
                },
            }))

    expectation_configuration = ExpectationConfiguration(
        expectation_type="expect_column_max_to_be_between",
        kwargs={
            "column": "not_in_table",
            "min_value": 1,
            "max_value": 29
        },
    )
    with pytest.raises(ge_exceptions.ExecutionEngineError) as eee:
        # noinspection PyUnusedLocal
        result = Validator(execution_engine=PandasExecutionEngine(),
                           batches=[batch]).graph_validate(
                               configurations=[expectation_configuration])
    assert (str(eee.value) ==
            'Error: The column "not_in_table" in BatchData does not exist.')
    def __init__(
        self,
        expectation_suite_name,
        data_context=None,
        expectations=None,
        evaluation_parameters=None,
        data_asset_type=None,
        execution_engine_type=None,
        meta=None,
        ge_cloud_id=None,
    ):
        self.expectation_suite_name = expectation_suite_name
        self.ge_cloud_id = ge_cloud_id
        self._data_context = data_context

        if expectations is None:
            expectations = []
        self.expectations = [
            ExpectationConfiguration(**expectation) if isinstance(
                expectation, dict) else expectation
            for expectation in expectations
        ]
        if evaluation_parameters is None:
            evaluation_parameters = {}
        self.evaluation_parameters = evaluation_parameters
        self.data_asset_type = data_asset_type
        self.execution_engine_type = execution_engine_type
        if meta is None:
            meta = {"great_expectations_version": ge_version}
        if ("great_expectations.__version__" not in meta.keys()
                and "great_expectations_version" not in meta.keys()):
            meta["great_expectations_version"] = ge_version
        # We require meta information to be serializable, but do not convert until necessary
        ensure_json_serializable(meta)
        self.meta = meta
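A small hedged sketch of the meta defaulting in this constructor; the suite name is illustrative and the import path may differ by version.

from great_expectations.core.expectation_suite import ExpectationSuite

# Per the __init__ above, omitting meta records the running great_expectations version.
suite = ExpectationSuite(expectation_suite_name="demo_suite")
assert "great_expectations_version" in suite.meta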
Example #9
def test_sa_expect_column_value_z_scores_to_be_less_than_impl(postgresql_engine):
    df = pd.DataFrame({"a": [1, 5, 22, 3, 5, 10]})
    df.to_sql(
        name="z_score_test_data",
        con=postgresql_engine,
        index=False,
        if_exists="replace",
    )
    expectationConfiguration = ExpectationConfiguration(
        expectation_type="expect_column_value_z_scores_to_be_less_than",
        kwargs={
            "column": "a",
            "mostly": 0.9,
            "threshold": 4,
            "double_sided": True,
        },
    )
    expectation = ExpectColumnValueZScoresToBeLessThan(expectationConfiguration)
    engine = SqlAlchemyExecutionEngine(engine=postgresql_engine)
    engine.load_batch_data(
        "my_id",
        SqlAlchemyBatchData(execution_engine=engine, table_name="z_score_test_data"),
    )
    result = expectation.validate(Validator(execution_engine=engine))
    assert result == ExpectationValidationResult(
        success=True,
    )
Example #10
    def _build_expectations_value_set(self, dataset, column, **kwargs):
        """
        Adds a value_set expectation for a given column
        Args:
            dataset: A GE Dataset
            column: The column for which to add an expectation
            **kwargs:

        Returns:
            The GE Dataset
        """
        if "expect_column_values_to_be_in_set" not in self.excluded_expectations:
            value_set = dataset.expect_column_distinct_values_to_be_in_set(
                column, value_set=None,
                result_format="SUMMARY").result["observed_value"]

            dataset._expectation_suite.remove_expectation(
                ExpectationConfiguration(
                    expectation_type=
                    "expect_column_distinct_values_to_be_in_set",
                    kwargs={"column": column},
                ),
                match_type="domain",
            )

            dataset.expect_column_values_to_be_in_set(column,
                                                      value_set=value_set)
        return dataset
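A hedged sketch of the configuration this produces when profiling observes, say, three distinct status values; the column name, values, and import path are illustrative.

from great_expectations.core import ExpectationConfiguration

observed_value_set = ["active", "inactive", "pending"]  # stand-in for the profiled distinct values

value_set_config = ExpectationConfiguration(
    expectation_type="expect_column_values_to_be_in_set",
    kwargs={"column": "status", "value_set": observed_value_set},
)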
Example #11
def test_validator_default_expectation_args__pandas(basic_datasource):
    df = pd.DataFrame({"a": [1, 5, 22, 3, 5, 10], "b": [1, 2, 3, 4, 5, None]})
    expectationConfiguration = ExpectationConfiguration(
        expectation_type="expect_column_value_z_scores_to_be_less_than",
        kwargs={
            "column": "b",
            "mostly": 0.9,
            "threshold": 4,
            "double_sided": True,
        },
    )

    batch = basic_datasource.get_single_batch_from_batch_request(
        BatchRequest(
            **{
                "datasource_name": "my_datasource",
                "data_connector_name": "test_runtime_data_connector",
                "batch_data": df,
                "partition_request": PartitionRequest(
                    **{
                        "partition_identifiers": {
                            "pipeline_stage_name": 0,
                            "run_id": 0,
                            "custom_key_0": 0,
                        }
                    }
                ),
            }
        )
    )

    my_validator = Validator(execution_engine=PandasExecutionEngine(), batches=[batch])

    print(my_validator.get_default_expectation_arguments())
def test_catch_exceptions_with_bad_expectation_type():
    # We want to catch degenerate cases where an expectation suite is incompatible with the dataset it is validated against
    my_df = PandasDataset({"x": range(10)})
    my_df._expectation_suite.append_expectation(
        ExpectationConfiguration(expectation_type="foobar", kwargs={})
    )
    result = my_df.validate(catch_exceptions=True)

    # Find the foobar result
    idx = 0
    for idx, val_result in enumerate(result.results):
        if val_result.expectation_config.expectation_type == "foobar":
            break

    assert result.results[idx].success is False
    assert result.results[idx].expectation_config.expectation_type == "foobar"
    assert result.results[idx].expectation_config.kwargs == {}
    assert result.results[idx].exception_info["raised_exception"] is True
    assert (
        "AttributeError: 'PandasDataset' object has no attribute 'foobar'"
        in result.results[idx].exception_info["exception_traceback"]
    )

    with pytest.raises(AttributeError):
        result = my_df.validate(catch_exceptions=False)
Example #13
def evr_kwargs(expectation_configuration_kwargs):
    # The fields below are defaults; specific tests will override them as needed
    return {
        "expectation_config":
        ExpectationConfiguration(**expectation_configuration_kwargs),
        "result": {},
    }
def test_replace_expectation_replaces_expectation(ge_cloud_suite, ge_cloud_id, exp1):
    # The state of the first expectation before update
    expectation_before_update = ge_cloud_suite.expectations[0]
    assert expectation_before_update["kwargs"]["column"] == "a"
    assert expectation_before_update["kwargs"]["value_set"] == [1, 2, 3]
    assert expectation_before_update["meta"]["notes"] == "This is an expectation."

    new_expectation_configuration = ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_in_set",
        kwargs={"column": "b", "value_set": [4, 5, 6], "result_format": "BASIC"},
        meta={"notes": "This is an updated expectation."},
        ge_cloud_id=ge_cloud_id,
    )
    ge_cloud_suite.replace_expectation(
        new_expectation_configuration=new_expectation_configuration,
        existing_expectation_configuration=exp1,
    )

    # The state of the first expectation after update
    expectation_after_update = ge_cloud_suite.expectations[0]
    assert expectation_after_update["kwargs"]["column"] == "b"
    assert expectation_after_update["kwargs"]["value_set"] == [4, 5, 6]
    assert (
        expectation_after_update["meta"]["notes"] == "This is an updated expectation."
    )
def test_spark_expect_column_value_z_scores_to_be_less_than_impl(
    spark_session, basic_spark_df_execution_engine
):
    df = pd.DataFrame({"a": [1, 5, 22, 3, 5, 10]})
    spark = get_or_create_spark_application(
        spark_config={
            "spark.sql.catalogImplementation": "hive",
            "spark.executor.memory": "450m",
            # "spark.driver.allowMultipleContexts": "true",  # This directive does not appear to have any effect.
        }
    )
    df = spark.createDataFrame(df)

    expectationConfiguration = ExpectationConfiguration(
        expectation_type="expect_column_value_z_scores_to_be_less_than",
        kwargs={
            "column": "a",
            "mostly": 0.9,
            "threshold": 4,
            "double_sided": True,
        },
    )
    expectation = ExpectColumnValueZScoresToBeLessThan(expectationConfiguration)
    engine = basic_spark_df_execution_engine
    engine.load_batch_data(batch_id="my_id", batch_data=df)
    result = expectation.validate(Validator(execution_engine=engine))
    assert result == ExpectationValidationResult(
        success=True,
    )
Example #16
def test_parse_validation_graph():
    df = pd.DataFrame({"a": [1, 5, 22, 3, 5, 10], "b": [1, 2, 3, 4, 5, 6]})
    expectation_configuration = ExpectationConfiguration(
        expectation_type="expect_column_value_z_scores_to_be_less_than",
        kwargs={
            "column": "a",
            "mostly": 0.9,
            "threshold": 4,
            "double_sided": True,
        },
    )
    # noinspection PyUnusedLocal
    expectation = ExpectColumnValueZScoresToBeLessThan(
        expectation_configuration)
    # noinspection PyUnusedLocal
    batch = Batch(data=df)
    graph = ValidationGraph()
    engine = PandasExecutionEngine()
    for configuration in [expectation_configuration]:
        expectation_impl = get_expectation_impl(
            "expect_column_value_z_scores_to_be_less_than")
        validation_dependencies = expectation_impl(
            configuration).get_validation_dependencies(configuration, engine)

        for metric_configuration in validation_dependencies["metrics"].values(
        ):
            Validator(execution_engine=engine).build_metric_dependency_graph(
                graph=graph,
                execution_engine=engine,
                metric_configuration=metric_configuration,
                configuration=configuration,
            )
    ready_metrics, needed_metrics = Validator(engine)._parse_validation_graph(
        validation_graph=graph, metrics=dict())
    assert len(ready_metrics) == 2 and len(needed_metrics) == 9
Example #17
def test_graph_validate_with_bad_config(basic_datasource):
    df = pd.DataFrame({"a": [1, 5, 22, 3, 5, 10], "b": [1, 2, 3, 4, 5, None]})
    expectationConfiguration = ExpectationConfiguration(
        expectation_type="expect_column_max_to_be_between",
        kwargs={"column": "not_in_table", "min_value": 1, "max_value": 29},
    )
    expectation = ExpectColumnMaxToBeBetween(expectationConfiguration)

    batch = basic_datasource.get_single_batch_from_batch_request(
        BatchRequest(
            **{
                "datasource_name": "my_datasource",
                "data_connector_name": "test_runtime_data_connector",
                "batch_data": df,
                "partition_request": PartitionRequest(
                    **{
                        "partition_identifiers": {
                            "pipeline_stage_name": 0,
                            "run_id": 0,
                            "custom_key_0": 0,
                        }
                    }
                ),
            }
        )
    )

    try:
        result = Validator(
            execution_engine=PandasExecutionEngine(), batches=[batch]
        ).graph_validate(configurations=[expectationConfiguration])
    except KeyError as e:
        result = e
    assert isinstance(result, KeyError)
Example #18
    def _create_range_expectation(
        self, key: str, details: dict
    ) -> Optional[ExpectationConfiguration]:
        """https://json-schema.org/understanding-json-schema/reference/numeric.html#range"""
        object_types = self._get_object_types(details=details)
        object_types = filter(
            lambda object_type: object_type != JsonSchemaTypes.NULL.value, object_types
        )
        range_types = [JsonSchemaTypes.INTEGER.value, JsonSchemaTypes.NUMBER.value]

        if set(object_types).issubset(set(range_types)) is False:
            return None

        type_ = details.get("type", None)
        any_of = details.get("anyOf", None)

        if not type_ and not any_of:
            return None

        minimum = None
        maximum = None
        exclusive_minimum = None
        exclusive_maximum = None

        if type_:
            minimum = details.get("minimum", None)
            maximum = details.get("maximum", None)
            exclusive_minimum = details.get("exclusiveMinimum", None)
            exclusive_maximum = details.get("exclusiveMaximum", None)
        elif any_of:
            for item in any_of:
                item_type = item.get("type", None)
                if item_type in range_types:
                    minimum = item.get("minimum", None)
                    maximum = item.get("maximum", None)
                    exclusive_minimum = item.get("exclusiveMinimum", None)
                    exclusive_maximum = item.get("exclusiveMaximum", None)
                    break

        if (
            minimum is None
            and maximum is None
            and exclusive_minimum is None
            and exclusive_maximum is None
        ):
            return None

        kwargs: Dict[str, Any] = {"column": key}
        if minimum is not None:
            kwargs["min_value"] = minimum
        if maximum is not None:
            kwargs["max_value"] = maximum
        if exclusive_minimum is not None:
            kwargs["min_value"] = exclusive_minimum
            kwargs["strict_min"] = True
        if exclusive_maximum is not None:
            kwargs["max_value"] = exclusive_maximum
            kwargs["strict_max"] = True

        return ExpectationConfiguration("expect_column_values_to_be_between", kwargs)
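To illustrate the mapping above, one hypothetical JSON Schema property and the kwargs the method would derive from it (the key and values are made up).

# Hypothetical JSON Schema fragment and the resulting kwargs for key="age".
details = {"type": "integer", "minimum": 0, "exclusiveMaximum": 100}

expected_kwargs = {
    "column": "age",
    "min_value": 0,       # from "minimum"
    "max_value": 100,     # from "exclusiveMaximum"
    "strict_max": True,   # exclusiveMaximum implies a strict upper bound
}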
Example #19
def test_parse_validation_graph_with_bad_metrics_args():
    df = pd.DataFrame({"a": [1, 5, 22, 3, 5, 10], "b": [1, 2, 3, 4, 5, 6]})
    expectation_configuration = ExpectationConfiguration(
        expectation_type="expect_column_value_z_scores_to_be_less_than",
        kwargs={
            "column": "a",
            "mostly": 0.9,
            "threshold": 4,
            "double_sided": True,
        },
    )
    graph = ValidationGraph()
    engine = PandasExecutionEngine()
    validator = Validator(execution_engine=engine)
    for configuration in [expectation_configuration]:
        expectation_impl = get_expectation_impl(
            "expect_column_value_z_scores_to_be_less_than")
        validation_dependencies = expectation_impl(
            configuration).get_validation_dependencies(
                configuration,
                execution_engine=engine,
            )

        for metric_configuration in validation_dependencies["metrics"].values(
        ):
            validator.build_metric_dependency_graph(
                graph=graph,
                execution_engine=engine,
                metric_configuration=metric_configuration,
                configuration=configuration,
            )
    ready_metrics, needed_metrics = validator._parse_validation_graph(
        validation_graph=graph, metrics=("nonexistent", "NONE"))
    assert len(ready_metrics) == 2 and len(needed_metrics) == 9
Example #20
def test_populate_dependencies():
    df = pd.DataFrame({"a": [1, 5, 22, 3, 5, 10], "b": [1, 2, 3, 4, 5, 6]})
    expectationConfiguration = ExpectationConfiguration(
        expectation_type="expect_column_value_z_scores_to_be_less_than",
        kwargs={
            "column": "a",
            "mostly": 0.9,
            "threshold": 4,
            "double_sided": True,
        },
    )
    expectation = ExpectColumnValueZScoresToBeLessThan(
        expectationConfiguration)
    batch = Batch(data=df)
    graph = ValidationGraph()
    engine = PandasExecutionEngine()
    for configuration in [expectationConfiguration]:
        expectation_impl = get_expectation_impl(
            "expect_column_value_z_scores_to_be_less_than")
        validation_dependencies = expectation_impl(
            configuration).get_validation_dependencies(
                configuration,
                engine,
            )

        for metric_configuration in validation_dependencies["metrics"].values(
        ):
            Validator(execution_engine=engine).build_metric_dependency_graph(
                graph,
                metric_configuration,
                configuration,
                execution_engine=engine)
    assert len(graph.edges) == 10
Example #21
    def add_expectation(
        self,
        rule: str,
        args: Dict,
        meta: Dict,
        validation_expectation: Dict,
    ):
        """
        Purpose:
            Add an individual expectation for a rule to the suite
        Input:
            rule:
                validation rule
            args:
                dict of arguments specifying expectation behavior
            meta:
                dict of additional information for each expectation
            validation_expectation:
                dictionary mapping rules to expectations
        Returns:
            None; the expectation is added to self.suite
        """
        # Create an Expectation
        expectation_configuration = ExpectationConfiguration(
            # Name of expectation type being added
            expectation_type=validation_expectation[rule.split(" ")[0]],

            # Add arguments and meta information
            kwargs={**args},
            meta={**meta}
        )
        # Add the Expectation to the suite
        self.suite.add_expectation(expectation_configuration=expectation_configuration)
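A hedged sketch of the inputs add_expectation expects and the configuration it would build from them; the rule string, argument dicts, and rule-to-expectation map are all hypothetical.

from great_expectations.core import ExpectationConfiguration

# Hypothetical inputs: validation_expectation maps the first word of a rule string
# to an expectation type, as in the method above.
rule = "not_null check on user_id"
validation_expectation = {"not_null": "expect_column_values_to_not_be_null"}
args = {"column": "user_id"}
meta = {"notes": "Generated from the not_null rule."}

# What add_expectation would construct and then add to self.suite.
expectation_configuration = ExpectationConfiguration(
    expectation_type=validation_expectation[rule.split(" ")[0]],
    kwargs={**args},
    meta={**meta},
)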
    def _build_expectation_configuration(
        self,
        domain: Domain,
        variables: Optional[ParameterContainer] = None,
        parameters: Optional[Dict[str, ParameterContainer]] = None,
    ) -> ExpectationConfiguration:
        parameter_name: str
        fully_qualified_parameter_name: str
        expectation_kwargs: Dict[str, Any] = {
            parameter_name: get_parameter_value_and_validate_return_type(
                domain=domain,
                parameter_reference=fully_qualified_parameter_name,
                expected_return_type=None,
                variables=variables,
                parameters=parameters,
            )
            for parameter_name, fully_qualified_parameter_name in
            self.kwargs.items()
        }
        meta: Dict[str, Any] = get_parameter_value_and_validate_return_type(
            domain=domain,
            parameter_reference=self.meta,
            expected_return_type=dict,
            variables=variables,
            parameters=parameters,
        )
        return ExpectationConfiguration(
            expectation_type=self.expectation_type,
            kwargs=expectation_kwargs,
            meta=meta,
        )
def test_graph_validate_with_runtime_config(basic_datasource):
    df = pd.DataFrame(
        {"a": [1, 5, 22, 3, 5, 10, 2, 3], "b": [97, 332, 3, 4, 5, 6, 7, None]}
    )

    batch = basic_datasource.get_single_batch_from_batch_request(
        BatchRequest(
            **{
                "datasource_name": "my_datasource",
                "data_connector_name": "test_runtime_data_connector",
                "data_asset_name": "IN_MEMORY_DATA_ASSET",
                "batch_data": df,
                "partition_request": PartitionRequest(
                    **{
                        "batch_identifiers": {
                            "pipeline_stage_name": 0,
                            "airflow_run_id": 0,
                            "custom_key_0": 0,
                        }
                    }
                ),
            }
        )
    )

    expectation_configuration = ExpectationConfiguration(
        expectation_type="expect_column_value_z_scores_to_be_less_than",
        kwargs={"column": "b", "mostly": 1, "threshold": 2, "double_sided": True},
    )
    try:
        result = Validator(
            execution_engine=PandasExecutionEngine(), batches=(batch,)
        ).graph_validate(
            configurations=[expectation_configuration],
            runtime_configuration={"result_format": "COMPLETE"},
        )
    except AssertionError as e:
        result = e
    assert result == [
        ExpectationValidationResult(
            success=False,
            meta={},
            result={
                "element_count": 8,
                "unexpected_count": 1,
                "unexpected_percent": 12.5,
                "partial_unexpected_list": [332.0],
                "missing_count": 1,
                "missing_percent": 12.5,
                "unexpected_percent_nonmissing": 14.285714285714285,
                "partial_unexpected_index_list": None,
                "partial_unexpected_counts": [{"value": 332.0, "count": 1}],
                "unexpected_list": [332.0],
                "unexpected_index_list": None,
            },
            expectation_config=None,
            exception_info=None,
        )
    ]
def table_exp2():
    return ExpectationConfiguration(
        expectation_type="expect_table_row_count_to_be_between",
        kwargs={
            "min_value": 0,
            "max_value": 1
        },
    )
Example #25
    def _create_string_length_expectation(
        self, key: str, details: dict
    ) -> Optional[ExpectationConfiguration]:
        """https://json-schema.org/understanding-json-schema/reference/string.html#length"""
        object_types = self._get_object_types(details=details)

        if JsonSchemaTypes.STRING.value not in object_types:
            return None

        type_ = details.get("type", None)
        any_of = details.get("anyOf", None)

        if not type_ and not any_of:
            return None

        minimum = None
        maximum = None

        if type_:
            minimum = details.get("minLength", None)
            maximum = details.get("maxLength", None)
        elif any_of:
            for item in any_of:
                item_type = item.get("type", None)
                if item_type == JsonSchemaTypes.STRING.value:
                    minimum = item.get("minLength", None)
                    maximum = item.get("maxLength", None)
                    break

        if minimum is None and maximum is None:
            return None

        kwargs = {
            "column": key,
        }
        if minimum == maximum:
            kwargs["value"] = minimum
            return ExpectationConfiguration(
                "expect_column_value_lengths_to_equal", kwargs
            )
        if minimum is not None:
            kwargs["min_value"] = minimum
        if maximum is not None:
            kwargs["max_value"] = maximum

        return ExpectationConfiguration(
            "expect_column_value_lengths_to_be_between", kwargs
        )
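Analogously for string lengths, two hypothetical property definitions and the configurations the method above would return for them (key and bounds are made up).

# Hypothetical inputs/outputs for _create_string_length_expectation with key="username".
details_exact = {"type": "string", "minLength": 2, "maxLength": 2}
# -> ExpectationConfiguration("expect_column_value_lengths_to_equal",
#                             {"column": "username", "value": 2})

details_range = {"type": "string", "minLength": 1, "maxLength": 64}
# -> ExpectationConfiguration("expect_column_value_lengths_to_be_between",
#                             {"column": "username", "min_value": 1, "max_value": 64})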
Example #26
def exp6():
    return ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_in_set",
        kwargs={
            "column": "b",
            "value_set": [1, 2]
        },
        meta={"notes": "This is an expectation."},
    )
Example #27
def test_registry_from_configuration():
    configuration = ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_in_set",
        kwargs={
            "column": "PClass",
            "value_set": [1, 2, 3]
        },
    )
    assert configuration._get_expectation_impl() == ExpectColumnValuesToBeInSet
Example #28
def config7():
    return ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_in_set",
        kwargs={
            "column": "a",
            "value_set": [1, 2, 3, 4],
        },  # differs from others
        meta={"notes": "This is another expectation."},
    )
Example #29
def config6():
    return ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_in_set",
        kwargs={
            "column": "a",
            "value_set": [1, 2, 3, 4],  # differs from others
            "result_format": "COMPLETE",
        },
        meta={"notes": "This is another expectation."},
    )
Example #30
def config1():
    return ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_in_set",
        kwargs={
            "column": "a",
            "value_set": [1, 2, 3],
            "result_format": "BASIC"
        },
        meta={"notes": "This is an expectation."},
    )