def get_validation_dependencies(
        self,
        configuration: Optional[ExpectationConfiguration] = None,
        execution_engine: Optional[ExecutionEngine] = None,
        runtime_configuration: Optional[dict] = None,
    ):
        # this calls TableExpectation.get_validation_dependencies to set baseline dependencies
        # for the aggregate version of the expectation
        dependencies = super(ColumnMapExpectation, self).get_validation_dependencies(
            configuration, execution_engine, runtime_configuration
        )

        # only PandasExecutionEngine supports the column map version of the expectation
        if isinstance(execution_engine, PandasExecutionEngine):
            column_name = configuration.kwargs.get("column")
            expected_types_list = configuration.kwargs.get("type_list")
            metric_kwargs = get_metric_kwargs(
                configuration=configuration,
                metric_name="table.column_types",
                runtime_configuration=runtime_configuration,
            )
            metric_domain_kwargs = metric_kwargs.get("metric_domain_kwargs")
            metric_value_kwargs = metric_kwargs.get("metric_value_kwargs")
            table_column_types_configuration = MetricConfiguration(
                "table.column_types",
                metric_domain_kwargs=metric_domain_kwargs,
                metric_value_kwargs=metric_value_kwargs,
            )
            actual_column_types_list = execution_engine.resolve_metrics(
                [table_column_types_configuration]
            )[table_column_types_configuration.id]
            actual_column_type = [
                type_dict["type"]
                for type_dict in actual_column_types_list
                if type_dict["name"] == column_name
            ][0]

            # only use column map version if column dtype is object
            if (
                actual_column_type.type.__name__ == "object_"
                and expected_types_list is not None
            ):
                # this resets dependencies using  ColumnMapExpectation.get_validation_dependencies
                dependencies = super().get_validation_dependencies(
                    configuration, execution_engine, runtime_configuration
                )

        # this adds table.column_types dependency for both aggregate and map versions of expectation
        column_types_metric_kwargs = get_metric_kwargs(
            metric_name="table.column_types",
            configuration=configuration,
            runtime_configuration=runtime_configuration,
        )
        dependencies["metrics"]["table.column_types"] = MetricConfiguration(
            metric_name="table.column_types",
            metric_domain_kwargs=column_types_metric_kwargs["metric_domain_kwargs"],
            metric_value_kwargs=column_types_metric_kwargs["metric_value_kwargs"],
        )

        return dependencies
    def get_validation_dependencies(
        self,
        configuration: Optional[ExpectationConfiguration] = None,
        execution_engine: Optional[ExecutionEngine] = None,
        runtime_configuration: Optional[dict] = None,
    ) -> dict:

        dependencies = super().get_validation_dependencies(
            configuration=configuration,
            execution_engine=execution_engine,
            runtime_configuration=runtime_configuration,
        )
        metric_kwargs = get_metric_kwargs(
            metric_name="column_values.string_integers.increasing.map",
            configuration=configuration,
            runtime_configuration=runtime_configuration,
        )

        dependencies["metrics"][
            "column_values.string_integers.increasing.map"] = MetricConfiguration(
                metric_name="column_values.string_integers.increasing.map",
                metric_domain_kwargs=metric_kwargs["metric_domain_kwargs"],
                metric_value_kwargs=metric_kwargs["metric_value_kwargs"],
            )

        return dependencies
コード例 #3
0
    def get_validation_dependencies(
        self,
        configuration: Optional[ExpectationConfiguration] = None,
        execution_engine: Optional[ExecutionEngine] = None,
        runtime_configuration: Optional[dict] = None,
    ):
        dependencies = super().get_validation_dependencies(
            configuration, execution_engine, runtime_configuration
        )
        for metric_name in self.metric_dependencies:
            metric_kwargs = get_metric_kwargs(
                metric_name=metric_name,
                configuration=configuration,
                runtime_configuration=runtime_configuration,
            )
            dependencies["metrics"][metric_name] = MetricConfiguration(
                metric_name=metric_name,
                metric_domain_kwargs=metric_kwargs["metric_domain_kwargs"],
                metric_value_kwargs=metric_kwargs["metric_value_kwargs"],
            )

        return dependencies
コード例 #4
0
    def get_validation_dependencies(
        self,
        configuration: Optional[ExpectationConfiguration] = None,
        execution_engine: Optional[ExecutionEngine] = None,
        runtime_configuration: Optional[dict] = None,
    ):
        # This calls TableExpectation.get_validation_dependencies to set baseline dependencies for the aggregate version
        # of the expectation.
        # We need to keep this as super(ColumnMapExpectation, self), which calls
        # TableExpectation.get_validation_dependencies instead of ColumnMapExpectation.get_validation_dependencies.
        # This is because the map version of this expectation is only supported for Pandas, so we want the aggregate
        # version for the other backends.
        dependencies = super(ColumnMapExpectation,
                             self).get_validation_dependencies(
                                 configuration, execution_engine,
                                 runtime_configuration)

        # Only PandasExecutionEngine supports the column map version of the expectation.
        if isinstance(execution_engine, PandasExecutionEngine):
            column_name = configuration.kwargs.get("column")
            expected_types_list = configuration.kwargs.get("type_list")
            metric_kwargs = get_metric_kwargs(
                configuration=configuration,
                metric_name="table.column_types",
                runtime_configuration=runtime_configuration,
            )
            metric_domain_kwargs = metric_kwargs.get("metric_domain_kwargs")
            metric_value_kwargs = metric_kwargs.get("metric_value_kwargs")
            table_column_types_configuration = MetricConfiguration(
                "table.column_types",
                metric_domain_kwargs=metric_domain_kwargs,
                metric_value_kwargs=metric_value_kwargs,
            )
            actual_column_types_list = execution_engine.resolve_metrics([
                table_column_types_configuration
            ])[table_column_types_configuration.id]
            try:
                actual_column_type = [
                    type_dict["type"] for type_dict in actual_column_types_list
                    if type_dict["name"] == column_name
                ][0]
            except IndexError:
                actual_column_type = None

            # only use column map version if column dtype is object
            if (actual_column_type
                    and actual_column_type.type.__name__ == "object_"
                    and expected_types_list is not None):
                # this resets dependencies using  ColumnMapExpectation.get_validation_dependencies
                dependencies = super().get_validation_dependencies(
                    configuration, execution_engine, runtime_configuration)

        # this adds table.column_types dependency for both aggregate and map versions of expectation
        column_types_metric_kwargs = get_metric_kwargs(
            metric_name="table.column_types",
            configuration=configuration,
            runtime_configuration=runtime_configuration,
        )
        dependencies["metrics"]["table.column_types"] = MetricConfiguration(
            metric_name="table.column_types",
            metric_domain_kwargs=column_types_metric_kwargs[
                "metric_domain_kwargs"],
            metric_value_kwargs=column_types_metric_kwargs[
                "metric_value_kwargs"],
        )

        return dependencies
コード例 #5
0
    def get_validation_dependencies(
        self,
        configuration: Optional[ExpectationConfiguration] = None,
        execution_engine: Optional[ExecutionEngine] = None,
        runtime_configuration: Optional[dict] = None,
    ):
        dependencies = super().get_validation_dependencies(
            configuration, execution_engine, runtime_configuration
        )
        assert isinstance(
            self.map_metric, str
        ), "ColumnMapExpectation must override get_validation_dependencies or declare exactly one map_metric"
        assert (
            self.metric_dependencies == tuple()
        ), "ColumnMapExpectation must be configured using map_metric, and cannot have metric_dependencies declared."
        # convenient name for updates
        metric_dependencies = dependencies["metrics"]
        metric_kwargs = get_metric_kwargs(
            metric_name="column_values.nonnull.unexpected_count",
            configuration=configuration,
            runtime_configuration=runtime_configuration,
        )
        metric_dependencies[
            "column_values.nonnull.unexpected_count"
        ] = MetricConfiguration(
            "column_values.nonnull.unexpected_count",
            metric_domain_kwargs=metric_kwargs["metric_domain_kwargs"],
            metric_value_kwargs=metric_kwargs["metric_value_kwargs"],
        )
        metric_kwargs = get_metric_kwargs(
            metric_name=self.map_metric + ".unexpected_count",
            configuration=configuration,
            runtime_configuration=runtime_configuration,
        )
        metric_dependencies[
            self.map_metric + ".unexpected_count"
        ] = MetricConfiguration(
            self.map_metric + ".unexpected_count",
            metric_domain_kwargs=metric_kwargs["metric_domain_kwargs"],
            metric_value_kwargs=metric_kwargs["metric_value_kwargs"],
        )

        result_format_str = dependencies["result_format"].get("result_format")
        metric_kwargs = get_metric_kwargs(
            metric_name="table.row_count",
            configuration=configuration,
            runtime_configuration=runtime_configuration,
        )
        metric_dependencies["table.row_count"] = MetricConfiguration(
            metric_name="table.row_count",
            metric_domain_kwargs=metric_kwargs["metric_domain_kwargs"],
            metric_value_kwargs=metric_kwargs["metric_value_kwargs"],
        )
        if result_format_str == "BOOLEAN_ONLY":
            return dependencies

        metric_kwargs = get_metric_kwargs(
            self.map_metric + ".unexpected_values",
            configuration=configuration,
            runtime_configuration=runtime_configuration,
        )
        metric_dependencies[
            self.map_metric + ".unexpected_values"
        ] = MetricConfiguration(
            metric_name=self.map_metric + ".unexpected_values",
            metric_domain_kwargs=metric_kwargs["metric_domain_kwargs"],
            metric_value_kwargs=metric_kwargs["metric_value_kwargs"],
        )

        if result_format_str in ["BASIC", "SUMMARY"]:
            return dependencies

        metric_kwargs = get_metric_kwargs(
            self.map_metric + ".unexpected_rows",
            configuration=configuration,
            runtime_configuration=runtime_configuration,
        )
        metric_dependencies[self.map_metric + ".unexpected_rows"] = MetricConfiguration(
            metric_name=self.map_metric + ".unexpected_rows",
            metric_domain_kwargs=metric_kwargs["metric_domain_kwargs"],
            metric_value_kwargs=metric_kwargs["metric_value_kwargs"],
        )

        if isinstance(execution_engine, PandasExecutionEngine):
            metric_kwargs = get_metric_kwargs(
                self.map_metric + ".unexpected_index_list",
                configuration=configuration,
                runtime_configuration=runtime_configuration,
            )
            metric_dependencies[
                self.map_metric + ".unexpected_index_list"
            ] = MetricConfiguration(
                metric_name=self.map_metric + ".unexpected_index_list",
                metric_domain_kwargs=metric_kwargs["metric_domain_kwargs"],
                metric_value_kwargs=metric_kwargs["metric_value_kwargs"],
            )

        return dependencies