Example #1
 def from_object(cls, metric):
     if not isinstance(metric, Metric):
         raise GreatExpectationsError(
             "Unable to build MetricIdentifier from object of type {} when Metric is "
             "expected.".format(type(metric))
         )
     return cls(metric.metric_name, metric.metric_kwargs_id)
Example #2
    def add_column_row_condition(
        self, domain_kwargs, column_name=None, filter_null=True, filter_nan=False
    ):
        # We explicitly handle filter_nan & filter_null for spark using a spark-native condition
        if "row_condition" in domain_kwargs and domain_kwargs["row_condition"]:
            raise GreatExpectationsError(
                "ExecutionEngine does not support updating existing row_conditions."
            )

        new_domain_kwargs = copy.deepcopy(domain_kwargs)
        assert "column" in domain_kwargs or column_name is not None
        if column_name is not None:
            column = column_name
        else:
            column = domain_kwargs["column"]
        if filter_null and filter_nan:
            new_domain_kwargs[
                "row_condition"
            ] = f"NOT isnan({column}) AND {column} IS NOT NULL"
        elif filter_null:
            new_domain_kwargs["row_condition"] = f"{column} IS NOT NULL"
        elif filter_nan:
            new_domain_kwargs["row_condition"] = f"NOT isnan({column})"
        else:
            logger.warning(
                "add_column_row_condition called without specifying a desired row condition"
            )

        new_domain_kwargs["condition_parser"] = "spark"
        return new_domain_kwargs
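The Spark variant above only manufactures strings, so its effect can be checked without a Spark session. Below is a minimal, self-contained sketch of the same null/NaN filter construction; the function name and sample kwargs are illustrative, not part of the Great Expectations API.

import copy

def build_spark_null_filter(domain_kwargs, column_name=None, filter_null=True, filter_nan=False):
    # Condensed version of the logic above: emit a Spark SQL row_condition string.
    new_kwargs = copy.deepcopy(domain_kwargs)
    column = column_name if column_name is not None else domain_kwargs["column"]
    if filter_null and filter_nan:
        new_kwargs["row_condition"] = f"NOT isnan({column}) AND {column} IS NOT NULL"
    elif filter_null:
        new_kwargs["row_condition"] = f"{column} IS NOT NULL"
    elif filter_nan:
        new_kwargs["row_condition"] = f"NOT isnan({column})"
    new_kwargs["condition_parser"] = "spark"
    return new_kwargs

print(build_spark_null_filter({"column": "fare_amount"}))
# {'column': 'fare_amount', 'row_condition': 'fare_amount IS NOT NULL', 'condition_parser': 'spark'}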
Example #3
    def build_batch_kwargs_from_partition_id(self,
                                             generator_asset,
                                             partition_id=None,
                                             reader_options=None,
                                             limit=None):
        try:
            asset_config = self._assets[generator_asset]
        except KeyError:
            raise GreatExpectationsError("No asset config found for asset %s" %
                                         generator_asset)
        if generator_asset not in self._iterators:
            self._iterators[generator_asset] = {}

        iterator_dict = self._iterators[generator_asset]
        batch_kwargs = None
        for key in self._get_asset_options(generator_asset, iterator_dict):
            if self._partitioner(key=key,
                                 asset_config=asset_config) == partition_id:
                batch_kwargs = self._build_batch_kwargs(
                    key=key,
                    asset_config=asset_config,
                    reader_options=reader_options,
                    limit=limit)

        if batch_kwargs is None:
            raise BatchKwargsError(
                "Unable to identify partition %s for asset %s" %
                (partition_id, generator_asset), {
                    "generator_asset": generator_asset,
                    "partition_id": partition_id
                })

        return batch_kwargs
Example #4
    def __init__(self, name="default", datasource=None, assets=None):
        super().__init__(name=name, datasource=datasource)
        if not assets:
            assets = {}
        try:
            self._assets = {
                asset_name: assetConfigurationSchema.load(asset_config)
                for (asset_name, asset_config) in assets.items()
            }
        except ValidationError as err:
            raise GreatExpectationsError(
                "Unable to load asset configuration in TableBatchKwargsGenerator '%s': "
                "validation error: %s." % (name, str(err))
            )

        if datasource is not None:
            self.engine = datasource.engine
            try:
                self.inspector = sqlalchemy.inspect(self.engine)

            except sqlalchemy.exc.OperationalError:
                logger.warning(
                    "Unable to create inspector from engine in batch kwargs generator '%s'"
                    % name
                )
                self.inspector = None
Example #5
    def build_batch_kwargs_from_partition_id(self,
                                             generator_asset,
                                             partition_id=None,
                                             batch_kwargs=None,
                                             **kwargs):
        try:
            asset_config = self._assets[generator_asset]
        except KeyError:
            raise GreatExpectationsError("No asset config found for asset %s" %
                                         generator_asset)
        if generator_asset not in self._iterators:
            self._iterators[generator_asset] = {}

        iterator_dict = self._iterators[generator_asset]
        new_kwargs = None
        for key in self._get_asset_options(generator_asset, iterator_dict):
            if self._partitioner(key=key,
                                 asset_config=asset_config) == partition_id:
                new_kwargs = self._build_batch_kwargs(
                    key=key, asset_config=asset_config)

        if new_kwargs is None:
            raise BatchKwargsError(
                "Unable to identify partition %s for asset %s" %
                (partition_id, generator_asset), {
                    "generator_asset": generator_asset,
                    "partition_id": partition_id
                })
        if batch_kwargs is not None:
            kwargs.update(batch_kwargs)
        if kwargs is not None:
            new_kwargs.update(kwargs)
        return new_kwargs
Example #6
 def load_batch_data(self, batch_id: str, batch_data: Any) -> None:
     if isinstance(batch_data, DataFrame):
         batch_data = SparkDFBatchData(self, batch_data)
     elif isinstance(batch_data, SparkDFBatchData):
         pass
     else:
         raise GreatExpectationsError(
             "SparkDFExecutionEngine requires batch data that is either a DataFrame or a SparkDFBatchData object"
         )
     super().load_batch_data(batch_id=batch_id, batch_data=batch_data)
Example #7
def singularize(plural_ge_noun):
    """
    Singularizes a Great Expectations plural noun
    """
    try:
        return PLURAL_TO_SINGULAR_LOOKUP_DICT[plural_ge_noun.lower()]
    except KeyError:
        raise GreatExpectationsError(
            f"Unable to singularize '{plural_ge_noun}'. Please update "
            f"great_expectations.util.PLURAL_TO_SINGULAR_LOOKUP_DICT.")
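Both singularize and its counterpart pluralize below are thin wrappers around module-level lookup dictionaries. Here is a self-contained sketch of that pattern; the dictionary contents and the local error class are stand-ins, not the library's actual tables.

class GreatExpectationsError(Exception):
    # Stand-in for great_expectations.exceptions.GreatExpectationsError.
    pass

PLURAL_TO_SINGULAR_LOOKUP_DICT = {"expectations": "expectation", "checkpoints": "checkpoint"}

def singularize(plural_noun):
    try:
        return PLURAL_TO_SINGULAR_LOOKUP_DICT[plural_noun.lower()]
    except KeyError:
        raise GreatExpectationsError(f"Unable to singularize '{plural_noun}'.")

print(singularize("Expectations"))  # -> expectation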
Example #8
def pluralize(singular_ge_noun):
    """
    Pluralizes a Great Expectations singular noun
    """
    try:
        return SINGULAR_TO_PLURAL_LOOKUP_DICT[singular_ge_noun.lower()]
    except KeyError:
        raise GreatExpectationsError(
            f"Unable to pluralize '{singular_ge_noun}'. Please update "
            f"great_expectations.util.SINGULAR_TO_PLURAL_LOOKUP_DICT")
Example #9
    def from_object(cls, validation_metric):
        if not isinstance(validation_metric, ValidationMetric):
            raise GreatExpectationsError(
                "Unable to build ValidationMetricIdentifier from object of type {} when "
                "ValidationMetric is expected.".format(
                    type(validation_metric)))

        return cls(validation_metric.expectation_suite_identifier,
                   validation_metric.run_id, validation_metric.metric_name,
                   validation_metric.metric_kwargs_id)
Example #10
 def from_fixed_length_tuple(cls, tuple_):
     if len(tuple_) != 4:
         raise GreatExpectationsError(
             "ValidationMetricIdentifier fixed length tuple must have exactly four "
             "components.")
     return cls(
         run_id=tuple_[0],
         expectation_suite_identifier=ExpectationSuiteIdentifier.from_fixed_length_tuple(
             (tuple_[1],)
         ),
         metric_name=tuple_[2],
         metric_kwargs_id=tuple_[3],
     )
Example #11
    def _split_multi_column_metric_domain_kwargs(
        self,
        domain_kwargs: Dict,
        domain_type: MetricDomainTypes,
    ) -> SplitDomainKwargs:
        """Split domain_kwargs for multicolumn domain types into compute and accessor domain kwargs.

        Args:
            domain_kwargs: A dictionary consisting of the domain kwargs specifying which data to obtain
            domain_type: an Enum value indicating which metric domain the user would
            like to be using.

        Returns:
            compute_domain_kwargs, accessor_domain_kwargs split from domain_kwargs
            The union of compute_domain_kwargs, accessor_domain_kwargs is the input domain_kwargs
        """
        assert (domain_type == MetricDomainTypes.MULTICOLUMN
                ), "This method only supports MetricDomainTypes.MULTICOLUMN"

        compute_domain_kwargs: Dict = copy.deepcopy(domain_kwargs)
        accessor_domain_kwargs: Dict = {}

        if "column_list" not in domain_kwargs:
            raise GreatExpectationsError(
                "column_list not found within domain_kwargs")

        column_list = compute_domain_kwargs.pop("column_list")

        if len(column_list) < 2:
            raise GreatExpectationsError(
                "column_list must contain at least 2 columns")

        # Checking if case-sensitive and using appropriate name
        if self.active_batch_data.use_quoted_name:
            accessor_domain_kwargs["column_list"] = [
                quoted_name(column_name, quote=True)
                for column_name in column_list
            ]
        else:
            accessor_domain_kwargs["column_list"] = column_list

        return SplitDomainKwargs(compute_domain_kwargs, accessor_domain_kwargs)
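The split itself is plain dictionary surgery: the compute kwargs keep everything needed to materialize the data (batch_id, row_condition, and so on), while the accessor kwargs carry the column_list used to address values inside that data. A standalone sketch of the idea, leaving out the quoted_name case-sensitivity branch:

import copy

def split_multicolumn_domain_kwargs(domain_kwargs):
    # Mirrors the method above without the engine: pop column_list into accessor kwargs.
    compute_domain_kwargs = copy.deepcopy(domain_kwargs)
    column_list = compute_domain_kwargs.pop("column_list")
    if len(column_list) < 2:
        raise ValueError("column_list must contain at least 2 columns")
    accessor_domain_kwargs = {"column_list": column_list}
    return compute_domain_kwargs, accessor_domain_kwargs

compute, accessor = split_multicolumn_domain_kwargs(
    {"batch_id": "b1", "column_list": ["col_a", "col_b"], "row_condition": None}
)
print(compute)   # {'batch_id': 'b1', 'row_condition': None}
print(accessor)  # {'column_list': ['col_a', 'col_b']}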
Example #12
 def from_tuple(cls, tuple_):
     if len(tuple_) < 4:
         raise GreatExpectationsError(
             "ValidationMetricIdentifier tuple must have at least four components."
         )
     return cls(
         run_id=tuple_[0],
         expectation_suite_identifier=ExpectationSuiteIdentifier.from_tuple(
             tuple_[1:-2]),
         metric_name=tuple_[-2],
         metric_kwargs_id=tuple_[-1])
Example #13
    def add_column_row_condition(self,
                                 domain_kwargs,
                                 column_name=None,
                                 filter_null=True,
                                 filter_nan=False):
        """EXPERIMENTAL

        Add a row condition for handling null filter.

        Args:
            domain_kwargs: the domain kwargs to use as the base and to which to add the condition
            column_name: if provided, use this name to add the condition; otherwise, will use "column" key from table_domain_kwargs
            filter_null: if true, add a filter for null values
            filter_nan: if true, add a filter for nan values
        """
        if filter_null is False and filter_nan is False:
            logger.warning(
                "add_column_row_condition called with no filter condition requested"
            )
            return domain_kwargs

        if filter_nan:
            raise GreatExpectationsError(
                "Base ExecutionEngine does not support adding nan condition filters"
            )

        if "row_condition" in domain_kwargs and domain_kwargs["row_condition"]:
            raise GreatExpectationsError(
                "ExecutionEngine does not support updating existing row_conditions."
            )

        new_domain_kwargs = copy.deepcopy(domain_kwargs)
        assert "column" in domain_kwargs or column_name is not None
        if column_name is not None:
            column = column_name
        else:
            column = domain_kwargs["column"]
        new_domain_kwargs[
            "condition_parser"] = "great_expectations__experimental__"
        new_domain_kwargs["row_condition"] = f'col("{column}").notnull()'
        return new_domain_kwargs
Example #14
    def _build_batch_kwargs(self, batch_parameters):
        try:
            data_asset_name = batch_parameters.pop("data_asset_name")
        except KeyError:
            raise BatchKwargsError(
                "Unable to build BatchKwargs: no name provided in batch_parameters.",
                batch_kwargs=batch_parameters,
            )

        partition_id = batch_parameters.pop("partition_id", None)
        batch_kwargs = self._datasource.process_batch_parameters(
            batch_parameters)

        if partition_id:
            try:
                asset_config = self._assets[data_asset_name]
            except KeyError:
                raise GreatExpectationsError(
                    "No asset config found for asset %s" % data_asset_name)
            if data_asset_name not in self._iterators:
                self._iterators[data_asset_name] = {}

            iterator_dict = self._iterators[data_asset_name]
            for key in self._get_asset_options(asset_config, iterator_dict):
                if (self._partitioner(
                        key=key, asset_config=asset_config) == partition_id):
                    batch_kwargs = self._build_batch_kwargs_from_key(
                        key=key,
                        asset_config=asset_config,
                        reader_options=batch_parameters.get(
                            "reader_options"),  # handled in generator
                        limit=batch_kwargs.get(
                            "limit"
                        ),  # may have been processed from datasource
                    )

            if batch_kwargs is None:
                raise BatchKwargsError(
                    "Unable to identify partition %s for asset %s" %
                    (partition_id, data_asset_name),
                    {
                        "data_asset_name": data_asset_name,
                        "partition_id": partition_id
                    },
                )

            return batch_kwargs

        else:
            return self.yield_batch_kwargs(data_asset_name=data_asset_name,
                                           **batch_parameters,
                                           **batch_kwargs)
Example #15
 def _build_evr(self, raw_response, configuration):
     """_build_evr is a lightweight convenience wrapper handling cases where an Expectation implementor
     fails to return an EVR but returns the necessary components in a dictionary."""
     if not isinstance(raw_response, ExpectationValidationResult):
         if isinstance(raw_response, dict):
             evr = ExpectationValidationResult(**raw_response)
             evr.expectation_config = configuration
         else:
             raise GreatExpectationsError("Unable to build EVR")
     else:
         evr = raw_response
         evr.expectation_config = configuration
     return evr
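The same coerce-or-pass-through pattern can be shown without the Expectation machinery; in this sketch a plain dataclass stands in for ExpectationValidationResult, and the names are illustrative only.

from dataclasses import dataclass

@dataclass
class Result:
    # Stand-in for ExpectationValidationResult.
    success: bool
    expectation_config: dict = None

def build_result(raw_response, configuration):
    # Accept either a ready Result or a dict of its fields; anything else is an error.
    if isinstance(raw_response, Result):
        result = raw_response
    elif isinstance(raw_response, dict):
        result = Result(**raw_response)
    else:
        raise TypeError(f"Unable to build result from {raw_response!r}")
    result.expectation_config = configuration
    return result

print(build_result({"success": True}, configuration={"expectation_type": "expect_table_row_count_to_be_between"}))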
Example #16
    def profile(cls, data_asset, run_id=None):
        if not cls.validate(data_asset):
            raise GreatExpectationsError(
                "Invalid data_asset for profiler; aborting")

        expectation_suite = cls._profile(data_asset)

        batch_kwargs = data_asset.batch_kwargs
        expectation_suite = cls.add_meta(expectation_suite, batch_kwargs)
        validation_results = data_asset.validate(expectation_suite,
                                                 run_id=run_id,
                                                 result_format="SUMMARY")
        return expectation_suite, validation_results
Example #17
    def profile(
        cls,
        data_asset,
        run_id=None,
        profiler_configuration=None,
        run_name=None,
        run_time=None,
    ):
        assert not (run_id and run_name) and not (
            run_id and run_time
        ), "Please provide either a run_id or run_name and/or run_time."
        if isinstance(run_id, str) and not run_name:
            # deprecated-v0.11.0
            warnings.warn(
                "String run_ids are deprecated as of v0.11.0 and support will be removed in v0.16. Please provide a run_id of type "
                "RunIdentifier(run_name=None, run_time=None), or a dictionary containing run_name "
                "and run_time (both optional). Instead of providing a run_id, you may also provide "
                "run_name and run_time separately.",
                DeprecationWarning,
            )
            try:
                run_time = parse(run_id)
            except (ValueError, TypeError):
                pass
            run_id = RunIdentifier(run_name=run_id, run_time=run_time)
        elif isinstance(run_id, dict):
            run_id = RunIdentifier(**run_id)
        elif not isinstance(run_id, RunIdentifier):
            run_name = run_name or "profiling"
            run_id = RunIdentifier(run_name=run_name, run_time=run_time)

        if not cls.validate(data_asset):
            raise GreatExpectationsError(
                "Invalid data_asset for profiler; aborting")

        expectation_suite = cls._profile(data_asset,
                                         configuration=profiler_configuration)

        batch_kwargs = data_asset.batch_kwargs
        expectation_suite = cls.add_meta(expectation_suite, batch_kwargs)
        validation_results = data_asset.validate(expectation_suite,
                                                 run_id=run_id,
                                                 result_format="SUMMARY")
        expectation_suite.add_citation(
            comment=f"{cls.__name__} added a citation based on the current batch.",
            batch_kwargs=data_asset.batch_kwargs,
            batch_markers=data_asset.batch_markers,
            batch_parameters=data_asset.batch_parameters,
        )
        return expectation_suite, validation_results
Example #18
 def _sqlalchemy(
     cls,
     execution_engine: SqlAlchemyExecutionEngine,
     metric_domain_kwargs: Dict,
     metric_value_kwargs: Dict,
     metrics: Dict[Tuple, Any],
     runtime_configuration: Dict,
 ):
     batch_id = metric_domain_kwargs.get("batch_id")
     if batch_id is None:
         if execution_engine.active_batch_data_id is not None:
             batch_id = execution_engine.active_batch_data_id
         else:
             raise GreatExpectationsError(
                 "batch_id could not be determined from domain kwargs and no active_batch_data is loaded into the "
                 "execution engine")
     batch_data = execution_engine.loaded_batch_data_dict.get(batch_id)
     if batch_data is None:
         raise GreatExpectationsError(
             "the requested batch is not available; please load the batch into the execution engine."
         )
     return _get_sqlalchemy_column_metadata(execution_engine.engine,
                                            batch_data)
Example #19
    def build_batch_kwargs_from_partition_id(
        self,
        generator_asset=None,
        data_asset_name=None,
        partition_id=None,
        reader_options=None,
        limit=None,
    ):
        assert (generator_asset and not data_asset_name) or (
            not generator_asset and data_asset_name
        ), "Please provide either generator_asset or data_asset_name."
        if generator_asset:
            warnings.warn(
                "The 'generator_asset' argument will be deprecated and renamed to 'data_asset_name'. "
                "Please update code accordingly.",
                DeprecationWarning,
            )
            data_asset_name = generator_asset
        try:
            asset_config = self._assets[data_asset_name]
        except KeyError:
            raise GreatExpectationsError("No asset config found for asset %s" %
                                         data_asset_name)
        if data_asset_name not in self._iterators:
            self._iterators[data_asset_name] = {}

        iterator_dict = self._iterators[data_asset_name]
        batch_kwargs = None
        for key in self._get_asset_options(data_asset_name, iterator_dict):
            if self._partitioner(key=key,
                                 asset_config=asset_config) == partition_id:
                batch_kwargs = self._build_batch_kwargs(
                    key=key,
                    asset_config=asset_config,
                    reader_options=reader_options,
                    limit=limit,
                )

        if batch_kwargs is None:
            raise BatchKwargsError(
                "Unable to identify partition %s for asset %s" %
                (partition_id, data_asset_name),
                {
                    "data_asset_name": data_asset_name,
                    "partition_id": partition_id
                },
            )

        return batch_kwargs
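The generator_asset/data_asset_name handling at the top of this method is a common rename-with-deprecation idiom: exactly one of the two names may be supplied, and the old one is aliased onto the new one after a DeprecationWarning. A minimal standalone sketch of that idiom (function and parameter names are illustrative):

import warnings

def resolve_asset_name(generator_asset=None, data_asset_name=None):
    assert (generator_asset and not data_asset_name) or (
        not generator_asset and data_asset_name
    ), "Please provide either generator_asset or data_asset_name."
    if generator_asset:
        warnings.warn(
            "'generator_asset' is deprecated; use 'data_asset_name' instead.",
            DeprecationWarning,
        )
        data_asset_name = generator_asset
    return data_asset_name

print(resolve_asset_name(data_asset_name="users"))  # users
print(resolve_asset_name(generator_asset="users"))  # users, plus a DeprecationWarning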
Example #20
 def from_tuple(cls, tuple_):
     if len(tuple_) < 6:
         raise GreatExpectationsError(
             "ValidationMetricIdentifier tuple must have at least six components."
         )
     if tuple_[2] == "__":
         tuple_data_asset_name = None
     else:
         tuple_data_asset_name = tuple_[2]
     metric_id = MetricIdentifier.from_tuple(tuple_[-2:])
     return cls(
         run_id=RunIdentifier.from_tuple((tuple_[0], tuple_[1])),
         data_asset_name=tuple_data_asset_name,
         expectation_suite_identifier=ExpectationSuiteIdentifier.from_tuple(
             tuple_[3:-2]),
         metric_name=metric_id.metric_name,
         metric_kwargs_id=metric_id.metric_kwargs_id,
     )
Example #21
    def inner_func(*args, **kwargs):
        rendered_string_template = render_func(*args, **kwargs)
        current_expectation_params = list()
        app_template_str = (
            "\n - $eval_param = $eval_param_value (at time of validation)."
        )
        configuration = kwargs.get("configuration", None)
        kwargs_dict = configuration.kwargs
        for key, value in kwargs_dict.items():
            if isinstance(value, dict) and "$PARAMETER" in value.keys():
                current_expectation_params.append(value["$PARAMETER"])

        # if expectation configuration has no eval params, then don't look for the values in runtime_configuration
        if len(current_expectation_params) > 0:
            runtime_configuration = kwargs.get("runtime_configuration", None)
            if runtime_configuration:
                eval_params = runtime_configuration.get("evaluation_parameters", {})
                styling = runtime_configuration.get("styling")
                for key, val in eval_params.items():
                    # this needs to be more complicated?
                    # the possibility that it is a substring?
                    for param in current_expectation_params:
                        # "key in param" condition allows for eval param values to be rendered if arithmetic is present
                        if key == param or key in param:
                            app_params = {}
                            app_params["eval_param"] = key
                            app_params["eval_param_value"] = val
                            to_append = RenderedStringTemplateContent(
                                **{
                                    "content_block_type": "string_template",
                                    "string_template": {
                                        "template": app_template_str,
                                        "params": app_params,
                                        "styling": styling,
                                    },
                                }
                            )
                            rendered_string_template.append(to_append)
            else:
                raise GreatExpectationsError(
                    f"""GE was not able to render the value of evaluation parameters.
                        Expectation {render_func} had evaluation parameters set, but they were not passed in."""
                )
        return rendered_string_template
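The first loop above only collects the names of evaluation parameters referenced by the expectation, i.e. kwargs whose value is a dict containing a "$PARAMETER" key. That scan is easy to isolate and test on its own:

def collect_evaluation_parameters(expectation_kwargs):
    # Names of evaluation parameters referenced as {"$PARAMETER": "<name>"} values.
    return [
        value["$PARAMETER"]
        for value in expectation_kwargs.values()
        if isinstance(value, dict) and "$PARAMETER" in value
    ]

kwargs = {"column": "passenger_count", "max_value": {"$PARAMETER": "upper_bound"}}
print(collect_evaluation_parameters(kwargs))  # ['upper_bound']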
Example #22
 def from_fixed_length_tuple(cls, tuple_):
     if len(tuple_) != 6:
         raise GreatExpectationsError(
             "ValidationMetricIdentifier fixed length tuple must have exactly six "
             "components.")
     if tuple_[2] == "__":
         tuple_data_asset_name = None
     else:
         tuple_data_asset_name = tuple_[2]
     metric_id = MetricIdentifier.from_tuple(tuple_[-2:])
     return cls(
         run_id=RunIdentifier.from_fixed_length_tuple(
             (tuple_[0], tuple_[1])),
         data_asset_name=tuple_data_asset_name,
         expectation_suite_identifier=ExpectationSuiteIdentifier.from_fixed_length_tuple(
             (tuple_[3],)
         ),
         metric_name=metric_id.metric_name,
         metric_kwargs_id=metric_id.metric_kwargs_id,
     )
Example #23
    def profile(cls, data_asset, run_id=None):
        if not cls.validate(data_asset):
            raise GreatExpectationsError(
                "Invalid data_asset for profiler; aborting")

        expectation_suite = cls._profile(data_asset)

        batch_kwargs = data_asset.batch_kwargs
        expectation_suite = cls.add_meta(expectation_suite, batch_kwargs)
        validation_results = data_asset.validate(expectation_suite,
                                                 run_id=run_id,
                                                 result_format="SUMMARY")
        expectation_suite.add_citation(
            comment=str(cls.__name__) +
            " added a citation based on the current batch.",
            batch_kwargs=data_asset.batch_kwargs,
            batch_markers=data_asset.batch_markers,
            batch_parameters=data_asset.batch_parameters)
        return expectation_suite, validation_results
Example #24
    def get_compute_domain(
        self,
        domain_kwargs: dict,
        domain_type: Union[str, MetricDomainTypes],
        accessor_keys: Optional[Iterable[str]] = None,
    ) -> Tuple[DataFrame, dict, dict]:
        """Uses a given batch dictionary and domain kwargs (which include a row condition and a condition parser)
        to obtain and/or query a batch. Returns in the format of a Spark DataFrame.

        Args:
            domain_kwargs (dict) - A dictionary consisting of the domain kwargs specifying which data to obtain
            domain_type (str or MetricDomainTypes) - an Enum value indicating which metric domain the user would
            like to be using, or a corresponding string value representing it. String types include "identity",
            "column", "column_pair", "table" and "other". Enum types include capitalized versions of these from the
            class MetricDomainTypes.
            accessor_keys (str iterable) - keys that are part of the compute domain but should be ignored when
            describing the domain and simply transferred with their associated values into accessor_domain_kwargs.

        Returns:
            A tuple including:
              - a DataFrame (the data on which to compute)
              - a dictionary of compute_domain_kwargs, describing the DataFrame
              - a dictionary of accessor_domain_kwargs, describing any accessors needed to
                identify the domain within the compute domain
        """
        data = self.get_domain_records(
            domain_kwargs=domain_kwargs,
        )
        # Extracting value from enum if it is given for future computation
        domain_type = MetricDomainTypes(domain_type)

        compute_domain_kwargs = copy.deepcopy(domain_kwargs)
        accessor_domain_kwargs = {}
        table = domain_kwargs.get("table", None)
        if table:
            raise ValueError(
                "SparkDFExecutionEngine does not currently support multiple named tables."
            )

        # Warn the user that accessor keys are ignored for any domain type other than "table"
        if (
            domain_type != MetricDomainTypes.TABLE
            and accessor_keys is not None
            and len(list(accessor_keys)) > 0
        ):
            logger.warning(
                'Accessor keys ignored since Metric Domain Type is not "table"'
            )

        if domain_type == MetricDomainTypes.TABLE:
            if accessor_keys is not None and len(list(accessor_keys)) > 0:
                for key in accessor_keys:
                    accessor_domain_kwargs[key] = compute_domain_kwargs.pop(key)
            if len(compute_domain_kwargs.keys()) > 0:
                # Warn user if kwarg not "normal".
                unexpected_keys: set = set(compute_domain_kwargs.keys()).difference(
                    {
                        "batch_id",
                        "table",
                        "row_condition",
                        "condition_parser",
                    }
                )
                if len(unexpected_keys) > 0:
                    unexpected_keys_str: str = ", ".join(
                        map(lambda element: f'"{element}"', unexpected_keys)
                    )
                    logger.warning(
                        f'Unexpected key(s) {unexpected_keys_str} found in domain_kwargs for domain type "{domain_type.value}".'
                    )
            return data, compute_domain_kwargs, accessor_domain_kwargs

        elif domain_type == MetricDomainTypes.COLUMN:
            if "column" not in compute_domain_kwargs:
                raise GreatExpectationsError(
                    "Column not provided in compute_domain_kwargs"
                )

            accessor_domain_kwargs["column"] = compute_domain_kwargs.pop("column")

        elif domain_type == MetricDomainTypes.COLUMN_PAIR:
            if not (
                "column_A" in compute_domain_kwargs
                and "column_B" in compute_domain_kwargs
            ):
                raise GreatExpectationsError(
                    "column_A or column_B not found within compute_domain_kwargs"
                )

            accessor_domain_kwargs["column_A"] = compute_domain_kwargs.pop("column_A")
            accessor_domain_kwargs["column_B"] = compute_domain_kwargs.pop("column_B")

        elif domain_type == MetricDomainTypes.MULTICOLUMN:
            if "column_list" not in domain_kwargs:
                raise ge_exceptions.GreatExpectationsError(
                    "column_list not found within domain_kwargs"
                )

            column_list = compute_domain_kwargs.pop("column_list")

            if len(column_list) < 2:
                raise ge_exceptions.GreatExpectationsError(
                    "column_list must contain at least 2 columns"
                )

            accessor_domain_kwargs["column_list"] = column_list

        return data, compute_domain_kwargs, accessor_domain_kwargs
Example #25
    def resolve_metrics(
        self,
        metrics_to_resolve: Iterable[MetricConfiguration],
        metrics: Dict[Tuple, Any] = None,
        runtime_configuration: dict = None,
    ) -> dict:
        """resolve_metrics is the main entrypoint for an execution engine. The execution engine will compute the value
        of the provided metrics.

        Args:
            metrics_to_resolve: the metrics to evaluate
            metrics: already-computed metrics currently available to the engine
            runtime_configuration: runtime configuration information

        Returns:
            resolved_metrics (Dict): a dictionary with the values for the metrics that have just been resolved.
        """
        if metrics is None:
            metrics = dict()

        resolved_metrics = dict()

        metric_fn_bundle = []
        for metric_to_resolve in metrics_to_resolve:
            metric_class, metric_fn = get_metric_provider(
                metric_name=metric_to_resolve.metric_name,
                execution_engine=self)
            try:
                metric_dependencies = {
                    k: metrics[v.id]
                    for k, v in metric_to_resolve.metric_dependencies.items()
                }
            except KeyError as e:
                raise GreatExpectationsError(
                    f"Missing metric dependency: {str(e)}")
            metric_provider_kwargs = {
                "cls": metric_class,
                "execution_engine": self,
                "metric_domain_kwargs": metric_to_resolve.metric_domain_kwargs,
                "metric_value_kwargs": metric_to_resolve.metric_value_kwargs,
                "metrics": metric_dependencies,
                "runtime_configuration": runtime_configuration,
            }
            if metric_fn is None:
                try:
                    (
                        metric_fn,
                        compute_domain_kwargs,
                        accessor_domain_kwargs,
                    ) = metric_dependencies.pop("metric_partial_fn")
                except KeyError as e:
                    raise GreatExpectationsError(
                        f"Missing metric dependency: {str(e)} for metric {metric_to_resolve.metric_name}"
                    )
                metric_fn_bundle.append((
                    metric_to_resolve,
                    metric_fn,
                    compute_domain_kwargs,
                    accessor_domain_kwargs,
                    metric_provider_kwargs,
                ))
                continue
            metric_fn_type = getattr(metric_fn, "metric_fn_type",
                                     MetricFunctionTypes.VALUE)
            if metric_fn_type in [
                    MetricPartialFunctionTypes.MAP_SERIES,
                    MetricPartialFunctionTypes.MAP_FN,
                    MetricPartialFunctionTypes.MAP_CONDITION_FN,
                    MetricPartialFunctionTypes.MAP_CONDITION_SERIES,
                    MetricPartialFunctionTypes.WINDOW_FN,
                    MetricPartialFunctionTypes.WINDOW_CONDITION_FN,
                    MetricPartialFunctionTypes.AGGREGATE_FN,
            ]:
                # NOTE: 20201026 - JPC - we could use the fact that these metric functions return functions rather
                # than data to optimize compute in the future
                resolved_metrics[metric_to_resolve.id] = metric_fn(
                    **metric_provider_kwargs)
            elif metric_fn_type == MetricFunctionTypes.VALUE:
                resolved_metrics[metric_to_resolve.id] = metric_fn(
                    **metric_provider_kwargs)
            else:
                logger.warning(
                    f"Unrecognized metric function type while trying to resolve {str(metric_to_resolve.id)}"
                )
                resolved_metrics[metric_to_resolve.id] = metric_fn(
                    **metric_provider_kwargs)
        if len(metric_fn_bundle) > 0:
            resolved_metrics.update(
                self.resolve_metric_bundle(metric_fn_bundle))

        return resolved_metrics
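The dependency lookup near the top of the loop is where GreatExpectationsError is raised in this example: every declared dependency id must already be present in the metrics cache. A small standalone sketch of just that lookup, with plain tuples standing in for MetricConfiguration ids:

def gather_dependencies(declared_dependencies, metrics_cache):
    # declared_dependencies maps a local name to the id of an already-computed metric.
    try:
        return {
            name: metrics_cache[metric_id]
            for name, metric_id in declared_dependencies.items()
        }
    except KeyError as e:
        raise LookupError(f"Missing metric dependency: {str(e)}")

cache = {("table.row_count", "batch_id=b1", ()): 1000}
deps = {"row_count": ("table.row_count", "batch_id=b1", ())}
print(gather_dependencies(deps, cache))  # {'row_count': 1000}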
Example #26
    def get_domain_records(
        self,
        domain_kwargs: dict,
    ) -> DataFrame:
        """
        Uses the given domain kwargs (which include row_condition, condition_parser, and ignore_row_if directives) to
        obtain and/or query a batch. Returns in the format of a Spark DataFrame.

        Args:
            domain_kwargs (dict) - A dictionary consisting of the domain kwargs specifying which data to obtain

        Returns:
            A DataFrame (the data on which to compute)
        """
        table = domain_kwargs.get("table", None)
        if table:
            raise ValueError(
                "SparkDFExecutionEngine does not currently support multiple named tables."
            )

        batch_id = domain_kwargs.get("batch_id")
        if batch_id is None:
            # We allow no batch id specified if there is only one batch
            if self.active_batch_data:
                data = self.active_batch_data.dataframe
            else:
                raise ValidationError(
                    "No batch is specified, but could not identify a loaded batch."
                )
        else:
            if batch_id in self.loaded_batch_data_dict:
                data = self.loaded_batch_data_dict[batch_id].dataframe
            else:
                raise ValidationError(f"Unable to find batch with batch_id {batch_id}")

        # Filtering by row condition.
        row_condition = domain_kwargs.get("row_condition", None)
        if row_condition:
            condition_parser = domain_kwargs.get("condition_parser", None)
            if condition_parser == "spark":
                data = data.filter(row_condition)
            elif condition_parser == "great_expectations__experimental__":
                parsed_condition = parse_condition_to_spark(row_condition)
                data = data.filter(parsed_condition)
            else:
                raise GreatExpectationsError(
                    f"unrecognized condition_parser {str(condition_parser)} for Spark execution engine"
                )

        # Filtering by filter_conditions
        filter_conditions: List[RowCondition] = domain_kwargs.get(
            "filter_conditions", []
        )
        if len(filter_conditions) > 0:
            filter_condition = self._combine_row_conditions(filter_conditions)
            data = data.filter(filter_condition.condition)

        if "column" in domain_kwargs:
            return data

        # Filtering by ignore_row_if directive
        if (
            "column_A" in domain_kwargs
            and "column_B" in domain_kwargs
            and "ignore_row_if" in domain_kwargs
        ):
            # noinspection PyPep8Naming
            column_A_name = domain_kwargs["column_A"]
            # noinspection PyPep8Naming
            column_B_name = domain_kwargs["column_B"]

            ignore_row_if = domain_kwargs["ignore_row_if"]
            if ignore_row_if == "both_values_are_missing":
                ignore_condition = (
                    F.col(column_A_name).isNull() & F.col(column_B_name).isNull()
                )
                data = data.filter(~ignore_condition)
            elif ignore_row_if == "either_value_is_missing":
                ignore_condition = (
                    F.col(column_A_name).isNull() | F.col(column_B_name).isNull()
                )
                data = data.filter(~ignore_condition)
            else:
                if ignore_row_if not in ["neither", "never"]:
                    raise ValueError(
                        f'Unrecognized value of ignore_row_if ("{ignore_row_if}").'
                    )

                if ignore_row_if == "never":
                    # deprecated-v0.13.29
                    warnings.warn(
                        f"""The correct "no-action" value of the "ignore_row_if" directive for the column pair case is \
"neither" (the use of "{ignore_row_if}" is deprecated as of v0.13.29 and will be removed in v0.16).  Please use "neither" moving forward.
""",
                        DeprecationWarning,
                    )

            return data

        if "column_list" in domain_kwargs and "ignore_row_if" in domain_kwargs:
            column_list = domain_kwargs["column_list"]
            ignore_row_if = domain_kwargs["ignore_row_if"]
            if ignore_row_if == "all_values_are_missing":
                conditions = [
                    F.col(column_name).isNull() for column_name in column_list
                ]
                ignore_condition = reduce(lambda a, b: a & b, conditions)
                data = data.filter(~ignore_condition)
            elif ignore_row_if == "any_value_is_missing":
                conditions = [
                    F.col(column_name).isNull() for column_name in column_list
                ]
                ignore_condition = reduce(lambda a, b: a | b, conditions)
                data = data.filter(~ignore_condition)
            else:
                if ignore_row_if != "never":
                    raise ValueError(
                        f'Unrecognized value of ignore_row_if ("{ignore_row_if}").'
                    )

            return data

        return data
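The ignore_row_if handling combines one isNull() check per column with functools.reduce. The same composition can be shown without Spark by folding plain Python booleans for a single row, where a value of None stands in for a null cell:

from functools import reduce

row = {"col_a": None, "col_b": 3, "col_c": None}
column_list = ["col_a", "col_b", "col_c"]

conditions = [row[column_name] is None for column_name in column_list]

all_values_are_missing = reduce(lambda a, b: a & b, conditions)
any_value_is_missing = reduce(lambda a, b: a | b, conditions)

print(all_values_are_missing)  # False
print(any_value_is_missing)    # True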
Example #27
    def get_domain_records(
        self,
        domain_kwargs: Dict,
    ) -> Selectable:
        """
        Uses the given domain kwargs (which include row_condition, condition_parser, and ignore_row_if directives) to
        obtain and/or query a batch. Returns in the format of an SqlAlchemy table/column(s) object.

        Args:
            domain_kwargs (dict) - A dictionary consisting of the domain kwargs specifying which data to obtain

        Returns:
            An SqlAlchemy table/column(s) (the selectable object for obtaining data on which to compute)
        """
        batch_id = domain_kwargs.get("batch_id")
        if batch_id is None:
            # We allow no batch id specified if there is only one batch
            if self.active_batch_data:
                data_object = self.active_batch_data
            else:
                raise GreatExpectationsError(
                    "No batch is specified, but could not identify a loaded batch."
                )
        else:
            if batch_id in self.loaded_batch_data_dict:
                data_object = self.loaded_batch_data_dict[batch_id]
            else:
                raise GreatExpectationsError(
                    f"Unable to find batch with batch_id {batch_id}"
                )

        selectable: Selectable
        if "table" in domain_kwargs and domain_kwargs["table"] is not None:
            # TODO: Add logic to handle record_set_name once implemented
            # (i.e., multiple record sets (tables) in one batch)
            if domain_kwargs["table"] != data_object.selectable.name:
                selectable = sa.Table(
                    domain_kwargs["table"],
                    sa.MetaData(),
                    schema=data_object._schema_name,
                )
            else:
                selectable = data_object.selectable
        elif "query" in domain_kwargs:
            raise ValueError(
                "query is not currently supported by SqlAlchemyExecutionEngine"
            )
        else:
            selectable = data_object.selectable

        """
        If a custom query is passed, selectable will be TextClause and not formatted
        as a subquery wrapped in "(subquery) alias". TextClause must first be converted
        to TextualSelect using sa.columns() before it can be converted to type Subquery
        """
        if TextClause and isinstance(selectable, TextClause):
            selectable = selectable.columns().subquery()

        # Filtering by row condition.
        if (
            "row_condition" in domain_kwargs
            and domain_kwargs["row_condition"] is not None
        ):
            condition_parser = domain_kwargs["condition_parser"]
            if condition_parser == "great_expectations__experimental__":
                parsed_condition = parse_condition_to_sqlalchemy(
                    domain_kwargs["row_condition"]
                )
                selectable = (
                    sa.select([sa.text("*")])
                    .select_from(selectable)
                    .where(parsed_condition)
                )
            else:
                raise GreatExpectationsError(
                    "SqlAlchemyExecutionEngine only supports the great_expectations condition_parser."
                )

        # Filtering by filter_conditions
        filter_conditions: List[RowCondition] = domain_kwargs.get(
            "filter_conditions", []
        )
        # For SqlAlchemyExecutionEngine only one filter condition is allowed
        if len(filter_conditions) == 1:
            filter_condition = filter_conditions[0]
            assert (
                filter_condition.condition_type == RowConditionParserType.GE
            ), "filter_condition must be of type GE for SqlAlchemyExecutionEngine"

            selectable = (
                sa.select([sa.text("*")])
                .select_from(selectable)
                .where(parse_condition_to_sqlalchemy(filter_condition.condition))
            )
        elif len(filter_conditions) > 1:
            raise GreatExpectationsError(
                "SqlAlchemyExecutionEngine currently only supports a single filter condition."
            )

        if "column" in domain_kwargs:
            return selectable

        # Filtering by ignore_row_if directive
        if (
            "column_A" in domain_kwargs
            and "column_B" in domain_kwargs
            and "ignore_row_if" in domain_kwargs
        ):
            if self.active_batch_data.use_quoted_name:
                # Checking if case-sensitive and using appropriate name
                # noinspection PyPep8Naming
                column_A_name = quoted_name(domain_kwargs["column_A"], quote=True)
                # noinspection PyPep8Naming
                column_B_name = quoted_name(domain_kwargs["column_B"], quote=True)
            else:
                # noinspection PyPep8Naming
                column_A_name = domain_kwargs["column_A"]
                # noinspection PyPep8Naming
                column_B_name = domain_kwargs["column_B"]

            ignore_row_if = domain_kwargs["ignore_row_if"]
            if ignore_row_if == "both_values_are_missing":
                selectable = get_sqlalchemy_selectable(
                    sa.select([sa.text("*")])
                    .select_from(get_sqlalchemy_selectable(selectable))
                    .where(
                        sa.not_(
                            sa.and_(
                                sa.column(column_A_name) == None,
                                sa.column(column_B_name) == None,
                            )
                        )
                    )
                )
            elif ignore_row_if == "either_value_is_missing":
                selectable = get_sqlalchemy_selectable(
                    sa.select([sa.text("*")])
                    .select_from(get_sqlalchemy_selectable(selectable))
                    .where(
                        sa.not_(
                            sa.or_(
                                sa.column(column_A_name) == None,
                                sa.column(column_B_name) == None,
                            )
                        )
                    )
                )
            else:
                if ignore_row_if not in ["neither", "never"]:
                    raise ValueError(
                        f'Unrecognized value of ignore_row_if ("{ignore_row_if}").'
                    )

                if ignore_row_if == "never":
                    # deprecated-v0.13.29
                    warnings.warn(
                        f"""The correct "no-action" value of the "ignore_row_if" directive for the column pair case is \
"neither" (the use of "{ignore_row_if}" is deprecated as of v0.13.29 and will be removed in v0.16).  Please use "neither" moving forward.
""",
                        DeprecationWarning,
                    )

            return selectable

        if "column_list" in domain_kwargs and "ignore_row_if" in domain_kwargs:
            if self.active_batch_data.use_quoted_name:
                # Checking if case-sensitive and using appropriate name
                column_list = [
                    quoted_name(domain_kwargs[column_name], quote=True)
                    for column_name in domain_kwargs["column_list"]
                ]
            else:
                column_list = domain_kwargs["column_list"]

            ignore_row_if = domain_kwargs["ignore_row_if"]
            if ignore_row_if == "all_values_are_missing":
                selectable = get_sqlalchemy_selectable(
                    sa.select([sa.text("*")])
                    .select_from(get_sqlalchemy_selectable(selectable))
                    .where(
                        sa.not_(
                            sa.and_(
                                *(
                                    sa.column(column_name) == None
                                    for column_name in column_list
                                )
                            )
                        )
                    )
                )
            elif ignore_row_if == "any_value_is_missing":
                selectable = get_sqlalchemy_selectable(
                    sa.select([sa.text("*")])
                    .select_from(get_sqlalchemy_selectable(selectable))
                    .where(
                        sa.not_(
                            sa.or_(
                                *(
                                    sa.column(column_name) == None
                                    for column_name in column_list
                                )
                            )
                        )
                    )
                )
            else:
                if ignore_row_if != "never":
                    raise ValueError(
                        f'Unrecognized value of ignore_row_if ("{ignore_row_if}").'
                    )

            return selectable

        return selectable
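The ignore_row_if branches build ordinary SQLAlchemy boolean clauses before wrapping them in a SELECT, so the clause construction can be previewed in isolation. This sketch assumes SQLAlchemy is installed and keeps the `== None` comparison used above, which SQLAlchemy compiles to IS NULL:

import sqlalchemy as sa

# "both_values_are_missing": drop rows only when column_A AND column_B are both NULL.
ignore_condition = sa.not_(
    sa.and_(
        sa.column("column_A") == None,  # noqa: E711 -- compiles to "column_A IS NULL"
        sa.column("column_B") == None,  # noqa: E711
    )
)
print(ignore_condition)  # renders roughly: NOT (column_A IS NULL AND column_B IS NULL)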
Example #28
    def get_compute_domain(
        self,
        domain_kwargs: Dict,
        domain_type: Union[str, "MetricDomainTypes"],
        accessor_keys: Optional[Iterable[str]] = None,
    ) -> Tuple["sa.sql.Selectable", dict, dict]:
        """Uses a given batch dictionary and domain kwargs to obtain a SqlAlchemy column object.

        Args:
            domain_kwargs (dict) - A dictionary consisting of the domain kwargs specifying which data to obtain
            domain_type (str or "MetricDomainTypes") - an Enum value indicating which metric domain the user would
            like to be using, or a corresponding string value representing it. String types include "identity", "column",
            "column_pair", "table" and "other". Enum types include capitalized versions of these from the class
            MetricDomainTypes.
            accessor_keys (str iterable) - keys that are part of the compute domain but should be ignored when describing
            the domain and simply transferred with their associated values into accessor_domain_kwargs.

        Returns:
            SqlAlchemy column
        """
        # Extracting value from enum if it is given for future computation
        domain_type = MetricDomainTypes(domain_type)
        batch_id = domain_kwargs.get("batch_id")
        if batch_id is None:
            # We allow no batch id specified if there is only one batch
            if self.active_batch_data:
                data_object = self.active_batch_data
            else:
                raise GreatExpectationsError(
                    "No batch is specified, but could not identify a loaded batch."
                )
        else:
            if batch_id in self.loaded_batch_data_dict:
                data_object = self.loaded_batch_data_dict[batch_id]
            else:
                raise GreatExpectationsError(
                    f"Unable to find batch with batch_id {batch_id}"
                )

        compute_domain_kwargs = copy.deepcopy(domain_kwargs)
        accessor_domain_kwargs = dict()
        if "table" in domain_kwargs and domain_kwargs["table"] is not None:
            if domain_kwargs["table"] != data_object.record_set_name:
                raise ValueError("Unrecognized table name.")
            else:
                selectable = data_object.selectable
        elif "query" in domain_kwargs:
            raise ValueError(
                "query is not currently supported by SqlAlchemyExecutionEngine"
            )
        else:
            selectable = data_object.selectable

        if (
            "row_condition" in domain_kwargs
            and domain_kwargs["row_condition"] is not None
        ):
            condition_parser = domain_kwargs["condition_parser"]
            if condition_parser == "great_expectations__experimental__":
                parsed_condition = parse_condition_to_sqlalchemy(
                    domain_kwargs["row_condition"]
                )
                selectable = sa.select(
                    "*", from_obj=selectable, whereclause=parsed_condition
                )

            else:
                raise GreatExpectationsError(
                    "SqlAlchemyExecutionEngine only supports the great_expectations condition_parser."
                )

        # Warn the user that accessor keys are ignored for any domain type other than "table"
        if (
            domain_type != MetricDomainTypes.TABLE
            and accessor_keys is not None
            and len(accessor_keys) > 0
        ):
            logger.warning(
                "Accessor keys ignored since Metric Domain Type is not 'table'"
            )

        if domain_type == MetricDomainTypes.TABLE:
            if accessor_keys is not None and len(accessor_keys) > 0:
                for key in accessor_keys:
                    accessor_domain_kwargs[key] = compute_domain_kwargs.pop(key)
            if len(domain_kwargs.keys()) > 0:
                for key in compute_domain_kwargs.keys():
                    # Warning user if kwarg not "normal"
                    if key not in [
                        "batch_id",
                        "table",
                        "row_condition",
                        "condition_parser",
                    ]:
                        logger.warning(
                            f"Unexpected key {key} found in domain_kwargs for domain type {domain_type.value}"
                        )
            return selectable, compute_domain_kwargs, accessor_domain_kwargs

        # If the user wants a column domain, check that one is provided
        elif domain_type == MetricDomainTypes.COLUMN:
            if "column" in compute_domain_kwargs:
                # Checking if case-sensitive and using appropriate name
                if self.active_batch_data.use_quoted_name:
                    accessor_domain_kwargs["column"] = quoted_name(
                        compute_domain_kwargs.pop("column")
                    )
                else:
                    accessor_domain_kwargs["column"] = compute_domain_kwargs.pop(
                        "column"
                    )
            else:
                # If column not given
                raise GreatExpectationsError(
                    "Column not provided in compute_domain_kwargs"
                )

        # Else, if column pair values requested
        elif domain_type == MetricDomainTypes.COLUMN_PAIR:
            # Ensuring column_A and column_B parameters provided
            if (
                "column_A" in compute_domain_kwargs
                and "column_B" in compute_domain_kwargs
            ):
                if self.active_batch_data.use_quoted_name:
                    # If case matters...
                    accessor_domain_kwargs["column_A"] = quoted_name(
                        compute_domain_kwargs.pop("column_A")
                    )
                    accessor_domain_kwargs["column_B"] = quoted_name(
                        compute_domain_kwargs.pop("column_B")
                    )
                else:
                    accessor_domain_kwargs["column_A"] = compute_domain_kwargs.pop(
                        "column_A"
                    )
                    accessor_domain_kwargs["column_B"] = compute_domain_kwargs.pop(
                        "column_B"
                    )
            else:
                raise GreatExpectationsError(
                    "column_A or column_B not found within compute_domain_kwargs"
                )

        # If a multicolumn domain is requested, move its column list into accessor_domain_kwargs
        elif domain_type == MetricDomainTypes.MULTICOLUMN:
            if "columns" in compute_domain_kwargs:
                # If columns exist
                accessor_domain_kwargs["columns"] = compute_domain_kwargs.pop("columns")

        # Filtering if identity
        elif domain_type == MetricDomainTypes.IDENTITY:
            # If we would like our data to become a single column
            if "column" in compute_domain_kwargs:
                if self.active_batch_data.use_quoted_name:
                    selectable = sa.select(
                        [sa.column(quoted_name(compute_domain_kwargs["column"]))]
                    ).select_from(selectable)
                else:
                    selectable = sa.select(
                        [sa.column(compute_domain_kwargs["column"])]
                    ).select_from(selectable)

            # If we would like our data to now become a column pair
            elif ("column_A" in compute_domain_kwargs) and (
                "column_B" in compute_domain_kwargs
            ):
                if self.active_batch_data.use_quoted_name:
                    selectable = sa.select(
                        [
                            sa.column(quoted_name(compute_domain_kwargs["column_A"])),
                            sa.column(quoted_name(compute_domain_kwargs["column_B"])),
                        ]
                    ).select_from(selectable)
                else:
                    selectable = sa.select(
                        [
                            sa.column(compute_domain_kwargs["column_A"]),
                            sa.column(compute_domain_kwargs["column_B"]),
                        ]
                    ).select_from(selectable)
            else:
                # If we would like our data to become a multicolumn
                if "columns" in compute_domain_kwargs:
                    if self.active_batch_data.use_quoted_name:
                        # Building a list of column objects used for sql alchemy selection
                        to_select = [
                            sa.column(quoted_name(col))
                            for col in compute_domain_kwargs["columns"]
                        ]
                        selectable = sa.select(to_select).select_from(selectable)
                    else:
                        to_select = [
                            sa.column(col) for col in compute_domain_kwargs["columns"]
                        ]
                        selectable = sa.select(to_select).select_from(selectable)

        # Letting selectable fall through
        return selectable, compute_domain_kwargs, accessor_domain_kwargs
Example #29
    def get_compute_domain(
        self,
        domain_kwargs: Dict,
        domain_type: Union[str, MetricDomainTypes],
        accessor_keys: Optional[Iterable[str]] = None,
    ) -> Tuple[Selectable, dict, dict]:
        """Uses a given batch dictionary and domain kwargs to obtain a SqlAlchemy column object.

        Args:
            domain_kwargs (dict) - A dictionary consisting of the domain kwargs specifying which data to obtain
            domain_type (str or MetricDomainTypes) - an Enum value indicating which metric domain the user would
            like to be using, or a corresponding string value representing it. String types include "identity",
            "column", "column_pair", "table" and "other". Enum types include capitalized versions of these from the
            class MetricDomainTypes.
            accessor_keys (str iterable) - keys that are part of the compute domain but should be ignored when
            describing the domain and simply transferred with their associated values into accessor_domain_kwargs.

        Returns:
            SqlAlchemy column
        """
        selectable = self.get_domain_records(domain_kwargs=domain_kwargs)
        # Extracting value from enum if it is given for future computation
        domain_type = MetricDomainTypes(domain_type)

        # Warn the user that accessor keys are ignored for any domain type other than "table"
        if (domain_type != MetricDomainTypes.TABLE
                and accessor_keys is not None
                and len(list(accessor_keys)) > 0):
            logger.warning(
                'Accessor keys ignored since Metric Domain Type is not "table"'
            )

        compute_domain_kwargs = copy.deepcopy(domain_kwargs)
        accessor_domain_kwargs = {}
        if domain_type == MetricDomainTypes.TABLE:
            if accessor_keys is not None and len(list(accessor_keys)) > 0:
                for key in accessor_keys:
                    accessor_domain_kwargs[key] = compute_domain_kwargs.pop(
                        key)
            if len(domain_kwargs.keys()) > 0:
                # Warn user if kwarg not "normal".
                unexpected_keys: set = set(
                    compute_domain_kwargs.keys()).difference({
                        "batch_id",
                        "table",
                        "row_condition",
                        "condition_parser",
                    })
                if len(unexpected_keys) > 0:
                    unexpected_keys_str: str = ", ".join(
                        map(lambda element: f'"{element}"', unexpected_keys))
                    logger.warning(
                        f'Unexpected key(s) {unexpected_keys_str} found in domain_kwargs for domain type "{domain_type.value}".'
                    )
            return selectable, compute_domain_kwargs, accessor_domain_kwargs

        elif domain_type == MetricDomainTypes.COLUMN:
            if "column" not in compute_domain_kwargs:
                raise GreatExpectationsError(
                    "Column not provided in compute_domain_kwargs")

            # Checking if case-sensitive and using appropriate name
            if self.active_batch_data.use_quoted_name:
                accessor_domain_kwargs["column"] = quoted_name(
                    compute_domain_kwargs.pop("column"), quote=True)
            else:
                accessor_domain_kwargs["column"] = compute_domain_kwargs.pop(
                    "column")

            return selectable, compute_domain_kwargs, accessor_domain_kwargs

        elif domain_type == MetricDomainTypes.COLUMN_PAIR:
            if not ("column_A" in compute_domain_kwargs
                    and "column_B" in compute_domain_kwargs):
                raise GreatExpectationsError(
                    "column_A or column_B not found within compute_domain_kwargs"
                )

            # Checking if case-sensitive and using appropriate name
            if self.active_batch_data.use_quoted_name:
                accessor_domain_kwargs["column_A"] = quoted_name(
                    compute_domain_kwargs.pop("column_A"), quote=True)
                accessor_domain_kwargs["column_B"] = quoted_name(
                    compute_domain_kwargs.pop("column_B"), quote=True)
            else:
                accessor_domain_kwargs["column_A"] = compute_domain_kwargs.pop(
                    "column_A")
                accessor_domain_kwargs["column_B"] = compute_domain_kwargs.pop(
                    "column_B")

            return selectable, compute_domain_kwargs, accessor_domain_kwargs

        elif domain_type == MetricDomainTypes.MULTICOLUMN:
            if "column_list" not in domain_kwargs:
                raise GreatExpectationsError(
                    "column_list not found within domain_kwargs")

            column_list = compute_domain_kwargs.pop("column_list")

            if len(column_list) < 2:
                raise GreatExpectationsError(
                    "column_list must contain at least 2 columns")

            # Checking if case-sensitive and using appropriate name
            if self.active_batch_data.use_quoted_name:
                accessor_domain_kwargs["column_list"] = [
                    quoted_name(column_name, quote=True)
                    for column_name in column_list
                ]
            else:
                accessor_domain_kwargs["column_list"] = column_list

            return selectable, compute_domain_kwargs, accessor_domain_kwargs

        # Letting selectable fall through
        return selectable, compute_domain_kwargs, accessor_domain_kwargs