def get_validation_dependencies(
    self,
    configuration: Optional[ExpectationConfiguration] = None,
    execution_engine: Optional[ExecutionEngine] = None,
    runtime_configuration: Optional[dict] = None,
):
    """Return the metric dependencies needed to validate this expectation.

    Starts from the aggregate (TableExpectation) dependencies and switches to
    the column-map dependencies only when running on Pandas against an
    object-dtype column with an explicit ``type_list``. In all cases the
    ``table.column_types`` metric is added, since both variants consume it.
    """
    # This calls TableExpectation.get_validation_dependencies to set baseline
    # dependencies for the aggregate version of the expectation. We skip
    # ColumnMapExpectation.get_validation_dependencies on purpose: the map
    # version is only supported for Pandas.
    dependencies = super(ColumnMapExpectation, self).get_validation_dependencies(
        configuration, execution_engine, runtime_configuration
    )

    # Only PandasExecutionEngine supports the column map version of the expectation.
    if isinstance(execution_engine, PandasExecutionEngine):
        column_name = configuration.kwargs.get("column")
        expected_types_list = configuration.kwargs.get("type_list")
        metric_kwargs = get_metric_kwargs(
            configuration=configuration,
            metric_name="table.column_types",
            runtime_configuration=runtime_configuration,
        )
        metric_domain_kwargs = metric_kwargs.get("metric_domain_kwargs")
        metric_value_kwargs = metric_kwargs.get("metric_value_kwargs")
        table_column_types_configuration = MetricConfiguration(
            "table.column_types",
            metric_domain_kwargs=metric_domain_kwargs,
            metric_value_kwargs=metric_value_kwargs,
        )
        actual_column_types_list = execution_engine.resolve_metrics(
            [table_column_types_configuration]
        )[table_column_types_configuration.id]
        try:
            actual_column_type = [
                type_dict["type"]
                for type_dict in actual_column_types_list
                if type_dict["name"] == column_name
            ][0]
        except IndexError:
            # BUGFIX: the configured column may be absent from the resolved
            # column types (misconfigured expectation); fall back to the
            # aggregate dependencies instead of raising IndexError. This
            # matches the guarded sibling implementation in this file.
            actual_column_type = None

        # Only use the column map version if the column dtype is object.
        if (
            actual_column_type is not None
            and actual_column_type.type.__name__ == "object_"
            and expected_types_list is not None
        ):
            # This resets dependencies using
            # ColumnMapExpectation.get_validation_dependencies.
            dependencies = super().get_validation_dependencies(
                configuration, execution_engine, runtime_configuration
            )

    # This adds the table.column_types dependency for both the aggregate and
    # map versions of the expectation.
    column_types_metric_kwargs = get_metric_kwargs(
        metric_name="table.column_types",
        configuration=configuration,
        runtime_configuration=runtime_configuration,
    )
    dependencies["metrics"]["table.column_types"] = MetricConfiguration(
        metric_name="table.column_types",
        metric_domain_kwargs=column_types_metric_kwargs["metric_domain_kwargs"],
        metric_value_kwargs=column_types_metric_kwargs["metric_value_kwargs"],
    )
    return dependencies
def get_validation_dependencies(
    self,
    configuration: Optional[ExpectationConfiguration] = None,
    execution_engine: Optional[ExecutionEngine] = None,
    runtime_configuration: Optional[dict] = None,
) -> dict:
    """Extend the inherited dependencies with this expectation's map metric.

    Registers a MetricConfiguration for
    ``column_values.string_integers.increasing.map`` on top of whatever the
    base class already requires, then returns the combined dependencies dict.
    """
    map_metric_name = "column_values.string_integers.increasing.map"
    dependencies = super().get_validation_dependencies(
        configuration=configuration,
        execution_engine=execution_engine,
        runtime_configuration=runtime_configuration,
    )
    resolved_kwargs = get_metric_kwargs(
        metric_name=map_metric_name,
        configuration=configuration,
        runtime_configuration=runtime_configuration,
    )
    dependencies["metrics"][map_metric_name] = MetricConfiguration(
        metric_name=map_metric_name,
        metric_domain_kwargs=resolved_kwargs["metric_domain_kwargs"],
        metric_value_kwargs=resolved_kwargs["metric_value_kwargs"],
    )
    return dependencies
def get_validation_dependencies(
    self,
    configuration: Optional[ExpectationConfiguration] = None,
    execution_engine: Optional[ExecutionEngine] = None,
    runtime_configuration: Optional[dict] = None,
):
    """Register a MetricConfiguration for each declared metric dependency.

    Builds on the base-class dependencies: for every name in
    ``self.metric_dependencies``, resolves its domain/value kwargs and records
    the corresponding MetricConfiguration under ``dependencies["metrics"]``.
    """
    dependencies = super().get_validation_dependencies(
        configuration, execution_engine, runtime_configuration
    )
    metrics = dependencies["metrics"]
    for name in self.metric_dependencies:
        resolved = get_metric_kwargs(
            metric_name=name,
            configuration=configuration,
            runtime_configuration=runtime_configuration,
        )
        metrics[name] = MetricConfiguration(
            metric_name=name,
            metric_domain_kwargs=resolved["metric_domain_kwargs"],
            metric_value_kwargs=resolved["metric_value_kwargs"],
        )
    return dependencies
def get_validation_dependencies(
    self,
    configuration: Optional[ExpectationConfiguration] = None,
    execution_engine: Optional[ExecutionEngine] = None,
    runtime_configuration: Optional[dict] = None,
):
    """Return metric dependencies, choosing the aggregate or map variant.

    The baseline comes from TableExpectation.get_validation_dependencies.
    We deliberately keep ``super(ColumnMapExpectation, self)`` — NOT plain
    ``super()`` — so that non-Pandas backends get the aggregate dependencies;
    the map version of this expectation is only supported for Pandas.
    """
    dependencies = super(ColumnMapExpectation, self).get_validation_dependencies(
        configuration, execution_engine, runtime_configuration
    )

    # Only PandasExecutionEngine supports the column-map version.
    if isinstance(execution_engine, PandasExecutionEngine):
        target_column = configuration.kwargs.get("column")
        type_list_kwarg = configuration.kwargs.get("type_list")

        resolved = get_metric_kwargs(
            configuration=configuration,
            metric_name="table.column_types",
            runtime_configuration=runtime_configuration,
        )
        column_types_configuration = MetricConfiguration(
            "table.column_types",
            metric_domain_kwargs=resolved.get("metric_domain_kwargs"),
            metric_value_kwargs=resolved.get("metric_value_kwargs"),
        )
        column_types = execution_engine.resolve_metrics(
            [column_types_configuration]
        )[column_types_configuration.id]

        matches = [
            entry["type"] for entry in column_types if entry["name"] == target_column
        ]
        try:
            observed_type = matches[0]
        except IndexError:
            # The configured column is not present in the table; keep the
            # aggregate dependencies.
            observed_type = None

        # Switch to the column-map dependencies only when the column dtype is
        # object and an explicit type_list was supplied.
        if (
            observed_type
            and observed_type.type.__name__ == "object_"
            and type_list_kwarg is not None
        ):
            # Reset dependencies via ColumnMapExpectation.get_validation_dependencies.
            dependencies = super().get_validation_dependencies(
                configuration, execution_engine, runtime_configuration
            )

    # table.column_types is required by both the aggregate and map versions.
    column_types_kwargs = get_metric_kwargs(
        metric_name="table.column_types",
        configuration=configuration,
        runtime_configuration=runtime_configuration,
    )
    dependencies["metrics"]["table.column_types"] = MetricConfiguration(
        metric_name="table.column_types",
        metric_domain_kwargs=column_types_kwargs["metric_domain_kwargs"],
        metric_value_kwargs=column_types_kwargs["metric_value_kwargs"],
    )
    return dependencies
def get_validation_dependencies(
    self,
    configuration: Optional[ExpectationConfiguration] = None,
    execution_engine: Optional[ExecutionEngine] = None,
    runtime_configuration: Optional[dict] = None,
):
    """Assemble the metric dependencies for a column-map expectation.

    Always requires the nonnull/unexpected counts and the table row count.
    Progressively richer result formats pull in additional metrics:
    unexpected_values (above BOOLEAN_ONLY), unexpected_rows (above
    BASIC/SUMMARY), and unexpected_index_list (Pandas only).
    """
    dependencies = super().get_validation_dependencies(
        configuration, execution_engine, runtime_configuration
    )
    assert isinstance(
        self.map_metric, str
    ), "ColumnMapExpectation must override get_validation_dependencies or declare exactly one map_metric"
    assert (
        self.metric_dependencies == tuple()
    ), "ColumnMapExpectation must be configured using map_metric, and cannot have metric_dependencies declared."

    metric_dependencies = dependencies["metrics"]

    def _register(metric_name):
        # Resolve the domain/value kwargs for metric_name and record its
        # MetricConfiguration in the dependencies being built.
        resolved = get_metric_kwargs(
            metric_name=metric_name,
            configuration=configuration,
            runtime_configuration=runtime_configuration,
        )
        metric_dependencies[metric_name] = MetricConfiguration(
            metric_name=metric_name,
            metric_domain_kwargs=resolved["metric_domain_kwargs"],
            metric_value_kwargs=resolved["metric_value_kwargs"],
        )

    # Counts are needed for every result format.
    _register("column_values.nonnull.unexpected_count")
    _register(self.map_metric + ".unexpected_count")

    result_format_str = dependencies["result_format"].get("result_format")
    _register("table.row_count")

    if result_format_str == "BOOLEAN_ONLY":
        return dependencies

    _register(self.map_metric + ".unexpected_values")

    if result_format_str in ["BASIC", "SUMMARY"]:
        return dependencies

    _register(self.map_metric + ".unexpected_rows")

    # Index lists are only resolvable on the Pandas backend.
    if isinstance(execution_engine, PandasExecutionEngine):
        _register(self.map_metric + ".unexpected_index_list")

    return dependencies