class RuleConfigSchema(NotNullSchema):
    """Marshmallow schema for a single Rule-Based Profiler rule configuration."""

    class Meta:
        unknown = INCLUDE

    __config_class__ = RuleConfig

    # A rule may omit its domain builder entirely.
    domain_builder = fields.Nested(
        DomainBuilderConfigSchema,
        allow_none=True,
        required=False,
    )
    # Optional list of parameter builders; each listed entry must be present.
    parameter_builders = fields.List(
        cls_or_instance=fields.Nested(
            ParameterBuilderConfigSchema,
            allow_none=False,
            required=True,
        ),
        allow_none=True,
        required=False,
    )
    # Expectation configuration builders are mandatory for a rule.
    expectation_configuration_builders = fields.List(
        cls_or_instance=fields.Nested(
            ExpectationConfigurationBuilderConfigSchema,
            allow_none=False,
            required=True,
        ),
        allow_none=False,
        required=True,
    )
class DatasourceConfigSchema(Schema):
    """Schema for serializing/deserializing a DatasourceConfig."""

    class Meta:
        unknown = INCLUDE

    class_name = fields.String(missing="Datasource")
    module_name = fields.String(missing="great_expectations.datasource")
    execution_engine = fields.Nested(ExecutionEngineConfigSchema)
    data_connectors = fields.Dict(
        keys=fields.Str(),
        values=fields.Nested(DataConnectorConfigSchema),
        allow_none=False,
        required=True,
    )
    credentials = fields.Raw(allow_none=True)
    spark_context = fields.Raw(allow_none=True)

    @validates_schema
    def validate_schema(self, data, **kwargs):
        # No cross-field constraints to enforce for this configuration.
        pass

    # noinspection PyUnusedLocal
    @post_load
    def make_datasource_config(self, data, **kwargs):
        """Build a DatasourceConfig from the validated payload."""
        return DatasourceConfig(**data)
class DatasourceConfigSchema(Schema):
    """Schema for DatasourceConfig, validating sqlalchemy-only keys."""

    class Meta:
        unknown = INCLUDE

    class_name = fields.String(missing="Datasource")
    module_name = fields.String(missing="great_expectations.datasource")
    execution_engine = fields.Nested(
        ExecutionEngineConfigSchema, required=False, allow_none=True
    )
    data_connectors = fields.Dict(
        keys=fields.Str(),
        values=fields.Nested(DataConnectorConfigSchema),
        required=False,
        allow_none=True,
    )
    data_asset_type = fields.Nested(ClassConfigSchema, required=False, allow_none=True)
    # TODO: Update to generator-specific
    # batch_kwargs_generators = fields.Mapping(keys=fields.Str(), values=fields.Nested(fields.GeneratorSchema))
    batch_kwargs_generators = fields.Dict(
        keys=fields.Str(), values=fields.Dict(), required=False, allow_none=True
    )
    connection_string = fields.String(required=False, allow_none=True)
    credentials = fields.Raw(required=False, allow_none=True)
    introspection = fields.Dict(required=False, allow_none=True)
    tables = fields.Dict(required=False, allow_none=True)

    @validates_schema
    def validate_schema(self, data, **kwargs):
        """Reject legacy keys and sqlalchemy-only keys on non-sqlalchemy datasources.

        Raises:
            ge_exceptions.InvalidConfigError: on the pre-0.10 "generators" key,
                or when sqlalchemy-only keys are configured for a datasource
                whose class_name is not a sqlalchemy datasource.
        """
        if "generators" in data:
            raise ge_exceptions.InvalidConfigError(
                'Your current configuration uses the "generators" key in a datasource, but in version 0.10 of '
                'GE, that key is renamed to "batch_kwargs_generators". Please update your configuration to continue.'
            )
        # If a class_name begins with the dollar sign ("$"), then it is assumed to be
        # a variable name to be substituted and cannot be validated here.
        # BUG FIX: startswith() is safe for an empty class_name, where the previous
        # data["class_name"][0] indexing would raise IndexError.
        if data["class_name"].startswith("$"):
            return
        sqlalchemy_only_keys = ("connection_string", "credentials", "introspection", "tables")
        sqlalchemy_class_names = ("SqlAlchemyDatasource", "SimpleSqlalchemyDatasource")
        if (
            any(key in data for key in sqlalchemy_only_keys)
            and data["class_name"] not in sqlalchemy_class_names
        ):
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses one or more keys in a data source, that are required only by a
sqlalchemy data source (your data source is "{data['class_name']}"). Please update your configuration to continue.
"""
            )

    # noinspection PyUnusedLocal
    @post_load
    def make_datasource_config(self, data, **kwargs):
        """Build a DatasourceConfig from the validated payload."""
        return DatasourceConfig(**data)
class ExpectationSuiteValidationResultSchema(Schema):
    """Schema for ExpectationSuiteValidationResult (with optional GE Cloud id)."""

    success = fields.Bool()
    results = fields.List(fields.Nested(ExpectationValidationResultSchema))
    evaluation_parameters = fields.Dict()
    statistics = fields.Dict()
    meta = fields.Dict(allow_none=True)
    ge_cloud_id = fields.UUID(required=False, allow_none=True)

    # noinspection PyUnusedLocal
    @pre_dump
    def prepare_dump(self, data, **kwargs):
        """Deep-copy the payload and coerce meta/statistics into JSON-safe form."""
        data = deepcopy(data)
        if isinstance(data, ExpectationSuiteValidationResult):
            data.meta = convert_to_json_serializable(data=data.meta)
            data.statistics = convert_to_json_serializable(data=data.statistics)
        elif isinstance(data, dict):
            for key in ("meta", "statistics"):
                data[key] = convert_to_json_serializable(data=data.get(key))
        return data

    # noinspection PyUnusedLocal
    @post_load
    def make_expectation_suite_validation_result(self, data, **kwargs):
        return ExpectationSuiteValidationResult(**data)
class ExpectationSuiteSchema(Schema):
    """Schema for ExpectationSuite objects."""

    expectation_suite_name = fields.Str()
    expectations = fields.List(fields.Nested(ExpectationConfigurationSchema))
    evaluation_parameters = fields.Dict(allow_none=True)
    data_asset_type = fields.Str(allow_none=True)
    meta = fields.Dict()

    # NOTE: 20191107 - JPC - we may want to remove clean_empty and update tests to require the other fields;
    # doing so could also allow us not to have to make a copy of data in the pre_dump method.
    def clean_empty(self, data):
        """Drop empty evaluation_parameters/meta attributes prior to dumping."""
        if hasattr(data, "evaluation_parameters") and len(data.evaluation_parameters) == 0:
            del data.evaluation_parameters
        if hasattr(data, "meta"):
            meta = data.meta
            # A None or [] meta is left untouched; only an empty mapping is removed.
            if not (meta is None or meta == []) and len(meta) == 0:
                del data.meta
        return data

    # noinspection PyUnusedLocal
    @pre_dump
    def prepare_dump(self, data, **kwargs):
        """Deep-copy the payload, JSON-coerce meta, and strip empty entries."""
        data = deepcopy(data)
        data.meta = convert_to_json_serializable(data.meta)
        return self.clean_empty(data)

    # noinspection PyUnusedLocal
    @post_load
    def make_expectation_suite(self, data, **kwargs):
        return ExpectationSuite(**data)
class ParameterBuilderConfigSchema(NotNullSchema):
    """Schema for ParameterBuilderConfig objects used by the Rule-Based Profiler."""

    class Meta:
        unknown = INCLUDE

    __config_class__ = ParameterBuilderConfig

    name = fields.String(allow_none=False, required=True)
    module_name = fields.String(
        allow_none=True,
        required=False,
        missing="great_expectations.rule_based_profiler.parameter_builder",
    )
    class_name = fields.String(allow_none=False, required=True)
    # Self-referential: a parameter builder may nest further parameter builders,
    # hence the lazy lambda to defer schema resolution.
    evaluation_parameter_builder_configs = fields.List(
        cls_or_instance=fields.Nested(
            lambda: ParameterBuilderConfigSchema(),
            allow_none=False,
            required=True,
        ),
        allow_none=True,
        required=False,
    )
class DatasourceConfigSchema(Schema):
    """Schema for legacy (batch_kwargs-based) datasource configurations."""

    class Meta:
        unknown = INCLUDE

    class_name = fields.String(required=True)
    module_name = fields.String(missing="great_expectations.datasource")
    data_asset_type = fields.Nested(ClassConfigSchema)
    # TODO: Update to generator-specific
    # batch_kwargs_generators = fields.Mapping(keys=fields.Str(), values=fields.Nested(fields.GeneratorSchema))
    batch_kwargs_generators = fields.Dict(
        keys=fields.Str(), values=fields.Dict(), allow_none=True
    )
    credentials = fields.Raw(allow_none=True)
    spark_context = fields.Raw(allow_none=True)

    @validates_schema
    def validate_schema(self, data, **kwargs):
        """Reject the pre-0.10 "generators" key in favor of "batch_kwargs_generators"."""
        if "generators" not in data:
            return
        raise ge_exceptions.InvalidConfigError(
            "Your current configuration uses the 'generators' key in a datasource, but in version 0.10 of "
            "GE, that key is renamed to 'batch_kwargs_generators'. Please update your config to continue."
        )

    # noinspection PyUnusedLocal
    @post_load
    def make_datasource_config(self, data, **kwargs):
        return DatasourceConfig(**data)
class ExpectationValidationResultSchema(Schema):
    """Schema for a single ExpectationValidationResult."""

    success = fields.Bool()
    expectation_config = fields.Nested(ExpectationConfigurationSchema)
    result = fields.Dict()
    meta = fields.Dict()
    exception_info = fields.Dict()

    # noinspection PyUnusedLocal
    @pre_dump
    def convert_result_to_serializable(self, data, **kwargs):
        """Deep-copy the payload and make its result dict JSON-serializable."""
        data = deepcopy(data)
        if isinstance(data, ExpectationValidationResult):
            data.result = convert_to_json_serializable(data.result)
        elif isinstance(data, dict):
            data["result"] = convert_to_json_serializable(data.get("result"))
        return data

    # noinspection PyUnusedLocal
    @post_load
    def make_expectation_validation_result(self, data, **kwargs):
        return ExpectationValidationResult(**data)
class NotebooksConfigSchema(Schema):
    """Schema for the data-context "notebooks" section.

    For now only suite_edit is supported; other notebook customization
    options may be added in the future.
    """

    suite_edit = fields.Nested(NotebookConfigSchema)

    # noinspection PyUnusedLocal
    @post_load
    def make_notebooks_config(self, data, **kwargs):
        return NotebooksConfig(**data)
class CheckpointResultSchema(Schema):
    """Schema for CheckpointResult objects.

    JC: I think this needs to be changed to be an instance of a new type called
    CheckpointResult, which would include the top-level keys run_id, config,
    name, and a list of results.
    """

    run_id = fields.Nested(RunIdentifierSchema)
    run_results = fields.Dict()
    checkpoint_config = fields.Nested(CheckpointConfigSchema)
    success = fields.Bool()

    # noinspection PyUnusedLocal
    @pre_dump
    def prepare_dump(self, data, **kwargs):
        """Deep-copy the payload and JSON-coerce run_results before serialization.

        NOTE(review): this assigns the private attribute ``_run_results`` —
        presumably the backing store of a ``run_results`` property; confirm.
        """
        data = deepcopy(data)
        data._run_results = convert_to_json_serializable(data.run_results)
        return data

    # noinspection PyUnusedLocal
    @post_load
    def make_checkpoint_result(self, data, **kwargs):
        return CheckpointResult(**data)
class RuleBasedProfilerConfigSchema(Schema):
    """
    Schema classes for configurations which extend from BaseYamlConfig must extend top-level Marshmallow Schema class.
    Schema classes for their constituent configurations which extend DictDot must extend NotNullSchema class.
    """

    class Meta:
        unknown = INCLUDE

    name = fields.String(
        required=True,
        allow_none=False,
    )
    # BUG FIX: class_name and module_name previously also passed a stray
    # "all_none=True" keyword (a typo of the "allow_none" already supplied);
    # marshmallow does not recognize it and would stash it in field metadata.
    class_name = fields.String(
        required=False,
        allow_none=True,
        missing="RuleBasedProfiler",
    )
    module_name = fields.String(
        required=False,
        allow_none=True,
        missing="great_expectations.rule_based_profiler",
    )
    config_version = fields.Float(
        required=True,
        allow_none=False,
        # Only config version 1.0 is accepted at present.
        validate=lambda x: x == 1.0,
        error_messages={
            "invalid": "config version is not supported; it must be 1.0 per the current version of Great Expectations"
        },
    )
    variables = fields.Dict(
        keys=fields.String(
            required=True,
            allow_none=False,
        ),
        required=False,
        allow_none=True,
    )
    rules = fields.Dict(
        keys=fields.String(
            required=True,
            allow_none=False,
        ),
        values=fields.Nested(
            RuleConfigSchema,
            required=True,
            allow_none=False,
        ),
        required=True,
        allow_none=False,
    )
class RenderedAtomicContentSchema(Schema):
    """Schema for RenderedAtomicContent objects."""

    class Meta:
        # BUG FIX: this was previously the bare annotation "unknown: INCLUDE",
        # which never assigns Meta.unknown — unknown fields would then hit
        # marshmallow's default RAISE behavior instead of being included.
        unknown = INCLUDE

    name = fields.String(required=False, allow_none=True)
    value = fields.Nested(RenderedAtomicValueSchema(), required=True, allow_none=False)
    value_type = fields.String(required=True, allow_none=False)

    @post_load
    def make_rendered_atomic_content(self, data, **kwargs):
        """Build a RenderedAtomicContent from the validated payload."""
        return RenderedAtomicContent(**data)
class ValidationResultIdentifierSchema(Schema):
    """Schema for the composite key identifying a stored validation result."""

    expectation_suite_identifier = fields.Nested(
        ExpectationSuiteIdentifierSchema,
        required=True,
        error_messages={
            "required": "expectation_suite_identifier is required for a ValidationResultIdentifier"
        },
    )
    run_id = fields.Nested(
        RunIdentifierSchema,
        required=True,
        error_messages={
            "required": "run_id is required for a ValidationResultIdentifier"
        },
    )
    batch_identifier = fields.Nested(BatchIdentifierSchema, required=True)

    # noinspection PyUnusedLocal
    @post_load
    def make_validation_result_identifier(self, data, **kwargs):
        """Build a ValidationResultIdentifier from the validated payload."""
        return ValidationResultIdentifier(**data)
class ExpectationValidationResultSchema(Schema):
    """Schema for ExpectationValidationResult, including rendered content."""

    success = fields.Bool(required=False, allow_none=True)
    expectation_config = fields.Nested(
        lambda: ExpectationConfigurationSchema, required=False, allow_none=True
    )
    result = fields.Dict(required=False, allow_none=True)
    meta = fields.Dict(required=False, allow_none=True)
    exception_info = fields.Dict(required=False, allow_none=True)
    rendered_content = fields.List(
        fields.Nested(lambda: RenderedAtomicContentSchema, required=False, allow_none=True)
    )

    # noinspection PyUnusedLocal
    @pre_dump
    def convert_result_to_serializable(self, data, **kwargs):
        """Deep-copy the payload and make its result dict JSON-serializable."""
        data = deepcopy(data)
        if isinstance(data, ExpectationValidationResult):
            data.result = convert_to_json_serializable(data.result)
        elif isinstance(data, dict):
            data["result"] = convert_to_json_serializable(data.get("result"))
        return data

    # Keys dropped from the dumped dict when their value is None.
    REMOVE_KEYS_IF_NONE = ["rendered_content"]

    @post_dump
    def clean_null_attrs(self, data: dict, **kwargs: dict) -> dict:
        """Removes the attributes in ExpectationValidationResultSchema.REMOVE_KEYS_IF_NONE
        during serialization if their values are None."""
        # BUG FIX: this previously iterated ExpectationConfigurationSchema.REMOVE_KEYS_IF_NONE
        # (a copy-paste from the sibling schema); it now uses this schema's own
        # list, as the docstring has always stated.
        data = deepcopy(data)
        for key in ExpectationValidationResultSchema.REMOVE_KEYS_IF_NONE:
            if key in data and data[key] is None:
                data.pop(key)
        return data

    # noinspection PyUnusedLocal
    @post_load
    def make_expectation_validation_result(self, data, **kwargs):
        return ExpectationValidationResult(**data)
class ExpectationSuiteSchema(Schema):
    """Schema for ExpectationSuite (with GE Cloud id), dumping both
    ExpectationSuite instances and plain dict payloads."""

    expectation_suite_name = fields.Str()
    ge_cloud_id = fields.UUID(required=False, allow_none=True)
    expectations = fields.List(fields.Nested(ExpectationConfigurationSchema))
    evaluation_parameters = fields.Dict(allow_none=True)
    data_asset_type = fields.Str(allow_none=True)
    meta = fields.Dict()

    # NOTE: 20191107 - JPC - we may want to remove clean_empty and update tests to require the other fields;
    # doing so could also allow us not to have to make a copy of data in the pre_dump method.
    # noinspection PyMethodMayBeStatic
    def clean_empty(self, data):
        """Drop empty evaluation_parameters/meta entries prior to dumping.

        BUG FIX: the dict branch previously guarded each key with
        "if not data.get(key): pass", which is true for an empty dict, making
        the subsequent pop unreachable — empty dict payloads were never
        cleaned, unlike ExpectationSuite instances.  The guards now mirror
        the attribute branch.
        """
        if isinstance(data, ExpectationSuite):
            if not hasattr(data, "evaluation_parameters"):
                pass
            elif len(data.evaluation_parameters) == 0:
                del data.evaluation_parameters
            if not hasattr(data, "meta"):
                pass
            elif data.meta is None or data.meta == []:
                pass
            elif len(data.meta) == 0:
                del data.meta
        elif isinstance(data, dict):
            evaluation_parameters = data.get("evaluation_parameters")
            if (
                "evaluation_parameters" in data
                and evaluation_parameters is not None
                and len(evaluation_parameters) == 0
            ):
                data.pop("evaluation_parameters")
            meta = data.get("meta")
            if (
                "meta" in data
                and meta is not None
                and meta != []
                and len(meta) == 0
            ):
                data.pop("meta")
        return data

    # noinspection PyUnusedLocal
    @pre_dump
    def prepare_dump(self, data, **kwargs):
        """Deep-copy the payload, JSON-coerce meta, and strip empty entries."""
        data = deepcopy(data)
        if isinstance(data, ExpectationSuite):
            data.meta = convert_to_json_serializable(data.meta)
        elif isinstance(data, dict):
            data["meta"] = convert_to_json_serializable(data.get("meta"))
        return self.clean_empty(data)
class ValidationOperatorResultSchema(Schema):
    """Schema for ValidationOperatorResult objects."""

    run_id = fields.Nested(RunIdentifierSchema)
    run_results = fields.Dict()
    evaluation_parameters = fields.Dict(allow_none=True)
    validation_operator_config = fields.Dict()
    success = fields.Bool()

    # noinspection PyUnusedLocal
    @pre_dump
    def prepare_dump(self, data, **kwargs):
        """Deep-copy the payload and JSON-coerce run_results before serialization."""
        data = deepcopy(data)
        data._run_results = convert_to_json_serializable(data.run_results)
        return data

    # noinspection PyUnusedLocal
    @post_load
    def make_validation_operator_result(self, data, **kwargs):
        # Renamed from the copy-pasted "make_expectation_suite_validation_result":
        # this hook constructs a ValidationOperatorResult, not a suite validation
        # result.  The @post_load registration is unaffected by the method name.
        return ValidationOperatorResult(**data)
class ExpectationSuiteValidationResultSchema(Schema):
    """Schema for ExpectationSuiteValidationResult objects."""

    success = fields.Bool()
    results = fields.List(fields.Nested(ExpectationValidationResultSchema))
    evaluation_parameters = fields.Dict()
    statistics = fields.Dict()
    meta = fields.Dict(allow_none=True)

    # noinspection PyUnusedLocal
    @pre_dump
    def prepare_dump(self, data, **kwargs):
        """Deep-copy the payload and coerce meta into JSON-safe form."""
        data = deepcopy(data)
        data.meta = convert_to_json_serializable(data.meta)
        return data

    # noinspection PyUnusedLocal
    @post_load
    def make_expectation_suite_validation_result(self, data, **kwargs):
        return ExpectationSuiteValidationResult(**data)
class ExpectationConfigurationBuilderConfigSchema(NotNullSchema):
    """Schema for ExpectationConfigurationBuilderConfig objects."""

    class Meta:
        unknown = INCLUDE

    __config_class__ = ExpectationConfigurationBuilderConfig

    module_name = fields.String(
        allow_none=True,
        required=False,
        missing="great_expectations.rule_based_profiler.expectation_configuration_builder",
    )
    class_name = fields.String(allow_none=False, required=True)
    expectation_type = fields.Str(
        required=True,
        error_messages={
            "required": "expectation_type missing in expectation configuration builder"
        },
    )
    meta = fields.Dict(
        keys=fields.String(allow_none=False, required=True),
        allow_none=True,
        required=False,
    )
    # Lazy lambda defers resolution of ParameterBuilderConfigSchema.
    validation_parameter_builder_configs = fields.List(
        cls_or_instance=fields.Nested(
            lambda: ParameterBuilderConfigSchema(),
            allow_none=False,
            required=True,
        ),
        allow_none=True,
        required=False,
    )
class DataConnectorConfigSchema(Schema):
    """Schema for serializing/deserializing a DataConnectorConfig."""

    class Meta:
        unknown = INCLUDE

    class_name = fields.String(required=True)
    module_name = fields.String(missing="great_expectations.datasource.data_connector")
    assets = fields.Dict(
        keys=fields.Str(),
        values=fields.Nested(AssetConfigSchema),
        allow_none=True,
        required=False,
    )

    @validates_schema
    def validate_schema(self, data, **kwargs):
        # No cross-field constraints to enforce for this configuration.
        pass

    # noinspection PyUnusedLocal
    @post_load
    def make_data_connector_config(self, data, **kwargs):
        """Build a DataConnectorConfig from the validated payload."""
        return DataConnectorConfig(**data)
class DataConnectorConfigSchema(Schema):
    """Schema for DataConnectorConfig, validating connector-class-specific keys."""

    class Meta:
        unknown = INCLUDE

    class_name = fields.String(required=True)
    module_name = fields.String(missing="great_expectations.datasource.data_connector")
    assets = fields.Dict(
        keys=fields.Str(),
        values=fields.Nested(AssetConfigSchema, required=False, allow_none=True),
        required=False,
        allow_none=True,
    )
    # Filesystem-oriented keys.
    base_directory = fields.String(required=False, allow_none=True)
    glob_directive = fields.String(required=False, allow_none=True)
    default_regex = fields.Dict(required=False, allow_none=True)
    runtime_keys = fields.List(cls_or_instance=fields.Str(), required=False, allow_none=True)
    # S3-oriented keys.
    bucket = fields.String(required=False, allow_none=True)
    prefix = fields.String(required=False, allow_none=True)
    delimiter = fields.String(required=False, allow_none=True)
    max_keys = fields.Integer(required=False, allow_none=True)
    boto3_options = fields.Dict(
        keys=fields.Str(), values=fields.Str(), required=False, allow_none=True
    )
    # SQL-oriented keys.
    data_asset_name_prefix = fields.String(required=False, allow_none=True)
    data_asset_name_suffix = fields.String(required=False, allow_none=True)
    include_schema_name = fields.Boolean(required=False, allow_none=True)
    splitter_method = fields.String(required=False, allow_none=True)
    splitter_kwargs = fields.Dict(required=False, allow_none=True)
    sampling_method = fields.String(required=False, allow_none=True)
    sampling_kwargs = fields.Dict(required=False, allow_none=True)
    excluded_tables = fields.List(cls_or_instance=fields.Str(), required=False, allow_none=True)
    included_tables = fields.List(cls_or_instance=fields.Str(), required=False, allow_none=True)
    skip_inapplicable_tables = fields.Boolean(required=False, allow_none=True)

    @validates_schema
    def validate_schema(self, data, **kwargs):
        """Verify that connector-type-specific keys match the declared class_name.

        Raises:
            ge_exceptions.InvalidConfigError: when a key is supplied that is
                only supported by a different family of data connectors.

        BUG FIXES: the "conntector" typo in the user-facing error messages is
        corrected throughout, and startswith() replaces the IndexError-prone
        data["class_name"][0] indexing.
        """
        # If a class_name begins with the dollar sign ("$"), then it is assumed
        # to be a variable name to be substituted and cannot be validated here.
        if data["class_name"].startswith("$"):
            return
        class_name = data["class_name"]

        file_path_connectors = {
            "InferredAssetFilesystemDataConnector",
            "ConfiguredAssetFilesystemDataConnector",
            "InferredAssetS3DataConnector",
            "ConfiguredAssetS3DataConnector",
        }
        if "default_regex" in data and class_name not in file_path_connectors:
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses one or more keys in a data connector, that are required only by a
subclass of the FilePathDataConnector class (your data connector is "{class_name}"). Please update your configuration to continue.
"""
            )

        filesystem_connectors = {
            "InferredAssetFilesystemDataConnector",
            "ConfiguredAssetFilesystemDataConnector",
        }
        if "glob_directive" in data and class_name not in filesystem_connectors:
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses one or more keys in a data connector, that are required only by a
filesystem type of the data connector (your data connector is "{class_name}"). Please update your configuration to continue.
"""
            )

        s3_keys = ("bucket", "prefix", "delimiter", "max_keys")
        s3_connectors = {
            "InferredAssetS3DataConnector",
            "ConfiguredAssetS3DataConnector",
        }
        if any(key in data for key in s3_keys) and class_name not in s3_connectors:
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses one or more keys in a data connector, that are required only by an
S3 type of the data connector (your data connector is "{class_name}"). Please update your configuration to continue.
"""
            )

        sql_keys = (
            "data_asset_name_prefix",
            "data_asset_name_suffix",
            "include_schema_name",
            "splitter_method",
            "splitter_kwargs",
            "sampling_method",
            "sampling_kwargs",
            "excluded_tables",
            "included_tables",
            "skip_inapplicable_tables",
        )
        sql_connectors = {
            "InferredAssetSqlDataConnector",
            "ConfiguredAssetSqlDataConnector",
        }
        if any(key in data for key in sql_keys) and class_name not in sql_connectors:
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses one or more keys in a data connector, that are required only by an
SQL type of the data connector (your data connector is "{class_name}"). Please update your configuration to continue.
"""
            )

    # noinspection PyUnusedLocal
    @post_load
    def make_data_connector_config(self, data, **kwargs):
        """Build a DataConnectorConfig from the validated payload."""
        return DataConnectorConfig(**data)
class NotebookConfigSchema(Schema):
    """Schema for NotebookConfig: renderer class plus optional template overrides."""

    class_name = fields.String(missing="SuiteEditNotebookRenderer")
    module_name = fields.String(
        missing="great_expectations.render.renderer.suite_edit_notebook_renderer"
    )
    custom_templates_module = fields.String()

    # Markdown template overrides.
    header_markdown = fields.Nested(NotebookTemplateConfigSchema, allow_none=True)
    footer_markdown = fields.Nested(NotebookTemplateConfigSchema, allow_none=True)
    table_expectations_header_markdown = fields.Nested(
        NotebookTemplateConfigSchema, allow_none=True
    )
    column_expectations_header_markdown = fields.Nested(
        NotebookTemplateConfigSchema, allow_none=True
    )
    table_expectations_not_found_markdown = fields.Nested(
        NotebookTemplateConfigSchema, allow_none=True
    )
    column_expectations_not_found_markdown = fields.Nested(
        NotebookTemplateConfigSchema, allow_none=True
    )
    authoring_intro_markdown = fields.Nested(NotebookTemplateConfigSchema, allow_none=True)
    column_expectations_markdown = fields.Nested(NotebookTemplateConfigSchema, allow_none=True)

    # Code-cell template overrides.
    header_code = fields.Nested(NotebookTemplateConfigSchema, allow_none=True)
    footer_code = fields.Nested(NotebookTemplateConfigSchema, allow_none=True)
    column_expectation_code = fields.Nested(NotebookTemplateConfigSchema, allow_none=True)
    table_expectation_code = fields.Nested(NotebookTemplateConfigSchema, allow_none=True)

    # noinspection PyUnusedLocal
    @post_load
    def make_notebook_config(self, data, **kwargs):
        """Build a NotebookConfig from the validated payload."""
        return NotebookConfig(**data)
class DataContextConfigSchema(Schema):
    """Top-level schema for DataContextConfig, with version-compatibility checks."""

    config_version = fields.Number(
        validate=lambda x: 0 < x < 100,
        error_messages={"invalid": "config version must be a number."},
    )
    datasources = fields.Dict(
        keys=fields.Str(),
        values=fields.Nested(DatasourceConfigSchema),
        required=False,
        allow_none=True,
    )
    expectations_store_name = fields.Str()
    validations_store_name = fields.Str()
    evaluation_parameter_store_name = fields.Str()
    plugins_directory = fields.Str(allow_none=True)
    validation_operators = fields.Dict(keys=fields.Str(), values=fields.Dict())
    stores = fields.Dict(keys=fields.Str(), values=fields.Dict())
    notebooks = fields.Nested(NotebooksConfigSchema, allow_none=True)
    data_docs_sites = fields.Dict(keys=fields.Str(), values=fields.Dict(), allow_none=True)
    config_variables_file_path = fields.Str(allow_none=True)
    anonymous_usage_statistics = fields.Nested(AnonymizedUsageStatisticsConfigSchema)

    # noinspection PyMethodMayBeStatic
    # noinspection PyUnusedLocal
    def handle_error(self, exc, data, **kwargs):
        """Log and raise our custom exception when (de)serialization fails."""
        logger.error(exc.messages)
        raise ge_exceptions.InvalidDataContextConfigError(
            "Error while processing DataContextConfig.", exc
        )

    @validates_schema
    def validate_schema(self, data, **kwargs):
        """Check that config_version is present, numeric, and in the supported range."""
        if "config_version" not in data:
            raise ge_exceptions.InvalidDataContextConfigError(
                "The key `config_version` is missing; please check your config file.",
                validation_error=ValidationError("no config_version key"),
            )

        config_version = data["config_version"]
        if not isinstance(config_version, (int, float)):
            raise ge_exceptions.InvalidDataContextConfigError(
                "The key `config_version` must be a number. Please check your config file.",
                validation_error=ValidationError("config version not a number"),
            )

        # When migrating from 0.7.x to 0.8.0
        if config_version == 0 and (
            "validations_store" in data or "validations_stores" in data
        ):
            raise ge_exceptions.UnsupportedConfigVersionError(
                "You appear to be using a config version from the 0.7.x series. This version is no longer supported."
            )

        if config_version < MINIMUM_SUPPORTED_CONFIG_VERSION:
            raise ge_exceptions.UnsupportedConfigVersionError(
                "You appear to have an invalid config version ({}).\n The version number must be at least {}. "
                "Please see the migration guide at https://docs.greatexpectations.io/en/latest/guides/how_to_guides/migrating_versions.html".format(
                    config_version, MINIMUM_SUPPORTED_CONFIG_VERSION
                ),
            )

        if config_version > CURRENT_CONFIG_VERSION:
            raise ge_exceptions.InvalidDataContextConfigError(
                "You appear to have an invalid config version ({}).\n The maximum valid version is {}.".format(
                    config_version, CURRENT_CONFIG_VERSION
                ),
                validation_error=ValidationError("config version too high"),
            )