class ExpectationSuiteSchema(Schema):
    """Marshmallow schema for serializing/deserializing ExpectationSuite objects."""

    expectation_suite_name = fields.Str()
    expectations = fields.List(fields.Nested(ExpectationConfigurationSchema))
    evaluation_parameters = fields.Dict(allow_none=True)
    data_asset_type = fields.Str(allow_none=True)
    meta = fields.Dict()

    # NOTE: 20191107 - JPC - we may want to remove clean_empty and update tests to require the other fields;
    # doing so could also allow us not to have to make a copy of data in the pre_dump method.
    def clean_empty(self, data):
        """Drop empty optional attributes so they are omitted from the dump."""
        if hasattr(data, "evaluation_parameters") and len(data.evaluation_parameters) == 0:
            del data.evaluation_parameters

        if hasattr(data, "meta"):
            meta = data.meta
            # A None or [] meta is left untouched; only an empty dict is removed.
            if meta is None or meta == []:
                pass
            elif len(meta) == 0:
                del data.meta
        return data

    # noinspection PyUnusedLocal
    @pre_dump
    def prepare_dump(self, data, **kwargs):
        """Copy the suite, JSON-convert meta, and strip empty fields before dumping."""
        prepared = deepcopy(data)
        prepared.meta = convert_to_json_serializable(prepared.meta)
        return self.clean_empty(prepared)

    # noinspection PyUnusedLocal
    @post_load
    def make_expectation_suite(self, data, **kwargs):
        """Build an ExpectationSuite from the validated load data."""
        return ExpectationSuite(**data)
class LegacyDatasourceConfigSchema(Schema):
    """Schema for pre-0.13 ("v2") datasource configuration blocks."""

    class Meta:
        # Unknown keys are kept, since datasources accept arbitrary extra options.
        unknown = INCLUDE

    class_name = fields.Str(required=True)
    module_name = fields.Str(missing="great_expectations.datasource")
    data_asset_type = fields.Nested(ClassConfigSchema)
    boto3_options = fields.Dict(
        keys=fields.Str(), values=fields.Str(), allow_none=True
    )
    # TODO: Update to generator-specific
    # batch_kwargs_generators = fields.Mapping(keys=fields.Str(), values=fields.Nested(fields.GeneratorSchema))
    batch_kwargs_generators = fields.Dict(
        keys=fields.Str(), values=fields.Dict(), allow_none=True
    )
    credentials = fields.Raw(allow_none=True)
    spark_context = fields.Raw(allow_none=True)

    @validates_schema
    def validate_schema(self, data, **kwargs):
        """Reject configs still using the pre-0.10 'generators' key."""
        if "generators" not in data:
            return
        raise ge_exceptions.InvalidConfigError(
            "Your current configuration uses the 'generators' key in a datasource, but in version 0.10 of "
            "GE, that key is renamed to 'batch_kwargs_generators'. Please update your config to continue."
        )

    # noinspection PyUnusedLocal
    @post_load
    def make_datasource_config(self, data, **kwargs):
        """Convert the validated dict into a LegacyDatasourceConfig."""
        return LegacyDatasourceConfig(**data)
class AssetConfigurationSchema(Schema):
    """Schema describing a single asset as a table name plus a schema name."""

    table = fields.Str()
    schema = fields.Str()

    # noinspection PyUnusedLocal
    @post_load
    def make_asset_configuration(self, data, **kwargs):
        """Instantiate an AssetConfiguration from the loaded fields."""
        return AssetConfiguration(**data)
class BatchIdentifierSchema(Schema):
    """Schema for BatchIdentifier: a batch id paired with its data asset name."""

    batch_identifier = fields.Str()
    data_asset_name = fields.Str()

    # noinspection PyUnusedLocal
    @post_load
    def make_batch_identifier(self, data, **kwargs):
        """Build a BatchIdentifier from the validated data."""
        return BatchIdentifier(**data)
class DatasourceConfigSchema(Schema):
    # Schema for new-style (v3) datasource configs with execution engines,
    # data connectors, and SQL-specific options.
    class Meta:
        # Unknown keys are retained: datasources accept arbitrary extra options.
        unknown = INCLUDE

    class_name = fields.String(missing="Datasource")
    module_name = fields.String(missing="great_expectations.datasource")
    execution_engine = fields.Nested(
        ExecutionEngineConfigSchema, required=False, allow_none=True
    )
    data_connectors = fields.Dict(
        keys=fields.Str(),
        values=fields.Nested(DataConnectorConfigSchema),
        required=False,
        allow_none=True,
    )
    data_asset_type = fields.Nested(ClassConfigSchema, required=False, allow_none=True)
    # TODO: Update to generator-specific
    # batch_kwargs_generators = fields.Mapping(keys=fields.Str(), values=fields.Nested(fields.GeneratorSchema))
    batch_kwargs_generators = fields.Dict(
        keys=fields.Str(), values=fields.Dict(), required=False, allow_none=True
    )
    # The following four keys are only valid for sqlalchemy-backed datasources
    # (enforced in validate_schema below).
    connection_string = fields.String(required=False, allow_none=True)
    credentials = fields.Raw(required=False, allow_none=True)
    introspection = fields.Dict(required=False, allow_none=True)
    tables = fields.Dict(required=False, allow_none=True)

    @validates_schema
    def validate_schema(self, data, **kwargs):
        # Reject configs still using the pre-0.10 "generators" key.
        if "generators" in data:
            raise ge_exceptions.InvalidConfigError(
                'Your current configuration uses the "generators" key in a datasource, but in version 0.10 of '
                'GE, that key is renamed to "batch_kwargs_generators". Please update your configuration to continue.'
            )
        # If a class_name begins with the dollar sign ("$"), then it is assumed to be a variable name to be substituted.
        if data["class_name"][0] == "$":
            return
        # SQL-only keys are invalid for non-sqlalchemy datasource classes.
        if (
            "connection_string" in data
            or "credentials" in data
            or "introspection" in data
            or "tables" in data
        ) and not (
            data["class_name"]
            in [
                "SqlAlchemyDatasource",
                "SimpleSqlalchemyDatasource",
            ]
        ):
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses one or more keys in a data source, that are required only by a sqlalchemy data source (your data source is "{data['class_name']}"). Please update your configuration to continue.
"""
            )

    # noinspection PyUnusedLocal
    @post_load
    def make_datasource_config(self, data, **kwargs):
        # Convert the validated dict into a DatasourceConfig object.
        return DatasourceConfig(**data)
class NotebookTemplateConfigSchema(Schema):
    """Schema for a notebook template: a file name plus its template kwargs."""

    file_name = fields.String()
    template_kwargs = fields.Dict(
        keys=fields.Str(), values=fields.Str(), allow_none=True
    )

    # noinspection PyUnusedLocal
    @post_load
    def make_notebook_template_config(self, data, **kwargs):
        """Build a NotebookTemplateConfig from the validated data."""
        return NotebookTemplateConfig(**data)
class ExpectationSuiteSchema(Schema):
    """Schema for ExpectationSuite objects; also accepts plain-dict suite payloads.

    BUG FIX: clean_empty's dict branch previously tested ``not data.get(key)``,
    so an empty dict (falsy) short-circuited into the no-op branch and the empty
    "evaluation_parameters"/"meta" keys were never removed -- unlike the
    ExpectationSuite branch, which deletes the corresponding empty attributes.
    The dict branch now mirrors the attribute branch.
    """

    expectation_suite_name = fields.Str()
    ge_cloud_id = fields.UUID(required=False, allow_none=True)
    expectations = fields.List(fields.Nested(ExpectationConfigurationSchema))
    evaluation_parameters = fields.Dict(allow_none=True)
    data_asset_type = fields.Str(allow_none=True)
    meta = fields.Dict()

    # NOTE: 20191107 - JPC - we may want to remove clean_empty and update tests to require the other fields;
    # doing so could also allow us not to have to make a copy of data in the pre_dump method.
    # noinspection PyMethodMayBeStatic
    def clean_empty(self, data):
        """Remove empty optional members so they are omitted from the dump."""
        if isinstance(data, ExpectationSuite):
            if not hasattr(data, "evaluation_parameters"):
                pass
            elif len(data.evaluation_parameters) == 0:
                del data.evaluation_parameters
            if not hasattr(data, "meta"):
                pass
            elif data.meta is None or data.meta == []:
                pass
            elif len(data.meta) == 0:
                del data.meta
        elif isinstance(data, dict):
            # Drop the key only when it is present with an empty container;
            # an absent key or an explicit None is left untouched.
            evaluation_parameters = data.get("evaluation_parameters")
            if evaluation_parameters is not None and len(evaluation_parameters) == 0:
                data.pop("evaluation_parameters")
            meta = data.get("meta")
            if meta is None or meta == []:
                pass
            elif len(meta) == 0:
                data.pop("meta")
        return data

    # noinspection PyUnusedLocal
    @pre_dump
    def prepare_dump(self, data, **kwargs):
        """Copy, JSON-convert meta, and strip empty members before dumping."""
        data = deepcopy(data)
        if isinstance(data, ExpectationSuite):
            data.meta = convert_to_json_serializable(data.meta)
        elif isinstance(data, dict):
            data["meta"] = convert_to_json_serializable(data.get("meta"))
        data = self.clean_empty(data)
        return data
class DatasourceConfigSchema(Schema):
    """Schema for datasource configs with an execution engine and data connectors."""

    class Meta:
        # Unknown keys are retained for forward compatibility.
        unknown = INCLUDE

    class_name = fields.Str(missing="Datasource")
    module_name = fields.Str(missing="great_expectations.datasource")
    execution_engine = fields.Nested(ExecutionEngineConfigSchema)
    data_connectors = fields.Dict(
        keys=fields.Str(),
        values=fields.Nested(DataConnectorConfigSchema),
        required=True,
        allow_none=False,
    )
    credentials = fields.Raw(allow_none=True)
    spark_context = fields.Raw(allow_none=True)

    @validates_schema
    def validate_schema(self, data, **kwargs):
        """No cross-field validation is performed for this schema version."""
        pass

    # noinspection PyUnusedLocal
    @post_load
    def make_datasource_config(self, data, **kwargs):
        """Convert the validated dict into a DatasourceConfig."""
        return DatasourceConfig(**data)
class RunIdentifierSchema(Schema):
    """Schema for RunIdentifier: a run name plus an ISO-formatted run time."""

    run_name = fields.Str()
    run_time = fields.DateTime(format="iso")

    # noinspection PyUnusedLocal
    @post_load
    def make_run_identifier(self, data, **kwargs):
        """Build a RunIdentifier from the validated data."""
        return RunIdentifier(**data)
class ConfigurationIdentifierSchema(Schema):
    """Schema for ConfigurationIdentifier: a single configuration key string."""

    configuration_key = fields.Str()

    # noinspection PyUnusedLocal
    @post_load
    def make_configuration_identifier(self, data, **kwargs):
        """Build a ConfigurationIdentifier from the validated data."""
        return ConfigurationIdentifier(**data)
class ExpectationConfigurationBuilderConfigSchema(NotNullSchema):
    """Schema for ExpectationConfigurationBuilderConfig used by the rule-based profiler.

    BUG FIX: class_name and module_name were declared with ``all_none=True``, a
    typo for ``allow_none=True``. Marshmallow silently stores unknown field
    kwargs as metadata, so the typo was never reported and the fields were not
    actually nullable as intended.
    """

    class Meta:
        # Unknown keys are retained (builder-specific kwargs pass through).
        unknown = INCLUDE

    # Config class instantiated by NotNullSchema after load.
    __config_class__ = ExpectationConfigurationBuilderConfig

    class_name = fields.String(
        required=False,
        allow_none=True,  # was "all_none=True" (silently ignored by marshmallow)
    )
    module_name = fields.String(
        required=False,
        allow_none=True,  # was "all_none=True" (silently ignored by marshmallow)
        missing="great_expectations.rule_based_profiler.expectation_configuration_builder",
    )
    expectation_type = fields.Str(
        required=True,
        error_messages={
            "required": "expectation_type missing in expectation configuration builder"
        },
    )
    meta = fields.Dict(
        keys=fields.String(
            required=True,
            allow_none=False,
        ),
        required=False,
        allow_none=True,
    )
class ExpectationSuiteIdentifierSchema(Schema):
    """Schema for ExpectationSuiteIdentifier: the suite name alone."""

    expectation_suite_name = fields.Str()

    # noinspection PyUnusedLocal
    @post_load
    def make_expectation_suite_identifier(self, data, **kwargs):
        """Build an ExpectationSuiteIdentifier from the validated data."""
        return ExpectationSuiteIdentifier(**data)
class ExecutionEngineConfigSchema(Schema):
    # Schema for execution engine configs (e.g. Pandas, SqlAlchemy, SparkDF engines).
    class Meta:
        # Unknown keys are retained: engines accept arbitrary extra options.
        unknown = INCLUDE

    class_name = fields.String(required=True)
    module_name = fields.String(missing="great_expectations.execution_engine")
    # connection_string / spark_config are engine-specific; cross-checked below.
    connection_string = fields.String(required=False, allow_none=True)
    spark_config = fields.Raw(required=False, allow_none=True)
    boto3_options = fields.Dict(
        keys=fields.Str(), values=fields.Str(), required=False, allow_none=True
    )
    caching = fields.Boolean(required=False, allow_none=True)
    batch_spec_defaults = fields.Dict(required=False, allow_none=True)

    @validates_schema
    def validate_schema(self, data, **kwargs):
        # If a class_name begins with the dollar sign ("$"), then it is assumed to be a variable name to be substituted.
        if data["class_name"][0] == "$":
            return
        # connection_string is only meaningful for SqlAlchemyExecutionEngine.
        if "connection_string" in data and not (
            data["class_name"] == "SqlAlchemyExecutionEngine"
        ):
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses the "connection_string" key in an execution engine, but only SqlAlchemyExecutionEngine requires this attribute (your execution engine is "{data['class_name']}"). Please update your configuration to continue. """
            )
        # spark_config is only meaningful for SparkDFExecutionEngine.
        if "spark_config" in data and not (
            data["class_name"] == "SparkDFExecutionEngine"
        ):
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses the "spark_config" key in an execution engine, but only SparkDFExecutionEngine requires this attribute (your execution engine is "{data['class_name']}"). Please update your configuration to continue. """
            )

    # noinspection PyUnusedLocal
    @post_load
    def make_execution_engine_config(self, data, **kwargs):
        # Convert the validated dict into an ExecutionEngineConfig object.
        return ExecutionEngineConfig(**data)
class ExpectationConfigurationSchema(Schema):
    """Schema for a single expectation configuration (type, kwargs, meta)."""

    expectation_type = fields.Str(
        required=True,
        error_messages={
            "required": "expectation_type missing in expectation configuration"
        },
    )
    kwargs = fields.Dict()
    meta = fields.Dict()

    # noinspection PyUnusedLocal
    @post_load
    def make_expectation_configuration(self, data, **kwargs):
        """Build an ExpectationConfiguration from the validated data."""
        return ExpectationConfiguration(**data)
class AssetConfigSchema(Schema):
    """Schema for per-asset options of a file-path data connector."""

    class Meta:
        # Unknown keys are retained for forward compatibility.
        unknown = INCLUDE

    base_directory = fields.Str(required=False, allow_none=True)
    glob_directive = fields.Str(required=False, allow_none=True)
    pattern = fields.Str(required=False, allow_none=True)
    group_names = fields.List(
        cls_or_instance=fields.Str(), required=False, allow_none=True
    )

    @validates_schema
    def validate_schema(self, data, **kwargs):
        """No cross-field validation is required for asset configs."""
        pass

    # noinspection PyUnusedLocal
    @post_load
    def make_asset_config(self, data, **kwargs):
        """Build an AssetConfig from the validated data."""
        return AssetConfig(**data)
class ExpectationConfigurationBuilderConfigSchema(NotNullSchema):
    """Schema for expectation configuration builder configs used by rule-based profilers."""

    class Meta:
        # Unknown keys are retained (builder-specific kwargs pass through).
        unknown = INCLUDE

    # Config class instantiated by NotNullSchema after load.
    __config_class__ = ExpectationConfigurationBuilderConfig

    module_name = fields.Str(
        required=False,
        allow_none=True,
        missing="great_expectations.rule_based_profiler.expectation_configuration_builder",
    )
    class_name = fields.Str(required=True, allow_none=False)
    expectation_type = fields.Str(
        required=True,
        error_messages={
            "required": "expectation_type missing in expectation configuration builder"
        },
    )
    meta = fields.Dict(
        keys=fields.Str(required=True, allow_none=False),
        required=False,
        allow_none=True,
    )
    # Lazy Nested reference avoids a circular definition with ParameterBuilderConfigSchema.
    validation_parameter_builder_configs = fields.List(
        cls_or_instance=fields.Nested(
            lambda: ParameterBuilderConfigSchema(),
            required=True,
            allow_none=False,
        ),
        required=False,
        allow_none=True,
    )
class DataConnectorConfigSchema(Schema):
    """Schema for data connector configs (class/module plus optional assets map)."""

    class Meta:
        # Unknown keys are retained: connectors accept arbitrary extra options.
        unknown = INCLUDE

    class_name = fields.Str(required=True)
    module_name = fields.Str(missing="great_expectations.datasource.data_connector")
    assets = fields.Dict(
        keys=fields.Str(),
        values=fields.Nested(AssetConfigSchema),
        required=False,
        allow_none=True,
    )

    @validates_schema
    def validate_schema(self, data, **kwargs):
        """No cross-field validation is performed for this schema version."""
        pass

    # noinspection PyUnusedLocal
    @post_load
    def make_data_connector_config(self, data, **kwargs):
        """Build a DataConnectorConfig from the validated data."""
        return DataConnectorConfig(**data)
class DataContextConfigSchema(Schema):
    # Top-level schema for the great_expectations.yml project configuration.
    config_version = fields.Number(
        validate=lambda x: 0 < x < 100,
        error_messages={"invalid": "config version must " "be a number."},
    )
    datasources = fields.Dict(
        keys=fields.Str(),
        values=fields.Nested(DatasourceConfigSchema),
        required=False,
        allow_none=True,
    )
    expectations_store_name = fields.Str()
    validations_store_name = fields.Str()
    evaluation_parameter_store_name = fields.Str()
    plugins_directory = fields.Str(allow_none=True)
    validation_operators = fields.Dict(keys=fields.Str(), values=fields.Dict())
    stores = fields.Dict(keys=fields.Str(), values=fields.Dict())
    notebooks = fields.Nested(NotebooksConfigSchema, allow_none=True)
    data_docs_sites = fields.Dict(
        keys=fields.Str(), values=fields.Dict(), allow_none=True
    )
    config_variables_file_path = fields.Str(allow_none=True)
    anonymous_usage_statistics = fields.Nested(AnonymizedUsageStatisticsConfigSchema)

    # noinspection PyMethodMayBeStatic
    # noinspection PyUnusedLocal
    def handle_error(self, exc, data, **kwargs):
        """Log and raise our custom exception when (de)serialization fails."""
        logger.error(exc.messages)
        raise ge_exceptions.InvalidDataContextConfigError(
            "Error while processing DataContextConfig.", exc
        )

    @validates_schema
    def validate_schema(self, data, **kwargs):
        # config_version must exist and be numeric before any range checks run.
        if "config_version" not in data:
            raise ge_exceptions.InvalidDataContextConfigError(
                "The key `config_version` is missing; please check your config file.",
                validation_error=ValidationError("no config_version key"),
            )
        if not isinstance(data["config_version"], (int, float)):
            raise ge_exceptions.InvalidDataContextConfigError(
                "The key `config_version` must be a number. "
                "Please check your config file.",
                validation_error=ValidationError("config version not a number"),
            )
        # When migrating from 0.7.x to 0.8.0
        if data["config_version"] == 0 and (
            "validations_store" in list(data.keys())
            or "validations_stores" in list(data.keys())
        ):
            raise ge_exceptions.UnsupportedConfigVersionError(
                "You appear to be using a config version from the 0.7.x series. This version is no longer supported."
            )
        elif data["config_version"] < MINIMUM_SUPPORTED_CONFIG_VERSION:
            # Too old: must be migrated forward before it can be loaded.
            raise ge_exceptions.UnsupportedConfigVersionError(
                "You appear to have an invalid config version ({}).\n The version number must be at least {}. "
                "Please see the migration guide at https://docs.greatexpectations.io/en/latest/guides/how_to_guides/migrating_versions.html".format(
                    data["config_version"], MINIMUM_SUPPORTED_CONFIG_VERSION
                ),
            )
        elif data["config_version"] > CURRENT_CONFIG_VERSION:
            # Too new: written by a later GE release than this one supports.
            raise ge_exceptions.InvalidDataContextConfigError(
                "You appear to have an invalid config version ({}).\n The maximum valid version is {}.".format(
                    data["config_version"], CURRENT_CONFIG_VERSION
                ),
                validation_error=ValidationError("config version too high"),
            )
class PackageInfoSchema(Schema):
    """Schema describing an installed (or installable) package and its environment."""

    package_name = fields.Str()
    installed = fields.Boolean()
    # Serialize the install_environment enum member to its underlying value.
    install_environment = fields.Function(
        lambda pkg: pkg.install_environment.value
    )
    version = fields.Str(required=False, allow_none=True)
class DataConnectorConfigSchema(Schema):
    """Schema for data connector configs covering filesystem, S3, and SQL connector types.

    ``validate_schema`` rejects keys that apply only to a connector subclass
    other than the configured ``class_name``.

    BUG FIX: the user-facing error messages misspelled "connector" as
    "conntector" (four occurrences); the messages now read correctly.
    """

    class Meta:
        # Unknown keys are retained: connectors accept arbitrary extra options.
        unknown = INCLUDE

    class_name = fields.String(required=True)
    module_name = fields.String(missing="great_expectations.datasource.data_connector")
    assets = fields.Dict(
        keys=fields.Str(),
        values=fields.Nested(AssetConfigSchema, required=False, allow_none=True),
        required=False,
        allow_none=True,
    )

    # File-path (filesystem/S3) connector options.
    base_directory = fields.String(required=False, allow_none=True)
    glob_directive = fields.String(required=False, allow_none=True)
    default_regex = fields.Dict(required=False, allow_none=True)
    runtime_keys = fields.List(
        cls_or_instance=fields.Str(), required=False, allow_none=True
    )

    # S3-specific connector options.
    bucket = fields.String(required=False, allow_none=True)
    prefix = fields.String(required=False, allow_none=True)
    delimiter = fields.String(required=False, allow_none=True)
    max_keys = fields.Integer(required=False, allow_none=True)
    boto3_options = fields.Dict(
        keys=fields.Str(), values=fields.Str(), required=False, allow_none=True
    )

    # SQL-specific connector options.
    data_asset_name_prefix = fields.String(required=False, allow_none=True)
    data_asset_name_suffix = fields.String(required=False, allow_none=True)
    include_schema_name = fields.Boolean(required=False, allow_none=True)
    splitter_method = fields.String(required=False, allow_none=True)
    splitter_kwargs = fields.Dict(required=False, allow_none=True)
    sampling_method = fields.String(required=False, allow_none=True)
    sampling_kwargs = fields.Dict(required=False, allow_none=True)
    excluded_tables = fields.List(
        cls_or_instance=fields.Str(), required=False, allow_none=True
    )
    included_tables = fields.List(
        cls_or_instance=fields.Str(), required=False, allow_none=True
    )
    skip_inapplicable_tables = fields.Boolean(required=False, allow_none=True)

    @validates_schema
    def validate_schema(self, data, **kwargs):
        """Raise InvalidConfigError when subclass-specific keys do not match class_name."""
        # If a class_name begins with the dollar sign ("$"), then it is assumed to be a variable name to be substituted.
        if data["class_name"][0] == "$":
            return
        # default_regex is valid only for FilePathDataConnector subclasses.
        if ("default_regex" in data) and not (
            data["class_name"]
            in [
                "InferredAssetFilesystemDataConnector",
                "ConfiguredAssetFilesystemDataConnector",
                "InferredAssetS3DataConnector",
                "ConfiguredAssetS3DataConnector",
            ]
        ):
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses one or more keys in a data connector, that are required only by a subclass of the FilePathDataConnector class (your data connector is "{data['class_name']}"). Please update your configuration to continue.
"""
            )
        # glob_directive is valid only for filesystem connectors.
        if ("glob_directive" in data) and not (
            data["class_name"]
            in [
                "InferredAssetFilesystemDataConnector",
                "ConfiguredAssetFilesystemDataConnector",
            ]
        ):
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses one or more keys in a data connector, that are required only by a filesystem type of the data connector (your data connector is "{data['class_name']}"). Please update your configuration to continue.
"""
            )
        # bucket/prefix/delimiter/max_keys are valid only for S3 connectors.
        if (
            "bucket" in data
            or "prefix" in data
            or "delimiter" in data
            or "max_keys" in data
        ) and not (
            data["class_name"]
            in [
                "InferredAssetS3DataConnector",
                "ConfiguredAssetS3DataConnector",
            ]
        ):
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses one or more keys in a data connector, that are required only by an S3 type of the data connector (your data connector is "{data['class_name']}"). Please update your configuration to continue.
"""
            )
        # The remaining keys are valid only for SQL connectors.
        if (
            "data_asset_name_prefix" in data
            or "data_asset_name_suffix" in data
            or "include_schema_name" in data
            or "splitter_method" in data
            or "splitter_kwargs" in data
            or "sampling_method" in data
            or "sampling_kwargs" in data
            or "excluded_tables" in data
            or "included_tables" in data
            or "skip_inapplicable_tables" in data
        ) and not (
            data["class_name"]
            in [
                "InferredAssetSqlDataConnector",
                "ConfiguredAssetSqlDataConnector",
            ]
        ):
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses one or more keys in a data connector, that are required only by an SQL type of the data connector (your data connector is "{data['class_name']}"). Please update your configuration to continue.
"""
            )

    # noinspection PyUnusedLocal
    @post_load
    def make_data_connector_config(self, data, **kwargs):
        """Convert the validated dict into a DataConnectorConfig object."""
        return DataConnectorConfig(**data)
class ClassConfigSchema(Schema):
    """Minimal schema for a class reference: class name plus optional module name."""

    class_name = fields.Str()
    module_name = fields.Str(allow_none=True)