class DatasourceConfigSchema(Schema):
    """Marshmallow schema for an execution-engine-style datasource config.

    Unknown keys are kept (``Meta.unknown = INCLUDE``) so that
    connector-specific settings pass through; the validated payload is
    materialized as a ``DatasourceConfig`` on load.
    """

    class Meta:
        unknown = INCLUDE

    # Sensible defaults: a generic Datasource from the GE datasource module.
    class_name = fields.String(missing="Datasource")
    module_name = fields.String(missing="great_expectations.datasource")

    execution_engine = fields.Nested(ExecutionEngineConfigSchema)
    # Mapping of connector name -> connector config; mandatory for this schema.
    data_connectors = fields.Dict(
        keys=fields.Str(),
        values=fields.Nested(DataConnectorConfigSchema),
        required=True,
        allow_none=False,
    )
    credentials = fields.Raw(allow_none=True)
    spark_context = fields.Raw(allow_none=True)

    @validates_schema
    def validate_schema(self, data, **kwargs):
        # Deliberate no-op: this variant imposes no cross-field constraints.
        pass

    # noinspection PyUnusedLocal
    @post_load
    def make_datasource_config(self, data, **kwargs):
        """Build a DatasourceConfig from the deserialized payload."""
        return DatasourceConfig(**data)
class DatasourceConfigSchema(Schema):
    """Marshmallow schema for a legacy (batch-kwargs-generator) datasource config.

    Rejects the pre-0.10 ``generators`` key with a migration hint and produces
    a ``DatasourceConfig`` on load.
    """

    class Meta:
        unknown = INCLUDE

    class_name = fields.String(required=True)
    module_name = fields.String(missing="great_expectations.datasource")

    data_asset_type = fields.Nested(ClassConfigSchema)

    # TODO: Update to generator-specific
    # batch_kwargs_generators = fields.Mapping(keys=fields.Str(), values=fields.Nested(fields.GeneratorSchema))
    batch_kwargs_generators = fields.Dict(
        keys=fields.Str(), values=fields.Dict(), allow_none=True
    )

    credentials = fields.Raw(allow_none=True)
    spark_context = fields.Raw(allow_none=True)

    @validates_schema
    def validate_schema(self, data, **kwargs):
        # "generators" was renamed in GE 0.10; fail loudly with guidance.
        if "generators" in data:
            raise ge_exceptions.InvalidConfigError(
                "Your current configuration uses the 'generators' key in a datasource, but in version 0.10 of "
                "GE, that key is renamed to 'batch_kwargs_generators'. Please update your config to continue."
            )

    # noinspection PyUnusedLocal
    @post_load
    def make_datasource_config(self, data, **kwargs):
        """Build a DatasourceConfig from the deserialized payload."""
        return DatasourceConfig(**data)
class ParameterBuilderConfigSchema(NotNullSchema):
    """Schema for ParameterBuilderConfig within a rule-based profiler rule.

    ``evaluation_parameter_builder_configs`` nests this schema recursively
    (hence the lambda), allowing builders to depend on other builders.
    """

    class Meta:
        unknown = INCLUDE

    __config_class__ = ParameterBuilderConfig

    name = fields.String(
        required=True,
        allow_none=False,
    )
    module_name = fields.String(
        required=False,
        allow_none=True,
        missing="great_expectations.rule_based_profiler.parameter_builder",
    )
    class_name = fields.String(
        required=True,
        allow_none=False,
    )
    # Recursive: each entry is itself a full ParameterBuilder configuration.
    evaluation_parameter_builder_configs = fields.List(
        cls_or_instance=fields.Nested(
            lambda: ParameterBuilderConfigSchema(),
            required=True,
            allow_none=False,
        ),
        required=False,
        allow_none=True,
    )
class NotebookConfigSchema(Schema):
    """Schema for suite-edit notebook rendering configuration.

    Every ``*_markdown`` / ``*_code`` field is an optional nested
    NotebookTemplateConfig overriding one section of the generated notebook.
    """

    class_name = fields.String(missing="SuiteEditNotebookRenderer")
    module_name = fields.String(
        missing="great_expectations.render.renderer.suite_edit_notebook_renderer"
    )
    custom_templates_module = fields.String()

    # Markdown section overrides.
    header_markdown = fields.Nested(NotebookTemplateConfigSchema, allow_none=True)
    footer_markdown = fields.Nested(NotebookTemplateConfigSchema, allow_none=True)
    table_expectations_header_markdown = fields.Nested(
        NotebookTemplateConfigSchema, allow_none=True
    )
    column_expectations_header_markdown = fields.Nested(
        NotebookTemplateConfigSchema, allow_none=True
    )
    table_expectations_not_found_markdown = fields.Nested(
        NotebookTemplateConfigSchema, allow_none=True
    )
    column_expectations_not_found_markdown = fields.Nested(
        NotebookTemplateConfigSchema, allow_none=True
    )
    authoring_intro_markdown = fields.Nested(
        NotebookTemplateConfigSchema, allow_none=True
    )
    column_expectations_markdown = fields.Nested(
        NotebookTemplateConfigSchema, allow_none=True
    )

    # Code-cell section overrides.
    header_code = fields.Nested(NotebookTemplateConfigSchema, allow_none=True)
    footer_code = fields.Nested(NotebookTemplateConfigSchema, allow_none=True)
    column_expectation_code = fields.Nested(
        NotebookTemplateConfigSchema, allow_none=True
    )
    table_expectation_code = fields.Nested(
        NotebookTemplateConfigSchema, allow_none=True
    )

    # noinspection PyUnusedLocal
    @post_load
    def make_notebook_config(self, data, **kwargs):
        """Build a NotebookConfig from the deserialized payload."""
        return NotebookConfig(**data)
class ExpectationConfigurationBuilderConfigSchema(NotNullSchema):
    """Schema for ExpectationConfigurationBuilderConfig.

    ``expectation_type`` is the only mandatory field; class/module names are
    optional (the module defaults to GE's expectation_configuration_builder
    package).
    """

    class Meta:
        unknown = INCLUDE

    __config_class__ = ExpectationConfigurationBuilderConfig

    class_name = fields.String(
        required=False,
        # BUG FIX: was "all_none=True" — an unrecognized kwarg (typo for
        # marshmallow's "allow_none"), so None was not actually permitted.
        allow_none=True,
    )
    module_name = fields.String(
        required=False,
        allow_none=True,  # BUG FIX: was the typo "all_none=True"
        missing="great_expectations.rule_based_profiler.expectation_configuration_builder",
    )
    expectation_type = fields.Str(
        required=True,
        error_messages={
            "required": "expectation_type missing in expectation configuration builder"
        },
    )
    # Free-form metadata; keys must be strings, values are unconstrained.
    meta = fields.Dict(
        keys=fields.String(
            required=True,
            allow_none=False,
        ),
        required=False,
        allow_none=True,
    )
class DatasourceConfigSchema(Schema):
    # Schema for the new-style (execution-engine) datasource configuration.
    # Accepts both engine/connector keys and SQL-specific keys, and validates
    # that SQL-only keys are used only with SqlAlchemy-based datasources.

    class Meta:
        unknown = INCLUDE  # pass unrecognized keys through unchanged

    class_name = fields.String(missing="Datasource")
    module_name = fields.String(missing="great_expectations.datasource")
    execution_engine = fields.Nested(
        ExecutionEngineConfigSchema, required=False, allow_none=True
    )
    # connector-name -> connector config mapping; optional here (unlike the
    # stricter variant of this schema elsewhere in the file).
    data_connectors = fields.Dict(
        keys=fields.Str(),
        values=fields.Nested(DataConnectorConfigSchema),
        required=False,
        allow_none=True,
    )
    data_asset_type = fields.Nested(ClassConfigSchema, required=False, allow_none=True)
    # TODO: Update to generator-specific
    # batch_kwargs_generators = fields.Mapping(keys=fields.Str(), values=fields.Nested(fields.GeneratorSchema))
    batch_kwargs_generators = fields.Dict(
        keys=fields.Str(), values=fields.Dict(), required=False, allow_none=True
    )
    # SQL-only keys (enforced in validate_schema below).
    connection_string = fields.String(required=False, allow_none=True)
    credentials = fields.Raw(required=False, allow_none=True)
    introspection = fields.Dict(required=False, allow_none=True)
    tables = fields.Dict(required=False, allow_none=True)

    @validates_schema
    def validate_schema(self, data, **kwargs):
        # Reject the pre-0.10 "generators" key with a migration message.
        if "generators" in data:
            raise ge_exceptions.InvalidConfigError(
                'Your current configuration uses the "generators" key in a datasource, but in version 0.10 of '
                'GE, that key is renamed to "batch_kwargs_generators". Please update your configuration to continue.'
            )
        # If a class_name begins with the dollar sign ("$"), then it is assumed to be a variable name to be substituted.
        if data["class_name"][0] == "$":
            return
        # SQL-specific keys require a SqlAlchemy-flavored datasource class.
        if (
            "connection_string" in data
            or "credentials" in data
            or "introspection" in data
            or "tables" in data
        ) and not (
            data["class_name"]
            in [
                "SqlAlchemyDatasource",
                "SimpleSqlalchemyDatasource",
            ]
        ):
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses one or more keys in a data source, that are required only by a sqlalchemy data source (your data source is "{data['class_name']}"). Please update your configuration to continue. 
"""
            )

    # noinspection PyUnusedLocal
    @post_load
    def make_datasource_config(self, data, **kwargs):
        # Materialize the validated payload as a DatasourceConfig.
        return DatasourceConfig(**data)
class RenderedAtomicContentSchema(Schema):
    """Schema for RenderedAtomicContent (name + typed rendered value)."""

    class Meta:
        # BUG FIX: was "unknown: INCLUDE" — a bare annotation, which never
        # assigns Meta.unknown, so unknown keys were NOT actually included.
        unknown = INCLUDE

    name = fields.String(required=False, allow_none=True)
    value = fields.Nested(RenderedAtomicValueSchema(), required=True, allow_none=False)
    value_type = fields.String(required=True, allow_none=False)

    @post_load
    def make_rendered_atomic_content(self, data, **kwargs):
        """Build a RenderedAtomicContent from the deserialized payload."""
        return RenderedAtomicContent(**data)
class RuleBasedProfilerConfigSchema(Schema):
    """
    Schema classes for configurations which extend from BaseYamlConfig must extend
    the top-level Marshmallow Schema class. Schema classes for their constituent
    configurations which extend DictDot must extend the NotNullSchema class.
    """

    class Meta:
        unknown = INCLUDE

    name = fields.String(
        required=True,
        allow_none=False,
    )
    class_name = fields.String(
        required=False,
        # BUG FIX: dropped the stray "all_none=True" kwarg — a typo of
        # "allow_none", which is already passed correctly below.
        allow_none=True,
        missing="RuleBasedProfiler",
    )
    module_name = fields.String(
        required=False,
        allow_none=True,  # BUG FIX: dropped the stray "all_none=True" typo kwarg
        missing="great_expectations.rule_based_profiler",
    )
    # Only config_version 1.0 is supported; anything else is rejected.
    config_version = fields.Float(
        required=True,
        allow_none=False,
        validate=lambda x: x == 1.0,
        error_messages={
            "invalid": "config version is not supported; it must be 1.0 per the current version of Great Expectations"
        },
    )
    variables = fields.Dict(
        keys=fields.String(
            required=True,
            allow_none=False,
        ),
        required=False,
        allow_none=True,
    )
    # rule-name -> rule config; the heart of the profiler definition.
    rules = fields.Dict(
        keys=fields.String(
            required=True,
            allow_none=False,
        ),
        values=fields.Nested(
            RuleConfigSchema,
            required=True,
            allow_none=False,
        ),
        required=True,
        allow_none=False,
    )
class DomainBuilderConfigSchema(NotNullSchema):
    """Schema for DomainBuilderConfig within a rule-based profiler rule.

    Requires a concrete builder class name; the module defaults to GE's
    domain_builder package.
    """

    class Meta:
        unknown = INCLUDE

    __config_class__ = DomainBuilderConfig

    module_name = fields.String(
        required=False,
        allow_none=True,
        missing="great_expectations.rule_based_profiler.domain_builder",
    )
    class_name = fields.String(
        required=True,
        allow_none=False,
    )
class ExpectationSuiteValidationResultSchema(Schema):
    """Serialization schema for ExpectationSuiteValidationResult objects."""

    success = fields.Bool()
    results = fields.List(fields.Nested(ExpectationValidationResultSchema))
    evaluation_parameters = fields.Dict()
    statistics = fields.Dict()
    meta = fields.Dict(allow_none=True)
    ge_cloud_id = fields.UUID(required=False, allow_none=True)
    checkpoint_name = fields.String(required=False, allow_none=True)

    # noinspection PyUnusedLocal
    @pre_dump
    def prepare_dump(self, data, **kwargs):
        """Coerce meta/statistics to JSON-serializable form before dumping.

        Works on a deep copy so the caller's object is never mutated; handles
        both result objects and plain dicts.
        """
        data = deepcopy(data)
        if isinstance(data, ExpectationSuiteValidationResult):
            data.meta = convert_to_json_serializable(data=data.meta)
            data.statistics = convert_to_json_serializable(data=data.statistics)
        elif isinstance(data, dict):
            data["meta"] = convert_to_json_serializable(data=data.get("meta"))
            data["statistics"] = convert_to_json_serializable(
                data=data.get("statistics")
            )
        return data

    # noinspection PyUnusedLocal
    @post_load
    def make_expectation_suite_validation_result(self, data, **kwargs):
        """Build an ExpectationSuiteValidationResult from the payload."""
        return ExpectationSuiteValidationResult(**data)
class ExecutionEngineConfigSchema(Schema):
    """Minimal schema for an execution engine configuration block."""

    class Meta:
        unknown = INCLUDE

    class_name = fields.String(required=True)
    module_name = fields.String(missing="great_expectations.execution_engine")
    caching = fields.Boolean()
    batch_spec_defaults = fields.Dict(allow_none=True)

    @validates_schema
    def validate_schema(self, data, **kwargs):
        # Deliberate no-op: this variant imposes no cross-field constraints.
        pass

    # noinspection PyUnusedLocal
    @post_load
    def make_execution_engine_config(self, data, **kwargs):
        """Build an ExecutionEngineConfig from the deserialized payload."""
        return ExecutionEngineConfig(**data)
class SorterConfigSchema(Schema):
    """Schema for a data-connector sorter configuration.

    ``orderby`` defaults to ascending ("asc") and may not be null.
    """

    class Meta:
        unknown = INCLUDE

    name = fields.String(required=True)
    class_name = fields.String(required=True)
    module_name = fields.String(
        missing="great_expectations.datasource.data_connector.sorter"
    )
    orderby = fields.String(required=False, missing="asc", allow_none=False)

    @validates_schema
    def validate_schema(self, data, **kwargs):
        # Deliberate no-op: no cross-field constraints for sorters.
        pass

    # noinspection PyUnusedLocal
    @post_load
    def make_sorter_config(self, data, **kwargs):
        """Build a SorterConfig from the deserialized payload."""
        return SorterConfig(**data)
class NotebookTemplateConfigSchema(Schema):
    """Schema for a single notebook template: file name plus string kwargs."""

    file_name = fields.String()
    template_kwargs = fields.Dict(
        keys=fields.Str(), values=fields.Str(), allow_none=True
    )

    # noinspection PyUnusedLocal
    @post_load
    def make_notebook_template_config(self, data, **kwargs):
        """Build a NotebookTemplateConfig from the deserialized payload."""
        return NotebookTemplateConfig(**data)
class AssetConfigSchema(Schema):
    """Schema for a single data asset within a data connector config.

    All fields are optional; file-path assets use base_directory /
    glob_directive / pattern / group_names.
    """

    class Meta:
        unknown = INCLUDE

    base_directory = fields.String(required=False, allow_none=True)
    glob_directive = fields.String(required=False, allow_none=True)
    pattern = fields.String(required=False, allow_none=True)
    group_names = fields.List(
        cls_or_instance=fields.Str(), required=False, allow_none=True
    )

    @validates_schema
    def validate_schema(self, data, **kwargs):
        # Deliberate no-op: no cross-field constraints for assets.
        pass

    # noinspection PyUnusedLocal
    @post_load
    def make_asset_config(self, data, **kwargs):
        """Build an AssetConfig from the deserialized payload."""
        return AssetConfig(**data)
class SampleConfigSchema(Schema):
    """Example schema with one string and one integer parameter."""

    class Meta:
        unknown = INCLUDE

    some_param_0 = fields.String()
    some_param_1 = fields.Integer()

    @validates_schema
    def validate_schema(self, data, **kwargs):
        # Deliberate no-op: sample schema has no cross-field constraints.
        pass
class ExecutionEngineConfigSchema(Schema):
    # Full schema for an execution engine configuration. Validates that
    # engine-specific keys (connection_string, spark_config) are used only
    # with the engine class that understands them.

    class Meta:
        unknown = INCLUDE  # pass unrecognized keys through unchanged

    class_name = fields.String(required=True)
    module_name = fields.String(missing="great_expectations.execution_engine")
    # SqlAlchemy-only key (enforced below).
    connection_string = fields.String(required=False, allow_none=True)
    # Spark-only key (enforced below).
    spark_config = fields.Raw(required=False, allow_none=True)
    boto3_options = fields.Dict(
        keys=fields.Str(), values=fields.Str(), required=False, allow_none=True
    )
    caching = fields.Boolean(required=False, allow_none=True)
    batch_spec_defaults = fields.Dict(required=False, allow_none=True)

    @validates_schema
    def validate_schema(self, data, **kwargs):
        # If a class_name begins with the dollar sign ("$"), then it is assumed to be a variable name to be substituted.
        if data["class_name"][0] == "$":
            return
        if "connection_string" in data and not (
            data["class_name"] == "SqlAlchemyExecutionEngine"
        ):
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses the "connection_string" key in an execution engine, but only SqlAlchemyExecutionEngine requires this attribute (your execution engine is "{data['class_name']}"). Please update your configuration to continue. """
            )
        if "spark_config" in data and not (
            data["class_name"] == "SparkDFExecutionEngine"
        ):
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses the "spark_config" key in an execution engine, but only SparkDFExecutionEngine requires this attribute (your execution engine is "{data['class_name']}"). Please update your configuration to continue. """
            )

    # noinspection PyUnusedLocal
    @post_load
    def make_execution_engine_config(self, data, **kwargs):
        # Materialize the validated payload as an ExecutionEngineConfig.
        return ExecutionEngineConfig(**data)
class RenderedAtomicValueSchema(Schema):
    """Schema for RenderedAtomicValue; fields are grouped by value type."""

    class Meta:
        unknown = INCLUDE

    # for StringType
    template = fields.String(required=False, allow_none=True)
    params = fields.Dict(required=False, allow_none=True)
    schema = fields.Dict(required=False, allow_none=True)

    # for TableType
    header = fields.Dict(required=False, allow_none=True)
    header_row = fields.List(fields.Dict, required=False, allow_none=True)
    table = fields.List(fields.List(fields.Dict, required=False, allow_none=True))

    # for GraphType
    graph = fields.String(required=False, allow_none=True)

    @post_load
    def create_value_obj(self, data, **kwargs):
        """Build a RenderedAtomicValue from the deserialized payload."""
        return RenderedAtomicValue(**data)
class ExpectationConfigurationBuilderConfigSchema(NotNullSchema):
    """Schema for ExpectationConfigurationBuilderConfig (strict variant).

    Requires both ``class_name`` and ``expectation_type``; supports nested
    validation-parameter builder configs.
    """

    class Meta:
        unknown = INCLUDE

    __config_class__ = ExpectationConfigurationBuilderConfig

    module_name = fields.String(
        required=False,
        allow_none=True,
        missing="great_expectations.rule_based_profiler.expectation_configuration_builder",
    )
    class_name = fields.String(
        required=True,
        allow_none=False,
    )
    expectation_type = fields.Str(
        required=True,
        error_messages={
            "required": "expectation_type missing in expectation configuration builder"
        },
    )
    # Free-form metadata; keys must be strings.
    meta = fields.Dict(
        keys=fields.String(
            required=True,
            allow_none=False,
        ),
        required=False,
        allow_none=True,
    )
    # Each entry is a full ParameterBuilder configuration (lazy nested schema).
    validation_parameter_builder_configs = fields.List(
        cls_or_instance=fields.Nested(
            lambda: ParameterBuilderConfigSchema(),
            required=True,
            allow_none=False,
        ),
        required=False,
        allow_none=True,
    )
class DataConnectorConfigSchema(Schema):
    """Minimal schema for a data connector configuration block."""

    class Meta:
        unknown = INCLUDE

    class_name = fields.String(required=True)
    module_name = fields.String(missing="great_expectations.datasource.data_connector")

    # Optional asset-name -> asset-config mapping.
    assets = fields.Dict(
        keys=fields.Str(),
        values=fields.Nested(AssetConfigSchema),
        required=False,
        allow_none=True,
    )

    @validates_schema
    def validate_schema(self, data, **kwargs):
        # Deliberate no-op: this variant imposes no cross-field constraints.
        pass

    # noinspection PyUnusedLocal
    @post_load
    def make_data_connector_config(self, data, **kwargs):
        """Build a DataConnectorConfig from the deserialized payload."""
        return DataConnectorConfig(**data)
class ParameterBuilderConfigSchema(NotNullSchema):
    """Schema for ParameterBuilderConfig (batch-request variant).

    ``name`` is mandatory; class/module names are optional with the module
    defaulting to GE's parameter_builder package.
    """

    class Meta:
        unknown = INCLUDE

    __config_class__ = ParameterBuilderConfig

    name = fields.String(
        required=True,
        allow_none=False,
    )
    class_name = fields.String(
        required=False,
        # BUG FIX: was "all_none=True" — an unrecognized kwarg (typo for
        # marshmallow's "allow_none"), so None was not actually permitted.
        allow_none=True,
    )
    module_name = fields.String(
        required=False,
        allow_none=True,  # BUG FIX: was the typo "all_none=True"
        missing="great_expectations.rule_based_profiler.parameter_builder",
    )
    # Raw: a batch request may be a dict or a project-specific object.
    batch_request = fields.Raw(
        required=False,
        allow_none=True,
    )
class RenderedAtomicValueSchema(Schema):
    """Schema for RenderedAtomicValue; fields are grouped by value type.

    On dump, keys listed in REMOVE_KEYS_IF_NONE are stripped when their
    value is None so the serialized form stays compact.
    """

    class Meta:
        unknown = INCLUDE

    schema = fields.Dict(required=False, allow_none=True)
    header = fields.Dict(required=False, allow_none=True)

    # for StringValueType
    template = fields.String(required=False, allow_none=True)
    params = fields.Dict(required=False, allow_none=True)

    # for TableType
    header_row = fields.List(fields.Dict, required=False, allow_none=True)
    table = fields.List(fields.List(fields.Dict, required=False, allow_none=True))

    # for GraphType
    graph = fields.Dict(required=False, allow_none=True)

    # for UnknownType
    kwargs = fields.Dict(required=False, allow_none=True)

    @post_load
    def create_value_obj(self, data, **kwargs):
        """Build a RenderedAtomicValue from the deserialized payload."""
        return RenderedAtomicValue(**data)

    # BUG FIX: "table" appeared twice in this list; the duplicate pass was a
    # no-op and has been removed.
    REMOVE_KEYS_IF_NONE = [
        "template",
        "table",
        "params",
        "header_row",
        "graph",
        "kwargs",
    ]

    @post_dump
    def clean_null_attrs(self, data: dict, **kwargs: dict) -> dict:
        """Removes the attributes in RenderedAtomicValueSchema.REMOVE_KEYS_IF_NONE
        during serialization if their values are None."""
        data = deepcopy(data)  # never mutate the caller's dict
        for key in RenderedAtomicValueSchema.REMOVE_KEYS_IF_NONE:
            # NOTE(review): the "graph" branch reads data[key].graph, i.e. it
            # assumes the dumped "graph" value is an object with a .graph
            # attribute rather than a plain dict — confirm against callers.
            if key == "graph" and key in data and data[key].graph is None:
                data.pop(key)
            elif key in data and data[key] is None:
                data.pop(key)
        return data
class RuleConfigSchema(NotNullSchema):
    """Schema for one rule of a rule-based profiler.

    A rule bundles optional variables, a domain builder, parameter builders,
    and expectation-configuration builders.
    """

    class Meta:
        unknown = INCLUDE

    __config_class__ = RuleConfig

    variables = fields.Dict(
        keys=fields.String(
            required=True,
            allow_none=False,
        ),
        required=False,
        allow_none=True,
    )
    domain_builder = fields.Nested(
        DomainBuilderConfigSchema,
        required=False,
        allow_none=True,
    )
    parameter_builders = fields.List(
        cls_or_instance=fields.Nested(
            ParameterBuilderConfigSchema,
            required=True,
            allow_none=False,
        ),
        required=False,
        allow_none=True,
    )
    expectation_configuration_builders = fields.List(
        cls_or_instance=fields.Nested(
            ExpectationConfigurationBuilderConfigSchema,
            required=True,
            allow_none=False,
        ),
        required=False,
        allow_none=True,
    )
class DataConnectorConfigSchema(Schema):
    # Full schema for a data connector configuration. Accepts keys for every
    # connector family (filesystem, S3, SQL) and validates that family-specific
    # keys are only used with a matching connector class.

    class Meta:
        unknown = INCLUDE  # pass unrecognized keys through unchanged

    class_name = fields.String(required=True)
    module_name = fields.String(
        missing="great_expectations.datasource.data_connector"
    )
    assets = fields.Dict(
        keys=fields.Str(),
        values=fields.Nested(AssetConfigSchema, required=False, allow_none=True),
        required=False,
        allow_none=True,
    )
    # Filesystem/FilePath connector keys.
    base_directory = fields.String(required=False, allow_none=True)
    glob_directive = fields.String(required=False, allow_none=True)
    default_regex = fields.Dict(required=False, allow_none=True)
    runtime_keys = fields.List(
        cls_or_instance=fields.Str(), required=False, allow_none=True
    )
    # S3 connector keys.
    bucket = fields.String(required=False, allow_none=True)
    prefix = fields.String(required=False, allow_none=True)
    delimiter = fields.String(required=False, allow_none=True)
    max_keys = fields.Integer(required=False, allow_none=True)
    boto3_options = fields.Dict(
        keys=fields.Str(), values=fields.Str(), required=False, allow_none=True
    )
    # SQL connector keys.
    data_asset_name_prefix = fields.String(required=False, allow_none=True)
    data_asset_name_suffix = fields.String(required=False, allow_none=True)
    include_schema_name = fields.Boolean(required=False, allow_none=True)
    splitter_method = fields.String(required=False, allow_none=True)
    splitter_kwargs = fields.Dict(required=False, allow_none=True)
    sampling_method = fields.String(required=False, allow_none=True)
    sampling_kwargs = fields.Dict(required=False, allow_none=True)
    excluded_tables = fields.List(
        cls_or_instance=fields.Str(), required=False, allow_none=True
    )
    included_tables = fields.List(
        cls_or_instance=fields.Str(), required=False, allow_none=True
    )
    skip_inapplicable_tables = fields.Boolean(required=False, allow_none=True)

    @validates_schema
    def validate_schema(self, data, **kwargs):
        # If a class_name begins with the dollar sign ("$"), then it is assumed to be a variable name to be substituted.
        if data["class_name"][0] == "$":
            return
        # "default_regex" is only meaningful for FilePath-style connectors.
        if ("default_regex" in data) and not (
            data["class_name"]
            in [
                "InferredAssetFilesystemDataConnector",
                "ConfiguredAssetFilesystemDataConnector",
                "InferredAssetS3DataConnector",
                "ConfiguredAssetS3DataConnector",
            ]
        ):
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses one or more keys in a data connector, that are required only by a subclass of the FilePathDataConnector class (your data conntector is "{data['class_name']}"). Please update your configuration to continue. """
            )
        # "glob_directive" is only meaningful for filesystem connectors.
        if ("glob_directive" in data) and not (
            data["class_name"]
            in [
                "InferredAssetFilesystemDataConnector",
                "ConfiguredAssetFilesystemDataConnector",
            ]
        ):
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses one or more keys in a data connector, that are required only by a filesystem type of the data connector (your data conntector is "{data['class_name']}"). Please update your configuration to continue. """
            )
        # Bucket/prefix/delimiter/max_keys are S3-only keys.
        if (
            "bucket" in data
            or "prefix" in data
            or "delimiter" in data
            or "max_keys" in data
        ) and not (
            data["class_name"]
            in [
                "InferredAssetS3DataConnector",
                "ConfiguredAssetS3DataConnector",
            ]
        ):
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses one or more keys in a data connector, that are required only by an S3 type of the data connector (your data conntector is "{data['class_name']}"). Please update your configuration to continue. 
"""
            )
        # Splitter/sampler/table-introspection keys are SQL-only.
        if (
            "data_asset_name_prefix" in data
            or "data_asset_name_suffix" in data
            or "include_schema_name" in data
            or "splitter_method" in data
            or "splitter_kwargs" in data
            or "sampling_method" in data
            or "sampling_kwargs" in data
            or "excluded_tables" in data
            or "included_tables" in data
            or "skip_inapplicable_tables" in data
        ) and not (
            data["class_name"]
            in [
                "InferredAssetSqlDataConnector",
                "ConfiguredAssetSqlDataConnector",
            ]
        ):
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses one or more keys in a data connector, that are required only by an SQL type of the data connector (your data conntector is "{data['class_name']}"). Please update your configuration to continue. """
            )

    # noinspection PyUnusedLocal
    @post_load
    def make_data_connector_config(self, data, **kwargs):
        # Materialize the validated payload as a DataConnectorConfig.
        return DataConnectorConfig(**data)