Esempio n. 1
0
class ParameterBuilderConfigSchema(NotNullSchema):
    """Schema describing the configuration of a single parameter builder.

    Keys that are not declared below are kept rather than rejected
    (``unknown = INCLUDE``).  ``__config_class__`` names the config type
    associated with this schema; how it is used is decided by
    ``NotNullSchema``, which is not visible here -- presumably the loaded
    data is turned into a ``ParameterBuilderConfig`` (TODO confirm).
    """

    class Meta:
        # Keep undeclared keys in the loaded data instead of raising.
        unknown = INCLUDE

    __config_class__ = ParameterBuilderConfig

    # Required, non-null name of the parameter builder.
    name = fields.String(
        required=True,
        allow_none=False,
    )
    # Optional module path; falls back to the rule-based profiler's
    # parameter_builder package when the key is missing on load.
    module_name = fields.String(
        required=False,
        allow_none=True,
        missing="great_expectations.rule_based_profiler.parameter_builder",
    )
    # Required, non-null class name of the parameter builder.
    class_name = fields.String(
        required=True,
        allow_none=False,
    )
    # Optional list of nested parameter builder configs.  The lambda defers
    # the reference to this schema, which is what allows the self-nesting.
    evaluation_parameter_builder_configs = fields.List(
        cls_or_instance=fields.Nested(
            lambda: ParameterBuilderConfigSchema(),
            required=True,
            allow_none=False,
        ),
        required=False,
        allow_none=True,
    )
    # Optional flag; treated as True when the key is missing on load.
    json_serialize = fields.Boolean(
        required=False,
        allow_none=True,
        missing=True,
    )
Esempio n. 2
0
class AnonymizedUsageStatisticsConfigSchema(Schema):
    """Schema for loading and dumping the anonymized usage statistics config.

    Loading produces an ``AnonymizedUsageStatisticsConfig``; dumping strips
    the internal ``_explicit_url`` marker and, unless that marker was truthy,
    the ``usage_statistics_url`` entry as well.
    """

    data_context_id = fields.UUID()
    enabled = fields.Boolean(default=True)
    usage_statistics_url = fields.URL(allow_none=True)
    _explicit_url = fields.Boolean(required=False)

    # noinspection PyUnusedLocal
    @post_load()
    def make_usage_statistics_config(self, data, **kwargs):
        """Build the config object from the loaded data.

        The ``data_context_id`` entry, when present, is converted to its
        string form before the config object is constructed.
        """
        id_key = "data_context_id"
        if id_key in data:
            data[id_key] = str(data[id_key])
        return AnonymizedUsageStatisticsConfig(**data)

    # noinspection PyUnusedLocal
    @post_dump()
    def filter_implicit(self, data, **kwargs):
        """Remove the URL bookkeeping entries from the dumped data.

        ``_explicit_url`` is always removed; ``usage_statistics_url`` is
        removed too unless ``_explicit_url`` was present and truthy.
        """
        url_was_explicit = data.pop("_explicit_url", None)
        if not url_was_explicit:
            data.pop("usage_statistics_url", None)
        return data
Esempio n. 3
0
class ExecutionEngineConfigSchema(Schema):
    """Schema that loads an execution engine configuration.

    Keys not declared below are kept in the loaded data
    (``unknown = INCLUDE``), and a successful load returns an
    ``ExecutionEngineConfig`` built from that data.
    """

    class Meta:
        # Keep undeclared keys in the loaded data instead of raising.
        unknown = INCLUDE

    # Required class name of the execution engine.
    class_name = fields.String(required=True)
    # Falls back to the execution_engine package when missing on load.
    module_name = fields.String(missing="great_expectations.execution_engine")
    caching = fields.Boolean()
    batch_spec_defaults = fields.Dict(allow_none=True)

    @validates_schema
    def validate_schema(self, data, **kwargs):
        """Schema-level validation hook; currently performs no checks."""
        pass

    # noinspection PyUnusedLocal
    @post_load
    def make_execution_engine_config(self, data, **kwargs):
        """Return an ``ExecutionEngineConfig`` built from the loaded data."""
        return ExecutionEngineConfig(**data)
Esempio n. 4
0
class CheckpointResultSchema(Schema):
    """Schema for dumping and loading checkpoint results.

    Before dumping, the result object is deep-copied and the copy's
    ``_run_results`` is replaced with a JSON-serializable conversion of its
    ``run_results``.  Loading returns a ``CheckpointResult`` built from the
    loaded data.
    """

    # JC: I think this needs to be changed to be an instance of a new type called CheckpointResult,
    # which would include the top-level keys run_id, config, name, and a list of results.
    run_id = fields.Nested(RunIdentifierSchema)
    run_results = fields.Dict(required=False, allow_none=True)
    checkpoint_config = fields.Dict(required=False, allow_none=True)
    success = fields.Boolean(required=False, allow_none=True)

    # noinspection PyUnusedLocal
    @pre_dump
    def prepare_dump(self, data, **kwargs):
        """Return a deep copy of ``data`` prepared for serialization.

        The copy, not the caller's object, receives the converted run
        results, so the original is left untouched.
        """
        result_copy = copy.deepcopy(data)
        serializable_results = convert_to_json_serializable(result_copy.run_results)
        result_copy._run_results = serializable_results
        return result_copy

    # noinspection PyUnusedLocal
    @post_load
    def make_checkpoint_result(self, data, **kwargs):
        """Return a ``CheckpointResult`` built from the loaded data."""
        return CheckpointResult(**data)
Esempio n. 5
0
class ExecutionEngineConfigSchema(Schema):
    """Schema that loads and validates an execution engine configuration.

    Keys not declared below are kept in the loaded data
    (``unknown = INCLUDE``).  Schema-level validation rejects
    engine-specific keys that are used with a different engine class, and a
    successful load returns an ``ExecutionEngineConfig`` built from the data.
    """

    class Meta:
        # Keep undeclared keys in the loaded data instead of raising.
        unknown = INCLUDE

    # Required class name of the execution engine.
    class_name = fields.String(required=True)
    # Falls back to the execution_engine package when missing on load.
    module_name = fields.String(missing="great_expectations.execution_engine")
    # Only accepted when class_name is "SqlAlchemyExecutionEngine".
    connection_string = fields.String(required=False, allow_none=True)
    # Only accepted when class_name is "SparkDFExecutionEngine".
    spark_config = fields.Raw(required=False, allow_none=True)
    boto3_options = fields.Dict(
        keys=fields.Str(), values=fields.Str(), required=False, allow_none=True
    )
    caching = fields.Boolean(required=False, allow_none=True)
    batch_spec_defaults = fields.Dict(required=False, allow_none=True)

    @validates_schema
    def validate_schema(self, data, **kwargs):
        """Reject engine-specific keys used with the wrong engine class.

        Args:
            data: The configuration mapping being validated.
            **kwargs: Extra arguments passed by marshmallow; unused.

        Raises:
            ge_exceptions.InvalidConfigError: If ``connection_string`` is
                present for an engine other than ``SqlAlchemyExecutionEngine``,
                or ``spark_config`` is present for an engine other than
                ``SparkDFExecutionEngine``.
        """
        class_name = data["class_name"]
        # If a class_name begins with the dollar sign ("$"), then it is assumed to be a variable name to be substituted.
        # startswith() is used instead of indexing so that an empty
        # class_name does not raise IndexError.
        if class_name.startswith("$"):
            return
        if "connection_string" in data and class_name != "SqlAlchemyExecutionEngine":
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses the "connection_string" key in an execution engine, but only 
SqlAlchemyExecutionEngine requires this attribute (your execution engine is "{data['class_name']}").  Please update your
configuration to continue.
                """
            )
        if "spark_config" in data and class_name != "SparkDFExecutionEngine":
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses the "spark_config" key in an execution engine, but only 
SparkDFExecutionEngine requires this attribute (your execution engine is "{data['class_name']}").  Please update your
configuration to continue.
                """
            )

    # noinspection PyUnusedLocal
    @post_load
    def make_execution_engine_config(self, data, **kwargs):
        """Return an ``ExecutionEngineConfig`` built from the loaded data."""
        return ExecutionEngineConfig(**data)
Esempio n. 6
0
class DataConnectorConfigSchema(Schema):
    """Schema that loads and validates a data connector configuration.

    Keys not declared below are kept in the loaded data
    (``unknown = INCLUDE``).  Schema-level validation rejects keys that
    belong to one family of data connectors (file path, filesystem, S3, SQL)
    when they are used with a connector class outside that family.  A
    successful load returns a ``DataConnectorConfig`` built from the data.
    """

    class Meta:
        # Keep undeclared keys in the loaded data instead of raising.
        unknown = INCLUDE

    # Required class name of the data connector.
    class_name = fields.String(required=True)
    # Falls back to the data_connector package when missing on load.
    module_name = fields.String(
        missing="great_expectations.datasource.data_connector"
    )

    assets = fields.Dict(
        keys=fields.Str(),
        values=fields.Nested(AssetConfigSchema, required=False, allow_none=True),
        required=False,
        allow_none=True,
    )

    # File-path / filesystem connector keys.
    base_directory = fields.String(required=False, allow_none=True)
    glob_directive = fields.String(required=False, allow_none=True)
    default_regex = fields.Dict(required=False, allow_none=True)
    runtime_keys = fields.List(
        cls_or_instance=fields.Str(), required=False, allow_none=True
    )
    # S3 connector keys.
    bucket = fields.String(required=False, allow_none=True)
    prefix = fields.String(required=False, allow_none=True)
    delimiter = fields.String(required=False, allow_none=True)
    max_keys = fields.Integer(required=False, allow_none=True)
    boto3_options = fields.Dict(
        keys=fields.Str(), values=fields.Str(), required=False, allow_none=True
    )
    # SQL connector keys.
    data_asset_name_prefix = fields.String(required=False, allow_none=True)
    data_asset_name_suffix = fields.String(required=False, allow_none=True)
    include_schema_name = fields.Boolean(required=False, allow_none=True)
    splitter_method = fields.String(required=False, allow_none=True)
    splitter_kwargs = fields.Dict(required=False, allow_none=True)
    sampling_method = fields.String(required=False, allow_none=True)
    sampling_kwargs = fields.Dict(required=False, allow_none=True)
    excluded_tables = fields.List(
        cls_or_instance=fields.Str(), required=False, allow_none=True
    )
    included_tables = fields.List(
        cls_or_instance=fields.Str(), required=False, allow_none=True
    )
    skip_inapplicable_tables = fields.Boolean(required=False, allow_none=True)

    @validates_schema
    def validate_schema(self, data, **kwargs):
        """Reject connector-family keys used with the wrong connector class.

        The checks run in a fixed order (file path, filesystem, S3, SQL) and
        the first violation found is the one reported.

        Args:
            data: The configuration mapping being validated.
            **kwargs: Extra arguments passed by marshmallow; unused.

        Raises:
            ge_exceptions.InvalidConfigError: If a key reserved for one
                family of data connectors is present while ``class_name``
                names a connector outside that family.
        """
        # NOTE(review): the "conntector" misspelling in the messages below is
        # kept as-is because callers or tests may match on the exact text.
        class_name = data["class_name"]
        # If a class_name begins with the dollar sign ("$"), then it is assumed to be a variable name to be substituted.
        # startswith() is used instead of indexing so that an empty
        # class_name does not raise IndexError.
        if class_name.startswith("$"):
            return

        filesystem_connectors = (
            "InferredAssetFilesystemDataConnector",
            "ConfiguredAssetFilesystemDataConnector",
        )
        s3_connectors = (
            "InferredAssetS3DataConnector",
            "ConfiguredAssetS3DataConnector",
        )
        sql_connectors = (
            "InferredAssetSqlDataConnector",
            "ConfiguredAssetSqlDataConnector",
        )
        s3_only_keys = ("bucket", "prefix", "delimiter", "max_keys")
        sql_only_keys = (
            "data_asset_name_prefix",
            "data_asset_name_suffix",
            "include_schema_name",
            "splitter_method",
            "splitter_kwargs",
            "sampling_method",
            "sampling_kwargs",
            "excluded_tables",
            "included_tables",
            "skip_inapplicable_tables",
        )

        # default_regex is shared by the filesystem and S3 connectors.
        if "default_regex" in data and class_name not in (
            filesystem_connectors + s3_connectors
        ):
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses one or more keys in a data connector, that are required only by a
subclass of the FilePathDataConnector class (your data conntector is "{data['class_name']}").  Please update your
configuration to continue.
                """
            )
        if "glob_directive" in data and class_name not in filesystem_connectors:
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses one or more keys in a data connector, that are required only by a
filesystem type of the data connector (your data conntector is "{data['class_name']}").  Please update your
configuration to continue.
                """
            )
        if (
            any(key in data for key in s3_only_keys)
            and class_name not in s3_connectors
        ):
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses one or more keys in a data connector, that are required only by an
S3 type of the data connector (your data conntector is "{data['class_name']}").  Please update your configuration to
continue.
                """
            )
        if (
            any(key in data for key in sql_only_keys)
            and class_name not in sql_connectors
        ):
            raise ge_exceptions.InvalidConfigError(
                f"""Your current configuration uses one or more keys in a data connector, that are required only by an
SQL type of the data connector (your data conntector is "{data['class_name']}").  Please update your configuration to
continue.
                """
            )

    # noinspection PyUnusedLocal
    @post_load
    def make_data_connector_config(self, data, **kwargs):
        """Return a ``DataConnectorConfig`` built from the loaded data."""
        return DataConnectorConfig(**data)
Esempio n. 7
0
class PackageInfoSchema(Schema):
    """Schema describing a package and its installation status."""

    package_name = fields.Str()
    installed = fields.Boolean()
    # Serialized by reading ``.value`` from the object's install_environment
    # attribute (presumably an Enum member -- TODO confirm).
    install_environment = fields.Function(
        lambda obj: obj.install_environment.value)
    # Optional version string; may be None.
    version = fields.Str(required=False, allow_none=True)