コード例 #1
0
    def get_config(
        self,
        mode: ConfigOutputModeType = ConfigOutputModes.TYPED,
        **kwargs,
    ) -> Union[BaseYamlConfig, dict, str]:
        """Return ``self.config`` rendered in the requested output mode.

        Args:
            mode: A ``ConfigOutputModes`` member, or a string that is
                lower-cased and converted to one.
            **kwargs: Extra options forwarded to ``filter_properties_dict``
                for the ``DICT`` and ``JSON_DICT`` modes; ``inplace`` is
                always overridden to ``True``.

        Returns:
            The config object itself (``TYPED``), its ``commented_map``
            (``COMMENTED_MAP``), its YAML string (``YAML``), or a filtered
            dictionary (``DICT`` / ``JSON_DICT``).

        Raises:
            ValueError: If ``mode`` is none of the modes handled here.
        """
        if isinstance(mode, str):
            mode = ConfigOutputModes(mode.lower())

        typed_config: BaseYamlConfig = self.config

        # Modes that need no dictionary filtering return straight away.
        if mode == ConfigOutputModes.TYPED:
            return typed_config
        if mode == ConfigOutputModes.COMMENTED_MAP:
            return typed_config.commented_map
        if mode == ConfigOutputModes.YAML:
            return typed_config.to_yaml_str()

        serialized: dict
        if mode == ConfigOutputModes.DICT:
            serialized = typed_config.to_dict()
        elif mode == ConfigOutputModes.JSON_DICT:
            serialized = typed_config.to_json_dict()
        else:
            raise ValueError(
                f'Unknown mode {mode} in "BaseCheckpoint.get_config()".')

        # The dictionary is filtered in place, so the same object is returned.
        kwargs["inplace"] = True
        filter_properties_dict(properties=serialized, **kwargs)
        return serialized
コード例 #2
0
    def __init__(self, store_backend=None, store_name=None):
        """Initialize the store, defaulting the table name for database backends.

        Args:
            store_backend: Optional backend configuration dictionary. When its
                class resolves to a ``DatabaseStoreBackend`` subclass, a
                default ``"table_name"`` is written into this dictionary.
            store_name: Optional store name forwarded to the parent initializer.
        """
        if store_backend is not None:
            backend_module = store_backend.get(
                "module_name", "great_expectations.data_context.store"
            )
            backend_class_name = store_backend.get(
                "class_name", "InMemoryStoreBackend"
            )
            verify_dynamic_loading_support(module_name=backend_module)
            backend_class = load_class(backend_class_name, backend_module)

            # Database-backed stores get a default table name unless one is configured.
            if issubclass(backend_class, DatabaseStoreBackend):
                store_backend["table_name"] = store_backend.get(
                    "table_name", "ge_evaluation_parameters"
                )
        super().__init__(store_backend=store_backend, store_name=store_name)

        # Record the call arguments plus the module and class names, then filter
        # the Falsy entries out of the recorded configuration in place.
        self._config = dict(
            store_backend=store_backend,
            store_name=store_name,
            module_name=self.__class__.__module__,
            class_name=self.__class__.__name__,
        )
        filter_properties_dict(properties=self._config, clean_falsy=True, inplace=True)
コード例 #3
0
    def __init__(
        self,
        data_context: "DataContext",  # noqa: F821
        runtime_environment: Optional[dict] = None,
        fixed_length_key: bool = False,
        suppress_store_backend_id: bool = False,
        manually_initialize_store_backend_id: str = "",
        store_name: Optional[str] = None,
    ) -> None:
        """Initialize the backend and keep a reference to the data context.

        Args:
            data_context: Stored on the instance as ``_data_context``.
            runtime_environment: Only recorded in ``_config``.
            fixed_length_key: Forwarded to the parent initializer.
            suppress_store_backend_id: Forwarded to the parent initializer.
            manually_initialize_store_backend_id: Forwarded to the parent
                initializer.
            store_name: Forwarded to the parent initializer.
        """
        # These options go to the parent initializer and into "_config" alike.
        parent_options = {
            "fixed_length_key": fixed_length_key,
            "suppress_store_backend_id": suppress_store_backend_id,
            "manually_initialize_store_backend_id": manually_initialize_store_backend_id,
            "store_name": store_name,
        }
        super().__init__(**parent_options)

        self._data_context = data_context

        # Record the call arguments plus the module and class names, then filter
        # the Falsy entries out of the recorded configuration in place.
        self._config = {
            "runtime_environment": runtime_environment,
            **parent_options,
            "module_name": self.__class__.__module__,
            "class_name": self.__class__.__name__,
        }
        filter_properties_dict(properties=self._config, clean_falsy=True, inplace=True)
コード例 #4
0
def test_get_citations_sorted(baseline_suite):
    """Citations added out of date order come back ordered by citation date."""
    assert "citations" not in baseline_suite.meta

    # Deliberately add the citations out of chronological order.
    baseline_suite.add_citation("first", citation_date="2000-01-01")
    baseline_suite.add_citation("third", citation_date="2000-01-03")
    baseline_suite.add_citation("second", citation_date="2000-01-02")
    properties_dict_list: List[Dict[str, Any]] = baseline_suite.get_citations(
        sort=True)
    for properties_dict in properties_dict_list:
        # Filter each citation in place (presumably dropping unset fields --
        # confirm against filter_properties_dict) and discard the
        # "interactive" entry so only date and comment are compared.
        filter_properties_dict(properties=properties_dict, inplace=True)
        properties_dict.pop("interactive", None)

    assert properties_dict_list == [
        {
            "citation_date": "2000-01-01T00:00:00.000000Z",
            "comment": "first",
        },
        {
            "citation_date": "2000-01-02T00:00:00.000000Z",
            "comment": "second",
        },
        {
            "citation_date": "2000-01-03T00:00:00.000000Z",
            "comment": "third",
        },
    ]
コード例 #5
0
def test_get_citations_with_multiple_citations_containing_batch_kwargs(
        baseline_suite):
    """Only citations carrying batch_kwargs are returned when they are required."""
    assert "citations" not in baseline_suite.meta

    # (comment, keyword arguments) for each citation; the last has no batch_kwargs.
    citations_to_add = (
        ("first", {"batch_kwargs": {"path": "first"},
                   "citation_date": "2000-01-01"}),
        ("second", {"batch_kwargs": {"path": "second"},
                    "citation_date": "2001-01-01"}),
        ("third", {"citation_date": "2002-01-01"}),
    )
    for comment, citation_kwargs in citations_to_add:
        baseline_suite.add_citation(comment, **citation_kwargs)

    citations: List[Dict[str, Any]] = baseline_suite.get_citations(
        sort=True, require_batch_kwargs=True)
    for citation in citations:
        filter_properties_dict(properties=citation, inplace=True)
        citation.pop("interactive", None)

    expected_citations = [
        {
            "citation_date": "2000-01-01T00:00:00.000000Z",
            "batch_kwargs": {"path": "first"},
            "comment": "first",
        },
        {
            "citation_date": "2001-01-01T00:00:00.000000Z",
            "batch_kwargs": {"path": "second"},
            "comment": "second",
        },
    ]
    assert citations == expected_citations
コード例 #6
0
    def __init__(
        self,
        store_name: Optional[str] = None,
        store_backend: Optional[dict] = None,
        runtime_environment: Optional[dict] = None,
    ) -> None:
        """Create the schema object and initialize the underlying store.

        Args:
            store_name: Forwarded to the parent initializer.
            store_backend: Backend configuration forwarded to the parent
                initializer.
            runtime_environment: Forwarded to the parent initializer.
        """
        self._schema = DatasourceConfigSchema()

        # The same three arguments feed the parent initializer and "_config".
        call_arguments = {
            "store_backend": store_backend,
            "runtime_environment": runtime_environment,
            "store_name": store_name,
        }
        super().__init__(**call_arguments)

        # Record the call arguments plus the module and class names, then filter
        # the Falsy entries out of the recorded configuration in place.
        self._config = {
            **call_arguments,
            "module_name": self.__class__.__module__,
            "class_name": self.__class__.__name__,
        }
        filter_properties_dict(
            properties=self._config, clean_falsy=True, inplace=True
        )
コード例 #7
0
    def __init__(self,
                 store_backend=None,
                 runtime_environment=None,
                 store_name=None):
        """Validate the configured backend class and initialize the store.

        Args:
            store_backend: Optional backend configuration dictionary; its
                "module_name" and "class_name" entries (with defaults) name the
                backend class that is checked for loadability.
            runtime_environment: Forwarded to the parent initializer.
            store_name: Forwarded to the parent initializer.
        """
        if store_backend is not None:
            store_backend_module_name = store_backend.get(
                "module_name", "great_expectations.data_context.store")
            store_backend_class_name = store_backend.get(
                "class_name", "InMemoryStoreBackend")
            verify_dynamic_loading_support(
                module_name=store_backend_module_name)
            # Called only as a check that the class can be loaded (presumably
            # raises otherwise -- confirm against load_class); the returned
            # class itself is not needed here.
            load_class(store_backend_class_name, store_backend_module_name)

        super().__init__(
            store_backend=store_backend,
            runtime_environment=runtime_environment,
            store_name=store_name,
        )

        # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter
        # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary.
        self._config = {
            "store_backend": store_backend,
            "runtime_environment": runtime_environment,
            "store_name": store_name,
            "module_name": self.__class__.__module__,
            "class_name": self.__class__.__name__,
        }
        filter_properties_dict(properties=self._config,
                               clean_falsy=True,
                               inplace=True)
コード例 #8
0
    def __init__(
        self,
        bucket,
        project,
        prefix="",
        filepath_template=None,
        filepath_prefix=None,
        filepath_suffix=None,
        forbidden_substrings=None,
        platform_specific_separator=False,
        fixed_length_key=False,
        suppress_store_backend_id=False,
        manually_initialize_store_backend_id: str = "",
        public_urls=True,
        base_public_path=None,
        store_name=None,
    ):
        """Initialize the backend with its bucket, project and key prefix.

        ``bucket``, ``prefix`` and ``project`` are stored as public attributes
        and ``public_urls`` as ``_public_urls``; every other argument is
        forwarded to the parent initializer. All arguments are recorded in
        ``_config``.
        """
        super().__init__(
            filepath_template=filepath_template,
            filepath_prefix=filepath_prefix,
            filepath_suffix=filepath_suffix,
            forbidden_substrings=forbidden_substrings,
            platform_specific_separator=platform_specific_separator,
            fixed_length_key=fixed_length_key,
            suppress_store_backend_id=suppress_store_backend_id,
            manually_initialize_store_backend_id=manually_initialize_store_backend_id,
            base_public_path=base_public_path,
            store_name=store_name,
        )
        self.bucket, self.prefix, self.project = bucket, prefix, project
        self._public_urls = public_urls

        # Touch store_backend_id so it is initialized, unless that is suppressed
        # (the case when this backend is part of an HTMLSiteStore).
        if not self._suppress_store_backend_id:
            _ = self.store_backend_id

        # Record the call arguments plus the module and class names, then filter
        # the recorded configuration in place.
        self._config = dict(
            bucket=bucket,
            project=project,
            prefix=prefix,
            filepath_template=filepath_template,
            filepath_prefix=filepath_prefix,
            filepath_suffix=filepath_suffix,
            forbidden_substrings=forbidden_substrings,
            platform_specific_separator=platform_specific_separator,
            fixed_length_key=fixed_length_key,
            suppress_store_backend_id=suppress_store_backend_id,
            manually_initialize_store_backend_id=manually_initialize_store_backend_id,
            public_urls=public_urls,
            base_public_path=base_public_path,
            store_name=store_name,
            module_name=self.__class__.__module__,
            class_name=self.__class__.__name__,
        )
        filter_properties_dict(properties=self._config, inplace=True)
コード例 #9
0
    def __init__(
        self,
        runtime_environment=None,
        fixed_length_key=False,
        suppress_store_backend_id=False,
        manually_initialize_store_backend_id: str = "",
        store_name=None,
    ) -> None:
        """Initialize the backend with an empty dictionary as its storage.

        Args:
            runtime_environment: Only recorded in ``_config``.
            fixed_length_key: Forwarded to the parent initializer.
            suppress_store_backend_id: Forwarded to the parent initializer.
            manually_initialize_store_backend_id: Forwarded to the parent
                initializer.
            store_name: Forwarded to the parent initializer.
        """
        # These options go to the parent initializer and into "_config" alike.
        parent_options = {
            "fixed_length_key": fixed_length_key,
            "suppress_store_backend_id": suppress_store_backend_id,
            "manually_initialize_store_backend_id": manually_initialize_store_backend_id,
            "store_name": store_name,
        }
        super().__init__(**parent_options)

        # The storage dictionary must exist before store_backend_id is touched.
        self._store = {}
        # Touch store_backend_id so it is initialized, unless that is suppressed
        # (the case when this backend is part of an HTMLSiteStore).
        if not self._suppress_store_backend_id:
            _ = self.store_backend_id

        # Record the call arguments plus the module and class names, then filter
        # the Falsy entries out of the recorded configuration in place.
        self._config = {
            "runtime_environment": runtime_environment,
            **parent_options,
            "module_name": self.__class__.__module__,
            "class_name": self.__class__.__name__,
        }
        filter_properties_dict(
            properties=self._config, clean_falsy=True, inplace=True
        )
コード例 #10
0
    def __init__(
        self,
        credentials,
        queries=None,
        store_backend=None,
        runtime_environment=None,
        store_name=None,
    ) -> None:
        """Initialize the query store and create its SQLAlchemy engine.

        Args:
            credentials: Mapping describing the connection. Checked in order
                for an "engine" entry (used as-is), a "url" entry, or a
                "connection_string" entry; otherwise its "drivername" entry and
                the remaining entries are used to build a URL.
            queries: Optional dictionary of named queries that are loaded into
                the store backend.
            store_backend: Optional backend configuration; when ``queries`` is
                given it must be ``None`` or name "InMemoryStoreBackend".
            runtime_environment: Forwarded to the parent initializer.
            store_name: Forwarded to the parent initializer.

        Raises:
            ge_exceptions.DataContextError: If sqlalchemy is not available.
            ge_exceptions.InvalidConfigError: If ``queries`` is not a
                dictionary, is combined with an incompatible ``store_backend``,
                or loading the queries raises ``AssertionError``/``KeyError``.
        """
        if not sqlalchemy:
            raise ge_exceptions.DataContextError(
                "sqlalchemy module not found, but is required for "
                "SqlAlchemyQueryStore"
            )
        super().__init__(
            store_backend=store_backend,
            runtime_environment=runtime_environment,
            store_name=store_name,
        )
        if queries:
            # If queries are defined in configuration, then we load them into an InMemoryStoreBackend
            try:
                # Explicit checks rather than "assert" statements, so that the
                # validation still runs when Python is started with -O.
                if not isinstance(queries, dict):
                    raise ge_exceptions.InvalidConfigError(
                        "SqlAlchemyQueryStore queries must be defined as a dictionary"
                    )
                if not (
                    store_backend is None
                    or store_backend["class_name"] == "InMemoryStoreBackend"
                ):
                    raise ge_exceptions.InvalidConfigError(
                        "If queries are provided in configuration, then store_backend must be empty or an "
                        "InMemoryStoreBackend"
                    )
                for k, v in queries.items():
                    self._store_backend.set((k,), v)

            except (AssertionError, KeyError) as e:
                # Chain the cause so the original traceback is not lost.
                raise ge_exceptions.InvalidConfigError(str(e)) from e

        if "engine" in credentials:
            self.engine = credentials["engine"]
        elif "url" in credentials:
            self.engine = create_engine(credentials["url"])
        elif "connection_string" in credentials:
            self.engine = create_engine(credentials["connection_string"])
        else:
            # Work on a copy: popping "drivername" from the caller's mapping
            # would alter the caller's data and drop the driver name from the
            # credentials recorded in "_config" below.
            url_components = dict(credentials)
            drivername = url_components.pop("drivername")
            options = URL(drivername, **url_components)
            self.engine = create_engine(options)

        # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter
        # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary.
        self._config = {
            "credentials": credentials,
            "queries": queries,
            "store_backend": store_backend,
            "runtime_environment": runtime_environment,
            "store_name": store_name,
            "module_name": self.__class__.__module__,
            "class_name": self.__class__.__name__,
        }
        filter_properties_dict(properties=self._config, clean_falsy=True, inplace=True)
コード例 #11
0
    def _add_table_level_expectations(
        self,
        expectations_by_column: Dict[str, List[ExpectationConfiguration]],
        batch_request: Optional[Union[str, Dict[str, Any]]] = None,
    ) -> None:
        """Add notebook cells for the table-level expectations.

        Args:
            expectations_by_column: Mapping whose "table_expectations" entry
                holds the table-level expectation configurations.
            batch_request: Passed through to the template rendering.

        When there are no table-level expectations, a single markdown cell
        rendered from the "not found" template is added. Otherwise one linted
        code cell is added per expectation, after that expectation's kwargs
        have been filtered of Falsy values in place.
        """
        table_expectations: List[ExpectationConfiguration] = expectations_by_column[
            "table_expectations"
        ]

        if table_expectations:
            for table_expectation in table_expectations:
                filter_properties_dict(
                    properties=table_expectation["kwargs"],
                    clean_falsy=True,
                    inplace=True,
                )
                rendered_code: str = self.render_with_overwrite(
                    notebook_config=self.table_expectation_code,
                    default_file_name="table_expectation.py.j2",
                    expectation=table_expectation,
                    batch_request=batch_request,
                    env=os.environ,
                    kwargs_string=self._build_kwargs_string(
                        expectation=table_expectation
                    ),
                    meta_args=self._build_meta_arguments(
                        meta=table_expectation.meta
                    ),
                )
                self.add_code_cell(code=rendered_code, lint=True)
        else:
            not_found_markdown: str = self.render_with_overwrite(
                notebook_config=self.table_expectations_not_found_markdown,
                default_file_name="TABLE_EXPECTATIONS_NOT_FOUND.md",
                batch_request=batch_request,
                env=os.environ,
            )
            self.add_markdown_cell(markdown=not_found_markdown)
コード例 #12
0
    def get_json_dict(self) -> dict:
        """Build a dictionary representation of this object for JSON output.

        The data connector query is deep-copied and any
        "custom_filter_function" in it is replaced by the function's name.
        "batch_data" inside the runtime parameters is replaced by the string
        form of its type. The optional entries (batch_spec_passthrough, limit,
        batch_identifiers, runtime_parameters) are added only when not None,
        and the result is finally filtered of Falsy values in place.

        Returns:
            The filtered dictionary.
        """
        data_connector_query: Optional[dict] = None
        if self.data_connector_query is not None:
            data_connector_query = copy.deepcopy(self.data_connector_query)
            if data_connector_query.get("custom_filter_function") is not None:
                data_connector_query["custom_filter_function"] = data_connector_query[
                    "custom_filter_function"
                ].__name__
        json_dict: dict = {
            "datasource_name": self.datasource_name,
            "data_connector_name": self.data_connector_name,
            "data_asset_name": self.data_asset_name,
            "data_connector_query": data_connector_query,
        }

        if self.batch_spec_passthrough is not None:
            json_dict["batch_spec_passthrough"] = self.batch_spec_passthrough
        if self.limit is not None:
            json_dict["limit"] = self.limit
        if self.batch_identifiers is not None:
            json_dict["batch_identifiers"] = self.batch_identifiers
        if self.runtime_parameters is not None:
            # Shallow-copy before substituting "batch_data": writing through the
            # object returned by self.runtime_parameters could replace the live
            # batch data with a type string. A shallow copy avoids that without
            # duplicating the (possibly large) batch data itself.
            runtime_parameters = copy.copy(self.runtime_parameters)
            if runtime_parameters.get("batch_data") is not None:
                runtime_parameters["batch_data"] = str(
                    type(runtime_parameters["batch_data"])
                )
            json_dict["runtime_parameters"] = runtime_parameters

        filter_properties_dict(properties=json_dict, clean_falsy=True, inplace=True)

        return json_dict
コード例 #13
0
    def __init__(
        self,
        ge_cloud_credentials: Dict,
        ge_cloud_base_url: str = "https://app.greatexpectations.io/",
        ge_cloud_resource_type: Optional[GeCloudRESTResource] = None,
        ge_cloud_resource_name: Optional[str] = None,
        suppress_store_backend_id: bool = True,
        manually_initialize_store_backend_id: str = "",
        store_name: Optional[str] = None,
    ) -> None:
        """Initialize the backend with its base URL, resource and credentials.

        Args:
            ge_cloud_credentials: Credentials dictionary. A truthy "account_id"
                entry is moved to "organization_id" in this same dictionary
                and a warning is logged.
            ge_cloud_base_url: Stored as ``_ge_cloud_base_url``.
            ge_cloud_resource_type: Resource type; when falsy it is looked up
                from ``ge_cloud_resource_name``.
            ge_cloud_resource_name: Resource name; when falsy it is looked up
                from ``ge_cloud_resource_type``.
            suppress_store_backend_id: Forwarded to the parent initializer.
            manually_initialize_store_backend_id: Forwarded to the parent
                initializer.
            store_name: Forwarded to the parent initializer.

        Raises:
            AssertionError: If neither resource type nor resource name is given.
        """
        super().__init__(
            fixed_length_key=True,
            suppress_store_backend_id=suppress_store_backend_id,
            manually_initialize_store_backend_id=manually_initialize_store_backend_id,
            store_name=store_name,
        )
        assert (
            ge_cloud_resource_type or ge_cloud_resource_name
        ), "Must provide either ge_cloud_resource_type or ge_cloud_resource_name"

        self._ge_cloud_base_url = ge_cloud_base_url

        # Whichever of name/type is missing is derived from the other one.
        if ge_cloud_resource_name:
            self._ge_cloud_resource_name = ge_cloud_resource_name
        else:
            self._ge_cloud_resource_name = self.RESOURCE_PLURALITY_LOOKUP_DICT[
                ge_cloud_resource_type
            ]
        if ge_cloud_resource_type:
            self._ge_cloud_resource_type = ge_cloud_resource_type
        else:
            self._ge_cloud_resource_type = self.RESOURCE_PLURALITY_LOOKUP_DICT[
                ge_cloud_resource_name
            ]

        # TODO: remove when account_id is deprecated
        if ge_cloud_credentials.get("account_id"):
            logger.warning(
                'The "account_id" ge_cloud_credentials key has been renamed to "organization_id" and will '
                "be deprecated in the next major release."
            )
            ge_cloud_credentials["organization_id"] = ge_cloud_credentials.pop(
                "account_id"
            )
        self._ge_cloud_credentials = ge_cloud_credentials

        # Touch store_backend_id so it is initialized, unless that is suppressed
        # (the case when this backend is part of an HTMLSiteStore).
        if not self._suppress_store_backend_id:
            _ = self.store_backend_id

        # Record the call arguments (credentials excluded) plus the module and
        # class names, then filter the recorded configuration in place.
        self._config = dict(
            ge_cloud_base_url=ge_cloud_base_url,
            ge_cloud_resource_name=ge_cloud_resource_name,
            ge_cloud_resource_type=ge_cloud_resource_type,
            fixed_length_key=True,
            suppress_store_backend_id=suppress_store_backend_id,
            manually_initialize_store_backend_id=manually_initialize_store_backend_id,
            store_name=store_name,
            module_name=self.__class__.__module__,
            class_name=self.__class__.__name__,
        )
        filter_properties_dict(properties=self._config, inplace=True)
コード例 #14
0
    def __init__(
        self,
        base_directory,
        filepath_template=None,
        filepath_prefix=None,
        filepath_suffix=None,
        forbidden_substrings=None,
        platform_specific_separator=True,
        root_directory=None,
        fixed_length_key=False,
        suppress_store_backend_id=False,
        manually_initialize_store_backend_id: str = "",
        base_public_path=None,
        store_name=None,
    ):
        """Initialize the backend and resolve the directory it stores into.

        Args:
            base_directory: Directory for the store. Used as-is when absolute;
                otherwise joined onto ``root_directory``.
            root_directory: Absolute directory that a relative
                ``base_directory`` is resolved against.
            filepath_template, filepath_prefix, filepath_suffix,
            forbidden_substrings, platform_specific_separator,
            fixed_length_key, suppress_store_backend_id,
            manually_initialize_store_backend_id, base_public_path,
            store_name: Forwarded unchanged to the parent initializer.

        Raises:
            ValueError: If ``base_directory`` is relative and
                ``root_directory`` is either missing or not absolute.
        """
        super().__init__(
            filepath_template=filepath_template,
            filepath_prefix=filepath_prefix,
            filepath_suffix=filepath_suffix,
            forbidden_substrings=forbidden_substrings,
            platform_specific_separator=platform_specific_separator,
            fixed_length_key=fixed_length_key,
            suppress_store_backend_id=suppress_store_backend_id,
            manually_initialize_store_backend_id=
            manually_initialize_store_backend_id,
            base_public_path=base_public_path,
            store_name=store_name,
        )
        # Resolve the full base directory: an absolute base_directory wins,
        # otherwise it is joined onto an (absolute) root_directory.
        if os.path.isabs(base_directory):
            self.full_base_directory = base_directory
        else:
            if root_directory is None:
                raise ValueError(
                    "base_directory must be an absolute path if root_directory is not provided"
                )
            elif not os.path.isabs(root_directory):
                raise ValueError(
                    "root_directory must be an absolute path. Got {} instead.".
                    format(root_directory))
            else:
                self.full_base_directory = os.path.join(
                    root_directory, base_directory)

        # Note: this creates the *parent* of full_base_directory (dirname), not
        # full_base_directory itself; existing directories are tolerated.
        os.makedirs(str(os.path.dirname(self.full_base_directory)),
                    exist_ok=True)
        # Initialize with store_backend_id if not part of an HTMLSiteStore
        if not self._suppress_store_backend_id:
            _ = self.store_backend_id

        # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter
        # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary.
        # NOTE(review): the helper below presumably reads this function's
        # argument values from the calling frame -- confirm before renaming or
        # rebinding any parameter above this point.
        self._config = get_currently_executing_function_call_arguments(
            include_module_name=True,
            **{
                "class_name": self.__class__.__name__,
            },
        )
        # NOTE(review): clean_falsy is not passed here, so whether Falsy (as
        # opposed to null) values are removed depends on the defaults of
        # filter_properties_dict -- confirm.
        filter_properties_dict(properties=self._config, inplace=True)
コード例 #15
0
    def __init__(
        self,
        bucket,
        prefix="",
        boto3_options=None,
        filepath_template=None,
        filepath_prefix=None,
        filepath_suffix=None,
        forbidden_substrings=None,
        platform_specific_separator=False,
        fixed_length_key=False,
        suppress_store_backend_id=False,
        manually_initialize_store_backend_id: str = "",
        base_public_path=None,
        endpoint_url=None,
        store_name=None,
    ):
        """Initialize the backend with its bucket, key prefix and client options.

        Args:
            bucket: Stored as ``self.bucket``.
            prefix: Key prefix. When non-empty it is stripped of leading and
                trailing "/" (and of ``os.sep`` when
                ``self.platform_specific_separator`` is truthy) before being
                stored as ``self.prefix``.
            boto3_options: Stored as ``_boto3_options``; ``None`` becomes an
                empty dictionary.
            endpoint_url: Stored as ``self.endpoint_url``.
            filepath_template, filepath_prefix, filepath_suffix,
            forbidden_substrings, platform_specific_separator,
            fixed_length_key, suppress_store_backend_id,
            manually_initialize_store_backend_id, base_public_path,
            store_name: Forwarded unchanged to the parent initializer.
        """
        super().__init__(
            filepath_template=filepath_template,
            filepath_prefix=filepath_prefix,
            filepath_suffix=filepath_suffix,
            forbidden_substrings=forbidden_substrings,
            platform_specific_separator=platform_specific_separator,
            fixed_length_key=fixed_length_key,
            suppress_store_backend_id=suppress_store_backend_id,
            manually_initialize_store_backend_id=
            manually_initialize_store_backend_id,
            base_public_path=base_public_path,
            store_name=store_name,
        )
        self.bucket = bucket
        if prefix:
            if self.platform_specific_separator:
                prefix = prefix.strip(os.sep)

            # we *always* strip "/" from the prefix based on the norms of s3
            # whether the rest of the key is built with platform-specific separators or not
            prefix = prefix.strip("/")
        self.prefix = prefix
        # Replace a missing options mapping with a fresh empty dictionary.
        if boto3_options is None:
            boto3_options = {}
        self._boto3_options = boto3_options
        self.endpoint_url = endpoint_url
        # Initialize with store_backend_id if not part of an HTMLSiteStore
        if not self._suppress_store_backend_id:
            _ = self.store_backend_id

        # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter
        # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary.
        # NOTE(review): the helper below presumably reads this function's
        # argument values from the calling frame; if so it would see the
        # already-rebound "prefix" and "boto3_options" -- confirm before
        # renaming or reordering anything above.
        self._config = get_currently_executing_function_call_arguments(
            include_module_name=True,
            **{
                "class_name": self.__class__.__name__,
            },
        )
        # NOTE(review): clean_falsy is not passed here, so whether Falsy (as
        # opposed to null) values are removed depends on the defaults of
        # filter_properties_dict -- confirm.
        filter_properties_dict(properties=self._config, inplace=True)
コード例 #16
0
    def __init__(
        self,
        name=None,
        caching=True,
        batch_spec_defaults=None,
        batch_data_dict=None,
        validator=None,
    ):
        """Initialize the engine's name, metric cache, defaults and batch data.

        Args:
            name: Stored as ``self.name``.
            caching: When truthy the metric cache is a plain dictionary,
                otherwise a ``NoOpDict``.
            batch_spec_defaults: Optional mapping of defaults; keys outside
                ``self.recognized_batch_spec_defaults`` are reported with a
                warning and left out.
            batch_data_dict: Optional mapping handed to
                ``_load_batch_data_from_dict``; ``None`` becomes an empty
                dictionary.
            validator: Stored as ``_validator``.
        """
        self.name = name
        self._validator = validator

        # NOTE: caching relies on the strong assumption that the user does not
        # modify the core data store (e.g. self.spark_df) during the lifetime
        # of the dataset instance.
        self._caching = caching
        # NOTE: 20200918 - this is a naive cache; update.
        self._metric_cache = {} if self._caching else NoOpDict()

        if batch_spec_defaults is None:
            batch_spec_defaults = {}
        unrecognized_keys = (
            set(batch_spec_defaults.keys()) - self.recognized_batch_spec_defaults
        )
        if unrecognized_keys:
            logger.warning(
                "Unrecognized batch_spec_default(s): %s" % str(unrecognized_keys)
            )

        # Keep only the defaults whose keys are recognized.
        self._batch_spec_defaults = {
            default_key: default_value
            for default_key, default_value in batch_spec_defaults.items()
            if default_key in self.recognized_batch_spec_defaults
        }

        self._batch_data_dict = {}
        batch_data_dict = {} if batch_data_dict is None else batch_data_dict
        self._active_batch_data_id = None
        self._load_batch_data_from_dict(batch_data_dict)

        # Record the call arguments plus the module and class names, then filter
        # the Falsy entries out of the recorded configuration in place.
        self._config = dict(
            name=name,
            caching=caching,
            batch_spec_defaults=batch_spec_defaults,
            batch_data_dict=batch_data_dict,
            validator=validator,
            module_name=self.__class__.__module__,
            class_name=self.__class__.__name__,
        )
        filter_properties_dict(
            properties=self._config, clean_falsy=True, inplace=True
        )
コード例 #17
0
    def __init__(
        self,
        store_backend=None,
        runtime_environment=None,
        store_name=None,
        data_context=None,
    ) -> None:
        """Initialize the store, filling in backend defaults by backend type.

        Args:
            store_backend: Optional backend configuration dictionary. Defaults
                are written into it: "filepath_suffix" for
                ``TupleStoreBackend`` subclasses, "table_name" and
                "key_columns" for ``DatabaseStoreBackend`` subclasses.
            runtime_environment: Forwarded to the parent initializer.
            store_name: Forwarded to the parent initializer.
            data_context: Stored on the instance as ``_data_context``.
        """
        self._expectationSuiteSchema = ExpectationSuiteSchema()
        # TODO: refactor so ExpectationStore can have access to DataContext. Currently used by usage_stats messages.
        self._data_context = data_context

        if store_backend is not None:
            backend_module = store_backend.get(
                "module_name", "great_expectations.data_context.store"
            )
            backend_class_name = store_backend.get(
                "class_name", "InMemoryStoreBackend"
            )
            verify_dynamic_loading_support(module_name=backend_module)
            backend_class = load_class(backend_class_name, backend_module)

            # The backend class loaded; supply defaults for the common backend kinds.
            if issubclass(backend_class, TupleStoreBackend):
                store_backend["filepath_suffix"] = store_backend.get(
                    "filepath_suffix", ".json"
                )
            elif issubclass(backend_class, DatabaseStoreBackend):
                store_backend["table_name"] = store_backend.get(
                    "table_name", "ge_expectations_store"
                )
                store_backend["key_columns"] = store_backend.get(
                    "key_columns", ["expectation_suite_name"]
                )

        # The same three arguments feed the parent initializer and "_config".
        call_arguments = {
            "store_backend": store_backend,
            "runtime_environment": runtime_environment,
            "store_name": store_name,
        }
        super().__init__(**call_arguments)

        # Record the call arguments plus the module and class names, then filter
        # the Falsy entries out of the recorded configuration in place.
        self._config = {
            **call_arguments,
            "module_name": self.__class__.__module__,
            "class_name": self.__class__.__name__,
        }
        filter_properties_dict(properties=self._config, clean_falsy=True, inplace=True)
コード例 #18
0
def test_DataContextConfig_with_FilesystemStoreBackendDefaults_and_simple_defaults(
    construct_data_context_config, default_pandas_datasource_config
):
    """
    Check that a minimal DataContextConfig built with FilesystemStoreBackendDefaults
    (supplying only "root_directory") dumps to the expected configuration and validates.
    """
    test_root_directory = "test_root_dir"

    store_backend_defaults = FilesystemStoreBackendDefaults(
        root_directory=test_root_directory
    )
    pandas_datasource_config = DatasourceConfig(
        class_name="PandasDatasource",
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": "../data/",
            }
        },
    )
    data_context_config = DataContextConfig(
        datasources={"my_pandas_datasource": pandas_datasource_config},
        store_backend_defaults=store_backend_defaults,
    )

    # Build the expected configuration, reusing the data context id of the config under test so both sides agree.
    desired_config = construct_data_context_config(
        data_context_id=data_context_config.anonymous_usage_statistics.data_context_id,
        datasources=default_pandas_datasource_config,
    )

    # The root directory is expected in the backend of each of the three stores and of the local data docs site.
    for store_name_key in (
        "expectations_store_name",
        "validations_store_name",
        "checkpoint_store_name",
    ):
        store_config = desired_config["stores"][desired_config[store_name_key]]
        store_config["store_backend"]["root_directory"] = test_root_directory
    local_site_config = desired_config["data_docs_sites"]["local_site"]
    local_site_config["store_backend"]["root_directory"] = test_root_directory

    actual_properties = filter_properties_dict(
        properties=DataContextConfigSchema().dump(data_context_config)
    )
    expected_properties = filter_properties_dict(properties=desired_config)
    assert actual_properties == expected_properties
    assert DataContext.validate_config(project_config=data_context_config)
コード例 #19
0
    def __init__(self,
                 store_backend=None,
                 runtime_environment=None,
                 store_name=None):
        """Set up the store, supplying backend defaults before delegating to the parent initializer.

        Args:
            store_backend: Optional backend configuration dictionary; missing defaults are written into it in place.
            runtime_environment: Passed through unchanged to the parent initializer.
            store_name: Passed through unchanged to the parent initializer.
        """
        self._expectationSuiteValidationResultSchema = (
            ExpectationSuiteValidationResultSchema())

        if store_backend is not None:
            backend_module_name = store_backend.get(
                "module_name", "great_expectations.data_context.store")
            backend_class_name = store_backend.get(
                "class_name", "InMemoryStoreBackend")
            verify_dynamic_loading_support(module_name=backend_module_name)
            backend_class = load_class(backend_class_name, backend_module_name)

            # The backend class loaded successfully; fill in the defaults that suit its family.
            if issubclass(backend_class, TupleStoreBackend):
                store_backend.setdefault("filepath_suffix", ".json")
            elif issubclass(backend_class, DatabaseStoreBackend):
                store_backend.setdefault("table_name", "ge_validations_store")
                store_backend.setdefault(
                    "key_columns",
                    [
                        "expectation_suite_name",
                        "run_name",
                        "run_time",
                        "batch_identifier",
                    ],
                )

        super().__init__(
            store_backend=store_backend,
            runtime_environment=runtime_environment,
            store_name=store_name,
        )

        # Capture the call arguments of this function (with the "module_name" included and the "class_name" added)
        # as the instance "_config" dictionary, then filter it in place.
        # NOTE(review): no "clean_falsy" flag is passed to the filter here, so the helper's default filtering
        # applies -- confirm this is intended.
        self._config = get_currently_executing_function_call_arguments(
            include_module_name=True,
            class_name=self.__class__.__name__,
        )
        filter_properties_dict(properties=self._config, inplace=True)
コード例 #20
0
def test_data_asset_citations(pandas_dataset):
    """A citation added to the dataset appears as the first entry of the suite's "citations" meta."""
    citation_date = "2020-02-27T12:34:56.123456Z"
    pandas_dataset.add_citation("test citation", citation_date=citation_date)
    suite = pandas_dataset.get_expectation_suite()

    # The "interactive" field is dropped from the recorded citation before comparing.
    recorded_citation = filter_properties_dict(
        properties=suite.meta["citations"][0],
        delete_fields=["interactive"],
    )
    expected_citation = filter_properties_dict(
        properties={
            "comment": "test citation",
            "batch_kwargs": pandas_dataset.batch_kwargs,
            "batch_parameters": pandas_dataset.batch_parameters,
            "batch_markers": pandas_dataset.batch_markers,
            "citation_date": citation_date,
        }
    )
    assert recorded_citation == expected_citation
コード例 #21
0
    def __init__(
        self,
        store_name: str,
        store_backend: Optional[dict] = None,
        overwrite_existing: bool = False,
        runtime_environment: Optional[dict] = None,
    ) -> None:
        """Set up the configuration store and record the arguments it was built with.

        Args:
            store_name: Passed to the parent initializer and recorded in "_config".
            store_backend: Optional backend configuration dictionary; a default "filepath_suffix" may be written
                into it in place.
            overwrite_existing: Stored on the instance as "_overwrite_existing".
            runtime_environment: Passed through unchanged to the parent initializer.

        Raises:
            ge_exceptions.DataContextError: If "_configuration_class" does not inherit from BaseYamlConfig.
        """
        if not issubclass(self._configuration_class, BaseYamlConfig):
            raise ge_exceptions.DataContextError(
                "Invalid configuration: A configuration_class needs to inherit from the BaseYamlConfig class."
            )

        if store_backend is not None:
            backend_module_name = store_backend.get(
                "module_name", "great_expectations.data_context.store")
            backend_class_name = store_backend.get(
                "class_name", "InMemoryStoreBackend")
            verify_dynamic_loading_support(module_name=backend_module_name)
            backend_class = load_class(backend_class_name, backend_module_name)

            # The backend class loaded successfully; tuple-style backends default to YAML files.
            if issubclass(backend_class, TupleStoreBackend):
                store_backend.setdefault("filepath_suffix", ".yml")

        super().__init__(
            store_backend=store_backend,
            runtime_environment=runtime_environment,
            store_name=store_name,
        )

        # Record the call arguments together with this class's module and name, then strip the Falsy entries
        # in place so that "_config" only holds meaningful settings.
        recorded_config = {
            "store_name": store_name,
            "store_backend": store_backend,
            "overwrite_existing": overwrite_existing,
            "runtime_environment": runtime_environment,
            "module_name": self.__class__.__module__,
            "class_name": self.__class__.__name__,
        }
        self._config = recorded_config
        filter_properties_dict(
            properties=self._config, clean_falsy=True, inplace=True
        )

        self._overwrite_existing = overwrite_existing
コード例 #22
0
    def instantiate_from_config_with_runtime_args(
        checkpoint_config: CheckpointConfig,
        data_context: "DataContext",  # noqa: F821
        **runtime_kwargs,
    ) -> "Checkpoint":
        """Build a Checkpoint from a stored configuration overlaid with runtime overrides.

        NOTE(review): there is no "self"/"cls" parameter, so this is presumably a static method whose decorator
        lies outside this snippet -- confirm.

        Args:
            checkpoint_config: Configuration object; its JSON dictionary form is the starting point.
            data_context: Handed to the instantiated class through the runtime environment.
            **runtime_kwargs: Overrides; every entry whose value is not None replaces the configured value.

        Returns:
            The object produced by "instantiate_class_from_config" for the merged configuration.
        """
        config: dict = checkpoint_config.to_json_dict()

        # Only explicitly supplied (non-None) runtime arguments take precedence over the stored configuration.
        config.update(
            {name: override for name, override in runtime_kwargs.items() if override is not None}
        )

        config = filter_properties_dict(properties=config, clean_falsy=True)

        return instantiate_class_from_config(
            config=config,
            runtime_environment={
                "data_context": data_context,
            },
            config_defaults={
                "module_name": "great_expectations.checkpoint",
            },
        )
コード例 #23
0
ファイル: base.py プロジェクト: alfredo-f/great_expectations
    def remove_nulls_and_keep_unknowns(self, output: dict,
                                       original: Type[DictDot],
                                       **kwargs) -> dict:
        """Hook that carries over unknown public attributes and then strips null values from the dumped config.

        Strict schema validation is bypassed here so that dynamic attributes set through `setattr` still end up
        in the resulting dictionary. Only public attributes (keys not starting with "_") are carried over.
        Chetan - 20220126 - Note that if we tighten up the schema (remove the dynamic `setattr` behavior),
        the functionality to keep unknowns should also be removed.

        Args:
            output: Processed dictionary representation of the configuration object; missing public keys of
                "original" are added to it.
            original: The configuration object the dictionary was produced from (read only through "keys()" and
                item access).
            kwargs: Marshmallow-specific kwargs required to maintain hook signature (unused herein)

        Returns:
            The result of filtering "output" with null cleaning enabled and Falsy cleaning disabled
        """
        for attribute_name in original.keys():
            # Skip what the schema already emitted as well as private attributes.
            if attribute_name in output or attribute_name.startswith("_"):
                continue
            output[attribute_name] = original[attribute_name]

        return filter_properties_dict(
            properties=output,
            clean_nulls=True,
            clean_falsy=False,
        )
コード例 #24
0
 def __eq__(self, other):
     """Compare by JSON dictionary form where possible, falling back to the string form.

     Args:
         other: Object to compare against; None never compares equal.

     Returns:
         True when "other" exposes "to_json_dict" and both JSON dictionaries match, when "other" is a dict
         whose Falsy-filtered form matches this object's JSON dictionary, or when both string forms match.
     """
     if other is None:
         return False

     # Objects that can serialize themselves are compared through their JSON dictionaries.
     if (hasattr(other, "to_json_dict")
             and self.to_json_dict() == other.to_json_dict()):
         return True

     # Plain dictionaries are filtered first so that Falsy entries do not affect the comparison.
     if isinstance(other, dict) and self.to_json_dict() == filter_properties_dict(
             properties=other, clean_falsy=True):
         return True

     return self.__str__() == str(other)
コード例 #25
0
    def __init__(
        self,
        ge_cloud_credentials: Dict,
        ge_cloud_base_url: Optional[str] = "https://app.greatexpectations.io/",
        ge_cloud_resource_type: Optional[str] = None,
        ge_cloud_resource_name: Optional[str] = None,
        suppress_store_backend_id: Optional[bool] = True,
        manually_initialize_store_backend_id: Optional[str] = "",
        store_name: Optional[str] = None,
    ):
        """Initialize the backend with its GE Cloud settings and record its configuration.

        Args:
            ge_cloud_credentials: Stored on the instance as "_ge_cloud_credentials" (not recorded in "_config").
            ge_cloud_base_url: Stored on the instance as "_ge_cloud_base_url".
            ge_cloud_resource_type: Resource type; when omitted it is derived by singularizing
                "ge_cloud_resource_name".
            ge_cloud_resource_name: Resource name; when omitted it is derived by pluralizing
                "ge_cloud_resource_type".
            suppress_store_backend_id: Passed to the parent initializer; when Falsy, "store_backend_id" is
                accessed once during initialization.
            manually_initialize_store_backend_id: Passed through unchanged to the parent initializer.
            store_name: Passed through unchanged to the parent initializer.

        Raises:
            AssertionError: If neither "ge_cloud_resource_type" nor "ge_cloud_resource_name" is provided.
        """
        super().__init__(
            fixed_length_key=True,
            suppress_store_backend_id=suppress_store_backend_id,
            manually_initialize_store_backend_id=manually_initialize_store_backend_id,
            store_name=store_name,
        )

        # Raised explicitly instead of through an "assert" statement: assertions are removed under "python -O",
        # which would let None reach pluralize()/singularize() below. The exception type and message are kept
        # so existing callers are unaffected.
        if not (ge_cloud_resource_type or ge_cloud_resource_name):
            raise AssertionError(
                "Must provide either ge_cloud_resource_type or "
                "ge_cloud_resource_name")

        self._ge_cloud_base_url = ge_cloud_base_url
        # Whichever of the resource name/type was omitted is derived from the one that was given.
        self._ge_cloud_resource_name = ge_cloud_resource_name or pluralize(
            ge_cloud_resource_type)
        self._ge_cloud_resource_type = ge_cloud_resource_type or singularize(
            ge_cloud_resource_name)
        self._ge_cloud_credentials = ge_cloud_credentials

        # Initialize with store_backend_id if not part of an HTMLSiteStore
        if not self._suppress_store_backend_id:
            _ = self.store_backend_id

        # Record the call arguments (as originally passed, not the derived resource name/type) together with
        # this class's module and name, then filter the dictionary in place.
        # NOTE(review): no "clean_falsy" flag is passed to the filter here, so the helper's default filtering
        # applies -- confirm this is intended.
        self._config = {
            "ge_cloud_base_url": ge_cloud_base_url,
            "ge_cloud_resource_name": ge_cloud_resource_name,
            "ge_cloud_resource_type": ge_cloud_resource_type,
            "fixed_length_key": True,
            "suppress_store_backend_id": suppress_store_backend_id,
            "manually_initialize_store_backend_id": manually_initialize_store_backend_id,
            "store_name": store_name,
            "module_name": self.__class__.__module__,
            "class_name": self.__class__.__name__,
        }
        filter_properties_dict(properties=self._config, inplace=True)
コード例 #26
0
    def _convert_dictionaries_to_domain_kwargs(
            self,
            source: Optional[Any] = None) -> Optional[Union[Any, "Domain"]]:
        """Recursively wrap dictionaries as DomainKwargs, leaving every other value as it is.

        Args:
            source: Value to convert. A dictionary that is not a Domain is filtered in place and then wrapped in
                DomainKwargs; the values of any dictionary are converted recursively.

        Returns:
            The converted dictionary, or "source" itself when it is not a dictionary (including None).
        """
        # Anything that is not a dictionary (None included) passes through untouched.
        if not isinstance(source, dict):
            return source

        if not isinstance(source, Domain):
            filter_properties_dict(properties=source, inplace=True)
            source = DomainKwargs(source)

        # Existing keys are reassigned while iterating; no keys are added or removed in this loop.
        item_key: str
        item_value: Any
        for item_key, item_value in source.items():
            source[item_key] = self._convert_dictionaries_to_domain_kwargs(
                source=item_value)

        return source
コード例 #27
0
    def _convert_dictionaries_to_parameter_nodes(
        self, source: Optional[Any] = None
    ) -> Optional[Union[Any, ParameterNode]]:
        """Recursively wrap dictionaries as ParameterNode objects, leaving every other value as it is.

        Args:
            source: Value to convert. A dictionary that is not already a ParameterNode is filtered in place and
                then wrapped in ParameterNode; the values of any dictionary are converted recursively.

        Returns:
            The converted dictionary, or "source" itself when it is not a dictionary (including None).
        """
        # Anything that is not a dictionary (None included) passes through untouched.
        if not isinstance(source, dict):
            return source

        if not isinstance(source, ParameterNode):
            filter_properties_dict(properties=source, inplace=True)
            source = ParameterNode(source)

        # Existing keys are reassigned while iterating; no keys are added or removed in this loop.
        node_key: str
        node_value: Any
        for node_key, node_value in source.items():
            source[node_key] = self._convert_dictionaries_to_parameter_nodes(
                source=node_value
            )

        return source
コード例 #28
0
 def to_json_dict(self) -> dict:
     """Return the JSON dictionary form of this object with Falsy entries filtered out.

     Returns:
         A dictionary built from the "domain_type" value, the JSON dictionary of "domain_kwargs", and the
         ".value" of every "details" entry, passed through the properties filter with Falsy cleaning enabled.
     """
     domain_type_value = self["domain_type"].value
     domain_kwargs_json = self["domain_kwargs"].to_json_dict()
     details_json = {
         detail_name: detail.value
         for detail_name, detail in self["details"].items()
     }

     serialized: dict = {
         "domain_type": domain_type_value,
         "domain_kwargs": domain_kwargs_json,
         "details": details_json,
     }
     return filter_properties_dict(properties=serialized, clean_falsy=True)
コード例 #29
0
    def _add_column_level_expectations(
        self,
        expectations_by_column: Dict[str, List[ExpectationConfiguration]],
        batch_request: Optional[Union[str, Dict[str,
                                                Union[str, int,
                                                      Dict[str,
                                                           Any]]]]] = None,
    ):
        """Add notebook cells for the column-level expectations, grouped by column.

        When there are no column expectations, a single "not found" markdown cell is added instead.

        Args:
            expectations_by_column: Expectation configurations keyed by column name.
            batch_request: Forwarded to the templates that render the "not found" cell and each code cell.
        """
        if not expectations_by_column:
            self.add_markdown_cell(
                markdown=self.render_with_overwrite(
                    notebook_config=self.column_expectations_not_found_markdown,
                    default_file_name="COLUMN_EXPECTATIONS_NOT_FOUND.md",
                    batch_request=batch_request,
                    env=os.environ,
                )
            )
            return

        column_name: str
        column_expectations: List[ExpectationConfiguration]
        for column_name, column_expectations in expectations_by_column.items():
            # Each column gets a markdown header cell followed by one code cell per expectation.
            column_header: str = self.render_with_overwrite(
                notebook_config=self.column_expectations_markdown,
                default_file_name="COLUMN_EXPECTATIONS.md",
                column=column_name,
            )
            self.add_markdown_cell(markdown=column_header)

            expectation: ExpectationConfiguration
            for expectation in column_expectations:
                # The expectation's kwargs are filtered in place before the code cell is rendered.
                filter_properties_dict(properties=expectation["kwargs"],
                                       inplace=True)
                expectation_code: str = self.render_with_overwrite(
                    notebook_config=self.column_expectation_code,
                    default_file_name="column_expectation.py.j2",
                    expectation=expectation,
                    batch_request=batch_request,
                    env=os.environ,
                    kwargs_string=self._build_kwargs_string(
                        expectation=expectation),
                    meta_args=self._build_meta_arguments(
                        meta=expectation.meta),
                )
                self.add_code_cell(code=expectation_code, lint=True)
コード例 #30
0
def test_get_citations_with_multiple_citations_containing_profiler_config(
    baseline_suite, profiler_config
):
    """Only citations carrying a profiler config are returned when "require_profiler_config" is set."""
    assert "citations" not in baseline_suite.meta

    # Two citations carry the profiler config; the third one does not and must be left out of the result.
    citations_with_profiler_config = (("first", "2000-01-01"), ("second", "2001-01-01"))
    for comment, citation_date in citations_with_profiler_config:
        baseline_suite.add_citation(
            comment,
            citation_date=citation_date,
            profiler_config=profiler_config,
        )
    baseline_suite.add_citation("third", citation_date="2002-01-01")

    properties_dict_list: List[Dict[str, Any]] = baseline_suite.get_citations(
        sort=True, require_profiler_config=True
    )
    for citation in properties_dict_list:
        filter_properties_dict(properties=citation, clean_falsy=True, inplace=True)
        citation.pop("interactive", None)

    expected_citations = [
        {
            "citation_date": f"{citation_date}T00:00:00.000000Z",
            "profiler_config": profiler_config,
            "comment": comment,
        }
        for comment, citation_date in citations_with_profiler_config
    ]
    assert properties_dict_list == expected_citations