def get_config(
    self,
    mode: ConfigOutputModeType = ConfigOutputModes.TYPED,
    **kwargs,
) -> Union[BaseYamlConfig, dict, str]:
    if isinstance(mode, str):
        mode = ConfigOutputModes(mode.lower())

    config: BaseYamlConfig = self.config

    if mode == ConfigOutputModes.TYPED:
        return config

    if mode == ConfigOutputModes.COMMENTED_MAP:
        return config.commented_map

    if mode == ConfigOutputModes.YAML:
        return config.to_yaml_str()

    if mode == ConfigOutputModes.DICT:
        config_kwargs: dict = config.to_dict()
    elif mode == ConfigOutputModes.JSON_DICT:
        config_kwargs: dict = config.to_json_dict()
    else:
        raise ValueError(f'Unknown mode {mode} in "BaseCheckpoint.get_config()".')

    kwargs["inplace"] = True
    filter_properties_dict(
        properties=config_kwargs,
        **kwargs,
    )

    return config_kwargs
def __init__(self, store_backend=None, store_name=None):
    if store_backend is not None:
        store_backend_module_name = store_backend.get(
            "module_name", "great_expectations.data_context.store"
        )
        store_backend_class_name = store_backend.get(
            "class_name", "InMemoryStoreBackend"
        )
        verify_dynamic_loading_support(module_name=store_backend_module_name)
        store_backend_class = load_class(
            store_backend_class_name, store_backend_module_name
        )

        # Store Backend Class was loaded successfully; verify that it is of a correct subclass.
        if issubclass(store_backend_class, DatabaseStoreBackend):
            # Provide defaults for this common case
            store_backend["table_name"] = store_backend.get(
                "table_name", "ge_evaluation_parameters"
            )

    super().__init__(store_backend=store_backend, store_name=store_name)

    # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter
    # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary.
    self._config = {
        "store_backend": store_backend,
        "store_name": store_name,
        "module_name": self.__class__.__module__,
        "class_name": self.__class__.__name__,
    }
    filter_properties_dict(properties=self._config, clean_falsy=True, inplace=True)
def __init__(
    self,
    data_context: "DataContext",  # noqa: F821
    runtime_environment: Optional[dict] = None,
    fixed_length_key: bool = False,
    suppress_store_backend_id: bool = False,
    manually_initialize_store_backend_id: str = "",
    store_name: Optional[str] = None,
) -> None:
    super().__init__(
        fixed_length_key=fixed_length_key,
        suppress_store_backend_id=suppress_store_backend_id,
        manually_initialize_store_backend_id=manually_initialize_store_backend_id,
        store_name=store_name,
    )
    self._data_context = data_context

    # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter
    # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary.
    self._config = {
        "runtime_environment": runtime_environment,
        "fixed_length_key": fixed_length_key,
        "suppress_store_backend_id": suppress_store_backend_id,
        "manually_initialize_store_backend_id": manually_initialize_store_backend_id,
        "store_name": store_name,
        "module_name": self.__class__.__module__,
        "class_name": self.__class__.__name__,
    }
    filter_properties_dict(properties=self._config, clean_falsy=True, inplace=True)
def test_get_citations_sorted(baseline_suite):
    assert "citations" not in baseline_suite.meta

    baseline_suite.add_citation("first", citation_date="2000-01-01")
    baseline_suite.add_citation("third", citation_date="2000-01-03")
    baseline_suite.add_citation("second", citation_date="2000-01-02")

    properties_dict_list: List[Dict[str, Any]] = baseline_suite.get_citations(sort=True)
    for properties_dict in properties_dict_list:
        filter_properties_dict(properties=properties_dict, inplace=True)
        properties_dict.pop("interactive", None)

    assert properties_dict_list == [
        {
            "citation_date": "2000-01-01T00:00:00.000000Z",
            "comment": "first",
        },
        {
            "citation_date": "2000-01-02T00:00:00.000000Z",
            "comment": "second",
        },
        {
            "citation_date": "2000-01-03T00:00:00.000000Z",
            "comment": "third",
        },
    ]
def test_get_citations_with_multiple_citations_containing_batch_kwargs(baseline_suite):
    assert "citations" not in baseline_suite.meta

    baseline_suite.add_citation(
        "first", batch_kwargs={"path": "first"}, citation_date="2000-01-01"
    )
    baseline_suite.add_citation(
        "second", batch_kwargs={"path": "second"}, citation_date="2001-01-01"
    )
    baseline_suite.add_citation("third", citation_date="2002-01-01")

    properties_dict_list: List[Dict[str, Any]] = baseline_suite.get_citations(
        sort=True, require_batch_kwargs=True
    )
    for properties_dict in properties_dict_list:
        filter_properties_dict(properties=properties_dict, inplace=True)
        properties_dict.pop("interactive", None)

    assert properties_dict_list == [
        {
            "citation_date": "2000-01-01T00:00:00.000000Z",
            "batch_kwargs": {"path": "first"},
            "comment": "first",
        },
        {
            "citation_date": "2001-01-01T00:00:00.000000Z",
            "batch_kwargs": {"path": "second"},
            "comment": "second",
        },
    ]
def __init__(
    self,
    store_name: Optional[str] = None,
    store_backend: Optional[dict] = None,
    runtime_environment: Optional[dict] = None,
) -> None:
    self._schema = DatasourceConfigSchema()
    super().__init__(
        store_backend=store_backend,
        runtime_environment=runtime_environment,
        store_name=store_name,
    )

    # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter
    # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary.
    self._config = {
        "store_backend": store_backend,
        "runtime_environment": runtime_environment,
        "store_name": store_name,
        "module_name": self.__class__.__module__,
        "class_name": self.__class__.__name__,
    }
    filter_properties_dict(properties=self._config, clean_falsy=True, inplace=True)
def __init__(self, store_backend=None, runtime_environment=None, store_name=None):
    if store_backend is not None:
        store_backend_module_name = store_backend.get(
            "module_name", "great_expectations.data_context.store"
        )
        store_backend_class_name = store_backend.get(
            "class_name", "InMemoryStoreBackend"
        )
        verify_dynamic_loading_support(module_name=store_backend_module_name)
        store_backend_class = load_class(
            store_backend_class_name, store_backend_module_name
        )

    super().__init__(
        store_backend=store_backend,
        runtime_environment=runtime_environment,
        store_name=store_name,
    )

    # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter
    # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary.
    self._config = {
        "store_backend": store_backend,
        "runtime_environment": runtime_environment,
        "store_name": store_name,
        "module_name": self.__class__.__module__,
        "class_name": self.__class__.__name__,
    }
    filter_properties_dict(properties=self._config, clean_falsy=True, inplace=True)
def __init__(
    self,
    bucket,
    project,
    prefix="",
    filepath_template=None,
    filepath_prefix=None,
    filepath_suffix=None,
    forbidden_substrings=None,
    platform_specific_separator=False,
    fixed_length_key=False,
    suppress_store_backend_id=False,
    manually_initialize_store_backend_id: str = "",
    public_urls=True,
    base_public_path=None,
    store_name=None,
):
    super().__init__(
        filepath_template=filepath_template,
        filepath_prefix=filepath_prefix,
        filepath_suffix=filepath_suffix,
        forbidden_substrings=forbidden_substrings,
        platform_specific_separator=platform_specific_separator,
        fixed_length_key=fixed_length_key,
        suppress_store_backend_id=suppress_store_backend_id,
        manually_initialize_store_backend_id=manually_initialize_store_backend_id,
        base_public_path=base_public_path,
        store_name=store_name,
    )
    self.bucket = bucket
    self.prefix = prefix
    self.project = project
    self._public_urls = public_urls

    # Initialize with store_backend_id if not part of an HTMLSiteStore
    if not self._suppress_store_backend_id:
        _ = self.store_backend_id

    # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter
    # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary.
    self._config = {
        "bucket": bucket,
        "project": project,
        "prefix": prefix,
        "filepath_template": filepath_template,
        "filepath_prefix": filepath_prefix,
        "filepath_suffix": filepath_suffix,
        "forbidden_substrings": forbidden_substrings,
        "platform_specific_separator": platform_specific_separator,
        "fixed_length_key": fixed_length_key,
        "suppress_store_backend_id": suppress_store_backend_id,
        "manually_initialize_store_backend_id": manually_initialize_store_backend_id,
        "public_urls": public_urls,
        "base_public_path": base_public_path,
        "store_name": store_name,
        "module_name": self.__class__.__module__,
        "class_name": self.__class__.__name__,
    }
    filter_properties_dict(properties=self._config, inplace=True)
def __init__(
    self,
    runtime_environment=None,
    fixed_length_key=False,
    suppress_store_backend_id=False,
    manually_initialize_store_backend_id: str = "",
    store_name=None,
) -> None:
    super().__init__(
        fixed_length_key=fixed_length_key,
        suppress_store_backend_id=suppress_store_backend_id,
        manually_initialize_store_backend_id=manually_initialize_store_backend_id,
        store_name=store_name,
    )
    self._store = {}

    # Initialize with store_backend_id if not part of an HTMLSiteStore
    if not self._suppress_store_backend_id:
        _ = self.store_backend_id

    # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter
    # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary.
    self._config = {
        "runtime_environment": runtime_environment,
        "fixed_length_key": fixed_length_key,
        "suppress_store_backend_id": suppress_store_backend_id,
        "manually_initialize_store_backend_id": manually_initialize_store_backend_id,
        "store_name": store_name,
        "module_name": self.__class__.__module__,
        "class_name": self.__class__.__name__,
    }
    filter_properties_dict(properties=self._config, clean_falsy=True, inplace=True)
def __init__(
    self,
    credentials,
    queries=None,
    store_backend=None,
    runtime_environment=None,
    store_name=None,
) -> None:
    if not sqlalchemy:
        raise ge_exceptions.DataContextError(
            "sqlalchemy module not found, but is required for SqlAlchemyQueryStore"
        )

    super().__init__(
        store_backend=store_backend,
        runtime_environment=runtime_environment,
        store_name=store_name,
    )
    if queries:
        # If queries are defined in configuration, then we load them into an InMemoryStoreBackend
        try:
            assert isinstance(
                queries, dict
            ), "SqlAlchemyQueryStore queries must be defined as a dictionary"
            assert (
                store_backend is None
                or store_backend["class_name"] == "InMemoryStoreBackend"
            ), (
                "If queries are provided in configuration, then store_backend must be empty or an "
                "InMemoryStoreBackend"
            )
            for k, v in queries.items():
                self._store_backend.set(tuple([k]), v)
        except (AssertionError, KeyError) as e:
            raise ge_exceptions.InvalidConfigError(str(e))

    if "engine" in credentials:
        self.engine = credentials["engine"]
    elif "url" in credentials:
        self.engine = create_engine(credentials["url"])
    elif "connection_string" in credentials:
        self.engine = create_engine(credentials["connection_string"])
    else:
        drivername = credentials.pop("drivername")
        options = URL(drivername, **credentials)
        self.engine = create_engine(options)

    # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter
    # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary.
    self._config = {
        "credentials": credentials,
        "queries": queries,
        "store_backend": store_backend,
        "runtime_environment": runtime_environment,
        "store_name": store_name,
        "module_name": self.__class__.__module__,
        "class_name": self.__class__.__name__,
    }
    filter_properties_dict(properties=self._config, clean_falsy=True, inplace=True)
def _add_table_level_expectations(
    self,
    expectations_by_column: Dict[str, List[ExpectationConfiguration]],
    batch_request: Optional[Union[str, Dict[str, Any]]] = None,
) -> None:
    if not expectations_by_column["table_expectations"]:
        markdown: str = self.render_with_overwrite(
            notebook_config=self.table_expectations_not_found_markdown,
            default_file_name="TABLE_EXPECTATIONS_NOT_FOUND.md",
            batch_request=batch_request,
            env=os.environ,
        )
        self.add_markdown_cell(markdown=markdown)
        return

    expectation: ExpectationConfiguration
    for expectation in expectations_by_column["table_expectations"]:
        filter_properties_dict(
            properties=expectation["kwargs"], clean_falsy=True, inplace=True
        )
        code: str = self.render_with_overwrite(
            notebook_config=self.table_expectation_code,
            default_file_name="table_expectation.py.j2",
            expectation=expectation,
            batch_request=batch_request,
            env=os.environ,
            kwargs_string=self._build_kwargs_string(expectation=expectation),
            meta_args=self._build_meta_arguments(meta=expectation.meta),
        )
        self.add_code_cell(code=code, lint=True)
def get_json_dict(self) -> dict:
    data_connector_query: Optional[dict] = None
    if self.data_connector_query is not None:
        data_connector_query = copy.deepcopy(self.data_connector_query)
        if data_connector_query.get("custom_filter_function") is not None:
            data_connector_query["custom_filter_function"] = data_connector_query[
                "custom_filter_function"
            ].__name__

    json_dict: dict = {
        "datasource_name": self.datasource_name,
        "data_connector_name": self.data_connector_name,
        "data_asset_name": self.data_asset_name,
        "data_connector_query": data_connector_query,
    }

    if self.batch_spec_passthrough is not None:
        json_dict["batch_spec_passthrough"] = self.batch_spec_passthrough

    if self.limit is not None:
        json_dict["limit"] = self.limit

    if self.batch_identifiers is not None:
        json_dict["batch_identifiers"] = self.batch_identifiers

    if self.runtime_parameters is not None:
        json_dict["runtime_parameters"] = self.runtime_parameters
        if json_dict["runtime_parameters"].get("batch_data") is not None:
            json_dict["runtime_parameters"]["batch_data"] = str(
                type(json_dict["runtime_parameters"]["batch_data"])
            )

    filter_properties_dict(properties=json_dict, clean_falsy=True, inplace=True)

    return json_dict
def __init__(
    self,
    ge_cloud_credentials: Dict,
    ge_cloud_base_url: str = "https://app.greatexpectations.io/",
    ge_cloud_resource_type: Optional[GeCloudRESTResource] = None,
    ge_cloud_resource_name: Optional[str] = None,
    suppress_store_backend_id: bool = True,
    manually_initialize_store_backend_id: str = "",
    store_name: Optional[str] = None,
) -> None:
    super().__init__(
        fixed_length_key=True,
        suppress_store_backend_id=suppress_store_backend_id,
        manually_initialize_store_backend_id=manually_initialize_store_backend_id,
        store_name=store_name,
    )
    assert (
        ge_cloud_resource_type or ge_cloud_resource_name
    ), "Must provide either ge_cloud_resource_type or ge_cloud_resource_name"

    self._ge_cloud_base_url = ge_cloud_base_url

    self._ge_cloud_resource_name = (
        ge_cloud_resource_name
        or self.RESOURCE_PLURALITY_LOOKUP_DICT[ge_cloud_resource_type]
    )
    self._ge_cloud_resource_type = (
        ge_cloud_resource_type
        or self.RESOURCE_PLURALITY_LOOKUP_DICT[ge_cloud_resource_name]
    )

    # TODO: remove when account_id is deprecated
    if ge_cloud_credentials.get("account_id"):
        logger.warning(
            'The "account_id" ge_cloud_credentials key has been renamed to "organization_id" and will '
            "be deprecated in the next major release."
        )
        ge_cloud_credentials["organization_id"] = ge_cloud_credentials["account_id"]
        ge_cloud_credentials.pop("account_id")

    self._ge_cloud_credentials = ge_cloud_credentials

    # Initialize with store_backend_id if not part of an HTMLSiteStore
    if not self._suppress_store_backend_id:
        _ = self.store_backend_id

    # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter
    # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary.
    self._config = {
        "ge_cloud_base_url": ge_cloud_base_url,
        "ge_cloud_resource_name": ge_cloud_resource_name,
        "ge_cloud_resource_type": ge_cloud_resource_type,
        "fixed_length_key": True,
        "suppress_store_backend_id": suppress_store_backend_id,
        "manually_initialize_store_backend_id": manually_initialize_store_backend_id,
        "store_name": store_name,
        "module_name": self.__class__.__module__,
        "class_name": self.__class__.__name__,
    }
    filter_properties_dict(properties=self._config, inplace=True)
def __init__(
    self,
    base_directory,
    filepath_template=None,
    filepath_prefix=None,
    filepath_suffix=None,
    forbidden_substrings=None,
    platform_specific_separator=True,
    root_directory=None,
    fixed_length_key=False,
    suppress_store_backend_id=False,
    manually_initialize_store_backend_id: str = "",
    base_public_path=None,
    store_name=None,
):
    super().__init__(
        filepath_template=filepath_template,
        filepath_prefix=filepath_prefix,
        filepath_suffix=filepath_suffix,
        forbidden_substrings=forbidden_substrings,
        platform_specific_separator=platform_specific_separator,
        fixed_length_key=fixed_length_key,
        suppress_store_backend_id=suppress_store_backend_id,
        manually_initialize_store_backend_id=manually_initialize_store_backend_id,
        base_public_path=base_public_path,
        store_name=store_name,
    )
    if os.path.isabs(base_directory):
        self.full_base_directory = base_directory
    else:
        if root_directory is None:
            raise ValueError(
                "base_directory must be an absolute path if root_directory is not provided"
            )
        elif not os.path.isabs(root_directory):
            raise ValueError(
                "root_directory must be an absolute path. Got {} instead.".format(
                    root_directory
                )
            )
        else:
            self.full_base_directory = os.path.join(root_directory, base_directory)

    os.makedirs(str(os.path.dirname(self.full_base_directory)), exist_ok=True)

    # Initialize with store_backend_id if not part of an HTMLSiteStore
    if not self._suppress_store_backend_id:
        _ = self.store_backend_id

    # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter
    # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary.
    self._config = get_currently_executing_function_call_arguments(
        include_module_name=True,
        **{
            "class_name": self.__class__.__name__,
        },
    )
    filter_properties_dict(properties=self._config, inplace=True)
def __init__(
    self,
    bucket,
    prefix="",
    boto3_options=None,
    filepath_template=None,
    filepath_prefix=None,
    filepath_suffix=None,
    forbidden_substrings=None,
    platform_specific_separator=False,
    fixed_length_key=False,
    suppress_store_backend_id=False,
    manually_initialize_store_backend_id: str = "",
    base_public_path=None,
    endpoint_url=None,
    store_name=None,
):
    super().__init__(
        filepath_template=filepath_template,
        filepath_prefix=filepath_prefix,
        filepath_suffix=filepath_suffix,
        forbidden_substrings=forbidden_substrings,
        platform_specific_separator=platform_specific_separator,
        fixed_length_key=fixed_length_key,
        suppress_store_backend_id=suppress_store_backend_id,
        manually_initialize_store_backend_id=manually_initialize_store_backend_id,
        base_public_path=base_public_path,
        store_name=store_name,
    )
    self.bucket = bucket
    if prefix:
        if self.platform_specific_separator:
            prefix = prefix.strip(os.sep)

        # we *always* strip "/" from the prefix based on the norms of s3
        # whether the rest of the key is built with platform-specific separators or not
        prefix = prefix.strip("/")
    self.prefix = prefix
    if boto3_options is None:
        boto3_options = {}
    self._boto3_options = boto3_options
    self.endpoint_url = endpoint_url

    # Initialize with store_backend_id if not part of an HTMLSiteStore
    if not self._suppress_store_backend_id:
        _ = self.store_backend_id

    # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter
    # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary.
    self._config = get_currently_executing_function_call_arguments(
        include_module_name=True,
        **{
            "class_name": self.__class__.__name__,
        },
    )
    filter_properties_dict(properties=self._config, inplace=True)
def __init__(
    self,
    name=None,
    caching=True,
    batch_spec_defaults=None,
    batch_data_dict=None,
    validator=None,
):
    self.name = name
    self._validator = validator

    # NOTE: using caching makes the strong assumption that the user will not modify the core data store
    # (e.g. self.spark_df) over the lifetime of the dataset instance
    self._caching = caching
    # NOTE: 20200918 - this is a naive cache; update.
    if self._caching:
        self._metric_cache = {}
    else:
        self._metric_cache = NoOpDict()

    if batch_spec_defaults is None:
        batch_spec_defaults = {}

    batch_spec_defaults_keys = set(batch_spec_defaults.keys())
    if not batch_spec_defaults_keys <= self.recognized_batch_spec_defaults:
        logger.warning(
            "Unrecognized batch_spec_default(s): %s"
            % str(batch_spec_defaults_keys - self.recognized_batch_spec_defaults)
        )

    self._batch_spec_defaults = {
        key: value
        for key, value in batch_spec_defaults.items()
        if key in self.recognized_batch_spec_defaults
    }

    self._batch_data_dict = {}
    if batch_data_dict is None:
        batch_data_dict = {}
    self._active_batch_data_id = None
    self._load_batch_data_from_dict(batch_data_dict)

    # Gather the call arguments of the present function (and add the "class_name"), filter out the Falsy values, and
    # set the instance "_config" variable equal to the resulting dictionary.
    self._config = {
        "name": name,
        "caching": caching,
        "batch_spec_defaults": batch_spec_defaults,
        "batch_data_dict": batch_data_dict,
        "validator": validator,
        "module_name": self.__class__.__module__,
        "class_name": self.__class__.__name__,
    }
    filter_properties_dict(properties=self._config, clean_falsy=True, inplace=True)
def __init__(
    self,
    store_backend=None,
    runtime_environment=None,
    store_name=None,
    data_context=None,
) -> None:
    self._expectationSuiteSchema = ExpectationSuiteSchema()

    # TODO: refactor so ExpectationStore can have access to DataContext. Currently used by usage_stats messages.
    self._data_context = data_context

    if store_backend is not None:
        store_backend_module_name = store_backend.get(
            "module_name", "great_expectations.data_context.store"
        )
        store_backend_class_name = store_backend.get(
            "class_name", "InMemoryStoreBackend"
        )
        verify_dynamic_loading_support(module_name=store_backend_module_name)
        store_backend_class = load_class(
            store_backend_class_name, store_backend_module_name
        )

        # Store Backend Class was loaded successfully; verify that it is of a correct subclass.
        if issubclass(store_backend_class, TupleStoreBackend):
            # Provide defaults for this common case
            store_backend["filepath_suffix"] = store_backend.get(
                "filepath_suffix", ".json"
            )
        elif issubclass(store_backend_class, DatabaseStoreBackend):
            # Provide defaults for this common case
            store_backend["table_name"] = store_backend.get(
                "table_name", "ge_expectations_store"
            )
            store_backend["key_columns"] = store_backend.get(
                "key_columns", ["expectation_suite_name"]
            )

    super().__init__(
        store_backend=store_backend,
        runtime_environment=runtime_environment,
        store_name=store_name,
    )

    # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter
    # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary.
    self._config = {
        "store_backend": store_backend,
        "runtime_environment": runtime_environment,
        "store_name": store_name,
        "module_name": self.__class__.__module__,
        "class_name": self.__class__.__name__,
    }
    filter_properties_dict(properties=self._config, clean_falsy=True, inplace=True)
def test_DataContextConfig_with_FilesystemStoreBackendDefaults_and_simple_defaults(
    construct_data_context_config, default_pandas_datasource_config
):
    """
    What does this test and why?
    Ensure that a very simple DataContextConfig setup using FilesystemStoreBackendDefaults is created accurately.
    This test sets the root_dir parameter.
    """

    test_root_directory = "test_root_dir"

    store_backend_defaults = FilesystemStoreBackendDefaults(
        root_directory=test_root_directory
    )
    data_context_config = DataContextConfig(
        datasources={
            "my_pandas_datasource": DatasourceConfig(
                class_name="PandasDatasource",
                batch_kwargs_generators={
                    "subdir_reader": {
                        "class_name": "SubdirReaderBatchKwargsGenerator",
                        "base_directory": "../data/",
                    }
                },
            )
        },
        store_backend_defaults=store_backend_defaults,
    )

    # Create desired config
    data_context_id = data_context_config.anonymous_usage_statistics.data_context_id
    desired_config = construct_data_context_config(
        data_context_id=data_context_id, datasources=default_pandas_datasource_config
    )

    # Add root_directory to stores and data_docs
    desired_config["stores"][desired_config["expectations_store_name"]][
        "store_backend"
    ]["root_directory"] = test_root_directory
    desired_config["stores"][desired_config["validations_store_name"]][
        "store_backend"
    ]["root_directory"] = test_root_directory
    desired_config["stores"][desired_config["checkpoint_store_name"]][
        "store_backend"
    ]["root_directory"] = test_root_directory
    desired_config["data_docs_sites"]["local_site"]["store_backend"][
        "root_directory"
    ] = test_root_directory

    data_context_config_schema = DataContextConfigSchema()
    assert filter_properties_dict(
        properties=data_context_config_schema.dump(data_context_config)
    ) == filter_properties_dict(properties=desired_config)
    assert DataContext.validate_config(project_config=data_context_config)
def __init__(self, store_backend=None, runtime_environment=None, store_name=None):
    self._expectationSuiteValidationResultSchema = (
        ExpectationSuiteValidationResultSchema()
    )

    if store_backend is not None:
        store_backend_module_name = store_backend.get(
            "module_name", "great_expectations.data_context.store"
        )
        store_backend_class_name = store_backend.get(
            "class_name", "InMemoryStoreBackend"
        )
        verify_dynamic_loading_support(module_name=store_backend_module_name)
        store_backend_class = load_class(
            store_backend_class_name, store_backend_module_name
        )

        # Store Backend Class was loaded successfully; verify that it is of a correct subclass.
        if issubclass(store_backend_class, TupleStoreBackend):
            # Provide defaults for this common case
            store_backend["filepath_suffix"] = store_backend.get(
                "filepath_suffix", ".json"
            )
        elif issubclass(store_backend_class, DatabaseStoreBackend):
            # Provide defaults for this common case
            store_backend["table_name"] = store_backend.get(
                "table_name", "ge_validations_store"
            )
            store_backend["key_columns"] = store_backend.get(
                "key_columns",
                [
                    "expectation_suite_name",
                    "run_name",
                    "run_time",
                    "batch_identifier",
                ],
            )

    super().__init__(
        store_backend=store_backend,
        runtime_environment=runtime_environment,
        store_name=store_name,
    )

    # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter
    # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary.
    self._config = get_currently_executing_function_call_arguments(
        include_module_name=True,
        **{
            "class_name": self.__class__.__name__,
        },
    )
    filter_properties_dict(properties=self._config, inplace=True)
def test_data_asset_citations(pandas_dataset):
    citation_date = "2020-02-27T12:34:56.123456Z"
    pandas_dataset.add_citation("test citation", citation_date=citation_date)
    suite = pandas_dataset.get_expectation_suite()

    assert filter_properties_dict(
        properties=suite.meta["citations"][0], delete_fields=["interactive"]
    ) == filter_properties_dict(
        properties={
            "comment": "test citation",
            "batch_kwargs": pandas_dataset.batch_kwargs,
            "batch_parameters": pandas_dataset.batch_parameters,
            "batch_markers": pandas_dataset.batch_markers,
            "citation_date": citation_date,
        }
    )
def __init__(
    self,
    store_name: str,
    store_backend: Optional[dict] = None,
    overwrite_existing: bool = False,
    runtime_environment: Optional[dict] = None,
) -> None:
    if not issubclass(self._configuration_class, BaseYamlConfig):
        raise ge_exceptions.DataContextError(
            "Invalid configuration: A configuration_class needs to inherit from the BaseYamlConfig class."
        )

    if store_backend is not None:
        store_backend_module_name = store_backend.get(
            "module_name", "great_expectations.data_context.store"
        )
        store_backend_class_name = store_backend.get(
            "class_name", "InMemoryStoreBackend"
        )
        verify_dynamic_loading_support(module_name=store_backend_module_name)
        store_backend_class = load_class(
            store_backend_class_name, store_backend_module_name
        )

        # Store Backend Class was loaded successfully; verify that it is of a correct subclass.
        if issubclass(store_backend_class, TupleStoreBackend):
            # Provide defaults for this common case
            store_backend["filepath_suffix"] = store_backend.get(
                "filepath_suffix", ".yml"
            )

    super().__init__(
        store_backend=store_backend,
        runtime_environment=runtime_environment,
        store_name=store_name,
    )

    # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter
    # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary.
    self._config = {
        "store_name": store_name,
        "store_backend": store_backend,
        "overwrite_existing": overwrite_existing,
        "runtime_environment": runtime_environment,
        "module_name": self.__class__.__module__,
        "class_name": self.__class__.__name__,
    }
    filter_properties_dict(properties=self._config, clean_falsy=True, inplace=True)

    self._overwrite_existing = overwrite_existing
def instantiate_from_config_with_runtime_args(
    checkpoint_config: CheckpointConfig,
    data_context: "DataContext",  # noqa: F821
    **runtime_kwargs,
) -> "Checkpoint":
    config: dict = checkpoint_config.to_json_dict()

    key: str
    value: Any
    for key, value in runtime_kwargs.items():
        if value is not None:
            config[key] = value

    config = filter_properties_dict(properties=config, clean_falsy=True)

    checkpoint: Checkpoint = instantiate_class_from_config(
        config=config,
        runtime_environment={
            "data_context": data_context,
        },
        config_defaults={
            "module_name": "great_expectations.checkpoint",
        },
    )

    return checkpoint
def remove_nulls_and_keep_unknowns(
    self, output: dict, original: Type[DictDot], **kwargs
) -> dict:
    """Hook to clear the config object of any null values before being written as a dictionary.

    Additionally, it bypasses strict schema validation before writing to dict to ensure that dynamic
    attributes set through `setattr` are captured in the resulting object. It is important to note
    that only public attributes are captured through this process.

    Chetan - 20220126 - Note that if we tighten up the schema (remove the dynamic `setattr` behavior),
    the functionality to keep unknowns should also be removed.

    Args:
        output: Processed dictionary representation of the configuration object (leaving original intact)
        original: The dictionary representation of the configuration object
        kwargs: Marshmallow-specific kwargs required to maintain hook signature (unused herein)

    Returns:
        A cleaned dictionary that has no null values
    """
    for key in original.keys():
        if key not in output and not key.startswith("_"):
            output[key] = original[key]

    cleaned_output = filter_properties_dict(
        properties=output,
        clean_nulls=True,
        clean_falsy=False,
    )
    return cleaned_output
def __eq__(self, other):
    return (other is not None) and (
        (
            hasattr(other, "to_json_dict")
            and self.to_json_dict() == other.to_json_dict()
        )
        or (
            isinstance(other, dict)
            and self.to_json_dict()
            == filter_properties_dict(properties=other, clean_falsy=True)
        )
        or (self.__str__() == str(other))
    )
def __init__(
    self,
    ge_cloud_credentials: Dict,
    ge_cloud_base_url: Optional[str] = "https://app.greatexpectations.io/",
    ge_cloud_resource_type: Optional[str] = None,
    ge_cloud_resource_name: Optional[str] = None,
    suppress_store_backend_id: Optional[bool] = True,
    manually_initialize_store_backend_id: Optional[str] = "",
    store_name: Optional[str] = None,
):
    super().__init__(
        fixed_length_key=True,
        suppress_store_backend_id=suppress_store_backend_id,
        manually_initialize_store_backend_id=manually_initialize_store_backend_id,
        store_name=store_name,
    )
    assert (
        ge_cloud_resource_type or ge_cloud_resource_name
    ), "Must provide either ge_cloud_resource_type or ge_cloud_resource_name"

    self._ge_cloud_base_url = ge_cloud_base_url

    self._ge_cloud_resource_name = ge_cloud_resource_name or pluralize(
        ge_cloud_resource_type
    )
    self._ge_cloud_resource_type = ge_cloud_resource_type or singularize(
        ge_cloud_resource_name
    )

    self._ge_cloud_credentials = ge_cloud_credentials

    # Initialize with store_backend_id if not part of an HTMLSiteStore
    if not self._suppress_store_backend_id:
        _ = self.store_backend_id

    # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter
    # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary.
    self._config = {
        "ge_cloud_base_url": ge_cloud_base_url,
        "ge_cloud_resource_name": ge_cloud_resource_name,
        "ge_cloud_resource_type": ge_cloud_resource_type,
        "fixed_length_key": True,
        "suppress_store_backend_id": suppress_store_backend_id,
        "manually_initialize_store_backend_id": manually_initialize_store_backend_id,
        "store_name": store_name,
        "module_name": self.__class__.__module__,
        "class_name": self.__class__.__name__,
    }
    filter_properties_dict(properties=self._config, inplace=True)
def _convert_dictionaries_to_domain_kwargs(
    self, source: Optional[Any] = None
) -> Optional[Union[Any, "Domain"]]:
    if source is None:
        return None

    if isinstance(source, dict):
        if not isinstance(source, Domain):
            filter_properties_dict(properties=source, inplace=True)
            source = DomainKwargs(source)

        key: str
        value: Any
        for key, value in source.items():
            source[key] = self._convert_dictionaries_to_domain_kwargs(source=value)

    return source
def _convert_dictionaries_to_parameter_nodes(
    self, source: Optional[Any] = None
) -> Optional[Union[Any, ParameterNode]]:
    if source is None:
        return None

    if isinstance(source, dict):
        if not isinstance(source, ParameterNode):
            filter_properties_dict(properties=source, inplace=True)
            source = ParameterNode(source)

        key: str
        value: Any
        for key, value in source.items():
            source[key] = self._convert_dictionaries_to_parameter_nodes(source=value)

    return source
def to_json_dict(self) -> dict:
    json_dict: dict = {
        "domain_type": self["domain_type"].value,
        "domain_kwargs": self["domain_kwargs"].to_json_dict(),
        "details": {key: value.value for key, value in self["details"].items()},
    }
    return filter_properties_dict(properties=json_dict, clean_falsy=True)
def _add_column_level_expectations(
    self,
    expectations_by_column: Dict[str, List[ExpectationConfiguration]],
    batch_request: Optional[
        Union[str, Dict[str, Union[str, int, Dict[str, Any]]]]
    ] = None,
):
    if not expectations_by_column:
        markdown: str = self.render_with_overwrite(
            notebook_config=self.column_expectations_not_found_markdown,
            default_file_name="COLUMN_EXPECTATIONS_NOT_FOUND.md",
            batch_request=batch_request,
            env=os.environ,
        )
        self.add_markdown_cell(markdown=markdown)
        return

    column_name: str
    expectations: List[ExpectationConfiguration]
    for column_name, expectations in expectations_by_column.items():
        markdown: str = self.render_with_overwrite(
            notebook_config=self.column_expectations_markdown,
            default_file_name="COLUMN_EXPECTATIONS.md",
            column=column_name,
        )
        self.add_markdown_cell(markdown=markdown)

        expectation: ExpectationConfiguration
        for expectation in expectations:
            filter_properties_dict(properties=expectation["kwargs"], inplace=True)
            code: str = self.render_with_overwrite(
                notebook_config=self.column_expectation_code,
                default_file_name="column_expectation.py.j2",
                expectation=expectation,
                batch_request=batch_request,
                env=os.environ,
                kwargs_string=self._build_kwargs_string(expectation=expectation),
                meta_args=self._build_meta_arguments(meta=expectation.meta),
            )
            self.add_code_cell(code=code, lint=True)
def test_get_citations_with_multiple_citations_containing_profiler_config(
    baseline_suite, profiler_config
):
    assert "citations" not in baseline_suite.meta

    baseline_suite.add_citation(
        "first",
        citation_date="2000-01-01",
        profiler_config=profiler_config,
    )
    baseline_suite.add_citation(
        "second",
        citation_date="2001-01-01",
        profiler_config=profiler_config,
    )
    baseline_suite.add_citation("third", citation_date="2002-01-01")

    properties_dict_list: List[Dict[str, Any]] = baseline_suite.get_citations(
        sort=True, require_profiler_config=True
    )
    for properties_dict in properties_dict_list:
        filter_properties_dict(
            properties=properties_dict, clean_falsy=True, inplace=True
        )
        properties_dict.pop("interactive", None)

    assert properties_dict_list == [
        {
            "citation_date": "2000-01-01T00:00:00.000000Z",
            "profiler_config": profiler_config,
            "comment": "first",
        },
        {
            "citation_date": "2001-01-01T00:00:00.000000Z",
            "profiler_config": profiler_config,
            "comment": "second",
        },
    ]