def __init__(self, store_backend=None):
    """Initialize the store, filling in database-backend defaults for metrics.

    Args:
        store_backend: Optional backend configuration mapping. When given,
            its backend class is resolved from "module_name" (default
            "great_expectations.data_context.store") and "class_name"
            (default "InMemoryStoreBackend"). If that class is a
            DatabaseStoreBackend subclass, missing "table_name" and
            "key_columns" entries are added to the passed mapping in place.
    """
    if store_backend is not None:
        backend_module = store_backend.get(
            "module_name", "great_expectations.data_context.store"
        )
        backend_class_name = store_backend.get("class_name", "InMemoryStoreBackend")
        verify_dynamic_loading_support(module_name=backend_module)
        backend_cls = load_class(backend_class_name, backend_module)

        if issubclass(backend_cls, DatabaseStoreBackend):
            # Provide defaults for this common case; explicit values win.
            database_defaults = {
                "table_name": "ge_metrics",
                "key_columns": [
                    "run_id",
                    "expectation_suite_identifier",
                    "metric_name",
                    "metric_kwargs_id",
                ],
            }
            for option, fallback in database_defaults.items():
                store_backend.setdefault(option, fallback)

    super().__init__(store_backend=store_backend)
def __init__(self, store_backend=None, runtime_environment=None, store_name=None):
    """Resolve the backend class, initialize the parent store, and record the config.

    Args:
        store_backend: Optional backend configuration mapping; "module_name"
            defaults to "great_expectations.data_context.store" and
            "class_name" to "InMemoryStoreBackend" when resolving the class.
        runtime_environment: Forwarded unchanged to the parent initializer.
        store_name: Forwarded unchanged to the parent initializer.
    """
    if store_backend is not None:
        backend_module = store_backend.get(
            "module_name", "great_expectations.data_context.store"
        )
        backend_class_name = store_backend.get("class_name", "InMemoryStoreBackend")
        verify_dynamic_loading_support(module_name=backend_module)
        # The loaded class is not used any further in this initializer.
        load_class(backend_class_name, backend_module)

    super().__init__(
        store_backend=store_backend,
        runtime_environment=runtime_environment,
        store_name=store_name,
    )

    # Snapshot the call arguments together with this class's module and name,
    # then strip the falsy entries in place.
    own_class = self.__class__
    self._config = dict(
        store_backend=store_backend,
        runtime_environment=runtime_environment,
        store_name=store_name,
        module_name=own_class.__module__,
        class_name=own_class.__name__,
    )
    filter_properties_dict(properties=self._config, clean_falsy=True, inplace=True)
def __init__(self, store_backend=None, store_name=None):
    """Initialize the store and record its configuration.

    Args:
        store_backend: Optional backend configuration mapping. When its
            resolved class is a DatabaseStoreBackend subclass, a missing
            "table_name" is set to "ge_evaluation_parameters" on the passed
            mapping in place.
        store_name: Forwarded unchanged to the parent initializer.
    """
    if store_backend is not None:
        backend_module = store_backend.get(
            "module_name", "great_expectations.data_context.store"
        )
        backend_class_name = store_backend.get("class_name", "InMemoryStoreBackend")
        verify_dynamic_loading_support(module_name=backend_module)
        resolved_backend = load_class(backend_class_name, backend_module)

        # The class loaded; database backends get a default table name.
        if issubclass(resolved_backend, DatabaseStoreBackend):
            store_backend.setdefault("table_name", "ge_evaluation_parameters")

    super().__init__(store_backend=store_backend, store_name=store_name)

    # Snapshot the call arguments together with this class's module and name,
    # then strip the falsy entries in place.
    own_class = self.__class__
    self._config = dict(
        store_backend=store_backend,
        store_name=store_name,
        module_name=own_class.__module__,
        class_name=own_class.__name__,
    )
    filter_properties_dict(properties=self._config, clean_falsy=True, inplace=True)
def __init__(self, store_backend=None, runtime_environment=None):
    """Initialize the validations store with per-backend defaults.

    Args:
        store_backend: Optional backend configuration mapping. Depending on
            the resolved backend class, missing entries are added to the
            passed mapping in place: "filepath_suffix" for TupleStoreBackend
            subclasses, "table_name" and "key_columns" for
            DatabaseStoreBackend subclasses.
        runtime_environment: Forwarded unchanged to the parent initializer.
    """
    self._expectationSuiteValidationResultSchema = (
        ExpectationSuiteValidationResultSchema()
    )

    if store_backend is not None:
        backend_module = store_backend.get(
            "module_name", "great_expectations.data_context.store"
        )
        backend_class_name = store_backend.get("class_name", "InMemoryStoreBackend")
        verify_dynamic_loading_support(module_name=backend_module, package_name=None)
        backend_cls = load_class(backend_class_name, backend_module)

        # The class loaded; choose the defaults that match its family.
        if issubclass(backend_cls, TupleStoreBackend):
            backend_defaults = {"filepath_suffix": ".json"}
        elif issubclass(backend_cls, DatabaseStoreBackend):
            backend_defaults = {
                "table_name": "ge_validations_store",
                "key_columns": ["expectation_suite_name", "run_id", "batch_identifier"],
            }
        else:
            backend_defaults = {}

        for option, fallback in backend_defaults.items():
            store_backend.setdefault(option, fallback)

    super().__init__(
        store_backend=store_backend, runtime_environment=runtime_environment
    )
def test_store_dynamic_loading_enablement() -> None:
    """Run the dynamic-loading check for every store-like name in the store package.

    Every attribute name of ``great_expectations.data_context.store`` ending
    in "_store" or "_store_backend" is passed, as a relative module name, to
    ``verify_dynamic_loading_support``.
    """
    store_package_name = "great_expectations.data_context.store"
    store_package = importlib.import_module(store_package_name)

    for attribute_name in vars(store_package):
        if not attribute_name.endswith(("_store", "_store_backend")):
            continue
        verify_dynamic_loading_support(
            module_name=f".{attribute_name}",
            package_name="great_expectations.data_context.store",
        )
def __init__(
    self,
    store_backend=None,
    runtime_environment=None,
    store_name=None,
    data_context=None,
) -> None:
    """Initialize the expectations store with per-backend defaults.

    Args:
        store_backend: Optional backend configuration mapping. Depending on
            the resolved backend class, missing entries are added to the
            passed mapping in place: "filepath_suffix" for TupleStoreBackend
            subclasses, "table_name" and "key_columns" for
            DatabaseStoreBackend subclasses.
        runtime_environment: Forwarded unchanged to the parent initializer.
        store_name: Forwarded unchanged to the parent initializer.
        data_context: Kept on the instance as ``_data_context``.
    """
    self._expectationSuiteSchema = ExpectationSuiteSchema()
    # TODO: refactor so ExpectationStore can have access to DataContext.
    # Currently used by usage_stats messages.
    self._data_context = data_context

    if store_backend is not None:
        backend_module = store_backend.get(
            "module_name", "great_expectations.data_context.store"
        )
        backend_class_name = store_backend.get("class_name", "InMemoryStoreBackend")
        verify_dynamic_loading_support(module_name=backend_module)
        backend_cls = load_class(backend_class_name, backend_module)

        # The class loaded; choose the defaults that match its family.
        if issubclass(backend_cls, TupleStoreBackend):
            backend_defaults = {"filepath_suffix": ".json"}
        elif issubclass(backend_cls, DatabaseStoreBackend):
            backend_defaults = {
                "table_name": "ge_expectations_store",
                "key_columns": ["expectation_suite_name"],
            }
        else:
            backend_defaults = {}

        for option, fallback in backend_defaults.items():
            store_backend.setdefault(option, fallback)

    super().__init__(
        store_backend=store_backend,
        runtime_environment=runtime_environment,
        store_name=store_name,
    )

    # Snapshot the call arguments together with this class's module and name,
    # then strip the falsy entries in place.
    own_class = self.__class__
    self._config = dict(
        store_backend=store_backend,
        runtime_environment=runtime_environment,
        store_name=store_name,
        module_name=own_class.__module__,
        class_name=own_class.__name__,
    )
    filter_properties_dict(properties=self._config, clean_falsy=True, inplace=True)
def __init__(self, store_backend=None):
    """Initialize the store, defaulting the table name for database backends.

    Args:
        store_backend: Optional backend configuration mapping. When its
            resolved class is a DatabaseStoreBackend subclass, a missing
            "table_name" is set to "ge_evaluation_parameters" on the passed
            mapping in place.
    """
    if store_backend is not None:
        backend_module = store_backend.get(
            "module_name", "great_expectations.data_context.store"
        )
        backend_class_name = store_backend.get("class_name", "InMemoryStoreBackend")
        verify_dynamic_loading_support(module_name=backend_module)
        resolved_backend = load_class(backend_class_name, backend_module)

        # The class loaded; database backends get a default table name.
        if issubclass(resolved_backend, DatabaseStoreBackend):
            store_backend.setdefault("table_name", "ge_evaluation_parameters")

    super().__init__(store_backend=store_backend)
def __init__(self, bullet_list_renderer=None):
    """Load the bullet-list content block renderer class.

    Args:
        bullet_list_renderer: Optional mapping with "class_name" and,
            optionally, "module_name". When omitted, the class name
            "ExpectationSuiteBulletListContentBlockRenderer" is used. The
            module defaults to
            "great_expectations.render.renderer.content_block". The loaded
            class itself is stored on ``_bullet_list_renderer``.
    """
    super().__init__()

    renderer_config = (
        bullet_list_renderer
        if bullet_list_renderer is not None
        else {"class_name": "ExpectationSuiteBulletListContentBlockRenderer"}
    )

    renderer_module = renderer_config.get(
        "module_name", "great_expectations.render.renderer.content_block"
    )
    verify_dynamic_loading_support(module_name=renderer_module)

    self._bullet_list_renderer = load_class(
        class_name=renderer_config.get("class_name"),
        module_name=renderer_module,
    )
def __init__(self, table_renderer=None):
    """Load the table content block renderer class.

    Args:
        table_renderer: Optional mapping with "class_name" and, optionally,
            "module_name". When omitted, the class name
            "ValidationResultsTableContentBlockRenderer" is used. The module
            defaults to "great_expectations.render.renderer.content_block".
            The loaded class itself is stored on ``_table_renderer``.
    """
    super().__init__()

    renderer_config = (
        table_renderer
        if table_renderer is not None
        else {"class_name": "ValidationResultsTableContentBlockRenderer"}
    )

    renderer_module = renderer_config.get(
        "module_name", "great_expectations.render.renderer.content_block"
    )
    verify_dynamic_loading_support(module_name=renderer_module)

    self._table_renderer = load_class(
        class_name=renderer_config.get("class_name"),
        module_name=renderer_module,
    )
def __init__(self, store_backend=None, runtime_environment=None, store_name=None):
    """Initialize the validations store with per-backend defaults and record the config.

    Args:
        store_backend: Optional backend configuration mapping. Depending on
            the resolved backend class, missing entries are added to the
            passed mapping in place: "filepath_suffix" for TupleStoreBackend
            subclasses, "table_name" and "key_columns" for
            DatabaseStoreBackend subclasses.
        runtime_environment: Forwarded unchanged to the parent initializer.
        store_name: Forwarded unchanged to the parent initializer.
    """
    self._expectationSuiteValidationResultSchema = (
        ExpectationSuiteValidationResultSchema()
    )

    if store_backend is not None:
        backend_module = store_backend.get(
            "module_name", "great_expectations.data_context.store"
        )
        backend_class_name = store_backend.get("class_name", "InMemoryStoreBackend")
        verify_dynamic_loading_support(module_name=backend_module)
        backend_cls = load_class(backend_class_name, backend_module)

        # The class loaded; apply the defaults that match its family.
        if issubclass(backend_cls, TupleStoreBackend):
            store_backend.setdefault("filepath_suffix", ".json")
        elif issubclass(backend_cls, DatabaseStoreBackend):
            store_backend.setdefault("table_name", "ge_validations_store")
            store_backend.setdefault(
                "key_columns",
                [
                    "expectation_suite_name",
                    "run_name",
                    "run_time",
                    "batch_identifier",
                ],
            )

    super().__init__(
        store_backend=store_backend,
        runtime_environment=runtime_environment,
        store_name=store_name,
    )

    # The helper is called directly from this initializer (not via another
    # function) because it collects the call arguments of the function that
    # is currently executing; "class_name" is added on top of them.
    self._config = get_currently_executing_function_call_arguments(
        include_module_name=True,
        class_name=self.__class__.__name__,
    )
    filter_properties_dict(properties=self._config, inplace=True)
def __init__(
    self,
    store_name: str,
    store_backend: Optional[dict] = None,
    overwrite_existing: bool = False,
    runtime_environment: Optional[dict] = None,
) -> None:
    """Initialize the configuration store and record its configuration.

    Args:
        store_name: Forwarded unchanged to the parent initializer.
        store_backend: Optional backend configuration mapping. When its
            resolved class is a TupleStoreBackend subclass, a missing
            "filepath_suffix" is set to ".yml" on the passed mapping in place.
        overwrite_existing: Kept on the instance as ``_overwrite_existing``.
        runtime_environment: Forwarded unchanged to the parent initializer.

    Raises:
        ge_exceptions.DataContextError: If ``self._configuration_class`` is
            not a subclass of BaseYamlConfig.
    """
    configuration_class_is_valid = issubclass(
        self._configuration_class, BaseYamlConfig
    )
    if not configuration_class_is_valid:
        raise ge_exceptions.DataContextError(
            "Invalid configuration: A configuration_class needs to inherit from the BaseYamlConfig class."
        )

    if store_backend is not None:
        backend_module = store_backend.get(
            "module_name", "great_expectations.data_context.store"
        )
        backend_class_name = store_backend.get("class_name", "InMemoryStoreBackend")
        verify_dynamic_loading_support(module_name=backend_module)
        backend_cls = load_class(backend_class_name, backend_module)

        # The class loaded; tuple backends get a default file suffix.
        if issubclass(backend_cls, TupleStoreBackend):
            store_backend.setdefault("filepath_suffix", ".yml")

    super().__init__(
        store_backend=store_backend,
        runtime_environment=runtime_environment,
        store_name=store_name,
    )

    # Snapshot the call arguments together with this class's module and name,
    # then strip the falsy entries in place.
    own_class = self.__class__
    self._config = dict(
        store_name=store_name,
        store_backend=store_backend,
        overwrite_existing=overwrite_existing,
        runtime_environment=runtime_environment,
        module_name=own_class.__module__,
        class_name=own_class.__name__,
    )
    filter_properties_dict(properties=self._config, clean_falsy=True, inplace=True)

    self._overwrite_existing = overwrite_existing
def __init__(self, batch, expectation_suite, expectation_engine=None, **kwargs):
    """Store the batch and suite and settle on an expectation engine class.

    Args:
        batch: Object whose ``data`` attribute is inspected when the engine
            has to be guessed.
        expectation_suite: Kept on the instance unchanged.
        expectation_engine: Engine class, a ClassConfig, or a dict of
            ClassConfig keyword arguments. A ClassConfig is resolved to a
            class (module defaults to "great_expectations.dataset"). When
            None, the engine is guessed from the type of ``batch.data``:
            pandas DataFrame, then SqlAlchemyBatchReference, then pyspark
            DataFrame. A missing pandas/pyspark install skips that guess.
        **kwargs: Kept on the instance as ``init_kwargs``.

    Raises:
        ValueError: If no engine was given and none could be guessed.
    """
    self.batch = batch
    self.expectation_suite = expectation_suite

    engine = expectation_engine
    if isinstance(engine, dict):
        engine = ClassConfig(**engine)
    if isinstance(engine, ClassConfig):
        engine_module = engine.module_name or "great_expectations.dataset"
        verify_dynamic_loading_support(module_name=engine_module)
        engine = load_class(class_name=engine.class_name, module_name=engine_module)
    self.expectation_engine = engine

    # Guess the engine, trying each known data type in turn.
    if self.expectation_engine is None:
        try:
            import pandas as pd

            if isinstance(batch.data, pd.DataFrame):
                self.expectation_engine = PandasDataset
        except ImportError:
            pass

    if self.expectation_engine is None and isinstance(
        batch.data, SqlAlchemyBatchReference
    ):
        self.expectation_engine = SqlAlchemyDataset

    if self.expectation_engine is None:
        try:
            import pyspark

            if isinstance(batch.data, pyspark.sql.DataFrame):
                self.expectation_engine = SparkDFDataset
        except ImportError:
            pass

    if self.expectation_engine is None:
        raise ValueError(
            "Unable to identify expectation_engine. It must be a subclass of DataAsset."
        )

    self.init_kwargs = kwargs
from great_expectations.util import verify_dynamic_loading_support

# Run the dynamic-loading check for every store submodule before importing
# from it. All submodules share one package, so the (module, package) pairs
# are generated from the bare submodule names.
for module_name, package_name in (
    ("." + submodule, "great_expectations.data_context.store")
    for submodule in (
        "store",
        "validations_store",
        "expectations_store",
        "html_site_store",
        "metric_store",
        "store_backend",
        "tuple_store_backend",
        "database_store_backend",
    )
):
    verify_dynamic_loading_support(
        module_name=module_name, package_name=package_name
    )

from .store import Store
from .validations_store import ValidationsStore
from .expectations_store import ExpectationsStore
from .html_site_store import HtmlSiteStore
from .metric_store import MetricStore, EvaluationParameterStore
from .store_backend import StoreBackend, InMemoryStoreBackend
from .tuple_store_backend import (
    TupleFilesystemStoreBackend,
    TupleS3StoreBackend,
    TupleGCSStoreBackend,
)
def __init__(self, store_backend=None, runtime_environment=None):
    """Create the four store backends used to write a rendered HTML site.

    One backend is instantiated from ``store_backend`` for each kind of
    asset (expectation suites, validation results, the index page, static
    assets); they differ only in the filepath defaults applied.

    Args:
        store_backend: Backend configuration mapping. Despite the ``None``
            default it is required: ``.get`` is called on it immediately.
            "module_name" defaults to "great_expectations.data_context.store"
            and "class_name" to "TupleFilesystemStoreBackend".
        runtime_environment: Passed through to
            ``instantiate_class_from_config`` for every backend.

    Raises:
        DataContextError: If the resolved backend class is not a
            TupleStoreBackend subclass.
        ClassInstantiationError: If instantiating a backend yields a falsy
            object.
    """
    store_backend_module_name = store_backend.get(
        "module_name", "great_expectations.data_context.store"
    )
    store_backend_class_name = store_backend.get(
        "class_name", "TupleFilesystemStoreBackend"
    )
    verify_dynamic_loading_support(module_name=store_backend_module_name)
    store_class = load_class(store_backend_class_name, store_backend_module_name)

    # Store Class was loaded successfully; verify that it is of a correct subclass.
    if not issubclass(store_class, TupleStoreBackend):
        raise DataContextError(
            "Invalid configuration: HtmlSiteStore needs a TupleStoreBackend"
        )

    if (
        "filepath_template" in store_backend
        or store_backend.get("fixed_length_key") is True
    ):
        logger.warning(
            "Configuring a filepath_template or using fixed_length_key is not supported in SiteBuilder: "
            "filepaths will be selected based on the type of asset rendered."
        )

    # One thing to watch for is reversibility of keys.
    # If several types are being written to overlapping directories, we could get collisions.
    module_name = "great_expectations.data_context.store"
    filepath_suffix = ".html"

    def build_backend(**filepath_defaults):
        """Instantiate one backend from store_backend with the given filepath defaults."""
        config_defaults = {
            "module_name": module_name,
            # Supplying the resolved class name makes the documented default
            # usable: a config without "class_name" previously failed with
            # KeyError instead of falling back to it.
            "class_name": store_backend_class_name,
        }
        config_defaults.update(filepath_defaults)
        backend = instantiate_class_from_config(
            config=store_backend,
            runtime_environment=runtime_environment,
            config_defaults=config_defaults,
        )
        if not backend:
            raise ClassInstantiationError(
                module_name=module_name,
                package_name=None,
                class_name=store_backend_class_name,
            )
        return backend

    expectation_suite_identifier_obj = build_backend(
        filepath_prefix="expectations", filepath_suffix=filepath_suffix
    )
    validation_result_identifier_obj = build_backend(
        filepath_prefix="validations", filepath_suffix=filepath_suffix
    )
    index_page_obj = build_backend(filepath_template="index.html")
    static_assets_obj = build_backend(filepath_template=None)

    self.store_backends = {
        ExpectationSuiteIdentifier: expectation_suite_identifier_obj,
        ValidationResultIdentifier: validation_result_identifier_obj,
        "index_page": index_page_obj,
        "static_assets": static_assets_obj,
    }

    # NOTE: Instead of using the filesystem as the source of record for keys,
    # this class tracks keys separately in an internal set.
    # This means that keys are stored for a specific session, but can't be fetched after the original
    # HtmlSiteStore instance leaves scope.
    # Doing it this way allows us to prevent namespace collisions among keys while still having multiple
    # backends that write to the same directory structure.
    # It's a pretty reasonable way for HtmlSiteStore to do its job---you just have to remember that it
    # can't necessarily set and list_keys like most other Stores.
    self.keys = set()
def instantiate_class_from_config(config, runtime_environment, config_defaults=None):
    """Build a GE class from configuration dictionaries.

    The class is located via "module_name" and "class_name", each taken from
    ``config`` when present there (with a non-None value) and otherwise from
    ``config_defaults``. The remaining entries become constructor keyword
    arguments, with ``config`` values taking precedence over
    ``config_defaults``.

    Neither ``config`` nor ``config_defaults`` is modified: both are
    deep-copied before any key is removed, so a defaults dict can safely be
    reused across calls.

    Args:
        config: Mapping describing the object to build.
        runtime_environment: Optional mapping. For every positional
            parameter of the class's ``__init__`` that is not already
            configured, the value of the same name is taken from here when
            available. If the class declares an unconfigured
            ``runtime_environment`` parameter, the whole mapping is passed.
        config_defaults: Optional mapping of fallback values.

    Returns:
        The newly created instance.

    Raises:
        KeyError: If neither mapping provides "module_name" or "class_name".
        TypeError: If calling the class with the assembled keyword arguments
            raises TypeError; the original error is chained as the cause.
    """
    config = copy.deepcopy(config)
    # Copy the defaults as well: the pops below would otherwise strip
    # "module_name"/"class_name" from the caller's dictionary.
    config_defaults = {} if config_defaults is None else copy.deepcopy(config_defaults)

    module_name = config.pop("module_name", None)
    if module_name is None:
        if "module_name" not in config_defaults:
            raise KeyError(
                "Neither config : {} nor config_defaults : {} contains a module_name key.".format(
                    config,
                    config_defaults,
                )
            )
        module_name = config_defaults.pop("module_name")
    else:
        # Pop the value without using it, to avoid sending an unwanted value to the config_class
        config_defaults.pop("module_name", None)

    verify_dynamic_loading_support(module_name=module_name)

    class_name = config.pop("class_name", None)
    if class_name is None:
        logger.warning(
            "Instantiating class from config without an explicit class_name is dangerous. Consider adding "
            "an explicit class_name for %s",
            config.get("name"),
        )
        if "class_name" not in config_defaults:
            raise KeyError(
                "Neither config : {} nor config_defaults : {} contains a class_name key.".format(
                    config,
                    config_defaults,
                )
            )
        class_name = config_defaults.pop("class_name")
    else:
        # Pop the value without using it, to avoid sending an unwanted value to the config_class
        config_defaults.pop("class_name", None)

    class_ = load_class(class_name=class_name, module_name=module_name)

    # config_defaults is already a private deep copy, so it can be extended directly.
    config_with_defaults = config_defaults
    config_with_defaults.update(config)

    if runtime_environment is not None:
        # If there are additional kwargs available in the runtime_environment requested by a
        # class to be instantiated, provide them
        init_arg_names = inspect.getfullargspec(class_.__init__).args[1:]  # skip "self"
        missing_args = set(init_arg_names) - set(config_with_defaults)
        config_with_defaults.update(
            {
                missing_arg: runtime_environment[missing_arg]
                for missing_arg in missing_args
                if missing_arg in runtime_environment
            }
        )
        # Add the entire runtime_environment as well if it's requested
        if "runtime_environment" in missing_args:
            config_with_defaults["runtime_environment"] = runtime_environment

    try:
        class_instance = class_(**config_with_defaults)
    except TypeError as e:
        raise TypeError(
            "Couldn't instantiate class : {} with config : \n\t{}\n \n".format(
                class_name, format_dict_for_error_message(config_with_defaults)
            )
            + str(e)
        ) from e

    return class_instance
def __init__(self, store_backend=None, runtime_environment=None):
    """Create the four store backends used to write a rendered HTML site.

    One backend is instantiated from ``store_backend`` for each kind of
    asset (expectation suites, validation results, the index page, static
    assets). For every class name other than "GeCloudStoreBackend" they
    differ in the filepath defaults applied; for "GeCloudStoreBackend" no
    filepath defaults are supplied at all.

    Args:
        store_backend: Backend configuration mapping. Despite the ``None``
            default it is required: ``.get`` is called on it immediately.
            "module_name" defaults to "great_expectations.data_context.store"
            and "class_name" to "TupleFilesystemStoreBackend".
        runtime_environment: Passed through to
            ``instantiate_class_from_config`` for every backend.

    Raises:
        DataContextError: If the resolved backend class is neither a
            TupleStoreBackend nor a GeCloudStoreBackend subclass.
        ClassInstantiationError: If instantiating a backend yields a falsy
            object.
    """
    store_backend_module_name = store_backend.get(
        "module_name", "great_expectations.data_context.store"
    )
    store_backend_class_name = store_backend.get(
        "class_name", "TupleFilesystemStoreBackend"
    )
    verify_dynamic_loading_support(module_name=store_backend_module_name)
    store_class = load_class(store_backend_class_name, store_backend_module_name)

    # Store Class was loaded successfully; verify that it is of a correct subclass.
    if not issubclass(store_class, (TupleStoreBackend, GeCloudStoreBackend)):
        raise DataContextError(
            "Invalid configuration: HtmlSiteStore needs a TupleStoreBackend or GeCloudStoreBackend"
        )

    if (
        "filepath_template" in store_backend
        or store_backend.get("fixed_length_key") is True
    ):
        logger.warning(
            "Configuring a filepath_template or using fixed_length_key is not supported in SiteBuilder: "
            "filepaths will be selected based on the type of asset rendered."
        )

    # One thing to watch for is reversibility of keys.
    # If several types are being written to overlapping directories, we could get collisions.
    module_name = "great_expectations.data_context.store"
    filepath_suffix = ".html"
    # Compare the resolved name (which includes the default) rather than
    # indexing store_backend["class_name"], which raised KeyError whenever
    # the config relied on the default class.
    is_ge_cloud_store = store_backend_class_name == "GeCloudStoreBackend"

    def build_backend(**filepath_defaults):
        """Instantiate one backend from store_backend with the given filepath defaults."""
        config_defaults = {
            "module_name": module_name,
            # Makes the default class name usable when the config omits it.
            "class_name": store_backend_class_name,
            "suppress_store_backend_id": True,
        }
        if not is_ge_cloud_store:
            # Filepath options are only supplied for non-cloud backends.
            config_defaults.update(filepath_defaults)
        backend = instantiate_class_from_config(
            config=store_backend,
            runtime_environment=runtime_environment,
            config_defaults=config_defaults,
        )
        if not backend:
            raise ClassInstantiationError(
                module_name=module_name,
                package_name=None,
                class_name=store_backend_class_name,
            )
        return backend

    expectation_suite_identifier_obj = build_backend(
        filepath_prefix="expectations", filepath_suffix=filepath_suffix
    )
    validation_result_identifier_obj = build_backend(
        filepath_prefix="validations", filepath_suffix=filepath_suffix
    )
    index_page_obj = build_backend(filepath_template="index.html")
    static_assets_obj = build_backend(filepath_template=None)

    self.store_backends = {
        ExpectationSuiteIdentifier: expectation_suite_identifier_obj,
        ValidationResultIdentifier: validation_result_identifier_obj,
        "index_page": index_page_obj,
        "static_assets": static_assets_obj,
    }

    # NOTE: Instead of using the filesystem as the source of record for keys,
    # this class tracks keys separately in an internal set.
    # This means that keys are stored for a specific session, but can't be fetched after the original
    # HtmlSiteStore instance leaves scope.
    # Doing it this way allows us to prevent namespace collisions among keys while still having multiple
    # backends that write to the same directory structure.
    # It's a pretty reasonable way for HtmlSiteStore to do its job---you just have to remember that it
    # can't necessarily set and list_keys like most other Stores.
    self.keys = set()

    # Snapshot the call arguments together with this class's module and name,
    # then strip the falsy entries in place.
    self._config = {
        "store_backend": store_backend,
        "runtime_environment": runtime_environment,
        "module_name": self.__class__.__module__,
        "class_name": self.__class__.__name__,
    }
    filter_properties_dict(properties=self._config, clean_falsy=True, inplace=True)
) from .database_store_backend import DatabaseStoreBackend # isort:skip from .configuration_store import ConfigurationStore # isort:skip from .checkpoint_store import CheckpointStore # isort:skip from .metric_store import ( # isort:skip EvaluationParameterStore, MetricStore, ) from .expectations_store import ExpectationsStore # isort:skip from .validations_store import ValidationsStore # isort:skip from .query_store import SqlAlchemyQueryStore # isort:skip from .html_site_store import HtmlSiteStore # isort:skip from .profiler_store import ProfilerStore # isort:skip for module_name in ( ".store", ".validations_store", ".configuration_store", ".expectations_store", ".html_site_store", ".profiler_store", ".metric_store", ".checkpoint_store", ".store_backend", ".tuple_store_backend", ".database_store_backend", ".ge_cloud_store_backend", ): verify_dynamic_loading_support( module_name=module_name, package_name="great_expectations.data_context.store")