def __init__(
    self,
    runtime_environment=None,
    fixed_length_key=False,
    suppress_store_backend_id=False,
    manually_initialize_store_backend_id: str = "",
    store_name=None,
):
    """Initialize an in-memory store backend backed by a plain dict.

    NOTE(review): ``runtime_environment`` is accepted but never used in this
    body — presumably kept for signature parity with sibling backends; confirm.
    """
    super().__init__(
        fixed_length_key=fixed_length_key,
        suppress_store_backend_id=suppress_store_backend_id,
        manually_initialize_store_backend_id=manually_initialize_store_backend_id,
        store_name=store_name,
    )
    # Backing storage: an in-process dict mapping keys to stored values.
    self._store = {}
    # Initialize with store_backend_id if not part of an HTMLSiteStore
    if not self._suppress_store_backend_id:
        # Reading the property triggers id initialization; the value is unused here.
        _ = self.store_backend_id
    # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter
    # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary.
    self._config = get_currently_executing_function_call_arguments(
        include_module_name=True,
        **{
            "class_name": self.__class__.__name__,
        },
    )
    filter_properties_dict(properties=self._config, inplace=True)
def __init__(self, store_backend=None, store_name=None):
    """Initialize the store, defaulting the table name for database backends.

    When a ``store_backend`` config dict is supplied, the backend class is
    resolved up front so a default ``table_name`` can be injected for the
    common database-backed case before delegating to the parent initializer.
    """
    if store_backend is not None:
        module_name = store_backend.get(
            "module_name", "great_expectations.data_context.store"
        )
        class_name = store_backend.get("class_name", "InMemoryStoreBackend")
        verify_dynamic_loading_support(module_name=module_name)
        backend_class = load_class(class_name, module_name)
        # Backend class loaded successfully; for the common database-backed
        # case, make sure a table name is present in the configuration.
        if issubclass(backend_class, DatabaseStoreBackend):
            store_backend["table_name"] = store_backend.get(
                "table_name", "ge_evaluation_parameters"
            )
    super().__init__(store_backend=store_backend, store_name=store_name)
    # Record the truthy constructor arguments (plus module and class names)
    # as this instance's "_config" dictionary.
    self._config = get_currently_executing_function_call_arguments(
        include_module_name=True,
        **{"class_name": self.__class__.__name__},
    )
    filter_properties_dict(properties=self._config, inplace=True)
def __init__(
    self,
    base_directory,
    filepath_template=None,
    filepath_prefix=None,
    filepath_suffix=None,
    forbidden_substrings=None,
    platform_specific_separator=True,
    root_directory=None,
    fixed_length_key=False,
    suppress_store_backend_id=False,
    manually_initialize_store_backend_id: str = "",
    base_public_path=None,
    store_name=None,
):
    """Initialize a filesystem-backed tuple store rooted at ``base_directory``.

    ``base_directory`` may be absolute on its own, or relative to an absolute
    ``root_directory``. The parent directory of the resolved path is created
    eagerly.

    Raises:
        ValueError: if ``base_directory`` is relative and ``root_directory``
            is missing or not absolute.
    """
    super().__init__(
        filepath_template=filepath_template,
        filepath_prefix=filepath_prefix,
        filepath_suffix=filepath_suffix,
        forbidden_substrings=forbidden_substrings,
        platform_specific_separator=platform_specific_separator,
        fixed_length_key=fixed_length_key,
        suppress_store_backend_id=suppress_store_backend_id,
        manually_initialize_store_backend_id=manually_initialize_store_backend_id,
        base_public_path=base_public_path,
        store_name=store_name,
    )
    # Resolve the effective base directory; the original nested if/else is
    # flattened into a single guard-clause chain with identical outcomes.
    if os.path.isabs(base_directory):
        self.full_base_directory = base_directory
    elif root_directory is None:
        raise ValueError(
            "base_directory must be an absolute path if root_directory is not provided"
        )
    elif not os.path.isabs(root_directory):
        raise ValueError(
            "root_directory must be an absolute path. Got {} instead.".format(
                root_directory
            )
        )
    else:
        self.full_base_directory = os.path.join(root_directory, base_directory)
    # Ensure the parent of the resolved base directory exists.
    os.makedirs(str(os.path.dirname(self.full_base_directory)), exist_ok=True)
    # Initialize with store_backend_id unless suppressed (e.g. when this
    # backend is part of an HTMLSiteStore).
    if not self._suppress_store_backend_id:
        _ = self.store_backend_id
    # Record the truthy constructor arguments (plus module and class names)
    # as this instance's "_config" dictionary.
    self._config = get_currently_executing_function_call_arguments(
        include_module_name=True,
        **{"class_name": self.__class__.__name__},
    )
    filter_properties_dict(properties=self._config, inplace=True)
def __init__(
    self,
    bucket,
    prefix="",
    boto3_options=None,
    filepath_template=None,
    filepath_prefix=None,
    filepath_suffix=None,
    forbidden_substrings=None,
    platform_specific_separator=False,
    fixed_length_key=False,
    suppress_store_backend_id=False,
    manually_initialize_store_backend_id: str = "",
    base_public_path=None,
    endpoint_url=None,
    store_name=None,
):
    """Initialize an S3-backed tuple store writing keys under ``prefix`` in ``bucket``."""
    super().__init__(
        filepath_template=filepath_template,
        filepath_prefix=filepath_prefix,
        filepath_suffix=filepath_suffix,
        forbidden_substrings=forbidden_substrings,
        platform_specific_separator=platform_specific_separator,
        fixed_length_key=fixed_length_key,
        suppress_store_backend_id=suppress_store_backend_id,
        manually_initialize_store_backend_id=manually_initialize_store_backend_id,
        base_public_path=base_public_path,
        store_name=store_name,
    )
    self.bucket = bucket
    if prefix:
        if self.platform_specific_separator:
            prefix = prefix.strip(os.sep)
        # S3 keys never begin with "/", so "/" is always stripped from the
        # prefix regardless of whether the rest of the key is built with
        # platform-specific separators.
        prefix = prefix.strip("/")
    self.prefix = prefix
    # Default to an empty options dict when none was supplied.
    self._boto3_options = {} if boto3_options is None else boto3_options
    self.endpoint_url = endpoint_url
    # Initialize with store_backend_id unless suppressed (e.g. when this
    # backend is part of an HTMLSiteStore).
    if not self._suppress_store_backend_id:
        _ = self.store_backend_id
    # Record the truthy constructor arguments (plus module and class names)
    # as this instance's "_config" dictionary.
    self._config = get_currently_executing_function_call_arguments(
        include_module_name=True,
        **{"class_name": self.__class__.__name__},
    )
    filter_properties_dict(properties=self._config, inplace=True)
def __init__(
    self,
    credentials,
    queries=None,
    store_backend=None,
    runtime_environment=None,
    store_name=None,
):
    """Initialize a query store backed by a SQLAlchemy engine.

    Requires sqlalchemy; raises DataContextError if it is unavailable.
    ``credentials`` must contain one of "engine", "url", or driver-level
    fields including "drivername".

    NOTE(review): in the drivername branch, ``credentials.pop("drivername")``
    mutates the caller's dict — presumably acceptable for one-shot config
    dicts, but confirm no caller reuses ``credentials`` afterwards.
    """
    if not sqlalchemy:
        raise ge_exceptions.DataContextError(
            "sqlalchemy module not found, but is required for "
            "SqlAlchemyQueryStore")
    super().__init__(
        store_backend=store_backend,
        runtime_environment=runtime_environment,
        store_name=store_name,
    )
    if queries:
        # If queries are defined in configuration, then we load them into an InMemoryStoreBackend
        try:
            # Validation is assertion-based; AssertionError/KeyError are
            # converted to InvalidConfigError below.
            assert isinstance(
                queries, dict
            ), "SqlAlchemyQueryStore queries must be defined as a dictionary"
            assert (
                store_backend is None
                or store_backend["class_name"] == "InMemoryStoreBackend"
            ), ("If queries are provided in configuration, then store_backend must be empty or an "
                "InMemoryStoreBackend")
            for k, v in queries.items():
                # Store keys are tuples by convention for this backend.
                self._store_backend.set(tuple([k]), v)
        except (AssertionError, KeyError) as e:
            raise ge_exceptions.InvalidConfigError(str(e))
    # Engine resolution priority: explicit engine > url > driver credentials.
    if "engine" in credentials:
        self.engine = credentials["engine"]
    elif "url" in credentials:
        self.engine = create_engine(credentials["url"])
    else:
        drivername = credentials.pop("drivername")
        options = URL(drivername, **credentials)
        self.engine = create_engine(options)
    # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter
    # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary.
    self._config = get_currently_executing_function_call_arguments(
        include_module_name=True,
        **{
            "class_name": self.__class__.__name__,
        })
    filter_properties_dict(properties=self._config, inplace=True)
def __init__(self, store_backend=None, runtime_environment=None, store_name=None):
    """Initialize the validations store, defaulting backend-specific settings.

    Tuple-style backends get a ".json" filepath suffix by default; database
    backends get a default table name and key columns.
    """
    self._expectationSuiteValidationResultSchema = (
        ExpectationSuiteValidationResultSchema()
    )
    if store_backend is not None:
        module_name = store_backend.get(
            "module_name", "great_expectations.data_context.store"
        )
        class_name = store_backend.get("class_name", "InMemoryStoreBackend")
        verify_dynamic_loading_support(module_name=module_name)
        backend_class = load_class(class_name, module_name)
        # Backend class loaded successfully; inject sensible defaults for the
        # two common backend families.
        if issubclass(backend_class, TupleStoreBackend):
            store_backend["filepath_suffix"] = store_backend.get(
                "filepath_suffix", ".json"
            )
        elif issubclass(backend_class, DatabaseStoreBackend):
            store_backend["table_name"] = store_backend.get(
                "table_name", "ge_validations_store"
            )
            store_backend["key_columns"] = store_backend.get(
                "key_columns",
                [
                    "expectation_suite_name",
                    "run_name",
                    "run_time",
                    "batch_identifier",
                ],
            )
    super().__init__(
        store_backend=store_backend,
        runtime_environment=runtime_environment,
        store_name=store_name,
    )
    # Record the truthy constructor arguments (plus module and class names)
    # as this instance's "_config" dictionary.
    self._config = get_currently_executing_function_call_arguments(
        include_module_name=True,
        **{"class_name": self.__class__.__name__},
    )
    filter_properties_dict(properties=self._config, inplace=True)
def __init__(
    self,
    name=None,
    caching=True,
    batch_spec_defaults=None,
    batch_data_dict=None,
    validator=None,
):
    """Execution-engine base initializer.

    Args:
        name: Optional name for this engine instance.
        caching: When True, metric results are memoized in ``_metric_cache``;
            otherwise a ``NoOpDict`` discards writes.
        batch_spec_defaults: Optional dict of default batch-spec values;
            keys not in ``recognized_batch_spec_defaults`` are warned about
            and dropped.
        batch_data_dict: Optional mapping of batch id -> batch data, loaded
            via ``_load_batch_data_from_dict``.
        validator: Optional validator associated with this engine.
    """
    self.name = name
    self._validator = validator

    # NOTE: using caching makes the strong assumption that the user will not modify the core data store
    # (e.g. self.spark_df) over the lifetime of the dataset instance
    self._caching = caching
    # NOTE: 20200918 - this is a naive cache; update.
    if self._caching:
        self._metric_cache = {}
    else:
        self._metric_cache = NoOpDict()

    if batch_spec_defaults is None:
        batch_spec_defaults = {}
    batch_spec_defaults_keys = set(batch_spec_defaults.keys())
    if not batch_spec_defaults_keys <= self.recognized_batch_spec_defaults:
        # FIX: use lazy %-style logging arguments instead of eager "%"
        # formatting, so the message is only built if the record is emitted.
        logger.warning(
            "Unrecognized batch_spec_default(s): %s",
            str(batch_spec_defaults_keys - self.recognized_batch_spec_defaults),
        )
    # Keep only the recognized defaults.
    self._batch_spec_defaults = {
        key: value
        for key, value in batch_spec_defaults.items()
        if key in self.recognized_batch_spec_defaults
    }

    self._batch_data_dict = {}
    if batch_data_dict is None:
        batch_data_dict = {}
    self._load_batch_data_from_dict(batch_data_dict)
    self._active_batch_data_id = None

    # Gather the call arguments of the present function (and add the "class_name"), filter out the Falsy values, and
    # set the instance "_config" variable equal to the resulting dictionary.
    self._config = get_currently_executing_function_call_arguments(
        **{"class_name": self.__class__.__name__}
    )
    filter_properties_dict(
        properties=self._config,
        inplace=True,
    )
def __init__(
    self,
    bucket,
    project,
    prefix="",
    filepath_template=None,
    filepath_prefix=None,
    filepath_suffix=None,
    forbidden_substrings=None,
    platform_specific_separator=False,
    fixed_length_key=False,
    suppress_store_backend_id=False,
    manually_initialize_store_backend_id: str = "",
    public_urls=True,
    base_public_path=None,
    store_name=None,
):
    """Initialize a bucket-backed tuple store.

    NOTE(review): ``bucket``/``project`` suggest Google Cloud Storage, but
    the client construction is not visible in this block — confirm.
    ``public_urls`` toggles whether generated URLs are public-style.
    """
    super().__init__(
        filepath_template=filepath_template,
        filepath_prefix=filepath_prefix,
        filepath_suffix=filepath_suffix,
        forbidden_substrings=forbidden_substrings,
        platform_specific_separator=platform_specific_separator,
        fixed_length_key=fixed_length_key,
        suppress_store_backend_id=suppress_store_backend_id,
        manually_initialize_store_backend_id=manually_initialize_store_backend_id,
        base_public_path=base_public_path,
        store_name=store_name,
    )
    self.bucket = bucket
    self.prefix = prefix
    self.project = project
    self._public_urls = public_urls
    # Initialize with store_backend_id if not part of an HTMLSiteStore
    if not self._suppress_store_backend_id:
        # Reading the property triggers id initialization; value unused here.
        _ = self.store_backend_id
    # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter
    # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary.
    self._config = get_currently_executing_function_call_arguments(
        include_module_name=True,
        **{
            "class_name": self.__class__.__name__,
        },
    )
    filter_properties_dict(properties=self._config, inplace=True)
def __init__(
    self,
    store_name: str,
    store_backend: Optional[dict] = None,
    overwrite_existing: bool = False,
    runtime_environment: Optional[dict] = None,
):
    """Initialize a configuration store for YAML-serializable config objects.

    The associated ``_configuration_class`` must inherit from BaseYamlConfig;
    tuple-style backends default to a "{0}.yml" filepath template.
    """
    if not issubclass(self._configuration_class, BaseYamlConfig):
        raise ge_exceptions.DataContextError(
            "Invalid configuration: A configuration_class needs to inherit from the BaseYamlConfig class."
        )
    if store_backend is not None:
        module_name = store_backend.get(
            "module_name", "great_expectations.data_context.store"
        )
        class_name = store_backend.get("class_name", "InMemoryStoreBackend")
        verify_dynamic_loading_support(module_name=module_name)
        backend_class = load_class(class_name, module_name)
        # Backend class loaded successfully; tuple-style backends get a
        # default YAML filepath template.
        if issubclass(backend_class, TupleStoreBackend):
            store_backend["filepath_template"] = store_backend.get(
                "filepath_template", "{0}.yml"
            )
    super().__init__(
        store_backend=store_backend,
        runtime_environment=runtime_environment,
        store_name=store_name,
    )
    # Record the truthy constructor arguments (plus module and class names)
    # as this instance's "_config" dictionary.
    self._config = get_currently_executing_function_call_arguments(
        include_module_name=True,
        **{"class_name": self.__class__.__name__},
    )
    filter_properties_dict(properties=self._config, inplace=True)
    self._overwrite_existing = overwrite_existing
def test_get_currently_executing_function_call_arguments(a=None, *args, **kwargs):
    """Self-recursing test: the first call re-enters with concrete arguments;
    the recursive call performs all assertions against the captured frame."""
    if a is None:
        # Outer invocation: recurse with known positional/keyword arguments.
        test_get_currently_executing_function_call_arguments(0, 1, 2, 3, b=5)
        return
    # Inner invocation: verify the raw parameters first.
    assert a == 0
    assert args == (1, 2, 3)
    assert kwargs == {"b": 5}
    extras = {
        "additional_param_0": "xyz_0",
        "additional_param_1": "xyz_1",
        "additional_param_2": "xyz_2",
    }
    params = get_currently_executing_function_call_arguments(**extras)
    # The captured arguments must include positionals, varargs, and kwargs...
    assert params["a"] == 0
    assert params["args"] == (1, 2, 3)
    assert params["b"] == 5
    # ...as well as every explicitly supplied additional parameter.
    for key, expected in extras.items():
        assert params[key] == expected
def __init__(
    self,
    name=None,
    credentials=None,
    data_context=None,
    engine=None,
    connection_string=None,
    url=None,
    batch_data_dict=None,
    **kwargs,  # These will be passed as optional parameters to the SQLAlchemy engine, **not** the ExecutionEngine
):
    """Builds a SqlAlchemyExecutionEngine, using a provided connection string/url/engine/credentials to access
    the desired database. Also initializes the dialect to be used and configures usage statistics.

    Args:
        name (str): \
            The name of the SqlAlchemyExecutionEngine
        credentials: \
            If the Execution Engine is not provided, the credentials can be used to build the Execution
            Engine. If the Engine is provided, it will be used instead
        data_context (DataContext): \
            An object representing a Great Expectations project that can be used to access Expectation
            Suites and the Project Data itself
        engine (Engine): \
            A SqlAlchemy Engine used to set the SqlAlchemyExecutionEngine being configured, useful if an
            Engine has already been configured and should be reused. Will override Credentials
            if provided.
        connection_string (string): \
            If neither the engines nor the credentials have been provided, a connection string can be used
            to access the data. This will be overridden by both the engine and credentials if those are
            provided.
        url (string): \
            If neither the engines, the credentials, nor the connection_string have been provided, a url can
            be used to access the data. This will be overridden by all other configuration options if any
            are provided.
    """
    super().__init__(name=name, batch_data_dict=batch_data_dict)  # , **kwargs)
    self._name = name

    self._credentials = credentials
    self._connection_string = connection_string
    self._url = url

    # Connection resolution priority: engine > credentials > connection_string > url.
    if engine is not None:
        if credentials is not None:
            logger.warning(
                "Both credentials and engine were provided during initialization of SqlAlchemyExecutionEngine. "
                "Ignoring credentials."
            )
        self.engine = engine
    elif credentials is not None:
        self.engine = self._build_engine(credentials=credentials, **kwargs)
    elif connection_string is not None:
        self.engine = sa.create_engine(connection_string, **kwargs)
    elif url is not None:
        self.drivername = urlparse(url).scheme
        self.engine = sa.create_engine(url, **kwargs)
    else:
        raise InvalidConfigError(
            "Credentials or an engine are required for a SqlAlchemyExecutionEngine."
        )

    # Get the dialect **for purposes of identifying types**
    # FIX: hoist the lowered dialect name (it was recomputed on every branch)
    # and remove the duplicated "oracle" entry from the membership list.
    dialect_name = self.engine.dialect.name.lower()
    if dialect_name in [
        "postgresql",
        "mysql",
        "sqlite",
        "oracle",
        "mssql",
    ]:
        # These are the officially included and supported dialects by sqlalchemy
        self.dialect = import_library_module(
            module_name="sqlalchemy.dialects." + self.engine.dialect.name
        )
    elif dialect_name == "snowflake":
        self.dialect = import_library_module(
            module_name="snowflake.sqlalchemy.snowdialect"
        )
    elif dialect_name == "redshift":
        self.dialect = import_library_module(
            module_name="sqlalchemy_redshift.dialect"
        )
    elif dialect_name == "bigquery":
        self.dialect = import_library_module(
            module_name="pybigquery.sqlalchemy_bigquery"
        )
    else:
        self.dialect = None

    if self.engine and dialect_name in [
        "sqlite",
        "mssql",
        "snowflake",
    ]:
        # sqlite/mssql temp tables only persist within a connection so override the engine
        self.engine = self.engine.connect()

    # Send a connect event to provide dialect type
    if data_context is not None and getattr(
        data_context, "_usage_statistics_handler", None
    ):
        handler = data_context._usage_statistics_handler
        handler.send_usage_message(
            event="execution_engine.sqlalchemy.connect",
            event_payload={
                "anonymized_name": handler._execution_engine_anonymizer.anonymize(
                    self.name
                ),
                "sqlalchemy_dialect": self.engine.name,
            },
            success=True,
        )

    # Gather the call arguments of the present function (and add the "class_name"), filter out the Falsy values,
    # and set the instance "_config" variable equal to the resulting dictionary.
    self._config = get_currently_executing_function_call_arguments(
        **{"class_name": self.__class__.__name__}
    )
    filter_properties_dict(
        properties=self._config,
        inplace=True,
    )
def __init__(
    self,
    table_name,
    key_columns,
    fixed_length_key=True,
    credentials=None,
    url=None,
    connection_string=None,
    engine=None,
    store_name=None,
    suppress_store_backend_id=False,
    manually_initialize_store_backend_id: str = "",
    **kwargs,
):
    """Database-backed store: one row per key tuple plus a "value" column.

    Connectivity comes from exactly one of ``engine``, ``credentials``,
    ``connection_string``, or ``url`` (checked in that priority order).

    Raises:
        ge_exceptions.DataContextError: if sqlalchemy is not installed.
        ge_exceptions.InvalidConfigError: if the key is not fixed-length,
            "value" is used as a key column, or no connection info is given.
        ge_exceptions.StoreBackendError: if the table exists with an
            unexpected schema, or table creation fails.
    """
    super().__init__(
        fixed_length_key=fixed_length_key,
        suppress_store_backend_id=suppress_store_backend_id,
        manually_initialize_store_backend_id=manually_initialize_store_backend_id,
        store_name=store_name,
    )
    if not sa:
        raise ge_exceptions.DataContextError(
            "ModuleNotFoundError: No module named 'sqlalchemy'"
        )

    if not self.fixed_length_key:
        raise ge_exceptions.InvalidConfigError(
            "DatabaseStoreBackend requires use of a fixed-length-key"
        )

    self._schema_name = None
    self._credentials = credentials
    self._connection_string = connection_string
    self._url = url

    # Connection resolution priority: engine > credentials > connection_string > url.
    if engine is not None:
        if credentials is not None:
            # FIX: the warning previously said "SqlAlchemyExecutionEngine" —
            # a copy-paste from that class; this is DatabaseStoreBackend.
            logger.warning(
                "Both credentials and engine were provided during initialization of DatabaseStoreBackend. "
                "Ignoring credentials."
            )
        self.engine = engine
    elif credentials is not None:
        self.engine = self._build_engine(credentials=credentials, **kwargs)
    elif connection_string is not None:
        self.engine = sa.create_engine(connection_string, **kwargs)
    elif url is not None:
        self.drivername = urlparse(url).scheme
        self.engine = sa.create_engine(url, **kwargs)
    else:
        raise ge_exceptions.InvalidConfigError(
            "Credentials, url, connection_string, or an engine are required for a DatabaseStoreBackend."
        )

    meta = MetaData(schema=self._schema_name)
    self.key_columns = key_columns
    # Dynamically construct a SQLAlchemy table with the name and column names we'll use
    cols = []
    for column in key_columns:
        if column == "value":
            raise ge_exceptions.InvalidConfigError(
                "'value' cannot be used as a key_element name"
            )
        cols.append(Column(column, String, primary_key=True))
    cols.append(Column("value", String))
    try:
        table = Table(table_name, meta, autoload=True, autoload_with=self.engine)
        # We do a "light" check: if the columns' names match, we will proceed, otherwise, create the table
        if {str(col.name).lower() for col in table.columns} != (
            set(key_columns) | {"value"}
        ):
            raise ge_exceptions.StoreBackendError(
                f"Unable to use table {table_name}: it exists, but does not have the expected schema."
            )
    except NoSuchTableError:
        table = Table(table_name, meta, *cols)
        try:
            if self._schema_name:
                self.engine.execute(
                    f"CREATE SCHEMA IF NOT EXISTS {self._schema_name};"
                )
            meta.create_all(self.engine)
        except SQLAlchemyError as e:
            raise ge_exceptions.StoreBackendError(
                f"Unable to connect to table {table_name} because of an error. It is possible your table needs to be migrated to a new schema. SqlAlchemyError: {str(e)}"
            )
    self._table = table
    # Initialize with store_backend_id
    self._store_backend_id = None
    self._store_backend_id = self.store_backend_id

    # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter
    # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary.
    self._config = get_currently_executing_function_call_arguments(
        include_module_name=True,
        **{"class_name": self.__class__.__name__},
    )
    filter_properties_dict(properties=self._config, inplace=True)
def __init__(self, store_backend=None, runtime_environment=None):
    """Site store that fans one TupleStoreBackend config out into four backends
    (expectation suites, validation results, the index page, static assets),
    each writing under its own filepath prefix/template.

    Raises:
        DataContextError: if ``store_backend`` is missing or does not resolve
            to a TupleStoreBackend subclass.
        ClassInstantiationError: if any sub-backend fails to instantiate.
    """
    # FIX: the original dereferenced store_backend.get(...) unguarded, so the
    # default store_backend=None crashed with an opaque AttributeError.
    if store_backend is None:
        raise DataContextError(
            "Invalid configuration: HtmlSiteStore requires a store_backend configuration"
        )
    store_backend_module_name = store_backend.get(
        "module_name", "great_expectations.data_context.store"
    )
    store_backend_class_name = store_backend.get(
        "class_name", "TupleFilesystemStoreBackend"
    )
    verify_dynamic_loading_support(module_name=store_backend_module_name)
    store_class = load_class(store_backend_class_name, store_backend_module_name)

    # Store Class was loaded successfully; verify that it is of a correct subclass.
    if not issubclass(store_class, TupleStoreBackend):
        raise DataContextError(
            "Invalid configuration: HtmlSiteStore needs a TupleStoreBackend"
        )
    if "filepath_template" in store_backend or (
        "fixed_length_key" in store_backend
        and store_backend["fixed_length_key"] is True
    ):
        logger.warning(
            "Configuring a filepath_template or using fixed_length_key is not supported in SiteBuilder: "
            "filepaths will be selected based on the type of asset rendered."
        )

    # One thing to watch for is reversibility of keys.
    # If several types are being written to overlapping directories, we could get collisions.
    module_name = "great_expectations.data_context.store"

    def _instantiate_sub_backend(asset_defaults):
        # Helper: build one sub-backend from the shared store_backend config
        # plus per-asset defaults; fail loudly on a falsy result.
        obj = instantiate_class_from_config(
            config=store_backend,
            runtime_environment=runtime_environment,
            config_defaults={
                "module_name": module_name,
                "suppress_store_backend_id": True,
                **asset_defaults,
            },
        )
        if not obj:
            raise ClassInstantiationError(
                module_name=module_name,
                package_name=None,
                class_name=store_backend["class_name"],
            )
        return obj

    expectation_suite_identifier_obj = _instantiate_sub_backend(
        {"filepath_prefix": "expectations", "filepath_suffix": ".html"}
    )
    # FIX: local renamed from the original typo "validation_result_idendifier_obj".
    validation_result_identifier_obj = _instantiate_sub_backend(
        {"filepath_prefix": "validations", "filepath_suffix": ".html"}
    )
    index_page_obj = _instantiate_sub_backend({"filepath_template": "index.html"})
    static_assets_obj = _instantiate_sub_backend({"filepath_template": None})

    self.store_backends = {
        ExpectationSuiteIdentifier: expectation_suite_identifier_obj,
        ValidationResultIdentifier: validation_result_identifier_obj,
        "index_page": index_page_obj,
        "static_assets": static_assets_obj,
    }

    # NOTE: Instead of using the filesystem as the source of record for keys,
    # this class tracks keys separately in an internal set.
    # This means that keys are stored for a specific session, but can't be fetched after the original
    # HtmlSiteStore instance leaves scope.
    # Doing it this way allows us to prevent namespace collisions among keys while still having multiple
    # backends that write to the same directory structure.
    # It's a pretty reasonable way for HtmlSiteStore to do its job---you just have to remember that it
    # can't necessarily set and list_keys like most other Stores.
    self.keys = set()

    # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter
    # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary.
    self._config = get_currently_executing_function_call_arguments(
        include_module_name=True,
        **{"class_name": self.__class__.__name__},
    )
    filter_properties_dict(properties=self._config, inplace=True)