def dialect(self) -> sa.engine.Dialect:
    """Return the SQLAlchemy ``Dialect`` for this engine's configured backend.

    Backends with dialects that live outside core SQLAlchemy are resolved via
    an explicit module lookup; anything else falls back to the dialect that
    SQLAlchemy infers from the connection string.
    """
    # TODO: AJB 20220512 move this dialect retrieval to a separate class from
    # the SqlAlchemyExecutionEngine and then use it here.
    dialect_name: GESqlDialect = self._dialect_name

    # Backends whose dialect class is provided by a dedicated module.
    # WARNING: Dremio and Teradata support are experimental; functionality is
    # not fully under test.
    special_dialect_modules = {
        GESqlDialect.ORACLE: "sqlalchemy.dialects.oracle",
        GESqlDialect.SNOWFLAKE: "snowflake.sqlalchemy.snowdialect",
        GESqlDialect.DREMIO: "sqlalchemy_dremio.pyodbc",
        GESqlDialect.BIGQUERY: self._BIGQUERY_MODULE_NAME,
        GESqlDialect.TERADATASQL: "teradatasqlalchemy.dialect",
    }

    # NOTE: AJB 20220512 Redshift dialect is not yet fully supported.
    # The below throws an
    # `AttributeError: type object 'RedshiftDialect_psycopg2' has no attribute 'positional'`
    # elif dialect_name == "redshift":
    #     return import_library_module(
    #         module_name="sqlalchemy_redshift.dialect"
    #     ).RedshiftDialect

    module_name = special_dialect_modules.get(dialect_name)
    if module_name is not None:
        return import_library_module(module_name=module_name).dialect()

    # Default: let SQLAlchemy pick the dialect from the connection string.
    return sa.create_engine(self._connection_string).dialect
def reload_modules(module_names: list) -> None:
    """Best-effort reload of each named module.

    Modules that cannot be imported are skipped, and a ``RuntimeError`` raised
    by ``importlib.reload`` is deliberately swallowed so one bad module does
    not abort the rest of the batch.
    """
    name: str
    for name in module_names:
        loaded: Optional[ModuleType] = import_library_module(module_name=name)
        if loaded is None:
            continue
        try:
            importlib.reload(loaded)
        except RuntimeError:
            pass
def __init__(
    self,
    name: Optional[str] = None,
    credentials: Optional[dict] = None,
    data_context: Optional[Any] = None,
    engine=None,
    connection_string: Optional[str] = None,
    url: Optional[str] = None,
    batch_data_dict: Optional[dict] = None,
    create_temp_table: bool = True,
    concurrency: Optional[ConcurrencyConfig] = None,
    **kwargs,  # These will be passed as optional parameters to the SQLAlchemy engine, **not** the ExecutionEngine
) -> None:
    """Builds a SqlAlchemyExecutionEngine, using a provided connection string/url/engine/credentials to access the
    desired database.

    Also initializes the dialect to be used and configures usage statistics.

    Args:
        name (str): \
            The name of the SqlAlchemyExecutionEngine
        credentials: \
            If the Execution Engine is not provided, the credentials can be used to build the Execution Engine.
            If the Engine is provided, it will be used instead
        data_context (DataContext): \
            An object representing a Great Expectations project that can be used to access Expectation
            Suites and the Project Data itself
        engine (Engine): \
            A SqlAlchemy Engine used to set the SqlAlchemyExecutionEngine being configured, useful if an
            Engine has already been configured and should be reused. Will override Credentials
            if provided.
        connection_string (string): \
            If neither the engines nor the credentials have been provided, a connection string can be used
            to access the data. This will be overridden by both the engine and credentials if those are provided.
        url (string): \
            If neither the engines, the credentials, nor the connection_string have been provided,
            a url can be used to access the data. This will be overridden by all other configuration
            options if any are provided.
        batch_data_dict (dict): \
            Optional mapping of batch ids to batch data, forwarded to the base ExecutionEngine.
        create_temp_table (bool): \
            Whether temp tables may be created; forced to False for backends that do not support them.
        concurrency (ConcurrencyConfig): Concurrency config used to configure the sqlalchemy engine.
    """
    super().__init__(name=name, batch_data_dict=batch_data_dict)
    self._name = name

    self._credentials = credentials
    self._connection_string = connection_string
    self._url = url
    self._create_temp_table = create_temp_table

    if engine is not None:
        # A pre-built engine wins over credentials; warn when both were given.
        if credentials is not None:
            logger.warning(
                "Both credentials and engine were provided during initialization of SqlAlchemyExecutionEngine. "
                "Ignoring credentials."
            )
        self.engine = engine
    else:
        # No engine supplied: build one. Concurrency settings come from the
        # data context when available, otherwise from the defaults, and are
        # merged into the create_engine kwargs.
        concurrency: ConcurrencyConfig
        if data_context is None or data_context.concurrency is None:
            concurrency = ConcurrencyConfig()
        else:
            concurrency = data_context.concurrency

        concurrency.add_sqlalchemy_create_engine_parameters(kwargs)

        # Resolution order: credentials > connection_string > url.
        if credentials is not None:
            self.engine = self._build_engine(credentials=credentials, **kwargs)
        elif connection_string is not None:
            self.engine = sa.create_engine(connection_string, **kwargs)
        elif url is not None:
            parsed_url = make_url(url)
            self.drivername = parsed_url.drivername
            self.engine = sa.create_engine(url, **kwargs)
        else:
            raise InvalidConfigError(
                "Credentials or an engine are required for a SqlAlchemyExecutionEngine."
            )

    # these are two backends where temp_table_creation is not supported we set the default value to False.
    if self.engine.dialect.name.lower() in [
        "trino",
        "awsathena",  # WKS 202201 - AWS Athena currently doesn't support temp_tables.
    ]:
        self._create_temp_table = False

    # Get the dialect **for purposes of identifying types**
    if self.engine.dialect.name.lower() in [
        "postgresql",
        "mysql",
        "sqlite",
        "oracle",
        "mssql",
    ]:
        # These are the officially included and supported dialects by sqlalchemy
        self.dialect_module = import_library_module(
            module_name=f"sqlalchemy.dialects.{self.engine.dialect.name}"
        )
    elif self.engine.dialect.name.lower() == "snowflake":
        self.dialect_module = import_library_module(
            module_name="snowflake.sqlalchemy.snowdialect"
        )
    elif self.engine.dialect.name.lower() == "dremio":
        # WARNING: Dremio Support is experimental, functionality is not fully under test
        self.dialect_module = import_library_module(
            module_name="sqlalchemy_dremio.pyodbc"
        )
    elif self.engine.dialect.name.lower() == "redshift":
        self.dialect_module = import_library_module(
            module_name="sqlalchemy_redshift.dialect"
        )
    elif self.engine.dialect.name.lower() == "bigquery":
        self.dialect_module = import_library_module(
            module_name=_BIGQUERY_MODULE_NAME
        )
    elif self.engine.dialect.name.lower() == "teradatasql":
        # WARNING: Teradata Support is experimental, functionality is not fully under test
        self.dialect_module = import_library_module(
            module_name="teradatasqlalchemy.dialect"
        )
    else:
        # Unknown backend: no type-identification module available.
        self.dialect_module = None

    # <WILL> 20210726 - engine_backup is used by the snowflake connector, which requires connection and engine
    # to be closed and disposed separately. Currently self.engine can refer to either a Connection or Engine,
    # depending on the backend. This will need to be cleaned up in an upcoming refactor, so that Engine and
    # Connection can be handled separately.
    self._engine_backup = None
    if self.engine and self.engine.dialect.name.lower() in [
        "sqlite",
        "mssql",
        "snowflake",
        "mysql",
    ]:
        self._engine_backup = self.engine
        # sqlite/mssql temp tables only persist within a connection so override the engine
        self.engine = self.engine.connect()

    # Send a connect event to provide dialect type
    if data_context is not None and getattr(
        data_context, "_usage_statistics_handler", None
    ):
        handler = data_context._usage_statistics_handler
        handler.send_usage_message(
            event=UsageStatsEvents.EXECUTION_ENGINE_SQLALCHEMY_CONNECT.value,
            event_payload={
                "anonymized_name": handler.anonymizer.anonymize(self.name),
                "sqlalchemy_dialect": self.engine.name,
            },
            success=True,
        )

    # Gather the call arguments of the present function (and add the "class_name"), filter out the Falsy values,
    # and set the instance "_config" variable equal to the resulting dictionary.
    self._config = {
        "name": name,
        "credentials": credentials,
        "data_context": data_context,
        "engine": engine,
        "connection_string": connection_string,
        "url": url,
        "batch_data_dict": batch_data_dict,
        "module_name": self.__class__.__module__,
        "class_name": self.__class__.__name__,
    }
    self._config.update(kwargs)
    filter_properties_dict(properties=self._config, clean_falsy=True, inplace=True)

    self._data_splitter = SqlAlchemyDataSplitter()
    self._data_sampler = SqlAlchemyDataSampler()
def __init__(
    self,
    name=None,
    credentials=None,
    data_context=None,
    engine=None,
    connection_string=None,
    url=None,
    batch_data_dict=None,
    **kwargs,  # These will be passed as optional parameters to the SQLAlchemy engine, **not** the ExecutionEngine
):
    """Builds a SqlAlchemyExecutionEngine, using a provided connection string/url/engine/credentials to access the
    desired database.

    Also initializes the dialect to be used and configures usage statistics.

    Args:
        name (str): \
            The name of the SqlAlchemyExecutionEngine
        credentials: \
            If the Execution Engine is not provided, the credentials can be used to build the Execution Engine.
            If the Engine is provided, it will be used instead
        data_context (DataContext): \
            An object representing a Great Expectations project that can be used to access Expectation
            Suites and the Project Data itself
        engine (Engine): \
            A SqlAlchemy Engine used to set the SqlAlchemyExecutionEngine being configured, useful if an
            Engine has already been configured and should be reused. Will override Credentials
            if provided.
        connection_string (string): \
            If neither the engines nor the credentials have been provided, a connection string can be used
            to access the data. This will be overridden by both the engine and credentials if those are provided.
        url (string): \
            If neither the engines, the credentials, nor the connection_string have been provided,
            a url can be used to access the data. This will be overridden by all other configuration
            options if any are provided.
    """
    super().__init__(name=name, batch_data_dict=batch_data_dict)  # , **kwargs)
    self._name = name

    self._credentials = credentials
    self._connection_string = connection_string
    self._url = url

    # Resolution order: engine > credentials > connection_string > url.
    if engine is not None:
        if credentials is not None:
            logger.warning(
                "Both credentials and engine were provided during initialization of SqlAlchemyExecutionEngine. "
                "Ignoring credentials."
            )
        self.engine = engine
    elif credentials is not None:
        self.engine = self._build_engine(credentials=credentials, **kwargs)
    elif connection_string is not None:
        self.engine = sa.create_engine(connection_string, **kwargs)
    elif url is not None:
        self.drivername = urlparse(url).scheme
        self.engine = sa.create_engine(url, **kwargs)
    else:
        raise InvalidConfigError(
            "Credentials or an engine are required for a SqlAlchemyExecutionEngine."
        )

    # Get the dialect **for purposes of identifying types**
    # FIX: the original list contained "oracle" twice; the duplicate entry has
    # been removed (membership testing is unaffected).
    if self.engine.dialect.name.lower() in [
        "postgresql",
        "mysql",
        "sqlite",
        "oracle",
        "mssql",
    ]:
        # These are the officially included and supported dialects by sqlalchemy
        self.dialect = import_library_module(
            module_name="sqlalchemy.dialects." + self.engine.dialect.name
        )
    elif self.engine.dialect.name.lower() == "snowflake":
        self.dialect = import_library_module(
            module_name="snowflake.sqlalchemy.snowdialect"
        )
    elif self.engine.dialect.name.lower() == "redshift":
        self.dialect = import_library_module(
            module_name="sqlalchemy_redshift.dialect"
        )
    elif self.engine.dialect.name.lower() == "bigquery":
        self.dialect = import_library_module(
            module_name="pybigquery.sqlalchemy_bigquery"
        )
    else:
        # Unknown backend: no type-identification module available.
        self.dialect = None

    if self.engine and self.engine.dialect.name.lower() in [
        "sqlite",
        "mssql",
        "snowflake",
    ]:
        # sqlite/mssql temp tables only persist within a connection so override the engine
        self.engine = self.engine.connect()

    # Send a connect event to provide dialect type
    if data_context is not None and getattr(
        data_context, "_usage_statistics_handler", None
    ):
        handler = data_context._usage_statistics_handler
        handler.send_usage_message(
            event="execution_engine.sqlalchemy.connect",
            event_payload={
                "anonymized_name": handler._execution_engine_anonymizer.anonymize(
                    self.name
                ),
                "sqlalchemy_dialect": self.engine.name,
            },
            success=True,
        )

    # Gather the call arguments of the present function (and add the "class_name"), filter out the Falsy values,
    # and set the instance "_config" variable equal to the resulting dictionary.
    self._config = get_currently_executing_function_call_arguments(
        **{"class_name": self.__class__.__name__}
    )
    filter_properties_dict(
        properties=self._config,
        inplace=True,
    )
def is_library_loadable(library_name: str) -> bool:
    """Return True when ``library_name`` can be imported, False otherwise."""
    return import_library_module(module_name=library_name) is not None