def prepare_engine_args(disable_connection_pool=False):
    """Prepare SQLAlchemy engine args"""
    default_args = {}
    for dialect, default in DEFAULT_ENGINE_ARGS.items():
        if SQL_ALCHEMY_CONN.startswith(dialect):
            default_args = default.copy()
            break

    engine_args: dict = conf.getjson('core', 'sql_alchemy_engine_args', fallback=default_args)  # type: ignore

    if disable_connection_pool or not conf.getboolean('core', 'SQL_ALCHEMY_POOL_ENABLED'):
        engine_args['poolclass'] = NullPool
        log.debug("settings.prepare_engine_args(): Using NullPool")
    elif not SQL_ALCHEMY_CONN.startswith('sqlite'):
        # Pool size engine args are not supported by sqlite.
        # If no config value is defined for the pool size, select a reasonable value.
        # 0 means no limit, which could lead to exceeding the Database connection limit.
        pool_size = conf.getint('core', 'SQL_ALCHEMY_POOL_SIZE', fallback=5)

        # The maximum overflow size of the pool.
        # When the number of checked-out connections reaches the size set in pool_size,
        # additional connections will be returned up to this limit.
        # When those additional connections are returned to the pool, they are disconnected and discarded.
        # It follows then that the total number of simultaneous connections
        # the pool will allow is pool_size + max_overflow,
        # and the total number of "sleeping" connections the pool will allow is pool_size.
        # max_overflow can be set to -1 to indicate no overflow limit;
        # no limit will be placed on the total number
        # of concurrent connections. Defaults to 10.
        max_overflow = conf.getint('core', 'SQL_ALCHEMY_MAX_OVERFLOW', fallback=10)

        # The DB server already has a value for wait_timeout (number of seconds after
        # which an idle sleeping connection should be killed). Since other DBs may
        # co-exist on the same server, SQLAlchemy should set its
        # pool_recycle to an equal or smaller value.
        pool_recycle = conf.getint('core', 'SQL_ALCHEMY_POOL_RECYCLE', fallback=1800)

        # Check connection at the start of each connection pool checkout.
        # Typically, this is a simple statement like "SELECT 1", but may also make use
        # of some DBAPI-specific method to test the connection for liveness.
        # More information here:
        # https://docs.sqlalchemy.org/en/13/core/pooling.html#disconnect-handling-pessimistic
        pool_pre_ping = conf.getboolean('core', 'SQL_ALCHEMY_POOL_PRE_PING', fallback=True)

        log.debug(
            "settings.prepare_engine_args(): Using pool settings. pool_size=%d, max_overflow=%d, "
            "pool_recycle=%d, pid=%d",
            pool_size,
            max_overflow,
            pool_recycle,
            os.getpid(),
        )
        engine_args['pool_size'] = pool_size
        engine_args['pool_recycle'] = pool_recycle
        engine_args['pool_pre_ping'] = pool_pre_ping
        engine_args['max_overflow'] = max_overflow

    # The default isolation level for MySQL (REPEATABLE READ) can introduce inconsistencies when
    # running multiple schedulers, as repeated queries on the same session may read from stale snapshots.
    # 'READ COMMITTED' is the default value for PostgreSQL.
    # More information here:
    # https://dev.mysql.com/doc/refman/8.0/en/innodb-transaction-isolation-levels.html
    # Similarly, the MSSQL default isolation level should be set to READ COMMITTED.
    # We also make sure that the READ_COMMITTED_SNAPSHOT option is on, in order to avoid deadlocks when
    # SELECT queries are running. This is enforced by default during init/upgrade.
    # More information:
    # https://docs.microsoft.com/en-us/sql/t-sql/statements/set-transaction-isolation-level-transact-sql
    if SQL_ALCHEMY_CONN.startswith(('mysql', 'mssql')):
        engine_args['isolation_level'] = 'READ COMMITTED'

    # Allow the user to specify an encoding for their DB, otherwise default
    # to utf-8 so jobs & users with non-latin1 characters can still use us.
    engine_args['encoding'] = conf.get('core', 'SQL_ENGINE_ENCODING', fallback='utf-8')

    return engine_args
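# A minimal sketch (not part of this module) of how the returned engine args dict
# could be consumed to build the SQLAlchemy engine and session factory. The names
# prepare_engine_args and SQL_ALCHEMY_CONN come from the code above; the create_engine
# call here is illustrative of SQLAlchemy 1.3-style usage, not the definitive wiring.
#
#     from sqlalchemy import create_engine
#     from sqlalchemy.orm import scoped_session, sessionmaker
#
#     engine_args = prepare_engine_args(disable_connection_pool=False)
#     engine = create_engine(SQL_ALCHEMY_CONN, **engine_args)
#     Session = scoped_session(sessionmaker(bind=engine))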
def __init__(self):
    configuration_dict = conf.as_dict(display_sensitive=True)
    self.core_configuration = configuration_dict[self.core_section]
    self.airflow_home = AIRFLOW_HOME
    self.dags_folder = conf.get(self.core_section, 'dags_folder')
    self.parallelism = conf.getint(self.core_section, 'parallelism')
    self.pod_template_file = conf.get(self.kubernetes_section, 'pod_template_file', fallback=None)

    self.delete_worker_pods = conf.getboolean(self.kubernetes_section, 'delete_worker_pods')
    self.delete_worker_pods_on_failure = conf.getboolean(
        self.kubernetes_section, 'delete_worker_pods_on_failure'
    )
    self.worker_pods_creation_batch_size = conf.getint(
        self.kubernetes_section, 'worker_pods_creation_batch_size'
    )

    self.worker_container_repository = conf.get(
        self.kubernetes_section, 'worker_container_repository'
    )
    self.worker_container_tag = conf.get(self.kubernetes_section, 'worker_container_tag')
    if self.worker_container_repository and self.worker_container_tag:
        self.kube_image = f'{self.worker_container_repository}:{self.worker_container_tag}'
    else:
        self.kube_image = None

    # The Kubernetes Namespace in which the Scheduler and Webserver reside. Note that
    # if your cluster has RBAC enabled, your scheduler may need service account permissions
    # to create, watch, get, and delete pods in this namespace.
    self.kube_namespace = conf.get(self.kubernetes_section, 'namespace')
    self.multi_namespace_mode = conf.getboolean(self.kubernetes_section, 'multi_namespace_mode')
    # The Kubernetes Namespace in which pods will be created by the executor. Note that
    # if your cluster has RBAC enabled, your workers may need service account permissions
    # to interact with cluster components.
    self.executor_namespace = conf.get(self.kubernetes_section, 'namespace')

    self.worker_pods_pending_timeout = conf.getint(
        self.kubernetes_section, 'worker_pods_pending_timeout'
    )
    self.worker_pods_pending_timeout_check_interval = conf.getint(
        self.kubernetes_section, 'worker_pods_pending_timeout_check_interval'
    )
    self.worker_pods_pending_timeout_batch_size = conf.getint(
        self.kubernetes_section, 'worker_pods_pending_timeout_batch_size'
    )
    self.worker_pods_queued_check_interval = conf.getint(
        self.kubernetes_section, 'worker_pods_queued_check_interval'
    )

    self.kube_client_request_args = conf.getjson(
        self.kubernetes_section, 'kube_client_request_args', fallback={}
    )
    if not isinstance(self.kube_client_request_args, dict):
        raise AirflowConfigException(
            f"[{self.kubernetes_section}] 'kube_client_request_args' expected a JSON dict, got "
            + type(self.kube_client_request_args).__name__
        )
    if self.kube_client_request_args:
        if '_request_timeout' in self.kube_client_request_args and isinstance(
            self.kube_client_request_args['_request_timeout'], list
        ):
            self.kube_client_request_args['_request_timeout'] = tuple(
                self.kube_client_request_args['_request_timeout']
            )
    self.delete_option_kwargs = conf.getjson(
        self.kubernetes_section, 'delete_option_kwargs', fallback={}
    )
    if not isinstance(self.delete_option_kwargs, dict):
        raise AirflowConfigException(
            f"[{self.kubernetes_section}] 'delete_option_kwargs' expected a JSON dict, got "
            + type(self.delete_option_kwargs).__name__
        )
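# A minimal, standalone sketch (not part of this module) of the kube_client_request_args
# handling above. The JSON value below is a hypothetical example: when _request_timeout is
# given as a JSON list, __init__ coerces it to the tuple form, which is how a
# (connect, read) timeout pair is typically passed to the Kubernetes client.
#
#     import json
#
#     kube_client_request_args = json.loads('{"_request_timeout": [60, 60]}')
#     if isinstance(kube_client_request_args.get('_request_timeout'), list):
#         # mirror the list-to-tuple coercion performed in __init__
#         kube_client_request_args['_request_timeout'] = tuple(
#             kube_client_request_args['_request_timeout']
#         )
#     print(kube_client_request_args)  # {'_request_timeout': (60, 60)}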