Example #1
File: settings.py  Project: pingzh/airflow
def prepare_engine_args(disable_connection_pool=False):
    """Prepare SQLAlchemy engine args"""
    default_args = {}
    for dialect, default in DEFAULT_ENGINE_ARGS.items():
        if SQL_ALCHEMY_CONN.startswith(dialect):
            default_args = default.copy()
            break

    engine_args: dict = conf.getjson('core', 'sql_alchemy_engine_args', fallback=default_args)  # type: ignore

    if disable_connection_pool or not conf.getboolean('core', 'SQL_ALCHEMY_POOL_ENABLED'):
        engine_args['poolclass'] = NullPool
        log.debug("settings.prepare_engine_args(): Using NullPool")
    elif not SQL_ALCHEMY_CONN.startswith('sqlite'):
        # Pool size engine args not supported by sqlite.
        # If no config value is defined for the pool size, select a reasonable value.
        # 0 means no limit, which could lead to exceeding the Database connection limit.
        pool_size = conf.getint('core', 'SQL_ALCHEMY_POOL_SIZE', fallback=5)

        # The maximum overflow size of the pool.
        # When the number of checked-out connections reaches the size set in pool_size,
        # additional connections will be returned up to this limit.
        # When those additional connections are returned to the pool, they are disconnected and discarded.
        # It follows then that the total number of simultaneous connections
        # the pool will allow is pool_size + max_overflow,
        # and the total number of “sleeping” connections the pool will allow is pool_size.
        # max_overflow can be set to -1 to indicate no overflow limit;
        # no limit will be placed on the total number
        # of concurrent connections. Defaults to 10.
        max_overflow = conf.getint('core', 'SQL_ALCHEMY_MAX_OVERFLOW', fallback=10)

        # The DB server already has a value for wait_timeout (number of seconds after
        # which an idle sleeping connection should be killed). Since other DBs may
        # co-exist on the same server, SQLAlchemy should set its
        # pool_recycle to an equal or smaller value.
        pool_recycle = conf.getint('core', 'SQL_ALCHEMY_POOL_RECYCLE', fallback=1800)

        # Check connection at the start of each connection pool checkout.
        # Typically, this is a simple statement like “SELECT 1”, but may also make use
        # of some DBAPI-specific method to test the connection for liveness.
        # More information here:
        # https://docs.sqlalchemy.org/en/13/core/pooling.html#disconnect-handling-pessimistic
        pool_pre_ping = conf.getboolean('core', 'SQL_ALCHEMY_POOL_PRE_PING', fallback=True)

        log.debug(
            "settings.prepare_engine_args(): Using pool settings. pool_size=%d, max_overflow=%d, "
            "pool_recycle=%d, pid=%d",
            pool_size,
            max_overflow,
            pool_recycle,
            os.getpid(),
        )
        engine_args['pool_size'] = pool_size
        engine_args['pool_recycle'] = pool_recycle
        engine_args['pool_pre_ping'] = pool_pre_ping
        engine_args['max_overflow'] = max_overflow

    # The default isolation level for MySQL (REPEATABLE READ) can introduce inconsistencies when
    # running multiple schedulers, as repeated queries on the same session may read from stale snapshots.
    # 'READ COMMITTED' is the default value for PostgreSQL.
    # More information here:
    # https://dev.mysql.com/doc/refman/8.0/en/innodb-transaction-isolation-levels.html

    # Similarly MSSQL default isolation level should be set to READ COMMITTED.
    # We also make sure that READ_COMMITTED_SNAPSHOT option is on, in order to avoid deadlocks when
    # Select queries are running. This is by default enforced during init/upgrade. More information:
    # https://docs.microsoft.com/en-us/sql/t-sql/statements/set-transaction-isolation-level-transact-sql

    if SQL_ALCHEMY_CONN.startswith(('mysql', 'mssql')):
        engine_args['isolation_level'] = 'READ COMMITTED'

    # Allow the user to specify an encoding for their DB otherwise default
    # to utf-8 so jobs & users with non-latin1 characters can still use us.
    engine_args['encoding'] = conf.get('core', 'SQL_ENGINE_ENCODING', fallback='utf-8')

    return engine_args
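
The dict built above is meant to be unpacked straight into SQLAlchemy's create_engine. A minimal sketch of that wiring is below; the configure_orm_sketch wrapper is illustrative and not part of the project code, and it assumes SQLAlchemy 1.x, where the encoding keyword is still accepted.

from sqlalchemy import create_engine
from sqlalchemy.orm import scoped_session, sessionmaker

def configure_orm_sketch(sql_alchemy_conn: str):
    # Illustrative wrapper (not Airflow's actual configure_orm).
    # prepare_engine_args() returns a plain dict, e.g. for a MySQL URL:
    # {'pool_size': 5, 'max_overflow': 10, 'pool_recycle': 1800,
    #  'pool_pre_ping': True, 'isolation_level': 'READ COMMITTED',
    #  'encoding': 'utf-8'}; with pooling disabled it carries 'poolclass': NullPool.
    engine_args = prepare_engine_args()

    # The args are passed to create_engine as keyword arguments.
    engine = create_engine(sql_alchemy_conn, **engine_args)

    # Typical ORM setup: a scoped session factory bound to that engine.
    return engine, scoped_session(sessionmaker(bind=engine, autoflush=False))
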
Example #2
    def __init__(self):
        configuration_dict = conf.as_dict(display_sensitive=True)
        self.core_configuration = configuration_dict[self.core_section]
        self.airflow_home = AIRFLOW_HOME
        self.dags_folder = conf.get(self.core_section, 'dags_folder')
        self.parallelism = conf.getint(self.core_section, 'parallelism')
        self.pod_template_file = conf.get(self.kubernetes_section,
                                          'pod_template_file',
                                          fallback=None)

        self.delete_worker_pods = conf.getboolean(self.kubernetes_section,
                                                  'delete_worker_pods')
        self.delete_worker_pods_on_failure = conf.getboolean(
            self.kubernetes_section, 'delete_worker_pods_on_failure')
        self.worker_pods_creation_batch_size = conf.getint(
            self.kubernetes_section, 'worker_pods_creation_batch_size')

        self.worker_container_repository = conf.get(
            self.kubernetes_section, 'worker_container_repository')
        self.worker_container_tag = conf.get(self.kubernetes_section,
                                             'worker_container_tag')
        if self.worker_container_repository and self.worker_container_tag:
            self.kube_image = f'{self.worker_container_repository}:{self.worker_container_tag}'
        else:
            self.kube_image = None

        # The Kubernetes Namespace in which the Scheduler and Webserver reside.
        # Note that if your cluster has RBAC enabled, your scheduler may need
        # service account permissions to create, watch, get, and delete pods in
        # this namespace.
        self.kube_namespace = conf.get(self.kubernetes_section, 'namespace')
        self.multi_namespace_mode = conf.getboolean(self.kubernetes_section,
                                                    'multi_namespace_mode')
        # The Kubernetes Namespace in which pods will be created by the executor.
        # Note that if your cluster has RBAC enabled, your workers may need
        # service account permissions to interact with cluster components.
        self.executor_namespace = conf.get(self.kubernetes_section,
                                           'namespace')

        self.worker_pods_pending_timeout = conf.getint(
            self.kubernetes_section, 'worker_pods_pending_timeout')
        self.worker_pods_pending_timeout_check_interval = conf.getint(
            self.kubernetes_section,
            'worker_pods_pending_timeout_check_interval')
        self.worker_pods_pending_timeout_batch_size = conf.getint(
            self.kubernetes_section, 'worker_pods_pending_timeout_batch_size')
        self.worker_pods_queued_check_interval = conf.getint(
            self.kubernetes_section, 'worker_pods_queued_check_interval')

        self.kube_client_request_args = conf.getjson(
            self.kubernetes_section, 'kube_client_request_args', fallback={})
        if not isinstance(self.kube_client_request_args, dict):
            raise AirflowConfigException(
                f"[{self.kubernetes_section}] 'kube_client_request_args' expected a JSON dict, got "
                + type(self.kube_client_request_args).__name__)
        # JSON has no tuple type, so a list-valued '_request_timeout' is converted
        # to the (connect, read) tuple form the Kubernetes client accepts.
        if '_request_timeout' in self.kube_client_request_args and isinstance(
                self.kube_client_request_args['_request_timeout'], list):
            self.kube_client_request_args['_request_timeout'] = tuple(
                self.kube_client_request_args['_request_timeout'])
        self.delete_option_kwargs = conf.getjson(self.kubernetes_section,
                                                 'delete_option_kwargs',
                                                 fallback={})
        if not isinstance(self.delete_option_kwargs, dict):
            raise AirflowConfigException(
                f"[{self.kubernetes_section}] 'delete_option_kwargs' expected a JSON dict, got "
                + type(self.delete_option_kwargs).__name__)
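
The list-to-tuple conversion for '_request_timeout' above exists because JSON has no tuple type, while the Kubernetes Python client accepts either a single number or a (connect, read) timeout tuple for that parameter. A small self-contained illustration of the parsed value before and after the conversion (the timeout values here are made up):

import json

# conf.getjson() ultimately parses JSON, so a config value such as
# '{"_request_timeout": [60, 60]}' yields a Python list, not a tuple.
kube_client_request_args = json.loads('{"_request_timeout": [60, 60]}')
assert isinstance(kube_client_request_args['_request_timeout'], list)

# Mirror of the conversion done in __init__ above.
if isinstance(kube_client_request_args.get('_request_timeout'), list):
    kube_client_request_args['_request_timeout'] = tuple(
        kube_client_request_args['_request_timeout'])

assert kube_client_request_args['_request_timeout'] == (60, 60)
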