def worker(args):
    """Starts Airflow Celery worker"""
    env = os.environ.copy()
    env['AIRFLOW_HOME'] = settings.AIRFLOW_HOME

    if not settings.validate_session():
        print("Worker exiting... database connection precheck failed!")
        sys.exit(1)

    autoscale = args.autoscale
    skip_serve_logs = args.skip_serve_logs

    if autoscale is None and conf.has_option("celery", "worker_autoscale"):
        autoscale = conf.get("celery", "worker_autoscale")

    worker_instance = worker_bin.worker(app=celery_app)
    options = {
        'optimization': 'fair',
        'O': 'fair',
        'queues': args.queues,
        'concurrency': args.concurrency,
        'autoscale': autoscale,
        'hostname': args.celery_hostname,
        'loglevel': conf.get('logging', 'LOGGING_LEVEL'),
    }

    if conf.has_option("celery", "pool"):
        options["pool"] = conf.get("celery", "pool")

    if args.daemon:
        pid, stdout, stderr, log_file = setup_locations(
            "worker", args.pid, args.stdout, args.stderr, args.log_file)
        handle = setup_logging(log_file)
        stdout = open(stdout, 'w+')
        stderr = open(stderr, 'w+')

        ctx = daemon.DaemonContext(
            pidfile=TimeoutPIDLockFile(pid, -1),
            files_preserve=[handle],
            stdout=stdout,
            stderr=stderr,
        )
        with ctx:
            sub_proc = _serve_logs(skip_serve_logs)
            worker_instance.run(**options)

        stdout.close()
        stderr.close()
    else:
        signal.signal(signal.SIGINT, sigint_handler)
        signal.signal(signal.SIGTERM, sigint_handler)

        sub_proc = _serve_logs(skip_serve_logs)
        worker_instance.run(**options)

    if sub_proc:
        sub_proc.terminate()

def test_config_use_original_when_original_and_fallback_are_present(self):
    assert conf.has_option("core", "FERNET_KEY")
    assert not conf.has_option("core", "FERNET_KEY_CMD")

    fernet_key = conf.get('core', 'FERNET_KEY')

    with conf_vars({('core', 'FERNET_KEY_CMD'): 'printf HELLO'}):
        fallback_fernet_key = conf.get("core", "FERNET_KEY")

    assert fernet_key == fallback_fernet_key

def test_config_use_original_when_original_and_fallback_are_present(self):
    self.assertTrue(conf.has_option("core", "FERNET_KEY"))
    self.assertFalse(conf.has_option("core", "FERNET_KEY_CMD"))

    fernet_key = conf.get('core', 'FERNET_KEY')

    with conf_vars({('core', 'FERNET_KEY_CMD'): 'printf HELLO'}):
        fallback_fernet_key = conf.get("core", "FERNET_KEY")

    self.assertEqual(fernet_key, fallback_fernet_key)

def test_config_throw_error_when_original_and_fallback_is_absent(self):
    assert conf.has_option("core", "FERNET_KEY")
    assert not conf.has_option("core", "FERNET_KEY_CMD")

    with conf_vars({('core', 'fernet_key'): None}):
        with pytest.raises(AirflowConfigException) as ctx:
            conf.get("core", "FERNET_KEY")

    exception = str(ctx.value)
    message = "section/key [core/fernet_key] not found in config"
    assert message == exception

def test_config_throw_error_when_original_and_fallback_is_absent(self):
    self.assertTrue(conf.has_option("core", "FERNET_KEY"))
    self.assertFalse(conf.has_option("core", "FERNET_KEY_CMD"))

    with conf_vars({('core', 'fernet_key'): None}):
        with self.assertRaises(AirflowConfigException) as cm:
            conf.get("core", "FERNET_KEY")

    exception = str(cm.exception)
    message = "section/key [core/fernet_key] not found in config"
    self.assertEqual(message, exception)

def get_statsd_logger(cls):
    """Returns logger for statsd"""
    # No need to check scheduler/statsd_on here -> this method is only called
    # when it is set; previously, calling it without that guard crashed with
    # a "'NoneType' object is not callable" error.
    from statsd import StatsClient

    if conf.has_option('scheduler', 'statsd_custom_client_path'):
        stats_class = conf.getimport('scheduler', 'statsd_custom_client_path')

        if not issubclass(stats_class, StatsClient):
            raise AirflowConfigException(
                "Your custom Statsd client must extend the statsd.StatsClient in order to ensure "
                "backwards compatibility.")
        else:
            log.info("Successfully loaded custom Statsd client")
    else:
        stats_class = StatsClient

    statsd = stats_class(
        host=conf.get('scheduler', 'statsd_host'),
        port=conf.getint('scheduler', 'statsd_port'),
        prefix=conf.get('scheduler', 'statsd_prefix'))
    allow_list_validator = AllowListValidator(
        conf.get('scheduler', 'statsd_allow_list', fallback=None))
    return SafeStatsdLogger(statsd, allow_list_validator)

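A minimal sketch of the kind of class statsd_custom_client_path can point to, assuming the statsd package is installed; the module path, class name, and prefixing behavior below are hypothetical, not part of Airflow:

# Hedged sketch of a custom client the option above could load. The module
# path "my_company.stats" and the class name are hypothetical; airflow.cfg
# would contain:
#   [scheduler]
#   statsd_custom_client_path = my_company.stats.TaggedStatsClient
from statsd import StatsClient


class TaggedStatsClient(StatsClient):
    """Subclassing statsd.StatsClient satisfies the issubclass() check above."""

    def incr(self, stat, count=1, rate=1):
        # Illustrative tweak: namespace every counter under a company prefix.
        return super().incr(f"mycompany.{stat}", count, rate)
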
def get_statsd_logger(self):
    if conf.getboolean('scheduler', 'statsd_on'):
        from statsd import StatsClient

        if conf.has_option('scheduler', 'statsd_custom_client_path'):
            custom_statsd_module_path = conf.get('scheduler', 'statsd_custom_client_path')

            try:
                stats_class = import_string(custom_statsd_module_path)
                if not issubclass(stats_class, StatsClient):
                    raise Exception(
                        "Your custom Statsd client must extend the statsd.StatsClient "
                        "in order to ensure backwards compatibility.")
                else:
                    log.info("Successfully loaded custom Statsd client "
                             f"from {custom_statsd_module_path}")
            except Exception as err:
                raise ImportError(
                    'Unable to load custom Statsd client from '
                    f'{custom_statsd_module_path} due to {err}')
        else:
            stats_class = StatsClient

        statsd = stats_class(
            host=conf.get('scheduler', 'statsd_host'),
            port=conf.getint('scheduler', 'statsd_port'),
            prefix=conf.get('scheduler', 'statsd_prefix'))
        allow_list_validator = AllowListValidator(
            conf.get('scheduler', 'statsd_allow_list', fallback=None))
        return SafeStatsdLogger(statsd, allow_list_validator)

@contextmanager  # needs: from contextlib import contextmanager; restored here so the `with conf_vars(...)` usage above works
def conf_vars(overrides):
    original = {}
    original_env_vars = {}
    for (section, key), value in overrides.items():
        env = conf._env_var_name(section, key)
        if env in os.environ:
            original_env_vars[env] = os.environ.pop(env)

        if conf.has_option(section, key):
            original[(section, key)] = conf.get(section, key)
        else:
            original[(section, key)] = None
        if value is not None:
            conf.set(section, key, value)
        else:
            conf.remove_option(section, key)

    settings.configure_vars()
    try:
        yield
    finally:
        for (section, key), value in original.items():
            if value is not None:
                conf.set(section, key, value)
            else:
                conf.remove_option(section, key)
        for env, value in original_env_vars.items():
            os.environ[env] = value
        settings.configure_vars()

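A minimal usage sketch of conf_vars, assuming pytest-style tests and that conf is airflow.configuration.conf; the overridden key is illustrative:

# Hedged sketch: ('core', 'unit_test_mode') is just an example key.
from airflow.configuration import conf


def test_override_is_scoped():
    with conf_vars({('core', 'unit_test_mode'): 'True'}):
        # The override is visible only inside the with-block...
        assert conf.getboolean('core', 'unit_test_mode')
    # ...and the prior value (or absence) is restored on exit, even on error.
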
def configure_orm(disable_connection_pool=False):
    """Configure ORM using SQLAlchemy"""
    log.debug("Setting up DB connection pool (PID %s)", os.getpid())
    global engine
    global Session

    engine_args = prepare_engine_args(disable_connection_pool)

    # Allow the user to specify an encoding for their DB otherwise default
    # to utf-8 so jobs & users with non-latin1 characters can still use us.
    engine_args['encoding'] = conf.get('core', 'SQL_ENGINE_ENCODING', fallback='utf-8')

    if conf.has_option('core', 'sql_alchemy_connect_args'):
        connect_args = conf.getimport('core', 'sql_alchemy_connect_args')
    else:
        connect_args = {}

    engine = create_engine(SQL_ALCHEMY_CONN, connect_args=connect_args, **engine_args)
    setup_event_handlers(engine)

    Session = scoped_session(
        sessionmaker(
            autocommit=False,
            autoflush=False,
            bind=engine,
            expire_on_commit=False,
        )
    )

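Note that sql_alchemy_connect_args holds an import path rather than the arguments themselves: conf.getimport() imports the named object and hands it to create_engine(). A hypothetical target module (name and values are illustrative):

# Hypothetical module my_pkg/db.py; airflow.cfg would then contain:
#   [core]
#   sql_alchemy_connect_args = my_pkg.db.connect_args
# conf.getimport() above resolves this dict by its dotted path.
connect_args = {
    'connect_timeout': 30,  # illustrative driver-specific option
}
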
def _resolve_vineyard_xcom_options():
    options = {}
    if conf.has_option('vineyard', 'persist'):
        options['persist'] = conf.getboolean('vineyard', 'persist')
    else:
        options['persist'] = False
    if conf.has_option('vineyard', 'ipc_socket'):
        options['ipc_socket'] = conf.get('vineyard', 'ipc_socket')
    else:
        if 'VINEYARD_IPC_SOCKET' in os.environ:
            options['ipc_socket'] = os.environ['VINEYARD_IPC_SOCKET']
        else:
            raise RuntimeError(
                "Failed to find vineyard IPC socket configuration; "
                "please configure it using the environment variable "
                "$VINEYARD_IPC_SOCKET, or via airflow's vineyard.ipc_socket configuration.")
    return options

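A quick sketch of the fallback order, assuming no [vineyard] section is configured; the socket path is hypothetical:

# Hedged sketch: with no [vineyard] options set, resolution falls back to
# the VINEYARD_IPC_SOCKET environment variable.
import os

os.environ['VINEYARD_IPC_SOCKET'] = '/tmp/vineyard.sock'  # hypothetical path
assert _resolve_vineyard_xcom_options() == {
    'persist': False,
    'ipc_socket': '/tmp/vineyard.sock',
}
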
@contextmanager  # needs: from contextlib import contextmanager
def conf_vars(overrides):
    original = {}
    original_env_vars = {}
    reconfigure_vars = False
    for (section, key), value in overrides.items():
        env = conf._env_var_name(section, key)
        if env in os.environ:
            original_env_vars[env] = os.environ.pop(env)

        if conf.has_option(section, key):
            original[(section, key)] = conf.get(section, key)
        else:
            original[(section, key)] = None
        if value is not None:
            conf.set(section, key, value)
        else:
            conf.remove_option(section, key)
        if section == 'core' and key.lower().endswith('_folder'):
            reconfigure_vars = True

    if reconfigure_vars:
        settings.configure_vars()

    try:
        yield
    finally:
        # Restore the original state even if the body raises.
        for (section, key), value in original.items():
            if value is not None:
                conf.set(section, key, value)
            else:
                conf.remove_option(section, key)
        for env, value in original_env_vars.items():
            os.environ[env] = value
        if reconfigure_vars:
            settings.configure_vars()

def get_default(key, default=None):
    from airflow.configuration import conf

    if conf.has_option("airflowdocker", key):
        return conf.get("airflowdocker", key)
    else:
        return default

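A short usage sketch; the [airflowdocker] section contents below are hypothetical:

# Assuming airflow.cfg contains:
#   [airflowdocker]
#   force_pull = True
assert get_default("force_pull") == "True"           # present -> raw config string
assert get_default("network", "bridge") == "bridge"  # absent  -> the provided default
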
def get_logger_address(self):
    if conf.has_option('scheduler', 'statsd_socket_path'):
        statsd_socket_path = conf.get('scheduler', 'statsd_socket_path')
        if statsd_socket_path:
            return {'socket_path': statsd_socket_path}
    host = conf.get('scheduler', 'statsd_host')
    port = conf.getint('scheduler', 'statsd_port')
    return {'host': host, 'port': port}

def test_env_var_config(self):
    opt = conf.get('testsection', 'testkey')
    self.assertEqual(opt, 'testvalue')

    opt = conf.get('testsection', 'testpercent')
    self.assertEqual(opt, 'with%percent')

    self.assertTrue(conf.has_option('testsection', 'testkey'))

def _get_multiprocessing_start_method(self):
    """
    Determine the method for creating new processes: use core/mp_start_method
    from the config if set, otherwise fall back to the OS default.
    """
    if conf.has_option('core', 'mp_start_method'):
        return conf.get('core', 'mp_start_method')
    return multiprocessing.get_start_method()

def __init__(cls, *args, **kwargs):
    super().__init__(cls)
    if cls.__class__.factory is None:
        is_datadog_enabled_defined = conf.has_option('metrics', 'statsd_datadog_enabled')
        if is_datadog_enabled_defined and conf.getboolean('metrics', 'statsd_datadog_enabled'):
            cls.__class__.factory = cls.get_dogstatsd_logger
        elif conf.getboolean('metrics', 'statsd_on'):
            cls.__class__.factory = cls.get_statsd_logger
        else:
            cls.__class__.factory = DummyStatsLogger

def test_store_dag_code_config_when_set(self):
    store_serialized_dags = conf.getboolean('core', 'store_serialized_dags', fallback=False)
    store_dag_code = conf.getboolean("core", "store_dag_code", fallback=store_serialized_dags)

    assert conf.has_option("core", "store_dag_code")
    assert store_serialized_dags
    assert not store_dag_code

def test_store_dag_code_config_when_set(self):
    store_serialized_dags = conf.getboolean('core', 'store_serialized_dags', fallback=False)
    store_dag_code = conf.getboolean("core", "store_dag_code", fallback=store_serialized_dags)

    self.assertTrue(conf.has_option("core", "store_dag_code"))
    self.assertTrue(store_serialized_dags)
    self.assertFalse(store_dag_code)

def configure_orm(disable_connection_pool=False):
    """Configure ORM using SQLAlchemy"""
    from airflow.utils.log.secrets_masker import mask_secret

    log.debug("Setting up DB connection pool (PID %s)", os.getpid())
    global engine
    global Session

    engine_args = prepare_engine_args(disable_connection_pool)

    if conf.has_option('database', 'sql_alchemy_connect_args'):
        connect_args = conf.getimport('database', 'sql_alchemy_connect_args')
    else:
        connect_args = {}

    engine = create_engine(SQL_ALCHEMY_CONN, connect_args=connect_args, **engine_args)

    mask_secret(engine.url.password)

    setup_event_handlers(engine)

    Session = scoped_session(
        sessionmaker(
            autocommit=False,
            autoflush=False,
            bind=engine,
            expire_on_commit=False,
        ))

    if engine.dialect.name == 'mssql':
        session = Session()
        try:
            result = session.execute(
                sqlalchemy.text(
                    'SELECT is_read_committed_snapshot_on FROM sys.databases WHERE name=:database_name'
                ),
                params={"database_name": engine.url.database},
            )
            data = result.fetchone()[0]
            if data != 1:
                log.critical("MSSQL database MUST have READ_COMMITTED_SNAPSHOT enabled.")
                log.critical("The database %s has it disabled.", engine.url.database)
                log.critical("This will cause random deadlocks. Refusing to start.")
                log.critical(
                    "See https://airflow.apache.org/docs/apache-airflow/stable/howto/"
                    "set-up-database.html#setting-up-a-mssql-database")
                raise Exception("MSSQL database MUST have READ_COMMITTED_SNAPSHOT enabled.")
        finally:
            session.close()

def get_value(args):
    """Get one value from configuration"""
    if not conf.has_section(args.section):
        print(f'The section [{args.section}] is not found in config.', file=sys.stderr)
        sys.exit(1)

    if not conf.has_option(args.section, args.option):
        print(f'The option [{args.section}/{args.option}] is not found in config.', file=sys.stderr)
        sys.exit(1)

    value = conf.get(args.section, args.option)
    print(value)

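This function backs the `airflow config get-value` subcommand; a hedged sketch of calling it directly, with argparse.Namespace standing in for the parsed CLI arguments (the section and option are illustrative):

# Hedged sketch: emulates `airflow config get-value core dags_folder`.
from argparse import Namespace

get_value(Namespace(section='core', option='dags_folder'))  # prints the value
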
def _get_multiprocessing_start_method(self) -> str:
    """
    Determine the method for creating new processes: use core/mp_start_method
    from the config if set, otherwise fall back to the OS default.
    """
    if conf.has_option('core', 'mp_start_method'):
        return conf.get('core', 'mp_start_method')

    method = multiprocessing.get_start_method()
    if not method:
        raise ValueError("Failed to determine start method")
    return method

def try_login(username, password):
    conn = get_ldap_connection(conf.get("ldap", "bind_user"),
                               conf.get("ldap", "bind_password"))

    search_filter = "(&({0})({1}={2}))".format(
        conf.get("ldap", "user_filter"),
        conf.get("ldap", "user_name_attr"),
        username
    )

    search_scope = LEVEL
    if conf.has_option("ldap", "search_scope"):
        if conf.get("ldap", "search_scope") == "SUBTREE":
            search_scope = SUBTREE
        else:
            search_scope = LEVEL  # todo: BASE or ONELEVEL?

    res = conn.search(native(conf.get("ldap", "basedn")),
                      native(search_filter),
                      search_scope=native(search_scope))

    # todo: use list or result?
    if not res:
        log.info("Cannot find user %s", username)
        raise AuthenticationError("Invalid username or password")

    entry = conn.response[0]

    conn.unbind()

    if 'dn' not in entry:
        # The search filter for the user did not return any values, so an
        # invalid user was used for credentials.
        raise AuthenticationError("Invalid username or password")

    try:
        conn = get_ldap_connection(entry['dn'], password)
    except KeyError:
        log.error("""
        Unable to parse LDAP structure. If you're using Active Directory
        and not specifying an OU, you must set search_scope=SUBTREE in airflow.cfg.
        %s
        """, traceback.format_exc())
        raise LdapException(
            "Could not parse LDAP structure. "
            "Try setting search_scope in airflow.cfg, or check logs"
        )

    if not conn:
        log.info("Password incorrect for user %s", username)
        raise AuthenticationError("Invalid username or password")

def test_env_var_config(self):
    opt = conf.get('testsection', 'testkey')
    self.assertEqual(opt, 'testvalue')

    opt = conf.get('testsection', 'testpercent')
    self.assertEqual(opt, 'with%percent')

    self.assertTrue(conf.has_option('testsection', 'testkey'))

    opt = conf.get('kubernetes_environment_variables', 'AIRFLOW__TESTSECTION__TESTKEY')
    self.assertEqual(opt, 'nested')

def test_env_var_config(self):
    opt = conf.get('testsection', 'testkey')
    self.assertEqual(opt, 'testvalue')

    opt = conf.get('testsection', 'testpercent')
    self.assertEqual(opt, 'with%percent')

    self.assertTrue(conf.has_option('testsection', 'testkey'))

    os.environ['AIRFLOW__KUBERNETES_ENVIRONMENT_VARIABLES__AIRFLOW__TESTSECTION__TESTKEY'] = 'nested'
    opt = conf.get('kubernetes_environment_variables', 'AIRFLOW__TESTSECTION__TESTKEY')
    self.assertEqual(opt, 'nested')
    del os.environ['AIRFLOW__KUBERNETES_ENVIRONMENT_VARIABLES__AIRFLOW__TESTSECTION__TESTKEY']

def get_value(args):
    """Get one value from configuration"""
    if not conf.has_section(args.section):
        raise SystemExit(f'The section [{args.section}] is not found in config.')

    if not conf.has_option(args.section, args.option):
        raise SystemExit(f'The option [{args.section}/{args.option}] is not found in config.')

    value = conf.get(args.section, args.option)
    print(value)

def __init__(cls, *args, **kwargs):
    super().__init__(cls)
    if cls.__class__.instance is None:
        try:
            is_datadog_enabled_defined = conf.has_option('metrics', 'statsd_datadog_enabled')
            if is_datadog_enabled_defined and conf.getboolean('metrics', 'statsd_datadog_enabled'):
                cls.__class__.instance = cls.get_dogstatsd_logger()
            elif conf.getboolean('metrics', 'statsd_on'):
                cls.__class__.instance = cls.get_statsd_logger()
            else:
                cls.__class__.instance = DummyStatsLogger()
        except (socket.gaierror, ImportError) as e:
            log.error("Could not configure StatsClient: %s, using DummyStatsLogger instead.", e)
            cls.__class__.instance = DummyStatsLogger()

def test_env_var_config(self):
    opt = conf.get('testsection', 'testkey')
    assert opt == 'testvalue'

    opt = conf.get('testsection', 'testpercent')
    assert opt == 'with%percent'

    assert conf.has_option('testsection', 'testkey')

    with unittest.mock.patch.dict(
        'os.environ',
        AIRFLOW__KUBERNETES_ENVIRONMENT_VARIABLES__AIRFLOW__TESTSECTION__TESTKEY='nested'
    ):
        opt = conf.get('kubernetes_environment_variables', 'AIRFLOW__TESTSECTION__TESTKEY')
    assert opt == 'nested'

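All of the test_env_var_config variants above rely on Airflow's environment-variable override convention, AIRFLOW__{SECTION}__{KEY}; a minimal sketch of the setup they expect:

# Airflow treats AIRFLOW__<SECTION>__<KEY> environment variables as config
# overrides; exporting these before the tests run is what makes
# conf.has_option('testsection', 'testkey') return True above.
import os

os.environ['AIRFLOW__TESTSECTION__TESTKEY'] = 'testvalue'
os.environ['AIRFLOW__TESTSECTION__TESTPERCENT'] = 'with%percent'
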
@contextmanager  # needs: from contextlib import contextmanager
def conf_vars(overrides):
    original = {}
    for (section, key), value in overrides.items():
        if conf.has_option(section, key):
            original[(section, key)] = conf.get(section, key)
        else:
            original[(section, key)] = None
        if value is not None:
            conf.set(section, key, value)
        else:
            conf.remove_option(section, key)
    try:
        yield
    finally:
        # Restore the original state even if the body raises.
        for (section, key), value in original.items():
            if value is not None:
                conf.set(section, key, value)
            else:
                conf.remove_option(section, key)

def __init__(self, *args, **kwargs):
    super().__init__(self)
    if self.__class__.instance is None:
        try:
            is_datadog_enabled_defined = conf.has_option('scheduler', 'statsd_datadog_enabled')
            if is_datadog_enabled_defined and conf.getboolean('scheduler', 'statsd_datadog_enabled'):
                self.__class__.instance = self.get_dogstatsd_logger()
            elif conf.getboolean('scheduler', 'statsd_on'):
                self.__class__.instance = self.get_statsd_logger()
            else:
                self.__class__.instance = DummyStatsLogger()
        except (socket.gaierror, ImportError) as e:
            log.warning("Could not configure StatsClient: %s, using DummyStatsLogger instead.", e)
            self.__class__.instance = DummyStatsLogger()  # actually fall back, as the message says

def get_statsd_logger(self):
    if conf.getboolean('scheduler', 'statsd_on'):
        from statsd import StatsClient

        if conf.has_option('scheduler', 'statsd_custom_client_path'):
            stats_class = conf.getimport('scheduler', 'statsd_custom_client_path')

            if not issubclass(stats_class, StatsClient):
                raise AirflowConfigException(
                    "Your custom Statsd client must extend the statsd.StatsClient in order to ensure "
                    "backwards compatibility."
                )
            else:
                log.info("Successfully loaded custom Statsd client")
        else:
            stats_class = StatsClient

        statsd = stats_class(
            host=conf.get('scheduler', 'statsd_host'),
            port=conf.getint('scheduler', 'statsd_port'),
            prefix=conf.get('scheduler', 'statsd_prefix'))
        allow_list_validator = AllowListValidator(
            conf.get('scheduler', 'statsd_allow_list', fallback=None))
        return SafeStatsdLogger(statsd, allow_list_validator)