Exemple #1
0
def worker(args):
    """Start an Airflow Celery worker, optionally detached as a daemon.

    Exits with status 1 when ``settings.validate_session()`` reports failure.
    ``_serve_logs`` is called with ``args.skip_serve_logs``; if it returns a
    process handle, that process is terminated once the worker stops, even
    when the worker stops because of an exception.

    :param args: parsed CLI arguments. Reads ``autoscale``,
        ``skip_serve_logs``, ``queues``, ``concurrency``, ``celery_hostname``
        and ``daemon``; in daemon mode also ``pid``, ``stdout``, ``stderr``
        and ``log_file``.
    """
    if not settings.validate_session():
        print("Worker exiting... database connection precheck failed! ")
        sys.exit(1)

    autoscale = args.autoscale
    skip_serve_logs = args.skip_serve_logs

    # The command-line value wins; the config file is only a fallback.
    if autoscale is None and conf.has_option("celery", "worker_autoscale"):
        autoscale = conf.get("celery", "worker_autoscale")

    worker_instance = worker_bin.worker(app=celery_app)
    options = {
        'optimization': 'fair',
        'O': 'fair',
        'queues': args.queues,
        'concurrency': args.concurrency,
        'autoscale': autoscale,
        'hostname': args.celery_hostname,
        'loglevel': conf.get('logging', 'LOGGING_LEVEL'),
    }

    if conf.has_option("celery", "pool"):
        options["pool"] = conf.get("celery", "pool")

    sub_proc = None
    try:
        if args.daemon:
            pid, stdout, stderr, log_file = setup_locations("worker",
                                                            args.pid,
                                                            args.stdout,
                                                            args.stderr,
                                                            args.log_file)
            handle = setup_logging(log_file)

            # Context managers guarantee both redirect files are closed even
            # if the daemon context or the worker raises.
            with open(stdout, 'w+') as stdout_handle, open(stderr, 'w+') as stderr_handle:
                ctx = daemon.DaemonContext(
                    pidfile=TimeoutPIDLockFile(pid, -1),
                    files_preserve=[handle],
                    stdout=stdout_handle,
                    stderr=stderr_handle,
                )
                with ctx:
                    sub_proc = _serve_logs(skip_serve_logs)
                    worker_instance.run(**options)
        else:
            signal.signal(signal.SIGINT, sigint_handler)
            signal.signal(signal.SIGTERM, sigint_handler)

            sub_proc = _serve_logs(skip_serve_logs)
            worker_instance.run(**options)
    finally:
        # Do not leave the log-serving process orphaned when the worker
        # exits abnormally.
        if sub_proc:
            sub_proc.terminate()
Exemple #2
0
    def test_config_use_original_when_original_and_fallback_are_present(self):
        """FERNET_KEY keeps its value while a FERNET_KEY_CMD override is active."""
        assert conf.has_option("core", "FERNET_KEY")
        assert not conf.has_option("core", "FERNET_KEY_CMD")

        expected_key = conf.get('core', 'FERNET_KEY')

        cmd_override = {('core', 'FERNET_KEY_CMD'): 'printf HELLO'}
        with conf_vars(cmd_override):
            key_with_cmd_present = conf.get("core", "FERNET_KEY")

        assert expected_key == key_with_cmd_present
Exemple #3
0
    def test_config_use_original_when_original_and_fallback_are_present(self):
        """FERNET_KEY keeps its value while a FERNET_KEY_CMD override is active."""
        self.assertTrue(conf.has_option("core", "FERNET_KEY"))
        self.assertFalse(conf.has_option("core", "FERNET_KEY_CMD"))

        expected_key = conf.get('core', 'FERNET_KEY')

        cmd_override = {('core', 'FERNET_KEY_CMD'): 'printf HELLO'}
        with conf_vars(cmd_override):
            key_with_cmd_present = conf.get("core", "FERNET_KEY")

        self.assertEqual(expected_key, key_with_cmd_present)
Exemple #4
0
    def test_config_throw_error_when_original_and_fallback_is_absent(self):
        """Reading FERNET_KEY raises once the option is removed and no _CMD variant exists."""
        assert conf.has_option("core", "FERNET_KEY")
        assert not conf.has_option("core", "FERNET_KEY_CMD")

        # A ``None`` override removes the option for the duration of the block.
        removed_key = {('core', 'fernet_key'): None}
        with conf_vars(removed_key):
            with pytest.raises(AirflowConfigException) as ctx:
                conf.get("core", "FERNET_KEY")

        expected_text = "section/key [core/fernet_key] not found in config"
        actual_text = str(ctx.value)
        assert expected_text == actual_text
Exemple #5
0
    def test_config_throw_error_when_original_and_fallback_is_absent(self):
        """Reading FERNET_KEY raises once the option is removed and no _CMD variant exists."""
        self.assertTrue(conf.has_option("core", "FERNET_KEY"))
        self.assertFalse(conf.has_option("core", "FERNET_KEY_CMD"))

        # A ``None`` override removes the option for the duration of the block.
        removed_key = {('core', 'fernet_key'): None}
        with conf_vars(removed_key):
            with self.assertRaises(AirflowConfigException) as cm:
                conf.get("core", "FERNET_KEY")

        expected_text = "section/key [core/fernet_key] not found in config"
        actual_text = str(cm.exception)
        self.assertEqual(expected_text, actual_text)
Exemple #6
0
    def get_statsd_logger(cls):
        """Build a ``SafeStatsdLogger`` around the configured statsd client class."""
        # scheduler/statsd_on is intentionally not re-checked here: callers only
        # reach this method when it is set, and an earlier version crashed with
        # "None is callable" when invoked without it.
        from statsd import StatsClient

        stats_class = StatsClient
        if conf.has_option('scheduler', 'statsd_custom_client_path'):
            stats_class = conf.getimport('scheduler', 'statsd_custom_client_path')

            # Only StatsClient subclasses are accepted as custom clients.
            if not issubclass(stats_class, StatsClient):
                raise AirflowConfigException(
                    "Your custom Statsd client must extend the statsd.StatsClient in order to ensure "
                    "backwards compatibility.")
            log.info("Successfully loaded custom Statsd client")

        host = conf.get('scheduler', 'statsd_host')
        port = conf.getint('scheduler', 'statsd_port')
        prefix = conf.get('scheduler', 'statsd_prefix')
        client = stats_class(host=host, port=port, prefix=prefix)

        allow_list = conf.get('scheduler', 'statsd_allow_list', fallback=None)
        return SafeStatsdLogger(client, AllowListValidator(allow_list))
Exemple #7
0
    def get_statsd_logger(self):
        """Build a ``SafeStatsdLogger`` around the configured statsd client class.

        Uses the class named by ``scheduler/statsd_custom_client_path`` when
        that option exists, otherwise ``statsd.StatsClient``.

        Raises:
            ImportError: if the custom client cannot be imported or does not
                subclass ``statsd.StatsClient``.
        """
        # The scheduler/statsd_on flag is no longer re-checked here. When it was
        # false, ``stats_class`` was never bound and the method died with an
        # UnboundLocalError. NOTE(review): callers are expected to check
        # statsd_on before calling this method - confirm.
        from statsd import StatsClient

        if conf.has_option('scheduler', 'statsd_custom_client_path'):
            custom_statsd_module_path = conf.get(
                'scheduler', 'statsd_custom_client_path')

            try:
                stats_class = import_string(custom_statsd_module_path)
                if not issubclass(stats_class, StatsClient):
                    # Message text (including its embedded newline and
                    # indentation) is kept exactly as it was.
                    raise Exception(
                        "Your custom Statsd client must extend the statsd.StatsClient in order to ensure backwards\n"
                        "                            compatibility.")
                else:
                    log.info("Successfully loaded custom Statsd client "
                             f"from {custom_statsd_module_path}")

            except Exception as err:
                # Chain the cause so the original traceback is not lost.
                raise ImportError(
                    'Unable to load custom Statsd client from '
                    f'{custom_statsd_module_path} due to {err}') from err
        else:
            stats_class = StatsClient

        statsd = stats_class(host=conf.get('scheduler', 'statsd_host'),
                             port=conf.getint('scheduler', 'statsd_port'),
                             prefix=conf.get('scheduler', 'statsd_prefix'))
        allow_list_validator = AllowListValidator(
            conf.get('scheduler', 'statsd_allow_list', fallback=None))
        return SafeStatsdLogger(statsd, allow_list_validator)
Exemple #8
0
def conf_vars(overrides):
    """Temporarily apply configuration overrides, restoring the old state afterwards.

    ``overrides`` maps ``(section, key)`` tuples to new values; a value of
    ``None`` removes the option. Matching environment variables are removed
    for the duration and put back afterwards. ``settings.configure_vars()``
    runs after applying and after restoring the overrides.
    """
    saved_options = {}
    saved_env = {}
    for option, new_value in overrides.items():
        section, key = option

        # Take the matching environment variable out of the way and keep its
        # old value for the restore step.
        env_name = conf._env_var_name(section, key)
        if env_name in os.environ:
            saved_env[env_name] = os.environ.pop(env_name)

        # ``None`` marks "option did not exist" for the restore step.
        saved_options[option] = conf.get(section, key) if conf.has_option(section, key) else None

        if new_value is None:
            conf.remove_option(section, key)
        else:
            conf.set(section, key, new_value)
    settings.configure_vars()
    try:
        yield
    finally:
        for (section, key), previous in saved_options.items():
            if previous is None:
                conf.remove_option(section, key)
            else:
                conf.set(section, key, previous)
        for env_name, previous in saved_env.items():
            os.environ[env_name] = previous
        settings.configure_vars()
Exemple #9
0
def configure_orm(disable_connection_pool=False):
    """Create the module-level SQLAlchemy ``engine`` and scoped ``Session`` factory."""
    global engine
    global Session
    log.debug("Setting up DB connection pool (PID %s)", os.getpid())
    engine_args = prepare_engine_args(disable_connection_pool)

    # The DB encoding is user-configurable; it defaults to utf-8 so that jobs
    # and users with non-latin1 characters keep working.
    engine_args['encoding'] = conf.get('core', 'SQL_ENGINE_ENCODING', fallback='utf-8')

    connect_args = (
        conf.getimport('core', 'sql_alchemy_connect_args')
        if conf.has_option('core', 'sql_alchemy_connect_args')
        else {}
    )

    engine = create_engine(SQL_ALCHEMY_CONN, connect_args=connect_args, **engine_args)
    setup_event_handlers(engine)

    session_factory = sessionmaker(
        autocommit=False,
        autoflush=False,
        bind=engine,
        expire_on_commit=False,
    )
    Session = scoped_session(session_factory)
Exemple #10
0
def _resolve_vineyard_xcom_options():
    """Collect the vineyard XCom options from the config and the environment.

    Returns:
        dict with ``persist`` (``vineyard/persist`` as a boolean, ``False``
        when the option is absent) and ``ipc_socket`` (``vineyard/ipc_socket``
        if configured, otherwise the ``VINEYARD_IPC_SOCKET`` environment
        variable).

    Raises:
        RuntimeError: if no IPC socket is configured in either place.
    """
    options = {}
    if conf.has_option('vineyard', 'persist'):
        options['persist'] = conf.getboolean('vineyard', 'persist')
    else:
        options['persist'] = False

    # The config file takes precedence over the environment variable.
    if conf.has_option('vineyard', 'ipc_socket'):
        options['ipc_socket'] = conf.get('vineyard', 'ipc_socket')
    elif 'VINEYARD_IPC_SOCKET' in os.environ:
        options['ipc_socket'] = os.environ['VINEYARD_IPC_SOCKET']
    else:
        raise RuntimeError("Failed to find vineyard IPC socket configuration, " +
                           "please configure it using the environment variable " +
                           "$VINEYARD_IPC_SOCKET, or via airflow's vineyard.ipc_socket configuration.")
    return options
Exemple #11
0
def conf_vars(overrides):
    """Temporarily apply configuration overrides, restoring the old state afterwards.

    Generator body meant to be driven as a context manager (presumably wrapped
    by ``contextlib.contextmanager`` outside this block - confirm).

    ``overrides`` maps ``(section, key)`` tuples to new values; a value of
    ``None`` removes the option. Matching environment variables are removed
    for the duration and put back afterwards. ``settings.configure_vars()`` is
    re-run only when a ``core`` option whose key ends in ``_folder`` is
    overridden.

    The restore step runs in a ``finally`` clause so that an exception raised
    inside the managed block cannot leak the overrides into later code.
    """
    original = {}
    original_env_vars = {}
    reconfigure_vars = False
    for (section, key), value in overrides.items():

        env = conf._env_var_name(section, key)
        if env in os.environ:
            original_env_vars[env] = os.environ.pop(env)

        # ``None`` marks "option did not exist" for the restore step.
        if conf.has_option(section, key):
            original[(section, key)] = conf.get(section, key)
        else:
            original[(section, key)] = None
        if value is not None:
            conf.set(section, key, value)
        else:
            conf.remove_option(section, key)

        if section == 'core' and key.lower().endswith('_folder'):
            reconfigure_vars = True
    if reconfigure_vars:
        settings.configure_vars()
    try:
        yield
    finally:
        for (section, key), value in original.items():
            if value is not None:
                conf.set(section, key, value)
            else:
                conf.remove_option(section, key)
        for env, value in original_env_vars.items():
            os.environ[env] = value
        if reconfigure_vars:
            settings.configure_vars()
Exemple #12
0
def get_default(key, default=None):
    """Return option ``key`` from the ``airflowdocker`` section, or ``default`` if it is not set."""
    from airflow.configuration import conf

    section = "airflowdocker"
    return conf.get(section, key) if conf.has_option(section, key) else default
Exemple #13
0
 def get_logger_address(self):
     """Return the statsd address: a socket path if one is configured and non-empty, else host and port."""
     socket_path = None
     if conf.has_option('scheduler', 'statsd_socket_path'):
         socket_path = conf.get('scheduler', 'statsd_socket_path')

     # An empty socket path falls through to the host/port address.
     if socket_path:
         return {'socket_path': socket_path}

     return {
         'host': conf.get('scheduler', 'statsd_host'),
         'port': conf.getint('scheduler', 'statsd_port'),
     }
Exemple #14
0
    def test_env_var_config(self):
        """Check the ``testsection`` values (presumably supplied via environment variables, per the test name)."""
        self.assertEqual(conf.get('testsection', 'testkey'), 'testvalue')

        # The percent sign must come back as-is.
        self.assertEqual(conf.get('testsection', 'testpercent'), 'with%percent')

        self.assertTrue(conf.has_option('testsection', 'testkey'))
    def test_env_var_config(self):
        """Check the ``testsection`` values (presumably supplied via environment variables, per the test name)."""
        self.assertEqual(conf.get('testsection', 'testkey'), 'testvalue')

        # The percent sign must come back as-is.
        self.assertEqual(conf.get('testsection', 'testpercent'), 'with%percent')

        self.assertTrue(conf.has_option('testsection', 'testkey'))
Exemple #16
0
    def _get_multiprocessing_start_method(self):
        """
        Return the process start method: ``core/mp_start_method`` from the
        config when that option is set, otherwise whatever
        ``multiprocessing.get_start_method()`` reports.
        """
        if not conf.has_option('core', 'mp_start_method'):
            return multiprocessing.get_start_method()

        return conf.get('core', 'mp_start_method')
Exemple #17
0
 def __init__(cls, *args, **kwargs):
     """Choose the stats-logger factory once and store it on ``cls.__class__``."""
     super().__init__(cls)
     owner = cls.__class__
     if owner.factory is not None:
         # A factory has already been selected; keep it.
         return

     # getboolean is only consulted when the datadog option is present.
     use_datadog = (
         conf.has_option('metrics', 'statsd_datadog_enabled')
         and conf.getboolean('metrics', 'statsd_datadog_enabled')
     )
     if use_datadog:
         owner.factory = cls.get_dogstatsd_logger
     elif conf.getboolean('metrics', 'statsd_on'):
         owner.factory = cls.get_statsd_logger
     else:
         owner.factory = DummyStatsLogger
Exemple #18
0
 def test_store_dag_code_config_when_set(self):
     """An explicit ``store_dag_code`` value is used rather than the ``store_serialized_dags`` fallback."""
     serialized_enabled = conf.getboolean('core', 'store_serialized_dags', fallback=False)
     dag_code_enabled = conf.getboolean("core", "store_dag_code", fallback=serialized_enabled)

     assert conf.has_option("core", "store_dag_code")
     assert serialized_enabled
     assert not dag_code_enabled
Exemple #19
0
 def test_store_dag_code_config_when_set(self):
     """An explicit ``store_dag_code`` value is used rather than the ``store_serialized_dags`` fallback."""
     serialized_enabled = conf.getboolean('core', 'store_serialized_dags', fallback=False)
     dag_code_enabled = conf.getboolean("core", "store_dag_code", fallback=serialized_enabled)

     self.assertTrue(conf.has_option("core", "store_dag_code"))
     self.assertTrue(serialized_enabled)
     self.assertFalse(dag_code_enabled)
Exemple #20
0
def configure_orm(disable_connection_pool=False):
    """Create the module-level SQLAlchemy ``engine`` and scoped ``Session`` factory.

    On an MSSQL engine the database is additionally queried for
    ``is_read_committed_snapshot_on``; an exception is raised unless it is 1.
    """
    from airflow.utils.log.secrets_masker import mask_secret

    global engine
    global Session
    log.debug("Setting up DB connection pool (PID %s)", os.getpid())
    engine_args = prepare_engine_args(disable_connection_pool)

    connect_args = (
        conf.getimport('database', 'sql_alchemy_connect_args')
        if conf.has_option('database', 'sql_alchemy_connect_args')
        else {}
    )

    engine = create_engine(SQL_ALCHEMY_CONN, connect_args=connect_args, **engine_args)

    # Register the DB password with the secrets masker.
    mask_secret(engine.url.password)

    setup_event_handlers(engine)

    session_factory = sessionmaker(
        autocommit=False,
        autoflush=False,
        bind=engine,
        expire_on_commit=False,
    )
    Session = scoped_session(session_factory)

    if engine.dialect.name != 'mssql':
        return

    session = Session()
    try:
        snapshot_query = sqlalchemy.text(
            'SELECT is_read_committed_snapshot_on FROM sys.databases WHERE name=:database_name'
        )
        row = session.execute(
            snapshot_query,
            params={"database_name": engine.url.database},
        ).fetchone()
        if row[0] != 1:
            log.critical("MSSQL database MUST have READ_COMMITTED_SNAPSHOT enabled.")
            log.critical("The database %s has it disabled.", engine.url.database)
            log.critical("This will cause random deadlocks, Refusing to start.")
            log.critical(
                "See https://airflow.apache.org/docs/apache-airflow/stable/howto/"
                "set-up-database.html#setting-up-a-mssql-database"
            )
            raise Exception("MSSQL database MUST have READ_COMMITTED_SNAPSHOT enabled.")
    finally:
        session.close()
Exemple #21
0
def get_value(args):
    """Print one configuration value; report to stderr and exit with status 1 if it is missing."""
    problem = None
    if not conf.has_section(args.section):
        problem = f'The section [{args.section}] is not found in config.'
    elif not conf.has_option(args.section, args.option):
        problem = f'The option [{args.section}/{args.option}] is not found in config.'

    if problem is not None:
        print(problem, file=sys.stderr)
        sys.exit(1)

    print(conf.get(args.section, args.option))
Exemple #22
0
    def _get_multiprocessing_start_method(self) -> str:
        """
        Return the process start method: ``core/mp_start_method`` from the
        config when that option is set, otherwise whatever
        ``multiprocessing.get_start_method()`` reports.

        Raises ``ValueError`` when the reported method is empty.
        """
        if not conf.has_option('core', 'mp_start_method'):
            os_default = multiprocessing.get_start_method()
            if os_default:
                return os_default
            raise ValueError("Failed to determine start method")

        return conf.get('core', 'mp_start_method')
Exemple #23
0
    def try_login(username, password):
        """Check ``username``/``password`` against the configured LDAP directory.

        Binds with the configured ``bind_user`` to look the user up, then
        attempts a second bind as the found entry's DN with ``password``.
        Returns ``None`` on success.

        Raises:
            AuthenticationError: if the user is not found, the entry has no
                ``dn``, or the bind with ``password`` yields no connection.
            LdapException: if ``get_ldap_connection`` raises ``KeyError``.
        """
        conn = get_ldap_connection(conf.get("ldap", "bind_user"),
                                   conf.get("ldap", "bind_password"))

        # ``username`` is attacker-controlled. Escape the LDAP filter
        # metacharacters (RFC 4515) so it cannot alter the search filter.
        # The backslash must be escaped first so the escapes added for the
        # other characters are not escaped again.
        escaped_username = username
        for char, replacement in (("\\", "\\5c"), ("*", "\\2a"), ("(", "\\28"),
                                  (")", "\\29"), ("\x00", "\\00")):
            escaped_username = escaped_username.replace(char, replacement)

        search_filter = "(&({0})({1}={2}))".format(
            conf.get("ldap", "user_filter"),
            conf.get("ldap", "user_name_attr"),
            escaped_username
        )

        # LEVEL is used unless the config explicitly asks for SUBTREE.
        search_scope = LEVEL
        if conf.has_option("ldap", "search_scope"):
            if conf.get("ldap", "search_scope") == "SUBTREE":
                search_scope = SUBTREE

        # todo: BASE or ONELEVEL?

        try:
            res = conn.search(native(conf.get("ldap", "basedn")),
                              native(search_filter),
                              search_scope=native(search_scope))

            # todo: use list or result?
            if not res:
                log.info("Cannot find user %s", username)
                raise AuthenticationError("Invalid username or password")

            entry = conn.response[0]
        finally:
            # Release the lookup connection on the failure paths as well.
            conn.unbind()

        if 'dn' not in entry:
            # The search filter for the user did not return any values, so an
            # invalid user was used for credentials.
            raise AuthenticationError("Invalid username or password")

        try:
            conn = get_ldap_connection(entry['dn'], password)
        except KeyError:
            log.error("""
            Unable to parse LDAP structure. If you're using Active Directory
            and not specifying an OU, you must set search_scope=SUBTREE in airflow.cfg.
            %s
            """, traceback.format_exc())
            raise LdapException(
                "Could not parse LDAP structure. "
                "Try setting search_scope in airflow.cfg, or check logs"
            )

        if not conn:
            log.info("Password incorrect for user %s", username)
            raise AuthenticationError("Invalid username or password")
Exemple #24
0
    def test_env_var_config(self):
        """Check the ``testsection`` values and one ``kubernetes_environment_variables`` lookup."""
        self.assertEqual(conf.get('testsection', 'testkey'), 'testvalue')

        # The percent sign must come back as-is.
        self.assertEqual(conf.get('testsection', 'testpercent'), 'with%percent')

        self.assertTrue(conf.has_option('testsection', 'testkey'))

        nested_value = conf.get('kubernetes_environment_variables', 'AIRFLOW__TESTSECTION__TESTKEY')
        self.assertEqual(nested_value, 'nested')
Exemple #25
0
    def test_env_var_config(self):
        """Check the ``testsection`` values and a nested ``kubernetes_environment_variables`` key.

        The nested key is supplied through a temporary environment variable.
        It is removed in a ``finally`` clause so a failing assertion cannot
        leak it into other tests.
        """
        opt = conf.get('testsection', 'testkey')
        self.assertEqual(opt, 'testvalue')

        opt = conf.get('testsection', 'testpercent')
        self.assertEqual(opt, 'with%percent')

        self.assertTrue(conf.has_option('testsection', 'testkey'))

        os.environ['AIRFLOW__KUBERNETES_ENVIRONMENT_VARIABLES__AIRFLOW__TESTSECTION__TESTKEY'] = 'nested'
        try:
            opt = conf.get('kubernetes_environment_variables', 'AIRFLOW__TESTSECTION__TESTKEY')
            self.assertEqual(opt, 'nested')
        finally:
            del os.environ['AIRFLOW__KUBERNETES_ENVIRONMENT_VARIABLES__AIRFLOW__TESTSECTION__TESTKEY']
Exemple #26
0
def get_value(args):
    """Print one configuration value; raise ``SystemExit`` with a message if it is missing."""
    problem = None
    if not conf.has_section(args.section):
        problem = f'The section [{args.section}] is not found in config.'
    elif not conf.has_option(args.section, args.option):
        problem = f'The option [{args.section}/{args.option}] is not found in config.'

    if problem is not None:
        raise SystemExit(problem)

    print(conf.get(args.section, args.option))
Exemple #27
0
 def __init__(cls, *args, **kwargs):
     """Create the stats logger once and store it on ``cls.__class__.instance``."""
     super().__init__(cls)
     owner = cls.__class__
     if owner.instance is not None:
         # A logger has already been created; keep it.
         return

     try:
         # getboolean is only consulted when the datadog option is present.
         use_datadog = (
             conf.has_option('metrics', 'statsd_datadog_enabled')
             and conf.getboolean('metrics', 'statsd_datadog_enabled')
         )
         if use_datadog:
             owner.instance = cls.get_dogstatsd_logger()
         elif conf.getboolean('metrics', 'statsd_on'):
             owner.instance = cls.get_statsd_logger()
         else:
             owner.instance = DummyStatsLogger()
     except (socket.gaierror, ImportError) as e:
         # Fall back to the dummy logger when the client cannot be set up.
         log.error("Could not configure StatsClient: %s, using DummyStatsLogger instead.", e)
         owner.instance = DummyStatsLogger()
    def test_env_var_config(self):
        """Check the ``testsection`` values and a nested key supplied through a patched environment."""
        assert conf.get('testsection', 'testkey') == 'testvalue'

        # The percent sign must come back as-is.
        assert conf.get('testsection', 'testpercent') == 'with%percent'

        assert conf.has_option('testsection', 'testkey')

        patched_env = {
            'AIRFLOW__KUBERNETES_ENVIRONMENT_VARIABLES__AIRFLOW__TESTSECTION__TESTKEY': 'nested',
        }
        with unittest.mock.patch.dict('os.environ', patched_env):
            nested_value = conf.get('kubernetes_environment_variables', 'AIRFLOW__TESTSECTION__TESTKEY')
            assert nested_value == 'nested'
Exemple #29
0
def conf_vars(overrides):
    """Temporarily apply configuration overrides, restoring the old values afterwards.

    Generator body meant to be driven as a context manager (presumably wrapped
    by ``contextlib.contextmanager`` outside this block - confirm).

    ``overrides`` maps ``(section, key)`` tuples to new values; a value of
    ``None`` removes the option.

    The restore step runs in a ``finally`` clause so that an exception raised
    inside the managed block cannot leak the overrides into later code.
    """
    original = {}
    for (section, key), value in overrides.items():
        # ``None`` marks "option did not exist" for the restore step.
        if conf.has_option(section, key):
            original[(section, key)] = conf.get(section, key)
        else:
            original[(section, key)] = None
        if value is not None:
            conf.set(section, key, value)
        else:
            conf.remove_option(section, key)
    try:
        yield
    finally:
        for (section, key), value in original.items():
            if value is not None:
                conf.set(section, key, value)
            else:
                conf.remove_option(section, key)
Exemple #30
0
 def __init__(self, *args, **kwargs):
     """Create the stats logger once and store it on ``self.__class__.instance``.

     Uses the dogstatsd logger when ``scheduler/statsd_datadog_enabled`` is
     present and true, the statsd logger when ``scheduler/statsd_on`` is true,
     and ``DummyStatsLogger`` otherwise or when client setup fails.
     """
     super().__init__(self)
     if self.__class__.instance is None:
         try:
             is_datadog_enabled_defined = conf.has_option(
                 'scheduler', 'statsd_datadog_enabled')
             if is_datadog_enabled_defined and conf.getboolean(
                     'scheduler', 'statsd_datadog_enabled'):
                 self.__class__.instance = self.get_dogstatsd_logger()
             elif conf.getboolean('scheduler', 'statsd_on'):
                 self.__class__.instance = self.get_statsd_logger()
             else:
                 self.__class__.instance = DummyStatsLogger()
         except (socket.gaierror, ImportError) as e:
             log.warning(
                 "Could not configure StatsClient: %s, using DummyStatsLogger instead.",
                 e)
             # Install the fallback the message promises. Without this,
             # ``instance`` stayed None after a failed setup.
             self.__class__.instance = DummyStatsLogger()
Exemple #31
0
    def get_statsd_logger(self):
        """Build a ``SafeStatsdLogger`` around the configured statsd client class.

        Uses the class named by ``scheduler/statsd_custom_client_path`` when
        that option exists, otherwise ``statsd.StatsClient``.

        Raises:
            AirflowConfigException: if the custom client does not subclass
                ``statsd.StatsClient``.
        """
        # The scheduler/statsd_on flag is no longer re-checked here. When it was
        # false, ``stats_class`` was never bound and the method died with an
        # UnboundLocalError. NOTE(review): callers are expected to check
        # statsd_on before calling this method - confirm.
        from statsd import StatsClient

        if conf.has_option('scheduler', 'statsd_custom_client_path'):
            stats_class = conf.getimport('scheduler', 'statsd_custom_client_path')

            if not issubclass(stats_class, StatsClient):
                raise AirflowConfigException(
                    "Your custom Statsd client must extend the statsd.StatsClient in order to ensure "
                    "backwards compatibility."
                )
            else:
                log.info("Successfully loaded custom Statsd client")

        else:
            stats_class = StatsClient

        statsd = stats_class(
            host=conf.get('scheduler', 'statsd_host'),
            port=conf.getint('scheduler', 'statsd_port'),
            prefix=conf.get('scheduler', 'statsd_prefix'))
        allow_list_validator = AllowListValidator(conf.get('scheduler', 'statsd_allow_list', fallback=None))
        return SafeStatsdLogger(statsd, allow_list_validator)