def reinit_airflow_sql_conn():
    from airflow.settings import configure_orm, configure_vars

    from dbnd._core.configuration.dbnd_config import config as dbnd_config

    configure_vars()
    # The webservers import this file from models.py with the default settings.
    configure_orm()

    # add query handler before every execute
    # this will print query, code line and stack trace
    if dbnd_config.getboolean("log", "sqlalchemy_trace"):
        from airflow import settings as airflow_settings
        from sqlalchemy import event

        from dbnd_airflow.db_utils import trace_sqlalchemy_query

        event.listen(airflow_settings.engine, "before_cursor_execute", trace_sqlalchemy_query)

    # this will print query execution time
    from airflow import settings as airflow_settings
    from sqlalchemy import event

    from dbnd_airflow.db_utils import (
        profile_after_cursor_execute,
        profile_before_cursor_execute,
    )

    event.listen(airflow_settings.engine, "before_cursor_execute", profile_before_cursor_execute)
    event.listen(airflow_settings.engine, "after_cursor_execute", profile_after_cursor_execute)
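# The profiling handlers above come from dbnd_airflow.db_utils and their real
# implementation is not shown here. The following is only a minimal sketch of the
# standard SQLAlchemy cursor-execute timing recipe such handlers typically follow;
# the function bodies and logger name are illustrative assumptions, not dbnd code.
import logging
import time


def profile_before_cursor_execute(conn, cursor, statement, parameters, context, executemany):
    # Stash the start time on the connection's info dict before the statement runs.
    conn.info.setdefault("query_start_time", []).append(time.monotonic())


def profile_after_cursor_execute(conn, cursor, statement, parameters, context, executemany):
    # Pop the matching start time and log how long the statement took.
    total = time.monotonic() - conn.info["query_start_time"].pop(-1)
    logging.getLogger(__name__).debug("query took %.4fs: %s", total, statement)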
def setUpClass(cls):
    settings.configure_orm()
    cls.session = settings.Session
    with conf_vars({("api", "auth_backend"): "tests.test_utils.remote_user_api_auth_backend"}):
        cls.app = app.create_app(testing=True)
    # TODO: Add new role for each view to test permission.
    create_user(cls.app, username="******", role="Admin")
def main():
    # Inline the airflow imports because they cause the global config to be loaded
    from airflow.utils import timezone
    from airflow import jobs
    from airflow.configuration import conf
    from airflow.settings import configure_orm, Session

    configure_orm(disable_connection_pool=True)

    base_job_model = jobs.BaseJob
    scheduler_health_check_threshold = timedelta(
        seconds=conf.getint('scheduler', 'scheduler_health_check_threshold')
    )

    latest_scheduler_heartbeat = None
    try:
        latest_scheduler_heartbeat = (
            Session.query(func.max(base_job_model.latest_heartbeat))
            .filter(
                base_job_model.state == 'running',
                base_job_model.job_type == 'SchedulerJob',
            )
            .scalar()
        )
    except Exception:
        pass

    if not latest_scheduler_heartbeat:
        status_code = 1
    else:
        if timezone.utcnow() - latest_scheduler_heartbeat <= scheduler_health_check_threshold:
            status_code = 0
        else:
            status_code = 1

    return status_code
def setUpClass(cls):
    settings.configure_orm()
    cls.session = settings.Session
    cls.app = application.create_app(testing=True)
    cls.appbuilder = cls.app.appbuilder  # pylint: disable=no-member
    cls.app.config['WTF_CSRF_ENABLED'] = False
    cls.security_manager = cls.appbuilder.sm
    cls.delete_roles()
def setUp(self):
    self.app, self.appbuilder = application.create_app(session=Session, testing=True)
    self.app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///'
    self.app.config['SECRET_KEY'] = 'secret_key'
    self.app.config['CSRF_ENABLED'] = False
    self.app.config['WTF_CSRF_ENABLED'] = False
    self.client = self.app.test_client()
    settings.configure_orm()
    self.session = Session
def setUp(self):
    super().setUp()
    from airflow.www import app as application

    self.app, self.appbuilder = application.create_app(session=Session, testing=True)
    self.app.config['TESTING'] = True

    self.parser = cli.CLIFactory.get_parser()
    self.dagbag = DagBag(dag_folder=DEV_NULL, include_examples=True)
    settings.configure_orm()
    self.session = Session
def setUp(self):
    conf.load_test_config()
    self.app, self.appbuilder = application.create_app(session=Session, testing=True)
    self.app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///'
    self.app.config['SECRET_KEY'] = 'secret_key'
    self.app.config['CSRF_ENABLED'] = False
    self.app.config['WTF_CSRF_ENABLED'] = False
    self.client = self.app.test_client()
    settings.configure_orm()
    self.session = Session
def set_airflow_db(sql_alchemy_conn: str, fernet_key: str):
    with set_env(
        AIRFLOW__CORE__SQL_ALCHEMY_CONN=sql_alchemy_conn,
        AIRFLOW__CORE__FERNET_KEY=fernet_key,
    ):
        settings.configure_vars()
        settings.configure_orm()
        assert repr(settings.engine.url) == sql_alchemy_conn
        yield

    settings.configure_vars()
    settings.configure_orm()
def setUpClass(cls):
    settings.configure_orm()
    cls.session = settings.Session
    cls.app = application.create_app(testing=True)
    cls.appbuilder = cls.app.appbuilder  # pylint: disable=no-member
    cls.app.config['WTF_CSRF_ENABLED'] = False
    cls.security_manager = cls.appbuilder.sm
    cls.role_admin = cls.security_manager.find_role('Admin')
    cls.user = cls.appbuilder.sm.add_user(
        'admin', 'admin', 'user', '*****@*****.**', cls.role_admin, 'general'
    )
def test_sql_alchemy_invalid_connect_args(
    self, mock_create_engine, mock_sessionmaker, mock_scoped_session, mock_setup_event_handlers
):
    config = {
        ('core', 'sql_alchemy_connect_args'): 'does.not.exist',
        ('core', 'sql_alchemy_pool_enabled'): 'False',
    }
    with self.assertRaises(AirflowConfigException):
        with conf_vars(config):
            settings.configure_orm()
def mock_airflow_db() -> ContextManager[AirflowDb]:
    with tempfile.TemporaryDirectory() as temp_dir:
        test_db_path = os.path.join(temp_dir, 'airflow.db')
        sql_alchemy_conn = f'sqlite:///{test_db_path}'
        with set_env(AIRFLOW__CORE__SQL_ALCHEMY_CONN=sql_alchemy_conn):
            settings.configure_vars()
            settings.configure_orm()
            assert repr(settings.engine.url) == sql_alchemy_conn
            initdb()
            yield AirflowDb(sql_alchemy_conn=sql_alchemy_conn)

    settings.configure_vars()
    settings.configure_orm()
def test_configure_orm_with_default_values(
    self, mock_create_engine, mock_sessionmaker, mock_scoped_session, mock_setup_event_handlers
):
    settings.configure_orm()
    mock_create_engine.assert_called_once_with(
        settings.SQL_ALCHEMY_CONN,
        connect_args={},
        encoding='utf-8',
        max_overflow=10,
        pool_pre_ping=True,
        pool_recycle=1800,
        pool_size=5,
    )
def task_run(args, dag=None): """Runs a single task instance""" if dag: args.dag_id = dag.dag_id log = LoggingMixin().log # Load custom airflow config if args.cfg_path: with open(args.cfg_path, 'r') as conf_file: conf_dict = json.load(conf_file) if os.path.exists(args.cfg_path): os.remove(args.cfg_path) conf.read_dict(conf_dict, source=args.cfg_path) settings.configure_vars() # IMPORTANT, have to use the NullPool, otherwise, each "run" command may leave # behind multiple open sleeping connections while heartbeating, which could # easily exceed the database connection limit when # processing hundreds of simultaneous tasks. settings.configure_orm(disable_connection_pool=True) if not args.pickle and not dag: dag = get_dag(args) elif not dag: with db.create_session() as session: log.info('Loading pickle id %s', args.pickle) dag_pickle = session.query(DagPickle).filter( DagPickle.id == args.pickle).first() if not dag_pickle: raise AirflowException("Who hid the pickle!? [missing pickle]") dag = dag_pickle.pickle task = dag.get_task(task_id=args.task_id) ti = TaskInstance(task, args.execution_date) ti.refresh_from_db() ti.init_run_context(raw=args.raw) hostname = get_hostname() log.info("Running %s on host %s", ti, hostname) if args.interactive: _run(args, dag, ti) else: with redirect_stdout(ti.log, logging.INFO), redirect_stderr( ti.log, logging.WARN): _run(args, dag, ti) logging.shutdown()
def factory():
    app = create_app(testing=True)
    app.config["WTF_CSRF_ENABLED"] = False

    settings.configure_orm()
    security_manager = app.appbuilder.sm  # pylint: disable=no-member
    if not security_manager.find_user(username='******'):
        security_manager.add_user(
            username='******',
            first_name='test',
            last_name='test',
            email='*****@*****.**',
            role=security_manager.find_role('Admin'),
            password='******',
        )
    return app
def test_sql_alchemy_connect_args(
    self, mock_create_engine, mock_sessionmaker, mock_scoped_session, mock_setup_event_handlers
):
    config = {
        ('core', 'sql_alchemy_connect_args'): 'tests.core.test_sqlalchemy_config.SQL_ALCHEMY_CONNECT_ARGS',
        ('core', 'sql_alchemy_pool_enabled'): 'False',
    }
    with conf_vars(config):
        settings.configure_orm()
        mock_create_engine.assert_called_once_with(
            settings.SQL_ALCHEMY_CONN,
            connect_args=SQL_ALCHEMY_CONNECT_ARGS,
            poolclass=NullPool,
            encoding='utf-8',
        )
def task_run(args, dag=None): """Runs a single task instance""" # Load custom airflow config if args.cfg_path: with open(args.cfg_path, 'r') as conf_file: conf_dict = json.load(conf_file) if os.path.exists(args.cfg_path): os.remove(args.cfg_path) conf.read_dict(conf_dict, source=args.cfg_path) settings.configure_vars() # IMPORTANT, have to use the NullPool, otherwise, each "run" command may leave # behind multiple open sleeping connections while heartbeating, which could # easily exceed the database connection limit when # processing hundreds of simultaneous tasks. settings.configure_orm(disable_connection_pool=True) if dag and args.pickle: raise AirflowException( "You cannot use the --pickle option when using DAG.cli() method.") elif args.pickle: print(f'Loading pickle id: {args.pickle}') dag = get_dag_by_pickle(args.pickle) elif not dag: dag = get_dag(args.subdir, args.dag_id) else: # Use DAG from parameter pass task = dag.get_task(task_id=args.task_id) ti = TaskInstance(task, args.execution_date) ti.refresh_from_db() ti.init_run_context(raw=args.raw) hostname = get_hostname() print(f"Running {ti} on host {hostname}") if args.interactive: _run_task_by_selected_method(args, dag, ti) else: with redirect_stdout(StreamLogWriter(ti.log, logging.INFO)), \ redirect_stderr(StreamLogWriter(ti.log, logging.WARN)): _run_task_by_selected_method(args, dag, ti) logging.shutdown()
def set_airflow_db(sql_alchemy_conn: Optional[str], fernet_key: Optional[str]) -> ContextManager[AirflowDb]:
    env = {}
    if sql_alchemy_conn is not None:
        env['AIRFLOW__CORE__SQL_ALCHEMY_CONN'] = sql_alchemy_conn
    if fernet_key is not None:
        env['AIRFLOW__CORE__FERNET_KEY'] = fernet_key

    with set_env(**env):
        settings.configure_vars()
        settings.configure_orm()
        if sql_alchemy_conn is not None:
            assert str(settings.engine.url) == sql_alchemy_conn, (
                f'{settings.engine.url} != {sql_alchemy_conn}'
            )
        yield AirflowDb(sql_alchemy_conn or settings.SQL_ALCHEMY_CONN)

    settings.configure_vars()
    settings.configure_orm()
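# Because set_airflow_db yields once, it is presumably wrapped with
# contextlib.contextmanager in its module. A hypothetical call site might look like
# the sketch below; the connection string, test name, and the .sql_alchemy_conn
# attribute on AirflowDb are assumptions for illustration only.
def test_runs_against_temporary_db():
    with set_airflow_db('sqlite:////tmp/airflow-test.db', fernet_key=None) as airflow_db:
        # Inside the block, configure_vars()/configure_orm() have pointed the ORM
        # at the temporary database.
        assert airflow_db.sql_alchemy_conn == 'sqlite:////tmp/airflow-test.db'
    # On exit, configure_vars()/configure_orm() run again and restore the
    # connection taken from the original environment.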
def setUpClass(cls): settings.configure_orm() cls.session = settings.Session with conf_vars({ ("api", "auth_backend"): "tests.test_utils.remote_user_api_auth_backend" }): cls.app = app.create_app(testing=True) create_user( cls.app, username="******", role_name="Test", permissions=[('can_read', 'Dag'), ('can_read', 'DagRun'), ('can_read', 'Task')], ) create_user(cls.app, username="******", role_name="TestNoPermissions")
def test_sql_alchemy_connect_args(
    self, mock_create_engine, mock_sessionmaker, mock_scoped_session, mock_setup_event_handlers
):
    config = {
        ('core', 'sql_alchemy_connect_args'): 'tests.core.test_sqlalchemy_config.SQL_ALCHEMY_CONNECT_ARGS',
        ('core', 'sql_alchemy_pool_enabled'): 'False',
    }
    with conf_vars(config):
        settings.configure_orm()
        engine_args = {}
        if settings.SQL_ALCHEMY_CONN.startswith('mysql'):
            engine_args['isolation_level'] = 'READ COMMITTED'
        mock_create_engine.assert_called_once_with(
            settings.SQL_ALCHEMY_CONN,
            connect_args=SQL_ALCHEMY_CONNECT_ARGS,
            poolclass=NullPool,
            encoding='utf-8',
            **engine_args,
        )
def setUpClass(cls): settings.configure_orm() cls.session = settings.Session with conf_vars({ ("api", "auth_backend"): "tests.test_utils.remote_user_api_auth_backend" }): cls.app = app.create_app(testing=True) create_user( cls.app, username="******", role_name="Test", permissions=[ (permissions.ACTION_CAN_READ, permissions.RESOURCE_DAGS), (permissions.ACTION_CAN_READ, permissions.RESOURCE_DAG_RUN), (permissions.ACTION_CAN_READ, permissions.RESOURCE_TASK), ], ) create_user(cls.app, username="******", role_name="TestNoPermissions")
def task_run(args, dag=None): """Runs a single task instance""" # Load custom airflow config if args.cfg_path: with open(args.cfg_path, 'r') as conf_file: conf_dict = json.load(conf_file) if os.path.exists(args.cfg_path): os.remove(args.cfg_path) conf.read_dict(conf_dict, source=args.cfg_path) settings.configure_vars() # IMPORTANT, have to use the NullPool, otherwise, each "run" command may leave # behind multiple open sleeping connections while heartbeating, which could # easily exceed the database connection limit when # processing hundreds of simultaneous tasks. settings.configure_orm(disable_connection_pool=True) if dag and args.pickle: raise AirflowException("You cannot use the --pickle option when using DAG.cli() method.") elif args.pickle: print(f'Loading pickle id: {args.pickle}') dag = get_dag_by_pickle(args.pickle) elif not dag: dag = get_dag(args.subdir, args.dag_id) else: # Use DAG from parameter pass task = dag.get_task(task_id=args.task_id) ti = TaskInstance(task, args.execution_date) ti.init_run_context(raw=args.raw) hostname = get_hostname() print(f"Running {ti} on host {hostname}") if args.interactive: _run_task_by_selected_method(args, dag, ti) else: if settings.DONOT_MODIFY_HANDLERS: with redirect_stdout(StreamLogWriter(ti.log, logging.INFO)), \ redirect_stderr(StreamLogWriter(ti.log, logging.WARN)): _run_task_by_selected_method(args, dag, ti) else: # Get all the Handlers from 'airflow.task' logger # Add these handlers to the root logger so that we can get logs from # any custom loggers defined in the DAG airflow_logger_handlers = logging.getLogger('airflow.task').handlers root_logger = logging.getLogger() root_logger_handlers = root_logger.handlers # Remove all handlers from Root Logger to avoid duplicate logs for handler in root_logger_handlers: root_logger.removeHandler(handler) for handler in airflow_logger_handlers: root_logger.addHandler(handler) root_logger.setLevel(logging.getLogger('airflow.task').level) with redirect_stdout(StreamLogWriter(ti.log, logging.INFO)), \ redirect_stderr(StreamLogWriter(ti.log, logging.WARN)): _run_task_by_selected_method(args, dag, ti) # We need to restore the handlers to the loggers as celery worker process # can call this command multiple times, # so if we don't reset this then logs from next task would go to the wrong place for handler in airflow_logger_handlers: root_logger.removeHandler(handler) for handler in root_logger_handlers: root_logger.addHandler(handler) logging.shutdown()
def setUpClass(cls):
    settings.configure_orm()
    cls.session = settings.Session
    cls.app = app.create_app(testing=True)
def setUp(self):
    super().setUp()
    settings.configure_orm()
    self.session = Session
    self._cleanup()
def configured_session():
    settings.configure_orm()
    return Session
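# A hypothetical call site for configured_session(): the ORM is (re)configured and
# the returned airflow.settings.Session factory is used like any SQLAlchemy scoped
# session. The query target (DagModel) is just one real Airflow model chosen for
# illustration; this is a sketch, not part of the helper above.
from airflow.models import DagModel

session = configured_session()
try:
    dag_count = session.query(DagModel).count()
finally:
    session.close()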
def _run_file_processor(
    result_channel: MultiprocessingConnection,
    parent_channel: MultiprocessingConnection,
    file_path: str,
    pickle_dags: bool,
    dag_ids: Optional[List[str]],
    thread_name: str,
    callback_requests: List[CallbackRequest],
) -> None:
    """
    Process the given file.

    :param result_channel: the connection to use for passing back the result
    :type result_channel: multiprocessing.Connection
    :param parent_channel: the parent end of the channel to close in the child
    :type parent_channel: multiprocessing.Connection
    :param file_path: the file to process
    :type file_path: str
    :param pickle_dags: whether to pickle the DAGs found in the file and save them to the DB
    :type pickle_dags: bool
    :param dag_ids: if specified, only examine DAG ID's that are in this list
    :type dag_ids: list[str]
    :param thread_name: the name to use for the process that is launched
    :type thread_name: str
    :param callback_requests: failure callback to execute
    :type callback_requests: List[airflow.utils.callback_requests.CallbackRequest]
    :return: the process that was launched
    :rtype: multiprocessing.Process
    """
    # This helper runs in the newly created process
    log: logging.Logger = logging.getLogger("airflow.processor")

    # Since we share all open FDs from the parent, we need to close the parent side of the pipe here in
    # the child, else it won't get closed properly until we exit.
    log.info("Closing parent pipe")
    parent_channel.close()
    del parent_channel

    set_context(log, file_path)
    setproctitle(f"airflow scheduler - DagFileProcessor {file_path}")

    try:
        # redirect stdout/stderr to log
        with redirect_stdout(StreamLogWriter(log, logging.INFO)), redirect_stderr(
            StreamLogWriter(log, logging.WARN)
        ), Stats.timer() as timer:
            # Re-configure the ORM engine as there are issues with multiple processes
            settings.configure_orm()

            # Change the thread name to differentiate log lines. This is
            # really a separate process, but changing the name of the
            # process doesn't work, so changing the thread name instead.
            threading.current_thread().name = thread_name

            log.info("Started process (PID=%s) to work on %s", os.getpid(), file_path)
            dag_file_processor = DagFileProcessor(dag_ids=dag_ids, log=log)
            result: Tuple[int, int] = dag_file_processor.process_file(
                file_path=file_path,
                pickle_dags=pickle_dags,
                callback_requests=callback_requests,
            )
            result_channel.send(result)
        log.info("Processing %s took %.3f seconds", file_path, timer.duration)
    except Exception:  # pylint: disable=broad-except
        # Log exceptions through the logging framework.
        log.exception("Got an exception! Propagating...")
        raise
    finally:
        # We re-initialized the ORM within this Process above so we need to
        # tear it down manually here
        settings.dispose_orm()

        result_channel.close()
def task_run(args, dag=None): """Run a single task instance. Note that there must be at least one DagRun for this to start, i.e. it must have been scheduled and/or triggered previously. Alternatively, if you just need to run it for testing then use "airflow tasks test ..." command instead. """ # Load custom airflow config if args.local and args.raw: raise AirflowException( "Option --raw and --local are mutually exclusive. " "Please remove one option to execute the command.") if args.raw: unsupported_options = [ o for o in RAW_TASK_UNSUPPORTED_OPTION if getattr(args, o) ] if unsupported_options: unsupported_raw_task_flags = ', '.join( f'--{o}' for o in RAW_TASK_UNSUPPORTED_OPTION) unsupported_flags = ', '.join(f'--{o}' for o in unsupported_options) raise AirflowException( "Option --raw does not work with some of the other options on this command. " "You can't use --raw option and the following options: " f"{unsupported_raw_task_flags}. " f"You provided the option {unsupported_flags}. " "Delete it to execute the command.") if dag and args.pickle: raise AirflowException( "You cannot use the --pickle option when using DAG.cli() method.") if args.cfg_path: with open(args.cfg_path) as conf_file: conf_dict = json.load(conf_file) if os.path.exists(args.cfg_path): os.remove(args.cfg_path) conf.read_dict(conf_dict, source=args.cfg_path) settings.configure_vars() settings.MASK_SECRETS_IN_LOGS = True # IMPORTANT, have to use the NullPool, otherwise, each "run" command may leave # behind multiple open sleeping connections while heartbeating, which could # easily exceed the database connection limit when # processing hundreds of simultaneous tasks. settings.configure_orm(disable_connection_pool=True) if args.pickle: print(f'Loading pickle id: {args.pickle}') dag = get_dag_by_pickle(args.pickle) elif not dag: dag = get_dag(args.subdir, args.dag_id) else: # Use DAG from parameter pass task = dag.get_task(task_id=args.task_id) ti = _get_ti(task, args.execution_date_or_run_id) ti.init_run_context(raw=args.raw) hostname = get_hostname() print(f"Running {ti} on host {hostname}") if args.interactive: _run_task_by_selected_method(args, dag, ti) else: with _capture_task_logs(ti): _run_task_by_selected_method(args, dag, ti)
def setUpClass(cls) -> None:
    from airflow import settings

    settings.configure_orm()
def session():
    settings.configure_orm()
    yield settings.Session
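# Assuming the generator above is registered as a pytest fixture (decorated with
# @pytest.fixture in its module), a test receives the configured Session factory by
# naming the fixture as an argument. The test name and the Connection query are
# illustrative assumptions; Connection itself is a real Airflow model.
from airflow.models import Connection


def test_orm_is_configured(session):
    # Querying a core table is enough to prove configure_orm() bound a working engine.
    assert session.query(Connection).count() >= 0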