def _execute_in_fork(command_to_exec: CommandType) -> None:
    """Run an Airflow CLI command in a forked child and wait for it in the parent.

    The parent blocks in ``os.waitpid`` and returns normally when the child's
    wait status is 0. The child parses ``command_to_exec`` with the Airflow CLI
    parser, invokes the selected command and always ends in ``os._exit``.

    :param command_to_exec: full command line; its first element is dropped before parsing
    :raises AirflowException: in the parent, when the child's wait status is non-zero
    """
    pid = os.fork()
    if pid:
        # In parent, wait for the child
        pid, ret = os.waitpid(pid, 0)
        if ret == 0:
            return

        raise AirflowException('Celery command failed on host: ' + get_hostname())

    from airflow.sentry import Sentry

    # Pessimistic default: only a fully successful run flips this to 0.
    ret = 1
    try:
        from airflow.cli.cli_parser import get_parser

        parser = get_parser()
        # [1:] - remove "airflow" from the start of the command
        args = parser.parse_args(command_to_exec[1:])

        setproctitle(f"airflow task supervisor: {command_to_exec}")

        args.func(args)
        ret = 0
    except Exception as e:  # pylint: disable=broad-except
        # log.exception records the traceback as well as the message, so the
        # cause of the failure is visible in the worker log.
        log.exception("Failed to execute task %s.", str(e))
    finally:
        Sentry.flush()
        os._exit(ret)  # pylint: disable=protected-access
def _execute_work_in_fork(self, command: CommandType) -> str:
    """Execute ``command`` in a forked child process and report the outcome.

    The parent waits for the child and maps its wait status to a state. The
    child restores the default SIGINT/SIGTERM handlers, runs the command
    through the Airflow CLI parser and ends in ``os._exit``.

    :param command: full command line; its first element is dropped before parsing
    :return: ``State.SUCCESS`` when the child's wait status is 0, else ``State.FAILED``
    """
    pid = os.fork()
    if pid:
        # In parent, wait for the child
        pid, ret = os.waitpid(pid, 0)
        return State.SUCCESS if ret == 0 else State.FAILED

    from airflow.sentry import Sentry

    # Pessimistic default: only a fully successful run flips this to 0.
    ret = 1
    try:
        import signal

        from airflow.cli.cli_parser import get_parser

        signal.signal(signal.SIGINT, signal.SIG_DFL)
        signal.signal(signal.SIGTERM, signal.SIG_DFL)

        parser = get_parser()
        # [1:] - remove "airflow" from the start of the command
        args = parser.parse_args(command[1:])

        setproctitle(f"airflow task supervisor: {command}")

        args.func(args)
        ret = 0
        # os._exit() in the finally clause ends the child before this value
        # reaches any caller; it is kept so every path has a return.
        return State.SUCCESS
    except Exception as e:  # pylint: disable=broad-except
        # log.exception records the traceback as well as the message.
        self.log.exception("Failed to execute task %s.", str(e))
    finally:
        Sentry.flush()
        os._exit(ret)  # pylint: disable=protected-access
def setUp(self) -> None:
    """Set identifiers, CLI arguments and log paths; snapshot the root logger; drop a stale log file."""
    dag_id, task_id = "test_logging_dag", "test_task"
    self.dag_id = dag_id
    self.task_id = task_id
    self.dag_path = os.path.join(ROOT_FOLDER, "dags", "test_logging_in_dag.py")
    reset(dag_id)

    execution_date = timezone.make_aware(datetime(2017, 1, 1))
    execution_date_str = execution_date.isoformat()
    self.execution_date = execution_date
    self.execution_date_str = execution_date_str
    self.task_args = ['tasks', 'run', dag_id, task_id, '--local', execution_date_str]

    self.log_dir = conf.get('logging', 'base_log_folder')
    self.log_filename = f"{self.dag_id}/{self.task_id}/{self.execution_date_str}/1.log"
    self.ti_log_file_path = os.path.join(self.log_dir, self.log_filename)
    self.parser = cli_parser.get_parser()

    # Keep copies of the root logger configuration as it is at this point.
    root_logger = logging.getLogger()
    self.root_logger = root_logger
    self.root_handlers = list(root_logger.handlers)
    self.root_filters = list(root_logger.filters)
    self.root_level = root_logger.level

    # Best effort: the log file may simply not exist yet.
    try:
        os.remove(self.ti_log_file_path)
    except OSError:
        pass
def _start_by_fork(self):  # pylint: disable=inconsistent-return-statements
    """Fork and run ``self._command`` through the Airflow CLI in the child.

    :return: in the parent, a ``psutil.Process`` for the forked child. The
        child branch ends in ``os._exit`` (0 on success, 1 when the command
        raises ``Exception``).
    """
    pid = os.fork()
    if pid:
        self.log.info("Started process %d to run task", pid)
        return psutil.Process(pid)
    else:
        from airflow.cli.cli_parser import get_parser
        import signal
        import airflow.settings as settings

        signal.signal(signal.SIGINT, signal.SIG_DFL)
        signal.signal(signal.SIGTERM, signal.SIG_DFL)
        # Start a new process group
        os.setpgid(0, 0)

        # Force a new SQLAlchemy session. We can't share open DB handles
        # between process. The cli code will re-create this as part of its
        # normal startup
        settings.engine.pool.dispose()
        settings.engine.dispose()

        parser = get_parser()
        # [1:] - remove "airflow" from the start of the command
        args = parser.parse_args(self._command[1:])

        proc_title = "airflow task runner: {0.dag_id} {0.task_id} {0.execution_date}"
        if hasattr(args, "job_id"):
            proc_title += " {0.job_id}"
        setproctitle(proc_title.format(args))

        try:
            args.func(args, dag=self.dag)
            os._exit(0)  # pylint: disable=protected-access
        except Exception:  # pylint: disable=broad-except
            # Record why the command failed instead of exiting silently. The
            # inner finally guarantees the exit even if logging itself fails.
            try:
                self.log.exception("Failed to execute task runner command: %s", self._command)
            finally:
                os._exit(1)  # pylint: disable=protected-access
def setUp(self, session=None):
    """Call ``clear_db_connections``, merge two fixture connections and build the CLI parser."""
    clear_db_connections(add_default_connections_back=False)

    fixture_connections = (
        dict(
            conn_id="airflow_db",
            conn_type="mysql",
            description="mysql conn description",
            host="mysql",
            login="******",
            password="******",
            schema="airflow",
        ),
        dict(
            conn_id="druid_broker_default",
            conn_type="druid",
            description="druid-broker conn description",
            host="druid-broker",
            port=8082,
            extra='{"endpoint": "druid/v2/sql"}',
        ),
    )
    # Build and merge one connection at a time, in declaration order.
    for conn_kwargs in fixture_connections:
        merge_conn(Connection(**conn_kwargs), session)

    self.parser = cli_parser.get_parser()
def test_commands_and_command_group_sections(self):
    """The top-level ``--help`` output contains both "Commands" and "Groups"."""
    parser = cli_parser.get_parser()

    captured = io.StringIO()
    # Parsing --help is expected to raise SystemExit; capture what it prints.
    with contextlib.redirect_stdout(captured), self.assertRaises(SystemExit):
        parser.parse_args(['--help'])

    help_text = captured.getvalue()
    for heading in ("Commands", "Groups"):
        self.assertIn(heading, help_text)
def main():
    """Main executable function"""
    kerberos_enabled = conf.get("core", "security") == 'kerberos'
    if kerberos_enabled:
        # Export the configured credential cache and keytab settings.
        for env_name, option in (('KRB5CCNAME', 'ccache'), ('KRB5_KTNAME', 'keytab')):
            os.environ[env_name] = conf.get('kerberos', option)

    cli = cli_parser.get_parser()
    argcomplete.autocomplete(cli)
    parsed_args = cli.parse_args()
    # Dispatch to the function attached to the parsed arguments.
    parsed_args.func(parsed_args)
def _start_by_fork(self):
    """Fork and run ``self._command`` through the Airflow CLI in the child.

    :return: in the parent, a ``psutil.Process`` for the forked child. The
        child branch ends in ``os._exit`` with 0 on success and 1 otherwise.
    """
    pid = os.fork()
    if pid:
        self.log.info("Started process %d to run task", pid)
        return psutil.Process(pid)
    else:
        # Start a new process group
        os.setpgid(0, 0)
        import signal

        signal.signal(signal.SIGINT, signal.SIG_DFL)
        signal.signal(signal.SIGTERM, signal.SIG_DFL)

        from airflow import settings
        from airflow.cli.cli_parser import get_parser
        from airflow.sentry import Sentry

        # Force a new SQLAlchemy session. We can't share open DB handles
        # between process. The cli code will re-create this as part of its
        # normal startup
        settings.engine.pool.dispose()
        settings.engine.dispose()

        parser = get_parser()
        # [1:] - remove "airflow" from the start of the command
        args = parser.parse_args(self._command[1:])

        # We prefer the job_id passed on the command-line because at this time, the
        # task instance may not have been updated.
        job_id = getattr(args, "job_id", self._task_instance.job_id)
        self.log.info('Running: %s', self._command)
        self.log.info('Job %s: Subtask %s', job_id, self._task_instance.task_id)

        proc_title = "airflow task runner: {0.dag_id} {0.task_id} {0.execution_date_or_run_id}".format(args)
        if job_id is not None:
            # Append the resolved job_id rather than re-reading args.job_id,
            # which does not exist when the task-instance fallback was used.
            proc_title += f" {job_id}"
        setproctitle(proc_title)

        # Pessimistic default so the finally clause always has a bound value,
        # including when a non-Exception BaseException (e.g. SystemExit)
        # escapes the except clause below.
        return_code = 1
        try:
            args.func(args, dag=self.dag)
            return_code = 0
        except Exception as exc:
            self.log.error(
                "Failed to execute job %s for task %s (%s; %r)",
                job_id,
                self._task_instance.task_id,
                exc,
                os.getpid(),
            )
        finally:
            # Explicitly flush any pending exception to Sentry if enabled
            Sentry.flush()
            logging.shutdown()
            os._exit(return_code)
def setUp(self) -> None:
    """Set identifiers and log paths, build the CLI parser and drop a stale log file."""
    dag_id, task_id = "test_logging_dag", "test_task"
    self.dag_id = dag_id
    self.task_id = task_id
    reset(dag_id)

    execution_date = timezone.make_aware(datetime(2017, 1, 1))
    self.execution_date_str = execution_date.isoformat()

    self.log_dir = conf.get('logging', 'base_log_folder')
    self.log_filename = f"{self.dag_id}/{self.task_id}/{self.execution_date_str}/1.log"
    self.ti_log_file_path = os.path.join(self.log_dir, self.log_filename)
    self.parser = cli_parser.get_parser()

    # Best effort: the log file may simply not exist yet.
    try:
        os.remove(self.ti_log_file_path)
    except OSError:
        pass
def test_should_display_help(self):
    """Parsing ``--help`` raises SystemExit for every command and every group subcommand."""
    parser = cli_parser.get_parser()

    all_command_as_args = []
    for top_command in cli_parser.airflow_commands:
        if isinstance(top_command, cli_parser.ActionCommand):
            # A plain command is addressed by its own name only.
            all_command_as_args.append([top_command.name])
        else:
            # Anything else is treated as a group: one entry per subcommand.
            all_command_as_args.extend(
                [top_command.name, nested_command.name] for nested_command in top_command.subcommands
            )

    for cmd_args in all_command_as_args:
        with self.assertRaises(SystemExit):
            parser.parse_args([*cmd_args, '--help'])
def main():
    """Main executable function"""
    if conf.get("core", "security") == 'kerberos':
        # Export the configured credential cache and keytab settings.
        os.environ['KRB5CCNAME'] = conf.get('kerberos', 'ccache')
        os.environ['KRB5_KTNAME'] = conf.get('kerberos', 'keytab')
    if PY310:
        docs_url = get_docs_url('installation/prerequisites.html')
        # Warn only; execution continues.
        warnings.warn(
            "Python v3.10 is not officially supported on this version of Airflow. Please be careful. "
            f"For details, see: {docs_url}"
        )

    parser = cli_parser.get_parser()
    argcomplete.autocomplete(parser)
    args = parser.parse_args()
    # Dispatch to the function attached to the parsed arguments.
    args.func(args)
def _start_by_fork(self):
    """Fork and run ``self._command`` through the Airflow CLI in the child.

    :return: in the parent, a ``psutil.Process`` for the forked child. The
        child branch ends in ``os._exit`` with 0 on success and 1 otherwise.
    """
    pid = os.fork()
    if pid:
        self.log.info("Started process %d to run task", pid)
        return psutil.Process(pid)
    else:
        import signal

        from airflow import settings
        from airflow.cli.cli_parser import get_parser
        from airflow.sentry import Sentry

        signal.signal(signal.SIGINT, signal.SIG_DFL)
        signal.signal(signal.SIGTERM, signal.SIG_DFL)
        # Start a new process group
        os.setpgid(0, 0)

        # Force a new SQLAlchemy session. We can't share open DB handles
        # between process. The cli code will re-create this as part of its
        # normal startup
        settings.engine.pool.dispose()
        settings.engine.dispose()

        parser = get_parser()
        # [1:] - remove "airflow" from the start of the command
        args = parser.parse_args(self._command[1:])

        self.log.info('Running: %s', self._command)
        self.log.info('Job %s: Subtask %s', self._task_instance.job_id, self._task_instance.task_id)

        proc_title = "airflow task runner: {0.dag_id} {0.task_id} {0.execution_date_or_run_id}"
        if hasattr(args, "job_id"):
            proc_title += " {0.job_id}"
        setproctitle(proc_title.format(args))

        # Pessimistic default so the finally clause always has a bound value,
        # including when a non-Exception BaseException (e.g. SystemExit)
        # escapes the except clause below.
        return_code = 1
        try:
            args.func(args, dag=self.dag)
            return_code = 0
        except Exception:
            # Record the failure and its traceback instead of swallowing it.
            self.log.exception(
                "Failed to execute job %s for task %s",
                self._task_instance.job_id,
                self._task_instance.task_id,
            )
        finally:
            # Explicitly flush any pending exception to Sentry if enabled
            Sentry.flush()
            logging.shutdown()
            os._exit(return_code)
def _execute_in_fork(command_to_exec: CommandType, celery_task_id: Optional[str] = None) -> None:
    """Run an Airflow CLI command in a forked child; the parent waits for it.

    :param command_to_exec: full command line; its first element is dropped before parsing
    :param celery_task_id: when truthy, stored on the parsed args as
        ``external_executor_id``; also included in error messages
    :raises AirflowException: in the parent, when the child's wait status is non-zero
    """
    child_pid = os.fork()
    if child_pid:
        # Parent side: block until the child is done.
        _, wait_status = os.waitpid(child_pid, 0)
        if wait_status != 0:
            msg = f'Celery command failed on host: {get_hostname()} with celery_task_id {celery_task_id}'
            raise AirflowException(msg)
        return

    from airflow.sentry import Sentry

    exit_code = 1
    try:
        from airflow.cli.cli_parser import get_parser

        settings.engine.pool.dispose()
        settings.engine.dispose()

        # Skip the leading "airflow" element of the command.
        cli_args = get_parser().parse_args(command_to_exec[1:])
        cli_args.shut_down_logging = False
        if celery_task_id:
            cli_args.external_executor_id = celery_task_id

        setproctitle(f"airflow task supervisor: {command_to_exec}")
        cli_args.func(cli_args)
    except Exception as e:
        log.exception("[%s] Failed to execute task %s.", celery_task_id, str(e))
    else:
        exit_code = 0
    finally:
        Sentry.flush()
        logging.shutdown()
        os._exit(exit_code)
def _execute_work_in_fork(self, command: CommandType) -> str:
    """Execute ``command`` in a forked child process and report the outcome.

    :param command: full command line; its first element is dropped before parsing
    :return: ``State.SUCCESS`` when the child's wait status is 0, else ``State.FAILED``
    """
    child_pid = os.fork()
    if child_pid:
        # Parent side: block until the child is done.
        _, wait_status = os.waitpid(child_pid, 0)
        if wait_status == 0:
            return State.SUCCESS
        return State.FAILED

    from airflow.sentry import Sentry

    exit_code = 1
    try:
        import signal

        from airflow.cli.cli_parser import get_parser

        # Restore default handling for these signals in the child.
        for signum in (signal.SIGINT, signal.SIGTERM, signal.SIGUSR2):
            signal.signal(signum, signal.SIG_DFL)

        # Skip the leading "airflow" element of the command.
        cli_args = get_parser().parse_args(command[1:])
        cli_args.shut_down_logging = False

        setproctitle(f"airflow task supervisor: {command}")

        cli_args.func(cli_args)
        exit_code = 0
        return State.SUCCESS
    except Exception as e:
        self.log.exception("Failed to execute task %s.", e)
        return State.FAILED
    finally:
        # os._exit() here ends the child regardless of the returns above.
        Sentry.flush()
        logging.shutdown()
        os._exit(exit_code)
def setUp(self):
    """Call the run and pool clean-up helpers, then build the CLI parser."""
    for clear_table in (clear_db_runs, clear_db_pools):
        clear_table()
    cli = cli_parser.get_parser()
    self.parser = cli
def setUpClass(cls):
    """Reload ``cli_parser`` and store a parser built after the reload on the class."""
    importlib.reload(cli_parser)
    reloaded_parser = cli_parser.get_parser()
    cls.parser = reloaded_parser
def _start_by_fork(self):
    """Fork and run ``self._command`` through the Airflow CLI in the child.

    :return: in the parent, a ``psutil.Process`` for the forked child. The
        child branch ends in ``os._exit`` with 0 on success, 1 when the
        command raises ``Exception``, the status derived from ``SystemExit``
        when one is raised, and 2 for any other ``BaseException``.
    """
    pid = os.fork()
    if pid:
        self.log.info("Started process %d to run task", pid)
        return psutil.Process(pid)
    else:
        # Start a new process group
        os.setpgid(0, 0)
        import signal

        signal.signal(signal.SIGINT, signal.SIG_DFL)
        signal.signal(signal.SIGTERM, signal.SIG_DFL)

        from airflow import settings
        from airflow.cli.cli_parser import get_parser
        from airflow.sentry import Sentry
        from airflow.utils.cli import get_dag

        # Force a new SQLAlchemy session. We can't share open DB handles
        # between process. The cli code will re-create this as part of its
        # normal startup
        settings.engine.pool.dispose()
        settings.engine.dispose()

        parser = get_parser()
        # [1:] - remove "airflow" from the start of the command
        args = parser.parse_args(self._command[1:])

        # We prefer the job_id passed on the command-line because at this time, the
        # task instance may not have been updated.
        job_id = getattr(args, "job_id", self._task_instance.job_id)
        self.log.info('Running: %s', self._command)
        self.log.info('Job %s: Subtask %s', job_id, self._task_instance.task_id)

        proc_title = "airflow task runner: {0.dag_id} {0.task_id} {0.execution_date_or_run_id}".format(args)
        if job_id is not None:
            # Append the resolved job_id rather than re-reading args.job_id,
            # which does not exist when the task-instance fallback was used.
            proc_title += f" {job_id}"
        setproctitle(proc_title)

        return_code = 0
        try:
            # parse dag file since `airflow tasks run --local` does not parse dag file
            dag = get_dag(args.subdir, args.dag_id)
            args.func(args, dag=dag)
            return_code = 0
        except Exception as exc:
            return_code = 1

            self.log.error(
                "Failed to execute job %s for task %s (%s; %r)",
                job_id,
                self._task_instance.task_id,
                exc,
                os.getpid(),
            )
        except SystemExit as sys_ex:
            # Someone called sys.exit() in the fork - mistakenly. You should not run sys.exit() in
            # the fork because you can mistakenly execute atexit that were set by the parent process
            # before fork happened.
            # SystemExit.code may be None or a non-integer object, while os._exit() only accepts
            # an int, so map it the way the interpreter does: None -> 0, int -> itself, other -> 1.
            exit_status = sys_ex.code
            if exit_status is None:
                return_code = 0
            elif isinstance(exit_status, int):
                return_code = exit_status
            else:
                return_code = 1
        except BaseException:
            # While we want to handle BaseExceptions here too, we do not want to log them (this
            # is the default behaviour anyway). Setting the return code here to 2 to indicate that
            # this had happened.
            return_code = 2
        finally:
            try:
                # Explicitly flush any pending exception to Sentry and logging if enabled
                Sentry.flush()
                logging.shutdown()
            except BaseException:
                # Also make sure to silently ignore ALL POSSIBLE exceptions thrown in the flush/shutdown,
                # otherwise os._exit() might never be called. We could have used `except:` but
                # `except BaseException` is more explicit (and linters do not complain).
                pass
        # We run os._exit() making sure it is not run within the `finally` clause.
        # We cannot run os._exit() in finally clause, because during finally clause processing, the
        # Exception handled is held in memory as well as stack trace and possibly some objects that
        # might need to be finalized. Running os._exit() inside the `finally` clause might cause effects
        # similar to https://github.com/apache/airflow/issues/22404. There, a temporary file had not
        # been deleted at os._exit().
        os._exit(return_code)
def parser():
    """Return a parser built by ``cli_parser.get_parser()``."""
    built_parser = cli_parser.get_parser()
    return built_parser
def setUpClass(cls):
    """Store a ``DagBag`` built with ``include_examples=True`` and a CLI parser on the class."""
    dagbag = models.DagBag(include_examples=True)
    cls.dagbag = dagbag
    cli = cli_parser.get_parser()
    cls.parser = cli
def setUp(self):
    """Run ``self.clean_db()`` and then build the CLI parser for the test."""
    self.clean_db()
    cli = cli_parser.get_parser()
    self.parser = cli
def setUpClass(cls):
    """Store a CLI parser on the class, then call ``clear_db_connections`` without restoring defaults."""
    cli = cli_parser.get_parser()
    cls.parser = cli
    clear_db_connections(add_default_connections_back=False)
def setUpClass(cls):
    """Store a CLI parser on the class, then call ``clear_db_connections()``."""
    cli = cli_parser.get_parser()
    cls.parser = cli
    clear_db_connections()
def setUp(self):
    """Store a CLI parser on the instance, then call ``clear_db_connections()``."""
    cli = cli_parser.get_parser()
    self.parser = cli
    clear_db_connections()
def setUpClass(cls):
    """Build the CLI parser once and store it on the class."""
    cli = cli_parser.get_parser()
    cls.parser = cli
def setup_class(cls):
    """Build the CLI parser once and store it on the class."""
    # pylint: disable=attribute-defined-outside-init
    cli = cli_parser.get_parser()
    cls.parser = cli
def setUpClass(cls):
    """Store a ``DagBag`` (after calling its ``sync_to_db()``) and a CLI parser on the class."""
    dagbag = DagBag(include_examples=True)
    cls.dagbag = dagbag
    dagbag.sync_to_db()
    cli = cli_parser.get_parser()
    cls.parser = cli
def setup_parser():
    """Yield a parser built by ``cli_parser.get_parser()``."""
    built_parser = cli_parser.get_parser()
    yield built_parser