def _execute_in_fork(command_to_exec: CommandType) -> None:
    pid = os.fork()
    if pid:
        # In parent, wait for the child
        pid, ret = os.waitpid(pid, 0)
        if ret == 0:
            return

        raise AirflowException('Celery command failed on host: ' + get_hostname())

    from airflow.sentry import Sentry

    ret = 1
    try:
        from airflow.cli.cli_parser import get_parser

        parser = get_parser()
        # [1:] - remove "airflow" from the start of the command
        args = parser.parse_args(command_to_exec[1:])

        setproctitle(f"airflow task supervisor: {command_to_exec}")

        args.func(args)
        ret = 0
    except Exception as e:  # pylint: disable=broad-except
        log.error("Failed to execute task %s.", str(e))
        ret = 1
    finally:
        Sentry.flush()
        os._exit(ret)  # pylint: disable=protected-access
def _execute_work_in_fork(self, command: CommandType) -> str:
    pid = os.fork()
    if pid:
        # In parent, wait for the child
        pid, ret = os.waitpid(pid, 0)
        return State.SUCCESS if ret == 0 else State.FAILED

    from airflow.sentry import Sentry

    ret = 1
    try:
        import signal

        from airflow.cli.cli_parser import get_parser

        signal.signal(signal.SIGINT, signal.SIG_DFL)
        signal.signal(signal.SIGTERM, signal.SIG_DFL)

        parser = get_parser()
        # [1:] - remove "airflow" from the start of the command
        args = parser.parse_args(command[1:])

        setproctitle(f"airflow task supervisor: {command}")

        args.func(args)
        ret = 0
        return State.SUCCESS
    except Exception as e:  # pylint: disable=broad-except
        self.log.error("Failed to execute task %s.", str(e))
    finally:
        Sentry.flush()
        os._exit(ret)  # pylint: disable=protected-access
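# The executor helpers above (and their later variants below) share one fork-and-wait
# pattern: the parent blocks in os.waitpid() and maps the child's exit status to a
# result, while the child restores default signal handlers, does the work, and leaves
# via os._exit() so that cleanup inherited from the parent (atexit hooks, buffered
# log handlers) does not run twice. A minimal, stdlib-only sketch of that pattern,
# assuming a hypothetical run_work() callable standing in for the
# parser.parse_args()/args.func() sequence used by the real helpers:
import os
import signal
from typing import Callable


def supervise_in_fork(run_work: Callable[[], None]) -> bool:
    """Run run_work() in a forked child; return True if it exited cleanly."""
    pid = os.fork()
    if pid:
        # Parent: wait for the child; a raw wait status of 0 means exit code 0.
        _, status = os.waitpid(pid, 0)
        return status == 0

    # Child: undo any handlers installed by the parent so SIGINT/SIGTERM behave normally.
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    signal.signal(signal.SIGTERM, signal.SIG_DFL)
    ret = 1
    try:
        run_work()
        ret = 0
    except Exception:
        ret = 1
    finally:
        # os._exit() terminates the child without running the interpreter's normal
        # shutdown; only the exit code escapes the fork.
        os._exit(ret)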
def _start_by_fork(self):
    pid = os.fork()
    if pid:
        self.log.info("Started process %d to run task", pid)
        return psutil.Process(pid)
    else:
        # Start a new process group
        os.setpgid(0, 0)

        import signal

        signal.signal(signal.SIGINT, signal.SIG_DFL)
        signal.signal(signal.SIGTERM, signal.SIG_DFL)

        from airflow import settings
        from airflow.cli.cli_parser import get_parser
        from airflow.sentry import Sentry

        # Force a new SQLAlchemy session. We can't share open DB handles
        # between processes. The cli code will re-create this as part of its
        # normal startup
        settings.engine.pool.dispose()
        settings.engine.dispose()

        parser = get_parser()
        # [1:] - remove "airflow" from the start of the command
        args = parser.parse_args(self._command[1:])

        # We prefer the job_id passed on the command-line because at this time, the
        # task instance may not have been updated.
        job_id = getattr(args, "job_id", self._task_instance.job_id)
        self.log.info('Running: %s', self._command)
        self.log.info('Job %s: Subtask %s', job_id, self._task_instance.task_id)

        proc_title = "airflow task runner: {0.dag_id} {0.task_id} {0.execution_date_or_run_id}"
        if job_id is not None:
            proc_title += " {0.job_id}"
        setproctitle(proc_title.format(args))

        try:
            args.func(args, dag=self.dag)
            return_code = 0
        except Exception as exc:
            return_code = 1
            self.log.error(
                "Failed to execute job %s for task %s (%s; %r)",
                job_id,
                self._task_instance.task_id,
                exc,
                os.getpid(),
            )
        finally:
            # Explicitly flush any pending exception to Sentry if enabled
            Sentry.flush()
            logging.shutdown()
            os._exit(return_code)
def _start_by_fork(self):
    pid = os.fork()
    if pid:
        self.log.info("Started process %d to run task", pid)
        return psutil.Process(pid)
    else:
        import signal

        from airflow import settings
        from airflow.cli.cli_parser import get_parser
        from airflow.sentry import Sentry

        signal.signal(signal.SIGINT, signal.SIG_DFL)
        signal.signal(signal.SIGTERM, signal.SIG_DFL)

        # Start a new process group
        os.setpgid(0, 0)

        # Force a new SQLAlchemy session. We can't share open DB handles
        # between processes. The cli code will re-create this as part of its
        # normal startup
        settings.engine.pool.dispose()
        settings.engine.dispose()

        parser = get_parser()
        # [1:] - remove "airflow" from the start of the command
        args = parser.parse_args(self._command[1:])
        self.log.info('Running: %s', self._command)
        self.log.info('Job %s: Subtask %s', self._task_instance.job_id, self._task_instance.task_id)

        proc_title = "airflow task runner: {0.dag_id} {0.task_id} {0.execution_date_or_run_id}"
        if hasattr(args, "job_id"):
            proc_title += " {0.job_id}"
        setproctitle(proc_title.format(args))

        try:
            args.func(args, dag=self.dag)
            return_code = 0
        except Exception:
            return_code = 1
        finally:
            # Explicitly flush any pending exception to Sentry if enabled
            Sentry.flush()
            logging.shutdown()
            os._exit(return_code)
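# Both _start_by_fork variants above re-initialize per-process state right after the
# fork: a fresh process group (so the whole task subtree can be signalled at once),
# default SIGINT/SIGTERM handlers, and disposal of the SQLAlchemy engine so no DB
# connection is shared with the parent. A hedged sketch of that child-side setup,
# with a local create_engine("sqlite://") engine standing in for airflow.settings.engine:
import os
import signal

from sqlalchemy import create_engine

engine = create_engine("sqlite://")  # stand-in for airflow.settings.engine


def reinitialize_forked_child() -> None:
    """Call in the child immediately after os.fork() returns 0."""
    # Detach into a new process group so signals sent to the group reach the task
    # runner and everything it spawns, without touching the parent.
    os.setpgid(0, 0)
    # Restore default handlers; the parent may have installed its own.
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    signal.signal(signal.SIGTERM, signal.SIG_DFL)
    # Drop connections inherited from the parent; SQLAlchemy connections must not be
    # reused across a fork, so the child starts from an empty pool.
    engine.pool.dispose()
    engine.dispose()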
def _execute_in_fork(command_to_exec: CommandType, celery_task_id: Optional[str] = None) -> None:
    pid = os.fork()
    if pid:
        # In parent, wait for the child
        pid, ret = os.waitpid(pid, 0)
        if ret == 0:
            return

        msg = f'Celery command failed on host: {get_hostname()} with celery_task_id {celery_task_id}'
        raise AirflowException(msg)

    from airflow.sentry import Sentry

    ret = 1
    try:
        from airflow.cli.cli_parser import get_parser

        settings.engine.pool.dispose()
        settings.engine.dispose()

        parser = get_parser()
        # [1:] - remove "airflow" from the start of the command
        args = parser.parse_args(command_to_exec[1:])
        args.shut_down_logging = False
        if celery_task_id:
            args.external_executor_id = celery_task_id

        setproctitle(f"airflow task supervisor: {command_to_exec}")

        args.func(args)
        ret = 0
    except Exception as e:
        log.exception("[%s] Failed to execute task %s.", celery_task_id, str(e))
        ret = 1
    finally:
        Sentry.flush()
        logging.shutdown()
        os._exit(ret)
def _start_by_fork(self):  # pylint: disable=inconsistent-return-statements
    pid = os.fork()
    if pid:
        self.log.info("Started process %d to run task", pid)
        return psutil.Process(pid)
    else:
        from airflow.cli.cli_parser import get_parser
        from airflow.sentry import Sentry
        import signal
        import airflow.settings as settings

        signal.signal(signal.SIGINT, signal.SIG_DFL)
        signal.signal(signal.SIGTERM, signal.SIG_DFL)
        # Start a new process group
        os.setpgid(0, 0)

        # Force a new SQLAlchemy session. We can't share open DB handles
        # between processes. The cli code will re-create this as part of its
        # normal startup
        settings.engine.pool.dispose()
        settings.engine.dispose()

        parser = get_parser()
        # [1:] - remove "airflow" from the start of the command
        args = parser.parse_args(self._command[1:])

        proc_title = "airflow task runner: {0.dag_id} {0.task_id} {0.execution_date}"
        if hasattr(args, "job_id"):
            proc_title += " {0.job_id}"
        setproctitle(proc_title.format(args))

        try:
            args.func(args, dag=self.dag)
            return_code = 0
        except Exception:  # pylint: disable=broad-except
            return_code = 1
        finally:
            # Explicitly flush any pending exception to Sentry if enabled
            Sentry.flush()
            os._exit(return_code)  # pylint: disable=protected-access
def _execute_work_in_fork(self, command: CommandType) -> str:
    pid = os.fork()
    if pid:
        # In parent, wait for the child
        pid, ret = os.waitpid(pid, 0)
        return State.SUCCESS if ret == 0 else State.FAILED

    from airflow.sentry import Sentry

    ret = 1
    try:
        import signal

        from airflow.cli.cli_parser import get_parser

        signal.signal(signal.SIGINT, signal.SIG_DFL)
        signal.signal(signal.SIGTERM, signal.SIG_DFL)
        signal.signal(signal.SIGUSR2, signal.SIG_DFL)

        parser = get_parser()
        # [1:] - remove "airflow" from the start of the command
        args = parser.parse_args(command[1:])
        args.shut_down_logging = False

        setproctitle(f"airflow task supervisor: {command}")

        args.func(args)
        ret = 0
        return State.SUCCESS
    except Exception as e:
        self.log.exception("Failed to execute task %s.", e)
        return State.FAILED
    finally:
        Sentry.flush()
        logging.shutdown()
        os._exit(ret)
def _start_by_fork(self):
    pid = os.fork()
    if pid:
        self.log.info("Started process %d to run task", pid)
        return psutil.Process(pid)
    else:
        # Start a new process group
        os.setpgid(0, 0)

        import signal

        signal.signal(signal.SIGINT, signal.SIG_DFL)
        signal.signal(signal.SIGTERM, signal.SIG_DFL)

        from airflow import settings
        from airflow.cli.cli_parser import get_parser
        from airflow.sentry import Sentry
        from airflow.utils.cli import get_dag

        # Force a new SQLAlchemy session. We can't share open DB handles
        # between processes. The cli code will re-create this as part of its
        # normal startup
        settings.engine.pool.dispose()
        settings.engine.dispose()

        parser = get_parser()
        # [1:] - remove "airflow" from the start of the command
        args = parser.parse_args(self._command[1:])

        # We prefer the job_id passed on the command-line because at this time, the
        # task instance may not have been updated.
        job_id = getattr(args, "job_id", self._task_instance.job_id)
        self.log.info('Running: %s', self._command)
        self.log.info('Job %s: Subtask %s', job_id, self._task_instance.task_id)

        proc_title = "airflow task runner: {0.dag_id} {0.task_id} {0.execution_date_or_run_id}"
        if job_id is not None:
            proc_title += " {0.job_id}"
        setproctitle(proc_title.format(args))

        return_code = 0
        try:
            # Parse the DAG file here, since `airflow tasks run --local` does not parse it
            dag = get_dag(args.subdir, args.dag_id)
            args.func(args, dag=dag)
            return_code = 0
        except Exception as exc:
            return_code = 1
            self.log.error(
                "Failed to execute job %s for task %s (%s; %r)",
                job_id,
                self._task_instance.task_id,
                exc,
                os.getpid(),
            )
        except SystemExit as sys_ex:
            # Someone mistakenly called sys.exit() in the fork. You should not call sys.exit()
            # in the fork, because it can run atexit handlers that were registered by the
            # parent process before the fork happened.
            return_code = sys_ex.code
        except BaseException:
            # While we also want to handle BaseException here, we do not want to log it (that
            # is the default behaviour anyway). Set the return code to 2 to indicate that
            # this happened.
            return_code = 2
        finally:
            try:
                # Explicitly flush any pending exception to Sentry and logging if enabled
                Sentry.flush()
                logging.shutdown()
            except BaseException:
                # Silently ignore ALL exceptions thrown by the flush/shutdown, otherwise
                # os._exit() might never be called. We could have used a bare `except:`, but
                # `except BaseException` is more explicit (and keeps linters happy).
                pass
        # We run os._exit() outside the `finally` clause. While a `finally` clause is being
        # processed, the handled exception, its stack trace, and possibly other objects that
        # still need to be finalized are held in memory. Calling os._exit() inside the
        # `finally` clause can therefore cause effects similar to
        # https://github.com/apache/airflow/issues/22404, where a temporary file had not been
        # deleted by the time os._exit() ran.
        os._exit(return_code)
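# The final _start_by_fork variant above deliberately calls os._exit() *after* the
# try/finally block rather than inside it: while a `finally` clause is running, the
# in-flight exception, its traceback, and objects they reference are still alive, so
# terminating there can skip finalizers (the issue linked in the comment above describes
# a temporary file that survived). A minimal sketch of that exit discipline, with
# do_work() as a hypothetical placeholder for the args.func(args, dag=dag) call:
import logging
import os
from typing import Callable


def run_and_exit(do_work: Callable[[], None]) -> None:
    return_code = 0
    try:
        do_work()
    except SystemExit as sys_ex:
        # sys.exit() inside the fork would otherwise unwind through code inherited from
        # the parent; translate its payload into a plain integer exit code instead.
        if sys_ex.code is None:
            return_code = 0
        elif isinstance(sys_ex.code, int):
            return_code = sys_ex.code
        else:
            return_code = 1
    except Exception:
        return_code = 1
    except BaseException:
        # KeyboardInterrupt and friends: don't log, just signal "something unusual".
        return_code = 2
    finally:
        try:
            logging.shutdown()
        except BaseException:
            # Never let flushing prevent us from reaching os._exit() below.
            pass
    # Exit only after the finally clause has fully unwound, so no exception or
    # traceback is still pinned in memory when the process terminates.
    os._exit(return_code)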