The following examples show how airflow.cli.cli_parser.get_parser() is used across the Apache Airflow code base and its test suite.

Example #1
def _execute_in_fork(command_to_exec: CommandType) -> None:
    pid = os.fork()
    if pid:
        # In parent, wait for the child
        pid, ret = os.waitpid(pid, 0)
        if ret == 0:
            return

        raise AirflowException('Celery command failed on host: ' +
                               get_hostname())

    from airflow.sentry import Sentry

    ret = 1
    try:
        from airflow.cli.cli_parser import get_parser
        parser = get_parser()
        # [1:] - remove "airflow" from the start of the command
        args = parser.parse_args(command_to_exec[1:])

        setproctitle(f"airflow task supervisor: {command_to_exec}")

        args.func(args)
        ret = 0
    except Exception as e:  # pylint: disable=broad-except
        log.error("Failed to execute task %s.", str(e))
        ret = 1
    finally:
        Sentry.flush()
        os._exit(ret)  # pylint: disable=protected-access
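The fork-based helpers in these examples all share the same parent/child shape: the parent waits on the child's exit status, while the child does the work and always leaves through os._exit() so it never unwinds back into state inherited from the parent. The following is a minimal standalone sketch of that pattern (POSIX only, standard library only); run_in_fork and the work callable are illustrative names, not Airflow APIs.

import os
from typing import Callable


def run_in_fork(work: Callable[[], None]) -> int:
    # Illustrative helper, not part of Airflow.
    pid = os.fork()
    if pid:
        # Parent: block until the child exits and return its raw wait status
        # (0 means the child exited cleanly with code 0).
        _, status = os.waitpid(pid, 0)
        return status

    # Child: never return normally; always leave via os._exit() so the parent's
    # atexit handlers and buffered state are not replayed in the child.
    exit_code = 1
    try:
        work()
        exit_code = 0
    finally:
        os._exit(exit_code)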
Example #2
    def _execute_work_in_fork(self, command: CommandType) -> str:
        pid = os.fork()
        if pid:
            # In parent, wait for the child
            pid, ret = os.waitpid(pid, 0)
            return State.SUCCESS if ret == 0 else State.FAILED

        from airflow.sentry import Sentry
        ret = 1
        try:
            import signal

            from airflow.cli.cli_parser import get_parser

            signal.signal(signal.SIGINT, signal.SIG_DFL)
            signal.signal(signal.SIGTERM, signal.SIG_DFL)

            parser = get_parser()
            # [1:] - remove "airflow" from the start of the command
            args = parser.parse_args(command[1:])

            setproctitle(f"airflow task supervisor: {command}")

            args.func(args)
            ret = 0
            return State.SUCCESS
        except Exception as e:  # pylint: disable=broad-except
            self.log.error("Failed to execute task %s.", str(e))
        finally:
            Sentry.flush()
            os._exit(ret)  # pylint: disable=protected-access
Example #3
    def setUp(self) -> None:
        self.dag_id = "test_logging_dag"
        self.task_id = "test_task"
        self.dag_path = os.path.join(ROOT_FOLDER, "dags",
                                     "test_logging_in_dag.py")
        reset(self.dag_id)
        self.execution_date = timezone.make_aware(datetime(2017, 1, 1))
        self.execution_date_str = self.execution_date.isoformat()
        self.task_args = [
            'tasks', 'run', self.dag_id, self.task_id, '--local',
            self.execution_date_str
        ]
        self.log_dir = conf.get('logging', 'base_log_folder')
        self.log_filename = f"{self.dag_id}/{self.task_id}/{self.execution_date_str}/1.log"
        self.ti_log_file_path = os.path.join(self.log_dir, self.log_filename)
        self.parser = cli_parser.get_parser()

        root = self.root_logger = logging.getLogger()
        self.root_handlers = root.handlers.copy()
        self.root_filters = root.filters.copy()
        self.root_level = root.level

        try:
            os.remove(self.ti_log_file_path)
        except OSError:
            pass
Example #4
    def _start_by_fork(self):  # pylint: disable=inconsistent-return-statements
        pid = os.fork()
        if pid:
            self.log.info("Started process %d to run task", pid)
            return psutil.Process(pid)
        else:
            from airflow.cli.cli_parser import get_parser
            import signal
            import airflow.settings as settings

            signal.signal(signal.SIGINT, signal.SIG_DFL)
            signal.signal(signal.SIGTERM, signal.SIG_DFL)
            # Start a new process group
            os.setpgid(0, 0)

            # Force a new SQLAlchemy session. We can't share open DB handles
            # between processes. The CLI code will re-create this as part of its
            # normal startup.
            settings.engine.pool.dispose()
            settings.engine.dispose()

            parser = get_parser()
            # [1:] - remove "airflow" from the start of the command
            args = parser.parse_args(self._command[1:])

            proc_title = "airflow task runner: {0.dag_id} {0.task_id} {0.execution_date}"
            if hasattr(args, "job_id"):
                proc_title += " {0.job_id}"
            setproctitle(proc_title.format(args))

            try:
                args.func(args, dag=self.dag)
                os._exit(0)  # pylint: disable=protected-access
            except Exception:  # pylint: disable=broad-except
                os._exit(1)  # pylint: disable=protected-access
Example #5
    def setUp(self, session=None):
        clear_db_connections(add_default_connections_back=False)
        merge_conn(
            Connection(
                conn_id="airflow_db",
                conn_type="mysql",
                description="mysql conn description",
                host="mysql",
                login="******",
                password="******",
                schema="airflow",
            ),
            session,
        )
        merge_conn(
            Connection(
                conn_id="druid_broker_default",
                conn_type="druid",
                description="druid-broker conn description",
                host="druid-broker",
                port=8082,
                extra='{"endpoint": "druid/v2/sql"}',
            ),
            session,
        )

        self.parser = cli_parser.get_parser()
Example #6
    def test_commands_and_command_group_sections(self):
        parser = cli_parser.get_parser()

        with contextlib.redirect_stdout(io.StringIO()) as stdout:
            with self.assertRaises(SystemExit):
                parser.parse_args(['--help'])
            stdout = stdout.getvalue()
        self.assertIn("Commands", stdout)
        self.assertIn("Groups", stdout)
Example #7
def main():
    """Main executable function"""
    if conf.get("core", "security") == 'kerberos':
        os.environ['KRB5CCNAME'] = conf.get('kerberos', 'ccache')
        os.environ['KRB5_KTNAME'] = conf.get('kerberos', 'keytab')
    parser = cli_parser.get_parser()
    argcomplete.autocomplete(parser)
    args = parser.parse_args()
    args.func(args)
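main() here, like its variant in Example #11 below, simply wires the pieces together: it builds the parser with cli_parser.get_parser(), lets argcomplete hook in shell completion, parses sys.argv, and calls whatever handler the chosen subcommand registered as args.func. Below is a minimal sketch of that same parse-and-dispatch flow with an explicit argument list; the "version" subcommand is assumed to exist in the installed Airflow CLI and is used here only as a harmless illustration.

from airflow.cli import cli_parser

parser = cli_parser.get_parser()
# main() lets argparse read sys.argv[1:] implicitly; here the argv is explicit.
args = parser.parse_args(["version"])
# Dispatch to the handler that the subcommand registered on the namespace.
args.func(args)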
Example #8
    def _start_by_fork(self):
        pid = os.fork()
        if pid:
            self.log.info("Started process %d to run task", pid)
            return psutil.Process(pid)
        else:
            # Start a new process group
            os.setpgid(0, 0)
            import signal

            signal.signal(signal.SIGINT, signal.SIG_DFL)
            signal.signal(signal.SIGTERM, signal.SIG_DFL)

            from airflow import settings
            from airflow.cli.cli_parser import get_parser
            from airflow.sentry import Sentry

            # Force a new SQLAlchemy session. We can't share open DB handles
            # between processes. The CLI code will re-create this as part of its
            # normal startup.
            settings.engine.pool.dispose()
            settings.engine.dispose()

            parser = get_parser()
            # [1:] - remove "airflow" from the start of the command
            args = parser.parse_args(self._command[1:])

            # We prefer the job_id passed on the command-line because at this time, the
            # task instance may not have been updated.
            job_id = getattr(args, "job_id", self._task_instance.job_id)
            self.log.info('Running: %s', self._command)
            self.log.info('Job %s: Subtask %s', job_id, self._task_instance.task_id)

            proc_title = "airflow task runner: {0.dag_id} {0.task_id} {0.execution_date_or_run_id}"
            if job_id is not None:
                proc_title += " {0.job_id}"
            setproctitle(proc_title.format(args))

            try:
                args.func(args, dag=self.dag)
                return_code = 0
            except Exception as exc:
                return_code = 1

                self.log.error(
                    "Failed to execute job %s for task %s (%s; %r)",
                    job_id,
                    self._task_instance.task_id,
                    exc,
                    os.getpid(),
                )
            finally:
                # Explicitly flush any pending exception to Sentry if enabled
                Sentry.flush()
                logging.shutdown()
                os._exit(return_code)
Example #9
    def setUp(self) -> None:
        self.dag_id = "test_logging_dag"
        self.task_id = "test_task"
        reset(self.dag_id)
        self.execution_date_str = timezone.make_aware(datetime(2017, 1, 1)).isoformat()
        self.log_dir = conf.get('logging', 'base_log_folder')
        self.log_filename = f"{self.dag_id}/{self.task_id}/{self.execution_date_str}/1.log"
        self.ti_log_file_path = os.path.join(self.log_dir, self.log_filename)
        self.parser = cli_parser.get_parser()
        try:
            os.remove(self.ti_log_file_path)
        except OSError:
            pass
Example #10
    def test_should_display_help(self):
        parser = cli_parser.get_parser()

        all_command_as_args = [
            command_as_args
            for top_command in cli_parser.airflow_commands
            for command_as_args in (
                [[top_command.name]]
                if isinstance(top_command, cli_parser.ActionCommand)
                else [[top_command.name, nested_command.name] for nested_command in top_command.subcommands]
            )
        ]
        for cmd_args in all_command_as_args:
            with self.assertRaises(SystemExit):
                parser.parse_args([*cmd_args, '--help'])
Example #11
def main():
    """Main executable function"""
    if conf.get("core", "security") == 'kerberos':
        os.environ['KRB5CCNAME'] = conf.get('kerberos', 'ccache')
        os.environ['KRB5_KTNAME'] = conf.get('kerberos', 'keytab')
    if PY310:
        docs_url = get_docs_url('installation/prerequisites.html')
        warnings.warn(
            "Python v3.10 is not official supported on this version of Airflow. Please be careful. "
            f"For details, see: {docs_url}")

    parser = cli_parser.get_parser()
    argcomplete.autocomplete(parser)
    args = parser.parse_args()
    args.func(args)
Example #12
    def _start_by_fork(self):
        pid = os.fork()
        if pid:
            self.log.info("Started process %d to run task", pid)
            return psutil.Process(pid)
        else:
            import signal

            from airflow import settings
            from airflow.cli.cli_parser import get_parser
            from airflow.sentry import Sentry

            signal.signal(signal.SIGINT, signal.SIG_DFL)
            signal.signal(signal.SIGTERM, signal.SIG_DFL)
            # Start a new process group
            os.setpgid(0, 0)

            # Force a new SQLAlchemy session. We can't share open DB handles
            # between processes. The CLI code will re-create this as part of its
            # normal startup.
            settings.engine.pool.dispose()
            settings.engine.dispose()

            parser = get_parser()
            # [1:] - remove "airflow" from the start of the command
            args = parser.parse_args(self._command[1:])

            self.log.info('Running: %s', self._command)
            self.log.info('Job %s: Subtask %s', self._task_instance.job_id,
                          self._task_instance.task_id)

            proc_title = "airflow task runner: {0.dag_id} {0.task_id} {0.execution_date_or_run_id}"
            if hasattr(args, "job_id"):
                proc_title += " {0.job_id}"
            setproctitle(proc_title.format(args))

            try:
                args.func(args, dag=self.dag)
                return_code = 0
            except Exception:
                return_code = 1
            finally:
                # Explicitly flush any pending exception to Sentry if enabled
                Sentry.flush()
                logging.shutdown()
                os._exit(return_code)
Example #13
def _execute_in_fork(command_to_exec: CommandType,
                     celery_task_id: Optional[str] = None) -> None:
    pid = os.fork()
    if pid:
        # In parent, wait for the child
        pid, ret = os.waitpid(pid, 0)
        if ret == 0:
            return

        msg = f'Celery command failed on host: {get_hostname()} with celery_task_id {celery_task_id}'
        raise AirflowException(msg)

    from airflow.sentry import Sentry

    ret = 1
    try:
        from airflow.cli.cli_parser import get_parser

        settings.engine.pool.dispose()
        settings.engine.dispose()

        parser = get_parser()
        # [1:] - remove "airflow" from the start of the command
        args = parser.parse_args(command_to_exec[1:])
        args.shut_down_logging = False
        if celery_task_id:
            args.external_executor_id = celery_task_id

        setproctitle(f"airflow task supervisor: {command_to_exec}")

        args.func(args)
        ret = 0
    except Exception as e:
        log.exception("[%s] Failed to execute task %s.", celery_task_id,
                      str(e))
        ret = 1
    finally:
        Sentry.flush()
        logging.shutdown()
        os._exit(ret)
Example #14
    def _execute_work_in_fork(self, command: CommandType) -> str:
        pid = os.fork()
        if pid:
            # In parent, wait for the child
            pid, ret = os.waitpid(pid, 0)
            return State.SUCCESS if ret == 0 else State.FAILED

        from airflow.sentry import Sentry

        ret = 1
        try:
            import signal

            from airflow.cli.cli_parser import get_parser

            signal.signal(signal.SIGINT, signal.SIG_DFL)
            signal.signal(signal.SIGTERM, signal.SIG_DFL)
            signal.signal(signal.SIGUSR2, signal.SIG_DFL)

            parser = get_parser()
            # [1:] - remove "airflow" from the start of the command
            args = parser.parse_args(command[1:])
            args.shut_down_logging = False

            setproctitle(f"airflow task supervisor: {command}")

            args.func(args)
            ret = 0
            return State.SUCCESS
        except Exception as e:
            self.log.exception("Failed to execute task %s.", e)
            return State.FAILED
        finally:
            Sentry.flush()
            logging.shutdown()
            os._exit(ret)
Example #15
    def setUp(self):
        clear_db_runs()
        clear_db_pools()

        self.parser = cli_parser.get_parser()
Example #16
    def setUpClass(cls):
        importlib.reload(cli_parser)
        cls.parser = cli_parser.get_parser()
Example #17
    def _start_by_fork(self):
        pid = os.fork()
        if pid:
            self.log.info("Started process %d to run task", pid)
            return psutil.Process(pid)
        else:
            # Start a new process group
            os.setpgid(0, 0)
            import signal

            signal.signal(signal.SIGINT, signal.SIG_DFL)
            signal.signal(signal.SIGTERM, signal.SIG_DFL)

            from airflow import settings
            from airflow.cli.cli_parser import get_parser
            from airflow.sentry import Sentry
            from airflow.utils.cli import get_dag

            # Force a new SQLAlchemy session. We can't share open DB handles
            # between processes. The CLI code will re-create this as part of its
            # normal startup.
            settings.engine.pool.dispose()
            settings.engine.dispose()

            parser = get_parser()
            # [1:] - remove "airflow" from the start of the command
            args = parser.parse_args(self._command[1:])

            # We prefer the job_id passed on the command-line because at this time, the
            # task instance may not have been updated.
            job_id = getattr(args, "job_id", self._task_instance.job_id)
            self.log.info('Running: %s', self._command)
            self.log.info('Job %s: Subtask %s', job_id, self._task_instance.task_id)

            proc_title = "airflow task runner: {0.dag_id} {0.task_id} {0.execution_date_or_run_id}"
            if job_id is not None:
                proc_title += " {0.job_id}"
            setproctitle(proc_title.format(args))

            return_code = 0
            try:
                # Parse the DAG file here, because `airflow tasks run --local` does not parse the DAG file.
                dag = get_dag(args.subdir, args.dag_id)
                args.func(args, dag=dag)
                return_code = 0
            except Exception as exc:
                return_code = 1

                self.log.error(
                    "Failed to execute job %s for task %s (%s; %r)",
                    job_id,
                    self._task_instance.task_id,
                    exc,
                    os.getpid(),
                )
            except SystemExit as sys_ex:
                # Someone called sys.exit() in the fork - mistakenly. sys.exit() should not be used in
                # the fork, because it can mistakenly run atexit handlers that were registered by the
                # parent process before the fork happened.
                return_code = sys_ex.code
            except BaseException:
                # While we also want to handle BaseException here, we do not want to log it (this is
                # the default behaviour anyway). Set the return code to 2 to indicate that this
                # happened.
                return_code = 2
            finally:
                try:
                    # Explicitly flush any pending exception to Sentry and logging if enabled
                    Sentry.flush()
                    logging.shutdown()
                except BaseException:
                    # Also make sure to silently ignore ALL POSSIBLE exceptions thrown by flush/shutdown,
                    # otherwise os._exit() might never be called. We could have used a bare `except:`, but
                    # `except BaseException` is more explicit (and linters do not complain).
                    pass
            # We call os._exit() outside the `finally` clause on purpose. We cannot call os._exit() inside
            # `finally`, because while the `finally` clause is being processed the handled exception, its
            # stack trace, and possibly some objects that still need to be finalized are held in memory.
            # Calling os._exit() inside the `finally` clause might cause effects similar to
            # https://github.com/apache/airflow/issues/22404, where a temporary file had not been deleted
            # by the time os._exit() ran.
            os._exit(return_code)
Example #18
def parser():
    return cli_parser.get_parser()
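This parser() function, like the yield-based setup_parser() in Example #27, looks like a pytest fixture whose @pytest.fixture decorator sits just outside the excerpt. Below is a minimal sketch of how a test could consume such a fixture; the test name and command line are illustrative (modeled on the task-run arguments from Example #3), not taken verbatim from the Airflow test suite.

import pytest

from airflow.cli import cli_parser


@pytest.fixture
def parser():
    return cli_parser.get_parser()


def test_parses_a_tasks_run_command(parser):
    # Same argument shape as Example #3, with the leading "airflow" already stripped.
    args = parser.parse_args(
        ["tasks", "run", "test_logging_dag", "test_task", "--local", "2017-01-01T00:00:00+00:00"]
    )
    assert callable(args.func)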
Example #19
    def setUpClass(cls):
        cls.dagbag = models.DagBag(include_examples=True)
        cls.parser = cli_parser.get_parser()
Example #20
    def setUp(self):
        self.clean_db()
        self.parser = cli_parser.get_parser()
Example #21
    def setUpClass(cls):
        cls.parser = cli_parser.get_parser()
        clear_db_connections(add_default_connections_back=False)
Example #22
    def setUpClass(cls):
        cls.parser = cli_parser.get_parser()
        clear_db_connections()
Example #23
    def setUp(self):
        self.parser = cli_parser.get_parser()
        clear_db_connections()
Example #24
    def setUpClass(cls):
        cls.parser = cli_parser.get_parser()
Example #25
    def setup_class(cls):
        # pylint: disable=attribute-defined-outside-init
        cls.parser = cli_parser.get_parser()
Example #26
    def setUpClass(cls):
        cls.dagbag = DagBag(include_examples=True)
        cls.dagbag.sync_to_db()
        cls.parser = cli_parser.get_parser()
Example #27
def setup_parser():
    yield cli_parser.get_parser()