Example #1
0
 def __init__(self,
              dag_directory,
              server_uri=None,
              max_runs=-1,
              refresh_dag_dir_interval=conf.getint(
                  'scheduler', 'refresh_dag_dir_interval', fallback=30),
              *args,
              **kwargs):
     """
     Build the job together with every component it drives.

     :param dag_directory: forwarded to ``DagTrigger`` as ``dag_directory``.
     :param server_uri: handed to ``DagTrigger`` (as
         ``notification_service_uri``) and to ``NotificationClient``.
     :param max_runs: forwarded to ``DagTrigger``.
     :param refresh_dag_dir_interval: forwarded to ``DagTrigger``. The
         default comes from ``[scheduler] refresh_dag_dir_interval``
         (fallback 30) and is evaluated once, when this function is defined.
     :param args: positional arguments passed on to the parent constructor.
     :param kwargs: keyword arguments passed on to the parent constructor.
     """
     super().__init__(*args, **kwargs)

     # A single mailbox instance is shared by every component created below.
     shared_mailbox = Mailbox()
     self.mailbox: Mailbox = shared_mailbox

     trigger_options = dict(
         dag_directory=dag_directory,
         max_runs=max_runs,
         dag_ids=None,
         pickle_dags=False,
         mailbox=shared_mailbox,
         refresh_dag_dir_interval=refresh_dag_dir_interval,
         notification_service_uri=server_uri,
     )
     self.dag_trigger: DagTrigger = DagTrigger(**trigger_options)

     event_manager = DagRunEventManager(shared_mailbox)
     self.task_event_manager = event_manager
     self.executor.set_mailbox(shared_mailbox)

     client = NotificationClient(server_uri=server_uri,
                                 default_namespace=SCHEDULER_NAMESPACE)
     self.notification_client: NotificationClient = client

     self.scheduler: EventBasedScheduler = EventBasedScheduler(
         self.id, shared_mailbox, event_manager, self.executor, client)

     # Looked up at construction time and kept for later use.
     self.last_scheduling_id = self._last_scheduler_job_id()
    def test_dag_run_event_manager_release_runner(self):
        """
        Check that the runner of an idle dag run is dropped by the manager.

        An event is handled for ``dag_run1``; after a five second pause an
        event is handled for ``dag_run2``. At that point the manager's
        ``_event_executor_runners`` is expected to contain an entry for
        ``dag_run2`` and none for ``dag_run1``.
        """
        dag_run1 = self._dag_run
        _, dag_run2 = self.init_dag_and_dag_run(
            '../../dags/test_task_event_handler_dag.py', 'test_event_handler',
            timezone.datetime(2017, 1, 2))
        self.create_task_state(dag_run1, 'operator_toggle_handler')
        self.create_task_state(dag_run2, 'operator_toggle_handler')

        event = BaseEvent("test_event", "test_event", namespace="default")
        mailbox = Mailbox()

        event_manager = DagRunEventManager(mailbox=mailbox)
        try:
            event_manager.handle_event(
                DagRunId(dag_run1.dag_id, dag_run1.run_id), event)

            # Pause before sending the next event; the assertions below expect
            # the first run's runner to have been dropped by then.
            time.sleep(5)

            # Address the second run with its own dag_id and run_id. The id
            # was previously built from dag_run1.dag_id, which only matched
            # the assertion below when both runs shared a dag id.
            event_manager.handle_event(
                DagRunId(dag_run2.dag_id, dag_run2.run_id), event)

            assert (DagRunId(
                dag_run2.dag_id,
                dag_run2.run_id)) in event_manager._event_executor_runners
            assert (DagRunId(
                dag_run1.dag_id,
                dag_run1.run_id)) not in event_manager._event_executor_runners
        finally:
            # Shut the manager down even when an assertion above fails.
            event_manager.end()
    def test_dag_run_event_manager_resubmit_if_exit_with_nonempty_queue(self):
        """
        Check that two events for one dag run both produce a mailbox message
        when the manager is created with ``max_num_event=1``.

        NOTE(review): judging by the test name, ``max_num_event=1`` presumably
        makes a runner exit after one event so the second event has to be
        resubmitted - confirm against ``DagRunEventManager``.
        """
        mailbox = Mailbox()
        event_manager = DagRunEventManager(mailbox, max_num_event=1)
        event_manager.start()
        try:
            self.create_task_state(dag_run=self._dag_run,
                                   task_id='operator_toggle_handler')
            event = BaseEvent("test_event", "test_event", namespace="default")
            event_manager.handle_event(
                DagRunId(self._dag_run.dag_id, self._dag_run.run_id), event)
            event_manager.handle_event(
                DagRunId(self._dag_run.dag_id, self._dag_run.run_id), event)

            assert mailbox.get_message() is not None
            # The second message may arrive later, so wait up to 5 seconds.
            assert mailbox.get_message_with_timeout(5) is not None
        finally:
            # The manager was started above and was previously never ended;
            # stop it whether or not the assertions pass.
            event_manager.end()
Example #4
0
class EventBasedSchedulerJob(BaseJob):
    """
    Job that runs the event-based scheduler.

    A single :class:`Mailbox` is shared between the DAG trigger, the dag-run
    event manager, the executor and the :class:`EventBasedScheduler`; events
    received from the notification service are delivered through a
    ``SchedulerEventWatcher`` built on the same mailbox.

    Outstanding work:

    1. todo self heartbeat
    2. todo check other scheduler failed
    3. todo timeout dagrun
    """
    __mapper_args__ = {'polymorphic_identity': 'EventBasedSchedulerJob'}

    def __init__(self,
                 dag_directory,
                 server_uri=None,
                 max_runs=-1,
                 refresh_dag_dir_interval=conf.getint(
                     'scheduler', 'refresh_dag_dir_interval', fallback=30),
                 *args,
                 **kwargs):
        """
        Build the job together with every component it drives.

        :param dag_directory: forwarded to ``DagTrigger`` as ``dag_directory``.
        :param server_uri: handed to ``DagTrigger`` (as
            ``notification_service_uri``) and to ``NotificationClient``.
        :param max_runs: forwarded to ``DagTrigger``.
        :param refresh_dag_dir_interval: forwarded to ``DagTrigger``. The
            default comes from ``[scheduler] refresh_dag_dir_interval``
            (fallback 30) and is evaluated once, at class definition time.
        :param args: positional arguments passed on to ``BaseJob``.
        :param kwargs: keyword arguments passed on to ``BaseJob``.
        """
        super().__init__(*args, **kwargs)
        self.mailbox: Mailbox = Mailbox()
        self.dag_trigger: DagTrigger = DagTrigger(
            dag_directory=dag_directory,
            max_runs=max_runs,
            dag_ids=None,
            pickle_dags=False,
            mailbox=self.mailbox,
            refresh_dag_dir_interval=refresh_dag_dir_interval,
            notification_service_uri=server_uri)
        self.task_event_manager = DagRunEventManager(self.mailbox)
        self.executor.set_mailbox(self.mailbox)
        self.notification_client: NotificationClient = NotificationClient(
            server_uri=server_uri, default_namespace=SCHEDULER_NAMESPACE)
        self.scheduler: EventBasedScheduler = EventBasedScheduler(
            self.id, self.mailbox, self.task_event_manager, self.executor,
            self.notification_client)
        # Looked up at construction time; handed to scheduler.recover() in
        # _execute().
        self.last_scheduling_id = self._last_scheduler_job_id()

    @staticmethod
    def _last_scheduler_job_id():
        """
        Return the id of the most recent ``EventBasedSchedulerJob``.

        :return: ``most_recent_job().id``, or ``None`` when there is no such
            job.
        """
        last_run = EventBasedSchedulerJob.most_recent_job()
        return last_run.id if last_run else None

    def _execute(self):
        """
        Start every component, run the scheduler, then shut everything down.

        Any ``Exception`` raised along the way is logged and swallowed; it is
        not propagated to the caller.
        """
        self.log.info("Starting the scheduler Job")

        try:
            # Hand this job's id to the components that record it.
            self.mailbox.set_scheduling_job_id(self.id)
            self.scheduler.id = self.id
            self._start_listen_events()
            self.dag_trigger.start()
            self.task_event_manager.start()
            self.executor.job_id = self.id
            self.executor.start()

            self.register_signals()

            self.scheduler.submit_sync_thread()
            self.scheduler.recover(self.last_scheduling_id)
            self.scheduler.schedule()

            # NOTE(review): this teardown only runs when schedule() returns
            # normally. It is skipped when an exception is raised above or
            # when the signal handler calls sys.exit(). Consider moving it
            # into ``finally`` once it is confirmed that each end() is safe
            # to call on a component that failed to start.
            self.executor.end()
            self.dag_trigger.end()
            self.task_event_manager.end()
            self._stop_listen_events()

            settings.Session.remove()  # type: ignore
        except Exception as e:  # pylint: disable=broad-except
            self.log.exception("Exception when executing scheduler, %s", e)
        finally:
            self.log.info("Exited execute loop")

    def _start_listen_events(self):
        """
        Start listening for notification events.

        Events are delivered to a ``SchedulerEventWatcher`` built on this
        job's mailbox; ``start_time`` is the current wall-clock time in
        milliseconds.
        """
        watcher = SchedulerEventWatcher(self.mailbox)
        self.notification_client.start_listen_events(watcher=watcher,
                                                     start_time=int(
                                                         time.time() * 1000),
                                                     version=None)

    def _stop_listen_events(self):
        """Stop listening for notification events."""
        self.notification_client.stop_listen_events()

    def register_signals(self) -> None:
        """
        Install this job's signal handlers.

        SIGINT and SIGTERM are routed to :meth:`_exit_gracefully`; SIGUSR2 is
        routed to :meth:`_debug_dump`.
        """
        signal.signal(signal.SIGINT, self._exit_gracefully)
        signal.signal(signal.SIGTERM, self._exit_gracefully)
        signal.signal(signal.SIGUSR2, self._debug_dump)

    def _exit_gracefully(self, signum, frame) -> None:  # pylint: disable=unused-argument
        """
        Log the received signal and exit with status ``os.EX_OK``.

        No component is stopped here; the handler only calls ``sys.exit``.

        :param signum: number of the signal that was received.
        :param frame: current stack frame (unused).
        """
        self.log.info("Exiting gracefully upon receiving signal %s", signum)
        sys.exit(os.EX_OK)

    def _debug_dump(self, signum, frame):  # pylint: disable=unused-argument
        """
        Log a banner naming the received signal, then ask the executor to
        dump its debug information.

        :param signum: number of the signal that was received.
        :param frame: current stack frame (unused).
        """
        try:
            sig_name = signal.Signals(signum).name  # pylint: disable=no-member
        except Exception:  # pylint: disable=broad-except
            # Fall back to the raw number when the name cannot be resolved.
            sig_name = str(signum)

        self.log.info("%s\n%s received, printing debug\n%s", "-" * 80,
                      sig_name, "-" * 80)

        self.executor.debug_dump()
        self.log.info("-" * 80)

    def is_alive(self, grace_multiplier: Optional[float] = None) -> bool:
        """
        Is this SchedulerJob alive?

        We define alive as in a state of running and a heartbeat within the
        threshold defined in the ``scheduler_health_check_threshold`` config
        setting.

        ``grace_multiplier`` is accepted for compatibility with the parent
        class; when it is given, the decision is delegated to the parent
        implementation.

        :param grace_multiplier: optional multiplier forwarded to the parent
            class' ``is_alive``.
        :rtype: boolean
        """
        if grace_multiplier is not None:
            # Accept the same behaviour as superclass
            return super().is_alive(grace_multiplier=grace_multiplier)
        scheduler_health_check_threshold: int = conf.getint(
            'scheduler', 'scheduler_health_check_threshold')
        return (self.state == State.RUNNING and
                (timezone.utcnow() - self.latest_heartbeat).total_seconds() <
                scheduler_health_check_threshold)
    def test_dag_run_event_manager(self):
        """
        Send the same event to one dag run three times and check the results.

        The first two events are expected to yield a START and then a STOP
        result for ``operator_toggle_handler``; after a two second pause a
        third event is expected to yield START again.
        """
        mailbox = Mailbox()
        event_manager = DagRunEventManager(mailbox)
        event_manager.start()

        self.create_task_state(dag_run=self._dag_run,
                               task_id='operator_toggle_handler')
        event = BaseEvent("test_event", "test_event", namespace="default")

        def run_key():
            # Build a fresh id for the dag run under test on every use.
            return DagRunId(self._dag_run.dag_id, self._dag_run.run_id)

        def next_result():
            # Take the next mailbox message and decode it into a result.
            raw_message = mailbox.get_message()
            inner_event = SchedulerInnerEventUtil.to_inner_event(raw_message)
            return EventHandleResult.from_event(inner_event)

        def expected(action):
            return EventHandleResult(run_key(), "operator_toggle_handler",
                                     action)

        for _ in range(2):
            event_manager.handle_event(run_key(), event)

        assert next_result() == expected(SchedulingAction.START)
        assert next_result() == expected(SchedulingAction.STOP)

        time.sleep(2)
        event_manager.handle_event(run_key(), event)
        assert next_result() == expected(SchedulingAction.START)
        event_manager.end()
    def test_dag_run_event_manager_multiple_dag_runs(self):
        """
        Send the same event to two dag runs, twice, and check the results.

        After the first round both runs are expected to report START for
        ``operator_toggle_handler``; after the second round both are expected
        to report STOP. The two messages of a round may arrive in either
        order, so membership is asserted rather than position.
        """
        dag_run1 = self._dag_run
        _, dag_run2 = self.init_dag_and_dag_run(
            '../../dags/test_task_event_handler_dag.py', 'test_event_handler',
            timezone.datetime(2017, 1, 2))
        both_runs = (dag_run1, dag_run2)
        for run in both_runs:
            self.create_task_state(run, 'operator_toggle_handler')

        event = BaseEvent("test_event", "test_event", namespace="default")
        mailbox = Mailbox()

        event_manager = DagRunEventManager(mailbox=mailbox)

        def next_result():
            # Take the next mailbox message and decode it into a result.
            inner_event = SchedulerInnerEventUtil.to_inner_event(
                mailbox.get_message())
            return EventHandleResult.from_event(inner_event)

        # One round per expected action: first START, then STOP.
        for action in (SchedulingAction.START, SchedulingAction.STOP):
            for run in both_runs:
                event_manager.handle_event(DagRunId(run.dag_id, run.run_id),
                                           event)
            messages = [next_result() for _ in both_runs]
            for run in both_runs:
                assert EventHandleResult(DagRunId(run.dag_id, run.run_id),
                                         "operator_toggle_handler",
                                         action) in messages

        event_manager.end()