def __init__(self, dag_directory, server_uri=None, max_runs=-1,
             refresh_dag_dir_interval=conf.getint(
                 'scheduler', 'refresh_dag_dir_interval', fallback=30),
             *args, **kwargs):
    """
    Build the job and the components it drives.

    One ``Mailbox`` is created and handed to the DAG trigger, the dag-run
    event manager, the executor and the scheduler.

    :param dag_directory: forwarded to ``DagTrigger`` as ``dag_directory``
    :param server_uri: used as ``notification_service_uri`` of the trigger
        and as ``server_uri`` of the ``NotificationClient``
    :param max_runs: forwarded to ``DagTrigger`` as ``max_runs``
    :param refresh_dag_dir_interval: forwarded to ``DagTrigger``; the default
        is read from the ``[scheduler]`` config section when this function
        is defined, falling back to 30
    """
    super().__init__(*args, **kwargs)

    shared_mailbox = Mailbox()
    self.mailbox: Mailbox = shared_mailbox

    trigger_options = dict(
        dag_directory=dag_directory,
        max_runs=max_runs,
        dag_ids=None,
        pickle_dags=False,
        mailbox=shared_mailbox,
        refresh_dag_dir_interval=refresh_dag_dir_interval,
        notification_service_uri=server_uri,
    )
    self.dag_trigger: DagTrigger = DagTrigger(**trigger_options)

    self.task_event_manager = DagRunEventManager(shared_mailbox)
    self.executor.set_mailbox(shared_mailbox)

    self.notification_client: NotificationClient = NotificationClient(
        server_uri=server_uri,
        default_namespace=SCHEDULER_NAMESPACE,
    )
    self.scheduler: EventBasedScheduler = EventBasedScheduler(
        self.id,
        shared_mailbox,
        self.task_event_manager,
        self.executor,
        self.notification_client,
    )

    # Id of the most recent job, looked up before this one starts running.
    self.last_scheduling_id = self._last_scheduler_job_id()
def test_dag_run_event_manager_release_runner(self):
    """
    A dag run that stops receiving events loses its entry in the manager.

    An event is sent for ``dag_run1``; after a five second pause an event is
    sent for ``dag_run2``. At that point only ``dag_run2`` is expected to be
    present in ``event_manager._event_executor_runners``.
    """
    dag_run1 = self._dag_run
    _, dag_run2 = self.init_dag_and_dag_run(
        '../../dags/test_task_event_handler_dag.py',
        'test_event_handler',
        timezone.datetime(2017, 1, 2))
    self.create_task_state(dag_run1, 'operator_toggle_handler')
    self.create_task_state(dag_run2, 'operator_toggle_handler')

    event = BaseEvent("test_event", "test_event", namespace="default")
    mailbox = Mailbox()
    event_manager = DagRunEventManager(mailbox=mailbox)

    event_manager.handle_event(DagRunId(dag_run1.dag_id, dag_run1.run_id), event)

    # presumably long enough for dag_run1's runner to finish and become
    # eligible for release -- TODO confirm against DagRunEventManager
    time.sleep(5)

    # The id must be built entirely from dag_run2 so that it matches the key
    # asserted below (previously dag_run1.dag_id was paired with
    # dag_run2.run_id).
    event_manager.handle_event(DagRunId(dag_run2.dag_id, dag_run2.run_id), event)

    assert DagRunId(dag_run2.dag_id, dag_run2.run_id) \
        in event_manager._event_executor_runners
    assert DagRunId(dag_run1.dag_id, dag_run1.run_id) \
        not in event_manager._event_executor_runners

    event_manager.end()
def test_dag_run_event_manager_resubmit_if_exit_with_nonempty_queue(self):
    """
    Two events sent to a manager built with ``max_num_event=1`` both yield
    a message in the mailbox.

    The second message is awaited with a five second timeout.
    """
    mailbox = Mailbox()
    event_manager = DagRunEventManager(mailbox, max_num_event=1)
    event_manager.start()
    try:
        self.create_task_state(dag_run=self._dag_run,
                               task_id='operator_toggle_handler')
        event = BaseEvent("test_event", "test_event", namespace="default")

        event_manager.handle_event(
            DagRunId(self._dag_run.dag_id, self._dag_run.run_id), event)
        event_manager.handle_event(
            DagRunId(self._dag_run.dag_id, self._dag_run.run_id), event)

        assert mailbox.get_message() is not None
        assert mailbox.get_message_with_timeout(5) is not None
    finally:
        # Stop the manager even when an assertion fails, so a started
        # manager is never left behind by this test.
        event_manager.end()
class EventBasedSchedulerJob(BaseJob):
    """
    Job that wires together and runs the event based scheduler.

    Outstanding work:

    1. todo self heartbeat
    2. todo check other scheduler failed
    3. todo timeout dagrun
    """

    __mapper_args__ = {'polymorphic_identity': 'EventBasedSchedulerJob'}

    def __init__(self, dag_directory, server_uri=None, max_runs=-1,
                 refresh_dag_dir_interval=conf.getint(
                     'scheduler', 'refresh_dag_dir_interval', fallback=30),
                 *args, **kwargs):
        """
        Build the job and the components it drives.

        One ``Mailbox`` is created and shared by the DAG trigger, the dag-run
        event manager, the executor and the scheduler.

        :param dag_directory: forwarded to ``DagTrigger`` as ``dag_directory``
        :param server_uri: used as ``notification_service_uri`` of the trigger
            and as ``server_uri`` of the ``NotificationClient``
        :param max_runs: forwarded to ``DagTrigger`` as ``max_runs``
        :param refresh_dag_dir_interval: forwarded to ``DagTrigger``; the
            default is read from the ``[scheduler]`` config section when the
            class body is executed, falling back to 30
        """
        super().__init__(*args, **kwargs)
        self.mailbox: Mailbox = Mailbox()
        self.dag_trigger: DagTrigger = DagTrigger(
            dag_directory=dag_directory,
            max_runs=max_runs,
            dag_ids=None,
            pickle_dags=False,
            mailbox=self.mailbox,
            refresh_dag_dir_interval=refresh_dag_dir_interval,
            notification_service_uri=server_uri)
        self.task_event_manager = DagRunEventManager(self.mailbox)
        self.executor.set_mailbox(self.mailbox)
        self.notification_client: NotificationClient = NotificationClient(
            server_uri=server_uri, default_namespace=SCHEDULER_NAMESPACE)
        self.scheduler: EventBasedScheduler = EventBasedScheduler(
            self.id,
            self.mailbox,
            self.task_event_manager,
            self.executor,
            self.notification_client)
        # Id of the most recent job, captured before this one runs; it is
        # passed to ``scheduler.recover`` in ``_execute``.
        self.last_scheduling_id = self._last_scheduler_job_id()

    @staticmethod
    def _last_scheduler_job_id():
        """Return the id of the most recent ``EventBasedSchedulerJob``, or ``None``."""
        last_run = EventBasedSchedulerJob.most_recent_job()
        return last_run.id if last_run else None

    def _execute(self):
        """
        Start all components, run the scheduler, then shut the components down.

        Any ``Exception`` raised along the way is logged and swallowed.
        """
        self.log.info("Starting the scheduler Job")

        try:
            self.mailbox.set_scheduling_job_id(self.id)
            self.scheduler.id = self.id
            self._start_listen_events()
            self.dag_trigger.start()
            self.task_event_manager.start()
            self.executor.job_id = self.id
            self.executor.start()

            self.register_signals()

            self.scheduler.submit_sync_thread()
            self.scheduler.recover(self.last_scheduling_id)
            self.scheduler.schedule()

            # NOTE(review): this teardown is only reached when everything
            # above returns normally. An exception, or the SystemExit raised
            # by ``_exit_gracefully``, skips it -- confirm whether the
            # components need to be stopped on those paths as well.
            self.executor.end()
            self.dag_trigger.end()
            self.task_event_manager.end()
            self._stop_listen_events()

            settings.Session.remove()  # type: ignore
        except Exception as e:  # pylint: disable=broad-except
            self.log.exception("Exception when executing scheduler, %s", e)
        finally:
            self.log.info("Exited execute loop")

    def _start_listen_events(self):
        """Subscribe a ``SchedulerEventWatcher`` to events from the current time on."""
        watcher = SchedulerEventWatcher(self.mailbox)
        self.notification_client.start_listen_events(
            watcher=watcher,
            # start time is the current wall clock in milliseconds
            start_time=int(time.time() * 1000),
            version=None)

    def _stop_listen_events(self):
        """Stop the notification client's event subscription."""
        self.notification_client.stop_listen_events()

    def register_signals(self) -> None:
        """Register the handlers for SIGINT, SIGTERM and SIGUSR2."""
        signal.signal(signal.SIGINT, self._exit_gracefully)
        signal.signal(signal.SIGTERM, self._exit_gracefully)
        signal.signal(signal.SIGUSR2, self._debug_dump)

    def _exit_gracefully(self, signum, frame) -> None:  # pylint: disable=unused-argument
        """Log the received signal and exit the process with ``os.EX_OK``."""
        self.log.info("Exiting gracefully upon receiving signal %s", signum)
        sys.exit(os.EX_OK)

    def _debug_dump(self, signum, frame):  # pylint: disable=unused-argument
        """Log a banner naming the received signal and dump the executor state."""
        try:
            sig_name = signal.Signals(signum).name  # pylint: disable=no-member
        except Exception:  # pylint: disable=broad-except
            # Fall back to the raw number when it cannot be resolved to a name.
            sig_name = str(signum)

        self.log.info("%s\n%s received, printing debug\n%s", "-" * 80, sig_name, "-" * 80)

        self.executor.debug_dump()
        self.log.info("-" * 80)

    def is_alive(self, grace_multiplier: Optional[float] = None) -> bool:
        """
        Is this SchedulerJob alive?

        We define alive as in a state of running and a heartbeat within the
        threshold defined in the ``scheduler_health_check_threshold`` config
        setting.

        ``grace_multiplier`` is accepted for compatibility with the parent
        class; when it is given, the parent implementation decides.

        :rtype: boolean
        """
        if grace_multiplier is not None:
            # Accept the same behaviour as superclass
            return super().is_alive(grace_multiplier=grace_multiplier)
        scheduler_health_check_threshold: int = conf.getint(
            'scheduler', 'scheduler_health_check_threshold')
        return (
            self.state == State.RUNNING
            and (timezone.utcnow() - self.latest_heartbeat).total_seconds()
            < scheduler_health_check_threshold
        )
def test_dag_run_event_manager(self):
    """
    Events for one dag run produce START, then STOP, then START again.

    The first two events are sent back to back; the third is sent after a
    two second pause.
    """
    mailbox = Mailbox()
    event_manager = DagRunEventManager(mailbox)
    event_manager.start()

    def current_run_id():
        # A fresh id object per use, exactly one per send/expectation.
        return DagRunId(self._dag_run.dag_id, self._dag_run.run_id)

    def next_result():
        # Block for the next mailbox message and decode it.
        raw_message = mailbox.get_message()
        inner_event = SchedulerInnerEventUtil.to_inner_event(raw_message)
        return EventHandleResult.from_event(inner_event)

    self.create_task_state(dag_run=self._dag_run,
                           task_id='operator_toggle_handler')
    event = BaseEvent("test_event", "test_event", namespace="default")

    event_manager.handle_event(current_run_id(), event)
    event_manager.handle_event(current_run_id(), event)

    first = next_result()
    assert first == EventHandleResult(
        current_run_id(), "operator_toggle_handler", SchedulingAction.START)

    second = next_result()
    assert second == EventHandleResult(
        current_run_id(), "operator_toggle_handler", SchedulingAction.STOP)

    time.sleep(2)
    event_manager.handle_event(current_run_id(), event)

    third = next_result()
    assert third == EventHandleResult(
        current_run_id(), "operator_toggle_handler", SchedulingAction.START)

    event_manager.end()
def test_dag_run_event_manager_multiple_dag_runs(self):
    """
    Two dag runs are handled independently of each other.

    One event per run yields a START result for each run; a second event per
    run yields a STOP result for each run. The order of the two results
    within a round is not checked.
    """
    dag_run1 = self._dag_run
    _, dag_run2 = self.init_dag_and_dag_run(
        '../../dags/test_task_event_handler_dag.py',
        'test_event_handler',
        timezone.datetime(2017, 1, 2))
    self.create_task_state(dag_run1, 'operator_toggle_handler')
    self.create_task_state(dag_run2, 'operator_toggle_handler')

    event = BaseEvent("test_event", "test_event", namespace="default")
    mailbox = Mailbox()
    event_manager = DagRunEventManager(mailbox=mailbox)

    def run_id_of(dag_run):
        # Build the manager's key for the given dag run.
        return DagRunId(dag_run.dag_id, dag_run.run_id)

    def send_to_both_runs():
        event_manager.handle_event(run_id_of(dag_run1), event)
        event_manager.handle_event(run_id_of(dag_run2), event)

    def collect_two_results():
        # Read two mailbox messages, in arrival order, and decode them.
        return [
            EventHandleResult.from_event(
                SchedulerInnerEventUtil.to_inner_event(mailbox.get_message()))
            for _ in range(2)
        ]

    def expected(dag_run, action):
        return EventHandleResult(
            run_id_of(dag_run), "operator_toggle_handler", action)

    send_to_both_runs()
    messages = collect_two_results()
    assert expected(dag_run1, SchedulingAction.START) in messages
    assert expected(dag_run2, SchedulingAction.START) in messages

    send_to_both_runs()
    messages = collect_two_results()
    assert expected(dag_run1, SchedulingAction.STOP) in messages
    assert expected(dag_run2, SchedulingAction.STOP) in messages

    event_manager.end()