def test_task_event_executor_runner(self):
    """Run the executor runner over the same event twice and check both results.

    The event is queued two times before a single ``run()``. The first
    result read from the mailbox is expected to be START for
    ``operator_toggle_handler`` and the second STOP.
    """
    task_id = 'operator_toggle_handler'
    event = BaseEvent("test_event", "test_event", namespace="default")
    self.create_task_state(dag_run=self._dag_run, task_id=task_id)
    mailbox = Mailbox()
    run_id = DagRunId(self._dag_run.dag_id, self._dag_run.run_id)
    executor_runner = DagRunEventExecutorRunner(mailbox, run_id, 10)

    def next_result():
        # Unwrap the next mailbox message into an EventHandleResult.
        raw_message = mailbox.get_message()
        inner_event = SchedulerInnerEventUtil.to_inner_event(raw_message)
        return EventHandleResult.from_event(inner_event)

    for _ in range(2):
        executor_runner.put_event(event)
    executor_runner.run()

    for expected_action in (SchedulingAction.START, SchedulingAction.STOP):
        message = next_result()
        assert message == EventHandleResult(run_id, task_id, expected_action)
def test_dag_run_event_manager_release_runner(self):
    """Check that a runner is dropped from the manager once another run is handled.

    An event is handled for the first dag run, then after a pause an event
    is handled for the second dag run. The test asserts that the manager's
    ``_event_executor_runners`` then contains the second run's id and no
    longer contains the first run's id.
    """
    dag_run1 = self._dag_run
    _, dag_run2 = self.init_dag_and_dag_run(
        '../../dags/test_task_event_handler_dag.py',
        'test_event_handler',
        timezone.datetime(2017, 1, 2))
    self.create_task_state(dag_run1, 'operator_toggle_handler')
    self.create_task_state(dag_run2, 'operator_toggle_handler')

    event = BaseEvent("test_event", "test_event", namespace="default")
    mailbox = Mailbox()

    dag_run_id1 = DagRunId(dag_run1.dag_id, dag_run1.run_id)
    # Build the second id entirely from dag_run2. The previous code mixed
    # dag_run1.dag_id with dag_run2.run_id, which only matched the asserted
    # key when both runs happened to share a dag_id.
    dag_run_id2 = DagRunId(dag_run2.dag_id, dag_run2.run_id)

    event_manager = DagRunEventManager(mailbox=mailbox)
    try:
        event_manager.handle_event(dag_run_id1, event)

        # Presumably gives the first runner time to finish before the next
        # event is submitted; the 5 s value is kept from the original test.
        time.sleep(5)
        event_manager.handle_event(dag_run_id2, event)

        assert dag_run_id2 in event_manager._event_executor_runners
        assert dag_run_id1 not in event_manager._event_executor_runners
    finally:
        # Always shut the manager down, even when an assertion fails.
        event_manager.end()
def test_dag_run_event_manager_resubmit_if_exit_with_nonempty_queue(self):
    """Check that both events produce a message when ``max_num_event`` is 1.

    Two events are handled for the same dag run on a manager created with
    ``max_num_event=1``. The test expects two messages to reach the
    mailbox, the second within a 5 second timeout.
    """
    mailbox = Mailbox()
    event_manager = DagRunEventManager(mailbox, max_num_event=1)
    event_manager.start()
    try:
        self.create_task_state(dag_run=self._dag_run,
                               task_id='operator_toggle_handler')
        event = BaseEvent("test_event", "test_event", namespace="default")
        dag_run_id = DagRunId(self._dag_run.dag_id, self._dag_run.run_id)

        event_manager.handle_event(dag_run_id, event)
        event_manager.handle_event(dag_run_id, event)

        assert mailbox.get_message() is not None
        assert mailbox.get_message_with_timeout(5) is not None
    finally:
        # The manager was started above; end it like the sibling tests do so
        # it does not outlive this test, even when an assertion fails.
        event_manager.end()
def schedule(self):
    """Run the scheduler event loop until a matching StopSchedulerEvent arrives.

    After restoring unfinished dag runs, the loop repeatedly takes one
    identified message from the mailbox, deserializes it (converting it to
    an inner event when applicable) and dispatches on the event type.
    Every handled message is removed from the mailbox and the session is
    expunged before the next iteration. The loop ends when a
    StopSchedulerEvent carries this job's id or job id 0; the timer is
    stopped afterwards.
    """
    self.log.info("Starting the scheduler.")
    self._restore_unfinished_dag_run()
    while True:
        identified_message = self.mailbox.get_identified_message()
        origin_event = identified_message.deserialize()
        self.log.debug("Event: {}".format(origin_event))
        if SchedulerInnerEventUtil.is_inner_event(origin_event):
            event = SchedulerInnerEventUtil.to_inner_event(origin_event)
        else:
            event = origin_event
        with create_session() as session:
            if isinstance(event, BaseEvent):
                dagruns = self._find_dagruns_by_event(event, session)
                for dagrun in dagruns:
                    dag_run_id = DagRunId(dagrun.dag_id, dagrun.run_id)
                    self.task_event_manager.handle_event(dag_run_id, event)
            elif isinstance(event, RequestEvent):
                self._process_request_event(event)
            elif isinstance(event, ResponseEvent):
                # Nothing to do for responses. Use `pass` rather than
                # `continue` so the message is still removed below instead
                # of being left in the mailbox as unhandled.
                pass
            elif isinstance(event, TaskSchedulingEvent):
                self._schedule_task(event)
            elif isinstance(event, TaskStatusChangedEvent):
                dagrun = self._find_dagrun(event.dag_id,
                                           event.execution_date,
                                           session)
                if dagrun is not None:
                    tasks = self._find_schedulable_tasks(dagrun, session)
                    self._send_scheduling_task_events(
                        tasks, SchedulingAction.START)
                else:
                    # No matching dag run: log and drop the event rather
                    # than passing None on to the task lookup.
                    self.log.warning(
                        "dagrun is None for dag_id:{} execution_date: {}".
                        format(event.dag_id, event.execution_date))
            elif isinstance(event, DagExecutableEvent):
                dagrun = self._create_dag_run(event.dag_id, session=session)
                tasks = self._find_schedulable_tasks(dagrun, session)
                self._send_scheduling_task_events(tasks,
                                                  SchedulingAction.START)
            elif isinstance(event, EventHandleEvent):
                dag_runs = DagRun.find(dag_id=event.dag_id,
                                       run_id=event.dag_run_id)
                assert len(dag_runs) == 1
                ti = dag_runs[0].get_task_instance(event.task_id)
                self._send_scheduling_task_event(ti, event.action)
            elif isinstance(event, StopDagEvent):
                self._stop_dag(event.dag_id, session)
            elif isinstance(event, (ParseDagRequestEvent,
                                    ParseDagResponseEvent)):
                pass
            elif isinstance(event, StopSchedulerEvent):
                self.log.info("{} {}".format(self.id, event.job_id))
                if self.id == event.job_id or 0 == event.job_id:
                    self.log.info("break the scheduler event loop.")
                    # Clean up here because `break` skips the shared
                    # cleanup at the bottom of the loop body.
                    identified_message.remove_handled_message()
                    session.expunge_all()
                    break
            else:
                self.log.error(
                    "can not handler the event {}".format(event))
            identified_message.remove_handled_message()
            session.expunge_all()
    self._stop_timer()
def test_dag_run_event_manager(self):
    """Drive a started DagRunEventManager with three events for one dag run.

    Two events are handled back to back and are expected to yield START
    then STOP for ``operator_toggle_handler``. After a 2 second pause a
    third event is handled and is expected to yield START again.
    """
    mailbox = Mailbox()
    event_manager = DagRunEventManager(mailbox)
    event_manager.start()

    self.create_task_state(dag_run=self._dag_run,
                           task_id='operator_toggle_handler')
    event = BaseEvent("test_event", "test_event", namespace="default")

    def current_run_id():
        return DagRunId(self._dag_run.dag_id, self._dag_run.run_id)

    def assert_next_action(expected_action):
        # NOTE(review): unlike sibling tests, the mailbox message is passed
        # to from_event without SchedulerInnerEventUtil.to_inner_event --
        # confirm this is intended.
        handle_event = mailbox.get_message()
        message = EventHandleResult.from_event(handle_event)
        assert message == EventHandleResult(current_run_id(),
                                            "operator_toggle_handler",
                                            expected_action)

    event_manager.handle_event(current_run_id(), event)
    event_manager.handle_event(current_run_id(), event)
    assert_next_action(SchedulingAction.START)
    assert_next_action(SchedulingAction.STOP)

    time.sleep(2)
    event_manager.handle_event(current_run_id(), event)
    assert_next_action(SchedulingAction.START)

    event_manager.end()
def test_dag_run_event_manager_multiple_dag_runs(self):
    """Handle the same event for two dag runs, twice, and check the results.

    In the first round both runs are expected to report START for
    ``operator_toggle_handler``; in the second round both are expected to
    report STOP. Within a round, the order of the two mailbox messages is
    not checked.
    """
    dag_run1 = self._dag_run
    _, dag_run2 = self.init_dag_and_dag_run(
        '../../dags/test_task_event_handler_dag.py',
        'test_event_handler',
        timezone.datetime(2017, 1, 2))
    for dag_run in (dag_run1, dag_run2):
        self.create_task_state(dag_run, 'operator_toggle_handler')

    event = BaseEvent("test_event", "test_event", namespace="default")
    mailbox = Mailbox()
    event_manager = DagRunEventManager(mailbox=mailbox)
    run_ids = [DagRunId(dag_run.dag_id, dag_run.run_id)
               for dag_run in (dag_run1, dag_run2)]

    for expected_action in (SchedulingAction.START, SchedulingAction.STOP):
        for run_id in run_ids:
            event_manager.handle_event(run_id, event)

        # Collect one result per dag run before asserting on either.
        messages = [
            EventHandleResult.from_event(
                SchedulerInnerEventUtil.to_inner_event(mailbox.get_message()))
            for _ in range(2)
        ]
        for run_id in run_ids:
            assert EventHandleResult(run_id,
                                     "operator_toggle_handler",
                                     expected_action) in messages

    event_manager.end()
def test_task_event_executor_runner_max_event(self):
    """Check that five of ten queued events remain after a single ``run()``.

    The runner is built with 5 as its third argument (presumably the
    per-run event cap -- confirm against DagRunEventExecutorRunner). Ten
    events are queued, ``run()`` is called once, five messages are read
    from the mailbox, and the runner's queue is expected to still hold
    five events.
    """
    event = BaseEvent("test_event", "test_event", namespace="default")
    self.create_task_state(dag_run=self._dag_run,
                           task_id='operator_toggle_handler')
    mailbox = Mailbox()
    run_id = DagRunId(self._dag_run.dag_id, self._dag_run.run_id)
    executor_runner = DagRunEventExecutorRunner(mailbox, run_id, 5)

    for _ in range(10):
        executor_runner.put_event(event)
    executor_runner.run()

    # Read the five results off the mailbox; their contents are not inspected.
    messages = [mailbox.get_message() for _ in range(5)]

    assert executor_runner._event_queue.qsize() == 5
def schedule(self) -> bool:
    """Handle at most one message from the mailbox.

    The message is deserialized, converted to an inner event when
    applicable, and dispatched on its type. Afterwards the message is
    removed from the mailbox and the session is expunged.

    Returns:
        False when a StopSchedulerEvent carrying this job's id (or job id
        0) was handled; True otherwise, including when no message was
        available.
    """
    envelope = self.mailbox.get_identified_message()
    if not envelope:
        return True

    origin_event = envelope.deserialize()
    self.log.debug("Event: {}".format(origin_event))
    event = (SchedulerInnerEventUtil.to_inner_event(origin_event)
             if SchedulerInnerEventUtil.is_inner_event(origin_event)
             else origin_event)

    with create_session() as session:
        if isinstance(event, BaseEvent):
            for matched_run in self._find_dagruns_by_event(event, session):
                self.task_event_manager.handle_event(
                    DagRunId(matched_run.dag_id, matched_run.run_id), event)
        elif isinstance(event, RequestEvent):
            self._process_request_event(event)
        elif isinstance(event, TaskSchedulingEvent):
            self._schedule_task(event)
        elif isinstance(event, TaskStateChangedEvent):
            dagrun = self._find_dagrun(event.dag_id,
                                       event.execution_date,
                                       session)
            if dagrun is None:
                self.log.warning(
                    "dagrun is None for dag_id:{} execution_date: {}".
                    format(event.dag_id, event.execution_date))
            else:
                # The original (not inner) event is forwarded here.
                self.task_event_manager.handle_event(
                    DagRunId(dagrun.dag_id, dagrun.run_id), origin_event)
                runnable = self._find_scheduled_tasks(dagrun, session)
                self._send_scheduling_task_events(runnable,
                                                  SchedulingAction.START)
                if dagrun.state in State.finished:
                    finished = DagRunFinishedEvent(dagrun.run_id)
                    self.mailbox.send_message(finished.to_event())
        elif isinstance(event, DagExecutableEvent):
            new_run = self._create_dag_run(event.dag_id, session=session)
            runnable = self._find_scheduled_tasks(new_run, session)
            self._send_scheduling_task_events(runnable,
                                              SchedulingAction.START)
        elif isinstance(event, EventHandleEvent):
            dag_runs = DagRun.find(dag_id=event.dag_id,
                                   run_id=event.dag_run_id)
            assert len(dag_runs) == 1
            task_instance = dag_runs[0].get_task_instance(event.task_id)
            self._send_scheduling_task_event(task_instance, event.action)
        elif isinstance(event, StopDagEvent):
            self._stop_dag(event.dag_id, session)
        elif isinstance(event, DagRunFinishedEvent):
            self._remove_periodic_events(event.run_id)
        elif isinstance(event, PeriodicEvent):
            dag_runs = DagRun.find(run_id=event.run_id)
            assert len(dag_runs) == 1
            task_instance = dag_runs[0].get_task_instance(event.task_id)
            self._send_scheduling_task_event(task_instance,
                                             SchedulingAction.RESTART)
        elif isinstance(event, StopSchedulerEvent):
            self.log.info("{} {}".format(self.id, event.job_id))
            stop_requested = (self.id == event.job_id
                              or event.job_id == 0)
            if stop_requested:
                self.log.info("break the scheduler event loop.")
                # Clean up before the early return, which skips the shared
                # cleanup at the end of the dispatch.
                envelope.remove_handled_message()
                session.expunge_all()
                return False
        elif isinstance(event, (ParseDagRequestEvent,
                                ParseDagResponseEvent,
                                ResponseEvent)):
            # These event types need no action here.
            pass
        else:
            self.log.error("can not handler the event {}".format(event))

        envelope.remove_handled_message()
        session.expunge_all()
        return True