def test_dag_run_event_manager(self):
    """Check the results a started DagRunEventManager posts to the mailbox.

    Two identical events are submitted for the same dag run and the mailbox
    is expected to yield a START result followed by a STOP result for
    ``operator_toggle_handler``.  After a two second pause a third event is
    submitted and a START result is expected again.
    """
    box = Mailbox()
    manager = DagRunEventManager(box)
    manager.start()

    self.create_task_state(dag_run=self._dag_run,
                           task_id='operator_toggle_handler')
    toggle_event = BaseEvent("test_event", "test_event", namespace="default")
    run_id = DagRunId(self._dag_run.dag_id, self._dag_run.run_id)

    def expect_next(action):
        # Pull one message off the mailbox and compare it with the result
        # expected for the toggle handler.
        result = EventHandleResult.from_event(box.get_message())
        assert result == EventHandleResult(
            run_id, "operator_toggle_handler", action)

    manager.handle_event(run_id, toggle_event)
    manager.handle_event(run_id, toggle_event)
    for action in (SchedulingAction.START, SchedulingAction.STOP):
        expect_next(action)

    # NOTE(review): the pause presumably lets the per-dag-run runner finish
    # before the next event arrives -- confirm against DagRunEventManager.
    time.sleep(2)
    manager.handle_event(run_id, toggle_event)
    expect_next(SchedulingAction.START)

    manager.end()
def test_dag_run_event_manager_release_runner(self):
    """Check which dag runs remain in ``_event_executor_runners``.

    An event is handled for the first dag run, the test waits five seconds,
    then an event is handled for the second dag run.  Afterwards the second
    dag run's id must be present in ``event_manager._event_executor_runners``
    and the first dag run's id must be absent.
    """
    dag_run1 = self._dag_run
    _, dag_run2 = self.init_dag_and_dag_run(
        '../../dags/test_task_event_handler_dag.py',
        'test_event_handler',
        timezone.datetime(2017, 1, 2))
    self.create_task_state(dag_run1, 'operator_toggle_handler')
    self.create_task_state(dag_run2, 'operator_toggle_handler')

    dag_run_id1 = DagRunId(dag_run1.dag_id, dag_run1.run_id)
    # The id is built entirely from dag_run2 so that the event is submitted
    # under exactly the key the assertion below looks up (the previous code
    # mixed dag_run1.dag_id with dag_run2.run_id).
    dag_run_id2 = DagRunId(dag_run2.dag_id, dag_run2.run_id)

    event = BaseEvent("test_event", "test_event", namespace="default")
    mailbox = Mailbox()
    event_manager = DagRunEventManager(mailbox=mailbox)
    try:
        event_manager.handle_event(dag_run_id1, event)
        # NOTE(review): presumably long enough for the first runner to be
        # released -- confirm against DagRunEventManager.
        time.sleep(5)
        event_manager.handle_event(dag_run_id2, event)

        assert dag_run_id2 in event_manager._event_executor_runners
        assert dag_run_id1 not in event_manager._event_executor_runners
    finally:
        # Always end the manager, even when an assertion fails.
        event_manager.end()
def test_task_event_executor_runner(self):
    """Run a DagRunEventExecutorRunner directly and check its two results.

    Two identical events are queued, ``run()`` is called once, and the
    mailbox is expected to hold a START result followed by a STOP result for
    ``operator_toggle_handler``.
    """
    toggle_event = BaseEvent("test_event", "test_event", namespace="default")
    self.create_task_state(dag_run=self._dag_run,
                           task_id='operator_toggle_handler')
    box = Mailbox()
    run_id = DagRunId(self._dag_run.dag_id, self._dag_run.run_id)
    runner = DagRunEventExecutorRunner(box, run_id, 10)

    for _ in range(2):
        runner.put_event(toggle_event)
    runner.run()

    for expected_action in (SchedulingAction.START, SchedulingAction.STOP):
        raw_message = box.get_message()
        inner_event = SchedulerInnerEventUtil.to_inner_event(raw_message)
        result = EventHandleResult.from_event(inner_event)
        assert result == EventHandleResult(
            run_id, "operator_toggle_handler", expected_action)
def __init__(self, dag_directory,
             server_uri=None,
             max_runs=-1,
             refresh_dag_dir_interval=conf.getint(
                 'scheduler', 'refresh_dag_dir_interval', fallback=30),
             *args, **kwargs):
    """Create the job's mailbox, dag trigger, event manager and scheduler.

    :param dag_directory: forwarded to ``DagTrigger`` as ``dag_directory``.
    :param server_uri: forwarded to ``DagTrigger`` as
        ``notification_service_uri`` and to ``NotificationClient`` as
        ``server_uri``.
    :param max_runs: forwarded to ``DagTrigger`` as ``max_runs``.
    :param refresh_dag_dir_interval: forwarded to ``DagTrigger``.  The default
        is read from the ``scheduler`` config section (fallback 30) once,
        when this function is defined, not on every call.
    :param args: forwarded to the parent constructor.
    :param kwargs: forwarded to the parent constructor.
    """
    super().__init__(*args, **kwargs)
    # A single mailbox instance is handed to the dag trigger, the event
    # manager, the executor and the scheduler below.
    self.mailbox: Mailbox = Mailbox()
    self.dag_trigger: DagTrigger = DagTrigger(
        dag_directory=dag_directory,
        max_runs=max_runs,
        dag_ids=None,
        pickle_dags=False,
        mailbox=self.mailbox,
        refresh_dag_dir_interval=refresh_dag_dir_interval,
        notification_service_uri=server_uri)
    self.task_event_manager = DagRunEventManager(self.mailbox)
    # self.executor is not assigned in this method; presumably it is set up
    # by the parent constructor -- confirm.
    self.executor.set_mailbox(self.mailbox)
    self.notification_client: NotificationClient = NotificationClient(
        server_uri=server_uri, default_namespace=SCHEDULER_NAMESPACE)
    self.scheduler: EventBasedScheduler = EventBasedScheduler(
        self.id,
        self.mailbox,
        self.task_event_manager,
        self.executor,
        self.notification_client)
    self.last_scheduling_id = self._last_scheduler_job_id()
def test_proccess_dag_file(self):
    """Start a DagTrigger on one dag file and expect one mailbox message.

    The trigger is given five seconds before the mailbox length is checked.
    """
    box = Mailbox()
    dag_file = os.path.join(TEST_DAG_FOLDER, 'test_event_scheduler_dags.py')
    trigger = DagTrigger(subdir=dag_file,
                         mailbox=box,
                         using_sqlite=True,
                         num_runs=-1)
    trigger.start()
    time.sleep(5)
    queued = box.length()
    self.assertEqual(1, queued)
    trigger.stop()
def test_dag_trigger_is_alive(self):
    """Check ``is_alive()`` before start, after start and after end."""
    box = Mailbox()
    trigger = DagTrigger(".", -1, [], False, box)

    alive_before_start = trigger.is_alive()
    assert not alive_before_start

    trigger.start()
    time.sleep(1)
    alive_while_running = trigger.is_alive()
    assert alive_while_running

    trigger.end()
    alive_after_end = trigger.is_alive()
    assert not alive_after_end
def test_dag_trigger(self):
    """Expect the first mailbox message to carry the dag id ``test``.

    The class-level helper ``_add_dag_needing_dagrun`` is invoked after the
    trigger has been started and before the mailbox is read.
    """
    box = Mailbox()
    trigger = DagTrigger(".", -1, [], False, box)
    trigger.start()
    type(self)._add_dag_needing_dagrun()
    inner_event = SchedulerInnerEventUtil.to_inner_event(box.get_message())
    assert inner_event.dag_id == "test"
    trigger.end()
def test_file_processor_manager_kill(self):
    """Kill the file processor agent's process and expect a live one later.

    The process referenced by ``_dag_file_processor_agent._process`` is
    killed and confirmed dead; five seconds later the attribute is read again
    and the process it references must be alive.
    """
    box = Mailbox()
    trigger = DagTrigger(".", -1, [], False, box)
    trigger.start()

    killed_process = trigger._dag_file_processor_agent._process
    killed_process.kill()
    killed_process.join(1)
    assert not killed_process.is_alive()

    time.sleep(5)
    # Re-read the attribute rather than reusing the old reference.
    current_process = trigger._dag_file_processor_agent._process
    assert current_process.is_alive()

    trigger.end()
def test_add_task_invalidated(self):
    """Expect ``add_task`` to raise for the two expressions used here.

    The cron value ``'*/1 * * * *'`` must raise an exception whose text
    contains ``'The cron expression'``; the interval value ``'0,0,0,1'``
    must raise one whose text contains ``'The interval expression'``.
    """
    mailbox = Mailbox()
    periodic_manager = PeriodicManager(mailbox)
    periodic_manager.start()
    try:
        with self.assertRaises(Exception) as context:
            periodic_manager.add_task('1', '1', {'cron': '*/1 * * * *'})
        # assertIn reports both operands on failure, unlike assertTrue.
        self.assertIn('The cron expression', str(context.exception))

        with self.assertRaises(Exception) as context:
            periodic_manager.add_task('2', '2', {'interval': '0,0,0,1'})
        self.assertIn('The interval expression', str(context.exception))
    finally:
        # Shut the manager down even when an assertion above fails.
        periodic_manager.shutdown()
def test_dag_trigger_parse_dag(self):
    """Start a DagTrigger on one dag file and check what it produced.

    The first mailbox message must name ``test_task_start_date_scheduling``;
    afterwards both dag ids must be retrievable through ``DagModel`` and
    ``SerializedDagModel``.
    """
    box = Mailbox()
    trigger = DagTrigger("../../dags/test_scheduler_dags.py", -1, [], False, box)
    trigger.start()

    inner_event = SchedulerInnerEventUtil.to_inner_event(box.get_message())
    # only one dag is executable
    assert "test_task_start_date_scheduling" == inner_event.dag_id

    expected_dag_ids = ("test_task_start_date_scheduling",
                        "test_start_date_scheduling")
    for lookup in (DagModel.get_dagmodel, SerializedDagModel.get):
        for expected_dag_id in expected_dag_ids:
            assert lookup(dag_id=expected_dag_id) is not None

    trigger.end()
def test_dag_run_event_manager_resubmit_if_exit_with_nonempty_queue(self):
    """Expect two mailbox messages when ``max_num_event`` is 1.

    Two events are submitted for the same dag run to a manager created with
    ``max_num_event=1``.  The first message is read with a blocking
    ``get_message()``; the second must arrive within the five second timeout
    passed to ``get_message_with_timeout``.
    """
    mailbox = Mailbox()
    event_manager = DagRunEventManager(mailbox, max_num_event=1)
    event_manager.start()
    try:
        self.create_task_state(dag_run=self._dag_run,
                               task_id='operator_toggle_handler')
        event = BaseEvent("test_event", "test_event", namespace="default")
        dag_run_id = DagRunId(self._dag_run.dag_id, self._dag_run.run_id)

        event_manager.handle_event(dag_run_id, event)
        event_manager.handle_event(dag_run_id, event)

        assert mailbox.get_message() is not None
        assert mailbox.get_message_with_timeout(5) is not None
    finally:
        # The manager was started above and previously never ended, so it was
        # left running after the test regardless of the outcome.
        event_manager.end()
def test_dag_run_event_manager_multiple_dag_runs(self):
    """Submit events for two dag runs and check both get START then STOP.

    In each of two rounds one event is handled per dag run and two messages
    are read from the mailbox.  The first round must contain a START result
    for each dag run, the second a STOP result for each; order within a
    round is not checked.
    """
    first_run = self._dag_run
    _, second_run = self.init_dag_and_dag_run(
        '../../dags/test_task_event_handler_dag.py',
        'test_event_handler',
        timezone.datetime(2017, 1, 2))
    self.create_task_state(first_run, 'operator_toggle_handler')
    self.create_task_state(second_run, 'operator_toggle_handler')
    toggle_event = BaseEvent("test_event", "test_event", namespace="default")
    box = Mailbox()
    manager = DagRunEventManager(mailbox=box)

    run_ids = (DagRunId(first_run.dag_id, first_run.run_id),
               DagRunId(second_run.dag_id, second_run.run_id))

    for expected_action in (SchedulingAction.START, SchedulingAction.STOP):
        for run_id in run_ids:
            manager.handle_event(run_id, toggle_event)
        results = [
            EventHandleResult.from_event(
                SchedulerInnerEventUtil.to_inner_event(box.get_message()))
            for _ in run_ids
        ]
        for run_id in run_ids:
            expected = EventHandleResult(
                run_id, "operator_toggle_handler", expected_action)
            assert expected in results

    manager.end()
def test_user_trigger_parse_dag(self):
    """Run a DagTrigger against a local notification master.

    A ``NotificationMaster`` is started on port 50101 and a ``DagTrigger`` is
    pointed at it.  The first mailbox message must name
    ``test_task_start_date_scheduling``; then ``trigger_parse_dag()`` is
    called through an ``EventSchedulerClient``.
    """
    port = 50101
    service_uri = 'localhost:{}'.format(port)
    storage = MemoryEventStorage()
    master = NotificationMaster(NotificationService(storage), port)
    master.run()
    try:
        mailbox = Mailbox()
        dag_trigger = DagTrigger("../../dags/test_scheduler_dags.py",
                                 -1, [], False, mailbox, 5, service_uri)
        dag_trigger.start()
        try:
            message = mailbox.get_message()
            message = SchedulerInnerEventUtil.to_inner_event(message)
            # only one dag is executable
            assert "test_task_start_date_scheduling" == message.dag_id
            sc = EventSchedulerClient(server_uri=service_uri, namespace='a')
            sc.trigger_parse_dag()
        finally:
            # End the trigger before the master, as the original order did.
            dag_trigger.end()
    finally:
        # The master is bound to a fixed port; stop it even on failure.
        master.stop()
def test_trigger_parse_dag(self):
    """Send a request for each of four dag files through a local master.

    A ``NotificationMaster`` is started on port 50102 and a ``DagTrigger`` is
    started on the dags folder located relative to this file.  Each file
    path is passed, with the server URI, to
    ``self._send_request_and_receive_response``.
    """
    import os
    port = 50102
    server_uri = "localhost:{}".format(port)
    storage = MemoryEventStorage()
    master = NotificationMaster(NotificationService(storage), port)
    master.run()
    try:
        dag_folder = os.path.abspath(os.path.dirname(__file__)) + "/../../dags"
        mailbox = Mailbox()
        dag_trigger = DagTrigger(dag_folder, -1, [], False, mailbox,
                                 notification_service_uri=server_uri)
        dag_trigger.start()
        try:
            to_be_triggered = [
                dag_folder + "/test_event_based_scheduler.py",
                dag_folder + "/test_event_task_dag.py",
                dag_folder + "/test_event_based_executor.py",
                dag_folder + "/test_scheduler_dags.py",
            ]
            for dag_file in to_be_triggered:
                self._send_request_and_receive_response(server_uri, dag_file)
        finally:
            # End the trigger before the master, as the original order did.
            dag_trigger.end()
    finally:
        # The master is bound to a fixed port; stop it even on failure.
        master.stop()
def test_add_task(self):
    """Add three periodic tasks in turn and check the event each produces.

    Two tasks use a ``cron`` expression and one an ``interval`` expression.
    For each, one message is read from the mailbox and its ``key`` must equal
    the first argument given to ``add_task``.
    """
    box = Mailbox()
    manager = PeriodicManager(box)
    manager.start()

    # Task '1' is removed before its key is checked, the other two after.
    manager.add_task('1', '1', {'cron': '*/1 * * * * * *'})
    first_fired = box.get_message()
    manager.remove_task('1', '1')
    self.assertEqual('1', first_fired.key)

    manager.add_task('2', '2', {'cron': '*/1 * * * * *'})
    second_fired = box.get_message()
    self.assertEqual('2', second_fired.key)
    manager.remove_task('2', '2')

    manager.add_task('3', '3', {'interval': '0,0,0,0,1'})
    third_fired = box.get_message()
    self.assertEqual('3', third_fired.key)
    manager.remove_task('3', '3')

    manager.shutdown()
def test_task_event_executor_runner_max_event(self):
    """Queue ten events on a runner built with 5 and check five remain.

    After one ``run()`` call, five messages are read from the mailbox and the
    runner's ``_event_queue`` must still hold five events.
    """
    toggle_event = BaseEvent("test_event", "test_event", namespace="default")
    self.create_task_state(dag_run=self._dag_run,
                           task_id='operator_toggle_handler')
    box = Mailbox()
    run_id = DagRunId(self._dag_run.dag_id, self._dag_run.run_id)
    runner = DagRunEventExecutorRunner(box, run_id, 5)

    for _ in range(10):
        runner.put_event(toggle_event)
    runner.run()

    # Drain five messages from the mailbox; their contents are not inspected.
    messages = [box.get_message() for _ in range(5)]
    assert runner._event_queue.qsize() == 5
def __init__(self,
             dag_id=None,
             dag_ids=None,
             subdir=settings.DAGS_FOLDER,
             num_runs=conf.getint('scheduler', 'num_runs', fallback=-1),
             processor_poll_interval=conf.getfloat(
                 'scheduler', 'processor_poll_interval', fallback=1),
             use_local_nf=conf.getboolean('scheduler', 'use_local_notification', fallback=True),
             nf_host=conf.get('scheduler', 'notification_host', fallback='localhost'),
             nf_port=conf.getint('scheduler', 'notification_port', fallback=50051),
             unit_test_mode=conf.getboolean('core', 'unit_test_mode', fallback=False),
             executor_heartbeat_interval=conf.getint(
                 'scheduler', 'executor_heartbeat_interval', fallback=2),
             run_duration=None,
             do_pickle=False,
             log=None,
             *args, **kwargs):
    """Initialise the parent job and this job's notification-related state.

    All ``conf.get*`` defaults in the signature are evaluated once, when the
    function is defined, not on every call.

    :param dag_id: forwarded to the parent constructor.
    :param dag_ids: forwarded to the parent constructor.
    :param subdir: forwarded to the parent constructor; defaults to
        ``settings.DAGS_FOLDER``.
    :param num_runs: forwarded to the parent constructor.
    :param processor_poll_interval: forwarded to the parent constructor.
    :param use_local_nf: stored as ``self.use_local_nf``.
    :param nf_host: stored as ``self.nf_host``.
    :param nf_port: stored as ``self.nf_port``.
    :param unit_test_mode: stored as ``self.unit_test_mode``.
    :param executor_heartbeat_interval: stored as
        ``self.executor_heartbeat_interval``.
    :param run_duration: forwarded to the parent constructor.
    :param do_pickle: forwarded to the parent constructor.
    :param log: forwarded to the parent constructor.
    :param args: forwarded to the parent constructor.
    :param kwargs: forwarded to the parent constructor.
    """
    super().__init__(dag_id, dag_ids, subdir, num_runs, processor_poll_interval,
                     run_duration, do_pickle, log, *args, **kwargs)
    # Left as None here; presumably created later by the job -- confirm.
    self.dag_trigger = None
    self.notification_master = None
    self.use_local_nf = use_local_nf
    self.nf_host = nf_host
    self.nf_port = nf_port
    self.mail_box = Mailbox()
    self.running = True
    self.dagrun_route = DagRunRoute()
    self.unit_test_mode = unit_test_mode
    self.executor_heartbeat_interval = executor_heartbeat_interval
    # Left as None here; presumably a thread started later -- confirm.
    self.heartbeat_thread = None
def test_replay_message(self):
    """Check a received event is stored and reported as unprocessed.

    An event with key ``"stop"`` is sent while a ``SchedulerEventWatcher``
    feeds a mailbox whose scheduling job id is 1234.  The message read from
    the mailbox must have that key; the first ``Message`` row in the database
    must match the unprocessed message reported for job id 1234 (none are
    expected for job id 1000) and must unpickle to the received event.
    """
    expected_key = "stop"
    box = Mailbox()
    box.set_scheduling_job_id(1234)
    event_watcher = SchedulerEventWatcher(box)
    listen_from_ms = int(time.time() * 1000)
    self.client.start_listen_events(watcher=event_watcher,
                                    start_time=listen_from_ms,
                                    version=None)
    self.send_event(expected_key)

    received: BaseEvent = box.get_message()
    self.assertEqual(received.key, expected_key)

    with create_session() as session:
        stored_row = session.query(Message).first()
        for_other_job = EventBasedScheduler.get_unprocessed_message(1000)
        self.assertEqual(0, len(for_other_job))
        for_this_job = EventBasedScheduler.get_unprocessed_message(1234)
        self.assertEqual(for_this_job[0].serialized_message, stored_row.data)

    # The payload was written by this test, so unpickling it here is safe.
    restored = pickle.loads(stored_row.data)
    self.assertEqual(restored.key, expected_key)
    self.assertEqual(received, restored)