Example #1
0
    def test_dag_run_event_manager(self):
        """Check the scheduling actions reported for three identical events.

        Two events handled back to back must yield START and then STOP for
        ``operator_toggle_handler``; a third event handled after a two
        second pause must yield START again.
        """
        mailbox = Mailbox()
        event_manager = DagRunEventManager(mailbox)
        event_manager.start()
        # End the manager even when an assertion fails, so a failing run does
        # not leave it running behind the test.
        try:
            self.create_task_state(dag_run=self._dag_run,
                                   task_id='operator_toggle_handler')
            event = BaseEvent("test_event", "test_event", namespace="default")
            dag_run_id = DagRunId(self._dag_run.dag_id, self._dag_run.run_id)

            event_manager.handle_event(dag_run_id, event)
            event_manager.handle_event(dag_run_id, event)

            for expected_action in (SchedulingAction.START,
                                    SchedulingAction.STOP):
                message = EventHandleResult.from_event(mailbox.get_message())
                assert message == EventHandleResult(
                    dag_run_id, "operator_toggle_handler", expected_action)

            time.sleep(2)
            event_manager.handle_event(dag_run_id, event)
            message = EventHandleResult.from_event(mailbox.get_message())
            assert message == EventHandleResult(
                dag_run_id, "operator_toggle_handler", SchedulingAction.START)
        finally:
            event_manager.end()
    def test_dag_run_event_manager_release_runner(self):
        """Check which dag runs still have a runner after a five second gap.

        An event is handled for the first dag run; five seconds later an
        event is handled for the second one. Afterwards only the second dag
        run may have an entry in the manager's ``_event_executor_runners``.
        """
        dag_run1 = self._dag_run
        _, dag_run2 = self.init_dag_and_dag_run(
            '../../dags/test_task_event_handler_dag.py', 'test_event_handler',
            timezone.datetime(2017, 1, 2))
        self.create_task_state(dag_run1, 'operator_toggle_handler')
        self.create_task_state(dag_run2, 'operator_toggle_handler')
        dag_run_id1 = DagRunId(dag_run1.dag_id, dag_run1.run_id)
        dag_run_id2 = DagRunId(dag_run2.dag_id, dag_run2.run_id)

        event = BaseEvent("test_event", "test_event", namespace="default")
        mailbox = Mailbox()

        event_manager = DagRunEventManager(mailbox=mailbox)
        # End the manager even when an assertion fails.
        try:
            event_manager.handle_event(dag_run_id1, event)

            time.sleep(5)
            # Address the second dag run by its own dag_id and run_id so the
            # key is exactly the one asserted below.
            event_manager.handle_event(dag_run_id2, event)
            assert dag_run_id2 in event_manager._event_executor_runners
            assert dag_run_id1 not in event_manager._event_executor_runners
        finally:
            event_manager.end()
    def test_task_event_executor_runner(self):
        """Run a runner over two queued events and expect START then STOP."""
        test_event = BaseEvent("test_event", "test_event", namespace="default")

        self.create_task_state(dag_run=self._dag_run,
                               task_id='operator_toggle_handler')

        mailbox = Mailbox()
        dag_run_id = DagRunId(self._dag_run.dag_id, self._dag_run.run_id)
        runner = DagRunEventExecutorRunner(mailbox, dag_run_id, 10)
        for _ in range(2):
            runner.put_event(test_event)

        runner.run()

        # The mailbox is expected to deliver the two results in this order.
        for expected_action in (SchedulingAction.START,
                                SchedulingAction.STOP):
            inner_event = SchedulerInnerEventUtil.to_inner_event(
                mailbox.get_message())
            result = EventHandleResult.from_event(inner_event)
            assert result == EventHandleResult(
                dag_run_id, "operator_toggle_handler", expected_action)
Example #4
0
 def __init__(self,
              dag_directory,
              server_uri=None,
              max_runs=-1,
              refresh_dag_dir_interval=conf.getint(
                  'scheduler', 'refresh_dag_dir_interval', fallback=30),
              *args,
              **kwargs):
     """Create the mailbox, DAG trigger, event manager, client and scheduler.

     :param dag_directory: forwarded to ``DagTrigger`` as ``dag_directory``.
     :param server_uri: handed to ``DagTrigger`` as
         ``notification_service_uri`` and to ``NotificationClient`` as
         ``server_uri``.
     :param max_runs: forwarded to ``DagTrigger`` as ``max_runs``.
     :param refresh_dag_dir_interval: forwarded to ``DagTrigger``; the
         default is read from the ``scheduler`` config section once, when
         this function is defined (fallback 30).
     :param args: extra positional arguments for the parent constructor.
     :param kwargs: extra keyword arguments for the parent constructor.
     """
     super().__init__(*args, **kwargs)

     # A single mailbox instance is shared by every component built below.
     shared_mailbox = Mailbox()
     self.mailbox: Mailbox = shared_mailbox

     trigger_options = dict(
         dag_directory=dag_directory,
         max_runs=max_runs,
         dag_ids=None,
         pickle_dags=False,
         mailbox=shared_mailbox,
         refresh_dag_dir_interval=refresh_dag_dir_interval,
         notification_service_uri=server_uri,
     )
     self.dag_trigger: DagTrigger = DagTrigger(**trigger_options)

     event_manager = DagRunEventManager(shared_mailbox)
     self.task_event_manager = event_manager
     # NOTE(review): self.executor and self.id are not assigned here;
     # presumably the parent constructor provides them - confirm.
     self.executor.set_mailbox(shared_mailbox)

     client = NotificationClient(
         server_uri=server_uri, default_namespace=SCHEDULER_NAMESPACE)
     self.notification_client: NotificationClient = client

     self.scheduler: EventBasedScheduler = EventBasedScheduler(
         self.id, shared_mailbox, event_manager, self.executor, client)
     self.last_scheduling_id = self._last_scheduler_job_id()
 def test_proccess_dag_file(self):
     """Start a trigger on one DAG file and expect one mailbox message."""
     mailbox = Mailbox()
     test_dag_path = os.path.join(TEST_DAG_FOLDER,
                                  'test_event_scheduler_dags.py')
     dag_trigger = DagTrigger(subdir=test_dag_path, mailbox=mailbox,
                              using_sqlite=True, num_runs=-1)
     dag_trigger.start()
     # Stop the trigger even when the assertion fails.
     try:
         # Wait before counting what has arrived in the mailbox.
         time.sleep(5)
         self.assertEqual(1, mailbox.length())
     finally:
         dag_trigger.stop()
 def test_dag_trigger_is_alive(self):
     """``is_alive`` is false before start, true while running, false after end."""
     mailbox = Mailbox()
     dag_trigger = DagTrigger(".", -1, [], False, mailbox)
     assert not dag_trigger.is_alive()
     dag_trigger.start()
     # End the trigger even when the liveness assertion fails.
     try:
         time.sleep(1)
         assert dag_trigger.is_alive()
     finally:
         dag_trigger.end()
     assert not dag_trigger.is_alive()
    def test_dag_trigger(self):
        """Expect a mailbox message for dag ``test`` after a dag is added."""
        mailbox = Mailbox()
        dag_trigger = DagTrigger(".", -1, [], False, mailbox)
        dag_trigger.start()
        # End the trigger even when the assertion fails.
        try:
            type(self)._add_dag_needing_dagrun()

            message = SchedulerInnerEventUtil.to_inner_event(
                mailbox.get_message())
            assert message.dag_id == "test"
        finally:
            dag_trigger.end()
 def test_file_processor_manager_kill(self):
     """Kill the agent's process and expect a live one five seconds later."""
     mailbox = Mailbox()
     dag_trigger = DagTrigger(".", -1, [], False, mailbox)
     dag_trigger.start()
     # End the trigger even when an assertion fails.
     try:
         killed_process = dag_trigger._dag_file_processor_agent._process
         killed_process.kill()
         killed_process.join(1)
         assert not killed_process.is_alive()

         time.sleep(5)
         # Read the attribute again: the agent is expected to hold a live
         # process at this point.
         current_process = dag_trigger._dag_file_processor_agent._process
         assert current_process.is_alive()
     finally:
         dag_trigger.end()
    def test_add_task_invalidated(self):
        """Expect ``add_task`` to raise for the given cron and interval values."""
        mailbox = Mailbox()
        periodic_manager = PeriodicManager(mailbox)
        periodic_manager.start()
        # Shut the manager down even when an assertion fails.
        try:
            with self.assertRaises(Exception) as context:
                periodic_manager.add_task('1', '1', {'cron': '*/1 * * * *'})
            # assertIn reports both operands on failure.
            self.assertIn('The cron expression', str(context.exception))

            with self.assertRaises(Exception) as context:
                periodic_manager.add_task('2', '2', {'interval': '0,0,0,1'})
            self.assertIn('The interval expression', str(context.exception))
        finally:
            periodic_manager.shutdown()
    def test_dag_trigger_parse_dag(self):
        """Parse one DAG file and check the message and the stored models.

        The first mailbox message must be for
        ``test_task_start_date_scheduling``; both dags from the file must be
        retrievable through ``DagModel`` and ``SerializedDagModel``.
        """
        mailbox = Mailbox()
        dag_trigger = DagTrigger("../../dags/test_scheduler_dags.py", -1, [],
                                 False, mailbox)
        dag_trigger.start()
        # End the trigger even when an assertion fails.
        try:
            message = SchedulerInnerEventUtil.to_inner_event(
                mailbox.get_message())
            # only one dag is executable
            assert "test_task_start_date_scheduling" == message.dag_id

            parsed_dag_ids = ("test_task_start_date_scheduling",
                              "test_start_date_scheduling")
            for dag_id in parsed_dag_ids:
                assert DagModel.get_dagmodel(dag_id=dag_id) is not None
            for dag_id in parsed_dag_ids:
                assert SerializedDagModel.get(dag_id=dag_id) is not None
        finally:
            dag_trigger.end()
    def test_dag_run_event_manager_resubmit_if_exit_with_nonempty_queue(self):
        """With ``max_num_event=1``, two events must still yield two messages.

        The second message is awaited with a five second timeout.
        """
        mailbox = Mailbox()
        event_manager = DagRunEventManager(mailbox, max_num_event=1)
        event_manager.start()
        # The manager is started above, so always end it - on success as well
        # as when an assertion fails.
        try:
            self.create_task_state(dag_run=self._dag_run,
                                   task_id='operator_toggle_handler')
            event = BaseEvent("test_event", "test_event", namespace="default")
            dag_run_id = DagRunId(self._dag_run.dag_id, self._dag_run.run_id)
            event_manager.handle_event(dag_run_id, event)
            event_manager.handle_event(dag_run_id, event)

            assert mailbox.get_message() is not None
            assert mailbox.get_message_with_timeout(5) is not None
        finally:
            event_manager.end()
    def test_dag_run_event_manager_multiple_dag_runs(self):
        """Handle events for two dag runs and check both get each action.

        One event per dag run must produce a START result for each of them
        (in either order); a second event per dag run must produce a STOP
        result for each of them.
        """
        dag_run1 = self._dag_run
        _, dag_run2 = self.init_dag_and_dag_run(
            '../../dags/test_task_event_handler_dag.py', 'test_event_handler',
            timezone.datetime(2017, 1, 2))
        self.create_task_state(dag_run1, 'operator_toggle_handler')
        self.create_task_state(dag_run2, 'operator_toggle_handler')
        dag_run_ids = (DagRunId(dag_run1.dag_id, dag_run1.run_id),
                       DagRunId(dag_run2.dag_id, dag_run2.run_id))

        event = BaseEvent("test_event", "test_event", namespace="default")
        mailbox = Mailbox()

        event_manager = DagRunEventManager(mailbox=mailbox)
        # End the manager even when an assertion fails.
        try:
            for expected_action in (SchedulingAction.START,
                                    SchedulingAction.STOP):
                for dag_run_id in dag_run_ids:
                    event_manager.handle_event(dag_run_id, event)

                # One result per dag run; arrival order is not fixed, hence
                # the membership checks below.
                messages = [
                    EventHandleResult.from_event(
                        SchedulerInnerEventUtil.to_inner_event(
                            mailbox.get_message()))
                    for _ in dag_run_ids
                ]
                for dag_run_id in dag_run_ids:
                    assert EventHandleResult(dag_run_id,
                                             "operator_toggle_handler",
                                             expected_action) in messages
        finally:
            event_manager.end()
 def test_user_trigger_parse_dag(self):
     """Parse a DAG file, then request another parse through the client.

     A notification master is run on port 50101 and a trigger is connected
     to it. The first mailbox message must be for
     ``test_task_start_date_scheduling``.
     """
     port = 50101
     service_uri = 'localhost:{}'.format(port)
     storage = MemoryEventStorage()
     master = NotificationMaster(NotificationService(storage), port)
     master.run()
     # Always end the trigger and then stop the master, even when an
     # assertion or the client call fails.
     try:
         mailbox = Mailbox()
         dag_trigger = DagTrigger("../../dags/test_scheduler_dags.py", -1,
                                  [], False, mailbox, 5, service_uri)
         dag_trigger.start()
         try:
             message = SchedulerInnerEventUtil.to_inner_event(
                 mailbox.get_message())
             # only one dag is executable
             assert "test_task_start_date_scheduling" == message.dag_id
             sc = EventSchedulerClient(server_uri=service_uri, namespace='a')
             sc.trigger_parse_dag()
         finally:
             dag_trigger.end()
     finally:
         master.stop()
    def test_trigger_parse_dag(self):
        """Send a parse request for each of four DAG files.

        A notification master is run on port 50102 and a trigger is
        connected to it; each file path is passed to
        ``_send_request_and_receive_response``.
        """
        import os
        port = 50102
        server_uri = "localhost:{}".format(port)
        storage = MemoryEventStorage()
        master = NotificationMaster(NotificationService(storage), port)
        master.run()
        # Always end the trigger and then stop the master that was started
        # above, even when a request fails.
        try:
            dag_folder = os.path.abspath(os.path.dirname(__file__)) + "/../../dags"
            mailbox = Mailbox()
            dag_trigger = DagTrigger(dag_folder, -1, [], False, mailbox,
                                     notification_service_uri=server_uri)
            dag_trigger.start()
            try:
                to_be_triggered = [
                    dag_folder + "/test_event_based_scheduler.py",
                    dag_folder + "/test_event_task_dag.py",
                    dag_folder + "/test_event_based_executor.py",
                    dag_folder + "/test_scheduler_dags.py",
                ]
                for dag_file in to_be_triggered:
                    self._send_request_and_receive_response(server_uri,
                                                            dag_file)
            finally:
                dag_trigger.end()
        finally:
            master.stop()
    def test_add_task(self):
        """Add three periodic tasks in turn and expect an event keyed by each.

        Two tasks use ``cron`` expressions and one uses an ``interval``
        expression; each task is removed again after its event was read.
        """
        mailbox = Mailbox()
        periodic_manager = PeriodicManager(mailbox)
        periodic_manager.start()
        # Shut the manager down even when an assertion fails.
        try:
            periodic_manager.add_task('1', '1', {'cron': '*/1 * * * * * *'})
            event = mailbox.get_message()
            periodic_manager.remove_task('1', '1')
            self.assertEqual('1', event.key)

            periodic_manager.add_task('2', '2', {'cron': '*/1 * * * * *'})
            event = mailbox.get_message()
            self.assertEqual('2', event.key)
            periodic_manager.remove_task('2', '2')

            periodic_manager.add_task('3', '3', {'interval': '0,0,0,0,1'})
            event = mailbox.get_message()
            self.assertEqual('3', event.key)
            periodic_manager.remove_task('3', '3')
        finally:
            periodic_manager.shutdown()
    def test_task_event_executor_runner_max_event(self):
        """Queue ten events, run once, and expect five to remain queued.

        The runner is built with ``5`` as its third argument - presumably
        the number of events handled per run; confirm against the runner.
        """
        test_event = BaseEvent("test_event", "test_event", namespace="default")

        self.create_task_state(dag_run=self._dag_run,
                               task_id='operator_toggle_handler')

        mailbox = Mailbox()

        dag_run_id = DagRunId(self._dag_run.dag_id, self._dag_run.run_id)
        runner = DagRunEventExecutorRunner(mailbox, dag_run_id, 5)
        for _ in range(10):
            runner.put_event(test_event)

        runner.run()

        # Take five messages off the mailbox before inspecting the queue.
        drained = [mailbox.get_message() for _ in range(5)]

        assert runner._event_queue.qsize() == 5
 def __init__(self,
              dag_id=None,
              dag_ids=None,
              subdir=settings.DAGS_FOLDER,
              num_runs=conf.getint('scheduler', 'num_runs', fallback=-1),
              processor_poll_interval=conf.getfloat(
                  'scheduler', 'processor_poll_interval', fallback=1),
              use_local_nf=conf.getboolean('scheduler',
                                           'use_local_notification',
                                           fallback=True),
              nf_host=conf.get('scheduler',
                               'notification_host',
                               fallback='localhost'),
              nf_port=conf.getint('scheduler',
                                  'notification_port',
                                  fallback=50051),
              unit_test_mode=conf.getboolean('core',
                                             'unit_test_mode',
                                             fallback=False),
              executor_heartbeat_interval=conf.getint(
                  'scheduler', 'executor_heartbeat_interval', fallback=2),
              run_duration=None,
              do_pickle=False,
              log=None,
              *args,
              **kwargs):
     """Initialise the parent job and this scheduler's own state.

     Defaults that call ``conf.get*`` are read from the configuration once,
     when this function is defined, not on every call.

     :param dag_id: forwarded to the parent constructor.
     :param dag_ids: forwarded to the parent constructor.
     :param subdir: forwarded to the parent constructor; defaults to
         ``settings.DAGS_FOLDER``.
     :param num_runs: forwarded to the parent constructor.
     :param processor_poll_interval: forwarded to the parent constructor.
     :param use_local_nf: stored as ``self.use_local_nf``.
     :param nf_host: stored as ``self.nf_host``.
     :param nf_port: stored as ``self.nf_port``.
     :param unit_test_mode: stored as ``self.unit_test_mode``.
     :param executor_heartbeat_interval: stored as
         ``self.executor_heartbeat_interval``.
     :param run_duration: forwarded to the parent constructor.
     :param do_pickle: forwarded to the parent constructor.
     :param log: forwarded to the parent constructor.
     :param args: extra positional arguments for the parent constructor.
     :param kwargs: extra keyword arguments for the parent constructor.
     """
     # The first eight values are passed positionally, so their order must
     # match the parent constructor's parameter order.
     super().__init__(dag_id, dag_ids, subdir, num_runs,
                      processor_poll_interval, run_duration, do_pickle, log,
                      *args, **kwargs)
     # Left as None here; presumably assigned once the job starts - confirm
     # against the rest of the class.
     self.dag_trigger = None
     self.notification_master = None
     self.use_local_nf = use_local_nf
     self.nf_host = nf_host
     self.nf_port = nf_port
     self.mail_box = Mailbox()
     self.running = True
     self.dagrun_route = DagRunRoute()
     self.unit_test_mode = unit_test_mode
     self.executor_heartbeat_interval = executor_heartbeat_interval
     # Also left as None here; see the note on dag_trigger above.
     self.heartbeat_thread = None
Example #18
0
 def test_replay_message(self):
     """Check a received event is stored and listed as unprocessed.

     The mailbox is tagged with scheduling job id 1234. After one event is
     sent and received, job id 1000 must have no unprocessed messages, and
     the first unprocessed message of job 1234 must match the stored row,
     which must unpickle to the received event.
     """
     key = "stop"
     scheduling_job_id = 1234
     other_job_id = 1000

     mailbox = Mailbox()
     mailbox.set_scheduling_job_id(scheduling_job_id)
     watcher = SchedulerEventWatcher(mailbox)
     listen_from_ms = int(time.time() * 1000)
     self.client.start_listen_events(watcher=watcher,
                                     start_time=listen_from_ms,
                                     version=None)
     self.send_event(key)

     received: BaseEvent = mailbox.get_message()
     self.assertEqual(received.key, key)

     with create_session() as session:
         stored_row = session.query(Message).first()
         for_other_job = EventBasedScheduler.get_unprocessed_message(
             other_job_id)
         self.assertEqual(0, len(for_other_job))
         for_this_job = EventBasedScheduler.get_unprocessed_message(
             scheduling_job_id)
         self.assertEqual(for_this_job[0].serialized_message,
                          stored_row.data)

     # The payload was written by this test's own scheduler code, so
     # unpickling it here does not handle external input.
     restored = pickle.loads(stored_row.data)
     self.assertEqual(restored.key, key)
     self.assertEqual(received, restored)