コード例 #1
0
 def __init__(self,
              dag_directory,
              server_uri=None,
              max_runs=-1,
              refresh_dag_dir_interval=None,
              *args,
              **kwargs):
     """Wire up the mailbox-driven scheduler components.

     :param dag_directory: root directory scanned for DAG files.
     :param server_uri: URI of the notification service (also used for the
         scheduler-namespace notification client).
     :param max_runs: maximum number of DAG parsing runs (-1 = unlimited).
     :param refresh_dag_dir_interval: seconds between DAG directory
         refreshes; defaults to the ``[scheduler] refresh_dag_dir_interval``
         config value (fallback 30), resolved at call time.
     """
     super().__init__(*args, **kwargs)
     # Resolve the config default here instead of in the signature: a
     # default of conf.getint(...) is evaluated once when the function is
     # defined (at import), silently ignoring later configuration changes.
     if refresh_dag_dir_interval is None:
         refresh_dag_dir_interval = conf.getint(
             'scheduler', 'refresh_dag_dir_interval', fallback=30)
     # Single in-process message queue every component publishes into.
     self.mailbox: Mailbox = Mailbox()
     self.dag_trigger: DagTrigger = DagTrigger(
         dag_directory=dag_directory,
         max_runs=max_runs,
         dag_ids=None,
         pickle_dags=False,
         mailbox=self.mailbox,
         refresh_dag_dir_interval=refresh_dag_dir_interval,
         notification_service_uri=server_uri)
     self.task_event_manager = DagRunEventManager(self.mailbox)
     # Executor reports task state changes through the shared mailbox.
     self.executor.set_mailbox(self.mailbox)
     self.notification_client: NotificationClient = NotificationClient(
         server_uri=server_uri, default_namespace=SCHEDULER_NAMESPACE)
     self.periodic_manager = PeriodicManager(self.mailbox)
     self.scheduler: EventBasedScheduler = EventBasedScheduler(
         self.id, self.mailbox, self.task_event_manager, self.executor,
         self.notification_client, None, self.periodic_manager)
     # Remembered so the new scheduler can recover state from the
     # previous scheduler job run.
     self.last_scheduling_id = self._last_scheduler_job_id()
コード例 #2
0
    def test_add_task_invalidated(self):
        """Invalid cron/interval expressions are rejected with a descriptive error."""
        mailbox = Mailbox()
        periodic_manager = PeriodicManager(mailbox)
        periodic_manager.start()
        # Guarantee the manager's background machinery is stopped even if an
        # assertion below fails; the original trailing shutdown() call was
        # skipped on failure, leaking the started manager across tests.
        self.addCleanup(periodic_manager.shutdown)

        # A 5-field cron expression is rejected (presumably more fields are
        # required — see the valid 6/7-field expressions used elsewhere).
        with self.assertRaises(Exception) as context:
            periodic_manager.add_task('1', '1', {'cron': '*/1 * * * *'})
        # assertIn reports both operands on failure, unlike assertTrue(x in y).
        self.assertIn('The cron expression', str(context.exception))

        # A 4-component interval expression is likewise rejected.
        with self.assertRaises(Exception) as context:
            periodic_manager.add_task('2', '2', {'interval': '0,0,0,1'})
        self.assertIn('The interval expression', str(context.exception))
コード例 #3
0
class EventBasedSchedulerJob(BaseJob):
    """
    A scheduler job driven by events delivered through a shared Mailbox:
    DAG-file parsing, task events, periodic triggers and notification-service
    events all feed one queue consumed by the EventBasedScheduler.

    1. todo self heartbeat
    2. todo check other scheduler failed
    3. todo timeout dagrun
    """
    # SQLAlchemy single-table inheritance discriminator for BaseJob rows.
    __mapper_args__ = {'polymorphic_identity': 'EventBasedSchedulerJob'}

    def __init__(self,
                 dag_directory,
                 server_uri=None,
                 max_runs=-1,
                 refresh_dag_dir_interval=None,
                 *args,
                 **kwargs):
        """Wire up the mailbox-driven scheduler components.

        :param dag_directory: root directory scanned for DAG files.
        :param server_uri: URI of the notification service (also used for
            the scheduler-namespace notification client).
        :param max_runs: maximum number of DAG parsing runs (-1 = unlimited).
        :param refresh_dag_dir_interval: seconds between DAG directory
            refreshes; defaults to the ``[scheduler] refresh_dag_dir_interval``
            config value (fallback 30), resolved at call time.
        """
        super().__init__(*args, **kwargs)
        # Resolve the config default here instead of in the signature: a
        # default of conf.getint(...) is evaluated once when the function is
        # defined (at import), silently ignoring later configuration changes.
        if refresh_dag_dir_interval is None:
            refresh_dag_dir_interval = conf.getint(
                'scheduler', 'refresh_dag_dir_interval', fallback=30)
        # Single in-process message queue every component publishes into.
        self.mailbox: Mailbox = Mailbox()
        self.dag_trigger: DagTrigger = DagTrigger(
            dag_directory=dag_directory,
            max_runs=max_runs,
            dag_ids=None,
            pickle_dags=False,
            mailbox=self.mailbox,
            refresh_dag_dir_interval=refresh_dag_dir_interval,
            notification_service_uri=server_uri)
        self.task_event_manager = DagRunEventManager(self.mailbox)
        # Executor reports task state changes through the shared mailbox.
        self.executor.set_mailbox(self.mailbox)
        self.notification_client: NotificationClient = NotificationClient(
            server_uri=server_uri, default_namespace=SCHEDULER_NAMESPACE)
        self.periodic_manager = PeriodicManager(self.mailbox)
        self.scheduler: EventBasedScheduler = EventBasedScheduler(
            self.id, self.mailbox, self.task_event_manager, self.executor,
            self.notification_client, None, self.periodic_manager)
        # Remembered so this scheduler can recover state from the previous
        # scheduler job run.
        self.last_scheduling_id = self._last_scheduler_job_id()

    @staticmethod
    def _last_scheduler_job_id():
        """Return the id of the most recent scheduler job, or None if none exists."""
        last_run = EventBasedSchedulerJob.most_recent_job()
        if not last_run:
            return None
        else:
            return last_run.id

    def _execute(self):
        """Start all subsystems, run the scheduling loop, then shut them down.

        Runs until ``self.scheduler.schedule()`` returns; any exception is
        logged rather than propagated.
        """
        # faulthandler.enable()
        self.log.info("Starting the scheduler Job")

        # DAGs can be pickled for easier remote execution by some executors
        # pickle_dags = self.do_pickle and self.executor_class not in UNPICKLEABLE_EXECUTORS

        try:
            # Tag outgoing messages / executor work with this job's id.
            self.mailbox.set_scheduling_job_id(self.id)
            self.scheduler.id = self.id
            self._start_listen_events()
            self.dag_trigger.start()
            self.task_event_manager.start()
            self.executor.job_id = self.id
            self.executor.start()
            self.periodic_manager.start()

            self.register_signals()

            # Start after resetting orphaned tasks to avoid stressing out DB.

            execute_start_time = timezone.utcnow()

            self.scheduler.submit_sync_thread()
            # Resume work left over from the previous scheduler job, if any.
            self.scheduler.recover(self.last_scheduling_id)
            # Blocks until the scheduling loop exits.
            self.scheduler.schedule()

            # NOTE(review): this shutdown sequence is skipped when any of the
            # calls above raises (the except below only logs) — child
            # components may then be left running. Consider a finally-based
            # teardown; left unchanged here to avoid masking startup errors.
            self.executor.end()
            self.periodic_manager.shutdown()
            self.dag_trigger.end()
            self.task_event_manager.end()
            self._stop_listen_events()

            settings.Session.remove()  # type: ignore
        except Exception as e:  # pylint: disable=broad-except
            self.log.exception("Exception when executing scheduler, %s", e)
        finally:
            self.log.info("Exited execute loop")

    def _start_listen_events(self):
        """Subscribe to notification-service events, starting from 'now' (ms)."""
        watcher = SchedulerEventWatcher(self.mailbox)
        self.notification_client.start_listen_events(watcher=watcher,
                                                     start_time=int(
                                                         time.time() * 1000),
                                                     version=None)

    def _stop_listen_events(self):
        """Unsubscribe from notification-service events."""
        self.notification_client.stop_listen_events()

    def register_signals(self) -> None:
        """Register signals that stop child processes"""
        signal.signal(signal.SIGINT, self._exit_gracefully)
        signal.signal(signal.SIGTERM, self._exit_gracefully)
        signal.signal(signal.SIGUSR2, self._debug_dump)

    def _exit_gracefully(self, signum, frame) -> None:  # pylint: disable=unused-argument
        """Helper method to clean up processor_agent to avoid leaving orphan processes."""
        self.log.info("Exiting gracefully upon receiving signal %s", signum)
        sys.exit(os.EX_OK)

    def _debug_dump(self, signum, frame):  # pylint: disable=unused-argument
        """SIGUSR2 handler: log a debug dump of the executor's state."""
        try:
            sig_name = signal.Signals(signum).name  # pylint: disable=no-member
        except Exception:  # pylint: disable=broad-except
            # Fall back to the raw number if it maps to no known signal.
            sig_name = str(signum)

        self.log.info("%s\n%s received, printing debug\n%s", "-" * 80,
                      sig_name, "-" * 80)

        self.executor.debug_dump()
        self.log.info("-" * 80)

    def is_alive(self, grace_multiplier: Optional[float] = None) -> bool:
        """
        Is this SchedulerJob alive?

        We define alive as in a state of running and a heartbeat within the
        threshold defined in the ``scheduler_health_check_threshold`` config
        setting.

        ``grace_multiplier`` is accepted for compatibility with the parent class.

        :rtype: boolean
        """
        if grace_multiplier is not None:
            # Accept the same behaviour as superclass
            return super().is_alive(grace_multiplier=grace_multiplier)
        scheduler_health_check_threshold: int = conf.getint(
            'scheduler', 'scheduler_health_check_threshold')
        return (self.state == State.RUNNING and
                (timezone.utcnow() - self.latest_heartbeat).total_seconds() <
                scheduler_health_check_threshold)
コード例 #4
0
    def test_add_task(self):
        """Valid cron/interval configs fire mailbox events keyed by the task id."""
        mailbox = Mailbox()
        periodic_manager = PeriodicManager(mailbox)
        periodic_manager.start()
        # Guarantee the manager is shut down even if an assertion below
        # fails; the original trailing shutdown() call was skipped on
        # failure, leaking the started manager across tests.
        self.addCleanup(periodic_manager.shutdown)

        # 7-field cron expression.
        periodic_manager.add_task('1', '1', {'cron': '*/1 * * * * * *'})
        event = mailbox.get_message()
        periodic_manager.remove_task('1', '1')
        self.assertEqual('1', event.key)

        # 6-field cron expression.
        periodic_manager.add_task('2', '2', {'cron': '*/1 * * * * *'})
        event = mailbox.get_message()
        self.assertEqual('2', event.key)
        periodic_manager.remove_task('2', '2')

        # 5-component interval expression (last component appears to be
        # seconds, giving a 1-second period — confirm against PeriodicManager).
        periodic_manager.add_task('3', '3', {'interval': '0,0,0,0,1'})
        event = mailbox.get_message()
        self.assertEqual('3', event.key)
        periodic_manager.remove_task('3', '3')