def sync(self):
    """Run one synchronisation pass of the executor.

    Steps, in order: let ``kube_scheduler`` sync, drain ``result_queue``
    applying every reported state through ``_change_state``, checkpoint the
    resource version of the last result read, then hand at most
    ``worker_pods_creation_batch_size`` queued tasks to the scheduler.
    """
    if self.running:
        self.log.debug('self.running: %s', self.running)
    if self.queued_tasks:
        self.log.debug('self.queued: %s', self.queued_tasks)
    self.kube_scheduler.sync()

    last_resource_version = None
    while True:
        if self.result_queue.empty():
            break
        outcome = self.result_queue.get()
        key, state, pod_id, last_resource_version = outcome
        self.log.info('Changing state of %s to %s', outcome, state)
        try:
            self._change_state(key, state, pod_id)
        except Exception as err:
            # NOTE(review): the failed result is put back onto the queue this
            # loop is draining, so it is read again in the same pass --
            # confirm a persistently failing result cannot spin here.
            self.log.exception(
                'Exception: %s when attempting to change state of %s to %s, re-queueing.',
                err, outcome, state)
            self.result_queue.put(outcome)

    KubeResourceVersion.checkpoint_resource_version(last_resource_version)

    # Capped by the current queue size as well as the configured batch size.
    batch_limit = min(self.kube_config.worker_pods_creation_batch_size,
                      self.task_queue.qsize())
    for _ in range(batch_limit):
        pending_task = self.task_queue.get()
        try:
            self.kube_scheduler.run_next(pending_task)
        except ApiException:
            # Put the task back so a later pass can retry it.
            self.log.exception('ApiException when attempting to run task, re-queueing.')
            self.task_queue.put(pending_task)
Beispiel #2
0
    def sync(self) -> None:
        """Synchronize task state.

        Lets ``kube_scheduler`` sync, drains ``result_queue`` applying each
        reported state through ``_change_state``, checkpoints the resource
        version of the last result read, then hands at most
        ``worker_pods_creation_batch_size`` queued tasks to the scheduler.

        :raises AirflowException: if ``worker_uuid``, ``kube_scheduler``,
            ``kube_config``, ``result_queue`` or ``task_queue`` is falsy.
        """
        if self.running:
            self.log.debug('self.running: %s', self.running)
        if self.queued_tasks:
            self.log.debug('self.queued: %s', self.queued_tasks)
        if not self.worker_uuid:
            raise AirflowException(NOT_STARTED_MESSAGE)
        if not self.kube_scheduler:
            raise AirflowException(NOT_STARTED_MESSAGE)
        if not self.kube_config:
            raise AirflowException(NOT_STARTED_MESSAGE)
        if not self.result_queue:
            raise AirflowException(NOT_STARTED_MESSAGE)
        if not self.task_queue:
            raise AirflowException(NOT_STARTED_MESSAGE)
        self.kube_scheduler.sync()

        last_resource_version = None
        while True:
            # Only the non-blocking read is guarded by ``except Empty``.
            try:
                results = self.result_queue.get_nowait()
            except Empty:
                break
            try:
                key, state, pod_id, namespace, resource_version = results
                last_resource_version = resource_version
                self.log.info('Changing state of %s to %s', results, state)
                try:
                    self._change_state(key, state, pod_id, namespace)
                except Exception as e:  # pylint: disable=broad-except
                    # NOTE(review): the failed result is put back onto the
                    # queue this loop is draining -- confirm a persistently
                    # failing result cannot spin here.
                    self.log.exception(
                        "Exception: %s when attempting to change state of %s to %s, re-queueing.",
                        e, results, state
                    )
                    self.result_queue.put(results)
            finally:
                # Every successful get is matched by exactly one task_done.
                self.result_queue.task_done()

        KubeResourceVersion.checkpoint_resource_version(last_resource_version)

        for _ in range(self.kube_config.worker_pods_creation_batch_size):
            try:
                task = self.task_queue.get_nowait()
            except Empty:
                break
            try:
                self.kube_scheduler.run_next(task)
            except ApiException as e:
                if e.reason == "BadRequest":
                    # An invalid request is failed instead of being re-queued.
                    self.log.error("Request was invalid. Failing task")
                    key, _, _ = task
                    self.change_state(key, State.FAILED, e)
                else:
                    # The body may be missing, not JSON, or lack 'message';
                    # a parse failure must not prevent the re-queue below.
                    try:
                        message = json.loads(e.body)['message']
                    except (TypeError, ValueError, KeyError):
                        message = e.body
                    self.log.warning('ApiException when attempting to run task, re-queueing. '
                                     'Message: %s', message)
                    self.task_queue.put(task)
            finally:
                self.task_queue.task_done()
Beispiel #3
0
    def sync(self):
        """Run one synchronisation pass of the executor.

        Lets ``kube_scheduler`` sync, drains ``result_queue`` applying each
        reported state through ``_change_state``, checkpoints the resource
        version of the last result read, then hands at most
        ``worker_pods_creation_batch_size`` queued tasks to the scheduler.
        """
        if self.running:
            self.log.debug('self.running: %s', self.running)
        if self.queued_tasks:
            self.log.debug('self.queued: %s', self.queued_tasks)
        self.kube_scheduler.sync()

        last_resource_version = None
        while True:
            if self.result_queue.empty():
                break
            report = self.result_queue.get()
            key, state, pod_id, last_resource_version = report
            self.log.info('Changing state of %s to %s', report, state)
            try:
                self._change_state(key, state, pod_id)
            except Exception as exc:
                # NOTE(review): the failed result is put back onto the queue
                # this loop is draining -- confirm a persistently failing
                # result cannot spin here.
                self.log.exception(
                    'Exception: %s when attempting to change state of %s to %s, re-queueing.',
                    exc, report, state)
                self.result_queue.put(report)

        KubeResourceVersion.checkpoint_resource_version(last_resource_version)

        # Capped by the current queue size as well as the configured batch size.
        launch_count = min(self.kube_config.worker_pods_creation_batch_size,
                           self.task_queue.qsize())
        for _ in range(launch_count):
            queued = self.task_queue.get()
            try:
                self.kube_scheduler.run_next(queued)
            except ApiException:
                # Put the task back so a later pass can retry it.
                self.log.exception('ApiException when attempting to run task, re-queueing.')
                self.task_queue.put(queued)
    def sync(self):
        """Run one synchronisation pass of the executor.

        Lets ``kube_scheduler`` sync, drains ``result_queue`` applying each
        reported state through ``_change_state``, checkpoints the resource
        version of the last result read, then launches at most one queued
        task.
        """
        if self.running:
            self.log.debug('self.running: %s', self.running)
        if self.queued_tasks:
            self.log.debug('self.queued: %s', self.queued_tasks)
        self.kube_scheduler.sync()

        last_resource_version = None
        while True:
            if self.result_queue.empty():
                break
            outcome = self.result_queue.get()
            key, state, pod_id, last_resource_version = outcome
            self.log.info('Changing state of %s to %s', outcome, state)
            self._change_state(key, state, pod_id)

        KubeResourceVersion.checkpoint_resource_version(last_resource_version)

        # Nothing queued: nothing to launch in this pass.
        if self.task_queue.empty():
            return

        pending_task = self.task_queue.get()
        try:
            self.kube_scheduler.run_next(pending_task)
        except ApiException:
            # Put the task back so a later pass can retry it.
            self.log.exception('ApiException when attempting to run task, re-queueing.')
            self.task_queue.put(pending_task)
    def sync(self):
        """Perform a single executor sync cycle.

        The scheduler is synced first; every entry waiting in
        ``result_queue`` is then applied with ``_change_state``; the resource
        version of the last entry read is checkpointed; finally one task, if
        any is queued, is passed to ``kube_scheduler.run_next``.
        """
        if self.running:
            self.log.debug('self.running: %s', self.running)
        if self.queued_tasks:
            self.log.debug('self.queued: %s', self.queued_tasks)
        self.kube_scheduler.sync()

        last_resource_version = None
        while True:
            if self.result_queue.empty():
                break
            report = self.result_queue.get()
            key, state, pod_id, last_resource_version = report
            self.log.info('Changing state of %s to %s', report, state)
            self._change_state(key, state, pod_id)

        KubeResourceVersion.checkpoint_resource_version(last_resource_version)

        if self.task_queue.empty():
            return

        queued = self.task_queue.get()
        try:
            self.kube_scheduler.run_next(queued)
        except ApiException:
            # The task goes back on the queue for a later attempt.
            self.log.exception('ApiException when attempting to run task, re-queueing.')
            self.task_queue.put(queued)
    def sync(self):
        """Synchronize task state.

        Lets ``kube_scheduler`` sync, drains ``result_queue`` applying each
        reported state through ``_change_state``, checkpoints the resource
        version of the last result read, then hands at most
        ``worker_pods_creation_batch_size`` queued tasks to the scheduler.
        Tasks whose launch raises ``ApiException`` or ``HTTPError`` are put
        back on ``task_queue``.
        """
        if self.running:
            self.log.debug('self.running: %s', self.running)
        if self.queued_tasks:
            self.log.debug('self.queued: %s', self.queued_tasks)
        self.kube_scheduler.sync()

        last_resource_version = None
        while True:
            # Only the non-blocking read is guarded by ``except Empty``.
            try:
                results = self.result_queue.get_nowait()
            except Empty:
                break
            try:
                key, state, pod_id, namespace, resource_version = results
                last_resource_version = resource_version
                self.log.info('Changing state of %s to %s', results, state)
                try:
                    self._change_state(key, state, pod_id, namespace)
                except Exception as e:
                    # NOTE(review): the failed result is put back onto the
                    # queue this loop is draining -- confirm a persistently
                    # failing result cannot spin here.
                    self.log.exception(
                        'Exception: %s when attempting to change state of %s to %s, re-queueing.',
                        e, results, state)
                    self.result_queue.put(results)
            finally:
                # Every successful get is matched by exactly one task_done.
                self.result_queue.task_done()

        KubeResourceVersion.checkpoint_resource_version(last_resource_version)

        for _ in range(self.kube_config.worker_pods_creation_batch_size):
            try:
                task = self.task_queue.get_nowait()
            except Empty:
                break
            try:
                self.kube_scheduler.run_next(task)
            except ApiException as e:
                # The body may be missing, not JSON, or lack 'message';
                # a parse failure must not prevent the re-queue below.
                try:
                    message = json.loads(e.body)['message']
                except (TypeError, ValueError, KeyError):
                    message = e.body
                self.log.warning(
                    'ApiException when attempting to run task, re-queueing. '
                    'Message: %s',
                    message)
                self.task_queue.put(task)
            except HTTPError as e:
                self.log.warning(
                    'HTTPError when attempting to run task, re-queueing. '
                    'Exception: %s', e)
                self.task_queue.put(task)
            finally:
                self.task_queue.task_done()
    def sync(self) -> None:
        """Synchronize task state.

        Lets ``kube_scheduler`` sync, drains ``result_queue`` applying each
        reported state through ``_change_state``, checkpoints the resource
        version of the last result read, then hands at most
        ``worker_pods_creation_batch_size`` queued tasks to the scheduler.

        :raises AirflowException: if ``kube_scheduler``, ``kube_config``,
            ``result_queue`` or ``task_queue`` is falsy.
        """
        if self.running:
            self.log.debug('self.running: %s', self.running)
        if self.queued_tasks:
            self.log.debug('self.queued: %s', self.queued_tasks)
        # Explicit raises instead of ``assert`` so the checks still run when
        # Python is started with -O.
        if not self.kube_scheduler:
            raise AirflowException(NOT_STARTED_MESSAGE)
        if not self.kube_config:
            raise AirflowException(NOT_STARTED_MESSAGE)
        if not self.result_queue:
            raise AirflowException(NOT_STARTED_MESSAGE)
        if not self.task_queue:
            raise AirflowException(NOT_STARTED_MESSAGE)
        self.kube_scheduler.sync()

        last_resource_version = None
        while True:
            # Only the non-blocking read is guarded by ``except Empty``.
            try:
                results = self.result_queue.get_nowait()
            except Empty:
                break
            try:
                key, state, pod_id, resource_version = results
                last_resource_version = resource_version
                self.log.info('Changing state of %s to %s', results, state)
                try:
                    self._change_state(key, state, pod_id)
                except Exception as e:  # pylint: disable=broad-except
                    # NOTE(review): the failed result is put back onto the
                    # queue this loop is draining -- confirm a persistently
                    # failing result cannot spin here.
                    self.log.exception(
                        'Exception: %s when attempting to change state of %s to %s, re-queueing.',
                        e, results, state)
                    self.result_queue.put(results)
            finally:
                # Every successful get is matched by exactly one task_done.
                self.result_queue.task_done()

        KubeResourceVersion.checkpoint_resource_version(last_resource_version)

        for _ in range(self.kube_config.worker_pods_creation_batch_size):
            try:
                task = self.task_queue.get_nowait()
            except Empty:
                break
            try:
                self.kube_scheduler.run_next(task)
            except ApiException as e:
                # The body may be missing, not JSON, or lack 'message';
                # a parse failure must not prevent the re-queue below.
                try:
                    message = json.loads(e.body)['message']
                except (TypeError, ValueError, KeyError):
                    message = e.body
                self.log.warning(
                    'ApiException when attempting to run task, re-queueing. '
                    'Message: %s', message)
                self.task_queue.put(task)
            finally:
                self.task_queue.task_done()
Beispiel #8
0
 def _make_kube_watcher(self):
     """Build a ``KubernetesJobWatcher``, start it and return it.

     The watcher receives the executor's namespace, watcher queue, worker
     uuid and kube config, together with the value returned by
     ``KubeResourceVersion.get_current_resource_version()``.
     """
     current_version = KubeResourceVersion.get_current_resource_version()
     job_watcher = KubernetesJobWatcher(
         self.namespace,
         self.watcher_queue,
         current_version,
         self.worker_uuid,
         self.kube_config,
     )
     job_watcher.start()
     return job_watcher
 def _make_kube_watcher(self) -> KubernetesJobWatcher:
     """Build a ``KubernetesJobWatcher``, start it and return it.

     The watcher is constructed with keyword arguments: the executor's
     watcher queue, worker uuid and kube config, plus the value returned by
     ``KubeResourceVersion.get_current_resource_version()``.
     """
     current_version = KubeResourceVersion.get_current_resource_version()
     job_watcher = KubernetesJobWatcher(
         watcher_queue=self.watcher_queue,
         resource_version=current_version,
         worker_uuid=self.worker_uuid,
         kube_config=self.kube_config,
     )
     job_watcher.start()
     return job_watcher
Beispiel #10
0
 def start(self):
     """Start the Kubernetes executor.

     Sets ``worker_uuid``, resets the stored resource version, creates the
     task and result queues from ``self._manager``, obtains a kube client,
     builds the ``AirflowKubernetesScheduler`` and then calls
     ``_inject_secrets()`` and ``clear_not_launched_queued_tasks()``.
     """
     self.log.info('Start Kubernetes executor')
     self.worker_uuid = KubeWorkerIdentifier.get_or_create_current_kube_worker_uuid()
     self.log.debug('Start with worker_uuid: %s', self.worker_uuid)

     # The resource version is always reset because it is unknown when the
     # executor was last started; see the list_namespaced_pod notes at
     # https://github.com/kubernetes-client/python/blob/master/kubernetes/docs
     # /CoreV1Api.md#list_namespaced_pod
     KubeResourceVersion.reset_resource_version()

     self.task_queue = self._manager.Queue()
     self.result_queue = self._manager.Queue()
     self.kube_client = get_kube_client()

     scheduler = AirflowKubernetesScheduler(
         self.kube_config,
         self.task_queue,
         self.result_queue,
         self.kube_client,
         self.worker_uuid,
     )
     self.kube_scheduler = scheduler

     self._inject_secrets()
     self.clear_not_launched_queued_tasks()
 def start(self):
     """Start the Kubernetes executor.

     Sets ``worker_uuid``, resets the stored resource version, creates the
     task and result queues with ``Queue()``, obtains a kube client, builds
     the ``AirflowKubernetesScheduler`` and then calls ``_inject_secrets()``
     and ``clear_not_launched_queued_tasks()``.
     """
     self.log.info('Start Kubernetes executor')
     self.worker_uuid = (
         KubeWorkerIdentifier.get_or_create_current_kube_worker_uuid()
     )
     self.log.debug('Start with worker_uuid: %s', self.worker_uuid)

     # The resource version is always reset because it is unknown when the
     # executor was last started; see the list_namespaced_pod notes at
     # https://github.com/kubernetes-client/python/blob/master/kubernetes/docs
     # /CoreV1Api.md#list_namespaced_pod
     KubeResourceVersion.reset_resource_version()

     self.task_queue = Queue()
     self.result_queue = Queue()
     self.kube_client = get_kube_client()

     scheduler = AirflowKubernetesScheduler(
         self.kube_config,
         self.task_queue,
         self.result_queue,
         self.kube_client,
         self.worker_uuid,
     )
     self.kube_scheduler = scheduler

     self._inject_secrets()
     self.clear_not_launched_queued_tasks()
    def sync(self):
        """Run one synchronisation pass of the executor.

        Logs the running and queued collections, lets ``kube_scheduler``
        sync, drains ``result_queue`` applying each reported state through
        ``_change_state``, checkpoints the resource version of the last
        result read (using ``self._session``), then launches at most one
        queued task.
        """
        # Lazy %-style arguments: the message is only formatted if the record
        # is actually emitted.
        self.log.info('self.running: %s', self.running)
        self.log.info('self.queued: %s', self.queued_tasks)
        self.kube_scheduler.sync()

        last_resource_version = None
        while not self.result_queue.empty():
            results = self.result_queue.get()
            key, state, pod_id, resource_version = results
            last_resource_version = resource_version
            self.log.info('Changing state of %s to %s', results, state)
            self._change_state(key, state, pod_id)

        KubeResourceVersion.checkpoint_resource_version(
            last_resource_version, session=self._session)

        if not self.task_queue.empty():
            # Unpacking checks that the queued item has exactly three fields
            # before it is handed to the scheduler as a tuple.
            key, command, kube_executor_config = self.task_queue.get()
            self.kube_scheduler.run_next((key, command, kube_executor_config))
    def sync(self):
        """Run one synchronisation pass of the executor.

        Lets ``kube_scheduler`` sync, drains ``result_queue`` applying each
        reported state through ``_change_state``, checkpoints the resource
        version of the last result read, then launches at most one queued
        task.
        """
        if self.running:
            self.log.debug('self.running: %s', self.running)
        if self.queued_tasks:
            self.log.debug('self.queued: %s', self.queued_tasks)
        self.kube_scheduler.sync()

        last_resource_version = None
        while True:
            if self.result_queue.empty():
                break
            outcome = self.result_queue.get()
            key, state, pod_id, last_resource_version = outcome
            self.log.info('Changing state of %s to %s', outcome, state)
            self._change_state(key, state, pod_id)

        KubeResourceVersion.checkpoint_resource_version(last_resource_version)

        # Nothing queued: nothing to launch in this pass.
        if self.task_queue.empty():
            return

        # Unpacking checks that the queued item has exactly three fields
        # before it is handed to the scheduler as a tuple.
        key, command, kube_executor_config = self.task_queue.get()
        next_task = (key, command, kube_executor_config)
        self.kube_scheduler.run_next(next_task)
Beispiel #14
0
 def _make_kube_watcher(self):
     """Build a ``KubernetesJobWatcher``, start it and return it.

     The namespace and multi-namespace flag are read from ``kube_config``;
     the resource version is the value returned by
     ``KubeResourceVersion.get_current_resource_version()``.
     """
     current_version = KubeResourceVersion.get_current_resource_version()
     watcher_kwargs = {
         'watcher_queue': self.watcher_queue,
         'namespace': self.kube_config.kube_namespace,
         'multi_namespace_mode': self.kube_config.multi_namespace_mode,
         'resource_version': current_version,
         'worker_uuid': self.worker_uuid,
         'kube_config': self.kube_config,
     }
     job_watcher = KubernetesJobWatcher(**watcher_kwargs)
     job_watcher.start()
     return job_watcher
    def sync(self):
        """Run one synchronisation pass of the executor.

        Lets ``kube_scheduler`` sync, drains ``result_queue`` applying each
        reported state through ``_change_state``, checkpoints the resource
        version of the last result read (using ``self._session``), then
        launches at most one queued task.
        """
        if self.running:
            self.log.info('self.running: %s', self.running)
        if self.queued_tasks:
            self.log.info('self.queued: %s', self.queued_tasks)
        self.kube_scheduler.sync()

        last_resource_version = None
        while True:
            if self.result_queue.empty():
                break
            outcome = self.result_queue.get()
            key, state, pod_id, last_resource_version = outcome
            self.log.info('Changing state of %s to %s', outcome, state)
            self._change_state(key, state, pod_id)

        KubeResourceVersion.checkpoint_resource_version(
            last_resource_version,
            session=self._session,
        )

        # Nothing queued: nothing to launch in this pass.
        if self.task_queue.empty():
            return

        # Unpacking checks that the queued item has exactly three fields
        # before it is handed to the scheduler as a tuple.
        key, command, kube_executor_config = self.task_queue.get()
        next_task = (key, command, kube_executor_config)
        self.kube_scheduler.run_next(next_task)
Beispiel #16
0
 def test_reset_resource_version(self):
     """``reset_resource_version`` returns '0' and a following read yields '0'."""
     db_session = settings.Session()
     reset_value = KubeResourceVersion.reset_resource_version(db_session)
     self.assertEqual(reset_value, '0')
     stored_value = KubeResourceVersion.get_current_resource_version(db_session)
     self.assertEqual(stored_value, '0')
Beispiel #17
0
 def test_checkpoint_resource_version(self):
     """After checkpointing '7', reading the current version yields '7'."""
     db_session = settings.Session()
     KubeResourceVersion.checkpoint_resource_version('7', db_session)
     stored_value = KubeResourceVersion.get_current_resource_version(db_session)
     self.assertEqual(stored_value, '7')
Beispiel #18
0
 def test_reset_resource_version(self):
     """``reset_resource_version`` returns '0' and a following read yields '0'."""
     db_session = settings.Session()
     reset_value = KubeResourceVersion.reset_resource_version(db_session)
     self.assertEqual(reset_value, '0')
     stored_value = KubeResourceVersion.get_current_resource_version(db_session)
     self.assertEqual(stored_value, '0')
 def _make_kube_watcher(self):
     """Build a ``KubernetesJobWatcher``, start it and return it.

     The resource version is read through ``self._session``; the watcher
     also receives the executor's namespace, watcher queue and worker uuid.
     """
     current_version = KubeResourceVersion.get_current_resource_version(self._session)
     job_watcher = KubernetesJobWatcher(
         self.namespace,
         self.watcher_queue,
         current_version,
         self.worker_uuid,
     )
     job_watcher.start()
     return job_watcher
Beispiel #20
0
 def test_checkpoint_resource_version(self):
     """After checkpointing '7', reading the current version yields '7'."""
     db_session = settings.Session()
     KubeResourceVersion.checkpoint_resource_version('7', db_session)
     stored_value = KubeResourceVersion.get_current_resource_version(db_session)
     self.assertEqual(stored_value, '7')