Esempio n. 1
0
 def check_run_worker_health(self, run: PipelineRun):
     """Report the health of the container backing ``run``.

     Args:
         run: The pipeline run whose container is looked up through
             ``self._get_container``.

     Returns:
         A ``CheckRunHealthResult`` carrying ``WorkerStatus.NOT_FOUND`` when
         no container is returned, ``WorkerStatus.RUNNING`` when the
         container's ``status`` equals ``"running"``, and
         ``WorkerStatus.FAILED`` (with the observed status in the message)
         for any other status.
     """
     container = self._get_container(run)
     # Identity check: ``None`` is a singleton, and ``is`` cannot be
     # influenced by a custom ``__eq__`` on the container object.
     if container is None:
         return CheckRunHealthResult(WorkerStatus.NOT_FOUND)

     # Read the status once so the value reported in the failure message is
     # exactly the value that was tested.
     status = container.status
     if status == "running":
         return CheckRunHealthResult(WorkerStatus.RUNNING)
     return CheckRunHealthResult(
         WorkerStatus.FAILED, msg=f"Container status is {status}")
Esempio n. 2
0
 def check_run_worker_health(self, run: PipelineRun):
     """Report the health of the K8s job associated with ``run``.

     The job name is derived from the run id and the number of resume
     attempts recorded by ``self._instance``; the job is then read from
     ``self.job_namespace`` through ``self._batch_api``.

     Returns:
         A ``CheckRunHealthResult`` carrying ``WorkerStatus.UNKNOWN`` (with
         the serialized error as its message) when reading the job raises,
         ``WorkerStatus.FAILED`` when ``job.status.failed`` is truthy, and
         ``WorkerStatus.RUNNING`` otherwise.
     """
     resume_attempts = self._instance.count_resume_run_attempts(run.run_id)
     k8s_job_name = get_job_name_from_run_id(
         run.run_id, resume_attempt_number=resume_attempts
     )

     try:
         k8s_job = self._batch_api.read_namespaced_job(
             namespace=self.job_namespace, name=k8s_job_name
         )
     except Exception:
         # Any failure to read the job is reported as UNKNOWN rather than
         # propagated to the caller.
         error_info = serializable_error_info_from_exc_info(sys.exc_info())
         return CheckRunHealthResult(WorkerStatus.UNKNOWN, str(error_info))

     if not k8s_job.status.failed:
         return CheckRunHealthResult(WorkerStatus.RUNNING)
     return CheckRunHealthResult(WorkerStatus.FAILED, "K8s job failed")
Esempio n. 3
0
 def check_run_worker_health(self, run: PipelineRun):
     """Report the health of the K8s job associated with ``run``.

     The namespace is taken from the ``"job_namespace"`` entry of the
     validated executor config built from ``run.run_config``; the job name
     is derived from the run id. The job is read through
     ``self._batch_api``.

     Returns:
         A ``CheckRunHealthResult`` carrying ``WorkerStatus.UNKNOWN`` (with
         the serialized error as its message) when reading the job raises,
         ``WorkerStatus.FAILED`` when ``job.status.failed`` is truthy, and
         ``WorkerStatus.RUNNING`` otherwise.
     """
     executor_config = _get_validated_celery_k8s_executor_config(run.run_config)
     namespace = executor_config.get("job_namespace")
     k8s_job_name = get_job_name_from_run_id(run.run_id)

     try:
         k8s_job = self._batch_api.read_namespaced_job(
             namespace=namespace, name=k8s_job_name
         )
     except Exception:
         # Any failure to read the job is reported as UNKNOWN rather than
         # propagated to the caller.
         error_info = serializable_error_info_from_exc_info(sys.exc_info())
         return CheckRunHealthResult(WorkerStatus.UNKNOWN, str(error_info))

     if not k8s_job.status.failed:
         return CheckRunHealthResult(WorkerStatus.RUNNING)
     return CheckRunHealthResult(WorkerStatus.FAILED, "K8s job failed")