def _schedule_job(self, job_id):
    """Try to start the job with id ``job_id`` and return its resulting state.

    The job is only started when it is currently ``JobState.WAITING``, the
    job service reports it ready and, for federated configs, the peer is
    ready as well. Any failure while generating the run YAML or creating
    the flapp is logged and stored in ``job.error_message``; the job is
    then left un-started.

    Args:
        job_id: primary key used to look the job up via ``Job.query``.

    Returns:
        ``job.state`` as it stands when the method exits.

    Raises:
        AssertionError: if no job with ``job_id`` exists.
    """
    job = Job.query.get(job_id)
    assert job is not None, f'Job {job_id} not found'
    if job.state != JobState.WAITING:
        return job.state

    with get_session(self._db_engine) as session:
        job_service = JobService(session)
        if not job_service.is_ready(job):
            return job.state
        # Short-circuit keeps the original order: the config is only read
        # after the local readiness check, and the peer is only consulted
        # for federated jobs.
        if job.get_config().is_federated and not job_service.is_peer_ready(
                job):
            return job.state

    try:
        k8s_client.create_flapp(generate_job_run_yaml(job))
    except Exception as e:
        logging.error(f'Start job {job_id} has error msg: {e.args}')
        # Record the failure on the job and persist it before leaving.
        job.error_message = str(e)
        db.session.commit()
    else:
        # Launch succeeded: clear any stale error and mark the job started.
        job.error_message = None
        job.start()
        db.session.commit()
    return job.state
def test_is_ready(self):
    """Jobs 0 and 4 must be reported ready by JobService; job 2 must not."""
    # Load the three fixture jobs in the same order as the id tuple.
    job_zero, job_two, job_four = (
        db.session.query(Job).get(job_id) for job_id in (0, 2, 4))
    service = JobService(db.session)

    self.assertTrue(service.is_ready(job_zero))
    self.assertFalse(service.is_ready(job_two))
    self.assertTrue(service.is_ready(job_four))
def test_is_ready(self):
    """Check JobService.is_ready for the fixture jobs 0, 2 and 4.

    Jobs 0 and 2 are checked against the unpatched model; job 4 is checked
    while ``Job.is_complete`` is patched to return ``True``.
    """
    # Load the three fixture jobs in the same order as the id tuple.
    job_zero, job_two, job_four = (
        db.session.query(Job).get(job_id) for job_id in (0, 2, 4))
    service = JobService(db.session)

    self.assertTrue(service.is_ready(job_zero))
    self.assertFalse(service.is_ready(job_two))

    is_complete_patch = patch(
        'fedlearner_webconsole.job.models.Job.is_complete',
        return_value=True)
    with is_complete_patch:
        self.assertTrue(service.is_ready(job_four))
def _update_hook(self, event: Event):
    """Refresh a job's running state when a FLAPP event arrives.

    Events whose ``obj_type`` is not ``ObjectType.FLAPP`` are ignored.

    Args:
        event: the watcher event; its ``flapp_name`` is passed to
            ``JobService.update_running_state``.
    """
    if event.obj_type != ObjectType.FLAPP:
        return

    flapp_name = event.flapp_name
    logging.debug('[k8s_watcher][_update_hook]receive event %s', flapp_name)
    with session_context() as session:
        job_service = JobService(session)
        job_service.update_running_state(flapp_name)
        # Commit inside the context so the state change is persisted
        # before the session is released.
        session.commit()
def check_job_ready(self, request, context):
    """Answer whether the job named in ``request`` is ready.

    Args:
        request: carries ``auth_info`` and ``job_name``.
        context: passed through to ``check_auth_info``.

    Returns:
        A ``CheckJobReadyResponse`` with a success status and the
        ``is_ready`` flag computed by ``JobService``.

    Raises:
        AssertionError: if the project has no job named ``request.job_name``.
    """
    with self._app.app_context():
        project, _ = self.check_auth_info(request.auth_info, context)

        # Look the job up by name within the authenticated project.
        job_query = db.session.query(Job).filter_by(name=request.job_name,
                                                    project_id=project.id)
        job = job_query.first()
        assert job is not None, f'Job {request.job_name} not found'

        with get_session(db.get_engine()) as session:
            job_service = JobService(session)
            ready = job_service.is_ready(job)

        ok_status = common_pb2.Status(code=common_pb2.STATUS_SUCCESS)
        return service_pb2.CheckJobReadyResponse(status=ok_status,
                                                 is_ready=ready)
def test_update_running_state(self, mock_is_complete, mock_is_failed):
    """Verify that update_running_state moves a job to COMPLETED or FAILED.

    Phase 1: with ``is_complete`` returning True, job 0 must end up
    COMPLETED and job 2 must then be reported ready.
    Phase 2: job 0 is reset to STARTED, ``is_complete`` returns False and
    ``is_failed`` returns True, so job 0 must end up FAILED.

    Args:
        mock_is_complete: mock injected by a patch decorator outside this
            view (presumably for ``Job.is_complete`` -- confirm).
        mock_is_failed: mock injected by a patch decorator outside this
            view (presumably for ``Job.is_failed`` -- confirm).
    """
    job_0 = db.session.query(Job).get(0)
    job_2 = db.session.query(Job).get(2)

    # Phase 1: the job reports completion.
    mock_is_complete.return_value = True
    job_service = JobService(db.session)
    job_service.update_running_state(job_0.name)
    self.assertEqual(job_0.state, JobState.COMPLETED)
    self.assertTrue(job_service.is_ready(job_2))

    # Phase 2: the job is running again, not complete, and reports failure.
    job_0.state = JobState.STARTED
    mock_is_complete.return_value = False
    # Configure the mock itself. Assigning ``True`` to the parameter name
    # would only rebind the local and leave the patched callable returning
    # a (truthy) MagicMock, so the assertion below would pass by accident.
    mock_is_failed.return_value = True
    job_service.update_running_state(job_0.name)
    self.assertEqual(job_0.state, JobState.FAILED)