Exemple #1
0
    def _schedule_job(self, job_id):
        """Attempt to launch the job with id ``job_id`` and return its state.

        Nothing is launched unless the job is in ``JobState.WAITING``,
        ``JobService.is_ready`` accepts it and, when its config is federated,
        ``JobService.is_peer_ready`` accepts it as well. Otherwise the run
        YAML is generated and handed to ``k8s_client.create_flapp``.

        Args:
            job_id: Primary key passed to ``Job.query.get``.

        Returns:
            ``job.state`` as it stands when the method finishes.

        Raises:
            AssertionError: If no job with ``job_id`` exists.
        """
        job = Job.query.get(job_id)
        assert job is not None, f'Job {job_id} not found'
        if job.state != JobState.WAITING:
            return job.state

        # Readiness is evaluated in a dedicated session bound to this
        # object's engine; the state changes below go through db.session.
        with get_session(self._db_engine) as session:
            readiness = JobService(session)
            if not readiness.is_ready(job):
                return job.state
            # The peer check only runs for federated configs.
            if job.get_config().is_federated and \
                    not readiness.is_peer_ready(job):
                return job.state

        try:
            k8s_client.create_flapp(generate_job_run_yaml(job))
        except Exception as e:
            # A failed launch is recorded on the job rather than propagated.
            logging.error(f'Start job {job_id} has error msg: {e.args}')
            job.error_message = str(e)
        else:
            # Successful launch: clear any earlier error and start the job.
            job.error_message = None
            job.start()
        db.session.commit()

        return job.state
Exemple #2
0
 def test_is_ready(self):
     """Jobs 0 and 4 must be reported ready by JobService; job 2 must not."""
     # Load all fixtures up front, in the same order as before (0, 2, 4).
     jobs = [db.session.query(Job).get(pk) for pk in (0, 2, 4)]
     checker = JobService(db.session)

     self.assertTrue(checker.is_ready(jobs[0]))
     self.assertFalse(checker.is_ready(jobs[1]))
     self.assertTrue(checker.is_ready(jobs[2]))
Exemple #3
0
 def test_is_ready(self):
     """Check JobService.is_ready for jobs 0, 2 and 4.

     Job 4 is checked while ``Job.is_complete`` is patched to return True.
     """
     # Load all fixtures up front, in the same order as before (0, 2, 4).
     jobs = [db.session.query(Job).get(pk) for pk in (0, 2, 4)]
     checker = JobService(db.session)

     self.assertTrue(checker.is_ready(jobs[0]))
     self.assertFalse(checker.is_ready(jobs[1]))

     # The patch is active only for the final assertion.
     force_complete = patch(
         'fedlearner_webconsole.job.models.Job.is_complete',
         return_value=True)
     with force_complete:
         self.assertTrue(checker.is_ready(jobs[2]))
Exemple #4
0
 def _update_hook(self, event: Event):
     """Refresh a job's running state when a FLAPP event arrives.

     Events whose ``obj_type`` is not ``ObjectType.FLAPP`` are ignored.

     Args:
         event: The received event; ``obj_type`` and ``flapp_name`` are read.
     """
     if event.obj_type != ObjectType.FLAPP:
         return

     flapp_name = event.flapp_name
     logging.debug('[k8s_watcher][_update_hook]receive event %s',
                   flapp_name)
     # The update and its commit share one session scope.
     with session_context() as session:
         JobService(session).update_running_state(flapp_name)
         session.commit()
Exemple #5
0
    def check_job_ready(self, request, context):
        """Report whether the job named in ``request`` is ready.

        Args:
            request: Message providing ``auth_info`` and ``job_name``.
            context: Passed through to ``check_auth_info``.

        Returns:
            A ``CheckJobReadyResponse`` with a success status and the
            ``is_ready`` result from ``JobService``.

        Raises:
            AssertionError: If the project has no job with that name.
        """
        with self._app.app_context():
            project, _ = self.check_auth_info(request.auth_info, context)

            # Look the job up by name within the authenticated project.
            lookup = db.session.query(Job).filter_by(
                name=request.job_name, project_id=project.id)
            job = lookup.first()
            assert job is not None, \
                f'Job {request.job_name} not found'

            # Readiness is evaluated in a separate short-lived session.
            with get_session(db.get_engine()) as session:
                ready = JobService(session).is_ready(job)

            ok_status = common_pb2.Status(code=common_pb2.STATUS_SUCCESS)
            return service_pb2.CheckJobReadyResponse(status=ok_status,
                                                     is_ready=ready)
Exemple #6
0
 def test_update_running_state(self, mock_is_complete, mock_is_failed):
     """Check that update_running_state moves job 0 to COMPLETED, then FAILED.

     Args:
         mock_is_complete: Mock whose return value drives the "complete"
             check (presumably injected by a patch decorator outside this
             view -- confirm against the decorators).
         mock_is_failed: Mock whose return value drives the "failed" check
             (same assumption as above).
     """
     job_0 = db.session.query(Job).get(0)
     job_2 = db.session.query(Job).get(2)

     # Phase 1: the job reports complete, so it should end up COMPLETED and
     # job 2 should then be considered ready.
     mock_is_complete.return_value = True
     job_service = JobService(db.session)
     job_service.update_running_state(job_0.name)
     self.assertEqual(job_0.state, JobState.COMPLETED)
     self.assertTrue(job_service.is_ready(job_2))

     # Phase 2: reset to STARTED and make the job report failed instead.
     job_0.state = JobState.STARTED
     mock_is_complete.return_value = False
     # Configure the mock itself. Assigning True to the parameter name only
     # rebinds the local and leaves the mock returning its default value.
     mock_is_failed.return_value = True
     job_service.update_running_state(job_0.name)
     self.assertEqual(job_0.state, JobState.FAILED)