def check_job_status(job_id):
    """Report whether every job record found for ``job_id`` is running.

    Looks up the records with ``query_job(job_id=job_id)`` and compares each
    record's ``f_status`` against ``JobStatus.RUNNING``.

    Returns:
        ``False`` as soon as one record has a status other than
        ``JobStatus.RUNNING``; ``True`` otherwise. An empty query result
        therefore also yields ``True``.
    """
    records = query_job(job_id=job_id)
    # any() short-circuits on the first non-running record, like the
    # early return of an explicit loop.
    return not any(record.f_status != JobStatus.RUNNING for record in records)
def job_quantity_constraint(self):
    """Reject the job when the host side already runs too many jobs.

    The check is only performed when ``RuntimeConfig.WORK_MODE`` equals
    ``WorkMode.CLUSTER`` and ``self.role`` is ``'host'``; in every other
    case the method returns without querying anything.

    Raises:
        Exception: if the number of jobs returned by
            ``job_utils.query_job(status='running', role=self.role)`` is
            greater than or equal to ``MAX_CONCURRENT_JOB_RUN_HOST``.
    """
    limit_applies = (RuntimeConfig.WORK_MODE == WorkMode.CLUSTER
                     and self.role == 'host')
    if not limit_applies:
        return

    active_jobs = job_utils.query_job(status='running', role=self.role)
    if len(active_jobs) >= MAX_CONCURRENT_JOB_RUN_HOST:
        raise Exception(
            'The job running on the host side exceeds the maximum running amount')
def stop(job_id, end_status=JobStatus.FAILED, component_name=''):
    """Send a cancel or kill command for a job to all of its parties.

    The initiator's job record is looked up with
    ``job_utils.query_job(job_id=job_id, is_initiator=1)``. When
    ``component_name`` is empty, the end status is first synchronised through
    ``TaskScheduler.sync_job_status``. Afterwards a POST request is sent via
    ``federated_api`` to each party of each role listed in the record's
    ``f_roles``; the endpoint ends in ``cancel`` when ``end_status`` is
    ``JobStatus.CANCELED`` and in ``kill`` otherwise.

    Args:
        job_id: identifier of the job to stop.
        end_status: status to record for the job; also selects between the
            cancel and the kill command.
        component_name: forwarded in the request body; a non-empty value
            skips the job status synchronisation.

    Returns:
        For a cancel request, ``True`` if at least one party answered with
        ``retcode`` 0, else ``False``. For any other end status the function
        returns ``None``.

    Raises:
        Exception: if no initiator job record exists for ``job_id``.
    """
    is_cancel = (end_status == JobStatus.CANCELED)
    action = "cancel" if is_cancel else "kill"

    schedule_logger(job_id).info('get {} job {} {} command'.format(
        "cancel" if is_cancel else "stop", job_id, component_name))

    found_jobs = job_utils.query_job(job_id=job_id, is_initiator=1)
    if not found_jobs:
        schedule_logger(job_id).info(
            'send {} job {} {} command failed'.format(action, job_id, component_name))
        raise Exception('can not found job: {}'.format(job_id))

    initiator_job = found_jobs[0]
    roles = json_loads(initiator_job.f_roles)
    job_work_mode = initiator_job.f_work_mode
    initiator_party_id = initiator_job.f_party_id
    initiator_role = initiator_job.f_role

    # set status first
    if not component_name:
        TaskScheduler.sync_job_status(
            job_id=job_id,
            roles=roles,
            initiator_party_id=initiator_party_id,
            initiator_role=initiator_role,
            work_mode=job_work_mode,
            job_info={'f_job_id': job_id, 'f_status': end_status})

    cancel_success = False
    for role, partys in roles.items():
        for party_id in partys:
            endpoint = '/{}/schedule/{}/{}/{}/{}'.format(
                API_VERSION, job_id, role, party_id, action)
            # A fresh body is built for every request so nothing is shared
            # between calls.
            body = {
                'job_initiator': {
                    'party_id': initiator_party_id,
                    'role': initiator_role,
                },
                'timeout': end_status == JobStatus.TIMEOUT,
                'component_name': component_name,
            }
            response = federated_api(
                job_id=job_id,
                method='POST',
                endpoint=endpoint,
                src_party_id=initiator_party_id,
                dest_party_id=party_id,
                src_role=initiator_role,
                json_body=body,
                work_mode=job_work_mode)

            if response['retcode'] != 0:
                schedule_logger(job_id).info(
                    'send {} {} {} job {} {} command failed: {}'.format(
                        role, party_id, action, job_id, component_name,
                        response['retmsg']))
                continue

            cancel_success = True
            schedule_logger(job_id).info(
                'send {} {} {} job {} {} command successfully'.format(
                    role, party_id, action, job_id, component_name))
            if is_cancel:
                # Only the remaining parties of this role are skipped; the
                # next role is still contacted.
                break

    if is_cancel:
        return cancel_success
    return None
def query_job():
    """Look up jobs using the JSON body of the current request.

    The entries of ``request.json`` are passed as keyword arguments to
    ``job_utils.query_job``.

    Returns:
        The value of ``get_json_result``: with ``retcode=0``,
        ``retmsg='success'`` and each job's ``to_json()`` output as ``data``
        when jobs were found, or with ``retcode=101`` and
        ``retmsg='find job failed'`` when the lookup result is empty.
    """
    matched = job_utils.query_job(**request.json)
    if matched:
        serialized = [item.to_json() for item in matched]
        return get_json_result(retcode=0, retmsg='success', data=serialized)
    return get_json_result(retcode=101, retmsg='find job failed')