def run_do(self):
     try:
         running_tasks = job_utils.query_task(status='running',
                                              run_ip=get_lan_ip())
         stop_job_ids = set()
         # detect_logger.info('start to detect running job..')
         for task in running_tasks:
             try:
                 process_exist = job_utils.check_job_process(
                     int(task.f_run_pid))
                 if not process_exist:
                     detect_logger.info(
                         'job {} component {} on {} {} task {} {} process does not exist'
                         .format(task.f_job_id, task.f_component_name,
                                 task.f_role, task.f_party_id,
                                 task.f_task_id, task.f_run_pid))
                     stop_job_ids.add(task.f_job_id)
             except Exception as e:
                 detect_logger.exception(e)
         if stop_job_ids:
             schedule_logger().info(
                 'start to stop jobs: {}'.format(stop_job_ids))
         for job_id in stop_job_ids:
             jobs = job_utils.query_job(job_id=job_id)
             if jobs:
                 initiator_party_id = jobs[0].f_initiator_party_id
                 job_work_mode = jobs[0].f_work_mode
                 if len(jobs) > 1:
                     # i am initiator
                     my_party_id = initiator_party_id
                 else:
                     my_party_id = jobs[0].f_party_id
                     initiator_party_id = jobs[0].f_initiator_party_id
                 api_utils.federated_api(
                     job_id=job_id,
                     method='POST',
                     endpoint='/{}/job/stop'.format(API_VERSION),
                     src_party_id=my_party_id,
                     dest_party_id=initiator_party_id,
                     src_role=None,
                     json_body={
                         'job_id': job_id,
                         'operate': 'kill'
                     },
                     work_mode=job_work_mode)
                 TaskScheduler.finish_job(job_id=job_id,
                                          job_runtime_conf=json_loads(
                                              jobs[0].f_runtime_conf),
                                          stop=True)
     except Exception as e:
         detect_logger.exception(e)
     finally:
         detect_logger.info('finish detect running job')
Exemple #2
0
 def run_do(self):
     try:
         running_tasks = job_utils.query_task(status='running',
                                              run_ip=get_lan_ip())
         stop_job_ids = set()
         detect_logger.info('start to detect running job..')
         for task in running_tasks:
             try:
                 process_exist = job_utils.check_job_process(
                     int(task.f_run_pid))
                 if not process_exist:
                     detect_logger.info(
                         'job {} component {} on {} {} task {} {} process does not exist'
                         .format(task.f_job_id, task.f_component_name,
                                 task.f_role, task.f_party_id,
                                 task.f_task_id, task.f_run_pid))
                     stop_job_ids.add(task.f_job_id)
             except Exception as e:
                 detect_logger.exception(e)
         if stop_job_ids:
             schedule_logger.info(
                 'start to stop jobs: {}'.format(stop_job_ids))
         for job_id in stop_job_ids:
             jobs = job_utils.query_job(job_id=job_id)
             if jobs:
                 initiator_party_id = jobs[0].f_initiator_party_id
                 job_work_mode = jobs[0].f_work_mode
                 if len(jobs) > 1:
                     # i am initiator
                     my_party_id = initiator_party_id
                 else:
                     my_party_id = jobs[0].f_party_id
                     initiator_party_id = jobs[0].f_initiator_party_id
                 api_utils.local_api(
                     method='POST',
                     endpoint='/{}/job/stop'.format(API_VERSION),
                     json_body={'job_id': job_id})
                 schedule_logger.info(
                     'send stop job {} command'.format(job_id))
     except Exception as e:
         detect_logger.exception(e)
     finally:
         detect_logger.info('finish detect running job')
Exemple #3
0
 def detect_running_task(cls):
     detect_logger().info('start to detect running task..')
     count = 0
     try:
         running_tasks = JobSaver.query_task(
             party_status=TaskStatus.RUNNING,
             run_on_this_party=True,
             run_ip=RuntimeConfig.JOB_SERVER_HOST,
             only_latest=False)
         stop_job_ids = set()
         for task in running_tasks:
             count += 1
             try:
                 process_exist = job_utils.check_job_process(
                     int(task.f_run_pid))
                 if not process_exist:
                     detect_logger(job_id=task.f_job_id).info(
                         'job {} task {} {} on {} {} process {} does not exist'
                         .format(task.f_job_id, task.f_task_id,
                                 task.f_task_version, task.f_role,
                                 task.f_party_id, task.f_run_pid))
                     stop_job_ids.add(task.f_job_id)
             except Exception as e:
                 detect_logger(job_id=task.f_job_id).exception(e)
         if stop_job_ids:
             detect_logger().info(
                 'start to stop jobs: {}'.format(stop_job_ids))
         stop_jobs = set()
         for job_id in stop_job_ids:
             jobs = JobSaver.query_job(job_id=job_id)
             if jobs:
                 stop_jobs.add(jobs[0])
         cls.request_stop_jobs(jobs=stop_jobs,
                               stop_msg="task executor process abort",
                               stop_status=JobStatus.CANCELED)
     except Exception as e:
         detect_logger().exception(e)
     finally:
         detect_logger().info(f"finish detect {count} running task")
Exemple #4
0
 def is_alive(task):
     return job_utils.check_job_process(int(task.f_run_pid))