Exemple #1
0
 def kill(self, task):
     """Kill the executor process of ``task`` and stop its session if needed.

     The session stop is skipped only when the kill call did not return
     ``KillProcessRetCode.KILLED`` and the task status is
     ``TaskStatus.WAITING``; in every other case the session is stopped.
     """
     ret_code = process_utils.kill_task_executor_process(task)
     # Guard clause: nothing was killed and the task is still waiting,
     # so there is no session stop to issue.
     if ret_code is not KillProcessRetCode.KILLED and task.f_status in {TaskStatus.WAITING}:
         return
     job_utils.start_session_stop(task)
Exemple #2
0
    def kill_job(job_id, role, party_id, job_initiator, timeout=False, component_name=''):
        """Kill the executor process of each matching task and sync its status.

        Tasks are looked up with the filters built by ``job_utils.get_task_info``.
        For every task the executor process is killed and the session is
        stopped; failures are logged and do not interrupt the loop. A task that
        is not already ``TaskStatus.COMPLETE`` is marked ``TIMEOUT`` or
        ``FAILED`` and the result is pushed through
        ``TaskExecutor.sync_task_status``.

        :param job_id: id of the job whose tasks are killed.
        :param role: role forwarded to the task query and the status sync.
        :param party_id: party id forwarded to the task query and the status sync.
        :param job_initiator: mapping read for the ``'party_id'`` and ``'role'``
            of the initiator (missing keys yield ``None``).
        :param timeout: when truthy, tasks are marked ``TaskStatus.TIMEOUT``
            instead of ``TaskStatus.FAILED``.
        :param component_name: component filter forwarded to the task query.
        """
        schedule_logger(job_id).info('{} {} get kill job {} {} command'.format(role, party_id, job_id, component_name))
        task_info = job_utils.get_task_info(job_id, role, party_id, component_name)
        tasks = job_utils.query_task(**task_info)
        # The target status depends only on ``timeout``, so compute it once.
        status = TaskStatus.TIMEOUT if timeout else TaskStatus.FAILED
        for task in tasks:
            kill_status = False
            try:
                # Keep this try limited to the kill and the session stop: any
                # unrelated work placed before them could raise and silently
                # prevent the process from being killed.
                # stop task
                kill_status = job_utils.kill_task_executor_process(task)
                # session stop
                job_utils.start_session_stop(task)
            except Exception as e:
                schedule_logger(job_id).exception(e)
            finally:
                schedule_logger(job_id).info(
                    'job {} component {} on {} {} process {} kill {}'.format(job_id, task.f_component_name, task.f_role,
                                                                             task.f_party_id, task.f_run_pid,
                                                                             'success' if kill_status else 'failed'))

            # A task that already completed keeps its status.
            if task.f_status != TaskStatus.COMPLETE:
                task.f_status = status
            try:
                TaskExecutor.sync_task_status(job_id=job_id, component_name=task.f_component_name, task_id=task.f_task_id,
                                              role=role,
                                              party_id=party_id, initiator_party_id=job_initiator.get('party_id', None),
                                              task_info=task.to_json(), initiator_role=job_initiator.get('role', None))
            except Exception as e:
                # Best effort: a failed sync for one task must not stop the
                # remaining tasks from being processed.
                schedule_logger(job_id).exception(e)
Exemple #3
0
 def start_clean_job(cls, **kwargs):
     """Stop the session and delete table and metric data of each matching task.

     Every clean-up step is best-effort: a failure is logged through
     ``stat_logger`` and the remaining steps and tasks are still processed.

     :param kwargs: filters forwarded unchanged to ``JobSaver.query_task``.
     :raises Exception: when the query returns no task.
     """
     tasks = JobSaver.query_task(**kwargs)
     if not tasks:
         raise Exception('no found task')
     for task in tasks:
         # Identifying fields shared by every log line written for this task.
         task_desc = (task.f_job_id, task.f_role, task.f_party_id,
                      task.f_component_name)
         try:
             # clean session
             stat_logger.info(
                 'start {} {} {} {} session stop'.format(*task_desc))
             start_session_stop(task)
             stat_logger.info(
                 'stop {} {} {} {} session success'.format(*task_desc))
         except Exception as e:
             # Report the failure like the other steps do instead of
             # swallowing it; the clean-up still continues.
             stat_logger.info(
                 'stop {} {} {} {} session failed'.format(*task_desc))
             stat_logger.exception(e)
         try:
             # clean data table
             JobClean.clean_table(job_id=task.f_job_id,
                                  role=task.f_role,
                                  party_id=task.f_party_id,
                                  component_name=task.f_component_name)
         except Exception as e:
             stat_logger.info(
                 'delete {} {} {} {} data table failed'.format(*task_desc))
             stat_logger.exception(e)
         try:
             # clean metric data
             stat_logger.info(
                 'start delete {} {} {} {} metric data'.format(*task_desc))
             delete_metric_data({
                 'job_id': task.f_job_id,
                 'role': task.f_role,
                 'party_id': task.f_party_id,
                 'component_name': task.f_component_name
             })
             stat_logger.info(
                 'delete {} {} {} {} metric data success'.format(*task_desc))
         except Exception as e:
             stat_logger.info(
                 'delete {} {} {} {} metric data failed'.format(*task_desc))
             stat_logger.exception(e)
Exemple #4
0
 def kill_task(cls, task: Task) -> bool:
     """Kill the executor process of ``task`` and stop its session if needed.

     The session is stopped when the kill call returns
     ``KillProcessStatusCode.KILLED`` or when the task status is anything
     other than ``TaskStatus.WAITING``.

     :param task: task whose executor process is killed.
     :return: ``True`` when the kill and the optional session stop finished
         without raising an ``Exception``, ``False`` otherwise (the error is
         logged, not propagated).
     """
     kill_status = False
     try:
         # kill task executor
         kill_status_code = job_utils.kill_task_executor_process(task)
         # session stop
         if kill_status_code == KillProcessStatusCode.KILLED or task.f_status not in {
                 TaskStatus.WAITING
         }:
             job_utils.start_session_stop(task)
     except Exception as e:
         schedule_logger(task.f_job_id).exception(e)
     else:
         kill_status = True
     finally:
         schedule_logger(task.f_job_id).info(
             'job {} task {} {} on {} {} process {} kill {}'.format(
                 task.f_job_id, task.f_task_id, task.f_task_version,
                 task.f_role, task.f_party_id, task.f_run_pid,
                 'success' if kill_status else 'failed'))
     # Return after the try statement rather than inside ``finally``: a
     # ``return`` in ``finally`` would discard any exception still in flight,
     # including KeyboardInterrupt and SystemExit.
     return kill_status
Exemple #5
0
    def kill_job(job_id, role, party_id, job_initiator, timeout=False, component_name=''):
        """Kill the process of each matching task and sync the resulting status.

        For every task returned by ``job_utils.query_task`` the process with
        pid ``task.f_run_pid`` is killed and the session is stopped; errors are
        logged and the loop goes on. A task that is not ``TaskStatus.COMPLETE``
        is marked ``TIMEOUT`` (when ``timeout`` is truthy) or ``FAILED``, then
        the status is pushed through ``TaskExecutor.sync_task_status``.
        """
        schedule_logger(job_id).info(
            '{} {} get kill job {} {} command'.format(role, party_id, job_id, component_name))
        query_filters = job_utils.get_task_info(job_id, role, party_id, component_name)
        for task in job_utils.query_task(**query_filters):
            killed = False
            try:
                killed = job_utils.kill_process(int(task.f_run_pid))
                job_utils.start_session_stop(task)
            except Exception as exc:
                schedule_logger(job_id).exception(exc)
            finally:
                # Always report the outcome, whether or not the kill raised.
                outcome = 'success' if killed else 'failed'
                schedule_logger(job_id).info(
                    'job {} component {} on {} {} process {} kill {}'.format(
                        job_id, task.f_component_name, task.f_role,
                        task.f_party_id, task.f_run_pid, outcome))

            final_status = TaskStatus.TIMEOUT if timeout else TaskStatus.FAILED
            # A completed task keeps its status.
            if task.f_status != TaskStatus.COMPLETE:
                task.f_status = final_status

            try:
                sync_args = {
                    'job_id': job_id,
                    'component_name': task.f_component_name,
                    'task_id': task.f_task_id,
                    'role': role,
                    'party_id': party_id,
                    'initiator_party_id': job_initiator.get('party_id'),
                    'task_info': task.to_json(),
                    'initiator_role': job_initiator.get('role'),
                }
                TaskExecutor.sync_task_status(**sync_args)
            except Exception as exc:
                schedule_logger(job_id).exception(exc)
Exemple #6
0
 def detect_expired_session(cls):
     """Issue one session stop per expired session record.

     Expired records are fetched from
     ``StorageSessionBase.query_expired_sessions_record`` with a fixed ttl.
     """
     # presumably 30 minutes expressed in milliseconds -- TODO confirm the unit
     ttl = 30 * 60 * 1000
     expired_records = StorageSessionBase.query_expired_sessions_record(ttl=ttl)
     # NOTE(review): the record itself is never passed to the stop call, so
     # every iteration issues the same argument-less call -- confirm that
     # ``job_utils.start_session_stop`` is meant to be called this way.
     for _ in expired_records:
         job_utils.start_session_stop()