def get_job_parameters(job_id, role, party_id):
    """Fetch the ``job_parameters`` section of a job's on-party runtime conf.

    Returns an empty dict when no job matches (job_id, role, party_id).
    """
    matched = Job.select(Job.f_runtime_conf_on_party).where(
        Job.f_job_id == job_id,
        Job.f_role == role,
        Job.f_party_id == party_id)
    if not matched:
        return {}
    return matched[0].f_runtime_conf_on_party.get("job_parameters")
def get_job_configuration(job_id, role, party_id) -> JobConfiguration:
    """Load a job's dsl / runtime conf columns wrapped in a JobConfiguration.

    NOTE(review): implicitly returns None when no row matches, despite the
    annotated return type — callers must handle that; confirm it is intended.
    """
    matched = Job.select(
        Job.f_dsl,
        Job.f_runtime_conf,
        Job.f_train_runtime_conf,
        Job.f_runtime_conf_on_party,
    ).where(
        Job.f_job_id == job_id,
        Job.f_role == role,
        Job.f_party_id == party_id)
    if matched:
        return JobConfiguration(**matched[0].to_human_model_dict())
def get_job_dsl(job_id, role, party_id):
    """Return the stored DSL of a job, or {} when no row matches."""
    matched = Job.select(Job.f_dsl).where(
        Job.f_job_id == job_id,
        Job.f_role == role,
        Job.f_party_id == party_id)
    return matched[0].f_dsl if matched else {}
def get_job_configuration(job_id, role, party_id, tasks=None):
    """Dual-purpose job configuration lookup.

    With ``tasks``: build {job_id: upload component conf (+ "notes")} for each
    task's job, reading from the on-party runtime conf. Without ``tasks``:
    return the (dsl, runtime_conf, runtime_conf_on_party, train_runtime_conf)
    tuple of the single matching job, or four empty dicts when none matches.
    """
    if tasks:
        summary = {}
        for task in tasks:
            rows = Job.select(
                Job.f_job_id,
                Job.f_runtime_conf_on_party,
                Job.f_description).where(Job.f_job_id == task.f_job_id)
            row = rows[0]
            # assumes an upload job: conf lives under this fixed key path
            conf = row.f_runtime_conf_on_party["component_parameters"]["role"]["local"]["0"]["upload_0"]
            conf["notes"] = row.f_description
            summary[row.f_job_id] = conf
        return summary
    rows = Job.select(
        Job.f_dsl,
        Job.f_runtime_conf,
        Job.f_train_runtime_conf,
        Job.f_runtime_conf_on_party).where(
        Job.f_job_id == job_id,
        Job.f_role == role,
        Job.f_party_id == party_id)
    if not rows:
        return {}, {}, {}, {}
    row = rows[0]
    return row.f_dsl, row.f_runtime_conf, row.f_runtime_conf_on_party, row.f_train_runtime_conf
def save_job_info(self, role, party_id, job_info, create=False):
    """Insert or update the Job row for (self.job_id, role, party_id).

    Applies the fields in ``job_info`` (skipping identity keys and
    field defaults). Timed-out jobs are never touched; status writes
    to already-terminated jobs are rejected; reaching FAILED/TIMEOUT
    stamps end time and elapsed time; any end status tags the row
    'job_end'. Returns None when the update is refused.
    """
    with DB.connection_context():
        schedule_logger(self.job_id).info('save {} {} job: {}'.format(
            role, party_id, job_info))
        jobs = Job.select().where(Job.f_job_id == self.job_id,
                                  Job.f_role == role,
                                  Job.f_party_id == party_id)
        is_insert = True
        if jobs:
            job = jobs[0]
            is_insert = False
            if job.f_status == JobStatus.TIMEOUT:
                # a timed-out job is frozen; never update it again
                return None
        elif create:
            job = Job()
            job.f_create_time = current_timestamp()
        else:
            return None
        job.f_job_id = self.job_id
        job.f_role = role
        job.f_party_id = party_id
        if 'f_status' in job_info:
            if job.f_status in [JobStatus.COMPLETE, JobStatus.FAILED]:
                # Termination status cannot be updated
                # TODO:
                return
            if (job_info['f_status'] in [JobStatus.FAILED, JobStatus.TIMEOUT]) and (not job.f_end_time):
                if not job.f_start_time:
                    # cannot compute elapsed without a start time
                    return
                job_info['f_end_time'] = current_timestamp()
                job_info['f_elapsed'] = job_info['f_end_time'] - job.f_start_time
                job_info['f_update_time'] = current_timestamp()
            if (job_info['f_status'] in [JobStatus.FAILED, JobStatus.TIMEOUT,
                                         JobStatus.CANCELED, JobStatus.COMPLETE]):
                job_info['f_tag'] = 'job_end'
        update_fields = []
        for k, v in job_info.items():
            # Skip keys that are not Job model fields instead of swallowing
            # every exception (was a bare `except: pass`).
            try:
                if k in ['f_job_id', 'f_role', 'f_party_id'] or v == getattr(Job, k).default:
                    continue
                setattr(job, k, v)
                update_fields.append(getattr(Job, k))
            except AttributeError:
                continue
        if is_insert:
            job.save(force_insert=True)
        else:
            job.save(only=update_fields)
def get_upload_job_configuration_summary(upload_tasks: typing.List[Task]):
    """Map each upload task's job id to its upload component conf.

    The returned conf dict gets a "notes" key set from the job description.
    """
    summary = {}
    for task in upload_tasks:
        rows = Job.select(
            Job.f_job_id,
            Job.f_runtime_conf_on_party,
            Job.f_description).where(Job.f_job_id == task.f_job_id)
        row = rows[0]
        conf = row.f_runtime_conf_on_party["component_parameters"]["role"]["local"]["0"]["upload_0"]
        conf["notes"] = row.f_description
        summary[row.f_job_id] = conf
    return summary
def get_job_dsl_parser_by_job_id(job_id):
    """Build a DSL parser from a job's stored dsl/conf columns.

    Returns None when the job id is unknown.
    """
    rows = Job.select(
        Job.f_dsl,
        Job.f_runtime_conf,
        Job.f_train_runtime_conf).where(Job.f_job_id == job_id)
    if not rows:
        return None
    row = rows[0]
    return get_job_dsl_parser(dsl=row.f_dsl,
                              runtime_conf=row.f_runtime_conf,
                              train_runtime_conf=row.f_train_runtime_conf)
def get_job_configuration(job_id, role, party_id):
    """Return the (dsl, runtime_conf, train_runtime_conf) dicts of a job.

    Columns are stored as JSON text and deserialized here; three empty
    dicts are returned when no row matches.
    """
    with DB.connection_context():
        rows = Job.select(
            Job.f_dsl,
            Job.f_runtime_conf,
            Job.f_train_runtime_conf).where(
            Job.f_job_id == job_id,
            Job.f_role == role,
            Job.f_party_id == party_id)
        if not rows:
            return {}, {}, {}
        row = rows[0]
        return (json_loads(row.f_dsl),
                json_loads(row.f_runtime_conf),
                json_loads(row.f_train_runtime_conf))
def check_request_parameters(request_data):
    """Backfill role/party_id in ``request_data`` from the initiator job row.

    Only acts when neither key is present; mutates ``request_data`` in place.
    """
    if 'role' in request_data or 'party_id' in request_data:
        return
    # `== True` is a peewee SQL expression, not a Python comparison
    rows = Job.select(Job.f_runtime_conf_on_party).where(
        Job.f_job_id == request_data.get('job_id', ''),
        Job.f_is_initiator == True)
    if rows:
        initiator = rows[0].f_runtime_conf_on_party.get('initiator', {})
        request_data['role'] = initiator.get('role', '')
        request_data['party_id'] = initiator.get('party_id', 0)
def check_request_parameters(request_data):
    """Backfill role/party_id in ``request_data`` from the initiator job row.

    Only acts when neither key is present; mutates ``request_data`` in place.
    """
    with DB.connection_context():
        if 'role' in request_data or 'party_id' in request_data:
            return
        rows = Job.select(Job.f_runtime_conf).where(
            Job.f_job_id == request_data.get('job_id', ''),
            Job.f_is_initiator == 1)
        if rows:
            # runtime conf is stored as JSON text in this variant
            conf = json_loads(rows[0].f_runtime_conf)
            initiator = conf.get('initiator', {})
            request_data['role'] = initiator.get('role', '')
            request_data['party_id'] = initiator.get('party_id', 0)
def query_job(**kwargs):
    """Query Job rows by keyword filters mapped onto ``f_<name>`` fields.

    Unknown keywords are ignored; with no usable filters an empty list is
    returned (querying every job is deliberately not allowed).
    """
    with DB.connection_context():
        conditions = [getattr(Job, 'f_%s' % name) == value
                      for name, value in kwargs.items()
                      if hasattr(Job, 'f_%s' % name)]
        if not conditions:
            # not allow query all job
            return []
        return list(Job.select().where(*conditions))
def get_job_dsl_parser_by_job_id(job_id):
    """Build a DSL parser from a job's JSON-encoded dsl/conf columns.

    Returns None when the job id is unknown.
    """
    with DB.connection_context():
        rows = Job.select(
            Job.f_dsl,
            Job.f_runtime_conf,
            Job.f_train_runtime_conf).where(Job.f_job_id == job_id)
        if not rows:
            return None
        row = rows[0]
        return get_job_dsl_parser(
            dsl=json_loads(row.f_dsl),
            runtime_conf=json_loads(row.f_runtime_conf),
            train_runtime_conf=json_loads(row.f_train_runtime_conf))
def get_job_configuration(job_id, role, party_id, tasks=None):
    """Dual-purpose job configuration lookup (JSON-encoded columns).

    With ``tasks``: build {job_id: upload role-parameters conf (+ "notes")}
    for each task's job. Without ``tasks``: return the deserialized
    (dsl, runtime_conf, train_runtime_conf) of the matching job, or three
    empty dicts.
    """
    with DB.connection_context():
        if tasks:
            summary = {}
            for task in tasks:
                rows = Job.select(
                    Job.f_job_id,
                    Job.f_runtime_conf,
                    Job.f_description).where(Job.f_job_id == task.f_job_id)
                row = rows[0]
                conf = json_loads(row.f_runtime_conf)["role_parameters"]["local"]["upload_0"]
                conf["notes"] = row.f_description
                summary[row.f_job_id] = conf
            return summary
        rows = Job.select(
            Job.f_dsl,
            Job.f_runtime_conf,
            Job.f_train_runtime_conf).where(
            Job.f_job_id == job_id,
            Job.f_role == role,
            Job.f_party_id == party_id)
        if not rows:
            return {}, {}, {}
        row = rows[0]
        return (json_loads(row.f_dsl),
                json_loads(row.f_runtime_conf),
                json_loads(row.f_train_runtime_conf))
def save_job_info(self, role, party_id, job_info, create=False):
    """Insert or update the Job row for (self.job_id, role, party_id).

    Applies the fields in ``job_info`` (skipping identity keys and field
    defaults). Timed-out jobs are never touched; a FAILED status stamps
    end/elapsed/update times on the row. Returns None when refused.
    """
    with DB.connection_context():
        stat_logger.info('save {} {} job: {}'.format(
            role, party_id, job_info))
        jobs = Job.select().where(Job.f_job_id == self.job_id,
                                  Job.f_role == role,
                                  Job.f_party_id == party_id)
        is_insert = True
        if jobs:
            job = jobs[0]
            is_insert = False
            if job.f_status == JobStatus.TIMEOUT:
                # a timed-out job is frozen; never update it again
                return None
        elif create:
            job = Job()
            job.f_create_time = current_timestamp()
        else:
            return None
        job.f_job_id = self.job_id
        job.f_role = role
        job.f_party_id = party_id
        if 'f_status' in job_info:
            if job.f_status in [JobStatus.COMPLETE, JobStatus.FAILED]:
                # Termination status cannot be updated
                # TODO:
                pass
            if job_info['f_status'] == JobStatus.FAILED and not job.f_end_time:
                job.f_end_time = current_timestamp()
                # guard: without a start time the subtraction would raise
                # TypeError (None - int); leave f_elapsed unset in that case
                if job.f_start_time:
                    job.f_elapsed = job.f_end_time - job.f_start_time
                job.f_update_time = current_timestamp()
        for k, v in job_info.items():
            # Skip keys that are not Job model fields instead of swallowing
            # every exception (was a bare `except: pass`).
            try:
                if k in ['f_job_id', 'f_role', 'f_party_id'] or v == getattr(Job, k).default:
                    continue
                setattr(job, k, v)
            except AttributeError:
                continue
        if is_insert:
            job.save(force_insert=True)
        else:
            job.save()
def query_job(cls, reverse=None, order_by=None, **kwargs):
    """Query Job rows by keyword filters with optional ordering.

    ``reverse=True``/``False`` orders descending/ascending on ``order_by``
    (falling back to create_time when the field is missing or unknown).
    Returns [] when no usable filter is given.
    """
    conditions = [getattr(Job, 'f_%s' % name) == value
                  for name, value in kwargs.items()
                  if hasattr(Job, 'f_%s' % name)]
    if not conditions:
        # not allow query all job
        return []
    query = Job.select().where(*conditions)
    if reverse is not None:
        if not order_by or not hasattr(Job, f"f_{order_by}"):
            order_by = "create_time"
        sort_field = getattr(Job, f"f_{order_by}")
        if reverse is True:
            query = query.order_by(sort_field.desc())
        elif reverse is False:
            query = query.order_by(sort_field.asc())
    return list(query)
def detect_resource_record(cls):
    """Recycle resources held by jobs that applied them over 10 minutes ago.

    Scans Job rows still flagged ``f_resource_in_use`` whose status is an
    end status or WAITING. WAITING jobs are collected and stopped with a
    TIMEOUT status; all other matches have their resources returned via
    ResourceManager. Every step is best-effort: per-job failures are logged
    and do not abort the sweep.
    """
    detect_logger().info('start detect resource recycle')
    try:
        # candidate statuses: any end status, plus WAITING (stuck at start)
        filter_status = EndStatus.status_list()
        filter_status.append(JobStatus.WAITING)
        jobs = Job.select().where(
            Job.f_resource_in_use == True,  # peewee SQL expression, keep `== True`
            # applied more than 10 minutes ago (timestamps in milliseconds)
            current_timestamp() - Job.f_apply_resource_time > 10 * 60 * 1000,
            Job.f_status << filter_status)  # `<<` is peewee's IN operator
        stop_jobs = set()
        for job in jobs:
            if job.f_status == JobStatus.WAITING:
                # never started: stop it below instead of returning resources
                stop_jobs.add(job)
            else:
                try:
                    detect_logger(job_id=job.f_job_id).info(
                        f"start to return job {job.f_job_id} on {job.f_role} {job.f_party_id} resource"
                    )
                    flag = ResourceManager.return_job_resource(
                        job_id=job.f_job_id,
                        role=job.f_role,
                        party_id=job.f_party_id)
                    if flag:
                        detect_logger(job_id=job.f_job_id).info(
                            f"return job {job.f_job_id} on {job.f_role} {job.f_party_id} resource successfully"
                        )
                    else:
                        detect_logger(job_id=job.f_job_id).info(
                            f"return job {job.f_job_id} on {job.f_role} {job.f_party_id} resource failed"
                        )
                except Exception as e:
                    # best-effort: log and continue with the next job
                    detect_logger(job_id=job.f_job_id).exception(e)
        cls.request_stop_jobs(jobs=stop_jobs,
                              stop_msg="start timeout",
                              stop_status=JobStatus.TIMEOUT)
    except Exception as e:
        detect_logger().exception(e)
    finally:
        detect_logger().info('finish detect resource recycle')
def query_job(cls, reverse=None, order_by=None, **kwargs):
    """Query Job rows with range, set and equality filters plus ordering.

    For start_time/end_time/elapsed a two-element list means BETWEEN
    (time strings are converted to timestamps first); a set value becomes
    an IN filter; anything else is an equality filter. ``reverse`` orders
    by ``order_by`` (default create_time). Returns [] with no filters.
    """
    conditions = []
    for name, value in kwargs.items():
        field_name = 'f_%s' % name
        if field_name in ['f_start_time', 'f_end_time', 'f_elapsed'] and isinstance(value, list):
            if field_name == 'f_elapsed':
                lo, hi = value[0], value[1]
            else:
                # time type: %Y-%m-%d %H:%M:%S
                lo = str_to_time_stamp(value[0]) if isinstance(value[0], str) else value[0]
                hi = str_to_time_stamp(value[1]) if isinstance(value[1], str) else value[1]
            conditions.append(getattr(Job, field_name).between(lo, hi))
        elif hasattr(Job, field_name):
            field = getattr(Job, field_name)
            if isinstance(value, set):
                conditions.append(field << value)  # peewee IN operator
            else:
                conditions.append(field == value)
    if not conditions:
        return []
    query = Job.select().where(*conditions)
    if reverse is not None:
        if not order_by or not hasattr(Job, f"f_{order_by}"):
            order_by = "create_time"
        sort_field = getattr(Job, f"f_{order_by}")
        if reverse is True:
            query = query.order_by(sort_field.desc())
        elif reverse is False:
            query = query.order_by(sort_field.asc())
    return list(query)
def save_job_info(self, role, party_id, job_info, create=False):
    """Insert or update the Job row for (self.job_id, role, party_id).

    Applies the fields in ``job_info`` (skipping identity keys and field
    defaults). Returns None when the row is missing and ``create`` is
    False.
    """
    with DB.connection_context():
        stat_logger.info('save {} {} job: {}'.format(
            role, party_id, job_info))
        jobs = Job.select().where(Job.f_job_id == self.job_id,
                                  Job.f_role == role,
                                  Job.f_party_id == party_id)
        is_insert = True
        if jobs:
            job = jobs[0]
            is_insert = False
        elif create:
            job = Job()
            job.f_create_time = current_timestamp()
        else:
            return None
        job.f_job_id = self.job_id
        job.f_role = role
        job.f_party_id = party_id
        if 'f_status' in job_info:
            if job.f_status in [JobStatus.SUCCESS, JobStatus.FAILED,
                                JobStatus.PARTIAL, JobStatus.DELETED]:
                # Termination status cannot be updated
                # TODO:
                pass
        for k, v in job_info.items():
            # Tolerate keys that are not Job model fields, matching the
            # sibling save_job_info implementations (previously an unknown
            # key raised AttributeError from getattr(Job, k)).
            try:
                if k in ['f_job_id', 'f_role', 'f_party_id'] or v == getattr(Job, k).default:
                    continue
                setattr(job, k, v)
            except AttributeError:
                continue
        if is_insert:
            job.save(force_insert=True)
        else:
            job.save()
def query_start_timeout_job(cls, timeout):
    """Return WAITING jobs with cores allocated that have not been updated
    within ``timeout`` milliseconds."""
    cutoff = current_timestamp() - timeout
    stale = Job.select().where(
        Job.f_status == JobStatus.WAITING,
        Job.f_cores > 0,
        Job.f_update_time < cutoff)
    return list(stale)
def list_job(limit):
    """Return jobs newest-first, capped at ``limit`` when it is positive."""
    query = Job.select().order_by(Job.f_create_time.desc())
    if limit > 0:
        query = query.limit(limit)
    return list(query)