def save_job_info(self, role, party_id, job_info, create=False): with DB.connection_context(): schedule_logger(self.job_id).info('save {} {} job: {}'.format( role, party_id, job_info)) jobs = Job.select().where(Job.f_job_id == self.job_id, Job.f_role == role, Job.f_party_id == party_id) is_insert = True if jobs: job = jobs[0] is_insert = False if job.f_status == JobStatus.TIMEOUT: return None elif create: job = Job() job.f_create_time = current_timestamp() else: return None job.f_job_id = self.job_id job.f_role = role job.f_party_id = party_id if 'f_status' in job_info: if job.f_status in [JobStatus.COMPLETE, JobStatus.FAILED]: # Termination status cannot be updated # TODO: return if (job_info['f_status'] in [ JobStatus.FAILED, JobStatus.TIMEOUT ]) and (not job.f_end_time): if not job.f_start_time: return job_info['f_end_time'] = current_timestamp() job_info['f_elapsed'] = job_info[ 'f_end_time'] - job.f_start_time job_info['f_update_time'] = current_timestamp() if (job_info['f_status'] in [ JobStatus.FAILED, JobStatus.TIMEOUT, JobStatus.CANCELED, JobStatus.COMPLETE ]): job_info['f_tag'] = 'job_end' update_fields = [] for k, v in job_info.items(): try: if k in ['f_job_id', 'f_role', 'f_party_id' ] or v == getattr(Job, k).default: continue setattr(job, k, v) update_fields.append(getattr(Job, k)) except: pass if is_insert: job.save(force_insert=True) else: job.save(only=update_fields)
def submit_job(job_data): job_id = generate_job_id() schedule_logger.info('submit job, job_id {}, body {}'.format(job_id, job_data)) job_dsl = job_data.get('job_dsl', {}) job_runtime_conf = job_data.get('job_runtime_conf', {}) job_utils.check_pipeline_job_runtime_conf(job_runtime_conf) job_parameters = job_runtime_conf['job_parameters'] job_initiator = job_runtime_conf['initiator'] job_type = job_parameters.get('job_type', '') if job_type != 'predict': # generate job model info job_parameters['model_id'] = '#'.join([dtable_utils.all_party_key(job_runtime_conf['role']), 'model']) job_parameters['model_version'] = job_id train_runtime_conf = {} else: detect_utils.check_config(job_parameters, ['model_id', 'model_version']) # get inference dsl from pipeline model as job dsl job_tracker = Tracking(job_id=job_id, role=job_initiator['role'], party_id=job_initiator['party_id'], model_id=job_parameters['model_id'], model_version=job_parameters['model_version']) pipeline_model = job_tracker.get_output_model('pipeline') job_dsl = json_loads(pipeline_model['Pipeline'].inference_dsl) train_runtime_conf = json_loads(pipeline_model['Pipeline'].train_runtime_conf) job_dsl_path, job_runtime_conf_path = save_job_conf(job_id=job_id, job_dsl=job_dsl, job_runtime_conf=job_runtime_conf) job = Job() job.f_job_id = job_id job.f_roles = json_dumps(job_runtime_conf['role']) job.f_work_mode = job_parameters['work_mode'] job.f_initiator_party_id = job_initiator['party_id'] job.f_dsl = json_dumps(job_dsl) job.f_runtime_conf = json_dumps(job_runtime_conf) job.f_train_runtime_conf = json_dumps(train_runtime_conf) job.f_run_ip = '' job.f_status = JobStatus.WAITING job.f_progress = 0 job.f_create_time = current_timestamp() # save job info TaskScheduler.distribute_job(job=job, roles=job_runtime_conf['role'], job_initiator=job_initiator) # push into queue RuntimeConfig.JOB_QUEUE.put_event({ 'job_id': job_id, "initiator_role": job_initiator['role'], "initiator_party_id": job_initiator['party_id'] } ) schedule_logger.info( 'submit job successfully, job id is {}, model id is {}'.format(job.f_job_id, job_parameters['model_id'])) board_url = BOARD_DASHBOARD_URL.format(job_id, job_initiator['role'], job_initiator['party_id']) return job_id, job_dsl_path, job_runtime_conf_path, {'model_id': job_parameters['model_id'], 'model_version': job_parameters[ 'model_version']}, board_url
def save_job_info(self, role, party_id, job_info, create=False): with DB.connection_context(): stat_logger.info('save {} {} job: {}'.format( role, party_id, job_info)) jobs = Job.select().where(Job.f_job_id == self.job_id, Job.f_role == role, Job.f_party_id == party_id) is_insert = True if jobs: job = jobs[0] is_insert = False if job.f_status == JobStatus.TIMEOUT: return None elif create: job = Job() job.f_create_time = current_timestamp() else: return None job.f_job_id = self.job_id job.f_role = role job.f_party_id = party_id if 'f_status' in job_info: if job.f_status in [JobStatus.COMPLETE, JobStatus.FAILED]: # Termination status cannot be updated # TODO: pass if job_info[ 'f_status'] == JobStatus.FAILED and not job.f_end_time: job.f_end_time = current_timestamp() job.f_elapsed = job.f_end_time - job.f_start_time job.f_update_time = current_timestamp() for k, v in job_info.items(): try: if k in ['f_job_id', 'f_role', 'f_party_id' ] or v == getattr(Job, k).default: continue setattr(job, k, v) except: pass if is_insert: job.save(force_insert=True) else: job.save()
def save_job_info(self, role, party_id, job_info, create=False): with DB.connection_context(): stat_logger.info('save {} {} job: {}'.format( role, party_id, job_info)) jobs = Job.select().where(Job.f_job_id == self.job_id, Job.f_role == role, Job.f_party_id == party_id) is_insert = True if jobs: job = jobs[0] is_insert = False elif create: job = Job() job.f_create_time = current_timestamp() else: return None job.f_job_id = self.job_id job.f_role = role job.f_party_id = party_id if 'f_status' in job_info: if job.f_status in [ JobStatus.SUCCESS, JobStatus.FAILED, JobStatus.PARTIAL, JobStatus.DELETED ]: # Termination status cannot be updated # TODO: pass for k, v in job_info.items(): if k in ['f_job_id', 'f_role', 'f_party_id'] or v == getattr( Job, k).default: continue setattr(job, k, v) if is_insert: job.save(force_insert=True) else: job.save()
def submit_job(job_data): job_id = generate_job_id() schedule_logger(job_id).info('submit job, job_id {}, body {}'.format(job_id, job_data)) job_dsl = job_data.get('job_dsl', {}) job_runtime_conf = job_data.get('job_runtime_conf', {}) job_utils.check_pipeline_job_runtime_conf(job_runtime_conf) job_parameters = job_runtime_conf['job_parameters'] job_initiator = job_runtime_conf['initiator'] job_type = job_parameters.get('job_type', '') if job_type != 'predict': # generate job model info job_parameters['model_id'] = '#'.join([dtable_utils.all_party_key(job_runtime_conf['role']), 'model']) job_parameters['model_version'] = job_id train_runtime_conf = {} else: detect_utils.check_config(job_parameters, ['model_id', 'model_version']) # get inference dsl from pipeline model as job dsl job_tracker = Tracking(job_id=job_id, role=job_initiator['role'], party_id=job_initiator['party_id'], model_id=job_parameters['model_id'], model_version=job_parameters['model_version']) pipeline_model = job_tracker.get_output_model('pipeline') job_dsl = json_loads(pipeline_model['Pipeline'].inference_dsl) train_runtime_conf = json_loads(pipeline_model['Pipeline'].train_runtime_conf) path_dict = save_job_conf(job_id=job_id, job_dsl=job_dsl, job_runtime_conf=job_runtime_conf, train_runtime_conf=train_runtime_conf, pipeline_dsl=None) job = Job() job.f_job_id = job_id job.f_roles = json_dumps(job_runtime_conf['role']) job.f_work_mode = job_parameters['work_mode'] job.f_initiator_party_id = job_initiator['party_id'] job.f_dsl = json_dumps(job_dsl) job.f_runtime_conf = json_dumps(job_runtime_conf) job.f_train_runtime_conf = json_dumps(train_runtime_conf) job.f_run_ip = '' job.f_status = JobStatus.WAITING job.f_progress = 0 job.f_create_time = current_timestamp() initiator_role = job_initiator['role'] initiator_party_id = job_initiator['party_id'] if initiator_party_id not in job_runtime_conf['role'][initiator_role]: schedule_logger(job_id).info("initiator party id error:{}".format(initiator_party_id)) raise Exception("initiator party id error {}".format(initiator_party_id)) get_job_dsl_parser(dsl=job_dsl, runtime_conf=job_runtime_conf, train_runtime_conf=train_runtime_conf) TaskScheduler.distribute_job(job=job, roles=job_runtime_conf['role'], job_initiator=job_initiator) # push into queue job_event = job_utils.job_event(job_id, initiator_role, initiator_party_id) try: RuntimeConfig.JOB_QUEUE.put_event(job_event) except Exception as e: raise Exception('push job into queue failed') schedule_logger(job_id).info( 'submit job successfully, job id is {}, model id is {}'.format(job.f_job_id, job_parameters['model_id'])) board_url = BOARD_DASHBOARD_URL.format(job_id, job_initiator['role'], job_initiator['party_id']) logs_directory = get_job_log_directory(job_id) return job_id, path_dict['job_dsl_path'], path_dict['job_runtime_conf_path'], logs_directory, \ {'model_id': job_parameters['model_id'],'model_version': job_parameters['model_version']}, board_url