def save_pipeline(job_id, role, party_id, model_id, model_version):
    schedule_logger(job_id).info('job {} on {} {} start to save pipeline'.format(job_id, role, party_id))
    job_dsl, job_runtime_conf, train_runtime_conf = job_utils.get_job_configuration(job_id=job_id, role=role,
                                                                                    party_id=party_id)
    job_parameters = job_runtime_conf.get('job_parameters', {})
    job_type = job_parameters.get('job_type', '')
    if job_type == 'predict':
        return
    dag = job_utils.get_job_dsl_parser(dsl=job_dsl, runtime_conf=job_runtime_conf,
                                       train_runtime_conf=train_runtime_conf)
    predict_dsl = dag.get_predict_dsl(role=role)
    pipeline = pipeline_pb2.Pipeline()
    pipeline.inference_dsl = json_dumps(predict_dsl, byte=True)
    pipeline.train_dsl = json_dumps(job_dsl, byte=True)
    pipeline.train_runtime_conf = json_dumps(job_runtime_conf, byte=True)
    pipeline.fate_version = RuntimeConfig.get_env("FATE")
    pipeline.model_id = model_id
    pipeline.model_version = model_version
    job_tracker = Tracking(job_id=job_id, role=role, party_id=party_id,
                           model_id=model_id, model_version=model_version)
    job_tracker.save_pipeline(pipelined_buffer_object=pipeline)
    schedule_logger(job_id).info('job {} on {} {} save pipeline successfully'.format(job_id, role, party_id))

def component_metric_all():
    request_data = request.json
    check_request_parameters(request_data)
    tracker = Tracking(job_id=request_data['job_id'], component_name=request_data['component_name'],
                       role=request_data['role'], party_id=request_data['party_id'])
    metrics = tracker.get_metric_list()
    all_metric_data = {}
    if metrics:
        for metric_namespace, metric_names in metrics.items():
            all_metric_data[metric_namespace] = all_metric_data.get(metric_namespace, {})
            for metric_name in metric_names:
                all_metric_data[metric_namespace][metric_name] = all_metric_data[metric_namespace].get(metric_name, {})
                metric_data, metric_meta = get_metric_all_data(tracker=tracker, metric_namespace=metric_namespace,
                                                               metric_name=metric_name)
                all_metric_data[metric_namespace][metric_name]['data'] = metric_data
                all_metric_data[metric_namespace][metric_name]['meta'] = metric_meta
        return get_json_result(retcode=0, retmsg='success', data=all_metric_data)
    else:
        return get_json_result(retcode=0, retmsg='no data', data={})

def update_job_status(job_id, role, party_id, job_info, create=False):
    job_info['f_run_ip'] = RuntimeConfig.JOB_SERVER_HOST
    if create:
        dsl = json_loads(job_info['f_dsl'])
        runtime_conf = json_loads(job_info['f_runtime_conf'])
        train_runtime_conf = json_loads(job_info['f_train_runtime_conf'])
        if USE_AUTHENTICATION:
            authentication_check(src_role=job_info.get('src_role', None),
                                 src_party_id=job_info.get('src_party_id', None),
                                 dsl=dsl, runtime_conf=runtime_conf, role=role, party_id=party_id)
        save_job_conf(job_id=job_id, job_dsl=dsl, job_runtime_conf=runtime_conf,
                      train_runtime_conf=train_runtime_conf, pipeline_dsl=None)
        job_parameters = runtime_conf['job_parameters']
        job_tracker = Tracking(job_id=job_id, role=role, party_id=party_id,
                               model_id=job_parameters["model_id"], model_version=job_parameters["model_version"])
        if job_parameters.get("job_type", "") != "predict":
            job_tracker.init_pipelined_model()
        roles = json_loads(job_info['f_roles'])
        partner = {}
        show_role = {}
        is_initiator = job_info.get('f_is_initiator', 0)
        for _role, _role_party in roles.items():
            if is_initiator or _role == role:
                show_role[_role] = show_role.get(_role, [])
                for _party_id in _role_party:
                    if is_initiator or _party_id == party_id:
                        show_role[_role].append(_party_id)
            if _role != role:
                partner[_role] = partner.get(_role, [])
                partner[_role].extend(_role_party)
            else:
                for _party_id in _role_party:
                    if _party_id != party_id:
                        partner[_role] = partner.get(_role, [])
                        partner[_role].append(_party_id)
        dag = get_job_dsl_parser(dsl=dsl, runtime_conf=runtime_conf, train_runtime_conf=train_runtime_conf)
        job_args = dag.get_args_input()
        dataset = {}
        for _role, _role_party_args in job_args.items():
            if is_initiator or _role == role:
                for _party_index in range(len(_role_party_args)):
                    _party_id = roles[_role][_party_index]
                    if is_initiator or _party_id == party_id:
                        dataset[_role] = dataset.get(_role, {})
                        dataset[_role][_party_id] = dataset[_role].get(_party_id, {})
                        for _data_type, _data_location in _role_party_args[_party_index]['args']['data'].items():
                            dataset[_role][_party_id][_data_type] = '{}.{}'.format(_data_location['namespace'],
                                                                                   _data_location['name'])
        job_tracker.log_job_view({'partner': partner, 'dataset': dataset, 'roles': show_role})
    else:
        job_tracker = Tracking(job_id=job_id, role=role, party_id=party_id)
    job_tracker.save_job_info(role=role, party_id=party_id, job_info=job_info, create=create)

def component_output_model():
    request_data = request.json
    check_request_parameters(request_data)
    job_dsl, job_runtime_conf, train_runtime_conf = job_utils.get_job_configuration(job_id=request_data['job_id'],
                                                                                    role=request_data['role'],
                                                                                    party_id=request_data['party_id'])
    model_id = job_runtime_conf['job_parameters']['model_id']
    model_version = job_runtime_conf['job_parameters']['model_version']
    tracker = Tracking(job_id=request_data['job_id'], component_name=request_data['component_name'],
                       role=request_data['role'], party_id=request_data['party_id'],
                       model_id=model_id, model_version=model_version)
    dag = job_utils.get_job_dsl_parser(dsl=job_dsl, runtime_conf=job_runtime_conf,
                                       train_runtime_conf=train_runtime_conf)
    component = dag.get_component_info(request_data['component_name'])
    output_model_json = {}
    # There is only one model output at the current dsl version.
    output_model = tracker.get_output_model(component.get_output()['model'][0]
                                            if component.get_output().get('model') else 'default')
    for buffer_name, buffer_object in output_model.items():
        if buffer_name.endswith('Param'):
            output_model_json = json_format.MessageToDict(buffer_object, including_default_value_fields=True)
    if output_model_json:
        component_define = tracker.get_component_define()
        this_component_model_meta = {}
        for buffer_name, buffer_object in output_model.items():
            if buffer_name.endswith('Meta'):
                this_component_model_meta['meta_data'] = json_format.MessageToDict(
                    buffer_object, including_default_value_fields=True)
        this_component_model_meta.update(component_define)
        return get_json_result(retcode=0, retmsg='success', data=output_model_json, meta=this_component_model_meta)
    else:
        return get_json_result(retcode=0, retmsg='no data', data={})

def job_view():
    request_data = request.json
    check_request_parameters(request_data)
    job_tracker = Tracking(job_id=request_data['job_id'], role=request_data['role'],
                           party_id=request_data['party_id'])
    job_view_data = job_tracker.get_job_view()
    if job_view_data:
        job_metric_list = job_tracker.get_metric_list(job_level=True)
        job_view_data['model_summary'] = {}
        for metric_namespace, namespace_metrics in job_metric_list.items():
            job_view_data['model_summary'][metric_namespace] = job_view_data['model_summary'].get(metric_namespace, {})
            for metric_name in namespace_metrics:
                job_view_data['model_summary'][metric_namespace][metric_name] = \
                    job_view_data['model_summary'][metric_namespace].get(metric_name, {})
                for metric_data in job_tracker.get_job_metric_data(metric_namespace=metric_namespace,
                                                                   metric_name=metric_name):
                    job_view_data['model_summary'][metric_namespace][metric_name][metric_data.key] = metric_data.value
        return get_json_result(retcode=0, retmsg='success', data=job_view_data)
    else:
        return get_json_result(retcode=101, retmsg='error')

def save_metric_meta(job_id, component_name, task_id, role, party_id):
    request_data = request.json
    tracker = Tracking(job_id=job_id, component_name=component_name, task_id=task_id, role=role, party_id=party_id)
    metric_meta = deserialize_b64(request_data['metric_meta'])
    tracker.save_metric_meta(metric_namespace=request_data['metric_namespace'],
                             metric_name=request_data['metric_name'],
                             metric_meta=metric_meta,
                             job_level=request_data['job_level'])
    return get_json_result()

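# Illustrative request body for the save_metric_meta endpoint above. This is a sketch derived
# from the keys the handler reads ('metric_namespace', 'metric_name', 'metric_meta', 'job_level'),
# not an excerpt from the FATE source; the base64 payload is assumed to be produced by the
# serialize_b64 counterpart of the deserialize_b64 call used in the handler.
example_metric_meta_request = {
    "metric_namespace": "train",   # namespace the metric is grouped under
    "metric_name": "loss",         # metric identifier within that namespace
    "metric_meta": "<base64 of a serialized metric meta object>",  # e.g. serialize_b64(metric_meta)
    "job_level": False             # False: component-level metric; True: job-level metric
}
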
def component_metrics():
    request_data = request.json
    check_request_parameters(request_data)
    tracker = Tracking(job_id=request_data['job_id'], component_name=request_data['component_name'],
                       role=request_data['role'], party_id=request_data['party_id'])
    metrics = tracker.get_metric_list()
    if metrics:
        return get_json_result(retcode=0, retmsg='success', data=metrics)
    else:
        return get_json_result(retcode=0, retmsg='no data', data={})

def update_task_status(job_id, component_name, task_id, role, party_id, task_info):
    tracker = Tracking(job_id=job_id, role=role, party_id=party_id, component_name=component_name, task_id=task_id)
    tracker.save_task(role=role, party_id=party_id, task_info=task_info)
    schedule_logger(job_id).info('job {} component {} {} {} status {}'.format(
        job_id, component_name, role, party_id, task_info.get('f_status', '')))

def job_quantity_constraint(job_id, role, party_id, job_info):
    lock = Lock()
    with lock:
        time.sleep(1)
        if RuntimeConfig.WORK_MODE == WorkMode.CLUSTER:
            if role == LIMIT_ROLE:
                running_jobs = job_utils.query_job(status='running', role=role)
                ready_jobs = job_utils.query_job(tag='ready', role=role)
                if len(running_jobs) + len(ready_jobs) >= MAX_CONCURRENT_JOB_RUN_HOST:
                    return False
                else:
                    tracker = Tracking(job_id=job_id, role=role, party_id=party_id)
                    tracker.save_job_info(role=role, party_id=party_id, job_info={'f_tag': 'ready'})
    return True

def get_component_output_data_table(task_data):
    check_request_parameters(task_data)
    tracker = Tracking(job_id=task_data['job_id'], component_name=task_data['component_name'],
                       role=task_data['role'], party_id=task_data['party_id'])
    job_dsl_parser = job_utils.get_job_dsl_parser_by_job_id(job_id=task_data['job_id'])
    if not job_dsl_parser:
        raise Exception('cannot get dag parser, please check if the parameters are correct')
    component = job_dsl_parser.get_component_info(task_data['component_name'])
    if not component:
        raise Exception('cannot find component, please check if the parameters are correct')
    output_dsl = component.get_output()
    output_data_dsl = output_dsl.get('data', [])
    # The current version will only have one data output.
    output_data_table = tracker.get_output_data_table(output_data_dsl[0] if output_data_dsl else 'component')
    return output_data_table

def test_upsample(self):
    sampler = RandomSampler(fraction=3, method="upsample")
    tracker = Tracking("jobid", "guest", 9999, "abc", "123")
    sampler.set_tracker(tracker)
    sample_data, sample_ids = sampler.sample(self.table)
    self.assertTrue(sample_data.count() > 250 and sample_data.count() < 350)
    data_dict = dict(self.data)
    new_data = list(sample_data.collect())
    for id, value in new_data:
        self.assertTrue(np.abs(value - data_dict[sample_ids[id]]) < consts.FLOAT_ZERO)
    trans_sampler = RandomSampler(method="upsample")
    trans_sampler.set_tracker(tracker)
    trans_sample_data = trans_sampler.sample(self.table_trans, sample_ids)
    trans_data = list(trans_sample_data.collect())
    data_to_trans_dict = dict(self.data_to_trans)
    self.assertTrue(len(trans_data) == len(sample_ids))
    for id, value in trans_data:
        self.assertTrue(np.abs(value - data_to_trans_dict[sample_ids[id]]) < consts.FLOAT_ZERO)

def test_downsample(self):
    sampler = RandomSampler(fraction=0.3, method="downsample")
    tracker = Tracking("jobid", "guest", 9999, "abc", "123")
    sampler.set_tracker(tracker)
    sample_data, sample_ids = sampler.sample(self.table)
    self.assertTrue(sample_data.count() > 25 and sample_data.count() < 35)
    self.assertTrue(len(set(sample_ids)) == len(sample_ids))
    new_data = list(sample_data.collect())
    data_dict = dict(self.data)
    for id, value in new_data:
        self.assertTrue(id in data_dict)
        self.assertTrue(np.abs(value - data_dict.get(id)) < consts.FLOAT_ZERO)
    trans_sampler = RandomSampler(method="downsample")
    trans_sampler.set_tracker(tracker)
    trans_sample_data = trans_sampler.sample(self.table_trans, sample_ids)
    trans_data = list(trans_sample_data.collect())
    trans_sample_ids = [id for (id, value) in trans_data]
    data_to_trans_dict = dict(self.data_to_trans)
    sample_id_mapping = dict(zip(sample_ids, range(len(sample_ids))))
    self.assertTrue(len(trans_data) == len(sample_ids))
    self.assertTrue(set(trans_sample_ids) == set(sample_ids))
    for id, value in trans_data:
        self.assertTrue(id in sample_id_mapping)
        self.assertTrue(np.abs(value - data_to_trans_dict.get(id)) < consts.FLOAT_ZERO)

def setUp(self):
    self.data = []
    self.data_with_value = []
    for i in range(100):
        row = []
        row_with_value = []
        for j in range(100):
            if random.randint(1, 100) > 30:
                continue
            str_r = ''.join(random.sample(string.ascii_letters + string.digits, 10))
            row.append(str_r)
            row_with_value.append(str_r + ':' + str(random.random()))
        self.data.append((i, ' '.join(row)))
        self.data_with_value.append((i, ' '.join(row_with_value)))
    self.table1 = session.parallelize(self.data, include_key=True, partition=16)
    self.table2 = session.parallelize(self.data_with_value, include_key=True, partition=16)
    self.args1 = {"data": {"data_io_0": {"data": self.table1}}}
    self.args2 = {"data": {"data_io_1": {"data": self.table2}}}
    self.tracker = Tracking("jobid", "guest", 9999, "abc", "123")

def setUp(self):
    self.data = []
    self.max_feature = -1
    for i in range(100):
        row = []
        label = i % 2
        row.append(str(label))
        seen_features = {}  # renamed from `dict` to avoid shadowing the builtin; behavior unchanged
        for j in range(20):
            x = random.randint(0, 1000)
            val = random.random()
            if x in seen_features:
                continue
            self.max_feature = max(self.max_feature, x)
            seen_features[x] = True
            row.append(":".join(map(str, [x, val])))
        self.data.append((i, " ".join(row)))
    self.table = session.parallelize(self.data, include_key=True, partition=16)
    self.args = {"data": {"data_io_0": {"data": self.table}}}
    self.tracker = Tracking("jobid", "guest", 9999, "abc", "123")

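# For reference, each row generated above is a sparse libsvm-style string keyed by sample id,
# i.e. "<label> <feature_index>:<feature_value> ...". An illustrative record (values invented
# for the example only) would look like:
#   (0, "0 523:0.0741 87:0.9120 12:0.3356")
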
def test_downsample(self):
    fractions = [(0, 0.3), (1, 0.4), (2, 0.5), (3, 0.8)]
    sampler = StratifiedSampler(fractions=fractions, method="downsample")
    tracker = Tracking("jobid", "guest", 9999, "abc", "123")
    sampler.set_tracker(tracker)
    sample_data, sample_ids = sampler.sample(self.table)
    count_label = [0 for i in range(4)]
    new_data = list(sample_data.collect())
    data_dict = dict(self.data)
    self.assertTrue(set(sample_ids) & set(data_dict.keys()) == set(sample_ids))
    for id, inst in new_data:
        count_label[inst.label] += 1
        self.assertTrue(type(id).__name__ == 'int' and id >= 0 and id < 1000)
        self.assertTrue(inst.label == self.data[id][1].label and inst.features == self.data[id][1].features)
    for i in range(4):
        self.assertTrue(np.abs(count_label[i] - 250 * fractions[i][1]) < 10)
    trans_sampler = StratifiedSampler(method="downsample")
    trans_sampler.set_tracker(tracker)
    trans_sample_data = trans_sampler.sample(self.table_trans, sample_ids)
    trans_data = list(trans_sample_data.collect())
    trans_sample_ids = [id for (id, value) in trans_data]
    data_to_trans_dict = dict(self.data_to_trans)
    self.assertTrue(set(trans_sample_ids) == set(sample_ids))
    for id, inst in trans_data:
        self.assertTrue(inst.features == data_to_trans_dict.get(id).features)

def component_metric_data():
    request_data = request.json
    check_request_parameters(request_data)
    tracker = Tracking(job_id=request_data['job_id'], component_name=request_data['component_name'],
                       role=request_data['role'], party_id=request_data['party_id'])
    metric_data, metric_meta = get_metric_all_data(tracker=tracker,
                                                   metric_namespace=request_data['metric_namespace'],
                                                   metric_name=request_data['metric_name'])
    if metric_data or metric_meta:
        return get_json_result(retcode=0, retmsg='success', data=metric_data, meta=metric_meta)
    else:
        return get_json_result(retcode=0, retmsg='no data', data=[], meta={})

def clean_job(job_id, role, party_id, roles, party_ids):
    schedule_logger(job_id).info('job {} on {} {} start to clean'.format(job_id, role, party_id))
    tasks = job_utils.query_task(job_id=job_id, role=role, party_id=party_id)
    for task in tasks:
        try:
            Tracking(job_id=job_id, role=role, party_id=party_id,
                     task_id=task.f_task_id).clean_task(roles, party_ids)
            schedule_logger(job_id).info(
                'job {} component {} on {} {} clean done'.format(job_id, task.f_component_name, role, party_id))
        except Exception as e:
            schedule_logger(job_id).info(
                'job {} component {} on {} {} clean failed'.format(job_id, task.f_component_name, role, party_id))
            schedule_logger(job_id).exception(e)
    schedule_logger(job_id).info('job {} on {} {} clean done'.format(job_id, role, party_id))

def setUp(self):
    name1 = "dense_data_" + str(random.random())
    name2 = "dense_data_" + str(random.random())
    namespace = "data_io_dense_test"
    data1 = [("a", "1,2,-1,0,0,5"), ("b", "4,5,6,0,1,2")]
    schema = {"header": "x1,x2,x3,x4,x5,x6", "sid": "id"}
    table1 = session.parallelize(data1, include_key=True, partition=16)
    table1.save_as(name1, namespace)
    session.save_data_table_meta(schema, name1, namespace)
    self.table1 = session.table(name1, namespace)
    data2 = [("a", '-1,,na,null,null,2')]
    table2 = session.parallelize(data2, include_key=True, partition=16)
    table2.save_as(name2, namespace)
    session.save_data_table_meta(schema, name2, namespace)
    self.table2 = session.table(name2, namespace)
    self.args1 = {"data": {"data_io_0": {"data": self.table1}}}
    self.args2 = {"data": {"data_io_1": {"data": self.table2}}}
    self.tracker = Tracking("jobid", "guest", 9999, "abc", "123")

def test_upsample(self):
    fractions = [(0, 1.3), (1, 0.5), (2, 0.8), (3, 9)]
    sampler = StratifiedSampler(fractions=fractions, method="upsample")
    tracker = Tracking("jobid", "guest", 9999, "abc", "123")
    sampler.set_tracker(tracker)
    sample_data, sample_ids = sampler.sample(self.table)
    new_data = list(sample_data.collect())
    count_label = [0 for i in range(4)]
    data_dict = dict(self.data)
    for id, inst in new_data:
        count_label[inst.label] += 1
        self.assertTrue(type(id).__name__ == 'int' and id >= 0 and id < len(sample_ids))
        real_id = sample_ids[id]
        self.assertTrue(inst.label == self.data[real_id][1].label and
                        inst.features == self.data[real_id][1].features)
    for i in range(4):
        self.assertTrue(np.abs(count_label[i] - 250 * fractions[i][1]) < 10)
    trans_sampler = StratifiedSampler(method="upsample")
    trans_sampler.set_tracker(tracker)
    trans_sample_data = trans_sampler.sample(self.table_trans, sample_ids)
    # materialize the collected records once, so they can be iterated both for the id check
    # and for the per-record assertions below (matching the other sampler tests)
    trans_data = list(trans_sample_data.collect())
    trans_sample_ids = [id for (id, value) in trans_data]
    data_to_trans_dict = dict(self.data_to_trans)
    self.assertTrue(sorted(trans_sample_ids) == list(range(len(sample_ids))))
    for id, inst in trans_data:
        real_id = sample_ids[id]
        self.assertTrue(inst.features == data_to_trans_dict[real_id][1].features)

def run_task():
    task = Task()
    task.f_create_time = current_timestamp()
    try:
        parser = argparse.ArgumentParser()
        parser.add_argument('-j', '--job_id', required=True, type=str, help="job id")
        parser.add_argument('-n', '--component_name', required=True, type=str, help="component name")
        parser.add_argument('-t', '--task_id', required=True, type=str, help="task id")
        parser.add_argument('-r', '--role', required=True, type=str, help="role")
        parser.add_argument('-p', '--party_id', required=True, type=str, help="party id")
        parser.add_argument('-c', '--config', required=True, type=str, help="task config")
        parser.add_argument('--processors_per_node', help="processors_per_node", type=int)
        parser.add_argument('--job_server', help="job server", type=str)
        args = parser.parse_args()
        schedule_logger(args.job_id).info('enter task process')
        schedule_logger(args.job_id).info(args)
        # init function args
        if args.job_server:
            RuntimeConfig.init_config(HTTP_PORT=args.job_server.split(':')[1])
            RuntimeConfig.set_process_role(ProcessRole.EXECUTOR)
        job_id = args.job_id
        component_name = args.component_name
        task_id = args.task_id
        role = args.role
        party_id = int(args.party_id)
        executor_pid = os.getpid()
        task_config = file_utils.load_json_conf(args.config)
        job_parameters = task_config['job_parameters']
        job_initiator = task_config['job_initiator']
        job_args = task_config['job_args']
        task_input_dsl = task_config['input']
        task_output_dsl = task_config['output']
        component_parameters = TaskExecutor.get_parameters(job_id, component_name, role, party_id)
        task_parameters = task_config['task_parameters']
        module_name = task_config['module_name']
        TaskExecutor.monkey_patch()
    except Exception as e:
        traceback.print_exc()
        schedule_logger().exception(e)
        task.f_status = TaskStatus.FAILED
        return
    try:
        job_log_dir = os.path.join(job_utils.get_job_log_directory(job_id=job_id), role, str(party_id))
        task_log_dir = os.path.join(job_log_dir, component_name)
        log_utils.LoggerFactory.set_directory(directory=task_log_dir, parent_log_dir=job_log_dir,
                                              append_to_parent_log=True, force=True)
        task.f_job_id = job_id
        task.f_component_name = component_name
        task.f_task_id = task_id
        task.f_role = role
        task.f_party_id = party_id
        task.f_operator = 'python_operator'
        tracker = Tracking(job_id=job_id, role=role, party_id=party_id, component_name=component_name,
                           task_id=task_id, model_id=job_parameters['model_id'],
                           model_version=job_parameters['model_version'], component_module_name=module_name)
        task.f_start_time = current_timestamp()
        task.f_run_ip = get_lan_ip()
        task.f_run_pid = executor_pid
        run_class_paths = component_parameters.get('CodePath').split('/')
        run_class_package = '.'.join(run_class_paths[:-2]) + '.' + run_class_paths[-2].replace('.py', '')
        run_class_name = run_class_paths[-1]
        task.f_status = TaskStatus.RUNNING
        TaskExecutor.sync_task_status(job_id=job_id, component_name=component_name, task_id=task_id, role=role,
                                      party_id=party_id, initiator_party_id=job_initiator.get('party_id', None),
                                      initiator_role=job_initiator.get('role', None), task_info=task.to_json())
        # init environment, process is shared globally
        RuntimeConfig.init_config(WORK_MODE=job_parameters['work_mode'],
                                  BACKEND=job_parameters.get('backend', 0))
        if args.processors_per_node and args.processors_per_node > 0 and RuntimeConfig.BACKEND == Backend.EGGROLL:
            session_options = {"eggroll.session.processors.per.node": args.processors_per_node}
        else:
            session_options = {}
        session.init(job_id=job_utils.generate_session_id(task_id, role, party_id),
                     mode=RuntimeConfig.WORK_MODE, backend=RuntimeConfig.BACKEND, options=session_options)
        federation.init(job_id=task_id, runtime_conf=component_parameters)
        schedule_logger().info('run {} {} {} {} {} task'.format(job_id, component_name, task_id, role, party_id))
        schedule_logger().info(component_parameters)
        schedule_logger().info(task_input_dsl)
        task_run_args = TaskExecutor.get_task_run_args(
            job_id=job_id, role=role, party_id=party_id, task_id=task_id,
            job_args=job_args, job_parameters=job_parameters, task_parameters=task_parameters,
            input_dsl=task_input_dsl,
            if_save_as_task_input_data=job_parameters.get("save_as_task_input_data", SAVE_AS_TASK_INPUT_DATA_SWITCH))
        run_object = getattr(importlib.import_module(run_class_package), run_class_name)()
        run_object.set_tracker(tracker=tracker)
        run_object.set_taskid(taskid=task_id)
        run_object.run(component_parameters, task_run_args)
        output_data = run_object.save_data()
        tracker.save_output_data_table(output_data,
                                       task_output_dsl.get('data')[0] if task_output_dsl.get('data') else 'component')
        output_model = run_object.export_model()
        # There is only one model output at the current dsl version.
        tracker.save_output_model(output_model,
                                  task_output_dsl['model'][0] if task_output_dsl.get('model') else 'default')
        task.f_status = TaskStatus.COMPLETE
    except Exception as e:
        task.f_status = TaskStatus.FAILED
        schedule_logger().exception(e)
    finally:
        sync_success = False
        try:
            task.f_end_time = current_timestamp()
            task.f_elapsed = task.f_end_time - task.f_start_time
            task.f_update_time = current_timestamp()
            TaskExecutor.sync_task_status(job_id=job_id, component_name=component_name, task_id=task_id, role=role,
                                          party_id=party_id, initiator_party_id=job_initiator.get('party_id', None),
                                          initiator_role=job_initiator.get('role', None), task_info=task.to_json())
            sync_success = True
        except Exception as e:
            traceback.print_exc()
            schedule_logger().exception(e)
    schedule_logger().info('task {} {} {} start time: {}'.format(task_id, role, party_id,
                                                                 timestamp_to_date(task.f_start_time)))
    schedule_logger().info('task {} {} {} end time: {}'.format(task_id, role, party_id,
                                                               timestamp_to_date(task.f_end_time)))
    schedule_logger().info('task {} {} {} takes {}s'.format(task_id, role, party_id, int(task.f_elapsed) / 1000))
    schedule_logger().info('finish {} {} {} {} {} {} task'.format(
        job_id, component_name, task_id, role, party_id, task.f_status if sync_success else TaskStatus.FAILED))
    print('finish {} {} {} {} {} {} task'.format(
        job_id, component_name, task_id, role, party_id, task.f_status if sync_success else TaskStatus.FAILED))

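# Illustrative command line for the executor above, reconstructed from the argparse definitions
# in run_task; the module path is an assumption, not taken from the source:
#
#   python fate_flow/driver/task_executor.py \
#       -j $job_id -n $component_name -t $task_id -r guest -p 9999 \
#       -c /path/to/task_config.json --job_server 127.0.0.1:9380 --processors_per_node 4
#
# All short flags (-j/-n/-t/-r/-p/-c) are required; --job_server and --processors_per_node are optional.
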
def get_task_run_args(job_id, role, party_id, task_id, job_args, job_parameters, task_parameters, input_dsl,
                      if_save_as_task_input_data, filter_type=None, filter_attr=None):
    task_run_args = {}
    for input_type, input_detail in input_dsl.items():
        if filter_type and input_type not in filter_type:
            continue
        if input_type == 'data':
            this_type_args = task_run_args[input_type] = task_run_args.get(input_type, {})
            for data_type, data_list in input_detail.items():
                for data_key in data_list:
                    data_key_item = data_key.split('.')
                    search_component_name, search_data_name = data_key_item[0], data_key_item[1]
                    if search_component_name == 'args':
                        if job_args.get('data', {}).get(search_data_name).get('namespace', '') and \
                                job_args.get('data', {}).get(search_data_name).get('name', ''):
                            data_table = session.table(namespace=job_args['data'][search_data_name]['namespace'],
                                                       name=job_args['data'][search_data_name]['name'])
                        else:
                            data_table = None
                    else:
                        data_table = Tracking(job_id=job_id, role=role, party_id=party_id,
                                              component_name=search_component_name).get_output_data_table(
                            data_name=search_data_name)
                    args_from_component = this_type_args[search_component_name] = this_type_args.get(
                        search_component_name, {})
                    # todo: If the same component has more than one identical input, save as is repeated
                    if if_save_as_task_input_data:
                        if data_table:
                            schedule_logger().info("start save as task {} input data table {} {}".format(
                                task_id, data_table.get_namespace(), data_table.get_name()))
                            origin_table_metas = data_table.get_metas()
                            origin_table_schema = data_table.schema
                            save_as_options = {"store_type": StoreTypes.ROLLPAIR_IN_MEMORY} \
                                if SAVE_AS_TASK_INPUT_DATA_IN_MEMORY else {}
                            data_table = data_table.save_as(
                                namespace=job_utils.generate_session_id(task_id=task_id, role=role,
                                                                        party_id=party_id),
                                name=data_table.get_name(),
                                partition=task_parameters['input_data_partition']
                                if task_parameters.get('input_data_partition', 0) > 0
                                else data_table.get_partitions(),
                                options=save_as_options)
                            data_table.save_metas(origin_table_metas)
                            data_table.schema = origin_table_schema
                            schedule_logger().info("save as task {} input data table to {} {} done".format(
                                task_id, data_table.get_namespace(), data_table.get_name()))
                        else:
                            schedule_logger().info(
                                "pass save as task {} input data table, because the table is none".format(task_id))
                    else:
                        schedule_logger().info(
                            "pass save as task {} input data table, because the switch is off".format(task_id))
                    if not data_table or not filter_attr or not filter_attr.get("data", None):
                        args_from_component[data_type] = data_table
                    else:
                        args_from_component[data_type] = dict(
                            [(a, getattr(data_table, "get_{}".format(a))()) for a in filter_attr["data"]])
        elif input_type in ['model', 'isometric_model']:
            this_type_args = task_run_args[input_type] = task_run_args.get(input_type, {})
            for dsl_model_key in input_detail:
                dsl_model_key_items = dsl_model_key.split('.')
                if len(dsl_model_key_items) == 2:
                    search_component_name, search_model_alias = dsl_model_key_items[0], dsl_model_key_items[1]
                elif len(dsl_model_key_items) == 3 and dsl_model_key_items[0] == 'pipeline':
                    search_component_name, search_model_alias = dsl_model_key_items[1], dsl_model_key_items[2]
                else:
                    raise Exception('get input {} failed'.format(input_type))
                models = Tracking(job_id=job_id, role=role, party_id=party_id, component_name=search_component_name,
                                  model_id=job_parameters['model_id'],
                                  model_version=job_parameters['model_version']).get_output_model(
                    model_alias=search_model_alias)
                this_type_args[search_component_name] = models
    return task_run_args

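# Illustrative input_dsl for get_task_run_args above. This is a sketch based only on how the keys
# are parsed in the function (not an excerpt from a real job): 'data' entries take the form
# "<component>.<data_name>", with "args.<name>" referring to job-level input data, and
# 'model' entries take the form "<component>.<model_alias>" or "pipeline.<component>.<model_alias>".
example_input_dsl = {
    "data": {"data": ["args.train_data"]},
    "model": ["hetero_lr_0.hetero_lr"]
}
# The returned task_run_args mirrors that structure, roughly:
#   {"data": {"args": {"data": <data table or attribute dict>}},
#    "model": {"hetero_lr_0": <output model buffers>}}
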
def submit_job(job_data, job_id=None):
    if not job_id:
        job_id = generate_job_id()
    schedule_logger(job_id).info('submit job, job_id {}, body {}'.format(job_id, job_data))
    job_dsl = job_data.get('job_dsl', {})
    job_runtime_conf = job_data.get('job_runtime_conf', {})
    job_utils.check_pipeline_job_runtime_conf(job_runtime_conf)
    job_parameters = job_runtime_conf['job_parameters']
    job_initiator = job_runtime_conf['initiator']
    job_type = job_parameters.get('job_type', '')
    if job_type != 'predict':
        # generate job model info
        job_parameters['model_id'] = '#'.join([dtable_utils.all_party_key(job_runtime_conf['role']), 'model'])
        job_parameters['model_version'] = job_id
        train_runtime_conf = {}
    else:
        detect_utils.check_config(job_parameters, ['model_id', 'model_version'])
        # get inference dsl from pipeline model as job dsl
        job_tracker = Tracking(job_id=job_id, role=job_initiator['role'], party_id=job_initiator['party_id'],
                               model_id=job_parameters['model_id'], model_version=job_parameters['model_version'])
        pipeline_model = job_tracker.get_output_model('pipeline')
        job_dsl = json_loads(pipeline_model['Pipeline'].inference_dsl)
        train_runtime_conf = json_loads(pipeline_model['Pipeline'].train_runtime_conf)
    path_dict = save_job_conf(job_id=job_id, job_dsl=job_dsl, job_runtime_conf=job_runtime_conf,
                              train_runtime_conf=train_runtime_conf, pipeline_dsl=None)
    job = Job()
    job.f_job_id = job_id
    job.f_roles = json_dumps(job_runtime_conf['role'])
    job.f_work_mode = job_parameters['work_mode']
    job.f_initiator_party_id = job_initiator['party_id']
    job.f_dsl = json_dumps(job_dsl)
    job.f_runtime_conf = json_dumps(job_runtime_conf)
    job.f_train_runtime_conf = json_dumps(train_runtime_conf)
    job.f_run_ip = ''
    job.f_status = JobStatus.WAITING
    job.f_progress = 0
    job.f_create_time = current_timestamp()
    initiator_role = job_initiator['role']
    initiator_party_id = job_initiator['party_id']
    if initiator_party_id not in job_runtime_conf['role'][initiator_role]:
        schedule_logger(job_id).info("initiator party id error:{}".format(initiator_party_id))
        raise Exception("initiator party id error {}".format(initiator_party_id))
    get_job_dsl_parser(dsl=job_dsl, runtime_conf=job_runtime_conf, train_runtime_conf=train_runtime_conf)
    TaskScheduler.distribute_job(job=job, roles=job_runtime_conf['role'], job_initiator=job_initiator)
    # push into queue
    job_event = job_utils.job_event(job_id, initiator_role, initiator_party_id)
    try:
        RuntimeConfig.JOB_QUEUE.put_event(job_event)
    except Exception as e:
        raise Exception('push job into queue failed')
    schedule_logger(job_id).info(
        'submit job successfully, job id is {}, model id is {}'.format(job.f_job_id, job_parameters['model_id']))
    board_url = BOARD_DASHBOARD_URL.format(job_id, job_initiator['role'], job_initiator['party_id'])
    logs_directory = get_job_log_directory(job_id)
    return job_id, path_dict['job_dsl_path'], path_dict['job_runtime_conf_path'], logs_directory, \
        {'model_id': job_parameters['model_id'], 'model_version': job_parameters['model_version']}, board_url
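
# Illustrative job_data payload for submit_job above, limited to the fields the function actually
# reads (job_dsl, and job_runtime_conf with 'initiator', 'job_parameters' and 'role'). Component
# names, table references and party ids are placeholders, not taken from the source; for a
# training job, job_type is omitted so model_id/model_version are generated by submit_job itself.
example_job_data = {
    "job_dsl": {
        "components": {
            "dataio_0": {
                "module": "DataIO",
                "input": {"data": {"data": ["args.train_data"]}},
                "output": {"data": ["train"], "model": ["dataio"]}
            }
        }
    },
    "job_runtime_conf": {
        "initiator": {"role": "guest", "party_id": 10000},
        "job_parameters": {"work_mode": 1},
        "role": {"guest": [10000], "host": [9999]}
    }
}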