Beispiel #1
0
def create_job(job_id, role, party_id):
    """Create a job on this party from the JSON body of the current request.

    ``party_id`` is converted with ``int`` before being handed to
    ``JobController.create_job``.  A ``RuntimeError`` raised while handling
    the request is turned into an ``OPERATING_ERROR`` reply carrying the
    error text; otherwise a success reply is returned.
    """
    try:
        creation_args = {
            "job_id": job_id,
            "role": role,
            "party_id": int(party_id),
            "job_info": request.json,
        }
        JobController.create_job(**creation_args)
        reply = get_json_result(retcode=0, retmsg='success')
    except RuntimeError as error:
        reply = get_json_result(retcode=RetCode.OPERATING_ERROR,
                                retmsg=str(error))
    return reply
Beispiel #2
0
def stop_job(job_id, role, party_id, stop_status):
    """Stop a job on this party and report the outcome.

    The reply uses ``RetCode.SUCCESS`` / ``'success'`` when
    ``JobController.stop_jobs`` returns a truthy kill status and
    ``RetCode.EXCEPTION_ERROR`` / ``'failed'`` otherwise.  The kill details
    are attached as ``data`` in both cases.
    """
    kill_status, kill_details = JobController.stop_jobs(
        job_id=job_id, stop_status=stop_status, role=role, party_id=party_id)
    if kill_status:
        code, message = RetCode.SUCCESS, 'success'
    else:
        code, message = RetCode.EXCEPTION_ERROR, 'failed'
    return get_json_result(retcode=code, retmsg=message, data=kill_details)
Beispiel #3
0
 def _run(self):
     """Reload the job selected by ``self.args``; mark it failed on error.

     The first job returned by ``JobSaver.query_job`` for the job id, role
     and party id in ``self.args`` is passed to ``JobController.job_reload``.
     If the reload raises, the traceback is printed, the job's
     ``inheritance_status`` is stored as ``JobInheritanceStatus.FAILED`` and
     the exception is logged; it is not re-raised.
     """
     matched_jobs = JobSaver.query_job(job_id=self.args.job_id,
                                       role=self.args.role,
                                       party_id=self.args.party_id)
     job = matched_jobs[0]
     try:
         JobController.job_reload(job)
     except Exception as e:
         traceback.print_exc()
         failure_record = {
             "job_id": job.f_job_id,
             "role": job.f_role,
             "party_id": job.f_party_id,
             "inheritance_status": JobInheritanceStatus.FAILED,
         }
         JobSaver.update_job(job_info=failure_record)
         LOGGER.exception(e)
Beispiel #4
0
def stop_job():
    """Stop a job locally, then ask the other parties to stop it as well.

    ``job_id`` and ``stop_status`` (default ``"canceled"``) are read from the
    request's JSON body.  When no such job is stored, a ``DATA_ERROR`` reply
    is returned.  Otherwise the job is stopped on this party and a federated
    stop request is sent for the first matching job; the reply is ``SUCCESS``
    when that request succeeds and ``OPERATING_ERROR`` (with the federated
    response dumped into the message) when it does not.
    """
    job_id = request.json.get('job_id')
    stop_status = request.json.get("stop_status", "canceled")
    jobs = JobSaver.query_job(job_id=job_id)

    # Nothing to stop: report and leave early.
    if not jobs:
        schedule_logger(job_id).info(f"can not found job {job_id} to stop")
        return get_json_result(retcode=RetCode.DATA_ERROR,
                               retmsg="can not found job")

    job = jobs[0]
    schedule_logger(job_id).info("stop job on this party")
    kill_status, kill_details = JobController.stop_jobs(
        job_id=job_id, stop_status=stop_status)
    schedule_logger(job_id).info(
        f"stop job on this party status {kill_status}")
    schedule_logger(job_id).info(f"request stop job {job} to {stop_status}")
    status_code, response = FederatedScheduler.request_stop_job(
        job=job, stop_status=stop_status, command_body=job.to_json())

    if status_code != FederatedSchedulingStatusCode.SUCCESS:
        failure_message = (f"stop job on this party {kill_status};\n"
                           f"stop job failed:\n"
                           f"{json_dumps(response, indent=4)}")
        return get_json_result(retcode=RetCode.OPERATING_ERROR,
                               retmsg=failure_message)

    success_message = (f"stop job on this party {kill_status};\n"
                       "stop job on all party success")
    return get_json_result(retcode=RetCode.SUCCESS, retmsg=success_message)
    def test_gen_updated_parameters(self):
        """Exercise ``JobController.gen_updated_parameters`` with component overrides.

        A fixed job id and initiator are used; the job configuration is read
        through ``job_utils.get_job_configuration``, so the job presumably has
        to exist in the test environment already (TODO confirm).  An empty
        job-parameter update and a set of component-parameter overrides are
        passed in, the generated parameters are printed, and the test asserts
        that the first element returned by ``check(input, updated)`` is truthy.
        """
        job_id = "202110211127411105150"
        initiator_role = "guest"
        initiator_party_id = 9999
        # No job-level parameters are updated in this scenario.  (An earlier
        # assignment of auto-retry settings was overwritten before use and has
        # been removed.)
        input_job_parameters = {}
        input_component_parameters = {
            "common": {
                "hetero_lr_0": {
                    "alpha": 0.02
                }
            },
            "role": {
                "guest": {
                    "0": {
                        "reader_0": {
                            "table": {"name": "breast_hetero_guest", "namespace": "unitest_experiment"}
                        },
                        "homo_nn_0": {
                            "with_label": True,
                            "output_format": "dense"
                        },
                    }
                },
                "host": {
                    "1": {
                        "dataio_0": {
                            "with_label": True,
                            "output_format": "dense"
                        },
                        "evaluation_0": {
                            "need_run": True
                        }
                    }
                }
            }
        }
        job_configuration = job_utils.get_job_configuration(job_id=job_id,
                                                            role=initiator_role,
                                                            party_id=initiator_party_id)
        # Not used by the assertion below; kept because the lookups still fail
        # the test early when the stored runtime conf lacks either section.
        origin_job_parameters = job_configuration.runtime_conf["job_parameters"]
        origin_component_parameters = job_configuration.runtime_conf["component_parameters"]

        updated_job_parameters, updated_component_parameters, updated_components = JobController.gen_updated_parameters(
            job_id=job_id,
            initiator_role=initiator_role,
            initiator_party_id=initiator_party_id,
            input_job_parameters=input_job_parameters,
            input_component_parameters=input_component_parameters)
        jprint(updated_job_parameters)
        jprint(updated_component_parameters)
        self.assertTrue(check(input_component_parameters, updated_component_parameters)[0])
 def create_new_version_task(cls, job, task, dsl_parser, auto):
     """Stop the current version of ``task`` and create its next version.

     The old version is stopped with status ``CANCELED`` and its metrics are
     cleaned.  The task object is then bumped to the next version (consuming
     one auto retry when ``auto`` is truthy, and clearing run pid/ip) and
     created through ``FederatedScheduler.create_task``.  For every role and
     party in the response other than the initiator, a task holder is
     initialized through ``JobController.initialize_tasks``.

     Raises:
         Exception: when the federated task creation does not succeed.
     """
     # Retire the old version before anything else.
     FederatedScheduler.stop_task(job=job,
                                  task=task,
                                  stop_status=TaskStatus.CANCELED)
     FederatedScheduler.clean_task(job=job,
                                   task=task,
                                   content_type=TaskCleanResourceType.METRICS)

     # Turn the task object into its next version.
     task.f_task_version += 1
     if auto:
         task.f_auto_retries -= 1
     task.f_run_pid = None
     task.f_run_ip = None

     # todo: FederatedScheduler.create_task and JobController.initialize_tasks will create task twice
     status_code, response = FederatedScheduler.create_task(job=job, task=task)
     if status_code != FederatedSchedulingStatusCode.SUCCESS:
         raise Exception(f"create {task.f_task_id} new version failed")

     # Record a task holder for every non-initiator participant so that the
     # initiator can schedule them.
     for _role in response:
         for _party_id in response[_role]:
             on_initiator = (_role == job.f_initiator_role
                             and _party_id == job.f_initiator_party_id)
             if not on_initiator:
                 JobController.initialize_tasks(
                     job_id=job.f_job_id,
                     role=_role,
                     party_id=_party_id,
                     run_on_this_party=False,
                     initiator_role=job.f_initiator_role,
                     initiator_party_id=job.f_initiator_party_id,
                     job_parameters=RunParameters(
                         **job.f_runtime_conf_on_party["job_parameters"]),
                     dsl_parser=dsl_parser,
                     components=[task.f_component_name],
                     task_version=task.f_task_version,
                     auto_retries=task.f_auto_retries)

     schedule_logger(job.f_job_id).info(
         f"create task {task.f_task_id} new version {task.f_task_version} successfully"
     )
Beispiel #7
0
def job_status(job_id, role, party_id, status):
    """Store a new status for the given job on this party.

    Returns a success reply when ``JobController.update_job_status`` reports
    a truthy result, otherwise an ``OPERATING_ERROR`` reply.
    """
    job_info = {
        "job_id": job_id,
        "role": role,
        "party_id": party_id,
        "status": status,
    }
    if not JobController.update_job_status(job_info=job_info):
        return get_json_result(retcode=RetCode.OPERATING_ERROR,
                               retmsg="update job status failed")
    return get_json_result(retcode=0, retmsg='success')
Beispiel #8
0
def job_status(job_id, role, party_id, status):
    """Update a job's status using the request's JSON body as the base record.

    The body is updated in place with the job id, role, party id and status
    given as arguments, then handed to ``JobController.update_job_status``.
    A falsy result yields a ``NOT_EFFECTIVE`` reply, a truthy one a success
    reply.
    """
    job_info = request.json
    # Some values in the body describe the initiator; overwrite them with the
    # identifiers of this call.
    job_info.update(job_id=job_id, role=role, party_id=party_id, status=status)
    took_effect = JobController.update_job_status(job_info=job_info)
    if not took_effect:
        return get_json_result(retcode=RetCode.NOT_EFFECTIVE,
                               retmsg="update job status does not take effect")
    return get_json_result(retcode=0, retmsg='success')
Beispiel #9
0
 def component_check(cls, job, check_type="inheritance"):
     """Find components whose output tables yield a falsy ``StorageTableMeta``.

     For ``check_type == "rerun"`` the latest successful tasks of ``job`` are
     inspected; for any other value the tasks are loaded from the job named
     in ``job.f_inheritance_info``.

     Returns:
         * ``"rerun"``: the list of component names with a missing output
           table (a name is appended once per missing table).
         * ``"inheritance"``: the inherited component list minus the
           components with a missing output table (order not preserved).
         * any other ``check_type``: ``None``.
     """
     if check_type == "rerun":
         latest_successes = JobSaver.query_task(job_id=job.f_job_id,
                                                party_id=job.f_party_id,
                                                role=job.f_role,
                                                status=TaskStatus.SUCCESS,
                                                only_latest=True)
         tasks = {item.f_component_name: item for item in latest_successes}
     else:
         tasks = JobController.load_tasks(
             component_list=job.f_inheritance_info.get("component_list", []),
             job_id=job.f_inheritance_info.get("job_id"),
             role=job.f_role,
             party_id=job.f_party_id)

     tracker_dict = JobController.load_task_tracker(tasks)

     # Data dependence: every output table of every tracker is looked up.
     missing_dependence_component_list = []
     for tracker in tracker_dict.values():
         for table in tracker.get_output_data_info():
             table_meta = storage.StorageTableMeta(
                 name=table.f_table_name, namespace=table.f_table_namespace)
             if not table_meta:
                 missing_dependence_component_list.append(
                     tracker.component_name)

     if check_type == "rerun":
         return missing_dependence_component_list
     if check_type == "inheritance":
         # Reload the component list without the components that lost data.
         inherited = set(job.f_inheritance_info.get("component_list", []))
         return list(inherited - set(missing_dependence_component_list))
     return None
Beispiel #10
0
 def update_parameters(cls, job, job_parameters, component_parameters):
     """Generate updated parameters for ``job`` and push them to all parties.

     Returns:
         ``(RetCode.SUCCESS, updated_parameters)`` when the federated update
         succeeds, where ``updated_parameters`` has the keys
         ``job_parameters``, ``component_parameters`` and ``components``;
         otherwise ``(RetCode.OPERATING_ERROR, response)`` with the federated
         response.
     """
     generated = JobController.gen_updated_parameters(
         job_id=job.f_job_id,
         initiator_role=job.f_initiator_role,
         initiator_party_id=job.f_initiator_party_id,
         input_job_parameters=job_parameters,
         input_component_parameters=component_parameters)
     updated_job_parameters, updated_component_parameters, updated_components = generated
     schedule_logger(job.f_job_id).info(
         f"components {updated_components} parameters has been updated")

     updated_parameters = {
         "job_parameters": updated_job_parameters,
         "component_parameters": updated_component_parameters,
         "components": updated_components,
     }
     status_code, response = FederatedScheduler.update_parameter(
         job, updated_parameters=updated_parameters)
     if status_code != FederatedSchedulingStatusCode.SUCCESS:
         return RetCode.OPERATING_ERROR, response
     return RetCode.SUCCESS, updated_parameters
Beispiel #11
0
def create_job(job_id, role, party_id):
    """Create a job on this party after checking the sender's FATE version.

    When the request body carries ``src_fate_ver`` and ``compare_version``
    reports it as ``'lt'`` relative to ``'1.7.0'``, an
    ``INCOMPATIBLE_FATE_VER`` reply with both versions is returned and no job
    is created.  Otherwise the job is created from the request body; a
    ``RuntimeError`` is reported as an ``OPERATING_ERROR`` reply that carries
    the job id.
    """
    src_fate_ver = request.json.get('src_fate_ver')
    sender_too_old = (src_fate_ver is not None
                      and compare_version(src_fate_ver, '1.7.0') == 'lt')
    if sender_too_old:
        versions = {
            'src_fate_ver': src_fate_ver,
            "current_fate_ver": RuntimeConfig.get_env('FATE'),
        }
        return get_json_result(retcode=RetCode.INCOMPATIBLE_FATE_VER,
                               retmsg='Incompatible FATE versions',
                               data=versions)

    try:
        creation_args = {
            "job_id": job_id,
            "role": role,
            "party_id": int(party_id),
            "job_info": request.json,
        }
        result = JobController.create_job(**creation_args)
        reply = get_json_result(retcode=0, retmsg='success', data=result)
    except RuntimeError as error:
        reply = get_json_result(retcode=RetCode.OPERATING_ERROR,
                                retmsg=str(error),
                                data={"job_id": job_id})
    return reply
Beispiel #12
0
def start_job(job_id, role, party_id):
    """Start a job on this party, passing the request's JSON body as extra info.

    ``party_id`` is converted with ``int`` before the call.  A success reply
    is returned once ``JobController.start_job`` has returned.
    """
    start_args = {
        "job_id": job_id,
        "role": role,
        "party_id": int(party_id),
        "extra_info": request.json,
    }
    JobController.start_job(**start_args)
    return get_json_result(retcode=0, retmsg='success')
Beispiel #13
0
    def submit(cls, submit_job_conf: JobConfigurationBase, job_id: str = None):
        """Submit a job from the initiator side and return a result dict.

        A job id is generated when none is given.  Everything after the
        initial log line runs inside a single ``try`` block: any
        ``Exception`` raised there is logged and reported through the
        returned dict (``code`` = ``RetCode.OPERATING_ERROR``, ``message`` =
        trace string) instead of propagating to the caller.

        Args:
            submit_job_conf: Job configuration; its ``dsl``, ``runtime_conf``
                and ``to_dict()`` are read here.
            job_id: Optional job id; a falsy value means "generate one".

        Returns:
            dict: Always contains ``job_id``.  On success it is extended with
            ``code``, ``message``, ``model_info``, ``logs_directory``,
            ``board_url`` and the entries returned by
            ``job_utils.save_job_conf``.  On failure ``code`` and ``message``
            describe the error.
        """
        if not job_id:
            job_id = job_utils.generate_job_id()
        submit_result = {"job_id": job_id}
        schedule_logger(job_id).info(
            f"submit job, body {submit_job_conf.to_dict()}")
        try:
            dsl = submit_job_conf.dsl
            # Work on a deep copy so the caller's runtime conf is not mutated here.
            runtime_conf = deepcopy(submit_job_conf.runtime_conf)
            job_utils.check_job_runtime_conf(runtime_conf)
            authentication_utils.check_constraint(runtime_conf, dsl)
            job_initiator = runtime_conf["initiator"]
            conf_adapter = JobRuntimeConfigAdapter(runtime_conf)
            common_job_parameters = conf_adapter.get_common_parameters()

            if common_job_parameters.job_type != "predict":
                # generate job model info
                conf_version = schedule_utils.get_conf_version(runtime_conf)
                if conf_version != 2:
                    raise Exception(
                        "only the v2 version runtime conf is supported")
                common_job_parameters.model_id = model_utils.gen_model_id(
                    runtime_conf["role"])
                # The job id doubles as the model version for non-predict jobs.
                common_job_parameters.model_version = job_id
                train_runtime_conf = {}
            else:
                # check predict job parameters
                detect_utils.check_config(common_job_parameters.to_dict(),
                                          ["model_id", "model_version"])
                # get inference dsl from pipeline model as job dsl
                tracker = Tracker(
                    job_id=job_id,
                    role=job_initiator["role"],
                    party_id=job_initiator["party_id"],
                    model_id=common_job_parameters.model_id,
                    model_version=common_job_parameters.model_version)
                pipeline_model = tracker.get_pipeline_model()
                train_runtime_conf = json_loads(
                    pipeline_model.train_runtime_conf)
                # A predict job is refused unless the model is reported as deployed.
                if not model_utils.check_if_deployed(
                        role=job_initiator["role"],
                        party_id=job_initiator["party_id"],
                        model_id=common_job_parameters.model_id,
                        model_version=common_job_parameters.model_version):
                    raise Exception(
                        f"Model {common_job_parameters.model_id} {common_job_parameters.model_version} has not been deployed yet."
                    )
                # The submitted dsl is replaced by the model's inference dsl.
                dsl = json_loads(pipeline_model.inference_dsl)
            # dsl = ProviderManager.fill_fate_flow_provider(dsl)

            # Build the job record; on the initiator, role/party equal the
            # initiator's role/party.
            job = Job()
            job.f_job_id = job_id
            job.f_dsl = dsl
            job.f_train_runtime_conf = train_runtime_conf
            job.f_roles = runtime_conf["role"]
            job.f_initiator_role = job_initiator["role"]
            job.f_initiator_party_id = job_initiator["party_id"]
            job.f_role = job_initiator["role"]
            job.f_party_id = job_initiator["party_id"]

            # The returned dict is merged into the result at the end.
            path_dict = job_utils.save_job_conf(
                job_id=job_id,
                role=job.f_initiator_role,
                party_id=job.f_initiator_party_id,
                dsl=dsl,
                runtime_conf=runtime_conf,
                runtime_conf_on_party={},
                train_runtime_conf=train_runtime_conf,
                pipeline_dsl=None)

            # The initiator must be listed among the parties of its own role.
            if job.f_initiator_party_id not in runtime_conf["role"][
                    job.f_initiator_role]:
                msg = f"initiator party id {job.f_initiator_party_id} not in roles {runtime_conf['role']}"
                schedule_logger(job_id).info(msg)
                raise Exception(msg)

            # create common parameters on initiator
            JobController.create_common_job_parameters(
                job_id=job.f_job_id,
                initiator_role=job.f_initiator_role,
                common_job_parameters=common_job_parameters)
            job.f_runtime_conf = conf_adapter.update_common_parameters(
                common_parameters=common_job_parameters)
            dsl_parser = schedule_utils.get_job_dsl_parser(
                dsl=job.f_dsl,
                runtime_conf=job.f_runtime_conf,
                train_runtime_conf=job.f_train_runtime_conf)

            # initiator runtime conf as template
            # NOTE(review): .copy() is shallow if f_runtime_conf is a plain
            # dict; only the "job_parameters" entry is replaced here.
            job.f_runtime_conf_on_party = job.f_runtime_conf.copy()
            job.f_runtime_conf_on_party[
                "job_parameters"] = common_job_parameters.to_dict()

            # inherit job
            job.f_inheritance_info = common_job_parameters.inheritance_info
            job.f_inheritance_status = JobInheritanceStatus.WAITING if common_job_parameters.inheritance_info else JobInheritanceStatus.PASS
            if job.f_inheritance_info:
                # Look up the source job and its latest tasks on the initiator
                # and validate the inheritance request against them.
                inheritance_jobs = JobSaver.query_job(
                    job_id=job.f_inheritance_info.get("job_id"),
                    role=job_initiator["role"],
                    party_id=job_initiator["party_id"])
                inheritance_tasks = JobSaver.query_task(
                    job_id=job.f_inheritance_info.get("job_id"),
                    role=job_initiator["role"],
                    party_id=job_initiator["party_id"],
                    only_latest=True)
                job_utils.check_job_inheritance_parameters(
                    job, inheritance_jobs, inheritance_tasks)

            status_code, response = FederatedScheduler.create_job(job=job)
            if status_code != FederatedSchedulingStatusCode.SUCCESS:
                # Mark the job failed and sync that status before raising.
                job.f_status = JobStatus.FAILED
                job.f_tag = "submit_failed"
                FederatedScheduler.sync_job_status(job=job)
                raise Exception("create job failed", response)
            else:
                # Per role and party: names of the components whose
                # "need_run" flag in the create-job response is exactly True.
                need_run_components = {}
                for role in response:
                    need_run_components[role] = {}
                    for party, res in response[role].items():
                        need_run_components[role][party] = [
                            name for name, value in response[role][party]
                            ["data"]["components"].items()
                            if value["need_run"] is True
                        ]
                if common_job_parameters.federated_mode == FederatedMode.MULTIPLE:
                    # create the task holder in db to record information of all participants in the initiator for scheduling
                    for role, party_ids in job.f_roles.items():
                        for party_id in party_ids:
                            # The initiator itself gets no task holder here.
                            if role == job.f_initiator_role and party_id == job.f_initiator_party_id:
                                continue
                            # Parties with nothing to run are skipped.
                            # NOTE(review): assumes the party ids in job.f_roles
                            # match the response keys in type -- confirm.
                            if not need_run_components[role][party_id]:
                                continue
                            JobController.initialize_tasks(
                                job_id=job_id,
                                role=role,
                                party_id=party_id,
                                run_on_this_party=False,
                                initiator_role=job.f_initiator_role,
                                initiator_party_id=job.f_initiator_party_id,
                                job_parameters=common_job_parameters,
                                dsl_parser=dsl_parser,
                                components=need_run_components[role][party_id])
                job.f_status = JobStatus.WAITING
                status_code, response = FederatedScheduler.sync_job_status(
                    job=job)
                if status_code != FederatedSchedulingStatusCode.SUCCESS:
                    raise Exception("set job to waiting status failed")

            schedule_logger(job_id).info(
                f"submit job successfully, job id is {job.f_job_id}, model id is {common_job_parameters.model_id}"
            )
            logs_directory = job_utils.get_job_log_directory(job_id)
            result = {
                "code":
                RetCode.SUCCESS,
                "message":
                "success",
                "model_info": {
                    "model_id": common_job_parameters.model_id,
                    "model_version": common_job_parameters.model_version
                },
                "logs_directory":
                logs_directory,
                "board_url":
                job_utils.get_board_url(job_id, job_initiator["role"],
                                        job_initiator["party_id"])
            }
            # The check runs on the original (not deep-copied) runtime conf; a
            # truthy result replaces the success message with a warning.
            warn_parameter = JobRuntimeConfigAdapter(
                submit_job_conf.runtime_conf).check_removed_parameter()
            if warn_parameter:
                result[
                    "message"] = f"[WARN]{warn_parameter} is removed,it does not take effect!"
            submit_result.update(result)
            submit_result.update(path_dict)
        except Exception as e:
            # Failures are reported through the result dict, not raised.
            submit_result["code"] = RetCode.OPERATING_ERROR
            submit_result["message"] = exception_to_trace_string(e)
            schedule_logger(job_id).exception(e)
        return submit_result
Beispiel #14
0
def clean(job_id, role, party_id):
    """Clean a job on this party; the request's JSON body is passed as ``roles``.

    Returns a success reply once ``JobController.clean_job`` has returned.
    """
    roles = request.json
    JobController.clean_job(job_id=job_id,
                            role=role,
                            party_id=party_id,
                            roles=roles)
    return get_json_result(retcode=0, retmsg='success')
Beispiel #15
0
def save_pipelined_model(job_id, role, party_id):
    """Ask ``JobController`` to save the pipelined model of a job on this party.

    Returns a success reply once the controller call has returned.
    """
    target = {"job_id": job_id, "role": role, "party_id": party_id}
    JobController.save_pipelined_model(**target)
    return get_json_result(retcode=0, retmsg='success')
Beispiel #16
0
def align_job_args(job_id, role, party_id):
    """Forward the request's JSON body to ``JobController.align_job_args``.

    The body is passed as ``job_info`` together with the job id, role and
    party id.  Returns a success reply once the controller call has returned.
    """
    job_info = request.json
    JobController.align_job_args(job_info=job_info,
                                 role=role,
                                 party_id=party_id,
                                 job_id=job_id)
    return get_json_result(retcode=0, retmsg='success')
Beispiel #17
0
 def rerun_job(cls, job_id, initiator_role, initiator_party_id,
               component_name):
     """Prepare a job for rerun on the initiator and set the rerun signal.

     Tasks are selected for ``component_name``, or for the whole job when it
     equals ``job_utils.job_virtual_component_name()``.  Tasks in status
     WAITING or SUCCESS are left alone (a WAITING task still makes the job
     rerunnable).  Every other task is stopped, has its metrics cleaned and
     gets a new version, which is also initialized for every non-initiator
     party.  When at least one task can run again the rerun signal is set;
     otherwise only the job status is synced.

     Raises:
         RuntimeError: when the job is not found on the initiator.
     """
     schedule_logger(job_id=job_id).info(
         f"try to rerun job {job_id} on initiator {initiator_role} {initiator_party_id}"
     )
     jobs = JobSaver.query_job(job_id=job_id,
                               role=initiator_role,
                               party_id=initiator_party_id)
     if not jobs:
         raise RuntimeError(
             f"can not found job {job_id} on initiator {initiator_role} {initiator_party_id}"
         )
     job = jobs[0]

     # Narrow the task query to one component unless the whole job is meant.
     task_filters = {
         "job_id": job_id,
         "role": initiator_role,
         "party_id": initiator_party_id,
     }
     if component_name != job_utils.job_virtual_component_name():
         task_filters["component_name"] = component_name
     tasks = JobSaver.query_task(**task_filters)

     job_can_rerun = False
     dsl_parser = schedule_utils.get_job_dsl_parser(
         dsl=job.f_dsl,
         runtime_conf=job.f_runtime_conf_on_party,
         train_runtime_conf=job.f_train_runtime_conf)

     for task in tasks:
         if task.f_status in {TaskStatus.WAITING, TaskStatus.SUCCESS}:
             # Nothing to recreate; a waiting task can still be scheduled.
             if task.f_status == TaskStatus.WAITING:
                 job_can_rerun = True
             schedule_logger(job_id=job_id).info(
                 f"task {task.f_task_id} {task.f_task_version} on {task.f_role} {task.f_party_id} is {task.f_status}, pass rerun"
             )
             continue

         # Stop the old version of the task.
         FederatedScheduler.stop_task(job=job,
                                      task=task,
                                      stop_status=TaskStatus.CANCELED)
         FederatedScheduler.clean_task(job=job,
                                       task=task,
                                       content_type="metrics")
         # Create the new version of the task.
         task.f_task_version += 1
         task.f_run_pid = None
         task.f_run_ip = None
         FederatedScheduler.create_task(job=job, task=task)
         schedule_logger(job_id=job_id).info(
             f"create task {task.f_task_id} new version {task.f_task_version}"
         )
         # Save the status information of all participants in the initiator
         # for scheduling; the initiator itself is skipped.
         party_map = job.f_runtime_conf_on_party["role"]
         for _role, _party_ids in party_map.items():
             for _party_id in _party_ids:
                 if _role != initiator_role or _party_id != initiator_party_id:
                     JobController.initialize_tasks(
                         job_id,
                         _role,
                         _party_id,
                         False,
                         job.f_initiator_role,
                         job.f_initiator_party_id,
                         RunParameters(
                             **job.f_runtime_conf_on_party["job_parameters"]),
                         dsl_parser,
                         component_name=task.f_component_name,
                         task_version=task.f_task_version)
         schedule_logger(job_id=job_id).info(
             f"create task {task.f_task_id} new version {task.f_task_version} successfully"
         )
         job_can_rerun = True

     if not job_can_rerun:
         FederatedScheduler.sync_job_status(job=job)
         schedule_logger(job_id=job_id).info(f"job {job_id} no task to rerun")
         return

     schedule_logger(job_id=job_id).info(f"job {job_id} set rerun signal")
     status = cls.rerun_signal(job_id=job_id, set_or_reset=True)
     outcome = "successfully" if status else "failed"
     schedule_logger(job_id=job_id).info(
         f"job {job_id} set rerun signal {outcome}")
Beispiel #18
0
def query_job_input_args(job_id, role, party_id):
    """Return the job input args that ``JobController`` resolves for this party.

    The request's JSON body is passed as ``input_data``; the controller's
    result is returned as the ``data`` of a success reply.  ``job_id`` is
    accepted but not used by this function.
    """
    input_data = request.json
    job_input_args = JobController.query_job_input_args(input_data=input_data,
                                                        role=role,
                                                        party_id=party_id)
    return get_json_result(retcode=0, retmsg='success', data=job_input_args)
Beispiel #19
0
def create_task(job_id, component_name, task_id, task_version, role, party_id):
    """Initialize a task on this party from the request's JSON body.

    Only ``role`` and ``party_id`` are forwarded to
    ``JobController.initialize_task``; the remaining parameters are accepted
    but not used by this function.
    """
    task_info = request.json
    JobController.initialize_task(role, party_id, task_info)
    return get_json_result(retcode=0, retmsg='success')
Beispiel #20
0
def update_parameters(job_id, role, party_id):
    """Apply the updated parameters in the request's JSON body to a job.

    Returns a success reply once ``JobController.update_parameter`` has
    returned.
    """
    updated_parameters = request.json
    JobController.update_parameter(job_id=job_id,
                                   role=role,
                                   party_id=party_id,
                                   updated_parameters=updated_parameters)
    return get_json_result(retcode=0, retmsg='success')
Beispiel #21
0
def update_job(job_id, role, party_id):
    """Update a job record with the fields in the request's JSON body.

    The body is copied into a new dict, so the request data is not modified.
    The job id, role and party id given as arguments override any values of
    the same name in the body.
    """
    job_info = dict(request.json)
    job_info.update(job_id=job_id, role=role, party_id=party_id)
    JobController.update_job(job_info=job_info)
    return get_json_result(retcode=0, retmsg='success')
Beispiel #22
0
    def submit(cls, job_data, job_id=None):
        """Submit a job from the initiator side and return its submit info.

        Unlike a result-dict style API, errors are not caught here: failed
        checks and a failed federated job creation raise to the caller.

        Args:
            job_data: Mapping read with ``.get``; the keys ``job_dsl`` and
                ``job_runtime_conf`` are used (both default to ``{}``).
            job_id: Optional job id; a falsy value means "generate one".

        Returns:
            dict: ``job_id``, ``model_info`` (``model_id``, ``model_version``),
            ``logs_directory`` and ``board_url``, extended with the entries
            returned by ``job_utils.save_job_conf``.

        Raises:
            Exception: when a predict job's model is not deployed, when the
                initiator party id is not listed under the initiator role, or
                when the federated job creation does not succeed.
        """
        if not job_id:
            job_id = job_utils.generate_job_id()
        schedule_logger(job_id).info('submit job, job_id {}, body {}'.format(
            job_id, job_data))
        job_dsl = job_data.get('job_dsl', {})
        job_runtime_conf = job_data.get('job_runtime_conf', {})
        job_utils.check_job_runtime_conf(job_runtime_conf)
        authentication_utils.check_constraint(job_runtime_conf, job_dsl)

        job_initiator = job_runtime_conf['initiator']
        conf_adapter = JobRuntimeConfigAdapter(job_runtime_conf)
        common_job_parameters = conf_adapter.get_common_parameters()

        if common_job_parameters.job_type != 'predict':
            # generate job model info
            common_job_parameters.model_id = model_utils.gen_model_id(
                job_runtime_conf['role'])
            # The job id doubles as the model version for non-predict jobs.
            common_job_parameters.model_version = job_id
            train_runtime_conf = {}
        else:
            # check predict job parameters
            detect_utils.check_config(common_job_parameters.to_dict(),
                                      ['model_id', 'model_version'])
            # get inference dsl from pipeline model as job dsl
            tracker = Tracker(
                job_id=job_id,
                role=job_initiator['role'],
                party_id=job_initiator['party_id'],
                model_id=common_job_parameters.model_id,
                model_version=common_job_parameters.model_version)
            pipeline_model = tracker.get_output_model('pipeline')
            train_runtime_conf = json_loads(
                pipeline_model['Pipeline'].train_runtime_conf)
            # A predict job is refused unless the model is reported as deployed.
            if not model_utils.check_if_deployed(
                    role=job_initiator['role'],
                    party_id=job_initiator['party_id'],
                    model_id=common_job_parameters.model_id,
                    model_version=common_job_parameters.model_version):
                raise Exception(
                    f"Model {common_job_parameters.model_id} {common_job_parameters.model_version} has not been deployed yet."
                )
            # The submitted dsl is replaced by the model's inference dsl.
            job_dsl = json_loads(pipeline_model['Pipeline'].inference_dsl)

        # Build the job record; on the initiator, role/party equal the
        # initiator's role/party.
        job = Job()
        job.f_job_id = job_id
        job.f_dsl = job_dsl
        job.f_train_runtime_conf = train_runtime_conf
        job.f_roles = job_runtime_conf['role']
        job.f_work_mode = common_job_parameters.work_mode
        job.f_initiator_role = job_initiator['role']
        job.f_initiator_party_id = job_initiator['party_id']
        job.f_role = job_initiator['role']
        job.f_party_id = job_initiator['party_id']

        # The returned dict is merged into the submit result at the end.
        path_dict = job_utils.save_job_conf(
            job_id=job_id,
            role=job.f_initiator_role,
            job_dsl=job_dsl,
            job_runtime_conf=job_runtime_conf,
            job_runtime_conf_on_party={},
            train_runtime_conf=train_runtime_conf,
            pipeline_dsl=None)

        # The initiator must be listed among the parties of its own role.
        if job.f_initiator_party_id not in job_runtime_conf['role'][
                job.f_initiator_role]:
            schedule_logger(job_id).info("initiator party id error:{}".format(
                job.f_initiator_party_id))
            raise Exception("initiator party id error {}".format(
                job.f_initiator_party_id))

        # create common parameters on initiator
        JobController.backend_compatibility(
            job_parameters=common_job_parameters)
        JobController.adapt_job_parameters(
            role=job.f_initiator_role,
            job_parameters=common_job_parameters,
            create_initiator_baseline=True)

        job.f_runtime_conf = conf_adapter.update_common_parameters(
            common_parameters=common_job_parameters)
        dsl_parser = schedule_utils.get_job_dsl_parser(
            dsl=job.f_dsl,
            runtime_conf=job.f_runtime_conf,
            train_runtime_conf=job.f_train_runtime_conf)

        # initiator runtime conf as template
        # NOTE(review): .copy() is shallow if f_runtime_conf is a plain dict;
        # only the "job_parameters" entry is replaced here.
        job.f_runtime_conf_on_party = job.f_runtime_conf.copy()
        job.f_runtime_conf_on_party[
            "job_parameters"] = common_job_parameters.to_dict()

        if common_job_parameters.work_mode == WorkMode.CLUSTER:
            # Save the status information of all participants in the initiator for scheduling
            # Note that this happens before the federated create_job call below.
            for role, party_ids in job.f_roles.items():
                for party_id in party_ids:
                    # The initiator itself is skipped.
                    if role == job.f_initiator_role and party_id == job.f_initiator_party_id:
                        continue
                    JobController.initialize_tasks(job_id, role, party_id,
                                                   False, job.f_initiator_role,
                                                   job.f_initiator_party_id,
                                                   common_job_parameters,
                                                   dsl_parser)

        status_code, response = FederatedScheduler.create_job(job=job)
        if status_code != FederatedSchedulingStatusCode.SUCCESS:
            # Mark the job failed and sync that status before raising.
            job.f_status = JobStatus.FAILED
            job.f_tag = "submit_failed"
            FederatedScheduler.sync_job_status(job=job)
            raise Exception("create job failed", response)

        schedule_logger(job_id).info(
            'submit job successfully, job id is {}, model id is {}'.format(
                job.f_job_id, common_job_parameters.model_id))
        logs_directory = job_utils.get_job_log_directory(job_id)
        submit_result = {
            "job_id":
            job_id,
            "model_info": {
                "model_id": common_job_parameters.model_id,
                "model_version": common_job_parameters.model_version
            },
            "logs_directory":
            logs_directory,
            "board_url":
            job_utils.get_board_url(job_id, job_initiator['role'],
                                    job_initiator['party_id'])
        }
        submit_result.update(path_dict)
        return submit_result