Example #1
    def update_parameter(cls, job_id, role, party_id,
                         updated_parameters: dict):
        job_configuration = job_utils.get_job_configuration(job_id=job_id,
                                                            role=role,
                                                            party_id=party_id)
        job_parameters = updated_parameters.get("job_parameters")
        component_parameters = updated_parameters.get("component_parameters")
        if job_parameters:
            job_configuration.runtime_conf["job_parameters"] = job_parameters
            job_parameters = RunParameters(**job_parameters["common"])
            cls.create_job_parameters_on_party(role=role,
                                               party_id=party_id,
                                               job_parameters=job_parameters)
            job_configuration.runtime_conf_on_party[
                "job_parameters"] = job_parameters.to_dict()
        if component_parameters:
            job_configuration.runtime_conf[
                "component_parameters"] = component_parameters
            job_configuration.runtime_conf_on_party[
                "component_parameters"] = component_parameters

        job_info = {}
        job_info["job_id"] = job_id
        job_info["role"] = role
        job_info["party_id"] = party_id
        job_info["runtime_conf"] = job_configuration.runtime_conf
        job_info[
            "runtime_conf_on_party"] = job_configuration.runtime_conf_on_party
        JobSaver.update_job(job_info)
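
Note that get_job_configuration here returns a JobConfiguration object with runtime_conf and runtime_conf_on_party attributes (the newer FATE Flow API); several later examples use an older version that returns a tuple instead. A minimal, hypothetical call sketch (the JobController class name and the parameter shapes are assumptions, not shown above):

updated = {
    "job_parameters": {"common": {"auto_retries": 1}},
    "component_parameters": {"common": {"hetero_lr_0": {"alpha": 0.02}}},
}
JobController.update_parameter(job_id="202110211127411105150",
                               role="guest", party_id=9999,
                               updated_parameters=updated)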
Example #2
 def gen_updated_parameters(cls, job_id, initiator_role, initiator_party_id,
                            input_job_parameters,
                            input_component_parameters):
     # TODO: add a check for job parameters that must not be updated
     job_configuration = job_utils.get_job_configuration(
         job_id=job_id, role=initiator_role, party_id=initiator_party_id)
     updated_job_parameters = job_configuration.runtime_conf[
         "job_parameters"]
     updated_component_parameters = job_configuration.runtime_conf[
         "component_parameters"]
     if input_job_parameters:
         if input_job_parameters.get("common"):
             common_job_parameters = RunParameters(
                 **input_job_parameters["common"])
             cls.create_common_job_parameters(
                 job_id=job_id,
                 initiator_role=initiator_role,
                 common_job_parameters=common_job_parameters)
             for attr in {"model_id", "model_version"}:
                 setattr(common_job_parameters, attr,
                         updated_job_parameters["common"].get(attr))
             updated_job_parameters[
                 "common"] = common_job_parameters.to_dict()
         # role-scoped job parameters are not supported here
     updated_components = set()  # never populated here; returned as an empty list
     if input_component_parameters:
         cls.merge_update(input_component_parameters,
                          updated_component_parameters)
     return updated_job_parameters, updated_component_parameters, list(
         updated_components)
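
A sketch of calling this helper; the input shapes are inferred from the test in Example #7 below and should be treated as assumptions:

input_job_parameters = {"common": {"auto_retries": 1}}
input_component_parameters = {"common": {"hetero_lr_0": {"alpha": 0.02}}}
job_params, component_params, components = JobController.gen_updated_parameters(
    job_id="202110211127411105150",
    initiator_role="guest",
    initiator_party_id=9999,
    input_job_parameters=input_job_parameters,
    input_component_parameters=input_component_parameters)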
Example #3
def component_output_model():
    request_data = request.json
    check_request_parameters(request_data)
    job_dsl, job_runtime_conf, train_runtime_conf = job_utils.get_job_configuration(job_id=request_data['job_id'],
                                                                                    role=request_data['role'],
                                                                                    party_id=request_data['party_id'])
    model_id = job_runtime_conf['job_parameters']['model_id']
    model_version = job_runtime_conf['job_parameters']['model_version']
    tracker = Tracking(job_id=request_data['job_id'], component_name=request_data['component_name'],
                       role=request_data['role'], party_id=request_data['party_id'], model_id=model_id,
                       model_version=model_version)
    dag = job_utils.get_job_dsl_parser(dsl=job_dsl, runtime_conf=job_runtime_conf,
                                       train_runtime_conf=train_runtime_conf)
    component = dag.get_component_info(request_data['component_name'])
    output_model_json = {}
    # There is only one model output in the current DSL version.
    output_model = tracker.get_output_model(component.get_output()['model'][0] if component.get_output().get('model') else 'default')
    for buffer_name, buffer_object in output_model.items():
        if buffer_name.endswith('Param'):
            output_model_json = json_format.MessageToDict(buffer_object, including_default_value_fields=True)
    if output_model_json:
        component_define = tracker.get_component_define()
        this_component_model_meta = {}
        for buffer_name, buffer_object in output_model.items():
            if buffer_name.endswith('Meta'):
                this_component_model_meta['meta_data'] = json_format.MessageToDict(buffer_object,
                                                                                   including_default_value_fields=True)
        this_component_model_meta.update(component_define)
        return get_json_result(retcode=0, retmsg='success', data=output_model_json, meta=this_component_model_meta)
    else:
        return get_json_result(retcode=0, retmsg='no data', data={})
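
The Param/Meta conversion above relies on google.protobuf.json_format; a self-contained sketch of that call, independent of FATE (Struct is used here only as a convenient message type):

from google.protobuf import json_format
from google.protobuf.struct_pb2 import Struct

msg = Struct()
msg.update({"alpha": 0.02})
# including_default_value_fields also emits fields still at their default values;
# note: recent protobuf releases rename this kwarg (always_print_fields_with_no_presence)
as_dict = json_format.MessageToDict(msg, including_default_value_fields=True)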
Example #4
def component_output_model():
    request_data = request.json
    check_request_parameters(request_data)
    job_dsl, job_runtime_conf, train_runtime_conf = job_utils.get_job_configuration(job_id=request_data['job_id'],
                                                                                    role=request_data['role'],
                                                                                    party_id=request_data['party_id'])
    model_id = job_runtime_conf['job_parameters']['model_id']
    model_version = job_runtime_conf['job_parameters']['model_version']
    tracker = Tracking(job_id=request_data['job_id'], component_name=request_data['component_name'],
                       role=request_data['role'], party_id=request_data['party_id'], model_id=model_id,
                       model_version=model_version)
    dag = job_utils.get_job_dsl_parser(dsl=job_dsl, runtime_conf=job_runtime_conf,
                                       train_runtime_conf=train_runtime_conf)
    component = dag.get_component_info(request_data['component_name'])
    output_model_json = {}
    if component.get_output().get('model', []):
        # There is only one model output in the current DSL version.
        output_model = tracker.get_output_model(component.get_output()['model'][0])
        for buffer_name, buffer_object in output_model.items():
            if buffer_name.endswith('Param'):
                output_model_json = json_format.MessageToDict(buffer_object, including_default_value_fields=True)
    if output_model_json:
        pipeline_output_model = tracker.get_output_model_meta()
        this_component_model_meta = {}
        for k, v in pipeline_output_model.items():
            if k.endswith('_module_name'):
                if k == '{}_module_name'.format(request_data['component_name']):
                    this_component_model_meta['module_name'] = v
            else:
                k_i = k.split('.')
                if '.'.join(k_i[:-1]) == request_data['component_name']:
                    this_component_model_meta[k] = v
        return get_json_result(retcode=0, retmsg='success', data=output_model_json, meta=this_component_model_meta)
    else:
        return get_json_result(retcode=0, retmsg='no data', data={})
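
The meta loop above matches keys of the form '<component>_module_name' and '<component>.<suffix>'. A toy run with assumed key shapes (not taken from the snippet):

component_name = 'hetero_lr_0'
pipeline_output_model = {
    'hetero_lr_0_module_name': 'HeteroLR',  # matched: module name entry
    'hetero_lr_0.iters': 5,                 # matched: prefix before '.' is the component
    'dataio_0.schema': 'ignored',           # different component, skipped
}
# After the loop: this_component_model_meta == {'module_name': 'HeteroLR', 'hetero_lr_0.iters': 5}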
Example #5
def get_config():
    kwargs = {}
    job_configuration = None

    for i in ('job_id', 'role', 'party_id'):
        if request.json.get(i) is None:
            return error_response(400, f"'{i}' is required.")
        kwargs[i] = str(request.json[i])

    for i in ('component_name', 'task_id', 'task_version'):
        if request.json.get(i) is None:
            break
        kwargs[i] = str(request.json[i])
    else:
        try:
            job_configuration = job_utils.get_task_using_job_conf(**kwargs)
        except Exception:
            pass

    if job_configuration is None:
        job_configuration = job_utils.get_job_configuration(
            kwargs['job_id'], kwargs['role'], kwargs['party_id'])

    if job_configuration is None:
        return error_response(404, 'Job not found.')

    return get_json_result(data=job_configuration.to_dict())
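
Example #5 leans on Python's for/else: the else clause runs only when the loop finishes without hitting break, i.e. only when all three optional keys are present. A minimal demonstration:

payload = {'component_name': 'hetero_lr_0'}  # task_id and task_version missing

for key in ('component_name', 'task_id', 'task_version'):
    if payload.get(key) is None:
        break          # skips the else clause
else:
    print('all optional keys present')  # not reached for this payload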
Example #6
 def save_pipelined_model(cls, job_id, role, party_id):
     schedule_logger(job_id).info(
         'job {} on {} {} start to save pipeline'.format(
             job_id, role, party_id))
     job_dsl, job_runtime_conf, train_runtime_conf = job_utils.get_job_configuration(
         job_id=job_id, role=role, party_id=party_id)
     job_parameters = job_runtime_conf.get('job_parameters', {})
     model_id = job_parameters['model_id']
     model_version = job_parameters['model_version']
     job_type = job_parameters.get('job_type', '')
     if job_type == 'predict':
         return
     dag = schedule_utils.get_job_dsl_parser(
         dsl=job_dsl,
         runtime_conf=job_runtime_conf,
         train_runtime_conf=train_runtime_conf)
     predict_dsl = dag.get_predict_dsl(role=role)
     pipeline = pipeline_pb2.Pipeline()
     pipeline.inference_dsl = json_dumps(predict_dsl, byte=True)
     pipeline.train_dsl = json_dumps(job_dsl, byte=True)
     pipeline.train_runtime_conf = json_dumps(job_runtime_conf, byte=True)
     pipeline.fate_version = RuntimeConfig.get_env("FATE")
     pipeline.model_id = model_id
     pipeline.model_version = model_version
     tracker = Tracker(job_id=job_id,
                       role=role,
                       party_id=party_id,
                       model_id=model_id,
                       model_version=model_version)
     tracker.save_pipelined_model(pipelined_buffer_object=pipeline)
     if role != 'local':
         tracker.save_machine_learning_model_info()
     schedule_logger(job_id).info(
         'job {} on {} {} save pipeline successfully'.format(
             job_id, role, party_id))
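
json_dumps with byte=True is a FATE helper, used because the protobuf bytes fields above require bytes rather than str. A plausible stand-in built on the standard library (a sketch, not the actual implementation):

import json

def json_dumps(obj, byte=False):
    serialized = json.dumps(obj)
    return serialized.encode('utf-8') if byte else serialized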
Example #7
    def test_gen_updated_parameters(self):
        job_id = "202110211127411105150"
        initiator_role = "guest"
        initiator_party_id = 9999
        # Shape that job parameter updates would take; kept commented out
        # because this test exercises only component parameter updates.
        # input_job_parameters = {
        #     "common": {
        #         "auto_retries": 1,
        #         "auto_retry_delay": 1
        #     }
        # }
        input_job_parameters = {}
        input_component_parameters = {
            "common": {
                "hetero_lr_0": {
                    "alpha": 0.02
                }
            },
            "role": {
                "guest": {
                    "0": {
                        "reader_0": {
                            "table": {"name": "breast_hetero_guest", "namespace": "unitest_experiment"}
                        },
                        "homo_nn_0":{
                            "with_label": True,
                            "output_format": "dense"
                        },
                    }
                },
                "host": {
                    "1": {
                        "dataio_0":{
                            "with_label": True,
                            "output_format": "dense"
                        },
                        "evaluation_0": {
                            "need_run": True
                        }
                    }
                }
            }
        }
        job_configuration = job_utils.get_job_configuration(job_id=job_id,
                                                            role=initiator_role,
                                                            party_id=initiator_party_id)
        origin_job_parameters = job_configuration.runtime_conf["job_parameters"]
        origin_component_parameters = job_configuration.runtime_conf["component_parameters"]

        updated_job_parameters, updated_component_parameters, updated_components = JobController.gen_updated_parameters(
            job_id=job_id,
            initiator_role=initiator_role,
            initiator_party_id=initiator_party_id,
            input_job_parameters=input_job_parameters,
            input_component_parameters=input_component_parameters)
        jprint(updated_job_parameters)
        jprint(updated_component_parameters)
        self.assertTrue(check(input_component_parameters, updated_component_parameters)[0])
Example #8
 def calculate_task_resource(cls, task_parameters: RunParameters = None, task_info: dict = None):
     if not task_parameters:
         dsl, runtime_conf, train_runtime_conf = job_utils.get_job_configuration(job_id=task_info["job_id"],
                                                                                 role=task_info["role"],
                                                                                 party_id=task_info["party_id"])
         task_parameters = RunParameters(**runtime_conf["job_parameters"])
     cores_per_task = task_parameters.adaptation_parameters["task_cores_per_node"] * \
                      task_parameters.adaptation_parameters["task_nodes"]
     memory_per_task = task_parameters.adaptation_parameters["task_memory_per_node"] * \
                       task_parameters.adaptation_parameters["task_nodes"]
     return cores_per_task, memory_per_task
Example #9
 def calculate_job_resource(cls, job_parameters: RunParameters = None, job_id=None, role=None, party_id=None):
     if not job_parameters:
         dsl, runtime_conf, train_runtime_conf = job_utils.get_job_configuration(job_id=job_id,
                                                                                 role=role,
                                                                                 party_id=party_id)
         job_parameters = RunParameters(**runtime_conf["job_parameters"])
     cores = job_parameters.adaptation_parameters["task_cores_per_node"] * job_parameters.adaptation_parameters[
         "task_nodes"] * job_parameters.task_parallelism
     memory = job_parameters.adaptation_parameters["task_memory_per_node"] * job_parameters.adaptation_parameters[
         "task_nodes"] * job_parameters.task_parallelism
     return job_parameters.computing_engine, cores, memory
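
Job-level resources are the per-task figures of Example #8 scaled by task_parallelism. A worked example with assumed values:

adaptation_parameters = {'task_cores_per_node': 2,
                         'task_nodes': 2,
                         'task_memory_per_node': 512}
task_parallelism = 4

cores_per_task = adaptation_parameters['task_cores_per_node'] * adaptation_parameters['task_nodes']    # 4
memory_per_task = adaptation_parameters['task_memory_per_node'] * adaptation_parameters['task_nodes']  # 1024
job_cores = cores_per_task * task_parallelism    # 16
job_memory = memory_per_task * task_parallelism  # 4096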
Example #10
    def save_pipelined_model(cls, job_id, role, party_id):
        schedule_logger(job_id).info(
            'job {} on {} {} start to save pipeline'.format(
                job_id, role, party_id))
        job_dsl, job_runtime_conf, runtime_conf_on_party, train_runtime_conf = job_utils.get_job_configuration(
            job_id=job_id, role=role, party_id=party_id)
        job_parameters = runtime_conf_on_party.get('job_parameters', {})
        if role in job_parameters.get("assistant_role", []):
            return
        model_id = job_parameters['model_id']
        model_version = job_parameters['model_version']
        job_type = job_parameters.get('job_type', '')
        work_mode = job_parameters['work_mode']
        roles = runtime_conf_on_party['role']
        initiator_role = runtime_conf_on_party['initiator']['role']
        initiator_party_id = runtime_conf_on_party['initiator']['party_id']
        if job_type == 'predict':
            return
        dag = schedule_utils.get_job_dsl_parser(
            dsl=job_dsl,
            runtime_conf=job_runtime_conf,
            train_runtime_conf=train_runtime_conf)
        predict_dsl = dag.get_predict_dsl(role=role)
        pipeline = pipeline_pb2.Pipeline()
        pipeline.inference_dsl = json_dumps(predict_dsl, byte=True)
        pipeline.train_dsl = json_dumps(job_dsl, byte=True)
        pipeline.train_runtime_conf = json_dumps(job_runtime_conf, byte=True)
        pipeline.fate_version = RuntimeConfig.get_env("FATE")
        pipeline.model_id = model_id
        pipeline.model_version = model_version

        pipeline.parent = True
        pipeline.loaded_times = 0
        pipeline.roles = json_dumps(roles, byte=True)
        pipeline.work_mode = work_mode
        pipeline.initiator_role = initiator_role
        pipeline.initiator_party_id = initiator_party_id
        pipeline.runtime_conf_on_party = json_dumps(runtime_conf_on_party,
                                                    byte=True)
        pipeline.parent_info = json_dumps({}, byte=True)

        tracker = Tracker(job_id=job_id,
                          role=role,
                          party_id=party_id,
                          model_id=model_id,
                          model_version=model_version)
        tracker.save_pipelined_model(pipelined_buffer_object=pipeline)
        if role != 'local':
            tracker.save_machine_learning_model_info()
        schedule_logger(job_id).info(
            'job {} on {} {} save pipeline successfully'.format(
                job_id, role, party_id))
Example #11
def upload_history():
    request_data = request.json
    if request_data.get('job_id'):
        tasks = JobSaver.query_task(component_name='upload_0', status=StatusSet.SUCCESS, job_id=request_data.get('job_id'), run_on_this_party=True)
    else:
        tasks = JobSaver.query_task(component_name='upload_0', status=StatusSet.SUCCESS, run_on_this_party=True)
    limit = request_data.get('limit')
    if not limit:
        tasks = tasks[-1::-1]
    else:
        tasks = tasks[-1:-limit - 1:-1]
    jobs_run_conf = job_utils.get_job_configuration(None, None, None, tasks)
    data = get_upload_info(jobs_run_conf=jobs_run_conf)
    return get_json_result(retcode=0, retmsg='success', data=data)
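
The slices above reverse the task list so the newest tasks come first: tasks[-1::-1] is the whole list reversed, and tasks[-1:-limit - 1:-1] keeps only the newest limit entries. For example:

tasks = [1, 2, 3, 4, 5]
limit = 2
tasks[-1::-1]            # [5, 4, 3, 2, 1]
tasks[-1:-limit - 1:-1]  # [5, 4]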
Example #12
 def cancel_ready(job_id, initiator_role, initiator_party_id):
     job_dsl, job_runtime_conf, train_runtime_conf = job_utils.get_job_configuration(
         job_id=job_id, role=initiator_role, party_id=initiator_party_id)
     job_parameters = job_runtime_conf.get('job_parameters', {})
     job_initiator = job_runtime_conf.get('initiator', {})
     status = TaskScheduler.check_job(
         job_id=job_id,
         roles=job_runtime_conf['role'],
         work_mode=job_parameters['work_mode'],
         initiator_party_id=job_initiator['party_id'],
         initiator_role=job_initiator['role'],
         job_info={'f_tag': 'cancel_ready'},
         way='status')
     return status
Example #13
def get_upload_history():
    request_data = request.json
    if request_data.get('job_id'):
        tasks = job_utils.query_task(component_name='upload_0',
                                     status='success',
                                     job_id=request_data.get('job_id'))
    else:
        tasks = job_utils.query_task(component_name='upload_0',
                                     status='success')
    limit = request_data.get('limit')
    if not limit:
        tasks = tasks[-1::-1]
    else:
        tasks = tasks[-1:-limit - 1:-1]
    jobs_run_conf = get_job_configuration(None, None, None, tasks)
    return get_upload_info(jobs_run_conf)
Example #14
def component_output_model():
    request_data = request.json
    check_request_parameters(request_data)
    job_dsl, job_runtime_conf, runtime_conf_on_party, train_runtime_conf = job_utils.get_job_configuration(job_id=request_data['job_id'],
                                                                                                           role=request_data['role'],
                                                                                                           party_id=request_data['party_id'])
    try:
        model_id = runtime_conf_on_party['job_parameters']['model_id']
        model_version = runtime_conf_on_party['job_parameters']['model_version']
    except Exception as e:
        job_dsl, job_runtime_conf, train_runtime_conf = job_utils.get_model_configuration(job_id=request_data['job_id'],
                                                                                          role=request_data['role'],
                                                                                          party_id=request_data['party_id'])
        if any([job_dsl, job_runtime_conf, train_runtime_conf]):
            adapter = JobRuntimeConfigAdapter(job_runtime_conf)
            model_id = adapter.get_common_parameters().to_dict().get('model_id')
            model_version = adapter.get_common_parameters().to_dict().get('model_version')
        else:
            stat_logger.exception(e)
            stat_logger.error(f"Can not find model info by filters: job id: {request_data.get('job_id')}, "
                              f"role: {request_data.get('role')}, party id: {request_data.get('party_id')}")
            raise Exception(f"Can not find model info by filters: job id: {request_data.get('job_id')}, "
                            f"role: {request_data.get('role')}, party id: {request_data.get('party_id')}")

    tracker = Tracker(job_id=request_data['job_id'], component_name=request_data['component_name'],
                      role=request_data['role'], party_id=request_data['party_id'], model_id=model_id,
                      model_version=model_version)
    dag = schedule_utils.get_job_dsl_parser(dsl=job_dsl, runtime_conf=job_runtime_conf,
                                            train_runtime_conf=train_runtime_conf)
    component = dag.get_component_info(request_data['component_name'])
    output_model_json = {}
    # There is only one model output in the current DSL version.
    output_model = tracker.get_output_model(component.get_output()['model'][0] if component.get_output().get('model') else 'default')
    for buffer_name, buffer_object in output_model.items():
        if buffer_name.endswith('Param'):
            output_model_json = json_format.MessageToDict(buffer_object, including_default_value_fields=True)
    if output_model_json:
        component_define = tracker.get_component_define()
        this_component_model_meta = {}
        for buffer_name, buffer_object in output_model.items():
            if buffer_name.endswith('Meta'):
                this_component_model_meta['meta_data'] = json_format.MessageToDict(buffer_object,
                                                                                   including_default_value_fields=True)
        this_component_model_meta.update(component_define)
        return get_json_result(retcode=0, retmsg='success', data=output_model_json, meta=this_component_model_meta)
    else:
        return get_json_result(retcode=0, retmsg='no data', data={})
Example #15
 def save_pipeline(job_id, role, party_id, model_id, model_version):
     job_dsl, job_runtime_conf, train_runtime_conf = job_utils.get_job_configuration(job_id=job_id, role=role,
                                                                                     party_id=party_id)
     job_parameters = job_runtime_conf.get('job_parameters', {})
     job_type = job_parameters.get('job_type', '')
     if job_type == 'predict':
         return
     dag = job_utils.get_job_dsl_parser(dsl=job_dsl,
                                        runtime_conf=job_runtime_conf,
                                        train_runtime_conf=train_runtime_conf)
     predict_dsl = dag.get_predict_dsl(role=role)
     pipeline = pipeline_pb2.Pipeline()
     pipeline.inference_dsl = json_dumps(predict_dsl, byte=True)
     pipeline.train_dsl = json_dumps(job_dsl, byte=True)
     pipeline.train_runtime_conf = json_dumps(job_runtime_conf, byte=True)
     job_tracker = Tracking(job_id=job_id, role=role, party_id=party_id, model_id=model_id,
                            model_version=model_version)
     job_tracker.save_output_model({'Pipeline': pipeline}, 'pipeline')
Example #16
 def check(job_id, initiator_role, initiator_party_id):
     if WORK_MODE == WorkMode.CLUSTER:
         job_dsl, job_runtime_conf, train_runtime_conf = job_utils.get_job_configuration(
             job_id=job_id,
             role=initiator_role,
             party_id=initiator_party_id)
         job_parameters = job_runtime_conf.get('job_parameters', {})
         job_initiator = job_runtime_conf.get('initiator', {})
         status = TaskScheduler.check_job(
             job_id=job_id,
             roles=job_runtime_conf['role'],
             work_mode=job_parameters['work_mode'],
             initiator_party_id=job_initiator['party_id'],
             initiator_role=job_initiator['role'],
             job_info={
                 'job_id': job_id,
                 'initiator_role': initiator_role,
                 'initiator_party_id': initiator_party_id
             })
     else:
         status = True
     return status
Example #17
    def save_pipelined_model(cls, job_id, role, party_id):
        schedule_logger(job_id).info(
            f"start to save pipeline model on {role} {party_id}")
        job_configuration = job_utils.get_job_configuration(job_id=job_id,
                                                            role=role,
                                                            party_id=party_id)
        runtime_conf_on_party = job_configuration.runtime_conf_on_party
        job_parameters = runtime_conf_on_party.get('job_parameters', {})
        if role in job_parameters.get("assistant_role", []):
            return
        model_id = job_parameters['model_id']
        model_version = job_parameters['model_version']
        job_type = job_parameters.get('job_type', '')
        roles = runtime_conf_on_party['role']
        initiator_role = runtime_conf_on_party['initiator']['role']
        initiator_party_id = runtime_conf_on_party['initiator']['party_id']
        if job_type == 'predict':
            return
        dsl_parser = schedule_utils.get_job_dsl_parser(
            dsl=job_configuration.dsl,
            runtime_conf=job_configuration.runtime_conf,
            train_runtime_conf=job_configuration.train_runtime_conf)

        components_parameters = {}
        tasks = JobSaver.query_task(job_id=job_id,
                                    role=role,
                                    party_id=party_id,
                                    only_latest=True)
        for task in tasks:
            components_parameters[
                task.f_component_name] = task.f_component_parameters
        predict_dsl = schedule_utils.fill_inference_dsl(
            dsl_parser,
            origin_inference_dsl=job_configuration.dsl,
            components_parameters=components_parameters)

        pipeline = pipeline_pb2.Pipeline()
        pipeline.inference_dsl = json_dumps(predict_dsl, byte=True)
        pipeline.train_dsl = json_dumps(job_configuration.dsl, byte=True)
        pipeline.train_runtime_conf = json_dumps(
            job_configuration.runtime_conf, byte=True)
        pipeline.fate_version = RuntimeConfig.get_env("FATE")
        pipeline.model_id = model_id
        pipeline.model_version = model_version

        pipeline.parent = True
        pipeline.loaded_times = 0
        pipeline.roles = json_dumps(roles, byte=True)
        pipeline.initiator_role = initiator_role
        pipeline.initiator_party_id = initiator_party_id
        pipeline.runtime_conf_on_party = json_dumps(runtime_conf_on_party,
                                                    byte=True)
        pipeline.parent_info = json_dumps({}, byte=True)

        tracker = Tracker(job_id=job_id,
                          role=role,
                          party_id=party_id,
                          model_id=model_id,
                          model_version=model_version,
                          job_parameters=RunParameters(**job_parameters))
        tracker.save_pipeline_model(pipeline_buffer_object=pipeline)
        if role != 'local':
            tracker.save_machine_learning_model_info()
        schedule_logger(job_id).info(
            f"save pipeline on {role} {party_id} successfully")
Example #18
    def run_job(job_id, initiator_role, initiator_party_id):
        job_dsl, job_runtime_conf, train_runtime_conf = job_utils.get_job_configuration(job_id=job_id,
                                                                                        role=initiator_role,
                                                                                        party_id=initiator_party_id)
        job_parameters = job_runtime_conf.get('job_parameters', {})
        job_initiator = job_runtime_conf.get('initiator', {})
        dag = get_job_dsl_parser(dsl=job_dsl,
                                 runtime_conf=job_runtime_conf,
                                 train_runtime_conf=train_runtime_conf)
        job_args = dag.get_args_input()
        if not job_initiator:
            return False
        timeout = job_utils.get_timeout(job_id, job_parameters.get("timeout", None), job_runtime_conf, job_dsl)
        t = Timer(timeout, TaskScheduler.job_handler, [job_id])
        t.start()

        job = Job()
        job.f_job_id = job_id
        job.f_start_time = current_timestamp()
        job.f_status = JobStatus.RUNNING
        job.f_update_time = current_timestamp()
        TaskScheduler.sync_job_status(job_id=job_id, roles=job_runtime_conf['role'],
                                      work_mode=job_parameters['work_mode'],
                                      initiator_party_id=job_initiator['party_id'],
                                      initiator_role=job_initiator['role'],
                                      job_info=job.to_json())

        top_level_task_status = set()
        components = dag.get_next_components(None)
        schedule_logger(job_id).info(
            'job {} root components is {}'.format(
                job.f_job_id,
                [component.get_name() for component in components]))
        for component in components:
            try:
                # run a component as task
                run_status = TaskScheduler.run_component(job_id, job_runtime_conf, job_parameters, job_initiator,
                                                         job_args, dag,
                                                         component)
            except Exception as e:
                schedule_logger(job_id).exception(e)
                run_status = False
            top_level_task_status.add(run_status)
            if not run_status:
                break
        if len(top_level_task_status) == 2:
            job.f_status = JobStatus.FAILED
        elif True in top_level_task_status:
            job.f_status = JobStatus.COMPLETE
        else:
            job.f_status = JobStatus.FAILED
        job.f_end_time = current_timestamp()
        job.f_elapsed = job.f_end_time - job.f_start_time
        if job.f_status == JobStatus.COMPLETE:
            job.f_progress = 100
        job.f_update_time = current_timestamp()
        try:
            TaskScheduler.finish_job(job_id=job_id, job_runtime_conf=job_runtime_conf)
        except Exception as e:
            schedule_logger(job_id).exception(e)
            job.f_status = JobStatus.FAILED

        if job.f_status == JobStatus.FAILED:
            TaskScheduler.stop(job_id=job_id, end_status=JobStatus.FAILED)

        try:
            TaskScheduler.sync_job_status(job_id=job_id, roles=job_runtime_conf['role'],
                                          work_mode=job_parameters['work_mode'],
                                          initiator_party_id=job_initiator['party_id'],
                                          initiator_role=job_initiator['role'],
                                          job_info=job.to_json())
        except Exception as e:
            schedule_logger(job_id).exception(e)
            schedule_logger(job_id).warning('job {} sync status failed'.format(job.f_job_id))

        schedule_logger(job_id).info('job {} finished, status is {}'.format(job.f_job_id, job.f_status))
        t.cancel()
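
Since top_level_task_status only ever holds True and/or False, len(...) == 2 means the root components produced mixed results (some succeeded, some failed), which this version maps to FAILED and Example #19 maps to PARTIAL. A compact illustration:

top_level_task_status = set()
for run_status in (True, False):  # one root component succeeded, one failed
    top_level_task_status.add(run_status)

assert len(top_level_task_status) == 2  # mixed results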
Example #19
    def run_job(job_id, initiator_role, initiator_party_id):
        job_dsl, job_runtime_conf, train_runtime_conf = job_utils.get_job_configuration(
            job_id=job_id, role=initiator_role, party_id=initiator_party_id)
        job_parameters = job_runtime_conf.get('job_parameters', {})
        job_initiator = job_runtime_conf.get('initiator', {})
        dag = get_job_dsl_parser(dsl=job_dsl,
                                 runtime_conf=job_runtime_conf,
                                 train_runtime_conf=train_runtime_conf)
        job_args = dag.get_args_input()
        if not job_initiator:
            return False
        storage.init_storage(job_id=job_id, work_mode=RuntimeConfig.WORK_MODE)
        job = Job()
        job.f_job_id = job_id
        job.f_start_time = current_timestamp()
        job.f_status = JobStatus.RUNNING
        job.f_update_time = current_timestamp()
        TaskScheduler.sync_job_status(
            job_id=job_id,
            roles=job_runtime_conf['role'],
            work_mode=job_parameters['work_mode'],
            initiator_party_id=job_initiator['party_id'],
            job_info=job.to_json())

        top_level_task_status = set()
        components = dag.get_next_components(None)
        schedule_logger.info('job {} root components is {}'.format(
            job.f_job_id, [component.get_name() for component in components]))
        for component in components:
            try:
                # run a component as task
                run_status = TaskScheduler.run_component(
                    job_id, job_runtime_conf, job_parameters, job_initiator,
                    job_args, dag, component)
            except Exception as e:
                schedule_logger.info(e)
                run_status = False
            top_level_task_status.add(run_status)
            if not run_status:
                break
        if len(top_level_task_status) == 2:
            job.f_status = JobStatus.PARTIAL
        elif True in top_level_task_status:
            job.f_status = JobStatus.SUCCESS
        else:
            job.f_status = JobStatus.FAILED
        job.f_end_time = current_timestamp()
        job.f_elapsed = job.f_end_time - job.f_start_time
        if job.f_status == JobStatus.SUCCESS:
            job.f_progress = 100
        job.f_update_time = current_timestamp()
        TaskScheduler.sync_job_status(
            job_id=job_id,
            roles=job_runtime_conf['role'],
            work_mode=job_parameters['work_mode'],
            initiator_party_id=job_initiator['party_id'],
            job_info=job.to_json())
        TaskScheduler.finish_job(job_id=job_id,
                                 job_runtime_conf=job_runtime_conf)
        schedule_logger.info('job {} finished, status is {}'.format(
            job.f_job_id, job.f_status))