Example 1
def get_component_summary():
    request_data = request.json
    try:
        required_params = ["job_id", "component_name", "role", "party_id"]
        detect_utils.check_config(request_data, required_params)
        tracker = Tracker(job_id=request_data["job_id"],
                          component_name=request_data["component_name"],
                          role=request_data["role"],
                          party_id=request_data["party_id"],
                          task_id=request_data.get("task_id", None),
                          task_version=request_data.get("task_version", None))
        summary = tracker.read_summary_from_db()
        if summary:
            if request_data.get("filename"):
                temp_filepath = os.path.join(TEMP_DIRECTORY,
                                             request_data.get("filename"))
                with open(temp_filepath, "w") as fout:
                    fout.write(json.dumps(summary, indent=4))
                return send_file(
                    open(temp_filepath, "rb"),
                    as_attachment=True,
                    attachment_filename=request_data.get("filename"))
            else:
                return get_json_result(data=summary)
        return error_response(
            210,
            "No component summary found, please check if arguments are specified correctly."
        )
    except Exception as e:
        stat_logger.exception(e)
        return error_response(210, str(e))
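For reference, a minimal sketch of the JSON body this handler expects. The required keys are taken from required_params above, the optional keys are the ones read with .get(), and every value here is a placeholder:

payload = {
    "job_id": "20210101120000000000",   # placeholder ids throughout
    "component_name": "hetero_lr_0",
    "role": "guest",
    "party_id": 9999,
    # optional: "task_id", "task_version", "filename" (triggers a file download)
}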
Example 2
def job_view():
    request_data = request.json
    check_request_parameters(request_data)
    job_tracker = Tracker(job_id=request_data['job_id'],
                          role=request_data['role'],
                          party_id=request_data['party_id'])
    job_view_data = job_tracker.get_job_view()
    if job_view_data:
        job_metric_list = job_tracker.get_metric_list(job_level=True)
        job_view_data['model_summary'] = {}
        for metric_namespace, namespace_metrics in job_metric_list.items():
            namespace_summary = job_view_data['model_summary'].setdefault(metric_namespace, {})
            for metric_name in namespace_metrics:
                metric_summary = namespace_summary.setdefault(metric_name, {})
                for metric_data in job_tracker.get_job_metric_data(
                        metric_namespace=metric_namespace,
                        metric_name=metric_name):
                    metric_summary[metric_data.key] = metric_data.value
        return get_json_result(retcode=0, retmsg='success', data=job_view_data)
    else:
        return get_json_result(retcode=101, retmsg='error')
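The nesting built above follows {metric_namespace: {metric_name: {key: value}}}. An illustrative result, with hypothetical namespace, metric and key names:

model_summary_example = {
    "train": {
        "loss": {"best_iteration": 5, "best_loss": 0.52},  # placeholder keys/values
    },
}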
Example 3
def component_metric_all():
    request_data = request.json
    check_request_parameters(request_data)
    tracker = Tracker(job_id=request_data['job_id'],
                      component_name=request_data['component_name'],
                      role=request_data['role'],
                      party_id=request_data['party_id'])
    metrics = tracker.get_metric_list()
    all_metric_data = {}
    if metrics:
        for metric_namespace, metric_names in metrics.items():
            namespace_data = all_metric_data.setdefault(metric_namespace, {})
            for metric_name in metric_names:
                metric_data, metric_meta = get_metric_all_data(
                    tracker=tracker,
                    metric_namespace=metric_namespace,
                    metric_name=metric_name)
                namespace_data[metric_name] = {'data': metric_data,
                                               'meta': metric_meta}
        return get_json_result(retcode=0,
                               retmsg='success',
                               data=all_metric_data)
    else:
        return get_json_result(retcode=0, retmsg='no data', data={})
Example 4
def component_metrics():
    request_data = request.json
    check_request_parameters(request_data)
    tracker = Tracker(job_id=request_data['job_id'], component_name=request_data['component_name'],
                      role=request_data['role'], party_id=request_data['party_id'])
    metrics = tracker.get_metric_list()
    if metrics:
        return get_json_result(retcode=0, retmsg='success', data=metrics)
    else:
        return get_json_result(retcode=0, retmsg='no data', data={})
Example 5
def save_component_summary(job_id: str, component_name: str, task_version: int,
                           task_id: str, role: str, party_id: int):
    request_data = request.json
    tracker = Tracker(job_id=job_id,
                      component_name=component_name,
                      task_id=task_id,
                      task_version=task_version,
                      role=role,
                      party_id=party_id)
    summary_data = request_data['summary']
    tracker.insert_summary_into_db(summary_data)
    return get_json_result()
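Only the 'summary' key is read from the request body here; the routing arguments (job_id, component_name, and so on) arrive through the URL. A sketch of a possible body, with placeholder content:

payload = {"summary": {"loss": 0.52, "iters": 10}}  # hypothetical summary dict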
Example 6
def get_component_output_tables_meta(task_data):
    check_request_parameters(task_data)
    tracker = Tracker(job_id=task_data['job_id'], component_name=task_data['component_name'],
                      role=task_data['role'], party_id=task_data['party_id'])
    job_dsl_parser = schedule_utils.get_job_dsl_parser_by_job_id(job_id=task_data['job_id'])
    if not job_dsl_parser:
        raise Exception('cannot get dag parser, please check if the parameters are correct')
    component = job_dsl_parser.get_component_info(task_data['component_name'])
    if not component:
        raise Exception('cannot find component, please check if the parameters are correct')
    output_data_table_infos = tracker.get_output_data_info()
    output_tables_meta = tracker.get_output_data_table(output_data_infos=output_data_table_infos)
    return output_tables_meta
Example 7
def save_output_data_info(job_id, component_name, task_version, task_id, role,
                          party_id):
    request_data = request.json
    tracker = Tracker(job_id=job_id,
                      component_name=component_name,
                      task_id=task_id,
                      task_version=task_version,
                      role=role,
                      party_id=party_id)
    tracker.insert_output_data_info_into_db(
        data_name=request_data["data_name"],
        table_namespace=request_data["table_namespace"],
        table_name=request_data["table_name"])
    return get_json_result()
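A sketch of the body this endpoint consumes; the three keys are exactly the ones read above, the values are placeholders:

payload = {
    "data_name": "train",                       # placeholder values throughout
    "table_namespace": "output_data_namespace",
    "table_name": "output_data_table",
}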
Example 8
    def clean_table(cls, job_id, role, party_id, component_name):
        # clean data table
        stat_logger.info('start delete {} {} {} {} data table'.format(
            job_id, role, party_id, component_name))

        tracker = Tracker(job_id=job_id,
                          role=role,
                          party_id=party_id,
                          component_name=component_name)
        output_data_table_infos = tracker.get_output_data_info()
        if output_data_table_infos:
            delete_tables_by_table_infos(output_data_table_infos)
            stat_logger.info('delete {} {} {} {} data table success'.format(
                job_id, role, party_id, component_name))
Example 9
def read_output_data_info(job_id, component_name, task_version, task_id, role,
                          party_id):
    request_data = request.json
    tracker = Tracker(job_id=job_id,
                      component_name=component_name,
                      task_id=task_id,
                      task_version=task_version,
                      role=role,
                      party_id=party_id)
    output_data_infos = tracker.read_output_data_info_from_db(
        data_name=request_data["data_name"])
    response_data = []
    for output_data_info in output_data_infos:
        response_data.append(output_data_info.to_human_model_dict())
    return get_json_result(data=response_data)
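Here only data_name is read from the body. A minimal sketch with a placeholder value:

payload = {"data_name": "train"}  # the single key read from request.json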
Example 10
def save_metric_meta(job_id, component_name, task_version, task_id, role,
                     party_id):
    request_data = request.json
    tracker = Tracker(job_id=job_id,
                      component_name=component_name,
                      task_id=task_id,
                      task_version=task_version,
                      role=role,
                      party_id=party_id)
    metric_meta = deserialize_b64(request_data['metric_meta'])
    tracker.save_metric_meta(metric_namespace=request_data['metric_namespace'],
                             metric_name=request_data['metric_name'],
                             metric_meta=metric_meta,
                             job_level=request_data['job_level'])
    return get_json_result()
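Note that metric_meta arrives base64-serialized and is decoded with deserialize_b64. A sketch of a matching body, assuming the sender encodes with a serialize_b64 counterpart (the pre-encoded string below is a placeholder):

payload = {
    "metric_namespace": "train",    # placeholder values throughout
    "metric_name": "loss",
    "metric_meta": "<base64-serialized MetricMeta>",  # output of the assumed serialize_b64
    "job_level": False,
}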
Example 11
    def __init__(self,
                 job_id: str,
                 role: str,
                 party_id: int,
                 model_id: str = None,
                 model_version: str = None,
                 component_name: str = None,
                 component_module_name: str = None,
                 task_id: str = None,
                 task_version: int = None,
                 job_parameters: RunParameters = None):
        self.job_id = job_id
        self.role = role
        self.party_id = party_id
        self.model_id = model_id
        self.model_version = model_version
        self.component_name = component_name if component_name else 'pipeline'
        self.module_name = component_module_name if component_module_name else 'Pipeline'
        self.task_id = task_id
        self.task_version = task_version
        self.job_parameters = job_parameters
        self.job_tracker = Tracker(job_id=job_id,
                                   role=role,
                                   party_id=party_id,
                                   component_name=component_name,
                                   task_id=task_id,
                                   task_version=task_version,
                                   model_id=model_id,
                                   model_version=model_version,
                                   job_parameters=job_parameters)
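A minimal instantiation sketch, assuming this __init__ belongs to the client-side tracker wrapper seen in the other examples (the class name here is hypothetical). Only the required arguments are passed, so component_name and module_name fall back to 'pipeline' and 'Pipeline':

client = TrackerClient(job_id="20210101120000000000",  # placeholder values
                       role="guest",
                       party_id=9999)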
Example 12
def component_output_model():
    request_data = request.json
    check_request_parameters(request_data)
    job_dsl, job_runtime_conf, runtime_conf_on_party, train_runtime_conf = job_utils.get_job_configuration(job_id=request_data['job_id'],
                                                                                                           role=request_data['role'],
                                                                                                           party_id=request_data['party_id'])
    try:
        model_id = runtime_conf_on_party['job_parameters']['model_id']
        model_version = runtime_conf_on_party['job_parameters']['model_version']
    except Exception as e:
        job_dsl, job_runtime_conf, train_runtime_conf = job_utils.get_model_configuration(job_id=request_data['job_id'],
                                                                                          role=request_data['role'],
                                                                                          party_id=request_data['party_id'])
        if any([job_dsl, job_runtime_conf, train_runtime_conf]):
            adapter = JobRuntimeConfigAdapter(job_runtime_conf)
            model_id = adapter.get_common_parameters().to_dict().get('model_id')
            model_version = adapter.get_common_parameters().to_dict().get('model_version')
        else:
            stat_logger.exception(e)
            stat_logger.error(f"Can not find model info by filters: job id: {request_data.get('job_id')}, "
                              f"role: {request_data.get('role')}, party id: {request_data.get('party_id')}")
            raise Exception(f"Can not find model info by filters: job id: {request_data.get('job_id')}, "
                            f"role: {request_data.get('role')}, party id: {request_data.get('party_id')}")

    tracker = Tracker(job_id=request_data['job_id'], component_name=request_data['component_name'],
                      role=request_data['role'], party_id=request_data['party_id'], model_id=model_id,
                      model_version=model_version)
    dag = schedule_utils.get_job_dsl_parser(dsl=job_dsl, runtime_conf=job_runtime_conf,
                                            train_runtime_conf=train_runtime_conf)
    component = dag.get_component_info(request_data['component_name'])
    output_model_json = {}
    # There is only one model output at the current dsl version.
    output_model = tracker.get_output_model(component.get_output()['model'][0] if component.get_output().get('model') else 'default')
    for buffer_name, buffer_object in output_model.items():
        if buffer_name.endswith('Param'):
            output_model_json = json_format.MessageToDict(buffer_object, including_default_value_fields=True)
    if output_model_json:
        component_define = tracker.get_component_define()
        this_component_model_meta = {}
        for buffer_name, buffer_object in output_model.items():
            if buffer_name.endswith('Meta'):
                this_component_model_meta['meta_data'] = json_format.MessageToDict(buffer_object,
                                                                                   including_default_value_fields=True)
        this_component_model_meta.update(component_define)
        return get_json_result(retcode=0, retmsg='success', data=output_model_json, meta=this_component_model_meta)
    else:
        return get_json_result(retcode=0, retmsg='no data', data={})
Example 13
def query_component_output_data_info():
    output_data_infos = Tracker.query_output_data_infos(**request.json)
    if not output_data_infos:
        return get_json_result(retcode=101, retmsg='failed to find data view')
    return get_json_result(retcode=0,
                           retmsg='success',
                           data=[
                               output_data_info.to_json()
                               for output_data_info in output_data_infos
                           ])
Example 14
def component_output_model():
    request_data = request.json
    check_request_parameters(request_data)
    job_dsl, job_runtime_conf, train_runtime_conf = job_utils.get_job_configuration(
        job_id=request_data['job_id'],
        role=request_data['role'],
        party_id=request_data['party_id'])
    model_id = job_runtime_conf['job_parameters']['model_id']
    model_version = job_runtime_conf['job_parameters']['model_version']
    tracker = Tracker(job_id=request_data['job_id'],
                      component_name=request_data['component_name'],
                      role=request_data['role'],
                      party_id=request_data['party_id'],
                      model_id=model_id,
                      model_version=model_version)
    dag = schedule_utils.get_job_dsl_parser(
        dsl=job_dsl,
        runtime_conf=job_runtime_conf,
        train_runtime_conf=train_runtime_conf)
    component = dag.get_component_info(request_data['component_name'])
    output_model_json = {}
    # There is only one model output at the current dsl version.
    output_model = tracker.get_output_model(
        component.get_output()['model'][0]
        if component.get_output().get('model') else 'default')
    for buffer_name, buffer_object in output_model.items():
        if buffer_name.endswith('Param'):
            output_model_json = json_format.MessageToDict(
                buffer_object, including_default_value_fields=True)
    if output_model_json:
        component_define = tracker.get_component_define()
        this_component_model_meta = {}
        for buffer_name, buffer_object in output_model.items():
            if buffer_name.endswith('Meta'):
                this_component_model_meta['meta_data'] = json_format.MessageToDict(
                    buffer_object, including_default_value_fields=True)
        this_component_model_meta.update(component_define)
        return get_json_result(retcode=0,
                               retmsg='success',
                               data=output_model_json,
                               meta=this_component_model_meta)
    else:
        return get_json_result(retcode=0, retmsg='no data', data={})
Example 15
def component_metric_data():
    request_data = request.json
    check_request_parameters(request_data)
    tracker = Tracker(job_id=request_data['job_id'], component_name=request_data['component_name'],
                      role=request_data['role'], party_id=request_data['party_id'])
    metric_data, metric_meta = get_metric_all_data(tracker=tracker, metric_namespace=request_data['metric_namespace'],
                                                   metric_name=request_data['metric_name'])
    if metric_data or metric_meta:
        return get_json_result(retcode=0, retmsg='success', data=metric_data,
                               meta=metric_meta)
    else:
        return get_json_result(retcode=0, retmsg='no data', data=[], meta={})
Example 16
    def save_pipelined_model(cls, job_id, role, party_id):
        schedule_logger(job_id).info(
            'job {} on {} {} start to save pipeline'.format(
                job_id, role, party_id))
        job_dsl, job_runtime_conf, train_runtime_conf = job_utils.get_job_configuration(
            job_id=job_id, role=role, party_id=party_id)
        job_parameters = job_runtime_conf.get('job_parameters', {})
        model_id = job_parameters['model_id']
        model_version = job_parameters['model_version']
        job_type = job_parameters.get('job_type', '')
        if job_type == 'predict':
            return
        dag = schedule_utils.get_job_dsl_parser(
            dsl=job_dsl,
            runtime_conf=job_runtime_conf,
            train_runtime_conf=train_runtime_conf)
        predict_dsl = dag.get_predict_dsl(role=role)
        pipeline = pipeline_pb2.Pipeline()
        pipeline.inference_dsl = json_dumps(predict_dsl, byte=True)
        pipeline.train_dsl = json_dumps(job_dsl, byte=True)
        pipeline.train_runtime_conf = json_dumps(job_runtime_conf, byte=True)
        pipeline.fate_version = RuntimeConfig.get_env("FATE")
        pipeline.model_id = model_id
        pipeline.model_version = model_version
        tracker = Tracker(job_id=job_id,
                          role=role,
                          party_id=party_id,
                          model_id=model_id,
                          model_version=model_version)
        tracker.save_pipelined_model(pipelined_buffer_object=pipeline)
        if role != 'local':
            tracker.save_machine_learning_model_info()
        schedule_logger(job_id).info(
            'job {} on {} {} save pipeline successfully'.format(
                job_id, role, party_id))
Example 17
    def initialize_job_tracker(cls, job_id, role, party_id, job_parameters, roles, is_initiator, dsl_parser):
        tracker = Tracker(job_id=job_id, role=role, party_id=party_id,
                          model_id=job_parameters["model_id"],
                          model_version=job_parameters["model_version"])
        if job_parameters.get("job_type", "") != "predict":
            tracker.init_pipelined_model()
        partner = {}
        show_role = {}
        for _role, _role_party in roles.items():
            if is_initiator or _role == role:
                show_role[_role] = show_role.get(_role, [])
                for _party_id in _role_party:
                    if is_initiator or _party_id == party_id:
                        show_role[_role].append(_party_id)

            if _role != role:
                partner[_role] = partner.get(_role, [])
                partner[_role].extend(_role_party)
            else:
                for _party_id in _role_party:
                    if _party_id != party_id:
                        partner[_role] = partner.get(_role, [])
                        partner[_role].append(_party_id)

        job_args = dsl_parser.get_args_input()
        dataset = cls.get_dataset(is_initiator, role, party_id, roles, job_args)
        tracker.log_job_view({'partner': partner, 'dataset': dataset, 'roles': show_role})
Example 18
    def clean_task(cls, job_id, task_id, task_version, role, party_id,
                   content_type):
        status = set()
        if content_type == "metrics":
            tracker = Tracker(job_id=job_id,
                              role=role,
                              party_id=party_id,
                              task_id=task_id,
                              task_version=task_version)
            status.add(tracker.clean_metrics())
        elif content_type == "table":
            jobs = JobSaver.query_job(job_id=job_id,
                                      role=role,
                                      party_id=party_id)
            if jobs:
                job = jobs[0]
                job_parameters = RunParameters(
                    **job.f_runtime_conf_on_party["job_parameters"])
                tracker = Tracker(job_id=job_id,
                                  role=role,
                                  party_id=party_id,
                                  task_id=task_id,
                                  task_version=task_version,
                                  job_parameters=job_parameters)
                status.add(tracker.clean_task(job.f_runtime_conf_on_party))
        return status == {True}
Example 19
def component_output_data_table():
    output_data_infos = Tracker.query_output_data_infos(**request.json)
    if output_data_infos:
        return get_json_result(retcode=0,
                               retmsg='success',
                               data=[{'table_name': output_data_info.f_table_name,
                                      'table_namespace': output_data_info.f_table_namespace,
                                      'data_name': output_data_info.f_data_name}
                                     for output_data_info in output_data_infos])
    else:
        return get_json_result(
            retcode=100,
            retmsg='No table found, please check if the parameters are correct')
Example 20
def get_job_all_table(job):
    dsl_parser = schedule_utils.get_job_dsl_parser(
        dsl=job.f_dsl,
        runtime_conf=job.f_runtime_conf,
        train_runtime_conf=job.f_train_runtime_conf)
    _, hierarchical_structure = dsl_parser.get_dsl_hierarchical_structure()
    component_table = {}
    component_output_tables = Tracker.query_output_data_infos(
        job_id=job.f_job_id, role=job.f_role, party_id=job.f_party_id)
    for component_name_list in hierarchical_structure:
        for component_name in component_name_list:
            component_table[component_name] = {}
            component_input_table = get_component_input_table(
                dsl_parser, job, component_name)
            component_table[component_name]['input'] = component_input_table
            component_table[component_name]['output'] = {}
            for output_table in component_output_tables:
                if output_table.f_component_name == component_name:
                    component_table[component_name]['output'][output_table.f_data_name] = \
                        {'name': output_table.f_table_name, 'namespace': output_table.f_table_namespace}
    return component_table
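An illustrative shape of the returned component_table; component, data and table names are placeholders:

component_table_example = {
    "dataio_0": {
        "input": "<result of get_component_input_table>",
        "output": {
            "train": {"name": "<f_table_name>", "namespace": "<f_table_namespace>"},
        },
    },
}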
Example 21
    def save_pipelined_model(cls, job_id, role, party_id):
        schedule_logger(job_id).info('job {} on {} {} start to save pipeline'.format(job_id, role, party_id))
        job_dsl, job_runtime_conf, runtime_conf_on_party, train_runtime_conf = job_utils.get_job_configuration(job_id=job_id, role=role,
                                                                                                               party_id=party_id)
        job_parameters = runtime_conf_on_party.get('job_parameters', {})
        if role in job_parameters.get("assistant_role", []):
            return
        model_id = job_parameters['model_id']
        model_version = job_parameters['model_version']
        job_type = job_parameters.get('job_type', '')
        work_mode = job_parameters['work_mode']
        roles = runtime_conf_on_party['role']
        initiator_role = runtime_conf_on_party['initiator']['role']
        initiator_party_id = runtime_conf_on_party['initiator']['party_id']
        if job_type == 'predict':
            return
        dag = schedule_utils.get_job_dsl_parser(dsl=job_dsl,
                                                runtime_conf=job_runtime_conf,
                                                train_runtime_conf=train_runtime_conf)
        predict_dsl = dag.get_predict_dsl(role=role)
        pipeline = pipeline_pb2.Pipeline()
        pipeline.inference_dsl = json_dumps(predict_dsl, byte=True)
        pipeline.train_dsl = json_dumps(job_dsl, byte=True)
        pipeline.train_runtime_conf = json_dumps(job_runtime_conf, byte=True)
        pipeline.fate_version = RuntimeConfig.get_env("FATE")
        pipeline.model_id = model_id
        pipeline.model_version = model_version

        pipeline.parent = True
        pipeline.loaded_times = 0
        pipeline.roles = json_dumps(roles, byte=True)
        pipeline.work_mode = work_mode
        pipeline.initiator_role = initiator_role
        pipeline.initiator_party_id = initiator_party_id
        pipeline.runtime_conf_on_party = json_dumps(runtime_conf_on_party, byte=True)
        pipeline.parent_info = json_dumps({}, byte=True)

        tracker = Tracker(job_id=job_id, role=role, party_id=party_id, model_id=model_id, model_version=model_version)
        tracker.save_pipelined_model(pipelined_buffer_object=pipeline)
        if role != 'local':
            tracker.save_machine_learning_model_info()
        schedule_logger(job_id).info('job {} on {} {} save pipeline successfully'.format(job_id, role, party_id))
Example 22
    def get_task_run_args(cls,
                          job_id,
                          role,
                          party_id,
                          task_id,
                          task_version,
                          job_args,
                          job_parameters: RunParameters,
                          task_parameters: RunParameters,
                          input_dsl,
                          filter_type=None,
                          filter_attr=None,
                          get_input_table=False):
        task_run_args = {}
        input_table = {}
        if 'idmapping' in role:
            return {}
        for input_type, input_detail in input_dsl.items():
            if filter_type and input_type not in filter_type:
                continue
            if input_type == 'data':
                this_type_args = task_run_args[input_type] = task_run_args.get(input_type, {})
                for data_type, data_list in input_detail.items():
                    data_dict = {}
                    for data_key in data_list:
                        data_key_item = data_key.split('.')
                        data_dict[data_key_item[0]] = {data_type: []}
                    for data_key in data_list:
                        data_key_item = data_key.split('.')
                        search_component_name, search_data_name = data_key_item[0], data_key_item[1]
                        storage_table_meta = None
                        if search_component_name == 'args':
                            args_data = job_args.get('data', {}).get(search_data_name, {})
                            if args_data.get('namespace', '') and args_data.get('name', ''):
                                storage_table_meta = storage.StorageTableMeta(
                                    name=args_data['name'],
                                    namespace=args_data['namespace'])
                        else:
                            tracker_client = TrackerClient(
                                job_id=job_id,
                                role=role,
                                party_id=party_id,
                                component_name=search_component_name)
                            upstream_output_table_infos_json = tracker_client.get_output_data_info(
                                data_name=search_data_name)
                            if upstream_output_table_infos_json:
                                tracker = Tracker(
                                    job_id=job_id,
                                    role=role,
                                    party_id=party_id,
                                    component_name=search_component_name)
                                upstream_output_table_infos = []
                                for _ in upstream_output_table_infos_json:
                                    upstream_output_table_infos.append(
                                        fill_db_model_object(
                                            Tracker.get_dynamic_db_model(
                                                TrackingOutputDataInfo,
                                                job_id)(), _))
                                output_tables_meta = tracker.get_output_data_table(
                                    output_data_infos=upstream_output_table_infos)
                                if output_tables_meta:
                                    storage_table_meta = output_tables_meta.get(
                                        search_data_name, None)
                        args_from_component = this_type_args[search_component_name] = this_type_args.get(search_component_name, {})
                        if get_input_table and storage_table_meta:
                            input_table[data_key] = {
                                'namespace': storage_table_meta.get_namespace(),
                                'name': storage_table_meta.get_name()}
                            computing_table = None
                        elif storage_table_meta:
                            LOGGER.info(
                                f"load computing table using {task_parameters.computing_partitions} partitions")
                            computing_table = session.get_latest_opened().computing.load(
                                storage_table_meta.get_address(),
                                schema=storage_table_meta.get_schema(),
                                partitions=task_parameters.computing_partitions)
                        else:
                            computing_table = None

                        if not computing_table or not filter_attr or not filter_attr.get("data"):
                            data_dict[search_component_name][data_type].append(computing_table)
                            args_from_component[data_type] = data_dict[search_component_name][data_type]
                        else:
                            args_from_component[data_type] = {
                                a: getattr(computing_table, "get_{}".format(a))()
                                for a in filter_attr["data"]}
            elif input_type in ['model', 'isometric_model']:
                this_type_args = task_run_args[input_type] = task_run_args.get(input_type, {})
                for dsl_model_key in input_detail:
                    dsl_model_key_items = dsl_model_key.split('.')
                    if len(dsl_model_key_items) == 2:
                        search_component_name, search_model_alias = dsl_model_key_items
                    elif len(dsl_model_key_items) == 3 and dsl_model_key_items[0] == 'pipeline':
                        search_component_name, search_model_alias = dsl_model_key_items[1], dsl_model_key_items[2]
                    else:
                        raise Exception('get input {} failed'.format(input_type))
                    models = Tracker(
                        job_id=job_id,
                        role=role,
                        party_id=party_id,
                        component_name=search_component_name,
                        model_id=job_parameters.model_id,
                        model_version=job_parameters.model_version
                    ).get_output_model(model_alias=search_model_alias)
                    this_type_args[search_component_name] = models
        if get_input_table:
            return input_table
        return task_run_args
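An illustrative shape of the default return value; component names, data types and contents are placeholders:

task_run_args_example = {
    "data": {
        "dataio_0": {"train_data": ["<computing table>"]},
    },
    "model": {
        "hetero_lr_0": "<buffer dict from get_output_model>",
    },
}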
Example 23
    def submit(cls, job_data, job_id=None):
        if not job_id:
            job_id = job_utils.generate_job_id()
        schedule_logger(job_id).info('submit job, job_id {}, body {}'.format(
            job_id, job_data))
        job_dsl = job_data.get('job_dsl', {})
        job_runtime_conf = job_data.get('job_runtime_conf', {})
        job_utils.check_job_runtime_conf(job_runtime_conf)

        job_initiator = job_runtime_conf['initiator']
        conf_adapter = JobRuntimeConfigAdapter(job_runtime_conf)
        common_job_parameters = conf_adapter.get_common_parameters()

        if common_job_parameters.job_type != 'predict':
            # generate job model info
            common_job_parameters.model_id = model_utils.gen_model_id(
                job_runtime_conf['role'])
            common_job_parameters.model_version = job_id
            train_runtime_conf = {}
        else:
            # check predict job parameters
            detect_utils.check_config(common_job_parameters.to_dict(),
                                      ['model_id', 'model_version'])
            # get inference dsl from pipeline model as job dsl
            tracker = Tracker(
                job_id=job_id,
                role=job_initiator['role'],
                party_id=job_initiator['party_id'],
                model_id=common_job_parameters.model_id,
                model_version=common_job_parameters.model_version)
            pipeline_model = tracker.get_output_model('pipeline')
            if not job_dsl:
                job_dsl = json_loads(pipeline_model['Pipeline'].inference_dsl)
            train_runtime_conf = json_loads(
                pipeline_model['Pipeline'].train_runtime_conf)

        job = Job()
        job.f_job_id = job_id
        job.f_dsl = job_dsl
        job.f_train_runtime_conf = train_runtime_conf
        job.f_roles = job_runtime_conf['role']
        job.f_work_mode = common_job_parameters.work_mode
        job.f_initiator_role = job_initiator['role']
        job.f_initiator_party_id = job_initiator['party_id']

        path_dict = job_utils.save_job_conf(
            job_id=job_id,
            role=job.f_initiator_role,
            job_dsl=job_dsl,
            job_runtime_conf=job_runtime_conf,
            job_runtime_conf_on_party={},
            train_runtime_conf=train_runtime_conf,
            pipeline_dsl=None)

        if job.f_initiator_party_id not in job_runtime_conf['role'][
                job.f_initiator_role]:
            schedule_logger(job_id).info("initiator party id error:{}".format(
                job.f_initiator_party_id))
            raise Exception("initiator party id error {}".format(
                job.f_initiator_party_id))

        # create common parameters on initiator
        JobController.backend_compatibility(
            job_parameters=common_job_parameters)
        JobController.adapt_job_parameters(
            role=job.f_initiator_role,
            job_parameters=common_job_parameters,
            create_initiator_baseline=True)

        job.f_runtime_conf = conf_adapter.update_common_parameters(
            common_parameters=common_job_parameters)
        dsl_parser = schedule_utils.get_job_dsl_parser(
            dsl=job.f_dsl,
            runtime_conf=job.f_runtime_conf,
            train_runtime_conf=job.f_train_runtime_conf)

        # initiator runtime conf as template
        job.f_runtime_conf_on_party = job.f_runtime_conf.copy()
        job.f_runtime_conf_on_party[
            "job_parameters"] = common_job_parameters.to_dict()

        if common_job_parameters.work_mode == WorkMode.CLUSTER:
            # Save the status information of all participants in the initiator for scheduling
            for role, party_ids in job.f_roles.items():
                for party_id in party_ids:
                    if role == job.f_initiator_role and party_id == job.f_initiator_party_id:
                        continue
                    JobController.initialize_tasks(job_id, role, party_id,
                                                   False, job.f_initiator_role,
                                                   job.f_initiator_party_id,
                                                   common_job_parameters,
                                                   dsl_parser)

        status_code, response = FederatedScheduler.create_job(job=job)
        if status_code != FederatedSchedulingStatusCode.SUCCESS:
            job.f_status = JobStatus.FAILED
            job.f_tag = "submit_failed"
            FederatedScheduler.sync_job_status(job=job)
            raise Exception("create job failed", response)

        schedule_logger(job_id).info(
            'submit job successfully, job id is {}, model id is {}'.format(
                job.f_job_id, common_job_parameters.model_id))
        board_url = "http://{}:{}{}".format(
            ServiceUtils.get_item("fateboard", "host"),
            ServiceUtils.get_item("fateboard", "port"),
            FATE_BOARD_DASHBOARD_ENDPOINT).format(job_id,
                                                  job_initiator['role'],
                                                  job_initiator['party_id'])
        logs_directory = job_utils.get_job_log_directory(job_id)
        submit_result = {
            "job_id": job_id,
            "model_info": {
                "model_id": common_job_parameters.model_id,
                "model_version": common_job_parameters.model_version
            },
            "logs_directory": logs_directory,
            "board_url": board_url
        }
        submit_result.update(path_dict)
        return submit_result
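A sketch of the job_data this entry point expects; only the keys read above are shown, the values are placeholders, and the exact job_parameters layout depends on the JobRuntimeConfigAdapter version:

job_data_example = {
    "job_dsl": {},  # may be empty for predict jobs (recovered from the pipeline model)
    "job_runtime_conf": {
        "initiator": {"role": "guest", "party_id": 9999},
        "role": {"guest": [9999], "host": [10000]},
        # "job_parameters": consumed via JobRuntimeConfigAdapter
    },
}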
Example 24
    def run_task(cls):
        task_info = {}
        try:
            parser = argparse.ArgumentParser()
            parser.add_argument('-j', '--job_id', required=True, type=str, help="job id")
            parser.add_argument('-n', '--component_name', required=True, type=str, help="component name")
            parser.add_argument('-t', '--task_id', required=True, type=str, help="task id")
            parser.add_argument('-v', '--task_version', required=True, type=int, help="task version")
            parser.add_argument('-r', '--role', required=True, type=str, help="role")
            parser.add_argument('-p', '--party_id', required=True, type=int, help="party id")
            parser.add_argument('-c', '--config', required=True, type=str, help="task parameters")
            parser.add_argument('--run_ip', help="run ip", type=str)
            parser.add_argument('--job_server', help="job server", type=str)
            args = parser.parse_args()
            schedule_logger(args.job_id).info('enter task process')
            schedule_logger(args.job_id).info(args)
            # init function args
            if args.job_server:
                RuntimeConfig.init_config(
                    JOB_SERVER_HOST=args.job_server.split(':')[0],
                    HTTP_PORT=args.job_server.split(':')[1])
                RuntimeConfig.set_process_role(ProcessRole.EXECUTOR)
            job_id = args.job_id
            component_name = args.component_name
            task_id = args.task_id
            task_version = args.task_version
            role = args.role
            party_id = args.party_id
            executor_pid = os.getpid()
            task_info.update({
                "job_id": job_id,
                "component_name": component_name,
                "task_id": task_id,
                "task_version": task_version,
                "role": role,
                "party_id": party_id,
                "run_ip": args.run_ip,
                "run_pid": executor_pid
            })
            start_time = current_timestamp()
            job_conf = job_utils.get_job_conf(job_id, role)
            job_dsl = job_conf["job_dsl_path"]
            job_runtime_conf = job_conf["job_runtime_conf_path"]
            dsl_parser = schedule_utils.get_job_dsl_parser(
                dsl=job_dsl,
                runtime_conf=job_runtime_conf,
                train_runtime_conf=job_conf["train_runtime_conf_path"],
                pipeline_dsl=job_conf["pipeline_dsl_path"])
            party_index = job_runtime_conf["role"][role].index(party_id)
            job_args_on_party = TaskExecutor.get_job_args_on_party(
                dsl_parser, job_runtime_conf, role, party_id)
            component = dsl_parser.get_component_info(
                component_name=component_name)
            component_parameters = component.get_role_parameters()
            component_parameters_on_party = component_parameters[role][
                party_index] if role in component_parameters else {}
            module_name = component.get_module()
            task_input_dsl = component.get_input()
            task_output_dsl = component.get_output()
            component_parameters_on_party[
                'output_data_name'] = task_output_dsl.get('data')
            task_parameters = RunParameters(
                **file_utils.load_json_conf(args.config))
            job_parameters = task_parameters
            if job_parameters.assistant_role:
                TaskExecutor.monkey_patch()
        except Exception as e:
            traceback.print_exc()
            schedule_logger().exception(e)
            task_info["party_status"] = TaskStatus.FAILED
            return
        try:
            job_log_dir = os.path.join(
                job_utils.get_job_log_directory(job_id=job_id), role,
                str(party_id))
            task_log_dir = os.path.join(job_log_dir, component_name)
            log.LoggerFactory.set_directory(directory=task_log_dir,
                                            parent_log_dir=job_log_dir,
                                            append_to_parent_log=True,
                                            force=True)

            tracker = Tracker(job_id=job_id,
                              role=role,
                              party_id=party_id,
                              component_name=component_name,
                              task_id=task_id,
                              task_version=task_version,
                              model_id=job_parameters.model_id,
                              model_version=job_parameters.model_version,
                              component_module_name=module_name,
                              job_parameters=job_parameters)
            tracker_client = TrackerClient(
                job_id=job_id,
                role=role,
                party_id=party_id,
                component_name=component_name,
                task_id=task_id,
                task_version=task_version,
                model_id=job_parameters.model_id,
                model_version=job_parameters.model_version,
                component_module_name=module_name,
                job_parameters=job_parameters)
            run_class_paths = component_parameters_on_party.get(
                'CodePath').split('/')
            run_class_package = '.'.join(
                run_class_paths[:-2]) + '.' + run_class_paths[-2].replace(
                    '.py', '')
            run_class_name = run_class_paths[-1]
            task_info["party_status"] = TaskStatus.RUNNING
            cls.report_task_update_to_driver(task_info=task_info)

            # init environment, process is shared globally
            RuntimeConfig.init_config(
                WORK_MODE=job_parameters.work_mode,
                COMPUTING_ENGINE=job_parameters.computing_engine,
                FEDERATION_ENGINE=job_parameters.federation_engine,
                FEDERATED_MODE=job_parameters.federated_mode)

            if RuntimeConfig.COMPUTING_ENGINE == ComputingEngine.EGGROLL:
                session_options = task_parameters.eggroll_run.copy()
            else:
                session_options = {}

            sess = session.Session(
                computing_type=job_parameters.computing_engine,
                federation_type=job_parameters.federation_engine)
            computing_session_id = job_utils.generate_session_id(
                task_id, task_version, role, party_id)
            sess.init_computing(computing_session_id=computing_session_id,
                                options=session_options)
            federation_session_id = job_utils.generate_task_version_id(
                task_id, task_version)
            component_parameters_on_party[
                "job_parameters"] = job_parameters.to_dict()
            sess.init_federation(
                federation_session_id=federation_session_id,
                runtime_conf=component_parameters_on_party,
                service_conf=job_parameters.engines_address.get(
                    EngineType.FEDERATION, {}))
            sess.as_default()

            schedule_logger().info('Run {} {} {} {} {} task'.format(
                job_id, component_name, task_id, role, party_id))
            schedule_logger().info("Component parameters on party {}".format(
                component_parameters_on_party))
            schedule_logger().info("Task input dsl {}".format(task_input_dsl))
            task_run_args = cls.get_task_run_args(
                job_id=job_id,
                role=role,
                party_id=party_id,
                task_id=task_id,
                task_version=task_version,
                job_args=job_args_on_party,
                job_parameters=job_parameters,
                task_parameters=task_parameters,
                input_dsl=task_input_dsl,
            )
            if module_name in {"Upload", "Download", "Reader", "Writer"}:
                task_run_args["job_parameters"] = job_parameters
            run_object = getattr(importlib.import_module(run_class_package),
                                 run_class_name)()
            run_object.set_tracker(tracker=tracker_client)
            run_object.set_task_version_id(
                task_version_id=job_utils.generate_task_version_id(
                    task_id, task_version))
            # add profile logs
            profile.profile_start()
            run_object.run(component_parameters_on_party, task_run_args)
            profile.profile_ends()
            output_data = run_object.save_data()
            if not isinstance(output_data, list):
                output_data = [output_data]
            for index in range(len(output_data)):
                data_name = (task_output_dsl['data'][index]
                             if task_output_dsl.get('data') else str(index))
                persistent_table_namespace, persistent_table_name = tracker.save_output_data(
                    computing_table=output_data[index],
                    output_storage_engine=job_parameters.storage_engine,
                    output_storage_address=job_parameters.engines_address.get(
                        EngineType.STORAGE, {}))
                if persistent_table_namespace and persistent_table_name:
                    tracker.log_output_data_info(
                        data_name=data_name,
                        table_namespace=persistent_table_namespace,
                        table_name=persistent_table_name)
            output_model = run_object.export_model()
            # There is only one model output at the current dsl version.
            tracker.save_output_model(
                output_model, task_output_dsl['model'][0]
                if task_output_dsl.get('model') else 'default')
            task_info["party_status"] = TaskStatus.SUCCESS
        except Exception as e:
            task_info["party_status"] = TaskStatus.FAILED
            schedule_logger().exception(e)
        finally:
            try:
                task_info["end_time"] = current_timestamp()
                task_info["elapsed"] = task_info["end_time"] - start_time
                cls.report_task_update_to_driver(task_info=task_info)
            except Exception as e:
                task_info["party_status"] = TaskStatus.FAILED
                traceback.print_exc()
                schedule_logger().exception(e)
        schedule_logger().info('task {} {} {} start time: {}'.format(
            task_id, role, party_id, timestamp_to_date(start_time)))
        schedule_logger().info('task {} {} {} end time: {}'.format(
            task_id, role, party_id, timestamp_to_date(task_info["end_time"])))
        schedule_logger().info('task {} {} {} takes {}s'.format(
            task_id, role, party_id,
            int(task_info["elapsed"]) / 1000))
        schedule_logger().info('Finish {} {} {} {} {} {} task {}'.format(
            job_id, component_name, task_id, task_version, role, party_id,
            task_info["party_status"]))

        print('Finish {} {} {} {} {} {} task {}'.format(
            job_id, component_name, task_id, task_version, role, party_id,
            task_info["party_status"]))
        return task_info
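The argparse block above fixes the executor's command line. An equivalent invocation might look like this; the script path and all values are illustrative only:

# python task_executor.py -j 20210101120000000000 -n hetero_lr_0 \
#     -t 20210101120000000000_hetero_lr_0 -v 0 -r guest -p 9999 \
#     -c /tmp/task_parameters.json --job_server 127.0.0.1:9380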
Example 25
    def submit(cls, job_data, job_id=None):
        if not job_id:
            job_id = job_utils.generate_job_id()
        schedule_logger(job_id).info('submit job, job_id {}, body {}'.format(job_id, job_data))
        job_dsl = job_data.get('job_dsl', {})
        job_runtime_conf = job_data.get('job_runtime_conf', {})
        job_initiator = job_runtime_conf['initiator']
        job_parameters = RunParameters(**job_runtime_conf['job_parameters'])
        cls.backend_compatibility(job_parameters=job_parameters)

        job_utils.check_job_runtime_conf(job_runtime_conf)
        if job_parameters.job_type != 'predict':
            # generate job model info
            job_parameters.model_id = model_utils.gen_model_id(job_runtime_conf['role'])
            job_parameters.model_version = job_id
            train_runtime_conf = {}
        else:
            detect_utils.check_config(job_parameters.to_dict(), ['model_id', 'model_version'])
            # get inference dsl from pipeline model as job dsl
            tracker = Tracker(job_id=job_id, role=job_initiator['role'], party_id=job_initiator['party_id'],
                              model_id=job_parameters.model_id, model_version=job_parameters.model_version)
            pipeline_model = tracker.get_output_model('pipeline')
            if not job_dsl:
                job_dsl = json_loads(pipeline_model['Pipeline'].inference_dsl)
            train_runtime_conf = json_loads(pipeline_model['Pipeline'].train_runtime_conf)

        path_dict = job_utils.save_job_conf(job_id=job_id,
                                            job_dsl=job_dsl,
                                            job_runtime_conf=job_runtime_conf,
                                            train_runtime_conf=train_runtime_conf,
                                            pipeline_dsl=None)

        job = Job()
        job.f_job_id = job_id
        job.f_dsl = job_dsl
        job_runtime_conf["job_parameters"] = job_parameters.to_dict()
        job.f_runtime_conf = job_runtime_conf
        job.f_train_runtime_conf = train_runtime_conf
        job.f_roles = job_runtime_conf['role']
        job.f_work_mode = job_parameters.work_mode
        job.f_initiator_role = job_initiator['role']
        job.f_initiator_party_id = job_initiator['party_id']

        initiator_role = job_initiator['role']
        initiator_party_id = job_initiator['party_id']
        if initiator_party_id not in job_runtime_conf['role'][initiator_role]:
            schedule_logger(job_id).info("initiator party id error:{}".format(initiator_party_id))
            raise Exception("initiator party id error {}".format(initiator_party_id))

        dsl_parser = schedule_utils.get_job_dsl_parser(dsl=job_dsl,
                                                       runtime_conf=job_runtime_conf,
                                                       train_runtime_conf=train_runtime_conf)

        cls.adapt_job_parameters(job_parameters=job_parameters)

        # update runtime conf
        job_runtime_conf["job_parameters"] = job_parameters.to_dict()
        job.f_runtime_conf = job_runtime_conf

        status_code, response = FederatedScheduler.create_job(job=job)
        if status_code != FederatedSchedulingStatusCode.SUCCESS:
            raise Exception("create job failed: {}".format(response))

        if job_parameters.work_mode == WorkMode.CLUSTER:
            # Save the status information of all participants in the initiator for scheduling
            for role, party_ids in job_runtime_conf["role"].items():
                for party_id in party_ids:
                    if role == job_initiator['role'] and party_id == job_initiator['party_id']:
                        continue
                    JobController.initialize_tasks(job_id, role, party_id, False, job_initiator, job_parameters, dsl_parser)

        # push into queue
        try:
            JobQueue.create_event(job_id=job_id, initiator_role=initiator_role, initiator_party_id=initiator_party_id)
        except Exception as e:
            raise Exception(f'push job into queue failed:\n{e}')

        schedule_logger(job_id).info(
            'submit job successfully, job id is {}, model id is {}'.format(job.f_job_id, job_parameters.model_id))
        board_url = "http://{}:{}{}".format(
            ServiceUtils.get_item("fateboard", "host"),
            ServiceUtils.get_item("fateboard", "port"),
            FATE_BOARD_DASHBOARD_ENDPOINT).format(job_id, job_initiator['role'], job_initiator['party_id'])
        logs_directory = job_utils.get_job_log_directory(job_id)
        return job_id, path_dict['job_dsl_path'], path_dict['job_runtime_conf_path'], logs_directory, \
               {'model_id': job_parameters.model_id, 'model_version': job_parameters.model_version}, board_url