Example #1
0
 def output_model_reload(cls, job, source_job):
     """Reload inherited component models from a source job into this job's model.

     Both party model ids are built with ``job``'s role and party id; only the
     base ``model_id`` differs (taken from each job's runtime conf under
     ``job_parameters.common.model_id``).

     :param job: job record receiving the models; its ``f_job_id`` is used as
         the target model version and its ``f_inheritance_info`` supplies the
         source version (``job_id``) and the ``component_list`` to reload.
     :param source_job: job record whose runtime conf names the source model id.
     """
     source_common = source_job.f_runtime_conf.get("job_parameters").get("common")
     source_model_id = model_utils.gen_party_model_id(
         source_common.get("model_id"), job.f_role, job.f_party_id)

     target_common = job.f_runtime_conf.get("job_parameters").get("common")
     model_id = model_utils.gen_party_model_id(
         target_common.get("model_id"), job.f_role, job.f_party_id)

     target_model = PipelinedModel(model_id=model_id,
                                   model_version=job.f_job_id)
     target_model.reload_component_model(
         model_id=source_model_id,
         model_version=job.f_inheritance_info.get("job_id"),
         component_list=job.f_inheritance_info.get("component_list"))
Example #2
0
def operate_model(model_operation):
    """Handle a model store / restore / export / import request.

    The request payload is read from ``request.json`` (falling back to the form
    data) and its ``model_id`` is replaced by the party model id built from
    ``model_id``, ``role`` and ``party_id``. Then, depending on the operation:

    * IMPORT: the uploaded ``file`` is saved under ``TEMP_DIRECTORY`` and
      unpacked into the pipelined model.
    * EXPORT: the model is packaged and the archive is sent as an attachment.
    * STORE / RESTORE: a job built by ``gen_model_operation_job_config`` is
      submitted through ``JobController.submit_job``.

    :param model_operation: one of the ``ModelOperation`` values listed above.
    :return: the response produced by ``get_json_result`` or ``send_file``.
    :raises Exception: if the operation is unsupported or an import request
        carries no uploaded file.
    """
    request_config = request.json or request.form.to_dict()
    job_id = generate_job_id()
    if model_operation not in [ModelOperation.STORE, ModelOperation.RESTORE, ModelOperation.EXPORT, ModelOperation.IMPORT]:
        raise Exception('Can not support this operating now: {}'.format(model_operation))
    required_arguments = ["model_id", "model_version", "role", "party_id"]
    check_config(request_config, required_arguments=required_arguments)
    request_config["model_id"] = gen_party_model_id(model_id=request_config["model_id"], role=request_config["role"], party_id=request_config["party_id"])
    if model_operation in [ModelOperation.EXPORT, ModelOperation.IMPORT]:
        if model_operation == ModelOperation.IMPORT:
            model_file = request.files.get('file')
            if model_file is None or not model_file.filename:
                raise Exception('No model file was uploaded with the import request')
            # The filename comes from the client: keep only its last path
            # component so the upload cannot be written outside TEMP_DIRECTORY.
            file_path = os.path.join(TEMP_DIRECTORY, os.path.basename(model_file.filename))
            try:
                os.makedirs(os.path.dirname(file_path), exist_ok=True)
                model_file.save(file_path)
            except Exception:
                # file_path is a regular file, so it is removed with os.remove;
                # a directory-tree removal would fail here and hide the
                # original error.
                if os.path.isfile(file_path):
                    os.remove(file_path)
                raise
            request_config['file'] = file_path
            model = pipelined_model.PipelinedModel(model_id=request_config["model_id"], model_version=request_config["model_version"])
            model.unpack_model(file_path)
            return get_json_result()
        else:
            model = pipelined_model.PipelinedModel(model_id=request_config["model_id"], model_version=request_config["model_version"])
            archive_file_path = model.packaging_model()
            return send_file(archive_file_path, attachment_filename=os.path.basename(archive_file_path), as_attachment=True)
    else:
        data = {}
        job_dsl, job_runtime_conf = gen_model_operation_job_config(request_config, model_operation)
        job_id, job_dsl_path, job_runtime_conf_path, logs_directory, model_info, board_url = JobController.submit_job(
            {'job_dsl': job_dsl, 'job_runtime_conf': job_runtime_conf}, job_id=job_id)
        data.update({'job_dsl_path': job_dsl_path, 'job_runtime_conf_path': job_runtime_conf_path,
                     'board_url': board_url, 'logs_directory': logs_directory})
        return get_json_result(job_id=job_id, data=data)
Example #3
0
def publish_online(config_data):
    """Send a ``publishOnline`` request for the initiator's model to each serving.

    Every address in ``config_data['servings']`` is contacted, even after an
    earlier one has reported a failure.

    :param config_data: dict providing ``initiator`` (``role``, ``party_id``),
        ``job_parameters`` (``model_id``, ``model_version``), the ``role``
        mapping of role name to party ids, and the ``servings`` addresses.
    :return: ``False`` if any serving answered with a non-zero ``statusCode``,
        otherwise ``True``.
    """
    initiator = config_data['initiator']
    initiator_role = initiator['role']
    initiator_party_id = initiator['party_id']
    job_parameters = config_data['job_parameters']
    model_id = job_parameters['model_id']
    model_version = job_parameters['model_version']

    all_published = True
    for address in config_data.get('servings'):
        with grpc.insecure_channel(address) as channel:
            stub = model_service_pb2_grpc.ModelServiceStub(channel)
            publish_request = model_service_pb2.PublishRequest()
            for role_name, party_ids in config_data.get("role").items():
                publish_request.role[role_name].partyId.extend(party_ids)

            # The model version is sent as the table name and the party model
            # id as the namespace of the initiator's model entry.
            initiator_model = publish_request.model[initiator_role].roleModelInfo[initiator_party_id]
            initiator_model.tableName = model_version
            initiator_model.namespace = model_utils.gen_party_model_id(
                model_id, initiator_role, initiator_party_id)

            publish_request.local.role = initiator_role
            publish_request.local.partyId = initiator_party_id
            stat_logger.info(publish_request)
            response = stub.publishOnline(publish_request)
            stat_logger.info(response)
            if response.statusCode != 0:
                all_published = False
    return all_published
Example #4
0
 def __init__(self,
              job_id: str,
              role: str,
              party_id: int,
              model_id: str = None,
              model_version: str = None,
              component_name: str = None,
              component_module_name: str = None,
              task_id: str = None):
     """Set up tracking identifiers and, when possible, the backing pipelined model.

     :param job_id: id of the job being tracked.
     :param role: role of the local party in the job.
     :param party_id: id of the local party.
     :param model_id: base model id; combined with role and party id into
         ``party_model_id``.
     :param model_version: model version; a ``PipelinedModel`` is only created
         when both this and ``party_model_id`` are truthy.
     :param component_name: component being tracked, ``'pipeline'`` if omitted.
     :param component_module_name: module of the component, ``'Pipeline'`` if
         omitted.
     :param task_id: task id; generated from job id and component name if
         omitted.
     """
     self.job_id = job_id
     self.role = role
     self.party_id = party_id
     self.component_name = component_name or 'pipeline'
     self.module_name = component_module_name or 'Pipeline'
     self.task_id = task_id or job_utils.generate_task_id(
         job_id=self.job_id, component_name=self.component_name)

     # The component-level namespace is the job-level one plus the component name.
     job_scope = [
         'fate_flow', 'tracking', 'data', self.job_id, self.role,
         str(self.party_id)
     ]
     self.table_namespace = '_'.join(job_scope + [self.component_name])
     self.job_table_namespace = '_'.join(job_scope)

     self.model_id = model_id
     self.party_model_id = model_utils.gen_party_model_id(model_id=model_id,
                                                          role=role,
                                                          party_id=party_id)
     self.model_version = model_version
     has_model = self.party_model_id and self.model_version
     self.pipelined_model = pipelined_model.PipelinedModel(
         model_id=self.party_model_id,
         model_version=self.model_version) if has_model else None
Example #5
0
def do_load_model():
    """Load a model through ``publish_model.load_model`` and do the bookkeeping.

    After the load call, two best-effort steps follow; any exception raised in
    either of them is logged and otherwise ignored:

    1. if the load succeeded, the matching ``MLModel`` row gets its
       ``f_loaded_times`` counter incremented;
    2. the model directory under ``model_local_cache`` is copied to
       ``loaded_model_backup`` unless the destination already exists (this is
       attempted whatever the load result was).

    :return: JSON result carrying the retcode and retmsg of the load call.
    """
    request_data = request.json
    adapter_servings_config(request_data)
    retcode, retmsg = publish_model.load_model(config_data=request_data)
    load_succeeded = not retcode

    try:
        if load_succeeded:
            with DB.connection_context():
                local_conf = request_data.get("local")
                conditions = (
                    MLModel.f_role == local_conf.get("role"),
                    MLModel.f_party_id == local_conf.get("party_id"),
                    MLModel.f_model_id == request_data.get("job_parameters").get("model_id"),
                    MLModel.f_model_version == request_data.get("job_parameters").get("model_version"),
                )
                model = MLModel.get_or_none(*conditions)
                if model:
                    model.f_loaded_times = model.f_loaded_times + 1
                    model.save()
    except Exception as modify_err:
        stat_logger.exception(modify_err)

    try:
        local_conf = request_data.get("local")
        party_model_id = gen_party_model_id(role=local_conf.get("role"),
                                            party_id=local_conf.get("party_id"),
                                            model_id=request_data.get("job_parameters").get("model_id"))

        def versioned_model_path(root_name):
            # <project base>/<root_name>/<party model id>/<model version>
            return os.path.join(file_utils.get_project_base_directory(), root_name, party_model_id,
                                request_data.get("job_parameters").get("model_version"))

        src_model_path = versioned_model_path('model_local_cache')
        dst_model_path = versioned_model_path('loaded_model_backup')
        if not os.path.exists(dst_model_path):
            shutil.copytree(src=src_model_path, dst=dst_model_path)
    except Exception as copy_err:
        stat_logger.exception(copy_err)

    status = "success" if load_succeeded else "failed"
    operation_record(request_data, "load", status)
    return get_json_result(retcode=retcode, retmsg=retmsg)
Example #6
0
def bind_model_service(config_data):
    """Send a ``publishBind`` request for the initiator's model to each serving.

    Stops at the first serving that answers with a non-zero ``statusCode``.

    :param config_data: dict providing ``service_id``, ``initiator`` (``role``,
        ``party_id``), ``job_parameters`` (``model_id``, ``model_version``),
        the ``role`` mapping of role name to party ids, and the ``servings``
        addresses.
    :return: ``(retcode, retmsg)``: ``(100, message)`` when no serving address
        is configured, the failing response's ``(statusCode, message)`` on the
        first failure, and ``(0, None)`` when every bind succeeded.
    """
    service_id = config_data.get('service_id')
    initiator = config_data['initiator']
    initiator_role = initiator['role']
    initiator_party_id = initiator['party_id']
    job_parameters = config_data['job_parameters']
    model_id = job_parameters['model_id']
    model_version = job_parameters['model_version']

    if not config_data.get('servings'):
        return 100, 'Please configure servings address'

    for address in config_data.get('servings'):
        with grpc.insecure_channel(address) as channel:
            stub = model_service_pb2_grpc.ModelServiceStub(channel)
            bind_request = model_service_pb2.PublishRequest()
            bind_request.serviceId = service_id
            for role_name, party_ids in config_data.get("role").items():
                bind_request.role[role_name].partyId.extend(party_ids)

            # The model version is sent as the table name and the party model
            # id as the namespace of the initiator's model entry.
            initiator_model = bind_request.model[initiator_role].roleModelInfo[initiator_party_id]
            initiator_model.tableName = model_version
            initiator_model.namespace = model_utils.gen_party_model_id(
                model_id, initiator_role, initiator_party_id)

            bind_request.local.role = initiator_role
            bind_request.local.partyId = initiator_party_id
            stat_logger.info(bind_request)
            response = stub.publishBind(bind_request)
            stat_logger.info(response)
            if response.statusCode != 0:
                return response.statusCode, response.message
    return 0, None
Example #7
0
    def __init__(self,
                 job_id: str,
                 role: str,
                 party_id: int,
                 model_id: str = None,
                 model_version: str = None,
                 component_name: str = None,
                 component_module_name: str = None,
                 task_id: str = None,
                 task_version: int = None,
                 job_parameters: RunParameters = None):
        """Store the job/task identity and attach the pipelined model if available.

        :param job_id: id of the job being tracked.
        :param role: role of the local party in the job.
        :param party_id: id of the local party.
        :param model_id: base model id; combined with role and party id into
            ``party_model_id``.
        :param model_version: model version; a ``PipelinedModel`` is only
            created when both this and ``party_model_id`` are truthy.
        :param component_name: component name; falls back to
            ``job_utils.job_virtual_component_name()``.
        :param component_module_name: component module name; falls back to
            ``job_utils.job_virtual_component_module_name()``.
        :param task_id: task id, stored as given.
        :param task_version: task version, stored as given.
        :param job_parameters: run parameters of the job, stored as given.
        """
        self.job_id = job_id
        self.role = role
        self.party_id = party_id

        self.model_id = model_id
        self.party_model_id = model_utils.gen_party_model_id(model_id=model_id,
                                                             role=role,
                                                             party_id=party_id)
        self.model_version = model_version
        has_model = self.party_model_id and self.model_version
        self.pipelined_model = pipelined_model.PipelinedModel(
            model_id=self.party_model_id,
            model_version=self.model_version) if has_model else None

        self.component_name = (component_name
                               or job_utils.job_virtual_component_name())
        self.module_name = (component_module_name
                            or job_utils.job_virtual_component_module_name())
        self.task_id = task_id
        self.task_version = task_version
        self.job_parameters = job_parameters
Example #8
0
def do_load_model():
    """Load a deployed model through ``publish_model.load_model``.

    Steps:

    1. The serving addresses from the service registry are written into the
       request data.
    2. When ``enable_model_store`` is on, the local model cache and the model
       storage are synchronised: a model present only locally is uploaded, a
       model present only in storage is downloaded.
    3. Models that are not deployed are rejected with retcode 100.
    4. After a successful load, the ``f_loaded_times`` counter of the matching
       ``MLModel`` row is incremented; failures of this step are only logged.

    :return: JSON result carrying the retcode and retmsg of the load call (or
        retcode 100 when the model is not deployed).
    """
    request_data = request.json
    request_data['servings'] = RuntimeConfig.SERVICE_DB.get_urls('servings')

    local_conf = request_data['local']
    role = local_conf['role']
    party_id = local_conf['party_id']
    job_conf = request_data['job_parameters']
    model_id = job_conf['model_id']
    model_version = job_conf['model_version']
    party_model_id = model_utils.gen_party_model_id(model_id, role, party_id)

    if get_base_config('enable_model_store', False):
        pipeline_model = pipelined_model.PipelinedModel(party_model_id,
                                                        model_version)
        component_parameters = {
            'model_id': party_model_id,
            'model_version': model_version,
            'store_address': ServiceRegistry.MODEL_STORE_ADDRESS,
        }
        model_storage = get_model_storage(component_parameters)

        # Only act when exactly one side holds the model.
        if pipeline_model.exists() and not model_storage.exists(**component_parameters):
            stat_logger.info(
                f'Uploading {pipeline_model.model_path} to model storage.')
            model_storage.store(**component_parameters)
        elif not pipeline_model.exists() and model_storage.exists(**component_parameters):
            stat_logger.info(
                f'Downloading {pipeline_model.model_path} from model storage.')
            model_storage.restore(**component_parameters)

    deployed = model_utils.check_if_deployed(role, party_id, model_id,
                                             model_version)
    if not deployed:
        return get_json_result(
            retcode=100,
            retmsg="Only deployed models could be used to execute process of loading. "
                   "Please deploy model before loading.")

    retcode, retmsg = publish_model.load_model(request_data)
    try:
        if not retcode:
            with DB.connection_context():
                # Values are re-read from request_data here, after the load call.
                conditions = (
                    MLModel.f_role == request_data["local"]["role"],
                    MLModel.f_party_id == request_data["local"]["party_id"],
                    MLModel.f_model_id == request_data["job_parameters"]["model_id"],
                    MLModel.f_model_version == request_data["job_parameters"]["model_version"],
                )
                model = MLModel.get_or_none(*conditions)
                if model:
                    model.f_loaded_times = model.f_loaded_times + 1
                    model.save()
    except Exception as modify_err:
        stat_logger.exception(modify_err)

    status = "success" if not retcode else "failed"
    operation_record(request_data, "load", status)
    return get_json_result(retcode=retcode, retmsg=retmsg)
Example #9
0
def generate_publish_model_info(config_data):
    """Fill ``config_data['model']`` with per-party model identifiers.

    For every role and party id found in ``config_data['role']`` an entry
    ``config_data['model'][role][party_id]`` is written, holding the party
    model id (``model_id``) and the shared ``model_version``. ``config_data``
    is modified in place and nothing is returned.

    :param config_data: dict providing ``job_parameters`` (``model_id``,
        ``model_version``) and the ``role`` mapping of role name to party ids.
    """
    job_parameters = config_data['job_parameters']
    model_id = job_parameters['model_id']
    model_version = job_parameters['model_version']

    model_info = config_data['model'] = {}
    for role, party_ids in config_data.get("role").items():
        role_info = model_info[role] = {}
        for party_id in party_ids:
            party_model_id = model_utils.gen_party_model_id(model_id, role, party_id)
            role_info[party_id] = {
                'model_id': party_model_id,
                'model_version': model_version,
            }
Example #10
0
def deploy_homo_model(request_data):
    """Deploy an already converted component model as an online serving service.

    :param request_data: dict providing ``model_id``, ``role``, ``party_id``,
        ``model_version``, ``component_name``, ``service_id``,
        ``deployment_type`` and ``deployment_parameters``; ``framework_name``
        is optional and, when missing, is chosen by the convert tool from the
        component model contents and module name.
    :return: ``(retcode, message, deployed_service)``; retcode is 100 with
        ``None`` as third element when the model, the component, its output
        model or its converted model directory is missing, and 0 on success.
    """
    party_model_id = model_utils.gen_party_model_id(
        model_id=request_data["model_id"],
        role=request_data["role"],
        party_id=request_data["party_id"])
    model_version = request_data["model_version"]
    component_name = request_data['component_name']
    service_id = request_data['service_id']
    framework_name = request_data.get('framework_name')

    model = pipelined_model.PipelinedModel(model_id=party_model_id,
                                           model_version=model_version)
    if not model.exists():
        message = 'Model {} {} does not exist'.format(party_model_id,
                                                      model_version)
        return 100, message, None

    # The model alias is looked up in the training DSL saved with the pipeline.
    train_dsl = json_loads(model.read_pipeline_model().train_dsl)
    if component_name not in train_dsl.get('components', {}):
        message = 'Model {} {} does not contain component {}'.format(
            party_model_id, model_version, component_name)
        return 100, message, None

    component_dsl = train_dsl['components'][component_name]
    model_alias_list = component_dsl.get('output', {}).get('model')
    if not model_alias_list:
        message = 'Component {} in Model {} {} does not have output model'.format(
            component_name, party_model_id, model_version)
        return 100, message, None

    # Only the first output model is used; currently there is only one.
    model_alias = model_alias_list[0]
    converted_model_dir = os.path.join(model.variables_data_path,
                                       component_name, model_alias,
                                       "converted_model")
    if not os.path.isdir(converted_model_dir):
        message = "Component {} in Model {} {} isn't converted".format(
            component_name, party_model_id, model_version)
        return 100, message, None

    # todo: use subprocess?
    convert_tool = model.get_homo_model_convert_tool()
    if not framework_name:
        module_name = component_dsl.get('module')
        buffer_obj = model.read_component_model(component_name, model_alias)
        framework_name = convert_tool.get_default_target_framework(
            model_contents=buffer_obj, module_name=module_name)

    model_object = convert_tool.load_converted_model(
        base_dir=converted_model_dir, framework_name=framework_name)
    deployed_service = model_deploy(party_model_id, model_version,
                                    model_object, framework_name, service_id,
                                    request_data['deployment_type'],
                                    request_data['deployment_parameters'])
    message = f"An online serving service is started in the {request_data['deployment_type']} system."
    return 0, message, deployed_service
Example #11
0
def convert_homo_model(request_data):
    """Convert the component models of a stored model to a target framework format.

    Every component listed under ``model_proto`` in the model's define-meta
    file (except ``pipeline``) is passed to the convert tool; each model the
    tool manages to convert is saved under
    ``<variables_data_path>/<component>/<model alias>/converted_model``.

    :param request_data: dict providing ``model_id``, ``role`` and
        ``party_id``; ``model_version`` and ``framework_name`` are read with
        ``get`` and may be absent.
    :return: ``(retcode, message, detail)``; on success retcode is 0 and
        ``detail`` lists ``component_name`` / ``model_alias`` /
        ``converted_model_path`` for each converted model; retcode is 100 with
        ``None`` detail when the model does not exist or nothing was converted.
    """
    party_model_id = model_utils.gen_party_model_id(
        model_id=request_data["model_id"],
        role=request_data["role"],
        party_id=request_data["party_id"])
    model_version = request_data.get("model_version")
    model = pipelined_model.PipelinedModel(model_id=party_model_id,
                                           model_version=model_version)
    if not model.exists():
        return 100, 'Model {} {} does not exist'.format(
            party_model_id, model_version), None

    with open(model.define_meta_path, "r", encoding="utf-8") as fr:
        define_index = yaml.safe_load(fr)

    framework_name = request_data.get("framework_name")
    component_define = define_index.get("component_define", {})
    detail = []
    # todo: use subprocess?
    convert_tool = model.get_homo_model_convert_tool()
    for key, value in define_index.get("model_proto", {}).items():
        if key == 'pipeline':
            continue
        # The module name depends on the component only, not on the alias.
        module_name = component_define.get(key, {}).get('module_name')
        for model_alias in value:
            buffer_obj = model.read_component_model(key, model_alias)
            converted_framework, converted_model = convert_tool.model_convert(
                model_contents=buffer_obj,
                module_name=module_name,
                framework_name=framework_name)
            if not converted_model:
                continue

            converted_model_dir = os.path.join(model.variables_data_path,
                                               key, model_alias,
                                               "converted_model")
            os.makedirs(converted_model_dir, exist_ok=True)
            saved_path = convert_tool.save_converted_model(
                converted_model, converted_framework, converted_model_dir)
            detail.append({
                "component_name": key,
                "model_alias": model_alias,
                "converted_model_path": saved_path
            })

    if not detail:
        return 100, f"No component in model {party_model_id}:{model_version} can be converted.", None

    # The message names the follow-up operation; it previously contained the
    # garbled text "h**o/deploy".
    return (
        0,
        f"Conversion of homogeneous federated learning component(s) in model "
        f"{party_model_id}:{model_version} completed. Use export or homo/deploy "
        f"to download or deploy the converted model.", detail)
Example #12
0
    def read_component_model(self):
        """Read this loader's component model and record where it came from.

        The model is read from the pipelined model identified by the party
        model id (built from ``self.model_id`` and the tracker's role and party
        id) and ``self.model_version``. The result is stored in
        ``self.model_output`` and a ``model_loader`` metric meta describing the
        source is set on the tracker.

        :raises ValueError: if the component model read is empty.
        """
        party_model_id = gen_party_model_id(self.model_id, self.tracker.role,
                                            self.tracker.party_id)
        source_model = PipelinedModel(party_model_id, self.model_version)

        component_model = source_model._read_component_model(
            self.component_name, self.model_alias)
        if not component_model:
            raise ValueError('The component model is empty.')
        self.model_output = component_model

        metric_name = f'{self.component_name}-{self.model_alias}'
        # The recorded model id is the base model id, not the party model id.
        source_info = {
            'model_id': self.model_id,
            'model_version': self.model_version,
            'component_name': self.component_name,
            'model_alias': self.model_alias,
        }
        self.tracker.set_metric_meta(
            'model_loader', metric_name,
            MetricMeta('component_model', 'component_model_info', source_info))
Example #13
0
    def __init__(
        self,
        job_id: str = None,
        role: str = None,
        party_id: int = None,
        model_id: str = None,
        model_version: str = None,
        component_name: str = None,
        component_module_name: str = None,
        task_id: str = None,
        task_version: int = None,
        job_parameters: RunParameters = None,
        max_to_keep: int = None,
        mkdir: bool = True,
    ):
        """Set up the checkpoint directory and the bounded checkpoint queue.

        The checkpoint directory is
        ``<fate flow dir>/model_local_cache/<party model id>/<model version>/checkpoint/<component name>``.

        :param job_id: job id, stored as given.
        :param role: role of the local party; part of the party model id.
        :param party_id: id of the local party; part of the party model id.
        :param model_id: base model id; part of the party model id.
        :param model_version: model version; part of the directory path.
        :param component_name: component name, ``'pipeline'`` if omitted.
        :param component_module_name: module name, ``'Pipeline'`` if omitted.
        :param task_id: task id, stored as given.
        :param task_version: task version, stored as given.
        :param job_parameters: run parameters of the job, stored as given.
        :param max_to_keep: maximum length of ``self.checkpoints``; ``None``
            leaves the queue unbounded.
        :param mkdir: create the checkpoint directory when true.
        :raises ValueError: if ``max_to_keep`` is an integer that is not
            positive.
        :raises TypeError: if ``max_to_keep`` is neither ``None`` nor an
            integer.
        """
        self.job_id = job_id
        self.role = role
        self.party_id = party_id
        self.model_id = model_id
        self.model_version = model_version
        self.party_model_id = gen_party_model_id(self.model_id, self.role,
                                                 self.party_id)
        self.component_name = component_name or 'pipeline'
        self.module_name = component_module_name or 'Pipeline'
        self.task_id = task_id
        self.task_version = task_version
        self.job_parameters = job_parameters
        self.mkdir = mkdir

        model_root = (Path(get_fate_flow_directory()) / 'model_local_cache' /
                      self.party_model_id)
        self.directory = (model_root / model_version / 'checkpoint' /
                          self.component_name)
        if self.mkdir:
            self.directory.mkdir(mode=0o755, parents=True, exist_ok=True)

        # max_to_keep is validated after the directory has been created.
        if max_to_keep is not None:
            if not isinstance(max_to_keep, int):
                raise TypeError('max_to_keep must be an integer')
            if max_to_keep <= 0:
                raise ValueError('max_to_keep must be positive')
        self.checkpoints = deque(maxlen=max_to_keep)
Example #14
0
def operate_model(model_operation):
    """Handle a model store / restore / export / import request.

    The request payload is read from ``request.json`` (falling back to the form
    data) and its ``model_id`` is replaced by the party model id built from
    ``model_id``, ``role`` and ``party_id``. Then, depending on the operation:

    * IMPORT: the uploaded ``file`` is saved under ``TEMP_DIRECTORY`` and
      unpacked; the request's party id must appear in the roles of the model's
      training runtime conf, otherwise the unpacked model is removed and an
      error is raised; model info is saved to the database unless a matching
      record already exists.
    * EXPORT: the model is packaged and sent as an attachment; a missing model
      or any error yields an error response with code 210.
    * STORE / RESTORE: a job built by ``gen_model_operation_job_config`` is
      submitted through ``DAGScheduler.submit``.

    Import and export outcomes are recorded with ``operation_record``.

    :param model_operation: one of the ``ModelOperation`` values listed above.
    :return: the response produced by ``get_json_result``, ``send_file`` or
        ``error_response``.
    :raises Exception: if the operation is unsupported, or if an import fails
        (the failure is recorded first, then re-raised).
    """
    request_config = request.json or request.form.to_dict()
    job_id = job_utils.generate_job_id()
    if model_operation not in [
            ModelOperation.STORE, ModelOperation.RESTORE,
            ModelOperation.EXPORT, ModelOperation.IMPORT
    ]:
        raise Exception(
            'Can not support this operating now: {}'.format(model_operation))
    required_arguments = ["model_id", "model_version", "role", "party_id"]
    check_config(request_config, required_arguments=required_arguments)
    request_config["model_id"] = gen_party_model_id(
        model_id=request_config["model_id"],
        role=request_config["role"],
        party_id=request_config["party_id"])
    if model_operation in [ModelOperation.EXPORT, ModelOperation.IMPORT]:
        if model_operation == ModelOperation.IMPORT:
            try:
                model_file = request.files.get('file')
                if model_file is None or not model_file.filename:
                    raise Exception(
                        'No model file was uploaded with the import request')
                # The filename comes from the client: keep only its last path
                # component so the upload cannot be written outside
                # TEMP_DIRECTORY.
                file_path = os.path.join(
                    TEMP_DIRECTORY, os.path.basename(model_file.filename))
                try:
                    os.makedirs(os.path.dirname(file_path), exist_ok=True)
                    model_file.save(file_path)
                except Exception:
                    # file_path is a regular file, so it is removed with
                    # os.remove; a directory-tree removal would fail here and
                    # hide the original error.
                    if os.path.isfile(file_path):
                        os.remove(file_path)
                    raise
                request_config['file'] = file_path
                model = pipelined_model.PipelinedModel(
                    model_id=request_config["model_id"],
                    model_version=request_config["model_version"])
                model.unpack_model(file_path)

                pipeline = model.read_component_model('pipeline',
                                                      'pipeline')['Pipeline']
                train_runtime_conf = json_loads(pipeline.train_runtime_conf)
                # Accept the party id in both its original and string form,
                # since the request may carry either.
                permitted_party_id = []
                for party_ids in train_runtime_conf.get('role', {}).values():
                    for v in party_ids:
                        permitted_party_id.extend([v, str(v)])
                if request_config["party_id"] not in permitted_party_id:
                    shutil.rmtree(model.model_path)
                    raise Exception(
                        "party id {} is not in model roles, please check if the party id is valid."
                        .format(request_config["party_id"]))
                try:
                    adapter = JobRuntimeConfigAdapter(train_runtime_conf)
                    job_parameters = adapter.get_common_parameters().to_dict()
                    with DB.connection_context():
                        db_model = MLModel.get_or_none(
                            MLModel.f_job_id == job_parameters.get(
                                "model_version"),
                            MLModel.f_role == request_config["role"])
                    if not db_model:
                        model_info = model_utils.gather_model_info_data(model)
                        model_info['imported'] = 1
                        model_info['job_id'] = model_info['f_model_version']
                        model_info['size'] = model.calculate_model_file_size()
                        # Role and party id are the first two '#'-separated
                        # fields of the party model id.
                        party_model_id_parts = request_config[
                            "model_id"].split('#')
                        model_info['role'] = party_model_id_parts[0]
                        model_info['party_id'] = party_model_id_parts[1]
                        if model_utils.compare_version(
                                model_info['f_fate_version'], '1.5.1') == 'lt':
                            # These fields are filled in only for models whose
                            # fate version compares lower than 1.5.1.
                            legacy_conf = model_info.get(
                                'f_train_runtime_conf', {})
                            model_info['roles'] = legacy_conf.get('role', {})
                            model_info['initiator_role'] = legacy_conf.get(
                                'initiator', {}).get('role')
                            model_info['initiator_party_id'] = legacy_conf.get(
                                'initiator', {}).get('party_id')
                            model_info[
                                'work_mode'] = adapter.get_job_work_mode()
                            model_info['parent'] = not model_info.get(
                                'f_inference_dsl')
                        model_utils.save_model_info(model_info)
                    else:
                        stat_logger.info(
                            f'job id: {job_parameters.get("model_version")}, '
                            f'role: {request_config["role"]} model info already existed in database.'
                        )
                except peewee.IntegrityError as e:
                    # An integrity error while saving model info is logged and
                    # does not fail the import.
                    stat_logger.exception(e)
                operation_record(request_config, "import", "success")
                return get_json_result()
            except Exception:
                operation_record(request_config, "import", "failed")
                raise
        else:
            try:
                model = pipelined_model.PipelinedModel(
                    model_id=request_config["model_id"],
                    model_version=request_config["model_version"])
                if model.exists():
                    archive_file_path = model.packaging_model()
                    operation_record(request_config, "export", "success")
                    return send_file(archive_file_path,
                                     attachment_filename=os.path.basename(
                                         archive_file_path),
                                     as_attachment=True)
                else:
                    operation_record(request_config, "export", "failed")
                    res = error_response(
                        response_code=210,
                        retmsg="Model {} {} is not exist.".format(
                            request_config.get("model_id"),
                            request_config.get("model_version")))
                    return res
            except Exception as e:
                operation_record(request_config, "export", "failed")
                stat_logger.exception(e)
                return error_response(response_code=210, retmsg=str(e))
    else:
        data = {}
        job_dsl, job_runtime_conf = gen_model_operation_job_config(
            request_config, model_operation)
        submit_result = DAGScheduler.submit(
            {
                'job_dsl': job_dsl,
                'job_runtime_conf': job_runtime_conf
            },
            job_id=job_id)
        data.update(submit_result)
        operation_record(data=job_runtime_conf,
                         oper_type=model_operation,
                         oper_status='')
        return get_json_result(job_id=job_id, data=data)
Example #15
0
def deploy(config_data):
    """Create a child model for inference from a trained parent model.

    The parent model directory is copied to a new directory for
    ``child_model_version`` (the top-level ``checkpoint`` entry is left out),
    the pipeline metadata of the copy is rewritten (model version, train
    runtime conf, inference DSL and, for FATE versions greater than 1.5.0,
    parent info and ``runtime_conf_on_party``), the model info is persisted
    with ``save_model_info`` and, when requested through
    ``components_checkpoint``, selected component checkpoints are deployed.

    Args:
        config_data: dict describing the request. Keys read here:
            ``model_id``, ``model_version``, ``local`` (with ``role`` and
            ``party_id``), ``child_model_version``, and optionally
            ``components_checkpoint``, ``dsl`` / ``predict_dsl`` and
            ``cpn_list``. A truthy ``cpn_list`` is popped from the dict when
            no explicit DSL is supplied.

    Returns:
        A ``(code, message)`` tuple: ``(0, <success message>)`` (with the
        DSL conversion warning appended when there is one) or
        ``(100, <failure message>)`` when any exception is raised inside the
        ``try`` block.
    """
    model_id = config_data.get('model_id')
    model_version = config_data.get('model_version')
    # NOTE: these lookups run outside the try block below, so a request
    # without a 'local' entry raises AttributeError instead of returning 100.
    local_role = config_data.get('local').get('role')
    local_party_id = config_data.get('local').get('party_id')
    child_model_version = config_data.get('child_model_version')
    components_checkpoint = config_data.get('components_checkpoint', {})
    warning_msg = ""

    try:
        party_model_id = gen_party_model_id(model_id=model_id,
                                            role=local_role,
                                            party_id=local_party_id)
        model = PipelinedModel(model_id=party_model_id,
                               model_version=model_version)
        model_data = model.collect_models(in_bytes=True)
        if "pipeline.pipeline:Pipeline" not in model_data:
            raise Exception("Can not found pipeline file in model.")

        # refuse to continue when check_before_deploy rejects the model
        # (per the error message below: a child model cannot be deployed again)
        if not check_before_deploy(model):
            raise Exception('Child model could not be deployed.')

        # copy the parent model directory into the child model directory;
        # the ignore callback drops 'checkpoint' only at the top level of
        # the source tree and ignores nothing in sub-directories
        deploy_model = PipelinedModel(model_id=party_model_id,
                                      model_version=child_model_version)
        shutil.copytree(src=model.model_path,
                        dst=deploy_model.model_path,
                        ignore=lambda src, names: {'checkpoint'}
                        if src == model.model_path else {})
        pipeline_model = deploy_model.read_pipeline_model()

        train_runtime_conf = json_loads(pipeline_model.train_runtime_conf)
        runtime_conf_on_party = json_loads(
            pipeline_model.runtime_conf_on_party)
        # a conf without 'dsl_version' is treated as DSL version 1
        dsl_version = train_runtime_conf.get("dsl_version", "1")

        parser = get_dsl_parser_by_version(dsl_version)
        train_dsl = json_loads(pipeline_model.train_dsl)
        parent_predict_dsl = json_loads(pipeline_model.inference_dsl)

        # an explicitly supplied DSL ('dsl' takes precedence over
        # 'predict_dsl') is used as-is; it may arrive as a dict or as JSON text
        if config_data.get('dsl') or config_data.get('predict_dsl'):
            inference_dsl = config_data.get('dsl') if config_data.get(
                'dsl') else config_data.get('predict_dsl')
            if not isinstance(inference_dsl, dict):
                inference_dsl = json_loads(inference_dsl)
        else:
            # cpn_list defaults to every component of the train DSL; it is
            # only consumed by the non-v1 branch below
            if config_data.get('cpn_list', None):
                cpn_list = config_data.pop('cpn_list')
            else:
                cpn_list = list(train_dsl.get('components', {}).keys())
            if int(dsl_version) == 1:
                # convert v1 dsl to v2 dsl
                inference_dsl, warning_msg = parser.convert_dsl_v1_to_v2(
                    parent_predict_dsl)
            else:
                parser = get_dsl_parser_by_version(dsl_version)
                inference_dsl = parser.deploy_component(cpn_list, train_dsl)

        # convert v1 conf to v2 conf
        if int(dsl_version) == 1:
            components = parser.get_components_light_weight(inference_dsl)

            from fate_flow.db.component_registry import ComponentRegistry
            job_providers = parser.get_job_providers(
                dsl=inference_dsl, provider_detail=ComponentRegistry.REGISTRY)
            # collect the role parameters of every component, keyed by
            # component name, as input for the conf conversion
            cpn_role_parameters = dict()
            for cpn in components:
                cpn_name = cpn.get_name()
                role_params = parser.parse_component_role_parameters(
                    component=cpn_name,
                    dsl=inference_dsl,
                    runtime_conf=train_runtime_conf,
                    provider_detail=ComponentRegistry.REGISTRY,
                    provider_name=job_providers[cpn_name]["provider"]["name"],
                    provider_version=job_providers[cpn_name]["provider"]
                    ["version"])
                cpn_role_parameters[cpn_name] = role_params
            train_runtime_conf = parser.convert_conf_v1_to_v2(
                train_runtime_conf, cpn_role_parameters)

        # stamp the child model version into the train runtime conf and the
        # pipeline of the copy
        adapter = JobRuntimeConfigAdapter(train_runtime_conf)
        train_runtime_conf = adapter.update_model_id_version(
            model_version=deploy_model.model_version)
        pipeline_model.model_version = child_model_version
        pipeline_model.train_runtime_conf = json_dumps(train_runtime_conf,
                                                       byte=True)

        # save inference dsl into child model file; from here on the DSL is
        # always handled with the version 2 parser
        parser = get_dsl_parser_by_version(2)
        parser.verify_dsl(inference_dsl, "predict")
        inference_dsl = JobSaver.fill_job_inference_dsl(
            job_id=model_version,
            role=local_role,
            party_id=local_party_id,
            dsl_parser=parser,
            origin_inference_dsl=inference_dsl)
        pipeline_model.inference_dsl = json_dumps(inference_dsl, byte=True)

        # parent info and runtime_conf_on_party are only rewritten for
        # models whose fate_version compares greater than 1.5.0
        if compare_version(pipeline_model.fate_version, '1.5.0') == 'gt':
            pipeline_model.parent_info = json_dumps(
                {
                    'parent_model_id': model_id,
                    'parent_model_version': model_version
                },
                byte=True)
            pipeline_model.parent = False
            runtime_conf_on_party['job_parameters'][
                'model_version'] = child_model_version
            pipeline_model.runtime_conf_on_party = json_dumps(
                runtime_conf_on_party, byte=True)

        # save model file, then overwrite the pipeline copy stored under
        # variables/data with the freshly written pipeline.pb
        deploy_model.save_pipeline(pipeline_model)
        shutil.copyfile(
            os.path.join(deploy_model.model_path, "pipeline.pb"),
            os.path.join(deploy_model.model_path, "variables", "data",
                         "pipeline", "pipeline", "Pipeline"))

        # build the model info record for the child model and persist it
        model_info = gather_model_info_data(deploy_model)
        model_info['job_id'] = model_info['f_model_version']
        model_info['size'] = deploy_model.calculate_model_file_size()
        model_info['role'] = local_role
        model_info['party_id'] = local_party_id
        # a model that carries an inference DSL is recorded as non-parent
        model_info['parent'] = False if model_info.get(
            'f_inference_dsl') else True
        # only for fate_version exactly 1.5.0: take roles and initiator from
        # the train runtime conf
        if compare_version(model_info['f_fate_version'], '1.5.0') == 'eq':
            model_info['roles'] = model_info.get('f_train_runtime_conf',
                                                 {}).get('role', {})
            model_info['initiator_role'] = model_info.get(
                'f_train_runtime_conf', {}).get('initiator', {}).get('role')
            model_info['initiator_party_id'] = model_info.get(
                'f_train_runtime_conf', {}).get('initiator',
                                                {}).get('party_id')
        save_model_info(model_info)

        # deploy a checkpoint for every train component that has a
        # 'step_index' or 'step_name' entry in components_checkpoint
        for component_name, component in train_dsl.get('components',
                                                       {}).items():
            step_index = components_checkpoint.get(component_name,
                                                   {}).get('step_index')
            step_name = components_checkpoint.get(component_name,
                                                  {}).get('step_name')
            # step_index wins over step_name; components with neither are
            # skipped
            if step_index is not None:
                step_index = int(step_index)
                step_name = None
            elif step_name is None:
                continue

            # checkpoints are looked up under the parent model id/version
            checkpoint_manager = CheckpointManager(
                role=local_role,
                party_id=local_party_id,
                model_id=model_id,
                model_version=model_version,
                component_name=component_name,
                mkdir=False,
            )
            checkpoint_manager.load_checkpoints_from_disk()
            if checkpoint_manager.latest_checkpoint is not None:
                # second argument: first entry of the component's output
                # model list in the train DSL, or 'default' when it has none
                checkpoint_manager.deploy(
                    child_model_version,
                    component['output']['model'][0] if component.get(
                        'output', {}).get('model') else 'default',
                    step_index,
                    step_name,
                )
    except Exception as e:
        stat_logger.exception(e)
        return 100, f"deploy model of role {local_role} {local_party_id} failed, details: {str(e)}"
    else:
        msg = f"deploy model of role {local_role} {local_party_id} success"
        if warning_msg:
            msg = msg + f", warning: {warning_msg}"
        return 0, msg
Example #16
0
def migration(config_data: dict):
    """Migrate a locally cached model to a new set of roles and party ids.

    The source model directory is copied, the pipeline and every component
    model of the copy are rewritten for the new roles, the copy is packaged
    into an archive and the working copy is removed afterwards.

    Args:
        config_data: migration request. Keys read here: ``model_id``,
            ``model_version``, ``unify_model_version``, ``local`` (with
            ``role``, ``party_id`` and ``migrate_party_id``), ``role``,
            ``migrate_role`` and ``migrate_initiator``.

    Returns:
        A ``(code, message, data)`` tuple. On success ``code`` is ``0`` and
        ``data`` holds ``model_id``, ``model_version`` and the absolute
        ``path`` of the packaged archive. On any exception ``code`` is
        ``100``, ``message`` is the exception text and ``data`` is ``{}``.
    """
    try:
        party_model_id = model_utils.gen_party_model_id(
            model_id=config_data["model_id"],
            role=config_data["local"]["role"],
            party_id=config_data["local"]["party_id"])
        model = pipelined_model.PipelinedModel(
            model_id=party_model_id,
            model_version=config_data["model_version"])
        if not model.exists():
            raise Exception("Can not found {} {} model local cache".format(
                config_data["model_id"], config_data["model_version"]))
        # the target version must not collide with a model already recorded
        with DB.connection_context():
            if MLModel.get_or_none(MLModel.f_model_version ==
                                   config_data["unify_model_version"]):
                raise Exception(
                    "Unify model version {} has been occupied in database. "
                    "Please choose another unify model version and try again.".
                    format(config_data["unify_model_version"]))

        model_data = model.collect_models(in_bytes=True)
        if "pipeline.pipeline:Pipeline" not in model_data:
            raise Exception("Can not found pipeline file in model.")

        migrate_model = pipelined_model.PipelinedModel(
            model_id=model_utils.gen_party_model_id(
                model_id=model_utils.gen_model_id(config_data["migrate_role"]),
                role=config_data["local"]["role"],
                party_id=config_data["local"]["migrate_party_id"]),
            model_version=config_data["unify_model_version"])

        # work on a full copy so the source model stays untouched
        shutil.copytree(src=model.model_path, dst=migrate_model.model_path)

        pipeline = migrate_model.read_component_model('pipeline',
                                                      'pipeline')['Pipeline']

        # rewrite roles / initiator and the model id / version in the
        # train runtime conf of the copy
        train_runtime_conf = json_loads(pipeline.train_runtime_conf)
        train_runtime_conf["role"] = config_data["migrate_role"]
        train_runtime_conf["initiator"] = config_data["migrate_initiator"]

        adapter = JobRuntimeConfigAdapter(train_runtime_conf)
        train_runtime_conf = adapter.update_model_id_version(
            model_id=model_utils.gen_model_id(train_runtime_conf["role"]),
            model_version=migrate_model.model_version)

        # Convert the common parameters once and read everything from the
        # resulting dict. The original accessed ``to_dict`` without calling
        # it for the model id (AttributeError on the bound method) and
        # subscripted the parameters object directly in the success message.
        common_parameters = adapter.get_common_parameters().to_dict()

        # update pipeline.pb file
        pipeline.train_runtime_conf = json_dumps(train_runtime_conf, byte=True)
        pipeline.model_id = bytes(common_parameters.get("model_id"), "utf-8")
        pipeline.model_version = bytes(
            common_parameters.get("model_version"), "utf-8")

        # save updated pipeline.pb file and refresh the copy kept under
        # variables/data
        migrate_model.save_pipeline(pipeline)
        shutil.copyfile(
            os.path.join(migrate_model.model_path, "pipeline.pb"),
            os.path.join(migrate_model.model_path, "variables", "data",
                         "pipeline", "pipeline", "Pipeline"))

        # modify proto: rewrite every non-pipeline component model listed in
        # define_meta.yaml for the new party lists
        with open(
                os.path.join(migrate_model.model_path, 'define',
                             'define_meta.yaml'), 'r') as fin:
            define_yaml = yaml.safe_load(fin)

        for component_name, model_aliases in define_yaml['model_proto'].items():
            if component_name == 'pipeline':
                continue
            for model_alias in model_aliases:
                buffer_obj = migrate_model.read_component_model(
                    component_name, model_alias)
                module_name = define_yaml['component_define'].get(
                    component_name, {}).get('module_name')
                modified_buffer = model_migration(
                    model_contents=buffer_obj,
                    module_name=module_name,
                    old_guest_list=config_data['role']['guest'],
                    new_guest_list=config_data['migrate_role']['guest'],
                    old_host_list=config_data['role']['host'],
                    new_host_list=config_data['migrate_role']['host'],
                    old_arbiter_list=config_data.get('role',
                                                     {}).get('arbiter', None),
                    new_arbiter_list=config_data.get('migrate_role',
                                                     {}).get('arbiter', None))
                migrate_model.save_component_model(
                    component_name=component_name,
                    component_module_name=module_name,
                    model_alias=model_alias,
                    model_buffers=modified_buffer)

        # package the migrated copy, then drop the working directory
        archive_path = os.path.abspath(migrate_model.packaging_model())
        shutil.rmtree(os.path.abspath(migrate_model.model_path))

        message = ("Migrating model successfully. "
                   "The configuration of model has been modified automatically. "
                   "New model id is: {}, model version is: {}. "
                   "Model files can be found at '{}'.").format(
                       common_parameters.get("model_id"),
                       migrate_model.model_version,
                       archive_path)
        return (0, message,
                {"model_id": migrate_model.model_id,
                 "model_version": migrate_model.model_version,
                 "path": archive_path})

    except Exception as e:
        return 100, str(e), {}
Example #17
0
def deploy(config_data):
    """Create a child model for inference from a trained parent model.

    The parent model directory is copied to a new directory for
    ``child_model_version``; the pipeline of the copy gets the new model
    version, the updated train runtime conf and the resolved inference DSL,
    and the resulting model info is persisted.

    Args:
        config_data: dict describing the request. Keys read here:
            ``model_id``, ``model_version``, ``local`` (with ``role`` and
            ``party_id``), ``child_model_version``, and optionally ``dsl`` /
            ``predict_dsl`` and ``cpn_list``. A truthy ``cpn_list`` is popped
            from the dict when it is used.

    Returns:
        A ``(code, message)`` tuple: ``(0, <success message>)`` or
        ``(100, <failure message>)`` when any exception is raised inside the
        ``try`` block.
    """
    model_id = config_data.get('model_id')
    model_version = config_data.get('model_version')
    local_role = config_data.get('local').get('role')
    local_party_id = config_data.get('local').get('party_id')
    child_model_version = config_data.get('child_model_version')

    try:
        party_model_id = model_utils.gen_party_model_id(
            model_id=model_id, role=local_role, party_id=local_party_id)
        model = PipelinedModel(model_id=party_model_id,
                               model_version=model_version)
        model_data = model.collect_models(in_bytes=True)
        if "pipeline.pipeline:Pipeline" not in model_data:
            raise Exception("Can not found pipeline file in model.")

        # refuse to continue when check_before_deploy rejects the model
        if not check_before_deploy(model):
            raise Exception('Child model could not be deployed.')

        # copy proto content from parent model and generate a child model
        deploy_model = PipelinedModel(model_id=party_model_id,
                                      model_version=child_model_version)
        shutil.copytree(src=model.model_path, dst=deploy_model.model_path)
        pipeline = deploy_model.read_component_model('pipeline',
                                                     'pipeline')['Pipeline']

        # modify two pipeline files (model version/ train_runtime_conf)
        train_runtime_conf = json_loads(pipeline.train_runtime_conf)
        adapter = JobRuntimeConfigAdapter(train_runtime_conf)
        train_runtime_conf = adapter.update_model_id_version(
            model_version=deploy_model.model_version)
        pipeline.model_version = child_model_version
        pipeline.train_runtime_conf = json_dumps(train_runtime_conf, byte=True)

        # a conf without 'dsl_version' is treated as DSL version 1; the
        # version is read once and drives both the parser and the branch below
        dsl_version = train_runtime_conf.get('dsl_version', '1')
        parser = get_dsl_parser_by_version(dsl_version)
        train_dsl = json_loads(pipeline.train_dsl)
        parent_predict_dsl = json_loads(pipeline.inference_dsl)

        if str(dsl_version) == '1':
            # v1 models reuse the inference DSL stored in the parent pipeline
            predict_dsl = parent_predict_dsl
        elif config_data.get('dsl') or config_data.get('predict_dsl'):
            # explicit DSL from the request: 'dsl' wins over 'predict_dsl';
            # it may arrive as a dict or as JSON text
            predict_dsl = config_data.get('dsl') or config_data.get(
                'predict_dsl')
            if not isinstance(predict_dsl, dict):
                predict_dsl = json_loads(predict_dsl)
        else:
            # derive the predict DSL from the train DSL, restricted to
            # cpn_list when given, otherwise using every train component
            if config_data.get('cpn_list', None):
                cpn_list = config_data.pop('cpn_list')
            else:
                cpn_list = list(train_dsl.get('components', {}).keys())
            predict_dsl = parser.deploy_component(cpn_list, train_dsl)

        #  save predict dsl into child model file
        parser.verify_dsl(predict_dsl, "predict")
        inference_dsl = parser.get_predict_dsl(
            role=local_role,
            predict_dsl=predict_dsl,
            setting_conf_prefix=file_utils.
            get_federatedml_setting_conf_directory())
        pipeline.inference_dsl = json_dumps(inference_dsl, byte=True)
        # parent info and runtime_conf_on_party are only rewritten for
        # models whose fate_version compares greater than 1.5.0
        if model_utils.compare_version(pipeline.fate_version, '1.5.0') == 'gt':
            pipeline.parent_info = json_dumps(
                {
                    'parent_model_id': model_id,
                    'parent_model_version': model_version
                },
                byte=True)
            pipeline.parent = False
            runtime_conf_on_party = json_loads(pipeline.runtime_conf_on_party)
            runtime_conf_on_party['job_parameters'][
                'model_version'] = child_model_version
            pipeline.runtime_conf_on_party = json_dumps(runtime_conf_on_party,
                                                        byte=True)

        # save model file, then overwrite the pipeline copy stored under
        # variables/data with the freshly written pipeline.pb
        deploy_model.save_pipeline(pipeline)
        shutil.copyfile(
            os.path.join(deploy_model.model_path, "pipeline.pb"),
            os.path.join(deploy_model.model_path, "variables", "data",
                         "pipeline", "pipeline", "Pipeline"))

        # build the model info record for the child model and persist it
        model_info = model_utils.gather_model_info_data(deploy_model)
        model_info['job_id'] = model_info['f_model_version']
        model_info['size'] = deploy_model.calculate_model_file_size()
        model_info['role'] = local_role
        model_info['party_id'] = local_party_id
        model_info['work_mode'] = adapter.get_job_work_mode()
        # a model that carries an inference DSL is recorded as non-parent
        model_info['parent'] = False if model_info.get(
            'f_inference_dsl') else True
        # only for fate_version exactly 1.5.0: take roles and initiator from
        # the train runtime conf
        if model_utils.compare_version(model_info['f_fate_version'],
                                       '1.5.0') == 'eq':
            model_info['roles'] = model_info.get('f_train_runtime_conf',
                                                 {}).get('role', {})
            model_info['initiator_role'] = model_info.get(
                'f_train_runtime_conf', {}).get('initiator', {}).get('role')
            model_info['initiator_party_id'] = model_info.get(
                'f_train_runtime_conf', {}).get('initiator',
                                                {}).get('party_id')
        model_utils.save_model_info(model_info)

    except Exception as e:
        stat_logger.exception(e)
        return 100, f"deploy model of role {local_role} {local_party_id} failed, details: {str(e)}"
    else:
        return 0, f"deploy model of role {local_role} {local_party_id} success"
Example #18
0
def operate_model(model_operation):
    """Handle a model store / restore / export / import request.

    The request payload (JSON body or form data) must contain ``model_id``,
    ``model_version``, ``role`` and ``party_id``; ``model_id`` is replaced by
    the party model id before the operation runs.

    * IMPORT saves the uploaded ``file`` part under ``TEMP_DIRECTORY``,
      unpacks it into the model directory, checks that the requesting party
      id appears in the model's roles and records the model in the database
      if it is not there yet.
    * EXPORT packages the model and sends the archive as an attachment, or
      returns an error response (code 210) when the model does not exist or
      packaging fails.
    * STORE / RESTORE submit a job through ``DAGScheduler``.

    Args:
        model_operation: one of the ``ModelOperation`` values listed above.

    Returns:
        The response object for the request.

    Raises:
        Exception: for an unsupported operation, and for any failure during
            IMPORT (re-raised after the failed operation has been recorded).
    """
    request_config = request.json or request.form.to_dict()
    job_id = job_utils.generate_job_id()
    if model_operation not in [
            ModelOperation.STORE, ModelOperation.RESTORE,
            ModelOperation.EXPORT, ModelOperation.IMPORT
    ]:
        raise Exception(
            'Can not support this operating now: {}'.format(model_operation))
    required_arguments = ["model_id", "model_version", "role", "party_id"]
    check_config(request_config, required_arguments=required_arguments)
    request_config["model_id"] = gen_party_model_id(
        model_id=request_config["model_id"],
        role=request_config["role"],
        party_id=request_config["party_id"])

    if model_operation == ModelOperation.IMPORT:
        try:
            model_file = request.files.get('file')
            if model_file is None or not model_file.filename:
                # fail with a clear message instead of AttributeError on None
                raise Exception(
                    "No model file found in the request, "
                    "please upload it with the 'file' field.")
            # The file name comes from the client: keep only its last path
            # component so the upload cannot be written outside
            # TEMP_DIRECTORY.
            file_path = os.path.join(TEMP_DIRECTORY,
                                     os.path.basename(model_file.filename))
            try:
                os.makedirs(os.path.dirname(file_path), exist_ok=True)
                model_file.save(file_path)
            except Exception:
                # Best-effort removal of a partially written upload.
                # file_path is a regular file, so shutil.rmtree would itself
                # raise here and hide the original error.
                try:
                    if os.path.isfile(file_path):
                        os.remove(file_path)
                except OSError as cleanup_error:
                    stat_logger.exception(cleanup_error)
                raise
            request_config['file'] = file_path
            model = pipelined_model.PipelinedModel(
                model_id=request_config["model_id"],
                model_version=request_config["model_version"])
            model.unpack_model(file_path)

            pipeline = model.read_component_model('pipeline',
                                                  'pipeline')['Pipeline']
            train_runtime_conf = json_loads(pipeline.train_runtime_conf)
            # accept the party id both in its stored form and as a string,
            # since form data always delivers strings
            permitted_party_id = []
            for party_ids in train_runtime_conf.get('role', {}).values():
                for party_id in party_ids:
                    permitted_party_id.extend([party_id, str(party_id)])
            if request_config["party_id"] not in permitted_party_id:
                # drop the just-unpacked model before rejecting the request
                shutil.rmtree(model.model_path)
                raise Exception(
                    "party id {} is not in model roles, please check if the party id is valid."
                    .format(request_config["party_id"]))
            try:
                with DB.connection_context():
                    model_record = MLModel.get_or_none(
                        MLModel.f_job_id == train_runtime_conf[
                            "job_parameters"]["model_version"],
                        MLModel.f_role == request_config["role"])
                    if not model_record:
                        MLModel.create(
                            f_role=request_config["role"],
                            f_party_id=request_config["party_id"],
                            f_roles=train_runtime_conf["role"],
                            f_job_id=train_runtime_conf["job_parameters"]
                            ["model_version"],
                            f_model_id=train_runtime_conf["job_parameters"]
                            ["model_id"],
                            f_model_version=train_runtime_conf[
                                "job_parameters"]["model_version"],
                            f_initiator_role=train_runtime_conf[
                                "initiator"]["role"],
                            f_initiator_party_id=train_runtime_conf[
                                "initiator"]["party_id"],
                            f_runtime_conf=train_runtime_conf,
                            f_work_mode=train_runtime_conf[
                                "job_parameters"]["work_mode"],
                            f_dsl=json_loads(pipeline.train_dsl),
                            f_imported=1,
                            f_job_status='complete')
                    else:
                        stat_logger.info(
                            f'job id: {train_runtime_conf["job_parameters"]["model_version"]}, '
                            f'role: {request_config["role"]} model info already existed in database.'
                        )
            except peewee.IntegrityError as e:
                # an integrity error is logged and the import still counts
                # as successful
                stat_logger.exception(e)
            operation_record(request_config, "import", "success")
            return get_json_result()
        except Exception:
            operation_record(request_config, "import", "failed")
            raise
    elif model_operation == ModelOperation.EXPORT:
        try:
            model = pipelined_model.PipelinedModel(
                model_id=request_config["model_id"],
                model_version=request_config["model_version"])
            if model.exists():
                archive_file_path = model.packaging_model()
                operation_record(request_config, "export", "success")
                return send_file(archive_file_path,
                                 attachment_filename=os.path.basename(
                                     archive_file_path),
                                 as_attachment=True)
            else:
                operation_record(request_config, "export", "failed")
                res = error_response(
                    response_code=210,
                    retmsg="Model {} {} is not exist.".format(
                        request_config.get("model_id"),
                        request_config.get("model_version")))
                return res
        except Exception as e:
            operation_record(request_config, "export", "failed")
            stat_logger.exception(e)
            return error_response(response_code=210, retmsg=str(e))
    else:
        # STORE / RESTORE run as a scheduled job
        data = {}
        job_dsl, job_runtime_conf = gen_model_operation_job_config(
            request_config, model_operation)
        job_id, job_dsl_path, job_runtime_conf_path, logs_directory, model_info, board_url = DAGScheduler.submit(
            {
                'job_dsl': job_dsl,
                'job_runtime_conf': job_runtime_conf
            },
            job_id=job_id)
        data.update({
            'job_dsl_path': job_dsl_path,
            'job_runtime_conf_path': job_runtime_conf_path,
            'board_url': board_url,
            'logs_directory': logs_directory
        })
        operation_record(data=job_runtime_conf,
                         oper_type=model_operation,
                         oper_status='')
        return get_json_result(job_id=job_id, data=data)