Code Example #1
File: log_utils.py Project: FederatedAI/FATE-Flow
def get_job_logger(job_id, log_type):
    fate_flow_log_dir = get_fate_flow_directory('logs', 'fate_flow')
    job_log_dir = get_fate_flow_directory('logs', job_id)
    if not job_id:
        log_dirs = [fate_flow_log_dir]
    else:
        if log_type == 'audit':
            log_dirs = [job_log_dir, fate_flow_log_dir]
        else:
            log_dirs = [job_log_dir]
    if LoggerFactory.log_share:
        oldmask = os.umask(000)
        os.makedirs(job_log_dir, exist_ok=True)
        os.makedirs(fate_flow_log_dir, exist_ok=True)
        os.umask(oldmask)
    else:
        os.makedirs(job_log_dir, exist_ok=True)
        os.makedirs(fate_flow_log_dir, exist_ok=True)
    logger = LoggerFactory.new_logger(f"{job_id}_{log_type}")
    for job_log_dir in log_dirs:
        handler = LoggerFactory.get_handler(class_name=None,
                                            level=LoggerFactory.LEVEL,
                                            log_dir=job_log_dir,
                                            log_type=log_type,
                                            job_id=job_id)
        error_handler = LoggerFactory.get_handler(class_name=None,
                                                  level=logging.ERROR,
                                                  log_dir=job_log_dir,
                                                  log_type=log_type,
                                                  job_id=job_id)
        logger.addHandler(handler)
        logger.addHandler(error_handler)
    with LoggerFactory.lock:
        LoggerFactory.schedule_logger_dict[job_id + log_type] = logger
    return logger
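
A minimal usage sketch for the helper above; the job id is made up and it assumes LoggerFactory has already been initialized by FATE-Flow at startup:

# Hypothetical call: build a per-job logger and write to it.
schedule_logger = get_job_logger(job_id="202203150001", log_type="schedule")
schedule_logger.info("job started")
schedule_logger.error("job failed")  # also routed to the ERROR-level handler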
Code Example #2
 def setUp(self):
     self.data_dir = os.path.join(get_fate_flow_directory(), "examples", "data")
     self.upload_guest_config = {"file": os.path.join(self.data_dir, "breast_hetero_guest.csv"), "head": 1,
                                 "partition": 10, "namespace": "experiment",
                                 "table_name": "breast_hetero_guest", "use_local_data": 0, 'drop': 1, 'backend': 0, "id_delimiter": ',', }
     self.upload_host_config = {"file": os.path.join(self.data_dir, "breast_hetero_host.csv"), "head": 1,
                                "partition": 10, "namespace": "experiment",
                                "table_name": "breast_hetero_host", "use_local_data": 0, 'drop': 1, 'backend': 0, "id_delimiter": ',', }
     self.download_config = {"output_path": os.path.join(get_fate_flow_directory(),
                                                         "fate_flow/fate_flow_unittest_breast_b.csv"),
                             "namespace": "experiment",
                             "table_name": "breast_hetero_guest"}
     self.server_url = "http://{}:{}/{}".format(HOST, HTTP_PORT, API_VERSION)
Code Example #3
    def __init__(self, model_id, model_version):
        """
        Support operations on FATE PipelinedModels
        :param model_id: the model id stored at the local party.
        :param model_version: the model version.
        """
        os.makedirs(TEMP_DIRECTORY, exist_ok=True)

        self.model_id = model_id
        self.model_version = model_version
        self.model_path = get_fate_flow_directory("model_local_cache",
                                                  model_id, model_version)
        self.define_proto_path = os.path.join(self.model_path, "define",
                                              "proto")
        self.define_proto_generated_path = os.path.join(
            self.model_path, "define", "proto_generated_python")
        self.define_meta_path = os.path.join(self.model_path, "define",
                                             "define_meta.yaml")
        self.variables_index_path = os.path.join(self.model_path, "variables",
                                                 "index")
        self.variables_data_path = os.path.join(self.model_path, "variables",
                                                "data")
        self.run_parameters_path = os.path.join(self.model_path,
                                                "run_parameters")
        self.default_archive_format = "zip"
        self.pipeline_model_name = "Pipeline"
        self.pipeline_model_alias = "pipeline"

        super().__init__(self.model_path)
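
A construction sketch for the class above, assuming it is the PipelinedModel class referenced in Code Example #7; the identifiers are made up, and model_path then resolves to model_local_cache/<model_id>/<model_version> under the FATE-Flow directory:

# Hypothetical instantiation of PipelinedModel with made-up identifiers.
model = PipelinedModel(model_id="guest#9999#some_model_id",
                       model_version="202203150001")
print(model.define_meta_path)  # .../define/define_meta.yaml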
Code Example #4
 def create_component_model(self,
                            component_name,
                            component_module_name,
                            model_alias,
                            model_buffers: typing.Dict[str,
                                                       typing.Tuple[str,
                                                                    bytes,
                                                                    dict]],
                            user_specified_run_parameters: dict = None):
     model_proto_index = {}
     component_model = {"buffer": {}}
     component_model_storage_path = os.path.join(self.variables_data_path,
                                                 component_name,
                                                 model_alias)
     for model_name, (proto_index, object_serialized,
                      object_json) in model_buffers.items():
         storage_path = os.path.join(component_model_storage_path,
                                     model_name)
         component_model["buffer"][storage_path.replace(
             get_fate_flow_directory(),
             "")] = (base64.b64encode(object_serialized).decode(),
                     object_json)
         model_proto_index[
             model_name] = proto_index  # index of model name and proto buffer class name
         stat_logger.info("save {} {} {} buffer".format(
             component_name, model_alias, model_name))
     component_model["component_name"] = component_name
     component_model["component_module_name"] = component_module_name
     component_model["model_alias"] = model_alias
     component_model["model_proto_index"] = model_proto_index
     component_model["run_parameters"] = user_specified_run_parameters
     return component_model
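
A sketch of how the method above might be fed, assuming it is a method of the PipelinedModel instance from the previous sketch; the component and model names are made up, and each model_buffers value is a (proto_index, serialized_bytes, json_dict) tuple as the loop expects:

# Hypothetical buffers for a single model object of a single component.
model_buffers = {
    "SomeModelParam": (
        "SomeModelParam",        # proto buffer class name used as the index
        b"\x08\x01",             # serialized protobuf bytes
        {"iterations": 10},      # JSON-friendly view of the same object
    )
}
component_model = model.create_component_model(
    component_name="some_component_0",
    component_module_name="SomeComponent",
    model_alias="model",
    model_buffers=model_buffers,
)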
Code Example #5
File: template_app.py Project: FederatedAI/FATE-Flow
def template_download():
    min_data = request.json.get("min_data", False) if request.json else False
    memory_file = io.BytesIO()
    dir_dict = {}
    template_info = file_utils.load_yaml_conf(TEMPLATE_INFO_PATH)
    data_dir = template_info.get("template_data", {}).get("base_dir")
    min_data_file = template_info.get("template_data", {}).get("min_data", [])
    for name, dir_name in template_info.get("template_path", {}).items():
        dir_dict[name] = os.path.join(get_fate_flow_directory(), dir_name)
    delete_dir_list = []
    for name, dir_list in template_info.get("delete_path").items():
        for dir_name in dir_list:
            delete_dir_list.append(os.path.join(dir_dict[name], dir_name))
    tar = tarfile.open(fileobj=memory_file, mode='w:gz')
    for name, base_dir in dir_dict.items():
        for root, dir, files in os.walk(base_dir):
            for file in files:
                if min_data:
                    if data_dir in root and file not in min_data_file:
                        continue
                if root in delete_dir_list:
                    continue
                full_path = os.path.join(root, file)
                rel_path = os.path.join(name,
                                        os.path.relpath(full_path, base_dir))
                tar.add(full_path, rel_path)
    tar.close()
    memory_file.seek(0)
    return send_file(memory_file,
                     attachment_filename=f'template.tar.gz',
                     as_attachment=True)
Code Example #6
    def deploy(self,
               new_model_version: str,
               model_alias: str,
               step_index: int = None,
               step_name: str = None):
        if step_index is not None:
            checkpoint = self.get_checkpoint_by_index(step_index)
        elif step_name is not None:
            checkpoint = self.get_checkpoint_by_name(step_name)
        else:
            raise KeyError('step_index or step_name is required.')

        if checkpoint is None:
            raise TypeError('Checkpoint not found.')
        # check files hash
        checkpoint.read()

        directory = (Path(get_fate_flow_directory()) / 'model_local_cache' /
                     self.party_model_id / new_model_version)
        target = directory / 'variables' / 'data' / self.component_name / model_alias
        locker = Locker(directory)

        with locker.lock:
            rmtree(target, True)
            copytree(checkpoint.directory, target,
                     ignore=lambda src, names: {i for i in names if i.startswith('.')})

            for f in target.glob('*.pb'):
                f.replace(f.with_suffix(''))
Code Example #7
def query_model_info_from_file(model_id=None,
                               model_version=None,
                               role=None,
                               party_id=None,
                               query_filters=None,
                               to_dict=False,
                               **kwargs):
    res = {} if to_dict else []
    model_dir = os.path.join(get_fate_flow_directory(), 'model_local_cache')
    glob_dir = f"{model_dir}{os.sep}{role if role else '*'}#{party_id if party_id else '*'}#{model_id if model_id else '*'}{os.sep}{model_version if model_version else '*'}"
    stat_logger.info(f'glob model dir: {glob_dir}')
    model_fp_list = glob.glob(glob_dir)
    if model_fp_list:
        for fp in model_fp_list:
            pipeline_model = PipelinedModel(model_id=fp.split(os.path.sep)[-2],
                                            model_version=fp.split(
                                                os.path.sep)[-1])
            model_info = gather_model_info_data(pipeline_model,
                                                query_filters=query_filters)
            if model_info:
                _role = fp.split('/')[-2].split('#')[0]
                _party_id = fp.split('/')[-2].split('#')[1]
                model_info["f_role"] = _role
                model_info["f_party_id"] = _party_id
                if isinstance(res, dict):
                    res[fp] = model_info
                else:
                    res.append(model_info)

                if kwargs.get('save'):
                    try:
                        insert_info = gather_model_info_data(
                            pipeline_model).copy()
                        insert_info['role'] = _role
                        insert_info['party_id'] = _party_id
                        insert_info['job_id'] = insert_info.get(
                            'f_model_version')
                        insert_info['size'] = pipeline_model.calculate_model_file_size()
                        if compare_version(insert_info['f_fate_version'],
                                           '1.5.1') == 'lt':
                            insert_info['roles'] = insert_info.get(
                                'f_train_runtime_conf', {}).get('role', {})
                            insert_info['initiator_role'] = insert_info.get(
                                'f_train_runtime_conf', {}).get('initiator', {}).get('role')
                            insert_info['initiator_party_id'] = insert_info.get(
                                'f_train_runtime_conf', {}).get('initiator', {}).get('party_id')
                        save_model_info(insert_info)
                    except Exception as e:
                        stat_logger.exception(e)
    if res:
        return 0, 'Query model info from local model success.', res
    return 100, 'Query model info failed, cannot find model from local model files.', res
Code Example #8
File: table_test.py Project: FederatedAI/FATE-Flow
 def setUp(self):
     self.data_dir = os.path.join(get_fate_flow_directory(), "examples",
                                  "data")
     self.upload_config = {
         "file": os.path.join(self.data_dir, "breast_hetero_guest.csv"),
         "head": 1,
         "partition": 10,
         "namespace": "fate_flow_test_table_breast_hetero",
         "table_name": "breast_hetero_guest",
         "use_local_data": 0,
         'drop': 1,
         'backend': 0,
         "id_delimiter": ','
     }
Code Example #9
 def get_log_file_path(self):
     status = parameters_check(self.log_type, self.job_id, self.role, self.party_id, self.component_name)
     if not status:
         raise Exception(f"job type {self.log_type} Missing parameters")
     type_dict = {
         "jobSchedule": os.path.join(self.job_id, "fate_flow_schedule.log"),
         "jobScheduleError": os.path.join(self.job_id, "fate_flow_schedule_error.log"),
         "partyError": os.path.join(self.job_id, self.role, self.party_id, "ERROR.log"),
         "partyWarning": os.path.join(self.job_id, self.role, self.party_id, "WARNING.log"),
         "partyInfo": os.path.join(self.job_id,self.role, self.party_id, "INFO.log"),
         "partyDebug": os.path.join(self.job_id, self.role, self.party_id, "DEBUG.log"),
         "componentInfo": os.path.join(self.job_id, self.role, self.party_id, self.component_name, "INFO.log")
     }
     if self.log_type not in type_dict.keys():
         raise Exception(f"no found log  type {self.log_type}")
     return os.path.join(get_fate_flow_directory('logs'), type_dict[self.log_type])
Code Example #10
    def __init__(
        self,
        job_id: str = None,
        role: str = None,
        party_id: int = None,
        model_id: str = None,
        model_version: str = None,
        component_name: str = None,
        component_module_name: str = None,
        task_id: str = None,
        task_version: int = None,
        job_parameters: RunParameters = None,
        max_to_keep: int = None,
        mkdir: bool = True,
    ):
        self.job_id = job_id
        self.role = role
        self.party_id = party_id
        self.model_id = model_id
        self.model_version = model_version
        self.party_model_id = gen_party_model_id(self.model_id, self.role,
                                                 self.party_id)
        self.component_name = component_name if component_name else 'pipeline'
        self.module_name = component_module_name if component_module_name else 'Pipeline'
        self.task_id = task_id
        self.task_version = task_version
        self.job_parameters = job_parameters
        self.mkdir = mkdir

        self.directory = (Path(get_fate_flow_directory()) /
                          'model_local_cache' / self.party_model_id /
                          model_version / 'checkpoint' / self.component_name)
        if self.mkdir:
            self.directory.mkdir(0o755, True, True)

        if isinstance(max_to_keep, int):
            if max_to_keep <= 0:
                raise ValueError('max_to_keep must be positive')
        elif max_to_keep is not None:
            raise TypeError('max_to_keep must be an integer')
        self.checkpoints = deque(maxlen=max_to_keep)
Code Example #11
File: model_app.py Project: FederatedAI/FATE-Flow
def get_predict_conf():
    request_data = request.json
    model_dir = os.path.join(get_fate_flow_directory(), 'model_local_cache')
    model_fp_list = glob.glob(
        model_dir +
        f"/guest#*#{request_data['model_id']}/{request_data['model_version']}")
    if model_fp_list:
        fp = model_fp_list[0]
        pipeline_model = pipelined_model.PipelinedModel(
            fp.split('/')[-2],
            fp.split('/')[-1])
        pipeline = pipeline_model.read_pipeline_model()
        predict_dsl = json_loads(pipeline.inference_dsl)

        train_runtime_conf = json_loads(pipeline.train_runtime_conf)
        parser = schedule_utils.get_dsl_parser_by_version(
            train_runtime_conf.get('dsl_version', '1'))
        predict_conf = parser.generate_predict_conf_template(
            predict_dsl, train_runtime_conf, request_data['model_id'],
            request_data['model_version'])
    else:
        predict_conf = ''
    if predict_conf:
        if request_data.get("filename"):
            os.makedirs(TEMP_DIRECTORY, exist_ok=True)
            temp_filepath = os.path.join(TEMP_DIRECTORY,
                                         request_data.get("filename"))
            with open(temp_filepath, "w") as fout:

                fout.write(json_dumps(predict_conf, indent=4))
            return send_file(open(temp_filepath, "rb"),
                             as_attachment=True,
                             attachment_filename=request_data.get("filename"))
        else:
            return get_json_result(data=predict_conf)
    return error_response(
        210,
        "No model found, please check if arguments are specified correctly.")
Code Example #12
 def write_component_model(self, component_model):
     for storage_path, (
             object_serialized_encoded,
             object_json) in component_model.get("buffer").items():
         storage_path = get_fate_flow_directory() + storage_path
         os.makedirs(os.path.dirname(storage_path), exist_ok=True)
         with self.lock, open(storage_path, "wb") as fw:
             fw.write(base64.b64decode(object_serialized_encoded.encode()))
         with self.lock, open(f"{storage_path}.json", "w",
                              encoding="utf8") as fw:
             fw.write(base_utils.json_dumps(object_json))
     run_parameters = component_model.get("run_parameters", {}) or {}
     p = self.component_run_parameters_path(
         component_model["component_name"])
     os.makedirs(os.path.dirname(p), exist_ok=True)
     with self.lock, open(p, "w", encoding="utf8") as fw:
         fw.write(base_utils.json_dumps(run_parameters))
     self.update_component_meta(
         component_name=component_model["component_name"],
         component_module_name=component_model["component_module_name"],
         model_alias=component_model["model_alias"],
         model_proto_index=component_model["model_proto_index"])
     stat_logger.info("save {} {} successfully".format(
         component_model["component_name"], component_model["model_alias"]))
Code Example #13
File: upload.py Project: FederatedAI/FATE-Flow
 def _run(self, cpn_input: ComponentInputProtocol):
     self.parameters = cpn_input.parameters
     LOGGER.info(self.parameters)
     self.parameters["role"] = cpn_input.roles["role"]
     self.parameters["local"] = cpn_input.roles["local"]
     storage_engine = self.parameters["storage_engine"].upper()
     storage_address = self.parameters["storage_address"]
     # if not set storage, use job storage as default
     if not storage_engine:
         storage_engine = cpn_input.job_parameters.storage_engine
     self.storage_engine = storage_engine
     if not storage_address:
         storage_address = cpn_input.job_parameters.engines_address[
             EngineType.STORAGE]
     job_id = self.task_version_id.split("_")[0]
     if not os.path.isabs(self.parameters.get("file", "")):
         self.parameters["file"] = os.path.join(get_fate_flow_directory(),
                                                self.parameters["file"])
     if not os.path.exists(self.parameters["file"]):
         raise Exception("%s is not exist, please check the configure" %
                         (self.parameters["file"]))
     if not os.path.getsize(self.parameters["file"]):
         raise Exception("%s is an empty file" % (self.parameters["file"]))
     name, namespace = self.parameters.get("name"), self.parameters.get(
         "namespace")
     _namespace, _table_name = self.generate_table_name(
         self.parameters["file"])
     if namespace is None:
         namespace = _namespace
     if name is None:
         name = _table_name
     read_head = self.parameters["head"]
     if read_head == 0:
         head = False
     elif read_head == 1:
         head = True
     else:
         raise Exception("'head' in conf.json should be 0 or 1")
     partitions = self.parameters["partition"]
     if partitions <= 0 or partitions >= self.MAX_PARTITIONS:
         raise Exception(
             "Error number of partition, it should between %d and %d" %
             (0, self.MAX_PARTITIONS))
     self.session_id = job_utils.generate_session_id(
         self.tracker.task_id,
         self.tracker.task_version,
         self.tracker.role,
         self.tracker.party_id,
     )
     sess = Session.get_global()
     self.session = sess
     if self.parameters.get("destroy", False):
         table = sess.get_table(namespace=namespace, name=name)
         if table:
             LOGGER.info(
                 f"destroy table name: {name} namespace: {namespace} engine: {table.engine}"
             )
             try:
                 table.destroy()
             except Exception as e:
                 LOGGER.error(e)
         else:
             LOGGER.info(
                 f"can not found table name: {name} namespace: {namespace}, pass destroy"
             )
     address_dict = storage_address.copy()
     storage_session = sess.storage(storage_engine=storage_engine,
                                    options=self.parameters.get("options"))
     upload_address = {}
     if storage_engine in {StorageEngine.EGGROLL, StorageEngine.STANDALONE}:
         upload_address = {
             "name": name,
             "namespace": namespace,
             "storage_type": EggRollStoreType.ROLLPAIR_LMDB,
         }
     elif storage_engine in {StorageEngine.MYSQL, StorageEngine.HIVE}:
         if not address_dict.get("db") or not address_dict.get("name"):
             upload_address = {"db": namespace, "name": name}
     elif storage_engine in {StorageEngine.PATH}:
         upload_address = {"path": self.parameters["file"]}
     elif storage_engine in {StorageEngine.HDFS}:
         upload_address = {
             "path":
             default_input_fs_path(
                 name=name,
                 namespace=namespace,
                 prefix=address_dict.get("path_prefix"),
             )
         }
     elif storage_engine in {StorageEngine.LOCALFS}:
         upload_address = {
             "path":
             default_input_fs_path(name=name,
                                   namespace=namespace,
                                   storage_engine=storage_engine)
         }
     else:
         raise RuntimeError(
             f"can not support this storage engine: {storage_engine}")
     address_dict.update(upload_address)
     LOGGER.info(
         f"upload to {storage_engine} storage, address: {address_dict}")
     address = storage.StorageTableMeta.create_address(
         storage_engine=storage_engine, address_dict=address_dict)
     self.parameters["partitions"] = partitions
     self.parameters["name"] = name
     self.table = storage_session.create_table(
         address=address,
         origin=StorageTableOrigin.UPLOAD,
         **self.parameters)
     if storage_engine not in [StorageEngine.PATH]:
         data_table_count = self.save_data_table(job_id, name, namespace,
                                                 storage_engine, head)
     else:
         data_table_count = self.get_data_table_count(
             self.parameters["file"], name, namespace)
     self.table.meta.update_metas(in_serialized=True)
     DataTableTracker.create_table_tracker(
         table_name=name,
         table_namespace=namespace,
         entity_info={
             "job_id": job_id,
             "have_parent": False
         },
     )
     LOGGER.info("------------load data finish!-----------------")
     # rm tmp file
     try:
         if "{}/fate_upload_tmp".format(job_id) in self.parameters["file"]:
             LOGGER.info("remove tmp upload file")
             LOGGER.info(os.path.dirname(self.parameters["file"]))
             shutil.rmtree(os.path.dirname(self.parameters["file"]))
     except:
         LOGGER.info("remove tmp file failed")
     LOGGER.info("file: {}".format(self.parameters["file"]))
     LOGGER.info("total data_count: {}".format(data_table_count))
     LOGGER.info("table name: {}, table namespace: {}".format(
         name, namespace))
Code Example #14
File: log_utils.py Project: FederatedAI/FATE-Flow
def get_logger_base_dir():
    job_log_dir = get_fate_flow_directory('logs')
    return job_log_dir
Code Example #15
File: data_manager.py Project: FederatedAI/FATE-Flow
 def send_table(output_tables_meta,
                tar_file_name,
                limit=-1,
                need_head=True):
     output_data_file_list = []
     output_data_meta_file_list = []
     output_tmp_dir = os.path.join(
         get_fate_flow_directory(),
         'tmp/{}/{}'.format(datetime.datetime.now().strftime("%Y%m%d"),
                            fate_uuid()))
     for output_name, output_table_meta in output_tables_meta.items():
         output_data_count = 0
         output_data_file_path = "{}/{}.csv".format(output_tmp_dir,
                                                    output_name)
         output_data_meta_file_path = "{}/{}.meta".format(
             output_tmp_dir, output_name)
         os.makedirs(os.path.dirname(output_data_file_path), exist_ok=True)
         with open(output_data_file_path, 'w') as fw:
             with Session() as sess:
                 output_table = sess.get_table(
                     name=output_table_meta.get_name(),
                     namespace=output_table_meta.get_namespace())
                 if output_table:
                     for k, v in output_table.collect():
                         data_line, is_str, extend_header = feature_utils.get_component_output_data_line(
                             src_key=k,
                             src_value=v,
                             schema=output_table_meta.get_schema())
                         # save meta
                         if output_data_count == 0:
                             output_data_file_list.append(
                                 output_data_file_path)
                             header = get_component_output_data_schema(
                                 output_table_meta=output_table_meta,
                                 is_str=is_str,
                                 extend_header=extend_header)
                             output_data_meta_file_list.append(
                                 output_data_meta_file_path)
                             with open(output_data_meta_file_path,
                                       'w') as f:
                                 json.dump({'header': header}, f, indent=4)
                             if need_head and header and output_table_meta.get_have_head(
                             ):
                                 fw.write('{}\n'.format(','.join(header)))
                         fw.write('{}\n'.format(','.join(
                             map(lambda x: str(x), data_line))))
                         output_data_count += 1
                         if output_data_count == limit:
                             break
         # tar
     output_data_tarfile = "{}/{}".format(output_tmp_dir, tar_file_name)
     tar = tarfile.open(output_data_tarfile, mode='w:gz')
     for index in range(0, len(output_data_file_list)):
         tar.add(
             output_data_file_list[index],
             os.path.relpath(output_data_file_list[index], output_tmp_dir))
         tar.add(
             output_data_meta_file_list[index],
             os.path.relpath(output_data_meta_file_list[index],
                             output_tmp_dir))
     tar.close()
     for key, path in enumerate(output_data_file_list):
         try:
             os.remove(path)
             os.remove(output_data_meta_file_list[key])
         except Exception as e:
             # warning
             stat_logger.warning(e)
     return send_file(output_data_tarfile,
                      attachment_filename=tar_file_name)
Code Example #16
File: job_utils.py Project: FederatedAI/FATE-Flow
def get_general_worker_log_directory(worker_name, worker_id, *args):
    return os.path.join(get_fate_flow_directory(), 'logs', worker_name,
                        worker_id, *args)
Code Example #17
File: job_utils.py Project: FederatedAI/FATE-Flow
def get_job_log_directory(job_id, *args):
    return os.path.join(get_fate_flow_directory(), 'logs', job_id, *args)
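
A usage sketch for the two path helpers above; the job id, role, party id, and worker values are made up:

# Hypothetical calls: both resolve under <FATE_FLOW_HOME>/logs/.
party_log_dir = get_job_log_directory("202203150001", "guest", "9999")
worker_log_dir = get_general_worker_log_directory("some_worker", "worker_123")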
Code Example #18
def call_fun(func, config_data, dsl_path, config_path):
    server_url = "http://{}:{}/{}".format(HOST, HTTP_PORT, API_VERSION)

    if func in JOB_OPERATE_FUNC:
        if func == 'submit_job':
            if not config_path:
                raise Exception(
                    'the following arguments are required: {}'.format(
                        'runtime conf path'))
            if not dsl_path and config_data.get('job_parameters', {}).get(
                    'job_type', '') == 'predict':
                raise Exception(
                    'for train job, the following arguments are required: {}'.
                    format('dsl path'))
            dsl_data = {}
            if dsl_path:
                dsl_path = os.path.abspath(dsl_path)
                with open(dsl_path, 'r') as f:
                    dsl_data = json.load(f)
            post_data = {'job_dsl': dsl_data, 'job_runtime_conf': config_data}
            response = requests_utils.request(
                method="post",
                url="/".join([server_url, "job",
                              func.rstrip('_job')]),
                json=post_data)
            try:
                if response.json()['retcode'] == 999:
                    start_cluster_standalone_job_server()
                    response = requests_utils.request(
                        method="post",
                        url="/".join([server_url, "job",
                                      func.rstrip('_job')]),
                        json=post_data)
            except:
                pass
        elif func == 'data_view_query' or func == 'clean_queue':
            response = requests_utils.request(
                method="post",
                url="/".join([server_url, "job",
                              func.replace('_', '/')]),
                json=config_data)
        else:
            if func != 'query_job':
                detect_utils.check_config(config=config_data,
                                          required_arguments=['job_id'])
            post_data = config_data
            response = requests_utils.request(
                method="post",
                url="/".join([server_url, "job",
                              func.rstrip('_job')]),
                json=post_data)
            if func == 'query_job':
                response = response.json()
                if response['retcode'] == 0:
                    for i in range(len(response['data'])):
                        del response['data'][i]['f_runtime_conf']
                        del response['data'][i]['f_dsl']
    elif func in JOB_FUNC:
        if func == 'job_config':
            detect_utils.check_config(config=config_data,
                                      required_arguments=[
                                          'job_id', 'role', 'party_id',
                                          'output_path'
                                      ])
            response = requests_utils.request(
                method="post",
                url="/".join([server_url, func.replace('_', '/')]),
                json=config_data)
            response_data = response.json()
            if response_data['retcode'] == 0:
                job_id = response_data['data']['job_id']
                download_directory = os.path.join(
                    config_data['output_path'], 'job_{}_config'.format(job_id))
                os.makedirs(download_directory, exist_ok=True)
                for k, v in response_data['data'].items():
                    if k == 'job_id':
                        continue
                    with open('{}/{}.json'.format(download_directory, k),
                              'w') as fw:
                        json.dump(v, fw, indent=4)
                del response_data['data']['dsl']
                del response_data['data']['runtime_conf']
                response_data['directory'] = download_directory
                response_data['retmsg'] = 'download successfully, please check {} directory'.format(
                    download_directory)
                response = response_data
        elif func == 'job_log':
            detect_utils.check_config(
                config=config_data,
                required_arguments=['job_id', 'output_path'])
            job_id = config_data['job_id']
            tar_file_name = 'job_{}_log.tar.gz'.format(job_id)
            extract_dir = os.path.join(config_data['output_path'],
                                       'job_{}_log'.format(job_id))
            with closing(
                    requests_utils.request(
                        method="get",
                        url="/".join([server_url,
                                      func.replace('_', '/')]),
                        json=config_data,
                        stream=True)) as response:
                if response.status_code == 200:
                    download_from_request(http_response=response,
                                          tar_file_name=tar_file_name,
                                          extract_dir=extract_dir)
                    response = {
                        'retcode': 0,
                        'directory': extract_dir,
                        'retmsg': 'download successfully, please check {} directory'.format(extract_dir)
                    }
                else:
                    response = response.json()
    elif func in TASK_OPERATE_FUNC:
        response = requests_utils.request(
            method="post",
            url="/".join([server_url, "job", "task",
                          func.rstrip('_task')]),
            json=config_data)
    elif func in TRACKING_FUNC:
        if func != 'component_metric_delete':
            detect_utils.check_config(config=config_data,
                                      required_arguments=[
                                          'job_id', 'component_name', 'role',
                                          'party_id'
                                      ])
        if func == 'component_output_data':
            detect_utils.check_config(config=config_data,
                                      required_arguments=['output_path'])
            tar_file_name = 'job_{}_{}_{}_{}_output_data.tar.gz'.format(
                config_data['job_id'], config_data['component_name'],
                config_data['role'], config_data['party_id'])
            extract_dir = os.path.join(config_data['output_path'],
                                       tar_file_name.replace('.tar.gz', ''))
            with closing(
                    requests_utils.request(method="get",
                                           url="/".join([
                                               server_url, "tracking",
                                               func.replace('_', '/'),
                                               'download'
                                           ]),
                                           json=config_data,
                                           stream=True)) as response:
                if response.status_code == 200:
                    try:
                        download_from_request(http_response=response,
                                              tar_file_name=tar_file_name,
                                              extract_dir=extract_dir)
                        response = {
                            'retcode': 0,
                            'directory': extract_dir,
                            'retmsg': 'download successfully, please check {} directory'.format(extract_dir)
                        }
                    except:
                        response = {
                            'retcode': 100,
                            'retmsg': 'download failed, please check if the parameters are correct'
                        }
                else:
                    response = response.json()

        else:
            response = requests_utils.request(
                method="post",
                url="/".join([server_url, "tracking",
                              func.replace('_', '/')]),
                json=config_data)
    elif func in DATA_FUNC:
        if func == 'upload' and config_data.get('use_local_data', 1) != 0:
            file_name = config_data.get('file')
            if not os.path.isabs(file_name):
                file_name = os.path.join(get_fate_flow_directory(), file_name)
            if os.path.exists(file_name):
                with open(file_name, 'rb') as fp:
                    data = MultipartEncoder(
                        fields={
                            'file': (os.path.basename(file_name), fp,
                                     'application/octet-stream')
                        })
                    tag = [0]

                    def read_callback(monitor):
                        if config_data.get('verbose') == 1:
                            sys.stdout.write("\r UPLOADING:{0}{1}".format(
                                "|" *
                                (monitor.bytes_read * 100 // monitor.len),
                                '%.2f%%' %
                                (monitor.bytes_read * 100 // monitor.len)))
                            sys.stdout.flush()
                            if monitor.bytes_read / monitor.len == 1:
                                tag[0] += 1
                                if tag[0] == 2:
                                    sys.stdout.write('\n')

                    data = MultipartEncoderMonitor(data, read_callback)
                    response = requests_utils.request(
                        method="post",
                        url="/".join(
                            [server_url, "data",
                             func.replace('_', '/')]),
                        data=data,
                        params=json.dumps(config_data),
                        headers={'Content-Type': data.content_type})
            else:
                raise Exception(
                    'The file is obtained from the fate flow client machine, but it does not exist, '
                    'please check the path: {}'.format(file_name))
        else:
            response = requests_utils.request(
                method="post",
                url="/".join([server_url, "data",
                              func.replace('_', '/')]),
                json=config_data)
        try:
            if response.json()['retcode'] == 999:
                start_cluster_standalone_job_server()
                response = requests_utils.request(
                    method="post",
                    url="/".join([server_url, "data", func]),
                    json=config_data)
        except:
            pass
    elif func in TABLE_FUNC:
        if func == "table_info":
            detect_utils.check_config(
                config=config_data,
                required_arguments=['namespace', 'table_name'])
            response = requests_utils.request(method="post",
                                              url="/".join(
                                                  [server_url, "table", func]),
                                              json=config_data)
        else:
            response = requests_utils.request(
                method="post",
                url="/".join([server_url, func.replace('_', '/')]),
                json=config_data)
    elif func in MODEL_FUNC:
        if func == "import":
            file_path = config_data["file"]
            if not os.path.isabs(file_path):
                file_path = os.path.join(get_fate_flow_directory(), file_path)
            if os.path.exists(file_path):
                files = {'file': open(file_path, 'rb')}
            else:
                raise Exception(
                    'The file is obtained from the fate flow client machine, but it does not exist, '
                    'please check the path: {}'.format(file_path))
            response = requests_utils.request(method="post",
                                              url="/".join(
                                                  [server_url, "model", func]),
                                              data=config_data,
                                              files=files)
        elif func == "export":
            with closing(
                    requests_utils.request(method="get",
                                           url="/".join(
                                               [server_url, "model", func]),
                                           json=config_data,
                                           stream=True)) as response:
                if response.status_code == 200:
                    archive_file_name = re.findall(
                        "filename=(.+)",
                        response.headers["Content-Disposition"])[0]
                    os.makedirs(config_data["output_path"], exist_ok=True)
                    archive_file_path = os.path.join(
                        config_data["output_path"], archive_file_name)
                    with open(archive_file_path, 'wb') as fw:
                        for chunk in response.iter_content(1024):
                            if chunk:
                                fw.write(chunk)
                    response = {
                        'retcode': 0,
                        'file': archive_file_path,
                        'retmsg': 'download successfully, please check {}'.format(archive_file_path)
                    }
                else:
                    response = response.json()
        else:
            response = requests_utils.request(method="post",
                                              url="/".join(
                                                  [server_url, "model", func]),
                                              json=config_data)
    elif func in PERMISSION_FUNC:
        detect_utils.check_config(
            config=config_data,
            required_arguments=['src_party_id', 'src_role'])
        response = requests_utils.request(
            method="post",
            url="/".join([server_url, "permission",
                          func.replace('_', '/')]),
            json=config_data)
    return response.json() if isinstance(
        response, requests.models.Response) else response
Code Example #19
def gen_model_file_path(model_id, model_version):
    return os.path.join(get_fate_flow_directory(), "model_local_cache",
                        model_id, model_version)
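
A usage sketch (identifiers are made up); the returned path follows the same model_local_cache layout used by PipelinedModel in Code Example #3:

# Hypothetical call: <FATE_FLOW_HOME>/model_local_cache/<model_id>/<model_version>
model_dir = gen_model_file_path(model_id="guest#9999#some_model_id",
                                model_version="202203150001")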