Exemple #1
0
    def load_node(cls) -> None:
        """
        Load the key file stored in the node's key directory.

        Builds ``<GflConf.home_dir>/key/key.json`` and hands that path to
        ``cls.__load_node``; the helper's result is not used here.

        :return: None (the previous ``NoReturn`` annotation means "never
            returns", which is not what this method does)
        """
        key_dir = PathUtils.join(GflConf.home_dir, "key")
        key_file = PathUtils.join(key_dir, "key.json")
        cls.__load_node(key_file)
Exemple #2
0
 def __init__(self, id):
     """
     Compute the on-disk locations used by the dataset ``id``.

     Every path is rooted at ``<GflConf.data_dir>/dataset/<id>``.

     :param id: dataset identifier, also used as the directory name
     """
     super().__init__(id)
     root = PathUtils.join(GflConf.data_dir, "dataset", id)
     self.__root_dir = root
     self.__metadata_file = PathUtils.join(root, "metadata.json")
     # The config directory and the module directory are the same
     # "dataset" sub-directory.
     self.__config_dir = PathUtils.join(root, "dataset")
     self.__module_dir = PathUtils.join(root, "dataset")
     self.__dataset_config_file = PathUtils.join(root, "dataset",
                                                 "dataset.json")
     self.__module_name = "fl_dataset"
Exemple #3
0
 def init_node(cls) -> None:
     """
     Initialize the GFL node.

     Creates a new node, makes sure ``<GflConf.home_dir>/key`` exists,
     saves the node to ``key.json`` in that directory and records it as
     ``cls.default_node``.

     :return: None (the method returns normally, so ``NoReturn`` was the
         wrong annotation)
     """
     node = cls.__new_node()
     key_dir = PathUtils.join(GflConf.home_dir, "key")
     os.makedirs(key_dir, exist_ok=True)
     cls.__save_node(node, PathUtils.join(key_dir, "key.json"))
     cls.default_node = node
Exemple #4
0
    def init_node(cls) -> None:
        """
        Initialize the GFL node.

        Calls ``cls.__new_node()``, makes sure ``<GflConf.home_dir>/key``
        exists and then calls ``cls.__save_node`` with the path of
        ``key.json`` inside that directory.

        :return: None (the method returns normally, so ``NoReturn`` was the
            wrong annotation)
        """
        cls.__new_node()
        key_dir = PathUtils.join(GflConf.home_dir, "key")
        os.makedirs(key_dir, exist_ok=True)
        cls.__save_node(PathUtils.join(key_dir, "key.json"))
Exemple #5
0
 def load_node(cls) -> None:
     """
     Load the key files in the node directory, and create default_node and
     standalone_nodes objects.

     ``key.json`` becomes ``cls.default_node``. Every file named
     ``node-<index>.json`` is loaded into ``cls.standalone_nodes[<index>]``.
     Files that start with ``node-`` but do not match that pattern (wrong
     extension, non-numeric index) are skipped instead of aborting the load.

     :return: None
     """
     prefix, suffix = "node-", ".json"
     key_dir = PathUtils.join(GflConf.home_dir, "key")
     cls.default_node = cls.__load_node(PathUtils.join(key_dir, "key.json"))
     for filename in os.listdir(key_dir):
         if not (filename.startswith(prefix) and filename.endswith(suffix)):
             continue
         try:
             node_idx = int(filename[len(prefix):-len(suffix)])
         except ValueError:
             # The part between "node-" and ".json" is not an integer.
             continue
         cls.standalone_nodes[node_idx] = cls.__load_node(
             PathUtils.join(key_dir, filename))
Exemple #6
0
 def load_all_job(cls) -> List[Job]:
     """
     Load every job stored under ``<GflConf.data_dir>/job``.

     Each sub-directory name is passed to ``cls.load_job``. Loading is
     best-effort: a job that fails to load is logged and skipped, it does
     not abort the scan.

     :return: the jobs that could be loaded
     """
     # Function-scope import so the block does not depend on the module's
     # import list.
     import logging

     job_dir = PathUtils.join(GflConf.data_dir, "job")
     jobs = []
     for filename in os.listdir(job_dir):
         if not os.path.isdir(PathUtils.join(job_dir, filename)):
             continue
         try:
             jobs.append(cls.load_job(filename))
         except Exception:
             # ``Exception`` (not a bare except) so KeyboardInterrupt and
             # SystemExit still propagate.
             logging.getLogger("gfl").warning(
                 "failed to load job from %s, skipped", filename,
                 exc_info=True)
     return jobs
Exemple #7
0
 def load_node(cls) -> None:
     """
     Load the key files in the node directory and create the default_node
     and standalone_nodes objects.

     ``key.json`` becomes ``cls.default_node``. Every file named
     ``node-<index>.json`` is loaded into ``cls.standalone_nodes[<index>]``.
     Files that start with ``node-`` but do not match that pattern (wrong
     extension, non-numeric index) are skipped instead of aborting the load.

     :return: None
     """
     prefix, suffix = "node-", ".json"
     key_dir = PathUtils.join(GflConf.home_dir, "key")
     cls.default_node = cls.__load_node(PathUtils.join(key_dir, "key.json"))
     for filename in os.listdir(key_dir):
         if not (filename.startswith(prefix) and filename.endswith(suffix)):
             continue
         try:
             node_idx = int(filename[len(prefix):-len(suffix)])
         except ValueError:
             # The part between "node-" and ".json" is not an integer.
             continue
         cls.standalone_nodes[node_idx] = cls.__load_node(
             PathUtils.join(key_dir, filename))
Exemple #8
0
    def generate_config(cls, path: str = None) -> None:
        """
        Copy the bundled default ``config.yaml`` to ``path``.

        :param path: destination of the config file; when it is None the
            relative path 'config.yaml' is used.
        :return:
        """
        destination = "config.yaml" if path is None else path
        template = PathUtils.join(PathUtils.src_root_dir(), "resources",
                                  "config.yaml")
        shutil.copy(template, destination)
Exemple #9
0
    def load_logging_config(cls) -> None:
        """
        Configure the ``logging`` module from the bundled ``logging.yaml``.

        The ``{logs_root}`` placeholder in the file is replaced by
        ``GflConf.logs_dir`` before the YAML is parsed. When the ``debug``
        property is truthy, the root logger and the ``gfl`` logger levels
        are switched to DEBUG.
        """
        template_path = PathUtils.join(PathUtils.src_root_dir(),
                                       "resources", "logging.yaml")
        with open(template_path) as stream:
            raw_text = stream.read()
        rendered = raw_text.replace("{logs_root}", GflConf.logs_dir)
        config = yaml.load(rendered, yaml.SafeLoader)

        if cls.get_property("debug"):
            for section in (config["root"], config["loggers"]["gfl"]):
                section["level"] = "DEBUG"

        logging.config.dictConfig(config)
Exemple #10
0
 def reload(cls):
     """
     Re-read ``conf.yaml`` from the home directory into
     ``cls.readonly_props``.

     :return:
     """
     conf_path = PathUtils.join(cls.__home_dir, "conf.yaml")
     with open(conf_path, "r") as conf_file:
         content = conf_file.read()
     cls.readonly_props = yaml.safe_load(content)
Exemple #11
0
    def save_job(cls, job: Job, *, module=None, module_data=None) -> None:
        """
        Save job

        Writes the job's metadata and its job/train/aggregate configs as
        JSON files under ``JobPath(job.job_id)``, then stores the job module.

        :param job: job to save
        :param module: job module; used when ``module_data`` is None, and
            replaced by ``job.module`` when it is None as well
        :param module_data: packed module data; when given, it is extracted
            into ``GflConf.temp_dir`` and the module is migrated from
            ``<temp_dir>/<job_id>`` instead of being submitted
        :return: None (the method returns normally, so ``NoReturn`` was the
            wrong annotation)
        """
        job_path = JobPath(job.job_id)
        job_path.makedirs()
        cls.__save_json(job_path.metadata_file, job.metadata)
        cls.__save_json(job_path.job_config_file, job.job_config)
        cls.__save_json(job_path.train_config_file, job.train_config)
        cls.__save_json(job_path.aggregate_config_file, job.aggregate_config)

        if module_data is not None:
            ZipUtils.extract_data(module_data, GflConf.temp_dir)
            ModuleUtils.migrate_module(
                PathUtils.join(GflConf.temp_dir, job.job_id),
                job_path.module_name, job_path.module_dir)
            return

        # No packed data: submit the explicit module, or the job's own one.
        source_module = job.module if module is None else module
        ModuleUtils.submit_module(source_module, job_path.module_name,
                                  job_path.module_dir)
Exemple #12
0
    def save_dataset(cls,
                     dataset: Dataset,
                     *,
                     module=None,
                     module_data=None) -> None:
        """
        Save dataset

        Writes the dataset's metadata and dataset config as JSON files under
        ``DatasetPath(dataset.dataset_id)``, then stores the dataset module.

        :param dataset: dataset to save
        :param module: dataset module; defaults to ``dataset.module``
        :param module_data: packed module data; when given, it is extracted
            into ``GflConf.temp_dir`` and the module is migrated from
            ``<temp_dir>/<dataset_id>`` instead of being submitted
        :return: None (the method returns normally, so ``NoReturn`` was the
            wrong annotation)
        """
        if module is None:
            module = dataset.module
        dataset_path = DatasetPath(dataset.dataset_id)
        dataset_path.makedirs()
        cls.__save_json(dataset_path.metadata_file, dataset.metadata)
        cls.__save_json(dataset_path.dataset_config_file,
                        dataset.dataset_config)
        if module_data is not None:
            ZipUtils.extract_data(module_data, GflConf.temp_dir)
            ModuleUtils.migrate_module(
                PathUtils.join(GflConf.temp_dir, dataset.dataset_id),
                dataset_path.module_name, dataset_path.module_dir)
        else:
            # ``module`` was already defaulted to ``dataset.module`` above,
            # so the former ``elif``/``else`` pair submitted the same object
            # in both branches.
            ModuleUtils.submit_module(module, dataset_path.module_name,
                                      dataset_path.module_dir)
Exemple #13
0
 def init_node(cls) -> None:
     """
     Initialize the GFL node.

     Creates a new node, makes sure the key directory exists, saves the
     node to ``key.json`` and records it as ``cls.default_node``.

     :return: None (the method returns normally, so ``NoReturn`` was the
         wrong annotation)
     """
     node = cls.__new_node()
     key_dir = PathUtils.join(GflConf.home_dir, "key")  # e.g. /Users/YY/.gfl/key
     os.makedirs(key_dir, exist_ok=True)
     key_file = PathUtils.join(key_dir,
                               "key.json")  # e.g. /Users/YY/.gfl/key/key.json
     cls.__save_node(node, key_file)
     # Per the original author's note, key.json looks like this (sample
     # values replaced by placeholders; a real private key does not belong
     # in source comments):
     # {
     #     "address": "<hex string>",
     #     "pub_key": "<hex string>",
     #     "priv_key": "<hex string>"
     # }
     cls.default_node = node
Exemple #14
0
    def run(cls, role, console, **kwargs):
        """
        Daemonize the process, load the node keys, build the node managers
        and start the HTTP listener, then block while the listener is alive.

        :param role: role given to the default ``NodeManager`` when the
            ``net.mode`` property is not "standalone"
        :param console: when truthy, ``Shell.startup()`` is called in the
            parent process after daemonizing
        :param kwargs: accepted but not used in this method
        """
        # Point this process's stderr at the null device; the file object is
        # not closed in this method.
        sys.stderr = open(os.devnull, "w")
        cls.logger = logging.getLogger("gfl")
        with Daemonizer() as (is_setup, daemonizer):
            main_pid = None
            if is_setup:
                # Remember the pid seen during setup so it can be compared
                # with the current pid after daemonizing.
                main_pid = os.getpid()
            pid_file = PathUtils.join(GflConf.home_dir, "proc.lock")
            stdout_file = PathUtils.join(GflConf.logs_dir, "console_out")
            stderr_file = PathUtils.join(GflConf.logs_dir, "console_err")
            # NOTE(review): presumably this forks the daemon and redirects
            # its stdout/stderr to the two files - confirm against the
            # Daemonizer implementation in use.
            is_parent = daemonizer(pid_file,
                                   stdout_goto=stdout_file,
                                   stderr_goto=stderr_file)
            if is_parent:
                # Only start the shell in the process whose pid was recorded
                # during setup.
                if console and main_pid == os.getpid():
                    Shell.startup()

        GflNode.load_node()

        if GflConf.get_property("net.mode") == "standalone":
            client_number = GflConf.get_property(
                "net.standalone.client_number")
            # Create standalone nodes until there are ``client_number`` of
            # them (no-op when enough already exist).
            for _ in range(len(GflNode.standalone_nodes), client_number):
                GflNode.add_standalone_node()

            # In standalone mode the default node always acts as the server.
            ManagerHolder.default_manager = NodeManager(
                node=GflNode.default_node, role="server")

            # Standalone nodes 0..client_number-1 act as clients.
            for i in range(client_number):
                client_manager = NodeManager(node=GflNode.standalone_nodes[i],
                                             role="client")
                ManagerHolder.standalone_managers.append(client_manager)
        else:
            ManagerHolder.default_manager = NodeManager(
                node=GflNode.default_node, role=role)

        # cls.__startup_node_managers()
        HttpListener.start()

        # Keep this thread busy while the listener is alive, polling every
        # two seconds.
        while HttpListener.is_alive():
            time.sleep(2)
Exemple #15
0
class Log(LogBase):
    """Logger stub: all four level methods currently do nothing."""

    # One file name per level, all located in GflConf.logs_dir.
    debug_filename = PathUtils.join(GflConf.logs_dir, "debug.log")
    info_filename = PathUtils.join(GflConf.logs_dir, "info.log")
    warn_filename = PathUtils.join(GflConf.logs_dir, "warn.log")
    error_filename = PathUtils.join(GflConf.logs_dir, "error.log")

    def __init__(self, name):
        """Forward ``name`` to the ``LogBase`` constructor."""
        super().__init__(name)

    def debug(self, msg, *params):
        """Accept a debug message; not implemented, returns None."""
        return None

    def info(self, msg, *params):
        """Accept an info message; not implemented, returns None."""
        return None

    def warn(self, msg, *params):
        """Accept a warning message; not implemented, returns None."""
        return None

    def error(self, msg, *params):
        """Accept an error message; not implemented, returns None."""
        return None
Exemple #16
0
    def load(cls) -> None:
        """
        Load config properties from disk.

        The bundled ``resources/config.yaml`` is read first. If
        ``<home_dir>/config.yaml`` exists, its top-level keys override the
        bundled ones (``dict.update`` is shallow, so a nested section is
        replaced as a whole). Logging is configured afterwards when the logs
        directory exists; otherwise a warning is issued.

        :return:
        """
        base_config_path = PathUtils.join(PathUtils.src_root_dir(),
                                          "resources", "config.yaml")
        with open(base_config_path) as f:
            # An empty YAML document parses to None; fall back to a dict so
            # update() and later look-ups keep working.
            cls.__readonly_props = yaml.load(f, Loader=yaml.SafeLoader) or {}

        path = PathUtils.join(cls.home_dir, "config.yaml")
        if os.path.exists(path):
            with open(path) as f:
                config_data = yaml.load(f, Loader=yaml.SafeLoader)
            # Skip an empty user config instead of calling update(None).
            if config_data:
                cls.__readonly_props.update(config_data)

        if os.path.exists(cls.logs_dir):
            cls.load_logging_config()
        else:
            warnings.warn("cannot found logs dir.")
Exemple #17
0
 def add_standalone_node(cls) -> None:
     """
     Add one standalone node.

     A new node is created and stored under the lowest index in 0..99 that
     is not yet a key of ``cls.standalone_nodes``; it is also saved to
     ``<GflConf.home_dir>/key/node-<index>.json``.

     :return: None (the method returns normally, so ``NoReturn`` was the
         wrong annotation)
     :raises ValueError: when all 100 indices are already in use
     """
     node = cls.__new_node()
     # At most 100 simulated nodes are allowed; the bounded range also rules
     # out an endless loop here.
     for i in range(100):
         if i in cls.standalone_nodes:
             continue
         key_file = PathUtils.join(GflConf.home_dir, "key",
                                   "node-%d.json" % i)
         cls.__save_node(node, key_file)
         cls.standalone_nodes[i] = node
         return
     # Message: "At most 100 virtual nodes are supported in standalone mode."
     raise ValueError("最多只支持100个standalone模式虚拟节点.")
Exemple #18
0
    def run(cls, console=True, **kwargs):
        """
        Daemonize the process, load the node keys, start the HTTP listener
        and run the node manager.

        :param console: when truthy, ``Shell.startup()`` is called in the
            parent process after daemonizing
        :param kwargs: accepted but not used in this method
        """
        # Point this process's stderr at the null device; the file object is
        # not closed in this method.
        sys.stderr = open(os.devnull, "w")
        cls.logger = logging.getLogger("gfl")
        with Daemonizer() as (is_setup, daemonizer):
            main_pid = None
            if is_setup:
                # Remember the pid seen during setup so it can be compared
                # with the current pid after daemonizing.
                main_pid = os.getpid()
            pid_file = PathUtils.join(GflConf.home_dir, "proc.lock")
            stdout_file = PathUtils.join(GflConf.logs_dir, "console_out")
            stderr_file = PathUtils.join(GflConf.logs_dir, "console_err")
            # NOTE(review): presumably this forks the daemon and redirects
            # its stdout/stderr to the two files - confirm against the
            # Daemonizer implementation in use.
            is_parent = daemonizer(pid_file,
                                   stdout_goto=stdout_file,
                                   stderr_goto=stderr_file)
            if is_parent:
                # Only start the shell in the process whose pid was recorded
                # during setup.
                if console and main_pid == os.getpid():
                    Shell.startup()

        GflNode.load_node()

        HttpListener.start()

        NodeManager.get_instance().run()
Exemple #19
0
 def send_partial_params(cls, client: str, job_id: str, step: int,
                         params) -> None:
     """
     Pickle ``params`` to the client's parameter directory for ``step``.

     Per the original notes, this is the standalone-mode path: the model a
     trainer produced in the current round is stored on disk, in
     ``JobPath(job_id).client_params_dir(step, client)`` as ``<job_id>.pkl``.

     :param client: for now treated as the client address
     :param job_id: id of the job the parameters belong to
     :param step: training step (round) the parameters belong to
     :param params: object to pickle
     :return: None (the method returns normally, so ``NoReturn`` was the
         wrong annotation)
     """
     client_params_dir = JobPath(job_id).client_params_dir(step, client)
     os.makedirs(client_params_dir, exist_ok=True)
     # The file is named after the job id.
     path = PathUtils.join(client_params_dir, job_id + '.pkl')
     with open(path, 'wb') as f:
         pickle.dump(params, f)
     # Message: "Training finished, model saved to: <dir>"
     print("训练完成,已将模型保存至:" + str(client_params_dir))
Exemple #20
0
    def run(cls, **kwargs):
        """
        Optionally daemonize, reload the configuration and node keys, create
        the node managers and start them.

        :param kwargs: ``daemon`` (optional, default False) enables
            daemonizing; ``role`` is required when the
            ``standalone.enabled`` property is falsy
        :raises KeyError: when ``role`` is needed but missing from kwargs
        """
        daemon = kwargs.pop("daemon", False)
        if daemon:
            print("DAEMON")
            with Daemonizer() as (is_setup, daemonizer):
                # Placeholder: nothing is done during setup.
                if is_setup:
                    pass
                # Relative path (no directory component is added here).
                pid_file = "proc.lock"
                stdout_file = PathUtils.join(GflConf.logs_dir, "console_out")
                stderr_file = PathUtils.join(GflConf.logs_dir, "console_err")
                # NOTE(review): presumably this forks the daemon and
                # redirects its stdout/stderr to the two files - confirm
                # against the Daemonizer implementation in use.
                is_parent = daemonizer(pid_file,
                                       stdout_goto=stdout_file,
                                       stderr_goto=stderr_file)
                # Placeholder: nothing is done in the parent.
                if is_parent:
                    pass

        GflConf.reload()
        GflNode.load_node()

        if GflConf.get_property("standalone.enabled"):
            server_number = GflConf.get_property("standalone.server_number")
            client_number = GflConf.get_property("standalone.client_number")
            # Create standalone nodes until there is one per server and
            # client (no-op when enough already exist).
            for _ in range(len(GflNode.standalone_nodes),
                           server_number + client_number):
                GflNode.add_standalone_node()
            # The first ``server_number`` standalone nodes act as servers.
            for i in range(0, server_number):
                node_manager = NodeManager(node=GflNode.standalone_nodes[i],
                                           role="server")
                cls.node_managers.append(node_manager)
            # The following ``client_number`` standalone nodes act as clients.
            for i in range(server_number, server_number + client_number):
                node_manager = NodeManager(node=GflNode.standalone_nodes[i],
                                           role="client")
                cls.node_managers.append(node_manager)
        else:
            # No default is given, so a missing "role" raises KeyError.
            role = kwargs.pop("role")
            print(role)
            node_manager = NodeManager(node=GflNode.default_node, role=role)
            cls.node_managers.append(node_manager)
        cls.__startup_node_managers()
Exemple #21
0
 def add_standalone_node(cls) -> None:
     """
     Add one standalone GFL node.

     A new node is created and stored under the lowest index in 0..99 that
     is not yet a key of ``cls.standalone_nodes``; it is also saved to
     ``<GflConf.home_dir>/key/node-<index>.json``.

     :return: None (the method returns normally, so ``NoReturn`` was the
         wrong annotation)
     :raises ValueError: when all 100 indices are already in use
     """
     node = cls.__new_node()
     # Limit up to 100 mock nodes to prevent an endless loop here
     for i in range(100):
         if i in cls.standalone_nodes:
             continue
         key_file = PathUtils.join(GflConf.home_dir, "key",
                                   "node-%d.json" % i)
         cls.__save_node(node, key_file)
         cls.standalone_nodes[i] = node
         return
     # Message: "At most 100 virtual nodes are supported in standalone mode."
     raise ValueError("最多只支持100个standalone模式虚拟节点.")
Exemple #22
0
    def init(cls, force):
        """
        Create a fresh GFL home directory and populate it.

        :param force: when truthy, an existing home directory is deleted
            first (after shutting down logging); otherwise an existing home
            directory is an error
        :raises ValueError: when the home directory exists and ``force`` is
            falsy
        """
        home = GflConf.home_dir
        if os.path.exists(home):
            if not force:
                raise ValueError("homedir not empty.")
            logging.shutdown()
            shutil.rmtree(home)

        # Home directory first, then config file, node key and data layout.
        os.makedirs(home)
        GflConf.generate_config(PathUtils.join(home, "config.yaml"))
        GflNode.init_node()
        Lfs.init()
Exemple #23
0
 def receive_global_params(cls, job_id: str, cur_round: int):
     """
     Return the path of the global model parameters for ``cur_round``.

     Per the original notes, this is the standalone-mode path through which
     a trainer fetches the aggregated global model. The file is looked up
     as ``<job_id>.pkl`` inside
     ``JobPath(job_id).global_params_dir(cur_round)``.

     :param job_id: id of the job
     :param cur_round: aggregation round whose parameters are wanted
     :return: the parameter file path, or None when it is not there
     """
     params_dir = JobPath(job_id).global_params_dir(cur_round)
     params_file = PathUtils.join(params_dir, job_id + '.pkl')
     if not (os.path.exists(params_dir) and os.path.isfile(params_file)):
         # Waiting for the file to appear is deliberately not handled yet;
         # a missing file is reported as unavailable.
         return None
     # Message: "Trainer received the global model"
     print("训练方接收全局模型")
     return params_file
Exemple #24
0
    def setUp(self) -> None:
        """
        Build the fixture: a dataset, a job mounted on it, an initialized
        node, a registered train scheduler and an initial global model file.
        """
        dataset = generate_dataset()
        self.dataset = dataset
        print("dataset_id:" + dataset.dataset_id)
        job = generate_job()
        self.job = job
        print("job_id:" + job.job_id)
        job.mount_dataset(dataset)
        GflNode.init_node()
        scheduler = JobTrainScheduler(node=GflNode.default_node, job=job)
        self.jobTrainerScheduler = scheduler
        scheduler.register()

        # The aggregator needs an initial model: save a freshly constructed
        # Net's state dict as the global parameters of the current round.
        params_dir = JobPath(job.job_id).global_params_dir(job.cur_round)
        os.makedirs(params_dir, exist_ok=True)
        params_file = PathUtils.join(params_dir, job.job_id + '.pth')
        initial_model = Net()
        torch.save(initial_model.state_dict(), params_file)
Exemple #25
0
import logging.config
import os
import shutil
import tempfile
import warnings

import yaml

from gfl.utils import PathUtils

os_tempdir = tempfile.gettempdir()
gfl_tempdir = PathUtils.join(os_tempdir, "gfl")
# Make sure the GFL temp directory exists. The previous guard was inverted
# (it only called makedirs when the directory was already there), so the
# directory was never created. ``exist_ok=True`` makes the call a no-op when
# it already exists.
os.makedirs(gfl_tempdir, exist_ok=True)


class GflConfMetadata(type):
    """
    Metaclass that exposes ``GflConf``'s name-mangled private directory
    attributes as class-level properties.
    """

    @property
    def home_dir(cls):
        """Return the home directory stored on ``GflConf``."""
        return cls._GflConf__home_dir

    @home_dir.setter
    def home_dir(cls, value):
        """
        Set the home directory and re-derive the data, logs and cache
        directories below it.

        :param value: new home directory; it is made absolute first
        """
        # Derive every sub-directory from the absolute path so they stay
        # consistent with home_dir even when ``value`` is relative.
        home = PathUtils.abspath(value)
        cls._GflConf__home_dir = home
        cls._GflConf__data_dir = PathUtils.join(home, "data")
        cls._GflConf__logs_dir = PathUtils.join(home, "logs")
        cls._GflConf__cache_dir = PathUtils.join(home, "cache")

    @property
    def data_dir(cls):
        """Return the data directory stored on ``GflConf``."""
        return cls._GflConf__data_dir
Exemple #26
0
 def home_dir(cls, value):
     """
     Set the home directory and re-derive the data, logs and cache
     directories below it.

     :param value: new home directory; it is made absolute first
     """
     # Derive every sub-directory from the absolute path so they stay
     # consistent with home_dir even when ``value`` is relative.
     home = PathUtils.abspath(value)
     cls._GflConf__home_dir = home
     cls._GflConf__data_dir = PathUtils.join(home, "data")
     cls._GflConf__logs_dir = PathUtils.join(home, "logs")
     cls._GflConf__cache_dir = PathUtils.join(home, "cache")
Exemple #27
0
class GflConf(object, metaclass=GflConfMetadata):
    """
    Holder of the GFL configuration, used through classmethods only.

    Properties are addressed with dotted keys (``"a.b.c"``) that walk nested
    dictionaries. Values set at run time (``set_property``/``set_config``)
    take precedence over values loaded from the YAML config files.
    """

    # Parameters that can be modified at run time
    __props = {}
    # Parameters that are read from a configuration file and cannot be changed at run time
    __readonly_props = {}

    # Default directory layout, rooted at "<user home>/.gfl".
    __home_dir = PathUtils.join(PathUtils.user_home_dir(), ".gfl")
    __data_dir = PathUtils.join(__home_dir, "data")
    __logs_dir = PathUtils.join(__home_dir, "logs")
    __cache_dir = PathUtils.join(__home_dir, "cache")
    __temp_dir = gfl_tempdir

    @classmethod
    def load(cls) -> None:
        """
        Load config properties from disk.

        The bundled ``resources/config.yaml`` is read first. If
        ``<home_dir>/config.yaml`` exists, its top-level keys override the
        bundled ones (``dict.update`` is shallow, so a nested section is
        replaced as a whole). Logging is configured afterwards when the logs
        directory exists; otherwise a warning is issued.

        :return:
        """
        base_config_path = PathUtils.join(PathUtils.src_root_dir(),
                                          "resources", "config.yaml")
        with open(base_config_path) as f:
            # An empty YAML document parses to None; fall back to a dict so
            # update() and later look-ups keep working.
            cls.__readonly_props = yaml.load(f, Loader=yaml.SafeLoader) or {}

        path = PathUtils.join(cls.home_dir, "config.yaml")
        if os.path.exists(path):
            with open(path) as f:
                config_data = yaml.load(f, Loader=yaml.SafeLoader)
            # Skip an empty user config instead of calling update(None).
            if config_data:
                cls.__readonly_props.update(config_data)

        if os.path.exists(cls.logs_dir):
            cls.load_logging_config()
        else:
            warnings.warn("cannot found logs dir.")

    @classmethod
    def load_logging_config(cls) -> None:
        """
        Configure the ``logging`` module from the bundled ``logging.yaml``.

        The ``{logs_root}`` placeholder in the file is replaced by
        ``GflConf.logs_dir`` before the YAML is parsed. When the ``debug``
        property is truthy, the root logger and the ``gfl`` logger levels
        are switched to DEBUG.
        """
        logging_config_path = PathUtils.join(PathUtils.src_root_dir(),
                                             "resources", "logging.yaml")
        with open(logging_config_path) as f:
            text = f.read().replace("{logs_root}", GflConf.logs_dir)
        data = yaml.load(text, yaml.SafeLoader)

        if cls.get_property("debug"):
            data["root"]["level"] = "DEBUG"
            data["loggers"]["gfl"]["level"] = "DEBUG"

        logging.config.dictConfig(data)

    @classmethod
    def generate_config(cls, path: str = None) -> None:
        """
        Copy the bundled default ``config.yaml`` to ``path``.

        :param path: the config file path, if it's None, will be replaced by './config.yaml'.
        :return:
        """
        if path is None:
            path = "config.yaml"
        src_path = PathUtils.join(PathUtils.src_root_dir(), "resources",
                                  "config.yaml")
        shutil.copy(src_path, path)

    @classmethod
    def set_config(cls, d: dict) -> None:
        """
        Batch update the run-time config properties. Generally, this method
        is not recommended.

        :param d: a dict of config properties; its top-level keys replace
            the current ones (shallow update of a shallow copy).
        :return:
        """
        cls.__props.update(d.copy())

    @classmethod
    def get_property(cls, key, default=None):
        """
        Get the value of a property.

        Run-time properties are searched first, then the read-only ones
        loaded from the config files.

        :param key: dotted key of the value to get, e.g. ``"net.mode"``
        :param default: return value if key not found
        :raises ValueError: when ``key`` is None or blank
        """
        k_seq = cls.__split_key(key)
        found, val = cls.__get_from_dict(cls.__props, k_seq, default)
        if found:
            return val
        return cls.__get_from_dict(cls.__readonly_props, k_seq, default)[1]

    @classmethod
    def set_property(cls, key, value):
        """
        Set a parameter at run time.

        :param key: dotted key; missing intermediate dicts are created
        :param value: value to store
        :return:
        """
        cls.__set_to_dict(cls.__props, cls.__split_key(key), value)

    @classmethod
    def remove_property(cls, key):
        """
        Remove a run-time parameter; a missing key is ignored.

        :param key: dotted key of the parameter to remove
        """
        cls.__remove_from_dict(cls.__props, cls.__split_key(key))

    @classmethod
    def __split_key(cls, key: str):
        """Split a dotted key into its parts; reject None and blank keys."""
        if key is None or key.strip() == "":
            raise ValueError("key cannot be none or empty.")
        return key.split(".")

    @classmethod
    def __exists_in_dict(cls, d: dict, k_seq: list):
        """Return True when the whole key path ``k_seq`` exists in ``d``."""
        if k_seq is None or len(k_seq) == 0:
            return False
        for k in k_seq:
            # A non-dict value in the middle of the path means "not found".
            if not isinstance(d, dict) or k not in d:
                return False
            d = d[k]
        return True

    @classmethod
    def __get_from_dict(cls, d: dict, k_seq: list, default=None):
        """
        Walk ``d`` along ``k_seq``.

        :return: ``(True, value)`` when the path exists, otherwise
            ``(False, default)``
        :raises ValueError: when ``k_seq`` is None or empty
        """
        if k_seq is None or len(k_seq) == 0:
            raise ValueError("key cannot be none or empty")
        for k in k_seq:
            # A non-dict value in the middle of the path (scalar, list,
            # None) is treated as "not found" instead of raising TypeError.
            if not isinstance(d, dict) or k not in d:
                return False, default
            d = d[k]
        return True, d

    @classmethod
    def __remove_from_dict(cls, d: dict, k_seq: list):
        """
        Delete the entry at path ``k_seq`` from ``d``.

        :return: True when an entry was deleted, False when the path does
            not exist
        :raises ValueError: when ``k_seq`` is None or empty
        """
        if k_seq is None or len(k_seq) == 0:
            raise ValueError("key cannot be none or empty")
        for k in k_seq[:-1]:
            if not isinstance(d, dict) or k not in d:
                return False
            d = d[k]
        try:
            del d[k_seq[-1]]
        except (KeyError, TypeError):
            # KeyError: last key is missing; TypeError: the parent is not a
            # container that supports deletion by this key.
            return False
        return True

    @classmethod
    def __set_to_dict(cls, d: dict, k_seq: list, value):
        """
        Store ``value`` at path ``k_seq`` in ``d``, creating missing
        intermediate dicts.

        :raises ValueError: when ``k_seq`` is None or empty
        """
        if k_seq is None or len(k_seq) == 0:
            raise ValueError("key cannot be none or empty")
        for k in k_seq[:-1]:
            if k not in d:
                d[k] = {}
            d = d[k]
        d[k_seq[-1]] = value
Exemple #28
0
 def __load_json(cls, file_path, clazz):
     """
     Build a ``clazz`` object from the JSON file at ``file_path``.

     :param file_path: path of the JSON file
     :param clazz: class instantiated without arguments; its ``from_dict``
         is called with the parsed JSON
     :return: the result of ``from_dict``, or None when the file is missing
     """
     if PathUtils.exists(file_path):
         with open(file_path, "r") as json_file:
             raw = json_file.read()
         payload = json.loads(raw)
         return clazz().from_dict(payload)
     return None
Exemple #29
0
 def init(cls):
     """
     Create the cache, data and logs directories, plus the "job" and
     "dataset" sub-directories of the data directory.

     ``os.makedirs`` is called without ``exist_ok``, so a directory that
     already exists raises ``FileExistsError``.
     """
     directories = (
         GflConf.cache_dir,
         GflConf.data_dir,
         GflConf.logs_dir,
         PathUtils.join(GflConf.data_dir, "job"),
         PathUtils.join(GflConf.data_dir, "dataset"),
     )
     for directory in directories:
         os.makedirs(directory)
Exemple #30
0
 def __init__(self, id):
     """
     Compute the on-disk locations used by the job ``id``.

     Every path is rooted at ``<GflConf.data_dir>/job/<id>``.

     :param id: job identifier, also used as the directory name
     """
     super().__init__(id)
     root = PathUtils.join(GflConf.data_dir, "job", id)
     self.__root_dir = root
     self.__metadata_file = PathUtils.join(root, "metadata.json")
     self.__sqlite_file = PathUtils.join(root, "job.sqlite")
     # Config files and the module code share the "job" sub-directory.
     self.__config_dir = PathUtils.join(root, "job")
     self.__job_config_file = PathUtils.join(root, "job", "job.json")
     self.__train_config_file = PathUtils.join(root, "job", "train.json")
     self.__aggregate_config_file = PathUtils.join(root, "job",
                                                   "aggregate.json")
     self.__module_name = "fl_model"
     self.__module_dir = PathUtils.join(root, "job")
     # Result artifacts live under "results".
     self.__metrics_dir = PathUtils.join(root, "results", "metrics")
     self.__params_dir = PathUtils.join(root, "results", "params")
     self.__reports_dir = PathUtils.join(root, "results", "reports")
     # The two paths below keep "%d" / "%s" placeholders; they are not
     # filled in here.
     self.__client_params_dir = PathUtils.join(root, "round-%d", "%s",
                                               "params")
     self.__client_word_dir = PathUtils.join(root, "round-%d", "%s",
                                             "work")