Exemple #1
0
    def init_table_manager_and_federation(cls, job_id, role, num_hosts,
                                          host_ind=0):
        """Start a session and a federation for a fixed test party layout.

        The layout has ``num_hosts`` hosts with ids 10000, 10001, ..., plus one
        guest and one arbiter that both use id 9999.

        Args:
            job_id: identifier passed to both ``session.init`` and
                ``federation.init``.
            role: key into the party layout ("host", "guest" or "arbiter").
            num_hosts: number of host party ids to generate.
            host_ind: index of the local host party; only used when
                ``role`` is "host".
        """
        from arch.api import session
        from arch.api import federation

        parties_by_role = {
            "host": list(range(10000, 10000 + num_hosts)),
            "guest": [9999],
            "arbiter": [9999],
        }
        session.init(job_id)

        # Non-host roles have a single party, so they always use index 0.
        local_index = host_ind if role == "host" else 0
        local_party = {
            "role": role,
            "party_id": parties_by_role[role][local_index],
        }
        federation.init(job_id, {"local": local_party, "role": parties_by_role})
Exemple #2
0
 def _init_argument(self):
     """Set up logging and workflow state, then start eggroll and federation.

     Reads ``LOGGER_path``, ``config_path`` and ``job_id`` from the
     enclosing scope; the JSON file at ``config_path`` is handed to
     ``federation.init`` as the runtime configuration.
     """
     self._init_LOGGER(LOGGER_path)
     self._initialize(config_path)

     with open(config_path) as config_file:
         runtime_conf = json.loads(config_file.read())

     eggroll.init(job_id)
     federation.init(job_id, runtime_conf)
Exemple #3
0
def test_model(role1, role2):
    """Exercise save/read round trips for several model buffer objects.

    Args:
        role1: prefix of the runtime-conf file to load
            (``<role1>_runtime_conf.json``).
        role2: suffix appended to the saved model meta name.

    Uses the module-level ``job_id`` for ``federation.init`` and prints the
    intermediate results instead of asserting on them.
    """
    conf_path = "%s_runtime_conf.json" % role1
    with open(conf_path) as conf_file:
        runtime_conf = json_loads(conf_file.read())
    federation.init(job_id=job_id, runtime_conf=runtime_conf)
    print(federation.get_field("role"))

    # Model meta: save, then read back into a fresh object.
    saved_meta = ModelMeta()
    saved_meta.name = "HeteroLR%s" % (role2)
    meta_commit_id = save_model("model_meta", saved_meta, commit_log="xxx")
    print("save guest model success, commit id is %s" % meta_commit_id)

    loaded_meta = ModelMeta()
    read_model("model_meta", loaded_meta)
    print(loaded_meta)

    # Model param: same round trip with two weights set.
    saved_param = ModelParam()
    for weight_key, weight_value in (("k1", 1), ("k2", 2)):
        saved_param.weight[weight_key] = weight_value
    param_commit_id = save_model("model_param", saved_param, commit_log="xxx")
    print("save guest model success, commit id is %s" % param_commit_id)

    loaded_param = ModelParam()
    read_model("model_param", loaded_param)
    print(loaded_param)

    # Data transform settings are only saved, never read back here.
    transform_server = DataTransformServer()
    transform_server.missing_replace_method = "xxxx"
    save_model("data_transform", transform_server)
Exemple #4
0
    def _init_argument(self):
        """Parse CLI arguments, validate the config and start the runtime.

        Command-line options:
            -c / --config: path to the runtime config JSON file (required).
            -j / --job_id: job identifier (required).

        Side effects: sets ``self.config_path`` and ``self.job_id``, runs
        ``AllChecker.check_all`` on the config, calls ``self._initialize``,
        initialises eggroll and federation, and finally builds the pipeline
        via ``self._init_pipeline``.

        Raises:
            SystemExit: with code -100 when the config path is empty; also
                raised by argparse itself when a required option is missing.
        """
        # Local import keeps this block self-contained; ``sys.exit`` replaces
        # the ``exit`` helper injected by ``site``, which is intended for the
        # interactive shell and is not guaranteed to exist (e.g. ``python -S``).
        import sys

        parser = argparse.ArgumentParser()
        parser.add_argument('-c',
                            '--config',
                            required=True,
                            type=str,
                            help="Specify a config json file path")
        parser.add_argument('-j',
                            '--job_id',
                            type=str,
                            required=True,
                            help="Specify the job id")
        # parser.add_argument('-p', '--party_id', type=str, required=True, help="Specify the party id")
        # parser.add_argument('-l', '--LOGGER_path', type=str, required=True, help="Specify the LOGGER path")
        args = parser.parse_args()
        config_path = args.config
        self.config_path = config_path
        # ``required=True`` only guarantees the option is present; an empty
        # string (``-c ""``) still reaches this point.
        if not config_path:
            LOGGER.error("Config File should be provided")
            sys.exit(-100)
        self.job_id = args.job_id

        all_checker = AllChecker(config_path)
        all_checker.check_all()
        self._initialize(config_path)
        with open(config_path) as conf_f:
            runtime_json = json.load(conf_f)
        eggroll.init(self.job_id, self.workflow_param.work_mode)
        LOGGER.debug("The job id is {}".format(self.job_id))
        federation.init(self.job_id, runtime_json)
        LOGGER.debug("Finish eggroll and federation init")
        self._init_pipeline()
Exemple #5
0
    def _init_argument(self):
        """Initialise workflow state, then start eggroll and federation.

        Reads ``config_path`` and ``job_id`` from the enclosing scope. The
        JSON file at ``config_path`` becomes the federation runtime conf, and
        ``self.workflow_param.work_mode`` selects the eggroll work mode.
        """
        self._initialize(config_path)
        with open(config_path) as config_file:
            runtime_conf = json.load(config_file)

        LOGGER.debug("The Guest job id is {}".format(job_id))
        work_mode = self.workflow_param.work_mode
        LOGGER.debug("The Guest work mode id is {}".format(work_mode))

        eggroll.init(job_id, work_mode)
        federation.init(job_id, runtime_conf)
        LOGGER.debug("Finish eggroll and federation init")
Exemple #6
0
 def test_remote(self):
     """Send a small single-partition table through ``federation.remote``.

     The table is filled with ``range(12)`` and sent under the transfer
     name "roll_pair_name.table" with tag "roll_pair_tag"; federation is
     initialised with the current session id and no runtime conf.
     """
     remote_table = session.table(
         name='remote_name',
         namespace='remote_namespace',
         partition=1,
     )
     remote_table.put_all(range(12))

     from arch.api import federation
     session_id = session.get_session_id()
     federation.init(session_id, runtime_conf=None)
     federation.remote(
         remote_table,
         name="roll_pair_name.table",
         tag="roll_pair_tag",
     )
Exemple #7
0
    def init_session_and_federation(job_id, role, partyid, partyid_map):
        """Start a session and a federation for the given local party.

        Args:
            job_id: identifier passed to both ``session.init`` and
                ``federation.init``.
            role: role of the local party.
            partyid: party id of the local party.
            partyid_map: value stored under the "role" key of the runtime
                conf, passed through unchanged.
        """
        from arch.api import session
        from arch.api import federation

        session.init(job_id)

        local_party = {"role": role, "party_id": partyid}
        runtime_conf = {"local": local_party, "role": partyid_map}
        federation.init(job_id=job_id, runtime_conf=runtime_conf)
Exemple #8
0
def session_init(job_id, idx):
    """Start a session and federation for the ``idx``-th simulated party.

    An ``idx`` below 1 is the guest (party id ``9999 + idx``); any other
    ``idx`` is a host with party id ``10000 + (idx - 1)``. The federation
    layout lists ``NUM_HOSTS`` hosts starting at id 10000 and one guest
    with id 9999.

    Args:
        job_id: identifier passed to ``session.init`` and ``federation.init``.
        idx: position of this party in the simulated party list.

    Returns:
        Tuple ``(federation.local_party(), federation.all_parties())``.
    """
    from arch.api import session
    from arch.api import federation

    if idx < 1:
        role, party_id = "guest", 9999 + idx
    else:
        role, party_id = "host", 10000 + (idx - 1)

    role_parties = {
        "host": list(range(10000, 10000 + NUM_HOSTS)),
        "guest": [9999],
    }

    session.init(job_id)
    runtime_conf = {
        "local": {"role": role, "party_id": party_id},
        "role": role_parties,
    }
    federation.init(job_id, runtime_conf)

    local_party = federation.local_party()
    all_parties = federation.all_parties()
    return local_party, all_parties
Exemple #9
0
    def _init_argument(self, config_json, job_id):
        """Store the job inputs and start eggroll, federation and the pipeline.

        Args:
            config_json: runtime configuration, stored on ``self`` and later
                passed to ``federation.init``.
            job_id: job identifier, stored on ``self`` and used for both
                ``eggroll.init`` and ``federation.init``.

        Also records in ``self.valid_classes`` the names of all classes found
        in ``federatedml.param.param``.
        """
        self.config_json = config_json
        self.job_id = job_id

        from federatedml.param import param
        # getmembers yields (name, value) pairs; only the names are kept.
        self.valid_classes = [
            class_name
            for class_name, _ in inspect.getmembers(param, inspect.isclass)
        ]

        # NOTE: parameter validation is currently disabled; the original
        # calls are kept below for reference.
        # home_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
        # param_validation_path = home_dir + "/conf/param_validation.json"
        # all_checker = AllChecker(config_json, param_validation_path)
        # all_checker.check_all()
        # LOGGER.debug("Finish all parameter checkers")
        self._initialize()

        work_mode = self.workflow_param.work_mode
        eggroll.init(self.job_id, work_mode)
        LOGGER.debug("The job id is {}".format(self.job_id))

        federation.init(self.job_id, self.config_json)
        LOGGER.debug("Finish eggroll and federation init")

        self._init_pipeline()
Exemple #10
0
    def run_task():
        """Run one component task in this process, driven by CLI arguments.

        Steps, in order: parse the command line and load the task config
        JSON; initialise RuntimeConfig, storage and federation; set up the
        task log directory; instantiate the component class named by
        ``parameters['CodePath']``; run it; save its output data/model
        through the tracker; and report the task status via
        ``TaskExecutor.sync_task_status``.

        Takes no parameters (all inputs come from the command line) and
        returns None. ``Exception`` subclasses raised along the way are
        logged and recorded as ``TaskStatus.FAILED`` on the task object
        instead of being propagated.
        """
        task = Task()
        task.f_create_time = current_timestamp()
        try:
            parser = argparse.ArgumentParser()
            parser.add_argument('-j',
                                '--job_id',
                                required=True,
                                type=str,
                                help="job id")
            parser.add_argument('-n',
                                '--component_name',
                                required=True,
                                type=str,
                                help="component name")
            parser.add_argument('-t',
                                '--task_id',
                                required=True,
                                type=str,
                                help="task id")
            parser.add_argument('-r',
                                '--role',
                                required=True,
                                type=str,
                                help="role")
            parser.add_argument('-p',
                                '--party_id',
                                required=True,
                                type=str,
                                help="party id")
            parser.add_argument('-c',
                                '--config',
                                required=True,
                                type=str,
                                help="task config")
            parser.add_argument('--job_server', help="job server", type=str)
            args = parser.parse_args()
            schedule_logger.info('enter task process')
            schedule_logger.info(args)
            # init function args
            if args.job_server:
                # The port is taken as the text after the first ':' —
                # presumably job_server is "host:port"; verify against caller.
                RuntimeConfig.init_config(
                    HTTP_PORT=args.job_server.split(':')[1])
            job_id = args.job_id
            component_name = args.component_name
            task_id = args.task_id
            role = args.role
            # party_id arrives as a string on the command line.
            party_id = int(args.party_id)
            task_config = file_utils.load_json_conf(args.config)
            job_parameters = task_config['job_parameters']
            job_initiator = task_config['job_initiator']
            job_args = task_config['job_args']
            task_input_dsl = task_config['input']
            task_output_dsl = task_config['output']
            parameters = task_config['parameters']
            module_name = task_config['module_name']
        except Exception as e:
            # Argument parsing / config loading failed. FAILED is only set on
            # the local Task object here; sync_task_status is not called on
            # this path.
            schedule_logger.exception(e)
            task.f_status = TaskStatus.FAILED
            return
        try:
            # init environment, process is shared globally
            RuntimeConfig.init_config(WORK_MODE=job_parameters['work_mode'])
            # Storage and federation are both keyed by task_id, not job_id.
            storage.init_storage(job_id=task_id,
                                 work_mode=RuntimeConfig.WORK_MODE)
            federation.init(job_id=task_id, runtime_conf=parameters)
            # Log layout: <job log dir>/<role>/<party_id>/<component_name>.
            job_log_dir = os.path.join(
                job_utils.get_job_log_directory(job_id=job_id), role,
                str(party_id))
            task_log_dir = os.path.join(job_log_dir, component_name)
            log_utils.LoggerFactory.set_directory(directory=task_log_dir,
                                                  parent_log_dir=job_log_dir,
                                                  append_to_parent_log=True,
                                                  force=True)

            task.f_job_id = job_id
            task.f_component_name = component_name
            task.f_task_id = task_id
            task.f_role = role
            task.f_party_id = party_id
            task.f_operator = 'python_operator'
            tracker = Tracking(job_id=job_id,
                               role=role,
                               party_id=party_id,
                               component_name=component_name,
                               task_id=task_id,
                               model_id=job_parameters['model_id'],
                               model_version=job_parameters['model_version'],
                               module_name=module_name)
            task.f_start_time = current_timestamp()
            task.f_run_ip = get_lan_ip()
            task.f_run_pid = os.getpid()
            # CodePath is split on '/': the last segment is the class name,
            # the second-to-last is the module file (its '.py' is removed),
            # and the remaining leading segments form the dotted package,
            # e.g. 'a/b/mod.py/Cls' -> package 'a.b.mod', class 'Cls'.
            run_class_paths = parameters.get('CodePath').split('/')
            run_class_package = '.'.join(
                run_class_paths[:-2]) + '.' + run_class_paths[-2].replace(
                    '.py', '')
            run_class_name = run_class_paths[-1]
            task_run_args = TaskExecutor.get_task_run_args(
                job_id=job_id,
                role=role,
                party_id=party_id,
                job_parameters=job_parameters,
                job_args=job_args,
                input_dsl=task_input_dsl)
            # Import the module dynamically and instantiate the component
            # class with no constructor arguments.
            run_object = getattr(importlib.import_module(run_class_package),
                                 run_class_name)()
            run_object.set_tracker(tracker=tracker)
            run_object.set_taskid(taskid=task_id)
            # Report RUNNING before the component actually starts.
            task.f_status = TaskStatus.RUNNING
            TaskExecutor.sync_task_status(job_id=job_id,
                                          component_name=component_name,
                                          task_id=task_id,
                                          role=role,
                                          party_id=party_id,
                                          initiator_party_id=job_initiator.get(
                                              'party_id', None),
                                          task_info=task.to_json())

            schedule_logger.info('run {} {} {} {} {} task'.format(
                job_id, component_name, task_id, role, party_id))
            schedule_logger.info(parameters)
            schedule_logger.info(task_input_dsl)
            run_object.run(parameters, task_run_args)
            # Outputs are saved only when the output DSL declares them; only
            # the first declared data/model name is used.
            if task_output_dsl:
                if task_output_dsl.get('data', []):
                    output_data = run_object.save_data()
                    tracker.save_output_data_table(
                        output_data,
                        task_output_dsl.get('data')[0])
                if task_output_dsl.get('model', []):
                    output_model = run_object.export_model()
                    # There is only one model output at the current dsl version.
                    tracker.save_output_model(output_model,
                                              task_output_dsl['model'][0])
            task.f_status = TaskStatus.SUCCESS
        except Exception as e:
            schedule_logger.exception(e)
            task.f_status = TaskStatus.FAILED
        finally:
            # Always attempt to report the final status.
            # NOTE(review): f_start_time is assigned only part-way through the
            # try block above; if an earlier statement raised, the subtraction
            # below relies on whatever default Task gives f_start_time. If it
            # raises, the sync call is skipped — confirm this cannot happen.
            try:
                task.f_end_time = current_timestamp()
                task.f_elapsed = task.f_end_time - task.f_start_time
                task.f_update_time = current_timestamp()
                TaskExecutor.sync_task_status(
                    job_id=job_id,
                    component_name=component_name,
                    task_id=task_id,
                    role=role,
                    party_id=party_id,
                    initiator_party_id=job_initiator.get('party_id', None),
                    task_info=task.to_json())
            except Exception as e:
                schedule_logger.exception(e)
        # The final summary goes to both the scheduler log and stdout.
        schedule_logger.info('finish {} {} {} {} {} {} task'.format(
            job_id, component_name, task_id, role, party_id, task.f_status))
        print('finish {} {} {} {} {} {} task'.format(job_id, component_name,
                                                     task_id, role, party_id,
                                                     task.f_status))
Exemple #11
0
    def run_task():
        """Run one component task in this process, driven by CLI arguments.

        Steps, in order: parse the command line, load the task config JSON
        and fetch the component parameters; set up the task log directory;
        report RUNNING; initialise RuntimeConfig, the session and the
        federation; instantiate the component class named by
        ``component_parameters['CodePath']``; run it; save its output data
        and model through the tracker; and report the final status via
        ``TaskExecutor.sync_task_status``.

        Takes no parameters (all inputs come from the command line) and
        returns None. ``Exception`` subclasses raised inside the try blocks
        are logged and recorded as ``TaskStatus.FAILED`` on the task object
        instead of being propagated.
        """
        task = Task()
        task.f_create_time = current_timestamp()
        try:
            parser = argparse.ArgumentParser()
            parser.add_argument('-j', '--job_id', required=True, type=str, help="job id")
            parser.add_argument('-n', '--component_name', required=True, type=str,
                                help="component name")
            parser.add_argument('-t', '--task_id', required=True, type=str, help="task id")
            parser.add_argument('-r', '--role', required=True, type=str, help="role")
            parser.add_argument('-p', '--party_id', required=True, type=str, help="party id")
            parser.add_argument('-c', '--config', required=True, type=str, help="task config")
            parser.add_argument('--processors_per_node', help="processors_per_node", type=int)
            parser.add_argument('--job_server', help="job server", type=str)
            args = parser.parse_args()
            schedule_logger(args.job_id).info('enter task process')
            schedule_logger(args.job_id).info(args)
            # init function args
            if args.job_server:
                # The port is taken as the text after the first ':' —
                # presumably job_server is "host:port"; verify against caller.
                RuntimeConfig.init_config(HTTP_PORT=args.job_server.split(':')[1])
                RuntimeConfig.set_process_role(ProcessRole.EXECUTOR)
            job_id = args.job_id
            component_name = args.component_name
            task_id = args.task_id
            role = args.role
            # party_id arrives as a string on the command line.
            party_id = int(args.party_id)
            executor_pid = os.getpid()
            task_config = file_utils.load_json_conf(args.config)
            job_parameters = task_config['job_parameters']
            job_initiator = task_config['job_initiator']
            job_args = task_config['job_args']
            task_input_dsl = task_config['input']
            task_output_dsl = task_config['output']
            component_parameters = TaskExecutor.get_parameters(job_id, component_name, role, party_id)
            task_parameters = task_config['task_parameters']
            module_name = task_config['module_name']
            TaskExecutor.monkey_patch()
        except Exception as e:
            # Setup failed. FAILED is only set on the local Task object here;
            # sync_task_status is not called on this path.
            traceback.print_exc()
            schedule_logger().exception(e)
            task.f_status = TaskStatus.FAILED
            return
        try:
            # Log layout: <job log dir>/<role>/<party_id>/<component_name>.
            job_log_dir = os.path.join(job_utils.get_job_log_directory(job_id=job_id), role, str(party_id))
            task_log_dir = os.path.join(job_log_dir, component_name)
            log_utils.LoggerFactory.set_directory(directory=task_log_dir, parent_log_dir=job_log_dir,
                                                  append_to_parent_log=True, force=True)

            task.f_job_id = job_id
            task.f_component_name = component_name
            task.f_task_id = task_id
            task.f_role = role
            task.f_party_id = party_id
            task.f_operator = 'python_operator'
            tracker = Tracking(job_id=job_id, role=role, party_id=party_id, component_name=component_name,
                               task_id=task_id,
                               model_id=job_parameters['model_id'],
                               model_version=job_parameters['model_version'],
                               component_module_name=module_name)
            task.f_start_time = current_timestamp()
            task.f_run_ip = get_lan_ip()
            task.f_run_pid = executor_pid
            # CodePath is split on '/': the last segment is the class name,
            # the second-to-last is the module file (its '.py' is removed),
            # and the remaining leading segments form the dotted package,
            # e.g. 'a/b/mod.py/Cls' -> package 'a.b.mod', class 'Cls'.
            run_class_paths = component_parameters.get('CodePath').split('/')
            run_class_package = '.'.join(run_class_paths[:-2]) + '.' + run_class_paths[-2].replace('.py', '')
            run_class_name = run_class_paths[-1]
            # Report RUNNING before the session/federation are initialised.
            task.f_status = TaskStatus.RUNNING
            TaskExecutor.sync_task_status(job_id=job_id, component_name=component_name, task_id=task_id, role=role,
                                          party_id=party_id, initiator_party_id=job_initiator.get('party_id', None),
                                          initiator_role=job_initiator.get('role', None),
                                          task_info=task.to_json())

            # init environment, process is shared globally
            RuntimeConfig.init_config(WORK_MODE=job_parameters['work_mode'],
                                      BACKEND=job_parameters.get('backend', 0))
            # The per-node processor option is only passed on when a positive
            # value was given and the backend is EGGROLL.
            if args.processors_per_node and args.processors_per_node > 0 and RuntimeConfig.BACKEND == Backend.EGGROLL:
                session_options = {"eggroll.session.processors.per.node": args.processors_per_node}
            else:
                session_options = {}
            # The session id is derived from (task_id, role, party_id), while
            # the federation is keyed by task_id alone.
            session.init(job_id=job_utils.generate_session_id(task_id, role, party_id),
                         mode=RuntimeConfig.WORK_MODE,
                         backend=RuntimeConfig.BACKEND,
                         options=session_options)
            federation.init(job_id=task_id, runtime_conf=component_parameters)

            schedule_logger().info('run {} {} {} {} {} task'.format(job_id, component_name, task_id, role, party_id))
            schedule_logger().info(component_parameters)
            schedule_logger().info(task_input_dsl)
            task_run_args = TaskExecutor.get_task_run_args(job_id=job_id, role=role, party_id=party_id,
                                                           task_id=task_id,
                                                           job_args=job_args,
                                                           job_parameters=job_parameters,
                                                           task_parameters=task_parameters,
                                                           input_dsl=task_input_dsl,
                                                           if_save_as_task_input_data=job_parameters.get("save_as_task_input_data", SAVE_AS_TASK_INPUT_DATA_SWITCH)
                                                           )
            # Import the module dynamically and instantiate the component
            # class with no constructor arguments.
            run_object = getattr(importlib.import_module(run_class_package), run_class_name)()
            run_object.set_tracker(tracker=tracker)
            run_object.set_taskid(taskid=task_id)
            run_object.run(component_parameters, task_run_args)
            # Outputs are always saved; when the output DSL names no data or
            # model, the fallback names 'component' / 'default' are used.
            output_data = run_object.save_data()
            tracker.save_output_data_table(output_data, task_output_dsl.get('data')[0] if task_output_dsl.get('data') else 'component')
            output_model = run_object.export_model()
            # There is only one model output at the current dsl version.
            tracker.save_output_model(output_model, task_output_dsl['model'][0] if task_output_dsl.get('model') else 'default')
            task.f_status = TaskStatus.COMPLETE
        except Exception as e:
            task.f_status = TaskStatus.FAILED
            schedule_logger().exception(e)
        finally:
            # Always attempt to report the final status; sync_success records
            # whether that report went through.
            # NOTE(review): f_start_time is assigned only part-way through the
            # try block above; if an earlier statement raised, the subtraction
            # below relies on whatever default Task gives f_start_time. If it
            # raises, the sync call is skipped — confirm this cannot happen.
            sync_success = False
            try:
                task.f_end_time = current_timestamp()
                task.f_elapsed = task.f_end_time - task.f_start_time
                task.f_update_time = current_timestamp()
                TaskExecutor.sync_task_status(job_id=job_id, component_name=component_name, task_id=task_id, role=role,
                                              party_id=party_id,
                                              initiator_party_id=job_initiator.get('party_id', None),
                                              initiator_role=job_initiator.get('role', None),
                                              task_info=task.to_json())
                sync_success = True
            except Exception as e:
                traceback.print_exc()
                schedule_logger().exception(e)
        # NOTE(review): the lines below run outside any try block and read
        # f_start_time / f_end_time / f_elapsed, so they depend on the same
        # Task defaults as above if the task failed early — confirm.
        # The division by 1000 presumably converts milliseconds to seconds;
        # verify against current_timestamp().
        schedule_logger().info('task {} {} {} start time: {}'.format(task_id, role, party_id, timestamp_to_date(task.f_start_time)))
        schedule_logger().info('task {} {} {} end time: {}'.format(task_id, role, party_id, timestamp_to_date(task.f_end_time)))
        schedule_logger().info('task {} {} {} takes {}s'.format(task_id, role, party_id, int(task.f_elapsed)/1000))
        # When the final status could not be synced, the summary reports
        # FAILED regardless of the locally recorded status.
        schedule_logger().info(
            'finish {} {} {} {} {} {} task'.format(job_id, component_name, task_id, role, party_id, task.f_status if sync_success else TaskStatus.FAILED))

        print('finish {} {} {} {} {} {} task'.format(job_id, component_name, task_id, role, party_id, task.f_status if sync_success else TaskStatus.FAILED))
Exemple #12
0
        for i in range(len(self.data)):
            ori_feature = [0 for i in range(len(tags))]

            for tag in self.data[i][1].split(" ", -1):
                ori_feature[tag_dict.get(tag)] = 1

            ori_feature = np.asarray(ori_feature, dtype='int')
            self.assertTrue(
                np.abs(ori_feature - features).all() < consts.FLOAT_ZERO)


def save_data(input_data, table_name, namespace):
    """Forward ``input_data`` to ``storage.save_data`` for the given table.

    Args:
        input_data: data to store, passed through unchanged.
        table_name: name of the target table.
        namespace: namespace of the target table.

    Returns None; the result of ``storage.save_data`` is discarded.
    """
    storage.save_data(input_data, table_name, namespace)


if __name__ == '__main__':
    # Script entry point: start eggroll and a two-party federation (local
    # party is guest 10000, the host is 9999), then run the unittest suite.
    # NOTE(review): the eggroll id carries a timestamp suffix while the
    # federation id is the bare "test_dataio" — confirm this is intended.
    eggroll.init("test_dataio" + str(int(time.time())))
    federation.init(
        "test_dataio",
        {
            "local": {"role": "guest", "party_id": 10000},
            "role": {"host": [9999], "guest": [10000]},
        },
    )
    unittest.main()
Exemple #13
0
        result_data = selection_guest.save_data()
        local_data = result_data.collect()
        print("data in transform")
        for k, v in local_data:
            print("k: {}, v: {}".format(k, v.features))

    def tearDown(self):
        """Destroy the table held in ``self.table`` after the test."""
        self.table.destroy()


if __name__ == '__main__':
    # Script entry point: the job id is the first command-line argument.
    import sys
    job_id = str(sys.argv[1])

    eggroll.init(job_id)
    # Two-party layout: the local party is guest 9999, the host is 10000.
    federation.init(
        job_id,
        {
            "local": {"role": "guest", "party_id": 9999},
            "role": {"host": [10000], "guest": [9999]},
        },
    )

    # The test case is driven by hand instead of through unittest.main().
    selection_obj = TestHeteroFeatureSelection()
    selection_obj.test_feature_selection()
                        default='10000')
    parser.add_argument('-j',
                        '--job_id',
                        required=True,
                        type=str,
                        help="job_id")

    args = parser.parse_args()
    job_id = args.job_id
    guest_id = args.gid
    host_id = args.hid
    role = args.role

    session.init(job_id)
    federation.init(
        job_id, {
            "local": {
                "role": role,
                "party_id": guest_id if role == GUEST else host_id
            },
            "role": {
                "host": [host_id],
                "guest": [guest_id]
            }
        })

    test_obj = TestHeteroFeatureBinning(role, guest_id, host_id)
    # homo_obj.test_homo_lr()
    test_obj.test_feature_binning()
    test_obj.tearDown()
Exemple #15
0
        meta_obj = model.get('HomoLogisticRegressionMeta')
        print("HomoLR meta info")
        print(meta_obj)

        param_obj = model.get('HomoLogisticRegressionParam')
        print("HomoLR param info")
        print(param_obj)

    def tearDown(self):
        """Destroy the table held in ``self.table`` after the test."""
        self.table.destroy()


if __name__ == '__main__':
    # Script entry point: the job id is the first command-line argument.
    import sys
    job_id = str(sys.argv[1])

    eggroll.init(job_id)
    # The local party is the arbiter, which shares id 10000 with the host
    # in this layout; the guest is 9999.
    federation.init(
        job_id,
        {
            "local": {"role": "arbiter", "party_id": 10000},
            "role": {
                "host": [10000],
                "guest": [9999],
                "arbiter": [10000],
            },
        },
    )

    # Only the cross-validation test is run; the plain LR test is disabled.
    homo_obj = TestHomoLR()
    # homo_obj.test_homo_lr()
    homo_obj.test_cv()
 def init_federation(self, job_id, conf):
     """Initialise the federation by forwarding both arguments to ``federation.init``.

     Args:
         job_id: job identifier, passed through unchanged.
         conf: runtime configuration, passed through unchanged.
     """
     federation.init(job_id, conf)
    parser.add_argument('-j',
                        '--job_id',
                        required=True,
                        type=str,
                        help="job_id")

    args = parser.parse_args()
    job_id = args.job_id
    own_party_id = args.pid
    role = args.role
    print("args: {}".format(args))
    session.init(job_id)
    federation.init(
        job_id, {
            "local": {
                "role": role,
                "party_id": own_party_id
            },
            "role": {
                "host": [str(x) for x in host_id_list],
                "guest": ['9999'],
                "arbiter": ['9998']
            }
        })

    test_obj = TestHomoFeatureBinning(role, own_party_id)
    # homo_obj.test_homo_lr()
    test_obj.test_homo_split_points()
    test_obj.test_homo_split_points(is_sparse=True)
    test_obj.tearDown()
Exemple #18
0
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

from arch.api import eggroll
from arch.api import federation

if __name__ == '__main__':
    eggroll.init("atest")
    federation.init(
        "atest", {
            "local": {
                "role": "host",
                "party_id": 10002
            },
            "role": {
                "host": [10001, 10002],
                "arbiter": [99999],
                "guest": [10001]
            }
        })
    for _tag in range(0, 1000, 2):
        c = eggroll.parallelize(range(_tag), partition=3,
                                persistent=True).map(lambda k, v: (v, k + 1))
        print(c)
        a = _tag
        federation.remote(a,
                          "RsaIntersectTransferVariable.rsa_pubkey",
                          tag="{}".format(_tag))
        federation.remote(c,
                          "RsaIntersectTransferVariable.rsa_pubkey",